Mirror of https://github.com/HChaZZY/Stockfish.git (synced 2025-12-27 04:26:24 +08:00)

Commit: Merge remote-tracking branch 'remotes/nodchip/master' into trainer
@@ -8,9 +8,6 @@
 #include "position.h"
 #include "tt.h"
 
-// evaluate header for learning
-#include "eval/evaluate_common.h"
-
 #include "extra/nnue_data_binpack_format.h"
 
 #include "syzygy/tbprobe.h"
@@ -480,7 +477,7 @@ namespace Learner
 {
     if (fs.read((char*)&p, sizeof(PackedSfenValue))) {
         StateInfo si;
-        tpos.set_from_packed_sfen(p.sfen, &si, th, false);
+        tpos.set_from_packed_sfen(p.sfen, &si, th);
 
         // write as plain text
         ofs << "fen " << tpos.fen() << std::endl;
@@ -10,8 +10,6 @@
 #include "tt.h"
 #include "uci.h"
 
-#include "eval/evaluate_common.h"
-
 #include "extra/nnue_data_binpack_format.h"
 
 #include "nnue/evaluate_nnue_learner.h"
@@ -29,8 +29,6 @@
 #include "uci.h"
 #include "search.h"
 
-#include "eval/evaluate_common.h"
-
 #include "extra/nnue_data_binpack_format.h"
 
 #include "nnue/evaluate_nnue_learner.h"
@@ -58,6 +56,7 @@
 #include <omp.h>
 #endif
 
+extern double global_learning_rate;
 
 using namespace std;
 
@@ -92,12 +91,6 @@ namespace Learner
     static double dest_score_min_value = 0.0;
     static double dest_score_max_value = 1.0;
 
-    // Assume teacher signals are the scores of deep searches,
-    // and convert them into winning probabilities in the trainer.
-    // Sometimes we want to use the winning probabilities in the training
-    // data directly. In those cases, we set false to this variable.
-    static bool convert_teacher_signal_to_winning_probability = true;
-
     // Using stockfish's WDL with win rate model instead of sigmoid
     static bool use_wdl = false;
 
@@ -164,14 +157,6 @@ namespace Learner
     return ((y2 - y1) / epsilon) / winning_probability_coefficient;
 }
 
-// A constant used in elmo (WCSC27). Adjustment required.
-// Since elmo does not internally divide the expression, the value is different.
-// You can set this value with the learn command.
-// 0.33 is equivalent to the constant (0.5) used in elmo (WCSC27)
-double ELMO_LAMBDA = 0.33;
-double ELMO_LAMBDA2 = 0.33;
-double ELMO_LAMBDA_LIMIT = 32000;
-
 // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
 double get_scaled_signal(double signal)
 {
@@ -194,26 +179,7 @@ namespace Learner
 double calculate_p(double teacher_signal, int ply)
 {
     const double scaled_teacher_signal = get_scaled_signal(teacher_signal);
-
-    double p = scaled_teacher_signal;
-    if (convert_teacher_signal_to_winning_probability)
-    {
-        p = winning_percentage(scaled_teacher_signal, ply);
-    }
-
-    return p;
-}
-
-double calculate_lambda(double teacher_signal)
-{
-    // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT
-    // then apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
-    const double lambda =
-        (std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT)
-        ? ELMO_LAMBDA2
-        : ELMO_LAMBDA;
-
-    return lambda;
+    return winning_percentage(scaled_teacher_signal, ply);
 }
 
 double calculate_t(int game_result)
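[Note] After this hunk, calculate_p always converts the scaled teacher score into a winning probability; the convert_teacher_signal_to_winning_probability switch and the ELMO_LAMBDA selection helper are gone. For reference, and as an assumption about code not shown in this hunk, winning_percentage in this trainer applies a logistic map to the score when use_wdl is false (the ply argument only matters for the WDL model):

    p(v) = 1 / (1 + exp(-c * v)),   c = winning_probability_coefficient

which is also consistent with the numerical gradient above: (y2 - y1) / epsilon is a finite-difference slope of the probability curve, and dividing by c undoes the coefficient's scaling.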
@@ -236,21 +202,16 @@ namespace Learner
     const PackedSfenValue& psv,
     double& cross_entropy_eval,
     double& cross_entropy_win,
-    double& cross_entropy,
     double& entropy_eval,
-    double& entropy_win,
-    double& entropy)
+    double& entropy_win)
 {
     // Teacher winning probability.
     const double q = winning_percentage(shallow, psv.gamePly);
     const double p = calculate_p(teacher_signal, psv.gamePly);
     const double t = calculate_t(psv.game_result);
-    const double lambda = calculate_lambda(teacher_signal);
 
     constexpr double epsilon = 0.000001;
 
-    const double m = (1.0 - lambda) * t + lambda * p;
-
     cross_entropy_eval =
         (-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon));
     cross_entropy_win =
@@ -259,11 +220,6 @@ namespace Learner
         (-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon));
     entropy_win =
         (-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon));
-
-    cross_entropy =
-        (-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon));
-    entropy =
-        (-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon));
 }
 
 // Other objective functions may be considered in the future...
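[Note] The accumulators deleted in the last two hunks carried the blended elmo (WCSC27) objective. With p the teacher probability, t the game result mapped into [0, 1] by calculate_t, q the shallow-search probability, λ the mixing weight and ε the small numerical guard, the removed code computed

    m = (1 - λ) * t + λ * p
    cross_entropy = -m * log(q + ε) - (1 - m) * log(1 - q + ε)
    entropy       = -m * log(m + ε) - (1 - m) * log(1 - m + ε)

so cross_entropy - entropy was the KL-style gap between the blended target m and the prediction q. Only the separate eval term (p against q) and win term (t against q) survive this change, which matches the new latest_loss_sum computation later in this file.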
@@ -761,9 +717,6 @@ namespace Learner
 
     std::atomic<bool> stop_flag;
 
-    // Discount rate
-    double discount_rate;
-
     // Option to exclude early stage from learning
     int reduction_gameply;
 
@@ -796,7 +749,6 @@ namespace Learner
 
     uint64_t eval_save_interval;
     uint64_t loss_output_interval;
-    uint64_t mirror_percentage;
 
     // Loss calculation.
     // done: Number of phases targeted this time
@@ -840,20 +792,18 @@ namespace Learner
     // It doesn't matter if you have disabled the substitution table.
     TT.new_search();
 
-    std::cout << "PROGRESS: " << now_string() << ", ";
-    std::cout << sr.total_done << " sfens";
-    std::cout << ", iteration " << epoch;
-    std::cout << ", eta = " << Eval::get_eta() << ", ";
+    cout << "PROGRESS: " << now_string() << ", ";
+    cout << sr.total_done << " sfens";
+    cout << ", iteration " << epoch;
+    cout << ", learning rate = " << global_learning_rate << ", ";
 
     // For calculation of verification data loss
-    atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win, test_sum_cross_entropy;
-    atomic<double> test_sum_entropy_eval, test_sum_entropy_win, test_sum_entropy;
+    atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win;
+    atomic<double> test_sum_entropy_eval, test_sum_entropy_win;
     test_sum_cross_entropy_eval = 0;
     test_sum_cross_entropy_win = 0;
-    test_sum_cross_entropy = 0;
     test_sum_entropy_eval = 0;
     test_sum_entropy_win = 0;
-    test_sum_entropy = 0;
 
     // norm for learning
     atomic<double> sum_norm;
@@ -869,7 +819,7 @@ namespace Learner
     auto& pos = th->rootPos;
     StateInfo si;
     pos.set(StartFEN, false, &si, th);
-    std::cout << "hirate eval = " << Eval::evaluate(pos);
+    cout << "hirate eval = " << Eval::evaluate(pos) << endl;
 
     // It's better to parallelize here, but it's a bit
     // troublesome because the search before slave has not finished.
@@ -893,10 +843,8 @@ namespace Learner
     &ps,
     &test_sum_cross_entropy_eval,
     &test_sum_cross_entropy_win,
-    &test_sum_cross_entropy,
     &test_sum_entropy_eval,
     &test_sum_entropy_win,
-    &test_sum_entropy,
     &sum_norm,
     &task_count,
     &move_accord_count
@@ -924,26 +872,22 @@ namespace Learner
     // For the time being, regarding the win rate and loss terms only in the elmo method
     // Calculate and display the cross entropy.
 
-    double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy;
-    double test_entropy_eval, test_entropy_win, test_entropy;
+    double test_cross_entropy_eval, test_cross_entropy_win;
+    double test_entropy_eval, test_entropy_win;
     calc_cross_entropy(
         deep_value,
         shallow_value,
         ps,
         test_cross_entropy_eval,
         test_cross_entropy_win,
-        test_cross_entropy,
         test_entropy_eval,
-        test_entropy_win,
-        test_entropy);
+        test_entropy_win);
 
     // The total cross entropy need not be abs() by definition.
     test_sum_cross_entropy_eval += test_cross_entropy_eval;
     test_sum_cross_entropy_win += test_cross_entropy_win;
-    test_sum_cross_entropy += test_cross_entropy;
     test_sum_entropy_eval += test_entropy_eval;
     test_sum_entropy_win += test_entropy_win;
-    test_sum_entropy += test_entropy;
     sum_norm += (double)abs(shallow_value);
 
     // Determine if the teacher's move and the score of the shallow search match
@@ -968,7 +912,7 @@ namespace Learner
     while (task_count)
         sleep(1);
 
-    latest_loss_sum += test_sum_cross_entropy - test_sum_entropy;
+    latest_loss_sum += test_sum_cross_entropy_eval - test_sum_entropy_eval;
     latest_loss_count += sr.sfen_for_mse.size();
 
     // learn_cross_entropy may be called train cross
@@ -978,27 +922,24 @@ namespace Learner
 
     if (sr.sfen_for_mse.size() && done)
     {
-        cout
-            << " , test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size()
+        cout << "INFO: "
+            << "test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size()
             << " , test_cross_entropy_win = " << test_sum_cross_entropy_win / sr.sfen_for_mse.size()
             << " , test_entropy_eval = " << test_sum_entropy_eval / sr.sfen_for_mse.size()
             << " , test_entropy_win = " << test_sum_entropy_win / sr.sfen_for_mse.size()
-            << " , test_cross_entropy = " << test_sum_cross_entropy / sr.sfen_for_mse.size()
-            << " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size()
             << " , norm = " << sum_norm
-            << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%";
+            << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%"
+            << endl;
 
         if (done != static_cast<uint64_t>(-1))
         {
-            cout
-                << " , learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done
+            cout << "INFO: "
+                << "learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done
                 << " , learn_cross_entropy_win = " << learn_sum_cross_entropy_win / done
                 << " , learn_entropy_eval = " << learn_sum_entropy_eval / done
                 << " , learn_entropy_win = " << learn_sum_entropy_win / done
-                << " , learn_cross_entropy = " << learn_sum_cross_entropy / done
-                << " , learn_entropy = " << learn_sum_entropy / done;
+                << endl;
         }
-        cout << endl;
     }
     else
     {
@@ -1008,10 +949,8 @@ namespace Learner
     // Clear 0 for next time.
     learn_sum_cross_entropy_eval = 0.0;
     learn_sum_cross_entropy_win = 0.0;
-    learn_sum_cross_entropy = 0.0;
     learn_sum_entropy_eval = 0.0;
     learn_sum_entropy_win = 0.0;
-    learn_sum_entropy = 0.0;
 }
 
 void LearnerThink::thread_worker(size_t thread_id)
@@ -1060,7 +999,7 @@ namespace Learner
 
     // Lock the evaluation function so that it is not used during updating.
     lock_guard<shared_timed_mutex> write_lock(nn_mutex);
-    Eval::NNUE::UpdateParameters(epoch);
+    Eval::NNUE::UpdateParameters();
 }
 
 ++epoch;
@@ -1137,8 +1076,7 @@ namespace Learner
     goto RETRY_READ;
 
 StateInfo si;
-const bool mirror = prng.rand(100) < mirror_percentage;
-if (pos.set_from_packed_sfen(ps.sfen, &si, th, mirror) != 0)
+if (pos.set_from_packed_sfen(ps.sfen, &si, th) != 0)
 {
     // I got a strange sfen. Should be debugged!
     // Since it is an illegal sfen, it may not be
@@ -1157,8 +1095,7 @@ namespace Learner
 
 if (!pos.pseudo_legal((Move)ps.move) || !pos.legal((Move)ps.move))
 {
-    sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
-    continue;
+    goto RETRY_READ;
 }
 
 pos.do_move((Move)ps.move, state[ply++]);
@@ -1205,29 +1142,23 @@ namespace Learner
         : -Eval::evaluate(pos);
 
     // Calculate loss for training data
-    double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
-    double learn_entropy_eval, learn_entropy_win, learn_entropy;
+    double learn_cross_entropy_eval, learn_cross_entropy_win;
+    double learn_entropy_eval, learn_entropy_win;
     calc_cross_entropy(
         deep_value,
         shallow_value,
         ps,
         learn_cross_entropy_eval,
         learn_cross_entropy_win,
-        learn_cross_entropy,
         learn_entropy_eval,
-        learn_entropy_win,
-        learn_entropy);
+        learn_entropy_win);
 
     learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
     learn_sum_cross_entropy_win += learn_cross_entropy_win;
-    learn_sum_cross_entropy += learn_cross_entropy;
     learn_sum_entropy_eval += learn_entropy_eval;
     learn_sum_entropy_win += learn_entropy_win;
-    learn_sum_entropy += learn_entropy;
 
-    const double example_weight =
-        (discount_rate != 0 && ply != (int)pv.size()) ? discount_rate : 1.0;
-    Eval::NNUE::AddExample(pos, rootColor, ps, example_weight);
+    Eval::NNUE::AddExample(pos, rootColor, ps, 1.0);
 
     // Since the processing is completed, the counter of the processed number is incremented
     sr.total_done++;
@@ -1246,18 +1177,12 @@ namespace Learner
         break;
     }
 
-    // Processing when adding the gradient to the node on each PV.
-    //If discount_rate is 0, this process is not performed.
-    if (discount_rate != 0)
-        pos_add_grad();
-
     pos.do_move(m, state[ply++]);
 }
 
 if (illegal_move)
 {
-    sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
-    continue;
+    goto RETRY_READ;
 }
 
 // Since we have reached the end phase of PV, add the slope here.
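[Note] In both rejection paths above, goto RETRY_READ replaces continue. Presumably the intent is that a position rejected for an illegal move no longer consumes a training-loop iteration; the worker jumps back and reads a replacement sfen instead, the same recovery path already used when set_from_packed_sfen fails.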
@@ -1276,18 +1201,18 @@ namespace Learner
     {
         // When EVAL_SAVE_ONLY_ONCE is defined,
         // Do not dig a subfolder because I want to save it only once.
-        Eval::save_eval("");
+        Eval::NNUE::save_eval("");
     }
     else if (is_final)
     {
-        Eval::save_eval("final");
+        Eval::NNUE::save_eval("final");
         return true;
     }
     else
     {
         static int dir_number = 0;
         const std::string dir_name = std::to_string(dir_number++);
-        Eval::save_eval(dir_name);
+        Eval::NNUE::save_eval(dir_name);
 
         if (newbob_decay != 1.0 && latest_loss_count > 0) {
             static int trials = newbob_num_trials;
@@ -1310,12 +1235,12 @@ namespace Learner
     if (--trials > 0 && !is_final)
     {
         cout
-            << "reducing learning rate scale from " << newbob_scale
+            << "reducing learning rate from " << newbob_scale
            << " to " << (newbob_scale * newbob_decay)
            << " (" << trials << " more trials)" << endl;
 
         newbob_scale *= newbob_decay;
-        Eval::NNUE::SetGlobalLearningRateScale(newbob_scale);
+        global_learning_rate = newbob_scale;
     }
 }
 
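[Note] The step-size control now writes straight to global_learning_rate instead of going through Eval::NNUE::SetGlobalLearningRateScale. The newbob schedule itself appears unchanged: each time the validation loss fails to improve, the rate is multiplied by newbob_decay, so after k consecutive failed trials the rate is eta_0 * newbob_decay^k, until newbob_num_trials runs out.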
@@ -1593,13 +1518,6 @@ namespace Learner
 
     string target_dir;
 
-    // If 0, it will be the default value.
-    double eta1 = 0.0;
-    double eta2 = 0.0;
-    double eta3 = 0.0;
-    uint64_t eta1_epoch = 0; // eta2 is not applied by default
-    uint64_t eta2_epoch = 0; // eta3 is not applied by default
-
     // --- Function that only shuffles the teacher aspect
 
     // normal shuffle
@@ -1640,15 +1558,7 @@ namespace Learner
     // Turn on if you want to pass a pre-shuffled file.
     bool no_shuffle = false;
 
-    // elmo lambda
-    ELMO_LAMBDA = 0.33;
-    ELMO_LAMBDA2 = 0.33;
-    ELMO_LAMBDA_LIMIT = 32000;
-
-    // Discount rate. If this is set to a value other than 0,
-    // the slope will be added even at other than the PV termination.
-    // (At that time, apply this discount rate)
-    double discount_rate = 0;
+    global_learning_rate = 1.0;
 
     // if (gamePly <rand(reduction_gameply)) continue;
     // An option to exclude the early stage from the learning target moderately like
@@ -1662,7 +1572,6 @@ namespace Learner
 
     uint64_t eval_save_interval = LEARN_EVAL_SAVE_INTERVAL;
     uint64_t loss_output_interval = 0;
-    uint64_t mirror_percentage = 0;
 
     string validation_set_file_name;
     string seed;
@@ -1696,12 +1605,7 @@ namespace Learner
     else if (option == "batchsize") is >> mini_batch_size;
 
     // learning rate
-    else if (option == "eta") is >> eta1;
-    else if (option == "eta1") is >> eta1; // alias
-    else if (option == "eta2") is >> eta2;
-    else if (option == "eta3") is >> eta3;
-    else if (option == "eta1_epoch") is >> eta1_epoch;
-    else if (option == "eta2_epoch") is >> eta2_epoch;
+    else if (option == "lr") is >> global_learning_rate;
 
     // Accept also the old option name.
     else if (option == "use_draw_in_training"
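[Note] The eta/eta1/eta2/eta3 and eta*_epoch schedule options collapse into the single lr option read into global_learning_rate. An illustrative invocation built only from options visible in this parser (the values and file name are made up; per the fallback branch below, an unrecognized token is treated as a training file name):

    learn training_data.bin lr 1.0 batchsize 1000000 eval_save_interval 100000000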
@@ -1720,17 +1624,9 @@ namespace Learner
 
     else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient;
 
-    // Discount rate
-    else if (option == "discount_rate") is >> discount_rate;
-
     // Using WDL with win rate model instead of sigmoid
     else if (option == "use_wdl") is >> use_wdl;
 
-    // LAMBDA
-    else if (option == "lambda") is >> ELMO_LAMBDA;
-    else if (option == "lambda2") is >> ELMO_LAMBDA2;
-    else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT;
-
     else if (option == "reduction_gameply") is >> reduction_gameply;
 
     // shuffle related
@@ -1751,7 +1647,6 @@ namespace Learner
 
     else if (option == "eval_save_interval") is >> eval_save_interval;
     else if (option == "loss_output_interval") is >> loss_output_interval;
-    else if (option == "mirror_percentage") is >> mirror_percentage;
     else if (option == "validation_set_file_name") is >> validation_set_file_name;
 
     // Rabbit convert related
@@ -1767,7 +1662,6 @@ namespace Learner
     else if (option == "src_score_max_value") is >> src_score_max_value;
     else if (option == "dest_score_min_value") is >> dest_score_min_value;
     else if (option == "dest_score_max_value") is >> dest_score_max_value;
-    else if (option == "convert_teacher_signal_to_winning_probability") is >> convert_teacher_signal_to_winning_probability;
     else if (option == "seed") is >> seed;
     // Otherwise, it's a filename.
     else
@@ -1903,8 +1797,7 @@ namespace Learner
     cout << "nn_batch_size : " << nn_batch_size << endl;
     cout << "nn_options : " << nn_options << endl;
 
-    cout << "learning rate : " << eta1 << " , " << eta2 << " , " << eta3 << endl;
-    cout << "eta_epoch : " << eta1_epoch << " , " << eta2_epoch << endl;
+    cout << "learning rate : " << global_learning_rate << endl;
     cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl;
     cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl;
     cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl;
@@ -1917,17 +1810,10 @@ namespace Learner
         cout << "scheduling : default" << endl;
     }
 
-    cout << "discount rate : " << discount_rate << endl;
-
     // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1.
     reduction_gameply = max(reduction_gameply, 1);
     cout << "reduction_gameply : " << reduction_gameply << endl;
 
-    cout << "LAMBDA : " << ELMO_LAMBDA << endl;
-    cout << "LAMBDA2 : " << ELMO_LAMBDA2 << endl;
-    cout << "LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl;
-
-    cout << "mirror_percentage : " << mirror_percentage << endl;
     cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl;
     cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl;
 
@@ -1942,13 +1828,31 @@ namespace Learner
 
     Threads.main()->ponder = false;
 
+    // About Search::Limits
+    // Be careful because this member variable is global and affects other threads.
+    {
+        auto& limits = Search::Limits;
+
+        // Make the search equivalent to the "go infinite" command. (Because it is troublesome if time management is done)
+        limits.infinite = true;
+
+        // Since PV is an obstacle when displayed, erase it.
+        limits.silent = true;
+
+        // If you use this, it will be compared with the accumulated nodes of each thread. Therefore, do not use it.
+        limits.nodes = 0;
+
+        // depth is also processed by the one passed as an argument of Learner::search().
+        limits.depth = 0;
+    }
+
     cout << "init_training.." << endl;
-    Eval::NNUE::InitializeTraining(eta1, eta1_epoch, eta2, eta2_epoch, eta3);
+    Eval::NNUE::InitializeTraining(seed);
     Eval::NNUE::SetBatchSize(nn_batch_size);
     Eval::NNUE::SetOptions(nn_options);
     if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) {
         // Save the current net to [EvalSaveDir]\original.
-        Eval::save_eval("original");
+        Eval::NNUE::save_eval("original");
 
         // Set the folder above to best_nn_directory so that the trainer can
         // resotre the network parameters from the original net file.
@@ -1959,7 +1863,6 @@ namespace Learner
     cout << "init done." << endl;
 
     // Reflect other option settings.
-    learn_think.discount_rate = discount_rate;
     learn_think.eval_limit = eval_limit;
     learn_think.save_only_once = save_only_once;
     learn_think.sr.no_shuffle = no_shuffle;
@@ -1971,7 +1874,6 @@ namespace Learner
 
     learn_think.eval_save_interval = eval_save_interval;
     learn_think.loss_output_interval = loss_output_interval;
-    learn_think.mirror_percentage = mirror_percentage;
 
     // Start a thread that loads the phase file in the background
     // (If this is not started, mse cannot be calculated.)
@@ -1992,24 +1894,6 @@ namespace Learner
     // Calculate rmse once at this point (timing of 0 sfen)
     // sr.calc_rmse();
 
-    // About Search::Limits
-    // Be careful because this member variable is global and affects other threads.
-    {
-        auto& limits = Search::Limits;
-
-        // Make the search equivalent to the "go infinite" command. (Because it is troublesome if time management is done)
-        limits.infinite = true;
-
-        // Since PV is an obstacle when displayed, erase it.
-        limits.silent = true;
-
-        // If you use this, it will be compared with the accumulated nodes of each thread. Therefore, do not use it.
-        limits.nodes = 0;
-
-        // depth is also processed by the one passed as an argument of Learner::search().
-        limits.depth = 0;
-    }
-
     if (newbob_decay != 1.0) {
         learn_think.calc_loss(0, -1);
         learn_think.best_loss = learn_think.latest_loss_sum / learn_think.latest_loss_count;
@@ -23,11 +23,7 @@ using LearnFloatType = float;
 // configure
 // ======================
 
-// ----------------------
-// Learning with the method of elmo (WCSC27)
-// ----------------------
-
-#define LOSS_FUNCTION "ELMO_METHOD(WCSC27)"
+#define LOSS_FUNCTION "cross_entropy_eval"
 
 // ----------------------
 // Definition of struct used in Learner
@@ -1,18 +0,0 @@
-#include "learning_tools.h"
-
-#include "misc.h"
-
-using namespace Eval;
-
-namespace EvalLearningTools
-{
-
-    // --- static variables
-
-    double Weight::eta;
-    double Weight::eta1;
-    double Weight::eta2;
-    double Weight::eta3;
-    uint64_t Weight::eta1_epoch;
-    uint64_t Weight::eta2_epoch;
-}
@@ -1,99 +0,0 @@
-#ifndef __LEARN_WEIGHT_H__
-#define __LEARN_WEIGHT_H__
-
-// A set of machine learning tools related to the weight array used for machine learning of evaluation functions
-
-#include "learn.h"
-
-#include "misc.h" // PRNG , my_insertion_sort
-
-#include <array>
-#include <cmath> // std::sqrt()
-
-namespace EvalLearningTools
-{
-    // -------------------------------------------------
-    // Array for learning that stores gradients etc.
-    // -------------------------------------------------
-
-#if defined(_MSC_VER)
-#pragma pack(push,2)
-#elif defined(__GNUC__)
-#pragma pack(2)
-#endif
-    struct Weight
-    {
-        // cumulative value of one mini-batch gradient
-        LearnFloatType g = LearnFloatType(0);
-
-        // Learning rate η(eta) such as AdaGrad.
-        // It is assumed that eta1,2,3,eta1_epoch,eta2_epoch have been set by the time updateFV() is called.
-        // The epoch of update_weights() gradually changes from eta1 to eta2 until eta1_epoch.
-        // After eta2_epoch, gradually change from eta2 to eta3.
-        static double eta;
-        static double eta1;
-        static double eta2;
-        static double eta3;
-        static uint64_t eta1_epoch;
-        static uint64_t eta2_epoch;
-
-        // Batch initialization of eta. If 0 is passed, the default value will be set.
-        static void init_eta(double new_eta1, double new_eta2, double new_eta3,
-            uint64_t new_eta1_epoch, uint64_t new_eta2_epoch)
-        {
-            Weight::eta1 = (new_eta1 != 0) ? new_eta1 : 30.0;
-            Weight::eta2 = (new_eta2 != 0) ? new_eta2 : 30.0;
-            Weight::eta3 = (new_eta3 != 0) ? new_eta3 : 30.0;
-            Weight::eta1_epoch = (new_eta1_epoch != 0) ? new_eta1_epoch : 0;
-            Weight::eta2_epoch = (new_eta2_epoch != 0) ? new_eta2_epoch : 0;
-        }
-
-        // Set eta according to epoch.
-        static void calc_eta(uint64_t epoch)
-        {
-            if (Weight::eta1_epoch == 0) // Exclude eta2
-                Weight::eta = Weight::eta1;
-            else if (epoch < Weight::eta1_epoch)
-                // apportion
-                Weight::eta = Weight::eta1 + (Weight::eta2 - Weight::eta1) * epoch / Weight::eta1_epoch;
-            else if (Weight::eta2_epoch == 0) // Exclude eta3
-                Weight::eta = Weight::eta2;
-            else if (epoch < Weight::eta2_epoch)
-                Weight::eta = Weight::eta2 + (Weight::eta3 - Weight::eta2) * (epoch - Weight::eta1_epoch) / (Weight::eta2_epoch - Weight::eta1_epoch);
-            else
-                Weight::eta = Weight::eta3;
-        }
-
-        template <typename T> void updateFV(T& v) { updateFV(v, 1.0); }
-
-        // grad setting
-        template <typename T> void set_grad(const T& g_) { g = g_; }
-
-        // Add grad
-        template <typename T> void add_grad(const T& g_) { g += g_; }
-
-        LearnFloatType get_grad() const { return g; }
-    };
-#if defined(_MSC_VER)
-#pragma pack(pop)
-#elif defined(__GNUC__)
-#pragma pack(0)
-#endif
-
-    // Turned weight array
-    // In order to be able to handle it transparently, let's have the same member as Weight.
-    struct Weight2
-    {
-        Weight w[2];
-
-        //Evaluate your turn, eta 1/8.
-        template <typename T> void updateFV(std::array<T, 2>& v) { w[0].updateFV(v[0] , 1.0); w[1].updateFV(v[1],1.0/8.0); }
-
-        template <typename T> void set_grad(const std::array<T, 2>& g) { for (int i = 0; i<2; ++i) w[i].set_grad(g[i]); }
-        template <typename T> void add_grad(const std::array<T, 2>& g) { for (int i = 0; i<2; ++i) w[i].add_grad(g[i]); }
-
-        std::array<LearnFloatType, 2> get_grad() const { return std::array<LearnFloatType, 2>{w[0].get_grad(), w[1].get_grad()}; }
-    };
-}
-
-#endif
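[Note] The deleted learning_tools.h held the AdaGrad-era weight helpers whose eta schedule the single global_learning_rate replaces. Reading the calc_eta above directly, with epoch e, e1 = eta1_epoch and e2 = eta2_epoch:

    eta(e) = eta1                                          if e1 == 0
           = eta1 + (eta2 - eta1) * e / e1                 if e <  e1
           = eta2                                          if e2 == 0
           = eta2 + (eta3 - eta2) * (e - e1) / (e2 - e1)   if e1 <= e < e2
           = eta3                                          otherwise

with 30.0 substituted by init_eta for any rate passed as 0.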
@@ -259,7 +259,7 @@ namespace Learner {
     return make_piece(c, pr);
 }
 
-int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror)
+int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th)
 {
     SfenPacker packer;
     auto& stream = packer.stream;
@@ -280,16 +280,8 @@ namespace Learner {
     pos.pieceList[B_KING][0] = SQUARE_NB;
 
     // First the position of the ball
-    if (mirror)
-    {
-        for (auto c : Colors)
-            pos.board[flip_file((Square)stream.read_n_bit(6))] = make_piece(c, KING);
-    }
-    else
-    {
-        for (auto c : Colors)
-            pos.board[stream.read_n_bit(6)] = make_piece(c, KING);
-    }
+    for (auto c : Colors)
+        pos.board[stream.read_n_bit(6)] = make_piece(c, KING);
 
     // Piece placement
     for (Rank r = RANK_8; r >= RANK_1; --r)
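[Note] The branch deleted here was the mirror data augmentation: the two kings (and, in the following hunks, each board square and the en passant square) were reflected left to right through flip_file before unpacking. The "ball" in the comment is a machine translation of the Japanese 玉, i.e. the king. A minimal sketch of such a horizontal flip for Stockfish-style 0..63 square indices (the helper name is illustrative, not from this patch; it assumes sq = 8 * rank + file with file 0 = FILE_A):

    // Mirror a square across the board's vertical axis: FILE_A <-> FILE_H.
    // XOR with 7 inverts the three low (file) bits of the index.
    constexpr int mirror_file_sketch(int sq) { return sq ^ 7; }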
@@ -297,9 +289,6 @@ namespace Learner {
     for (File f = FILE_A; f <= FILE_H; ++f)
     {
         auto sq = make_square(f, r);
-        if (mirror) {
-            sq = flip_file(sq);
-        }
 
         // it seems there are already balls
         Piece pc;
@@ -355,9 +344,6 @@ namespace Learner {
     // En passant square. Ignore if no pawn capture is possible
     if (stream.read_one_bit()) {
         Square ep_square = static_cast<Square>(stream.read_n_bit(6));
-        if (mirror) {
-            ep_square = flip_file(ep_square);
-        }
         pos.st->epSquare = ep_square;
 
         if (!(pos.attackers_to(pos.st->epSquare) & pos.pieces(pos.sideToMove, PAWN))
@@ -13,7 +13,7 @@ class Thread;
 
 namespace Learner {
 
-    int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror);
+    int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th);
 
     PackedSfen sfen_pack(Position& pos);
 }