diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp
index 7021fd7f..98c8e32e 100644
--- a/src/learn/learner.cpp
+++ b/src/learn/learner.cpp
@@ -66,7 +66,7 @@ using namespace std;
 //extern Book::BookMoveSelector book;
 
 template <typename T>
-T operator += (std::atomic<T>& x, const T rhs)
+T operator +=(std::atomic<T>& x, const T rhs)
 {
     T old = x.load(std::memory_order_consume);
     // It is allowed that the value is rewritten from other thread at this timing.
@@ -84,8 +84,9 @@ namespace Learner
     static bool use_draw_games_in_training = false;
     static bool use_draw_games_in_validation = false;
     static bool skip_duplicated_positions_in_training = true;
-    // 1.0 / PawnValueEg / 4.0 * log(10.0)
-    static double winning_probability_coefficient = 0.00276753015984861260098316280611;
+
+    static double winning_probability_coefficient = 1.0 / PawnValueEg / 4.0 * std::log(10.0);
+
     // Score scale factors. ex) If we set src_score_min_value = 0.0,
     // src_score_max_value = 1.0, dest_score_min_value = 0.0,
     // dest_score_max_value = 10000.0, [0.0, 1.0] will be scaled to [0, 10000].
@@ -93,6 +94,7 @@ namespace Learner
     static double src_score_max_value = 1.0;
     static double dest_score_min_value = 0.0;
    static double dest_score_max_value = 1.0;
+
     // Assume teacher signals are the scores of deep searches, and convert them into winning
     // probabilities in the trainer. Sometimes we want to use the winning probabilities in the training
     // data directly. In those cases, we set false to this variable.
@@ -102,7 +104,7 @@ namespace Learner
     // generation and training don't work well.
     // https://discordapp.com/channels/435943710472011776/733545871911813221/748524079761326192
     // This CANNOT be static since it's used elsewhere.
-    bool use_raw_nnue_eval = true;
+    bool use_raw_nnue_eval = false;
 
     // Using WDL with win rate model instead of sigmoid
     static bool use_wdl = false;
@@ -111,38 +113,37 @@ namespace Learner
     // command to learn from the generated game (learn)
     // -----------------------------------
 
-    // ordinary sigmoid function
-    double sigmoid(double x)
-    {
-        return 1.0 / (1.0 + std::exp(-x));
-    }
-
     // A function that converts the evaluation value to the winning rate [0,1]
     double winning_percentage(double value)
     {
         // 1/(1+10^(-Eval/4))
         // = 1/(1+e^(-Eval/4*ln(10))
         // = sigmoid(Eval/4*ln(10))
-        return sigmoid(value * winning_probability_coefficient);
+        return Math::sigmoid(value * winning_probability_coefficient);
     }
 
     // A function that converts the evaluation value to the winning rate [0,1]
     double winning_percentage_wdl(double value, int ply)
     {
+        constexpr double wdl_total = 1000.0;
+        constexpr double draw_score = 0.5;
+
         double wdl_w = UCI::win_rate_model_double(value, ply);
         double wdl_l = UCI::win_rate_model_double(-value, ply);
-        double wdl_d = 1000.0 - wdl_w - wdl_l;
+        double wdl_d = wdl_total - wdl_w - wdl_l;
 
-        return (wdl_w + wdl_d / 2.0) / 1000.0;
+        return (wdl_w + wdl_d * draw_score) / wdl_total;
     }
 
     // A function that converts the evaluation value to the winning rate [0,1]
     double winning_percentage(double value, int ply)
     {
-        if (use_wdl) {
+        if (use_wdl)
+        {
             return winning_percentage_wdl(value, ply);
         }
-        else {
+        else
+        {
             return winning_percentage(value);
         }
     }
@@ -151,7 +152,7 @@ namespace Learner
     {
         double p = deep_win_rate;
         double q = winning_percentage(shallow_eval, ply);
-        return -p * std::log(q) - (1 - p) * std::log(1 - q);
+        return -p * std::log(q) - (1.0 - p) * std::log(1.0 - q);
     }
 
     double calc_d_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply)
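Note: the coefficient rewrite above is behavior-preserving. With Stockfish's PawnValueEg of 208, 1.0 / PawnValueEg / 4.0 * std::log(10.0) reproduces the deleted literal 0.00276753015984861... exactly. A minimal standalone sketch (not part of the patch) of the eval-to-win-rate mapping that winning_percentage() applies:

    // win_rate_demo.cpp -- standalone sketch; PawnValueEg = 208 is assumed here.
    #include <cmath>
    #include <cstdio>

    int main()
    {
        const double PawnValueEg = 208.0;
        const double coefficient = 1.0 / PawnValueEg / 4.0 * std::log(10.0);

        // sigmoid(value * coefficient) == 1 / (1 + 10^(-value / (4 * PawnValueEg)))
        const auto winning_percentage = [=](double value) {
            return 1.0 / (1.0 + std::exp(-value * coefficient));
        };

        std::printf("coefficient = %.20f\n", coefficient);              // 0.00276753015984861...
        std::printf("wp(0 cp)    = %.3f\n", winning_percentage(0.0));   // 0.500
        std::printf("wp(+208 cp) = %.3f\n", winning_percentage(208.0)); // ~0.640, one pawn up
        return 0;
    }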
@@ -164,17 +165,6 @@ namespace Learner
         return
             ((y2 - y1) / epsilon) / winning_probability_coefficient;
     }
 
-    double dsigmoid(double x)
-    {
-        // Sigmoid function
-        // f(x) = 1/(1+exp(-x))
-        // the first derivative is
-        // f'(x) = df/dx = f(x)・{ 1-f(x)}
-        // becomes
-
-        return sigmoid(x) * (1.0 - sigmoid(x));
-    }
-
     // When the objective function is the sum of squares of the difference in winning percentage
 #if defined (LOSS_FUNCTION_IS_WINNING_PERCENTAGE)
     // function to calculate the gradient
@@ -202,7 +192,7 @@ namespace Learner
         double p = winning_percentage(deep);
         double q = winning_percentage(shallow);
 
-        return (q - p) * dsigmoid(double(shallow) / 600.0);
+        return (q - p) * Math::dsigmoid(double(shallow) / 600.0);
     }
 #endif
 
@@ -253,39 +243,75 @@ namespace Learner
     double ELMO_LAMBDA2 = 0.33;
     double ELMO_LAMBDA_LIMIT = 32000;
 
+    // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
+    double get_scaled_signal(double signal)
+    {
+        double scaled_signal = signal;
+
+        // Normalize to [0.0, 1.0].
+        scaled_signal =
+            (scaled_signal - src_score_min_value)
+            / (src_score_max_value - src_score_min_value);
+
+        // Scale to [dest_score_min_value, dest_score_max_value].
+        scaled_signal =
+            scaled_signal * (dest_score_max_value - dest_score_min_value)
+            + dest_score_min_value;
+
+        return scaled_signal;
+    }
+
+    // Teacher winning probability.
+    double calculate_p(double teacher_signal, int ply)
+    {
+        const double scaled_teacher_signal = get_scaled_signal(teacher_signal);
+
+        double p = scaled_teacher_signal;
+        if (convert_teacher_signal_to_winning_probability)
+        {
+            p = winning_percentage(scaled_teacher_signal, ply);
+        }
+
+        return p;
+    }
+
+    double calculate_lambda(double teacher_signal)
+    {
+        // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT,
+        // apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
+        const double lambda =
+            (std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT)
+            ? ELMO_LAMBDA2
+            : ELMO_LAMBDA;
+
+        return lambda;
+    }
+
+    double calculate_t(int game_result)
+    {
+        // Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw.
+        // game_result = 1,0,-1 so add 1 and divide by 2.
+        const double t = double(game_result + 1) * 0.5;
+
+        return t;
+    }
+
     double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
     {
         // elmo (WCSC27) method
         // Correct with the actual game wins and losses.
-
-        // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
-        double scaled_teacher_signal = teacher_signal;
-        // Normalize to [0.0, 1.0].
-        scaled_teacher_signal = (scaled_teacher_signal - src_score_min_value) / (src_score_max_value - src_score_min_value);
-        // Scale to [dest_score_min_value, dest_score_max_value].
-        scaled_teacher_signal = scaled_teacher_signal * (dest_score_max_value - dest_score_min_value) + dest_score_min_value;
-
-        const double q = winning_percentage(shallow, psv.gamePly);
-        // Teacher winning probability.
-        double p = scaled_teacher_signal;
-        if (convert_teacher_signal_to_winning_probability) {
-            p = winning_percentage(scaled_teacher_signal, psv.gamePly);
-        }
-
-        // Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw.
-        // game_result = 1,0,-1 so add 1 and divide by 2.
-        const double t = double(psv.game_result + 1) / 2;
-
-        // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
-        const double lambda = (abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 : ELMO_LAMBDA;
+        const double q = winning_percentage(shallow, psv.gamePly);
+        const double p = calculate_p(teacher_signal, psv.gamePly);
+        const double t = calculate_t(psv.game_result);
+        const double lambda = calculate_lambda(teacher_signal);
 
         double grad;
-        if (use_wdl) {
-            double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly);
-            double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly);
+        if (use_wdl)
+        {
+            const double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly);
+            const double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly);
             grad = lambda * dce_p + (1.0 - lambda) * dce_t;
         }
-        else {
+        else
+        {
             // Use the actual win rate as a correction term.
             // This is the idea of elmo (WCSC27), modern O-parts.
             grad = lambda * (q - p) + (1.0 - lambda) * (q - t);
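For reference, a standalone sketch (not part of the patch, with made-up probabilities) of the elmo-style blend computed by calc_grad() above: q is the win rate of the shallow (trainee) eval, p the teacher's, t the mapped game result, and lambda weighs the eval-matching term against the result-matching term:

    // calc_grad_demo.cpp -- standalone sketch.
    #include <cstdio>

    int main()
    {
        const double q = 0.55;      // shallow-search win rate
        const double p = 0.60;      // deep teacher win rate
        const double t = 1.0;       // game result mapped to [0,1]: a win
        const double lambda = 0.33; // e.g. ELMO_LAMBDA2 when |teacher| >= ELMO_LAMBDA_LIMIT

        const double grad = lambda * (q - p) + (1.0 - lambda) * (q - t);
        std::printf("grad = %.4f\n", grad); // 0.33*(-0.05) + 0.67*(-0.45) = -0.3180
        return 0;
    }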
@@ -296,30 +322,25 @@ namespace Learner
     // Calculate cross entropy during learning
     // The individual cross entropy of the win/loss term and win rate term of the elmo expression is returned to the arguments cross_entropy_eval and cross_entropy_win.
-    void calc_cross_entropy(Value teacher_signal, Value shallow, const PackedSfenValue& psv,
-        double& cross_entropy_eval, double& cross_entropy_win, double& cross_entropy,
-        double& entropy_eval, double& entropy_win, double& entropy)
+    void calc_cross_entropy(
+        Value teacher_signal,
+        Value shallow,
+        const PackedSfenValue& psv,
+        double& cross_entropy_eval,
+        double& cross_entropy_win,
+        double& cross_entropy,
+        double& entropy_eval,
+        double& entropy_win,
+        double& entropy)
     {
-        // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
-        double scaled_teacher_signal = teacher_signal;
-        // Normalize to [0.0, 1.0].
-        scaled_teacher_signal = (scaled_teacher_signal - src_score_min_value) / (src_score_max_value - src_score_min_value);
-        // Scale to [dest_score_min_value, dest_score_max_value].
-        scaled_teacher_signal = scaled_teacher_signal * (dest_score_max_value - dest_score_min_value) + dest_score_min_value;
-
-        // Teacher winning probability.
-        double p = scaled_teacher_signal;
-        if (convert_teacher_signal_to_winning_probability) {
-            p = winning_percentage(scaled_teacher_signal);
-        }
-        const double q /* eval_winrate */ = winning_percentage(shallow);
-        const double t = double(psv.game_result + 1) / 2;
+        const double q = winning_percentage(shallow, psv.gamePly);
+        const double p = calculate_p(teacher_signal, psv.gamePly);
+        const double t = calculate_t(psv.game_result);
+        const double lambda = calculate_lambda(teacher_signal);
 
         constexpr double epsilon = 0.000001;
 
-        // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
-        const double lambda = (abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 : ELMO_LAMBDA;
-
         const double m = (1.0 - lambda) * t + lambda * p;
 
         cross_entropy_eval =
@@ -343,7 +364,8 @@ namespace Learner
 
     // Other variations may be prepared as the objective function..
 
-    double calc_grad(Value shallow, const PackedSfenValue& psv) {
+    double calc_grad(Value shallow, const PackedSfenValue& psv)
+    {
         return calc_grad((Value)psv.score, shallow, psv);
     }
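Likewise, a standalone sketch (not part of the patch, same made-up numbers) of the blended target m = (1 - lambda) * t + lambda * p against which calc_cross_entropy() measures the loss:

    // cross_entropy_demo.cpp -- standalone sketch.
    #include <cmath>
    #include <cstdio>

    int main()
    {
        const double q = 0.55, p = 0.60, t = 1.0, lambda = 0.33;

        const double m = (1.0 - lambda) * t + lambda * p; // 0.868
        const double cross_entropy = -m * std::log(q) - (1.0 - m) * std::log(1.0 - q);
        std::printf("m = %.3f, cross_entropy = %.4f\n", m, cross_entropy); // ~0.6243
        return 0;
    }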
@@ -363,8 +385,14 @@ namespace Learner
         // SFEN_READ_SIZE is a multiple of THREAD_BUFFER_SIZE.
         static constexpr const size_t SFEN_READ_SIZE = LEARN_SFEN_READ_SIZE;
 
+        // hash to limit reading the same position twice
+        // Are 64 million positions too many? Probably not.
+        // It must be 2**N because it will be used as the mask to calculate hash_index.
+        static constexpr uint64_t READ_SFEN_HASH_SIZE = 64 * 1024 * 1024;
+
         // Do not use std::random_device(). Because it always the same integers on MinGW.
-        SfenReader(int thread_num) : prng(std::chrono::system_clock::now().time_since_epoch().count())
+        SfenReader(int thread_num) :
+            prng(std::chrono::system_clock::now().time_since_epoch().count())
         {
             packed_sfens.resize(thread_num);
             total_read = 0;
@@ -398,6 +426,7 @@ namespace Learner
                 cout << "Error! read packed sfen , failed." << endl;
                 break;
             }
+
             sfen_for_mse.push_back(ps);
 
             // Get the hash key.
@@ -418,8 +447,10 @@ namespace Learner
             {
                 if (eval_limit < abs(p.score))
                     continue;
+
                 if (!use_draw_games_in_validation && p.game_result == 0)
                     continue;
+
                 sfen_for_mse.push_back(p);
             }
             else
@@ -436,7 +467,7 @@ namespace Learner
             auto& thread_ps = packed_sfens[thread_id];
 
             // Fill the read buffer if there is no remaining buffer, but if it doesn't even exist, finish.
-            if ((thread_ps == nullptr || thread_ps->size() == 0) // If the buffer is empty, fill it.
+            if ((thread_ps == nullptr || thread_ps->empty()) // If the buffer is empty, fill it.
                 && !read_to_thread_buffer_impl(thread_id))
                 return false;
 
@@ -444,11 +475,11 @@ namespace Learner
 
             // Since the filling of the thread buffer with the phase has been completed successfully
             // thread_ps->rbegin() is alive.
-            ps = *(thread_ps->rbegin());
+            ps = thread_ps->back();
             thread_ps->pop_back();
 
             // If you've run out of buffers, call delete yourself to free this buffer.
-            if (thread_ps->size() == 0)
+            if (thread_ps->empty())
             {
                 thread_ps.reset();
             }
@@ -507,7 +538,7 @@ namespace Learner
                 return false;
 
             // Get the next file name.
-            string filename = *filenames.rbegin();
+            string filename = filenames.back();
             filenames.pop_back();
 
             fs.open(filename, ios::in | ios::binary);
@@ -523,6 +554,7 @@ namespace Learner
                 // This size() is read only, so you don't need to lock it.
                 while (!stop_flag && packed_sfens_pool.size() >= SFEN_READ_SIZE / THREAD_BUFFER_SIZE)
                     sleep(100);
+
                 if (stop_flag)
                     return;
 
@@ -555,9 +587,7 @@ namespace Learner
 
                 if (!no_shuffle)
                 {
-                    auto size = sfens.size();
-                    for (size_t i = 0; i < size; ++i)
-                        swap(sfens[i], sfens[(size_t)(prng.rand((uint64_t)size - i) + i)]);
+                    Algo::shuffle(sfens, prng);
                 }
 
                 // Divide this by THREAD_BUFFER_SIZE. There should be size pieces.
@@ -591,6 +621,13 @@ namespace Learner
             }
         }
 
+        // Determine if this is a position used for rmse calculation.
+        // (Positions used for rmse calculation should not also be used for learning.)
+        bool is_for_rmse(Key key) const
+        {
+            return sfen_for_mse_hash.count(key) != 0;
+        }
+
         // sfen files
         vector<string> filenames;
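A standalone sketch (not part of the patch) of why READ_SFEN_HASH_SIZE must be a power of two: for SIZE == 2^N, key & (SIZE - 1) equals key % SIZE, which is the cheap masking used to compute hash_index when skipping recently read positions:

    // hash_mask_demo.cpp -- standalone sketch; the key is an arbitrary 64-bit value.
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const std::uint64_t SIZE = 64ULL * 1024 * 1024;   // 2^26, a power of two
        const std::uint64_t key  = 0x9D39247E33776D41ULL;

        const std::uint64_t masked = key & (SIZE - 1);    // what the learner does
        const std::uint64_t modulo = key % SIZE;          // the equivalent, slower form
        std::printf("masked=%llu modulo=%llu equal=%d\n",
                    (unsigned long long)masked, (unsigned long long)modulo,
                    (int)(masked == modulo));
        return 0;
    }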
@@ -613,17 +650,6 @@ namespace Learner
 
         bool stop_flag;
 
-        // Determine if it is a phase for calculating rmse.
-        // (The computational aspects of rmse should not be used for learning.)
-        bool is_for_rmse(Key key) const
-        {
-            return sfen_for_mse_hash.count(key) != 0;
-        }
-
-        // hash to limit the reading of the same situation
-        // Is there too many 64 million phases? Or Not really..
-        // It must be 2**N because it will be used as the mask to calculate hash_index.
-        static const uint64_t READ_SFEN_HASH_SIZE = 64 * 1024 * 1024;
         vector<Key> hash; // 64MB*8 = 512MB
 
         // test phase for mse calculation
@@ -663,7 +689,10 @@ namespace Learner
     // Class to generate sfen with multiple threads
     struct LearnerThink : public MultiThink
     {
-        LearnerThink(SfenReader& sr_) :sr(sr_), stop_flag(false), save_only_once(false)
+        LearnerThink(SfenReader& sr_) :
+            sr(sr_),
+            stop_flag(false),
+            save_only_once(false)
         {
 #if defined ( LOSS_FUNCTION_IS_ELMO_METHOD )
             learn_sum_cross_entropy_eval = 0.0;
@@ -686,7 +715,12 @@ namespace Learner
         virtual void thread_worker(size_t thread_id);
 
         // Start a thread that loads the phase file in the background.
-        void start_file_read_worker() { sr.start_file_read_worker(); }
+        void start_file_read_worker()
+        {
+            sr.start_file_read_worker();
+        }
+
+        Value get_shallow_value(Position& task_pos);
 
         // save merit function parameters to a file
         bool save(bool is_final = false);
@@ -753,6 +787,33 @@ namespace Learner
         TaskDispatcher task_dispatcher;
     };
 
+    Value LearnerThink::get_shallow_value(Position& task_pos)
+    {
+        // Evaluation value for shallow search
+        // evaluate() could be used instead, but its loss would be hard to compare with
+        // learn_cross_entropy, so use qsearch().
+        // EvalHash has been disabled in advance. (If not, the same value will be returned every time)
+        const auto [_, pv] = qsearch(task_pos);
+
+        const auto rootColor = task_pos.side_to_move();
+
+        std::vector<StateInfo, AlignedAllocator<StateInfo>> states(pv.size());
+        for (size_t i = 0; i < pv.size(); ++i)
+        {
+            task_pos.do_move(pv[i], states[i]);
+            Eval::NNUE::update_eval(task_pos);
+        }
+
+        const Value shallow_value =
+            (rootColor == task_pos.side_to_move())
+            ? Eval::evaluate(task_pos)
+            : -Eval::evaluate(task_pos);
+
+        for (auto it = pv.rbegin(); it != pv.rend(); ++it)
+            task_pos.undo_move(*it);
+
+        return shallow_value;
+    }
+
     void LearnerThink::calc_loss(size_t thread_id, uint64_t done)
     {
         // There is no point in hitting the replacement table, so at this timing the generation of the replacement table is updated.
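A standalone sketch (not part of the patch, with colors reduced to 0/1 parity) of the sign convention at the end of get_shallow_value(): evaluate() scores the position for the side to move, so after playing an odd-length PV the score must be negated to stay from the root color's point of view:

    // sign_convention_demo.cpp -- standalone sketch.
    #include <cstdio>

    int main()
    {
        const int root_color = 0; // side to move at the root
        const int pv_length  = 3; // an odd-length PV leaves the opponent to move

        const int side_to_move_at_leaf = (root_color + pv_length) % 2;
        const int leaf_eval = 120; // evaluate(): from the leaf side to move's view

        const int shallow_value =
            (root_color == side_to_move_at_leaf) ? leaf_eval : -leaf_eval;
        std::printf("shallow_value from the root's view = %d\n", shallow_value); // -120
        return 0;
    }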
@@ -800,8 +861,6 @@ namespace Learner
         pos.set(StartFEN, false, &si, th);
         std::cout << "hirate eval = " << Eval::evaluate(pos);
 
-        //Eval::print_eval_stat(pos);
-
         // It's better to parallelize here, but it's a bit troublesome because the search before slave has not finished.
         // I created a mechanism to call task, so I will use it.
 
@@ -818,6 +877,7 @@ namespace Learner
             // It is not possible to capture pos used in ↑, so specify the variables you want to capture one by one.
             auto task = [
+                this,
                 &ps,
                 &test_sum_cross_entropy_eval,
                 &test_sum_cross_entropy_win,
@@ -830,7 +890,6 @@ namespace Learner
                 &move_accord_count
             ](size_t task_thread_id)
             {
-                // Does C++ properly capture a new ps instance for each loop?.
                 auto task_th = Threads[task_thread_id];
                 auto& task_pos = task_th->rootPos;
                 StateInfo task_si;
@@ -840,26 +899,7 @@ namespace Learner
                     cout << "Error! : illegal packed sfen " << task_pos.fen() << endl;
                 }
 
-                // Evaluation value for shallow search
-                // The value of evaluate() may be used, but when calculating loss, learn_cross_entropy and
-                // Use qsearch() because it is difficult to compare the values.
-                // EvalHash has been disabled in advance. (If not, the same value will be returned every time)
-                auto task_search_result = qsearch(task_pos);
-
-                auto shallow_value = task_search_result.first;
-                {
-                    const auto rootColor = task_pos.side_to_move();
-                    const auto pv = task_search_result.second;
-                    std::vector<StateInfo, AlignedAllocator<StateInfo>> states(pv.size());
-                    for (size_t i = 0; i < pv.size(); ++i)
-                    {
-                        task_pos.do_move(pv[i], states[i]);
-                        Eval::NNUE::update_eval(task_pos);
-                    }
-                    shallow_value = (rootColor == task_pos.side_to_move()) ? Eval::evaluate(task_pos) : -Eval::evaluate(task_pos);
-                    for (auto it = pv.rbegin(); it != pv.rend(); ++it)
-                        task_pos.undo_move(*it);
-                }
+                const Value shallow_value = get_shallow_value(task_pos);
 
                 // Evaluation value of deep search
                 auto deep_value = (Value)ps.score;
@@ -887,7 +927,17 @@ namespace Learner
 #if defined (LOSS_FUNCTION_IS_ELMO_METHOD)
                 double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy;
                 double test_entropy_eval, test_entropy_win, test_entropy;
-                calc_cross_entropy(deep_value, shallow_value, ps, test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy, test_entropy_eval, test_entropy_win, test_entropy);
+                calc_cross_entropy(
+                    deep_value,
+                    shallow_value,
+                    ps,
+                    test_cross_entropy_eval,
+                    test_cross_entropy_win,
+                    test_cross_entropy,
+                    test_entropy_eval,
+                    test_entropy_win,
+                    test_entropy);
+
                 // The total cross entropy need not be abs() by definition.
                 test_sum_cross_entropy_eval += test_cross_entropy_eval;
                 test_sum_cross_entropy_win += test_cross_entropy_win;
@@ -900,8 +950,8 @@ namespace Learner
 
                 // Determine if the teacher's move and the score of the shallow search match
                 {
-                    auto r = search(task_pos, 1);
-                    if ((uint16_t)r.second[0] == ps.move)
+                    const auto [value, pv] = search(task_pos, 1);
+                    if ((uint16_t)pv[0] == ps.move)
                         move_accord_count.fetch_add(1, std::memory_order_relaxed);
                 }
 
@@ -950,6 +1000,7 @@ namespace Learner
                 << " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size()
                 << " , norm = " << sum_norm
                 << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%";
+
             if (done != static_cast<uint64_t>(-1))
             {
                 cout
@@ -962,7 +1013,8 @@ namespace Learner
             }
             cout << endl;
         }
-        else {
+        else
+        {
             cout << "Error! : sr.sfen_for_mse.size() = " << sr.sfen_for_mse.size() << " , done = " << done << endl;
         }
@@ -978,7 +1030,6 @@ namespace Learner
 #endif
     }
 
-
     void LearnerThink::thread_worker(size_t thread_id)
     {
 #if defined(_OPENMP)
@@ -1092,7 +1143,9 @@ namespace Learner
         }
 
         PackedSfenValue ps;
-    RetryRead:;
+
+    RETRY_READ:;
+
         if (!sr.read_to_thread_buffer(thread_id, ps))
         {
             // ran out of thread pool for my thread.
@@ -1106,16 +1159,14 @@ namespace Learner
         // The evaluation value exceeds the learning target value.
         // Ignore this aspect information.
         if (eval_limit < abs(ps.score))
-            goto RetryRead;
-
+            goto RETRY_READ;
 
         if (!use_draw_games_in_training && ps.game_result == 0)
-            goto RetryRead;
-
+            goto RETRY_READ;
 
         // Skip over the opening phase
         if (ps.gamePly < prng.rand(reduction_gameply))
-            goto RetryRead;
+            goto RETRY_READ;
 
 #if 0
         auto sfen = pos.sfen_unpack(ps.data);
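The RETRY_READ label implements a filtered read. A standalone sketch (not part of the patch; read_next() and the limits are hypothetical stand-ins for sr.read_to_thread_buffer() and the corresponding options) of the same control flow written as a loop:

    // retry_read_demo.cpp -- standalone sketch.
    #include <cstdio>
    #include <cstdlib>

    struct Sample { int score; int game_result; int gamePly; };

    static bool read_next(Sample& s)
    {
        static int n = 0;
        if (n++ >= 8) return false; // pretend the buffer holds 8 samples
        s = { std::rand() % 2001 - 1000, std::rand() % 3 - 1, std::rand() % 100 };
        return true;
    }

    int main()
    {
        const int  eval_limit = 800;
        const bool use_draw_games_in_training = false;

        Sample s;
        while (read_next(s)) // each 'continue' plays the role of 'goto RETRY_READ'
        {
            if (eval_limit < std::abs(s.score))                    continue;
            if (!use_draw_games_in_training && s.game_result == 0) continue;
            std::printf("train: score=%d result=%d ply=%d\n",
                        s.score, s.game_result, s.gamePly);
        }
        return 0;
    }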
@@ -1129,20 +1180,24 @@ namespace Learner
             // I got a strange sfen. Should be debugged!
             // Since it is an illegal sfen, it may not be displayed with pos.sfen(), but it is better than not.
             cout << "Error! : illigal packed sfen = " << pos.fen() << endl;
-            goto RetryRead;
+            goto RETRY_READ;
         }
+
 #if !defined(EVAL_NNUE)
+        if (skip_duplicated_positions_in_training)
         {
-            auto key = pos.key();
+            const auto key = pos.key();
+
             // Exclude the phase used for rmse calculation.
-            if (sr.is_for_rmse(key) && skip_duplicated_positions_in_training)
-                goto RetryRead;
+            if (sr.is_for_rmse(key))
+                goto RETRY_READ;
 
             // Exclude the most recently used aspect.
-            auto hash_index = size_t(key & (sr.READ_SFEN_HASH_SIZE - 1));
-            auto key2 = sr.hash[hash_index];
-            if (key == key2 && skip_duplicated_positions_in_training)
-                goto RetryRead;
+            const auto hash_index = size_t(key & (sr.READ_SFEN_HASH_SIZE - 1));
+            const auto key2 = sr.hash[hash_index];
+            if (key == key2)
+                goto RETRY_READ;
+
             sr.hash[hash_index] = key; // Replace with the current key.
         }
 #endif
@@ -1152,22 +1207,21 @@ namespace Learner
         // (shouldn't write out such teacher aspect itself, but may have written it out with an old generation routine)
         // Skip the position if there are no legal moves (=checkmated or stalemate).
         if (MoveList<LEGAL>(pos).size() == 0)
-            goto RetryRead;
+            goto RETRY_READ;
 
         // I can read it, so try displaying it.
         // cout << pos << value << endl;
 
         // Evaluation value of shallow search (qsearch)
-        auto r = qsearch(pos);
-        auto pv = r.second;
+        const auto [_, pv] = qsearch(pos);
 
         // Evaluation value of deep search
-        auto deep_value = (Value)ps.score;
+        const auto deep_value = (Value)ps.score;
 
         // I feel that the mini batch has a better gradient.
         // Go to the leaf node as it is, add only to the gradient array, and later try AdaGrad at the time of rmse aggregation.
-        auto rootColor = pos.side_to_move();
+        const auto rootColor = pos.side_to_move();
 
         // If the initial PV is different, it is better not to use it for learning.
         // If it is the result of searching a completely different place, it may become noise.
@@ -1203,13 +1257,26 @@ namespace Learner
         // I don't think this is a very desirable property, as the aspect that gives that gradient will be different.
         // I have turned off the substitution table, but since the pv array has not been updated due to one stumbling block etc...
 
-        Value shallow_value = (rootColor == pos.side_to_move()) ? Eval::evaluate(pos) : -Eval::evaluate(pos);
+        const Value shallow_value =
+            (rootColor == pos.side_to_move())
+            ? Eval::evaluate(pos)
+            : -Eval::evaluate(pos);
 
 #if defined (LOSS_FUNCTION_IS_ELMO_METHOD)
         // Calculate loss for training data
         double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
         double learn_entropy_eval, learn_entropy_win, learn_entropy;
-        calc_cross_entropy(deep_value, shallow_value, ps, learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy, learn_entropy_eval, learn_entropy_win, learn_entropy);
+        calc_cross_entropy(
+            deep_value,
+            shallow_value,
+            ps,
+            learn_cross_entropy_eval,
+            learn_cross_entropy_win,
+            learn_cross_entropy,
+            learn_entropy_eval,
+            learn_entropy_win,
+            learn_entropy);
+
         learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
         learn_sum_cross_entropy_win += learn_cross_entropy_win;
         learn_sum_cross_entropy += learn_cross_entropy;
@@ -1266,7 +1333,8 @@ namespace Learner
             Eval::NNUE::update_eval(pos);
         }
 
-        if (illegal_move) {
+        if (illegal_move)
+        {
             sync_cout << "An illical move was detected... Excluded the position from the learning data..." << sync_endl;
             continue;
         }
@@ -1284,7 +1352,6 @@ namespace Learner
         dj_dw = calc_grad(deep_value, shallow_value, ps);
         Eval::add_grad(pos, rootColor, dj_dw, without_kpp);
 #endif
-
         }
     }
@@ -1301,14 +1368,17 @@ namespace Learner
             // Do not dig a subfolder because I want to save it only once.
             Eval::save_eval("");
         }
-        else if (is_final) {
+        else if (is_final)
+        {
             Eval::save_eval("final");
             return true;
         }
-        else {
+        else
+        {
             static int dir_number = 0;
             const std::string dir_name = std::to_string(dir_number++);
             Eval::save_eval(dir_name);
+
 #if defined(EVAL_NNUE)
             if (newbob_decay != 1.0 && latest_loss_count > 0) {
                 static int trials = newbob_num_trials;
@@ -1316,22 +1386,28 @@ namespace Learner
                 latest_loss_sum = 0.0;
                 latest_loss_count = 0;
                 cout << "loss: " << latest_loss;
-                if (latest_loss < best_loss) {
+                if (latest_loss < best_loss)
+                {
                     cout << " < best (" << best_loss << "), accepted" << endl;
                     best_loss = latest_loss;
                     best_nn_directory = Path::Combine((std::string)Options["EvalSaveDir"], dir_name);
                     trials = newbob_num_trials;
                 }
-                else {
+                else
+                {
                     cout << " >= best (" << best_loss << "), rejected" << endl;
-                    if (best_nn_directory.empty()) {
+
+                    if (best_nn_directory.empty())
+                    {
                         cout << "WARNING: no improvement from initial model" << endl;
                     }
-                    else {
+                    else
+                    {
                         cout << "restoring parameters from " << best_nn_directory << endl;
                         Eval::NNUE::RestoreParameters(best_nn_directory);
                     }
-                    if (--trials > 0 && !is_final) {
+
+                    if (--trials > 0 && !is_final)
+                    {
                         cout << "reducing learning rate scale from " << newbob_scale << " to " << (newbob_scale * newbob_decay) << " (" << trials << " more trials)" << endl;
@@ -1339,7 +1415,9 @@ namespace Learner
                     Eval::NNUE::SetGlobalLearningRateScale(newbob_scale);
                 }
             }
-            if (trials == 0) {
+
+            if (trials == 0)
+            {
                 cout << "converged" << endl;
                 return true;
             }
@@ -1371,10 +1449,11 @@ namespace Learner
             // Output progress every 10M phase or when all writing is completed
             if (((write_sfen_count % buffer_size) == 0) || (write_sfen_count == total_sfen_count))
+            {
                 cout << write_sfen_count << " / " << total_sfen_count << endl;
+            }
         };
 
-
         cout << endl << "write : " << output_file_name << endl;
 
         fstream fs(output_file_name, ios::out | ios::binary);
@@ -1453,9 +1532,7 @@ namespace Learner
         auto write_buffer = [&](uint64_t size)
         {
-            // shuffle from buf[0] to buf[size-1]
-            for (uint64_t i = 0; i < size; ++i)
-                swap(buf[i], buf[(uint64_t)(prng.rand(size - i) + i)]);
+            Algo::shuffle(buf, prng);
 
             // write to a file
             fstream fs;
@@ -1533,13 +1610,8 @@ namespace Learner
             auto& fs = afs[i];
             fs.open(filename, ios::in | ios::binary);
 
-            fs.seekg(0, fstream::end);
-            uint64_t eofPos = (uint64_t)fs.tellg();
-            fs.clear(); // Otherwise, the next seek may fail.
-            fs.seekg(0, fstream::beg);
-            uint64_t begPos = (uint64_t)fs.tellg();
-            uint64_t file_size = eofPos - begPos;
-            uint64_t sfen_count = file_size / sizeof(PackedSfenValue);
+            const uint64_t file_size = get_file_size(fs);
+            const uint64_t sfen_count = file_size / sizeof(PackedSfenValue);
             a_count[i] = sfen_count;
 
             // Output the number of sfen stored in each file.
@@ -1578,8 +1650,8 @@ namespace Learner
         PRNG prng(std::chrono::system_clock::now().time_since_epoch().count());
         uint64_t size = (uint64_t)buf.size();
         std::cout << "shuffle buf.size() = " << size << std::endl;
-        for (uint64_t i = 0; i < size; ++i)
-            swap(buf[i], buf[(uint64_t)(prng.rand(size - i) + i)]);
+
+        Algo::shuffle(buf, prng);
 
         std::cout << "write : " << output_file_name << endl;
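A standalone sketch (not part of the patch, with made-up checkpoint losses and newbob_num_trials = 2) of the accept/reject schedule in LearnerThink::save(): a rejected checkpoint restores the best network and shrinks the learning-rate scale, and exhausting the trial budget means convergence:

    // newbob_demo.cpp -- standalone sketch.
    #include <cstdio>

    int main()
    {
        const int    newbob_num_trials = 2;
        const double newbob_decay      = 0.5;

        double best_loss = 1e9;
        double scale     = 1.0;
        int    trials    = newbob_num_trials;

        const double losses[] = { 0.40, 0.35, 0.36, 0.37 };
        for (const double loss : losses)
        {
            if (loss < best_loss)
            {
                best_loss = loss;
                trials = newbob_num_trials; // accepted: reset the trial budget
                std::printf("loss %.2f accepted\n", loss);
            }
            else if (--trials > 0)
            {
                scale *= newbob_decay;      // rejected: restore best net, shrink LR scale
                std::printf("loss %.2f rejected, scale -> %.2f\n", loss, scale);
            }
            else
            {
                std::printf("converged\n"); // trial budget exhausted
                break;
            }
        }
        return 0;
    }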
diff --git a/src/misc.cpp b/src/misc.cpp
index a23b1205..5ef5ecdc 100644
--- a/src/misc.cpp
+++ b/src/misc.cpp
@@ -627,18 +627,27 @@ void* aligned_malloc(size_t size, size_t align)
     return p;
 }
 
+std::uint64_t get_file_size(std::fstream& fs)
+{
+    // Remember the current read position and restore it afterwards.
+    auto pos = fs.tellg();
+
+    fs.seekg(0, fstream::end);
+    const uint64_t eofPos = (uint64_t)fs.tellg();
+    fs.clear(); // Otherwise, the next seek may fail.
+    fs.seekg(0, fstream::beg);
+    const uint64_t begPos = (uint64_t)fs.tellg();
+    fs.seekg(pos);
+
+    return eofPos - begPos;
+}
+
 int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func)
 {
     fstream fs(filename, ios::in | ios::binary);
     if (fs.fail())
         return 1;
 
-    fs.seekg(0, fstream::end);
-    uint64_t eofPos = (uint64_t)fs.tellg();
-    fs.clear(); // Otherwise the next seek may fail.
-    fs.seekg(0, fstream::beg);
-    uint64_t begPos = (uint64_t)fs.tellg();
-    uint64_t file_size = eofPos - begPos;
+    const uint64_t file_size = get_file_size(fs);
     //std::cout << "filename = " << filename << " , file_size = " << file_size << endl;
 
     // I know the file size, so call callback_func to get a buffer for this,
diff --git a/src/misc.h b/src/misc.h
index c918a351..5add3b36 100644
--- a/src/misc.h
+++ b/src/misc.h
@@ -26,6 +26,8 @@
 #include <string>
 #include <vector>
 #include <functional>
+#include <cmath>
+#include <cstdint>
 
 #include "types.h"
 
@@ -155,6 +157,7 @@
 // Also, if the buffer cannot be allocated in the callback function or if the file size is different from the expected file size,
 // Return nullptr. At this time, read_file_to_memory() interrupts reading and returns with an error.
 
+std::uint64_t get_file_size(std::fstream& fs);
 int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func);
 int write_memory_to_file(std::string filename, void* ptr, uint64_t size);
 
@@ -199,20 +202,37 @@ inline std::ostream& operator<<(std::ostream& os, AsyncPRNG& prng)
 
 // Mathematical function used for progress calculation and learning
 namespace Math {
-    // Sigmoid function
-    // = 1.0 / (1.0 + std::exp(-x))
-    double sigmoid(double x);
+    inline double sigmoid(double x)
+    {
+        return 1.0 / (1.0 + std::exp(-x));
+    }
 
-    // Differentiation of sigmoid function
-    // = sigmoid(x) * (1.0-sigmoid(x))
-    double dsigmoid(double x);
+    inline double dsigmoid(double x)
+    {
+        // The first derivative of the sigmoid function
+        // f(x) = 1/(1+exp(-x))
+        // is
+        // f'(x) = df/dx = f(x) * (1 - f(x)).
+        return sigmoid(x) * (1.0 - sigmoid(x));
+    }
 
     // Clip v so that it fits between [lo,hi].
     // * In Stockfish, this function is written in bitboard.h.
     template<class T> constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
         return v < lo ? lo : v > hi ? hi : v;
     }
+}
 
+namespace Algo {
+    template <typename T, typename Rng>
+    void shuffle(std::vector<T>& buf, Rng&& prng)
+    {
+        const auto size = buf.size();
+        for (uint64_t i = 0; i < size; ++i)
+            std::swap(buf[i], buf[prng.rand(size - i) + i]);
+    }
 }
 
 // --------------------
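A standalone sketch (not part of the patch) of the extracted get_file_size() helper, rebuilt on plain std::fstream so it compiles outside the engine. Saving and restoring tellg() is what makes it safe to call before the caller starts reading:

    // file_size_demo.cpp -- standalone sketch.
    #include <cstdint>
    #include <cstdio>
    #include <fstream>

    std::uint64_t get_file_size(std::fstream& fs)
    {
        const auto pos = fs.tellg(); // remember the caller's read position

        fs.seekg(0, std::fstream::end);
        const std::uint64_t eofPos = (std::uint64_t)fs.tellg();
        fs.clear();                  // otherwise the next seek may fail
        fs.seekg(0, std::fstream::beg);
        const std::uint64_t begPos = (std::uint64_t)fs.tellg();
        fs.seekg(pos);               // restore the caller's read position

        return eofPos - begPos;
    }

    int main()
    {
        std::fstream fs("file_size_demo.cpp", std::ios::in | std::ios::binary);
        if (fs.fail())
            return 1;
        std::printf("size = %llu bytes\n", (unsigned long long)get_file_size(fs));
        return 0;
    }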
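Algo::shuffle() is a Fisher-Yates shuffle. A standalone sketch (not part of the patch) with a small Rng whose rand(n) stands in for the project's PRNG::rand(n), assumed to return a uniform value in [0, n):

    // shuffle_demo.cpp -- standalone sketch.
    #include <cstdint>
    #include <cstdio>
    #include <random>
    #include <utility>
    #include <vector>

    struct Rng
    {
        std::mt19937_64 gen{20201019};
        std::uint64_t rand(std::uint64_t n) { return gen() % n; } // modulo bias is fine here
    };

    template <typename T, typename R>
    void shuffle(std::vector<T>& buf, R&& prng)
    {
        const auto size = buf.size();
        for (std::uint64_t i = 0; i < size; ++i)
            std::swap(buf[i], buf[prng.rand(size - i) + i]); // pick uniformly from the unshuffled tail
    }

    int main()
    {
        std::vector<int> v{ 1, 2, 3, 4, 5, 6, 7, 8 };
        Rng rng;
        shuffle(v, rng);
        for (const int x : v) std::printf("%d ", x);
        std::printf("\n");
        return 0;
    }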