From 58863c32436c22ea05121e039850253510d923d1 Mon Sep 17 00:00:00 2001
From: noobpwnftw
Date: Tue, 8 Sep 2020 11:39:21 +0800
Subject: [PATCH 01/30] Update gensfen.cpp

---
 src/learn/gensfen.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp
index 6c8c455e..4214233b 100644
--- a/src/learn/gensfen.cpp
+++ b/src/learn/gensfen.cpp
@@ -58,7 +58,7 @@ namespace Learner
     // If hybrid eval is enabled, training data
     // generation and training don't work well.
     // https://discordapp.com/channels/435943710472011776/733545871911813221/748524079761326192
-    static bool use_raw_nnue_eval = true;
+    extern bool use_raw_nnue_eval;

     // Helper class for exporting Sfen
     struct SfenWriter

From 832c414b0d78263595b4e7cd6d19c87e61519010 Mon Sep 17 00:00:00 2001
From: Tomasz Sobczyk
Date: Mon, 7 Sep 2020 23:03:53 +0200
Subject: [PATCH 02/30] First batch of reorganization.

---
 src/learn/learner.cpp | 402 +++++++++++++++++++++++++-----------------
 src/misc.cpp          |  21 ++-
 src/misc.h            |  32 +++-
 3 files changed, 278 insertions(+), 177 deletions(-)

diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp
index 7021fd7f..98c8e32e 100644
--- a/src/learn/learner.cpp
+++ b/src/learn/learner.cpp
@@ -66,7 +66,7 @@ using namespace std;
 //extern Book::BookMoveSelector book;

 template <typename T>
-T operator += (std::atomic<T>& x, const T rhs)
+T operator +=(std::atomic<T>& x, const T rhs)
 {
     T old = x.load(std::memory_order_consume);
     // The value may be overwritten by another thread at this point;
     // the idea is that the value is not destroyed.
     T desired = old + rhs;
@@ -84,8 +84,9 @@ namespace Learner
     static bool use_draw_games_in_training = false;
     static bool use_draw_games_in_validation = false;
     static bool skip_duplicated_positions_in_training = true;
-    // 1.0 / PawnValueEg / 4.0 * log(10.0)
-    static double winning_probability_coefficient = 0.00276753015984861260098316280611;
+
+    static double winning_probability_coefficient = 1.0 / PawnValueEg / 4.0 * std::log(10.0);
+
     // Score scale factors. ex) If we set src_score_min_value = 0.0,
     // src_score_max_value = 1.0, dest_score_min_value = 0.0,
     // dest_score_max_value = 10000.0, [0.0, 1.0] will be scaled to [0, 10000].
@@ -93,6 +94,7 @@ namespace Learner
     static double src_score_max_value = 1.0;
     static double dest_score_min_value = 0.0;
     static double dest_score_max_value = 1.0;
+
     // Assume teacher signals are the scores of deep searches, and convert them into winning
     // probabilities in the trainer. Sometimes we want to use the winning probabilities in the training
     // data directly. In those cases, we set false to this variable.
     static bool convert_teacher_signal_to_winning_probability = true;
@@ -102,7 +104,7 @@ namespace Learner
     // If hybrid eval is enabled, training data
     // generation and training don't work well.
     // https://discordapp.com/channels/435943710472011776/733545871911813221/748524079761326192
+    // This CANNOT be static since it's used elsewhere.
-    static bool use_raw_nnue_eval = true;
+    bool use_raw_nnue_eval = true;

     // Using WDL with win rate model instead of sigmoid
     static bool use_wdl = false;

@@ -111,38 +113,37 @@ namespace Learner
     // command to learn from the generated game (learn)
     // -----------------------------------

-    // ordinary sigmoid function
-    double sigmoid(double x)
-    {
-        return 1.0 / (1.0 + std::exp(-x));
-    }
-
     // A function that converts the evaluation value to the winning rate [0,1]
     double winning_percentage(double value)
     {
         // 1/(1+10^(-Eval/4))
         // = 1/(1+e^(-Eval/4*ln(10)))
         // = sigmoid(Eval/4*ln(10))
-        return sigmoid(value * winning_probability_coefficient);
+        return Math::sigmoid(value * winning_probability_coefficient);
     }

     // A function that converts the evaluation value to the winning rate [0,1]
     double winning_percentage_wdl(double value, int ply)
     {
+        constexpr double wdl_total = 1000.0;
+        constexpr double draw_score = 0.5;
+
         double wdl_w = UCI::win_rate_model_double(value, ply);
         double wdl_l = UCI::win_rate_model_double(-value, ply);
-        double wdl_d = 1000.0 - wdl_w - wdl_l;
+        double wdl_d = wdl_total - wdl_w - wdl_l;

-        return (wdl_w + wdl_d / 2.0) / 1000.0;
+        return (wdl_w + wdl_d * draw_score) / wdl_total;
     }

     // A function that converts the evaluation value to the winning rate [0,1]
     double winning_percentage(double value, int ply)
     {
-        if (use_wdl) {
+        if (use_wdl)
+        {
             return winning_percentage_wdl(value, ply);
         }
-        else {
+        else
+        {
             return winning_percentage(value);
         }
     }

@@ -151,7 +152,7 @@ namespace Learner
     {
         double p = deep_win_rate;
         double q = winning_percentage(shallow_eval, ply);
-        return -p * std::log(q) - (1 - p) * std::log(1 - q);
+        return -p * std::log(q) - (1.0 - p) * std::log(1.0 - q);
     }

     double calc_d_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply)
     {
         constexpr double epsilon = 0.000001;
         double y1 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval, ply);
         double y2 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval + epsilon, ply);

         // Divide by the winning_probability_coefficient to match scale with the sigmoidal win rate
         return ((y2 - y1) / epsilon) / winning_probability_coefficient;
     }

-    double dsigmoid(double x)
-    {
-        // Sigmoid function
-        // f(x) = 1/(1+exp(-x))
-        // its first derivative is
-        // f'(x) = df/dx = f(x) * {1 - f(x)}
-
-        return sigmoid(x) * (1.0 - sigmoid(x));
-    }
-
     // When the objective function is the sum of squares of the difference in winning percentage
 #if defined (LOSS_FUNCTION_IS_WINNING_PERCENTAGE)
     // function to calculate the gradient
@@ -202,7 +192,7 @@ namespace Learner
         double p = winning_percentage(deep);
         double q = winning_percentage(shallow);

-        return (q - p) * dsigmoid(double(shallow) / 600.0);
+        return (q - p) * Math::dsigmoid(double(shallow) / 600.0);
     }
 #endif

     // A constant used in elmo (WCSC27). Adjustment required.
     double ELMO_LAMBDA = 0.33;
     double ELMO_LAMBDA2 = 0.33;
     double ELMO_LAMBDA_LIMIT = 32000;

+    // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
+    double get_scaled_signal(double signal)
+    {
+        double scaled_signal = signal;
+
+        // Normalize to [0.0, 1.0].
+        scaled_signal =
+            (scaled_signal - src_score_min_value)
+            / (src_score_max_value - src_score_min_value);
+
+        // Scale to [dest_score_min_value, dest_score_max_value].
+        scaled_signal =
+            scaled_signal * (dest_score_max_value - dest_score_min_value)
+            + dest_score_min_value;
+
+        return scaled_signal;
+    }
+
+    // Teacher winning probability.
+    double calculate_p(double teacher_signal, int ply)
+    {
+        const double scaled_teacher_signal = get_scaled_signal(teacher_signal);
+
+        // Teacher winning probability.
+        double p = scaled_teacher_signal;
+        if (convert_teacher_signal_to_winning_probability)
+        {
+            p = winning_percentage(scaled_teacher_signal);
+        }
+    }
+
+    double calculate_lambda(double teacher_signal)
+    {
+        // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
+        const double lambda =
+            (std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT)
+            ? ELMO_LAMBDA2
+            : ELMO_LAMBDA;
+
+        return lambda;
+    }
+
+    double calculate_t(int game_result)
+    {
+        // Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw.
+        // game_result = 1,0,-1 so add 1 and divide by 2.
+        const double t = double(game_result + 1) * 0.5;
+
+        return t;
+    }
+
     double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
     {
         // elmo (WCSC27) method
         // Correct with the actual game wins and losses.
-
-        // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
-        double scaled_teacher_signal = teacher_signal;
-        // Normalize to [0.0, 1.0].
-        scaled_teacher_signal = (scaled_teacher_signal - src_score_min_value) / (src_score_max_value - src_score_min_value);
-        // Scale to [dest_score_min_value, dest_score_max_value].
-        scaled_teacher_signal = scaled_teacher_signal * (dest_score_max_value - dest_score_min_value) + dest_score_min_value;

         const double q = winning_percentage(shallow, psv.gamePly);
-        // Teacher winning probability.
-        double p = scaled_teacher_signal;
-        if (convert_teacher_signal_to_winning_probability) {
-            p = winning_percentage(scaled_teacher_signal, psv.gamePly);
-        }
-
-        // Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw.
-        // game_result = 1,0,-1 so add 1 and divide by 2.
-        const double t = double(psv.game_result + 1) / 2;
-
-        // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
-        const double lambda = (abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 : ELMO_LAMBDA;
+        const double p = calculate_p(teacher_signal, psv.gamePly);
+        const double t = calculate_t(psv.game_result);
+        const double lambda = calculate_lambda(teacher_signal);

         double grad;
-        if (use_wdl) {
-            double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly);
-            double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly);
+        if (use_wdl)
+        {
+            const double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly);
+            const double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly);
             grad = lambda * dce_p + (1.0 - lambda) * dce_t;
         }
-        else {
+        else
+        {
             // Use the actual win rate as a correction term.
             // This is the idea of elmo (WCSC27), a modern-day OOPArt.
             grad = lambda * (q - p) + (1.0 - lambda) * (q - t);
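
To make the blend concrete, here is a minimal standalone sketch of the formula used above; the input numbers are made up purely for illustration, only the formula mirrors calc_grad().

    // q: shallow-search win probability, p: teacher win probability,
    // t: game result mapped to {0, 0.5, 1}, lambda: blend weight.
    #include <cassert>
    #include <cmath>

    static double blended_gradient(double q, double p, double t, double lambda)
    {
        return lambda * (q - p) + (1.0 - lambda) * (q - t);
    }

    int main()
    {
        // Shallow search slightly below the teacher signal; the game was won.
        const double g = blended_gradient(0.55, 0.60, 1.0, 0.33);
        // 0.33 * (-0.05) + 0.67 * (-0.45) = -0.318
        assert(std::abs(g - (-0.318)) < 1e-12);
        return 0;
    }
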
@@ -296,30 +322,25 @@ namespace Learner

     // Calculate cross entropy during learning
     // The individual cross entropy of the win/loss term and win rate term of the elmo expression
     // is returned to the arguments cross_entropy_eval and cross_entropy_win.
-    void calc_cross_entropy(Value teacher_signal, Value shallow, const PackedSfenValue& psv,
-        double& cross_entropy_eval, double& cross_entropy_win, double& cross_entropy,
-        double& entropy_eval, double& entropy_win, double& entropy)
+    void calc_cross_entropy(
+        Value teacher_signal,
+        Value shallow,
+        const PackedSfenValue& psv,
+        double& cross_entropy_eval,
+        double& cross_entropy_win,
+        double& cross_entropy,
+        double& entropy_eval,
+        double& entropy_win,
+        double& entropy)
     {
-        // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
-        double scaled_teacher_signal = teacher_signal;
-        // Normalize to [0.0, 1.0].
-        scaled_teacher_signal = (scaled_teacher_signal - src_score_min_value) / (src_score_max_value - src_score_min_value);
-        // Scale to [dest_score_min_value, dest_score_max_value].
-        scaled_teacher_signal = scaled_teacher_signal * (dest_score_max_value - dest_score_min_value) + dest_score_min_value;
-
-        // Teacher winning probability.
-        double p = scaled_teacher_signal;
-        if (convert_teacher_signal_to_winning_probability) {
-            p = winning_percentage(scaled_teacher_signal);
-        }
-
-        const double q /* eval_winrate */ = winning_percentage(shallow);
-        const double t = double(psv.game_result + 1) / 2;
+        const double q = winning_percentage(shallow, psv.gamePly);
+        const double p = calculate_p(teacher_signal, psv.gamePly);
+        const double t = calculate_t(psv.game_result);
+        const double lambda = calculate_lambda(teacher_signal);

         constexpr double epsilon = 0.000001;

-        // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
-        const double lambda = (abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 : ELMO_LAMBDA;
-
         const double m = (1.0 - lambda) * t + lambda * p;

         cross_entropy_eval =
@@ -343,7 +364,8 @@ namespace Learner

     // Other variations may be prepared as the objective function..

-    double calc_grad(Value shallow, const PackedSfenValue& psv) {
+    double calc_grad(Value shallow, const PackedSfenValue& psv)
+    {
         return calc_grad((Value)psv.score, shallow, psv);
     }
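
Since cross entropy is linear in its target, blending the two teacher targets into m = (1 - lambda) * t + lambda * p and blending the two loss terms give exactly the same total. A standalone check of that identity (the values are made up; epsilon is the same guard as in the patch):

    #include <cassert>
    #include <cmath>

    // Cross entropy of target y against prediction q, with epsilon guarding log(0).
    static double cross_entropy(double y, double q, double eps)
    {
        return -y * std::log(q + eps) - (1.0 - y) * std::log(1.0 - q + eps);
    }

    int main()
    {
        const double lambda = 0.33, p = 0.6, t = 1.0, q = 0.55, eps = 0.000001;
        const double m = (1.0 - lambda) * t + lambda * p;
        const double blended_target = cross_entropy(m, q, eps);
        const double blended_losses = lambda * cross_entropy(p, q, eps)
                                    + (1.0 - lambda) * cross_entropy(t, q, eps);
        assert(std::abs(blended_target - blended_losses) < 1e-12);
        return 0;
    }
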
     // Sfen reader
     struct SfenReader
     {
         // number of positions used for calculation such as mse
         // mini-batch size = 1M is standard, so 0.2% of that should be negligible in terms of time.
         // Since search() is performed with depth = 1 in calculation of move match rate, simple comparison is not possible...
         static constexpr uint64_t sfen_for_mse_size = 2000;

         // Number of positions buffered by each thread: 0.1M positions (4M positions at 40HT).
         static constexpr size_t THREAD_BUFFER_SIZE = 10 * 1000;

         // Buffer for reading files (if this is made larger, the shuffle becomes larger and the positions may vary more.
         // If it is too large, the memory consumption will increase.)
         // SFEN_READ_SIZE is a multiple of THREAD_BUFFER_SIZE.
         static constexpr const size_t SFEN_READ_SIZE = LEARN_SFEN_READ_SIZE;

+        // hash to limit the reading of the same position
+        // Are 64 million positions too many? Probably not.
+        // It must be 2**N because it will be used as the mask to calculate hash_index.
+        static constexpr uint64_t READ_SFEN_HASH_SIZE = 64 * 1024 * 1024;
+
         // Do not use std::random_device(), because it always generates the same integers on MinGW.
-        SfenReader(int thread_num) : prng(std::chrono::system_clock::now().time_since_epoch().count())
+        SfenReader(int thread_num) :
+            prng(std::chrono::system_clock::now().time_since_epoch().count())
         {
             packed_sfens.resize(thread_num);
             total_read = 0;
@@ -398,6 +426,7 @@ namespace Learner
                 cout << "Error! read packed sfen failed." << endl;
                 break;
             }
+
             sfen_for_mse.push_back(ps);

             // Get the hash key.
@@ -418,8 +447,10 @@ namespace Learner
             {
                 if (eval_limit < abs(p.score))
                     continue;
+
                 if (!use_draw_games_in_validation && p.game_result == 0)
                     continue;
+
                 sfen_for_mse.push_back(p);
             }
             else
@@ -436,7 +467,7 @@ namespace Learner
         // [ASYNC] Thread returns one position. Otherwise returns false.
         bool read_to_thread_buffer(size_t thread_id, PackedSfenValue& ps)
         {
             // If there are any positions left in the thread buffer, retrieve one and return it.
             auto& thread_ps = packed_sfens[thread_id];

             // Fill the read buffer if there is no remaining buffer, but if it doesn't even exist, finish.
-            if ((thread_ps == nullptr || thread_ps->size() == 0) // If the buffer is empty, fill it.
+            if ((thread_ps == nullptr || thread_ps->empty()) // If the buffer is empty, fill it.
                 && !read_to_thread_buffer_impl(thread_id))
                 return false;

             // read_to_thread_buffer_impl() returned true,
             // so the filling of the thread buffer with positions has completed successfully
             // and thread_ps->rbegin() is alive.

-            ps = *(thread_ps->rbegin());
+            ps = thread_ps->back();
             thread_ps->pop_back();

             // If you've run out of buffers, free this buffer yourself.
-            if (thread_ps->size() == 0)
+            if (thread_ps->empty())
             {
                 thread_ps.reset();
             }
@@ -507,7 +538,7 @@ namespace Learner
                 return false;

             // Get the next file name.
-            string filename = *filenames.rbegin();
+            string filename = filenames.back();
             filenames.pop_back();

             fs.open(filename, ios::in | ios::binary);
@@ -523,6 +554,7 @@ namespace Learner
                 // This size() is read only, so you don't need to lock it.
                 while (!stop_flag && packed_sfens_pool.size() >= SFEN_READ_SIZE / THREAD_BUFFER_SIZE)
                     sleep(100);
+
                 if (stop_flag)
                     return;
@@ -555,9 +587,7 @@ namespace Learner
                 if (!no_shuffle)
                 {
-                    auto size = sfens.size();
-                    for (size_t i = 0; i < size; ++i)
-                        swap(sfens[i], sfens[(size_t)(prng.rand((uint64_t)size - i) + i)]);
+                    Algo::shuffle(sfens, prng);
                 }

                 // Divide this by THREAD_BUFFER_SIZE. There should be size pieces.
@@ -591,6 +621,13 @@ namespace Learner
             }
         }

+        // Determine if this is a position used for calculating rmse.
+        // (Positions used for rmse should not be used for learning.)
+        bool is_for_rmse(Key key) const
+        {
+            return sfen_for_mse_hash.count(key) != 0;
+        }
+
         // sfen files
         vector<string> filenames;
@@ -613,17 +650,6 @@ namespace Learner
         bool stop_flag;

-        // Determine if this is a position used for calculating rmse.
-        // (Positions used for rmse should not be used for learning.)
-        bool is_for_rmse(Key key) const
-        {
-            return sfen_for_mse_hash.count(key) != 0;
-        }
-
-        // hash to limit the reading of the same position
-        // Are 64 million positions too many? Probably not.
-        // It must be 2**N because it will be used as the mask to calculate hash_index.
-        static const uint64_t READ_SFEN_HASH_SIZE = 64 * 1024 * 1024;
+
         vector<Key> hash; // 64MB*8 = 512MB

         // test positions for mse calculation
         PSVector sfen_for_mse;
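
The reader hands whole buffers from a shared pool to per-thread queues; a worker only touches the lock when its own buffer runs dry. A stripped-down sketch of that handoff pattern, independent of the engine types (Buffer stands in for PSVector):

    #include <deque>
    #include <memory>
    #include <mutex>
    #include <vector>

    using Buffer = std::vector<int>;  // stand-in for PSVector

    std::mutex pool_mutex;
    std::deque<std::unique_ptr<Buffer>> pool;  // filled by a producer thread

    // Each worker owns at most one buffer and refills it from the pool.
    bool refill(std::unique_ptr<Buffer>& thread_buf)
    {
        std::lock_guard<std::mutex> lk(pool_mutex);
        if (pool.empty())
            return false;
        thread_buf = std::move(pool.front());
        pool.pop_front();
        return true;
    }

    bool next_item(std::unique_ptr<Buffer>& thread_buf, int& out)
    {
        if ((!thread_buf || thread_buf->empty()) && !refill(thread_buf))
            return false;
        out = thread_buf->back();
        thread_buf->pop_back();
        if (thread_buf->empty())
            thread_buf.reset();  // release the drained buffer
        return true;
    }
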
@@ -663,7 +689,10 @@ namespace Learner

     // Class to generate sfen with multiple threads
     struct LearnerThink : public MultiThink
     {
-        LearnerThink(SfenReader& sr_) :sr(sr_), stop_flag(false), save_only_once(false)
+        LearnerThink(SfenReader& sr_) :
+            sr(sr_),
+            stop_flag(false),
+            save_only_once(false)
         {
 #if defined ( LOSS_FUNCTION_IS_ELMO_METHOD )
             learn_sum_cross_entropy_eval = 0.0;
@@ -686,7 +715,12 @@ namespace Learner
         virtual void thread_worker(size_t thread_id);

         // Start a thread that loads the position file in the background.
-        void start_file_read_worker() { sr.start_file_read_worker(); }
+        void start_file_read_worker()
+        {
+            sr.start_file_read_worker();
+        }
+
+        Value get_shallow_value(Position& task_pos);

         // save merit function parameters to a file
         bool save(bool is_final = false);
@@ -753,6 +787,33 @@ namespace Learner
         TaskDispatcher task_dispatcher;
     };

+    Value LearnerThink::get_shallow_value(Position& task_pos)
+    {
+        // Evaluation value for shallow search.
+        // The value of evaluate() could be used, but when calculating the loss it is
+        // hard to compare against learn_cross_entropy, so use qsearch() instead.
+        // EvalHash has been disabled in advance (if not, the same value will be returned every time).
+        const auto [_, pv] = qsearch(task_pos);
+
+        std::vector<StateInfo, AlignedAllocator<StateInfo>> states(pv.size());
+        for (size_t i = 0; i < pv.size(); ++i)
+        {
+            task_pos.do_move(pv[i], states[i]);
+            Eval::NNUE::update_eval(task_pos);
+        }
+
+        const auto rootColor = task_pos.side_to_move();
+        const Value shallow_value =
+            (rootColor == task_pos.side_to_move())
+            ? Eval::evaluate(task_pos)
+            : -Eval::evaluate(task_pos);
+
+        for (auto it = pv.rbegin(); it != pv.rend(); ++it)
+            task_pos.undo_move(*it);
+
+        return shallow_value;
+    }
+
     void LearnerThink::calc_loss(size_t thread_id, uint64_t done)
     {
         // There is no point in hitting the transposition table, so at this timing the generation of the transposition table is updated.
@@ -800,8 +861,6 @@ namespace Learner
         pos.set(StartFEN, false, &si, th);
         std::cout << "hirate eval = " << Eval::evaluate(pos);

-        //Eval::print_eval_stat(pos);
-
         // It's better to parallelize here, but it's a bit troublesome because the search before slave has not finished.
         // I created a mechanism to call task, so I will use it.
@@ -818,6 +877,7 @@ namespace Learner
             // It is not possible to capture pos used above, so specify the variables you want to capture one by one.
             auto task =
                 [
+                    this,
                    &ps,
                    &test_sum_cross_entropy_eval,
                    &test_sum_cross_entropy_win,
                    &test_sum_cross_entropy,
                    &test_sum_entropy_eval,
                    &test_sum_entropy_win,
                    &test_sum_entropy,
                    &sum_norm,
                    &task_count,
                    &move_accord_count
                ](size_t task_thread_id)
            {
-                // Does C++ properly capture a new ps instance for each loop?.
                auto task_th = Threads[task_thread_id];
                auto& task_pos = task_th->rootPos;
                StateInfo task_si;
                if (task_pos.set_from_packed_sfen(ps.sfen, &task_si, task_th) != 0)
                {
                    cout << "Error! : illegal packed sfen " << task_pos.fen() << endl;
                }

-                // Evaluation value for shallow search
-                // The value of evaluate() may be used, but when calculating loss, learn_cross_entropy and
-                // Use qsearch() because it is difficult to compare the values.
-                // EvalHash has been disabled in advance. (If not, the same value will be returned every time)
-                auto task_search_result = qsearch(task_pos);
-
-                auto shallow_value = task_search_result.first;
-                {
-                    const auto rootColor = task_pos.side_to_move();
-                    const auto pv = task_search_result.second;
-                    std::vector<StateInfo, AlignedAllocator<StateInfo>> states(pv.size());
-                    for (size_t i = 0; i < pv.size(); ++i)
-                    {
-                        task_pos.do_move(pv[i], states[i]);
-                        Eval::NNUE::update_eval(task_pos);
-                    }
-                    shallow_value = (rootColor == task_pos.side_to_move()) ? Eval::evaluate(task_pos) : -Eval::evaluate(task_pos);
-                    for (auto it = pv.rbegin(); it != pv.rend(); ++it)
-                        task_pos.undo_move(*it);
-                }
+                const Value shallow_value = get_shallow_value(task_pos);

                // Evaluation value of deep search
                auto deep_value = (Value)ps.score;
@@ -887,7 +927,17 @@ namespace Learner
 #if defined (LOSS_FUNCTION_IS_ELMO_METHOD)
                double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy;
                double test_entropy_eval, test_entropy_win, test_entropy;
-                calc_cross_entropy(deep_value, shallow_value, ps, test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy, test_entropy_eval, test_entropy_win, test_entropy);
+                calc_cross_entropy(
+                    deep_value,
+                    shallow_value,
+                    ps,
+                    test_cross_entropy_eval,
+                    test_cross_entropy_win,
+                    test_cross_entropy,
+                    test_entropy_eval,
+                    test_entropy_win,
+                    test_entropy);
+
                // The total cross entropy need not be abs() by definition.
                test_sum_cross_entropy_eval += test_cross_entropy_eval;
                test_sum_cross_entropy_win += test_cross_entropy_win;
@@ -900,8 +950,8 @@ namespace Learner

                // Determine if the teacher's move and the move of the shallow search match
                {
-                    auto r = search(task_pos, 1);
-                    if ((uint16_t)r.second[0] == ps.move)
+                    const auto [value, pv] = search(task_pos, 1);
+                    if ((uint16_t)pv[0] == ps.move)
                        move_accord_count.fetch_add(1, std::memory_order_relaxed);
                }
@@ -950,6 +1000,7 @@ namespace Learner
                << " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size()
                << " , norm = " << sum_norm
                << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%";
+
            if (done != static_cast<uint64_t>(-1))
            {
                cout
@@ -962,7 +1013,8 @@ namespace Learner
            }
            cout << endl;
        }
-        else {
+        else
+        {
            cout << "Error! : sr.sfen_for_mse.size() = " << sr.sfen_for_mse.size() << " , done = " << done << endl;
        }
@@ -978,7 +1030,6 @@ namespace Learner
 #endif
    }

-
    void LearnerThink::thread_worker(size_t thread_id)
    {
 #if defined(_OPENMP)
@@ -1092,7 +1143,9 @@ namespace Learner
        }

        PackedSfenValue ps;
-    RetryRead:;
+
+    RETRY_READ:;
+
        if (!sr.read_to_thread_buffer(thread_id, ps))
        {
            // ran out of thread pool for my thread.
@@ -1106,16 +1159,14 @@ namespace Learner
        // The evaluation value exceeds the learning target value.
        // Ignore this position.
        if (eval_limit < abs(ps.score))
-            goto RetryRead;
-
+            goto RETRY_READ;

        if (!use_draw_games_in_training && ps.game_result == 0)
-            goto RetryRead;
-
+            goto RETRY_READ;

        // Skip over the opening phase
        if (ps.gamePly < prng.rand(reduction_gameply))
-            goto RetryRead;
+            goto RETRY_READ;

 #if 0
        auto sfen = pos.sfen_unpack(ps.data);
@@ -1129,20 +1180,24 @@ namespace Learner
            // I got a strange sfen. Should be debugged!
            // Since it is an illegal sfen, it may not be displayed with pos.sfen(), but it is better than nothing.
            cout << "Error! : illegal packed sfen = " << pos.fen() << endl;
-            goto RetryRead;
+            goto RETRY_READ;
        }
+
 #if !defined(EVAL_NNUE)
+        if (skip_duplicated_positions_in_training)
        {
-            auto key = pos.key();
+            const auto key = pos.key();
+
            // Exclude the positions used for rmse calculation.
-            if (sr.is_for_rmse(key) && skip_duplicated_positions_in_training)
-                goto RetryRead;
+            if (sr.is_for_rmse(key))
+                goto RETRY_READ;

            // Exclude the most recently used positions.
-            auto hash_index = size_t(key & (sr.READ_SFEN_HASH_SIZE - 1));
-            auto key2 = sr.hash[hash_index];
-            if (key == key2 && skip_duplicated_positions_in_training)
-                goto RetryRead;
+            const auto hash_index = size_t(key & (sr.READ_SFEN_HASH_SIZE - 1));
+            const auto key2 = sr.hash[hash_index];
+            if (key == key2)
+                goto RETRY_READ;
+
            sr.hash[hash_index] = key; // Replace with the current key.
        }
 #endif
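
The duplicate filter above is just a fixed-size, power-of-two table of recently seen keys indexed by a mask. The same idea in isolation, with the table shrunk so the example stays small:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main()
    {
        constexpr uint64_t HASH_SIZE = 1024;                  // must be a power of two
        std::vector<uint64_t> seen(HASH_SIZE, 0);

        auto is_duplicate = [&](uint64_t key) {
            const size_t index = size_t(key & (HASH_SIZE - 1));  // cheap modulo
            if (seen[index] == key)
                return true;
            seen[index] = key;                                // remember the newest key
            return false;
        };

        assert(!is_duplicate(0xDEADBEEFULL));
        assert(is_duplicate(0xDEADBEEFULL));                  // immediate repeat is caught
        assert(!is_duplicate(0xDEADBEEFULL + HASH_SIZE));     // collisions evict, by design
        return 0;
    }
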
        // There is a possibility the position has no legal moves
        // (such a position shouldn't have been written out, but may exist in data from an old generation routine).
        // Skip the position if there are no legal moves (=checkmated or stalemate).
        if (MoveList<LEGAL>(pos).size() == 0)
-            goto RetryRead;
+            goto RETRY_READ;

        // I can read it, so try displaying it.
        // cout << pos << value << endl;

        // Evaluation value of shallow search (qsearch)
-        auto r = qsearch(pos);
-        auto pv = r.second;
+        const auto [shallow_value, pv] = qsearch(pos);

        // Evaluation value of deep search
-        auto deep_value = (Value)ps.score;
+        const auto deep_value = (Value)ps.score;

        // I feel that the mini batch has a better gradient.
        // Go to the leaf node as it is, add only to the gradient array, and later try AdaGrad at the time of rmse aggregation.
-        auto rootColor = pos.side_to_move();
+        const auto rootColor = pos.side_to_move();

        // If the initial PV is different, it is better not to use it for learning.
        // If it is the result of searching a completely different place, it may become noise.
@@ -1203,13 +1257,26 @@ namespace Learner
        // I don't think this is a very desirable property, as the position that gives that gradient will be different.
        // I have turned off the transposition table, but since the pv array has not been updated due to one stumbling block etc...

-        Value shallow_value = (rootColor == pos.side_to_move()) ? Eval::evaluate(pos) : -Eval::evaluate(pos);
+        const Value shallow_value =
+            (rootColor == pos.side_to_move())
+            ? Eval::evaluate(pos)
+            : -Eval::evaluate(pos);

 #if defined (LOSS_FUNCTION_IS_ELMO_METHOD)
        // Calculate loss for training data
        double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
        double learn_entropy_eval, learn_entropy_win, learn_entropy;
-        calc_cross_entropy(deep_value, shallow_value, ps, learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy, learn_entropy_eval, learn_entropy_win, learn_entropy);
+        calc_cross_entropy(
+            deep_value,
+            shallow_value,
+            ps,
+            learn_cross_entropy_eval,
+            learn_cross_entropy_win,
+            learn_cross_entropy,
+            learn_entropy_eval,
+            learn_entropy_win,
+            learn_entropy);
+
        learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
        learn_sum_cross_entropy_win += learn_cross_entropy_win;
        learn_sum_cross_entropy += learn_cross_entropy;
@@ -1266,7 +1333,8 @@ namespace Learner
            Eval::NNUE::update_eval(pos);
        }

-        if (illegal_move) {
+        if (illegal_move)
+        {
            sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
            continue;
        }
@@ -1284,7 +1352,6 @@ namespace Learner
        dj_dw = calc_grad(deep_value, shallow_value, ps);
        Eval::add_grad(pos, rootColor, dj_dw, without_kpp);
 #endif
-
    }
 }
@@ -1301,14 +1368,17 @@ namespace Learner
        // Do not dig a subfolder because I want to save it only once.
Eval::save_eval(""); } - else if (is_final) { + else if (is_final) + { Eval::save_eval("final"); return true; } - else { + else + { static int dir_number = 0; const std::string dir_name = std::to_string(dir_number++); Eval::save_eval(dir_name); + #if defined(EVAL_NNUE) if (newbob_decay != 1.0 && latest_loss_count > 0) { static int trials = newbob_num_trials; @@ -1316,22 +1386,28 @@ namespace Learner latest_loss_sum = 0.0; latest_loss_count = 0; cout << "loss: " << latest_loss; - if (latest_loss < best_loss) { + if (latest_loss < best_loss) + { cout << " < best (" << best_loss << "), accepted" << endl; best_loss = latest_loss; best_nn_directory = Path::Combine((std::string)Options["EvalSaveDir"], dir_name); trials = newbob_num_trials; } - else { + else + { cout << " >= best (" << best_loss << "), rejected" << endl; - if (best_nn_directory.empty()) { + if (best_nn_directory.empty()) + { cout << "WARNING: no improvement from initial model" << endl; } - else { + else + { cout << "restoring parameters from " << best_nn_directory << endl; Eval::NNUE::RestoreParameters(best_nn_directory); } - if (--trials > 0 && !is_final) { + + if (--trials > 0 && !is_final) + { cout << "reducing learning rate scale from " << newbob_scale << " to " << (newbob_scale * newbob_decay) << " (" << trials << " more trials)" << endl; @@ -1339,7 +1415,9 @@ namespace Learner Eval::NNUE::SetGlobalLearningRateScale(newbob_scale); } } - if (trials == 0) { + + if (trials == 0) + { cout << "converged" << endl; return true; } @@ -1371,10 +1449,11 @@ namespace Learner // Output progress every 10M phase or when all writing is completed if (((write_sfen_count % buffer_size) == 0) || (write_sfen_count == total_sfen_count)) + { cout << write_sfen_count << " / " << total_sfen_count << endl; + } }; - cout << endl << "write : " << output_file_name << endl; fstream fs(output_file_name, ios::out | ios::binary); @@ -1453,9 +1532,7 @@ namespace Learner auto write_buffer = [&](uint64_t size) { - // shuffle from buf[0] to buf[size-1] - for (uint64_t i = 0; i < size; ++i) - swap(buf[i], buf[(uint64_t)(prng.rand(size - i) + i)]); + Algo::shuffle(buf, prng); // write to a file fstream fs; @@ -1533,13 +1610,8 @@ namespace Learner auto& fs = afs[i]; fs.open(filename, ios::in | ios::binary); - fs.seekg(0, fstream::end); - uint64_t eofPos = (uint64_t)fs.tellg(); - fs.clear(); // Otherwise, the next seek may fail. - fs.seekg(0, fstream::beg); - uint64_t begPos = (uint64_t)fs.tellg(); - uint64_t file_size = eofPos - begPos; - uint64_t sfen_count = file_size / sizeof(PackedSfenValue); + const uint64_t file_size = get_file_size(fs); + const uint64_t sfen_count = file_size / sizeof(PackedSfenValue); a_count[i] = sfen_count; // Output the number of sfen stored in each file. @@ -1578,8 +1650,8 @@ namespace Learner PRNG prng(std::chrono::system_clock::now().time_since_epoch().count()); uint64_t size = (uint64_t)buf.size(); std::cout << "shuffle buf.size() = " << size << std::endl; - for (uint64_t i = 0; i < size; ++i) - swap(buf[i], buf[(uint64_t)(prng.rand(size - i) + i)]); + + Algo::shuffle(buf, prng); std::cout << "write : " << output_file_name << endl; diff --git a/src/misc.cpp b/src/misc.cpp index a23b1205..5ef5ecdc 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -627,18 +627,27 @@ void* aligned_malloc(size_t size, size_t align) return p; } +std::uint64_t get_file_size(std::fstream& fs) +{ + auto pos = fs.tellg(); + + fs.seekg(0, fstream::end); + const uint64_t eofPos = (uint64_t)fs.tellg(); + fs.clear(); // Otherwise, the next seek may fail. 
+    fs.seekg(0, fstream::beg);
+    const uint64_t begPos = (uint64_t)fs.tellg();
+    fs.seekg(pos);
+
+    return eofPos - begPos;
+}
+
 int read_file_to_memory(std::string filename, std::function<void*(uint64_t)> callback_func)
 {
    fstream fs(filename, ios::in | ios::binary);
    if (fs.fail())
        return 1;

-    fs.seekg(0, fstream::end);
-    uint64_t eofPos = (uint64_t)fs.tellg();
-    fs.clear(); // Otherwise the next seek may fail.
-    fs.seekg(0, fstream::beg);
-    uint64_t begPos = (uint64_t)fs.tellg();
-    uint64_t file_size = eofPos - begPos;
+    const uint64_t file_size = get_file_size(fs);
    //std::cout << "filename = " << filename << " , file_size = " << file_size << endl;

    // I know the file size, so call callback_func to get a buffer for this,

diff --git a/src/misc.h b/src/misc.h
index c918a351..5add3b36 100644
--- a/src/misc.h
+++ b/src/misc.h
@@ -26,6 +26,8 @@
 #include <chrono>
 #include <ostream>
 #include <string>
+#include <cmath>
+#include <vector>

 #include "types.h"
@@ -155,6 +157,7 @@ std::string now_string();
 // Also, if the buffer cannot be allocated in the callback function or if the file size is different from the expected file size,
 // Return nullptr. At this time, read_file_to_memory() interrupts reading and returns with an error.
+std::uint64_t get_file_size(std::fstream& fs);
 int read_file_to_memory(std::string filename, std::function<void*(uint64_t)> callback_func);
 int write_memory_to_file(std::string filename, void* ptr, uint64_t size);
@@ -199,20 +202,37 @@ inline std::ostream& operator<<(std::ostream& os, AsyncPRNG& prng)
 // Mathematical function used for progress calculation and learning
 namespace Math {
-    // Sigmoid function
-    // = 1.0 / (1.0 + std::exp(-x))
-    double sigmoid(double x);
+    inline double sigmoid(double x)
+    {
+        return 1.0 / (1.0 + std::exp(-x));
+    }

-    // Differentiation of sigmoid function
-    // = sigmoid(x) * (1.0-sigmoid(x))
-    double dsigmoid(double x);
+    inline double dsigmoid(double x)
+    {
+        // For the sigmoid function
+        // f(x) = 1/(1+exp(-x))
+        // the first derivative is
+        // f'(x) = df/dx = f(x) * {1 - f(x)}
+
+        return sigmoid(x) * (1.0 - sigmoid(x));
+    }

    // Clip v so that it fits between [lo,hi].
    // * In Stockfish, this function is written in bitboard.h.
    template <class T> constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
        return v < lo ? lo : v > hi ? hi : v;
    }
+}
+
+namespace Algo {
+    template <typename T, typename Rng>
+    void shuffle(std::vector<T>& buf, Rng&& prng)
+    {
+        const auto size = buf.size();
+        for (uint64_t i = 0; i < size; ++i)
+            std::swap(buf[i], buf[prng.rand(size - i) + i]);
+    }
 }

 // --------------------
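
For reference, the Algo::shuffle helper added above is a Fisher-Yates shuffle driven by the engine's PRNG; any generator exposing rand(n) uniform in [0, n) can drive it. A minimal usage sketch with a stand-in generator (StubRng is illustrative, not part of the patch; the modulo draw is biased but fine for a demo):

    #include <cstdint>
    #include <random>
    #include <vector>

    // Stand-in for the engine's PRNG: anything with rand(n) -> [0, n) works.
    struct StubRng
    {
        std::mt19937_64 gen{42};
        uint64_t rand(uint64_t n) { return gen() % n; }
    };

    template <typename T, typename Rng>
    void shuffle(std::vector<T>& buf, Rng&& prng)
    {
        const uint64_t size = buf.size();
        for (uint64_t i = 0; i < size; ++i)
            std::swap(buf[i], buf[prng.rand(size - i) + i]);
    }

    int main()
    {
        std::vector<int> v{1, 2, 3, 4, 5};
        shuffle(v, StubRng{});
        return 0;
    }
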
From 1482e5215afa1b457418d45805bb57a25f4529f4 Mon Sep 17 00:00:00 2001
From: Tomasz Sobczyk
Date: Mon, 7 Sep 2020 23:26:38 +0200
Subject: [PATCH 03/30] A second batch of code reorganization.

---
 src/Makefile              |   1 -
 src/learn/convert.cpp     |  10 +--
 src/learn/gensfen.cpp     |   8 +-
 src/learn/gensfen2019.cpp |   1 -
 src/learn/learn.h         |  56 ++++++-------
 src/learn/learner.cpp     | 170 +++++++++++++++-----------------------
 6 files changed, 96 insertions(+), 150 deletions(-)
 delete mode 100644 src/learn/gensfen2019.cpp

diff --git a/src/Makefile b/src/Makefile
index 9db13e44..ca851dba 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -56,7 +56,6 @@ SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp
	nnue/features/enpassant.cpp \
	nnue/nnue_test_command.cpp \
	extra/sfen_packer.cpp \
-	learn/gensfen2019.cpp \
	learn/learner.cpp \
	learn/gensfen.cpp \
	learn/convert.cpp \

diff --git a/src/learn/convert.cpp b/src/learn/convert.cpp
index b84dc2f8..9bd9548d 100644
--- a/src/learn/convert.cpp
+++ b/src/learn/convert.cpp
@@ -25,20 +25,12 @@
 #include
 #include
 #include
+#include <filesystem>

 #if defined (_OPENMP)
 #include <omp.h>
 #endif

-#if defined(_MSC_VER)
-// The C++ filesystem cannot be used unless it is C++17 or later or MSVC.
-// I tried to use windows.h, but with g++ of msys2 I can not get the files in the folder well.
-// Use dirent.h because there is no help for it.
-#include <filesystem>
-#elif defined(__GNUC__)
-#include <dirent.h>
-#endif
-
 using namespace std;

 namespace Learner

diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp
index 4214233b..b049192e 100644
--- a/src/learn/gensfen.cpp
+++ b/src/learn/gensfen.cpp
@@ -28,18 +28,12 @@
 #include
 #include
 #include
+#include <filesystem>

 #if defined (_OPENMP)
 #include <omp.h>
 #endif

-#if defined(_MSC_VER)
-// std::filesystem doesn't work on GCC even though it claims to support C++17.
-#include <filesystem>
-#elif defined(__GNUC__)
-#include <dirent.h>
-#endif
-
 #if defined(EVAL_NNUE)
 #include "../nnue/evaluate_nnue_learner.h"
 #include

diff --git a/src/learn/gensfen2019.cpp b/src/learn/gensfen2019.cpp
deleted file mode 100644
index 01293b9c..00000000
--- a/src/learn/gensfen2019.cpp
+++ /dev/null
@@ -1 +0,0 @@
-// just a place holder

diff --git a/src/learn/learn.h b/src/learn/learn.h
index e29ed74a..1bc39cf9 100644
--- a/src/learn/learn.h
+++ b/src/learn/learn.h
@@ -27,30 +27,6 @@
 // SGD looking only at the sign of the gradient. It requires less memory, but the accuracy is...
 // #define SGD_UPDATE

-// ----------------------
-// Settings for learning
-// ----------------------
-
-// mini-batch size.
-// Calculate the gradient by combining this number of positions.
-// If you make it smaller, the number of update_weights() calls increases and convergence is faster, but the gradient is less accurate.
-// If you make it larger, the number of update_weights() calls decreases so convergence is slower, but the gradient comes out more accurately.
-// I don't think you need to change this value in most cases.
-
-#define LEARN_MINI_BATCH_SIZE (1000 * 1000 * 1)
-
-// The number of positions to read from the file at one time. After reading this much, shuffle.
-// It is better to have a certain size, but this number x 40 bytes x 3 times as much memory is consumed. 400MB*3 is consumed for 10M positions.
-// Must be a multiple of THREAD_BUFFER_SIZE(=10000).
-
-#define LEARN_SFEN_READ_SIZE (1000 * 1000 * 10)
-
-// Saving interval of evaluation function at learning. Save each time you learn this number of positions.
-// Needless to say, the longer the saving interval, the shorter the learning time.
-// Folder name is incremented for each save like 0/, 1/, 2/...
-// By default, once every 1 billion positions.
-#define LEARN_EVAL_SAVE_INTERVAL (1000000000ULL)
-
 // ----------------------
 // Select the objective function
 // ----------------------
@@ -79,10 +55,6 @@
 // debug settings for learning
 // ----------------------

-// Reduce the output of rmse during learning to once per this number of times.
-// rmse calculation is done in one thread, so it takes some time, so reducing the output is effective.
-#define LEARN_RMSE_OUTPUT_INTERVAL 1
-
 // ----------------------
 // learning from zero vector
 // ----------------------
@@ -205,6 +177,34 @@ typedef float LearnFloatType;

 namespace Learner
 {
+    // ----------------------
+    // Settings for learning
+    // ----------------------
+
+    // mini-batch size.
+    // Calculate the gradient by combining this number of positions.
+    // If you make it smaller, the number of update_weights() calls increases and convergence is faster, but the gradient is less accurate.
+    // If you make it larger, the number of update_weights() calls decreases so convergence is slower, but the gradient comes out more accurately.
+    // I don't think you need to change this value in most cases.
+
+    constexpr std::size_t LEARN_MINI_BATCH_SIZE = 1000 * 1000 * 1;
+
+    // The number of positions to read from the file at one time. After reading this much, shuffle.
+    // It is better to have a certain size, but this number x 40 bytes x 3 times as much memory is consumed. 400MB*3 is consumed for 10M positions.
+    // Must be a multiple of THREAD_BUFFER_SIZE(=10000).
+
+    constexpr std::size_t LEARN_SFEN_READ_SIZE = 1000 * 1000 * 10;
+
+    // Saving interval of evaluation function at learning. Save each time you learn this number of positions.
+    // Needless to say, the longer the saving interval, the shorter the learning time.
+    // Folder name is incremented for each save like 0/, 1/, 2/...
+    // By default, once every 1 billion positions.
+    constexpr std::size_t LEARN_EVAL_SAVE_INTERVAL = 1000000000ULL;
+
+    // Reduce the output of rmse during learning to once per this number of times.
+    // rmse calculation is done in one thread, so it takes some time, so reducing the output is effective.
+    constexpr std::size_t LEARN_RMSE_OUTPUT_INTERVAL = 1;
+
     // Structure in which PackedSfen and evaluation value are integrated
     // If you write different contents for each option, it will be a problem when reusing the teacher game
     // For the time being, write all the following members regardless of the options.
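
The macro-to-constexpr move above trades textual substitution for typed, scoped constants, which also lets invariants be checked at compile time next to the definitions. A small sketch of the benefit (THREAD_BUFFER_SIZE copied from SfenReader in this patch series):

    #include <cstddef>

    namespace Learner
    {
        constexpr std::size_t LEARN_MINI_BATCH_SIZE = 1000 * 1000 * 1;
        constexpr std::size_t LEARN_SFEN_READ_SIZE = 1000 * 1000 * 10;

        // The reader assumes the read size is a whole number of thread buffers;
        // with constexpr constants this invariant can be enforced at compile time.
        constexpr std::size_t THREAD_BUFFER_SIZE = 10 * 1000;
        static_assert(LEARN_SFEN_READ_SIZE % THREAD_BUFFER_SIZE == 0,
                      "LEARN_SFEN_READ_SIZE must be a multiple of THREAD_BUFFER_SIZE");
    }
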
diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp
index 98c8e32e..ddfaff5a 100644
--- a/src/learn/learner.cpp
+++ b/src/learn/learner.cpp
@@ -45,15 +45,6 @@
 #include
 #endif

-#if defined(_MSC_VER)
-// The C++ filesystem cannot be used unless it is C++17 or later or MSVC.
-// I tried to use windows.h, but with g++ of msys2 I can not get the files in the folder well.
-// Use dirent.h because there is no help for it.
-#include <filesystem>
-#elif defined(__GNUC__)
-#include <dirent.h>
-#endif
-
 #if defined(EVAL_NNUE)
 #include "../nnue/evaluate_nnue_learner.h"
 #include

 using namespace std;

-//// This is defined in the search section.
-//extern Book::BookMoveSelector book;
+
+#if defined(USE_BOOK)
+// This is defined in the search section.
+extern Book::BookMoveSelector book;
+#endif

 template <typename T>
 T operator +=(std::atomic<T>& x, const T rhs)
@@ -128,9 +122,9 @@ namespace Learner
        constexpr double wdl_total = 1000.0;
        constexpr double draw_score = 0.5;

-        double wdl_w = UCI::win_rate_model_double(value, ply);
-        double wdl_l = UCI::win_rate_model_double(-value, ply);
-        double wdl_d = wdl_total - wdl_w - wdl_l;
+        const double wdl_w = UCI::win_rate_model_double(value, ply);
+        const double wdl_l = UCI::win_rate_model_double(-value, ply);
+        const double wdl_d = wdl_total - wdl_w - wdl_l;

        return (wdl_w + wdl_d * draw_score) / wdl_total;
    }
@@ -150,16 +144,17 @@ namespace Learner

    double calc_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply)
    {
-        double p = deep_win_rate;
-        double q = winning_percentage(shallow_eval, ply);
+        const double p = deep_win_rate;
+        const double q = winning_percentage(shallow_eval, ply);
        return -p * std::log(q) - (1.0 - p) * std::log(1.0 - q);
    }

    double calc_d_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply)
    {
        constexpr double epsilon = 0.000001;
-        double y1 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval, ply);
-        double y2 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval + epsilon, ply);
+
+        const double y1 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval, ply);
+        const double y2 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval + epsilon, ply);

        // Divide by the winning_probability_coefficient to match scale with the sigmoidal win rate
        return ((y2 - y1) / epsilon) / winning_probability_coefficient;
    }
@@ -190,8 +185,8 @@ namespace Learner
        // Also, the coefficient of 1/m is unnecessary if you use the update formula that has the automatic gradient adjustment function like Adam and AdaGrad.
        // Therefore, it is not necessary to save it in memory.

-        double p = winning_percentage(deep);
-        double q = winning_percentage(shallow);
+        const double p = winning_percentage(deep, psv.gamePly);
+        const double q = winning_percentage(shallow, psv.gamePly);
        return (q - p) * Math::dsigmoid(double(shallow) / 600.0);
    }
 #endif
@@ -216,8 +211,8 @@ namespace Learner
        // = ...
        // = q-p.

-        double p = winning_percentage(deep);
-        double q = winning_percentage(shallow);
+        const double p = winning_percentage(deep, psv.gamePly);
+        const double q = winning_percentage(shallow, psv.gamePly);
        return q - p;
    }
@@ -270,8 +265,10 @@ namespace Learner
        double p = scaled_teacher_signal;
        if (convert_teacher_signal_to_winning_probability)
        {
-            p = winning_percentage(scaled_teacher_signal);
+            p = winning_percentage(scaled_teacher_signal, ply);
        }
+
+        return p;
    }
@@ -534,7 +531,7 @@ namespace Learner
            fs.close();

            // no more
-            if (filenames.size() == 0)
+            if (filenames.empty())
                return false;

            // Get the next file name.
@@ -543,6 +540,7 @@ namespace Learner
            fs.open(filename, ios::in | ios::binary);
            cout << "open filename = " << filename << endl;
+
            assert(fs);

            return true;
@@ -569,16 +567,12 @@ namespace Learner
                {
                    sfens.push_back(p);
                }
-                else
+                else if(!open_next_file())
                {
-                    // read failure
-                    if (!open_next_file())
-                    {
-                        // There was no next file. We are done.
-                        cout << "..end of files." << endl;
-                        end_of_files = true;
-                        return;
-                    }
+                    // There was no next file. We are done.
+                    cout << "..end of files."
+                         << endl;
+                    end_of_files = true;
+                    return;
                }
            }
@@ -702,6 +696,7 @@ namespace Learner
        learn_sum_entropy_win = 0.0;
        learn_sum_entropy = 0.0;
 #endif
+
 #if defined(EVAL_NNUE)
        newbob_scale = 1.0;
        newbob_decay = 1.0;
@@ -1213,7 +1208,7 @@ namespace Learner
        // cout << pos << value << endl;

        // Evaluation value of shallow search (qsearch)
-        const auto [shallow_value, pv] = qsearch(pos);
+        const auto [_, pv] = qsearch(pos);

        // Evaluation value of deep search
        const auto deep_value = (Value)ps.score;
@@ -1408,9 +1403,11 @@ namespace Learner

            if (--trials > 0 && !is_final)
            {
-                cout << "reducing learning rate scale from " << newbob_scale
+                cout
+                    << "reducing learning rate scale from " << newbob_scale
                    << " to " << (newbob_scale * newbob_decay)
                    << " (" << trials << " more trials)" << endl;
+
                newbob_scale *= newbob_decay;
                Eval::NNUE::SetGlobalLearningRateScale(newbob_scale);
            }
@@ -1432,10 +1429,10 @@ namespace Learner
    // prng: random number generator
    // afs: fstream of each teacher position file
    // a_count: the number of teacher positions in each file.
-    void shuffle_write(const string& output_file_name, PRNG& prng, vector<fstream>& afs, vector<uint64_t>& a_count)
+    void shuffle_write(const string& output_file_name, PRNG& prng, vector<fstream>& sfen_file_streams, vector<uint64_t>& sfen_count_in_file)
    {
        uint64_t total_sfen_count = 0;
-        for (auto c : a_count)
+        for (auto c : sfen_count_in_file)
            total_sfen_count += c;
@@ -1459,39 +1456,39 @@ namespace Learner
        fstream fs(output_file_name, ios::out | ios::binary);

        // total teacher positions
-        uint64_t sum = 0;
-        for (auto c : a_count)
-            sum += c;
+        uint64_t sfen_count_left = total_sfen_count;

-        while (sum != 0)
+        while (sfen_count_left != 0)
        {
-            auto r = prng.rand(sum);
+            auto r = prng.rand(sfen_count_left);

            // Think of the positions stored in file fs[0], then file fs[1], and so on
            // as one continuous sequence, and determine which file r points into.
            // The contents of each file are already shuffled, so you can take the next element from that file.
            // Each file holds sfen_count_in_file[x] positions, so this process can be written as follows.

-            uint64_t n = 0;
-            while (a_count[n] <= r)
-                r -= a_count[n++];
+            uint64_t i = 0;
+            while (sfen_count_in_file[i] <= r)
+                r -= sfen_count_in_file[i++];

            // This determines i. Reduce the remaining counts before we forget.
-            --a_count[n];
-            --sum;
+            --sfen_count_in_file[i];
+            --sfen_count_left;

            PackedSfenValue psv;
            // Reading and writing one record at a time is fine until performance becomes a problem...
-            if (afs[n].read((char*)&psv, sizeof(PackedSfenValue)))
+            if (sfen_file_streams[i].read((char*)&psv, sizeof(PackedSfenValue)))
            {
                fs.write((char*)&psv, sizeof(PackedSfenValue));
                ++write_sfen_count;
                print_status();
            }
        }
+
        print_status();
        fs.close();
+
        cout << "done!" << endl;
    }
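
The merge loop above draws the next record from file i with probability proportional to how many records file i still holds, which is what makes the concatenated output a uniform shuffle. The selection step in isolation (the modulo draw stands in for PRNG::rand(n)):

    #include <cstdint>
    #include <random>
    #include <vector>

    // Pick which source the next record comes from, weighted by remaining counts.
    size_t pick_source(std::vector<uint64_t>& remaining, uint64_t& total,
                       uint64_t r /* uniform in [0, total) */)
    {
        size_t i = 0;
        while (remaining[i] <= r)
            r -= remaining[i++];
        --remaining[i];
        --total;
        return i;
    }

    int main()
    {
        std::vector<uint64_t> remaining{3, 1, 2};
        uint64_t total = 6;
        std::mt19937_64 gen{7};
        while (total > 0)
        {
            const uint64_t r = gen() % total;
            (void)pick_source(remaining, total, r);  // drains all sources exactly once
        }
        return 0;
    }
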
    // Subcontracting the teacher shuffle "learn shuffle" command.
    // output_file_name: name of the output file where the shuffled teacher positions will be written
@@ -1509,8 +1506,8 @@ namespace Learner
        // There should have been a limit of 512 open files per process on Windows, so 500 are opened here.
        // The current setting is 500 files x 20M = 10G = 10 billion positions.

-        PSVector buf;
-        buf.resize(buffer_size);
+        PSVector buf(buffer_size);
+
        // A marker that indicates how much of the buffer above has been used
        uint64_t buf_write_marker = 0;
@@ -1537,7 +1534,7 @@ namespace Learner
            // write to a file
            fstream fs;
            fs.open(make_filename(write_file_count++), ios::out | ios::binary);
-            fs.write((char*)&buf[0], size * sizeof(PackedSfenValue));
+            fs.write(reinterpret_cast<char*>(buf.data()), size * sizeof(PackedSfenValue));
            fs.close();
            a_count.push_back(size);
@@ -1552,14 +1549,13 @@ namespace Learner
        {
            fstream fs(filename, ios::in | ios::binary);
            cout << endl << "open file = " << filename;
-            while (fs.read((char*)&buf[buf_write_marker], sizeof(PackedSfenValue)))
+            while (fs.read(reinterpret_cast<char*>(&buf[buf_write_marker]), sizeof(PackedSfenValue)))
                if (++buf_write_marker == buffer_size)
                    write_buffer(buffer_size);

            // Read in units of sizeof(PackedSfenValue),
            // and ignore the last remaining fraction. (Fails in fs.read, so exit while.)
            // (The remaining fraction is probably half-finished data left over from an interrupted teacher generation run.)
        }

        if (buf_write_marker != 0)
@@ -1599,20 +1595,20 @@ namespace Learner
        size_t file_count = filenames.size();

        // Number of teacher positions stored in each file in filenames
-        vector<uint64_t> a_count(file_count);
+        vector<uint64_t> sfen_count_in_file(file_count);

        // Count the number of teacher positions in each file.
-        vector<fstream> afs(file_count);
+        vector<fstream> sfen_file_streams(file_count);
        for (size_t i = 0; i < file_count; ++i)
        {
            auto filename = filenames[i];
-            auto& fs = afs[i];
+            auto& fs = sfen_file_streams[i];
            fs.open(filename, ios::in | ios::binary);

            const uint64_t file_size = get_file_size(fs);
            const uint64_t sfen_count = file_size / sizeof(PackedSfenValue);
-            a_count[i] = sfen_count;
+            sfen_count_in_file[i] = sfen_count;

            // Output the number of sfen stored in each file.
            cout << filename << " = " << sfen_count << " sfens." << endl;
@@ -1624,7 +1620,7 @@ namespace Learner

        // Now you have shuffled.
        // Throw to the subcontract function and end.
-        shuffle_write(output_file_name, prng, afs, a_count);
+        shuffle_write(output_file_name, prng, sfen_file_streams, sfen_count_in_file);
    }

    // Subcontracting the teacher shuffle "learn shufflem" command.
@@ -1656,7 +1652,10 @@ namespace Learner
        std::cout << "write : " << output_file_name << endl;

        // If the file to be written exceeds 2GB, it cannot be written in one shot with fstream::write, so use the wrapper.
-        write_memory_to_file(output_file_name, (void*)&buf[0], (uint64_t)sizeof(PackedSfenValue) * (uint64_t)buf.size());
+        write_memory_to_file(
+            output_file_name,
+            (void*)&buf[0],
+            sizeof(PackedSfenValue) * buf.size());

        std::cout << "..shuffle_on_memory done." << std::endl;
    }

    // Learning from the generated game record
    void learn(Position&, istringstream& is)
    {
-        auto thread_num = (int)Options["Threads"];
+        const auto thread_num = (int)Options["Threads"];
        SfenReader sr(thread_num);

        LearnerThink learn_think(sr);
@@ -1889,13 +1888,6 @@ namespace Learner
        {
            string kif_base_dir = Path::Combine(base_dir, target_dir);

-            // Enumerate the files in this folder. Keep paths relative to base_dir.
-#if defined(_MSC_VER)
-            // If you use std::tr2, warning C4996 will appear, so suppress it.
-            // * std::tr2 issued a deprecation warning by default under /std:c++14, and was deleted by default in /std:c++17.
-#pragma warning(push)
-#pragma warning(disable:4996)
-
-#pragma warning(push) -#pragma warning(disable:4996) - namespace sys = std::filesystem; sys::path p(kif_base_dir); // Origin of enumeration std::for_each(sys::directory_iterator(p), sys::directory_iterator(), @@ -1903,36 +1895,6 @@ namespace Learner if (sys::is_regular_file(p)) filenames.push_back(Path::Combine(target_dir, p.filename().generic_string())); }); -#pragma warning(pop) - -#elif defined(__GNUC__) - - auto ends_with = [](std::string const& value, std::string const& ending) - { - if (ending.size() > value.size()) return false; - return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); - }; - - // It can't be helped, so read it using dirent.h. - DIR* dp; // pointer to directory - dirent* entry; // entry point returned by readdir() - - dp = opendir(kif_base_dir.c_str()); - if (dp != NULL) - { - do { - entry = readdir(dp); - // Only list files ending with ".bin" - // →I hate this restriction when generating files with serial numbers... - if (entry != NULL && ends_with(entry->d_name, ".bin")) - { - //cout << entry->d_name << endl; - filenames.push_back(Path::Combine(target_dir, entry->d_name)); - } - } while (entry != NULL); - closedir(dp); - } -#endif } cout << "learn from "; @@ -1990,6 +1952,7 @@ namespace Learner dest_score_max_value, check_invalid_fen, check_illegal_move); + return; } @@ -1997,7 +1960,12 @@ namespace Learner { Eval::init_NNUE(); cout << "convert_bin_from_pgn-extract.." << endl; - convert_bin_from_pgn_extract(filenames, output_file_name, pgn_eval_side_to_move, convert_no_eval_fens_as_score_zero); + convert_bin_from_pgn_extract( + filenames, + output_file_name, + pgn_eval_side_to_move, + convert_no_eval_fens_as_score_zero); + return; } @@ -2154,12 +2122,6 @@ namespace Learner #endif } - } // namespace Learner -#if defined(GENSFEN2019) -#include "gensfen2019.cpp" -#endif - - #endif // EVAL_LEARN From a0b2d6a01e39627e9ea87b234a18067e4e404faf Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Mon, 7 Sep 2020 23:33:32 +0200 Subject: [PATCH 04/30] Note a potential defect in sfen packer. --- src/extra/sfen_packer.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/extra/sfen_packer.cpp b/src/extra/sfen_packer.cpp index ac789ce8..fd013fa2 100644 --- a/src/extra/sfen_packer.cpp +++ b/src/extra/sfen_packer.cpp @@ -218,7 +218,7 @@ struct SfenPacker PieceType pr = type_of(pc); auto c = huffman_table[pr]; stream.write_n_bit(c.code, c.bits); - + if (pc == NO_PIECE) return; @@ -249,7 +249,7 @@ struct SfenPacker // first and second flag Color c = (Color)stream.read_one_bit(); - + return make_piece(c, pr); } }; @@ -266,7 +266,10 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre { SfenPacker packer; auto& stream = packer.stream; - stream.set_data((uint8_t*)&sfen); + + // TODO: separate streams for writing and reading. Here we actually have to + // const_cast which is not safe in the long run. 
From 41b7674aee3920cb72554f8d22eb4e2cb6c57e09 Mon Sep 17 00:00:00 2001
From: Tomasz Sobczyk
Date: Mon, 7 Sep 2020 23:55:07 +0200
Subject: [PATCH 06/30] Improve comments, break long lines.

---
 src/learn/learner.cpp | 321 ++++++++++++++++++++++++++++--------------
 src/misc.h            |   1 +
 2 files changed, 213 insertions(+), 109 deletions(-)

diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp
index ddfaff5a..f9d188b8 100644
--- a/src/learn/learner.cpp
+++ b/src/learn/learner.cpp
@@ -1,18 +1,24 @@
-// learning routines
+// Learning routines:
 //
-// 1) Automatic generation of game records
+// 1) Automatic generation of game records in .bin format
 //     → "gensfen" command
-// 2) Learning evaluation function parameters from the generated game record
+//
+// 2) Learning evaluation function parameters from the generated .bin files
 //     → "learn" command
+//
 //     → Shuffle in the teacher phase is also an extension of this command.
 //        Example) "learn shuffle"
+//
 // 3) Automatic generation of fixed traces
 //     → "makebook think" command
 //     → implemented in extra/book/book.cpp
+//
 // 4) Post-game automatic analysis mode
 //     → not handled in the engine, since this is something the GUI should assist with.
 // etc..

 #if defined(EVAL_LEARN)

 #include "../eval/evaluate_common.h"
@@ -53,7 +59,6 @@

 using namespace std;

-
 #if defined(USE_BOOK)
 // This is defined in the search section.
 extern Book::BookMoveSelector book;
 #endif

 template <typename T>
 T operator +=(std::atomic<T>& x, const T rhs)
 {
     T old = x.load(std::memory_order_consume);
+
     // The value may be overwritten by another thread at this point;
     // the idea is that the value is not destroyed.
     T desired = old + rhs;
@@ -81,7 +87,7 @@ namespace Learner

     static double winning_probability_coefficient = 1.0 / PawnValueEg / 4.0 * std::log(10.0);

-    // Score scale factors. ex) If we set src_score_min_value = 0.0,
+    // Score scale factors. ex) If we set src_score_min_value = 0.0,
     // src_score_max_value = 1.0, dest_score_min_value = 0.0,
     // dest_score_max_value = 10000.0, [0.0, 1.0] will be scaled to [0, 10000].
     static double src_score_min_value = 0.0;
     static double src_score_max_value = 1.0;
     static double dest_score_min_value = 0.0;
     static double dest_score_max_value = 1.0;

-    // Assume teacher signals are the scores of deep searches, and convert them into winning
-    // probabilities in the trainer. Sometimes we want to use the winning probabilities in the training
-    // data directly. In those cases, we set false to this variable.
+    // Assume teacher signals are the scores of deep searches,
+    // and convert them into winning probabilities in the trainer.
+    // Sometimes we want to use the winning probabilities in the training
+    // data directly. In those cases, we set false to this variable.
     static bool convert_teacher_signal_to_winning_probability = true;
@@ -100,13 +107,9 @@ namespace Learner
     // This CANNOT be static since it's used elsewhere.
     bool use_raw_nnue_eval = false;

-    // Using WDL with win rate model instead of sigmoid
+    // Using stockfish's WDL with win rate model instead of sigmoid
     static bool use_wdl = false;

-    // -----------------------------------
-    // command to learn from the generated game (learn)
-    // -----------------------------------
-
     // A function that converts the evaluation value to the winning rate [0,1]
     double winning_percentage(double value)
     {
@@ -142,21 +145,31 @@ namespace Learner
     }

-    double calc_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply)
+    double calc_cross_entropy_of_winning_percentage(
+        double deep_win_rate,
+        double shallow_eval,
+        int ply)
     {
         const double p = deep_win_rate;
         const double q = winning_percentage(shallow_eval, ply);
         return -p * std::log(q) - (1.0 - p) * std::log(1.0 - q);
     }

-    double calc_d_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply)
+    double calc_d_cross_entropy_of_winning_percentage(
+        double deep_win_rate,
+        double shallow_eval,
+        int ply)
     {
         constexpr double epsilon = 0.000001;

-        const double y1 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval, ply);
-        const double y2 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval + epsilon, ply);
+        const double y1 = calc_cross_entropy_of_winning_percentage(
+            deep_win_rate, shallow_eval, ply);
+
+        const double y2 = calc_cross_entropy_of_winning_percentage(
+            deep_win_rate, shallow_eval + epsilon, ply);

-        // Divide by the winning_probability_coefficient to match scale with the sigmoidal win rate
+        // Divide by the winning_probability_coefficient to
+        // match scale with the sigmoidal win rate
         return ((y2 - y1) / epsilon) / winning_probability_coefficient;
     }
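
Plugging in numbers makes the win-rate scale concrete. Assuming Stockfish's classical PawnValueEg of 208 (used here only for illustration), the coefficient is ln(10) / (4 * 208) ≈ 0.00277, so an eval of +100 cp maps to roughly a 57% win probability:

    #include <cassert>
    #include <cmath>

    int main()
    {
        const double PawnValueEg = 208.0;  // illustrative value
        const double coeff = 1.0 / PawnValueEg / 4.0 * std::log(10.0);
        const double win = 1.0 / (1.0 + std::exp(-100.0 * coeff));  // eval = +100 cp
        assert(win > 0.56 && win < 0.58);
        return 0;
    }
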
// f'(Xi) = win_rate'(shallow) = sigmoid'(shallow/600) = dsigmoid(shallow / 600) / 600 // This /600 at the end is adjusted by the learning rate, so do not write it.. - // Also, the coefficient of 1/m is unnecessary if you use the update formula that has the automatic gradient adjustment function like Adam and AdaGrad. + // Also, the coefficient of 1/m is unnecessary if you use the update + // formula that has the automatic gradient adjustment function like Adam and AdaGrad. // Therefore, it is not necessary to save it in memory. const double p = winning_percentage(deep, psv.gamePly); @@ -202,7 +220,9 @@ namespace Learner // Refer to etc. // Objective function design) - // We want to make the distribution of p closer to the distribution of q → Think of it as the problem of minimizing the cross entropy between the probability distributions of p and q. + // We want to make the distribution of p closer to the distribution of q + // → Think of it as the problem of minimizing the cross entropy + // between the probability distributions of p and q. // J = H(p,q) =-Σ p(x) log(q(x)) = -p log q-(1-p) log(1-q) // x @@ -222,7 +242,8 @@ namespace Learner double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) { // Version that does not pass the winning percentage function - // This, unless EVAL_LIMIT is set low, trying to match the evaluation value with the shape of the end stage + // This, unless EVAL_LIMIT is set low, trying to + // match the evaluation value with the shape of the end stage // eval may exceed the range of eval. return shallow - deep; } @@ -261,7 +282,6 @@ namespace Learner { const double scaled_teacher_signal = get_scaled_signal(teacher_signal); - // Teacher winning probability. double p = scaled_teacher_signal; if (convert_teacher_signal_to_winning_probability) { @@ -273,7 +293,8 @@ namespace Learner double calculate_lambda(double teacher_signal) { - // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA. + // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT + // then apply ELMO_LAMBDA2 instead of ELMO_LAMBDA. const double lambda = (std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 @@ -284,7 +305,8 @@ namespace Learner double calculate_t(int game_result) { - // Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw. + // Use 1 as the correction term if the expected win rate is 1, + // 0 if you lose, and 0.5 if you draw. // game_result = 1,0,-1 so add 1 and divide by 2. const double t = double(game_result + 1) * 0.5; @@ -318,7 +340,9 @@ namespace Learner } // Calculate cross entropy during learning - // The individual cross entropy of the win/loss term and win rate term of the elmo expression is returned to the arguments cross_entropy_eval and cross_entropy_win. + // The individual cross entropy of the win/loss term and win + // rate term of the elmo expression is returned + // to the arguments cross_entropy_eval and cross_entropy_win. void calc_cross_entropy( Value teacher_signal, Value shallow, @@ -356,11 +380,7 @@ namespace Learner } #endif - - - // Other variations may be prepared as the objective function.. - - + // Other objective functions may be considered in the future... 
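    // A worked micro-example of the elmo blend above (a sketch with
    // illustrative values, not code from this patch series): with
    // lambda = 0.33, teacher win rate p = 0.90, game result t = 1.0 and
    // shallow-search win rate q = 0.60, the gradient comes out as
    //
    //   lambda * (q - p) + (1.0 - lambda) * (q - t)
    //     = 0.33 * (0.60 - 0.90) + 0.67 * (0.60 - 1.0)
    //     = -0.367
    //
    // i.e. the shallow eval is pushed upwards, weighted mostly by the
    // actual game outcome rather than the teacher score.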
double calc_grad(Value shallow, const PackedSfenValue& psv) { return calc_grad((Value)psv.score, shallow, psv); @@ -369,15 +389,17 @@ namespace Learner // Sfen reader struct SfenReader { - // number of phases used for calculation such as mse + // Number of phases used for calculation such as mse // mini-batch size = 1M is standard, so 0.2% of that should be negligible in terms of time. - //Since search() is performed with depth = 1 in calculation of move match rate, simple comparison is not possible... + // Since search() is performed with depth = 1 in calculation of + // move match rate, simple comparison is not possible... static constexpr uint64_t sfen_for_mse_size = 2000; // Number of phases buffered by each thread 0.1M phases. 4M phase at 40HT static constexpr size_t THREAD_BUFFER_SIZE = 10 * 1000; - // Buffer for reading files (If this is made larger, the shuffle becomes larger and the phases may vary. + // Buffer for reading files (If this is made larger, + // the shuffle becomes larger and the phases may vary. // If it is too large, the memory consumption will increase. // SFEN_READ_SIZE is a multiple of THREAD_BUFFER_SIZE. static constexpr const size_t SFEN_READ_SIZE = LEARN_SFEN_READ_SIZE; @@ -387,7 +409,8 @@ namespace Learner // It must be 2**N because it will be used as the mask to calculate hash_index. static constexpr uint64_t READ_SFEN_HASH_SIZE = 64 * 1024 * 1024; - // Do not use std::random_device(). Because it always the same integers on MinGW. + // Do not use std::random_device(). + // Because it always the same integers on MinGW. SfenReader(int thread_num) : prng(std::chrono::system_clock::now().time_since_epoch().count()) { @@ -460,16 +483,20 @@ namespace Learner // [ASYNC] Thread returns one aspect. Otherwise returns false. bool read_to_thread_buffer(size_t thread_id, PackedSfenValue& ps) { - // If there are any positions left in the thread buffer, retrieve one and return it. + // If there are any positions left in the thread buffer + // then retrieve one and return it. auto& thread_ps = packed_sfens[thread_id]; - // Fill the read buffer if there is no remaining buffer, but if it doesn't even exist, finish. - if ((thread_ps == nullptr || thread_ps->empty()) // If the buffer is empty, fill it. + // Fill the read buffer if there is no remaining buffer, + // but if it doesn't even exist, finish. + // If the buffer is empty, fill it. + if ((thread_ps == nullptr || thread_ps->empty()) && !read_to_thread_buffer_impl(thread_id)) return false; // read_to_thread_buffer_impl() returned true, - // Since the filling of the thread buffer with the phase has been completed successfully + // Since the filling of the thread buffer with the + // phase has been completed successfully // thread_ps->rbegin() is alive. ps = thread_ps->back(); @@ -511,6 +538,7 @@ namespace Learner // Waiting for file worker to fill packed_sfens_pool. // The mutex isn't locked, so it should fill up soon. + // Poor man's condition variable. sleep(1); } @@ -519,14 +547,14 @@ namespace Learner // Start a thread that loads the phase file in the background. void start_file_read_worker() { - file_worker_thread = std::thread([&] { this->file_read_worker(); }); + file_worker_thread = std::thread([&] { + this->file_read_worker(); + }); } - // for file read-only threads void file_read_worker() { - auto open_next_file = [&]() - { + auto open_next_file = [&]() { if (fs.is_open()) fs.close(); @@ -569,7 +597,7 @@ namespace Learner } else if(!open_next_file()) { - // There was no next file. Abon. 
+                    // There was no next file. Abort.
                    cout << "..end of files." << endl;
                    end_of_files = true;
                    return;
@@ -577,8 +605,6 @@
            }

            // Shuffle the read phase data.
-            // random shuffle by Fisher-Yates algorithm
-
            if (!no_shuffle)
            {
                Algo::shuffle(sfens, prng);
@@ -597,17 +623,19 @@
                // Delete this pointer on the receiving side.
                auto buf = std::make_unique<PSVector>();
                buf->resize(THREAD_BUFFER_SIZE);
-                memcpy(buf->data(), &sfens[i * THREAD_BUFFER_SIZE], sizeof(PackedSfenValue) * THREAD_BUFFER_SIZE);
+                memcpy(
+                    buf->data(),
+                    &sfens[i * THREAD_BUFFER_SIZE],
+                    sizeof(PackedSfenValue) * THREAD_BUFFER_SIZE);
                buffers.emplace_back(std::move(buf));
            }

-            // Since sfens is ready, look at the occasion and copy
            {
                std::unique_lock<std::mutex> lk(mutex);

-                // You can ignore this time because you just copy the pointer...
-                // The mutex lock is required because the contents of packed_sfens_pool are changed.
+                // The mutex lock is required because the
+                // contents of packed_sfens_pool are changed.
                for (auto& buf : buffers)
                    packed_sfens_pool.emplace_back(std::move(buf));
@@ -644,7 +672,7 @@

        bool stop_flag;

-        vector<Key> hash; // 64MB*8 = 512MB
+        vector<Key> hash;

        // test phase for mse calculation
        PSVector sfen_for_mse;
@@ -660,7 +688,6 @@
        // Did you read the files and reached the end?
        atomic<bool> end_of_files;

-
        // handle of sfen file
        std::fstream fs;
@@ -727,7 +754,7 @@
        uint64_t epoch = 0;

        // Mini batch size size. Be sure to set it on the side that uses this class.
-        uint64_t mini_batch_size = 1000 * 1000;
+        uint64_t mini_batch_size = LEARN_MINI_BATCH_SIZE;

        bool stop_flag;
@@ -740,7 +767,8 @@
        // Option not to learn kk/kkp/kpp/kppp
        std::array<bool, 4> freeze;

-        // If the absolute value of the evaluation value of the deep search of the teacher phase exceeds this value, discard the teacher phase.
+        // If the absolute value of the evaluation value of the deep search
+        // of the teacher phase exceeds this value, discard the teacher phase.
        int eval_limit;

        // Flag whether to dig a folder each time the evaluation function is saved.
@@ -811,7 +839,8 @@

    void LearnerThink::calc_loss(size_t thread_id, uint64_t done)
    {
-        // There is no point in hitting the replacement table, so at this timing the generation of the replacement table is updated.
+        // There is no point in hitting the replacement table,
+        // so at this timing the generation of the replacement table is updated.
        // It doesn't matter if you have disabled the substitution table.
        TT.new_search();
@@ -845,7 +874,8 @@
        sum_norm = 0;
#endif

-        // The number of times the pv first move of deep search matches the pv first move of search(1).
+        // The number of times the pv first move of deep
+        // search matches the pv first move of search(1).
        atomic<int> move_accord_count;
        move_accord_count = 0;
@@ -856,7 +886,8 @@
        pos.set(StartFEN, false, &si, th);
        std::cout << "hirate eval = " << Eval::evaluate(pos);

-        // It's better to parallelize here, but it's a bit troublesome because the search before slave has not finished.
+        // It's better to parallelize here, but it's a bit
+        // troublesome because the search before slave has not finished.
        // I created a mechanism to call task, so I will use it.

        // The number of tasks to do.
@@ -869,7 +900,8 @@
        {
            // Assign work to each thread using TaskDispatcher.
            // A task definition for that.
-            // It is not possible to capture pos used in ↑, so specify the variables you want to capture one by one.
+ // It is not possible to capture pos used in ↑, + // so specify the variables you want to capture one by one. auto task = [ this, @@ -899,7 +931,8 @@ namespace Learner // Evaluation value of deep search auto deep_value = (Value)ps.score; - // Note) This code does not consider when eval_limit is specified in the learn command. + // Note) This code does not consider when + // eval_limit is specified in the learn command. // --- error calculation @@ -975,14 +1008,16 @@ namespace Learner << " , eval mae = " << eval_mae; #endif -#if defined ( LOSS_FUNCTION_IS_ELMO_METHOD ) +#if defined(LOSS_FUNCTION_IS_ELMO_METHOD) #if defined(EVAL_NNUE) latest_loss_sum += test_sum_cross_entropy - test_sum_entropy; latest_loss_count += sr.sfen_for_mse.size(); #endif - // learn_cross_entropy may be called train cross entropy in the world of machine learning, - // When omitting the acronym, it is nice to be able to distinguish it from test cross entropy(tce) by writing it as lce. + // learn_cross_entropy may be called train cross + // entropy in the world of machine learning, + // When omitting the acronym, it is nice to be able to + // distinguish it from test cross entropy(tce) by writing it as lce. if (sr.sfen_for_mse.size() && done) { @@ -1074,7 +1109,9 @@ namespace Learner // Output the current time. Output every time. std::cout << sr.total_done << " sfens , at " << now_string() << std::endl; - // Reflect the gradient in the weight array at this timing. The calculation of the gradient is just right for each 1M phase in terms of mini-batch. + // Reflect the gradient in the weight array at this timing. + // The calculation of the gradient is just right for + // each 1M phase in terms of mini-batch. Eval::update_weights(epoch, freeze); // Display epoch and current eta for debugging. @@ -1090,14 +1127,13 @@ namespace Learner #endif ++epoch; - // Save once every 1 billion phases. - // However, the elapsed time during update_weights() and calc_rmse() is ignored. if (++sr.save_count * mini_batch_size >= eval_save_interval) { sr.save_count = 0; - // During this time, as the gradient calculation proceeds, the value becomes too large and I feel annoyed, so stop other threads. + // During this time, as the gradient calculation proceeds, + // the value becomes too large and I feel annoyed, so stop other threads. const bool converged = save(); if (converged) { @@ -1109,7 +1145,6 @@ namespace Learner // Calculate rmse. This is done for samples of 10,000 phases. // If you do with 40 cores, update_weights every 1 million phases - // I don't think it's so good to be tiring. static uint64_t loss_output_count = 0; if (++loss_output_count * mini_batch_size >= loss_output_interval) { @@ -1129,10 +1164,12 @@ namespace Learner sr.last_done = sr.total_done; } - // Next time, I want you to do this series of processing again when you process only mini_batch_size. + // Next time, I want you to do this series of + // processing again when you process only mini_batch_size. sr.next_update_weights += mini_batch_size; - // Since I was waiting for the update of this sr.next_update_weights except the main thread, + // Since I was waiting for the update of this + // sr.next_update_weights except the main thread, // Once this value is updated, it will start moving again. } } @@ -1173,7 +1210,8 @@ namespace Learner if (pos.set_from_packed_sfen(ps.sfen, &si, th, mirror) != 0) { // I got a strange sfen. Should be debugged! - // Since it is an illegal sfen, it may not be displayed with pos.sfen(), but it is better than not. 
+                // Since it is an illegal sfen, it may not be
+                // displayed with pos.fen(), but it is better than not.
                cout << "Error! : illegal packed sfen = " << pos.fen() << endl;
                goto RETRY_READ;
            }
@@ -1198,9 +1236,11 @@
#endif

            // There is a possibility that all the pieces are blocked and stuck.
-            // Also, the declaration win phase is excluded from learning because you cannot go to leaf with PV moves.
-            // (shouldn't write out such teacher aspect itself, but may have written it out with an old generation routine)
-            // Skip the position if there are no legal moves (=checkmated or stalemate).
+            // Also, the declaration win phase is excluded from
+            // learning because you cannot go to leaf with PV moves.
+            // (shouldn't write out such teacher aspect itself,
+            // but may have written it out with an old generation routine)
+            // Skip the position if there are no legal moves (=checkmated or stalemate).
            if (MoveList<LEGAL>(pos).size() == 0)
                goto RETRY_READ;
@@ -1214,7 +1254,8 @@
            const auto deep_value = (Value)ps.score;

            // I feel that the mini batch has a better gradient.
-            // Go to the leaf node as it is, add only to the gradient array, and later try AdaGrad at the time of rmse aggregation.
+            // Go to the leaf node as it is, add only to the gradient array,
+            // and later try AdaGrad at the time of rmse aggregation.

            const auto rootColor = pos.side_to_move();
@@ -1223,23 +1264,25 @@

            // It may be better not to study where the difference in evaluation values is too large.

#if 0
-            // If you do this, about 13% of the phases will be excluded from the learning target. Good and bad are subtle.
+            // If you do this, about 13% of the phases will be excluded
+            // from the learning target. Good and bad are subtle.
            if (pv.size() >= 1 && (uint16_t)pv[0] != ps.move)
            {
-                // dbg_hit_on(false);
+                //dbg_hit_on(false);
                continue;
            }
#endif

#if 0
            // It may be better not to study where the difference in evaluation values is too large.
-            // → It's okay because it passes the win rate function... About 30% of the phases are out of the scope of learning...
+            // → It's okay because it passes the win rate function...
+            // About 30% of the phases are out of the scope of learning...
            if (abs((int16_t)r.first - ps.score) >= Eval::PawnValue * 4)
            {
-                // dbg_hit_on(false);
+                //dbg_hit_on(false);
                continue;
            }

-            // dbg_hit_on(true);
+            //dbg_hit_on(true);
#endif

            int ply = 0;

            auto pos_add_grad = [&]() {
                // Use the value of evaluate in leaf as shallow_value.
                // Using the return value of qsearch() as shallow_value,
-                // If PV is interrupted in the middle, the phase where evaluate() is called to calculate the gradient, and
-                // I don't think this is a very desirable property, as the aspect that gives that gradient will be different.
-                // I have turned off the substitution table, but since the pv array has not been updated due to one stumbling block etc...
+                // If PV is interrupted in the middle, the phase where
+                // evaluate() is called to calculate the gradient,
+                // and I don't think this is a very desirable property,
+                // as the aspect that gives that gradient will be different.
+                // I have turned off the substitution table, but since
+                // the pv array has not been updated due to one stumbling block etc...
                const Value shallow_value =
                    (rootColor == pos.side_to_move())
                    ? Eval::evaluate(pos)
                    : -Eval::evaluate(pos);
@@ -1284,7 +1330,8 @@
                // Slope
                double dj_dw = calc_grad(deep_value, shallow_value, ps);

-                // Add jd_dw as the gradient (∂J/∂Wj) for the feature vector currently appearing in the leaf node.
+                // Add jd_dw as the gradient (∂J/∂Wj) for the
+                // feature vector currently appearing in the leaf node.

                // If it is not PV termination, apply a discount rate.
                if (discount_rate != 0 && ply != (int)pv.size())
@@ -1330,7 +1377,7 @@

            if (illegal_move)
            {
-                sync_cout << "An illical move was detected... Excluded the position from the learning data..." << sync_endl;
+                sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
                continue;
            }
@@ -1343,7 +1390,11 @@

#if 0
            // When adding the gradient to the root phase
-            shallow_value = (rootColor == pos.side_to_move()) ? Eval::evaluate(pos) : -Eval::evaluate(pos);
+            shallow_value =
+                (rootColor == pos.side_to_move())
+                ? Eval::evaluate(pos)
+                : -Eval::evaluate(pos);
+
            dj_dw = calc_grad(deep_value, shallow_value, ps);
            Eval::add_grad(pos, rootColor, dj_dw, without_kpp);
#endif
@@ -1426,10 +1477,14 @@

    // Shuffle_files(), shuffle_files_quick() subcontracting, writing part.
    // output_file_name: Name of the file to write
-    // prng: random number
-    // afs: fstream of each teacher phase file
-    // a_count: The number of teacher positions inherent in each file.
-    void shuffle_write(const string& output_file_name, PRNG& prng, vector<fstream>& sfen_file_streams, vector<uint64_t>& sfen_count_in_file)
+    // prng: random number generator
+    // sfen_file_streams: fstream of each teacher phase file
+    // sfen_count_in_file: The number of teacher positions present in each file.
+    void shuffle_write(
+        const string& output_file_name,
+        PRNG& prng,
+        vector<fstream>& sfen_file_streams,
+        vector<uint64_t>& sfen_count_in_file)
    {
        uint64_t total_sfen_count = 0;
        for (auto c : sfen_count_in_file)
@@ -1502,7 +1557,8 @@
        // Temporary file is written to tmp/ folder for each buffer_size phase.
        // For example, if buffer_size = 20M, you need a buffer of 20M*40bytes = 800MB.
        // In a PC with a small memory, it would be better to reduce this.
-        // However, if the number of files increases too much, it will not be possible to open at the same time due to OS restrictions.
+        // However, if the number of files increases too much,
+        // it will not be possible to open at the same time due to OS restrictions.
        // There should have been a limit of 512 per process on Windows, so you can open here as 500,
        // The current setting is 500 files x 20M = 10G = 10 billion phases.
@@ -1555,19 +1611,23 @@

            // Read in units of sizeof(PackedSfenValue),
            // Ignore the last remaining fraction. (Fails in fs.read, so exit while)
-            // (The remaining fraction seems to be half-finished data that was created because it was stopped halfway during teacher generation.)
+            // (The remaining fraction seems to be half-finished data
+            // that was created because it was stopped halfway during teacher generation.)
        }

        if (buf_write_marker != 0)
            write_buffer(buf_write_marker);

        // Only shuffled files have been written write_file_count.
-        // As a second pass, if you open all of them at the same time, select one at random and load one phase at a time
+        // As a second pass, if you open all of them at the same time,
+        // select one at random and load one phase at a time
        // Now you have shuffled.
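        // In outline, the whole procedure is (a sketch; the wording is
        // illustrative, buffer_size and the tmp/ layout are as above):
        //
        //   pass 1: for each buffer_size chunk of the input,
        //           Fisher-Yates shuffle it in memory and write it out
        //           as its own tmp file.
        //   pass 2: while any tmp file still has positions left, pick a
        //           file f with probability remaining(f) / total_remaining
        //           and emit one position from f.
        //
        // Since every tmp file is internally a uniform shuffle and files
        // are drawn in proportion to their remaining counts, the merged
        // output is a uniform shuffle of the whole input.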
+        // Original file for the shuffle + tmp files + file to write
+        // requires 3 times the storage capacity of the original file.
        // 10 billion positions are 400GB, so a 1TB SSD is not enough for shuffling them.
-        // If you want to delete (or delete by hand) the original file at this point after writing to tmp,
+        // If you want to delete (or delete by hand) the
+        // original file at this point after writing to tmp,
        // The storage capacity is about twice that of the original file.
        // So, maybe we should have an option to delete the original file.
@@ -1592,7 +1652,7 @@
        PRNG prng(std::chrono::system_clock::now().time_since_epoch().count());

        // number of files
-        size_t file_count = filenames.size();
+        const size_t file_count = filenames.size();

        // Number of teacher positions stored in each file in filenames
        vector<uint64_t> sfen_count_in_file(file_count);
@@ -1651,7 +1711,8 @@

        std::cout << "write : " << output_file_name << endl;

-        // If the file to be written exceeds 2GB, it cannot be written in one shot with fstream::write, so use wrapper.
+        // If the file to be written exceeds 2GB, it cannot be
+        // written in one shot with fstream::write, so use wrapper.
        write_memory_to_file(
            output_file_name,
            (void*)&buf[0],
@@ -1703,9 +1764,11 @@
        uint64_t buffer_size = 20000000;
        // fast shuffling assuming each file is shuffled
        bool shuffle_quick = false;
-        // A function to read the entire file in memory and shuffle it. (Requires file size memory)
+        // A function to read the entire file in memory and shuffle it.
+        // (Requires file size memory)
        bool shuffle_on_memory = false;
-        // Conversion of packed sfen. In plain, it consists of sfen(string), evaluation value (integer), move (eg 7g7f, string), result (loss-1, win 1, draw 0)
+        // Conversion of packed sfen. In plain, it consists of sfen(string),
+        // evaluation value (integer), move (eg 7g7f, string), result (loss: -1, win: 1, draw: 0)
        bool use_convert_plain = false;
        // convert plain format teacher to Yaneura King's bin
        bool use_convert_bin = false;
@@ -1721,13 +1784,16 @@
        // File name to write in those cases (default is "shuffled_sfen.bin")
        string output_file_name = "shuffled_sfen.bin";

-        // If the absolute value of the evaluation value in the deep search of the teacher phase exceeds this value, that phase is discarded.
+        // If the absolute value of the evaluation value
+        // in the deep search of the teacher phase exceeds this value,
+        // that phase is discarded.
        int eval_limit = 32000;

        // Flag to save the evaluation function file only once near the end.
        bool save_only_once = false;

-        // Shuffle about what you are pre-reading on the teacher aspect. (Shuffle of about 10 million phases)
+        // Shuffle about what you are pre-reading on the teacher aspect.
+        // (Shuffle of about 10 million phases)
        // Turn on if you want to pass a pre-shuffled file.
        bool no_shuffle = false;

@@ -1738,7 +1804,9 @@
        ELMO_LAMBDA_LIMIT = 32000;
#endif

-        // Discount rate. If this is set to a value other than 0, the slope will be added even at other than the PV termination. (At that time, apply this discount rate)
+        // Discount rate. If this is set to a value other than 0,
+        // the slope will be added even at other than the PV termination.
+        // (At that time, apply this discount rate)
        double discount_rate = 0;

        // if (gamePly < rand(reduction_gameply)) continue;

        else if (option == "eta1") is >> eta1;
        else if (option == "eta2") is >> eta2;
        else if (option == "eta3") is >> eta3;
        else if (option == "eta1_epoch") is >> eta1_epoch;
        else if (option == "eta2_epoch") is >> eta2_epoch;
+
+        // Accept also the old option name.
-        else if (option == "use_draw_in_training" || option == "use_draw_games_in_training") is >> use_draw_games_in_training;
+        else if (option == "use_draw_in_training"
+            || option == "use_draw_games_in_training")
+            is >> use_draw_games_in_training;
+
+        // Accept also the old option name.
-        else if (option == "use_draw_in_validation" || option == "use_draw_games_in_validation") is >> use_draw_games_in_validation;
+        else if (option == "use_draw_in_validation"
+            || option == "use_draw_games_in_validation")
+            is >> use_draw_games_in_validation;
+
+        // Accept also the old option name.
-        else if (option == "use_hash_in_training" || option == "skip_duplicated_positions_in_training") is >> skip_duplicated_positions_in_training;
+        else if (option == "use_hash_in_training"
+            || option == "skip_duplicated_positions_in_training")
+            is >> skip_duplicated_positions_in_training;
+
        else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient;
+
        // Discount rate
        else if (option == "discount_rate") is >> discount_rate;
+
        // Using WDL with win rate model instead of sigmoid
        else if (option == "use_wdl") is >> use_wdl;
@@ -1873,8 +1953,11 @@
        else
            filenames.push_back(option);
    }
+
    if (loss_output_interval == 0)
+    {
        loss_output_interval = LEARN_RMSE_OUTPUT_INTERVAL * mini_batch_size;
+    }

    cout << "learn command , ";
@@ -1900,6 +1983,7 @@
    cout << "learn from ";
    for (auto s : filenames)
        cout << s << " , ";
+
    cout << endl;
    if (!validation_set_file_name.empty())
    {
@@ -1917,18 +2001,21 @@
        shuffle_files(filenames, output_file_name, buffer_size);
        return;
    }
+
    if (shuffle_quick)
    {
        cout << "quick shuffle mode.." << endl;
        shuffle_files_quick(filenames, output_file_name);
        return;
    }
+
    if (shuffle_on_memory)
    {
        cout << "shuffle on memory.." << endl;
        shuffle_files_on_memory(filenames, output_file_name);
        return;
    }
+
    if (use_convert_plain)
    {
        Eval::init_NNUE();

        convert_plain(filenames, output_file_name);
        return;
    }
+
    if (use_convert_bin)
    {
        Eval::init_NNUE();
@@ -1956,6 +2044,7 @@
        return;
    }

+
    if (use_convert_bin_from_pgn_extract)
    {
        Eval::init_NNUE();
@@ -1976,15 +2065,21 @@

    // Insert the file name for the number of loops.
    for (int i = 0; i < loop; ++i)
-        // sfen reader, I'll read it in reverse order so I'll reverse it here. I'm sorry.
+    {
+        // sfen reader, I'll read it in reverse
+        // order so I'll reverse it here. I'm sorry.
for (auto it = filenames.rbegin(); it != filenames.rend(); ++it) + { sr.filenames.push_back(Path::Combine(base_dir, *it)); + } + } #if !defined(EVAL_NNUE) cout << "Gradient Method : " << LEARN_UPDATE << endl; #endif cout << "Loss Function : " << LOSS_FUNCTION << endl; cout << "mini-batch size : " << mini_batch_size << endl; + #if defined(EVAL_NNUE) cout << "nn_batch_size : " << nn_batch_size << endl; cout << "nn_options : " << nn_options << endl; @@ -1994,6 +2089,7 @@ namespace Learner cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl; cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl; cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl; + #if defined(EVAL_NNUE) if (newbob_decay != 1.0) { cout << "scheduling : newbob with decay = " << newbob_decay @@ -2003,6 +2099,7 @@ namespace Learner cout << "scheduling : default" << endl; } #endif + cout << "discount rate : " << discount_rate << endl; // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1. @@ -2014,6 +2111,7 @@ namespace Learner cout << "LAMBDA2 : " << ELMO_LAMBDA2 << endl; cout << "LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl; #endif + cout << "mirror_percentage : " << mirror_percentage << endl; cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl; cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl; @@ -2071,11 +2169,13 @@ namespace Learner learn_think.sr.no_shuffle = no_shuffle; learn_think.freeze = freeze; learn_think.reduction_gameply = reduction_gameply; + #if defined(EVAL_NNUE) learn_think.newbob_scale = 1.0; learn_think.newbob_decay = newbob_decay; learn_think.newbob_num_trials = newbob_num_trials; #endif + learn_think.eval_save_interval = eval_save_interval; learn_think.loss_output_interval = loss_output_interval; learn_think.mirror_percentage = mirror_percentage; @@ -2086,16 +2186,19 @@ namespace Learner learn_think.mini_batch_size = mini_batch_size; - if (validation_set_file_name.empty()) { + if (validation_set_file_name.empty()) + { // Get about 10,000 data for mse calculation. sr.read_for_mse(); } - else { + else + { sr.read_validation_set(validation_set_file_name, eval_limit); } // Calculate rmse once at this point (timing of 0 sfen) // sr.calc_rmse(); + #if defined(EVAL_NNUE) if (newbob_decay != 1.0) { learn_think.calc_loss(0, -1); diff --git a/src/misc.h b/src/misc.h index 5add3b36..4c04d3f0 100644 --- a/src/misc.h +++ b/src/misc.h @@ -226,6 +226,7 @@ namespace Math { } namespace Algo { + // Fisher-Yates template void shuffle(std::vector& buf, Rng&& prng) { From a6013557f2cb5d13c21a2d406a02d504a643c885 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 22:45:29 +0900 Subject: [PATCH 07/30] Removed EVAL_NNUE macro. 
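NNUE is the only evaluation function left on this branch, so every
EVAL_NNUE guard is either always true (in the NNUE sources) or redundant
next to EVAL_LEARN (in the trainer sources). The guards reduce
accordingly, e.g.

    -#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
    +#if defined(EVAL_LEARN)

and the learn targets in the Makefile drop -DEVAL_NNUE from their build
flags.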
--- src/Makefile | 6 +- src/eval/evaluate_common.h | 3 - src/learn/gensfen.cpp | 40 +++--- src/learn/learner.cpp | 133 +++--------------- src/nnue/evaluate_nnue.cpp | 2 - src/nnue/evaluate_nnue_learner.cpp | 4 +- src/nnue/evaluate_nnue_learner.h | 4 +- src/nnue/features/castling_right.cpp | 4 - src/nnue/features/castling_right.h | 4 - src/nnue/features/enpassant.cpp | 4 - src/nnue/features/enpassant.h | 4 - src/nnue/features/half_relative_kp.cpp | 4 - src/nnue/features/half_relative_kp.h | 4 - src/nnue/features/k.cpp | 4 - src/nnue/features/k.h | 4 - src/nnue/features/p.cpp | 4 - src/nnue/features/p.h | 4 - src/nnue/layers/sum.h | 4 - src/nnue/nnue_test_command.cpp | 4 +- src/nnue/nnue_test_command.h | 4 +- src/nnue/trainer/features/factorizer.h | 4 - .../trainer/features/factorizer_feature_set.h | 4 - .../trainer/features/factorizer_half_kp.h | 4 - src/nnue/trainer/trainer.h | 4 +- src/nnue/trainer/trainer_affine_transform.h | 4 +- src/nnue/trainer/trainer_clipped_relu.h | 4 +- .../trainer/trainer_feature_transformer.h | 4 +- src/nnue/trainer/trainer_input_slice.h | 4 +- src/nnue/trainer/trainer_sum.h | 4 +- src/uci.cpp | 6 +- src/ucioption.cpp | 2 - 31 files changed, 65 insertions(+), 223 deletions(-) diff --git a/src/Makefile b/src/Makefile index ca851dba..a07e1251 100644 --- a/src/Makefile +++ b/src/Makefile @@ -903,7 +903,7 @@ icc-profile-use: learn: config-sanity $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS=' -DEVAL_LEARN -DEVAL_NNUE -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ + EXTRACXXFLAGS=' -DEVAL_LEARN -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ EXTRALDFLAGS=' $(BLASLDFLAGS) -fopenmp ' \ all @@ -911,7 +911,7 @@ profile-learn: net config-sanity objclean profileclean @echo "" @echo "Step 1/4. Building instrumented executable ..." $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) \ - LEARNCXXFLAGS=' -DEVAL_LEARN -DEVAL_NNUE -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ + LEARNCXXFLAGS=' -DEVAL_LEARN -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ LEARNLDFLAGS=' $(BLASLDFLAGS) -fopenmp ' @echo "" @echo "Step 2/4. Running benchmark for pgo-build ..." @@ -920,7 +920,7 @@ profile-learn: net config-sanity objclean profileclean @echo "Step 3/4. Building optimized executable ..." $(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_use) \ - LEARNCXXFLAGS=' -DEVAL_LEARN -DEVAL_NNUE -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ + LEARNCXXFLAGS=' -DEVAL_LEARN -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ LEARNLDFLAGS=' $(BLASLDFLAGS) -fopenmp ' @echo "" @echo "Step 4/4. Deleting profile data ..." diff --git a/src/eval/evaluate_common.h b/src/eval/evaluate_common.h index dacbd2ba..3fb161ab 100644 --- a/src/eval/evaluate_common.h +++ b/src/eval/evaluate_common.h @@ -3,7 +3,6 @@ // A common header-like function for modern evaluation functions (EVAL_KPPT and EVAL_KPP_KKPT). 
-#if defined(EVAL_NNUE) || defined(EVAL_LEARN) #include // KK file name @@ -79,6 +78,4 @@ namespace Eval } -#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN) - #endif // _EVALUATE_KPPT_COMMON_H_ diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index b049192e..9ae83174 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -1,45 +1,41 @@ #if defined(EVAL_LEARN) #include "../eval/evaluate_common.h" - -#include "learn.h" -#include "multi_think.h" #include "../misc.h" -#include "../thread.h" +#include "../nnue/evaluate_nnue_learner.h" #include "../position.h" +#include "../syzygy/tbprobe.h" +#include "../thread.h" #include "../tt.h" #include "../uci.h" -#include "../syzygy/tbprobe.h" +#include "learn.h" +#include "multi_think.h" #if defined(USE_BOOK) #include "../extra/book/book.h" #endif #include -#include -#include -#include -#include -#include -#include -#include +#include #include #include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #if defined (_OPENMP) #include #endif -#if defined(EVAL_NNUE) -#include "../nnue/evaluate_nnue_learner.h" -#include -#include -#endif - using namespace std; namespace Learner @@ -692,12 +688,10 @@ namespace Learner // performed unless each node evaluate() is called! // If the depth is 8 or more, it seems // faster not to calculate this difference. -#if defined(EVAL_NNUE) if (depth < 8) { Eval::NNUE::update_eval(pos); } -#endif // defined(EVAL_NNUE) } // Reach leaf diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index f9d188b8..358848ec 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -17,45 +17,40 @@ // → I will not be involved in the engine because it is a problem that the GUI should assist. // etc.. -#define EVAL_LEARN - #if defined(EVAL_LEARN) #include "../eval/evaluate_common.h" - +#include "../misc.h" +#include "../nnue/evaluate_nnue_learner.h" +#include "../position.h" +#include "../syzygy/tbprobe.h" +#include "../thread.h" +#include "../tt.h" +#include "../uci.h" #include "learn.h" #include "multi_think.h" -#include "../uci.h" -#include "../syzygy/tbprobe.h" -#include "../misc.h" -#include "../thread.h" -#include "../position.h" -#include "../tt.h" #include -#include -#include -#include -#include -#include -#include -#include +#include #include // std::exp(),std::pow(),std::log() #include // memcpy() -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #if defined (_OPENMP) #include #endif -#if defined(EVAL_NNUE) -#include "../nnue/evaluate_nnue_learner.h" -#include -#include -#endif using namespace std; @@ -724,14 +719,12 @@ namespace Learner learn_sum_entropy = 0.0; #endif -#if defined(EVAL_NNUE) newbob_scale = 1.0; newbob_decay = 1.0; newbob_num_trials = 2; best_loss = std::numeric_limits::infinity(); latest_loss_sum = 0.0; latest_loss_count = 0; -#endif } virtual void thread_worker(size_t thread_id); @@ -787,7 +780,6 @@ namespace Learner atomic learn_sum_entropy; #endif -#if defined(EVAL_NNUE) shared_timed_mutex nn_mutex; double newbob_scale; double newbob_decay; @@ -796,7 +788,6 @@ namespace Learner double latest_loss_sum; uint64_t latest_loss_count; std::string best_nn_directory; -#endif uint64_t eval_save_interval; uint64_t loss_output_interval; @@ -844,13 +835,10 @@ namespace Learner // It doesn't matter if you have disabled the substitution table. 
TT.new_search(); - -#if defined(EVAL_NNUE) std::cout << "PROGRESS: " << now_string() << ", "; std::cout << sr.total_done << " sfens"; std::cout << ", iteration " << epoch; std::cout << ", eta = " << Eval::get_eta() << ", "; -#endif #if !defined(LOSS_FUNCTION_IS_ELMO_METHOD) double sum_error = 0; @@ -1009,10 +997,8 @@ namespace Learner #endif #if defined(LOSS_FUNCTION_IS_ELMO_METHOD) -#if defined(EVAL_NNUE) latest_loss_sum += test_sum_cross_entropy - test_sum_entropy; latest_loss_count += sr.sfen_for_mse.size(); -#endif // learn_cross_entropy may be called train cross // entropy in the world of machine learning, @@ -1074,14 +1060,10 @@ namespace Learner // display mse (this is sometimes done only for thread 0) // Immediately after being read from the file... -#if defined(EVAL_NNUE) // Lock the evaluation function so that it is not used during updating. shared_lock read_lock(nn_mutex, defer_lock); if (sr.next_update_weights <= sr.total_done || (thread_id != 0 && !read_lock.try_lock())) -#else - if (sr.next_update_weights <= sr.total_done) -#endif { if (thread_id != 0) { @@ -1105,18 +1087,6 @@ namespace Learner continue; } -#if !defined(EVAL_NNUE) - // Output the current time. Output every time. - std::cout << sr.total_done << " sfens , at " << now_string() << std::endl; - - // Reflect the gradient in the weight array at this timing. - // The calculation of the gradient is just right for - // each 1M phase in terms of mini-batch. - Eval::update_weights(epoch, freeze); - - // Display epoch and current eta for debugging. - std::cout << "epoch = " << epoch << " , eta = " << Eval::get_eta() << std::endl; -#else { // update parameters @@ -1124,7 +1094,7 @@ namespace Learner lock_guard write_lock(nn_mutex); Eval::NNUE::UpdateParameters(epoch); } -#endif + ++epoch; // However, the elapsed time during update_weights() and calc_rmse() is ignored. @@ -1156,9 +1126,7 @@ namespace Learner // loss calculation calc_loss(thread_id, done); -#if defined(EVAL_NNUE) Eval::NNUE::CheckHealth(); -#endif // Make a note of how far you have totaled. sr.last_done = sr.total_done; @@ -1216,25 +1184,6 @@ namespace Learner goto RETRY_READ; } -#if !defined(EVAL_NNUE) - if (skip_duplicated_positions_in_training) - { - const auto key = pos.key(); - - // Exclude the phase used for rmse calculation. - if (sr.is_for_rmse(key)) - goto RETRY_READ; - - // Exclude the most recently used aspect. - const auto hash_index = size_t(key & (sr.READ_SFEN_HASH_SIZE - 1)); - const auto key2 = sr.hash[hash_index]; - if (key == key2) - goto RETRY_READ; - - sr.hash[hash_index] = key; // Replace with the current key. - } -#endif - // There is a possibility that all the pieces are blocked and stuck. // Also, the declaration win phase is excluded from // learning because you cannot go to leaf with PV moves. @@ -1326,25 +1275,9 @@ namespace Learner learn_sum_entropy += learn_entropy; #endif -#if !defined(EVAL_NNUE) - // Slope - double dj_dw = calc_grad(deep_value, shallow_value, ps); - - // Add jd_dw as the gradient (∂J/∂Wj) for the - // feature vector currently appearing in the leaf node. - - // If it is not PV termination, apply a discount rate. - if (discount_rate != 0 && ply != (int)pv.size()) - dj_dw *= discount_rate; - - // Since we have reached leaf, add the gradient to the features that appear in this phase. - // Update based on gradient later. - Eval::add_grad(pos, rootColor, dj_dw, freeze); -#else const double example_weight = (discount_rate != 0 && ply != (int)pv.size()) ? 
discount_rate : 1.0; Eval::NNUE::AddExample(pos, rootColor, ps, example_weight); -#endif // Since the processing is completed, the counter of the processed number is incremented sr.total_done++; @@ -1425,7 +1358,6 @@ namespace Learner const std::string dir_name = std::to_string(dir_number++); Eval::save_eval(dir_name); -#if defined(EVAL_NNUE) if (newbob_decay != 1.0 && latest_loss_count > 0) { static int trials = newbob_num_trials; const double latest_loss = latest_loss_sum / latest_loss_count; @@ -1470,7 +1402,6 @@ namespace Learner return true; } } -#endif } return false; } @@ -1817,12 +1748,10 @@ namespace Learner // Optional item that does not let you learn KK/KKP/KPP/KPPP array freeze = {}; -#if defined(EVAL_NNUE) uint64_t nn_batch_size = 1000; double newbob_decay = 1.0; int newbob_num_trials = 2; string nn_options; -#endif uint64_t eval_save_interval = LEARN_EVAL_SAVE_INTERVAL; uint64_t loss_output_interval = 0; @@ -1922,12 +1851,11 @@ namespace Learner else if (option == "save_only_once") save_only_once = true; else if (option == "no_shuffle") no_shuffle = true; -#if defined(EVAL_NNUE) else if (option == "nn_batch_size") is >> nn_batch_size; else if (option == "newbob_decay") is >> newbob_decay; else if (option == "newbob_num_trials") is >> newbob_num_trials; else if (option == "nn_options") is >> nn_options; -#endif + else if (option == "eval_save_interval") is >> eval_save_interval; else if (option == "loss_output_interval") is >> loss_output_interval; else if (option == "mirror_percentage") is >> mirror_percentage; @@ -2074,23 +2002,18 @@ namespace Learner } } -#if !defined(EVAL_NNUE) - cout << "Gradient Method : " << LEARN_UPDATE << endl; -#endif cout << "Loss Function : " << LOSS_FUNCTION << endl; cout << "mini-batch size : " << mini_batch_size << endl; -#if defined(EVAL_NNUE) cout << "nn_batch_size : " << nn_batch_size << endl; cout << "nn_options : " << nn_options << endl; -#endif + cout << "learning rate : " << eta1 << " , " << eta2 << " , " << eta3 << endl; cout << "eta_epoch : " << eta1_epoch << " , " << eta2_epoch << endl; cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl; cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl; cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl; -#if defined(EVAL_NNUE) if (newbob_decay != 1.0) { cout << "scheduling : newbob with decay = " << newbob_decay << ", " << newbob_num_trials << " trials" << endl; @@ -2098,7 +2021,6 @@ namespace Learner else { cout << "scheduling : default" << endl; } -#endif cout << "discount rate : " << discount_rate << endl; @@ -2133,12 +2055,6 @@ namespace Learner // Read evaluation function parameters Eval::init_NNUE(); -#if !defined(EVAL_NNUE) - cout << "init_grad.." << endl; - - // Initialize gradient array of merit function parameters - Eval::init_grad(eta1, eta1_epoch, eta2, eta2_epoch, eta3); -#else cout << "init_training.." << endl; Eval::NNUE::InitializeTraining(eta1, eta1_epoch, eta2, eta2_epoch, eta3); Eval::NNUE::SetBatchSize(nn_batch_size); @@ -2146,7 +2062,6 @@ namespace Learner if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) { learn_think.best_nn_directory = std::string(Options["EvalDir"]); } -#endif #if 0 // A test to give a gradient of 1.0 to the initial stage of Hirate. 
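    // For reference, the newbob-style schedule wired up here works as
    // follows (an outline; names match the surrounding code, the control
    // flow is paraphrased from save() above):
    //
    //   latest_loss = latest_loss_sum / latest_loss_count;
    //   if (latest_loss < best_loss)        // improved: remember this net,
    //       best_loss = latest_loss;        // reset the trial counter
    //   else if (--trials > 0)              // worse: reload best_nn_directory
    //       newbob_scale *= newbob_decay;   // and decay the learning rate
    //   else
    //       converged = true;               // out of trials: stop training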
@@ -2170,11 +2085,9 @@ namespace Learner learn_think.freeze = freeze; learn_think.reduction_gameply = reduction_gameply; -#if defined(EVAL_NNUE) learn_think.newbob_scale = 1.0; learn_think.newbob_decay = newbob_decay; learn_think.newbob_num_trials = newbob_num_trials; -#endif learn_think.eval_save_interval = eval_save_interval; learn_think.loss_output_interval = loss_output_interval; @@ -2199,7 +2112,6 @@ namespace Learner // Calculate rmse once at this point (timing of 0 sfen) // sr.calc_rmse(); -#if defined(EVAL_NNUE) if (newbob_decay != 1.0) { learn_think.calc_loss(0, -1); learn_think.best_loss = learn_think.latest_loss_sum / learn_think.latest_loss_count; @@ -2207,7 +2119,6 @@ namespace Learner learn_think.latest_loss_count = 0; cout << "initial loss: " << learn_think.best_loss << endl; } -#endif // ----------------------------------- // start learning evaluation function parameters diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index 5c8cee71..a2845c96 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -184,13 +184,11 @@ namespace Eval::NNUE { Initialize(); -#if defined(EVAL_NNUE) if (Options["SkipLoadingEval"]) { std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl; return true; } -#endif fileName = evalFile; diff --git a/src/nnue/evaluate_nnue_learner.cpp b/src/nnue/evaluate_nnue_learner.cpp index 13d9d578..7be06832 100644 --- a/src/nnue/evaluate_nnue_learner.cpp +++ b/src/nnue/evaluate_nnue_learner.cpp @@ -1,6 +1,6 @@ // Code for learning NNUE evaluation function -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include #include @@ -229,4 +229,4 @@ double get_eta() { } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) diff --git a/src/nnue/evaluate_nnue_learner.h b/src/nnue/evaluate_nnue_learner.h index 1e4a463e..0e5fbcd2 100644 --- a/src/nnue/evaluate_nnue_learner.h +++ b/src/nnue/evaluate_nnue_learner.h @@ -3,7 +3,7 @@ #ifndef _EVALUATE_NNUE_LEARNER_H_ #define _EVALUATE_NNUE_LEARNER_H_ -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include "../learn/learn.h" @@ -41,6 +41,6 @@ void CheckHealth(); } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) #endif diff --git a/src/nnue/features/castling_right.cpp b/src/nnue/features/castling_right.cpp index ee7b6576..47fbd986 100644 --- a/src/nnue/features/castling_right.cpp +++ b/src/nnue/features/castling_right.cpp @@ -1,7 +1,5 @@ //Definition of input feature quantity K of NNUE evaluation function -#if defined(EVAL_NNUE) - #include "castling_right.h" #include "index_list.h" @@ -69,5 +67,3 @@ namespace Eval { } // namespace NNUE } // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/nnue/features/castling_right.h b/src/nnue/features/castling_right.h index 3af5b074..27074080 100644 --- a/src/nnue/features/castling_right.h +++ b/src/nnue/features/castling_right.h @@ -3,8 +3,6 @@ #ifndef _NNUE_FEATURES_CASTLING_RIGHT_H_ #define _NNUE_FEATURES_CASTLING_RIGHT_H_ -#if defined(EVAL_NNUE) - #include "../../evaluate.h" #include "features_common.h" @@ -43,6 +41,4 @@ namespace Eval { } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/features/enpassant.cpp b/src/nnue/features/enpassant.cpp index ea70529a..77bc936e 100644 --- a/src/nnue/features/enpassant.cpp +++ b/src/nnue/features/enpassant.cpp @@ -1,7 +1,5 @@ //Definition of input feature quantity K of NNUE evaluation function 
-#if defined(EVAL_NNUE) - #include "enpassant.h" #include "index_list.h" @@ -43,5 +41,3 @@ namespace Eval { } // namespace NNUE } // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/nnue/features/enpassant.h b/src/nnue/features/enpassant.h index f77f9c4f..70a8eb5a 100644 --- a/src/nnue/features/enpassant.h +++ b/src/nnue/features/enpassant.h @@ -3,8 +3,6 @@ #ifndef _NNUE_FEATURES_ENPASSANT_H_ #define _NNUE_FEATURES_ENPASSANT_H_ -#if defined(EVAL_NNUE) - #include "../../evaluate.h" #include "features_common.h" @@ -43,6 +41,4 @@ namespace Eval { } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/features/half_relative_kp.cpp b/src/nnue/features/half_relative_kp.cpp index 015ecb73..597d65fb 100644 --- a/src/nnue/features/half_relative_kp.cpp +++ b/src/nnue/features/half_relative_kp.cpp @@ -1,7 +1,5 @@ //Definition of input features HalfRelativeKP of NNUE evaluation function -#if defined(EVAL_NNUE) - #include "half_relative_kp.h" #include "index_list.h" @@ -74,5 +72,3 @@ template class HalfRelativeKP; } // namespace NNUE } // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/nnue/features/half_relative_kp.h b/src/nnue/features/half_relative_kp.h index 2d4182e4..1b384c14 100644 --- a/src/nnue/features/half_relative_kp.h +++ b/src/nnue/features/half_relative_kp.h @@ -3,8 +3,6 @@ #ifndef _NNUE_FEATURES_HALF_RELATIVE_KP_H_ #define _NNUE_FEATURES_HALF_RELATIVE_KP_H_ -#if defined(EVAL_NNUE) - #include "../../evaluate.h" #include "features_common.h" @@ -60,6 +58,4 @@ class HalfRelativeKP { } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/features/k.cpp b/src/nnue/features/k.cpp index 314b1338..38ec9997 100644 --- a/src/nnue/features/k.cpp +++ b/src/nnue/features/k.cpp @@ -1,7 +1,5 @@ //Definition of input feature quantity K of NNUE evaluation function -#if defined(EVAL_NNUE) - #include "k.h" #include "index_list.h" @@ -54,5 +52,3 @@ void K::AppendChangedIndices( } // namespace NNUE } // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/nnue/features/k.h b/src/nnue/features/k.h index 0c394f4e..9a0be4bb 100644 --- a/src/nnue/features/k.h +++ b/src/nnue/features/k.h @@ -3,8 +3,6 @@ #ifndef _NNUE_FEATURES_K_H_ #define _NNUE_FEATURES_K_H_ -#if defined(EVAL_NNUE) - #include "../../evaluate.h" #include "features_common.h" @@ -47,6 +45,4 @@ private: } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/features/p.cpp b/src/nnue/features/p.cpp index b4a6faf9..0c1b7d50 100644 --- a/src/nnue/features/p.cpp +++ b/src/nnue/features/p.cpp @@ -1,7 +1,5 @@ //Definition of input feature P of NNUE evaluation function -#if defined(EVAL_NNUE) - #include "p.h" #include "index_list.h" @@ -52,5 +50,3 @@ void P::AppendChangedIndices( } // namespace NNUE } // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/nnue/features/p.h b/src/nnue/features/p.h index b3d4191e..07d88952 100644 --- a/src/nnue/features/p.h +++ b/src/nnue/features/p.h @@ -3,8 +3,6 @@ #ifndef _NNUE_FEATURES_P_H_ #define _NNUE_FEATURES_P_H_ -#if defined(EVAL_NNUE) - #include "../../evaluate.h" #include "features_common.h" @@ -47,6 +45,4 @@ class P { } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/layers/sum.h b/src/nnue/layers/sum.h index d8c7bf93..419ced89 100644 --- a/src/nnue/layers/sum.h +++ b/src/nnue/layers/sum.h @@ -3,8 +3,6 @@ #ifndef _NNUE_LAYERS_SUM_H_ #define _NNUE_LAYERS_SUM_H_ -#if defined(EVAL_NNUE) - #include "../nnue_common.h" namespace Eval { @@ -158,6 +156,4 @@ 
class Sum { } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/nnue_test_command.cpp b/src/nnue/nnue_test_command.cpp index 311c5ded..b8346693 100644 --- a/src/nnue/nnue_test_command.cpp +++ b/src/nnue/nnue_test_command.cpp @@ -1,6 +1,6 @@ // USI extended command for NNUE evaluation function -#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE) +#if defined(ENABLE_TEST_CMD) #include "../thread.h" #include "../uci.h" @@ -198,4 +198,4 @@ void TestCommand(Position& pos, std::istream& stream) { } // namespace Eval -#endif // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE) +#endif // defined(ENABLE_TEST_CMD) diff --git a/src/nnue/nnue_test_command.h b/src/nnue/nnue_test_command.h index 570ef01b..30854fd2 100644 --- a/src/nnue/nnue_test_command.h +++ b/src/nnue/nnue_test_command.h @@ -3,7 +3,7 @@ #ifndef _NNUE_TEST_COMMAND_H_ #define _NNUE_TEST_COMMAND_H_ -#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE) +#if defined(ENABLE_TEST_CMD) namespace Eval { @@ -16,6 +16,6 @@ void TestCommand(Position& pos, std::istream& stream); } // namespace Eval -#endif // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE) +#endif // defined(ENABLE_TEST_CMD) #endif diff --git a/src/nnue/trainer/features/factorizer.h b/src/nnue/trainer/features/factorizer.h index 148ee8ec..43950de2 100644 --- a/src/nnue/trainer/features/factorizer.h +++ b/src/nnue/trainer/features/factorizer.h @@ -3,8 +3,6 @@ #ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_H_ #define _NNUE_TRAINER_FEATURES_FACTORIZER_H_ -#if defined(EVAL_NNUE) - #include "../../nnue_common.h" #include "../trainer.h" @@ -105,6 +103,4 @@ constexpr std::size_t GetArrayLength(const T (&/*array*/)[N]) { } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/trainer/features/factorizer_feature_set.h b/src/nnue/trainer/features/factorizer_feature_set.h index af524719..caf6608b 100644 --- a/src/nnue/trainer/features/factorizer_feature_set.h +++ b/src/nnue/trainer/features/factorizer_feature_set.h @@ -3,8 +3,6 @@ #ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_ #define _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_ -#if defined(EVAL_NNUE) - #include "../../features/feature_set.h" #include "factorizer.h" @@ -99,6 +97,4 @@ public: } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/trainer/features/factorizer_half_kp.h b/src/nnue/trainer/features/factorizer_half_kp.h index 955894e8..70a6acca 100644 --- a/src/nnue/trainer/features/factorizer_half_kp.h +++ b/src/nnue/trainer/features/factorizer_half_kp.h @@ -3,8 +3,6 @@ #ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_ #define _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_ -#if defined(EVAL_NNUE) - #include "../../features/half_kp.h" #include "../../features/p.h" #include "../../features/half_relative_kp.h" @@ -98,6 +96,4 @@ constexpr FeatureProperties Factorizer>::kProperties[]; } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/trainer/trainer.h b/src/nnue/trainer/trainer.h index 4b467041..d526557a 100644 --- a/src/nnue/trainer/trainer.h +++ b/src/nnue/trainer/trainer.h @@ -3,7 +3,7 @@ #ifndef _NNUE_TRAINER_H_ #define _NNUE_TRAINER_H_ -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include "../nnue_common.h" #include "../features/index_list.h" @@ -120,6 +120,6 @@ std::shared_ptr MakeAlignedSharedPtr(ArgumentTypes&&... 
arguments) { } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) #endif diff --git a/src/nnue/trainer/trainer_affine_transform.h b/src/nnue/trainer/trainer_affine_transform.h index da11ca29..4b5ddee6 100644 --- a/src/nnue/trainer/trainer_affine_transform.h +++ b/src/nnue/trainer/trainer_affine_transform.h @@ -3,7 +3,7 @@ #ifndef _NNUE_TRAINER_AFFINE_TRANSFORM_H_ #define _NNUE_TRAINER_AFFINE_TRANSFORM_H_ -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include "../../learn/learn.h" #include "../layers/affine_transform.h" @@ -296,6 +296,6 @@ class Trainer> { } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) #endif diff --git a/src/nnue/trainer/trainer_clipped_relu.h b/src/nnue/trainer/trainer_clipped_relu.h index bd59a02d..72575bf8 100644 --- a/src/nnue/trainer/trainer_clipped_relu.h +++ b/src/nnue/trainer/trainer_clipped_relu.h @@ -3,7 +3,7 @@ #ifndef _NNUE_TRAINER_CLIPPED_RELU_H_ #define _NNUE_TRAINER_CLIPPED_RELU_H_ -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include "../../learn/learn.h" #include "../layers/clipped_relu.h" @@ -137,6 +137,6 @@ class Trainer> { } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) #endif diff --git a/src/nnue/trainer/trainer_feature_transformer.h b/src/nnue/trainer/trainer_feature_transformer.h index 97dbeff4..6b94d952 100644 --- a/src/nnue/trainer/trainer_feature_transformer.h +++ b/src/nnue/trainer/trainer_feature_transformer.h @@ -3,7 +3,7 @@ #ifndef _NNUE_TRAINER_FEATURE_TRANSFORMER_H_ #define _NNUE_TRAINER_FEATURE_TRANSFORMER_H_ -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include "../../learn/learn.h" #include "../nnue_feature_transformer.h" @@ -372,6 +372,6 @@ class Trainer { } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) #endif diff --git a/src/nnue/trainer/trainer_input_slice.h b/src/nnue/trainer/trainer_input_slice.h index 7d9e76c3..b6d6635b 100644 --- a/src/nnue/trainer/trainer_input_slice.h +++ b/src/nnue/trainer/trainer_input_slice.h @@ -3,7 +3,7 @@ #ifndef _NNUE_TRAINER_INPUT_SLICE_H_ #define _NNUE_TRAINER_INPUT_SLICE_H_ -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include "../../learn/learn.h" #include "../layers/input_slice.h" @@ -246,6 +246,6 @@ class Trainer> { } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) #endif diff --git a/src/nnue/trainer/trainer_sum.h b/src/nnue/trainer/trainer_sum.h index f7bf3b3d..0b7abe36 100644 --- a/src/nnue/trainer/trainer_sum.h +++ b/src/nnue/trainer/trainer_sum.h @@ -3,7 +3,7 @@ #ifndef _NNUE_TRAINER_SUM_H_ #define _NNUE_TRAINER_SUM_H_ -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include "../../learn/learn.h" #include "../layers/sum.h" @@ -185,6 +185,6 @@ class Trainer> { } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) #endif diff --git a/src/uci.cpp b/src/uci.cpp index d6745d19..5be2afbb 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -32,7 +32,7 @@ #include "uci.h" #include "syzygy/tbprobe.h" -#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD) +#if defined(ENABLE_TEST_CMD) #include "nnue/nnue_test_command.h" #endif @@ -67,7 +67,7 @@ namespace Learner } #endif -#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD) +#if defined(ENABLE_TEST_CMD) void 
test_cmd(Position& pos, istringstream& is) { // Initialize as it may be searched. @@ -373,7 +373,7 @@ void UCI::loop(int argc, char* argv[]) { #endif -#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD) +#if defined(ENABLE_TEST_CMD) // test command else if (token == "test") test_cmd(pos, is); #endif diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 0007b559..4f9fab5e 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -83,7 +83,6 @@ void init(OptionsMap& o) { // The default must follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. o["EvalFile"] << Option("nn-82215d0fd0df.nnue", on_eval_file); -#ifdef EVAL_NNUE // When the evaluation function is loaded at the ucinewgame timing, it is necessary to convert the new evaluation function. // I want to hit the test eval convert command, but there is no new evaluation function // It ends abnormally before executing this command. @@ -92,7 +91,6 @@ void init(OptionsMap& o) { o["SkipLoadingEval"] << Option(false); // how many moves to use a fixed move // o["BookMoves"] << Option(16, 0, 10000); -#endif #if defined(EVAL_LEARN) // When learning the evaluation function, you can change the folder to save the evaluation function. // Evalsave by default. This folder shall be prepared in advance. From e6a6ba52213290d0996913ec6367a8364c5199ec Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 22:49:55 +0900 Subject: [PATCH 08/30] Removed USE_BOOK macro. --- src/learn/gensfen.cpp | 40 ---------------------------------------- src/learn/learner.cpp | 5 ----- 2 files changed, 45 deletions(-) diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index 9ae83174..589d9559 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -11,10 +11,6 @@ #include "learn.h" #include "multi_think.h" -#if defined(USE_BOOK) -#include "../extra/book/book.h" -#endif - #include #include #include @@ -750,11 +746,6 @@ namespace Learner auto& pos = th->rootPos; pos.set(StartFEN, false, &si, th); -#if defined(USE_BOOK) - // Refer to the members of BookMoveSelector defined in the search section. - auto& book = ::book; -#endif - // Vector for holding the sfens in the current simulated game. PSVector a_psv; a_psv.reserve(write_maxply + MAX_PLY); @@ -788,35 +779,7 @@ namespace Learner flush_psv(result.value()); break; } -#if defined(USE_BOOK) - if ((next_move = book.probe(pos)) != MOVE_NONE) - { - // Hit the constant track. - // The move was stored in next_move. - // Do not use the fixed phase for learning. - sfens.clear(); - - if (random_move_minply != -1) - { - // Random move is performed with a certain - // probability even in the constant phase. - goto RANDOM_MOVE; - } - else - { - // When -1 is specified as random_move_minply, - // it points according to the standard until - // it goes out of the standard. - // Prepare an innumerable number of situations - // that have left the constant as - // ConsiderationBookMoveCount true using a huge constant - // Used for purposes such as performing - // a random move 5 times from there. 
- goto DO_MOVE; - } - } -#endif { auto [search_value, search_pv] = search(pos, depth, 1, nodes); @@ -1124,9 +1087,6 @@ namespace Learner << " loop_max = " << loop_max << endl << " eval_limit = " << eval_limit << endl << " thread_num (set by USI setoption) = " << thread_num << endl -#if defined(USE_BOOK) - << " book_moves (set by USI setoption) = " << Options["BookMoves"] << endl -#endif << " random_move_minply = " << random_move_minply << endl << " random_move_maxply = " << random_move_maxply << endl << " random_move_count = " << random_move_count << endl diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 358848ec..e7f021fe 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -54,11 +54,6 @@ using namespace std; -#if defined(USE_BOOK) -// This is defined in the search section. -extern Book::BookMoveSelector book; -#endif - template T operator +=(std::atomic& x, const T rhs) { From 21cfead52c2a77abc4e9eed21739ccc3df9826c0 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 22:52:46 +0900 Subject: [PATCH 09/30] Removed unused OMP_ macro. --- src/learn/convert.cpp | 4 ---- src/learn/gensfen.cpp | 4 ---- src/learn/learning_tools.cpp | 3 --- 3 files changed, 11 deletions(-) diff --git a/src/learn/convert.cpp b/src/learn/convert.cpp index 9bd9548d..d07fc00c 100644 --- a/src/learn/convert.cpp +++ b/src/learn/convert.cpp @@ -27,10 +27,6 @@ #include #include -#if defined (_OPENMP) -#include -#endif - using namespace std; namespace Learner diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index 589d9559..65e64177 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -28,10 +28,6 @@ #include #include -#if defined (_OPENMP) -#include -#endif - using namespace std; namespace Learner diff --git a/src/learn/learning_tools.cpp b/src/learn/learning_tools.cpp index de6da9c5..eca11c47 100644 --- a/src/learn/learning_tools.cpp +++ b/src/learn/learning_tools.cpp @@ -2,9 +2,6 @@ #if defined (EVAL_LEARN) -#if defined(_OPENMP) -#include -#endif #include "../misc.h" using namespace Eval; From 1d00d002412e11505430a9da32297b81e11b6801 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 22:59:57 +0900 Subject: [PATCH 10/30] Removed ENABLE_TEST_CMD macro. --- src/Makefile | 6 +++--- src/nnue/nnue_test_command.cpp | 4 ---- src/nnue/nnue_test_command.h | 4 ---- src/uci.cpp | 11 ++--------- 4 files changed, 5 insertions(+), 20 deletions(-) diff --git a/src/Makefile b/src/Makefile index a07e1251..49c6c1b3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -903,7 +903,7 @@ icc-profile-use: learn: config-sanity $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS=' -DEVAL_LEARN -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ + EXTRACXXFLAGS=' -DEVAL_LEARN -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ EXTRALDFLAGS=' $(BLASLDFLAGS) -fopenmp ' \ all @@ -911,7 +911,7 @@ profile-learn: net config-sanity objclean profileclean @echo "" @echo "Step 1/4. Building instrumented executable ..." $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) \ - LEARNCXXFLAGS=' -DEVAL_LEARN -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ + LEARNCXXFLAGS=' -DEVAL_LEARN -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ LEARNLDFLAGS=' $(BLASLDFLAGS) -fopenmp ' @echo "" @echo "Step 2/4. Running benchmark for pgo-build ..." @@ -920,7 +920,7 @@ profile-learn: net config-sanity objclean profileclean @echo "Step 3/4. Building optimized executable ..." 
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_use) \ - LEARNCXXFLAGS=' -DEVAL_LEARN -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ + LEARNCXXFLAGS=' -DEVAL_LEARN -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ LEARNLDFLAGS=' $(BLASLDFLAGS) -fopenmp ' @echo "" @echo "Step 4/4. Deleting profile data ..." diff --git a/src/nnue/nnue_test_command.cpp b/src/nnue/nnue_test_command.cpp index b8346693..c3a53c7d 100644 --- a/src/nnue/nnue_test_command.cpp +++ b/src/nnue/nnue_test_command.cpp @@ -1,7 +1,5 @@ // USI extended command for NNUE evaluation function -#if defined(ENABLE_TEST_CMD) - #include "../thread.h" #include "../uci.h" #include "evaluate_nnue.h" @@ -197,5 +195,3 @@ void TestCommand(Position& pos, std::istream& stream) { } // namespace NNUE } // namespace Eval - -#endif // defined(ENABLE_TEST_CMD) diff --git a/src/nnue/nnue_test_command.h b/src/nnue/nnue_test_command.h index 30854fd2..75d33e82 100644 --- a/src/nnue/nnue_test_command.h +++ b/src/nnue/nnue_test_command.h @@ -3,8 +3,6 @@ #ifndef _NNUE_TEST_COMMAND_H_ #define _NNUE_TEST_COMMAND_H_ -#if defined(ENABLE_TEST_CMD) - namespace Eval { namespace NNUE { @@ -16,6 +14,4 @@ void TestCommand(Position& pos, std::istream& stream); } // namespace Eval -#endif // defined(ENABLE_TEST_CMD) - #endif diff --git a/src/uci.cpp b/src/uci.cpp index 5be2afbb..1454e4e0 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -24,17 +24,14 @@ #include "evaluate.h" #include "movegen.h" +#include "nnue/nnue_test_command.h" #include "position.h" #include "search.h" +#include "syzygy/tbprobe.h" #include "thread.h" #include "timeman.h" #include "tt.h" #include "uci.h" -#include "syzygy/tbprobe.h" - -#if defined(ENABLE_TEST_CMD) -#include "nnue/nnue_test_command.h" -#endif using namespace std; @@ -67,7 +64,6 @@ namespace Learner } #endif -#if defined(ENABLE_TEST_CMD) void test_cmd(Position& pos, istringstream& is) { // Initialize as it may be searched. @@ -78,7 +74,6 @@ void test_cmd(Position& pos, istringstream& is) if (param == "nnue") Eval::NNUE::TestCommand(pos, is); } -#endif namespace { @@ -373,10 +368,8 @@ void UCI::loop(int argc, char* argv[]) { #endif -#if defined(ENABLE_TEST_CMD) // test command else if (token == "test") test_cmd(pos, is); -#endif else sync_cout << "Unknown command: " << cmd << sync_endl; From 458771a18199d4f64f4190521bea4aa91460c462 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 23:02:31 +0900 Subject: [PATCH 11/30] Removed GENSFEN2019 macro. --- src/uci.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/uci.cpp b/src/uci.cpp index 1454e4e0..6675f2e0 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -50,11 +50,6 @@ namespace Learner // Learning from the generated game record void learn(Position& pos, istringstream& is); -#if defined(GENSFEN2019) - // Automatic generation command of teacher phase under development - void gen_sfen2019(Position& pos, istringstream& is); -#endif - // A pair of reader and evaluation value. Returned by Learner::search(),Learner::qsearch(). 
typedef std::pair > ValueAndPV; @@ -358,10 +353,6 @@ void UCI::loop(int argc, char* argv[]) { else if (token == "gensfen") Learner::gen_sfen(pos, is); else if (token == "learn") Learner::learn(pos, is); -#if defined (GENSFEN2019) - // Command to generate teacher phase under development - else if (token == "gensfen2019") Learner::gen_sfen2019(pos, is); -#endif // Command to call qsearch(),search() directly for testing else if (token == "qsearch") qsearch_cmd(pos); else if (token == "search") search_cmd(pos, is); From 04a9a951b8611d6f176d49c9edd24d22ec5ba457 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 23:08:39 +0900 Subject: [PATCH 12/30] Removed "#if 0" and "#if 1". --- src/learn/gensfen.cpp | 24 +++------------------ src/learn/learner.cpp | 50 ------------------------------------------- 2 files changed, 3 insertions(+), 71 deletions(-) diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index 65e64177..ec3de570 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -166,7 +166,7 @@ namespace Learner output_file_stream.write(reinterpret_cast(buf->data()), sizeof(PackedSfenValue) * buf->size()); sfen_write_count += buf->size(); -#if 1 + // Add the processed number here, and if it exceeds save_every, // change the file name and reset this counter. sfen_write_count_current_file += buf->size(); @@ -186,7 +186,7 @@ namespace Learner output_file_stream.open(new_filename, ios::out | ios::binary | ios::app); cout << endl << "output sfen file = " << new_filename << endl; } -#endif + // Output '.' every time when writing a game record. std::cout << "."; @@ -519,10 +519,6 @@ namespace Learner { // Write out one sfen. sfen_writer.write(thread_id, *it); -#if 0 - pos.set_from_packed_sfen(it->sfen); - cout << pos << "Win : " << it->is_win << " , " << it->score << endl; -#endif } return quit; @@ -667,13 +663,12 @@ namespace Learner for (auto m : pv) { -#if 1 // There should be no illegal move. This is as a debugging precaution. if (!pos.pseudo_legal(m) || !pos.legal(m)) { cout << "Error! : " << pos.fen() << m << endl; } -#endif + pos.do_move(m, states[ply++]); // Because the difference calculation of evaluate() cannot be @@ -803,19 +798,6 @@ namespace Learner // Save the move score for adjudication. move_hist_scores.push_back(search_value); -#if 0 - dbg_hit_on(search_value == leaf_value); - // gensfen depth 3 eval_limit 32000 - // Total 217749 Hits 203579 hit rate (%) 93.490 - // gensfen depth 6 eval_limit 32000 - // Total 78407 Hits 69190 hit rate (%) 88.245 - // gensfen depth 6 eval_limit 3000 - // Total 53879 Hits 43713 hit rate (%) 81.132 - - // Problems such as pruning with moves in the substitution table. - // This is a little uncomfortable as a teacher... -#endif - // If depth 0, pv is not obtained, so search again at depth 2. if (search_depth_min <= 0) { diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index e7f021fe..2f1d27b2 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -1163,11 +1163,6 @@ namespace Learner if (ps.gamePly < prng.rand(reduction_gameply)) goto RETRY_READ; -#if 0 - auto sfen = pos.sfen_unpack(ps.data); - pos.set(sfen); -#endif - // ↑ Since it is slow when passing through sfen, I made a dedicated function. StateInfo si; const bool mirror = prng.rand(100) < mirror_percentage; if (pos.set_from_packed_sfen(ps.sfen, &si, th, mirror) != 0) @@ -1207,28 +1202,6 @@ namespace Learner // If it is the result of searching a completely different place, it may become noise. 
// It may be better not to study where the difference in evaluation values ​​is too large. -#if 0 - // If you do this, about 13% of the phases will be excluded - // from the learning target. Good and bad are subtle. - if (pv.size() >= 1 && (uint16_t)pv[0] != ps.move) - { - //dbg_hit_on(false); - continue; - } -#endif - -#if 0 - // It may be better not to study where the difference in evaluation values ​​is too large. - // → It's okay because it passes the win rate function... - // About 30% of the phases are out of the scope of learning... - if (abs((int16_t)r.first - ps.score) >= Eval::PawnValue * 4) - { - //dbg_hit_on(false); - continue; - } - //dbg_hit_on(true); -#endif - int ply = 0; // A helper function that adds the gradient to the current phase. @@ -1315,17 +1288,6 @@ namespace Learner // rewind the phase for (auto it = pv.rbegin(); it != pv.rend(); ++it) pos.undo_move(*it); - -#if 0 - // When adding the gradient to the root phase - shallow_value = - (rootColor == pos.side_to_move()) - ? Eval::evaluate(pos) - : -Eval::evaluate(pos); - - dj_dw = calc_grad(deep_value, shallow_value, ps); - Eval::add_grad(pos, rootColor, dj_dw, without_kpp); -#endif } } @@ -2058,18 +2020,6 @@ namespace Learner learn_think.best_nn_directory = std::string(Options["EvalDir"]); } -#if 0 - // A test to give a gradient of 1.0 to the initial stage of Hirate. - pos.set_hirate(); - cout << Eval::evaluate(pos) << endl; - //Eval::print_eval_stat(pos); - Eval::add_grad(pos, BLACK, 32.0, false); - Eval::update_weights(1); - pos.state()->sum.p[2][0] = VALUE_NOT_EVALUATED; - cout << Eval::evaluate(pos) << endl; - //Eval::print_eval_stat(pos); -#endif - cout << "init done." << endl; // Reflect other option settings. From ec96409176fa8f2cdc2e8a003150fcabf037f85c Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 23:30:57 +0900 Subject: [PATCH 13/30] Replaced DNDEBUG macro to _DEBUG macro. --- src/learn/gensfen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index ec3de570..0232e5d4 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -70,7 +70,7 @@ namespace Learner file_worker_thread.join(); output_file_stream.close(); -#if !defined(DNDEBUG) +#if defined(_DEBUG) { // All buffers should be empty since file_worker_thread // should have written everything before exiting. From aa2452caf39446fded3c0ee79c18c3ecb43369b3 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 23:45:19 +0900 Subject: [PATCH 14/30] Removed #if for USE_EVAL_HASH. --- src/eval/evaluate_common.h | 6 ------ src/learn/gensfen.cpp | 10 ---------- src/learn/learner.cpp | 2 -- 3 files changed, 18 deletions(-) diff --git a/src/eval/evaluate_common.h b/src/eval/evaluate_common.h index 3fb161ab..927783cd 100644 --- a/src/eval/evaluate_common.h +++ b/src/eval/evaluate_common.h @@ -18,12 +18,6 @@ namespace Eval { - -#if defined(USE_EVAL_HASH) - // prefetch function - void prefetch_evalhash(const Key key); -#endif - // An operator that applies the function f to each parameter of the evaluation function. // Used for parameter analysis etc. // type indicates the survey target. 
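A note on the guard patch 13 rewrites above: #if !defined(DNDEBUG) looks like a typo for the standard NDEBUG macro (the compiler flag -DNDEBUG defines NDEBUG; a macro literally named DNDEBUG is never defined), so the old "buffers empty" check was compiled into every build, release included. The new _DEBUG spelling follows the MSVC debug-runtime convention. A minimal sketch of the corrected pattern, with a hypothetical check_all_flushed() helper standing in for the real SfenWriter code:

#include <cassert>
#include <vector>

// Hypothetical stand-in for SfenWriter's per-thread buffers.
static std::vector<std::vector<char>> buffers(4);

static void check_all_flushed() {
#if defined(_DEBUG)
    // Compiled only into debug builds (MSVC defines _DEBUG under /MTd and
    // /MDd; other compilers would need an explicit -D_DEBUG).
    for (const auto& buf : buffers)
        assert(buf.empty());
#endif
}

int main() {
    check_all_flushed();
    return 0;
}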
diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index 0232e5d4..4050d983 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -956,12 +956,6 @@ namespace Learner string token; - // When hit to eval hash, as a evaluation value near the initial stage, if a hash collision occurs and a large value is written - // When eval_limit is set small, eval_limit will be exceeded every time in the initial phase, and phase generation will not proceed. - // Therefore, eval hash needs to be disabled. - // After that, when the hash of the eval hash collides, the evaluation value of a strange value is used, and it may be unpleasant to use it for the teacher. - bool use_eval_hash = false; - // Save to file in this unit. // File names are serialized like file_1.bin, file_2.bin. uint64_t save_every = UINT64_MAX; @@ -1010,8 +1004,6 @@ namespace Learner is >> write_minply; else if (token == "write_maxply") is >> write_maxply; - else if (token == "use_eval_hash") - is >> use_eval_hash; else if (token == "save_every") is >> save_every; else if (token == "random_file_name") @@ -1033,7 +1025,6 @@ namespace Learner #if defined(USE_GLOBAL_OPTIONS) // Save it for later restore. auto oldGlobalOptions = GlobalOptions; - GlobalOptions.use_eval_hash = use_eval_hash; #endif // If search depth2 is not set, leave it the same as search depth. @@ -1075,7 +1066,6 @@ namespace Learner << " write_minply = " << write_minply << endl << " write_maxply = " << write_maxply << endl << " output_file_name = " << output_file_name << endl - << " use_eval_hash = " << use_eval_hash << endl << " save_every = " << save_every << endl << " random_file_name = " << random_file_name << endl << " write_out_draw_game_in_training_data_generation = " << write_out_draw_game_in_training_data_generation << endl diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 2f1d27b2..9e6f10cb 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -1639,8 +1639,6 @@ namespace Learner #if defined(USE_GLOBAL_OPTIONS) // Save it for later restore. auto oldGlobalOptions = GlobalOptions; - // If you hit the eval hash, you can not calculate rmse etc. so turn it off. - GlobalOptions.use_eval_hash = false; // If you hit the replacement table, pruning may occur at the previous evaluation value, so turn it off. GlobalOptions.use_hash_probe = false; #endif From 82dc68ba9ffe1d5fe849eef1f0fcc565ef810512 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 23:47:04 +0900 Subject: [PATCH 15/30] Removed #if for USE_GLOBAL_OPTIONS. --- src/learn/gensfen.cpp | 11 ----------- src/learn/learner.cpp | 12 ------------ src/search.cpp | 11 ----------- 3 files changed, 34 deletions(-) diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index 4050d983..3d015acf 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -1022,11 +1022,6 @@ namespace Learner cout << "Error! : Illegal token " << token << endl; } -#if defined(USE_GLOBAL_OPTIONS) - // Save it for later restore. - auto oldGlobalOptions = GlobalOptions; -#endif - // If search depth2 is not set, leave it the same as search depth. if (search_depth_max == INT_MIN) search_depth_max = search_depth_min; @@ -1103,12 +1098,6 @@ namespace Learner } std::cout << "gensfen finished." << endl; - -#if defined(USE_GLOBAL_OPTIONS) - // Restore Global Options. 
- GlobalOptions = oldGlobalOptions; -#endif - } } #endif diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 9e6f10cb..daea9594 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -1636,13 +1636,6 @@ namespace Learner uint64_t eta1_epoch = 0; // eta2 is not applied by default uint64_t eta2_epoch = 0; // eta3 is not applied by default -#if defined(USE_GLOBAL_OPTIONS) - // Save it for later restore. - auto oldGlobalOptions = GlobalOptions; - // If you hit the replacement table, pruning may occur at the previous evaluation value, so turn it off. - GlobalOptions.use_hash_probe = false; -#endif - // --- Function that only shuffles the teacher aspect // normal shuffle @@ -2072,11 +2065,6 @@ namespace Learner // Save once at the end. learn_think.save(true); - -#if defined(USE_GLOBAL_OPTIONS) - // Restore Global Options. - GlobalOptions = oldGlobalOptions; -#endif } } // namespace Learner diff --git a/src/search.cpp b/src/search.cpp index 8f258ae4..67348a2b 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -2070,17 +2070,6 @@ namespace Learner rootMoves.push_back(Search::RootMove(m)); assert(!rootMoves.empty()); - - //#if defined(USE_GLOBAL_OPTIONS) - // Since the generation of the substitution table for each search thread should be managed, - // Increase the generation of the substitution table for this thread because it is a new search. - //TT.new_search(th->thread_id()); - - // ª If you call new_search here, it may be a loss because you can't use the previous search result. - // Do not do this here, but caller should do TT.new_search(th->thread_id()) for each station ... - - // ¨Because we want to avoid reaching the same final diagram, use the substitution table commonly for all threads when generating teachers. - //#endif } } From 05d26499b42878447a21b6d721f4868151357665 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 23:57:51 +0900 Subject: [PATCH 16/30] Removed LEARN_ELMO_METHOD macro. --- src/learn/learn.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 1bc39cf9..7285f61a 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -5,18 +5,6 @@ #include -// ===================== -// Settings for learning -// ===================== - -// If you select one of the following, the details after that will be automatically selected. -// If you don't select any of them, you need to set the subsequent details one by one. - -// Learning setting by elmo method. This is the default setting. -// To make a standard squeeze diaphragm, specify "lambda 1" with the learn command. -#define LEARN_ELMO_METHOD - - // ---------------------- // update formula // ---------------------- @@ -147,10 +135,8 @@ typedef float LearnFloatType; // Learning with the method of elmo (WCSC27) // ---------------------- -#if defined( LEARN_ELMO_METHOD ) #define LOSS_FUNCTION_IS_ELMO_METHOD #define ADA_GRAD_UPDATE -#endif // Character string according to update formula. (Output for debugging.) // Implemented various update expressions, but concluded that AdaGrad is the best in terms of speed and memory. From 0271d707759117af6557beb93319aa51c07280aa Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:01:53 +0900 Subject: [PATCH 17/30] Removed ADA_GRAD_UPDATE macro. 
--- src/learn/learn.h | 10 +------ src/learn/learning_tools.h | 54 +------------------------------------- 2 files changed, 2 insertions(+), 62 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 7285f61a..8fb6217f 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -9,9 +9,6 @@ // update formula // ---------------------- -// Ada Grad. Recommended because it is stable. -// #define ADA_GRAD_UPDATE - // SGD looking only at the sign of the gradient. It requires less memory, but the accuracy is... // #define SGD_UPDATE @@ -136,13 +133,8 @@ typedef float LearnFloatType; // ---------------------- #define LOSS_FUNCTION_IS_ELMO_METHOD -#define ADA_GRAD_UPDATE -// Character string according to update formula. (Output for debugging.) -// Implemented various update expressions, but concluded that AdaGrad is the best in terms of speed and memory. -#if defined(ADA_GRAD_UPDATE) -#define LEARN_UPDATE "AdaGrad" -#elif defined(SGD_UPDATE) +#if defined(SGD_UPDATE) #define LEARN_UPDATE "SGD" #endif diff --git a/src/learn/learning_tools.h b/src/learn/learning_tools.h index 3c4be08a..854133e4 100644 --- a/src/learn/learning_tools.h +++ b/src/learn/learning_tools.h @@ -76,59 +76,7 @@ namespace EvalLearningTools template void updateFV(T& v) { updateFV(v, 1.0); } -#if defined (ADA_GRAD_UPDATE) - - // Since the maximum value that can be accurately calculated with float is INT16_MAX*256-1 - // Keep the small value as a marker. - const LearnFloatType V0_NOT_INIT = (INT16_MAX * 128); - - // What holds v internally. The previous implementation kept a fixed decimal with only a fractional part to save memory, - // Since it is doubtful in accuracy and the visibility is bad, it was abolished. - LearnFloatType v0 = LearnFloatType(V0_NOT_INIT); - - // AdaGrad g2 - LearnFloatType g2 = LearnFloatType(0); - - // update with AdaGrad - // When executing this function, the value of g and the member do not change - // Guaranteed by the caller. It does not have to be an atomic operation. - // k is a coefficient for eta. 1.0 is usually sufficient. If you want to lower eta for your turn item, set this to 1/8.0 etc. - template - void updateFV(T& v,double k) - { - // AdaGrad update formula - // Gradient vector is g, vector to be updated is v, η(eta) is a constant, - // g2 = g2 + g^2 - // v = v - ηg/sqrt(g2) - - constexpr double epsilon = 0.000001; - - if (g == LearnFloatType(0)) - return; - - g2 += g * g; - - // If v0 is V0_NOT_INIT, it means that the value is not initialized with the value of KK/KKP/KPP array, - // In this case, read the value of v from the one passed in the argument. - double V = (v0 == V0_NOT_INIT) ? v : v0; - - V -= k * eta * (double)g / sqrt((double)g2 + epsilon); - - // Limit the value of V to be within the range of types. - // By the way, windows.h defines the min and max macros, so to avoid it, - // Here, it is enclosed in parentheses so that it is not treated as a function-like macro. - V = (std::min)((double)(std::numeric_limits::max)() , V); - V = (std::max)((double)(std::numeric_limits::min)() , V); - - v0 = (LearnFloatType)V; - v = (T)round(V); - - // Clear g because one update of mini-batch for this element is over - // g[i] = 0; - // → There is a problem of dimension reduction, so this will be done by the caller. 
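For reference, a standalone sketch of the AdaGrad rule this deleted branch implemented, following the formula in its own comments (g2 += g*g, then v -= eta * g / sqrt(g2 + epsilon), clamped to the storage type). The class name and learning rate here are illustrative, not the original API:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

struct AdaGradWeight {
    float g  = 0.0f;               // gradient summed over one mini-batch
    float g2 = 0.0f;               // running sum of squared gradients
    static constexpr double kEta     = 30.0;  // illustrative learning rate
    static constexpr double kEpsilon = 1e-6;  // keeps sqrt() away from zero

    void update(std::int16_t& v) {
        if (g == 0.0f)
            return;
        g2 += g * g;
        double V = v - kEta * g / std::sqrt(double(g2) + kEpsilon);
        // Clamp so the result still fits the 16-bit storage type.
        V = std::min(V, double(INT16_MAX));
        V = std::max(V, double(INT16_MIN));
        v = std::int16_t(std::lround(V));
        g = 0.0f;  // this mini-batch has been consumed
    }
};

int main() {
    AdaGradWeight w;
    std::int16_t v = 100;
    w.g = 2.5f;
    w.update(v);
    std::printf("v after one AdaGrad step: %d\n", v);  // v decreased
    return 0;
}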
- } - -#elif defined(SGD_UPDATE) +#if defined(SGD_UPDATE) // See only the sign of the gradient Update with SGD // When executing this function, the value of g and the member do not change From f3a158725d573753cf4b81fc5866c0f3bbdb1e88 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:07:09 +0900 Subject: [PATCH 18/30] Removed SGD_UPDATE macro. --- src/learn/learn.h | 12 --------- src/learn/learning_tools.h | 51 ++------------------------------------ 2 files changed, 2 insertions(+), 61 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 8fb6217f..91b40213 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -5,14 +5,6 @@ #include -// ---------------------- -// update formula -// ---------------------- - -// SGD looking only at the sign of the gradient. It requires less memory, but the accuracy is... -// #define SGD_UPDATE - - // ---------------------- // Select the objective function // ---------------------- @@ -134,10 +126,6 @@ typedef float LearnFloatType; #define LOSS_FUNCTION_IS_ELMO_METHOD -#if defined(SGD_UPDATE) -#define LEARN_UPDATE "SGD" -#endif - #if defined(LOSS_FUNCTION_IS_WINNING_PERCENTAGE) #define LOSS_FUNCTION "WINNING_PERCENTAGE" #elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY) diff --git a/src/learn/learning_tools.h b/src/learn/learning_tools.h index 854133e4..348105b6 100644 --- a/src/learn/learning_tools.h +++ b/src/learn/learning_tools.h @@ -4,13 +4,12 @@ // A set of machine learning tools related to the weight array used for machine learning of evaluation functions #include "learn.h" + #if defined (EVAL_LEARN) -#include -#if defined(SGD_UPDATE) || defined(USE_KPPP_MIRROR_WRITE) #include "../misc.h" // PRNG , my_insertion_sort -#endif +#include #include // std::sqrt() namespace EvalLearningTools @@ -29,14 +28,6 @@ namespace EvalLearningTools // cumulative value of one mini-batch gradient LearnFloatType g = LearnFloatType(0); - // When ADA_GRAD_UPDATE. LearnFloatType == float, - // total 4*2 + 4*2 + 1*2 = 18 bytes - // It suffices to secure a Weight array that is 4.5 times the size of the evaluation function parameter of 1GB. - // However, sizeof(Weight)==20 code is generated if the structure alignment is in 4-byte units, so - // Specify pragma pack(2). - - // For SGD_UPDATE, this structure is reduced by 10 bytes to 8 bytes. - // Learning rate η(eta) such as AdaGrad. // It is assumed that eta1,2,3,eta1_epoch,eta2_epoch have been set by the time updateFV() is called. // The epoch of update_weights() gradually changes from eta1 to eta2 until eta1_epoch. @@ -76,44 +67,6 @@ namespace EvalLearningTools template void updateFV(T& v) { updateFV(v, 1.0); } -#if defined(SGD_UPDATE) - - // See only the sign of the gradient Update with SGD - // When executing this function, the value of g and the member do not change - // Guaranteed by the caller. It does not have to be an atomic operation. - template - void updateFV(T & v , double k) - { - if (g == 0) - return; - - // See only the sign of g and update. - // If g <0, add v a little. - // If g> 0, subtract v slightly. - - // Since we only add integers, no decimal part is required. - - // It's a good idea to move around 0-5. - // It is better to have a Gaussian distribution, so generate a 5-bit random number (each bit has a 1/2 probability of 1), - // Pop_count() it. At this time, it has a binomial distribution. - //int16_t diff = (int16_t)POPCNT32((u32)prng.rand(31)); - // → If I do this with 80 threads, this AsyncPRNG::rand() locks, so I slowed down. This implementation is not good. 
- int16_t diff = 1; - - double V = v; - if (g > 0.0) - V-= diff; - else - V+= diff; - - V = (std::min)((double)(std::numeric_limits::max)(), V); - V = (std::max)((double)(std::numeric_limits::min)(), V); - - v = (T)V; - } - -#endif - // grad setting template void set_grad(const T& g_) { g = g_; } From d37eb63581ce2de8fd1a8406a9bc06b6377d2176 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:08:56 +0900 Subject: [PATCH 19/30] Removed LOSS_FUNCTION_IS_WINNING_PERCENTAGE macro. --- src/learn/learn.h | 9 +-------- src/learn/learner.cpp | 36 ------------------------------------ 2 files changed, 1 insertion(+), 44 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 91b40213..9d783986 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -9,11 +9,6 @@ // Select the objective function // ---------------------- -// The objective function is the sum of squares of the difference in winning percentage -// See learner.cpp for more information. - -//#define LOSS_FUNCTION_IS_WINNING_PERCENTAGE - // Objective function is cross entropy // See learner.cpp for more information. // So-called ordinary "rag cloth squeezer" @@ -126,9 +121,7 @@ typedef float LearnFloatType; #define LOSS_FUNCTION_IS_ELMO_METHOD -#if defined(LOSS_FUNCTION_IS_WINNING_PERCENTAGE) -#define LOSS_FUNCTION "WINNING_PERCENTAGE" -#elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY) +#if defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY) #define LOSS_FUNCTION "CROSS_ENTOROPY" #elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE) #define LOSS_FUNCTION "CROSS_ENTOROPY_FOR_VALUE" diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index daea9594..e9658da6 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -163,42 +163,6 @@ namespace Learner return ((y2 - y1) / epsilon) / winning_probability_coefficient; } - // When the objective function is the sum of squares of the difference in winning percentage -#if defined (LOSS_FUNCTION_IS_WINNING_PERCENTAGE) -// function to calculate the gradient - double calc_grad(Value deep, Value shallow, PackedSfenValue& psv) - { - // The square of the win rate difference minimizes it in the objective function. - // Objective function J = 1/2m Σ (win_rate(shallow)-win_rate(deep) )^2 - // However, σ is a sigmoid function that converts the - // evaluation value into the difference in the winning percentage. - // m is the number of samples. shallow is the evaluation value - // for a shallow search (qsearch()). deep is the evaluation value for deep search. - // If W is the feature vector (parameter of the evaluation function) - // and Xi and Yi are teachers - // shallow = W*Xi // * is the Hadamard product, transposing W and meaning X - // f(Xi) = win_rate(W*Xi) - // If σ(i th deep) = Yi, - // J = m/2 Σ (f(Xi)-Yi )^2 - // becomes a common expression. - // W is a vector, and if we write the jth element as Wj, from the chain rule - // ∂J/∂Wj = ∂J/∂f ・∂f/∂W ・∂W/∂Wj - // = 1/m Σ (f(Xi)-y) ・f'(Xi) ・ 1 - - // 1/m will be multiplied later, but the contents of Σ can - // be retained in the array as the value of the gradient. - // f'(Xi) = win_rate'(shallow) = sigmoid'(shallow/600) = dsigmoid(shallow / 600) / 600 - // This /600 at the end is adjusted by the learning rate, so do not write it.. - // Also, the coefficient of 1/m is unnecessary if you use the update - // formula that has the automatic gradient adjustment function like Adam and AdaGrad. - // Therefore, it is not necessary to save it in memory. 
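The sign-only SGD branch removed by patch 18 ignored the gradient's magnitude and moved each weight one unit against the gradient's sign, clamped to the storage type. A compact standalone sketch of that rule (names illustrative):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <limits>

template <typename T>
void sgd_sign_update(T& v, float g) {
    if (g == 0.0f)
        return;
    // Only the sign of g matters: g > 0 means the loss grows with v.
    int stepped = int(v) + (g > 0.0f ? -1 : +1);
    stepped = std::clamp(stepped,
                         int(std::numeric_limits<T>::min()),
                         int(std::numeric_limits<T>::max()));
    v = T(stepped);
}

int main() {
    std::int16_t w = 0;
    sgd_sign_update(w, -3.7f);   // negative gradient -> weight increases
    std::printf("w = %d\n", w);  // prints 1
    return 0;
}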
- - const double p = winning_percentage(deep, psv.gamePly); - const double q = winning_percentage(shallow, psv.gamePly); - return (q - p) * Math::dsigmoid(double(shallow) / 600.0); - } -#endif - #if defined (LOSS_FUNCTION_IS_CROSS_ENTOROPY) double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) { From f52fbf8006174023fa137feda1d7db67a884ac2e Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:10:04 +0900 Subject: [PATCH 20/30] Removed LOSS_FUNCTION_IS_CROSS_ENTOROPY macro. --- src/learn/learn.h | 9 +-------- src/learn/learner.cpp | 29 ----------------------------- 2 files changed, 1 insertion(+), 37 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 9d783986..da542d67 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -9,11 +9,6 @@ // Select the objective function // ---------------------- -// Objective function is cross entropy -// See learner.cpp for more information. -// So-called ordinary "rag cloth squeezer" -//#define LOSS_FUNCTION_IS_CROSS_ENTOROPY - // A version in which the objective function is cross entropy, but the win rate function is not passed // #define LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE @@ -121,9 +116,7 @@ typedef float LearnFloatType; #define LOSS_FUNCTION_IS_ELMO_METHOD -#if defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY) -#define LOSS_FUNCTION "CROSS_ENTOROPY" -#elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE) +#if defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE) #define LOSS_FUNCTION "CROSS_ENTOROPY_FOR_VALUE" #elif defined(LOSS_FUNCTION_IS_ELMO_METHOD) #define LOSS_FUNCTION "ELMO_METHOD(WCSC27)" diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index e9658da6..66835ce5 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -163,35 +163,6 @@ namespace Learner return ((y2 - y1) / epsilon) / winning_probability_coefficient; } -#if defined (LOSS_FUNCTION_IS_CROSS_ENTOROPY) - double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) - { - // Objective function with cross entropy - - // For the concept and nature of cross entropy, - // http://nnadl-ja.github.io/nnadl_site_ja/chap3.html#the_cross-entropy_cost_function - // http://postd.cc/visual-information-theory-3/ - // Refer to etc. - - // Objective function design) - // We want to make the distribution of p closer to the distribution of q - // → Think of it as the problem of minimizing the cross entropy - // between the probability distributions of p and q. - // J = H(p,q) =-Σ p(x) log(q(x)) = -p log q-(1-p) log(1-q) - // x - - // p is a constant and q is a Wi function (q = σ(W・Xi) ). - // ∂J/∂Wi = -p・q'/q-(1-p)(1-q)'/(1-q) - // = ... - // = q-p. - - const double p = winning_percentage(deep, psv.gamePly); - const double q = winning_percentage(shallow, psv.gamePly); - - return q - p; - } -#endif - #if defined ( LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE ) double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) { From ef1601218db703b42e31b34d8c324f0ec3001f83 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:11:11 +0900 Subject: [PATCH 21/30] Removed LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE macro. 
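Patches 19 through 21 delete the alternative calc_grad() definitions and leave only the elmo variant. All of them lean on the identity the removed comments derive: with q = sigmoid(x) and a fixed target probability p, the cross entropy J = -p*log(q) - (1-p)*log(1-q) has derivative dJ/dx = q - p. A quick numeric confirmation of that identity:

#include <cmath>
#include <cstdio>

double sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }

double cross_entropy_at(double p, double x) {
    double q = sigmoid(x);
    return -p * std::log(q) - (1.0 - p) * std::log(1.0 - q);
}

int main() {
    const double p = 0.7, x = 0.3, h = 1e-6;
    // Central difference vs. the closed form the trainer used.
    double numeric  = (cross_entropy_at(p, x + h) - cross_entropy_at(p, x - h)) / (2 * h);
    double analytic = sigmoid(x) - p;  // the "q - p" form
    std::printf("numeric %.8f vs analytic %.8f\n", numeric, analytic);
    return 0;
}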
--- src/learn/learn.h | 7 +------ src/learn/learner.cpp | 11 ----------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index da542d67..d2477277 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -9,9 +9,6 @@ // Select the objective function // ---------------------- -// A version in which the objective function is cross entropy, but the win rate function is not passed -// #define LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE - // elmo (WCSC27) method // #define LOSS_FUNCTION_IS_ELMO_METHOD @@ -116,9 +113,7 @@ typedef float LearnFloatType; #define LOSS_FUNCTION_IS_ELMO_METHOD -#if defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE) -#define LOSS_FUNCTION "CROSS_ENTOROPY_FOR_VALUE" -#elif defined(LOSS_FUNCTION_IS_ELMO_METHOD) +#if defined(LOSS_FUNCTION_IS_ELMO_METHOD) #define LOSS_FUNCTION "ELMO_METHOD(WCSC27)" #endif diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 66835ce5..82bcfa09 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -163,17 +163,6 @@ namespace Learner return ((y2 - y1) / epsilon) / winning_probability_coefficient; } -#if defined ( LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE ) - double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) - { - // Version that does not pass the winning percentage function - // This, unless EVAL_LIMIT is set low, trying to - // match the evaluation value with the shape of the end stage - // eval may exceed the range of eval. - return shallow - deep; - } -#endif - #if defined ( LOSS_FUNCTION_IS_ELMO_METHOD ) // A constant used in elmo (WCSC27). Adjustment required. From dbad9d96e0fc2923edfdbef37162ecd5b0645d50 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:17:02 +0900 Subject: [PATCH 22/30] Removed LOSS_FUNCTION_IS_ELMO_METHOD macro. --- src/learn/learn.h | 19 --------------- src/learn/learner.cpp | 54 +------------------------------------------ 2 files changed, 1 insertion(+), 72 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index d2477277..2ee2f8d6 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -5,21 +5,6 @@ #include -// ---------------------- -// Select the objective function -// ---------------------- - -// elmo (WCSC27) method -// #define LOSS_FUNCTION_IS_ELMO_METHOD - -// ※ Other things may be added. - - -// ---------------------- -// debug settings for learning -// ---------------------- - - // ---------------------- // learning from zero vector // ---------------------- @@ -111,11 +96,7 @@ typedef float LearnFloatType; // Learning with the method of elmo (WCSC27) // ---------------------- -#define LOSS_FUNCTION_IS_ELMO_METHOD - -#if defined(LOSS_FUNCTION_IS_ELMO_METHOD) #define LOSS_FUNCTION "ELMO_METHOD(WCSC27)" -#endif // ---------------------- // Definition of struct used in Learner diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 82bcfa09..84cade5c 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -163,8 +163,6 @@ namespace Learner return ((y2 - y1) / epsilon) / winning_probability_coefficient; } -#if defined ( LOSS_FUNCTION_IS_ELMO_METHOD ) - // A constant used in elmo (WCSC27). Adjustment required. // Since elmo does not internally divide the expression, the value is different. // You can set this value with the learn command. @@ -293,7 +291,6 @@ namespace Learner (-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon)); } -#endif // Other objective functions may be considered in the future... 
double calc_grad(Value shallow, const PackedSfenValue& psv) { @@ -629,14 +626,12 @@ namespace Learner stop_flag(false), save_only_once(false) { -#if defined ( LOSS_FUNCTION_IS_ELMO_METHOD ) learn_sum_cross_entropy_eval = 0.0; learn_sum_cross_entropy_win = 0.0; learn_sum_cross_entropy = 0.0; learn_sum_entropy_eval = 0.0; learn_sum_entropy_win = 0.0; learn_sum_entropy = 0.0; -#endif newbob_scale = 1.0; newbob_decay = 1.0; @@ -689,15 +684,13 @@ namespace Learner // --- loss calculation -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) - // For calculation of learning data loss + // For calculation of learning data loss atomic learn_sum_cross_entropy_eval; atomic learn_sum_cross_entropy_win; atomic learn_sum_cross_entropy; atomic learn_sum_entropy_eval; atomic learn_sum_entropy_win; atomic learn_sum_entropy; -#endif shared_timed_mutex nn_mutex; double newbob_scale; @@ -759,13 +752,6 @@ namespace Learner std::cout << ", iteration " << epoch; std::cout << ", eta = " << Eval::get_eta() << ", "; -#if !defined(LOSS_FUNCTION_IS_ELMO_METHOD) - double sum_error = 0; - double sum_error2 = 0; - double sum_error3 = 0; -#endif - -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) // For calculation of verification data loss atomic test_sum_cross_entropy_eval, test_sum_cross_entropy_win, test_sum_cross_entropy; atomic test_sum_entropy_eval, test_sum_entropy_win, test_sum_entropy; @@ -779,7 +765,6 @@ namespace Learner // norm for learning atomic sum_norm; sum_norm = 0; -#endif // The number of times the pv first move of deep // search matches the pv first move of search(1). @@ -841,25 +826,11 @@ namespace Learner // Note) This code does not consider when // eval_limit is specified in the learn command. - // --- error calculation - -#if !defined(LOSS_FUNCTION_IS_ELMO_METHOD) - auto grad = calc_grad(deep_value, shallow_value, ps); - - // something like rmse - sum_error += grad * grad; - // Add the absolute value of the gradient - sum_error2 += abs(grad); - // Add the absolute value of the difference between the evaluation values - sum_error3 += abs(shallow_value - deep_value); -#endif - // --- calculation of cross entropy // For the time being, regarding the win rate and loss terms only in the elmo method // Calculate and display the cross entropy. 
-#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy; double test_entropy_eval, test_entropy_win, test_entropy; calc_cross_entropy( @@ -881,7 +852,6 @@ namespace Learner test_sum_entropy_win += test_entropy_win; test_sum_entropy += test_entropy; sum_norm += (double)abs(shallow_value); -#endif // Determine if the teacher's move and the score of the shallow search match { @@ -905,17 +875,6 @@ namespace Learner while (task_count) sleep(1); -#if !defined(LOSS_FUNCTION_IS_ELMO_METHOD) - // rmse = root mean square error: mean square error - // mae = mean absolute error: mean absolute error - auto dsig_rmse = std::sqrt(sum_error / (sfen_for_mse.size() + epsilon)); - auto dsig_mae = sum_error2 / (sfen_for_mse.size() + epsilon); - auto eval_mae = sum_error3 / (sfen_for_mse.size() + epsilon); - cout << " , dsig rmse = " << dsig_rmse << " , dsig mae = " << dsig_mae - << " , eval mae = " << eval_mae; -#endif - -#if defined(LOSS_FUNCTION_IS_ELMO_METHOD) latest_loss_sum += test_sum_cross_entropy - test_sum_entropy; latest_loss_count += sr.sfen_for_mse.size(); @@ -960,9 +919,6 @@ namespace Learner learn_sum_entropy_eval = 0.0; learn_sum_entropy_win = 0.0; learn_sum_entropy = 0.0; -#else - << endl; -#endif } void LearnerThink::thread_worker(size_t thread_id) @@ -1144,7 +1100,6 @@ namespace Learner ? Eval::evaluate(pos) : -Eval::evaluate(pos); -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) // Calculate loss for training data double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy; double learn_entropy_eval, learn_entropy_win, learn_entropy; @@ -1165,7 +1120,6 @@ namespace Learner learn_sum_entropy_eval += learn_entropy_eval; learn_sum_entropy_win += learn_entropy_win; learn_sum_entropy += learn_entropy; -#endif const double example_weight = (discount_rate != 0 && ply != (int)pv.size()) ? discount_rate : 1.0; @@ -1600,12 +1554,10 @@ namespace Learner // Turn on if you want to pass a pre-shuffled file. bool no_shuffle = false; -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) // elmo lambda ELMO_LAMBDA = 0.33; ELMO_LAMBDA2 = 0.33; ELMO_LAMBDA_LIMIT = 32000; -#endif // Discount rate. If this is set to a value other than 0, // the slope will be added even at other than the PV termination. @@ -1703,13 +1655,11 @@ namespace Learner else if (option == "freeze_kkpp") is >> freeze[3]; #endif -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) // LAMBDA else if (option == "lambda") is >> ELMO_LAMBDA; else if (option == "lambda2") is >> ELMO_LAMBDA2; else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT; -#endif else if (option == "reduction_gameply") is >> reduction_gameply; // shuffle related @@ -1900,11 +1850,9 @@ namespace Learner reduction_gameply = max(reduction_gameply, 1); cout << "reduction_gameply : " << reduction_gameply << endl; -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) cout << "LAMBDA : " << ELMO_LAMBDA << endl; cout << "LAMBDA2 : " << ELMO_LAMBDA2 << endl; cout << "LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl; -#endif cout << "mirror_percentage : " << mirror_percentage << endl; cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl; From f52165e1d3b8bebdd702e089eb9fdd7761d45076 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:19:14 +0900 Subject: [PATCH 23/30] Removed RESET_TO_ZERO_VECTOR macro. 
--- src/learn/learn.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 2ee2f8d6..6056e8c6 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -5,18 +5,6 @@ #include -// ---------------------- -// learning from zero vector -// ---------------------- - -// Start learning the evaluation function parameters from the zero vector. -// Initialize to zero, generate a game, learn from zero vector, -// Game generation → If you repeat learning, you will get parameters that do not depend on the professional game. (maybe) -// (very time consuming) - -//#define RESET_TO_ZERO_VECTOR - - // ---------------------- // Floating point for learning // ---------------------- From 5e2570267228653a11bf42c14d77d1baf26b99ac Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:19:53 +0900 Subject: [PATCH 24/30] Removed USE_TRIANGLE_WEIGHT_ARRAY macro. --- src/learn/learn.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 6056e8c6..ea622bce 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -23,15 +23,6 @@ typedef float LearnFloatType; //#include "half_float.h" //typedef HalfFloat::float16 LearnFloatType; -// ---------------------- -// save memory -// ---------------------- - -// Use a triangular array for the Weight array (of which is KPP) to save memory. -// If this is used, the weight array for learning will be about 3 times as large as the evaluation function file. - -#define USE_TRIANGLE_WEIGHT_ARRAY - // ---------------------- // dimension down // ---------------------- From eafa5693658a91e97612a04b2c620ec5a545e3a0 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:22:02 +0900 Subject: [PATCH 25/30] Removed macros for KPP factorization. --- src/learn/learn.h | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index ea622bce..0df71c7a 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -23,37 +23,6 @@ typedef float LearnFloatType; //#include "half_float.h" //typedef HalfFloat::float16 LearnFloatType; -// ---------------------- -// dimension down -// ---------------------- - -// Dimension reduction for mirrors (left/right symmetry) and inverse (forward/backward symmetry). -// All on by default. - -// Dimension reduction using mirror and inverse for KK. (Unclear effect) -// USE_KK_MIRROR_WRITE must be on when USE_KK_INVERSE_WRITE is on. -#define USE_KK_MIRROR_WRITE -#define USE_KK_INVERSE_WRITE - -// Dimension reduction using Mirror and Inverse for KKP. (Inverse is not so effective) -// When USE_KKP_INVERSE_WRITE is turned on, USE_KKP_MIRROR_WRITE must also be turned on. -#define USE_KKP_MIRROR_WRITE -#define USE_KKP_INVERSE_WRITE - -// Perform dimension reduction using a mirror for KPP. (Turning this off requires double the teacher position) -// KPP has no inverse. (Because there is only K on the front side) -#define USE_KPP_MIRROR_WRITE - -// Perform a dimension reduction using a mirror for KPPP. (Turning this off requires double the teacher position) -// KPPP has no inverse. (Because there is only K on the front side) -#define USE_KPPP_MIRROR_WRITE - -// Reduce the dimension by KPP for learning the KKPP component. -// Learning is very slow. -// Do not use as it is not debugged. 
-//#define USE_KKPP_LOWER_DIM - - // ====================== // Settings for creating teacher phases // ====================== From 8d763fb503fed49e4b7fa2be115e0fa6eb0e74d7 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:22:38 +0900 Subject: [PATCH 26/30] Removed LEARN_GENSFEN_USE_DRAW_RESULT macro. --- src/learn/learn.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 0df71c7a..b7ca18e8 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -23,19 +23,6 @@ typedef float LearnFloatType; //#include "half_float.h" //typedef HalfFloat::float16 LearnFloatType; -// ====================== -// Settings for creating teacher phases -// ====================== - -// ---------------------- -// write out the draw -// ---------------------- - -// When you reach a draw, write it out as a teacher position -// It's subtle whether it's better to do this. -// #define LEARN_GENSFEN_USE_DRAW_RESULT - - // ====================== // configure // ====================== From cea17c92f9ad91d0dd2d73db272e6ce6712ba048 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 08:53:57 +0900 Subject: [PATCH 27/30] Simplified evaluate_common.h. --- src/eval/evaluate_common.h | 59 ++++---------------------------------- 1 file changed, 5 insertions(+), 54 deletions(-) diff --git a/src/eval/evaluate_common.h b/src/eval/evaluate_common.h index 927783cd..989169b3 100644 --- a/src/eval/evaluate_common.h +++ b/src/eval/evaluate_common.h @@ -1,75 +1,26 @@ #ifndef _EVALUATE_COMMON_H_ #define _EVALUATE_COMMON_H_ +#if defined(EVAL_LEARN) + // A common header-like function for modern evaluation functions (EVAL_KPPT and EVAL_KPP_KKPT). -#include - -// KK file name -#define KK_BIN "KK_synthesized.bin" - -// KKP file name -#define KKP_BIN "KKP_synthesized.bin" - -// KPP file name -#define KPP_BIN "KPP_synthesized.bin" - -#include "../position.h" +#include namespace Eval { - // An operator that applies the function f to each parameter of the evaluation function. - // Used for parameter analysis etc. - // type indicates the survey target. - // type = -1 :KK,KKP,KPP all - // type = 0: KK only - // type = 1: KKP only - // type = 2: KPP only - void foreach_eval_param(std::functionf, int type = -1); - // -------------------------- // for learning // -------------------------- -#if defined(EVAL_LEARN) - // Initialize the gradient array during learning - // Pass the learning rate as an argument. If 0.0, the default value is used. - // The epoch of update_weights() gradually changes from eta to eta2 until eta_epoch. - // After eta2_epoch, gradually change from eta2 to eta3. - void init_grad(double eta1, uint64_t eta_epoch, double eta2, uint64_t eta2_epoch, double eta3); - - // Add the gradient difference value to the gradient array for all features that appear in the current phase. - // freeze[0]: Flag that kk does not learn - // freeze[1]: Flag that kkp does not learn - // freeze[2]: Flag that kpp does not learn - // freeze[3]: Flag that kppp does not learn - void add_grad(Position& pos, Color rootColor, double delt_grad, const std::array& freeze); - - // Do SGD or AdaGrad or something based on the current gradient. - // epoch: Generation counter (starting from 0) - // freeze[0]: Flag that kk does not learn - // freeze[1]: Flag that kkp does not learn - // freeze[2]: Flag that kpp does not learn - // freeze[3]: Flag that kppp does not learn - void update_weights(uint64_t epoch, const std::array& freeze); - // Save the evaluation function parameters to a file. 
// You can specify the extension added to the end of the file. void save_eval(std::string suffix); // Get the current eta. double get_eta(); - - // --learning related commands - - // A function that normalizes KK. Note that it is not completely equivalent to the original evaluation function. - // By making the values ​​of kkp and kpp as close to zero as possible, the value of the feature factor (which is zero) that did not appear during learning - // The idea of ​​ensuring it is valid. - void regularize_kk(); - -#endif - - } +#endif // defined(EVAL_LEARN) + #endif // _EVALUATE_KPPT_COMMON_H_ From 2583f689729f7644cb5a5ac6d0369c0c726c3141 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 08:58:10 +0900 Subject: [PATCH 28/30] Removed macros for KPP evaluate functions. --- src/eval/evaluate_common.h | 2 +- src/learn/learner.cpp | 16 ---------------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/src/eval/evaluate_common.h b/src/eval/evaluate_common.h index 989169b3..7799fe79 100644 --- a/src/eval/evaluate_common.h +++ b/src/eval/evaluate_common.h @@ -3,7 +3,7 @@ #if defined(EVAL_LEARN) -// A common header-like function for modern evaluation functions (EVAL_KPPT and EVAL_KPP_KKPT). +// A common header-like function for modern evaluation functions. #include diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 84cade5c..5d9b242f 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -1647,14 +1647,6 @@ namespace Learner else if (option == "freeze_kkp") is >> freeze[1]; else if (option == "freeze_kpp") is >> freeze[2]; -#if defined(EVAL_KPPT) || defined(EVAL_KPP_KKPT) || defined(EVAL_KPP_KKPT_FV_VAR) || defined(EVAL_NABLA) - -#elif defined(EVAL_KPPPT) || defined(EVAL_KPPP_KKPT) || defined(EVAL_HELICES) - else if (option == "freeze_kppp") is >> freeze[3]; -#elif defined(EVAL_KKPP_KKPT) || defined(EVAL_KKPPT) - else if (option == "freeze_kkpp") is >> freeze[3]; -#endif - // LAMBDA else if (option == "lambda") is >> ELMO_LAMBDA; else if (option == "lambda2") is >> ELMO_LAMBDA2; @@ -1858,14 +1850,6 @@ namespace Learner cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl; cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl; -#if defined(EVAL_KPPT) || defined(EVAL_KPP_KKPT) || defined(EVAL_KPP_KKPT_FV_VAR) || defined(EVAL_NABLA) - cout << "freeze_kk/kkp/kpp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << endl; -#elif defined(EVAL_KPPPT) || defined(EVAL_KPPP_KKPT) || defined(EVAL_HELICES) - cout << "freeze_kk/kkp/kpp/kppp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << " , " << freeze[3] << endl; -#elif defined(EVAL_KKPP_KKPT) || defined(EVAL_KKPPT) - cout << "freeze_kk/kkp/kpp/kkpp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << " , " << freeze[3] << endl; -#endif - // ----------------------------------- // various initialization // ----------------------------------- From 18648458117a35acb2617e9fe04192acca6ba2ae Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 09:26:42 +0900 Subject: [PATCH 29/30] Commented out unused parameters. 
--- src/nnue/features/castling_right.cpp | 6 +++--- src/nnue/features/enpassant.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/nnue/features/castling_right.cpp b/src/nnue/features/castling_right.cpp index 47fbd986..86fe06fe 100644 --- a/src/nnue/features/castling_right.cpp +++ b/src/nnue/features/castling_right.cpp @@ -26,7 +26,7 @@ namespace Eval { & ((castling_rights >> 2) & 3); } - for (int i = 0; i push_back(i); } @@ -36,7 +36,7 @@ namespace Eval { // Get a list of indices whose values ​​have changed from the previous one in the feature quantity void CastlingRight::AppendChangedIndices( const Position& pos, Color perspective, - IndexList* removed, IndexList* added) { + IndexList* removed, IndexList* /* added */) { int previous_castling_rights = pos.state()->previous->castlingRights; int current_castling_rights = pos.state()->castlingRights; @@ -54,7 +54,7 @@ namespace Eval { & ((current_castling_rights >> 2) & 3); } - for (int i = 0; i < kDimensions; ++i) { + for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) { if ((relative_previous_castling_rights & (i << 1)) && (relative_current_castling_rights & (i << 1)) == 0) { removed->push_back(i); diff --git a/src/nnue/features/enpassant.cpp b/src/nnue/features/enpassant.cpp index 77bc936e..386bd907 100644 --- a/src/nnue/features/enpassant.cpp +++ b/src/nnue/features/enpassant.cpp @@ -30,8 +30,8 @@ namespace Eval { // Get a list of indices whose values ??have changed from the previous one in the feature quantity void EnPassant::AppendChangedIndices( - const Position& pos, Color perspective, - IndexList* removed, IndexList* added) { + const Position& /* pos */, Color /* perspective */, + IndexList* /* removed */, IndexList* /* added */) { // Not implemented. assert(false); } From 4206a1edd069600da29b8ee5a99a486b7aa1603f Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 09:46:05 +0900 Subject: [PATCH 30/30] Renamed parameters to avoid shadowing other parameters. 
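Patch 29 above relies on a standard C++ idiom: commenting out a parameter's name keeps the function signature intact (callers are unaffected) while telling the compiler the value is intentionally unused, which silences -Wunused-parameter. A minimal sketch with placeholder types standing in for the engine's:

// Placeholder declarations; the real code uses the engine's Position,
// Color and IndexList types.
struct Position {};
enum Color { WHITE, BLACK };
struct IndexList {};

// The name survives as a comment, documenting intent without a warning.
void AppendChangedIndices(const Position& /* pos */, Color /* perspective */,
                          IndexList* /* removed */, IndexList* /* added */) {
    // Not implemented for this feature; all parameters intentionally unused.
}

int main() {
    Position pos;
    IndexList removed, added;
    AppendChangedIndices(pos, WHITE, &removed, &added);
    return 0;
}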
---
 src/nnue/nnue_test_command.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/nnue/nnue_test_command.cpp b/src/nnue/nnue_test_command.cpp
index c3a53c7d..5f0776ef 100644
--- a/src/nnue/nnue_test_command.cpp
+++ b/src/nnue/nnue_test_command.cpp
@@ -34,12 +34,12 @@ void TestFeatures(Position& pos) {
   std::vector<std::uint64_t> num_resets(kRefreshTriggers.size());
   constexpr IndexType kUnknown = -1;
   std::vector<IndexType> trigger_map(RawFeatures::kDimensions, kUnknown);
-  auto make_index_sets = [&](const Position& pos) {
+  auto make_index_sets = [&](const Position& position) {
     std::vector<std::vector<std::set<IndexType>>> index_sets(
         kRefreshTriggers.size(), std::vector<std::set<IndexType>>(2));
     for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
       Features::IndexList active_indices[2];
-      RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
+      RawFeatures::AppendActiveIndices(position, kRefreshTriggers[i],
                                        active_indices);
       for (const auto perspective : Colors) {
         for (const auto index : active_indices[perspective]) {
@@ -53,11 +53,11 @@ void TestFeatures(Position& pos) {
     }
     return index_sets;
   };
-  auto update_index_sets = [&](const Position& pos, auto* index_sets) {
+  auto update_index_sets = [&](const Position& position, auto* index_sets) {
    for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
       Features::IndexList removed_indices[2], added_indices[2];
       bool reset[2];
-      RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
+      RawFeatures::AppendChangedIndices(position, kRefreshTriggers[i],
                                         removed_indices, added_indices, reset);
       for (const auto perspective : Colors) {
         if (reset[perspective]) {
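The rename in this last patch addresses parameter shadowing: inside TestFeatures(Position& pos), a lambda parameter also named pos hides the enclosing pos, which -Wshadow reports and which makes it easy to operate on the wrong object. A minimal, self-contained reproduction of the before/after, using illustrative names rather than the engine's types:

#include <cstdio>

struct Position { int ply = 0; };

void test_features(Position& pos) {
    pos.ply = 3;
    // Before: [&](const Position& pos) { ... } -- the parameter would shadow
    // the outer pos, so the body could no longer reach it by name.
    auto sum_plies = [&](const Position& position) {
        return position.ply + pos.ply;  // both objects stay addressable
    };
    Position other;
    other.ply = 4;
    std::printf("%d\n", sum_plies(other));  // prints 7
}

int main() {
    Position p;
    test_features(p);
    return 0;
}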