// Learning routines: // // 1) Automatic generation of game records in .bin format // → "gensfen" command // // 2) Learning evaluation function parameters from the generated .bin files // → "learn" command // // → Shuffle in the teacher phase is also an extension of this command. // Example) "learn shuffle" // // 3) Automatic generation of fixed traces // → "makebook think" command // → implemented in extra/book/book.cpp // // 4) Post-station automatic review mode // → I will not be involved in the engine because it is a problem that the GUI should assist. // etc.. #include "learn.h" #include "autograd.h" #include "sfen_reader.h" #include "misc.h" #include "position.h" #include "thread.h" #include "tt.h" #include "uci.h" #include "search.h" #include "timeman.h" #include "nnue/evaluate_nnue.h" #include "nnue/evaluate_nnue_learner.h" #include "syzygy/tbprobe.h" #include #include #include // std::exp(),std::pow(),std::log() #include // memcpy() #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined (_OPENMP) #include #endif using namespace std; template T operator +=(std::atomic& x, const T rhs) { T old = x.load(std::memory_order_consume); // It is allowed that the value is rewritten from other thread at this timing. // The idea that the value is not destroyed is good. T desired = old + rhs; while (!x.compare_exchange_weak(old, desired, std::memory_order_release, std::memory_order_consume)) desired = old + rhs; return desired; } template T operator -= (std::atomic& x, const T rhs) { return x += -rhs; } namespace Learner { static double winning_probability_coefficient = 1.0 / PawnValueEg / 4.0 * std::log(10.0); // Score scale factors. ex) If we set src_score_min_value = 0.0, // src_score_max_value = 1.0, dest_score_min_value = 0.0, // dest_score_max_value = 10000.0, [0.0, 1.0] will be scaled to [0, 10000]. static double src_score_min_value = 0.0; static double src_score_max_value = 1.0; static double dest_score_min_value = 0.0; static double dest_score_max_value = 1.0; // A constant used in elmo (WCSC27). Adjustment required. // Since elmo does not internally divide the expression, the value is different. // You can set this value with the learn command. // 0.33 is equivalent to the constant (0.5) used in elmo (WCSC27) static double elmo_lambda_low = 1.0; static double elmo_lambda_high = 1.0; static double elmo_lambda_limit = 32000; // Using stockfish's WDL with win rate model instead of sigmoid static bool use_wdl = false; static void append_files_from_dir( std::vector& filenames, const std::string& base_dir, const std::string& target_dir) { string kif_base_dir = Path::combine(base_dir, target_dir); sys::path p(kif_base_dir); // Origin of enumeration std::for_each(sys::directory_iterator(p), sys::directory_iterator(), [&](const sys::path& path) { if (sys::is_regular_file(path)) filenames.push_back(Path::combine(target_dir, path.filename().generic_string())); }); } static void rebase_files( std::vector& filenames, const std::string& base_dir) { for (auto& file : filenames) { file = Path::combine(base_dir, file); } } static double calculate_lambda(double teacher_signal) { // If the evaluation value in deep search exceeds elmo_lambda_limit // then apply elmo_lambda_high instead of elmo_lambda_low. const double lambda = (std::abs(teacher_signal) >= elmo_lambda_limit) ? elmo_lambda_high : elmo_lambda_low; return lambda; } // We use our own simple static autograd for automatic // differentiation of the loss function. While it works it has it's caveats. // To work fast enough it requires memoization and reference semantics. // Memoization is mostly opaque to the user and is only per eval basis. // As for reference semantics, we cannot copy every node, // because we need a way to reuse computation. // But we can't really use shared_ptr because of the overhead. That means // that we have to ensure all parts of a loss expression are not destroyed // before use. When lvalue references are used to construct a node it will // store just a reference, it only perform a copy of the rvalue reference arguments. // This means that we need some storage for the whole computation tree // that keeps the values after function returns and never moves them to // a different memory location. This means that we cannot use local // variables and just return by value - because there may be dangling references left. // We also cannot create a struct with this tree on demand because one cannot // use `auto` as a struct members. This is a big issue, and the only way // to solve it as of now is to use static thread_local variables and rely on the // following assumptions: // 1. the expression node must not change for the duration of the program // within a single instance of a function. This is usually not a problem // because almost all information is carried by the type. There is an // exception though, we have ConstantRef and Constant nodes that // do not encode the constants in the type, so it's possible // that these nodes are different on the first call to the function // then later. We MUST ensure that one function is only ever used // for one specific expression. // 2. thread_local variables are not expensive. Usually after creation // it only requires a single unsynchronized boolean check and that's // how most compilers implement it. // // So the general way to do things right now is to use static thread_local // variables for all named autograd nodes. Results being nodes should be // returned by reference, so that there's no need to copy the returned objects. // Parameters being nodes should be taken by lvalue reference if they are // used more than once (to enable reference semantics to reuse computation), // but they can be rvalues and forward on first use if there's only one use // of the node in the scope. // We must keep in mind that the node tree created by such a function // is never going to change as thread_local variables are initialized // on first call. This means that one cannot use one function as a factory // for different autograd expression trees. template static auto& cross_entropy_( ShallowT& q_, TeacherT& p_, ResultT& t_, LambdaT& lambda_ ) { using namespace Learner::Autograd::UnivariateStatic; constexpr double epsilon = 1e-12; static thread_local auto teacher_entropy_ = -(p_ * log(p_ + epsilon) + (1.0 - p_) * log(1.0 - p_ + epsilon)); static thread_local auto outcome_entropy_ = -(t_ * log(t_ + epsilon) + (1.0 - t_) * log(1.0 - t_ + epsilon)); static thread_local auto teacher_loss_ = -(p_ * log(q_) + (1.0 - p_) * log(1.0 - q_)); static thread_local auto outcome_loss_ = -(t_ * log(q_) + (1.0 - t_) * log(1.0 - q_)); static thread_local auto result_ = lambda_ * teacher_loss_ + (1.0 - lambda_) * outcome_loss_; static thread_local auto entropy_ = lambda_ * teacher_entropy_ + (1.0 - lambda_) * outcome_entropy_; static thread_local auto cross_entropy_ = result_ - entropy_; return cross_entropy_; } template static auto& scale_score_(ValueT&& v_) { using namespace Learner::Autograd::UnivariateStatic; // Normalize to [0.0, 1.0]. static thread_local auto normalized_ = (std::forward(v_) - ConstantRef(src_score_min_value)) / (ConstantRef(src_score_max_value) - ConstantRef(src_score_min_value)); // Scale to [dest_score_min_value, dest_score_max_value]. static thread_local auto scaled_ = normalized_ * (ConstantRef(dest_score_max_value) - ConstantRef(dest_score_min_value)) + ConstantRef(dest_score_min_value); return scaled_; } static Value scale_score(Value v) { // Normalize to [0.0, 1.0]. auto normalized = ((double)v - src_score_min_value) / (src_score_max_value - src_score_min_value); // Scale to [dest_score_min_value, dest_score_max_value]. auto scaled = normalized * (dest_score_max_value - dest_score_min_value) + dest_score_min_value; return Value(scaled); } template static auto& expected_perf_(ValueT&& v_) { using namespace Learner::Autograd::UnivariateStatic; static thread_local auto perf_ = sigmoid(std::forward(v_) * ConstantRef(winning_probability_coefficient)); return perf_; } template static auto& expected_perf_use_wdl_( ValueT& v_, PlyT&& ply_ ) { using namespace Learner::Autograd::UnivariateStatic; // Coefficients of a 3rd order polynomial fit based on fishtest data // for two parameters needed to transform eval to the argument of a // logistic function. static constexpr T as[] = { -8.24404295, 64.23892342, -95.73056462, 153.86478679 }; static constexpr T bs[] = { -3.37154371, 28.44489198, -56.67657741, 72.05858751 }; // The model captures only up to 240 plies, so limit input (and rescale) static thread_local auto m_ = std::forward(ply_) / 64.0; static thread_local auto a_ = (((as[0] * m_ + as[1]) * m_ + as[2]) * m_) + as[3]; static thread_local auto b_ = (((bs[0] * m_ + bs[1]) * m_ + bs[2]) * m_) + bs[3]; // Return win rate in per mille static thread_local auto sv_ = (v_ - a_) / b_; static thread_local auto svn_ = (-v_ - a_) / b_; static thread_local auto win_pct_ = sigmoid(sv_); static thread_local auto loss_pct_ = sigmoid(svn_); static thread_local auto draw_pct_ = 1.0 - win_pct_ - loss_pct_; static thread_local auto perf_ = win_pct_ + draw_pct_ * 0.5; return perf_; } static double expected_perf_use_wdl( Value v, int ply ) { // Coefficients of a 3rd order polynomial fit based on fishtest data // for two parameters needed to transform eval to the argument of a // logistic function. static constexpr double as[] = { -8.24404295, 64.23892342, -95.73056462, 153.86478679 }; static constexpr double bs[] = { -3.37154371, 28.44489198, -56.67657741, 72.05858751 }; // The model captures only up to 240 plies, so limit input (and rescale) auto m = ply / 64.0; auto a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; auto b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; // Return win rate in per mille auto sv = ((double)v - a) / b; auto svn = ((double)-v - a) / b; auto win_pct = Math::sigmoid(sv); auto loss_pct = Math::sigmoid(svn); auto draw_pct = 1.0 - win_pct - loss_pct; auto perf = win_pct + draw_pct * 0.5; return perf; } [[maybe_unused]] static ValueWithGrad get_loss_noob( Value shallow, Value teacher_signal, int result, int /* ply */) { using namespace Learner::Autograd::UnivariateStatic; static thread_local auto q_ = VariableParameter{}; static thread_local auto p_ = ConstantParameter{}; static thread_local auto loss_ = pow(q_ - p_, 2.0) * (1.0 / (2400.0 * 2.0 * 600.0)); auto args = std::tuple( (double)shallow, (double)teacher_signal, (double)result, calculate_lambda(teacher_signal) ); return loss_.eval(args); } static auto& get_loss_cross_entropy_() { using namespace Learner::Autograd::UnivariateStatic; static thread_local auto& q_ = expected_perf_(VariableParameter{}); static thread_local auto& p_ = expected_perf_(scale_score_(ConstantParameter{})); static thread_local auto t_ = (ConstantParameter{} + 1.0) * 0.5; static thread_local auto lambda_ = ConstantParameter{}; static thread_local auto& loss_ = cross_entropy_(q_, p_, t_, lambda_); return loss_; } static auto get_loss_cross_entropy_args( Value shallow, Value teacher_signal, int result) { return std::tuple( (double)shallow, (double)teacher_signal, (double)result, calculate_lambda(teacher_signal) ); } static ValueWithGrad get_loss_cross_entropy( Value shallow, Value teacher_signal, int result, int /* ply */) { using namespace Learner::Autograd::UnivariateStatic; static thread_local auto& loss_ = get_loss_cross_entropy_(); auto args = get_loss_cross_entropy_args(shallow, teacher_signal, result); return loss_.eval(args); } static ValueWithGrad get_loss_cross_entropy_no_grad( Value shallow, Value teacher_signal, int result, int /* ply */) { using namespace Learner::Autograd::UnivariateStatic; static thread_local auto& loss_ = get_loss_cross_entropy_(); auto args = get_loss_cross_entropy_args(shallow, teacher_signal, result); return { loss_.value(args), 0.0 }; } static auto& get_loss_cross_entropy_use_wdl_() { using namespace Learner::Autograd::UnivariateStatic; static thread_local auto ply_ = ConstantParameter{}; static thread_local auto shallow_ = VariableParameter{}; static thread_local auto& q_ = expected_perf_use_wdl_(shallow_, ply_); // We could do just this but MSVC crashes with an internal compiler error :( // static thread_local auto& scaled_teacher_ = scale_score_(ConstantParameter{}); // static thread_local auto& p_ = expected_perf_use_wdl_(scaled_teacher_, ply_); static thread_local auto p_ = ConstantParameter{}; static thread_local auto t_ = (ConstantParameter{} + 1.0) * 0.5; static thread_local auto lambda_ = ConstantParameter{}; static thread_local auto& loss_ = cross_entropy_(q_, p_, t_, lambda_); return loss_; } static auto get_loss_cross_entropy_use_wdl_args( Value shallow, Value teacher_signal, int result, int ply) { return std::tuple( (double)shallow, // This is required because otherwise MSVC crashes :( expected_perf_use_wdl(scale_score(teacher_signal), ply), (double)result, calculate_lambda(teacher_signal), (double)std::min(240, ply) ); } static ValueWithGrad get_loss_cross_entropy_use_wdl( Value shallow, Value teacher_signal, int result, int ply) { using namespace Learner::Autograd::UnivariateStatic; static thread_local auto& loss_ = get_loss_cross_entropy_use_wdl_(); auto args = get_loss_cross_entropy_use_wdl_args(shallow, teacher_signal, result, ply); return loss_.eval(args); } static ValueWithGrad get_loss_cross_entropy_use_wdl_no_grad( Value shallow, Value teacher_signal, int result, int ply) { using namespace Learner::Autograd::UnivariateStatic; static thread_local auto& loss_ = get_loss_cross_entropy_use_wdl_(); auto args = get_loss_cross_entropy_use_wdl_args(shallow, teacher_signal, result, ply); return { loss_.value(args), 0.0 }; } static auto get_loss(Value shallow, Value teacher_signal, int result, int ply) { using namespace Learner::Autograd::UnivariateStatic; if (use_wdl) { return get_loss_cross_entropy_use_wdl(shallow, teacher_signal, result, ply); } else { return get_loss_cross_entropy(shallow, teacher_signal, result, ply); } } static auto get_loss_no_grad(Value shallow, Value teacher_signal, int result, int ply) { using namespace Learner::Autograd::UnivariateStatic; if (use_wdl) { return get_loss_cross_entropy_use_wdl_no_grad(shallow, teacher_signal, result, ply); } else { return get_loss_cross_entropy_no_grad(shallow, teacher_signal, result, ply); } } [[maybe_unused]] static auto get_loss( Value teacher_signal, Value shallow, const PackedSfenValue& psv) { return get_loss(shallow, teacher_signal, psv.game_result, psv.gamePly); } static auto get_loss_no_grad( Value teacher_signal, Value shallow, const PackedSfenValue& psv) { return get_loss_no_grad(shallow, teacher_signal, psv.game_result, psv.gamePly); } // Class to generate sfen with multiple threads struct LearnerThink { struct Params { // Mini batch size size. Be sure to set it on the side that uses this class. uint64_t mini_batch_size = LEARN_MINI_BATCH_SIZE; // Number of phases used for calculation such as mse // mini-batch size = 1M is standard, so 0.2% of that should be negligible in terms of time. // Since search() is performed with depth = 1 in calculation of // move match rate, simple comparison is not possible... uint64_t validation_count = 2000; // Option to exclude early stage from learning int reduction_gameply = 1; // If the absolute value of the evaluation value of the deep search // of the teacher phase exceeds this value, discard the teacher phase. int eval_limit = 32000; // Flag whether to dig a folder each time the evaluation function is saved. // If true, do not dig the folder. bool save_only_once = false; bool shuffle = true; bool verbose = false; double newbob_decay = 0.5; int newbob_num_trials = 4; uint64_t auto_lr_drop = 0; std::string best_nn_directory; uint64_t eval_save_interval = LEARN_EVAL_SAVE_INTERVAL; uint64_t loss_output_interval = 1'000'000; size_t sfen_read_size = SfenReader::DEFAULT_SFEN_READ_SIZE; size_t thread_buffer_size = SfenReader::DEFAULT_THREAD_BUFFER_SIZE; bool use_draw_games_in_training = true; bool use_draw_games_in_validation = true; bool skip_duplicated_positions_in_training = true; bool assume_quiet = false; bool smart_fen_skipping = false; bool smart_fen_skipping_for_validation = false; double learning_rate = 1.0; double max_grad = 1.0; string validation_set_file_name; string seed; std::vector filenames; uint64_t num_threads; void enforce_constraints() { num_threads = Options["Threads"]; if (loss_output_interval == 0) { loss_output_interval = LEARN_RMSE_OUTPUT_INTERVAL * mini_batch_size; } // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1. reduction_gameply = max(reduction_gameply, 1); if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) { // Save the current net to [EvalSaveDir]\original. Eval::NNUE::save_eval("original"); // Set the folder above to best_nn_directory so that the trainer can // resotre the network parameters from the original net file. best_nn_directory = Path::combine(Options["EvalSaveDir"], "original"); } } }; LearnerThink(const Params& prm) : params(prm), prng(prm.seed), train_sr( prm.filenames, prm.shuffle, SfenReaderMode::Cyclic, prm.num_threads, std::to_string(prng.next_random_seed()), prm.sfen_read_size, prm.thread_buffer_size), validation_sr( prm.validation_set_file_name.empty() ? prm.filenames : std::vector{ prm.validation_set_file_name }, prm.shuffle, SfenReaderMode::Cyclic, 1, std::to_string(prng.next_random_seed()), prm.sfen_read_size, prm.thread_buffer_size), learn_loss_sum{} { save_count = 0; loss_output_count = 0; last_lr_drop = 0; best_loss = std::numeric_limits::infinity(); latest_loss_sum = 0.0; latest_loss_count = 0; total_done = 0; trials = params.newbob_num_trials; dir_number = 0; } void learn(uint64_t epochs); private: static void set_learning_search_limits(); PSVector fetch_next_validation_set(); void learn_worker(Thread& th, std::atomic& counter, uint64_t limit); void update_weights(const PSVector& psv, uint64_t epoch); void calc_loss(const PSVector& psv, uint64_t epoch); void calc_loss_worker( Thread& th, std::atomic& counter, const PSVector& psv, Loss& test_loss_sum, atomic& sum_norm, atomic& move_accord_count ); bool has_depth1_move_agreement(Position& pos, Move pvmove); bool check_progress(); // save merit function parameters to a file bool save(bool is_final = false); Params params; PRNG prng; // sfen reader SfenReader train_sr; SfenReader validation_sr; uint64_t save_count; uint64_t loss_output_count; std::atomic stop_flag; uint64_t total_done; uint64_t last_lr_drop; double best_loss; double latest_loss_sum; uint64_t latest_loss_count; int trials; int dir_number; // For calculation of learning data loss Loss learn_loss_sum; }; void LearnerThink::set_learning_search_limits() { Threads.main()->ponder = false; // About Search::Limits // Be careful because this member variable is global and affects other threads. auto& limits = Search::Limits; limits.startTime = now(); // Make the search equivalent to the "go infinite" command. (Because it is troublesome if time management is done) limits.infinite = true; // Since PV is an obstacle when displayed, erase it. limits.silent = true; // If you use this, it will be compared with the accumulated nodes of each thread. Therefore, do not use it. limits.nodes = 0; // depth is also processed by the one passed as an argument of Learner::search(). limits.depth = 0; } PSVector LearnerThink::fetch_next_validation_set() { PSVector validation_data; auto mainThread = Threads.main(); mainThread->execute_with_worker([&validation_data, this](auto& th){ auto do_include_predicate = [&th, this](const PackedSfenValue& ps) -> bool { if (params.eval_limit < abs(ps.score)) return false; if (!params.use_draw_games_in_validation && ps.game_result == 0) return false; if (params.smart_fen_skipping_for_validation) { StateInfo si; auto& pos = th.rootPos; if (pos.set_from_packed_sfen(ps.sfen, &si, &th) != 0) return false; if (pos.capture_or_promotion((Move)ps.move) || pos.checkers()) return false; } return true; }; validation_data = validation_sr.read_some( params.validation_count, params.validation_count * 100, // to have a reasonable bound on the running time. do_include_predicate ); }); mainThread->wait_for_worker_finished(); return validation_data; } void LearnerThink::learn(uint64_t epochs) { #if defined(_OPENMP) omp_set_num_threads((int)Options["Threads"]); #endif set_learning_search_limits(); Eval::NNUE::verify_any_net_loaded(); const PSVector validation_data = fetch_next_validation_set(); if (validation_data.size() != params.validation_count) { auto out = sync_region_cout.new_region(); out << "INFO (learn): Error reading validation data. Read " << validation_data.size() << " out of " << params.validation_count << '\n' << "INFO (learn): This either means that less than 1% of the validation data passed the filter" << " or the file is empty\n"; return; } if (params.newbob_decay != 1.0) { calc_loss(validation_data, 0); best_loss = latest_loss_sum / latest_loss_count; latest_loss_sum = 0.0; latest_loss_count = 0; auto out = sync_region_cout.new_region(); out << "INFO (learn): initial loss = " << best_loss << endl; } stop_flag = false; for(uint64_t epoch = 1; epoch <= epochs; ++epoch) { std::atomic counter{0}; Threads.execute_with_workers([this, &counter](auto& th){ learn_worker(th, counter, params.mini_batch_size); }); total_done += params.mini_batch_size; Threads.wait_for_workers_finished(); if (stop_flag) break; update_weights(validation_data, epoch); if (stop_flag) break; } Eval::NNUE::finalize_net(); save(true); } void LearnerThink::learn_worker(Thread& th, std::atomic& counter, uint64_t limit) { const auto thread_id = th.thread_idx(); auto& pos = th.rootPos; std::vector> state(MAX_PLY); while(!stop_flag) { const auto iter = counter.fetch_add(1); if (iter >= limit) break; PackedSfenValue ps; RETRY_READ:; if (!train_sr.read_to_thread_buffer(thread_id, ps)) { // If we ran out of data we stop completely // because there's nothing left to do. stop_flag = true; break; } if (params.eval_limit < abs(ps.score)) goto RETRY_READ; if (!params.use_draw_games_in_training && ps.game_result == 0) goto RETRY_READ; // Skip over the opening phase if (ps.gamePly < prng.rand(params.reduction_gameply)) goto RETRY_READ; StateInfo si; if (pos.set_from_packed_sfen(ps.sfen, &si, &th) != 0) { // Malformed sfen auto out = sync_region_cout.new_region(); out << "ERROR: illigal packed sfen = " << pos.fen() << endl; goto RETRY_READ; } const auto rootColor = pos.side_to_move(); // A function that adds the current `pos` and `ps` // to the training set. auto pos_add_grad = [&]() { // Evaluation value of deep search const Value shallow_value = Eval::evaluate(pos); Eval::NNUE::add_example(pos, rootColor, shallow_value, ps, 1.0); }; if (!pos.pseudo_legal((Move)ps.move) || !pos.legal((Move)ps.move)) { goto RETRY_READ; } // We don't need to qsearch when doing smart skipping if (!params.assume_quiet && !params.smart_fen_skipping) { int ply = 0; pos.do_move((Move)ps.move, state[ply++]); // Evaluation value of shallow search (qsearch) const auto [_, pv] = Search::qsearch(pos); for (auto m : pv) { pos.do_move(m, state[ply++]); } } if (params.smart_fen_skipping && (pos.capture_or_promotion((Move)ps.move) || pos.checkers())) { goto RETRY_READ; } // We want to position being trained on not to be terminal if (MoveList(pos).size() == 0) goto RETRY_READ; // Since we have reached the end phase of PV, add the slope here. pos_add_grad(); } } void LearnerThink::update_weights(const PSVector& psv, uint64_t epoch) { // I'm not sure this fencing is correct. But either way there // should be no real issues happening since // the read/write phases are isolated. atomic_thread_fence(memory_order_seq_cst); learn_loss_sum += Eval::NNUE::update_parameters( Threads, epoch, params.verbose, params.learning_rate, params.max_grad, get_loss); atomic_thread_fence(memory_order_seq_cst); if (++save_count * params.mini_batch_size >= params.eval_save_interval) { save_count = 0; const bool converged = save(); if (converged) { stop_flag = true; return; } } if (++loss_output_count * params.mini_batch_size >= params.loss_output_interval) { loss_output_count = 0; // loss calculation calc_loss(psv, epoch); Eval::NNUE::check_health(); } } void LearnerThink::calc_loss(const PSVector& psv, uint64_t epoch) { TT.new_search(); TimePoint elapsed = now() - Search::Limits.startTime + 1; auto out = sync_region_cout.new_region(); out << "\n"; out << "PROGRESS (calc_loss): " << now_string() << ", " << total_done << " sfens" << ", " << total_done * 1000 / elapsed << " sfens/second" << ", epoch " << epoch << endl; out << " - learning rate = " << params.learning_rate << endl; // For calculation of verification data loss Loss test_loss_sum{}; // norm for learning atomic sum_norm{0.0}; // The number of times the pv first move of deep // search matches the pv first move of search(1). atomic move_accord_count{0}; auto mainThread = Threads.main(); mainThread->execute_with_worker([&out](auto& th){ auto& pos = th.rootPos; StateInfo si; pos.set(StartFEN, false, &si, &th); out << " - startpos eval = " << Eval::evaluate(pos) << endl; }); mainThread->wait_for_worker_finished(); // The number of tasks to do. atomic counter{0}; Threads.execute_with_workers([&](auto& th){ calc_loss_worker( th, counter, psv, test_loss_sum, sum_norm, move_accord_count ); }); Threads.wait_for_workers_finished(); latest_loss_sum += test_loss_sum.value(); latest_loss_count += psv.size(); if (psv.size() && test_loss_sum.count() > 0) { test_loss_sum.print_only_loss("val", out); if (learn_loss_sum.count() > 0) { learn_loss_sum.print_with_grad("train", out); } out << " - norm = " << sum_norm << endl; out << " - move accuracy = " << (move_accord_count * 100.0 / psv.size()) << "%" << endl; } else { out << "ERROR: psv.size() = " << psv.size() << " , done = " << test_loss_sum.count() << endl; } learn_loss_sum.reset(); } void LearnerThink::calc_loss_worker( Thread& th, std::atomic& counter, const PSVector& psv, Loss& test_loss_sum, atomic& sum_norm, atomic& move_accord_count ) { Loss local_loss_sum{}; auto& pos = th.rootPos; for(;;) { const auto task_id = counter.fetch_add(1); if (task_id >= psv.size()) { break; } const auto& ps = psv[task_id]; StateInfo si; if (pos.set_from_packed_sfen(ps.sfen, &si, &th) != 0) { cout << "Error! : illegal packed sfen " << pos.fen() << endl; continue; } const Value shallow_value = Eval::evaluate(pos); // Evaluation value of deep search const auto deep_value = (Value)ps.score; const auto loss = get_loss_no_grad( deep_value, shallow_value, ps); local_loss_sum += loss; sum_norm += (double)abs(shallow_value); // Threat all moves with equal scores as first. This is up to move ordering. if (has_depth1_move_agreement(pos, (Move)ps.move)) move_accord_count.fetch_add(1, std::memory_order_relaxed); } test_loss_sum += local_loss_sum; } bool LearnerThink::has_depth1_move_agreement(Position& pos, Move pvmove) { // Determine if the depth 1 search pv matches the move from the dataset. // Do a manual depth 1 search so we're not affected by previous searches. std::vector> child_scores; // Call evaluate once for the rootpos so that the evals // for children moves use incremental feature transformer updates. (void)Eval::evaluate(pos); // Just to get guaranteed alignment. std::vector> states(1); auto legal_moves = MoveList(pos); for (auto m : legal_moves) { pos.do_move(m, states[0]); // We don't care if the king is in check or stuff like that. // not a big issue and nnue should digest all. auto value = -Eval::evaluate(pos); child_scores.emplace_back(m, value); pos.undo_move(m); } if (child_scores.empty()) return false; std::sort( child_scores.begin(), child_scores.end(), [](auto& lhs, auto& rhs) { return lhs.second > rhs.second; } ); // Require the best move to have strictly higher score than the next one. return child_scores[0].first == pvmove && (child_scores.size() == 1 || child_scores[1].second != child_scores[0].second); } bool LearnerThink::check_progress() { auto out = sync_region_cout.new_region(); const double latest_loss = latest_loss_sum / latest_loss_count; bool converged = false; latest_loss_sum = 0.0; latest_loss_count = 0; auto drop_lr = [&]() { last_lr_drop = total_done; out << " - reducing learning rate from " << params.learning_rate << " to " << (params.learning_rate * params.newbob_decay) << " (" << trials << " more trials)" << endl; params.learning_rate *= params.newbob_decay; }; auto accept = [&]() { out << " - loss = " << latest_loss << " < best (" << best_loss << "), accepted" << endl; best_loss = latest_loss; trials = params.newbob_num_trials; }; auto reject = [&]() { out << " - loss = " << latest_loss << " >= best (" << best_loss << "), rejected" << endl; --trials; if (trials > 0) { drop_lr(); return false; } else { return true; } }; out << "INFO (learning_rate):" << endl; if (params.auto_lr_drop) { accept(); if (total_done >= last_lr_drop + params.auto_lr_drop) { drop_lr(); } } else if (latest_loss < best_loss) { accept(); } else { converged = reject(); } if (converged) { out << " - converged" << endl; } return converged; } // Write evaluation function file. bool LearnerThink::save(bool is_final) { // Each time you save, change the extension part of the file name like "0","1","2",.. // (Because I want to compare the winning rate for each evaluation function parameter later) bool converged = false; if (params.save_only_once) { // When EVAL_SAVE_ONLY_ONCE is defined, // Do not dig a subfolder because I want to save it only once. Eval::NNUE::save_eval(""); } else if (is_final) { Eval::NNUE::save_eval("final"); converged = true; } else { // TODO: consider naming the output directory by epoch. const std::string dir_name = std::to_string(dir_number++); Eval::NNUE::save_eval(dir_name); if (params.newbob_decay != 1.0 && latest_loss_count > 0) { converged = check_progress(); params.best_nn_directory = Path::combine((std::string)Options["EvalSaveDir"], dir_name); } } return converged; } // Learning from the generated game record void learn(istringstream& is) { LearnerThink::Params params; // Number of epochs uint64_t epochs = std::numeric_limits::max(); // Game file storage folder (get game file with relative path from here) string base_dir; string target_dir; uint64_t nn_batch_size = 1000; string nn_options; auto out = sync_region_cout.new_region(); // Assume the filenames are staggered. while (true) { string option; is >> option; if (option == "") break; // specify the number of phases of mini-batch if (option == "bat") { is >> params.mini_batch_size; params.mini_batch_size *= 10000; // Unit is ten thousand } // Specify the folder in which the game record is stored and make it the rooting target. else if (option == "targetdir") is >> target_dir; else if (option == "targetfile") { std::string filename; is >> filename; params.filenames.push_back(filename); } else if (option == "validation_count") is >> params.validation_count; // Specify the number of loops else if (option == "epochs") is >> epochs; // Game file storage folder (get game file with relative path from here) else if (option == "basedir") is >> base_dir; // Mini batch size else if (option == "batchsize") is >> params.mini_batch_size; // learning rate else if (option == "lr") is >> params.learning_rate; else if (option == "max_grad") is >> params.max_grad; // Accept also the old option name. else if (option == "use_draw_in_training" || option == "use_draw_games_in_training") is >> params.use_draw_games_in_training; // Accept also the old option name. else if (option == "use_draw_in_validation" || option == "use_draw_games_in_validation") is >> params.use_draw_games_in_validation; // Accept also the old option name. else if (option == "use_hash_in_training" || option == "skip_duplicated_positions_in_training") is >> params.skip_duplicated_positions_in_training; else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient; // Using WDL with win rate model instead of sigmoid else if (option == "use_wdl") is >> use_wdl; // LAMBDA else if (option == "lambda") is >> elmo_lambda_low; else if (option == "lambda2") is >> elmo_lambda_high; else if (option == "lambda_limit") is >> elmo_lambda_limit; else if (option == "reduction_gameply") is >> params.reduction_gameply; else if (option == "eval_limit") is >> params.eval_limit; else if (option == "save_only_once") params.save_only_once = true; else if (option == "no_shuffle") params.shuffle = false; else if (option == "nn_batch_size") is >> nn_batch_size; else if (option == "newbob_decay") is >> params.newbob_decay; else if (option == "newbob_num_trials") is >> params.newbob_num_trials; else if (option == "nn_options") is >> nn_options; else if (option == "auto_lr_drop") is >> params.auto_lr_drop; else if (option == "eval_save_interval") is >> params.eval_save_interval; else if (option == "loss_output_interval") is >> params.loss_output_interval; else if (option == "validation_set_file_name") is >> params.validation_set_file_name; else if (option == "src_score_min_value") is >> src_score_min_value; else if (option == "src_score_max_value") is >> src_score_max_value; else if (option == "dest_score_min_value") is >> dest_score_min_value; else if (option == "dest_score_max_value") is >> dest_score_max_value; else if (option == "sfen_read_size") is >> params.sfen_read_size; else if (option == "thread_buffer_size") is >> params.thread_buffer_size; else if (option == "seed") is >> params.seed; else if (option == "set_recommended_uci_options") { UCI::setoption("Use NNUE", "pure"); UCI::setoption("MultiPV", "1"); UCI::setoption("Contempt", "0"); UCI::setoption("Skill Level", "20"); UCI::setoption("UCI_Chess960", "false"); UCI::setoption("UCI_AnalyseMode", "false"); UCI::setoption("UCI_LimitStrength", "false"); UCI::setoption("PruneAtShallowDepth", "false"); UCI::setoption("EnableTranspositionTable", "false"); } else if (option == "verbose") params.verbose = true; else if (option == "assume_quiet") params.assume_quiet = true; else if (option == "smart_fen_skipping") params.smart_fen_skipping = true; else if (option == "smart_fen_skipping_for_validation") params.smart_fen_skipping_for_validation = true; else { out << "INFO: Unknown option: " << option << ". Ignoring.\n"; } } out << "INFO: Executing learn command\n"; // Issue a warning if OpenMP is disabled. #if !defined(_OPENMP) out << "WARNING: OpenMP disabled." << endl; #endif params.enforce_constraints(); // Right now we only have the individual files. // We need to apply base_dir here if (!target_dir.empty()) { append_files_from_dir(params.filenames, base_dir, target_dir); } rebase_files(params.filenames, base_dir); out << "INFO: Input files:\n"; for (auto s : params.filenames) out << " - " << s << '\n'; out << "INFO: Parameters:\n"; if (!params.validation_set_file_name.empty()) { out << " - validation set : " << params.validation_set_file_name << endl; } out << " - validation count : " << params.validation_count << endl; out << " - epochs : " << epochs << endl; out << " - epochs * minibatch size : " << epochs * params.mini_batch_size << endl; out << " - eval_limit : " << params.eval_limit << endl; out << " - save_only_once : " << (params.save_only_once ? "true" : "false") << endl; out << " - shuffle on read : " << (params.shuffle ? "true" : "false") << endl; out << " - Loss Function : " << LOSS_FUNCTION << endl; out << " - minibatch size : " << params.mini_batch_size << endl; out << " - nn_batch_size : " << nn_batch_size << endl; out << " - nn_options : " << nn_options << endl; out << " - learning rate : " << params.learning_rate << endl; out << " - max_grad : " << params.max_grad << endl; out << " - use draws in training : " << params.use_draw_games_in_training << endl; out << " - use draws in validation : " << params.use_draw_games_in_validation << endl; out << " - skip repeated positions : " << params.skip_duplicated_positions_in_training << endl; out << " - winning prob coeff : " << winning_probability_coefficient << endl; out << " - use_wdl : " << use_wdl << endl; out << " - src_score_min_value : " << src_score_min_value << endl; out << " - src_score_max_value : " << src_score_max_value << endl; out << " - dest_score_min_value : " << dest_score_min_value << endl; out << " - dest_score_max_value : " << dest_score_max_value << endl; out << " - reduction_gameply : " << params.reduction_gameply << endl; out << " - elmo_lambda_low : " << elmo_lambda_low << endl; out << " - elmo_lambda_high : " << elmo_lambda_high << endl; out << " - elmo_lambda_limit : " << elmo_lambda_limit << endl; out << " - eval_save_interval : " << params.eval_save_interval << " sfens" << endl; out << " - loss_output_interval : " << params.loss_output_interval << " sfens" << endl; out << " - sfen_read_size : " << params.sfen_read_size << endl; out << " - thread_buffer_size : " << params.thread_buffer_size << endl; out << " - smart_fen_skipping : " << params.smart_fen_skipping << endl; out << " - smart_fen_skipping_val : " << params.smart_fen_skipping_for_validation << endl; out << " - seed : " << params.seed << endl; out << " - verbose : " << (params.verbose ? "true" : "false") << endl; if (params.auto_lr_drop) { out << " - learning rate scheduling : every " << params.auto_lr_drop << " sfens" << endl; } else if (params.newbob_decay != 1.0) { out << " - learning rate scheduling : newbob with decay" << endl; out << " - newbob_decay : " << params.newbob_decay << endl; out << " - newbob_num_trials : " << params.newbob_num_trials << endl; } else { out << " - learning rate scheduling : fixed learning rate" << endl; } out << endl; out << "INFO: Started initialization." << endl; Eval::NNUE::initialize_training(params.seed, out); Eval::NNUE::set_batch_size(nn_batch_size); Eval::NNUE::set_options(nn_options); LearnerThink learn_think(params); out << "Finished initialization." << endl; out.unlock(); // Start learning. learn_think.learn(epochs); } } // namespace Learner