// Learning routines: // // 1) Automatic generation of game records in .bin format // → "gensfen" command // // 2) Learning evaluation function parameters from the generated .bin files // → "learn" command // // → Shuffle in the teacher phase is also an extension of this command. // Example) "learn shuffle" // // 3) Automatic generation of fixed traces // → "makebook think" command // → implemented in extra/book/book.cpp // // 4) Post-station automatic review mode // → I will not be involved in the engine because it is a problem that the GUI should assist. // etc.. #include "learn.h" #include "convert.h" #include "sfen_reader.h" #include "misc.h" #include "position.h" #include "thread.h" #include "tt.h" #include "uci.h" #include "search.h" #include "timeman.h" #include "nnue/evaluate_nnue.h" #include "nnue/evaluate_nnue_learner.h" #include "syzygy/tbprobe.h" #include #include #include // std::exp(),std::pow(),std::log() #include // memcpy() #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined (_OPENMP) #include #endif extern double global_learning_rate; using namespace std; template T operator +=(std::atomic& x, const T rhs) { T old = x.load(std::memory_order_consume); // It is allowed that the value is rewritten from other thread at this timing. // The idea that the value is not destroyed is good. T desired = old + rhs; while (!x.compare_exchange_weak(old, desired, std::memory_order_release, std::memory_order_consume)) desired = old + rhs; return desired; } template T operator -= (std::atomic& x, const T rhs) { return x += -rhs; } namespace Learner { static bool use_draw_games_in_training = true; static bool use_draw_games_in_validation = true; static bool skip_duplicated_positions_in_training = true; static double winning_probability_coefficient = 1.0 / PawnValueEg / 4.0 * std::log(10.0); // Score scale factors. ex) If we set src_score_min_value = 0.0, // src_score_max_value = 1.0, dest_score_min_value = 0.0, // dest_score_max_value = 10000.0, [0.0, 1.0] will be scaled to [0, 10000]. static double src_score_min_value = 0.0; static double src_score_max_value = 1.0; static double dest_score_min_value = 0.0; static double dest_score_max_value = 1.0; // Using stockfish's WDL with win rate model instead of sigmoid static bool use_wdl = false; namespace Detail { template struct Loss { using T = std::conditional_t< AtomicV, atomic, double >; T cross_entropy_eval{0.0}; T cross_entropy_win{0.0}; T cross_entropy{0.0}; T entropy_eval{0.0}; T entropy_win{0.0}; T entropy{0.0}; T count{0.0}; template Loss& operator += (const Loss& rhs) { cross_entropy_eval += rhs.cross_entropy_eval; cross_entropy_win += rhs.cross_entropy_win; cross_entropy += rhs.cross_entropy; entropy_eval += rhs.entropy_eval; entropy_win += rhs.entropy_win; entropy += rhs.entropy; count += rhs.count; return *this; } void reset() { cross_entropy_eval = 0.0; cross_entropy_win = 0.0; cross_entropy = 0.0; entropy_eval = 0.0; entropy_win = 0.0; entropy = 0.0; count = 0.0; } void print(const std::string& prefix, ostream& s) const { s << "INFO: " << prefix << "_cross_entropy_eval = " << cross_entropy_eval / count << " , " << prefix << "_cross_entropy_win = " << cross_entropy_win / count << " , " << prefix << "_entropy_eval = " << entropy_eval / count << " , " << prefix << "_entropy_win = " << entropy_win / count << " , " << prefix << "_cross_entropy = " << cross_entropy / count << " , " << prefix << "_entropy = " << entropy / count << endl; } }; } using Loss = Detail::Loss; using AtomicLoss = Detail::Loss; // A function that converts the evaluation value to the winning rate [0,1] double winning_percentage(double value) { // 1/(1+10^(-Eval/4)) // = 1/(1+e^(-Eval/4*ln(10)) // = sigmoid(Eval/4*ln(10)) return Math::sigmoid(value * winning_probability_coefficient); } // A function that converts the evaluation value to the winning rate [0,1] double winning_percentage_wdl(double value, int ply) { constexpr double wdl_total = 1000.0; constexpr double draw_score = 0.5; const double wdl_w = UCI::win_rate_model_double(value, ply); const double wdl_l = UCI::win_rate_model_double(-value, ply); const double wdl_d = wdl_total - wdl_w - wdl_l; return (wdl_w + wdl_d * draw_score) / wdl_total; } // A function that converts the evaluation value to the winning rate [0,1] double winning_percentage(double value, int ply) { if (use_wdl) { return winning_percentage_wdl(value, ply); } else { return winning_percentage(value); } } double calc_cross_entropy_of_winning_percentage( double deep_win_rate, double shallow_eval, int ply) { const double p = deep_win_rate; const double q = winning_percentage(shallow_eval, ply); return -p * std::log(q) - (1.0 - p) * std::log(1.0 - q); } double calc_d_cross_entropy_of_winning_percentage( double deep_win_rate, double shallow_eval, int ply) { constexpr double epsilon = 0.000001; const double y1 = calc_cross_entropy_of_winning_percentage( deep_win_rate, shallow_eval, ply); const double y2 = calc_cross_entropy_of_winning_percentage( deep_win_rate, shallow_eval + epsilon, ply); // Divide by the winning_probability_coefficient to // match scale with the sigmoidal win rate return ((y2 - y1) / epsilon) / winning_probability_coefficient; } // A constant used in elmo (WCSC27). Adjustment required. // Since elmo does not internally divide the expression, the value is different. // You can set this value with the learn command. // 0.33 is equivalent to the constant (0.5) used in elmo (WCSC27) double ELMO_LAMBDA = 0.33; double ELMO_LAMBDA2 = 0.33; double ELMO_LAMBDA_LIMIT = 32000; // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71 double get_scaled_signal(double signal) { double scaled_signal = signal; // Normalize to [0.0, 1.0]. scaled_signal = (scaled_signal - src_score_min_value) / (src_score_max_value - src_score_min_value); // Scale to [dest_score_min_value, dest_score_max_value]. scaled_signal = scaled_signal * (dest_score_max_value - dest_score_min_value) + dest_score_min_value; return scaled_signal; } // Teacher winning probability. double calculate_p(double teacher_signal, int ply) { const double scaled_teacher_signal = get_scaled_signal(teacher_signal); return winning_percentage(scaled_teacher_signal, ply); } double calculate_lambda(double teacher_signal) { // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT // then apply ELMO_LAMBDA2 instead of ELMO_LAMBDA. const double lambda = (std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 : ELMO_LAMBDA; return lambda; } double calculate_t(int game_result) { // Use 1 as the correction term if the expected win rate is 1, // 0 if you lose, and 0.5 if you draw. // game_result = 1,0,-1 so add 1 and divide by 2. const double t = double(game_result + 1) * 0.5; return t; } double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv) { // elmo (WCSC27) method // Correct with the actual game wins and losses. const double q = winning_percentage(shallow, psv.gamePly); const double p = calculate_p(teacher_signal, psv.gamePly); const double t = calculate_t(psv.game_result); const double lambda = calculate_lambda(teacher_signal); double grad; if (use_wdl) { const double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly); const double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly); grad = lambda * dce_p + (1.0 - lambda) * dce_t; } else { // Use the actual win rate as a correction term. // This is the idea of elmo (WCSC27), modern O-parts. grad = lambda * (q - p) + (1.0 - lambda) * (q - t); } return grad; } // Calculate cross entropy during learning // The individual cross entropy of the win/loss term and win // rate term of the elmo expression is returned // to the arguments cross_entropy_eval and cross_entropy_win. Loss calc_cross_entropy( Value teacher_signal, Value shallow, const PackedSfenValue& psv) { // Teacher winning probability. const double q = winning_percentage(shallow, psv.gamePly); const double p = calculate_p(teacher_signal, psv.gamePly); const double t = calculate_t(psv.game_result); const double lambda = calculate_lambda(teacher_signal); constexpr double epsilon = 0.000001; const double m = (1.0 - lambda) * t + lambda * p; Loss loss{}; loss.cross_entropy_eval = (-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon)); loss.cross_entropy_win = (-t * std::log(q + epsilon) - (1.0 - t) * std::log(1.0 - q + epsilon)); loss.entropy_eval = (-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon)); loss.entropy_win = (-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon)); loss.cross_entropy = (-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon)); loss.entropy = (-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon)); loss.count = 1; return loss; } // Other objective functions may be considered in the future... double calc_grad(Value shallow, const PackedSfenValue& psv) { return calc_grad((Value)psv.score, shallow, psv); } // Class to generate sfen with multiple threads struct LearnerThink { // Number of phases used for calculation such as mse // mini-batch size = 1M is standard, so 0.2% of that should be negligible in terms of time. // Since search() is performed with depth = 1 in calculation of // move match rate, simple comparison is not possible... static constexpr uint64_t sfen_for_mse_size = 2000; LearnerThink(uint64_t thread_num, const std::string& seed) : prng(seed), sr(thread_num, std::to_string(prng.next_random_seed())), learn_loss_sum{} { save_only_once = false; save_count = 0; loss_output_count = 0; newbob_decay = 1.0; newbob_num_trials = 2; auto_lr_drop = 0; last_lr_drop = 0; best_loss = std::numeric_limits::infinity(); latest_loss_sum = 0.0; latest_loss_count = 0; total_done = 0; } void set_do_shuffle(bool v) { sr.set_do_shuffle(v); } void add_file(const std::string& filename) { sr.add_file(filename); } void learn(); std::string validation_set_file_name; // Mini batch size size. Be sure to set it on the side that uses this class. uint64_t mini_batch_size = LEARN_MINI_BATCH_SIZE; // Option to exclude early stage from learning int reduction_gameply; // If the absolute value of the evaluation value of the deep search // of the teacher phase exceeds this value, discard the teacher phase. int eval_limit; // Flag whether to dig a folder each time the evaluation function is saved. // If true, do not dig the folder. bool save_only_once; double newbob_decay; int newbob_num_trials; uint64_t auto_lr_drop; std::string best_nn_directory; uint64_t eval_save_interval; uint64_t loss_output_interval; private: void learn_worker(Thread& th, std::atomic& counter, uint64_t limit); void update_weights(const PSVector& psv); void calc_loss(const PSVector& psv); void calc_loss_worker( Thread& th, std::atomic& counter, const PSVector& psv, AtomicLoss& test_loss_sum, atomic& sum_norm, atomic& move_accord_count ); Value get_shallow_value(Position& pos); // save merit function parameters to a file bool save(bool is_final = false); PRNG prng; // sfen reader SfenReader sr; uint64_t save_count; uint64_t loss_output_count; // Learning iteration counter uint64_t epoch = 0; std::atomic stop_flag; uint64_t total_done; uint64_t last_lr_drop; double best_loss; double latest_loss_sum; uint64_t latest_loss_count; // For calculation of learning data loss AtomicLoss learn_loss_sum; }; void LearnerThink::learn() { #if defined(_OPENMP) omp_set_num_threads((int)Options["Threads"]); #endif Eval::NNUE::verify_any_net_loaded(); // Start a thread that loads the training data in the background sr.start_file_read_worker(); const PSVector sfen_for_mse = validation_set_file_name.empty() ? sr.read_for_mse(sfen_for_mse_size) : sr.read_validation_set(validation_set_file_name, eval_limit, use_draw_games_in_validation); if (validation_set_file_name.empty() && sfen_for_mse.size() != sfen_for_mse_size) { cout << "Error reading sfen_for_mse. Read " << sfen_for_mse.size() << " out of " << sfen_for_mse_size << '\n'; sr.stop(); return; } if (newbob_decay != 1.0) { calc_loss(sfen_for_mse); best_loss = latest_loss_sum / latest_loss_count; latest_loss_sum = 0.0; latest_loss_count = 0; cout << "initial loss: " << best_loss << endl; } stop_flag = false; for(;;) { std::atomic counter{0}; Threads.execute_with_workers([this, &counter](auto& th){ learn_worker(th, counter, mini_batch_size); }); total_done += mini_batch_size; Threads.wait_for_workers_finished(); if (stop_flag) break; update_weights(sfen_for_mse); if (stop_flag) break; } sr.stop(); Eval::NNUE::finalize_net(); save(true); } void LearnerThink::learn_worker(Thread& th, std::atomic& counter, uint64_t limit) { const auto thread_id = th.thread_idx(); auto& pos = th.rootPos; Loss local_loss_sum{}; std::vector> state(MAX_PLY); while(!stop_flag) { const auto iter = counter.fetch_add(1); if (iter >= limit) break; PackedSfenValue ps; RETRY_READ:; if (!sr.read_to_thread_buffer(thread_id, ps)) { // If we ran out of data we stop completely // because there's nothing left to do. stop_flag = true; break; } if (eval_limit < abs(ps.score)) goto RETRY_READ; if (!use_draw_games_in_training && ps.game_result == 0) goto RETRY_READ; // Skip over the opening phase if (ps.gamePly < prng.rand(reduction_gameply)) goto RETRY_READ; StateInfo si; if (pos.set_from_packed_sfen(ps.sfen, &si, &th) != 0) { // Malformed sfen cout << "Error! : illigal packed sfen = " << pos.fen() << endl; goto RETRY_READ; } const auto rootColor = pos.side_to_move(); // A function that adds the current `pos` and `ps` // to the training set. auto pos_add_grad = [&]() { // Evaluation value of deep search const auto deep_value = (Value)ps.score; const Value shallow_value = (rootColor == pos.side_to_move()) ? Eval::evaluate(pos) : -Eval::evaluate(pos); const auto loss = calc_cross_entropy( deep_value, shallow_value, ps); local_loss_sum += loss; Eval::NNUE::add_example(pos, rootColor, ps, 1.0); }; if (!pos.pseudo_legal((Move)ps.move) || !pos.legal((Move)ps.move)) { goto RETRY_READ; } int ply = 0; pos.do_move((Move)ps.move, state[ply++]); // We want to position being trained on not to be terminal if (MoveList(pos).size() == 0) goto RETRY_READ; // Evaluation value of shallow search (qsearch) const auto [_, pv] = Search::qsearch(pos); for (auto m : pv) { pos.do_move(m, state[ply++]); } // Since we have reached the end phase of PV, add the slope here. pos_add_grad(); } learn_loss_sum += local_loss_sum; } void LearnerThink::update_weights(const PSVector& psv) { // I'm not sure this fencing is correct. But either way there // should be no real issues happening since // the read/write phases are isolated. atomic_thread_fence(memory_order_seq_cst); Eval::NNUE::update_parameters(); atomic_thread_fence(memory_order_seq_cst); ++epoch; if (++save_count * mini_batch_size >= eval_save_interval) { save_count = 0; const bool converged = save(); if (converged) { stop_flag = true; return; } } if (++loss_output_count * mini_batch_size >= loss_output_interval) { loss_output_count = 0; // loss calculation calc_loss(psv); Eval::NNUE::check_health(); } } void LearnerThink::calc_loss(const PSVector& psv) { TT.new_search(); TimePoint elapsed = now() - Search::Limits.startTime + 1; cout << "PROGRESS: " << now_string() << ", "; cout << total_done << " sfens, "; cout << total_done * 1000 / elapsed << " sfens/second"; cout << ", iteration " << epoch; cout << ", learning rate = " << global_learning_rate << ", "; // For calculation of verification data loss AtomicLoss test_loss_sum{}; // norm for learning atomic sum_norm{0.0}; // The number of times the pv first move of deep // search matches the pv first move of search(1). atomic move_accord_count{0}; auto mainThread = Threads.main(); mainThread->execute_with_worker([](auto& th){ auto& pos = th.rootPos; StateInfo si; pos.set(StartFEN, false, &si, &th); cout << "startpos eval = " << Eval::evaluate(pos) << endl; }); mainThread->wait_for_worker_finished(); // The number of tasks to do. atomic counter{0}; Threads.execute_with_workers([&](auto& th){ calc_loss_worker( th, counter, psv, test_loss_sum, sum_norm, move_accord_count ); }); Threads.wait_for_workers_finished(); latest_loss_sum += test_loss_sum.cross_entropy - test_loss_sum.entropy; latest_loss_count += psv.size(); if (psv.size() && test_loss_sum.count > 0.0) { cout << "INFO: norm = " << sum_norm << " , move accuracy = " << (move_accord_count * 100.0 / psv.size()) << "%" << endl; test_loss_sum.print("test", cout); if (learn_loss_sum.count > 0.0) { learn_loss_sum.print("learn", cout); } } else { cout << "Error! : psv.size() = " << psv.size() << " , done = " << test_loss_sum.count << endl; } learn_loss_sum.reset(); } void LearnerThink::calc_loss_worker( Thread& th, std::atomic& counter, const PSVector& psv, AtomicLoss& test_loss_sum, atomic& sum_norm, atomic& move_accord_count ) { Loss local_loss_sum{}; auto& pos = th.rootPos; for(;;) { const auto task_id = counter.fetch_add(1); if (task_id >= psv.size()) { break; } const auto& ps = psv[task_id]; StateInfo si; if (pos.set_from_packed_sfen(ps.sfen, &si, &th) != 0) { cout << "Error! : illegal packed sfen " << pos.fen() << endl; continue; } const Value shallow_value = get_shallow_value(pos); // Evaluation value of deep search const auto deep_value = (Value)ps.score; const auto loss = calc_cross_entropy( deep_value, shallow_value, ps); local_loss_sum += loss; sum_norm += (double)abs(shallow_value); // Determine if the teacher's move and the score of the shallow search match const auto [value, pv] = Search::search(pos, 1); if (pv.size() > 0 && (uint16_t)pv[0] == ps.move) move_accord_count.fetch_add(1, std::memory_order_relaxed); } test_loss_sum += local_loss_sum; } Value LearnerThink::get_shallow_value(Position& pos) { // Evaluation value for shallow search // The value of evaluate() may be used, but when calculating loss, learn_cross_entropy and // Use qsearch() because it is difficult to compare the values. // EvalHash has been disabled in advance. (If not, the same value will be returned every time) const auto [_, pv] = Search::qsearch(pos); const auto rootColor = pos.side_to_move(); std::vector> states(pv.size()); for (size_t i = 0; i < pv.size(); ++i) { pos.do_move(pv[i], states[i]); } const Value shallow_value = (rootColor == pos.side_to_move()) ? Eval::evaluate(pos) : -Eval::evaluate(pos); for (auto it = pv.rbegin(); it != pv.rend(); ++it) pos.undo_move(*it); return shallow_value; } // Write evaluation function file. bool LearnerThink::save(bool is_final) { // Each time you save, change the extension part of the file name like "0","1","2",.. // (Because I want to compare the winning rate for each evaluation function parameter later) if (save_only_once) { // When EVAL_SAVE_ONLY_ONCE is defined, // Do not dig a subfolder because I want to save it only once. Eval::NNUE::save_eval(""); } else if (is_final) { Eval::NNUE::save_eval("final"); return true; } else { static int dir_number = 0; const std::string dir_name = std::to_string(dir_number++); Eval::NNUE::save_eval(dir_name); if (newbob_decay != 1.0 && latest_loss_count > 0) { static int trials = newbob_num_trials; const double latest_loss = latest_loss_sum / latest_loss_count; latest_loss_sum = 0.0; latest_loss_count = 0; cout << "loss: " << latest_loss; auto tot = total_done; if (auto_lr_drop) { cout << " < best (" << best_loss << "), accepted" << endl; best_loss = latest_loss; best_nn_directory = Path::combine((std::string)Options["EvalSaveDir"], dir_name); trials = newbob_num_trials; if (tot >= last_lr_drop + auto_lr_drop) { last_lr_drop = tot; global_learning_rate *= newbob_decay; } } else if (latest_loss < best_loss) { cout << " < best (" << best_loss << "), accepted" << endl; best_loss = latest_loss; best_nn_directory = Path::combine((std::string)Options["EvalSaveDir"], dir_name); trials = newbob_num_trials; } else { cout << " >= best (" << best_loss << "), rejected" << endl; best_nn_directory = Path::combine((std::string)Options["EvalSaveDir"], dir_name); if (--trials > 0 && !is_final) { cout << "reducing learning rate from " << global_learning_rate << " to " << (global_learning_rate * newbob_decay) << " (" << trials << " more trials)" << endl; global_learning_rate *= newbob_decay; } } if (trials == 0) { cout << "converged" << endl; return true; } } } return false; } // Shuffle_files(), shuffle_files_quick() subcontracting, writing part. // output_file_name: Name of the file to write // prng: random number generator // sfen_file_streams: fstream of each teacher phase file // sfen_count_in_file: The number of teacher positions present in each file. void shuffle_write( const string& output_file_name, PRNG& prng, vector& sfen_file_streams, vector& sfen_count_in_file) { uint64_t total_sfen_count = 0; for (auto c : sfen_count_in_file) total_sfen_count += c; // number of exported phases uint64_t write_sfen_count = 0; // Output the progress on the screen for each phase. const uint64_t buffer_size = 10000000; auto print_status = [&]() { // Output progress every 10M phase or when all writing is completed if (((write_sfen_count % buffer_size) == 0) || (write_sfen_count == total_sfen_count)) { cout << write_sfen_count << " / " << total_sfen_count << endl; } }; cout << endl << "write : " << output_file_name << endl; fstream fs(output_file_name, ios::out | ios::binary); // total teacher positions uint64_t sfen_count_left = total_sfen_count; while (sfen_count_left != 0) { auto r = prng.rand(sfen_count_left); // Aspects stored in fs[0] file ... Aspects stored in fs[1] file ... //Think of it as a series like, and determine in which file r is pointing. // The contents of the file are shuffled, so you can take the next element from that file. // Each file has a_count[x] phases, so this process can be written as follows. uint64_t i = 0; while (sfen_count_in_file[i] <= r) r -= sfen_count_in_file[i++]; // This confirms n. Before you forget it, reduce the remaining number. --sfen_count_in_file[i]; --sfen_count_left; PackedSfenValue psv; // It's better to read and write all at once until the performance is not so good... if (sfen_file_streams[i].read((char*)&psv, sizeof(PackedSfenValue))) { fs.write((char*)&psv, sizeof(PackedSfenValue)); ++write_sfen_count; print_status(); } } print_status(); fs.close(); cout << "done!" << endl; } // Subcontracting the teacher shuffle "learn shuffle" command. // output_file_name: name of the output file where the shuffled teacher positions will be written void shuffle_files(const vector& filenames, const string& output_file_name, uint64_t buffer_size, const std::string& seed) { // The destination folder is // tmp/ for temporary writing // Temporary file is written to tmp/ folder for each buffer_size phase. // For example, if buffer_size = 20M, you need a buffer of 20M*40bytes = 800MB. // In a PC with a small memory, it would be better to reduce this. // However, if the number of files increases too much, // it will not be possible to open at the same time due to OS restrictions. // There should have been a limit of 512 per process on Windows, so you can open here as 500, // The current setting is 500 files x 20M = 10G = 10 billion phases. PSVector buf(buffer_size); // ↑ buffer, a marker that indicates how much you have used uint64_t buf_write_marker = 0; // File name to write (incremental counter because it is a serial number) uint64_t write_file_count = 0; // random number to shuffle // Do not use std::random_device(). Because it always the same integers on MinGW. PRNG prng(seed); // generate the name of the temporary file auto make_filename = [](uint64_t i) { return "tmp/" + to_string(i) + ".bin"; }; // Exported files in tmp/ folder, number of teacher positions stored in each vector a_count; auto write_buffer = [&](uint64_t size) { Algo::shuffle(buf, prng); // write to a file fstream fs; fs.open(make_filename(write_file_count++), ios::out | ios::binary); fs.write(reinterpret_cast(buf.data()), size * sizeof(PackedSfenValue)); fs.close(); a_count.push_back(size); buf_write_marker = 0; cout << "."; }; std::filesystem::create_directory("tmp"); // Shuffle and export as a 10M phase shredded file. for (auto filename : filenames) { fstream fs(filename, ios::in | ios::binary); cout << endl << "open file = " << filename; while (fs.read(reinterpret_cast(&buf[buf_write_marker]), sizeof(PackedSfenValue))) if (++buf_write_marker == buffer_size) write_buffer(buffer_size); // Read in units of sizeof(PackedSfenValue), // Ignore the last remaining fraction. (Fails in fs.read, so exit while) // (The remaining fraction seems to be half-finished data // that was created because it was stopped halfway during teacher generation.) } if (buf_write_marker != 0) write_buffer(buf_write_marker); // Only shuffled files have been written write_file_count. // As a second pass, if you open all of them at the same time, // select one at random and load one phase at a time // Now you have shuffled. // Original file for shirt full + tmp file + file to write // requires 3 times the storage capacity of the original file. // 1 billion SSD is not enough for shuffling because it is 400GB for 10 billion phases. // If you want to delete (or delete by hand) the // original file at this point after writing to tmp, // The storage capacity is about twice that of the original file. // So, maybe we should have an option to delete the original file. // Files are opened at the same time. It is highly possible that this will exceed FOPEN_MAX. // In that case, rather than adjusting buffer_size to reduce the number of files. vector afs; for (uint64_t i = 0; i < write_file_count; ++i) afs.emplace_back(fstream(make_filename(i), ios::in | ios::binary)); // Throw to the subcontract function and end. shuffle_write(output_file_name, prng, afs, a_count); } // Subcontracting the teacher shuffle "learn shuffleq" command. // This is written in 1 pass. // output_file_name: name of the output file where the shuffled teacher positions will be written void shuffle_files_quick(const vector& filenames, const string& output_file_name, const std::string& seed) { // random number to shuffle // Do not use std::random_device(). Because it always the same integers on MinGW. PRNG prng(seed); // number of files const size_t file_count = filenames.size(); // Number of teacher positions stored in each file in filenames vector sfen_count_in_file(file_count); // Count the number of teacher aspects in each file. vector sfen_file_streams(file_count); for (size_t i = 0; i < file_count; ++i) { auto filename = filenames[i]; auto& fs = sfen_file_streams[i]; fs.open(filename, ios::in | ios::binary); const uint64_t file_size = get_file_size(fs); const uint64_t sfen_count = file_size / sizeof(PackedSfenValue); sfen_count_in_file[i] = sfen_count; // Output the number of sfen stored in each file. cout << filename << " = " << sfen_count << " sfens." << endl; } // Since we know the file size of each file, // open them all at once (already open), // Select one at a time and load one phase at a time // Now you have shuffled. // Throw to the subcontract function and end. shuffle_write(output_file_name, prng, sfen_file_streams, sfen_count_in_file); } // Subcontracting the teacher shuffle "learn shufflem" command. // Read the whole memory and write it out with the specified file name. void shuffle_files_on_memory(const vector& filenames, const string output_file_name, const std::string& seed) { PSVector buf; for (auto filename : filenames) { std::cout << "read : " << filename << std::endl; read_file_to_memory(filename, [&buf](uint64_t size) { assert((size % sizeof(PackedSfenValue)) == 0); // Expand the buffer and read after the last end. uint64_t last = buf.size(); buf.resize(last + size / sizeof(PackedSfenValue)); return (void*)&buf[last]; }); } // shuffle from buf[0] to buf[size-1] // Do not use std::random_device(). Because it always the same integers on MinGW. PRNG prng(seed); uint64_t size = (uint64_t)buf.size(); std::cout << "shuffle buf.size() = " << size << std::endl; Algo::shuffle(buf, prng); std::cout << "write : " << output_file_name << endl; // If the file to be written exceeds 2GB, it cannot be // written in one shot with fstream::write, so use wrapper. write_memory_to_file( output_file_name, (void*)&buf[0], sizeof(PackedSfenValue) * buf.size()); std::cout << "..shuffle_on_memory done." << std::endl; } static void set_learning_search_limits() { // About Search::Limits // Be careful because this member variable is global and affects other threads. auto& limits = Search::Limits; limits.startTime = now(); // Make the search equivalent to the "go infinite" command. (Because it is troublesome if time management is done) limits.infinite = true; // Since PV is an obstacle when displayed, erase it. limits.silent = true; // If you use this, it will be compared with the accumulated nodes of each thread. Therefore, do not use it. limits.nodes = 0; // depth is also processed by the one passed as an argument of Learner::search(). limits.depth = 0; } // Learning from the generated game record void learn(Position&, istringstream& is) { const auto thread_num = (int)Options["Threads"]; vector filenames; // mini_batch_size 1M aspect by default. This can be increased. auto mini_batch_size = LEARN_MINI_BATCH_SIZE; // Number of loops (read the game record file this number of times) int loop = 1; // Game file storage folder (get game file with relative path from here) string base_dir; string target_dir; // --- Function that only shuffles the teacher aspect // normal shuffle bool shuffle_normal = false; uint64_t buffer_size = 20000000; // fast shuffling assuming each file is shuffled bool shuffle_quick = false; // A function to read the entire file in memory and shuffle it. // (Requires file size memory) bool shuffle_on_memory = false; // Conversion of packed sfen. In plain, it consists of sfen(string), // evaluation value (integer), move (eg 7g7f, string), result (loss-1, win 1, draw 0) bool use_convert_plain = false; // convert plain format teacher to Yaneura King's bin bool use_convert_bin = false; int ply_minimum = 0; int ply_maximum = 114514; bool interpolate_eval = 0; bool check_invalid_fen = false; bool check_illegal_move = false; // convert teacher in pgn-extract format to Yaneura King's bin bool use_convert_bin_from_pgn_extract = false; bool pgn_eval_side_to_move = false; bool convert_no_eval_fens_as_score_zero = false; // File name to write in those cases (default is "shuffled_sfen.bin") string output_file_name = "shuffled_sfen.bin"; // If the absolute value of the evaluation value // in the deep search of the teacher phase exceeds this value, // that phase is discarded. int eval_limit = 32000; // Flag to save the evaluation function file only once near the end. bool save_only_once = false; // Shuffle about what you are pre-reading on the teacher aspect. // (Shuffle of about 10 million phases) // Turn on if you want to pass a pre-shuffled file. bool no_shuffle = false; global_learning_rate = 1.0; // elmo lambda ELMO_LAMBDA = 1.0; ELMO_LAMBDA2 = 1.0; ELMO_LAMBDA_LIMIT = 32000; // if (gamePly > option; if (option == "") break; // specify the number of phases of mini-batch if (option == "bat") { is >> mini_batch_size; mini_batch_size *= 10000; // Unit is ten thousand } // Specify the folder in which the game record is stored and make it the rooting target. else if (option == "targetdir") is >> target_dir; else if (option == "targetfile") { std::string filename; is >> filename; filenames.push_back(filename); } // Specify the number of loops else if (option == "loop") is >> loop; // Game file storage folder (get game file with relative path from here) else if (option == "basedir") is >> base_dir; // Mini batch size else if (option == "batchsize") is >> mini_batch_size; // learning rate else if (option == "lr") is >> global_learning_rate; // Accept also the old option name. else if (option == "use_draw_in_training" || option == "use_draw_games_in_training") is >> use_draw_games_in_training; // Accept also the old option name. else if (option == "use_draw_in_validation" || option == "use_draw_games_in_validation") is >> use_draw_games_in_validation; // Accept also the old option name. else if (option == "use_hash_in_training" || option == "skip_duplicated_positions_in_training") is >> skip_duplicated_positions_in_training; else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient; // Using WDL with win rate model instead of sigmoid else if (option == "use_wdl") is >> use_wdl; // LAMBDA else if (option == "lambda") is >> ELMO_LAMBDA; else if (option == "lambda2") is >> ELMO_LAMBDA2; else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT; else if (option == "reduction_gameply") is >> reduction_gameply; // shuffle related else if (option == "shuffle") shuffle_normal = true; else if (option == "buffer_size") is >> buffer_size; else if (option == "shuffleq") shuffle_quick = true; else if (option == "shufflem") shuffle_on_memory = true; else if (option == "output_file_name") is >> output_file_name; else if (option == "eval_limit") is >> eval_limit; else if (option == "save_only_once") save_only_once = true; else if (option == "no_shuffle") no_shuffle = true; else if (option == "nn_batch_size") is >> nn_batch_size; else if (option == "newbob_decay") is >> newbob_decay; else if (option == "newbob_num_trials") is >> newbob_num_trials; else if (option == "nn_options") is >> nn_options; else if (option == "auto_lr_drop") is >> auto_lr_drop; else if (option == "eval_save_interval") is >> eval_save_interval; else if (option == "loss_output_interval") is >> loss_output_interval; else if (option == "validation_set_file_name") is >> validation_set_file_name; // Rabbit convert related else if (option == "convert_plain") use_convert_plain = true; else if (option == "convert_bin") use_convert_bin = true; else if (option == "interpolate_eval") is >> interpolate_eval; else if (option == "check_invalid_fen") is >> check_invalid_fen; else if (option == "check_illegal_move") is >> check_illegal_move; else if (option == "convert_bin_from_pgn-extract") use_convert_bin_from_pgn_extract = true; else if (option == "pgn_eval_side_to_move") is >> pgn_eval_side_to_move; else if (option == "convert_no_eval_fens_as_score_zero") is >> convert_no_eval_fens_as_score_zero; else if (option == "src_score_min_value") is >> src_score_min_value; else if (option == "src_score_max_value") is >> src_score_max_value; else if (option == "dest_score_min_value") is >> dest_score_min_value; else if (option == "dest_score_max_value") is >> dest_score_max_value; else if (option == "seed") is >> seed; else if (option == "set_recommended_uci_options") { UCI::setoption("Use NNUE", "pure"); UCI::setoption("MultiPV", "1"); UCI::setoption("Contempt", "0"); UCI::setoption("Skill Level", "20"); UCI::setoption("UCI_Chess960", "false"); UCI::setoption("UCI_AnalyseMode", "false"); UCI::setoption("UCI_LimitStrength", "false"); UCI::setoption("PruneAtShallowDepth", "false"); UCI::setoption("EnableTranspositionTable", "false"); } else { cout << "Unknown option: " << option << ". Ignoring.\n"; } } if (loss_output_interval == 0) { loss_output_interval = LEARN_RMSE_OUTPUT_INTERVAL * mini_batch_size; } cout << "learn command , "; // Issue a warning if OpenMP is disabled. #if !defined(_OPENMP) cout << "Warning! OpenMP disabled." << endl; #endif LearnerThink learn_think(thread_num, seed); // Display learning game file if (target_dir != "") { string kif_base_dir = Path::combine(base_dir, target_dir); namespace sys = std::filesystem; sys::path p(kif_base_dir); // Origin of enumeration std::for_each(sys::directory_iterator(p), sys::directory_iterator(), [&](const sys::path& path) { if (sys::is_regular_file(path)) filenames.push_back(Path::combine(target_dir, path.filename().generic_string())); }); } cout << "learn from "; for (auto s : filenames) cout << s << " , "; cout << endl; if (!validation_set_file_name.empty()) { cout << "validation set : " << validation_set_file_name << endl; } cout << "base dir : " << base_dir << endl; cout << "target dir : " << target_dir << endl; // shuffle mode if (shuffle_normal) { cout << "buffer_size : " << buffer_size << endl; cout << "shuffle mode.." << endl; shuffle_files(filenames, output_file_name, buffer_size, seed); return; } if (shuffle_quick) { cout << "quick shuffle mode.." << endl; shuffle_files_quick(filenames, output_file_name, seed); return; } if (shuffle_on_memory) { cout << "shuffle on memory.." << endl; shuffle_files_on_memory(filenames, output_file_name, seed); return; } if (use_convert_plain) { Eval::NNUE::init(); cout << "convert_plain.." << endl; convert_plain(filenames, output_file_name); return; } if (use_convert_bin) { Eval::NNUE::init(); cout << "convert_bin.." << endl; convert_bin( filenames, output_file_name, ply_minimum, ply_maximum, interpolate_eval, src_score_min_value, src_score_max_value, dest_score_min_value, dest_score_max_value, check_invalid_fen, check_illegal_move); return; } if (use_convert_bin_from_pgn_extract) { Eval::NNUE::init(); cout << "convert_bin_from_pgn-extract.." << endl; convert_bin_from_pgn_extract( filenames, output_file_name, pgn_eval_side_to_move, convert_no_eval_fens_as_score_zero); return; } cout << "loop : " << loop << endl; cout << "eval_limit : " << eval_limit << endl; cout << "save_only_once : " << (save_only_once ? "true" : "false") << endl; cout << "no_shuffle : " << (no_shuffle ? "true" : "false") << endl; cout << "Loss Function : " << LOSS_FUNCTION << endl; cout << "mini-batch size : " << mini_batch_size << endl; cout << "nn_batch_size : " << nn_batch_size << endl; cout << "nn_options : " << nn_options << endl; cout << "learning rate : " << global_learning_rate << endl; cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl; cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl; cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl; if (newbob_decay != 1.0) { cout << "scheduling : newbob with decay = " << newbob_decay << ", " << newbob_num_trials << " trials" << endl; } else { cout << "scheduling : default" << endl; } // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1. reduction_gameply = max(reduction_gameply, 1); cout << "reduction_gameply : " << reduction_gameply << endl; cout << "LAMBDA : " << ELMO_LAMBDA << endl; cout << "LAMBDA2 : " << ELMO_LAMBDA2 << endl; cout << "LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl; cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl; cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl; // ----------------------------------- // various initialization // ----------------------------------- cout << "init.." << endl; Threads.main()->ponder = false; set_learning_search_limits(); cout << "init_training.." << endl; Eval::NNUE::initialize_training(seed); Eval::NNUE::set_batch_size(nn_batch_size); Eval::NNUE::set_options(nn_options); if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) { // Save the current net to [EvalSaveDir]\original. Eval::NNUE::save_eval("original"); // Set the folder above to best_nn_directory so that the trainer can // resotre the network parameters from the original net file. learn_think.best_nn_directory = Path::combine(Options["EvalSaveDir"], "original"); } cout << "init done." << endl; // Reflect other option settings. learn_think.eval_limit = eval_limit; learn_think.save_only_once = save_only_once; learn_think.set_do_shuffle(!no_shuffle); learn_think.reduction_gameply = reduction_gameply; learn_think.newbob_decay = newbob_decay; learn_think.newbob_num_trials = newbob_num_trials; learn_think.auto_lr_drop = auto_lr_drop; learn_think.eval_save_interval = eval_save_interval; learn_think.loss_output_interval = loss_output_interval; learn_think.mini_batch_size = mini_batch_size; learn_think.validation_set_file_name = validation_set_file_name; // Insert the file name for the number of loops. for (int i = 0; i < loop; ++i) { for(auto& file : filenames) { learn_think.add_file(Path::combine(base_dir, file)); } } // Start learning. learn_think.learn(); } } // namespace Learner