diff --git a/src/Makefile b/src/Makefile index cc63ab15..0c6b21e5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -52,6 +52,8 @@ SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp extra/sfen_packer.cpp \ learn/gensfen2019.cpp \ learn/learner.cpp \ + learn/gensfen.cpp \ + learn/convert.cpp \ learn/learning_tools.cpp \ learn/multi_think.cpp @@ -891,7 +893,7 @@ learn: config-sanity EXTRACXXFLAGS=' -DEVAL_LEARN -DEVAL_NNUE -DENABLE_TEST_CMD -DUSE_BLAS -I/mingw64/include/OpenBLAS -fopenmp ' \ EXTRALDFLAGS=' -lopenblas -fopenmp -Wl,-s -static ' \ all - + profile-learn: config-sanity objclean profileclean @echo "" @echo "Step 1/4. Building instrumented executable ..." @@ -900,7 +902,7 @@ profile-learn: config-sanity objclean profileclean LEARNLDFLAGS=' -lopenblas -fopenmp -Wl,-s -static ' @echo "" @echo "Step 2/4. Running benchmark for pgo-build ..." - $(PGOGENSFEN) + $(PGOGENSFEN) @echo "" @echo "Step 3/4. Building optimized executable ..." $(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean diff --git a/src/eval/evaluate_common.h b/src/eval/evaluate_common.h index b043f2e1..dacbd2ba 100644 --- a/src/eval/evaluate_common.h +++ b/src/eval/evaluate_common.h @@ -15,6 +15,8 @@ // KPP file name #define KPP_BIN "KPP_synthesized.bin" +#include "../position.h" + namespace Eval { diff --git a/src/learn/convert.cpp b/src/learn/convert.cpp new file mode 100644 index 00000000..ebee8a96 --- /dev/null +++ b/src/learn/convert.cpp @@ -0,0 +1,515 @@ +#define EVAL_LEARN + +#if defined(EVAL_LEARN) + +// evaluate header for learning +#include "../eval/evaluate_common.h" + +#include "learn.h" +#include "multi_think.h" +#include "../uci.h" +#include "../syzygy/tbprobe.h" +#include "../misc.h" +#include "../thread.h" +#include "../position.h" +#include "../tt.h" + +#include +#include +#include +#include +#include +#include // std::exp(),std::pow(),std::log() +#include // memcpy() +#include +#include +#include +#include +#include +#include + +#if defined (_OPENMP) +#include +#endif + +#if defined(_MSC_VER) +// The C++ filesystem cannot be used unless it is C++17 or later or MSVC. +// I tried to use windows.h, but with g++ of msys2 I can not get the files in the folder well. +// Use dirent.h because there is no help for it. +#include +#elif defined(__GNUC__) +#include +#endif + +using namespace std; + +namespace Learner +{ + bool fen_is_ok(Position& pos, std::string input_fen) { + std::string pos_fen = pos.fen(); + std::istringstream ss_input(input_fen); + std::istringstream ss_pos(pos_fen); + + // example : "2r4r/4kpp1/nb1np3/p2p3p/B2P1BP1/PP6/4NPKP/2R1R3 w - h6 0 24" + // --> "2r4r/4kpp1/nb1np3/p2p3p/B2P1BP1/PP6/4NPKP/2R1R3" + std::string str_input, str_pos; + ss_input >> str_input; + ss_pos >> str_pos; + + // Only compare "Piece placement field" between input_fen and pos.fen(). 
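+        // Castling rights, the en passant square and the move counters are
+        // deliberately ignored here: if Position::set() could not make sense of
+        // the input, the regenerated placement field will normally differ, which
+        // is what the check_invalid_fen filter in convert_bin() relies on.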
+ return str_input == str_pos; + } + + void convert_bin( + const vector& filenames, + const string& output_file_name, + const int ply_minimum, + const int ply_maximum, + const int interpolate_eval, + const int src_score_min_value, + const int src_score_max_value, + const int dest_score_min_value, + const int dest_score_max_value, + const bool check_invalid_fen, + const bool check_illegal_move) + { + std::cout << "check_invalid_fen=" << check_invalid_fen << std::endl; + std::cout << "check_illegal_move=" << check_illegal_move << std::endl; + + std::fstream fs; + uint64_t data_size = 0; + uint64_t filtered_size = 0; + uint64_t filtered_size_fen = 0; + uint64_t filtered_size_move = 0; + uint64_t filtered_size_ply = 0; + auto th = Threads.main(); + auto& tpos = th->rootPos; + // convert plain rag to packed sfenvalue for Yaneura king + fs.open(output_file_name, ios::app | ios::binary); + StateListPtr states; + for (auto filename : filenames) { + std::cout << "convert " << filename << " ... "; + std::string line; + ifstream ifs; + ifs.open(filename); + PackedSfenValue p; + data_size = 0; + filtered_size = 0; + filtered_size_fen = 0; + filtered_size_move = 0; + filtered_size_ply = 0; + p.gamePly = 1; // Not included in apery format. Should be initialized + bool ignore_flag_fen = false; + bool ignore_flag_move = false; + bool ignore_flag_ply = false; + while (std::getline(ifs, line)) { + std::stringstream ss(line); + std::string token; + std::string value; + ss >> token; + if (token == "fen") { + states = StateListPtr(new std::deque(1)); // Drop old and create a new one + std::string input_fen = line.substr(4); + tpos.set(input_fen, false, &states->back(), Threads.main()); + if (check_invalid_fen && !fen_is_ok(tpos, input_fen)) { + ignore_flag_fen = true; + filtered_size_fen++; + } + else { + tpos.sfen_pack(p.sfen); + } + } + else if (token == "move") { + ss >> value; + Move move = UCI::to_move(tpos, value); + if (check_illegal_move && move == MOVE_NONE) { + ignore_flag_move = true; + filtered_size_move++; + } + else { + p.move = move; + } + } + else if (token == "score") { + double score; + ss >> score; + // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71 + // Normalize to [0.0, 1.0]. + score = (score - src_score_min_value) / (src_score_max_value - src_score_min_value); + // Scale to [dest_score_min_value, dest_score_max_value]. + score = score * (dest_score_max_value - dest_score_min_value) + dest_score_min_value; + p.score = Math::clamp((int32_t)std::round(score), -(int32_t)VALUE_MATE, (int32_t)VALUE_MATE); + } + else if (token == "ply") { + int temp; + ss >> temp; + if (temp < ply_minimum || temp > ply_maximum) { + ignore_flag_ply = true; + filtered_size_ply++; + } + p.gamePly = uint16_t(temp); // No cast here? + if (interpolate_eval != 0) { + p.score = min(3000, interpolate_eval * temp); + } + } + else if (token == "result") { + int temp; + ss >> temp; + p.game_result = int8_t(temp); // Do you need a cast here? 
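+                    // In the plain format this field is expected to be 1, 0 or -1
+                    // (win / draw / loss as seen from the side to move), so it fits
+                    // into int8_t; the explicit cast is not strictly required and
+                    // only documents the narrowing conversion.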
+ if (interpolate_eval) { + p.score = p.score * p.game_result; + } + } + else if (token == "e") { + if (!(ignore_flag_fen || ignore_flag_move || ignore_flag_ply)) { + fs.write((char*)&p, sizeof(PackedSfenValue)); + data_size += 1; + // debug + // std::cout< 0.25 * PawnValueEg + // #-4 --> -mate_in(4) + // #3 --> mate_in(3) + // -M4 --> -mate_in(4) + // +M3 --> mate_in(3) + Value parse_score_from_pgn_extract(std::string eval, bool& success) { + success = true; + + if (eval.substr(0, 1) == "#") { + if (eval.substr(1, 1) == "-") { + return -mate_in(stoi(eval.substr(2, eval.length() - 2))); + } + else { + return mate_in(stoi(eval.substr(1, eval.length() - 1))); + } + } + else if (eval.substr(0, 2) == "-M") { + //std::cout << "eval=" << eval << std::endl; + return -mate_in(stoi(eval.substr(2, eval.length() - 2))); + } + else if (eval.substr(0, 2) == "+M") { + //std::cout << "eval=" << eval << std::endl; + return mate_in(stoi(eval.substr(2, eval.length() - 2))); + } + else { + char* endptr; + double value = strtod(eval.c_str(), &endptr); + + if (*endptr != '\0') { + success = false; + return VALUE_ZERO; + } + else { + return Value(value * static_cast(PawnValueEg)); + } + } + } + + // for Debug + //#define DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT + + bool is_like_fen(std::string fen) { + int count_space = std::count(fen.cbegin(), fen.cend(), ' '); + int count_slash = std::count(fen.cbegin(), fen.cend(), '/'); + +#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT) + //std::cout << "count_space=" << count_space << std::endl; + //std::cout << "count_slash=" << count_slash << std::endl; +#endif + + return count_space == 5 && count_slash == 7; + } + + void convert_bin_from_pgn_extract( + const vector& filenames, + const string& output_file_name, + const bool pgn_eval_side_to_move, + const bool convert_no_eval_fens_as_score_zero) + { + std::cout << "pgn_eval_side_to_move=" << pgn_eval_side_to_move << std::endl; + std::cout << "convert_no_eval_fens_as_score_zero=" << convert_no_eval_fens_as_score_zero << std::endl; + + auto th = Threads.main(); + auto& pos = th->rootPos; + + std::fstream ofs; + ofs.open(output_file_name, ios::out | ios::binary); + + int game_count = 0; + int fen_count = 0; + + for (auto filename : filenames) { + std::cout << now_string() << " convert " << filename << std::endl; + ifstream ifs; + ifs.open(filename); + + int game_result = 0; + + std::string line; + while (std::getline(ifs, line)) { + + if (line.empty()) { + continue; + } + + else if (line.substr(0, 1) == "[") { + std::regex pattern_result(R"(\[Result (.+?)\])"); + std::smatch match; + + // example: [Result "1-0"] + if (std::regex_search(line, match, pattern_result)) { + game_result = parse_game_result_from_pgn_extract(match.str(1)); +#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT) + std::cout << "game_result=" << game_result << std::endl; +#endif + game_count++; + if (game_count % 10000 == 0) { + std::cout << now_string() << " game_count=" << game_count << ", fen_count=" << fen_count << std::endl; + } + } + + continue; + } + + else { + int gamePly = 1; + auto itr = line.cbegin(); + + while (true) { + gamePly++; + + PackedSfenValue psv; + memset((char*)&psv, 0, sizeof(PackedSfenValue)); + + // fen + { + bool fen_found = false; + + while (!fen_found) { + std::regex pattern_bracket(R"(\{(.+?)\})"); + std::smatch match; + if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) { + break; + } + + itr += match.position(0) + match.length(0) - 1; + std::string str_fen = match.str(1); + trim(str_fen); + + if (is_like_fen(str_fen)) 
{ + fen_found = true; + + StateInfo si; + pos.set(str_fen, false, &si, th); + pos.sfen_pack(psv.sfen); + } + +#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT) + std::cout << "str_fen=" << str_fen << std::endl; + std::cout << "fen_found=" << fen_found << std::endl; +#endif + } + + if (!fen_found) { + break; + } + } + + // move + { + std::regex pattern_move(R"(\}(.+?)\{)"); + std::smatch match; + if (!std::regex_search(itr, line.cend(), match, pattern_move)) { + break; + } + + itr += match.position(0) + match.length(0) - 1; + std::string str_move = match.str(1); + trim(str_move); +#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT) + std::cout << "str_move=" << str_move << std::endl; +#endif + psv.move = UCI::to_move(pos, str_move); + } + + // eval + bool eval_found = false; + { + std::regex pattern_bracket(R"(\{(.+?)\})"); + std::smatch match; + if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) { + break; + } + + std::string str_eval_clk = match.str(1); + trim(str_eval_clk); +#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT) + std::cout << "str_eval_clk=" << str_eval_clk << std::endl; +#endif + + // example: { [%eval 0.25] [%clk 0:10:00] } + // example: { [%eval #-4] [%clk 0:10:00] } + // example: { [%eval #3] [%clk 0:10:00] } + // example: { +0.71/22 1.2s } + // example: { -M4/7 0.003s } + // example: { M3/245 0.017s } + // example: { +M1/245 0.010s, White mates } + // example: { 0.60 } + // example: { book } + // example: { rnbqkb1r/pp3ppp/2p1pn2/3p4/2PP4/2N2N2/PP2PPPP/R1BQKB1R w KQkq - 0 5 } + + // Considering the absence of eval + if (!is_like_fen(str_eval_clk)) { + itr += match.position(0) + match.length(0) - 1; + + if (str_eval_clk != "book") { + std::regex pattern_eval1(R"(\[\%eval (.+?)\])"); + std::regex pattern_eval2(R"((.+?)\/)"); + + std::string str_eval; + if (std::regex_search(str_eval_clk, match, pattern_eval1) || + std::regex_search(str_eval_clk, match, pattern_eval2)) { + str_eval = match.str(1); + trim(str_eval); + } + else { + str_eval = str_eval_clk; + } + + bool success = false; + Value value = parse_score_from_pgn_extract(str_eval, success); + if (success) { + eval_found = true; + psv.score = Math::clamp(value, -VALUE_MATE, VALUE_MATE); + } + +#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT) + std::cout << "str_eval=" << str_eval << std::endl; + std::cout << "success=" << success << ", psv.score=" << psv.score << std::endl; +#endif + } + } + } + + // write + if (eval_found || convert_no_eval_fens_as_score_zero) { + if (!eval_found && convert_no_eval_fens_as_score_zero) { + psv.score = 0; + } + + psv.gamePly = gamePly; + psv.game_result = game_result; + + if (pos.side_to_move() == BLACK) { + if (!pgn_eval_side_to_move) { + psv.score *= -1; + } + psv.game_result *= -1; + } + + ofs.write((char*)&psv, sizeof(PackedSfenValue)); + + fen_count++; + } + } + + game_result = 0; + } + } + } + + std::cout << now_string() << " game_count=" << game_count << ", fen_count=" << fen_count << std::endl; + std::cout << now_string() << " all done" << std::endl; + ofs.close(); + } + + void convert_plain( + const vector& filenames, + const string& output_file_name) + { + Position tpos; + std::ofstream ofs; + ofs.open(output_file_name, ios::app); + auto th = Threads.main(); + for (auto filename : filenames) { + std::cout << "convert " << filename << " ... 
"; + + // Just convert packedsfenvalue to text + std::fstream fs; + fs.open(filename, ios::in | ios::binary); + PackedSfenValue p; + while (true) + { + if (fs.read((char*)&p, sizeof(PackedSfenValue))) { + StateInfo si; + tpos.set_from_packed_sfen(p.sfen, &si, th, false); + + // write as plain text + ofs << "fen " << tpos.fen() << std::endl; + ofs << "move " << UCI::move(Move(p.move), false) << std::endl; + ofs << "score " << p.score << std::endl; + ofs << "ply " << int(p.gamePly) << std::endl; + ofs << "result " << int(p.game_result) << std::endl; + ofs << "e" << std::endl; + } + else { + break; + } + } + fs.close(); + std::cout << "done" << std::endl; + } + ofs.close(); + std::cout << "all done" << std::endl; + } +} +#endif diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp new file mode 100644 index 00000000..38bed2d5 --- /dev/null +++ b/src/learn/gensfen.cpp @@ -0,0 +1,1181 @@ +#define EVAL_LEARN + +#if defined(EVAL_LEARN) + +#include "../eval/evaluate_common.h" + +#include "learn.h" +#include "multi_think.h" +#include "../misc.h" +#include "../thread.h" +#include "../position.h" +#include "../tt.h" +#include "../uci.h" +#include "../syzygy/tbprobe.h" + +#if defined(USE_BOOK) +#include "../extra/book/book.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined (_OPENMP) +#include +#endif + +#if defined(_MSC_VER) +// std::filesystem doesn't work on GCC even though it claims to support C++17. +#include +#elif defined(__GNUC__) +#include +#endif + +#if defined(EVAL_NNUE) +#include "../nnue/evaluate_nnue_learner.h" +#include +#include +#endif + +using namespace std; + +namespace Learner +{ + static bool write_out_draw_game_in_training_data_generation = false; + static bool detect_draw_by_consecutive_low_score = false; + static bool detect_draw_by_insufficient_mating_material = false; + + // Use raw NNUE eval value in the Eval::evaluate(). + // If hybrid eval is enabled, training data + // generation and training don't work well. + // https://discordapp.com/channels/435943710472011776/733545871911813221/748524079761326192 + static bool use_raw_nnue_eval = true; + + // Helper class for exporting Sfen + struct SfenWriter + { + // Amount of sfens required to flush the buffer. + static constexpr size_t SFEN_WRITE_SIZE = 5000; + + // Current status is output after + // each (SFEN_WRITE_SIZE * STATUS_OUTPUT_PERIOD) sfens + static constexpr uint64_t STATUS_OUTPUT_PERIOD = 40; + + // File name to write and number of threads to create + SfenWriter(string filename_, int thread_num) + { + sfen_buffers_pool.reserve((size_t)thread_num * 10); + sfen_buffers.resize(thread_num); + + output_file_stream.open(filename_, ios::out | ios::binary | ios::app); + filename = filename_; + + finished = false; + } + + ~SfenWriter() + { + finished = true; + file_worker_thread.join(); + output_file_stream.close(); + +#if !defined(DNDEBUG) + { + // All buffers should be empty since file_worker_thread + // should have written everything before exiting. + for (const auto& p : sfen_buffers) { assert(p == nullptr); } + assert(sfen_buffers_pool.empty()); + } +#endif + } + + void write(size_t thread_id, const PackedSfenValue& psv) + { + // We have a buffer for each thread and add it there. + // If the buffer overflows, write it to a file. + + // This buffer is prepared for each thread. 
+ auto& buf = sfen_buffers[thread_id]; + + // Secure since there is no buf at the first time + // and immediately after writing the thread buffer. + if (!buf) + { + buf = std::make_unique(); + buf->reserve(SFEN_WRITE_SIZE); + } + + // Buffer is exclusive to this thread. + // There is no need for a critical section. + buf->push_back(psv); + + if (buf->size() >= SFEN_WRITE_SIZE) + { + // If you load it in sfen_buffers_pool, the worker will do the rest. + + // Critical section since sfen_buffers_pool is shared among threads. + std::unique_lock lk(mutex); + sfen_buffers_pool.emplace_back(std::move(buf)); + } + } + + // Move what remains in the buffer for your thread to a buffer for writing to a file. + void finalize(size_t thread_id) + { + std::unique_lock lk(mutex); + + auto& buf = sfen_buffers[thread_id]; + + // There is a case that buf==nullptr, so that check is necessary. + if (buf && buf->size() != 0) + { + sfen_buffers_pool.emplace_back(std::move(buf)); + } + } + + // Start the write_worker thread. + void start_file_write_worker() + { + file_worker_thread = std::thread([&] { this->file_write_worker(); }); + } + + // Dedicated thread to write to file + void file_write_worker() + { + auto output_status = [&]() + { + // Also output the current time to console. + sync_cout << endl << sfen_write_count << " sfens , at " << now_string() << sync_endl; + + // This is enough for flush(). + output_file_stream.flush(); + }; + + while (!finished || sfen_buffers_pool.size()) + { + vector> buffers; + { + std::unique_lock lk(mutex); + + // Atomically swap take the filled buffers and + // create a new buffer pool for threads to fill. + buffers = std::move(sfen_buffers_pool); + sfen_buffers_pool = std::vector>(); + } + + if (!buffers.size()) + { + // Poor man's condition variable. + sleep(100); + } + else + { + for (auto& buf : buffers) + { + output_file_stream.write(reinterpret_cast(buf->data()), sizeof(PackedSfenValue) * buf->size()); + + sfen_write_count += buf->size(); +#if 1 + // Add the processed number here, and if it exceeds save_every, + // change the file name and reset this counter. + sfen_write_count_current_file += buf->size(); + if (sfen_write_count_current_file >= save_every) + { + sfen_write_count_current_file = 0; + + output_file_stream.close(); + + // Sequential number attached to the file + int n = (int)(sfen_write_count / save_every); + + // Rename the file and open it again. + // Add ios::app in consideration of overwriting. + // (Depending on the operation, it may not be necessary.) + string new_filename = filename + "_" + std::to_string(n); + output_file_stream.open(new_filename, ios::out | ios::binary | ios::app); + cout << endl << "output sfen file = " << new_filename << endl; + } +#endif + // Output '.' every time when writing a game record. + std::cout << "."; + + // Output the number of phases processed + // every STATUS_OUTPUT_PERIOD times + // Finally, the remainder of the teacher phase + // of each thread is written out, + // so halfway numbers are displayed, but is it okay? + // If you overuse the threads to the maximum number + // of logical cores, the console will be clogged, + // so it may be beneficial to increase that value. + if ((++batch_counter % STATUS_OUTPUT_PERIOD) == 0) + { + output_status(); + } + } + } + } + + // Output the status again after whole processing is done. 
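+            // output_status() also flushes the stream, so the final count printed
+            // here reflects every sfen that has been written.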
+ output_status(); + } + + void set_save_interval(uint64_t v) + { + save_every = v; + } + + private: + + fstream output_file_stream; + + // A new net is saved after every save_every sfens are processed. + uint64_t save_every = std::numeric_limits::max(); + + // File name passed in the constructor + std::string filename; + + // Thread to write to the file + std::thread file_worker_thread; + + // Flag that all threads have finished + atomic finished; + + // Counter for time stamp output + uint64_t batch_counter = 0; + + // buffer before writing to file + // sfen_buffers is the buffer for each thread + // sfen_buffers_pool is a buffer for writing. + // After loading the phase in the former buffer by SFEN_WRITE_SIZE, + // transfer it to the latter. + std::vector> sfen_buffers; + std::vector> sfen_buffers_pool; + + // Mutex required to access sfen_buffers_pool + std::mutex mutex; + + // Number of sfens written in total, and the + // number of sfens written in the current file. + uint64_t sfen_write_count = 0; + uint64_t sfen_write_count_current_file = 0; + }; + + // ----------------------------------- + // worker that creates the game record (for each thread) + // ----------------------------------- + + // Class to generate sfen with multiple threads + struct MultiThinkGenSfen : public MultiThink + { + // Hash to limit the export of identical sfens + static constexpr uint64_t GENSFEN_HASH_SIZE = 64 * 1024 * 1024; + // It must be 2**N because it will be used as the mask to calculate hash_index. + static_assert((GENSFEN_HASH_SIZE& (GENSFEN_HASH_SIZE - 1)) == 0); + + MultiThinkGenSfen(int search_depth_min_, int search_depth_max_, SfenWriter& sw_) : + search_depth_min(search_depth_min_), + search_depth_max(search_depth_max_), + sfen_writer(sw_) + { + hash.resize(GENSFEN_HASH_SIZE); + + // Output seed to veryfy by the user if it's not identical by chance. + std::cout << prng << std::endl; + } + + void start_file_write_worker() + { + sfen_writer.start_file_write_worker(); + } + + void thread_worker(size_t thread_id) override; + + optional get_current_game_result( + Position& pos, + const vector& move_hist_scores) const; + + vector generate_random_move_flags(); + + bool commit_psv(PSVector& a_psv, size_t thread_id, int8_t lastTurnIsWin); + + optional choose_random_move( + Position& pos, + std::vector& random_move_flag, + int ply, + int& random_move_c); + + Value evaluate_leaf( + Position& pos, + std::vector>& states, + int ply, + vector& pv); + + // Min and max depths for search during gensfen + int search_depth_min; + int search_depth_max; + + // Number of the nodes to be searched. + // 0 represents no limits. + uint64_t nodes; + + // Upper limit of evaluation value of generated situation + int eval_limit; + + // minimum ply with random move + // maximum ply with random move + // Number of random moves in one station + int random_move_minply; + int random_move_maxply; + int random_move_count; + + // Move kings with a probability of 1/N when randomly moving like Apery software. + // When you move the king again, there is a 1/N chance that it will randomly moved + // once in the opponent's turn. + // Apery has N=2. Specifying 0 here disables this function. + int random_move_like_apery; + + // For when using multi pv instead of random move. + // random_multi_pv is the number of candidates for MultiPV. + // When adopting the move of the candidate move, the difference + // between the evaluation value of the move of the 1st place + // and the evaluation value of the move of the Nth place is. 
+ // Must be in the range random_multi_pv_diff. + // random_multi_pv_depth is the search depth for MultiPV. + int random_multi_pv; + int random_multi_pv_diff; + int random_multi_pv_depth; + + // The minimum and maximum ply (number of steps from + // the initial phase) of the sfens to write out. + int write_minply; + int write_maxply; + + // sfen exporter + SfenWriter& sfen_writer; + + vector hash; // 64MB*sizeof(HASH_KEY) = 512MB + }; + + optional MultiThinkGenSfen::get_current_game_result( + Position& pos, + const vector& move_hist_scores) const + { + // Variables for draw adjudication. + // Todo: Make this as an option. + + // start the adjudication when ply reaches this value + constexpr int adj_draw_ply = 80; + + // 4 move scores for each side have to be checked + constexpr int adj_draw_cnt = 8; + + // move score in CP + constexpr int adj_draw_score = 0; + + // For the time being, it will be treated as a + // draw at the maximum number of steps to write. + const int ply = move_hist_scores.size(); + + // has it reached the max length or is a draw + if (ply >= write_maxply || pos.is_draw(ply)) + { + return 0; + } + + // Initialize the Syzygy Ending Tablebase and sort the moves. + Search::RootMoves rootMoves; + for (const auto& m : MoveList(pos)) + { + rootMoves.emplace_back(m); + } + + if (!rootMoves.empty()) + { + Tablebases::rank_root_moves(pos, rootMoves); + } + else + { + // If there is no legal move + return pos.checkers() + ? -1 /* mate */ + : 0 /* stalemate */; + } + + // Adjudicate game to a draw if the last 4 scores of each engine is 0. + if (detect_draw_by_consecutive_low_score) + { + if (ply >= adj_draw_ply) + { + int num_cons_plies_within_draw_score = 0; + bool is_adj_draw = false; + + for (auto it = move_hist_scores.rbegin(); + it != move_hist_scores.rend(); ++it) + { + if (abs(*it) <= adj_draw_score) + { + num_cons_plies_within_draw_score++; + } + else + { + // Draw scores must happen on consecutive plies + break; + } + + if (num_cons_plies_within_draw_score >= adj_draw_cnt) + { + is_adj_draw = true; + break; + } + } + + if (is_adj_draw) + { + return 0; + } + } + } + + // Draw by insufficient mating material + if (detect_draw_by_insufficient_mating_material) + { + if (pos.count() <= 4) + { + int num_pieces = pos.count(); + + // (1) KvK + if (num_pieces == 2) + { + return 0; + } + + // (2) KvK + 1 minor piece + if (num_pieces == 3) + { + int minor_pc = pos.count(WHITE) + pos.count(WHITE) + + pos.count(BLACK) + pos.count(BLACK); + if (minor_pc == 1) + { + return 0; + } + } + + // (3) KBvKB, bishops of the same color + else if (num_pieces == 4) + { + if (pos.count(WHITE) == 1 && pos.count(BLACK) == 1) + { + // Color of bishops is black. + if ((pos.pieces(WHITE, BISHOP) & DarkSquares) + && (pos.pieces(BLACK, BISHOP) & DarkSquares)) + { + return 0; + } + // Color of bishops is white. + if ((pos.pieces(WHITE, BISHOP) & ~DarkSquares) + && (pos.pieces(BLACK, BISHOP) & ~DarkSquares)) + { + return 0; + } + } + } + } + } + + return nullopt; + } + + // Write out the phases loaded in sfens to a file. + // lastTurnIsWin: win/loss in the next phase after the final phase in sfens + // 1 when winning. -1 when losing. Pass 0 for a draw. + // Return value: true if the specified number of + // sfens has already been reached and the process ends. 
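+    // Example: with lastTurnIsWin == 1 the stored game_result values become
+    // -1, +1, -1, ... walking backwards from the last position, i.e. every record
+    // carries the outcome from the perspective of its own side to move;
+    // with lastTurnIsWin == 0 every record stays 0 (draw).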
+ bool MultiThinkGenSfen::commit_psv(PSVector& sfens, size_t thread_id, int8_t lastTurnIsWin) + { + int8_t is_win = lastTurnIsWin; + + // From the final stage (one step before) to the first stage, give information on the outcome of the game for each stage. + // The phases stored in sfens are assumed to be continuous (in order). + for (auto it = sfens.rbegin(); it != sfens.rend(); ++it) + { + // If is_win == 0 (draw), multiply by -1 and it will remain 0 (draw) + is_win = -is_win; + it->game_result = is_win; + + // See how many sfens were already written and get the next id. + // Exit if requested number of sfens reached. + auto now_loop_count = get_next_loop_count(); + if (now_loop_count == LOOP_COUNT_FINISHED) + { + return true; + } + + // Write out one sfen. + sfen_writer.write(thread_id, *it); + +#if 0 + pos.set_from_packed_sfen(it->sfen); + cout << pos << "Win : " << it->is_win << " , " << it->score << endl; +#endif + } + + return false; + } + + optional MultiThinkGenSfen::choose_random_move( + Position& pos, + std::vector& random_move_flag, + int ply, + int& random_move_c) + { + optional random_move; + + // Randomly choose one from legal move + if ( + // 1. Random move of random_move_count times from random_move_minply to random_move_maxply + (random_move_minply != -1 && ply < (int)random_move_flag.size() && random_move_flag[ply]) || + // 2. A mode to perform random move of random_move_count times after leaving the startpos + (random_move_minply == -1 && random_move_c < random_move_count)) + { + ++random_move_c; + + // It's not a mate, so there should be one legal move... + if (random_multi_pv == 0) + { + // Normal random move + MoveList list(pos); + + // I don't really know the goodness and badness of making this the Apery method. + if (random_move_like_apery == 0 + || prng.rand(random_move_like_apery) != 0) + { + // Normally one move from legal move + random_move = list.at((size_t)prng.rand((uint64_t)list.size())); + } + else + { + // if you can move the king, move the king + Move moves[8]; // Near 8 + Move* p = &moves[0]; + for (auto& m : list) + { + if (type_of(pos.moved_piece(m)) == KING) + { + *(p++) = m; + } + } + + size_t n = p - &moves[0]; + if (n != 0) + { + // move to move the king + random_move = moves[prng.rand(n)]; + + // In Apery method, at this time there is a 1/2 chance + // that the opponent will also move randomly + if (prng.rand(2) == 0) + { + // Is it a simple hack to add a "1" next to random_move_flag[ply]? + random_move_flag.insert(random_move_flag.begin() + ply + 1, 1, true); + } + } + else + { + // Normally one move from legal move + random_move = list.at((size_t)prng.rand((uint64_t)list.size())); + } + } + } + else + { + Learner::search(pos, random_multi_pv_depth, random_multi_pv); + + // Select one from the top N hands of root Moves + auto& rm = pos.this_thread()->rootMoves; + + uint64_t s = min((uint64_t)rm.size(), (uint64_t)random_multi_pv); + for (uint64_t i = 1; i < s; ++i) + { + // The difference from the evaluation value of rm[0] must + // be within the range of random_multi_pv_diff. + // It can be assumed that rm[x].score is arranged in descending order. + if (rm[0].score > rm[i].score + random_multi_pv_diff) + { + s = i; + break; + } + } + + random_move = rm[prng.rand(s)].pv[0]; + } + } + + return random_move; + } + + vector MultiThinkGenSfen::generate_random_move_flags() + { + vector random_move_flag; + + // Depending on random move selection parameters setup + // the array of flags that indicates whether a random move + // be taken at a given ply. 
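+        // For example, with the defaults used by gen_sfen() below
+        // (random_move_minply = 1, random_move_maxply = 24, random_move_count = 5)
+        // five distinct plies are drawn uniformly from plies 0..23 (0-origin)
+        // and flagged for a random move.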
+ + // Make an array like a[0] = 0 ,a[1] = 1, ... + // Fisher-Yates shuffle and take out the first N items. + // Actually, I only want N pieces, so I only need + // to shuffle the first N pieces with Fisher-Yates. + + vector a; + a.reserve((size_t)random_move_maxply); + + // random_move_minply ,random_move_maxply is specified by 1 origin, + // Note that we are handling 0 origin here. + for (int i = std::max(random_move_minply - 1, 0); i < random_move_maxply; ++i) + { + a.push_back(i); + } + + // In case of Apery random move, insert() may be called random_move_count times. + // Reserve only the size considering it. + random_move_flag.resize((size_t)random_move_maxply + random_move_count); + + // A random move that exceeds the size() of a[] cannot be applied, so limit it. + for (int i = 0; i < std::min(random_move_count, (int)a.size()); ++i) + { + swap(a[i], a[prng.rand((uint64_t)a.size() - i) + i]); + random_move_flag[a[i]] = true; + } + + return random_move_flag; + } + + Value MultiThinkGenSfen::evaluate_leaf( + Position& pos, + std::vector>& states, + int ply, + vector& pv) + { + auto rootColor = pos.side_to_move(); + + for (auto m : pv) + { +#if 1 + // There should be no illegal move. This is as a debugging precaution. + if (!pos.pseudo_legal(m) || !pos.legal(m)) + { + cout << "Error! : " << pos.fen() << m << endl; + } +#endif + pos.do_move(m, states[ply++]); + + // Because the difference calculation of evaluate() cannot be + // performed unless each node evaluate() is called! + // If the depth is 8 or more, it seems + // faster not to calculate this difference. +#if defined(EVAL_NNUE) + if (depth < 8) + { + Eval::NNUE::update_eval(pos); + } +#endif // defined(EVAL_NNUE) + } + + // Reach leaf + Value v; + if (pos.checkers()) { + // Sometime a king is checked. An example is a case that a checkmate is + // found in the search. If Eval::evaluate() is called whne a king is + // checked, classic eval crashes by an assertion. To avoid crashes, return + // VALUE_NONE and let the caller assign a value to the position. + return VALUE_NONE; + } + else + { + v = Eval::evaluate(pos); + + // evaluate() returns the evaluation value on the turn side, so + // If it's a turn different from root_color, you must invert v and return it. + if (rootColor != pos.side_to_move()) + { + v = -v; + } + } + + // Rewind the pv moves. + for (auto it = pv.rbegin(); it != pv.rend(); ++it) + { + pos.undo_move(*it); + } + + return v; + } + + // thread_id = 0..Threads.size()-1 + void MultiThinkGenSfen::thread_worker(size_t thread_id) + { + // For the time being, it will be treated as a draw + // at the maximum number of steps to write. + // Maximum StateInfo + Search PV to advance to leaf buffer + std::vector> states( + write_maxply + MAX_PLY /* == search_depth_min + α */); + + StateInfo si; + + // end flag + bool quit = false; + + // repeat until the specified number of times + while (!quit) + { + // It is necessary to set a dependent thread for Position. + // When parallelizing, Threads (since this is a vector, + // Do the same for up to Threads[0]...Threads[thread_num-1]. + auto th = Threads[thread_id]; + + auto& pos = th->rootPos; + pos.set(StartFEN, false, &si, th); + +#if defined(USE_BOOK) + // Refer to the members of BookMoveSelector defined in the search section. + auto& book = ::book; +#endif + + // Vector for holding the sfens in the current simulated game. + PSVector a_psv; + a_psv.reserve(write_maxply + MAX_PLY); + + // Precomputed flags. Used internally by choose_random_move. 
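+            // One flag per ply; a true entry makes choose_random_move() replace
+            // the searched move with a random one at that ply.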
+ vector random_move_flag = generate_random_move_flags(); + + // A counter that keeps track of the number of random moves + // When random_move_minply == -1, random moves are + // performed continuously, so use it at this time. + // Used internally by choose_random_move. + int actual_random_move_count = 0; + + // Save history of move scores for adjudication + vector move_hist_scores; + + auto flush_psv = [&](int8_t result) { + quit = commit_psv(a_psv, thread_id, result); + }; + + for (int ply = 0; ; ++ply) + { + Move next_move = MOVE_NONE; + + // Current search depth + const int depth = search_depth_min + (int)prng.rand(search_depth_max - search_depth_min + 1); + + const auto result = get_current_game_result(pos, move_hist_scores); + if (result.has_value()) + { + flush_psv(result.value()); + break; + } +#if defined(USE_BOOK) + if ((next_move = book.probe(pos)) != MOVE_NONE) + { + // Hit the constant track. + // The move was stored in next_move. + + // Do not use the fixed phase for learning. + sfens.clear(); + + if (random_move_minply != -1) + { + // Random move is performed with a certain + // probability even in the constant phase. + goto RANDOM_MOVE; + } + else + { + // When -1 is specified as random_move_minply, + // it points according to the standard until + // it goes out of the standard. + // Prepare an innumerable number of situations + // that have left the constant as + // ConsiderationBookMoveCount true using a huge constant + // Used for purposes such as performing + // a random move 5 times from there. + goto DO_MOVE; + } + } +#endif + { + auto [search_value, search_pv] = search(pos, depth, 1, nodes); + + // Always adjudivate by eval limit. + // Also because of this we don't have to check for TB/MATE scores + if (abs(search_value) >= eval_limit) + { + const auto wdl = (search_value >= eval_limit) ? 1 : -1; + flush_psv(wdl); + break; + } + + // Verification of a strange move + if (search_pv.size() > 0 + && (search_pv[0] == MOVE_NONE || search_pv[0] == MOVE_NULL)) + { + // (???) + // MOVE_WIN is checking if it is the declaration victory stage before this + // The declarative winning move should never come back here. + // Also, when MOVE_RESIGN, search_value is a one-stop score, which should be the minimum value of eval_limit (-31998)... + cout << "Error! : " << pos.fen() << next_move << search_value << endl; + break; + } + + // Save the move score for adjudication. + move_hist_scores.push_back(search_value); + +#if 0 + dbg_hit_on(search_value == leaf_value); + // gensfen depth 3 eval_limit 32000 + // Total 217749 Hits 203579 hit rate (%) 93.490 + // gensfen depth 6 eval_limit 32000 + // Total 78407 Hits 69190 hit rate (%) 88.245 + // gensfen depth 6 eval_limit 3000 + // Total 53879 Hits 43713 hit rate (%) 81.132 + + // Problems such as pruning with moves in the substitution table. + // This is a little uncomfortable as a teacher... +#endif + + // If depth 0, pv is not obtained, so search again at depth 2. + if (search_depth_min <= 0) + { + auto [research_value, research_pv] = search(pos, 2); + search_pv = research_pv; + } + + // Discard stuff before write_minply is reached + // because it can harm training due to overfitting. + // Initial positions would be too common. + if (ply < write_minply - 1) + { + a_psv.clear(); + goto SKIP_SAVE; + } + + // Look into the position hashtable to see if the same + // position was seen before. + // This is a good heuristic to exlude already seen + // positions without many false positives. 
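+            // The worst a hash-index collision can do is cause a position to be
+            // skipped or written twice; neither corrupts the training data, so a
+            // simple one-entry-per-slot table is sufficient here.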
+ { + auto key = pos.key(); + auto hash_index = (size_t)(key & (GENSFEN_HASH_SIZE - 1)); + auto old_key = hash[hash_index]; + if (key == old_key) + { + a_psv.clear(); + goto SKIP_SAVE; + } + else + { + // Replace with the current key. + hash[hash_index] = key; + } + } + + // Pack the current position into a packed sfen and save it into the buffer. + { + a_psv.emplace_back(PackedSfenValue()); + auto& psv = a_psv.back(); + + // Here we only write the position data. + // Result is added after the whole game is done. + pos.sfen_pack(psv.sfen); + + // Get the value of evaluate() as seen from the + // root color on the leaf node of the PV line. + // I don't know the goodness and badness of using the + // return value of search() as it is. + // TODO: Consider using search value instead of evaluate_leaf. + // Maybe give it as an option. + + // Use PV moves to reach the leaf node and use the value + // that evaluated() is called on that leaf node. + const auto leaf_value = evaluate_leaf(pos, states, ply, search_pv); + + // If for some reason the leaf node couldn't yield an eval + // we fallback to search value. + psv.score = leaf_value == VALUE_NONE ? search_value : leaf_value; + + psv.gamePly = ply; + + // Take out the first PV move. This should be present unless depth 0. + assert(search_pv.size() >= 1); + psv.move = search_pv[0]; + } + + SKIP_SAVE:; + + // For some reason, We could not get PV (hit the substitution table etc. and got stuck?) + // so go to the next game. It's a rare case, so you can ignore it. + if (search_pv.size() == 0) + { + break; + } + + // Update the next move according to best search result. + next_move = search_pv[0]; + } + + RANDOM_MOVE:; + + auto random_move = choose_random_move(pos, random_move_flag, ply, actual_random_move_count); + if (random_move.has_value()) + { + next_move = random_move.value(); + + // We don't have the whole game yet, but it ended, + // so the writing process ends and the next game starts. + if (!is_ok(next_move)) + { + break; + } + + // Clear the sfens that were written before the random move. + // (???) why? + a_psv.clear(); + } + + DO_MOVE:; + pos.do_move(next_move, states[ply]); + + // Call node evaluate() for each difference calculation. + Eval::NNUE::update_eval(pos); + + } // for (int ply = 0; ; ++ply) + + } // while(!quit) + + sfen_writer.finalize(thread_id); + } + + // ----------------------------------- + // Command to generate a game record (master thread) + // ----------------------------------- + + // Command to generate a game record + void gen_sfen(Position&, istringstream& is) + { + // number of threads (given by USI setoption) + uint32_t thread_num = (uint32_t)Options["Threads"]; + + // Number of generated game records default = 8 billion phases (Ponanza specification) + uint64_t loop_max = 8000000000UL; + + // Stop the generation when the evaluation value reaches this value. + int eval_limit = 3000; + + // search depth + int search_depth_min = 3; + int search_depth_max = INT_MIN; + + // Number of nodes to be searched. + uint64_t nodes = 0; + + // minimum ply, maximum ply and number of random moves + int random_move_minply = 1; + int random_move_maxply = 24; + int random_move_count = 5; + + // A function to move the random move mainly like Apery + // If this is set to 3, the ball will move with a probability of 1/3. + int random_move_like_apery = 0; + + // If you search with multipv instead of random move and choose from among them randomly, set random_multi_pv = 1 or more. 
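+        // Illustrative invocation (assuming the command is registered as "gensfen"
+        // in uci.cpp; only tokens parsed in the loop below are used):
+        //   setoption name Threads value 8
+        //   gensfen depth 8 loop 100000000 output_file_name train.bin random_multi_pv 4 random_multi_pv_diff 100 eval_limit 3000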
+ int random_multi_pv = 0; + int random_multi_pv_diff = 32000; + int random_multi_pv_depth = INT_MIN; + + // The minimum and maximum ply (number of steps from the initial phase) of the phase to write out. + int write_minply = 16; + int write_maxply = 400; + + // File name to write + string output_file_name = "generated_kifu.bin"; + + string token; + + // When hit to eval hash, as a evaluation value near the initial stage, if a hash collision occurs and a large value is written + // When eval_limit is set small, eval_limit will be exceeded every time in the initial phase, and phase generation will not proceed. + // Therefore, eval hash needs to be disabled. + // After that, when the hash of the eval hash collides, the evaluation value of a strange value is used, and it may be unpleasant to use it for the teacher. + bool use_eval_hash = false; + + // Save to file in this unit. + // File names are serialized like file_1.bin, file_2.bin. + uint64_t save_every = UINT64_MAX; + + // Add a random number to the end of the file name. + bool random_file_name = false; + + while (true) + { + token = ""; + is >> token; + if (token == "") + break; + + if (token == "depth") + is >> search_depth_min; + else if (token == "depth2") + is >> search_depth_max; + else if (token == "nodes") + is >> nodes; + else if (token == "loop") + is >> loop_max; + else if (token == "output_file_name") + is >> output_file_name; + else if (token == "eval_limit") + { + is >> eval_limit; + // Limit the maximum to a one-stop score. (Otherwise you might not end the loop) + eval_limit = std::min(eval_limit, (int)mate_in(2)); + } + else if (token == "random_move_minply") + is >> random_move_minply; + else if (token == "random_move_maxply") + is >> random_move_maxply; + else if (token == "random_move_count") + is >> random_move_count; + else if (token == "random_move_like_apery") + is >> random_move_like_apery; + else if (token == "random_multi_pv") + is >> random_multi_pv; + else if (token == "random_multi_pv_diff") + is >> random_multi_pv_diff; + else if (token == "random_multi_pv_depth") + is >> random_multi_pv_depth; + else if (token == "write_minply") + is >> write_minply; + else if (token == "write_maxply") + is >> write_maxply; + else if (token == "use_eval_hash") + is >> use_eval_hash; + else if (token == "save_every") + is >> save_every; + else if (token == "random_file_name") + is >> random_file_name; + // Accept also the old option name. + else if (token == "use_draw_in_training_data_generation" || token == "write_out_draw_game_in_training_data_generation") + is >> write_out_draw_game_in_training_data_generation; + // Accept also the old option name. + else if (token == "use_game_draw_adjudication" || token == "detect_draw_by_consecutive_low_score") + is >> detect_draw_by_consecutive_low_score; + else if (token == "detect_draw_by_insufficient_mating_material") + is >> detect_draw_by_insufficient_mating_material; + else if (token == "use_raw_nnue_eval") + is >> use_raw_nnue_eval; + else + cout << "Error! : Illegal token " << token << endl; + } + +#if defined(USE_GLOBAL_OPTIONS) + // Save it for later restore. + auto oldGlobalOptions = GlobalOptions; + GlobalOptions.use_eval_hash = use_eval_hash; +#endif + + // If search depth2 is not set, leave it the same as search depth. + if (search_depth_max == INT_MIN) + search_depth_max = search_depth_min; + if (random_multi_pv_depth == INT_MIN) + random_multi_pv_depth = search_depth_min; + + if (random_file_name) + { + // Give a random number to output_file_name at this point. 
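+            // With the default base name the result is e.g.
+            // "generated_kifu.bin_<hex><hex>", two unpadded 64-bit values in
+            // hexadecimal appended to the file name.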
+ // Do not use std::random_device(). Because it always the same integers on MinGW. + PRNG r(std::chrono::system_clock::now().time_since_epoch().count()); + // Just in case, reassign the random numbers. + for (int i = 0; i < 10; ++i) + r.rand(1); + auto to_hex = [](uint64_t u) { + std::stringstream ss; + ss << std::hex << u; + return ss.str(); + }; + // I don't want to wear 64bit numbers by accident, so I'next_move going to make a 64bit number 2 just in case. + output_file_name = output_file_name + "_" + to_hex(r.rand()) + to_hex(r.rand()); + } + + std::cout << "gensfen : " << endl + << " search_depth_min = " << search_depth_min << " to " << search_depth_max << endl + << " nodes = " << nodes << endl + << " loop_max = " << loop_max << endl + << " eval_limit = " << eval_limit << endl + << " thread_num (set by USI setoption) = " << thread_num << endl +#if defined(USE_BOOK) + << " book_moves (set by USI setoption) = " << Options["BookMoves"] << endl +#endif + << " random_move_minply = " << random_move_minply << endl + << " random_move_maxply = " << random_move_maxply << endl + << " random_move_count = " << random_move_count << endl + << " random_move_like_apery = " << random_move_like_apery << endl + << " random_multi_pv = " << random_multi_pv << endl + << " random_multi_pv_diff = " << random_multi_pv_diff << endl + << " random_multi_pv_depth = " << random_multi_pv_depth << endl + << " write_minply = " << write_minply << endl + << " write_maxply = " << write_maxply << endl + << " output_file_name = " << output_file_name << endl + << " use_eval_hash = " << use_eval_hash << endl + << " save_every = " << save_every << endl + << " random_file_name = " << random_file_name << endl + << " write_out_draw_game_in_training_data_generation = " << write_out_draw_game_in_training_data_generation << endl + << " detect_draw_by_consecutive_low_score = " << detect_draw_by_consecutive_low_score << endl + << " detect_draw_by_insufficient_mating_material = " << detect_draw_by_insufficient_mating_material << endl; + + // Show if the training data generator uses NNUE. + Eval::verify_NNUE(); + + // Create and execute threads as many as Options["Threads"]. + { + SfenWriter sfen_writer(output_file_name, thread_num); + sfen_writer.set_save_interval(save_every); + + MultiThinkGenSfen multi_think(search_depth_min, search_depth_max, sfen_writer); + multi_think.nodes = nodes; + multi_think.set_loop_max(loop_max); + multi_think.eval_limit = eval_limit; + multi_think.random_move_minply = random_move_minply; + multi_think.random_move_maxply = random_move_maxply; + multi_think.random_move_count = random_move_count; + multi_think.random_move_like_apery = random_move_like_apery; + multi_think.random_multi_pv = random_multi_pv; + multi_think.random_multi_pv_diff = random_multi_pv_diff; + multi_think.random_multi_pv_depth = random_multi_pv_depth; + multi_think.write_minply = write_minply; + multi_think.write_maxply = write_maxply; + multi_think.start_file_write_worker(); + multi_think.go_think(); + + // Since we are joining with the destructor of SfenWriter, please give a message that it has finished after the join + // Enclose this in a block because it should be displayed. + } + + std::cout << "gensfen finished." << endl; + +#if defined(USE_GLOBAL_OPTIONS) + // Restore Global Options. 
+ GlobalOptions = oldGlobalOptions; +#endif + + } +} +#endif diff --git a/src/learn/learn.h b/src/learn/learn.h index eda2bb32..e29ed74a 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -180,6 +180,23 @@ typedef float LearnFloatType; #define ADA_GRAD_UPDATE #endif +// Character string according to update formula. (Output for debugging.) +// Implemented various update expressions, but concluded that AdaGrad is the best in terms of speed and memory. +#if defined(ADA_GRAD_UPDATE) +#define LEARN_UPDATE "AdaGrad" +#elif defined(SGD_UPDATE) +#define LEARN_UPDATE "SGD" +#endif + +#if defined(LOSS_FUNCTION_IS_WINNING_PERCENTAGE) +#define LOSS_FUNCTION "WINNING_PERCENTAGE" +#elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY) +#define LOSS_FUNCTION "CROSS_ENTOROPY" +#elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE) +#define LOSS_FUNCTION "CROSS_ENTOROPY_FOR_VALUE" +#elif defined(LOSS_FUNCTION_IS_ELMO_METHOD) +#define LOSS_FUNCTION "ELMO_METHOD(WCSC27)" +#endif // ---------------------- // Definition of struct used in Learner @@ -223,13 +240,38 @@ namespace Learner // Used in Learner::search(), Learner::qsearch(). typedef std::pair > ValueAndPV; + // Phase array: PSVector stands for packed sfen vector. + typedef std::vector PSVector; + // So far, only Yaneura King 2018 Otafuku has this stub // This stub is required if EVAL_LEARN is defined. extern Learner::ValueAndPV search(Position& pos, int depth , size_t multiPV = 1 , uint64_t NodesLimit = 0); extern Learner::ValueAndPV qsearch(Position& pos); double calc_grad(Value shallow, const PackedSfenValue& psv); + + void convert_bin_from_pgn_extract( + const std::vector& filenames, + const std::string& output_file_name, + const bool pgn_eval_side_to_move, + const bool convert_no_eval_fens_as_score_zero); + + void convert_bin( + const std::vector& filenames, + const std::string& output_file_name, + const int ply_minimum, + const int ply_maximum, + const int interpolate_eval, + const int src_score_min_value, + const int src_score_max_value, + const int dest_score_min_value, + const int dest_score_max_value, + const bool check_invalid_fen, + const bool check_illegal_move); + void convert_plain( + const std::vector& filenames, + const std::string& output_file_name); } #endif diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 9f02a594..c897dd93 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -13,54 +13,34 @@ // → I will not be involved in the engine because it is a problem that the GUI should assist. // etc.. +#define EVAL_LEARN + #if defined(EVAL_LEARN) -#include -#include -#include -#include +#include "../eval/evaluate_common.h" #include "learn.h" #include "multi_think.h" #include "../uci.h" #include "../syzygy/tbprobe.h" +#include "../misc.h" +#include "../thread.h" +#include "../position.h" +#include "../tt.h" -// evaluate header for learning -#include "../eval/evaluate_common.h" - -// ---------------------- -// constant string based on the settings -// ---------------------- - -// Character string according to update formula. (Output for debugging.) -// Implemented various update expressions, but concluded that AdaGrad is the best in terms of speed and memory. 
-#if defined(ADA_GRAD_UPDATE) -#define LEARN_UPDATE "AdaGrad" -#elif defined(SGD_UPDATE) -#define LEARN_UPDATE "SGD" -#endif - -#if defined(LOSS_FUNCTION_IS_WINNING_PERCENTAGE) -#define LOSS_FUNCTION "WINNING_PERCENTAGE" -#elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY) -#define LOSS_FUNCTION "CROSS_ENTOROPY" -#elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE) -#define LOSS_FUNCTION "CROSS_ENTOROPY_FOR_VALUE" -#elif defined(LOSS_FUNCTION_IS_ELMO_METHOD) -#define LOSS_FUNCTION "ELMO_METHOD(WCSC27)" -#endif - -// ----------------------------------- -// Below, the implementation section. -// ----------------------------------- - +#include +#include +#include #include #include #include #include #include -#include // std::exp(),std::pow(),std::log() -#include // memcpy() +#include // std::exp(),std::pow(),std::log() +#include // memcpy() +#include +#include +#include #if defined (_OPENMP) #include @@ -75,13 +55,6 @@ #include #endif -#include "../misc.h" -#include "../thread.h" -#include "../position.h" -//#include "../extra/book/book.h" -#include "../tt.h" -#include "multi_think.h" - #if defined(EVAL_NNUE) #include "../nnue/evaluate_nnue_learner.h" #include @@ -93,3470 +66,2019 @@ using namespace std; //// This is defined in the search section. //extern Book::BookMoveSelector book; -// Addition and subtraction definition for atomic -// Aligned with atomicAdd() in Apery/learner.hpp. template T operator += (std::atomic& x, const T rhs) { - T old = x.load(std::memory_order_consume); - // It is allowed that the value is rewritten from other thread at this timing. - // The idea that the value is not destroyed is good. - T desired = old + rhs; - while (!x.compare_exchange_weak(old, desired, std::memory_order_release, std::memory_order_consume)) - desired = old + rhs; - return desired; + T old = x.load(std::memory_order_consume); + // It is allowed that the value is rewritten from other thread at this timing. + // The idea that the value is not destroyed is good. + T desired = old + rhs; + while (!x.compare_exchange_weak(old, desired, std::memory_order_release, std::memory_order_consume)) + desired = old + rhs; + return desired; } template T operator -= (std::atomic& x, const T rhs) { return x += -rhs; } namespace Learner { - -// Phase array: PSVector stands for packed sfen vector. -typedef std::vector PSVector; - -bool write_out_draw_game_in_training_data_generation = false; -bool use_draw_games_in_training = false; -bool use_draw_games_in_validation = false; -bool skip_duplicated_positions_in_training = true; -bool detect_draw_by_consecutive_low_score = false; -bool detect_draw_by_insufficient_mating_material = false; -// 1.0 / PawnValueEg / 4.0 * log(10.0) -double winning_probability_coefficient = 0.00276753015984861260098316280611; -// Score scale factors. ex) If we set src_score_min_value = 0.0, -// src_score_max_value = 1.0, dest_score_min_value = 0.0, -// dest_score_max_value = 10000.0, [0.0, 1.0] will be scaled to [0, 10000]. -double src_score_min_value = 0.0; -double src_score_max_value = 1.0; -double dest_score_min_value = 0.0; -double dest_score_max_value = 1.0; -// Assume teacher signals are the scores of deep searches, and convert them into winning -// probabilities in the trainer. Sometimes we want to use the winning probabilities in the training -// data directly. In those cases, we set false to this variable. -bool convert_teacher_signal_to_winning_probability = true; -// Use raw NNUE eval value in the Eval::evaluate(). 
If hybrid eval is enabled, training data -// generation and training don't work well. -// https://discordapp.com/channels/435943710472011776/733545871911813221/748524079761326192 -bool use_raw_nnue_eval = true; -// Using WDL with win rate model instead of sigmoid -bool use_wdl = false; - -// ----------------------------------- -// write phase file -// ----------------------------------- - -// Helper class for exporting Sfen -struct SfenWriter -{ - // File name to write and number of threads to create - SfenWriter(string filename, int thread_num) - { - sfen_buffers_pool.reserve((size_t)thread_num * 10); - sfen_buffers.resize(thread_num); - - // When performing additional learning, the quality of the teacher generated after learning the evaluation function does not change much and I want to earn more teacher positions. - // Since it is preferable that old teachers also use it, it has such a specification. - fs.open(filename, ios::out | ios::binary | ios::app); - filename_ = filename; - - finished = false; - } - - ~SfenWriter() - { - finished = true; - file_worker_thread.join(); - fs.close(); - - // all buffers should be empty since file_worker_thread has written all.. - for (auto p : sfen_buffers) { assert(p == nullptr); } - assert(sfen_buffers_pool.empty()); - } - - // For each thread, flush the file by this number of phases. - const size_t SFEN_WRITE_SIZE = 5000; - - // write one by pairing the phase and evaluation value (in packed sfen format) - void write(size_t thread_id, const PackedSfenValue& psv) - { - // We have a buffer for each thread and add it there. - // If the buffer overflows, write it to a file. - - // This buffer is prepared for each thread. - auto& buf = sfen_buffers[thread_id]; - - // Secure since there is no buf at the first time and immediately after writing the thread buffer. - if (!buf) - { - buf = new PSVector(); - buf->reserve(SFEN_WRITE_SIZE); - } - - // It is prepared for each thread, so one thread does not call this write() function at the same time. - // There is no need to exclude at this point. - buf->push_back(psv); - - if (buf->size() >= SFEN_WRITE_SIZE) - { - // If you load it in sfen_buffers_pool, the worker will do the rest. - - // Mutex lock is required when changing the contents of sfen_buffers_pool. - std::unique_lock lk(mutex); - sfen_buffers_pool.push_back(buf); - - buf = nullptr; - // If you set buf == nullptr, the buffer will be allocated the next time this function is called. - } - } - - // Move what remains in the buffer for your thread to a buffer for writing to a file. - void finalize(size_t thread_id) - { - std::unique_lock lk(mutex); - - auto& buf = sfen_buffers[thread_id]; - - // There is a case that buf==nullptr, so that check is necessary. - if (buf && buf->size() != 0) - sfen_buffers_pool.push_back(buf); - - buf = nullptr; - } - - // Start the write_worker thread. - void start_file_write_worker() - { - file_worker_thread = std::thread([&] { this->file_write_worker(); }); - } - - // Dedicated thread to write to file - void file_write_worker() - { - auto output_status = [&]() - { - // also output the current time - sync_cout << endl << sfen_write_count << " sfens , at " << now_string() << sync_endl; - - // This is enough for flush(). 
- fs.flush(); - }; - - while (!finished || sfen_buffers_pool.size()) - { - vector buffers; - { - std::unique_lock lk(mutex); - - // copy the whole - buffers = sfen_buffers_pool; - sfen_buffers_pool.clear(); - } - - // sleep() if you didn't get anything - if (!buffers.size()) - sleep(100); - else - { - for (auto ptr : buffers) - { - fs.write((const char*)&((*ptr)[0]), sizeof(PackedSfenValue) * ptr->size()); - - sfen_write_count += ptr->size(); - -#if 1 - // Add the processed number here, and if it exceeds save_every, change the file name and reset this counter. - save_every_counter += ptr->size(); - if (save_every_counter >= save_every) - { - save_every_counter = 0; - // Change the file name. - - fs.close(); - - // Sequential number attached to the file - int n = (int)(sfen_write_count / save_every); - // Rename the file and open it again. Add ios::app in consideration of overwriting. (Depending on the operation, it may not be necessary.) - string filename = filename_ + "_" + std::to_string(n); - fs.open(filename, ios::out | ios::binary | ios::app); - cout << endl << "output sfen file = " << filename << endl; - } -#endif - - // Output'.' every time when writing a game record. - std::cout << "."; - - // Output the number of phases processed every 40 times - // Finally, the remainder of the teacher phase of each thread is written out, so halfway numbers are displayed, but is it okay? - // If you overuse the threads to the maximum number of logical cores, the console will be clogged, so it may be a little more loose. - if ((++time_stamp_count % 40) == 0) - output_status(); - - // Since this memory is unnecessary, release it at this timing. - delete ptr; - } - } - } - - // Output the time stamp again before the end. - output_status(); - } - - // Change the file name in this unit. - uint64_t save_every = UINT64_MAX; - -private: - - fstream fs; - - // File name passed in the constructor - std::string filename_; - - // Add the processed number here, and if it exceeds save_every, change the file name and reset this counter. - uint64_t save_every_counter = 0; - - // thread to write to the file - std::thread file_worker_thread; - // Flag that all threads have finished - atomic finished; - - // Counter for time stamp output - uint64_t time_stamp_count = 0; - - // buffer before writing to file - // sfen_buffers is the buffer for each thread - // sfen_buffers_pool is a buffer for writing. - // After loading the phase in the former buffer by SFEN_WRITE_SIZE, transfer it to the latter. - std::vector sfen_buffers; - std::vector sfen_buffers_pool; - - // Mutex required to access sfen_buffers_pool - std::mutex mutex; - - // number of written phases - uint64_t sfen_write_count = 0; -}; - -// ----------------------------------- -// worker that creates the game record (for each thread) -// ----------------------------------- - -// Class to generate sfen with multiple threads -struct MultiThinkGenSfen : public MultiThink -{ - MultiThinkGenSfen(int search_depth_, int search_depth2_, SfenWriter& sw_) - : search_depth(search_depth_), search_depth2(search_depth2_), sw(sw_) - { - hash.resize(GENSFEN_HASH_SIZE); - - // Output for confirmation if the same random seed is not drawn when parallelizing and gensfening the PC. - std::cout << prng << std::endl; - } - - virtual void thread_worker(size_t thread_id); - void start_file_write_worker() { sw.start_file_write_worker(); } - - // search_depth = search depth for normal search - int search_depth; - int search_depth2; - - // Number of the nodes to be searched. 
- // 0 represents no limits. - uint64_t nodes; - - // Upper limit of evaluation value of generated situation - int eval_limit; - - // minimum ply with random move - int random_move_minply; - // maximum ply with random move - int random_move_maxply; - // Number of random moves in one station - int random_move_count; - // Move balls with a probability of 1/N when randomly moving like Apery. - // When you move the ball again, there is a 1/N chance that it will randomly move once in the opponent's number. - // Apery has N=2. Specifying 0 here disables this function. - int random_move_like_apery; - - // For when using multi pv instead of random move. - // random_multi_pv is the number of candidates for MultiPV. - // When adopting the move of the candidate move, the difference between the evaluation value of the move of the 1st place and the evaluation value of the move of the Nth place is - // Must be in the range random_multi_pv_diff. - // random_multi_pv_depth is the search depth for MultiPV. - int random_multi_pv; - int random_multi_pv_diff; - int random_multi_pv_depth; - - // The minimum and maximum ply (number of steps from the initial phase) of the phase to write out. - int write_minply; - int write_maxply; - - // sfen exporter - SfenWriter& sw; - - // hash to limit the export of the same phase - // It must be 2**N because it will be used as the mask to calculate hash_index. - static const uint64_t GENSFEN_HASH_SIZE = 64 * 1024 * 1024; - - vector hash; // 64MB*sizeof(HASH_KEY) = 512MB -}; - -// thread_id = 0..Threads.size()-1 -void MultiThinkGenSfen::thread_worker(size_t thread_id) -{ - // For the time being, it will be treated as a draw at the maximum number of steps to write. - const int MAX_PLY2 = write_maxply; - - //Maximum StateInfo + Search PV to advance to leaf buffer - std::vector> states(MAX_PLY2 + MAX_PLY /* == search_depth + α */); - StateInfo si; - - // This move. Use this move to advance the stage. - Move m = MOVE_NONE; - - // end flag - bool quit = false; - - // Variables for draw adjudication. - // Todo: Make this as an option. - int adj_draw_ply = 80; // start the adjudication when ply reaches this value - int adj_draw_cnt = 8; // 4 move scores for each side have to be checked - int adj_draw_score = 0; // move score in CP - - // repeat until the specified number of times - while (!quit) - { - // It is necessary to set a dependent thread for Position. - // When parallelizing, Threads (since this is a vector, - // Do the same for up to Threads[0]...Threads[thread_num-1]. - auto th = Threads[thread_id]; - - auto& pos = th->rootPos; - pos.set(StartFEN, false, &si, th); - - // Test cod for Packed SFEN. - //{ - // PackedSfen packed_sfen; - // pos.sfen_pack(packed_sfen); - // std::cout << pos << std::endl; - // pos.set_from_packed_sfen(packed_sfen, &si, th); - // std::string actual = pos.fen(); - // assert(actual == StartFEN); - //} - - // Refer to the members of BookMoveSelector defined in the search section. - //auto& book = ::book; - - // Save the situation for one station, and write it out including the winning and losing at the end. - // The function to write is flush_psv() below this. - PSVector a_psv; - a_psv.reserve(MAX_PLY2 + MAX_PLY); - - // Write out the phases loaded in a_psv to a file. - // lastTurnIsWin: win/loss in the next phase after the final phase in a_psv - // 1 when winning. -1 when losing. Pass 0 for a draw. - // Return value: true if the specified number of phases has already been reached and the process ends. 
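// The "2**N" requirement on GENSFEN_HASH_SIZE above exists because the table
// index is computed with a bit mask rather than a modulo; the two are only
// equivalent for power-of-two sizes. A tiny self-contained illustration
// (the key value and helper name are arbitrary, not from this patch):
#include <cassert>
#include <cstdint>

inline void hash_mask_demo() {
    constexpr uint64_t SIZE = 64 * 1024 * 1024;     // 2^26, like GENSFEN_HASH_SIZE
    const uint64_t key = 0x9E3779B97F4A7C15ULL;     // any Zobrist-like position key
    assert((key & (SIZE - 1)) == key % SIZE);       // masking == modulo for 2^N sizes
}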
- auto flush_psv = [&](int8_t lastTurnIsWin) - { - int8_t isWin = lastTurnIsWin; - - // From the final stage (one step before) to the first stage, give information on the outcome of the game for each stage. - // The phases stored in a_psv are assumed to be continuous (in order). - for (auto it = a_psv.rbegin(); it != a_psv.rend(); ++it) - { - // If isWin == 0 (draw), multiply by -1 and it will remain 0 (draw) - isWin = - isWin; - it->game_result = isWin; - - // When I tried to write out the phase, it reached the specified number of times. - // Because the counter is added in get_next_loop_count() - // If you don't call this when the phase is output, the counter goes crazy. - auto loop_count = get_next_loop_count(); - if (loop_count == UINT64_MAX) - { - // Set the end flag. - quit = true; - return; - } - - // Write out one aspect. - sw.write(thread_id, *it); - -#if 0 - pos.set_from_packed_sfen(it->sfen); - cout << pos << "Win : " << it->isWin << " , " << it->score << endl; -#endif - } - }; - - // ply flag for whether or not to randomly move by eyes - vector random_move_flag; - { - // If you want to add a random move, random_move_maxply be sure to enter random_move_count times before the first move. - // I want you to disperse so much. - // I'm not sure how best it is. Experimenting under various conditions. - - // Make an array like a[0] = 0 ,a[1] = 1, ... - // Fisher-Yates shuffle and take out the first N items. - // Actually, I only want N pieces, so I only need to shuffle the first N pieces with Fisher-Yates. - - vector a; - a.reserve((size_t)random_move_maxply); - - // random_move_minply ,random_move_maxply is specified by 1 origin, - // Note that we are handling 0 origin here. - for (int i = std::max(random_move_minply - 1 , 0) ; i < random_move_maxply; ++i) - a.push_back(i); - - // In case of Apery random move, insert() may be called random_move_count times. - // Reserve only the size considering it. - random_move_flag.resize((size_t)random_move_maxply + random_move_count); - - // A random move that exceeds the size() of a[] cannot be applied, so limit it. - for (int i = 0 ; i < std::min(random_move_count, (int)a.size()) ; ++i) - { - swap(a[i], a[prng.rand((uint64_t)a.size() - i) + i]); - random_move_flag[a[i]] = true; - } - } - - // A counter that keeps track of the number of random moves - // When random_move_minply == -1, random moves are performed continuously, so use it at this time. - int random_move_c = 0; - - // Save history of move scores for adjudication - vector move_hist_scores; - - // ply: steps from the initial stage - for (int ply = 0; ; ++ply) - { - //cout << pos << endl; - - // Current search depth - // Goto will fly, so declare it first. - int depth = search_depth + (int)prng.rand(search_depth2 - search_depth + 1); - - // has it reached the length - if (ply >= MAX_PLY2) - { - if (write_out_draw_game_in_training_data_generation) { - // Write out as win/loss = draw. - // This way it is harder to allow the opponent to enter the ball when I enter (may) - flush_psv(0); - } - break; - } - - if (pos.is_draw(ply)) { - if (write_out_draw_game_in_training_data_generation) { - // Write if draw. - flush_psv(0); - } - break; - } - - // Initialize the Syzygy Ending Tablebase and sort the moves. - Search::RootMoves rootMoves; - for (const auto& m : MoveList(pos)) - rootMoves.emplace_back(m); - if (!rootMoves.empty()) - Tablebases::rank_root_moves(pos, rootMoves); - - // If there is no legal move, terminate the game if position - // is mate or a stalemate. 
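// The random_move_flag setup above is a partial Fisher-Yates shuffle: only the
// first random_move_count slots need to be fixed to draw that many distinct
// plies. A self-contained sketch of the same selection (std::mt19937_64 stands
// in for the engine's own PRNG; the helper name is illustrative):
#include <algorithm>
#include <cstddef>
#include <random>
#include <utility>
#include <vector>

std::vector<bool> pick_random_move_plies(int minply, int maxply, int count,
                                         std::mt19937_64& rng) {
    std::vector<int> a;                                  // candidate plies, 0-origin
    for (int i = std::max(minply - 1, 0); i < maxply; ++i)
        a.push_back(i);

    std::vector<bool> flag(maxply, false);
    const int n = std::min<int>(count, (int)a.size());
    for (int i = 0; i < n; ++i) {
        std::uniform_int_distribution<std::size_t> d(i, a.size() - 1);
        std::swap(a[i], a[d(rng)]);                      // fix slot i, then mark that ply
        flag[a[i]] = true;
    }
    return flag;
}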
- else { - if (pos.checkers()) // Mate - flush_psv(-1); - else if (write_out_draw_game_in_training_data_generation) { - flush_psv(0); // Stalemate - } - break; - } - - // Adjudicate game to a draw if the last 4 scores of each engine is 0. - if (detect_draw_by_consecutive_low_score) { - if (ply >= adj_draw_ply) { - int draw_cnt = 0; - bool is_adj_draw = false; - - for (vector::reverse_iterator it = move_hist_scores.rbegin(); - it != move_hist_scores.rend(); ++it) - { - if (abs(*it) <= adj_draw_score) - draw_cnt++; - else - break; // score should be successive - - if (draw_cnt >= adj_draw_cnt) { - is_adj_draw = true; - break; - } - } - - if (is_adj_draw) { - if (write_out_draw_game_in_training_data_generation) - flush_psv(0); - break; - } - } - } - - // Draw by insufficient mating material - if (detect_draw_by_insufficient_mating_material) { - if (pos.count() <= 4) { - int pcnt = pos.count(); - // (1) KvK - if (pcnt == 2) { - if (write_out_draw_game_in_training_data_generation) - flush_psv(0); - break; - } - // (2) KvK + 1 minor piece - if (pcnt == 3) { - int minor_pc = pos.count(WHITE) + pos.count(WHITE) + - pos.count(BLACK) + pos.count(BLACK); - if (minor_pc == 1) { - if (write_out_draw_game_in_training_data_generation) - flush_psv(0); - break; - } - } - // (3) KBvKB, bishops of the same color - else if (pcnt == 4) { - if (pos.count(WHITE) == 1 && pos.count(BLACK) == 1) { - // Color of bishops is black. - if ((pos.pieces(WHITE, BISHOP) & DarkSquares) - && (pos.pieces(BLACK, BISHOP) & DarkSquares)) - { - if (write_out_draw_game_in_training_data_generation) - flush_psv(0); - break; - } - // Color of bishops is white. - if ((pos.pieces(WHITE, BISHOP) & ~DarkSquares) - && (pos.pieces(BLACK, BISHOP) & ~DarkSquares)) - { - if (write_out_draw_game_in_training_data_generation) - flush_psv(0); - break; - } - } - } - } - } - - //// constant track - //if ((m = book.probe(pos)) != MOVE_NONE) - //{ - // // Hit the constant track. - // // The move was stored in m. - - // // Do not use the fixed phase for learning. - // a_psv.clear(); - - // if (random_move_minply != -1) - // // Random move is performed with a certain probability even in the constant phase. - // goto RANDOM_MOVE; - // else - // // When -1 is specified as random_move_minply, it points according to the standard until it goes out of the standard. - // // Prepare an innumerable number of situations that have left the constant as ConsiderationBookMoveCount true using a huge constant - // // Used for purposes such as performing a random move 5 times from there. - // goto DO_MOVE; - //} - - { - // search_depth~search_depth2 Evaluation value of hand reading and PV (best responder row) - // There should be no problem if you narrow the search window. - - auto pv_value1 = search(pos, depth, 1, nodes); - - auto value1 = pv_value1.first; - auto& pv1 = pv_value1.second; - - // For situations where the absolute evaluation value is greater than or equal to this value - // It doesn't make much sense to use that aspect for learning, so this game ends. - // Treat this as having won or lost. - - // If you win one move, declarative win, mate_in(2) will be returned here, so it will be the same value as the upper limit of eval_limit, - // This if expression is always true. The same applies to resign. - - if (abs(value1) >= eval_limit) - { - // sync_cout << pos << "eval limit = "<< eval_limit << "over ,move = "<< pv1[0] << sync_endl; - - // If value1 >= eval_limit in this aspect, you win (the turn side of this aspect). - flush_psv((value1 >= eval_limit) ? 
1 : -1); - break; - } - - // Verification of a strange move - if (pv1.size() > 0 - && (pv1[0] == MOVE_NONE || pv1[0] == MOVE_NULL) - ) - { - // MOVE_WIN is checking if it is the declaration victory stage before this - // The declarative winning move should never come back here. - // Also, when MOVE_RESIGN, value1 is a one-stop score, which should be the minimum value of eval_limit (-31998)... - cout << "Error! : " << pos.fen() << m << value1 << endl; - break; - } - - // Save the move score for adjudication. - move_hist_scores.push_back(value1); - - // Use PV's move to the leaf node and use the value that evaluated() is called on that leaf node. - auto evaluate_leaf = [&](Position& pos , vector& pv) - { - auto rootColor = pos.side_to_move(); - - int ply2 = ply; - for (auto m : pv) - { - // As a verification for debugging, make sure there are no illegal players in the middle. - // NULL_MOVE does not come. - - // I tested it out enough so I can comment it out. -#if 1 - // I shouldn't be an illegal player. - // declarative win and not mated() are tested above so - // It is guaranteed that MOVE_WIN and MOVE_RESIGN do not come as a reader. (Should...) - if (!pos.pseudo_legal(m) || !pos.legal(m)) - { - cout << "Error! : " << pos.fen() << m << endl; - } -#endif - pos.do_move(m, states[ply2++]); - - //Because the difference calculation of evaluate() cannot be performed unless each node evaluate() is called! - // If the depth is 8 or more, it seems faster not to calculate this difference. -#if defined(EVAL_NNUE) - if (depth < 8) - Eval::NNUE::update_eval(pos); -#endif // defined(EVAL_NNUE) - } - - // reach leaf - Value v; - if (pos.checkers()) { - // Sometime a king is checked. An example is a case that a checkmate is - // found in the search. If Eval::evaluate() is called whne a king is - // checked, classic eval crashes by an assertion. To avoid crashes, return - // value1 instead of the score of the PV leaf. - v = value1; - } - else { - v = Eval::evaluate(pos); - // evaluate() returns the evaluation value on the turn side, so - // If it's a turn different from root_color, you must invert v and return it. - if (rootColor != pos.side_to_move()) - v = -v; - } - - // Rewind. - // Is it C++x14, and isn't there even foreach to turn in reverse? - // for (auto it : boost::adaptors::reverse(pv)) - - for (auto it = pv.rbegin(); it != pv.rend(); ++it) - pos.undo_move(*it); - - return v; - }; - -#if 0 - dbg_hit_on(pv_value1.first == leaf_value); - // gensfen depth 3 eval_limit 32000 - // Total 217749 Hits 203579 hit rate (%) 93.490 - // gensfen depth 6 eval_limit 32000 - // Total 78407 Hits 69190 hit rate (%) 88.245 - // gensfen depth 6 eval_limit 3000 - // Total 53879 Hits 43713 hit rate (%) 81.132 - - // Problems such as pruning with moves in the substitution table. - // This is a little uncomfortable as a teacher... -#endif - - //If depth 0, pv is not obtained, so search again at depth 2. - if (search_depth <= 0) - { - pv_value1 = search(pos, 2); - pv1 = pv_value1.second; - } - - // The surroundings of the initial stage are all similar - // Do not write it out because it can lead to overlearning when used for learning. - // → comparative experiment should be done - if (ply < write_minply - 1) - { - a_psv.clear(); - goto SKIP_SAVE; - } - - // Did you just write the same phase? - // This may include the same aspect as it is generated in parallel on multiple PCs, so - // It is better to do the same process when reading. 
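// evaluate_leaf() above stores the leaf score from the root color's point of
// view: Eval::evaluate() is always relative to the side to move, so the value
// is negated whenever that side differs from the root color. A minimal
// illustration with stand-in types (not the engine's Color/Value):
enum class DemoColor { White, Black };

inline int leaf_from_root_pov(int eval_for_side_to_move,
                              DemoColor root_color, DemoColor side_to_move) {
    return (root_color == side_to_move) ? eval_for_side_to_move
                                        : -eval_for_side_to_move;
}
// e.g. with White as root and Black to move at the leaf, an evaluation of +30
// (good for Black) is recorded as -30 for the training position.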
- { - auto key = pos.key(); - auto hash_index = (size_t)(key & (GENSFEN_HASH_SIZE - 1)); - auto key2 = hash[hash_index]; - if (key == key2) - { - // when skipping regarding earlier - // Clear the saved situation because the win/loss information will be incorrect. - // anyway, when the hash matches, it's likely that the previous phases also match - // Not worth writing out. - a_psv.clear(); - goto SKIP_SAVE; - } - hash[hash_index] = key; // Replace with the current key. - } - - // Temporary saving of the situation. - { - a_psv.emplace_back(PackedSfenValue()); - auto &psv = a_psv.back(); - - // If pack is requested, write the packed sfen and the evaluation value at that time. - // The final writing is after winning or losing. - pos.sfen_pack(psv.sfen); - - //{ - // std::string before_fen = pos.fen(); - // pos.set_from_packed_sfen(psv.sfen, &si, th); - // std::string after_fen = pos.fen(); - // assert(before_fen == after_fen); - //} - - // Get the value of evaluate() as seen from the root color on the leaf node of the PV line. - //I don't know the goodness and badness of using the return value of search() as it is. - psv.score = evaluate_leaf(pos, pv1); - psv.gamePly = ply; - - // Take out the first PV hand. This should be present unless depth 0. - assert(pv_value1.second.size() >= 1); - Move pv_move1 = pv_value1.second[0]; - psv.move = pv_move1; - } - - SKIP_SAVE:; - - // For some reason, I could not get PV (hit the substitution table etc. and got stuck?) so go to the next game. - // It's a rare case, so you can ignore it. - if (pv1.size() == 0) - break; - - // search_depth Advance the phase by hand reading. - m = pv1[0]; - } - - RANDOM_MOVE:; - - // Phase to randomly choose one from legal hands - if ( - // 1. Random move of random_move_count times from random_move_minply to random_move_maxply - (random_move_minply != -1 && ply <(int)random_move_flag.size() && random_move_flag[ply]) || - // 2. A mode to perform random move of random_move_count times after leaving the track - (random_move_minply == -1 && random_move_c list(pos); - - // I don't really know the goodness and badness of making this the Apery method. - if (random_move_like_apery == 0 - || prng.rand(random_move_like_apery) != 0 - ) - { - // Normally one move from legal move - m = list.at((size_t)prng.rand((uint64_t)list.size())); - } - else { - // if you can move the ball, move the ball - Move moves[8]; // Near 8 - Move* p = &moves[0]; - for (auto& m : list) - if (type_of(pos.moved_piece(m)) == KING) - *(p++) = m; - size_t n = p - &moves[0]; - if (n != 0) - { - // move to move the ball - m = moves[prng.rand(n)]; - - // In Apery method, at this time there is a 1/2 chance that the opponent will also move randomly - if (prng.rand(2) == 0) - { - // Is it a simple hack to add a "1" next to random_move_flag[ply]? - random_move_flag.insert(random_move_flag.begin() + ply + 1, 1, true); - } - } - else - // Normally one move from legal move - m = list.at((size_t)prng.rand((uint64_t)list.size())); - } - - // I put in the code of two handed balls, but if you choose one from legal hands, it should be equivalent to that - // I decided it's unnecessary because it just makes the code more complicated. - } - else { - // Since the logic becomes complicated, I'm sorry, I will search again with MultiPV here. 
- Learner::search(pos, random_multi_pv_depth, random_multi_pv); - // Select one from the top N hands of root Moves - - auto& rm = pos.this_thread()->rootMoves; - - uint64_t s = min((uint64_t)rm.size(), (uint64_t)random_multi_pv); - for (uint64_t i = 1; i < s; ++i) - { - // The difference from the evaluation value of rm[0] must be within the range of random_multi_pv_diff. - // It can be assumed that rm[x].score is arranged in descending order. - if (rm[0].score > rm[i].score + random_multi_pv_diff) - { - s = i; - break; - } - } - - m = rm[prng.rand(s)].pv[0]; - - // I haven't written one phase yet, but it ended, so the writing process ends and the next game starts. - if (!is_ok(m)) - break; - } - - // When trying to evaluate the move from the outcome of the game, - // There is a random move this time, so try not to fall below this. - a_psv.clear(); // clear saved aspect - } - - DO_MOVE:; - pos.do_move(m, states[ply]); - - // Call node evaluate() for each difference calculation. - Eval::NNUE::update_eval(pos); - - } // for (int ply = 0; ; ++ply) - - } // while(!quit) - - sw.finalize(thread_id); -} - -// ----------------------------------- -// Command to generate a game record (master thread) -// ----------------------------------- - -// Command to generate a game record -void gen_sfen(Position&, istringstream& is) -{ - // number of threads (given by USI setoption) - uint32_t thread_num = (uint32_t)Options["Threads"]; - - // Number of generated game records default = 8 billion phases (Ponanza specification) - uint64_t loop_max = 8000000000UL; - - // Stop the generation when the evaluation value reaches this value. - int eval_limit = 3000; - - // search depth - int search_depth = 3; - int search_depth2 = INT_MIN; - - // Number of nodes to be searched. - uint64_t nodes = 0; - - // minimum ply, maximum ply and number of random moves - int random_move_minply = 1; - int random_move_maxply = 24; - int random_move_count = 5; - // A function to move the random move mainly like Apery - // If this is set to 3, the ball will move with a probability of 1/3. - int random_move_like_apery = 0; - // If you search with multipv instead of random move and choose from among them randomly, set random_multi_pv = 1 or more. - int random_multi_pv = 0; - int random_multi_pv_diff = 32000; - int random_multi_pv_depth = INT_MIN; - - // The minimum and maximum ply (number of steps from the initial phase) of the phase to write out. - int write_minply = 16; - int write_maxply = 400; - - // File name to write - string output_file_name = "generated_kifu.bin"; - - string token; - - // When hit to eval hash, as a evaluation value near the initial stage, if a hash collision occurs and a large value is written - // When eval_limit is set small, eval_limit will be exceeded every time in the initial phase, and phase generation will not proceed. - // Therefore, eval hash needs to be disabled. - // After that, when the hash of the eval hash collides, the evaluation value of a strange value is used, and it may be unpleasant to use it for the teacher. - bool use_eval_hash = false; - - // Save to file in this unit. - // File names are serialized like file_1.bin, file_2.bin. - uint64_t save_every = UINT64_MAX; - - // Add a random number to the end of the file name. 
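// The MultiPV branch above draws uniformly from the top candidates whose score
// stays within random_multi_pv_diff of the best root move. A self-contained
// sketch over a plain score vector sorted in descending order, as rootMoves is
// (the function name and std::mt19937_64 are illustrative; assumes a non-empty
// vector and multi_pv >= 1):
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

std::size_t pick_multipv_index(const std::vector<int>& scores,
                               uint64_t multi_pv, int diff, std::mt19937_64& rng) {
    uint64_t s = std::min<uint64_t>(scores.size(), multi_pv);
    for (uint64_t i = 1; i < s; ++i)
        if (scores[0] > scores[i] + diff) {   // fell outside the allowed window
            s = i;
            break;
        }
    std::uniform_int_distribution<uint64_t> d(0, s - 1);
    return static_cast<std::size_t>(d(rng));
}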
- bool random_file_name = false; - - while (true) - { - token = ""; - is >> token; - if (token == "") - break; - - if (token == "depth") - is >> search_depth; - else if (token == "depth2") - is >> search_depth2; - else if (token == "nodes") - is >> nodes; - else if (token == "loop") - is >> loop_max; - else if (token == "output_file_name") - is >> output_file_name; - else if (token == "eval_limit") - { - is >> eval_limit; - // Limit the maximum to a one-stop score. (Otherwise you might not end the loop) - eval_limit = std::min(eval_limit, (int)mate_in(2)); - } - else if (token == "random_move_minply") - is >> random_move_minply; - else if (token == "random_move_maxply") - is >> random_move_maxply; - else if (token == "random_move_count") - is >> random_move_count; - else if (token == "random_move_like_apery") - is >> random_move_like_apery; - else if (token == "random_multi_pv") - is >> random_multi_pv; - else if (token == "random_multi_pv_diff") - is >> random_multi_pv_diff; - else if (token == "random_multi_pv_depth") - is >> random_multi_pv_depth; - else if (token == "write_minply") - is >> write_minply; - else if (token == "write_maxply") - is >> write_maxply; - else if (token == "use_eval_hash") - is >> use_eval_hash; - else if (token == "save_every") - is >> save_every; - else if (token == "random_file_name") - is >> random_file_name; - // Accept also the old option name. - else if (token == "use_draw_in_training_data_generation" || token == "write_out_draw_game_in_training_data_generation") - is >> write_out_draw_game_in_training_data_generation; - // Accept also the old option name. - else if (token == "use_game_draw_adjudication" || token == "detect_draw_by_consecutive_low_score") - is >> detect_draw_by_consecutive_low_score; - else if (token == "detect_draw_by_insufficient_mating_material") - is >> detect_draw_by_insufficient_mating_material; - else if (token == "use_raw_nnue_eval") - is >> use_raw_nnue_eval; - else - cout << "Error! : Illegal token " << token << endl; - } - -#if defined(USE_GLOBAL_OPTIONS) - // Save it for later restore. - auto oldGlobalOptions = GlobalOptions; - GlobalOptions.use_eval_hash = use_eval_hash; -#endif - - // If search depth2 is not set, leave it the same as search depth. - if (search_depth2 == INT_MIN) - search_depth2 = search_depth; - if (random_multi_pv_depth == INT_MIN) - random_multi_pv_depth = search_depth; - - if (random_file_name) - { - // Give a random number to output_file_name at this point. - // Do not use std::random_device(). Because it always the same integers on MinGW. - PRNG r(std::chrono::system_clock::now().time_since_epoch().count()); - // Just in case, reassign the random numbers. - for(int i=0;i<10;++i) - r.rand(1); - auto to_hex = [](uint64_t u){ - std::stringstream ss; - ss << std::hex << u; - return ss.str(); - }; - // I don't want to wear 64bit numbers by accident, so I'm going to make a 64bit number 2 just in case. 
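// The random_file_name handling above seeds a PRNG from the clock
// (std::random_device is avoided because it yields the same sequence on MinGW)
// and, just below, appends two 64-bit values in hex to the output file name.
// The same idea as a standalone helper (std::mt19937_64 replaces the engine's
// PRNG; the function name is illustrative):
#include <chrono>
#include <cstdint>
#include <random>
#include <sstream>
#include <string>

std::string with_random_suffix(const std::string& base) {
    std::mt19937_64 r(static_cast<uint64_t>(
        std::chrono::system_clock::now().time_since_epoch().count()));
    auto to_hex = [](uint64_t u) {
        std::stringstream ss;
        ss << std::hex << u;
        return ss.str();
    };
    // Two 64-bit values make an accidental name collision practically impossible.
    return base + "_" + to_hex(r()) + to_hex(r());
}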
- output_file_name = output_file_name + "_" + to_hex(r.rand()) + to_hex(r.rand()); - } - - std::cout << "gensfen : " << endl - << " search_depth = " << search_depth << " to " << search_depth2 << endl - << " nodes = " << nodes << endl - << " loop_max = " << loop_max << endl - << " eval_limit = " << eval_limit << endl - << " thread_num (set by USI setoption) = " << thread_num << endl - //<< " book_moves (set by USI setoption) = " << Options["BookMoves"] << endl - << " random_move_minply = " << random_move_minply << endl - << " random_move_maxply = " << random_move_maxply << endl - << " random_move_count = " << random_move_count << endl - << " random_move_like_apery = " << random_move_like_apery << endl - << " random_multi_pv = " << random_multi_pv << endl - << " random_multi_pv_diff = " << random_multi_pv_diff << endl - << " random_multi_pv_depth = " << random_multi_pv_depth << endl - << " write_minply = " << write_minply << endl - << " write_maxply = " << write_maxply << endl - << " output_file_name = " << output_file_name << endl - << " use_eval_hash = " << use_eval_hash << endl - << " save_every = " << save_every << endl - << " random_file_name = " << random_file_name << endl - << " write_out_draw_game_in_training_data_generation = " << write_out_draw_game_in_training_data_generation << endl - << " detect_draw_by_consecutive_low_score = " << detect_draw_by_consecutive_low_score << endl - << " detect_draw_by_insufficient_mating_material = " << detect_draw_by_insufficient_mating_material << endl; - - // Show if the training data generator uses NNUE. - Eval::verify_NNUE(); - - // Create and execute threads as many as Options["Threads"]. - { - SfenWriter sw(output_file_name, thread_num); - sw.save_every = save_every; - - MultiThinkGenSfen multi_think(search_depth, search_depth2, sw); - multi_think.nodes = nodes; - multi_think.set_loop_max(loop_max); - multi_think.eval_limit = eval_limit; - multi_think.random_move_minply = random_move_minply; - multi_think.random_move_maxply = random_move_maxply; - multi_think.random_move_count = random_move_count; - multi_think.random_move_like_apery = random_move_like_apery; - multi_think.random_multi_pv = random_multi_pv; - multi_think.random_multi_pv_diff = random_multi_pv_diff; - multi_think.random_multi_pv_depth = random_multi_pv_depth; - multi_think.write_minply = write_minply; - multi_think.write_maxply = write_maxply; - multi_think.start_file_write_worker(); - multi_think.go_think(); - - // Since we are joining with the destructor of SfenWriter, please give a message that it has finished after the join - // Enclose this in a block because it should be displayed. - } - - std::cout << "gensfen finished." << endl; - -#if defined(USE_GLOBAL_OPTIONS) - // Restore Global Options. 
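// For illustration, the options parsed above can be supplied on one line; the
// token values here are arbitrary examples, not recommendations:
//
//   std::istringstream demo_args(
//       "depth 8 loop 100000000 eval_limit 3000 random_move_count 5 "
//       "write_minply 16 output_file_name generated_kifu.bin save_every 250000000");
//   // gen_sfen(pos, demo_args);   // pos: the Position owned by the UCI loop
//
// Any unrecognized token is reported via "Error! : Illegal token".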
- GlobalOptions = oldGlobalOptions; -#endif - -} - -// ----------------------------------- -// command to learn from the generated game (learn) -// ----------------------------------- - -// ordinary sigmoid function -double sigmoid(double x) -{ - return 1.0 / (1.0 + std::exp(-x)); -} - -// A function that converts the evaluation value to the winning rate [0,1] -double winning_percentage(double value) -{ - // 1/(1+10^(-Eval/4)) - // = 1/(1+e^(-Eval/4*ln(10)) - // = sigmoid(Eval/4*ln(10)) - return sigmoid(value * winning_probability_coefficient); -} - -// A function that converts the evaluation value to the winning rate [0,1] -double winning_percentage_wdl(double value, int ply) -{ - double wdl_w = UCI::win_rate_model_double( value, ply); - double wdl_l = UCI::win_rate_model_double(-value, ply); - double wdl_d = 1000.0 - wdl_w - wdl_l; - - return (wdl_w + wdl_d / 2.0) / 1000.0; -} - -// A function that converts the evaluation value to the winning rate [0,1] -double winning_percentage(double value, int ply) -{ - if (use_wdl) { - return winning_percentage_wdl(value, ply); - } - else { - return winning_percentage(value); - } -} - -double calc_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply) -{ - double p = deep_win_rate; - double q = winning_percentage(shallow_eval, ply); - return -p * std::log(q) - (1 - p) * std::log(1 - q); -} - -double calc_d_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply) -{ - constexpr double epsilon = 0.000001; - double y1 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval , ply); - double y2 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval + epsilon, ply); - - // Divide by the winning_probability_coefficient to match scale with the sigmoidal win rate - return ((y2 - y1) / epsilon) / winning_probability_coefficient; -} - -double dsigmoid(double x) -{ - // Sigmoid function - // f(x) = 1/(1+exp(-x)) - // the first derivative is - // f'(x) = df/dx = f(x)・{ 1-f(x)} - // becomes - - return sigmoid(x) * (1.0 - sigmoid(x)); -} - -// When the objective function is the sum of squares of the difference in winning percentage + static bool use_draw_games_in_training = false; + static bool use_draw_games_in_validation = false; + static bool skip_duplicated_positions_in_training = true; + // 1.0 / PawnValueEg / 4.0 * log(10.0) + static double winning_probability_coefficient = 0.00276753015984861260098316280611; + // Score scale factors. ex) If we set src_score_min_value = 0.0, + // src_score_max_value = 1.0, dest_score_min_value = 0.0, + // dest_score_max_value = 10000.0, [0.0, 1.0] will be scaled to [0, 10000]. + static double src_score_min_value = 0.0; + static double src_score_max_value = 1.0; + static double dest_score_min_value = 0.0; + static double dest_score_max_value = 1.0; + // Assume teacher signals are the scores of deep searches, and convert them into winning + // probabilities in the trainer. Sometimes we want to use the winning probabilities in the training + // data directly. In those cases, we set false to this variable. + static bool convert_teacher_signal_to_winning_probability = true; + // Use raw NNUE eval value in the Eval::evaluate(). If hybrid eval is enabled, training data + // generation and training don't work well. 
+ // https://discordapp.com/channels/435943710472011776/733545871911813221/748524079761326192 + static bool use_raw_nnue_eval = true; + // Using WDL with win rate model instead of sigmoid + static bool use_wdl = false; + + // ----------------------------------- + // command to learn from the generated game (learn) + // ----------------------------------- + + // ordinary sigmoid function + double sigmoid(double x) + { + return 1.0 / (1.0 + std::exp(-x)); + } + + // A function that converts the evaluation value to the winning rate [0,1] + double winning_percentage(double value) + { + // 1/(1+10^(-Eval/4)) + // = 1/(1+e^(-Eval/4*ln(10)) + // = sigmoid(Eval/4*ln(10)) + return sigmoid(value * winning_probability_coefficient); + } + + // A function that converts the evaluation value to the winning rate [0,1] + double winning_percentage_wdl(double value, int ply) + { + double wdl_w = UCI::win_rate_model_double(value, ply); + double wdl_l = UCI::win_rate_model_double(-value, ply); + double wdl_d = 1000.0 - wdl_w - wdl_l; + + return (wdl_w + wdl_d / 2.0) / 1000.0; + } + + // A function that converts the evaluation value to the winning rate [0,1] + double winning_percentage(double value, int ply) + { + if (use_wdl) { + return winning_percentage_wdl(value, ply); + } + else { + return winning_percentage(value); + } + } + + double calc_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply) + { + double p = deep_win_rate; + double q = winning_percentage(shallow_eval, ply); + return -p * std::log(q) - (1 - p) * std::log(1 - q); + } + + double calc_d_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply) + { + constexpr double epsilon = 0.000001; + double y1 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval, ply); + double y2 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval + epsilon, ply); + + // Divide by the winning_probability_coefficient to match scale with the sigmoidal win rate + return ((y2 - y1) / epsilon) / winning_probability_coefficient; + } + + double dsigmoid(double x) + { + // Sigmoid function + // f(x) = 1/(1+exp(-x)) + // the first derivative is + // f'(x) = df/dx = f(x)・{ 1-f(x)} + // becomes + + return sigmoid(x) * (1.0 - sigmoid(x)); + } + + // When the objective function is the sum of squares of the difference in winning percentage #if defined (LOSS_FUNCTION_IS_WINNING_PERCENTAGE) // function to calculate the gradient -double calc_grad(Value deep, Value shallow, PackedSfenValue& psv) -{ - // The square of the win rate difference minimizes it in the objective function. - // Objective function J = 1/2m Σ (win_rate(shallow)-win_rate(deep) )^2 - // However, σ is a sigmoid function that converts the evaluation value into the difference in the winning percentage. - // m is the number of samples. shallow is the evaluation value for a shallow search (qsearch()). deep is the evaluation value for deep search. - // If W is the feature vector (parameter of the evaluation function) and Xi and Yi are teachers - // shallow = W*Xi // * is the Hadamard product, transposing W and meaning X - // f(Xi) = win_rate(W*Xi) - // If σ(i th deep) = Yi, - // J = m/2 Σ (f(Xi)-Yi )^2 - // becomes a common expression. 
- // W is a vector, and if we write the jth element as Wj, from the chain rule - // ∂J/∂Wj = ∂J/∂f ・∂f/∂W ・∂W/∂Wj - // = 1/m Σ (f(Xi)-y) ・f'(Xi) ・ 1 + double calc_grad(Value deep, Value shallow, PackedSfenValue& psv) + { + // The square of the win rate difference minimizes it in the objective function. + // Objective function J = 1/2m Σ (win_rate(shallow)-win_rate(deep) )^2 + // However, σ is a sigmoid function that converts the evaluation value into the difference in the winning percentage. + // m is the number of samples. shallow is the evaluation value for a shallow search (qsearch()). deep is the evaluation value for deep search. + // If W is the feature vector (parameter of the evaluation function) and Xi and Yi are teachers + // shallow = W*Xi // * is the Hadamard product, transposing W and meaning X + // f(Xi) = win_rate(W*Xi) + // If σ(i th deep) = Yi, + // J = m/2 Σ (f(Xi)-Yi )^2 + // becomes a common expression. + // W is a vector, and if we write the jth element as Wj, from the chain rule + // ∂J/∂Wj = ∂J/∂f ・∂f/∂W ・∂W/∂Wj + // = 1/m Σ (f(Xi)-y) ・f'(Xi) ・ 1 - // 1/m will be multiplied later, but the contents of Σ can be retained in the array as the value of the gradient. - // f'(Xi) = win_rate'(shallow) = sigmoid'(shallow/600) = dsigmoid(shallow / 600) / 600 - // This /600 at the end is adjusted by the learning rate, so do not write it.. - // Also, the coefficient of 1/m is unnecessary if you use the update formula that has the automatic gradient adjustment function like Adam and AdaGrad. - // Therefore, it is not necessary to save it in memory. + // 1/m will be multiplied later, but the contents of Σ can be retained in the array as the value of the gradient. + // f'(Xi) = win_rate'(shallow) = sigmoid'(shallow/600) = dsigmoid(shallow / 600) / 600 + // This /600 at the end is adjusted by the learning rate, so do not write it.. + // Also, the coefficient of 1/m is unnecessary if you use the update formula that has the automatic gradient adjustment function like Adam and AdaGrad. + // Therefore, it is not necessary to save it in memory. - double p = winning_percentage(deep); - double q = winning_percentage(shallow); - return (q - p) * dsigmoid(double(shallow) / 600.0); -} + double p = winning_percentage(deep); + double q = winning_percentage(shallow); + return (q - p) * dsigmoid(double(shallow) / 600.0); + } #endif #if defined (LOSS_FUNCTION_IS_CROSS_ENTOROPY) -double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) -{ - // Objective function with cross entropy + double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) + { + // Objective function with cross entropy - // For the concept and nature of cross entropy, - // http://nnadl-ja.github.io/nnadl_site_ja/chap3.html#the_cross-entropy_cost_function - // http://postd.cc/visual-information-theory-3/ - // Refer to etc. + // For the concept and nature of cross entropy, + // http://nnadl-ja.github.io/nnadl_site_ja/chap3.html#the_cross-entropy_cost_function + // http://postd.cc/visual-information-theory-3/ + // Refer to etc. - // Objective function design) - // We want to make the distribution of p closer to the distribution of q → Think of it as the problem of minimizing the cross entropy between the probability distributions of p and q. 
- // J = H(p,q) =-Σ p(x) log(q(x)) = -p log q-(1-p) log(1-q) - // x + // Objective function design) + // We want to make the distribution of p closer to the distribution of q → Think of it as the problem of minimizing the cross entropy between the probability distributions of p and q. + // J = H(p,q) =-Σ p(x) log(q(x)) = -p log q-(1-p) log(1-q) + // x - // p is a constant and q is a Wi function (q = σ(W・Xi) ). - // ∂J/∂Wi = -p・q'/q-(1-p)(1-q)'/(1-q) - // = ... - // = q-p. + // p is a constant and q is a Wi function (q = σ(W・Xi) ). + // ∂J/∂Wi = -p・q'/q-(1-p)(1-q)'/(1-q) + // = ... + // = q-p. - double p = winning_percentage(deep); - double q = winning_percentage(shallow); + double p = winning_percentage(deep); + double q = winning_percentage(shallow); - return q - p; -} + return q - p; + } #endif #if defined ( LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE ) -double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) -{ - // Version that does not pass the winning percentage function - // This, unless EVAL_LIMIT is set low, trying to match the evaluation value with the shape of the end stage - // eval may exceed the range of eval. - return shallow - deep; -} + double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) + { + // Version that does not pass the winning percentage function + // This, unless EVAL_LIMIT is set low, trying to match the evaluation value with the shape of the end stage + // eval may exceed the range of eval. + return shallow - deep; + } #endif #if defined ( LOSS_FUNCTION_IS_ELMO_METHOD ) -// A constant used in elmo (WCSC27). Adjustment required. -// Since elmo does not internally divide the expression, the value is different. -// You can set this value with the learn command. -// 0.33 is equivalent to the constant (0.5) used in elmo (WCSC27) -double ELMO_LAMBDA = 0.33; -double ELMO_LAMBDA2 = 0.33; -double ELMO_LAMBDA_LIMIT = 32000; + // A constant used in elmo (WCSC27). Adjustment required. + // Since elmo does not internally divide the expression, the value is different. + // You can set this value with the learn command. + // 0.33 is equivalent to the constant (0.5) used in elmo (WCSC27) + double ELMO_LAMBDA = 0.33; + double ELMO_LAMBDA2 = 0.33; + double ELMO_LAMBDA_LIMIT = 32000; -double calc_grad(Value teacher_signal, Value shallow , const PackedSfenValue& psv) -{ - // elmo (WCSC27) method - // Correct with the actual game wins and losses. + double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv) + { + // elmo (WCSC27) method + // Correct with the actual game wins and losses. - // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71 - double scaled_teacher_signal = teacher_signal; - // Normalize to [0.0, 1.0]. - scaled_teacher_signal = (scaled_teacher_signal - src_score_min_value) / (src_score_max_value - src_score_min_value); - // Scale to [dest_score_min_value, dest_score_max_value]. - scaled_teacher_signal = scaled_teacher_signal * (dest_score_max_value - dest_score_min_value) + dest_score_min_value; + // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71 + double scaled_teacher_signal = teacher_signal; + // Normalize to [0.0, 1.0]. + scaled_teacher_signal = (scaled_teacher_signal - src_score_min_value) / (src_score_max_value - src_score_min_value); + // Scale to [dest_score_min_value, dest_score_max_value]. 
+ scaled_teacher_signal = scaled_teacher_signal * (dest_score_max_value - dest_score_min_value) + dest_score_min_value; - const double q = winning_percentage(shallow, psv.gamePly); - // Teacher winning probability. - double p = scaled_teacher_signal; - if (convert_teacher_signal_to_winning_probability) { - p = winning_percentage(scaled_teacher_signal, psv.gamePly); - } + const double q = winning_percentage(shallow, psv.gamePly); + // Teacher winning probability. + double p = scaled_teacher_signal; + if (convert_teacher_signal_to_winning_probability) { + p = winning_percentage(scaled_teacher_signal, psv.gamePly); + } - // Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw. - // game_result = 1,0,-1 so add 1 and divide by 2. - const double t = double(psv.game_result + 1) / 2; + // Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw. + // game_result = 1,0,-1 so add 1 and divide by 2. + const double t = double(psv.game_result + 1) / 2; - // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA. - const double lambda = (abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 : ELMO_LAMBDA; + // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA. + const double lambda = (abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 : ELMO_LAMBDA; - double grad; - if (use_wdl) { - double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly); - double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly); - grad = lambda * dce_p + (1.0 - lambda) * dce_t; - } - else { - // Use the actual win rate as a correction term. - // This is the idea of ​​elmo (WCSC27), modern O-parts. - grad = lambda * (q - p) + (1.0 - lambda) * (q - t); - } + double grad; + if (use_wdl) { + double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly); + double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly); + grad = lambda * dce_p + (1.0 - lambda) * dce_t; + } + else { + // Use the actual win rate as a correction term. + // This is the idea of ​​elmo (WCSC27), modern O-parts. + grad = lambda * (q - p) + (1.0 - lambda) * (q - t); + } - return grad; -} + return grad; + } -// Calculate cross entropy during learning -// The individual cross entropy of the win/loss term and win rate term of the elmo expression is returned to the arguments cross_entropy_eval and cross_entropy_win. -void calc_cross_entropy(Value teacher_signal, Value shallow, const PackedSfenValue& psv, - double& cross_entropy_eval, double& cross_entropy_win, double& cross_entropy, - double& entropy_eval, double& entropy_win, double& entropy) -{ - // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71 - double scaled_teacher_signal = teacher_signal; - // Normalize to [0.0, 1.0]. - scaled_teacher_signal = (scaled_teacher_signal - src_score_min_value) / (src_score_max_value - src_score_min_value); - // Scale to [dest_score_min_value, dest_score_max_value]. - scaled_teacher_signal = scaled_teacher_signal * (dest_score_max_value - dest_score_min_value) + dest_score_min_value; + // Calculate cross entropy during learning + // The individual cross entropy of the win/loss term and win rate term of the elmo expression is returned to the arguments cross_entropy_eval and cross_entropy_win. 
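// A worked example of the blend above (arbitrary numbers, for illustration):
// with q = 0.55 (win rate from the shallow eval), p = 0.70 (teacher win rate),
// t = 1.0 (the game was won) and lambda = 0.33:
//   grad = 0.33 * (0.55 - 0.70) + 0.67 * (0.55 - 1.00)
//        = -0.0495 - 0.3015
//        = -0.3510
// The game-result term dominates because lambda is small, and under gradient
// descent the negative value pushes the shallow evaluation upward.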
+ void calc_cross_entropy(Value teacher_signal, Value shallow, const PackedSfenValue& psv, + double& cross_entropy_eval, double& cross_entropy_win, double& cross_entropy, + double& entropy_eval, double& entropy_win, double& entropy) + { + // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71 + double scaled_teacher_signal = teacher_signal; + // Normalize to [0.0, 1.0]. + scaled_teacher_signal = (scaled_teacher_signal - src_score_min_value) / (src_score_max_value - src_score_min_value); + // Scale to [dest_score_min_value, dest_score_max_value]. + scaled_teacher_signal = scaled_teacher_signal * (dest_score_max_value - dest_score_min_value) + dest_score_min_value; - // Teacher winning probability. - double p = scaled_teacher_signal; - if (convert_teacher_signal_to_winning_probability) { - p = winning_percentage(scaled_teacher_signal); - } - const double q /* eval_winrate */ = winning_percentage(shallow); - const double t = double(psv.game_result + 1) / 2; + // Teacher winning probability. + double p = scaled_teacher_signal; + if (convert_teacher_signal_to_winning_probability) { + p = winning_percentage(scaled_teacher_signal); + } + const double q /* eval_winrate */ = winning_percentage(shallow); + const double t = double(psv.game_result + 1) / 2; - constexpr double epsilon = 0.000001; + constexpr double epsilon = 0.000001; - // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA. - const double lambda = (abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 : ELMO_LAMBDA; + // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA. + const double lambda = (abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 : ELMO_LAMBDA; - const double m = (1.0 - lambda) * t + lambda * p; + const double m = (1.0 - lambda) * t + lambda * p; - cross_entropy_eval = - (-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon)); - cross_entropy_win = - (-t * std::log(q + epsilon) - (1.0 - t) * std::log(1.0 - q + epsilon)); - entropy_eval = - (-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon)); - entropy_win = - (-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon)); + cross_entropy_eval = + (-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon)); + cross_entropy_win = + (-t * std::log(q + epsilon) - (1.0 - t) * std::log(1.0 - q + epsilon)); + entropy_eval = + (-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon)); + entropy_win = + (-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon)); - cross_entropy = - (-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon)); - entropy = - (-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon)); -} + cross_entropy = + (-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon)); + entropy = + (-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon)); + } #endif -// Other variations may be prepared as the objective function.. - - -double calc_grad(Value shallow, const PackedSfenValue& psv) { - return calc_grad((Value)psv.score, shallow, psv); -} - -// Sfen reader -struct SfenReader -{ - // Do not use std::random_device(). Because it always the same integers on MinGW. 
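// calc_cross_entropy() above adds a small epsilon inside every log so that a
// probability of exactly 0 or 1 (e.g. the win/loss target t of a decisive
// game) cannot produce log(0). The same guard as a standalone helper (the
// function name is illustrative):
#include <cmath>

inline double stabilized_cross_entropy(double p, double q, double eps = 1e-6) {
    return -p * std::log(q + eps) - (1.0 - p) * std::log(1.0 - q + eps);
}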
- SfenReader(int thread_num) : prng(std::chrono::system_clock::now().time_since_epoch().count()) - { - packed_sfens.resize(thread_num); - total_read = 0; - total_done = 0; - last_done = 0; - next_update_weights = 0; - save_count = 0; - end_of_files = false; - no_shuffle = false; - stop_flag = false; - - hash.resize(READ_SFEN_HASH_SIZE); - } - - ~SfenReader() - { - if (file_worker_thread.joinable()) - file_worker_thread.join(); - - for (auto p : packed_sfens) - delete p; - for (auto p : packed_sfens_pool) - delete p; - } - - // number of phases used for calculation such as mse - // mini-batch size = 1M is standard, so 0.2% of that should be negligible in terms of time. - //Since search() is performed with depth = 1 in calculation of move match rate, simple comparison is not possible... - const uint64_t sfen_for_mse_size = 2000; - - // Load the phase for calculation such as mse. - void read_for_mse() - { - auto th = Threads.main(); - Position& pos = th->rootPos; - for (uint64_t i = 0; i < sfen_for_mse_size; ++i) - { - PackedSfenValue ps; - if (!read_to_thread_buffer(0, ps)) - { - cout << "Error! read packed sfen , failed." << endl; - break; - } - sfen_for_mse.push_back(ps); - - // Get the hash key. - StateInfo si; - pos.set_from_packed_sfen(ps.sfen,&si,th); - sfen_for_mse_hash.insert(pos.key()); - } - } - - void read_validation_set(const string file_name, int eval_limit) - { - ifstream fs(file_name, ios::binary); - - while (fs) - { - PackedSfenValue p; - if (fs.read((char*)&p, sizeof(PackedSfenValue))) - { - if (eval_limit < abs(p.score)) - continue; - if (!use_draw_games_in_validation && p.game_result == 0) - continue; - sfen_for_mse.push_back(p); - } else { - break; - } - } - } - - // Number of phases buffered by each thread 0.1M phases. 4M phase at 40HT - const size_t THREAD_BUFFER_SIZE = 10 * 1000; - - // Buffer for reading files (If this is made larger, the shuffle becomes larger and the phases may vary. - // If it is too large, the memory consumption will increase. - // SFEN_READ_SIZE is a multiple of THREAD_BUFFER_SIZE. - const size_t SFEN_READ_SIZE = LEARN_SFEN_READ_SIZE; - - // [ASYNC] Thread returns one aspect. Otherwise returns false. - bool read_to_thread_buffer(size_t thread_id, PackedSfenValue& ps) - { - // If there are any positions left in the thread buffer, retrieve one and return it. - auto& thread_ps = packed_sfens[thread_id]; - - // Fill the read buffer if there is no remaining buffer, but if it doesn't even exist, finish. - if ((thread_ps == nullptr || thread_ps->size() == 0) // If the buffer is empty, fill it. - && !read_to_thread_buffer_impl(thread_id)) - return false; - - // read_to_thread_buffer_impl() returned true, - // Since the filling of the thread buffer with the phase has been completed successfully - // thread_ps->rbegin() is alive. - - ps = *(thread_ps->rbegin()); - thread_ps->pop_back(); - - // If you've run out of buffers, call delete yourself to free this buffer. - if (thread_ps->size() == 0) - { - - delete thread_ps; - thread_ps = nullptr; - } - - return true; - } - - // [ASYNC] Read some aspects into thread buffer. - bool read_to_thread_buffer_impl(size_t thread_id) - { - while (true) - { - { - std::unique_lock lk(mutex); - // If you can fill from the file buffer, that's fine. - if (packed_sfens_pool.size() != 0) - { - // It seems that filling is possible, so fill and finish. 
- - packed_sfens[thread_id] = packed_sfens_pool.front(); - packed_sfens_pool.pop_front(); - - total_read += THREAD_BUFFER_SIZE; - - return true; - } - } - - // The file to read is already gone. No more use. - if (end_of_files) - return false; - - // Waiting for file worker to fill packed_sfens_pool. - // The mutex isn't locked, so it should fill up soon. - sleep(1); - } - - } - - // Start a thread that loads the phase file in the background. - void start_file_read_worker() - { - file_worker_thread = std::thread([&] { this->file_read_worker(); }); - } - - // for file read-only threads - void file_read_worker() - { - auto open_next_file = [&]() - { - if (fs.is_open()) - fs.close(); - - // no more - if (filenames.size() == 0) - return false; - - // Get the next file name. - string filename = *filenames.rbegin(); - filenames.pop_back(); - - fs.open(filename, ios::in | ios::binary); - cout << "open filename = " << filename << endl; - assert(fs); - - return true; - }; - - while (true) - { - // Wait for the buffer to run out. - // This size() is read only, so you don't need to lock it. - while (!stop_flag && packed_sfens_pool.size() >= SFEN_READ_SIZE / THREAD_BUFFER_SIZE) - sleep(100); - if (stop_flag) - return; - - PSVector sfens; - sfens.reserve(SFEN_READ_SIZE); - - // Read from the file into the file buffer. - while (sfens.size() < SFEN_READ_SIZE) - { - PackedSfenValue p; - if (fs.read((char*)&p, sizeof(PackedSfenValue))) - { - sfens.push_back(p); - } else - { - // read failure - if (!open_next_file()) - { - // There was no next file. Abon. - cout << "..end of files." << endl; - end_of_files = true; - return; - } - } - } - - // Shuffle the read phase data. - // random shuffle by Fisher-Yates algorithm - - if (!no_shuffle) - { - auto size = sfens.size(); - for (size_t i = 0; i < size; ++i) - swap(sfens[i], sfens[(size_t)(prng.rand((uint64_t)size - i) + i)]); - } - - // Divide this by THREAD_BUFFER_SIZE. There should be size pieces. - // SFEN_READ_SIZE shall be a multiple of THREAD_BUFFER_SIZE. - assert((SFEN_READ_SIZE % THREAD_BUFFER_SIZE)==0); - - auto size = size_t(SFEN_READ_SIZE / THREAD_BUFFER_SIZE); - std::vector ptrs; - ptrs.reserve(size); - - for (size_t i = 0; i < size; ++i) - { - // Delete this pointer on the receiving side. - PSVector* ptr = new PSVector(); - ptr->resize(THREAD_BUFFER_SIZE); - memcpy(&((*ptr)[0]), &sfens[i * THREAD_BUFFER_SIZE], sizeof(PackedSfenValue) * THREAD_BUFFER_SIZE); - - ptrs.push_back(ptr); - } - - // Since sfens is ready, look at the occasion and copy - { - std::unique_lock lk(mutex); - - // You can ignore this time because you just copy the pointer... - // The mutex lock is required because the contents of packed_sfens_pool are changed. - - for (size_t i = 0; i < size; ++i) - packed_sfens_pool.push_back(ptrs[i]); - } - } - } - - // sfen files - vector filenames; - - // number of phases read (file to memory buffer) - atomic total_read; - - // number of processed phases - atomic total_done; - - // number of cases processed so far - uint64_t last_done; - - // If total_read exceeds this value, update_weights() and calculate mse. - uint64_t next_update_weights; - - uint64_t save_count; - - // Do not shuffle when reading the phase. - bool no_shuffle; - - bool stop_flag; - - // Determine if it is a phase for calculating rmse. - // (The computational aspects of rmse should not be used for learning.) 
- bool is_for_rmse(Key key) const - { - return sfen_for_mse_hash.count(key) != 0; - } - - // hash to limit the reading of the same situation - // Is there too many 64 million phases? Or Not really.. - // It must be 2**N because it will be used as the mask to calculate hash_index. - static const uint64_t READ_SFEN_HASH_SIZE = 64 * 1024 * 1024; - vector hash; // 64MB*8 = 512MB - - // test phase for mse calculation - PSVector sfen_for_mse; - -protected: - - // worker thread reading file in background - std::thread file_worker_thread; - - // Random number to shuffle when reading the phase - PRNG prng; - - // Did you read the files and reached the end? - atomic end_of_files; + // Other variations may be prepared as the objective function.. + + + double calc_grad(Value shallow, const PackedSfenValue& psv) { + return calc_grad((Value)psv.score, shallow, psv); + } + + // Sfen reader + struct SfenReader + { + // number of phases used for calculation such as mse + // mini-batch size = 1M is standard, so 0.2% of that should be negligible in terms of time. + //Since search() is performed with depth = 1 in calculation of move match rate, simple comparison is not possible... + static constexpr uint64_t sfen_for_mse_size = 2000; + + // Number of phases buffered by each thread 0.1M phases. 4M phase at 40HT + static constexpr size_t THREAD_BUFFER_SIZE = 10 * 1000; + + // Buffer for reading files (If this is made larger, the shuffle becomes larger and the phases may vary. + // If it is too large, the memory consumption will increase. + // SFEN_READ_SIZE is a multiple of THREAD_BUFFER_SIZE. + static constexpr const size_t SFEN_READ_SIZE = LEARN_SFEN_READ_SIZE; + + // Do not use std::random_device(). Because it always the same integers on MinGW. + SfenReader(int thread_num) : prng(std::chrono::system_clock::now().time_since_epoch().count()) + { + packed_sfens.resize(thread_num); + total_read = 0; + total_done = 0; + last_done = 0; + next_update_weights = 0; + save_count = 0; + end_of_files = false; + no_shuffle = false; + stop_flag = false; + + hash.resize(READ_SFEN_HASH_SIZE); + } + + ~SfenReader() + { + if (file_worker_thread.joinable()) + file_worker_thread.join(); + } + + // Load the phase for calculation such as mse. + void read_for_mse() + { + auto th = Threads.main(); + Position& pos = th->rootPos; + for (uint64_t i = 0; i < sfen_for_mse_size; ++i) + { + PackedSfenValue ps; + if (!read_to_thread_buffer(0, ps)) + { + cout << "Error! read packed sfen , failed." << endl; + break; + } + sfen_for_mse.push_back(ps); + + // Get the hash key. + StateInfo si; + pos.set_from_packed_sfen(ps.sfen, &si, th); + sfen_for_mse_hash.insert(pos.key()); + } + } + + void read_validation_set(const string& file_name, int eval_limit) + { + ifstream input(file_name, ios::binary); + + while (input) + { + PackedSfenValue p; + if (input.read(reinterpret_cast(&p), sizeof(PackedSfenValue))) + { + if (eval_limit < abs(p.score)) + continue; + if (!use_draw_games_in_validation && p.game_result == 0) + continue; + sfen_for_mse.push_back(p); + } + else + { + break; + } + } + } + + // [ASYNC] Thread returns one aspect. Otherwise returns false. + bool read_to_thread_buffer(size_t thread_id, PackedSfenValue& ps) + { + // If there are any positions left in the thread buffer, retrieve one and return it. + auto& thread_ps = packed_sfens[thread_id]; + + // Fill the read buffer if there is no remaining buffer, but if it doesn't even exist, finish. + if ((thread_ps == nullptr || thread_ps->size() == 0) // If the buffer is empty, fill it. 
+ && !read_to_thread_buffer_impl(thread_id)) + return false; + + // read_to_thread_buffer_impl() returned true, + // Since the filling of the thread buffer with the phase has been completed successfully + // thread_ps->rbegin() is alive. + + ps = *(thread_ps->rbegin()); + thread_ps->pop_back(); + + // If you've run out of buffers, call delete yourself to free this buffer. + if (thread_ps->size() == 0) + { + thread_ps.reset(); + } + + return true; + } + + // [ASYNC] Read some aspects into thread buffer. + bool read_to_thread_buffer_impl(size_t thread_id) + { + while (true) + { + { + std::unique_lock lk(mutex); + // If you can fill from the file buffer, that's fine. + if (packed_sfens_pool.size() != 0) + { + // It seems that filling is possible, so fill and finish. + + packed_sfens[thread_id] = std::move(packed_sfens_pool.front()); + packed_sfens_pool.pop_front(); + + total_read += THREAD_BUFFER_SIZE; + + return true; + } + } + + // The file to read is already gone. No more use. + if (end_of_files) + return false; + + // Waiting for file worker to fill packed_sfens_pool. + // The mutex isn't locked, so it should fill up soon. + sleep(1); + } + + } + + // Start a thread that loads the phase file in the background. + void start_file_read_worker() + { + file_worker_thread = std::thread([&] { this->file_read_worker(); }); + } + + // for file read-only threads + void file_read_worker() + { + auto open_next_file = [&]() + { + if (fs.is_open()) + fs.close(); + + // no more + if (filenames.size() == 0) + return false; + + // Get the next file name. + string filename = *filenames.rbegin(); + filenames.pop_back(); + + fs.open(filename, ios::in | ios::binary); + cout << "open filename = " << filename << endl; + assert(fs); + + return true; + }; + + while (true) + { + // Wait for the buffer to run out. + // This size() is read only, so you don't need to lock it. + while (!stop_flag && packed_sfens_pool.size() >= SFEN_READ_SIZE / THREAD_BUFFER_SIZE) + sleep(100); + if (stop_flag) + return; + + PSVector sfens; + sfens.reserve(SFEN_READ_SIZE); + + // Read from the file into the file buffer. + while (sfens.size() < SFEN_READ_SIZE) + { + PackedSfenValue p; + if (fs.read(reinterpret_cast(&p), sizeof(PackedSfenValue))) + { + sfens.push_back(p); + } + else + { + // read failure + if (!open_next_file()) + { + // There was no next file. Abon. + cout << "..end of files." << endl; + end_of_files = true; + return; + } + } + } + + // Shuffle the read phase data. + // random shuffle by Fisher-Yates algorithm + + if (!no_shuffle) + { + auto size = sfens.size(); + for (size_t i = 0; i < size; ++i) + swap(sfens[i], sfens[(size_t)(prng.rand((uint64_t)size - i) + i)]); + } + + // Divide this by THREAD_BUFFER_SIZE. There should be size pieces. + // SFEN_READ_SIZE shall be a multiple of THREAD_BUFFER_SIZE. + assert((SFEN_READ_SIZE % THREAD_BUFFER_SIZE) == 0); + + auto size = size_t(SFEN_READ_SIZE / THREAD_BUFFER_SIZE); + std::vector> buffers; + buffers.reserve(size); + + for (size_t i = 0; i < size; ++i) + { + // Delete this pointer on the receiving side. + auto buf = std::make_unique(); + buf->resize(THREAD_BUFFER_SIZE); + memcpy(buf->data(), &sfens[i * THREAD_BUFFER_SIZE], sizeof(PackedSfenValue) * THREAD_BUFFER_SIZE); + + buffers.emplace_back(std::move(buf)); + } + + // Since sfens is ready, look at the occasion and copy + { + std::unique_lock lk(mutex); + + // You can ignore this time because you just copy the pointer... + // The mutex lock is required because the contents of packed_sfens_pool are changed. 
+ + for (auto& buf : buffers) + packed_sfens_pool.emplace_back(std::move(buf)); + } + } + } + + // sfen files + vector filenames; + + // number of phases read (file to memory buffer) + atomic total_read; + + // number of processed phases + atomic total_done; + + // number of cases processed so far + uint64_t last_done; + + // If total_read exceeds this value, update_weights() and calculate mse. + uint64_t next_update_weights; + + uint64_t save_count; + + // Do not shuffle when reading the phase. + bool no_shuffle; + + bool stop_flag; + + // Determine if it is a phase for calculating rmse. + // (The computational aspects of rmse should not be used for learning.) + bool is_for_rmse(Key key) const + { + return sfen_for_mse_hash.count(key) != 0; + } + + // hash to limit the reading of the same situation + // Is there too many 64 million phases? Or Not really.. + // It must be 2**N because it will be used as the mask to calculate hash_index. + static const uint64_t READ_SFEN_HASH_SIZE = 64 * 1024 * 1024; + vector hash; // 64MB*8 = 512MB + + // test phase for mse calculation + PSVector sfen_for_mse; + + protected: + + // worker thread reading file in background + std::thread file_worker_thread; + + // Random number to shuffle when reading the phase + PRNG prng; + + // Did you read the files and reached the end? + atomic end_of_files; - // handle of sfen file - std::fstream fs; + // handle of sfen file + std::fstream fs; - // sfen for each thread - // (When the thread is used up, the thread should call delete to release it.) - std::vector packed_sfens; + // sfen for each thread + // (When the thread is used up, the thread should call delete to release it.) + std::vector> packed_sfens; - // Mutex when accessing packed_sfens_pool - std::mutex mutex; + // Mutex when accessing packed_sfens_pool + std::mutex mutex; - // pool of sfen. The worker thread read from the file is added here. - // Each worker thread fills its own packed_sfens[thread_id] from here. - // * Lock and access the mutex. - std::list packed_sfens_pool; + // pool of sfen. The worker thread read from the file is added here. + // Each worker thread fills its own packed_sfens[thread_id] from here. + // * Lock and access the mutex. + std::list> packed_sfens_pool; - // Hold the hash key so that the mse calculation phase is not used for learning. - std::unordered_set sfen_for_mse_hash; -}; + // Hold the hash key so that the mse calculation phase is not used for learning. 
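The reworked SfenReader above is essentially a single-producer / multi-consumer buffer pool: one background thread reads a large batch of positions, shuffles it, slices it into THREAD_BUFFER_SIZE chunks, and parks the chunks in packed_sfens_pool, from which learner threads pull one chunk at a time under a mutex. Below is a minimal, self-contained sketch of that pattern. The names Record, Chunk, kChunkSize, kReadSize and the fake "read" loop are placeholders standing in for PackedSfenValue and the real file I/O; this is an illustration of the structure, not the engine's code.

#include <atomic>
#include <chrono>
#include <cstdint>
#include <list>
#include <memory>
#include <mutex>
#include <random>
#include <thread>
#include <vector>

struct Record { std::uint64_t payload; };            // stand-in for PackedSfenValue
using Chunk = std::vector<Record>;

constexpr std::size_t kChunkSize = 10 * 1000;        // cf. THREAD_BUFFER_SIZE
constexpr std::size_t kReadSize  = 10 * kChunkSize;  // cf. SFEN_READ_SIZE (a multiple of the chunk size)

std::mutex pool_mutex;
std::list<std::unique_ptr<Chunk>> pool;              // cf. packed_sfens_pool
std::atomic<bool> end_of_files{false};

// Producer: read a large batch, Fisher-Yates shuffle it, split it into fixed-size chunks.
void file_read_worker(std::uint64_t total_records)
{
    std::mt19937_64 prng(12345);
    std::uint64_t produced = 0;
    while (produced < total_records)
    {
        Chunk batch(kReadSize);
        for (auto& r : batch) r.payload = produced++;          // stand-in for fs.read(...)

        for (std::size_t i = 0; i + 1 < batch.size(); ++i)     // shuffle the whole batch
        {
            std::uniform_int_distribution<std::size_t> d(i, batch.size() - 1);
            std::swap(batch[i], batch[d(prng)]);
        }

        std::lock_guard<std::mutex> lk(pool_mutex);
        for (std::size_t i = 0; i < kReadSize; i += kChunkSize)
            pool.push_back(std::make_unique<Chunk>(batch.begin() + i,
                                                   batch.begin() + i + kChunkSize));
    }
    end_of_files = true;
}

// Consumer: grab one chunk at a time, polling with a short sleep while the pool is empty.
std::unique_ptr<Chunk> next_chunk()
{
    while (true)
    {
        {
            std::lock_guard<std::mutex> lk(pool_mutex);
            if (!pool.empty())
            {
                auto chunk = std::move(pool.front());
                pool.pop_front();
                return chunk;
            }
        }
        if (end_of_files)
            return nullptr;
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
}

Moving to std::unique_ptr in the patch above makes the ownership hand-off from the pool to packed_sfens[thread_id] explicit, which the raw-pointer version only documented in comments.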
+ std::unordered_set sfen_for_mse_hash; + }; -// Class to generate sfen with multiple threads -struct LearnerThink: public MultiThink -{ - LearnerThink(SfenReader& sr_):sr(sr_),stop_flag(false), save_only_once(false) - { + // Class to generate sfen with multiple threads + struct LearnerThink : public MultiThink + { + LearnerThink(SfenReader& sr_) :sr(sr_), stop_flag(false), save_only_once(false) + { #if defined ( LOSS_FUNCTION_IS_ELMO_METHOD ) - learn_sum_cross_entropy_eval = 0.0; - learn_sum_cross_entropy_win = 0.0; - learn_sum_cross_entropy = 0.0; - learn_sum_entropy_eval = 0.0; - learn_sum_entropy_win = 0.0; - learn_sum_entropy = 0.0; + learn_sum_cross_entropy_eval = 0.0; + learn_sum_cross_entropy_win = 0.0; + learn_sum_cross_entropy = 0.0; + learn_sum_entropy_eval = 0.0; + learn_sum_entropy_win = 0.0; + learn_sum_entropy = 0.0; #endif #if defined(EVAL_NNUE) - newbob_scale = 1.0; - newbob_decay = 1.0; - newbob_num_trials = 2; - best_loss = std::numeric_limits::infinity(); - latest_loss_sum = 0.0; - latest_loss_count = 0; + newbob_scale = 1.0; + newbob_decay = 1.0; + newbob_num_trials = 2; + best_loss = std::numeric_limits::infinity(); + latest_loss_sum = 0.0; + latest_loss_count = 0; #endif - } + } - virtual void thread_worker(size_t thread_id); + virtual void thread_worker(size_t thread_id); - // Start a thread that loads the phase file in the background. - void start_file_read_worker() { sr.start_file_read_worker(); } + // Start a thread that loads the phase file in the background. + void start_file_read_worker() { sr.start_file_read_worker(); } - // save merit function parameters to a file - bool save(bool is_final=false); + // save merit function parameters to a file + bool save(bool is_final = false); - // sfen reader - SfenReader& sr; + // sfen reader + SfenReader& sr; - // Learning iteration counter - uint64_t epoch = 0; + // Learning iteration counter + uint64_t epoch = 0; - // Mini batch size size. Be sure to set it on the side that uses this class. - uint64_t mini_batch_size = 1000*1000; + // Mini batch size size. Be sure to set it on the side that uses this class. + uint64_t mini_batch_size = 1000 * 1000; - bool stop_flag; + bool stop_flag; - // Discount rate - double discount_rate; + // Discount rate + double discount_rate; - // Option to exclude early stage from learning - int reduction_gameply; + // Option to exclude early stage from learning + int reduction_gameply; - // Option not to learn kk/kkp/kpp/kppp - std::array freeze; + // Option not to learn kk/kkp/kpp/kppp + std::array freeze; - // If the absolute value of the evaluation value of the deep search of the teacher phase exceeds this value, discard the teacher phase. - int eval_limit; + // If the absolute value of the evaluation value of the deep search of the teacher phase exceeds this value, discard the teacher phase. + int eval_limit; - // Flag whether to dig a folder each time the evaluation function is saved. - // If true, do not dig the folder. - bool save_only_once; + // Flag whether to dig a folder each time the evaluation function is saved. + // If true, do not dig the folder. 
+ bool save_only_once; - // --- loss calculation + // --- loss calculation #if defined (LOSS_FUNCTION_IS_ELMO_METHOD) - // For calculation of learning data loss - atomic learn_sum_cross_entropy_eval; - atomic learn_sum_cross_entropy_win; - atomic learn_sum_cross_entropy; - atomic learn_sum_entropy_eval; - atomic learn_sum_entropy_win; - atomic learn_sum_entropy; + // For calculation of learning data loss + atomic learn_sum_cross_entropy_eval; + atomic learn_sum_cross_entropy_win; + atomic learn_sum_cross_entropy; + atomic learn_sum_entropy_eval; + atomic learn_sum_entropy_win; + atomic learn_sum_entropy; #endif #if defined(EVAL_NNUE) - shared_timed_mutex nn_mutex; - double newbob_scale; - double newbob_decay; - int newbob_num_trials; - double best_loss; - double latest_loss_sum; - uint64_t latest_loss_count; - std::string best_nn_directory; + shared_timed_mutex nn_mutex; + double newbob_scale; + double newbob_decay; + int newbob_num_trials; + double best_loss; + double latest_loss_sum; + uint64_t latest_loss_count; + std::string best_nn_directory; #endif - uint64_t eval_save_interval; - uint64_t loss_output_interval; - uint64_t mirror_percentage; + uint64_t eval_save_interval; + uint64_t loss_output_interval; + uint64_t mirror_percentage; - // Loss calculation. - // done: Number of phases targeted this time - void calc_loss(size_t thread_id , uint64_t done); + // Loss calculation. + // done: Number of phases targeted this time + void calc_loss(size_t thread_id, uint64_t done); - // Define the loss calculation in ↑ as a task and execute it - TaskDispatcher task_dispatcher; -}; + // Define the loss calculation in ↑ as a task and execute it + TaskDispatcher task_dispatcher; + }; -void LearnerThink::calc_loss(size_t thread_id, uint64_t done) -{ - // There is no point in hitting the replacement table, so at this timing the generation of the replacement table is updated. - // It doesn't matter if you have disabled the substitution table. - TT.new_search(); + void LearnerThink::calc_loss(size_t thread_id, uint64_t done) + { + // There is no point in hitting the replacement table, so at this timing the generation of the replacement table is updated. + // It doesn't matter if you have disabled the substitution table. 
+ TT.new_search(); #if defined(EVAL_NNUE) - std::cout << "PROGRESS: " << now_string() << ", "; - std::cout << sr.total_done << " sfens"; - std::cout << ", iteration " << epoch; - std::cout << ", eta = " << Eval::get_eta() << ", "; + std::cout << "PROGRESS: " << now_string() << ", "; + std::cout << sr.total_done << " sfens"; + std::cout << ", iteration " << epoch; + std::cout << ", eta = " << Eval::get_eta() << ", "; #endif #if !defined(LOSS_FUNCTION_IS_ELMO_METHOD) - double sum_error = 0; - double sum_error2 = 0; - double sum_error3 = 0; + double sum_error = 0; + double sum_error2 = 0; + double sum_error3 = 0; #endif #if defined (LOSS_FUNCTION_IS_ELMO_METHOD) - // For calculation of verification data loss - atomic test_sum_cross_entropy_eval,test_sum_cross_entropy_win,test_sum_cross_entropy; - atomic test_sum_entropy_eval,test_sum_entropy_win,test_sum_entropy; - test_sum_cross_entropy_eval = 0; - test_sum_cross_entropy_win = 0; - test_sum_cross_entropy = 0; - test_sum_entropy_eval = 0; - test_sum_entropy_win = 0; - test_sum_entropy = 0; + // For calculation of verification data loss + atomic test_sum_cross_entropy_eval, test_sum_cross_entropy_win, test_sum_cross_entropy; + atomic test_sum_entropy_eval, test_sum_entropy_win, test_sum_entropy; + test_sum_cross_entropy_eval = 0; + test_sum_cross_entropy_win = 0; + test_sum_cross_entropy = 0; + test_sum_entropy_eval = 0; + test_sum_entropy_win = 0; + test_sum_entropy = 0; - // norm for learning - atomic sum_norm; - sum_norm = 0; + // norm for learning + atomic sum_norm; + sum_norm = 0; #endif - // The number of times the pv first move of deep search matches the pv first move of search(1). - atomic move_accord_count; - move_accord_count = 0; + // The number of times the pv first move of deep search matches the pv first move of search(1). + atomic move_accord_count; + move_accord_count = 0; - // Display the value of eval() in the initial stage of Hirate and see the shaking. - auto th = Threads[thread_id]; - auto& pos = th->rootPos; - StateInfo si; - pos.set(StartFEN, false, &si, th); - std::cout << "hirate eval = " << Eval::evaluate(pos); + // Display the value of eval() in the initial stage of Hirate and see the shaking. + auto th = Threads[thread_id]; + auto& pos = th->rootPos; + StateInfo si; + pos.set(StartFEN, false, &si, th); + std::cout << "hirate eval = " << Eval::evaluate(pos); - //Eval::print_eval_stat(pos); + //Eval::print_eval_stat(pos); - // It's better to parallelize here, but it's a bit troublesome because the search before slave has not finished. - // I created a mechanism to call task, so I will use it. + // It's better to parallelize here, but it's a bit troublesome because the search before slave has not finished. + // I created a mechanism to call task, so I will use it. - // The number of tasks to do. - atomic task_count; - task_count = (int)sr.sfen_for_mse.size(); - task_dispatcher.task_reserve(task_count); + // The number of tasks to do. + atomic task_count; + task_count = (int)sr.sfen_for_mse.size(); + task_dispatcher.task_reserve(task_count); - // Create a task to search for the situation and give it to each thread. - for (const auto& ps : sr.sfen_for_mse) - { - // Assign work to each thread using TaskDispatcher. - // A task definition for that. - // It is not possible to capture pos used in ↑, so specify the variables you want to capture one by one. 
- auto task = [&ps,&test_sum_cross_entropy_eval,&test_sum_cross_entropy_win,&test_sum_cross_entropy,&test_sum_entropy_eval,&test_sum_entropy_win,&test_sum_entropy, &sum_norm,&task_count ,&move_accord_count](size_t thread_id) - { - // Does C++ properly capture a new ps instance for each loop?. - auto th = Threads[thread_id]; - auto& pos = th->rootPos; - StateInfo si; - if (pos.set_from_packed_sfen(ps.sfen ,&si, th) != 0) - { - // Unfortunately, as an sfen for rmse calculation, an invalid sfen was drawn. - cout << "Error! : illegal packed sfen " << pos.fen() << endl; - } + // Create a task to search for the situation and give it to each thread. + for (const auto& ps : sr.sfen_for_mse) + { + // Assign work to each thread using TaskDispatcher. + // A task definition for that. + // It is not possible to capture pos used in ↑, so specify the variables you want to capture one by one. + auto task = + [ + &ps, + &test_sum_cross_entropy_eval, + &test_sum_cross_entropy_win, + &test_sum_cross_entropy, + &test_sum_entropy_eval, + &test_sum_entropy_win, + &test_sum_entropy, + &sum_norm, + &task_count, + &move_accord_count + ](size_t task_thread_id) + { + // Does C++ properly capture a new ps instance for each loop?. + auto task_th = Threads[task_thread_id]; + auto& task_pos = task_th->rootPos; + StateInfo task_si; + if (task_pos.set_from_packed_sfen(ps.sfen, &task_si, task_th) != 0) + { + // Unfortunately, as an sfen for rmse calculation, an invalid sfen was drawn. + cout << "Error! : illegal packed sfen " << task_pos.fen() << endl; + } - // Evaluation value for shallow search - // The value of evaluate() may be used, but when calculating loss, learn_cross_entropy and - // Use qsearch() because it is difficult to compare the values. - // EvalHash has been disabled in advance. (If not, the same value will be returned every time) - auto r = qsearch(pos); + // Evaluation value for shallow search + // The value of evaluate() may be used, but when calculating loss, learn_cross_entropy and + // Use qsearch() because it is difficult to compare the values. + // EvalHash has been disabled in advance. (If not, the same value will be returned every time) + auto task_search_result = qsearch(task_pos); - auto shallow_value = r.first; - { - const auto rootColor = pos.side_to_move(); - const auto pv = r.second; - std::vector> states(pv.size()); - for (size_t i = 0; i < pv.size(); ++i) - { - pos.do_move(pv[i], states[i]); - Eval::NNUE::update_eval(pos); - } - shallow_value = (rootColor == pos.side_to_move()) ? Eval::evaluate(pos) : -Eval::evaluate(pos); - for (auto it = pv.rbegin(); it != pv.rend(); ++it) - pos.undo_move(*it); - } + auto shallow_value = task_search_result.first; + { + const auto rootColor = task_pos.side_to_move(); + const auto pv = task_search_result.second; + std::vector> states(pv.size()); + for (size_t i = 0; i < pv.size(); ++i) + { + task_pos.do_move(pv[i], states[i]); + Eval::NNUE::update_eval(task_pos); + } + shallow_value = (rootColor == task_pos.side_to_move()) ? Eval::evaluate(task_pos) : -Eval::evaluate(task_pos); + for (auto it = pv.rbegin(); it != pv.rend(); ++it) + task_pos.undo_move(*it); + } - // Evaluation value of deep search - auto deep_value = (Value)ps.score; + // Evaluation value of deep search + auto deep_value = (Value)ps.score; - // Note) This code does not consider when eval_limit is specified in the learn command. + // Note) This code does not consider when eval_limit is specified in the learn command. 
- // --- error calculation + // --- error calculation #if !defined(LOSS_FUNCTION_IS_ELMO_METHOD) - auto grad = calc_grad(deep_value, shallow_value, ps); + auto grad = calc_grad(deep_value, shallow_value, ps); - // something like rmse - sum_error += grad*grad; - // Add the absolute value of the gradient - sum_error2 += abs(grad); - // Add the absolute value of the difference between the evaluation values - sum_error3 += abs(shallow_value - deep_value); + // something like rmse + sum_error += grad * grad; + // Add the absolute value of the gradient + sum_error2 += abs(grad); + // Add the absolute value of the difference between the evaluation values + sum_error3 += abs(shallow_value - deep_value); #endif - // --- calculation of cross entropy + // --- calculation of cross entropy - // For the time being, regarding the win rate and loss terms only in the elmo method - // Calculate and display the cross entropy. + // For the time being, regarding the win rate and loss terms only in the elmo method + // Calculate and display the cross entropy. #if defined (LOSS_FUNCTION_IS_ELMO_METHOD) - double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy; - double test_entropy_eval, test_entropy_win, test_entropy; - calc_cross_entropy(deep_value, shallow_value, ps, test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy, test_entropy_eval, test_entropy_win, test_entropy); - // The total cross entropy need not be abs() by definition. - test_sum_cross_entropy_eval += test_cross_entropy_eval; - test_sum_cross_entropy_win += test_cross_entropy_win; - test_sum_cross_entropy += test_cross_entropy; - test_sum_entropy_eval += test_entropy_eval; - test_sum_entropy_win += test_entropy_win; - test_sum_entropy += test_entropy; - sum_norm += (double)abs(shallow_value); + double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy; + double test_entropy_eval, test_entropy_win, test_entropy; + calc_cross_entropy(deep_value, shallow_value, ps, test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy, test_entropy_eval, test_entropy_win, test_entropy); + // The total cross entropy need not be abs() by definition. + test_sum_cross_entropy_eval += test_cross_entropy_eval; + test_sum_cross_entropy_win += test_cross_entropy_win; + test_sum_cross_entropy += test_cross_entropy; + test_sum_entropy_eval += test_entropy_eval; + test_sum_entropy_win += test_entropy_win; + test_sum_entropy += test_entropy; + sum_norm += (double)abs(shallow_value); #endif - // Determine if the teacher's move and the score of the shallow search match - { - auto r = search(pos,1); - if ((uint16_t)r.second[0] == ps.move) - move_accord_count.fetch_add(1, std::memory_order_relaxed); - } + // Determine if the teacher's move and the score of the shallow search match + { + auto r = search(task_pos, 1); + if ((uint16_t)r.second[0] == ps.move) + move_accord_count.fetch_add(1, std::memory_order_relaxed); + } - // Reduced one task because I did it - --task_count; - }; + // Reduced one task because I did it + --task_count; + }; - // Throw the defined task to slave. - task_dispatcher.push_task_async(task); - } + // Throw the defined task to slave. 
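The task above accumulates test_sum_cross_entropy_* and test_sum_entropy_* via calc_cross_entropy(), whose body is not shown in this hunk. As a rough sketch of the quantity being accumulated in the elmo-style loss: both the deep (teacher) score and the shallow score are mapped to a winning probability with a sigmoid, and the cross entropy of the shallow probability is taken against the teacher-eval probability on one hand and the game result on the other. The 600-centipawn sigmoid scale and the clamping epsilon below are illustrative assumptions, not a quote of the engine's calc_cross_entropy.

#include <algorithm>
#include <cmath>

// Map an engine score (centipawns) to a winning probability.
// The 1/600 scale is a conventional choice; the engine's own constant may differ.
double winning_percentage(double score) { return 1.0 / (1.0 + std::exp(-score / 600.0)); }

// Cross entropy of the shallow-search win probability q against a target probability p,
// with q clamped away from 0/1 so the logarithms stay finite.
double cross_entropy(double p, double q)
{
    const double eps = 1e-12;
    q = std::clamp(q, eps, 1.0 - eps);
    return -p * std::log(q) - (1.0 - p) * std::log(1.0 - q);
}

// game_result in {-1, 0, +1} from the side to move's point of view -> target in {0, 0.5, 1}.
double result_to_probability(int game_result) { return (game_result + 1) * 0.5; }

// Example: deep search said +120, shallow search says +80, and the game was won:
//   test_cross_entropy_eval ~ cross_entropy(winning_percentage(120), winning_percentage(80));
//   test_cross_entropy_win  ~ cross_entropy(result_to_probability(+1), winning_percentage(80));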
+ task_dispatcher.push_task_async(task); + } - // join yourself as a slave - task_dispatcher.on_idle(thread_id); + // join yourself as a slave + task_dispatcher.on_idle(thread_id); - // wait for all tasks to complete - while (task_count) - sleep(1); + // wait for all tasks to complete + while (task_count) + sleep(1); #if !defined(LOSS_FUNCTION_IS_ELMO_METHOD) - // rmse = root mean square error: mean square error - // mae = mean absolute error: mean absolute error - auto dsig_rmse = std::sqrt(sum_error / (sfen_for_mse.size() + epsilon)); - auto dsig_mae = sum_error2 / (sfen_for_mse.size() + epsilon); - auto eval_mae = sum_error3 / (sfen_for_mse.size() + epsilon); - cout << " , dsig rmse = " << dsig_rmse << " , dsig mae = " << dsig_mae - << " , eval mae = " << eval_mae; + // rmse = root mean square error: mean square error + // mae = mean absolute error: mean absolute error + auto dsig_rmse = std::sqrt(sum_error / (sfen_for_mse.size() + epsilon)); + auto dsig_mae = sum_error2 / (sfen_for_mse.size() + epsilon); + auto eval_mae = sum_error3 / (sfen_for_mse.size() + epsilon); + cout << " , dsig rmse = " << dsig_rmse << " , dsig mae = " << dsig_mae + << " , eval mae = " << eval_mae; #endif #if defined ( LOSS_FUNCTION_IS_ELMO_METHOD ) #if defined(EVAL_NNUE) - latest_loss_sum += test_sum_cross_entropy - test_sum_entropy; - latest_loss_count += sr.sfen_for_mse.size(); + latest_loss_sum += test_sum_cross_entropy - test_sum_entropy; + latest_loss_count += sr.sfen_for_mse.size(); #endif -// learn_cross_entropy may be called train cross entropy in the world of machine learning, -// When omitting the acronym, it is nice to be able to distinguish it from test cross entropy(tce) by writing it as lce. + // learn_cross_entropy may be called train cross entropy in the world of machine learning, + // When omitting the acronym, it is nice to be able to distinguish it from test cross entropy(tce) by writing it as lce. - if (sr.sfen_for_mse.size() && done) - { - cout - << " , test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size() - << " , test_cross_entropy_win = " << test_sum_cross_entropy_win / sr.sfen_for_mse.size() - << " , test_entropy_eval = " << test_sum_entropy_eval / sr.sfen_for_mse.size() - << " , test_entropy_win = " << test_sum_entropy_win / sr.sfen_for_mse.size() - << " , test_cross_entropy = " << test_sum_cross_entropy / sr.sfen_for_mse.size() - << " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size() - << " , norm = " << sum_norm - << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%"; - if (done != static_cast(-1)) - { - cout - << " , learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done - << " , learn_cross_entropy_win = " << learn_sum_cross_entropy_win / done - << " , learn_entropy_eval = " << learn_sum_entropy_eval / done - << " , learn_entropy_win = " << learn_sum_entropy_win / done - << " , learn_cross_entropy = " << learn_sum_cross_entropy / done - << " , learn_entropy = " << learn_sum_entropy / done; - } - cout << endl; - } - else { - cout << "Error! 
: sr.sfen_for_mse.size() = " << sr.sfen_for_mse.size() << " , done = " << done << endl; - } + if (sr.sfen_for_mse.size() && done) + { + cout + << " , test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size() + << " , test_cross_entropy_win = " << test_sum_cross_entropy_win / sr.sfen_for_mse.size() + << " , test_entropy_eval = " << test_sum_entropy_eval / sr.sfen_for_mse.size() + << " , test_entropy_win = " << test_sum_entropy_win / sr.sfen_for_mse.size() + << " , test_cross_entropy = " << test_sum_cross_entropy / sr.sfen_for_mse.size() + << " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size() + << " , norm = " << sum_norm + << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%"; + if (done != static_cast(-1)) + { + cout + << " , learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done + << " , learn_cross_entropy_win = " << learn_sum_cross_entropy_win / done + << " , learn_entropy_eval = " << learn_sum_entropy_eval / done + << " , learn_entropy_win = " << learn_sum_entropy_win / done + << " , learn_cross_entropy = " << learn_sum_cross_entropy / done + << " , learn_entropy = " << learn_sum_entropy / done; + } + cout << endl; + } + else { + cout << "Error! : sr.sfen_for_mse.size() = " << sr.sfen_for_mse.size() << " , done = " << done << endl; + } - // Clear 0 for next time. - learn_sum_cross_entropy_eval = 0.0; - learn_sum_cross_entropy_win = 0.0; - learn_sum_cross_entropy = 0.0; - learn_sum_entropy_eval = 0.0; - learn_sum_entropy_win = 0.0; - learn_sum_entropy = 0.0; + // Clear 0 for next time. + learn_sum_cross_entropy_eval = 0.0; + learn_sum_cross_entropy_win = 0.0; + learn_sum_cross_entropy = 0.0; + learn_sum_entropy_eval = 0.0; + learn_sum_entropy_win = 0.0; + learn_sum_entropy = 0.0; #else - << endl; + << endl; #endif -} + } -void LearnerThink::thread_worker(size_t thread_id) -{ + void LearnerThink::thread_worker(size_t thread_id) + { #if defined(_OPENMP) - omp_set_num_threads((int)Options["Threads"]); + omp_set_num_threads((int)Options["Threads"]); #endif - auto th = Threads[thread_id]; - auto& pos = th->rootPos; + auto th = Threads[thread_id]; + auto& pos = th->rootPos; - while (true) - { - // display mse (this is sometimes done only for thread 0) - // Immediately after being read from the file... + while (true) + { + // display mse (this is sometimes done only for thread 0) + // Immediately after being read from the file... #if defined(EVAL_NNUE) - // Lock the evaluation function so that it is not used during updating. - shared_lock read_lock(nn_mutex, defer_lock); - if (sr.next_update_weights <= sr.total_done || - (thread_id != 0 && !read_lock.try_lock())) + // Lock the evaluation function so that it is not used during updating. + shared_lock read_lock(nn_mutex, defer_lock); + if (sr.next_update_weights <= sr.total_done || + (thread_id != 0 && !read_lock.try_lock())) #else - if (sr.next_update_weights <= sr.total_done) + if (sr.next_update_weights <= sr.total_done) #endif - { - if (thread_id != 0) - { - // Wait except thread_id == 0. + { + if (thread_id != 0) + { + // Wait except thread_id == 0. - if (stop_flag) - break; + if (stop_flag) + break; - // I want to parallelize rmse calculation etc., so if task() is loaded, process it. - task_dispatcher.on_idle(thread_id); - continue; - } - else - { - // Only thread_id == 0 performs the following update process. + // I want to parallelize rmse calculation etc., so if task() is loaded, process it. 
+ task_dispatcher.on_idle(thread_id); + continue; + } + else + { + // Only thread_id == 0 performs the following update process. - // The weight array is not updated for the first time. - if (sr.next_update_weights == 0) - { - sr.next_update_weights += mini_batch_size; - continue; - } + // The weight array is not updated for the first time. + if (sr.next_update_weights == 0) + { + sr.next_update_weights += mini_batch_size; + continue; + } #if !defined(EVAL_NNUE) - // Output the current time. Output every time. - std::cout << sr.total_done << " sfens , at " << now_string() << std::endl; + // Output the current time. Output every time. + std::cout << sr.total_done << " sfens , at " << now_string() << std::endl; - // Reflect the gradient in the weight array at this timing. The calculation of the gradient is just right for each 1M phase in terms of mini-batch. - Eval::update_weights(epoch , freeze); + // Reflect the gradient in the weight array at this timing. The calculation of the gradient is just right for each 1M phase in terms of mini-batch. + Eval::update_weights(epoch, freeze); - // Display epoch and current eta for debugging. - std::cout << "epoch = " << epoch << " , eta = " << Eval::get_eta() << std::endl; + // Display epoch and current eta for debugging. + std::cout << "epoch = " << epoch << " , eta = " << Eval::get_eta() << std::endl; #else - { - // update parameters + { + // update parameters - // Lock the evaluation function so that it is not used during updating. - lock_guard write_lock(nn_mutex); - Eval::NNUE::UpdateParameters(epoch); - } + // Lock the evaluation function so that it is not used during updating. + lock_guard write_lock(nn_mutex); + Eval::NNUE::UpdateParameters(epoch); + } #endif - ++epoch; + ++epoch; - // Save once every 1 billion phases. + // Save once every 1 billion phases. - // However, the elapsed time during update_weights() and calc_rmse() is ignored. - if (++sr.save_count * mini_batch_size >= eval_save_interval) - { - sr.save_count = 0; + // However, the elapsed time during update_weights() and calc_rmse() is ignored. + if (++sr.save_count * mini_batch_size >= eval_save_interval) + { + sr.save_count = 0; - // During this time, as the gradient calculation proceeds, the value becomes too large and I feel annoyed, so stop other threads. - const bool converged = save(); - if (converged) - { - stop_flag = true; - sr.stop_flag = true; - break; - } - } + // During this time, as the gradient calculation proceeds, the value becomes too large and I feel annoyed, so stop other threads. + const bool converged = save(); + if (converged) + { + stop_flag = true; + sr.stop_flag = true; + break; + } + } - // Calculate rmse. This is done for samples of 10,000 phases. - // If you do with 40 cores, update_weights every 1 million phases - // I don't think it's so good to be tiring. - static uint64_t loss_output_count = 0; - if (++loss_output_count * mini_batch_size >= loss_output_interval) - { - loss_output_count = 0; + // Calculate rmse. This is done for samples of 10,000 phases. + // If you do with 40 cores, update_weights every 1 million phases + // I don't think it's so good to be tiring. 
+ static uint64_t loss_output_count = 0; + if (++loss_output_count * mini_batch_size >= loss_output_interval) + { + loss_output_count = 0; - // Number of cases processed this time - uint64_t done = sr.total_done - sr.last_done; + // Number of cases processed this time + uint64_t done = sr.total_done - sr.last_done; - // loss calculation - calc_loss(thread_id , done); + // loss calculation + calc_loss(thread_id, done); #if defined(EVAL_NNUE) - Eval::NNUE::CheckHealth(); + Eval::NNUE::CheckHealth(); #endif - // Make a note of how far you have totaled. - sr.last_done = sr.total_done; - } + // Make a note of how far you have totaled. + sr.last_done = sr.total_done; + } - // Next time, I want you to do this series of processing again when you process only mini_batch_size. - sr.next_update_weights += mini_batch_size; + // Next time, I want you to do this series of processing again when you process only mini_batch_size. + sr.next_update_weights += mini_batch_size; - // Since I was waiting for the update of this sr.next_update_weights except the main thread, - // Once this value is updated, it will start moving again. - } - } + // Since I was waiting for the update of this sr.next_update_weights except the main thread, + // Once this value is updated, it will start moving again. + } + } - PackedSfenValue ps; - RetryRead:; - if (!sr.read_to_thread_buffer(thread_id, ps)) - { - // ran out of thread pool for my thread. - // Because there are almost no phases left, - // Terminate all other threads. + PackedSfenValue ps; + RetryRead:; + if (!sr.read_to_thread_buffer(thread_id, ps)) + { + // ran out of thread pool for my thread. + // Because there are almost no phases left, + // Terminate all other threads. - stop_flag = true; - break; - } + stop_flag = true; + break; + } - // The evaluation value exceeds the learning target value. - // Ignore this aspect information. 
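The main loop of thread_worker above gates the expensive work on two shared counters: total_done (positions processed by all threads) and next_update_weights (the threshold at which thread 0 performs the next weight update, save, or loss report). A stripped-down sketch of just that gating decision, with the engine-specific update replaced by the caller's responsibility; the function name and structure are illustrative only.

#include <atomic>
#include <cstddef>
#include <cstdint>

std::atomic<std::uint64_t> total_done{0};
std::uint64_t next_update_weights = 0;           // written only by thread 0
constexpr std::uint64_t mini_batch_size = 1000 * 1000;

// Called by every learner thread between positions.
// Returns true when the calling thread (thread 0) should run the update step,
// after which it advances next_update_weights by mini_batch_size.
bool should_update(std::size_t thread_id)
{
    if (next_update_weights > total_done.load())
        return false;                            // not enough new positions yet
    if (thread_id != 0)
        return false;                            // other threads wait or run queued tasks
    if (next_update_weights == 0)
    {
        next_update_weights += mini_batch_size;  // skip the very first batch, as above
        return false;
    }
    return true;
}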
- if (eval_limit 0) { - static int trials = newbob_num_trials; - const double latest_loss = latest_loss_sum / latest_loss_count; - latest_loss_sum = 0.0; - latest_loss_count = 0; - cout << "loss: " << latest_loss; - if (latest_loss < best_loss) { - cout << " < best (" << best_loss << "), accepted" << endl; - best_loss = latest_loss; - best_nn_directory = Path::Combine((std::string)Options["EvalSaveDir"], dir_name); - trials = newbob_num_trials; - } else { - cout << " >= best (" << best_loss << "), rejected" << endl; - if (best_nn_directory.empty()) { - cout << "WARNING: no improvement from initial model" << endl; - } else { - cout << "restoring parameters from " << best_nn_directory << endl; - Eval::NNUE::RestoreParameters(best_nn_directory); - } - if (--trials > 0 && !is_final) { - cout << "reducing learning rate scale from " << newbob_scale - << " to " << (newbob_scale * newbob_decay) - << " (" << trials << " more trials)" << endl; - newbob_scale *= newbob_decay; - Eval::NNUE::SetGlobalLearningRateScale(newbob_scale); - } - } - if (trials == 0) { - cout << "converged" << endl; - return true; - } - } + if (newbob_decay != 1.0 && latest_loss_count > 0) { + static int trials = newbob_num_trials; + const double latest_loss = latest_loss_sum / latest_loss_count; + latest_loss_sum = 0.0; + latest_loss_count = 0; + cout << "loss: " << latest_loss; + if (latest_loss < best_loss) { + cout << " < best (" << best_loss << "), accepted" << endl; + best_loss = latest_loss; + best_nn_directory = Path::Combine((std::string)Options["EvalSaveDir"], dir_name); + trials = newbob_num_trials; + } + else { + cout << " >= best (" << best_loss << "), rejected" << endl; + if (best_nn_directory.empty()) { + cout << "WARNING: no improvement from initial model" << endl; + } + else { + cout << "restoring parameters from " << best_nn_directory << endl; + Eval::NNUE::RestoreParameters(best_nn_directory); + } + if (--trials > 0 && !is_final) { + cout << "reducing learning rate scale from " << newbob_scale + << " to " << (newbob_scale * newbob_decay) + << " (" << trials << " more trials)" << endl; + newbob_scale *= newbob_decay; + Eval::NNUE::SetGlobalLearningRateScale(newbob_scale); + } + } + if (trials == 0) { + cout << "converged" << endl; + return true; + } + } #endif - } - return false; -} - -// Shuffle_files(), shuffle_files_quick() subcontracting, writing part. -// output_file_name: Name of the file to write -// prng: random number -// afs: fstream of each teacher phase file -// a_count: The number of teacher positions inherent in each file. -void shuffle_write(const string& output_file_name , PRNG& prng , vector& afs , vector& a_count) -{ - uint64_t total_sfen_count = 0; - for (auto c : a_count) - total_sfen_count += c; - - // number of exported phases - uint64_t write_sfen_count = 0; - - // Output the progress on the screen for each phase. - const uint64_t buffer_size = 10000000; - - auto print_status = [&]() - { - // Output progress every 10M phase or when all writing is completed - if (((write_sfen_count % buffer_size) == 0) || - (write_sfen_count == total_sfen_count)) - cout << write_sfen_count << " / " << total_sfen_count << endl; - }; - - - cout << endl << "write : " << output_file_name << endl; - - fstream fs(output_file_name, ios::out | ios::binary); - - // total teacher positions - uint64_t sum = 0; - for (auto c : a_count) - sum += c; - - while (sum != 0) - { - auto r = prng.rand(sum); - - // Aspects stored in fs[0] file ... Aspects stored in fs[1] file ... 
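The save() logic shown above implements a simple "newbob"-style schedule: if the averaged validation loss improves on best_loss, the new network is accepted and its directory remembered; otherwise the best parameters are restored, the learning-rate scale is multiplied by newbob_decay, and after newbob_num_trials consecutive rejections training is declared converged. The following is a self-contained restatement of just that decision. NewbobState, on_validation_loss and the commented-out restore call are placeholders; the real code uses Eval::NNUE::RestoreParameters and SetGlobalLearningRateScale as above.

#include <iostream>
#include <string>

struct NewbobState {
    double best_loss   = 1e30;
    double scale       = 1.0;
    double decay       = 0.5;   // e.g. halve the learning-rate scale on each rejection
    int    trials_left = 2;     // cf. newbob_num_trials
    int    max_trials  = 2;
    std::string best_dir;       // directory holding the best parameters so far
};

// Returns true when training should be considered converged.
bool on_validation_loss(NewbobState& s, double latest_loss, const std::string& saved_dir)
{
    if (latest_loss < s.best_loss) {
        std::cout << "loss " << latest_loss << " < best " << s.best_loss << ", accepted\n";
        s.best_loss   = latest_loss;
        s.best_dir    = saved_dir;
        s.trials_left = s.max_trials;
        return false;
    }
    std::cout << "loss " << latest_loss << " >= best " << s.best_loss << ", rejected\n";
    // restore_parameters(s.best_dir);            // placeholder for Eval::NNUE::RestoreParameters
    if (--s.trials_left > 0) {
        s.scale *= s.decay;                       // cf. Eval::NNUE::SetGlobalLearningRateScale
        std::cout << "reducing LR scale to " << s.scale
                  << " (" << s.trials_left << " trials left)\n";
        return false;
    }
    std::cout << "converged\n";
    return true;
}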
- //Think of it as a series like, and determine in which file r is pointing. - // The contents of the file are shuffled, so you can take the next element from that file. - // Each file has a_count[x] phases, so this process can be written as follows. - - uint64_t n = 0; - while (a_count[n] <= r) - r -= a_count[n++]; - - // This confirms n. Before you forget it, reduce the remaining number. - - --a_count[n]; - --sum; - - PackedSfenValue psv; - // It's better to read and write all at once until the performance is not so good... - if (afs[n].read((char*)&psv, sizeof(PackedSfenValue))) - { - fs.write((char*)&psv, sizeof(PackedSfenValue)); - ++write_sfen_count; - print_status(); - } - } - print_status(); - fs.close(); - cout << "done!" << endl; -} - -// Subcontracting the teacher shuffle "learn shuffle" command. -// output_file_name: name of the output file where the shuffled teacher positions will be written -void shuffle_files(const vector& filenames , const string& output_file_name , uint64_t buffer_size ) -{ - // The destination folder is - // tmp/ for temporary writing - - // Temporary file is written to tmp/ folder for each buffer_size phase. - // For example, if buffer_size = 20M, you need a buffer of 20M*40bytes = 800MB. - // In a PC with a small memory, it would be better to reduce this. - // However, if the number of files increases too much, it will not be possible to open at the same time due to OS restrictions. - // There should have been a limit of 512 per process on Windows, so you can open here as 500, - // The current setting is 500 files x 20M = 10G = 10 billion phases. - - PSVector buf; - buf.resize(buffer_size); - // ↑ buffer, a marker that indicates how much you have used - uint64_t buf_write_marker = 0; - - // File name to write (incremental counter because it is a serial number) - uint64_t write_file_count = 0; - - // random number to shuffle - // Do not use std::random_device(). Because it always the same integers on MinGW. - PRNG prng(std::chrono::system_clock::now().time_since_epoch().count()); - - // generate the name of the temporary file - auto make_filename = [](uint64_t i) - { - return "tmp/" + to_string(i) + ".bin"; - }; - - // Exported files in tmp/ folder, number of teacher positions stored in each - vector a_count; - - auto write_buffer = [&](uint64_t size) - { - // shuffle from buf[0] to buf[size-1] - for (uint64_t i = 0; i < size; ++i) - swap(buf[i], buf[(uint64_t)(prng.rand(size - i) + i)]); - - // write to a file - fstream fs; - fs.open(make_filename(write_file_count++), ios::out | ios::binary); - fs.write((char*)&buf[0], size * sizeof(PackedSfenValue)); - fs.close(); - a_count.push_back(size); - - buf_write_marker = 0; - cout << "."; - }; - - Dependency::mkdir("tmp"); - - // Shuffle and export as a 10M phase shredded file. - for (auto filename : filenames) - { - fstream fs(filename, ios::in | ios::binary); - cout << endl << "open file = " << filename; - while (fs.read((char*)&buf[buf_write_marker], sizeof(PackedSfenValue))) - if (++buf_write_marker == buffer_size) - write_buffer(buffer_size); - - // Read in units of sizeof(PackedSfenValue), - // Ignore the last remaining fraction. (Fails in fs.read, so exit while) - // (The remaining fraction seems to be half-finished data that was created because it was stopped halfway during teacher generation.) - - } - - if (buf_write_marker != 0) - write_buffer(buf_write_marker); - - // Only shuffled files have been written write_file_count. 
- // As a second pass, if you open all of them at the same time, select one at random and load one phase at a time - // Now you have shuffled. - - // Original file for shirt full + tmp file + file to write requires 3 times the storage capacity of the original file. - // 1 billion SSD is not enough for shuffling because it is 400GB for 10 billion phases. - // If you want to delete (or delete by hand) the original file at this point after writing to tmp, - // The storage capacity is about twice that of the original file. - // So, maybe we should have an option to delete the original file. - - // Files are opened at the same time. It is highly possible that this will exceed FOPEN_MAX. - // In that case, rather than adjusting buffer_size to reduce the number of files. - - vector afs; - for (uint64_t i = 0; i < write_file_count; ++i) - afs.emplace_back(fstream(make_filename(i),ios::in | ios::binary)); - - // Throw to the subcontract function and end. - shuffle_write(output_file_name, prng, afs, a_count); -} - -// Subcontracting the teacher shuffle "learn shuffleq" command. -// This is written in 1 pass. -// output_file_name: name of the output file where the shuffled teacher positions will be written -void shuffle_files_quick(const vector& filenames, const string& output_file_name) -{ - // number of phases read - uint64_t read_sfen_count = 0; - - // random number to shuffle - // Do not use std::random_device(). Because it always the same integers on MinGW. - PRNG prng(std::chrono::system_clock::now().time_since_epoch().count()); - - // number of files - size_t file_count = filenames.size(); - - // Number of teacher positions stored in each file in filenames - vector a_count(file_count); - - // Count the number of teacher aspects in each file. - vector afs(file_count); - - for (size_t i = 0; i & filenames,const string output_file_name) -{ - PSVector buf; - - for (auto filename : filenames) - { - std::cout << "read : " << filename << std::endl; - read_file_to_memory(filename, [&buf](uint64_t size) { - assert((size % sizeof(PackedSfenValue)) == 0); - // Expand the buffer and read after the last end. - uint64_t last = buf.size(); - buf.resize(last + size / sizeof(PackedSfenValue)); - return (void*)&buf[last]; - }); - } - - // shuffle from buf[0] to buf[size-1] - // Do not use std::random_device(). Because it always the same integers on MinGW. - PRNG prng(std::chrono::system_clock::now().time_since_epoch().count()); - uint64_t size = (uint64_t)buf.size(); - std::cout << "shuffle buf.size() = " << size << std::endl; - for (uint64_t i = 0; i < size; ++i) - swap(buf[i], buf[(uint64_t)(prng.rand(size - i) + i)]); - - std::cout << "write : " << output_file_name << endl; - - // If the file to be written exceeds 2GB, it cannot be written in one shot with fstream::write, so use wrapper. - write_memory_to_file(output_file_name, (void*)&buf[0], (uint64_t)sizeof(PackedSfenValue)*(uint64_t)buf.size()); - - std::cout << "..shuffle_on_memory done." << std::endl; -} - -bool fen_is_ok(Position& pos, std::string input_fen) { - std::string pos_fen = pos.fen(); - std::istringstream ss_input(input_fen); - std::istringstream ss_pos(pos_fen); - - // example : "2r4r/4kpp1/nb1np3/p2p3p/B2P1BP1/PP6/4NPKP/2R1R3 w - h6 0 24" - // --> "2r4r/4kpp1/nb1np3/p2p3p/B2P1BP1/PP6/4NPKP/2R1R3" - std::string str_input, str_pos; - ss_input >> str_input; - ss_pos >> str_pos; - - // Only compare "Piece placement field" between input_fen and pos.fen(). 
- return str_input == str_pos; -} - -void convert_bin(const vector& filenames, const string& output_file_name, const int ply_minimum, const int ply_maximum, const int interpolate_eval, const bool check_invalid_fen, const bool check_illegal_move) -{ - std::cout << "check_invalid_fen=" << check_invalid_fen << std::endl; - std::cout << "check_illegal_move=" << check_illegal_move << std::endl; - - std::fstream fs; - uint64_t data_size=0; - uint64_t filtered_size = 0; - uint64_t filtered_size_fen = 0; - uint64_t filtered_size_move = 0; - uint64_t filtered_size_ply = 0; - auto th = Threads.main(); - auto &tpos = th->rootPos; - // convert plain rag to packed sfenvalue for Yaneura king - fs.open(output_file_name, ios::app | ios::binary); - StateListPtr states; - for (auto filename : filenames) { - std::cout << "convert " << filename << " ... "; - std::string line; - ifstream ifs; - ifs.open(filename); - PackedSfenValue p; - data_size = 0; - filtered_size = 0; - filtered_size_fen = 0; - filtered_size_move = 0; - filtered_size_ply = 0; - p.gamePly = 1; // Not included in apery format. Should be initialized - bool ignore_flag_fen = false; - bool ignore_flag_move = false; - bool ignore_flag_ply = false; - while (std::getline(ifs, line)) { - std::stringstream ss(line); - std::string token; - std::string value; - ss >> token; - if (token == "fen") { - states = StateListPtr(new std::deque(1)); // Drop old and create a new one - std::string input_fen = line.substr(4); - tpos.set(input_fen, false, &states->back(), Threads.main()); - if (check_invalid_fen && !fen_is_ok(tpos, input_fen)) { - ignore_flag_fen = true; - filtered_size_fen++; - } - else { - tpos.sfen_pack(p.sfen); - } - } - else if (token == "move") { - ss >> value; - Move move = UCI::to_move(tpos, value); - if (check_illegal_move && move == MOVE_NONE) { - ignore_flag_move = true; - filtered_size_move++; - } - else { - p.move = move; - } - } - else if (token == "score") { - double score; - ss >> score; - // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71 - // Normalize to [0.0, 1.0]. - score = (score - src_score_min_value) / (src_score_max_value - src_score_min_value); - // Scale to [dest_score_min_value, dest_score_max_value]. - score = score * (dest_score_max_value - dest_score_min_value) + dest_score_min_value; - p.score = Math::clamp((int32_t)std::round(score) , -(int32_t)VALUE_MATE , (int32_t)VALUE_MATE); - } - else if (token == "ply") { - int temp; - ss >> temp; - if(temp < ply_minimum || temp > ply_maximum){ - ignore_flag_ply = true; - filtered_size_ply++; - } - p.gamePly = uint16_t(temp); // No cast here? - if (interpolate_eval != 0){ - p.score = min(3000, interpolate_eval * temp); - } - } - else if (token == "result") { - int temp; - ss >> temp; - p.game_result = int8_t(temp); // Do you need a cast here? 
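convert_bin above maps the plain-text score linearly from [src_score_min_value, src_score_max_value] onto [dest_score_min_value, dest_score_max_value] and clamps the result to the mate bound. A small standalone restatement of that transform; 32000 is used here only as a stand-in for VALUE_MATE.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Linearly rescale a teacher score from the source range to the destination range,
// then clamp to the mate bound.
std::int32_t rescale_score(double score,
                           double src_min, double src_max,
                           double dest_min, double dest_max)
{
    const double normalized = (score - src_min) / (src_max - src_min);   // -> [0, 1]
    const double scaled     = normalized * (dest_max - dest_min) + dest_min;
    const std::int32_t mate = 32000;                                     // stand-in for VALUE_MATE
    return std::clamp<std::int32_t>((std::int32_t)std::lround(scaled), -mate, mate);
}

// Example: rescale_score(50, -1000, 1000, -600, 600) == 30.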
- if (interpolate_eval){ - p.score = p.score * p.game_result; - } - } - else if (token == "e") { - if (!(ignore_flag_fen || ignore_flag_move || ignore_flag_ply)) { - fs.write((char*)&p, sizeof(PackedSfenValue)); - data_size+=1; - // debug - // std::cout< 0.25 * PawnValueEg -// #-4 --> -mate_in(4) -// #3 --> mate_in(3) -// -M4 --> -mate_in(4) -// +M3 --> mate_in(3) -Value parse_score_from_pgn_extract(std::string eval, bool& success) { - success = true; - - if (eval.substr(0, 1) == "#") { - if (eval.substr(1, 1) == "-") { - return -mate_in(stoi(eval.substr(2, eval.length() - 2))); - } - else { - return mate_in(stoi(eval.substr(1, eval.length() - 1))); - } - } - else if (eval.substr(0, 2) == "-M") { - //std::cout << "eval=" << eval << std::endl; - return -mate_in(stoi(eval.substr(2, eval.length() - 2))); - } - else if (eval.substr(0, 2) == "+M") { - //std::cout << "eval=" << eval << std::endl; - return mate_in(stoi(eval.substr(2, eval.length() - 2))); - } - else { - char *endptr; - double value = strtod(eval.c_str(), &endptr); - - if (*endptr != '\0') { - success = false; - return VALUE_ZERO; - } - else { - return Value(value * static_cast(PawnValueEg)); - } - } -} - -// for Debug -//#define DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT - -bool is_like_fen(std::string fen) { - int count_space = std::count(fen.cbegin(), fen.cend(), ' '); - int count_slash = std::count(fen.cbegin(), fen.cend(), '/'); - -#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT) - //std::cout << "count_space=" << count_space << std::endl; - //std::cout << "count_slash=" << count_slash << std::endl; -#endif - - return count_space == 5 && count_slash == 7; -} - -void convert_bin_from_pgn_extract(const vector& filenames, const string& output_file_name, const bool pgn_eval_side_to_move, const bool convert_no_eval_fens_as_score_zero) -{ - std::cout << "pgn_eval_side_to_move=" << pgn_eval_side_to_move << std::endl; - std::cout << "convert_no_eval_fens_as_score_zero=" << convert_no_eval_fens_as_score_zero << std::endl; - - auto th = Threads.main(); - auto &pos = th->rootPos; - - std::fstream ofs; - ofs.open(output_file_name, ios::out | ios::binary); - - int game_count = 0; - int fen_count = 0; - - for (auto filename : filenames) { - std::cout << now_string() << " convert " << filename << std::endl; - ifstream ifs; - ifs.open(filename); - - int game_result = 0; - - std::string line; - while (std::getline(ifs, line)) { - - if (line.empty()) { - continue; - } - - else if (line.substr(0, 1) == "[") { - std::regex pattern_result(R"(\[Result (.+?)\])"); - std::smatch match; - - // example: [Result "1-0"] - if (std::regex_search(line, match, pattern_result)) { - game_result = parse_game_result_from_pgn_extract(match.str(1)); -#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT) - std::cout << "game_result=" << game_result << std::endl; -#endif - game_count++; - if (game_count % 10000 == 0) { - std::cout << now_string() << " game_count=" << game_count << ", fen_count=" << fen_count << std::endl; - } - } - - continue; - } - - else { - int gamePly = 1; - auto itr = line.cbegin(); - - while (true) { - gamePly++; - - PackedSfenValue psv; - memset((char*)&psv, 0, sizeof(PackedSfenValue)); - - // fen - { - bool fen_found = false; - - while (!fen_found) { - std::regex pattern_bracket(R"(\{(.+?)\})"); - std::smatch match; - if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) { - break; - } - - itr += match.position(0) + match.length(0) - 1; - std::string str_fen = match.str(1); - trim(str_fen); - - if (is_like_fen(str_fen)) { - fen_found = true; - 
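parse_score_from_pgn_extract above accepts both mate notations ("#3", "#-4", "+M3", "-M4") and plain pawn-unit floats ("0.25"), the latter scaled by PawnValueEg. A rough standalone equivalent follows; encode_mate_in(), kMateBound and kPawnValueEg are hypothetical stand-ins for the engine's mate_in(), VALUE_MATE and PawnValueEg, whose exact values may differ.

#include <cstdlib>
#include <string>

constexpr int kMateBound   = 32000;  // placeholder for VALUE_MATE
constexpr int kPawnValueEg = 208;    // placeholder; the engine's PawnValueEg may differ

// Hypothetical stand-in for mate_in(ply): a score just below the mate bound.
int encode_mate_in(int ply) { return kMateBound - ply; }

// Parse the eval token emitted by pgn-extract into an internal score.
// Sets ok = false and returns 0 when the token is neither a number nor a mate score.
int parse_pgn_extract_eval(const std::string& eval, bool& ok)
{
    ok = true;
    if (!eval.empty() && eval[0] == '#')                       // "#3" / "#-4"
        return eval[1] == '-' ? -encode_mate_in(std::stoi(eval.substr(2)))
                              :  encode_mate_in(std::stoi(eval.substr(1)));
    if (eval.size() >= 2 && (eval.substr(0, 2) == "+M" || eval.substr(0, 2) == "-M"))
        return eval[0] == '-' ? -encode_mate_in(std::stoi(eval.substr(2)))
                              :  encode_mate_in(std::stoi(eval.substr(2)));
    char* end = nullptr;
    const double pawns = std::strtod(eval.c_str(), &end);      // "0.25", "+0.71", ...
    if (end == nullptr || *end != '\0') { ok = false; return 0; }   // e.g. "book"
    return int(pawns * kPawnValueEg);                           // pawn units -> internal units
}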
- StateInfo si; - pos.set(str_fen, false, &si, th); - pos.sfen_pack(psv.sfen); - } - -#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT) - std::cout << "str_fen=" << str_fen << std::endl; - std::cout << "fen_found=" << fen_found << std::endl; -#endif - } - - if (!fen_found) { - break; - } - } - - // move - { - std::regex pattern_move(R"(\}(.+?)\{)"); - std::smatch match; - if (!std::regex_search(itr, line.cend(), match, pattern_move)) { - break; - } - - itr += match.position(0) + match.length(0) - 1; - std::string str_move = match.str(1); - trim(str_move); -#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT) - std::cout << "str_move=" << str_move << std::endl; -#endif - psv.move = UCI::to_move(pos, str_move); - } - - // eval - bool eval_found = false; - { - std::regex pattern_bracket(R"(\{(.+?)\})"); - std::smatch match; - if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) { - break; - } - - std::string str_eval_clk = match.str(1); - trim(str_eval_clk); -#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT) - std::cout << "str_eval_clk=" << str_eval_clk << std::endl; -#endif - - // example: { [%eval 0.25] [%clk 0:10:00] } - // example: { [%eval #-4] [%clk 0:10:00] } - // example: { [%eval #3] [%clk 0:10:00] } - // example: { +0.71/22 1.2s } - // example: { -M4/7 0.003s } - // example: { M3/245 0.017s } - // example: { +M1/245 0.010s, White mates } - // example: { 0.60 } - // example: { book } - // example: { rnbqkb1r/pp3ppp/2p1pn2/3p4/2PP4/2N2N2/PP2PPPP/R1BQKB1R w KQkq - 0 5 } - - // Considering the absence of eval - if (!is_like_fen(str_eval_clk)) { - itr += match.position(0) + match.length(0) - 1; - - if (str_eval_clk != "book") { - std::regex pattern_eval1(R"(\[\%eval (.+?)\])"); - std::regex pattern_eval2(R"((.+?)\/)"); - - std::string str_eval; - if (std::regex_search(str_eval_clk, match, pattern_eval1) || - std::regex_search(str_eval_clk, match, pattern_eval2)) { - str_eval = match.str(1); - trim(str_eval); - } - else { - str_eval = str_eval_clk; - } - - bool success = false; - Value value = parse_score_from_pgn_extract(str_eval, success); - if (success) { - eval_found = true; - psv.score = Math::clamp(value, -VALUE_MATE , VALUE_MATE); - } - -#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT) - std::cout << "str_eval=" << str_eval << std::endl; - std::cout << "success=" << success << ", psv.score=" << psv.score << std::endl; -#endif - } - } - } - - // write - if (eval_found || convert_no_eval_fens_as_score_zero) { - if (!eval_found && convert_no_eval_fens_as_score_zero) { - psv.score = 0; - } - - psv.gamePly = gamePly; - psv.game_result = game_result; - - if (pos.side_to_move() == BLACK) { - if (!pgn_eval_side_to_move) { - psv.score *= -1; - } - psv.game_result *= -1; - } - - ofs.write((char*)&psv, sizeof(PackedSfenValue)); - - fen_count++; - } - } - - game_result = 0; - } - } - } - - std::cout << now_string() << " game_count=" << game_count << ", fen_count=" << fen_count << std::endl; - std::cout << now_string() << " all done" << std::endl; - ofs.close(); -} - -void convert_plain(const vector& filenames, const string& output_file_name) -{ - Position tpos; - std::ofstream ofs; - ofs.open(output_file_name, ios::app); - auto th = Threads.main(); - for (auto filename : filenames) { - std::cout << "convert " << filename << " ... 
"; - - // Just convert packedsfenvalue to text - std::fstream fs; - fs.open(filename, ios::in | ios::binary); - PackedSfenValue p; - while (true) - { - if (fs.read((char*)&p, sizeof(PackedSfenValue))) { - StateInfo si; - tpos.set_from_packed_sfen(p.sfen, &si, th, false); - - // write as plain text - ofs << "fen " << tpos.fen() << std::endl; - ofs << "move " << UCI::move(Move(p.move), false) << std::endl; - ofs << "score " << p.score << std::endl; - ofs << "ply " << int(p.gamePly) << std::endl; - ofs << "result " << int(p.game_result) << std::endl; - ofs << "e" << std::endl; - } - else { - break; - } - } - fs.close(); - std::cout << "done" << std::endl; - } - ofs.close(); - std::cout << "all done" << std::endl; -} - -// Learning from the generated game record -void learn(Position&, istringstream& is) -{ - auto thread_num = (int)Options["Threads"]; - SfenReader sr(thread_num); - - LearnerThink learn_think(sr); - vector filenames; - - // mini_batch_size 1M aspect by default. This can be increased. - auto mini_batch_size = LEARN_MINI_BATCH_SIZE; - - // Number of loops (read the game record file this number of times) - int loop = 1; - - // Game file storage folder (get game file with relative path from here) - string base_dir; - - string target_dir; - - // If 0, it will be the default value. - double eta1 = 0.0; - double eta2 = 0.0; - double eta3 = 0.0; - uint64_t eta1_epoch = 0; // eta2 is not applied by default - uint64_t eta2_epoch = 0; // eta3 is not applied by default + } + return false; + } + + // Shuffle_files(), shuffle_files_quick() subcontracting, writing part. + // output_file_name: Name of the file to write + // prng: random number + // afs: fstream of each teacher phase file + // a_count: The number of teacher positions inherent in each file. + void shuffle_write(const string& output_file_name, PRNG& prng, vector& afs, vector& a_count) + { + uint64_t total_sfen_count = 0; + for (auto c : a_count) + total_sfen_count += c; + + // number of exported phases + uint64_t write_sfen_count = 0; + + // Output the progress on the screen for each phase. + const uint64_t buffer_size = 10000000; + + auto print_status = [&]() + { + // Output progress every 10M phase or when all writing is completed + if (((write_sfen_count % buffer_size) == 0) || + (write_sfen_count == total_sfen_count)) + cout << write_sfen_count << " / " << total_sfen_count << endl; + }; + + + cout << endl << "write : " << output_file_name << endl; + + fstream fs(output_file_name, ios::out | ios::binary); + + // total teacher positions + uint64_t sum = 0; + for (auto c : a_count) + sum += c; + + while (sum != 0) + { + auto r = prng.rand(sum); + + // Aspects stored in fs[0] file ... Aspects stored in fs[1] file ... + //Think of it as a series like, and determine in which file r is pointing. + // The contents of the file are shuffled, so you can take the next element from that file. + // Each file has a_count[x] phases, so this process can be written as follows. + + uint64_t n = 0; + while (a_count[n] <= r) + r -= a_count[n++]; + + // This confirms n. Before you forget it, reduce the remaining number. + + --a_count[n]; + --sum; + + PackedSfenValue psv; + // It's better to read and write all at once until the performance is not so good... + if (afs[n].read((char*)&psv, sizeof(PackedSfenValue))) + { + fs.write((char*)&psv, sizeof(PackedSfenValue)); + ++write_sfen_count; + print_status(); + } + } + print_status(); + fs.close(); + cout << "done!" << endl; + } + + // Subcontracting the teacher shuffle "learn shuffle" command. 
+ // output_file_name: name of the output file where the shuffled teacher positions will be written + void shuffle_files(const vector& filenames, const string& output_file_name, uint64_t buffer_size) + { + // The destination folder is + // tmp/ for temporary writing + + // Temporary file is written to tmp/ folder for each buffer_size phase. + // For example, if buffer_size = 20M, you need a buffer of 20M*40bytes = 800MB. + // In a PC with a small memory, it would be better to reduce this. + // However, if the number of files increases too much, it will not be possible to open at the same time due to OS restrictions. + // There should have been a limit of 512 per process on Windows, so you can open here as 500, + // The current setting is 500 files x 20M = 10G = 10 billion phases. + + PSVector buf; + buf.resize(buffer_size); + // ↑ buffer, a marker that indicates how much you have used + uint64_t buf_write_marker = 0; + + // File name to write (incremental counter because it is a serial number) + uint64_t write_file_count = 0; + + // random number to shuffle + // Do not use std::random_device(). Because it always the same integers on MinGW. + PRNG prng(std::chrono::system_clock::now().time_since_epoch().count()); + + // generate the name of the temporary file + auto make_filename = [](uint64_t i) + { + return "tmp/" + to_string(i) + ".bin"; + }; + + // Exported files in tmp/ folder, number of teacher positions stored in each + vector a_count; + + auto write_buffer = [&](uint64_t size) + { + // shuffle from buf[0] to buf[size-1] + for (uint64_t i = 0; i < size; ++i) + swap(buf[i], buf[(uint64_t)(prng.rand(size - i) + i)]); + + // write to a file + fstream fs; + fs.open(make_filename(write_file_count++), ios::out | ios::binary); + fs.write((char*)&buf[0], size * sizeof(PackedSfenValue)); + fs.close(); + a_count.push_back(size); + + buf_write_marker = 0; + cout << "."; + }; + + Dependency::mkdir("tmp"); + + // Shuffle and export as a 10M phase shredded file. + for (auto filename : filenames) + { + fstream fs(filename, ios::in | ios::binary); + cout << endl << "open file = " << filename; + while (fs.read((char*)&buf[buf_write_marker], sizeof(PackedSfenValue))) + if (++buf_write_marker == buffer_size) + write_buffer(buffer_size); + + // Read in units of sizeof(PackedSfenValue), + // Ignore the last remaining fraction. (Fails in fs.read, so exit while) + // (The remaining fraction seems to be half-finished data that was created because it was stopped halfway during teacher generation.) + + } + + if (buf_write_marker != 0) + write_buffer(buf_write_marker); + + // Only shuffled files have been written write_file_count. + // As a second pass, if you open all of them at the same time, select one at random and load one phase at a time + // Now you have shuffled. + + // Original file for shirt full + tmp file + file to write requires 3 times the storage capacity of the original file. + // 1 billion SSD is not enough for shuffling because it is 400GB for 10 billion phases. + // If you want to delete (or delete by hand) the original file at this point after writing to tmp, + // The storage capacity is about twice that of the original file. + // So, maybe we should have an option to delete the original file. + + // Files are opened at the same time. It is highly possible that this will exceed FOPEN_MAX. + // In that case, rather than adjusting buffer_size to reduce the number of files. 
+ + vector afs; + for (uint64_t i = 0; i < write_file_count; ++i) + afs.emplace_back(fstream(make_filename(i), ios::in | ios::binary)); + + // Throw to the subcontract function and end. + shuffle_write(output_file_name, prng, afs, a_count); + } + + // Subcontracting the teacher shuffle "learn shuffleq" command. + // This is written in 1 pass. + // output_file_name: name of the output file where the shuffled teacher positions will be written + void shuffle_files_quick(const vector& filenames, const string& output_file_name) + { + // random number to shuffle + // Do not use std::random_device(). Because it always the same integers on MinGW. + PRNG prng(std::chrono::system_clock::now().time_since_epoch().count()); + + // number of files + size_t file_count = filenames.size(); + + // Number of teacher positions stored in each file in filenames + vector a_count(file_count); + + // Count the number of teacher aspects in each file. + vector afs(file_count); + + for (size_t i = 0; i < file_count; ++i) + { + auto filename = filenames[i]; + auto& fs = afs[i]; + + fs.open(filename, ios::in | ios::binary); + fs.seekg(0, fstream::end); + uint64_t eofPos = (uint64_t)fs.tellg(); + fs.clear(); // Otherwise, the next seek may fail. + fs.seekg(0, fstream::beg); + uint64_t begPos = (uint64_t)fs.tellg(); + uint64_t file_size = eofPos - begPos; + uint64_t sfen_count = file_size / sizeof(PackedSfenValue); + a_count[i] = sfen_count; + + // Output the number of sfen stored in each file. + cout << filename << " = " << sfen_count << " sfens." << endl; + } + + // Since we know the file size of each file, + // open them all at once (already open), + // Select one at a time and load one phase at a time + // Now you have shuffled. + + // Throw to the subcontract function and end. + shuffle_write(output_file_name, prng, afs, a_count); + } + + // Subcontracting the teacher shuffle "learn shufflem" command. + // Read the whole memory and write it out with the specified file name. + void shuffle_files_on_memory(const vector& filenames, const string output_file_name) + { + PSVector buf; + + for (auto filename : filenames) + { + std::cout << "read : " << filename << std::endl; + read_file_to_memory(filename, [&buf](uint64_t size) { + assert((size % sizeof(PackedSfenValue)) == 0); + // Expand the buffer and read after the last end. + uint64_t last = buf.size(); + buf.resize(last + size / sizeof(PackedSfenValue)); + return (void*)&buf[last]; + }); + } + + // shuffle from buf[0] to buf[size-1] + // Do not use std::random_device(). Because it always the same integers on MinGW. + PRNG prng(std::chrono::system_clock::now().time_since_epoch().count()); + uint64_t size = (uint64_t)buf.size(); + std::cout << "shuffle buf.size() = " << size << std::endl; + for (uint64_t i = 0; i < size; ++i) + swap(buf[i], buf[(uint64_t)(prng.rand(size - i) + i)]); + + std::cout << "write : " << output_file_name << endl; + + // If the file to be written exceeds 2GB, it cannot be written in one shot with fstream::write, so use wrapper. + write_memory_to_file(output_file_name, (void*)&buf[0], (uint64_t)sizeof(PackedSfenValue) * (uint64_t)buf.size()); + + std::cout << "..shuffle_on_memory done." << std::endl; + } + + // Learning from the generated game record + void learn(Position&, istringstream& is) + { + auto thread_num = (int)Options["Threads"]; + SfenReader sr(thread_num); + + LearnerThink learn_think(sr); + vector filenames; + + // mini_batch_size 1M aspect by default. This can be increased. 
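shuffle_files_quick() above sizes each input by seeking to the end and dividing the byte count by the fixed record size. A hedged sketch of that measurement in isolation; RECORD_SIZE stands in for sizeof(PackedSfenValue), which the comments in this patch put at 40 bytes, and count_records is not a name from the patch.

    #include <cstdint>
    #include <fstream>
    #include <string>

    constexpr std::uint64_t RECORD_SIZE = 40;   // stand-in for sizeof(PackedSfenValue)

    // Number of whole packed records in a file; any trailing fraction is ignored,
    // matching how the shuffle code treats incomplete records. Assumes the file
    // opens successfully.
    std::uint64_t count_records(const std::string& path)
    {
        std::ifstream fs(path, std::ios::in | std::ios::binary);
        fs.seekg(0, std::ios::end);
        return static_cast<std::uint64_t>(fs.tellg()) / RECORD_SIZE;
    }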
+ auto mini_batch_size = LEARN_MINI_BATCH_SIZE; + + // Number of loops (read the game record file this number of times) + int loop = 1; + + // Game file storage folder (get game file with relative path from here) + string base_dir; + + string target_dir; + + // If 0, it will be the default value. + double eta1 = 0.0; + double eta2 = 0.0; + double eta3 = 0.0; + uint64_t eta1_epoch = 0; // eta2 is not applied by default + uint64_t eta2_epoch = 0; // eta3 is not applied by default #if defined(USE_GLOBAL_OPTIONS) - // Save it for later restore. - auto oldGlobalOptions = GlobalOptions; - // If you hit the eval hash, you can not calculate rmse etc. so turn it off. - GlobalOptions.use_eval_hash = false; - // If you hit the replacement table, pruning may occur at the previous evaluation value, so turn it off. - GlobalOptions.use_hash_probe = false; + // Save it for later restore. + auto oldGlobalOptions = GlobalOptions; + // If you hit the eval hash, you can not calculate rmse etc. so turn it off. + GlobalOptions.use_eval_hash = false; + // If you hit the replacement table, pruning may occur at the previous evaluation value, so turn it off. + GlobalOptions.use_hash_probe = false; #endif - // --- Function that only shuffles the teacher aspect + // --- Function that only shuffles the teacher aspect - // normal shuffle - bool shuffle_normal = false; - uint64_t buffer_size = 20000000; - // fast shuffling assuming each file is shuffled - bool shuffle_quick = false; - // A function to read the entire file in memory and shuffle it. (Requires file size memory) - bool shuffle_on_memory = false; - // Conversion of packed sfen. In plain, it consists of sfen(string), evaluation value (integer), move (eg 7g7f, string), result (loss-1, win 1, draw 0) - bool use_convert_plain = false; - // convert plain format teacher to Yaneura King's bin - bool use_convert_bin = false; - int ply_minimum = 0; - int ply_maximum = 114514; - bool interpolate_eval = 0; - bool check_invalid_fen = false; - bool check_illegal_move = false; - // convert teacher in pgn-extract format to Yaneura King's bin - bool use_convert_bin_from_pgn_extract = false; - bool pgn_eval_side_to_move = false; - bool convert_no_eval_fens_as_score_zero = false; - // File name to write in those cases (default is "shuffled_sfen.bin") - string output_file_name = "shuffled_sfen.bin"; + // normal shuffle + bool shuffle_normal = false; + uint64_t buffer_size = 20000000; + // fast shuffling assuming each file is shuffled + bool shuffle_quick = false; + // A function to read the entire file in memory and shuffle it. (Requires file size memory) + bool shuffle_on_memory = false; + // Conversion of packed sfen. In plain, it consists of sfen(string), evaluation value (integer), move (eg 7g7f, string), result (loss-1, win 1, draw 0) + bool use_convert_plain = false; + // convert plain format teacher to Yaneura King's bin + bool use_convert_bin = false; + int ply_minimum = 0; + int ply_maximum = 114514; + bool interpolate_eval = 0; + bool check_invalid_fen = false; + bool check_illegal_move = false; + // convert teacher in pgn-extract format to Yaneura King's bin + bool use_convert_bin_from_pgn_extract = false; + bool pgn_eval_side_to_move = false; + bool convert_no_eval_fens_as_score_zero = false; + // File name to write in those cases (default is "shuffled_sfen.bin") + string output_file_name = "shuffled_sfen.bin"; - // If the absolute value of the evaluation value in the deep search of the teacher phase exceeds this value, that phase is discarded. 
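One observation on the USE_GLOBAL_OPTIONS block at the top of this hunk: learn() saves oldGlobalOptions here but only restores it at the very end of the function, and the shuffle/convert branches further down return early, so those paths never restore the options. A small RAII guard would make the restore unconditional. This is only a suggestion sketch, not part of the patch:

    // Sketch of an RAII guard for the GlobalOptions save/restore pattern used in
    // learn(). Purely illustrative.
    template <typename T>
    struct ScopedRestore
    {
        T& ref;
        T  saved;
        explicit ScopedRestore(T& r) : ref(r), saved(r) {}
        ~ScopedRestore() { ref = saved; }
    };

    // usage inside learn():
    //   ScopedRestore<decltype(GlobalOptions)> guard(GlobalOptions);
    //   GlobalOptions.use_eval_hash  = false;
    //   GlobalOptions.use_hash_probe = false;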
-    int eval_limit = 32000;
+        // If the absolute value of the evaluation value in the deep search of the teacher phase exceeds this value, that phase is discarded.
+        int eval_limit = 32000;
-    // Flag to save the evaluation function file only once near the end.
-    bool save_only_once = false;
+        // Flag to save the evaluation function file only once near the end.
+        bool save_only_once = false;
-    // Shuffle about what you are pre-reading on the teacher aspect. (Shuffle of about 10 million phases)
-    // Turn on if you want to pass a pre-shuffled file.
-    bool no_shuffle = false;
+        // Shuffle about what you are pre-reading on the teacher aspect. (Shuffle of about 10 million phases)
+        // Turn on if you want to pass a pre-shuffled file.
+        bool no_shuffle = false;
 #if defined (LOSS_FUNCTION_IS_ELMO_METHOD)
-    // elmo lambda
-    ELMO_LAMBDA = 0.33;
-    ELMO_LAMBDA2 = 0.33;
-    ELMO_LAMBDA_LIMIT = 32000;
+        // elmo lambda
+        ELMO_LAMBDA = 0.33;
+        ELMO_LAMBDA2 = 0.33;
+        ELMO_LAMBDA_LIMIT = 32000;
 #endif
-    // Discount rate. If this is set to a value other than 0, the slope will be added even at other than the PV termination. (At that time, apply this discount rate)
-    double discount_rate = 0;
+        // Discount rate. If this is set to a value other than 0, the slope will be added even at other than the PV termination. (At that time, apply this discount rate)
+        double discount_rate = 0;
-    // if (gamePly < rand(reduction_gameply)), discard that position (thins out early-game positions).
-    int reduction_gameply = 1;
-    // Optional item that does not let you learn KK/KKP/KPP/KPPP
-    array<bool, 4> freeze = {};
+        // if (gamePly < rand(reduction_gameply)), discard that position (thins out early-game positions).
+        int reduction_gameply = 1;
+        // Optional item that does not let you learn KK/KKP/KPP/KPPP
+        array<bool, 4> freeze = {};
 #if defined(EVAL_NNUE)
-    uint64_t nn_batch_size = 1000;
-    double newbob_decay = 1.0;
-    int newbob_num_trials = 2;
-    string nn_options;
+        uint64_t nn_batch_size = 1000;
+        double newbob_decay = 1.0;
+        int newbob_num_trials = 2;
+        string nn_options;
 #endif
-    uint64_t eval_save_interval = LEARN_EVAL_SAVE_INTERVAL;
-    uint64_t loss_output_interval = 0;
-    uint64_t mirror_percentage = 0;
+        uint64_t eval_save_interval = LEARN_EVAL_SAVE_INTERVAL;
+        uint64_t loss_output_interval = 0;
+        uint64_t mirror_percentage = 0;
-    string validation_set_file_name;
+        string validation_set_file_name;
-    // Assume the filenames are staggered.
-    while (true)
-    {
-        string option;
-        is >> option;
+        // Assume the filenames are staggered.
+        while (true)
+        {
+            string option;
+            is >> option;
-        if (option == "")
-            break;
+            if (option == "")
+                break;
-        // specify the number of phases of mini-batch
-        if (option == "bat")
-        {
-            is >> mini_batch_size;
-            mini_batch_size *= 10000; // Unit is ten thousand
-        }
+            // specify the number of phases of mini-batch
+            if (option == "bat")
+            {
+                is >> mini_batch_size;
+                mini_batch_size *= 10000; // Unit is ten thousand
+            }
-        // Specify the folder in which the game record is stored and make it the rooting target.
-        else if (option == "targetdir") is >> target_dir;
+            // Specify the folder in which the game record is stored and make it the rooting target.
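The option loop that starts in this hunk treats the command tail as whitespace-separated tokens: known keywords consume a value, and any unrecognised token is kept as an input file name. A minimal, runnable model of that behaviour; the sample command line and the small subset of variables are illustrative only.

    #include <cstdint>
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    int main()
    {
        std::istringstream is("bat 100 eta 0.05 targetdir train_data sfen1.bin sfen2.bin");

        std::uint64_t mini_batch_size = 1000000;
        double eta1 = 0.0;
        std::string target_dir;
        std::vector<std::string> filenames;

        std::string option;
        while (is >> option)
        {
            if (option == "bat") { is >> mini_batch_size; mini_batch_size *= 10000; } // unit: 10,000 positions
            else if (option == "eta")       is >> eta1;
            else if (option == "targetdir") is >> target_dir;
            else filenames.push_back(option);   // anything else is a teacher file
        }

        std::cout << mini_batch_size << ' '    // 1000000
                  << eta1 << ' '               // 0.05
                  << filenames.size() << '\n'; // 2
    }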
+ else if (option == "targetdir") is >> target_dir; - // Specify the number of loops - else if (option == "loop") is >> loop; + // Specify the number of loops + else if (option == "loop") is >> loop; - // Game file storage folder (get game file with relative path from here) - else if (option == "basedir") is >> base_dir; + // Game file storage folder (get game file with relative path from here) + else if (option == "basedir") is >> base_dir; - // Mini batch size - else if (option == "batchsize") is >> mini_batch_size; + // Mini batch size + else if (option == "batchsize") is >> mini_batch_size; - // learning rate - else if (option == "eta") is >> eta1; - else if (option == "eta1") is >> eta1; // alias - else if (option == "eta2") is >> eta2; - else if (option == "eta3") is >> eta3; - else if (option == "eta1_epoch") is >> eta1_epoch; - else if (option == "eta2_epoch") is >> eta2_epoch; - // Accept also the old option name. - else if (option == "use_draw_in_training" || option == "use_draw_games_in_training") is >> use_draw_games_in_training; - // Accept also the old option name. - else if (option == "use_draw_in_validation" || option == "use_draw_games_in_validation") is >> use_draw_games_in_validation; - // Accept also the old option name. - else if (option == "use_hash_in_training" || option == "skip_duplicated_positions_in_training") is >> skip_duplicated_positions_in_training; - else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient; - // Discount rate - else if (option == "discount_rate") is >> discount_rate; - // Using WDL with win rate model instead of sigmoid - else if (option == "use_wdl") is >> use_wdl; + // learning rate + else if (option == "eta") is >> eta1; + else if (option == "eta1") is >> eta1; // alias + else if (option == "eta2") is >> eta2; + else if (option == "eta3") is >> eta3; + else if (option == "eta1_epoch") is >> eta1_epoch; + else if (option == "eta2_epoch") is >> eta2_epoch; + // Accept also the old option name. + else if (option == "use_draw_in_training" || option == "use_draw_games_in_training") is >> use_draw_games_in_training; + // Accept also the old option name. + else if (option == "use_draw_in_validation" || option == "use_draw_games_in_validation") is >> use_draw_games_in_validation; + // Accept also the old option name. + else if (option == "use_hash_in_training" || option == "skip_duplicated_positions_in_training") is >> skip_duplicated_positions_in_training; + else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient; + // Discount rate + else if (option == "discount_rate") is >> discount_rate; + // Using WDL with win rate model instead of sigmoid + else if (option == "use_wdl") is >> use_wdl; - // No learning of KK/KKP/KPP/KPPP. - else if (option == "freeze_kk") is >> freeze[0]; - else if (option == "freeze_kkp") is >> freeze[1]; - else if (option == "freeze_kpp") is >> freeze[2]; + // No learning of KK/KKP/KPP/KPPP. 
+ else if (option == "freeze_kk") is >> freeze[0]; + else if (option == "freeze_kkp") is >> freeze[1]; + else if (option == "freeze_kpp") is >> freeze[2]; #if defined(EVAL_KPPT) || defined(EVAL_KPP_KKPT) || defined(EVAL_KPP_KKPT_FV_VAR) || defined(EVAL_NABLA) #elif defined(EVAL_KPPPT) || defined(EVAL_KPPP_KKPT) || defined(EVAL_HELICES) - else if (option == "freeze_kppp") is >> freeze[3]; + else if (option == "freeze_kppp") is >> freeze[3]; #elif defined(EVAL_KKPP_KKPT) || defined(EVAL_KKPPT) - else if (option == "freeze_kkpp") is >> freeze[3]; + else if (option == "freeze_kkpp") is >> freeze[3]; #endif #if defined (LOSS_FUNCTION_IS_ELMO_METHOD) - // LAMBDA - else if (option == "lambda") is >> ELMO_LAMBDA; - else if (option == "lambda2") is >> ELMO_LAMBDA2; - else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT; + // LAMBDA + else if (option == "lambda") is >> ELMO_LAMBDA; + else if (option == "lambda2") is >> ELMO_LAMBDA2; + else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT; #endif - else if (option == "reduction_gameply") is >> reduction_gameply; + else if (option == "reduction_gameply") is >> reduction_gameply; - // shuffle related - else if (option == "shuffle") shuffle_normal = true; - else if (option == "buffer_size") is >> buffer_size; - else if (option == "shuffleq") shuffle_quick = true; - else if (option == "shufflem") shuffle_on_memory = true; - else if (option == "output_file_name") is >> output_file_name; + // shuffle related + else if (option == "shuffle") shuffle_normal = true; + else if (option == "buffer_size") is >> buffer_size; + else if (option == "shuffleq") shuffle_quick = true; + else if (option == "shufflem") shuffle_on_memory = true; + else if (option == "output_file_name") is >> output_file_name; - else if (option == "eval_limit") is >> eval_limit; - else if (option == "save_only_once") save_only_once = true; - else if (option == "no_shuffle") no_shuffle = true; + else if (option == "eval_limit") is >> eval_limit; + else if (option == "save_only_once") save_only_once = true; + else if (option == "no_shuffle") no_shuffle = true; #if defined(EVAL_NNUE) - else if (option == "nn_batch_size") is >> nn_batch_size; - else if (option == "newbob_decay") is >> newbob_decay; - else if (option == "newbob_num_trials") is >> newbob_num_trials; - else if (option == "nn_options") is >> nn_options; + else if (option == "nn_batch_size") is >> nn_batch_size; + else if (option == "newbob_decay") is >> newbob_decay; + else if (option == "newbob_num_trials") is >> newbob_num_trials; + else if (option == "nn_options") is >> nn_options; #endif - else if (option == "eval_save_interval") is >> eval_save_interval; - else if (option == "loss_output_interval") is >> loss_output_interval; - else if (option == "mirror_percentage") is >> mirror_percentage; - else if (option == "validation_set_file_name") is >> validation_set_file_name; + else if (option == "eval_save_interval") is >> eval_save_interval; + else if (option == "loss_output_interval") is >> loss_output_interval; + else if (option == "mirror_percentage") is >> mirror_percentage; + else if (option == "validation_set_file_name") is >> validation_set_file_name; - // Rabbit convert related - else if (option == "convert_plain") use_convert_plain = true; - else if (option == "convert_bin") use_convert_bin = true; - else if (option == "interpolate_eval") is >> interpolate_eval; - else if (option == "check_invalid_fen") is >> check_invalid_fen; - else if (option == "check_illegal_move") is >> check_illegal_move; - else if 
(option == "convert_bin_from_pgn-extract") use_convert_bin_from_pgn_extract = true; - else if (option == "pgn_eval_side_to_move") is >> pgn_eval_side_to_move; - else if (option == "convert_no_eval_fens_as_score_zero") is >> convert_no_eval_fens_as_score_zero; - else if (option == "src_score_min_value") is >> src_score_min_value; - else if (option == "src_score_max_value") is >> src_score_max_value; - else if (option == "dest_score_min_value") is >> dest_score_min_value; - else if (option == "dest_score_max_value") is >> dest_score_max_value; - else if (option == "convert_teacher_signal_to_winning_probability") is >> convert_teacher_signal_to_winning_probability; - else if (option == "use_raw_nnue_eval") is >> use_raw_nnue_eval; + // Rabbit convert related + else if (option == "convert_plain") use_convert_plain = true; + else if (option == "convert_bin") use_convert_bin = true; + else if (option == "interpolate_eval") is >> interpolate_eval; + else if (option == "check_invalid_fen") is >> check_invalid_fen; + else if (option == "check_illegal_move") is >> check_illegal_move; + else if (option == "convert_bin_from_pgn-extract") use_convert_bin_from_pgn_extract = true; + else if (option == "pgn_eval_side_to_move") is >> pgn_eval_side_to_move; + else if (option == "convert_no_eval_fens_as_score_zero") is >> convert_no_eval_fens_as_score_zero; + else if (option == "src_score_min_value") is >> src_score_min_value; + else if (option == "src_score_max_value") is >> src_score_max_value; + else if (option == "dest_score_min_value") is >> dest_score_min_value; + else if (option == "dest_score_max_value") is >> dest_score_max_value; + else if (option == "convert_teacher_signal_to_winning_probability") is >> convert_teacher_signal_to_winning_probability; + else if (option == "use_raw_nnue_eval") is >> use_raw_nnue_eval; - // Otherwise, it's a filename. - else - filenames.push_back(option); - } - if (loss_output_interval == 0) - loss_output_interval = LEARN_RMSE_OUTPUT_INTERVAL * mini_batch_size; + // Otherwise, it's a filename. + else + filenames.push_back(option); + } + if (loss_output_interval == 0) + loss_output_interval = LEARN_RMSE_OUTPUT_INTERVAL * mini_batch_size; - cout << "learn command , "; + cout << "learn command , "; - // Issue a warning if OpenMP is disabled. + // Issue a warning if OpenMP is disabled. #if !defined(_OPENMP) - cout << "Warning! OpenMP disabled." << endl; + cout << "Warning! OpenMP disabled." << endl; #endif - // Display learning game file - if (target_dir != "") - { - string kif_base_dir = Path::Combine(base_dir, target_dir); + // Display learning game file + if (target_dir != "") + { + string kif_base_dir = Path::Combine(base_dir, target_dir); - // Remove this folder. Keep it relative to base_dir. + // Remove this folder. Keep it relative to base_dir. #if defined(_MSC_VER) - // If you use std::tr2, warning C4996 will appear, so suppress it. - // * std::tr2 issued a deprecation warning by default under std:c++14, and was deleted by default in /std:c++17. - #pragma warning(push) - #pragma warning(disable:4996) + // If you use std::tr2, warning C4996 will appear, so suppress it. + // * std::tr2 issued a deprecation warning by default under std:c++14, and was deleted by default in /std:c++17. 
+#pragma warning(push) +#pragma warning(disable:4996) - namespace sys = std::filesystem; - sys::path p(kif_base_dir); // Origin of enumeration - std::for_each(sys::directory_iterator(p), sys::directory_iterator(), - [&](const sys::path& p) { - if (sys::is_regular_file(p)) - filenames.push_back(Path::Combine(target_dir, p.filename().generic_string())); - }); - #pragma warning(pop) + namespace sys = std::filesystem; + sys::path p(kif_base_dir); // Origin of enumeration + std::for_each(sys::directory_iterator(p), sys::directory_iterator(), + [&](const sys::path& p) { + if (sys::is_regular_file(p)) + filenames.push_back(Path::Combine(target_dir, p.filename().generic_string())); + }); +#pragma warning(pop) #elif defined(__GNUC__) - auto ends_with = [](std::string const & value, std::string const & ending) - { - if (ending.size() > value.size()) return false; - return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); - }; + auto ends_with = [](std::string const& value, std::string const& ending) + { + if (ending.size() > value.size()) return false; + return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); + }; - // It can't be helped, so read it using dirent.h. - DIR *dp; // pointer to directory - dirent* entry; // entry point returned by readdir() + // It can't be helped, so read it using dirent.h. + DIR* dp; // pointer to directory + dirent* entry; // entry point returned by readdir() - dp = opendir(kif_base_dir.c_str()); - if (dp != NULL) - { - do { - entry = readdir(dp); - // Only list files ending with ".bin" - // →I hate this restriction when generating files with serial numbers... - if (entry != NULL && ends_with(entry->d_name, ".bin") ) - { - //cout << entry->d_name << endl; - filenames.push_back(Path::Combine(target_dir, entry->d_name)); - } - } while (entry != NULL); - closedir(dp); - } + dp = opendir(kif_base_dir.c_str()); + if (dp != NULL) + { + do { + entry = readdir(dp); + // Only list files ending with ".bin" + // →I hate this restriction when generating files with serial numbers... + if (entry != NULL && ends_with(entry->d_name, ".bin")) + { + //cout << entry->d_name << endl; + filenames.push_back(Path::Combine(target_dir, entry->d_name)); + } + } while (entry != NULL); + closedir(dp); + } #endif - } + } - cout << "learn from "; - for (auto s : filenames) - cout << s << " , "; - cout << endl; - if (!validation_set_file_name.empty()) - { - cout << "validation set : " << validation_set_file_name << endl; - } + cout << "learn from "; + for (auto s : filenames) + cout << s << " , "; + cout << endl; + if (!validation_set_file_name.empty()) + { + cout << "validation set : " << validation_set_file_name << endl; + } - cout << "base dir : " << base_dir << endl; - cout << "target dir : " << target_dir << endl; + cout << "base dir : " << base_dir << endl; + cout << "target dir : " << target_dir << endl; - // shuffle mode - if (shuffle_normal) - { - cout << "buffer_size : " << buffer_size << endl; - cout << "shuffle mode.." << endl; - shuffle_files(filenames,output_file_name , buffer_size); - return; - } - if (shuffle_quick) - { - cout << "quick shuffle mode.." << endl; - shuffle_files_quick(filenames, output_file_name); - return; - } - if (shuffle_on_memory) - { - cout << "shuffle on memory.." << endl; - shuffle_files_on_memory(filenames,output_file_name); - return; - } - if (use_convert_plain) - { - Eval::init_NNUE(); - cout << "convert_plain.." 
<< endl; - convert_plain(filenames, output_file_name); - return; - } - if (use_convert_bin) - { - Eval::init_NNUE(); - cout << "convert_bin.." << endl; - convert_bin(filenames,output_file_name, ply_minimum, ply_maximum, interpolate_eval, check_invalid_fen, check_illegal_move); - return; + // shuffle mode + if (shuffle_normal) + { + cout << "buffer_size : " << buffer_size << endl; + cout << "shuffle mode.." << endl; + shuffle_files(filenames, output_file_name, buffer_size); + return; + } + if (shuffle_quick) + { + cout << "quick shuffle mode.." << endl; + shuffle_files_quick(filenames, output_file_name); + return; + } + if (shuffle_on_memory) + { + cout << "shuffle on memory.." << endl; + shuffle_files_on_memory(filenames, output_file_name); + return; + } + if (use_convert_plain) + { + Eval::init_NNUE(); + cout << "convert_plain.." << endl; + convert_plain(filenames, output_file_name); + return; + } + if (use_convert_bin) + { + Eval::init_NNUE(); + cout << "convert_bin.." << endl; + convert_bin( + filenames, + output_file_name, + ply_minimum, + ply_maximum, + interpolate_eval, + src_score_min_value, + src_score_max_value, + dest_score_min_value, + dest_score_max_value, + check_invalid_fen, + check_illegal_move); + return; - } - if (use_convert_bin_from_pgn_extract) - { - Eval::init_NNUE(); - cout << "convert_bin_from_pgn-extract.." << endl; - convert_bin_from_pgn_extract(filenames, output_file_name, pgn_eval_side_to_move, convert_no_eval_fens_as_score_zero); - return; - } + } + if (use_convert_bin_from_pgn_extract) + { + Eval::init_NNUE(); + cout << "convert_bin_from_pgn-extract.." << endl; + convert_bin_from_pgn_extract(filenames, output_file_name, pgn_eval_side_to_move, convert_no_eval_fens_as_score_zero); + return; + } - cout << "loop : " << loop << endl; - cout << "eval_limit : " << eval_limit << endl; - cout << "save_only_once : " << (save_only_once ? "true" : "false") << endl; - cout << "no_shuffle : " << (no_shuffle ? "true" : "false") << endl; + cout << "loop : " << loop << endl; + cout << "eval_limit : " << eval_limit << endl; + cout << "save_only_once : " << (save_only_once ? "true" : "false") << endl; + cout << "no_shuffle : " << (no_shuffle ? "true" : "false") << endl; - // Insert the file name for the number of loops. - for (int i = 0; i < loop; ++i) - // sfen reader, I'll read it in reverse order so I'll reverse it here. I'm sorry. - for (auto it = filenames.rbegin(); it != filenames.rend(); ++it) - sr.filenames.push_back(Path::Combine(base_dir, *it)); + // Insert the file name for the number of loops. + for (int i = 0; i < loop; ++i) + // sfen reader, I'll read it in reverse order so I'll reverse it here. I'm sorry. 
+ for (auto it = filenames.rbegin(); it != filenames.rend(); ++it) + sr.filenames.push_back(Path::Combine(base_dir, *it)); #if !defined(EVAL_NNUE) - cout << "Gradient Method : " << LEARN_UPDATE << endl; + cout << "Gradient Method : " << LEARN_UPDATE << endl; #endif - cout << "Loss Function : " << LOSS_FUNCTION << endl; - cout << "mini-batch size : " << mini_batch_size << endl; + cout << "Loss Function : " << LOSS_FUNCTION << endl; + cout << "mini-batch size : " << mini_batch_size << endl; #if defined(EVAL_NNUE) - cout << "nn_batch_size : " << nn_batch_size << endl; - cout << "nn_options : " << nn_options << endl; + cout << "nn_batch_size : " << nn_batch_size << endl; + cout << "nn_options : " << nn_options << endl; #endif - cout << "learning rate : " << eta1 << " , " << eta2 << " , " << eta3 << endl; - cout << "eta_epoch : " << eta1_epoch << " , " << eta2_epoch << endl; - cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl; - cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl; - cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl; + cout << "learning rate : " << eta1 << " , " << eta2 << " , " << eta3 << endl; + cout << "eta_epoch : " << eta1_epoch << " , " << eta2_epoch << endl; + cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl; + cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl; + cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl; #if defined(EVAL_NNUE) - if (newbob_decay != 1.0) { - cout << "scheduling : newbob with decay = " << newbob_decay - << ", " << newbob_num_trials << " trials" << endl; - } else { - cout << "scheduling : default" << endl; - } + if (newbob_decay != 1.0) { + cout << "scheduling : newbob with decay = " << newbob_decay + << ", " << newbob_num_trials << " trials" << endl; + } + else { + cout << "scheduling : default" << endl; + } #endif - cout << "discount rate : " << discount_rate << endl; + cout << "discount rate : " << discount_rate << endl; - // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1. - reduction_gameply = max(reduction_gameply, 1); - cout << "reduction_gameply : " << reduction_gameply << endl; + // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1. 
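On the reverse push in the loop above: the comment in the patch says the sfen reader reads the list in reverse, i.e. it takes its next file from the back of its queue, so inserting the command-line list in reverse order makes files come out in the order they were given. A small illustration of that queue discipline; the vector-as-queue is a stand-in for sr.filenames / SfenReader, not the patch's code.

    #include <iostream>
    #include <string>
    #include <vector>

    int main()
    {
        std::vector<std::string> cmdline = { "a.bin", "b.bin", "c.bin" };

        std::vector<std::string> queue;
        for (auto it = cmdline.rbegin(); it != cmdline.rend(); ++it)
            queue.push_back(*it);              // queue now holds c, b, a

        while (!queue.empty())
        {
            std::cout << queue.back() << '\n'; // prints a, b, c
            queue.pop_back();                  // reader consumes from the back
        }
    }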
+ reduction_gameply = max(reduction_gameply, 1); + cout << "reduction_gameply : " << reduction_gameply << endl; #if defined (LOSS_FUNCTION_IS_ELMO_METHOD) - cout << "LAMBDA : " << ELMO_LAMBDA << endl; - cout << "LAMBDA2 : " << ELMO_LAMBDA2 << endl; - cout << "LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl; + cout << "LAMBDA : " << ELMO_LAMBDA << endl; + cout << "LAMBDA2 : " << ELMO_LAMBDA2 << endl; + cout << "LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl; #endif - cout << "mirror_percentage : " << mirror_percentage << endl; - cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl; - cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl; + cout << "mirror_percentage : " << mirror_percentage << endl; + cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl; + cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl; #if defined(EVAL_KPPT) || defined(EVAL_KPP_KKPT) || defined(EVAL_KPP_KKPT_FV_VAR) || defined(EVAL_NABLA) - cout << "freeze_kk/kkp/kpp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << endl; + cout << "freeze_kk/kkp/kpp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << endl; #elif defined(EVAL_KPPPT) || defined(EVAL_KPPP_KKPT) || defined(EVAL_HELICES) - cout << "freeze_kk/kkp/kpp/kppp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << " , " << freeze[3] << endl; + cout << "freeze_kk/kkp/kpp/kppp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << " , " << freeze[3] << endl; #elif defined(EVAL_KKPP_KKPT) || defined(EVAL_KKPPT) - cout << "freeze_kk/kkp/kpp/kkpp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << " , " << freeze[3] << endl; + cout << "freeze_kk/kkp/kpp/kkpp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << " , " << freeze[3] << endl; #endif - // ----------------------------------- - // various initialization - // ----------------------------------- + // ----------------------------------- + // various initialization + // ----------------------------------- - cout << "init.." << endl; + cout << "init.." << endl; - // Read evaluation function parameters - Eval::init_NNUE(); + // Read evaluation function parameters + Eval::init_NNUE(); #if !defined(EVAL_NNUE) - cout << "init_grad.." << endl; + cout << "init_grad.." << endl; - // Initialize gradient array of merit function parameters - Eval::init_grad(eta1,eta1_epoch,eta2,eta2_epoch,eta3); + // Initialize gradient array of merit function parameters + Eval::init_grad(eta1, eta1_epoch, eta2, eta2_epoch, eta3); #else - cout << "init_training.." << endl; - Eval::NNUE::InitializeTraining(eta1,eta1_epoch,eta2,eta2_epoch,eta3); - Eval::NNUE::SetBatchSize(nn_batch_size); - Eval::NNUE::SetOptions(nn_options); - if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) { - learn_think.best_nn_directory = std::string(Options["EvalDir"]); - } + cout << "init_training.." << endl; + Eval::NNUE::InitializeTraining(eta1, eta1_epoch, eta2, eta2_epoch, eta3); + Eval::NNUE::SetBatchSize(nn_batch_size); + Eval::NNUE::SetOptions(nn_options); + if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) { + learn_think.best_nn_directory = std::string(Options["EvalDir"]); + } #endif #if 0 - // A test to give a gradient of 1.0 to the initial stage of Hirate. 
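The newbob_decay / newbob_num_trials options wired up in this area configure a validation-driven learning-rate schedule (see the initial-loss bookkeeping at the end of learn()). The sketch below is only a rough model of that idea, not the patch's implementation: when the measured loss stops improving, shrink the step size and allow a limited number of retries.

    // Rough model of a newbob-style schedule. Field names mirror the options in
    // this patch (newbob_decay, newbob_num_trials, best_loss) but the logic is a
    // simplified stand-in.
    struct NewbobSchedule
    {
        double best_loss = 1e300;
        double scale     = 1.0;   // multiplies the base learning rate
        double decay     = 0.5;   // newbob_decay
        int    trials    = 2;     // newbob_num_trials

        // Call after each validation pass; returns false when training should stop.
        bool on_validation(double loss)
        {
            if (loss < best_loss) { best_loss = loss; return true; }
            scale *= decay;
            return --trials > 0;
        }
    };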
- pos.set_hirate(); - cout << Eval::evaluate(pos) << endl; - //Eval::print_eval_stat(pos); - Eval::add_grad(pos, BLACK, 32.0 , false); - Eval::update_weights(1); - pos.state()->sum.p[2][0] = VALUE_NOT_EVALUATED; - cout << Eval::evaluate(pos) << endl; - //Eval::print_eval_stat(pos); + // A test to give a gradient of 1.0 to the initial stage of Hirate. + pos.set_hirate(); + cout << Eval::evaluate(pos) << endl; + //Eval::print_eval_stat(pos); + Eval::add_grad(pos, BLACK, 32.0, false); + Eval::update_weights(1); + pos.state()->sum.p[2][0] = VALUE_NOT_EVALUATED; + cout << Eval::evaluate(pos) << endl; + //Eval::print_eval_stat(pos); #endif - cout << "init done." << endl; + cout << "init done." << endl; - // Reflect other option settings. - learn_think.discount_rate = discount_rate; - learn_think.eval_limit = eval_limit; - learn_think.save_only_once = save_only_once; - learn_think.sr.no_shuffle = no_shuffle; - learn_think.freeze = freeze; - learn_think.reduction_gameply = reduction_gameply; + // Reflect other option settings. + learn_think.discount_rate = discount_rate; + learn_think.eval_limit = eval_limit; + learn_think.save_only_once = save_only_once; + learn_think.sr.no_shuffle = no_shuffle; + learn_think.freeze = freeze; + learn_think.reduction_gameply = reduction_gameply; #if defined(EVAL_NNUE) - learn_think.newbob_scale = 1.0; - learn_think.newbob_decay = newbob_decay; - learn_think.newbob_num_trials = newbob_num_trials; + learn_think.newbob_scale = 1.0; + learn_think.newbob_decay = newbob_decay; + learn_think.newbob_num_trials = newbob_num_trials; #endif - learn_think.eval_save_interval = eval_save_interval; - learn_think.loss_output_interval = loss_output_interval; - learn_think.mirror_percentage = mirror_percentage; + learn_think.eval_save_interval = eval_save_interval; + learn_think.loss_output_interval = loss_output_interval; + learn_think.mirror_percentage = mirror_percentage; - // Start a thread that loads the phase file in the background - // (If this is not started, mse cannot be calculated.) - learn_think.start_file_read_worker(); + // Start a thread that loads the phase file in the background + // (If this is not started, mse cannot be calculated.) + learn_think.start_file_read_worker(); - learn_think.mini_batch_size = mini_batch_size; + learn_think.mini_batch_size = mini_batch_size; - if (validation_set_file_name.empty()) { - // Get about 10,000 data for mse calculation. - sr.read_for_mse(); - } else { - sr.read_validation_set(validation_set_file_name, eval_limit); - } + if (validation_set_file_name.empty()) { + // Get about 10,000 data for mse calculation. 
+            sr.read_for_mse();
+        }
+        else {
+            sr.read_validation_set(validation_set_file_name, eval_limit);
+        }
-    // Calculate rmse once at this point (timing of 0 sfen)
-    // sr.calc_rmse();
+        // Calculate rmse once at this point (timing of 0 sfen)
+        // sr.calc_rmse();
 #if defined(EVAL_NNUE)
-    if (newbob_decay != 1.0) {
-        learn_think.calc_loss(0, -1);
-        learn_think.best_loss = learn_think.latest_loss_sum / learn_think.latest_loss_count;
-        learn_think.latest_loss_sum = 0.0;
-        learn_think.latest_loss_count = 0;
-        cout << "initial loss: " << learn_think.best_loss << endl;
-    }
+        if (newbob_decay != 1.0) {
+            learn_think.calc_loss(0, -1);
+            learn_think.best_loss = learn_think.latest_loss_sum / learn_think.latest_loss_count;
+            learn_think.latest_loss_sum = 0.0;
+            learn_think.latest_loss_count = 0;
+            cout << "initial loss: " << learn_think.best_loss << endl;
+        }
 #endif
-    // -----------------------------------
-    // start learning evaluation function parameters
-    // -----------------------------------
+        // -----------------------------------
+        // start learning evaluation function parameters
+        // -----------------------------------
-    // Start learning.
-    learn_think.go_think();
+        // Start learning.
+        learn_think.go_think();
-    // Save once at the end.
-    learn_think.save(true);
+        // Save once at the end.
+        learn_think.save(true);
 #if defined(USE_GLOBAL_OPTIONS)
-    // Restore Global Options.
-    GlobalOptions = oldGlobalOptions;
+        // Restore Global Options.
+        GlobalOptions = oldGlobalOptions;
 #endif
-}
+    }
 } // namespace Learner
diff --git a/src/learn/multi_think.h b/src/learn/multi_think.h
index 6e6c695c..6225144c 100644
--- a/src/learn/multi_think.h
+++ b/src/learn/multi_think.h
@@ -11,12 +11,15 @@
 #include "../thread_win32_osx.h"
 #include
+#include <limits>
 // Learning from a game record, when making yourself think and generating a fixed track, etc.
 // Helper class used when multiple threads want to call Search::think() individually.
 // Derive and use this class.
 struct MultiThink
 {
+    static constexpr std::uint64_t LOOP_COUNT_FINISHED = std::numeric_limits<std::uint64_t>::max();
+
     MultiThink() : prng(std::chrono::system_clock::now().time_since_epoch().count())
     {
         loop_count = 0;
@@ -62,7 +65,7 @@ struct MultiThink
     uint64_t get_next_loop_count() {
         std::unique_lock lk(loop_mutex);
         if (loop_count >= loop_max)
-            return UINT64_MAX;
+            return LOOP_COUNT_FINISHED;
         return loop_count++;
     }
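On the multi_think.h hunk above: LOOP_COUNT_FINISHED replaces the bare UINT64_MAX as the "no more work" sentinel returned by get_next_loop_count(). A hedged sketch of how a worker is expected to consume it; worker_loop and kFinished are illustrative names, not part of the patch.

    #include <cstdint>
    #include <limits>

    constexpr std::uint64_t kFinished = std::numeric_limits<std::uint64_t>::max();

    // Keep requesting loop indices until the sentinel comes back, then stop.
    template <typename Source, typename Work>
    void worker_loop(Source&& next_loop_count, Work&& work)
    {
        for (;;)
        {
            const std::uint64_t task = next_loop_count();
            if (task == kFinished)    // previously compared against UINT64_MAX
                break;
            work(task);
        }
    }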