From 0636e1256d09edde22df3bc75207d75c24b6f2fa Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Sun, 18 Oct 2020 14:51:38 +0200 Subject: [PATCH] Add cyclic mode to the sfen reader. Make sfen reader take all files at construction --- src/learn/learn.cpp | 44 ++++++++++++++++++-------------- src/learn/sfen_reader.h | 56 ++++++++++++++++++++++++++++++----------- 2 files changed, 66 insertions(+), 34 deletions(-) diff --git a/src/learn/learn.cpp b/src/learn/learn.cpp index 32aa986f..57dbeb63 100644 --- a/src/learn/learn.cpp +++ b/src/learn/learn.cpp @@ -381,9 +381,13 @@ namespace Learner // move match rate, simple comparison is not possible... static constexpr uint64_t sfen_for_mse_size = 2000; - LearnerThink(uint64_t thread_num, const std::string& seed) : + LearnerThink( + const std::vector<std::string>& filenames, + uint64_t thread_num, + const std::string& seed + ) : prng(seed), - sr(thread_num, std::to_string(prng.next_random_seed())), + sr(filenames, SfenReaderMode::Sequential, thread_num, std::to_string(prng.next_random_seed())), learn_loss_sum{} { save_only_once = false; @@ -404,11 +408,6 @@ namespace Learner sr.set_do_shuffle(v); } - void add_file(const std::string& filename) - { - sr.add_file(filename); - } - void learn(); @@ -1095,11 +1094,26 @@ namespace Learner // Right now we only have the individual files. // We need to apply base_dir here - rebase_files(filenames, base_dir); if (!target_dir.empty()) { append_files_from_dir(filenames, base_dir, target_dir); } + rebase_files(filenames, base_dir); + + // Insert the file name for the number of loops. + { + std::vector<std::string> filenamesTimesLoop; + + for (int i = 0; i < loop; ++i) + { + for(auto& file : filenames) + { + filenamesTimesLoop.emplace_back(file); + } + } + + filenames = std::move(filenamesTimesLoop); + } cout << "learn from "; for (auto s : filenames) @@ -1154,8 +1168,6 @@ namespace Learner cout << "init.." 
<< endl; - LearnerThink learn_think(thread_num, seed); - Threads.main()->ponder = false; set_learning_search_limits(); @@ -1164,6 +1176,9 @@ namespace Learner Eval::NNUE::initialize_training(seed); Eval::NNUE::set_batch_size(nn_batch_size); Eval::NNUE::set_options(nn_options); + + LearnerThink learn_think(filenames, thread_num, seed); + if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) { // Save the current net to [EvalSaveDir]\original. Eval::NNUE::save_eval("original"); @@ -1190,15 +1205,6 @@ namespace Learner learn_think.mini_batch_size = mini_batch_size; learn_think.validation_set_file_name = validation_set_file_name; - // Insert the file name for the number of loops. - for (int i = 0; i < loop; ++i) - { - for(auto& file : filenames) - { - learn_think.add_file(Path::combine(base_dir, file)); - } - } - cout << "init done." << endl; // Start learning. diff --git a/src/learn/sfen_reader.h b/src/learn/sfen_reader.h index 38c2532c..1ba9bd3b 100644 --- a/src/learn/sfen_reader.h +++ b/src/learn/sfen_reader.h @@ -18,6 +18,12 @@ namespace Learner{ + enum struct SfenReaderMode + { + Sequential, + Cyclic + }; + // Sfen reader struct SfenReader { @@ -32,7 +38,14 @@ namespace Learner{ // Do not use std::random_device(). // Because it always the same integers on MinGW. - SfenReader(int thread_num, const std::string& seed) : + SfenReader( + const std::vector<std::string>& filenames_, + SfenReaderMode mode_, + int thread_num, + const std::string& seed + ) : + filenames(filenames_.begin(), filenames_.end()), + mode(mode_), prng(seed) { packed_sfens.resize(thread_num); @@ -173,6 +186,9 @@ namespace Learner{ void file_read_worker() { + std::string currentFilename; + uint64_t numEntriesReadFromCurrentFile = 0; + auto open_next_file = [&]() { // no more for(;;) @@ -183,18 +199,20 @@ namespace Learner{ return false; // Get the next file name. 
- std::string filename = filenames.front(); + currentFilename = filenames.front(); filenames.pop_front(); - sfen_input_stream = open_sfen_input_file(filename); + numEntriesReadFromCurrentFile = 0; + + sfen_input_stream = open_sfen_input_file(currentFilename); if (sfen_input_stream == nullptr) { - std::cout << "File does not exist: " << filename << '\n'; + std::cout << "File does not exist: " << currentFilename << '\n'; } else { - std::cout << "Opened file for reading: " << filename << '\n'; + std::cout << "Opened file for reading: " << currentFilename << '\n'; // in case the file is empty or was deleted. if (sfen_input_stream->eof()) @@ -236,13 +254,24 @@ namespace Learner{ if (p.has_value()) { sfens.push_back(*p); + ++numEntriesReadFromCurrentFile; } - else if(!open_next_file()) + else { - // There was no next file. Abort. - std::cout << "..end of files." << std::endl; - end_of_files = true; - return; + if (mode == SfenReaderMode::Cyclic + && numEntriesReadFromCurrentFile > 0) + { + // The file contained data so we add it again to the end of the queue. + filenames.emplace_back(currentFilename); + } + + if(!open_next_file()) + { + // There was no next file. Abort. + std::cout << "..end of files." << std::endl; + end_of_files = true; + return; + } } } @@ -295,11 +324,6 @@ namespace Learner{ shuffle = v; } - void add_file(const std::string& filename) - { - filenames.push_back(filename); - } - protected: // worker thread reading file in background @@ -316,6 +340,8 @@ namespace Learner{ // Do not shuffle when reading the phase. bool shuffle; + SfenReaderMode mode; + // Random number to shuffle when reading the phase PRNG prng;