Add cyclic mode to the sfen reader. Make sfen reader take all files at construction

This commit is contained in:
Tomasz Sobczyk
2020-10-18 14:51:38 +02:00
committed by nodchip
parent e4a38c18dd
commit 0636e1256d
2 changed files with 66 additions and 34 deletions

View File

@@ -381,9 +381,13 @@ namespace Learner
// move match rate, simple comparison is not possible...
static constexpr uint64_t sfen_for_mse_size = 2000;
LearnerThink(uint64_t thread_num, const std::string& seed) :
LearnerThink(
const std::vector<std::string>& filenames,
uint64_t thread_num,
const std::string& seed
) :
prng(seed),
sr(thread_num, std::to_string(prng.next_random_seed())),
sr(filenames, SfenReaderMode::Sequential, thread_num, std::to_string(prng.next_random_seed())),
learn_loss_sum{}
{
save_only_once = false;
@@ -404,11 +408,6 @@ namespace Learner
sr.set_do_shuffle(v);
}
// Registers one more input file with this learner.
// The call is forwarded unchanged to the sfen reader member `sr`.
void add_file(const std::string& filename)
{
sr.add_file(filename);
}
void learn();
@@ -1095,11 +1094,26 @@ namespace Learner
// Right now we only have the individual files.
// We need to apply base_dir here
rebase_files(filenames, base_dir);
if (!target_dir.empty())
{
append_files_from_dir(filenames, base_dir, target_dir);
}
rebase_files(filenames, base_dir);
// Repeat the complete file list `loop` times, keeping the original order
// inside every repetition. A `loop` of zero or less leaves the list empty.
{
    std::vector<std::string> repeated_filenames;
    for (int pass = 0; pass < loop; ++pass)
    {
        // Append one full copy of the current list per pass.
        repeated_filenames.insert(
            repeated_filenames.end(),
            filenames.begin(),
            filenames.end());
    }

    filenames = std::move(repeated_filenames);
}
cout << "learn from ";
for (auto s : filenames)
@@ -1154,8 +1168,6 @@ namespace Learner
cout << "init.." << endl;
LearnerThink learn_think(thread_num, seed);
Threads.main()->ponder = false;
set_learning_search_limits();
@@ -1164,6 +1176,9 @@ namespace Learner
Eval::NNUE::initialize_training(seed);
Eval::NNUE::set_batch_size(nn_batch_size);
Eval::NNUE::set_options(nn_options);
LearnerThink learn_think(filenames, thread_num, seed);
if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) {
// Save the current net to [EvalSaveDir]\original.
Eval::NNUE::save_eval("original");
@@ -1190,15 +1205,6 @@ namespace Learner
learn_think.mini_batch_size = mini_batch_size;
learn_think.validation_set_file_name = validation_set_file_name;
// Insert the file name for the number of loops.
for (int i = 0; i < loop; ++i)
{
for(auto& file : filenames)
{
learn_think.add_file(Path::combine(base_dir, file));
}
}
cout << "init done." << endl;
// Start learning.

View File

@@ -18,6 +18,12 @@
namespace Learner{
// Selects what the sfen reader does with a file after reading it to the end.
enum class SfenReaderMode
{
    // Every file is read once; reading stops when the file queue is empty.
    Sequential,

    // A file that yielded at least one entry is appended to the end of the
    // file queue again, so the reader keeps cycling through the files.
    Cyclic
};
// Sfen reader
struct SfenReader
{
@@ -32,7 +38,14 @@ namespace Learner{
// Do not use std::random_device() here,
// because it always produces the same integers on MinGW.
SfenReader(int thread_num, const std::string& seed) :
SfenReader(
const std::vector<std::string>& filenames_,
SfenReaderMode mode_,
int thread_num,
const std::string& seed
) :
filenames(filenames_.begin(), filenames_.end()),
mode(mode_),
prng(seed)
{
packed_sfens.resize(thread_num);
@@ -173,6 +186,9 @@ namespace Learner{
void file_read_worker()
{
std::string currentFilename;
uint64_t numEntriesReadFromCurrentFile = 0;
auto open_next_file = [&]() {
// no more
for(;;)
@@ -183,18 +199,20 @@ namespace Learner{
return false;
// Get the next file name.
std::string filename = filenames.front();
currentFilename = filenames.front();
filenames.pop_front();
sfen_input_stream = open_sfen_input_file(filename);
numEntriesReadFromCurrentFile = 0;
sfen_input_stream = open_sfen_input_file(currentFilename);
if (sfen_input_stream == nullptr)
{
std::cout << "File does not exist: " << filename << '\n';
std::cout << "File does not exist: " << currentFilename << '\n';
}
else
{
std::cout << "Opened file for reading: " << filename << '\n';
std::cout << "Opened file for reading: " << currentFilename << '\n';
// in case the file is empty or was deleted.
if (sfen_input_stream->eof())
@@ -236,13 +254,24 @@ namespace Learner{
if (p.has_value())
{
sfens.push_back(*p);
++numEntriesReadFromCurrentFile;
}
else if(!open_next_file())
else
{
// There was no next file. Abort.
std::cout << "..end of files." << std::endl;
end_of_files = true;
return;
if (mode == SfenReaderMode::Cyclic
&& numEntriesReadFromCurrentFile > 0)
{
// The file contained data so we add it again to the end of the queue.
filenames.emplace_back(currentFilename);
}
if(!open_next_file())
{
// There was no next file. Abort.
std::cout << "..end of files." << std::endl;
end_of_files = true;
return;
}
}
}
@@ -295,11 +324,6 @@ namespace Learner{
shuffle = v;
}
// Appends `filename` to the back of the `filenames` container.
// The file-reading worker takes the next file to open from the front.
void add_file(const std::string& filename)
{
filenames.push_back(filename);
}
protected:
// worker thread reading file in background
@@ -316,6 +340,8 @@ namespace Learner{
// Do not shuffle when reading the phase.
bool shuffle;
SfenReaderMode mode;
// Random number to shuffle when reading the phase
PRNG prng;