Add thread sanitized run for instrumented_learn and fix races.

This commit is contained in:
Tomasz Sobczyk
2020-12-24 13:35:48 +01:00
committed by nodchip
parent acf95c7c98
commit 1f7e5d3861
4 changed files with 38 additions and 13 deletions

View File

@@ -127,14 +127,20 @@ namespace Learner
const Params& prm
) :
params(prm),
prng(prm.seed),
sfen_writer(prm.output_file_name, prm.num_threads, prm.save_every, prm.sfen_format)
{
hash.resize(GENSFEN_HASH_SIZE);
prngs.reserve(prm.num_threads);
auto seed = prm.seed;
for (uint64_t i = 0; i < prm.num_threads; ++i)
{
prngs.emplace_back(seed);
seed = prngs.back().next_random_seed();
}
if (!prm.book.empty())
{
opening_book = open_opening_book(prm.book, prng);
opening_book = open_opening_book(prm.book, prngs[0]);
if (opening_book == nullptr)
{
std::cout << "WARNING: Failed to open opening book " << prm.book << ". Falling back to startpos.\n";
@@ -142,7 +148,7 @@ namespace Learner
}
// Output the seed so the user can verify that it's not identical by chance.
std::cout << prng << std::endl;
std::cout << prngs[0] << std::endl;
}
void generate(uint64_t limit);
@@ -150,7 +156,7 @@ namespace Learner
private:
Params params;
PRNG prng;
std::vector<PRNG> prngs;
std::mutex stats_mutex;
TimePoint last_stats_report_time;
@@ -177,9 +183,10 @@ namespace Learner
Position& pos,
const vector<int>& move_hist_scores) const;
vector<uint8_t> generate_random_move_flags();
vector<uint8_t> generate_random_move_flags(PRNG& prng);
optional<Move> choose_random_move(
PRNG& prng,
Position& pos,
std::vector<uint8_t>& random_move_flag,
int ply,
@@ -252,6 +259,8 @@ namespace Learner
StateInfo si;
auto& prng = prngs[th.thread_idx()];
// end flag
bool quit = false;
@@ -279,7 +288,7 @@ namespace Learner
packed_sfens.reserve(params.write_maxply + MAX_PLY);
// Precomputed flags. Used internally by choose_random_move.
vector<uint8_t> random_move_flag = generate_random_move_flags();
vector<uint8_t> random_move_flag = generate_random_move_flags(prng);
// A counter that keeps track of the number of random moves
// When random_move_minply == -1, random moves are
@@ -423,7 +432,7 @@ namespace Learner
}
// Update the next move according to best search result or random move.
auto random_move = choose_random_move(pos, random_move_flag, ply, actual_random_move_count);
auto random_move = choose_random_move(prng, pos, random_move_flag, ply, actual_random_move_count);
const Move next_move = random_move.has_value() ? *random_move : search_pv[0];
// We don't have the whole game yet, but it ended,
@@ -579,7 +588,7 @@ namespace Learner
return nullopt;
}
vector<uint8_t> Gensfen::generate_random_move_flags()
vector<uint8_t> Gensfen::generate_random_move_flags(PRNG& prng)
{
vector<uint8_t> random_move_flag;
@@ -617,6 +626,7 @@ namespace Learner
}
optional<Move> Gensfen::choose_random_move(
PRNG& prng,
Position& pos,
std::vector<uint8_t>& random_move_flag,
int ply,

View File

@@ -561,13 +561,13 @@ namespace Learner
LearnerThink(const Params& prm) :
params(prm),
prng(prm.seed),
init_prng(prm.seed),
train_sr(
prm.filenames,
prm.shuffle,
SfenReaderMode::Cyclic,
prm.num_threads,
std::to_string(prng.next_random_seed()),
std::to_string(init_prng.next_random_seed()),
prm.sfen_read_size,
prm.thread_buffer_size),
validation_sr(
@@ -575,7 +575,7 @@ namespace Learner
prm.shuffle,
SfenReaderMode::Cyclic,
1,
std::to_string(prng.next_random_seed()),
std::to_string(init_prng.next_random_seed()),
std::min<size_t>(prm.validation_count * 10, 1000000),
prm.thread_buffer_size),
learn_loss_sum{}
@@ -589,6 +589,12 @@ namespace Learner
total_done = 0;
trials = params.newbob_num_trials;
dir_number = 0;
prngs.reserve(prm.num_threads);
for (uint64_t i = 0; i < prm.num_threads; ++i)
{
prngs.emplace_back(init_prng.next_random_seed());
}
}
void learn(uint64_t epochs);
@@ -622,7 +628,8 @@ namespace Learner
Params params;
PRNG prng;
PRNG init_prng;
std::vector<PRNG> prngs;
// sfen reader
SfenReader train_sr;
@@ -776,6 +783,7 @@ namespace Learner
{
const auto thread_id = th.thread_idx();
auto& pos = th.rootPos;
auto& prng = prngs[th.thread_idx()];
std::vector<StateInfo, AlignedAllocator<StateInfo>> state(MAX_PLY);

View File

@@ -61,6 +61,7 @@ namespace Learner{
end_of_files = false;
shuffle = do_shuffle;
stop_flag = false;
num_buffers_in_pool.store(0);
file_worker_thread = std::thread([&] {
this->file_read_worker();
@@ -147,6 +148,7 @@ namespace Learner{
packed_sfens[thread_id] = std::move(packed_sfens_pool.front());
packed_sfens_pool.pop_front();
num_buffers_in_pool.fetch_sub(1);
total_read += thread_buffer_size;
@@ -224,7 +226,7 @@ namespace Learner{
{
// Wait for the buffer to run out.
// This size() is read only, so you don't need to lock it.
while (!stop_flag && packed_sfens_pool.size() >= sfen_read_size / thread_buffer_size)
while (!stop_flag && num_buffers_in_pool.load() >= sfen_read_size / thread_buffer_size)
sleep(100);
if (stop_flag)
@@ -294,7 +296,10 @@ namespace Learner{
// contents of packed_sfens_pool are changed.
for (auto& buf : buffers)
{
num_buffers_in_pool.fetch_add(1);
packed_sfens_pool.emplace_back(std::move(buf));
}
}
}
@@ -342,5 +347,6 @@ namespace Learner{
// Each worker thread fills its own packed_sfens[thread_id] from here.
// * Lock and access the mutex.
std::list<std::unique_ptr<PSVector>> packed_sfens_pool;
std::atomic<size_t> num_buffers_in_pool;
};
}