diff --git a/src/learn/learn.cpp b/src/learn/learn.cpp index 8326ab24..8265a66f 100644 --- a/src/learn/learn.cpp +++ b/src/learn/learn.cpp @@ -51,6 +51,8 @@ #include #include #include +#include +#include #if defined (_OPENMP) #include @@ -611,6 +613,8 @@ namespace Learner atomic& move_accord_count ); + bool has_depth1_move_agreement(Position& pos, Move pvmove); + bool check_progress(); // save merit function parameters to a file @@ -1007,15 +1011,53 @@ namespace Learner local_loss_sum += loss; sum_norm += (double)abs(shallow_value); - // Determine if the teacher's move and the score of the shallow search match - const auto [value, pv] = Search::search(pos, 1); - if (pv.size() > 0 && (uint16_t)pv[0] == ps.move) + // Treat all moves with equal scores as first. This is up to move ordering. + if (has_depth1_move_agreement(pos, (Move)ps.move)) move_accord_count.fetch_add(1, std::memory_order_relaxed); } test_loss_sum += local_loss_sum; } + bool LearnerThink::has_depth1_move_agreement(Position& pos, Move pvmove) + { + // Determine if the depth 1 search pv matches the move from the dataset. + // Do a manual depth 1 search so we're not affected by previous searches. + std::vector> child_scores; + + // Call evaluate once for the rootpos so that the evals + // for children moves use incremental feature transformer updates. + (void)Eval::evaluate(pos); + + // Just to get guaranteed alignment. + std::vector> states(1); + auto legal_moves = MoveList(pos); + for (auto m : legal_moves) + { + pos.do_move(m, states[0]); + // We don't care if the king is in check or stuff like that. + // not a big issue and nnue should digest all. + auto value = -Eval::evaluate(pos); + child_scores.emplace_back(m, value); + pos.undo_move(m); + } + + if (child_scores.empty()) + return false; + + std::sort( + child_scores.begin(), + child_scores.end(), + [](auto& lhs, auto& rhs) { return lhs.second > rhs.second; } + ); + + // Require the best move to have strictly higher score than the next one. 
+ return + child_scores[0].first == pvmove + && (child_scores.size() == 1 + || child_scores[1].second != child_scores[0].second); + } + bool LearnerThink::check_progress() { auto out = sync_region_cout.new_region();