From 539bd2d1c8fdbe74cff0efc30a994f1fed7a08fe Mon Sep 17 00:00:00 2001
From: Tomasz Sobczyk
Date: Sun, 29 Nov 2020 12:18:02 +0100
Subject: [PATCH] Replace the old loss/grad calculation completely.

---
 src/learn/autograd.h               |   5 +
 src/learn/learn.cpp                | 295 +++++++----------------------
 src/learn/learn.h                  |   1 -
 src/nnue/evaluate_nnue_learner.cpp |   7 +-
 src/nnue/evaluate_nnue_learner.h   |   1 -
 5 files changed, 79 insertions(+), 230 deletions(-)

diff --git a/src/learn/autograd.h b/src/learn/autograd.h
index 0b894cc4..f83d4d72 100644
--- a/src/learn/autograd.h
+++ b/src/learn/autograd.h
@@ -42,6 +42,11 @@ namespace Learner
             grad /= rhs;
             return *this;
         }
+
+        ValueWithGrad abs() const
+        {
+            return { std::abs(value), std::abs(grad) };
+        }
     };
 }
diff --git a/src/learn/learn.cpp b/src/learn/learn.cpp
index 411cee08..e558b56a 100644
--- a/src/learn/learn.cpp
+++ b/src/learn/learn.cpp
@@ -52,6 +52,7 @@
 #include
 #include
 #include
+#include <mutex>
 
 #if defined (_OPENMP)
 #include <omp.h>
@@ -99,65 +100,64 @@ namespace Learner
     // Using stockfish's WDL with win rate model instead of sigmoid
     static bool use_wdl = false;
 
-    namespace Detail {
-        template <bool AtomicV>
-        struct Loss
+    struct Loss
+    {
+        double value() const
         {
-            using T =
-                std::conditional_t<
-                    AtomicV,
-                    atomic<double>,
-                    double
-                >;
+            return m_loss.value;
+        }
 
-            T cross_entropy_eval{0.0};
-            T cross_entropy_win{0.0};
-            T cross_entropy{0.0};
-            T entropy_eval{0.0};
-            T entropy_win{0.0};
-            T entropy{0.0};
-            T count{0.0};
+        double grad() const
+        {
+            return m_loss.grad;
+        }
 
-            template <bool OtherAtomicV>
-            Loss& operator += (const Loss<OtherAtomicV>& rhs)
-            {
-                cross_entropy_eval += rhs.cross_entropy_eval;
-                cross_entropy_win += rhs.cross_entropy_win;
-                cross_entropy += rhs.cross_entropy;
-                entropy_eval += rhs.entropy_eval;
-                entropy_win += rhs.entropy_win;
-                entropy += rhs.entropy;
-                count += rhs.count;
+        uint64_t count() const
+        {
+            return m_count;
+        }
 
-                return *this;
-            }
+        Loss& operator += (const ValueWithGrad<double>& rhs)
+        {
+            std::unique_lock lock(m_mutex);
 
-            void reset()
-            {
-                cross_entropy_eval = 0.0;
-                cross_entropy_win = 0.0;
-                cross_entropy = 0.0;
-                entropy_eval = 0.0;
-                entropy_win = 0.0;
-                entropy = 0.0;
-                count = 0.0;
-            }
+            m_loss += rhs.abs();
+            m_count += 1;
 
-            template <typename StreamT>
-            void print(const std::string& prefix, StreamT& s) const
-            {
-                s << " - " << prefix << "_cross_entropy_eval = " << cross_entropy_eval / count << endl;
-                s << " - " << prefix << "_cross_entropy_win = " << cross_entropy_win / count << endl;
-                s << " - " << prefix << "_entropy_eval = " << entropy_eval / count << endl;
-                s << " - " << prefix << "_entropy_win = " << entropy_win / count << endl;
-                s << " - " << prefix << "_cross_entropy = " << cross_entropy / count << endl;
-                s << " - " << prefix << "_entropy = " << entropy / count << endl;
-            }
-        };
-    }
+            return *this;
+        }
 
-    using Loss = Detail::Loss<false>;
-    using AtomicLoss = Detail::Loss<true>;
+        Loss& operator += (const Loss& rhs)
+        {
+            std::unique_lock lock(m_mutex);
+
+            m_loss += rhs.m_loss.abs();
+            m_count += rhs.m_count;
+
+            return *this;
+        }
+
+        void reset()
+        {
+            std::unique_lock lock(m_mutex);
+
+            m_loss = ValueWithGrad<double>{ 0.0, 0.0 };
+            m_count = 0;
+        }
+
+        template <typename StreamT>
+        void print(const std::string& prefix, StreamT& s) const
+        {
+            s << " - " << prefix << "_loss = " << m_loss.value / (double)m_count << endl;
+            s << " - " << prefix << "_grad_norm = " << m_loss.grad / (double)m_count << endl;
+        }
+
+    private:
+        ValueWithGrad<double> m_loss{ 0.0, 0.0 };
+        uint64_t m_count{0};
+        std::mutex m_mutex;
+
+    };
 
     static void append_files_from_dir(
         std::vector<std::string>& filenames,
@@ -185,94 +185,6 @@ namespace Learner
         }
     }
 
-    // A function that converts the evaluation value to the winning rate [0,1]
-    static double winning_percentage(double value)
-    {
-        // 1/(1+10^(-Eval/4))
-        // = 1/(1+e^(-Eval/4*ln(10)))
-        // = sigmoid(Eval/4*ln(10))
-        return Math::sigmoid(value * winning_probability_coefficient);
-    }
-
-    // A function that converts the evaluation value to the winning rate [0,1]
-    static double winning_percentage_wdl(double value, int ply)
-    {
-        constexpr double wdl_total = 1000.0;
-        constexpr double draw_score = 0.5;
-
-        const double wdl_w = UCI::win_rate_model_double(value, ply);
-        const double wdl_l = UCI::win_rate_model_double(-value, ply);
-        const double wdl_d = wdl_total - wdl_w - wdl_l;
-
-        return (wdl_w + wdl_d * draw_score) / wdl_total;
-    }
-
-    // A function that converts the evaluation value to the winning rate [0,1]
-    static double winning_percentage(double value, int ply)
-    {
-        if (use_wdl)
-        {
-            return winning_percentage_wdl(value, ply);
-        }
-        else
-        {
-            return winning_percentage(value);
-        }
-    }
-
-    static double calc_cross_entropy_of_winning_percentage(
-        double deep_win_rate,
-        double shallow_eval,
-        int ply)
-    {
-        const double p = deep_win_rate;
-        const double q = winning_percentage(shallow_eval, ply);
-        return -p * std::log(q) - (1.0 - p) * std::log(1.0 - q);
-    }
-
-    static double calc_d_cross_entropy_of_winning_percentage(
-        double deep_win_rate,
-        double shallow_eval,
-        int ply)
-    {
-        constexpr double epsilon = 0.000001;
-
-        const double y1 = calc_cross_entropy_of_winning_percentage(
-            deep_win_rate, shallow_eval, ply);
-
-        const double y2 = calc_cross_entropy_of_winning_percentage(
-            deep_win_rate, shallow_eval + epsilon, ply);
-
-        // Divide by the winning_probability_coefficient to
-        // match scale with the sigmoidal win rate
-        return ((y2 - y1) / epsilon) / winning_probability_coefficient;
-    }
-
-    // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
-    static double get_scaled_signal(double signal)
-    {
-        double scaled_signal = signal;
-
-        // Normalize to [0.0, 1.0].
-        scaled_signal =
-            (scaled_signal - src_score_min_value)
-            / (src_score_max_value - src_score_min_value);
-
-        // Scale to [dest_score_min_value, dest_score_max_value].
-        scaled_signal =
-            scaled_signal * (dest_score_max_value - dest_score_min_value)
-            + dest_score_min_value;
-
-        return scaled_signal;
-    }
-
-    // Teacher winning probability.
-    static double calculate_p(double teacher_signal, int ply)
-    {
-        const double scaled_teacher_signal = get_scaled_signal(teacher_signal);
-        return winning_percentage(scaled_teacher_signal, ply);
-    }
-
     static double calculate_lambda(double teacher_signal)
     {
         // If the evaluation value in deep search exceeds elmo_lambda_limit
@@ -285,94 +197,31 @@ namespace Learner
         return lambda;
     }
 
-    static double calculate_t(int game_result)
-    {
-        // Use 1 as the correction term if the expected win rate is 1,
-        // 0 if you lose, and 0.5 if you draw.
-        // game_result = 1,0,-1 so add 1 and divide by 2.
-        const double t = double(game_result + 1) * 0.5;
-
-        return t;
-    }
-
-    static double calc_grad(Value shallow, Value teacher_signal, int result, int ply)
-    {
-        // elmo (WCSC27) method
-        // Correct with the actual game wins and losses.
-        const double q = winning_percentage(shallow, ply);
-        const double p = calculate_p(teacher_signal, ply);
-        const double t = calculate_t(result);
-        const double lambda = calculate_lambda(teacher_signal);
-
-        double grad;
-        if (use_wdl)
-        {
-            const double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, ply);
-            const double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, ply);
-            grad = lambda * dce_p + (1.0 - lambda) * dce_t;
-        }
-        else
-        {
-            // Use the actual win rate as a correction term.
-            // This is the idea of elmo (WCSC27), modern O-parts.
-            grad = lambda * (q - p) + (1.0 - lambda) * (q - t);
-        }
-
-        return std::clamp(grad, -max_grad, max_grad);
-    }
-
     static ValueWithGrad<double> get_loss(Value shallow, Value teacher_signal, int result, int ply)
     {
         using namespace Learner::Autograd::UnivariateStatic;
-
         auto q_ = sigmoid(VariableParameter<double, 0>{} * winning_probability_coefficient);
         auto p_ = sigmoid(ConstantParameter<double, 1>{} * winning_probability_coefficient);
         auto t_ = (ConstantParameter<double, 2>{} + 1.0) * 0.5;
         auto lambda_ = ConstantParameter<double, 3>{};
         auto loss_ = pow(lambda_ * (q_ - p_) + (1.0 - lambda_) * (q_ - t_), 2.0);
+        /*
+        auto q_ = VariableParameter<double, 0>{};
+        auto p_ = ConstantParameter<double, 1>{};
+        auto loss_ = pow(q_ - p_, 2.0) * (1.0 / (2400.0 * 2.0 * 600.0));
+        */
+
         auto args = std::tuple((double)shallow, (double)teacher_signal, (double)result, calculate_lambda(teacher_signal));
 
         return loss_.eval(args);
     }
 
-    // Calculate cross entropy during learning
-    // The individual cross entropy of the win/loss term and win
-    // rate term of the elmo expression is returned
-    // to the arguments cross_entropy_eval and cross_entropy_win.
-    static Loss calc_cross_entropy(
+    static auto get_loss(
         Value teacher_signal,
         Value shallow,
         const PackedSfenValue& psv)
     {
-        // Teacher winning probability.
-        const double q = winning_percentage(shallow, psv.gamePly);
-        const double p = calculate_p(teacher_signal, psv.gamePly);
-        const double t = calculate_t(psv.game_result);
-        const double lambda = calculate_lambda(teacher_signal);
-
-        constexpr double epsilon = 0.000001;
-
-        const double m = (1.0 - lambda) * t + lambda * p;
-
-        Loss loss{};
-
-        loss.cross_entropy_eval =
-            (-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon));
-        loss.cross_entropy_win =
-            (-t * std::log(q + epsilon) - (1.0 - t) * std::log(1.0 - q + epsilon));
-        loss.entropy_eval =
-            (-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon));
-        loss.entropy_win =
-            (-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon));
-
-        loss.cross_entropy =
-            (-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon));
-        loss.entropy =
-            (-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon));
-
-        loss.count = 1;
-
-        return loss;
+        return get_loss(shallow, teacher_signal, psv.game_result, psv.gamePly);
     }
 
     // Class to generate sfen with multiple threads
@@ -495,7 +344,7 @@
         Thread& th,
         std::atomic<uint64_t>& counter,
         const PSVector& psv,
-        AtomicLoss& test_loss_sum,
+        Loss& test_loss_sum,
         atomic<double>& sum_norm,
         atomic<int>& move_accord_count
     );
@@ -530,7 +379,7 @@
         int dir_number;
 
         // For calculation of learning data loss
-        AtomicLoss learn_loss_sum;
+        Loss learn_loss_sum;
     };
 
     void LearnerThink::set_learning_search_limits()
@@ -681,7 +530,7 @@
 
             const Value shallow_value = Eval::evaluate(pos);
 
-            const auto loss = calc_cross_entropy(
+            const auto loss = get_loss(
                 deep_value,
                 (rootColor == pos.side_to_move()) ? shallow_value : -shallow_value,
                 ps);
@@ -735,7 +584,7 @@
             // should be no real issues happening since
             // the read/write phases are isolated.
             atomic_thread_fence(memory_order_seq_cst);
-            Eval::NNUE::update_parameters(Threads, epoch, params.verbose, params.learning_rate, calc_grad, get_loss);
+            Eval::NNUE::update_parameters(Threads, epoch, params.verbose, params.learning_rate, get_loss);
             atomic_thread_fence(memory_order_seq_cst);
 
             if (++save_count * params.mini_batch_size >= params.eval_save_interval)
@@ -778,7 +627,7 @@
         out << " - learning rate = " << params.learning_rate << endl;
 
         // For calculation of verification data loss
-        AtomicLoss test_loss_sum{};
+        Loss test_loss_sum{};
 
         // norm for learning
         atomic<double> sum_norm{0.0};
@@ -810,26 +659,24 @@
         });
         Threads.wait_for_workers_finished();
 
-        latest_loss_sum += test_loss_sum.cross_entropy - test_loss_sum.entropy;
+        latest_loss_sum += test_loss_sum.value();
         latest_loss_count += psv.size();
 
-        if (psv.size() && test_loss_sum.count > 0.0)
+        if (psv.size() && test_loss_sum.count() > 0)
         {
             test_loss_sum.print("test", out);
 
-            if (learn_loss_sum.count > 0.0)
+            if (learn_loss_sum.count() > 0)
             {
                 learn_loss_sum.print("learn", out);
             }
 
             out << " - norm = " << sum_norm << endl;
             out << " - move accuracy = " << (move_accord_count * 100.0 / psv.size()) << "%" << endl;
-            out << " - loss (current) = " << (test_loss_sum.cross_entropy - test_loss_sum.entropy) / psv.size() << endl;
-            out << " - loss (average) = " << latest_loss_sum / latest_loss_count << endl;
         }
         else
         {
-            out << "ERROR: psv.size() = " << psv.size() << " , done = " << test_loss_sum.count << endl;
+            out << "ERROR: psv.size() = " << psv.size() << " , done = " << test_loss_sum.count() << endl;
         }
 
         learn_loss_sum.reset();
@@ -839,7 +686,7 @@
         Thread& th,
         std::atomic<uint64_t>& counter,
        const PSVector& psv,
-        AtomicLoss& test_loss_sum,
+        Loss& test_loss_sum,
         atomic<double>& sum_norm,
         atomic<int>& move_accord_count
     )
@@ -869,7 +716,7 @@
             // Evaluation value of deep search
             const auto deep_value = (Value)ps.score;
 
-            const auto loss = calc_cross_entropy(
+            const auto loss = get_loss(
                 deep_value,
                 shallow_value,
                 ps);
diff --git a/src/learn/learn.h b/src/learn/learn.h
index f74fd4e3..4e8d8a02 100644
--- a/src/learn/learn.h
+++ b/src/learn/learn.h
@@ -68,7 +68,6 @@ namespace Learner
     // Learning from the generated game record
     void learn(std::istringstream& is);
 
-    using CalcGradFunc = double(Value, Value, int, int);
     using CalcLossFunc = ValueWithGrad<double>(Value, Value, int, int);
 }
diff --git a/src/nnue/evaluate_nnue_learner.cpp b/src/nnue/evaluate_nnue_learner.cpp
index 822c56b4..038a462c 100644
--- a/src/nnue/evaluate_nnue_learner.cpp
+++ b/src/nnue/evaluate_nnue_learner.cpp
@@ -195,7 +195,6 @@ namespace Eval::NNUE {
         uint64_t epoch,
         bool verbose,
         double learning_rate,
-        Learner::CalcGradFunc calc_grad,
         Learner::CalcLossFunc calc_loss)
     {
         using namespace Learner::Autograd::UnivariateStatic;
@@ -237,8 +236,8 @@
                     e.sign * network_output[b] * kPonanzaConstant));
                 const auto discrete = e.sign * e.discrete_nn_eval;
                 const auto& psv = e.psv;
-                const double gradient =
-                    e.sign * calc_grad(shallow, (Value)psv.score, psv.game_result, psv.gamePly);
+                const auto loss = calc_loss(shallow, (Value)psv.score, psv.game_result, psv.gamePly);
+                const double gradient = loss.grad * e.sign * kPonanzaConstant;
 
                 gradients[b] = static_cast<LearnFloatType>(gradient * e.weight);
@@ -330,4 +329,4 @@
 #endif
 
     out << "INFO (save_eval): Finished saving evaluation file in " << eval_dir << std::endl;
 }
-} // namespace Eval::NNUE
\ No newline at end of file
+} // namespace Eval::NNUE
diff --git a/src/nnue/evaluate_nnue_learner.h b/src/nnue/evaluate_nnue_learner.h
index 0fe8afce..7f7daa5b 100644
--- a/src/nnue/evaluate_nnue_learner.h
+++ b/src/nnue/evaluate_nnue_learner.h
@@ -38,7 +38,6 @@ namespace Eval::NNUE {
         uint64_t epoch,
         bool verbose,
         double learning_rate,
-        Learner::CalcGradFunc calc_grad,
         Learner::CalcLossFunc calc_loss);
 
     // Check if there are any problems with learning
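
Note on the new loss path: loss_.eval(args) in get_loss() returns the loss value and its derivative with respect to the shallow evaluation in a single pass, and update_parameters() then applies the chain rule through the Ponanza scaling of the network output via loss.grad * e.sign * kPonanzaConstant. The standalone C++ sketch below shows the same forward-mode idea with a toy, eagerly evaluated Dual type instead of the patch's expression templates; the Dual type, the helper names, and the coefficient value are illustrative assumptions, not the patch's API.

#include <cmath>
#include <cstdio>

// Toy forward-mode "dual number": every operation carries a value together
// with its derivative w.r.t. the single variable, which is what the
// expression templates in autograd.h compute statically.
struct Dual
{
    double value;
    double grad;

    Dual operator+(Dual o) const { return { value + o.value, grad + o.grad }; }
    Dual operator-(Dual o) const { return { value - o.value, grad - o.grad }; }
    Dual operator*(Dual o) const
    {
        // Product rule: (uv)' = u'v + uv'
        return { value * o.value, grad * o.value + value * o.grad };
    }
};

static Dual constant(double c) { return { c, 0.0 }; } // like ConstantParameter
static Dual variable(double x) { return { x, 1.0 }; } // like VariableParameter

static Dual sigmoid(Dual x)
{
    const double s = 1.0 / (1.0 + std::exp(-x.value));
    return { s, s * (1.0 - s) * x.grad }; // chain rule through the sigmoid
}

// The elmo-style loss from get_loss():
//   loss = (lambda * (q - p) + (1 - lambda) * (q - t))^2
// evaluated together with dloss/dshallow in one pass. The scale k is a
// placeholder; the real code uses winning_probability_coefficient.
static Dual loss_sketch(double shallow, double teacher_signal, int result, double lambda)
{
    const double k = std::log(10.0) / 400.0;

    const Dual q = sigmoid(variable(shallow) * constant(k));        // shallow win rate
    const Dual p = sigmoid(constant(teacher_signal) * constant(k)); // teacher win rate
    const Dual t = constant((result + 1) * 0.5);                    // {-1,0,1} -> {0,0.5,1}
    const Dual l = constant(lambda);
    const Dual one = constant(1.0);

    const Dual e = l * (q - p) + (one - l) * (q - t);
    return e * e;
}

int main()
{
    const Dual loss = loss_sketch(120.0, 80.0, /*result=*/1, /*lambda=*/0.7);
    std::printf("loss = %.8f  dloss/dshallow = %.8f\n", loss.value, loss.grad);
    return 0;
}

Because the squared-error term is differentiated analytically, this also replaces the old finite-difference path (calc_d_cross_entropy_of_winning_percentage) and the hand-written calc_grad with one definition of the loss.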
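Note on the new Loss accumulator: a single mutex now guards the (value, grad, count) triple, so each update lands as one consistent unit, whereas the old Detail::Loss guarded every field with its own atomic and could expose mixed snapshots across fields. A minimal sketch of that pattern follows, under the same assumptions (illustrative names, not the patch's exact class).

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

// Mutex-guarded accumulator in the style of the new Loss struct: one lock
// keeps value, grad, and count consistent as a unit.
class LossSum
{
public:
    void add(double value, double grad)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        // Accumulate magnitudes, mirroring ValueWithGrad::abs() in the patch.
        m_value += std::abs(value);
        m_grad += std::abs(grad);
        m_count += 1;
    }

    void print() const
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        std::printf("loss = %f, grad_norm = %f\n",
                    m_value / (double)m_count, m_grad / (double)m_count);
    }

private:
    mutable std::mutex m_mutex;
    double m_value = 0.0;
    double m_grad = 0.0;
    std::uint64_t m_count = 0;
};

int main()
{
    LossSum sum;

    // Several worker threads adding per-position losses concurrently,
    // as the learner threads do with learn_loss_sum / test_loss_sum.
    std::vector<std::thread> workers;
    for (int i = 0; i < 4; ++i)
        workers.emplace_back([&sum, i] {
            for (int j = 0; j < 1000; ++j)
                sum.add(0.01 * (i + 1), -0.001 * (i + 1)); // dummy values
        });

    for (auto& w : workers)
        w.join();

    sum.print();
    return 0;
}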