diff --git a/src/learn/learn.cpp b/src/learn/learn.cpp
index e3bfe3a4..18f84114 100644
--- a/src/learn/learn.cpp
+++ b/src/learn/learn.cpp
@@ -237,6 +237,22 @@ namespace Learner
             return scaled_;
         }
 
+        static Value scale_score(Value v)
+        {
+            // Normalize to [0.0, 1.0].
+            auto normalized =
+                ((double)v - src_score_min_value)
+                / (src_score_max_value - src_score_min_value);
+
+            // Scale to [dest_score_min_value, dest_score_max_value].
+            auto scaled =
+                normalized
+                * (dest_score_max_value - dest_score_min_value)
+                + dest_score_min_value;
+
+            return Value(scaled);
+        }
+
         template <typename ValueT>
         static auto& expected_perf_(ValueT&& v_)
         {
@@ -247,7 +263,72 @@ namespace Learner
             return perf_;
         }
 
-        static ValueWithGrad<double> get_loss_noob(Value shallow, Value teacher_signal, int result, int /* ply */)
+        template <typename ValueT, typename PlyT>
+        static auto& expected_perf_use_wdl_(
+            ValueT& v_,
+            PlyT&& ply_
+        )
+        {
+            using namespace Learner::Autograd::UnivariateStatic;
+
+            // Coefficients of a 3rd order polynomial fit based on fishtest data
+            // for two parameters needed to transform eval to the argument of a
+            // logistic function.
+            static constexpr double as[] = { -8.24404295, 64.23892342, -95.73056462, 153.86478679 };
+            static constexpr double bs[] = { -3.37154371, 28.44489198, -56.67657741, 72.05858751 };
+
+            // The model captures only up to 240 plies, so limit the input (and rescale).
+            static thread_local auto m_ = std::forward<PlyT>(ply_) / 64.0;
+
+            static thread_local auto a_ = (((as[0] * m_ + as[1]) * m_ + as[2]) * m_) + as[3];
+            static thread_local auto b_ = (((bs[0] * m_ + bs[1]) * m_ + bs[2]) * m_) + bs[3];
+
+            // Win/loss probabilities in [0, 1].
+            static thread_local auto sv_ = (v_ - a_) / b_;
+            static thread_local auto svn_ = (-v_ - a_) / b_;
+
+            static thread_local auto win_pct_ = sigmoid(sv_);
+            static thread_local auto loss_pct_ = sigmoid(svn_);
+
+            static thread_local auto draw_pct_ = 1.0 - win_pct_ - loss_pct_;
+
+            static thread_local auto perf_ = win_pct_ + draw_pct_ * 0.5;
+
+            return perf_;
+        }
+
+        static double expected_perf_use_wdl(
+            Value v,
+            int ply
+        )
+        {
+            // Coefficients of a 3rd order polynomial fit based on fishtest data
+            // for two parameters needed to transform eval to the argument of a
+            // logistic function.
+            static constexpr double as[] = { -8.24404295, 64.23892342, -95.73056462, 153.86478679 };
+            static constexpr double bs[] = { -3.37154371, 28.44489198, -56.67657741, 72.05858751 };
+
+            // The model captures only up to 240 plies, so limit the input (and rescale).
+            auto m = ply / 64.0;
+
+            auto a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
+            auto b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
+
+            // Win/loss probabilities in [0, 1].
+            auto sv = ((double)v - a) / b;
+            auto svn = ((double)-v - a) / b;
+
+            auto win_pct = Math::sigmoid(sv);
+            auto loss_pct = Math::sigmoid(svn);
+
+            auto draw_pct = 1.0 - win_pct - loss_pct;
+
+            auto perf = win_pct + draw_pct * 0.5;
+
+            return perf;
+        }
+
+        [[maybe_unused]] static ValueWithGrad<double> get_loss_noob(Value shallow, Value teacher_signal, int result, int /* ply */)
         {
             using namespace Learner::Autograd::UnivariateStatic;
 
@@ -285,11 +366,46 @@ namespace Learner
             return loss_.eval(args);
         }
 
+        static ValueWithGrad<double> get_loss_cross_entropy_use_wdl(
+            Value shallow, Value teacher_signal, int result, int ply)
+        {
+            using namespace Learner::Autograd::UnivariateStatic;
+
+            static thread_local auto ply_ = ConstantParameter<double, 4>{};
+            static thread_local auto shallow_ = VariableParameter<double, 0>{};
+            static thread_local auto q_ = expected_perf_use_wdl_(shallow_, ply_);
+            // We could do just this but MSVC crashes with an internal compiler error :(
+            // static thread_local auto scaled_teacher_ = scale_score_(ConstantParameter<double, 1>{});
+            // static thread_local auto p_ = expected_perf_use_wdl_(scaled_teacher_, ply_);
+            static thread_local auto p_ = ConstantParameter<double, 1>{};
+            static thread_local auto t_ = (ConstantParameter<double, 2>{} + 1.0) * 0.5;
+            static thread_local auto lambda_ = ConstantParameter<double, 3>{};
+            static thread_local auto loss_ = cross_entropy_(q_, p_, t_, lambda_);
+
+            auto args = std::tuple(
+                (double)shallow,
+                // This is required because otherwise MSVC crashes :(
+                expected_perf_use_wdl(scale_score(teacher_signal), ply),
+                (double)result,
+                calculate_lambda(teacher_signal),
+                (double)std::min(240, ply)
+            );
+
+            return loss_.eval(args);
+        }
+
         static auto get_loss(Value shallow, Value teacher_signal, int result, int ply)
         {
             using namespace Learner::Autograd::UnivariateStatic;
 
-            return get_loss_cross_entropy(shallow, teacher_signal, result, ply);
+            if (use_wdl)
+            {
+                return get_loss_cross_entropy_use_wdl(shallow, teacher_signal, result, ply);
+            }
+            else
+            {
+                return get_loss_cross_entropy(shallow, teacher_signal, result, ply);
+            }
         }
 
         static auto get_loss(
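
`scale_score` is a plain linear remap: normalize the value into [0, 1] against the source range, then stretch it onto the destination range. A minimal standalone sketch, with illustrative bounds (the real `src_score_*`/`dest_score_*` values are defined elsewhere in learn.cpp and are assumed here):

```cpp
#include <cstdio>

// Assumed bounds for illustration only; the patch reads these from
// configuration elsewhere in learn.cpp.
static constexpr double src_score_min_value  = 0.0;
static constexpr double src_score_max_value  = 1.0;
static constexpr double dest_score_min_value = -2000.0;
static constexpr double dest_score_max_value = 2000.0;

double scale_score(double v)
{
    // Normalize to [0.0, 1.0], then rescale onto the destination interval.
    double normalized = (v - src_score_min_value)
                      / (src_score_max_value - src_score_min_value);
    return normalized * (dest_score_max_value - dest_score_min_value)
         + dest_score_min_value;
}

int main()
{
    std::printf("%f\n", scale_score(0.75)); // 1000.0 with the bounds above
}
```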
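The WDL conversion itself is small enough to check in isolation. Below is a minimal sketch of `expected_perf_use_wdl`, assuming `Math::sigmoid` is the standard logistic function; unlike the non-autograd version in the patch, it clamps ply at 240 as the comment suggests. At eval 0 the win and loss probabilities are equal, so the expected performance should come out at 0.5:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

// Assumption: Math::sigmoid in the repo is the standard logistic function.
double sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }

double expected_perf_use_wdl(double v, int ply)
{
    // Same 3rd order polynomial coefficients as in the patch.
    static constexpr double as[] = { -8.24404295, 64.23892342, -95.73056462, 153.86478679 };
    static constexpr double bs[] = { -3.37154371, 28.44489198, -56.67657741, 72.05858751 };

    // The fit is only valid up to 240 plies; clamp and rescale.
    double m = std::min(240, ply) / 64.0;

    // Horner evaluation of the two polynomials in m.
    double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
    double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];

    // Win/loss probabilities from the symmetric logistic model.
    double win  = sigmoid(( v - a) / b);
    double loss = sigmoid((-v - a) / b);
    double draw = 1.0 - win - loss;

    // A draw counts as half a point.
    return win + 0.5 * draw;
}

int main()
{
    std::printf("%f\n", expected_perf_use_wdl(0.0, 30)); // prints 0.500000
}
```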
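In `get_loss_cross_entropy_use_wdl`, the target is a blend of two signals: `p_`, the teacher's expected performance, and `t_ = (result + 1) / 2`, which maps a game result in {-1, 0, 1} onto {0, 0.5, 1}, weighted by `lambda_`. The `cross_entropy_` expression itself is defined elsewhere in learn.cpp and may differ in detail (for instance, it may subtract the target entropy so that the minimum is zero); the scalar sketch below shows only the blending idea and is an assumption, not the repo's definition:

```cpp
#include <cmath>

// Hypothetical scalar analogue of cross_entropy_(q_, p_, t_, lambda_):
// q is the predicted perf from the shallow eval, p the teacher perf,
// t the game outcome mapped to [0, 1], lambda the blend weight.
double cross_entropy(double q, double p, double t, double lambda)
{
    constexpr double eps = 1e-12; // guard against log(0)

    auto ce = [&](double target) {
        return -(target * std::log(q + eps)
                 + (1.0 - target) * std::log(1.0 - q + eps));
    };

    return lambda * ce(p) + (1.0 - lambda) * ce(t);
}
```

Under this formulation, `lambda = 1` trains purely against the teacher evals and `lambda = 0` purely against the game results, matching the role `calculate_lambda(teacher_signal)` plays in the argument tuple.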