Mirror of https://github.com/HChaZZY/Stockfish.git, synced 2025-12-25 11:36:51 +08:00
Reintroduce use_wdl.
@@ -237,6 +237,22 @@ namespace Learner
         return scaled_;
     }
 
+    static Value scale_score(Value v)
+    {
+        // Normalize to [0.0, 1.0].
+        auto normalized =
+            ((double)v - src_score_min_value)
+            / (src_score_max_value - src_score_min_value);
+
+        // Scale to [dest_score_min_value, dest_score_max_value].
+        auto scaled =
+            normalized
+            * (dest_score_max_value - dest_score_min_value)
+            + dest_score_min_value;
+
+        return Value(scaled);
+    }
+
     template <typename ValueT>
     static auto& expected_perf_(ValueT&& v_)
     {
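
For reference, scale_score is a plain affine remap: normalize the source score to [0, 1], then stretch it onto the destination range. A minimal standalone sketch of the same mapping, with hypothetical bounds (the real src_score_*/dest_score_* constants are defined elsewhere in this file; the values below are placeholders):

    #include <cstdio>

    // Hypothetical bounds; the real src_/dest_score_* constants are defined
    // elsewhere in this file.
    constexpr double src_min  = -1000.0, src_max  = 1000.0;
    constexpr double dest_min = -600.0,  dest_max = 600.0;

    // Same two-step mapping as scale_score: normalize to [0, 1], then
    // stretch onto the destination range.
    double scale(double v)
    {
        double normalized = (v - src_min) / (src_max - src_min);
        return normalized * (dest_max - dest_min) + dest_min;
    }

    int main()
    {
        // Endpoints map to endpoints; 0 stays at 0 because both ranges are symmetric.
        std::printf("%g %g %g\n", scale(-1000), scale(0), scale(1000)); // -600 0 600
    }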
@@ -247,7 +263,72 @@ namespace Learner
         return perf_;
     }
 
-    static ValueWithGrad<double> get_loss_noob(Value shallow, Value teacher_signal, int result, int /* ply */)
+    template <typename ValueT, typename PlyT, typename T = typename ValueT::ValueType>
+    static auto& expected_perf_use_wdl_(
+        ValueT& v_,
+        PlyT&& ply_
+    )
+    {
+        using namespace Learner::Autograd::UnivariateStatic;
+
+        // Coefficients of a 3rd order polynomial fit based on fishtest data
+        // for two parameters needed to transform eval to the argument of a
+        // logistic function.
+        static constexpr T as[] = { -8.24404295, 64.23892342, -95.73056462, 153.86478679 };
+        static constexpr T bs[] = { -3.37154371, 28.44489198, -56.67657741, 72.05858751 };
+
+        // The model captures only up to 240 plies; the caller clamps, so just rescale here.
+        static thread_local auto m_ = std::forward<PlyT>(ply_) / 64.0;
+
+        static thread_local auto a_ = (((as[0] * m_ + as[1]) * m_ + as[2]) * m_) + as[3];
+        static thread_local auto b_ = (((bs[0] * m_ + bs[1]) * m_ + bs[2]) * m_) + bs[3];
+
+        // Win/loss probabilities in [0, 1] from the logistic model.
+        static thread_local auto sv_ = (v_ - a_) / b_;
+        static thread_local auto svn_ = (-v_ - a_) / b_;
+
+        static thread_local auto win_pct_ = sigmoid(sv_);
+        static thread_local auto loss_pct_ = sigmoid(svn_);
+
+        static thread_local auto draw_pct_ = 1.0 - win_pct_ - loss_pct_;
+
+        static thread_local auto perf_ = win_pct_ + draw_pct_ * 0.5;
+
+        return perf_;
+    }
+
+    static double expected_perf_use_wdl(
+        Value v,
+        int ply
+    )
+    {
+        // Coefficients of a 3rd order polynomial fit based on fishtest data
+        // for two parameters needed to transform eval to the argument of a
+        // logistic function.
+        static constexpr double as[] = { -8.24404295, 64.23892342, -95.73056462, 153.86478679 };
+        static constexpr double bs[] = { -3.37154371, 28.44489198, -56.67657741, 72.05858751 };
+
+        // The model captures only up to 240 plies, so clamp the input (and rescale).
+        auto m = std::min(240, ply) / 64.0;
+
+        auto a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
+        auto b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
+
+        // Win/loss probabilities in [0, 1] from the logistic model.
+        auto sv = ((double)v - a) / b;
+        auto svn = ((double)-v - a) / b;
+
+        auto win_pct = Math::sigmoid(sv);
+        auto loss_pct = Math::sigmoid(svn);
+
+        auto draw_pct = 1.0 - win_pct - loss_pct;
+
+        auto perf = win_pct + draw_pct * 0.5;
+
+        return perf;
+    }
+
+    [[maybe_unused]] static ValueWithGrad<double> get_loss_noob(Value shallow, Value teacher_signal, int result, int /* ply */)
     {
         using namespace Learner::Autograd::UnivariateStatic;
 
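
To make the WDL model above concrete, here is a self-contained sketch of the same computation, assuming Math::sigmoid is the standard logistic 1 / (1 + exp(-x)); the sigmoid helper and the sample inputs below are illustrative, not taken from this codebase:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    // Standard logistic; assumed to match Math::sigmoid in this codebase.
    static double sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }

    // Same polynomial fit and WDL combination as expected_perf_use_wdl above.
    static double expected_perf(double v, int ply)
    {
        constexpr double as[] = { -8.24404295, 64.23892342, -95.73056462, 153.86478679 };
        constexpr double bs[] = { -3.37154371, 28.44489198, -56.67657741, 72.05858751 };

        // Clamp to the fitted range and rescale the ply count.
        double m = std::min(240, ply) / 64.0;
        double a = ((as[0] * m + as[1]) * m + as[2]) * m + as[3];
        double b = ((bs[0] * m + bs[1]) * m + bs[2]) * m + bs[3];

        double win  = sigmoid(( v - a) / b);
        double loss = sigmoid((-v - a) / b);
        double draw = 1.0 - win - loss;

        // Expected score: a draw counts as half a point.
        return win + draw * 0.5;
    }

    int main()
    {
        // A 100cp advantage at ply 30 gives roughly a 0.69 expected score
        // with these coefficients.
        std::printf("%.3f\n", expected_perf(100.0, 30));
        // At eval 0 the win and loss terms are equal, so the expected
        // score is exactly 0.5 at any ply.
        std::printf("%.3f\n", expected_perf(0.0, 30));
    }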
@@ -285,11 +366,46 @@ namespace Learner
         return loss_.eval(args);
     }
 
+    static ValueWithGrad<double> get_loss_cross_entropy_use_wdl(
+        Value shallow, Value teacher_signal, int result, int ply)
+    {
+        using namespace Learner::Autograd::UnivariateStatic;
+
+        static thread_local auto ply_ = ConstantParameter<double, 4>{};
+        static thread_local auto shallow_ = VariableParameter<double, 0>{};
+        static thread_local auto q_ = expected_perf_use_wdl_(shallow_, ply_);
+        // We could do just this but MSVC crashes with an internal compiler error :(
+        // static thread_local auto scaled_teacher_ = scale_score_(ConstantParameter<double, 1>{});
+        // static thread_local auto p_ = expected_perf_use_wdl_(scaled_teacher_, ply_);
+        static thread_local auto p_ = ConstantParameter<double, 1>{};
+        static thread_local auto t_ = (ConstantParameter<double, 2>{} + 1.0) * 0.5;
+        static thread_local auto lambda_ = ConstantParameter<double, 3>{};
+        static thread_local auto loss_ = cross_entropy_(q_, p_, t_, lambda_);
+
+        auto args = std::tuple(
+            (double)shallow,
+            // This is required because otherwise MSVC crashes :(
+            expected_perf_use_wdl(scale_score(teacher_signal), ply),
+            (double)result,
+            calculate_lambda(teacher_signal),
+            (double)std::min(240, ply)
+        );
+
+        return loss_.eval(args);
+    }
+
     static auto get_loss(Value shallow, Value teacher_signal, int result, int ply)
     {
         using namespace Learner::Autograd::UnivariateStatic;
 
-        return get_loss_cross_entropy(shallow, teacher_signal, result, ply);
+        if (use_wdl)
+        {
+            return get_loss_cross_entropy_use_wdl(shallow, teacher_signal, result, ply);
+        }
+        else
+        {
+            return get_loss_cross_entropy(shallow, teacher_signal, result, ply);
+        }
     }
 
     static auto get_loss(
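
In get_loss_cross_entropy_use_wdl, cross_entropy_ receives the predicted expected score q_, the teacher's expected score p_, the game result mapped to t = (result + 1) * 0.5, and a blending weight lambda from calculate_lambda. The authoritative definition of cross_entropy_ lives elsewhere in this file; the following is only a hypothetical sketch of a lambda-blended binary cross-entropy with that shape:

    #include <cmath>
    #include <cstdio>

    // Hypothetical sketch of a lambda-blended binary cross-entropy; the
    // real cross_entropy_ in this file is the authoritative definition.
    static double blended_cross_entropy(double q,       // predicted expected score
                                        double p,       // teacher expected score
                                        double t,       // game result mapped to [0, 1]
                                        double lambda)  // interpolation weight
    {
        constexpr double eps = 1e-12; // guard the logs against q == 0 or 1
        auto ce = [&](double target) {
            return -(target * std::log(q + eps)
                     + (1.0 - target) * std::log(1.0 - q + eps));
        };
        // Blend the teacher-signal term and the game-result term.
        return lambda * ce(p) + (1.0 - lambda) * ce(t);
    }

    int main()
    {
        // result = +1 (win) maps to t = (1 + 1) * 0.5 = 1.0, as in
        // get_loss_cross_entropy_use_wdl; the other values are illustrative.
        std::printf("%.4f\n", blended_cross_entropy(0.69, 0.72, 1.0, 0.7));
    }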