From cde6ec2bf26d46dedf4547580f6e45e34d8b1ab4 Mon Sep 17 00:00:00 2001
From: Tomasz Sobczyk
Date: Sat, 24 Oct 2020 23:29:32 +0200
Subject: [PATCH] Make all grad-related functions in learn static. Pass
 calc_grad as a parameter.

---
 src/learn/learn.cpp                | 40 +++++++++++++-----------------
 src/learn/learn.h                  |  4 +--
 src/nnue/evaluate_nnue_learner.cpp |  7 +++---
 src/nnue/evaluate_nnue_learner.h   |  2 +-
 4 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/src/learn/learn.cpp b/src/learn/learn.cpp
index 5bb41213..b0f77e89 100644
--- a/src/learn/learn.cpp
+++ b/src/learn/learn.cpp
@@ -185,7 +185,7 @@ namespace Learner
     }

     // A function that converts the evaluation value to the winning rate [0,1]
-    double winning_percentage(double value)
+    static double winning_percentage(double value)
     {
         // 1/(1+10^(-Eval/4))
         // = 1/(1+e^(-Eval/4*ln(10)))
@@ -194,7 +194,7 @@ namespace Learner
     }

     // A function that converts the evaluation value to the winning rate [0,1]
-    double winning_percentage_wdl(double value, int ply)
+    static double winning_percentage_wdl(double value, int ply)
     {
         constexpr double wdl_total = 1000.0;
         constexpr double draw_score = 0.5;
@@ -207,7 +207,7 @@ namespace Learner
     }

     // A function that converts the evaluation value to the winning rate [0,1]
-    double winning_percentage(double value, int ply)
+    static double winning_percentage(double value, int ply)
     {
         if (use_wdl)
         {
@@ -219,7 +219,7 @@ namespace Learner
         }
     }

-    double calc_cross_entropy_of_winning_percentage(
+    static double calc_cross_entropy_of_winning_percentage(
         double deep_win_rate,
         double shallow_eval,
         int ply)
@@ -229,7 +229,7 @@ namespace Learner
         return -p * std::log(q) - (1.0 - p) * std::log(1.0 - q);
     }

-    double calc_d_cross_entropy_of_winning_percentage(
+    static double calc_d_cross_entropy_of_winning_percentage(
         double deep_win_rate,
         double shallow_eval,
         int ply)
@@ -248,7 +248,7 @@ namespace Learner
     }

     // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
-    double get_scaled_signal(double signal)
+    static double get_scaled_signal(double signal)
     {
         double scaled_signal = signal;

@@ -266,13 +266,13 @@ namespace Learner
     }

     // Teacher winning probability.
-    double calculate_p(double teacher_signal, int ply)
+    static double calculate_p(double teacher_signal, int ply)
     {
         const double scaled_teacher_signal = get_scaled_signal(teacher_signal);
         return winning_percentage(scaled_teacher_signal, ply);
     }

-    double calculate_lambda(double teacher_signal)
+    static double calculate_lambda(double teacher_signal)
     {
         // If the evaluation value in deep search exceeds elmo_lambda_limit
         // then apply elmo_lambda_high instead of elmo_lambda_low.
@@ -284,7 +284,7 @@ namespace Learner
         return lambda;
     }

-    double calculate_t(int game_result)
+    static double calculate_t(int game_result)
     {
         // Use 1 as the correction term if the expected win rate is 1,
         // 0 if you lose, and 0.5 if you draw.
@@ -294,20 +294,20 @@ namespace Learner
         return t;
     }

-    double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
+    static double calc_grad(Value shallow, Value teacher_signal, int result, int ply)
     {
         // elmo (WCSC27) method
         // Correct with the actual game wins and losses.
-        const double q = winning_percentage(shallow, psv.gamePly);
-        const double p = calculate_p(teacher_signal, psv.gamePly);
-        const double t = calculate_t(psv.game_result);
+        const double q = winning_percentage(shallow, ply);
+        const double p = calculate_p(teacher_signal, ply);
+        const double t = calculate_t(result);
         const double lambda = calculate_lambda(teacher_signal);

         double grad;
         if (use_wdl)
         {
-            const double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly);
-            const double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly);
+            const double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, ply);
+            const double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, ply);
             grad = lambda * dce_p + (1.0 - lambda) * dce_t;
         }
         else
@@ -324,7 +324,7 @@ namespace Learner
     // The individual cross entropy of the win/loss term and win
     // rate term of the elmo expression is returned
     // to the arguments cross_entropy_eval and cross_entropy_win.
-    Loss calc_cross_entropy(
+    static Loss calc_cross_entropy(
         Value teacher_signal,
         Value shallow,
         const PackedSfenValue& psv)
@@ -360,12 +360,6 @@ namespace Learner
         return loss;
     }

-    // Other objective functions may be considered in the future...
-    double calc_grad(Value shallow, const PackedSfenValue& psv)
-    {
-        return calc_grad((Value)psv.score, shallow, psv);
-    }
-
    // Class to generate sfen with multiple threads
    struct LearnerThink
    {
@@ -703,7 +697,7 @@ namespace Learner
            // should be no real issues happening since
            // the read/write phases are isolated.
            atomic_thread_fence(memory_order_seq_cst);
-           Eval::NNUE::update_parameters(epoch, params.verbose);
+           Eval::NNUE::update_parameters(epoch, params.verbose, calc_grad);
            atomic_thread_fence(memory_order_seq_cst);

            if (++save_count * params.mini_batch_size >= params.eval_save_interval)
diff --git a/src/learn/learn.h b/src/learn/learn.h
index 008ca7af..6ce476e5 100644
--- a/src/learn/learn.h
+++ b/src/learn/learn.h
@@ -64,10 +64,10 @@ namespace Learner
    // rmse calculation is done in one thread, so it takes some time, so reducing the output is effective.
    constexpr std::size_t LEARN_RMSE_OUTPUT_INTERVAL = 1;

-    double calc_grad(Value shallow, const PackedSfenValue& psv);
-
    // Learning from the generated game record
    void learn(std::istringstream& is);
+
+    using CalcGradFunc = double(Value, Value, int, int);
}

#endif // ifndef _LEARN_H_
diff --git a/src/nnue/evaluate_nnue_learner.cpp b/src/nnue/evaluate_nnue_learner.cpp
index 6775707d..3e91a7de 100644
--- a/src/nnue/evaluate_nnue_learner.cpp
+++ b/src/nnue/evaluate_nnue_learner.cpp
@@ -18,8 +18,6 @@
 #include "misc.h"
 #include "thread_win32_osx.h"

-#include "learn/learn.h"
-
 // Learning rate scale
 double global_learning_rate;

@@ -183,7 +181,7 @@ namespace Eval::NNUE {
    }

    // update the evaluation function parameters
-    void update_parameters(uint64_t epoch, bool verbose) {
+    void update_parameters(uint64_t epoch, bool verbose, Learner::CalcGradFunc calc_grad) {
        assert(batch_size > 0);

        const auto learning_rate = static_cast<LearnFloatType>(
@@ -210,7 +208,8 @@ namespace Eval::NNUE {
                batch[b].sign * network_output[b] * kPonanzaConstant));
            const auto discrete = batch[b].sign * batch[b].discrete_nn_eval;
            const auto& psv = batch[b].psv;
-            const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv);
+            const double gradient =
+                batch[b].sign * calc_grad(shallow, (Value)psv.score, psv.game_result, psv.gamePly);

            gradients[b] = static_cast<LearnFloatType>(gradient * batch[b].weight);

diff --git a/src/nnue/evaluate_nnue_learner.h b/src/nnue/evaluate_nnue_learner.h
index 91d2aa99..8a9786e5 100644
--- a/src/nnue/evaluate_nnue_learner.h
+++ b/src/nnue/evaluate_nnue_learner.h
@@ -31,7 +31,7 @@ namespace Eval::NNUE {
        double weight);

    // update the evaluation function parameters
-    void update_parameters(uint64_t epoch, bool verbose);
+    void update_parameters(uint64_t epoch, bool verbose, Learner::CalcGradFunc calc_grad);

    // Check if there are any problems with learning
    void check_health();
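---

Note on the new CalcGradFunc alias: `using CalcGradFunc = double(Value, Value, int, int);` names a function *type*, so the `calc_grad` parameter of update_parameters() decays to a plain function pointer and the NNUE trainer no longer needs to include learn.h for the gradient formula. A minimal self-contained sketch of the same pass-the-gradient-function pattern follows; it is illustrative only (Value is reduced to a plain int, the 600.0 scale is arbitrary, and toy_grad / run_update are made-up stand-ins, not code from this patch):

#include <cmath>
#include <cstdio>

// Stand-in for Stockfish's Value type (centipawn score); illustrative only.
using Value = int;

// Same shape as the new Learner::CalcGradFunc:
// double(shallow_eval, teacher_signal, game_result, ply).
using CalcGradFunc = double(Value, Value, int, int);

// Made-up gradient: difference of sigmoid win rates. It ignores the
// game_result/ply terms that the real calc_grad blends in via lambda.
static double toy_grad(Value shallow, Value teacher, int /*game_result*/, int /*ply*/)
{
    auto win_rate = [](double v) { return 1.0 / (1.0 + std::exp(-v / 600.0)); };
    return win_rate(shallow) - win_rate(teacher);
}

// Mirrors update_parameters(): the update loop sees only the function
// type; the parameter decays to double(*)(Value, Value, int, int).
static void run_update(CalcGradFunc calc_grad)
{
    std::printf("gradient = %f\n", calc_grad(120, 80, 1, 30));
}

int main()
{
    run_update(toy_grad);  // as learn.cpp now passes its static calc_grad
    return 0;
}

A plain function type (rather than, say, std::function) keeps the call zero-overhead and requires no captured state, which is all learn.cpp needs here.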