Merge remote-tracking branch 'remotes/nodchip/master' into trainer

noobpwnftw
2020-09-23 19:13:07 +08:00
18 changed files with 253 additions and 570 deletions


@@ -29,8 +29,6 @@
#include "uci.h"
#include "search.h"
#include "eval/evaluate_common.h"
#include "extra/nnue_data_binpack_format.h"
#include "nnue/evaluate_nnue_learner.h"
@@ -58,6 +56,7 @@
#include <omp.h>
#endif
extern double global_learning_rate;
using namespace std;
@@ -92,12 +91,6 @@ namespace Learner
static double dest_score_min_value = 0.0;
static double dest_score_max_value = 1.0;
// Assume teacher signals are the scores of deep searches,
// and convert them into winning probabilities in the trainer.
// Sometimes we want to use the winning probabilities in the training
// data directly. In those cases, set this variable to false.
static bool convert_teacher_signal_to_winning_probability = true;
// Using stockfish's WDL with win rate model instead of sigmoid
static bool use_wdl = false;
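For reference, the sigmoid conversion these knobs control is, in essence, the logistic curve below. A minimal sketch, assuming the conventional constant under which a 600-centipawn score maps to roughly a 90% win rate (the trainer exposes the scale as winning_probability_coefficient, and use_wdl swaps this curve for Stockfish's ply-dependent WDL model):

#include <cmath>

// Sketch: map an engine score (centipawns) to a winning probability.
// The constant 600.0 is an illustrative stand-in for the tunable
// winning_probability_coefficient above.
double winning_percentage_sketch(double score)
{
    return 1.0 / (1.0 + std::pow(10.0, -score / 600.0));
}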
@@ -164,14 +157,6 @@ namespace Learner
return ((y2 - y1) / epsilon) / winning_probability_coefficient;
}
// A constant used in elmo (WCSC27). Requires tuning.
// Since elmo does not interpolate the expression internally, its value differs.
// You can set this value with the learn command.
// 0.33 is equivalent to the constant (0.5) used in elmo (WCSC27)
double ELMO_LAMBDA = 0.33;
double ELMO_LAMBDA2 = 0.33;
double ELMO_LAMBDA_LIMIT = 32000;
// Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
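The lambda constants implement the elmo (WCSC27) blend: the training target interpolates between the game result t and the deep-search win probability p, matching the `m = (1.0 - lambda) * t + lambda * p` term being removed from calc_cross_entropy below. A minimal sketch:

// Sketch: elmo-style interpolation of the training target.
// p      = win probability derived from the deep-search score
// t      = game result mapped into [0, 1]
// lambda = ELMO_LAMBDA, or ELMO_LAMBDA2 once |signal| >= ELMO_LAMBDA_LIMIT
double blended_target(double p, double t, double lambda)
{
    return (1.0 - lambda) * t + lambda * p;
}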
double get_scaled_signal(double signal)
{
@@ -194,26 +179,7 @@ namespace Learner
double calculate_p(double teacher_signal, int ply)
{
const double scaled_teacher_signal = get_scaled_signal(teacher_signal);
double p = scaled_teacher_signal;
if (convert_teacher_signal_to_winning_probability)
{
p = winning_percentage(scaled_teacher_signal, ply);
}
return p;
}
double calculate_lambda(double teacher_signal)
{
// If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT
// then apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
const double lambda =
(std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT)
? ELMO_LAMBDA2
: ELMO_LAMBDA;
return lambda;
return winning_percentage(scaled_teacher_signal, ply);
}
double calculate_t(int game_result)
@@ -236,21 +202,16 @@ namespace Learner
const PackedSfenValue& psv,
double& cross_entropy_eval,
double& cross_entropy_win,
double& cross_entropy,
double& entropy_eval,
double& entropy_win,
double& entropy)
double& entropy_win)
{
// Winning probability predicted by the shallow search.
const double q = winning_percentage(shallow, psv.gamePly);
const double p = calculate_p(teacher_signal, psv.gamePly);
const double t = calculate_t(psv.game_result);
const double lambda = calculate_lambda(teacher_signal);
constexpr double epsilon = 0.000001;
const double m = (1.0 - lambda) * t + lambda * p;
cross_entropy_eval =
(-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon));
cross_entropy_win =
@@ -259,11 +220,6 @@ namespace Learner
(-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon));
entropy_win =
(-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon));
cross_entropy =
(-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon));
entropy =
(-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon));
}
// Other objective functions may be considered in the future...
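Every term above is the same binary cross entropy with a small epsilon guarding log(0); evaluating it with the target as its own prediction yields the plain entropy terms. A minimal sketch:

#include <cmath>

// Sketch: binary cross entropy between a target probability p and a
// predicted probability q; cross_entropy_sketch(p, p) is the entropy of p.
double cross_entropy_sketch(double p, double q)
{
    constexpr double epsilon = 0.000001;
    return -p * std::log(q + epsilon)
           - (1.0 - p) * std::log(1.0 - q + epsilon);
}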
@@ -761,9 +717,6 @@ namespace Learner
std::atomic<bool> stop_flag;
// Discount rate
double discount_rate;
// Option to exclude early stage from learning
int reduction_gameply;
@@ -796,7 +749,6 @@ namespace Learner
uint64_t eval_save_interval;
uint64_t loss_output_interval;
uint64_t mirror_percentage;
// Loss calculation.
// done: the number of positions processed in this pass
@@ -840,20 +792,18 @@ namespace Learner
// It doesn't matter if you have disabled the transposition table.
TT.new_search();
std::cout << "PROGRESS: " << now_string() << ", ";
std::cout << sr.total_done << " sfens";
std::cout << ", iteration " << epoch;
std::cout << ", eta = " << Eval::get_eta() << ", ";
cout << "PROGRESS: " << now_string() << ", ";
cout << sr.total_done << " sfens";
cout << ", iteration " << epoch;
cout << ", learning rate = " << global_learning_rate << ", ";
// For computing the loss on the validation data
atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win, test_sum_cross_entropy;
atomic<double> test_sum_entropy_eval, test_sum_entropy_win, test_sum_entropy;
atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win;
atomic<double> test_sum_entropy_eval, test_sum_entropy_win;
test_sum_cross_entropy_eval = 0;
test_sum_cross_entropy_win = 0;
test_sum_cross_entropy = 0;
test_sum_entropy_eval = 0;
test_sum_entropy_win = 0;
test_sum_entropy = 0;
// norm for learning
atomic<double> sum_norm;
@@ -869,7 +819,7 @@ namespace Learner
auto& pos = th->rootPos;
StateInfo si;
pos.set(StartFEN, false, &si, th);
std::cout << "hirate eval = " << Eval::evaluate(pos);
cout << "hirate eval = " << Eval::evaluate(pos) << endl;
// Parallelizing here would be better, but it is a bit troublesome
// because the preceding search on the slave threads has not finished.
@@ -893,10 +843,8 @@ namespace Learner
&ps,
&test_sum_cross_entropy_eval,
&test_sum_cross_entropy_win,
&test_sum_cross_entropy,
&test_sum_entropy_eval,
&test_sum_entropy_win,
&test_sum_entropy,
&sum_norm,
&task_count,
&move_accord_count
@@ -924,26 +872,22 @@ namespace Learner
// For now, calculate and display the cross entropy only for
// the win-rate and loss terms of the elmo method.
double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy;
double test_entropy_eval, test_entropy_win, test_entropy;
double test_cross_entropy_eval, test_cross_entropy_win;
double test_entropy_eval, test_entropy_win;
calc_cross_entropy(
deep_value,
shallow_value,
ps,
test_cross_entropy_eval,
test_cross_entropy_win,
test_cross_entropy,
test_entropy_eval,
test_entropy_win,
test_entropy);
test_entropy_win);
// By definition, the total cross entropy does not need abs().
test_sum_cross_entropy_eval += test_cross_entropy_eval;
test_sum_cross_entropy_win += test_cross_entropy_win;
test_sum_cross_entropy += test_cross_entropy;
test_sum_entropy_eval += test_entropy_eval;
test_sum_entropy_win += test_entropy_win;
test_sum_entropy += test_entropy;
sum_norm += (double)abs(shallow_value);
// Determine whether the teacher's move matches the best move of the shallow search
@@ -968,7 +912,7 @@ namespace Learner
while (task_count)
sleep(1);
latest_loss_sum += test_sum_cross_entropy - test_sum_entropy;
latest_loss_sum += test_sum_cross_entropy_eval - test_sum_entropy_eval;
latest_loss_count += sr.sfen_for_mse.size();
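The quantity accumulated here, a cross entropy minus the matching entropy, is the Kullback-Leibler divergence between the teacher probability and the shallow-search prediction: non-negative, and zero only when the two agree (this commit switches it from the removed blended terms to the eval terms). A minimal sketch:

#include <cmath>

// Sketch: D(p||q) = CE(p, q) - H(p) >= 0, with equality iff p == q
// (up to the epsilon guard).
double kl_divergence_sketch(double p, double q)
{
    constexpr double epsilon = 0.000001;
    auto ce = [&](double target, double pred) {
        return -target * std::log(pred + epsilon)
               - (1.0 - target) * std::log(1.0 - pred + epsilon);
    };
    return ce(p, q) - ce(p, p);
}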
// learn_cross_entropy may be called train cross entropy.
@@ -978,27 +922,24 @@ namespace Learner
if (sr.sfen_for_mse.size() && done)
{
cout
<< " , test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size()
cout << "INFO: "
<< "test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size()
<< " , test_cross_entropy_win = " << test_sum_cross_entropy_win / sr.sfen_for_mse.size()
<< " , test_entropy_eval = " << test_sum_entropy_eval / sr.sfen_for_mse.size()
<< " , test_entropy_win = " << test_sum_entropy_win / sr.sfen_for_mse.size()
<< " , test_cross_entropy = " << test_sum_cross_entropy / sr.sfen_for_mse.size()
<< " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size()
<< " , norm = " << sum_norm
<< " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%";
<< " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%"
<< endl;
if (done != static_cast<uint64_t>(-1))
{
cout
<< " , learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done
cout << "INFO: "
<< "learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done
<< " , learn_cross_entropy_win = " << learn_sum_cross_entropy_win / done
<< " , learn_entropy_eval = " << learn_sum_entropy_eval / done
<< " , learn_entropy_win = " << learn_sum_entropy_win / done
<< " , learn_cross_entropy = " << learn_sum_cross_entropy / done
<< " , learn_entropy = " << learn_sum_entropy / done;
<< endl;
}
cout << endl;
}
else
{
@@ -1008,10 +949,8 @@ namespace Learner
// Reset to 0 for the next time.
learn_sum_cross_entropy_eval = 0.0;
learn_sum_cross_entropy_win = 0.0;
learn_sum_cross_entropy = 0.0;
learn_sum_entropy_eval = 0.0;
learn_sum_entropy_win = 0.0;
learn_sum_entropy = 0.0;
}
void LearnerThink::thread_worker(size_t thread_id)
@@ -1060,7 +999,7 @@ namespace Learner
// Lock the evaluation function so that it is not used during updating.
lock_guard<shared_timed_mutex> write_lock(nn_mutex);
Eval::NNUE::UpdateParameters(epoch);
Eval::NNUE::UpdateParameters();
}
++epoch;
@@ -1137,8 +1076,7 @@ namespace Learner
goto RETRY_READ;
StateInfo si;
const bool mirror = prng.rand(100) < mirror_percentage;
if (pos.set_from_packed_sfen(ps.sfen, &si, th, mirror) != 0)
if (pos.set_from_packed_sfen(ps.sfen, &si, th) != 0)
{
// I got a strange sfen. Should be debugged!
// Since it is an illegal sfen, it may not be
@@ -1157,8 +1095,7 @@ namespace Learner
if (!pos.pseudo_legal((Move)ps.move) || !pos.legal((Move)ps.move))
{
sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
continue;
goto RETRY_READ;
}
pos.do_move((Move)ps.move, state[ply++]);
@@ -1205,29 +1142,23 @@ namespace Learner
: -Eval::evaluate(pos);
// Calculate loss for training data
double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
double learn_entropy_eval, learn_entropy_win, learn_entropy;
double learn_cross_entropy_eval, learn_cross_entropy_win;
double learn_entropy_eval, learn_entropy_win;
calc_cross_entropy(
deep_value,
shallow_value,
ps,
learn_cross_entropy_eval,
learn_cross_entropy_win,
learn_cross_entropy,
learn_entropy_eval,
learn_entropy_win,
learn_entropy);
learn_entropy_win);
learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
learn_sum_cross_entropy_win += learn_cross_entropy_win;
learn_sum_cross_entropy += learn_cross_entropy;
learn_sum_entropy_eval += learn_entropy_eval;
learn_sum_entropy_win += learn_entropy_win;
learn_sum_entropy += learn_entropy;
const double example_weight =
(discount_rate != 0 && ply != (int)pv.size()) ? discount_rate : 1.0;
Eval::NNUE::AddExample(pos, rootColor, ps, example_weight);
Eval::NNUE::AddExample(pos, rootColor, ps, 1.0);
// Processing is complete, so increment the counter of processed positions
sr.total_done++;
@@ -1246,18 +1177,12 @@ namespace Learner
break;
}
// Add the gradient at the nodes along each PV.
// If discount_rate is 0, this step is skipped.
if (discount_rate != 0)
pos_add_grad();
pos.do_move(m, state[ply++]);
}
if (illegal_move)
{
sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
continue;
goto RETRY_READ;
}
// Since we have reached the end of the PV, add the gradient here.
@@ -1276,18 +1201,18 @@ namespace Learner
{
// When EVAL_SAVE_ONLY_ONCE is defined,
// do not create a subfolder, since we want to save only once.
Eval::save_eval("");
Eval::NNUE::save_eval("");
}
else if (is_final)
{
Eval::save_eval("final");
Eval::NNUE::save_eval("final");
return true;
}
else
{
static int dir_number = 0;
const std::string dir_name = std::to_string(dir_number++);
Eval::save_eval(dir_name);
Eval::NNUE::save_eval(dir_name);
if (newbob_decay != 1.0 && latest_loss_count > 0) {
static int trials = newbob_num_trials;
@@ -1310,12 +1235,12 @@ namespace Learner
if (--trials > 0 && !is_final)
{
cout
<< "reducing learning rate scale from " << newbob_scale
<< "reducing learning rate from " << newbob_scale
<< " to " << (newbob_scale * newbob_decay)
<< " (" << trials << " more trials)" << endl;
newbob_scale *= newbob_decay;
Eval::NNUE::SetGlobalLearningRateScale(newbob_scale);
global_learning_rate = newbob_scale;
}
}
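The surrounding logic is a newbob-style schedule: keep the current learning rate while the validation loss improves, otherwise decay it by newbob_decay, giving up after newbob_num_trials failed reductions. A minimal standalone sketch with hypothetical names:

// Sketch of the newbob-style learning-rate schedule (a hypothetical
// restatement of the logic above, not the trainer's actual API).
struct NewbobSketch {
    double best_loss   = 1e9;  // best validation loss seen so far
    double lr          = 1.0;  // current learning rate
    double decay       = 0.5;  // newbob_decay
    int    trials_left = 2;    // newbob_num_trials
};

// Returns false when training should stop (no trials left).
bool on_validation_loss(NewbobSketch& s, double latest_loss)
{
    if (latest_loss < s.best_loss) {
        s.best_loss = latest_loss;  // improved: keep the current rate
        return true;
    }
    if (--s.trials_left <= 0)
        return false;               // out of trials: stop
    s.lr *= s.decay;                // decay the rate and retry
    return true;
}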
@@ -1593,13 +1518,6 @@ namespace Learner
string target_dir;
// If 0, it will be the default value.
double eta1 = 0.0;
double eta2 = 0.0;
double eta3 = 0.0;
uint64_t eta1_epoch = 0; // eta2 is not applied by default
uint64_t eta2_epoch = 0; // eta3 is not applied by default
// --- Function that only shuffles the teacher positions
// normal shuffle
@@ -1640,15 +1558,7 @@ namespace Learner
// Turn on if you want to pass a pre-shuffled file.
bool no_shuffle = false;
// elmo lambda
ELMO_LAMBDA = 0.33;
ELMO_LAMBDA2 = 0.33;
ELMO_LAMBDA_LIMIT = 32000;
// Discount rate. If this is set to a non-zero value, the gradient
// is added at nodes other than the PV terminal as well,
// weighted by this discount rate.
double discount_rate = 0;
global_learning_rate = 1.0;
// An option to moderately exclude the early game from the learning
// target, as in: if (gamePly < rand(reduction_gameply)) continue;
@@ -1662,7 +1572,6 @@ namespace Learner
uint64_t eval_save_interval = LEARN_EVAL_SAVE_INTERVAL;
uint64_t loss_output_interval = 0;
uint64_t mirror_percentage = 0;
string validation_set_file_name;
string seed;
@@ -1696,12 +1605,7 @@ namespace Learner
else if (option == "batchsize") is >> mini_batch_size;
// learning rate
else if (option == "eta") is >> eta1;
else if (option == "eta1") is >> eta1; // alias
else if (option == "eta2") is >> eta2;
else if (option == "eta3") is >> eta3;
else if (option == "eta1_epoch") is >> eta1_epoch;
else if (option == "eta2_epoch") is >> eta2_epoch;
else if (option == "lr") is >> global_learning_rate;
// Also accept the old option name.
else if (option == "use_draw_in_training"
@@ -1720,17 +1624,9 @@ namespace Learner
else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient;
// Discount rate
else if (option == "discount_rate") is >> discount_rate;
// Using WDL with win rate model instead of sigmoid
else if (option == "use_wdl") is >> use_wdl;
// LAMBDA
else if (option == "lambda") is >> ELMO_LAMBDA;
else if (option == "lambda2") is >> ELMO_LAMBDA2;
else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT;
else if (option == "reduction_gameply") is >> reduction_gameply;
// shuffle related
@@ -1751,7 +1647,6 @@ namespace Learner
else if (option == "eval_save_interval") is >> eval_save_interval;
else if (option == "loss_output_interval") is >> loss_output_interval;
else if (option == "mirror_percentage") is >> mirror_percentage;
else if (option == "validation_set_file_name") is >> validation_set_file_name;
// Rabbit convert related
@@ -1767,7 +1662,6 @@ namespace Learner
else if (option == "src_score_max_value") is >> src_score_max_value;
else if (option == "dest_score_min_value") is >> dest_score_min_value;
else if (option == "dest_score_max_value") is >> dest_score_max_value;
else if (option == "convert_teacher_signal_to_winning_probability") is >> convert_teacher_signal_to_winning_probability;
else if (option == "seed") is >> seed;
// Otherwise, it's a filename.
else
@@ -1903,8 +1797,7 @@ namespace Learner
cout << "nn_batch_size : " << nn_batch_size << endl;
cout << "nn_options : " << nn_options << endl;
cout << "learning rate : " << eta1 << " , " << eta2 << " , " << eta3 << endl;
cout << "eta_epoch : " << eta1_epoch << " , " << eta2_epoch << endl;
cout << "learning rate : " << global_learning_rate << endl;
cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl;
cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl;
cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl;
@@ -1917,17 +1810,10 @@ namespace Learner
cout << "scheduling : default" << endl;
}
cout << "discount rate : " << discount_rate << endl;
// If reduction_gameply is 0, rand(0) would divide by zero, so clamp it to 1.
reduction_gameply = max(reduction_gameply, 1);
cout << "reduction_gameply : " << reduction_gameply << endl;
cout << "LAMBDA : " << ELMO_LAMBDA << endl;
cout << "LAMBDA2 : " << ELMO_LAMBDA2 << endl;
cout << "LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl;
cout << "mirror_percentage : " << mirror_percentage << endl;
cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl;
cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl;
@@ -1942,13 +1828,31 @@ namespace Learner
Threads.main()->ponder = false;
// About Search::Limits
// Be careful because this member variable is global and affects other threads.
{
auto& limits = Search::Limits;
// Make the search equivalent to the "go infinite" command (time management would otherwise interfere).
limits.infinite = true;
// The PV output is a nuisance during training, so suppress it.
limits.silent = true;
// If nodes were set, it would be compared against each thread's accumulated node count, so do not use it.
limits.nodes = 0;
// The search depth is instead taken from the argument passed to Learner::search().
limits.depth = 0;
}
cout << "init_training.." << endl;
Eval::NNUE::InitializeTraining(eta1, eta1_epoch, eta2, eta2_epoch, eta3);
Eval::NNUE::InitializeTraining(seed);
Eval::NNUE::SetBatchSize(nn_batch_size);
Eval::NNUE::SetOptions(nn_options);
if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) {
// Save the current net to [EvalSaveDir]\original.
Eval::save_eval("original");
Eval::NNUE::save_eval("original");
// Set the folder above to best_nn_directory so that the trainer can
// restore the network parameters from the original net file.
@@ -1959,7 +1863,6 @@ namespace Learner
cout << "init done." << endl;
// Reflect other option settings.
learn_think.discount_rate = discount_rate;
learn_think.eval_limit = eval_limit;
learn_think.save_only_once = save_only_once;
learn_think.sr.no_shuffle = no_shuffle;
@@ -1971,7 +1874,6 @@ namespace Learner
learn_think.eval_save_interval = eval_save_interval;
learn_think.loss_output_interval = loss_output_interval;
learn_think.mirror_percentage = mirror_percentage;
// Start a thread that loads the training-position (sfen) file in the background.
// (If this is not started, mse cannot be calculated.)
@@ -1992,24 +1894,6 @@ namespace Learner
// Calculate rmse once at this point (at the 0 sfen mark)
// sr.calc_rmse();
// About Search::Limits
// Be careful because this member variable is global and affects other threads.
{
auto& limits = Search::Limits;
// Make the search equivalent to the "go infinite" command (time management would otherwise interfere).
limits.infinite = true;
// The PV output is a nuisance during training, so suppress it.
limits.silent = true;
// If nodes were set, it would be compared against each thread's accumulated node count, so do not use it.
limits.nodes = 0;
// The search depth is instead taken from the argument passed to Learner::search().
limits.depth = 0;
}
if (newbob_decay != 1.0) {
learn_think.calc_loss(0, -1);
learn_think.best_loss = learn_think.latest_loss_sum / learn_think.latest_loss_count;