From 58863c32436c22ea05121e039850253510d923d1 Mon Sep 17 00:00:00 2001
From: noobpwnftw
Date: Tue, 8 Sep 2020 11:39:21 +0800
Subject: [PATCH 01/30] Update gensfen.cpp

---
 src/learn/gensfen.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp
index 6c8c455e..4214233b 100644
--- a/src/learn/gensfen.cpp
+++ b/src/learn/gensfen.cpp
@@ -58,7 +58,7 @@ namespace Learner
     // If hybrid eval is enabled, training data
     // generation and training don't work well.
     // https://discordapp.com/channels/435943710472011776/733545871911813221/748524079761326192
-    static bool use_raw_nnue_eval = true;
+    extern bool use_raw_nnue_eval;

     // Helper class for exporting Sfen
     struct SfenWriter

From 832c414b0d78263595b4e7cd6d19c87e61519010 Mon Sep 17 00:00:00 2001
From: Tomasz Sobczyk
Date: Mon, 7 Sep 2020 23:03:53 +0200
Subject: [PATCH 02/30] First batch of reorganization.

---
 src/learn/learner.cpp | 402 +++++++++++++++++++++++++-----------------
 src/misc.cpp          |  21 ++-
 src/misc.h            |  32 +++-
 3 files changed, 278 insertions(+), 177 deletions(-)

diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp
index 7021fd7f..98c8e32e 100644
--- a/src/learn/learner.cpp
+++ b/src/learn/learner.cpp
@@ -66,7 +66,7 @@ using namespace std;
 //extern Book::BookMoveSelector book;

 template <typename T>
-T operator += (std::atomic<T>& x, const T rhs)
+T operator +=(std::atomic<T>& x, const T rhs)
 {
     T old = x.load(std::memory_order_consume);
     // The value may be overwritten by another thread at this point;
     // the idea is that the value is not destroyed.
     T desired = old + rhs;
@@ -84,8 +84,9 @@ namespace Learner
     static bool use_draw_games_in_training = false;
     static bool use_draw_games_in_validation = false;
     static bool skip_duplicated_positions_in_training = true;
-    // 1.0 / PawnValueEg / 4.0 * log(10.0)
-    static double winning_probability_coefficient = 0.00276753015984861260098316280611;
+
+    static double winning_probability_coefficient = 1.0 / PawnValueEg / 4.0 * std::log(10.0);
+
     // Score scale factors. ex) If we set src_score_min_value = 0.0,
     // src_score_max_value = 1.0, dest_score_min_value = 0.0,
     // dest_score_max_value = 10000.0, [0.0, 1.0] will be scaled to [0, 10000].
@@ -93,6 +94,7 @@ namespace Learner
     static double src_score_max_value = 1.0;
     static double dest_score_min_value = 0.0;
     static double dest_score_max_value = 1.0;
+
     // Assume teacher signals are the scores of deep searches, and convert them into winning
     // probabilities in the trainer. Sometimes we want to use the winning probabilities in the training
     // data directly. In those cases, we set false to this variable.
     static bool convert_teacher_signal_to_winning_probability = true;
@@ -102,7 +104,7 @@ namespace Learner
     // If hybrid eval is enabled, training data
     // generation and training don't work well.
     // https://discordapp.com/channels/435943710472011776/733545871911813221/748524079761326192
+    // This CANNOT be static since it's used elsewhere.
-    static bool use_raw_nnue_eval = true;
+    bool use_raw_nnue_eval = true;

     // Using WDL with win rate model instead of sigmoid
     static bool use_wdl = false;

@@ -111,38 +113,37 @@ namespace Learner
     // command to learn from the generated game (learn)
     // -----------------------------------

-    // ordinary sigmoid function
-    double sigmoid(double x)
-    {
-        return 1.0 / (1.0 + std::exp(-x));
-    }
-
     // A function that converts the evaluation value to the winning rate [0,1]
     double winning_percentage(double value)
     {
         // 1/(1+10^(-Eval/4))
         // = 1/(1+e^(-Eval/4*ln(10)))
         // = sigmoid(Eval/4*ln(10))
-        return sigmoid(value * winning_probability_coefficient);
+        return Math::sigmoid(value * winning_probability_coefficient);
     }

     // A function that converts the evaluation value to the winning rate [0,1]
     double winning_percentage_wdl(double value, int ply)
     {
+        constexpr double wdl_total = 1000.0;
+        constexpr double draw_score = 0.5;
+
         double wdl_w = UCI::win_rate_model_double(value, ply);
         double wdl_l = UCI::win_rate_model_double(-value, ply);
-        double wdl_d = 1000.0 - wdl_w - wdl_l;
+        double wdl_d = wdl_total - wdl_w - wdl_l;

-        return (wdl_w + wdl_d / 2.0) / 1000.0;
+        return (wdl_w + wdl_d * draw_score) / wdl_total;
     }

     // A function that converts the evaluation value to the winning rate [0,1]
     double winning_percentage(double value, int ply)
     {
-        if (use_wdl) {
+        if (use_wdl)
+        {
             return winning_percentage_wdl(value, ply);
         }
-        else {
+        else
+        {
             return winning_percentage(value);
         }
     }

@@ -151,7 +152,7 @@ namespace Learner
     {
         double p = deep_win_rate;
         double q = winning_percentage(shallow_eval, ply);
-        return -p * std::log(q) - (1 - p) * std::log(1 - q);
+        return -p * std::log(q) - (1.0 - p) * std::log(1.0 - q);
     }

     double calc_d_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply)
     {
         constexpr double epsilon = 0.000001;
         double y1 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval, ply);
         double y2 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval + epsilon, ply);

         // Divide by the winning_probability_coefficient to match scale with the sigmoidal win rate
         return ((y2 - y1) / epsilon) / winning_probability_coefficient;
     }

-    double dsigmoid(double x)
-    {
-        // Sigmoid function
-        // f(x) = 1/(1+exp(-x))
-        // its first derivative is
-        // f'(x) = df/dx = f(x) * {1 - f(x)}
-
-        return sigmoid(x) * (1.0 - sigmoid(x));
-    }
-
     // When the objective function is the sum of squares of the difference in winning percentage
 #if defined (LOSS_FUNCTION_IS_WINNING_PERCENTAGE)
     // function to calculate the gradient
@@ -202,7 +192,7 @@ namespace Learner
         double p = winning_percentage(deep);
         double q = winning_percentage(shallow);

-        return (q - p) * dsigmoid(double(shallow) / 600.0);
+        return (q - p) * Math::dsigmoid(double(shallow) / 600.0);
     }
 #endif

     // A constant used in elmo (WCSC27). Adjustment required.
     double ELMO_LAMBDA = 0.33;
     double ELMO_LAMBDA2 = 0.33;
     double ELMO_LAMBDA_LIMIT = 32000;

+    // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
+    double get_scaled_signal(double signal)
+    {
+        double scaled_signal = signal;
+
+        // Normalize to [0.0, 1.0].
+        scaled_signal =
+            (scaled_signal - src_score_min_value)
+            / (src_score_max_value - src_score_min_value);
+
+        // Scale to [dest_score_min_value, dest_score_max_value].
+        scaled_signal =
+            scaled_signal * (dest_score_max_value - dest_score_min_value)
+            + dest_score_min_value;
+
+        return scaled_signal;
+    }
+
+    // Teacher winning probability.
+    double calculate_p(double teacher_signal, int ply)
+    {
+        const double scaled_teacher_signal = get_scaled_signal(teacher_signal);
+
+        // Teacher winning probability.
+        double p = scaled_teacher_signal;
+        if (convert_teacher_signal_to_winning_probability)
+        {
+            p = winning_percentage(scaled_teacher_signal);
+        }
+    }
+
+    double calculate_lambda(double teacher_signal)
+    {
+        // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
+        const double lambda =
+            (std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT)
+            ? ELMO_LAMBDA2
+            : ELMO_LAMBDA;
+
+        return lambda;
+    }
+
+    double calculate_t(int game_result)
+    {
+        // Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw.
+        // game_result = 1,0,-1 so add 1 and divide by 2.
+        const double t = double(game_result + 1) * 0.5;
+
+        return t;
+    }
+
     double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
     {
         // elmo (WCSC27) method
         // Correct with the actual game wins and losses.
-
-        // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
-        double scaled_teacher_signal = teacher_signal;
-        // Normalize to [0.0, 1.0].
-        scaled_teacher_signal = (scaled_teacher_signal - src_score_min_value) / (src_score_max_value - src_score_min_value);
-        // Scale to [dest_score_min_value, dest_score_max_value].
-        scaled_teacher_signal = scaled_teacher_signal * (dest_score_max_value - dest_score_min_value) + dest_score_min_value;

         const double q = winning_percentage(shallow, psv.gamePly);
-        // Teacher winning probability.
-        double p = scaled_teacher_signal;
-        if (convert_teacher_signal_to_winning_probability) {
-            p = winning_percentage(scaled_teacher_signal, psv.gamePly);
-        }
-
-        // Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw.
-        // game_result = 1,0,-1 so add 1 and divide by 2.
-        const double t = double(psv.game_result + 1) / 2;
-
-        // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
-        const double lambda = (abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 : ELMO_LAMBDA;
+        const double p = calculate_p(teacher_signal, psv.gamePly);
+        const double t = calculate_t(psv.game_result);
+        const double lambda = calculate_lambda(teacher_signal);

         double grad;
-        if (use_wdl) {
-            double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly);
-            double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly);
+        if (use_wdl)
+        {
+            const double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly);
+            const double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly);
             grad = lambda * dce_p + (1.0 - lambda) * dce_t;
         }
-        else {
+        else
+        {
             // Use the actual win rate as a correction term.
             // This is the idea of elmo (WCSC27), a modern-day OOPArt.
             grad = lambda * (q - p) + (1.0 - lambda) * (q - t);
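
To make the blend concrete, here is a minimal standalone sketch of the formula used above; the input numbers are made up purely for illustration, only the formula mirrors calc_grad().

    // q: shallow-search win probability, p: teacher win probability,
    // t: game result mapped to {0, 0.5, 1}, lambda: blend weight.
    #include <cassert>
    #include <cmath>

    static double blended_gradient(double q, double p, double t, double lambda)
    {
        return lambda * (q - p) + (1.0 - lambda) * (q - t);
    }

    int main()
    {
        // Shallow search slightly below the teacher signal; the game was won.
        const double g = blended_gradient(0.55, 0.60, 1.0, 0.33);
        // 0.33 * (-0.05) + 0.67 * (-0.45) = -0.318
        assert(std::abs(g - (-0.318)) < 1e-12);
        return 0;
    }
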
@@ -296,30 +322,25 @@ namespace Learner

     // Calculate cross entropy during learning
     // The individual cross entropy of the win/loss term and win rate term of the elmo expression
     // is returned to the arguments cross_entropy_eval and cross_entropy_win.
-    void calc_cross_entropy(Value teacher_signal, Value shallow, const PackedSfenValue& psv,
-        double& cross_entropy_eval, double& cross_entropy_win, double& cross_entropy,
-        double& entropy_eval, double& entropy_win, double& entropy)
+    void calc_cross_entropy(
+        Value teacher_signal,
+        Value shallow,
+        const PackedSfenValue& psv,
+        double& cross_entropy_eval,
+        double& cross_entropy_win,
+        double& cross_entropy,
+        double& entropy_eval,
+        double& entropy_win,
+        double& entropy)
     {
-        // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
-        double scaled_teacher_signal = teacher_signal;
-        // Normalize to [0.0, 1.0].
-        scaled_teacher_signal = (scaled_teacher_signal - src_score_min_value) / (src_score_max_value - src_score_min_value);
-        // Scale to [dest_score_min_value, dest_score_max_value].
-        scaled_teacher_signal = scaled_teacher_signal * (dest_score_max_value - dest_score_min_value) + dest_score_min_value;
-
-        // Teacher winning probability.
-        double p = scaled_teacher_signal;
-        if (convert_teacher_signal_to_winning_probability) {
-            p = winning_percentage(scaled_teacher_signal);
-        }
-
-        const double q /* eval_winrate */ = winning_percentage(shallow);
-        const double t = double(psv.game_result + 1) / 2;
+        const double q = winning_percentage(shallow, psv.gamePly);
+        const double p = calculate_p(teacher_signal, psv.gamePly);
+        const double t = calculate_t(psv.game_result);
+        const double lambda = calculate_lambda(teacher_signal);

         constexpr double epsilon = 0.000001;

-        // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
-        const double lambda = (abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 : ELMO_LAMBDA;
-
         const double m = (1.0 - lambda) * t + lambda * p;

         cross_entropy_eval =
@@ -343,7 +364,8 @@ namespace Learner

     // Other variations may be prepared as the objective function..

-    double calc_grad(Value shallow, const PackedSfenValue& psv) {
+    double calc_grad(Value shallow, const PackedSfenValue& psv)
+    {
         return calc_grad((Value)psv.score, shallow, psv);
     }
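
Since cross entropy is linear in its target, blending the two teacher targets into m = (1 - lambda) * t + lambda * p and blending the two loss terms give exactly the same total. A standalone check of that identity (the values are made up; epsilon is the same guard as in the patch):

    #include <cassert>
    #include <cmath>

    // Cross entropy of target y against prediction q, with epsilon guarding log(0).
    static double cross_entropy(double y, double q, double eps)
    {
        return -y * std::log(q + eps) - (1.0 - y) * std::log(1.0 - q + eps);
    }

    int main()
    {
        const double lambda = 0.33, p = 0.6, t = 1.0, q = 0.55, eps = 0.000001;
        const double m = (1.0 - lambda) * t + lambda * p;
        const double blended_target = cross_entropy(m, q, eps);
        const double blended_losses = lambda * cross_entropy(p, q, eps)
                                    + (1.0 - lambda) * cross_entropy(t, q, eps);
        assert(std::abs(blended_target - blended_losses) < 1e-12);
        return 0;
    }
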
     // Sfen reader
     struct SfenReader
     {
         // number of positions used for calculation such as mse
         // mini-batch size = 1M is standard, so 0.2% of that should be negligible in terms of time.
         // Since search() is performed with depth = 1 in calculation of move match rate, simple comparison is not possible...
         static constexpr uint64_t sfen_for_mse_size = 2000;

         // Number of positions buffered by each thread: 0.1M positions (4M positions at 40HT).
         static constexpr size_t THREAD_BUFFER_SIZE = 10 * 1000;

         // Buffer for reading files (if this is made larger, the shuffle becomes larger and the positions may vary more.
         // If it is too large, the memory consumption will increase.)
         // SFEN_READ_SIZE is a multiple of THREAD_BUFFER_SIZE.
         static constexpr const size_t SFEN_READ_SIZE = LEARN_SFEN_READ_SIZE;

+        // hash to limit the reading of the same position
+        // Are 64 million positions too many? Probably not.
+        // It must be 2**N because it will be used as the mask to calculate hash_index.
+        static constexpr uint64_t READ_SFEN_HASH_SIZE = 64 * 1024 * 1024;
+
         // Do not use std::random_device(), because it always generates the same integers on MinGW.
-        SfenReader(int thread_num) : prng(std::chrono::system_clock::now().time_since_epoch().count())
+        SfenReader(int thread_num) :
+            prng(std::chrono::system_clock::now().time_since_epoch().count())
         {
             packed_sfens.resize(thread_num);
             total_read = 0;
@@ -398,6 +426,7 @@ namespace Learner
                 cout << "Error! read packed sfen failed." << endl;
                 break;
             }
+
             sfen_for_mse.push_back(ps);

             // Get the hash key.
@@ -418,8 +447,10 @@ namespace Learner
             {
                 if (eval_limit < abs(p.score))
                     continue;
+
                 if (!use_draw_games_in_validation && p.game_result == 0)
                     continue;
+
                 sfen_for_mse.push_back(p);
             }
             else
@@ -436,7 +467,7 @@ namespace Learner
         // [ASYNC] Thread returns one position. Otherwise returns false.
         bool read_to_thread_buffer(size_t thread_id, PackedSfenValue& ps)
         {
             // If there are any positions left in the thread buffer, retrieve one and return it.
             auto& thread_ps = packed_sfens[thread_id];

             // Fill the read buffer if there is no remaining buffer, but if it doesn't even exist, finish.
-            if ((thread_ps == nullptr || thread_ps->size() == 0) // If the buffer is empty, fill it.
+            if ((thread_ps == nullptr || thread_ps->empty()) // If the buffer is empty, fill it.
                 && !read_to_thread_buffer_impl(thread_id))
                 return false;

             // read_to_thread_buffer_impl() returned true,
             // so the filling of the thread buffer with positions has completed successfully
             // and thread_ps->rbegin() is alive.

-            ps = *(thread_ps->rbegin());
+            ps = thread_ps->back();
             thread_ps->pop_back();

             // If you've run out of buffers, free this buffer yourself.
-            if (thread_ps->size() == 0)
+            if (thread_ps->empty())
             {
                 thread_ps.reset();
             }
@@ -507,7 +538,7 @@ namespace Learner
                 return false;

             // Get the next file name.
-            string filename = *filenames.rbegin();
+            string filename = filenames.back();
             filenames.pop_back();

             fs.open(filename, ios::in | ios::binary);
@@ -523,6 +554,7 @@ namespace Learner
                 // This size() is read only, so you don't need to lock it.
                 while (!stop_flag && packed_sfens_pool.size() >= SFEN_READ_SIZE / THREAD_BUFFER_SIZE)
                     sleep(100);
+
                 if (stop_flag)
                     return;
@@ -555,9 +587,7 @@ namespace Learner
                 if (!no_shuffle)
                 {
-                    auto size = sfens.size();
-                    for (size_t i = 0; i < size; ++i)
-                        swap(sfens[i], sfens[(size_t)(prng.rand((uint64_t)size - i) + i)]);
+                    Algo::shuffle(sfens, prng);
                 }

                 // Divide this by THREAD_BUFFER_SIZE. There should be size pieces.
@@ -591,6 +621,13 @@ namespace Learner
             }
         }

+        // Determine if this is a position used for calculating rmse.
+        // (Positions used for rmse should not be used for learning.)
+        bool is_for_rmse(Key key) const
+        {
+            return sfen_for_mse_hash.count(key) != 0;
+        }
+
         // sfen files
         vector<string> filenames;
@@ -613,17 +650,6 @@ namespace Learner
         bool stop_flag;

-        // Determine if this is a position used for calculating rmse.
-        // (Positions used for rmse should not be used for learning.)
-        bool is_for_rmse(Key key) const
-        {
-            return sfen_for_mse_hash.count(key) != 0;
-        }
-
-        // hash to limit the reading of the same position
-        // Are 64 million positions too many? Probably not.
-        // It must be 2**N because it will be used as the mask to calculate hash_index.
-        static const uint64_t READ_SFEN_HASH_SIZE = 64 * 1024 * 1024;
+
         vector<Key> hash; // 64MB*8 = 512MB

         // test positions for mse calculation
         PSVector sfen_for_mse;
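
The reader hands whole buffers from a shared pool to per-thread queues; a worker only touches the lock when its own buffer runs dry. A stripped-down sketch of that handoff pattern, independent of the engine types (Buffer stands in for PSVector):

    #include <deque>
    #include <memory>
    #include <mutex>
    #include <vector>

    using Buffer = std::vector<int>;  // stand-in for PSVector

    std::mutex pool_mutex;
    std::deque<std::unique_ptr<Buffer>> pool;  // filled by a producer thread

    // Each worker owns at most one buffer and refills it from the pool.
    bool refill(std::unique_ptr<Buffer>& thread_buf)
    {
        std::lock_guard<std::mutex> lk(pool_mutex);
        if (pool.empty())
            return false;
        thread_buf = std::move(pool.front());
        pool.pop_front();
        return true;
    }

    bool next_item(std::unique_ptr<Buffer>& thread_buf, int& out)
    {
        if ((!thread_buf || thread_buf->empty()) && !refill(thread_buf))
            return false;
        out = thread_buf->back();
        thread_buf->pop_back();
        if (thread_buf->empty())
            thread_buf.reset();  // release the drained buffer
        return true;
    }
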
@@ -663,7 +689,10 @@ namespace Learner

     // Class to generate sfen with multiple threads
     struct LearnerThink : public MultiThink
     {
-        LearnerThink(SfenReader& sr_) :sr(sr_), stop_flag(false), save_only_once(false)
+        LearnerThink(SfenReader& sr_) :
+            sr(sr_),
+            stop_flag(false),
+            save_only_once(false)
         {
 #if defined ( LOSS_FUNCTION_IS_ELMO_METHOD )
             learn_sum_cross_entropy_eval = 0.0;
@@ -686,7 +715,12 @@ namespace Learner
         virtual void thread_worker(size_t thread_id);

         // Start a thread that loads the position file in the background.
-        void start_file_read_worker() { sr.start_file_read_worker(); }
+        void start_file_read_worker()
+        {
+            sr.start_file_read_worker();
+        }
+
+        Value get_shallow_value(Position& task_pos);

         // save merit function parameters to a file
         bool save(bool is_final = false);
@@ -753,6 +787,33 @@ namespace Learner
         TaskDispatcher task_dispatcher;
     };

+    Value LearnerThink::get_shallow_value(Position& task_pos)
+    {
+        // Evaluation value for shallow search.
+        // The value of evaluate() could be used, but when calculating the loss it is
+        // hard to compare against learn_cross_entropy, so use qsearch() instead.
+        // EvalHash has been disabled in advance (if not, the same value will be returned every time).
+        const auto [_, pv] = qsearch(task_pos);
+
+        std::vector<StateInfo, AlignedAllocator<StateInfo>> states(pv.size());
+        for (size_t i = 0; i < pv.size(); ++i)
+        {
+            task_pos.do_move(pv[i], states[i]);
+            Eval::NNUE::update_eval(task_pos);
+        }
+
+        const auto rootColor = task_pos.side_to_move();
+        const Value shallow_value =
+            (rootColor == task_pos.side_to_move())
+            ? Eval::evaluate(task_pos)
+            : -Eval::evaluate(task_pos);
+
+        for (auto it = pv.rbegin(); it != pv.rend(); ++it)
+            task_pos.undo_move(*it);
+
+        return shallow_value;
+    }
+
     void LearnerThink::calc_loss(size_t thread_id, uint64_t done)
     {
         // There is no point in hitting the transposition table, so at this timing the generation of the transposition table is updated.
@@ -800,8 +861,6 @@ namespace Learner
         pos.set(StartFEN, false, &si, th);
         std::cout << "hirate eval = " << Eval::evaluate(pos);

-        //Eval::print_eval_stat(pos);
-
         // It's better to parallelize here, but it's a bit troublesome because the search before slave has not finished.
         // I created a mechanism to call task, so I will use it.
@@ -818,6 +877,7 @@ namespace Learner
             // It is not possible to capture pos used above, so specify the variables you want to capture one by one.
             auto task =
                 [
+                    this,
                    &ps,
                    &test_sum_cross_entropy_eval,
                    &test_sum_cross_entropy_win,
                    &test_sum_cross_entropy,
                    &test_sum_entropy_eval,
                    &test_sum_entropy_win,
                    &test_sum_entropy,
                    &sum_norm,
                    &task_count,
                    &move_accord_count
                ](size_t task_thread_id)
            {
-                // Does C++ properly capture a new ps instance for each loop?.
                auto task_th = Threads[task_thread_id];
                auto& task_pos = task_th->rootPos;
                StateInfo task_si;
                if (task_pos.set_from_packed_sfen(ps.sfen, &task_si, task_th) != 0)
                {
                    cout << "Error! : illegal packed sfen " << task_pos.fen() << endl;
                }

-                // Evaluation value for shallow search
-                // The value of evaluate() may be used, but when calculating loss, learn_cross_entropy and
-                // Use qsearch() because it is difficult to compare the values.
-                // EvalHash has been disabled in advance. (If not, the same value will be returned every time)
-                auto task_search_result = qsearch(task_pos);
-
-                auto shallow_value = task_search_result.first;
-                {
-                    const auto rootColor = task_pos.side_to_move();
-                    const auto pv = task_search_result.second;
-                    std::vector<StateInfo, AlignedAllocator<StateInfo>> states(pv.size());
-                    for (size_t i = 0; i < pv.size(); ++i)
-                    {
-                        task_pos.do_move(pv[i], states[i]);
-                        Eval::NNUE::update_eval(task_pos);
-                    }
-                    shallow_value = (rootColor == task_pos.side_to_move()) ? Eval::evaluate(task_pos) : -Eval::evaluate(task_pos);
-                    for (auto it = pv.rbegin(); it != pv.rend(); ++it)
-                        task_pos.undo_move(*it);
-                }
+                const Value shallow_value = get_shallow_value(task_pos);

                // Evaluation value of deep search
                auto deep_value = (Value)ps.score;
@@ -887,7 +927,17 @@ namespace Learner
 #if defined (LOSS_FUNCTION_IS_ELMO_METHOD)
                double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy;
                double test_entropy_eval, test_entropy_win, test_entropy;
-                calc_cross_entropy(deep_value, shallow_value, ps, test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy, test_entropy_eval, test_entropy_win, test_entropy);
+                calc_cross_entropy(
+                    deep_value,
+                    shallow_value,
+                    ps,
+                    test_cross_entropy_eval,
+                    test_cross_entropy_win,
+                    test_cross_entropy,
+                    test_entropy_eval,
+                    test_entropy_win,
+                    test_entropy);
+
                // The total cross entropy need not be abs() by definition.
                test_sum_cross_entropy_eval += test_cross_entropy_eval;
                test_sum_cross_entropy_win += test_cross_entropy_win;
@@ -900,8 +950,8 @@ namespace Learner

                // Determine if the teacher's move and the move of the shallow search match
                {
-                    auto r = search(task_pos, 1);
-                    if ((uint16_t)r.second[0] == ps.move)
+                    const auto [value, pv] = search(task_pos, 1);
+                    if ((uint16_t)pv[0] == ps.move)
                        move_accord_count.fetch_add(1, std::memory_order_relaxed);
                }
@@ -950,6 +1000,7 @@ namespace Learner
                << " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size()
                << " , norm = " << sum_norm
                << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%";
+
            if (done != static_cast<uint64_t>(-1))
            {
                cout
@@ -962,7 +1013,8 @@ namespace Learner
            }
            cout << endl;
        }
-        else {
+        else
+        {
            cout << "Error! : sr.sfen_for_mse.size() = " << sr.sfen_for_mse.size() << " , done = " << done << endl;
        }
@@ -978,7 +1030,6 @@ namespace Learner
 #endif
    }

-
    void LearnerThink::thread_worker(size_t thread_id)
    {
 #if defined(_OPENMP)
@@ -1092,7 +1143,9 @@ namespace Learner
        }

        PackedSfenValue ps;
-    RetryRead:;
+
+    RETRY_READ:;
+
        if (!sr.read_to_thread_buffer(thread_id, ps))
        {
            // ran out of thread pool for my thread.
@@ -1106,16 +1159,14 @@ namespace Learner
        // The evaluation value exceeds the learning target value.
        // Ignore this position.
        if (eval_limit < abs(ps.score))
-            goto RetryRead;
-
+            goto RETRY_READ;

        if (!use_draw_games_in_training && ps.game_result == 0)
-            goto RetryRead;
-
+            goto RETRY_READ;

        // Skip over the opening phase
        if (ps.gamePly < prng.rand(reduction_gameply))
-            goto RetryRead;
+            goto RETRY_READ;

 #if 0
        auto sfen = pos.sfen_unpack(ps.data);
@@ -1129,20 +1180,24 @@ namespace Learner
            // I got a strange sfen. Should be debugged!
            // Since it is an illegal sfen, it may not be displayed with pos.sfen(), but it is better than nothing.
            cout << "Error! : illegal packed sfen = " << pos.fen() << endl;
-            goto RetryRead;
+            goto RETRY_READ;
        }
+
 #if !defined(EVAL_NNUE)
+        if (skip_duplicated_positions_in_training)
        {
-            auto key = pos.key();
+            const auto key = pos.key();
+
            // Exclude the positions used for rmse calculation.
-            if (sr.is_for_rmse(key) && skip_duplicated_positions_in_training)
-                goto RetryRead;
+            if (sr.is_for_rmse(key))
+                goto RETRY_READ;

            // Exclude the most recently used positions.
-            auto hash_index = size_t(key & (sr.READ_SFEN_HASH_SIZE - 1));
-            auto key2 = sr.hash[hash_index];
-            if (key == key2 && skip_duplicated_positions_in_training)
-                goto RetryRead;
+            const auto hash_index = size_t(key & (sr.READ_SFEN_HASH_SIZE - 1));
+            const auto key2 = sr.hash[hash_index];
+            if (key == key2)
+                goto RETRY_READ;
+
            sr.hash[hash_index] = key; // Replace with the current key.
        }
 #endif
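
The duplicate filter above is just a fixed-size, power-of-two table of recently seen keys indexed by a mask. The same idea in isolation, with the table shrunk so the example stays small:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main()
    {
        constexpr uint64_t HASH_SIZE = 1024;                  // must be a power of two
        std::vector<uint64_t> seen(HASH_SIZE, 0);

        auto is_duplicate = [&](uint64_t key) {
            const size_t index = size_t(key & (HASH_SIZE - 1));  // cheap modulo
            if (seen[index] == key)
                return true;
            seen[index] = key;                                // remember the newest key
            return false;
        };

        assert(!is_duplicate(0xDEADBEEFULL));
        assert(is_duplicate(0xDEADBEEFULL));                  // immediate repeat is caught
        assert(!is_duplicate(0xDEADBEEFULL + HASH_SIZE));     // collisions evict, by design
        return 0;
    }
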
        // There is a possibility the position has no legal moves
        // (such a position shouldn't have been written out, but may exist in data from an old generation routine).
        // Skip the position if there are no legal moves (=checkmated or stalemate).
        if (MoveList<LEGAL>(pos).size() == 0)
-            goto RetryRead;
+            goto RETRY_READ;

        // I can read it, so try displaying it.
        // cout << pos << value << endl;

        // Evaluation value of shallow search (qsearch)
-        auto r = qsearch(pos);
-        auto pv = r.second;
+        const auto [shallow_value, pv] = qsearch(pos);

        // Evaluation value of deep search
-        auto deep_value = (Value)ps.score;
+        const auto deep_value = (Value)ps.score;

        // I feel that the mini batch has a better gradient.
        // Go to the leaf node as it is, add only to the gradient array, and later try AdaGrad at the time of rmse aggregation.
-        auto rootColor = pos.side_to_move();
+        const auto rootColor = pos.side_to_move();

        // If the initial PV is different, it is better not to use it for learning.
        // If it is the result of searching a completely different place, it may become noise.
@@ -1203,13 +1257,26 @@ namespace Learner
        // I don't think this is a very desirable property, as the position that gives that gradient will be different.
        // I have turned off the transposition table, but since the pv array has not been updated due to one stumbling block etc...

-        Value shallow_value = (rootColor == pos.side_to_move()) ? Eval::evaluate(pos) : -Eval::evaluate(pos);
+        const Value shallow_value =
+            (rootColor == pos.side_to_move())
+            ? Eval::evaluate(pos)
+            : -Eval::evaluate(pos);

 #if defined (LOSS_FUNCTION_IS_ELMO_METHOD)
        // Calculate loss for training data
        double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
        double learn_entropy_eval, learn_entropy_win, learn_entropy;
-        calc_cross_entropy(deep_value, shallow_value, ps, learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy, learn_entropy_eval, learn_entropy_win, learn_entropy);
+        calc_cross_entropy(
+            deep_value,
+            shallow_value,
+            ps,
+            learn_cross_entropy_eval,
+            learn_cross_entropy_win,
+            learn_cross_entropy,
+            learn_entropy_eval,
+            learn_entropy_win,
+            learn_entropy);
+
        learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
        learn_sum_cross_entropy_win += learn_cross_entropy_win;
        learn_sum_cross_entropy += learn_cross_entropy;
@@ -1266,7 +1333,8 @@ namespace Learner
            Eval::NNUE::update_eval(pos);
        }

-        if (illegal_move) {
+        if (illegal_move)
+        {
            sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
            continue;
        }
@@ -1284,7 +1352,6 @@ namespace Learner
        dj_dw = calc_grad(deep_value, shallow_value, ps);
        Eval::add_grad(pos, rootColor, dj_dw, without_kpp);
 #endif
-
    }
 }
@@ -1301,14 +1368,17 @@ namespace Learner
        // Do not dig a subfolder because I want to save it only once.
Eval::save_eval(""); } - else if (is_final) { + else if (is_final) + { Eval::save_eval("final"); return true; } - else { + else + { static int dir_number = 0; const std::string dir_name = std::to_string(dir_number++); Eval::save_eval(dir_name); + #if defined(EVAL_NNUE) if (newbob_decay != 1.0 && latest_loss_count > 0) { static int trials = newbob_num_trials; @@ -1316,22 +1386,28 @@ namespace Learner latest_loss_sum = 0.0; latest_loss_count = 0; cout << "loss: " << latest_loss; - if (latest_loss < best_loss) { + if (latest_loss < best_loss) + { cout << " < best (" << best_loss << "), accepted" << endl; best_loss = latest_loss; best_nn_directory = Path::Combine((std::string)Options["EvalSaveDir"], dir_name); trials = newbob_num_trials; } - else { + else + { cout << " >= best (" << best_loss << "), rejected" << endl; - if (best_nn_directory.empty()) { + if (best_nn_directory.empty()) + { cout << "WARNING: no improvement from initial model" << endl; } - else { + else + { cout << "restoring parameters from " << best_nn_directory << endl; Eval::NNUE::RestoreParameters(best_nn_directory); } - if (--trials > 0 && !is_final) { + + if (--trials > 0 && !is_final) + { cout << "reducing learning rate scale from " << newbob_scale << " to " << (newbob_scale * newbob_decay) << " (" << trials << " more trials)" << endl; @@ -1339,7 +1415,9 @@ namespace Learner Eval::NNUE::SetGlobalLearningRateScale(newbob_scale); } } - if (trials == 0) { + + if (trials == 0) + { cout << "converged" << endl; return true; } @@ -1371,10 +1449,11 @@ namespace Learner // Output progress every 10M phase or when all writing is completed if (((write_sfen_count % buffer_size) == 0) || (write_sfen_count == total_sfen_count)) + { cout << write_sfen_count << " / " << total_sfen_count << endl; + } }; - cout << endl << "write : " << output_file_name << endl; fstream fs(output_file_name, ios::out | ios::binary); @@ -1453,9 +1532,7 @@ namespace Learner auto write_buffer = [&](uint64_t size) { - // shuffle from buf[0] to buf[size-1] - for (uint64_t i = 0; i < size; ++i) - swap(buf[i], buf[(uint64_t)(prng.rand(size - i) + i)]); + Algo::shuffle(buf, prng); // write to a file fstream fs; @@ -1533,13 +1610,8 @@ namespace Learner auto& fs = afs[i]; fs.open(filename, ios::in | ios::binary); - fs.seekg(0, fstream::end); - uint64_t eofPos = (uint64_t)fs.tellg(); - fs.clear(); // Otherwise, the next seek may fail. - fs.seekg(0, fstream::beg); - uint64_t begPos = (uint64_t)fs.tellg(); - uint64_t file_size = eofPos - begPos; - uint64_t sfen_count = file_size / sizeof(PackedSfenValue); + const uint64_t file_size = get_file_size(fs); + const uint64_t sfen_count = file_size / sizeof(PackedSfenValue); a_count[i] = sfen_count; // Output the number of sfen stored in each file. @@ -1578,8 +1650,8 @@ namespace Learner PRNG prng(std::chrono::system_clock::now().time_since_epoch().count()); uint64_t size = (uint64_t)buf.size(); std::cout << "shuffle buf.size() = " << size << std::endl; - for (uint64_t i = 0; i < size; ++i) - swap(buf[i], buf[(uint64_t)(prng.rand(size - i) + i)]); + + Algo::shuffle(buf, prng); std::cout << "write : " << output_file_name << endl; diff --git a/src/misc.cpp b/src/misc.cpp index a23b1205..5ef5ecdc 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -627,18 +627,27 @@ void* aligned_malloc(size_t size, size_t align) return p; } +std::uint64_t get_file_size(std::fstream& fs) +{ + auto pos = fs.tellg(); + + fs.seekg(0, fstream::end); + const uint64_t eofPos = (uint64_t)fs.tellg(); + fs.clear(); // Otherwise, the next seek may fail. 
+    fs.seekg(0, fstream::beg);
+    const uint64_t begPos = (uint64_t)fs.tellg();
+    fs.seekg(pos);
+
+    return eofPos - begPos;
+}
+
 int read_file_to_memory(std::string filename, std::function<void*(uint64_t)> callback_func)
 {
    fstream fs(filename, ios::in | ios::binary);
    if (fs.fail())
        return 1;

-    fs.seekg(0, fstream::end);
-    uint64_t eofPos = (uint64_t)fs.tellg();
-    fs.clear(); // Otherwise the next seek may fail.
-    fs.seekg(0, fstream::beg);
-    uint64_t begPos = (uint64_t)fs.tellg();
-    uint64_t file_size = eofPos - begPos;
+    const uint64_t file_size = get_file_size(fs);
    //std::cout << "filename = " << filename << " , file_size = " << file_size << endl;

    // I know the file size, so call callback_func to get a buffer for this,

diff --git a/src/misc.h b/src/misc.h
index c918a351..5add3b36 100644
--- a/src/misc.h
+++ b/src/misc.h
@@ -26,6 +26,8 @@
 #include <chrono>
 #include <ostream>
 #include <string>
+#include <cmath>
+#include <vector>

 #include "types.h"
@@ -155,6 +157,7 @@ std::string now_string();
 // Also, if the buffer cannot be allocated in the callback function or if the file size is different from the expected file size,
 // Return nullptr. At this time, read_file_to_memory() interrupts reading and returns with an error.
+std::uint64_t get_file_size(std::fstream& fs);
 int read_file_to_memory(std::string filename, std::function<void*(uint64_t)> callback_func);
 int write_memory_to_file(std::string filename, void* ptr, uint64_t size);
@@ -199,20 +202,37 @@ inline std::ostream& operator<<(std::ostream& os, AsyncPRNG& prng)
 // Mathematical function used for progress calculation and learning
 namespace Math {
-    // Sigmoid function
-    // = 1.0 / (1.0 + std::exp(-x))
-    double sigmoid(double x);
+    inline double sigmoid(double x)
+    {
+        return 1.0 / (1.0 + std::exp(-x));
+    }

-    // Differentiation of sigmoid function
-    // = sigmoid(x) * (1.0-sigmoid(x))
-    double dsigmoid(double x);
+    inline double dsigmoid(double x)
+    {
+        // For the sigmoid function
+        // f(x) = 1/(1+exp(-x))
+        // the first derivative is
+        // f'(x) = df/dx = f(x) * {1 - f(x)}
+
+        return sigmoid(x) * (1.0 - sigmoid(x));
+    }

    // Clip v so that it fits between [lo,hi].
    // * In Stockfish, this function is written in bitboard.h.
    template <class T> constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
        return v < lo ? lo : v > hi ? hi : v;
    }
+}
+
+namespace Algo {
+    template <typename T, typename Rng>
+    void shuffle(std::vector<T>& buf, Rng&& prng)
+    {
+        const auto size = buf.size();
+        for (uint64_t i = 0; i < size; ++i)
+            std::swap(buf[i], buf[prng.rand(size - i) + i]);
+    }
 }

 // --------------------
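
For reference, the Algo::shuffle helper added above is a Fisher-Yates shuffle driven by the engine's PRNG; any generator exposing rand(n) uniform in [0, n) can drive it. A minimal usage sketch with a stand-in generator (StubRng is illustrative, not part of the patch; the modulo draw is biased but fine for a demo):

    #include <cstdint>
    #include <random>
    #include <vector>

    // Stand-in for the engine's PRNG: anything with rand(n) -> [0, n) works.
    struct StubRng
    {
        std::mt19937_64 gen{42};
        uint64_t rand(uint64_t n) { return gen() % n; }
    };

    template <typename T, typename Rng>
    void shuffle(std::vector<T>& buf, Rng&& prng)
    {
        const uint64_t size = buf.size();
        for (uint64_t i = 0; i < size; ++i)
            std::swap(buf[i], buf[prng.rand(size - i) + i]);
    }

    int main()
    {
        std::vector<int> v{1, 2, 3, 4, 5};
        shuffle(v, StubRng{});
        return 0;
    }
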
From 1482e5215afa1b457418d45805bb57a25f4529f4 Mon Sep 17 00:00:00 2001
From: Tomasz Sobczyk
Date: Mon, 7 Sep 2020 23:26:38 +0200
Subject: [PATCH 03/30] A second batch of code reorganization.

---
 src/Makefile              |   1 -
 src/learn/convert.cpp     |  10 +--
 src/learn/gensfen.cpp     |   8 +-
 src/learn/gensfen2019.cpp |   1 -
 src/learn/learn.h         |  56 ++++++-------
 src/learn/learner.cpp     | 170 +++++++++++++++-----------------------
 6 files changed, 96 insertions(+), 150 deletions(-)
 delete mode 100644 src/learn/gensfen2019.cpp

diff --git a/src/Makefile b/src/Makefile
index 9db13e44..ca851dba 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -56,7 +56,6 @@ SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp
	nnue/features/enpassant.cpp \
	nnue/nnue_test_command.cpp \
	extra/sfen_packer.cpp \
-	learn/gensfen2019.cpp \
	learn/learner.cpp \
	learn/gensfen.cpp \
	learn/convert.cpp \

diff --git a/src/learn/convert.cpp b/src/learn/convert.cpp
index b84dc2f8..9bd9548d 100644
--- a/src/learn/convert.cpp
+++ b/src/learn/convert.cpp
@@ -25,20 +25,12 @@
 #include
 #include
 #include
+#include <filesystem>

 #if defined (_OPENMP)
 #include <omp.h>
 #endif

-#if defined(_MSC_VER)
-// The C++ filesystem cannot be used unless it is C++17 or later or MSVC.
-// I tried to use windows.h, but with g++ of msys2 I can not get the files in the folder well.
-// Use dirent.h because there is no help for it.
-#include <filesystem>
-#elif defined(__GNUC__)
-#include <dirent.h>
-#endif
-
 using namespace std;

 namespace Learner

diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp
index 4214233b..b049192e 100644
--- a/src/learn/gensfen.cpp
+++ b/src/learn/gensfen.cpp
@@ -28,18 +28,12 @@
 #include
 #include
 #include
+#include <filesystem>

 #if defined (_OPENMP)
 #include <omp.h>
 #endif

-#if defined(_MSC_VER)
-// std::filesystem doesn't work on GCC even though it claims to support C++17.
-#include <filesystem>
-#elif defined(__GNUC__)
-#include <dirent.h>
-#endif
-
 #if defined(EVAL_NNUE)
 #include "../nnue/evaluate_nnue_learner.h"
 #include

diff --git a/src/learn/gensfen2019.cpp b/src/learn/gensfen2019.cpp
deleted file mode 100644
index 01293b9c..00000000
--- a/src/learn/gensfen2019.cpp
+++ /dev/null
@@ -1 +0,0 @@
-// just a place holder

diff --git a/src/learn/learn.h b/src/learn/learn.h
index e29ed74a..1bc39cf9 100644
--- a/src/learn/learn.h
+++ b/src/learn/learn.h
@@ -27,30 +27,6 @@
 // SGD looking only at the sign of the gradient. It requires less memory, but the accuracy is...
 // #define SGD_UPDATE

-// ----------------------
-// Settings for learning
-// ----------------------
-
-// mini-batch size.
-// Calculate the gradient by combining this number of positions.
-// If you make it smaller, the number of update_weights() calls increases and convergence is faster, but the gradient is less accurate.
-// If you make it larger, the number of update_weights() calls decreases so convergence is slower, but the gradient comes out more accurately.
-// I don't think you need to change this value in most cases.
-
-#define LEARN_MINI_BATCH_SIZE (1000 * 1000 * 1)
-
-// The number of positions to read from the file at one time. After reading this much, shuffle.
-// It is better to have a certain size, but this number x 40 bytes x 3 times as much memory is consumed. 400MB*3 is consumed for 10M positions.
-// Must be a multiple of THREAD_BUFFER_SIZE(=10000).
-
-#define LEARN_SFEN_READ_SIZE (1000 * 1000 * 10)
-
-// Saving interval of evaluation function at learning. Save each time you learn this number of positions.
-// Needless to say, the longer the saving interval, the shorter the learning time.
-// Folder name is incremented for each save like 0/, 1/, 2/...
-// By default, once every 1 billion positions.
-#define LEARN_EVAL_SAVE_INTERVAL (1000000000ULL)
-
 // ----------------------
 // Select the objective function
 // ----------------------
@@ -79,10 +55,6 @@
 // debug settings for learning
 // ----------------------

-// Reduce the output of rmse during learning to once per this number of times.
-// rmse calculation is done in one thread, so it takes some time, so reducing the output is effective.
-#define LEARN_RMSE_OUTPUT_INTERVAL 1
-
 // ----------------------
 // learning from zero vector
 // ----------------------
@@ -205,6 +177,34 @@ typedef float LearnFloatType;

 namespace Learner
 {
+    // ----------------------
+    // Settings for learning
+    // ----------------------
+
+    // mini-batch size.
+    // Calculate the gradient by combining this number of positions.
+    // If you make it smaller, the number of update_weights() calls increases and convergence is faster, but the gradient is less accurate.
+    // If you make it larger, the number of update_weights() calls decreases so convergence is slower, but the gradient comes out more accurately.
+    // I don't think you need to change this value in most cases.
+
+    constexpr std::size_t LEARN_MINI_BATCH_SIZE = 1000 * 1000 * 1;
+
+    // The number of positions to read from the file at one time. After reading this much, shuffle.
+    // It is better to have a certain size, but this number x 40 bytes x 3 times as much memory is consumed. 400MB*3 is consumed for 10M positions.
+    // Must be a multiple of THREAD_BUFFER_SIZE(=10000).
+
+    constexpr std::size_t LEARN_SFEN_READ_SIZE = 1000 * 1000 * 10;
+
+    // Saving interval of evaluation function at learning. Save each time you learn this number of positions.
+    // Needless to say, the longer the saving interval, the shorter the learning time.
+    // Folder name is incremented for each save like 0/, 1/, 2/...
+    // By default, once every 1 billion positions.
+    constexpr std::size_t LEARN_EVAL_SAVE_INTERVAL = 1000000000ULL;
+
+    // Reduce the output of rmse during learning to once per this number of times.
+    // rmse calculation is done in one thread, so it takes some time, so reducing the output is effective.
+    constexpr std::size_t LEARN_RMSE_OUTPUT_INTERVAL = 1;
+
     // Structure in which PackedSfen and evaluation value are integrated
     // If you write different contents for each option, it will be a problem when reusing the teacher game
     // For the time being, write all the following members regardless of the options.
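
The macro-to-constexpr move above trades textual substitution for typed, scoped constants, which also lets invariants be checked at compile time next to the definitions. A small sketch of the benefit (THREAD_BUFFER_SIZE copied from SfenReader in this patch series):

    #include <cstddef>

    namespace Learner
    {
        constexpr std::size_t LEARN_MINI_BATCH_SIZE = 1000 * 1000 * 1;
        constexpr std::size_t LEARN_SFEN_READ_SIZE = 1000 * 1000 * 10;

        // The reader assumes the read size is a whole number of thread buffers;
        // with constexpr constants this invariant can be enforced at compile time.
        constexpr std::size_t THREAD_BUFFER_SIZE = 10 * 1000;
        static_assert(LEARN_SFEN_READ_SIZE % THREAD_BUFFER_SIZE == 0,
                      "LEARN_SFEN_READ_SIZE must be a multiple of THREAD_BUFFER_SIZE");
    }
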
diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp
index 98c8e32e..ddfaff5a 100644
--- a/src/learn/learner.cpp
+++ b/src/learn/learner.cpp
@@ -45,15 +45,6 @@
 #include
 #endif

-#if defined(_MSC_VER)
-// The C++ filesystem cannot be used unless it is C++17 or later or MSVC.
-// I tried to use windows.h, but with g++ of msys2 I can not get the files in the folder well.
-// Use dirent.h because there is no help for it.
-#include <filesystem>
-#elif defined(__GNUC__)
-#include <dirent.h>
-#endif
-
 #if defined(EVAL_NNUE)
 #include "../nnue/evaluate_nnue_learner.h"
 #include

 using namespace std;

-//// This is defined in the search section.
-//extern Book::BookMoveSelector book;
+
+#if defined(USE_BOOK)
+// This is defined in the search section.
+extern Book::BookMoveSelector book;
+#endif

 template <typename T>
 T operator +=(std::atomic<T>& x, const T rhs)
@@ -128,9 +122,9 @@ namespace Learner
        constexpr double wdl_total = 1000.0;
        constexpr double draw_score = 0.5;

-        double wdl_w = UCI::win_rate_model_double(value, ply);
-        double wdl_l = UCI::win_rate_model_double(-value, ply);
-        double wdl_d = wdl_total - wdl_w - wdl_l;
+        const double wdl_w = UCI::win_rate_model_double(value, ply);
+        const double wdl_l = UCI::win_rate_model_double(-value, ply);
+        const double wdl_d = wdl_total - wdl_w - wdl_l;

        return (wdl_w + wdl_d * draw_score) / wdl_total;
    }
@@ -150,16 +144,17 @@ namespace Learner

    double calc_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply)
    {
-        double p = deep_win_rate;
-        double q = winning_percentage(shallow_eval, ply);
+        const double p = deep_win_rate;
+        const double q = winning_percentage(shallow_eval, ply);
        return -p * std::log(q) - (1.0 - p) * std::log(1.0 - q);
    }

    double calc_d_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply)
    {
        constexpr double epsilon = 0.000001;
-        double y1 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval, ply);
-        double y2 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval + epsilon, ply);
+
+        const double y1 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval, ply);
+        const double y2 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval + epsilon, ply);

        // Divide by the winning_probability_coefficient to match scale with the sigmoidal win rate
        return ((y2 - y1) / epsilon) / winning_probability_coefficient;
    }
@@ -190,8 +185,8 @@ namespace Learner
        // Also, the coefficient of 1/m is unnecessary if you use the update formula that has the automatic gradient adjustment function like Adam and AdaGrad.
        // Therefore, it is not necessary to save it in memory.

-        double p = winning_percentage(deep);
-        double q = winning_percentage(shallow);
+        const double p = winning_percentage(deep, psv.gamePly);
+        const double q = winning_percentage(shallow, psv.gamePly);
        return (q - p) * Math::dsigmoid(double(shallow) / 600.0);
    }
 #endif
@@ -216,8 +211,8 @@ namespace Learner
        // = ...
        // = q-p.

-        double p = winning_percentage(deep);
-        double q = winning_percentage(shallow);
+        const double p = winning_percentage(deep, psv.gamePly);
+        const double q = winning_percentage(shallow, psv.gamePly);
        return q - p;
    }
@@ -270,8 +265,10 @@ namespace Learner
        double p = scaled_teacher_signal;
        if (convert_teacher_signal_to_winning_probability)
        {
-            p = winning_percentage(scaled_teacher_signal);
+            p = winning_percentage(scaled_teacher_signal, ply);
        }
+
+        return p;
    }
@@ -534,7 +531,7 @@ namespace Learner
            fs.close();

            // no more
-            if (filenames.size() == 0)
+            if (filenames.empty())
                return false;

            // Get the next file name.
@@ -543,6 +540,7 @@ namespace Learner
            fs.open(filename, ios::in | ios::binary);
            cout << "open filename = " << filename << endl;
+
            assert(fs);

            return true;
@@ -569,16 +567,12 @@ namespace Learner
                {
                    sfens.push_back(p);
                }
-                else
+                else if(!open_next_file())
                {
-                    // read failure
-                    if (!open_next_file())
-                    {
-                        // There was no next file. We are done.
-                        cout << "..end of files." << endl;
-                        end_of_files = true;
-                        return;
-                    }
+                    // There was no next file. We are done.
+                    cout << "..end of files."
+                         << endl;
+                    end_of_files = true;
+                    return;
                }
            }
@@ -702,6 +696,7 @@ namespace Learner
        learn_sum_entropy_win = 0.0;
        learn_sum_entropy = 0.0;
 #endif
+
 #if defined(EVAL_NNUE)
        newbob_scale = 1.0;
        newbob_decay = 1.0;
@@ -1213,7 +1208,7 @@ namespace Learner
        // cout << pos << value << endl;

        // Evaluation value of shallow search (qsearch)
-        const auto [shallow_value, pv] = qsearch(pos);
+        const auto [_, pv] = qsearch(pos);

        // Evaluation value of deep search
        const auto deep_value = (Value)ps.score;
@@ -1408,9 +1403,11 @@ namespace Learner

            if (--trials > 0 && !is_final)
            {
-                cout << "reducing learning rate scale from " << newbob_scale
+                cout
+                    << "reducing learning rate scale from " << newbob_scale
                    << " to " << (newbob_scale * newbob_decay)
                    << " (" << trials << " more trials)" << endl;
+
                newbob_scale *= newbob_decay;
                Eval::NNUE::SetGlobalLearningRateScale(newbob_scale);
            }
@@ -1432,10 +1429,10 @@ namespace Learner
    // prng: random number generator
    // afs: fstream of each teacher position file
    // a_count: the number of teacher positions in each file.
-    void shuffle_write(const string& output_file_name, PRNG& prng, vector<fstream>& afs, vector<uint64_t>& a_count)
+    void shuffle_write(const string& output_file_name, PRNG& prng, vector<fstream>& sfen_file_streams, vector<uint64_t>& sfen_count_in_file)
    {
        uint64_t total_sfen_count = 0;
-        for (auto c : a_count)
+        for (auto c : sfen_count_in_file)
            total_sfen_count += c;
@@ -1459,39 +1456,39 @@ namespace Learner
        fstream fs(output_file_name, ios::out | ios::binary);

        // total teacher positions
-        uint64_t sum = 0;
-        for (auto c : a_count)
-            sum += c;
+        uint64_t sfen_count_left = total_sfen_count;

-        while (sum != 0)
+        while (sfen_count_left != 0)
        {
-            auto r = prng.rand(sum);
+            auto r = prng.rand(sfen_count_left);

            // Think of the positions stored in file fs[0], then file fs[1], and so on
            // as one continuous sequence, and determine which file r points into.
            // The contents of each file are already shuffled, so you can take the next element from that file.
            // Each file holds sfen_count_in_file[x] positions, so this process can be written as follows.

-            uint64_t n = 0;
-            while (a_count[n] <= r)
-                r -= a_count[n++];
+            uint64_t i = 0;
+            while (sfen_count_in_file[i] <= r)
+                r -= sfen_count_in_file[i++];

            // This determines i. Reduce the remaining counts before we forget.
-            --a_count[n];
-            --sum;
+            --sfen_count_in_file[i];
+            --sfen_count_left;

            PackedSfenValue psv;
            // Reading and writing one record at a time is fine until performance becomes a problem...
-            if (afs[n].read((char*)&psv, sizeof(PackedSfenValue)))
+            if (sfen_file_streams[i].read((char*)&psv, sizeof(PackedSfenValue)))
            {
                fs.write((char*)&psv, sizeof(PackedSfenValue));
                ++write_sfen_count;
                print_status();
            }
        }
+
        print_status();
        fs.close();
+
        cout << "done!" << endl;
    }
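
The merge loop above draws the next record from file i with probability proportional to how many records file i still holds, which is what makes the concatenated output a uniform shuffle. The selection step in isolation (the modulo draw stands in for PRNG::rand(n)):

    #include <cstdint>
    #include <random>
    #include <vector>

    // Pick which source the next record comes from, weighted by remaining counts.
    size_t pick_source(std::vector<uint64_t>& remaining, uint64_t& total,
                       uint64_t r /* uniform in [0, total) */)
    {
        size_t i = 0;
        while (remaining[i] <= r)
            r -= remaining[i++];
        --remaining[i];
        --total;
        return i;
    }

    int main()
    {
        std::vector<uint64_t> remaining{3, 1, 2};
        uint64_t total = 6;
        std::mt19937_64 gen{7};
        while (total > 0)
        {
            const uint64_t r = gen() % total;
            (void)pick_source(remaining, total, r);  // drains all sources exactly once
        }
        return 0;
    }
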
    // Subcontracting the teacher shuffle "learn shuffle" command.
    // output_file_name: name of the output file where the shuffled teacher positions will be written
@@ -1509,8 +1506,8 @@ namespace Learner
        // There should have been a limit of 512 open files per process on Windows, so 500 are opened here.
        // The current setting is 500 files x 20M = 10G = 10 billion positions.

-        PSVector buf;
-        buf.resize(buffer_size);
+        PSVector buf(buffer_size);
+
        // A marker that indicates how much of the buffer above has been used
        uint64_t buf_write_marker = 0;
@@ -1537,7 +1534,7 @@ namespace Learner
            // write to a file
            fstream fs;
            fs.open(make_filename(write_file_count++), ios::out | ios::binary);
-            fs.write((char*)&buf[0], size * sizeof(PackedSfenValue));
+            fs.write(reinterpret_cast<char*>(buf.data()), size * sizeof(PackedSfenValue));
            fs.close();
            a_count.push_back(size);
@@ -1552,14 +1549,13 @@ namespace Learner
        {
            fstream fs(filename, ios::in | ios::binary);
            cout << endl << "open file = " << filename;
-            while (fs.read((char*)&buf[buf_write_marker], sizeof(PackedSfenValue)))
+            while (fs.read(reinterpret_cast<char*>(&buf[buf_write_marker]), sizeof(PackedSfenValue)))
                if (++buf_write_marker == buffer_size)
                    write_buffer(buffer_size);

            // Read in units of sizeof(PackedSfenValue),
            // and ignore the last remaining fraction. (Fails in fs.read, so exit while.)
            // (The remaining fraction is probably half-finished data left over from an interrupted teacher generation run.)
        }

        if (buf_write_marker != 0)
@@ -1599,20 +1595,20 @@ namespace Learner
        size_t file_count = filenames.size();

        // Number of teacher positions stored in each file in filenames
-        vector<uint64_t> a_count(file_count);
+        vector<uint64_t> sfen_count_in_file(file_count);

        // Count the number of teacher positions in each file.
-        vector<fstream> afs(file_count);
+        vector<fstream> sfen_file_streams(file_count);
        for (size_t i = 0; i < file_count; ++i)
        {
            auto filename = filenames[i];
-            auto& fs = afs[i];
+            auto& fs = sfen_file_streams[i];
            fs.open(filename, ios::in | ios::binary);

            const uint64_t file_size = get_file_size(fs);
            const uint64_t sfen_count = file_size / sizeof(PackedSfenValue);
-            a_count[i] = sfen_count;
+            sfen_count_in_file[i] = sfen_count;

            // Output the number of sfen stored in each file.
            cout << filename << " = " << sfen_count << " sfens." << endl;
@@ -1624,7 +1620,7 @@ namespace Learner

        // Now you have shuffled.
        // Throw to the subcontract function and end.
-        shuffle_write(output_file_name, prng, afs, a_count);
+        shuffle_write(output_file_name, prng, sfen_file_streams, sfen_count_in_file);
    }

    // Subcontracting the teacher shuffle "learn shufflem" command.
@@ -1656,7 +1652,10 @@ namespace Learner
        std::cout << "write : " << output_file_name << endl;

        // If the file to be written exceeds 2GB, it cannot be written in one shot with fstream::write, so use the wrapper.
-        write_memory_to_file(output_file_name, (void*)&buf[0], (uint64_t)sizeof(PackedSfenValue) * (uint64_t)buf.size());
+        write_memory_to_file(
+            output_file_name,
+            (void*)&buf[0],
+            sizeof(PackedSfenValue) * buf.size());

        std::cout << "..shuffle_on_memory done." << std::endl;
    }

    // Learning from the generated game record
    void learn(Position&, istringstream& is)
    {
-        auto thread_num = (int)Options["Threads"];
+        const auto thread_num = (int)Options["Threads"];
        SfenReader sr(thread_num);

        LearnerThink learn_think(sr);
@@ -1889,13 +1888,6 @@ namespace Learner
        {
            string kif_base_dir = Path::Combine(base_dir, target_dir);

-            // Enumerate the files in this folder. Keep paths relative to base_dir.
-#if defined(_MSC_VER)
-            // If you use std::tr2, warning C4996 will appear, so suppress it.
-            // * std::tr2 issued a deprecation warning by default under /std:c++14, and was deleted by default in /std:c++17.
-#pragma warning(push)
-#pragma warning(disable:4996)
-
-#pragma warning(push) -#pragma warning(disable:4996) - namespace sys = std::filesystem; sys::path p(kif_base_dir); // Origin of enumeration std::for_each(sys::directory_iterator(p), sys::directory_iterator(), @@ -1903,36 +1895,6 @@ namespace Learner if (sys::is_regular_file(p)) filenames.push_back(Path::Combine(target_dir, p.filename().generic_string())); }); -#pragma warning(pop) - -#elif defined(__GNUC__) - - auto ends_with = [](std::string const& value, std::string const& ending) - { - if (ending.size() > value.size()) return false; - return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); - }; - - // It can't be helped, so read it using dirent.h. - DIR* dp; // pointer to directory - dirent* entry; // entry point returned by readdir() - - dp = opendir(kif_base_dir.c_str()); - if (dp != NULL) - { - do { - entry = readdir(dp); - // Only list files ending with ".bin" - // →I hate this restriction when generating files with serial numbers... - if (entry != NULL && ends_with(entry->d_name, ".bin")) - { - //cout << entry->d_name << endl; - filenames.push_back(Path::Combine(target_dir, entry->d_name)); - } - } while (entry != NULL); - closedir(dp); - } -#endif } cout << "learn from "; @@ -1990,6 +1952,7 @@ namespace Learner dest_score_max_value, check_invalid_fen, check_illegal_move); + return; } @@ -1997,7 +1960,12 @@ namespace Learner { Eval::init_NNUE(); cout << "convert_bin_from_pgn-extract.." << endl; - convert_bin_from_pgn_extract(filenames, output_file_name, pgn_eval_side_to_move, convert_no_eval_fens_as_score_zero); + convert_bin_from_pgn_extract( + filenames, + output_file_name, + pgn_eval_side_to_move, + convert_no_eval_fens_as_score_zero); + return; } @@ -2154,12 +2122,6 @@ namespace Learner #endif } - } // namespace Learner -#if defined(GENSFEN2019) -#include "gensfen2019.cpp" -#endif - - #endif // EVAL_LEARN From a0b2d6a01e39627e9ea87b234a18067e4e404faf Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Mon, 7 Sep 2020 23:33:32 +0200 Subject: [PATCH 04/30] Note a potential defect in sfen packer. --- src/extra/sfen_packer.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/extra/sfen_packer.cpp b/src/extra/sfen_packer.cpp index ac789ce8..fd013fa2 100644 --- a/src/extra/sfen_packer.cpp +++ b/src/extra/sfen_packer.cpp @@ -218,7 +218,7 @@ struct SfenPacker PieceType pr = type_of(pc); auto c = huffman_table[pr]; stream.write_n_bit(c.code, c.bits); - + if (pc == NO_PIECE) return; @@ -249,7 +249,7 @@ struct SfenPacker // first and second flag Color c = (Color)stream.read_one_bit(); - + return make_piece(c, pr); } }; @@ -266,7 +266,10 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre { SfenPacker packer; auto& stream = packer.stream; - stream.set_data((uint8_t*)&sfen); + + // TODO: separate streams for writing and reading. Here we actually have to + // const_cast which is not safe in the long run. 
From 41b7674aee3920cb72554f8d22eb4e2cb6c57e09 Mon Sep 17 00:00:00 2001
From: Tomasz Sobczyk
Date: Mon, 7 Sep 2020 23:55:07 +0200
Subject: [PATCH 06/30] Improve comments, break long lines.

---
 src/learn/learner.cpp | 321 ++++++++++++++++++++++++++++--------------
 src/misc.h            |   1 +
 2 files changed, 213 insertions(+), 109 deletions(-)

diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp
index ddfaff5a..f9d188b8 100644
--- a/src/learn/learner.cpp
+++ b/src/learn/learner.cpp
@@ -1,18 +1,24 @@
-// learning routines
+// Learning routines:
 //
-// 1) Automatic generation of game records
+// 1) Automatic generation of game records in .bin format
 //     → "gensfen" command
-// 2) Learning evaluation function parameters from the generated game record
+//
+// 2) Learning evaluation function parameters from the generated .bin files
 //     → "learn" command
+//
 //     → Shuffle in the teacher phase is also an extension of this command.
 //        Example) "learn shuffle"
+//
 // 3) Automatic generation of fixed traces
 //     → "makebook think" command
 //     → implemented in extra/book/book.cpp
+//
 // 4) Post-game automatic analysis mode
 //     → not handled in the engine, since this is something the GUI should assist with.
 // etc..

 #if defined(EVAL_LEARN)

 #include "../eval/evaluate_common.h"
@@ -53,7 +59,6 @@

 using namespace std;

-
 #if defined(USE_BOOK)
 // This is defined in the search section.
 extern Book::BookMoveSelector book;
 #endif

 template <typename T>
 T operator +=(std::atomic<T>& x, const T rhs)
 {
     T old = x.load(std::memory_order_consume);
+
     // The value may be overwritten by another thread at this point;
     // the idea is that the value is not destroyed.
     T desired = old + rhs;
@@ -81,7 +87,7 @@ namespace Learner

     static double winning_probability_coefficient = 1.0 / PawnValueEg / 4.0 * std::log(10.0);

-    // Score scale factors. ex) If we set src_score_min_value = 0.0,
+    // Score scale factors. ex) If we set src_score_min_value = 0.0,
     // src_score_max_value = 1.0, dest_score_min_value = 0.0,
     // dest_score_max_value = 10000.0, [0.0, 1.0] will be scaled to [0, 10000].
     static double src_score_min_value = 0.0;
     static double src_score_max_value = 1.0;
     static double dest_score_min_value = 0.0;
     static double dest_score_max_value = 1.0;

-    // Assume teacher signals are the scores of deep searches, and convert them into winning
-    // probabilities in the trainer. Sometimes we want to use the winning probabilities in the training
-    // data directly. In those cases, we set false to this variable.
+    // Assume teacher signals are the scores of deep searches,
+    // and convert them into winning probabilities in the trainer.
+    // Sometimes we want to use the winning probabilities in the training
+    // data directly. In those cases, we set false to this variable.
     static bool convert_teacher_signal_to_winning_probability = true;
@@ -100,13 +107,9 @@ namespace Learner
     // This CANNOT be static since it's used elsewhere.
     bool use_raw_nnue_eval = false;

-    // Using WDL with win rate model instead of sigmoid
+    // Using stockfish's WDL with win rate model instead of sigmoid
     static bool use_wdl = false;

-    // -----------------------------------
-    // command to learn from the generated game (learn)
-    // -----------------------------------
-
     // A function that converts the evaluation value to the winning rate [0,1]
     double winning_percentage(double value)
     {
@@ -142,21 +145,31 @@ namespace Learner
     }

-    double calc_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply)
+    double calc_cross_entropy_of_winning_percentage(
+        double deep_win_rate,
+        double shallow_eval,
+        int ply)
     {
         const double p = deep_win_rate;
         const double q = winning_percentage(shallow_eval, ply);
         return -p * std::log(q) - (1.0 - p) * std::log(1.0 - q);
     }

-    double calc_d_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply)
+    double calc_d_cross_entropy_of_winning_percentage(
+        double deep_win_rate,
+        double shallow_eval,
+        int ply)
     {
         constexpr double epsilon = 0.000001;

-        const double y1 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval, ply);
-        const double y2 = calc_cross_entropy_of_winning_percentage(deep_win_rate, shallow_eval + epsilon, ply);
+        const double y1 = calc_cross_entropy_of_winning_percentage(
+            deep_win_rate, shallow_eval, ply);
+
+        const double y2 = calc_cross_entropy_of_winning_percentage(
+            deep_win_rate, shallow_eval + epsilon, ply);

-        // Divide by the winning_probability_coefficient to match scale with the sigmoidal win rate
+        // Divide by the winning_probability_coefficient to
+        // match scale with the sigmoidal win rate
         return ((y2 - y1) / epsilon) / winning_probability_coefficient;
     }
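
Plugging in numbers makes the win-rate scale concrete. Assuming Stockfish's classical PawnValueEg of 208 (used here only for illustration), the coefficient is ln(10) / (4 * 208) ≈ 0.00277, so an eval of +100 cp maps to roughly a 57% win probability:

    #include <cassert>
    #include <cmath>

    int main()
    {
        const double PawnValueEg = 208.0;  // illustrative value
        const double coeff = 1.0 / PawnValueEg / 4.0 * std::log(10.0);
        const double win = 1.0 / (1.0 + std::exp(-100.0 * coeff));  // eval = +100 cp
        assert(win > 0.56 && win < 0.58);
        return 0;
    }
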
// f'(Xi) = win_rate'(shallow) = sigmoid'(shallow/600) = dsigmoid(shallow / 600) / 600 // This /600 at the end is adjusted by the learning rate, so do not write it.. - // Also, the coefficient of 1/m is unnecessary if you use the update formula that has the automatic gradient adjustment function like Adam and AdaGrad. + // Also, the coefficient of 1/m is unnecessary if you use the update + // formula that has the automatic gradient adjustment function like Adam and AdaGrad. // Therefore, it is not necessary to save it in memory. const double p = winning_percentage(deep, psv.gamePly); @@ -202,7 +220,9 @@ namespace Learner // Refer to etc. // Objective function design) - // We want to make the distribution of p closer to the distribution of q → Think of it as the problem of minimizing the cross entropy between the probability distributions of p and q. + // We want to make the distribution of p closer to the distribution of q + // → Think of it as the problem of minimizing the cross entropy + // between the probability distributions of p and q. // J = H(p,q) =-Σ p(x) log(q(x)) = -p log q-(1-p) log(1-q) // x @@ -222,7 +242,8 @@ namespace Learner double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) { // Version that does not pass the winning percentage function - // This, unless EVAL_LIMIT is set low, trying to match the evaluation value with the shape of the end stage + // This, unless EVAL_LIMIT is set low, trying to + // match the evaluation value with the shape of the end stage // eval may exceed the range of eval. return shallow - deep; } @@ -261,7 +282,6 @@ namespace Learner { const double scaled_teacher_signal = get_scaled_signal(teacher_signal); - // Teacher winning probability. double p = scaled_teacher_signal; if (convert_teacher_signal_to_winning_probability) { @@ -273,7 +293,8 @@ namespace Learner double calculate_lambda(double teacher_signal) { - // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA. + // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT + // then apply ELMO_LAMBDA2 instead of ELMO_LAMBDA. const double lambda = (std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 @@ -284,7 +305,8 @@ namespace Learner double calculate_t(int game_result) { - // Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw. + // Use 1 as the correction term if the expected win rate is 1, + // 0 if you lose, and 0.5 if you draw. // game_result = 1,0,-1 so add 1 and divide by 2. const double t = double(game_result + 1) * 0.5; @@ -318,7 +340,9 @@ namespace Learner } // Calculate cross entropy during learning - // The individual cross entropy of the win/loss term and win rate term of the elmo expression is returned to the arguments cross_entropy_eval and cross_entropy_win. + // The individual cross entropy of the win/loss term and win + // rate term of the elmo expression is returned + // to the arguments cross_entropy_eval and cross_entropy_win. void calc_cross_entropy( Value teacher_signal, Value shallow, @@ -356,11 +380,7 @@ namespace Learner } #endif - - - // Other variations may be prepared as the objective function.. - - + // Other objective functions may be considered in the future... 
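    // A worked micro-example of the elmo blend above (a sketch with
    // illustrative values, not code from this patch series): with
    // lambda = 0.33, teacher win rate p = 0.90, game result t = 1.0 and
    // shallow-search win rate q = 0.60, the gradient comes out as
    //
    //   lambda * (q - p) + (1.0 - lambda) * (q - t)
    //     = 0.33 * (0.60 - 0.90) + 0.67 * (0.60 - 1.0)
    //     = -0.367
    //
    // i.e. the shallow eval is pushed upwards, weighted mostly by the
    // actual game outcome rather than the teacher score.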
double calc_grad(Value shallow, const PackedSfenValue& psv) { return calc_grad((Value)psv.score, shallow, psv); @@ -369,15 +389,17 @@ namespace Learner // Sfen reader struct SfenReader { - // number of phases used for calculation such as mse + // Number of phases used for calculation such as mse // mini-batch size = 1M is standard, so 0.2% of that should be negligible in terms of time. - //Since search() is performed with depth = 1 in calculation of move match rate, simple comparison is not possible... + // Since search() is performed with depth = 1 in calculation of + // move match rate, simple comparison is not possible... static constexpr uint64_t sfen_for_mse_size = 2000; // Number of phases buffered by each thread 0.1M phases. 4M phase at 40HT static constexpr size_t THREAD_BUFFER_SIZE = 10 * 1000; - // Buffer for reading files (If this is made larger, the shuffle becomes larger and the phases may vary. + // Buffer for reading files (If this is made larger, + // the shuffle becomes larger and the phases may vary. // If it is too large, the memory consumption will increase. // SFEN_READ_SIZE is a multiple of THREAD_BUFFER_SIZE. static constexpr const size_t SFEN_READ_SIZE = LEARN_SFEN_READ_SIZE; @@ -387,7 +409,8 @@ namespace Learner // It must be 2**N because it will be used as the mask to calculate hash_index. static constexpr uint64_t READ_SFEN_HASH_SIZE = 64 * 1024 * 1024; - // Do not use std::random_device(). Because it always the same integers on MinGW. + // Do not use std::random_device(). + // Because it always the same integers on MinGW. SfenReader(int thread_num) : prng(std::chrono::system_clock::now().time_since_epoch().count()) { @@ -460,16 +483,20 @@ namespace Learner // [ASYNC] Thread returns one aspect. Otherwise returns false. bool read_to_thread_buffer(size_t thread_id, PackedSfenValue& ps) { - // If there are any positions left in the thread buffer, retrieve one and return it. + // If there are any positions left in the thread buffer + // then retrieve one and return it. auto& thread_ps = packed_sfens[thread_id]; - // Fill the read buffer if there is no remaining buffer, but if it doesn't even exist, finish. - if ((thread_ps == nullptr || thread_ps->empty()) // If the buffer is empty, fill it. + // Fill the read buffer if there is no remaining buffer, + // but if it doesn't even exist, finish. + // If the buffer is empty, fill it. + if ((thread_ps == nullptr || thread_ps->empty()) && !read_to_thread_buffer_impl(thread_id)) return false; // read_to_thread_buffer_impl() returned true, - // Since the filling of the thread buffer with the phase has been completed successfully + // Since the filling of the thread buffer with the + // phase has been completed successfully // thread_ps->rbegin() is alive. ps = thread_ps->back(); @@ -511,6 +538,7 @@ namespace Learner // Waiting for file worker to fill packed_sfens_pool. // The mutex isn't locked, so it should fill up soon. + // Poor man's condition variable. sleep(1); } @@ -519,14 +547,14 @@ namespace Learner // Start a thread that loads the phase file in the background. void start_file_read_worker() { - file_worker_thread = std::thread([&] { this->file_read_worker(); }); + file_worker_thread = std::thread([&] { + this->file_read_worker(); + }); } - // for file read-only threads void file_read_worker() { - auto open_next_file = [&]() - { + auto open_next_file = [&]() { if (fs.is_open()) fs.close(); @@ -569,7 +597,7 @@ namespace Learner } else if(!open_next_file()) { - // There was no next file. Abon. 
+                    // There was no next file. Abort.
                    cout << "..end of files." << endl;
                    end_of_files = true;
                    return;
@@ -577,8 +605,6 @@
            }

            // Shuffle the read phase data.
-            // random shuffle by Fisher-Yates algorithm
-
            if (!no_shuffle)
            {
                Algo::shuffle(sfens, prng);
@@ -597,17 +623,19 @@
                // Delete this pointer on the receiving side.
                auto buf = std::make_unique<PSVector>();
                buf->resize(THREAD_BUFFER_SIZE);
-                memcpy(buf->data(), &sfens[i * THREAD_BUFFER_SIZE], sizeof(PackedSfenValue) * THREAD_BUFFER_SIZE);
+                memcpy(
+                    buf->data(),
+                    &sfens[i * THREAD_BUFFER_SIZE],
+                    sizeof(PackedSfenValue) * THREAD_BUFFER_SIZE);
                buffers.emplace_back(std::move(buf));
            }

-            // Since sfens is ready, look at the occasion and copy
            {
                std::unique_lock<std::mutex> lk(mutex);

-                // You can ignore this time because you just copy the pointer...
-                // The mutex lock is required because the contents of packed_sfens_pool are changed.
+                // The mutex lock is required because the
+                // contents of packed_sfens_pool are changed.
                for (auto& buf : buffers)
                    packed_sfens_pool.emplace_back(std::move(buf));
@@ -644,7 +672,7 @@

        bool stop_flag;

-        vector<Key> hash; // 64MB*8 = 512MB
+        vector<Key> hash;

        // test phase for mse calculation
        PSVector sfen_for_mse;
@@ -660,7 +688,6 @@
        // Did you read the files and reached the end?
        atomic<bool> end_of_files;

-
        // handle of sfen file
        std::fstream fs;
@@ -727,7 +754,7 @@
        uint64_t epoch = 0;

        // Mini batch size size. Be sure to set it on the side that uses this class.
-        uint64_t mini_batch_size = 1000 * 1000;
+        uint64_t mini_batch_size = LEARN_MINI_BATCH_SIZE;

        bool stop_flag;
@@ -740,7 +767,8 @@
        // Option not to learn kk/kkp/kpp/kppp
        std::array<bool, 4> freeze;

-        // If the absolute value of the evaluation value of the deep search of the teacher phase exceeds this value, discard the teacher phase.
+        // If the absolute value of the evaluation value of the deep search
+        // of the teacher phase exceeds this value, discard the teacher phase.
        int eval_limit;

        // Flag whether to dig a folder each time the evaluation function is saved.
@@ -811,7 +839,8 @@

    void LearnerThink::calc_loss(size_t thread_id, uint64_t done)
    {
-        // There is no point in hitting the replacement table, so at this timing the generation of the replacement table is updated.
+        // There is no point in hitting the replacement table,
+        // so at this timing the generation of the replacement table is updated.
        // It doesn't matter if you have disabled the substitution table.
        TT.new_search();
@@ -845,7 +874,8 @@
        sum_norm = 0;
#endif

-        // The number of times the pv first move of deep search matches the pv first move of search(1).
+        // The number of times the pv first move of deep
+        // search matches the pv first move of search(1).
        atomic<int> move_accord_count;
        move_accord_count = 0;
@@ -856,7 +886,8 @@
        pos.set(StartFEN, false, &si, th);
        std::cout << "hirate eval = " << Eval::evaluate(pos);

-        // It's better to parallelize here, but it's a bit troublesome because the search before slave has not finished.
+        // It's better to parallelize here, but it's a bit
+        // troublesome because the search before slave has not finished.
        // I created a mechanism to call task, so I will use it.

        // The number of tasks to do.
@@ -869,7 +900,8 @@
        {
            // Assign work to each thread using TaskDispatcher.
            // A task definition for that.
-            // It is not possible to capture pos used in ↑, so specify the variables you want to capture one by one.
+ // It is not possible to capture pos used in ↑, + // so specify the variables you want to capture one by one. auto task = [ this, @@ -899,7 +931,8 @@ namespace Learner // Evaluation value of deep search auto deep_value = (Value)ps.score; - // Note) This code does not consider when eval_limit is specified in the learn command. + // Note) This code does not consider when + // eval_limit is specified in the learn command. // --- error calculation @@ -975,14 +1008,16 @@ namespace Learner << " , eval mae = " << eval_mae; #endif -#if defined ( LOSS_FUNCTION_IS_ELMO_METHOD ) +#if defined(LOSS_FUNCTION_IS_ELMO_METHOD) #if defined(EVAL_NNUE) latest_loss_sum += test_sum_cross_entropy - test_sum_entropy; latest_loss_count += sr.sfen_for_mse.size(); #endif - // learn_cross_entropy may be called train cross entropy in the world of machine learning, - // When omitting the acronym, it is nice to be able to distinguish it from test cross entropy(tce) by writing it as lce. + // learn_cross_entropy may be called train cross + // entropy in the world of machine learning, + // When omitting the acronym, it is nice to be able to + // distinguish it from test cross entropy(tce) by writing it as lce. if (sr.sfen_for_mse.size() && done) { @@ -1074,7 +1109,9 @@ namespace Learner // Output the current time. Output every time. std::cout << sr.total_done << " sfens , at " << now_string() << std::endl; - // Reflect the gradient in the weight array at this timing. The calculation of the gradient is just right for each 1M phase in terms of mini-batch. + // Reflect the gradient in the weight array at this timing. + // The calculation of the gradient is just right for + // each 1M phase in terms of mini-batch. Eval::update_weights(epoch, freeze); // Display epoch and current eta for debugging. @@ -1090,14 +1127,13 @@ namespace Learner #endif ++epoch; - // Save once every 1 billion phases. - // However, the elapsed time during update_weights() and calc_rmse() is ignored. if (++sr.save_count * mini_batch_size >= eval_save_interval) { sr.save_count = 0; - // During this time, as the gradient calculation proceeds, the value becomes too large and I feel annoyed, so stop other threads. + // During this time, as the gradient calculation proceeds, + // the value becomes too large and I feel annoyed, so stop other threads. const bool converged = save(); if (converged) { @@ -1109,7 +1145,6 @@ namespace Learner // Calculate rmse. This is done for samples of 10,000 phases. // If you do with 40 cores, update_weights every 1 million phases - // I don't think it's so good to be tiring. static uint64_t loss_output_count = 0; if (++loss_output_count * mini_batch_size >= loss_output_interval) { @@ -1129,10 +1164,12 @@ namespace Learner sr.last_done = sr.total_done; } - // Next time, I want you to do this series of processing again when you process only mini_batch_size. + // Next time, I want you to do this series of + // processing again when you process only mini_batch_size. sr.next_update_weights += mini_batch_size; - // Since I was waiting for the update of this sr.next_update_weights except the main thread, + // Since I was waiting for the update of this + // sr.next_update_weights except the main thread, // Once this value is updated, it will start moving again. } } @@ -1173,7 +1210,8 @@ namespace Learner if (pos.set_from_packed_sfen(ps.sfen, &si, th, mirror) != 0) { // I got a strange sfen. Should be debugged! - // Since it is an illegal sfen, it may not be displayed with pos.sfen(), but it is better than not. 
+                // Since it is an illegal sfen, it may not be
+                // displayed with pos.fen(), but it is better than not.
                cout << "Error! : illegal packed sfen = " << pos.fen() << endl;
                goto RETRY_READ;
            }
@@ -1198,9 +1236,11 @@
#endif

            // There is a possibility that all the pieces are blocked and stuck.
-            // Also, the declaration win phase is excluded from learning because you cannot go to leaf with PV moves.
-            // (shouldn't write out such teacher aspect itself, but may have written it out with an old generation routine)
-            // Skip the position if there are no legal moves (=checkmated or stalemate).
+            // Also, the declaration win phase is excluded from
+            // learning because you cannot go to leaf with PV moves.
+            // (shouldn't write out such teacher aspect itself,
+            // but may have written it out with an old generation routine)
+            // Skip the position if there are no legal moves (=checkmated or stalemate).
            if (MoveList<LEGAL>(pos).size() == 0)
                goto RETRY_READ;
@@ -1214,7 +1254,8 @@
            const auto deep_value = (Value)ps.score;

            // I feel that the mini batch has a better gradient.
-            // Go to the leaf node as it is, add only to the gradient array, and later try AdaGrad at the time of rmse aggregation.
+            // Go to the leaf node as it is, add only to the gradient array,
+            // and later try AdaGrad at the time of rmse aggregation.

            const auto rootColor = pos.side_to_move();
@@ -1223,23 +1264,25 @@

            // It may be better not to study where the difference in evaluation values is too large.

#if 0
-            // If you do this, about 13% of the phases will be excluded from the learning target. Good and bad are subtle.
+            // If you do this, about 13% of the phases will be excluded
+            // from the learning target. Good and bad are subtle.
            if (pv.size() >= 1 && (uint16_t)pv[0] != ps.move)
            {
-                // dbg_hit_on(false);
+                //dbg_hit_on(false);
                continue;
            }
#endif

#if 0
            // It may be better not to study where the difference in evaluation values is too large.
-            // → It's okay because it passes the win rate function... About 30% of the phases are out of the scope of learning...
+            // → It's okay because it passes the win rate function...
+            // About 30% of the phases are out of the scope of learning...
            if (abs((int16_t)r.first - ps.score) >= Eval::PawnValue * 4)
            {
-                // dbg_hit_on(false);
+                //dbg_hit_on(false);
                continue;
            }

-            // dbg_hit_on(true);
+            //dbg_hit_on(true);
#endif

            int ply = 0;

            auto pos_add_grad = [&]() {
                // Use the value of evaluate in leaf as shallow_value.
                // Using the return value of qsearch() as shallow_value,
-                // If PV is interrupted in the middle, the phase where evaluate() is called to calculate the gradient, and
-                // I don't think this is a very desirable property, as the aspect that gives that gradient will be different.
-                // I have turned off the substitution table, but since the pv array has not been updated due to one stumbling block etc...
+                // If PV is interrupted in the middle, the phase where
+                // evaluate() is called to calculate the gradient,
+                // and I don't think this is a very desirable property,
+                // as the aspect that gives that gradient will be different.
+                // I have turned off the substitution table, but since
+                // the pv array has not been updated due to one stumbling block etc...
                const Value shallow_value =
                    (rootColor == pos.side_to_move())
                    ? Eval::evaluate(pos)
                    : -Eval::evaluate(pos);
@@ -1284,7 +1330,8 @@
                // Slope
                double dj_dw = calc_grad(deep_value, shallow_value, ps);

-                // Add jd_dw as the gradient (∂J/∂Wj) for the feature vector currently appearing in the leaf node.
+                // Add jd_dw as the gradient (∂J/∂Wj) for the
+                // feature vector currently appearing in the leaf node.

                // If it is not PV termination, apply a discount rate.
                if (discount_rate != 0 && ply != (int)pv.size())
@@ -1330,7 +1377,7 @@

            if (illegal_move)
            {
-                sync_cout << "An illical move was detected... Excluded the position from the learning data..." << sync_endl;
+                sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
                continue;
            }
@@ -1343,7 +1390,11 @@

#if 0
            // When adding the gradient to the root phase
-            shallow_value = (rootColor == pos.side_to_move()) ? Eval::evaluate(pos) : -Eval::evaluate(pos);
+            shallow_value =
+                (rootColor == pos.side_to_move())
+                ? Eval::evaluate(pos)
+                : -Eval::evaluate(pos);
+
            dj_dw = calc_grad(deep_value, shallow_value, ps);
            Eval::add_grad(pos, rootColor, dj_dw, without_kpp);
#endif
@@ -1426,10 +1477,14 @@

    // Shuffle_files(), shuffle_files_quick() subcontracting, writing part.
    // output_file_name: Name of the file to write
-    // prng: random number
-    // afs: fstream of each teacher phase file
-    // a_count: The number of teacher positions inherent in each file.
-    void shuffle_write(const string& output_file_name, PRNG& prng, vector<fstream>& sfen_file_streams, vector<uint64_t>& sfen_count_in_file)
+    // prng: random number generator
+    // sfen_file_streams: fstream of each teacher phase file
+    // sfen_count_in_file: The number of teacher positions present in each file.
+    void shuffle_write(
+        const string& output_file_name,
+        PRNG& prng,
+        vector<fstream>& sfen_file_streams,
+        vector<uint64_t>& sfen_count_in_file)
    {
        uint64_t total_sfen_count = 0;
        for (auto c : sfen_count_in_file)
@@ -1502,7 +1557,8 @@
        // Temporary file is written to tmp/ folder for each buffer_size phase.
        // For example, if buffer_size = 20M, you need a buffer of 20M*40bytes = 800MB.
        // In a PC with a small memory, it would be better to reduce this.
-        // However, if the number of files increases too much, it will not be possible to open at the same time due to OS restrictions.
+        // However, if the number of files increases too much,
+        // it will not be possible to open at the same time due to OS restrictions.
        // There should have been a limit of 512 per process on Windows, so you can open here as 500,
        // The current setting is 500 files x 20M = 10G = 10 billion phases.
@@ -1555,19 +1611,23 @@

            // Read in units of sizeof(PackedSfenValue),
            // Ignore the last remaining fraction. (Fails in fs.read, so exit while)
-            // (The remaining fraction seems to be half-finished data that was created because it was stopped halfway during teacher generation.)
+            // (The remaining fraction seems to be half-finished data
+            // that was created because it was stopped halfway during teacher generation.)
        }

        if (buf_write_marker != 0)
            write_buffer(buf_write_marker);

        // Only shuffled files have been written write_file_count.
-        // As a second pass, if you open all of them at the same time, select one at random and load one phase at a time
+        // As a second pass, if you open all of them at the same time,
+        // select one at random and load one phase at a time
        // Now you have shuffled.
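        // In outline, the whole procedure is (a sketch; the wording is
        // illustrative, buffer_size and the tmp/ layout are as above):
        //
        //   pass 1: for each buffer_size chunk of the input,
        //           Fisher-Yates shuffle it in memory and write it out
        //           as its own tmp file.
        //   pass 2: while any tmp file still has positions left, pick a
        //           file f with probability remaining(f) / total_remaining
        //           and emit one position from f.
        //
        // Since every tmp file is internally a uniform shuffle and files
        // are drawn in proportion to their remaining counts, the merged
        // output is a uniform shuffle of the whole input.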
+        // Original file for the shuffle + tmp files + file to write
+        // requires 3 times the storage capacity of the original file.
        // 10 billion positions are 400GB, so a 1TB SSD is not enough for shuffling them.
-        // If you want to delete (or delete by hand) the original file at this point after writing to tmp,
+        // If you want to delete (or delete by hand) the
+        // original file at this point after writing to tmp,
        // The storage capacity is about twice that of the original file.
        // So, maybe we should have an option to delete the original file.
@@ -1592,7 +1652,7 @@
        PRNG prng(std::chrono::system_clock::now().time_since_epoch().count());

        // number of files
-        size_t file_count = filenames.size();
+        const size_t file_count = filenames.size();

        // Number of teacher positions stored in each file in filenames
        vector<uint64_t> sfen_count_in_file(file_count);
@@ -1651,7 +1711,8 @@

        std::cout << "write : " << output_file_name << endl;

-        // If the file to be written exceeds 2GB, it cannot be written in one shot with fstream::write, so use wrapper.
+        // If the file to be written exceeds 2GB, it cannot be
+        // written in one shot with fstream::write, so use wrapper.
        write_memory_to_file(
            output_file_name,
            (void*)&buf[0],
@@ -1703,9 +1764,11 @@
        uint64_t buffer_size = 20000000;
        // fast shuffling assuming each file is shuffled
        bool shuffle_quick = false;
-        // A function to read the entire file in memory and shuffle it. (Requires file size memory)
+        // A function to read the entire file in memory and shuffle it.
+        // (Requires file size memory)
        bool shuffle_on_memory = false;
-        // Conversion of packed sfen. In plain, it consists of sfen(string), evaluation value (integer), move (eg 7g7f, string), result (loss-1, win 1, draw 0)
+        // Conversion of packed sfen. In plain, it consists of sfen(string),
+        // evaluation value (integer), move (eg 7g7f, string), result (loss: -1, win: 1, draw: 0)
        bool use_convert_plain = false;
        // convert plain format teacher to Yaneura King's bin
        bool use_convert_bin = false;
@@ -1721,13 +1784,16 @@
        // File name to write in those cases (default is "shuffled_sfen.bin")
        string output_file_name = "shuffled_sfen.bin";

-        // If the absolute value of the evaluation value in the deep search of the teacher phase exceeds this value, that phase is discarded.
+        // If the absolute value of the evaluation value
+        // in the deep search of the teacher phase exceeds this value,
+        // that phase is discarded.
        int eval_limit = 32000;

        // Flag to save the evaluation function file only once near the end.
        bool save_only_once = false;

-        // Shuffle about what you are pre-reading on the teacher aspect. (Shuffle of about 10 million phases)
+        // Shuffle about what you are pre-reading on the teacher aspect.
+        // (Shuffle of about 10 million phases)
        // Turn on if you want to pass a pre-shuffled file.
        bool no_shuffle = false;

@@ -1738,7 +1804,9 @@
        ELMO_LAMBDA_LIMIT = 32000;
#endif

-        // Discount rate. If this is set to a value other than 0, the slope will be added even at other than the PV termination. (At that time, apply this discount rate)
+        // Discount rate. If this is set to a value other than 0,
+        // the slope will be added even at other than the PV termination.
+        // (At that time, apply this discount rate)
        double discount_rate = 0;

        // if (gamePly < rand(reduction_gameply)) continue;

        else if (option == "eta1") is >> eta1;
        else if (option == "eta2") is >> eta2;
        else if (option == "eta3") is >> eta3;
        else if (option == "eta1_epoch") is >> eta1_epoch;
        else if (option == "eta2_epoch") is >> eta2_epoch;
+
+        // Accept also the old option name.
-        else if (option == "use_draw_in_training" || option == "use_draw_games_in_training") is >> use_draw_games_in_training;
+        else if (option == "use_draw_in_training"
+            || option == "use_draw_games_in_training")
+            is >> use_draw_games_in_training;
+
+        // Accept also the old option name.
-        else if (option == "use_draw_in_validation" || option == "use_draw_games_in_validation") is >> use_draw_games_in_validation;
+        else if (option == "use_draw_in_validation"
+            || option == "use_draw_games_in_validation")
+            is >> use_draw_games_in_validation;
+
+        // Accept also the old option name.
-        else if (option == "use_hash_in_training" || option == "skip_duplicated_positions_in_training") is >> skip_duplicated_positions_in_training;
+        else if (option == "use_hash_in_training"
+            || option == "skip_duplicated_positions_in_training")
+            is >> skip_duplicated_positions_in_training;
+
        else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient;
+
        // Discount rate
        else if (option == "discount_rate") is >> discount_rate;
+
        // Using WDL with win rate model instead of sigmoid
        else if (option == "use_wdl") is >> use_wdl;
@@ -1873,8 +1953,11 @@
        else
            filenames.push_back(option);
    }
+
    if (loss_output_interval == 0)
+    {
        loss_output_interval = LEARN_RMSE_OUTPUT_INTERVAL * mini_batch_size;
+    }

    cout << "learn command , ";
@@ -1900,6 +1983,7 @@
    cout << "learn from ";
    for (auto s : filenames)
        cout << s << " , ";
+
    cout << endl;
    if (!validation_set_file_name.empty())
    {
@@ -1917,18 +2001,21 @@
        shuffle_files(filenames, output_file_name, buffer_size);
        return;
    }
+
    if (shuffle_quick)
    {
        cout << "quick shuffle mode.." << endl;
        shuffle_files_quick(filenames, output_file_name);
        return;
    }
+
    if (shuffle_on_memory)
    {
        cout << "shuffle on memory.." << endl;
        shuffle_files_on_memory(filenames, output_file_name);
        return;
    }
+
    if (use_convert_plain)
    {
        Eval::init_NNUE();

        convert_plain(filenames, output_file_name);
        return;
    }
+
    if (use_convert_bin)
    {
        Eval::init_NNUE();
@@ -1956,6 +2044,7 @@
        return;
    }

+
    if (use_convert_bin_from_pgn_extract)
    {
        Eval::init_NNUE();
@@ -1976,15 +2065,21 @@

    // Insert the file name for the number of loops.
    for (int i = 0; i < loop; ++i)
-        // sfen reader, I'll read it in reverse order so I'll reverse it here. I'm sorry.
+    {
+        // sfen reader, I'll read it in reverse
+        // order so I'll reverse it here. I'm sorry.
for (auto it = filenames.rbegin(); it != filenames.rend(); ++it) + { sr.filenames.push_back(Path::Combine(base_dir, *it)); + } + } #if !defined(EVAL_NNUE) cout << "Gradient Method : " << LEARN_UPDATE << endl; #endif cout << "Loss Function : " << LOSS_FUNCTION << endl; cout << "mini-batch size : " << mini_batch_size << endl; + #if defined(EVAL_NNUE) cout << "nn_batch_size : " << nn_batch_size << endl; cout << "nn_options : " << nn_options << endl; @@ -1994,6 +2089,7 @@ namespace Learner cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl; cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl; cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl; + #if defined(EVAL_NNUE) if (newbob_decay != 1.0) { cout << "scheduling : newbob with decay = " << newbob_decay @@ -2003,6 +2099,7 @@ namespace Learner cout << "scheduling : default" << endl; } #endif + cout << "discount rate : " << discount_rate << endl; // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1. @@ -2014,6 +2111,7 @@ namespace Learner cout << "LAMBDA2 : " << ELMO_LAMBDA2 << endl; cout << "LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl; #endif + cout << "mirror_percentage : " << mirror_percentage << endl; cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl; cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl; @@ -2071,11 +2169,13 @@ namespace Learner learn_think.sr.no_shuffle = no_shuffle; learn_think.freeze = freeze; learn_think.reduction_gameply = reduction_gameply; + #if defined(EVAL_NNUE) learn_think.newbob_scale = 1.0; learn_think.newbob_decay = newbob_decay; learn_think.newbob_num_trials = newbob_num_trials; #endif + learn_think.eval_save_interval = eval_save_interval; learn_think.loss_output_interval = loss_output_interval; learn_think.mirror_percentage = mirror_percentage; @@ -2086,16 +2186,19 @@ namespace Learner learn_think.mini_batch_size = mini_batch_size; - if (validation_set_file_name.empty()) { + if (validation_set_file_name.empty()) + { // Get about 10,000 data for mse calculation. sr.read_for_mse(); } - else { + else + { sr.read_validation_set(validation_set_file_name, eval_limit); } // Calculate rmse once at this point (timing of 0 sfen) // sr.calc_rmse(); + #if defined(EVAL_NNUE) if (newbob_decay != 1.0) { learn_think.calc_loss(0, -1); diff --git a/src/misc.h b/src/misc.h index 5add3b36..4c04d3f0 100644 --- a/src/misc.h +++ b/src/misc.h @@ -226,6 +226,7 @@ namespace Math { } namespace Algo { + // Fisher-Yates template void shuffle(std::vector& buf, Rng&& prng) { From a6013557f2cb5d13c21a2d406a02d504a643c885 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 22:45:29 +0900 Subject: [PATCH 07/30] Removed EVAL_NNUE macro. 
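NNUE is the only evaluation function left on this branch, so every
EVAL_NNUE guard is either always true (in the NNUE sources) or redundant
next to EVAL_LEARN (in the trainer sources). The guards reduce
accordingly, e.g.

    -#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
    +#if defined(EVAL_LEARN)

and the learn targets in the Makefile drop -DEVAL_NNUE from their build
flags.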
--- src/Makefile | 6 +- src/eval/evaluate_common.h | 3 - src/learn/gensfen.cpp | 40 +++--- src/learn/learner.cpp | 133 +++--------------- src/nnue/evaluate_nnue.cpp | 2 - src/nnue/evaluate_nnue_learner.cpp | 4 +- src/nnue/evaluate_nnue_learner.h | 4 +- src/nnue/features/castling_right.cpp | 4 - src/nnue/features/castling_right.h | 4 - src/nnue/features/enpassant.cpp | 4 - src/nnue/features/enpassant.h | 4 - src/nnue/features/half_relative_kp.cpp | 4 - src/nnue/features/half_relative_kp.h | 4 - src/nnue/features/k.cpp | 4 - src/nnue/features/k.h | 4 - src/nnue/features/p.cpp | 4 - src/nnue/features/p.h | 4 - src/nnue/layers/sum.h | 4 - src/nnue/nnue_test_command.cpp | 4 +- src/nnue/nnue_test_command.h | 4 +- src/nnue/trainer/features/factorizer.h | 4 - .../trainer/features/factorizer_feature_set.h | 4 - .../trainer/features/factorizer_half_kp.h | 4 - src/nnue/trainer/trainer.h | 4 +- src/nnue/trainer/trainer_affine_transform.h | 4 +- src/nnue/trainer/trainer_clipped_relu.h | 4 +- .../trainer/trainer_feature_transformer.h | 4 +- src/nnue/trainer/trainer_input_slice.h | 4 +- src/nnue/trainer/trainer_sum.h | 4 +- src/uci.cpp | 6 +- src/ucioption.cpp | 2 - 31 files changed, 65 insertions(+), 223 deletions(-) diff --git a/src/Makefile b/src/Makefile index ca851dba..a07e1251 100644 --- a/src/Makefile +++ b/src/Makefile @@ -903,7 +903,7 @@ icc-profile-use: learn: config-sanity $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS=' -DEVAL_LEARN -DEVAL_NNUE -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ + EXTRACXXFLAGS=' -DEVAL_LEARN -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ EXTRALDFLAGS=' $(BLASLDFLAGS) -fopenmp ' \ all @@ -911,7 +911,7 @@ profile-learn: net config-sanity objclean profileclean @echo "" @echo "Step 1/4. Building instrumented executable ..." $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) \ - LEARNCXXFLAGS=' -DEVAL_LEARN -DEVAL_NNUE -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ + LEARNCXXFLAGS=' -DEVAL_LEARN -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ LEARNLDFLAGS=' $(BLASLDFLAGS) -fopenmp ' @echo "" @echo "Step 2/4. Running benchmark for pgo-build ..." @@ -920,7 +920,7 @@ profile-learn: net config-sanity objclean profileclean @echo "Step 3/4. Building optimized executable ..." $(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_use) \ - LEARNCXXFLAGS=' -DEVAL_LEARN -DEVAL_NNUE -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ + LEARNCXXFLAGS=' -DEVAL_LEARN -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ LEARNLDFLAGS=' $(BLASLDFLAGS) -fopenmp ' @echo "" @echo "Step 4/4. Deleting profile data ..." diff --git a/src/eval/evaluate_common.h b/src/eval/evaluate_common.h index dacbd2ba..3fb161ab 100644 --- a/src/eval/evaluate_common.h +++ b/src/eval/evaluate_common.h @@ -3,7 +3,6 @@ // A common header-like function for modern evaluation functions (EVAL_KPPT and EVAL_KPP_KKPT). 
-#if defined(EVAL_NNUE) || defined(EVAL_LEARN) #include // KK file name @@ -79,6 +78,4 @@ namespace Eval } -#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN) - #endif // _EVALUATE_KPPT_COMMON_H_ diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index b049192e..9ae83174 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -1,45 +1,41 @@ #if defined(EVAL_LEARN) #include "../eval/evaluate_common.h" - -#include "learn.h" -#include "multi_think.h" #include "../misc.h" -#include "../thread.h" +#include "../nnue/evaluate_nnue_learner.h" #include "../position.h" +#include "../syzygy/tbprobe.h" +#include "../thread.h" #include "../tt.h" #include "../uci.h" -#include "../syzygy/tbprobe.h" +#include "learn.h" +#include "multi_think.h" #if defined(USE_BOOK) #include "../extra/book/book.h" #endif #include -#include -#include -#include -#include -#include -#include -#include +#include #include #include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #if defined (_OPENMP) #include #endif -#if defined(EVAL_NNUE) -#include "../nnue/evaluate_nnue_learner.h" -#include -#include -#endif - using namespace std; namespace Learner @@ -692,12 +688,10 @@ namespace Learner // performed unless each node evaluate() is called! // If the depth is 8 or more, it seems // faster not to calculate this difference. -#if defined(EVAL_NNUE) if (depth < 8) { Eval::NNUE::update_eval(pos); } -#endif // defined(EVAL_NNUE) } // Reach leaf diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index f9d188b8..358848ec 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -17,45 +17,40 @@ // → I will not be involved in the engine because it is a problem that the GUI should assist. // etc.. -#define EVAL_LEARN - #if defined(EVAL_LEARN) #include "../eval/evaluate_common.h" - +#include "../misc.h" +#include "../nnue/evaluate_nnue_learner.h" +#include "../position.h" +#include "../syzygy/tbprobe.h" +#include "../thread.h" +#include "../tt.h" +#include "../uci.h" #include "learn.h" #include "multi_think.h" -#include "../uci.h" -#include "../syzygy/tbprobe.h" -#include "../misc.h" -#include "../thread.h" -#include "../position.h" -#include "../tt.h" #include -#include -#include -#include -#include -#include -#include -#include +#include #include // std::exp(),std::pow(),std::log() #include // memcpy() -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #if defined (_OPENMP) #include #endif -#if defined(EVAL_NNUE) -#include "../nnue/evaluate_nnue_learner.h" -#include -#include -#endif using namespace std; @@ -724,14 +719,12 @@ namespace Learner learn_sum_entropy = 0.0; #endif -#if defined(EVAL_NNUE) newbob_scale = 1.0; newbob_decay = 1.0; newbob_num_trials = 2; best_loss = std::numeric_limits::infinity(); latest_loss_sum = 0.0; latest_loss_count = 0; -#endif } virtual void thread_worker(size_t thread_id); @@ -787,7 +780,6 @@ namespace Learner atomic learn_sum_entropy; #endif -#if defined(EVAL_NNUE) shared_timed_mutex nn_mutex; double newbob_scale; double newbob_decay; @@ -796,7 +788,6 @@ namespace Learner double latest_loss_sum; uint64_t latest_loss_count; std::string best_nn_directory; -#endif uint64_t eval_save_interval; uint64_t loss_output_interval; @@ -844,13 +835,10 @@ namespace Learner // It doesn't matter if you have disabled the substitution table. 
TT.new_search(); - -#if defined(EVAL_NNUE) std::cout << "PROGRESS: " << now_string() << ", "; std::cout << sr.total_done << " sfens"; std::cout << ", iteration " << epoch; std::cout << ", eta = " << Eval::get_eta() << ", "; -#endif #if !defined(LOSS_FUNCTION_IS_ELMO_METHOD) double sum_error = 0; @@ -1009,10 +997,8 @@ namespace Learner #endif #if defined(LOSS_FUNCTION_IS_ELMO_METHOD) -#if defined(EVAL_NNUE) latest_loss_sum += test_sum_cross_entropy - test_sum_entropy; latest_loss_count += sr.sfen_for_mse.size(); -#endif // learn_cross_entropy may be called train cross // entropy in the world of machine learning, @@ -1074,14 +1060,10 @@ namespace Learner // display mse (this is sometimes done only for thread 0) // Immediately after being read from the file... -#if defined(EVAL_NNUE) // Lock the evaluation function so that it is not used during updating. shared_lock read_lock(nn_mutex, defer_lock); if (sr.next_update_weights <= sr.total_done || (thread_id != 0 && !read_lock.try_lock())) -#else - if (sr.next_update_weights <= sr.total_done) -#endif { if (thread_id != 0) { @@ -1105,18 +1087,6 @@ namespace Learner continue; } -#if !defined(EVAL_NNUE) - // Output the current time. Output every time. - std::cout << sr.total_done << " sfens , at " << now_string() << std::endl; - - // Reflect the gradient in the weight array at this timing. - // The calculation of the gradient is just right for - // each 1M phase in terms of mini-batch. - Eval::update_weights(epoch, freeze); - - // Display epoch and current eta for debugging. - std::cout << "epoch = " << epoch << " , eta = " << Eval::get_eta() << std::endl; -#else { // update parameters @@ -1124,7 +1094,7 @@ namespace Learner lock_guard write_lock(nn_mutex); Eval::NNUE::UpdateParameters(epoch); } -#endif + ++epoch; // However, the elapsed time during update_weights() and calc_rmse() is ignored. @@ -1156,9 +1126,7 @@ namespace Learner // loss calculation calc_loss(thread_id, done); -#if defined(EVAL_NNUE) Eval::NNUE::CheckHealth(); -#endif // Make a note of how far you have totaled. sr.last_done = sr.total_done; @@ -1216,25 +1184,6 @@ namespace Learner goto RETRY_READ; } -#if !defined(EVAL_NNUE) - if (skip_duplicated_positions_in_training) - { - const auto key = pos.key(); - - // Exclude the phase used for rmse calculation. - if (sr.is_for_rmse(key)) - goto RETRY_READ; - - // Exclude the most recently used aspect. - const auto hash_index = size_t(key & (sr.READ_SFEN_HASH_SIZE - 1)); - const auto key2 = sr.hash[hash_index]; - if (key == key2) - goto RETRY_READ; - - sr.hash[hash_index] = key; // Replace with the current key. - } -#endif - // There is a possibility that all the pieces are blocked and stuck. // Also, the declaration win phase is excluded from // learning because you cannot go to leaf with PV moves. @@ -1326,25 +1275,9 @@ namespace Learner learn_sum_entropy += learn_entropy; #endif -#if !defined(EVAL_NNUE) - // Slope - double dj_dw = calc_grad(deep_value, shallow_value, ps); - - // Add jd_dw as the gradient (∂J/∂Wj) for the - // feature vector currently appearing in the leaf node. - - // If it is not PV termination, apply a discount rate. - if (discount_rate != 0 && ply != (int)pv.size()) - dj_dw *= discount_rate; - - // Since we have reached leaf, add the gradient to the features that appear in this phase. - // Update based on gradient later. - Eval::add_grad(pos, rootColor, dj_dw, freeze); -#else const double example_weight = (discount_rate != 0 && ply != (int)pv.size()) ? 
discount_rate : 1.0; Eval::NNUE::AddExample(pos, rootColor, ps, example_weight); -#endif // Since the processing is completed, the counter of the processed number is incremented sr.total_done++; @@ -1425,7 +1358,6 @@ namespace Learner const std::string dir_name = std::to_string(dir_number++); Eval::save_eval(dir_name); -#if defined(EVAL_NNUE) if (newbob_decay != 1.0 && latest_loss_count > 0) { static int trials = newbob_num_trials; const double latest_loss = latest_loss_sum / latest_loss_count; @@ -1470,7 +1402,6 @@ namespace Learner return true; } } -#endif } return false; } @@ -1817,12 +1748,10 @@ namespace Learner // Optional item that does not let you learn KK/KKP/KPP/KPPP array freeze = {}; -#if defined(EVAL_NNUE) uint64_t nn_batch_size = 1000; double newbob_decay = 1.0; int newbob_num_trials = 2; string nn_options; -#endif uint64_t eval_save_interval = LEARN_EVAL_SAVE_INTERVAL; uint64_t loss_output_interval = 0; @@ -1922,12 +1851,11 @@ namespace Learner else if (option == "save_only_once") save_only_once = true; else if (option == "no_shuffle") no_shuffle = true; -#if defined(EVAL_NNUE) else if (option == "nn_batch_size") is >> nn_batch_size; else if (option == "newbob_decay") is >> newbob_decay; else if (option == "newbob_num_trials") is >> newbob_num_trials; else if (option == "nn_options") is >> nn_options; -#endif + else if (option == "eval_save_interval") is >> eval_save_interval; else if (option == "loss_output_interval") is >> loss_output_interval; else if (option == "mirror_percentage") is >> mirror_percentage; @@ -2074,23 +2002,18 @@ namespace Learner } } -#if !defined(EVAL_NNUE) - cout << "Gradient Method : " << LEARN_UPDATE << endl; -#endif cout << "Loss Function : " << LOSS_FUNCTION << endl; cout << "mini-batch size : " << mini_batch_size << endl; -#if defined(EVAL_NNUE) cout << "nn_batch_size : " << nn_batch_size << endl; cout << "nn_options : " << nn_options << endl; -#endif + cout << "learning rate : " << eta1 << " , " << eta2 << " , " << eta3 << endl; cout << "eta_epoch : " << eta1_epoch << " , " << eta2_epoch << endl; cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl; cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl; cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl; -#if defined(EVAL_NNUE) if (newbob_decay != 1.0) { cout << "scheduling : newbob with decay = " << newbob_decay << ", " << newbob_num_trials << " trials" << endl; @@ -2098,7 +2021,6 @@ namespace Learner else { cout << "scheduling : default" << endl; } -#endif cout << "discount rate : " << discount_rate << endl; @@ -2133,12 +2055,6 @@ namespace Learner // Read evaluation function parameters Eval::init_NNUE(); -#if !defined(EVAL_NNUE) - cout << "init_grad.." << endl; - - // Initialize gradient array of merit function parameters - Eval::init_grad(eta1, eta1_epoch, eta2, eta2_epoch, eta3); -#else cout << "init_training.." << endl; Eval::NNUE::InitializeTraining(eta1, eta1_epoch, eta2, eta2_epoch, eta3); Eval::NNUE::SetBatchSize(nn_batch_size); @@ -2146,7 +2062,6 @@ namespace Learner if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) { learn_think.best_nn_directory = std::string(Options["EvalDir"]); } -#endif #if 0 // A test to give a gradient of 1.0 to the initial stage of Hirate. 
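    // For reference, the newbob-style schedule wired up here works as
    // follows (an outline; names match the surrounding code, the control
    // flow is paraphrased from save() above):
    //
    //   latest_loss = latest_loss_sum / latest_loss_count;
    //   if (latest_loss < best_loss)        // improved: remember this net,
    //       best_loss = latest_loss;        // reset the trial counter
    //   else if (--trials > 0)              // worse: reload best_nn_directory
    //       newbob_scale *= newbob_decay;   // and decay the learning rate
    //   else
    //       converged = true;               // out of trials: stop training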
@@ -2170,11 +2085,9 @@ namespace Learner learn_think.freeze = freeze; learn_think.reduction_gameply = reduction_gameply; -#if defined(EVAL_NNUE) learn_think.newbob_scale = 1.0; learn_think.newbob_decay = newbob_decay; learn_think.newbob_num_trials = newbob_num_trials; -#endif learn_think.eval_save_interval = eval_save_interval; learn_think.loss_output_interval = loss_output_interval; @@ -2199,7 +2112,6 @@ namespace Learner // Calculate rmse once at this point (timing of 0 sfen) // sr.calc_rmse(); -#if defined(EVAL_NNUE) if (newbob_decay != 1.0) { learn_think.calc_loss(0, -1); learn_think.best_loss = learn_think.latest_loss_sum / learn_think.latest_loss_count; @@ -2207,7 +2119,6 @@ namespace Learner learn_think.latest_loss_count = 0; cout << "initial loss: " << learn_think.best_loss << endl; } -#endif // ----------------------------------- // start learning evaluation function parameters diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index 5c8cee71..a2845c96 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -184,13 +184,11 @@ namespace Eval::NNUE { Initialize(); -#if defined(EVAL_NNUE) if (Options["SkipLoadingEval"]) { std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl; return true; } -#endif fileName = evalFile; diff --git a/src/nnue/evaluate_nnue_learner.cpp b/src/nnue/evaluate_nnue_learner.cpp index 13d9d578..7be06832 100644 --- a/src/nnue/evaluate_nnue_learner.cpp +++ b/src/nnue/evaluate_nnue_learner.cpp @@ -1,6 +1,6 @@ // Code for learning NNUE evaluation function -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include #include @@ -229,4 +229,4 @@ double get_eta() { } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) diff --git a/src/nnue/evaluate_nnue_learner.h b/src/nnue/evaluate_nnue_learner.h index 1e4a463e..0e5fbcd2 100644 --- a/src/nnue/evaluate_nnue_learner.h +++ b/src/nnue/evaluate_nnue_learner.h @@ -3,7 +3,7 @@ #ifndef _EVALUATE_NNUE_LEARNER_H_ #define _EVALUATE_NNUE_LEARNER_H_ -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include "../learn/learn.h" @@ -41,6 +41,6 @@ void CheckHealth(); } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) #endif diff --git a/src/nnue/features/castling_right.cpp b/src/nnue/features/castling_right.cpp index ee7b6576..47fbd986 100644 --- a/src/nnue/features/castling_right.cpp +++ b/src/nnue/features/castling_right.cpp @@ -1,7 +1,5 @@ //Definition of input feature quantity K of NNUE evaluation function -#if defined(EVAL_NNUE) - #include "castling_right.h" #include "index_list.h" @@ -69,5 +67,3 @@ namespace Eval { } // namespace NNUE } // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/nnue/features/castling_right.h b/src/nnue/features/castling_right.h index 3af5b074..27074080 100644 --- a/src/nnue/features/castling_right.h +++ b/src/nnue/features/castling_right.h @@ -3,8 +3,6 @@ #ifndef _NNUE_FEATURES_CASTLING_RIGHT_H_ #define _NNUE_FEATURES_CASTLING_RIGHT_H_ -#if defined(EVAL_NNUE) - #include "../../evaluate.h" #include "features_common.h" @@ -43,6 +41,4 @@ namespace Eval { } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/features/enpassant.cpp b/src/nnue/features/enpassant.cpp index ea70529a..77bc936e 100644 --- a/src/nnue/features/enpassant.cpp +++ b/src/nnue/features/enpassant.cpp @@ -1,7 +1,5 @@ //Definition of input feature quantity K of NNUE evaluation function 
-#if defined(EVAL_NNUE) - #include "enpassant.h" #include "index_list.h" @@ -43,5 +41,3 @@ namespace Eval { } // namespace NNUE } // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/nnue/features/enpassant.h b/src/nnue/features/enpassant.h index f77f9c4f..70a8eb5a 100644 --- a/src/nnue/features/enpassant.h +++ b/src/nnue/features/enpassant.h @@ -3,8 +3,6 @@ #ifndef _NNUE_FEATURES_ENPASSANT_H_ #define _NNUE_FEATURES_ENPASSANT_H_ -#if defined(EVAL_NNUE) - #include "../../evaluate.h" #include "features_common.h" @@ -43,6 +41,4 @@ namespace Eval { } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/features/half_relative_kp.cpp b/src/nnue/features/half_relative_kp.cpp index 015ecb73..597d65fb 100644 --- a/src/nnue/features/half_relative_kp.cpp +++ b/src/nnue/features/half_relative_kp.cpp @@ -1,7 +1,5 @@ //Definition of input features HalfRelativeKP of NNUE evaluation function -#if defined(EVAL_NNUE) - #include "half_relative_kp.h" #include "index_list.h" @@ -74,5 +72,3 @@ template class HalfRelativeKP; } // namespace NNUE } // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/nnue/features/half_relative_kp.h b/src/nnue/features/half_relative_kp.h index 2d4182e4..1b384c14 100644 --- a/src/nnue/features/half_relative_kp.h +++ b/src/nnue/features/half_relative_kp.h @@ -3,8 +3,6 @@ #ifndef _NNUE_FEATURES_HALF_RELATIVE_KP_H_ #define _NNUE_FEATURES_HALF_RELATIVE_KP_H_ -#if defined(EVAL_NNUE) - #include "../../evaluate.h" #include "features_common.h" @@ -60,6 +58,4 @@ class HalfRelativeKP { } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/features/k.cpp b/src/nnue/features/k.cpp index 314b1338..38ec9997 100644 --- a/src/nnue/features/k.cpp +++ b/src/nnue/features/k.cpp @@ -1,7 +1,5 @@ //Definition of input feature quantity K of NNUE evaluation function -#if defined(EVAL_NNUE) - #include "k.h" #include "index_list.h" @@ -54,5 +52,3 @@ void K::AppendChangedIndices( } // namespace NNUE } // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/nnue/features/k.h b/src/nnue/features/k.h index 0c394f4e..9a0be4bb 100644 --- a/src/nnue/features/k.h +++ b/src/nnue/features/k.h @@ -3,8 +3,6 @@ #ifndef _NNUE_FEATURES_K_H_ #define _NNUE_FEATURES_K_H_ -#if defined(EVAL_NNUE) - #include "../../evaluate.h" #include "features_common.h" @@ -47,6 +45,4 @@ private: } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/features/p.cpp b/src/nnue/features/p.cpp index b4a6faf9..0c1b7d50 100644 --- a/src/nnue/features/p.cpp +++ b/src/nnue/features/p.cpp @@ -1,7 +1,5 @@ //Definition of input feature P of NNUE evaluation function -#if defined(EVAL_NNUE) - #include "p.h" #include "index_list.h" @@ -52,5 +50,3 @@ void P::AppendChangedIndices( } // namespace NNUE } // namespace Eval - -#endif // defined(EVAL_NNUE) diff --git a/src/nnue/features/p.h b/src/nnue/features/p.h index b3d4191e..07d88952 100644 --- a/src/nnue/features/p.h +++ b/src/nnue/features/p.h @@ -3,8 +3,6 @@ #ifndef _NNUE_FEATURES_P_H_ #define _NNUE_FEATURES_P_H_ -#if defined(EVAL_NNUE) - #include "../../evaluate.h" #include "features_common.h" @@ -47,6 +45,4 @@ class P { } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/layers/sum.h b/src/nnue/layers/sum.h index d8c7bf93..419ced89 100644 --- a/src/nnue/layers/sum.h +++ b/src/nnue/layers/sum.h @@ -3,8 +3,6 @@ #ifndef _NNUE_LAYERS_SUM_H_ #define _NNUE_LAYERS_SUM_H_ -#if defined(EVAL_NNUE) - #include "../nnue_common.h" namespace Eval { @@ -158,6 +156,4 @@ 
class Sum { } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/nnue_test_command.cpp b/src/nnue/nnue_test_command.cpp index 311c5ded..b8346693 100644 --- a/src/nnue/nnue_test_command.cpp +++ b/src/nnue/nnue_test_command.cpp @@ -1,6 +1,6 @@ // USI extended command for NNUE evaluation function -#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE) +#if defined(ENABLE_TEST_CMD) #include "../thread.h" #include "../uci.h" @@ -198,4 +198,4 @@ void TestCommand(Position& pos, std::istream& stream) { } // namespace Eval -#endif // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE) +#endif // defined(ENABLE_TEST_CMD) diff --git a/src/nnue/nnue_test_command.h b/src/nnue/nnue_test_command.h index 570ef01b..30854fd2 100644 --- a/src/nnue/nnue_test_command.h +++ b/src/nnue/nnue_test_command.h @@ -3,7 +3,7 @@ #ifndef _NNUE_TEST_COMMAND_H_ #define _NNUE_TEST_COMMAND_H_ -#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE) +#if defined(ENABLE_TEST_CMD) namespace Eval { @@ -16,6 +16,6 @@ void TestCommand(Position& pos, std::istream& stream); } // namespace Eval -#endif // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE) +#endif // defined(ENABLE_TEST_CMD) #endif diff --git a/src/nnue/trainer/features/factorizer.h b/src/nnue/trainer/features/factorizer.h index 148ee8ec..43950de2 100644 --- a/src/nnue/trainer/features/factorizer.h +++ b/src/nnue/trainer/features/factorizer.h @@ -3,8 +3,6 @@ #ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_H_ #define _NNUE_TRAINER_FEATURES_FACTORIZER_H_ -#if defined(EVAL_NNUE) - #include "../../nnue_common.h" #include "../trainer.h" @@ -105,6 +103,4 @@ constexpr std::size_t GetArrayLength(const T (&/*array*/)[N]) { } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/trainer/features/factorizer_feature_set.h b/src/nnue/trainer/features/factorizer_feature_set.h index af524719..caf6608b 100644 --- a/src/nnue/trainer/features/factorizer_feature_set.h +++ b/src/nnue/trainer/features/factorizer_feature_set.h @@ -3,8 +3,6 @@ #ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_ #define _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_ -#if defined(EVAL_NNUE) - #include "../../features/feature_set.h" #include "factorizer.h" @@ -99,6 +97,4 @@ public: } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/trainer/features/factorizer_half_kp.h b/src/nnue/trainer/features/factorizer_half_kp.h index 955894e8..70a6acca 100644 --- a/src/nnue/trainer/features/factorizer_half_kp.h +++ b/src/nnue/trainer/features/factorizer_half_kp.h @@ -3,8 +3,6 @@ #ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_ #define _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_ -#if defined(EVAL_NNUE) - #include "../../features/half_kp.h" #include "../../features/p.h" #include "../../features/half_relative_kp.h" @@ -98,6 +96,4 @@ constexpr FeatureProperties Factorizer>::kProperties[]; } // namespace Eval -#endif // defined(EVAL_NNUE) - #endif diff --git a/src/nnue/trainer/trainer.h b/src/nnue/trainer/trainer.h index 4b467041..d526557a 100644 --- a/src/nnue/trainer/trainer.h +++ b/src/nnue/trainer/trainer.h @@ -3,7 +3,7 @@ #ifndef _NNUE_TRAINER_H_ #define _NNUE_TRAINER_H_ -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include "../nnue_common.h" #include "../features/index_list.h" @@ -120,6 +120,6 @@ std::shared_ptr MakeAlignedSharedPtr(ArgumentTypes&&... 
arguments) { } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) #endif diff --git a/src/nnue/trainer/trainer_affine_transform.h b/src/nnue/trainer/trainer_affine_transform.h index da11ca29..4b5ddee6 100644 --- a/src/nnue/trainer/trainer_affine_transform.h +++ b/src/nnue/trainer/trainer_affine_transform.h @@ -3,7 +3,7 @@ #ifndef _NNUE_TRAINER_AFFINE_TRANSFORM_H_ #define _NNUE_TRAINER_AFFINE_TRANSFORM_H_ -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include "../../learn/learn.h" #include "../layers/affine_transform.h" @@ -296,6 +296,6 @@ class Trainer> { } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) #endif diff --git a/src/nnue/trainer/trainer_clipped_relu.h b/src/nnue/trainer/trainer_clipped_relu.h index bd59a02d..72575bf8 100644 --- a/src/nnue/trainer/trainer_clipped_relu.h +++ b/src/nnue/trainer/trainer_clipped_relu.h @@ -3,7 +3,7 @@ #ifndef _NNUE_TRAINER_CLIPPED_RELU_H_ #define _NNUE_TRAINER_CLIPPED_RELU_H_ -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include "../../learn/learn.h" #include "../layers/clipped_relu.h" @@ -137,6 +137,6 @@ class Trainer> { } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) #endif diff --git a/src/nnue/trainer/trainer_feature_transformer.h b/src/nnue/trainer/trainer_feature_transformer.h index 97dbeff4..6b94d952 100644 --- a/src/nnue/trainer/trainer_feature_transformer.h +++ b/src/nnue/trainer/trainer_feature_transformer.h @@ -3,7 +3,7 @@ #ifndef _NNUE_TRAINER_FEATURE_TRANSFORMER_H_ #define _NNUE_TRAINER_FEATURE_TRANSFORMER_H_ -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include "../../learn/learn.h" #include "../nnue_feature_transformer.h" @@ -372,6 +372,6 @@ class Trainer { } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) #endif diff --git a/src/nnue/trainer/trainer_input_slice.h b/src/nnue/trainer/trainer_input_slice.h index 7d9e76c3..b6d6635b 100644 --- a/src/nnue/trainer/trainer_input_slice.h +++ b/src/nnue/trainer/trainer_input_slice.h @@ -3,7 +3,7 @@ #ifndef _NNUE_TRAINER_INPUT_SLICE_H_ #define _NNUE_TRAINER_INPUT_SLICE_H_ -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include "../../learn/learn.h" #include "../layers/input_slice.h" @@ -246,6 +246,6 @@ class Trainer> { } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) #endif diff --git a/src/nnue/trainer/trainer_sum.h b/src/nnue/trainer/trainer_sum.h index f7bf3b3d..0b7abe36 100644 --- a/src/nnue/trainer/trainer_sum.h +++ b/src/nnue/trainer/trainer_sum.h @@ -3,7 +3,7 @@ #ifndef _NNUE_TRAINER_SUM_H_ #define _NNUE_TRAINER_SUM_H_ -#if defined(EVAL_LEARN) && defined(EVAL_NNUE) +#if defined(EVAL_LEARN) #include "../../learn/learn.h" #include "../layers/sum.h" @@ -185,6 +185,6 @@ class Trainer> { } // namespace Eval -#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE) +#endif // defined(EVAL_LEARN) #endif diff --git a/src/uci.cpp b/src/uci.cpp index d6745d19..5be2afbb 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -32,7 +32,7 @@ #include "uci.h" #include "syzygy/tbprobe.h" -#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD) +#if defined(ENABLE_TEST_CMD) #include "nnue/nnue_test_command.h" #endif @@ -67,7 +67,7 @@ namespace Learner } #endif -#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD) +#if defined(ENABLE_TEST_CMD) void 
test_cmd(Position& pos, istringstream& is) { // Initialize as it may be searched. @@ -373,7 +373,7 @@ void UCI::loop(int argc, char* argv[]) { #endif -#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD) +#if defined(ENABLE_TEST_CMD) // test command else if (token == "test") test_cmd(pos, is); #endif diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 0007b559..4f9fab5e 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -83,7 +83,6 @@ void init(OptionsMap& o) { // The default must follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. o["EvalFile"] << Option("nn-82215d0fd0df.nnue", on_eval_file); -#ifdef EVAL_NNUE // When the evaluation function is loaded at the ucinewgame timing, it is necessary to convert the new evaluation function. // I want to hit the test eval convert command, but there is no new evaluation function // It ends abnormally before executing this command. @@ -92,7 +91,6 @@ void init(OptionsMap& o) { o["SkipLoadingEval"] << Option(false); // how many moves to use a fixed move // o["BookMoves"] << Option(16, 0, 10000); -#endif #if defined(EVAL_LEARN) // When learning the evaluation function, you can change the folder to save the evaluation function. // Evalsave by default. This folder shall be prepared in advance. From e6a6ba52213290d0996913ec6367a8364c5199ec Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 22:49:55 +0900 Subject: [PATCH 08/30] Removed USE_BOOK macro. --- src/learn/gensfen.cpp | 40 ---------------------------------------- src/learn/learner.cpp | 5 ----- 2 files changed, 45 deletions(-) diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index 9ae83174..589d9559 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -11,10 +11,6 @@ #include "learn.h" #include "multi_think.h" -#if defined(USE_BOOK) -#include "../extra/book/book.h" -#endif - #include #include #include @@ -750,11 +746,6 @@ namespace Learner auto& pos = th->rootPos; pos.set(StartFEN, false, &si, th); -#if defined(USE_BOOK) - // Refer to the members of BookMoveSelector defined in the search section. - auto& book = ::book; -#endif - // Vector for holding the sfens in the current simulated game. PSVector a_psv; a_psv.reserve(write_maxply + MAX_PLY); @@ -788,35 +779,7 @@ namespace Learner flush_psv(result.value()); break; } -#if defined(USE_BOOK) - if ((next_move = book.probe(pos)) != MOVE_NONE) - { - // Hit the constant track. - // The move was stored in next_move. - // Do not use the fixed phase for learning. - sfens.clear(); - - if (random_move_minply != -1) - { - // Random move is performed with a certain - // probability even in the constant phase. - goto RANDOM_MOVE; - } - else - { - // When -1 is specified as random_move_minply, - // it points according to the standard until - // it goes out of the standard. - // Prepare an innumerable number of situations - // that have left the constant as - // ConsiderationBookMoveCount true using a huge constant - // Used for purposes such as performing - // a random move 5 times from there. 
- goto DO_MOVE; - } - } -#endif { auto [search_value, search_pv] = search(pos, depth, 1, nodes); @@ -1124,9 +1087,6 @@ namespace Learner << " loop_max = " << loop_max << endl << " eval_limit = " << eval_limit << endl << " thread_num (set by USI setoption) = " << thread_num << endl -#if defined(USE_BOOK) - << " book_moves (set by USI setoption) = " << Options["BookMoves"] << endl -#endif << " random_move_minply = " << random_move_minply << endl << " random_move_maxply = " << random_move_maxply << endl << " random_move_count = " << random_move_count << endl diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 358848ec..e7f021fe 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -54,11 +54,6 @@ using namespace std; -#if defined(USE_BOOK) -// This is defined in the search section. -extern Book::BookMoveSelector book; -#endif - template T operator +=(std::atomic& x, const T rhs) { From 21cfead52c2a77abc4e9eed21739ccc3df9826c0 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 22:52:46 +0900 Subject: [PATCH 09/30] Removed unused OMP_ macro. --- src/learn/convert.cpp | 4 ---- src/learn/gensfen.cpp | 4 ---- src/learn/learning_tools.cpp | 3 --- 3 files changed, 11 deletions(-) diff --git a/src/learn/convert.cpp b/src/learn/convert.cpp index 9bd9548d..d07fc00c 100644 --- a/src/learn/convert.cpp +++ b/src/learn/convert.cpp @@ -27,10 +27,6 @@ #include #include -#if defined (_OPENMP) -#include -#endif - using namespace std; namespace Learner diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index 589d9559..65e64177 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -28,10 +28,6 @@ #include #include -#if defined (_OPENMP) -#include -#endif - using namespace std; namespace Learner diff --git a/src/learn/learning_tools.cpp b/src/learn/learning_tools.cpp index de6da9c5..eca11c47 100644 --- a/src/learn/learning_tools.cpp +++ b/src/learn/learning_tools.cpp @@ -2,9 +2,6 @@ #if defined (EVAL_LEARN) -#if defined(_OPENMP) -#include -#endif #include "../misc.h" using namespace Eval; From 1d00d002412e11505430a9da32297b81e11b6801 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 22:59:57 +0900 Subject: [PATCH 10/30] Removed ENABLE_TEST_CMD macro. --- src/Makefile | 6 +++--- src/nnue/nnue_test_command.cpp | 4 ---- src/nnue/nnue_test_command.h | 4 ---- src/uci.cpp | 11 ++--------- 4 files changed, 5 insertions(+), 20 deletions(-) diff --git a/src/Makefile b/src/Makefile index a07e1251..49c6c1b3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -903,7 +903,7 @@ icc-profile-use: learn: config-sanity $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS=' -DEVAL_LEARN -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ + EXTRACXXFLAGS=' -DEVAL_LEARN -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ EXTRALDFLAGS=' $(BLASLDFLAGS) -fopenmp ' \ all @@ -911,7 +911,7 @@ profile-learn: net config-sanity objclean profileclean @echo "" @echo "Step 1/4. Building instrumented executable ..." $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) \ - LEARNCXXFLAGS=' -DEVAL_LEARN -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ + LEARNCXXFLAGS=' -DEVAL_LEARN -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ LEARNLDFLAGS=' $(BLASLDFLAGS) -fopenmp ' @echo "" @echo "Step 2/4. Running benchmark for pgo-build ..." @@ -920,7 +920,7 @@ profile-learn: net config-sanity objclean profileclean @echo "Step 3/4. Building optimized executable ..." 
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_use) \ - LEARNCXXFLAGS=' -DEVAL_LEARN -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ + LEARNCXXFLAGS=' -DEVAL_LEARN -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \ LEARNLDFLAGS=' $(BLASLDFLAGS) -fopenmp ' @echo "" @echo "Step 4/4. Deleting profile data ..." diff --git a/src/nnue/nnue_test_command.cpp b/src/nnue/nnue_test_command.cpp index b8346693..c3a53c7d 100644 --- a/src/nnue/nnue_test_command.cpp +++ b/src/nnue/nnue_test_command.cpp @@ -1,7 +1,5 @@ // USI extended command for NNUE evaluation function -#if defined(ENABLE_TEST_CMD) - #include "../thread.h" #include "../uci.h" #include "evaluate_nnue.h" @@ -197,5 +195,3 @@ void TestCommand(Position& pos, std::istream& stream) { } // namespace NNUE } // namespace Eval - -#endif // defined(ENABLE_TEST_CMD) diff --git a/src/nnue/nnue_test_command.h b/src/nnue/nnue_test_command.h index 30854fd2..75d33e82 100644 --- a/src/nnue/nnue_test_command.h +++ b/src/nnue/nnue_test_command.h @@ -3,8 +3,6 @@ #ifndef _NNUE_TEST_COMMAND_H_ #define _NNUE_TEST_COMMAND_H_ -#if defined(ENABLE_TEST_CMD) - namespace Eval { namespace NNUE { @@ -16,6 +14,4 @@ void TestCommand(Position& pos, std::istream& stream); } // namespace Eval -#endif // defined(ENABLE_TEST_CMD) - #endif diff --git a/src/uci.cpp b/src/uci.cpp index 5be2afbb..1454e4e0 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -24,17 +24,14 @@ #include "evaluate.h" #include "movegen.h" +#include "nnue/nnue_test_command.h" #include "position.h" #include "search.h" +#include "syzygy/tbprobe.h" #include "thread.h" #include "timeman.h" #include "tt.h" #include "uci.h" -#include "syzygy/tbprobe.h" - -#if defined(ENABLE_TEST_CMD) -#include "nnue/nnue_test_command.h" -#endif using namespace std; @@ -67,7 +64,6 @@ namespace Learner } #endif -#if defined(ENABLE_TEST_CMD) void test_cmd(Position& pos, istringstream& is) { // Initialize as it may be searched. @@ -78,7 +74,6 @@ void test_cmd(Position& pos, istringstream& is) if (param == "nnue") Eval::NNUE::TestCommand(pos, is); } -#endif namespace { @@ -373,10 +368,8 @@ void UCI::loop(int argc, char* argv[]) { #endif -#if defined(ENABLE_TEST_CMD) // test command else if (token == "test") test_cmd(pos, is); -#endif else sync_cout << "Unknown command: " << cmd << sync_endl; From 458771a18199d4f64f4190521bea4aa91460c462 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 23:02:31 +0900 Subject: [PATCH 11/30] Removed GENSFEN2019 macro. --- src/uci.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/uci.cpp b/src/uci.cpp index 1454e4e0..6675f2e0 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -50,11 +50,6 @@ namespace Learner // Learning from the generated game record void learn(Position& pos, istringstream& is); -#if defined(GENSFEN2019) - // Automatic generation command of teacher phase under development - void gen_sfen2019(Position& pos, istringstream& is); -#endif - // A pair of reader and evaluation value. Returned by Learner::search(),Learner::qsearch(). 
typedef std::pair > ValueAndPV; @@ -358,10 +353,6 @@ void UCI::loop(int argc, char* argv[]) { else if (token == "gensfen") Learner::gen_sfen(pos, is); else if (token == "learn") Learner::learn(pos, is); -#if defined (GENSFEN2019) - // Command to generate teacher phase under development - else if (token == "gensfen2019") Learner::gen_sfen2019(pos, is); -#endif // Command to call qsearch(),search() directly for testing else if (token == "qsearch") qsearch_cmd(pos); else if (token == "search") search_cmd(pos, is); From 04a9a951b8611d6f176d49c9edd24d22ec5ba457 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 23:08:39 +0900 Subject: [PATCH 12/30] Removed "#if 0" and "#if 1". --- src/learn/gensfen.cpp | 24 +++------------------ src/learn/learner.cpp | 50 ------------------------------------------- 2 files changed, 3 insertions(+), 71 deletions(-) diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index 65e64177..ec3de570 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -166,7 +166,7 @@ namespace Learner output_file_stream.write(reinterpret_cast(buf->data()), sizeof(PackedSfenValue) * buf->size()); sfen_write_count += buf->size(); -#if 1 + // Add the processed number here, and if it exceeds save_every, // change the file name and reset this counter. sfen_write_count_current_file += buf->size(); @@ -186,7 +186,7 @@ namespace Learner output_file_stream.open(new_filename, ios::out | ios::binary | ios::app); cout << endl << "output sfen file = " << new_filename << endl; } -#endif + // Output '.' every time when writing a game record. std::cout << "."; @@ -519,10 +519,6 @@ namespace Learner { // Write out one sfen. sfen_writer.write(thread_id, *it); -#if 0 - pos.set_from_packed_sfen(it->sfen); - cout << pos << "Win : " << it->is_win << " , " << it->score << endl; -#endif } return quit; @@ -667,13 +663,12 @@ namespace Learner for (auto m : pv) { -#if 1 // There should be no illegal move. This is as a debugging precaution. if (!pos.pseudo_legal(m) || !pos.legal(m)) { cout << "Error! : " << pos.fen() << m << endl; } -#endif + pos.do_move(m, states[ply++]); // Because the difference calculation of evaluate() cannot be @@ -803,19 +798,6 @@ namespace Learner // Save the move score for adjudication. move_hist_scores.push_back(search_value); -#if 0 - dbg_hit_on(search_value == leaf_value); - // gensfen depth 3 eval_limit 32000 - // Total 217749 Hits 203579 hit rate (%) 93.490 - // gensfen depth 6 eval_limit 32000 - // Total 78407 Hits 69190 hit rate (%) 88.245 - // gensfen depth 6 eval_limit 3000 - // Total 53879 Hits 43713 hit rate (%) 81.132 - - // Problems such as pruning with moves in the substitution table. - // This is a little uncomfortable as a teacher... -#endif - // If depth 0, pv is not obtained, so search again at depth 2. if (search_depth_min <= 0) { diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index e7f021fe..2f1d27b2 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -1163,11 +1163,6 @@ namespace Learner if (ps.gamePly < prng.rand(reduction_gameply)) goto RETRY_READ; -#if 0 - auto sfen = pos.sfen_unpack(ps.data); - pos.set(sfen); -#endif - // ↑ Since it is slow when passing through sfen, I made a dedicated function. StateInfo si; const bool mirror = prng.rand(100) < mirror_percentage; if (pos.set_from_packed_sfen(ps.sfen, &si, th, mirror) != 0) @@ -1207,28 +1202,6 @@ namespace Learner // If it is the result of searching a completely different place, it may become noise. 
// It may be better not to study where the difference in evaluation values ​​is too large. -#if 0 - // If you do this, about 13% of the phases will be excluded - // from the learning target. Good and bad are subtle. - if (pv.size() >= 1 && (uint16_t)pv[0] != ps.move) - { - //dbg_hit_on(false); - continue; - } -#endif - -#if 0 - // It may be better not to study where the difference in evaluation values ​​is too large. - // → It's okay because it passes the win rate function... - // About 30% of the phases are out of the scope of learning... - if (abs((int16_t)r.first - ps.score) >= Eval::PawnValue * 4) - { - //dbg_hit_on(false); - continue; - } - //dbg_hit_on(true); -#endif - int ply = 0; // A helper function that adds the gradient to the current phase. @@ -1315,17 +1288,6 @@ namespace Learner // rewind the phase for (auto it = pv.rbegin(); it != pv.rend(); ++it) pos.undo_move(*it); - -#if 0 - // When adding the gradient to the root phase - shallow_value = - (rootColor == pos.side_to_move()) - ? Eval::evaluate(pos) - : -Eval::evaluate(pos); - - dj_dw = calc_grad(deep_value, shallow_value, ps); - Eval::add_grad(pos, rootColor, dj_dw, without_kpp); -#endif } } @@ -2058,18 +2020,6 @@ namespace Learner learn_think.best_nn_directory = std::string(Options["EvalDir"]); } -#if 0 - // A test to give a gradient of 1.0 to the initial stage of Hirate. - pos.set_hirate(); - cout << Eval::evaluate(pos) << endl; - //Eval::print_eval_stat(pos); - Eval::add_grad(pos, BLACK, 32.0, false); - Eval::update_weights(1); - pos.state()->sum.p[2][0] = VALUE_NOT_EVALUATED; - cout << Eval::evaluate(pos) << endl; - //Eval::print_eval_stat(pos); -#endif - cout << "init done." << endl; // Reflect other option settings. From ec96409176fa8f2cdc2e8a003150fcabf037f85c Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 23:30:57 +0900 Subject: [PATCH 13/30] Replaced DNDEBUG macro to _DEBUG macro. --- src/learn/gensfen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index ec3de570..0232e5d4 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -70,7 +70,7 @@ namespace Learner file_worker_thread.join(); output_file_stream.close(); -#if !defined(DNDEBUG) +#if defined(_DEBUG) { // All buffers should be empty since file_worker_thread // should have written everything before exiting. From aa2452caf39446fded3c0ee79c18c3ecb43369b3 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 23:45:19 +0900 Subject: [PATCH 14/30] Removed #if for USE_EVAL_HASH. --- src/eval/evaluate_common.h | 6 ------ src/learn/gensfen.cpp | 10 ---------- src/learn/learner.cpp | 2 -- 3 files changed, 18 deletions(-) diff --git a/src/eval/evaluate_common.h b/src/eval/evaluate_common.h index 3fb161ab..927783cd 100644 --- a/src/eval/evaluate_common.h +++ b/src/eval/evaluate_common.h @@ -18,12 +18,6 @@ namespace Eval { - -#if defined(USE_EVAL_HASH) - // prefetch function - void prefetch_evalhash(const Key key); -#endif - // An operator that applies the function f to each parameter of the evaluation function. // Used for parameter analysis etc. // type indicates the survey target. 
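A note on the guard patch 13 rewrites above: #if !defined(DNDEBUG) looks like a typo for the standard NDEBUG macro (the compiler flag -DNDEBUG defines NDEBUG; a macro literally named DNDEBUG is never defined), so the old "buffers empty" check was compiled into every build, release included. The new _DEBUG spelling follows the MSVC debug-runtime convention. A minimal sketch of the corrected pattern, with a hypothetical check_all_flushed() helper standing in for the real SfenWriter code:

#include <cassert>
#include <vector>

// Hypothetical stand-in for SfenWriter's per-thread buffers.
static std::vector<std::vector<char>> buffers(4);

static void check_all_flushed() {
#if defined(_DEBUG)
    // Compiled only into debug builds (MSVC defines _DEBUG under /MTd and
    // /MDd; other compilers would need an explicit -D_DEBUG).
    for (const auto& buf : buffers)
        assert(buf.empty());
#endif
}

int main() {
    check_all_flushed();
    return 0;
}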
diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index 0232e5d4..4050d983 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -956,12 +956,6 @@ namespace Learner string token; - // When hit to eval hash, as a evaluation value near the initial stage, if a hash collision occurs and a large value is written - // When eval_limit is set small, eval_limit will be exceeded every time in the initial phase, and phase generation will not proceed. - // Therefore, eval hash needs to be disabled. - // After that, when the hash of the eval hash collides, the evaluation value of a strange value is used, and it may be unpleasant to use it for the teacher. - bool use_eval_hash = false; - // Save to file in this unit. // File names are serialized like file_1.bin, file_2.bin. uint64_t save_every = UINT64_MAX; @@ -1010,8 +1004,6 @@ namespace Learner is >> write_minply; else if (token == "write_maxply") is >> write_maxply; - else if (token == "use_eval_hash") - is >> use_eval_hash; else if (token == "save_every") is >> save_every; else if (token == "random_file_name") @@ -1033,7 +1025,6 @@ namespace Learner #if defined(USE_GLOBAL_OPTIONS) // Save it for later restore. auto oldGlobalOptions = GlobalOptions; - GlobalOptions.use_eval_hash = use_eval_hash; #endif // If search depth2 is not set, leave it the same as search depth. @@ -1075,7 +1066,6 @@ namespace Learner << " write_minply = " << write_minply << endl << " write_maxply = " << write_maxply << endl << " output_file_name = " << output_file_name << endl - << " use_eval_hash = " << use_eval_hash << endl << " save_every = " << save_every << endl << " random_file_name = " << random_file_name << endl << " write_out_draw_game_in_training_data_generation = " << write_out_draw_game_in_training_data_generation << endl diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 2f1d27b2..9e6f10cb 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -1639,8 +1639,6 @@ namespace Learner #if defined(USE_GLOBAL_OPTIONS) // Save it for later restore. auto oldGlobalOptions = GlobalOptions; - // If you hit the eval hash, you can not calculate rmse etc. so turn it off. - GlobalOptions.use_eval_hash = false; // If you hit the replacement table, pruning may occur at the previous evaluation value, so turn it off. GlobalOptions.use_hash_probe = false; #endif From 82dc68ba9ffe1d5fe849eef1f0fcc565ef810512 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 23:47:04 +0900 Subject: [PATCH 15/30] Removed #if for USE_GLOBAL_OPTIONS. --- src/learn/gensfen.cpp | 11 ----------- src/learn/learner.cpp | 12 ------------ src/search.cpp | 11 ----------- 3 files changed, 34 deletions(-) diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index 4050d983..3d015acf 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -1022,11 +1022,6 @@ namespace Learner cout << "Error! : Illegal token " << token << endl; } -#if defined(USE_GLOBAL_OPTIONS) - // Save it for later restore. - auto oldGlobalOptions = GlobalOptions; -#endif - // If search depth2 is not set, leave it the same as search depth. if (search_depth_max == INT_MIN) search_depth_max = search_depth_min; @@ -1103,12 +1098,6 @@ namespace Learner } std::cout << "gensfen finished." << endl; - -#if defined(USE_GLOBAL_OPTIONS) - // Restore Global Options. 
- GlobalOptions = oldGlobalOptions; -#endif - } } #endif diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 9e6f10cb..daea9594 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -1636,13 +1636,6 @@ namespace Learner uint64_t eta1_epoch = 0; // eta2 is not applied by default uint64_t eta2_epoch = 0; // eta3 is not applied by default -#if defined(USE_GLOBAL_OPTIONS) - // Save it for later restore. - auto oldGlobalOptions = GlobalOptions; - // If you hit the replacement table, pruning may occur at the previous evaluation value, so turn it off. - GlobalOptions.use_hash_probe = false; -#endif - // --- Function that only shuffles the teacher aspect // normal shuffle @@ -2072,11 +2065,6 @@ namespace Learner // Save once at the end. learn_think.save(true); - -#if defined(USE_GLOBAL_OPTIONS) - // Restore Global Options. - GlobalOptions = oldGlobalOptions; -#endif } } // namespace Learner diff --git a/src/search.cpp b/src/search.cpp index 8f258ae4..67348a2b 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -2070,17 +2070,6 @@ namespace Learner rootMoves.push_back(Search::RootMove(m)); assert(!rootMoves.empty()); - - //#if defined(USE_GLOBAL_OPTIONS) - // Since the generation of the substitution table for each search thread should be managed, - // Increase the generation of the substitution table for this thread because it is a new search. - //TT.new_search(th->thread_id()); - - // ª If you call new_search here, it may be a loss because you can't use the previous search result. - // Do not do this here, but caller should do TT.new_search(th->thread_id()) for each station ... - - // ¨Because we want to avoid reaching the same final diagram, use the substitution table commonly for all threads when generating teachers. - //#endif } } From 05d26499b42878447a21b6d721f4868151357665 Mon Sep 17 00:00:00 2001 From: nodchip Date: Tue, 8 Sep 2020 23:57:51 +0900 Subject: [PATCH 16/30] Removed LEARN_ELMO_METHOD macro. --- src/learn/learn.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 1bc39cf9..7285f61a 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -5,18 +5,6 @@ #include -// ===================== -// Settings for learning -// ===================== - -// If you select one of the following, the details after that will be automatically selected. -// If you don't select any of them, you need to set the subsequent details one by one. - -// Learning setting by elmo method. This is the default setting. -// To make a standard squeeze diaphragm, specify "lambda 1" with the learn command. -#define LEARN_ELMO_METHOD - - // ---------------------- // update formula // ---------------------- @@ -147,10 +135,8 @@ typedef float LearnFloatType; // Learning with the method of elmo (WCSC27) // ---------------------- -#if defined( LEARN_ELMO_METHOD ) #define LOSS_FUNCTION_IS_ELMO_METHOD #define ADA_GRAD_UPDATE -#endif // Character string according to update formula. (Output for debugging.) // Implemented various update expressions, but concluded that AdaGrad is the best in terms of speed and memory. From 0271d707759117af6557beb93319aa51c07280aa Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:01:53 +0900 Subject: [PATCH 17/30] Removed ADA_GRAD_UPDATE macro. 
--- src/learn/learn.h | 10 +------ src/learn/learning_tools.h | 54 +------------------------------------- 2 files changed, 2 insertions(+), 62 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 7285f61a..8fb6217f 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -9,9 +9,6 @@ // update formula // ---------------------- -// Ada Grad. Recommended because it is stable. -// #define ADA_GRAD_UPDATE - // SGD looking only at the sign of the gradient. It requires less memory, but the accuracy is... // #define SGD_UPDATE @@ -136,13 +133,8 @@ typedef float LearnFloatType; // ---------------------- #define LOSS_FUNCTION_IS_ELMO_METHOD -#define ADA_GRAD_UPDATE -// Character string according to update formula. (Output for debugging.) -// Implemented various update expressions, but concluded that AdaGrad is the best in terms of speed and memory. -#if defined(ADA_GRAD_UPDATE) -#define LEARN_UPDATE "AdaGrad" -#elif defined(SGD_UPDATE) +#if defined(SGD_UPDATE) #define LEARN_UPDATE "SGD" #endif diff --git a/src/learn/learning_tools.h b/src/learn/learning_tools.h index 3c4be08a..854133e4 100644 --- a/src/learn/learning_tools.h +++ b/src/learn/learning_tools.h @@ -76,59 +76,7 @@ namespace EvalLearningTools template void updateFV(T& v) { updateFV(v, 1.0); } -#if defined (ADA_GRAD_UPDATE) - - // Since the maximum value that can be accurately calculated with float is INT16_MAX*256-1 - // Keep the small value as a marker. - const LearnFloatType V0_NOT_INIT = (INT16_MAX * 128); - - // What holds v internally. The previous implementation kept a fixed decimal with only a fractional part to save memory, - // Since it is doubtful in accuracy and the visibility is bad, it was abolished. - LearnFloatType v0 = LearnFloatType(V0_NOT_INIT); - - // AdaGrad g2 - LearnFloatType g2 = LearnFloatType(0); - - // update with AdaGrad - // When executing this function, the value of g and the member do not change - // Guaranteed by the caller. It does not have to be an atomic operation. - // k is a coefficient for eta. 1.0 is usually sufficient. If you want to lower eta for your turn item, set this to 1/8.0 etc. - template - void updateFV(T& v,double k) - { - // AdaGrad update formula - // Gradient vector is g, vector to be updated is v, η(eta) is a constant, - // g2 = g2 + g^2 - // v = v - ηg/sqrt(g2) - - constexpr double epsilon = 0.000001; - - if (g == LearnFloatType(0)) - return; - - g2 += g * g; - - // If v0 is V0_NOT_INIT, it means that the value is not initialized with the value of KK/KKP/KPP array, - // In this case, read the value of v from the one passed in the argument. - double V = (v0 == V0_NOT_INIT) ? v : v0; - - V -= k * eta * (double)g / sqrt((double)g2 + epsilon); - - // Limit the value of V to be within the range of types. - // By the way, windows.h defines the min and max macros, so to avoid it, - // Here, it is enclosed in parentheses so that it is not treated as a function-like macro. - V = (std::min)((double)(std::numeric_limits::max)() , V); - V = (std::max)((double)(std::numeric_limits::min)() , V); - - v0 = (LearnFloatType)V; - v = (T)round(V); - - // Clear g because one update of mini-batch for this element is over - // g[i] = 0; - // → There is a problem of dimension reduction, so this will be done by the caller. 
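For reference, a standalone sketch of the AdaGrad rule this deleted branch implemented, following the formula in its own comments (g2 += g*g, then v -= eta * g / sqrt(g2 + epsilon), clamped to the storage type). The class name and learning rate here are illustrative, not the original API:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

struct AdaGradWeight {
    float g  = 0.0f;               // gradient summed over one mini-batch
    float g2 = 0.0f;               // running sum of squared gradients
    static constexpr double kEta     = 30.0;  // illustrative learning rate
    static constexpr double kEpsilon = 1e-6;  // keeps sqrt() away from zero

    void update(std::int16_t& v) {
        if (g == 0.0f)
            return;
        g2 += g * g;
        double V = v - kEta * g / std::sqrt(double(g2) + kEpsilon);
        // Clamp so the result still fits the 16-bit storage type.
        V = std::min(V, double(INT16_MAX));
        V = std::max(V, double(INT16_MIN));
        v = std::int16_t(std::lround(V));
        g = 0.0f;  // this mini-batch has been consumed
    }
};

int main() {
    AdaGradWeight w;
    std::int16_t v = 100;
    w.g = 2.5f;
    w.update(v);
    std::printf("v after one AdaGrad step: %d\n", v);  // v decreased
    return 0;
}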
- } - -#elif defined(SGD_UPDATE) +#if defined(SGD_UPDATE) // See only the sign of the gradient Update with SGD // When executing this function, the value of g and the member do not change From f3a158725d573753cf4b81fc5866c0f3bbdb1e88 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:07:09 +0900 Subject: [PATCH 18/30] Removed SGD_UPDATE macro. --- src/learn/learn.h | 12 --------- src/learn/learning_tools.h | 51 ++------------------------------------ 2 files changed, 2 insertions(+), 61 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 8fb6217f..91b40213 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -5,14 +5,6 @@ #include -// ---------------------- -// update formula -// ---------------------- - -// SGD looking only at the sign of the gradient. It requires less memory, but the accuracy is... -// #define SGD_UPDATE - - // ---------------------- // Select the objective function // ---------------------- @@ -134,10 +126,6 @@ typedef float LearnFloatType; #define LOSS_FUNCTION_IS_ELMO_METHOD -#if defined(SGD_UPDATE) -#define LEARN_UPDATE "SGD" -#endif - #if defined(LOSS_FUNCTION_IS_WINNING_PERCENTAGE) #define LOSS_FUNCTION "WINNING_PERCENTAGE" #elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY) diff --git a/src/learn/learning_tools.h b/src/learn/learning_tools.h index 854133e4..348105b6 100644 --- a/src/learn/learning_tools.h +++ b/src/learn/learning_tools.h @@ -4,13 +4,12 @@ // A set of machine learning tools related to the weight array used for machine learning of evaluation functions #include "learn.h" + #if defined (EVAL_LEARN) -#include -#if defined(SGD_UPDATE) || defined(USE_KPPP_MIRROR_WRITE) #include "../misc.h" // PRNG , my_insertion_sort -#endif +#include #include // std::sqrt() namespace EvalLearningTools @@ -29,14 +28,6 @@ namespace EvalLearningTools // cumulative value of one mini-batch gradient LearnFloatType g = LearnFloatType(0); - // When ADA_GRAD_UPDATE. LearnFloatType == float, - // total 4*2 + 4*2 + 1*2 = 18 bytes - // It suffices to secure a Weight array that is 4.5 times the size of the evaluation function parameter of 1GB. - // However, sizeof(Weight)==20 code is generated if the structure alignment is in 4-byte units, so - // Specify pragma pack(2). - - // For SGD_UPDATE, this structure is reduced by 10 bytes to 8 bytes. - // Learning rate η(eta) such as AdaGrad. // It is assumed that eta1,2,3,eta1_epoch,eta2_epoch have been set by the time updateFV() is called. // The epoch of update_weights() gradually changes from eta1 to eta2 until eta1_epoch. @@ -76,44 +67,6 @@ namespace EvalLearningTools template void updateFV(T& v) { updateFV(v, 1.0); } -#if defined(SGD_UPDATE) - - // See only the sign of the gradient Update with SGD - // When executing this function, the value of g and the member do not change - // Guaranteed by the caller. It does not have to be an atomic operation. - template - void updateFV(T & v , double k) - { - if (g == 0) - return; - - // See only the sign of g and update. - // If g <0, add v a little. - // If g> 0, subtract v slightly. - - // Since we only add integers, no decimal part is required. - - // It's a good idea to move around 0-5. - // It is better to have a Gaussian distribution, so generate a 5-bit random number (each bit has a 1/2 probability of 1), - // Pop_count() it. At this time, it has a binomial distribution. - //int16_t diff = (int16_t)POPCNT32((u32)prng.rand(31)); - // → If I do this with 80 threads, this AsyncPRNG::rand() locks, so I slowed down. This implementation is not good. 
- int16_t diff = 1; - - double V = v; - if (g > 0.0) - V-= diff; - else - V+= diff; - - V = (std::min)((double)(std::numeric_limits::max)(), V); - V = (std::max)((double)(std::numeric_limits::min)(), V); - - v = (T)V; - } - -#endif - // grad setting template void set_grad(const T& g_) { g = g_; } From d37eb63581ce2de8fd1a8406a9bc06b6377d2176 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:08:56 +0900 Subject: [PATCH 19/30] Removed LOSS_FUNCTION_IS_WINNING_PERCENTAGE macro. --- src/learn/learn.h | 9 +-------- src/learn/learner.cpp | 36 ------------------------------------ 2 files changed, 1 insertion(+), 44 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 91b40213..9d783986 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -9,11 +9,6 @@ // Select the objective function // ---------------------- -// The objective function is the sum of squares of the difference in winning percentage -// See learner.cpp for more information. - -//#define LOSS_FUNCTION_IS_WINNING_PERCENTAGE - // Objective function is cross entropy // See learner.cpp for more information. // So-called ordinary "rag cloth squeezer" @@ -126,9 +121,7 @@ typedef float LearnFloatType; #define LOSS_FUNCTION_IS_ELMO_METHOD -#if defined(LOSS_FUNCTION_IS_WINNING_PERCENTAGE) -#define LOSS_FUNCTION "WINNING_PERCENTAGE" -#elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY) +#if defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY) #define LOSS_FUNCTION "CROSS_ENTOROPY" #elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE) #define LOSS_FUNCTION "CROSS_ENTOROPY_FOR_VALUE" diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index daea9594..e9658da6 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -163,42 +163,6 @@ namespace Learner return ((y2 - y1) / epsilon) / winning_probability_coefficient; } - // When the objective function is the sum of squares of the difference in winning percentage -#if defined (LOSS_FUNCTION_IS_WINNING_PERCENTAGE) -// function to calculate the gradient - double calc_grad(Value deep, Value shallow, PackedSfenValue& psv) - { - // The square of the win rate difference minimizes it in the objective function. - // Objective function J = 1/2m Σ (win_rate(shallow)-win_rate(deep) )^2 - // However, σ is a sigmoid function that converts the - // evaluation value into the difference in the winning percentage. - // m is the number of samples. shallow is the evaluation value - // for a shallow search (qsearch()). deep is the evaluation value for deep search. - // If W is the feature vector (parameter of the evaluation function) - // and Xi and Yi are teachers - // shallow = W*Xi // * is the Hadamard product, transposing W and meaning X - // f(Xi) = win_rate(W*Xi) - // If σ(i th deep) = Yi, - // J = m/2 Σ (f(Xi)-Yi )^2 - // becomes a common expression. - // W is a vector, and if we write the jth element as Wj, from the chain rule - // ∂J/∂Wj = ∂J/∂f ・∂f/∂W ・∂W/∂Wj - // = 1/m Σ (f(Xi)-y) ・f'(Xi) ・ 1 - - // 1/m will be multiplied later, but the contents of Σ can - // be retained in the array as the value of the gradient. - // f'(Xi) = win_rate'(shallow) = sigmoid'(shallow/600) = dsigmoid(shallow / 600) / 600 - // This /600 at the end is adjusted by the learning rate, so do not write it.. - // Also, the coefficient of 1/m is unnecessary if you use the update - // formula that has the automatic gradient adjustment function like Adam and AdaGrad. - // Therefore, it is not necessary to save it in memory. 
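The sign-only SGD branch removed by patch 18 ignored the gradient's magnitude and moved each weight one unit against the gradient's sign, clamped to the storage type. A compact standalone sketch of that rule (names illustrative):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <limits>

template <typename T>
void sgd_sign_update(T& v, float g) {
    if (g == 0.0f)
        return;
    // Only the sign of g matters: g > 0 means the loss grows with v.
    int stepped = int(v) + (g > 0.0f ? -1 : +1);
    stepped = std::clamp(stepped,
                         int(std::numeric_limits<T>::min()),
                         int(std::numeric_limits<T>::max()));
    v = T(stepped);
}

int main() {
    std::int16_t w = 0;
    sgd_sign_update(w, -3.7f);   // negative gradient -> weight increases
    std::printf("w = %d\n", w);  // prints 1
    return 0;
}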
- - const double p = winning_percentage(deep, psv.gamePly); - const double q = winning_percentage(shallow, psv.gamePly); - return (q - p) * Math::dsigmoid(double(shallow) / 600.0); - } -#endif - #if defined (LOSS_FUNCTION_IS_CROSS_ENTOROPY) double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) { From f52fbf8006174023fa137feda1d7db67a884ac2e Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:10:04 +0900 Subject: [PATCH 20/30] Removed LOSS_FUNCTION_IS_CROSS_ENTOROPY macro. --- src/learn/learn.h | 9 +-------- src/learn/learner.cpp | 29 ----------------------------- 2 files changed, 1 insertion(+), 37 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 9d783986..da542d67 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -9,11 +9,6 @@ // Select the objective function // ---------------------- -// Objective function is cross entropy -// See learner.cpp for more information. -// So-called ordinary "rag cloth squeezer" -//#define LOSS_FUNCTION_IS_CROSS_ENTOROPY - // A version in which the objective function is cross entropy, but the win rate function is not passed // #define LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE @@ -121,9 +116,7 @@ typedef float LearnFloatType; #define LOSS_FUNCTION_IS_ELMO_METHOD -#if defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY) -#define LOSS_FUNCTION "CROSS_ENTOROPY" -#elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE) +#if defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE) #define LOSS_FUNCTION "CROSS_ENTOROPY_FOR_VALUE" #elif defined(LOSS_FUNCTION_IS_ELMO_METHOD) #define LOSS_FUNCTION "ELMO_METHOD(WCSC27)" diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index e9658da6..66835ce5 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -163,35 +163,6 @@ namespace Learner return ((y2 - y1) / epsilon) / winning_probability_coefficient; } -#if defined (LOSS_FUNCTION_IS_CROSS_ENTOROPY) - double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) - { - // Objective function with cross entropy - - // For the concept and nature of cross entropy, - // http://nnadl-ja.github.io/nnadl_site_ja/chap3.html#the_cross-entropy_cost_function - // http://postd.cc/visual-information-theory-3/ - // Refer to etc. - - // Objective function design) - // We want to make the distribution of p closer to the distribution of q - // → Think of it as the problem of minimizing the cross entropy - // between the probability distributions of p and q. - // J = H(p,q) =-Σ p(x) log(q(x)) = -p log q-(1-p) log(1-q) - // x - - // p is a constant and q is a Wi function (q = σ(W・Xi) ). - // ∂J/∂Wi = -p・q'/q-(1-p)(1-q)'/(1-q) - // = ... - // = q-p. - - const double p = winning_percentage(deep, psv.gamePly); - const double q = winning_percentage(shallow, psv.gamePly); - - return q - p; - } -#endif - #if defined ( LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE ) double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) { From ef1601218db703b42e31b34d8c324f0ec3001f83 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:11:11 +0900 Subject: [PATCH 21/30] Removed LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE macro. 
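Patches 19 through 21 delete the alternative calc_grad() definitions and leave only the elmo variant. All of them lean on the identity the removed comments derive: with q = sigmoid(x) and a fixed target probability p, the cross entropy J = -p*log(q) - (1-p)*log(1-q) has derivative dJ/dx = q - p. A quick numeric confirmation of that identity:

#include <cmath>
#include <cstdio>

double sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }

double cross_entropy_at(double p, double x) {
    double q = sigmoid(x);
    return -p * std::log(q) - (1.0 - p) * std::log(1.0 - q);
}

int main() {
    const double p = 0.7, x = 0.3, h = 1e-6;
    // Central difference vs. the closed form the trainer used.
    double numeric  = (cross_entropy_at(p, x + h) - cross_entropy_at(p, x - h)) / (2 * h);
    double analytic = sigmoid(x) - p;  // the "q - p" form
    std::printf("numeric %.8f vs analytic %.8f\n", numeric, analytic);
    return 0;
}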
--- src/learn/learn.h | 7 +------ src/learn/learner.cpp | 11 ----------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index da542d67..d2477277 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -9,9 +9,6 @@ // Select the objective function // ---------------------- -// A version in which the objective function is cross entropy, but the win rate function is not passed -// #define LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE - // elmo (WCSC27) method // #define LOSS_FUNCTION_IS_ELMO_METHOD @@ -116,9 +113,7 @@ typedef float LearnFloatType; #define LOSS_FUNCTION_IS_ELMO_METHOD -#if defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE) -#define LOSS_FUNCTION "CROSS_ENTOROPY_FOR_VALUE" -#elif defined(LOSS_FUNCTION_IS_ELMO_METHOD) +#if defined(LOSS_FUNCTION_IS_ELMO_METHOD) #define LOSS_FUNCTION "ELMO_METHOD(WCSC27)" #endif diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 66835ce5..82bcfa09 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -163,17 +163,6 @@ namespace Learner return ((y2 - y1) / epsilon) / winning_probability_coefficient; } -#if defined ( LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE ) - double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv) - { - // Version that does not pass the winning percentage function - // This, unless EVAL_LIMIT is set low, trying to - // match the evaluation value with the shape of the end stage - // eval may exceed the range of eval. - return shallow - deep; - } -#endif - #if defined ( LOSS_FUNCTION_IS_ELMO_METHOD ) // A constant used in elmo (WCSC27). Adjustment required. From dbad9d96e0fc2923edfdbef37162ecd5b0645d50 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:17:02 +0900 Subject: [PATCH 22/30] Removed LOSS_FUNCTION_IS_ELMO_METHOD macro. --- src/learn/learn.h | 19 --------------- src/learn/learner.cpp | 54 +------------------------------------------ 2 files changed, 1 insertion(+), 72 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index d2477277..2ee2f8d6 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -5,21 +5,6 @@ #include -// ---------------------- -// Select the objective function -// ---------------------- - -// elmo (WCSC27) method -// #define LOSS_FUNCTION_IS_ELMO_METHOD - -// ※ Other things may be added. - - -// ---------------------- -// debug settings for learning -// ---------------------- - - // ---------------------- // learning from zero vector // ---------------------- @@ -111,11 +96,7 @@ typedef float LearnFloatType; // Learning with the method of elmo (WCSC27) // ---------------------- -#define LOSS_FUNCTION_IS_ELMO_METHOD - -#if defined(LOSS_FUNCTION_IS_ELMO_METHOD) #define LOSS_FUNCTION "ELMO_METHOD(WCSC27)" -#endif // ---------------------- // Definition of struct used in Learner diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 82bcfa09..84cade5c 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -163,8 +163,6 @@ namespace Learner return ((y2 - y1) / epsilon) / winning_probability_coefficient; } -#if defined ( LOSS_FUNCTION_IS_ELMO_METHOD ) - // A constant used in elmo (WCSC27). Adjustment required. // Since elmo does not internally divide the expression, the value is different. // You can set this value with the learn command. @@ -293,7 +291,6 @@ namespace Learner (-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon)); } -#endif // Other objective functions may be considered in the future... 
double calc_grad(Value shallow, const PackedSfenValue& psv) { @@ -629,14 +626,12 @@ namespace Learner stop_flag(false), save_only_once(false) { -#if defined ( LOSS_FUNCTION_IS_ELMO_METHOD ) learn_sum_cross_entropy_eval = 0.0; learn_sum_cross_entropy_win = 0.0; learn_sum_cross_entropy = 0.0; learn_sum_entropy_eval = 0.0; learn_sum_entropy_win = 0.0; learn_sum_entropy = 0.0; -#endif newbob_scale = 1.0; newbob_decay = 1.0; @@ -689,15 +684,13 @@ namespace Learner // --- loss calculation -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) - // For calculation of learning data loss + // For calculation of learning data loss atomic learn_sum_cross_entropy_eval; atomic learn_sum_cross_entropy_win; atomic learn_sum_cross_entropy; atomic learn_sum_entropy_eval; atomic learn_sum_entropy_win; atomic learn_sum_entropy; -#endif shared_timed_mutex nn_mutex; double newbob_scale; @@ -759,13 +752,6 @@ namespace Learner std::cout << ", iteration " << epoch; std::cout << ", eta = " << Eval::get_eta() << ", "; -#if !defined(LOSS_FUNCTION_IS_ELMO_METHOD) - double sum_error = 0; - double sum_error2 = 0; - double sum_error3 = 0; -#endif - -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) // For calculation of verification data loss atomic test_sum_cross_entropy_eval, test_sum_cross_entropy_win, test_sum_cross_entropy; atomic test_sum_entropy_eval, test_sum_entropy_win, test_sum_entropy; @@ -779,7 +765,6 @@ namespace Learner // norm for learning atomic sum_norm; sum_norm = 0; -#endif // The number of times the pv first move of deep // search matches the pv first move of search(1). @@ -841,25 +826,11 @@ namespace Learner // Note) This code does not consider when // eval_limit is specified in the learn command. - // --- error calculation - -#if !defined(LOSS_FUNCTION_IS_ELMO_METHOD) - auto grad = calc_grad(deep_value, shallow_value, ps); - - // something like rmse - sum_error += grad * grad; - // Add the absolute value of the gradient - sum_error2 += abs(grad); - // Add the absolute value of the difference between the evaluation values - sum_error3 += abs(shallow_value - deep_value); -#endif - // --- calculation of cross entropy // For the time being, regarding the win rate and loss terms only in the elmo method // Calculate and display the cross entropy. 
-#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy; double test_entropy_eval, test_entropy_win, test_entropy; calc_cross_entropy( @@ -881,7 +852,6 @@ namespace Learner test_sum_entropy_win += test_entropy_win; test_sum_entropy += test_entropy; sum_norm += (double)abs(shallow_value); -#endif // Determine if the teacher's move and the score of the shallow search match { @@ -905,17 +875,6 @@ namespace Learner while (task_count) sleep(1); -#if !defined(LOSS_FUNCTION_IS_ELMO_METHOD) - // rmse = root mean square error: mean square error - // mae = mean absolute error: mean absolute error - auto dsig_rmse = std::sqrt(sum_error / (sfen_for_mse.size() + epsilon)); - auto dsig_mae = sum_error2 / (sfen_for_mse.size() + epsilon); - auto eval_mae = sum_error3 / (sfen_for_mse.size() + epsilon); - cout << " , dsig rmse = " << dsig_rmse << " , dsig mae = " << dsig_mae - << " , eval mae = " << eval_mae; -#endif - -#if defined(LOSS_FUNCTION_IS_ELMO_METHOD) latest_loss_sum += test_sum_cross_entropy - test_sum_entropy; latest_loss_count += sr.sfen_for_mse.size(); @@ -960,9 +919,6 @@ namespace Learner learn_sum_entropy_eval = 0.0; learn_sum_entropy_win = 0.0; learn_sum_entropy = 0.0; -#else - << endl; -#endif } void LearnerThink::thread_worker(size_t thread_id) @@ -1144,7 +1100,6 @@ namespace Learner ? Eval::evaluate(pos) : -Eval::evaluate(pos); -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) // Calculate loss for training data double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy; double learn_entropy_eval, learn_entropy_win, learn_entropy; @@ -1165,7 +1120,6 @@ namespace Learner learn_sum_entropy_eval += learn_entropy_eval; learn_sum_entropy_win += learn_entropy_win; learn_sum_entropy += learn_entropy; -#endif const double example_weight = (discount_rate != 0 && ply != (int)pv.size()) ? discount_rate : 1.0; @@ -1600,12 +1554,10 @@ namespace Learner // Turn on if you want to pass a pre-shuffled file. bool no_shuffle = false; -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) // elmo lambda ELMO_LAMBDA = 0.33; ELMO_LAMBDA2 = 0.33; ELMO_LAMBDA_LIMIT = 32000; -#endif // Discount rate. If this is set to a value other than 0, // the slope will be added even at other than the PV termination. @@ -1703,13 +1655,11 @@ namespace Learner else if (option == "freeze_kkpp") is >> freeze[3]; #endif -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) // LAMBDA else if (option == "lambda") is >> ELMO_LAMBDA; else if (option == "lambda2") is >> ELMO_LAMBDA2; else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT; -#endif else if (option == "reduction_gameply") is >> reduction_gameply; // shuffle related @@ -1900,11 +1850,9 @@ namespace Learner reduction_gameply = max(reduction_gameply, 1); cout << "reduction_gameply : " << reduction_gameply << endl; -#if defined (LOSS_FUNCTION_IS_ELMO_METHOD) cout << "LAMBDA : " << ELMO_LAMBDA << endl; cout << "LAMBDA2 : " << ELMO_LAMBDA2 << endl; cout << "LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl; -#endif cout << "mirror_percentage : " << mirror_percentage << endl; cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl; From f52165e1d3b8bebdd702e089eb9fdd7761d45076 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:19:14 +0900 Subject: [PATCH 23/30] Removed RESET_TO_ZERO_VECTOR macro. 
--- src/learn/learn.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 2ee2f8d6..6056e8c6 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -5,18 +5,6 @@ #include -// ---------------------- -// learning from zero vector -// ---------------------- - -// Start learning the evaluation function parameters from the zero vector. -// Initialize to zero, generate a game, learn from zero vector, -// Game generation → If you repeat learning, you will get parameters that do not depend on the professional game. (maybe) -// (very time consuming) - -//#define RESET_TO_ZERO_VECTOR - - // ---------------------- // Floating point for learning // ---------------------- From 5e2570267228653a11bf42c14d77d1baf26b99ac Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:19:53 +0900 Subject: [PATCH 24/30] Removed USE_TRIANGLE_WEIGHT_ARRAY macro. --- src/learn/learn.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 6056e8c6..ea622bce 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -23,15 +23,6 @@ typedef float LearnFloatType; //#include "half_float.h" //typedef HalfFloat::float16 LearnFloatType; -// ---------------------- -// save memory -// ---------------------- - -// Use a triangular array for the Weight array (of which is KPP) to save memory. -// If this is used, the weight array for learning will be about 3 times as large as the evaluation function file. - -#define USE_TRIANGLE_WEIGHT_ARRAY - // ---------------------- // dimension down // ---------------------- From eafa5693658a91e97612a04b2c620ec5a545e3a0 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:22:02 +0900 Subject: [PATCH 25/30] Removed macros for KPP factorization. --- src/learn/learn.h | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index ea622bce..0df71c7a 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -23,37 +23,6 @@ typedef float LearnFloatType; //#include "half_float.h" //typedef HalfFloat::float16 LearnFloatType; -// ---------------------- -// dimension down -// ---------------------- - -// Dimension reduction for mirrors (left/right symmetry) and inverse (forward/backward symmetry). -// All on by default. - -// Dimension reduction using mirror and inverse for KK. (Unclear effect) -// USE_KK_MIRROR_WRITE must be on when USE_KK_INVERSE_WRITE is on. -#define USE_KK_MIRROR_WRITE -#define USE_KK_INVERSE_WRITE - -// Dimension reduction using Mirror and Inverse for KKP. (Inverse is not so effective) -// When USE_KKP_INVERSE_WRITE is turned on, USE_KKP_MIRROR_WRITE must also be turned on. -#define USE_KKP_MIRROR_WRITE -#define USE_KKP_INVERSE_WRITE - -// Perform dimension reduction using a mirror for KPP. (Turning this off requires double the teacher position) -// KPP has no inverse. (Because there is only K on the front side) -#define USE_KPP_MIRROR_WRITE - -// Perform a dimension reduction using a mirror for KPPP. (Turning this off requires double the teacher position) -// KPPP has no inverse. (Because there is only K on the front side) -#define USE_KPPP_MIRROR_WRITE - -// Reduce the dimension by KPP for learning the KKPP component. -// Learning is very slow. -// Do not use as it is not debugged. 
-//#define USE_KKPP_LOWER_DIM - - // ====================== // Settings for creating teacher phases // ====================== From 8d763fb503fed49e4b7fa2be115e0fa6eb0e74d7 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 00:22:38 +0900 Subject: [PATCH 26/30] Removed LEARN_GENSFEN_USE_DRAW_RESULT macro. --- src/learn/learn.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/learn/learn.h b/src/learn/learn.h index 0df71c7a..b7ca18e8 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -23,19 +23,6 @@ typedef float LearnFloatType; //#include "half_float.h" //typedef HalfFloat::float16 LearnFloatType; -// ====================== -// Settings for creating teacher phases -// ====================== - -// ---------------------- -// write out the draw -// ---------------------- - -// When you reach a draw, write it out as a teacher position -// It's subtle whether it's better to do this. -// #define LEARN_GENSFEN_USE_DRAW_RESULT - - // ====================== // configure // ====================== From cea17c92f9ad91d0dd2d73db272e6ce6712ba048 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 08:53:57 +0900 Subject: [PATCH 27/30] Simplified evaluate_common.h. --- src/eval/evaluate_common.h | 59 ++++---------------------------------- 1 file changed, 5 insertions(+), 54 deletions(-) diff --git a/src/eval/evaluate_common.h b/src/eval/evaluate_common.h index 927783cd..989169b3 100644 --- a/src/eval/evaluate_common.h +++ b/src/eval/evaluate_common.h @@ -1,75 +1,26 @@ #ifndef _EVALUATE_COMMON_H_ #define _EVALUATE_COMMON_H_ +#if defined(EVAL_LEARN) + // A common header-like function for modern evaluation functions (EVAL_KPPT and EVAL_KPP_KKPT). -#include - -// KK file name -#define KK_BIN "KK_synthesized.bin" - -// KKP file name -#define KKP_BIN "KKP_synthesized.bin" - -// KPP file name -#define KPP_BIN "KPP_synthesized.bin" - -#include "../position.h" +#include namespace Eval { - // An operator that applies the function f to each parameter of the evaluation function. - // Used for parameter analysis etc. - // type indicates the survey target. - // type = -1 :KK,KKP,KPP all - // type = 0: KK only - // type = 1: KKP only - // type = 2: KPP only - void foreach_eval_param(std::functionf, int type = -1); - // -------------------------- // for learning // -------------------------- -#if defined(EVAL_LEARN) - // Initialize the gradient array during learning - // Pass the learning rate as an argument. If 0.0, the default value is used. - // The epoch of update_weights() gradually changes from eta to eta2 until eta_epoch. - // After eta2_epoch, gradually change from eta2 to eta3. - void init_grad(double eta1, uint64_t eta_epoch, double eta2, uint64_t eta2_epoch, double eta3); - - // Add the gradient difference value to the gradient array for all features that appear in the current phase. - // freeze[0]: Flag that kk does not learn - // freeze[1]: Flag that kkp does not learn - // freeze[2]: Flag that kpp does not learn - // freeze[3]: Flag that kppp does not learn - void add_grad(Position& pos, Color rootColor, double delt_grad, const std::array& freeze); - - // Do SGD or AdaGrad or something based on the current gradient. - // epoch: Generation counter (starting from 0) - // freeze[0]: Flag that kk does not learn - // freeze[1]: Flag that kkp does not learn - // freeze[2]: Flag that kpp does not learn - // freeze[3]: Flag that kppp does not learn - void update_weights(uint64_t epoch, const std::array& freeze); - // Save the evaluation function parameters to a file. 
// You can specify the extension added to the end of the file. void save_eval(std::string suffix); // Get the current eta. double get_eta(); - - // --learning related commands - - // A function that normalizes KK. Note that it is not completely equivalent to the original evaluation function. - // By making the values ​​of kkp and kpp as close to zero as possible, the value of the feature factor (which is zero) that did not appear during learning - // The idea of ​​ensuring it is valid. - void regularize_kk(); - -#endif - - } +#endif // defined(EVAL_LEARN) + #endif // _EVALUATE_KPPT_COMMON_H_ From 2583f689729f7644cb5a5ac6d0369c0c726c3141 Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 08:58:10 +0900 Subject: [PATCH 28/30] Removed macros for KPP evaluate functions. --- src/eval/evaluate_common.h | 2 +- src/learn/learner.cpp | 16 ---------------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/src/eval/evaluate_common.h b/src/eval/evaluate_common.h index 989169b3..7799fe79 100644 --- a/src/eval/evaluate_common.h +++ b/src/eval/evaluate_common.h @@ -3,7 +3,7 @@ #if defined(EVAL_LEARN) -// A common header-like function for modern evaluation functions (EVAL_KPPT and EVAL_KPP_KKPT). +// A common header-like function for modern evaluation functions. #include diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp index 84cade5c..5d9b242f 100644 --- a/src/learn/learner.cpp +++ b/src/learn/learner.cpp @@ -1647,14 +1647,6 @@ namespace Learner else if (option == "freeze_kkp") is >> freeze[1]; else if (option == "freeze_kpp") is >> freeze[2]; -#if defined(EVAL_KPPT) || defined(EVAL_KPP_KKPT) || defined(EVAL_KPP_KKPT_FV_VAR) || defined(EVAL_NABLA) - -#elif defined(EVAL_KPPPT) || defined(EVAL_KPPP_KKPT) || defined(EVAL_HELICES) - else if (option == "freeze_kppp") is >> freeze[3]; -#elif defined(EVAL_KKPP_KKPT) || defined(EVAL_KKPPT) - else if (option == "freeze_kkpp") is >> freeze[3]; -#endif - // LAMBDA else if (option == "lambda") is >> ELMO_LAMBDA; else if (option == "lambda2") is >> ELMO_LAMBDA2; @@ -1858,14 +1850,6 @@ namespace Learner cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl; cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl; -#if defined(EVAL_KPPT) || defined(EVAL_KPP_KKPT) || defined(EVAL_KPP_KKPT_FV_VAR) || defined(EVAL_NABLA) - cout << "freeze_kk/kkp/kpp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << endl; -#elif defined(EVAL_KPPPT) || defined(EVAL_KPPP_KKPT) || defined(EVAL_HELICES) - cout << "freeze_kk/kkp/kpp/kppp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << " , " << freeze[3] << endl; -#elif defined(EVAL_KKPP_KKPT) || defined(EVAL_KKPPT) - cout << "freeze_kk/kkp/kpp/kkpp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << " , " << freeze[3] << endl; -#endif - // ----------------------------------- // various initialization // ----------------------------------- From 18648458117a35acb2617e9fe04192acca6ba2ae Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 09:26:42 +0900 Subject: [PATCH 29/30] Commented out unused parameters. 
--- src/nnue/features/castling_right.cpp | 6 +++--- src/nnue/features/enpassant.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/nnue/features/castling_right.cpp b/src/nnue/features/castling_right.cpp index 47fbd986..86fe06fe 100644 --- a/src/nnue/features/castling_right.cpp +++ b/src/nnue/features/castling_right.cpp @@ -26,7 +26,7 @@ namespace Eval { & ((castling_rights >> 2) & 3); } - for (int i = 0; i push_back(i); } @@ -36,7 +36,7 @@ namespace Eval { // Get a list of indices whose values ​​have changed from the previous one in the feature quantity void CastlingRight::AppendChangedIndices( const Position& pos, Color perspective, - IndexList* removed, IndexList* added) { + IndexList* removed, IndexList* /* added */) { int previous_castling_rights = pos.state()->previous->castlingRights; int current_castling_rights = pos.state()->castlingRights; @@ -54,7 +54,7 @@ namespace Eval { & ((current_castling_rights >> 2) & 3); } - for (int i = 0; i < kDimensions; ++i) { + for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) { if ((relative_previous_castling_rights & (i << 1)) && (relative_current_castling_rights & (i << 1)) == 0) { removed->push_back(i); diff --git a/src/nnue/features/enpassant.cpp b/src/nnue/features/enpassant.cpp index 77bc936e..386bd907 100644 --- a/src/nnue/features/enpassant.cpp +++ b/src/nnue/features/enpassant.cpp @@ -30,8 +30,8 @@ namespace Eval { // Get a list of indices whose values ??have changed from the previous one in the feature quantity void EnPassant::AppendChangedIndices( - const Position& pos, Color perspective, - IndexList* removed, IndexList* added) { + const Position& /* pos */, Color /* perspective */, + IndexList* /* removed */, IndexList* /* added */) { // Not implemented. assert(false); } From 4206a1edd069600da29b8ee5a99a486b7aa1603f Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 9 Sep 2020 09:46:05 +0900 Subject: [PATCH 30/30] Renamed parameters to avoid shadowing other parameters. 
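Patch 29 above relies on a standard C++ idiom: commenting out a parameter's name keeps the function signature intact (callers are unaffected) while telling the compiler the value is intentionally unused, which silences -Wunused-parameter. A minimal sketch with placeholder types standing in for the engine's:

// Placeholder declarations; the real code uses the engine's Position,
// Color and IndexList types.
struct Position {};
enum Color { WHITE, BLACK };
struct IndexList {};

// The name survives as a comment, documenting intent without a warning.
void AppendChangedIndices(const Position& /* pos */, Color /* perspective */,
                          IndexList* /* removed */, IndexList* /* added */) {
    // Not implemented for this feature; all parameters intentionally unused.
}

int main() {
    Position pos;
    IndexList removed, added;
    AppendChangedIndices(pos, WHITE, &removed, &added);
    return 0;
}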
---
 src/nnue/nnue_test_command.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/nnue/nnue_test_command.cpp b/src/nnue/nnue_test_command.cpp
index c3a53c7d..5f0776ef 100644
--- a/src/nnue/nnue_test_command.cpp
+++ b/src/nnue/nnue_test_command.cpp
@@ -34,12 +34,12 @@ void TestFeatures(Position& pos) {
   std::vector<std::uint64_t> num_resets(kRefreshTriggers.size());
   constexpr IndexType kUnknown = -1;
   std::vector<IndexType> trigger_map(RawFeatures::kDimensions, kUnknown);
-  auto make_index_sets = [&](const Position& pos) {
+  auto make_index_sets = [&](const Position& position) {
     std::vector<std::vector<std::set<IndexType>>> index_sets(
         kRefreshTriggers.size(), std::vector<std::set<IndexType>>(2));
     for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
       Features::IndexList active_indices[2];
-      RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
+      RawFeatures::AppendActiveIndices(position, kRefreshTriggers[i],
                                        active_indices);
       for (const auto perspective : Colors) {
         for (const auto index : active_indices[perspective]) {
@@ -53,11 +53,11 @@ void TestFeatures(Position& pos) {
     }
     return index_sets;
   };
-  auto update_index_sets = [&](const Position& pos, auto* index_sets) {
+  auto update_index_sets = [&](const Position& position, auto* index_sets) {
    for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
       Features::IndexList removed_indices[2], added_indices[2];
       bool reset[2];
-      RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
+      RawFeatures::AppendChangedIndices(position, kRefreshTriggers[i],
                                         removed_indices, added_indices, reset);
       for (const auto perspective : Colors) {
         if (reset[perspective]) {
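The rename in this last patch addresses parameter shadowing: inside TestFeatures(Position& pos), a lambda parameter also named pos hides the enclosing pos, which -Wshadow reports and which makes it easy to operate on the wrong object. A minimal, self-contained reproduction of the before/after, using illustrative names rather than the engine's types:

#include <cstdio>

struct Position { int ply = 0; };

void test_features(Position& pos) {
    pos.ply = 3;
    // Before: [&](const Position& pos) { ... } -- the parameter would shadow
    // the outer pos, so the body could no longer reach it by name.
    auto sum_plies = [&](const Position& position) {
        return position.ply + pos.ply;  // both objects stay addressable
    };
    Position other;
    other.ply = 4;
    std::printf("%d\n", sum_plies(other));  // prints 7
}

int main() {
    Position p;
    test_features(p);
    return 0;
}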