Merge remote-tracking branch 'remotes/nodchip/master' into trainer

2025-12-24 19:16:49 +08:00 · 2020-09-09 23:35:41 +08:00
parent b3a0ded37a 9dcadfa642
commit a47a3bfc7c
15 changed files with 257 additions and 180 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -77,8 +77,7 @@ script:
  - if [[ "$TRAVIS_OS_NAME" != "linux" || "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi

  # start some basic learner CI
-  #TODO enable -Werror
-  - export CXXFLAGS=""
+  - export CXXFLAGS="-Werror"
  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && LDFLAGS="-lstdc++fs" make -j2 ARCH=x86-64-modern learn; fi
  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && LDFLAGS="-lstdc++fs" make -j2 ARCH=x86-64-modern profile-learn; fi

--- a/src/evaluate.cpp
+++ b/src/evaluate.cpp
@@ -1016,15 +1016,7 @@ make_v:
 Value Eval::evaluate(const Position& pos) {

  if (Options["Training"]) {
-    Value v = NNUE::evaluate(pos);
-
-    // Damp down the evaluation linearly when shuffling
-    v = v * (100 - pos.rule50_count()) / 100;
-
-    // Guarantee evaluation does not hit the tablebase range
-    v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
-
-    return v;
+    return NNUE::evaluate(pos);
  } else {
    // Use classical eval if there is a large imbalance
    // If there is a moderate imbalance, use classical eval with probability (1/8),
@@ -1033,13 +1025,12 @@ Value Eval::evaluate(const Position& pos) {
    bool classical = !Eval::useNNUE
                  ||  useClassical
                  || (abs(eg_value(pos.psq_score())) > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB));
-    Value v = classical ? Evaluation<NO_TRACE>(pos).value()
-                        : NNUE::evaluate(pos) * 5 / 4 + Tempo;
+    Value v = classical ? Evaluation<NO_TRACE>(pos).value() : NNUE::evaluate(pos);

    if (   useClassical 
        && Eval::useNNUE 
        && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count()))
-        v = NNUE::evaluate(pos) * 5 / 4 + Tempo;
+        v = NNUE::evaluate(pos);

    // Damp down the evaluation linearly when shuffling
    v = v * (100 - pos.rule50_count()) / 100;
--- a/src/learn/gensfen.cpp
+++ b/src/learn/gensfen.cpp
@@ -10,6 +10,7 @@
 #include "../uci.h"
 #include "learn.h"
 #include "multi_think.h"
+#include "../syzygy/tbprobe.h"

 #include <chrono>
 #include <climits>
@@ -291,6 +292,12 @@ namespace Learner
            int ply,
            int& random_move_c);

+        Value evaluate_leaf(
+            Position& pos,
+            std::vector<StateInfo, AlignedAllocator<StateInfo>>& states,
+            int ply,
+            vector<Move>& pv);
+
        // Min and max depths for search during gensfen
        int search_depth_min;
        int search_depth_max;
@@ -641,6 +648,56 @@ namespace Learner
        return random_move_flag;
    }

+    Value MultiThinkGenSfen::evaluate_leaf(
+        Position& pos,
+        std::vector<StateInfo, AlignedAllocator<StateInfo>>& states,
+        int ply,
+        vector<Move>& pv)
+    {
+        auto rootColor = pos.side_to_move();
+
+        for (auto m : pv)
+        {
+            // The PV should contain no illegal moves; this check is only a debugging precaution.
+            if (!pos.pseudo_legal(m) || !pos.legal(m))
+            {
+                cout << "Error! : " << pos.fen() << m << endl;
+            }
+
+            pos.do_move(m, states[ply++]);
+        }
+
+        // Reach leaf
+        Value v;
+        if (pos.checkers())
+        {
+            // Sometimes the king is in check at the leaf, e.g. when the search has
+            // found a checkmate.  If Eval::evaluate() is called while a king is in
+            // check, the classical eval fails an assertion. To avoid the crash, return
+            // VALUE_NONE and let the caller assign a value to the position.
+            v = VALUE_NONE;
+        }
+        else
+        {
+            v = Eval::evaluate(pos);
+
+            // evaluate() returns the value from the point of view of the side to move,
+            // so if the side to move differs from rootColor, negate v before returning it.
+            if (rootColor != pos.side_to_move())
+            {
+                v = -v;
+            }
+        }
+
+        // Rewind the pv moves.
+        for (auto it = pv.rbegin(); it != pv.rend(); ++it)
+        {
+            pos.undo_move(*it);
+        }
+
+        return v;
+    }
+
    // thread_id = 0..Threads.size()-1
    void MultiThinkGenSfen::thread_worker(size_t thread_id)
    {
@@ -666,6 +723,8 @@ namespace Learner
            auto& pos = th->rootPos;
            pos.set(bookStart[prng.rand(bookStart.size())], false, &si, th);

+            int resign_counter = 0;
+            bool should_resign = prng.rand(10) > 1;
            // Vector for holding the sfens in the current simulated game.
            PSVector a_psv;
            a_psv.reserve(write_maxply + MAX_PLY);
@@ -700,6 +759,20 @@ namespace Learner
                    break;
                }

+                if (pos.count<ALL_PIECES>() <= 6) {
+                    Tablebases::ProbeState probe_state;
+                    Tablebases::WDLScore wdl = Tablebases::probe_wdl(pos, &probe_state);
+                    assert(wdl != Tablebases::WDLScore::WDLScoreNone);
+                    if (wdl == Tablebases::WDLScore::WDLWin) {
+                        flush_psv(1);
+                    } else if (wdl == Tablebases::WDLScore::WDLLoss) {
+                        flush_psv(-1);
+                    } else {
+                        flush_psv(0);
+                    }
+                    break;
+                }
+
                {
                    auto [search_value, search_pv] = search(pos, depth, 1, nodes);

@@ -707,11 +780,14 @@ namespace Learner
                    // Also because of this we don't have to check for TB/MATE scores
                    if (abs(search_value) >= eval_limit)
                    {
-                        const auto wdl = (search_value >= eval_limit) ? 1 : -1;
-                        flush_psv(wdl);
-                        break;
+                        resign_counter++;
+                        if ((should_resign && resign_counter >= 4) || abs(search_value) >= 10000) {
+                            flush_psv((search_value >= eval_limit) ? 1 : -1);
+                            break;
+                        }
+                    } else {
+                        resign_counter = 0;
                    }
-
                    // Verification of a strange move
                    if (search_pv.size() > 0
                        && (search_pv[0] == MOVE_NONE || search_pv[0] == MOVE_NULL))
@@ -743,26 +819,6 @@ namespace Learner
                        goto SKIP_SAVE;
                    }

-                    // Look into the position hashtable to see if the same
-                    // position was seen before.
-                    // This is a good heuristic to exlude already seen
-                    // positions without many false positives.
-                    {
-                        auto key = pos.key();
-                        auto hash_index = (size_t)(key & (GENSFEN_HASH_SIZE - 1));
-                        auto old_key = hash[hash_index];
-                        if (key == old_key)
-                        {
-                            a_psv.clear();
-                            goto SKIP_SAVE;
-                        }
-                        else
-                        {
-                            // Replace with the current key.
-                            hash[hash_index] = key;
-                        }
-                    }
-
                    // Pack the current position into a packed sfen and save it into the buffer.
                    {
                        a_psv.emplace_back(PackedSfenValue());
@@ -772,8 +828,6 @@ namespace Learner
                        // Result is added after the whole game is done.
                        pos.sfen_pack(psv.sfen);

-                        // Get the value of evaluate() as seen from the
-                        // root color on the leaf node of the PV line.
                        psv.score = search_value;

                        psv.gamePly = ply;
@@ -795,6 +849,8 @@ namespace Learner
                    // Update the next move according to best search result.
                    next_move = search_pv[0];
                }
+
+                // Random move.
                auto random_move = choose_random_move(pos, random_move_flag, ply, actual_random_move_count);
                if (random_move.has_value())
                {
@@ -807,6 +863,8 @@ namespace Learner
                        break;
                    }
                }
+
+                // Do move.
                pos.do_move(next_move, states[ply]);

            } // for (int ply = 0; ; ++ply)
--- a/src/learn/learner.cpp
+++ b/src/learn/learner.cpp
@@ -221,28 +221,7 @@ namespace Learner

    double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
    {
-        // elmo (WCSC27) method
-        // Correct with the actual game wins and losses.
-        const double q = winning_percentage(shallow, psv.gamePly);
-        const double p = calculate_p(teacher_signal, psv.gamePly);
-        const double t = calculate_t(psv.game_result);
-        const double lambda = calculate_lambda(teacher_signal);
-
-        double grad;
-        if (use_wdl) 
-        {
-            const double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly);
-            const double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly);
-            grad = lambda * dce_p + (1.0 - lambda) * dce_t;
-        }
-        else 
-        {
-            // Use the actual win rate as a correction term.
-            // This is the idea of elmo (WCSC27), modern O-parts.
-            grad = lambda * (q - p) + (1.0 - lambda) * (q - t);
-        }
-
-        return grad;
+        return (double)(shallow - teacher_signal) / 2400.0;
    }

    // Calculate cross entropy during learning
@@ -659,6 +638,9 @@ namespace Learner

        bool stop_flag;

+        // Discount rate
+        double discount_rate;
+
        // Option to exclude early stage from learning
        int reduction_gameply;

@@ -701,6 +683,32 @@ namespace Learner
        TaskDispatcher task_dispatcher;
    };

+    Value LearnerThink::get_shallow_value(Position& task_pos)
+    {
+        // Evaluation value of the shallow search.
+        // evaluate() could be used directly, but then the loss would be hard to compare
+        // with learn_cross_entropy, so qsearch() is used and the leaf of its PV is evaluated.
+        // EvalHash has been disabled in advance. (If not, the same value will be returned every time)
+        const auto [_, pv] = qsearch(task_pos);
+        const auto rootColor = task_pos.side_to_move();
+
+        std::vector<StateInfo, AlignedAllocator<StateInfo>> states(pv.size());
+        for (size_t i = 0; i < pv.size(); ++i)
+        {
+            task_pos.do_move(pv[i], states[i]);
+        }
+
+        const Value shallow_value =
+            (rootColor == task_pos.side_to_move())
+            ? Eval::evaluate(task_pos)
+            : -Eval::evaluate(task_pos);
+
+        for (auto it = pv.rbegin(); it != pv.rend(); ++it)
+            task_pos.undo_move(*it);
+
+        return shallow_value;
+    }
+
    void LearnerThink::calc_loss(size_t thread_id, uint64_t done)
    {
        // There is no point in hitting the replacement table, 
@@ -779,10 +787,7 @@ namespace Learner
                    cout << "Error! : illegal packed sfen " << task_pos.fen() << endl;
                }

-                // Determine if the teacher's move and the score of the shallow search match
-                const auto [shallow_value, pv] = qsearch(task_pos);
-                if ((uint16_t)pv[0] == ps.move)
-                    move_accord_count.fetch_add(1, std::memory_order_relaxed);
+                const Value shallow_value = get_shallow_value(task_pos);

                // Evaluation value of deep search
                auto deep_value = (Value)ps.score;
@@ -817,6 +822,13 @@ namespace Learner
                test_sum_entropy += test_entropy;
                sum_norm += (double)abs(shallow_value);

+                // Determine if the teacher's move and the score of the shallow search match
+                {
+                    const auto [value, pv] = search(task_pos, 1);
+                    if ((uint16_t)pv[0] == ps.move)
+                        move_accord_count.fetch_add(1, std::memory_order_relaxed);
+                }
+
                // Reduced one task because I did it
                --task_count;
            };
@@ -1023,8 +1035,21 @@ namespace Learner
            // I can read it, so try displaying it.
            //      cout << pos << value << endl;

+            const auto rootColor = pos.side_to_move();
+
+            int ply = 0;
+            StateInfo state[MAX_PLY]; // PV of qsearch cannot be so long.
+
+            if (!pos.pseudo_legal((Move)ps.move) || !pos.legal((Move)ps.move))
+            {
+                sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
+                continue;
+            }
+
+            pos.do_move((Move)ps.move, state[ply++]);
+
            // Evaluation value of shallow search (qsearch)
-            const auto [shallow_value, _] = qsearch(pos);
+            const auto [_, pv] = qsearch(pos);

            // Evaluation value of deep search
            const auto deep_value = (Value)ps.score;
@@ -1033,7 +1058,11 @@ namespace Learner
            // Go to the leaf node as it is, add only to the gradient array, 
            // and later try AdaGrad at the time of rmse aggregation.

-            const auto rootColor = pos.side_to_move();
+
+            // If the initial PV is different, it is better not to use it for learning.
+            // If it is the result of searching a completely different place, it may become noise.
+            // It may be better not to study where the difference in evaluation values is too large.
+

            // A helper function that adds the gradient to the current phase.
            auto pos_add_grad = [&]() {
@@ -1046,6 +1075,11 @@ namespace Learner
                // I have turned off the substitution table, but since 
                // the pv array has not been updated due to one stumbling block etc...

+                const Value shallow_value = 
+                    (rootColor == pos.side_to_move()) 
+                    ? Eval::evaluate(pos) 
+                    : -Eval::evaluate(pos);
+
                // Calculate loss for training data
                double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
                double learn_entropy_eval, learn_entropy_win, learn_entropy;
@@ -1067,14 +1101,43 @@ namespace Learner
                learn_sum_entropy_win += learn_entropy_win;
                learn_sum_entropy += learn_entropy;

-                Eval::NNUE::AddExample(pos, rootColor, ps, 1.0);
+                const double example_weight =
+                    (discount_rate != 0 && ply != (int)pv.size()) ? discount_rate : 1.0;
+                Eval::NNUE::AddExample(pos, rootColor, ps, example_weight);

                // Since the processing is completed, the counter of the processed number is incremented
                sr.total_done++;
            };

-            pos_add_grad();
+            bool illegal_move = false;
+            for (auto m : pv)
+            {
+                // The PV should not contain an illegal move,
+                // but an illegal move sometimes shows up here...
+                if (!pos.pseudo_legal(m) || !pos.legal(m))
+                {
+                    //cout << pos << m << endl;
+                    //assert(false);
+                    illegal_move = true;
+                    break;
+                }

+                // Processing when adding the gradient to the node on each PV.
+                //If discount_rate is 0, this process is not performed.
+                if (discount_rate != 0)
+                    pos_add_grad();
+
+                pos.do_move(m, state[ply++]);
+            }
+
+            if (illegal_move) 
+            {
+                sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
+                continue;
+            }
+
+            // We have reached the final position of the PV, so add the gradient here.
+            pos_add_grad();
        }

    }
@@ -1118,15 +1181,7 @@ namespace Learner
                else 
                {
                    cout << " >= best (" << best_loss << "), rejected" << endl;
-                    if (best_nn_directory.empty()) 
-                    {
-                        cout << "WARNING: no improvement from initial model" << endl;
-                    }
-                    else 
-                    {
-                        cout << "restoring parameters from " << best_nn_directory << endl;
-                        Eval::NNUE::RestoreParameters(best_nn_directory);
-                    }
+                    best_nn_directory = Path::Combine((std::string)Options["EvalSaveDir"], dir_name);

                    if (--trials > 0 && !is_final) 
                    {
@@ -1468,6 +1523,11 @@ namespace Learner
        ELMO_LAMBDA2 = 0.33;
        ELMO_LAMBDA_LIMIT = 32000;

+        // Discount rate. If this is set to a value other than 0,
+        // the gradient is also added at positions other than the end of the PV.
+        // (Those positions are weighted by this discount rate.)
+        double discount_rate = 0;
+
        // if (gamePly <rand(reduction_gameply)) continue;
        // An option to exclude the early stage from the learning target moderately like
        // If set to 1, rand(1)==0, so nothing is excluded.
@@ -1537,6 +1597,9 @@ namespace Learner

            else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient;

+            // Discount rate
+            else if (option == "discount_rate") is >> discount_rate;
+
            // Using WDL with win rate model instead of sigmoid
            else if (option == "use_wdl") is >> use_wdl;

@@ -1603,9 +1666,11 @@ namespace Learner
        // Display learning game file
        if (target_dir != "")
        {
+            string kif_base_dir = Path::Combine(base_dir, target_dir);
+
            namespace sys = std::filesystem;
-            sys::path kif_base_dir(Path::Combine(base_dir, target_dir)); // Origin of enumeration
-            std::for_each(sys::directory_iterator(kif_base_dir), sys::directory_iterator(),
+            sys::path p(kif_base_dir); // Origin of enumeration
+            std::for_each(sys::directory_iterator(p), sys::directory_iterator(),
                [&](const sys::path& path) {
                    if (sys::is_regular_file(path))
                        filenames.push_back(Path::Combine(target_dir, path.filename().generic_string()));
@@ -1726,6 +1791,8 @@ namespace Learner
            cout << "scheduling        : default" << endl;
        }

+        cout << "discount rate     : " << discount_rate << endl;
+
        // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1.
        reduction_gameply = max(reduction_gameply, 1);
        cout << "reduction_gameply : " << reduction_gameply << endl;
@@ -1758,6 +1825,7 @@ namespace Learner
        cout << "init done." << endl;

        // Reflect other option settings.
+        learn_think.discount_rate = discount_rate;
        learn_think.eval_limit = eval_limit;
        learn_think.save_only_once = save_only_once;
        learn_think.sr.no_shuffle = no_shuffle;
@@ -1805,6 +1873,8 @@ namespace Learner
        // Start learning.
        learn_think.go_think();

+        Eval::NNUE::FinalizeNet();
+
        // Save once at the end.
        learn_think.save(true);
    }
--- a/src/learn/learning_tools.h
+++ b/src/learn/learning_tools.h
@@ -40,13 +40,14 @@ namespace EvalLearningTools
 		static uint64_t eta2_epoch;

 		// Batch initialization of eta. If 0 is passed, the default value will be set.
-		static void init_eta(double eta1, double eta2, double eta3, uint64_t eta1_epoch, uint64_t eta2_epoch)
+		static void init_eta(double new_eta1, double new_eta2, double new_eta3,
+			uint64_t new_eta1_epoch, uint64_t new_eta2_epoch)
 		{
-			Weight::eta1 = (eta1 != 0) ? eta1 : 30.0;
-			Weight::eta2 = (eta2 != 0) ? eta2 : 30.0;
-			Weight::eta3 = (eta3 != 0) ? eta3 : 30.0;
-			Weight::eta1_epoch = (eta1_epoch != 0) ? eta1_epoch : 0;
-			Weight::eta2_epoch = (eta2_epoch != 0) ? eta2_epoch : 0;
+			Weight::eta1 = (new_eta1 != 0) ? new_eta1 : 30.0;
+			Weight::eta2 = (new_eta2 != 0) ? new_eta2 : 30.0;
+			Weight::eta3 = (new_eta3 != 0) ? new_eta3 : 30.0;
+			Weight::eta1_epoch = (new_eta1_epoch != 0) ? new_eta1_epoch : 0;
+			Weight::eta2_epoch = (new_eta2_epoch != 0) ? new_eta2_epoch : 0;
 		}

 		// Set eta according to epoch.
--- a/src/learn/multi_think.cpp
+++ b/src/learn/multi_think.cpp
@@ -10,13 +10,6 @@

 void MultiThink::go_think()
 {
-	// Keep a copy to restore the Options settings later.
-	auto oldOptions = Options;
-
-	// When using the constant track, it takes a lot of time to perform on the fly & the part to access the file is
-	// Since it is not thread safe, it is guaranteed here that it is being completely read in memory.
-	Options["BookOnTheFly"] = std::string("false");
-
 	// Read evaluation function, etc.
 	// In the case of the learn command, the value of the evaluation function may be corrected after reading the evaluation function, so
 	// Skip memory corruption check.
@@ -111,12 +104,6 @@ void MultiThink::go_think()
 	// The file writing thread etc. are still running only when all threads are finished
 	// Since the work itself may not have completed, output only that all threads have finished.
 	std::cout << "all threads are joined." << std::endl;
-
-	// Restored because Options were rewritten.
-	// Restore the handler because the handler will not start unless you assign a value.
-	for (auto& s : oldOptions)
-		Options[s.first] = std::string(s.second);
-
 }


--- a/src/nnue/evaluate_nnue_learner.cpp
+++ b/src/nnue/evaluate_nnue_learner.cpp
@@ -113,11 +113,21 @@ void SetOptions(const std::string& options) {
 void RestoreParameters(const std::string& dir_name) {
  const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName);
  std::ifstream stream(file_name, std::ios::binary);
-  bool result = ReadParameters(stream);
+#ifndef NDEBUG
+  bool result =
+#endif
+  ReadParameters(stream);
+#ifndef NDEBUG
  assert(result);
+#endif
+
  SendMessages({{"reset"}});
 }

+void FinalizeNet() {
+  SendMessages({{"clear_unobserved_feature_weights"}});
+}
+
 // Add 1 sample of learning data
 void AddExample(Position& pos, Color rootColor,
                const Learner::PackedSfenValue& psv, double weight) {
@@ -209,14 +219,16 @@ void save_eval(std::string dir_name) {
  // Also, assume that the folders up to EvalSaveDir have been dug.
  std::filesystem::create_directories(eval_dir);

-  if (Options["SkipLoadingEval"] && NNUE::trainer) {
-    NNUE::SendMessages({{"clear_unobserved_feature_weights"}});
-  }
-
  const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName);
  std::ofstream stream(file_name, std::ios::binary);
-  bool result = NNUE::WriteParameters(stream);
+#ifndef NDEBUG
+  bool result =
+#endif
+  NNUE::WriteParameters(stream);
+#ifndef NDEBUG
  assert(result);
+#endif
+
  std::cout << "save_eval() finished. folder = " << eval_dir << std::endl;
 }

--- a/src/nnue/evaluate_nnue_learner.h
+++ b/src/nnue/evaluate_nnue_learner.h
@@ -37,6 +37,8 @@ void UpdateParameters(uint64_t epoch);
 // Check if there are any problems with learning
 void CheckHealth();

+void FinalizeNet();
+
 }  // namespace NNUE

 }  // namespace Eval
--- a/src/nnue/trainer/trainer.h
+++ b/src/nnue/trainer/trainer.h
@@ -70,8 +70,8 @@ struct Example {

 // Message used for setting hyperparameters
 struct Message {
-  Message(const std::string& in_name, const std::string& in_value = ""):
-      name(in_name), value(in_value), num_peekers(0), num_receivers(0) {}
+  Message(const std::string& message_name, const std::string& message_value = ""):
+      name(message_name), value(message_value), num_peekers(0), num_receivers(0) {}
  const std::string name;
  const std::string value;
  std::uint32_t num_peekers;
--- a/src/nnue/trainer/trainer_affine_transform.h
+++ b/src/nnue/trainer/trainer_affine_transform.h
@@ -196,7 +196,7 @@ class Trainer<Layers::AffineTransform<PreviousLayer, OutputDimensions>> {
      weights_(),
      biases_diff_(),
      weights_diff_(),
-      momentum_(0.0),
+      momentum_(0.1),
      learning_rate_scale_(1.0) {
    DequantizeParameters();
  }
--- a/src/nnue/trainer/trainer_feature_transformer.h
+++ b/src/nnue/trainer/trainer_feature_transformer.h
@@ -234,7 +234,7 @@ class Trainer<FeatureTransformer> {
      biases_(),
      weights_(),
      biases_diff_(),
-      momentum_(0.0),
+      momentum_(0.1),
      learning_rate_scale_(1.0) {
    min_pre_activation_ = std::numeric_limits<LearnFloatType>::max();
    max_pre_activation_ = std::numeric_limits<LearnFloatType>::lowest();
--- a/src/nnue/trainer/trainer_input_slice.h
+++ b/src/nnue/trainer/trainer_input_slice.h
@@ -206,7 +206,7 @@ class Trainer<Layers::InputSlice<OutputDimensions, Offset>> {
      const IndexType input_offset = kInputDimensions * b;
      const IndexType output_offset = kOutputDimensions * b;
      for (IndexType i = 0; i < kInputDimensions; ++i) {
-        if (i < Offset || i >= Offset + kOutputDimensions) {
+        if ((int)i < (int)Offset || i >= Offset + kOutputDimensions) {
          gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
        } else {
          gradients_[input_offset + i] = gradients[output_offset + i - Offset];
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -2076,6 +2076,7 @@ namespace Learner
        rootMoves.push_back(Search::RootMove(m));

      assert(!rootMoves.empty());
+      TB::rank_root_moves(pos, rootMoves);
    }
  }

--- a/src/tt.cpp
+++ b/src/tt.cpp
@@ -33,24 +33,6 @@ TranspositionTable TT; // Our global transposition table

 void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) {

-  // Preserve any existing move for the same position
-  if (m || (uint16_t)k != key16)
-      move16 = (uint16_t)m;
-
-  // Overwrite less valuable entries (cheapest checks first)
-  if (b == BOUND_EXACT
-      || (uint16_t)k != key16
-      || d - DEPTH_OFFSET > depth8 - 4)
-  {
-      assert(d > DEPTH_OFFSET);
-      assert(d < 256 + DEPTH_OFFSET);
-
-      key16     = (uint16_t)k;
-      depth8    = (uint8_t)(d - DEPTH_OFFSET);
-      genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b);
-      value16   = (int16_t)v;
-      eval16    = (int16_t)ev;
-  }
 }


@@ -115,33 +97,7 @@ void TranspositionTable::clear() {
 /// TTEntry t2 if its replace value is greater than that of t2.

 TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
-
-  if (Options["Training"]) {
-    return found = false, first_entry(0);
-  }
-  TTEntry* const tte = first_entry(key);
-  const uint16_t key16 = (uint16_t)key;  // Use the low 16 bits as key inside the cluster
-
-  for (int i = 0; i < ClusterSize; ++i)
-      if (tte[i].key16 == key16 || !tte[i].depth8)
-      {
-          tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & 0x7)); // Refresh
-
-          return found = (bool)tte[i].depth8, &tte[i];
-      }
-
-  // Find an entry to be replaced according to the replacement strategy
-  TTEntry* replace = tte;
-  for (int i = 1; i < ClusterSize; ++i)
-      // Due to our packed storage format for generation and its cyclic
-      // nature we add 263 (256 is the modulus plus 7 to keep the unrelated
-      // lowest three bits from affecting the result) to calculate the entry
-      // age correctly even after generation8 overflows into the next cycle.
-      if (  replace->depth8 - ((263 + generation8 - replace->genBound8) & 0xF8)
-          >   tte[i].depth8 - ((263 + generation8 -   tte[i].genBound8) & 0xF8))
-          replace = &tte[i];
-
-  return found = false, replace;
+  return found = false, first_entry(0);
 }


--- a/src/uci.cpp
+++ b/src/uci.cpp
@@ -70,7 +70,7 @@ void test_cmd(Position& pos, istringstream& is)
    if (param == "nnue") Eval::NNUE::TestCommand(pos, is);
 }

-namespace {
+namespace UCI {

  // position() is called when engine receives the "position" UCI command.
  // The function sets up the position described in the given FEN string ("fen")
@@ -229,33 +229,33 @@ namespace {
  // and a game-ply. The model fits rather accurately the LTC fishtest statistics.
  int win_rate_model(Value v, int ply) {
     // Return win rate in per mille (rounded to nearest)
-     return int(0.5 + UCI::win_rate_model_double(v, ply));
+     return int(0.5 + win_rate_model_double(v, ply));
+  }
+
+  // The win rate model returns the probability (per mille) of winning given an eval
+  // and a game-ply. The model fits rather accurately the LTC fishtest statistics.
+  double win_rate_model_double(double v, int ply) {
+
+     // The model captures only up to 240 plies, so limit input (and rescale)
+     double m = std::min(240, ply) / 64.0;
+
+     // Coefficients of a 3rd order polynomial fit based on fishtest data
+     // for two parameters needed to transform eval to the argument of a
+     // logistic function.
+     double as[] = {-8.24404295, 64.23892342, -95.73056462, 153.86478679};
+     double bs[] = {-3.37154371, 28.44489198, -56.67657741,  72.05858751};
+     double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
+     double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
+
+     // Transform eval to centipawns with limited range
+       double x = std::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0);
+
+     // Return win rate in per mille
+     return 1000.0 / (1 + std::exp((a - x) / b));
  }

 } // namespace

-// The win rate model returns the probability (per mille) of winning given an eval
-// and a game-ply. The model fits rather accurately the LTC fishtest statistics.
-double UCI::win_rate_model_double(double v, int ply) {
-
-   // The model captures only up to 240 plies, so limit input (and rescale)
-   double m = std::min(240, ply) / 64.0;
-
-   // Coefficients of a 3rd order polynomial fit based on fishtest data
-   // for two parameters needed to transform eval to the argument of a
-   // logistic function.
-   double as[] = {-8.24404295, 64.23892342, -95.73056462, 153.86478679};
-   double bs[] = {-3.37154371, 28.44489198, -56.67657741,  72.05858751};
-   double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
-   double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
-
-   // Transform eval to centipawns with limited range
-     double x = std::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0);
-
-   // Return win rate in per mille
-   return 1000.0 / (1 + std::exp((a - x) / b));
-}
-
 // --------------------
 // Call qsearch(),search() directly for testing
 // --------------------