Introduce a function to compute NNUE accumulator

This patch introduces `hint_common_parent_position()` to signal that potentially several child nodes will require an NNUE eval. By populating explicitly the accumulator, these subsequent evaluations can be performed more efficiently. This was based on the observation that calculating the evaluation in an excluded move position yielded a significant Elo gain, even though the evaluation itself was already available (work by pb00067). Sopel wrote the code to perform just the accumulator update. This PR is based on cleaned up code that passed STC: https://tests.stockfishchess.org/tests/view/63f62f9be74a12625bcd4aa0 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 110368 W: 29607 L: 29167 D: 51594 Ptnml(0-2): 41, 10551, 33572, 10967, 53 and in an the earlier (equivalent) version passed STC: https://tests.stockfishchess.org/tests/view/63f3c3fee74a12625bcce2a6 LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 47552 W: 12786 L: 12467 D: 22299 Ptnml(0-2): 120, 5107, 12997, 5438, 114 passed LTC: https://tests.stockfishchess.org/tests/view/63f45cc2e74a12625bccfa63 LLR: 2.94 (-2.94,2.94) <0.50,2.50> Total: 110368 W: 29607 L: 29167 D: 51594 Ptnml(0-2): 41, 10551, 33572, 10967, 53 closes https://github.com/official-stockfish/Stockfish/pull/4402 Bench: 3726250
2025-12-23 18:46:59 +08:00 · 2023-02-20 20:02:55 +01:00
parent 77dfcbedce
commit 08385527dd
5 changed files with 266 additions and 174 deletions
--- a/src/evaluate.h
+++ b/src/evaluate.h
@@ -45,6 +45,7 @@ namespace Eval {

    std::string trace(Position& pos);
    Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr);
+    void hint_common_parent_position(const Position& pos);

    void init();
    void verify();
--- a/src/nnue/evaluate_nnue.cpp
+++ b/src/nnue/evaluate_nnue.cpp
@@ -136,6 +136,11 @@ namespace Stockfish::Eval::NNUE {
    return (bool)stream;
  }

+  void hint_common_parent_position(const Position& pos) {
+    if (Eval::useNNUE)
+        featureTransformer->hint_common_access(pos);
+  }
+
  // Evaluation function. Perform differential calculation.
  Value evaluate(const Position& pos, bool adjusted, int* complexity) {

--- a/src/nnue/evaluate_nnue.h
+++ b/src/nnue/evaluate_nnue.h
@@ -31,6 +31,7 @@ namespace Stockfish::Eval::NNUE {
  constexpr std::uint32_t HashValue =
      FeatureTransformer::get_hash_value() ^ Network::get_hash_value();

+
  // Deleter for automating release of memory area
  template <typename T>
  struct AlignedDeleter {
--- a/src/nnue/nnue_feature_transformer.h
+++ b/src/nnue/nnue_feature_transformer.h
@@ -25,6 +25,7 @@
 #include "nnue_architecture.h"

 #include <cstring> // std::memset()
+#include <utility> // std::pair

 namespace Stockfish::Eval::NNUE {

@@ -332,27 +333,16 @@ namespace Stockfish::Eval::NNUE {
 #endif

      return psqt;
+    } // end of function transform()

-   } // end of function transform()
-
-
+    void hint_common_access(const Position& pos) const {
+      hint_common_access_for_perspective<WHITE>(pos);
+      hint_common_access_for_perspective<BLACK>(pos);
+    }

   private:
    template<Color Perspective>
-    void update_accumulator(const Position& pos) const {
-
-      // The size must be enough to contain the largest possible update.
-      // That might depend on the feature set and generally relies on the
-      // feature set's update cost calculation to be correct and never
-      // allow updates with more added/removed features than MaxActiveDimensions.
-
-  #ifdef VECTOR
-      // Gcc-10.2 unnecessarily spills AVX2 registers if this array
-      // is defined in the VECTOR code below, once in each branch
-      vec_t acc[NumRegs];
-      psqt_vec_t psqt[NumPsqtRegs];
-  #endif
-
+    [[nodiscard]] std::pair<StateInfo*, StateInfo*> try_find_computed_accumulator(const Position& pos) const {
      // Look for a usable accumulator of an earlier position. We keep track
      // of the estimated gain in terms of features to be added/subtracted.
      StateInfo *st = pos.state(), *next = nullptr;
@@ -367,218 +357,313 @@ namespace Stockfish::Eval::NNUE {
        next = st;
        st = st->previous;
      }
+      return { st, next };
+    }

-      if (st->accumulator.computed[Perspective])
-      {
-        if (next == nullptr)
-          return;
+    // NOTE: The parameter states_to_update is an array of position states, ending with nullptr.
+    //       All states must be sequential, that is states_to_update[i] must either be reachable
+    //       by repeatedly applying ->previous from states_to_update[i+1] or states_to_update[i] == nullptr.
+    //       computed_st must be reachable by repeatadly applying ->previous on states_to_update[0], if not nullptr.
+    template<Color Perspective, size_t N>
+    void update_accumulator_incremetal(const Position& pos, StateInfo* computed_st, StateInfo* states_to_update[N]) const {
+      static_assert(N > 0);
+      assert(states_to_update[N-1] == nullptr);

-        // Update incrementally in two steps. First, we update the "next"
-        // accumulator. Then, we update the current accumulator (pos.state()).
-
-        // Gather all features to be updated.
-        const Square ksq = pos.square<KING>(Perspective);
-        FeatureSet::IndexList removed[2], added[2];
-        FeatureSet::append_changed_indices<Perspective>(
-          ksq, next->dirtyPiece, removed[0], added[0]);
-        for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous)
-          FeatureSet::append_changed_indices<Perspective>(
-            ksq, st2->dirtyPiece, removed[1], added[1]);
-
-        // Mark the accumulators as computed.
-        next->accumulator.computed[Perspective] = true;
-        pos.state()->accumulator.computed[Perspective] = true;
-
-        // Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
-        StateInfo *states_to_update[3] =
-          { next, next == pos.state() ? nullptr : pos.state(), nullptr };
  #ifdef VECTOR
-        for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
+      // Gcc-10.2 unnecessarily spills AVX2 registers if this array
+      // is defined in the VECTOR code below, once in each branch
+      vec_t acc[NumRegs];
+      psqt_vec_t psqt[NumPsqtRegs];
+  #endif
+
+      if (states_to_update[0] == nullptr)
+        return;
+
+      // Update incrementally going back through states_to_update.
+
+      // Gather all features to be updated.
+      const Square ksq = pos.square<KING>(Perspective);
+
+      // The size must be enough to contain the largest possible update.
+      // That might depend on the feature set and generally relies on the
+      // feature set's update cost calculation to be correct and never
+      // allow updates with more added/removed features than MaxActiveDimensions.
+      FeatureSet::IndexList removed[N-1], added[N-1];
+
+      {
+        int i = N-2; // last potential state to update. Skip last element because it must be nullptr.
+        while (states_to_update[i] == nullptr)
+          --i;
+
+        StateInfo *st2 = states_to_update[i];
+
+        for (; i >= 0; --i)
        {
-          // Load accumulator
-          auto accTile = reinterpret_cast<vec_t*>(
-            &st->accumulator.accumulation[Perspective][j * TileHeight]);
-          for (IndexType k = 0; k < NumRegs; ++k)
-            acc[k] = vec_load(&accTile[k]);
+          states_to_update[i]->accumulator.computed[Perspective] = true;

-          for (IndexType i = 0; states_to_update[i]; ++i)
-          {
-            // Difference calculation for the deactivated features
-            for (const auto index : removed[i])
-            {
-              const IndexType offset = HalfDimensions * index + j * TileHeight;
-              auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
-              for (IndexType k = 0; k < NumRegs; ++k)
-                acc[k] = vec_sub_16(acc[k], column[k]);
-            }
+          StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1];

-            // Difference calculation for the activated features
-            for (const auto index : added[i])
-            {
-              const IndexType offset = HalfDimensions * index + j * TileHeight;
-              auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
-              for (IndexType k = 0; k < NumRegs; ++k)
-                acc[k] = vec_add_16(acc[k], column[k]);
-            }
-
-            // Store accumulator
-            accTile = reinterpret_cast<vec_t*>(
-              &states_to_update[i]->accumulator.accumulation[Perspective][j * TileHeight]);
-            for (IndexType k = 0; k < NumRegs; ++k)
-              vec_store(&accTile[k], acc[k]);
-          }
+          for (; st2 != end_state; st2 = st2->previous)
+            FeatureSet::append_changed_indices<Perspective>(
+              ksq, st2->dirtyPiece, removed[i], added[i]);
        }
+      }

-        for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
-        {
-          // Load accumulator
-          auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
-            &st->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
-          for (std::size_t k = 0; k < NumPsqtRegs; ++k)
-            psqt[k] = vec_load_psqt(&accTilePsqt[k]);
+      StateInfo* st = computed_st;

-          for (IndexType i = 0; states_to_update[i]; ++i)
-          {
-            // Difference calculation for the deactivated features
-            for (const auto index : removed[i])
-            {
-              const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
-              auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
-              for (std::size_t k = 0; k < NumPsqtRegs; ++k)
-                psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
-            }
+      // Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
+#ifdef VECTOR
+      for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
+      {
+        // Load accumulator
+        auto accTile = reinterpret_cast<vec_t*>(
+          &st->accumulator.accumulation[Perspective][j * TileHeight]);
+        for (IndexType k = 0; k < NumRegs; ++k)
+          acc[k] = vec_load(&accTile[k]);

-            // Difference calculation for the activated features
-            for (const auto index : added[i])
-            {
-              const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
-              auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
-              for (std::size_t k = 0; k < NumPsqtRegs; ++k)
-                psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
-            }
-
-            // Store accumulator
-            accTilePsqt = reinterpret_cast<psqt_vec_t*>(
-              &states_to_update[i]->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
-            for (std::size_t k = 0; k < NumPsqtRegs; ++k)
-              vec_store_psqt(&accTilePsqt[k], psqt[k]);
-          }
-        }
-
-  #else
        for (IndexType i = 0; states_to_update[i]; ++i)
        {
-          std::memcpy(states_to_update[i]->accumulator.accumulation[Perspective],
-              st->accumulator.accumulation[Perspective],
-              HalfDimensions * sizeof(BiasType));
-
-          for (std::size_t k = 0; k < PSQTBuckets; ++k)
-            states_to_update[i]->accumulator.psqtAccumulation[Perspective][k] = st->accumulator.psqtAccumulation[Perspective][k];
-
-          st = states_to_update[i];
-
          // Difference calculation for the deactivated features
          for (const auto index : removed[i])
          {
-            const IndexType offset = HalfDimensions * index;
-
-            for (IndexType j = 0; j < HalfDimensions; ++j)
-              st->accumulator.accumulation[Perspective][j] -= weights[offset + j];
-
-            for (std::size_t k = 0; k < PSQTBuckets; ++k)
-              st->accumulator.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k];
+            const IndexType offset = HalfDimensions * index + j * TileHeight;
+            auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
+            for (IndexType k = 0; k < NumRegs; ++k)
+              acc[k] = vec_sub_16(acc[k], column[k]);
          }

          // Difference calculation for the activated features
          for (const auto index : added[i])
-          {
-            const IndexType offset = HalfDimensions * index;
-
-            for (IndexType j = 0; j < HalfDimensions; ++j)
-              st->accumulator.accumulation[Perspective][j] += weights[offset + j];
-
-            for (std::size_t k = 0; k < PSQTBuckets; ++k)
-              st->accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
-          }
-        }
-  #endif
-      }
-      else
-      {
-        // Refresh the accumulator
-        auto& accumulator = pos.state()->accumulator;
-        accumulator.computed[Perspective] = true;
-        FeatureSet::IndexList active;
-        FeatureSet::append_active_indices<Perspective>(pos, active);
-
-  #ifdef VECTOR
-        for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
-        {
-          auto biasesTile = reinterpret_cast<const vec_t*>(
-              &biases[j * TileHeight]);
-          for (IndexType k = 0; k < NumRegs; ++k)
-            acc[k] = biasesTile[k];
-
-          for (const auto index : active)
          {
            const IndexType offset = HalfDimensions * index + j * TileHeight;
            auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
-
-            for (unsigned k = 0; k < NumRegs; ++k)
+            for (IndexType k = 0; k < NumRegs; ++k)
              acc[k] = vec_add_16(acc[k], column[k]);
          }

-          auto accTile = reinterpret_cast<vec_t*>(
-              &accumulator.accumulation[Perspective][j * TileHeight]);
-          for (unsigned k = 0; k < NumRegs; k++)
+          // Store accumulator
+          accTile = reinterpret_cast<vec_t*>(
+            &states_to_update[i]->accumulator.accumulation[Perspective][j * TileHeight]);
+          for (IndexType k = 0; k < NumRegs; ++k)
            vec_store(&accTile[k], acc[k]);
        }
+      }

-        for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
+      for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
+      {
+        // Load accumulator
+        auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
+          &st->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
+        for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+          psqt[k] = vec_load_psqt(&accTilePsqt[k]);
+
+        for (IndexType i = 0; states_to_update[i]; ++i)
        {
-          for (std::size_t k = 0; k < NumPsqtRegs; ++k)
-            psqt[k] = vec_zero_psqt();
-
-          for (const auto index : active)
+          // Difference calculation for the deactivated features
+          for (const auto index : removed[i])
          {
            const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
            auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
+            for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+              psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
+          }

+          // Difference calculation for the activated features
+          for (const auto index : added[i])
+          {
+            const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
+            auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
            for (std::size_t k = 0; k < NumPsqtRegs; ++k)
              psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
          }

-          auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
-            &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
+          // Store accumulator
+          accTilePsqt = reinterpret_cast<psqt_vec_t*>(
+            &states_to_update[i]->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
          for (std::size_t k = 0; k < NumPsqtRegs; ++k)
            vec_store_psqt(&accTilePsqt[k], psqt[k]);
        }
+      }

-  #else
-        std::memcpy(accumulator.accumulation[Perspective], biases,
+#else
+      for (IndexType i = 0; states_to_update[i]; ++i)
+      {
+        std::memcpy(states_to_update[i]->accumulator.accumulation[Perspective],
+            st->accumulator.accumulation[Perspective],
            HalfDimensions * sizeof(BiasType));

        for (std::size_t k = 0; k < PSQTBuckets; ++k)
-          accumulator.psqtAccumulation[Perspective][k] = 0;
+          states_to_update[i]->accumulator.psqtAccumulation[Perspective][k] = st->accumulator.psqtAccumulation[Perspective][k];

-        for (const auto index : active)
+        st = states_to_update[i];
+
+        // Difference calculation for the deactivated features
+        for (const auto index : removed[i])
        {
          const IndexType offset = HalfDimensions * index;

          for (IndexType j = 0; j < HalfDimensions; ++j)
-            accumulator.accumulation[Perspective][j] += weights[offset + j];
+            st->accumulator.accumulation[Perspective][j] -= weights[offset + j];

          for (std::size_t k = 0; k < PSQTBuckets; ++k)
-            accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
+            st->accumulator.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k];
+        }
+
+        // Difference calculation for the activated features
+        for (const auto index : added[i])
+        {
+          const IndexType offset = HalfDimensions * index;
+
+          for (IndexType j = 0; j < HalfDimensions; ++j)
+            st->accumulator.accumulation[Perspective][j] += weights[offset + j];
+
+          for (std::size_t k = 0; k < PSQTBuckets; ++k)
+            st->accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
        }
-  #endif
      }
+#endif

  #if defined(USE_MMX)
      _mm_empty();
  #endif
    }

+    template<Color Perspective>
+    void update_accumulator_refresh(const Position& pos) const {
+  #ifdef VECTOR
+      // Gcc-10.2 unnecessarily spills AVX2 registers if this array
+      // is defined in the VECTOR code below, once in each branch
+      vec_t acc[NumRegs];
+      psqt_vec_t psqt[NumPsqtRegs];
+  #endif
+
+      // Refresh the accumulator
+      // Could be extracted to a separate function because it's done in 2 places,
+      // but it's unclear if compilers would correctly handle register allocation.
+      auto& accumulator = pos.state()->accumulator;
+      accumulator.computed[Perspective] = true;
+      FeatureSet::IndexList active;
+      FeatureSet::append_active_indices<Perspective>(pos, active);
+
+#ifdef VECTOR
+      for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
+      {
+        auto biasesTile = reinterpret_cast<const vec_t*>(
+            &biases[j * TileHeight]);
+        for (IndexType k = 0; k < NumRegs; ++k)
+          acc[k] = biasesTile[k];
+
+        for (const auto index : active)
+        {
+          const IndexType offset = HalfDimensions * index + j * TileHeight;
+          auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
+
+          for (unsigned k = 0; k < NumRegs; ++k)
+            acc[k] = vec_add_16(acc[k], column[k]);
+        }
+
+        auto accTile = reinterpret_cast<vec_t*>(
+            &accumulator.accumulation[Perspective][j * TileHeight]);
+        for (unsigned k = 0; k < NumRegs; k++)
+          vec_store(&accTile[k], acc[k]);
+      }
+
+      for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
+      {
+        for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+          psqt[k] = vec_zero_psqt();
+
+        for (const auto index : active)
+        {
+          const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
+          auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
+
+          for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+            psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
+        }
+
+        auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
+          &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
+        for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+          vec_store_psqt(&accTilePsqt[k], psqt[k]);
+      }
+
+#else
+      std::memcpy(accumulator.accumulation[Perspective], biases,
+          HalfDimensions * sizeof(BiasType));
+
+      for (std::size_t k = 0; k < PSQTBuckets; ++k)
+        accumulator.psqtAccumulation[Perspective][k] = 0;
+
+      for (const auto index : active)
+      {
+        const IndexType offset = HalfDimensions * index;
+
+        for (IndexType j = 0; j < HalfDimensions; ++j)
+          accumulator.accumulation[Perspective][j] += weights[offset + j];
+
+        for (std::size_t k = 0; k < PSQTBuckets; ++k)
+          accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
+      }
+#endif
+
+  #if defined(USE_MMX)
+      _mm_empty();
+  #endif
+    }
+
+    template<Color Perspective>
+    void hint_common_access_for_perspective(const Position& pos) const {
+
+      // Works like update_accumulator, but performs less work.
+      // Updates ONLY the accumulator for pos.
+
+      // Look for a usable accumulator of an earlier position. We keep track
+      // of the estimated gain in terms of features to be added/subtracted.
+      // Fast early exit.
+      if (pos.state()->accumulator.computed[Perspective])
+        return;
+
+      auto [oldest_st, _] = try_find_computed_accumulator<Perspective>(pos);
+
+      if (oldest_st->accumulator.computed[Perspective])
+      {
+        // Only update current position accumulator to minimize work.
+        StateInfo* states_to_update[2] = { pos.state(), nullptr };
+        update_accumulator_incremetal<Perspective, 2>(pos, oldest_st, states_to_update);
+      }
+      else
+      {
+        update_accumulator_refresh<Perspective>(pos);
+      }
+    }
+
+    template<Color Perspective>
+    void update_accumulator(const Position& pos) const {
+
+      auto [oldest_st, next] = try_find_computed_accumulator<Perspective>(pos);
+
+      if (oldest_st->accumulator.computed[Perspective])
+      {
+        if (next == nullptr)
+          return;
+
+        // Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
+        // Currently we update 2 accumulators.
+        //     1. for the current position
+        //     2. the next accumulator after the computed one
+        // The heuristic may change in the future.
+        StateInfo *states_to_update[3] =
+          { next, next == pos.state() ? nullptr : pos.state(), nullptr };
+
+        update_accumulator_incremetal<Perspective, 3>(pos, oldest_st, states_to_update);
+      }
+      else
+      {
+        update_accumulator_refresh<Perspective>(pos);
+      }
+    }
+
    alignas(CacheLineSize) BiasType biases[HalfDimensions];
    alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions];
    alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets];
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -730,10 +730,10 @@ namespace {
        goto moves_loop;
    }
    else if (excludedMove) {
-        // excludeMove implies that we had a ttHit on the containing non-excluded search with ss->staticEval filled from TT
-        // However static evals from the TT aren't good enough (-13 elo), presumably due to changing optimism context
-        // Recalculate value with current optimism (without updating thread avgComplexity)
-        ss->staticEval = eval = evaluate(pos, &complexity);
+        // Providing the hint that this node's accumulator will be used often brings significant Elo gain (13 elo)
+        Eval::NNUE::hint_common_parent_position(pos);
+        eval = ss->staticEval;
+        complexity = abs(ss->staticEval - pos.psq_eg_stm());
    }
    else if (ss->ttHit)
    {