simplify accumulator updates

After #5759, accumulator updates are strictly on a per-move basis. Therefore,
the generic code for updating multiple moves at once is no longer needed.

Passed Non-regression STC:
LLR: 3.00 (-2.94,2.94) <-1.75,0.25>
Total: 81696 W: 21204 L: 21039 D: 39453
Ptnml(0-2): 210, 8431, 23416, 8566, 225
https://tests.stockfishchess.org/tests/view/67823a24a31c4c13e83518a8

closes https://github.com/official-stockfish/Stockfish/pull/5760

No functional change
2025-12-06 10:53:50 +08:00 · 2025-01-12 12:53:08 -08:00
parent 8e3e22b3d4
commit 62ecdfe82c
1 changed files with 81 additions and 112 deletions
--- a/src/nnue/nnue_feature_transformer.h
+++ b/src/nnue/nnue_feature_transformer.h
@@ -472,25 +472,20 @@ class FeatureTransformer {
        return st;
    }
-    // Computes the accumulator of the next position.
+    // Given a computed accumulator, computes the accumulator of the next position.
    template<Color Perspective>
    void update_accumulator_incremental(const Position& pos, StateInfo* computed) const {
        assert((computed->*accPtr).computed[Perspective]);
        assert(computed->next != nullptr);
 #ifdef VECTOR
        // Gcc-10.2 unnecessarily spills AVX2 registers if this array
        // is defined in the VECTOR code below, once in each branch.
        vec_t      acc[Tiling::NumRegs];
        psqt_vec_t psqt[Tiling::NumPsqtRegs];
 #endif
        const Square ksq = pos.square<KING>(Perspective);
        // The size must be enough to contain the largest possible update.
        // That might depend on the feature set and generally relies on the
        // feature set's update cost calculation to be correct and never allow
        // updates with more added/removed features than MaxActiveDimensions.
        // In this case, the maximum size of both feature addition and removal
        // is 2, since we are incrementally updating one move at a time.
        FeatureSet::IndexList removed, added;
        FeatureSet::append_changed_indices<Perspective>(ksq, computed->next->dirtyPiece, removed,
                                                        added);
@@ -498,51 +493,76 @@ class FeatureTransformer {
        StateInfo* next = computed->next;
        assert(!(next->*accPtr).computed[Perspective]);
-#ifdef VECTOR
+        if (removed.size() == 0 && added.size() == 0)
        if ((removed.size() == 1 || removed.size() == 2) && added.size() == 1)
        {
            std::memcpy((next->*accPtr).accumulation[Perspective],
                        (computed->*accPtr).accumulation[Perspective],
                        HalfDimensions * sizeof(BiasType));
            std::memcpy((next->*accPtr).psqtAccumulation[Perspective],
                        (computed->*accPtr).psqtAccumulation[Perspective],
                        PSQTBuckets * sizeof(PSQTWeightType));
        }
        else
        {
            assert(added.size() == 1 || added.size() == 2);
            assert(removed.size() == 1 || removed.size() == 2);
            assert(added.size() <= removed.size());
 #ifdef VECTOR
            auto* accIn =
              reinterpret_cast<const vec_t*>(&(computed->*accPtr).accumulation[Perspective][0]);
            auto* accOut = reinterpret_cast<vec_t*>(&(next->*accPtr).accumulation[Perspective][0]);
            const IndexType offsetA0 = HalfDimensions * added[0];
            auto*           columnA0 = reinterpret_cast<const vec_t*>(&weights[offsetA0]);
            const IndexType offsetR0 = HalfDimensions * removed[0];
            auto*           columnR0 = reinterpret_cast<const vec_t*>(&weights[offsetR0]);
            const IndexType offsetA  = HalfDimensions * added[0];
            auto*           columnA  = reinterpret_cast<const vec_t*>(&weights[offsetA]);
            if (removed.size() == 1)
            {
                for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
-                    accOut[i] = vec_add_16(vec_sub_16(accIn[i], columnR0[i]), columnA[i]);
+                    accOut[i] = vec_add_16(vec_sub_16(accIn[i], columnR0[i]), columnA0[i]);
            }
-            else
+            else if (added.size() == 1)
            {
                const IndexType offsetR1 = HalfDimensions * removed[1];
                auto*           columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
                for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
-                    accOut[i] = vec_sub_16(vec_add_16(accIn[i], columnA[i]),
+                    accOut[i] = vec_sub_16(vec_add_16(accIn[i], columnA0[i]),
                                           vec_add_16(columnR0[i], columnR1[i]));
            }
            else
            {
                const IndexType offsetA1 = HalfDimensions * added[1];
                auto*           columnA1 = reinterpret_cast<const vec_t*>(&weights[offsetA1]);
                const IndexType offsetR1 = HalfDimensions * removed[1];
                auto*           columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
                for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
                    accOut[i] =
                      vec_add_16(accIn[i], vec_sub_16(vec_add_16(columnA0[i], columnA1[i]),
                                                      vec_add_16(columnR0[i], columnR1[i])));
            }
            auto* accPsqtIn = reinterpret_cast<const psqt_vec_t*>(
              &(computed->*accPtr).psqtAccumulation[Perspective][0]);
            auto* accPsqtOut =
              reinterpret_cast<psqt_vec_t*>(&(next->*accPtr).psqtAccumulation[Perspective][0]);
            const IndexType offsetPsqtA0 = PSQTBuckets * added[0];
            auto* columnPsqtA0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA0]);
            const IndexType offsetPsqtR0 = PSQTBuckets * removed[0];
            auto* columnPsqtR0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR0]);
            const IndexType offsetPsqtA = PSQTBuckets * added[0];
            auto* columnPsqtA = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA]);
            if (removed.size() == 1)
            {
                for (std::size_t i = 0;
                     i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
                    accPsqtOut[i] = vec_add_psqt_32(vec_sub_psqt_32(accPsqtIn[i], columnPsqtR0[i]),
-                                                    columnPsqtA[i]);
+                                                    columnPsqtA0[i]);
            }
-            else
+            else if (added.size() == 1)
            {
                const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
                auto*           columnPsqtR1 =
@@ -551,110 +571,58 @@ class FeatureTransformer {
                for (std::size_t i = 0;
                     i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
                    accPsqtOut[i] =
-                      vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA[i]),
+                      vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA0[i]),
                                      vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i]));
            }
-        }
+            else
        else
        {
            for (IndexType i = 0; i < HalfDimensions / Tiling::TileHeight; ++i)
            {
-                // Load accumulator
+                const IndexType offsetPsqtA1 = PSQTBuckets * added[1];
-                auto* accTileIn = reinterpret_cast<const vec_t*>(
+                auto*           columnPsqtA1 =
-                  &(computed->*accPtr).accumulation[Perspective][i * Tiling::TileHeight]);
+                  reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA1]);
-                for (IndexType j = 0; j < Tiling::NumRegs; ++j)
+                const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
-                    acc[j] = vec_load(&accTileIn[j]);
+                auto*           columnPsqtR1 =
                  reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR1]);
-                // Difference calculation for the deactivated features
+                for (std::size_t i = 0;
-                for (const auto index : removed)
+                     i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
-                {
+                    accPsqtOut[i] = vec_add_psqt_32(
-                    const IndexType offset = HalfDimensions * index + i * Tiling::TileHeight;
+                      accPsqtIn[i],
-                    auto*           column = reinterpret_cast<const vec_t*>(&weights[offset]);
+                      vec_sub_psqt_32(vec_add_psqt_32(columnPsqtA0[i], columnPsqtA1[i]),
-                    for (IndexType j = 0; j < Tiling::NumRegs; ++j)
+                                      vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i])));
                        acc[j] = vec_sub_16(acc[j], column[j]);
                }
                // Difference calculation for the activated features
                for (const auto index : added)
                {
                    const IndexType offset = HalfDimensions * index + i * Tiling::TileHeight;
                    auto*           column = reinterpret_cast<const vec_t*>(&weights[offset]);
                    for (IndexType j = 0; j < Tiling::NumRegs; ++j)
                        acc[j] = vec_add_16(acc[j], column[j]);
                }
                // Store accumulator
                auto* accTileOut = reinterpret_cast<vec_t*>(
                  &(next->*accPtr).accumulation[Perspective][i * Tiling::TileHeight]);
                for (IndexType j = 0; j < Tiling::NumRegs; ++j)
                    vec_store(&accTileOut[j], acc[j]);
            }
            for (IndexType i = 0; i < PSQTBuckets / Tiling::PsqtTileHeight; ++i)
            {
                // Load accumulator
                auto* accTilePsqtIn = reinterpret_cast<const psqt_vec_t*>(
                  &(computed->*accPtr).psqtAccumulation[Perspective][i * Tiling::PsqtTileHeight]);
                for (std::size_t j = 0; j < Tiling::NumPsqtRegs; ++j)
                    psqt[j] = vec_load_psqt(&accTilePsqtIn[j]);
                // Difference calculation for the deactivated features
                for (const auto index : removed)
                {
                    const IndexType offset = PSQTBuckets * index + i * Tiling::PsqtTileHeight;
                    auto* columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
                    for (std::size_t j = 0; j < Tiling::NumPsqtRegs; ++j)
                        psqt[j] = vec_sub_psqt_32(psqt[j], columnPsqt[j]);
                }
                // Difference calculation for the activated features
                for (const auto index : added)
                {
                    const IndexType offset = PSQTBuckets * index + i * Tiling::PsqtTileHeight;
                    auto* columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
                    for (std::size_t j = 0; j < Tiling::NumPsqtRegs; ++j)
                        psqt[j] = vec_add_psqt_32(psqt[j], columnPsqt[j]);
                }
                // Store accumulator
                auto* accTilePsqtOut = reinterpret_cast<psqt_vec_t*>(
                  &(next->*accPtr).psqtAccumulation[Perspective][i * Tiling::PsqtTileHeight]);
                for (std::size_t j = 0; j < Tiling::NumPsqtRegs; ++j)
                    vec_store_psqt(&accTilePsqtOut[j], psqt[j]);
            }
        }
 #else
-        std::memcpy((next->*accPtr).accumulation[Perspective],
+            std::memcpy((next->*accPtr).accumulation[Perspective],
-                    (computed->*accPtr).accumulation[Perspective],
+                        (computed->*accPtr).accumulation[Perspective],
-                    HalfDimensions * sizeof(BiasType));
+                        HalfDimensions * sizeof(BiasType));
-        std::memcpy((next->*accPtr).psqtAccumulation[Perspective],
+            std::memcpy((next->*accPtr).psqtAccumulation[Perspective],
-                    (computed->*accPtr).psqtAccumulation[Perspective],
+                        (computed->*accPtr).psqtAccumulation[Perspective],
-                    PSQTBuckets * sizeof(PSQTWeightType));
+                        PSQTBuckets * sizeof(PSQTWeightType));
-        // Difference calculation for the deactivated features
+            // Difference calculation for the deactivated features
-        for (const auto index : removed)
+            for (const auto index : removed)
-        {
+            {
-            const IndexType offset = HalfDimensions * index;
+                const IndexType offset = HalfDimensions * index;
-            for (IndexType i = 0; i < HalfDimensions; ++i)
+                for (IndexType i = 0; i < HalfDimensions; ++i)
-                (next->*accPtr).accumulation[Perspective][i] -= weights[offset + i];
+                    (next->*accPtr).accumulation[Perspective][i] -= weights[offset + i];
-            for (std::size_t i = 0; i < PSQTBuckets; ++i)
+                for (std::size_t i = 0; i < PSQTBuckets; ++i)
-                (next->*accPtr).psqtAccumulation[Perspective][i] -=
+                    (next->*accPtr).psqtAccumulation[Perspective][i] -=
-                  psqtWeights[index * PSQTBuckets + i];
+                      psqtWeights[index * PSQTBuckets + i];
-        }
+            }
-        // Difference calculation for the activated features
+            // Difference calculation for the activated features
-        for (const auto index : added)
+            for (const auto index : added)
-        {
+            {
-            const IndexType offset = HalfDimensions * index;
+                const IndexType offset = HalfDimensions * index;
-            for (IndexType i = 0; i < HalfDimensions; ++i)
+                for (IndexType i = 0; i < HalfDimensions; ++i)
-                (next->*accPtr).accumulation[Perspective][i] += weights[offset + i];
+                    (next->*accPtr).accumulation[Perspective][i] += weights[offset + i];
-            for (std::size_t i = 0; i < PSQTBuckets; ++i)
+                for (std::size_t i = 0; i < PSQTBuckets; ++i)
-                (next->*accPtr).psqtAccumulation[Perspective][i] +=
+                    (next->*accPtr).psqtAccumulation[Perspective][i] +=
-                  psqtWeights[index * PSQTBuckets + i];
+                      psqtWeights[index * PSQTBuckets + i];
-        }
+            }
 #endif
        }
        (next->*accPtr).computed[Perspective] = true;
@@ -662,6 +630,7 @@ class FeatureTransformer {
            update_accumulator_incremental<Perspective>(pos, next);
    }
    template<Color Perspective>
    void update_accumulator_refresh_cache(const Position&                           pos,
                                          AccumulatorCaches::Cache<HalfDimensions>* cache) const {