Use neon_m128_reduce_add_epi32 for NEON vector reduction

This accomplishes the entire horizontal addition in a single NEON instruction. Closes https://github.com/official-stockfish/Stockfish/pull/5885 . No functional change.
2025-12-06 10:53:50 +08:00 · 2025-02-14 03:07:39 +03:00
parent ee7259e48b
commit 095d19afea
1 changed file with 1 addition and 1 deletion
--- a/src/nnue/layers/affine_transform.h
+++ b/src/nnue/layers/affine_transform.h
@@ -102,7 +102,7 @@ static void affine_transform_non_ssse3(std::int32_t*       output,
            product           = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]);
            sum               = vpadalq_s16(sum, product);
        }
-        output[i] = sum[0] + sum[1] + sum[2] + sum[3];
+        output[i] = Simd::neon_m128_reduce_add_epi32(sum);

        #endif
    }