diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 2b11adef..bcd14e6f 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -664,7 +664,11 @@ class FeatureTransformer { for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) { - auto entryTile = reinterpret_cast(&entry.accumulation[j * TileHeight]); + auto accTile = + reinterpret_cast(&accumulator.accumulation[Perspective][j * TileHeight]); + auto entryTile = + reinterpret_cast(&entry.accumulation[j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) acc[k] = entryTile[k]; @@ -679,7 +683,7 @@ class FeatureTransformer { auto columnA = reinterpret_cast(&weights[offsetA]); for (unsigned k = 0; k < NumRegs; ++k) - acc[k] = vec_add_16(vec_sub_16(acc[k], columnR[k]), columnA[k]); + acc[k] = vec_add_16(acc[k], vec_sub_16(columnA[k], columnR[k])); } for (; i < int(removed.size()); ++i) { @@ -702,12 +706,17 @@ class FeatureTransformer { for (IndexType k = 0; k < NumRegs; k++) vec_store(&entryTile[k], acc[k]); + for (IndexType k = 0; k < NumRegs; k++) + vec_store(&accTile[k], acc[k]); } for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) { - auto entryTilePsqt = - reinterpret_cast(&entry.psqtAccumulation[j * PsqtTileHeight]); + auto accTilePsqt = reinterpret_cast( + &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + auto entryTilePsqt = reinterpret_cast( + &entry.psqtAccumulation[j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) psqt[k] = entryTilePsqt[k]; @@ -732,6 +741,8 @@ class FeatureTransformer { for (std::size_t k = 0; k < NumPsqtRegs; ++k) vec_store_psqt(&entryTilePsqt[k], psqt[k]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + vec_store_psqt(&accTilePsqt[k], psqt[k]); } #else @@ -755,8 +766,6 @@ class FeatureTransformer { entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k]; } -#endif - // The accumulator of the refresh entry has been updated. // Now copy its content to the actual accumulator we were refreshing @@ -765,6 +774,7 @@ class FeatureTransformer { std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation, sizeof(int32_t) * PSQTBuckets); +#endif for (Color c : {WHITE, BLACK}) entry.byColorBB[c] = pos.pieces(c);