Remove combineLast3 optimization

Passed non-regression STC (short time control) test, 1st run:
LLR: 2.96 (-2.94,2.94) <-1.75,0.25>
Total: 67328 W: 17296 L: 17118 D: 32914
Ptnml(0-2): 158, 7095, 19011, 7211, 189
https://tests.stockfishchess.org/tests/view/67e6c2796682f97da2178ebe

Passed non-regression STC (short time control) test, 2nd run:
LLR: 2.93 (-2.94,2.94) <-1.75,0.25>
Total: 92288 W: 23885 L: 23734 D: 44669
Ptnml(0-2): 213, 10039, 25518, 10132, 242
https://tests.stockfishchess.org/tests/view/67ed6a2d31d7cf8afdc45190

closes https://github.com/official-stockfish/Stockfish/pull/5975

Bench: 1875196
This commit is contained in:
FauziAkram
2025-04-02 19:43:55 +03:00
committed by Disservin
parent 8d2eef2b1e
commit 5f8e67a544

View File

@@ -356,8 +356,6 @@ void update_accumulator_refresh_cache(const FeatureTransformer<Dimensions>& feat
accumulator.computed[Perspective] = true;
#ifdef VECTOR
const bool combineLast3 =
std::abs((int) removed.size() - (int) added.size()) == 1 && removed.size() + added.size() > 2;
vec_t acc[Tiling::NumRegs];
psqt_vec_t psqt[Tiling::NumPsqtRegs];
@@ -371,7 +369,7 @@ void update_accumulator_refresh_cache(const FeatureTransformer<Dimensions>& feat
acc[k] = entryTile[k];
IndexType i = 0;
for (; i < std::min(removed.size(), added.size()) - combineLast3; ++i)
for (; i < std::min(removed.size(), added.size()); ++i)
{
IndexType indexR = removed[i];
const IndexType offsetR = Dimensions * indexR + j * Tiling::TileHeight;
@@ -383,58 +381,23 @@ void update_accumulator_refresh_cache(const FeatureTransformer<Dimensions>& feat
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = fused<Vec16Wrapper, Add, Sub>(acc[k], columnA[k], columnR[k]);
}
if (combineLast3)
for (; i < removed.size(); ++i)
{
IndexType indexR = removed[i];
const IndexType offsetR = Dimensions * indexR + j * Tiling::TileHeight;
auto* columnR = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR]);
IndexType indexA = added[i];
const IndexType offsetA = Dimensions * indexA + j * Tiling::TileHeight;
auto* columnA = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA]);
IndexType index = removed[i];
const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
if (removed.size() > added.size())
{
IndexType indexR2 = removed[i + 1];
const IndexType offsetR2 = Dimensions * indexR2 + j * Tiling::TileHeight;
auto* columnR2 =
reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR2]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = fused<Vec16Wrapper, Add, Sub, Sub>(acc[k], columnA[k], columnR[k],
columnR2[k]);
}
else
{
IndexType indexA2 = added[i + 1];
const IndexType offsetA2 = Dimensions * indexA2 + j * Tiling::TileHeight;
auto* columnA2 =
reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA2]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = fused<Vec16Wrapper, Add, Add, Sub>(acc[k], columnA[k], columnA2[k],
columnR[k]);
}
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = vec_sub_16(acc[k], column[k]);
}
else
for (; i < added.size(); ++i)
{
for (; i < removed.size(); ++i)
{
IndexType index = removed[i];
const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
IndexType index = added[i];
const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = vec_sub_16(acc[k], column[k]);
}
for (; i < added.size(); ++i)
{
IndexType index = added[i];
const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
for (IndexType k = 0; k < Tiling::NumRegs; k++)