mirror of
https://github.com/HChaZZY/Stockfish.git
synced 2025-12-06 10:53:50 +08:00
simplify accumulator updates
After #5759, accumulator updates are performed strictly on a per-move basis. Therefore, the generic code for updating multiple moves at once is no longer needed.
Passed non-regression STC:
LLR: 3.00 (-2.94,2.94) <-1.75,0.25>
Total: 81696 W: 21204 L: 21039 D: 39453
Ptnml(0-2): 210, 8431, 23416, 8566, 225
https://tests.stockfishchess.org/tests/view/67823a24a31c4c13e83518a8
Closes https://github.com/official-stockfish/Stockfish/pull/5760
No functional change.
This commit is contained in:
committed by
Joost VandeVondele
parent
8e3e22b3d4
commit
62ecdfe82c
@@ -472,25 +472,20 @@ class FeatureTransformer {
|
|||||||
return st;
|
return st;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Computes the accumulator of the next position.
|
// Given a computed accumulator, computes the accumulator of the next position.
|
||||||
template<Color Perspective>
|
template<Color Perspective>
|
||||||
void update_accumulator_incremental(const Position& pos, StateInfo* computed) const {
|
void update_accumulator_incremental(const Position& pos, StateInfo* computed) const {
|
||||||
assert((computed->*accPtr).computed[Perspective]);
|
assert((computed->*accPtr).computed[Perspective]);
|
||||||
assert(computed->next != nullptr);
|
assert(computed->next != nullptr);
|
||||||
|
|
||||||
#ifdef VECTOR
|
|
||||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
|
||||||
// is defined in the VECTOR code below, once in each branch.
|
|
||||||
vec_t acc[Tiling::NumRegs];
|
|
||||||
psqt_vec_t psqt[Tiling::NumPsqtRegs];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
const Square ksq = pos.square<KING>(Perspective);
|
const Square ksq = pos.square<KING>(Perspective);
|
||||||
|
|
||||||
// The size must be enough to contain the largest possible update.
|
// The size must be enough to contain the largest possible update.
|
||||||
// That might depend on the feature set and generally relies on the
|
// That might depend on the feature set and generally relies on the
|
||||||
// feature set's update cost calculation to be correct and never allow
|
// feature set's update cost calculation to be correct and never allow
|
||||||
// updates with more added/removed features than MaxActiveDimensions.
|
// updates with more added/removed features than MaxActiveDimensions.
|
||||||
|
// In this case, the maximum size of both feature addition and removal
|
||||||
|
// is 2, since we are incrementally updating one move at a time.
|
||||||
FeatureSet::IndexList removed, added;
|
FeatureSet::IndexList removed, added;
|
||||||
FeatureSet::append_changed_indices<Perspective>(ksq, computed->next->dirtyPiece, removed,
|
FeatureSet::append_changed_indices<Perspective>(ksq, computed->next->dirtyPiece, removed,
|
||||||
added);
|
added);
|
||||||
@@ -498,51 +493,76 @@ class FeatureTransformer {
|
|||||||
StateInfo* next = computed->next;
|
StateInfo* next = computed->next;
|
||||||
assert(!(next->*accPtr).computed[Perspective]);
|
assert(!(next->*accPtr).computed[Perspective]);
|
||||||
|
|
||||||
#ifdef VECTOR
|
if (removed.size() == 0 && added.size() == 0)
|
||||||
if ((removed.size() == 1 || removed.size() == 2) && added.size() == 1)
|
|
||||||
{
|
{
|
||||||
|
std::memcpy((next->*accPtr).accumulation[Perspective],
|
||||||
|
(computed->*accPtr).accumulation[Perspective],
|
||||||
|
HalfDimensions * sizeof(BiasType));
|
||||||
|
std::memcpy((next->*accPtr).psqtAccumulation[Perspective],
|
||||||
|
(computed->*accPtr).psqtAccumulation[Perspective],
|
||||||
|
PSQTBuckets * sizeof(PSQTWeightType));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
assert(added.size() == 1 || added.size() == 2);
|
||||||
|
assert(removed.size() == 1 || removed.size() == 2);
|
||||||
|
assert(added.size() <= removed.size());
|
||||||
|
|
||||||
|
#ifdef VECTOR
|
||||||
auto* accIn =
|
auto* accIn =
|
||||||
reinterpret_cast<const vec_t*>(&(computed->*accPtr).accumulation[Perspective][0]);
|
reinterpret_cast<const vec_t*>(&(computed->*accPtr).accumulation[Perspective][0]);
|
||||||
auto* accOut = reinterpret_cast<vec_t*>(&(next->*accPtr).accumulation[Perspective][0]);
|
auto* accOut = reinterpret_cast<vec_t*>(&(next->*accPtr).accumulation[Perspective][0]);
|
||||||
|
|
||||||
|
const IndexType offsetA0 = HalfDimensions * added[0];
|
||||||
|
auto* columnA0 = reinterpret_cast<const vec_t*>(&weights[offsetA0]);
|
||||||
const IndexType offsetR0 = HalfDimensions * removed[0];
|
const IndexType offsetR0 = HalfDimensions * removed[0];
|
||||||
auto* columnR0 = reinterpret_cast<const vec_t*>(&weights[offsetR0]);
|
auto* columnR0 = reinterpret_cast<const vec_t*>(&weights[offsetR0]);
|
||||||
const IndexType offsetA = HalfDimensions * added[0];
|
|
||||||
auto* columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
|
|
||||||
|
|
||||||
if (removed.size() == 1)
|
if (removed.size() == 1)
|
||||||
{
|
{
|
||||||
for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||||
accOut[i] = vec_add_16(vec_sub_16(accIn[i], columnR0[i]), columnA[i]);
|
accOut[i] = vec_add_16(vec_sub_16(accIn[i], columnR0[i]), columnA0[i]);
|
||||||
}
|
}
|
||||||
else
|
else if (added.size() == 1)
|
||||||
{
|
{
|
||||||
const IndexType offsetR1 = HalfDimensions * removed[1];
|
const IndexType offsetR1 = HalfDimensions * removed[1];
|
||||||
auto* columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
|
auto* columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
|
||||||
|
|
||||||
for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||||
accOut[i] = vec_sub_16(vec_add_16(accIn[i], columnA[i]),
|
accOut[i] = vec_sub_16(vec_add_16(accIn[i], columnA0[i]),
|
||||||
vec_add_16(columnR0[i], columnR1[i]));
|
vec_add_16(columnR0[i], columnR1[i]));
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const IndexType offsetA1 = HalfDimensions * added[1];
|
||||||
|
auto* columnA1 = reinterpret_cast<const vec_t*>(&weights[offsetA1]);
|
||||||
|
const IndexType offsetR1 = HalfDimensions * removed[1];
|
||||||
|
auto* columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
|
||||||
|
|
||||||
|
for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||||
|
accOut[i] =
|
||||||
|
vec_add_16(accIn[i], vec_sub_16(vec_add_16(columnA0[i], columnA1[i]),
|
||||||
|
vec_add_16(columnR0[i], columnR1[i])));
|
||||||
|
}
|
||||||
|
|
||||||
auto* accPsqtIn = reinterpret_cast<const psqt_vec_t*>(
|
auto* accPsqtIn = reinterpret_cast<const psqt_vec_t*>(
|
||||||
&(computed->*accPtr).psqtAccumulation[Perspective][0]);
|
&(computed->*accPtr).psqtAccumulation[Perspective][0]);
|
||||||
auto* accPsqtOut =
|
auto* accPsqtOut =
|
||||||
reinterpret_cast<psqt_vec_t*>(&(next->*accPtr).psqtAccumulation[Perspective][0]);
|
reinterpret_cast<psqt_vec_t*>(&(next->*accPtr).psqtAccumulation[Perspective][0]);
|
||||||
|
|
||||||
|
const IndexType offsetPsqtA0 = PSQTBuckets * added[0];
|
||||||
|
auto* columnPsqtA0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA0]);
|
||||||
const IndexType offsetPsqtR0 = PSQTBuckets * removed[0];
|
const IndexType offsetPsqtR0 = PSQTBuckets * removed[0];
|
||||||
auto* columnPsqtR0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR0]);
|
auto* columnPsqtR0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR0]);
|
||||||
const IndexType offsetPsqtA = PSQTBuckets * added[0];
|
|
||||||
auto* columnPsqtA = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA]);
|
|
||||||
|
|
||||||
if (removed.size() == 1)
|
if (removed.size() == 1)
|
||||||
{
|
{
|
||||||
for (std::size_t i = 0;
|
for (std::size_t i = 0;
|
||||||
i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
|
i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
|
||||||
accPsqtOut[i] = vec_add_psqt_32(vec_sub_psqt_32(accPsqtIn[i], columnPsqtR0[i]),
|
accPsqtOut[i] = vec_add_psqt_32(vec_sub_psqt_32(accPsqtIn[i], columnPsqtR0[i]),
|
||||||
columnPsqtA[i]);
|
columnPsqtA0[i]);
|
||||||
}
|
}
|
||||||
else
|
else if (added.size() == 1)
|
||||||
{
|
{
|
||||||
const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
|
const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
|
||||||
auto* columnPsqtR1 =
|
auto* columnPsqtR1 =
|
||||||
@@ -551,110 +571,58 @@ class FeatureTransformer {
|
|||||||
for (std::size_t i = 0;
|
for (std::size_t i = 0;
|
||||||
i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
|
i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
|
||||||
accPsqtOut[i] =
|
accPsqtOut[i] =
|
||||||
vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA[i]),
|
vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA0[i]),
|
||||||
vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i]));
|
vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i]));
|
||||||
}
|
}
|
||||||
}
|
else
|
||||||
else
|
|
||||||
{
|
|
||||||
for (IndexType i = 0; i < HalfDimensions / Tiling::TileHeight; ++i)
|
|
||||||
{
|
{
|
||||||
// Load accumulator
|
const IndexType offsetPsqtA1 = PSQTBuckets * added[1];
|
||||||
auto* accTileIn = reinterpret_cast<const vec_t*>(
|
auto* columnPsqtA1 =
|
||||||
&(computed->*accPtr).accumulation[Perspective][i * Tiling::TileHeight]);
|
reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA1]);
|
||||||
for (IndexType j = 0; j < Tiling::NumRegs; ++j)
|
const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
|
||||||
acc[j] = vec_load(&accTileIn[j]);
|
auto* columnPsqtR1 =
|
||||||
|
reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR1]);
|
||||||
|
|
||||||
// Difference calculation for the deactivated features
|
for (std::size_t i = 0;
|
||||||
for (const auto index : removed)
|
i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
|
||||||
{
|
accPsqtOut[i] = vec_add_psqt_32(
|
||||||
const IndexType offset = HalfDimensions * index + i * Tiling::TileHeight;
|
accPsqtIn[i],
|
||||||
auto* column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
vec_sub_psqt_32(vec_add_psqt_32(columnPsqtA0[i], columnPsqtA1[i]),
|
||||||
for (IndexType j = 0; j < Tiling::NumRegs; ++j)
|
vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i])));
|
||||||
acc[j] = vec_sub_16(acc[j], column[j]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Difference calculation for the activated features
|
|
||||||
for (const auto index : added)
|
|
||||||
{
|
|
||||||
const IndexType offset = HalfDimensions * index + i * Tiling::TileHeight;
|
|
||||||
auto* column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
|
||||||
for (IndexType j = 0; j < Tiling::NumRegs; ++j)
|
|
||||||
acc[j] = vec_add_16(acc[j], column[j]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store accumulator
|
|
||||||
auto* accTileOut = reinterpret_cast<vec_t*>(
|
|
||||||
&(next->*accPtr).accumulation[Perspective][i * Tiling::TileHeight]);
|
|
||||||
for (IndexType j = 0; j < Tiling::NumRegs; ++j)
|
|
||||||
vec_store(&accTileOut[j], acc[j]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (IndexType i = 0; i < PSQTBuckets / Tiling::PsqtTileHeight; ++i)
|
|
||||||
{
|
|
||||||
// Load accumulator
|
|
||||||
auto* accTilePsqtIn = reinterpret_cast<const psqt_vec_t*>(
|
|
||||||
&(computed->*accPtr).psqtAccumulation[Perspective][i * Tiling::PsqtTileHeight]);
|
|
||||||
for (std::size_t j = 0; j < Tiling::NumPsqtRegs; ++j)
|
|
||||||
psqt[j] = vec_load_psqt(&accTilePsqtIn[j]);
|
|
||||||
|
|
||||||
// Difference calculation for the deactivated features
|
|
||||||
for (const auto index : removed)
|
|
||||||
{
|
|
||||||
const IndexType offset = PSQTBuckets * index + i * Tiling::PsqtTileHeight;
|
|
||||||
auto* columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
|
|
||||||
for (std::size_t j = 0; j < Tiling::NumPsqtRegs; ++j)
|
|
||||||
psqt[j] = vec_sub_psqt_32(psqt[j], columnPsqt[j]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Difference calculation for the activated features
|
|
||||||
for (const auto index : added)
|
|
||||||
{
|
|
||||||
const IndexType offset = PSQTBuckets * index + i * Tiling::PsqtTileHeight;
|
|
||||||
auto* columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
|
|
||||||
for (std::size_t j = 0; j < Tiling::NumPsqtRegs; ++j)
|
|
||||||
psqt[j] = vec_add_psqt_32(psqt[j], columnPsqt[j]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store accumulator
|
|
||||||
auto* accTilePsqtOut = reinterpret_cast<psqt_vec_t*>(
|
|
||||||
&(next->*accPtr).psqtAccumulation[Perspective][i * Tiling::PsqtTileHeight]);
|
|
||||||
for (std::size_t j = 0; j < Tiling::NumPsqtRegs; ++j)
|
|
||||||
vec_store_psqt(&accTilePsqtOut[j], psqt[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
std::memcpy((next->*accPtr).accumulation[Perspective],
|
std::memcpy((next->*accPtr).accumulation[Perspective],
|
||||||
(computed->*accPtr).accumulation[Perspective],
|
(computed->*accPtr).accumulation[Perspective],
|
||||||
HalfDimensions * sizeof(BiasType));
|
HalfDimensions * sizeof(BiasType));
|
||||||
std::memcpy((next->*accPtr).psqtAccumulation[Perspective],
|
std::memcpy((next->*accPtr).psqtAccumulation[Perspective],
|
||||||
(computed->*accPtr).psqtAccumulation[Perspective],
|
(computed->*accPtr).psqtAccumulation[Perspective],
|
||||||
PSQTBuckets * sizeof(PSQTWeightType));
|
PSQTBuckets * sizeof(PSQTWeightType));
|
||||||
|
|
||||||
// Difference calculation for the deactivated features
|
// Difference calculation for the deactivated features
|
||||||
for (const auto index : removed)
|
for (const auto index : removed)
|
||||||
{
|
{
|
||||||
const IndexType offset = HalfDimensions * index;
|
const IndexType offset = HalfDimensions * index;
|
||||||
for (IndexType i = 0; i < HalfDimensions; ++i)
|
for (IndexType i = 0; i < HalfDimensions; ++i)
|
||||||
(next->*accPtr).accumulation[Perspective][i] -= weights[offset + i];
|
(next->*accPtr).accumulation[Perspective][i] -= weights[offset + i];
|
||||||
|
|
||||||
for (std::size_t i = 0; i < PSQTBuckets; ++i)
|
for (std::size_t i = 0; i < PSQTBuckets; ++i)
|
||||||
(next->*accPtr).psqtAccumulation[Perspective][i] -=
|
(next->*accPtr).psqtAccumulation[Perspective][i] -=
|
||||||
psqtWeights[index * PSQTBuckets + i];
|
psqtWeights[index * PSQTBuckets + i];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Difference calculation for the activated features
|
// Difference calculation for the activated features
|
||||||
for (const auto index : added)
|
for (const auto index : added)
|
||||||
{
|
{
|
||||||
const IndexType offset = HalfDimensions * index;
|
const IndexType offset = HalfDimensions * index;
|
||||||
for (IndexType i = 0; i < HalfDimensions; ++i)
|
for (IndexType i = 0; i < HalfDimensions; ++i)
|
||||||
(next->*accPtr).accumulation[Perspective][i] += weights[offset + i];
|
(next->*accPtr).accumulation[Perspective][i] += weights[offset + i];
|
||||||
|
|
||||||
for (std::size_t i = 0; i < PSQTBuckets; ++i)
|
for (std::size_t i = 0; i < PSQTBuckets; ++i)
|
||||||
(next->*accPtr).psqtAccumulation[Perspective][i] +=
|
(next->*accPtr).psqtAccumulation[Perspective][i] +=
|
||||||
psqtWeights[index * PSQTBuckets + i];
|
psqtWeights[index * PSQTBuckets + i];
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
(next->*accPtr).computed[Perspective] = true;
|
(next->*accPtr).computed[Perspective] = true;
|
||||||
|
|
||||||
@@ -662,6 +630,7 @@ class FeatureTransformer {
|
|||||||
update_accumulator_incremental<Perspective>(pos, next);
|
update_accumulator_incremental<Perspective>(pos, next);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<Color Perspective>
|
template<Color Perspective>
|
||||||
void update_accumulator_refresh_cache(const Position& pos,
|
void update_accumulator_refresh_cache(const Position& pos,
|
||||||
AccumulatorCaches::Cache<HalfDimensions>* cache) const {
|
AccumulatorCaches::Cache<HalfDimensions>* cache) const {
|
||||||
|
|||||||
Reference in New Issue
Block a user