Refactor accumulator storage/updates

Passed Non-regression STC:
LLR: 2.93 (-2.94,2.94) <-1.75,0.25>
Total: 115840 W: 29983 L: 29854 D: 56003
Ptnml(0-2): 338, 12990, 31149, 13091, 352
https://tests.stockfishchess.org/tests/view/67d0a044166a3e8781d84223

closes https://github.com/official-stockfish/Stockfish/pull/5927

No functional change
Author: Shawn Xu, 2025-03-09 19:33:30 -07:00
Committed by: Disservin
Parent: 66aee01bb1
Commit: fc0e0a44d4
17 changed files with 813 additions and 527 deletions
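
In outline, the refactor moves the NNUE accumulators out of StateInfo into a per-thread
Eval::NNUE::AccumulatorStack that the search drives explicitly. A minimal sketch of the
intended call pattern, using only names that appear in the hunks below (a fragment, not a
complete function):

    Eval::NNUE::AccumulatorStack accumulatorStack;              // owned by Search::Worker
    accumulatorStack.reset(rootPos, networks[numaAccessToken], refreshTable);  // once per search

    DirtyPiece dp = pos.do_move(move, st, givesCheck, &tt);     // do_move() now returns the DirtyPiece
    accumulatorStack.push(dp);                                   // recorded on the stack, not in StateInfo
    // ... search the child node; evaluation reads accumulatorStack.latest() ...
    pos.undo_move(move);
    accumulatorStack.pop();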


@@ -55,7 +55,8 @@ PGOBENCH = $(WINE_PATH) ./$(EXE) bench
SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \
misc.cpp movegen.cpp movepick.cpp position.cpp \
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp engine.cpp score.cpp memory.cpp
nnue/nnue_accumulator.cpp nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp \
engine.cpp score.cpp memory.cpp
HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h history.h \
nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/layers/affine_transform.h \


@@ -54,21 +54,22 @@ bool Eval::use_smallnet(const Position& pos) {
// of the position from the point of view of the side to move.
Value Eval::evaluate(const Eval::NNUE::Networks& networks,
const Position& pos,
Eval::NNUE::AccumulatorStack& accumulators,
Eval::NNUE::AccumulatorCaches& caches,
int optimism) {
assert(!pos.checkers());
bool smallNet = use_smallnet(pos);
auto [psqt, positional] = smallNet ? networks.small.evaluate(pos, &caches.small)
: networks.big.evaluate(pos, &caches.big);
auto [psqt, positional] = smallNet ? networks.small.evaluate(pos, accumulators, &caches.small)
: networks.big.evaluate(pos, accumulators, &caches.big);
Value nnue = (125 * psqt + 131 * positional) / 128;
// Re-evaluate the position when higher eval accuracy is worth the time spent
if (smallNet && (std::abs(nnue) < 236))
{
std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big);
std::tie(psqt, positional) = networks.big.evaluate(pos, accumulators, &caches.big);
nnue = (125 * psqt + 131 * positional) / 128;
smallNet = false;
}
@@ -99,7 +100,10 @@ std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {
if (pos.checkers())
return "Final evaluation: none (in check)";
auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>(networks);
Eval::NNUE::AccumulatorStack accumulators;
auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>(networks);
accumulators.reset(pos, networks, *caches);
std::stringstream ss;
ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
@@ -107,12 +111,12 @@ std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {
ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);
auto [psqt, positional] = networks.big.evaluate(pos, &caches->big);
auto [psqt, positional] = networks.big.evaluate(pos, accumulators, &caches->big);
Value v = psqt + positional;
v = pos.side_to_move() == WHITE ? v : -v;
ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n";
v = evaluate(networks, pos, *caches, VALUE_ZERO);
v = evaluate(networks, pos, accumulators, *caches, VALUE_ZERO);
v = pos.side_to_move() == WHITE ? v : -v;
ss << "Final evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)";
ss << " [with scaled NNUE, ...]";
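
For callers outside the search, as in Eval::trace() above, the new Eval::evaluate()
signature means the accumulator stack and caches are created and reset explicitly before
evaluating. A condensed sketch of that pattern, taken from the trace() hunk:

    Eval::NNUE::AccumulatorStack accumulators;
    auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>(networks);
    accumulators.reset(pos, networks, *caches);   // refresh the root entry for both networks

    Value v = Eval::evaluate(networks, pos, accumulators, *caches, VALUE_ZERO);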


@@ -39,6 +39,7 @@ namespace Eval {
namespace NNUE {
struct Networks;
struct AccumulatorCaches;
class AccumulatorStack;
}
std::string trace(Position& pos, const Eval::NNUE::Networks& networks);
@@ -47,6 +48,7 @@ int simple_eval(const Position& pos, Color c);
bool use_smallnet(const Position& pos);
Value evaluate(const NNUE::Networks& networks,
const Position& pos,
Eval::NNUE::AccumulatorStack& accumulators,
Eval::NNUE::AccumulatorCaches& caches,
int optimism);
} // namespace Eval


@@ -77,8 +77,8 @@ template void HalfKAv2_hm::append_changed_indices<BLACK>(Square ksq,
IndexList& removed,
IndexList& added);
bool HalfKAv2_hm::requires_refresh(const StateInfo* st, Color perspective) {
return st->dirtyPiece.piece[0] == make_piece(perspective, KING);
bool HalfKAv2_hm::requires_refresh(const DirtyPiece& dirtyPiece, Color perspective) {
return dirtyPiece.piece[0] == make_piece(perspective, KING);
}
} // namespace Stockfish::Eval::NNUE::Features
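
With the new signature the caller passes the recorded change directly, for example the
DirtyPiece held by an AccumulatorState. Illustrative check (the state variable here is
hypothetical, shown only for the shape of the call):

    // A full refresh is needed for a perspective exactly when that side's king moved,
    // i.e. the first entry of the DirtyPiece is that side's king.
    bool refreshWhite = HalfKAv2_hm::requires_refresh(state.dirtyPiece, WHITE);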


@@ -28,7 +28,6 @@
#include "../nnue_common.h"
namespace Stockfish {
struct StateInfo;
class Position;
}
@@ -135,9 +134,9 @@ class HalfKAv2_hm {
static void
append_changed_indices(Square ksq, const DirtyPiece& dp, IndexList& removed, IndexList& added);
// Returns whether the change stored in this StateInfo means
// Returns whether the change stored in this DirtyPiece means
// that a full accumulator refresh is required.
static bool requires_refresh(const StateInfo* st, Color perspective);
static bool requires_refresh(const DirtyPiece& dirtyPiece, Color perspective);
};
} // namespace Stockfish::Eval::NNUE::Features


@@ -210,6 +210,7 @@ bool Network<Arch, Transformer>::save(const std::optional<std::string>& filename
template<typename Arch, typename Transformer>
NetworkOutput
Network<Arch, Transformer>::evaluate(const Position& pos,
AccumulatorStack& accumulatorStack,
AccumulatorCaches::Cache<FTDimensions>* cache) const {
// We manually align the arrays on the stack because with gcc < 9.3
// overaligning stack variables with alignas() doesn't work correctly.
@@ -229,8 +230,9 @@ Network<Arch, Transformer>::evaluate(const Position& pos
ASSERT_ALIGNED(transformedFeatures, alignment);
const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
const auto psqt = featureTransformer->transform(pos, cache, transformedFeatures, bucket);
const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
const auto psqt =
featureTransformer->transform(pos, accumulatorStack, cache, transformedFeatures, bucket);
const auto positional = network[bucket].propagate(transformedFeatures);
return {static_cast<Value>(psqt / OutputScale), static_cast<Value>(positional / OutputScale)};
}
@@ -280,6 +282,7 @@ void Network<Arch, Transformer>::verify(std::string
template<typename Arch, typename Transformer>
NnueEvalTrace
Network<Arch, Transformer>::trace_evaluate(const Position& pos,
AccumulatorStack& accumulatorStack,
AccumulatorCaches::Cache<FTDimensions>* cache) const {
// We manually align the arrays on the stack because with gcc < 9.3
// overaligning stack variables with alignas() doesn't work correctly.
@@ -303,7 +306,7 @@ Network<Arch, Transformer>::trace_evaluate(const Position&
for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
{
const auto materialist =
featureTransformer->transform(pos, cache, transformedFeatures, bucket);
featureTransformer->transform(pos, accumulatorStack, cache, transformedFeatures, bucket);
const auto positional = network[bucket].propagate(transformedFeatures);
t.psqt[bucket] = static_cast<Value>(materialist / OutputScale);
@@ -447,14 +450,14 @@ bool Network<Arch, Transformer>::write_parameters(std::ostream& stream,
return bool(stream);
}
// Explicit template instantiation
// Explicit template instantiations
template class Network<
NetworkArchitecture<TransformedFeatureDimensionsBig, L2Big, L3Big>,
FeatureTransformer<TransformedFeatureDimensionsBig, &StateInfo::accumulatorBig>>;
FeatureTransformer<TransformedFeatureDimensionsBig, &AccumulatorState::accumulatorBig>>;
template class Network<
NetworkArchitecture<TransformedFeatureDimensionsSmall, L2Small, L3Small>,
FeatureTransformer<TransformedFeatureDimensionsSmall, &StateInfo::accumulatorSmall>>;
FeatureTransformer<TransformedFeatureDimensionsSmall, &AccumulatorState::accumulatorSmall>>;
} // namespace Stockfish::Eval::NNUE


@@ -29,13 +29,16 @@
#include <utility>
#include "../memory.h"
#include "../position.h"
#include "../types.h"
#include "nnue_accumulator.h"
#include "nnue_architecture.h"
#include "nnue_feature_transformer.h"
#include "nnue_misc.h"
namespace Stockfish {
class Position;
}
namespace Stockfish::Eval::NNUE {
enum class EmbeddedNNUEType {
@@ -64,11 +67,13 @@ class Network {
bool save(const std::optional<std::string>& filename) const;
NetworkOutput evaluate(const Position& pos,
AccumulatorStack& accumulatorStack,
AccumulatorCaches::Cache<FTDimensions>* cache) const;
void verify(std::string evalfilePath, const std::function<void(std::string_view)>&) const;
NnueEvalTrace trace_evaluate(const Position& pos,
AccumulatorStack& accumulatorStack,
AccumulatorCaches::Cache<FTDimensions>* cache) const;
private:
@@ -100,16 +105,18 @@ class Network {
template<IndexType Size>
friend struct AccumulatorCaches::Cache;
friend class AccumulatorStack;
};
// Definitions of the network types
using SmallFeatureTransformer =
FeatureTransformer<TransformedFeatureDimensionsSmall, &StateInfo::accumulatorSmall>;
FeatureTransformer<TransformedFeatureDimensionsSmall, &AccumulatorState::accumulatorSmall>;
using SmallNetworkArchitecture =
NetworkArchitecture<TransformedFeatureDimensionsSmall, L2Small, L3Small>;
using BigFeatureTransformer =
FeatureTransformer<TransformedFeatureDimensionsBig, &StateInfo::accumulatorBig>;
FeatureTransformer<TransformedFeatureDimensionsBig, &AccumulatorState::accumulatorBig>;
using BigNetworkArchitecture = NetworkArchitecture<TransformedFeatureDimensionsBig, L2Big, L3Big>;
using NetworkBig = Network<BigNetworkArchitecture, BigFeatureTransformer>;


@@ -0,0 +1,601 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "nnue_accumulator.h"
#include <cassert>
#include <initializer_list>
#include <memory>
#include "../bitboard.h"
#include "../position.h"
#include "../types.h"
#include "nnue_architecture.h"
#include "network.h"
#include "nnue_common.h"
#include "nnue_feature_transformer.h"
namespace Stockfish::Eval::NNUE {
namespace {
template<Color Perspective,
IncUpdateDirection Direction = FORWARD,
IndexType TransformedFeatureDimensions,
Accumulator<TransformedFeatureDimensions> AccumulatorState::*accPtr>
void update_accumulator_incremental(
const FeatureTransformer<TransformedFeatureDimensions, accPtr>& featureTransformer,
const Square ksq,
AccumulatorState& target_state,
const AccumulatorState& computed);
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
void update_accumulator_refresh_cache(
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
const Position& pos,
AccumulatorState& accumulatorState,
AccumulatorCaches::Cache<Dimensions>& cache);
}
void AccumulatorState::reset(const DirtyPiece& dp) noexcept {
dirtyPiece = dp;
accumulatorBig.computed.fill(false);
accumulatorSmall.computed.fill(false);
}
const AccumulatorState& AccumulatorStack::latest() const noexcept {
return m_accumulators[m_current_idx - 1];
}
AccumulatorState& AccumulatorStack::mut_latest() noexcept {
return m_accumulators[m_current_idx - 1];
}
void AccumulatorStack::reset(const Position& rootPos,
const Networks& networks,
AccumulatorCaches& caches) noexcept {
m_current_idx = 1;
update_accumulator_refresh_cache<WHITE, TransformedFeatureDimensionsBig,
&AccumulatorState::accumulatorBig>(
*networks.big.featureTransformer, rootPos, m_accumulators[0], caches.big);
update_accumulator_refresh_cache<BLACK, TransformedFeatureDimensionsBig,
&AccumulatorState::accumulatorBig>(
*networks.big.featureTransformer, rootPos, m_accumulators[0], caches.big);
update_accumulator_refresh_cache<WHITE, TransformedFeatureDimensionsSmall,
&AccumulatorState::accumulatorSmall>(
*networks.small.featureTransformer, rootPos, m_accumulators[0], caches.small);
update_accumulator_refresh_cache<BLACK, TransformedFeatureDimensionsSmall,
&AccumulatorState::accumulatorSmall>(
*networks.small.featureTransformer, rootPos, m_accumulators[0], caches.small);
}
void AccumulatorStack::push(const DirtyPiece& dirtyPiece) noexcept {
assert(m_current_idx + 1 < m_accumulators.size());
m_accumulators[m_current_idx].reset(dirtyPiece);
m_current_idx++;
}
void AccumulatorStack::pop() noexcept {
assert(m_current_idx > 1);
m_current_idx--;
}
template<IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
void AccumulatorStack::evaluate(const Position& pos,
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
evaluate_side<WHITE>(pos, featureTransformer, cache);
evaluate_side<BLACK>(pos, featureTransformer, cache);
}
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
void AccumulatorStack::evaluate_side(
const Position& pos,
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
const auto last_usable_accum = find_last_usable_accumulator<Perspective, Dimensions, accPtr>();
if ((m_accumulators[last_usable_accum].*accPtr).computed[Perspective])
forward_update_incremental<Perspective>(pos, featureTransformer, last_usable_accum);
else
{
update_accumulator_refresh_cache<Perspective>(featureTransformer, pos, mut_latest(), cache);
backward_update_incremental<Perspective>(pos, featureTransformer, last_usable_accum);
}
}
// Find the earliest usable accumulator: either an already computed accumulator or the
// accumulator state just before a change that requires a full refresh.
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
std::size_t AccumulatorStack::find_last_usable_accumulator() const noexcept {
for (std::size_t curr_idx = m_current_idx - 1; curr_idx > 0; curr_idx--)
{
if ((m_accumulators[curr_idx].*accPtr).computed[Perspective])
return curr_idx;
if (FeatureSet::requires_refresh(m_accumulators[curr_idx].dirtyPiece, Perspective))
return curr_idx;
}
return 0;
}
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
void AccumulatorStack::forward_update_incremental(
const Position& pos,
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
const std::size_t begin) noexcept {
assert(begin < m_accumulators.size());
assert((m_accumulators[begin].*accPtr).computed[Perspective]);
const Square ksq = pos.square<KING>(Perspective);
for (std::size_t next = begin + 1; next < m_current_idx; next++)
update_accumulator_incremental<Perspective>(featureTransformer, ksq, m_accumulators[next],
m_accumulators[next - 1]);
assert((latest().*accPtr).computed[Perspective]);
}
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
void AccumulatorStack::backward_update_incremental(
const Position& pos,
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
const std::size_t end) noexcept {
assert(end < m_accumulators.size());
assert(end < m_current_idx);
assert((latest().*accPtr).computed[Perspective]);
const Square ksq = pos.square<KING>(Perspective);
for (std::size_t next = m_current_idx - 2; next >= end; next--)
update_accumulator_incremental<Perspective, BACKWARDS>(
featureTransformer, ksq, m_accumulators[next], m_accumulators[next + 1]);
assert((m_accumulators[end].*accPtr).computed[Perspective]);
}
// Explicit template instantiations
template void
AccumulatorStack::evaluate<TransformedFeatureDimensionsBig, &AccumulatorState::accumulatorBig>(
const Position& pos,
const FeatureTransformer<TransformedFeatureDimensionsBig, &AccumulatorState::accumulatorBig>&
featureTransformer,
AccumulatorCaches::Cache<TransformedFeatureDimensionsBig>& cache) noexcept;
template void
AccumulatorStack::evaluate<TransformedFeatureDimensionsSmall, &AccumulatorState::accumulatorSmall>(
const Position& pos,
const FeatureTransformer<TransformedFeatureDimensionsSmall, &AccumulatorState::accumulatorSmall>&
featureTransformer,
AccumulatorCaches::Cache<TransformedFeatureDimensionsSmall>& cache) noexcept;
namespace {
template<Color Perspective,
IncUpdateDirection Direction,
IndexType TransformedFeatureDimensions,
Accumulator<TransformedFeatureDimensions> AccumulatorState::*accPtr>
void update_accumulator_incremental(
const FeatureTransformer<TransformedFeatureDimensions, accPtr>& featureTransformer,
const Square ksq,
AccumulatorState& target_state,
const AccumulatorState& computed) {
[[maybe_unused]] constexpr bool Forward = Direction == FORWARD;
[[maybe_unused]] constexpr bool Backwards = Direction == BACKWARDS;
assert(Forward != Backwards);
assert((computed.*accPtr).computed[Perspective]);
assert(!(target_state.*accPtr).computed[Perspective]);
// The size must be enough to contain the largest possible update.
// That might depend on the feature set and generally relies on the
// feature set's update cost calculation to be correct and never allow
// updates with more added/removed features than MaxActiveDimensions.
// In this case, the maximum size of both feature addition and removal
// is 2, since we are incrementally updating one move at a time.
FeatureSet::IndexList removed, added;
if constexpr (Forward)
FeatureSet::append_changed_indices<Perspective>(ksq, target_state.dirtyPiece, removed,
added);
else
FeatureSet::append_changed_indices<Perspective>(ksq, computed.dirtyPiece, added, removed);
if (removed.size() == 0 && added.size() == 0)
{
std::memcpy((target_state.*accPtr).accumulation[Perspective],
(computed.*accPtr).accumulation[Perspective],
TransformedFeatureDimensions * sizeof(BiasType));
std::memcpy((target_state.*accPtr).psqtAccumulation[Perspective],
(computed.*accPtr).psqtAccumulation[Perspective],
PSQTBuckets * sizeof(PSQTWeightType));
}
else
{
assert(added.size() == 1 || added.size() == 2);
assert(removed.size() == 1 || removed.size() == 2);
if (Forward)
assert(added.size() <= removed.size());
else
assert(removed.size() <= added.size());
#ifdef VECTOR
auto* accIn =
reinterpret_cast<const vec_t*>(&(computed.*accPtr).accumulation[Perspective][0]);
auto* accOut =
reinterpret_cast<vec_t*>(&(target_state.*accPtr).accumulation[Perspective][0]);
const IndexType offsetA0 = TransformedFeatureDimensions * added[0];
auto* columnA0 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA0]);
const IndexType offsetR0 = TransformedFeatureDimensions * removed[0];
auto* columnR0 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR0]);
if ((Forward && removed.size() == 1) || (Backwards && added.size() == 1))
{
assert(added.size() == 1 && removed.size() == 1);
for (IndexType i = 0;
i < TransformedFeatureDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
accOut[i] = vec_add_16(vec_sub_16(accIn[i], columnR0[i]), columnA0[i]);
}
else if (Forward && added.size() == 1)
{
assert(removed.size() == 2);
const IndexType offsetR1 = TransformedFeatureDimensions * removed[1];
auto* columnR1 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR1]);
for (IndexType i = 0;
i < TransformedFeatureDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
accOut[i] = vec_sub_16(vec_add_16(accIn[i], columnA0[i]),
vec_add_16(columnR0[i], columnR1[i]));
}
else if (Backwards && removed.size() == 1)
{
assert(added.size() == 2);
const IndexType offsetA1 = TransformedFeatureDimensions * added[1];
auto* columnA1 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA1]);
for (IndexType i = 0;
i < TransformedFeatureDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
accOut[i] = vec_add_16(vec_add_16(accIn[i], columnA0[i]),
vec_sub_16(columnA1[i], columnR0[i]));
}
else
{
assert(added.size() == 2 && removed.size() == 2);
const IndexType offsetA1 = TransformedFeatureDimensions * added[1];
auto* columnA1 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA1]);
const IndexType offsetR1 = TransformedFeatureDimensions * removed[1];
auto* columnR1 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR1]);
for (IndexType i = 0;
i < TransformedFeatureDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
accOut[i] = vec_add_16(accIn[i], vec_sub_16(vec_add_16(columnA0[i], columnA1[i]),
vec_add_16(columnR0[i], columnR1[i])));
}
auto* accPsqtIn =
reinterpret_cast<const psqt_vec_t*>(&(computed.*accPtr).psqtAccumulation[Perspective][0]);
auto* accPsqtOut =
reinterpret_cast<psqt_vec_t*>(&(target_state.*accPtr).psqtAccumulation[Perspective][0]);
const IndexType offsetPsqtA0 = PSQTBuckets * added[0];
auto* columnPsqtA0 =
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtA0]);
const IndexType offsetPsqtR0 = PSQTBuckets * removed[0];
auto* columnPsqtR0 =
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtR0]);
if ((Forward && removed.size() == 1)
|| (Backwards && added.size() == 1)) // added.size() == removed.size() == 1
{
for (std::size_t i = 0; i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t);
++i)
accPsqtOut[i] =
vec_add_psqt_32(vec_sub_psqt_32(accPsqtIn[i], columnPsqtR0[i]), columnPsqtA0[i]);
}
else if (Forward && added.size() == 1)
{
const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
auto* columnPsqtR1 =
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtR1]);
for (std::size_t i = 0; i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t);
++i)
accPsqtOut[i] = vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA0[i]),
vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i]));
}
else if (Backwards && removed.size() == 1)
{
const IndexType offsetPsqtA1 = PSQTBuckets * added[1];
auto* columnPsqtA1 =
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtA1]);
for (std::size_t i = 0; i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t);
++i)
accPsqtOut[i] = vec_add_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA0[i]),
vec_sub_psqt_32(columnPsqtA1[i], columnPsqtR0[i]));
}
else
{
const IndexType offsetPsqtA1 = PSQTBuckets * added[1];
auto* columnPsqtA1 =
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtA1]);
const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
auto* columnPsqtR1 =
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtR1]);
for (std::size_t i = 0; i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t);
++i)
accPsqtOut[i] = vec_add_psqt_32(
accPsqtIn[i], vec_sub_psqt_32(vec_add_psqt_32(columnPsqtA0[i], columnPsqtA1[i]),
vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i])));
}
#else
std::memcpy((target_state.*accPtr).accumulation[Perspective],
(computed.*accPtr).accumulation[Perspective],
TransformedFeatureDimensions * sizeof(BiasType));
std::memcpy((target_state.*accPtr).psqtAccumulation[Perspective],
(computed.*accPtr).psqtAccumulation[Perspective],
PSQTBuckets * sizeof(PSQTWeightType));
// Difference calculation for the deactivated features
for (const auto index : removed)
{
const IndexType offset = TransformedFeatureDimensions * index;
for (IndexType i = 0; i < TransformedFeatureDimensions; ++i)
(target_state.*accPtr).accumulation[Perspective][i] -=
featureTransformer.weights[offset + i];
for (std::size_t i = 0; i < PSQTBuckets; ++i)
(target_state.*accPtr).psqtAccumulation[Perspective][i] -=
featureTransformer.psqtWeights[index * PSQTBuckets + i];
}
// Difference calculation for the activated features
for (const auto index : added)
{
const IndexType offset = TransformedFeatureDimensions * index;
for (IndexType i = 0; i < TransformedFeatureDimensions; ++i)
(target_state.*accPtr).accumulation[Perspective][i] +=
featureTransformer.weights[offset + i];
for (std::size_t i = 0; i < PSQTBuckets; ++i)
(target_state.*accPtr).psqtAccumulation[Perspective][i] +=
featureTransformer.psqtWeights[index * PSQTBuckets + i];
}
#endif
}
(target_state.*accPtr).computed[Perspective] = true;
}
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
void update_accumulator_refresh_cache(
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
const Position& pos,
AccumulatorState& accumulatorState,
AccumulatorCaches::Cache<Dimensions>& cache) {
using Tiling [[maybe_unused]] = SIMDTiling<Dimensions, Dimensions>;
const Square ksq = pos.square<KING>(Perspective);
auto& entry = cache[ksq][Perspective];
FeatureSet::IndexList removed, added;
for (Color c : {WHITE, BLACK})
{
for (PieceType pt = PAWN; pt <= KING; ++pt)
{
const Piece piece = make_piece(c, pt);
const Bitboard oldBB = entry.byColorBB[c] & entry.byTypeBB[pt];
const Bitboard newBB = pos.pieces(c, pt);
Bitboard toRemove = oldBB & ~newBB;
Bitboard toAdd = newBB & ~oldBB;
while (toRemove)
{
Square sq = pop_lsb(toRemove);
removed.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
}
while (toAdd)
{
Square sq = pop_lsb(toAdd);
added.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
}
}
}
auto& accumulator = accumulatorState.*accPtr;
accumulator.computed[Perspective] = true;
#ifdef VECTOR
const bool combineLast3 =
std::abs((int) removed.size() - (int) added.size()) == 1 && removed.size() + added.size() > 2;
vec_t acc[Tiling::NumRegs];
psqt_vec_t psqt[Tiling::NumPsqtRegs];
for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j)
{
auto* accTile =
reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * Tiling::TileHeight]);
auto* entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * Tiling::TileHeight]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = entryTile[k];
std::size_t i = 0;
for (; i < std::min(removed.size(), added.size()) - combineLast3; ++i)
{
IndexType indexR = removed[i];
const IndexType offsetR = Dimensions * indexR + j * Tiling::TileHeight;
auto* columnR = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR]);
IndexType indexA = added[i];
const IndexType offsetA = Dimensions * indexA + j * Tiling::TileHeight;
auto* columnA = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = vec_add_16(acc[k], vec_sub_16(columnA[k], columnR[k]));
}
if (combineLast3)
{
IndexType indexR = removed[i];
const IndexType offsetR = Dimensions * indexR + j * Tiling::TileHeight;
auto* columnR = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR]);
IndexType indexA = added[i];
const IndexType offsetA = Dimensions * indexA + j * Tiling::TileHeight;
auto* columnA = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA]);
if (removed.size() > added.size())
{
IndexType indexR2 = removed[i + 1];
const IndexType offsetR2 = Dimensions * indexR2 + j * Tiling::TileHeight;
auto* columnR2 =
reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR2]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = vec_sub_16(vec_add_16(acc[k], columnA[k]),
vec_add_16(columnR[k], columnR2[k]));
}
else
{
IndexType indexA2 = added[i + 1];
const IndexType offsetA2 = Dimensions * indexA2 + j * Tiling::TileHeight;
auto* columnA2 =
reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA2]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = vec_add_16(vec_sub_16(acc[k], columnR[k]),
vec_add_16(columnA[k], columnA2[k]));
}
}
else
{
for (; i < removed.size(); ++i)
{
IndexType index = removed[i];
const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = vec_sub_16(acc[k], column[k]);
}
for (; i < added.size(); ++i)
{
IndexType index = added[i];
const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
}
for (IndexType k = 0; k < Tiling::NumRegs; k++)
vec_store(&entryTile[k], acc[k]);
for (IndexType k = 0; k < Tiling::NumRegs; k++)
vec_store(&accTile[k], acc[k]);
}
for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j)
{
auto* accTilePsqt = reinterpret_cast<psqt_vec_t*>(
&accumulator.psqtAccumulation[Perspective][j * Tiling::PsqtTileHeight]);
auto* entryTilePsqt =
reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * Tiling::PsqtTileHeight]);
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
psqt[k] = entryTilePsqt[k];
for (std::size_t i = 0; i < removed.size(); ++i)
{
IndexType index = removed[i];
const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
auto* columnPsqt =
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offset]);
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
}
for (std::size_t i = 0; i < added.size(); ++i)
{
IndexType index = added[i];
const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
auto* columnPsqt =
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offset]);
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
}
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
vec_store_psqt(&entryTilePsqt[k], psqt[k]);
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
vec_store_psqt(&accTilePsqt[k], psqt[k]);
}
#else
for (const auto index : removed)
{
const IndexType offset = Dimensions * index;
for (IndexType j = 0; j < Dimensions; ++j)
entry.accumulation[j] -= featureTransformer.weights[offset + j];
for (std::size_t k = 0; k < PSQTBuckets; ++k)
entry.psqtAccumulation[k] -= featureTransformer.psqtWeights[index * PSQTBuckets + k];
}
for (const auto index : added)
{
const IndexType offset = Dimensions * index;
for (IndexType j = 0; j < Dimensions; ++j)
entry.accumulation[j] += featureTransformer.weights[offset + j];
for (std::size_t k = 0; k < PSQTBuckets; ++k)
entry.psqtAccumulation[k] += featureTransformer.psqtWeights[index * PSQTBuckets + k];
}
// The accumulator of the refresh entry has been updated.
// Now copy its content to the actual accumulator we were refreshing.
std::memcpy(accumulator.accumulation[Perspective], entry.accumulation,
sizeof(BiasType) * Dimensions);
std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation,
sizeof(int32_t) * PSQTBuckets);
#endif
for (Color c : {WHITE, BLACK})
entry.byColorBB[c] = pos.pieces(c);
for (PieceType pt = PAWN; pt <= KING; ++pt)
entry.byTypeBB[pt] = pos.pieces(pt);
}
}
}
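
The core of the new logic is evaluate_side(): for each perspective it either replays the
recorded DirtyPieces forward from the last computed accumulator, or, when the scan runs
into a king move first, refreshes the newest entry from the king-bucket cache and fills
the intermediate entries backwards. A small worked example of the two paths (a sketch,
not output of the code):

    stack index:   0      1      2      3      4 (latest)
    computed:      yes    yes    no     no     no

    No king move recorded at entries 2..4:
        find_last_usable_accumulator() returns 1 (computed), so the FORWARD pass
        updates entries 2, 3 and 4 from their DirtyPieces.

    King move recorded at entry 3:
        the scan stops at 3 (requires_refresh), which is not computed, so entry 4 is
        rebuilt from the cache and the BACKWARDS pass then fills entry 3 for later reuse.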


@@ -21,23 +21,43 @@
#ifndef NNUE_ACCUMULATOR_H_INCLUDED
#define NNUE_ACCUMULATOR_H_INCLUDED
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>
#include "../types.h"
#include "nnue_architecture.h"
#include "nnue_common.h"
namespace Stockfish {
class Position;
}
namespace Stockfish::Eval::NNUE {
using BiasType = std::int16_t;
using PSQTWeightType = std::int32_t;
using IndexType = std::uint32_t;
struct Networks;
template<IndexType Size>
struct alignas(CacheLineSize) Accumulator;
struct AccumulatorState;
template<IndexType TransformedFeatureDimensions,
Accumulator<TransformedFeatureDimensions> AccumulatorState::*accPtr>
class FeatureTransformer;
// Class that holds the result of affine transformation of input features
template<IndexType Size>
struct alignas(CacheLineSize) Accumulator {
std::int16_t accumulation[COLOR_NB][Size];
std::int32_t psqtAccumulation[COLOR_NB][PSQTBuckets];
bool computed[COLOR_NB];
std::int16_t accumulation[COLOR_NB][Size];
std::int32_t psqtAccumulation[COLOR_NB][PSQTBuckets];
std::array<bool, COLOR_NB> computed;
};
@@ -95,6 +115,69 @@ struct AccumulatorCaches {
Cache<TransformedFeatureDimensionsSmall> small;
};
struct AccumulatorState {
Accumulator<TransformedFeatureDimensionsBig> accumulatorBig;
Accumulator<TransformedFeatureDimensionsSmall> accumulatorSmall;
DirtyPiece dirtyPiece;
void reset(const DirtyPiece& dp) noexcept;
};
class AccumulatorStack {
public:
AccumulatorStack() :
m_accumulators(MAX_PLY + 1),
m_current_idx{} {}
[[nodiscard]] const AccumulatorState& latest() const noexcept;
void
reset(const Position& rootPos, const Networks& networks, AccumulatorCaches& caches) noexcept;
void push(const DirtyPiece& dirtyPiece) noexcept;
void pop() noexcept;
template<IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
void evaluate(const Position& pos,
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
AccumulatorCaches::Cache<Dimensions>& cache) noexcept;
private:
[[nodiscard]] AccumulatorState& mut_latest() noexcept;
template<Color Perspective,
IndexType Dimensions,
Accumulator<Dimensions> AccumulatorState::*accPtr>
void evaluate_side(const Position& pos,
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
AccumulatorCaches::Cache<Dimensions>& cache) noexcept;
template<Color Perspective,
IndexType Dimensions,
Accumulator<Dimensions> AccumulatorState::*accPtr>
[[nodiscard]] std::size_t find_last_usable_accumulator() const noexcept;
template<Color Perspective,
IndexType Dimensions,
Accumulator<Dimensions> AccumulatorState::*accPtr>
void
forward_update_incremental(const Position& pos,
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
const std::size_t begin) noexcept;
template<Color Perspective,
IndexType Dimensions,
Accumulator<Dimensions> AccumulatorState::*accPtr>
void
backward_update_incremental(const Position& pos,
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
const std::size_t end) noexcept;
std::vector<AccumulatorState> m_accumulators;
std::size_t m_current_idx;
};
} // namespace Stockfish::Eval::NNUE
#endif // NNUE_ACCUMULATOR_H_INCLUDED


@@ -279,6 +279,11 @@ inline void write_leb_128(std::ostream& stream, const IntType* values, std::size
flush();
}
enum IncUpdateDirection {
FORWARD,
BACKWARDS
};
} // namespace Stockfish::Eval::NNUE
#endif // #ifndef NNUE_COMMON_H_INCLUDED


@@ -22,12 +22,9 @@
#define NNUE_FEATURE_TRANSFORMER_H_INCLUDED
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iosfwd>
#include <type_traits>
#include <utility>
#include "../position.h"
#include "../types.h"
@@ -41,11 +38,6 @@ using BiasType = std::int16_t;
using WeightType = std::int16_t;
using PSQTWeightType = std::int32_t;
enum IncUpdateDirection {
FORWARD,
BACKWARDS
};
// If vector instructions are enabled, we update and refresh the
// accumulator tile by tile such that each tile fits in the CPU's
// vector registers.
@@ -249,15 +241,12 @@ class SIMDTiling {
// Input feature converter
template<IndexType TransformedFeatureDimensions,
Accumulator<TransformedFeatureDimensions> StateInfo::*accPtr>
Accumulator<TransformedFeatureDimensions> AccumulatorState::*accPtr>
class FeatureTransformer {
// Number of output dimensions for one side
static constexpr IndexType HalfDimensions = TransformedFeatureDimensions;
private:
using Tiling = SIMDTiling<TransformedFeatureDimensions, HalfDimensions>;
public:
// Output type
using OutputType = TransformedFeatureType;
@@ -348,19 +337,21 @@ class FeatureTransformer {
// Convert input features
std::int32_t transform(const Position& pos,
AccumulatorStack& accumulatorStack,
AccumulatorCaches::Cache<HalfDimensions>* cache,
OutputType* output,
int bucket) const {
update_accumulator<WHITE>(pos, cache);
update_accumulator<BLACK>(pos, cache);
accumulatorStack.evaluate(pos, *this, *cache);
const auto& accumulatorState = accumulatorStack.latest();
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
const auto& psqtAccumulation = (pos.state()->*accPtr).psqtAccumulation;
const auto& psqtAccumulation = (accumulatorState.*accPtr).psqtAccumulation;
const auto psqt =
(psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket])
/ 2;
const auto& accumulation = (pos.state()->*accPtr).accumulation;
const auto& accumulation = (accumulatorState.*accPtr).accumulation;
for (IndexType p = 0; p < 2; ++p)
{
@@ -473,432 +464,6 @@ class FeatureTransformer {
return psqt;
} // end of function transform()
private:
// Given a computed accumulator, computes the accumulator of another position.
template<Color Perspective, IncUpdateDirection Direction = FORWARD>
void update_accumulator_incremental(const Square ksq,
StateInfo* target_state,
const StateInfo* computed) const {
[[maybe_unused]] constexpr bool Forward = Direction == FORWARD;
[[maybe_unused]] constexpr bool Backwards = Direction == BACKWARDS;
assert((computed->*accPtr).computed[Perspective]);
StateInfo* next = Forward ? computed->next : computed->previous;
assert(next != nullptr);
assert(!(next->*accPtr).computed[Perspective]);
// The size must be enough to contain the largest possible update.
// That might depend on the feature set and generally relies on the
// feature set's update cost calculation to be correct and never allow
// updates with more added/removed features than MaxActiveDimensions.
// In this case, the maximum size of both feature addition and removal
// is 2, since we are incrementally updating one move at a time.
FeatureSet::IndexList removed, added;
if constexpr (Forward)
FeatureSet::append_changed_indices<Perspective>(ksq, next->dirtyPiece, removed, added);
else
FeatureSet::append_changed_indices<Perspective>(ksq, computed->dirtyPiece, added,
removed);
if (removed.size() == 0 && added.size() == 0)
{
std::memcpy((next->*accPtr).accumulation[Perspective],
(computed->*accPtr).accumulation[Perspective],
HalfDimensions * sizeof(BiasType));
std::memcpy((next->*accPtr).psqtAccumulation[Perspective],
(computed->*accPtr).psqtAccumulation[Perspective],
PSQTBuckets * sizeof(PSQTWeightType));
}
else
{
assert(added.size() == 1 || added.size() == 2);
assert(removed.size() == 1 || removed.size() == 2);
if (Forward)
assert(added.size() <= removed.size());
else
assert(removed.size() <= added.size());
#ifdef VECTOR
auto* accIn =
reinterpret_cast<const vec_t*>(&(computed->*accPtr).accumulation[Perspective][0]);
auto* accOut = reinterpret_cast<vec_t*>(&(next->*accPtr).accumulation[Perspective][0]);
const IndexType offsetA0 = HalfDimensions * added[0];
auto* columnA0 = reinterpret_cast<const vec_t*>(&weights[offsetA0]);
const IndexType offsetR0 = HalfDimensions * removed[0];
auto* columnR0 = reinterpret_cast<const vec_t*>(&weights[offsetR0]);
if ((Forward && removed.size() == 1) || (Backwards && added.size() == 1))
{
assert(added.size() == 1 && removed.size() == 1);
for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
accOut[i] = vec_add_16(vec_sub_16(accIn[i], columnR0[i]), columnA0[i]);
}
else if (Forward && added.size() == 1)
{
assert(removed.size() == 2);
const IndexType offsetR1 = HalfDimensions * removed[1];
auto* columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
accOut[i] = vec_sub_16(vec_add_16(accIn[i], columnA0[i]),
vec_add_16(columnR0[i], columnR1[i]));
}
else if (Backwards && removed.size() == 1)
{
assert(added.size() == 2);
const IndexType offsetA1 = HalfDimensions * added[1];
auto* columnA1 = reinterpret_cast<const vec_t*>(&weights[offsetA1]);
for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
accOut[i] = vec_add_16(vec_add_16(accIn[i], columnA0[i]),
vec_sub_16(columnA1[i], columnR0[i]));
}
else
{
assert(added.size() == 2 && removed.size() == 2);
const IndexType offsetA1 = HalfDimensions * added[1];
auto* columnA1 = reinterpret_cast<const vec_t*>(&weights[offsetA1]);
const IndexType offsetR1 = HalfDimensions * removed[1];
auto* columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
accOut[i] =
vec_add_16(accIn[i], vec_sub_16(vec_add_16(columnA0[i], columnA1[i]),
vec_add_16(columnR0[i], columnR1[i])));
}
auto* accPsqtIn = reinterpret_cast<const psqt_vec_t*>(
&(computed->*accPtr).psqtAccumulation[Perspective][0]);
auto* accPsqtOut =
reinterpret_cast<psqt_vec_t*>(&(next->*accPtr).psqtAccumulation[Perspective][0]);
const IndexType offsetPsqtA0 = PSQTBuckets * added[0];
auto* columnPsqtA0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA0]);
const IndexType offsetPsqtR0 = PSQTBuckets * removed[0];
auto* columnPsqtR0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR0]);
if ((Forward && removed.size() == 1)
|| (Backwards && added.size() == 1)) // added.size() == removed.size() == 1
{
for (std::size_t i = 0;
i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
accPsqtOut[i] = vec_add_psqt_32(vec_sub_psqt_32(accPsqtIn[i], columnPsqtR0[i]),
columnPsqtA0[i]);
}
else if (Forward && added.size() == 1)
{
const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
auto* columnPsqtR1 =
reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR1]);
for (std::size_t i = 0;
i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
accPsqtOut[i] =
vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA0[i]),
vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i]));
}
else if (Backwards && removed.size() == 1)
{
const IndexType offsetPsqtA1 = PSQTBuckets * added[1];
auto* columnPsqtA1 =
reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA1]);
for (std::size_t i = 0;
i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
accPsqtOut[i] =
vec_add_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA0[i]),
vec_sub_psqt_32(columnPsqtA1[i], columnPsqtR0[i]));
}
else
{
const IndexType offsetPsqtA1 = PSQTBuckets * added[1];
auto* columnPsqtA1 =
reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA1]);
const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
auto* columnPsqtR1 =
reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR1]);
for (std::size_t i = 0;
i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
accPsqtOut[i] = vec_add_psqt_32(
accPsqtIn[i],
vec_sub_psqt_32(vec_add_psqt_32(columnPsqtA0[i], columnPsqtA1[i]),
vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i])));
}
#else
std::memcpy((next->*accPtr).accumulation[Perspective],
(computed->*accPtr).accumulation[Perspective],
HalfDimensions * sizeof(BiasType));
std::memcpy((next->*accPtr).psqtAccumulation[Perspective],
(computed->*accPtr).psqtAccumulation[Perspective],
PSQTBuckets * sizeof(PSQTWeightType));
// Difference calculation for the deactivated features
for (const auto index : removed)
{
const IndexType offset = HalfDimensions * index;
for (IndexType i = 0; i < HalfDimensions; ++i)
(next->*accPtr).accumulation[Perspective][i] -= weights[offset + i];
for (std::size_t i = 0; i < PSQTBuckets; ++i)
(next->*accPtr).psqtAccumulation[Perspective][i] -=
psqtWeights[index * PSQTBuckets + i];
}
// Difference calculation for the activated features
for (const auto index : added)
{
const IndexType offset = HalfDimensions * index;
for (IndexType i = 0; i < HalfDimensions; ++i)
(next->*accPtr).accumulation[Perspective][i] += weights[offset + i];
for (std::size_t i = 0; i < PSQTBuckets; ++i)
(next->*accPtr).psqtAccumulation[Perspective][i] +=
psqtWeights[index * PSQTBuckets + i];
}
#endif
}
(next->*accPtr).computed[Perspective] = true;
if (next != target_state)
update_accumulator_incremental<Perspective, Direction>(ksq, target_state, next);
}
template<Color Perspective>
void update_accumulator_refresh_cache(const Position& pos,
AccumulatorCaches::Cache<HalfDimensions>* cache) const {
assert(cache != nullptr);
Square ksq = pos.square<KING>(Perspective);
auto& entry = (*cache)[ksq][Perspective];
FeatureSet::IndexList removed, added;
for (Color c : {WHITE, BLACK})
{
for (PieceType pt = PAWN; pt <= KING; ++pt)
{
const Piece piece = make_piece(c, pt);
const Bitboard oldBB = entry.byColorBB[c] & entry.byTypeBB[pt];
const Bitboard newBB = pos.pieces(c, pt);
Bitboard toRemove = oldBB & ~newBB;
Bitboard toAdd = newBB & ~oldBB;
while (toRemove)
{
Square sq = pop_lsb(toRemove);
removed.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
}
while (toAdd)
{
Square sq = pop_lsb(toAdd);
added.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
}
}
}
auto& accumulator = pos.state()->*accPtr;
accumulator.computed[Perspective] = true;
#ifdef VECTOR
const bool combineLast3 = std::abs((int) removed.size() - (int) added.size()) == 1
&& removed.size() + added.size() > 2;
vec_t acc[Tiling::NumRegs];
psqt_vec_t psqt[Tiling::NumPsqtRegs];
for (IndexType j = 0; j < HalfDimensions / Tiling::TileHeight; ++j)
{
auto* accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[Perspective][j * Tiling::TileHeight]);
auto* entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * Tiling::TileHeight]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = entryTile[k];
std::size_t i = 0;
for (; i < std::min(removed.size(), added.size()) - combineLast3; ++i)
{
IndexType indexR = removed[i];
const IndexType offsetR = HalfDimensions * indexR + j * Tiling::TileHeight;
auto* columnR = reinterpret_cast<const vec_t*>(&weights[offsetR]);
IndexType indexA = added[i];
const IndexType offsetA = HalfDimensions * indexA + j * Tiling::TileHeight;
auto* columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = vec_add_16(acc[k], vec_sub_16(columnA[k], columnR[k]));
}
if (combineLast3)
{
IndexType indexR = removed[i];
const IndexType offsetR = HalfDimensions * indexR + j * Tiling::TileHeight;
auto* columnR = reinterpret_cast<const vec_t*>(&weights[offsetR]);
IndexType indexA = added[i];
const IndexType offsetA = HalfDimensions * indexA + j * Tiling::TileHeight;
auto* columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
if (removed.size() > added.size())
{
IndexType indexR2 = removed[i + 1];
const IndexType offsetR2 = HalfDimensions * indexR2 + j * Tiling::TileHeight;
auto* columnR2 = reinterpret_cast<const vec_t*>(&weights[offsetR2]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = vec_sub_16(vec_add_16(acc[k], columnA[k]),
vec_add_16(columnR[k], columnR2[k]));
}
else
{
IndexType indexA2 = added[i + 1];
const IndexType offsetA2 = HalfDimensions * indexA2 + j * Tiling::TileHeight;
auto* columnA2 = reinterpret_cast<const vec_t*>(&weights[offsetA2]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = vec_add_16(vec_sub_16(acc[k], columnR[k]),
vec_add_16(columnA[k], columnA2[k]));
}
}
else
{
for (; i < removed.size(); ++i)
{
IndexType index = removed[i];
const IndexType offset = HalfDimensions * index + j * Tiling::TileHeight;
auto* column = reinterpret_cast<const vec_t*>(&weights[offset]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = vec_sub_16(acc[k], column[k]);
}
for (; i < added.size(); ++i)
{
IndexType index = added[i];
const IndexType offset = HalfDimensions * index + j * Tiling::TileHeight;
auto* column = reinterpret_cast<const vec_t*>(&weights[offset]);
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
}
for (IndexType k = 0; k < Tiling::NumRegs; k++)
vec_store(&entryTile[k], acc[k]);
for (IndexType k = 0; k < Tiling::NumRegs; k++)
vec_store(&accTile[k], acc[k]);
}
for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j)
{
auto* accTilePsqt = reinterpret_cast<psqt_vec_t*>(
&accumulator.psqtAccumulation[Perspective][j * Tiling::PsqtTileHeight]);
auto* entryTilePsqt =
reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * Tiling::PsqtTileHeight]);
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
psqt[k] = entryTilePsqt[k];
for (std::size_t i = 0; i < removed.size(); ++i)
{
IndexType index = removed[i];
const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
auto* columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
}
for (std::size_t i = 0; i < added.size(); ++i)
{
IndexType index = added[i];
const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
auto* columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
}
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
vec_store_psqt(&entryTilePsqt[k], psqt[k]);
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
vec_store_psqt(&accTilePsqt[k], psqt[k]);
}
#else
for (const auto index : removed)
{
const IndexType offset = HalfDimensions * index;
for (IndexType j = 0; j < HalfDimensions; ++j)
entry.accumulation[j] -= weights[offset + j];
for (std::size_t k = 0; k < PSQTBuckets; ++k)
entry.psqtAccumulation[k] -= psqtWeights[index * PSQTBuckets + k];
}
for (const auto index : added)
{
const IndexType offset = HalfDimensions * index;
for (IndexType j = 0; j < HalfDimensions; ++j)
entry.accumulation[j] += weights[offset + j];
for (std::size_t k = 0; k < PSQTBuckets; ++k)
entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k];
}
// The accumulator of the refresh entry has been updated.
// Now copy its content to the actual accumulator we were refreshing.
std::memcpy(accumulator.accumulation[Perspective], entry.accumulation,
sizeof(BiasType) * HalfDimensions);
std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation,
sizeof(int32_t) * PSQTBuckets);
#endif
for (Color c : {WHITE, BLACK})
entry.byColorBB[c] = pos.pieces(c);
for (PieceType pt = PAWN; pt <= KING; ++pt)
entry.byTypeBB[pt] = pos.pieces(pt);
}
template<Color Perspective>
void update_accumulator(const Position& pos,
AccumulatorCaches::Cache<HalfDimensions>* cache) const {
StateInfo* st = pos.state();
if ((st->*accPtr).computed[Perspective])
return; // nothing to do
// Look for a usable already computed accumulator of an earlier position.
// Always try to do an incremental update as most accumulators will be reusable.
do
{
if (FeatureSet::requires_refresh(st, Perspective) || !st->previous
|| st->previous->next != st)
{
// compute accumulator from scratch for this position
update_accumulator_refresh_cache<Perspective>(pos, cache);
if (st != pos.state())
// when computing an accumulator from scratch we can use it to
// efficiently compute the accumulator backwards, until we get to a king
// move. We expect that we will need these accumulators later anyway, so
// computing them now will save some work.
update_accumulator_incremental<Perspective, BACKWARDS>(
pos.square<KING>(Perspective), st, pos.state());
return;
}
st = st->previous;
} while (!(st->*accPtr).computed[Perspective]);
// Start from the oldest computed accumulator, update all the
// accumulators up to the current position.
update_accumulator_incremental<Perspective>(pos.square<KING>(Perspective), pos.state(), st);
}
template<IndexType Size>
friend struct AccumulatorCaches::Cache;
alignas(CacheLineSize) BiasType biases[HalfDimensions];
alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions];
alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets];
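
Inside transform() the data flow is now: ask the stack to bring its newest entry up to
date, then read that entry instead of pos.state(). Condensed from the hunk above (not
the literal code):

    accumulatorStack.evaluate(pos, *this, *cache);        // ensure latest() is computed for both colours
    const auto& accumulatorState = accumulatorStack.latest();

    const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
    const auto& psqtAccumulation = (accumulatorState.*accPtr).psqtAccumulation;
    const auto psqt = (psqtAccumulation[perspectives[0]][bucket]
                     - psqtAccumulation[perspectives[1]][bucket]) / 2;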


@@ -120,9 +120,12 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
format_cp_compact(value, &board[y + 2][x + 2], pos);
};
AccumulatorStack accumulators;
accumulators.reset(pos, networks, caches);
// We estimate the value of each piece by doing a differential evaluation from
// the current base eval, simulating the removal of the piece from its square.
auto [psqt, positional] = networks.big.evaluate(pos, &caches.big);
auto [psqt, positional] = networks.big.evaluate(pos, accumulators, &caches.big);
Value base = psqt + positional;
base = pos.side_to_move() == WHITE ? base : -base;
@@ -135,18 +138,15 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
if (pc != NO_PIECE && type_of(pc) != KING)
{
auto st = pos.state();
pos.remove_piece(sq);
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false;
std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big);
accumulators.reset(pos, networks, caches);
std::tie(psqt, positional) = networks.big.evaluate(pos, accumulators, &caches.big);
Value eval = psqt + positional;
eval = pos.side_to_move() == WHITE ? eval : -eval;
v = base - eval;
pos.put_piece(pc, sq);
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false;
}
writeSquare(f, r, pc, v);
@@ -157,7 +157,8 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
ss << board[row] << '\n';
ss << '\n';
auto t = networks.big.trace_evaluate(pos, &caches.big);
accumulators.reset(pos, networks, caches);
auto t = networks.big.trace_evaluate(pos, accumulators, &caches.big);
ss << " NNUE network contributions "
<< (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << std::endl


@@ -34,7 +34,6 @@ template<bool Root>
uint64_t perft(Position& pos, Depth depth) {
StateInfo st;
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
uint64_t cnt, nodes = 0;
const bool leaf = (depth == 2);


@@ -34,7 +34,6 @@
#include "bitboard.h"
#include "misc.h"
#include "movegen.h"
#include "nnue/nnue_common.h"
#include "syzygy/tbprobe.h"
#include "tt.h"
#include "uci.h"
@@ -83,7 +82,6 @@ std::ostream& operator<<(std::ostream& os, const Position& pos) {
if (int(Tablebases::MaxCardinality) >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
{
StateInfo st;
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
Position p;
p.set(pos.fen(), pos.is_chess960(), &st);
@@ -685,10 +683,10 @@ bool Position::gives_check(Move m) const {
// moves should be filtered out before this function is called.
// If a pointer to the TT table is passed, the entry for the new position
// will be prefetched
void Position::do_move(Move m,
StateInfo& newSt,
bool givesCheck,
const TranspositionTable* tt = nullptr) {
DirtyPiece Position::do_move(Move m,
StateInfo& newSt,
bool givesCheck,
const TranspositionTable* tt = nullptr) {
assert(m.is_ok());
assert(&newSt != st);
@@ -709,11 +707,7 @@ void Position::do_move(Move m,
++st->rule50;
++st->pliesFromNull;
// Used by NNUE
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] =
st->accumulatorSmall.computed[WHITE] = st->accumulatorSmall.computed[BLACK] = false;
auto& dp = st->dirtyPiece;
DirtyPiece dp;
dp.dirty_num = 1;
Color us = sideToMove;
@@ -733,7 +727,7 @@ void Position::do_move(Move m,
assert(captured == make_piece(us, ROOK));
Square rfrom, rto;
do_castling<true>(us, from, to, rfrom, rto);
do_castling<true>(us, from, to, rfrom, rto, &dp);
k ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto];
st->nonPawnKey[us] ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto];
@@ -906,6 +900,8 @@ void Position::do_move(Move m,
}
assert(pos_is_ok());
return dp;
}
@@ -975,23 +971,25 @@ void Position::undo_move(Move m) {
// Helper used to do/undo a castling move. This is a bit
// tricky in Chess960 where from/to squares can overlap.
template<bool Do>
void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto) {
void Position::do_castling(
Color us, Square from, Square& to, Square& rfrom, Square& rto, DirtyPiece* const dp) {
bool kingSide = to > from;
rfrom = to; // Castling is encoded as "king captures friendly rook"
rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
to = relative_square(us, kingSide ? SQ_G1 : SQ_C1);
assert(!Do || dp);
if (Do)
{
auto& dp = st->dirtyPiece;
dp.piece[0] = make_piece(us, KING);
dp.from[0] = from;
dp.to[0] = to;
dp.piece[1] = make_piece(us, ROOK);
dp.from[1] = rfrom;
dp.to[1] = rto;
dp.dirty_num = 2;
dp->piece[0] = make_piece(us, KING);
dp->from[0] = from;
dp->to[0] = to;
dp->piece[1] = make_piece(us, ROOK);
dp->from[1] = rfrom;
dp->to[1] = rto;
dp->dirty_num = 2;
}
// Remove both pieces first since squares could overlap in Chess960
@@ -1011,7 +1009,7 @@ void Position::do_null_move(StateInfo& newSt, const TranspositionTable& tt) {
assert(!checkers());
assert(&newSt != st);
std::memcpy(&newSt, st, offsetof(StateInfo, accumulatorBig));
std::memcpy(&newSt, st, sizeof(StateInfo));
newSt.previous = st;
st->next = &newSt;
@@ -1026,11 +1024,6 @@ void Position::do_null_move(StateInfo& newSt, const TranspositionTable& tt) {
st->key ^= Zobrist::side;
prefetch(tt.first_entry(key()));
st->dirtyPiece.dirty_num = 0;
st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator()
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] =
st->accumulatorSmall.computed[WHITE] = st->accumulatorSmall.computed[BLACK] = false;
st->pliesFromNull = 0;
sideToMove = ~sideToMove;
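
The net effect on Position is that do_move() now builds the DirtyPiece in a local and
returns it by value, with do_castling<true>() writing both movers into the caller's
struct. Illustration for a white king-side castle (squares follow from the do_castling
hunk above; the tt and accumulatorStack objects stand in for the caller's own):

    StateInfo st;
    DirtyPiece dp = pos.do_move(m, st, pos.gives_check(m), &tt);
    // dp.dirty_num == 2
    // dp.piece[0] == W_KING, dp.from[0] == SQ_E1, dp.to[0] == SQ_G1
    // dp.piece[1] == W_ROOK, dp.from[1] == SQ_H1, dp.to[1] == SQ_F1
    accumulatorStack.push(dp);   // the stack, not StateInfo, carries the change to NNUE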


@@ -26,8 +26,6 @@
#include <string>
#include "bitboard.h"
#include "nnue/nnue_accumulator.h"
#include "nnue/nnue_architecture.h"
#include "types.h"
namespace Stockfish {
@@ -61,11 +59,6 @@ struct StateInfo {
Bitboard checkSquares[PIECE_TYPE_NB];
Piece capturedPiece;
int repetition;
// Used by NNUE
DirtyPiece dirtyPiece;
Eval::NNUE::Accumulator<Eval::NNUE::TransformedFeatureDimensionsBig> accumulatorBig;
Eval::NNUE::Accumulator<Eval::NNUE::TransformedFeatureDimensionsSmall> accumulatorSmall;
};
@@ -140,11 +133,11 @@ class Position {
Piece captured_piece() const;
// Doing and undoing moves
void do_move(Move m, StateInfo& newSt, const TranspositionTable* tt);
void do_move(Move m, StateInfo& newSt, bool givesCheck, const TranspositionTable* tt);
void undo_move(Move m);
void do_null_move(StateInfo& newSt, const TranspositionTable& tt);
void undo_null_move();
void do_move(Move m, StateInfo& newSt, const TranspositionTable* tt);
DirtyPiece do_move(Move m, StateInfo& newSt, bool givesCheck, const TranspositionTable* tt);
void undo_move(Move m);
void do_null_move(StateInfo& newSt, const TranspositionTable& tt);
void undo_null_move();
// Static Exchange Evaluation
bool see_ge(Move m, int threshold = 0) const;
@@ -187,7 +180,12 @@ class Position {
// Other helpers
void move_piece(Square from, Square to);
template<bool Do>
void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto);
void do_castling(Color us,
Square from,
Square& to,
Square& rfrom,
Square& rto,
DirtyPiece* const dp = nullptr);
template<bool AfterMove>
Key adjust_key50(Key k) const;


@@ -41,7 +41,6 @@
#include "movepick.h"
#include "nnue/network.h"
#include "nnue/nnue_accumulator.h"
#include "nnue/nnue_common.h"
#include "position.h"
#include "syzygy/tbprobe.h"
#include "thread.h"
@@ -197,6 +196,8 @@ void Search::Worker::ensure_network_replicated() {
void Search::Worker::start_searching() {
accumulatorStack.reset(rootPos, networks[numaAccessToken], refreshTable);
// Non-main threads go directly to iterative_deepening()
if (!is_mainthread())
{
@@ -552,6 +553,26 @@ void Search::Worker::iterative_deepening() {
skill.best ? skill.best : skill.pick_best(rootMoves, multiPV)));
}
void Search::Worker::do_move(Position& pos, const Move move, StateInfo& st) {
do_move(pos, move, st, pos.gives_check(move));
}
void Search::Worker::do_move(Position& pos, const Move move, StateInfo& st, const bool givesCheck) {
DirtyPiece dp = pos.do_move(move, st, givesCheck, &tt);
accumulatorStack.push(dp);
}
void Search::Worker::do_null_move(Position& pos, StateInfo& st) { pos.do_null_move(st, tt); }
void Search::Worker::undo_move(Position& pos, const Move move) {
pos.undo_move(move);
accumulatorStack.pop();
}
void Search::Worker::undo_null_move(Position& pos) { pos.undo_null_move(); }
// Reset histories, usually before a new game
void Search::Worker::clear() {
mainHistory.fill(66);
@@ -614,7 +635,6 @@ Value Search::Worker::search(
Move pv[MAX_PLY + 1];
StateInfo st;
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
Key posKey;
Move move, excludedMove, bestMove;
@@ -859,11 +879,11 @@ Value Search::Worker::search(
ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0];
ss->continuationCorrectionHistory = &thisThread->continuationCorrectionHistory[NO_PIECE][0];
pos.do_null_move(st, tt);
do_null_move(pos, st);
Value nullValue = -search<NonPV>(pos, ss + 1, -beta, -beta + 1, depth - R, false);
pos.undo_null_move();
undo_null_move(pos);
// Do not return unproven mate or TB scores
if (nullValue >= beta && !is_win(nullValue))
@@ -925,7 +945,7 @@ Value Search::Worker::search(
movedPiece = pos.moved_piece(move);
pos.do_move(move, st, &tt);
do_move(pos, move, st);
thisThread->nodes.fetch_add(1, std::memory_order_relaxed);
ss->currentMove = move;
@@ -943,7 +963,7 @@ Value Search::Worker::search(
value = -search<NonPV>(pos, ss + 1, -probCutBeta, -probCutBeta + 1, probCutDepth,
!cutNode);
pos.undo_move(move);
undo_move(pos, move);
if (value >= probCutBeta)
{
@@ -1165,7 +1185,7 @@ moves_loop: // When in check, search starts here
}
// Step 16. Make the move
pos.do_move(move, st, givesCheck, &tt);
do_move(pos, move, st, givesCheck);
thisThread->nodes.fetch_add(1, std::memory_order_relaxed);
// Add extension to new depth
@@ -1290,7 +1310,7 @@ moves_loop: // When in check, search starts here
}
// Step 19. Undo move
pos.undo_move(move);
undo_move(pos, move);
assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);
@@ -1510,7 +1530,6 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta)
Move pv[MAX_PLY + 1];
StateInfo st;
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
Key posKey;
Move move, bestMove;
@@ -1674,7 +1693,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta)
// Step 7. Make and search the move
Piece movedPiece = pos.moved_piece(move);
pos.do_move(move, st, givesCheck, &tt);
do_move(pos, move, st, givesCheck);
thisThread->nodes.fetch_add(1, std::memory_order_relaxed);
// Update the current move
@@ -1685,7 +1704,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta)
&thisThread->continuationCorrectionHistory[movedPiece][move.to_sq()];
value = -qsearch<nodeType>(pos, ss + 1, -beta, -alpha);
pos.undo_move(move);
undo_move(pos, move);
assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);
@@ -1752,7 +1771,7 @@ TimePoint Search::Worker::elapsed() const {
TimePoint Search::Worker::elapsed_time() const { return main_manager()->tm.elapsed_time(); }
Value Search::Worker::evaluate(const Position& pos) {
return Eval::evaluate(networks[numaAccessToken], pos, refreshTable,
return Eval::evaluate(networks[numaAccessToken], pos, accumulatorStack, refreshTable,
optimism[pos.side_to_move()]);
}
@@ -2178,7 +2197,6 @@ void SearchManager::pv(Search::Worker& worker,
bool RootMove::extract_ponder_from_tt(const TranspositionTable& tt, Position& pos) {
StateInfo st;
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
assert(pv.size() == 1);
if (pv[0] == Move::none())


@@ -295,6 +295,12 @@ class Worker {
private:
void iterative_deepening();
void do_move(Position& pos, const Move move, StateInfo& st);
void do_move(Position& pos, const Move move, StateInfo& st, const bool givesCheck);
void do_null_move(Position& pos, StateInfo& st);
void undo_move(Position& pos, const Move move);
void undo_null_move(Position& pos);
// This is the main search function, for both PV and non-PV nodes
template<NodeType nodeType>
Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode);
@@ -347,6 +353,7 @@ class Worker {
const LazyNumaReplicated<Eval::NNUE::Networks>& networks;
// Used by NNUE
Eval::NNUE::AccumulatorStack accumulatorStack;
Eval::NNUE::AccumulatorCaches refreshTable;
friend class Stockfish::ThreadPool;