mirror of
https://github.com/HChaZZY/Stockfish.git
synced 2025-12-06 10:53:50 +08:00
Refactor accumulator storage/updates
Passed Non-regression STC: LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 115840 W: 29983 L: 29854 D: 56003 Ptnml(0-2): 338, 12990, 31149, 13091, 352 https://tests.stockfishchess.org/tests/view/67d0a044166a3e8781d84223 closes https://github.com/official-stockfish/Stockfish/pull/5927 No functional change
This commit is contained in:
@@ -55,7 +55,8 @@ PGOBENCH = $(WINE_PATH) ./$(EXE) bench
|
||||
SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \
|
||||
misc.cpp movegen.cpp movepick.cpp position.cpp \
|
||||
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
|
||||
nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp engine.cpp score.cpp memory.cpp
|
||||
nnue/nnue_accumulator.cpp nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp \
|
||||
engine.cpp score.cpp memory.cpp
|
||||
|
||||
HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h history.h \
|
||||
nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/layers/affine_transform.h \
|
||||
|
||||
@@ -54,21 +54,22 @@ bool Eval::use_smallnet(const Position& pos) {
|
||||
// of the position from the point of view of the side to move.
|
||||
Value Eval::evaluate(const Eval::NNUE::Networks& networks,
|
||||
const Position& pos,
|
||||
Eval::NNUE::AccumulatorStack& accumulators,
|
||||
Eval::NNUE::AccumulatorCaches& caches,
|
||||
int optimism) {
|
||||
|
||||
assert(!pos.checkers());
|
||||
|
||||
bool smallNet = use_smallnet(pos);
|
||||
auto [psqt, positional] = smallNet ? networks.small.evaluate(pos, &caches.small)
|
||||
: networks.big.evaluate(pos, &caches.big);
|
||||
auto [psqt, positional] = smallNet ? networks.small.evaluate(pos, accumulators, &caches.small)
|
||||
: networks.big.evaluate(pos, accumulators, &caches.big);
|
||||
|
||||
Value nnue = (125 * psqt + 131 * positional) / 128;
|
||||
|
||||
// Re-evaluate the position when higher eval accuracy is worth the time spent
|
||||
if (smallNet && (std::abs(nnue) < 236))
|
||||
{
|
||||
std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big);
|
||||
std::tie(psqt, positional) = networks.big.evaluate(pos, accumulators, &caches.big);
|
||||
nnue = (125 * psqt + 131 * positional) / 128;
|
||||
smallNet = false;
|
||||
}
|
||||
@@ -99,7 +100,10 @@ std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {
|
||||
if (pos.checkers())
|
||||
return "Final evaluation: none (in check)";
|
||||
|
||||
auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>(networks);
|
||||
Eval::NNUE::AccumulatorStack accumulators;
|
||||
auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>(networks);
|
||||
|
||||
accumulators.reset(pos, networks, *caches);
|
||||
|
||||
std::stringstream ss;
|
||||
ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
|
||||
@@ -107,12 +111,12 @@ std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {
|
||||
|
||||
ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);
|
||||
|
||||
auto [psqt, positional] = networks.big.evaluate(pos, &caches->big);
|
||||
auto [psqt, positional] = networks.big.evaluate(pos, accumulators, &caches->big);
|
||||
Value v = psqt + positional;
|
||||
v = pos.side_to_move() == WHITE ? v : -v;
|
||||
ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n";
|
||||
|
||||
v = evaluate(networks, pos, *caches, VALUE_ZERO);
|
||||
v = evaluate(networks, pos, accumulators, *caches, VALUE_ZERO);
|
||||
v = pos.side_to_move() == WHITE ? v : -v;
|
||||
ss << "Final evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)";
|
||||
ss << " [with scaled NNUE, ...]";
|
||||
|
||||
@@ -39,6 +39,7 @@ namespace Eval {
|
||||
namespace NNUE {
|
||||
struct Networks;
|
||||
struct AccumulatorCaches;
|
||||
class AccumulatorStack;
|
||||
}
|
||||
|
||||
std::string trace(Position& pos, const Eval::NNUE::Networks& networks);
|
||||
@@ -47,6 +48,7 @@ int simple_eval(const Position& pos, Color c);
|
||||
bool use_smallnet(const Position& pos);
|
||||
Value evaluate(const NNUE::Networks& networks,
|
||||
const Position& pos,
|
||||
Eval::NNUE::AccumulatorStack& accumulators,
|
||||
Eval::NNUE::AccumulatorCaches& caches,
|
||||
int optimism);
|
||||
} // namespace Eval
|
||||
|
||||
@@ -77,8 +77,8 @@ template void HalfKAv2_hm::append_changed_indices<BLACK>(Square ksq,
|
||||
IndexList& removed,
|
||||
IndexList& added);
|
||||
|
||||
bool HalfKAv2_hm::requires_refresh(const StateInfo* st, Color perspective) {
|
||||
return st->dirtyPiece.piece[0] == make_piece(perspective, KING);
|
||||
bool HalfKAv2_hm::requires_refresh(const DirtyPiece& dirtyPiece, Color perspective) {
|
||||
return dirtyPiece.piece[0] == make_piece(perspective, KING);
|
||||
}
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE::Features
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Stockfish {
|
||||
struct StateInfo;
|
||||
class Position;
|
||||
}
|
||||
|
||||
@@ -135,9 +134,9 @@ class HalfKAv2_hm {
|
||||
static void
|
||||
append_changed_indices(Square ksq, const DirtyPiece& dp, IndexList& removed, IndexList& added);
|
||||
|
||||
// Returns whether the change stored in this StateInfo means
|
||||
// Returns whether the change stored in this DirtyPiece means
|
||||
// that a full accumulator refresh is required.
|
||||
static bool requires_refresh(const StateInfo* st, Color perspective);
|
||||
static bool requires_refresh(const DirtyPiece& dirtyPiece, Color perspective);
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE::Features
|
||||
|
||||
@@ -210,6 +210,7 @@ bool Network<Arch, Transformer>::save(const std::optional<std::string>& filename
|
||||
template<typename Arch, typename Transformer>
|
||||
NetworkOutput
|
||||
Network<Arch, Transformer>::evaluate(const Position& pos,
|
||||
AccumulatorStack& accumulatorStack,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache) const {
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
@@ -229,8 +230,9 @@ Network<Arch, Transformer>::evaluate(const Position& pos
|
||||
|
||||
ASSERT_ALIGNED(transformedFeatures, alignment);
|
||||
|
||||
const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
||||
const auto psqt = featureTransformer->transform(pos, cache, transformedFeatures, bucket);
|
||||
const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
||||
const auto psqt =
|
||||
featureTransformer->transform(pos, accumulatorStack, cache, transformedFeatures, bucket);
|
||||
const auto positional = network[bucket].propagate(transformedFeatures);
|
||||
return {static_cast<Value>(psqt / OutputScale), static_cast<Value>(positional / OutputScale)};
|
||||
}
|
||||
@@ -280,6 +282,7 @@ void Network<Arch, Transformer>::verify(std::string
|
||||
template<typename Arch, typename Transformer>
|
||||
NnueEvalTrace
|
||||
Network<Arch, Transformer>::trace_evaluate(const Position& pos,
|
||||
AccumulatorStack& accumulatorStack,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache) const {
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
@@ -303,7 +306,7 @@ Network<Arch, Transformer>::trace_evaluate(const Position&
|
||||
for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
|
||||
{
|
||||
const auto materialist =
|
||||
featureTransformer->transform(pos, cache, transformedFeatures, bucket);
|
||||
featureTransformer->transform(pos, accumulatorStack, cache, transformedFeatures, bucket);
|
||||
const auto positional = network[bucket].propagate(transformedFeatures);
|
||||
|
||||
t.psqt[bucket] = static_cast<Value>(materialist / OutputScale);
|
||||
@@ -447,14 +450,14 @@ bool Network<Arch, Transformer>::write_parameters(std::ostream& stream,
|
||||
return bool(stream);
|
||||
}
|
||||
|
||||
// Explicit template instantiation
|
||||
// Explicit template instantiations
|
||||
|
||||
template class Network<
|
||||
NetworkArchitecture<TransformedFeatureDimensionsBig, L2Big, L3Big>,
|
||||
FeatureTransformer<TransformedFeatureDimensionsBig, &StateInfo::accumulatorBig>>;
|
||||
FeatureTransformer<TransformedFeatureDimensionsBig, &AccumulatorState::accumulatorBig>>;
|
||||
|
||||
template class Network<
|
||||
NetworkArchitecture<TransformedFeatureDimensionsSmall, L2Small, L3Small>,
|
||||
FeatureTransformer<TransformedFeatureDimensionsSmall, &StateInfo::accumulatorSmall>>;
|
||||
FeatureTransformer<TransformedFeatureDimensionsSmall, &AccumulatorState::accumulatorSmall>>;
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
@@ -29,13 +29,16 @@
|
||||
#include <utility>
|
||||
|
||||
#include "../memory.h"
|
||||
#include "../position.h"
|
||||
#include "../types.h"
|
||||
#include "nnue_accumulator.h"
|
||||
#include "nnue_architecture.h"
|
||||
#include "nnue_feature_transformer.h"
|
||||
#include "nnue_misc.h"
|
||||
|
||||
namespace Stockfish {
|
||||
class Position;
|
||||
}
|
||||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
enum class EmbeddedNNUEType {
|
||||
@@ -64,11 +67,13 @@ class Network {
|
||||
bool save(const std::optional<std::string>& filename) const;
|
||||
|
||||
NetworkOutput evaluate(const Position& pos,
|
||||
AccumulatorStack& accumulatorStack,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache) const;
|
||||
|
||||
|
||||
void verify(std::string evalfilePath, const std::function<void(std::string_view)>&) const;
|
||||
NnueEvalTrace trace_evaluate(const Position& pos,
|
||||
AccumulatorStack& accumulatorStack,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache) const;
|
||||
|
||||
private:
|
||||
@@ -100,16 +105,18 @@ class Network {
|
||||
|
||||
template<IndexType Size>
|
||||
friend struct AccumulatorCaches::Cache;
|
||||
|
||||
friend class AccumulatorStack;
|
||||
};
|
||||
|
||||
// Definitions of the network types
|
||||
using SmallFeatureTransformer =
|
||||
FeatureTransformer<TransformedFeatureDimensionsSmall, &StateInfo::accumulatorSmall>;
|
||||
FeatureTransformer<TransformedFeatureDimensionsSmall, &AccumulatorState::accumulatorSmall>;
|
||||
using SmallNetworkArchitecture =
|
||||
NetworkArchitecture<TransformedFeatureDimensionsSmall, L2Small, L3Small>;
|
||||
|
||||
using BigFeatureTransformer =
|
||||
FeatureTransformer<TransformedFeatureDimensionsBig, &StateInfo::accumulatorBig>;
|
||||
FeatureTransformer<TransformedFeatureDimensionsBig, &AccumulatorState::accumulatorBig>;
|
||||
using BigNetworkArchitecture = NetworkArchitecture<TransformedFeatureDimensionsBig, L2Big, L3Big>;
|
||||
|
||||
using NetworkBig = Network<BigNetworkArchitecture, BigFeatureTransformer>;
|
||||
|
||||
601
src/nnue/nnue_accumulator.cpp
Normal file
601
src/nnue/nnue_accumulator.cpp
Normal file
@@ -0,0 +1,601 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "nnue_accumulator.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <initializer_list>
|
||||
#include <memory>
|
||||
|
||||
#include "../bitboard.h"
|
||||
#include "../position.h"
|
||||
#include "../types.h"
|
||||
#include "nnue_architecture.h"
|
||||
#include "network.h"
|
||||
#include "nnue_common.h"
|
||||
#include "nnue_feature_transformer.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
namespace {
|
||||
|
||||
template<Color Perspective,
|
||||
IncUpdateDirection Direction = FORWARD,
|
||||
IndexType TransformedFeatureDimensions,
|
||||
Accumulator<TransformedFeatureDimensions> AccumulatorState::*accPtr>
|
||||
void update_accumulator_incremental(
|
||||
const FeatureTransformer<TransformedFeatureDimensions, accPtr>& featureTransformer,
|
||||
const Square ksq,
|
||||
AccumulatorState& target_state,
|
||||
const AccumulatorState& computed);
|
||||
|
||||
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void update_accumulator_refresh_cache(
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
const Position& pos,
|
||||
AccumulatorState& accumulatorState,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache);
|
||||
|
||||
}
|
||||
|
||||
// Re-arms this state for a new move: both perspectives of the big and the
// small accumulator are flagged as not computed, and the piece changes of
// the move are stored for later incremental updates.
void AccumulatorState::reset(const DirtyPiece& dp) noexcept {
    accumulatorSmall.computed.fill(false);
    accumulatorBig.computed.fill(false);
    dirtyPiece = dp;
}
|
||||
|
||||
// Read-only access to the state on top of the stack, i.e. the entry just
// below m_current_idx.
const AccumulatorState& AccumulatorStack::latest() const noexcept {
    const auto top = m_current_idx - 1;
    return m_accumulators[top];
}
|
||||
|
||||
// Mutable access to the state on top of the stack, i.e. the entry just
// below m_current_idx.
AccumulatorState& AccumulatorStack::mut_latest() noexcept {
    const auto top = m_current_idx - 1;
    return m_accumulators[top];
}
|
||||
|
||||
void AccumulatorStack::reset(const Position& rootPos,
|
||||
const Networks& networks,
|
||||
AccumulatorCaches& caches) noexcept {
|
||||
m_current_idx = 1;
|
||||
|
||||
update_accumulator_refresh_cache<WHITE, TransformedFeatureDimensionsBig,
|
||||
&AccumulatorState::accumulatorBig>(
|
||||
*networks.big.featureTransformer, rootPos, m_accumulators[0], caches.big);
|
||||
update_accumulator_refresh_cache<BLACK, TransformedFeatureDimensionsBig,
|
||||
&AccumulatorState::accumulatorBig>(
|
||||
*networks.big.featureTransformer, rootPos, m_accumulators[0], caches.big);
|
||||
|
||||
update_accumulator_refresh_cache<WHITE, TransformedFeatureDimensionsSmall,
|
||||
&AccumulatorState::accumulatorSmall>(
|
||||
*networks.small.featureTransformer, rootPos, m_accumulators[0], caches.small);
|
||||
update_accumulator_refresh_cache<BLACK, TransformedFeatureDimensionsSmall,
|
||||
&AccumulatorState::accumulatorSmall>(
|
||||
*networks.small.featureTransformer, rootPos, m_accumulators[0], caches.small);
|
||||
}
|
||||
|
||||
void AccumulatorStack::push(const DirtyPiece& dirtyPiece) noexcept {
|
||||
assert(m_current_idx + 1 < m_accumulators.size());
|
||||
m_accumulators[m_current_idx].reset(dirtyPiece);
|
||||
m_current_idx++;
|
||||
}
|
||||
|
||||
// Drops the top entry. The root entry (index 0) must always remain, hence
// the stack may never shrink below one element.
void AccumulatorStack::pop() noexcept {
    assert(m_current_idx > 1);
    --m_current_idx;
}
|
||||
|
||||
template<IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void AccumulatorStack::evaluate(const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
|
||||
|
||||
evaluate_side<WHITE>(pos, featureTransformer, cache);
|
||||
evaluate_side<BLACK>(pos, featureTransformer, cache);
|
||||
}
|
||||
|
||||
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void AccumulatorStack::evaluate_side(
|
||||
const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
|
||||
|
||||
const auto last_usable_accum = find_last_usable_accumulator<Perspective, Dimensions, accPtr>();
|
||||
|
||||
if ((m_accumulators[last_usable_accum].*accPtr).computed[Perspective])
|
||||
forward_update_incremental<Perspective>(pos, featureTransformer, last_usable_accum);
|
||||
|
||||
else
|
||||
{
|
||||
update_accumulator_refresh_cache<Perspective>(featureTransformer, pos, mut_latest(), cache);
|
||||
backward_update_incremental<Perspective>(pos, featureTransformer, last_usable_accum);
|
||||
}
|
||||
}
|
||||
|
||||
// Find the earliest usable accumulator, this can either be a computed accumulator or the accumulator
|
||||
// state just before a change that requires full refresh.
|
||||
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
std::size_t AccumulatorStack::find_last_usable_accumulator() const noexcept {
|
||||
|
||||
for (std::size_t curr_idx = m_current_idx - 1; curr_idx > 0; curr_idx--)
|
||||
{
|
||||
if ((m_accumulators[curr_idx].*accPtr).computed[Perspective])
|
||||
return curr_idx;
|
||||
|
||||
if (FeatureSet::requires_refresh(m_accumulators[curr_idx].dirtyPiece, Perspective))
|
||||
return curr_idx;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void AccumulatorStack::forward_update_incremental(
|
||||
const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
const std::size_t begin) noexcept {
|
||||
|
||||
assert(begin < m_accumulators.size());
|
||||
assert((m_accumulators[begin].*accPtr).computed[Perspective]);
|
||||
|
||||
const Square ksq = pos.square<KING>(Perspective);
|
||||
|
||||
for (std::size_t next = begin + 1; next < m_current_idx; next++)
|
||||
update_accumulator_incremental<Perspective>(featureTransformer, ksq, m_accumulators[next],
|
||||
m_accumulators[next - 1]);
|
||||
|
||||
assert((latest().*accPtr).computed[Perspective]);
|
||||
}
|
||||
|
||||
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void AccumulatorStack::backward_update_incremental(
|
||||
const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
const std::size_t end) noexcept {
|
||||
|
||||
assert(end < m_accumulators.size());
|
||||
assert(end < m_current_idx);
|
||||
assert((latest().*accPtr).computed[Perspective]);
|
||||
|
||||
const Square ksq = pos.square<KING>(Perspective);
|
||||
|
||||
for (std::size_t next = m_current_idx - 2; next >= end; next--)
|
||||
update_accumulator_incremental<Perspective, BACKWARDS>(
|
||||
featureTransformer, ksq, m_accumulators[next], m_accumulators[next + 1]);
|
||||
|
||||
assert((m_accumulators[end].*accPtr).computed[Perspective]);
|
||||
}
|
||||
|
||||
// Explicit template instantiations
|
||||
template void
|
||||
AccumulatorStack::evaluate<TransformedFeatureDimensionsBig, &AccumulatorState::accumulatorBig>(
|
||||
const Position& pos,
|
||||
const FeatureTransformer<TransformedFeatureDimensionsBig, &AccumulatorState::accumulatorBig>&
|
||||
featureTransformer,
|
||||
AccumulatorCaches::Cache<TransformedFeatureDimensionsBig>& cache) noexcept;
|
||||
template void
|
||||
AccumulatorStack::evaluate<TransformedFeatureDimensionsSmall, &AccumulatorState::accumulatorSmall>(
|
||||
const Position& pos,
|
||||
const FeatureTransformer<TransformedFeatureDimensionsSmall, &AccumulatorState::accumulatorSmall>&
|
||||
featureTransformer,
|
||||
AccumulatorCaches::Cache<TransformedFeatureDimensionsSmall>& cache) noexcept;
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
template<Color Perspective,
|
||||
IncUpdateDirection Direction,
|
||||
IndexType TransformedFeatureDimensions,
|
||||
Accumulator<TransformedFeatureDimensions> AccumulatorState::*accPtr>
|
||||
void update_accumulator_incremental(
|
||||
const FeatureTransformer<TransformedFeatureDimensions, accPtr>& featureTransformer,
|
||||
const Square ksq,
|
||||
AccumulatorState& target_state,
|
||||
const AccumulatorState& computed) {
|
||||
[[maybe_unused]] constexpr bool Forward = Direction == FORWARD;
|
||||
[[maybe_unused]] constexpr bool Backwards = Direction == BACKWARDS;
|
||||
|
||||
assert(Forward != Backwards);
|
||||
|
||||
assert((computed.*accPtr).computed[Perspective]);
|
||||
assert(!(target_state.*accPtr).computed[Perspective]);
|
||||
|
||||
// The size must be enough to contain the largest possible update.
|
||||
// That might depend on the feature set and generally relies on the
|
||||
// feature set's update cost calculation to be correct and never allow
|
||||
// updates with more added/removed features than MaxActiveDimensions.
|
||||
// In this case, the maximum size of both feature addition and removal
|
||||
// is 2, since we are incrementally updating one move at a time.
|
||||
FeatureSet::IndexList removed, added;
|
||||
if constexpr (Forward)
|
||||
FeatureSet::append_changed_indices<Perspective>(ksq, target_state.dirtyPiece, removed,
|
||||
added);
|
||||
else
|
||||
FeatureSet::append_changed_indices<Perspective>(ksq, computed.dirtyPiece, added, removed);
|
||||
|
||||
if (removed.size() == 0 && added.size() == 0)
|
||||
{
|
||||
std::memcpy((target_state.*accPtr).accumulation[Perspective],
|
||||
(computed.*accPtr).accumulation[Perspective],
|
||||
TransformedFeatureDimensions * sizeof(BiasType));
|
||||
std::memcpy((target_state.*accPtr).psqtAccumulation[Perspective],
|
||||
(computed.*accPtr).psqtAccumulation[Perspective],
|
||||
PSQTBuckets * sizeof(PSQTWeightType));
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(added.size() == 1 || added.size() == 2);
|
||||
assert(removed.size() == 1 || removed.size() == 2);
|
||||
|
||||
if (Forward)
|
||||
assert(added.size() <= removed.size());
|
||||
else
|
||||
assert(removed.size() <= added.size());
|
||||
|
||||
#ifdef VECTOR
|
||||
auto* accIn =
|
||||
reinterpret_cast<const vec_t*>(&(computed.*accPtr).accumulation[Perspective][0]);
|
||||
auto* accOut =
|
||||
reinterpret_cast<vec_t*>(&(target_state.*accPtr).accumulation[Perspective][0]);
|
||||
|
||||
const IndexType offsetA0 = TransformedFeatureDimensions * added[0];
|
||||
auto* columnA0 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA0]);
|
||||
const IndexType offsetR0 = TransformedFeatureDimensions * removed[0];
|
||||
auto* columnR0 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR0]);
|
||||
|
||||
if ((Forward && removed.size() == 1) || (Backwards && added.size() == 1))
|
||||
{
|
||||
assert(added.size() == 1 && removed.size() == 1);
|
||||
for (IndexType i = 0;
|
||||
i < TransformedFeatureDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||
accOut[i] = vec_add_16(vec_sub_16(accIn[i], columnR0[i]), columnA0[i]);
|
||||
}
|
||||
else if (Forward && added.size() == 1)
|
||||
{
|
||||
assert(removed.size() == 2);
|
||||
const IndexType offsetR1 = TransformedFeatureDimensions * removed[1];
|
||||
auto* columnR1 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR1]);
|
||||
|
||||
for (IndexType i = 0;
|
||||
i < TransformedFeatureDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||
accOut[i] = vec_sub_16(vec_add_16(accIn[i], columnA0[i]),
|
||||
vec_add_16(columnR0[i], columnR1[i]));
|
||||
}
|
||||
else if (Backwards && removed.size() == 1)
|
||||
{
|
||||
assert(added.size() == 2);
|
||||
const IndexType offsetA1 = TransformedFeatureDimensions * added[1];
|
||||
auto* columnA1 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA1]);
|
||||
|
||||
for (IndexType i = 0;
|
||||
i < TransformedFeatureDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||
accOut[i] = vec_add_16(vec_add_16(accIn[i], columnA0[i]),
|
||||
vec_sub_16(columnA1[i], columnR0[i]));
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(added.size() == 2 && removed.size() == 2);
|
||||
const IndexType offsetA1 = TransformedFeatureDimensions * added[1];
|
||||
auto* columnA1 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA1]);
|
||||
const IndexType offsetR1 = TransformedFeatureDimensions * removed[1];
|
||||
auto* columnR1 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR1]);
|
||||
|
||||
for (IndexType i = 0;
|
||||
i < TransformedFeatureDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||
accOut[i] = vec_add_16(accIn[i], vec_sub_16(vec_add_16(columnA0[i], columnA1[i]),
|
||||
vec_add_16(columnR0[i], columnR1[i])));
|
||||
}
|
||||
|
||||
auto* accPsqtIn =
|
||||
reinterpret_cast<const psqt_vec_t*>(&(computed.*accPtr).psqtAccumulation[Perspective][0]);
|
||||
auto* accPsqtOut =
|
||||
reinterpret_cast<psqt_vec_t*>(&(target_state.*accPtr).psqtAccumulation[Perspective][0]);
|
||||
|
||||
const IndexType offsetPsqtA0 = PSQTBuckets * added[0];
|
||||
auto* columnPsqtA0 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtA0]);
|
||||
const IndexType offsetPsqtR0 = PSQTBuckets * removed[0];
|
||||
auto* columnPsqtR0 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtR0]);
|
||||
|
||||
if ((Forward && removed.size() == 1)
|
||||
|| (Backwards && added.size() == 1)) // added.size() == removed.size() == 1
|
||||
{
|
||||
for (std::size_t i = 0; i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t);
|
||||
++i)
|
||||
accPsqtOut[i] =
|
||||
vec_add_psqt_32(vec_sub_psqt_32(accPsqtIn[i], columnPsqtR0[i]), columnPsqtA0[i]);
|
||||
}
|
||||
else if (Forward && added.size() == 1)
|
||||
{
|
||||
const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
|
||||
auto* columnPsqtR1 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtR1]);
|
||||
|
||||
for (std::size_t i = 0; i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t);
|
||||
++i)
|
||||
accPsqtOut[i] = vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA0[i]),
|
||||
vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i]));
|
||||
}
|
||||
else if (Backwards && removed.size() == 1)
|
||||
{
|
||||
const IndexType offsetPsqtA1 = PSQTBuckets * added[1];
|
||||
auto* columnPsqtA1 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtA1]);
|
||||
|
||||
for (std::size_t i = 0; i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t);
|
||||
++i)
|
||||
accPsqtOut[i] = vec_add_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA0[i]),
|
||||
vec_sub_psqt_32(columnPsqtA1[i], columnPsqtR0[i]));
|
||||
}
|
||||
else
|
||||
{
|
||||
const IndexType offsetPsqtA1 = PSQTBuckets * added[1];
|
||||
auto* columnPsqtA1 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtA1]);
|
||||
const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
|
||||
auto* columnPsqtR1 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtR1]);
|
||||
|
||||
for (std::size_t i = 0; i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t);
|
||||
++i)
|
||||
accPsqtOut[i] = vec_add_psqt_32(
|
||||
accPsqtIn[i], vec_sub_psqt_32(vec_add_psqt_32(columnPsqtA0[i], columnPsqtA1[i]),
|
||||
vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i])));
|
||||
}
|
||||
#else
|
||||
std::memcpy((target_state.*accPtr).accumulation[Perspective],
|
||||
(computed.*accPtr).accumulation[Perspective],
|
||||
TransformedFeatureDimensions * sizeof(BiasType));
|
||||
std::memcpy((target_state.*accPtr).psqtAccumulation[Perspective],
|
||||
(computed.*accPtr).psqtAccumulation[Perspective],
|
||||
PSQTBuckets * sizeof(PSQTWeightType));
|
||||
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed)
|
||||
{
|
||||
const IndexType offset = TransformedFeatureDimensions * index;
|
||||
for (IndexType i = 0; i < TransformedFeatureDimensions; ++i)
|
||||
(target_state.*accPtr).accumulation[Perspective][i] -=
|
||||
featureTransformer.weights[offset + i];
|
||||
|
||||
for (std::size_t i = 0; i < PSQTBuckets; ++i)
|
||||
(target_state.*accPtr).psqtAccumulation[Perspective][i] -=
|
||||
featureTransformer.psqtWeights[index * PSQTBuckets + i];
|
||||
}
|
||||
|
||||
// Difference calculation for the activated features
|
||||
for (const auto index : added)
|
||||
{
|
||||
const IndexType offset = TransformedFeatureDimensions * index;
|
||||
for (IndexType i = 0; i < TransformedFeatureDimensions; ++i)
|
||||
(target_state.*accPtr).accumulation[Perspective][i] +=
|
||||
featureTransformer.weights[offset + i];
|
||||
|
||||
for (std::size_t i = 0; i < PSQTBuckets; ++i)
|
||||
(target_state.*accPtr).psqtAccumulation[Perspective][i] +=
|
||||
featureTransformer.psqtWeights[index * PSQTBuckets + i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
(target_state.*accPtr).computed[Perspective] = true;
|
||||
}
|
||||
|
||||
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void update_accumulator_refresh_cache(
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
const Position& pos,
|
||||
AccumulatorState& accumulatorState,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) {
|
||||
using Tiling [[maybe_unused]] = SIMDTiling<Dimensions, Dimensions>;
|
||||
|
||||
const Square ksq = pos.square<KING>(Perspective);
|
||||
auto& entry = cache[ksq][Perspective];
|
||||
FeatureSet::IndexList removed, added;
|
||||
|
||||
for (Color c : {WHITE, BLACK})
|
||||
{
|
||||
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
||||
{
|
||||
const Piece piece = make_piece(c, pt);
|
||||
const Bitboard oldBB = entry.byColorBB[c] & entry.byTypeBB[pt];
|
||||
const Bitboard newBB = pos.pieces(c, pt);
|
||||
Bitboard toRemove = oldBB & ~newBB;
|
||||
Bitboard toAdd = newBB & ~oldBB;
|
||||
|
||||
while (toRemove)
|
||||
{
|
||||
Square sq = pop_lsb(toRemove);
|
||||
removed.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
|
||||
}
|
||||
while (toAdd)
|
||||
{
|
||||
Square sq = pop_lsb(toAdd);
|
||||
added.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto& accumulator = accumulatorState.*accPtr;
|
||||
accumulator.computed[Perspective] = true;
|
||||
|
||||
#ifdef VECTOR
|
||||
const bool combineLast3 =
|
||||
std::abs((int) removed.size() - (int) added.size()) == 1 && removed.size() + added.size() > 2;
|
||||
vec_t acc[Tiling::NumRegs];
|
||||
psqt_vec_t psqt[Tiling::NumPsqtRegs];
|
||||
|
||||
for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j)
|
||||
{
|
||||
auto* accTile =
|
||||
reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * Tiling::TileHeight]);
|
||||
auto* entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * Tiling::TileHeight]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = entryTile[k];
|
||||
|
||||
std::size_t i = 0;
|
||||
for (; i < std::min(removed.size(), added.size()) - combineLast3; ++i)
|
||||
{
|
||||
IndexType indexR = removed[i];
|
||||
const IndexType offsetR = Dimensions * indexR + j * Tiling::TileHeight;
|
||||
auto* columnR = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR]);
|
||||
IndexType indexA = added[i];
|
||||
const IndexType offsetA = Dimensions * indexA + j * Tiling::TileHeight;
|
||||
auto* columnA = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], vec_sub_16(columnA[k], columnR[k]));
|
||||
}
|
||||
if (combineLast3)
|
||||
{
|
||||
IndexType indexR = removed[i];
|
||||
const IndexType offsetR = Dimensions * indexR + j * Tiling::TileHeight;
|
||||
auto* columnR = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR]);
|
||||
IndexType indexA = added[i];
|
||||
const IndexType offsetA = Dimensions * indexA + j * Tiling::TileHeight;
|
||||
auto* columnA = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA]);
|
||||
|
||||
if (removed.size() > added.size())
|
||||
{
|
||||
IndexType indexR2 = removed[i + 1];
|
||||
const IndexType offsetR2 = Dimensions * indexR2 + j * Tiling::TileHeight;
|
||||
auto* columnR2 =
|
||||
reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR2]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(vec_add_16(acc[k], columnA[k]),
|
||||
vec_add_16(columnR[k], columnR2[k]));
|
||||
}
|
||||
else
|
||||
{
|
||||
IndexType indexA2 = added[i + 1];
|
||||
const IndexType offsetA2 = Dimensions * indexA2 + j * Tiling::TileHeight;
|
||||
auto* columnA2 =
|
||||
reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA2]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_add_16(vec_sub_16(acc[k], columnR[k]),
|
||||
vec_add_16(columnA[k], columnA2[k]));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (; i < removed.size(); ++i)
|
||||
{
|
||||
IndexType index = removed[i];
|
||||
const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
|
||||
auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
for (; i < added.size(); ++i)
|
||||
{
|
||||
IndexType index = added[i];
|
||||
const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
|
||||
auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; k++)
|
||||
vec_store(&entryTile[k], acc[k]);
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; k++)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
|
||||
for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j)
|
||||
{
|
||||
auto* accTilePsqt = reinterpret_cast<psqt_vec_t*>(
|
||||
&accumulator.psqtAccumulation[Perspective][j * Tiling::PsqtTileHeight]);
|
||||
auto* entryTilePsqt =
|
||||
reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * Tiling::PsqtTileHeight]);
|
||||
|
||||
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
psqt[k] = entryTilePsqt[k];
|
||||
|
||||
for (std::size_t i = 0; i < removed.size(); ++i)
|
||||
{
|
||||
IndexType index = removed[i];
|
||||
const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
|
||||
auto* columnPsqt =
|
||||
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offset]);
|
||||
|
||||
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
|
||||
}
|
||||
for (std::size_t i = 0; i < added.size(); ++i)
|
||||
{
|
||||
IndexType index = added[i];
|
||||
const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
|
||||
auto* columnPsqt =
|
||||
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offset]);
|
||||
|
||||
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
|
||||
}
|
||||
|
||||
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
vec_store_psqt(&entryTilePsqt[k], psqt[k]);
|
||||
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
vec_store_psqt(&accTilePsqt[k], psqt[k]);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for (const auto index : removed)
|
||||
{
|
||||
const IndexType offset = Dimensions * index;
|
||||
for (IndexType j = 0; j < Dimensions; ++j)
|
||||
entry.accumulation[j] -= featureTransformer.weights[offset + j];
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
entry.psqtAccumulation[k] -= featureTransformer.psqtWeights[index * PSQTBuckets + k];
|
||||
}
|
||||
for (const auto index : added)
|
||||
{
|
||||
const IndexType offset = Dimensions * index;
|
||||
for (IndexType j = 0; j < Dimensions; ++j)
|
||||
entry.accumulation[j] += featureTransformer.weights[offset + j];
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
entry.psqtAccumulation[k] += featureTransformer.psqtWeights[index * PSQTBuckets + k];
|
||||
}
|
||||
|
||||
// The accumulator of the refresh entry has been updated.
|
||||
// Now copy its content to the actual accumulator we were refreshing.
|
||||
|
||||
std::memcpy(accumulator.accumulation[Perspective], entry.accumulation,
|
||||
sizeof(BiasType) * Dimensions);
|
||||
|
||||
std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation,
|
||||
sizeof(int32_t) * PSQTBuckets);
|
||||
#endif
|
||||
|
||||
for (Color c : {WHITE, BLACK})
|
||||
entry.byColorBB[c] = pos.pieces(c);
|
||||
|
||||
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
||||
entry.byTypeBB[pt] = pos.pieces(pt);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -21,23 +21,43 @@
|
||||
#ifndef NNUE_ACCUMULATOR_H_INCLUDED
|
||||
#define NNUE_ACCUMULATOR_H_INCLUDED
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "../types.h"
|
||||
#include "nnue_architecture.h"
|
||||
#include "nnue_common.h"
|
||||
|
||||
namespace Stockfish {
|
||||
class Position;
|
||||
}
|
||||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
// Element type of the feature transformer biases; also used to size the
// 16-bit accumulator arrays when they are copied.
using BiasType = std::int16_t;
|
||||
// Element type of the PSQT weights; also used to size the 32-bit PSQT
// accumulator arrays when they are copied.
using PSQTWeightType = std::int32_t;
|
||||
// Unsigned type used for dimensions, feature indices, weight offsets and loop counters.
using IndexType = std::uint32_t;
|
||||
|
||||
struct Networks;
|
||||
|
||||
template<IndexType Size>
|
||||
struct alignas(CacheLineSize) Accumulator;
|
||||
|
||||
struct AccumulatorState;
|
||||
|
||||
template<IndexType TransformedFeatureDimensions,
|
||||
Accumulator<TransformedFeatureDimensions> AccumulatorState::*accPtr>
|
||||
class FeatureTransformer;
|
||||
|
||||
// Class that holds the result of affine transformation of input features
|
||||
template<IndexType Size>
|
||||
struct alignas(CacheLineSize) Accumulator {
|
||||
std::int16_t accumulation[COLOR_NB][Size];
|
||||
std::int32_t psqtAccumulation[COLOR_NB][PSQTBuckets];
|
||||
bool computed[COLOR_NB];
|
||||
std::int16_t accumulation[COLOR_NB][Size];
|
||||
std::int32_t psqtAccumulation[COLOR_NB][PSQTBuckets];
|
||||
std::array<bool, COLOR_NB> computed;
|
||||
};
|
||||
|
||||
|
||||
@@ -95,6 +115,69 @@ struct AccumulatorCaches {
|
||||
Cache<TransformedFeatureDimensionsSmall> small;
|
||||
};
|
||||
|
||||
|
||||
// One entry of the AccumulatorStack: an accumulator for the big network, an
// accumulator for the small network, and a DirtyPiece record (presumably the
// piece changes of the move that produced this state - confirm against push()).
struct AccumulatorState {
|
||||
Accumulator<TransformedFeatureDimensionsBig> accumulatorBig;
|
||||
Accumulator<TransformedFeatureDimensionsSmall> accumulatorSmall;
|
||||
DirtyPiece dirtyPiece;
|
||||
|
||||
// Re-initializes this state from dp. Defined out of line; presumably it stores
// dp and marks both accumulators as not computed - confirm in the .cpp file.
void reset(const DirtyPiece& dp) noexcept;
|
||||
};
|
||||
|
||||
|
||||
// Owns the NNUE accumulator states used during search/evaluation. The states
// live in a vector sized once in the constructor; callers pass the stack to
// the network evaluation functions instead of reading accumulators from the
// position's StateInfo. All member functions are defined out of line, so the
// notes below describe only what the declarations and visible callers show.
class AccumulatorStack {
|
||||
public:
|
||||
// Allocates MAX_PLY + 1 states up front and starts with index 0.
AccumulatorStack() :
|
||||
m_accumulators(MAX_PLY + 1),
|
||||
m_current_idx{} {}
|
||||
|
||||
// Read-only access to the most recent state. FeatureTransformer::transform()
// reads the accumulation and PSQT data from it right after calling evaluate().
[[nodiscard]] const AccumulatorState& latest() const noexcept;
|
||||
|
||||
// Re-seeds the stack for rootPos. The NNUE trace() helper calls this after
// every change it makes to the board, before evaluating the position again.
void
|
||||
reset(const Position& rootPos, const Networks& networks, AccumulatorCaches& caches) noexcept;
|
||||
// push() adds a state for the given DirtyPiece and pop() discards the most
// recent one. Presumably paired with making/unmaking a move - TODO confirm
// against the callers.
void push(const DirtyPiece& dirtyPiece) noexcept;
|
||||
void pop() noexcept;
|
||||
|
||||
// Prepares the accumulator selected by accPtr for pos. Called by
// FeatureTransformer::transform() before it reads latest().
template<IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void evaluate(const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) noexcept;
|
||||
|
||||
private:
|
||||
// Mutable counterpart of latest() (judging by name and return type).
[[nodiscard]] AccumulatorState& mut_latest() noexcept;
|
||||
|
||||
// Single-perspective variant of evaluate(); presumably invoked once per
// color - confirm in the .cpp file.
template<Color Perspective,
|
||||
IndexType Dimensions,
|
||||
Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void evaluate_side(const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) noexcept;
|
||||
|
||||
// Returns a std::size_t, presumably an index into m_accumulators naming the
// state from which an incremental update for Perspective can start - confirm.
template<Color Perspective,
|
||||
IndexType Dimensions,
|
||||
Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
[[nodiscard]] std::size_t find_last_usable_accumulator() const noexcept;
|
||||
|
||||
// Incremental update helper; `begin` is presumably the index of the state to
// start from - exact semantics are in the .cpp file.
template<Color Perspective,
|
||||
IndexType Dimensions,
|
||||
Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void
|
||||
forward_update_incremental(const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
const std::size_t begin) noexcept;
|
||||
|
||||
// Incremental update helper working in the opposite direction; `end` is
// presumably the index at which it stops - exact semantics are in the .cpp file.
template<Color Perspective,
|
||||
IndexType Dimensions,
|
||||
Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void
|
||||
backward_update_incremental(const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
const std::size_t end) noexcept;
|
||||
|
||||
// State storage; sized to MAX_PLY + 1 entries by the constructor.
std::vector<AccumulatorState> m_accumulators;
|
||||
// Position within m_accumulators; zero-initialized by the constructor.
std::size_t m_current_idx;
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
#endif // NNUE_ACCUMULATOR_H_INCLUDED
|
||||
|
||||
@@ -279,6 +279,11 @@ inline void write_leb_128(std::ostream& stream, const IntType* values, std::size
|
||||
flush();
|
||||
}
|
||||
|
||||
// Direction of an incremental accumulator update: FORWARD derives the state
// that follows an already computed one, BACKWARDS the state that precedes it.
enum IncUpdateDirection {
|
||||
FORWARD,
|
||||
BACKWARDS
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
#endif // #ifndef NNUE_COMMON_H_INCLUDED
|
||||
|
||||
@@ -22,12 +22,9 @@
|
||||
#define NNUE_FEATURE_TRANSFORMER_H_INCLUDED
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <iosfwd>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
#include "../position.h"
|
||||
#include "../types.h"
|
||||
@@ -41,11 +38,6 @@ using BiasType = std::int16_t;
|
||||
using WeightType = std::int16_t;
|
||||
using PSQTWeightType = std::int32_t;
|
||||
|
||||
// Direction of an incremental accumulator update: FORWARD derives the state
// that follows an already computed one, BACKWARDS the state that precedes it.
enum IncUpdateDirection {
|
||||
FORWARD,
|
||||
BACKWARDS
|
||||
};
|
||||
|
||||
// If vector instructions are enabled, we update and refresh the
|
||||
// accumulator tile by tile such that each tile fits in the CPU's
|
||||
// vector registers.
|
||||
@@ -249,15 +241,12 @@ class SIMDTiling {
|
||||
|
||||
// Input feature converter
|
||||
template<IndexType TransformedFeatureDimensions,
|
||||
Accumulator<TransformedFeatureDimensions> StateInfo::*accPtr>
|
||||
Accumulator<TransformedFeatureDimensions> AccumulatorState::*accPtr>
|
||||
class FeatureTransformer {
|
||||
|
||||
// Number of output dimensions for one side
|
||||
static constexpr IndexType HalfDimensions = TransformedFeatureDimensions;
|
||||
|
||||
private:
|
||||
using Tiling = SIMDTiling<TransformedFeatureDimensions, HalfDimensions>;
|
||||
|
||||
public:
|
||||
// Output type
|
||||
using OutputType = TransformedFeatureType;
|
||||
@@ -348,19 +337,21 @@ class FeatureTransformer {
|
||||
|
||||
// Convert input features
|
||||
std::int32_t transform(const Position& pos,
|
||||
AccumulatorStack& accumulatorStack,
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache,
|
||||
OutputType* output,
|
||||
int bucket) const {
|
||||
update_accumulator<WHITE>(pos, cache);
|
||||
update_accumulator<BLACK>(pos, cache);
|
||||
|
||||
accumulatorStack.evaluate(pos, *this, *cache);
|
||||
const auto& accumulatorState = accumulatorStack.latest();
|
||||
|
||||
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
|
||||
const auto& psqtAccumulation = (pos.state()->*accPtr).psqtAccumulation;
|
||||
const auto& psqtAccumulation = (accumulatorState.*accPtr).psqtAccumulation;
|
||||
const auto psqt =
|
||||
(psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket])
|
||||
/ 2;
|
||||
|
||||
const auto& accumulation = (pos.state()->*accPtr).accumulation;
|
||||
const auto& accumulation = (accumulatorState.*accPtr).accumulation;
|
||||
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
{
|
||||
@@ -473,432 +464,6 @@ class FeatureTransformer {
|
||||
return psqt;
|
||||
} // end of function transform()
|
||||
|
||||
private:
|
||||
// Given a computed accumulator, computes the accumulator of another position.
|
||||
template<Color Perspective, IncUpdateDirection Direction = FORWARD>
|
||||
void update_accumulator_incremental(const Square ksq,
|
||||
StateInfo* target_state,
|
||||
const StateInfo* computed) const {
|
||||
[[maybe_unused]] constexpr bool Forward = Direction == FORWARD;
|
||||
[[maybe_unused]] constexpr bool Backwards = Direction == BACKWARDS;
|
||||
assert((computed->*accPtr).computed[Perspective]);
|
||||
|
||||
StateInfo* next = Forward ? computed->next : computed->previous;
|
||||
|
||||
assert(next != nullptr);
|
||||
assert(!(next->*accPtr).computed[Perspective]);
|
||||
|
||||
// The size must be enough to contain the largest possible update.
|
||||
// That might depend on the feature set and generally relies on the
|
||||
// feature set's update cost calculation to be correct and never allow
|
||||
// updates with more added/removed features than MaxActiveDimensions.
|
||||
// In this case, the maximum size of both feature addition and removal
|
||||
// is 2, since we are incrementally updating one move at a time.
|
||||
FeatureSet::IndexList removed, added;
|
||||
if constexpr (Forward)
|
||||
FeatureSet::append_changed_indices<Perspective>(ksq, next->dirtyPiece, removed, added);
|
||||
else
|
||||
FeatureSet::append_changed_indices<Perspective>(ksq, computed->dirtyPiece, added,
|
||||
removed);
|
||||
|
||||
if (removed.size() == 0 && added.size() == 0)
|
||||
{
|
||||
std::memcpy((next->*accPtr).accumulation[Perspective],
|
||||
(computed->*accPtr).accumulation[Perspective],
|
||||
HalfDimensions * sizeof(BiasType));
|
||||
std::memcpy((next->*accPtr).psqtAccumulation[Perspective],
|
||||
(computed->*accPtr).psqtAccumulation[Perspective],
|
||||
PSQTBuckets * sizeof(PSQTWeightType));
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(added.size() == 1 || added.size() == 2);
|
||||
assert(removed.size() == 1 || removed.size() == 2);
|
||||
if (Forward)
|
||||
assert(added.size() <= removed.size());
|
||||
else
|
||||
assert(removed.size() <= added.size());
|
||||
|
||||
#ifdef VECTOR
|
||||
auto* accIn =
|
||||
reinterpret_cast<const vec_t*>(&(computed->*accPtr).accumulation[Perspective][0]);
|
||||
auto* accOut = reinterpret_cast<vec_t*>(&(next->*accPtr).accumulation[Perspective][0]);
|
||||
|
||||
const IndexType offsetA0 = HalfDimensions * added[0];
|
||||
auto* columnA0 = reinterpret_cast<const vec_t*>(&weights[offsetA0]);
|
||||
const IndexType offsetR0 = HalfDimensions * removed[0];
|
||||
auto* columnR0 = reinterpret_cast<const vec_t*>(&weights[offsetR0]);
|
||||
|
||||
if ((Forward && removed.size() == 1) || (Backwards && added.size() == 1))
|
||||
{
|
||||
assert(added.size() == 1 && removed.size() == 1);
|
||||
for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||
accOut[i] = vec_add_16(vec_sub_16(accIn[i], columnR0[i]), columnA0[i]);
|
||||
}
|
||||
else if (Forward && added.size() == 1)
|
||||
{
|
||||
assert(removed.size() == 2);
|
||||
const IndexType offsetR1 = HalfDimensions * removed[1];
|
||||
auto* columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
|
||||
|
||||
for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||
accOut[i] = vec_sub_16(vec_add_16(accIn[i], columnA0[i]),
|
||||
vec_add_16(columnR0[i], columnR1[i]));
|
||||
}
|
||||
else if (Backwards && removed.size() == 1)
|
||||
{
|
||||
assert(added.size() == 2);
|
||||
const IndexType offsetA1 = HalfDimensions * added[1];
|
||||
auto* columnA1 = reinterpret_cast<const vec_t*>(&weights[offsetA1]);
|
||||
|
||||
for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||
accOut[i] = vec_add_16(vec_add_16(accIn[i], columnA0[i]),
|
||||
vec_sub_16(columnA1[i], columnR0[i]));
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(added.size() == 2 && removed.size() == 2);
|
||||
const IndexType offsetA1 = HalfDimensions * added[1];
|
||||
auto* columnA1 = reinterpret_cast<const vec_t*>(&weights[offsetA1]);
|
||||
const IndexType offsetR1 = HalfDimensions * removed[1];
|
||||
auto* columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
|
||||
|
||||
for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||
accOut[i] =
|
||||
vec_add_16(accIn[i], vec_sub_16(vec_add_16(columnA0[i], columnA1[i]),
|
||||
vec_add_16(columnR0[i], columnR1[i])));
|
||||
}
|
||||
|
||||
auto* accPsqtIn = reinterpret_cast<const psqt_vec_t*>(
|
||||
&(computed->*accPtr).psqtAccumulation[Perspective][0]);
|
||||
auto* accPsqtOut =
|
||||
reinterpret_cast<psqt_vec_t*>(&(next->*accPtr).psqtAccumulation[Perspective][0]);
|
||||
|
||||
const IndexType offsetPsqtA0 = PSQTBuckets * added[0];
|
||||
auto* columnPsqtA0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA0]);
|
||||
const IndexType offsetPsqtR0 = PSQTBuckets * removed[0];
|
||||
auto* columnPsqtR0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR0]);
|
||||
|
||||
if ((Forward && removed.size() == 1)
|
||||
|| (Backwards && added.size() == 1)) // added.size() == removed.size() == 1
|
||||
{
|
||||
for (std::size_t i = 0;
|
||||
i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
|
||||
accPsqtOut[i] = vec_add_psqt_32(vec_sub_psqt_32(accPsqtIn[i], columnPsqtR0[i]),
|
||||
columnPsqtA0[i]);
|
||||
}
|
||||
else if (Forward && added.size() == 1)
|
||||
{
|
||||
const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
|
||||
auto* columnPsqtR1 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR1]);
|
||||
|
||||
for (std::size_t i = 0;
|
||||
i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
|
||||
accPsqtOut[i] =
|
||||
vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA0[i]),
|
||||
vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i]));
|
||||
}
|
||||
else if (Backwards && removed.size() == 1)
|
||||
{
|
||||
const IndexType offsetPsqtA1 = PSQTBuckets * added[1];
|
||||
auto* columnPsqtA1 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA1]);
|
||||
|
||||
for (std::size_t i = 0;
|
||||
i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
|
||||
accPsqtOut[i] =
|
||||
vec_add_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA0[i]),
|
||||
vec_sub_psqt_32(columnPsqtA1[i], columnPsqtR0[i]));
|
||||
}
|
||||
else
|
||||
{
|
||||
const IndexType offsetPsqtA1 = PSQTBuckets * added[1];
|
||||
auto* columnPsqtA1 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA1]);
|
||||
const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
|
||||
auto* columnPsqtR1 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR1]);
|
||||
|
||||
for (std::size_t i = 0;
|
||||
i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t); ++i)
|
||||
accPsqtOut[i] = vec_add_psqt_32(
|
||||
accPsqtIn[i],
|
||||
vec_sub_psqt_32(vec_add_psqt_32(columnPsqtA0[i], columnPsqtA1[i]),
|
||||
vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i])));
|
||||
}
|
||||
#else
|
||||
std::memcpy((next->*accPtr).accumulation[Perspective],
|
||||
(computed->*accPtr).accumulation[Perspective],
|
||||
HalfDimensions * sizeof(BiasType));
|
||||
std::memcpy((next->*accPtr).psqtAccumulation[Perspective],
|
||||
(computed->*accPtr).psqtAccumulation[Perspective],
|
||||
PSQTBuckets * sizeof(PSQTWeightType));
|
||||
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType i = 0; i < HalfDimensions; ++i)
|
||||
(next->*accPtr).accumulation[Perspective][i] -= weights[offset + i];
|
||||
|
||||
for (std::size_t i = 0; i < PSQTBuckets; ++i)
|
||||
(next->*accPtr).psqtAccumulation[Perspective][i] -=
|
||||
psqtWeights[index * PSQTBuckets + i];
|
||||
}
|
||||
|
||||
// Difference calculation for the activated features
|
||||
for (const auto index : added)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType i = 0; i < HalfDimensions; ++i)
|
||||
(next->*accPtr).accumulation[Perspective][i] += weights[offset + i];
|
||||
|
||||
for (std::size_t i = 0; i < PSQTBuckets; ++i)
|
||||
(next->*accPtr).psqtAccumulation[Perspective][i] +=
|
||||
psqtWeights[index * PSQTBuckets + i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
(next->*accPtr).computed[Perspective] = true;
|
||||
|
||||
if (next != target_state)
|
||||
update_accumulator_incremental<Perspective, Direction>(ksq, target_state, next);
|
||||
}
|
||||
|
||||
|
||||
template<Color Perspective>
|
||||
void update_accumulator_refresh_cache(const Position& pos,
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache) const {
|
||||
assert(cache != nullptr);
|
||||
|
||||
Square ksq = pos.square<KING>(Perspective);
|
||||
auto& entry = (*cache)[ksq][Perspective];
|
||||
FeatureSet::IndexList removed, added;
|
||||
|
||||
for (Color c : {WHITE, BLACK})
|
||||
{
|
||||
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
||||
{
|
||||
const Piece piece = make_piece(c, pt);
|
||||
const Bitboard oldBB = entry.byColorBB[c] & entry.byTypeBB[pt];
|
||||
const Bitboard newBB = pos.pieces(c, pt);
|
||||
Bitboard toRemove = oldBB & ~newBB;
|
||||
Bitboard toAdd = newBB & ~oldBB;
|
||||
|
||||
while (toRemove)
|
||||
{
|
||||
Square sq = pop_lsb(toRemove);
|
||||
removed.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
|
||||
}
|
||||
while (toAdd)
|
||||
{
|
||||
Square sq = pop_lsb(toAdd);
|
||||
added.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto& accumulator = pos.state()->*accPtr;
|
||||
accumulator.computed[Perspective] = true;
|
||||
|
||||
#ifdef VECTOR
|
||||
const bool combineLast3 = std::abs((int) removed.size() - (int) added.size()) == 1
|
||||
&& removed.size() + added.size() > 2;
|
||||
vec_t acc[Tiling::NumRegs];
|
||||
psqt_vec_t psqt[Tiling::NumPsqtRegs];
|
||||
|
||||
for (IndexType j = 0; j < HalfDimensions / Tiling::TileHeight; ++j)
|
||||
{
|
||||
auto* accTile = reinterpret_cast<vec_t*>(
|
||||
&accumulator.accumulation[Perspective][j * Tiling::TileHeight]);
|
||||
auto* entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * Tiling::TileHeight]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = entryTile[k];
|
||||
|
||||
std::size_t i = 0;
|
||||
for (; i < std::min(removed.size(), added.size()) - combineLast3; ++i)
|
||||
{
|
||||
IndexType indexR = removed[i];
|
||||
const IndexType offsetR = HalfDimensions * indexR + j * Tiling::TileHeight;
|
||||
auto* columnR = reinterpret_cast<const vec_t*>(&weights[offsetR]);
|
||||
IndexType indexA = added[i];
|
||||
const IndexType offsetA = HalfDimensions * indexA + j * Tiling::TileHeight;
|
||||
auto* columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], vec_sub_16(columnA[k], columnR[k]));
|
||||
}
|
||||
if (combineLast3)
|
||||
{
|
||||
IndexType indexR = removed[i];
|
||||
const IndexType offsetR = HalfDimensions * indexR + j * Tiling::TileHeight;
|
||||
auto* columnR = reinterpret_cast<const vec_t*>(&weights[offsetR]);
|
||||
IndexType indexA = added[i];
|
||||
const IndexType offsetA = HalfDimensions * indexA + j * Tiling::TileHeight;
|
||||
auto* columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
|
||||
|
||||
if (removed.size() > added.size())
|
||||
{
|
||||
IndexType indexR2 = removed[i + 1];
|
||||
const IndexType offsetR2 = HalfDimensions * indexR2 + j * Tiling::TileHeight;
|
||||
auto* columnR2 = reinterpret_cast<const vec_t*>(&weights[offsetR2]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(vec_add_16(acc[k], columnA[k]),
|
||||
vec_add_16(columnR[k], columnR2[k]));
|
||||
}
|
||||
else
|
||||
{
|
||||
IndexType indexA2 = added[i + 1];
|
||||
const IndexType offsetA2 = HalfDimensions * indexA2 + j * Tiling::TileHeight;
|
||||
auto* columnA2 = reinterpret_cast<const vec_t*>(&weights[offsetA2]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_add_16(vec_sub_16(acc[k], columnR[k]),
|
||||
vec_add_16(columnA[k], columnA2[k]));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (; i < removed.size(); ++i)
|
||||
{
|
||||
IndexType index = removed[i];
|
||||
const IndexType offset = HalfDimensions * index + j * Tiling::TileHeight;
|
||||
auto* column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
for (; i < added.size(); ++i)
|
||||
{
|
||||
IndexType index = added[i];
|
||||
const IndexType offset = HalfDimensions * index + j * Tiling::TileHeight;
|
||||
auto* column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; k++)
|
||||
vec_store(&entryTile[k], acc[k]);
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; k++)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
|
||||
for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j)
|
||||
{
|
||||
auto* accTilePsqt = reinterpret_cast<psqt_vec_t*>(
|
||||
&accumulator.psqtAccumulation[Perspective][j * Tiling::PsqtTileHeight]);
|
||||
auto* entryTilePsqt =
|
||||
reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * Tiling::PsqtTileHeight]);
|
||||
|
||||
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
psqt[k] = entryTilePsqt[k];
|
||||
|
||||
for (std::size_t i = 0; i < removed.size(); ++i)
|
||||
{
|
||||
IndexType index = removed[i];
|
||||
const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
|
||||
auto* columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
|
||||
|
||||
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
|
||||
}
|
||||
for (std::size_t i = 0; i < added.size(); ++i)
|
||||
{
|
||||
IndexType index = added[i];
|
||||
const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
|
||||
auto* columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
|
||||
|
||||
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
|
||||
}
|
||||
|
||||
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
vec_store_psqt(&entryTilePsqt[k], psqt[k]);
|
||||
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
vec_store_psqt(&accTilePsqt[k], psqt[k]);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for (const auto index : removed)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
entry.accumulation[j] -= weights[offset + j];
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
entry.psqtAccumulation[k] -= psqtWeights[index * PSQTBuckets + k];
|
||||
}
|
||||
for (const auto index : added)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
entry.accumulation[j] += weights[offset + j];
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k];
|
||||
}
|
||||
|
||||
// The accumulator of the refresh entry has been updated.
|
||||
// Now copy its content to the actual accumulator we were refreshing.
|
||||
|
||||
std::memcpy(accumulator.accumulation[Perspective], entry.accumulation,
|
||||
sizeof(BiasType) * HalfDimensions);
|
||||
|
||||
std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation,
|
||||
sizeof(int32_t) * PSQTBuckets);
|
||||
#endif
|
||||
|
||||
for (Color c : {WHITE, BLACK})
|
||||
entry.byColorBB[c] = pos.pieces(c);
|
||||
|
||||
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
||||
entry.byTypeBB[pt] = pos.pieces(pt);
|
||||
}
|
||||
|
||||
|
||||
template<Color Perspective>
|
||||
void update_accumulator(const Position& pos,
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache) const {
|
||||
StateInfo* st = pos.state();
|
||||
if ((st->*accPtr).computed[Perspective])
|
||||
return; // nothing to do
|
||||
|
||||
// Look for a usable already computed accumulator of an earlier position.
|
||||
// Always try to do an incremental update as most accumulators will be reusable.
|
||||
do
|
||||
{
|
||||
if (FeatureSet::requires_refresh(st, Perspective) || !st->previous
|
||||
|| st->previous->next != st)
|
||||
{
|
||||
// compute accumulator from scratch for this position
|
||||
update_accumulator_refresh_cache<Perspective>(pos, cache);
|
||||
if (st != pos.state())
|
||||
// when computing an accumulator from scratch we can use it to
|
||||
// efficiently compute the accumulator backwards, until we get to a king
|
||||
// move. We expect that we will need these accumulators later anyway, so
|
||||
// computing them now will save some work.
|
||||
update_accumulator_incremental<Perspective, BACKWARDS>(
|
||||
pos.square<KING>(Perspective), st, pos.state());
|
||||
return;
|
||||
}
|
||||
st = st->previous;
|
||||
} while (!(st->*accPtr).computed[Perspective]);
|
||||
|
||||
// Start from the oldest computed accumulator, update all the
|
||||
// accumulators up to the current position.
|
||||
update_accumulator_incremental<Perspective>(pos.square<KING>(Perspective), pos.state(), st);
|
||||
}
|
||||
|
||||
template<IndexType Size>
|
||||
friend struct AccumulatorCaches::Cache;
|
||||
|
||||
alignas(CacheLineSize) BiasType biases[HalfDimensions];
|
||||
alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions];
|
||||
alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets];
|
||||
|
||||
@@ -120,9 +120,12 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
|
||||
format_cp_compact(value, &board[y + 2][x + 2], pos);
|
||||
};
|
||||
|
||||
AccumulatorStack accumulators;
|
||||
accumulators.reset(pos, networks, caches);
|
||||
|
||||
// We estimate the value of each piece by doing a differential evaluation from
|
||||
// the current base eval, simulating the removal of the piece from its square.
|
||||
auto [psqt, positional] = networks.big.evaluate(pos, &caches.big);
|
||||
auto [psqt, positional] = networks.big.evaluate(pos, accumulators, &caches.big);
|
||||
Value base = psqt + positional;
|
||||
base = pos.side_to_move() == WHITE ? base : -base;
|
||||
|
||||
@@ -135,18 +138,15 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
|
||||
|
||||
if (pc != NO_PIECE && type_of(pc) != KING)
|
||||
{
|
||||
auto st = pos.state();
|
||||
|
||||
pos.remove_piece(sq);
|
||||
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false;
|
||||
|
||||
std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big);
|
||||
accumulators.reset(pos, networks, caches);
|
||||
std::tie(psqt, positional) = networks.big.evaluate(pos, accumulators, &caches.big);
|
||||
Value eval = psqt + positional;
|
||||
eval = pos.side_to_move() == WHITE ? eval : -eval;
|
||||
v = base - eval;
|
||||
|
||||
pos.put_piece(pc, sq);
|
||||
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false;
|
||||
}
|
||||
|
||||
writeSquare(f, r, pc, v);
|
||||
@@ -157,7 +157,8 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
|
||||
ss << board[row] << '\n';
|
||||
ss << '\n';
|
||||
|
||||
auto t = networks.big.trace_evaluate(pos, &caches.big);
|
||||
accumulators.reset(pos, networks, caches);
|
||||
auto t = networks.big.trace_evaluate(pos, accumulators, &caches.big);
|
||||
|
||||
ss << " NNUE network contributions "
|
||||
<< (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << std::endl
|
||||
|
||||
@@ -34,7 +34,6 @@ template<bool Root>
|
||||
uint64_t perft(Position& pos, Depth depth) {
|
||||
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
|
||||
|
||||
uint64_t cnt, nodes = 0;
|
||||
const bool leaf = (depth == 2);
|
||||
|
||||
@@ -34,7 +34,6 @@
|
||||
#include "bitboard.h"
|
||||
#include "misc.h"
|
||||
#include "movegen.h"
|
||||
#include "nnue/nnue_common.h"
|
||||
#include "syzygy/tbprobe.h"
|
||||
#include "tt.h"
|
||||
#include "uci.h"
|
||||
@@ -83,7 +82,6 @@ std::ostream& operator<<(std::ostream& os, const Position& pos) {
|
||||
if (int(Tablebases::MaxCardinality) >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
|
||||
{
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
|
||||
|
||||
Position p;
|
||||
p.set(pos.fen(), pos.is_chess960(), &st);
|
||||
@@ -685,10 +683,10 @@ bool Position::gives_check(Move m) const {
|
||||
// moves should be filtered out before this function is called.
|
||||
// If a pointer to the TT table is passed, the entry for the new position
|
||||
// will be prefetched
|
||||
void Position::do_move(Move m,
|
||||
StateInfo& newSt,
|
||||
bool givesCheck,
|
||||
const TranspositionTable* tt = nullptr) {
|
||||
DirtyPiece Position::do_move(Move m,
|
||||
StateInfo& newSt,
|
||||
bool givesCheck,
|
||||
const TranspositionTable* tt = nullptr) {
|
||||
|
||||
assert(m.is_ok());
|
||||
assert(&newSt != st);
|
||||
@@ -709,11 +707,7 @@ void Position::do_move(Move m,
|
||||
++st->rule50;
|
||||
++st->pliesFromNull;
|
||||
|
||||
// Used by NNUE
|
||||
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] =
|
||||
st->accumulatorSmall.computed[WHITE] = st->accumulatorSmall.computed[BLACK] = false;
|
||||
|
||||
auto& dp = st->dirtyPiece;
|
||||
DirtyPiece dp;
|
||||
dp.dirty_num = 1;
|
||||
|
||||
Color us = sideToMove;
|
||||
@@ -733,7 +727,7 @@ void Position::do_move(Move m,
|
||||
assert(captured == make_piece(us, ROOK));
|
||||
|
||||
Square rfrom, rto;
|
||||
do_castling<true>(us, from, to, rfrom, rto);
|
||||
do_castling<true>(us, from, to, rfrom, rto, &dp);
|
||||
|
||||
k ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto];
|
||||
st->nonPawnKey[us] ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto];
|
||||
@@ -906,6 +900,8 @@ void Position::do_move(Move m,
|
||||
}
|
||||
|
||||
assert(pos_is_ok());
|
||||
|
||||
return dp;
|
||||
}
|
||||
|
||||
|
||||
@@ -975,23 +971,25 @@ void Position::undo_move(Move m) {
|
||||
// Helper used to do/undo a castling move. This is a bit
|
||||
// tricky in Chess960 where from/to squares can overlap.
|
||||
template<bool Do>
|
||||
void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto) {
|
||||
void Position::do_castling(
|
||||
Color us, Square from, Square& to, Square& rfrom, Square& rto, DirtyPiece* const dp) {
|
||||
|
||||
bool kingSide = to > from;
|
||||
rfrom = to; // Castling is encoded as "king captures friendly rook"
|
||||
rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
|
||||
to = relative_square(us, kingSide ? SQ_G1 : SQ_C1);
|
||||
|
||||
assert(!Do || dp);
|
||||
|
||||
if (Do)
|
||||
{
|
||||
auto& dp = st->dirtyPiece;
|
||||
dp.piece[0] = make_piece(us, KING);
|
||||
dp.from[0] = from;
|
||||
dp.to[0] = to;
|
||||
dp.piece[1] = make_piece(us, ROOK);
|
||||
dp.from[1] = rfrom;
|
||||
dp.to[1] = rto;
|
||||
dp.dirty_num = 2;
|
||||
dp->piece[0] = make_piece(us, KING);
|
||||
dp->from[0] = from;
|
||||
dp->to[0] = to;
|
||||
dp->piece[1] = make_piece(us, ROOK);
|
||||
dp->from[1] = rfrom;
|
||||
dp->to[1] = rto;
|
||||
dp->dirty_num = 2;
|
||||
}
|
||||
|
||||
// Remove both pieces first since squares could overlap in Chess960
|
||||
@@ -1011,7 +1009,7 @@ void Position::do_null_move(StateInfo& newSt, const TranspositionTable& tt) {
|
||||
assert(!checkers());
|
||||
assert(&newSt != st);
|
||||
|
||||
std::memcpy(&newSt, st, offsetof(StateInfo, accumulatorBig));
|
||||
std::memcpy(&newSt, st, sizeof(StateInfo));
|
||||
|
||||
newSt.previous = st;
|
||||
st->next = &newSt;
|
||||
@@ -1026,11 +1024,6 @@ void Position::do_null_move(StateInfo& newSt, const TranspositionTable& tt) {
|
||||
st->key ^= Zobrist::side;
|
||||
prefetch(tt.first_entry(key()));
|
||||
|
||||
st->dirtyPiece.dirty_num = 0;
|
||||
st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator()
|
||||
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] =
|
||||
st->accumulatorSmall.computed[WHITE] = st->accumulatorSmall.computed[BLACK] = false;
|
||||
|
||||
st->pliesFromNull = 0;
|
||||
|
||||
sideToMove = ~sideToMove;
|
||||
|
||||
@@ -26,8 +26,6 @@
|
||||
#include <string>
|
||||
|
||||
#include "bitboard.h"
|
||||
#include "nnue/nnue_accumulator.h"
|
||||
#include "nnue/nnue_architecture.h"
|
||||
#include "types.h"
|
||||
|
||||
namespace Stockfish {
|
||||
@@ -61,11 +59,6 @@ struct StateInfo {
|
||||
Bitboard checkSquares[PIECE_TYPE_NB];
|
||||
Piece capturedPiece;
|
||||
int repetition;
|
||||
|
||||
// Used by NNUE
|
||||
DirtyPiece dirtyPiece;
|
||||
Eval::NNUE::Accumulator<Eval::NNUE::TransformedFeatureDimensionsBig> accumulatorBig;
|
||||
Eval::NNUE::Accumulator<Eval::NNUE::TransformedFeatureDimensionsSmall> accumulatorSmall;
|
||||
};
|
||||
|
||||
|
||||
@@ -140,11 +133,11 @@ class Position {
|
||||
Piece captured_piece() const;
|
||||
|
||||
// Doing and undoing moves
|
||||
void do_move(Move m, StateInfo& newSt, const TranspositionTable* tt);
|
||||
void do_move(Move m, StateInfo& newSt, bool givesCheck, const TranspositionTable* tt);
|
||||
void undo_move(Move m);
|
||||
void do_null_move(StateInfo& newSt, const TranspositionTable& tt);
|
||||
void undo_null_move();
|
||||
void do_move(Move m, StateInfo& newSt, const TranspositionTable* tt);
|
||||
DirtyPiece do_move(Move m, StateInfo& newSt, bool givesCheck, const TranspositionTable* tt);
|
||||
void undo_move(Move m);
|
||||
void do_null_move(StateInfo& newSt, const TranspositionTable& tt);
|
||||
void undo_null_move();
|
||||
|
||||
// Static Exchange Evaluation
|
||||
bool see_ge(Move m, int threshold = 0) const;
|
||||
@@ -187,7 +180,12 @@ class Position {
|
||||
// Other helpers
|
||||
void move_piece(Square from, Square to);
|
||||
template<bool Do>
|
||||
void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto);
|
||||
void do_castling(Color us,
|
||||
Square from,
|
||||
Square& to,
|
||||
Square& rfrom,
|
||||
Square& rto,
|
||||
DirtyPiece* const dp = nullptr);
|
||||
template<bool AfterMove>
|
||||
Key adjust_key50(Key k) const;
|
||||
|
||||
|
||||
@@ -41,7 +41,6 @@
|
||||
#include "movepick.h"
|
||||
#include "nnue/network.h"
|
||||
#include "nnue/nnue_accumulator.h"
|
||||
#include "nnue/nnue_common.h"
|
||||
#include "position.h"
|
||||
#include "syzygy/tbprobe.h"
|
||||
#include "thread.h"
|
||||
@@ -197,6 +196,8 @@ void Search::Worker::ensure_network_replicated() {
|
||||
|
||||
void Search::Worker::start_searching() {
|
||||
|
||||
accumulatorStack.reset(rootPos, networks[numaAccessToken], refreshTable);
|
||||
|
||||
// Non-main threads go directly to iterative_deepening()
|
||||
if (!is_mainthread())
|
||||
{
|
||||
@@ -552,6 +553,26 @@ void Search::Worker::iterative_deepening() {
|
||||
skill.best ? skill.best : skill.pick_best(rootMoves, multiPV)));
|
||||
}
|
||||
|
||||
|
||||
// Convenience overload: computes the givesCheck flag with pos.gives_check(move)
// and forwards to the four-argument do_move() overload.
void Search::Worker::do_move(Position& pos, const Move move, StateInfo& st) {
|
||||
do_move(pos, move, st, pos.gives_check(move));
|
||||
}
|
||||
|
||||
// Plays `move` on `pos` via Position::do_move (passing &tt along) and pushes
// the DirtyPiece it returns onto accumulatorStack. Each call is expected to be
// paired with undo_move(), which pops that entry again.
void Search::Worker::do_move(Position& pos, const Move move, StateInfo& st, const bool givesCheck) {
|
||||
DirtyPiece dp = pos.do_move(move, st, givesCheck, &tt);
|
||||
accumulatorStack.push(dp);
|
||||
}
|
||||
|
||||
// Thin wrapper that forwards to Position::do_null_move with tt. Unlike
// do_move(), it does not push anything onto accumulatorStack.
void Search::Worker::do_null_move(Position& pos, StateInfo& st) { pos.do_null_move(st, tt); }
|
||||
|
||||
// Takes back `move` on `pos`, then pops the top entry of accumulatorStack,
// the counterpart of the push performed by do_move().
void Search::Worker::undo_move(Position& pos, const Move move) {
|
||||
pos.undo_move(move);
|
||||
accumulatorStack.pop();
|
||||
}
|
||||
|
||||
// Thin wrapper that forwards to Position::undo_null_move(). accumulatorStack
// is left untouched, matching do_null_move(), which pushes nothing.
void Search::Worker::undo_null_move(Position& pos) { pos.undo_null_move(); }
|
||||
|
||||
|
||||
// Reset histories, usually before a new game
|
||||
void Search::Worker::clear() {
|
||||
mainHistory.fill(66);
|
||||
@@ -614,7 +635,6 @@ Value Search::Worker::search(
|
||||
|
||||
Move pv[MAX_PLY + 1];
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
|
||||
|
||||
Key posKey;
|
||||
Move move, excludedMove, bestMove;
|
||||
@@ -859,11 +879,11 @@ Value Search::Worker::search(
|
||||
ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0];
|
||||
ss->continuationCorrectionHistory = &thisThread->continuationCorrectionHistory[NO_PIECE][0];
|
||||
|
||||
pos.do_null_move(st, tt);
|
||||
do_null_move(pos, st);
|
||||
|
||||
Value nullValue = -search<NonPV>(pos, ss + 1, -beta, -beta + 1, depth - R, false);
|
||||
|
||||
pos.undo_null_move();
|
||||
undo_null_move(pos);
|
||||
|
||||
// Do not return unproven mate or TB scores
|
||||
if (nullValue >= beta && !is_win(nullValue))
|
||||
@@ -925,7 +945,7 @@ Value Search::Worker::search(
|
||||
|
||||
movedPiece = pos.moved_piece(move);
|
||||
|
||||
pos.do_move(move, st, &tt);
|
||||
do_move(pos, move, st);
|
||||
thisThread->nodes.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
ss->currentMove = move;
|
||||
@@ -943,7 +963,7 @@ Value Search::Worker::search(
|
||||
value = -search<NonPV>(pos, ss + 1, -probCutBeta, -probCutBeta + 1, probCutDepth,
|
||||
!cutNode);
|
||||
|
||||
pos.undo_move(move);
|
||||
undo_move(pos, move);
|
||||
|
||||
if (value >= probCutBeta)
|
||||
{
|
||||
@@ -1165,7 +1185,7 @@ moves_loop: // When in check, search starts here
|
||||
}
|
||||
|
||||
// Step 16. Make the move
|
||||
pos.do_move(move, st, givesCheck, &tt);
|
||||
do_move(pos, move, st, givesCheck);
|
||||
thisThread->nodes.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
// Add extension to new depth
|
||||
@@ -1290,7 +1310,7 @@ moves_loop: // When in check, search starts here
|
||||
}
|
||||
|
||||
// Step 19. Undo move
|
||||
pos.undo_move(move);
|
||||
undo_move(pos, move);
|
||||
|
||||
assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);
|
||||
|
||||
@@ -1510,7 +1530,6 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta)
|
||||
|
||||
Move pv[MAX_PLY + 1];
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
|
||||
|
||||
Key posKey;
|
||||
Move move, bestMove;
|
||||
@@ -1674,7 +1693,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta)
|
||||
// Step 7. Make and search the move
|
||||
Piece movedPiece = pos.moved_piece(move);
|
||||
|
||||
pos.do_move(move, st, givesCheck, &tt);
|
||||
do_move(pos, move, st, givesCheck);
|
||||
thisThread->nodes.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
// Update the current move
|
||||
@@ -1685,7 +1704,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta)
|
||||
&thisThread->continuationCorrectionHistory[movedPiece][move.to_sq()];
|
||||
|
||||
value = -qsearch<nodeType>(pos, ss + 1, -beta, -alpha);
|
||||
pos.undo_move(move);
|
||||
undo_move(pos, move);
|
||||
|
||||
assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);
|
||||
|
||||
@@ -1752,7 +1771,7 @@ TimePoint Search::Worker::elapsed() const {
|
||||
TimePoint Search::Worker::elapsed_time() const { return main_manager()->tm.elapsed_time(); }
|
||||
|
||||
Value Search::Worker::evaluate(const Position& pos) {
|
||||
return Eval::evaluate(networks[numaAccessToken], pos, refreshTable,
|
||||
return Eval::evaluate(networks[numaAccessToken], pos, accumulatorStack, refreshTable,
|
||||
optimism[pos.side_to_move()]);
|
||||
}
|
||||
|
||||
@@ -2178,7 +2197,6 @@ void SearchManager::pv(Search::Worker& worker,
|
||||
bool RootMove::extract_ponder_from_tt(const TranspositionTable& tt, Position& pos) {
|
||||
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
|
||||
|
||||
assert(pv.size() == 1);
|
||||
if (pv[0] == Move::none())
|
||||
|
||||
@@ -295,6 +295,12 @@ class Worker {
|
||||
private:
|
||||
void iterative_deepening();
|
||||
|
||||
void do_move(Position& pos, const Move move, StateInfo& st);
|
||||
void do_move(Position& pos, const Move move, StateInfo& st, const bool givesCheck);
|
||||
void do_null_move(Position& pos, StateInfo& st);
|
||||
void undo_move(Position& pos, const Move move);
|
||||
void undo_null_move(Position& pos);
|
||||
|
||||
// This is the main search function, for both PV and non-PV nodes
|
||||
template<NodeType nodeType>
|
||||
Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode);
|
||||
@@ -347,6 +353,7 @@ class Worker {
|
||||
const LazyNumaReplicated<Eval::NNUE::Networks>& networks;
|
||||
|
||||
// Used by NNUE
|
||||
Eval::NNUE::AccumulatorStack accumulatorStack;
|
||||
Eval::NNUE::AccumulatorCaches refreshTable;
|
||||
|
||||
friend class Stockfish::ThreadPool;
|
||||
|
||||
Reference in New Issue
Block a user