mirror of
https://github.com/HChaZZY/Stockfish.git
synced 2025-12-24 19:16:49 +08:00
3
AUTHORS
3
AUTHORS
@@ -44,6 +44,7 @@ Daniel Dugovic (ddugovic)
|
||||
Dariusz Orzechowski (dorzechowski)
|
||||
David Zar
|
||||
Daylen Yang (daylen)
|
||||
Deshawn Mohan-Smith (GoldenRare)
|
||||
DiscanX
|
||||
Dominik Schlösser (domschl)
|
||||
double-beep
|
||||
@@ -64,7 +65,6 @@ Gary Heckman (gheckman)
|
||||
George Sobala (gsobala)
|
||||
gguliash
|
||||
Gian-Carlo Pascutto (gcp)
|
||||
Deshawn Mohan-Smith (GoldenRare)
|
||||
Gontran Lemaire (gonlem)
|
||||
Goodkov Vasiliy Aleksandrovich (goodkov)
|
||||
Gregor Cramer
|
||||
@@ -112,6 +112,7 @@ Mark Tenzer (31m059)
|
||||
marotear
|
||||
Matthew Lai (matthewlai)
|
||||
Matthew Sullivan (Matt14916)
|
||||
Maxim Molchanov (Maxim)
|
||||
Michael An (man)
|
||||
Michael Byrne (MichaelB7)
|
||||
Michael Chaly (Vizvezdenec)
|
||||
|
||||
@@ -41,7 +41,7 @@ BINDIR = $(PREFIX)/bin
|
||||
### Built-in benchmark for pgo-builds
|
||||
PGO_TRAINING_DATA_FILE = pgo_training_data.bin
|
||||
PGOBENCH = ./$(EXE) bench
|
||||
PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 1000 output_file_name $(PGO_TRAINING_DATA_FILE)
|
||||
PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 1000 sfen_format bin output_file_name $(PGO_TRAINING_DATA_FILE)
|
||||
|
||||
### Source and object files
|
||||
SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \
|
||||
|
||||
@@ -84,11 +84,11 @@ using namespace Trace;
|
||||
namespace {
|
||||
|
||||
// Threshold for lazy and space evaluation
|
||||
constexpr Value LazyThreshold1 = Value(1400);
|
||||
constexpr Value LazyThreshold2 = Value(1300);
|
||||
constexpr Value SpaceThreshold = Value(12222);
|
||||
constexpr Value NNUEThreshold1 = Value(550);
|
||||
constexpr Value NNUEThreshold2 = Value(150);
|
||||
constexpr Value LazyThreshold1 = Value(1565);
|
||||
constexpr Value LazyThreshold2 = Value(1102);
|
||||
constexpr Value SpaceThreshold = Value(11551);
|
||||
constexpr Value NNUEThreshold1 = Value(682);
|
||||
constexpr Value NNUEThreshold2 = Value(176);
|
||||
|
||||
// KingAttackWeights[PieceType] contains king attack weights by piece type
|
||||
constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
|
||||
@@ -930,7 +930,7 @@ Value Eval::evaluate(const Position& pos) {
|
||||
{
|
||||
// Scale and shift NNUE for compatibility with search and classical evaluation
|
||||
auto adjusted_NNUE = [&](){
|
||||
int mat = pos.non_pawn_material() + PieceValue[MG][PAWN] * pos.count<PAWN>();
|
||||
int mat = pos.non_pawn_material() + PawnValueMg * pos.count<PAWN>();
|
||||
return NNUE::evaluate(pos) * (720 + mat / 32) / 1024 + Tempo;
|
||||
};
|
||||
|
||||
@@ -940,16 +940,18 @@ Value Eval::evaluate(const Position& pos) {
|
||||
bool largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50;
|
||||
bool classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB));
|
||||
|
||||
v = classical ? Evaluation<NO_TRACE>(pos).value() : adjusted_NNUE();
|
||||
bool strongClassical = pos.non_pawn_material() < 2 * RookValueMg && pos.count<PAWN>() < 2;
|
||||
|
||||
v = classical || strongClassical ? Evaluation<NO_TRACE>(pos).value() : adjusted_NNUE();
|
||||
|
||||
// If the classical eval is small and imbalance large, use NNUE nevertheless.
|
||||
// For the case of opposite colored bishops, switch to NNUE eval with
|
||||
// small probability if the classical eval is less than the threshold.
|
||||
if ( largePsq
|
||||
&& (abs(v) * 16 < NNUEThreshold2 * r50
|
||||
|| ( pos.opposite_bishops()
|
||||
&& abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50
|
||||
&& !(pos.this_thread()->nodes & 0xB))))
|
||||
if ( largePsq && !strongClassical
|
||||
&& ( abs(v) * 16 < NNUEThreshold2 * r50
|
||||
|| ( pos.opposite_bishops()
|
||||
&& abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50
|
||||
&& !(pos.this_thread()->nodes & 0xB))))
|
||||
v = adjusted_NNUE();
|
||||
}
|
||||
|
||||
|
||||
@@ -585,11 +585,10 @@ namespace CommandLine {
|
||||
string argv0; // path+name of the executable binary, as given by argv[0]
|
||||
string binaryDirectory; // path of the executable directory
|
||||
string workingDirectory; // path of the working directory
|
||||
string pathSeparator; // Separator for our current OS
|
||||
|
||||
void init(int argc, char* argv[]) {
|
||||
(void)argc;
|
||||
string separator;
|
||||
string pathSeparator;
|
||||
|
||||
// extract the path+name of the executable binary
|
||||
argv0 = argv[0];
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Code for calculating NNUE evaluation function
|
||||
@@ -40,330 +40,313 @@
|
||||
|
||||
namespace Eval::NNUE {
|
||||
|
||||
const uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
|
||||
// convention: W - us, B - them
|
||||
// viewed from other side, W and B are reversed
|
||||
{ PS_NONE, PS_NONE },
|
||||
{ PS_W_PAWN, PS_B_PAWN },
|
||||
{ PS_W_KNIGHT, PS_B_KNIGHT },
|
||||
{ PS_W_BISHOP, PS_B_BISHOP },
|
||||
{ PS_W_ROOK, PS_B_ROOK },
|
||||
{ PS_W_QUEEN, PS_B_QUEEN },
|
||||
{ PS_W_KING, PS_B_KING },
|
||||
{ PS_NONE, PS_NONE },
|
||||
{ PS_NONE, PS_NONE },
|
||||
{ PS_B_PAWN, PS_W_PAWN },
|
||||
{ PS_B_KNIGHT, PS_W_KNIGHT },
|
||||
{ PS_B_BISHOP, PS_W_BISHOP },
|
||||
{ PS_B_ROOK, PS_W_ROOK },
|
||||
{ PS_B_QUEEN, PS_W_QUEEN },
|
||||
{ PS_B_KING, PS_W_KING },
|
||||
{ PS_NONE, PS_NONE }
|
||||
};
|
||||
const uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
|
||||
// convention: W - us, B - them
|
||||
// viewed from other side, W and B are reversed
|
||||
{ PS_NONE, PS_NONE },
|
||||
{ PS_W_PAWN, PS_B_PAWN },
|
||||
{ PS_W_KNIGHT, PS_B_KNIGHT },
|
||||
{ PS_W_BISHOP, PS_B_BISHOP },
|
||||
{ PS_W_ROOK, PS_B_ROOK },
|
||||
{ PS_W_QUEEN, PS_B_QUEEN },
|
||||
{ PS_W_KING, PS_B_KING },
|
||||
{ PS_NONE, PS_NONE },
|
||||
{ PS_NONE, PS_NONE },
|
||||
{ PS_B_PAWN, PS_W_PAWN },
|
||||
{ PS_B_KNIGHT, PS_W_KNIGHT },
|
||||
{ PS_B_BISHOP, PS_W_BISHOP },
|
||||
{ PS_B_ROOK, PS_W_ROOK },
|
||||
{ PS_B_QUEEN, PS_W_QUEEN },
|
||||
{ PS_B_KING, PS_W_KING },
|
||||
{ PS_NONE, PS_NONE }
|
||||
};
|
||||
|
||||
// Input feature converter
|
||||
LargePagePtr<FeatureTransformer> feature_transformer;
|
||||
// Input feature converter
|
||||
LargePagePtr<FeatureTransformer> feature_transformer;
|
||||
|
||||
// Evaluation function
|
||||
AlignedPtr<Network> network;
|
||||
// Evaluation function
|
||||
AlignedPtr<Network> network;
|
||||
|
||||
// Evaluation function file name
|
||||
std::string fileName;
|
||||
// Evaluation function file name
|
||||
std::string fileName;
|
||||
|
||||
// Saved evaluation function file name
|
||||
std::string savedfileName = "nn.bin";
|
||||
// Saved evaluation function file name
|
||||
std::string savedfileName = "nn.bin";
|
||||
|
||||
// Get a string that represents the structure of the evaluation function
|
||||
std::string get_architecture_string() {
|
||||
return "Features=" + FeatureTransformer::get_structure_string() +
|
||||
",Network=" + Network::get_structure_string();
|
||||
}
|
||||
// Get a string that represents the structure of the evaluation function
|
||||
std::string get_architecture_string() {
|
||||
return "Features=" + FeatureTransformer::get_structure_string() +
|
||||
",Network=" + Network::get_structure_string();
|
||||
}
|
||||
|
||||
std::string get_layers_info() {
|
||||
return
|
||||
FeatureTransformer::get_layers_info()
|
||||
+ '\n' + Network::get_layers_info();
|
||||
}
|
||||
std::string get_layers_info() {
|
||||
return
|
||||
FeatureTransformer::get_layers_info()
|
||||
+ '\n' + Network::get_layers_info();
|
||||
}
|
||||
|
||||
UseNNUEMode useNNUE;
|
||||
std::string eval_file_loaded = "None";
|
||||
UseNNUEMode useNNUE;
|
||||
std::string eval_file_loaded = "None";
|
||||
|
||||
namespace Detail {
|
||||
namespace Detail {
|
||||
|
||||
// Initialize the evaluation function parameters
|
||||
template <typename T>
|
||||
void initialize(AlignedPtr<T>& pointer) {
|
||||
// Initialize the evaluation function parameters
|
||||
template <typename T>
|
||||
void initialize(AlignedPtr<T>& pointer) {
|
||||
|
||||
pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void initialize(LargePagePtr<T>& pointer) {
|
||||
template <typename T>
|
||||
void initialize(LargePagePtr<T>& pointer) {
|
||||
|
||||
static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
|
||||
static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
|
||||
pointer.reset(reinterpret_cast<T*>(aligned_large_pages_alloc(sizeof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
|
||||
pointer.reset(reinterpret_cast<T*>(aligned_large_pages_alloc(sizeof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
// Read evaluation function parameters
|
||||
template <typename T>
|
||||
bool ReadParameters(std::istream& stream, T& reference) {
|
||||
|
||||
// Read evaluation function parameters
|
||||
template <typename T>
|
||||
bool ReadParameters(std::istream& stream, T& reference) {
|
||||
std::uint32_t header;
|
||||
header = read_little_endian<std::uint32_t>(stream);
|
||||
if (!stream || header != T::GetHashValue()) return false;
|
||||
return reference.ReadParameters(stream);
|
||||
}
|
||||
|
||||
std::uint32_t header;
|
||||
header = read_little_endian<std::uint32_t>(stream);
|
||||
// write evaluation function parameters
|
||||
template <typename T>
|
||||
bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
|
||||
constexpr std::uint32_t header = T::GetHashValue();
|
||||
|
||||
if (!stream || header != T::GetHashValue())
|
||||
return false;
|
||||
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
|
||||
|
||||
return reference.ReadParameters(stream);
|
||||
}
|
||||
return pointer->WriteParameters(stream);
|
||||
}
|
||||
|
||||
// write evaluation function parameters
|
||||
template <typename T>
|
||||
bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
|
||||
constexpr std::uint32_t header = T::GetHashValue();
|
||||
template <typename T>
|
||||
bool WriteParameters(std::ostream& stream, const LargePagePtr<T>& pointer) {
|
||||
constexpr std::uint32_t header = T::GetHashValue();
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
|
||||
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
|
||||
|
||||
return pointer->WriteParameters(stream);
|
||||
}
|
||||
return pointer->WriteParameters(stream);
|
||||
}
|
||||
} // namespace Detail
|
||||
|
||||
template <typename T>
|
||||
bool WriteParameters(std::ostream& stream, const LargePagePtr<T>& pointer) {
|
||||
constexpr std::uint32_t header = T::GetHashValue();
|
||||
// Initialize the evaluation function parameters
|
||||
void initialize() {
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
|
||||
Detail::initialize(feature_transformer);
|
||||
Detail::initialize(network);
|
||||
}
|
||||
|
||||
return pointer->WriteParameters(stream);
|
||||
}
|
||||
} // namespace Detail
|
||||
// Read network header
|
||||
bool read_header(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
|
||||
{
|
||||
std::uint32_t version, size;
|
||||
|
||||
// Initialize the evaluation function parameters
|
||||
void initialize() {
|
||||
version = read_little_endian<std::uint32_t>(stream);
|
||||
*hash_value = read_little_endian<std::uint32_t>(stream);
|
||||
size = read_little_endian<std::uint32_t>(stream);
|
||||
if (!stream || version != kVersion) return false;
|
||||
architecture->resize(size);
|
||||
stream.read(&(*architecture)[0], size);
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
Detail::initialize(feature_transformer);
|
||||
Detail::initialize(network);
|
||||
}
|
||||
// write the header
|
||||
bool write_header(std::ostream& stream,
|
||||
std::uint32_t hash_value, const std::string& architecture) {
|
||||
|
||||
// Read network header
|
||||
bool read_header(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
|
||||
{
|
||||
std::uint32_t version, size;
|
||||
stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));
|
||||
stream.write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value));
|
||||
|
||||
version = read_little_endian<std::uint32_t>(stream);
|
||||
*hash_value = read_little_endian<std::uint32_t>(stream);
|
||||
size = read_little_endian<std::uint32_t>(stream);
|
||||
const std::uint32_t size = static_cast<std::uint32_t>(architecture.size());
|
||||
|
||||
if (!stream || version != kVersion)
|
||||
return false;
|
||||
stream.write(reinterpret_cast<const char*>(&size), sizeof(size));
|
||||
stream.write(architecture.data(), size);
|
||||
|
||||
architecture->resize(size);
|
||||
stream.read(&(*architecture)[0], size);
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
|
||||
// write the header
|
||||
bool write_header(std::ostream& stream,
|
||||
std::uint32_t hash_value, const std::string& architecture) {
|
||||
std::uint32_t hash_value;
|
||||
std::string architecture;
|
||||
if (!read_header(stream, &hash_value, &architecture)) return false;
|
||||
if (hash_value != kHashValue) return false;
|
||||
if (!Detail::ReadParameters(stream, *feature_transformer)) return false;
|
||||
if (!Detail::ReadParameters(stream, *network)) return false;
|
||||
return stream && stream.peek() == std::ios::traits_type::eof();
|
||||
}
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));
|
||||
stream.write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value));
|
||||
// write evaluation function parameters
|
||||
bool WriteParameters(std::ostream& stream) {
|
||||
|
||||
const std::uint32_t size = static_cast<std::uint32_t>(architecture.size());
|
||||
if (!write_header(stream, kHashValue, get_architecture_string()))
|
||||
return false;
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(&size), sizeof(size));
|
||||
stream.write(architecture.data(), size);
|
||||
if (!Detail::WriteParameters(stream, feature_transformer))
|
||||
return false;
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
if (!Detail::WriteParameters(stream, network))
|
||||
return false;
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
std::uint32_t hash_value;
|
||||
std::string architecture;
|
||||
if (!read_header(stream, &hash_value, &architecture))
|
||||
return false;
|
||||
// Evaluation function. Perform differential calculation.
|
||||
Value evaluate(const Position& pos) {
|
||||
|
||||
if (hash_value != kHashValue)
|
||||
return false;
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
|
||||
if (!Detail::ReadParameters(stream, *feature_transformer))
|
||||
return false;
|
||||
|
||||
if (!Detail::ReadParameters(stream, *network))
|
||||
return false;
|
||||
|
||||
return stream && stream.peek() == std::ios::traits_type::eof();
|
||||
}
|
||||
// write evaluation function parameters
|
||||
bool WriteParameters(std::ostream& stream) {
|
||||
|
||||
if (!write_header(stream, kHashValue, get_architecture_string()))
|
||||
return false;
|
||||
|
||||
if (!Detail::WriteParameters(stream, feature_transformer))
|
||||
return false;
|
||||
|
||||
if (!Detail::WriteParameters(stream, network))
|
||||
return false;
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
// Evaluation function. Perform differential calculation.
|
||||
Value evaluate(const Position& pos) {
|
||||
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
|
||||
constexpr uint64_t alignment = kCacheLineSize;
|
||||
constexpr uint64_t alignment = kCacheLineSize;
|
||||
|
||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||
TransformedFeatureType transformed_features_unaligned[
|
||||
FeatureTransformer::kBufferSize + alignment / sizeof(TransformedFeatureType)];
|
||||
char buffer_unaligned[Network::kBufferSize + alignment];
|
||||
TransformedFeatureType transformed_features_unaligned[
|
||||
FeatureTransformer::kBufferSize + alignment / sizeof(TransformedFeatureType)];
|
||||
char buffer_unaligned[Network::kBufferSize + alignment];
|
||||
|
||||
auto* transformed_features = align_ptr_up<alignment>(&transformed_features_unaligned[0]);
|
||||
auto* buffer = align_ptr_up<alignment>(&buffer_unaligned[0]);
|
||||
auto* transformed_features = align_ptr_up<alignment>(&transformed_features_unaligned[0]);
|
||||
auto* buffer = align_ptr_up<alignment>(&buffer_unaligned[0]);
|
||||
#else
|
||||
alignas(alignment)
|
||||
TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize];
|
||||
alignas(alignment) char buffer[Network::kBufferSize];
|
||||
alignas(alignment)
|
||||
TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize];
|
||||
alignas(alignment) char buffer[Network::kBufferSize];
|
||||
#endif
|
||||
|
||||
ASSERT_ALIGNED(transformed_features, alignment);
|
||||
ASSERT_ALIGNED(buffer, alignment);
|
||||
ASSERT_ALIGNED(transformed_features, alignment);
|
||||
ASSERT_ALIGNED(buffer, alignment);
|
||||
|
||||
feature_transformer->Transform(pos, transformed_features);
|
||||
feature_transformer->Transform(pos, transformed_features);
|
||||
const auto output = network->Propagate(transformed_features, buffer);
|
||||
|
||||
return static_cast<Value>(output[0] / FV_SCALE);
|
||||
}
|
||||
|
||||
const auto output = network->Propagate(transformed_features, buffer);
|
||||
// Load eval, from a file stream or a memory stream
|
||||
bool load_eval(std::string name, std::istream& stream) {
|
||||
|
||||
return static_cast<Value>(output[0] / FV_SCALE);
|
||||
}
|
||||
initialize();
|
||||
fileName = name;
|
||||
return ReadParameters(stream);
|
||||
}
|
||||
|
||||
// Load eval, from a file stream or a memory stream
|
||||
bool load_eval(std::string name, std::istream& stream) {
|
||||
static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode)
|
||||
{
|
||||
if (mode == "false")
|
||||
return UseNNUEMode::False;
|
||||
else if (mode == "true")
|
||||
return UseNNUEMode::True;
|
||||
else if (mode == "pure")
|
||||
return UseNNUEMode::Pure;
|
||||
|
||||
initialize();
|
||||
return UseNNUEMode::False;
|
||||
}
|
||||
|
||||
fileName = name;
|
||||
return ReadParameters(stream);
|
||||
}
|
||||
void init() {
|
||||
|
||||
static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode)
|
||||
{
|
||||
if (mode == "false")
|
||||
return UseNNUEMode::False;
|
||||
else if (mode == "true")
|
||||
return UseNNUEMode::True;
|
||||
else if (mode == "pure")
|
||||
return UseNNUEMode::Pure;
|
||||
useNNUE = nnue_mode_from_option(Options["Use NNUE"]);
|
||||
|
||||
return UseNNUEMode::False;
|
||||
}
|
||||
if (Options["SkipLoadingEval"] || useNNUE == UseNNUEMode::False)
|
||||
{
|
||||
eval_file_loaded.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
void init() {
|
||||
|
||||
useNNUE = nnue_mode_from_option(Options["Use NNUE"]);
|
||||
|
||||
if (Options["SkipLoadingEval"] || useNNUE == UseNNUEMode::False)
|
||||
{
|
||||
eval_file_loaded.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
std::string eval_file = std::string(Options["EvalFile"]);
|
||||
std::string eval_file = std::string(Options["EvalFile"]);
|
||||
|
||||
#if defined(DEFAULT_NNUE_DIRECTORY)
|
||||
#define stringify2(x) #x
|
||||
#define stringify(x) stringify2(x)
|
||||
std::vector<std::string> dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
|
||||
std::vector<std::string> dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
|
||||
#else
|
||||
std::vector<std::string> dirs = { "" , CommandLine::binaryDirectory };
|
||||
std::vector<std::string> dirs = { "" , CommandLine::binaryDirectory };
|
||||
#endif
|
||||
|
||||
for (std::string directory : dirs)
|
||||
{
|
||||
if (eval_file_loaded != eval_file)
|
||||
{
|
||||
std::ifstream stream(directory + eval_file, std::ios::binary);
|
||||
if (load_eval(eval_file, stream))
|
||||
{
|
||||
sync_cout << "info string Loaded eval file " << directory + eval_file << sync_endl;
|
||||
eval_file_loaded = eval_file;
|
||||
}
|
||||
else
|
||||
{
|
||||
sync_cout << "info string ERROR: failed to load eval file " << directory + eval_file << sync_endl;
|
||||
eval_file_loaded.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
for (std::string directory : dirs)
|
||||
{
|
||||
if (eval_file_loaded != eval_file)
|
||||
{
|
||||
std::ifstream stream(directory + eval_file, std::ios::binary);
|
||||
if (load_eval(eval_file, stream))
|
||||
{
|
||||
sync_cout << "info string Loaded eval file " << directory + eval_file << sync_endl;
|
||||
eval_file_loaded = eval_file;
|
||||
}
|
||||
else
|
||||
{
|
||||
sync_cout << "info string ERROR: failed to load eval file " << directory + eval_file << sync_endl;
|
||||
eval_file_loaded.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef stringify2
|
||||
#undef stringify
|
||||
}
|
||||
}
|
||||
|
||||
/// NNUE::verify() verifies that the last net used was loaded successfully
|
||||
void verify_eval_file_loaded() {
|
||||
/// NNUE::verify() verifies that the last net used was loaded successfully
|
||||
void verify_eval_file_loaded() {
|
||||
|
||||
std::string eval_file = std::string(Options["EvalFile"]);
|
||||
std::string eval_file = std::string(Options["EvalFile"]);
|
||||
|
||||
if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
|
||||
{
|
||||
UCI::OptionsMap defaults;
|
||||
UCI::init(defaults);
|
||||
if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
|
||||
{
|
||||
UCI::OptionsMap defaults;
|
||||
UCI::init(defaults);
|
||||
|
||||
std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
|
||||
std::string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
|
||||
std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
|
||||
std::string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(defaults["EvalFile"]);
|
||||
std::string msg5 = "The engine will be terminated now.";
|
||||
std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
|
||||
std::string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
|
||||
std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
|
||||
std::string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(defaults["EvalFile"]);
|
||||
std::string msg5 = "The engine will be terminated now.";
|
||||
|
||||
sync_cout << "info string ERROR: " << msg1 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg2 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg3 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg4 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg5 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg1 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg2 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg3 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg4 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg5 << sync_endl;
|
||||
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (useNNUE != UseNNUEMode::False)
|
||||
sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl;
|
||||
else
|
||||
sync_cout << "info string classical evaluation enabled" << sync_endl;
|
||||
}
|
||||
if (useNNUE != UseNNUEMode::False)
|
||||
sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl;
|
||||
else
|
||||
sync_cout << "info string classical evaluation enabled" << sync_endl;
|
||||
}
|
||||
|
||||
/// In training we override eval file so this is useful.
|
||||
void verify_any_net_loaded() {
|
||||
/// In training we override eval file so this is useful.
|
||||
void verify_any_net_loaded() {
|
||||
|
||||
if (!Options["SkipLoadingEval"] && useNNUE != UseNNUEMode::False && eval_file_loaded.empty())
|
||||
{
|
||||
UCI::OptionsMap defaults;
|
||||
UCI::init(defaults);
|
||||
if (!Options["SkipLoadingEval"] && useNNUE != UseNNUEMode::False && eval_file_loaded.empty())
|
||||
{
|
||||
UCI::OptionsMap defaults;
|
||||
UCI::init(defaults);
|
||||
|
||||
std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
|
||||
std::string msg2 = "The option is set to true, but the network file was not loaded successfully.";
|
||||
std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
|
||||
std::string msg5 = "The engine will be terminated now.";
|
||||
std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
|
||||
std::string msg2 = "The option is set to true, but the network file was not loaded successfully.";
|
||||
std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
|
||||
std::string msg5 = "The engine will be terminated now.";
|
||||
|
||||
sync_cout << "info string ERROR: " << msg1 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg2 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg3 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg5 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg1 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg2 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg3 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg5 << sync_endl;
|
||||
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (useNNUE != UseNNUEMode::False)
|
||||
sync_cout << "info string NNUE evaluation using " << eval_file_loaded << " enabled" << sync_endl;
|
||||
else
|
||||
sync_cout << "info string classical evaluation enabled" << sync_endl;
|
||||
}
|
||||
if (useNNUE != UseNNUEMode::False)
|
||||
sync_cout << "info string NNUE evaluation using " << eval_file_loaded << " enabled" << sync_endl;
|
||||
else
|
||||
sync_cout << "info string classical evaluation enabled" << sync_endl;
|
||||
}
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
@@ -1,21 +1,23 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// header used in NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
|
||||
#define NNUE_EVALUATE_NNUE_H_INCLUDED
|
||||
|
||||
@@ -25,84 +27,83 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
// header used in NNUE evaluation function
|
||||
namespace Eval::NNUE {
|
||||
|
||||
enum struct UseNNUEMode
|
||||
{
|
||||
False,
|
||||
True,
|
||||
Pure
|
||||
};
|
||||
enum struct UseNNUEMode
|
||||
{
|
||||
False,
|
||||
True,
|
||||
Pure
|
||||
};
|
||||
|
||||
// Hash value of evaluation function structure
|
||||
constexpr std::uint32_t kHashValue =
|
||||
FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
|
||||
// Hash value of evaluation function structure
|
||||
constexpr std::uint32_t kHashValue =
|
||||
FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
|
||||
|
||||
// Deleter for automating release of memory area
|
||||
template <typename T>
|
||||
struct AlignedDeleter {
|
||||
void operator()(T* ptr) const {
|
||||
ptr->~T();
|
||||
std_aligned_free(ptr);
|
||||
}
|
||||
};
|
||||
// Deleter for automating release of memory area
|
||||
template <typename T>
|
||||
struct AlignedDeleter {
|
||||
void operator()(T* ptr) const {
|
||||
ptr->~T();
|
||||
std_aligned_free(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct LargePageDeleter {
|
||||
void operator()(T* ptr) const {
|
||||
ptr->~T();
|
||||
aligned_large_pages_free(ptr);
|
||||
}
|
||||
};
|
||||
template <typename T>
|
||||
struct LargePageDeleter {
|
||||
void operator()(T* ptr) const {
|
||||
ptr->~T();
|
||||
aligned_large_pages_free(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
|
||||
template <typename T>
|
||||
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
|
||||
|
||||
template <typename T>
|
||||
using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
|
||||
template <typename T>
|
||||
using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
|
||||
|
||||
// Input feature converter
|
||||
extern LargePagePtr<FeatureTransformer> feature_transformer;
|
||||
// Input feature converter
|
||||
extern LargePagePtr<FeatureTransformer> feature_transformer;
|
||||
|
||||
// Evaluation function
|
||||
extern AlignedPtr<Network> network;
|
||||
// Evaluation function
|
||||
extern AlignedPtr<Network> network;
|
||||
|
||||
// Evaluation function file name
|
||||
extern std::string fileName;
|
||||
// Evaluation function file name
|
||||
extern std::string fileName;
|
||||
|
||||
// Saved evaluation function file name
|
||||
extern std::string savedfileName;
|
||||
// Saved evaluation function file name
|
||||
extern std::string savedfileName;
|
||||
|
||||
extern UseNNUEMode useNNUE;
|
||||
extern UseNNUEMode useNNUE;
|
||||
|
||||
extern std::string eval_file_loaded;
|
||||
extern std::string eval_file_loaded;
|
||||
|
||||
// Get a string that represents the structure of the evaluation function
|
||||
std::string get_architecture_string();
|
||||
// Get a string that represents the structure of the evaluation function
|
||||
std::string get_architecture_string();
|
||||
|
||||
std::string get_layers_info();
|
||||
std::string get_layers_info();
|
||||
|
||||
// read the header
|
||||
bool read_header(std::istream& stream,
|
||||
std::uint32_t* hash_value, std::string* architecture);
|
||||
// read the header
|
||||
bool read_header(std::istream& stream,
|
||||
std::uint32_t* hash_value, std::string* architecture);
|
||||
|
||||
// write the header
|
||||
bool write_header(std::ostream& stream,
|
||||
std::uint32_t hash_value, const std::string& architecture);
|
||||
// write the header
|
||||
bool write_header(std::ostream& stream,
|
||||
std::uint32_t hash_value, const std::string& architecture);
|
||||
|
||||
// read evaluation function parameters
|
||||
bool ReadParameters(std::istream& stream);
|
||||
// read evaluation function parameters
|
||||
bool ReadParameters(std::istream& stream);
|
||||
|
||||
// write evaluation function parameters
|
||||
bool WriteParameters(std::ostream& stream);
|
||||
// write evaluation function parameters
|
||||
bool WriteParameters(std::ostream& stream);
|
||||
|
||||
Value evaluate(const Position& pos);
|
||||
bool load_eval(std::string name, std::istream& stream);
|
||||
void init();
|
||||
Value evaluate(const Position& pos);
|
||||
bool load_eval(std::string name, std::istream& stream);
|
||||
void init();
|
||||
|
||||
void verify_eval_file_loaded();
|
||||
void verify_any_net_loaded();
|
||||
void verify_eval_file_loaded();
|
||||
void verify_any_net_loaded();
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// A class template that represents the input feature set of the NNUE evaluation function
|
||||
@@ -22,7 +22,6 @@
|
||||
#define NNUE_FEATURE_SET_H_INCLUDED
|
||||
|
||||
#include "features_common.h"
|
||||
|
||||
#include <array>
|
||||
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//Common header of input features of NNUE evaluation function
|
||||
@@ -21,30 +21,29 @@
|
||||
#ifndef NNUE_FEATURES_COMMON_H_INCLUDED
|
||||
#define NNUE_FEATURES_COMMON_H_INCLUDED
|
||||
|
||||
#include "evaluate.h"
|
||||
|
||||
#include "nnue/nnue_common.h"
|
||||
#include "../../evaluate.h"
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
class IndexList;
|
||||
class IndexList;
|
||||
|
||||
template <typename... FeatureTypes>
|
||||
class FeatureSet;
|
||||
template <typename... FeatureTypes>
|
||||
class FeatureSet;
|
||||
|
||||
// Trigger to perform full calculations instead of difference only
|
||||
enum class TriggerEvent {
|
||||
kNone, // Calculate the difference whenever possible
|
||||
kFriendKingMoved, // calculate full evaluation when own king moves
|
||||
kEnemyKingMoved, // calculate full evaluation when opponent king moves
|
||||
kAnyKingMoved, // calculate full evaluation when any king moves
|
||||
kAnyPieceMoved, // always calculate full evaluation
|
||||
};
|
||||
// Trigger to perform full calculations instead of difference only
|
||||
enum class TriggerEvent {
|
||||
kNone, // Calculate the difference whenever possible
|
||||
kFriendKingMoved, // calculate full evaluation when own king moves
|
||||
kEnemyKingMoved, // calculate full evaluation when opponent king moves
|
||||
kAnyKingMoved, // calculate full evaluation when any king moves
|
||||
kAnyPieceMoved, // always calculate full evaluation
|
||||
};
|
||||
|
||||
enum class Side {
|
||||
kFriend, // side to move
|
||||
kEnemy, // opponent
|
||||
};
|
||||
enum class Side {
|
||||
kFriend, // side to move
|
||||
kEnemy, // opponent
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Definition of index list of input features
|
||||
@@ -21,43 +21,43 @@
|
||||
#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED
|
||||
#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED
|
||||
|
||||
#include "position.h"
|
||||
|
||||
#include "nnue/nnue_architecture.h"
|
||||
#include "../../position.h"
|
||||
#include "../nnue_architecture.h"
|
||||
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Class template used for feature index list
|
||||
template <typename T, std::size_t MaxSize>
|
||||
class ValueList {
|
||||
// Class template used for feature index list
|
||||
template <typename T, std::size_t MaxSize>
|
||||
class ValueList {
|
||||
|
||||
public:
|
||||
std::size_t size() const { return size_; }
|
||||
void resize(std::size_t size) { size_ = size; }
|
||||
void push_back(const T& value) { values_[size_++] = value; }
|
||||
T& operator[](std::size_t index) { return values_[index]; }
|
||||
T* begin() { return values_; }
|
||||
T* end() { return values_ + size_; }
|
||||
const T& operator[](std::size_t index) const { return values_[index]; }
|
||||
const T* begin() const { return values_; }
|
||||
const T* end() const { return values_ + size_; }
|
||||
public:
|
||||
std::size_t size() const { return size_; }
|
||||
void resize(std::size_t size) { size_ = size; }
|
||||
void push_back(const T& value) { values_[size_++] = value; }
|
||||
T& operator[](std::size_t index) { return values_[index]; }
|
||||
T* begin() { return values_; }
|
||||
T* end() { return values_ + size_; }
|
||||
const T& operator[](std::size_t index) const { return values_[index]; }
|
||||
const T* begin() const { return values_; }
|
||||
const T* end() const { return values_ + size_; }
|
||||
|
||||
void swap(ValueList& other) {
|
||||
const std::size_t max_size = std::max(size_, other.size_);
|
||||
for (std::size_t i = 0; i < max_size; ++i) {
|
||||
std::swap(values_[i], other.values_[i]);
|
||||
}
|
||||
std::swap(size_, other.size_);
|
||||
}
|
||||
void swap(ValueList& other) {
|
||||
const std::size_t max_size = std::max(size_, other.size_);
|
||||
for (std::size_t i = 0; i < max_size; ++i) {
|
||||
std::swap(values_[i], other.values_[i]);
|
||||
}
|
||||
std::swap(size_, other.size_);
|
||||
}
|
||||
|
||||
private:
|
||||
T values_[MaxSize] = {};
|
||||
std::size_t size_ = 0;
|
||||
};
|
||||
private:
|
||||
T values_[MaxSize];
|
||||
std::size_t size_ = 0;
|
||||
};
|
||||
|
||||
//Type of feature index list
|
||||
class IndexList : public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
|
||||
};
|
||||
//Type of feature index list
|
||||
class IndexList
|
||||
: public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
|
||||
@@ -223,13 +223,13 @@ namespace Eval::NNUE::Layers {
|
||||
return _mm512_add_epi32(_mm512_permutexvar_epi32(indices, x), bias);
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) {
|
||||
#if defined (USE_VNNI)
|
||||
[[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) {
|
||||
acc = _mm512_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
[[maybe_unused]] auto m512_dpbusd_epi32 = [=](__m512i a, __m512i b) -> __m512i {
|
||||
__m512i product0 = _mm512_maddubs_epi16(a, b);
|
||||
product0 = _mm512_madd_epi16(product0, kOnes512);
|
||||
acc = _mm512_add_epi32(acc, product0);
|
||||
return _mm512_madd_epi16(product0, kOnes512);
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -256,14 +256,13 @@ namespace Eval::NNUE::Layers {
|
||||
|
||||
return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias);
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) {
|
||||
#if defined (USE_VNNI)
|
||||
[[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) {
|
||||
acc = _mm256_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
[[maybe_unused]] auto m256_dpbusd_epi32 = [=](__m256i a, __m256i b) -> __m256i {
|
||||
__m256i product0 = _mm256_maddubs_epi16(a, b);
|
||||
product0 = _mm256_madd_epi16(product0, kOnes256);
|
||||
acc = _mm256_add_epi32(acc, product0);
|
||||
return _mm256_madd_epi16(product0, kOnes256);
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -288,10 +287,9 @@ namespace Eval::NNUE::Layers {
|
||||
return _mm_add_epi32(sum0, bias);
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m128_add_dpbusd_epi32 = [=](__m128i& acc, __m128i a, __m128i b) {
|
||||
[[maybe_unused]] auto m128_dpbusd_epi32 = [=](__m128i a, __m128i b) -> __m128i {
|
||||
__m128i product0 = _mm_maddubs_epi16(a, b);
|
||||
product0 = _mm_madd_epi16(product0, kOnes128);
|
||||
acc = _mm_add_epi32(acc, product0);
|
||||
return _mm_madd_epi16(product0, kOnes128);
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -335,15 +333,6 @@ namespace Eval::NNUE::Layers {
|
||||
const __m512i bias = *reinterpret_cast<const __m512i*>(&biases_[i]);
|
||||
__m512i* outptr = reinterpret_cast<__m512i*>(&output[i]);
|
||||
|
||||
__m512i sum01a = _mm512_setzero_si512();
|
||||
__m512i sum23a = _mm512_setzero_si512();
|
||||
__m512i sum45a = _mm512_setzero_si512();
|
||||
__m512i sum67a = _mm512_setzero_si512();
|
||||
__m512i sum01b = _mm512_setzero_si512();
|
||||
__m512i sum23b = _mm512_setzero_si512();
|
||||
__m512i sum45b = _mm512_setzero_si512();
|
||||
__m512i sum67b = _mm512_setzero_si512();
|
||||
|
||||
const auto row01a = *reinterpret_cast<const __m512i*>(&weights_[offset01a]);
|
||||
const auto row23a = *reinterpret_cast<const __m512i*>(&weights_[offset23a]);
|
||||
const auto row45a = *reinterpret_cast<const __m512i*>(&weights_[offset45a]);
|
||||
@@ -356,6 +345,16 @@ namespace Eval::NNUE::Layers {
|
||||
const __m256i in256 = input_vector256[0];
|
||||
const __m512i in = _mm512_inserti64x4(_mm512_castsi256_si512(in256), in256, 1);
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
__m512i sum01a = _mm512_setzero_si512();
|
||||
__m512i sum23a = _mm512_setzero_si512();
|
||||
__m512i sum45a = _mm512_setzero_si512();
|
||||
__m512i sum67a = _mm512_setzero_si512();
|
||||
__m512i sum01b = _mm512_setzero_si512();
|
||||
__m512i sum23b = _mm512_setzero_si512();
|
||||
__m512i sum45b = _mm512_setzero_si512();
|
||||
__m512i sum67b = _mm512_setzero_si512();
|
||||
|
||||
m512_add_dpbusd_epi32(sum01a, in, row01a);
|
||||
m512_add_dpbusd_epi32(sum23a, in, row23a);
|
||||
m512_add_dpbusd_epi32(sum45a, in, row45a);
|
||||
@@ -364,6 +363,16 @@ namespace Eval::NNUE::Layers {
|
||||
m512_add_dpbusd_epi32(sum23b, in, row23b);
|
||||
m512_add_dpbusd_epi32(sum45b, in, row45b);
|
||||
m512_add_dpbusd_epi32(sum67b, in, row67b);
|
||||
#else
|
||||
__m512i sum01a = m512_dpbusd_epi32(in, row01a);
|
||||
__m512i sum23a = m512_dpbusd_epi32(in, row23a);
|
||||
__m512i sum45a = m512_dpbusd_epi32(in, row45a);
|
||||
__m512i sum67a = m512_dpbusd_epi32(in, row67a);
|
||||
__m512i sum01b = m512_dpbusd_epi32(in, row01b);
|
||||
__m512i sum23b = m512_dpbusd_epi32(in, row23b);
|
||||
__m512i sum45b = m512_dpbusd_epi32(in, row45b);
|
||||
__m512i sum67b = m512_dpbusd_epi32(in, row67b);
|
||||
#endif
|
||||
|
||||
*outptr = m512_hadd256x16(
|
||||
sum01a, sum23a, sum45a, sum67a,
|
||||
@@ -384,48 +393,80 @@ namespace Eval::NNUE::Layers {
|
||||
|
||||
if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) == 0)
|
||||
{
|
||||
__m512i sum0 = _mm512_setzero_si512();
|
||||
__m512i sum1 = _mm512_setzero_si512();
|
||||
__m512i sum2 = _mm512_setzero_si512();
|
||||
__m512i sum3 = _mm512_setzero_si512();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m512i*>(&weights_[offset0]);
|
||||
const auto row1 = reinterpret_cast<const __m512i*>(&weights_[offset1]);
|
||||
const auto row2 = reinterpret_cast<const __m512i*>(&weights_[offset2]);
|
||||
const auto row3 = reinterpret_cast<const __m512i*>(&weights_[offset3]);
|
||||
|
||||
for (IndexType j = 0; j < kNumChunks512; ++j)
|
||||
#if defined (USE_VNNI)
|
||||
__m512i sum0 = _mm512_setzero_si512();
|
||||
__m512i sum1 = _mm512_setzero_si512();
|
||||
__m512i sum2 = _mm512_setzero_si512();
|
||||
__m512i sum3 = _mm512_setzero_si512();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]);
|
||||
__m512i sum1 = m512_dpbusd_epi32(input_vector512[0], row1[0]);
|
||||
__m512i sum2 = m512_dpbusd_epi32(input_vector512[0], row2[0]);
|
||||
__m512i sum3 = m512_dpbusd_epi32(input_vector512[0], row3[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks512; ++j)
|
||||
{
|
||||
const __m512i in = input_vector512[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m512_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
m512_add_dpbusd_epi32(sum1, in, row1[j]);
|
||||
m512_add_dpbusd_epi32(sum2, in, row2[j]);
|
||||
m512_add_dpbusd_epi32(sum3, in, row3[j]);
|
||||
#else
|
||||
sum0 = _mm512_add_epi32(sum0, m512_dpbusd_epi32(in, row0[j]));
|
||||
sum1 = _mm512_add_epi32(sum1, m512_dpbusd_epi32(in, row1[j]));
|
||||
sum2 = _mm512_add_epi32(sum2, m512_dpbusd_epi32(in, row2[j]));
|
||||
sum3 = _mm512_add_epi32(sum3, m512_dpbusd_epi32(in, row3[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
*outptr = m512_haddx4(sum0, sum1, sum2, sum3, bias);
|
||||
}
|
||||
else
|
||||
{
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
__m256i sum1 = _mm256_setzero_si256();
|
||||
__m256i sum2 = _mm256_setzero_si256();
|
||||
__m256i sum3 = _mm256_setzero_si256();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[offset0]);
|
||||
const auto row1 = reinterpret_cast<const __m256i*>(&weights_[offset1]);
|
||||
const auto row2 = reinterpret_cast<const __m256i*>(&weights_[offset2]);
|
||||
const auto row3 = reinterpret_cast<const __m256i*>(&weights_[offset3]);
|
||||
|
||||
for (IndexType j = 0; j < kNumChunks256; ++j)
|
||||
#if defined (USE_VNNI)
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
__m256i sum1 = _mm256_setzero_si256();
|
||||
__m256i sum2 = _mm256_setzero_si256();
|
||||
__m256i sum3 = _mm256_setzero_si256();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m256i sum0 = m256_dpbusd_epi32(input_vector256[0], row0[0]);
|
||||
__m256i sum1 = m256_dpbusd_epi32(input_vector256[0], row1[0]);
|
||||
__m256i sum2 = m256_dpbusd_epi32(input_vector256[0], row2[0]);
|
||||
__m256i sum3 = m256_dpbusd_epi32(input_vector256[0], row3[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks256; ++j)
|
||||
{
|
||||
const __m256i in = input_vector256[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
m256_add_dpbusd_epi32(sum1, in, row1[j]);
|
||||
m256_add_dpbusd_epi32(sum2, in, row2[j]);
|
||||
m256_add_dpbusd_epi32(sum3, in, row3[j]);
|
||||
#else
|
||||
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
|
||||
sum1 = _mm256_add_epi32(sum1, m256_dpbusd_epi32(in, row1[j]));
|
||||
sum2 = _mm256_add_epi32(sum2, m256_dpbusd_epi32(in, row2[j]));
|
||||
sum3 = _mm256_add_epi32(sum3, m256_dpbusd_epi32(in, row3[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
*outptr = m256_haddx4(sum0, sum1, sum2, sum3, bias);
|
||||
@@ -436,30 +477,50 @@ namespace Eval::NNUE::Layers {
|
||||
{
|
||||
if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) == 0)
|
||||
{
|
||||
__m512i sum0 = _mm512_setzero_si512();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m512i*>(&weights_[0]);
|
||||
|
||||
for (IndexType j = 0; j < kNumChunks512; ++j)
|
||||
#if defined (USE_VNNI)
|
||||
__m512i sum0 = _mm512_setzero_si512();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks512; ++j)
|
||||
{
|
||||
const __m512i in = input_vector512[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m512_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
#else
|
||||
sum0 = _mm512_add_epi32(sum0, m512_dpbusd_epi32(in, row0[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
output[0] = m512_hadd(sum0, biases_[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
|
||||
|
||||
for (IndexType j = 0; j < kNumChunks256; ++j)
|
||||
#if defined (USE_VNNI)
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m256i sum0 = m256_dpbusd_epi32(input_vector256[0], row0[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks256; ++j)
|
||||
{
|
||||
const __m256i in = input_vector256[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
#else
|
||||
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
output[0] = m256_hadd(sum0, biases_[0]);
|
||||
@@ -493,24 +554,40 @@ namespace Eval::NNUE::Layers {
|
||||
const __m128i bias = *reinterpret_cast<const __m128i*>(&biases_[i]);
|
||||
__m128i* outptr = reinterpret_cast<__m128i*>(&output[i]);
|
||||
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
__m256i sum1 = _mm256_setzero_si256();
|
||||
__m256i sum2 = _mm256_setzero_si256();
|
||||
__m256i sum3 = _mm256_setzero_si256();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[offset0]);
|
||||
const auto row1 = reinterpret_cast<const __m256i*>(&weights_[offset1]);
|
||||
const auto row2 = reinterpret_cast<const __m256i*>(&weights_[offset2]);
|
||||
const auto row3 = reinterpret_cast<const __m256i*>(&weights_[offset3]);
|
||||
|
||||
for (IndexType j = 0; j < kNumChunks; ++j)
|
||||
#if defined (USE_VNNI)
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
__m256i sum1 = _mm256_setzero_si256();
|
||||
__m256i sum2 = _mm256_setzero_si256();
|
||||
__m256i sum3 = _mm256_setzero_si256();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m256i sum0 = m256_dpbusd_epi32(input_vector[0], row0[0]);
|
||||
__m256i sum1 = m256_dpbusd_epi32(input_vector[0], row1[0]);
|
||||
__m256i sum2 = m256_dpbusd_epi32(input_vector[0], row2[0]);
|
||||
__m256i sum3 = m256_dpbusd_epi32(input_vector[0], row3[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks; ++j)
|
||||
{
|
||||
const __m256i in = input_vector[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
m256_add_dpbusd_epi32(sum1, in, row1[j]);
|
||||
m256_add_dpbusd_epi32(sum2, in, row2[j]);
|
||||
m256_add_dpbusd_epi32(sum3, in, row3[j]);
|
||||
#else
|
||||
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
|
||||
sum1 = _mm256_add_epi32(sum1, m256_dpbusd_epi32(in, row1[j]));
|
||||
sum2 = _mm256_add_epi32(sum2, m256_dpbusd_epi32(in, row2[j]));
|
||||
sum3 = _mm256_add_epi32(sum3, m256_dpbusd_epi32(in, row3[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
*outptr = m256_haddx4(sum0, sum1, sum2, sum3, bias);
|
||||
@@ -518,15 +595,25 @@ namespace Eval::NNUE::Layers {
|
||||
}
|
||||
else if constexpr (kOutputDimensions == 1)
|
||||
{
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
|
||||
|
||||
for (IndexType j = 0; j < kNumChunks; ++j)
|
||||
#if defined (USE_VNNI)
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m256i sum0 = m256_dpbusd_epi32(input_vector[0], row0[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks; ++j)
|
||||
{
|
||||
const __m256i in = input_vector[j];
|
||||
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
#if defined (USE_VNNI)
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
#else
|
||||
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
output[0] = m256_hadd(sum0, biases_[0]);
|
||||
@@ -559,24 +646,24 @@ namespace Eval::NNUE::Layers {
|
||||
const __m128i bias = *reinterpret_cast<const __m128i*>(&biases_[i]);
|
||||
__m128i* outptr = reinterpret_cast<__m128i*>(&output[i]);
|
||||
|
||||
__m128i sum0 = _mm_setzero_si128();
|
||||
__m128i sum1 = _mm_setzero_si128();
|
||||
__m128i sum2 = _mm_setzero_si128();
|
||||
__m128i sum3 = _mm_setzero_si128();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m128i*>(&weights_[offset0]);
|
||||
const auto row1 = reinterpret_cast<const __m128i*>(&weights_[offset1]);
|
||||
const auto row2 = reinterpret_cast<const __m128i*>(&weights_[offset2]);
|
||||
const auto row3 = reinterpret_cast<const __m128i*>(&weights_[offset3]);
|
||||
|
||||
for (int j = 0; j < (int)kNumChunks; j += 1)
|
||||
__m128i sum0 = m128_dpbusd_epi32(input_vector[0], row0[0]);
|
||||
__m128i sum1 = m128_dpbusd_epi32(input_vector[0], row1[0]);
|
||||
__m128i sum2 = m128_dpbusd_epi32(input_vector[0], row2[0]);
|
||||
__m128i sum3 = m128_dpbusd_epi32(input_vector[0], row3[0]);
|
||||
|
||||
for (int j = 1; j < (int)kNumChunks; ++j)
|
||||
{
|
||||
const __m128i in = input_vector[j];
|
||||
|
||||
m128_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
m128_add_dpbusd_epi32(sum1, in, row1[j]);
|
||||
m128_add_dpbusd_epi32(sum2, in, row2[j]);
|
||||
m128_add_dpbusd_epi32(sum3, in, row3[j]);
|
||||
sum0 = _mm_add_epi32(sum0, m128_dpbusd_epi32(in, row0[j]));
|
||||
sum1 = _mm_add_epi32(sum1, m128_dpbusd_epi32(in, row1[j]));
|
||||
sum2 = _mm_add_epi32(sum2, m128_dpbusd_epi32(in, row2[j]));
|
||||
sum3 = _mm_add_epi32(sum3, m128_dpbusd_epi32(in, row3[j]));
|
||||
}
|
||||
|
||||
*outptr = m128_haddx4(sum0, sum1, sum2, sum3, bias);
|
||||
@@ -584,16 +671,12 @@ namespace Eval::NNUE::Layers {
|
||||
}
|
||||
else if constexpr (kOutputDimensions == 1)
|
||||
{
|
||||
__m128i sum0 = _mm_setzero_si128();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m128i*>(&weights_[0]);
|
||||
|
||||
for (int j = 0; j < (int)kNumChunks; j += 1)
|
||||
{
|
||||
const __m128i in = input_vector[j];
|
||||
__m128i sum0 = m128_dpbusd_epi32(input_vector[0], row0[0]);
|
||||
|
||||
m128_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
}
|
||||
for (int j = 1; j < (int)kNumChunks; ++j)
|
||||
sum0 = _mm_add_epi32(sum0, m128_dpbusd_epi32(input_vector[j], row0[j]));
|
||||
|
||||
output[0] = m128_hadd(sum0, biases_[0]);
|
||||
}
|
||||
|
||||
@@ -1,34 +1,35 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Class for difference calculation of NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_ACCUMULATOR_H_INCLUDED
|
||||
#define NNUE_ACCUMULATOR_H_INCLUDED
|
||||
|
||||
#include "nnue_architecture.h"
|
||||
|
||||
// Class for difference calculation of NNUE evaluation function
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// Class that holds the result of affine transformation of input features
|
||||
struct alignas(kCacheLineSize) Accumulator {
|
||||
std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
|
||||
bool computed_accumulation;
|
||||
};
|
||||
// Class that holds the result of affine transformation of input features
|
||||
struct alignas(kCacheLineSize) Accumulator {
|
||||
std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
|
||||
bool computed_accumulation;
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
|
||||
@@ -1,36 +1,37 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_ARCHITECTURE_H_INCLUDED
|
||||
#define NNUE_ARCHITECTURE_H_INCLUDED
|
||||
|
||||
// Defines the network structure
|
||||
#include "architectures/halfkp_256x2-32-32.h"
|
||||
|
||||
// Input features and network structure used in NNUE evaluation function
|
||||
namespace Eval::NNUE {
|
||||
|
||||
static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
|
||||
static_assert(Network::kOutputDimensions == 1, "");
|
||||
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
|
||||
static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
|
||||
static_assert(Network::kOutputDimensions == 1, "");
|
||||
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
|
||||
|
||||
// Trigger for full calculation instead of difference calculation
|
||||
constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
|
||||
// Trigger for full calculation instead of difference calculation
|
||||
constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// A class that converts the input features of the NNUE evaluation function
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
#include "nnue_common.h"
|
||||
#include "nnue_architecture.h"
|
||||
|
||||
#include "features/index_list.h"
|
||||
|
||||
#include <cstring>
|
||||
@@ -31,456 +30,486 @@
|
||||
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// If vector instructions are enabled, we update and refresh the
|
||||
// accumulator tile by tile such that each tile fits in the CPU's
|
||||
// vector registers.
|
||||
#define TILING
|
||||
// If vector instructions are enabled, we update and refresh the
|
||||
// accumulator tile by tile such that each tile fits in the CPU's
|
||||
// vector registers.
|
||||
#define VECTOR
|
||||
|
||||
#ifdef USE_AVX512
|
||||
typedef __m512i vec_t;
|
||||
#define vec_load(a) _mm512_load_si512(a)
|
||||
#define vec_store(a,b) _mm512_store_si512(a,b)
|
||||
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
|
||||
#define vec_zero _mm512_setzero_si512()
|
||||
static constexpr IndexType kNumRegs = 8; // only 8 are needed
|
||||
#ifdef USE_AVX512
|
||||
typedef __m512i vec_t;
|
||||
#define vec_load(a) _mm512_load_si512(a)
|
||||
#define vec_store(a,b) _mm512_store_si512(a,b)
|
||||
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
|
||||
#define vec_zero _mm512_setzero_si512()
|
||||
static constexpr IndexType kNumRegs = 8; // only 8 are needed
|
||||
|
||||
#elif USE_AVX2
|
||||
typedef __m256i vec_t;
|
||||
#define vec_load(a) _mm256_load_si256(a)
|
||||
#define vec_store(a,b) _mm256_store_si256(a,b)
|
||||
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
|
||||
#define vec_zero _mm256_setzero_si256()
|
||||
static constexpr IndexType kNumRegs = 16;
|
||||
|
||||
#elif USE_SSE2
|
||||
typedef __m128i vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) _mm_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
|
||||
#define vec_zero _mm_setzero_si128()
|
||||
static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8;
|
||||
|
||||
#elif USE_MMX
|
||||
typedef __m64 vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) _mm_add_pi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
|
||||
#define vec_zero _mm_setzero_si64()
|
||||
static constexpr IndexType kNumRegs = 8;
|
||||
|
||||
#elif USE_NEON
|
||||
typedef int16x8_t vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) vaddq_s16(a,b)
|
||||
#define vec_sub_16(a,b) vsubq_s16(a,b)
|
||||
#define vec_zero {0}
|
||||
#elif USE_AVX2
|
||||
typedef __m256i vec_t;
|
||||
#define vec_load(a) _mm256_load_si256(a)
|
||||
#define vec_store(a,b) _mm256_store_si256(a,b)
|
||||
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
|
||||
#define vec_zero _mm256_setzero_si256()
|
||||
static constexpr IndexType kNumRegs = 16;
|
||||
|
||||
#elif USE_SSE2
|
||||
typedef __m128i vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) _mm_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
|
||||
#define vec_zero _mm_setzero_si128()
|
||||
static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8;
|
||||
|
||||
#elif USE_MMX
|
||||
typedef __m64 vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) _mm_add_pi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
|
||||
#define vec_zero _mm_setzero_si64()
|
||||
static constexpr IndexType kNumRegs = 8;
|
||||
|
||||
#elif USE_NEON
|
||||
typedef int16x8_t vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) vaddq_s16(a,b)
|
||||
#define vec_sub_16(a,b) vsubq_s16(a,b)
|
||||
#define vec_zero {0}
|
||||
static constexpr IndexType kNumRegs = 16;
|
||||
|
||||
#else
|
||||
#undef VECTOR
|
||||
|
||||
#endif
|
||||
|
||||
// Input feature converter
|
||||
class FeatureTransformer {
|
||||
|
||||
private:
|
||||
// Number of output dimensions for one side
|
||||
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
|
||||
|
||||
#ifdef VECTOR
|
||||
static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
|
||||
static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
|
||||
#endif
|
||||
|
||||
public:
|
||||
// Output type
|
||||
using OutputType = TransformedFeatureType;
|
||||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
|
||||
static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
|
||||
|
||||
// Size of forward propagation buffer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
kOutputDimensions * sizeof(OutputType);
|
||||
|
||||
static constexpr int kLayerIndex = 0;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
|
||||
return RawFeatures::kHashValue ^ kOutputDimensions;
|
||||
}
|
||||
|
||||
static std::string get_name() {
|
||||
return RawFeatures::get_name() + "[" +
|
||||
std::to_string(kInputDimensions) + "->" +
|
||||
std::to_string(kHalfDimensions) + "x2]";
|
||||
}
|
||||
|
||||
// a string representing the structure
|
||||
static std::string get_structure_string() {
|
||||
return get_name();
|
||||
}
|
||||
|
||||
static std::string get_layers_info() {
|
||||
std::string info = " - ";
|
||||
info += std::to_string(kLayerIndex);
|
||||
info += " - ";
|
||||
info += get_name();
|
||||
return info;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
|
||||
for (std::size_t i = 0; i < kHalfDimensions; ++i)
|
||||
biases_[i] = read_little_endian<BiasType>(stream);
|
||||
for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
|
||||
weights_[i] = read_little_endian<WeightType>(stream);
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
stream.write(reinterpret_cast<const char*>(biases_),
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(weights_),
|
||||
kHalfDimensions * kInputDimensions * sizeof(WeightType));
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Proceed with the difference calculation if possible
|
||||
bool update_accumulator_if_possible(const Position& pos) const {
|
||||
|
||||
const auto now = pos.state();
|
||||
if (now->accumulator.computed_accumulation)
|
||||
return true;
|
||||
|
||||
const auto prev = now->previous;
|
||||
if (prev && prev->accumulator.computed_accumulation) {
|
||||
update_accumulator(pos);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Convert input features
|
||||
void Transform(const Position& pos, OutputType* output) const {
|
||||
|
||||
if (!update_accumulator_if_possible(pos))
|
||||
refresh_accumulator(pos);
|
||||
|
||||
const auto& accumulation = pos.state()->accumulator.accumulation;
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth * 2);
|
||||
static_assert(kHalfDimensions % (kSimdWidth * 2) == 0);
|
||||
const __m512i kControl = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7);
|
||||
const __m512i kZero = _mm512_setzero_si512();
|
||||
|
||||
#elif defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
constexpr int kControl = 0b11011000;
|
||||
const __m256i kZero = _mm256_setzero_si256();
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
|
||||
#ifdef USE_SSE41
|
||||
const __m128i kZero = _mm_setzero_si128();
|
||||
#else
|
||||
const __m128i k0x80s = _mm_set1_epi8(-128);
|
||||
#endif
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
const __m64 k0x80s = _mm_set1_pi8(-128);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
const int8x8_t kZero = {0};
|
||||
#endif
|
||||
|
||||
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
|
||||
for (IndexType p = 0; p < 2; ++p) {
|
||||
const IndexType offset = kHalfDimensions * p;
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
auto out = reinterpret_cast<__m512i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m512i sum0 = _mm512_load_si512(
|
||||
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m512i sum1 = _mm512_load_si512(
|
||||
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm512_add_epi16(sum0, reinterpret_cast<const __m512i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm512_add_epi16(sum1, reinterpret_cast<const __m512i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
_mm512_store_si512(&out[j], _mm512_permutexvar_epi64(kControl,
|
||||
_mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), kZero)));
|
||||
}
|
||||
|
||||
#elif defined(USE_AVX2)
|
||||
auto out = reinterpret_cast<__m256i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m256i sum0 = _mm256_load_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m256i sum1 = _mm256_load_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
|
||||
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
|
||||
}
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
auto out = reinterpret_cast<__m128i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
|
||||
|
||||
_mm_store_si128(&out[j],
|
||||
|
||||
#ifdef USE_SSE41
|
||||
_mm_max_epi8(packedbytes, kZero)
|
||||
#else
|
||||
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
|
||||
#endif
|
||||
|
||||
);
|
||||
}
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
auto out = reinterpret_cast<__m64*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m64 sum0 = *(&reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m64 sum1 = *(&reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm_add_pi16(sum0, reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm_add_pi16(sum1, reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
|
||||
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
|
||||
}
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
|
||||
accumulation[perspectives[p]][0])[j];
|
||||
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
|
||||
accumulation[perspectives[p]][i])[j]);
|
||||
}
|
||||
|
||||
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
|
||||
}
|
||||
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j) {
|
||||
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum += accumulation[static_cast<int>(perspectives[p])][i][j];
|
||||
}
|
||||
|
||||
output[offset + j] = static_cast<OutputType>(
|
||||
std::max<int>(0, std::min<int>(127, sum)));
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
// Calculate cumulative value without using difference calculation
|
||||
void refresh_accumulator(const Position& pos) const {
|
||||
|
||||
#ifdef VECTOR
|
||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
||||
// is defined in the VECTOR code below, once in each branch
|
||||
vec_t acc[kNumRegs];
|
||||
#endif
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList active_indices[2];
|
||||
RawFeatures::append_active_indices(pos, kRefreshTriggers[i],
|
||||
active_indices);
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
#ifdef VECTOR
|
||||
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
|
||||
auto accTile = reinterpret_cast<vec_t*>(
|
||||
&accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
|
||||
if (i == 0) {
|
||||
auto biasesTile = reinterpret_cast<const vec_t*>(
|
||||
&biases_[j * kTileHeight]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
} else {
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_zero;
|
||||
}
|
||||
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; k++)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
#else
|
||||
#undef TILING
|
||||
|
||||
#endif
|
||||
|
||||
// Input feature converter
|
||||
class FeatureTransformer {
|
||||
|
||||
private:
|
||||
// Number of output dimensions for one side
|
||||
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
|
||||
|
||||
#ifdef TILING
|
||||
static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
|
||||
static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
|
||||
#endif
|
||||
|
||||
public:
|
||||
// Output type
|
||||
using OutputType = TransformedFeatureType;
|
||||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
|
||||
static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
|
||||
|
||||
// Size of forward propagation buffer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
kOutputDimensions * sizeof(OutputType);
|
||||
|
||||
static constexpr int kLayerIndex = 0;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
|
||||
return RawFeatures::kHashValue ^ kOutputDimensions;
|
||||
}
|
||||
|
||||
static std::string get_name() {
|
||||
return RawFeatures::get_name() + "[" +
|
||||
std::to_string(kInputDimensions) + "->" +
|
||||
std::to_string(kHalfDimensions) + "x2]";
|
||||
}
|
||||
|
||||
// a string representing the structure
|
||||
static std::string get_structure_string() {
|
||||
return get_name();
|
||||
}
|
||||
|
||||
static std::string get_layers_info() {
|
||||
std::string info = " - ";
|
||||
info += std::to_string(kLayerIndex);
|
||||
info += " - ";
|
||||
info += get_name();
|
||||
return info;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
|
||||
for (std::size_t i = 0; i < kHalfDimensions; ++i)
|
||||
biases_[i] = read_little_endian<BiasType>(stream);
|
||||
|
||||
for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
|
||||
weights_[i] = read_little_endian<WeightType>(stream);
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
stream.write(reinterpret_cast<const char*>(biases_),
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(weights_),
|
||||
kHalfDimensions * kInputDimensions * sizeof(WeightType));
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Proceed with the difference calculation if possible
|
||||
bool update_accumulator_if_possible(const Position& pos) const {
|
||||
|
||||
const auto now = pos.state();
|
||||
if (now->accumulator.computed_accumulation)
|
||||
return true;
|
||||
|
||||
const auto prev = now->previous;
|
||||
if (prev && prev->accumulator.computed_accumulation) {
|
||||
update_accumulator(pos);
|
||||
return true;
|
||||
if (i == 0) {
|
||||
std::memcpy(accumulator.accumulation[perspective][i], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
} else {
|
||||
std::memset(accumulator.accumulation[perspective][i], 0,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Convert input features
|
||||
void Transform(const Position& pos, OutputType* output) const {
|
||||
|
||||
if (!update_accumulator_if_possible(pos))
|
||||
refresh_accumulator(pos);
|
||||
|
||||
const auto& accumulation = pos.state()->accumulator.accumulation;
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
constexpr int kControl = 0b11011000;
|
||||
const __m256i kZero = _mm256_setzero_si256();
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
|
||||
#ifdef USE_SSE41
|
||||
const __m128i kZero = _mm_setzero_si128();
|
||||
#else
|
||||
const __m128i k0x80s = _mm_set1_epi8(-128);
|
||||
#endif
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
const __m64 k0x80s = _mm_set1_pi8(-128);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
const int8x8_t kZero = {0};
|
||||
#endif
|
||||
|
||||
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
|
||||
for (IndexType p = 0; p < 2; ++p) {
|
||||
const IndexType offset = kHalfDimensions * p;
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
auto out = reinterpret_cast<__m256i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m256i sum0 = _mm256_load_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m256i sum1 = _mm256_load_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
|
||||
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
|
||||
}
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
auto out = reinterpret_cast<__m128i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
|
||||
|
||||
_mm_store_si128(&out[j],
|
||||
|
||||
#ifdef USE_SSE41
|
||||
_mm_max_epi8(packedbytes, kZero)
|
||||
#else
|
||||
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
|
||||
#endif
|
||||
|
||||
);
|
||||
}
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
auto out = reinterpret_cast<__m64*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m64 sum0 = *(&reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m64 sum1 = *(&reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm_add_pi16(sum0, reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm_add_pi16(sum1, reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
|
||||
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
|
||||
}
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
|
||||
accumulation[perspectives[p]][0])[j];
|
||||
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
|
||||
accumulation[perspectives[p]][i])[j]);
|
||||
}
|
||||
|
||||
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
|
||||
}
|
||||
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j) {
|
||||
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum += accumulation[static_cast<int>(perspectives[p])][i][j];
|
||||
}
|
||||
|
||||
output[offset + j] = static_cast<OutputType>(
|
||||
std::max<int>(0, std::min<int>(127, sum)));
|
||||
}
|
||||
#endif
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
|
||||
}
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
// Calculate cumulative value without using difference calculation
|
||||
void refresh_accumulator(const Position& pos) const {
|
||||
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList active_indices[2];
|
||||
RawFeatures::append_active_indices(pos, kRefreshTriggers[i],
|
||||
active_indices);
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
#ifdef TILING
|
||||
for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) {
|
||||
auto accTile = reinterpret_cast<vec_t*>(
|
||||
&accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
vec_t acc[kNumRegs];
|
||||
|
||||
if (i == 0) {
|
||||
auto biasesTile = reinterpret_cast<const vec_t*>(
|
||||
&biases_[j * kTileHeight]);
|
||||
for (unsigned k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
} else {
|
||||
for (unsigned k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_zero;
|
||||
}
|
||||
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
for (unsigned k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
for (unsigned k = 0; k < kNumRegs; k++)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
#else
|
||||
if (i == 0) {
|
||||
std::memcpy(accumulator.accumulation[perspective][i], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
} else {
|
||||
std::memset(accumulator.accumulation[perspective][i], 0,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
}
|
||||
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
_mm_empty();
|
||||
#endif
|
||||
|
||||
accumulator.computed_accumulation = true;
|
||||
accumulator.computed_accumulation = true;
|
||||
}
|
||||
|
||||
// Calculate cumulative value using difference calculation
|
||||
void update_accumulator(const Position& pos) const {
|
||||
|
||||
#ifdef VECTOR
|
||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
||||
// is defined in the VECTOR code below, once in each branch
|
||||
vec_t acc[kNumRegs];
|
||||
#endif
|
||||
const auto& prev_accumulator = pos.state()->previous->accumulator;
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList removed_indices[2], added_indices[2];
|
||||
bool reset[2] = { false, false };
|
||||
RawFeatures::append_changed_indices(pos, kRefreshTriggers[i],
|
||||
removed_indices, added_indices, reset);
|
||||
|
||||
#ifdef VECTOR
|
||||
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
auto accTile = reinterpret_cast<vec_t*>(
|
||||
&accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
|
||||
if (reset[perspective]) {
|
||||
if (i == 0) {
|
||||
auto biasesTile = reinterpret_cast<const vec_t*>(
|
||||
&biases_[j * kTileHeight]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
} else {
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_zero;
|
||||
}
|
||||
} else {
|
||||
auto prevAccTile = reinterpret_cast<const vec_t*>(
|
||||
&prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_load(&prevAccTile[k]);
|
||||
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
}
|
||||
|
||||
{ // Difference calculation for the activated features
|
||||
for (const auto index : added_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
|
||||
// Calculate cumulative value using difference calculation
|
||||
void update_accumulator(const Position& pos) const {
|
||||
|
||||
const auto& prev_accumulator = pos.state()->previous->accumulator;
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList removed_indices[2], added_indices[2];
|
||||
bool reset[2] = { false, false };
|
||||
RawFeatures::append_changed_indices(pos, kRefreshTriggers[i],
|
||||
removed_indices, added_indices, reset);
|
||||
|
||||
#ifdef TILING
|
||||
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
auto accTile = reinterpret_cast<vec_t*>(
|
||||
&accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
vec_t acc[kNumRegs];
|
||||
|
||||
if (reset[perspective]) {
|
||||
if (i == 0) {
|
||||
auto biasesTile = reinterpret_cast<const vec_t*>(
|
||||
&biases_[j * kTileHeight]);
|
||||
for (unsigned k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
} else {
|
||||
for (unsigned k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_zero;
|
||||
}
|
||||
} else {
|
||||
auto prevAccTile = reinterpret_cast<const vec_t*>(
|
||||
&prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_load(&prevAccTile[k]);
|
||||
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
}
|
||||
|
||||
{ // Difference calculation for the activated features
|
||||
for (const auto index : added_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
_mm_empty();
|
||||
#endif
|
||||
|
||||
#else
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
|
||||
if (reset[perspective]) {
|
||||
if (i == 0) {
|
||||
std::memcpy(accumulator.accumulation[perspective][i], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
} else {
|
||||
std::memset(accumulator.accumulation[perspective][i], 0,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
}
|
||||
} else {
|
||||
std::memcpy(accumulator.accumulation[perspective][i],
|
||||
prev_accumulator.accumulation[perspective][i],
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
if (reset[perspective]) {
|
||||
if (i == 0) {
|
||||
std::memcpy(accumulator.accumulation[perspective][i], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
} else {
|
||||
std::memset(accumulator.accumulation[perspective][i], 0,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
}
|
||||
} else {
|
||||
std::memcpy(accumulator.accumulation[perspective][i],
|
||||
prev_accumulator.accumulation[perspective][i],
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
|
||||
}
|
||||
}
|
||||
{ // Difference calculation for the activated features
|
||||
for (const auto index : added_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
accumulator.computed_accumulation = true;
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
|
||||
}
|
||||
}
|
||||
{ // Difference calculation for the activated features
|
||||
for (const auto index : added_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
|
||||
using BiasType = std::int16_t;
|
||||
using WeightType = std::int16_t;
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
accumulator.computed_accumulation = true;
|
||||
}
|
||||
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<FeatureTransformer>;
|
||||
using BiasType = std::int16_t;
|
||||
using WeightType = std::int16_t;
|
||||
|
||||
alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
|
||||
alignas(kCacheLineSize)
|
||||
WeightType weights_[kHalfDimensions * kInputDimensions];
|
||||
};
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<FeatureTransformer>;
|
||||
|
||||
alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
|
||||
alignas(kCacheLineSize)
|
||||
WeightType weights_[kHalfDimensions * kInputDimensions];
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
#endif //#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
|
||||
#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
|
||||
|
||||
@@ -176,8 +176,8 @@ namespace {
|
||||
score -= Doubled * doubled
|
||||
+ WeakLever * more_than_one(lever);
|
||||
|
||||
if (blocked && r > RANK_4)
|
||||
score += BlockedPawn[r-4];
|
||||
if (blocked && r >= RANK_5)
|
||||
score += BlockedPawn[r - RANK_5];
|
||||
}
|
||||
|
||||
return score;
|
||||
|
||||
@@ -59,7 +59,7 @@ namespace {
|
||||
// Razor and futility margins
|
||||
constexpr int RazorMargin = 510;
|
||||
Value futility_margin(Depth d, bool improving) {
|
||||
return Value(223 * (d - improving));
|
||||
return Value(234 * (d - improving));
|
||||
}
|
||||
|
||||
// Reductions lookup table, initialized at startup
|
||||
@@ -67,7 +67,7 @@ namespace {
|
||||
|
||||
Depth reduction(bool i, Depth d, int mn) {
|
||||
int r = Reductions[d] * Reductions[mn];
|
||||
return (r + 509) / 1024 + (!i && r > 894);
|
||||
return (r + 503) / 1024 + (!i && r > 915);
|
||||
}
|
||||
|
||||
constexpr int futility_move_count(bool improving, Depth depth) {
|
||||
@@ -188,7 +188,7 @@ namespace {
|
||||
void Search::init() {
|
||||
|
||||
for (int i = 1; i < MAX_MOVES; ++i)
|
||||
Reductions[i] = int((22.0 + 2 * std::log(Threads.size())) * std::log(i + 0.25 * std::log(i)));
|
||||
Reductions[i] = int((21.3 + 2 * std::log(Threads.size())) * std::log(i + 0.25 * std::log(i)));
|
||||
}
|
||||
|
||||
|
||||
@@ -404,7 +404,7 @@ void Thread::search() {
|
||||
beta = std::min(prev + delta, VALUE_INFINITE);
|
||||
|
||||
// Adjust contempt based on root move's previousScore (dynamic contempt)
|
||||
int dct = ct + (105 - ct / 2) * prev / (abs(prev) + 149);
|
||||
int dct = ct + (113 - ct / 2) * prev / (abs(prev) + 147);
|
||||
|
||||
contempt = (us == WHITE ? make_score(dct, dct / 2)
|
||||
: -make_score(dct, dct / 2));
|
||||
@@ -824,7 +824,7 @@ namespace {
|
||||
&& (ss-1)->statScore < 22977
|
||||
&& eval >= beta
|
||||
&& eval >= ss->staticEval
|
||||
&& ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ss->ttPv + 182
|
||||
&& ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ss->ttPv + 168
|
||||
&& !excludedMove
|
||||
&& pos.non_pawn_material(us)
|
||||
&& (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor))
|
||||
@@ -832,7 +832,7 @@ namespace {
|
||||
assert(eval - beta >= 0);
|
||||
|
||||
// Null move dynamic reduction based on depth and value
|
||||
Depth R = (982 + 85 * depth) / 256 + std::min(int(eval - beta) / 192, 3);
|
||||
Depth R = (1015 + 85 * depth) / 256 + std::min(int(eval - beta) / 191, 3);
|
||||
|
||||
ss->currentMove = MOVE_NULL;
|
||||
ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0];
|
||||
@@ -849,7 +849,7 @@ namespace {
|
||||
if (nullValue >= VALUE_TB_WIN_IN_MAX_PLY)
|
||||
nullValue = beta;
|
||||
|
||||
if (thisThread->nmpMinPly || (abs(beta) < VALUE_KNOWN_WIN && depth < 13))
|
||||
if (thisThread->nmpMinPly || (abs(beta) < VALUE_KNOWN_WIN && depth < 14))
|
||||
return nullValue;
|
||||
|
||||
assert(!thisThread->nmpMinPly); // Recursive verification is not allowed
|
||||
@@ -868,7 +868,7 @@ namespace {
|
||||
}
|
||||
}
|
||||
|
||||
probCutBeta = beta + 176 - 49 * improving;
|
||||
probCutBeta = beta + 183 - 49 * improving;
|
||||
|
||||
// Step 10. ProbCut (~10 Elo)
|
||||
// If we have a good enough capture and a reduced search returns a value
|
||||
@@ -1036,7 +1036,7 @@ moves_loop: // When in check, search starts from here
|
||||
// Futility pruning: parent node (~5 Elo)
|
||||
if ( lmrDepth < 7
|
||||
&& !ss->inCheck
|
||||
&& ss->staticEval + 283 + 170 * lmrDepth <= alpha
|
||||
&& ss->staticEval + 266 + 170 * lmrDepth <= alpha
|
||||
&& (*contHist[0])[movedPiece][to_sq(move)]
|
||||
+ (*contHist[1])[movedPiece][to_sq(move)]
|
||||
+ (*contHist[3])[movedPiece][to_sq(move)]
|
||||
@@ -1044,7 +1044,7 @@ moves_loop: // When in check, search starts from here
|
||||
continue;
|
||||
|
||||
// Prune moves with negative SEE (~20 Elo)
|
||||
if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 18)) * lmrDepth * lmrDepth)))
|
||||
if (!pos.see_ge(move, Value(-(30 - std::min(lmrDepth, 18)) * lmrDepth * lmrDepth)))
|
||||
continue;
|
||||
}
|
||||
else
|
||||
@@ -1055,8 +1055,8 @@ moves_loop: // When in check, search starts from here
|
||||
&& captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] < 0)
|
||||
continue;
|
||||
|
||||
// See based pruning
|
||||
if (!pos.see_ge(move, Value(-221) * depth)) // (~25 Elo)
|
||||
// SEE based pruning
|
||||
if (!pos.see_ge(move, Value(-213) * depth)) // (~25 Elo)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -1150,12 +1150,12 @@ moves_loop: // When in check, search starts from here
|
||||
|| moveCountPruning
|
||||
|| ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha
|
||||
|| cutNode
|
||||
|| thisThread->ttHitAverage < 427 * TtHitAverageResolution * TtHitAverageWindow / 1024))
|
||||
|| thisThread->ttHitAverage < 432 * TtHitAverageResolution * TtHitAverageWindow / 1024))
|
||||
{
|
||||
Depth r = reduction(improving, depth, moveCount);
|
||||
|
||||
// Decrease reduction if the ttHit running average is large
|
||||
if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024)
|
||||
if (thisThread->ttHitAverage > 537 * TtHitAverageResolution * TtHitAverageWindow / 1024)
|
||||
r--;
|
||||
|
||||
// Increase reduction if other threads are searching this position
|
||||
@@ -1208,10 +1208,10 @@ moves_loop: // When in check, search starts from here
|
||||
- 5287;
|
||||
|
||||
// Decrease/increase reduction by comparing opponent's stat score (~10 Elo)
|
||||
if (ss->statScore >= -106 && (ss-1)->statScore < -104)
|
||||
if (ss->statScore >= -105 && (ss-1)->statScore < -103)
|
||||
r--;
|
||||
|
||||
else if ((ss-1)->statScore >= -119 && ss->statScore < -140)
|
||||
else if ((ss-1)->statScore >= -122 && ss->statScore < -129)
|
||||
r++;
|
||||
|
||||
// Decrease/increase reduction for moves with a good/bad history (~30 Elo)
|
||||
@@ -1225,7 +1225,7 @@ moves_loop: // When in check, search starts from here
|
||||
|
||||
// Unless giving check, this capture is likely bad
|
||||
if ( !givesCheck
|
||||
&& ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha)
|
||||
&& ss->staticEval + PieceValue[EG][pos.captured_piece()] + 210 * depth <= alpha)
|
||||
r++;
|
||||
}
|
||||
|
||||
@@ -1499,7 +1499,7 @@ moves_loop: // When in check, search starts from here
|
||||
if (PvNode && bestValue > alpha)
|
||||
alpha = bestValue;
|
||||
|
||||
futilityBase = bestValue + 145;
|
||||
futilityBase = bestValue + 155;
|
||||
}
|
||||
|
||||
const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory,
|
||||
|
||||
@@ -204,8 +204,8 @@ enum PieceType {
|
||||
|
||||
enum Piece {
|
||||
NO_PIECE,
|
||||
W_PAWN = 1, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING,
|
||||
B_PAWN = 9, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING,
|
||||
W_PAWN = PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING,
|
||||
B_PAWN = PAWN + 8, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING,
|
||||
PIECE_NB = 16
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user