mirror of
https://github.com/HChaZZY/Stockfish.git
synced 2025-12-26 20:16:14 +08:00
Merge remote-tracking branch 'remotes/official/master' into merge
This commit is contained in:
@@ -1,19 +1,19 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Code for calculating NNUE evaluation function
|
||||
@@ -40,330 +40,313 @@
|
||||
|
||||
namespace Eval::NNUE {
|
||||
|
||||
const uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
|
||||
// convention: W - us, B - them
|
||||
// viewed from other side, W and B are reversed
|
||||
{ PS_NONE, PS_NONE },
|
||||
{ PS_W_PAWN, PS_B_PAWN },
|
||||
{ PS_W_KNIGHT, PS_B_KNIGHT },
|
||||
{ PS_W_BISHOP, PS_B_BISHOP },
|
||||
{ PS_W_ROOK, PS_B_ROOK },
|
||||
{ PS_W_QUEEN, PS_B_QUEEN },
|
||||
{ PS_W_KING, PS_B_KING },
|
||||
{ PS_NONE, PS_NONE },
|
||||
{ PS_NONE, PS_NONE },
|
||||
{ PS_B_PAWN, PS_W_PAWN },
|
||||
{ PS_B_KNIGHT, PS_W_KNIGHT },
|
||||
{ PS_B_BISHOP, PS_W_BISHOP },
|
||||
{ PS_B_ROOK, PS_W_ROOK },
|
||||
{ PS_B_QUEEN, PS_W_QUEEN },
|
||||
{ PS_B_KING, PS_W_KING },
|
||||
{ PS_NONE, PS_NONE }
|
||||
};
|
||||
const uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
|
||||
// convention: W - us, B - them
|
||||
// viewed from other side, W and B are reversed
|
||||
{ PS_NONE, PS_NONE },
|
||||
{ PS_W_PAWN, PS_B_PAWN },
|
||||
{ PS_W_KNIGHT, PS_B_KNIGHT },
|
||||
{ PS_W_BISHOP, PS_B_BISHOP },
|
||||
{ PS_W_ROOK, PS_B_ROOK },
|
||||
{ PS_W_QUEEN, PS_B_QUEEN },
|
||||
{ PS_W_KING, PS_B_KING },
|
||||
{ PS_NONE, PS_NONE },
|
||||
{ PS_NONE, PS_NONE },
|
||||
{ PS_B_PAWN, PS_W_PAWN },
|
||||
{ PS_B_KNIGHT, PS_W_KNIGHT },
|
||||
{ PS_B_BISHOP, PS_W_BISHOP },
|
||||
{ PS_B_ROOK, PS_W_ROOK },
|
||||
{ PS_B_QUEEN, PS_W_QUEEN },
|
||||
{ PS_B_KING, PS_W_KING },
|
||||
{ PS_NONE, PS_NONE }
|
||||
};
|
||||
|
||||
// Input feature converter
|
||||
LargePagePtr<FeatureTransformer> feature_transformer;
|
||||
// Input feature converter
|
||||
LargePagePtr<FeatureTransformer> feature_transformer;
|
||||
|
||||
// Evaluation function
|
||||
AlignedPtr<Network> network;
|
||||
// Evaluation function
|
||||
AlignedPtr<Network> network;
|
||||
|
||||
// Evaluation function file name
|
||||
std::string fileName;
|
||||
// Evaluation function file name
|
||||
std::string fileName;
|
||||
|
||||
// Saved evaluation function file name
|
||||
std::string savedfileName = "nn.bin";
|
||||
// Saved evaluation function file name
|
||||
std::string savedfileName = "nn.bin";
|
||||
|
||||
// Get a string that represents the structure of the evaluation function
|
||||
std::string get_architecture_string() {
|
||||
return "Features=" + FeatureTransformer::get_structure_string() +
|
||||
",Network=" + Network::get_structure_string();
|
||||
}
|
||||
// Get a string that represents the structure of the evaluation function
|
||||
std::string get_architecture_string() {
|
||||
return "Features=" + FeatureTransformer::get_structure_string() +
|
||||
",Network=" + Network::get_structure_string();
|
||||
}
|
||||
|
||||
std::string get_layers_info() {
|
||||
return
|
||||
FeatureTransformer::get_layers_info()
|
||||
+ '\n' + Network::get_layers_info();
|
||||
}
|
||||
std::string get_layers_info() {
|
||||
return
|
||||
FeatureTransformer::get_layers_info()
|
||||
+ '\n' + Network::get_layers_info();
|
||||
}
|
||||
|
||||
UseNNUEMode useNNUE;
|
||||
std::string eval_file_loaded = "None";
|
||||
UseNNUEMode useNNUE;
|
||||
std::string eval_file_loaded = "None";
|
||||
|
||||
namespace Detail {
|
||||
namespace Detail {
|
||||
|
||||
// Initialize the evaluation function parameters
|
||||
template <typename T>
|
||||
void initialize(AlignedPtr<T>& pointer) {
|
||||
// Initialize the evaluation function parameters
|
||||
template <typename T>
|
||||
void initialize(AlignedPtr<T>& pointer) {
|
||||
|
||||
pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void initialize(LargePagePtr<T>& pointer) {
|
||||
template <typename T>
|
||||
void initialize(LargePagePtr<T>& pointer) {
|
||||
|
||||
static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
|
||||
static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
|
||||
pointer.reset(reinterpret_cast<T*>(aligned_large_pages_alloc(sizeof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
|
||||
pointer.reset(reinterpret_cast<T*>(aligned_large_pages_alloc(sizeof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
// Read evaluation function parameters
|
||||
template <typename T>
|
||||
bool ReadParameters(std::istream& stream, T& reference) {
|
||||
|
||||
// Read evaluation function parameters
|
||||
template <typename T>
|
||||
bool ReadParameters(std::istream& stream, T& reference) {
|
||||
std::uint32_t header;
|
||||
header = read_little_endian<std::uint32_t>(stream);
|
||||
if (!stream || header != T::GetHashValue()) return false;
|
||||
return reference.ReadParameters(stream);
|
||||
}
|
||||
|
||||
std::uint32_t header;
|
||||
header = read_little_endian<std::uint32_t>(stream);
|
||||
// write evaluation function parameters
|
||||
template <typename T>
|
||||
bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
|
||||
constexpr std::uint32_t header = T::GetHashValue();
|
||||
|
||||
if (!stream || header != T::GetHashValue())
|
||||
return false;
|
||||
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
|
||||
|
||||
return reference.ReadParameters(stream);
|
||||
}
|
||||
return pointer->WriteParameters(stream);
|
||||
}
|
||||
|
||||
// write evaluation function parameters
|
||||
template <typename T>
|
||||
bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
|
||||
constexpr std::uint32_t header = T::GetHashValue();
|
||||
template <typename T>
|
||||
bool WriteParameters(std::ostream& stream, const LargePagePtr<T>& pointer) {
|
||||
constexpr std::uint32_t header = T::GetHashValue();
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
|
||||
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
|
||||
|
||||
return pointer->WriteParameters(stream);
|
||||
}
|
||||
return pointer->WriteParameters(stream);
|
||||
}
|
||||
} // namespace Detail
|
||||
|
||||
template <typename T>
|
||||
bool WriteParameters(std::ostream& stream, const LargePagePtr<T>& pointer) {
|
||||
constexpr std::uint32_t header = T::GetHashValue();
|
||||
// Initialize the evaluation function parameters
|
||||
void initialize() {
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
|
||||
Detail::initialize(feature_transformer);
|
||||
Detail::initialize(network);
|
||||
}
|
||||
|
||||
return pointer->WriteParameters(stream);
|
||||
}
|
||||
} // namespace Detail
|
||||
// Read network header
|
||||
bool read_header(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
|
||||
{
|
||||
std::uint32_t version, size;
|
||||
|
||||
// Initialize the evaluation function parameters
|
||||
void initialize() {
|
||||
version = read_little_endian<std::uint32_t>(stream);
|
||||
*hash_value = read_little_endian<std::uint32_t>(stream);
|
||||
size = read_little_endian<std::uint32_t>(stream);
|
||||
if (!stream || version != kVersion) return false;
|
||||
architecture->resize(size);
|
||||
stream.read(&(*architecture)[0], size);
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
Detail::initialize(feature_transformer);
|
||||
Detail::initialize(network);
|
||||
}
|
||||
// write the header
|
||||
bool write_header(std::ostream& stream,
|
||||
std::uint32_t hash_value, const std::string& architecture) {
|
||||
|
||||
// Read network header
|
||||
bool read_header(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
|
||||
{
|
||||
std::uint32_t version, size;
|
||||
stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));
|
||||
stream.write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value));
|
||||
|
||||
version = read_little_endian<std::uint32_t>(stream);
|
||||
*hash_value = read_little_endian<std::uint32_t>(stream);
|
||||
size = read_little_endian<std::uint32_t>(stream);
|
||||
const std::uint32_t size = static_cast<std::uint32_t>(architecture.size());
|
||||
|
||||
if (!stream || version != kVersion)
|
||||
return false;
|
||||
stream.write(reinterpret_cast<const char*>(&size), sizeof(size));
|
||||
stream.write(architecture.data(), size);
|
||||
|
||||
architecture->resize(size);
|
||||
stream.read(&(*architecture)[0], size);
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
|
||||
// write the header
|
||||
bool write_header(std::ostream& stream,
|
||||
std::uint32_t hash_value, const std::string& architecture) {
|
||||
std::uint32_t hash_value;
|
||||
std::string architecture;
|
||||
if (!read_header(stream, &hash_value, &architecture)) return false;
|
||||
if (hash_value != kHashValue) return false;
|
||||
if (!Detail::ReadParameters(stream, *feature_transformer)) return false;
|
||||
if (!Detail::ReadParameters(stream, *network)) return false;
|
||||
return stream && stream.peek() == std::ios::traits_type::eof();
|
||||
}
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));
|
||||
stream.write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value));
|
||||
// write evaluation function parameters
|
||||
bool WriteParameters(std::ostream& stream) {
|
||||
|
||||
const std::uint32_t size = static_cast<std::uint32_t>(architecture.size());
|
||||
if (!write_header(stream, kHashValue, get_architecture_string()))
|
||||
return false;
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(&size), sizeof(size));
|
||||
stream.write(architecture.data(), size);
|
||||
if (!Detail::WriteParameters(stream, feature_transformer))
|
||||
return false;
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
if (!Detail::WriteParameters(stream, network))
|
||||
return false;
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
std::uint32_t hash_value;
|
||||
std::string architecture;
|
||||
if (!read_header(stream, &hash_value, &architecture))
|
||||
return false;
|
||||
// Evaluation function. Perform differential calculation.
|
||||
Value evaluate(const Position& pos) {
|
||||
|
||||
if (hash_value != kHashValue)
|
||||
return false;
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
|
||||
if (!Detail::ReadParameters(stream, *feature_transformer))
|
||||
return false;
|
||||
|
||||
if (!Detail::ReadParameters(stream, *network))
|
||||
return false;
|
||||
|
||||
return stream && stream.peek() == std::ios::traits_type::eof();
|
||||
}
|
||||
// write evaluation function parameters
|
||||
bool WriteParameters(std::ostream& stream) {
|
||||
|
||||
if (!write_header(stream, kHashValue, get_architecture_string()))
|
||||
return false;
|
||||
|
||||
if (!Detail::WriteParameters(stream, feature_transformer))
|
||||
return false;
|
||||
|
||||
if (!Detail::WriteParameters(stream, network))
|
||||
return false;
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
// Evaluation function. Perform differential calculation.
|
||||
Value evaluate(const Position& pos) {
|
||||
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
|
||||
constexpr uint64_t alignment = kCacheLineSize;
|
||||
constexpr uint64_t alignment = kCacheLineSize;
|
||||
|
||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||
TransformedFeatureType transformed_features_unaligned[
|
||||
FeatureTransformer::kBufferSize + alignment / sizeof(TransformedFeatureType)];
|
||||
char buffer_unaligned[Network::kBufferSize + alignment];
|
||||
TransformedFeatureType transformed_features_unaligned[
|
||||
FeatureTransformer::kBufferSize + alignment / sizeof(TransformedFeatureType)];
|
||||
char buffer_unaligned[Network::kBufferSize + alignment];
|
||||
|
||||
auto* transformed_features = align_ptr_up<alignment>(&transformed_features_unaligned[0]);
|
||||
auto* buffer = align_ptr_up<alignment>(&buffer_unaligned[0]);
|
||||
auto* transformed_features = align_ptr_up<alignment>(&transformed_features_unaligned[0]);
|
||||
auto* buffer = align_ptr_up<alignment>(&buffer_unaligned[0]);
|
||||
#else
|
||||
alignas(alignment)
|
||||
TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize];
|
||||
alignas(alignment) char buffer[Network::kBufferSize];
|
||||
alignas(alignment)
|
||||
TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize];
|
||||
alignas(alignment) char buffer[Network::kBufferSize];
|
||||
#endif
|
||||
|
||||
ASSERT_ALIGNED(transformed_features, alignment);
|
||||
ASSERT_ALIGNED(buffer, alignment);
|
||||
ASSERT_ALIGNED(transformed_features, alignment);
|
||||
ASSERT_ALIGNED(buffer, alignment);
|
||||
|
||||
feature_transformer->Transform(pos, transformed_features);
|
||||
feature_transformer->Transform(pos, transformed_features);
|
||||
const auto output = network->Propagate(transformed_features, buffer);
|
||||
|
||||
return static_cast<Value>(output[0] / FV_SCALE);
|
||||
}
|
||||
|
||||
const auto output = network->Propagate(transformed_features, buffer);
|
||||
// Load eval, from a file stream or a memory stream
|
||||
bool load_eval(std::string name, std::istream& stream) {
|
||||
|
||||
return static_cast<Value>(output[0] / FV_SCALE);
|
||||
}
|
||||
initialize();
|
||||
fileName = name;
|
||||
return ReadParameters(stream);
|
||||
}
|
||||
|
||||
// Load eval, from a file stream or a memory stream
|
||||
bool load_eval(std::string name, std::istream& stream) {
|
||||
static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode)
|
||||
{
|
||||
if (mode == "false")
|
||||
return UseNNUEMode::False;
|
||||
else if (mode == "true")
|
||||
return UseNNUEMode::True;
|
||||
else if (mode == "pure")
|
||||
return UseNNUEMode::Pure;
|
||||
|
||||
initialize();
|
||||
return UseNNUEMode::False;
|
||||
}
|
||||
|
||||
fileName = name;
|
||||
return ReadParameters(stream);
|
||||
}
|
||||
void init() {
|
||||
|
||||
static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode)
|
||||
{
|
||||
if (mode == "false")
|
||||
return UseNNUEMode::False;
|
||||
else if (mode == "true")
|
||||
return UseNNUEMode::True;
|
||||
else if (mode == "pure")
|
||||
return UseNNUEMode::Pure;
|
||||
useNNUE = nnue_mode_from_option(Options["Use NNUE"]);
|
||||
|
||||
return UseNNUEMode::False;
|
||||
}
|
||||
if (Options["SkipLoadingEval"] || useNNUE == UseNNUEMode::False)
|
||||
{
|
||||
eval_file_loaded.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
void init() {
|
||||
|
||||
useNNUE = nnue_mode_from_option(Options["Use NNUE"]);
|
||||
|
||||
if (Options["SkipLoadingEval"] || useNNUE == UseNNUEMode::False)
|
||||
{
|
||||
eval_file_loaded.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
std::string eval_file = std::string(Options["EvalFile"]);
|
||||
std::string eval_file = std::string(Options["EvalFile"]);
|
||||
|
||||
#if defined(DEFAULT_NNUE_DIRECTORY)
|
||||
#define stringify2(x) #x
|
||||
#define stringify(x) stringify2(x)
|
||||
std::vector<std::string> dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
|
||||
std::vector<std::string> dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
|
||||
#else
|
||||
std::vector<std::string> dirs = { "" , CommandLine::binaryDirectory };
|
||||
std::vector<std::string> dirs = { "" , CommandLine::binaryDirectory };
|
||||
#endif
|
||||
|
||||
for (std::string directory : dirs)
|
||||
{
|
||||
if (eval_file_loaded != eval_file)
|
||||
{
|
||||
std::ifstream stream(directory + eval_file, std::ios::binary);
|
||||
if (load_eval(eval_file, stream))
|
||||
{
|
||||
sync_cout << "info string Loaded eval file " << directory + eval_file << sync_endl;
|
||||
eval_file_loaded = eval_file;
|
||||
}
|
||||
else
|
||||
{
|
||||
sync_cout << "info string ERROR: failed to load eval file " << directory + eval_file << sync_endl;
|
||||
eval_file_loaded.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
for (std::string directory : dirs)
|
||||
{
|
||||
if (eval_file_loaded != eval_file)
|
||||
{
|
||||
std::ifstream stream(directory + eval_file, std::ios::binary);
|
||||
if (load_eval(eval_file, stream))
|
||||
{
|
||||
sync_cout << "info string Loaded eval file " << directory + eval_file << sync_endl;
|
||||
eval_file_loaded = eval_file;
|
||||
}
|
||||
else
|
||||
{
|
||||
sync_cout << "info string ERROR: failed to load eval file " << directory + eval_file << sync_endl;
|
||||
eval_file_loaded.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef stringify2
|
||||
#undef stringify
|
||||
}
|
||||
}
|
||||
|
||||
/// NNUE::verify() verifies that the last net used was loaded successfully
|
||||
void verify_eval_file_loaded() {
|
||||
/// NNUE::verify() verifies that the last net used was loaded successfully
|
||||
void verify_eval_file_loaded() {
|
||||
|
||||
std::string eval_file = std::string(Options["EvalFile"]);
|
||||
std::string eval_file = std::string(Options["EvalFile"]);
|
||||
|
||||
if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
|
||||
{
|
||||
UCI::OptionsMap defaults;
|
||||
UCI::init(defaults);
|
||||
if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
|
||||
{
|
||||
UCI::OptionsMap defaults;
|
||||
UCI::init(defaults);
|
||||
|
||||
std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
|
||||
std::string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
|
||||
std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
|
||||
std::string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(defaults["EvalFile"]);
|
||||
std::string msg5 = "The engine will be terminated now.";
|
||||
std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
|
||||
std::string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
|
||||
std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
|
||||
std::string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(defaults["EvalFile"]);
|
||||
std::string msg5 = "The engine will be terminated now.";
|
||||
|
||||
sync_cout << "info string ERROR: " << msg1 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg2 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg3 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg4 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg5 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg1 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg2 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg3 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg4 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg5 << sync_endl;
|
||||
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (useNNUE != UseNNUEMode::False)
|
||||
sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl;
|
||||
else
|
||||
sync_cout << "info string classical evaluation enabled" << sync_endl;
|
||||
}
|
||||
if (useNNUE != UseNNUEMode::False)
|
||||
sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl;
|
||||
else
|
||||
sync_cout << "info string classical evaluation enabled" << sync_endl;
|
||||
}
|
||||
|
||||
/// In training we override eval file so this is useful.
|
||||
void verify_any_net_loaded() {
|
||||
/// In training we override eval file so this is useful.
|
||||
void verify_any_net_loaded() {
|
||||
|
||||
if (!Options["SkipLoadingEval"] && useNNUE != UseNNUEMode::False && eval_file_loaded.empty())
|
||||
{
|
||||
UCI::OptionsMap defaults;
|
||||
UCI::init(defaults);
|
||||
if (!Options["SkipLoadingEval"] && useNNUE != UseNNUEMode::False && eval_file_loaded.empty())
|
||||
{
|
||||
UCI::OptionsMap defaults;
|
||||
UCI::init(defaults);
|
||||
|
||||
std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
|
||||
std::string msg2 = "The option is set to true, but the network file was not loaded successfully.";
|
||||
std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
|
||||
std::string msg5 = "The engine will be terminated now.";
|
||||
std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
|
||||
std::string msg2 = "The option is set to true, but the network file was not loaded successfully.";
|
||||
std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
|
||||
std::string msg5 = "The engine will be terminated now.";
|
||||
|
||||
sync_cout << "info string ERROR: " << msg1 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg2 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg3 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg5 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg1 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg2 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg3 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg5 << sync_endl;
|
||||
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (useNNUE != UseNNUEMode::False)
|
||||
sync_cout << "info string NNUE evaluation using " << eval_file_loaded << " enabled" << sync_endl;
|
||||
else
|
||||
sync_cout << "info string classical evaluation enabled" << sync_endl;
|
||||
}
|
||||
if (useNNUE != UseNNUEMode::False)
|
||||
sync_cout << "info string NNUE evaluation using " << eval_file_loaded << " enabled" << sync_endl;
|
||||
else
|
||||
sync_cout << "info string classical evaluation enabled" << sync_endl;
|
||||
}
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
@@ -1,21 +1,23 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// header used in NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
|
||||
#define NNUE_EVALUATE_NNUE_H_INCLUDED
|
||||
|
||||
@@ -25,84 +27,83 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
// header used in NNUE evaluation function
|
||||
namespace Eval::NNUE {
|
||||
|
||||
enum struct UseNNUEMode
|
||||
{
|
||||
False,
|
||||
True,
|
||||
Pure
|
||||
};
|
||||
enum struct UseNNUEMode
|
||||
{
|
||||
False,
|
||||
True,
|
||||
Pure
|
||||
};
|
||||
|
||||
// Hash value of evaluation function structure
|
||||
constexpr std::uint32_t kHashValue =
|
||||
FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
|
||||
// Hash value of evaluation function structure
|
||||
constexpr std::uint32_t kHashValue =
|
||||
FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
|
||||
|
||||
// Deleter for automating release of memory area
|
||||
template <typename T>
|
||||
struct AlignedDeleter {
|
||||
void operator()(T* ptr) const {
|
||||
ptr->~T();
|
||||
std_aligned_free(ptr);
|
||||
}
|
||||
};
|
||||
// Deleter for automating release of memory area
|
||||
template <typename T>
|
||||
struct AlignedDeleter {
|
||||
void operator()(T* ptr) const {
|
||||
ptr->~T();
|
||||
std_aligned_free(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct LargePageDeleter {
|
||||
void operator()(T* ptr) const {
|
||||
ptr->~T();
|
||||
aligned_large_pages_free(ptr);
|
||||
}
|
||||
};
|
||||
template <typename T>
|
||||
struct LargePageDeleter {
|
||||
void operator()(T* ptr) const {
|
||||
ptr->~T();
|
||||
aligned_large_pages_free(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
|
||||
template <typename T>
|
||||
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
|
||||
|
||||
template <typename T>
|
||||
using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
|
||||
template <typename T>
|
||||
using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
|
||||
|
||||
// Input feature converter
|
||||
extern LargePagePtr<FeatureTransformer> feature_transformer;
|
||||
// Input feature converter
|
||||
extern LargePagePtr<FeatureTransformer> feature_transformer;
|
||||
|
||||
// Evaluation function
|
||||
extern AlignedPtr<Network> network;
|
||||
// Evaluation function
|
||||
extern AlignedPtr<Network> network;
|
||||
|
||||
// Evaluation function file name
|
||||
extern std::string fileName;
|
||||
// Evaluation function file name
|
||||
extern std::string fileName;
|
||||
|
||||
// Saved evaluation function file name
|
||||
extern std::string savedfileName;
|
||||
// Saved evaluation function file name
|
||||
extern std::string savedfileName;
|
||||
|
||||
extern UseNNUEMode useNNUE;
|
||||
extern UseNNUEMode useNNUE;
|
||||
|
||||
extern std::string eval_file_loaded;
|
||||
extern std::string eval_file_loaded;
|
||||
|
||||
// Get a string that represents the structure of the evaluation function
|
||||
std::string get_architecture_string();
|
||||
// Get a string that represents the structure of the evaluation function
|
||||
std::string get_architecture_string();
|
||||
|
||||
std::string get_layers_info();
|
||||
std::string get_layers_info();
|
||||
|
||||
// read the header
|
||||
bool read_header(std::istream& stream,
|
||||
std::uint32_t* hash_value, std::string* architecture);
|
||||
// read the header
|
||||
bool read_header(std::istream& stream,
|
||||
std::uint32_t* hash_value, std::string* architecture);
|
||||
|
||||
// write the header
|
||||
bool write_header(std::ostream& stream,
|
||||
std::uint32_t hash_value, const std::string& architecture);
|
||||
// write the header
|
||||
bool write_header(std::ostream& stream,
|
||||
std::uint32_t hash_value, const std::string& architecture);
|
||||
|
||||
// read evaluation function parameters
|
||||
bool ReadParameters(std::istream& stream);
|
||||
// read evaluation function parameters
|
||||
bool ReadParameters(std::istream& stream);
|
||||
|
||||
// write evaluation function parameters
|
||||
bool WriteParameters(std::ostream& stream);
|
||||
// write evaluation function parameters
|
||||
bool WriteParameters(std::ostream& stream);
|
||||
|
||||
Value evaluate(const Position& pos);
|
||||
bool load_eval(std::string name, std::istream& stream);
|
||||
void init();
|
||||
Value evaluate(const Position& pos);
|
||||
bool load_eval(std::string name, std::istream& stream);
|
||||
void init();
|
||||
|
||||
void verify_eval_file_loaded();
|
||||
void verify_any_net_loaded();
|
||||
void verify_eval_file_loaded();
|
||||
void verify_any_net_loaded();
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// A class template that represents the input feature set of the NNUE evaluation function
|
||||
@@ -22,7 +22,6 @@
|
||||
#define NNUE_FEATURE_SET_H_INCLUDED
|
||||
|
||||
#include "features_common.h"
|
||||
|
||||
#include <array>
|
||||
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//Common header of input features of NNUE evaluation function
|
||||
@@ -21,30 +21,29 @@
|
||||
#ifndef NNUE_FEATURES_COMMON_H_INCLUDED
|
||||
#define NNUE_FEATURES_COMMON_H_INCLUDED
|
||||
|
||||
#include "evaluate.h"
|
||||
|
||||
#include "nnue/nnue_common.h"
|
||||
#include "../../evaluate.h"
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
class IndexList;
|
||||
class IndexList;
|
||||
|
||||
template <typename... FeatureTypes>
|
||||
class FeatureSet;
|
||||
template <typename... FeatureTypes>
|
||||
class FeatureSet;
|
||||
|
||||
// Trigger to perform full calculations instead of difference only
|
||||
enum class TriggerEvent {
|
||||
kNone, // Calculate the difference whenever possible
|
||||
kFriendKingMoved, // calculate full evaluation when own king moves
|
||||
kEnemyKingMoved, // calculate full evaluation when opponent king moves
|
||||
kAnyKingMoved, // calculate full evaluation when any king moves
|
||||
kAnyPieceMoved, // always calculate full evaluation
|
||||
};
|
||||
// Trigger to perform full calculations instead of difference only
|
||||
enum class TriggerEvent {
|
||||
kNone, // Calculate the difference whenever possible
|
||||
kFriendKingMoved, // calculate full evaluation when own king moves
|
||||
kEnemyKingMoved, // calculate full evaluation when opponent king moves
|
||||
kAnyKingMoved, // calculate full evaluation when any king moves
|
||||
kAnyPieceMoved, // always calculate full evaluation
|
||||
};
|
||||
|
||||
enum class Side {
|
||||
kFriend, // side to move
|
||||
kEnemy, // opponent
|
||||
};
|
||||
enum class Side {
|
||||
kFriend, // side to move
|
||||
kEnemy, // opponent
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Definition of index list of input features
|
||||
@@ -21,43 +21,43 @@
|
||||
#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED
|
||||
#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED
|
||||
|
||||
#include "position.h"
|
||||
|
||||
#include "nnue/nnue_architecture.h"
|
||||
#include "../../position.h"
|
||||
#include "../nnue_architecture.h"
|
||||
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Class template used for feature index list
|
||||
template <typename T, std::size_t MaxSize>
|
||||
class ValueList {
|
||||
// Class template used for feature index list
|
||||
template <typename T, std::size_t MaxSize>
|
||||
class ValueList {
|
||||
|
||||
public:
|
||||
std::size_t size() const { return size_; }
|
||||
void resize(std::size_t size) { size_ = size; }
|
||||
void push_back(const T& value) { values_[size_++] = value; }
|
||||
T& operator[](std::size_t index) { return values_[index]; }
|
||||
T* begin() { return values_; }
|
||||
T* end() { return values_ + size_; }
|
||||
const T& operator[](std::size_t index) const { return values_[index]; }
|
||||
const T* begin() const { return values_; }
|
||||
const T* end() const { return values_ + size_; }
|
||||
public:
|
||||
std::size_t size() const { return size_; }
|
||||
void resize(std::size_t size) { size_ = size; }
|
||||
void push_back(const T& value) { values_[size_++] = value; }
|
||||
T& operator[](std::size_t index) { return values_[index]; }
|
||||
T* begin() { return values_; }
|
||||
T* end() { return values_ + size_; }
|
||||
const T& operator[](std::size_t index) const { return values_[index]; }
|
||||
const T* begin() const { return values_; }
|
||||
const T* end() const { return values_ + size_; }
|
||||
|
||||
void swap(ValueList& other) {
|
||||
const std::size_t max_size = std::max(size_, other.size_);
|
||||
for (std::size_t i = 0; i < max_size; ++i) {
|
||||
std::swap(values_[i], other.values_[i]);
|
||||
}
|
||||
std::swap(size_, other.size_);
|
||||
}
|
||||
void swap(ValueList& other) {
|
||||
const std::size_t max_size = std::max(size_, other.size_);
|
||||
for (std::size_t i = 0; i < max_size; ++i) {
|
||||
std::swap(values_[i], other.values_[i]);
|
||||
}
|
||||
std::swap(size_, other.size_);
|
||||
}
|
||||
|
||||
private:
|
||||
T values_[MaxSize] = {};
|
||||
std::size_t size_ = 0;
|
||||
};
|
||||
private:
|
||||
T values_[MaxSize];
|
||||
std::size_t size_ = 0;
|
||||
};
|
||||
|
||||
//Type of feature index list
|
||||
class IndexList : public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
|
||||
};
|
||||
//Type of feature index list
|
||||
class IndexList
|
||||
: public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
|
||||
@@ -223,13 +223,13 @@ namespace Eval::NNUE::Layers {
|
||||
return _mm512_add_epi32(_mm512_permutexvar_epi32(indices, x), bias);
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) {
|
||||
#if defined (USE_VNNI)
|
||||
[[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) {
|
||||
acc = _mm512_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
[[maybe_unused]] auto m512_dpbusd_epi32 = [=](__m512i a, __m512i b) -> __m512i {
|
||||
__m512i product0 = _mm512_maddubs_epi16(a, b);
|
||||
product0 = _mm512_madd_epi16(product0, kOnes512);
|
||||
acc = _mm512_add_epi32(acc, product0);
|
||||
return _mm512_madd_epi16(product0, kOnes512);
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -256,14 +256,13 @@ namespace Eval::NNUE::Layers {
|
||||
|
||||
return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias);
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) {
|
||||
#if defined (USE_VNNI)
|
||||
[[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) {
|
||||
acc = _mm256_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
[[maybe_unused]] auto m256_dpbusd_epi32 = [=](__m256i a, __m256i b) -> __m256i {
|
||||
__m256i product0 = _mm256_maddubs_epi16(a, b);
|
||||
product0 = _mm256_madd_epi16(product0, kOnes256);
|
||||
acc = _mm256_add_epi32(acc, product0);
|
||||
return _mm256_madd_epi16(product0, kOnes256);
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -288,10 +287,9 @@ namespace Eval::NNUE::Layers {
|
||||
return _mm_add_epi32(sum0, bias);
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m128_add_dpbusd_epi32 = [=](__m128i& acc, __m128i a, __m128i b) {
|
||||
[[maybe_unused]] auto m128_dpbusd_epi32 = [=](__m128i a, __m128i b) -> __m128i {
|
||||
__m128i product0 = _mm_maddubs_epi16(a, b);
|
||||
product0 = _mm_madd_epi16(product0, kOnes128);
|
||||
acc = _mm_add_epi32(acc, product0);
|
||||
return _mm_madd_epi16(product0, kOnes128);
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -335,15 +333,6 @@ namespace Eval::NNUE::Layers {
|
||||
const __m512i bias = *reinterpret_cast<const __m512i*>(&biases_[i]);
|
||||
__m512i* outptr = reinterpret_cast<__m512i*>(&output[i]);
|
||||
|
||||
__m512i sum01a = _mm512_setzero_si512();
|
||||
__m512i sum23a = _mm512_setzero_si512();
|
||||
__m512i sum45a = _mm512_setzero_si512();
|
||||
__m512i sum67a = _mm512_setzero_si512();
|
||||
__m512i sum01b = _mm512_setzero_si512();
|
||||
__m512i sum23b = _mm512_setzero_si512();
|
||||
__m512i sum45b = _mm512_setzero_si512();
|
||||
__m512i sum67b = _mm512_setzero_si512();
|
||||
|
||||
const auto row01a = *reinterpret_cast<const __m512i*>(&weights_[offset01a]);
|
||||
const auto row23a = *reinterpret_cast<const __m512i*>(&weights_[offset23a]);
|
||||
const auto row45a = *reinterpret_cast<const __m512i*>(&weights_[offset45a]);
|
||||
@@ -356,6 +345,16 @@ namespace Eval::NNUE::Layers {
|
||||
const __m256i in256 = input_vector256[0];
|
||||
const __m512i in = _mm512_inserti64x4(_mm512_castsi256_si512(in256), in256, 1);
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
__m512i sum01a = _mm512_setzero_si512();
|
||||
__m512i sum23a = _mm512_setzero_si512();
|
||||
__m512i sum45a = _mm512_setzero_si512();
|
||||
__m512i sum67a = _mm512_setzero_si512();
|
||||
__m512i sum01b = _mm512_setzero_si512();
|
||||
__m512i sum23b = _mm512_setzero_si512();
|
||||
__m512i sum45b = _mm512_setzero_si512();
|
||||
__m512i sum67b = _mm512_setzero_si512();
|
||||
|
||||
m512_add_dpbusd_epi32(sum01a, in, row01a);
|
||||
m512_add_dpbusd_epi32(sum23a, in, row23a);
|
||||
m512_add_dpbusd_epi32(sum45a, in, row45a);
|
||||
@@ -364,6 +363,16 @@ namespace Eval::NNUE::Layers {
|
||||
m512_add_dpbusd_epi32(sum23b, in, row23b);
|
||||
m512_add_dpbusd_epi32(sum45b, in, row45b);
|
||||
m512_add_dpbusd_epi32(sum67b, in, row67b);
|
||||
#else
|
||||
__m512i sum01a = m512_dpbusd_epi32(in, row01a);
|
||||
__m512i sum23a = m512_dpbusd_epi32(in, row23a);
|
||||
__m512i sum45a = m512_dpbusd_epi32(in, row45a);
|
||||
__m512i sum67a = m512_dpbusd_epi32(in, row67a);
|
||||
__m512i sum01b = m512_dpbusd_epi32(in, row01b);
|
||||
__m512i sum23b = m512_dpbusd_epi32(in, row23b);
|
||||
__m512i sum45b = m512_dpbusd_epi32(in, row45b);
|
||||
__m512i sum67b = m512_dpbusd_epi32(in, row67b);
|
||||
#endif
|
||||
|
||||
*outptr = m512_hadd256x16(
|
||||
sum01a, sum23a, sum45a, sum67a,
|
||||
@@ -384,48 +393,80 @@ namespace Eval::NNUE::Layers {
|
||||
|
||||
if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) == 0)
|
||||
{
|
||||
__m512i sum0 = _mm512_setzero_si512();
|
||||
__m512i sum1 = _mm512_setzero_si512();
|
||||
__m512i sum2 = _mm512_setzero_si512();
|
||||
__m512i sum3 = _mm512_setzero_si512();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m512i*>(&weights_[offset0]);
|
||||
const auto row1 = reinterpret_cast<const __m512i*>(&weights_[offset1]);
|
||||
const auto row2 = reinterpret_cast<const __m512i*>(&weights_[offset2]);
|
||||
const auto row3 = reinterpret_cast<const __m512i*>(&weights_[offset3]);
|
||||
|
||||
for (IndexType j = 0; j < kNumChunks512; ++j)
|
||||
#if defined (USE_VNNI)
|
||||
__m512i sum0 = _mm512_setzero_si512();
|
||||
__m512i sum1 = _mm512_setzero_si512();
|
||||
__m512i sum2 = _mm512_setzero_si512();
|
||||
__m512i sum3 = _mm512_setzero_si512();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]);
|
||||
__m512i sum1 = m512_dpbusd_epi32(input_vector512[0], row1[0]);
|
||||
__m512i sum2 = m512_dpbusd_epi32(input_vector512[0], row2[0]);
|
||||
__m512i sum3 = m512_dpbusd_epi32(input_vector512[0], row3[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks512; ++j)
|
||||
{
|
||||
const __m512i in = input_vector512[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m512_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
m512_add_dpbusd_epi32(sum1, in, row1[j]);
|
||||
m512_add_dpbusd_epi32(sum2, in, row2[j]);
|
||||
m512_add_dpbusd_epi32(sum3, in, row3[j]);
|
||||
#else
|
||||
sum0 = _mm512_add_epi32(sum0, m512_dpbusd_epi32(in, row0[j]));
|
||||
sum1 = _mm512_add_epi32(sum1, m512_dpbusd_epi32(in, row1[j]));
|
||||
sum2 = _mm512_add_epi32(sum2, m512_dpbusd_epi32(in, row2[j]));
|
||||
sum3 = _mm512_add_epi32(sum3, m512_dpbusd_epi32(in, row3[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
*outptr = m512_haddx4(sum0, sum1, sum2, sum3, bias);
|
||||
}
|
||||
else
|
||||
{
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
__m256i sum1 = _mm256_setzero_si256();
|
||||
__m256i sum2 = _mm256_setzero_si256();
|
||||
__m256i sum3 = _mm256_setzero_si256();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[offset0]);
|
||||
const auto row1 = reinterpret_cast<const __m256i*>(&weights_[offset1]);
|
||||
const auto row2 = reinterpret_cast<const __m256i*>(&weights_[offset2]);
|
||||
const auto row3 = reinterpret_cast<const __m256i*>(&weights_[offset3]);
|
||||
|
||||
for (IndexType j = 0; j < kNumChunks256; ++j)
|
||||
#if defined (USE_VNNI)
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
__m256i sum1 = _mm256_setzero_si256();
|
||||
__m256i sum2 = _mm256_setzero_si256();
|
||||
__m256i sum3 = _mm256_setzero_si256();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m256i sum0 = m256_dpbusd_epi32(input_vector256[0], row0[0]);
|
||||
__m256i sum1 = m256_dpbusd_epi32(input_vector256[0], row1[0]);
|
||||
__m256i sum2 = m256_dpbusd_epi32(input_vector256[0], row2[0]);
|
||||
__m256i sum3 = m256_dpbusd_epi32(input_vector256[0], row3[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks256; ++j)
|
||||
{
|
||||
const __m256i in = input_vector256[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
m256_add_dpbusd_epi32(sum1, in, row1[j]);
|
||||
m256_add_dpbusd_epi32(sum2, in, row2[j]);
|
||||
m256_add_dpbusd_epi32(sum3, in, row3[j]);
|
||||
#else
|
||||
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
|
||||
sum1 = _mm256_add_epi32(sum1, m256_dpbusd_epi32(in, row1[j]));
|
||||
sum2 = _mm256_add_epi32(sum2, m256_dpbusd_epi32(in, row2[j]));
|
||||
sum3 = _mm256_add_epi32(sum3, m256_dpbusd_epi32(in, row3[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
*outptr = m256_haddx4(sum0, sum1, sum2, sum3, bias);
|
||||
@@ -436,30 +477,50 @@ namespace Eval::NNUE::Layers {
|
||||
{
|
||||
if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) == 0)
|
||||
{
|
||||
__m512i sum0 = _mm512_setzero_si512();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m512i*>(&weights_[0]);
|
||||
|
||||
for (IndexType j = 0; j < kNumChunks512; ++j)
|
||||
#if defined (USE_VNNI)
|
||||
__m512i sum0 = _mm512_setzero_si512();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks512; ++j)
|
||||
{
|
||||
const __m512i in = input_vector512[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m512_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
#else
|
||||
sum0 = _mm512_add_epi32(sum0, m512_dpbusd_epi32(in, row0[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
output[0] = m512_hadd(sum0, biases_[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
|
||||
|
||||
for (IndexType j = 0; j < kNumChunks256; ++j)
|
||||
#if defined (USE_VNNI)
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m256i sum0 = m256_dpbusd_epi32(input_vector256[0], row0[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks256; ++j)
|
||||
{
|
||||
const __m256i in = input_vector256[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
#else
|
||||
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
output[0] = m256_hadd(sum0, biases_[0]);
|
||||
@@ -493,24 +554,40 @@ namespace Eval::NNUE::Layers {
|
||||
const __m128i bias = *reinterpret_cast<const __m128i*>(&biases_[i]);
|
||||
__m128i* outptr = reinterpret_cast<__m128i*>(&output[i]);
|
||||
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
__m256i sum1 = _mm256_setzero_si256();
|
||||
__m256i sum2 = _mm256_setzero_si256();
|
||||
__m256i sum3 = _mm256_setzero_si256();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[offset0]);
|
||||
const auto row1 = reinterpret_cast<const __m256i*>(&weights_[offset1]);
|
||||
const auto row2 = reinterpret_cast<const __m256i*>(&weights_[offset2]);
|
||||
const auto row3 = reinterpret_cast<const __m256i*>(&weights_[offset3]);
|
||||
|
||||
for (IndexType j = 0; j < kNumChunks; ++j)
|
||||
#if defined (USE_VNNI)
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
__m256i sum1 = _mm256_setzero_si256();
|
||||
__m256i sum2 = _mm256_setzero_si256();
|
||||
__m256i sum3 = _mm256_setzero_si256();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m256i sum0 = m256_dpbusd_epi32(input_vector[0], row0[0]);
|
||||
__m256i sum1 = m256_dpbusd_epi32(input_vector[0], row1[0]);
|
||||
__m256i sum2 = m256_dpbusd_epi32(input_vector[0], row2[0]);
|
||||
__m256i sum3 = m256_dpbusd_epi32(input_vector[0], row3[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks; ++j)
|
||||
{
|
||||
const __m256i in = input_vector[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
m256_add_dpbusd_epi32(sum1, in, row1[j]);
|
||||
m256_add_dpbusd_epi32(sum2, in, row2[j]);
|
||||
m256_add_dpbusd_epi32(sum3, in, row3[j]);
|
||||
#else
|
||||
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
|
||||
sum1 = _mm256_add_epi32(sum1, m256_dpbusd_epi32(in, row1[j]));
|
||||
sum2 = _mm256_add_epi32(sum2, m256_dpbusd_epi32(in, row2[j]));
|
||||
sum3 = _mm256_add_epi32(sum3, m256_dpbusd_epi32(in, row3[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
*outptr = m256_haddx4(sum0, sum1, sum2, sum3, bias);
|
||||
@@ -518,15 +595,25 @@ namespace Eval::NNUE::Layers {
|
||||
}
|
||||
else if constexpr (kOutputDimensions == 1)
|
||||
{
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
|
||||
|
||||
for (IndexType j = 0; j < kNumChunks; ++j)
|
||||
#if defined (USE_VNNI)
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m256i sum0 = m256_dpbusd_epi32(input_vector[0], row0[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks; ++j)
|
||||
{
|
||||
const __m256i in = input_vector[j];
|
||||
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
#if defined (USE_VNNI)
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
#else
|
||||
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
output[0] = m256_hadd(sum0, biases_[0]);
|
||||
@@ -559,24 +646,24 @@ namespace Eval::NNUE::Layers {
|
||||
const __m128i bias = *reinterpret_cast<const __m128i*>(&biases_[i]);
|
||||
__m128i* outptr = reinterpret_cast<__m128i*>(&output[i]);
|
||||
|
||||
__m128i sum0 = _mm_setzero_si128();
|
||||
__m128i sum1 = _mm_setzero_si128();
|
||||
__m128i sum2 = _mm_setzero_si128();
|
||||
__m128i sum3 = _mm_setzero_si128();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m128i*>(&weights_[offset0]);
|
||||
const auto row1 = reinterpret_cast<const __m128i*>(&weights_[offset1]);
|
||||
const auto row2 = reinterpret_cast<const __m128i*>(&weights_[offset2]);
|
||||
const auto row3 = reinterpret_cast<const __m128i*>(&weights_[offset3]);
|
||||
|
||||
for (int j = 0; j < (int)kNumChunks; j += 1)
|
||||
__m128i sum0 = m128_dpbusd_epi32(input_vector[0], row0[0]);
|
||||
__m128i sum1 = m128_dpbusd_epi32(input_vector[0], row1[0]);
|
||||
__m128i sum2 = m128_dpbusd_epi32(input_vector[0], row2[0]);
|
||||
__m128i sum3 = m128_dpbusd_epi32(input_vector[0], row3[0]);
|
||||
|
||||
for (int j = 1; j < (int)kNumChunks; ++j)
|
||||
{
|
||||
const __m128i in = input_vector[j];
|
||||
|
||||
m128_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
m128_add_dpbusd_epi32(sum1, in, row1[j]);
|
||||
m128_add_dpbusd_epi32(sum2, in, row2[j]);
|
||||
m128_add_dpbusd_epi32(sum3, in, row3[j]);
|
||||
sum0 = _mm_add_epi32(sum0, m128_dpbusd_epi32(in, row0[j]));
|
||||
sum1 = _mm_add_epi32(sum1, m128_dpbusd_epi32(in, row1[j]));
|
||||
sum2 = _mm_add_epi32(sum2, m128_dpbusd_epi32(in, row2[j]));
|
||||
sum3 = _mm_add_epi32(sum3, m128_dpbusd_epi32(in, row3[j]));
|
||||
}
|
||||
|
||||
*outptr = m128_haddx4(sum0, sum1, sum2, sum3, bias);
|
||||
@@ -584,16 +671,12 @@ namespace Eval::NNUE::Layers {
|
||||
}
|
||||
else if constexpr (kOutputDimensions == 1)
|
||||
{
|
||||
__m128i sum0 = _mm_setzero_si128();
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m128i*>(&weights_[0]);
|
||||
|
||||
for (int j = 0; j < (int)kNumChunks; j += 1)
|
||||
{
|
||||
const __m128i in = input_vector[j];
|
||||
__m128i sum0 = m128_dpbusd_epi32(input_vector[0], row0[0]);
|
||||
|
||||
m128_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
}
|
||||
for (int j = 1; j < (int)kNumChunks; ++j)
|
||||
sum0 = _mm_add_epi32(sum0, m128_dpbusd_epi32(input_vector[j], row0[j]));
|
||||
|
||||
output[0] = m128_hadd(sum0, biases_[0]);
|
||||
}
|
||||
|
||||
@@ -1,34 +1,35 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Class for difference calculation of NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_ACCUMULATOR_H_INCLUDED
|
||||
#define NNUE_ACCUMULATOR_H_INCLUDED
|
||||
|
||||
#include "nnue_architecture.h"
|
||||
|
||||
// Class for difference calculation of NNUE evaluation function
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// Class that holds the result of affine transformation of input features
|
||||
struct alignas(kCacheLineSize) Accumulator {
|
||||
std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
|
||||
bool computed_accumulation;
|
||||
};
|
||||
// Class that holds the result of affine transformation of input features
|
||||
struct alignas(kCacheLineSize) Accumulator {
|
||||
std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
|
||||
bool computed_accumulation;
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
|
||||
@@ -1,36 +1,37 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_ARCHITECTURE_H_INCLUDED
|
||||
#define NNUE_ARCHITECTURE_H_INCLUDED
|
||||
|
||||
// Defines the network structure
|
||||
#include "architectures/halfkp_256x2-32-32.h"
|
||||
|
||||
// Input features and network structure used in NNUE evaluation function
|
||||
namespace Eval::NNUE {
|
||||
|
||||
static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
|
||||
static_assert(Network::kOutputDimensions == 1, "");
|
||||
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
|
||||
static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
|
||||
static_assert(Network::kOutputDimensions == 1, "");
|
||||
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
|
||||
|
||||
// Trigger for full calculation instead of difference calculation
|
||||
constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
|
||||
// Trigger for full calculation instead of difference calculation
|
||||
constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// A class that converts the input features of the NNUE evaluation function
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
#include "nnue_common.h"
|
||||
#include "nnue_architecture.h"
|
||||
|
||||
#include "features/index_list.h"
|
||||
|
||||
#include <cstring>
|
||||
@@ -31,456 +30,486 @@
|
||||
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// If vector instructions are enabled, we update and refresh the
|
||||
// accumulator tile by tile such that each tile fits in the CPU's
|
||||
// vector registers.
|
||||
#define TILING
|
||||
// If vector instructions are enabled, we update and refresh the
|
||||
// accumulator tile by tile such that each tile fits in the CPU's
|
||||
// vector registers.
|
||||
#define VECTOR
|
||||
|
||||
#ifdef USE_AVX512
|
||||
typedef __m512i vec_t;
|
||||
#define vec_load(a) _mm512_load_si512(a)
|
||||
#define vec_store(a,b) _mm512_store_si512(a,b)
|
||||
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
|
||||
#define vec_zero _mm512_setzero_si512()
|
||||
static constexpr IndexType kNumRegs = 8; // only 8 are needed
|
||||
#ifdef USE_AVX512
|
||||
typedef __m512i vec_t;
|
||||
#define vec_load(a) _mm512_load_si512(a)
|
||||
#define vec_store(a,b) _mm512_store_si512(a,b)
|
||||
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
|
||||
#define vec_zero _mm512_setzero_si512()
|
||||
static constexpr IndexType kNumRegs = 8; // only 8 are needed
|
||||
|
||||
#elif USE_AVX2
|
||||
typedef __m256i vec_t;
|
||||
#define vec_load(a) _mm256_load_si256(a)
|
||||
#define vec_store(a,b) _mm256_store_si256(a,b)
|
||||
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
|
||||
#define vec_zero _mm256_setzero_si256()
|
||||
static constexpr IndexType kNumRegs = 16;
|
||||
|
||||
#elif USE_SSE2
|
||||
typedef __m128i vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) _mm_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
|
||||
#define vec_zero _mm_setzero_si128()
|
||||
static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8;
|
||||
|
||||
#elif USE_MMX
|
||||
typedef __m64 vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) _mm_add_pi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
|
||||
#define vec_zero _mm_setzero_si64()
|
||||
static constexpr IndexType kNumRegs = 8;
|
||||
|
||||
#elif USE_NEON
|
||||
typedef int16x8_t vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) vaddq_s16(a,b)
|
||||
#define vec_sub_16(a,b) vsubq_s16(a,b)
|
||||
#define vec_zero {0}
|
||||
#elif USE_AVX2
|
||||
typedef __m256i vec_t;
|
||||
#define vec_load(a) _mm256_load_si256(a)
|
||||
#define vec_store(a,b) _mm256_store_si256(a,b)
|
||||
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
|
||||
#define vec_zero _mm256_setzero_si256()
|
||||
static constexpr IndexType kNumRegs = 16;
|
||||
|
||||
#elif USE_SSE2
|
||||
typedef __m128i vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) _mm_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
|
||||
#define vec_zero _mm_setzero_si128()
|
||||
static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8;
|
||||
|
||||
#elif USE_MMX
|
||||
typedef __m64 vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) _mm_add_pi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
|
||||
#define vec_zero _mm_setzero_si64()
|
||||
static constexpr IndexType kNumRegs = 8;
|
||||
|
||||
#elif USE_NEON
|
||||
typedef int16x8_t vec_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) vaddq_s16(a,b)
|
||||
#define vec_sub_16(a,b) vsubq_s16(a,b)
|
||||
#define vec_zero {0}
|
||||
static constexpr IndexType kNumRegs = 16;
|
||||
|
||||
#else
|
||||
#undef VECTOR
|
||||
|
||||
#endif
|
||||
|
||||
// Input feature converter
|
||||
class FeatureTransformer {
|
||||
|
||||
private:
|
||||
// Number of output dimensions for one side
|
||||
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
|
||||
|
||||
#ifdef VECTOR
|
||||
static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
|
||||
static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
|
||||
#endif
|
||||
|
||||
public:
|
||||
// Output type
|
||||
using OutputType = TransformedFeatureType;
|
||||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
|
||||
static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
|
||||
|
||||
// Size of forward propagation buffer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
kOutputDimensions * sizeof(OutputType);
|
||||
|
||||
static constexpr int kLayerIndex = 0;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
|
||||
return RawFeatures::kHashValue ^ kOutputDimensions;
|
||||
}
|
||||
|
||||
static std::string get_name() {
|
||||
return RawFeatures::get_name() + "[" +
|
||||
std::to_string(kInputDimensions) + "->" +
|
||||
std::to_string(kHalfDimensions) + "x2]";
|
||||
}
|
||||
|
||||
// a string representing the structure
|
||||
static std::string get_structure_string() {
|
||||
return get_name();
|
||||
}
|
||||
|
||||
static std::string get_layers_info() {
|
||||
std::string info = " - ";
|
||||
info += std::to_string(kLayerIndex);
|
||||
info += " - ";
|
||||
info += get_name();
|
||||
return info;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
|
||||
for (std::size_t i = 0; i < kHalfDimensions; ++i)
|
||||
biases_[i] = read_little_endian<BiasType>(stream);
|
||||
for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
|
||||
weights_[i] = read_little_endian<WeightType>(stream);
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
stream.write(reinterpret_cast<const char*>(biases_),
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(weights_),
|
||||
kHalfDimensions * kInputDimensions * sizeof(WeightType));
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Proceed with the difference calculation if possible
|
||||
bool update_accumulator_if_possible(const Position& pos) const {
|
||||
|
||||
const auto now = pos.state();
|
||||
if (now->accumulator.computed_accumulation)
|
||||
return true;
|
||||
|
||||
const auto prev = now->previous;
|
||||
if (prev && prev->accumulator.computed_accumulation) {
|
||||
update_accumulator(pos);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Convert input features
|
||||
void Transform(const Position& pos, OutputType* output) const {
|
||||
|
||||
if (!update_accumulator_if_possible(pos))
|
||||
refresh_accumulator(pos);
|
||||
|
||||
const auto& accumulation = pos.state()->accumulator.accumulation;
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth * 2);
|
||||
static_assert(kHalfDimensions % (kSimdWidth * 2) == 0);
|
||||
const __m512i kControl = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7);
|
||||
const __m512i kZero = _mm512_setzero_si512();
|
||||
|
||||
#elif defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
constexpr int kControl = 0b11011000;
|
||||
const __m256i kZero = _mm256_setzero_si256();
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
|
||||
#ifdef USE_SSE41
|
||||
const __m128i kZero = _mm_setzero_si128();
|
||||
#else
|
||||
const __m128i k0x80s = _mm_set1_epi8(-128);
|
||||
#endif
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
const __m64 k0x80s = _mm_set1_pi8(-128);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
const int8x8_t kZero = {0};
|
||||
#endif
|
||||
|
||||
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
|
||||
for (IndexType p = 0; p < 2; ++p) {
|
||||
const IndexType offset = kHalfDimensions * p;
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
auto out = reinterpret_cast<__m512i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m512i sum0 = _mm512_load_si512(
|
||||
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m512i sum1 = _mm512_load_si512(
|
||||
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm512_add_epi16(sum0, reinterpret_cast<const __m512i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm512_add_epi16(sum1, reinterpret_cast<const __m512i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
_mm512_store_si512(&out[j], _mm512_permutexvar_epi64(kControl,
|
||||
_mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), kZero)));
|
||||
}
|
||||
|
||||
#elif defined(USE_AVX2)
|
||||
auto out = reinterpret_cast<__m256i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m256i sum0 = _mm256_load_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m256i sum1 = _mm256_load_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
|
||||
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
|
||||
}
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
auto out = reinterpret_cast<__m128i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
|
||||
|
||||
_mm_store_si128(&out[j],
|
||||
|
||||
#ifdef USE_SSE41
|
||||
_mm_max_epi8(packedbytes, kZero)
|
||||
#else
|
||||
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
|
||||
#endif
|
||||
|
||||
);
|
||||
}
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
auto out = reinterpret_cast<__m64*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m64 sum0 = *(&reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m64 sum1 = *(&reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm_add_pi16(sum0, reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm_add_pi16(sum1, reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
|
||||
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
|
||||
}
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
|
||||
accumulation[perspectives[p]][0])[j];
|
||||
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
|
||||
accumulation[perspectives[p]][i])[j]);
|
||||
}
|
||||
|
||||
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
|
||||
}
|
||||
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j) {
|
||||
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum += accumulation[static_cast<int>(perspectives[p])][i][j];
|
||||
}
|
||||
|
||||
output[offset + j] = static_cast<OutputType>(
|
||||
std::max<int>(0, std::min<int>(127, sum)));
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
// Calculate cumulative value without using difference calculation
|
||||
void refresh_accumulator(const Position& pos) const {
|
||||
|
||||
#ifdef VECTOR
|
||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
||||
// is defined in the VECTOR code below, once in each branch
|
||||
vec_t acc[kNumRegs];
|
||||
#endif
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList active_indices[2];
|
||||
RawFeatures::append_active_indices(pos, kRefreshTriggers[i],
|
||||
active_indices);
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
#ifdef VECTOR
|
||||
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
|
||||
auto accTile = reinterpret_cast<vec_t*>(
|
||||
&accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
|
||||
if (i == 0) {
|
||||
auto biasesTile = reinterpret_cast<const vec_t*>(
|
||||
&biases_[j * kTileHeight]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
} else {
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_zero;
|
||||
}
|
||||
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; k++)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
#else
|
||||
#undef TILING
|
||||
|
||||
#endif
|
||||
|
||||
// Input feature converter
|
||||
class FeatureTransformer {
|
||||
|
||||
private:
|
||||
// Number of output dimensions for one side
|
||||
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
|
||||
|
||||
#ifdef TILING
|
||||
static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
|
||||
static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
|
||||
#endif
|
||||
|
||||
public:
|
||||
// Output type
|
||||
using OutputType = TransformedFeatureType;
|
||||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
|
||||
static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
|
||||
|
||||
// Size of forward propagation buffer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
kOutputDimensions * sizeof(OutputType);
|
||||
|
||||
static constexpr int kLayerIndex = 0;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
|
||||
return RawFeatures::kHashValue ^ kOutputDimensions;
|
||||
}
|
||||
|
||||
static std::string get_name() {
|
||||
return RawFeatures::get_name() + "[" +
|
||||
std::to_string(kInputDimensions) + "->" +
|
||||
std::to_string(kHalfDimensions) + "x2]";
|
||||
}
|
||||
|
||||
// a string representing the structure
|
||||
static std::string get_structure_string() {
|
||||
return get_name();
|
||||
}
|
||||
|
||||
static std::string get_layers_info() {
|
||||
std::string info = " - ";
|
||||
info += std::to_string(kLayerIndex);
|
||||
info += " - ";
|
||||
info += get_name();
|
||||
return info;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
|
||||
for (std::size_t i = 0; i < kHalfDimensions; ++i)
|
||||
biases_[i] = read_little_endian<BiasType>(stream);
|
||||
|
||||
for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
|
||||
weights_[i] = read_little_endian<WeightType>(stream);
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
stream.write(reinterpret_cast<const char*>(biases_),
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(weights_),
|
||||
kHalfDimensions * kInputDimensions * sizeof(WeightType));
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Proceed with the difference calculation if possible
|
||||
bool update_accumulator_if_possible(const Position& pos) const {
|
||||
|
||||
const auto now = pos.state();
|
||||
if (now->accumulator.computed_accumulation)
|
||||
return true;
|
||||
|
||||
const auto prev = now->previous;
|
||||
if (prev && prev->accumulator.computed_accumulation) {
|
||||
update_accumulator(pos);
|
||||
return true;
|
||||
if (i == 0) {
|
||||
std::memcpy(accumulator.accumulation[perspective][i], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
} else {
|
||||
std::memset(accumulator.accumulation[perspective][i], 0,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Convert input features
|
||||
void Transform(const Position& pos, OutputType* output) const {
|
||||
|
||||
if (!update_accumulator_if_possible(pos))
|
||||
refresh_accumulator(pos);
|
||||
|
||||
const auto& accumulation = pos.state()->accumulator.accumulation;
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
constexpr int kControl = 0b11011000;
|
||||
const __m256i kZero = _mm256_setzero_si256();
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
|
||||
#ifdef USE_SSE41
|
||||
const __m128i kZero = _mm_setzero_si128();
|
||||
#else
|
||||
const __m128i k0x80s = _mm_set1_epi8(-128);
|
||||
#endif
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
const __m64 k0x80s = _mm_set1_pi8(-128);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
const int8x8_t kZero = {0};
|
||||
#endif
|
||||
|
||||
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
|
||||
for (IndexType p = 0; p < 2; ++p) {
|
||||
const IndexType offset = kHalfDimensions * p;
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
auto out = reinterpret_cast<__m256i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m256i sum0 = _mm256_load_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m256i sum1 = _mm256_load_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
|
||||
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
|
||||
}
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
auto out = reinterpret_cast<__m128i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
|
||||
|
||||
_mm_store_si128(&out[j],
|
||||
|
||||
#ifdef USE_SSE41
|
||||
_mm_max_epi8(packedbytes, kZero)
|
||||
#else
|
||||
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
|
||||
#endif
|
||||
|
||||
);
|
||||
}
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
auto out = reinterpret_cast<__m64*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m64 sum0 = *(&reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m64 sum1 = *(&reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm_add_pi16(sum0, reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm_add_pi16(sum1, reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
|
||||
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
|
||||
}
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
|
||||
accumulation[perspectives[p]][0])[j];
|
||||
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
|
||||
accumulation[perspectives[p]][i])[j]);
|
||||
}
|
||||
|
||||
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
|
||||
}
|
||||
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j) {
|
||||
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum += accumulation[static_cast<int>(perspectives[p])][i][j];
|
||||
}
|
||||
|
||||
output[offset + j] = static_cast<OutputType>(
|
||||
std::max<int>(0, std::min<int>(127, sum)));
|
||||
}
|
||||
#endif
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
|
||||
}
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
// Calculate cumulative value without using difference calculation
|
||||
void refresh_accumulator(const Position& pos) const {
|
||||
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList active_indices[2];
|
||||
RawFeatures::append_active_indices(pos, kRefreshTriggers[i],
|
||||
active_indices);
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
#ifdef TILING
|
||||
for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) {
|
||||
auto accTile = reinterpret_cast<vec_t*>(
|
||||
&accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
vec_t acc[kNumRegs];
|
||||
|
||||
if (i == 0) {
|
||||
auto biasesTile = reinterpret_cast<const vec_t*>(
|
||||
&biases_[j * kTileHeight]);
|
||||
for (unsigned k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
} else {
|
||||
for (unsigned k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_zero;
|
||||
}
|
||||
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
for (unsigned k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
for (unsigned k = 0; k < kNumRegs; k++)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
#else
|
||||
if (i == 0) {
|
||||
std::memcpy(accumulator.accumulation[perspective][i], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
} else {
|
||||
std::memset(accumulator.accumulation[perspective][i], 0,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
}
|
||||
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
_mm_empty();
|
||||
#endif
|
||||
|
||||
accumulator.computed_accumulation = true;
|
||||
accumulator.computed_accumulation = true;
|
||||
}
|
||||
|
||||
// Calculate cumulative value using difference calculation
|
||||
void update_accumulator(const Position& pos) const {
|
||||
|
||||
#ifdef VECTOR
|
||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
||||
// is defined in the VECTOR code below, once in each branch
|
||||
vec_t acc[kNumRegs];
|
||||
#endif
|
||||
const auto& prev_accumulator = pos.state()->previous->accumulator;
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList removed_indices[2], added_indices[2];
|
||||
bool reset[2] = { false, false };
|
||||
RawFeatures::append_changed_indices(pos, kRefreshTriggers[i],
|
||||
removed_indices, added_indices, reset);
|
||||
|
||||
#ifdef VECTOR
|
||||
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
auto accTile = reinterpret_cast<vec_t*>(
|
||||
&accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
|
||||
if (reset[perspective]) {
|
||||
if (i == 0) {
|
||||
auto biasesTile = reinterpret_cast<const vec_t*>(
|
||||
&biases_[j * kTileHeight]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
} else {
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_zero;
|
||||
}
|
||||
} else {
|
||||
auto prevAccTile = reinterpret_cast<const vec_t*>(
|
||||
&prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_load(&prevAccTile[k]);
|
||||
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
}
|
||||
|
||||
{ // Difference calculation for the activated features
|
||||
for (const auto index : added_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
|
||||
// Calculate cumulative value using difference calculation
|
||||
void update_accumulator(const Position& pos) const {
|
||||
|
||||
const auto& prev_accumulator = pos.state()->previous->accumulator;
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList removed_indices[2], added_indices[2];
|
||||
bool reset[2] = { false, false };
|
||||
RawFeatures::append_changed_indices(pos, kRefreshTriggers[i],
|
||||
removed_indices, added_indices, reset);
|
||||
|
||||
#ifdef TILING
|
||||
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
auto accTile = reinterpret_cast<vec_t*>(
|
||||
&accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
vec_t acc[kNumRegs];
|
||||
|
||||
if (reset[perspective]) {
|
||||
if (i == 0) {
|
||||
auto biasesTile = reinterpret_cast<const vec_t*>(
|
||||
&biases_[j * kTileHeight]);
|
||||
for (unsigned k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
} else {
|
||||
for (unsigned k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_zero;
|
||||
}
|
||||
} else {
|
||||
auto prevAccTile = reinterpret_cast<const vec_t*>(
|
||||
&prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_load(&prevAccTile[k]);
|
||||
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
}
|
||||
|
||||
{ // Difference calculation for the activated features
|
||||
for (const auto index : added_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
_mm_empty();
|
||||
#endif
|
||||
|
||||
#else
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
|
||||
if (reset[perspective]) {
|
||||
if (i == 0) {
|
||||
std::memcpy(accumulator.accumulation[perspective][i], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
} else {
|
||||
std::memset(accumulator.accumulation[perspective][i], 0,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
}
|
||||
} else {
|
||||
std::memcpy(accumulator.accumulation[perspective][i],
|
||||
prev_accumulator.accumulation[perspective][i],
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
if (reset[perspective]) {
|
||||
if (i == 0) {
|
||||
std::memcpy(accumulator.accumulation[perspective][i], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
} else {
|
||||
std::memset(accumulator.accumulation[perspective][i], 0,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
}
|
||||
} else {
|
||||
std::memcpy(accumulator.accumulation[perspective][i],
|
||||
prev_accumulator.accumulation[perspective][i],
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
|
||||
}
|
||||
}
|
||||
{ // Difference calculation for the activated features
|
||||
for (const auto index : added_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
accumulator.computed_accumulation = true;
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
|
||||
}
|
||||
}
|
||||
{ // Difference calculation for the activated features
|
||||
for (const auto index : added_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
|
||||
using BiasType = std::int16_t;
|
||||
using WeightType = std::int16_t;
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
accumulator.computed_accumulation = true;
|
||||
}
|
||||
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<FeatureTransformer>;
|
||||
using BiasType = std::int16_t;
|
||||
using WeightType = std::int16_t;
|
||||
|
||||
alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
|
||||
alignas(kCacheLineSize)
|
||||
WeightType weights_[kHalfDimensions * kInputDimensions];
|
||||
};
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<FeatureTransformer>;
|
||||
|
||||
alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
|
||||
alignas(kCacheLineSize)
|
||||
WeightType weights_[kHalfDimensions * kInputDimensions];
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
#endif //#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
|
||||
#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
|
||||
|
||||
Reference in New Issue
Block a user