diff --git a/AUTHORS b/AUTHORS index f30be4de..b31a36e9 100644 --- a/AUTHORS +++ b/AUTHORS @@ -44,6 +44,7 @@ Daniel Dugovic (ddugovic) Dariusz Orzechowski (dorzechowski) David Zar Daylen Yang (daylen) +Deshawn Mohan-Smith (GoldenRare) DiscanX Dominik Schlösser (domschl) double-beep @@ -64,7 +65,6 @@ Gary Heckman (gheckman) George Sobala (gsobala) gguliash Gian-Carlo Pascutto (gcp) -Deshawn Mohan-Smith (GoldenRare) Gontran Lemaire (gonlem) Goodkov Vasiliy Aleksandrovich (goodkov) Gregor Cramer @@ -112,6 +112,7 @@ Mark Tenzer (31m059) marotear Matthew Lai (matthewlai) Matthew Sullivan (Matt14916) +Maxim Molchanov (Maxim) Michael An (man) Michael Byrne (MichaelB7) Michael Chaly (Vizvezdenec) diff --git a/src/Makefile b/src/Makefile index 7f00bfff..7f1eaa86 100644 --- a/src/Makefile +++ b/src/Makefile @@ -41,7 +41,7 @@ BINDIR = $(PREFIX)/bin ### Built-in benchmark for pgo-builds PGO_TRAINING_DATA_FILE = pgo_training_data.bin PGOBENCH = ./$(EXE) bench -PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 1000 output_file_name $(PGO_TRAINING_DATA_FILE) +PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 1000 sfen_format bin output_file_name $(PGO_TRAINING_DATA_FILE) ### Source and object files SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \ diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 2abc6ac8..dd204a52 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -84,11 +84,11 @@ using namespace Trace; namespace { // Threshold for lazy and space evaluation - constexpr Value LazyThreshold1 = Value(1400); - constexpr Value LazyThreshold2 = Value(1300); - constexpr Value SpaceThreshold = Value(12222); - constexpr Value NNUEThreshold1 = Value(550); - constexpr Value NNUEThreshold2 = Value(150); + constexpr Value LazyThreshold1 = Value(1565); + constexpr Value LazyThreshold2 = Value(1102); + constexpr Value SpaceThreshold = Value(11551); + constexpr Value NNUEThreshold1 = Value(682); + constexpr Value NNUEThreshold2 = Value(176); // KingAttackWeights[PieceType] contains king attack weights by piece type constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 }; @@ -930,7 +930,7 @@ Value Eval::evaluate(const Position& pos) { { // Scale and shift NNUE for compatibility with search and classical evaluation auto adjusted_NNUE = [&](){ - int mat = pos.non_pawn_material() + PieceValue[MG][PAWN] * pos.count(); + int mat = pos.non_pawn_material() + PawnValueMg * pos.count(); return NNUE::evaluate(pos) * (720 + mat / 32) / 1024 + Tempo; }; @@ -940,16 +940,18 @@ Value Eval::evaluate(const Position& pos) { bool largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50; bool classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB)); - v = classical ? Evaluation(pos).value() : adjusted_NNUE(); + bool strongClassical = pos.non_pawn_material() < 2 * RookValueMg && pos.count() < 2; + + v = classical || strongClassical ? Evaluation(pos).value() : adjusted_NNUE(); // If the classical eval is small and imbalance large, use NNUE nevertheless. // For the case of opposite colored bishops, switch to NNUE eval with // small probability if the classical eval is less than the threshold. - if ( largePsq - && (abs(v) * 16 < NNUEThreshold2 * r50 - || ( pos.opposite_bishops() - && abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50 - && !(pos.this_thread()->nodes & 0xB)))) + if ( largePsq && !strongClassical + && ( abs(v) * 16 < NNUEThreshold2 * r50 + || ( pos.opposite_bishops() + && abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50 + && !(pos.this_thread()->nodes & 0xB)))) v = adjusted_NNUE(); } diff --git a/src/misc.cpp b/src/misc.cpp index 879f4462..eb68e842 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -585,11 +585,10 @@ namespace CommandLine { string argv0; // path+name of the executable binary, as given by argv[0] string binaryDirectory; // path of the executable directory string workingDirectory; // path of the working directory -string pathSeparator; // Separator for our current OS void init(int argc, char* argv[]) { (void)argc; - string separator; + string pathSeparator; // extract the path+name of the executable binary argv0 = argv[0]; diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index c9a3ddbb..c7bd681f 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -1,19 +1,19 @@ /* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ // Code for calculating NNUE evaluation function @@ -40,330 +40,313 @@ namespace Eval::NNUE { - const uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = { - // convention: W - us, B - them - // viewed from other side, W and B are reversed - { PS_NONE, PS_NONE }, - { PS_W_PAWN, PS_B_PAWN }, - { PS_W_KNIGHT, PS_B_KNIGHT }, - { PS_W_BISHOP, PS_B_BISHOP }, - { PS_W_ROOK, PS_B_ROOK }, - { PS_W_QUEEN, PS_B_QUEEN }, - { PS_W_KING, PS_B_KING }, - { PS_NONE, PS_NONE }, - { PS_NONE, PS_NONE }, - { PS_B_PAWN, PS_W_PAWN }, - { PS_B_KNIGHT, PS_W_KNIGHT }, - { PS_B_BISHOP, PS_W_BISHOP }, - { PS_B_ROOK, PS_W_ROOK }, - { PS_B_QUEEN, PS_W_QUEEN }, - { PS_B_KING, PS_W_KING }, - { PS_NONE, PS_NONE } - }; + const uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = { + // convention: W - us, B - them + // viewed from other side, W and B are reversed + { PS_NONE, PS_NONE }, + { PS_W_PAWN, PS_B_PAWN }, + { PS_W_KNIGHT, PS_B_KNIGHT }, + { PS_W_BISHOP, PS_B_BISHOP }, + { PS_W_ROOK, PS_B_ROOK }, + { PS_W_QUEEN, PS_B_QUEEN }, + { PS_W_KING, PS_B_KING }, + { PS_NONE, PS_NONE }, + { PS_NONE, PS_NONE }, + { PS_B_PAWN, PS_W_PAWN }, + { PS_B_KNIGHT, PS_W_KNIGHT }, + { PS_B_BISHOP, PS_W_BISHOP }, + { PS_B_ROOK, PS_W_ROOK }, + { PS_B_QUEEN, PS_W_QUEEN }, + { PS_B_KING, PS_W_KING }, + { PS_NONE, PS_NONE } + }; - // Input feature converter - LargePagePtr feature_transformer; + // Input feature converter + LargePagePtr feature_transformer; - // Evaluation function - AlignedPtr network; + // Evaluation function + AlignedPtr network; - // Evaluation function file name - std::string fileName; + // Evaluation function file name + std::string fileName; - // Saved evaluation function file name - std::string savedfileName = "nn.bin"; + // Saved evaluation function file name + std::string savedfileName = "nn.bin"; - // Get a string that represents the structure of the evaluation function - std::string get_architecture_string() { - return "Features=" + FeatureTransformer::get_structure_string() + - ",Network=" + Network::get_structure_string(); - } + // Get a string that represents the structure of the evaluation function + std::string get_architecture_string() { + return "Features=" + FeatureTransformer::get_structure_string() + + ",Network=" + Network::get_structure_string(); + } - std::string get_layers_info() { - return - FeatureTransformer::get_layers_info() - + '\n' + Network::get_layers_info(); - } + std::string get_layers_info() { + return + FeatureTransformer::get_layers_info() + + '\n' + Network::get_layers_info(); + } - UseNNUEMode useNNUE; - std::string eval_file_loaded = "None"; + UseNNUEMode useNNUE; + std::string eval_file_loaded = "None"; - namespace Detail { + namespace Detail { - // Initialize the evaluation function parameters - template - void initialize(AlignedPtr& pointer) { + // Initialize the evaluation function parameters + template + void initialize(AlignedPtr& pointer) { - pointer.reset(reinterpret_cast(std_aligned_alloc(alignof(T), sizeof(T)))); - std::memset(pointer.get(), 0, sizeof(T)); - } + pointer.reset(reinterpret_cast(std_aligned_alloc(alignof(T), sizeof(T)))); + std::memset(pointer.get(), 0, sizeof(T)); + } - template - void initialize(LargePagePtr& pointer) { + template + void initialize(LargePagePtr& pointer) { - static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); + static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); + pointer.reset(reinterpret_cast(aligned_large_pages_alloc(sizeof(T)))); + std::memset(pointer.get(), 0, sizeof(T)); + } - pointer.reset(reinterpret_cast(aligned_large_pages_alloc(sizeof(T)))); - std::memset(pointer.get(), 0, sizeof(T)); - } + // Read evaluation function parameters + template + bool ReadParameters(std::istream& stream, T& reference) { - // Read evaluation function parameters - template - bool ReadParameters(std::istream& stream, T& reference) { + std::uint32_t header; + header = read_little_endian(stream); + if (!stream || header != T::GetHashValue()) return false; + return reference.ReadParameters(stream); + } - std::uint32_t header; - header = read_little_endian(stream); + // write evaluation function parameters + template + bool WriteParameters(std::ostream& stream, const AlignedPtr& pointer) { + constexpr std::uint32_t header = T::GetHashValue(); - if (!stream || header != T::GetHashValue()) - return false; + stream.write(reinterpret_cast(&header), sizeof(header)); - return reference.ReadParameters(stream); - } + return pointer->WriteParameters(stream); + } - // write evaluation function parameters - template - bool WriteParameters(std::ostream& stream, const AlignedPtr& pointer) { - constexpr std::uint32_t header = T::GetHashValue(); + template + bool WriteParameters(std::ostream& stream, const LargePagePtr& pointer) { + constexpr std::uint32_t header = T::GetHashValue(); - stream.write(reinterpret_cast(&header), sizeof(header)); + stream.write(reinterpret_cast(&header), sizeof(header)); - return pointer->WriteParameters(stream); - } + return pointer->WriteParameters(stream); + } + } // namespace Detail - template - bool WriteParameters(std::ostream& stream, const LargePagePtr& pointer) { - constexpr std::uint32_t header = T::GetHashValue(); + // Initialize the evaluation function parameters + void initialize() { - stream.write(reinterpret_cast(&header), sizeof(header)); + Detail::initialize(feature_transformer); + Detail::initialize(network); + } - return pointer->WriteParameters(stream); - } - } // namespace Detail + // Read network header + bool read_header(std::istream& stream, std::uint32_t* hash_value, std::string* architecture) + { + std::uint32_t version, size; - // Initialize the evaluation function parameters - void initialize() { + version = read_little_endian(stream); + *hash_value = read_little_endian(stream); + size = read_little_endian(stream); + if (!stream || version != kVersion) return false; + architecture->resize(size); + stream.read(&(*architecture)[0], size); + return !stream.fail(); + } - Detail::initialize(feature_transformer); - Detail::initialize(network); - } + // write the header + bool write_header(std::ostream& stream, + std::uint32_t hash_value, const std::string& architecture) { - // Read network header - bool read_header(std::istream& stream, std::uint32_t* hash_value, std::string* architecture) - { - std::uint32_t version, size; + stream.write(reinterpret_cast(&kVersion), sizeof(kVersion)); + stream.write(reinterpret_cast(&hash_value), sizeof(hash_value)); - version = read_little_endian(stream); - *hash_value = read_little_endian(stream); - size = read_little_endian(stream); + const std::uint32_t size = static_cast(architecture.size()); - if (!stream || version != kVersion) - return false; + stream.write(reinterpret_cast(&size), sizeof(size)); + stream.write(architecture.data(), size); - architecture->resize(size); - stream.read(&(*architecture)[0], size); + return !stream.fail(); + } - return !stream.fail(); - } + // Read network parameters + bool ReadParameters(std::istream& stream) { - // write the header - bool write_header(std::ostream& stream, - std::uint32_t hash_value, const std::string& architecture) { + std::uint32_t hash_value; + std::string architecture; + if (!read_header(stream, &hash_value, &architecture)) return false; + if (hash_value != kHashValue) return false; + if (!Detail::ReadParameters(stream, *feature_transformer)) return false; + if (!Detail::ReadParameters(stream, *network)) return false; + return stream && stream.peek() == std::ios::traits_type::eof(); + } - stream.write(reinterpret_cast(&kVersion), sizeof(kVersion)); - stream.write(reinterpret_cast(&hash_value), sizeof(hash_value)); + // write evaluation function parameters + bool WriteParameters(std::ostream& stream) { - const std::uint32_t size = static_cast(architecture.size()); + if (!write_header(stream, kHashValue, get_architecture_string())) + return false; - stream.write(reinterpret_cast(&size), sizeof(size)); - stream.write(architecture.data(), size); + if (!Detail::WriteParameters(stream, feature_transformer)) + return false; - return !stream.fail(); - } + if (!Detail::WriteParameters(stream, network)) + return false; - // Read network parameters - bool ReadParameters(std::istream& stream) { + return !stream.fail(); +} - std::uint32_t hash_value; - std::string architecture; - if (!read_header(stream, &hash_value, &architecture)) - return false; + // Evaluation function. Perform differential calculation. + Value evaluate(const Position& pos) { - if (hash_value != kHashValue) - return false; + // We manually align the arrays on the stack because with gcc < 9.3 + // overaligning stack variables with alignas() doesn't work correctly. - if (!Detail::ReadParameters(stream, *feature_transformer)) - return false; - - if (!Detail::ReadParameters(stream, *network)) - return false; - - return stream && stream.peek() == std::ios::traits_type::eof(); - } - // write evaluation function parameters - bool WriteParameters(std::ostream& stream) { - - if (!write_header(stream, kHashValue, get_architecture_string())) - return false; - - if (!Detail::WriteParameters(stream, feature_transformer)) - return false; - - if (!Detail::WriteParameters(stream, network)) - return false; - - return !stream.fail(); - } - // Evaluation function. Perform differential calculation. - Value evaluate(const Position& pos) { - - // We manually align the arrays on the stack because with gcc < 9.3 - // overaligning stack variables with alignas() doesn't work correctly. - - constexpr uint64_t alignment = kCacheLineSize; + constexpr uint64_t alignment = kCacheLineSize; #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) - TransformedFeatureType transformed_features_unaligned[ - FeatureTransformer::kBufferSize + alignment / sizeof(TransformedFeatureType)]; - char buffer_unaligned[Network::kBufferSize + alignment]; + TransformedFeatureType transformed_features_unaligned[ + FeatureTransformer::kBufferSize + alignment / sizeof(TransformedFeatureType)]; + char buffer_unaligned[Network::kBufferSize + alignment]; - auto* transformed_features = align_ptr_up(&transformed_features_unaligned[0]); - auto* buffer = align_ptr_up(&buffer_unaligned[0]); + auto* transformed_features = align_ptr_up(&transformed_features_unaligned[0]); + auto* buffer = align_ptr_up(&buffer_unaligned[0]); #else - alignas(alignment) - TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize]; - alignas(alignment) char buffer[Network::kBufferSize]; + alignas(alignment) + TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize]; + alignas(alignment) char buffer[Network::kBufferSize]; #endif - ASSERT_ALIGNED(transformed_features, alignment); - ASSERT_ALIGNED(buffer, alignment); + ASSERT_ALIGNED(transformed_features, alignment); + ASSERT_ALIGNED(buffer, alignment); - feature_transformer->Transform(pos, transformed_features); + feature_transformer->Transform(pos, transformed_features); + const auto output = network->Propagate(transformed_features, buffer); + return static_cast(output[0] / FV_SCALE); + } - const auto output = network->Propagate(transformed_features, buffer); + // Load eval, from a file stream or a memory stream + bool load_eval(std::string name, std::istream& stream) { - return static_cast(output[0] / FV_SCALE); - } + initialize(); + fileName = name; + return ReadParameters(stream); +} - // Load eval, from a file stream or a memory stream - bool load_eval(std::string name, std::istream& stream) { +static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode) +{ + if (mode == "false") + return UseNNUEMode::False; + else if (mode == "true") + return UseNNUEMode::True; + else if (mode == "pure") + return UseNNUEMode::Pure; - initialize(); + return UseNNUEMode::False; +} - fileName = name; - return ReadParameters(stream); - } +void init() { - static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode) - { - if (mode == "false") - return UseNNUEMode::False; - else if (mode == "true") - return UseNNUEMode::True; - else if (mode == "pure") - return UseNNUEMode::Pure; + useNNUE = nnue_mode_from_option(Options["Use NNUE"]); - return UseNNUEMode::False; - } + if (Options["SkipLoadingEval"] || useNNUE == UseNNUEMode::False) + { + eval_file_loaded.clear(); + return; + } - void init() { - - useNNUE = nnue_mode_from_option(Options["Use NNUE"]); - - if (Options["SkipLoadingEval"] || useNNUE == UseNNUEMode::False) - { - eval_file_loaded.clear(); - return; - } - - std::string eval_file = std::string(Options["EvalFile"]); + std::string eval_file = std::string(Options["EvalFile"]); #if defined(DEFAULT_NNUE_DIRECTORY) #define stringify2(x) #x #define stringify(x) stringify2(x) - std::vector dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) }; + std::vector dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) }; #else - std::vector dirs = { "" , CommandLine::binaryDirectory }; + std::vector dirs = { "" , CommandLine::binaryDirectory }; #endif - for (std::string directory : dirs) - { - if (eval_file_loaded != eval_file) - { - std::ifstream stream(directory + eval_file, std::ios::binary); - if (load_eval(eval_file, stream)) - { - sync_cout << "info string Loaded eval file " << directory + eval_file << sync_endl; - eval_file_loaded = eval_file; - } - else - { - sync_cout << "info string ERROR: failed to load eval file " << directory + eval_file << sync_endl; - eval_file_loaded.clear(); - } - } - } + for (std::string directory : dirs) + { + if (eval_file_loaded != eval_file) + { + std::ifstream stream(directory + eval_file, std::ios::binary); + if (load_eval(eval_file, stream)) + { + sync_cout << "info string Loaded eval file " << directory + eval_file << sync_endl; + eval_file_loaded = eval_file; + } + else + { + sync_cout << "info string ERROR: failed to load eval file " << directory + eval_file << sync_endl; + eval_file_loaded.clear(); + } + } + } #undef stringify2 #undef stringify - } +} - /// NNUE::verify() verifies that the last net used was loaded successfully - void verify_eval_file_loaded() { +/// NNUE::verify() verifies that the last net used was loaded successfully +void verify_eval_file_loaded() { - std::string eval_file = std::string(Options["EvalFile"]); + std::string eval_file = std::string(Options["EvalFile"]); - if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file) - { - UCI::OptionsMap defaults; - UCI::init(defaults); + if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file) + { + UCI::OptionsMap defaults; + UCI::init(defaults); - std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available."; - std::string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully."; - std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file."; - std::string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(defaults["EvalFile"]); - std::string msg5 = "The engine will be terminated now."; + std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available."; + std::string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully."; + std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file."; + std::string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(defaults["EvalFile"]); + std::string msg5 = "The engine will be terminated now."; - sync_cout << "info string ERROR: " << msg1 << sync_endl; - sync_cout << "info string ERROR: " << msg2 << sync_endl; - sync_cout << "info string ERROR: " << msg3 << sync_endl; - sync_cout << "info string ERROR: " << msg4 << sync_endl; - sync_cout << "info string ERROR: " << msg5 << sync_endl; + sync_cout << "info string ERROR: " << msg1 << sync_endl; + sync_cout << "info string ERROR: " << msg2 << sync_endl; + sync_cout << "info string ERROR: " << msg3 << sync_endl; + sync_cout << "info string ERROR: " << msg4 << sync_endl; + sync_cout << "info string ERROR: " << msg5 << sync_endl; - std::exit(EXIT_FAILURE); - } + std::exit(EXIT_FAILURE); + } - if (useNNUE != UseNNUEMode::False) - sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl; - else - sync_cout << "info string classical evaluation enabled" << sync_endl; - } + if (useNNUE != UseNNUEMode::False) + sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl; + else + sync_cout << "info string classical evaluation enabled" << sync_endl; +} - /// In training we override eval file so this is useful. - void verify_any_net_loaded() { +/// In training we override eval file so this is useful. +void verify_any_net_loaded() { - if (!Options["SkipLoadingEval"] && useNNUE != UseNNUEMode::False && eval_file_loaded.empty()) - { - UCI::OptionsMap defaults; - UCI::init(defaults); + if (!Options["SkipLoadingEval"] && useNNUE != UseNNUEMode::False && eval_file_loaded.empty()) + { + UCI::OptionsMap defaults; + UCI::init(defaults); - std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available."; - std::string msg2 = "The option is set to true, but the network file was not loaded successfully."; - std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file."; - std::string msg5 = "The engine will be terminated now."; + std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available."; + std::string msg2 = "The option is set to true, but the network file was not loaded successfully."; + std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file."; + std::string msg5 = "The engine will be terminated now."; - sync_cout << "info string ERROR: " << msg1 << sync_endl; - sync_cout << "info string ERROR: " << msg2 << sync_endl; - sync_cout << "info string ERROR: " << msg3 << sync_endl; - sync_cout << "info string ERROR: " << msg5 << sync_endl; + sync_cout << "info string ERROR: " << msg1 << sync_endl; + sync_cout << "info string ERROR: " << msg2 << sync_endl; + sync_cout << "info string ERROR: " << msg3 << sync_endl; + sync_cout << "info string ERROR: " << msg5 << sync_endl; - std::exit(EXIT_FAILURE); - } + std::exit(EXIT_FAILURE); + } - if (useNNUE != UseNNUEMode::False) - sync_cout << "info string NNUE evaluation using " << eval_file_loaded << " enabled" << sync_endl; - else - sync_cout << "info string classical evaluation enabled" << sync_endl; - } + if (useNNUE != UseNNUEMode::False) + sync_cout << "info string NNUE evaluation using " << eval_file_loaded << " enabled" << sync_endl; + else + sync_cout << "info string classical evaluation enabled" << sync_endl; +} } // namespace Eval::NNUE diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h index b33969fc..a1051abe 100644 --- a/src/nnue/evaluate_nnue.h +++ b/src/nnue/evaluate_nnue.h @@ -1,21 +1,23 @@ /* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ +// header used in NNUE evaluation function + #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED #define NNUE_EVALUATE_NNUE_H_INCLUDED @@ -25,84 +27,83 @@ #include -// header used in NNUE evaluation function namespace Eval::NNUE { - enum struct UseNNUEMode - { - False, - True, - Pure - }; + enum struct UseNNUEMode + { + False, + True, + Pure + }; - // Hash value of evaluation function structure - constexpr std::uint32_t kHashValue = - FeatureTransformer::GetHashValue() ^ Network::GetHashValue(); + // Hash value of evaluation function structure + constexpr std::uint32_t kHashValue = + FeatureTransformer::GetHashValue() ^ Network::GetHashValue(); - // Deleter for automating release of memory area - template - struct AlignedDeleter { - void operator()(T* ptr) const { - ptr->~T(); - std_aligned_free(ptr); - } - }; + // Deleter for automating release of memory area + template + struct AlignedDeleter { + void operator()(T* ptr) const { + ptr->~T(); + std_aligned_free(ptr); + } + }; - template - struct LargePageDeleter { - void operator()(T* ptr) const { - ptr->~T(); - aligned_large_pages_free(ptr); - } - }; + template + struct LargePageDeleter { + void operator()(T* ptr) const { + ptr->~T(); + aligned_large_pages_free(ptr); + } + }; - template - using AlignedPtr = std::unique_ptr>; + template + using AlignedPtr = std::unique_ptr>; - template - using LargePagePtr = std::unique_ptr>; + template + using LargePagePtr = std::unique_ptr>; - // Input feature converter - extern LargePagePtr feature_transformer; + // Input feature converter + extern LargePagePtr feature_transformer; - // Evaluation function - extern AlignedPtr network; + // Evaluation function + extern AlignedPtr network; - // Evaluation function file name - extern std::string fileName; + // Evaluation function file name + extern std::string fileName; - // Saved evaluation function file name - extern std::string savedfileName; + // Saved evaluation function file name + extern std::string savedfileName; - extern UseNNUEMode useNNUE; + extern UseNNUEMode useNNUE; - extern std::string eval_file_loaded; + extern std::string eval_file_loaded; - // Get a string that represents the structure of the evaluation function - std::string get_architecture_string(); + // Get a string that represents the structure of the evaluation function + std::string get_architecture_string(); - std::string get_layers_info(); + std::string get_layers_info(); - // read the header - bool read_header(std::istream& stream, - std::uint32_t* hash_value, std::string* architecture); + // read the header + bool read_header(std::istream& stream, + std::uint32_t* hash_value, std::string* architecture); - // write the header - bool write_header(std::ostream& stream, - std::uint32_t hash_value, const std::string& architecture); + // write the header + bool write_header(std::ostream& stream, + std::uint32_t hash_value, const std::string& architecture); - // read evaluation function parameters - bool ReadParameters(std::istream& stream); + // read evaluation function parameters + bool ReadParameters(std::istream& stream); - // write evaluation function parameters - bool WriteParameters(std::ostream& stream); + // write evaluation function parameters + bool WriteParameters(std::ostream& stream); - Value evaluate(const Position& pos); - bool load_eval(std::string name, std::istream& stream); - void init(); + Value evaluate(const Position& pos); + bool load_eval(std::string name, std::istream& stream); + void init(); - void verify_eval_file_loaded(); - void verify_any_net_loaded(); + void verify_eval_file_loaded(); + void verify_any_net_loaded(); } // namespace Eval::NNUE diff --git a/src/nnue/features/feature_set.h b/src/nnue/features/feature_set.h index 32ef24ef..6602eae7 100644 --- a/src/nnue/features/feature_set.h +++ b/src/nnue/features/feature_set.h @@ -1,19 +1,19 @@ /* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ // A class template that represents the input feature set of the NNUE evaluation function @@ -22,7 +22,6 @@ #define NNUE_FEATURE_SET_H_INCLUDED #include "features_common.h" - #include namespace Eval::NNUE::Features { diff --git a/src/nnue/features/features_common.h b/src/nnue/features/features_common.h index 671ceeb9..656502a3 100644 --- a/src/nnue/features/features_common.h +++ b/src/nnue/features/features_common.h @@ -1,19 +1,19 @@ /* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ //Common header of input features of NNUE evaluation function @@ -21,30 +21,29 @@ #ifndef NNUE_FEATURES_COMMON_H_INCLUDED #define NNUE_FEATURES_COMMON_H_INCLUDED -#include "evaluate.h" - -#include "nnue/nnue_common.h" +#include "../../evaluate.h" +#include "../nnue_common.h" namespace Eval::NNUE::Features { - class IndexList; + class IndexList; - template - class FeatureSet; + template + class FeatureSet; - // Trigger to perform full calculations instead of difference only - enum class TriggerEvent { - kNone, // Calculate the difference whenever possible - kFriendKingMoved, // calculate full evaluation when own king moves - kEnemyKingMoved, // calculate full evaluation when opponent king moves - kAnyKingMoved, // calculate full evaluation when any king moves - kAnyPieceMoved, // always calculate full evaluation - }; + // Trigger to perform full calculations instead of difference only + enum class TriggerEvent { + kNone, // Calculate the difference whenever possible + kFriendKingMoved, // calculate full evaluation when own king moves + kEnemyKingMoved, // calculate full evaluation when opponent king moves + kAnyKingMoved, // calculate full evaluation when any king moves + kAnyPieceMoved, // always calculate full evaluation + }; - enum class Side { - kFriend, // side to move - kEnemy, // opponent - }; + enum class Side { + kFriend, // side to move + kEnemy, // opponent + }; } // namespace Eval::NNUE::Features diff --git a/src/nnue/features/index_list.h b/src/nnue/features/index_list.h index 6751b26c..d9ad680a 100644 --- a/src/nnue/features/index_list.h +++ b/src/nnue/features/index_list.h @@ -1,19 +1,19 @@ /* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ // Definition of index list of input features @@ -21,43 +21,43 @@ #ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED #define NNUE_FEATURES_INDEX_LIST_H_INCLUDED -#include "position.h" - -#include "nnue/nnue_architecture.h" +#include "../../position.h" +#include "../nnue_architecture.h" namespace Eval::NNUE::Features { - // Class template used for feature index list - template - class ValueList { + // Class template used for feature index list + template + class ValueList { - public: - std::size_t size() const { return size_; } - void resize(std::size_t size) { size_ = size; } - void push_back(const T& value) { values_[size_++] = value; } - T& operator[](std::size_t index) { return values_[index]; } - T* begin() { return values_; } - T* end() { return values_ + size_; } - const T& operator[](std::size_t index) const { return values_[index]; } - const T* begin() const { return values_; } - const T* end() const { return values_ + size_; } + public: + std::size_t size() const { return size_; } + void resize(std::size_t size) { size_ = size; } + void push_back(const T& value) { values_[size_++] = value; } + T& operator[](std::size_t index) { return values_[index]; } + T* begin() { return values_; } + T* end() { return values_ + size_; } + const T& operator[](std::size_t index) const { return values_[index]; } + const T* begin() const { return values_; } + const T* end() const { return values_ + size_; } - void swap(ValueList& other) { - const std::size_t max_size = std::max(size_, other.size_); - for (std::size_t i = 0; i < max_size; ++i) { - std::swap(values_[i], other.values_[i]); - } - std::swap(size_, other.size_); - } + void swap(ValueList& other) { + const std::size_t max_size = std::max(size_, other.size_); + for (std::size_t i = 0; i < max_size; ++i) { + std::swap(values_[i], other.values_[i]); + } + std::swap(size_, other.size_); + } - private: - T values_[MaxSize] = {}; - std::size_t size_ = 0; - }; + private: + T values_[MaxSize]; + std::size_t size_ = 0; + }; - //Type of feature index list - class IndexList : public ValueList { - }; + //Type of feature index list + class IndexList + : public ValueList { + }; } // namespace Eval::NNUE::Features diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index d290bc12..1f2ff7c5 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -223,13 +223,13 @@ namespace Eval::NNUE::Layers { return _mm512_add_epi32(_mm512_permutexvar_epi32(indices, x), bias); }; - [[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) { #if defined (USE_VNNI) + [[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) { acc = _mm512_dpbusd_epi32(acc, a, b); #else + [[maybe_unused]] auto m512_dpbusd_epi32 = [=](__m512i a, __m512i b) -> __m512i { __m512i product0 = _mm512_maddubs_epi16(a, b); - product0 = _mm512_madd_epi16(product0, kOnes512); - acc = _mm512_add_epi32(acc, product0); + return _mm512_madd_epi16(product0, kOnes512); #endif }; @@ -256,14 +256,13 @@ namespace Eval::NNUE::Layers { return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias); }; - - [[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) { #if defined (USE_VNNI) + [[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) { acc = _mm256_dpbusd_epi32(acc, a, b); #else + [[maybe_unused]] auto m256_dpbusd_epi32 = [=](__m256i a, __m256i b) -> __m256i { __m256i product0 = _mm256_maddubs_epi16(a, b); - product0 = _mm256_madd_epi16(product0, kOnes256); - acc = _mm256_add_epi32(acc, product0); + return _mm256_madd_epi16(product0, kOnes256); #endif }; @@ -288,10 +287,9 @@ namespace Eval::NNUE::Layers { return _mm_add_epi32(sum0, bias); }; - [[maybe_unused]] auto m128_add_dpbusd_epi32 = [=](__m128i& acc, __m128i a, __m128i b) { + [[maybe_unused]] auto m128_dpbusd_epi32 = [=](__m128i a, __m128i b) -> __m128i { __m128i product0 = _mm_maddubs_epi16(a, b); - product0 = _mm_madd_epi16(product0, kOnes128); - acc = _mm_add_epi32(acc, product0); + return _mm_madd_epi16(product0, kOnes128); }; #endif @@ -335,15 +333,6 @@ namespace Eval::NNUE::Layers { const __m512i bias = *reinterpret_cast(&biases_[i]); __m512i* outptr = reinterpret_cast<__m512i*>(&output[i]); - __m512i sum01a = _mm512_setzero_si512(); - __m512i sum23a = _mm512_setzero_si512(); - __m512i sum45a = _mm512_setzero_si512(); - __m512i sum67a = _mm512_setzero_si512(); - __m512i sum01b = _mm512_setzero_si512(); - __m512i sum23b = _mm512_setzero_si512(); - __m512i sum45b = _mm512_setzero_si512(); - __m512i sum67b = _mm512_setzero_si512(); - const auto row01a = *reinterpret_cast(&weights_[offset01a]); const auto row23a = *reinterpret_cast(&weights_[offset23a]); const auto row45a = *reinterpret_cast(&weights_[offset45a]); @@ -356,6 +345,16 @@ namespace Eval::NNUE::Layers { const __m256i in256 = input_vector256[0]; const __m512i in = _mm512_inserti64x4(_mm512_castsi256_si512(in256), in256, 1); +#if defined (USE_VNNI) + __m512i sum01a = _mm512_setzero_si512(); + __m512i sum23a = _mm512_setzero_si512(); + __m512i sum45a = _mm512_setzero_si512(); + __m512i sum67a = _mm512_setzero_si512(); + __m512i sum01b = _mm512_setzero_si512(); + __m512i sum23b = _mm512_setzero_si512(); + __m512i sum45b = _mm512_setzero_si512(); + __m512i sum67b = _mm512_setzero_si512(); + m512_add_dpbusd_epi32(sum01a, in, row01a); m512_add_dpbusd_epi32(sum23a, in, row23a); m512_add_dpbusd_epi32(sum45a, in, row45a); @@ -364,6 +363,16 @@ namespace Eval::NNUE::Layers { m512_add_dpbusd_epi32(sum23b, in, row23b); m512_add_dpbusd_epi32(sum45b, in, row45b); m512_add_dpbusd_epi32(sum67b, in, row67b); +#else + __m512i sum01a = m512_dpbusd_epi32(in, row01a); + __m512i sum23a = m512_dpbusd_epi32(in, row23a); + __m512i sum45a = m512_dpbusd_epi32(in, row45a); + __m512i sum67a = m512_dpbusd_epi32(in, row67a); + __m512i sum01b = m512_dpbusd_epi32(in, row01b); + __m512i sum23b = m512_dpbusd_epi32(in, row23b); + __m512i sum45b = m512_dpbusd_epi32(in, row45b); + __m512i sum67b = m512_dpbusd_epi32(in, row67b); +#endif *outptr = m512_hadd256x16( sum01a, sum23a, sum45a, sum67a, @@ -384,48 +393,80 @@ namespace Eval::NNUE::Layers { if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) == 0) { - __m512i sum0 = _mm512_setzero_si512(); - __m512i sum1 = _mm512_setzero_si512(); - __m512i sum2 = _mm512_setzero_si512(); - __m512i sum3 = _mm512_setzero_si512(); - const auto row0 = reinterpret_cast(&weights_[offset0]); const auto row1 = reinterpret_cast(&weights_[offset1]); const auto row2 = reinterpret_cast(&weights_[offset2]); const auto row3 = reinterpret_cast(&weights_[offset3]); - for (IndexType j = 0; j < kNumChunks512; ++j) +#if defined (USE_VNNI) + __m512i sum0 = _mm512_setzero_si512(); + __m512i sum1 = _mm512_setzero_si512(); + __m512i sum2 = _mm512_setzero_si512(); + __m512i sum3 = _mm512_setzero_si512(); + const IndexType kStart = 0; +#else + __m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]); + __m512i sum1 = m512_dpbusd_epi32(input_vector512[0], row1[0]); + __m512i sum2 = m512_dpbusd_epi32(input_vector512[0], row2[0]); + __m512i sum3 = m512_dpbusd_epi32(input_vector512[0], row3[0]); + const IndexType kStart = 1; +#endif + + for (IndexType j = kStart; j < kNumChunks512; ++j) { const __m512i in = input_vector512[j]; +#if defined (USE_VNNI) m512_add_dpbusd_epi32(sum0, in, row0[j]); m512_add_dpbusd_epi32(sum1, in, row1[j]); m512_add_dpbusd_epi32(sum2, in, row2[j]); m512_add_dpbusd_epi32(sum3, in, row3[j]); +#else + sum0 = _mm512_add_epi32(sum0, m512_dpbusd_epi32(in, row0[j])); + sum1 = _mm512_add_epi32(sum1, m512_dpbusd_epi32(in, row1[j])); + sum2 = _mm512_add_epi32(sum2, m512_dpbusd_epi32(in, row2[j])); + sum3 = _mm512_add_epi32(sum3, m512_dpbusd_epi32(in, row3[j])); +#endif } *outptr = m512_haddx4(sum0, sum1, sum2, sum3, bias); } else { - __m256i sum0 = _mm256_setzero_si256(); - __m256i sum1 = _mm256_setzero_si256(); - __m256i sum2 = _mm256_setzero_si256(); - __m256i sum3 = _mm256_setzero_si256(); - const auto row0 = reinterpret_cast(&weights_[offset0]); const auto row1 = reinterpret_cast(&weights_[offset1]); const auto row2 = reinterpret_cast(&weights_[offset2]); const auto row3 = reinterpret_cast(&weights_[offset3]); - for (IndexType j = 0; j < kNumChunks256; ++j) +#if defined (USE_VNNI) + __m256i sum0 = _mm256_setzero_si256(); + __m256i sum1 = _mm256_setzero_si256(); + __m256i sum2 = _mm256_setzero_si256(); + __m256i sum3 = _mm256_setzero_si256(); + const IndexType kStart = 0; +#else + __m256i sum0 = m256_dpbusd_epi32(input_vector256[0], row0[0]); + __m256i sum1 = m256_dpbusd_epi32(input_vector256[0], row1[0]); + __m256i sum2 = m256_dpbusd_epi32(input_vector256[0], row2[0]); + __m256i sum3 = m256_dpbusd_epi32(input_vector256[0], row3[0]); + const IndexType kStart = 1; +#endif + + for (IndexType j = kStart; j < kNumChunks256; ++j) { const __m256i in = input_vector256[j]; +#if defined (USE_VNNI) m256_add_dpbusd_epi32(sum0, in, row0[j]); m256_add_dpbusd_epi32(sum1, in, row1[j]); m256_add_dpbusd_epi32(sum2, in, row2[j]); m256_add_dpbusd_epi32(sum3, in, row3[j]); +#else + sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j])); + sum1 = _mm256_add_epi32(sum1, m256_dpbusd_epi32(in, row1[j])); + sum2 = _mm256_add_epi32(sum2, m256_dpbusd_epi32(in, row2[j])); + sum3 = _mm256_add_epi32(sum3, m256_dpbusd_epi32(in, row3[j])); +#endif } *outptr = m256_haddx4(sum0, sum1, sum2, sum3, bias); @@ -436,30 +477,50 @@ namespace Eval::NNUE::Layers { { if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) == 0) { - __m512i sum0 = _mm512_setzero_si512(); - const auto row0 = reinterpret_cast(&weights_[0]); - for (IndexType j = 0; j < kNumChunks512; ++j) +#if defined (USE_VNNI) + __m512i sum0 = _mm512_setzero_si512(); + const IndexType kStart = 0; +#else + __m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]); + const IndexType kStart = 1; +#endif + + for (IndexType j = kStart; j < kNumChunks512; ++j) { const __m512i in = input_vector512[j]; +#if defined (USE_VNNI) m512_add_dpbusd_epi32(sum0, in, row0[j]); +#else + sum0 = _mm512_add_epi32(sum0, m512_dpbusd_epi32(in, row0[j])); +#endif } output[0] = m512_hadd(sum0, biases_[0]); } else { - __m256i sum0 = _mm256_setzero_si256(); - const auto row0 = reinterpret_cast(&weights_[0]); - for (IndexType j = 0; j < kNumChunks256; ++j) +#if defined (USE_VNNI) + __m256i sum0 = _mm256_setzero_si256(); + const IndexType kStart = 0; +#else + __m256i sum0 = m256_dpbusd_epi32(input_vector256[0], row0[0]); + const IndexType kStart = 1; +#endif + + for (IndexType j = kStart; j < kNumChunks256; ++j) { const __m256i in = input_vector256[j]; +#if defined (USE_VNNI) m256_add_dpbusd_epi32(sum0, in, row0[j]); +#else + sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j])); +#endif } output[0] = m256_hadd(sum0, biases_[0]); @@ -493,24 +554,40 @@ namespace Eval::NNUE::Layers { const __m128i bias = *reinterpret_cast(&biases_[i]); __m128i* outptr = reinterpret_cast<__m128i*>(&output[i]); - __m256i sum0 = _mm256_setzero_si256(); - __m256i sum1 = _mm256_setzero_si256(); - __m256i sum2 = _mm256_setzero_si256(); - __m256i sum3 = _mm256_setzero_si256(); - const auto row0 = reinterpret_cast(&weights_[offset0]); const auto row1 = reinterpret_cast(&weights_[offset1]); const auto row2 = reinterpret_cast(&weights_[offset2]); const auto row3 = reinterpret_cast(&weights_[offset3]); - for (IndexType j = 0; j < kNumChunks; ++j) +#if defined (USE_VNNI) + __m256i sum0 = _mm256_setzero_si256(); + __m256i sum1 = _mm256_setzero_si256(); + __m256i sum2 = _mm256_setzero_si256(); + __m256i sum3 = _mm256_setzero_si256(); + const IndexType kStart = 0; +#else + __m256i sum0 = m256_dpbusd_epi32(input_vector[0], row0[0]); + __m256i sum1 = m256_dpbusd_epi32(input_vector[0], row1[0]); + __m256i sum2 = m256_dpbusd_epi32(input_vector[0], row2[0]); + __m256i sum3 = m256_dpbusd_epi32(input_vector[0], row3[0]); + const IndexType kStart = 1; +#endif + + for (IndexType j = kStart; j < kNumChunks; ++j) { const __m256i in = input_vector[j]; +#if defined (USE_VNNI) m256_add_dpbusd_epi32(sum0, in, row0[j]); m256_add_dpbusd_epi32(sum1, in, row1[j]); m256_add_dpbusd_epi32(sum2, in, row2[j]); m256_add_dpbusd_epi32(sum3, in, row3[j]); +#else + sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j])); + sum1 = _mm256_add_epi32(sum1, m256_dpbusd_epi32(in, row1[j])); + sum2 = _mm256_add_epi32(sum2, m256_dpbusd_epi32(in, row2[j])); + sum3 = _mm256_add_epi32(sum3, m256_dpbusd_epi32(in, row3[j])); +#endif } *outptr = m256_haddx4(sum0, sum1, sum2, sum3, bias); @@ -518,15 +595,25 @@ namespace Eval::NNUE::Layers { } else if constexpr (kOutputDimensions == 1) { - __m256i sum0 = _mm256_setzero_si256(); - const auto row0 = reinterpret_cast(&weights_[0]); - for (IndexType j = 0; j < kNumChunks; ++j) +#if defined (USE_VNNI) + __m256i sum0 = _mm256_setzero_si256(); + const IndexType kStart = 0; +#else + __m256i sum0 = m256_dpbusd_epi32(input_vector[0], row0[0]); + const IndexType kStart = 1; +#endif + + for (IndexType j = kStart; j < kNumChunks; ++j) { const __m256i in = input_vector[j]; - m256_add_dpbusd_epi32(sum0, in, row0[j]); +#if defined (USE_VNNI) + m256_add_dpbusd_epi32(sum0, in, row0[j]); +#else + sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j])); +#endif } output[0] = m256_hadd(sum0, biases_[0]); @@ -559,24 +646,24 @@ namespace Eval::NNUE::Layers { const __m128i bias = *reinterpret_cast(&biases_[i]); __m128i* outptr = reinterpret_cast<__m128i*>(&output[i]); - __m128i sum0 = _mm_setzero_si128(); - __m128i sum1 = _mm_setzero_si128(); - __m128i sum2 = _mm_setzero_si128(); - __m128i sum3 = _mm_setzero_si128(); - const auto row0 = reinterpret_cast(&weights_[offset0]); const auto row1 = reinterpret_cast(&weights_[offset1]); const auto row2 = reinterpret_cast(&weights_[offset2]); const auto row3 = reinterpret_cast(&weights_[offset3]); - for (int j = 0; j < (int)kNumChunks; j += 1) + __m128i sum0 = m128_dpbusd_epi32(input_vector[0], row0[0]); + __m128i sum1 = m128_dpbusd_epi32(input_vector[0], row1[0]); + __m128i sum2 = m128_dpbusd_epi32(input_vector[0], row2[0]); + __m128i sum3 = m128_dpbusd_epi32(input_vector[0], row3[0]); + + for (int j = 1; j < (int)kNumChunks; ++j) { const __m128i in = input_vector[j]; - m128_add_dpbusd_epi32(sum0, in, row0[j]); - m128_add_dpbusd_epi32(sum1, in, row1[j]); - m128_add_dpbusd_epi32(sum2, in, row2[j]); - m128_add_dpbusd_epi32(sum3, in, row3[j]); + sum0 = _mm_add_epi32(sum0, m128_dpbusd_epi32(in, row0[j])); + sum1 = _mm_add_epi32(sum1, m128_dpbusd_epi32(in, row1[j])); + sum2 = _mm_add_epi32(sum2, m128_dpbusd_epi32(in, row2[j])); + sum3 = _mm_add_epi32(sum3, m128_dpbusd_epi32(in, row3[j])); } *outptr = m128_haddx4(sum0, sum1, sum2, sum3, bias); @@ -584,16 +671,12 @@ namespace Eval::NNUE::Layers { } else if constexpr (kOutputDimensions == 1) { - __m128i sum0 = _mm_setzero_si128(); - const auto row0 = reinterpret_cast(&weights_[0]); - for (int j = 0; j < (int)kNumChunks; j += 1) - { - const __m128i in = input_vector[j]; + __m128i sum0 = m128_dpbusd_epi32(input_vector[0], row0[0]); - m128_add_dpbusd_epi32(sum0, in, row0[j]); - } + for (int j = 1; j < (int)kNumChunks; ++j) + sum0 = _mm_add_epi32(sum0, m128_dpbusd_epi32(input_vector[j], row0[j])); output[0] = m128_hadd(sum0, biases_[0]); } diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 8b60dafc..3d2f5bb4 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -1,34 +1,35 @@ /* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ +// Class for difference calculation of NNUE evaluation function + #ifndef NNUE_ACCUMULATOR_H_INCLUDED #define NNUE_ACCUMULATOR_H_INCLUDED #include "nnue_architecture.h" -// Class for difference calculation of NNUE evaluation function namespace Eval::NNUE { - // Class that holds the result of affine transformation of input features - struct alignas(kCacheLineSize) Accumulator { - std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; - bool computed_accumulation; - }; + // Class that holds the result of affine transformation of input features + struct alignas(kCacheLineSize) Accumulator { + std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; + bool computed_accumulation; + }; } // namespace Eval::NNUE diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h index 2ecb6999..91cdc4bd 100644 --- a/src/nnue/nnue_architecture.h +++ b/src/nnue/nnue_architecture.h @@ -1,36 +1,37 @@ /* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ +// Input features and network structure used in NNUE evaluation function + #ifndef NNUE_ARCHITECTURE_H_INCLUDED #define NNUE_ARCHITECTURE_H_INCLUDED // Defines the network structure #include "architectures/halfkp_256x2-32-32.h" -// Input features and network structure used in NNUE evaluation function namespace Eval::NNUE { - static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, ""); - static_assert(Network::kOutputDimensions == 1, ""); - static_assert(std::is_same::value, ""); + static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, ""); + static_assert(Network::kOutputDimensions == 1, ""); + static_assert(std::is_same::value, ""); - // Trigger for full calculation instead of difference calculation - constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers; + // Trigger for full calculation instead of difference calculation + constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers; } // namespace Eval::NNUE diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 5a52e0cb..8c17c959 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -1,19 +1,19 @@ /* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ // A class that converts the input features of the NNUE evaluation function @@ -23,7 +23,6 @@ #include "nnue_common.h" #include "nnue_architecture.h" - #include "features/index_list.h" #include @@ -31,456 +30,486 @@ namespace Eval::NNUE { - // If vector instructions are enabled, we update and refresh the - // accumulator tile by tile such that each tile fits in the CPU's - // vector registers. -#define TILING + // If vector instructions are enabled, we update and refresh the + // accumulator tile by tile such that each tile fits in the CPU's + // vector registers. + #define VECTOR -#ifdef USE_AVX512 - typedef __m512i vec_t; -#define vec_load(a) _mm512_load_si512(a) -#define vec_store(a,b) _mm512_store_si512(a,b) -#define vec_add_16(a,b) _mm512_add_epi16(a,b) -#define vec_sub_16(a,b) _mm512_sub_epi16(a,b) -#define vec_zero _mm512_setzero_si512() - static constexpr IndexType kNumRegs = 8; // only 8 are needed + #ifdef USE_AVX512 + typedef __m512i vec_t; + #define vec_load(a) _mm512_load_si512(a) + #define vec_store(a,b) _mm512_store_si512(a,b) + #define vec_add_16(a,b) _mm512_add_epi16(a,b) + #define vec_sub_16(a,b) _mm512_sub_epi16(a,b) + #define vec_zero _mm512_setzero_si512() + static constexpr IndexType kNumRegs = 8; // only 8 are needed -#elif USE_AVX2 - typedef __m256i vec_t; -#define vec_load(a) _mm256_load_si256(a) -#define vec_store(a,b) _mm256_store_si256(a,b) -#define vec_add_16(a,b) _mm256_add_epi16(a,b) -#define vec_sub_16(a,b) _mm256_sub_epi16(a,b) -#define vec_zero _mm256_setzero_si256() - static constexpr IndexType kNumRegs = 16; - -#elif USE_SSE2 - typedef __m128i vec_t; -#define vec_load(a) (*(a)) -#define vec_store(a,b) *(a)=(b) -#define vec_add_16(a,b) _mm_add_epi16(a,b) -#define vec_sub_16(a,b) _mm_sub_epi16(a,b) -#define vec_zero _mm_setzero_si128() - static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8; - -#elif USE_MMX - typedef __m64 vec_t; -#define vec_load(a) (*(a)) -#define vec_store(a,b) *(a)=(b) -#define vec_add_16(a,b) _mm_add_pi16(a,b) -#define vec_sub_16(a,b) _mm_sub_pi16(a,b) -#define vec_zero _mm_setzero_si64() - static constexpr IndexType kNumRegs = 8; - -#elif USE_NEON - typedef int16x8_t vec_t; -#define vec_load(a) (*(a)) -#define vec_store(a,b) *(a)=(b) -#define vec_add_16(a,b) vaddq_s16(a,b) -#define vec_sub_16(a,b) vsubq_s16(a,b) -#define vec_zero {0} + #elif USE_AVX2 + typedef __m256i vec_t; + #define vec_load(a) _mm256_load_si256(a) + #define vec_store(a,b) _mm256_store_si256(a,b) + #define vec_add_16(a,b) _mm256_add_epi16(a,b) + #define vec_sub_16(a,b) _mm256_sub_epi16(a,b) + #define vec_zero _mm256_setzero_si256() static constexpr IndexType kNumRegs = 16; + #elif USE_SSE2 + typedef __m128i vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) _mm_add_epi16(a,b) + #define vec_sub_16(a,b) _mm_sub_epi16(a,b) + #define vec_zero _mm_setzero_si128() + static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8; + + #elif USE_MMX + typedef __m64 vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) _mm_add_pi16(a,b) + #define vec_sub_16(a,b) _mm_sub_pi16(a,b) + #define vec_zero _mm_setzero_si64() + static constexpr IndexType kNumRegs = 8; + + #elif USE_NEON + typedef int16x8_t vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) vaddq_s16(a,b) + #define vec_sub_16(a,b) vsubq_s16(a,b) + #define vec_zero {0} + static constexpr IndexType kNumRegs = 16; + + #else + #undef VECTOR + + #endif + + // Input feature converter + class FeatureTransformer { + + private: + // Number of output dimensions for one side + static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; + + #ifdef VECTOR + static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2; + static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions"); + #endif + + public: + // Output type + using OutputType = TransformedFeatureType; + + // Number of input/output dimensions + static constexpr IndexType kInputDimensions = RawFeatures::kDimensions; + static constexpr IndexType kOutputDimensions = kHalfDimensions * 2; + + // Size of forward propagation buffer + static constexpr std::size_t kBufferSize = + kOutputDimensions * sizeof(OutputType); + + static constexpr int kLayerIndex = 0; + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t GetHashValue() { + + return RawFeatures::kHashValue ^ kOutputDimensions; + } + + static std::string get_name() { + return RawFeatures::get_name() + "[" + + std::to_string(kInputDimensions) + "->" + + std::to_string(kHalfDimensions) + "x2]"; + } + + // a string representing the structure + static std::string get_structure_string() { + return get_name(); + } + + static std::string get_layers_info() { + std::string info = " - "; + info += std::to_string(kLayerIndex); + info += " - "; + info += get_name(); + return info; + } + + // Read network parameters + bool ReadParameters(std::istream& stream) { + + for (std::size_t i = 0; i < kHalfDimensions; ++i) + biases_[i] = read_little_endian(stream); + for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i) + weights_[i] = read_little_endian(stream); + return !stream.fail(); + } + + // write parameters + bool WriteParameters(std::ostream& stream) const { + stream.write(reinterpret_cast(biases_), + kHalfDimensions * sizeof(BiasType)); + + stream.write(reinterpret_cast(weights_), + kHalfDimensions * kInputDimensions * sizeof(WeightType)); + + return !stream.fail(); + } + + // Proceed with the difference calculation if possible + bool update_accumulator_if_possible(const Position& pos) const { + + const auto now = pos.state(); + if (now->accumulator.computed_accumulation) + return true; + + const auto prev = now->previous; + if (prev && prev->accumulator.computed_accumulation) { + update_accumulator(pos); + return true; + } + + return false; + } + + // Convert input features + void Transform(const Position& pos, OutputType* output) const { + + if (!update_accumulator_if_possible(pos)) + refresh_accumulator(pos); + + const auto& accumulation = pos.state()->accumulator.accumulation; + + #if defined(USE_AVX512) + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth * 2); + static_assert(kHalfDimensions % (kSimdWidth * 2) == 0); + const __m512i kControl = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7); + const __m512i kZero = _mm512_setzero_si512(); + + #elif defined(USE_AVX2) + constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; + constexpr int kControl = 0b11011000; + const __m256i kZero = _mm256_setzero_si256(); + + #elif defined(USE_SSE2) + constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; + + #ifdef USE_SSE41 + const __m128i kZero = _mm_setzero_si128(); + #else + const __m128i k0x80s = _mm_set1_epi8(-128); + #endif + + #elif defined(USE_MMX) + constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; + const __m64 k0x80s = _mm_set1_pi8(-128); + + #elif defined(USE_NEON) + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + const int8x8_t kZero = {0}; + #endif + + const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; + for (IndexType p = 0; p < 2; ++p) { + const IndexType offset = kHalfDimensions * p; + + #if defined(USE_AVX512) + auto out = reinterpret_cast<__m512i*>(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m512i sum0 = _mm512_load_si512( + &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 0]); + __m512i sum1 = _mm512_load_si512( + &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 1]); + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum0 = _mm512_add_epi16(sum0, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 0]); + sum1 = _mm512_add_epi16(sum1, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 1]); + } + + _mm512_store_si512(&out[j], _mm512_permutexvar_epi64(kControl, + _mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), kZero))); + } + + #elif defined(USE_AVX2) + auto out = reinterpret_cast<__m256i*>(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m256i sum0 = _mm256_load_si256( + &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 0]); + __m256i sum1 = _mm256_load_si256( + &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 1]); + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum0 = _mm256_add_epi16(sum0, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 0]); + sum1 = _mm256_add_epi16(sum1, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 1]); + } + + _mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( + _mm256_packs_epi16(sum0, sum1), kZero), kControl)); + } + + #elif defined(USE_SSE2) + auto out = reinterpret_cast<__m128i*>(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m128i sum0 = _mm_load_si128(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 0]); + __m128i sum1 = _mm_load_si128(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 1]); + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum0 = _mm_add_epi16(sum0, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 0]); + sum1 = _mm_add_epi16(sum1, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 1]); + } + + const __m128i packedbytes = _mm_packs_epi16(sum0, sum1); + + _mm_store_si128(&out[j], + + #ifdef USE_SSE41 + _mm_max_epi8(packedbytes, kZero) + #else + _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) + #endif + + ); + } + + #elif defined(USE_MMX) + auto out = reinterpret_cast<__m64*>(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m64 sum0 = *(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 0]); + __m64 sum1 = *(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 1]); + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum0 = _mm_add_pi16(sum0, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 0]); + sum1 = _mm_add_pi16(sum1, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 1]); + } + + const __m64 packedbytes = _mm_packs_pi16(sum0, sum1); + out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); + } + + #elif defined(USE_NEON) + const auto out = reinterpret_cast(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + int16x8_t sum = reinterpret_cast( + accumulation[perspectives[p]][0])[j]; + + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum = vaddq_s16(sum, reinterpret_cast( + accumulation[perspectives[p]][i])[j]); + } + + out[j] = vmax_s8(vqmovn_s16(sum), kZero); + } + + #else + for (IndexType j = 0; j < kHalfDimensions; ++j) { + BiasType sum = accumulation[static_cast(perspectives[p])][0][j]; + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum += accumulation[static_cast(perspectives[p])][i][j]; + } + + output[offset + j] = static_cast( + std::max(0, std::min(127, sum))); + } + #endif + + } + #if defined(USE_MMX) + _mm_empty(); + #endif + } + + private: + // Calculate cumulative value without using difference calculation + void refresh_accumulator(const Position& pos) const { + + #ifdef VECTOR + // Gcc-10.2 unnecessarily spills AVX2 registers if this array + // is defined in the VECTOR code below, once in each branch + vec_t acc[kNumRegs]; + #endif + auto& accumulator = pos.state()->accumulator; + for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { + Features::IndexList active_indices[2]; + RawFeatures::append_active_indices(pos, kRefreshTriggers[i], + active_indices); + for (Color perspective : { WHITE, BLACK }) { +#ifdef VECTOR + for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) { + auto accTile = reinterpret_cast( + &accumulator.accumulation[perspective][i][j * kTileHeight]); + + if (i == 0) { + auto biasesTile = reinterpret_cast( + &biases_[j * kTileHeight]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = biasesTile[k]; + } else { + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_zero; + } + + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); + + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + + for (IndexType k = 0; k < kNumRegs; k++) + vec_store(&accTile[k], acc[k]); + } #else -#undef TILING - -#endif - - // Input feature converter - class FeatureTransformer { - - private: - // Number of output dimensions for one side - static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; - -#ifdef TILING - static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2; - static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions"); -#endif - - public: - // Output type - using OutputType = TransformedFeatureType; - - // Number of input/output dimensions - static constexpr IndexType kInputDimensions = RawFeatures::kDimensions; - static constexpr IndexType kOutputDimensions = kHalfDimensions * 2; - - // Size of forward propagation buffer - static constexpr std::size_t kBufferSize = - kOutputDimensions * sizeof(OutputType); - - static constexpr int kLayerIndex = 0; - - // Hash value embedded in the evaluation file - static constexpr std::uint32_t GetHashValue() { - - return RawFeatures::kHashValue ^ kOutputDimensions; - } - - static std::string get_name() { - return RawFeatures::get_name() + "[" + - std::to_string(kInputDimensions) + "->" + - std::to_string(kHalfDimensions) + "x2]"; - } - - // a string representing the structure - static std::string get_structure_string() { - return get_name(); - } - - static std::string get_layers_info() { - std::string info = " - "; - info += std::to_string(kLayerIndex); - info += " - "; - info += get_name(); - return info; - } - - // Read network parameters - bool ReadParameters(std::istream& stream) { - - for (std::size_t i = 0; i < kHalfDimensions; ++i) - biases_[i] = read_little_endian(stream); - - for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i) - weights_[i] = read_little_endian(stream); - - return !stream.fail(); - } - - // write parameters - bool WriteParameters(std::ostream& stream) const { - stream.write(reinterpret_cast(biases_), - kHalfDimensions * sizeof(BiasType)); - - stream.write(reinterpret_cast(weights_), - kHalfDimensions * kInputDimensions * sizeof(WeightType)); - - return !stream.fail(); - } - - // Proceed with the difference calculation if possible - bool update_accumulator_if_possible(const Position& pos) const { - - const auto now = pos.state(); - if (now->accumulator.computed_accumulation) - return true; - - const auto prev = now->previous; - if (prev && prev->accumulator.computed_accumulation) { - update_accumulator(pos); - return true; + if (i == 0) { + std::memcpy(accumulator.accumulation[perspective][i], biases_, + kHalfDimensions * sizeof(BiasType)); + } else { + std::memset(accumulator.accumulation[perspective][i], 0, + kHalfDimensions * sizeof(BiasType)); } - return false; - } - - // Convert input features - void Transform(const Position& pos, OutputType* output) const { - - if (!update_accumulator_if_possible(pos)) - refresh_accumulator(pos); - - const auto& accumulation = pos.state()->accumulator.accumulation; - -#if defined(USE_AVX2) - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - constexpr int kControl = 0b11011000; - const __m256i kZero = _mm256_setzero_si256(); - -#elif defined(USE_SSE2) - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - -#ifdef USE_SSE41 - const __m128i kZero = _mm_setzero_si128(); -#else - const __m128i k0x80s = _mm_set1_epi8(-128); -#endif - -#elif defined(USE_MMX) - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - const __m64 k0x80s = _mm_set1_pi8(-128); - -#elif defined(USE_NEON) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - const int8x8_t kZero = {0}; -#endif - - const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; - for (IndexType p = 0; p < 2; ++p) { - const IndexType offset = kHalfDimensions * p; - -#if defined(USE_AVX2) - auto out = reinterpret_cast<__m256i*>(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m256i sum0 = _mm256_load_si256( - &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 0]); - __m256i sum1 = _mm256_load_si256( - &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 1]); - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum0 = _mm256_add_epi16(sum0, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 0]); - sum1 = _mm256_add_epi16(sum1, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 1]); - } - - _mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( - _mm256_packs_epi16(sum0, sum1), kZero), kControl)); - } - -#elif defined(USE_SSE2) - auto out = reinterpret_cast<__m128i*>(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m128i sum0 = _mm_load_si128(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 0]); - __m128i sum1 = _mm_load_si128(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 1]); - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum0 = _mm_add_epi16(sum0, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 0]); - sum1 = _mm_add_epi16(sum1, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 1]); - } - - const __m128i packedbytes = _mm_packs_epi16(sum0, sum1); - - _mm_store_si128(&out[j], - -#ifdef USE_SSE41 - _mm_max_epi8(packedbytes, kZero) -#else - _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) -#endif - - ); - } - -#elif defined(USE_MMX) - auto out = reinterpret_cast<__m64*>(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m64 sum0 = *(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 0]); - __m64 sum1 = *(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 1]); - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum0 = _mm_add_pi16(sum0, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 0]); - sum1 = _mm_add_pi16(sum1, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 1]); - } - - const __m64 packedbytes = _mm_packs_pi16(sum0, sum1); - out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); - } - -#elif defined(USE_NEON) - const auto out = reinterpret_cast(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - int16x8_t sum = reinterpret_cast( - accumulation[perspectives[p]][0])[j]; - - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum = vaddq_s16(sum, reinterpret_cast( - accumulation[perspectives[p]][i])[j]); - } - - out[j] = vmax_s8(vqmovn_s16(sum), kZero); - } - -#else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - BiasType sum = accumulation[static_cast(perspectives[p])][0][j]; - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum += accumulation[static_cast(perspectives[p])][i][j]; - } - - output[offset + j] = static_cast( - std::max(0, std::min(127, sum))); - } -#endif + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] += weights_[offset + j]; } -#if defined(USE_MMX) - _mm_empty(); #endif + } + } - private: - // Calculate cumulative value without using difference calculation - void refresh_accumulator(const Position& pos) const { - - auto& accumulator = pos.state()->accumulator; - for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { - Features::IndexList active_indices[2]; - RawFeatures::append_active_indices(pos, kRefreshTriggers[i], - active_indices); - for (Color perspective : { WHITE, BLACK }) { -#ifdef TILING - for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) { - auto accTile = reinterpret_cast( - &accumulator.accumulation[perspective][i][j * kTileHeight]); - vec_t acc[kNumRegs]; - - if (i == 0) { - auto biasesTile = reinterpret_cast( - &biases_[j * kTileHeight]); - for (unsigned k = 0; k < kNumRegs; ++k) - acc[k] = biasesTile[k]; - } else { - for (unsigned k = 0; k < kNumRegs; ++k) - acc[k] = vec_zero; - } - - for (const auto index : active_indices[perspective]) { - const IndexType offset = kHalfDimensions * index + j * kTileHeight; - auto column = reinterpret_cast(&weights_[offset]); - - for (unsigned k = 0; k < kNumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); - } - - for (unsigned k = 0; k < kNumRegs; k++) - vec_store(&accTile[k], acc[k]); - } -#else - if (i == 0) { - std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); - } else { - std::memset(accumulator.accumulation[perspective][i], 0, - kHalfDimensions * sizeof(BiasType)); - } - - for (const auto index : active_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; - - for (IndexType j = 0; j < kHalfDimensions; ++j) - accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - } -#endif - } - - } - #if defined(USE_MMX) - _mm_empty(); + _mm_empty(); #endif - accumulator.computed_accumulation = true; + accumulator.computed_accumulation = true; + } + + // Calculate cumulative value using difference calculation + void update_accumulator(const Position& pos) const { + + #ifdef VECTOR + // Gcc-10.2 unnecessarily spills AVX2 registers if this array + // is defined in the VECTOR code below, once in each branch + vec_t acc[kNumRegs]; + #endif + const auto& prev_accumulator = pos.state()->previous->accumulator; + auto& accumulator = pos.state()->accumulator; + for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { + Features::IndexList removed_indices[2], added_indices[2]; + bool reset[2] = { false, false }; + RawFeatures::append_changed_indices(pos, kRefreshTriggers[i], + removed_indices, added_indices, reset); + +#ifdef VECTOR + for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) { + for (Color perspective : { WHITE, BLACK }) { + auto accTile = reinterpret_cast( + &accumulator.accumulation[perspective][i][j * kTileHeight]); + + if (reset[perspective]) { + if (i == 0) { + auto biasesTile = reinterpret_cast( + &biases_[j * kTileHeight]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = biasesTile[k]; + } else { + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_zero; + } + } else { + auto prevAccTile = reinterpret_cast( + &prev_accumulator.accumulation[perspective][i][j * kTileHeight]); + + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_load(&prevAccTile[k]); + + // Difference calculation for the deactivated features + for (const auto index : removed_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); + + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); + } + } + + { // Difference calculation for the activated features + for (const auto index : added_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); + + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + } + + for (IndexType k = 0; k < kNumRegs; ++k) + vec_store(&accTile[k], acc[k]); } - - // Calculate cumulative value using difference calculation - void update_accumulator(const Position& pos) const { - - const auto& prev_accumulator = pos.state()->previous->accumulator; - auto& accumulator = pos.state()->accumulator; - for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { - Features::IndexList removed_indices[2], added_indices[2]; - bool reset[2] = { false, false }; - RawFeatures::append_changed_indices(pos, kRefreshTriggers[i], - removed_indices, added_indices, reset); - -#ifdef TILING - for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) { - for (Color perspective : { WHITE, BLACK }) { - auto accTile = reinterpret_cast( - &accumulator.accumulation[perspective][i][j * kTileHeight]); - vec_t acc[kNumRegs]; - - if (reset[perspective]) { - if (i == 0) { - auto biasesTile = reinterpret_cast( - &biases_[j * kTileHeight]); - for (unsigned k = 0; k < kNumRegs; ++k) - acc[k] = biasesTile[k]; - } else { - for (unsigned k = 0; k < kNumRegs; ++k) - acc[k] = vec_zero; - } - } else { - auto prevAccTile = reinterpret_cast( - &prev_accumulator.accumulation[perspective][i][j * kTileHeight]); - - for (IndexType k = 0; k < kNumRegs; ++k) - acc[k] = vec_load(&prevAccTile[k]); - - // Difference calculation for the deactivated features - for (const auto index : removed_indices[perspective]) { - const IndexType offset = kHalfDimensions * index + j * kTileHeight; - auto column = reinterpret_cast(&weights_[offset]); - - for (IndexType k = 0; k < kNumRegs; ++k) - acc[k] = vec_sub_16(acc[k], column[k]); - } - } - - { // Difference calculation for the activated features - for (const auto index : added_indices[perspective]) { - const IndexType offset = kHalfDimensions * index + j * kTileHeight; - auto column = reinterpret_cast(&weights_[offset]); - - for (IndexType k = 0; k < kNumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); - } - } - - for (IndexType k = 0; k < kNumRegs; ++k) - vec_store(&accTile[k], acc[k]); - } - } + } #if defined(USE_MMX) - _mm_empty(); + _mm_empty(); #endif #else - for (Color perspective : { WHITE, BLACK }) { + for (Color perspective : { WHITE, BLACK }) { - if (reset[perspective]) { - if (i == 0) { - std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); - } else { - std::memset(accumulator.accumulation[perspective][i], 0, - kHalfDimensions * sizeof(BiasType)); - } - } else { - std::memcpy(accumulator.accumulation[perspective][i], - prev_accumulator.accumulation[perspective][i], - kHalfDimensions * sizeof(BiasType)); - // Difference calculation for the deactivated features - for (const auto index : removed_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; + if (reset[perspective]) { + if (i == 0) { + std::memcpy(accumulator.accumulation[perspective][i], biases_, + kHalfDimensions * sizeof(BiasType)); + } else { + std::memset(accumulator.accumulation[perspective][i], 0, + kHalfDimensions * sizeof(BiasType)); + } + } else { + std::memcpy(accumulator.accumulation[perspective][i], + prev_accumulator.accumulation[perspective][i], + kHalfDimensions * sizeof(BiasType)); + // Difference calculation for the deactivated features + for (const auto index : removed_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; - for (IndexType j = 0; j < kHalfDimensions; ++j) - accumulator.accumulation[perspective][i][j] -= weights_[offset + j]; - } - } - { // Difference calculation for the activated features - for (const auto index : added_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; - - for (IndexType j = 0; j < kHalfDimensions; ++j) - accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - } - } - } -#endif - } - accumulator.computed_accumulation = true; + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] -= weights_[offset + j]; + } } + { // Difference calculation for the activated features + for (const auto index : added_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; - using BiasType = std::int16_t; - using WeightType = std::int16_t; + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] += weights_[offset + j]; + } + } + } +#endif + } + accumulator.computed_accumulation = true; + } - // Make the learning class a friend - friend class Trainer; + using BiasType = std::int16_t; + using WeightType = std::int16_t; - alignas(kCacheLineSize) BiasType biases_[kHalfDimensions]; - alignas(kCacheLineSize) - WeightType weights_[kHalfDimensions * kInputDimensions]; - }; + // Make the learning class a friend + friend class Trainer; + + alignas(kCacheLineSize) BiasType biases_[kHalfDimensions]; + alignas(kCacheLineSize) + WeightType weights_[kHalfDimensions * kInputDimensions]; + }; } // namespace Eval::NNUE -#endif //#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED +#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED diff --git a/src/pawns.cpp b/src/pawns.cpp index fde70ba5..68aaf331 100644 --- a/src/pawns.cpp +++ b/src/pawns.cpp @@ -176,8 +176,8 @@ namespace { score -= Doubled * doubled + WeakLever * more_than_one(lever); - if (blocked && r > RANK_4) - score += BlockedPawn[r-4]; + if (blocked && r >= RANK_5) + score += BlockedPawn[r - RANK_5]; } return score; diff --git a/src/search.cpp b/src/search.cpp index 8d057f42..30384868 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -59,7 +59,7 @@ namespace { // Razor and futility margins constexpr int RazorMargin = 510; Value futility_margin(Depth d, bool improving) { - return Value(223 * (d - improving)); + return Value(234 * (d - improving)); } // Reductions lookup table, initialized at startup @@ -67,7 +67,7 @@ namespace { Depth reduction(bool i, Depth d, int mn) { int r = Reductions[d] * Reductions[mn]; - return (r + 509) / 1024 + (!i && r > 894); + return (r + 503) / 1024 + (!i && r > 915); } constexpr int futility_move_count(bool improving, Depth depth) { @@ -188,7 +188,7 @@ namespace { void Search::init() { for (int i = 1; i < MAX_MOVES; ++i) - Reductions[i] = int((22.0 + 2 * std::log(Threads.size())) * std::log(i + 0.25 * std::log(i))); + Reductions[i] = int((21.3 + 2 * std::log(Threads.size())) * std::log(i + 0.25 * std::log(i))); } @@ -404,7 +404,7 @@ void Thread::search() { beta = std::min(prev + delta, VALUE_INFINITE); // Adjust contempt based on root move's previousScore (dynamic contempt) - int dct = ct + (105 - ct / 2) * prev / (abs(prev) + 149); + int dct = ct + (113 - ct / 2) * prev / (abs(prev) + 147); contempt = (us == WHITE ? make_score(dct, dct / 2) : -make_score(dct, dct / 2)); @@ -824,7 +824,7 @@ namespace { && (ss-1)->statScore < 22977 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ss->ttPv + 182 + && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ss->ttPv + 168 && !excludedMove && pos.non_pawn_material(us) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) @@ -832,7 +832,7 @@ namespace { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and value - Depth R = (982 + 85 * depth) / 256 + std::min(int(eval - beta) / 192, 3); + Depth R = (1015 + 85 * depth) / 256 + std::min(int(eval - beta) / 191, 3); ss->currentMove = MOVE_NULL; ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -849,7 +849,7 @@ namespace { if (nullValue >= VALUE_TB_WIN_IN_MAX_PLY) nullValue = beta; - if (thisThread->nmpMinPly || (abs(beta) < VALUE_KNOWN_WIN && depth < 13)) + if (thisThread->nmpMinPly || (abs(beta) < VALUE_KNOWN_WIN && depth < 14)) return nullValue; assert(!thisThread->nmpMinPly); // Recursive verification is not allowed @@ -868,7 +868,7 @@ namespace { } } - probCutBeta = beta + 176 - 49 * improving; + probCutBeta = beta + 183 - 49 * improving; // Step 10. ProbCut (~10 Elo) // If we have a good enough capture and a reduced search returns a value @@ -1036,7 +1036,7 @@ moves_loop: // When in check, search starts from here // Futility pruning: parent node (~5 Elo) if ( lmrDepth < 7 && !ss->inCheck - && ss->staticEval + 283 + 170 * lmrDepth <= alpha + && ss->staticEval + 266 + 170 * lmrDepth <= alpha && (*contHist[0])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)] + (*contHist[3])[movedPiece][to_sq(move)] @@ -1044,7 +1044,7 @@ moves_loop: // When in check, search starts from here continue; // Prune moves with negative SEE (~20 Elo) - if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 18)) * lmrDepth * lmrDepth))) + if (!pos.see_ge(move, Value(-(30 - std::min(lmrDepth, 18)) * lmrDepth * lmrDepth))) continue; } else @@ -1055,8 +1055,8 @@ moves_loop: // When in check, search starts from here && captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] < 0) continue; - // See based pruning - if (!pos.see_ge(move, Value(-221) * depth)) // (~25 Elo) + // SEE based pruning + if (!pos.see_ge(move, Value(-213) * depth)) // (~25 Elo) continue; } } @@ -1150,12 +1150,12 @@ moves_loop: // When in check, search starts from here || moveCountPruning || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha || cutNode - || thisThread->ttHitAverage < 427 * TtHitAverageResolution * TtHitAverageWindow / 1024)) + || thisThread->ttHitAverage < 432 * TtHitAverageResolution * TtHitAverageWindow / 1024)) { Depth r = reduction(improving, depth, moveCount); // Decrease reduction if the ttHit running average is large - if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024) + if (thisThread->ttHitAverage > 537 * TtHitAverageResolution * TtHitAverageWindow / 1024) r--; // Increase reduction if other threads are searching this position @@ -1208,10 +1208,10 @@ moves_loop: // When in check, search starts from here - 5287; // Decrease/increase reduction by comparing opponent's stat score (~10 Elo) - if (ss->statScore >= -106 && (ss-1)->statScore < -104) + if (ss->statScore >= -105 && (ss-1)->statScore < -103) r--; - else if ((ss-1)->statScore >= -119 && ss->statScore < -140) + else if ((ss-1)->statScore >= -122 && ss->statScore < -129) r++; // Decrease/increase reduction for moves with a good/bad history (~30 Elo) @@ -1225,7 +1225,7 @@ moves_loop: // When in check, search starts from here // Unless giving check, this capture is likely bad if ( !givesCheck - && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha) + && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 210 * depth <= alpha) r++; } @@ -1499,7 +1499,7 @@ moves_loop: // When in check, search starts from here if (PvNode && bestValue > alpha) alpha = bestValue; - futilityBase = bestValue + 145; + futilityBase = bestValue + 155; } const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, diff --git a/src/types.h b/src/types.h index f55a20f7..4918e8ff 100644 --- a/src/types.h +++ b/src/types.h @@ -204,8 +204,8 @@ enum PieceType { enum Piece { NO_PIECE, - W_PAWN = 1, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, - B_PAWN = 9, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING, + W_PAWN = PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, + B_PAWN = PAWN + 8, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING, PIECE_NB = 16 };