Merge pull request #254 from noobpwnftw/merge

Merge
nodchip
2020-11-28 09:03:38 +09:00
committed by GitHub
16 changed files with 1086 additions and 988 deletions

View File

@@ -44,6 +44,7 @@ Daniel Dugovic (ddugovic)
Dariusz Orzechowski (dorzechowski)
David Zar
Daylen Yang (daylen)
Deshawn Mohan-Smith (GoldenRare)
DiscanX
Dominik Schlösser (domschl)
double-beep
@@ -64,7 +65,6 @@ Gary Heckman (gheckman)
George Sobala (gsobala)
gguliash
Gian-Carlo Pascutto (gcp)
Deshawn Mohan-Smith (GoldenRare)
Gontran Lemaire (gonlem)
Goodkov Vasiliy Aleksandrovich (goodkov)
Gregor Cramer
@@ -112,6 +112,7 @@ Mark Tenzer (31m059)
marotear
Matthew Lai (matthewlai)
Matthew Sullivan (Matt14916)
Maxim Molchanov (Maxim)
Michael An (man)
Michael Byrne (MichaelB7)
Michael Chaly (Vizvezdenec)

View File

@@ -41,7 +41,7 @@ BINDIR = $(PREFIX)/bin
### Built-in benchmark for pgo-builds
PGO_TRAINING_DATA_FILE = pgo_training_data.bin
PGOBENCH = ./$(EXE) bench
PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 1000 output_file_name $(PGO_TRAINING_DATA_FILE)
PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 1000 sfen_format bin output_file_name $(PGO_TRAINING_DATA_FILE)
### Source and object files
SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \

View File

@@ -84,11 +84,11 @@ using namespace Trace;
namespace {
// Threshold for lazy and space evaluation
constexpr Value LazyThreshold1 = Value(1400);
constexpr Value LazyThreshold2 = Value(1300);
constexpr Value SpaceThreshold = Value(12222);
constexpr Value NNUEThreshold1 = Value(550);
constexpr Value NNUEThreshold2 = Value(150);
constexpr Value LazyThreshold1 = Value(1565);
constexpr Value LazyThreshold2 = Value(1102);
constexpr Value SpaceThreshold = Value(11551);
constexpr Value NNUEThreshold1 = Value(682);
constexpr Value NNUEThreshold2 = Value(176);
// KingAttackWeights[PieceType] contains king attack weights by piece type
constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
@@ -930,7 +930,7 @@ Value Eval::evaluate(const Position& pos) {
{
// Scale and shift NNUE for compatibility with search and classical evaluation
auto adjusted_NNUE = [&](){
int mat = pos.non_pawn_material() + PieceValue[MG][PAWN] * pos.count<PAWN>();
int mat = pos.non_pawn_material() + PawnValueMg * pos.count<PAWN>();
return NNUE::evaluate(pos) * (720 + mat / 32) / 1024 + Tempo;
};
@@ -940,16 +940,18 @@ Value Eval::evaluate(const Position& pos) {
bool largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50;
bool classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB));
v = classical ? Evaluation<NO_TRACE>(pos).value() : adjusted_NNUE();
bool strongClassical = pos.non_pawn_material() < 2 * RookValueMg && pos.count<PAWN>() < 2;
v = classical || strongClassical ? Evaluation<NO_TRACE>(pos).value() : adjusted_NNUE();
// If the classical eval is small and imbalance large, use NNUE nevertheless.
// For the case of opposite colored bishops, switch to NNUE eval with
// small probability if the classical eval is less than the threshold.
if ( largePsq
&& (abs(v) * 16 < NNUEThreshold2 * r50
|| ( pos.opposite_bishops()
&& abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50
&& !(pos.this_thread()->nodes & 0xB))))
if ( largePsq && !strongClassical
&& ( abs(v) * 16 < NNUEThreshold2 * r50
|| ( pos.opposite_bishops()
&& abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50
&& !(pos.this_thread()->nodes & 0xB))))
v = adjusted_NNUE();
}
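
For context, the adjusted_NNUE lambda above scales the raw network output up slightly as material increases, so the hybrid evaluation stays comparable to the classical one. A minimal standalone sketch of that arithmetic (hypothetical inputs, Tempo term omitted):

#include <iostream>

// Illustrative sketch only: mirrors the scaling used in adjusted_NNUE above,
// scaled = nnue * (720 + mat / 32) / 1024, with the Tempo term omitted.
int scale_nnue(int nnue, int mat) {
    return nnue * (720 + mat / 32) / 1024;
}

int main() {
    // Hypothetical raw NNUE score of 100, at low and high material counts.
    std::cout << scale_nnue(100, 0)     << '\n';  // prints 70
    std::cout << scale_nnue(100, 10000) << '\n';  // prints 100
}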

View File

@@ -585,11 +585,10 @@ namespace CommandLine {
string argv0; // path+name of the executable binary, as given by argv[0]
string binaryDirectory; // path of the executable directory
string workingDirectory; // path of the working directory
string pathSeparator; // Separator for our current OS
void init(int argc, char* argv[]) {
(void)argc;
string separator;
string pathSeparator;
// extract the path+name of the executable binary
argv0 = argv[0];

View File

@@ -1,19 +1,19 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Code for calculating NNUE evaluation function
@@ -40,330 +40,313 @@
namespace Eval::NNUE {
const uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
// convention: W - us, B - them
// viewed from other side, W and B are reversed
{ PS_NONE, PS_NONE },
{ PS_W_PAWN, PS_B_PAWN },
{ PS_W_KNIGHT, PS_B_KNIGHT },
{ PS_W_BISHOP, PS_B_BISHOP },
{ PS_W_ROOK, PS_B_ROOK },
{ PS_W_QUEEN, PS_B_QUEEN },
{ PS_W_KING, PS_B_KING },
{ PS_NONE, PS_NONE },
{ PS_NONE, PS_NONE },
{ PS_B_PAWN, PS_W_PAWN },
{ PS_B_KNIGHT, PS_W_KNIGHT },
{ PS_B_BISHOP, PS_W_BISHOP },
{ PS_B_ROOK, PS_W_ROOK },
{ PS_B_QUEEN, PS_W_QUEEN },
{ PS_B_KING, PS_W_KING },
{ PS_NONE, PS_NONE }
};
// Input feature converter
LargePagePtr<FeatureTransformer> feature_transformer;

// Evaluation function
AlignedPtr<Network> network;

// Evaluation function file name
std::string fileName;

// Saved evaluation function file name
std::string savedfileName = "nn.bin";

// Get a string that represents the structure of the evaluation function
std::string get_architecture_string() {
return "Features=" + FeatureTransformer::get_structure_string() +
",Network=" + Network::get_structure_string();
}

std::string get_layers_info() {
return
FeatureTransformer::get_layers_info()
+ '\n' + Network::get_layers_info();
}

UseNNUEMode useNNUE;
std::string eval_file_loaded = "None";
namespace Detail {

// Initialize the evaluation function parameters
template <typename T>
void initialize(AlignedPtr<T>& pointer) {

pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
std::memset(pointer.get(), 0, sizeof(T));
}

template <typename T>
void initialize(LargePagePtr<T>& pointer) {

static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
pointer.reset(reinterpret_cast<T*>(aligned_large_pages_alloc(sizeof(T))));
std::memset(pointer.get(), 0, sizeof(T));
}

// Read evaluation function parameters
template <typename T>
bool ReadParameters(std::istream& stream, T& reference) {

std::uint32_t header;
header = read_little_endian<std::uint32_t>(stream);
if (!stream || header != T::GetHashValue()) return false;
return reference.ReadParameters(stream);
}

// write evaluation function parameters
template <typename T>
bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
constexpr std::uint32_t header = T::GetHashValue();

stream.write(reinterpret_cast<const char*>(&header), sizeof(header));

return pointer->WriteParameters(stream);
}

template <typename T>
bool WriteParameters(std::ostream& stream, const LargePagePtr<T>& pointer) {
constexpr std::uint32_t header = T::GetHashValue();

stream.write(reinterpret_cast<const char*>(&header), sizeof(header));

return pointer->WriteParameters(stream);
}

} // namespace Detail

// Initialize the evaluation function parameters
void initialize() {

Detail::initialize(feature_transformer);
Detail::initialize(network);
}

// Read network header
bool read_header(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
{
std::uint32_t version, size;

version = read_little_endian<std::uint32_t>(stream);
*hash_value = read_little_endian<std::uint32_t>(stream);
size = read_little_endian<std::uint32_t>(stream);
if (!stream || version != kVersion) return false;
architecture->resize(size);
stream.read(&(*architecture)[0], size);
return !stream.fail();
}

// write the header
bool write_header(std::ostream& stream,
std::uint32_t hash_value, const std::string& architecture) {

stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));
stream.write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value));

const std::uint32_t size = static_cast<std::uint32_t>(architecture.size());

stream.write(reinterpret_cast<const char*>(&size), sizeof(size));
stream.write(architecture.data(), size);

return !stream.fail();
}

// Read network parameters
bool ReadParameters(std::istream& stream) {

std::uint32_t hash_value;
std::string architecture;
if (!read_header(stream, &hash_value, &architecture)) return false;
if (hash_value != kHashValue) return false;
if (!Detail::ReadParameters(stream, *feature_transformer)) return false;
if (!Detail::ReadParameters(stream, *network)) return false;
return stream && stream.peek() == std::ios::traits_type::eof();
}

// write evaluation function parameters
bool WriteParameters(std::ostream& stream) {

if (!write_header(stream, kHashValue, get_architecture_string()))
return false;
if (!Detail::WriteParameters(stream, feature_transformer))
return false;
if (!Detail::WriteParameters(stream, network))
return false;
return !stream.fail();
}

// Evaluation function. Perform differential calculation.
Value evaluate(const Position& pos) {

// We manually align the arrays on the stack because with gcc < 9.3
// overaligning stack variables with alignas() doesn't work correctly.

constexpr uint64_t alignment = kCacheLineSize;

#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
TransformedFeatureType transformed_features_unaligned[
FeatureTransformer::kBufferSize + alignment / sizeof(TransformedFeatureType)];
char buffer_unaligned[Network::kBufferSize + alignment];

auto* transformed_features = align_ptr_up<alignment>(&transformed_features_unaligned[0]);
auto* buffer = align_ptr_up<alignment>(&buffer_unaligned[0]);
#else
alignas(alignment)
TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize];
alignas(alignment) char buffer[Network::kBufferSize];
#endif

ASSERT_ALIGNED(transformed_features, alignment);
ASSERT_ALIGNED(buffer, alignment);

feature_transformer->Transform(pos, transformed_features);
const auto output = network->Propagate(transformed_features, buffer);

return static_cast<Value>(output[0] / FV_SCALE);
}

// Load eval, from a file stream or a memory stream
bool load_eval(std::string name, std::istream& stream) {

initialize();
fileName = name;
return ReadParameters(stream);
}

static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode)
{
if (mode == "false")
return UseNNUEMode::False;
else if (mode == "true")
return UseNNUEMode::True;
else if (mode == "pure")
return UseNNUEMode::Pure;

return UseNNUEMode::False;
}

void init() {

useNNUE = nnue_mode_from_option(Options["Use NNUE"]);

if (Options["SkipLoadingEval"] || useNNUE == UseNNUEMode::False)
{
eval_file_loaded.clear();
return;
}

std::string eval_file = std::string(Options["EvalFile"]);

#if defined(DEFAULT_NNUE_DIRECTORY)
#define stringify2(x) #x
#define stringify(x) stringify2(x)
std::vector<std::string> dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
#else
std::vector<std::string> dirs = { "" , CommandLine::binaryDirectory };
#endif

for (std::string directory : dirs)
{
if (eval_file_loaded != eval_file)
{
std::ifstream stream(directory + eval_file, std::ios::binary);
if (load_eval(eval_file, stream))
{
sync_cout << "info string Loaded eval file " << directory + eval_file << sync_endl;
eval_file_loaded = eval_file;
}
else
{
sync_cout << "info string ERROR: failed to load eval file " << directory + eval_file << sync_endl;
eval_file_loaded.clear();
}
}
}

#undef stringify2
#undef stringify
}
/// NNUE::verify() verifies that the last net used was loaded successfully
void verify_eval_file_loaded() {

std::string eval_file = std::string(Options["EvalFile"]);

if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
{
UCI::OptionsMap defaults;
UCI::init(defaults);

std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
std::string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
std::string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(defaults["EvalFile"]);
std::string msg5 = "The engine will be terminated now.";

sync_cout << "info string ERROR: " << msg1 << sync_endl;
sync_cout << "info string ERROR: " << msg2 << sync_endl;
sync_cout << "info string ERROR: " << msg3 << sync_endl;
sync_cout << "info string ERROR: " << msg4 << sync_endl;
sync_cout << "info string ERROR: " << msg5 << sync_endl;

std::exit(EXIT_FAILURE);
}

if (useNNUE != UseNNUEMode::False)
sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl;
else
sync_cout << "info string classical evaluation enabled" << sync_endl;
}

/// In training we override eval file so this is useful.
void verify_any_net_loaded() {

if (!Options["SkipLoadingEval"] && useNNUE != UseNNUEMode::False && eval_file_loaded.empty())
{
UCI::OptionsMap defaults;
UCI::init(defaults);

std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
std::string msg2 = "The option is set to true, but the network file was not loaded successfully.";
std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
std::string msg5 = "The engine will be terminated now.";

sync_cout << "info string ERROR: " << msg1 << sync_endl;
sync_cout << "info string ERROR: " << msg2 << sync_endl;
sync_cout << "info string ERROR: " << msg3 << sync_endl;
sync_cout << "info string ERROR: " << msg5 << sync_endl;

std::exit(EXIT_FAILURE);
}

if (useNNUE != UseNNUEMode::False)
sync_cout << "info string NNUE evaluation using " << eval_file_loaded << " enabled" << sync_endl;
else
sync_cout << "info string classical evaluation enabled" << sync_endl;
}
} // namespace Eval::NNUE
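
The evaluate() function above works around a gcc < 9.3 issue by over-allocating its buffers and aligning the pointers by hand when ALIGNAS_ON_STACK_VARIABLES_BROKEN is defined. A minimal sketch of that idea, using a hypothetical align_up helper rather than the engine's align_ptr_up:

#include <cassert>
#include <cstdint>

// Hypothetical helper, similar in spirit to align_ptr_up used above:
// round a pointer up to the next multiple of Alignment.
template <std::uintptr_t Alignment>
char* align_up(char* ptr) {
    auto p = reinterpret_cast<std::uintptr_t>(ptr);
    return reinterpret_cast<char*>((p + Alignment - 1) / Alignment * Alignment);
}

int main() {
    constexpr std::uintptr_t kAlign = 64;   // e.g. one cache line
    char raw[256 + kAlign];                 // over-allocate by the alignment amount
    char* aligned = align_up<kAlign>(raw);  // first suitably aligned byte inside raw
    assert(reinterpret_cast<std::uintptr_t>(aligned) % kAlign == 0);
    (void)aligned;
    return 0;
}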

View File

@@ -1,21 +1,23 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// header used in NNUE evaluation function
#ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
#define NNUE_EVALUATE_NNUE_H_INCLUDED
@@ -25,84 +27,83 @@
#include <memory>
namespace Eval::NNUE {
enum struct UseNNUEMode
{
False,
True,
Pure
};

// Hash value of evaluation function structure
constexpr std::uint32_t kHashValue =
FeatureTransformer::GetHashValue() ^ Network::GetHashValue();

// Deleter for automating release of memory area
template <typename T>
struct AlignedDeleter {
void operator()(T* ptr) const {
ptr->~T();
std_aligned_free(ptr);
}
};

template <typename T>
struct LargePageDeleter {
void operator()(T* ptr) const {
ptr->~T();
aligned_large_pages_free(ptr);
}
};

template <typename T>
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;

template <typename T>
using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;

// Input feature converter
extern LargePagePtr<FeatureTransformer> feature_transformer;

// Evaluation function
extern AlignedPtr<Network> network;

// Evaluation function file name
extern std::string fileName;

// Saved evaluation function file name
extern std::string savedfileName;

extern UseNNUEMode useNNUE;

extern std::string eval_file_loaded;

// Get a string that represents the structure of the evaluation function
std::string get_architecture_string();

std::string get_layers_info();

// read the header
bool read_header(std::istream& stream,
std::uint32_t* hash_value, std::string* architecture);

// write the header
bool write_header(std::ostream& stream,
std::uint32_t hash_value, const std::string& architecture);

// read evaluation function parameters
bool ReadParameters(std::istream& stream);

// write evaluation function parameters
bool WriteParameters(std::ostream& stream);

Value evaluate(const Position& pos);
bool load_eval(std::string name, std::istream& stream);
void init();

void verify_eval_file_loaded();
void verify_any_net_loaded();
} // namespace Eval::NNUE
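
AlignedPtr and LargePagePtr above pair std::unique_ptr with custom deleters so the specialized allocators are released correctly. A small sketch of the same pattern, assuming only standard allocation functions (not the engine's std_aligned_alloc / aligned_large_pages_free):

#include <cstdlib>
#include <memory>
#include <new>

// Sketch of the custom-deleter pattern above, with standard allocation.
template <typename T>
struct AlignedDelete {
    void operator()(T* ptr) const {
        ptr->~T();        // destroy the object ...
        std::free(ptr);   // ... then release the aligned storage
    }
};

template <typename T>
using AlignedUnique = std::unique_ptr<T, AlignedDelete<T>>;

int main() {
    // 64-byte aligned storage; the size must be a multiple of the alignment.
    void* raw = std::aligned_alloc(64, 64);
    if (!raw) return 1;
    AlignedUnique<double> p(new (raw) double(3.14));
    return *p > 0.0 ? 0 : 1;
}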

View File

@@ -1,19 +1,19 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// A class template that represents the input feature set of the NNUE evaluation function
@@ -22,7 +22,6 @@
#define NNUE_FEATURE_SET_H_INCLUDED
#include "features_common.h"
#include <array>
namespace Eval::NNUE::Features {

View File

@@ -1,19 +1,19 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//Common header of input features of NNUE evaluation function
@@ -21,30 +21,29 @@
#ifndef NNUE_FEATURES_COMMON_H_INCLUDED
#define NNUE_FEATURES_COMMON_H_INCLUDED
#include "evaluate.h"
#include "nnue/nnue_common.h"
#include "../../evaluate.h"
#include "../nnue_common.h"
namespace Eval::NNUE::Features {
class IndexList;

template <typename... FeatureTypes>
class FeatureSet;

// Trigger to perform full calculations instead of difference only
enum class TriggerEvent {
kNone, // Calculate the difference whenever possible
kFriendKingMoved, // calculate full evaluation when own king moves
kEnemyKingMoved, // calculate full evaluation when opponent king moves
kAnyKingMoved, // calculate full evaluation when any king moves
kAnyPieceMoved, // always calculate full evaluation
};

enum class Side {
kFriend, // side to move
kEnemy, // opponent
};
} // namespace Eval::NNUE::Features

View File

@@ -1,19 +1,19 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Definition of index list of input features
@@ -21,43 +21,43 @@
#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED
#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED
#include "position.h"
#include "nnue/nnue_architecture.h"
#include "../../position.h"
#include "../nnue_architecture.h"
namespace Eval::NNUE::Features {
// Class template used for feature index list
template <typename T, std::size_t MaxSize>
class ValueList {

public:
std::size_t size() const { return size_; }
void resize(std::size_t size) { size_ = size; }
void push_back(const T& value) { values_[size_++] = value; }
T& operator[](std::size_t index) { return values_[index]; }
T* begin() { return values_; }
T* end() { return values_ + size_; }
const T& operator[](std::size_t index) const { return values_[index]; }
const T* begin() const { return values_; }
const T* end() const { return values_ + size_; }

void swap(ValueList& other) {
const std::size_t max_size = std::max(size_, other.size_);
for (std::size_t i = 0; i < max_size; ++i) {
std::swap(values_[i], other.values_[i]);
}
std::swap(size_, other.size_);
}
private:
T values_[MaxSize] = {};
std::size_t size_ = 0;
};
private:
T values_[MaxSize];
std::size_t size_ = 0;
};
//Type of feature index list
class IndexList
: public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
};
} // namespace Eval::NNUE::Features
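
ValueList is a fixed-capacity list: a plain array plus a size counter, so push_back never allocates. A simplified stand-in illustrating the idea (not the engine's class):

#include <cstddef>
#include <iostream>

// Simplified stand-in for ValueList<T, MaxSize> above: fixed storage
// plus a size counter, so push_back never allocates.
template <typename T, std::size_t MaxSize>
struct SmallList {
    T values[MaxSize] = {};
    std::size_t count = 0;
    void push_back(const T& v) { values[count++] = v; }  // no bounds check, as above
    const T* begin() const { return values; }
    const T* end()   const { return values + count; }
};

int main() {
    SmallList<int, 8> active;   // e.g. active feature indices for one position
    active.push_back(42);
    active.push_back(7);
    for (int idx : active)
        std::cout << idx << '\n';
}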

View File

@@ -223,13 +223,13 @@ namespace Eval::NNUE::Layers {
return _mm512_add_epi32(_mm512_permutexvar_epi32(indices, x), bias);
};
[[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) {
#if defined (USE_VNNI)
[[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) {
acc = _mm512_dpbusd_epi32(acc, a, b);
#else
[[maybe_unused]] auto m512_dpbusd_epi32 = [=](__m512i a, __m512i b) -> __m512i {
__m512i product0 = _mm512_maddubs_epi16(a, b);
product0 = _mm512_madd_epi16(product0, kOnes512);
acc = _mm512_add_epi32(acc, product0);
return _mm512_madd_epi16(product0, kOnes512);
#endif
};
@@ -256,14 +256,13 @@ namespace Eval::NNUE::Layers {
return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias);
};
[[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) {
#if defined (USE_VNNI)
[[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) {
acc = _mm256_dpbusd_epi32(acc, a, b);
#else
[[maybe_unused]] auto m256_dpbusd_epi32 = [=](__m256i a, __m256i b) -> __m256i {
__m256i product0 = _mm256_maddubs_epi16(a, b);
product0 = _mm256_madd_epi16(product0, kOnes256);
acc = _mm256_add_epi32(acc, product0);
return _mm256_madd_epi16(product0, kOnes256);
#endif
};
@@ -288,10 +287,9 @@ namespace Eval::NNUE::Layers {
return _mm_add_epi32(sum0, bias);
};
[[maybe_unused]] auto m128_add_dpbusd_epi32 = [=](__m128i& acc, __m128i a, __m128i b) {
[[maybe_unused]] auto m128_dpbusd_epi32 = [=](__m128i a, __m128i b) -> __m128i {
__m128i product0 = _mm_maddubs_epi16(a, b);
product0 = _mm_madd_epi16(product0, kOnes128);
acc = _mm_add_epi32(acc, product0);
return _mm_madd_epi16(product0, kOnes128);
};
#endif
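
On targets without VNNI, the lambdas above emulate dpbusd: maddubs multiplies unsigned bytes by signed bytes into 16-bit pairwise sums, and a madd against a vector of ones widens and adds them into 32 bits. A scalar model of one output lane (illustrative only, 16-bit saturation ignored):

#include <cstdint>
#include <iostream>

// Scalar model of one 32-bit lane of dpbusd: a dot product of four unsigned
// bytes with four signed bytes, accumulated into 32 bits.
int32_t dpbusd_lane(const uint8_t a[4], const int8_t b[4], int32_t acc) {
    int16_t p0 = int16_t(a[0] * b[0] + a[1] * b[1]);  // maddubs step
    int16_t p1 = int16_t(a[2] * b[2] + a[3] * b[3]);
    return acc + int32_t(p0) + int32_t(p1);           // madd-with-ones step
}

int main() {
    uint8_t a[4] = {10, 20, 30, 40};
    int8_t  b[4] = { 1, -2,  3, -4};
    std::cout << dpbusd_lane(a, b, 0) << '\n';  // 10 - 40 + 90 - 160 = -100
}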
@@ -335,15 +333,6 @@ namespace Eval::NNUE::Layers {
const __m512i bias = *reinterpret_cast<const __m512i*>(&biases_[i]);
__m512i* outptr = reinterpret_cast<__m512i*>(&output[i]);
__m512i sum01a = _mm512_setzero_si512();
__m512i sum23a = _mm512_setzero_si512();
__m512i sum45a = _mm512_setzero_si512();
__m512i sum67a = _mm512_setzero_si512();
__m512i sum01b = _mm512_setzero_si512();
__m512i sum23b = _mm512_setzero_si512();
__m512i sum45b = _mm512_setzero_si512();
__m512i sum67b = _mm512_setzero_si512();
const auto row01a = *reinterpret_cast<const __m512i*>(&weights_[offset01a]);
const auto row23a = *reinterpret_cast<const __m512i*>(&weights_[offset23a]);
const auto row45a = *reinterpret_cast<const __m512i*>(&weights_[offset45a]);
@@ -356,6 +345,16 @@ namespace Eval::NNUE::Layers {
const __m256i in256 = input_vector256[0];
const __m512i in = _mm512_inserti64x4(_mm512_castsi256_si512(in256), in256, 1);
#if defined (USE_VNNI)
__m512i sum01a = _mm512_setzero_si512();
__m512i sum23a = _mm512_setzero_si512();
__m512i sum45a = _mm512_setzero_si512();
__m512i sum67a = _mm512_setzero_si512();
__m512i sum01b = _mm512_setzero_si512();
__m512i sum23b = _mm512_setzero_si512();
__m512i sum45b = _mm512_setzero_si512();
__m512i sum67b = _mm512_setzero_si512();
m512_add_dpbusd_epi32(sum01a, in, row01a);
m512_add_dpbusd_epi32(sum23a, in, row23a);
m512_add_dpbusd_epi32(sum45a, in, row45a);
@@ -364,6 +363,16 @@ namespace Eval::NNUE::Layers {
m512_add_dpbusd_epi32(sum23b, in, row23b);
m512_add_dpbusd_epi32(sum45b, in, row45b);
m512_add_dpbusd_epi32(sum67b, in, row67b);
#else
__m512i sum01a = m512_dpbusd_epi32(in, row01a);
__m512i sum23a = m512_dpbusd_epi32(in, row23a);
__m512i sum45a = m512_dpbusd_epi32(in, row45a);
__m512i sum67a = m512_dpbusd_epi32(in, row67a);
__m512i sum01b = m512_dpbusd_epi32(in, row01b);
__m512i sum23b = m512_dpbusd_epi32(in, row23b);
__m512i sum45b = m512_dpbusd_epi32(in, row45b);
__m512i sum67b = m512_dpbusd_epi32(in, row67b);
#endif
*outptr = m512_hadd256x16(
sum01a, sum23a, sum45a, sum67a,
@@ -384,48 +393,80 @@ namespace Eval::NNUE::Layers {
if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) == 0)
{
__m512i sum0 = _mm512_setzero_si512();
__m512i sum1 = _mm512_setzero_si512();
__m512i sum2 = _mm512_setzero_si512();
__m512i sum3 = _mm512_setzero_si512();
const auto row0 = reinterpret_cast<const __m512i*>(&weights_[offset0]);
const auto row1 = reinterpret_cast<const __m512i*>(&weights_[offset1]);
const auto row2 = reinterpret_cast<const __m512i*>(&weights_[offset2]);
const auto row3 = reinterpret_cast<const __m512i*>(&weights_[offset3]);
for (IndexType j = 0; j < kNumChunks512; ++j)
#if defined (USE_VNNI)
__m512i sum0 = _mm512_setzero_si512();
__m512i sum1 = _mm512_setzero_si512();
__m512i sum2 = _mm512_setzero_si512();
__m512i sum3 = _mm512_setzero_si512();
const IndexType kStart = 0;
#else
__m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]);
__m512i sum1 = m512_dpbusd_epi32(input_vector512[0], row1[0]);
__m512i sum2 = m512_dpbusd_epi32(input_vector512[0], row2[0]);
__m512i sum3 = m512_dpbusd_epi32(input_vector512[0], row3[0]);
const IndexType kStart = 1;
#endif
for (IndexType j = kStart; j < kNumChunks512; ++j)
{
const __m512i in = input_vector512[j];
#if defined (USE_VNNI)
m512_add_dpbusd_epi32(sum0, in, row0[j]);
m512_add_dpbusd_epi32(sum1, in, row1[j]);
m512_add_dpbusd_epi32(sum2, in, row2[j]);
m512_add_dpbusd_epi32(sum3, in, row3[j]);
#else
sum0 = _mm512_add_epi32(sum0, m512_dpbusd_epi32(in, row0[j]));
sum1 = _mm512_add_epi32(sum1, m512_dpbusd_epi32(in, row1[j]));
sum2 = _mm512_add_epi32(sum2, m512_dpbusd_epi32(in, row2[j]));
sum3 = _mm512_add_epi32(sum3, m512_dpbusd_epi32(in, row3[j]));
#endif
}
*outptr = m512_haddx4(sum0, sum1, sum2, sum3, bias);
}
else
{
__m256i sum0 = _mm256_setzero_si256();
__m256i sum1 = _mm256_setzero_si256();
__m256i sum2 = _mm256_setzero_si256();
__m256i sum3 = _mm256_setzero_si256();
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[offset0]);
const auto row1 = reinterpret_cast<const __m256i*>(&weights_[offset1]);
const auto row2 = reinterpret_cast<const __m256i*>(&weights_[offset2]);
const auto row3 = reinterpret_cast<const __m256i*>(&weights_[offset3]);
for (IndexType j = 0; j < kNumChunks256; ++j)
#if defined (USE_VNNI)
__m256i sum0 = _mm256_setzero_si256();
__m256i sum1 = _mm256_setzero_si256();
__m256i sum2 = _mm256_setzero_si256();
__m256i sum3 = _mm256_setzero_si256();
const IndexType kStart = 0;
#else
__m256i sum0 = m256_dpbusd_epi32(input_vector256[0], row0[0]);
__m256i sum1 = m256_dpbusd_epi32(input_vector256[0], row1[0]);
__m256i sum2 = m256_dpbusd_epi32(input_vector256[0], row2[0]);
__m256i sum3 = m256_dpbusd_epi32(input_vector256[0], row3[0]);
const IndexType kStart = 1;
#endif
for (IndexType j = kStart; j < kNumChunks256; ++j)
{
const __m256i in = input_vector256[j];
#if defined (USE_VNNI)
m256_add_dpbusd_epi32(sum0, in, row0[j]);
m256_add_dpbusd_epi32(sum1, in, row1[j]);
m256_add_dpbusd_epi32(sum2, in, row2[j]);
m256_add_dpbusd_epi32(sum3, in, row3[j]);
#else
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
sum1 = _mm256_add_epi32(sum1, m256_dpbusd_epi32(in, row1[j]));
sum2 = _mm256_add_epi32(sum2, m256_dpbusd_epi32(in, row2[j]));
sum3 = _mm256_add_epi32(sum3, m256_dpbusd_epi32(in, row3[j]));
#endif
}
*outptr = m256_haddx4(sum0, sum1, sum2, sum3, bias);
@@ -436,30 +477,50 @@ namespace Eval::NNUE::Layers {
{
if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) == 0)
{
__m512i sum0 = _mm512_setzero_si512();
const auto row0 = reinterpret_cast<const __m512i*>(&weights_[0]);
for (IndexType j = 0; j < kNumChunks512; ++j)
#if defined (USE_VNNI)
__m512i sum0 = _mm512_setzero_si512();
const IndexType kStart = 0;
#else
__m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]);
const IndexType kStart = 1;
#endif
for (IndexType j = kStart; j < kNumChunks512; ++j)
{
const __m512i in = input_vector512[j];
#if defined (USE_VNNI)
m512_add_dpbusd_epi32(sum0, in, row0[j]);
#else
sum0 = _mm512_add_epi32(sum0, m512_dpbusd_epi32(in, row0[j]));
#endif
}
output[0] = m512_hadd(sum0, biases_[0]);
}
else
{
__m256i sum0 = _mm256_setzero_si256();
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
for (IndexType j = 0; j < kNumChunks256; ++j)
#if defined (USE_VNNI)
__m256i sum0 = _mm256_setzero_si256();
const IndexType kStart = 0;
#else
__m256i sum0 = m256_dpbusd_epi32(input_vector256[0], row0[0]);
const IndexType kStart = 1;
#endif
for (IndexType j = kStart; j < kNumChunks256; ++j)
{
const __m256i in = input_vector256[j];
#if defined (USE_VNNI)
m256_add_dpbusd_epi32(sum0, in, row0[j]);
#else
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
#endif
}
output[0] = m256_hadd(sum0, biases_[0]);
@@ -493,24 +554,40 @@ namespace Eval::NNUE::Layers {
const __m128i bias = *reinterpret_cast<const __m128i*>(&biases_[i]);
__m128i* outptr = reinterpret_cast<__m128i*>(&output[i]);
__m256i sum0 = _mm256_setzero_si256();
__m256i sum1 = _mm256_setzero_si256();
__m256i sum2 = _mm256_setzero_si256();
__m256i sum3 = _mm256_setzero_si256();
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[offset0]);
const auto row1 = reinterpret_cast<const __m256i*>(&weights_[offset1]);
const auto row2 = reinterpret_cast<const __m256i*>(&weights_[offset2]);
const auto row3 = reinterpret_cast<const __m256i*>(&weights_[offset3]);
for (IndexType j = 0; j < kNumChunks; ++j)
#if defined (USE_VNNI)
__m256i sum0 = _mm256_setzero_si256();
__m256i sum1 = _mm256_setzero_si256();
__m256i sum2 = _mm256_setzero_si256();
__m256i sum3 = _mm256_setzero_si256();
const IndexType kStart = 0;
#else
__m256i sum0 = m256_dpbusd_epi32(input_vector[0], row0[0]);
__m256i sum1 = m256_dpbusd_epi32(input_vector[0], row1[0]);
__m256i sum2 = m256_dpbusd_epi32(input_vector[0], row2[0]);
__m256i sum3 = m256_dpbusd_epi32(input_vector[0], row3[0]);
const IndexType kStart = 1;
#endif
for (IndexType j = kStart; j < kNumChunks; ++j)
{
const __m256i in = input_vector[j];
#if defined (USE_VNNI)
m256_add_dpbusd_epi32(sum0, in, row0[j]);
m256_add_dpbusd_epi32(sum1, in, row1[j]);
m256_add_dpbusd_epi32(sum2, in, row2[j]);
m256_add_dpbusd_epi32(sum3, in, row3[j]);
#else
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
sum1 = _mm256_add_epi32(sum1, m256_dpbusd_epi32(in, row1[j]));
sum2 = _mm256_add_epi32(sum2, m256_dpbusd_epi32(in, row2[j]));
sum3 = _mm256_add_epi32(sum3, m256_dpbusd_epi32(in, row3[j]));
#endif
}
*outptr = m256_haddx4(sum0, sum1, sum2, sum3, bias);
@@ -518,15 +595,25 @@ namespace Eval::NNUE::Layers {
}
else if constexpr (kOutputDimensions == 1)
{
__m256i sum0 = _mm256_setzero_si256();
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
for (IndexType j = 0; j < kNumChunks; ++j)
#if defined (USE_VNNI)
__m256i sum0 = _mm256_setzero_si256();
const IndexType kStart = 0;
#else
__m256i sum0 = m256_dpbusd_epi32(input_vector[0], row0[0]);
const IndexType kStart = 1;
#endif
for (IndexType j = kStart; j < kNumChunks; ++j)
{
const __m256i in = input_vector[j];
m256_add_dpbusd_epi32(sum0, in, row0[j]);
#if defined (USE_VNNI)
m256_add_dpbusd_epi32(sum0, in, row0[j]);
#else
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
#endif
}
output[0] = m256_hadd(sum0, biases_[0]);
@@ -559,24 +646,24 @@ namespace Eval::NNUE::Layers {
const __m128i bias = *reinterpret_cast<const __m128i*>(&biases_[i]);
__m128i* outptr = reinterpret_cast<__m128i*>(&output[i]);
__m128i sum0 = _mm_setzero_si128();
__m128i sum1 = _mm_setzero_si128();
__m128i sum2 = _mm_setzero_si128();
__m128i sum3 = _mm_setzero_si128();
const auto row0 = reinterpret_cast<const __m128i*>(&weights_[offset0]);
const auto row1 = reinterpret_cast<const __m128i*>(&weights_[offset1]);
const auto row2 = reinterpret_cast<const __m128i*>(&weights_[offset2]);
const auto row3 = reinterpret_cast<const __m128i*>(&weights_[offset3]);
for (int j = 0; j < (int)kNumChunks; j += 1)
__m128i sum0 = m128_dpbusd_epi32(input_vector[0], row0[0]);
__m128i sum1 = m128_dpbusd_epi32(input_vector[0], row1[0]);
__m128i sum2 = m128_dpbusd_epi32(input_vector[0], row2[0]);
__m128i sum3 = m128_dpbusd_epi32(input_vector[0], row3[0]);
for (int j = 1; j < (int)kNumChunks; ++j)
{
const __m128i in = input_vector[j];
m128_add_dpbusd_epi32(sum0, in, row0[j]);
m128_add_dpbusd_epi32(sum1, in, row1[j]);
m128_add_dpbusd_epi32(sum2, in, row2[j]);
m128_add_dpbusd_epi32(sum3, in, row3[j]);
sum0 = _mm_add_epi32(sum0, m128_dpbusd_epi32(in, row0[j]));
sum1 = _mm_add_epi32(sum1, m128_dpbusd_epi32(in, row1[j]));
sum2 = _mm_add_epi32(sum2, m128_dpbusd_epi32(in, row2[j]));
sum3 = _mm_add_epi32(sum3, m128_dpbusd_epi32(in, row3[j]));
}
*outptr = m128_haddx4(sum0, sum1, sum2, sum3, bias);
@@ -584,16 +671,12 @@ namespace Eval::NNUE::Layers {
}
else if constexpr (kOutputDimensions == 1)
{
__m128i sum0 = _mm_setzero_si128();
const auto row0 = reinterpret_cast<const __m128i*>(&weights_[0]);
for (int j = 0; j < (int)kNumChunks; j += 1)
{
const __m128i in = input_vector[j];
__m128i sum0 = m128_dpbusd_epi32(input_vector[0], row0[0]);
m128_add_dpbusd_epi32(sum0, in, row0[j]);
}
for (int j = 1; j < (int)kNumChunks; ++j)
sum0 = _mm_add_epi32(sum0, m128_dpbusd_epi32(input_vector[j], row0[j]));
output[0] = m128_hadd(sum0, biases_[0]);
}

View File

@@ -1,34 +1,35 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Class for difference calculation of NNUE evaluation function
#ifndef NNUE_ACCUMULATOR_H_INCLUDED
#define NNUE_ACCUMULATOR_H_INCLUDED
#include "nnue_architecture.h"
namespace Eval::NNUE {
// Class that holds the result of affine transformation of input features
struct alignas(kCacheLineSize) Accumulator {
std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
bool computed_accumulation;
};
} // namespace Eval::NNUE

View File

@@ -1,36 +1,37 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Input features and network structure used in NNUE evaluation function
#ifndef NNUE_ARCHITECTURE_H_INCLUDED
#define NNUE_ARCHITECTURE_H_INCLUDED
// Defines the network structure
#include "architectures/halfkp_256x2-32-32.h"
namespace Eval::NNUE {
static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
static_assert(Network::kOutputDimensions == 1, "");
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");

// Trigger for full calculation instead of difference calculation
constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
} // namespace Eval::NNUE

View File

@@ -1,19 +1,19 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// A class that converts the input features of the NNUE evaluation function
@@ -23,7 +23,6 @@
#include "nnue_common.h"
#include "nnue_architecture.h"
#include "features/index_list.h"
#include <cstring>
@@ -31,456 +30,486 @@
namespace Eval::NNUE {
// If vector instructions are enabled, we update and refresh the
// accumulator tile by tile such that each tile fits in the CPU's
// vector registers.
#define TILING
#define VECTOR
#ifdef USE_AVX512
typedef __m512i vec_t;
#define vec_load(a) _mm512_load_si512(a)
#define vec_store(a,b) _mm512_store_si512(a,b)
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
#define vec_zero _mm512_setzero_si512()
static constexpr IndexType kNumRegs = 8; // only 8 are needed
#ifdef USE_AVX512
typedef __m512i vec_t;
#define vec_load(a) _mm512_load_si512(a)
#define vec_store(a,b) _mm512_store_si512(a,b)
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
#define vec_zero _mm512_setzero_si512()
static constexpr IndexType kNumRegs = 8; // only 8 are needed
#elif USE_AVX2
typedef __m256i vec_t;
#define vec_load(a) _mm256_load_si256(a)
#define vec_store(a,b) _mm256_store_si256(a,b)
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
#define vec_zero _mm256_setzero_si256()
static constexpr IndexType kNumRegs = 16;
#elif USE_SSE2
typedef __m128i vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_epi16(a,b)
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
#define vec_zero _mm_setzero_si128()
static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8;
#elif USE_MMX
typedef __m64 vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_pi16(a,b)
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
#define vec_zero _mm_setzero_si64()
static constexpr IndexType kNumRegs = 8;
#elif USE_NEON
typedef int16x8_t vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) vaddq_s16(a,b)
#define vec_sub_16(a,b) vsubq_s16(a,b)
#define vec_zero {0}
#elif USE_AVX2
typedef __m256i vec_t;
#define vec_load(a) _mm256_load_si256(a)
#define vec_store(a,b) _mm256_store_si256(a,b)
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
#define vec_zero _mm256_setzero_si256()
static constexpr IndexType kNumRegs = 16;
#elif USE_SSE2
typedef __m128i vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_epi16(a,b)
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
#define vec_zero _mm_setzero_si128()
static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8;
#elif USE_MMX
typedef __m64 vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_pi16(a,b)
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
#define vec_zero _mm_setzero_si64()
static constexpr IndexType kNumRegs = 8;
#elif USE_NEON
typedef int16x8_t vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) vaddq_s16(a,b)
#define vec_sub_16(a,b) vsubq_s16(a,b)
#define vec_zero {0}
static constexpr IndexType kNumRegs = 16;
#else
#undef VECTOR
#endif
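For orientation (an illustration, not part of the diff): the tile height used by the code below is kNumRegs * sizeof(vec_t) / 2 int16 accumulator entries, i.e. how much of one 256-wide accumulator half the selected register file holds at once.

#include <cstdint>

constexpr unsigned tile_height(unsigned numRegs, unsigned vecBytes) {
  // Number of int16 accumulator entries covered by numRegs vector registers.
  return numRegs * vecBytes / sizeof(std::int16_t);
}

static_assert(tile_height( 8, 64) == 256, "AVX-512: one 256-wide half per tile");
static_assert(tile_height(16, 32) == 256, "AVX2: one 256-wide half per tile");
static_assert(tile_height(16, 16) == 128, "SSE2 (64-bit): two tiles per 256-wide half");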
// Input feature converter
class FeatureTransformer {
private:
// Number of output dimensions for one side
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
#ifdef VECTOR
static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
#endif
public:
// Output type
using OutputType = TransformedFeatureType;
// Number of input/output dimensions
static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
// Size of forward propagation buffer
static constexpr std::size_t kBufferSize =
kOutputDimensions * sizeof(OutputType);
static constexpr int kLayerIndex = 0;
// Hash value embedded in the evaluation file
static constexpr std::uint32_t GetHashValue() {
return RawFeatures::kHashValue ^ kOutputDimensions;
}
static std::string get_name() {
return RawFeatures::get_name() + "[" +
std::to_string(kInputDimensions) + "->" +
std::to_string(kHalfDimensions) + "x2]";
}
// a string representing the structure
static std::string get_structure_string() {
return get_name();
}
static std::string get_layers_info() {
std::string info = " - ";
info += std::to_string(kLayerIndex);
info += " - ";
info += get_name();
return info;
}
// Read network parameters
bool ReadParameters(std::istream& stream) {
for (std::size_t i = 0; i < kHalfDimensions; ++i)
biases_[i] = read_little_endian<BiasType>(stream);
for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
weights_[i] = read_little_endian<WeightType>(stream);
return !stream.fail();
}
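ReadParameters leans on read_little_endian so the network file parses identically on little- and big-endian hosts. A stand-alone sketch of that kind of helper (an assumption about its shape, not the diff's code):

#include <cstddef>
#include <cstdint>
#include <istream>
#include <type_traits>

template <typename IntType>
IntType read_le(std::istream& stream) {
  // Read the raw bytes, then assemble them in little-endian order
  // regardless of the host's native byte order.
  std::uint8_t bytes[sizeof(IntType)];
  stream.read(reinterpret_cast<char*>(bytes), sizeof(IntType));
  typename std::make_unsigned<IntType>::type value = 0;
  for (std::size_t i = 0; i < sizeof(IntType); ++i)
    value |= static_cast<decltype(value)>(bytes[i]) << (8 * i);
  return static_cast<IntType>(value);
}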
// write parameters
bool WriteParameters(std::ostream& stream) const {
stream.write(reinterpret_cast<const char*>(biases_),
kHalfDimensions * sizeof(BiasType));
stream.write(reinterpret_cast<const char*>(weights_),
kHalfDimensions * kInputDimensions * sizeof(WeightType));
return !stream.fail();
}
// Proceed with the difference calculation if possible
bool update_accumulator_if_possible(const Position& pos) const {
const auto now = pos.state();
if (now->accumulator.computed_accumulation)
return true;
const auto prev = now->previous;
if (prev && prev->accumulator.computed_accumulation) {
update_accumulator(pos);
return true;
}
return false;
}
// Convert input features
void Transform(const Position& pos, OutputType* output) const {
if (!update_accumulator_if_possible(pos))
refresh_accumulator(pos);
const auto& accumulation = pos.state()->accumulator.accumulation;
#if defined(USE_AVX512)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth * 2);
static_assert(kHalfDimensions % (kSimdWidth * 2) == 0);
const __m512i kControl = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7);
const __m512i kZero = _mm512_setzero_si512();
#elif defined(USE_AVX2)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
constexpr int kControl = 0b11011000;
const __m256i kZero = _mm256_setzero_si256();
#elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
#ifdef USE_SSE41
const __m128i kZero = _mm_setzero_si128();
#else
const __m128i k0x80s = _mm_set1_epi8(-128);
#endif
#elif defined(USE_MMX)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
const __m64 k0x80s = _mm_set1_pi8(-128);
#elif defined(USE_NEON)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0};
#endif
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
for (IndexType p = 0; p < 2; ++p) {
const IndexType offset = kHalfDimensions * p;
#if defined(USE_AVX512)
auto out = reinterpret_cast<__m512i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m512i sum0 = _mm512_load_si512(
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
__m512i sum1 = _mm512_load_si512(
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm512_add_epi16(sum0, reinterpret_cast<const __m512i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm512_add_epi16(sum1, reinterpret_cast<const __m512i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
_mm512_store_si512(&out[j], _mm512_permutexvar_epi64(kControl,
_mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), kZero)));
}
#elif defined(USE_AVX2)
auto out = reinterpret_cast<__m256i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i sum0 = _mm256_load_si256(
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
__m256i sum1 = _mm256_load_si256(
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
}
#elif defined(USE_SSE2)
auto out = reinterpret_cast<__m128i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
_mm_store_si128(&out[j],
#ifdef USE_SSE41
_mm_max_epi8(packedbytes, kZero)
#else
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
#endif
);
}
#elif defined(USE_MMX)
auto out = reinterpret_cast<__m64*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m64 sum0 = *(&reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m64 sum1 = *(&reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm_add_pi16(sum0, reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm_add_pi16(sum1, reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
}
#elif defined(USE_NEON)
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][0])[j];
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][i])[j]);
}
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum += accumulation[static_cast<int>(perspectives[p])][i][j];
}
output[offset + j] = static_cast<OutputType>(
std::max<int>(0, std::min<int>(127, sum)));
}
#endif
}
#if defined(USE_MMX)
_mm_empty();
#endif
}
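All the SIMD branches above implement the same clipped ReLU: sum the int16 accumulation slices, then clamp to [0, 127] while narrowing to int8. The SSE2/MMX fallback performs the clamp with saturating adds/subs against k0x80s; a small stand-alone check of that identity (not from the diff):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Saturate an int to the int8 range, modelling the packed saturating
// adds/subs used by the SSE2/MMX instructions.
static std::int8_t sat8(int v) { return static_cast<std::int8_t>(std::clamp(v, -128, 127)); }

int main() {
  for (int x = -128; x <= 127; ++x) {
    // subs(adds(x, -128), -128) collapses every negative value to 0 and
    // leaves non-negative values untouched, i.e. it computes max(x, 0).
    std::int8_t viaSaturation = sat8(sat8(x - 128) + 128);
    assert(viaSaturation == std::max(x, 0));
  }
  return 0;
}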
private:
// Calculate cumulative value without using difference calculation
void refresh_accumulator(const Position& pos) const {
#ifdef VECTOR
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
// is defined in the VECTOR code below, once in each branch
vec_t acc[kNumRegs];
#endif
auto& accumulator = pos.state()->accumulator;
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList active_indices[2];
RawFeatures::append_active_indices(pos, kRefreshTriggers[i],
active_indices);
for (Color perspective : { WHITE, BLACK }) {
#ifdef VECTOR
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[perspective][i][j * kTileHeight]);
if (i == 0) {
auto biasesTile = reinterpret_cast<const vec_t*>(
&biases_[j * kTileHeight]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
} else {
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_zero;
}
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
for (IndexType k = 0; k < kNumRegs; k++)
vec_store(&accTile[k], acc[k]);
}
#else
#undef TILING
#endif
// Input feature converter
class FeatureTransformer {
private:
// Number of output dimensions for one side
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
#ifdef TILING
static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
#endif
public:
// Output type
using OutputType = TransformedFeatureType;
// Number of input/output dimensions
static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
// Size of forward propagation buffer
static constexpr std::size_t kBufferSize =
kOutputDimensions * sizeof(OutputType);
static constexpr int kLayerIndex = 0;
// Hash value embedded in the evaluation file
static constexpr std::uint32_t GetHashValue() {
return RawFeatures::kHashValue ^ kOutputDimensions;
}
static std::string get_name() {
return RawFeatures::get_name() + "[" +
std::to_string(kInputDimensions) + "->" +
std::to_string(kHalfDimensions) + "x2]";
}
// a string representing the structure
static std::string get_structure_string() {
return get_name();
}
static std::string get_layers_info() {
std::string info = " - ";
info += std::to_string(kLayerIndex);
info += " - ";
info += get_name();
return info;
}
// Read network parameters
bool ReadParameters(std::istream& stream) {
for (std::size_t i = 0; i < kHalfDimensions; ++i)
biases_[i] = read_little_endian<BiasType>(stream);
for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
weights_[i] = read_little_endian<WeightType>(stream);
return !stream.fail();
}
// write parameters
bool WriteParameters(std::ostream& stream) const {
stream.write(reinterpret_cast<const char*>(biases_),
kHalfDimensions * sizeof(BiasType));
stream.write(reinterpret_cast<const char*>(weights_),
kHalfDimensions * kInputDimensions * sizeof(WeightType));
return !stream.fail();
}
// Proceed with the difference calculation if possible
bool update_accumulator_if_possible(const Position& pos) const {
const auto now = pos.state();
if (now->accumulator.computed_accumulation)
return true;
const auto prev = now->previous;
if (prev && prev->accumulator.computed_accumulation) {
update_accumulator(pos);
return true;
}
return false;
}
// Convert input features
void Transform(const Position& pos, OutputType* output) const {
if (!update_accumulator_if_possible(pos))
refresh_accumulator(pos);
const auto& accumulation = pos.state()->accumulator.accumulation;
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
constexpr int kControl = 0b11011000;
const __m256i kZero = _mm256_setzero_si256();
#elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
#ifdef USE_SSE41
const __m128i kZero = _mm_setzero_si128();
#else
const __m128i k0x80s = _mm_set1_epi8(-128);
#endif
#elif defined(USE_MMX)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
const __m64 k0x80s = _mm_set1_pi8(-128);
#elif defined(USE_NEON)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0};
#endif
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
for (IndexType p = 0; p < 2; ++p) {
const IndexType offset = kHalfDimensions * p;
#if defined(USE_AVX2)
auto out = reinterpret_cast<__m256i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i sum0 = _mm256_load_si256(
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
__m256i sum1 = _mm256_load_si256(
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
}
#elif defined(USE_SSE2)
auto out = reinterpret_cast<__m128i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
_mm_store_si128(&out[j],
#ifdef USE_SSE41
_mm_max_epi8(packedbytes, kZero)
#else
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
#endif
);
}
#elif defined(USE_MMX)
auto out = reinterpret_cast<__m64*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m64 sum0 = *(&reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m64 sum1 = *(&reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm_add_pi16(sum0, reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm_add_pi16(sum1, reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
}
#elif defined(USE_NEON)
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][0])[j];
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][i])[j]);
}
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum += accumulation[static_cast<int>(perspectives[p])][i][j];
}
output[offset + j] = static_cast<OutputType>(
std::max<int>(0, std::min<int>(127, sum)));
}
#endif
}
#if defined(USE_MMX)
_mm_empty();
#endif
}
private:
// Calculate cumulative value without using difference calculation
void refresh_accumulator(const Position& pos) const {
auto& accumulator = pos.state()->accumulator;
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList active_indices[2];
RawFeatures::append_active_indices(pos, kRefreshTriggers[i],
active_indices);
for (Color perspective : { WHITE, BLACK }) {
#ifdef TILING
for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) {
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[perspective][i][j * kTileHeight]);
vec_t acc[kNumRegs];
if (i == 0) {
auto biasesTile = reinterpret_cast<const vec_t*>(
&biases_[j * kTileHeight]);
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
} else {
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = vec_zero;
}
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
for (unsigned k = 0; k < kNumRegs; k++)
vec_store(&accTile[k], acc[k]);
}
#else
if (i == 0) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memset(accumulator.accumulation[perspective][i], 0,
kHalfDimensions * sizeof(BiasType));
}
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
}
#endif
}
}
#if defined(USE_MMX)
_mm_empty();
#endif
accumulator.computed_accumulation = true;
}
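In scalar terms the refresh path rebuilds each accumulator half from the biases plus one weight column per active feature. A minimal stand-alone sketch (hypothetical names, not the diff's code):

#include <cstddef>
#include <cstdint>
#include <vector>

// acc[d] = bias[d] + sum over active features f of weight column f.
void refresh_sketch(const std::vector<std::int16_t>& biases,        // size halfDims
                    const std::vector<std::int16_t>& weights,       // [feature][dim], flattened
                    const std::vector<std::size_t>& activeFeatures,
                    std::vector<std::int16_t>& acc,
                    std::size_t halfDims) {
  acc.assign(biases.begin(), biases.begin() + halfDims);
  for (std::size_t f : activeFeatures)
    for (std::size_t d = 0; d < halfDims; ++d)
      acc[d] += weights[f * halfDims + d];
}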
// Calculate cumulative value using difference calculation
void update_accumulator(const Position& pos) const {
#ifdef VECTOR
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
// is defined in the VECTOR code below, once in each branch
vec_t acc[kNumRegs];
#endif
const auto& prev_accumulator = pos.state()->previous->accumulator;
auto& accumulator = pos.state()->accumulator;
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList removed_indices[2], added_indices[2];
bool reset[2] = { false, false };
RawFeatures::append_changed_indices(pos, kRefreshTriggers[i],
removed_indices, added_indices, reset);
#ifdef VECTOR
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
for (Color perspective : { WHITE, BLACK }) {
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[perspective][i][j * kTileHeight]);
if (reset[perspective]) {
if (i == 0) {
auto biasesTile = reinterpret_cast<const vec_t*>(
&biases_[j * kTileHeight]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
} else {
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_zero;
}
} else {
auto prevAccTile = reinterpret_cast<const vec_t*>(
&prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_load(&prevAccTile[k]);
// Difference calculation for the deactivated features
for (const auto index : removed_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_sub_16(acc[k], column[k]);
}
}
{ // Difference calculation for the activated features
for (const auto index : added_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
}
for (IndexType k = 0; k < kNumRegs; ++k)
vec_store(&accTile[k], acc[k]);
}
// Calculate cumulative value using difference calculation
void update_accumulator(const Position& pos) const {
const auto& prev_accumulator = pos.state()->previous->accumulator;
auto& accumulator = pos.state()->accumulator;
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList removed_indices[2], added_indices[2];
bool reset[2] = { false, false };
RawFeatures::append_changed_indices(pos, kRefreshTriggers[i],
removed_indices, added_indices, reset);
#ifdef TILING
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
for (Color perspective : { WHITE, BLACK }) {
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[perspective][i][j * kTileHeight]);
vec_t acc[kNumRegs];
if (reset[perspective]) {
if (i == 0) {
auto biasesTile = reinterpret_cast<const vec_t*>(
&biases_[j * kTileHeight]);
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
} else {
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = vec_zero;
}
} else {
auto prevAccTile = reinterpret_cast<const vec_t*>(
&prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_load(&prevAccTile[k]);
// Difference calculation for the deactivated features
for (const auto index : removed_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_sub_16(acc[k], column[k]);
}
}
{ // Difference calculation for the activated features
for (const auto index : added_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
}
for (IndexType k = 0; k < kNumRegs; ++k)
vec_store(&accTile[k], acc[k]);
}
}
}
#if defined(USE_MMX)
_mm_empty();
#endif
#else
for (Color perspective : { WHITE, BLACK }) {
if (reset[perspective]) {
if (i == 0) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memset(accumulator.accumulation[perspective][i], 0,
kHalfDimensions * sizeof(BiasType));
}
} else {
std::memcpy(accumulator.accumulation[perspective][i],
prev_accumulator.accumulation[perspective][i],
kHalfDimensions * sizeof(BiasType));
// Difference calculation for the deactivated features
for (const auto index : removed_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
}
}
{ // Difference calculation for the activated features
for (const auto index : added_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
}
}
}
#endif
}
accumulator.computed_accumulation = true;
}
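The incremental path avoids a full rebuild: it starts from the previous position's accumulator, subtracts the weight columns of features that switched off, and adds the columns of features that switched on. A minimal scalar sketch (hypothetical names, not the diff's code):

#include <cstddef>
#include <cstdint>
#include <vector>

void update_sketch(const std::vector<std::int16_t>& prevAcc,
                   const std::vector<std::int16_t>& weights,     // [feature][dim], flattened
                   const std::vector<std::size_t>& removed,
                   const std::vector<std::size_t>& added,
                   std::vector<std::int16_t>& acc,
                   std::size_t halfDims) {
  acc = prevAcc;                                // start from the previous state
  for (std::size_t f : removed)
    for (std::size_t d = 0; d < halfDims; ++d)
      acc[d] -= weights[f * halfDims + d];      // deactivated features
  for (std::size_t f : added)
    for (std::size_t d = 0; d < halfDims; ++d)
      acc[d] += weights[f * halfDims + d];      // activated features
}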
// Make the learning class a friend
friend class Trainer<FeatureTransformer>;
using BiasType = std::int16_t;
using WeightType = std::int16_t;
alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
alignas(kCacheLineSize)
WeightType weights_[kHalfDimensions * kInputDimensions];
};
// Make the learning class a friend
friend class Trainer<FeatureTransformer>;
using BiasType = std::int16_t;
using WeightType = std::int16_t;
alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
alignas(kCacheLineSize)
WeightType weights_[kHalfDimensions * kInputDimensions];
};
} // namespace Eval::NNUE
#endif //#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED

View File

@@ -176,8 +176,8 @@ namespace {
score -= Doubled * doubled
+ WeakLever * more_than_one(lever);
if (blocked && r > RANK_4)
score += BlockedPawn[r-4];
if (blocked && r >= RANK_5)
score += BlockedPawn[r - RANK_5];
}
return score;
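The two spellings index the same BlockedPawn entries: in Stockfish's 0-based Rank enum RANK_5 has the value 4, so r > RANK_4 selects the same ranks as r >= RANK_5 and r - 4 equals r - RANK_5; the new form only makes that explicit. A minimal check under that assumption about the enum:

enum Rank { RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8 };

static_assert(RANK_5 == 4 && RANK_6 == 5,
              "r - RANK_5 and r - 4 pick the same BlockedPawn entry for blocked ranks 5 and 6");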

View File

@@ -59,7 +59,7 @@ namespace {
// Razor and futility margins
constexpr int RazorMargin = 510;
Value futility_margin(Depth d, bool improving) {
return Value(223 * (d - improving));
return Value(234 * (d - improving));
}
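The margin stays linear in depth, with one depth step of slack when the evaluation is improving; only the slope changes from 223 to 234. A tiny worked check (illustration only, in internal Value units):

static_assert(234 * (7 - 0) == 1638, "futility margin at depth 7, not improving");
static_assert(234 * (7 - 1) == 1404, "futility margin at depth 7, improving");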
// Reductions lookup table, initialized at startup
@@ -67,7 +67,7 @@ namespace {
Depth reduction(bool i, Depth d, int mn) {
int r = Reductions[d] * Reductions[mn];
return (r + 509) / 1024 + (!i && r > 894);
return (r + 503) / 1024 + (!i && r > 915);
}
constexpr int futility_move_count(bool improving, Depth depth) {
@@ -188,7 +188,7 @@ namespace {
void Search::init() {
for (int i = 1; i < MAX_MOVES; ++i)
Reductions[i] = int((22.0 + 2 * std::log(Threads.size())) * std::log(i + 0.25 * std::log(i)));
Reductions[i] = int((21.3 + 2 * std::log(Threads.size())) * std::log(i + 0.25 * std::log(i)));
}
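A stand-alone sketch (not part of the diff) that reproduces the retuned table and reduction rule for a single search thread, to see what the new constants (21.3, +503, >915) produce:

#include <cmath>
#include <cstdio>

int Reductions[256];  // MAX_MOVES entries, index 0 unused

int reduction(bool improving, int depth, int moveNumber) {
  int r = Reductions[depth] * Reductions[moveNumber];
  return (r + 503) / 1024 + (!improving && r > 915);
}

int main() {
  const double threads = 1.0;  // assumption: single-threaded search
  for (int i = 1; i < 256; ++i)
    Reductions[i] = int((21.3 + 2 * std::log(threads)) * std::log(i + 0.25 * std::log(i)));
  std::printf("reduction(improving=false, depth=12, move=10) = %d\n",
              reduction(false, 12, 10));
  return 0;
}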
@@ -404,7 +404,7 @@ void Thread::search() {
beta = std::min(prev + delta, VALUE_INFINITE);
// Adjust contempt based on root move's previousScore (dynamic contempt)
int dct = ct + (105 - ct / 2) * prev / (abs(prev) + 149);
int dct = ct + (113 - ct / 2) * prev / (abs(prev) + 147);
contempt = (us == WHITE ? make_score(dct, dct / 2)
: -make_score(dct, dct / 2));
@@ -824,7 +824,7 @@ namespace {
&& (ss-1)->statScore < 22977
&& eval >= beta
&& eval >= ss->staticEval
&& ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ss->ttPv + 182
&& ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ss->ttPv + 168
&& !excludedMove
&& pos.non_pawn_material(us)
&& (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor))
@@ -832,7 +832,7 @@ namespace {
assert(eval - beta >= 0);
// Null move dynamic reduction based on depth and value
Depth R = (982 + 85 * depth) / 256 + std::min(int(eval - beta) / 192, 3);
Depth R = (1015 + 85 * depth) / 256 + std::min(int(eval - beta) / 191, 3);
ss->currentMove = MOVE_NULL;
ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0];
@@ -849,7 +849,7 @@ namespace {
if (nullValue >= VALUE_TB_WIN_IN_MAX_PLY)
nullValue = beta;
if (thisThread->nmpMinPly || (abs(beta) < VALUE_KNOWN_WIN && depth < 13))
if (thisThread->nmpMinPly || (abs(beta) < VALUE_KNOWN_WIN && depth < 14))
return nullValue;
assert(!thisThread->nmpMinPly); // Recursive verification is not allowed
@@ -868,7 +868,7 @@ namespace {
}
}
probCutBeta = beta + 176 - 49 * improving;
probCutBeta = beta + 183 - 49 * improving;
// Step 10. ProbCut (~10 Elo)
// If we have a good enough capture and a reduced search returns a value
@@ -1036,7 +1036,7 @@ moves_loop: // When in check, search starts from here
// Futility pruning: parent node (~5 Elo)
if ( lmrDepth < 7
&& !ss->inCheck
&& ss->staticEval + 283 + 170 * lmrDepth <= alpha
&& ss->staticEval + 266 + 170 * lmrDepth <= alpha
&& (*contHist[0])[movedPiece][to_sq(move)]
+ (*contHist[1])[movedPiece][to_sq(move)]
+ (*contHist[3])[movedPiece][to_sq(move)]
@@ -1044,7 +1044,7 @@ moves_loop: // When in check, search starts from here
continue;
// Prune moves with negative SEE (~20 Elo)
if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 18)) * lmrDepth * lmrDepth)))
if (!pos.see_ge(move, Value(-(30 - std::min(lmrDepth, 18)) * lmrDepth * lmrDepth)))
continue;
}
else
@@ -1055,8 +1055,8 @@ moves_loop: // When in check, search starts from here
&& captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] < 0)
continue;
// See based pruning
if (!pos.see_ge(move, Value(-221) * depth)) // (~25 Elo)
// SEE based pruning
if (!pos.see_ge(move, Value(-213) * depth)) // (~25 Elo)
continue;
}
}
@@ -1150,12 +1150,12 @@ moves_loop: // When in check, search starts from here
|| moveCountPruning
|| ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha
|| cutNode
|| thisThread->ttHitAverage < 427 * TtHitAverageResolution * TtHitAverageWindow / 1024))
|| thisThread->ttHitAverage < 432 * TtHitAverageResolution * TtHitAverageWindow / 1024))
{
Depth r = reduction(improving, depth, moveCount);
// Decrease reduction if the ttHit running average is large
if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024)
if (thisThread->ttHitAverage > 537 * TtHitAverageResolution * TtHitAverageWindow / 1024)
r--;
// Increase reduction if other threads are searching this position
@@ -1208,10 +1208,10 @@ moves_loop: // When in check, search starts from here
- 5287;
// Decrease/increase reduction by comparing opponent's stat score (~10 Elo)
if (ss->statScore >= -106 && (ss-1)->statScore < -104)
if (ss->statScore >= -105 && (ss-1)->statScore < -103)
r--;
else if ((ss-1)->statScore >= -119 && ss->statScore < -140)
else if ((ss-1)->statScore >= -122 && ss->statScore < -129)
r++;
// Decrease/increase reduction for moves with a good/bad history (~30 Elo)
@@ -1225,7 +1225,7 @@ moves_loop: // When in check, search starts from here
// Unless giving check, this capture is likely bad
if ( !givesCheck
&& ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha)
&& ss->staticEval + PieceValue[EG][pos.captured_piece()] + 210 * depth <= alpha)
r++;
}
@@ -1499,7 +1499,7 @@ moves_loop: // When in check, search starts from here
if (PvNode && bestValue > alpha)
alpha = bestValue;
futilityBase = bestValue + 145;
futilityBase = bestValue + 155;
}
const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory,

View File

@@ -204,8 +204,8 @@ enum PieceType {
enum Piece {
NO_PIECE,
W_PAWN = 1, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING,
B_PAWN = 9, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING,
W_PAWN = PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING,
B_PAWN = PAWN + 8, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING,
PIECE_NB = 16
};
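Spelling the enumerators in terms of PAWN leaves the numeric values unchanged, so anything that serializes piece codes is unaffected; a minimal check (a sketch, not the diff's code):

enum PieceType { NO_PIECE_TYPE, PAWN, KNIGHT, BISHOP, ROOK, QUEEN, KING };
enum Piece {
  NO_PIECE,
  W_PAWN = PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING,
  B_PAWN = PAWN + 8, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING,
  PIECE_NB = 16
};

static_assert(W_PAWN == 1 && B_PAWN == 9, "numeric values unchanged by the new spelling");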