diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index 4d8a4b66..0d504468 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -214,13 +214,13 @@ namespace Eval::NNUE { std::string eval_file = std::string(Options["EvalFile"]); - #if defined(DEFAULT_NNUE_DIRECTORY) - #define stringify2(x) #x - #define stringify(x) stringify2(x) +#if defined(DEFAULT_NNUE_DIRECTORY) +#define stringify2(x) #x +#define stringify(x) stringify2(x) std::vector dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) }; - #else +#else std::vector dirs = { "" , CommandLine::binaryDirectory }; - #endif +#endif for (std::string directory : dirs) if (eval_file_loaded != eval_file) @@ -238,8 +238,8 @@ namespace Eval::NNUE { } } - #undef stringify2 - #undef stringify +#undef stringify2 +#undef stringify } /// NNUE::verify() verifies that the last net used was loaded successfully diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h index 5335713b..e6ddc7fd 100644 --- a/src/nnue/evaluate_nnue.h +++ b/src/nnue/evaluate_nnue.h @@ -1,23 +1,21 @@ /* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ -// header used in NNUE evaluation function - #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED #define NNUE_EVALUATE_NNUE_H_INCLUDED @@ -25,79 +23,82 @@ #include +// header used in NNUE evaluation function namespace Eval::NNUE { - enum struct UseNNUEMode - { - False, - True, - Pure - }; + enum struct UseNNUEMode + { + False, + True, + Pure + }; - // Hash value of evaluation function structure - constexpr std::uint32_t kHashValue = - FeatureTransformer::GetHashValue() ^ Network::GetHashValue(); + // Hash value of evaluation function structure + constexpr std::uint32_t kHashValue = + FeatureTransformer::GetHashValue() ^ Network::GetHashValue(); - // Deleter for automating release of memory area - template - struct AlignedDeleter { - void operator()(T* ptr) const { - ptr->~T(); - std_aligned_free(ptr); - } - }; + // Deleter for automating release of memory area + template + struct AlignedDeleter { + void operator()(T* ptr) const { + ptr->~T(); + std_aligned_free(ptr); + } + }; - template - struct LargePageDeleter { - void operator()(T* ptr) const { - ptr->~T(); - aligned_large_pages_free(ptr); - } - }; + template + struct LargePageDeleter { + void operator()(T* ptr) const { + ptr->~T(); + aligned_large_pages_free(ptr); + } + }; - template - using AlignedPtr = std::unique_ptr>; + template + using AlignedPtr = std::unique_ptr>; - template - using LargePagePtr = std::unique_ptr>; + template + using LargePagePtr = std::unique_ptr>; - // Input feature converter - extern LargePagePtr feature_transformer; + // Input feature converter + extern LargePagePtr feature_transformer; - // Evaluation function - extern AlignedPtr network; + // Evaluation function + extern AlignedPtr network; - // Evaluation function file name - extern std::string fileName; + // Evaluation function file name + extern std::string fileName; - // Saved evaluation function file name - extern std::string savedfileName; + // Saved evaluation function file name + extern std::string savedfileName; - extern UseNNUEMode useNNUE; - extern std::string eval_file_loaded; + extern UseNNUEMode useNNUE; - // Get a string that represents the structure of the evaluation function - std::string GetArchitectureString(); + extern std::string eval_file_loaded; - // read the header - bool ReadHeader(std::istream& stream, - std::uint32_t* hash_value, std::string* architecture); + // Get a string that represents the structure of the evaluation function + std::string GetArchitectureString(); - // write the header - bool WriteHeader(std::ostream& stream, - std::uint32_t hash_value, const std::string& architecture); + // read the header + bool ReadHeader(std::istream& stream, + std::uint32_t* hash_value, std::string* architecture); - // read evaluation function parameters - bool ReadParameters(std::istream& stream); + // write the header + bool WriteHeader(std::ostream& stream, + std::uint32_t hash_value, const std::string& architecture); - // write evaluation function parameters - bool WriteParameters(std::ostream& stream); + // read evaluation function parameters + bool ReadParameters(std::istream& stream); - Value evaluate(const Position& pos); - bool load_eval(std::string name, std::istream& stream); - void init(); - void verify_eval_file_loaded(); - void verify_any_net_loaded(); + // write evaluation function parameters + bool WriteParameters(std::ostream& stream); + + Value evaluate(const Position& pos); + bool load_eval(std::string name, std::istream& stream); + void init(); + + void verify_eval_file_loaded(); + void verify_any_net_loaded(); } // namespace 
Eval::NNUE diff --git a/src/nnue/evaluate_nnue_learner.cpp b/src/nnue/evaluate_nnue_learner.cpp index 2d6c6db3..92ecd8d2 100644 --- a/src/nnue/evaluate_nnue_learner.cpp +++ b/src/nnue/evaluate_nnue_learner.cpp @@ -1,18 +1,10 @@ -// Code for learning NNUE evaluation function - -#include +#include #include #include -#include "../learn/learn.h" - -#include "../position.h" -#include "../uci.h" -#include "../misc.h" -#include "../thread_win32_osx.h" - #include "evaluate_nnue.h" #include "evaluate_nnue_learner.h" + #include "trainer/features/factorizer_feature_set.h" #include "trainer/features/factorizer_half_kp.h" #include "trainer/trainer_feature_transformer.h" @@ -21,191 +13,207 @@ #include "trainer/trainer_clipped_relu.h" #include "trainer/trainer_sum.h" +#include "position.h" +#include "uci.h" +#include "misc.h" +#include "thread_win32_osx.h" + +#include "learn/learn.h" + // Learning rate scale double global_learning_rate; +// Code for learning NNUE evaluation function namespace Eval::NNUE { - namespace { + namespace { - // learning data - std::vector examples; + // learning data + std::vector examples; - // Mutex for exclusive control of examples - std::mutex examples_mutex; + // Mutex for exclusive control of examples + std::mutex examples_mutex; - // number of samples in mini-batch - uint64_t batch_size; + // number of samples in mini-batch + uint64_t batch_size; - // random number generator - std::mt19937 rng; + // random number generator + std::mt19937 rng; - // learner - std::shared_ptr> trainer; + // learner + std::shared_ptr> trainer; - // Tell the learner options such as hyperparameters - void SendMessages(std::vector messages) { - for (auto& message : messages) { - trainer->SendMessage(&message); - assert(message.num_receivers > 0); - } - } - - } // namespace - - // Initialize learning - void InitializeTraining(const std::string& seed) { - std::cout << "Initializing NN training for " - << GetArchitectureString() << std::endl; - - assert(feature_transformer); - assert(network); - trainer = Trainer::Create(network.get(), feature_transformer.get()); - rng.seed(PRNG(seed).rand()); - - if (Options["SkipLoadingEval"]) { - trainer->Initialize(rng); - } - } - - // set the number of samples in the mini-batch - void SetBatchSize(uint64_t size) { - assert(size > 0); - batch_size = size; - } - - // Set options such as hyperparameters - void SetOptions(const std::string& options) { - std::vector messages; - for (const auto& option : Split(options, ',')) { - const auto fields = Split(option, '='); - assert(fields.size() == 1 || fields.size() == 2); - if (fields.size() == 1) { - messages.emplace_back(fields[0]); - } else { - messages.emplace_back(fields[0], fields[1]); - } - } - SendMessages(std::move(messages)); - } - - // Reread the evaluation function parameters for learning from the file - void RestoreParameters(const std::string& dir_name) { - const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName); - std::ifstream stream(file_name, std::ios::binary); -#ifndef NDEBUG - bool result = -#endif - ReadParameters(stream); -#ifndef NDEBUG - assert(result); -#endif - - SendMessages({{"reset"}}); - } - - void FinalizeNet() { - SendMessages({{"clear_unobserved_feature_weights"}}); - } - - // Add 1 sample of learning data - void AddExample(Position& pos, Color rootColor, - const Learner::PackedSfenValue& psv, double weight) { - Example example; - if (rootColor == pos.side_to_move()) { - example.sign = 1; - } else { - example.sign = -1; - } - example.psv = psv; - example.weight = 
weight; - - Features::IndexList active_indices[2]; - for (const auto trigger : kRefreshTriggers) { - RawFeatures::AppendActiveIndices(pos, trigger, active_indices); - } - if (pos.side_to_move() != WHITE) { - active_indices[0].swap(active_indices[1]); - } - for (const auto color : Colors) { - std::vector training_features; - for (const auto base_index : active_indices[color]) { - static_assert(Features::Factorizer::GetDimensions() < - (1 << TrainingFeature::kIndexBits), ""); - Features::Factorizer::AppendTrainingFeatures( - base_index, &training_features); - } - std::sort(training_features.begin(), training_features.end()); - - auto& unique_features = example.training_features[color]; - for (const auto& feature : training_features) { - if (!unique_features.empty() && - feature.GetIndex() == unique_features.back().GetIndex()) { - unique_features.back() += feature; - } else { - unique_features.push_back(feature); + // Tell the learner options such as hyperparameters + void SendMessages(std::vector messages) { + for (auto& message : messages) { + trainer->SendMessage(&message); + assert(message.num_receivers > 0); + } + } + + } // namespace + + // Initialize learning + void InitializeTraining(const std::string& seed) { + std::cout << "Initializing NN training for " + << GetArchitectureString() << std::endl; + + assert(feature_transformer); + assert(network); + trainer = Trainer::Create(network.get(), feature_transformer.get()); + rng.seed(PRNG(seed).rand()); + + if (Options["SkipLoadingEval"]) { + trainer->Initialize(rng); } - } } - std::lock_guard lock(examples_mutex); - examples.push_back(std::move(example)); - } - - // update the evaluation function parameters - void UpdateParameters() { - assert(batch_size > 0); - - const auto learning_rate = static_cast( - global_learning_rate / batch_size); - - std::lock_guard lock(examples_mutex); - std::shuffle(examples.begin(), examples.end(), rng); - while (examples.size() >= batch_size) { - std::vector batch(examples.end() - batch_size, examples.end()); - examples.resize(examples.size() - batch_size); - - const auto network_output = trainer->Propagate(batch); - - std::vector gradients(batch.size()); - for (std::size_t b = 0; b < batch.size(); ++b) { - const auto shallow = static_cast(Round( - batch[b].sign * network_output[b] * kPonanzaConstant)); - const auto& psv = batch[b].psv; - const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv); - gradients[b] = static_cast(gradient * batch[b].weight); - } - - trainer->Backpropagate(gradients.data(), learning_rate); + // set the number of samples in the mini-batch + void SetBatchSize(uint64_t size) { + assert(size > 0); + batch_size = size; } - SendMessages({{"quantize_parameters"}}); - } - // Check if there are any problems with learning - void CheckHealth() { - SendMessages({{"check_health"}}); - } + // Set options such as hyperparameters + void SetOptions(const std::string& options) { + std::vector messages; + for (const auto& option : Split(options, ',')) { + const auto fields = Split(option, '='); + assert(fields.size() == 1 || fields.size() == 2); - // save merit function parameters to a file - void save_eval(std::string dir_name) { - auto eval_dir = Path::Combine(Options["EvalSaveDir"], dir_name); - std::cout << "save_eval() start. folder = " << eval_dir << std::endl; + if (fields.size() == 1) { + messages.emplace_back(fields[0]); + } else { + messages.emplace_back(fields[0], fields[1]); + } + } - // mkdir() will fail if this folder already exists, but - // Apart from that. 
If not, I just want you to make it. - // Also, assume that the folders up to EvalSaveDir have been dug. - std::filesystem::create_directories(eval_dir); + SendMessages(std::move(messages)); + } - const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName); - std::ofstream stream(file_name, std::ios::binary); + // Reread the evaluation function parameters for learning from the file + void RestoreParameters(const std::string& dir_name) { + const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName); + std::ifstream stream(file_name, std::ios::binary); #ifndef NDEBUG - bool result = + bool result = #endif - WriteParameters(stream); + ReadParameters(stream); #ifndef NDEBUG - assert(result); + assert(result); #endif - std::cout << "save_eval() finished. folder = " << eval_dir << std::endl; - } + SendMessages({{"reset"}}); + } + + void FinalizeNet() { + SendMessages({{"clear_unobserved_feature_weights"}}); + } + + // Add 1 sample of learning data + void AddExample(Position& pos, Color rootColor, + const Learner::PackedSfenValue& psv, double weight) { + + Example example; + if (rootColor == pos.side_to_move()) { + example.sign = 1; + } else { + example.sign = -1; + } + + example.psv = psv; + example.weight = weight; + + Features::IndexList active_indices[2]; + for (const auto trigger : kRefreshTriggers) { + RawFeatures::AppendActiveIndices(pos, trigger, active_indices); + } + + if (pos.side_to_move() != WHITE) { + active_indices[0].swap(active_indices[1]); + } + + for (const auto color : Colors) { + std::vector training_features; + for (const auto base_index : active_indices[color]) { + static_assert(Features::Factorizer::GetDimensions() < + (1 << TrainingFeature::kIndexBits), ""); + Features::Factorizer::AppendTrainingFeatures( + base_index, &training_features); + } + + std::sort(training_features.begin(), training_features.end()); + + auto& unique_features = example.training_features[color]; + for (const auto& feature : training_features) { + if (!unique_features.empty() && + feature.GetIndex() == unique_features.back().GetIndex()) { + + unique_features.back() += feature; + } else { + unique_features.push_back(feature); + } + } + } + + std::lock_guard lock(examples_mutex); + examples.push_back(std::move(example)); + } + + // update the evaluation function parameters + void UpdateParameters() { + assert(batch_size > 0); + + const auto learning_rate = static_cast( + global_learning_rate / batch_size); + + std::lock_guard lock(examples_mutex); + std::shuffle(examples.begin(), examples.end(), rng); + while (examples.size() >= batch_size) { + std::vector batch(examples.end() - batch_size, examples.end()); + examples.resize(examples.size() - batch_size); + + const auto network_output = trainer->Propagate(batch); + + std::vector gradients(batch.size()); + for (std::size_t b = 0; b < batch.size(); ++b) { + const auto shallow = static_cast(Round( + batch[b].sign * network_output[b] * kPonanzaConstant)); + const auto& psv = batch[b].psv; + const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv); + gradients[b] = static_cast(gradient * batch[b].weight); + } + + trainer->Backpropagate(gradients.data(), learning_rate); + } + SendMessages({{"quantize_parameters"}}); + } + + // Check if there are any problems with learning + void CheckHealth() { + SendMessages({{"check_health"}}); + } + + // save merit function parameters to a file + void save_eval(std::string dir_name) { + auto eval_dir = Path::Combine(Options["EvalSaveDir"], dir_name); + std::cout << 
"save_eval() start. folder = " << eval_dir << std::endl; + + // mkdir() will fail if this folder already exists, but + // Apart from that. If not, I just want you to make it. + // Also, assume that the folders up to EvalSaveDir have been dug. + std::filesystem::create_directories(eval_dir); + + const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName); + std::ofstream stream(file_name, std::ios::binary); +#ifndef NDEBUG + bool result = +#endif + WriteParameters(stream); +#ifndef NDEBUG + assert(result); +#endif + + std::cout << "save_eval() finished. folder = " << eval_dir << std::endl; + } } // namespace Eval::NNUE \ No newline at end of file diff --git a/src/nnue/evaluate_nnue_learner.h b/src/nnue/evaluate_nnue_learner.h index c41d8d6b..525b286a 100644 --- a/src/nnue/evaluate_nnue_learner.h +++ b/src/nnue/evaluate_nnue_learner.h @@ -1,37 +1,36 @@ -// Interface used for learning NNUE evaluation function - -#ifndef _EVALUATE_NNUE_LEARNER_H_ +#ifndef _EVALUATE_NNUE_LEARNER_H_ #define _EVALUATE_NNUE_LEARNER_H_ -#include "../learn/learn.h" +#include "learn/learn.h" +// Interface used for learning NNUE evaluation function namespace Eval::NNUE { - // Initialize learning - void InitializeTraining(const std::string& seed); + // Initialize learning + void InitializeTraining(const std::string& seed); - // set the number of samples in the mini-batch - void SetBatchSize(uint64_t size); + // set the number of samples in the mini-batch + void SetBatchSize(uint64_t size); - // Set options such as hyperparameters - void SetOptions(const std::string& options); + // Set options such as hyperparameters + void SetOptions(const std::string& options); - // Reread the evaluation function parameters for learning from the file - void RestoreParameters(const std::string& dir_name); + // Reread the evaluation function parameters for learning from the file + void RestoreParameters(const std::string& dir_name); -// Add 1 sample of learning data - void AddExample(Position& pos, Color rootColor, - const Learner::PackedSfenValue& psv, double weight); + // Add 1 sample of learning data + void AddExample(Position& pos, Color rootColor, + const Learner::PackedSfenValue& psv, double weight); - // update the evaluation function parameters - void UpdateParameters(); + // update the evaluation function parameters + void UpdateParameters(); - // Check if there are any problems with learning - void CheckHealth(); + // Check if there are any problems with learning + void CheckHealth(); - void FinalizeNet(); + void FinalizeNet(); - void save_eval(std::string suffix); + void save_eval(std::string suffix); } // namespace Eval::NNUE #endif diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 26370710..8b60dafc 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -1,36 +1,34 @@ /* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
+ Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ -// Class for difference calculation of NNUE evaluation function - #ifndef NNUE_ACCUMULATOR_H_INCLUDED #define NNUE_ACCUMULATOR_H_INCLUDED #include "nnue_architecture.h" +// Class for difference calculation of NNUE evaluation function namespace Eval::NNUE { - // Class that holds the result of affine transformation of input features - struct alignas(kCacheLineSize) Accumulator { - std::int16_t - accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; - bool computed_accumulation; - }; + // Class that holds the result of affine transformation of input features + struct alignas(kCacheLineSize) Accumulator { + std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; + bool computed_accumulation; + }; } // namespace Eval::NNUE diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h index 91cdc4bd..2ecb6999 100644 --- a/src/nnue/nnue_architecture.h +++ b/src/nnue/nnue_architecture.h @@ -1,37 +1,36 @@ /* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ -// Input features and network structure used in NNUE evaluation function - #ifndef NNUE_ARCHITECTURE_H_INCLUDED #define NNUE_ARCHITECTURE_H_INCLUDED // Defines the network structure #include "architectures/halfkp_256x2-32-32.h" +// Input features and network structure used in NNUE evaluation function namespace Eval::NNUE { - static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, ""); - static_assert(Network::kOutputDimensions == 1, ""); - static_assert(std::is_same::value, ""); + static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, ""); + static_assert(Network::kOutputDimensions == 1, ""); + static_assert(std::is_same::value, ""); - // Trigger for full calculation instead of difference calculation - constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers; + // Trigger for full calculation instead of difference calculation + constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers; } // namespace Eval::NNUE diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index 9975134c..70c7596d 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -1,19 +1,19 @@ /* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ // Constants used in NNUE evaluation function @@ -21,11 +21,11 @@ #ifndef NNUE_COMMON_H_INCLUDED #define NNUE_COMMON_H_INCLUDED +#include "types.h" + #include #include -#include "../types.h" - #if defined(USE_AVX2) #include @@ -70,84 +70,84 @@ namespace Eval::NNUE { - // Version of the evaluation file - constexpr std::uint32_t kVersion = 0x7AF32F17u; + // Version of the evaluation file + constexpr std::uint32_t kVersion = 0x7AF32F17u; - // Constant used in evaluation value calculation - constexpr int FV_SCALE = 16; - constexpr int kWeightScaleBits = 6; + // Constant used in evaluation value calculation + constexpr int FV_SCALE = 16; + constexpr int kWeightScaleBits = 6; - // Size of cache line (in bytes) - constexpr std::size_t kCacheLineSize = 64; + // Size of cache line (in bytes) + constexpr std::size_t kCacheLineSize = 64; - // SIMD width (in bytes) - #if defined(USE_AVX2) - constexpr std::size_t kSimdWidth = 32; + // SIMD width (in bytes) +#if defined(USE_AVX2) + constexpr std::size_t kSimdWidth = 32; - #elif defined(USE_SSE2) - constexpr std::size_t kSimdWidth = 16; +#elif defined(USE_SSE2) + constexpr std::size_t kSimdWidth = 16; - #elif defined(USE_MMX) - constexpr std::size_t kSimdWidth = 8; +#elif defined(USE_MMX) + constexpr std::size_t kSimdWidth = 8; - #elif defined(USE_NEON) - constexpr std::size_t kSimdWidth = 16; - #endif +#elif defined(USE_NEON) + constexpr std::size_t kSimdWidth = 16; +#endif - constexpr std::size_t kMaxSimdWidth = 32; + constexpr std::size_t kMaxSimdWidth = 32; - // unique number for each piece type on each square - enum { - PS_NONE = 0, - PS_W_PAWN = 1, - PS_B_PAWN = 1 * SQUARE_NB + 1, - PS_W_KNIGHT = 2 * SQUARE_NB + 1, - PS_B_KNIGHT = 3 * SQUARE_NB + 1, - PS_W_BISHOP = 4 * SQUARE_NB + 1, - PS_B_BISHOP = 5 * SQUARE_NB + 1, - PS_W_ROOK = 6 * SQUARE_NB + 1, - PS_B_ROOK = 7 * SQUARE_NB + 1, - PS_W_QUEEN = 8 * SQUARE_NB + 1, - PS_B_QUEEN = 9 * SQUARE_NB + 1, - PS_W_KING = 10 * SQUARE_NB + 1, - PS_END = PS_W_KING, // pieces without kings (pawns included) - PS_B_KING = 11 * SQUARE_NB + 1, - PS_END2 = 12 * SQUARE_NB + 1 - }; + // unique number for each piece type on each square + enum { + PS_NONE = 0, + PS_W_PAWN = 1, + PS_B_PAWN = 1 * SQUARE_NB + 1, + PS_W_KNIGHT = 2 * SQUARE_NB + 1, + PS_B_KNIGHT = 3 * SQUARE_NB + 1, + PS_W_BISHOP = 4 * SQUARE_NB + 1, + PS_B_BISHOP = 5 * SQUARE_NB + 1, + PS_W_ROOK = 6 * SQUARE_NB + 1, + PS_B_ROOK = 7 * SQUARE_NB + 1, + PS_W_QUEEN = 8 * SQUARE_NB + 1, + PS_B_QUEEN = 9 * SQUARE_NB + 1, + PS_W_KING = 10 * SQUARE_NB + 1, + PS_END = PS_W_KING, // pieces without kings (pawns included) + PS_B_KING = 11 * SQUARE_NB + 1, + PS_END2 = 12 * SQUARE_NB + 1 + }; - extern const uint32_t kpp_board_index[PIECE_NB][COLOR_NB]; + extern const uint32_t kpp_board_index[PIECE_NB][COLOR_NB]; - // Type of input feature after conversion - using TransformedFeatureType = std::uint8_t; - using IndexType = std::uint32_t; + // Type of input feature after conversion + using TransformedFeatureType = std::uint8_t; + using IndexType = std::uint32_t; - // Forward declaration of learning class template - template - class Trainer; + // Forward declaration of learning class template + template + class Trainer; - // Round n up to be a multiple of base - template - constexpr IntType CeilToMultiple(IntType n, IntType base) { - return (n + base - 1) / base * base; - } + // Round n up to be a multiple of base + template + constexpr IntType CeilToMultiple(IntType n, IntType base) { + return (n + base - 1) / base * base; + } - // read_little_endian() is our utility to 
read an integer (signed or unsigned, any size) - // from a stream in little-endian order. We swap the byte order after the read if - // necessary to return a result with the byte ordering of the compiling machine. - template - inline IntType read_little_endian(std::istream& stream) { + // read_little_endian() is our utility to read an integer (signed or unsigned, any size) + // from a stream in little-endian order. We swap the byte order after the read if + // necessary to return a result with the byte ordering of the compiling machine. + template + inline IntType read_little_endian(std::istream& stream) { - IntType result; - std::uint8_t u[sizeof(IntType)]; - typename std::make_unsigned::type v = 0; + IntType result; + std::uint8_t u[sizeof(IntType)]; + typename std::make_unsigned::type v = 0; - stream.read(reinterpret_cast(u), sizeof(IntType)); - for (std::size_t i = 0; i < sizeof(IntType); ++i) - v = (v << 8) | u[sizeof(IntType) - i - 1]; + stream.read(reinterpret_cast(u), sizeof(IntType)); + for (std::size_t i = 0; i < sizeof(IntType); ++i) + v = (v << 8) | u[sizeof(IntType) - i - 1]; - std::memcpy(&result, &v, sizeof(IntType)); - return result; - } + std::memcpy(&result, &v, sizeof(IntType)); + return result; + } } // namespace Eval::NNUE diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index c9d8e0d2..2fc24dab 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -1,19 +1,19 @@ /* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ // A class that converts the input features of the NNUE evaluation function @@ -23,435 +23,450 @@ #include "nnue_common.h" #include "nnue_architecture.h" + #include "features/index_list.h" -#include // std::memset() +#include +#include namespace Eval::NNUE { - // If vector instructions are enabled, we update and refresh the - // accumulator tile by tile such that each tile fits in the CPU's - // vector registers. - #define TILING + // If vector instructions are enabled, we update and refresh the + // accumulator tile by tile such that each tile fits in the CPU's + // vector registers. 
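// A rough, self-contained sketch of the tile arithmetic used here, assuming
// AVX2-like values (16 vector registers of 32 bytes) and the 256-wide half of
// the halfkp_256x2 feature transformer; the real constants come from the
// #ifdef chain below and from the architecture header, so the concrete numbers
// are illustrative assumptions rather than the engine's definitive configuration.
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
    constexpr std::size_t kHalfDimensions = 256;  // accumulator width per perspective (halfkp_256x2)
    constexpr std::size_t kNumRegs        = 16;   // vector registers spent on one tile (AVX2 value)
    constexpr std::size_t kVecBytes       = 32;   // bytes per register, e.g. sizeof(__m256i)

    // Each accumulator entry is an int16_t, so one tile holds this many entries.
    constexpr std::size_t kTileHeight = kNumRegs * kVecBytes / sizeof(std::int16_t);
    static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");

    // With these AVX2-like numbers a single tile covers the whole 256-element half;
    // narrower ISAs (SSE2, MMX, NEON) split each half into 2, 8 or 2 tiles respectively.
    std::printf("tile height: %zu, tiles per half: %zu\n",
                kTileHeight, kHalfDimensions / kTileHeight);
    return 0;
}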
+#define TILING - #ifdef USE_AVX512 - typedef __m512i vec_t; - #define vec_load(a) _mm512_loadA_si512(a) - #define vec_store(a,b) _mm512_storeA_si512(a,b) - #define vec_add_16(a,b) _mm512_add_epi16(a,b) - #define vec_sub_16(a,b) _mm512_sub_epi16(a,b) - #define vec_zero _mm512_setzero_si512() - static constexpr IndexType kNumRegs = 8; // only 8 are needed +#ifdef USE_AVX512 + typedef __m512i vec_t; +#define vec_load(a) _mm512_loadA_si512(a) +#define vec_store(a,b) _mm512_storeA_si512(a,b) +#define vec_add_16(a,b) _mm512_add_epi16(a,b) +#define vec_sub_16(a,b) _mm512_sub_epi16(a,b) +#define vec_zero _mm512_setzero_si512() + static constexpr IndexType kNumRegs = 8; // only 8 are needed - #elif USE_AVX2 - typedef __m256i vec_t; - #define vec_load(a) _mm256_loadA_si256(a) - #define vec_store(a,b) _mm256_storeA_si256(a,b) - #define vec_add_16(a,b) _mm256_add_epi16(a,b) - #define vec_sub_16(a,b) _mm256_sub_epi16(a,b) - #define vec_zero _mm256_setzero_si256() - static constexpr IndexType kNumRegs = 16; +#elif USE_AVX2 + typedef __m256i vec_t; +#define vec_load(a) _mm256_loadA_si256(a) +#define vec_store(a,b) _mm256_storeA_si256(a,b) +#define vec_add_16(a,b) _mm256_add_epi16(a,b) +#define vec_sub_16(a,b) _mm256_sub_epi16(a,b) +#define vec_zero _mm256_setzero_si256() + static constexpr IndexType kNumRegs = 16; - #elif USE_SSE2 - typedef __m128i vec_t; - #define vec_load(a) (*(a)) - #define vec_store(a,b) *(a)=(b) - #define vec_add_16(a,b) _mm_add_epi16(a,b) - #define vec_sub_16(a,b) _mm_sub_epi16(a,b) - #define vec_zero _mm_setzero_si128() - static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8; +#elif USE_SSE2 + typedef __m128i vec_t; +#define vec_load(a) (*(a)) +#define vec_store(a,b) *(a)=(b) +#define vec_add_16(a,b) _mm_add_epi16(a,b) +#define vec_sub_16(a,b) _mm_sub_epi16(a,b) +#define vec_zero _mm_setzero_si128() + static constexpr IndexType kNumRegs = Is64Bit ? 
16 : 8; - #elif USE_MMX - typedef __m64 vec_t; - #define vec_load(a) (*(a)) - #define vec_store(a,b) *(a)=(b) - #define vec_add_16(a,b) _mm_add_pi16(a,b) - #define vec_sub_16(a,b) _mm_sub_pi16(a,b) - #define vec_zero _mm_setzero_si64() - static constexpr IndexType kNumRegs = 8; +#elif USE_MMX + typedef __m64 vec_t; +#define vec_load(a) (*(a)) +#define vec_store(a,b) *(a)=(b) +#define vec_add_16(a,b) _mm_add_pi16(a,b) +#define vec_sub_16(a,b) _mm_sub_pi16(a,b) +#define vec_zero _mm_setzero_si64() + static constexpr IndexType kNumRegs = 8; - #elif USE_NEON - typedef int16x8_t vec_t; - #define vec_load(a) (*(a)) - #define vec_store(a,b) *(a)=(b) - #define vec_add_16(a,b) vaddq_s16(a,b) - #define vec_sub_16(a,b) vsubq_s16(a,b) - #define vec_zero {0} - static constexpr IndexType kNumRegs = 16; +#elif USE_NEON + typedef int16x8_t vec_t; +#define vec_load(a) (*(a)) +#define vec_store(a,b) *(a)=(b) +#define vec_add_16(a,b) vaddq_s16(a,b) +#define vec_sub_16(a,b) vsubq_s16(a,b) +#define vec_zero {0} + static constexpr IndexType kNumRegs = 16; - #else - #undef TILING +#else +#undef TILING - #endif +#endif - // Input feature converter - class FeatureTransformer { + // Input feature converter + class FeatureTransformer { - private: - // Number of output dimensions for one side - static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; + private: + // Number of output dimensions for one side + static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; - #ifdef TILING - static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2; - static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions"); - #endif +#ifdef TILING + static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2; + static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions"); +#endif - public: - // Output type - using OutputType = TransformedFeatureType; + public: + // Output type + using OutputType = TransformedFeatureType; - // Number of input/output dimensions - static constexpr IndexType kInputDimensions = RawFeatures::kDimensions; - static constexpr IndexType kOutputDimensions = kHalfDimensions * 2; + // Number of input/output dimensions + static constexpr IndexType kInputDimensions = RawFeatures::kDimensions; + static constexpr IndexType kOutputDimensions = kHalfDimensions * 2; - // Size of forward propagation buffer - static constexpr std::size_t kBufferSize = - kOutputDimensions * sizeof(OutputType); + // Size of forward propagation buffer + static constexpr std::size_t kBufferSize = + kOutputDimensions * sizeof(OutputType); - // Hash value embedded in the evaluation file - static constexpr std::uint32_t GetHashValue() { + // Hash value embedded in the evaluation file + static constexpr std::uint32_t GetHashValue() { - return RawFeatures::kHashValue ^ kOutputDimensions; - } - - // a string representing the structure - static std::string GetStructureString() { - return RawFeatures::GetName() + "[" + - std::to_string(kInputDimensions) + "->" + - std::to_string(kHalfDimensions) + "x2]"; - } - - // Read network parameters - bool ReadParameters(std::istream& stream) { - - for (std::size_t i = 0; i < kHalfDimensions; ++i) - biases_[i] = read_little_endian(stream); - for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i) - weights_[i] = read_little_endian(stream); - return !stream.fail(); - } - - // write parameters - bool WriteParameters(std::ostream& stream) const { - 
stream.write(reinterpret_cast(biases_), - kHalfDimensions * sizeof(BiasType)); - stream.write(reinterpret_cast(weights_), - kHalfDimensions * kInputDimensions * sizeof(WeightType)); - return !stream.fail(); - } - - // Proceed with the difference calculation if possible - bool UpdateAccumulatorIfPossible(const Position& pos) const { - - const auto now = pos.state(); - if (now->accumulator.computed_accumulation) - return true; - - const auto prev = now->previous; - if (prev && prev->accumulator.computed_accumulation) { - UpdateAccumulator(pos); - return true; - } - - return false; - } - - // Convert input features - void Transform(const Position& pos, OutputType* output) const { - - if (!UpdateAccumulatorIfPossible(pos)) - RefreshAccumulator(pos); - - const auto& accumulation = pos.state()->accumulator.accumulation; - - #if defined(USE_AVX2) - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - constexpr int kControl = 0b11011000; - const __m256i kZero = _mm256_setzero_si256(); - - #elif defined(USE_SSE2) - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - - #ifdef USE_SSE41 - const __m128i kZero = _mm_setzero_si128(); - #else - const __m128i k0x80s = _mm_set1_epi8(-128); - #endif - - #elif defined(USE_MMX) - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - const __m64 k0x80s = _mm_set1_pi8(-128); - - #elif defined(USE_NEON) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - const int8x8_t kZero = {0}; - #endif - - const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; - for (IndexType p = 0; p < 2; ++p) { - const IndexType offset = kHalfDimensions * p; - - #if defined(USE_AVX2) - auto out = reinterpret_cast<__m256i*>(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m256i sum0 = _mm256_loadA_si256( - &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 0]); - __m256i sum1 = _mm256_loadA_si256( - &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 1]); - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum0 = _mm256_add_epi16(sum0, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 0]); - sum1 = _mm256_add_epi16(sum1, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 1]); - } - _mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( - _mm256_packs_epi16(sum0, sum1), kZero), kControl)); + return RawFeatures::kHashValue ^ kOutputDimensions; } - #elif defined(USE_SSE2) - auto out = reinterpret_cast<__m128i*>(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m128i sum0 = _mm_load_si128(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 0]); - __m128i sum1 = _mm_load_si128(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 1]); - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum0 = _mm_add_epi16(sum0, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 0]); - sum1 = _mm_add_epi16(sum1, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 1]); - } - const __m128i packedbytes = _mm_packs_epi16(sum0, sum1); - - _mm_store_si128(&out[j], - - #ifdef USE_SSE41 - _mm_max_epi8(packedbytes, kZero) - #else - _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) - #endif - - ); + // a string representing the structure + static std::string GetStructureString() { + return RawFeatures::GetName() + "[" + + std::to_string(kInputDimensions) + "->" + + std::to_string(kHalfDimensions) + "x2]"; } - #elif defined(USE_MMX) - auto out = 
reinterpret_cast<__m64*>(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m64 sum0 = *(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 0]); - __m64 sum1 = *(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 1]); - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum0 = _mm_add_pi16(sum0, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 0]); - sum1 = _mm_add_pi16(sum1, reinterpret_cast( - accumulation[perspectives[p]][i])[j * 2 + 1]); - } - const __m64 packedbytes = _mm_packs_pi16(sum0, sum1); - out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); + // Read network parameters + bool ReadParameters(std::istream& stream) { + + for (std::size_t i = 0; i < kHalfDimensions; ++i) + biases_[i] = read_little_endian(stream); + + for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i) + weights_[i] = read_little_endian(stream); + + return !stream.fail(); } - #elif defined(USE_NEON) - const auto out = reinterpret_cast(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - int16x8_t sum = reinterpret_cast( - accumulation[perspectives[p]][0])[j]; - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum = vaddq_s16(sum, reinterpret_cast( - accumulation[perspectives[p]][i])[j]); - } - out[j] = vmax_s8(vqmovn_s16(sum), kZero); + // write parameters + bool WriteParameters(std::ostream& stream) const { + stream.write(reinterpret_cast(biases_), + kHalfDimensions * sizeof(BiasType)); + + stream.write(reinterpret_cast(weights_), + kHalfDimensions * kInputDimensions * sizeof(WeightType)); + + return !stream.fail(); } - #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - BiasType sum = accumulation[static_cast(perspectives[p])][0][j]; - for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { - sum += accumulation[static_cast(perspectives[p])][i][j]; - } - output[offset + j] = static_cast( - std::max(0, std::min(127, sum))); - } - #endif + // Proceed with the difference calculation if possible + bool UpdateAccumulatorIfPossible(const Position& pos) const { - } - #if defined(USE_MMX) - _mm_empty(); - #endif - } + const auto now = pos.state(); + if (now->accumulator.computed_accumulation) + return true; - private: - // Calculate cumulative value without using difference calculation - void RefreshAccumulator(const Position& pos) const { - - auto& accumulator = pos.state()->accumulator; - for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { - Features::IndexList active_indices[2]; - RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], - active_indices); - for (Color perspective : { WHITE, BLACK }) { - #ifdef TILING - for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) { - auto accTile = reinterpret_cast( - &accumulator.accumulation[perspective][i][j * kTileHeight]); - vec_t acc[kNumRegs]; - - if (i == 0) { - auto biasesTile = reinterpret_cast( - &biases_[j * kTileHeight]); - for (unsigned k = 0; k < kNumRegs; ++k) - acc[k] = biasesTile[k]; - } else { - for (unsigned k = 0; k < kNumRegs; ++k) - acc[k] = vec_zero; - } - for (const auto index : active_indices[perspective]) { - const IndexType offset = kHalfDimensions * index + j * kTileHeight; - auto column = reinterpret_cast(&weights_[offset]); - - for (unsigned k = 0; k < kNumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); + const auto prev = now->previous; + if (prev && prev->accumulator.computed_accumulation) { + UpdateAccumulator(pos); + return true; } - for (unsigned k = 0; k < kNumRegs; k++) - 
vec_store(&accTile[k], acc[k]); - } - #else - if (i == 0) { - std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); - } else { - std::memset(accumulator.accumulation[perspective][i], 0, - kHalfDimensions * sizeof(BiasType)); - } - - for (const auto index : active_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; - - for (IndexType j = 0; j < kHalfDimensions; ++j) - accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - } - #endif + return false; } - } + // Convert input features + void Transform(const Position& pos, OutputType* output) const { - #if defined(USE_MMX) - _mm_empty(); - #endif + if (!UpdateAccumulatorIfPossible(pos)) + RefreshAccumulator(pos); - accumulator.computed_accumulation = true; - } + const auto& accumulation = pos.state()->accumulator.accumulation; - // Calculate cumulative value using difference calculation - void UpdateAccumulator(const Position& pos) const { +#if defined(USE_AVX2) + constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; + constexpr int kControl = 0b11011000; + const __m256i kZero = _mm256_setzero_si256(); - const auto& prev_accumulator = pos.state()->previous->accumulator; - auto& accumulator = pos.state()->accumulator; - for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { - Features::IndexList removed_indices[2], added_indices[2]; - bool reset[2] = { false, false }; - RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], - removed_indices, added_indices, reset); +#elif defined(USE_SSE2) + constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - #ifdef TILING - for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) { - for (Color perspective : { WHITE, BLACK }) { - auto accTile = reinterpret_cast( - &accumulator.accumulation[perspective][i][j * kTileHeight]); - vec_t acc[kNumRegs]; +#ifdef USE_SSE41 + const __m128i kZero = _mm_setzero_si128(); +#else + const __m128i k0x80s = _mm_set1_epi8(-128); +#endif - if (reset[perspective]) { - if (i == 0) { - auto biasesTile = reinterpret_cast( - &biases_[j * kTileHeight]); - for (unsigned k = 0; k < kNumRegs; ++k) - acc[k] = biasesTile[k]; - } else { - for (unsigned k = 0; k < kNumRegs; ++k) - acc[k] = vec_zero; - } - } else { - auto prevAccTile = reinterpret_cast( - &prev_accumulator.accumulation[perspective][i][j * kTileHeight]); - for (IndexType k = 0; k < kNumRegs; ++k) - acc[k] = vec_load(&prevAccTile[k]); +#elif defined(USE_MMX) + constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; + const __m64 k0x80s = _mm_set1_pi8(-128); - // Difference calculation for the deactivated features - for (const auto index : removed_indices[perspective]) { - const IndexType offset = kHalfDimensions * index + j * kTileHeight; - auto column = reinterpret_cast(&weights_[offset]); +#elif defined(USE_NEON) + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + const int8x8_t kZero = {0}; +#endif + + const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; + for (IndexType p = 0; p < 2; ++p) { + const IndexType offset = kHalfDimensions * p; + +#if defined(USE_AVX2) + auto out = reinterpret_cast<__m256i*>(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m256i sum0 = _mm256_loadA_si256( + &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 0]); + __m256i sum1 = _mm256_loadA_si256( + &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 1]); + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum0 = 
_mm256_add_epi16(sum0, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 0]); + sum1 = _mm256_add_epi16(sum1, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 1]); + } + + _mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( + _mm256_packs_epi16(sum0, sum1), kZero), kControl)); + } + +#elif defined(USE_SSE2) + auto out = reinterpret_cast<__m128i*>(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m128i sum0 = _mm_load_si128(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 0]); + __m128i sum1 = _mm_load_si128(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 1]); + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum0 = _mm_add_epi16(sum0, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 0]); + sum1 = _mm_add_epi16(sum1, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 1]); + } + + const __m128i packedbytes = _mm_packs_epi16(sum0, sum1); + + _mm_store_si128(&out[j], + +#ifdef USE_SSE41 + _mm_max_epi8(packedbytes, kZero) +#else + _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) +#endif + + ); + } + +#elif defined(USE_MMX) + auto out = reinterpret_cast<__m64*>(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m64 sum0 = *(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 0]); + __m64 sum1 = *(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 1]); + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum0 = _mm_add_pi16(sum0, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 0]); + sum1 = _mm_add_pi16(sum1, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 1]); + } + + const __m64 packedbytes = _mm_packs_pi16(sum0, sum1); + out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); + } + +#elif defined(USE_NEON) + const auto out = reinterpret_cast(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + int16x8_t sum = reinterpret_cast( + accumulation[perspectives[p]][0])[j]; + + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum = vaddq_s16(sum, reinterpret_cast( + accumulation[perspectives[p]][i])[j]); + } + + out[j] = vmax_s8(vqmovn_s16(sum), kZero); + } + +#else + for (IndexType j = 0; j < kHalfDimensions; ++j) { + BiasType sum = accumulation[static_cast(perspectives[p])][0][j]; + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum += accumulation[static_cast(perspectives[p])][i][j]; + } + + output[offset + j] = static_cast( + std::max(0, std::min(127, sum))); + } +#endif - for (IndexType k = 0; k < kNumRegs; ++k) - acc[k] = vec_sub_16(acc[k], column[k]); - } } - { // Difference calculation for the activated features - for (const auto index : added_indices[perspective]) { - const IndexType offset = kHalfDimensions * index + j * kTileHeight; - auto column = reinterpret_cast(&weights_[offset]); - - for (IndexType k = 0; k < kNumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); - } - } - - for (IndexType k = 0; k < kNumRegs; ++k) - vec_store(&accTile[k], acc[k]); - } +#if defined(USE_MMX) + _mm_empty(); +#endif } - #if defined(USE_MMX) - _mm_empty(); - #endif - #else - for (Color perspective : { WHITE, BLACK }) { + private: + // Calculate cumulative value without using difference calculation + void RefreshAccumulator(const Position& pos) const { + + auto& accumulator = pos.state()->accumulator; + for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { + Features::IndexList active_indices[2]; + RawFeatures::AppendActiveIndices(pos, 
kRefreshTriggers[i], + active_indices); + for (Color perspective : { WHITE, BLACK }) { +#ifdef TILING + for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) { + auto accTile = reinterpret_cast( + &accumulator.accumulation[perspective][i][j * kTileHeight]); + vec_t acc[kNumRegs]; + + if (i == 0) { + auto biasesTile = reinterpret_cast( + &biases_[j * kTileHeight]); + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = biasesTile[k]; + } else { + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = vec_zero; + } + + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); + + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + + for (unsigned k = 0; k < kNumRegs; k++) + vec_store(&accTile[k], acc[k]); + } +#else + if (i == 0) { + std::memcpy(accumulator.accumulation[perspective][i], biases_, + kHalfDimensions * sizeof(BiasType)); + } else { + std::memset(accumulator.accumulation[perspective][i], 0, + kHalfDimensions * sizeof(BiasType)); + } + + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; + + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] += weights_[offset + j]; + } +#endif + } - if (reset[perspective]) { - if (i == 0) { - std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); - } else { - std::memset(accumulator.accumulation[perspective][i], 0, - kHalfDimensions * sizeof(BiasType)); } - } else { - std::memcpy(accumulator.accumulation[perspective][i], - prev_accumulator.accumulation[perspective][i], - kHalfDimensions * sizeof(BiasType)); - // Difference calculation for the deactivated features - for (const auto index : removed_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; - for (IndexType j = 0; j < kHalfDimensions; ++j) - accumulator.accumulation[perspective][i][j] -= weights_[offset + j]; - } - } - { // Difference calculation for the activated features - for (const auto index : added_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; +#if defined(USE_MMX) + _mm_empty(); +#endif - for (IndexType j = 0; j < kHalfDimensions; ++j) - accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - } - } + accumulator.computed_accumulation = true; } - #endif - } - accumulator.computed_accumulation = true; - } - using BiasType = std::int16_t; - using WeightType = std::int16_t; + // Calculate cumulative value using difference calculation + void UpdateAccumulator(const Position& pos) const { - // Make the learning class a friend - friend class Trainer; + const auto& prev_accumulator = pos.state()->previous->accumulator; + auto& accumulator = pos.state()->accumulator; + for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { + Features::IndexList removed_indices[2], added_indices[2]; + bool reset[2] = { false, false }; + RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], + removed_indices, added_indices, reset); - alignas(kCacheLineSize) BiasType biases_[kHalfDimensions]; - alignas(kCacheLineSize) - WeightType weights_[kHalfDimensions * kInputDimensions]; - }; +#ifdef TILING + for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) { + for (Color perspective : { WHITE, BLACK }) { + auto accTile = reinterpret_cast( + &accumulator.accumulation[perspective][i][j * kTileHeight]); + vec_t acc[kNumRegs]; + + if 
(reset[perspective]) { + if (i == 0) { + auto biasesTile = reinterpret_cast( + &biases_[j * kTileHeight]); + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = biasesTile[k]; + } else { + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = vec_zero; + } + } else { + auto prevAccTile = reinterpret_cast( + &prev_accumulator.accumulation[perspective][i][j * kTileHeight]); + + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_load(&prevAccTile[k]); + + // Difference calculation for the deactivated features + for (const auto index : removed_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); + + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); + } + } + + { // Difference calculation for the activated features + for (const auto index : added_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); + + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + } + + for (IndexType k = 0; k < kNumRegs; ++k) + vec_store(&accTile[k], acc[k]); + } + } +#if defined(USE_MMX) + _mm_empty(); +#endif + +#else + for (Color perspective : { WHITE, BLACK }) { + + if (reset[perspective]) { + if (i == 0) { + std::memcpy(accumulator.accumulation[perspective][i], biases_, + kHalfDimensions * sizeof(BiasType)); + } else { + std::memset(accumulator.accumulation[perspective][i], 0, + kHalfDimensions * sizeof(BiasType)); + } + } else { + std::memcpy(accumulator.accumulation[perspective][i], + prev_accumulator.accumulation[perspective][i], + kHalfDimensions * sizeof(BiasType)); + // Difference calculation for the deactivated features + for (const auto index : removed_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; + + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] -= weights_[offset + j]; + } + } + { // Difference calculation for the activated features + for (const auto index : added_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; + + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] += weights_[offset + j]; + } + } + } +#endif + } + accumulator.computed_accumulation = true; + } + + using BiasType = std::int16_t; + using WeightType = std::int16_t; + + // Make the learning class a friend + friend class Trainer; + + alignas(kCacheLineSize) BiasType biases_[kHalfDimensions]; + alignas(kCacheLineSize) + WeightType weights_[kHalfDimensions * kInputDimensions]; + }; } // namespace Eval::NNUE -#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED +#endif //#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED diff --git a/src/nnue/nnue_test_command.cpp b/src/nnue/nnue_test_command.cpp index f6f05c2e..55fa603a 100644 --- a/src/nnue/nnue_test_command.cpp +++ b/src/nnue/nnue_test_command.cpp @@ -1,197 +1,215 @@ -// USI extended command for NNUE evaluation function - -#include "../thread.h" -#include "../uci.h" -#include "evaluate_nnue.h" +#include "evaluate_nnue.h" #include "nnue_test_command.h" +#include "thread.h" +#include "uci.h" + #include #include -#define ASSERT(X) { if (!(X)) { std::cout << "\nError : ASSERT(" << #X << "), " << __FILE__ << "(" << __LINE__ << "): " << __func__ << std::endl; \ - std::this_thread::sleep_for(std::chrono::microseconds(3000)); *(int*)1 =0;} } - -namespace Eval { - -namespace NNUE { - -namespace { - -// 
Testing RawFeatures mainly for difference calculation -void TestFeatures(Position& pos) { - const std::uint64_t num_games = 1000; - StateInfo si; - pos.set(StartFEN, false, &si, Threads.main()); - const int MAX_PLY = 256; // test up to 256 hands - - StateInfo state[MAX_PLY]; // StateInfo only for the maximum number of steps - int ply; // Trouble from the initial phase - - PRNG prng(20171128); - - std::uint64_t num_moves = 0; - std::vector num_updates(kRefreshTriggers.size() + 1); - std::vector num_resets(kRefreshTriggers.size()); - constexpr IndexType kUnknown = -1; - std::vector trigger_map(RawFeatures::kDimensions, kUnknown); - auto make_index_sets = [&](const Position& position) { - std::vector>> index_sets( - kRefreshTriggers.size(), std::vector>(2)); - for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { - Features::IndexList active_indices[2]; - RawFeatures::AppendActiveIndices(position, kRefreshTriggers[i], - active_indices); - for (const auto perspective : Colors) { - for (const auto index : active_indices[perspective]) { - ASSERT(index < RawFeatures::kDimensions); - ASSERT(index_sets[i][perspective].count(index) == 0); - ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i); - index_sets[i][perspective].insert(index); - trigger_map[index] = i; - } - } - } - return index_sets; - }; - auto update_index_sets = [&](const Position& position, auto* index_sets) { - for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { - Features::IndexList removed_indices[2], added_indices[2]; - bool reset[2] = { false, false }; - RawFeatures::AppendChangedIndices(position, kRefreshTriggers[i], - removed_indices, added_indices, reset); - for (const auto perspective : Colors) { - if (reset[perspective]) { - (*index_sets)[i][perspective].clear(); - ++num_resets[i]; - } else { - for (const auto index : removed_indices[perspective]) { - ASSERT(index < RawFeatures::kDimensions); - ASSERT((*index_sets)[i][perspective].count(index) == 1); - ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i); - (*index_sets)[i][perspective].erase(index); - ++num_updates.back(); - ++num_updates[i]; - trigger_map[index] = i; - } - } - for (const auto index : added_indices[perspective]) { - ASSERT(index < RawFeatures::kDimensions); - ASSERT((*index_sets)[i][perspective].count(index) == 0); - ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i); - (*index_sets)[i][perspective].insert(index); - ++num_updates.back(); - ++num_updates[i]; - trigger_map[index] = i; - } - } - } - }; - - std::cout << "feature set: " << RawFeatures::GetName() - << "[" << RawFeatures::kDimensions << "]" << std::endl; - std::cout << "start testing with random games"; - - for (std::uint64_t i = 0; i < num_games; ++i) { - auto index_sets = make_index_sets(pos); - for (ply = 0; ply < MAX_PLY; ++ply) { - MoveList mg(pos); // Generate all legal hands - - // There was no legal move == Clog - if (mg.size() == 0) - break; - - // Randomly choose from the generated moves and advance the phase with the moves. - Move m = mg.begin()[prng.rand(mg.size())]; - pos.do_move(m, state[ply]); - - ++num_moves; - update_index_sets(pos, &index_sets); - ASSERT(index_sets == make_index_sets(pos)); - } - - pos.set(StartFEN, false, &si, Threads.main()); - - // Output'.' every 100 times (so you can see that it's progressing) - if ((i % 100) == 0) - std::cout << "." << std::flush; - } - std::cout << "passed." 
<< std::endl; - std::cout << num_games << " games, " << num_moves << " moves, " - << num_updates.back() << " updates, " - << (1.0 * num_updates.back() / num_moves) - << " updates per move" << std::endl; - std::size_t num_observed_indices = 0; - for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { - const auto count = std::count(trigger_map.begin(), trigger_map.end(), i); - num_observed_indices += count; - std::cout << "TriggerEvent(" << static_cast(kRefreshTriggers[i]) - << "): " << count << " features (" - << (100.0 * count / RawFeatures::kDimensions) << "%), " - << num_updates[i] << " updates (" - << (1.0 * num_updates[i] / num_moves) << " per move), " - << num_resets[i] << " resets (" - << (100.0 * num_resets[i] / num_moves) << "%)" - << std::endl; - } - std::cout << "observed " << num_observed_indices << " (" - << (100.0 * num_observed_indices / RawFeatures::kDimensions) - << "% of " << RawFeatures::kDimensions - << ") features" << std::endl; +#define ASSERT(X) { \ + if (!(X)) { \ + std::cout \ + << "\nError : ASSERT(" << #X << "), " \ + << __FILE__ << "(" << __LINE__ << "): " \ + << __func__ << std::endl; \ + std::this_thread::sleep_for(std::chrono::microseconds(3000)); \ + *(int*)1 = 0; \ + } \ } -// Output a string that represents the structure of the evaluation function -void PrintInfo(std::istream& stream) { - std::cout << "network architecture: " << GetArchitectureString() << std::endl; - - while (true) { - std::string file_name; - stream >> file_name; - if (file_name.empty()) break; - - std::uint32_t hash_value; - std::string architecture; - const bool success = [&]() { - std::ifstream file_stream(file_name, std::ios::binary); - if (!file_stream) return false; - if (!ReadHeader(file_stream, &hash_value, &architecture)) return false; - return true; - }(); - - std::cout << file_name << ": "; - if (success) { - if (hash_value == kHashValue) { - std::cout << "matches with this binary"; - if (architecture != GetArchitectureString()) { - std::cout << ", but architecture string differs: " << architecture; - } - std::cout << std::endl; - } else { - std::cout << architecture << std::endl; - } - } else { - std::cout << "failed to read header" << std::endl; - } - } -} - -} // namespace - // USI extended command for NNUE evaluation function -void TestCommand(Position& pos, std::istream& stream) { - std::string sub_command; - stream >> sub_command; +namespace Eval::NNUE { - if (sub_command == "test_features") { - TestFeatures(pos); - } else if (sub_command == "info") { - PrintInfo(stream); - } else { - std::cout << "usage:" << std::endl; - std::cout << " test nnue test_features" << std::endl; - std::cout << " test nnue info [path/to/" << fileName << "...]" << std::endl; - } -} + namespace { -} // namespace NNUE + // Test RawFeatures, mainly the incremental (difference) calculation + void TestFeatures(Position& pos) { + const std::uint64_t num_games = 1000; + StateInfo si; + pos.set(StartFEN, false, &si, Threads.main()); + const int MAX_PLY = 256; // test up to 256 plies per game -} // namespace Eval + StateInfo state[MAX_PLY]; // StateInfo for up to MAX_PLY plies + int ply; // number of plies from the initial position + + PRNG prng(20171128); + + std::uint64_t num_moves = 0; + std::vector<std::uint64_t> num_updates(kRefreshTriggers.size() + 1); + std::vector<std::uint64_t> num_resets(kRefreshTriggers.size()); + constexpr IndexType kUnknown = -1; + std::vector<IndexType> trigger_map(RawFeatures::kDimensions, kUnknown); + + auto make_index_sets = [&](const Position& position) { + std::vector<std::vector<std::set<IndexType>>> index_sets( + kRefreshTriggers.size(),
std::vector<std::set<IndexType>>(2)); + + for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { + Features::IndexList active_indices[2]; + RawFeatures::AppendActiveIndices(position, kRefreshTriggers[i], + active_indices); + + for (const auto perspective : Colors) { + for (const auto index : active_indices[perspective]) { + ASSERT(index < RawFeatures::kDimensions); + ASSERT(index_sets[i][perspective].count(index) == 0); + ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i); + index_sets[i][perspective].insert(index); + trigger_map[index] = i; + } + } + } + + return index_sets; + }; + + auto update_index_sets = [&](const Position& position, auto* index_sets) { + for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { + Features::IndexList removed_indices[2], added_indices[2]; + bool reset[2] = { false, false }; + RawFeatures::AppendChangedIndices(position, kRefreshTriggers[i], + removed_indices, added_indices, reset); + for (const auto perspective : Colors) { + if (reset[perspective]) { + (*index_sets)[i][perspective].clear(); + ++num_resets[i]; + } else { + for (const auto index : removed_indices[perspective]) { + ASSERT(index < RawFeatures::kDimensions); + ASSERT((*index_sets)[i][perspective].count(index) == 1); + ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i); + (*index_sets)[i][perspective].erase(index); + ++num_updates.back(); + ++num_updates[i]; + trigger_map[index] = i; + } + } + + for (const auto index : added_indices[perspective]) { + ASSERT(index < RawFeatures::kDimensions); + ASSERT((*index_sets)[i][perspective].count(index) == 0); + ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i); + (*index_sets)[i][perspective].insert(index); + ++num_updates.back(); + ++num_updates[i]; + trigger_map[index] = i; + } + } + } + }; + + std::cout << "feature set: " << RawFeatures::GetName() + << "[" << RawFeatures::kDimensions << "]" << std::endl; + std::cout << "start testing with random games"; + + for (std::uint64_t i = 0; i < num_games; ++i) { + auto index_sets = make_index_sets(pos); + for (ply = 0; ply < MAX_PLY; ++ply) { + MoveList<LEGAL> mg(pos); // Generate all legal moves + + // No legal moves: the game is over (checkmate or stalemate) + if (mg.size() == 0) + break; + + // Pick one of the generated moves at random and play it + Move m = mg.begin()[prng.rand(mg.size())]; + pos.do_move(m, state[ply]); + + ++num_moves; + update_index_sets(pos, &index_sets); + ASSERT(index_sets == make_index_sets(pos)); + } + + pos.set(StartFEN, false, &si, Threads.main()); + + // Print '.' every 100 games to show progress + if ((i % 100) == 0) + std::cout << "." << std::flush; + } + + std::cout << "passed."
<< std::endl; + std::cout << num_games << " games, " << num_moves << " moves, " + << num_updates.back() << " updates, " + << (1.0 * num_updates.back() / num_moves) + << " updates per move" << std::endl; + std::size_t num_observed_indices = 0; + + for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { + const auto count = std::count(trigger_map.begin(), trigger_map.end(), i); + num_observed_indices += count; + std::cout << "TriggerEvent(" << static_cast(kRefreshTriggers[i]) + << "): " << count << " features (" + << (100.0 * count / RawFeatures::kDimensions) << "%), " + << num_updates[i] << " updates (" + << (1.0 * num_updates[i] / num_moves) << " per move), " + << num_resets[i] << " resets (" + << (100.0 * num_resets[i] / num_moves) << "%)" + << std::endl; + } + std::cout << "observed " << num_observed_indices << " (" + << (100.0 * num_observed_indices / RawFeatures::kDimensions) + << "% of " << RawFeatures::kDimensions + << ") features" << std::endl; + } + + // Output a string that represents the structure of the evaluation function + void PrintInfo(std::istream& stream) { + std::cout << "network architecture: " << GetArchitectureString() << std::endl; + + while (true) { + std::string file_name; + stream >> file_name; + if (file_name.empty()) + break; + + std::uint32_t hash_value; + std::string architecture; + const bool success = [&]() { + std::ifstream file_stream(file_name, std::ios::binary); + + if (!file_stream) + return false; + if (!ReadHeader(file_stream, &hash_value, &architecture)) + return false; + + return true; + }(); + + std::cout << file_name << ": "; + if (success) { + if (hash_value == kHashValue) { + std::cout << "matches with this binary"; + if (architecture != GetArchitectureString()) { + std::cout << ", but architecture string differs: " << architecture; + } + + std::cout << std::endl; + } else { + std::cout << architecture << std::endl; + } + } else { + std::cout << "failed to read header" << std::endl; + } + } + } + + } // namespace + + // USI extended command for NNUE evaluation function + void TestCommand(Position& pos, std::istream& stream) { + std::string sub_command; + stream >> sub_command; + + if (sub_command == "test_features") { + TestFeatures(pos); + } else if (sub_command == "info") { + PrintInfo(stream); + } else { + std::cout << "usage:" << std::endl; + std::cout << " test nnue test_features" << std::endl; + std::cout << " test nnue info [path/to/" << fileName << "...]" << std::endl; + } + } + +} // namespace Eval::NNUE diff --git a/src/nnue/nnue_test_command.h b/src/nnue/nnue_test_command.h index 75d33e82..989731d6 100644 --- a/src/nnue/nnue_test_command.h +++ b/src/nnue/nnue_test_command.h @@ -1,17 +1,12 @@ -// USI extended command interface for NNUE evaluation function - -#ifndef _NNUE_TEST_COMMAND_H_ +#ifndef _NNUE_TEST_COMMAND_H_ #define _NNUE_TEST_COMMAND_H_ -namespace Eval { +// USI extended command interface for NNUE evaluation function +namespace Eval::NNUE { -namespace NNUE { + // USI extended command for NNUE evaluation function + void TestCommand(Position& pos, std::istream& stream); -// USI extended command for NNUE evaluation function -void TestCommand(Position& pos, std::istream& stream); - -} // namespace NNUE - -} // namespace Eval +} // namespace Eval::NNUE #endif
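
The RefreshAccumulator/UpdateAccumulator pair in the feature_transformer.h hunk implements NNUE's incremental ("difference") evaluation: a full refresh starts from the biases and adds the weight column of every active feature, while the per-move update copies the previous accumulator, subtracts the columns of features that disappeared and adds the columns of features that appeared, resetting only when a refresh trigger fires. The minimal, standalone C++ sketch below shows why the two paths must agree; kHalfDims, kInputDims, refresh and update are illustrative names and sizes, not the engine's actual types or values.

#include <array>
#include <cstddef>
#include <cstdint>
#include <vector>

constexpr std::size_t kHalfDims  = 8;    // the engine uses a much larger value; small here for clarity
constexpr std::size_t kInputDims = 16;   // number of possible feature indices

using Accum = std::array<std::int16_t, kHalfDims>;

// weights[index * kHalfDims + j] is entry j of the column for feature `index`
std::vector<std::int16_t> weights(kInputDims * kHalfDims, 1);

// Full rebuild: start from the biases and add every active feature's column
Accum refresh(const std::vector<std::size_t>& active, const Accum& biases) {
    Accum acc = biases;
    for (std::size_t index : active)
        for (std::size_t j = 0; j < kHalfDims; ++j)
            acc[j] += weights[index * kHalfDims + j];
    return acc;
}

// Incremental update: copy the previous accumulator, subtract removed columns, add new ones
Accum update(Accum acc, const std::vector<std::size_t>& removed,
             const std::vector<std::size_t>& added) {
    for (std::size_t index : removed)
        for (std::size_t j = 0; j < kHalfDims; ++j)
            acc[j] -= weights[index * kHalfDims + j];
    for (std::size_t index : added)
        for (std::size_t j = 0; j < kHalfDims; ++j)
            acc[j] += weights[index * kHalfDims + j];
    return acc;
}

int main() {
    Accum biases{};                                       // all-zero biases for the sketch
    Accum full = refresh({1, 4, 7}, biases);              // position with features {1, 4, 7}
    Accum prev = refresh({1, 4, 9}, biases);              // previous position had {1, 4, 9}
    Accum incr = update(prev, /*removed=*/{9}, /*added=*/{7});
    return full == incr ? 0 : 1;                          // both paths must agree
}

This equivalence is exactly the invariant TestFeatures checks over random games: after every move, the index sets maintained via AppendChangedIndices must equal the sets rebuilt from scratch via AppendActiveIndices.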
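
The TILING branch of the same hunk processes the half-dimension vector in kTileHeight-wide slices so that one slice stays resident in SIMD registers (vec_t, vec_add_16, vec_store) while all active feature columns are folded into it. Below is a scalar sketch of that loop structure only, with illustrative names and sizes and plain int16_t arrays standing in for the vector registers; it is not the engine's SIMD code.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

constexpr std::size_t kHalfDims   = 32;  // illustrative; the engine value is larger
constexpr std::size_t kTileHeight = 8;   // entries handled per tile ("registers" worth of data)

// acc and biases hold kHalfDims entries; weights holds one kHalfDims column per feature index
void accumulate_tiled(std::int16_t* acc, const std::int16_t* biases,
                      const std::int16_t* weights,
                      const std::vector<std::size_t>& active) {
    for (std::size_t tile = 0; tile < kHalfDims / kTileHeight; ++tile) {
        std::int16_t regs[kTileHeight];                 // stands in for the vec_t register block
        std::memcpy(regs, biases + tile * kTileHeight, sizeof(regs));

        for (std::size_t index : active) {
            const std::int16_t* column = weights + index * kHalfDims + tile * kTileHeight;
            for (std::size_t k = 0; k < kTileHeight; ++k)
                regs[k] += column[k];                   // vec_add_16 in the real code
        }
        std::memcpy(acc + tile * kTileHeight, regs, sizeof(regs));
    }
}

int main() {
    std::vector<std::int16_t> biases(kHalfDims, 0), weights(4 * kHalfDims, 1), acc(kHalfDims);
    accumulate_tiled(acc.data(), biases.data(), weights.data(), {0, 2, 3});
    return acc[0] == 3 ? 0 : 1;                         // three active features, weight 1 each
}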