From 406979ea12ee7828e079871b0f9f3dc8f127a741 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Sun, 23 Aug 2020 13:43:38 +0200 Subject: [PATCH 01/35] Embed default net, and simplify using non-default nets covers the most important cases from the user perspective: It embeds the default net in the binary, so a download of that binary will result in a working engine with the default net. The engine will be functional in the default mode without any additional user action. It allows non-default nets to be used, which will be looked for in up to three directories (working directory, location of the binary, and optionally a specific default directory). This mechanism is also kept for those developers that use MSVC, the one compiler that doesn't have an easy mechanism for embedding data. It is possible to disable embedding, and instead specify a specific directory, e.g. linux distros might want to use CXXFLAGS="-DNNUE_EMBEDDING_OFF -DDEFAULT_NNUE_DIRECTORY=/usr/share/games/stockfish/" make -j ARCH=x86-64 profile-build passed STC non-regression: https://tests.stockfishchess.org/tests/view/5f4a581c150f0aef5f8ae03a LLR: 2.95 (-2.94,2.94) {-1.25,-0.25} Total: 66928 W: 7202 L: 7147 D: 52579 Ptnml(0-2): 291, 5309, 22211, 5360, 293 closes https://github.com/official-stockfish/Stockfish/pull/3070 fixes https://github.com/official-stockfish/Stockfish/issues/3030 No functional change. --- AUTHORS | 3 +- README.md | 18 +- appveyor.yml | 2 +- src/Makefile | 10 +- src/evaluate.cpp | 103 +++++++++-- src/evaluate.h | 7 +- src/incbin/UNLICENCE | 26 +++ src/incbin/incbin.h | 368 +++++++++++++++++++++++++++++++++++++ src/main.cpp | 1 + src/misc.cpp | 59 ++++++ src/misc.h | 9 +- src/nnue/evaluate_nnue.cpp | 14 +- src/ucioption.cpp | 5 +- 13 files changed, 582 insertions(+), 43 deletions(-) create mode 100644 src/incbin/UNLICENCE create mode 100755 src/incbin/incbin.h diff --git a/AUTHORS b/AUTHORS index c96f870a..c00ab657 100644 --- a/AUTHORS +++ b/AUTHORS @@ -36,10 +36,11 @@ Bryan Cross (crossbr) candirufish Chess13234 Chris Cain (ceebo) +Dale Weiler (graphitemaster) Dan Schmidt (dfannius) Daniel Axtens (daxtens) Daniel Dugovic (ddugovic) -Dariusz Orzechowski +Dariusz Orzechowski (dorzechowski) David Zar Daylen Yang (daylen) DiscanX diff --git a/README.md b/README.md index 2cc88bf4..96a495ae 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,9 @@ about how to use Stockfish with it. The Stockfish engine features two evaluation functions for chess, the classical evaluation based on handcrafted terms, and the NNUE evaluation based on efficiently -updateable neural networks. The classical evaluation runs efficiently on most 64bit -CPU architectures, while the NNUE evaluation benefits strongly from the vector -intrinsics available on modern CPUs (avx2 or similar). +updateable neural networks. The classical evaluation runs efficiently on almost all +CPU architectures, while the NNUE evaluation benefits from the vector +intrinsics available on most CPUs (sse2, avx2, neon, or similar). ## Files @@ -29,10 +29,11 @@ This distribution of Stockfish consists of the following files: that can be used to compile Stockfish on Unix-like systems. * a file with the .nnue extension, storing the neural network for the NNUE - evaluation. + evaluation. Binary distributions will have this file embedded. Note: to use the NNUE evaluation, the additional data file with neural network parameters -needs to be downloaded. The filename for the default net can be found as the default +needs to be available. Normally, this file is already embedded in the binary or it can be downloaded. +The filename for the default (recommended) net can be found as the default value of the `EvalFile` UCI option, with the format `nn-[SHA256 first 12 digits].nnue` (for instance, `nn-c157e0a5755b.nnue`). This file can be downloaded from ``` @@ -61,11 +62,14 @@ Currently, Stockfish has the following UCI options: * #### Use NNUE Toggle between the NNUE and classical evaluation functions. If set to "true", - the network parameters must be available to load from file (see also EvalFile). + the network parameters must be available to load from file (see also EvalFile), + if they are not embedded in the binary. * #### EvalFile The name of the file of the NNUE evaluation parameters. Depending on the GUI the - filename should include the full path to the folder/directory that contains the file. + filename might have to include the full path to the folder/directory that contains the file. + Other locations, such as the directory that contains the binary and the working directory, + are also searched. * #### UCI_AnalyseMode An option handled by your GUI. diff --git a/appveyor.yml b/appveyor.yml index a3732a23..ab608409 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -63,7 +63,7 @@ build_script: - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal - ps: | # Download default NNUE net from fishtest - $nnuenet = Get-Content -Path src\ucioption.cpp | Select-String -CaseSensitive -Pattern "Option" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue" + $nnuenet = Get-Content -Path src\evaluate.h | Select-String -CaseSensitive -Pattern "EvalFileDefaultName" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue" $dummy = $nnuenet -match "(?nn-[a-z0-9]{12}.nnue)" $nnuenet = $Matches.nnuenet Write-Host "Default net:" $nnuenet diff --git a/src/Makefile b/src/Makefile index 703aa230..5f363f02 100644 --- a/src/Makefile +++ b/src/Makefile @@ -614,10 +614,12 @@ ifeq ($(debug), no) # So, only enable it for a cross from Linux by default. else ifeq ($(comp),mingw) ifeq ($(KERNEL),Linux) + ifneq ($(arch),i386) CXXFLAGS += -flto LDFLAGS += $(CXXFLAGS) -flto=jobserver endif endif + endif endif endif @@ -705,7 +707,7 @@ endif config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \ clang-profile-use clang-profile-make -build: config-sanity +build: config-sanity net $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all profile-build: net config-sanity objclean profileclean @@ -731,12 +733,13 @@ install: -cp $(EXE) $(BINDIR) -strip $(BINDIR)/$(EXE) -#clean all +# clean all clean: objclean profileclean @rm -f .depend *~ core +# evaluation network (nnue) net: - $(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) + $(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) @echo "Default net: $(nnuenet)" $(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet)) $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) @@ -758,7 +761,6 @@ net: echo "shasum / sha256sum not found, skipping net validation"; \ fi - # clean binaries and objects objclean: @rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o diff --git a/src/evaluate.cpp b/src/evaluate.cpp index ce92db9a..67154751 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -20,51 +20,126 @@ #include #include #include // For std::memset +#include #include #include #include +#include +#include #include "bitboard.h" #include "evaluate.h" #include "material.h" +#include "misc.h" #include "pawns.h" #include "thread.h" #include "uci.h" +#include "incbin/incbin.h" + + +// Macro to embed the default NNUE file data in the engine binary (using incbin.h, by Dale Weiler). +// This macro invocation will declare the following three variables +// const unsigned char gEmbeddedNNUEData[]; // a pointer to the embedded data +// const unsigned char *const gEmbeddedNNUEEnd; // a marker to the end +// const unsigned int gEmbeddedNNUESize; // the size of the embedded file +// Note that this does not work in Microsof Visual Studio. +#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF) + INCBIN(EmbeddedNNUE, EvalFileDefaultName); +#else + const unsigned char gEmbeddedNNUEData[1] = {0x0}; + const unsigned char *const gEmbeddedNNUEEnd = &gEmbeddedNNUEData[1]; + const unsigned int gEmbeddedNNUESize = 1; +#endif + + +using namespace std; +using namespace Eval::NNUE; namespace Eval { bool useNNUE; - std::string eval_file_loaded="None"; + string eval_file_loaded = "None"; + + /// init_NNUE() tries to load a nnue network at startup time, or when the engine + /// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue" + /// The name of the nnue network is always retrieved from the EvalFile option. + /// We search the given network in three locations: internally (the default + /// network may be embedded in the binary), in the active working directory and + /// in the engine directory. Distro packagers may define the DEFAULT_NNUE_DIRECTORY + /// variable to have the engine search in a special directory in their distro. void init_NNUE() { useNNUE = Options["Use NNUE"]; - std::string eval_file = std::string(Options["EvalFile"]); - if (useNNUE && eval_file_loaded != eval_file) - if (Eval::NNUE::load_eval_file(eval_file)) - eval_file_loaded = eval_file; + if (!useNNUE) + return; + + string eval_file = string(Options["EvalFile"]); + + #if defined(DEFAULT_NNUE_DIRECTORY) + #define stringify2(x) #x + #define stringify(x) stringify2(x) + vector dirs = { "" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) }; + #else + vector dirs = { "" , "" , CommandLine::binaryDirectory }; + #endif + + for (string directory : dirs) + if (eval_file_loaded != eval_file) + { + if (directory != "") + { + ifstream stream(directory + eval_file, ios::binary); + if (load_eval(eval_file, stream)) + eval_file_loaded = eval_file; + } + + if (directory == "" && eval_file == EvalFileDefaultName) + { + // C++ way to prepare a buffer for a memory stream + class MemoryBuffer : public basic_streambuf { + public: MemoryBuffer(char* p, size_t n) { setg(p, p, p + n); setp(p, p + n); } + }; + + MemoryBuffer buffer(const_cast(reinterpret_cast(gEmbeddedNNUEData)), + size_t(gEmbeddedNNUESize)); + + istream stream(&buffer); + if (load_eval(eval_file, stream)) + eval_file_loaded = eval_file; + } + } } + /// verify_NNUE() verifies that the last net used was loaded successfully void verify_NNUE() { - std::string eval_file = std::string(Options["EvalFile"]); + string eval_file = string(Options["EvalFile"]); + if (useNNUE && eval_file_loaded != eval_file) { UCI::OptionsMap defaults; UCI::init(defaults); - sync_cout << "info string ERROR: NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully." << sync_endl; - sync_cout << "info string ERROR: The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << sync_endl; - sync_cout << "info string ERROR: The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << sync_endl; - sync_cout << "info string ERROR: If the UCI option Use NNUE is set to true, network evaluation parameters compatible with the program must be available." << sync_endl; - sync_cout << "info string ERROR: The engine will be terminated now." << sync_endl; - std::exit(EXIT_FAILURE); + string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available."; + string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully."; + string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file."; + string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + string(defaults["EvalFile"]); + string msg5 = "The engine will be terminated now."; + + sync_cout << "info string ERROR: " << msg1 << sync_endl; + sync_cout << "info string ERROR: " << msg2 << sync_endl; + sync_cout << "info string ERROR: " << msg3 << sync_endl; + sync_cout << "info string ERROR: " << msg4 << sync_endl; + sync_cout << "info string ERROR: " << msg5 << sync_endl; + + exit(EXIT_FAILURE); } if (useNNUE) - sync_cout << "info string NNUE evaluation using " << eval_file << " enabled." << sync_endl; + sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl; else - sync_cout << "info string classical evaluation enabled." << sync_endl; + sync_cout << "info string classical evaluation enabled" << sync_endl; } } diff --git a/src/evaluate.h b/src/evaluate.h index e808068d..d701f5a7 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -35,12 +35,17 @@ namespace Eval { void init_NNUE(); void verify_NNUE(); + // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue + // for the build process (profile-build and fishtest) to work. Do not change the + // name of the macro, as it is used in the Makefile. + #define EvalFileDefaultName "nn-82215d0fd0df.nnue" + namespace NNUE { Value evaluate(const Position& pos); Value compute_eval(const Position& pos); void update_eval(const Position& pos); - bool load_eval_file(const std::string& evalFile); + bool load_eval(std::string streamName, std::istream& stream); } // namespace NNUE diff --git a/src/incbin/UNLICENCE b/src/incbin/UNLICENCE new file mode 100644 index 00000000..32484ab5 --- /dev/null +++ b/src/incbin/UNLICENCE @@ -0,0 +1,26 @@ +The file "incbin.h" is free and unencumbered software released into +the public domain by Dale Weiler, see: + + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to diff --git a/src/incbin/incbin.h b/src/incbin/incbin.h new file mode 100755 index 00000000..c19684d7 --- /dev/null +++ b/src/incbin/incbin.h @@ -0,0 +1,368 @@ +/** + * @file incbin.h + * @author Dale Weiler + * @brief Utility for including binary files + * + * Facilities for including binary files into the current translation unit and + * making use from them externally in other translation units. + */ +#ifndef INCBIN_HDR +#define INCBIN_HDR +#include +#if defined(__AVX512BW__) || \ + defined(__AVX512CD__) || \ + defined(__AVX512DQ__) || \ + defined(__AVX512ER__) || \ + defined(__AVX512PF__) || \ + defined(__AVX512VL__) || \ + defined(__AVX512F__) +# define INCBIN_ALIGNMENT_INDEX 6 +#elif defined(__AVX__) || \ + defined(__AVX2__) +# define INCBIN_ALIGNMENT_INDEX 5 +#elif defined(__SSE__) || \ + defined(__SSE2__) || \ + defined(__SSE3__) || \ + defined(__SSSE3__) || \ + defined(__SSE4_1__) || \ + defined(__SSE4_2__) || \ + defined(__neon__) +# define INCBIN_ALIGNMENT_INDEX 4 +#elif ULONG_MAX != 0xffffffffu +# define INCBIN_ALIGNMENT_INDEX 3 +# else +# define INCBIN_ALIGNMENT_INDEX 2 +#endif + +/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */ +#define INCBIN_ALIGN_SHIFT_0 1 +#define INCBIN_ALIGN_SHIFT_1 2 +#define INCBIN_ALIGN_SHIFT_2 4 +#define INCBIN_ALIGN_SHIFT_3 8 +#define INCBIN_ALIGN_SHIFT_4 16 +#define INCBIN_ALIGN_SHIFT_5 32 +#define INCBIN_ALIGN_SHIFT_6 64 + +/* Actual alignment value */ +#define INCBIN_ALIGNMENT \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \ + INCBIN_ALIGNMENT_INDEX) + +/* Stringize */ +#define INCBIN_STR(X) \ + #X +#define INCBIN_STRINGIZE(X) \ + INCBIN_STR(X) +/* Concatenate */ +#define INCBIN_CAT(X, Y) \ + X ## Y +#define INCBIN_CONCATENATE(X, Y) \ + INCBIN_CAT(X, Y) +/* Deferred macro expansion */ +#define INCBIN_EVAL(X) \ + X +#define INCBIN_INVOKE(N, ...) \ + INCBIN_EVAL(N(__VA_ARGS__)) + +/* Green Hills uses a different directive for including binary data */ +#if defined(__ghs__) +# if (__ghs_asm == 2) +# define INCBIN_MACRO ".file" +/* Or consider the ".myrawdata" entry in the ld file */ +# else +# define INCBIN_MACRO "\tINCBIN" +# endif +#else +# define INCBIN_MACRO ".incbin" +#endif + +#ifndef _MSC_VER +# define INCBIN_ALIGN \ + __attribute__((aligned(INCBIN_ALIGNMENT))) +#else +# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT)) +#endif + +#if defined(__arm__) || /* GNU C and RealView */ \ + defined(__arm) || /* Diab */ \ + defined(_ARM) /* ImageCraft */ +# define INCBIN_ARM +#endif + +#ifdef __GNUC__ +/* Utilize .balign where supported */ +# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".balign 1\n" +#elif defined(INCBIN_ARM) +/* + * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is + * the shift count. This is the value passed to `.align' + */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n" +# define INCBIN_ALIGN_BYTE ".align 0\n" +#else +/* We assume other inline assembler's treat `.align' as `.balign' */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".align 1\n" +#endif + +/* INCBIN_CONST is used by incbin.c generated files */ +#if defined(__cplusplus) +# define INCBIN_EXTERNAL extern "C" +# define INCBIN_CONST extern const +#else +# define INCBIN_EXTERNAL extern +# define INCBIN_CONST const +#endif + +/** + * @brief Optionally override the linker section into which data is emitted. + * + * @warning If you use this facility, you'll have to deal with platform-specific linker output + * section naming on your own + * + * Overriding the default linker output section, e.g for esp8266/Arduino: + * @code + * #define INCBIN_OUTPUT_SECTION ".irom.text" + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * // Data is emitted into program memory that never gets copied to RAM + * @endcode + */ +#if !defined(INCBIN_OUTPUT_SECTION) +# if defined(__APPLE__) +# define INCBIN_OUTPUT_SECTION ".const_data" +# else +# define INCBIN_OUTPUT_SECTION ".rodata" +# endif +#endif + +#if defined(__APPLE__) +/* The directives are different for Apple branded compilers */ +# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# define INCBIN_INT ".long " +# define INCBIN_MANGLE "_" +# define INCBIN_BYTE ".byte " +# define INCBIN_TYPE(...) +#else +# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# if defined(__ghs__) +# define INCBIN_INT ".word " +# else +# define INCBIN_INT ".int " +# endif +# if defined(__USER_LABEL_PREFIX__) +# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__) +# else +# define INCBIN_MANGLE "" +# endif +# if defined(INCBIN_ARM) +/* On arm assemblers, `@' is used as a line comment token */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n" +# elif defined(__MINGW32__) || defined(__MINGW64__) +/* Mingw doesn't support this directive either */ +# define INCBIN_TYPE(NAME) +# else +/* It's safe to use `@' on other architectures */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n" +# endif +# define INCBIN_BYTE ".byte " +#endif + +/* List of style types used for symbol names */ +#define INCBIN_STYLE_CAMEL 0 +#define INCBIN_STYLE_SNAKE 1 + +/** + * @brief Specify the prefix to use for symbol names. + * + * By default this is `g', producing symbols of the form: + * @code + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char gFooData[]; + * // const unsigned char *const gFooEnd; + * // const unsigned int gFooSize; + * @endcode + * + * If however you specify a prefix before including: e.g: + * @code + * #define INCBIN_PREFIX incbin + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols instead: + * // const unsigned char incbinFooData[]; + * // const unsigned char *const incbinFooEnd; + * // const unsigned int incbinFooSize; + * @endcode + */ +#if !defined(INCBIN_PREFIX) +# define INCBIN_PREFIX g +#endif + +/** + * @brief Specify the style used for symbol names. + * + * Possible options are + * - INCBIN_STYLE_CAMEL "CamelCase" + * - INCBIN_STYLE_SNAKE "snake_case" + * + * Default option is *INCBIN_STYLE_CAMEL* producing symbols of the form: + * @code + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char FooData[]; + * // const unsigned char *const FooEnd; + * // const unsigned int FooSize; + * @endcode + * + * If however you specify a style before including: e.g: + * @code + * #define INCBIN_STYLE INCBIN_STYLE_SNAKE + * #include "incbin.h" + * INCBIN(foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char foo_data[]; + * // const unsigned char *const foo_end; + * // const unsigned int foo_size; + * @endcode + */ +#if !defined(INCBIN_STYLE) +# define INCBIN_STYLE INCBIN_STYLE_CAMEL +#endif + +/* Style lookup tables */ +#define INCBIN_STYLE_0_DATA Data +#define INCBIN_STYLE_0_END End +#define INCBIN_STYLE_0_SIZE Size +#define INCBIN_STYLE_1_DATA _data +#define INCBIN_STYLE_1_END _end +#define INCBIN_STYLE_1_SIZE _size + +/* Style lookup: returning identifier */ +#define INCBIN_STYLE_IDENT(TYPE) \ + INCBIN_CONCATENATE( \ + INCBIN_STYLE_, \ + INCBIN_CONCATENATE( \ + INCBIN_EVAL(INCBIN_STYLE), \ + INCBIN_CONCATENATE(_, TYPE))) + +/* Style lookup: returning string literal */ +#define INCBIN_STYLE_STRING(TYPE) \ + INCBIN_STRINGIZE( \ + INCBIN_STYLE_IDENT(TYPE)) \ + +/* Generate the global labels by indirectly invoking the macro with our style + * type and concatenating the name against them. */ +#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \ + INCBIN_INVOKE( \ + INCBIN_GLOBAL, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) \ + INCBIN_INVOKE( \ + INCBIN_TYPE, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) + +/** + * @brief Externally reference binary data included in another translation unit. + * + * Produces three external symbols that reference the binary data included in + * another translation unit. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name given for the binary data + * + * @code + * INCBIN_EXTERN(Foo); + * + * // Now you have the following symbols: + * // extern const unsigned char FooData[]; + * // extern const unsigned char *const FooEnd; + * // extern const unsigned int FooSize; + * @endcode + */ +#define INCBIN_EXTERN(NAME) \ + INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(DATA))[]; \ + INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char *const \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(END)); \ + INCBIN_EXTERNAL const unsigned int \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(SIZE)) + +/** + * @brief Include a binary file into the current translation unit. + * + * Includes a binary file into the current translation unit, producing three symbols + * for objects that encode the data and size respectively. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name to associate with this binary data (as an identifier.) + * @param FILENAME The file to include (as a string literal.) + * + * @code + * INCBIN(Icon, "icon.png"); + * + * // Now you have the following symbols: + * // const unsigned char IconData[]; + * // const unsigned char *const IconEnd; + * // const unsigned int IconSize; + * @endcode + * + * @warning This must be used in global scope + * @warning The identifiers may be different if INCBIN_STYLE is not default + * + * To externally reference the data included by this in another translation unit + * please @see INCBIN_EXTERN. + */ +#ifdef _MSC_VER +#define INCBIN(NAME, FILENAME) \ + INCBIN_EXTERN(NAME) +#else +#define INCBIN(NAME, FILENAME) \ + __asm__(INCBIN_SECTION \ + INCBIN_GLOBAL_LABELS(NAME, DATA) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \ + INCBIN_MACRO " \"" FILENAME "\"\n" \ + INCBIN_GLOBAL_LABELS(NAME, END) \ + INCBIN_ALIGN_BYTE \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \ + INCBIN_BYTE "1\n" \ + INCBIN_GLOBAL_LABELS(NAME, SIZE) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \ + INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \ + INCBIN_ALIGN_HOST \ + ".text\n" \ + ); \ + INCBIN_EXTERN(NAME) + +#endif +#endif diff --git a/src/main.cpp b/src/main.cpp index fbad6622..f95db1c2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -35,6 +35,7 @@ int main(int argc, char* argv[]) { std::cout << engine_info() << std::endl; + CommandLine::init(argc, argv); UCI::init(Options); Tune::init(); PSQT::init(); diff --git a/src/misc.cpp b/src/misc.cpp index 80c436ac..3fbdea35 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -132,6 +132,7 @@ public: } // namespace + /// engine_info() returns the full name of the current Stockfish version. This /// will be either "Stockfish DD-MM-YY" (where DD-MM-YY is the date when /// the program was compiled) or "Stockfish ", depending on whether @@ -589,3 +590,61 @@ void bindThisThread(size_t idx) { #endif } // namespace WinProcGroup + +#ifdef _WIN32 +#include +#define GETCWD _getcwd +#else +#include +#define GETCWD getcwd +#endif + +namespace CommandLine { + +string argv0; // path+name of the executable binary, as given by argv[0] +string binaryDirectory; // path of the executable directory +string workingDirectory; // path of the working directory +string pathSeparator; // Separator for our current OS + +void init(int argc, char* argv[]) { + (void)argc; + string separator; + + // extract the path+name of the executable binary + argv0 = argv[0]; + +#ifdef _WIN32 + pathSeparator = "\\"; + #ifdef _MSC_VER + // Under windows argv[0] may not have the extension. Also _get_pgmptr() had + // issues in some windows 10 versions, so check returned values carefully. + char* pgmptr = nullptr; + if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr) + argv0 = pgmptr; + #endif +#else + pathSeparator = "/"; +#endif + + // extract the working directory + workingDirectory = ""; + char buff[40000]; + char* cwd = GETCWD(buff, 40000); + if (cwd) + workingDirectory = cwd; + + // extract the binary directory path from argv0 + binaryDirectory = argv0; + size_t pos = binaryDirectory.find_last_of("\\/"); + if (pos == std::string::npos) + binaryDirectory = "." + pathSeparator; + else + binaryDirectory.resize(pos + 1); + + // pattern replacement: "./" at the start of path is replaced by the working directory + if (binaryDirectory.find("." + pathSeparator) == 0) + binaryDirectory.replace(0, 1, workingDirectory); +} + + +} // namespace CommandLine diff --git a/src/misc.h b/src/misc.h index 8ad17b50..68b9c884 100644 --- a/src/misc.h +++ b/src/misc.h @@ -42,9 +42,7 @@ void dbg_mean_of(int v); void dbg_print(); typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds - static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits"); - inline TimePoint now() { return std::chrono::duration_cast (std::chrono::steady_clock::now().time_since_epoch()).count(); @@ -126,4 +124,11 @@ namespace WinProcGroup { void bindThisThread(size_t idx); } +namespace CommandLine { + void init(int argc, char* argv[]); + + extern std::string binaryDirectory; // path of the executable directory + extern std::string workingDirectory; // path of the working directory +} + #endif // #ifndef MISC_H_INCLUDED diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index e6619089..d6ac9894 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -18,7 +18,6 @@ // Code for calculating NNUE evaluation function -#include #include #include @@ -143,17 +142,12 @@ namespace Eval::NNUE { return accumulator.score; } - // Load the evaluation function file - bool load_eval_file(const std::string& evalFile) { + // Load eval, from a file stream or a memory stream + bool load_eval(std::string streamName, std::istream& stream) { Initialize(); - fileName = evalFile; - - std::ifstream stream(evalFile, std::ios::binary); - - const bool result = ReadParameters(stream); - - return result; + fileName = streamName; + return ReadParameters(stream); } // Evaluation function. Perform differential calculation. diff --git a/src/ucioption.cpp b/src/ucioption.cpp index ec83c7c8..5e747a7f 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -21,6 +21,7 @@ #include #include +#include "evaluate.h" #include "misc.h" #include "search.h" #include "thread.h" @@ -79,9 +80,7 @@ void init(OptionsMap& o) { o["Syzygy50MoveRule"] << Option(true); o["SyzygyProbeLimit"] << Option(7, 0, 7); o["Use NNUE"] << Option(true, on_use_NNUE); - // The default must follow the format nn-[SHA256 first 12 digits].nnue - // for the build process (profile-build and fishtest) to work. - o["EvalFile"] << Option("nn-82215d0fd0df.nnue", on_eval_file); + o["EvalFile"] << Option(EvalFileDefaultName, on_eval_file); } From e4ed7d3dd7b8895ce523180cb3da3ec2714050fc Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Wed, 26 Aug 2020 18:00:54 +0200 Subject: [PATCH 02/35] Cleaner make help do not print details if ARCH is an empty string. Follow up for b0b4ca17db49ed03057b5fa4ee4a12dab0e9c9e6 https://github.com/official-stockfish/Stockfish/pull/3071 No functional change --- src/Makefile | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/Makefile b/src/Makefile index 5f363f02..9ae5a51c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -85,12 +85,17 @@ endif ### 2.1. General and architecture defaults +ifeq ($(ARCH),) + ARCH = x86-64-modern + help_skip_sanity = yes +endif # explicitly check for the list of supported architectures (as listed with make help), # the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true` -ifeq ($(ARCH),$(filter $(ARCH),x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \ - x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ - x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \ - armv7 armv7-neon armv8 apple-silicon general-64 general-32)) +ifeq ($(ARCH), $(filter $(ARCH), \ + x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \ + x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ + x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \ + armv7 armv7-neon armv8 apple-silicon general-64 general-32)) SUPPORTED_ARCH=true else SUPPORTED_ARCH=false @@ -113,7 +118,6 @@ avx512 = no vnni256 = no vnni512 = no neon = no -ARCH = x86-64-modern STRIP = strip ### 2.2 Architecture specific @@ -695,11 +699,12 @@ help: @echo "make -j build ARCH=x86-64-ssse3 COMP=clang" @echo "" @echo "-------------------------------" -ifeq ($(SUPPORTED_ARCH), true) +ifeq ($(SUPPORTED_ARCH)$(help_skip_sanity), true) @echo "The selected architecture $(ARCH) will enable the following configuration: " @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity else @echo "Specify a supported architecture with the ARCH option for more details" + @echo "" endif From d90d893b5eeea020b2d59d1372f5aa0a20b45412 Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Fri, 28 Aug 2020 09:27:15 +0200 Subject: [PATCH 03/35] Reintroduce depth reduction Reintroduce depth reduction if the position is not in TT. STC https://tests.stockfishchess.org/tests/view/5f4652e85089a564a10d868c LLR: 2.97 (-2.94,2.94) {-0.25,1.25} Total: 40240 W: 4535 L: 4331 D: 31374 Ptnml(0-2): 215, 3276, 12969, 3410, 250 LTC https://tests.stockfishchess.org/tests/view/5f46ca5e5089a564a10d86f3 LLR: 2.93 (-2.94,2.94) {0.25,1.25} Total: 63096 W: 3426 L: 3188 D: 56482 Ptnml(0-2): 51, 2798, 25645, 2970, 84 closes https://github.com/official-stockfish/Stockfish/pull/3072 bench: 3611906 --- src/search.cpp | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index cae8a684..77447043 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -939,6 +939,12 @@ namespace { } } } + + // Step 11. If the position is not in TT, decrease depth by 2 + if ( PvNode + && depth >= 6 + && !ttMove) + depth -= 2; moves_loop: // When in check, search starts from here @@ -963,7 +969,7 @@ moves_loop: // When in check, search starts from here // Mark this node as being searched ThreadHolding th(thisThread, posKey, ss->ply); - // Step 11. Loop through all pseudo-legal moves until no moves remain + // Step 12. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) { @@ -1001,7 +1007,7 @@ moves_loop: // When in check, search starts from here // Calculate new depth for this move newDepth = depth - 1; - // Step 12. Pruning at shallow depth (~200 Elo) + // Step 13. Pruning at shallow depth (~200 Elo) if ( !rootNode && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) @@ -1059,7 +1065,7 @@ moves_loop: // When in check, search starts from here } } - // Step 13. Extensions (~75 Elo) + // Step 14. Extensions (~75 Elo) // Singular extension search (~70 Elo). If all moves but one fail low on a // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), @@ -1142,10 +1148,10 @@ moves_loop: // When in check, search starts from here [movedPiece] [to_sq(move)]; - // Step 14. Make the move + // Step 15. Make the move pos.do_move(move, st, givesCheck); - // Step 15. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be + // Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be // re-searched at full depth. if ( depth >= 3 && moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2) @@ -1248,7 +1254,7 @@ moves_loop: // When in check, search starts from here didLMR = false; } - // Step 16. Full depth search when LMR is skipped or fails high + // Step 17. Full depth search when LMR is skipped or fails high if (doFullDepthSearch) { value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); @@ -1276,12 +1282,12 @@ moves_loop: // When in check, search starts from here value = -search(pos, ss+1, -beta, -alpha, newDepth, false); } - // Step 17. Undo move + // Step 18. Undo move pos.undo_move(move); assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - // Step 18. Check for a new best move + // Step 19. Check for a new best move // Finished searching the move. If a stop occurred, the return value of // the search cannot be trusted, and we return immediately without // updating best move, PV and TT. @@ -1358,7 +1364,7 @@ moves_loop: // When in check, search starts from here return VALUE_DRAW; */ - // Step 19. Check for mate and stalemate + // Step 20. Check for mate and stalemate // All legal moves have been searched and if there are no legal moves, it // must be a mate or a stalemate. If we are in a singular extension search then // return a fail low score. From c02b3a4c7a339d212d5c6f75b3b89c926d33a800 Mon Sep 17 00:00:00 2001 From: MJZ1977 <37274752+MJZ1977@users.noreply.github.com> Date: Fri, 28 Aug 2020 12:06:36 +0200 Subject: [PATCH 04/35] Add / remove leaves from search tree ttPv add if previous leaf is in search tree and we didn't find a counter move else remove the position if the leaf is the last one in search tree. STC : https://tests.stockfishchess.org/tests/view/5f49203c3def640786115314 LLR: 2.95 (-2.94,2.94) {-0.25,1.25} Total: 29968 W: 3381 L: 3195 D: 23392 Ptnml(0-2): 146, 2432, 9671, 2560, 175 LTC : https://tests.stockfishchess.org/tests/view/5f494bea3def640786115336 LLR: 2.96 (-2.94,2.94) {0.25,1.25} Total: 84952 W: 4619 L: 4333 D: 76000 Ptnml(0-2): 86, 3765, 34481, 4065, 79 closes https://github.com/official-stockfish/Stockfish/pull/3075 Bench 3527337 --- src/search.cpp | 34 ++++++++++++++++++++++++---------- src/search.h | 1 + 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 77447043..a2342a3c 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -597,7 +597,7 @@ namespace { Move ttMove, move, excludedMove, bestMove; Depth extension, newDepth; Value bestValue, value, ttValue, eval, maxValue, probCutBeta; - bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture; + bool ttHit, formerPv, givesCheck, improving, didLMR, priorCapture; bool captureOrPromotion, doFullDepthSearch, moveCountPruning, ttCapture, singularQuietLMR; Piece movedPiece; @@ -644,6 +644,7 @@ namespace { assert(0 <= ss->ply && ss->ply < MAX_PLY); (ss+1)->ply = ss->ply + 1; + (ss+1)->ttPv = false; (ss+1)->excludedMove = bestMove = MOVE_NONE; (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE; Square prevSq = to_sq((ss-1)->currentMove); @@ -667,10 +668,11 @@ namespace { ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] : ttHit ? tte->move() : MOVE_NONE; - ttPv = PvNode || (ttHit && tte->is_pv()); - formerPv = ttPv && !PvNode; + if (!excludedMove) + ss->ttPv = PvNode || (ttHit && tte->is_pv()); + formerPv = ss->ttPv && !PvNode; - if ( ttPv + if ( ss->ttPv && depth > 12 && ss->ply - 1 < MAX_LPH && !priorCapture @@ -748,7 +750,7 @@ namespace { if ( b == BOUND_EXACT || (b == BOUND_LOWER ? value >= beta : value <= alpha)) { - tte->save(posKey, value_to_tt(value, ss->ply), ttPv, b, + tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, b, std::min(MAX_PLY - 1, depth + 6), MOVE_NONE, VALUE_NONE); @@ -798,7 +800,7 @@ namespace { else ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo; - tte->save(posKey, VALUE_NONE, ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); + tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); } // Step 7. Razoring (~1 Elo) @@ -824,7 +826,7 @@ namespace { && (ss-1)->statScore < 22977 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ttPv + 182 + && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ss->ttPv + 182 && !excludedMove && pos.non_pawn_material(us) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) @@ -898,6 +900,8 @@ namespace { assert(probCutBeta < VALUE_INFINITE); MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory); int probCutCount = 0; + bool ttPv = ss->ttPv; + ss->ttPv = false; while ( (move = mp.next_move()) != MOVE_NONE && probCutCount < 2 + 2 * cutNode) @@ -938,6 +942,7 @@ namespace { return value; } } + ss->ttPv = ttPv; } // Step 11. If the position is not in TT, decrease depth by 2 @@ -1180,7 +1185,7 @@ moves_loop: // When in check, search starts from here r++; // Decrease reduction if position is or has been on the PV (~10 Elo) - if (ttPv) + if (ss->ttPv) r -= 2; if (moveCountPruning && !formerPv) @@ -1209,7 +1214,7 @@ moves_loop: // When in check, search starts from here // hence break make_move(). (~2 Elo) else if ( type_of(move) == NORMAL && !pos.see_ge(reverse_move(move))) - r -= 2 + ttPv - (type_of(movedPiece) == PAWN); + r -= 2 + ss->ttPv - (type_of(movedPiece) == PAWN); ss->statScore = thisThread->mainHistory[us][from_to(move)] + (*contHist[0])[movedPiece][to_sq(move)] @@ -1387,8 +1392,17 @@ moves_loop: // When in check, search starts from here if (PvNode) bestValue = std::min(bestValue, maxValue); + // If no good move is found and the previous position was ttPv, then the previous + // opponent move is probably good and the new position is added to the search tree. + if (bestValue <= alpha) + ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3); + // Otherwise, a counter move has been found and if the position is the last leaf + // in the search tree, remove the position from the search tree. + else if (depth > 3) + ss->ttPv = ss->ttPv && (ss+1)->ttPv; + if (!excludedMove && !(rootNode && thisThread->pvIdx)) - tte->save(posKey, value_to_tt(bestValue, ss->ply), ttPv, + tte->save(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv, bestValue >= beta ? BOUND_LOWER : PvNode && bestMove ? BOUND_EXACT : BOUND_UPPER, depth, bestMove, ss->staticEval); diff --git a/src/search.h b/src/search.h index 2554f3fb..79085189 100644 --- a/src/search.h +++ b/src/search.h @@ -48,6 +48,7 @@ struct Stack { int statScore; int moveCount; bool inCheck; + bool ttPv; }; From 9b5b9ec9a6a3a0e46ac00f58976887560948a7e2 Mon Sep 17 00:00:00 2001 From: VoyagerOne Date: Sat, 29 Aug 2020 21:13:05 -0400 Subject: [PATCH 05/35] QS Pruning Simplification Remove depth dependence in QS pruning STC: LLR: 2.95 (-2.94,2.94) {-1.25,0.25} Total: 40536 W: 4442 L: 4358 D: 31736 Ptnml(0-2): 209, 3330, 13118, 3390, 221 https://tests.stockfishchess.org/tests/view/5f49035b3def6407861152f9 LTC: LLR: 2.95 (-2.94,2.94) {-0.75,0.25} Total: 97104 W: 5164 L: 5130 D: 86810 Ptnml(0-2): 103, 4478, 39377, 4470, 124 https://tests.stockfishchess.org/tests/view/5f4939d53def640786115322 closes https://github.com/official-stockfish/Stockfish/pull/3077 Bench: 3865238 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index a2342a3c..b319dff5 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1585,7 +1585,7 @@ moves_loop: // When in check, search starts from here [to_sq(move)]; if ( !captureOrPromotion - && moveCount >= abs(depth) + 1 + && moveCount && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold) continue; From e0bafa1911ede61b9268e0b461a5d8856d6cd6be Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Sun, 30 Aug 2020 13:58:05 +0300 Subject: [PATCH 06/35] Update parametes in classical evaluation. Passed STC (NNUE=False): https://tests.stockfishchess.org/tests/view/5f42edfe5089a564a10d84a0 LLR: 2.96 (-2.94,2.94) {-0.25,1.25} Total: 13840 W: 2591 L: 2336 D: 8913 Ptnml(0-2): 194, 1453, 3387, 1676, 210 Passed LTC (NNUE=False): https://tests.stockfishchess.org/tests/view/5f4369795089a564a10d84d8 LLR: 2.95 (-2.94,2.94) {0.25,1.25} Total: 159744 W: 19430 L: 18850 D: 121464 Ptnml(0-2): 960, 14185, 49030, 14709, 988 closes https://github.com/official-stockfish/Stockfish/pull/3080 bench: 3736029 --- src/evaluate.cpp | 10 +++++----- src/search.cpp | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 67154751..09f36513 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -227,26 +227,26 @@ namespace { // Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a // pawn protected square on rank 4 to 6 which is also safe from a pawn attack. - constexpr Score Outpost[] = { S(56, 36), S(30, 23) }; + constexpr Score Outpost[] = { S(56, 34), S(31, 23) }; // PassedRank[Rank] contains a bonus according to the rank of a passed pawn constexpr Score PassedRank[RANK_NB] = { - S(0, 0), S(10, 28), S(17, 33), S(15, 41), S(62, 72), S(168, 177), S(276, 260) + S(0, 0), S(9, 28), S(15, 31), S(17, 39), S(64, 70), S(171, 177), S(277, 260) }; // RookOnFile[semiopen/open] contains bonuses for each rook when there is // no (friendly) pawn on the rook file. - constexpr Score RookOnFile[] = { S(19, 7), S(48, 29) }; + constexpr Score RookOnFile[] = { S(19, 7), S(48, 27) }; // ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to // which piece type attacks which one. Attacks on lesser pieces which are // pawn-defended are not considered. constexpr Score ThreatByMinor[PIECE_TYPE_NB] = { - S(0, 0), S(5, 32), S(57, 41), S(77, 56), S(88, 119), S(79, 161) + S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162) }; constexpr Score ThreatByRook[PIECE_TYPE_NB] = { - S(0, 0), S(3, 46), S(37, 68), S(42, 60), S(0, 38), S(58, 41) + S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43) }; // Assorted bonuses and penalties diff --git a/src/search.cpp b/src/search.cpp index b319dff5..e6e53e7c 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -944,7 +944,7 @@ namespace { } ss->ttPv = ttPv; } - + // Step 11. If the position is not in TT, decrease depth by 2 if ( PvNode && depth >= 6 From a0afe32d16554ff3b5c74f34ae56400f35759edf Mon Sep 17 00:00:00 2001 From: mstembera Date: Sun, 30 Aug 2020 18:40:49 -0700 Subject: [PATCH 07/35] Use stable sort to make sure bench with TB yields same results everywhere. std::sort() is not stable so different implementations can produce different results: use the stable version instead. Observed for '8/6k1/5r2/8/8/8/1K6/Q7 w - - 0 1' yielding different bench results for gcc and MSVC and 3-4-5 syzygy TB prior to this patch. closes https://github.com/official-stockfish/Stockfish/pull/3083 No functional change. --- src/search.cpp | 2 +- src/syzygy/tbprobe.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index e6e53e7c..c676bd6d 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1964,7 +1964,7 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) { if (RootInTB) { // Sort moves according to TB rank - std::sort(rootMoves.begin(), rootMoves.end(), + std::stable_sort(rootMoves.begin(), rootMoves.end(), [](const RootMove &a, const RootMove &b) { return a.tbRank > b.tbRank; } ); // Probe during search only if DTZ is not available and we are winning diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index 20215b96..3dfe3e3e 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -758,7 +758,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu if (entry->hasPawns) { idx = LeadPawnIdx[leadPawnsCnt][squares[0]]; - std::sort(squares + 1, squares + leadPawnsCnt, pawns_comp); + std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp); for (int i = 1; i < leadPawnsCnt; ++i) idx += Binomial[i][MapPawns[squares[i]]]; @@ -859,7 +859,7 @@ encode_remaining: while (d->groupLen[++next]) { - std::sort(groupSq, groupSq + d->groupLen[next]); + std::stable_sort(groupSq, groupSq + d->groupLen[next]); uint64_t n = 0; // Map down a square if "comes later" than a square in the previous From a057f170c6920fc4d1abdae619c5259e9d80703c Mon Sep 17 00:00:00 2001 From: mstembera Date: Sun, 30 Aug 2020 20:48:10 -0700 Subject: [PATCH 08/35] Use llvm linker with clang on windows for LTO. other linkers might fail to link during the LTO phase. The linker might have to be installed using `pacman -Syu mingw-w64-x86_64-lld` closes https://github.com/official-stockfish/Stockfish/pull/3084 No functional change. --- src/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Makefile b/src/Makefile index 9ae5a51c..340b3008 100644 --- a/src/Makefile +++ b/src/Makefile @@ -595,6 +595,11 @@ ifeq ($(debug), no) LDFLAGS += $(CXXFLAGS) else ifeq ($(comp),clang) CXXFLAGS += -flto=thin + ifneq ($(findstring MINGW,$(KERNEL)),) + CXXFLAGS += -fuse-ld=lld + else ifneq ($(findstring MSYS,$(KERNEL)),) + CXXFLAGS += -fuse-ld=lld + endif LDFLAGS += $(CXXFLAGS) # GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be From 61381372ec896ae6b0f139555e6e3f816d8aa570 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Mon, 31 Aug 2020 22:53:20 +0200 Subject: [PATCH 09/35] Always print an info line before a bestmove if very few nodes are being searched before a bestmove is reported, an info line might be missing. fixes https://github.com/official-stockfish/Stockfish/issues/2757 closes https://github.com/official-stockfish/Stockfish/pull/3088 no functional change --- src/search.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index c676bd6d..c15cd753 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1861,12 +1861,15 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { { bool updated = rootMoves[i].score != -VALUE_INFINITE; - if (depth == 1 && !updated) + if (depth == 1 && !updated && i > 0) continue; - Depth d = updated ? depth : depth - 1; + Depth d = updated ? depth : std::max(1, depth - 1); Value v = updated ? rootMoves[i].score : rootMoves[i].previousScore; + if (v == -VALUE_INFINITE) + v = VALUE_ZERO; + bool tb = TB::RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY; v = tb ? rootMoves[i].tbScore : v; From a8bbaa17954471cf3fd8d168f1cafe3f2034730e Mon Sep 17 00:00:00 2001 From: VoyagerOne Date: Sun, 30 Aug 2020 13:57:57 -0400 Subject: [PATCH 10/35] LMR Root Node Simplification Simplify LMR at Root node STC: LLR: 2.94 (-2.94,2.94) {-1.25,0.25} Total: 71520 W: 7649 L: 7614 D: 56257 Ptnml(0-2): 346, 5845, 23349, 5868, 352 https://tests.stockfishchess.org/tests/view/5f4be8c0ba100690c5cc5cbb LTC: LLR: 2.95 (-2.94,2.94) {-0.75,0.25} Total: 74832 W: 3997 L: 3948 D: 66887 Ptnml(0-2): 77, 3422, 30362, 3485, 70 https://tests.stockfishchess.org/tests/view/5f4c603eba100690c5cc5d0e closes https://github.com/official-stockfish/Stockfish/pull/3091 Bench: 3624569 --- src/search.cpp | 1 - src/thread.cpp | 11 ----------- src/thread.h | 1 - 3 files changed, 13 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index c15cd753..b79fa6be 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1160,7 +1160,6 @@ moves_loop: // When in check, search starts from here // re-searched at full depth. if ( depth >= 3 && moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2) - && (!rootNode || thisThread->best_move_count(move) == 0) && ( !captureOrPromotion || moveCountPruning || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha diff --git a/src/thread.cpp b/src/thread.cpp index 1aa66a81..b46fce5e 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -51,17 +51,6 @@ Thread::~Thread() { } -/// Thread::bestMoveCount(Move move) return best move counter for the given root move - -int Thread::best_move_count(Move move) const { - - auto rm = std::find(rootMoves.begin() + pvIdx, - rootMoves.begin() + pvLast, move); - - return rm != rootMoves.begin() + pvLast ? rm->bestMoveCount : 0; -} - - /// Thread::clear() reset histories, usually before a new game void Thread::clear() { diff --git a/src/thread.h b/src/thread.h index 042bc2e9..34b99015 100644 --- a/src/thread.h +++ b/src/thread.h @@ -54,7 +54,6 @@ public: void idle_loop(); void start_searching(); void wait_for_search_finished(); - int best_move_count(Move move) const; Pawns::Table pawnsTable; Material::Table materialTable; From be87517734e2a1b222d1a35e98764382b4176732 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Tue, 1 Sep 2020 12:22:47 +0200 Subject: [PATCH 11/35] Only use MADV_RANDOM if defined needed to compile on Haiku. fixes https://github.com/official-stockfish/Stockfish/issues/3093 closes https://github.com/official-stockfish/Stockfish/pull/3094 No functional change --- src/syzygy/tbprobe.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index 3dfe3e3e..4d682f1a 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -223,7 +223,9 @@ public: *mapping = statbuf.st_size; *baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0); +#if defined(MADV_RANDOM) madvise(*baseAddress, statbuf.st_size, MADV_RANDOM); +#endif ::close(fd); if (*baseAddress == MAP_FAILED) From aa2de712302a2379d8aa26127d86455ad276f512 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Wed, 2 Sep 2020 08:05:08 +0200 Subject: [PATCH 12/35] Update CPU contributors list with fishtest data of Sept. 2 2020 closes https://github.com/official-stockfish/Stockfish/pull/3095 No functional change --- Top CPU Contributors.txt | 319 +++++++++++++++++++++------------------ 1 file changed, 169 insertions(+), 150 deletions(-) diff --git a/Top CPU Contributors.txt b/Top CPU Contributors.txt index 0ea5ac72..482e9000 100644 --- a/Top CPU Contributors.txt +++ b/Top CPU Contributors.txt @@ -1,154 +1,173 @@ -Contributors with >10,000 CPU hours as of January 7, 2020 +Contributors with >10,000 CPU hours as of Sept 2, 2020 Thank you! Username CPU Hours Games played -------------------------------------------------- -noobpwnftw 9305707 695548021 -mlang 780050 61648867 -dew 621626 43921547 -mibere 524702 42238645 -crunchy 354587 27344275 -cw 354495 27274181 -fastgm 332801 22804359 -JojoM 295750 20437451 -CSU_Dynasty 262015 21828122 -Fisherman 232181 18939229 -ctoks 218866 17622052 -glinscott 201989 13780820 -tvijlbrief 201204 15337115 -velislav 188630 14348485 -gvreuls 187164 15149976 -bking_US 180289 11876016 -nordlandia 172076 13467830 -leszek 157152 11443978 -Thanar 148021 12365359 -spams 141975 10319326 -drabel 138073 11121749 -vdv 137850 9394330 -mgrabiak 133578 10454324 -TueRens 132485 10878471 -bcross 129683 11557084 -marrco 126078 9356740 -sqrt2 125830 9724586 -robal 122873 9593418 -vdbergh 120766 8926915 -malala 115926 8002293 -CoffeeOne 114241 5004100 -dsmith 113189 7570238 -BrunoBanani 104644 7436849 -Data 92328 8220352 -mhoram 89333 6695109 -davar 87924 7009424 -xoto 81094 6869316 -ElbertoOne 80899 7023771 -grandphish2 78067 6160199 -brabos 77212 6186135 -psk 75733 5984901 -BRAVONE 73875 5054681 -sunu 70771 5597972 -sterni1971 70605 5590573 -MaZePallas 66886 5188978 -Vizvezdenec 63708 4967313 -nssy 63462 5259388 -jromang 61634 4940891 -teddybaer 61231 5407666 -Pking_cda 60099 5293873 -solarlight 57469 5028306 -dv8silencer 56913 3883992 -tinker 54936 4086118 -renouve 49732 3501516 -Freja 49543 3733019 -robnjr 46972 4053117 -rap 46563 3219146 -Bobo1239 46036 3817196 -ttruscott 45304 3649765 -racerschmacer 44881 3975413 -finfish 44764 3370515 -eva42 41783 3599691 -biffhero 40263 3111352 -bigpen0r 39817 3291647 -mhunt 38871 2691355 -ronaldjerum 38820 3240695 -Antihistamine 38785 2761312 -pb00067 38038 3086320 -speedycpu 37591 3003273 -rkl 37207 3289580 -VoyagerOne 37050 3441673 -jbwiebe 35320 2805433 -cuistot 34191 2146279 -homyur 33927 2850481 -manap 32873 2327384 -gri 32538 2515779 -oryx 31267 2899051 -EthanOConnor 30959 2090311 -SC 30832 2730764 -csnodgrass 29505 2688994 -jmdana 29458 2205261 -strelock 28219 2067805 -jkiiski 27832 1904470 -Pyafue 27533 1902349 -Garf 27515 2747562 -eastorwest 27421 2317535 -slakovv 26903 2021889 -Prcuvu 24835 2170122 -anst 24714 2190091 -hyperbolic.tom 24319 2017394 -Patrick_G 23687 1801617 -Sharaf_DG 22896 1786697 -nabildanial 22195 1519409 -chriswk 21931 1868317 -achambord 21665 1767323 -Zirie 20887 1472937 -team-oh 20217 1636708 -Isidor 20096 1680691 -ncfish1 19931 1520927 -nesoneg 19875 1463031 -Spprtr 19853 1548165 -JanErik 19849 1703875 -agg177 19478 1395014 -SFTUser 19231 1567999 -xor12 19017 1680165 -sg4032 18431 1641865 -rstoesser 18118 1293588 -MazeOfGalious 17917 1629593 -j3corre 17743 941444 -cisco2015 17725 1690126 -ianh2105 17706 1632562 -dex 17678 1467203 -jundery 17194 1115855 -iisiraider 17019 1101015 -horst.prack 17012 1465656 -Adrian.Schmidt123 16563 1281436 -purplefishies 16342 1092533 -wei 16274 1745989 -ville 16144 1384026 -eudhan 15712 1283717 -OuaisBla 15581 972000 -DragonLord 15559 1162790 -dju 14716 875569 -chris 14479 1487385 -0xB00B1ES 14079 1001120 -OssumOpossum 13776 1007129 -enedene 13460 905279 -bpfliegel 13346 884523 -Ente 13198 1156722 -IgorLeMasson 13087 1147232 -jpulman 13000 870599 -ako027ako 12775 1173203 -Nikolay.IT 12352 1068349 -Andrew Grant 12327 895539 -joster 12008 950160 -AdrianSA 11996 804972 -Nesa92 11455 1111993 -fatmurphy 11345 853210 -Dark_wizzie 11108 1007152 -modolief 10869 896470 -mschmidt 10757 803401 -infinity 10594 727027 -mabichito 10524 749391 -Thomas A. Anderson 10474 732094 -thijsk 10431 719357 -Flopzee 10339 894821 -crocogoat 10104 1013854 -SapphireBrand 10104 969604 -stocky 10017 699440 +noobpwnftw 19352969 1231459677 +mlang 957168 61657446 +dew 949885 56893432 +mibere 703817 46865007 +crunchy 427035 27344275 +cw 416006 27521077 +JojoM 415904 24479564 +fastgm 404873 23953472 +CSU_Dynasty 335774 22850550 +tvijlbrief 335199 21871270 +Fisherman 325053 21786603 +gvreuls 311480 20751516 +ctoks 275877 18710423 +velislav 241267 15596372 +glinscott 217799 13780820 +nordlandia 211692 13484886 +bcross 206213 14934233 +bking_US 198894 11876016 +leszek 189170 11446821 +mgrabiak 183896 11778092 +drabel 181408 12489478 +TueRens 181349 12192000 +Thanar 179852 12365359 +vdv 175171 9881246 +robal 166948 10702862 +spams 157128 10319326 +marrco 149947 9376421 +sqrt2 147963 9724586 +vdbergh 137041 8926915 +CoffeeOne 136294 5004100 +malala 136182 8002293 +mhoram 128934 8177193 +davar 122092 7960001 +dsmith 122059 7570238 +xoto 119696 8222144 +grandphish2 116481 7582197 +Data 113305 8220352 +BrunoBanani 112960 7436849 +ElbertoOne 99028 7023771 +MaZePallas 98571 6362619 +brabos 92118 6186135 +psk 89957 5984901 +sunu 88463 6007033 +sterni1971 86948 5613788 +Vizvezdenec 83752 5343724 +BRAVONE 81239 5054681 +nssy 76497 5259388 +teddybaer 75125 5407666 +Pking_cda 73776 5293873 +jromang 70695 4940891 +solarlight 70517 5028306 +dv8silencer 70287 3883992 +Bobo1239 68515 4652287 +racerschmacer 67468 4935996 +manap 66273 4121774 +tinker 63458 4213726 +linrock 59082 4516053 +robnjr 57262 4053117 +Freja 56938 3733019 +ttruscott 56005 3679485 +renouve 53811 3501516 +cuistot 52532 3014920 +finfish 51360 3370515 +eva42 51272 3599691 +rkl 50759 3840947 +rap 49985 3219146 +pb00067 49727 3298270 +ronaldjerum 47654 3240695 +bigpen0r 47278 3291647 +biffhero 46564 3111352 +VoyagerOne 45386 3445881 +speedycpu 43842 3003273 +jbwiebe 43305 2805433 +Antihistamine 41788 2761312 +mhunt 41735 2691355 +eastorwest 40387 2812173 +homyur 39893 2850481 +gri 39871 2515779 +oryx 38228 2941656 +0x3C33 37773 2529097 +SC 37290 2731014 +csnodgrass 36207 2688994 +jmdana 36108 2205261 +strelock 34716 2074055 +Garf 33800 2747562 +EthanOConnor 33370 2090311 +slakovv 32915 2021889 +Spprtr 32591 2139601 +Prcuvu 30377 2170122 +anst 30301 2190091 +jkiiski 30136 1904470 +hyperbolic.tom 29840 2017394 +Pyafue 29650 1902349 +OuaisBla 27629 1578000 +chriswk 26902 1868317 +achambord 26582 1767323 +Patrick_G 26276 1801617 +yorkman 26193 1992080 +SFTUser 25182 1675689 +nabildanial 24942 1519409 +Sharaf_DG 24765 1786697 +ncfish1 24411 1520927 +agg177 23890 1395014 +JanErik 23408 1703875 +Isidor 23388 1680691 +Norabor 22976 1587862 +cisco2015 22880 1759669 +Zirie 22542 1472937 +team-oh 22272 1636708 +MazeOfGalious 21978 1629593 +sg4032 21945 1643065 +ianh2105 21725 1632562 +xor12 21628 1680365 +dex 21612 1467203 +nesoneg 21494 1463031 +horst.prack 20878 1465656 +0xB00B1ES 20590 1208666 +j3corre 20405 941444 +Adrian.Schmidt123 20316 1281436 +wei 19973 1745989 +rstoesser 19569 1293588 +eudhan 19274 1283717 +Ente 19070 1373058 +jundery 18445 1115855 +iisiraider 18247 1101015 +ville 17883 1384026 +chris 17698 1487385 +purplefishies 17595 1092533 +DragonLord 17014 1162790 +dju 16515 929427 +IgorLeMasson 16064 1147232 +ako027ako 15671 1173203 +Nikolay.IT 15154 1068349 +Andrew Grant 15114 895539 +yurikvelo 15027 1165616 +OssumOpossum 14857 1007129 +enedene 14476 905279 +bpfliegel 14298 884523 +jpulman 13982 870599 +joster 13794 950160 +Nesa92 13786 1114691 +Dark_wizzie 13422 1007152 +Hjax 13350 900887 +Fifis 13313 965473 +mabichito 12903 749391 +thijsk 12886 722107 +crocogoat 12876 1048802 +AdrianSA 12860 804972 +Flopzee 12698 894821 +fatmurphy 12547 853210 +SapphireBrand 12416 969604 +modolief 12386 896470 +scuzzi 12362 833465 +pgontarz 12151 848794 +stocky 11954 699440 +mschmidt 11941 803401 +infinity 11470 727027 +torbjo 11387 728873 +Thomas A. Anderson 11372 732094 +snicolet 11106 869170 +amicic 10779 733593 +rpngn 10712 688203 +d64 10680 771144 +basepi 10637 744851 +jjoshua2 10559 670905 +dzjp 10343 732529 +ols 10259 570669 +lbraesch 10252 647825 From c306d838697011da0a960758dde3f7ede6849060 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Wed, 2 Sep 2020 09:12:04 +0200 Subject: [PATCH 13/35] Stockfish 12 Official release version of Stockfish 12 Bench: 3624569 ----------------------- It is our pleasure to release Stockfish 12 to users world-wide Downloads will be freely available at https://stockfishchess.org/download/ This version 12 of Stockfish plays significantly stronger than any of its predecessors. In a match against Stockfish 11, Stockfish 12 will typically win at least ten times more game pairs than it loses. This jump in strength, visible in regular progression tests during development[1], results from the introduction of an efficiently updatable neural network (NNUE) for the evaluation in Stockfish[2], and associated tuning of the engine as a whole. The concept of the NNUE evaluation was first introduced in shogi, and ported to Stockfish afterward. Stockfish remains a CPU-only engine, since the NNUE networks can be very efficiently evaluated on CPUs. The recommended parameters of the NNUE network are embedded in distributed binaries, and Stockfish will use NNUE by default. Both the NNUE and the classical evaluations are available, and can be used to assign values to positions that are later used in alpha-beta (PVS) search to find the best move. The classical evaluation computes this value as a function of various chess concepts, handcrafted by experts, tested and tuned using fishtest. The NNUE evaluation computes this value with a neural network based on basic inputs. The network is optimized and trained on the evaluations of millions of positions. The Stockfish project builds on a thriving community of enthusiasts that contribute their expertise, time, and resources to build a free and open source chess engine that is robust, widely available, and very strong. We invite chess fans to join the fishtest testing framework and programmers to contribute on github[3]. Stay safe and enjoy chess! The Stockfish team [1] https://github.com/glinscott/fishtest/wiki/Regression-Tests [2] https://github.com/official-stockfish/Stockfish/commit/84f3e867903f62480c33243dd0ecbffd342796fc [3] https://stockfishchess.org/get-involved/ --- src/misc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/misc.cpp b/src/misc.cpp index 3fbdea35..22070f0e 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -65,7 +65,7 @@ namespace { /// Version number. If Version is left empty, then compile date in the format /// DD-MM-YY and show in engine_info. -const string Version = ""; +const string Version = "12"; /// Our fancy logging facility. The trick here is to replace cin.rdbuf() and /// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We From 571c2d6d8daf70de884c493b40cf0279e9b48c61 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Fri, 4 Sep 2020 07:46:06 +0200 Subject: [PATCH 14/35] Restore development version have fun! No functional change --- src/misc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/misc.cpp b/src/misc.cpp index 22070f0e..3fbdea35 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -65,7 +65,7 @@ namespace { /// Version number. If Version is left empty, then compile date in the format /// DD-MM-YY and show in engine_info. -const string Version = "12"; +const string Version = ""; /// Our fancy logging facility. The trick here is to replace cin.rdbuf() and /// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We From 0e1f734b05ee5c67e9a17ae0e2045a64209dee05 Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Wed, 2 Sep 2020 16:45:49 +0200 Subject: [PATCH 15/35] Less pruning in qsearch do not prune moves that give discovery checks, even if with negative SSE. STC https://tests.stockfishchess.org/tests/view/5f4cb5e8ba100690c5cc5d25 LLR: 2.96 (-2.94,2.94) {-0.25,1.25} Total: 91328 W: 9940 L: 9667 D: 71721 Ptnml(0-2): 491, 7345, 29693, 7670, 465 LTC https://tests.stockfishchess.org/tests/view/5f4dbc2eba100690c5cc5dac LLR: 2.97 (-2.94,2.94) {0.25,1.25} Total: 52448 W: 2799 L: 2586 D: 47063 Ptnml(0-2): 53, 2220, 21459, 2445, 47 closes https://github.com/official-stockfish/Stockfish/pull/3098 bench: 4031192 --- src/search.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index b79fa6be..0d823c8e 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1564,7 +1564,9 @@ moves_loop: // When in check, search starts from here } // Do not search moves with negative SEE values - if (!ss->inCheck && !pos.see_ge(move)) + if ( !ss->inCheck + && !(givesCheck && pos.is_discovery_check_on_king(~pos.side_to_move(), move)) + && !pos.see_ge(move)) continue; // Speculative prefetch as early as possible From d6530f7d49ef45e38dacafd8a3a838130113265c Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Thu, 3 Sep 2020 12:18:42 +0200 Subject: [PATCH 16/35] Simplify singularQuietLMR remove formerPV dependence STC https://tests.stockfishchess.org/tests/view/5f4cb922ba100690c5cc5d35 LLR: 2.96 (-2.94,2.94) {-1.25,0.25} Total: 113672 W: 12347 L: 12368 D: 88957 Ptnml(0-2): 566, 9537, 36699, 9420, 614 LTC https://tests.stockfishchess.org/tests/view/5f4e8474ba100690c5cc5e12 LLR: 2.93 (-2.94,2.94) {-0.75,0.25} Total: 43032 W: 2298 L: 2227 D: 38507 Ptnml(0-2): 45, 1940, 17475, 2011, 45 closes https://github.com/official-stockfish/Stockfish/pull/3102 bench: 3290084 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 0d823c8e..b5e190c8 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1196,7 +1196,7 @@ moves_loop: // When in check, search starts from here // Decrease reduction if ttMove has been singularly extended (~3 Elo) if (singularQuietLMR) - r -= 1 + formerPv; + r--; if (!captureOrPromotion) { From 2a696115094882b7dc5c024a97ed7dc2bdc98642 Mon Sep 17 00:00:00 2001 From: VoyagerOne Date: Wed, 2 Sep 2020 16:58:44 -0400 Subject: [PATCH 17/35] LMR Simplification remove reduction at non-check cut nodes for second move at low depths STC: LLR: 2.95 (-2.94,2.94) {-1.25,0.25} Total: 61712 W: 6594 L: 6543 D: 48575 Ptnml(0-2): 293, 5085, 20082, 5070, 326 https://tests.stockfishchess.org/tests/view/5f5007d6ba100690c5cc5ea9 LTC: LLR: 2.94 (-2.94,2.94) {-0.75,0.25} Total: 57544 W: 2983 L: 2925 D: 51636 Ptnml(0-2): 47, 2568, 23495, 2604, 58 https://tests.stockfishchess.org/tests/view/5f50c597ba100690c5cc5ef7 closes https://github.com/official-stockfish/Stockfish/pull/3103 Bench: 3952302 --- src/search.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index b5e190c8..a7692841 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1168,13 +1168,6 @@ moves_loop: // When in check, search starts from here { Depth r = reduction(improving, depth, moveCount); - // Decrease reduction at non-check cut nodes for second move at low depths - if ( cutNode - && depth <= 10 - && moveCount <= 2 - && !ss->inCheck) - r--; - // Decrease reduction if the ttHit running average is large if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024) r--; From 9cc482c7889cbc5f6d92e1b69ccd28d422a44a32 Mon Sep 17 00:00:00 2001 From: Sergio Vieri Date: Thu, 3 Sep 2020 20:22:51 +0800 Subject: [PATCH 18/35] Update default net to nn-308d71810dff.nnue equivalent to 20200903-1739 Net trained from scratch, so it has quite different features extracted compared to the previous net (82215d0fd0df). STC: LLR: 2.98 (-2.94,2.94) {-0.25,1.25} Total: 108328 W: 14048 L: 13719 D: 80561 Ptnml(0-2): 842, 10039, 32062, 10390, 831 https://tests.stockfishchess.org/tests/view/5f50e053ba100690c5cc5f00 LTC: LLR: 2.96 (-2.94,2.94) {0.25,1.25} Total: 13872 W: 1059 L: 890 D: 11923 Ptnml(0-2): 30, 724, 5270, 871, 41 https://tests.stockfishchess.org/tests/view/5f51821fba100690c5cc5f36 closes https://github.com/official-stockfish/Stockfish/pull/3104 Bench: 3832716 --- src/evaluate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.h b/src/evaluate.h index d701f5a7..3da6a9fe 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -38,7 +38,7 @@ namespace Eval { // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. - #define EvalFileDefaultName "nn-82215d0fd0df.nnue" + #define EvalFileDefaultName "nn-308d71810dff.nnue" namespace NNUE { From 9a063fc3cbc8f522215392db232eeb0e04e71b2c Mon Sep 17 00:00:00 2001 From: Vizvezdenec Date: Fri, 4 Sep 2020 15:53:59 +0300 Subject: [PATCH 19/35] Adjust penalty on refuted early quiet moves This patch changes how previous early moves are penalized in case search finds a best move. Here, the first quiet move that was not a transposition table move is penalized. passed STC https://tests.stockfishchess.org/tests/view/5f51d839ba100690c5cc5f69 LLR: 2.94 (-2.94,2.94) {-0.25,1.25} Total: 10088 W: 1150 L: 997 D: 7941 Ptnml(0-2): 41, 772, 3278, 899, 54 passed LTC https://tests.stockfishchess.org/tests/view/5f51e435ba100690c5cc5f76 LLR: 2.93 (-2.94,2.94) {0.25,1.25} Total: 30808 W: 1564 L: 1405 D: 27839 Ptnml(0-2): 19, 1245, 12717, 1404, 19 closes https://github.com/official-stockfish/Stockfish/pull/3106 bench 3983758 --- src/search.cpp | 42 +++++++++++++++++++++--------------------- src/search.h | 1 + 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index a7692841..4aeadc28 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -597,7 +597,7 @@ namespace { Move ttMove, move, excludedMove, bestMove; Depth extension, newDepth; Value bestValue, value, ttValue, eval, maxValue, probCutBeta; - bool ttHit, formerPv, givesCheck, improving, didLMR, priorCapture; + bool formerPv, givesCheck, improving, didLMR, priorCapture; bool captureOrPromotion, doFullDepthSearch, moveCountPruning, ttCapture, singularQuietLMR; Piece movedPiece; @@ -664,12 +664,12 @@ namespace { // position key in case of an excluded move. excludedMove = ss->excludedMove; posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove); - tte = TT.probe(posKey, ttHit); - ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + tte = TT.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] - : ttHit ? tte->move() : MOVE_NONE; + : ss->ttHit ? tte->move() : MOVE_NONE; if (!excludedMove) - ss->ttPv = PvNode || (ttHit && tte->is_pv()); + ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); formerPv = ss->ttPv && !PvNode; if ( ss->ttPv @@ -681,11 +681,11 @@ namespace { // thisThread->ttHitAverage can be used to approximate the running average of ttHit thisThread->ttHitAverage = (TtHitAverageWindow - 1) * thisThread->ttHitAverage / TtHitAverageWindow - + TtHitAverageResolution * ttHit; + + TtHitAverageResolution * ss->ttHit; // At non-PV nodes we check for an early TT cutoff if ( !PvNode - && ttHit + && ss->ttHit && tte->depth() >= depth && ttValue != VALUE_NONE // Possible in case of TT access race && (ttValue >= beta ? (tte->bound() & BOUND_LOWER) @@ -778,7 +778,7 @@ namespace { improving = false; goto moves_loop; } - else if (ttHit) + else if (ss->ttHit) { // Never assume anything about values stored in TT ss->staticEval = eval = tte->eval(); @@ -882,14 +882,14 @@ namespace { // there and in further interactions with transposition table cutoff depth is set to depth - 3 // because probCut search has depth set to depth - 4 but we also do a move before it // so effective depth is equal to depth - 3 - && !( ttHit + && !( ss->ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue < probCutBeta)) { // if ttMove is a capture and value from transposition table is good enough produce probCut // cutoff without digging into actual probCut search - if ( ttHit + if ( ss->ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue >= probCutBeta @@ -933,7 +933,7 @@ namespace { if (value >= probCutBeta) { // if transposition table doesn't have equal or more deep info write probCut data into it - if ( !(ttHit + if ( !(ss->ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE)) tte->save(posKey, value_to_tt(value, ss->ply), ttPv, @@ -1423,7 +1423,7 @@ moves_loop: // When in check, search starts from here Move ttMove, move, bestMove; Depth ttDepth; Value bestValue, value, ttValue, futilityValue, futilityBase, oldAlpha; - bool ttHit, pvHit, givesCheck, captureOrPromotion; + bool pvHit, givesCheck, captureOrPromotion; int moveCount; if (PvNode) @@ -1453,13 +1453,13 @@ moves_loop: // When in check, search starts from here : DEPTH_QS_NO_CHECKS; // Transposition table lookup posKey = pos.key(); - tte = TT.probe(posKey, ttHit); - ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; - ttMove = ttHit ? tte->move() : MOVE_NONE; - pvHit = ttHit && tte->is_pv(); + tte = TT.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + ttMove = ss->ttHit ? tte->move() : MOVE_NONE; + pvHit = ss->ttHit && tte->is_pv(); if ( !PvNode - && ttHit + && ss->ttHit && tte->depth() >= ttDepth && ttValue != VALUE_NONE // Only in case of TT access race && (ttValue >= beta ? (tte->bound() & BOUND_LOWER) @@ -1474,7 +1474,7 @@ moves_loop: // When in check, search starts from here } else { - if (ttHit) + if (ss->ttHit) { // Never assume anything about values stored in TT if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE) @@ -1493,7 +1493,7 @@ moves_loop: // When in check, search starts from here // Stand pat. Return immediately if static value is at least beta if (bestValue >= beta) { - if (!ttHit) + if (!ss->ttHit) tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, DEPTH_NONE, MOVE_NONE, ss->staticEval); @@ -1711,8 +1711,8 @@ moves_loop: // When in check, search starts from here else captureHistory[moved_piece][to_sq(bestMove)][captured] << bonus1; - // Extra penalty for a quiet TT or main killer move in previous ply when it gets refuted - if ( ((ss-1)->moveCount == 1 || ((ss-1)->currentMove == (ss-1)->killers[0])) + // Extra penalty for a quiet early move that was not a TT move or main killer move in previous ply when it gets refuted + if ( ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0])) && !pos.captured_piece()) update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -bonus1); diff --git a/src/search.h b/src/search.h index 79085189..f60da4a5 100644 --- a/src/search.h +++ b/src/search.h @@ -49,6 +49,7 @@ struct Stack { int moveCount; bool inCheck; bool ttPv; + bool ttHit; }; From d539da19d2b13d70a81ab863f54046add0bc3b38 Mon Sep 17 00:00:00 2001 From: SFisGOD Date: Fri, 4 Sep 2020 17:14:50 +0800 Subject: [PATCH 20/35] Use classical eval more often If there is a moderate imbalance, use classical eval with small probability (1/16), as derived from the node counter. STC: LLR: 2.94 (-2.94,2.94) {-0.25,1.25} Total: 32320 W: 3562 L: 3377 D: 25381 Ptnml(0-2): 144, 2609, 10478, 2776, 153 https://tests.stockfishchess.org/tests/view/5f520615ba100690c5cc5f80 LTC: LLR: 2.95 (-2.94,2.94) {0.25,1.25} Total: 21032 W: 1116 L: 974 D: 18942 Ptnml(0-2): 20, 837, 8664, 971, 24 https://tests.stockfishchess.org/tests/view/5f522eaaba100690c5cc5f8c closes https://github.com/official-stockfish/Stockfish/pull/3107 Bench: 4109324 --- src/evaluate.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 09f36513..db8379da 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -1015,12 +1015,16 @@ make_v: Value Eval::evaluate(const Position& pos) { + bool useClassical = abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count()); bool classical = !Eval::useNNUE - || abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count()); + || useClassical + || (abs(eg_value(pos.psq_score())) > PawnValueMg / 8 && !(pos.this_thread()->nodes & 0xF)); Value v = classical ? Evaluation(pos).value() : NNUE::evaluate(pos) * 5 / 4 + Tempo; - if (classical && Eval::useNNUE && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count())) + if ( useClassical + && Eval::useNNUE + && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count())) v = NNUE::evaluate(pos) * 5 / 4 + Tempo; // Damp down the evaluation linearly when shuffling From fc27d158c012341593518a05abf51903ecbcb495 Mon Sep 17 00:00:00 2001 From: syzygy1 <3028851+syzygy1@users.noreply.github.com> Date: Sun, 6 Sep 2020 17:29:12 +0200 Subject: [PATCH 21/35] Bug fix in do_null_move() and NNUE simplification. This fixes #3108 and removes some NNUE code that is currently not used. At the moment, do_null_move() copies the accumulator from the previous state into the new state, which is correct. It then clears the "computed_score" flag because the side to move has changed, and with the other side to move NNUE will return a completely different evaluation (normally with changed sign but also with different NNUE-internal tempo bonus). The problem is that do_null_move() clears the wrong flag. It clears the computed_score flag of the old state, not of the new state. It turns out that this almost never affects the search. For example, fixing it does not change the current bench (but it does change the previous bench). This is because the search code usually avoids calling evaluate() after a null move. This PR corrects do_null_move() by removing the computed_score flag altogether. The flag is not needed because nnue_evaluate() is never called twice on a position. This PR also removes some unnecessary {}s and inserts a few blank lines in the modified NNUE files in line with SF coding style. Resulf ot STC non-regression test: LLR: 2.95 (-2.94,2.94) {-1.25,0.25} Total: 26328 W: 3118 L: 3012 D: 20198 Ptnml(0-2): 126, 2208, 8397, 2300, 133 https://tests.stockfishchess.org/tests/view/5f553ccc2d02727c56b36db1 closes https://github.com/official-stockfish/Stockfish/pull/3109 bench: 4109324 --- src/nnue/evaluate_nnue.cpp | 38 ++----------------- src/nnue/nnue_accumulator.h | 2 - src/nnue/nnue_feature_transformer.h | 58 +++++++++++++---------------- src/position.cpp | 2 - 4 files changed, 29 insertions(+), 71 deletions(-) diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index d6ac9894..ed138881 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -115,31 +115,16 @@ namespace Eval::NNUE { return stream && stream.peek() == std::ios::traits_type::eof(); } - // Proceed with the difference calculation if possible - static void UpdateAccumulatorIfPossible(const Position& pos) { - - feature_transformer->UpdateAccumulatorIfPossible(pos); - } - - // Calculate the evaluation value - static Value ComputeScore(const Position& pos, bool refresh) { - - auto& accumulator = pos.state()->accumulator; - if (!refresh && accumulator.computed_score) { - return accumulator.score; - } + // Evaluation function. Perform differential calculation. + Value evaluate(const Position& pos) { alignas(kCacheLineSize) TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize]; - feature_transformer->Transform(pos, transformed_features, refresh); + feature_transformer->Transform(pos, transformed_features); alignas(kCacheLineSize) char buffer[Network::kBufferSize]; const auto output = network->Propagate(transformed_features, buffer); - auto score = static_cast(output[0] / FV_SCALE); - - accumulator.score = score; - accumulator.computed_score = true; - return accumulator.score; + return static_cast(output[0] / FV_SCALE); } // Load eval, from a file stream or a memory stream @@ -150,19 +135,4 @@ namespace Eval::NNUE { return ReadParameters(stream); } - // Evaluation function. Perform differential calculation. - Value evaluate(const Position& pos) { - return ComputeScore(pos, false); - } - - // Evaluation function. Perform full calculation. - Value compute_eval(const Position& pos) { - return ComputeScore(pos, true); - } - - // Proceed with the difference calculation if possible - void update_eval(const Position& pos) { - UpdateAccumulatorIfPossible(pos); - } - } // namespace Eval::NNUE diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 69dfaad2..26370710 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -29,9 +29,7 @@ namespace Eval::NNUE { struct alignas(kCacheLineSize) Accumulator { std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; - Value score; bool computed_accumulation; - bool computed_score; }; } // namespace Eval::NNUE diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 43707610..2b6259c3 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -50,11 +50,13 @@ namespace Eval::NNUE { // Hash value embedded in the evaluation file static constexpr std::uint32_t GetHashValue() { + return RawFeatures::kHashValue ^ kOutputDimensions; } // Read network parameters bool ReadParameters(std::istream& stream) { + for (std::size_t i = 0; i < kHalfDimensions; ++i) biases_[i] = read_little_endian(stream); for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i) @@ -64,23 +66,26 @@ namespace Eval::NNUE { // Proceed with the difference calculation if possible bool UpdateAccumulatorIfPossible(const Position& pos) const { + const auto now = pos.state(); - if (now->accumulator.computed_accumulation) { + if (now->accumulator.computed_accumulation) return true; - } + const auto prev = now->previous; if (prev && prev->accumulator.computed_accumulation) { UpdateAccumulator(pos); return true; } + return false; } // Convert input features - void Transform(const Position& pos, OutputType* output, bool refresh) const { - if (refresh || !UpdateAccumulatorIfPossible(pos)) { + void Transform(const Position& pos, OutputType* output) const { + + if (!UpdateAccumulatorIfPossible(pos)) RefreshAccumulator(pos); - } + const auto& accumulation = pos.state()->accumulator.accumulation; #if defined(USE_AVX2) @@ -177,6 +182,7 @@ namespace Eval::NNUE { private: // Calculate cumulative value without using difference calculation void RefreshAccumulator(const Position& pos) const { + auto& accumulator = pos.state()->accumulator; IndexType i = 0; Features::IndexList active_indices[2]; @@ -216,9 +222,8 @@ namespace Eval::NNUE { &accumulator.accumulation[perspective][i][0]); auto column = reinterpret_cast(&weights_[offset]); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); - } #elif defined(USE_NEON) auto accumulation = reinterpret_cast( @@ -240,11 +245,11 @@ namespace Eval::NNUE { #endif accumulator.computed_accumulation = true; - accumulator.computed_score = false; } // Calculate cumulative value using difference calculation void UpdateAccumulator(const Position& pos) const { + const auto prev_accumulator = pos.state()->previous->accumulator; auto& accumulator = pos.state()->accumulator; IndexType i = 0; @@ -288,33 +293,27 @@ namespace Eval::NNUE { #if defined(USE_AVX2) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]); - } #elif defined(USE_SSE2) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); - } #elif defined(USE_MMX) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]); - } #elif defined(USE_NEON) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = vsubq_s16(accumulation[j], column[j]); - } #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] -= - weights_[offset + j]; - } + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] -= weights_[offset + j]; #endif } @@ -325,33 +324,27 @@ namespace Eval::NNUE { #if defined(USE_AVX2) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); - } #elif defined(USE_SSE2) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); - } #elif defined(USE_MMX) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); - } #elif defined(USE_NEON) auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = vaddq_s16(accumulation[j], column[j]); - } #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] += - weights_[offset + j]; - } + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] += weights_[offset + j]; #endif } @@ -362,7 +355,6 @@ namespace Eval::NNUE { #endif accumulator.computed_accumulation = true; - accumulator.computed_score = false; } using BiasType = std::int16_t; diff --git a/src/position.cpp b/src/position.cpp index fe89b753..e6a760d2 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -704,7 +704,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { // Used by NNUE st->accumulator.computed_accumulation = false; - st->accumulator.computed_score = false; auto& dp = st->dirtyPiece; dp.dirty_num = 1; @@ -1000,7 +999,6 @@ void Position::do_null_move(StateInfo& newSt) { if (Eval::useNNUE) { std::memcpy(&newSt, st, sizeof(StateInfo)); - st->accumulator.computed_score = false; } else std::memcpy(&newSt, st, offsetof(StateInfo, accumulator)); From d2562cde12cdcc3df654279d6d632ae74c5f71af Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Tue, 8 Sep 2020 15:37:53 +0200 Subject: [PATCH 22/35] Always re-enable NNUE after "bench". Restore the default NNUE setting (enabled) after a bench command. This also makes the resulting program settings independent of the number of FENs that are being benched. Fixes issue #3112. closes https://github.com/official-stockfish/Stockfish/pull/3113 No functional change. --- src/benchmark.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 806e9840..ffb631a2 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -164,5 +164,7 @@ vector setup_bench(const Position& current, istream& is) { ++posCounter; } + list.emplace_back("setoption name Use NNUE value true"); + return list; } From 0405f3540366cc16245d51531881c55d3726c8b5 Mon Sep 17 00:00:00 2001 From: SFisGOD Date: Mon, 7 Sep 2020 04:54:26 +0800 Subject: [PATCH 23/35] Double probability of using classical eval This patch doubles the moderate imbalance threshold and probability of using classical eval. So now if imbalance is greater than PawnValueMg / 4 then there is a 1/8 chance of using classical eval. STC: LLR: 2.93 (-2.94,2.94) {-0.25,1.25} Total: 10984 W: 1303 L: 1140 D: 8541 Ptnml(0-2): 58, 867, 3489, 1010, 68 https://tests.stockfishchess.org/tests/view/5f554c9f97da2d5437d3813e LTC: LLR: 2.95 (-2.94,2.94) {0.25,1.25} Total: 43064 W: 2476 L: 2276 D: 38312 Ptnml(0-2): 37, 1985, 17308, 2145, 57 https://tests.stockfishchess.org/tests/view/5f55690a00a0aa2ca79f0a43 closes https://github.com/official-stockfish/Stockfish/pull/3114 Bench: 4161067 --- src/evaluate.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index db8379da..faf71d27 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -1015,10 +1015,13 @@ make_v: Value Eval::evaluate(const Position& pos) { + // Use classical eval if there is a large imbalance + // If there is a moderate imbalance, use classical eval with probability (1/8), + // as derived from the node counter. bool useClassical = abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count()); bool classical = !Eval::useNNUE || useClassical - || (abs(eg_value(pos.psq_score())) > PawnValueMg / 8 && !(pos.this_thread()->nodes & 0xF)); + || (abs(eg_value(pos.psq_score())) > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB)); Value v = classical ? Evaluation(pos).value() : NNUE::evaluate(pos) * 5 / 4 + Tempo; From 35ab8254b70f62a4e0138c475fad0c77dcc0af2d Mon Sep 17 00:00:00 2001 From: mckx00 Date: Sun, 13 Sep 2020 19:28:32 -0700 Subject: [PATCH 24/35] Simplify StatSCore Initialization No need to initialize StatScore at rootNode. Current Logic is redundant because at subsequent levels the grandchildren statScore is initialized to zero. closes https://github.com/official-stockfish/Stockfish/pull/3122 Non functional change. --- src/search.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 4aeadc28..07c491b6 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -654,9 +654,7 @@ namespace { // starts with statScore = 0. Later grandchildren start with the last calculated // statScore of the previous grandchild. This influences the reduction rules in // LMR which are based on the statScore of parent position. - if (rootNode) - (ss+4)->statScore = 0; - else + if (!rootNode) (ss+2)->statScore = 0; // Step 4. Transposition table lookup. We don't want the score of a partial From 7135678f71b7f6ee32e92b8dbef2b16b403d8ea9 Mon Sep 17 00:00:00 2001 From: Sergio Vieri Date: Mon, 14 Sep 2020 17:24:05 +0800 Subject: [PATCH 25/35] Update default net to nn-03744f8d56d8.nnue Equivalent to 20200914-1520 closes https://github.com/official-stockfish/Stockfish/pull/3123 Bench: 4222126 --- src/evaluate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.h b/src/evaluate.h index 3da6a9fe..c723bd8f 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -38,7 +38,7 @@ namespace Eval { // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. - #define EvalFileDefaultName "nn-308d71810dff.nnue" + #define EvalFileDefaultName "nn-03744f8d56d8.nnue" namespace NNUE { From 5f426d8667feda65eaf1eca699f629d31e170d43 Mon Sep 17 00:00:00 2001 From: xoto10 Date: Thu, 10 Sep 2020 21:10:57 +0100 Subject: [PATCH 26/35] Use 2 * bestMoveChanges. NNUE appears to provide a more stable eval than the classic eval, so the time use dependencies on bestMoveChanges, fallingEval, etc may need to change to make the best use of available time. This change doubles the effect of totBestMoveChanges when giving more time because the choice of best move is unstable. STC: LLR: 2.94 (-2.94,2.94) {-0.25,1.25} Total: 101928 W: 11995 L: 11698 D: 78235 Elo +0.78 Ptnml(0-2): 592, 8707, 32103, 8936, 626 https://tests.stockfishchess.org/tests/view/5f538a462d02727c56b36cec LTC: LLR: 2.94 (-2.94,2.94) {0.25,1.25} Total: 186392 W: 10383 L: 9877 D: 166132 Elo +0.81 Ptnml(0-2): 207, 8370, 75539, 8870, 210 https://tests.stockfishchess.org/tests/view/5f54a9712d02727c56b36d5a closes https://github.com/official-stockfish/Stockfish/pull/3119 Bench 4222126 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 07c491b6..c7d2efd4 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -520,7 +520,7 @@ void Thread::search() { totBestMoveChanges += th->bestMoveChanges; th->bestMoveChanges = 0; } - double bestMoveInstability = 1 + totBestMoveChanges / Threads.size(); + double bestMoveInstability = 1 + 2 * totBestMoveChanges / Threads.size(); double totalTime = rootMoves.size() == 1 ? 0 : Time.optimum() * fallingEval * reduction * bestMoveInstability; From d86663af141f1256bfc32ab95891e944d84e8755 Mon Sep 17 00:00:00 2001 From: syzygy1 <3028851+syzygy1@users.noreply.github.com> Date: Sun, 13 Sep 2020 20:16:52 +0200 Subject: [PATCH 27/35] Improve NDK section in Makefile This PR sets the "comp" variable simply to "clang", which seems to be more consistent and allows a small simplification. The PR also moves the section that sets "profile_make" and "profile_use" to after the NDK section, which ensures that these variables are now set correctly for NDK/clang. closes https://github.com/official-stockfish/Stockfish/pull/3121 No functional change --- src/Makefile | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/src/Makefile b/src/Makefile index 340b3008..54868b39 100644 --- a/src/Makefile +++ b/src/Makefile @@ -381,19 +381,6 @@ ifeq ($(COMP),clang) endif endif -ifeq ($(comp),icc) - profile_make = icc-profile-make - profile_use = icc-profile-use -else -ifeq ($(comp),clang) - profile_make = clang-profile-make - profile_use = clang-profile-use -else - profile_make = gcc-profile-make - profile_use = gcc-profile-use -endif -endif - ifeq ($(KERNEL),Darwin) CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14 LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14 @@ -405,20 +392,30 @@ endif # Currently we don't know how to make PGO builds with the NDK yet. ifeq ($(COMP),ndk) CXXFLAGS += -stdlib=libc++ -fPIE + comp=clang ifeq ($(arch),armv7) - comp=armv7a-linux-androideabi16-clang CXX=armv7a-linux-androideabi16-clang++ CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon STRIP=arm-linux-androideabi-strip endif ifeq ($(arch),armv8) - comp=aarch64-linux-android21-clang CXX=aarch64-linux-android21-clang++ STRIP=aarch64-linux-android-strip endif LDFLAGS += -static-libstdc++ -pie -lm -latomic endif +ifeq ($(comp),icc) + profile_make = icc-profile-make + profile_use = icc-profile-use +else ifeq ($(comp),clang) + profile_make = clang-profile-make + profile_use = clang-profile-use +else + profile_make = gcc-profile-make + profile_use = gcc-profile-use +endif + ### Travis CI script uses COMPILER to overwrite CXX ifdef COMPILER COMPCXX=$(COMPILER) @@ -590,10 +587,7 @@ endif ### needs access to the optimization flags. ifeq ($(optimize),yes) ifeq ($(debug), no) - ifeq ($(COMP),ndk) - CXXFLAGS += -flto=thin - LDFLAGS += $(CXXFLAGS) - else ifeq ($(comp),clang) + ifeq ($(comp),clang) CXXFLAGS += -flto=thin ifneq ($(findstring MINGW,$(KERNEL)),) CXXFLAGS += -fuse-ld=lld From df43805953b241f95c246ff3e96aece76b518590 Mon Sep 17 00:00:00 2001 From: GoldenRare Date: Thu, 10 Sep 2020 00:24:40 -0400 Subject: [PATCH 28/35] Added FEN string to bench output fixes https://github.com/official-stockfish/Stockfish/pull/3117 closes https://github.com/official-stockfish/Stockfish/pull/3118 No functional change --- AUTHORS | 1 + src/uci.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index c00ab657..198dfa5a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -63,6 +63,7 @@ Gary Heckman (gheckman) George Sobala (gsobala) gguliash Gian-Carlo Pascutto (gcp) +Deshawn Mohan-Smith (GoldenRare) Gontran Lemaire (gonlem) Goodkov Vasiliy Aleksandrovich (goodkov) Gregor Cramer diff --git a/src/uci.cpp b/src/uci.cpp index bc0ee0a0..3f3cc458 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -170,7 +170,7 @@ namespace { if (token == "go" || token == "eval") { - cerr << "\nPosition: " << cnt++ << '/' << num << endl; + cerr << "\nPosition: " << cnt++ << '/' << num << " (" << pos.fen() << ")" << endl; if (token == "go") { go(pos, is, states); From 0ca93c5b94b820a41e2850ede084096120128a28 Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Wed, 16 Sep 2020 19:14:32 +0200 Subject: [PATCH 29/35] Remove castling extension STC https://tests.stockfishchess.org/tests/view/5f5fa5348fbc1c8a3f476eca LLR: 2.94 (-2.94,2.94) {-1.25,0.25} Total: 38520 W: 4713 L: 4610 D: 29197 Ptnml(0-2): 233, 3486, 11734, 3559, 248 LTC https://tests.stockfishchess.org/tests/view/5f62166a912c15f19854b806 LLR: 2.93 (-2.94,2.94) {-0.75,0.25} Total: 48024 W: 2673 L: 2600 D: 42751 Ptnml(0-2): 64, 2247, 19316, 2322, 63 closes https://github.com/official-stockfish/Stockfish/pull/3128 bench: 3818400 --- src/search.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index c7d2efd4..17cd0a73 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1127,11 +1127,6 @@ moves_loop: // When in check, search starts from here && pos.non_pawn_material() <= 2 * RookValueMg) extension = 1; - // Castling extension - if ( type_of(move) == CASTLING - && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2) - extension = 1; - // Late irreversible move extension if ( move == ttMove && pos.rule50_count() > 80 From 64a63464d7bc72a3aac33aa680cd2b2b240ff903 Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Wed, 16 Sep 2020 20:42:38 +0200 Subject: [PATCH 30/35] Simplify futility pruning for captures STC https://tests.stockfishchess.org/tests/view/5f61f0e4b91f2ec371e429c2 LLR: 2.94 (-2.94,2.94) {-1.25,0.25} Total: 75512 W: 8747 L: 8704 D: 58061 Ptnml(0-2): 440, 6589, 23683, 6576, 468 LTC https://tests.stockfishchess.org/tests/view/5f6215d3912c15f19854b801 LLR: 2.95 (-2.94,2.94) {-0.75,0.25} Total: 92912 W: 5030 L: 4992 D: 82890 Ptnml(0-2): 88, 4363, 37532, 4369, 104 closes https://github.com/official-stockfish/Stockfish/pull/3129 bench: 3856086 --- src/search.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 17cd0a73..9c5fb58b 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1056,7 +1056,6 @@ moves_loop: // When in check, search starts from here if ( !givesCheck && lmrDepth < 6 && !(PvNode && abs(bestValue) < 2) - && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] && !ss->inCheck && ss->staticEval + 169 + 244 * lmrDepth + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha) From 8b8a510fd6a1a17b39b2d4b166f60ac7be0dab23 Mon Sep 17 00:00:00 2001 From: syzygy1 <3028851+syzygy1@users.noreply.github.com> Date: Wed, 16 Sep 2020 17:39:11 +0200 Subject: [PATCH 31/35] Use tiling to speed up accumulator refreshes and updates Perform the update and refresh operations tile by tile in a local array of vectors. By selecting the array size carefully, we achieve that the compiler keeps the whole array in vector registers. Idea and original implementation by @sf-x. STC: https://tests.stockfishchess.org/tests/view/5f623eec912c15f19854b855 LLR: 2.94 (-2.94,2.94) {-0.25,1.25} Total: 4872 W: 623 L: 477 D: 3772 Ptnml(0-2): 14, 350, 1585, 450, 37 LTC: https://tests.stockfishchess.org/tests/view/5f62434e912c15f19854b860 LLR: 2.94 (-2.94,2.94) {0.25,1.25} Total: 25808 W: 1565 L: 1401 D: 22842 Ptnml(0-2): 23, 1186, 10332, 1330, 33 closes https://github.com/official-stockfish/Stockfish/pull/3130 No functional change --- src/nnue/nnue_feature_transformer.h | 233 +++++++++++++++------------- 1 file changed, 125 insertions(+), 108 deletions(-) diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 2b6259c3..e71ee60d 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -29,6 +29,56 @@ namespace Eval::NNUE { + // If vector instructions are enabled, we update and refresh the + // accumulator tile by tile such that each tile fits in the CPU's + // vector registers. + #define TILING + + #ifdef USE_AVX512 + typedef __m512i vec_t; + #define vec_load(a) _mm512_loadA_si512(a) + #define vec_store(a,b) _mm512_storeA_si512(a,b) + #define vec_add_16(a,b) _mm512_add_epi16(a,b) + #define vec_sub_16(a,b) _mm512_sub_epi16(a,b) + static constexpr IndexType kNumRegs = 8; // only 8 are needed + + #elif USE_AVX2 + typedef __m256i vec_t; + #define vec_load(a) _mm256_loadA_si256(a) + #define vec_store(a,b) _mm256_storeA_si256(a,b) + #define vec_add_16(a,b) _mm256_add_epi16(a,b) + #define vec_sub_16(a,b) _mm256_sub_epi16(a,b) + static constexpr IndexType kNumRegs = 16; + + #elif USE_SSE2 + typedef __m128i vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) _mm_add_epi16(a,b) + #define vec_sub_16(a,b) _mm_sub_epi16(a,b) + static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8; + + #elif USE_MMX + typedef __m64 vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) _mm_add_pi16(a,b) + #define vec_sub_16(a,b) _mm_sub_pi16(a,b) + static constexpr IndexType kNumRegs = 8; + + #elif USE_NEON + typedef int16x8_t vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) vaddq_s16(a,b) + #define vec_sub_16(a,b) vsubq_s16(a,b) + static constexpr IndexType kNumRegs = 16; + + #else + #undef TILING + + #endif + // Input feature converter class FeatureTransformer { @@ -36,6 +86,11 @@ namespace Eval::NNUE { // Number of output dimensions for one side static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; + #ifdef TILING + static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2; + static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions"); + #endif + public: // Output type using OutputType = TransformedFeatureType; @@ -189,57 +244,41 @@ namespace Eval::NNUE { RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], active_indices); for (Color perspective : { WHITE, BLACK }) { + #ifdef TILING + for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) { + auto biasesTile = reinterpret_cast( + &biases_[j * kTileHeight]); + auto accTile = reinterpret_cast( + &accumulator.accumulation[perspective][i][j * kTileHeight]); + vec_t acc[kNumRegs]; + + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = biasesTile[k]; + + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); + + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + + for (unsigned k = 0; k < kNumRegs; k++) + vec_store(&accTile[k], acc[k]); + } + #else std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); + kHalfDimensions * sizeof(BiasType)); + for (const auto index : active_indices[perspective]) { const IndexType offset = kHalfDimensions * index; - #if defined(USE_AVX512) - auto accumulation = reinterpret_cast<__m512i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - for (IndexType j = 0; j < kNumChunks; ++j) - _mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j])); - #elif defined(USE_AVX2) - auto accumulation = reinterpret_cast<__m256i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j])); - - #elif defined(USE_SSE2) - auto accumulation = reinterpret_cast<__m128i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); - - #elif defined(USE_MMX) - auto accumulation = reinterpret_cast<__m64*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); - - #elif defined(USE_NEON) - auto accumulation = reinterpret_cast( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = vaddq_s16(accumulation[j], column[j]); - - #else for (IndexType j = 0; j < kHalfDimensions; ++j) accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - #endif - } + #endif } + #if defined(USE_MMX) _mm_empty(); #endif @@ -257,29 +296,55 @@ namespace Eval::NNUE { bool reset[2]; RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], removed_indices, added_indices, reset); - for (Color perspective : { WHITE, BLACK }) { - #if defined(USE_AVX2) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m256i*>( - &accumulator.accumulation[perspective][i][0]); + #ifdef TILING + for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) { + for (Color perspective : { WHITE, BLACK }) { + auto accTile = reinterpret_cast( + &accumulator.accumulation[perspective][i][j * kTileHeight]); + vec_t acc[kNumRegs]; - #elif defined(USE_SSE2) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m128i*>( - &accumulator.accumulation[perspective][i][0]); + if (reset[perspective]) { + auto biasesTile = reinterpret_cast( + &biases_[j * kTileHeight]); + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = biasesTile[k]; + } else { + auto prevAccTile = reinterpret_cast( + &prev_accumulator.accumulation[perspective][i][j * kTileHeight]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_load(&prevAccTile[k]); - #elif defined(USE_MMX) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m64*>( - &accumulator.accumulation[perspective][i][0]); + // Difference calculation for the deactivated features + for (const auto index : removed_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); - #elif defined(USE_NEON) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast( - &accumulator.accumulation[perspective][i][0]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); + } + } + { // Difference calculation for the activated features + for (const auto index : added_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); + + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + } + + for (IndexType k = 0; k < kNumRegs; ++k) + vec_store(&accTile[k], acc[k]); + } + } + #if defined(USE_MMX) + _mm_empty(); #endif + #else + for (Color perspective : { WHITE, BLACK }) { + if (reset[perspective]) { std::memcpy(accumulator.accumulation[perspective][i], biases_, kHalfDimensions * sizeof(BiasType)); @@ -291,67 +356,19 @@ namespace Eval::NNUE { for (const auto index : removed_indices[perspective]) { const IndexType offset = kHalfDimensions * index; - #if defined(USE_AVX2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]); - - #elif defined(USE_SSE2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); - - #elif defined(USE_MMX) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]); - - #elif defined(USE_NEON) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = vsubq_s16(accumulation[j], column[j]); - - #else for (IndexType j = 0; j < kHalfDimensions; ++j) accumulator.accumulation[perspective][i][j] -= weights_[offset + j]; - #endif - } } { // Difference calculation for the activated features for (const auto index : added_indices[perspective]) { const IndexType offset = kHalfDimensions * index; - #if defined(USE_AVX2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); - - #elif defined(USE_SSE2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); - - #elif defined(USE_MMX) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); - - #elif defined(USE_NEON) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = vaddq_s16(accumulation[j], column[j]); - - #else for (IndexType j = 0; j < kHalfDimensions; ++j) accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - #endif - } } } - #if defined(USE_MMX) - _mm_empty(); #endif accumulator.computed_accumulation = true; From 8559c439148d0f183a5d67375c12abe92d63975e Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Sun, 20 Sep 2020 09:03:37 +0200 Subject: [PATCH 32/35] Simplify reduced depth search Simplification in reduced depth search. STC https://tests.stockfishchess.org/tests/view/5f64c72fbb0cae038ca8f531 LLR: 2.94 (-2.94,2.94) {-1.25,0.25} Total: 28320 W: 3475 L: 3359 D: 21486 Ptnml(0-2): 170, 2485, 8773, 2523, 209 LTC https://tests.stockfishchess.org/tests/view/5f650cfabb0cae038ca8f585 LLR: 2.95 (-2.94,2.94) {-0.75,0.25} Total: 58392 W: 3354 L: 3285 D: 51753 Ptnml(0-2): 74, 2826, 23336, 2877, 83 closes https://github.com/official-stockfish/Stockfish/pull/3139 bench: 4201295 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 9c5fb58b..22cb8577 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1151,7 +1151,7 @@ moves_loop: // When in check, search starts from here // Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be // re-searched at full depth. if ( depth >= 3 - && moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2) + && moveCount > 1 + 2 * rootNode && ( !captureOrPromotion || moveCountPruning || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha From 16b4578cc1bb0cc0dead19e7d9248553c977f8ca Mon Sep 17 00:00:00 2001 From: Stefan Geschwentner Date: Sun, 20 Sep 2020 22:25:19 +0200 Subject: [PATCH 33/35] Tweak hybrid treshold. Increase the first hybrid threshold with more material. Rewrite the hybrid rules for clarity. STC: LLR: 2.94 (-2.94,2.94) {-0.25,1.25} Total: 24416 W: 3039 L: 2848 D: 18529 Ptnml(0-2): 135, 2136, 7503, 2271, 163 https://tests.stockfishchess.org/tests/view/5f6451efbb0cae038ca8f4dc LTC; LLR: 2.95 (-2.94,2.94) {0.25,1.25} Total: 65016 W: 3702 L: 3455 D: 57859 Ptnml(0-2): 66, 2991, 26157, 3218, 76 https://tests.stockfishchess.org/tests/view/5f64b143bb0cae038ca8f51f closes https://github.com/official-stockfish/Stockfish/pull/3140 Bench: 3973739 --- src/evaluate.cpp | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index faf71d27..a9159477 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -1015,20 +1015,28 @@ make_v: Value Eval::evaluate(const Position& pos) { - // Use classical eval if there is a large imbalance - // If there is a moderate imbalance, use classical eval with probability (1/8), - // as derived from the node counter. - bool useClassical = abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count()); - bool classical = !Eval::useNNUE - || useClassical - || (abs(eg_value(pos.psq_score())) > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB)); - Value v = classical ? Evaluation(pos).value() - : NNUE::evaluate(pos) * 5 / 4 + Tempo; + Value v; - if ( useClassical - && Eval::useNNUE - && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count())) - v = NNUE::evaluate(pos) * 5 / 4 + Tempo; + if (!Eval::useNNUE) + v = Evaluation(pos).value(); + else + { + // scale and shift NNUE for compatibility with search and classical evaluation + auto adjusted_NNUE = [&](){ return NNUE::evaluate(pos) * 5 / 4 + Tempo; }; + + // if there is PSQ imbalance use classical eval, with small probability if it is small + Value psq = Value(abs(eg_value(pos.psq_score()))); + int r50 = 16 + pos.rule50_count(); + bool largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50; + bool classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB)); + + v = classical ? Evaluation(pos).value() : adjusted_NNUE(); + + // if the classical eval is small and imbalance large, use NNUE nevertheless. + if ( largePsq + && abs(v) * 16 < NNUEThreshold2 * r50) + v = adjusted_NNUE(); + } // Damp down the evaluation linearly when shuffling v = v * (100 - pos.rule50_count()) / 100; From 485d517c687a2d3cb0b88cc8c198483759eaf2c7 Mon Sep 17 00:00:00 2001 From: Sami Kiminki Date: Sun, 30 Aug 2020 19:41:30 +0300 Subject: [PATCH 34/35] Add large page support for NNUE weights and simplify TT mem management Use TT memory functions to allocate memory for the NNUE weights. This should provide a small speed-up on systems where large pages are not automatically used, including Windows and some Linux distributions. Further, since we now have a wrapper for std::aligned_alloc(), we can simplify the TT memory management a bit: - We no longer need to store separate pointers to the hash table and its underlying memory allocation. - We also get to merge the Linux-specific and default implementations of aligned_ttmem_alloc(). Finally, we'll enable the VirtualAlloc code path with large page support also for Win32. STC: https://tests.stockfishchess.org/tests/view/5f66595823a84a47b9036fba LLR: 2.94 (-2.94,2.94) {-0.25,1.25} Total: 14896 W: 1854 L: 1686 D: 11356 Ptnml(0-2): 65, 1224, 4742, 1312, 105 closes https://github.com/official-stockfish/Stockfish/pull/3081 No functional change. --- README.md | 2 +- src/misc.cpp | 57 +++++++++++++++++--------------------- src/misc.h | 4 +-- src/nnue/evaluate_nnue.cpp | 18 ++++++++---- src/nnue/evaluate_nnue.h | 11 ++++++++ src/tt.cpp | 7 +++-- src/tt.h | 3 +- 7 files changed, 57 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 96a495ae..255ebce2 100644 --- a/README.md +++ b/README.md @@ -152,7 +152,7 @@ to find the best move. The classical evaluation computes this value as a functio of various chess concepts, handcrafted by experts, tested and tuned using fishtest. The NNUE evaluation computes this value with a neural network based on basic inputs (e.g. piece positions only). The network is optimized and trained -on the evalutions of millions of positions at moderate search depth. +on the evaluations of millions of positions at moderate search depth. The NNUE evaluation was first introduced in shogi, and ported to Stockfish afterward. It can be evaluated efficiently on CPUs, and exploits the fact that only parts diff --git a/src/misc.cpp b/src/misc.cpp index 3fbdea35..d9bc47e3 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -357,27 +357,11 @@ void std_aligned_free(void* ptr) { #endif } -/// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages. -/// The returned pointer is the aligned one, while the mem argument is the one that needs -/// to be passed to free. With c++17 some of this functionality could be simplified. +/// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages. -#if defined(__linux__) && !defined(__ANDROID__) +#if defined(_WIN32) -void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { - - constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page sizes - size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment - if (posix_memalign(&mem, alignment, size)) - mem = nullptr; -#if defined(MADV_HUGEPAGE) - madvise(mem, allocSize, MADV_HUGEPAGE); -#endif - return mem; -} - -#elif defined(_WIN64) - -static void* aligned_ttmem_alloc_large_pages(size_t allocSize) { +static void* aligned_large_pages_alloc_win(size_t allocSize) { HANDLE hProcessToken { }; LUID luid { }; @@ -422,12 +406,13 @@ static void* aligned_ttmem_alloc_large_pages(size_t allocSize) { return mem; } -void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { +void* aligned_large_pages_alloc(size_t allocSize) { static bool firstCall = true; + void* mem; // Try to allocate large pages - mem = aligned_ttmem_alloc_large_pages(allocSize); + mem = aligned_large_pages_alloc_win(allocSize); // Suppress info strings on the first call. The first call occurs before 'uci' // is received and in that case this output confuses some GUIs. @@ -449,23 +434,31 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { #else -void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { +void* aligned_large_pages_alloc(size_t allocSize) { - constexpr size_t alignment = 64; // assumed cache line size - size_t size = allocSize + alignment - 1; // allocate some extra space - mem = malloc(size); - void* ret = reinterpret_cast((uintptr_t(mem) + alignment - 1) & ~uintptr_t(alignment - 1)); - return ret; +#if defined(__linux__) + constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size +#else + constexpr size_t alignment = 4096; // assumed small page size +#endif + + // round up to multiples of alignment + size_t size = ((allocSize + alignment - 1) / alignment) * alignment; + void *mem = std_aligned_alloc(alignment, size); +#if defined(MADV_HUGEPAGE) + madvise(mem, size, MADV_HUGEPAGE); +#endif + return mem; } #endif -/// aligned_ttmem_free() will free the previously allocated ttmem +/// aligned_large_pages_free() will free the previously allocated ttmem -#if defined(_WIN64) +#if defined(_WIN32) -void aligned_ttmem_free(void* mem) { +void aligned_large_pages_free(void* mem) { if (mem && !VirtualFree(mem, 0, MEM_RELEASE)) { @@ -478,8 +471,8 @@ void aligned_ttmem_free(void* mem) { #else -void aligned_ttmem_free(void *mem) { - free(mem); +void aligned_large_pages_free(void *mem) { + std_aligned_free(mem); } #endif diff --git a/src/misc.h b/src/misc.h index 68b9c884..bc48f303 100644 --- a/src/misc.h +++ b/src/misc.h @@ -33,8 +33,8 @@ void prefetch(void* addr); void start_logger(const std::string& fname); void* std_aligned_alloc(size_t alignment, size_t size); void std_aligned_free(void* ptr); -void* aligned_ttmem_alloc(size_t size, void*& mem); -void aligned_ttmem_free(void* mem); // nop if mem == nullptr +void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes +void aligned_large_pages_free(void* mem); // nop if mem == nullptr void dbg_hit_on(bool b); void dbg_hit_on(bool c, bool b); diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index ed138881..72d18200 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -52,7 +52,7 @@ namespace Eval::NNUE { }; // Input feature converter - AlignedPtr feature_transformer; + LargePagePtr feature_transformer; // Evaluation function AlignedPtr network; @@ -70,14 +70,22 @@ namespace Eval::NNUE { std::memset(pointer.get(), 0, sizeof(T)); } + template + void Initialize(LargePagePtr& pointer) { + + static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); + pointer.reset(reinterpret_cast(aligned_large_pages_alloc(sizeof(T)))); + std::memset(pointer.get(), 0, sizeof(T)); + } + // Read evaluation function parameters template - bool ReadParameters(std::istream& stream, const AlignedPtr& pointer) { + bool ReadParameters(std::istream& stream, T& reference) { std::uint32_t header; header = read_little_endian(stream); if (!stream || header != T::GetHashValue()) return false; - return pointer->ReadParameters(stream); + return reference.ReadParameters(stream); } } // namespace Detail @@ -110,8 +118,8 @@ namespace Eval::NNUE { std::string architecture; if (!ReadHeader(stream, &hash_value, &architecture)) return false; if (hash_value != kHashValue) return false; - if (!Detail::ReadParameters(stream, feature_transformer)) return false; - if (!Detail::ReadParameters(stream, network)) return false; + if (!Detail::ReadParameters(stream, *feature_transformer)) return false; + if (!Detail::ReadParameters(stream, *network)) return false; return stream && stream.peek() == std::ios::traits_type::eof(); } diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h index 5f0d1855..459a93de 100644 --- a/src/nnue/evaluate_nnue.h +++ b/src/nnue/evaluate_nnue.h @@ -40,9 +40,20 @@ namespace Eval::NNUE { } }; + template + struct TtmemDeleter { + void operator()(T* ptr) const { + ptr->~T(); + aligned_large_pages_free(ptr); + } + }; + template using AlignedPtr = std::unique_ptr>; + template + using LargePagePtr = std::unique_ptr>; + } // namespace Eval::NNUE #endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED diff --git a/src/tt.cpp b/src/tt.cpp index 60a3a5f1..dea7c712 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -62,11 +62,12 @@ void TranspositionTable::resize(size_t mbSize) { Threads.main()->wait_for_search_finished(); - aligned_ttmem_free(mem); + aligned_large_pages_free(table); clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster); - table = static_cast(aligned_ttmem_alloc(clusterCount * sizeof(Cluster), mem)); - if (!mem) + + table = static_cast(aligned_large_pages_alloc(clusterCount * sizeof(Cluster))); + if (!table) { std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." << std::endl; diff --git a/src/tt.h b/src/tt.h index fdfd6769..6aa066c5 100644 --- a/src/tt.h +++ b/src/tt.h @@ -73,7 +73,7 @@ class TranspositionTable { static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size"); public: - ~TranspositionTable() { aligned_ttmem_free(mem); } + ~TranspositionTable() { aligned_large_pages_free(table); } void new_search() { generation8 += 8; } // Lower 3 bits are used by PV flag and Bound TTEntry* probe(const Key key, bool& found) const; int hashfull() const; @@ -89,7 +89,6 @@ private: size_t clusterCount; Cluster* table; - void* mem; uint8_t generation8; // Size must be not bigger than TTEntry::genBound8 }; From 9a64e737cfef639f202787161498ba94466ad730 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Wed, 9 Sep 2020 10:49:31 +0200 Subject: [PATCH 35/35] Small cleanups 12 - Clean signature of functions in namespace NNUE - Add comment for countermove based pruning - Remove bestMoveCount variable - Add const qualifier to kpp_board_index array - Fix spaces in get_best_thread() - Fix indention in capture LMR code in search.cpp - Rename TtmemDeleter to LargePageDeleter Closes https://github.com/official-stockfish/Stockfish/pull/3063 No functional change --- src/evaluate.cpp | 8 ++++---- src/evaluate.h | 8 +++----- src/main.cpp | 2 +- src/nnue/evaluate_nnue.cpp | 6 +++--- src/nnue/evaluate_nnue.h | 4 ++-- src/nnue/nnue_common.h | 2 +- src/search.cpp | 20 +++++++++----------- src/search.h | 1 - src/thread.cpp | 20 ++++++++++---------- src/uci.cpp | 2 +- src/ucioption.cpp | 4 ++-- 11 files changed, 36 insertions(+), 41 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index a9159477..d3937823 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -60,7 +60,7 @@ namespace Eval { bool useNNUE; string eval_file_loaded = "None"; - /// init_NNUE() tries to load a nnue network at startup time, or when the engine + /// NNUE::init() tries to load a nnue network at startup time, or when the engine /// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue" /// The name of the nnue network is always retrieved from the EvalFile option. /// We search the given network in three locations: internally (the default @@ -68,7 +68,7 @@ namespace Eval { /// in the engine directory. Distro packagers may define the DEFAULT_NNUE_DIRECTORY /// variable to have the engine search in a special directory in their distro. - void init_NNUE() { + void NNUE::init() { useNNUE = Options["Use NNUE"]; if (!useNNUE) @@ -111,8 +111,8 @@ namespace Eval { } } - /// verify_NNUE() verifies that the last net used was loaded successfully - void verify_NNUE() { + /// NNUE::verify() verifies that the last net used was loaded successfully + void NNUE::verify() { string eval_file = string(Options["EvalFile"]); diff --git a/src/evaluate.h b/src/evaluate.h index c723bd8f..56354cf5 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -32,8 +32,6 @@ namespace Eval { extern bool useNNUE; extern std::string eval_file_loaded; - void init_NNUE(); - void verify_NNUE(); // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the @@ -43,9 +41,9 @@ namespace Eval { namespace NNUE { Value evaluate(const Position& pos); - Value compute_eval(const Position& pos); - void update_eval(const Position& pos); - bool load_eval(std::string streamName, std::istream& stream); + bool load_eval(std::string name, std::istream& stream); + void init(); + void verify(); } // namespace NNUE diff --git a/src/main.cpp b/src/main.cpp index f95db1c2..e6dff918 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -45,7 +45,7 @@ int main(int argc, char* argv[]) { Endgames::init(); Threads.set(size_t(Options["Threads"])); Search::clear(); // After threads are up - Eval::init_NNUE(); + Eval::NNUE::init(); UCI::loop(argc, argv); diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index 72d18200..b5dcd992 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -30,7 +30,7 @@ namespace Eval::NNUE { - uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = { + const uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = { // convention: W - us, B - them // viewed from other side, W and B are reversed { PS_NONE, PS_NONE }, @@ -136,10 +136,10 @@ namespace Eval::NNUE { } // Load eval, from a file stream or a memory stream - bool load_eval(std::string streamName, std::istream& stream) { + bool load_eval(std::string name, std::istream& stream) { Initialize(); - fileName = streamName; + fileName = name; return ReadParameters(stream); } diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h index 459a93de..6cacf37e 100644 --- a/src/nnue/evaluate_nnue.h +++ b/src/nnue/evaluate_nnue.h @@ -41,7 +41,7 @@ namespace Eval::NNUE { }; template - struct TtmemDeleter { + struct LargePageDeleter { void operator()(T* ptr) const { ptr->~T(); aligned_large_pages_free(ptr); @@ -52,7 +52,7 @@ namespace Eval::NNUE { using AlignedPtr = std::unique_ptr>; template - using LargePagePtr = std::unique_ptr>; + using LargePagePtr = std::unique_ptr>; } // namespace Eval::NNUE diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index 7bc905dc..8afea186 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -113,7 +113,7 @@ namespace Eval::NNUE { PS_END2 = 12 * SQUARE_NB + 1 }; - extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB]; + extern const uint32_t kpp_board_index[PIECE_NB][COLOR_NB]; // Type of input feature after conversion using TransformedFeatureType = std::uint8_t; diff --git a/src/search.cpp b/src/search.cpp index 22cb8577..edc020fd 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -225,7 +225,7 @@ void MainThread::search() { Time.init(Limits, us, rootPos.game_ply()); TT.new_search(); - Eval::verify_NNUE(); + Eval::NNUE::verify(); if (rootMoves.empty()) { @@ -462,10 +462,7 @@ void Thread::search() { ++failedHighCnt; } else - { - ++rootMoves[pvIdx].bestMoveCount; break; - } delta += delta / 4 + 5; @@ -1218,14 +1215,14 @@ moves_loop: // When in check, search starts from here } else { - // Increase reduction for captures/promotions if late move and at low depth - if (depth < 8 && moveCount > 2) - r++; + // Increase reduction for captures/promotions if late move and at low depth + if (depth < 8 && moveCount > 2) + r++; - // Unless giving check, this capture is likely bad - if ( !givesCheck - && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha) - r++; + // Unless giving check, this capture is likely bad + if ( !givesCheck + && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha) + r++; } Depth d = std::clamp(newDepth - r, 1, newDepth); @@ -1570,6 +1567,7 @@ moves_loop: // When in check, search starts from here [pos.moved_piece(move)] [to_sq(move)]; + // CounterMove based pruning if ( !captureOrPromotion && moveCount && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold diff --git a/src/search.h b/src/search.h index f60da4a5..72d43c31 100644 --- a/src/search.h +++ b/src/search.h @@ -71,7 +71,6 @@ struct RootMove { Value previousScore = -VALUE_INFINITE; int selDepth = 0; int tbRank = 0; - int bestMoveCount = 0; Value tbScore; std::vector pv; }; diff --git a/src/thread.cpp b/src/thread.cpp index b46fce5e..2fbf745d 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -224,16 +224,16 @@ Thread* ThreadPool::get_best_thread() const { votes[th->rootMoves[0].pv[0]] += (th->rootMoves[0].score - minScore + 14) * int(th->completedDepth); - if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY) - { - // Make sure we pick the shortest mate / TB conversion or stave off mate the longest - if (th->rootMoves[0].score > bestThread->rootMoves[0].score) - bestThread = th; - } - else if ( th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY - || ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY - && votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]])) - bestThread = th; + if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY) + { + // Make sure we pick the shortest mate / TB conversion or stave off mate the longest + if (th->rootMoves[0].score > bestThread->rootMoves[0].score) + bestThread = th; + } + else if ( th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY + || ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY + && votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]])) + bestThread = th; } return bestThread; diff --git a/src/uci.cpp b/src/uci.cpp index 3f3cc458..b63e55ad 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -85,7 +85,7 @@ namespace { Position p; p.set(pos.fen(), Options["UCI_Chess960"], &states->back(), Threads.main()); - Eval::verify_NNUE(); + Eval::NNUE::verify(); sync_cout << "\n" << Eval::trace(p) << sync_endl; } diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 5e747a7f..bb0b8311 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -41,8 +41,8 @@ void on_hash_size(const Option& o) { TT.resize(size_t(o)); } void on_logger(const Option& o) { start_logger(o); } void on_threads(const Option& o) { Threads.set(size_t(o)); } void on_tb_path(const Option& o) { Tablebases::init(o); } -void on_use_NNUE(const Option& ) { Eval::init_NNUE(); } -void on_eval_file(const Option& ) { Eval::init_NNUE(); } +void on_use_NNUE(const Option& ) { Eval::NNUE::init(); } +void on_eval_file(const Option& ) { Eval::NNUE::init(); } /// Our case insensitive less() function as required by UCI protocol bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const {