diff --git a/AUTHORS b/AUTHORS index c96f870a..198dfa5a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -36,10 +36,11 @@ Bryan Cross (crossbr) candirufish Chess13234 Chris Cain (ceebo) +Dale Weiler (graphitemaster) Dan Schmidt (dfannius) Daniel Axtens (daxtens) Daniel Dugovic (ddugovic) -Dariusz Orzechowski +Dariusz Orzechowski (dorzechowski) David Zar Daylen Yang (daylen) DiscanX @@ -62,6 +63,7 @@ Gary Heckman (gheckman) George Sobala (gsobala) gguliash Gian-Carlo Pascutto (gcp) +Deshawn Mohan-Smith (GoldenRare) Gontran Lemaire (gonlem) Goodkov Vasiliy Aleksandrovich (goodkov) Gregor Cramer diff --git a/Top CPU Contributors.txt b/Top CPU Contributors.txt index 0ea5ac72..482e9000 100644 --- a/Top CPU Contributors.txt +++ b/Top CPU Contributors.txt @@ -1,154 +1,173 @@ -Contributors with >10,000 CPU hours as of January 7, 2020 +Contributors with >10,000 CPU hours as of Sept 2, 2020 Thank you! Username CPU Hours Games played -------------------------------------------------- -noobpwnftw 9305707 695548021 -mlang 780050 61648867 -dew 621626 43921547 -mibere 524702 42238645 -crunchy 354587 27344275 -cw 354495 27274181 -fastgm 332801 22804359 -JojoM 295750 20437451 -CSU_Dynasty 262015 21828122 -Fisherman 232181 18939229 -ctoks 218866 17622052 -glinscott 201989 13780820 -tvijlbrief 201204 15337115 -velislav 188630 14348485 -gvreuls 187164 15149976 -bking_US 180289 11876016 -nordlandia 172076 13467830 -leszek 157152 11443978 -Thanar 148021 12365359 -spams 141975 10319326 -drabel 138073 11121749 -vdv 137850 9394330 -mgrabiak 133578 10454324 -TueRens 132485 10878471 -bcross 129683 11557084 -marrco 126078 9356740 -sqrt2 125830 9724586 -robal 122873 9593418 -vdbergh 120766 8926915 -malala 115926 8002293 -CoffeeOne 114241 5004100 -dsmith 113189 7570238 -BrunoBanani 104644 7436849 -Data 92328 8220352 -mhoram 89333 6695109 -davar 87924 7009424 -xoto 81094 6869316 -ElbertoOne 80899 7023771 -grandphish2 78067 6160199 -brabos 77212 6186135 -psk 75733 5984901 -BRAVONE 73875 5054681 -sunu 70771 5597972 -sterni1971 70605 5590573 -MaZePallas 66886 5188978 -Vizvezdenec 63708 4967313 -nssy 63462 5259388 -jromang 61634 4940891 -teddybaer 61231 5407666 -Pking_cda 60099 5293873 -solarlight 57469 5028306 -dv8silencer 56913 3883992 -tinker 54936 4086118 -renouve 49732 3501516 -Freja 49543 3733019 -robnjr 46972 4053117 -rap 46563 3219146 -Bobo1239 46036 3817196 -ttruscott 45304 3649765 -racerschmacer 44881 3975413 -finfish 44764 3370515 -eva42 41783 3599691 -biffhero 40263 3111352 -bigpen0r 39817 3291647 -mhunt 38871 2691355 -ronaldjerum 38820 3240695 -Antihistamine 38785 2761312 -pb00067 38038 3086320 -speedycpu 37591 3003273 -rkl 37207 3289580 -VoyagerOne 37050 3441673 -jbwiebe 35320 2805433 -cuistot 34191 2146279 -homyur 33927 2850481 -manap 32873 2327384 -gri 32538 2515779 -oryx 31267 2899051 -EthanOConnor 30959 2090311 -SC 30832 2730764 -csnodgrass 29505 2688994 -jmdana 29458 2205261 -strelock 28219 2067805 -jkiiski 27832 1904470 -Pyafue 27533 1902349 -Garf 27515 2747562 -eastorwest 27421 2317535 -slakovv 26903 2021889 -Prcuvu 24835 2170122 -anst 24714 2190091 -hyperbolic.tom 24319 2017394 -Patrick_G 23687 1801617 -Sharaf_DG 22896 1786697 -nabildanial 22195 1519409 -chriswk 21931 1868317 -achambord 21665 1767323 -Zirie 20887 1472937 -team-oh 20217 1636708 -Isidor 20096 1680691 -ncfish1 19931 1520927 -nesoneg 19875 1463031 -Spprtr 19853 1548165 -JanErik 19849 1703875 -agg177 19478 1395014 -SFTUser 19231 1567999 -xor12 19017 1680165 -sg4032 18431 1641865 -rstoesser 18118 1293588 -MazeOfGalious 17917 1629593 -j3corre 17743 941444 -cisco2015 17725 1690126 -ianh2105 17706 1632562 -dex 17678 1467203 -jundery 17194 1115855 -iisiraider 17019 1101015 -horst.prack 17012 1465656 -Adrian.Schmidt123 16563 1281436 -purplefishies 16342 1092533 -wei 16274 1745989 -ville 16144 1384026 -eudhan 15712 1283717 -OuaisBla 15581 972000 -DragonLord 15559 1162790 -dju 14716 875569 -chris 14479 1487385 -0xB00B1ES 14079 1001120 -OssumOpossum 13776 1007129 -enedene 13460 905279 -bpfliegel 13346 884523 -Ente 13198 1156722 -IgorLeMasson 13087 1147232 -jpulman 13000 870599 -ako027ako 12775 1173203 -Nikolay.IT 12352 1068349 -Andrew Grant 12327 895539 -joster 12008 950160 -AdrianSA 11996 804972 -Nesa92 11455 1111993 -fatmurphy 11345 853210 -Dark_wizzie 11108 1007152 -modolief 10869 896470 -mschmidt 10757 803401 -infinity 10594 727027 -mabichito 10524 749391 -Thomas A. Anderson 10474 732094 -thijsk 10431 719357 -Flopzee 10339 894821 -crocogoat 10104 1013854 -SapphireBrand 10104 969604 -stocky 10017 699440 +noobpwnftw 19352969 1231459677 +mlang 957168 61657446 +dew 949885 56893432 +mibere 703817 46865007 +crunchy 427035 27344275 +cw 416006 27521077 +JojoM 415904 24479564 +fastgm 404873 23953472 +CSU_Dynasty 335774 22850550 +tvijlbrief 335199 21871270 +Fisherman 325053 21786603 +gvreuls 311480 20751516 +ctoks 275877 18710423 +velislav 241267 15596372 +glinscott 217799 13780820 +nordlandia 211692 13484886 +bcross 206213 14934233 +bking_US 198894 11876016 +leszek 189170 11446821 +mgrabiak 183896 11778092 +drabel 181408 12489478 +TueRens 181349 12192000 +Thanar 179852 12365359 +vdv 175171 9881246 +robal 166948 10702862 +spams 157128 10319326 +marrco 149947 9376421 +sqrt2 147963 9724586 +vdbergh 137041 8926915 +CoffeeOne 136294 5004100 +malala 136182 8002293 +mhoram 128934 8177193 +davar 122092 7960001 +dsmith 122059 7570238 +xoto 119696 8222144 +grandphish2 116481 7582197 +Data 113305 8220352 +BrunoBanani 112960 7436849 +ElbertoOne 99028 7023771 +MaZePallas 98571 6362619 +brabos 92118 6186135 +psk 89957 5984901 +sunu 88463 6007033 +sterni1971 86948 5613788 +Vizvezdenec 83752 5343724 +BRAVONE 81239 5054681 +nssy 76497 5259388 +teddybaer 75125 5407666 +Pking_cda 73776 5293873 +jromang 70695 4940891 +solarlight 70517 5028306 +dv8silencer 70287 3883992 +Bobo1239 68515 4652287 +racerschmacer 67468 4935996 +manap 66273 4121774 +tinker 63458 4213726 +linrock 59082 4516053 +robnjr 57262 4053117 +Freja 56938 3733019 +ttruscott 56005 3679485 +renouve 53811 3501516 +cuistot 52532 3014920 +finfish 51360 3370515 +eva42 51272 3599691 +rkl 50759 3840947 +rap 49985 3219146 +pb00067 49727 3298270 +ronaldjerum 47654 3240695 +bigpen0r 47278 3291647 +biffhero 46564 3111352 +VoyagerOne 45386 3445881 +speedycpu 43842 3003273 +jbwiebe 43305 2805433 +Antihistamine 41788 2761312 +mhunt 41735 2691355 +eastorwest 40387 2812173 +homyur 39893 2850481 +gri 39871 2515779 +oryx 38228 2941656 +0x3C33 37773 2529097 +SC 37290 2731014 +csnodgrass 36207 2688994 +jmdana 36108 2205261 +strelock 34716 2074055 +Garf 33800 2747562 +EthanOConnor 33370 2090311 +slakovv 32915 2021889 +Spprtr 32591 2139601 +Prcuvu 30377 2170122 +anst 30301 2190091 +jkiiski 30136 1904470 +hyperbolic.tom 29840 2017394 +Pyafue 29650 1902349 +OuaisBla 27629 1578000 +chriswk 26902 1868317 +achambord 26582 1767323 +Patrick_G 26276 1801617 +yorkman 26193 1992080 +SFTUser 25182 1675689 +nabildanial 24942 1519409 +Sharaf_DG 24765 1786697 +ncfish1 24411 1520927 +agg177 23890 1395014 +JanErik 23408 1703875 +Isidor 23388 1680691 +Norabor 22976 1587862 +cisco2015 22880 1759669 +Zirie 22542 1472937 +team-oh 22272 1636708 +MazeOfGalious 21978 1629593 +sg4032 21945 1643065 +ianh2105 21725 1632562 +xor12 21628 1680365 +dex 21612 1467203 +nesoneg 21494 1463031 +horst.prack 20878 1465656 +0xB00B1ES 20590 1208666 +j3corre 20405 941444 +Adrian.Schmidt123 20316 1281436 +wei 19973 1745989 +rstoesser 19569 1293588 +eudhan 19274 1283717 +Ente 19070 1373058 +jundery 18445 1115855 +iisiraider 18247 1101015 +ville 17883 1384026 +chris 17698 1487385 +purplefishies 17595 1092533 +DragonLord 17014 1162790 +dju 16515 929427 +IgorLeMasson 16064 1147232 +ako027ako 15671 1173203 +Nikolay.IT 15154 1068349 +Andrew Grant 15114 895539 +yurikvelo 15027 1165616 +OssumOpossum 14857 1007129 +enedene 14476 905279 +bpfliegel 14298 884523 +jpulman 13982 870599 +joster 13794 950160 +Nesa92 13786 1114691 +Dark_wizzie 13422 1007152 +Hjax 13350 900887 +Fifis 13313 965473 +mabichito 12903 749391 +thijsk 12886 722107 +crocogoat 12876 1048802 +AdrianSA 12860 804972 +Flopzee 12698 894821 +fatmurphy 12547 853210 +SapphireBrand 12416 969604 +modolief 12386 896470 +scuzzi 12362 833465 +pgontarz 12151 848794 +stocky 11954 699440 +mschmidt 11941 803401 +infinity 11470 727027 +torbjo 11387 728873 +Thomas A. Anderson 11372 732094 +snicolet 11106 869170 +amicic 10779 733593 +rpngn 10712 688203 +d64 10680 771144 +basepi 10637 744851 +jjoshua2 10559 670905 +dzjp 10343 732529 +ols 10259 570669 +lbraesch 10252 647825 diff --git a/appveyor.yml b/appveyor.yml index a3732a23..ab608409 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -63,7 +63,7 @@ build_script: - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal - ps: | # Download default NNUE net from fishtest - $nnuenet = Get-Content -Path src\ucioption.cpp | Select-String -CaseSensitive -Pattern "Option" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue" + $nnuenet = Get-Content -Path src\evaluate.h | Select-String -CaseSensitive -Pattern "EvalFileDefaultName" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue" $dummy = $nnuenet -match "(?nn-[a-z0-9]{12}.nnue)" $nnuenet = $Matches.nnuenet Write-Host "Default net:" $nnuenet diff --git a/src/Makefile b/src/Makefile index 69517c3c..2449f541 100644 --- a/src/Makefile +++ b/src/Makefile @@ -101,12 +101,17 @@ VPATH = syzygy:nnue:nnue/features:eval:extra:learn ### 2.1. General and architecture defaults +ifeq ($(ARCH),) + ARCH = x86-64-modern + help_skip_sanity = yes +endif # explicitly check for the list of supported architectures (as listed with make help), # the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true` -ifeq ($(ARCH),$(filter $(ARCH),x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \ - x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ - x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \ - armv7 armv7-neon armv8 apple-silicon general-64 general-32)) +ifeq ($(ARCH), $(filter $(ARCH), \ + x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \ + x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ + x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \ + armv7 armv7-neon armv8 apple-silicon general-64 general-32)) SUPPORTED_ARCH=true else SUPPORTED_ARCH=false @@ -130,7 +135,6 @@ avx512 = no vnni256 = no vnni512 = no neon = no -ARCH = x86-64-modern STRIP = strip ### 2.2 Architecture specific @@ -394,19 +398,6 @@ ifeq ($(COMP),clang) endif endif -ifeq ($(comp),icc) - profile_make = icc-profile-make - profile_use = icc-profile-use -else -ifeq ($(comp),clang) - profile_make = clang-profile-make - profile_use = clang-profile-use -else - profile_make = gcc-profile-make - profile_use = gcc-profile-use -endif -endif - ifeq ($(KERNEL),Darwin) CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14 LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14 @@ -418,20 +409,30 @@ endif # Currently we don't know how to make PGO builds with the NDK yet. ifeq ($(COMP),ndk) CXXFLAGS += -stdlib=libc++ -fPIE + comp=clang ifeq ($(arch),armv7) - comp=armv7a-linux-androideabi16-clang CXX=armv7a-linux-androideabi16-clang++ CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon STRIP=arm-linux-androideabi-strip endif ifeq ($(arch),armv8) - comp=aarch64-linux-android21-clang CXX=aarch64-linux-android21-clang++ STRIP=aarch64-linux-android-strip endif LDFLAGS += -static-libstdc++ -pie -lm -latomic endif +ifeq ($(comp),icc) + profile_make = icc-profile-make + profile_use = icc-profile-use +else ifeq ($(comp),clang) + profile_make = clang-profile-make + profile_use = clang-profile-use +else + profile_make = gcc-profile-make + profile_use = gcc-profile-use +endif + ### Travis CI script uses COMPILER to overwrite CXX ifdef COMPILER COMPCXX=$(COMPILER) @@ -622,11 +623,13 @@ endif ### needs access to the optimization flags. ifeq ($(optimize),yes) ifeq ($(debug), no) - ifeq ($(COMP),ndk) - CXXFLAGS += -flto=thin - LDFLAGS += $(CXXFLAGS) - else ifeq ($(comp),clang) + ifeq ($(comp),clang) CXXFLAGS += -flto=thin + ifneq ($(findstring MINGW,$(KERNEL)),) + CXXFLAGS += -fuse-ld=lld + else ifneq ($(findstring MSYS,$(KERNEL)),) + CXXFLAGS += -fuse-ld=lld + endif LDFLAGS += $(CXXFLAGS) # GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be @@ -650,10 +653,12 @@ ifeq ($(debug), no) # So, only enable it for a cross from Linux by default. else ifeq ($(comp),mingw) ifeq ($(KERNEL),Linux) + ifneq ($(arch),i386) CXXFLAGS += -flto LDFLAGS += $(CXXFLAGS) -flto=jobserver endif endif + endif endif endif @@ -729,11 +734,12 @@ help: @echo "make -j build ARCH=x86-64-ssse3 COMP=clang" @echo "" @echo "-------------------------------" -ifeq ($(SUPPORTED_ARCH), true) +ifeq ($(SUPPORTED_ARCH)$(help_skip_sanity), true) @echo "The selected architecture $(ARCH) will enable the following configuration: " @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity else @echo "Specify a supported architecture with the ARCH option for more details" + @echo "" endif @@ -741,7 +747,7 @@ endif config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \ clang-profile-use clang-profile-make -build: config-sanity +build: config-sanity net $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all profile-build: net config-sanity objclean profileclean @@ -768,12 +774,13 @@ install: -cp $(EXE) $(BINDIR) -strip $(BINDIR)/$(EXE) -#clean all +# clean all clean: objclean profileclean @rm -f .depend *~ core +# evaluation network (nnue) net: - $(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) + $(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) @echo "Default net: $(nnuenet)" $(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet)) $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) @@ -795,7 +802,6 @@ net: echo "shasum / sha256sum not found, skipping net validation"; \ fi - # clean binaries and objects objclean: @rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o ./learn/*.o ./extra/*.o ./eval/*.o diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 806e9840..ffb631a2 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -164,5 +164,7 @@ vector setup_bench(const Position& current, istream& is) { ++posCounter; } + list.emplace_back("setoption name Use NNUE value true"); + return list; } diff --git a/src/evaluate.cpp b/src/evaluate.cpp index e619a747..d3937823 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -20,64 +20,126 @@ #include #include #include // For std::memset +#include #include #include #include -#include +#include +#include #include "bitboard.h" #include "evaluate.h" #include "material.h" +#include "misc.h" #include "pawns.h" #include "thread.h" #include "uci.h" +#include "incbin/incbin.h" + + +// Macro to embed the default NNUE file data in the engine binary (using incbin.h, by Dale Weiler). +// This macro invocation will declare the following three variables +// const unsigned char gEmbeddedNNUEData[]; // a pointer to the embedded data +// const unsigned char *const gEmbeddedNNUEEnd; // a marker to the end +// const unsigned int gEmbeddedNNUESize; // the size of the embedded file +// Note that this does not work in Microsof Visual Studio. +#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF) + INCBIN(EmbeddedNNUE, EvalFileDefaultName); +#else + const unsigned char gEmbeddedNNUEData[1] = {0x0}; + const unsigned char *const gEmbeddedNNUEEnd = &gEmbeddedNNUEData[1]; + const unsigned int gEmbeddedNNUESize = 1; +#endif + + +using namespace std; +using namespace Eval::NNUE; namespace Eval { - UseNNUEMode useNNUE; - std::string eval_file_loaded="None"; + bool useNNUE; + string eval_file_loaded = "None"; - static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode) - { - if (mode == "false") - return UseNNUEMode::False; - else if (mode == "true") - return UseNNUEMode::True; - else if (mode == "pure") - return UseNNUEMode::Pure; + /// NNUE::init() tries to load a nnue network at startup time, or when the engine + /// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue" + /// The name of the nnue network is always retrieved from the EvalFile option. + /// We search the given network in three locations: internally (the default + /// network may be embedded in the binary), in the active working directory and + /// in the engine directory. Distro packagers may define the DEFAULT_NNUE_DIRECTORY + /// variable to have the engine search in a special directory in their distro. - return UseNNUEMode::False; + void NNUE::init() { + + useNNUE = Options["Use NNUE"]; + if (!useNNUE) + return; + + string eval_file = string(Options["EvalFile"]); + + #if defined(DEFAULT_NNUE_DIRECTORY) + #define stringify2(x) #x + #define stringify(x) stringify2(x) + vector dirs = { "" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) }; + #else + vector dirs = { "" , "" , CommandLine::binaryDirectory }; + #endif + + for (string directory : dirs) + if (eval_file_loaded != eval_file) + { + if (directory != "") + { + ifstream stream(directory + eval_file, ios::binary); + if (load_eval(eval_file, stream)) + eval_file_loaded = eval_file; + } + + if (directory == "" && eval_file == EvalFileDefaultName) + { + // C++ way to prepare a buffer for a memory stream + class MemoryBuffer : public basic_streambuf { + public: MemoryBuffer(char* p, size_t n) { setg(p, p, p + n); setp(p, p + n); } + }; + + MemoryBuffer buffer(const_cast(reinterpret_cast(gEmbeddedNNUEData)), + size_t(gEmbeddedNNUESize)); + + istream stream(&buffer); + if (load_eval(eval_file, stream)) + eval_file_loaded = eval_file; + } + } } - void init_NNUE() { + /// NNUE::verify() verifies that the last net used was loaded successfully + void NNUE::verify() { - useNNUE = nnue_mode_from_option(Options["Use NNUE"]); + string eval_file = string(Options["EvalFile"]); - std::string eval_file = std::string(Options["EvalFile"]); - if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file) - if (Eval::NNUE::load_eval_file(eval_file)) - eval_file_loaded = eval_file; - } - - void verify_NNUE() { - - std::string eval_file = std::string(Options["EvalFile"]); - if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file) { + if (useNNUE && eval_file_loaded != eval_file) + { UCI::OptionsMap defaults; UCI::init(defaults); - sync_cout << "info string ERROR: NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully." << sync_endl; - sync_cout << "info string ERROR: The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << sync_endl; - sync_cout << "info string ERROR: The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << sync_endl; - sync_cout << "info string ERROR: If the UCI option Use NNUE is set to true, network evaluation parameters compatible with the program must be available." << sync_endl; - sync_cout << "info string ERROR: The engine will be terminated now." << sync_endl; - std::exit(EXIT_FAILURE); + string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available."; + string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully."; + string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file."; + string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + string(defaults["EvalFile"]); + string msg5 = "The engine will be terminated now."; + + sync_cout << "info string ERROR: " << msg1 << sync_endl; + sync_cout << "info string ERROR: " << msg2 << sync_endl; + sync_cout << "info string ERROR: " << msg3 << sync_endl; + sync_cout << "info string ERROR: " << msg4 << sync_endl; + sync_cout << "info string ERROR: " << msg5 << sync_endl; + + exit(EXIT_FAILURE); } - if (useNNUE != UseNNUEMode::False) - sync_cout << "info string NNUE evaluation using " << eval_file << " enabled." << sync_endl; + if (useNNUE) + sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl; else - sync_cout << "info string classical evaluation enabled." << sync_endl; + sync_cout << "info string classical evaluation enabled" << sync_endl; } } @@ -165,26 +227,26 @@ namespace { // Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a // pawn protected square on rank 4 to 6 which is also safe from a pawn attack. - constexpr Score Outpost[] = { S(56, 36), S(30, 23) }; + constexpr Score Outpost[] = { S(56, 34), S(31, 23) }; // PassedRank[Rank] contains a bonus according to the rank of a passed pawn constexpr Score PassedRank[RANK_NB] = { - S(0, 0), S(10, 28), S(17, 33), S(15, 41), S(62, 72), S(168, 177), S(276, 260) + S(0, 0), S(9, 28), S(15, 31), S(17, 39), S(64, 70), S(171, 177), S(277, 260) }; // RookOnFile[semiopen/open] contains bonuses for each rook when there is // no (friendly) pawn on the rook file. - constexpr Score RookOnFile[] = { S(19, 7), S(48, 29) }; + constexpr Score RookOnFile[] = { S(19, 7), S(48, 27) }; // ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to // which piece type attacks which one. Attacks on lesser pieces which are // pawn-defended are not considered. constexpr Score ThreatByMinor[PIECE_TYPE_NB] = { - S(0, 0), S(5, 32), S(57, 41), S(77, 56), S(88, 119), S(79, 161) + S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162) }; constexpr Score ThreatByRook[PIECE_TYPE_NB] = { - S(0, 0), S(3, 46), S(37, 68), S(42, 60), S(0, 38), S(58, 41) + S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43) }; // Assorted bonuses and penalties @@ -952,18 +1014,30 @@ make_v: /// evaluation of the position from the point of view of the side to move. Value Eval::evaluate(const Position& pos) { - if (useNNUE == UseNNUEMode::Pure) { - return NNUE::evaluate(pos); + + Value v; + + if (!Eval::useNNUE) + v = Evaluation(pos).value(); + else + { + // scale and shift NNUE for compatibility with search and classical evaluation + auto adjusted_NNUE = [&](){ return NNUE::evaluate(pos) * 5 / 4 + Tempo; }; + + // if there is PSQ imbalance use classical eval, with small probability if it is small + Value psq = Value(abs(eg_value(pos.psq_score()))); + int r50 = 16 + pos.rule50_count(); + bool largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50; + bool classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB)); + + v = classical ? Evaluation(pos).value() : adjusted_NNUE(); + + // if the classical eval is small and imbalance large, use NNUE nevertheless. + if ( largePsq + && abs(v) * 16 < NNUEThreshold2 * r50) + v = adjusted_NNUE(); } - bool classical = useNNUE == UseNNUEMode::False - || abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count()); - Value v = classical ? Evaluation(pos).value() - : NNUE::evaluate(pos) * 5 / 4 + Tempo; - - if (classical && useNNUE != UseNNUEMode::False && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count())) - v = NNUE::evaluate(pos) * 5 / 4 + Tempo; - // Damp down the evaluation linearly when shuffling v = v * (100 - pos.rule50_count()) / 100; @@ -1018,7 +1092,7 @@ std::string Eval::trace(const Position& pos) { ss << "\nClassical evaluation: " << to_cp(v) << " (white side)\n"; - if (useNNUE != UseNNUEMode::False) + if (Eval::useNNUE) { v = NNUE::evaluate(pos); v = pos.side_to_move() == WHITE ? v : -v; diff --git a/src/evaluate.h b/src/evaluate.h index 900a77fc..e6ac7e1c 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -38,15 +38,18 @@ namespace Eval { extern UseNNUEMode useNNUE; extern std::string eval_file_loaded; - void init_NNUE(); - void verify_NNUE(); + + // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue + // for the build process (profile-build and fishtest) to work. Do not change the + // name of the macro, as it is used in the Makefile. + #define EvalFileDefaultName "nn-03744f8d56d8.nnue" namespace NNUE { Value evaluate(const Position& pos); - Value compute_eval(const Position& pos); - void update_eval(const Position& pos); - bool load_eval_file(const std::string& evalFile); + bool load_eval(std::string name, std::istream& stream); + void init(); + void verify(); } // namespace NNUE diff --git a/src/incbin/UNLICENCE b/src/incbin/UNLICENCE new file mode 100644 index 00000000..32484ab5 --- /dev/null +++ b/src/incbin/UNLICENCE @@ -0,0 +1,26 @@ +The file "incbin.h" is free and unencumbered software released into +the public domain by Dale Weiler, see: + + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to diff --git a/src/incbin/incbin.h b/src/incbin/incbin.h new file mode 100755 index 00000000..c19684d7 --- /dev/null +++ b/src/incbin/incbin.h @@ -0,0 +1,368 @@ +/** + * @file incbin.h + * @author Dale Weiler + * @brief Utility for including binary files + * + * Facilities for including binary files into the current translation unit and + * making use from them externally in other translation units. + */ +#ifndef INCBIN_HDR +#define INCBIN_HDR +#include +#if defined(__AVX512BW__) || \ + defined(__AVX512CD__) || \ + defined(__AVX512DQ__) || \ + defined(__AVX512ER__) || \ + defined(__AVX512PF__) || \ + defined(__AVX512VL__) || \ + defined(__AVX512F__) +# define INCBIN_ALIGNMENT_INDEX 6 +#elif defined(__AVX__) || \ + defined(__AVX2__) +# define INCBIN_ALIGNMENT_INDEX 5 +#elif defined(__SSE__) || \ + defined(__SSE2__) || \ + defined(__SSE3__) || \ + defined(__SSSE3__) || \ + defined(__SSE4_1__) || \ + defined(__SSE4_2__) || \ + defined(__neon__) +# define INCBIN_ALIGNMENT_INDEX 4 +#elif ULONG_MAX != 0xffffffffu +# define INCBIN_ALIGNMENT_INDEX 3 +# else +# define INCBIN_ALIGNMENT_INDEX 2 +#endif + +/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */ +#define INCBIN_ALIGN_SHIFT_0 1 +#define INCBIN_ALIGN_SHIFT_1 2 +#define INCBIN_ALIGN_SHIFT_2 4 +#define INCBIN_ALIGN_SHIFT_3 8 +#define INCBIN_ALIGN_SHIFT_4 16 +#define INCBIN_ALIGN_SHIFT_5 32 +#define INCBIN_ALIGN_SHIFT_6 64 + +/* Actual alignment value */ +#define INCBIN_ALIGNMENT \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \ + INCBIN_ALIGNMENT_INDEX) + +/* Stringize */ +#define INCBIN_STR(X) \ + #X +#define INCBIN_STRINGIZE(X) \ + INCBIN_STR(X) +/* Concatenate */ +#define INCBIN_CAT(X, Y) \ + X ## Y +#define INCBIN_CONCATENATE(X, Y) \ + INCBIN_CAT(X, Y) +/* Deferred macro expansion */ +#define INCBIN_EVAL(X) \ + X +#define INCBIN_INVOKE(N, ...) \ + INCBIN_EVAL(N(__VA_ARGS__)) + +/* Green Hills uses a different directive for including binary data */ +#if defined(__ghs__) +# if (__ghs_asm == 2) +# define INCBIN_MACRO ".file" +/* Or consider the ".myrawdata" entry in the ld file */ +# else +# define INCBIN_MACRO "\tINCBIN" +# endif +#else +# define INCBIN_MACRO ".incbin" +#endif + +#ifndef _MSC_VER +# define INCBIN_ALIGN \ + __attribute__((aligned(INCBIN_ALIGNMENT))) +#else +# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT)) +#endif + +#if defined(__arm__) || /* GNU C and RealView */ \ + defined(__arm) || /* Diab */ \ + defined(_ARM) /* ImageCraft */ +# define INCBIN_ARM +#endif + +#ifdef __GNUC__ +/* Utilize .balign where supported */ +# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".balign 1\n" +#elif defined(INCBIN_ARM) +/* + * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is + * the shift count. This is the value passed to `.align' + */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n" +# define INCBIN_ALIGN_BYTE ".align 0\n" +#else +/* We assume other inline assembler's treat `.align' as `.balign' */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".align 1\n" +#endif + +/* INCBIN_CONST is used by incbin.c generated files */ +#if defined(__cplusplus) +# define INCBIN_EXTERNAL extern "C" +# define INCBIN_CONST extern const +#else +# define INCBIN_EXTERNAL extern +# define INCBIN_CONST const +#endif + +/** + * @brief Optionally override the linker section into which data is emitted. + * + * @warning If you use this facility, you'll have to deal with platform-specific linker output + * section naming on your own + * + * Overriding the default linker output section, e.g for esp8266/Arduino: + * @code + * #define INCBIN_OUTPUT_SECTION ".irom.text" + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * // Data is emitted into program memory that never gets copied to RAM + * @endcode + */ +#if !defined(INCBIN_OUTPUT_SECTION) +# if defined(__APPLE__) +# define INCBIN_OUTPUT_SECTION ".const_data" +# else +# define INCBIN_OUTPUT_SECTION ".rodata" +# endif +#endif + +#if defined(__APPLE__) +/* The directives are different for Apple branded compilers */ +# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# define INCBIN_INT ".long " +# define INCBIN_MANGLE "_" +# define INCBIN_BYTE ".byte " +# define INCBIN_TYPE(...) +#else +# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# if defined(__ghs__) +# define INCBIN_INT ".word " +# else +# define INCBIN_INT ".int " +# endif +# if defined(__USER_LABEL_PREFIX__) +# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__) +# else +# define INCBIN_MANGLE "" +# endif +# if defined(INCBIN_ARM) +/* On arm assemblers, `@' is used as a line comment token */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n" +# elif defined(__MINGW32__) || defined(__MINGW64__) +/* Mingw doesn't support this directive either */ +# define INCBIN_TYPE(NAME) +# else +/* It's safe to use `@' on other architectures */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n" +# endif +# define INCBIN_BYTE ".byte " +#endif + +/* List of style types used for symbol names */ +#define INCBIN_STYLE_CAMEL 0 +#define INCBIN_STYLE_SNAKE 1 + +/** + * @brief Specify the prefix to use for symbol names. + * + * By default this is `g', producing symbols of the form: + * @code + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char gFooData[]; + * // const unsigned char *const gFooEnd; + * // const unsigned int gFooSize; + * @endcode + * + * If however you specify a prefix before including: e.g: + * @code + * #define INCBIN_PREFIX incbin + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols instead: + * // const unsigned char incbinFooData[]; + * // const unsigned char *const incbinFooEnd; + * // const unsigned int incbinFooSize; + * @endcode + */ +#if !defined(INCBIN_PREFIX) +# define INCBIN_PREFIX g +#endif + +/** + * @brief Specify the style used for symbol names. + * + * Possible options are + * - INCBIN_STYLE_CAMEL "CamelCase" + * - INCBIN_STYLE_SNAKE "snake_case" + * + * Default option is *INCBIN_STYLE_CAMEL* producing symbols of the form: + * @code + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char FooData[]; + * // const unsigned char *const FooEnd; + * // const unsigned int FooSize; + * @endcode + * + * If however you specify a style before including: e.g: + * @code + * #define INCBIN_STYLE INCBIN_STYLE_SNAKE + * #include "incbin.h" + * INCBIN(foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char foo_data[]; + * // const unsigned char *const foo_end; + * // const unsigned int foo_size; + * @endcode + */ +#if !defined(INCBIN_STYLE) +# define INCBIN_STYLE INCBIN_STYLE_CAMEL +#endif + +/* Style lookup tables */ +#define INCBIN_STYLE_0_DATA Data +#define INCBIN_STYLE_0_END End +#define INCBIN_STYLE_0_SIZE Size +#define INCBIN_STYLE_1_DATA _data +#define INCBIN_STYLE_1_END _end +#define INCBIN_STYLE_1_SIZE _size + +/* Style lookup: returning identifier */ +#define INCBIN_STYLE_IDENT(TYPE) \ + INCBIN_CONCATENATE( \ + INCBIN_STYLE_, \ + INCBIN_CONCATENATE( \ + INCBIN_EVAL(INCBIN_STYLE), \ + INCBIN_CONCATENATE(_, TYPE))) + +/* Style lookup: returning string literal */ +#define INCBIN_STYLE_STRING(TYPE) \ + INCBIN_STRINGIZE( \ + INCBIN_STYLE_IDENT(TYPE)) \ + +/* Generate the global labels by indirectly invoking the macro with our style + * type and concatenating the name against them. */ +#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \ + INCBIN_INVOKE( \ + INCBIN_GLOBAL, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) \ + INCBIN_INVOKE( \ + INCBIN_TYPE, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) + +/** + * @brief Externally reference binary data included in another translation unit. + * + * Produces three external symbols that reference the binary data included in + * another translation unit. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name given for the binary data + * + * @code + * INCBIN_EXTERN(Foo); + * + * // Now you have the following symbols: + * // extern const unsigned char FooData[]; + * // extern const unsigned char *const FooEnd; + * // extern const unsigned int FooSize; + * @endcode + */ +#define INCBIN_EXTERN(NAME) \ + INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(DATA))[]; \ + INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char *const \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(END)); \ + INCBIN_EXTERNAL const unsigned int \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(SIZE)) + +/** + * @brief Include a binary file into the current translation unit. + * + * Includes a binary file into the current translation unit, producing three symbols + * for objects that encode the data and size respectively. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name to associate with this binary data (as an identifier.) + * @param FILENAME The file to include (as a string literal.) + * + * @code + * INCBIN(Icon, "icon.png"); + * + * // Now you have the following symbols: + * // const unsigned char IconData[]; + * // const unsigned char *const IconEnd; + * // const unsigned int IconSize; + * @endcode + * + * @warning This must be used in global scope + * @warning The identifiers may be different if INCBIN_STYLE is not default + * + * To externally reference the data included by this in another translation unit + * please @see INCBIN_EXTERN. + */ +#ifdef _MSC_VER +#define INCBIN(NAME, FILENAME) \ + INCBIN_EXTERN(NAME) +#else +#define INCBIN(NAME, FILENAME) \ + __asm__(INCBIN_SECTION \ + INCBIN_GLOBAL_LABELS(NAME, DATA) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \ + INCBIN_MACRO " \"" FILENAME "\"\n" \ + INCBIN_GLOBAL_LABELS(NAME, END) \ + INCBIN_ALIGN_BYTE \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \ + INCBIN_BYTE "1\n" \ + INCBIN_GLOBAL_LABELS(NAME, SIZE) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \ + INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \ + INCBIN_ALIGN_HOST \ + ".text\n" \ + ); \ + INCBIN_EXTERN(NAME) + +#endif +#endif diff --git a/src/main.cpp b/src/main.cpp index fbad6622..e6dff918 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -35,6 +35,7 @@ int main(int argc, char* argv[]) { std::cout << engine_info() << std::endl; + CommandLine::init(argc, argv); UCI::init(Options); Tune::init(); PSQT::init(); @@ -44,7 +45,7 @@ int main(int argc, char* argv[]) { Endgames::init(); Threads.set(size_t(Options["Threads"])); Search::clear(); // After threads are up - Eval::init_NNUE(); + Eval::NNUE::init(); UCI::loop(argc, argv); diff --git a/src/misc.cpp b/src/misc.cpp index 5ef5ecdc..d9bc47e3 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -132,6 +132,7 @@ public: } // namespace + /// engine_info() returns the full name of the current Stockfish version. This /// will be either "Stockfish DD-MM-YY" (where DD-MM-YY is the date when /// the program was compiled) or "Stockfish ", depending on whether @@ -356,27 +357,11 @@ void std_aligned_free(void* ptr) { #endif } -/// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages. -/// The returned pointer is the aligned one, while the mem argument is the one that needs -/// to be passed to free. With c++17 some of this functionality could be simplified. +/// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages. -#if defined(__linux__) && !defined(__ANDROID__) +#if defined(_WIN32) -void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { - - constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page sizes - size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment - if (posix_memalign(&mem, alignment, size)) - mem = nullptr; -#if defined(MADV_HUGEPAGE) - madvise(mem, allocSize, MADV_HUGEPAGE); -#endif - return mem; -} - -#elif defined(_WIN64) - -static void* aligned_ttmem_alloc_large_pages(size_t allocSize) { +static void* aligned_large_pages_alloc_win(size_t allocSize) { HANDLE hProcessToken { }; LUID luid { }; @@ -421,12 +406,13 @@ static void* aligned_ttmem_alloc_large_pages(size_t allocSize) { return mem; } -void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { +void* aligned_large_pages_alloc(size_t allocSize) { static bool firstCall = true; + void* mem; // Try to allocate large pages - mem = aligned_ttmem_alloc_large_pages(allocSize); + mem = aligned_large_pages_alloc_win(allocSize); // Suppress info strings on the first call. The first call occurs before 'uci' // is received and in that case this output confuses some GUIs. @@ -448,23 +434,31 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { #else -void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { +void* aligned_large_pages_alloc(size_t allocSize) { - constexpr size_t alignment = 64; // assumed cache line size - size_t size = allocSize + alignment - 1; // allocate some extra space - mem = malloc(size); - void* ret = reinterpret_cast((uintptr_t(mem) + alignment - 1) & ~uintptr_t(alignment - 1)); - return ret; +#if defined(__linux__) + constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size +#else + constexpr size_t alignment = 4096; // assumed small page size +#endif + + // round up to multiples of alignment + size_t size = ((allocSize + alignment - 1) / alignment) * alignment; + void *mem = std_aligned_alloc(alignment, size); +#if defined(MADV_HUGEPAGE) + madvise(mem, size, MADV_HUGEPAGE); +#endif + return mem; } #endif -/// aligned_ttmem_free() will free the previously allocated ttmem +/// aligned_large_pages_free() will free the previously allocated ttmem -#if defined(_WIN64) +#if defined(_WIN32) -void aligned_ttmem_free(void* mem) { +void aligned_large_pages_free(void* mem) { if (mem && !VirtualFree(mem, 0, MEM_RELEASE)) { @@ -477,8 +471,8 @@ void aligned_ttmem_free(void* mem) { #else -void aligned_ttmem_free(void *mem) { - free(mem); +void aligned_large_pages_free(void *mem) { + std_aligned_free(mem); } #endif @@ -590,109 +584,60 @@ void bindThisThread(size_t idx) { } // namespace WinProcGroup -// Returns a string that represents the current time. (Used when learning evaluation functions) -std::string now_string() -{ - // Using std::ctime(), localtime() gives a warning that MSVC is not secure. - // This shouldn't happen in the C++ standard, but... - -#if defined(_MSC_VER) - // C4996 : 'ctime' : This function or variable may be unsafe.Consider using ctime_s instead. -#pragma warning(disable : 4996) +#ifdef _WIN32 +#include +#define GETCWD _getcwd +#else +#include +#define GETCWD getcwd #endif - auto now = std::chrono::system_clock::now(); - auto tp = std::chrono::system_clock::to_time_t(now); - auto result = string(std::ctime(&tp)); +namespace CommandLine { - // remove line endings if they are included at the end - while (*result.rbegin() == '\n' || (*result.rbegin() == '\r')) - result.pop_back(); - return result; +string argv0; // path+name of the executable binary, as given by argv[0] +string binaryDirectory; // path of the executable directory +string workingDirectory; // path of the working directory +string pathSeparator; // Separator for our current OS + +void init(int argc, char* argv[]) { + (void)argc; + string separator; + + // extract the path+name of the executable binary + argv0 = argv[0]; + +#ifdef _WIN32 + pathSeparator = "\\"; + #ifdef _MSC_VER + // Under windows argv[0] may not have the extension. Also _get_pgmptr() had + // issues in some windows 10 versions, so check returned values carefully. + char* pgmptr = nullptr; + if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr) + argv0 = pgmptr; + #endif +#else + pathSeparator = "/"; +#endif + + // extract the working directory + workingDirectory = ""; + char buff[40000]; + char* cwd = GETCWD(buff, 40000); + if (cwd) + workingDirectory = cwd; + + // extract the binary directory path from argv0 + binaryDirectory = argv0; + size_t pos = binaryDirectory.find_last_of("\\/"); + if (pos == std::string::npos) + binaryDirectory = "." + pathSeparator; + else + binaryDirectory.resize(pos + 1); + + // pattern replacement: "./" at the start of path is replaced by the working directory + if (binaryDirectory.find("." + pathSeparator) == 0) + binaryDirectory.replace(0, 1, workingDirectory); } -void sleep(int ms) -{ - std::this_thread::sleep_for(std::chrono::milliseconds(ms)); -} -void* aligned_malloc(size_t size, size_t align) -{ - void* p = _mm_malloc(size, align); - if (p == nullptr) - { - std::cout << "info string can't allocate memory. sise = " << size << std::endl; - exit(1); - } - return p; -} - -std::uint64_t get_file_size(std::fstream& fs) -{ - auto pos = fs.tellg(); - - fs.seekg(0, fstream::end); - const uint64_t eofPos = (uint64_t)fs.tellg(); - fs.clear(); // Otherwise, the next seek may fail. - fs.seekg(0, fstream::beg); - const uint64_t begPos = (uint64_t)fs.tellg(); - fs.seekg(pos); - - return eofPos - begPos; -} - -int read_file_to_memory(std::string filename, std::function callback_func) -{ - fstream fs(filename, ios::in | ios::binary); - if (fs.fail()) - return 1; - - const uint64_t file_size = get_file_size(fs); - //std::cout << "filename = " << filename << " , file_size = " << file_size << endl; - - // I know the file size, so call callback_func to get a buffer for this, - // Get the pointer. - void* ptr = callback_func(file_size); - - // If the buffer could not be secured, or if the file size is different from the expected file size, - // It is supposed to return nullptr. At this time, reading is interrupted and an error is returned. - if (ptr == nullptr) - return 2; - - // read in pieces - - const uint64_t block_size = 1024 * 1024 * 1024; // number of elements to read in one read (1GB) - for (uint64_t pos = 0; pos < file_size; pos += block_size) - { - // size to read this time - uint64_t read_size = (pos + block_size < file_size) ? block_size : (file_size - pos); - fs.read((char*)ptr + pos, read_size); - - // Read error occurred in the middle of the file. - if (fs.fail()) - return 2; - - //cout << "."; - } - fs.close(); - - return 0; -} - -int write_memory_to_file(std::string filename, void* ptr, uint64_t size) -{ - fstream fs(filename, ios::out | ios::binary); - if (fs.fail()) - return 1; - - const uint64_t block_size = 1024 * 1024 * 1024; // number of elements to write in one write (1GB) - for (uint64_t pos = 0; pos < size; pos += block_size) - { - // Memory size to write this time - uint64_t write_size = (pos + block_size < size) ? block_size : (size - pos); - fs.write((char*)ptr + pos, write_size); - //cout << "."; - } - fs.close(); - return 0; -} +} // namespace CommandLine diff --git a/src/misc.h b/src/misc.h index 5b7c8870..6696b0a8 100644 --- a/src/misc.h +++ b/src/misc.h @@ -39,8 +39,8 @@ void prefetch(void* addr); void start_logger(const std::string& fname); void* std_aligned_alloc(size_t alignment, size_t size); void std_aligned_free(void* ptr); -void* aligned_ttmem_alloc(size_t size, void*& mem); -void aligned_ttmem_free(void* mem); // nop if mem == nullptr +void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes +void aligned_large_pages_free(void* mem); // nop if mem == nullptr void dbg_hit_on(bool b); void dbg_hit_on(bool c, bool b); @@ -48,9 +48,7 @@ void dbg_mean_of(int v); void dbg_print(); typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds - static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits"); - inline TimePoint now() { return std::chrono::duration_cast (std::chrono::steady_clock::now().time_since_epoch()).count(); @@ -337,4 +335,11 @@ namespace Dependency extern bool getline(std::ifstream& fs, std::string& s); } +namespace CommandLine { + void init(int argc, char* argv[]); + + extern std::string binaryDirectory; // path of the executable directory + extern std::string workingDirectory; // path of the working directory +} + #endif // #ifndef MISC_H_INCLUDED diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h index 75700d03..6cacf37e 100644 --- a/src/nnue/evaluate_nnue.h +++ b/src/nnue/evaluate_nnue.h @@ -40,37 +40,19 @@ namespace Eval::NNUE { } }; + template + struct LargePageDeleter { + void operator()(T* ptr) const { + ptr->~T(); + aligned_large_pages_free(ptr); + } + }; + template using AlignedPtr = std::unique_ptr>; - // Input feature converter - extern AlignedPtr feature_transformer; - - // Evaluation function - extern AlignedPtr network; - - // Evaluation function file name - extern std::string fileName; - - // Saved evaluation function file name - extern std::string savedfileName; - - // Get a string that represents the structure of the evaluation function - std::string GetArchitectureString(); - - // read the header - bool ReadHeader(std::istream& stream, - std::uint32_t* hash_value, std::string* architecture); - - // write the header - bool WriteHeader(std::ostream& stream, - std::uint32_t hash_value, const std::string& architecture); - - // read evaluation function parameters - bool ReadParameters(std::istream& stream); - - // write evaluation function parameters - bool WriteParameters(std::ostream& stream); + template + using LargePagePtr = std::unique_ptr>; } // namespace Eval::NNUE diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 69dfaad2..26370710 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -29,9 +29,7 @@ namespace Eval::NNUE { struct alignas(kCacheLineSize) Accumulator { std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; - Value score; bool computed_accumulation; - bool computed_score; }; } // namespace Eval::NNUE diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index d7ffa21a..b0e64254 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -113,7 +113,7 @@ namespace Eval::NNUE { PS_END2 = 12 * SQUARE_NB + 1 }; - extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB]; + extern const uint32_t kpp_board_index[PIECE_NB][COLOR_NB]; // Type of input feature after conversion using TransformedFeatureType = std::uint8_t; diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index e1bc2ab8..b4c65bdf 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -29,6 +29,56 @@ namespace Eval::NNUE { + // If vector instructions are enabled, we update and refresh the + // accumulator tile by tile such that each tile fits in the CPU's + // vector registers. + #define TILING + + #ifdef USE_AVX512 + typedef __m512i vec_t; + #define vec_load(a) _mm512_loadA_si512(a) + #define vec_store(a,b) _mm512_storeA_si512(a,b) + #define vec_add_16(a,b) _mm512_add_epi16(a,b) + #define vec_sub_16(a,b) _mm512_sub_epi16(a,b) + static constexpr IndexType kNumRegs = 8; // only 8 are needed + + #elif USE_AVX2 + typedef __m256i vec_t; + #define vec_load(a) _mm256_loadA_si256(a) + #define vec_store(a,b) _mm256_storeA_si256(a,b) + #define vec_add_16(a,b) _mm256_add_epi16(a,b) + #define vec_sub_16(a,b) _mm256_sub_epi16(a,b) + static constexpr IndexType kNumRegs = 16; + + #elif USE_SSE2 + typedef __m128i vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) _mm_add_epi16(a,b) + #define vec_sub_16(a,b) _mm_sub_epi16(a,b) + static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8; + + #elif USE_MMX + typedef __m64 vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) _mm_add_pi16(a,b) + #define vec_sub_16(a,b) _mm_sub_pi16(a,b) + static constexpr IndexType kNumRegs = 8; + + #elif USE_NEON + typedef int16x8_t vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) vaddq_s16(a,b) + #define vec_sub_16(a,b) vsubq_s16(a,b) + static constexpr IndexType kNumRegs = 16; + + #else + #undef TILING + + #endif + // Input feature converter class FeatureTransformer { @@ -36,6 +86,11 @@ namespace Eval::NNUE { // Number of output dimensions for one side static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; + #ifdef TILING + static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2; + static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions"); + #endif + public: // Output type using OutputType = TransformedFeatureType; @@ -50,6 +105,7 @@ namespace Eval::NNUE { // Hash value embedded in the evaluation file static constexpr std::uint32_t GetHashValue() { + return RawFeatures::kHashValue ^ kOutputDimensions; } @@ -62,6 +118,7 @@ namespace Eval::NNUE { // Read network parameters bool ReadParameters(std::istream& stream) { + for (std::size_t i = 0; i < kHalfDimensions; ++i) biases_[i] = read_little_endian(stream); for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i) @@ -80,23 +137,26 @@ namespace Eval::NNUE { // Proceed with the difference calculation if possible bool UpdateAccumulatorIfPossible(const Position& pos) const { + const auto now = pos.state(); - if (now->accumulator.computed_accumulation) { + if (now->accumulator.computed_accumulation) return true; - } + const auto prev = now->previous; if (prev && prev->accumulator.computed_accumulation) { UpdateAccumulator(pos); return true; } + return false; } // Convert input features - void Transform(const Position& pos, OutputType* output, bool refresh) const { - if (refresh || !UpdateAccumulatorIfPossible(pos)) { + void Transform(const Position& pos, OutputType* output) const { + + if (!UpdateAccumulatorIfPossible(pos)) RefreshAccumulator(pos); - } + const auto& accumulation = pos.state()->accumulator.accumulation; #if defined(USE_AVX2) @@ -193,74 +253,58 @@ namespace Eval::NNUE { private: // Calculate cumulative value without using difference calculation void RefreshAccumulator(const Position& pos) const { + auto& accumulator = pos.state()->accumulator; IndexType i = 0; Features::IndexList active_indices[2]; RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], active_indices); for (Color perspective : { WHITE, BLACK }) { - std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); - for (const auto index : active_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; - #if defined(USE_AVX512) - auto accumulation = reinterpret_cast<__m512i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - for (IndexType j = 0; j < kNumChunks; ++j) - _mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j])); + #ifdef TILING + for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) { + auto biasesTile = reinterpret_cast( + &biases_[j * kTileHeight]); + auto accTile = reinterpret_cast( + &accumulator.accumulation[perspective][i][j * kTileHeight]); + vec_t acc[kNumRegs]; - #elif defined(USE_AVX2) - auto accumulation = reinterpret_cast<__m256i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j])); + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = biasesTile[k]; - #elif defined(USE_SSE2) - auto accumulation = reinterpret_cast<__m128i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); - #elif defined(USE_MMX) - auto accumulation = reinterpret_cast<__m64*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); } - #elif defined(USE_NEON) - auto accumulation = reinterpret_cast( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = vaddq_s16(accumulation[j], column[j]); - + for (unsigned k = 0; k < kNumRegs; k++) + vec_store(&accTile[k], acc[k]); + } #else + std::memcpy(accumulator.accumulation[perspective][i], biases_, + kHalfDimensions * sizeof(BiasType)); + + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; + for (IndexType j = 0; j < kHalfDimensions; ++j) accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - #endif - } + #endif } + #if defined(USE_MMX) _mm_empty(); #endif accumulator.computed_accumulation = true; - accumulator.computed_score = false; } // Calculate cumulative value using difference calculation void UpdateAccumulator(const Position& pos) const { + const auto prev_accumulator = pos.state()->previous->accumulator; auto& accumulator = pos.state()->accumulator; IndexType i = 0; @@ -268,29 +312,55 @@ namespace Eval::NNUE { bool reset[2]; RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], removed_indices, added_indices, reset); - for (Color perspective : { WHITE, BLACK }) { - #if defined(USE_AVX2) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m256i*>( - &accumulator.accumulation[perspective][i][0]); + #ifdef TILING + for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) { + for (Color perspective : { WHITE, BLACK }) { + auto accTile = reinterpret_cast( + &accumulator.accumulation[perspective][i][j * kTileHeight]); + vec_t acc[kNumRegs]; - #elif defined(USE_SSE2) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m128i*>( - &accumulator.accumulation[perspective][i][0]); + if (reset[perspective]) { + auto biasesTile = reinterpret_cast( + &biases_[j * kTileHeight]); + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = biasesTile[k]; + } else { + auto prevAccTile = reinterpret_cast( + &prev_accumulator.accumulation[perspective][i][j * kTileHeight]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_load(&prevAccTile[k]); - #elif defined(USE_MMX) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m64*>( - &accumulator.accumulation[perspective][i][0]); + // Difference calculation for the deactivated features + for (const auto index : removed_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); - #elif defined(USE_NEON) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast( - &accumulator.accumulation[perspective][i][0]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); + } + } + { // Difference calculation for the activated features + for (const auto index : added_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); + + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + } + + for (IndexType k = 0; k < kNumRegs; ++k) + vec_store(&accTile[k], acc[k]); + } + } + #if defined(USE_MMX) + _mm_empty(); #endif + #else + for (Color perspective : { WHITE, BLACK }) { + if (reset[perspective]) { std::memcpy(accumulator.accumulation[perspective][i], biases_, kHalfDimensions * sizeof(BiasType)); @@ -302,83 +372,22 @@ namespace Eval::NNUE { for (const auto index : removed_indices[perspective]) { const IndexType offset = kHalfDimensions * index; - #if defined(USE_AVX2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]); - } - - #elif defined(USE_SSE2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); - } - - #elif defined(USE_MMX) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]); - } - - #elif defined(USE_NEON) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = vsubq_s16(accumulation[j], column[j]); - } - - #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] -= - weights_[offset + j]; - } - #endif - + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] -= weights_[offset + j]; } } { // Difference calculation for the activated features for (const auto index : added_indices[perspective]) { const IndexType offset = kHalfDimensions * index; - #if defined(USE_AVX2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); - } - - #elif defined(USE_SSE2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); - } - - #elif defined(USE_MMX) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); - } - - #elif defined(USE_NEON) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = vaddq_s16(accumulation[j], column[j]); - } - - #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] += - weights_[offset + j]; - } - #endif - + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] += weights_[offset + j]; } } } - #if defined(USE_MMX) - _mm_empty(); #endif accumulator.computed_accumulation = true; - accumulator.computed_score = false; } using BiasType = std::int16_t; diff --git a/src/position.cpp b/src/position.cpp index 38ac7c5c..d7f0761a 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -707,7 +707,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { // Used by NNUE st->accumulator.computed_accumulation = false; - st->accumulator.computed_score = false; auto& dp = st->dirtyPiece; dp.dirty_num = 1; @@ -1003,7 +1002,6 @@ void Position::do_null_move(StateInfo& newSt) { if (Eval::useNNUE != Eval::UseNNUEMode::False) { std::memcpy(&newSt, st, sizeof(StateInfo)); - st->accumulator.computed_score = false; } else std::memcpy(&newSt, st, offsetof(StateInfo, accumulator)); diff --git a/src/search.cpp b/src/search.cpp index e1616c5c..edc020fd 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -54,8 +54,6 @@ using std::string; using Eval::evaluate; using namespace Search; -bool Search::prune_at_shallow_depth_on_pv_node = true; - namespace { // Different node types, used as a template parameter @@ -227,7 +225,7 @@ void MainThread::search() { Time.init(Limits, us, rootPos.game_ply()); TT.new_search(); - Eval::verify_NNUE(); + Eval::NNUE::verify(); if (rootMoves.empty()) { @@ -464,10 +462,7 @@ void Thread::search() { ++failedHighCnt; } else - { - ++rootMoves[pvIdx].bestMoveCount; break; - } delta += delta / 4 + 5; @@ -522,7 +517,7 @@ void Thread::search() { totBestMoveChanges += th->bestMoveChanges; th->bestMoveChanges = 0; } - double bestMoveInstability = 1 + totBestMoveChanges / Threads.size(); + double bestMoveInstability = 1 + 2 * totBestMoveChanges / Threads.size(); double totalTime = rootMoves.size() == 1 ? 0 : Time.optimum() * fallingEval * reduction * bestMoveInstability; @@ -599,7 +594,7 @@ namespace { Move ttMove, move, excludedMove, bestMove; Depth extension, newDepth; Value bestValue, value, ttValue, eval, maxValue, probCutBeta; - bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture; + bool formerPv, givesCheck, improving, didLMR, priorCapture; bool captureOrPromotion, doFullDepthSearch, moveCountPruning, ttCapture, singularQuietLMR; Piece movedPiece; @@ -646,6 +641,7 @@ namespace { assert(0 <= ss->ply && ss->ply < MAX_PLY); (ss+1)->ply = ss->ply + 1; + (ss+1)->ttPv = false; (ss+1)->excludedMove = bestMove = MOVE_NONE; (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE; Square prevSq = to_sq((ss-1)->currentMove); @@ -655,9 +651,7 @@ namespace { // starts with statScore = 0. Later grandchildren start with the last calculated // statScore of the previous grandchild. This influences the reduction rules in // LMR which are based on the statScore of parent position. - if (rootNode) - (ss+4)->statScore = 0; - else + if (!rootNode) (ss+2)->statScore = 0; // Step 4. Transposition table lookup. We don't want the score of a partial @@ -665,14 +659,15 @@ namespace { // position key in case of an excluded move. excludedMove = ss->excludedMove; posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove); - tte = TT.probe(posKey, ttHit); - ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + tte = TT.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] - : ttHit ? tte->move() : MOVE_NONE; - ttPv = PvNode || (ttHit && tte->is_pv()); - formerPv = ttPv && !PvNode; + : ss->ttHit ? tte->move() : MOVE_NONE; + if (!excludedMove) + ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); + formerPv = ss->ttPv && !PvNode; - if ( ttPv + if ( ss->ttPv && depth > 12 && ss->ply - 1 < MAX_LPH && !priorCapture @@ -681,11 +676,11 @@ namespace { // thisThread->ttHitAverage can be used to approximate the running average of ttHit thisThread->ttHitAverage = (TtHitAverageWindow - 1) * thisThread->ttHitAverage / TtHitAverageWindow - + TtHitAverageResolution * ttHit; + + TtHitAverageResolution * ss->ttHit; // At non-PV nodes we check for an early TT cutoff if ( !PvNode - && ttHit + && ss->ttHit && tte->depth() >= depth && ttValue != VALUE_NONE // Possible in case of TT access race && (ttValue >= beta ? (tte->bound() & BOUND_LOWER) @@ -750,7 +745,7 @@ namespace { if ( b == BOUND_EXACT || (b == BOUND_LOWER ? value >= beta : value <= alpha)) { - tte->save(posKey, value_to_tt(value, ss->ply), ttPv, b, + tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, b, std::min(MAX_PLY - 1, depth + 6), MOVE_NONE, VALUE_NONE); @@ -778,7 +773,7 @@ namespace { improving = false; goto moves_loop; } - else if (ttHit) + else if (ss->ttHit) { // Never assume anything about values stored in TT ss->staticEval = eval = tte->eval(); @@ -800,7 +795,7 @@ namespace { else ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo; - tte->save(posKey, VALUE_NONE, ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); + tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); } // Step 7. Razoring (~1 Elo) @@ -826,7 +821,7 @@ namespace { && (ss-1)->statScore < 22977 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ttPv + 182 + && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ss->ttPv + 182 && !excludedMove && pos.non_pawn_material(us) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) @@ -882,14 +877,14 @@ namespace { // there and in further interactions with transposition table cutoff depth is set to depth - 3 // because probCut search has depth set to depth - 4 but we also do a move before it // so effective depth is equal to depth - 3 - && !( ttHit + && !( ss->ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue < probCutBeta)) { // if ttMove is a capture and value from transposition table is good enough produce probCut // cutoff without digging into actual probCut search - if ( ttHit + if ( ss->ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue >= probCutBeta @@ -900,6 +895,8 @@ namespace { assert(probCutBeta < VALUE_INFINITE); MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory); int probCutCount = 0; + bool ttPv = ss->ttPv; + ss->ttPv = false; while ( (move = mp.next_move()) != MOVE_NONE && probCutCount < 2 + 2 * cutNode) @@ -931,7 +928,7 @@ namespace { if (value >= probCutBeta) { // if transposition table doesn't have equal or more deep info write probCut data into it - if ( !(ttHit + if ( !(ss->ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE)) tte->save(posKey, value_to_tt(value, ss->ply), ttPv, @@ -940,8 +937,15 @@ namespace { return value; } } + ss->ttPv = ttPv; } + // Step 11. If the position is not in TT, decrease depth by 2 + if ( PvNode + && depth >= 6 + && !ttMove) + depth -= 2; + moves_loop: // When in check, search starts from here const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, @@ -965,7 +969,7 @@ moves_loop: // When in check, search starts from here // Mark this node as being searched ThreadHolding th(thisThread, posKey, ss->ply); - // Step 11. Loop through all pseudo-legal moves until no moves remain + // Step 12. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) { @@ -988,9 +992,7 @@ moves_loop: // When in check, search starts from here ss->moveCount = ++moveCount; - if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000 - && !Limits.silent - ) + if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000) sync_cout << "info depth " << depth << " currmove " << UCI::move(move, pos.is_chess960()) << " currmovenumber " << moveCount + thisThread->pvIdx << sync_endl; @@ -1005,9 +1007,8 @@ moves_loop: // When in check, search starts from here // Calculate new depth for this move newDepth = depth - 1; - // Step 12. Pruning at shallow depth (~200 Elo) + // Step 13. Pruning at shallow depth (~200 Elo) if ( !rootNode - && (PvNode ? prune_at_shallow_depth_on_pv_node : true) && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) { @@ -1052,7 +1053,6 @@ moves_loop: // When in check, search starts from here if ( !givesCheck && lmrDepth < 6 && !(PvNode && abs(bestValue) < 2) - && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] && !ss->inCheck && ss->staticEval + 169 + 244 * lmrDepth + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha) @@ -1064,7 +1064,7 @@ moves_loop: // When in check, search starts from here } } - // Step 13. Extensions (~75 Elo) + // Step 14. Extensions (~75 Elo) // Singular extension search (~70 Elo). If all moves but one fail low on a // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), @@ -1123,11 +1123,6 @@ moves_loop: // When in check, search starts from here && pos.non_pawn_material() <= 2 * RookValueMg) extension = 1; - // Castling extension - if ( type_of(move) == CASTLING - && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2) - extension = 1; - // Late irreversible move extension if ( move == ttMove && pos.rule50_count() > 80 @@ -1147,14 +1142,13 @@ moves_loop: // When in check, search starts from here [movedPiece] [to_sq(move)]; - // Step 14. Make the move + // Step 15. Make the move pos.do_move(move, st, givesCheck); - // Step 15. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be + // Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be // re-searched at full depth. if ( depth >= 3 - && moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2) - && (!rootNode || thisThread->best_move_count(move) == 0) + && moveCount > 1 + 2 * rootNode && ( !captureOrPromotion || moveCountPruning || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha @@ -1163,13 +1157,6 @@ moves_loop: // When in check, search starts from here { Depth r = reduction(improving, depth, moveCount); - // Decrease reduction at non-check cut nodes for second move at low depths - if ( cutNode - && depth <= 10 - && moveCount <= 2 - && !ss->inCheck) - r--; - // Decrease reduction if the ttHit running average is large if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024) r--; @@ -1179,7 +1166,7 @@ moves_loop: // When in check, search starts from here r++; // Decrease reduction if position is or has been on the PV (~10 Elo) - if (ttPv) + if (ss->ttPv) r -= 2; if (moveCountPruning && !formerPv) @@ -1191,7 +1178,7 @@ moves_loop: // When in check, search starts from here // Decrease reduction if ttMove has been singularly extended (~3 Elo) if (singularQuietLMR) - r -= 1 + formerPv; + r--; if (!captureOrPromotion) { @@ -1208,7 +1195,7 @@ moves_loop: // When in check, search starts from here // hence break make_move(). (~2 Elo) else if ( type_of(move) == NORMAL && !pos.see_ge(reverse_move(move))) - r -= 2 + ttPv - (type_of(movedPiece) == PAWN); + r -= 2 + ss->ttPv - (type_of(movedPiece) == PAWN); ss->statScore = thisThread->mainHistory[us][from_to(move)] + (*contHist[0])[movedPiece][to_sq(move)] @@ -1228,14 +1215,14 @@ moves_loop: // When in check, search starts from here } else { - // Increase reduction for captures/promotions if late move and at low depth - if (depth < 8 && moveCount > 2) - r++; + // Increase reduction for captures/promotions if late move and at low depth + if (depth < 8 && moveCount > 2) + r++; - // Unless giving check, this capture is likely bad - if ( !givesCheck - && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha) - r++; + // Unless giving check, this capture is likely bad + if ( !givesCheck + && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha) + r++; } Depth d = std::clamp(newDepth - r, 1, newDepth); @@ -1253,7 +1240,7 @@ moves_loop: // When in check, search starts from here didLMR = false; } - // Step 16. Full depth search when LMR is skipped or fails high + // Step 17. Full depth search when LMR is skipped or fails high if (doFullDepthSearch) { value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); @@ -1281,12 +1268,12 @@ moves_loop: // When in check, search starts from here value = -search(pos, ss+1, -beta, -alpha, newDepth, false); } - // Step 17. Undo move + // Step 18. Undo move pos.undo_move(move); assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - // Step 18. Check for a new best move + // Step 19. Check for a new best move // Finished searching the move. If a stop occurred, the return value of // the search cannot be trusted, and we return immediately without // updating best move, PV and TT. @@ -1363,7 +1350,7 @@ moves_loop: // When in check, search starts from here return VALUE_DRAW; */ - // Step 19. Check for mate and stalemate + // Step 20. Check for mate and stalemate // All legal moves have been searched and if there are no legal moves, it // must be a mate or a stalemate. If we are in a singular extension search then // return a fail low score. @@ -1386,8 +1373,17 @@ moves_loop: // When in check, search starts from here if (PvNode) bestValue = std::min(bestValue, maxValue); + // If no good move is found and the previous position was ttPv, then the previous + // opponent move is probably good and the new position is added to the search tree. + if (bestValue <= alpha) + ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3); + // Otherwise, a counter move has been found and if the position is the last leaf + // in the search tree, remove the position from the search tree. + else if (depth > 3) + ss->ttPv = ss->ttPv && (ss+1)->ttPv; + if (!excludedMove && !(rootNode && thisThread->pvIdx)) - tte->save(posKey, value_to_tt(bestValue, ss->ply), ttPv, + tte->save(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv, bestValue >= beta ? BOUND_LOWER : PvNode && bestMove ? BOUND_EXACT : BOUND_UPPER, depth, bestMove, ss->staticEval); @@ -1416,7 +1412,7 @@ moves_loop: // When in check, search starts from here Move ttMove, move, bestMove; Depth ttDepth; Value bestValue, value, ttValue, futilityValue, futilityBase, oldAlpha; - bool ttHit, pvHit, givesCheck, captureOrPromotion; + bool pvHit, givesCheck, captureOrPromotion; int moveCount; if (PvNode) @@ -1446,13 +1442,13 @@ moves_loop: // When in check, search starts from here : DEPTH_QS_NO_CHECKS; // Transposition table lookup posKey = pos.key(); - tte = TT.probe(posKey, ttHit); - ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; - ttMove = ttHit ? tte->move() : MOVE_NONE; - pvHit = ttHit && tte->is_pv(); + tte = TT.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + ttMove = ss->ttHit ? tte->move() : MOVE_NONE; + pvHit = ss->ttHit && tte->is_pv(); if ( !PvNode - && ttHit + && ss->ttHit && tte->depth() >= ttDepth && ttValue != VALUE_NONE // Only in case of TT access race && (ttValue >= beta ? (tte->bound() & BOUND_LOWER) @@ -1467,7 +1463,7 @@ moves_loop: // When in check, search starts from here } else { - if (ttHit) + if (ss->ttHit) { // Never assume anything about values stored in TT if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE) @@ -1486,7 +1482,7 @@ moves_loop: // When in check, search starts from here // Stand pat. Return immediately if static value is at least beta if (bestValue >= beta) { - if (!ttHit) + if (!ss->ttHit) tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, DEPTH_NONE, MOVE_NONE, ss->staticEval); @@ -1550,18 +1546,16 @@ moves_loop: // When in check, search starts from here } // Do not search moves with negative SEE values - if ( !ss->inCheck && !pos.see_ge(move)) + if ( !ss->inCheck + && !(givesCheck && pos.is_discovery_check_on_king(~pos.side_to_move(), move)) + && !pos.see_ge(move)) continue; // Speculative prefetch as early as possible prefetch(TT.first_entry(pos.key_after(move))); // Check for legality just before making the move - if ( - // HACK: pos.piece_on(from_sq(m)) sometimes will be NO_PIECE during machine learning. - !pos.pseudo_legal(move) || - !pos.legal(move) - ) + if (!pos.legal(move)) { moveCount--; continue; @@ -1573,8 +1567,9 @@ moves_loop: // When in check, search starts from here [pos.moved_piece(move)] [to_sq(move)]; + // CounterMove based pruning if ( !captureOrPromotion - && moveCount >= abs(depth) + 1 + && moveCount && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold) continue; @@ -1706,8 +1701,8 @@ moves_loop: // When in check, search starts from here else captureHistory[moved_piece][to_sq(bestMove)][captured] << bonus1; - // Extra penalty for a quiet TT or main killer move in previous ply when it gets refuted - if ( ((ss-1)->moveCount == 1 || ((ss-1)->currentMove == (ss-1)->killers[0])) + // Extra penalty for a quiet early move that was not a TT move or main killer move in previous ply when it gets refuted + if ( ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0])) && !pos.captured_piece()) update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -bonus1); @@ -1850,12 +1845,15 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { { bool updated = rootMoves[i].score != -VALUE_INFINITE; - if (depth == 1 && !updated) + if (depth == 1 && !updated && i > 0) continue; - Depth d = updated ? depth : depth - 1; + Depth d = updated ? depth : std::max(1, depth - 1); Value v = updated ? rootMoves[i].score : rootMoves[i].previousScore; + if (v == -VALUE_INFINITE) + v = VALUE_ZERO; + bool tb = TB::RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY; v = tb ? rootMoves[i].tbScore : v; @@ -1953,7 +1951,7 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) { if (RootInTB) { // Sort moves according to TB rank - std::sort(rootMoves.begin(), rootMoves.end(), + std::stable_sort(rootMoves.begin(), rootMoves.end(), [](const RootMove &a, const RootMove &b) { return a.tbRank > b.tbRank; } ); // Probe during search only if DTZ is not available and we are winning @@ -1967,272 +1965,3 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) { m.tbRank = 0; } } - -// --- expose the functions such as fixed depth search used for learning to the outside - - -namespace Learner -{ - // For learning, prepare a stub that can call search,qsearch() from one thread. - // From now on, it is better to have a Searcher and prepare a substitution table for each thread like Apery. - // It might have been good. - - // Initialization for learning. - // Called from Learner::search(),Learner::qsearch(). - void init_for_search(Position& pos, Stack* ss) - { - - // RootNode requires ss->ply == 0. - // Because it clears to zero, ss->ply == 0, so it's okay... - - std::memset(ss - 7, 0, 10 * sizeof(Stack)); - - // Regarding this_thread. - - { - auto th = pos.this_thread(); - - th->completedDepth = 0; - th->selDepth = 0; - th->rootDepth = 0; - th->nmpMinPly = th->bestMoveChanges = 0; - th->ttHitAverage = TtHitAverageWindow * TtHitAverageResolution / 2; - - // Zero initialization of the number of search nodes - th->nodes = 0; - - // Clear all history types. This initialization takes a little time, and the accuracy of the search is rather low, so the good and bad are not well understood. - // th->clear(); - - int ct = int(Options["Contempt"]) * PawnValueEg / 100; // From centipawns - Color us = pos.side_to_move(); - - // In analysis mode, adjust contempt in accordance with user preference - if (Limits.infinite || Options["UCI_AnalyseMode"]) - ct = Options["Analysis Contempt"] == "Off" ? 0 - : Options["Analysis Contempt"] == "Both" ? ct - : Options["Analysis Contempt"] == "White" && us == BLACK ? -ct - : Options["Analysis Contempt"] == "Black" && us == WHITE ? -ct - : ct; - - // Evaluation score is from the white point of view - th->contempt = (us == WHITE ? make_score(ct, ct / 2) - : -make_score(ct, ct / 2)); - - for (int i = 7; i > 0; i--) - (ss - i)->continuationHistory = &th->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel - - // set rootMoves - auto& rootMoves = th->rootMoves; - - rootMoves.clear(); - for (auto m: MoveList(pos)) - rootMoves.push_back(Search::RootMove(m)); - - assert(!rootMoves.empty()); - } - } - - // A pair of reader and evaluation value. Returned by Learner::search(),Learner::qsearch(). - typedef std::pair > ValueAndPV; - - // Stationary search. - // - // Precondition) Search thread is set by pos.set_this_thread(Threads[thread_id]). - // Also, when Threads.stop arrives, the search is interrupted, so the PV at that time is not correct. - // After returning from search(), if Threads.stop == true, do not use the search result. - // Also, note that before calling, if you do not call it with Threads.stop == false, the search will be interrupted and it will return. - // - // If it is clogged, MOVE_RESIGN is returned in the PV array. - // - //Although it was possible to specify alpha and beta with arguments, this will show the result when searching in that window - // Because it writes to the substitution table, the value that can be pruned is written to that window when learning - // As it has a bad effect, I decided to stop allowing the window range to be specified. - ValueAndPV qsearch(Position& pos) - { - Stack stack[MAX_PLY + 10], * ss = stack + 7; - Move pv[MAX_PLY + 1]; - - init_for_search(pos, ss); - ss->pv = pv; // For the time being, it must be a dummy and somewhere with a buffer. - - if (pos.is_draw(0)) { - // Return draw value if draw. - return { VALUE_DRAW, {} }; - } - - // Is it stuck? - if (MoveList(pos).size() == 0) - { - // Return the mated value if checkmated. - return { mated_in(/*ss->ply*/ 0 + 1), {} }; - } - - auto bestValue = ::qsearch(pos, ss, -VALUE_INFINITE, VALUE_INFINITE, 0); - - // Returns the PV obtained. - std::vector pvs; - for (Move* p = &ss->pv[0]; is_ok(*p); ++p) - pvs.push_back(*p); - - return ValueAndPV(bestValue, pvs); - } - - // Normal search. Depth depth (specified as an integer). - // 3 If you want a score for hand reading, - // auto v = search(pos,3); - // Do something like - // Evaluation value is obtained in v.first and PV is obtained in v.second. - // When multi pv is enabled, you can get the PV (reading line) array in pos.this_thread()->rootMoves[N].pv. - // Specify multi pv with the argument multiPV of this function. (The value of Options["MultiPV"] is ignored) - // - // Declaration win judgment is not done as root (because it is troublesome to handle), so it is not done here. - // Handle it by the caller. - // - // Precondition) Search thread is set by pos.set_this_thread(Threads[thread_id]). - // Also, when Threads.stop arrives, the search is interrupted, so the PV at that time is not correct. - // After returning from search(), if Threads.stop == true, do not use the search result. - // Also, note that before calling, if you do not call it with Threads.stop == false, the search will be interrupted and it will return. - - ValueAndPV search(Position& pos, int depth_, size_t multiPV /* = 1 */, uint64_t nodesLimit /* = 0 */) - { - std::vector pvs; - - Depth depth = depth_; - if (depth < 0) - return std::pair>(Eval::evaluate(pos), std::vector()); - - if (depth == 0) - return qsearch(pos); - - Stack stack[MAX_PLY + 10], * ss = stack + 7; - Move pv[MAX_PLY + 1]; - - init_for_search(pos, ss); - - ss->pv = pv; // For the time being, it must be a dummy and somewhere with a buffer. - - // Initialize the variables related to this_thread - auto th = pos.this_thread(); - auto& rootDepth = th->rootDepth; - auto& pvIdx = th->pvIdx; - auto& pvLast = th->pvLast; - auto& rootMoves = th->rootMoves; - auto& completedDepth = th->completedDepth; - auto& selDepth = th->selDepth; - - // A function to search the top N of this stage as best move - //size_t multiPV = Options["MultiPV"]; - - // Do not exceed the number of moves in this situation - multiPV = std::min(multiPV, rootMoves.size()); - - // If you do not multiply the node limit by the value of MultiPV, you will not be thinking about the same node for one candidate hand when you fix the depth and have MultiPV. - nodesLimit *= multiPV; - - Value alpha = -VALUE_INFINITE; - Value beta = VALUE_INFINITE; - Value delta = -VALUE_INFINITE; - Value bestValue = -VALUE_INFINITE; - - while ((rootDepth += 1) <= depth - // exit this loop even if the node limit is exceeded - // The number of search nodes is passed in the argument of this function. - && !(nodesLimit /* limited nodes */ && th->nodes.load(std::memory_order_relaxed) >= nodesLimit) - ) - { - for (RootMove& rm : rootMoves) - rm.previousScore = rm.score; - - size_t pvFirst = 0; - pvLast = 0; - - // MultiPV loop. We perform a full root search for each PV line - for (pvIdx = 0; pvIdx < multiPV && !Threads.stop; ++pvIdx) - { - if (pvIdx == pvLast) - { - pvFirst = pvLast; - for (pvLast++; pvLast < rootMoves.size(); pvLast++) - if (rootMoves[pvLast].tbRank != rootMoves[pvFirst].tbRank) - break; - } - - // selDepth output with USI info for each depth and PV line - selDepth = 0; - - // Switch to aspiration search for depth 5 and above. - if (rootDepth >= 5 * 1) - { - delta = Value(20); - - Value p = rootMoves[pvIdx].previousScore; - - alpha = std::max(p - delta, -VALUE_INFINITE); - beta = std::min(p + delta, VALUE_INFINITE); - } - - // aspiration search - int failedHighCnt = 0; - while (true) - { - Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt * 1); - bestValue = ::search(pos, ss, alpha, beta, adjustedDepth, false); - - stable_sort(rootMoves.begin() + pvIdx, rootMoves.end()); - //my_stable_sort(pos.this_thread()->thread_id(),&rootMoves[0] + pvIdx, rootMoves.size() - pvIdx); - - // Expand aspiration window for fail low/high. - // However, if it is the value specified by the argument, it will be treated as fail low/high and break. - if (bestValue <= alpha) - { - beta = (alpha + beta) / 2; - alpha = std::max(bestValue - delta, -VALUE_INFINITE); - - failedHighCnt = 0; - //if (mainThread) - // mainThread->stopOnPonderhit = false; - - } - else if (bestValue >= beta) - { - beta = std::min(bestValue + delta, VALUE_INFINITE); - ++failedHighCnt; - } - else - break; - - delta += delta / 4 + 5; - assert(-VALUE_INFINITE <= alpha && beta <= VALUE_INFINITE); - - // runaway check - //assert(th->nodes.load(std::memory_order_relaxed) <= 1000000 ); - } - - stable_sort(rootMoves.begin(), rootMoves.begin() + pvIdx + 1); - //my_stable_sort(pos.this_thread()->thread_id() , &rootMoves[0] , pvIdx + 1); - - } // multi PV - - completedDepth = rootDepth; - } - - // Pass PV_is(ok) to eliminate this PV, there may be NULL_MOVE in the middle. - // ¨ PV should not be NULL_MOVE because it is PV - // MOVE_WIN has never been thrust. (For now) - for (Move move : rootMoves[0].pv) - { - if (!is_ok(move)) - break; - pvs.push_back(move); - } - - //sync_cout << rootDepth << sync_endl; - - // Considering multiPV, the score of rootMoves[0] is returned as bestValue. - bestValue = rootMoves[0].score; - - return ValueAndPV(bestValue, pvs); - } - -} diff --git a/src/search.h b/src/search.h index 20dfe909..04583c8d 100644 --- a/src/search.h +++ b/src/search.h @@ -49,6 +49,8 @@ struct Stack { int statScore; int moveCount; bool inCheck; + bool ttPv; + bool ttHit; }; @@ -70,7 +72,6 @@ struct RootMove { Value previousScore = -VALUE_INFINITE; int selDepth = 0; int tbRank = 0; - int bestMoveCount = 0; Value tbScore; std::vector pv; }; diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index 20215b96..4d682f1a 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -223,7 +223,9 @@ public: *mapping = statbuf.st_size; *baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0); +#if defined(MADV_RANDOM) madvise(*baseAddress, statbuf.st_size, MADV_RANDOM); +#endif ::close(fd); if (*baseAddress == MAP_FAILED) @@ -758,7 +760,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu if (entry->hasPawns) { idx = LeadPawnIdx[leadPawnsCnt][squares[0]]; - std::sort(squares + 1, squares + leadPawnsCnt, pawns_comp); + std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp); for (int i = 1; i < leadPawnsCnt; ++i) idx += Binomial[i][MapPawns[squares[i]]]; @@ -859,7 +861,7 @@ encode_remaining: while (d->groupLen[++next]) { - std::sort(groupSq, groupSq + d->groupLen[next]); + std::stable_sort(groupSq, groupSq + d->groupLen[next]); uint64_t n = 0; // Map down a square if "comes later" than a square in the previous diff --git a/src/thread.cpp b/src/thread.cpp index 1aa66a81..2fbf745d 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -51,17 +51,6 @@ Thread::~Thread() { } -/// Thread::bestMoveCount(Move move) return best move counter for the given root move - -int Thread::best_move_count(Move move) const { - - auto rm = std::find(rootMoves.begin() + pvIdx, - rootMoves.begin() + pvLast, move); - - return rm != rootMoves.begin() + pvLast ? rm->bestMoveCount : 0; -} - - /// Thread::clear() reset histories, usually before a new game void Thread::clear() { @@ -235,16 +224,16 @@ Thread* ThreadPool::get_best_thread() const { votes[th->rootMoves[0].pv[0]] += (th->rootMoves[0].score - minScore + 14) * int(th->completedDepth); - if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY) - { - // Make sure we pick the shortest mate / TB conversion or stave off mate the longest - if (th->rootMoves[0].score > bestThread->rootMoves[0].score) - bestThread = th; - } - else if ( th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY - || ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY - && votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]])) - bestThread = th; + if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY) + { + // Make sure we pick the shortest mate / TB conversion or stave off mate the longest + if (th->rootMoves[0].score > bestThread->rootMoves[0].score) + bestThread = th; + } + else if ( th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY + || ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY + && votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]])) + bestThread = th; } return bestThread; diff --git a/src/thread.h b/src/thread.h index 042bc2e9..34b99015 100644 --- a/src/thread.h +++ b/src/thread.h @@ -54,7 +54,6 @@ public: void idle_loop(); void start_searching(); void wait_for_search_finished(); - int best_move_count(Move move) const; Pawns::Table pawnsTable; Material::Table materialTable; diff --git a/src/tt.cpp b/src/tt.cpp index c64670ac..8f30fee8 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -64,11 +64,12 @@ void TranspositionTable::resize(size_t mbSize) { Threads.main()->wait_for_search_finished(); - aligned_ttmem_free(mem); + aligned_large_pages_free(table); clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster); - table = static_cast(aligned_ttmem_alloc(clusterCount * sizeof(Cluster), mem)); - if (!mem) + + table = static_cast(aligned_large_pages_alloc(clusterCount * sizeof(Cluster))); + if (!table) { std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." << std::endl; diff --git a/src/tt.h b/src/tt.h index 29072bd8..d817f26d 100644 --- a/src/tt.h +++ b/src/tt.h @@ -73,7 +73,7 @@ class TranspositionTable { static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size"); public: - ~TranspositionTable() { aligned_ttmem_free(mem); } + ~TranspositionTable() { aligned_large_pages_free(table); } void new_search() { generation8 += 8; } // Lower 3 bits are used by PV flag and Bound TTEntry* probe(const Key key, bool& found) const; int hashfull() const; @@ -91,7 +91,6 @@ private: size_t clusterCount; Cluster* table; - void* mem; uint8_t generation8; // Size must be not bigger than TTEntry::genBound8 }; diff --git a/src/uci.cpp b/src/uci.cpp index 1128d4d9..ce0580e5 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -100,7 +100,7 @@ namespace { Position p; p.set(pos.fen(), Options["UCI_Chess960"], &states->back(), Threads.main()); - Eval::verify_NNUE(); + Eval::NNUE::verify(); sync_cout << "\n" << Eval::trace(p) << sync_endl; } @@ -185,7 +185,7 @@ namespace { if (token == "go" || token == "eval") { - cerr << "\nPosition: " << cnt++ << '/' << num << endl; + cerr << "\nPosition: " << cnt++ << '/' << num << " (" << pos.fen() << ")" << endl; if (token == "go") { go(pos, is, states); diff --git a/src/ucioption.cpp b/src/ucioption.cpp index dde3844a..89010897 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -21,6 +21,7 @@ #include #include +#include "evaluate.h" #include "misc.h" #include "search.h" #include "thread.h" @@ -48,6 +49,7 @@ void on_prune_at_shallow_depth_on_pv_node(const Option& o) { void on_enable_transposition_table(const Option& o) { TranspositionTable::enable_transposition_table = o; } +void on_eval_file(const Option& ) { Eval::NNUE::init(); } /// Our case insensitive less() function as required by UCI protocol bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const { @@ -104,6 +106,7 @@ void init(OptionsMap& o) { o["PruneAtShallowDepthOnPvNode"] << Option(true, on_prune_at_shallow_depth_on_pv_node); // Enable transposition table. o["EnableTranspositionTable"] << Option(true, on_enable_transposition_table); + o["EvalFile"] << Option(EvalFileDefaultName, on_eval_file); }