From bd756ee45ce1181a90ec149892ccf0e4cfe3639e Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Fri, 14 May 2021 17:35:32 +0200 Subject: [PATCH 01/17] Remove BoolConditions from tuning code Remove BoolConditions from tuning code, as the feature does not work and the code has not been touched in years. No functional change --- src/tune.cpp | 19 ------------------- src/tune.h | 34 ---------------------------------- 2 files changed, 53 deletions(-) diff --git a/src/tune.cpp b/src/tune.cpp index d9618efc..ac91b606 100644 --- a/src/tune.cpp +++ b/src/tune.cpp @@ -30,7 +30,6 @@ namespace Stockfish { bool Tune::update_on_last; const UCI::Option* LastOption = nullptr; -BoolConditions Conditions; static std::map TuneResults; string Tune::next(string& names, bool pop) { @@ -110,24 +109,6 @@ template<> void Tune::Entry::read_option() { template<> void Tune::Entry::init_option() {} template<> void Tune::Entry::read_option() { value(); } - -// Set binary conditions according to a probability that depends -// on the corresponding parameter value. 
- -void BoolConditions::set() { - - static PRNG rng(now()); - static bool startup = true; // To workaround fishtest bench - - for (size_t i = 0; i < binary.size(); i++) - binary[i] = !startup && (values[i] + int(rng.rand() % variance) > threshold); - - startup = false; - - for (size_t i = 0; i < binary.size(); i++) - sync_cout << binary[i] << sync_endl; -} - } // namespace Stockfish diff --git a/src/tune.h b/src/tune.h index c904c09d..b5c715b3 100644 --- a/src/tune.h +++ b/src/tune.h @@ -46,27 +46,6 @@ struct SetRange { #define SetDefaultRange SetRange(default_range) -/// BoolConditions struct is used to tune boolean conditions in the -/// code by toggling them on/off according to a probability that -/// depends on the value of a tuned integer parameter: for high -/// values of the parameter condition is always disabled, for low -/// values is always enabled, otherwise it is enabled with a given -/// probability that depnends on the parameter under tuning. - -struct BoolConditions { - void init(size_t size) { values.resize(size, defaultValue), binary.resize(size, 0); } - void set(); - - std::vector binary, values; - int defaultValue = 465, variance = 40, threshold = 500; - SetRange range = SetRange(0, 1000); -}; - -extern BoolConditions Conditions; - -inline void set_conditions() { Conditions.set(); } - - /// Tune class implements the 'magic' code that makes the setup of a fishtest /// tuning session as easy as it can be. Mainly you have just to remove const /// qualifiers from the variables you want to tune and flag them for tuning, so @@ -159,14 +138,6 @@ class Tune { return add(value, (next(names), std::move(names)), args...); } - // Template specialization for BoolConditions - template - int add(const SetRange& range, std::string&& names, BoolConditions& cond, Args&&... 
args) { - for (size_t size = cond.values.size(), i = 0; i < size; i++) - add(cond.range, next(names, i == size - 1) + "_" + std::to_string(i), cond.values[i]); - return add(range, std::move(names), args...); - } - std::vector> list; public: @@ -187,11 +158,6 @@ public: #define UPDATE_ON_LAST() bool UNIQUE(p, __LINE__) = Tune::update_on_last = true -// Some macro to tune toggling of boolean conditions -#define CONDITION(x) (Conditions.binary[__COUNTER__] || (x)) -#define TUNE_CONDITIONS() int UNIQUE(c, __LINE__) = (Conditions.init(__COUNTER__), 0); \ - TUNE(Conditions, set_conditions) - } // namespace Stockfish #endif // #ifndef TUNE_H_INCLUDED From 24b8b3098bc24ec576b7d03ffb72b2908e6c8c80 Mon Sep 17 00:00:00 2001 From: bmc4 Date: Thu, 13 May 2021 11:12:56 -0300 Subject: [PATCH 02/17] Remove early return in Probcut code We simplify away early return in ProbCut, as it seems not to bring any strength anymore. STC: LLR: 2.93 (-2.94,2.94) <-2.50,0.50> Total: 42632 W: 3705 L: 3617 D: 35310 Ptnml(0-2): 123, 2947, 15110, 2991, 145 https://tests.stockfishchess.org/tests/view/609c49da7746e3dc74ffae02 LTC: LLR: 2.96 (-2.94,2.94) <-2.50,0.50> Total: 35384 W: 1314 L: 1251 D: 32819 Ptnml(0-2): 11, 1130, 15355, 1177, 19 https://tests.stockfishchess.org/tests/view/609c71467746e3dc74ffae47 --- While at it, we also update the Elo estimate of ProbCut (see https://tests.stockfishchess.org/tests/view/609bfb597746e3dc74ffabe3). closes https://github.com/official-stockfish/Stockfish/pull/3462 bench: 3764662 --- src/search.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index d16d9cad..788be984 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -847,7 +847,7 @@ namespace { probCutBeta = beta + 209 - 44 * improving; - // Step 9. ProbCut (~10 Elo) + // Step 9. ProbCut (~4 Elo) // If we have a good enough capture and a reduced search returns a value // much above beta, we can (almost) safely prune the previous move. 
if ( !PvNode @@ -862,15 +862,6 @@ namespace { && ttValue != VALUE_NONE && ttValue < probCutBeta)) { - // if ttMove is a capture and value from transposition table is good enough produce probCut - // cutoff without digging into actual probCut search - if ( ss->ttHit - && tte->depth() >= depth - 3 - && ttValue != VALUE_NONE - && ttValue >= probCutBeta - && ttMove - && pos.capture_or_promotion(ttMove)) - return probCutBeta; assert(probCutBeta < VALUE_INFINITE); From c82f6f56a65759461f417602059ad7c97b9451aa Mon Sep 17 00:00:00 2001 From: bmc4 Date: Thu, 13 May 2021 23:47:41 -0300 Subject: [PATCH 03/17] Simplify LMR rules for statScore We simplify two parts of LMR which seem not to bring strength anymore. --- Individual Tests: https://tests.stockfishchess.org/tests/view/609d1cc15085663412d0856a https://tests.stockfishchess.org/tests/view/609cb0cc7746e3dc74ffae8d https://tests.stockfishchess.org/tests/view/609d1c9f5085663412d08568 --- LTC: LLR: 2.97 (-2.94,2.94) <-2.50,0.50> Total: 84184 W: 3093 L: 3066 D: 78025 Ptnml(0-2): 47, 2755, 36458, 2788, 44 https://tests.stockfishchess.org/tests/view/609d84615085663412d08e2f --- While at it, we also update the Elo estimate of the previous rule in LMR, see: https://tests.stockfishchess.org/tests/view/609a933c3a33eb67a844f7ca https://tests.stockfishchess.org/tests/view/609a959c3a33eb67a844f7d5 https://tests.stockfishchess.org/tests/view/609afff73a33eb67a844f870 --- closes https://github.com/official-stockfish/Stockfish/pull/3464 Bench: 4156523 --- src/search.cpp | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 788be984..e03016b6 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1131,12 +1131,12 @@ moves_loop: // When in check, search starts from here { Depth r = reduction(improving, depth, moveCount); - // Decrease reduction if the ttHit running average is large + // Decrease reduction if the ttHit running average is large (~0 Elo) if 
(thisThread->ttHitAverage > 537 * TtHitAverageResolution * TtHitAverageWindow / 1024) r--; // Decrease reduction if position is or has been on the PV - // and node is not likely to fail low. (~10 Elo) + // and node is not likely to fail low. (~3 Elo) if ( ss->ttPv && !likelyFailLow) r -= 2; @@ -1162,9 +1162,10 @@ moves_loop: // When in check, search starts from here r++; // Increase reduction at root if failing high - r += rootNode ? thisThread->failedHighCnt * thisThread->failedHighCnt * moveCount / 512 : 0; + if (rootNode) + r += thisThread->failedHighCnt * thisThread->failedHighCnt * moveCount / 512; - // Increase reduction for cut nodes (~10 Elo) + // Increase reduction for cut nodes (~3 Elo) if (cutNode) r += 2; @@ -1174,20 +1175,8 @@ moves_loop: // When in check, search starts from here + (*contHist[3])[movedPiece][to_sq(move)] - 4741; - // Decrease/increase reduction by comparing opponent's stat score (~10 Elo) - if (ss->statScore >= -89 && (ss-1)->statScore < -116) - r--; - - else if ((ss-1)->statScore >= -112 && ss->statScore < -100) - r++; - // Decrease/increase reduction for moves with a good/bad history (~30 Elo) - // If we are not in check use statScore, but if we are in check we use - // the sum of main history and first continuation history with an offset. - if (ss->inCheck) - r -= (thisThread->mainHistory[us][from_to(move)] - + (*contHist[0])[movedPiece][to_sq(move)] - 3833) / 16384; - else + if (!ss->inCheck) r -= ss->statScore / 14790; } From 61e1c66b7cb1dea9346a9b74e801e4da74ad7591 Mon Sep 17 00:00:00 2001 From: Vizvezdenec Date: Sat, 15 May 2021 00:55:45 +0300 Subject: [PATCH 04/17] Simplification for countermoves based pruning Simplify away two extra conditions in countermoves based pruning. These conditions (both of them) were introduced quite a long time ago via speculative LTCs and seem to no longer bring any benefit. 
passed STC https://tests.stockfishchess.org/tests/view/609e81f35085663412d08f31 LLR: 2.96 (-2.94,2.94) <-2.50,0.50> Total: 28488 W: 2487 L: 2382 D: 23619 Ptnml(0-2): 87, 1919, 10123, 2032, 83 passed LTC https://tests.stockfishchess.org/tests/view/609e9c085085663412d08f59 LLR: 2.95 (-2.94,2.94) <-2.50,0.50> Total: 33176 W: 1219 L: 1155 D: 30802 Ptnml(0-2): 13, 1036, 14423, 1106, 10 closes https://github.com/official-stockfish/Stockfish/pull/3468 Bench: 4749514 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index e03016b6..bf6ba6c4 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1026,7 +1026,7 @@ moves_loop: // When in check, search starts from here else { // Countermoves based pruning (~20 Elo) - if ( lmrDepth < 4 + ((ss-1)->statScore > 0 || (ss-1)->moveCount == 1) + if ( lmrDepth < 4 && (*contHist[0])[movedPiece][to_sq(move)] < CounterMovePruneThreshold && (*contHist[1])[movedPiece][to_sq(move)] < CounterMovePruneThreshold) continue; From f90274d8ce1aad4ad0595aacbceb74b6cbe306a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Mon, 17 May 2021 09:13:34 +0200 Subject: [PATCH 05/17] Small clean-ups - Comment for Countemove pruning -> Continuation history - Fix comment in input_slice.h - Shorter lines in Makefile - Comment for scale factor - Fix comment for pinners in see_ge() - Change Thread.id() signature to size_t - Trailing space in reprosearch.sh - Add Douglas Matos Gomes to the AUTHORS file - Introduce comment for undo_null_move() - Use Stockfish coding style for export_net() - Change date in AUTHORS file closes https://github.com/official-stockfish/Stockfish/pull/3416 No functional change --- AUTHORS | 3 ++- src/Makefile | 6 ++---- src/evaluate.cpp | 28 ++++++++++++++++------------ src/nnue/layers/input_slice.h | 2 +- src/position.cpp | 9 ++++++--- src/search.cpp | 5 ++--- src/thread.h | 2 +- tests/reprosearch.sh | 2 +- 8 files changed, 31 insertions(+), 26 
deletions(-) diff --git a/AUTHORS b/AUTHORS index 69d682f1..9042495f 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,4 +1,4 @@ -# List of authors for Stockfish, as of March 31, 2021 +# List of authors for Stockfish, as of May 17, 2021 # Founders of the Stockfish project and fishtest infrastructure Tord Romstad (romstad) @@ -52,6 +52,7 @@ Dieter Dobbelaere (ddobbelaere) DiscanX Dominik Schlösser (domschl) double-beep +Douglas Matos Gomes (dsmsgms) Eduardo Cáceres (eduherminio) Eelco de Groot (KingDefender) Elvin Liu (solarlight2) diff --git a/src/Makefile b/src/Makefile index 71105bdb..660a13fb 100644 --- a/src/Makefile +++ b/src/Makefile @@ -96,8 +96,7 @@ endif ifeq ($(ARCH), $(filter $(ARCH), \ x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \ x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ - x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \ - e2k \ + x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 e2k \ armv7 armv7-neon armv8 apple-silicon general-64 general-32)) SUPPORTED_ARCH=true else @@ -840,8 +839,7 @@ config-sanity: net @test "$(optimize)" = "yes" || test "$(optimize)" = "no" @test "$(SUPPORTED_ARCH)" = "true" @test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ - test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \ - test "$(arch)" = "e2k" || \ + test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "e2k" || \ test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" @test "$(bits)" = "32" || test "$(bits)" = "64" @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no" diff --git a/src/evaluate.cpp b/src/evaluate.cpp index c396e0f7..403d59dd 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -114,24 +114,28 @@ namespace Eval { } } + /// NNUE::export_net() exports the currently loaded network to a file void NNUE::export_net(const std::optional& filename) { std::string actualFilename; - if (filename.has_value()) { - 
actualFilename = filename.value(); - } else { - if (eval_file_loaded != EvalFileDefaultName) { - sync_cout << "Failed to export a net. A non-embedded net can only be saved if the filename is specified." << sync_endl; - return; - } - actualFilename = EvalFileDefaultName; + + if (filename.has_value()) + actualFilename = filename.value(); + else + { + if (eval_file_loaded != EvalFileDefaultName) + { + sync_cout << "Failed to export a net. A non-embedded net can only be saved if the filename is specified." << sync_endl; + return; + } + actualFilename = EvalFileDefaultName; } ofstream stream(actualFilename, std::ios_base::binary); - if (save_eval(stream)) { + + if (save_eval(stream)) sync_cout << "Network saved successfully to " << actualFilename << "." << sync_endl; - } else { + else sync_cout << "Failed to export a net." << sync_endl; - } } /// NNUE::verify() verifies that the last net used was loaded successfully @@ -927,7 +931,7 @@ namespace { Color strongSide = eg > VALUE_DRAW ? WHITE : BLACK; int sf = me->scale_factor(pos, strongSide); - // If scale factor is not already specific, scale down via general heuristics + // If scale factor is not already specific, scale up/down via general heuristics if (sf == SCALE_FACTOR_NORMAL) { if (pos.opposite_bishops()) diff --git a/src/nnue/layers/input_slice.h b/src/nnue/layers/input_slice.h index bd4d7447..b6bf1727 100644 --- a/src/nnue/layers/input_slice.h +++ b/src/nnue/layers/input_slice.h @@ -53,7 +53,7 @@ class InputSlice { return true; } - // Read network parameters + // Write network parameters bool write_parameters(std::ostream& /*stream*/) const { return true; } diff --git a/src/position.cpp b/src/position.cpp index 2b3be3f7..f1c36156 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -988,7 +988,7 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ } -/// Position::do(undo)_null_move() is used to do(undo) a "null move": it flips +/// Position::do_null_move() is used to do a "null 
move": it flips /// the side to move without executing any move on the board. void Position::do_null_move(StateInfo& newSt) { @@ -1027,6 +1027,9 @@ void Position::do_null_move(StateInfo& newSt) { assert(pos_is_ok()); } + +/// Position::undo_null_move() must be used to undo a "null move" + void Position::undo_null_move() { assert(!checkers()); @@ -1092,8 +1095,8 @@ bool Position::see_ge(Move m, Value threshold) const { if (!(stmAttackers = attackers & pieces(stm))) break; - // Don't allow pinned pieces to attack (except the king) as long as - // there are pinners on their original square. + // Don't allow pinned pieces to attack as long as there are + // pinners on their original square. if (pinners(~stm) & occupied) stmAttackers &= ~blockers_for_king(stm); diff --git a/src/search.cpp b/src/search.cpp index bf6ba6c4..29b334ed 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -862,7 +862,6 @@ namespace { && ttValue != VALUE_NONE && ttValue < probCutBeta)) { - assert(probCutBeta < VALUE_INFINITE); MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory); @@ -1025,7 +1024,7 @@ moves_loop: // When in check, search starts from here } else { - // Countermoves based pruning (~20 Elo) + // Continuation history based pruning (~20 Elo) if ( lmrDepth < 4 && (*contHist[0])[movedPiece][to_sq(move)] < CounterMovePruneThreshold && (*contHist[1])[movedPiece][to_sq(move)] < CounterMovePruneThreshold) @@ -1528,7 +1527,7 @@ moves_loop: // When in check, search starts from here [pos.moved_piece(move)] [to_sq(move)]; - // CounterMove based pruning + // Continuation history based pruning if ( !captureOrPromotion && bestValue > VALUE_TB_LOSS_IN_MAX_PLY && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold diff --git a/src/thread.h b/src/thread.h index 4cf5dabb..5785fd25 100644 --- a/src/thread.h +++ b/src/thread.h @@ -55,7 +55,7 @@ public: void idle_loop(); void start_searching(); void wait_for_search_finished(); - int id() const { return 
idx; } + size_t id() const { return idx; } Pawns::Table pawnsTable; Material::Table materialTable; diff --git a/tests/reprosearch.sh b/tests/reprosearch.sh index 9fd847ff..c1167f7f 100755 --- a/tests/reprosearch.sh +++ b/tests/reprosearch.sh @@ -10,7 +10,7 @@ trap 'error ${LINENO}' ERR echo "reprosearch testing started" -# repeat two short games, separated by ucinewgame. +# repeat two short games, separated by ucinewgame. # with go nodes $nodes they should result in exactly # the same node count for each iteration. cat << EOF > repeat.exp From e8d64af1230fdac65bb0da246df3e7abe82e0838 Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Tue, 18 May 2021 17:36:26 +0200 Subject: [PATCH 06/17] New NNUE architecture and net Introduces a new NNUE network architecture and associated network parameters, as obtained by a new pytorch trainer. The network is already very strong at short TC, without regression at longer TC, and has potential for further improvements. https://tests.stockfishchess.org/tests/view/60a159c65085663412d0921d TC: 10s+0.1s, 1 thread ELO: 21.74 +-3.4 (95%) LOS: 100.0% Total: 10000 W: 1559 L: 934 D: 7507 Ptnml(0-2): 38, 701, 2972, 1176, 113 https://tests.stockfishchess.org/tests/view/60a187005085663412d0925b TC: 60s+0.6s, 1 thread ELO: 5.85 +-1.7 (95%) LOS: 100.0% Total: 20000 W: 1381 L: 1044 D: 17575 Ptnml(0-2): 27, 885, 7864, 1172, 52 https://tests.stockfishchess.org/tests/view/60a2beede229097940a03806 TC: 20s+0.2s, 8 threads LLR: 2.93 (-2.94,2.94) <0.50,3.50> Total: 34272 W: 1610 L: 1452 D: 31210 Ptnml(0-2): 30, 1285, 14350, 1439, 32 https://tests.stockfishchess.org/tests/view/60a2d687e229097940a03c72 TC: 60s+0.6s, 8 threads LLR: 2.94 (-2.94,2.94) <-2.50,0.50> Total: 45544 W: 1262 L: 1214 D: 43068 Ptnml(0-2): 12, 1129, 20442, 1177, 12 The network has been trained (by vondele) using the https://github.com/glinscott/nnue-pytorch/ trainer (started by glinscott), specifically the branch https://github.com/Sopel97/nnue-pytorch/tree/experiment_56. 
The data used are in 64 billion positions (193GB total) generated and scored with the current master net d8: https://drive.google.com/file/d/1hOOYSDKgOOp38ZmD0N4DV82TOLHzjUiF/view?usp=sharing d9: https://drive.google.com/file/d/1VlhnHL8f-20AXhGkILujnNXHwy9T-MQw/view?usp=sharing d10: https://drive.google.com/file/d/1ZC5upzBYMmMj1gMYCkt6rCxQG0GnO3Kk/view?usp=sharing fishtest_d9: https://drive.google.com/file/d/1GQHt0oNgKaHazwJFTRbXhlCN3FbUedFq/view?usp=sharing This network also contains a few architectural changes with respect to the current master: Size changed from 256x2-32-32-1 to 512x2-16-32-1 ~15-20% slower ~2x larger adds a special path for 16 valued ClippedReLU fixes affine transform code for 16 inputs/outputs, by using InputDimensions instead of PaddedInputDimensions this is safe now because the inputs are processed in groups of 4 in the current affine transform code The feature set changed from HalfKP to HalfKAv2 Includes information about the kings like HalfKA Packs king features better, resulting in 8% size reduction compared to HalfKA The board is flipped for the black's perspective, instead of rotated like in the current master PSQT values for each feature the feature transformer now outputs a part that is forwarded directly to the output and allows learning piece values more directly than the previous network architecture. The effect is visible for high imbalance positions, where the current master network outputs evaluations skewed towards zero. 
8 PSQT values per feature, chosen based on (popcount(pos.pieces()) - 1) / 4 initialized to classical material values on the start of the training 8 subnetworks (512x2->16->32->1), chosen based on (popcount(pos.pieces()) - 1) / 4 only one subnetwork is evaluated for any position, no or marginal speed loss A diagram of the network is available: https://user-images.githubusercontent.com/8037982/118656988-553a1700-b7eb-11eb-82ef-56a11cbebbf2.png A more complete description: https://github.com/glinscott/nnue-pytorch/blob/master/docs/nnue.md closes https://github.com/official-stockfish/Stockfish/pull/3474 Bench: 3806488 --- src/Makefile | 2 +- src/evaluate.cpp | 13 +- src/evaluate.h | 2 +- src/nnue/evaluate_nnue.cpp | 24 ++-- .../features/{half_kp.cpp => half_ka_v2.cpp} | 25 ++-- src/nnue/features/{half_kp.h => half_ka_v2.h} | 51 +++---- src/nnue/layers/affine_transform.h | 116 ++++----------- src/nnue/layers/clipped_relu.h | 50 +++++-- src/nnue/nnue_accumulator.h | 4 +- src/nnue/nnue_architecture.h | 10 +- src/nnue/nnue_common.h | 2 +- src/nnue/nnue_feature_transformer.h | 133 +++++++++++++++++- src/search.cpp | 6 +- 13 files changed, 265 insertions(+), 173 deletions(-) rename src/nnue/features/{half_kp.cpp => half_ka_v2.cpp} (77%) rename src/nnue/features/{half_kp.h => half_ka_v2.h} (73%) diff --git a/src/Makefile b/src/Makefile index 660a13fb..066e7697 100644 --- a/src/Makefile +++ b/src/Makefile @@ -41,7 +41,7 @@ endif SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \ material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \ search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ - nnue/evaluate_nnue.cpp nnue/features/half_kp.cpp + nnue/evaluate_nnue.cpp nnue/features/half_ka_v2.cpp OBJS = $(notdir $(SRCS:.cpp=.o)) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 403d59dd..256bd994 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -120,7 +120,7 @@ namespace Eval { 
if (filename.has_value()) actualFilename = filename.value(); - else + else { if (eval_file_loaded != EvalFileDefaultName) { @@ -1116,10 +1116,8 @@ Value Eval::evaluate(const Position& pos) { // Scale and shift NNUE for compatibility with search and classical evaluation auto adjusted_NNUE = [&]() { - int material = pos.non_pawn_material() + 4 * PawnValueMg * pos.count(); - int scale = 580 - + material / 32 - - 4 * pos.rule50_count(); + + int scale = 903 + 28 * pos.count() + 28 * pos.non_pawn_material() / 1024; Value nnue = NNUE::evaluate(pos) * scale / 1024 + Time.tempoNNUE; @@ -1134,7 +1132,7 @@ Value Eval::evaluate(const Position& pos) { Value psq = Value(abs(eg_value(pos.psq_score()))); int r50 = 16 + pos.rule50_count(); bool largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50; - bool classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB)); + bool classical = largePsq; // Use classical evaluation for really low piece endgames. // One critical case is the draw for bishop + A/H file pawn vs naked king. @@ -1151,8 +1149,7 @@ Value Eval::evaluate(const Position& pos) { && !lowPieceEndgame && ( abs(v) * 16 < NNUEThreshold2 * r50 || ( pos.opposite_bishops() - && abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50 - && !(pos.this_thread()->nodes & 0xB)))) + && abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50))) v = adjusted_NNUE(); } diff --git a/src/evaluate.h b/src/evaluate.h index 128a7cae..ee4c175b 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -39,7 +39,7 @@ namespace Eval { // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. 
- #define EvalFileDefaultName "nn-62ef826d1a6d.nnue" + #define EvalFileDefaultName "nn-8a08400ed089.nnue" namespace NNUE { diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index e0d4b911..97cef814 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -35,7 +35,7 @@ namespace Stockfish::Eval::NNUE { LargePagePtr featureTransformer; // Evaluation function - AlignedPtr network; + AlignedPtr network[LayerStacks]; // Evaluation function file name std::string fileName; @@ -83,7 +83,8 @@ namespace Stockfish::Eval::NNUE { void initialize() { Detail::initialize(featureTransformer); - Detail::initialize(network); + for (std::size_t i = 0; i < LayerStacks; ++i) + Detail::initialize(network[i]); } // Read network header @@ -92,7 +93,7 @@ namespace Stockfish::Eval::NNUE { std::uint32_t version, size; version = read_little_endian(stream); - *hashValue = read_little_endian(stream); + *hashValue = read_little_endian(stream); size = read_little_endian(stream); if (!stream || version != Version) return false; desc->resize(size); @@ -117,7 +118,8 @@ namespace Stockfish::Eval::NNUE { if (!read_header(stream, &hashValue, &netDescription)) return false; if (hashValue != HashValue) return false; if (!Detail::read_parameters(stream, *featureTransformer)) return false; - if (!Detail::read_parameters(stream, *network)) return false; + for (std::size_t i = 0; i < LayerStacks; ++i) + if (!Detail::read_parameters(stream, *(network[i]))) return false; return stream && stream.peek() == std::ios::traits_type::eof(); } @@ -126,7 +128,8 @@ namespace Stockfish::Eval::NNUE { if (!write_header(stream, HashValue, netDescription)) return false; if (!Detail::write_parameters(stream, *featureTransformer)) return false; - if (!Detail::write_parameters(stream, *network)) return false; + for (std::size_t i = 0; i < LayerStacks; ++i) + if (!Detail::write_parameters(stream, *(network[i]))) return false; return (bool)stream; } @@ -154,10 +157,15 @@ namespace 
Stockfish::Eval::NNUE { ASSERT_ALIGNED(transformedFeatures, alignment); ASSERT_ALIGNED(buffer, alignment); - featureTransformer->transform(pos, transformedFeatures); - const auto output = network->propagate(transformedFeatures, buffer); + const std::size_t bucket = (pos.count() - 1) / 4; - return static_cast(output[0] / OutputScale); + const auto [psqt, lazy] = featureTransformer->transform(pos, transformedFeatures, bucket); + if (lazy) { + return static_cast(psqt / OutputScale); + } else { + const auto output = network[bucket]->propagate(transformedFeatures, buffer); + return static_cast((output[0] + psqt) / OutputScale); + } } // Load eval, from a file stream or a memory stream diff --git a/src/nnue/features/half_kp.cpp b/src/nnue/features/half_ka_v2.cpp similarity index 77% rename from src/nnue/features/half_kp.cpp rename to src/nnue/features/half_ka_v2.cpp index aa1decee..57f43e50 100644 --- a/src/nnue/features/half_kp.cpp +++ b/src/nnue/features/half_ka_v2.cpp @@ -16,32 +16,32 @@ along with this program. If not, see . 
*/ -//Definition of input features HalfKP of NNUE evaluation function +//Definition of input features HalfKAv2 of NNUE evaluation function -#include "half_kp.h" +#include "half_ka_v2.h" #include "../../position.h" namespace Stockfish::Eval::NNUE::Features { // Orient a square according to perspective (rotates by 180 for black) - inline Square HalfKP::orient(Color perspective, Square s) { - return Square(int(s) ^ (bool(perspective) * 63)); + inline Square HalfKAv2::orient(Color perspective, Square s) { + return Square(int(s) ^ (bool(perspective) * 56)); } // Index of a feature for a given king position and another piece on some square - inline IndexType HalfKP::make_index(Color perspective, Square s, Piece pc, Square ksq) { + inline IndexType HalfKAv2::make_index(Color perspective, Square s, Piece pc, Square ksq) { return IndexType(orient(perspective, s) + PieceSquareIndex[perspective][pc] + PS_NB * ksq); } // Get a list of indices for active features - void HalfKP::append_active_indices( + void HalfKAv2::append_active_indices( const Position& pos, Color perspective, ValueListInserter active ) { Square ksq = orient(perspective, pos.square(perspective)); - Bitboard bb = pos.pieces() & ~pos.pieces(KING); + Bitboard bb = pos.pieces(); while (bb) { Square s = pop_lsb(bb); @@ -52,7 +52,7 @@ namespace Stockfish::Eval::NNUE::Features { // append_changed_indices() : get a list of indices for recently changed features - void HalfKP::append_changed_indices( + void HalfKAv2::append_changed_indices( Square ksq, StateInfo* st, Color perspective, @@ -63,7 +63,6 @@ namespace Stockfish::Eval::NNUE::Features { Square oriented_ksq = orient(perspective, ksq); for (int i = 0; i < dp.dirty_num; ++i) { Piece pc = dp.piece[i]; - if (type_of(pc) == KING) continue; if (dp.from[i] != SQ_NONE) removed.push_back(make_index(perspective, dp.from[i], pc, oriented_ksq)); if (dp.to[i] != SQ_NONE) @@ -71,15 +70,15 @@ namespace Stockfish::Eval::NNUE::Features { } } - int 
HalfKP::update_cost(StateInfo* st) { + int HalfKAv2::update_cost(StateInfo* st) { return st->dirtyPiece.dirty_num; } - int HalfKP::refresh_cost(const Position& pos) { - return pos.count() - 2; + int HalfKAv2::refresh_cost(const Position& pos) { + return pos.count(); } - bool HalfKP::requires_refresh(StateInfo* st, Color perspective) { + bool HalfKAv2::requires_refresh(StateInfo* st, Color perspective) { return st->dirtyPiece.piece[0] == make_piece(perspective, KING); } diff --git a/src/nnue/features/half_kp.h b/src/nnue/features/half_ka_v2.h similarity index 73% rename from src/nnue/features/half_kp.h rename to src/nnue/features/half_ka_v2.h index a09c221b..e4b2edd9 100644 --- a/src/nnue/features/half_kp.h +++ b/src/nnue/features/half_ka_v2.h @@ -18,8 +18,8 @@ //Definition of input features HalfKP of NNUE evaluation function -#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED -#define NNUE_FEATURES_HALF_KP_H_INCLUDED +#ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED +#define NNUE_FEATURES_HALF_KA_V2_H_INCLUDED #include "../nnue_common.h" @@ -32,33 +32,34 @@ namespace Stockfish { namespace Stockfish::Eval::NNUE::Features { - // Feature HalfKP: Combination of the position of own king - // and the position of pieces other than kings - class HalfKP { + // Feature HalfKAv2: Combination of the position of own king + // and the position of pieces + class HalfKAv2 { // unique number for each piece type on each square enum { PS_NONE = 0, - PS_W_PAWN = 1, - PS_B_PAWN = 1 * SQUARE_NB + 1, - PS_W_KNIGHT = 2 * SQUARE_NB + 1, - PS_B_KNIGHT = 3 * SQUARE_NB + 1, - PS_W_BISHOP = 4 * SQUARE_NB + 1, - PS_B_BISHOP = 5 * SQUARE_NB + 1, - PS_W_ROOK = 6 * SQUARE_NB + 1, - PS_B_ROOK = 7 * SQUARE_NB + 1, - PS_W_QUEEN = 8 * SQUARE_NB + 1, - PS_B_QUEEN = 9 * SQUARE_NB + 1, - PS_NB = 10 * SQUARE_NB + 1 + PS_W_PAWN = 0, + PS_B_PAWN = 1 * SQUARE_NB, + PS_W_KNIGHT = 2 * SQUARE_NB, + PS_B_KNIGHT = 3 * SQUARE_NB, + PS_W_BISHOP = 4 * SQUARE_NB, + PS_B_BISHOP = 5 * SQUARE_NB, + PS_W_ROOK = 6 * SQUARE_NB, + 
PS_B_ROOK = 7 * SQUARE_NB, + PS_W_QUEEN = 8 * SQUARE_NB, + PS_B_QUEEN = 9 * SQUARE_NB, + PS_KING = 10 * SQUARE_NB, + PS_NB = 11 * SQUARE_NB }; static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = { // convention: W - us, B - them // viewed from other side, W and B are reversed - { PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_NONE, PS_NONE, - PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_NONE, PS_NONE }, - { PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_NONE, PS_NONE, - PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_NONE, PS_NONE } + { PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE, + PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE }, + { PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE, + PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE } }; // Orient a square according to perspective (rotates by 180 for black) @@ -69,17 +70,17 @@ namespace Stockfish::Eval::NNUE::Features { public: // Feature name - static constexpr const char* Name = "HalfKP(Friend)"; + static constexpr const char* Name = "HalfKAv2(Friend)"; // Hash value embedded in the evaluation file - static constexpr std::uint32_t HashValue = 0x5D69D5B8u; + static constexpr std::uint32_t HashValue = 0x5f234cb8u; // Number of feature dimensions static constexpr IndexType Dimensions = static_cast(SQUARE_NB) * static_cast(PS_NB); - // Maximum number of simultaneously active features. 30 because kins are not included. - static constexpr IndexType MaxActiveDimensions = 30; + // Maximum number of simultaneously active features. 
+ static constexpr IndexType MaxActiveDimensions = 32; // Get a list of indices for active features static void append_active_indices( @@ -107,4 +108,4 @@ namespace Stockfish::Eval::NNUE::Features { } // namespace Stockfish::Eval::NNUE::Features -#endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED +#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index fc192691..9a3b778e 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -69,62 +69,15 @@ namespace Stockfish::Eval::NNUE::Layers { if (!previousLayer.read_parameters(stream)) return false; for (std::size_t i = 0; i < OutputDimensions; ++i) biases[i] = read_little_endian(stream); -#if !defined (USE_SSSE3) for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) +#if !defined (USE_SSSE3) weights[i] = read_little_endian(stream); #else - std::unique_ptr indexMap = std::make_unique(OutputDimensions * PaddedInputDimensions); - for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) { - const uint32_t scrambledIdx = + weights[ (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 + i / PaddedInputDimensions * 4 + - i % 4; - weights[scrambledIdx] = read_little_endian(stream); - indexMap[scrambledIdx] = i; - } - - // Determine if eights of weight and input products can be summed using 16bits - // without saturation. We assume worst case combinations of 0 and 127 for all inputs. 
- if (OutputDimensions > 1 && !stream.fail()) - { - canSaturate16.count = 0; -#if !defined(USE_VNNI) - for (IndexType i = 0; i < PaddedInputDimensions; i += 16) - for (IndexType j = 0; j < OutputDimensions; ++j) - for (int x = 0; x < 2; ++x) - { - WeightType* w = &weights[i * OutputDimensions + j * 4 + x * 2]; - int sum[2] = {0, 0}; - for (int k = 0; k < 8; ++k) - { - IndexType idx = k / 2 * OutputDimensions * 4 + k % 2; - sum[w[idx] < 0] += w[idx]; - } - for (int sign : { -1, 1 }) - while (sign * sum[sign == -1] > 258) - { - int maxK = 0, maxW = 0; - for (int k = 0; k < 8; ++k) - { - IndexType idx = k / 2 * OutputDimensions * 4 + k % 2; - if (maxW < sign * w[idx]) - maxK = k, maxW = sign * w[idx]; - } - - IndexType idx = maxK / 2 * OutputDimensions * 4 + maxK % 2; - sum[sign == -1] -= w[idx]; - const uint32_t scrambledIdx = idx + i * OutputDimensions + j * 4 + x * 2; - canSaturate16.add(j, i + maxK / 2 * 4 + maxK % 2 + x * 2, w[idx], indexMap[scrambledIdx]); - w[idx] = 0; - } - } - - // Non functional optimization for faster more linear access - std::sort(canSaturate16.ids, canSaturate16.ids + canSaturate16.count, - [](const typename CanSaturate::Entry& e1, const typename CanSaturate::Entry& e2) - { return e1.in == e2.in ? 
e1.out < e2.out : e1.in < e2.in; }); -#endif - } + i % 4 + ] = read_little_endian(stream); #endif return !stream.fail(); @@ -148,8 +101,6 @@ namespace Stockfish::Eval::NNUE::Layers { i % 4 ]; } - for (int i = 0; i < canSaturate16.count; ++i) - unscrambledWeights[canSaturate16.ids[i].wIdx] = canSaturate16.ids[i].w; for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) write_little_endian(stream, unscrambledWeights[i]); @@ -194,11 +145,11 @@ namespace Stockfish::Eval::NNUE::Layers { __m512i product1 = _mm512_maddubs_epi16(a1, b1); __m512i product2 = _mm512_maddubs_epi16(a2, b2); __m512i product3 = _mm512_maddubs_epi16(a3, b3); - product0 = _mm512_add_epi16(product0, product1); - product2 = _mm512_add_epi16(product2, product3); - product0 = _mm512_add_epi16(product0, product2); + product0 = _mm512_adds_epi16(product0, product1); product0 = _mm512_madd_epi16(product0, Ones512); - acc = _mm512_add_epi32(acc, product0); + product2 = _mm512_adds_epi16(product2, product3); + product2 = _mm512_madd_epi16(product2, Ones512); + acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product2)); #endif }; @@ -236,11 +187,11 @@ namespace Stockfish::Eval::NNUE::Layers { __m256i product1 = _mm256_maddubs_epi16(a1, b1); __m256i product2 = _mm256_maddubs_epi16(a2, b2); __m256i product3 = _mm256_maddubs_epi16(a3, b3); - product0 = _mm256_add_epi16(product0, product1); - product2 = _mm256_add_epi16(product2, product3); - product0 = _mm256_add_epi16(product0, product2); + product0 = _mm256_adds_epi16(product0, product1); product0 = _mm256_madd_epi16(product0, Ones256); - acc = _mm256_add_epi32(acc, product0); + product2 = _mm256_adds_epi16(product2, product3); + product2 = _mm256_madd_epi16(product2, Ones256); + acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product2)); #endif }; @@ -267,11 +218,11 @@ namespace Stockfish::Eval::NNUE::Layers { __m128i product1 = _mm_maddubs_epi16(a1, b1); __m128i product2 = _mm_maddubs_epi16(a2, b2); __m128i product3 = 
_mm_maddubs_epi16(a3, b3); - product0 = _mm_add_epi16(product0, product1); - product2 = _mm_add_epi16(product2, product3); - product0 = _mm_add_epi16(product0, product2); + product0 = _mm_adds_epi16(product0, product1); product0 = _mm_madd_epi16(product0, Ones128); - acc = _mm_add_epi32(acc, product0); + product2 = _mm_adds_epi16(product2, product3); + product2 = _mm_madd_epi16(product2, Ones128); + acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product2)); }; #endif @@ -300,6 +251,8 @@ namespace Stockfish::Eval::NNUE::Layers { #endif #if defined (USE_SSSE3) + // Different layout, we process 4 inputs at a time, always. + static_assert(InputDimensions % 4 == 0); const auto output = reinterpret_cast(buffer); const auto inputVector = reinterpret_cast(input); @@ -310,7 +263,7 @@ namespace Stockfish::Eval::NNUE::Layers { // because then it is also an input dimension. if constexpr (OutputDimensions % OutputSimdWidth == 0) { - constexpr IndexType NumChunks = PaddedInputDimensions / 4; + constexpr IndexType NumChunks = InputDimensions / 4; const auto input32 = reinterpret_cast(input); vec_t* outptr = reinterpret_cast(output); @@ -329,8 +282,6 @@ namespace Stockfish::Eval::NNUE::Layers { for (int j = 0; j * OutputSimdWidth < OutputDimensions; ++j) vec_add_dpbusd_32x4(outptr[j], in0, col0[j], in1, col1[j], in2, col2[j], in3, col3[j]); } - for (int i = 0; i < canSaturate16.count; ++i) - output[canSaturate16.ids[i].out] += input[canSaturate16.ids[i].in] * canSaturate16.ids[i].w; } else if constexpr (OutputDimensions == 1) { @@ -377,17 +328,21 @@ namespace Stockfish::Eval::NNUE::Layers { auto output = reinterpret_cast(buffer); #if defined(USE_SSE2) - constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth; + // At least a multiple of 16, with SSE2. 
+ static_assert(InputDimensions % SimdWidth == 0); + constexpr IndexType NumChunks = InputDimensions / SimdWidth; const __m128i Zeros = _mm_setzero_si128(); const auto inputVector = reinterpret_cast(input); #elif defined(USE_MMX) - constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth; + static_assert(InputDimensions % SimdWidth == 0); + constexpr IndexType NumChunks = InputDimensions / SimdWidth; const __m64 Zeros = _mm_setzero_si64(); const auto inputVector = reinterpret_cast(input); #elif defined(USE_NEON) - constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth; + static_assert(InputDimensions % SimdWidth == 0); + constexpr IndexType NumChunks = InputDimensions / SimdWidth; const auto inputVector = reinterpret_cast(input); #endif @@ -473,25 +428,6 @@ namespace Stockfish::Eval::NNUE::Layers { alignas(CacheLineSize) BiasType biases[OutputDimensions]; alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; -#if defined (USE_SSSE3) - struct CanSaturate { - int count; - struct Entry { - uint32_t wIdx; - uint16_t out; - uint16_t in; - int8_t w; - } ids[PaddedInputDimensions * OutputDimensions * 3 / 4]; - - void add(int i, int j, int8_t w, uint32_t wIdx) { - ids[count].wIdx = wIdx; - ids[count].out = i; - ids[count].in = j; - ids[count].w = w; - ++count; - } - } canSaturate16; -#endif }; } // namespace Stockfish::Eval::NNUE::Layers diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h index f1ac2dfe..65455df4 100644 --- a/src/nnue/layers/clipped_relu.h +++ b/src/nnue/layers/clipped_relu.h @@ -72,22 +72,42 @@ namespace Stockfish::Eval::NNUE::Layers { const auto output = reinterpret_cast(buffer); #if defined(USE_AVX2) - constexpr IndexType NumChunks = InputDimensions / SimdWidth; - const __m256i Zero = _mm256_setzero_si256(); - const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m256i*>(output); - for 
(IndexType i = 0; i < NumChunks; ++i) { - const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( - _mm256_load_si256(&in[i * 4 + 0]), - _mm256_load_si256(&in[i * 4 + 1])), WeightScaleBits); - const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( - _mm256_load_si256(&in[i * 4 + 2]), - _mm256_load_si256(&in[i * 4 + 3])), WeightScaleBits); - _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( - _mm256_packs_epi16(words0, words1), Zero), Offsets)); + if constexpr (InputDimensions % SimdWidth == 0) { + constexpr IndexType NumChunks = InputDimensions / SimdWidth; + const __m256i Zero = _mm256_setzero_si256(); + const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m256i*>(output); + for (IndexType i = 0; i < NumChunks; ++i) { + const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( + _mm256_load_si256(&in[i * 4 + 0]), + _mm256_load_si256(&in[i * 4 + 1])), WeightScaleBits); + const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( + _mm256_load_si256(&in[i * 4 + 2]), + _mm256_load_si256(&in[i * 4 + 3])), WeightScaleBits); + _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( + _mm256_packs_epi16(words0, words1), Zero), Offsets)); + } + } else { + constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); + const __m128i Zero = _mm_setzero_si128(); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m128i*>(output); + for (IndexType i = 0; i < NumChunks; ++i) { + const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32( + _mm_load_si128(&in[i * 4 + 0]), + _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits); + const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32( + _mm_load_si128(&in[i * 4 + 2]), + _mm_load_si128(&in[i * 4 + 3])), WeightScaleBits); + const __m128i packedbytes = _mm_packs_epi16(words0, words1); + _mm_store_si128(&out[i], _mm_max_epi8(packedbytes, Zero)); + } } - 
constexpr IndexType Start = NumChunks * SimdWidth; + constexpr IndexType Start = + InputDimensions % SimdWidth == 0 + ? InputDimensions / SimdWidth * SimdWidth + : InputDimensions / (SimdWidth / 2) * (SimdWidth / 2); #elif defined(USE_SSE2) constexpr IndexType NumChunks = InputDimensions / SimdWidth; diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 72a151f8..e24902c4 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -30,8 +30,8 @@ namespace Stockfish::Eval::NNUE { // Class that holds the result of affine transformation of input features struct alignas(CacheLineSize) Accumulator { - std::int16_t - accumulation[2][TransformedFeatureDimensions]; + std::int16_t accumulation[2][TransformedFeatureDimensions]; + std::int32_t psqtAccumulation[2][PSQTBuckets]; AccumulatorState state[2]; }; diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h index 55a01fbe..879a39cd 100644 --- a/src/nnue/nnue_architecture.h +++ b/src/nnue/nnue_architecture.h @@ -23,7 +23,7 @@ #include "nnue_common.h" -#include "features/half_kp.h" +#include "features/half_ka_v2.h" #include "layers/input_slice.h" #include "layers/affine_transform.h" @@ -32,16 +32,18 @@ namespace Stockfish::Eval::NNUE { // Input features used in evaluation function - using FeatureSet = Features::HalfKP; + using FeatureSet = Features::HalfKAv2; // Number of input feature dimensions after conversion - constexpr IndexType TransformedFeatureDimensions = 256; + constexpr IndexType TransformedFeatureDimensions = 512; + constexpr IndexType PSQTBuckets = 8; + constexpr IndexType LayerStacks = 8; namespace Layers { // Define network structure using InputLayer = InputSlice; - using HiddenLayer1 = ClippedReLU>; + using HiddenLayer1 = ClippedReLU>; using HiddenLayer2 = ClippedReLU>; using OutputLayer = AffineTransform; diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index d41e0237..dc700061 100644 --- a/src/nnue/nnue_common.h +++ 
b/src/nnue/nnue_common.h @@ -46,7 +46,7 @@ namespace Stockfish::Eval::NNUE { // Version of the evaluation file - constexpr std::uint32_t Version = 0x7AF32F16u; + constexpr std::uint32_t Version = 0x7AF32F20u; // Constant used in evaluation value calculation constexpr int OutputScale = 16; diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index a4a8e98f..2c0a0c6d 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -35,45 +35,82 @@ namespace Stockfish::Eval::NNUE { // vector registers. #define VECTOR + static_assert(PSQTBuckets == 8, "Assumed by the current choice of constants."); + #ifdef USE_AVX512 typedef __m512i vec_t; + typedef __m256i psqt_vec_t; #define vec_load(a) _mm512_load_si512(a) #define vec_store(a,b) _mm512_store_si512(a,b) #define vec_add_16(a,b) _mm512_add_epi16(a,b) #define vec_sub_16(a,b) _mm512_sub_epi16(a,b) + #define vec_load_psqt(a) _mm256_load_si256(a) + #define vec_store_psqt(a,b) _mm256_store_si256(a,b) + #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b) + #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b) + #define vec_zero_psqt() _mm256_setzero_si256() static constexpr IndexType NumRegs = 8; // only 8 are needed + static constexpr IndexType NumPsqtRegs = 1; #elif USE_AVX2 typedef __m256i vec_t; + typedef __m256i psqt_vec_t; #define vec_load(a) _mm256_load_si256(a) #define vec_store(a,b) _mm256_store_si256(a,b) #define vec_add_16(a,b) _mm256_add_epi16(a,b) #define vec_sub_16(a,b) _mm256_sub_epi16(a,b) + #define vec_load_psqt(a) _mm256_load_si256(a) + #define vec_store_psqt(a,b) _mm256_store_si256(a,b) + #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b) + #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b) + #define vec_zero_psqt() _mm256_setzero_si256() static constexpr IndexType NumRegs = 16; + static constexpr IndexType NumPsqtRegs = 1; #elif USE_SSE2 typedef __m128i vec_t; + typedef __m128i psqt_vec_t; #define vec_load(a) (*(a)) #define vec_store(a,b) 
*(a)=(b) #define vec_add_16(a,b) _mm_add_epi16(a,b) #define vec_sub_16(a,b) _mm_sub_epi16(a,b) + #define vec_load_psqt(a) (*(a)) + #define vec_store_psqt(a,b) *(a)=(b) + #define vec_add_psqt_32(a,b) _mm_add_epi32(a,b) + #define vec_sub_psqt_32(a,b) _mm_sub_epi32(a,b) + #define vec_zero_psqt() _mm_setzero_si128() static constexpr IndexType NumRegs = Is64Bit ? 16 : 8; + static constexpr IndexType NumPsqtRegs = 2; #elif USE_MMX typedef __m64 vec_t; + typedef std::int32_t psqt_vec_t; #define vec_load(a) (*(a)) #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) _mm_add_pi16(a,b) #define vec_sub_16(a,b) _mm_sub_pi16(a,b) + #define vec_load_psqt(a) (*(a)) + #define vec_store_psqt(a,b) *(a)=(b) + #define vec_add_psqt_32(a,b) a+b + #define vec_sub_psqt_32(a,b) a-b + #define vec_zero_psqt() 0 static constexpr IndexType NumRegs = 8; + static constexpr IndexType NumPsqtRegs = 8; #elif USE_NEON typedef int16x8_t vec_t; + typedef int32x4_t psqt_vec_t; #define vec_load(a) (*(a)) #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) vaddq_s16(a,b) #define vec_sub_16(a,b) vsubq_s16(a,b) + #define vec_load_psqt(a) (*(a)) + #define vec_store_psqt(a,b) *(a)=(b) + #define vec_add_psqt_32(a,b) vaddq_s32(a,b) + #define vec_sub_psqt_32(a,b) vsubq_s32(a,b) + #define vec_zero_psqt() psqt_vec_t{0} static constexpr IndexType NumRegs = 16; + static constexpr IndexType NumPsqtRegs = 2; #else #undef VECTOR @@ -87,9 +124,13 @@ namespace Stockfish::Eval::NNUE { // Number of output dimensions for one side static constexpr IndexType HalfDimensions = TransformedFeatureDimensions; + static constexpr int LazyThreshold = 1400; + #ifdef VECTOR static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2; + static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4; static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions"); + static_assert(PSQTBuckets % PsqtTileHeight == 0, "PsqtTileHeight must divide PSQTBuckets"); #endif public: @@ 
-115,6 +156,8 @@ namespace Stockfish::Eval::NNUE { biases[i] = read_little_endian(stream); for (std::size_t i = 0; i < HalfDimensions * InputDimensions; ++i) weights[i] = read_little_endian(stream); + for (std::size_t i = 0; i < PSQTBuckets * InputDimensions; ++i) + psqtWeights[i] = read_little_endian(stream); return !stream.fail(); } @@ -128,11 +171,21 @@ namespace Stockfish::Eval::NNUE { } // Convert input features - void transform(const Position& pos, OutputType* output) const { + std::pair transform(const Position& pos, OutputType* output, int bucket) const { update_accumulator(pos, WHITE); update_accumulator(pos, BLACK); + const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; const auto& accumulation = pos.state()->accumulator.accumulation; + const auto& psqtAccumulation = pos.state()->accumulator.psqtAccumulation; + + const auto psqt = ( + psqtAccumulation[static_cast(perspectives[0])][bucket] + - psqtAccumulation[static_cast(perspectives[1])][bucket] + ) / 2; + + if (abs(psqt) > LazyThreshold * OutputScale) + return { psqt, true }; #if defined(USE_AVX512) constexpr IndexType NumChunks = HalfDimensions / (SimdWidth * 2); @@ -163,7 +216,6 @@ namespace Stockfish::Eval::NNUE { const int8x8_t Zero = {0}; #endif - const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; for (IndexType p = 0; p < 2; ++p) { const IndexType offset = HalfDimensions * p; @@ -240,6 +292,8 @@ namespace Stockfish::Eval::NNUE { #if defined(USE_MMX) _mm_empty(); #endif + + return { psqt, false }; } private: @@ -255,6 +309,7 @@ namespace Stockfish::Eval::NNUE { // Gcc-10.2 unnecessarily spills AVX2 registers if this array // is defined in the VECTOR code below, once in each branch vec_t acc[NumRegs]; + psqt_vec_t psqt[NumPsqtRegs]; #endif // Look for a usable accumulator of an earlier position. 
We keep track @@ -333,12 +388,52 @@ namespace Stockfish::Eval::NNUE { } } + for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + { + // Load accumulator + auto accTilePsqt = reinterpret_cast( + &st->accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_load_psqt(&accTilePsqt[k]); + + for (IndexType i = 0; states_to_update[i]; ++i) + { + // Difference calculation for the deactivated features + for (const auto index : removed[i]) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); + } + + // Difference calculation for the activated features + for (const auto index : added[i]) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); + } + + // Store accumulator + accTilePsqt = reinterpret_cast( + &states_to_update[i]->accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + vec_store_psqt(&accTilePsqt[k], psqt[k]); + } + } + #else for (IndexType i = 0; states_to_update[i]; ++i) { std::memcpy(states_to_update[i]->accumulator.accumulation[perspective], st->accumulator.accumulation[perspective], HalfDimensions * sizeof(BiasType)); + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + states_to_update[i]->accumulator.psqtAccumulation[perspective][k] = st->accumulator.psqtAccumulation[perspective][k]; + st = states_to_update[i]; // Difference calculation for the deactivated features @@ -348,6 +443,9 @@ namespace Stockfish::Eval::NNUE { for (IndexType j = 0; j < HalfDimensions; ++j) st->accumulator.accumulation[perspective][j] -= weights[offset + j]; + + for (std::size_t k = 0; k < 
PSQTBuckets; ++k) + st->accumulator.psqtAccumulation[perspective][k] -= psqtWeights[index * PSQTBuckets + k]; } // Difference calculation for the activated features @@ -357,6 +455,9 @@ namespace Stockfish::Eval::NNUE { for (IndexType j = 0; j < HalfDimensions; ++j) st->accumulator.accumulation[perspective][j] += weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + st->accumulator.psqtAccumulation[perspective][k] += psqtWeights[index * PSQTBuckets + k]; } } #endif @@ -392,16 +493,42 @@ namespace Stockfish::Eval::NNUE { vec_store(&accTile[k], acc[k]); } + for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + { + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_zero_psqt(); + + for (const auto index : active) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); + } + + auto accTilePsqt = reinterpret_cast( + &accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + vec_store_psqt(&accTilePsqt[k], psqt[k]); + } + #else std::memcpy(accumulator.accumulation[perspective], biases, HalfDimensions * sizeof(BiasType)); + for (std::size_t k = 0; k < PSQTBuckets; ++k) + accumulator.psqtAccumulation[perspective][k] = 0; + for (const auto index : active) { const IndexType offset = HalfDimensions * index; for (IndexType j = 0; j < HalfDimensions; ++j) accumulator.accumulation[perspective][j] += weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + accumulator.psqtAccumulation[perspective][k] += psqtWeights[index * PSQTBuckets + k]; } #endif } @@ -413,9 +540,11 @@ namespace Stockfish::Eval::NNUE { using BiasType = std::int16_t; using WeightType = std::int16_t; + using PSQTWeightType = std::int32_t; alignas(CacheLineSize) BiasType biases[HalfDimensions]; alignas(CacheLineSize) WeightType 
weights[HalfDimensions * InputDimensions]; + alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets]; }; } // namespace Stockfish::Eval::NNUE diff --git a/src/search.cpp b/src/search.cpp index 29b334ed..ac026a79 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -66,7 +66,7 @@ namespace { // Futility margin Value futility_margin(Depth d, bool improving) { - return Value(234 * (d - improving)); + return Value(231 * (d - improving)); } // Reductions lookup table, initialized at startup @@ -801,7 +801,7 @@ namespace { && (ss-1)->statScore < 24185 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 24 * depth - 34 * improving + 162 * ss->ttPv + 159 + && ss->staticEval >= beta - 22 * depth - 34 * improving + 162 * ss->ttPv + 159 && !excludedMove && pos.non_pawn_material(us) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) @@ -1172,7 +1172,7 @@ moves_loop: // When in check, search starts from here + (*contHist[0])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)] + (*contHist[3])[movedPiece][to_sq(move)] - - 4741; + - 4791; // Decrease/increase reduction for moves with a good/bad history (~30 Elo) if (!ss->inCheck) From d37de3cb1de63da5b2f8e6978c45c5b36973063b Mon Sep 17 00:00:00 2001 From: Vizvezdenec Date: Tue, 18 May 2021 10:02:20 +0300 Subject: [PATCH 07/17] Do more continuation history based pruning This patch increases lmrDepth threshold for continuation history based pruning in search. This part of code for a long time was known to be really TC sensitive - decreasing this threshold easily passed lower time controls but failed badly at LTC, on the other hand it increase was part of a tuning that resulted in being negative at STC but was +12 elo at 180+1.8. After recent simplification of special conditions that sometimes increase it from 4 to 5 it was logical to overall test at longer time controls if 5 is better than 4 with deeper searches. 
reduces strength on STC https://tests.stockfishchess.org/tests/view/60a3a8bbce8ea25a3ef03c74 ELO: -2.57 +-2.0 (95%) LOS: 0.6% Total: 20000 W: 1820 L: 1968 D: 16212 Ptnml(0-2): 68, 1582, 6836, 1458, 56 Passed LTC with STC bounds https://tests.stockfishchess.org/tests/view/60a027395085663412d090ce LLR: 2.93 (-2.94,2.94) <-0.50,2.50> Total: 175256 W: 6774 L: 6548 D: 161934 Ptnml(0-2): 91, 5808, 75604, 6034, 91 Passed VLTC with LTC bounds https://tests.stockfishchess.org/tests/view/60a2bccce229097940a037a7 LLR: 2.96 (-2.94,2.94) <0.50,3.50> Total: 65736 W: 1224 L: 1092 D: 63420 Ptnml(0-2): 5, 1012, 30706, 1136, 9 closes https://github.com/official-stockfish/Stockfish/pull/3473 bench 3689330 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index ac026a79..8f8d42c0 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1025,7 +1025,7 @@ moves_loop: // When in check, search starts from here else { // Continuation history based pruning (~20 Elo) - if ( lmrDepth < 4 + if ( lmrDepth < 5 && (*contHist[0])[movedPiece][to_sq(move)] < CounterMovePruneThreshold && (*contHist[1])[movedPiece][to_sq(move)] < CounterMovePruneThreshold) continue; From 0faf81d1f6b8ebe1a64482145a634d2e7d15db94 Mon Sep 17 00:00:00 2001 From: Yohaan Seth Nathan <73843275+TheYoBots@users.noreply.github.com> Date: Wed, 19 May 2021 00:22:59 +0530 Subject: [PATCH 08/17] Use Markdown syntax in the readme provide direct links to the mentioned files. closes https://github.com/official-stockfish/Stockfish/pull/3477 No Functional Change --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8d5ce8d0..0f6caec9 100644 --- a/README.md +++ b/README.md @@ -21,13 +21,13 @@ intrinsics available on most CPUs (sse2, avx2, neon, or similar). This distribution of Stockfish consists of the following files: - * Readme.md, the file you are currently reading. 
+ * [Readme.md](https://github.com/official-stockfish/Stockfish/blob/master/README.md), the file you are currently reading. - * Copying.txt, a text file containing the GNU General Public License version 3. + * [Copying.txt](https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt), a text file containing the GNU General Public License version 3. - * AUTHORS, a text file with the list of authors for the project + * [AUTHORS](https://github.com/official-stockfish/Stockfish/blob/master/AUTHORS), a text file with the list of authors for the project - * src, a subdirectory containing the full source code, including a Makefile + * [src](https://github.com/official-stockfish/Stockfish/tree/master/src), a subdirectory containing the full source code, including a Makefile that can be used to compile Stockfish on Unix-like systems. * a file with the .nnue extension, storing the neural network for the NNUE @@ -330,4 +330,4 @@ you are distributing. If you make any changes to the source code, these changes must also be made available under the GPL. For full details, read the copy of the GPL v3 found in the file named -*Copying.txt*. +[*Copying.txt*](https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt). 
From 038487f95499665bf86ca5343d7a83f970d4b06e Mon Sep 17 00:00:00 2001 From: Fanael Linithien Date: Tue, 18 May 2021 19:17:59 +0200 Subject: [PATCH 09/17] Use packed 32-bit MMX operations for updating the PSQT accumulator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This improves the speed of NNUE by a bit on old hardware that code path is intended for, like a Pentium III 1.13 GHz: 10 repeats of "./stockfish bench 16 1 13 default depth NNUE": Before: 54 642 504 897 cycles (± 0.12%) 62 301 937 829 instructions (± 0.03%) After: 54 320 821 928 cycles (± 0.13%) 62 084 742 699 instructions (± 0.02%) Speed of go depth 20 from startpos: Before: 53103 nps After: 53856 nps closes https://github.com/official-stockfish/Stockfish/pull/3476 No functional change. --- src/nnue/nnue_feature_transformer.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 2c0a0c6d..bfa2e25a 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -84,18 +84,18 @@ namespace Stockfish::Eval::NNUE { #elif USE_MMX typedef __m64 vec_t; - typedef std::int32_t psqt_vec_t; + typedef __m64 psqt_vec_t; #define vec_load(a) (*(a)) #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) _mm_add_pi16(a,b) #define vec_sub_16(a,b) _mm_sub_pi16(a,b) #define vec_load_psqt(a) (*(a)) #define vec_store_psqt(a,b) *(a)=(b) - #define vec_add_psqt_32(a,b) a+b - #define vec_sub_psqt_32(a,b) a-b - #define vec_zero_psqt() 0 + #define vec_add_psqt_32(a,b) _mm_add_pi32(a,b) + #define vec_sub_psqt_32(a,b) _mm_sub_pi32(a,b) + #define vec_zero_psqt() _mm_setzero_si64() static constexpr IndexType NumRegs = 8; - static constexpr IndexType NumPsqtRegs = 8; + static constexpr IndexType NumPsqtRegs = 4; #elif USE_NEON typedef int16x8_t vec_t; From 6b9a70ace8073f5ff4c50b4dd5ddc041cf9c819f Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Prokop=20Rand=C3=A1=C4=8Dek?= Date: Wed, 12 May 2021 20:15:21 +0200 Subject: [PATCH 10/17] Use if instead of goto This PR inverts the if and removes goto in the generate_all function. closes https://github.com/official-stockfish/Stockfish/pull/3461 No functional change --- AUTHORS | 1 + src/movegen.cpp | 27 +++++++++++++-------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/AUTHORS b/AUTHORS index 9042495f..7165363f 100644 --- a/AUTHORS +++ b/AUTHORS @@ -175,6 +175,7 @@ Stefan Geschwentner (locutus2) Stefano Cardanobile (Stefano80) Steinar Gunderson (sesse) Stéphane Nicolet (snicolet) +Prokop Randáček (ProkopRandacek) Thanar2 thaspel theo77186 diff --git a/src/movegen.cpp b/src/movegen.cpp index be168450..bb81aeac 100644 --- a/src/movegen.cpp +++ b/src/movegen.cpp @@ -192,21 +192,20 @@ namespace { const Square ksq = pos.square(Us); Bitboard target; - if (Type == EVASIONS && more_than_one(pos.checkers())) - goto kingMoves; // Double check, only a king move can save the day + // Skip generating non-king moves when in double check + if (Type != EVASIONS || !more_than_one(pos.checkers())) + { + target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers())) + : Type == NON_EVASIONS ? ~pos.pieces( Us) + : Type == CAPTURES ? pos.pieces(~Us) + : ~pos.pieces( ); // QUIETS || QUIET_CHECKS - target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers())) - : Type == NON_EVASIONS ? ~pos.pieces( Us) - : Type == CAPTURES ? 
pos.pieces(~Us) - : ~pos.pieces( ); // QUIETS || QUIET_CHECKS - - moveList = generate_pawn_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); - -kingMoves: + moveList = generate_pawn_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + } if (!Checks || pos.blockers_for_king(~Us) & ksq) { Bitboard b = attacks_bb(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target); From 2c3f7619f9ef267cbaec5216b71e0e435dc1393b Mon Sep 17 00:00:00 2001 From: Vizvezdenec Date: Wed, 19 May 2021 20:57:04 +0300 Subject: [PATCH 11/17] Simplify usage of LMR for captures This patch simplifies a lot of "enablers" for LMR when move is a capture or promotion. After it we will have only 2 conditions - if node is a cutNode or if it's an allNode that was not in PV, so all captures or promotions wouldn't go thru LMR at any PVnodes. 
passed STC https://tests.stockfishchess.org/tests/view/60a40117ce8ea25a3ef03ca7 LLR: 2.95 (-2.94,2.94) <-2.50,0.50> Total: 58976 W: 4875 L: 4807 D: 49294 Ptnml(0-2): 176, 3897, 21270, 3973, 172 passed LTC https://tests.stockfishchess.org/tests/view/60a43ff8ce8ea25a3ef03d18 LLR: 2.93 (-2.94,2.94) <-2.50,0.50> Total: 65272 W: 2203 L: 2165 D: 60904 Ptnml(0-2): 28, 1936, 28668, 1978, 26 closes https://github.com/official-stockfish/Stockfish/pull/3480 bench 4110764 --- src/search.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 8f8d42c0..6e1d2b53 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1121,11 +1121,8 @@ moves_loop: // When in check, search starts from here if ( depth >= 3 && moveCount > 1 + 2 * rootNode && ( !captureOrPromotion - || moveCountPruning - || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha || cutNode - || (!PvNode && !formerPv && captureHistory[movedPiece][to_sq(move)][type_of(pos.captured_piece())] < 3678) - || thisThread->ttHitAverage < 432 * TtHitAverageResolution * TtHitAverageWindow / 1024) + || (!PvNode && !formerPv)) && (!PvNode || ss->ply > 1 || thisThread->id() % 4 != 3)) { Depth r = reduction(improving, depth, moveCount); From 754fc8a8b5ca7466926d54465eeb1df4d4a481ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Wed, 19 May 2021 01:24:51 +0200 Subject: [PATCH 12/17] Remove Tempo The Tempo variable was introduced 10 years ago in our search because the classical evaluation function was antisymmetrical in White and Black by design to gain speed: Eval(White to play) = -Eval(Black to play) Nowadays our neural networks know which side is to play in a position when they evaluate a position and are trained on real games, so the neural network encodes the advantage of moving as an output of search. This patch shows that the Tempo variable is not necessary anymore. 
STC: LLR: 2.94 (-2.94,2.94) <-2.50,0.50> Total: 33512 W: 2805 L: 2709 D: 27998 Ptnml(0-2): 80, 2209, 12095, 2279, 93 https://tests.stockfishchess.org/tests/view/60a44ceace8ea25a3ef03d30 LTC: LLR: 2.95 (-2.94,2.94) <-2.50,0.50> Total: 53920 W: 1807 L: 1760 D: 50353 Ptnml(0-2): 16, 1617, 23650, 1658, 19 https://tests.stockfishchess.org/tests/view/60a477f0ce8ea25a3ef03d49 We also tried a match (20000 games) at STC using purely classical, result was neutral: https://tests.stockfishchess.org/tests/view/60a4eebcce8ea25a3ef03db5 Note: there are two locations left in search.cpp where we assume antisymmetry of evaluation (in relation with a speed optimization for null moves in lines 770 and 1439), but as the values are just used for heuristic pruning this approximation should not hurt too much because the order of magnitude is still true most of the time. closes https://github.com/official-stockfish/Stockfish/pull/3481 Bench: 4015864 --- src/evaluate.cpp | 4 ++-- src/search.cpp | 6 +++--- src/timeman.cpp | 8 -------- src/timeman.h | 1 - src/types.h | 1 - 5 files changed, 5 insertions(+), 15 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 256bd994..543644ee 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -1058,7 +1058,7 @@ make_v: v = (v / 16) * 16; // Side to move point of view - v = (pos.side_to_move() == WHITE ? v : -v) + Tempo; + v = (pos.side_to_move() == WHITE ? 
v : -v); return v; } @@ -1119,7 +1119,7 @@ Value Eval::evaluate(const Position& pos) { int scale = 903 + 28 * pos.count() + 28 * pos.non_pawn_material() / 1024; - Value nnue = NNUE::evaluate(pos) * scale / 1024 + Time.tempoNNUE; + Value nnue = NNUE::evaluate(pos) * scale / 1024; if (pos.is_chess960()) nnue += fix_FRC(pos); diff --git a/src/search.cpp b/src/search.cpp index 6e1d2b53..359a774f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -767,7 +767,7 @@ namespace { if ((ss-1)->currentMove != MOVE_NULL) ss->staticEval = eval = evaluate(pos); else - ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo; + ss->staticEval = eval = -(ss-1)->staticEval; // Save static evaluation into transposition table tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); @@ -776,7 +776,7 @@ namespace { // Use static evaluation difference to improve quiet move ordering if (is_ok((ss-1)->currentMove) && !(ss-1)->inCheck && !priorCapture) { - int bonus = std::clamp(-depth * 4 * int((ss-1)->staticEval + ss->staticEval - 2 * Tempo), -1000, 1000); + int bonus = std::clamp(-depth * 4 * int((ss-1)->staticEval + ss->staticEval), -1000, 1000); thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << bonus; } @@ -1436,7 +1436,7 @@ moves_loop: // When in check, search starts from here // and addition of two tempos ss->staticEval = bestValue = (ss-1)->currentMove != MOVE_NULL ? evaluate(pos) - : -(ss-1)->staticEval + 2 * Tempo; + : -(ss-1)->staticEval; // Stand pat. 
Return immediately if static value is at least beta if (bestValue >= beta) diff --git a/src/timeman.cpp b/src/timeman.cpp index 3236b6e9..f742d1e4 100644 --- a/src/timeman.cpp +++ b/src/timeman.cpp @@ -94,14 +94,6 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { optimumTime = TimePoint(optScale * timeLeft); maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, maxScale * optimumTime)); - if (Stockfish::Search::Limits.use_time_management()) - { - int strength = std::log( std::max(1, int(optimumTime * Threads.size() / 10))) * 60; - tempoNNUE = std::clamp( (strength + 264) / 24, 18, 30); - } - else - tempoNNUE = 28; // default for no time given - if (Options["Ponder"]) optimumTime += optimumTime / 4; } diff --git a/src/timeman.h b/src/timeman.h index 4ac0b4be..b1878d65 100644 --- a/src/timeman.h +++ b/src/timeman.h @@ -37,7 +37,6 @@ public: TimePoint(Threads.nodes_searched()) : now() - startTime; } int64_t availableNodes; // When in 'nodes as time' mode - int tempoNNUE; private: TimePoint startTime; diff --git a/src/types.h b/src/types.h index efebce1a..0bd4a1c4 100644 --- a/src/types.h +++ b/src/types.h @@ -191,7 +191,6 @@ enum Value : int { BishopValueMg = 825, BishopValueEg = 915, RookValueMg = 1276, RookValueEg = 1380, QueenValueMg = 2538, QueenValueEg = 2682, - Tempo = 28, MidgameLimit = 15258, EndgameLimit = 3915 }; From f233ca1af4d36ded8ce924131f42bc4d0093ec6e Mon Sep 17 00:00:00 2001 From: Guy Vreuls Date: Fri, 21 May 2021 20:22:29 +0200 Subject: [PATCH 13/17] Compact position structures Reorder the structures' data members in position.h to reduce padding.
Passed STC: https://tests.stockfishchess.org/tests/view/60a8011fce8ea25a3ef04069 LLR: 2.94 (-2.94,2.94) <-0.50,2.50> Total: 14120 W: 1214 L: 1067 D: 11839 Ptnml(0-2): 26, 857, 5161, 976, 40 --- Also tested for speed locally by Joost: Result of 50 runs ================== base (./stockfish.master ) = 2254919 +/- 4439 test (./stockfish.patch ) = 2274003 +/- 5278 diff = +19084 +/- 6386 ================== speedup = +0.0085 P(speedup > 0) = 1.0000 --- closes https://github.com/official-stockfish/Stockfish/pull/3488 No functional change. --- src/position.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/position.h b/src/position.h index c226373b..e6b072bc 100644 --- a/src/position.h +++ b/src/position.h @@ -51,11 +51,11 @@ struct StateInfo { // Not copied when making a move (will be recomputed anyhow) Key key; Bitboard checkersBB; - Piece capturedPiece; StateInfo* previous; Bitboard blockersForKing[COLOR_NB]; Bitboard pinners[COLOR_NB]; Bitboard checkSquares[PIECE_TYPE_NB]; + Piece capturedPiece; int repetition; // Used by NNUE @@ -192,11 +192,11 @@ private: int castlingRightsMask[SQUARE_NB]; Square castlingRookSquare[CASTLING_RIGHT_NB]; Bitboard castlingPath[CASTLING_RIGHT_NB]; + Thread* thisThread; + StateInfo* st; int gamePly; Color sideToMove; Score psq; - Thread* thisThread; - StateInfo* st; bool chess960; }; From fb2d175f97a12be6464fdcc20293682a192ca156 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sat, 22 May 2021 07:00:53 +0200 Subject: [PATCH 14/17] Update default net to nn-7756374aaed3.nnue trained with pytorch using the master branch and recommended settings, same data set as previously used: python train.py ../../all_d9_fishd9_d8_d10_shuffle.binpack ../../all_d9_fishd9_d8_d10_shuffle.binpack \ --gpus 1 --threads 2 --num-workers 2 --batch-size 16384 --progress_bar_refresh_rate 300 \ --smart-fen-skipping --random-fen-skipping 3 --features=HalfKAv2^ --lambda=1.0 \ --max_epochs=400 --seed $RANDOM --default_root_dir exp/run_8 
passed STC: LLR: 2.93 (-2.94,2.94) <-0.50,2.50> Total: 21424 W: 2078 L: 1907 D: 17439 Ptnml(0-2): 80, 1512, 7385, 1627, 108 https://tests.stockfishchess.org/tests/view/60a6c749ce8ea25a3ef03f4d passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,3.50> Total: 67912 W: 2851 L: 2648 D: 62413 Ptnml(0-2): 40, 2348, 28984, 2537, 47 https://tests.stockfishchess.org/tests/view/60a722ecce8ea25a3ef03fb9 closes https://github.com/official-stockfish/Stockfish/pull/3489 Bench: 3779522 --- src/evaluate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.h b/src/evaluate.h index ee4c175b..40622e93 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -39,7 +39,7 @@ namespace Eval { // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. - #define EvalFileDefaultName "nn-8a08400ed089.nnue" + #define EvalFileDefaultName "nn-7756374aaed3.nnue" namespace NNUE { From 49c79aa15ce5a0de54fe0f4cef1037751af3d7d1 Mon Sep 17 00:00:00 2001 From: bmc4 Date: Sat, 22 May 2021 02:41:52 -0300 Subject: [PATCH 15/17] Simplify reduction for consecutive fails Revert the heuristic introduced in #3184, by which we reduced more the late sons of the root position after consecutive fail highs. 
--- Before new net architecture: STC: LLR: 2.95 (-2.94,2.94) <-2.50,0.50> Total: 226336 W: 20373 L: 20500 D: 185463 Ptnml(0-2): 755, 16087, 79595, 15992, 739 https://tests.stockfishchess.org/tests/view/609dec205085663412d08e9d LTC: LLR: 2.93 (-2.94,2.94) <-2.50,0.50> Total: 67432 W: 2411 L: 2375 D: 62646 Ptnml(0-2): 33, 1944, 29714, 2004, 21 https://tests.stockfishchess.org/tests/view/609ee30f5085663412d08fc3 --- After new net architecture: STC: LLR: 2.95 (-2.94,2.94) <-2.50,0.50> Total: 141752 W: 11591 L: 11617 D: 118544 Ptnml(0-2): 387, 9231, 51674, 9189, 395 https://tests.stockfishchess.org/tests/view/60a4320ace8ea25a3ef03cfd LTC: LLR: 2.95 (-2.94,2.94) <-2.50,0.50> Total: 294072 W: 9825 L: 9950 D: 274297 Ptnml(0-2): 121, 8610, 129681, 8521, 103 https://tests.stockfishchess.org/tests/view/60a51b5ece8ea25a3ef03dcd --- closes https://github.com/official-stockfish/Stockfish/pull/3490 Bench: 3752892 --- src/search.cpp | 6 +----- src/thread.h | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 359a774f..f8f956fa 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -377,7 +377,7 @@ void Thread::search() { // Start with a small aspiration window and, in the case of a fail // high/low, re-search with a bigger window until we don't fail // high/low anymore. 
- failedHighCnt = 0; + int failedHighCnt = 0; while (true) { Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt - searchAgainCounter); @@ -1157,10 +1157,6 @@ moves_loop: // When in check, search starts from here if (ttCapture) r++; - // Increase reduction at root if failing high - if (rootNode) - r += thisThread->failedHighCnt * thisThread->failedHighCnt * moveCount / 512; - // Increase reduction for cut nodes (~3 Elo) if (cutNode) r += 2; diff --git a/src/thread.h b/src/thread.h index 5785fd25..ae662880 100644 --- a/src/thread.h +++ b/src/thread.h @@ -75,7 +75,6 @@ public: CapturePieceToHistory captureHistory; ContinuationHistory continuationHistory[2][2]; Score contempt; - int failedHighCnt; }; From ff4c22238a199625cf7f02be1816b07fc49f5d45 Mon Sep 17 00:00:00 2001 From: bmc4 Date: Sat, 22 May 2021 02:47:23 -0300 Subject: [PATCH 16/17] Tuning Search This patch tunes constants in search.cpp STC: LLR: 2.94 (-2.94,2.94) <-0.50,2.50> Total: 30648 W: 2580 L: 2410 D: 25658 Ptnml(0-2): 80, 1969, 11093, 2065, 117 https://tests.stockfishchess.org/tests/view/60a71d3cce8ea25a3ef03fae LTC: LLR: 2.95 (-2.94,2.94) <0.50,3.50> Total: 52896 W: 1776 L: 1617 D: 49503 Ptnml(0-2): 13, 1462, 23347, 1605, 21 https://tests.stockfishchess.org/tests/view/60a794ddce8ea25a3ef0400a closes https://github.com/official-stockfish/Stockfish/pull/3491 Bench: 4004731 --- src/search.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index f8f956fa..143d0883 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -66,7 +66,7 @@ namespace { // Futility margin Value futility_margin(Depth d, bool improving) { - return Value(231 * (d - improving)); + return Value(214 * (d - improving)); } // Reductions lookup table, initialized at startup @@ -74,7 +74,7 @@ namespace { Depth reduction(bool i, Depth d, int mn) { int r = Reductions[d] * Reductions[mn]; - return (r + 503) / 1024 + (!i &&
r > 904); } constexpr int futility_move_count(bool improving, Depth depth) { @@ -83,7 +83,7 @@ namespace { // History and stats update bonus, based on depth int stat_bonus(Depth d) { - return d > 14 ? 66 : 6 * d * d + 231 * d - 206; + return d > 14 ? 73 : 6 * d * d + 229 * d - 215; } // Add a small random component to draw evaluations to avoid 3-fold blindness @@ -798,10 +798,10 @@ namespace { // Step 8. Null move search with verification search (~40 Elo) if ( !PvNode && (ss-1)->currentMove != MOVE_NULL - && (ss-1)->statScore < 24185 + && (ss-1)->statScore < 23767 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 22 * depth - 34 * improving + 162 * ss->ttPv + 159 + && ss->staticEval >= beta - 20 * depth - 22 * improving + 168 * ss->ttPv + 159 && !excludedMove && pos.non_pawn_material(us) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) @@ -809,7 +809,7 @@ namespace { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and value - Depth R = (1062 + 68 * depth) / 256 + std::min(int(eval - beta) / 190, 3); + Depth R = (1090 + 81 * depth) / 256 + std::min(int(eval - beta) / 205, 3); ss->currentMove = MOVE_NULL; ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -922,7 +922,7 @@ moves_loop: // When in check, search starts from here ttCapture = ttMove && pos.capture_or_promotion(ttMove); // Step 11. 
A small Probcut idea, when we are in check - probCutBeta = beta + 400; + probCutBeta = beta + 409; if ( ss->inCheck && !PvNode && depth >= 4 @@ -1073,7 +1073,7 @@ moves_loop: // When in check, search starts from here { extension = 1; singularQuietLMR = !ttCapture; - if (!PvNode && value < singularBeta - 140) + if (!PvNode && value < singularBeta - 93) extension = 2; } @@ -1165,11 +1165,11 @@ moves_loop: // When in check, search starts from here + (*contHist[0])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)] + (*contHist[3])[movedPiece][to_sq(move)] - - 4791; + - 4923; // Decrease/increase reduction for moves with a good/bad history (~30 Elo) if (!ss->inCheck) - r -= ss->statScore / 14790; + r -= ss->statScore / 14721; } // In general we want to cap the LMR depth search at newDepth. But if From a2f01c07eb91524fc372bd82d6513ab058d3e043 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Sat, 22 May 2021 19:44:15 +0200 Subject: [PATCH 17/17] Sometimes change the (materialist, positional) balance Our new nets output two values for the side to move in the last layer. We can interpret the first value as a material evaluation of the position, and the second one as the dynamic, positional value of the location of pieces. This patch changes the balance for the (materialist, positional) parts of the score from (128, 128) to (121, 135) when the piece material is equal between the two players, but keeps the standard (128, 128) balance when one player is at least an exchange up. 
Passed STC: LLR: 2.93 (-2.94,2.94) <-0.50,2.50> Total: 15936 W: 1421 L: 1266 D: 13249 Ptnml(0-2): 37, 1037, 5694, 1134, 66 https://tests.stockfishchess.org/tests/view/60a82df9ce8ea25a3ef0408f Passed LTC: LLR: 2.94 (-2.94,2.94) <0.50,3.50> Total: 13904 W: 516 L: 410 D: 12978 Ptnml(0-2): 4, 374, 6088, 484, 2 https://tests.stockfishchess.org/tests/view/60a8bbf9ce8ea25a3ef04101 closes https://github.com/official-stockfish/Stockfish/pull/3492 Bench: 3856635 --- src/evaluate.cpp | 2 +- src/evaluate.h | 2 +- src/nnue/evaluate_nnue.cpp | 23 ++++++++++++++++++----- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 543644ee..c8094ca8 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -1119,7 +1119,7 @@ Value Eval::evaluate(const Position& pos) { int scale = 903 + 28 * pos.count() + 28 * pos.non_pawn_material() / 1024; - Value nnue = NNUE::evaluate(pos) * scale / 1024; + Value nnue = NNUE::evaluate(pos, true) * scale / 1024; if (pos.is_chess960()) nnue += fix_FRC(pos); diff --git a/src/evaluate.h b/src/evaluate.h index 40622e93..41aace67 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -43,7 +43,7 @@ namespace Eval { namespace NNUE { - Value evaluate(const Position& pos); + Value evaluate(const Position& pos, bool adjusted = false); bool load_eval(std::string name, std::istream& stream); bool save_eval(std::ostream& stream); void init(); diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index 97cef814..cee77fe9 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -134,7 +134,7 @@ namespace Stockfish::Eval::NNUE { } // Evaluation function. Perform differential calculation. - Value evaluate(const Position& pos) { + Value evaluate(const Position& pos, bool adjusted) { // We manually align the arrays on the stack because with gcc < 9.3 // overaligning stack variables with alignas() doesn't work correctly. 
@@ -158,13 +158,26 @@ namespace Stockfish::Eval::NNUE { ASSERT_ALIGNED(buffer, alignment); const std::size_t bucket = (pos.count() - 1) / 4; - const auto [psqt, lazy] = featureTransformer->transform(pos, transformedFeatures, bucket); - if (lazy) { + + if (lazy) return static_cast(psqt / OutputScale); - } else { + else + { const auto output = network[bucket]->propagate(transformedFeatures, buffer); - return static_cast((output[0] + psqt) / OutputScale); + + int materialist = psqt; + int positional = output[0]; + + int delta_npm = abs(pos.non_pawn_material(WHITE) - pos.non_pawn_material(BLACK)); + int entertainment = (adjusted && delta_npm <= BishopValueMg - KnightValueMg ? 7 : 0); + + int A = 128 - entertainment; + int B = 128 + entertainment; + + int sum = (A * materialist + B * positional) / 128; + + return static_cast( sum / OutputScale ); } }