Merge branch 'master' into tools

This commit is contained in:
Tomasz Sobczyk
2021-05-24 11:32:58 +02:00
26 changed files with 432 additions and 426 deletions

View File

@@ -1,4 +1,4 @@
# List of authors for Stockfish, as of March 31, 2021
# List of authors for Stockfish, as of May 17, 2021
# Founders of the Stockfish project and fishtest infrastructure
Tord Romstad (romstad)
@@ -52,6 +52,7 @@ Dieter Dobbelaere (ddobbelaere)
DiscanX
Dominik Schlösser (domschl)
double-beep
Douglas Matos Gomes (dsmsgms)
Eduardo Cáceres (eduherminio)
Eelco de Groot (KingDefender)
Elvin Liu (solarlight2)
@@ -174,6 +175,7 @@ Stefan Geschwentner (locutus2)
Stefano Cardanobile (Stefano80)
Steinar Gunderson (sesse)
Stéphane Nicolet (snicolet)
Prokop Randáček (ProkopRandacek)
Thanar2
thaspel
theo77186

View File

@@ -21,13 +21,13 @@ intrinsics available on most CPUs (sse2, avx2, neon, or similar).
This distribution of Stockfish consists of the following files:
* Readme.md, the file you are currently reading.
* [Readme.md](https://github.com/official-stockfish/Stockfish/blob/master/README.md), the file you are currently reading.
* Copying.txt, a text file containing the GNU General Public License version 3.
* [Copying.txt](https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt), a text file containing the GNU General Public License version 3.
* AUTHORS, a text file with the list of authors for the project
* [AUTHORS](https://github.com/official-stockfish/Stockfish/blob/master/AUTHORS), a text file with the list of authors for the project
* src, a subdirectory containing the full source code, including a Makefile
* [src](https://github.com/official-stockfish/Stockfish/tree/master/src), a subdirectory containing the full source code, including a Makefile
that can be used to compile Stockfish on Unix-like systems.
* a file with the .nnue extension, storing the neural network for the NNUE
@@ -365,4 +365,4 @@ you are distributing. If you make any changes to the source code,
these changes must also be made available under the GPL.
For full details, read the copy of the GPL v3 found in the file named
*Copying.txt*.
[*Copying.txt*](https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt).

View File

@@ -50,7 +50,7 @@ SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp
material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
nnue/evaluate_nnue.cpp \
nnue/features/half_kp.cpp \
nnue/features/half_ka_v2.cpp \
tools/sfen_packer.cpp \
tools/training_data_generator.cpp \
tools/training_data_generator_nonpv.cpp \
@@ -106,8 +106,7 @@ endif
ifeq ($(ARCH), $(filter $(ARCH), \
x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \
e2k \
x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 e2k \
armv7 armv7-neon armv8 apple-silicon general-64 general-32))
SUPPORTED_ARCH=true
else
@@ -853,8 +852,7 @@ config-sanity: net
@test "$(optimize)" = "yes" || test "$(optimize)" = "no"
@test "$(SUPPORTED_ARCH)" = "true"
@test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \
test "$(arch)" = "e2k" || \
test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "e2k" || \
test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64"
@test "$(bits)" = "32" || test "$(bits)" = "64"
@test "$(prefetch)" = "yes" || test "$(prefetch)" = "no"

View File

@@ -63,120 +63,124 @@ namespace Eval {
namespace NNUE {
string eval_file_loaded = "None";
UseNNUEMode useNNUE;
}
/// NNUE::init() tries to load a NNUE network at startup time, or when the engine
/// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue"
/// The name of the NNUE network is always retrieved from the EvalFile option.
/// We search the given network in three locations: internally (the default
/// network may be embedded in the binary), in the active working directory and
/// in the engine directory. Distro packagers may define the DEFAULT_NNUE_DIRECTORY
/// variable to have the engine search in a special directory in their distro.
static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode)
{
if (mode == "false")
return UseNNUEMode::False;
else if (mode == "true")
return UseNNUEMode::True;
else if (mode == "pure")
return UseNNUEMode::Pure;
static UseNNUEMode NNUE::nnue_mode_from_option(const UCI::Option& mode)
{
if (mode == "false")
return UseNNUEMode::False;
}
else if (mode == "true")
return UseNNUEMode::True;
else if (mode == "pure")
return UseNNUEMode::Pure;
void init() {
return UseNNUEMode::False;
}
useNNUE = nnue_mode_from_option(Options["Use NNUE"]);
if (useNNUE == UseNNUEMode::False)
return;
/// NNUE::init() tries to load a NNUE network at startup time, or when the engine
/// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue"
/// The name of the NNUE network is always retrieved from the EvalFile option.
/// We search the given network in three locations: internally (the default
/// network may be embedded in the binary), in the active working directory and
/// in the engine directory. Distro packagers may define the DEFAULT_NNUE_DIRECTORY
/// variable to have the engine search in a special directory in their distro.
string eval_file = string(Options["EvalFile"]);
void NNUE::init() {
#if defined(DEFAULT_NNUE_DIRECTORY)
#define stringify2(x) #x
#define stringify(x) stringify2(x)
vector<string> dirs = { "<internal>" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
#else
vector<string> dirs = { "<internal>" , "" , CommandLine::binaryDirectory };
#endif
useNNUE = nnue_mode_from_option(Options["Use NNUE"]);
if (useNNUE == UseNNUEMode::False)
return;
for (string directory : dirs)
if (eval_file_loaded != eval_file)
{
if (directory != "<internal>")
{
ifstream stream(directory + eval_file, ios::binary);
if (load_eval(eval_file, stream))
eval_file_loaded = eval_file;
}
string eval_file = string(Options["EvalFile"]);
if (directory == "<internal>" && eval_file == EvalFileDefaultName)
{
// C++ way to prepare a buffer for a memory stream
class MemoryBuffer : public basic_streambuf<char> {
public: MemoryBuffer(char* p, size_t n) { setg(p, p, p + n); setp(p, p + n); }
};
#if defined(DEFAULT_NNUE_DIRECTORY)
#define stringify2(x) #x
#define stringify(x) stringify2(x)
vector<string> dirs = { "<internal>" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
#else
vector<string> dirs = { "<internal>" , "" , CommandLine::binaryDirectory };
#endif
MemoryBuffer buffer(const_cast<char*>(reinterpret_cast<const char*>(gEmbeddedNNUEData)),
size_t(gEmbeddedNNUESize));
for (string directory : dirs)
if (eval_file_loaded != eval_file)
{
if (directory != "<internal>")
{
ifstream stream(directory + eval_file, ios::binary);
if (load_eval(eval_file, stream))
eval_file_loaded = eval_file;
}
istream stream(&buffer);
if (load_eval(eval_file, stream))
eval_file_loaded = eval_file;
}
}
}
if (directory == "<internal>" && eval_file == EvalFileDefaultName)
{
// C++ way to prepare a buffer for a memory stream
class MemoryBuffer : public basic_streambuf<char> {
public: MemoryBuffer(char* p, size_t n) { setg(p, p, p + n); setp(p, p + n); }
};
void export_net(const std::optional<std::string>& filename) {
std::string actualFilename;
if (filename.has_value()) {
MemoryBuffer buffer(const_cast<char*>(reinterpret_cast<const char*>(gEmbeddedNNUEData)),
size_t(gEmbeddedNNUESize));
istream stream(&buffer);
if (load_eval(eval_file, stream))
eval_file_loaded = eval_file;
}
}
}
/// NNUE::export_net() exports the currently loaded network to a file
void NNUE::export_net(const std::optional<std::string>& filename) {
std::string actualFilename;
if (filename.has_value())
actualFilename = filename.value();
} else {
if (eval_file_loaded != EvalFileDefaultName) {
sync_cout << "Failed to export a net. A non-embedded net can only be saved if the filename is specified." << sync_endl;
return;
else
{
if (eval_file_loaded != EvalFileDefaultName)
{
sync_cout << "Failed to export a net. A non-embedded net can only be saved if the filename is specified." << sync_endl;
return;
}
actualFilename = EvalFileDefaultName;
}
ofstream stream(actualFilename, std::ios_base::binary);
if (save_eval(stream)) {
sync_cout << "Network saved successfully to " << actualFilename << "." << sync_endl;
} else {
sync_cout << "Failed to export a net." << sync_endl;
}
}
/// NNUE::verify() verifies that the last net used was loaded successfully
void verify() {
ofstream stream(actualFilename, std::ios_base::binary);
string eval_file = string(Options["EvalFile"]);
if (save_eval(stream))
sync_cout << "Network saved successfully to " << actualFilename << "." << sync_endl;
else
sync_cout << "Failed to export a net." << sync_endl;
}
if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
{
UCI::OptionsMap defaults;
UCI::init(defaults);
/// NNUE::verify() verifies that the last net used was loaded successfully
void NNUE::verify() {
string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + string(defaults["EvalFile"]);
string msg5 = "The engine will be terminated now.";
string eval_file = string(Options["EvalFile"]);
sync_cout << "info string ERROR: " << msg1 << sync_endl;
sync_cout << "info string ERROR: " << msg2 << sync_endl;
sync_cout << "info string ERROR: " << msg3 << sync_endl;
sync_cout << "info string ERROR: " << msg4 << sync_endl;
sync_cout << "info string ERROR: " << msg5 << sync_endl;
if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
{
UCI::OptionsMap defaults;
UCI::init(defaults);
exit(EXIT_FAILURE);
}
string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + string(defaults["EvalFile"]);
string msg5 = "The engine will be terminated now.";
if (useNNUE != UseNNUEMode::False)
sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl;
else
sync_cout << "info string classical evaluation enabled" << sync_endl;
sync_cout << "info string ERROR: " << msg1 << sync_endl;
sync_cout << "info string ERROR: " << msg2 << sync_endl;
sync_cout << "info string ERROR: " << msg3 << sync_endl;
sync_cout << "info string ERROR: " << msg4 << sync_endl;
sync_cout << "info string ERROR: " << msg5 << sync_endl;
exit(EXIT_FAILURE);
}
if (useNNUE != UseNNUEMode::False)
sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl;
else
sync_cout << "info string classical evaluation enabled" << sync_endl;
}
}
@@ -941,7 +945,7 @@ namespace {
Color strongSide = eg > VALUE_DRAW ? WHITE : BLACK;
int sf = me->scale_factor(pos, strongSide);
// If scale factor is not already specific, scale down via general heuristics
// If scale factor is not already specific, scale up/down via general heuristics
if (sf == SCALE_FACTOR_NORMAL)
{
if (pos.opposite_bishops())
@@ -1068,7 +1072,7 @@ make_v:
v = (v / 16) * 16;
// Side to move point of view
v = (pos.side_to_move() == WHITE ? v : -v) + Tempo;
v = (pos.side_to_move() == WHITE ? v : -v);
return v;
}
@@ -1136,12 +1140,10 @@ Value Eval::evaluate(const Position& pos) {
// Scale and shift NNUE for compatibility with search and classical evaluation
auto adjusted_NNUE = [&]()
{
int material = pos.non_pawn_material() + 4 * PawnValueMg * pos.count<PAWN>();
int scale = 580
+ material / 32
- 4 * pos.rule50_count();
Value nnue = NNUE::evaluate(pos) * scale / 1024 + Time.tempoNNUE;
int scale = 903 + 28 * pos.count<PAWN>() + 28 * pos.non_pawn_material() / 1024;
Value nnue = NNUE::evaluate(pos, true) * scale / 1024;
if (pos.is_chess960())
nnue += fix_FRC(pos);
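As a rough worked example of the new scaling (not part of the diff, and assuming the usual middlegame piece values from types.h, e.g. KnightValueMg = 781): at the starting position pos.count<PAWN>() = 16 and pos.non_pawn_material() = 16604, so

    scale = 903 + 28 * 16 + 28 * 16604 / 1024 = 903 + 448 + 453 = 1804

i.e. the raw network output is multiplied by about 1804 / 1024 ≈ 1.76 with full material, and the factor shrinks as pieces and pawns come off the board.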
@@ -1154,7 +1156,7 @@ Value Eval::evaluate(const Position& pos) {
Value psq = Value(abs(eg_value(pos.psq_score())));
int r50 = 16 + pos.rule50_count();
bool largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50;
bool classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB));
bool classical = largePsq;
// Use classical evaluation for really low piece endgames.
// One critical case is the draw for bishop + A/H file pawn vs naked king.
@@ -1171,8 +1173,7 @@ Value Eval::evaluate(const Position& pos) {
&& !lowPieceEndgame
&& ( abs(v) * 16 < NNUEThreshold2 * r50
|| ( pos.opposite_bishops()
&& abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50
&& !(pos.this_thread()->nodes & 0xB))))
&& abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50)))
v = adjusted_NNUE();
}

View File

@@ -36,7 +36,7 @@ namespace Eval {
// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
// for the build process (profile-build and fishtest) to work. Do not change the
// name of the macro, as it is used in the Makefile.
#define EvalFileDefaultName "nn-62ef826d1a6d.nnue"
#define EvalFileDefaultName "nn-7756374aaed3.nnue"
namespace NNUE {
enum struct UseNNUEMode
@@ -49,7 +49,7 @@ namespace Eval {
extern UseNNUEMode useNNUE;
extern std::string eval_file_loaded;
Value evaluate(const Position& pos);
Value evaluate(const Position& pos, bool adjusted = false);
bool load_eval(std::string name, std::istream& stream);
bool save_eval(std::ostream& stream);
void init();

View File

@@ -192,21 +192,20 @@ namespace {
const Square ksq = pos.square<KING>(Us);
Bitboard target;
if (Type == EVASIONS && more_than_one(pos.checkers()))
goto kingMoves; // Double check, only a king move can save the day
// Skip generating non-king moves when in double check
if (Type != EVASIONS || !more_than_one(pos.checkers()))
{
target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers()))
: Type == NON_EVASIONS ? ~pos.pieces( Us)
: Type == CAPTURES ? pos.pieces(~Us)
: ~pos.pieces( ); // QUIETS || QUIET_CHECKS
target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers()))
: Type == NON_EVASIONS ? ~pos.pieces( Us)
: Type == CAPTURES ? pos.pieces(~Us)
: ~pos.pieces( ); // QUIETS || QUIET_CHECKS
moveList = generate_pawn_moves<Us, Type>(pos, moveList, target);
moveList = generate_moves<Us, KNIGHT, Checks>(pos, moveList, target);
moveList = generate_moves<Us, BISHOP, Checks>(pos, moveList, target);
moveList = generate_moves<Us, ROOK, Checks>(pos, moveList, target);
moveList = generate_moves<Us, QUEEN, Checks>(pos, moveList, target);
kingMoves:
moveList = generate_pawn_moves<Us, Type>(pos, moveList, target);
moveList = generate_moves<Us, KNIGHT, Checks>(pos, moveList, target);
moveList = generate_moves<Us, BISHOP, Checks>(pos, moveList, target);
moveList = generate_moves<Us, ROOK, Checks>(pos, moveList, target);
moveList = generate_moves<Us, QUEEN, Checks>(pos, moveList, target);
}
if (!Checks || pos.blockers_for_king(~Us) & ksq)
{
Bitboard b = attacks_bb<KING>(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target);

View File

@@ -35,7 +35,7 @@ namespace Stockfish::Eval::NNUE {
LargePagePtr<FeatureTransformer> featureTransformer;
// Evaluation function
AlignedPtr<Network> network;
AlignedPtr<Network> network[LayerStacks];
// Evaluation function file name
std::string fileName;
@@ -83,7 +83,8 @@ namespace Stockfish::Eval::NNUE {
void initialize() {
Detail::initialize(featureTransformer);
Detail::initialize(network);
for (std::size_t i = 0; i < LayerStacks; ++i)
Detail::initialize(network[i]);
}
// Read network header
@@ -92,7 +93,7 @@ namespace Stockfish::Eval::NNUE {
std::uint32_t version, size;
version = read_little_endian<std::uint32_t>(stream);
*hashValue = read_little_endian<std::uint32_t>(stream);
*hashValue = read_little_endian<std::uint32_t>(stream);
size = read_little_endian<std::uint32_t>(stream);
if (!stream || version != Version) return false;
desc->resize(size);
@@ -117,7 +118,8 @@ namespace Stockfish::Eval::NNUE {
if (!read_header(stream, &hashValue, &netDescription)) return false;
if (hashValue != HashValue) return false;
if (!Detail::read_parameters(stream, *featureTransformer)) return false;
if (!Detail::read_parameters(stream, *network)) return false;
for (std::size_t i = 0; i < LayerStacks; ++i)
if (!Detail::read_parameters(stream, *(network[i]))) return false;
return stream && stream.peek() == std::ios::traits_type::eof();
}
@@ -126,12 +128,13 @@ namespace Stockfish::Eval::NNUE {
if (!write_header(stream, HashValue, netDescription)) return false;
if (!Detail::write_parameters(stream, *featureTransformer)) return false;
if (!Detail::write_parameters(stream, *network)) return false;
for (std::size_t i = 0; i < LayerStacks; ++i)
if (!Detail::write_parameters(stream, *(network[i]))) return false;
return (bool)stream;
}
// Evaluation function. Perform differential calculation.
Value evaluate(const Position& pos) {
Value evaluate(const Position& pos, bool adjusted) {
// We manually align the arrays on the stack because with gcc < 9.3
// overaligning stack variables with alignas() doesn't work correctly.
@@ -154,10 +157,28 @@ namespace Stockfish::Eval::NNUE {
ASSERT_ALIGNED(transformedFeatures, alignment);
ASSERT_ALIGNED(buffer, alignment);
featureTransformer->transform(pos, transformedFeatures);
const auto output = network->propagate(transformedFeatures, buffer);
const std::size_t bucket = (pos.count<ALL_PIECES>() - 1) / 4;
const auto [psqt, lazy] = featureTransformer->transform(pos, transformedFeatures, bucket);
return static_cast<Value>(output[0] / OutputScale);
if (lazy)
return static_cast<Value>(psqt / OutputScale);
else
{
const auto output = network[bucket]->propagate(transformedFeatures, buffer);
int materialist = psqt;
int positional = output[0];
int delta_npm = abs(pos.non_pawn_material(WHITE) - pos.non_pawn_material(BLACK));
int entertainment = (adjusted && delta_npm <= BishopValueMg - KnightValueMg ? 7 : 0);
int A = 128 - entertainment;
int B = 128 + entertainment;
int sum = (A * materialist + B * positional) / 128;
return static_cast<Value>( sum / OutputScale );
}
}
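To make the bucket selection and blending above concrete (illustration only, derived from the code in this hunk): bucket = (pos.count<ALL_PIECES>() - 1) / 4 maps 1..32 pieces onto buckets 0..7, so the full starting position uses bucket 7. When adjusted is true and the two sides' non-pawn material differs by at most BishopValueMg - KnightValueMg (44, assuming the standard values 825 and 781), entertainment = 7 and the blend becomes

    sum = (121 * materialist + 135 * positional) / 128

slightly favouring the network's positional output; in all other cases both terms are weighted equally (128 / 128).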
// Load eval, from a file stream or a memory stream

View File

@@ -16,32 +16,32 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//Definition of input features HalfKP of NNUE evaluation function
//Definition of input features HalfKAv2 of NNUE evaluation function
#include "half_kp.h"
#include "half_ka_v2.h"
#include "../../position.h"
namespace Stockfish::Eval::NNUE::Features {
// Orient a square according to perspective (rotates by 180 for black)
inline Square HalfKP::orient(Color perspective, Square s) {
return Square(int(s) ^ (bool(perspective) * 63));
inline Square HalfKAv2::orient(Color perspective, Square s) {
return Square(int(s) ^ (bool(perspective) * 56));
}
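A minimal illustration of what the new XOR constant changes (not part of the diff): s ^ 63 flips both the file and the rank bits, i.e. a 180-degree rotation, while s ^ 56 flips only the rank bits, i.e. a vertical mirror:

    orient(BLACK, SQ_A1) == SQ_H8   // old HalfKP:   s ^ 63
    orient(BLACK, SQ_A1) == SQ_A8   // new HalfKAv2: s ^ 56, files preserved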
// Index of a feature for a given king position and another piece on some square
inline IndexType HalfKP::make_index(Color perspective, Square s, Piece pc, Square ksq) {
inline IndexType HalfKAv2::make_index(Color perspective, Square s, Piece pc, Square ksq) {
return IndexType(orient(perspective, s) + PieceSquareIndex[perspective][pc] + PS_NB * ksq);
}
// Get a list of indices for active features
void HalfKP::append_active_indices(
void HalfKAv2::append_active_indices(
const Position& pos,
Color perspective,
ValueListInserter<IndexType> active
) {
Square ksq = orient(perspective, pos.square<KING>(perspective));
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
Bitboard bb = pos.pieces();
while (bb)
{
Square s = pop_lsb(bb);
@@ -52,7 +52,7 @@ namespace Stockfish::Eval::NNUE::Features {
// append_changed_indices() : get a list of indices for recently changed features
void HalfKP::append_changed_indices(
void HalfKAv2::append_changed_indices(
Square ksq,
StateInfo* st,
Color perspective,
@@ -63,7 +63,6 @@ namespace Stockfish::Eval::NNUE::Features {
Square oriented_ksq = orient(perspective, ksq);
for (int i = 0; i < dp.dirty_num; ++i) {
Piece pc = dp.piece[i];
if (type_of(pc) == KING) continue;
if (dp.from[i] != SQ_NONE)
removed.push_back(make_index(perspective, dp.from[i], pc, oriented_ksq));
if (dp.to[i] != SQ_NONE)
@@ -71,15 +70,15 @@ namespace Stockfish::Eval::NNUE::Features {
}
}
int HalfKP::update_cost(StateInfo* st) {
int HalfKAv2::update_cost(StateInfo* st) {
return st->dirtyPiece.dirty_num;
}
int HalfKP::refresh_cost(const Position& pos) {
return pos.count<ALL_PIECES>() - 2;
int HalfKAv2::refresh_cost(const Position& pos) {
return pos.count<ALL_PIECES>();
}
bool HalfKP::requires_refresh(StateInfo* st, Color perspective) {
bool HalfKAv2::requires_refresh(StateInfo* st, Color perspective) {
return st->dirtyPiece.piece[0] == make_piece(perspective, KING);
}

View File

@@ -18,8 +18,8 @@
//Definition of input features HalfKP of NNUE evaluation function
#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
#define NNUE_FEATURES_HALF_KP_H_INCLUDED
#ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
#define NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
#include "../nnue_common.h"
@@ -32,33 +32,34 @@ namespace Stockfish {
namespace Stockfish::Eval::NNUE::Features {
// Feature HalfKP: Combination of the position of own king
// and the position of pieces other than kings
class HalfKP {
// Feature HalfKAv2: Combination of the position of own king
// and the position of pieces
class HalfKAv2 {
// unique number for each piece type on each square
enum {
PS_NONE = 0,
PS_W_PAWN = 1,
PS_B_PAWN = 1 * SQUARE_NB + 1,
PS_W_KNIGHT = 2 * SQUARE_NB + 1,
PS_B_KNIGHT = 3 * SQUARE_NB + 1,
PS_W_BISHOP = 4 * SQUARE_NB + 1,
PS_B_BISHOP = 5 * SQUARE_NB + 1,
PS_W_ROOK = 6 * SQUARE_NB + 1,
PS_B_ROOK = 7 * SQUARE_NB + 1,
PS_W_QUEEN = 8 * SQUARE_NB + 1,
PS_B_QUEEN = 9 * SQUARE_NB + 1,
PS_NB = 10 * SQUARE_NB + 1
PS_W_PAWN = 0,
PS_B_PAWN = 1 * SQUARE_NB,
PS_W_KNIGHT = 2 * SQUARE_NB,
PS_B_KNIGHT = 3 * SQUARE_NB,
PS_W_BISHOP = 4 * SQUARE_NB,
PS_B_BISHOP = 5 * SQUARE_NB,
PS_W_ROOK = 6 * SQUARE_NB,
PS_B_ROOK = 7 * SQUARE_NB,
PS_W_QUEEN = 8 * SQUARE_NB,
PS_B_QUEEN = 9 * SQUARE_NB,
PS_KING = 10 * SQUARE_NB,
PS_NB = 11 * SQUARE_NB
};
static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = {
// convention: W - us, B - them
// viewed from other side, W and B are reversed
{ PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_NONE, PS_NONE,
PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_NONE, PS_NONE },
{ PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_NONE, PS_NONE,
PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_NONE, PS_NONE }
{ PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE,
PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE },
{ PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE,
PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE }
};
// Orient a square according to perspective (rotates by 180 for black)
@@ -69,17 +70,17 @@ namespace Stockfish::Eval::NNUE::Features {
public:
// Feature name
static constexpr const char* Name = "HalfKP(Friend)";
static constexpr const char* Name = "HalfKAv2(Friend)";
// Hash value embedded in the evaluation file
static constexpr std::uint32_t HashValue = 0x5D69D5B8u;
static constexpr std::uint32_t HashValue = 0x5f234cb8u;
// Number of feature dimensions
static constexpr IndexType Dimensions =
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_NB);
// Maximum number of simultaneously active features. 30 because kings are not included.
static constexpr IndexType MaxActiveDimensions = 30;
// Maximum number of simultaneously active features.
static constexpr IndexType MaxActiveDimensions = 32;
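Quick sanity check of the sizes implied above (illustrative arithmetic, using SQUARE_NB = 64): PS_NB = 11 * 64 = 704, so Dimensions = 64 * 704 = 45056 features per perspective, and MaxActiveDimensions grows from 30 to 32 because the two kings are now indexed like any other piece.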
// Get a list of indices for active features
static void append_active_indices(
@@ -107,4 +108,4 @@ namespace Stockfish::Eval::NNUE::Features {
} // namespace Stockfish::Eval::NNUE::Features
#endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED

View File

@@ -69,62 +69,15 @@ namespace Stockfish::Eval::NNUE::Layers {
if (!previousLayer.read_parameters(stream)) return false;
for (std::size_t i = 0; i < OutputDimensions; ++i)
biases[i] = read_little_endian<BiasType>(stream);
#if !defined (USE_SSSE3)
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
#if !defined (USE_SSSE3)
weights[i] = read_little_endian<WeightType>(stream);
#else
std::unique_ptr<uint32_t[]> indexMap = std::make_unique<uint32_t[]>(OutputDimensions * PaddedInputDimensions);
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) {
const uint32_t scrambledIdx =
weights[
(i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
i / PaddedInputDimensions * 4 +
i % 4;
weights[scrambledIdx] = read_little_endian<WeightType>(stream);
indexMap[scrambledIdx] = i;
}
// Determine if eights of weight and input products can be summed using 16bits
// without saturation. We assume worst case combinations of 0 and 127 for all inputs.
if (OutputDimensions > 1 && !stream.fail())
{
canSaturate16.count = 0;
#if !defined(USE_VNNI)
for (IndexType i = 0; i < PaddedInputDimensions; i += 16)
for (IndexType j = 0; j < OutputDimensions; ++j)
for (int x = 0; x < 2; ++x)
{
WeightType* w = &weights[i * OutputDimensions + j * 4 + x * 2];
int sum[2] = {0, 0};
for (int k = 0; k < 8; ++k)
{
IndexType idx = k / 2 * OutputDimensions * 4 + k % 2;
sum[w[idx] < 0] += w[idx];
}
for (int sign : { -1, 1 })
while (sign * sum[sign == -1] > 258)
{
int maxK = 0, maxW = 0;
for (int k = 0; k < 8; ++k)
{
IndexType idx = k / 2 * OutputDimensions * 4 + k % 2;
if (maxW < sign * w[idx])
maxK = k, maxW = sign * w[idx];
}
IndexType idx = maxK / 2 * OutputDimensions * 4 + maxK % 2;
sum[sign == -1] -= w[idx];
const uint32_t scrambledIdx = idx + i * OutputDimensions + j * 4 + x * 2;
canSaturate16.add(j, i + maxK / 2 * 4 + maxK % 2 + x * 2, w[idx], indexMap[scrambledIdx]);
w[idx] = 0;
}
}
// Non-functional optimization for faster, more linear access
std::sort(canSaturate16.ids, canSaturate16.ids + canSaturate16.count,
[](const typename CanSaturate::Entry& e1, const typename CanSaturate::Entry& e2)
{ return e1.in == e2.in ? e1.out < e2.out : e1.in < e2.in; });
#endif
}
i % 4
] = read_little_endian<WeightType>(stream);
#endif
return !stream.fail();
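The scrambled index expression above amounts to: split each output row's weights into chunks of four consecutive inputs, then store the same chunk of all outputs contiguously. A hypothetical standalone helper (not in the diff) computing the same mapping:

    #include <cstddef>

    // Maps a row-major index i = out * paddedInputDimensions + in to the
    // SSSE3 layout, i.e. (in / 4) * outputDimensions * 4 + out * 4 + in % 4.
    constexpr std::size_t scrambled_index(std::size_t i,
                                          std::size_t paddedInputDimensions,
                                          std::size_t outputDimensions) {
        return (i / 4) % (paddedInputDimensions / 4) * outputDimensions * 4
             + i / paddedInputDimensions * 4
             + i % 4;
    }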
@@ -148,8 +101,6 @@ namespace Stockfish::Eval::NNUE::Layers {
i % 4
];
}
for (int i = 0; i < canSaturate16.count; ++i)
unscrambledWeights[canSaturate16.ids[i].wIdx] = canSaturate16.ids[i].w;
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
write_little_endian<WeightType>(stream, unscrambledWeights[i]);
@@ -194,11 +145,11 @@ namespace Stockfish::Eval::NNUE::Layers {
__m512i product1 = _mm512_maddubs_epi16(a1, b1);
__m512i product2 = _mm512_maddubs_epi16(a2, b2);
__m512i product3 = _mm512_maddubs_epi16(a3, b3);
product0 = _mm512_add_epi16(product0, product1);
product2 = _mm512_add_epi16(product2, product3);
product0 = _mm512_add_epi16(product0, product2);
product0 = _mm512_adds_epi16(product0, product1);
product0 = _mm512_madd_epi16(product0, Ones512);
acc = _mm512_add_epi32(acc, product0);
product2 = _mm512_adds_epi16(product2, product3);
product2 = _mm512_madd_epi16(product2, Ones512);
acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product2));
#endif
};
@@ -236,11 +187,11 @@ namespace Stockfish::Eval::NNUE::Layers {
__m256i product1 = _mm256_maddubs_epi16(a1, b1);
__m256i product2 = _mm256_maddubs_epi16(a2, b2);
__m256i product3 = _mm256_maddubs_epi16(a3, b3);
product0 = _mm256_add_epi16(product0, product1);
product2 = _mm256_add_epi16(product2, product3);
product0 = _mm256_add_epi16(product0, product2);
product0 = _mm256_adds_epi16(product0, product1);
product0 = _mm256_madd_epi16(product0, Ones256);
acc = _mm256_add_epi32(acc, product0);
product2 = _mm256_adds_epi16(product2, product3);
product2 = _mm256_madd_epi16(product2, Ones256);
acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product2));
#endif
};
@@ -267,11 +218,11 @@ namespace Stockfish::Eval::NNUE::Layers {
__m128i product1 = _mm_maddubs_epi16(a1, b1);
__m128i product2 = _mm_maddubs_epi16(a2, b2);
__m128i product3 = _mm_maddubs_epi16(a3, b3);
product0 = _mm_add_epi16(product0, product1);
product2 = _mm_add_epi16(product2, product3);
product0 = _mm_add_epi16(product0, product2);
product0 = _mm_adds_epi16(product0, product1);
product0 = _mm_madd_epi16(product0, Ones128);
acc = _mm_add_epi32(acc, product0);
product2 = _mm_adds_epi16(product2, product3);
product2 = _mm_madd_epi16(product2, Ones128);
acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product2));
};
#endif
@@ -300,6 +251,8 @@ namespace Stockfish::Eval::NNUE::Layers {
#endif
#if defined (USE_SSSE3)
// Different layout, we process 4 inputs at a time, always.
static_assert(InputDimensions % 4 == 0);
const auto output = reinterpret_cast<OutputType*>(buffer);
const auto inputVector = reinterpret_cast<const vec_t*>(input);
@@ -310,7 +263,7 @@ namespace Stockfish::Eval::NNUE::Layers {
// because then it is also an input dimension.
if constexpr (OutputDimensions % OutputSimdWidth == 0)
{
constexpr IndexType NumChunks = PaddedInputDimensions / 4;
constexpr IndexType NumChunks = InputDimensions / 4;
const auto input32 = reinterpret_cast<const std::int32_t*>(input);
vec_t* outptr = reinterpret_cast<vec_t*>(output);
@@ -329,8 +282,6 @@ namespace Stockfish::Eval::NNUE::Layers {
for (int j = 0; j * OutputSimdWidth < OutputDimensions; ++j)
vec_add_dpbusd_32x4(outptr[j], in0, col0[j], in1, col1[j], in2, col2[j], in3, col3[j]);
}
for (int i = 0; i < canSaturate16.count; ++i)
output[canSaturate16.ids[i].out] += input[canSaturate16.ids[i].in] * canSaturate16.ids[i].w;
}
else if constexpr (OutputDimensions == 1)
{
@@ -377,17 +328,21 @@ namespace Stockfish::Eval::NNUE::Layers {
auto output = reinterpret_cast<OutputType*>(buffer);
#if defined(USE_SSE2)
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
// At least a multiple of 16, with SSE2.
static_assert(InputDimensions % SimdWidth == 0);
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
const __m128i Zeros = _mm_setzero_si128();
const auto inputVector = reinterpret_cast<const __m128i*>(input);
#elif defined(USE_MMX)
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
static_assert(InputDimensions % SimdWidth == 0);
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
const __m64 Zeros = _mm_setzero_si64();
const auto inputVector = reinterpret_cast<const __m64*>(input);
#elif defined(USE_NEON)
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
static_assert(InputDimensions % SimdWidth == 0);
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
const auto inputVector = reinterpret_cast<const int8x8_t*>(input);
#endif
@@ -473,25 +428,6 @@ namespace Stockfish::Eval::NNUE::Layers {
alignas(CacheLineSize) BiasType biases[OutputDimensions];
alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
#if defined (USE_SSSE3)
struct CanSaturate {
int count;
struct Entry {
uint32_t wIdx;
uint16_t out;
uint16_t in;
int8_t w;
} ids[PaddedInputDimensions * OutputDimensions * 3 / 4];
void add(int i, int j, int8_t w, uint32_t wIdx) {
ids[count].wIdx = wIdx;
ids[count].out = i;
ids[count].in = j;
ids[count].w = w;
++count;
}
} canSaturate16;
#endif
};
} // namespace Stockfish::Eval::NNUE::Layers

View File

@@ -72,22 +72,42 @@ namespace Stockfish::Eval::NNUE::Layers {
const auto output = reinterpret_cast<OutputType*>(buffer);
#if defined(USE_AVX2)
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
const __m256i Zero = _mm256_setzero_si256();
const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
const auto in = reinterpret_cast<const __m256i*>(input);
const auto out = reinterpret_cast<__m256i*>(output);
for (IndexType i = 0; i < NumChunks; ++i) {
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
_mm256_load_si256(&in[i * 4 + 0]),
_mm256_load_si256(&in[i * 4 + 1])), WeightScaleBits);
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
_mm256_load_si256(&in[i * 4 + 2]),
_mm256_load_si256(&in[i * 4 + 3])), WeightScaleBits);
_mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
_mm256_packs_epi16(words0, words1), Zero), Offsets));
if constexpr (InputDimensions % SimdWidth == 0) {
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
const __m256i Zero = _mm256_setzero_si256();
const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
const auto in = reinterpret_cast<const __m256i*>(input);
const auto out = reinterpret_cast<__m256i*>(output);
for (IndexType i = 0; i < NumChunks; ++i) {
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
_mm256_load_si256(&in[i * 4 + 0]),
_mm256_load_si256(&in[i * 4 + 1])), WeightScaleBits);
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
_mm256_load_si256(&in[i * 4 + 2]),
_mm256_load_si256(&in[i * 4 + 3])), WeightScaleBits);
_mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
_mm256_packs_epi16(words0, words1), Zero), Offsets));
}
} else {
constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2);
const __m128i Zero = _mm_setzero_si128();
const auto in = reinterpret_cast<const __m128i*>(input);
const auto out = reinterpret_cast<__m128i*>(output);
for (IndexType i = 0; i < NumChunks; ++i) {
const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
_mm_load_si128(&in[i * 4 + 0]),
_mm_load_si128(&in[i * 4 + 1])), WeightScaleBits);
const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
_mm_load_si128(&in[i * 4 + 2]),
_mm_load_si128(&in[i * 4 + 3])), WeightScaleBits);
const __m128i packedbytes = _mm_packs_epi16(words0, words1);
_mm_store_si128(&out[i], _mm_max_epi8(packedbytes, Zero));
}
}
constexpr IndexType Start = NumChunks * SimdWidth;
constexpr IndexType Start =
InputDimensions % SimdWidth == 0
? InputDimensions / SimdWidth * SimdWidth
: InputDimensions / (SimdWidth / 2) * (SimdWidth / 2);
#elif defined(USE_SSE2)
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
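The new 128-bit branch in the AVX2 path exists because a layer's input can now be narrower than one 256-bit chunk. Worked numbers for the architecture in this commit (assuming SimdWidth = 32 under AVX2, as defined in nnue_common.h): the first ClippedReLU now has InputDimensions = 16, so 16 % 32 != 0, the else branch runs with NumChunks = 16 / (32 / 2) = 1, and Start = 16, leaving no scalar remainder to process.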

View File

@@ -53,7 +53,7 @@ class InputSlice {
return true;
}
// Read network parameters
// Write network parameters
bool write_parameters(std::ostream& /*stream*/) const {
return true;
}

View File

@@ -30,8 +30,8 @@ namespace Stockfish::Eval::NNUE {
// Class that holds the result of affine transformation of input features
struct alignas(CacheLineSize) Accumulator {
std::int16_t
accumulation[2][TransformedFeatureDimensions];
std::int16_t accumulation[2][TransformedFeatureDimensions];
std::int32_t psqtAccumulation[2][PSQTBuckets];
AccumulatorState state[2];
};

View File

@@ -23,7 +23,7 @@
#include "nnue_common.h"
#include "features/half_kp.h"
#include "features/half_ka_v2.h"
#include "layers/input_slice.h"
#include "layers/affine_transform.h"
@@ -32,16 +32,18 @@
namespace Stockfish::Eval::NNUE {
// Input features used in evaluation function
using FeatureSet = Features::HalfKP;
using FeatureSet = Features::HalfKAv2;
// Number of input feature dimensions after conversion
constexpr IndexType TransformedFeatureDimensions = 256;
constexpr IndexType TransformedFeatureDimensions = 512;
constexpr IndexType PSQTBuckets = 8;
constexpr IndexType LayerStacks = 8;
namespace Layers {
// Define network structure
using InputLayer = InputSlice<TransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 16>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
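Putting the constants above together, a sketch of the resulting network shape (illustrative, derived from these definitions): the feature transformer now produces 512 values per perspective, so the InputSlice sees 2 * 512 = 1024 inputs; each of the 8 layer stacks is then 1024 -> 16 -> 32 -> 1, and an 8-bucket PSQT term is read directly from the transformer and blended with the stack output in NNUE::evaluate().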

View File

@@ -48,7 +48,7 @@
namespace Stockfish::Eval::NNUE {
// Version of the evaluation file
constexpr std::uint32_t Version = 0x7AF32F16u;
constexpr std::uint32_t Version = 0x7AF32F20u;
// Constant used in evaluation value calculation
constexpr int OutputScale = 16;

View File

@@ -36,45 +36,82 @@ namespace Stockfish::Eval::NNUE {
// vector registers.
#define VECTOR
static_assert(PSQTBuckets == 8, "Assumed by the current choice of constants.");
#ifdef USE_AVX512
typedef __m512i vec_t;
typedef __m256i psqt_vec_t;
#define vec_load(a) _mm512_load_si512(a)
#define vec_store(a,b) _mm512_store_si512(a,b)
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
#define vec_load_psqt(a) _mm256_load_si256(a)
#define vec_store_psqt(a,b) _mm256_store_si256(a,b)
#define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b)
#define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b)
#define vec_zero_psqt() _mm256_setzero_si256()
static constexpr IndexType NumRegs = 8; // only 8 are needed
static constexpr IndexType NumPsqtRegs = 1;
#elif USE_AVX2
typedef __m256i vec_t;
typedef __m256i psqt_vec_t;
#define vec_load(a) _mm256_load_si256(a)
#define vec_store(a,b) _mm256_store_si256(a,b)
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
#define vec_load_psqt(a) _mm256_load_si256(a)
#define vec_store_psqt(a,b) _mm256_store_si256(a,b)
#define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b)
#define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b)
#define vec_zero_psqt() _mm256_setzero_si256()
static constexpr IndexType NumRegs = 16;
static constexpr IndexType NumPsqtRegs = 1;
#elif USE_SSE2
typedef __m128i vec_t;
typedef __m128i psqt_vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_epi16(a,b)
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
#define vec_load_psqt(a) (*(a))
#define vec_store_psqt(a,b) *(a)=(b)
#define vec_add_psqt_32(a,b) _mm_add_epi32(a,b)
#define vec_sub_psqt_32(a,b) _mm_sub_epi32(a,b)
#define vec_zero_psqt() _mm_setzero_si128()
static constexpr IndexType NumRegs = Is64Bit ? 16 : 8;
static constexpr IndexType NumPsqtRegs = 2;
#elif USE_MMX
typedef __m64 vec_t;
typedef __m64 psqt_vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_pi16(a,b)
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
#define vec_load_psqt(a) (*(a))
#define vec_store_psqt(a,b) *(a)=(b)
#define vec_add_psqt_32(a,b) _mm_add_pi32(a,b)
#define vec_sub_psqt_32(a,b) _mm_sub_pi32(a,b)
#define vec_zero_psqt() _mm_setzero_si64()
static constexpr IndexType NumRegs = 8;
static constexpr IndexType NumPsqtRegs = 4;
#elif USE_NEON
typedef int16x8_t vec_t;
typedef int32x4_t psqt_vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) vaddq_s16(a,b)
#define vec_sub_16(a,b) vsubq_s16(a,b)
#define vec_load_psqt(a) (*(a))
#define vec_store_psqt(a,b) *(a)=(b)
#define vec_add_psqt_32(a,b) vaddq_s32(a,b)
#define vec_sub_psqt_32(a,b) vsubq_s32(a,b)
#define vec_zero_psqt() psqt_vec_t{0}
static constexpr IndexType NumRegs = 16;
static constexpr IndexType NumPsqtRegs = 2;
#else
#undef VECTOR
@@ -88,9 +125,13 @@ namespace Stockfish::Eval::NNUE {
// Number of output dimensions for one side
static constexpr IndexType HalfDimensions = TransformedFeatureDimensions;
static constexpr int LazyThreshold = 1400;
#ifdef VECTOR
static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2;
static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4;
static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions");
static_assert(PSQTBuckets % PsqtTileHeight == 0, "PsqtTileHeight must divide PSQTBuckets");
#endif
public:
@@ -116,6 +157,8 @@ namespace Stockfish::Eval::NNUE {
biases[i] = read_little_endian<BiasType>(stream);
for (std::size_t i = 0; i < HalfDimensions * InputDimensions; ++i)
weights[i] = read_little_endian<WeightType>(stream);
for (std::size_t i = 0; i < PSQTBuckets * InputDimensions; ++i)
psqtWeights[i] = read_little_endian<PSQTWeightType>(stream);
return !stream.fail();
}
@@ -129,11 +172,21 @@ namespace Stockfish::Eval::NNUE {
}
// Convert input features
void transform(const Position& pos, OutputType* output) const {
std::pair<std::int32_t, bool> transform(const Position& pos, OutputType* output, int bucket) const {
update_accumulator(pos, WHITE);
update_accumulator(pos, BLACK);
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
const auto& accumulation = pos.state()->accumulator.accumulation;
const auto& psqtAccumulation = pos.state()->accumulator.psqtAccumulation;
const auto psqt = (
psqtAccumulation[static_cast<int>(perspectives[0])][bucket]
- psqtAccumulation[static_cast<int>(perspectives[1])][bucket]
) / 2;
if (abs(psqt) > LazyThreshold * OutputScale)
return { psqt, true };
#if defined(USE_AVX512)
constexpr IndexType NumChunks = HalfDimensions / (SimdWidth * 2);
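With the constants above (LazyThreshold = 1400, OutputScale = 16), the early return triggers once |psqt| > 1400 * 16 = 22400 in the transformer's internal units, i.e. once the PSQT estimate alone already exceeds 1400 after the final division by OutputScale; the (a - b) / 2 averages the estimate as seen from the side to move and from the opponent.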
@@ -164,7 +217,6 @@ namespace Stockfish::Eval::NNUE {
const int8x8_t Zero = {0};
#endif
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
for (IndexType p = 0; p < 2; ++p) {
const IndexType offset = HalfDimensions * p;
@@ -241,6 +293,8 @@ namespace Stockfish::Eval::NNUE {
#if defined(USE_MMX)
_mm_empty();
#endif
return { psqt, false };
}
private:
@@ -256,6 +310,7 @@ namespace Stockfish::Eval::NNUE {
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
// is defined in the VECTOR code below, once in each branch
vec_t acc[NumRegs];
psqt_vec_t psqt[NumPsqtRegs];
#endif
// Look for a usable accumulator of an earlier position. We keep track
@@ -334,12 +389,52 @@ namespace Stockfish::Eval::NNUE {
}
}
for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
{
// Load accumulator
auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
&st->accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
psqt[k] = vec_load_psqt(&accTilePsqt[k]);
for (IndexType i = 0; states_to_update[i]; ++i)
{
// Difference calculation for the deactivated features
for (const auto index : removed[i])
{
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
}
// Difference calculation for the activated features
for (const auto index : added[i])
{
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
}
// Store accumulator
accTilePsqt = reinterpret_cast<psqt_vec_t*>(
&states_to_update[i]->accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
vec_store_psqt(&accTilePsqt[k], psqt[k]);
}
}
#else
for (IndexType i = 0; states_to_update[i]; ++i)
{
std::memcpy(states_to_update[i]->accumulator.accumulation[perspective],
st->accumulator.accumulation[perspective],
HalfDimensions * sizeof(BiasType));
for (std::size_t k = 0; k < PSQTBuckets; ++k)
states_to_update[i]->accumulator.psqtAccumulation[perspective][k] = st->accumulator.psqtAccumulation[perspective][k];
st = states_to_update[i];
// Difference calculation for the deactivated features
@@ -349,6 +444,9 @@ namespace Stockfish::Eval::NNUE {
for (IndexType j = 0; j < HalfDimensions; ++j)
st->accumulator.accumulation[perspective][j] -= weights[offset + j];
for (std::size_t k = 0; k < PSQTBuckets; ++k)
st->accumulator.psqtAccumulation[perspective][k] -= psqtWeights[index * PSQTBuckets + k];
}
// Difference calculation for the activated features
@@ -358,6 +456,9 @@ namespace Stockfish::Eval::NNUE {
for (IndexType j = 0; j < HalfDimensions; ++j)
st->accumulator.accumulation[perspective][j] += weights[offset + j];
for (std::size_t k = 0; k < PSQTBuckets; ++k)
st->accumulator.psqtAccumulation[perspective][k] += psqtWeights[index * PSQTBuckets + k];
}
}
#endif
@@ -393,16 +494,42 @@ namespace Stockfish::Eval::NNUE {
vec_store(&accTile[k], acc[k]);
}
for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
{
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
psqt[k] = vec_zero_psqt();
for (const auto index : active)
{
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
}
auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
&accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
vec_store_psqt(&accTilePsqt[k], psqt[k]);
}
#else
std::memcpy(accumulator.accumulation[perspective], biases,
HalfDimensions * sizeof(BiasType));
for (std::size_t k = 0; k < PSQTBuckets; ++k)
accumulator.psqtAccumulation[perspective][k] = 0;
for (const auto index : active)
{
const IndexType offset = HalfDimensions * index;
for (IndexType j = 0; j < HalfDimensions; ++j)
accumulator.accumulation[perspective][j] += weights[offset + j];
for (std::size_t k = 0; k < PSQTBuckets; ++k)
accumulator.psqtAccumulation[perspective][k] += psqtWeights[index * PSQTBuckets + k];
}
#endif
}
@@ -414,9 +541,11 @@ namespace Stockfish::Eval::NNUE {
using BiasType = std::int16_t;
using WeightType = std::int16_t;
using PSQTWeightType = std::int32_t;
alignas(CacheLineSize) BiasType biases[HalfDimensions];
alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions];
alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets];
};
} // namespace Stockfish::Eval::NNUE

View File

@@ -993,7 +993,7 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ
}
/// Position::do(undo)_null_move() is used to do(undo) a "null move": it flips
/// Position::do_null_move() is used to do a "null move": it flips
/// the side to move without executing any move on the board.
void Position::do_null_move(StateInfo& newSt) {
@@ -1033,6 +1033,9 @@ void Position::do_null_move(StateInfo& newSt) {
assert(pos_is_ok());
}
/// Position::undo_null_move() must be used to undo a "null move"
void Position::undo_null_move() {
assert(!checkers());
@@ -1098,8 +1101,8 @@ bool Position::see_ge(Move m, Value threshold) const {
if (!(stmAttackers = attackers & pieces(stm)))
break;
// Don't allow pinned pieces to attack (except the king) as long as
// there are pinners on their original square.
// Don't allow pinned pieces to attack as long as there are
// pinners on their original square.
if (pinners(~stm) & occupied)
stmAttackers &= ~blockers_for_king(stm);

View File

@@ -54,11 +54,11 @@ struct StateInfo {
// Not copied when making a move (will be recomputed anyhow)
Key key;
Bitboard checkersBB;
Piece capturedPiece;
StateInfo* previous;
Bitboard blockersForKing[COLOR_NB];
Bitboard pinners[COLOR_NB];
Bitboard checkSquares[PIECE_TYPE_NB];
Piece capturedPiece;
int repetition;
// Used by NNUE
@@ -219,11 +219,11 @@ private:
int castlingRightsMask[SQUARE_NB];
Square castlingRookSquare[CASTLING_RIGHT_NB];
Bitboard castlingPath[CASTLING_RIGHT_NB];
Thread* thisThread;
StateInfo* st;
int gamePly;
Color sideToMove;
Score psq;
Thread* thisThread;
StateInfo* st;
bool chess960;
};

View File

@@ -60,7 +60,7 @@ namespace {
// Futility margin
Value futility_margin(Depth d, bool improving) {
return Value(234 * (d - improving));
return Value(214 * (d - improving));
}
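As a quick check of the retuned constant: at depth 7 the margin is now 214 * 7 = 1498 when not improving and 214 * 6 = 1284 when improving, versus 234 * 7 = 1638 and 234 * 6 = 1404 before.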
// Reductions lookup table, initialized at startup
@@ -68,7 +68,7 @@ namespace {
Depth reduction(bool i, Depth d, int mn) {
int r = Reductions[d] * Reductions[mn];
return (r + 503) / 1024 + (!i && r > 915);
return (r + 534) / 1024 + (!i && r > 904);
}
constexpr int futility_move_count(bool improving, Depth depth) {
@@ -77,7 +77,7 @@ namespace {
// History and stats update bonus, based on depth
int stat_bonus(Depth d) {
return d > 14 ? 66 : 6 * d * d + 231 * d - 206;
return d > 14 ? 73 : 6 * d * d + 229 * d - 215;
}
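Worked example for the retuned bonus: at depth 5 it is 6 * 25 + 229 * 5 - 215 = 1080, while for any depth above 14 it is the flat constant 73.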
// Add a small random component to draw evaluations to avoid 3-fold blindness
@@ -374,7 +374,7 @@ void Thread::search() {
// Start with a small aspiration window and, in the case of a fail
// high/low, re-search with a bigger window until we don't fail
// high/low anymore.
failedHighCnt = 0;
int failedHighCnt = 0;
while (true)
{
Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt - searchAgainCounter);
@@ -764,7 +764,7 @@ namespace {
if ((ss-1)->currentMove != MOVE_NULL)
ss->staticEval = eval = evaluate(pos);
else
ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo;
ss->staticEval = eval = -(ss-1)->staticEval;
// Save static evaluation into transposition table
tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval);
@@ -773,7 +773,7 @@ namespace {
// Use static evaluation difference to improve quiet move ordering
if (is_ok((ss-1)->currentMove) && !(ss-1)->inCheck && !priorCapture)
{
int bonus = std::clamp(-depth * 4 * int((ss-1)->staticEval + ss->staticEval - 2 * Tempo), -1000, 1000);
int bonus = std::clamp(-depth * 4 * int((ss-1)->staticEval + ss->staticEval), -1000, 1000);
thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << bonus;
}
@@ -795,10 +795,10 @@ namespace {
// Step 8. Null move search with verification search (~40 Elo)
if ( !PvNode
&& (ss-1)->currentMove != MOVE_NULL
&& (ss-1)->statScore < 24185
&& (ss-1)->statScore < 23767
&& eval >= beta
&& eval >= ss->staticEval
&& ss->staticEval >= beta - 24 * depth - 34 * improving + 162 * ss->ttPv + 159
&& ss->staticEval >= beta - 20 * depth - 22 * improving + 168 * ss->ttPv + 159
&& !excludedMove
&& pos.non_pawn_material(us)
&& (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor))
@@ -806,7 +806,7 @@ namespace {
assert(eval - beta >= 0);
// Null move dynamic reduction based on depth and value
Depth R = (1062 + 68 * depth) / 256 + std::min(int(eval - beta) / 190, 3);
Depth R = (1090 + 81 * depth) / 256 + std::min(int(eval - beta) / 205, 3);
ss->currentMove = MOVE_NULL;
ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0];
@@ -844,7 +844,7 @@ namespace {
probCutBeta = beta + 209 - 44 * improving;
// Step 9. ProbCut (~10 Elo)
// Step 9. ProbCut (~4 Elo)
// If we have a good enough capture and a reduced search returns a value
// much above beta, we can (almost) safely prune the previous move.
if ( !PvNode
@@ -859,16 +859,6 @@ namespace {
&& ttValue != VALUE_NONE
&& ttValue < probCutBeta))
{
// if ttMove is a capture and value from transposition table is good enough produce probCut
// cutoff without digging into actual probCut search
if ( ss->ttHit
&& tte->depth() >= depth - 3
&& ttValue != VALUE_NONE
&& ttValue >= probCutBeta
&& ttMove
&& pos.capture_or_promotion(ttMove))
return probCutBeta;
assert(probCutBeta < VALUE_INFINITE);
MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory);
@@ -929,7 +919,7 @@ moves_loop: // When in check, search starts from here
ttCapture = ttMove && pos.capture_or_promotion(ttMove);
// Step 11. A small Probcut idea, when we are in check
probCutBeta = beta + 400;
probCutBeta = beta + 409;
if ( ss->inCheck
&& !PvNode
&& depth >= 4
@@ -1034,8 +1024,8 @@ moves_loop: // When in check, search starts from here
}
else
{
// Countermoves based pruning (~20 Elo)
if ( lmrDepth < 4 + ((ss-1)->statScore > 0 || (ss-1)->moveCount == 1)
// Continuation history based pruning (~20 Elo)
if ( lmrDepth < 5
&& (*contHist[0])[movedPiece][to_sq(move)] < CounterMovePruneThreshold
&& (*contHist[1])[movedPiece][to_sq(move)] < CounterMovePruneThreshold)
continue;
@@ -1083,7 +1073,7 @@ moves_loop: // When in check, search starts from here
{
extension = 1;
singularQuietLMR = !ttCapture;
if (!PvNode && value < singularBeta - 140)
if (!PvNode && value < singularBeta - 93)
extension = 2;
}
@@ -1131,21 +1121,18 @@ moves_loop: // When in check, search starts from here
if ( depth >= 3
&& moveCount > 1 + 2 * rootNode
&& ( !captureOrPromotion
|| moveCountPruning
|| ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha
|| cutNode
|| (!PvNode && !formerPv && captureHistory[movedPiece][to_sq(move)][type_of(pos.captured_piece())] < 3678)
|| thisThread->ttHitAverage < 432 * TtHitAverageResolution * TtHitAverageWindow / 1024)
|| (!PvNode && !formerPv))
&& (!PvNode || ss->ply > 1 || thisThread->id() % 4 != 3))
{
Depth r = reduction(improving, depth, moveCount);
// Decrease reduction if the ttHit running average is large
// Decrease reduction if the ttHit running average is large (~0 Elo)
if (thisThread->ttHitAverage > 537 * TtHitAverageResolution * TtHitAverageWindow / 1024)
r--;
// Decrease reduction if position is or has been on the PV
// and node is not likely to fail low. (~10 Elo)
// and node is not likely to fail low. (~3 Elo)
if ( ss->ttPv
&& !likelyFailLow)
r -= 2;
@@ -1170,10 +1157,7 @@ moves_loop: // When in check, search starts from here
if (ttCapture)
r++;
// Increase reduction at root if failing high
r += rootNode ? thisThread->failedHighCnt * thisThread->failedHighCnt * moveCount / 512 : 0;
// Increase reduction for cut nodes (~10 Elo)
// Increase reduction for cut nodes (~3 Elo)
if (cutNode)
r += 2;
@@ -1181,23 +1165,11 @@ moves_loop: // When in check, search starts from here
+ (*contHist[0])[movedPiece][to_sq(move)]
+ (*contHist[1])[movedPiece][to_sq(move)]
+ (*contHist[3])[movedPiece][to_sq(move)]
- 4741;
// Decrease/increase reduction by comparing opponent's stat score (~10 Elo)
if (ss->statScore >= -89 && (ss-1)->statScore < -116)
r--;
else if ((ss-1)->statScore >= -112 && ss->statScore < -100)
r++;
- 4923;
// Decrease/increase reduction for moves with a good/bad history (~30 Elo)
// If we are not in check use statScore, but if we are in check we use
// the sum of main history and first continuation history with an offset.
if (ss->inCheck)
r -= (thisThread->mainHistory[us][from_to(move)]
+ (*contHist[0])[movedPiece][to_sq(move)] - 3833) / 16384;
else
r -= ss->statScore / 14790;
if (!ss->inCheck)
r -= ss->statScore / 14721;
}
// In general we want to cap the LMR depth search at newDepth. But if
@@ -1460,7 +1432,7 @@ moves_loop: // When in check, search starts from here
// and addition of two tempos
ss->staticEval = bestValue =
(ss-1)->currentMove != MOVE_NULL ? evaluate(pos)
: -(ss-1)->staticEval + 2 * Tempo;
: -(ss-1)->staticEval;
// Stand pat. Return immediately if static value is at least beta
if (bestValue >= beta)
@@ -1548,7 +1520,7 @@ moves_loop: // When in check, search starts from here
[pos.moved_piece(move)]
[to_sq(move)];
// CounterMove based pruning
// Continuation history based pruning
if ( !captureOrPromotion
&& bestValue > VALUE_TB_LOSS_IN_MAX_PLY
&& (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold

View File

@@ -74,16 +74,7 @@ public:
void idle_loop();
void start_searching();
void wait_for_search_finished();
int id() const { return idx; }
void wait_for_worker_finished();
size_t thread_idx() const { return idx; }
template <typename FuncT>
void set_eval_callback(FuncT&& f) { on_eval_callback = std::forward<FuncT>(f); }
void clear_eval_callback() { on_eval_callback = nullptr; }
void on_eval() { if (on_eval_callback) on_eval_callback(rootPos); }
size_t id() const { return idx; }
Pawns::Table pawnsTable;
Material::Table materialTable;
@@ -103,11 +94,6 @@ public:
CapturePieceToHistory captureHistory;
ContinuationHistory continuationHistory[2][2];
Score contempt;
int failedHighCnt;
bool rootInTB;
int Cardinality;
bool UseRule50;
Depth ProbeDepth;
};

View File

@@ -94,14 +94,6 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {
optimumTime = TimePoint(optScale * timeLeft);
maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, maxScale * optimumTime));
if (Stockfish::Search::Limits.use_time_management())
{
int strength = std::log( std::max(1, int(optimumTime * Threads.size() / 10))) * 60;
tempoNNUE = std::clamp( (strength + 264) / 24, 18, 30);
}
else
tempoNNUE = 28; // default for no time given
if (Options["Ponder"])
optimumTime += optimumTime / 4;
}

View File

@@ -37,7 +37,6 @@ public:
TimePoint(Threads.nodes_searched()) : now() - startTime; }
int64_t availableNodes; // When in 'nodes as time' mode
int tempoNNUE;
private:
TimePoint startTime;

View File

@@ -30,7 +30,6 @@ namespace Stockfish {
bool Tune::update_on_last;
const UCI::Option* LastOption = nullptr;
BoolConditions Conditions;
static std::map<std::string, int> TuneResults;
string Tune::next(string& names, bool pop) {
@@ -110,24 +109,6 @@ template<> void Tune::Entry<Score>::read_option() {
template<> void Tune::Entry<Tune::PostUpdate>::init_option() {}
template<> void Tune::Entry<Tune::PostUpdate>::read_option() { value(); }
// Set binary conditions according to a probability that depends
// on the corresponding parameter value.
void BoolConditions::set() {
static PRNG rng(now());
static bool startup = true; // To workaround fishtest bench
for (size_t i = 0; i < binary.size(); i++)
binary[i] = !startup && (values[i] + int(rng.rand<unsigned>() % variance) > threshold);
startup = false;
for (size_t i = 0; i < binary.size(); i++)
sync_cout << binary[i] << sync_endl;
}
} // namespace Stockfish

View File

@@ -46,27 +46,6 @@ struct SetRange {
#define SetDefaultRange SetRange(default_range)
/// BoolConditions struct is used to tune boolean conditions in the
/// code by toggling them on/off according to a probability that
/// depends on the value of a tuned integer parameter: for high
/// values of the parameter condition is always disabled, for low
/// values is always enabled, otherwise it is enabled with a given
probability that depends on the parameter under tuning.
struct BoolConditions {
void init(size_t size) { values.resize(size, defaultValue), binary.resize(size, 0); }
void set();
std::vector<int> binary, values;
int defaultValue = 465, variance = 40, threshold = 500;
SetRange range = SetRange(0, 1000);
};
extern BoolConditions Conditions;
inline void set_conditions() { Conditions.set(); }
/// Tune class implements the 'magic' code that makes the setup of a fishtest
/// tuning session as easy as it can be. Mainly you have just to remove const
/// qualifiers from the variables you want to tune and flag them for tuning, so
@@ -159,14 +138,6 @@ class Tune {
return add(value, (next(names), std::move(names)), args...);
}
// Template specialization for BoolConditions
template<typename... Args>
int add(const SetRange& range, std::string&& names, BoolConditions& cond, Args&&... args) {
for (size_t size = cond.values.size(), i = 0; i < size; i++)
add(cond.range, next(names, i == size - 1) + "_" + std::to_string(i), cond.values[i]);
return add(range, std::move(names), args...);
}
std::vector<std::unique_ptr<EntryBase>> list;
public:
@@ -187,11 +158,6 @@ public:
#define UPDATE_ON_LAST() bool UNIQUE(p, __LINE__) = Tune::update_on_last = true
// Some macro to tune toggling of boolean conditions
#define CONDITION(x) (Conditions.binary[__COUNTER__] || (x))
#define TUNE_CONDITIONS() int UNIQUE(c, __LINE__) = (Conditions.init(__COUNTER__), 0); \
TUNE(Conditions, set_conditions)
} // namespace Stockfish
#endif // #ifndef TUNE_H_INCLUDED

View File

@@ -193,7 +193,6 @@ enum Value : int {
BishopValueMg = 825, BishopValueEg = 915,
RookValueMg = 1276, RookValueEg = 1380,
QueenValueMg = 2538, QueenValueEg = 2682,
Tempo = 28,
MidgameLimit = 15258, EndgameLimit = 3915
};

View File

@@ -10,7 +10,7 @@ trap 'error ${LINENO}' ERR
echo "reprosearch testing started"
# repeat two short games, separated by ucinewgame.
# repeat two short games, separated by ucinewgame.
# with go nodes $nodes they should result in exactly
# the same node count for each iteration.
cat << EOF > repeat.exp