Enable NEON for armv8

apple silicon platform with NEON
USE_NEON instead of IS_ARM · New platform apple-silicon with default USE_NEON · nnue_common.h includes arm_neon.h for USE_NEON
2025-12-06 10:53:50 +08:00 · 2020-08-05 10:45:17 -07:00 · 2020-08-05 16:45:07 +02:00 · 2020-08-05 07:46:45 +02:00 · 2020-08-05 07:29:12 +02:00 · 2020-08-05 07:20:23 +02:00
114 changed files with 12632 additions and 20013 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -1,44 +0,0 @@
-AccessModifierOffset: -1
-AlignAfterOpenBracket: Align
-AlignConsecutiveAssignments: Consecutive
-AlignConsecutiveDeclarations: Consecutive
-AlignEscapedNewlines: DontAlign
-AlignOperands: AlignAfterOperator
-AlignTrailingComments: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortEnumsOnASingleLine: false
-AllowShortIfStatementsOnASingleLine: false
-BreakTemplateDeclarations: Yes
-BasedOnStyle: WebKit
-BitFieldColonSpacing: After
-BinPackParameters: false
-BreakBeforeBinaryOperators: NonAssignment
-BreakBeforeBraces: Custom
-BraceWrapping:
-  AfterFunction: false
-  AfterClass: false
-  AfterControlStatement: true
-  BeforeElse: true
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializers: AfterColon
-BreakStringLiterals: false
-ColumnLimit: 100
-ContinuationIndentWidth: 2
-Cpp11BracedListStyle: true
-IndentGotoLabels: false
-IndentPPDirectives: BeforeHash
-IndentWidth: 4
-MaxEmptyLinesToKeep: 2
-NamespaceIndentation: None
-PackConstructorInitializers: Never
-ReflowComments: false
-SortIncludes: false
-SortUsingDeclarations: false
-SpaceAfterCStyleCast: true
-SpaceAfterTemplateKeyword: false
-SpaceBeforeCaseColon: true
-SpaceBeforeCpp11BracedList: false
-SpaceBeforeInheritanceColon: false
-SpaceInEmptyBlock: false
-SpacesBeforeTrailingComments: 2
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -1,7 +0,0 @@
-# .git-blame-ignore-revs
-# Ignore commit which added clang-format
-2d0237db3f0e596fb06e3ffbadba84dcc4e018f6
-
-# Post commit formatting fixes
-0fca5605fa2e5e7240fde5e1aae50952b2612231
-08ed4c90db31959521b7ef3186c026edd1e90307
--- a/.github/ISSUE_TEMPLATE/BUG-REPORT.yml
+++ b/.github/ISSUE_TEMPLATE/BUG-REPORT.yml
@@ -1,65 +0,0 @@
-name: Report issue
-description: Create a report to help us fix issues with the engine
-body:
- type: textarea
-  attributes:
-    label: Describe the issue
-    description: A clear and concise description of what you're experiencing.
-  validations:
-    required: true
-
- type: textarea
-  attributes:
-    label: Expected behavior
-    description: A clear and concise description of what you expected to happen.
-  validations:
-    required: true
-
- type: textarea
-  attributes:
-    label: Steps to reproduce
-    description: |
-      Steps to reproduce the behavior.
-      You can also use this section to paste the command line output.
-    placeholder: |
-      ```
-      position startpos moves g2g4 e7e5 f2f3
-      go mate 1
-      info string NNUE evaluation using nn-6877cd24400e.nnue enabled
-      info depth 1 seldepth 1 multipv 1 score mate 1 nodes 33 nps 11000 tbhits 0 time 3 pv d8h4
-      bestmove d8h4
-      ```
-  validations:
-    required: true
-
- type: textarea
-  attributes:
-    label: Anything else?
-    description: |
-      Anything that will give us more context about the issue you are encountering.
-      You can also use this section to propose ideas on how to solve the issue. 
-  validations:
-    required: false
-
- type: dropdown
-  attributes:
-    label: Operating system
-    options:
-      - All
-      - Windows
-      - Linux
-      - MacOS
-      - Android
-      - Other or N/A
-  validations:
-    required: true
-
- type: input
-  attributes:
-    label: Stockfish version
-    description: |
-      This can be found by running the engine.
-      You can also use the commit ID.
-    placeholder: Stockfish 15 / e6e324e
-  validations:
-    required: true
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,8 +0,0 @@
-blank_issues_enabled: false
-contact_links:
-  - name: Discord server
-    url: https://discord.gg/GWDRS3kU6R
-    about: Feel free to ask for support or have a chat with us on our Discord server!
-  - name: Discussions, Q&A, ideas, show us something...
-    url: https://github.com/official-stockfish/Stockfish/discussions/new
-    about: Do you have an idea for Stockfish? Do you want to show something that you made? Please open a discussion about it!
--- a/.github/ci/arm_matrix.json
+++ b/.github/ci/arm_matrix.json
@@ -1,51 +0,0 @@
-{
-  "config": [
-    {
-      "name": "Android NDK aarch64",
-      "os": "ubuntu-22.04",
-      "simple_name": "android",
-      "compiler": "aarch64-linux-android29-clang++",
-      "emu": "qemu-aarch64",
-      "comp": "ndk",
-      "shell": "bash",
-      "archive_ext": "tar"
-    },
-    {
-      "name": "Android NDK arm",
-      "os": "ubuntu-22.04",
-      "simple_name": "android",
-      "compiler": "armv7a-linux-androideabi29-clang++",
-      "emu": "qemu-arm",
-      "comp": "ndk",
-      "shell": "bash",
-      "archive_ext": "tar"
-    }
-  ],
-  "binaries": ["armv8-dotprod", "armv8", "armv7", "armv7-neon"],
-  "exclude": [
-    {
-      "binaries": "armv8-dotprod",
-      "config": {
-        "compiler": "armv7a-linux-androideabi29-clang++"
-      }
-    },
-    {
-      "binaries": "armv8",
-      "config": {
-        "compiler": "armv7a-linux-androideabi29-clang++"
-      }
-    },
-    {
-      "binaries": "armv7",
-      "config": {
-        "compiler": "aarch64-linux-android29-clang++"
-      }
-    },
-    {
-      "binaries": "armv7-neon",
-      "config": {
-        "compiler": "aarch64-linux-android29-clang++"
-      }
-    }
-  ]
-}
--- a/.github/ci/libcxx17.imp
+++ b/.github/ci/libcxx17.imp
@@ -1,22 +0,0 @@
-[
-    # Mappings for libcxx's internal headers
-    { include: [ "<__fwd/fstream.h>", private, "<iosfwd>", public ] },
-    { include: [ "<__fwd/ios.h>", private, "<iosfwd>", public ] },
-    { include: [ "<__fwd/istream.h>", private, "<iosfwd>", public ] },
-    { include: [ "<__fwd/ostream.h>", private, "<iosfwd>", public ] },
-    { include: [ "<__fwd/sstream.h>", private, "<iosfwd>", public ] },
-    { include: [ "<__fwd/streambuf.h>", private, "<iosfwd>", public ] },
-    { include: [ "<__fwd/string_view.h>", private, "<string_view>", public ] },
-    { include: [ "<__system_error/errc.h>", private, "<system_error>", public ] },
-
-    # Mappings for includes between public headers
-    { include: [ "<ios>", public, "<iostream>", public ] },
-    { include: [ "<streambuf>", public, "<iostream>", public ] },
-    { include: [ "<istream>", public, "<iostream>", public ] },
-    { include: [ "<ostream>", public, "<iostream>", public ] },
-    { include: [ "<iosfwd>", public, "<iostream>", public ] },
-
-    # Missing mappings in include-what-you-use's libcxx.imp
-    { include: ["@<__condition_variable/.*>", private, "<condition_variable>", public ] },
-    { include: ["@<__mutex/.*>", private, "<mutex>", public ] },
-]
--- a/.github/ci/matrix.json
+++ b/.github/ci/matrix.json
@@ -1,282 +0,0 @@
-{
-  "config": [
-    {
-      "name": "Ubuntu 22.04 GCC",
-      "os": "ubuntu-22.04",
-      "simple_name": "ubuntu",
-      "compiler": "g++",
-      "comp": "gcc",
-      "shell": "bash",
-      "archive_ext": "tar",
-      "sde": "/home/runner/work/Stockfish/Stockfish/.output/sde-temp-files/sde-external-9.27.0-2023-09-13-lin/sde -future --"
-    },
-    {
-      "name": "MacOS 13 Apple Clang",
-      "os": "macos-13",
-      "simple_name": "macos",
-      "compiler": "clang++",
-      "comp": "clang",
-      "shell": "bash",
-      "archive_ext": "tar"
-    },
-    {
-      "name": "MacOS 14 Apple Clang M1",
-      "os": "macos-14",
-      "simple_name": "macos-m1",
-      "compiler": "clang++",
-      "comp": "clang",
-      "shell": "bash",
-      "archive_ext": "tar"
-    },
-    {
-      "name": "Windows 2022 Mingw-w64 GCC x86_64",
-      "os": "windows-2022",
-      "simple_name": "windows",
-      "compiler": "g++",
-      "comp": "mingw",
-      "msys_sys": "mingw64",
-      "msys_env": "x86_64-gcc",
-      "shell": "msys2 {0}",
-      "ext": ".exe",
-      "sde": "/d/a/Stockfish/Stockfish/.output/sde-temp-files/sde-external-9.27.0-2023-09-13-win/sde.exe -future --",
-      "archive_ext": "zip"
-    },
-    {
-      "name": "Windows 11 Mingw-w64 Clang arm64",
-      "os": "windows-11-arm",
-      "simple_name": "windows",
-      "compiler": "clang++",
-      "comp": "clang",
-      "msys_sys": "clangarm64",
-      "msys_env": "clang-aarch64-clang",
-      "shell": "msys2 {0}",
-      "ext": ".exe",
-      "archive_ext": "zip"
-    }
-  ],
-  "binaries": [
-    "x86-64",
-    "x86-64-sse41-popcnt",
-    "x86-64-avx2",
-    "x86-64-bmi2",
-    "x86-64-avxvnni",
-    "x86-64-avx512",
-    "x86-64-vnni256",
-    "x86-64-vnni512",
-    "apple-silicon",
-    "armv8",
-    "armv8-dotprod"
-  ],
-  "exclude": [
-    {
-      "binaries": "x86-64",
-      "config": {
-        "os": "macos-14"
-      }
-    },
-    {
-      "binaries": "x86-64-sse41-popcnt",
-      "config": {
-        "os": "macos-14"
-      }
-    },
-    {
-      "binaries": "x86-64-avx2",
-      "config": {
-        "os": "macos-14"
-      }
-    },
-    {
-      "binaries": "x86-64-bmi2",
-      "config": {
-        "os": "macos-14"
-      }
-    },
-    {
-      "binaries": "x86-64-avxvnni",
-      "config": {
-        "os": "macos-14"
-      }
-    },
-    {
-      "binaries": "x86-64-avx512",
-      "config": {
-        "os": "macos-14"
-      }
-    },
-    {
-      "binaries": "x86-64-vnni256",
-      "config": {
-        "os": "macos-14"
-      }
-    },
-    {
-      "binaries": "x86-64-vnni512",
-      "config": {
-        "os": "macos-14"
-      }
-    },
-    {
-      "binaries": "x86-64-avxvnni",
-      "config": {
-        "os": "macos-13"
-      }
-    },
-    {
-      "binaries": "x86-64-avx512",
-      "config": {
-        "os": "macos-13"
-      }
-    },
-    {
-      "binaries": "x86-64-vnni256",
-      "config": {
-        "os": "macos-13"
-      }
-    },
-    {
-      "binaries": "x86-64-vnni512",
-      "config": {
-        "os": "macos-13"
-      }
-    },
-    {
-      "binaries": "x86-64",
-      "config": {
-        "os": "windows-11-arm"
-      }
-    },
-    {
-      "binaries": "x86-64-sse41-popcnt",
-      "config": {
-        "os": "windows-11-arm"
-      }
-    },
-    {
-      "binaries": "x86-64-avx2",
-      "config": {
-        "os": "windows-11-arm"
-      }
-    },
-    {
-      "binaries": "x86-64-bmi2",
-      "config": {
-        "os": "windows-11-arm"
-      }
-    },
-    {
-      "binaries": "x86-64-avxvnni",
-      "config": {
-        "os": "windows-11-arm"
-      }
-    },
-    {
-      "binaries": "x86-64-avx512",
-      "config": {
-        "os": "windows-11-arm"
-      }
-    },
-    {
-      "binaries": "x86-64-vnni256",
-      "config": {
-        "os": "windows-11-arm"
-      }
-    },
-    {
-      "binaries": "x86-64-vnni512",
-      "config": {
-        "os": "windows-11-arm"
-      }
-    },
-    {
-      "binaries": "apple-silicon",
-      "config": {
-        "os": "windows-2022"
-      }
-    },
-    {
-      "binaries": "apple-silicon",
-      "config": {
-        "os": "windows-11-arm"
-      }
-    },
-    {
-      "binaries": "apple-silicon",
-      "config": {
-        "os": "ubuntu-20.04"
-      }
-    },
-    {
-      "binaries": "apple-silicon",
-      "config": {
-        "os": "ubuntu-22.04"
-      }
-    },
-    {
-      "binaries": "apple-silicon",
-      "config": {
-        "os": "macos-13"
-      }
-    },
-    {
-      "binaries": "armv8",
-      "config": {
-        "os": "windows-2022"
-      }
-    },
-    {
-      "binaries": "armv8",
-      "config": {
-        "os": "ubuntu-20.04"
-      }
-    },
-    {
-      "binaries": "armv8",
-      "config": {
-        "os": "ubuntu-22.04"
-      }
-    },
-    {
-      "binaries": "armv8",
-      "config": {
-        "os": "macos-13"
-      }
-    },
-    {
-      "binaries": "armv8",
-      "config": {
-        "os": "macos-14"
-      }
-    },
-    {
-      "binaries": "armv8-dotprod",
-      "config": {
-        "os": "windows-2022"
-      }
-    },
-    {
-      "binaries": "armv8-dotprod",
-      "config": {
-        "os": "ubuntu-20.04"
-      }
-    },
-    {
-      "binaries": "armv8-dotprod",
-      "config": {
-        "os": "ubuntu-22.04"
-      }
-    },
-    {
-      "binaries": "armv8-dotprod",
-      "config": {
-        "os": "macos-13"
-      }
-    },
-    {
-      "binaries": "armv8-dotprod",
-      "config": {
-        "os": "macos-14"
-      }
-    }
-  ]
-}
--- a/.github/workflows/arm_compilation.yml
+++ b/.github/workflows/arm_compilation.yml
@@ -1,98 +0,0 @@
-name: Compilation
-on:
-  workflow_call:
-    inputs:
-      matrix:
-        type: string
-        required: true
-jobs:
-  Compilation:
-    name: ${{ matrix.config.name }} ${{ matrix.binaries }}
-    runs-on: ${{ matrix.config.os }}
-    env:
-      COMPCXX: ${{ matrix.config.compiler }}
-      COMP: ${{ matrix.config.comp }}
-      EMU: ${{ matrix.config.emu }}
-      EXT: ${{ matrix.config.ext }}
-      BINARY: ${{ matrix.binaries }}
-    strategy:
-      fail-fast: false
-      matrix: ${{ fromJson(inputs.matrix) }}
-    defaults:
-      run:
-        working-directory: src
-        shell: ${{ matrix.config.shell }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          persist-credentials: false
-
-      - name: Download required linux packages
-        if: runner.os == 'Linux'
-        run: |
-          sudo apt update
-          sudo apt install qemu-user
-
-      - name: Install NDK
-        if: runner.os == 'Linux'
-        run: |
-          if [ $COMP == ndk ]; then
-            NDKV="27.2.12479018"
-            ANDROID_ROOT=/usr/local/lib/android
-            ANDROID_SDK_ROOT=$ANDROID_ROOT/sdk
-            SDKMANAGER=$ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager
-            echo "y" | $SDKMANAGER "ndk;$NDKV"
-            ANDROID_NDK_ROOT=$ANDROID_SDK_ROOT/ndk/$NDKV
-            ANDROID_NDK_BIN=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin
-            echo "ANDROID_NDK_BIN=$ANDROID_NDK_BIN" >> $GITHUB_ENV
-          fi
-
-      - name: Extract the bench number from the commit history
-        run: |
-          for hash in $(git rev-list -100 HEAD); do
-            benchref=$(git show -s $hash | tac | grep -m 1 -o -x '[[:space:]]*\b[Bb]ench[ :]\+[1-9][0-9]\{5,7\}\b[[:space:]]*' | sed 's/[^0-9]//g') && break || true
-          done
-          [[ -n "$benchref" ]] && echo "benchref=$benchref" >> $GITHUB_ENV && echo "From commit: $hash" && echo "Reference bench: $benchref" || echo "No bench found"
-
-      - name: Download the used network from the fishtest framework
-        run: make net
-
-      - name: Check compiler
-        run: |
-          if [ $COMP == ndk ]; then
-            export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
-          fi
-          $COMPCXX -v
-
-      - name: Test help target
-        run: make help
-
-      - name: Check git
-        run: git --version
-
-      # Compile profile guided builds
-
-      - name: Compile ${{ matrix.binaries }} build
-        run: |
-          if [ $COMP == ndk ]; then
-            export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
-            export LDFLAGS="-static -Wno-unused-command-line-argument"
-          fi
-          make clean
-          make -j4 profile-build ARCH=$BINARY COMP=$COMP WINE_PATH=$EMU
-          make strip ARCH=$BINARY COMP=$COMP
-          WINE_PATH=$EMU ../tests/signature.sh $benchref
-          mv ./stockfish$EXT ../stockfish-android-$BINARY$EXT
-
-      - name: Remove non src files
-        run: git clean -fx
-
-      - name: Upload artifact for (pre)-release
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }}
-          path: |
-            .
-            !.git
-            !.output
--- a/.github/workflows/clang-format.yml
+++ b/.github/workflows/clang-format.yml
@@ -1,57 +0,0 @@
-# This workflow will run clang-format and comment on the PR.
-# Because of security reasons, it is crucial that this workflow
-# executes no shell script nor runs make.
-# Read this before editing: https://securitylab.github.com/research/github-actions-preventing-pwn-requests/
-
-name: Clang-Format
-on:
-  pull_request_target:
-    branches:
-      - "master"
-    paths:
-      - "**.cpp"
-      - "**.h"
-
-permissions:
-  pull-requests: write
-
-jobs:
-  Clang-Format:
-    name: Clang-Format
-    runs-on: ubuntu-22.04
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.pull_request.head.sha }}
-
-      - name: Run clang-format style check
-        uses: jidicula/clang-format-action@4726374d1aa3c6aecf132e5197e498979588ebc8 # @v4.15.0
-        id: clang-format
-        continue-on-error: true
-        with:
-          clang-format-version: "20"
-          exclude-regex: "incbin"
-
-      - name: Comment on PR
-        if: steps.clang-format.outcome == 'failure'
-        uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # @v2.5.0
-        with:
-          message: |
-            clang-format 20 needs to be run on this PR.
-            If you do not have clang-format installed, the maintainer will run it when merging.
-            For the exact version please see https://packages.ubuntu.com/plucky/clang-format-20.
-
-            _(execution **${{ github.run_id }}** / attempt **${{ github.run_attempt }}**)_
-          comment_tag: execution
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Comment on PR
-        if: steps.clang-format.outcome != 'failure'
-        uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # @v2.5.0
-        with:
-          message: |
-            _(execution **${{ github.run_id }}** / attempt **${{ github.run_attempt }}**)_
-          create_if_not_exists: false
-          comment_tag: execution
-          mode: delete
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -1,55 +0,0 @@
-name: "CodeQL"
-
-on:
-  push:
-    branches: ["master"]
-  pull_request:
-    # The branches below must be a subset of the branches above
-    branches: ["master"]
-  schedule:
-    - cron: "17 18 * * 1"
-
-jobs:
-  analyze:
-    name: Analyze
-    runs-on: ubuntu-latest
-    permissions:
-      actions: read
-      contents: read
-      security-events: write
-
-    strategy:
-      fail-fast: false
-      matrix:
-        language: ["cpp"]
-        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
-        # Use only 'java' to analyze code written in Java, Kotlin, or both
-        # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
-        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-
-      # Initializes the CodeQL tools for scanning.
-      - name: Initialize CodeQL
-        uses: github/codeql-action/init@v3
-        with:
-          languages: ${{ matrix.language }}
-          # If you wish to specify custom queries, you can do so here or in a config file.
-          # By default, queries listed here will override any specified in a config file.
-          # Prefix the list here with "+" to use these queries and those in the config file.
-
-          # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
-          # queries: security-extended,security-and-quality
-
-      - name: Build
-        working-directory: src
-        run: make -j build
-
-      - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@v3
-        with:
-          category: "/language:${{matrix.language}}"
--- a/.github/workflows/compilation.yml
+++ b/.github/workflows/compilation.yml
@@ -1,94 +0,0 @@
-name: Compilation
-on:
-  workflow_call:
-    inputs:
-      matrix:
-        type: string
-        required: true
-jobs:
-  Compilation:
-    name: ${{ matrix.config.name }} ${{ matrix.binaries }}
-    runs-on: ${{ matrix.config.os }}
-    env:
-      COMPCXX: ${{ matrix.config.compiler }}
-      COMP: ${{ matrix.config.comp }}
-      EXT: ${{ matrix.config.ext }}
-      NAME: ${{ matrix.config.simple_name }}
-      BINARY: ${{ matrix.binaries }}
-      SDE: ${{ matrix.config.sde }}
-    strategy:
-      fail-fast: false
-      matrix: ${{ fromJson(inputs.matrix) }}
-    defaults:
-      run:
-        working-directory: src
-        shell: ${{ matrix.config.shell }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-
-      - name: Install fixed GCC on Linux
-        if: runner.os == 'Linux'
-        uses: egor-tensin/setup-gcc@eaa888eb19115a521fa72b65cd94fe1f25bbcaac # @v1.3
-        with:
-          version: 11
-
-      - name: Setup msys and install required packages
-        if: runner.os == 'Windows'
-        uses: msys2/setup-msys2@v2
-        with:
-          msystem: ${{ matrix.config.msys_sys }}
-          install: mingw-w64-${{ matrix.config.msys_env }} make git zip
-
-      - name: Download SDE package
-        if: runner.os == 'Linux' || runner.os == 'Windows'
-        uses: petarpetrovt/setup-sde@91a1a03434384e064706634125a15f7446d2aafb # @v2.3
-        with:
-          environmentVariableName: SDE_DIR
-          sdeVersion: 9.27.0
-
-      - name: Download the used network from the fishtest framework
-        run: make net
-
-      - name: Check compiler
-        run: $COMPCXX -v
-
-      - name: Test help target
-        run: make help
-
-      - name: Check git
-        run: git --version
-
-      - name: Check compiler
-        run: $COMPCXX -v
-
-      - name: Show compiler cpu info
-        run: |
-          if [[ "$COMPCXX" == clang* ]]; then
-             $COMPCXX -E - -march=native -###
-          else
-            $COMPCXX -Q -march=native --help=target
-          fi
-
-      # x86-64 with newer extensions tests
-
-      - name: Compile ${{ matrix.config.binaries }} build
-        run: |
-          make clean
-          make -j4 profile-build ARCH=$BINARY COMP=$COMP WINE_PATH="$SDE"
-          make strip ARCH=$BINARY COMP=$COMP
-          WINE_PATH="$SDE" ../tests/signature.sh $benchref
-          mv ./stockfish$EXT ../stockfish-$NAME-$BINARY$EXT
-
-      - name: Remove non src files
-        run: git clean -fx
-
-      - name: Upload artifact for (pre)-release
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }}
-          path: |
-             .
-             !.git
-             !.output
--- a/.github/workflows/games.yml
+++ b/.github/workflows/games.yml
@@ -1,43 +0,0 @@
-# This workflow will play games with a debug enabled SF using the PR
-
-name: Games
-on:
-  workflow_call:
-jobs:
-  Matetrack:
-    name: Games
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Checkout SF repo 
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.pull_request.head.sha }}
-          path: Stockfish
-          persist-credentials: false
-
-      - name: build debug enabled version of SF
-        working-directory: Stockfish/src
-        run: make -j build debug=yes
-
-      - name: Checkout fastchess repo
-        uses: actions/checkout@v4
-        with:
-          repository: Disservin/fastchess
-          path: fastchess
-          ref: 894616028492ae6114835195f14a899f6fa237d3
-          persist-credentials: false
-
-      - name: fastchess build
-        working-directory: fastchess
-        run: make -j
-
-      - name: Run games
-        working-directory: fastchess
-        run: |
-          ./fastchess -rounds 4 -games 2 -repeat -concurrency 4 -openings file=app/tests/data/openings.epd format=epd order=random -srand $RANDOM\
-               -engine name=sf1 cmd=/home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish\
-               -engine name=sf2 cmd=/home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish\
-               -ratinginterval 1 -report penta=true -each proto=uci tc=4+0.04 -log file=fast.log | tee fast.out
-          cat fast.log
-          ! grep "Assertion" fast.log > /dev/null
-          ! grep "disconnect" fast.out > /dev/null
--- a/.github/workflows/iwyu.yml
+++ b/.github/workflows/iwyu.yml
@@ -1,49 +0,0 @@
-name: IWYU
-on:
-  workflow_call:
-jobs:
-  Analyzers:
-    name: Check includes
-    runs-on: ubuntu-22.04
-    defaults:
-      run:
-        working-directory: Stockfish/src
-        shell: bash
-    steps:
-      - name: Checkout Stockfish
-        uses: actions/checkout@v4
-        with:
-          path: Stockfish
-          persist-credentials: false
-
-      - name: Checkout include-what-you-use
-        uses: actions/checkout@v4
-        with:
-          repository: include-what-you-use/include-what-you-use
-          ref: f25caa280dc3277c4086ec345ad279a2463fea0f
-          path: include-what-you-use
-          persist-credentials: false
-
-      - name: Download required linux packages
-        run: |
-          sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main'
-          wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
-          sudo apt update
-          sudo apt install -y libclang-17-dev clang-17 libc++-17-dev
-
-      - name: Set up include-what-you-use
-        run: |
-          mkdir build && cd build
-          cmake -G "Unix Makefiles" -DCMAKE_PREFIX_PATH="/usr/lib/llvm-17" ..
-          sudo make install
-        working-directory: include-what-you-use
-
-      - name: Check include-what-you-use
-        run: include-what-you-use --version
-
-      - name: Check includes
-        run: >
-          make analyze
-          COMP=clang
-          CXX=include-what-you-use
-          CXXFLAGS="-stdlib=libc++ -Xiwyu --comment_style=long -Xiwyu --mapping='${{ github.workspace }}/Stockfish/.github/ci/libcxx17.imp' -Xiwyu --error"
--- a/.github/workflows/matetrack.yml
+++ b/.github/workflows/matetrack.yml
@@ -1,71 +0,0 @@
-# This workflow will run matetrack on the PR
-
-name: Matetrack
-on:
-  workflow_call:
-jobs:
-  Matetrack:
-    name: Matetrack
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Checkout SF repo 
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.pull_request.head.sha }}
-          path: Stockfish
-          persist-credentials: false
-
-      - name: build SF
-        working-directory: Stockfish/src
-        run: make -j profile-build
-
-      - name: Checkout matetrack repo
-        uses: actions/checkout@v4
-        with:
-          repository: vondele/matetrack
-          path: matetrack
-          ref: 4f8a80860ed8f3607f05a9195df8b40203bdc360
-          persist-credentials: false
-
-      - name: matetrack install deps
-        working-directory: matetrack
-        run: pip install -r requirements.txt
-
-      - name: cache syzygy
-        id: cache-syzygy
-        uses: actions/cache@v4
-        with:
-           path: |
-              matetrack/3-4-5-wdl/
-              matetrack/3-4-5-dtz/
-           key: key-syzygy
-
-      - name: download syzygy 3-4-5 if needed
-        working-directory: matetrack
-        if: steps.cache-syzygy.outputs.cache-hit != 'true'
-        run: |
-          wget --no-verbose -r -nH --cut-dirs=2 --no-parent --reject="index.html*" -e robots=off https://tablebase.lichess.ovh/tables/standard/3-4-5-wdl/
-          wget --no-verbose -r -nH --cut-dirs=2 --no-parent --reject="index.html*" -e robots=off https://tablebase.lichess.ovh/tables/standard/3-4-5-dtz/
-
-      - name: Run matetrack
-        working-directory: matetrack
-        run: |
-          python matecheck.py --syzygyPath 3-4-5-wdl/:3-4-5-dtz/ --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile mates2000.epd --nodes 100000 | tee matecheckout.out
-          ! grep "issues were detected" matecheckout.out > /dev/null
-
-      - name: Run matetrack with --syzygy50MoveRule false
-        working-directory: matetrack
-        run: |
-          grep 5men cursed.epd > cursed5.epd
-          python matecheck.py --syzygyPath 3-4-5-wdl/:3-4-5-dtz/ --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile cursed5.epd --nodes 100000 --syzygy50MoveRule false | tee matecheckcursed.out
-          ! grep "issues were detected" matecheckcursed.out > /dev/null
-
-      - name: Verify mate and TB win count for matecheckcursed.out
-        working-directory: matetrack
-        run: |
-          mates=$(grep "Found mates:" matecheckcursed.out | awk '{print $3}')
-          tbwins=$(grep "Found TB wins:" matecheckcursed.out | awk '{print $4}')
-          if [ $(($mates + $tbwins)) -ne 32 ]; then
-            echo "Sum of mates and TB wins is not 32 in matecheckcursed.out" >&2
-            exit 1
-          fi
--- a/.github/workflows/sanitizers.yml
+++ b/.github/workflows/sanitizers.yml
@@ -1,87 +0,0 @@
-name: Sanitizers
-on:
-  workflow_call:
-jobs:
-  Test-under-sanitizers:
-    name: ${{ matrix.sanitizers.name }}
-    runs-on: ${{ matrix.config.os }}
-    env:
-      COMPCXX: ${{ matrix.config.compiler }}
-      COMP: ${{ matrix.config.comp }}
-      CXXFLAGS: "-Werror"
-    strategy:
-      fail-fast: false
-      matrix:
-        config:
-          - name: Ubuntu 22.04 GCC
-            os: ubuntu-22.04
-            compiler: g++
-            comp: gcc
-            shell: bash
-        sanitizers:
-          - name: Run with thread sanitizer
-            make_option: sanitize=thread
-            cxx_extra_flags: ""
-            instrumented_option: sanitizer-thread
-          - name: Run with UB sanitizer
-            make_option: sanitize=undefined
-            cxx_extra_flags: ""
-            instrumented_option: sanitizer-undefined
-          - name: Run under valgrind
-            make_option: ""
-            cxx_extra_flags: ""
-            instrumented_option: valgrind
-          - name: Run under valgrind-thread
-            make_option: ""
-            cxx_extra_flags: ""
-            instrumented_option: valgrind-thread
-          - name: Run non-instrumented
-            make_option: ""
-            cxx_extra_flags: ""
-            instrumented_option: none
-          - name: Run with glibcxx assertions
-            make_option: ""
-            cxx_extra_flags: -D_GLIBCXX_ASSERTIONS
-            instrumented_option: non
-    defaults:
-      run:
-        working-directory: src
-        shell: ${{ matrix.config.shell }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-
-      - name: Download required linux packages
-        run: |
-          sudo apt update
-          sudo apt install expect valgrind g++-multilib
-
-      - name: Download the used network from the fishtest framework
-        run: make net
-
-      - name: Check compiler
-        run: $COMPCXX -v
-
-      - name: Test help target
-        run: make help
-
-      - name: Check git
-        run: git --version
-
-      # Since Linux Kernel 6.5 we are getting false positives from the ci,
-      # lower the ALSR entropy to disable ALSR, which works as a temporary workaround.
-      # https://github.com/google/sanitizers/issues/1716
-      # https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2056762
-
-      - name: Lower ALSR entropy
-        run: sudo sysctl -w vm.mmap_rnd_bits=28
-
-      # Sanitizers
-
-      - name: ${{ matrix.sanitizers.name }}
-        run: |
-          export CXXFLAGS="-O1 -fno-inline ${{ matrix.sanitizers.cxx_extra_flags }}"
-          make clean
-          make -j4 ARCH=x86-64-sse41-popcnt ${{ matrix.sanitizers.make_option }} debug=yes optimize=no build > /dev/null
-          python3 ../tests/instrumented.py --${{ matrix.sanitizers.instrumented_option }} ./stockfish
--- a/.github/workflows/stockfish.yml
+++ b/.github/workflows/stockfish.yml
@@ -1,122 +0,0 @@
-name: Stockfish
-on:
-  push:
-    tags:
-      - "*"
-    branches:
-      - master
-      - tools
-      - github_ci
-  pull_request:
-    branches:
-      - master
-      - tools
-jobs:
-  Prerelease:
-    if: github.repository == 'official-stockfish/Stockfish' && (github.ref == 'refs/heads/master' || (startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag'))
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write # For deleting/creating a prerelease
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-
-      # returns null if no pre-release exists
-      - name: Get Commit SHA of Latest Pre-release
-        run: |
-          # Install required packages
-          sudo apt-get update
-          sudo apt-get install -y curl jq
-
-          echo "COMMIT_SHA_TAG=$(jq -r 'map(select(.prerelease)) | first | .tag_name' <<< $(curl -s https://api.github.com/repos/${{ github.repository_owner }}/Stockfish/releases))" >> $GITHUB_ENV
-
-      # delete old previous pre-release and tag
-      - run: gh release delete ${{ env.COMMIT_SHA_TAG }} --cleanup-tag
-        if: env.COMMIT_SHA_TAG != 'null'
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-      # Make sure that an old ci that still runs on master doesn't recreate a prerelease
-      - name: Check Pullable Commits
-        id: check_commits
-        run: |
-          git fetch
-          CHANGES=$(git rev-list HEAD..origin/master --count)
-          echo "CHANGES=$CHANGES" >> $GITHUB_ENV
-
-      - name: Get last commit SHA
-        id: last_commit
-        run: echo "COMMIT_SHA=$(git rev-parse HEAD | cut -c 1-8)" >> $GITHUB_ENV
-
-      - name: Get commit date
-        id: commit_date
-        run: echo "COMMIT_DATE=$(git show -s --date=format:'%Y%m%d' --format=%cd HEAD)" >> $GITHUB_ENV
-
-      # Create a new pre-release, the other upload_binaries.yml will upload the binaries
-      # to this pre-release.
-      - name: Create Prerelease
-        if: github.ref_name == 'master' && env.CHANGES == '0'
-        uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981
-        with:
-          name: Stockfish dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }}
-          tag_name: stockfish-dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }}
-          prerelease: true
-
-  Matrix:
-    runs-on: ubuntu-latest
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      arm_matrix: ${{ steps.set-arm-matrix.outputs.arm_matrix }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-      - id: set-matrix
-        run: |
-          TASKS=$(echo $(cat .github/ci/matrix.json) )
-          echo "MATRIX=$TASKS" >> $GITHUB_OUTPUT
-      - id: set-arm-matrix
-        run: |
-          TASKS_ARM=$(echo $(cat .github/ci/arm_matrix.json) )
-          echo "ARM_MATRIX=$TASKS_ARM" >> $GITHUB_OUTPUT
-  Compilation:
-    needs: [Matrix]
-    uses: ./.github/workflows/compilation.yml
-    with:
-      matrix: ${{ needs.Matrix.outputs.matrix }}
-  ARMCompilation:
-    needs: [Matrix]
-    uses: ./.github/workflows/arm_compilation.yml
-    with:
-      matrix: ${{ needs.Matrix.outputs.arm_matrix }}
-  IWYU:
-    uses: ./.github/workflows/iwyu.yml
-  Sanitizers:
-    uses: ./.github/workflows/sanitizers.yml
-  Tests:
-    uses: ./.github/workflows/tests.yml
-  Matetrack:
-    uses: ./.github/workflows/matetrack.yml
-  Games:
-    uses: ./.github/workflows/games.yml
-  Binaries:
-    if: github.repository == 'official-stockfish/Stockfish'
-    needs: [Matrix, Prerelease, Compilation]
-    uses: ./.github/workflows/upload_binaries.yml
-    with:
-      matrix: ${{ needs.Matrix.outputs.matrix }}
-    permissions:
-      contents: write # For deleting/creating a (pre)release
-    secrets:
-      token: ${{ secrets.GITHUB_TOKEN }}
-  ARM_Binaries:
-    if: github.repository == 'official-stockfish/Stockfish'
-    needs: [Matrix, Prerelease, ARMCompilation]
-    uses: ./.github/workflows/upload_binaries.yml
-    with:
-      matrix: ${{ needs.Matrix.outputs.arm_matrix }}
-    permissions:
-      contents: write # For deleting/creating a (pre)release
-    secrets:
-      token: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,379 +0,0 @@
-name: Tests
-on:
-  workflow_call:
-jobs:
-  Test-Targets:
-    name: ${{ matrix.config.name }}
-    runs-on: ${{ matrix.config.os }}
-    env:
-      COMPCXX: ${{ matrix.config.compiler }}
-      COMP: ${{ matrix.config.comp }}
-      CXXFLAGS: "-Werror"
-    strategy:
-      fail-fast: false
-      matrix:
-        config:
-          - name: Ubuntu 22.04 GCC
-            os: ubuntu-22.04
-            compiler: g++
-            comp: gcc
-            run_32bit_tests: true
-            run_64bit_tests: true
-            shell: bash
-          - name: Ubuntu 22.04 Clang
-            os: ubuntu-22.04
-            compiler: clang++
-            comp: clang
-            run_32bit_tests: true
-            run_64bit_tests: true
-            shell: bash
-          - name: Android NDK aarch64
-            os: ubuntu-22.04
-            compiler: aarch64-linux-android29-clang++
-            comp: ndk
-            run_armv8_tests: true
-            shell: bash
-          - name: Android NDK arm
-            os: ubuntu-22.04
-            compiler: armv7a-linux-androideabi29-clang++
-            comp: ndk
-            run_armv7_tests: true
-            shell: bash
-          # Currently segfaults in the CI unrelated to a Stockfish change.
-          # - name: Linux GCC riscv64
-          #   os: ubuntu-22.04
-          #   compiler: g++
-          #   comp: gcc
-          #   run_riscv64_tests: true
-          #   base_image: "riscv64/alpine:edge"
-          #   platform: linux/riscv64
-          #   shell: bash
-          - name: Linux GCC ppc64
-            os: ubuntu-22.04
-            compiler: g++
-            comp: gcc
-            run_ppc64_tests: true
-            base_image: "ppc64le/alpine:latest"
-            platform: linux/ppc64le
-            shell: bash
-          - name: MacOS 13 Apple Clang
-            os: macos-13
-            compiler: clang++
-            comp: clang
-            run_64bit_tests: true
-            shell: bash
-          - name: MacOS 14 Apple Clang M1
-            os: macos-14
-            compiler: clang++
-            comp: clang
-            run_64bit_tests: false
-            run_m1_tests: true
-            shell: bash
-          - name: MacOS 13 GCC 11
-            os: macos-13
-            compiler: g++-11
-            comp: gcc
-            run_64bit_tests: true
-            shell: bash
-          - name: Windows 2022 Mingw-w64 GCC x86_64
-            os: windows-2022
-            compiler: g++
-            comp: mingw
-            run_64bit_tests: true
-            msys_sys: mingw64
-            msys_env: x86_64-gcc
-            shell: msys2 {0}
-          - name: Windows 2022 Mingw-w64 GCC i686
-            os: windows-2022
-            compiler: g++
-            comp: mingw
-            run_32bit_tests: true
-            msys_sys: mingw32
-            msys_env: i686-gcc
-            shell: msys2 {0}
-          - name: Windows 2022 Mingw-w64 Clang x86_64
-            os: windows-2022
-            compiler: clang++
-            comp: clang
-            run_64bit_tests: true
-            msys_sys: clang64
-            msys_env: clang-x86_64-clang
-            shell: msys2 {0}
-          - name: Windows 11 Mingw-w64 Clang arm64
-            os: windows-11-arm
-            compiler: clang++
-            comp: clang
-            run_armv8_tests: true
-            msys_sys: clangarm64
-            msys_env: clang-aarch64-clang
-            shell: msys2 {0}
-    defaults:
-      run:
-        working-directory: src
-        shell: ${{ matrix.config.shell }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          persist-credentials: false
-
-      - name: Download required linux packages
-        if: runner.os == 'Linux'
-        run: |
-          sudo apt update
-          sudo apt install expect valgrind g++-multilib qemu-user-static
-
-      - name: Install NDK
-        if: runner.os == 'Linux'
-        run: |
-          if [ $COMP == ndk ]; then
-            NDKV="27.2.12479018"
-            ANDROID_ROOT=/usr/local/lib/android
-            ANDROID_SDK_ROOT=$ANDROID_ROOT/sdk
-            SDKMANAGER=$ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager
-            echo "y" | $SDKMANAGER "ndk;$NDKV"
-            ANDROID_NDK_ROOT=$ANDROID_SDK_ROOT/ndk/$NDKV
-            ANDROID_NDK_BIN=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin
-            echo "ANDROID_NDK_BIN=$ANDROID_NDK_BIN" >> $GITHUB_ENV
-          fi
-
-      - name: Set up QEMU
-        if: matrix.config.base_image
-        uses: docker/setup-qemu-action@v3
-
-      - name: Set up Docker Buildx
-        if: matrix.config.base_image
-        uses: docker/setup-buildx-action@v3
-
-      - name: Build Docker container
-        if: matrix.config.base_image
-        run: |
-          docker buildx build --platform ${{ matrix.config.platform }} --load -t sf_builder - << EOF
-          FROM ${{ matrix.config.base_image }}
-          WORKDIR /app
-          RUN apk update && apk add make g++
-          CMD ["sh", "src/script.sh"]
-          EOF
-
-      - name: Download required macOS packages
-        if: runner.os == 'macOS'
-        run: brew install coreutils gcc@11
-
-      - name: Setup msys and install required packages
-        if: runner.os == 'Windows'
-        uses: msys2/setup-msys2@v2
-        with:
-          msystem: ${{ matrix.config.msys_sys }}
-          install: mingw-w64-${{ matrix.config.msys_env }} make git expect
-
-      - name: Download the used network from the fishtest framework
-        run: make net
-
-      - name: Extract the bench number from the commit history
-        run: |
-          for hash in $(git rev-list -100 HEAD); do
-            benchref=$(git show -s $hash | tac | grep -m 1 -o -x '[[:space:]]*\b[Bb]ench[ :]\+[1-9][0-9]\{5,7\}\b[[:space:]]*' | sed 's/[^0-9]//g') && break || true
-          done
-          [[ -n "$benchref" ]] && echo "benchref=$benchref" >> $GITHUB_ENV && echo "From commit: $hash" && echo "Reference bench: $benchref" || echo "No bench found"
-
-      - name: Check compiler
-        run: |
-          if [ -z "${{ matrix.config.base_image }}" ]; then
-            if [ $COMP == ndk ]; then
-              export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
-            fi
-            $COMPCXX -v
-          else
-            echo "$COMPCXX -v" > script.sh
-            docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder
-          fi
-
-      - name: Test help target
-        run: make help
-
-      - name: Check git
-        run: git --version
-
-      # x86-32 tests
-
-      - name: Test debug x86-32 build
-        if: matrix.config.run_32bit_tests
-        run: |
-          export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG"
-          make clean
-          make -j4 ARCH=x86-32 optimize=no debug=yes build
-          ../tests/signature.sh $benchref
-
-      - name: Test x86-32 build
-        if: matrix.config.run_32bit_tests
-        run: |
-          make clean
-          make -j4 ARCH=x86-32 build
-          ../tests/signature.sh $benchref
-
-      - name: Test x86-32-sse41-popcnt build
-        if: matrix.config.run_32bit_tests
-        run: |
-          make clean
-          make -j4 ARCH=x86-32-sse41-popcnt build
-          ../tests/signature.sh $benchref
-
-      - name: Test x86-32-sse2 build
-        if: matrix.config.run_32bit_tests
-        run: |
-          make clean
-          make -j4 ARCH=x86-32-sse2 build
-          ../tests/signature.sh $benchref
-
-      - name: Test general-32 build
-        if: matrix.config.run_32bit_tests
-        run: |
-          make clean
-          make -j4 ARCH=general-32 build
-          ../tests/signature.sh $benchref
-
-      # x86-64 tests
-
-      - name: Test debug x86-64-avx2 build
-        if: matrix.config.run_64bit_tests
-        run: |
-          export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG"
-          make clean
-          make -j4 ARCH=x86-64-avx2 optimize=no debug=yes build
-          ../tests/signature.sh $benchref
-
-      - name: Test x86-64-bmi2 build
-        if: matrix.config.run_64bit_tests
-        run: |
-          make clean
-          make -j4 ARCH=x86-64-bmi2 build
-          ../tests/signature.sh $benchref
-
-      - name: Test x86-64-avx2 build
-        if: matrix.config.run_64bit_tests
-        run: |
-          make clean
-          make -j4 ARCH=x86-64-avx2 build
-          ../tests/signature.sh $benchref
-
-      # Test a deprecated arch
-      - name: Test x86-64-modern build
-        if: matrix.config.run_64bit_tests
-        run: |
-          make clean
-          make -j4 ARCH=x86-64-modern build
-          ../tests/signature.sh $benchref
-
-      - name: Test x86-64-sse41-popcnt build
-        if: matrix.config.run_64bit_tests
-        run: |
-          make clean
-          make -j4 ARCH=x86-64-sse41-popcnt build
-          ../tests/signature.sh $benchref
-
-      - name: Test x86-64-ssse3 build
-        if: matrix.config.run_64bit_tests
-        run: |
-          make clean
-          make -j4 ARCH=x86-64-ssse3 build
-          ../tests/signature.sh $benchref
-
-      - name: Test x86-64-sse3-popcnt build
-        if: matrix.config.run_64bit_tests
-        run: |
-          make clean
-          make -j4 ARCH=x86-64-sse3-popcnt build
-          ../tests/signature.sh $benchref
-
-      - name: Test x86-64 build
-        if: matrix.config.run_64bit_tests
-        run: |
-          make clean
-          make -j4 ARCH=x86-64 build
-          ../tests/signature.sh $benchref
-
-      - name: Test general-64 build
-        if: matrix.config.run_64bit_tests
-        run: |
-          make clean
-          make -j4 ARCH=general-64 build
-          ../tests/signature.sh $benchref
-
-      - name: Test apple-silicon build
-        if: matrix.config.run_m1_tests
-        run: |
-          make clean
-          make -j4 ARCH=apple-silicon build
-          ../tests/signature.sh $benchref
-
-      # armv8 tests
-
-      - name: Test armv8 build
-        if: matrix.config.run_armv8_tests
-        run: |
-          if [ $COMP == ndk ]; then
-            export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
-            export LDFLAGS="-static -Wno-unused-command-line-argument"
-          fi
-          make clean
-          make -j4 ARCH=armv8 build
-          ../tests/signature.sh $benchref
-
-      - name: Test armv8-dotprod build
-        if: matrix.config.run_armv8_tests
-        run: |
-          if [ $COMP == ndk ]; then
-            export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
-            export LDFLAGS="-static -Wno-unused-command-line-argument"
-          fi
-          make clean
-          make -j4 ARCH=armv8-dotprod build
-          ../tests/signature.sh $benchref
-
-      # armv7 tests
-
-      - name: Test armv7 build
-        if: matrix.config.run_armv7_tests
-        run: |
-          export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
-          export LDFLAGS="-static -Wno-unused-command-line-argument"
-          make clean
-          make -j4 ARCH=armv7 build
-          ../tests/signature.sh $benchref
-
-      - name: Test armv7-neon build
-        if: matrix.config.run_armv7_tests
-        run: |
-          export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
-          export LDFLAGS="-static -Wno-unused-command-line-argument"
-          make clean
-          make -j4 ARCH=armv7-neon build
-          ../tests/signature.sh $benchref
-
-      # riscv64 tests
-
-      - name: Test riscv64 build
-        if: matrix.config.run_riscv64_tests
-        run: |
-          echo "cd src && export LDFLAGS='-static' && make clean && make -j4 ARCH=riscv64 build" > script.sh
-          docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder
-          ../tests/signature.sh $benchref
-
-      # ppc64 tests
-
-      - name: Test ppc64 build
-        if: matrix.config.run_ppc64_tests
-        run: |
-          echo "cd src && export LDFLAGS='-static' && make clean && make -j4 ARCH=ppc-64 build" > script.sh
-          docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder
-          ../tests/signature.sh $benchref
-
-      # Other tests
-
-      - name: Check perft and search reproducibility
-        if: matrix.config.run_64bit_tests
-        run: |
-          make clean
-          make -j4 ARCH=x86-64-avx2 build
-          ../tests/perft.sh
-          ../tests/reprosearch.sh
--- a/.github/workflows/upload_binaries.yml
+++ b/.github/workflows/upload_binaries.yml
@@ -1,114 +0,0 @@
-name: Upload Binaries
-on:
-  workflow_call:
-    inputs:
-      matrix:
-        type: string
-        required: true
-    secrets:
-      token:
-        required: true
-
-jobs:
-  Artifacts:
-    name: ${{ matrix.config.name }} ${{ matrix.binaries }}
-    runs-on: ${{ matrix.config.os }}
-    env:
-      COMPCXX: ${{ matrix.config.compiler }}
-      COMP: ${{ matrix.config.comp }}
-      EXT: ${{ matrix.config.ext }}
-      NAME: ${{ matrix.config.simple_name }}
-      BINARY: ${{ matrix.binaries }}
-      SDE: ${{ matrix.config.sde }}
-    strategy:
-      fail-fast: false
-      matrix: ${{ fromJson(inputs.matrix) }}
-    defaults:
-      run:
-        shell: ${{ matrix.config.shell }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-
-      - name: Download artifact from compilation
-        uses: actions/download-artifact@v4
-        with:
-          name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }}
-          path: ${{ matrix.config.simple_name }} ${{ matrix.binaries }}
-
-      - name: Setup msys and install required packages
-        if: runner.os == 'Windows'
-        uses: msys2/setup-msys2@v2
-        with:
-          msystem: ${{ matrix.config.msys_sys }}
-          install: mingw-w64-${{ matrix.config.msys_env }} make git zip
-
-      - name: Create Package
-        run: |
-          mkdir stockfish
-
-      - name: Download wiki
-        run: |
-          git clone https://github.com/official-stockfish/Stockfish.wiki.git wiki
-          rm -rf wiki/.git
-          mv wiki stockfish/
-
-      - name: Copy files
-        run: |
-          mv "${{ matrix.config.simple_name }} ${{ matrix.binaries }}" stockfish-workflow
-          cd stockfish-workflow
-          cp -r src ../stockfish/
-          cp -r scripts ../stockfish/
-          cp stockfish-$NAME-$BINARY$EXT ../stockfish/
-          cp "Top CPU Contributors.txt" ../stockfish/
-          cp Copying.txt ../stockfish/
-          cp AUTHORS ../stockfish/
-          cp CITATION.cff ../stockfish/
-          cp README.md ../stockfish/
-          cp CONTRIBUTING.md ../stockfish/
-
-      - name: Create tar
-        if: runner.os != 'Windows'
-        run: |
-          chmod +x ./stockfish/stockfish-$NAME-$BINARY$EXT
-          tar -cvf stockfish-$NAME-$BINARY.tar stockfish
-
-      - name: Create zip
-        if: runner.os == 'Windows'
-        run: |
-          zip -r stockfish-$NAME-$BINARY.zip stockfish
-
-      - name: Release
-        if: startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag'
-        uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981
-        with:
-          files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }}
-          token: ${{ secrets.token }}
-
-      - name: Get last commit sha
-        id: last_commit
-        run: echo "COMMIT_SHA=$(git rev-parse HEAD | cut -c 1-8)" >> $GITHUB_ENV
-
-      - name: Get commit date
-        id: commit_date
-        run: echo "COMMIT_DATE=$(git show -s --date=format:'%Y%m%d' --format=%cd HEAD)" >> $GITHUB_ENV
-
-      # Make sure that an old ci that still runs on master doesn't recreate a prerelease
-      - name: Check Pullable Commits
-        id: check_commits
-        run: |
-          git fetch
-          CHANGES=$(git rev-list HEAD..origin/master --count)
-          echo "CHANGES=$CHANGES" >> $GITHUB_ENV
-
-      - name: Prerelease
-        if: github.ref_name == 'master' && env.CHANGES == '0'
-        continue-on-error: true
-        uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981
-        with:
-          name: Stockfish dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }}
-          tag_name: stockfish-dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }}
-          prerelease: true
-          files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }}
-          token: ${{ secrets.token }}
--- a/.gitignore
+++ b/.gitignore
@@ -1,17 +0,0 @@
-# Files from build
-**/*.o
-**/*.s
-src/.depend
-
-# Built binary
-src/stockfish*
-src/-lstdc++.res
-
-# Neural network for the NNUE evaluation
-**/*.nnue
-
-# Files generated by the instrumented tests
-tsan.supp
-__pycache__/
-tests/syzygy
-tests/bench_tmp.epd
--- a/.travis.yml
+++ b/.travis.yml
@@ -0,0 +1,80 @@
+language: cpp
+dist: bionic
+
+matrix:
+  include:
+    - os: linux
+      compiler: gcc
+      addons:
+        apt:
+          packages: ['g++-8', 'g++-8-multilib', 'g++-multilib', 'valgrind', 'expect', 'curl']
+      env:
+        - COMPILER=g++-8
+        - COMP=gcc
+
+    - os: linux
+      compiler: clang
+      addons:
+        apt:
+          packages: ['clang-10', 'llvm-10-dev', 'g++-multilib', 'valgrind', 'expect', 'curl']
+      env:
+        - COMPILER=clang++-10
+        - COMP=clang
+
+    - os: osx
+      osx_image: xcode12
+      compiler: gcc
+      env:
+        - COMPILER=g++
+        - COMP=gcc
+
+    - os: osx
+      osx_image: xcode12
+      compiler: clang
+      env:
+        - COMPILER=clang++
+        - COMP=clang
+
+branches:
+  only:
+   - master
+
+before_script:
+  - cd src
+
+script:
+  # Obtain bench reference from git log
+  - git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig
+  - export benchref=$(cat git_sig)
+  - echo "Reference bench:" $benchref
+
+  #
+  # Compiler version string
+  - $COMPILER -v
+
+  #
+  # Verify bench number against various builds
+  - export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG"
+  - make clean && make -j2 ARCH=x86-64 optimize=no debug=yes build && ../tests/signature.sh $benchref
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi
+
+  #
+  # Check perft and reproducible search
+  - export CXXFLAGS="-Werror"
+  - make clean && make -j2 ARCH=x86-64 build
+  - ../tests/perft.sh
+  - ../tests/reprosearch.sh
+
+  #
+  # Valgrind
+  #
+  - export CXXFLAGS="-O1 -fno-inline"
+  - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64 debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi
+  - if [ -x "$(command -v valgrind )" ]; then ../tests/instrumented.sh --valgrind-thread; fi
+
+  #
+  # Sanitizer
+  #
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=thread    optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,146 +1,99 @@
-# Founders of the Stockfish project and Fishtest infrastructure
+# List of authors for Stockfish, as of August 4, 2020
+
+# Founders of the Stockfish project and fishtest infrastructure
 Tord Romstad (romstad)
 Marco Costalba (mcostalba)
 Joona Kiiski (zamar)
 Gary Linscott (glinscott)

-# Authors and inventors of NNUE, training, and NNUE port
+# Authors and inventors of NNUE, training, NNUE port
 Yu Nasu (ynasu87)
 Motohiro Isozaki (yaneurao)
 Hisayori Noda (nodchip)

-# All other authors of Stockfish code (in alphabetical order)
+# all other authors of the code in alphabetical order
 Aditya (absimaldata)
 Adrian Petrescu (apetresc)
-Ahmed Kerimov (wcdbmv)
 Ajith Chandy Jose (ajithcj)
 Alain Savard (Rocky640)
 Alayan Feh (Alayan-stk-2)
 Alexander Kure
 Alexander Pagel (Lolligerhans)
-Alfredo Menezes (lonfom169)
 Ali AlZhrani (Cooffe)
-AliceRoselia
-Andreas Jan van der Meulen (Andyson007)
-Andreas Matthies (Matthies)
-Andrei Vetrov (proukornew)
 Andrew Grant (AndyGrant)
 Andrey Neporada (nepal)
 Andy Duplain
-Antoine Champion (antoinechampion)
 Aram Tumanian (atumanian)
 Arjun Temurnikar
-Artem Solopiy (EntityFX)
 Auguste Pop
-Balazs Szilagyi
 Balint Pfliegel
-Baptiste Rech (breatn)
-Ben Chaney (Chaneybenjamini)
 Ben Koshy (BKSpurgeon)
 Bill Henry (VoyagerOne)
 Bojun Guo (noobpwnftw, Nooby)
-borg323
-Boštjan Mejak (PedanticHacker)
 braich
 Brian Sheppard (SapphireBrand, briansheppard-toast)
-Bruno de Melo Costa (BM123499)
-Bruno Pellanda (pellanda)
 Bryan Cross (crossbr)
 candirufish
-Carlos Esparza Sánchez (ces42)
 Chess13234
-Chris Bao (sscg13)
 Chris Cain (ceebo)
-Ciekce
-clefrks
-Clemens L. (rn5f107s2)
-Cody Ho (aesrentai)
-Dale Weiler (graphitemaster)
+Dan Schmidt (dfannius)
 Daniel Axtens (daxtens)
 Daniel Dugovic (ddugovic)
-Daniel Monroe (Ergodice)
-Daniel Samek (DanSamek)
-Dan Schmidt (dfannius)
-Dariusz Orzechowski (dorzechowski)
-David (dav1312)
+Dariusz Orzechowski
 David Zar
 Daylen Yang (daylen)
-Deshawn Mohan-Smith (GoldenRare)
-Dieter Dobbelaere (ddobbelaere)
 DiscanX
 Dominik Schlösser (domschl)
 double-beep
-Douglas Matos Gomes (dsmsgms)
-Dubslow
 Eduardo Cáceres (eduherminio)
 Eelco de Groot (KingDefender)
-Ehsan Rashid (erashid)
 Elvin Liu (solarlight2)
 erbsenzaehler
 Ernesto Gatti
-evqsx
+Linmiao Xu (linrock)
 Fabian Beuke (madnight)
 Fabian Fichter (ianfab)
-Fanael Linithien (Fanael)
 fanon
-Fauzi Akram Dabat (fauzi2)
+Fauzi Akram Dabat (FauziAkram)
 Felix Wittmann
 gamander
-Gabriele Lombardo (gabe)
-Gahtan Nahdi
 Gary Heckman (gheckman)
-George Sobala (gsobala)
 gguliash
-Giacomo Lorenzetti (G-Lorenz)
 Gian-Carlo Pascutto (gcp)
-Goh CJ (cj5716)
 Gontran Lemaire (gonlem)
 Goodkov Vasiliy Aleksandrovich (goodkov)
 Gregor Cramer
 GuardianRM
-Guy Vreuls (gvreuls)
 Günther Demetz (pb00067, pb00068)
+Guy Vreuls (gvreuls)
 Henri Wiechers
 Hiraoka Takuya (HiraokaTakuya)
 homoSapiensSapiens
 Hongzhi Cheng
 Ivan Ivec (IIvec)
 Jacques B. (Timshel)
-Jake Senne (w1wwwwww)
 Jan Ondruš (hxim)
-Jared Kish (Kurtbusch, kurt22i)
+Jared Kish (Kurtbusch)
 Jarrod Torriero (DU-jdto)
-Jasper Shovelton (Beanie496)
-Jean-Francois Romang (jromang)
 Jean Gauthier (OuaisBla)
+Jean-Francois Romang (jromang)
 Jekaa
 Jerry Donald Watson (jerrydonaldwatson)
-jjoshua2
-Jonathan Buladas Dumale (SFisGOD)
 Jonathan Calovski (Mysseno)
-Jonathan McDermid (jonathanmcdermid)
+Jonathan Dumale (SFisGOD)
 Joost VandeVondele (vondele)
+Jörg Oster (joergoster)
 Joseph Ellis (jhellis3)
 Joseph R. Prostko
-Jörg Oster (joergoster)
-Julian Willemer (NightlyKing)
 jundery
 Justin Blanchard (UncombedCoconut)
 Kelly Wilson
 Ken Takusagawa
-Kenneth Lee (kennethlee33)
-Kian E (KJE-98)
 kinderchocolate
 Kiran Panditrao (Krgp)
-Kirill Zaripov (kokodio)
 Kojirion
-Krisztián Peőcz
-Krystian Kuzniarek (kuzkry)
 Leonardo Ljubičić (ICCF World Champion)
 Leonid Pechenik (lp--)
-Li Ying (yl25946)
-Liam Keegan (lkeegan)
-Linmiao Xu (linrock)
 Linus Arver (listx)
 loco-loco
 Lub van den Berg (ElbertoOne)
@@ -149,16 +102,10 @@ Lucas Braesch (lucasart)
 Lyudmil Antonov (lantonov)
 Maciej Żenczykowski (zenczykowski)
 Malcolm Campbell (xoto10)
-Mark Marosi (Mapika)
 Mark Tenzer (31m059)
 marotear
-Mathias Parnaudeau (mparnaudeau)
-Matt Ginsberg (mattginsberg)
 Matthew Lai (matthewlai)
 Matthew Sullivan (Matt14916)
-Max A. (Disservin)
-Maxim Masiutin (maximmasiutin)
-Maxim Molchanov (Maxim)
 Michael An (man)
 Michael Byrne (MichaelB7)
 Michael Chaly (Vizvezdenec)
@@ -167,94 +114,66 @@ Michael Whiteley (protonspring)
 Michel Van den Bergh (vdbergh)
 Miguel Lahoz (miguel-l)
 Mikael Bäckman (mbootsector)
-Mike Babigian (Farseer)
 Mira
 Miroslav Fontán (Hexik)
 Moez Jellouli (MJZ1977)
 Mohammed Li (tthsqe12)
-Muzhen J (XInTheDark)
 Nathan Rugg (nmrugg)
-Nguyen Pham (nguyenpham)
-Nicklas Persson (NicklasPersson)
 Nick Pelling (nickpelling)
+Nicklas Persson (NicklasPersson)
 Niklas Fiekas (niklasf)
 Nikolay Kostov (NikolayIT)
+Nguyen Pham (nguyenpham)
 Norman Schmidt (FireFather)
-notruck
-Nour Berakdar (Nonlinear)
-Ofek Shochat (OfekShochat, ghostway)
 Ondrej Mosnáček (WOnder93)
-Ondřej Mišina (AndrovT)
 Oskar Werkelin Ahlin
-Ömer Faruk Tutkun (OmerFarukTutkun)
 Pablo Vazquez
 Panthee
 Pascal Romaret
 Pasquale Pigazzini (ppigazzini)
 Patrick Jansen (mibere)
-Peter Schneider (pschneider1968)
+pellanda
 Peter Zsifkovits (CoffeeOne)
-PikaCat
 Praveen Kumar Tummala (praveentml)
-Prokop Randáček (ProkopRandacek)
 Rahul Dsilva (silversolver1)
 Ralph Stößer (Ralph Stoesser)
 Raminder Singh
 renouve
-Reuven Peleg (R-Peleg)
-Richard Lloyd (Richard-Lloyd)
-Robert Nürnberg (robertnurnberg)
+Reuven Peleg
+Richard Lloyd
 Rodrigo Exterckötter Tjäder
-Rodrigo Roim (roim)
-Ronald de Man (syzygy1, syzygy)
 Ron Britvich (Britvich)
+Ronald de Man (syzygy1, syzygy)
 rqs
-Rui Coelho (ruicoelhopedro)
 Ryan Schmitt
 Ryan Takker
 Sami Kiminki (skiminki)
 Sebastian Buchwald (UniQP)
 Sergei Antonov (saproj)
 Sergei Ivanov (svivanov72)
-Sergio Vieri (sergiovieri)
 sf-x
-Shahin M. Shahin (peregrine)
 Shane Booth (shane31)
 Shawn Varghese (xXH4CKST3RXx)
-Shawn Xu (xu-shawn)
-Siad Daboul (Topologist)
 Stefan Geschwentner (locutus2)
 Stefano Cardanobile (Stefano80)
-Stefano Di Martino (StefanoD)
 Steinar Gunderson (sesse)
 Stéphane Nicolet (snicolet)
-Stephen Touset (stouset)
-Syine Mineta (MinetaS)
-Taras Vuk (TarasVuk)
 Thanar2
 thaspel
 theo77186
-TierynnB
-Ting-Hsuan Huang (fffelix-huang)
-Tobias Steinmann
-Tomasz Sobczyk (Sopel97)
 Tom Truscott
 Tom Vijlbrief (tomtor)
+Tomasz Sobczyk (Sopel97)
 Torsten Franz (torfranz, tfranzer)
-Torsten Hellwig (Torom)
 Tracey Emery (basepr1me)
 tttak
 Unai Corzo (unaiic)
 Uri Blass (uriblass)
 Vince Negri (cuddlestmonkey)
-Viren
-Wencey Wang
-windfishballad
-xefoci7612
-Xiang Wang (KatyushaScarlet)
 zz4032

+
 # Additionally, we acknowledge the authors and maintainers of fishtest,
-# an amazing and essential framework for Stockfish development!
+# an amazing and essential framework for the development of Stockfish!
 #
-# https://github.com/official-stockfish/fishtest/blob/master/AUTHORS
+# https://github.com/glinscott/fishtest/blob/master/AUTHORS
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -1,23 +0,0 @@
-# This CITATION.cff file was generated with cffinit.
-# Visit https://bit.ly/cffinit to generate yours today!
-
-cff-version: 1.2.0
-title: Stockfish
-message: >-
-  Please cite this software using the metadata from this
-  file.
-type: software
-authors:
-  - name: The Stockfish developers (see AUTHORS file)
-repository-code: 'https://github.com/official-stockfish/Stockfish'
-url: 'https://stockfishchess.org/'
-repository-artifact: 'https://stockfishchess.org/download/'
-abstract: Stockfish is a free and strong UCI chess engine.
-keywords:
-  - chess
-  - artificial intelligence (AI)
-  - tree search
-  - alpha-beta search
-  - neural networks (NN)
-  - efficiently updatable neural networks (NNUE)
-license: GPL-3.0
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,96 +0,0 @@
-# Contributing to Stockfish
-
-Welcome to the Stockfish project! We are excited that you are interested in
-contributing. This document outlines the guidelines and steps to follow when
-making contributions to Stockfish.
-
-## Table of Contents
-
- [Building Stockfish](#building-stockfish)
- [Making Contributions](#making-contributions)
-  - [Reporting Issues](#reporting-issues)
-  - [Submitting Pull Requests](#submitting-pull-requests)
- [Code Style](#code-style)
- [Community and Communication](#community-and-communication)
- [License](#license)
-
-## Building Stockfish
-
-In case you do not have a C++ compiler installed, you can follow the
-instructions from our wiki.
-
- [Ubuntu][ubuntu-compiling-link]
- [Windows][windows-compiling-link]
- [macOS][macos-compiling-link]
-
-## Making Contributions
-
-### Reporting Issues
-
-If you find a bug, please open an issue on the
-[issue tracker][issue-tracker-link]. Be sure to include relevant information
-like your operating system, build environment, and a detailed description of the
-problem.
-
-_Please note that Stockfish's development is not focused on adding new features.
-Thus any issue regarding missing features will potentially be closed without
-further discussion._
-
-### Submitting Pull Requests
-
- Functional changes need to be tested on fishtest. See
-  [Creating my First Test][creating-my-first-test] for more details.
-  The accompanying pull request should include a link to the test results and
-  the new bench.
-
- Non-functional changes (e.g. refactoring, code style, documentation) do not
-  need to be tested on fishtest, unless they might impact performance.
-
- Provide a clear and concise description of the changes in the pull request
-  description.
-
-_First time contributors should add their name to [AUTHORS](./AUTHORS)._
-
-_Stockfish's development is not focused on adding new features. Thus any pull
-request introducing new features will potentially be closed without further
-discussion._
-
-## Code Style
-
-Changes to Stockfish C++ code should respect our coding style defined by
-[.clang-format](.clang-format). You can format your changes by running
-`make format`. This requires clang-format version 20 to be installed on your system.
-
-## Navigate
-
-For experienced Git users who frequently use git blame, it is recommended to
-configure the blame.ignoreRevsFile setting.
-This setting is useful for excluding noisy formatting commits.
-
-```bash
-git config blame.ignoreRevsFile .git-blame-ignore-revs
-```
-
-## Community and Communication
-
- Join the [Stockfish discord][discord-link] to discuss ideas, issues, and
-  development.
- Participate in the [Stockfish GitHub discussions][discussions-link] for
-  broader conversations.
-
-## License
-
-By contributing to Stockfish, you agree that your contributions will be licensed
-under the GNU General Public License v3.0. See [Copying.txt][copying-link] for
-more details.
-
-Thank you for contributing to Stockfish and helping us make it even better!
-
-[copying-link]:           https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt
-[discord-link]:           https://discord.gg/GWDRS3kU6R
-[discussions-link]:       https://github.com/official-stockfish/Stockfish/discussions/new
-[creating-my-first-test]: https://github.com/official-stockfish/fishtest/wiki/Creating-my-first-test#create-your-test
-[issue-tracker-link]:     https://github.com/official-stockfish/Stockfish/issues
-[ubuntu-compiling-link]:  https://github.com/official-stockfish/Stockfish/wiki/Developers#user-content-installing-a-compiler-1
-[windows-compiling-link]: https://github.com/official-stockfish/Stockfish/wiki/Developers#user-content-installing-a-compiler
-[macos-compiling-link]:   https://github.com/official-stockfish/Stockfish/wiki/Developers#user-content-installing-a-compiler-2
--- a/Copying.txt
+++ b/Copying.txt
--- a/README.md
+++ b/README.md
@@ -1,161 +1,289 @@
-<div align="center">
-
-  [![Stockfish][stockfish128-logo]][website-link]
-
-  <h3>Stockfish</h3>
-
-  A free and strong UCI chess engine.
-  <br>
-  <strong>[Explore Stockfish docs »][wiki-link]</strong>
-  <br>
-  <br>
-  [Report bug][issue-link]
-  ·
-  [Open a discussion][discussions-link]
-  ·
-  [Discord][discord-link]
-  ·
-  [Blog][website-blog-link]
-
-  [![Build][build-badge]][build-link]
-  [![License][license-badge]][license-link]
-  <br>
-  [![Release][release-badge]][release-link]
-  [![Commits][commits-badge]][commits-link]
-  <br>
-  [![Website][website-badge]][website-link]
-  [![Fishtest][fishtest-badge]][fishtest-link]
-  [![Discord][discord-badge]][discord-link]
-
-</div>
-
 ## Overview

-[Stockfish][website-link] is a **free and strong UCI chess engine** derived from
-Glaurung 2.1 that analyzes chess positions and computes the optimal moves.
+[![Build Status](https://travis-ci.org/official-stockfish/Stockfish.svg?branch=master)](https://travis-ci.org/official-stockfish/Stockfish)
+[![Build Status](https://ci.appveyor.com/api/projects/status/github/official-stockfish/Stockfish?branch=master&svg=true)](https://ci.appveyor.com/project/mcostalba/stockfish/branch/master)

-Stockfish **does not include a graphical user interface** (GUI) that is required
-to display a chessboard and to make it easy to input moves. These GUIs are
-developed independently from Stockfish and are available online. **Read the
-documentation for your GUI** of choice for information about how to use
+[Stockfish](https://stockfishchess.org) is a free, powerful UCI chess engine
+derived from Glaurung 2.1. It features two evaluation functions, the classical
+evaluation based on handcrafted terms, and the NNUE evaluation based on
+efficiently updatable neural networks. The classical evaluation runs efficiently
+on most 64-bit CPU architectures, while the NNUE evaluation benefits strongly from the
+vector intrinsics available on modern CPUs (avx2 or similar).
+
+Stockfish is not a complete chess program and requires a
+UCI-compatible GUI (e.g. XBoard with PolyGlot, Scid, Cute Chess, eboard, Arena,
+Sigma Chess, Shredder, Chess Partner or Fritz) in order to be used comfortably.
+Read the documentation for your GUI of choice for information about how to use
 Stockfish with it.

-See also the Stockfish [documentation][wiki-usage-link] for further usage help.

 ## Files

 This distribution of Stockfish consists of the following files:

-  * [README.md][readme-link], the file you are currently reading.
+  * README.md, the file you are currently reading.

-  * [Copying.txt][license-link], a text file containing the GNU General Public
-    License version 3.
+  * Copying.txt, a text file containing the GNU General Public License version 3.

-  * [AUTHORS][authors-link], a text file with the list of authors for the project.
+  * src, a subdirectory containing the full source code, including a Makefile
+    that can be used to compile Stockfish on Unix-like systems.

-  * [src][src-link], a subdirectory containing the full source code, including a
-    Makefile that can be used to compile Stockfish on Unix-like systems.
+To use the NNUE evaluation an additional data file with neural network parameters
+needs to be downloaded. The filename for the default set can be found as the default
+value of the `EvalFile` UCI option, with the format
+`nn-[SHA256 first 12 digits].nnue` (e.g. nn-c157e0a5755b.nnue). This file can be downloaded from
+```
+https://tests.stockfishchess.org/api/nn/[filename]
+```
+replacing `[filename]` as needed.

-  * a file with the .nnue extension, storing the neural network for the NNUE
-    evaluation. Binary distributions will have this file embedded.

-## Contributing
+## UCI options

-__See [Contributing Guide](CONTRIBUTING.md).__
+Currently, Stockfish has the following UCI options:
+
+  * #### Threads
+    The number of CPU threads used for searching a position. For best performance, set
+    this equal to the number of CPU cores available.
+
+  * #### Hash
+    The size of the hash table in MB. It is recommended to set Hash after setting Threads.
+
+  * #### Ponder
+    Let Stockfish ponder its next move while the opponent is thinking.
+
+  * #### MultiPV
+    Output the N best lines (principal variations, PVs) when searching.
+    Leave at 1 for best performance.
+
+  * #### Use NNUE
+    Toggle between the NNUE and classical evaluation functions. If set to "true",
+    the network parameters must be available to load from file (see also EvalFile).
+
+  * #### EvalFile
+    The name of the file of the NNUE evaluation parameters. Depending on the GUI the
+    filename should include the full path to the folder/directory that contains the file.
+
+  * #### Contempt
+    A positive value for contempt favors middle game positions and avoids draws,
+    effective for the classical evaluation only.
+
+  * #### Analysis Contempt
+    By default, contempt is set to prefer the side to move. Set this option to "White"
+    or "Black" to analyse with contempt for that side, or "Off" to disable contempt.
+
+  * #### UCI_AnalyseMode
+    An option handled by your GUI.
+
+  * #### UCI_Chess960
+    An option handled by your GUI. If true, Stockfish will play Chess960.
+
+  * #### UCI_ShowWDL
+    If enabled, show approximate WDL statistics as part of the engine output.
+    These WDL numbers model expected game outcomes for a given evaluation and
+    game ply for engine self-play at fishtest LTC conditions (60+0.6s per game).
+
+  * #### UCI_LimitStrength
+    Enable weaker play aiming for an Elo rating as set by UCI_Elo. This option overrides Skill Level.
+
+  * #### UCI_Elo
+    If enabled by UCI_LimitStrength, aim for an engine strength of the given Elo.
+    This Elo rating has been calibrated at a time control of 60s+0.6s and anchored to CCRL 40/4.
+
+  * #### Skill Level
+    Lower the Skill Level in order to make Stockfish play weaker (see also UCI_LimitStrength).
+    Internally, MultiPV is enabled, and with a certain probability depending on the Skill Level a
+    weaker move will be played.
+
+  * #### SyzygyPath
+    Path to the folders/directories storing the Syzygy tablebase files. Multiple
+    directories are to be separated by ";" on Windows and by ":" on Unix-based
+    operating systems. Do not use spaces around the ";" or ":".
+
+    Example: `C:\tablebases\wdl345;C:\tablebases\wdl6;D:\tablebases\dtz345;D:\tablebases\dtz6`
+
+    It is recommended to store .rtbw files on an SSD. There is no loss in storing
+    the .rtbz files on a regular HD. It is recommended to verify all md5 checksums
+    of the downloaded tablebase files (`md5sum -c checksum.md5`) as corruption will
+    lead to engine crashes.
+
+  * #### SyzygyProbeDepth
+    Minimum remaining search depth for which a position is probed. Set this option
+    to a higher value to probe less aggressively if you experience too much slowdown
+    (in terms of nps) due to TB probing.
+
+  * #### Syzygy50MoveRule
+    Disable to let fifty-move rule draws detected by Syzygy tablebase probes count
+    as wins or losses. This is useful for ICCF correspondence games.
+
+  * #### SyzygyProbeLimit
+    Limit Syzygy tablebase probing to positions with at most this many pieces left
+    (including kings and pawns).
+
+  * #### Move Overhead
+    Assume a time delay of x ms due to network and GUI overheads. This is useful to
+    avoid losses on time in those cases.
+
+  * #### Slow Mover
+    Lower values will make Stockfish take less time in games, higher values will
+    make it think longer.
+
+  * #### nodestime
+    Tells the engine to use nodes searched instead of wall time to account for
+    elapsed time. Useful for engine testing.
+
+  * #### Clear Hash
+    Clear the hash table.
+
+  * #### Debug Log File
+    Write all communication to and from the engine into a text file.
+
+## classical and NNUE evaluation
+
+Both approaches assign a value to a position that is used in alpha-beta (PVS) search
+to find the best move. The classical evaluation computes this value as a function
+of various chess concepts, handcrafted by experts, tested and tuned using fishtest.
+The NNUE evaluation computes this value with a neural network based on basic
+inputs (e.g. piece positions only). The network is optimized and trained
+on the evaluations of millions of positions at moderate search depth.
+
+The NNUE evaluation was first introduced in shogi, and ported to Stockfish afterward.
+It can be evaluated efficiently on CPUs, and exploits the fact that only parts
+of the neural network need to be updated after a typical chess move.
+[The nodchip repository](https://github.com/nodchip/Stockfish) provides additional
+tools to train and develop the NNUE networks.
+
+On CPUs supporting modern vector instructions (avx2 and similar), the NNUE evaluation
+results in stronger playing strength, even if the nodes per second computed by the engine
+is somewhat lower (roughly 60% of nps is typical).
+
+Note that the NNUE evaluation depends on the Stockfish binary and the network parameter
+file (see EvalFile). Not every parameter file is compatible with a given Stockfish binary.
+The default value of the EvalFile UCI option is the name of a network that is guaranteed
+to be compatible with that binary.
+
+## What to expect from Syzygybases?
+
+If the engine is searching a position that is not in the tablebases (e.g.
+a position with 8 pieces), it will access the tablebases during the search.
+If the engine reports a very large score (typically 153.xx), this means
+that it has found a winning line into a tablebase position.
+
+If the engine is given a position to search that is in the tablebases, it
+will use the tablebases at the beginning of the search to preselect all
+good moves, i.e. all moves that preserve the win or preserve the draw while
+taking into account the 50-move rule.
+It will then perform a search only on those moves. **The engine will not move
+immediately**, unless there is only a single good move. **The engine likely
+will not report a mate score even if the position is known to be won.**
+
+It is therefore clear that this behaviour is not identical to what one might
+be used to with Nalimov tablebases. There are technical reasons for this
+difference, the main technical reason being that Nalimov tablebases use the
+DTM metric (distance-to-mate), while Syzygybases use a variation of the
+DTZ metric (distance-to-zero, zero meaning any move that resets the 50-move
+counter). This special metric is one of the reasons that Syzygybases are
+more compact than Nalimov tablebases, while still storing all information
+needed for optimal play and in addition being able to take into account
+the 50-move rule.
+
+## Large Pages
+
+Stockfish supports large pages on Linux and Windows. Large pages make
+the hash access more efficient, improving the engine speed, especially
+on large hash sizes. Typical increases are 5..10% in terms of nps, but
+speed increases up to 30% have been measured. The support is
+automatic. Stockfish attempts to use large pages when available and
+will fall back to regular memory allocation when this is not the case.
+
+### Support on Linux
+
+Large page support on Linux is obtained by the Linux kernel
+transparent huge pages functionality. Typically, transparent huge pages
+are already enabled and no configuration is needed.
+
+### Support on Windows
+
+The use of large pages requires "Lock Pages in Memory" privilege. See
+[Enable the Lock Pages in Memory Option (Windows)](https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows)
+on how to enable this privilege. Logout/login may be needed
+afterwards. Due to memory fragmentation, it may not always be
+possible to allocate large pages even when enabled. A reboot
+might alleviate this problem. To determine whether large pages
+are in use, see the engine log.
+
+## Compiling Stockfish yourself from the sources
+
+Stockfish has support for 32 or 64-bit CPUs, certain hardware
+instructions, big-endian machines such as Power PC, and other platforms.
+
+On Unix-like systems, it should be easy to compile Stockfish
+directly from the source code with the included Makefile in the folder
+`src`. In general it is recommended to run `make help` to see a list of make
+targets with corresponding descriptions.
+
+```
+    cd src
+    make help
+    make build ARCH=x86-64-modern
+```
+
+When not using the Makefile to compile (for instance with Microsoft MSVC) you
+need to manually set/unset some switches in the compiler command line; see
+file *types.h* for a quick reference.
+
+When reporting an issue or a bug, please tell us which version and
+compiler you used to create your executable. This information can
+be found by typing the following commands in a console:
+
+```
+    ./stockfish
+    compiler
+```
+
+## Understanding the code base and participating in the project
+
+Stockfish's improvement over the last couple of years has been a great
+community effort. There are a few ways to help contribute to its growth.

 ### Donating hardware

-Improving Stockfish requires a massive amount of testing. You can donate your
-hardware resources by installing the [Fishtest Worker][worker-link] and viewing
-the current tests on [Fishtest][fishtest-link].
+Improving Stockfish requires a massive amount of testing. You can donate
+your hardware resources by installing the [Fishtest Worker](https://github.com/glinscott/fishtest/wiki/Running-the-worker:-overview)
+and viewing the current tests on [Fishtest](https://tests.stockfishchess.org/tests).

 ### Improving the code

-In the [chessprogramming wiki][programming-link], many techniques used in
-Stockfish are explained with a lot of background information.
-The [section on Stockfish][programmingsf-link] describes many features
-and techniques used by Stockfish. However, it is generic rather than
-focused on Stockfish's precise implementation.
+If you want to help improve the code, there are several valuable resources:

-The engine testing is done on [Fishtest][fishtest-link].
-If you want to help improve Stockfish, please read this [guideline][guideline-link]
+* [In this wiki,](https://www.chessprogramming.org) many techniques used in
+Stockfish are explained with a lot of background information.
+
+* [The section on Stockfish](https://www.chessprogramming.org/Stockfish)
+describes many features and techniques used by Stockfish. However, it is
+generic rather than being focused on Stockfish's precise implementation.
+Nevertheless, it is a helpful resource.
+
+* The latest source can always be found on [GitHub](https://github.com/official-stockfish/Stockfish).
+Discussions about Stockfish take place in the [FishCooking](https://groups.google.com/forum/#!forum/fishcooking)
+group and engine testing is done on [Fishtest](https://tests.stockfishchess.org/tests).
+If you want to help improve Stockfish, please read this [guideline](https://github.com/glinscott/fishtest/wiki/Creating-my-first-test)
 first, where the basics of Stockfish development are explained.

-Discussions about Stockfish take place these days mainly in the Stockfish
-[Discord server][discord-link]. This is also the best place to ask questions
-about the codebase and how to improve it.
-
-## Compiling Stockfish
-
-Stockfish has support for 32 or 64-bit CPUs, certain hardware instructions,
-big-endian machines such as Power PC, and other platforms.
-
-On Unix-like systems, it should be easy to compile Stockfish directly from the
-source code with the included Makefile in the folder `src`. In general, it is
-recommended to run `make help` to see a list of make targets with corresponding
-descriptions. An example suitable for most Intel and AMD chips:
-
-```
-cd src
-make -j profile-build
-```
-
-Detailed compilation instructions for all platforms can be found in our
-[documentation][wiki-compile-link]. Our wiki also has information about
-the [UCI commands][wiki-uci-link] supported by Stockfish.

 ## Terms of use

-Stockfish is free and distributed under the
-[**GNU General Public License version 3**][license-link] (GPL v3). Essentially,
-this means you are free to do almost exactly what you want with the program,
-including distributing it among your friends, making it available for download
-from your website, selling it (either by itself or as part of some bigger
-software package), or using it as the starting point for a software project of
-your own.
+Stockfish is free, and distributed under the **GNU General Public License version 3**
+(GPL v3). Essentially, this means that you are free to do almost exactly
+what you want with the program, including distributing it among your
+friends, making it available for download from your web site, selling
+it (either by itself or as part of some bigger software package), or
+using it as the starting point for a software project of your own.

-The only real limitation is that whenever you distribute Stockfish in some way,
-you MUST always include the license and the full source code (or a pointer to
-where the source code can be found) to generate the exact binary you are
-distributing. If you make any changes to the source code, these changes must
-also be made available under GPL v3.
+The only real limitation is that whenever you distribute Stockfish in
+some way, you must always include the full source code, or a pointer
+to where the source code can be found. If you make any changes to the
+source code, these changes must also be made available under the GPL.

-## Acknowledgements
-
-Stockfish uses neural networks trained on [data provided by the Leela Chess Zero
-project][lc0-data-link], which is made available under the [Open Database License][odbl-link] (ODbL).
-
-
-[authors-link]:       https://github.com/official-stockfish/Stockfish/blob/master/AUTHORS
-[build-link]:         https://github.com/official-stockfish/Stockfish/actions/workflows/stockfish.yml
-[commits-link]:       https://github.com/official-stockfish/Stockfish/commits/master
-[discord-link]:       https://discord.gg/GWDRS3kU6R
-[issue-link]:         https://github.com/official-stockfish/Stockfish/issues/new?assignees=&labels=&template=BUG-REPORT.yml
-[discussions-link]:   https://github.com/official-stockfish/Stockfish/discussions/new
-[fishtest-link]:      https://tests.stockfishchess.org/tests
-[guideline-link]:     https://github.com/official-stockfish/fishtest/wiki/Creating-my-first-test
-[license-link]:       https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt
-[programming-link]:   https://www.chessprogramming.org/Main_Page
-[programmingsf-link]: https://www.chessprogramming.org/Stockfish
-[readme-link]:        https://github.com/official-stockfish/Stockfish/blob/master/README.md
-[release-link]:       https://github.com/official-stockfish/Stockfish/releases/latest
-[src-link]:           https://github.com/official-stockfish/Stockfish/tree/master/src
-[stockfish128-logo]:  https://stockfishchess.org/images/logo/icon_128x128.png
-[uci-link]:           https://backscattering.de/chess/uci/
-[website-link]:       https://stockfishchess.org
-[website-blog-link]:  https://stockfishchess.org/blog/
-[wiki-link]:          https://github.com/official-stockfish/Stockfish/wiki
-[wiki-compile-link]:  https://github.com/official-stockfish/Stockfish/wiki/Compiling-from-source
-[wiki-uci-link]:      https://github.com/official-stockfish/Stockfish/wiki/UCI-&-Commands
-[wiki-usage-link]:    https://github.com/official-stockfish/Stockfish/wiki/Download-and-usage
-[worker-link]:        https://github.com/official-stockfish/fishtest/wiki/Running-the-worker
-[lc0-data-link]:      https://storage.lczero.org/files/training_data
-[odbl-link]:          https://opendatacommons.org/licenses/odbl/odbl-10.txt
-
-[build-badge]:        https://img.shields.io/github/actions/workflow/status/official-stockfish/Stockfish/stockfish.yml?branch=master&style=for-the-badge&label=stockfish&logo=github
-[commits-badge]:      https://img.shields.io/github/commits-since/official-stockfish/Stockfish/latest?style=for-the-badge
-[discord-badge]:      https://img.shields.io/discord/435943710472011776?style=for-the-badge&label=discord&logo=Discord
-[fishtest-badge]:     https://img.shields.io/website?style=for-the-badge&down_color=red&down_message=Offline&label=Fishtest&up_color=success&up_message=Online&url=https%3A%2F%2Ftests.stockfishchess.org%2Ftests%2Ffinished
-[license-badge]:      https://img.shields.io/github/license/official-stockfish/Stockfish?style=for-the-badge&label=license&color=success
-[release-badge]:      https://img.shields.io/github/v/release/official-stockfish/Stockfish?style=for-the-badge&label=official%20release
-[website-badge]:      https://img.shields.io/website?style=for-the-badge&down_color=red&down_message=Offline&label=website&up_color=success&up_message=Online&url=https%3A%2F%2Fstockfishchess.org
+For full details, read the copy of the GPL v3 found in the file named
+*Copying.txt*.
--- a/Contributors.txt
+++ b/Contributors.txt
@@ -1,322 +1,154 @@
-Contributors to Fishtest with >10,000 CPU hours, as of 2025-03-22.
+Contributors with >10,000 CPU hours as of January 7, 2020
 Thank you!

-Username                                CPU Hours     Games played
------------------------------------------------------------------
-noobpwnftw                               41712226       3294628533
-vdv                                      28993864        954145232
-technologov                              24984442       1115931964
-linrock                                  11463033        741692823
-mlang                                     3026000        200065824
-okrout                                    2726068        248285678
-olafm                                     2420096        161297116
-pemo                                      1838361         62294199
-TueRens                                   1804847         80170868
-dew                                       1689162        100033738
-sebastronomy                              1655637         67294942
-grandphish2                               1474752         92156319
-JojoM                                     1130625         73666098
-rpngn                                      973590         59996557
-oz                                         921203         60370346
-tvijlbrief                                 796125         51897690
-gvreuls                                    792215         55184194
-mibere                                     703840         46867607
-leszek                                     599745         44681421
-cw                                         519602         34988289
-fastgm                                     503862         30260818
-CSU_Dynasty                                474794         31654170
-maximmasiutin                              441753         28129452
-robal                                      437950         28869118
-ctoks                                      435150         28542141
-crunchy                                    427414         27371625
-bcross                                     415724         29061187
-mgrabiak                                   380202         27586936
-velislav                                   342588         22140902
-ncfish1                                    329039         20624527
-Fisherman                                  327231         21829379
-Sylvain27                                  317021         11494912
-marrco                                     310446         19587107
-Dantist                                    296386         18031762
-Fifis                                      289595         14969251
-tolkki963                                  286043         23596996
-Calis007                                   272677         17281620
-cody                                       258835         13301710
-nordlandia                                 249322         16420192
-javran                                     212141         16507618
-glinscott                                  208125         13277240
-drabel                                     204167         13930674
-mhoram                                     202894         12601997
-bking_US                                   198894         11876016
-Wencey                                     198537          9606420
-Thanar                                     179852         12365359
-sschnee                                    170521         10891112
-armo9494                                   168141         11177514
-DesolatedDodo                              160605         10392474
-spams                                      157128         10319326
-maposora                                   155839         13963260
-sqrt2                                      147963          9724586
-vdbergh                                    140514          9242985
-jcAEie                                     140086         10603658
-CoffeeOne                                  137100          5024116
-malala                                     136182          8002293
-Goatminola                                 134893         11640524
-xoto                                       133759          9159372
-markkulix                                  132104         11000548
-naclosagc                                  131472          4660806
-Dubslow                                    129685          8527664
-davar                                      129023          8376525
-DMBK                                       122960          8980062
-dsmith                                     122059          7570238
-Wolfgang                                   120919          8619168
-CypressChess                               120902          8683904
-amicic                                     119661          7938029
-cuistot                                    116864          7828864
-sterni1971                                 113754          6054022
-Data                                       113305          8220352
-BrunoBanani                                112960          7436849
-megaman7de                                 109139          7360928
-skiminki                                   107583          7218170
-zeryl                                      104523          6618969
-MaZePallas                                 102823          6633619
-sunu                                       100167          7040199
-thirdlife                                   99178          2246544
-ElbertoOne                                  99028          7023771
-TataneSan                                   97257          4239502
-romangol                                    95662          7784954
-bigpen0r                                    94825          6529241
-brabos                                      92118          6186135
-Maxim                                       90818          3283364
-psk                                         89957          5984901
-szupaw                                      89775          7800606
-jromang                                     87260          5988073
-racerschmacer                               85805          6122790
-Vizvezdenec                                 83761          5344740
-0x3C33                                      82614          5271253
-Spprtr                                      82103          5663635
-BRAVONE                                     81239          5054681
-MarcusTullius                               78930          5189659
-Mineta                                      78731          4947996
-Torom                                       77978          2651656
-nssy                                        76497          5259388
-woutboat                                    76379          6031688
-teddybaer                                   75125          5407666
-Pking_cda                                   73776          5293873
-Viren6                                      73664          1356502
-yurikvelo                                   73611          5046822
-Bobo1239                                    70579          4794999
-solarlight                                  70517          5028306
-dv8silencer                                 70287          3883992
-manap                                       66273          4121774
-tinker                                      64333          4268790
-qurashee                                    61208          3429862
-DanielMiao1                                 60181          1317252
-AGI                                         58316          4336328
-jojo2357                                    57435          4944212
-robnjr                                      57262          4053117
-Freja                                       56938          3733019
-MaxKlaxxMiner                               56879          3423958
-ttruscott                                   56010          3680085
-rkl                                         55132          4164467
-jmdana                                      54988          4041917
-notchris                                    53936          4184018
-renouve                                     53811          3501516
-CounterFlow                                 52536          3203740
-finfish                                     51360          3370515
-eva42                                       51272          3599691
-eastorwest                                  51117          3454811
-rap                                         49985          3219146
-pb00067                                     49733          3298934
-GPUex                                       48686          3684998
-OuaisBla                                    48626          3445134
-ronaldjerum                                 47654          3240695
-biffhero                                    46564          3111352
-oryx                                        46141          3583236
-jibarbosa                                   45890          4541218
-DeepnessFulled                              45734          3944282
-abdicj                                      45577          2631772
-VoyagerOne                                  45476          3452465
-mecevdimitar                                44240          2584396
-speedycpu                                   43842          3003273
-jbwiebe                                     43305          2805433
-gopeto                                      43046          2821514
-YvesKn                                      42628          2177630
-Antihistamine                               41788          2761312
-mhunt                                       41735          2691355
-somethingintheshadows                       41502          3330418
-homyur                                      39893          2850481
-gri                                         39871          2515779
-vidar808                                    39774          1656372
-Garf                                        37741          2999686
-SC                                          37299          2731694
-Gaster319                                   37229          3289674
-csnodgrass                                  36207          2688994
-ZacHFX                                      35528          2486328
-icewulf                                     34782          2415146
-strelock                                    34716          2074055
-EthanOConnor                                33370          2090311
-slakovv                                     32915          2021889
-shawnxu                                     32144          2814668
-Gelma                                       31771          1551204
-srowen                                      31181          1732120
-kdave                                       31157          2198362
-manapbk                                     30987          1810399
-votoanthuan                                 30691          2460856
-Prcuvu                                      30377          2170122
-anst                                        30301          2190091
-jkiiski                                     30136          1904470
-spcc                                        29925          1901692
-hyperbolic.tom                              29840          2017394
-chuckstablers                               29659          2093438
-Pyafue                                      29650          1902349
-WoodMan777                                  29300          2579864
-belzedar94                                  28846          1811530
-chriswk                                     26902          1868317
-xwziegtm                                    26897          2124586
-Jopo12321                                   26818          1816482
-achambord                                   26582          1767323
-Patrick_G                                   26276          1801617
-yorkman                                     26193          1992080
-Ulysses                                     25517          1711634
-SFTUser                                     25182          1675689
-nabildanial                                 25068          1531665
-Sharaf_DG                                   24765          1786697
-rodneyc                                     24376          1416402
-jsys14                                      24297          1721230
-AndreasKrug                                 24235          1934711
-agg177                                      23890          1395014
-Ente                                        23752          1678188
-JanErik                                     23408          1703875
-Isidor                                      23388          1680691
-Norabor                                     23371          1603244
-Nullvalue                                   23155          2022752
-fishtester                                  23115          1581502
-wizardassassin                              23073          1789536
-Skiff84                                     22984          1053680
-cisco2015                                   22920          1763301
-ols                                         22914          1322047
-Hjax                                        22561          1566151
-Zirie                                       22542          1472937
-team-oh                                     22272          1636708
-mkstockfishtester                           22253          2029566
-Roady                                       22220          1465606
-MazeOfGalious                               21978          1629593
-sg4032                                      21950          1643373
-tsim67                                      21939          1343944
-ianh2105                                    21725          1632562
-Serpensin                                   21704          1809188
-xor12                                       21628          1680365
-dex                                         21612          1467203
-nesoneg                                     21494          1463031
-IslandLambda                                21468          1239756
-user213718                                  21454          1404128
-sphinx                                      21211          1384728
-qoo_charly_cai                              21136          1514927
-jjoshua2                                    21001          1423089
-Zake9298                                    20938          1565848
-horst.prack                                 20878          1465656
-0xB00B1ES                                   20590          1208666
-Dinde                                       20459          1292774
-t3hf1sht3ster                               20456           670646
-j3corre                                     20405           941444
-0x539                                       20332          1039516
-Adrian.Schmidt123                           20316          1281436
-malfoy                                      20313          1350694
-purpletree                                  20019          1461026
-wei                                         19973          1745989
-teenychess                                  19819          1762006
-rstoesser                                   19569          1293588
-eudhan                                      19274          1283717
-nalanzeyu                                   19211           396674
-vulcan                                      18871          1729392
-Karpovbot                                   18766          1053178
-jundery                                     18445          1115855
-Farseer                                     18281          1074642
-sebv15                                      18267          1262588
-whelanh                                     17887           347974
-ville                                       17883          1384026
-chris                                       17698          1487385
-purplefishies                               17595          1092533
-dju                                         17414           981289
-iisiraider                                  17275          1049015
-Karby                                       17177          1030688
-DragonLord                                  17014          1162790
-pirt                                        16991          1274215
-redstone59                                  16842          1461780
-Alb11747                                    16787          1213990
-Naven94                                     16414           951718
-scuzzi                                      16155           995347
-IgorLeMasson                                16064          1147232
-ako027ako                                   15671          1173203
-xuhdev                                      15516          1528278
-infinigon                                   15285           965966
-Nikolay.IT                                  15154          1068349
-Andrew Grant                                15114           895539
-OssumOpossum                                14857          1007129
-LunaticBFF57                                14525          1190310
-enedene                                     14476           905279
-YELNAMRON                                   14475          1141330
-RickGroszkiewicz                            14272          1385984
-joendter                                    14269           982014
-bpfliegel                                   14233           882523
-mpx86                                       14019           759568
-jpulman                                     13982           870599
-getraideBFF                                 13871          1172846
-crocogoat                                   13817          1119086
-Nesa92                                      13806          1116101
-joster                                      13710           946160
-mbeier                                      13650          1044928
-Pablohn26                                   13552          1088532
-wxt9861                                     13550          1312306
-Dark_wizzie                                 13422          1007152
-Rudolphous                                  13244           883140
-Jackfish                                    13177           894206
-MooTheCow                                   13091           892304
-Machariel                                   13010           863104
-mabichito                                   12903           749391
-thijsk                                      12886           722107
-AdrianSA                                    12860           804972
-Flopzee                                     12698           894821
-szczur90                                    12684           977536
-Kyrega                                      12661           456438
-mschmidt                                    12644           863193
-korposzczur                                 12606           838168
-fatmurphy                                   12547           853210
-Oakwen                                      12532           855759
-SapphireBrand                               12416           969604
-deflectooor                                 12386           579392
-modolief                                    12386           896470
-ckaz                                        12273           754644
-Hongildong                                  12201           648712
-pgontarz                                    12151           848794
-dbernier                                    12103           860824
-FormazChar                                  12051           913497
-shreven                                     12044           884734
-rensonthemove                               11999           971993
-stocky                                      11954           699440
-3cho                                        11842          1036786
-ImperiumAeternum                            11482           979142
-infinity                                    11470           727027
-aga                                         11412           695127
-Def9Infinity                                11408           700682
-torbjo                                      11395           729145
-Thomas A. Anderson                          11372           732094
-savage84                                    11358           670860
-d64                                         11263           789184
-ali-al-zhrani                               11245           779246
-vaskoul                                     11144           953906
-snicolet                                    11106           869170
-dapper                                      11032           771402
-Ethnikoi                                    10993           945906
-Snuuka                                      10938           435504
-Karmatron                                   10871           678306
-gerbil                                      10871          1005842
-OliverClarke                                10696           942654
-basepi                                      10637           744851
-michaelrpg                                  10624           748179
-Cubox                                       10621           826448
-dragon123118                                10421           936506
-OIVAS7572                                   10420           995586
-GBx3TV                                      10388           339952
-Garruk                                      10365           706465
-dzjp                                        10343           732529
-borinot                                     10026           902130
+Username                  CPU Hours   Games played
+--------------------------------------------------
+noobpwnftw                  9305707      695548021
+mlang                        780050       61648867
+dew                          621626       43921547
+mibere                       524702       42238645
+crunchy                      354587       27344275
+cw                           354495       27274181
+fastgm                       332801       22804359
+JojoM                        295750       20437451
+CSU_Dynasty                  262015       21828122
+Fisherman                    232181       18939229
+ctoks                        218866       17622052
+glinscott                    201989       13780820
+tvijlbrief                   201204       15337115
+velislav                     188630       14348485
+gvreuls                      187164       15149976
+bking_US                     180289       11876016
+nordlandia                   172076       13467830
+leszek                       157152       11443978
+Thanar                       148021       12365359
+spams                        141975       10319326
+drabel                       138073       11121749
+vdv                          137850        9394330
+mgrabiak                     133578       10454324
+TueRens                      132485       10878471
+bcross                       129683       11557084
+marrco                       126078        9356740
+sqrt2                        125830        9724586
+robal                        122873        9593418
+vdbergh                      120766        8926915
+malala                       115926        8002293
+CoffeeOne                    114241        5004100
+dsmith                       113189        7570238
+BrunoBanani                  104644        7436849
+Data                          92328        8220352
+mhoram                        89333        6695109
+davar                         87924        7009424
+xoto                          81094        6869316
+ElbertoOne                    80899        7023771
+grandphish2                   78067        6160199
+brabos                        77212        6186135
+psk                           75733        5984901
+BRAVONE                       73875        5054681
+sunu                          70771        5597972
+sterni1971                    70605        5590573
+MaZePallas                    66886        5188978
+Vizvezdenec                   63708        4967313
+nssy                          63462        5259388
+jromang                       61634        4940891
+teddybaer                     61231        5407666
+Pking_cda                     60099        5293873
+solarlight                    57469        5028306
+dv8silencer                   56913        3883992
+tinker                        54936        4086118
+renouve                       49732        3501516
+Freja                         49543        3733019
+robnjr                        46972        4053117
+rap                           46563        3219146
+Bobo1239                      46036        3817196
+ttruscott                     45304        3649765
+racerschmacer                 44881        3975413
+finfish                       44764        3370515
+eva42                         41783        3599691
+biffhero                      40263        3111352
+bigpen0r                      39817        3291647
+mhunt                         38871        2691355
+ronaldjerum                   38820        3240695
+Antihistamine                 38785        2761312
+pb00067                       38038        3086320
+speedycpu                     37591        3003273
+rkl                           37207        3289580
+VoyagerOne                    37050        3441673
+jbwiebe                       35320        2805433
+cuistot                       34191        2146279
+homyur                        33927        2850481
+manap                         32873        2327384
+gri                           32538        2515779
+oryx                          31267        2899051
+EthanOConnor                  30959        2090311
+SC                            30832        2730764
+csnodgrass                    29505        2688994
+jmdana                        29458        2205261
+strelock                      28219        2067805
+jkiiski                       27832        1904470
+Pyafue                        27533        1902349
+Garf                          27515        2747562
+eastorwest                    27421        2317535
+slakovv                       26903        2021889
+Prcuvu                        24835        2170122
+anst                          24714        2190091
+hyperbolic.tom                24319        2017394
+Patrick_G                     23687        1801617
+Sharaf_DG                     22896        1786697
+nabildanial                   22195        1519409
+chriswk                       21931        1868317
+achambord                     21665        1767323
+Zirie                         20887        1472937
+team-oh                       20217        1636708
+Isidor                        20096        1680691
+ncfish1                       19931        1520927
+nesoneg                       19875        1463031
+Spprtr                        19853        1548165
+JanErik                       19849        1703875
+agg177                        19478        1395014
+SFTUser                       19231        1567999
+xor12                         19017        1680165
+sg4032                        18431        1641865
+rstoesser                     18118        1293588
+MazeOfGalious                 17917        1629593
+j3corre                       17743         941444
+cisco2015                     17725        1690126
+ianh2105                      17706        1632562
+dex                           17678        1467203
+jundery                       17194        1115855
+iisiraider                    17019        1101015
+horst.prack                   17012        1465656
+Adrian.Schmidt123             16563        1281436
+purplefishies                 16342        1092533
+wei                           16274        1745989
+ville                         16144        1384026
+eudhan                        15712        1283717
+OuaisBla                      15581         972000
+DragonLord                    15559        1162790
+dju                           14716         875569
+chris                         14479        1487385
+0xB00B1ES                     14079        1001120
+OssumOpossum                  13776        1007129
+enedene                       13460         905279
+bpfliegel                     13346         884523
+Ente                          13198        1156722
+IgorLeMasson                  13087        1147232
+jpulman                       13000         870599
+ako027ako                     12775        1173203
+Nikolay.IT                    12352        1068349
+Andrew Grant                  12327         895539
+joster                        12008         950160
+AdrianSA                      11996         804972
+Nesa92                        11455        1111993
+fatmurphy                     11345         853210
+Dark_wizzie                   11108        1007152
+modolief                      10869         896470
+mschmidt                      10757         803401
+infinity                      10594         727027
+mabichito                     10524         749391
+Thomas A. Anderson            10474         732094
+thijsk                        10431         719357
+Flopzee                       10339         894821
+crocogoat                     10104        1013854
+SapphireBrand                 10104         969604
+stocky                        10017         699440
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -0,0 +1,75 @@
+version: 1.0.{build}
+clone_depth: 50
+
+branches:
+  only:
+    - master
+    - nnue-player-wip
+
+# Operating system (build VM template)
+os: Visual Studio 2019
+
+# Build platform, i.e. x86, x64, AnyCPU. This setting is optional.
+platform:
+  - x86
+  - x64
+
+# build Configuration, i.e. Debug, Release, etc.
+configuration:
+  - Debug
+  - Release
+
+matrix:
+  # The build fails immediately once one of the jobs fails
+  fast_finish: true
+
+# Scripts that are called at the very beginning, before repo cloning
+init:
+  - cmake --version
+  - msbuild /version
+
+before_build:
+  - ps: |
+      # Get sources
+      $src = get-childitem -Path *.cpp -Recurse | select -ExpandProperty FullName
+      $src = $src -join ' '
+      $src = $src.Replace("\", "/")
+
+      # Build CMakeLists.txt
+      $t = 'cmake_minimum_required(VERSION 3.17)',
+           'project(Stockfish)',
+           'set(CMAKE_CXX_STANDARD 17)',
+           'set(CMAKE_CXX_STANDARD_REQUIRED ON)',
+           'set (CMAKE_CXX_EXTENSIONS OFF)',
+           'set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/src)',
+           'set(source_files', $src, ')',
+           'add_executable(stockfish ${source_files})'
+
+      # Write CMakeLists.txt without BOM
+      $MyPath = (Get-Item -Path "." -Verbose).FullName + '\CMakeLists.txt'
+      $Utf8NoBomEncoding = New-Object System.Text.UTF8Encoding $False
+      [System.IO.File]::WriteAllLines($MyPath, $t, $Utf8NoBomEncoding)
+
+      # Obtain bench reference from git log
+      $b = git log HEAD | sls "\b[Bb]ench[ :]+[0-9]{7}" | select -first 1
+      $bench = $b -match '\D+(\d+)' | % { $matches[1] }
+      Write-Host "Reference bench:" $bench
+      $g = "Visual Studio 16 2019"
+      If (${env:PLATFORM} -eq 'x64') { $a = "x64" }
+      If (${env:PLATFORM} -eq 'x86') { $a = "Win32" }
+      cmake -G "${g}" -A ${a} .
+      Write-Host "Generated files for: " $g $a
+
+build_script:
+  - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal
+
+before_test:
+  - cd src/%CONFIGURATION%
+  - stockfish bench 2> out.txt >NUL
+  - ps: |
+      # Verify bench number
+      $s = (gc "./out.txt" | out-string)
+      $r = ($s -match 'Nodes searched \D+(\d+)' | % { $matches[1] })
+      Write-Host "Engine bench:" $r
+      Write-Host "Reference bench:" $bench
+      If ($r -ne $bench) { exit 1 }
--- a/scripts/.gitattributes
+++ b/scripts/.gitattributes
@@ -1 +0,0 @@
-*.sh text eol=lf
--- a/scripts/get_native_properties.sh
+++ b/scripts/get_native_properties.sh
@@ -1,159 +0,0 @@
-#!/bin/sh
-
-#
-# Returns properties of the native system.
-# best architecture as supported by the CPU
-# filename of the best binary uploaded as an artifact during CI
-#
-
-# Check if all the given flags are present in the CPU flags list
-check_flags() {
-  for flag; do
-    printf '%s\n' "$flags" | grep -q -w "$flag" || return 1
-  done
-}
-
-# Set the CPU flags list
-# remove underscores and points from flags, e.g. gcc uses avx512vnni, while some cpuinfo can have avx512_vnni, some systems use sse4_1 others sse4.1
-get_flags() {
-  flags=$(awk '/^flags[ \t]*:|^Features[ \t]*:/{gsub(/^flags[ \t]*:[ \t]*|^Features[ \t]*:[ \t]*|[_.]/, ""); line=$0} END{print line}' /proc/cpuinfo)
-}
-
-# Check for gcc march "znver1" or "znver2" https://en.wikichip.org/wiki/amd/cpuid
-check_znver_1_2() {
-  vendor_id=$(awk '/^vendor_id/{print $3; exit}' /proc/cpuinfo)
-  cpu_family=$(awk '/^cpu family/{print $4; exit}' /proc/cpuinfo)
-  [ "$vendor_id" = "AuthenticAMD" ] && [ "$cpu_family" = "23" ] && znver_1_2=true
-}
-
-# Set the file CPU loongarch64 architecture
-set_arch_loongarch64() {
-  if check_flags 'lasx'; then
-    true_arch='loongarch64-lasx'
-  elif check_flags 'lsx'; then
-    true_arch='lonngarch64-lsx'
-  else
-    true_arch='loongarch64'
-  fi
-}
-
-# Set the file CPU x86_64 architecture
-set_arch_x86_64() {
-  if check_flags 'avx512vnni' 'avx512dq' 'avx512f' 'avx512bw' 'avx512vl'; then
-    true_arch='x86-64-vnni256'
-  elif check_flags 'avx512f' 'avx512bw'; then
-    true_arch='x86-64-avx512'
-  elif [ -z "${znver_1_2+1}" ] && check_flags 'bmi2'; then
-    true_arch='x86-64-bmi2'
-  elif check_flags 'avx2'; then
-    true_arch='x86-64-avx2'
-  elif check_flags 'sse41' && check_flags 'popcnt'; then
-    true_arch='x86-64-sse41-popcnt'
-  else
-    true_arch='x86-64'
-  fi
-}
-
-set_arch_ppc_64() {
-  if $(grep -q -w "altivec" /proc/cpuinfo); then
-    power=$(grep -oP -m 1 'cpu\t+: POWER\K\d+' /proc/cpuinfo)
-    if [ "0$power" -gt 7 ]; then
-      # VSX started with POWER8
-      true_arch='ppc-64-vsx'
-    else
-      true_arch='ppc-64-altivec'
-    fi
-  else
-    true_arch='ppc-64'
-  fi
-}
-
-# Check the system type
-uname_s=$(uname -s)
-uname_m=$(uname -m)
-case $uname_s in
-  'Darwin') # Mac OSX system
-    case $uname_m in
-      'arm64')
-        true_arch='apple-silicon'
-        file_arch='m1-apple-silicon'
-        ;;
-      'x86_64')
-        flags=$(sysctl -n machdep.cpu.features machdep.cpu.leaf7_features | tr '\n' ' ' | tr '[:upper:]' '[:lower:]' | tr -d '_.')
-        set_arch_x86_64
-        if [ "$true_arch" = 'x86-64-vnni256' ] || [ "$true_arch" = 'x86-64-avx512' ]; then
-           file_arch='x86-64-bmi2'
-        fi
-        ;;
-    esac
-    file_os='macos'
-    file_ext='tar'
-    ;;
-  'Linux') # Linux system
-    get_flags
-    case $uname_m in
-      'x86_64')
-        file_os='ubuntu'
-        check_znver_1_2
-        set_arch_x86_64
-        ;;
-      'i686')
-        file_os='ubuntu'
-        true_arch='x86-32'
-        ;;
-      'ppc64'*)
-        file_os='ubuntu'
-        set_arch_ppc_64
-        ;;
-      'aarch64')
-        file_os='android'
-        true_arch='armv8'
-        if check_flags 'asimddp'; then
-          true_arch="$true_arch-dotprod"
-        fi
-        ;;
-      'armv7'*)
-        file_os='android'
-        true_arch='armv7'
-        if check_flags 'neon'; then
-          true_arch="$true_arch-neon"
-        fi
-        ;;
-      'loongarch64'*)
-        file_os='linux'
-        set_arch_loongarch64
-        ;;
-      *) # Unsupported machine type, exit with error
-        printf 'Unsupported machine type: %s\n' "$uname_m"
-        exit 1
-        ;;
-    esac
-    file_ext='tar'
-    ;;
-  'MINGW'*'ARM64'*) # Windows ARM64 system with POSIX compatibility layer
-    # TODO: older chips might be armv8, but we have no good way to detect, /proc/cpuinfo shows x86 info
-    file_os='windows'
-    true_arch='armv8-dotprod'
-    file_ext='zip'
-    ;;
-  'CYGWIN'*|'MINGW'*|'MSYS'*) # Windows x86_64system with POSIX compatibility layer
-    get_flags
-    check_znver_1_2
-    set_arch_x86_64
-    file_os='windows'
-    file_ext='zip'
-    ;;
-  *)
-    # Unknown system type, exit with error
-    printf 'Unsupported system type: %s\n' "$uname_s"
-    exit 1
-    ;;
-esac
-
-if [ -z "$file_arch" ]; then
-  file_arch=$true_arch
-fi
-
-file_name="stockfish-$file_os-$file_arch.$file_ext"
-
-printf '%s %s\n' "$true_arch" "$file_name"
--- a/scripts/net.sh
+++ b/scripts/net.sh
@@ -1,76 +0,0 @@
-#!/bin/sh
-
-wget_or_curl=$( (command -v wget > /dev/null 2>&1 && echo "wget -qO-") || \
-                (command -v curl > /dev/null 2>&1 && echo "curl -skL"))
-
-
-sha256sum=$( (command -v shasum > /dev/null 2>&1 && echo "shasum -a 256") || \
-             (command -v sha256sum > /dev/null 2>&1 && echo "sha256sum"))
-
-if [ -z "$sha256sum" ]; then
-  >&2 echo "sha256sum not found, NNUE files will be assumed valid."
-fi
-
-get_nnue_filename() {
-  grep "$1" evaluate.h | grep "#define" | sed "s/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/"
-}
-
-validate_network() {
-  # If no sha256sum command is available, assume the file is always valid.
-  if [ -n "$sha256sum" ] && [ -f "$1" ]; then
-    if [ "$1" != "nn-$($sha256sum "$1" | cut -c 1-12).nnue" ]; then
-      rm -f "$1"
-      return 1
-    fi
-  fi
-}
-
-fetch_network() {
-  _filename="$(get_nnue_filename "$1")"
-
-  if [ -z "$_filename" ]; then
-    >&2 echo "NNUE file name not found for: $1"
-    return 1
-  fi
-
-  if [ -f "$_filename" ]; then
-    if validate_network "$_filename"; then
-      echo "Existing $_filename validated, skipping download"
-      return
-    else
-      echo "Removing invalid NNUE file: $_filename"
-    fi
-  fi
-
-  if [ -z "$wget_or_curl" ]; then
-    >&2 printf "%s\n" "Neither wget or curl is installed." \
-          "Install one of these tools to download NNUE files automatically."
-    exit 1
-  fi
-
-  for url in \
-    "https://tests.stockfishchess.org/api/nn/$_filename" \
-    "https://github.com/official-stockfish/networks/raw/master/$_filename"; do
-    echo "Downloading from $url ..."
-    if $wget_or_curl "$url" > "$_filename"; then
-      if validate_network "$_filename"; then
-        echo "Successfully validated $_filename"
-      else
-        echo "Downloaded $_filename is invalid"
-        continue
-      fi
-    else
-      echo "Failed to download from $url"
-    fi
-    if [ -f "$_filename" ]; then
-      return
-    fi
-  done
-
-  # Download was not successful in the loop, return false.
-  >&2 echo "Failed to download $_filename"
-  return 1
-}
-
-fetch_network EvalFileDefaultNameBig && \
-fetch_network EvalFileDefaultNameSmall
--- a/src/Makefile
+++ b/src/Makefile
--- a/src/benchmark.cpp
+++ b/src/benchmark.cpp
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,18 +16,18 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include "benchmark.h"
-#include "numa.h"
-
-#include <cstdlib>
 #include <fstream>
 #include <iostream>
+#include <istream>
 #include <vector>

+#include "position.h"
+
+using namespace std;
+
 namespace {

-// clang-format off
-const std::vector<std::string> Defaults = {
+const vector<string> Defaults = {
  "setoption name UCI_Chess960 value false",
  "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1",
  "r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq - 0 10",
@@ -87,426 +87,73 @@ const std::vector<std::string> Defaults = {
  // Chess 960
  "setoption name UCI_Chess960 value true",
  "bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w HFhf - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6",
-  "nqbnrkrb/pppppppp/8/8/8/8/PPPPPPPP/NQBNRKRB w KQkq - 0 1",
  "setoption name UCI_Chess960 value false"
 };
-// clang-format on

-// clang-format off
-// human-randomly picked 5 games with <60 moves from
-// https://tests.stockfishchess.org/tests/view/665c71f9fd45fb0f907c21e0
-// only moves for one side
-const std::vector<std::vector<std::string>> BenchmarkPositions = {
-    {
-        "rnbq1k1r/ppp1bppp/4pn2/8/2B5/2NP1N2/PPP2PPP/R1BQR1K1 b - - 2 8",
-        "rnbq1k1r/pp2bppp/4pn2/2p5/2B2B2/2NP1N2/PPP2PPP/R2QR1K1 b - - 1 9",
-        "r1bq1k1r/pp2bppp/2n1pn2/2p5/2B1NB2/3P1N2/PPP2PPP/R2QR1K1 b - - 3 10",
-        "r1bq1k1r/pp2bppp/2n1p3/2p5/2B1PB2/5N2/PPP2PPP/R2QR1K1 b - - 0 11",
-        "r1b2k1r/pp2bppp/2n1p3/2p5/2B1PB2/5N2/PPP2PPP/3RR1K1 b - - 0 12",
-        "r1b1k2r/pp2bppp/2n1p3/2p5/2B1PB2/2P2N2/PP3PPP/3RR1K1 b - - 0 13",
-        "r1b1k2r/1p2bppp/p1n1p3/2p5/4PB2/2P2N2/PP2BPPP/3RR1K1 b - - 1 14",
-        "r1b1k2r/4bppp/p1n1p3/1pp5/P3PB2/2P2N2/1P2BPPP/3RR1K1 b - - 0 15",
-        "r1b1k2r/4bppp/p1n1p3/1P6/2p1PB2/2P2N2/1P2BPPP/3RR1K1 b - - 0 16",
-        "r1b1k2r/4bppp/2n1p3/1p6/2p1PB2/1PP2N2/4BPPP/3RR1K1 b - - 0 17",
-        "r3k2r/3bbppp/2n1p3/1p6/2P1PB2/2P2N2/4BPPP/3RR1K1 b - - 0 18",
-        "r3k2r/3bbppp/2n1p3/8/1pP1P3/2P2N2/3BBPPP/3RR1K1 b - - 1 19",
-        "1r2k2r/3bbppp/2n1p3/8/1pPNP3/2P5/3BBPPP/3RR1K1 b - - 3 20",
-        "1r2k2r/3bbppp/2n1p3/8/2PNP3/2B5/4BPPP/3RR1K1 b - - 0 21",
-        "1r2k2r/3bb1pp/2n1pp2/1N6/2P1P3/2B5/4BPPP/3RR1K1 b - - 1 22",
-        "1r2k2r/3b2pp/2n1pp2/1N6/1BP1P3/8/4BPPP/3RR1K1 b - - 0 23",
-        "1r2k2r/3b2pp/4pp2/1N6/1nP1P3/8/3RBPPP/4R1K1 b - - 1 24",
-        "1r5r/3bk1pp/4pp2/1N6/1nP1PP2/8/3RB1PP/4R1K1 b - - 0 25",
-        "1r5r/3bk1pp/2n1pp2/1N6/2P1PP2/8/3RBKPP/4R3 b - - 2 26",
-        "1r5r/3bk1pp/2n2p2/1N2p3/2P1PP2/6P1/3RBK1P/4R3 b - - 0 27",
-        "1r1r4/3bk1pp/2n2p2/1N2p3/2P1PP2/6P1/3RBK1P/R7 b - - 2 28",
-        "1r1r4/N3k1pp/2n1bp2/4p3/2P1PP2/6P1/3RBK1P/R7 b - - 4 29",
-        "1r1r4/3bk1pp/2N2p2/4p3/2P1PP2/6P1/3RBK1P/R7 b - - 0 30",
-        "1r1R4/4k1pp/2b2p2/4p3/2P1PP2/6P1/4BK1P/R7 b - - 0 31",
-        "3r4/4k1pp/2b2p2/4P3/2P1P3/6P1/4BK1P/R7 b - - 0 32",
-        "3r4/R3k1pp/2b5/4p3/2P1P3/6P1/4BK1P/8 b - - 1 33",
-        "8/3rk1pp/2b5/R3p3/2P1P3/6P1/4BK1P/8 b - - 3 34",
-        "8/3r2pp/2bk4/R1P1p3/4P3/6P1/4BK1P/8 b - - 0 35",
-        "8/2kr2pp/2b5/R1P1p3/4P3/4K1P1/4B2P/8 b - - 2 36",
-        "1k6/3r2pp/2b5/RBP1p3/4P3/4K1P1/7P/8 b - - 4 37",
-        "8/1k1r2pp/2b5/R1P1p3/4P3/3BK1P1/7P/8 b - - 6 38",
-        "1k6/3r2pp/2b5/2P1p3/4P3/3BK1P1/7P/R7 b - - 8 39",
-        "1k6/r5pp/2b5/2P1p3/4P3/3BK1P1/7P/5R2 b - - 10 40",
-        "1k3R2/6pp/2b5/2P1p3/4P3/r2BK1P1/7P/8 b - - 12 41",
-        "5R2/2k3pp/2b5/2P1p3/4P3/r2B2P1/3K3P/8 b - - 14 42",
-        "5R2/2k3pp/2b5/2P1p3/4P3/3BK1P1/r6P/8 b - - 16 43",
-        "5R2/2k3pp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 18 44",
-        "5R2/2k3pp/2b5/2P1p3/4P3/3B1KP1/r6P/8 b - - 20 45",
-        "8/2k2Rpp/2b5/2P1p3/4P3/r2B1KP1/7P/8 b - - 22 46",
-        "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 24 47",
-        "3k4/5Rpp/2b5/2P1p3/4P3/3B1KP1/r6P/8 b - - 26 48",
-        "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 28 49",
-        "3k4/5Rpp/2b5/2P1p3/4P3/3BK1P1/r6P/8 b - - 30 50",
-        "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/3K3P/8 b - - 32 51",
-        "3k4/5Rpp/2b5/2P1p3/4P3/2KB2P1/r6P/8 b - - 34 52",
-        "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/2K4P/8 b - - 36 53",
-        "3k4/5Rpp/2b5/2P1p3/4P3/1K1B2P1/r6P/8 b - - 38 54",
-        "3k4/6Rp/2b5/2P1p3/4P3/1K1B2P1/7r/8 b - - 0 55",
-        "3k4/8/2b3Rp/2P1p3/4P3/1K1B2P1/7r/8 b - - 1 56",
-        "8/2k3R1/2b4p/2P1p3/4P3/1K1B2P1/7r/8 b - - 3 57",
-        "3k4/8/2b3Rp/2P1p3/4P3/1K1B2P1/7r/8 b - - 5 58",
-        "8/2k5/2b3Rp/2P1p3/1K2P3/3B2P1/7r/8 b - - 7 59",
-        "8/2k5/2b3Rp/2P1p3/4P3/2KB2P1/3r4/8 b - - 9 60",
-        "8/2k5/2b3Rp/2P1p3/1K2P3/3B2P1/6r1/8 b - - 11 61",
-        "8/2k5/2b3Rp/2P1p3/4P3/2KB2P1/3r4/8 b - - 13 62",
-        "8/2k5/2b3Rp/2P1p3/2K1P3/3B2P1/6r1/8 b - - 15 63",
-        "4b3/2k3R1/7p/2P1p3/2K1P3/3B2P1/6r1/8 b - - 17 64",
-    },
-    {
-        "r1bqkbnr/npp1pppp/p7/3P4/4pB2/2N5/PPP2PPP/R2QKBNR w KQkq - 1 6",
-        "r1bqkb1r/npp1pppp/p4n2/3P4/4pB2/2N5/PPP1QPPP/R3KBNR w KQkq - 3 7",
-        "r2qkb1r/npp1pppp/p4n2/3P1b2/4pB2/2N5/PPP1QPPP/2KR1BNR w kq - 5 8",
-        "r2qkb1r/1pp1pppp/p4n2/1n1P1b2/4pB2/2N4P/PPP1QPP1/2KR1BNR w kq - 1 9",
-        "r2qkb1r/1pp1pppp/5n2/1p1P1b2/4pB2/7P/PPP1QPP1/2KR1BNR w kq - 0 10",
-        "r2qkb1r/1ppbpppp/5n2/1Q1P4/4pB2/7P/PPP2PP1/2KR1BNR w kq - 1 11",
-        "3qkb1r/1Qpbpppp/5n2/3P4/4pB2/7P/rPP2PP1/2KR1BNR w k - 0 12",
-        "q3kb1r/1Qpbpppp/5n2/3P4/4pB2/7P/rPP2PP1/1K1R1BNR w k - 2 13",
-        "r3kb1r/2pbpppp/5n2/3P4/4pB2/7P/1PP2PP1/1K1R1BNR w k - 0 14",
-        "r3kb1r/2Bb1ppp/4pn2/3P4/4p3/7P/1PP2PP1/1K1R1BNR w k - 0 15",
-        "r3kb1r/2Bb2pp/4pn2/8/4p3/7P/1PP2PP1/1K1R1BNR w k - 0 16",
-        "r3k2r/2Bb2pp/4pn2/2b5/4p3/7P/1PP1NPP1/1K1R1B1R w k - 2 17",
-        "r6r/2Bbk1pp/4pn2/2b5/3Np3/7P/1PP2PP1/1K1R1B1R w - - 4 18",
-        "r6r/b2bk1pp/4pn2/4B3/3Np3/7P/1PP2PP1/1K1R1B1R w - - 6 19",
-        "r1r5/b2bk1pp/4pn2/4B3/2BNp3/7P/1PP2PP1/1K1R3R w - - 8 20",
-        "r7/b2bk1pp/4pn2/2r1B3/2BNp3/1P5P/2P2PP1/1K1R3R w - - 1 21",
-        "rb6/3bk1pp/4pn2/2r1B3/2BNpP2/1P5P/2P3P1/1K1R3R w - - 1 22",
-        "1r6/3bk1pp/4pn2/2r5/2BNpP2/1P5P/2P3P1/1K1R3R w - - 0 23",
-        "1r6/3bk1p1/4pn1p/2r5/2BNpP2/1P5P/2P3P1/2KR3R w - - 0 24",
-        "8/3bk1p1/1r2pn1p/2r5/2BNpP1P/1P6/2P3P1/2KR3R w - - 1 25",
-        "8/3bk3/1r2pnpp/2r5/2BNpP1P/1P6/2P3P1/2K1R2R w - - 0 26",
-        "2b5/4k3/1r2pnpp/2r5/2BNpP1P/1P4P1/2P5/2K1R2R w - - 1 27",
-        "8/1b2k3/1r2pnpp/2r5/2BNpP1P/1P4P1/2P5/2K1R1R1 w - - 3 28",
-        "8/1b1nk3/1r2p1pp/2r5/2BNpPPP/1P6/2P5/2K1R1R1 w - - 1 29",
-        "8/1b2k3/1r2p1pp/2r1nP2/2BNp1PP/1P6/2P5/2K1R1R1 w - - 1 30",
-        "8/1b2k3/1r2p1p1/2r1nPp1/2BNp2P/1P6/2P5/2K1R1R1 w - - 0 31",
-        "8/1b2k3/1r2p1n1/2r3p1/2BNp2P/1P6/2P5/2K1R1R1 w - - 0 32",
-        "8/1b2k3/1r2p1n1/6r1/2BNp2P/1P6/2P5/2K1R3 w - - 0 33",
-        "8/1b2k3/1r2p3/4n1P1/2BNp3/1P6/2P5/2K1R3 w - - 1 34",
-        "8/1b2k3/1r2p3/4n1P1/2BN4/1P2p3/2P5/2K4R w - - 0 35",
-        "8/1b2k3/1r2p2R/6P1/2nN4/1P2p3/2P5/2K5 w - - 0 36",
-        "8/1b2k3/3rp2R/6P1/2PN4/4p3/2P5/2K5 w - - 1 37",
-        "8/4k3/3rp2R/6P1/2PN4/2P1p3/6b1/2K5 w - - 1 38",
-        "8/4k3/r3p2R/2P3P1/3N4/2P1p3/6b1/2K5 w - - 1 39",
-        "8/3k4/r3p2R/2P2NP1/8/2P1p3/6b1/2K5 w - - 3 40",
-        "8/3k4/4p2R/2P3P1/8/2P1N3/6b1/r1K5 w - - 1 41",
-        "8/3k4/4p2R/2P3P1/8/2P1N3/3K2b1/6r1 w - - 3 42",
-        "8/3k4/4p2R/2P3P1/8/2PKNb2/8/6r1 w - - 5 43",
-        "8/4k3/4p1R1/2P3P1/8/2PKNb2/8/6r1 w - - 7 44",
-        "8/4k3/4p1R1/2P3P1/3K4/2P1N3/8/6rb w - - 9 45",
-        "8/3k4/4p1R1/2P1K1P1/8/2P1N3/8/6rb w - - 11 46",
-        "8/3k4/4p1R1/2P3P1/5K2/2P1N3/8/4r2b w - - 13 47",
-        "8/3k4/2b1p2R/2P3P1/5K2/2P1N3/8/4r3 w - - 15 48",
-        "8/3k4/2b1p3/2P3P1/5K2/2P1N2R/8/6r1 w - - 17 49",
-        "2k5/7R/2b1p3/2P3P1/5K2/2P1N3/8/6r1 w - - 19 50",
-        "2k5/7R/4p3/2P3P1/b1P2K2/4N3/8/6r1 w - - 1 51",
-        "2k5/3bR3/4p3/2P3P1/2P2K2/4N3/8/6r1 w - - 3 52",
-        "3k4/3b2R1/4p3/2P3P1/2P2K2/4N3/8/6r1 w - - 5 53",
-        "3kb3/6R1/4p1P1/2P5/2P2K2/4N3/8/6r1 w - - 1 54",
-        "3kb3/6R1/4p1P1/2P5/2P2KN1/8/8/2r5 w - - 3 55",
-        "3kb3/6R1/4p1P1/2P1N3/2P2K2/8/8/5r2 w - - 5 56",
-        "3kb3/6R1/4p1P1/2P1N3/2P5/4K3/8/4r3 w - - 7 57",
-    },
-    {
-        "rnbq1rk1/ppp1npb1/4p1p1/3P3p/3PP3/2N2N2/PP2BPPP/R1BQ1RK1 b - - 0 8",
-        "rnbq1rk1/ppp1npb1/6p1/3pP2p/3P4/2N2N2/PP2BPPP/R1BQ1RK1 b - - 0 9",
-        "rn1q1rk1/ppp1npb1/6p1/3pP2p/3P2b1/2N2N2/PP2BPPP/R1BQR1K1 b - - 2 10",
-        "r2q1rk1/ppp1npb1/2n3p1/3pP2p/3P2bN/2N5/PP2BPPP/R1BQR1K1 b - - 4 11",
-        "r4rk1/pppqnpb1/2n3p1/3pP2p/3P2bN/2N4P/PP2BPP1/R1BQR1K1 b - - 0 12",
-        "r4rk1/pppqnpb1/2n3p1/3pP2p/3P3N/7P/PP2NPP1/R1BQR1K1 b - - 0 13",
-        "r4rk1/pppq1pb1/2n3p1/3pPN1p/3P4/7P/PP2NPP1/R1BQR1K1 b - - 0 14",
-        "r4rk1/ppp2pb1/2n3p1/3pPq1p/3P1N2/7P/PP3PP1/R1BQR1K1 b - - 1 15",
-        "r4rk1/pppq1pb1/2n3p1/3pP2p/P2P1N2/7P/1P3PP1/R1BQR1K1 b - - 0 16",
-        "r2n1rk1/pppq1pb1/6p1/3pP2p/P2P1N2/R6P/1P3PP1/2BQR1K1 b - - 2 17",
-        "r4rk1/pppq1pb1/4N1p1/3pP2p/P2P4/R6P/1P3PP1/2BQR1K1 b - - 0 18",
-        "r4rk1/ppp2pb1/4q1p1/3pP1Bp/P2P4/R6P/1P3PP1/3QR1K1 b - - 1 19",
-        "r3r1k1/ppp2pb1/4q1p1/3pP1Bp/P2P1P2/R6P/1P4P1/3QR1K1 b - - 0 20",
-        "r3r1k1/ppp3b1/4qpp1/3pP2p/P2P1P1B/R6P/1P4P1/3QR1K1 b - - 1 21",
-        "r3r1k1/ppp3b1/4q1p1/3pP2p/P4P1B/R6P/1P4P1/3QR1K1 b - - 0 22",
-        "r4rk1/ppp3b1/4q1p1/3pP1Bp/P4P2/R6P/1P4P1/3QR1K1 b - - 2 23",
-        "r4rk1/pp4b1/4q1p1/2ppP1Bp/P4P2/3R3P/1P4P1/3QR1K1 b - - 1 24",
-        "r4rk1/pp4b1/4q1p1/2p1P1Bp/P2p1PP1/3R3P/1P6/3QR1K1 b - - 0 25",
-        "r4rk1/pp4b1/4q1p1/2p1P1B1/P2p1PP1/3R4/1P6/3QR1K1 b - - 0 26",
-        "r5k1/pp3rb1/4q1p1/2p1P1B1/P2p1PP1/6R1/1P6/3QR1K1 b - - 2 27",
-        "5rk1/pp3rb1/4q1p1/2p1P1B1/P2pRPP1/6R1/1P6/3Q2K1 b - - 4 28",
-        "5rk1/1p3rb1/p3q1p1/P1p1P1B1/3pRPP1/6R1/1P6/3Q2K1 b - - 0 29",
-        "4r1k1/1p3rb1/p3q1p1/P1p1P1B1/3pRPP1/1P4R1/8/3Q2K1 b - - 0 30",
-        "4r1k1/5rb1/pP2q1p1/2p1P1B1/3pRPP1/1P4R1/8/3Q2K1 b - - 0 31",
-        "4r1k1/5rb1/pq4p1/2p1P1B1/3pRPP1/1P4R1/4Q3/6K1 b - - 1 32",
-        "4r1k1/1r4b1/pq4p1/2p1P1B1/3pRPP1/1P4R1/2Q5/6K1 b - - 3 33",
-        "4r1k1/1r4b1/1q4p1/p1p1P1B1/3p1PP1/1P4R1/2Q5/4R1K1 b - - 1 34",
-        "4r1k1/3r2b1/1q4p1/p1p1P1B1/2Qp1PP1/1P4R1/8/4R1K1 b - - 3 35",
-        "4r1k1/3r2b1/4q1p1/p1p1P1B1/2Qp1PP1/1P4R1/5K2/4R3 b - - 5 36",
-        "4r1k1/3r2b1/6p1/p1p1P1B1/2Pp1PP1/6R1/5K2/4R3 b - - 0 37",
-        "4r1k1/3r2b1/6p1/p1p1P1B1/2P2PP1/3p2R1/5K2/3R4 b - - 1 38",
-        "5rk1/3r2b1/6p1/p1p1P1B1/2P2PP1/3p2R1/8/3RK3 b - - 3 39",
-        "5rk1/6b1/6p1/p1p1P1B1/2Pr1PP1/3R4/8/3RK3 b - - 0 40",
-        "5rk1/3R2b1/6p1/p1p1P1B1/2r2PP1/8/8/3RK3 b - - 1 41",
-        "5rk1/3R2b1/6p1/p1p1P1B1/4rPP1/8/3K4/3R4 b - - 3 42",
-        "1r4k1/3R2b1/6p1/p1p1P1B1/4rPP1/2K5/8/3R4 b - - 5 43",
-        "1r4k1/3R2b1/6p1/p1p1P1B1/2K2PP1/4r3/8/3R4 b - - 7 44",
-        "1r3bk1/8/3R2p1/p1p1P1B1/2K2PP1/4r3/8/3R4 b - - 9 45",
-        "1r3bk1/8/6R1/2p1P1B1/p1K2PP1/4r3/8/3R4 b - - 0 46",
-        "1r3b2/5k2/R7/2p1P1B1/p1K2PP1/4r3/8/3R4 b - - 2 47",
-        "5b2/1r3k2/R7/2p1P1B1/p1K2PP1/4r3/8/7R b - - 4 48",
-        "5b2/5k2/R7/2pKP1B1/pr3PP1/4r3/8/7R b - - 6 49",
-        "5b2/5k2/R1K5/2p1P1B1/p2r1PP1/4r3/8/7R b - - 8 50",
-        "8/R4kb1/2K5/2p1P1B1/p2r1PP1/4r3/8/7R b - - 10 51",
-        "8/R5b1/2K3k1/2p1PPB1/p2r2P1/4r3/8/7R b - - 0 52",
-        "8/6R1/2K5/2p1PPk1/p2r2P1/4r3/8/7R b - - 0 53",
-        "8/6R1/2K5/2p1PP2/p2r1kP1/4r3/8/5R2 b - - 2 54",
-        "8/6R1/2K2P2/2p1P3/p2r2P1/4r1k1/8/5R2 b - - 0 55",
-        "8/5PR1/2K5/2p1P3/p2r2P1/4r3/6k1/5R2 b - - 0 56",
-    },
-    {
-        "rn1qkb1r/p1pbpppp/5n2/8/2pP4/2N5/1PQ1PPPP/R1B1KBNR w KQkq - 0 7",
-        "r2qkb1r/p1pbpppp/2n2n2/8/2pP4/2N2N2/1PQ1PPPP/R1B1KB1R w KQkq - 2 8",
-        "r2qkb1r/p1pbpppp/5n2/8/1npPP3/2N2N2/1PQ2PPP/R1B1KB1R w KQkq - 1 9",
-        "r2qkb1r/p1pb1ppp/4pn2/8/1npPP3/2N2N2/1P3PPP/R1BQKB1R w KQkq - 0 10",
-        "r2qk2r/p1pbbppp/4pn2/8/1nBPP3/2N2N2/1P3PPP/R1BQK2R w KQkq - 1 11",
-        "r2q1rk1/p1pbbppp/4pn2/8/1nBPP3/2N2N2/1P3PPP/R1BQ1RK1 w - - 3 12",
-        "r2q1rk1/2pbbppp/p3pn2/8/1nBPPB2/2N2N2/1P3PPP/R2Q1RK1 w - - 0 13",
-        "r2q1rk1/2p1bppp/p3pn2/1b6/1nBPPB2/2N2N2/1P3PPP/R2QR1K1 w - - 2 14",
-        "r2q1rk1/4bppp/p1p1pn2/1b6/1nBPPB2/1PN2N2/5PPP/R2QR1K1 w - - 0 15",
-        "r4rk1/3qbppp/p1p1pn2/1b6/1nBPPB2/1PN2N2/3Q1PPP/R3R1K1 w - - 2 16",
-        "r4rk1/1q2bppp/p1p1pn2/1b6/1nBPPB2/1PN2N1P/3Q1PP1/R3R1K1 w - - 1 17",
-        "r3r1k1/1q2bppp/p1p1pn2/1b6/1nBPPB2/1PN2N1P/4QPP1/R3R1K1 w - - 3 18",
-        "r3r1k1/1q1nbppp/p1p1p3/1b6/1nBPPB2/1PN2N1P/4QPP1/3RR1K1 w - - 5 19",
-        "r3rbk1/1q1n1ppp/p1p1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/4R1K1 w - - 7 20",
-        "r3rbk1/1q3ppp/pnp1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/4R2K w - - 9 21",
-        "2r1rbk1/1q3ppp/pnp1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/1R5K w - - 11 22",
-        "2r1rbk1/1q4pp/pnp1pp2/1b6/1nBPPB2/1PN2N1P/4QPP1/1R1R3K w - - 0 23",
-        "2r1rbk1/5qpp/pnp1pp2/1b6/1nBPP3/1PN1BN1P/4QPP1/1R1R3K w - - 2 24",
-        "2r1rbk1/5qp1/pnp1pp1p/1b6/1nBPP3/1PN1BN1P/4QPP1/1R1R2K1 w - - 0 25",
-        "2r1rbk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/n3QPP1/1R1R2K1 w - - 0 26",
-        "r3rbk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/Q4PP1/1R1R2K1 w - - 1 27",
-        "rr3bk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/Q4PP1/R2R2K1 w - - 3 28",
-        "rr2qbk1/6p1/pnp1pp1p/1b6/2BPP3/1P2BN1P/4QPP1/R2R2K1 w - - 5 29",
-        "rr2qbk1/6p1/1np1pp1p/pb6/2BPP3/1P1QBN1P/5PP1/R2R2K1 w - - 0 30",
-        "rr2qbk1/6p1/1n2pp1p/pp6/3PP3/1P1QBN1P/5PP1/R2R2K1 w - - 0 31",
-        "rr2qbk1/6p1/1n2pp1p/1p1P4/p3P3/1P1QBN1P/5PP1/R2R2K1 w - - 0 32",
-        "rr2qbk1/3n2p1/3Ppp1p/1p6/p3P3/1P1QBN1P/5PP1/R2R2K1 w - - 1 33",
-        "rr3bk1/3n2p1/3Ppp1p/1p5q/pP2P3/3QBN1P/5PP1/R2R2K1 w - - 1 34",
-        "rr3bk1/3n2p1/3Ppp1p/1p5q/1P2P3/p2QBN1P/5PP1/2RR2K1 w - - 0 35",
-        "1r3bk1/3n2p1/r2Ppp1p/1p5q/1P2P3/pQ2BN1P/5PP1/2RR2K1 w - - 2 36",
-        "1r2qbk1/2Rn2p1/r2Ppp1p/1p6/1P2P3/pQ2BN1P/5PP1/3R2K1 w - - 4 37",
-        "1r2qbk1/2Rn2p1/r2Ppp1p/1pB5/1P2P3/1Q3N1P/p4PP1/3R2K1 w - - 0 38",
-        "1r2q1k1/2Rn2p1/r2bpp1p/1pB5/1P2P3/1Q3N1P/p4PP1/R5K1 w - - 0 39",
-        "1r2q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/1Q3N1P/p4PP1/R5K1 w - - 0 40",
-        "2r1q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 1 41",
-        "1r2q1k1/1R1n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 3 42",
-        "2r1q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 5 43",
-        "1r2q1k1/1R1n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 7 44",
-        "1rq3k1/R2n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 9 45",
-        "2q3k1/Rr1n2p1/3rpp1p/1p6/1P2P3/5N1P/4QPP1/R5K1 w - - 11 46",
-        "Rrq3k1/3n2p1/3rpp1p/1p6/1P2P3/5N1P/4QPP1/R5K1 w - - 13 47",
-    },
-    {
-        "rn1qkb1r/1pp2ppp/p4p2/3p1b2/5P2/1P2PN2/P1PP2PP/RN1QKB1R b KQkq - 1 6",
-        "r2qkb1r/1pp2ppp/p1n2p2/3p1b2/3P1P2/1P2PN2/P1P3PP/RN1QKB1R b KQkq - 0 7",
-        "r2qkb1r/1pp2ppp/p4p2/3p1b2/1n1P1P2/1P1BPN2/P1P3PP/RN1QK2R b KQkq - 2 8",
-        "r2qkb1r/1pp2ppp/p4p2/3p1b2/3P1P2/1P1PPN2/P5PP/RN1QK2R b KQkq - 0 9",
-        "r2qk2r/1pp2ppp/p2b1p2/3p1b2/3P1P2/1PNPPN2/P5PP/R2QK2R b KQkq - 2 10",
-        "r2qk2r/1p3ppp/p1pb1p2/3p1b2/3P1P2/1PNPPN2/P5PP/R2Q1RK1 b kq - 1 11",
-        "r2q1rk1/1p3ppp/p1pb1p2/3p1b2/3P1P2/1PNPPN2/P2Q2PP/R4RK1 b - - 3 12",
-        "r2qr1k1/1p3ppp/p1pb1p2/3p1b2/3P1P2/1P1PPN2/P2QN1PP/R4RK1 b - - 5 13",
-        "r3r1k1/1p3ppp/pqpb1p2/3p1b2/3P1P2/1P1PPNN1/P2Q2PP/R4RK1 b - - 7 14",
-        "r3r1k1/1p3ppp/pqp2p2/3p1b2/1b1P1P2/1P1PPNN1/P1Q3PP/R4RK1 b - - 9 15",
-        "r3r1k1/1p1b1ppp/pqp2p2/3p4/1b1P1P2/1P1PPNN1/P4QPP/R4RK1 b - - 11 16",
-        "2r1r1k1/1p1b1ppp/pqp2p2/3p4/1b1PPP2/1P1P1NN1/P4QPP/R4RK1 b - - 0 17",
-        "2r1r1k1/1p1b1ppp/pq3p2/2pp4/1b1PPP2/PP1P1NN1/5QPP/R4RK1 b - - 0 18",
-        "2r1r1k1/1p1b1ppp/pq3p2/2Pp4/4PP2/PPbP1NN1/5QPP/R4RK1 b - - 0 19",
-        "2r1r1k1/1p1b1ppp/p4p2/2Pp4/4PP2/PqbP1NN1/5QPP/RR4K1 b - - 1 20",
-        "2r1r1k1/1p1b1ppp/p4p2/2Pp4/q3PP2/P1bP1NN1/R4QPP/1R4K1 b - - 3 21",
-        "2r1r1k1/1p3ppp/p4p2/1bPP4/q4P2/P1bP1NN1/R4QPP/1R4K1 b - - 0 22",
-        "2r1r1k1/1p3ppp/p4p2/2PP4/q4P2/P1bb1NN1/R4QPP/2R3K1 b - - 1 23",
-        "2r1r1k1/1p3ppp/p2P1p2/2P5/2q2P2/P1bb1NN1/R4QPP/2R3K1 b - - 0 24",
-        "2rr2k1/1p3ppp/p2P1p2/2P5/2q2P2/P1bb1NN1/R4QPP/2R4K b - - 2 25",
-        "2rr2k1/1p3ppp/p2P1p2/2Q5/5P2/P1bb1NN1/R5PP/2R4K b - - 0 26",
-        "3r2k1/1p3ppp/p2P1p2/2r5/5P2/P1bb1N2/R3N1PP/2R4K b - - 1 27",
-        "3r2k1/1p3ppp/p2P1p2/2r5/5P2/P1b2N2/4R1PP/2R4K b - - 0 28",
-        "3r2k1/1p3ppp/p2P1p2/2r5/1b3P2/P4N2/4R1PP/3R3K b - - 2 29",
-        "3r2k1/1p2Rppp/p2P1p2/b1r5/5P2/P4N2/6PP/3R3K b - - 4 30",
-        "3r2k1/1R3ppp/p1rP1p2/b7/5P2/P4N2/6PP/3R3K b - - 0 31",
-        "3r2k1/1R3ppp/p2R1p2/b7/5P2/P4N2/6PP/7K b - - 0 32",
-        "6k1/1R3ppp/p2r1p2/b7/5P2/P4NP1/7P/7K b - - 0 33",
-        "6k1/1R3p1p/p2r1pp1/b7/5P1P/P4NP1/8/7K b - - 0 34",
-        "6k1/3R1p1p/pr3pp1/b7/5P1P/P4NP1/8/7K b - - 2 35",
-        "6k1/5p2/pr3pp1/b2R3p/5P1P/P4NP1/8/7K b - - 1 36",
-        "6k1/5p2/pr3pp1/7p/5P1P/P1bR1NP1/8/7K b - - 3 37",
-        "6k1/5p2/p1r2pp1/7p/5P1P/P1bR1NP1/6K1/8 b - - 5 38",
-        "6k1/5p2/p1r2pp1/b2R3p/5P1P/P4NP1/6K1/8 b - - 7 39",
-        "6k1/5p2/p4pp1/b2R3p/5P1P/P4NPK/2r5/8 b - - 9 40",
-        "6k1/2b2p2/p4pp1/7p/5P1P/P2R1NPK/2r5/8 b - - 11 41",
-        "6k1/2b2p2/5pp1/p6p/3N1P1P/P2R2PK/2r5/8 b - - 1 42",
-        "6k1/2b2p2/5pp1/p6p/3N1P1P/P1R3PK/r7/8 b - - 3 43",
-        "6k1/5p2/1b3pp1/p6p/5P1P/P1R3PK/r1N5/8 b - - 5 44",
-        "8/5pk1/1bR2pp1/p6p/5P1P/P5PK/r1N5/8 b - - 7 45",
-        "3b4/5pk1/2R2pp1/p4P1p/7P/P5PK/r1N5/8 b - - 0 46",
-        "8/4bpk1/2R2pp1/p4P1p/6PP/P6K/r1N5/8 b - - 0 47",
-        "8/5pk1/2R2pP1/p6p/6PP/b6K/r1N5/8 b - - 0 48",
-        "8/6k1/2R2pp1/p6P/7P/b6K/r1N5/8 b - - 0 49",
-        "8/6k1/2R2p2/p6p/7P/b5K1/r1N5/8 b - - 1 50",
-        "8/8/2R2pk1/p6p/7P/b4K2/r1N5/8 b - - 3 51",
-        "8/8/2R2pk1/p6p/7P/4NK2/rb6/8 b - - 5 52",
-        "2R5/8/5pk1/7p/p6P/4NK2/rb6/8 b - - 1 53",
-        "6R1/8/5pk1/7p/p6P/4NK2/1b6/r7 b - - 3 54",
-        "R7/5k2/5p2/7p/p6P/4NK2/1b6/r7 b - - 5 55",
-        "R7/5k2/5p2/7p/7P/p3N3/1b2K3/r7 b - - 1 56",
-        "8/R4k2/5p2/7p/7P/p3N3/1b2K3/7r b - - 3 57",
-        "8/8/5pk1/7p/R6P/p3N3/1b2K3/7r b - - 5 58",
-        "8/8/5pk1/7p/R6P/p7/4K3/2bN3r b - - 7 59",
-        "8/8/5pk1/7p/R6P/p7/4KN1r/2b5 b - - 9 60",
-        "8/8/5pk1/7p/R6P/p3K3/1b3N1r/8 b - - 11 61",
-        "8/8/R4pk1/7p/7P/p1b1K3/5N1r/8 b - - 13 62",
-        "8/8/5pk1/7p/7P/2b1K3/R4N1r/8 b - - 0 63",
-        "8/8/5pk1/7p/3K3P/8/R4N1r/4b3 b - - 2 64",
-    }
-};
-// clang-format on
+} // namespace

-}  // namespace
+/// setup_bench() builds a list of UCI commands to be run by bench. There
+/// are five parameters: TT size in MB, number of search threads that
+/// should be used, the limit value spent for each position, a file name
+/// where to look for positions in FEN format and the type of the limit:
+/// depth, perft, nodes, movetime (in millisecs) or eval (runs "eval", ignoring the limit).
+///
+/// bench -> search default positions up to depth 13
+/// bench 64 1 15 -> search default positions up to depth 15 (TT = 64MB)
+/// bench 64 4 5000 current movetime -> search current position with 4 threads for 5 sec
+/// bench 64 1 100000 default nodes -> search default positions for 100K nodes each
+/// bench 16 1 5 default perft -> run a perft 5 on default positions

-namespace Stockfish::Benchmark {
+vector<string> setup_bench(const Position& current, istream& is) {

-// Builds a list of UCI commands to be run by bench. There
-// are five parameters: TT size in MB, number of search threads that
-// should be used, the limit value spent for each position, a file name
-// where to look for positions in FEN format, and the type of the limit:
-// depth, perft, nodes and movetime (in milliseconds). Examples:
-//
-// bench                            : search default positions up to depth 13
-// bench 64 1 15                    : search default positions up to depth 15 (TT = 64MB)
-// bench 64 1 100000 default nodes  : search default positions for 100K nodes each
-// bench 64 4 5000 current movetime : search current position with 4 threads for 5 sec
-// bench 16 1 5 blah perft          : run a perft 5 on positions in file "blah"
-std::vector<std::string> setup_bench(const std::string& currentFen, std::istream& is) {
+  vector<string> fens, list;
+  string go, token;

-    std::vector<std::string> fens, list;
-    std::string              go, token;
+  // Assign default values to missing arguments
+  string ttSize    = (is >> token) ? token : "16";
+  string threads   = (is >> token) ? token : "1";
+  string limit     = (is >> token) ? token : "13";
+  string fenFile   = (is >> token) ? token : "default";
+  string limitType = (is >> token) ? token : "depth";

-    // Assign default values to missing arguments
-    std::string ttSize    = (is >> token) ? token : "16";
-    std::string threads   = (is >> token) ? token : "1";
-    std::string limit     = (is >> token) ? token : "13";
-    std::string fenFile   = (is >> token) ? token : "default";
-    std::string limitType = (is >> token) ? token : "depth";
+  go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit;

-    go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit;
+  if (fenFile == "default")
+      fens = Defaults;

-    if (fenFile == "default")
-        fens = Defaults;
+  else if (fenFile == "current")
+      fens.push_back(current.fen());

-    else if (fenFile == "current")
-        fens.push_back(currentFen);
+  else
+  {
+      string fen;
+      ifstream file(fenFile);

-    else
-    {
-        std::string   fen;
-        std::ifstream file(fenFile);
+      if (!file.is_open())
+      {
+          cerr << "Unable to open file " << fenFile << endl;
+          exit(EXIT_FAILURE);
+      }

-        if (!file.is_open())
-        {
-            std::cerr << "Unable to open file " << fenFile << std::endl;
-            exit(EXIT_FAILURE);
-        }
+      while (getline(file, fen))
+          if (!fen.empty())
+              fens.push_back(fen);

-        while (getline(file, fen))
-            if (!fen.empty())
-                fens.push_back(fen);
+      file.close();
+  }

-        file.close();
-    }
+  list.emplace_back("setoption name Threads value " + threads);
+  list.emplace_back("setoption name Hash value " + ttSize);
+  list.emplace_back("ucinewgame");

-    list.emplace_back("setoption name Threads value " + threads);
-    list.emplace_back("setoption name Hash value " + ttSize);
-    list.emplace_back("ucinewgame");
+  for (const string& fen : fens)
+      if (fen.find("setoption") != string::npos)
+          list.emplace_back(fen);
+      else
+      {
+          list.emplace_back("position fen " + fen);
+          list.emplace_back(go);
+      }

-    for (const std::string& fen : fens)
-        if (fen.find("setoption") != std::string::npos)
-            list.emplace_back(fen);
-        else
-        {
-            list.emplace_back("position fen " + fen);
-            list.emplace_back(go);
-        }
-
-    return list;
+  return list;
 }
-
-BenchmarkSetup setup_benchmark(std::istream& is) {
-    // TT_SIZE_PER_THREAD is chosen such that roughly half of the hash is used all positions
-    // for the current sequence have been searched.
-    static constexpr int TT_SIZE_PER_THREAD = 128;
-
-    static constexpr int DEFAULT_DURATION_S = 150;
-
-    BenchmarkSetup setup{};
-
-    // Assign default values to missing arguments
-    int desiredTimeS;
-
-    if (!(is >> setup.threads))
-        setup.threads = get_hardware_concurrency();
-    else
-        setup.originalInvocation += std::to_string(setup.threads);
-
-    if (!(is >> setup.ttSize))
-        setup.ttSize = TT_SIZE_PER_THREAD * setup.threads;
-    else
-        setup.originalInvocation += " " + std::to_string(setup.ttSize);
-
-    if (!(is >> desiredTimeS))
-        desiredTimeS = DEFAULT_DURATION_S;
-    else
-        setup.originalInvocation += " " + std::to_string(desiredTimeS);
-
-    setup.filledInvocation += std::to_string(setup.threads) + " " + std::to_string(setup.ttSize)
-                            + " " + std::to_string(desiredTimeS);
-
-    auto getCorrectedTime = [&](int ply) {
-        // time per move is fit roughly based on LTC games
-        // seconds = 50/{ply+15}
-        // ms = 50000/{ply+15}
-        // with this fit 10th move gets 2000ms
-        // adjust for desired 10th move time
-        return 50000.0 / (static_cast<double>(ply) + 15.0);
-    };
-
-    float totalTime = 0;
-    for (const auto& game : BenchmarkPositions)
-    {
-        setup.commands.emplace_back("ucinewgame");
-        int ply = 1;
-        for (int i = 0; i < static_cast<int>(game.size()); ++i)
-        {
-            const float correctedTime = getCorrectedTime(ply);
-            totalTime += correctedTime;
-            ply += 1;
-        }
-    }
-
-    float timeScaleFactor = static_cast<float>(desiredTimeS * 1000) / totalTime;
-
-    for (const auto& game : BenchmarkPositions)
-    {
-        setup.commands.emplace_back("ucinewgame");
-        int ply = 1;
-        for (const std::string& fen : game)
-        {
-            setup.commands.emplace_back("position fen " + fen);
-
-            const int correctedTime = static_cast<int>(getCorrectedTime(ply) * timeScaleFactor);
-            setup.commands.emplace_back("go movetime " + std::to_string(correctedTime));
-
-            ply += 1;
-        }
-    }
-
-    return setup;
-}
-
-}  // namespace Stockfish
--- a/src/bitbase.cpp
+++ b/src/bitbase.cpp
@@ -0,0 +1,170 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <cassert>
+#include <vector>
+#include <bitset>
+
+#include "bitboard.h"
+#include "types.h"
+
+namespace {
+
+  // There are 24 possible pawn squares: files A to D and ranks from 2 to 7.
+  // Positions with the pawn on files E to H will be mirrored before probing.
+  constexpr unsigned MAX_INDEX = 2*24*64*64; // stm * psq * wksq * bksq = 196608
+
+  std::bitset<MAX_INDEX> KPKBitbase;
+
+  // A KPK bitbase index is an integer in the [0, MAX_INDEX) range
+  //
+  // Information is mapped in a way that minimizes the number of iterations:
+  //
+  // bit  0- 5: white king square (from SQ_A1 to SQ_H8)
+  // bit  6-11: black king square (from SQ_A1 to SQ_H8)
+  // bit    12: side to move (WHITE or BLACK)
+  // bit 13-14: white pawn file (from FILE_A to FILE_D)
+  // bit 15-17: white pawn RANK_7 - rank (from RANK_7 - RANK_7 to RANK_7 - RANK_2)
+  unsigned index(Color stm, Square bksq, Square wksq, Square psq) {
+    return int(wksq) | (bksq << 6) | (stm << 12) | (file_of(psq) << 13) | ((RANK_7 - rank_of(psq)) << 15);
+  }
+
+  enum Result {
+    INVALID = 0,
+    UNKNOWN = 1,
+    DRAW    = 2,
+    WIN     = 4
+  };
+
+  Result& operator|=(Result& r, Result v) { return r = Result(r | v); }
+
+  struct KPKPosition {
+    KPKPosition() = default;
+    explicit KPKPosition(unsigned idx);
+    operator Result() const { return result; }
+    Result classify(const std::vector<KPKPosition>& db);
+
+    Color stm;
+    Square ksq[COLOR_NB], psq;
+    Result result;
+  };
+
+} // namespace
+
+
+bool Bitbases::probe(Square wksq, Square wpsq, Square bksq, Color stm) {
+
+  assert(file_of(wpsq) <= FILE_D);
+
+  return KPKBitbase[index(stm, bksq, wksq, wpsq)];
+}
+
+
+void Bitbases::init() {
+
+  std::vector<KPKPosition> db(MAX_INDEX);
+  unsigned idx, repeat = 1;
+
+  // Initialize db with known win / draw positions
+  for (idx = 0; idx < MAX_INDEX; ++idx)
+      db[idx] = KPKPosition(idx);
+
+  // Iterate through the positions until none of the unknown positions can be
+  // changed to either wins or draws (15 cycles needed).
+  while (repeat)
+      for (repeat = idx = 0; idx < MAX_INDEX; ++idx)
+          repeat |= (db[idx] == UNKNOWN && db[idx].classify(db) != UNKNOWN);
+
+  // Fill the bitbase with the decisive results
+  for (idx = 0; idx < MAX_INDEX; ++idx)
+      if (db[idx] == WIN)
+          KPKBitbase.set(idx);
+}
+
+
+namespace {
+
+  KPKPosition::KPKPosition(unsigned idx) {
+
+    ksq[WHITE] = Square((idx >>  0) & 0x3F);
+    ksq[BLACK] = Square((idx >>  6) & 0x3F);
+    stm        = Color ((idx >> 12) & 0x01);
+    psq        = make_square(File((idx >> 13) & 0x3), Rank(RANK_7 - ((idx >> 15) & 0x7)));
+
+    // Invalid if two pieces are on the same square or if a king can be captured
+    if (   distance(ksq[WHITE], ksq[BLACK]) <= 1
+        || ksq[WHITE] == psq
+        || ksq[BLACK] == psq
+        || (stm == WHITE && (pawn_attacks_bb(WHITE, psq) & ksq[BLACK])))
+        result = INVALID;
+
+    // Win if the pawn can be promoted without getting captured
+    else if (   stm == WHITE
+             && rank_of(psq) == RANK_7
+             && ksq[WHITE] != psq + NORTH
+             && (    distance(ksq[BLACK], psq + NORTH) > 1
+                 || (distance(ksq[WHITE], psq + NORTH) == 1)))
+        result = WIN;
+
+    // Draw if it is stalemate or the black king can capture the pawn
+    else if (   stm == BLACK
+             && (  !(attacks_bb<KING>(ksq[BLACK]) & ~(attacks_bb<KING>(ksq[WHITE]) | pawn_attacks_bb(WHITE, psq)))
+                 || (attacks_bb<KING>(ksq[BLACK]) & ~attacks_bb<KING>(ksq[WHITE]) & psq)))
+        result = DRAW;
+
+    // Position will be classified later
+    else
+        result = UNKNOWN;
+  }
+
+  Result KPKPosition::classify(const std::vector<KPKPosition>& db) {
+
+    // White to move: If one move leads to a position classified as WIN, the result
+    // of the current position is WIN. If all moves lead to positions classified
+    // as DRAW, the current position is classified as DRAW, otherwise the current
+    // position is classified as UNKNOWN.
+    //
+    // Black to move: If one move leads to a position classified as DRAW, the result
+    // of the current position is DRAW. If all moves lead to positions classified
+    // as WIN, the position is classified as WIN, otherwise the current position is
+    // classified as UNKNOWN.
+    const Result Good = (stm == WHITE ? WIN   : DRAW);
+    const Result Bad  = (stm == WHITE ? DRAW  : WIN);
+
+    Result r = INVALID;
+    Bitboard b = attacks_bb<KING>(ksq[stm]);
+
+    while (b)
+        r |= stm == WHITE ? db[index(BLACK, ksq[BLACK] , pop_lsb(&b), psq)]
+                          : db[index(WHITE, pop_lsb(&b),  ksq[WHITE], psq)];
+
+    if (stm == WHITE)
+    {
+        if (rank_of(psq) < RANK_7)      // Single push
+            r |= db[index(BLACK, ksq[BLACK], ksq[WHITE], psq + NORTH)];
+
+        if (   rank_of(psq) == RANK_2   // Double push
+            && psq + NORTH != ksq[WHITE]
+            && psq + NORTH != ksq[BLACK])
+            r |= db[index(BLACK, ksq[BLACK], ksq[WHITE], psq + NORTH + NORTH)];
+    }
+
+    return result = r & Good  ? Good  : r & UNKNOWN ? UNKNOWN : Bad;
+  }
+
+} // namespace
--- a/src/bitboard.cpp
+++ b/src/bitboard.cpp
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,171 +16,148 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include "bitboard.h"
-
 #include <algorithm>
 #include <bitset>
-#include <initializer_list>

+#include "bitboard.h"
 #include "misc.h"

-namespace Stockfish {
-
 uint8_t PopCnt16[1 << 16];
 uint8_t SquareDistance[SQUARE_NB][SQUARE_NB];

+Bitboard SquareBB[SQUARE_NB];
 Bitboard LineBB[SQUARE_NB][SQUARE_NB];
-Bitboard BetweenBB[SQUARE_NB][SQUARE_NB];
 Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB];
+Bitboard PawnAttacks[COLOR_NB][SQUARE_NB];

-alignas(64) Magic Magics[SQUARE_NB][2];
+Magic RookMagics[SQUARE_NB];
+Magic BishopMagics[SQUARE_NB];

 namespace {

-Bitboard RookTable[0x19000];   // To store rook attacks
-Bitboard BishopTable[0x1480];  // To store bishop attacks
+  Bitboard RookTable[0x19000];  // To store rook attacks
+  Bitboard BishopTable[0x1480]; // To store bishop attacks

-void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]);
-
-// Returns the bitboard of target square for the given step
-// from the given square. If the step is off the board, returns empty bitboard.
-Bitboard safe_destination(Square s, int step) {
-    Square to = Square(s + step);
-    return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0);
-}
-}
-
-// Returns an ASCII representation of a bitboard suitable
-// to be printed to standard output. Useful for debugging.
-std::string Bitboards::pretty(Bitboard b) {
-
-    std::string s = "+---+---+---+---+---+---+---+---+\n";
-
-    for (Rank r = RANK_8; r >= RANK_1; --r)
-    {
-        for (File f = FILE_A; f <= FILE_H; ++f)
-            s += b & make_square(f, r) ? "| X " : "|   ";
-
-        s += "| " + std::to_string(1 + r) + "\n+---+---+---+---+---+---+---+---+\n";
-    }
-    s += "  a   b   c   d   e   f   g   h\n";
-
-    return s;
+  void init_magics(PieceType pt, Bitboard table[], Magic magics[]);
 }


-// Initializes various bitboard tables. It is called at
-// startup and relies on global objects to be already zero-initialized.
+/// Bitboards::pretty() returns an ASCII representation of a bitboard suitable
+/// to be printed to standard output. Useful for debugging.
+
+const std::string Bitboards::pretty(Bitboard b) {
+
+  std::string s = "+---+---+---+---+---+---+---+---+\n";
+
+  for (Rank r = RANK_8; r >= RANK_1; --r)
+  {
+      for (File f = FILE_A; f <= FILE_H; ++f)
+          s += b & make_square(f, r) ? "| X " : "|   ";
+
+      s += "| " + std::to_string(1 + r) + "\n+---+---+---+---+---+---+---+---+\n";
+  }
+  s += "  a   b   c   d   e   f   g   h\n";
+
+  return s;
+}
+
+
+/// Bitboards::init() initializes various bitboard tables. It is called at
+/// startup and relies on global objects to be already zero-initialized.
+
 void Bitboards::init() {

-    for (unsigned i = 0; i < (1 << 16); ++i)
-        PopCnt16[i] = uint8_t(std::bitset<16>(i).count());
+  for (unsigned i = 0; i < (1 << 16); ++i)
+      PopCnt16[i] = uint8_t(std::bitset<16>(i).count());

-    for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
-        for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
-            SquareDistance[s1][s2] = std::max(distance<File>(s1, s2), distance<Rank>(s1, s2));
+  for (Square s = SQ_A1; s <= SQ_H8; ++s)
+      SquareBB[s] = (1ULL << s);

-    init_magics(ROOK, RookTable, Magics);
-    init_magics(BISHOP, BishopTable, Magics);
+  for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
+      for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
+          SquareDistance[s1][s2] = std::max(distance<File>(s1, s2), distance<Rank>(s1, s2));

-    for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
-    {
-        PseudoAttacks[WHITE][s1] = pawn_attacks_bb<WHITE>(square_bb(s1));
-        PseudoAttacks[BLACK][s1] = pawn_attacks_bb<BLACK>(square_bb(s1));
+  init_magics(ROOK, RookTable, RookMagics);
+  init_magics(BISHOP, BishopTable, BishopMagics);

-        for (int step : {-9, -8, -7, -1, 1, 7, 8, 9})
-            PseudoAttacks[KING][s1] |= safe_destination(s1, step);
+  for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
+  {
+      PawnAttacks[WHITE][s1] = pawn_attacks_bb<WHITE>(square_bb(s1));
+      PawnAttacks[BLACK][s1] = pawn_attacks_bb<BLACK>(square_bb(s1));

-        for (int step : {-17, -15, -10, -6, 6, 10, 15, 17})
-            PseudoAttacks[KNIGHT][s1] |= safe_destination(s1, step);
+      for (int step : {-9, -8, -7, -1, 1, 7, 8, 9} )
+         PseudoAttacks[KING][s1] |= safe_destination(s1, step);

-        PseudoAttacks[QUEEN][s1] = PseudoAttacks[BISHOP][s1] = attacks_bb<BISHOP>(s1, 0);
-        PseudoAttacks[QUEEN][s1] |= PseudoAttacks[ROOK][s1]  = attacks_bb<ROOK>(s1, 0);
+      for (int step : {-17, -15, -10, -6, 6, 10, 15, 17} )
+         PseudoAttacks[KNIGHT][s1] |= safe_destination(s1, step);

-        for (PieceType pt : {BISHOP, ROOK})
-            for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
-            {
-                if (PseudoAttacks[pt][s1] & s2)
-                {
-                    LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2;
-                    BetweenBB[s1][s2] =
-                      (attacks_bb(pt, s1, square_bb(s2)) & attacks_bb(pt, s2, square_bb(s1)));
-                }
-                BetweenBB[s1][s2] |= s2;
-            }
-    }
+      PseudoAttacks[QUEEN][s1]  = PseudoAttacks[BISHOP][s1] = attacks_bb<BISHOP>(s1, 0);
+      PseudoAttacks[QUEEN][s1] |= PseudoAttacks[  ROOK][s1] = attacks_bb<  ROOK>(s1, 0);
+
+      for (PieceType pt : { BISHOP, ROOK })
+          for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
+              if (PseudoAttacks[pt][s1] & s2)
+                  LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2;
+  }
 }

+
 namespace {

-Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) {
+  Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) {

-    Bitboard  attacks             = 0;
-    Direction RookDirections[4]   = {NORTH, SOUTH, EAST, WEST};
+    Bitboard attacks = 0;
+    Direction   RookDirections[4] = {NORTH, SOUTH, EAST, WEST};
    Direction BishopDirections[4] = {NORTH_EAST, SOUTH_EAST, SOUTH_WEST, NORTH_WEST};

-    for (Direction d : (pt == ROOK ? RookDirections : BishopDirections))
+    for(Direction d : (pt == ROOK ? RookDirections : BishopDirections))
    {
        Square s = sq;
-        while (safe_destination(s, d))
-        {
+        while(safe_destination(s, d) && !(occupied & s))
            attacks |= (s += d);
-            if (occupied & s)
-            {
-                break;
-            }
-        }
    }

    return attacks;
-}
+  }


-// Computes all rook and bishop attacks at startup. Magic
-// bitboards are used to look up attacks of sliding pieces. As a reference see
-// https://www.chessprogramming.org/Magic_Bitboards. In particular, here we use
-// the so called "fancy" approach.
-void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]) {
+  // init_magics() computes all rook and bishop attacks at startup. Magic
+  // bitboards are used to look up attacks of sliding pieces. As a reference see
+  // www.chessprogramming.org/Magic_Bitboards. In particular, here we use the so
+  // called "fancy" approach.
+
+  void init_magics(PieceType pt, Bitboard table[], Magic magics[]) {

-#ifndef USE_PEXT
    // Optimal PRNG seeds to pick the correct magics in the shortest time
-    int seeds[][RANK_NB] = {{8977, 44560, 54343, 38998, 5731, 95205, 104912, 17020},
-                            {728, 10316, 55013, 32803, 12281, 15100, 16645, 255}};
+    int seeds[][RANK_NB] = { { 8977, 44560, 54343, 38998,  5731, 95205, 104912, 17020 },
+                             {  728, 10316, 55013, 32803, 12281, 15100,  16645,   255 } };

-    Bitboard occupancy[4096];
-    int      epoch[4096] = {}, cnt = 0;
-#endif
-    Bitboard reference[4096];
-    int      size = 0;
+    Bitboard occupancy[4096], reference[4096], edges, b;
+    int epoch[4096] = {}, cnt = 0, size = 0;

    for (Square s = SQ_A1; s <= SQ_H8; ++s)
    {
        // Board edges are not considered in the relevant occupancies
-        Bitboard edges = ((Rank1BB | Rank8BB) & ~rank_bb(s)) | ((FileABB | FileHBB) & ~file_bb(s));
+        edges = ((Rank1BB | Rank8BB) & ~rank_bb(s)) | ((FileABB | FileHBB) & ~file_bb(s));

        // Given a square 's', the mask is the bitboard of sliding attacks from
        // 's' computed on an empty board. The index must be big enough to contain
        // all the attacks for each possible subset of the mask and so is 2 power
        // the number of 1s of the mask. Hence we deduce the size of the shift to
        // apply to the 64 or 32 bits word to get the index.
-        Magic& m = magics[s][pt - BISHOP];
-        m.mask   = sliding_attack(pt, s, 0) & ~edges;
-#ifndef USE_PEXT
+        Magic& m = magics[s];
+        m.mask  = sliding_attack(pt, s, 0) & ~edges;
        m.shift = (Is64Bit ? 64 : 32) - popcount(m.mask);
-#endif
+
        // Set the offset for the attacks table of the square. We have individual
        // table sizes for each square with "Fancy Magic Bitboards".
-        m.attacks = s == SQ_A1 ? table : magics[s - 1][pt - BISHOP].attacks + size;
-        size      = 0;
+        m.attacks = s == SQ_A1 ? table : magics[s - 1].attacks + size;

        // Use Carry-Rippler trick to enumerate all subsets of masks[s] and
        // store the corresponding sliding attack bitboard in reference[].
-        Bitboard b = 0;
-        do
-        {
-#ifndef USE_PEXT
+        b = size = 0;
+        do {
            occupancy[size] = b;
-#endif
            reference[size] = sliding_attack(pt, s, b);

            if (HasPext)
@@ -190,14 +167,16 @@ void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]) {
            b = (b - m.mask) & m.mask;
        } while (b);

-#ifndef USE_PEXT
+        if (HasPext)
+            continue;
+
        PRNG rng(seeds[Is64Bit][rank_of(s)]);

        // Find a magic for square 's' picking up an (almost) random number
        // until we find the one that passes the verification test.
-        for (int i = 0; i < size;)
+        for (int i = 0; i < size; )
        {
-            for (m.magic = 0; popcount((m.magic * m.mask) >> 56) < 6;)
+            for (m.magic = 0; popcount((m.magic * m.mask) >> 56) < 6; )
                m.magic = rng.sparse_rand<Bitboard>();

            // A good magic must map every possible occupancy to an index that
@@ -212,16 +191,13 @@ void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]) {

                if (epoch[idx] < cnt)
                {
-                    epoch[idx]     = cnt;
+                    epoch[idx] = cnt;
                    m.attacks[idx] = reference[i];
                }
                else if (m.attacks[idx] != reference[i])
                    break;
            }
        }
-#endif
    }
+  }
 }
-}
-
-}  // namespace Stockfish
--- a/src/bitboard.h
+++ b/src/bitboard.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,24 +19,26 @@
 #ifndef BITBOARD_H_INCLUDED
 #define BITBOARD_H_INCLUDED

-#include <algorithm>
-#include <cassert>
-#include <cmath>
-#include <cstring>
-#include <cstdint>
-#include <cstdlib>
 #include <string>

 #include "types.h"

-namespace Stockfish {
+namespace Bitbases {
+
+void init();
+bool probe(Square wksq, Square wpsq, Square bksq, Color us);
+
+}

 namespace Bitboards {

-void        init();
-std::string pretty(Bitboard b);
+void init();
+const std::string pretty(Bitboard b);

-}  // namespace Stockfish::Bitboards
+}
+
+constexpr Bitboard AllSquares = ~Bitboard(0);
+constexpr Bitboard DarkSquares = 0xAA55AA55AA55AA55ULL;

 constexpr Bitboard FileABB = 0x0101010101010101ULL;
 constexpr Bitboard FileBBB = FileABB << 1;
@@ -56,313 +58,386 @@ constexpr Bitboard Rank6BB = Rank1BB << (8 * 5);
 constexpr Bitboard Rank7BB = Rank1BB << (8 * 6);
 constexpr Bitboard Rank8BB = Rank1BB << (8 * 7);

+constexpr Bitboard QueenSide   = FileABB | FileBBB | FileCBB | FileDBB;
+constexpr Bitboard CenterFiles = FileCBB | FileDBB | FileEBB | FileFBB;
+constexpr Bitboard KingSide    = FileEBB | FileFBB | FileGBB | FileHBB;
+constexpr Bitboard Center      = (FileDBB | FileEBB) & (Rank4BB | Rank5BB);
+
+constexpr Bitboard KingFlank[FILE_NB] = {
+  QueenSide ^ FileDBB, QueenSide, QueenSide,
+  CenterFiles, CenterFiles,
+  KingSide, KingSide, KingSide ^ FileEBB
+};
+
 extern uint8_t PopCnt16[1 << 16];
 extern uint8_t SquareDistance[SQUARE_NB][SQUARE_NB];

-extern Bitboard BetweenBB[SQUARE_NB][SQUARE_NB];
+extern Bitboard SquareBB[SQUARE_NB];
 extern Bitboard LineBB[SQUARE_NB][SQUARE_NB];
 extern Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB];
+extern Bitboard PawnAttacks[COLOR_NB][SQUARE_NB];


-// Magic holds all magic bitboards relevant data for a single square
+/// Magic holds all magic bitboards relevant data for a single square
 struct Magic {
-    Bitboard  mask;
-    Bitboard* attacks;
-#ifndef USE_PEXT
-    Bitboard magic;
-    unsigned shift;
-#endif
+  Bitboard  mask;
+  Bitboard  magic;
+  Bitboard* attacks;
+  unsigned  shift;

-    // Compute the attack's index using the 'magic bitboards' approach
-    unsigned index(Bitboard occupied) const {
+  // Compute the attack's index using the 'magic bitboards' approach
+  unsigned index(Bitboard occupied) const {

-#ifdef USE_PEXT
+    if (HasPext)
        return unsigned(pext(occupied, mask));
-#else
-        if (Is64Bit)
-            return unsigned(((occupied & mask) * magic) >> shift);

-        unsigned lo = unsigned(occupied) & unsigned(mask);
-        unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32);
-        return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift;
-#endif
-    }
+    if (Is64Bit)
+        return unsigned(((occupied & mask) * magic) >> shift);

-    Bitboard attacks_bb(Bitboard occupied) const { return attacks[index(occupied)]; }
+    unsigned lo = unsigned(occupied) & unsigned(mask);
+    unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32);
+    return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift;
+  }
 };

-extern Magic Magics[SQUARE_NB][2];
+extern Magic RookMagics[SQUARE_NB];
+extern Magic BishopMagics[SQUARE_NB];

-constexpr Bitboard square_bb(Square s) {
-    assert(is_ok(s));
-    return (1ULL << s);
+inline Bitboard square_bb(Square s) {
+  assert(is_ok(s));
+  return SquareBB[s];
 }


-// Overloads of bitwise operators between a Bitboard and a Square for testing
-// whether a given bit is set in a bitboard, and for setting and clearing bits.
+/// Overloads of bitwise operators between a Bitboard and a Square for testing
+/// whether a given bit is set in a bitboard, and for setting and clearing bits.

-constexpr Bitboard  operator&(Bitboard b, Square s) { return b & square_bb(s); }
-constexpr Bitboard  operator|(Bitboard b, Square s) { return b | square_bb(s); }
-constexpr Bitboard  operator^(Bitboard b, Square s) { return b ^ square_bb(s); }
-constexpr Bitboard& operator|=(Bitboard& b, Square s) { return b |= square_bb(s); }
-constexpr Bitboard& operator^=(Bitboard& b, Square s) { return b ^= square_bb(s); }
+inline Bitboard  operator&( Bitboard  b, Square s) { return b &  square_bb(s); }
+inline Bitboard  operator|( Bitboard  b, Square s) { return b |  square_bb(s); }
+inline Bitboard  operator^( Bitboard  b, Square s) { return b ^  square_bb(s); }
+inline Bitboard& operator|=(Bitboard& b, Square s) { return b |= square_bb(s); }
+inline Bitboard& operator^=(Bitboard& b, Square s) { return b ^= square_bb(s); }

-constexpr Bitboard operator&(Square s, Bitboard b) { return b & s; }
-constexpr Bitboard operator|(Square s, Bitboard b) { return b | s; }
-constexpr Bitboard operator^(Square s, Bitboard b) { return b ^ s; }
+inline Bitboard  operator&(Square s, Bitboard b) { return b & s; }
+inline Bitboard  operator|(Square s, Bitboard b) { return b | s; }
+inline Bitboard  operator^(Square s, Bitboard b) { return b ^ s; }

-constexpr Bitboard operator|(Square s1, Square s2) { return square_bb(s1) | s2; }
+inline Bitboard  operator|(Square s1, Square s2) { return square_bb(s1) | s2; }

-constexpr bool more_than_one(Bitboard b) { return b & (b - 1); }
+constexpr bool more_than_one(Bitboard b) {
+  return b & (b - 1);
+}


-// rank_bb() and file_bb() return a bitboard representing all the squares on
-// the given file or rank.
-
-constexpr Bitboard rank_bb(Rank r) { return Rank1BB << (8 * r); }
-
-constexpr Bitboard rank_bb(Square s) { return rank_bb(rank_of(s)); }
-
-constexpr Bitboard file_bb(File f) { return FileABB << f; }
-
-constexpr Bitboard file_bb(Square s) { return file_bb(file_of(s)); }
+constexpr bool opposite_colors(Square s1, Square s2) {
+  return (s1 + rank_of(s1) + s2 + rank_of(s2)) & 1;
+}


-// Moves a bitboard one or two steps as specified by the direction D
+/// rank_bb() and file_bb() return a bitboard representing all the squares on
+/// the given file or rank.
+
+constexpr Bitboard rank_bb(Rank r) {
+  return Rank1BB << (8 * r);
+}
+
+constexpr Bitboard rank_bb(Square s) {
+  return rank_bb(rank_of(s));
+}
+
+constexpr Bitboard file_bb(File f) {
+  return FileABB << f;
+}
+
+constexpr Bitboard file_bb(Square s) {
+  return file_bb(file_of(s));
+}
+
+
+/// shift() moves a bitboard one or two steps as specified by the direction D
+
 template<Direction D>
 constexpr Bitboard shift(Bitboard b) {
-    return D == NORTH         ? b << 8
-         : D == SOUTH         ? b >> 8
-         : D == NORTH + NORTH ? b << 16
-         : D == SOUTH + SOUTH ? b >> 16
-         : D == EAST          ? (b & ~FileHBB) << 1
-         : D == WEST          ? (b & ~FileABB) >> 1
-         : D == NORTH_EAST    ? (b & ~FileHBB) << 9
-         : D == NORTH_WEST    ? (b & ~FileABB) << 7
-         : D == SOUTH_EAST    ? (b & ~FileHBB) >> 7
-         : D == SOUTH_WEST    ? (b & ~FileABB) >> 9
-                              : 0;
+  return  D == NORTH      ?  b             << 8 : D == SOUTH      ?  b             >> 8
+        : D == NORTH+NORTH?  b             <<16 : D == SOUTH+SOUTH?  b             >>16
+        : D == EAST       ? (b & ~FileHBB) << 1 : D == WEST       ? (b & ~FileABB) >> 1
+        : D == NORTH_EAST ? (b & ~FileHBB) << 9 : D == NORTH_WEST ? (b & ~FileABB) << 7
+        : D == SOUTH_EAST ? (b & ~FileHBB) >> 7 : D == SOUTH_WEST ? (b & ~FileABB) >> 9
+        : 0;
 }


-// Returns the squares attacked by pawns of the given color
-// from the squares in the given bitboard.
+/// pawn_attacks_bb() returns the squares attacked by pawns of the given color
+/// from the squares in the given bitboard.
+
 template<Color C>
 constexpr Bitboard pawn_attacks_bb(Bitboard b) {
-    return C == WHITE ? shift<NORTH_WEST>(b) | shift<NORTH_EAST>(b)
-                      : shift<SOUTH_WEST>(b) | shift<SOUTH_EAST>(b);
+  return C == WHITE ? shift<NORTH_WEST>(b) | shift<NORTH_EAST>(b)
+                    : shift<SOUTH_WEST>(b) | shift<SOUTH_EAST>(b);
+}
+
+inline Bitboard pawn_attacks_bb(Color c, Square s) {
+
+  assert(is_ok(s));
+  return PawnAttacks[c][s];
 }


-// Returns a bitboard representing an entire line (from board edge
-// to board edge) that intersects the two given squares. If the given squares
-// are not on a same file/rank/diagonal, the function returns 0. For instance,
-// line_bb(SQ_C4, SQ_F7) will return a bitboard with the A2-G8 diagonal.
+/// pawn_double_attacks_bb() returns the squares doubly attacked by pawns of the
+/// given color from the squares in the given bitboard.
+
+template<Color C>
+constexpr Bitboard pawn_double_attacks_bb(Bitboard b) {
+  return C == WHITE ? shift<NORTH_WEST>(b) & shift<NORTH_EAST>(b)
+                    : shift<SOUTH_WEST>(b) & shift<SOUTH_EAST>(b);
+}
+
+
+/// adjacent_files_bb() returns a bitboard representing all the squares on the
+/// adjacent files of a given square.
+
+constexpr Bitboard adjacent_files_bb(Square s) {
+  return shift<EAST>(file_bb(s)) | shift<WEST>(file_bb(s));
+}
+
+
+/// line_bb() returns a bitboard representing an entire line (from board edge
+/// to board edge) that intersects the two given squares. If the given squares
+/// are not on the same file/rank/diagonal, the function returns 0. For instance,
+/// line_bb(SQ_C4, SQ_F7) will return a bitboard with the A2-G8 diagonal.
+
 inline Bitboard line_bb(Square s1, Square s2) {

-    assert(is_ok(s1) && is_ok(s2));
-    return LineBB[s1][s2];
+  assert(is_ok(s1) && is_ok(s2));
+  return LineBB[s1][s2];
 }


-// Returns a bitboard representing the squares in the semi-open
-// segment between the squares s1 and s2 (excluding s1 but including s2). If the
-// given squares are not on a same file/rank/diagonal, it returns s2. For instance,
-// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5, E6 and F7, but
-// between_bb(SQ_E6, SQ_F8) will return a bitboard with the square F8. This trick
-// allows to generate non-king evasion moves faster: the defending piece must either
-// interpose itself to cover the check or capture the checking piece.
+/// between_bb() returns a bitboard representing squares that are linearly
+/// between the two given squares (excluding the given squares). If the given
+/// squares are not on the same file/rank/diagonal, we return 0. For instance,
+/// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5 and E6.
+
 inline Bitboard between_bb(Square s1, Square s2) {
-
-    assert(is_ok(s1) && is_ok(s2));
-    return BetweenBB[s1][s2];
+  Bitboard b = line_bb(s1, s2) & ((AllSquares << s1) ^ (AllSquares << s2));
+  return b & (b - 1); //exclude lsb
 }

-// Returns true if the squares s1, s2 and s3 are aligned either on a
-// straight or on a diagonal line.
-inline bool aligned(Square s1, Square s2, Square s3) { return line_bb(s1, s2) & s3; }

+/// forward_ranks_bb() returns a bitboard representing the squares on the ranks
+/// in front of the given one, from the point of view of the given color. For instance,
+/// forward_ranks_bb(BLACK, SQ_D3) will return the 16 squares on ranks 1 and 2.

-// distance() functions return the distance between x and y, defined as the
-// number of steps for a king in x to reach y.
-
-template<typename T1 = Square>
-inline int distance(Square x, Square y);
-
-template<>
-inline int distance<File>(Square x, Square y) {
-    return std::abs(file_of(x) - file_of(y));
+constexpr Bitboard forward_ranks_bb(Color c, Square s) {
+  return c == WHITE ? ~Rank1BB << 8 * relative_rank(WHITE, s)
+                    : ~Rank8BB >> 8 * relative_rank(BLACK, s);
 }

-template<>
-inline int distance<Rank>(Square x, Square y) {
-    return std::abs(rank_of(x) - rank_of(y));
+
+/// forward_file_bb() returns a bitboard representing all the squares along the
+/// line in front of the given one, from the point of view of the given color.
+
+constexpr Bitboard forward_file_bb(Color c, Square s) {
+  return forward_ranks_bb(c, s) & file_bb(s);
 }

-template<>
-inline int distance<Square>(Square x, Square y) {
-    return SquareDistance[x][y];
+
+/// pawn_attack_span() returns a bitboard representing all the squares that can
+/// be attacked by a pawn of the given color when it moves along its file, starting
+/// from the given square.
+
+constexpr Bitboard pawn_attack_span(Color c, Square s) {
+  return forward_ranks_bb(c, s) & adjacent_files_bb(s);
 }

+
+/// passed_pawn_span() returns a bitboard which can be used to test if a pawn of
+/// the given color and on the given square is a passed pawn.
+
+constexpr Bitboard passed_pawn_span(Color c, Square s) {
+  return pawn_attack_span(c, s) | forward_file_bb(c, s);
+}
+
+
+/// aligned() returns true if the squares s1, s2 and s3 are aligned either on a
+/// straight or on a diagonal line.
+
+inline bool aligned(Square s1, Square s2, Square s3) {
+  return line_bb(s1, s2) & s3;
+}
+
+
+/// distance() functions return the distance between x and y, defined as the
+/// number of steps for a king in x to reach y.
+
+template<typename T1 = Square> inline int distance(Square x, Square y);
+template<> inline int distance<File>(Square x, Square y) { return std::abs(file_of(x) - file_of(y)); }
+template<> inline int distance<Rank>(Square x, Square y) { return std::abs(rank_of(x) - rank_of(y)); }
+template<> inline int distance<Square>(Square x, Square y) { return SquareDistance[x][y]; }
+
 inline int edge_distance(File f) { return std::min(f, File(FILE_H - f)); }
+inline int edge_distance(Rank r) { return std::min(r, Rank(RANK_8 - r)); }

-// Returns the pseudo attacks of the given piece type
-// assuming an empty board.
-template<PieceType Pt>
-inline Bitboard attacks_bb(Square s, Color c = COLOR_NB) {

-    assert((Pt != PAWN || c < COLOR_NB) && (is_ok(s)));
-    return Pt == PAWN ? PseudoAttacks[c][s] : PseudoAttacks[Pt][s];
+/// safe_destination() returns the bitboard of target square for the given step
+/// from the given square. If the step is off the board, returns empty bitboard.
+
+inline Bitboard safe_destination(Square s, int step)
+{
+    Square to = Square(s + step);
+    return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0);
 }


-// Returns the attacks by the given piece
-// assuming the board is occupied according to the passed Bitboard.
-// Sliding piece attacks do not continue passed an occupied square.
+/// attacks_bb(Square) returns the pseudo attacks of the given piece type
+/// assuming an empty board.
+
+template<PieceType Pt>
+inline Bitboard attacks_bb(Square s) {
+
+  assert((Pt != PAWN) && (is_ok(s)));
+
+  return PseudoAttacks[Pt][s];
+}
+
+
+/// attacks_bb(Square, Bitboard) returns the attacks by the given piece
+/// assuming the board is occupied according to the passed Bitboard.
+/// Sliding piece attacks do not continue past an occupied square.
+
 template<PieceType Pt>
 inline Bitboard attacks_bb(Square s, Bitboard occupied) {

-    assert((Pt != PAWN) && (is_ok(s)));
+  assert((Pt != PAWN) && (is_ok(s)));

-    switch (Pt)
-    {
-    case BISHOP :
-    case ROOK :
-        return Magics[s][Pt - BISHOP].attacks_bb(occupied);
-    case QUEEN :
-        return attacks_bb<BISHOP>(s, occupied) | attacks_bb<ROOK>(s, occupied);
-    default :
-        return PseudoAttacks[Pt][s];
-    }
+  switch (Pt)
+  {
+  case BISHOP: return BishopMagics[s].attacks[BishopMagics[s].index(occupied)];
+  case ROOK  : return   RookMagics[s].attacks[  RookMagics[s].index(occupied)];
+  case QUEEN : return attacks_bb<BISHOP>(s, occupied) | attacks_bb<ROOK>(s, occupied);
+  default    : return PseudoAttacks[Pt][s];
+  }
 }

-// Returns the attacks by the given piece
-// assuming the board is occupied according to the passed Bitboard.
-// Sliding piece attacks do not continue passed an occupied square.
 inline Bitboard attacks_bb(PieceType pt, Square s, Bitboard occupied) {

-    assert((pt != PAWN) && (is_ok(s)));
+  assert((pt != PAWN) && (is_ok(s)));

-    switch (pt)
-    {
-    case BISHOP :
-        return attacks_bb<BISHOP>(s, occupied);
-    case ROOK :
-        return attacks_bb<ROOK>(s, occupied);
-    case QUEEN :
-        return attacks_bb<BISHOP>(s, occupied) | attacks_bb<ROOK>(s, occupied);
-    default :
-        return PseudoAttacks[pt][s];
-    }
+  switch (pt)
+  {
+  case BISHOP: return attacks_bb<BISHOP>(s, occupied);
+  case ROOK  : return attacks_bb<  ROOK>(s, occupied);
+  case QUEEN : return attacks_bb<BISHOP>(s, occupied) | attacks_bb<ROOK>(s, occupied);
+  default    : return PseudoAttacks[pt][s];
+  }
 }


-// Counts the number of non-zero bits in a bitboard.
+/// popcount() counts the number of non-zero bits in a bitboard
+
 inline int popcount(Bitboard b) {

 #ifndef USE_POPCNT

-    std::uint16_t indices[4];
-    std::memcpy(indices, &b, sizeof(b));
-    return PopCnt16[indices[0]] + PopCnt16[indices[1]] + PopCnt16[indices[2]]
-         + PopCnt16[indices[3]];
+  union { Bitboard bb; uint16_t u[4]; } v = { b };
+  return PopCnt16[v.u[0]] + PopCnt16[v.u[1]] + PopCnt16[v.u[2]] + PopCnt16[v.u[3]];

-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER) || defined(__INTEL_COMPILER)

-    return int(_mm_popcnt_u64(b));
+  return (int)_mm_popcnt_u64(b);

-#else  // Assumed gcc or compatible compiler
+#else // Assumed gcc or compatible compiler

-    return __builtin_popcountll(b);
+  return __builtin_popcountll(b);

 #endif
 }

-// Returns the least significant bit in a non-zero bitboard.
+
+/// lsb() and msb() return the least/most significant bit in a non-zero bitboard
+
+#if defined(__GNUC__)  // GCC, Clang, ICC
+
 inline Square lsb(Bitboard b) {
-    assert(b);
-
-#if defined(__GNUC__)  // GCC, Clang, ICX
-
-    return Square(__builtin_ctzll(b));
-
-#elif defined(_MSC_VER)
-    #ifdef _WIN64  // MSVC, WIN64
-
-    unsigned long idx;
-    _BitScanForward64(&idx, b);
-    return Square(idx);
-
-    #else  // MSVC, WIN32
-    unsigned long idx;
-
-    if (b & 0xffffffff)
-    {
-        _BitScanForward(&idx, int32_t(b));
-        return Square(idx);
-    }
-    else
-    {
-        _BitScanForward(&idx, int32_t(b >> 32));
-        return Square(idx + 32);
-    }
-    #endif
-#else  // Compiler is neither GCC nor MSVC compatible
-    #error "Compiler not supported."
-#endif
+  assert(b);
+  return Square(__builtin_ctzll(b));
 }

-// Returns the most significant bit in a non-zero bitboard.
 inline Square msb(Bitboard b) {
-    assert(b);
+  assert(b);
+  return Square(63 ^ __builtin_clzll(b));
+}

-#if defined(__GNUC__)  // GCC, Clang, ICX
+#elif defined(_MSC_VER)  // MSVC

-    return Square(63 ^ __builtin_clzll(b));
+#ifdef _WIN64  // MSVC, WIN64

-#elif defined(_MSC_VER)
-    #ifdef _WIN64  // MSVC, WIN64
+inline Square lsb(Bitboard b) {
+  assert(b);
+  unsigned long idx;
+  _BitScanForward64(&idx, b);
+  return (Square) idx;
+}

-    unsigned long idx;
-    _BitScanReverse64(&idx, b);
-    return Square(idx);
+inline Square msb(Bitboard b) {
+  assert(b);
+  unsigned long idx;
+  _BitScanReverse64(&idx, b);
+  return (Square) idx;
+}

-    #else  // MSVC, WIN32
+#else  // MSVC, WIN32

-    unsigned long idx;
+inline Square lsb(Bitboard b) {
+  assert(b);
+  unsigned long idx;
+
+  if (b & 0xffffffff) {
+      _BitScanForward(&idx, int32_t(b));
+      return Square(idx);
+  } else {
+      _BitScanForward(&idx, int32_t(b >> 32));
+      return Square(idx + 32);
+  }
+}
+
+inline Square msb(Bitboard b) {
+  assert(b);
+  unsigned long idx;
+
+  if (b >> 32) {
+      _BitScanReverse(&idx, int32_t(b >> 32));
+      return Square(idx + 32);
+  } else {
+      _BitScanReverse(&idx, int32_t(b));
+      return Square(idx);
+  }
+}

-    if (b >> 32)
-    {
-        _BitScanReverse(&idx, int32_t(b >> 32));
-        return Square(idx + 32);
-    }
-    else
-    {
-        _BitScanReverse(&idx, int32_t(b));
-        return Square(idx);
-    }
-    #endif
-#else  // Compiler is neither GCC nor MSVC compatible
-    #error "Compiler not supported."
 #endif
+
+#else  // Compiler is neither GCC nor MSVC compatible
+
+#error "Compiler not supported."
+
+#endif
+
+
+/// pop_lsb() finds and clears the least significant bit in a non-zero bitboard
+
+inline Square pop_lsb(Bitboard* b) {
+  assert(*b);
+  const Square s = lsb(*b);
+  *b &= *b - 1;
+  return s;
 }

-// Returns the bitboard of the least significant
-// square of a non-zero bitboard. It is equivalent to square_bb(lsb(bb)).
-inline Bitboard least_significant_square_bb(Bitboard b) {
-    assert(b);
-    return b & -b;
+
+/// frontmost_sq() returns the most advanced square for the given color,
+/// requires a non-zero bitboard.
+inline Square frontmost_sq(Color c, Bitboard b) {
+  assert(b);
+  return c == WHITE ? msb(b) : lsb(b);
 }

-// Finds and clears the least significant bit in a non-zero bitboard.
-inline Square pop_lsb(Bitboard& b) {
-    assert(b);
-    const Square s = lsb(b);
-    b &= b - 1;
-    return s;
-}
-
-}  // namespace Stockfish
-
-#endif  // #ifndef BITBOARD_H_INCLUDED
+#endif // #ifndef BITBOARD_H_INCLUDED
--- a/src/endgame.cpp
+++ b/src/endgame.cpp
@@ -0,0 +1,743 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <cassert>
+
+#include "bitboard.h"
+#include "endgame.h"
+#include "movegen.h"
+
+namespace {
+
+  // Used to drive the king towards the edge of the board
+  // in KX vs K and KQ vs KR endgames.
+  // Values range from 28 (center squares) to 90 (in the corners)
+  inline int push_to_edge(Square s) {
+      int rd = edge_distance(rank_of(s)), fd = edge_distance(file_of(s));
+      return 90 - (7 * fd * fd / 2 + 7 * rd * rd / 2);
+  }
+
+  // Used to drive the king towards A1H8 corners in KBN vs K endgames.
+  // Values range from 0 on A8H1 diagonal to 7 in A1H8 corners
+  inline int push_to_corner(Square s) {
+      return abs(7 - rank_of(s) - file_of(s));
+  }
+
+  // Drive a piece close to or away from another piece
+  inline int push_close(Square s1, Square s2) { return 140 - 20 * distance(s1, s2); }
+  inline int push_away(Square s1, Square s2) { return 120 - push_close(s1, s2); }
+
+#ifndef NDEBUG
+  bool verify_material(const Position& pos, Color c, Value npm, int pawnsCnt) {
+    return pos.non_pawn_material(c) == npm && pos.count<PAWN>(c) == pawnsCnt;
+  }
+#endif
+
+  // Map the square as if strongSide is white and strongSide's only pawn
+  // is on the left half of the board.
+  Square normalize(const Position& pos, Color strongSide, Square sq) {
+
+    assert(pos.count<PAWN>(strongSide) == 1);
+
+    if (file_of(pos.square<PAWN>(strongSide)) >= FILE_E)
+        sq = flip_file(sq);
+
+    return strongSide == WHITE ? sq : flip_rank(sq);
+  }
+
+} // namespace
+
+
+namespace Endgames {
+
+  std::pair<Map<Value>, Map<ScaleFactor>> maps;
+
+  void init() {
+
+    add<KPK>("KPK");
+    add<KNNK>("KNNK");
+    add<KBNK>("KBNK");
+    add<KRKP>("KRKP");
+    add<KRKB>("KRKB");
+    add<KRKN>("KRKN");
+    add<KQKP>("KQKP");
+    add<KQKR>("KQKR");
+    add<KNNKP>("KNNKP");
+
+    add<KRPKR>("KRPKR");
+    add<KRPKB>("KRPKB");
+    add<KBPKB>("KBPKB");
+    add<KBPKN>("KBPKN");
+    add<KBPPKB>("KBPPKB");
+    add<KRPPKRP>("KRPPKRP");
+  }
+}
+
+
+/// Mate with KX vs K. This function is used to evaluate positions with
+/// king and plenty of material vs a lone king. It simply gives the
+/// attacking side a bonus for driving the defending king towards the edge
+/// of the board, and for keeping the distance between the two kings small.
+template<>
+Value Endgame<KXK>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, weakSide, VALUE_ZERO, 0));
+  assert(!pos.checkers()); // Eval is never called when in check
+
+  // Stalemate detection with lone king
+  if (pos.side_to_move() == weakSide && !MoveList<LEGAL>(pos).size())
+      return VALUE_DRAW;
+
+  Square strongKing = pos.square<KING>(strongSide);
+  Square weakKing   = pos.square<KING>(weakSide);
+
+  Value result =  pos.non_pawn_material(strongSide)
+                + pos.count<PAWN>(strongSide) * PawnValueEg
+                + push_to_edge(weakKing)
+                + push_close(strongKing, weakKing);
+
+  if (   pos.count<QUEEN>(strongSide)
+      || pos.count<ROOK>(strongSide)
+      ||(pos.count<BISHOP>(strongSide) && pos.count<KNIGHT>(strongSide))
+      || (   (pos.pieces(strongSide, BISHOP) & ~DarkSquares)
+          && (pos.pieces(strongSide, BISHOP) &  DarkSquares)))
+      result = std::min(result + VALUE_KNOWN_WIN, VALUE_TB_WIN_IN_MAX_PLY - 1);
+
+  return strongSide == pos.side_to_move() ? result : -result;
+}
+
+
+/// Mate with KBN vs K. This is similar to KX vs K, but we have to drive the
+/// defending king towards a corner square that our bishop attacks.
+template<>
+Value Endgame<KBNK>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, KnightValueMg + BishopValueMg, 0));
+  assert(verify_material(pos, weakSide, VALUE_ZERO, 0));
+
+  Square strongKing   = pos.square<KING>(strongSide);
+  Square strongBishop = pos.square<BISHOP>(strongSide);
+  Square weakKing     = pos.square<KING>(weakSide);
+
+  // If our bishop does not attack A1/H8, we flip the enemy king square
+  // to drive to opposite corners (A8/H1).
+
+  Value result =  (VALUE_KNOWN_WIN + 3520)
+                + push_close(strongKing, weakKing)
+                + 420 * push_to_corner(opposite_colors(strongBishop, SQ_A1) ? flip_file(weakKing) : weakKing);
+
+  assert(abs(result) < VALUE_TB_WIN_IN_MAX_PLY);
+  return strongSide == pos.side_to_move() ? result : -result;
+}
+
+
+/// KP vs K. This endgame is evaluated with the help of a bitbase
+template<>
+Value Endgame<KPK>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, VALUE_ZERO, 1));
+  assert(verify_material(pos, weakSide, VALUE_ZERO, 0));
+
+  // Assume strongSide is white and the pawn is on files A-D
+  Square strongKing = normalize(pos, strongSide, pos.square<KING>(strongSide));
+  Square strongPawn = normalize(pos, strongSide, pos.square<PAWN>(strongSide));
+  Square weakKing   = normalize(pos, strongSide, pos.square<KING>(weakSide));
+
+  Color us = strongSide == pos.side_to_move() ? WHITE : BLACK;
+
+  if (!Bitbases::probe(strongKing, strongPawn, weakKing, us))
+      return VALUE_DRAW;
+
+  Value result = VALUE_KNOWN_WIN + PawnValueEg + Value(rank_of(strongPawn));
+
+  return strongSide == pos.side_to_move() ? result : -result;
+}
+
+
+/// KR vs KP. This is a somewhat tricky endgame to evaluate precisely without
+/// a bitbase. The function below returns drawish scores when the pawn is
+/// far advanced with support of the king, while the attacking king is far
+/// away.
+template<>
+Value Endgame<KRKP>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, RookValueMg, 0));
+  assert(verify_material(pos, weakSide, VALUE_ZERO, 1));
+
+  Square strongKing = pos.square<KING>(strongSide);
+  Square weakKing   = pos.square<KING>(weakSide);
+  Square strongRook = pos.square<ROOK>(strongSide);
+  Square weakPawn   = pos.square<PAWN>(weakSide);
+  Square queeningSquare = make_square(file_of(weakPawn), relative_rank(weakSide, RANK_8));
+  Value result;
+
+  // If the stronger side's king is in front of the pawn, it's a win
+  if (forward_file_bb(strongSide, strongKing) & weakPawn)
+      result = RookValueEg - distance(strongKing, weakPawn);
+
+  // If the weaker side's king is too far from the pawn and the rook,
+  // it's a win.
+  else if (   distance(weakKing, weakPawn) >= 3 + (pos.side_to_move() == weakSide)
+           && distance(weakKing, strongRook) >= 3)
+      result = RookValueEg - distance(strongKing, weakPawn);
+
+  // If the pawn is far advanced and supported by the defending king,
+  // the position is drawish
+  else if (   relative_rank(strongSide, weakKing) <= RANK_3
+           && distance(weakKing, weakPawn) == 1
+           && relative_rank(strongSide, strongKing) >= RANK_4
+           && distance(strongKing, weakPawn) > 2 + (pos.side_to_move() == strongSide))
+      result = Value(80) - 8 * distance(strongKing, weakPawn);
+
+  else
+      result =  Value(200) - 8 * (  distance(strongKing, weakPawn + pawn_push(weakSide))
+                                  - distance(weakKing, weakPawn + pawn_push(weakSide))
+                                  - distance(weakPawn, queeningSquare));
+
+  return strongSide == pos.side_to_move() ? result : -result;
+}
+
+
+/// KR vs KB. This is very simple, and always returns drawish scores. The
+/// score is slightly bigger when the defending king is close to the edge.
+template<>
+Value Endgame<KRKB>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, RookValueMg, 0));
+  assert(verify_material(pos, weakSide, BishopValueMg, 0));
+
+  Value result = Value(push_to_edge(pos.square<KING>(weakSide)));
+  return strongSide == pos.side_to_move() ? result : -result;
+}
+
+
+/// KR vs KN. The attacking side has slightly better winning chances than
+/// in KR vs KB, particularly if the king and the knight are far apart.
+template<>
+Value Endgame<KRKN>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, RookValueMg, 0));
+  assert(verify_material(pos, weakSide, KnightValueMg, 0));
+
+  Square weakKing   = pos.square<KING>(weakSide);
+  Square weakKnight = pos.square<KNIGHT>(weakSide);
+  Value result = Value(push_to_edge(weakKing) + push_away(weakKing, weakKnight));
+  return strongSide == pos.side_to_move() ? result : -result;
+}
+
+
+/// KQ vs KP. In general, this is a win for the stronger side, but there are a
+/// few important exceptions. A pawn on 7th rank and on the A,C,F or H files
+/// with a king positioned next to it can be a draw, so in that case, we only
+/// use the distance between the kings.
+template<>
+Value Endgame<KQKP>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, QueenValueMg, 0));
+  assert(verify_material(pos, weakSide, VALUE_ZERO, 1));
+
+  Square strongKing = pos.square<KING>(strongSide);
+  Square weakKing   = pos.square<KING>(weakSide);
+  Square weakPawn   = pos.square<PAWN>(weakSide);
+
+  Value result = Value(push_close(strongKing, weakKing));
+
+  if (   relative_rank(weakSide, weakPawn) != RANK_7
+      || distance(weakKing, weakPawn) != 1
+      || ((FileBBB | FileDBB | FileEBB | FileGBB) & weakPawn))
+      result += QueenValueEg - PawnValueEg;
+
+  return strongSide == pos.side_to_move() ? result : -result;
+}
+
+
+/// KQ vs KR. This is almost identical to KX vs K: we give the attacking
+/// king a bonus for having the kings close together, and for forcing the
+/// defending king towards the edge. If we also take care to avoid null move for
+/// the defending side in the search, this is usually sufficient to win KQ vs KR.
+template<>
+Value Endgame<KQKR>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, QueenValueMg, 0));
+  assert(verify_material(pos, weakSide, RookValueMg, 0));
+
+  Square strongKing = pos.square<KING>(strongSide);
+  Square weakKing   = pos.square<KING>(weakSide);
+
+  Value result =  QueenValueEg
+                - RookValueEg
+                + push_to_edge(weakKing)
+                + push_close(strongKing, weakKing);
+
+  return strongSide == pos.side_to_move() ? result : -result;
+}
+
+
+/// KNN vs KP. Very drawish, but there are some mate opportunities if we can
+/// press the weakSide King to a corner before the pawn advances too much.
+template<>
+Value Endgame<KNNKP>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, 2 * KnightValueMg, 0));
+  assert(verify_material(pos, weakSide, VALUE_ZERO, 1));
+
+  Square weakKing = pos.square<KING>(weakSide);
+  Square weakPawn = pos.square<PAWN>(weakSide);
+
+  Value result =      PawnValueEg
+               +  2 * push_to_edge(weakKing)
+               - 10 * relative_rank(weakSide, weakPawn);
+
+  return strongSide == pos.side_to_move() ? result : -result;
+}
+
+
+/// Some cases of trivial draws
+template<> Value Endgame<KNNK>::operator()(const Position&) const { return VALUE_DRAW; }
+
+
+/// KB and one or more pawns vs K. It checks for draws with rook pawns and
+/// a bishop of the wrong color. If such a draw is detected, SCALE_FACTOR_DRAW
+/// is returned. If not, the return value is SCALE_FACTOR_NONE, i.e. no scaling
+/// will be used.
+template<>
+ScaleFactor Endgame<KBPsK>::operator()(const Position& pos) const {
+
+  assert(pos.non_pawn_material(strongSide) == BishopValueMg);
+  assert(pos.count<PAWN>(strongSide) >= 1);
+
+  // No assertions about the material of weakSide, because we want draws to
+  // be detected even when the weaker side has some pawns.
+
+  Bitboard strongPawns = pos.pieces(strongSide, PAWN);
+  Bitboard allPawns = pos.pieces(PAWN);
+
+  Square strongBishop = pos.square<BISHOP>(strongSide);
+  Square weakKing = pos.square<KING>(weakSide);
+  Square strongKing = pos.square<KING>(strongSide);
+
+  // All strongSide pawns are on a single rook file?
+  if (!(strongPawns & ~FileABB) || !(strongPawns & ~FileHBB))
+  {
+      Square queeningSquare = relative_square(strongSide, make_square(file_of(lsb(strongPawns)), RANK_8));
+
+      if (   opposite_colors(queeningSquare, strongBishop)
+          && distance(queeningSquare, weakKing) <= 1)
+          return SCALE_FACTOR_DRAW;
+  }
+
+  // If all the pawns are on the same B or G file, then it's potentially a draw
+  if ((!(allPawns & ~FileBBB) || !(allPawns & ~FileGBB))
+      && pos.non_pawn_material(weakSide) == 0
+      && pos.count<PAWN>(weakSide) >= 1)
+  {
+      // Get the least advanced weakSide pawn
+      Square weakPawn = frontmost_sq(strongSide, pos.pieces(weakSide, PAWN));
+
+      // There's potential for a draw if the weak pawn is blocked on strongSide's 7th rank
+      // by a strong pawn, and the bishop cannot attack it or strongSide has only one pawn left.
+      if (   relative_rank(strongSide, weakPawn) == RANK_7
+          && (strongPawns & (weakPawn + pawn_push(weakSide)))
+          && (opposite_colors(strongBishop, weakPawn) || !more_than_one(strongPawns)))
+      {
+          int strongKingDist = distance(weakPawn, strongKing);
+          int weakKingDist = distance(weakPawn, weakKing);
+
+          // It's a draw if the weak king is on its back two ranks, within 2
+          // squares of the blocking pawn and the strong king is not
+          // closer. (I think this rule only fails in practically
+          // unreachable positions such as 5k1K/6p1/6P1/8/8/3B4/8/8 w
+          // and positions where qsearch will immediately correct the
+          // problem such as 8/4k1p1/6P1/1K6/3B4/8/8/8 w).
+          if (   relative_rank(strongSide, weakKing) >= RANK_7
+              && weakKingDist <= 2
+              && weakKingDist <= strongKingDist)
+              return SCALE_FACTOR_DRAW;
+      }
+  }
+
+  return SCALE_FACTOR_NONE;
+}
+
+
+/// KQ vs KR and one or more pawns. It tests for fortress draws with a rook on
+/// the third rank defended by a pawn.
+template<>
+ScaleFactor Endgame<KQKRPs>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, QueenValueMg, 0));
+  assert(pos.count<ROOK>(weakSide) == 1);
+  assert(pos.count<PAWN>(weakSide) >= 1);
+
+  Square strongKing = pos.square<KING>(strongSide);
+  Square weakKing   = pos.square<KING>(weakSide);
+  Square weakRook   = pos.square<ROOK>(weakSide);
+
+  if (    relative_rank(weakSide,   weakKing) <= RANK_2
+      &&  relative_rank(weakSide, strongKing) >= RANK_4
+      &&  relative_rank(weakSide,   weakRook) == RANK_3
+      && (  pos.pieces(weakSide, PAWN)
+          & attacks_bb<KING>(weakKing)
+          & pawn_attacks_bb(strongSide, weakRook)))
+          return SCALE_FACTOR_DRAW;
+
+  return SCALE_FACTOR_NONE;
+}
+
+
+/// KRP vs KR. This function knows a handful of the most important classes of
+/// drawn positions, but is far from perfect. It would probably be a good idea
+/// to add more knowledge in the future.
+///
+/// It would also be nice to rewrite the actual code for this function,
+/// which is mostly copied from Glaurung 1.x, and isn't very pretty.
+template<>
+ScaleFactor Endgame<KRPKR>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, RookValueMg, 1));
+  assert(verify_material(pos, weakSide,   RookValueMg, 0));
+
+  // Assume strongSide is white and the pawn is on files A-D
+  Square strongKing = normalize(pos, strongSide, pos.square<KING>(strongSide));
+  Square strongRook = normalize(pos, strongSide, pos.square<ROOK>(strongSide));
+  Square strongPawn = normalize(pos, strongSide, pos.square<PAWN>(strongSide));
+  Square weakKing = normalize(pos, strongSide, pos.square<KING>(weakSide));
+  Square weakRook = normalize(pos, strongSide, pos.square<ROOK>(weakSide));
+
+  File pawnFile = file_of(strongPawn);
+  Rank pawnRank = rank_of(strongPawn);
+  Square queeningSquare = make_square(pawnFile, RANK_8);
+  int tempo = (pos.side_to_move() == strongSide);
+
+  // If the pawn is not too far advanced and the defending king defends the
+  // queening square, use the third-rank defence.
+  if (   pawnRank <= RANK_5
+      && distance(weakKing, queeningSquare) <= 1
+      && strongKing <= SQ_H5
+      && (rank_of(weakRook) == RANK_6 || (pawnRank <= RANK_3 && rank_of(strongRook) != RANK_6)))
+      return SCALE_FACTOR_DRAW;
+
+  // The defending side saves a draw by checking from behind in case the pawn
+  // has advanced to the 6th rank with the king behind.
+  if (   pawnRank == RANK_6
+      && distance(weakKing, queeningSquare) <= 1
+      && rank_of(strongKing) + tempo <= RANK_6
+      && (rank_of(weakRook) == RANK_1 || (!tempo && distance<File>(weakRook, strongPawn) >= 3)))
+      return SCALE_FACTOR_DRAW;
+
+  if (   pawnRank >= RANK_6
+      && weakKing == queeningSquare
+      && rank_of(weakRook) == RANK_1
+      && (!tempo || distance(strongKing, strongPawn) >= 2))
+      return SCALE_FACTOR_DRAW;
+
+  // White pawn on a7 and rook on a8 is a draw if black's king is on g7 or h7
+  // and the black rook is behind the pawn.
+  if (   strongPawn == SQ_A7
+      && strongRook == SQ_A8
+      && (weakKing == SQ_H7 || weakKing == SQ_G7)
+      && file_of(weakRook) == FILE_A
+      && (rank_of(weakRook) <= RANK_3 || file_of(strongKing) >= FILE_D || rank_of(strongKing) <= RANK_5))
+      return SCALE_FACTOR_DRAW;
+
+  // If the defending king blocks the pawn and the attacking king is too far
+  // away, it's a draw.
+  if (   pawnRank <= RANK_5
+      && weakKing == strongPawn + NORTH
+      && distance(strongKing, strongPawn) - tempo >= 2
+      && distance(strongKing, weakRook) - tempo >= 2)
+      return SCALE_FACTOR_DRAW;
+
+  // Pawn on the 7th rank supported by the rook from behind usually wins if the
+  // attacking king is closer to the queening square than the defending king,
+  // and the defending king cannot gain tempi by threatening the attacking rook.
+  if (   pawnRank == RANK_7
+      && pawnFile != FILE_A
+      && file_of(strongRook) == pawnFile
+      && strongRook != queeningSquare
+      && (distance(strongKing, queeningSquare) < distance(weakKing, queeningSquare) - 2 + tempo)
+      && (distance(strongKing, queeningSquare) < distance(weakKing, strongRook) + tempo))
+      return ScaleFactor(SCALE_FACTOR_MAX - 2 * distance(strongKing, queeningSquare));
+
+  // Similar to the above, but with the pawn further back
+  if (   pawnFile != FILE_A
+      && file_of(strongRook) == pawnFile
+      && strongRook < strongPawn
+      && (distance(strongKing, queeningSquare) < distance(weakKing, queeningSquare) - 2 + tempo)
+      && (distance(strongKing, strongPawn + NORTH) < distance(weakKing, strongPawn + NORTH) - 2 + tempo)
+      && (  distance(weakKing, strongRook) + tempo >= 3
+          || (    distance(strongKing, queeningSquare) < distance(weakKing, strongRook) + tempo
+              && (distance(strongKing, strongPawn + NORTH) < distance(weakKing, strongPawn) + tempo))))
+      return ScaleFactor(  SCALE_FACTOR_MAX
+                         - 8 * distance(strongPawn, queeningSquare)
+                         - 2 * distance(strongKing, queeningSquare));
+
+  // If the pawn is not far advanced and the defending king is somewhere in
+  // the pawn's path, it's probably a draw.
+  if (pawnRank <= RANK_4 && weakKing > strongPawn)
+  {
+      if (file_of(weakKing) == file_of(strongPawn))
+          return ScaleFactor(10);
+      if (   distance<File>(weakKing, strongPawn) == 1
+          && distance(strongKing, weakKing) > 2)
+          return ScaleFactor(24 - 2 * distance(strongKing, weakKing));
+  }
+  return SCALE_FACTOR_NONE;
+}
+
+template<>
+ScaleFactor Endgame<KRPKB>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, RookValueMg, 1));
+  assert(verify_material(pos, weakSide, BishopValueMg, 0));
+
+  // Test for a rook pawn
+  if (pos.pieces(PAWN) & (FileABB | FileHBB))
+  {
+      Square weakKing = pos.square<KING>(weakSide);
+      Square weakBishop = pos.square<BISHOP>(weakSide);
+      Square strongKing = pos.square<KING>(strongSide);
+      Square strongPawn = pos.square<PAWN>(strongSide);
+      Rank pawnRank = relative_rank(strongSide, strongPawn);
+      Direction push = pawn_push(strongSide);
+
+      // If the pawn is on the 5th rank and the pawn (currently) is on
+      // the same color square as the bishop then there is a chance of
+      // a fortress. Depending on the king position give a moderate
+      // reduction or a stronger one if the defending king is near the
+      // corner but not trapped there.
+      if (pawnRank == RANK_5 && !opposite_colors(weakBishop, strongPawn))
+      {
+          int d = distance(strongPawn + 3 * push, weakKing);
+
+          if (d <= 2 && !(d == 0 && weakKing == strongKing + 2 * push))
+              return ScaleFactor(24);
+          else
+              return ScaleFactor(48);
+      }
+
+      // When the pawn has moved to the 6th rank we can be fairly sure
+      // it's drawn if the bishop attacks the square in front of the
+      // pawn from a reasonable distance and the defending king is near
+      // the corner
+      if (   pawnRank == RANK_6
+          && distance(strongPawn + 2 * push, weakKing) <= 1
+          && (attacks_bb<BISHOP>(weakBishop) & (strongPawn + push))
+          && distance<File>(weakBishop, strongPawn) >= 2)
+          return ScaleFactor(8);
+  }
+
+  return SCALE_FACTOR_NONE;
+}
+
+/// KRPP vs KRP. There is just a single rule: if the stronger side has no passed
+/// pawns and the defending king is actively placed, the position is drawish.
+template<>
+ScaleFactor Endgame<KRPPKRP>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, RookValueMg, 2));
+  assert(verify_material(pos, weakSide,   RookValueMg, 1));
+
+  Square strongPawn1 = pos.squares<PAWN>(strongSide)[0];
+  Square strongPawn2 = pos.squares<PAWN>(strongSide)[1];
+  Square weakKing = pos.square<KING>(weakSide);
+
+  // Does the stronger side have a passed pawn?
+  if (pos.pawn_passed(strongSide, strongPawn1) || pos.pawn_passed(strongSide, strongPawn2))
+      return SCALE_FACTOR_NONE;
+
+  Rank pawnRank = std::max(relative_rank(strongSide, strongPawn1), relative_rank(strongSide, strongPawn2));
+
+  if (   distance<File>(weakKing, strongPawn1) <= 1
+      && distance<File>(weakKing, strongPawn2) <= 1
+      && relative_rank(strongSide, weakKing) > pawnRank)
+  {
+      assert(pawnRank > RANK_1 && pawnRank < RANK_7);
+      return ScaleFactor(7 * pawnRank);
+  }
+  return SCALE_FACTOR_NONE;
+}
+
+
+/// K and two or more pawns vs K. There is just a single rule here: if all pawns
+/// are on the same rook file and are blocked by the defending king, it's a draw.
+template<>
+ScaleFactor Endgame<KPsK>::operator()(const Position& pos) const {
+
+  assert(pos.non_pawn_material(strongSide) == VALUE_ZERO);
+  assert(pos.count<PAWN>(strongSide) >= 2);
+  assert(verify_material(pos, weakSide, VALUE_ZERO, 0));
+
+  Square weakKing = pos.square<KING>(weakSide);
+  Bitboard strongPawns = pos.pieces(strongSide, PAWN);
+
+  // If all pawns are ahead of the king on a single rook file, it's a draw.
+  if (   !(strongPawns & ~(FileABB | FileHBB))
+      && !(strongPawns & ~passed_pawn_span(weakSide, weakKing)))
+      return SCALE_FACTOR_DRAW;
+
+  return SCALE_FACTOR_NONE;
+}
+
+
+/// KBP vs KB. There are two rules: if the defending king is somewhere along the
+/// path of the pawn, and the square of the king is not of the same color as the
+/// stronger side's bishop, it's a draw. If the two bishops have opposite color,
+/// it's almost always a draw.
+template<>
+ScaleFactor Endgame<KBPKB>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, BishopValueMg, 1));
+  assert(verify_material(pos, weakSide,   BishopValueMg, 0));
+
+  Square strongPawn = pos.square<PAWN>(strongSide);
+  Square strongBishop = pos.square<BISHOP>(strongSide);
+  Square weakBishop = pos.square<BISHOP>(weakSide);
+  Square weakKing = pos.square<KING>(weakSide);
+
+  // Case 1: Defending king blocks the pawn, and cannot be driven away
+  if (   (forward_file_bb(strongSide, strongPawn) & weakKing)
+      && (   opposite_colors(weakKing, strongBishop)
+          || relative_rank(strongSide, weakKing) <= RANK_6))
+      return SCALE_FACTOR_DRAW;
+
+  // Case 2: Opposite colored bishops
+  if (opposite_colors(strongBishop, weakBishop))
+      return SCALE_FACTOR_DRAW;
+
+  return SCALE_FACTOR_NONE;
+}
+
+
+/// KBPP vs KB. Detects a few basic opposite-colored-bishop draws (pawns on same or adjacent files).
+template<>
+ScaleFactor Endgame<KBPPKB>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, BishopValueMg, 2));
+  assert(verify_material(pos, weakSide,   BishopValueMg, 0));
+
+  Square strongBishop = pos.square<BISHOP>(strongSide);
+  Square weakBishop   = pos.square<BISHOP>(weakSide);
+
+  if (!opposite_colors(strongBishop, weakBishop))
+      return SCALE_FACTOR_NONE;  // every rule below requires opposite-colored bishops
+
+  Square weakKing = pos.square<KING>(weakSide);
+  Square strongPawn1 = pos.squares<PAWN>(strongSide)[0];
+  Square strongPawn2 = pos.squares<PAWN>(strongSide)[1];
+  Square blockSq1, blockSq2;
+
+  if (relative_rank(strongSide, strongPawn1) > relative_rank(strongSide, strongPawn2))
+  {
+      blockSq1 = strongPawn1 + pawn_push(strongSide);  // one pawn push ahead of the frontmost pawn
+      blockSq2 = make_square(file_of(strongPawn2), rank_of(strongPawn1));  // other pawn's file, frontmost pawn's rank
+  }
+  else
+  {
+      blockSq1 = strongPawn2 + pawn_push(strongSide);  // same construction; also taken when both pawns share a relative rank
+      blockSq2 = make_square(file_of(strongPawn1), rank_of(strongPawn2));
+  }
+
+  switch (distance<File>(strongPawn1, strongPawn2))
+  {
+  case 0:
+    // Both pawns are on the same file. It's an easy draw if the defender firmly
+    // controls some square in the frontmost pawn's path.
+    if (   file_of(weakKing) == file_of(blockSq1)
+        && relative_rank(strongSide, weakKing) >= relative_rank(strongSide, blockSq1)
+        && opposite_colors(weakKing, strongBishop))
+        return SCALE_FACTOR_DRAW;
+    else
+        return SCALE_FACTOR_NONE;
+
+  case 1:
+    // Pawns on adjacent files. It's a draw if the defender firmly controls the
+    // square in front of the frontmost pawn's path, and the square diagonally
+    // behind this square on the file of the other pawn.
+    if (   weakKing == blockSq1
+        && opposite_colors(weakKing, strongBishop)
+        && (   weakBishop == blockSq2
+            || (attacks_bb<BISHOP>(blockSq2, pos.pieces()) & pos.pieces(weakSide, BISHOP))
+            || distance<Rank>(strongPawn1, strongPawn2) >= 2))
+        return SCALE_FACTOR_DRAW;
+
+    else if (   weakKing == blockSq2  // mirrored setup: king on blockSq2, bishop covering blockSq1
+             && opposite_colors(weakKing, strongBishop)
+             && (   weakBishop == blockSq1
+                 || (attacks_bb<BISHOP>(blockSq1, pos.pieces()) & pos.pieces(weakSide, BISHOP))))
+        return SCALE_FACTOR_DRAW;
+    else
+        return SCALE_FACTOR_NONE;
+
+  default:
+    // The pawns are not on the same file or adjacent files. No scaling.
+    return SCALE_FACTOR_NONE;
+  }
+}
+
+
+/// KBP vs KN. There is a single rule: if the defending king is somewhere along
+/// the path of the pawn, and either its square is not of the same color as the
+/// stronger side's bishop or it is at most on the sixth relative rank, it's a draw.
+template<>
+ScaleFactor Endgame<KBPKN>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, BishopValueMg, 1));
+  assert(verify_material(pos, weakSide, KnightValueMg, 0));
+
+  Square strongPawn = pos.square<PAWN>(strongSide);
+  Square strongBishop = pos.square<BISHOP>(strongSide);
+  Square weakKing = pos.square<KING>(weakSide);
+
+  if (   file_of(weakKing) == file_of(strongPawn)  // king on the pawn's file...
+      && relative_rank(strongSide, strongPawn) < relative_rank(strongSide, weakKing)  // ...and ahead of the pawn
+      && (   opposite_colors(weakKing, strongBishop)
+          || relative_rank(strongSide, weakKing) <= RANK_6))
+      return SCALE_FACTOR_DRAW;
+
+  return SCALE_FACTOR_NONE;  // rule not matched: no scaling
+}
+
+
+/// KP vs KP. This is done by removing the weaker side's pawn and probing the
+/// KP vs K bitbase: if the weaker side has a draw without the pawn, it probably
+/// has at least a draw with the pawn as well. The exception is when the stronger
+/// side's pawn is far advanced and not on a rook file; in this case it is often
+/// possible to win (e.g. 8/4k3/3p4/3P4/6K1/8/8/8 w - - 0 1).
+template<>
+ScaleFactor Endgame<KPKP>::operator()(const Position& pos) const {
+
+  assert(verify_material(pos, strongSide, VALUE_ZERO, 1));
+  assert(verify_material(pos, weakSide,   VALUE_ZERO, 1));
+
+  // Assume strongSide is white and the pawn is on files A-D
+  Square strongKing = normalize(pos, strongSide, pos.square<KING>(strongSide));
+  Square weakKing   = normalize(pos, strongSide, pos.square<KING>(weakSide));
+  Square strongPawn = normalize(pos, strongSide, pos.square<PAWN>(strongSide));
+
+  Color us = strongSide == pos.side_to_move() ? WHITE : BLACK;  // side to move, with strongSide expressed as WHITE
+
+  // If the pawn has advanced to the fifth rank or further, and is not a
+  // rook pawn, it's too dangerous to assume that it's at least a draw.
+  if (rank_of(strongPawn) >= RANK_5 && file_of(strongPawn) != FILE_A)  // only FILE_A tested: pawn assumed on files A-D (see above)
+      return SCALE_FACTOR_NONE;
+
+  // Probe the KPK bitbase with the weaker side's pawn removed. If it's a draw,
+  // it's probably at least a draw even with the pawn.
+  return Bitbases::probe(strongKing, strongPawn, weakKing, us) ? SCALE_FACTOR_NONE : SCALE_FACTOR_DRAW;
+}
--- a/src/endgame.h
+++ b/src/endgame.h
@@ -0,0 +1,123 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef ENDGAME_H_INCLUDED
+#define ENDGAME_H_INCLUDED
+
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+
+#include "position.h"
+#include "types.h"
+
+
+/// EndgameCode lists all supported endgame functions by corresponding codes
+
+enum EndgameCode {
+
+  EVALUATION_FUNCTIONS,  // marker: codes below SCALING_FUNCTIONS map to Value (see eg_type)
+  KNNK,  // KNN vs K
+  KNNKP, // KNN vs KP
+  KXK,   // Generic "mate lone king" eval
+  KBNK,  // KBN vs K
+  KPK,   // KP vs K
+  KRKP,  // KR vs KP
+  KRKB,  // KR vs KB
+  KRKN,  // KR vs KN
+  KQKP,  // KQ vs KP
+  KQKR,  // KQ vs KR
+
+  SCALING_FUNCTIONS,  // marker: this code and all following ones map to ScaleFactor (see eg_type)
+  KBPsK,   // KB and pawns vs K
+  KQKRPs,  // KQ vs KR and pawns
+  KRPKR,   // KRP vs KR
+  KRPKB,   // KRP vs KB
+  KRPPKRP, // KRPP vs KRP
+  KPsK,    // K and pawns vs K
+  KBPKB,   // KBP vs KB
+  KBPPKB,  // KBPP vs KB
+  KBPKN,   // KBP vs KN
+  KPKP     // KP vs KP
+};
+
+
+/// Endgame functions can be of two types depending on whether they return a
+/// Value (codes below SCALING_FUNCTIONS) or a ScaleFactor (all other codes).
+
+template<EndgameCode E> using
+eg_type = typename std::conditional<(E < SCALING_FUNCTIONS), Value, ScaleFactor>::type;
+
+
+/// Base and derived functors for endgame evaluation and scaling functions
+
+template<typename T>
+struct EndgameBase {
+
+  explicit EndgameBase(Color c) : strongSide(c), weakSide(~c) {}  // c is the stronger side; weakSide is ~c
+  virtual ~EndgameBase() = default;  // virtual: objects are owned through EndgameBase pointers (Endgames::Ptr)
+  virtual T operator()(const Position&) const = 0;  // T is the result type: Value or ScaleFactor
+
+  const Color strongSide, weakSide;
+};
+
+
+template<EndgameCode E, typename T = eg_type<E>>  // T defaults to the result type implied by the code E
+struct Endgame : public EndgameBase<T> {
+
+  explicit Endgame(Color c) : EndgameBase<T>(c) {}
+  T operator()(const Position&) const override;  // only declared here; bodies are explicit specializations per code
+};
+
+
+/// The Endgames namespace handles the pointers to endgame evaluation and scaling
+/// base objects in two std::unordered_map. We use polymorphism to invoke the
+/// actual endgame function by calling its virtual operator().
+
+namespace Endgames {
+
+  template<typename T> using Ptr = std::unique_ptr<EndgameBase<T>>;
+  template<typename T> using Map = std::unordered_map<Key, Ptr<T>>;
+
+  extern std::pair<Map<Value>, Map<ScaleFactor>> maps;  // .first: Value functions, .second: ScaleFactor functions
+
+  void init();
+
+  template<typename T>
+  Map<T>& map() {
+    return std::get<std::is_same<T, ScaleFactor>::value>(maps);  // index 1 for ScaleFactor, index 0 otherwise
+  }
+
+  template<EndgameCode E, typename T = eg_type<E>>
+  void add(const std::string& code) {
+
+    StateInfo st;
+    map<T>()[Position().set(code, WHITE, &st).material_key()] = Ptr<T>(new Endgame<E>(WHITE));  // keyed by material key, one entry per color
+    map<T>()[Position().set(code, BLACK, &st).material_key()] = Ptr<T>(new Endgame<E>(BLACK));
+  }
+
+  template<typename T>
+  const EndgameBase<T>* probe(Key key) {
+    auto it = map<T>().find(key);
+    return it != map<T>().end() ? it->second.get() : nullptr;  // nullptr when no function is registered for key
+  }
+}
+
+#endif // #ifndef ENDGAME_H_INCLUDED
--- a/src/engine.cpp
+++ b/src/engine.cpp
@@ -1,372 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "engine.h"
-
-#include <algorithm>
-#include <cassert>
-#include <deque>
-#include <iosfwd>
-#include <memory>
-#include <ostream>
-#include <sstream>
-#include <string_view>
-#include <utility>
-#include <vector>
-
-#include "evaluate.h"
-#include "misc.h"
-#include "nnue/network.h"
-#include "nnue/nnue_common.h"
-#include "numa.h"
-#include "perft.h"
-#include "position.h"
-#include "search.h"
-#include "syzygy/tbprobe.h"
-#include "types.h"
-#include "uci.h"
-#include "ucioption.h"
-
-namespace Stockfish {
-
-namespace NN = Eval::NNUE;
-
-constexpr auto StartFEN   = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1";
-constexpr int  MaxHashMB  = Is64Bit ? 33554432 : 2048;
-int            MaxThreads = std::max(1024, 4 * int(get_hardware_concurrency()));
-
-Engine::Engine(std::optional<std::string> path) :
-    binaryDirectory(path ? CommandLine::get_binary_directory(*path) : ""),
-    numaContext(NumaConfig::from_system()),
-    states(new std::deque<StateInfo>(1)),
-    threads(),
-    networks(
-      numaContext,
-      NN::Networks(
-        NN::NetworkBig({EvalFileDefaultNameBig, "None", ""}, NN::EmbeddedNNUEType::BIG),
-        NN::NetworkSmall({EvalFileDefaultNameSmall, "None", ""}, NN::EmbeddedNNUEType::SMALL))) {
-    pos.set(StartFEN, false, &states->back());
-
-
-    options.add(  //
-      "Debug Log File", Option("", [](const Option& o) {
-          start_logger(o);
-          return std::nullopt;
-      }));
-
-    options.add(  //
-      "NumaPolicy", Option("auto", [this](const Option& o) {
-          set_numa_config_from_option(o);
-          return numa_config_information_as_string() + "\n"
-               + thread_allocation_information_as_string();
-      }));
-
-    options.add(  //
-      "Threads", Option(1, 1, MaxThreads, [this](const Option&) {
-          resize_threads();
-          return thread_allocation_information_as_string();
-      }));
-
-    options.add(  //
-      "Hash", Option(16, 1, MaxHashMB, [this](const Option& o) {
-          set_tt_size(o);
-          return std::nullopt;
-      }));
-
-    options.add(  //
-      "Clear Hash", Option([this](const Option&) {
-          search_clear();
-          return std::nullopt;
-      }));
-
-    options.add(  //
-      "Ponder", Option(false));
-
-    options.add(  //
-      "MultiPV", Option(1, 1, MAX_MOVES));
-
-    options.add("Skill Level", Option(20, 0, 20));
-
-    options.add("Move Overhead", Option(10, 0, 5000));
-
-    options.add("nodestime", Option(0, 0, 10000));
-
-    options.add("UCI_Chess960", Option(false));
-
-    options.add("UCI_LimitStrength", Option(false));
-
-    options.add("UCI_Elo",
-                Option(Stockfish::Search::Skill::LowestElo, Stockfish::Search::Skill::LowestElo,
-                       Stockfish::Search::Skill::HighestElo));
-
-    options.add("UCI_ShowWDL", Option(false));
-
-    options.add(  //
-      "SyzygyPath", Option("", [](const Option& o) {
-          Tablebases::init(o);
-          return std::nullopt;
-      }));
-
-    options.add("SyzygyProbeDepth", Option(1, 1, 100));
-
-    options.add("Syzygy50MoveRule", Option(true));
-
-    options.add("SyzygyProbeLimit", Option(7, 0, 7));
-
-    options.add(  //
-      "EvalFile", Option(EvalFileDefaultNameBig, [this](const Option& o) {
-          load_big_network(o);
-          return std::nullopt;
-      }));
-
-    options.add(  //
-      "EvalFileSmall", Option(EvalFileDefaultNameSmall, [this](const Option& o) {
-          load_small_network(o);
-          return std::nullopt;
-      }));
-
-    load_networks();
-    resize_threads();
-}
-
-std::uint64_t Engine::perft(const std::string& fen, Depth depth, bool isChess960) {
-    verify_networks();
-
-    return Benchmark::perft(fen, depth, isChess960);
-}
-
-void Engine::go(Search::LimitsType& limits) {
-    assert(limits.perft == 0);
-    verify_networks();
-
-    threads.start_thinking(options, pos, states, limits);
-}
-void Engine::stop() { threads.stop = true; }
-
-void Engine::search_clear() {
-    wait_for_search_finished();
-
-    tt.clear(threads);
-    threads.clear();
-
-    // @TODO wont work with multiple instances
-    Tablebases::init(options["SyzygyPath"]);  // Free mapped files
-}
-
-void Engine::set_on_update_no_moves(std::function<void(const Engine::InfoShort&)>&& f) {
-    updateContext.onUpdateNoMoves = std::move(f);
-}
-
-void Engine::set_on_update_full(std::function<void(const Engine::InfoFull&)>&& f) {
-    updateContext.onUpdateFull = std::move(f);
-}
-
-void Engine::set_on_iter(std::function<void(const Engine::InfoIter&)>&& f) {
-    updateContext.onIter = std::move(f);
-}
-
-void Engine::set_on_bestmove(std::function<void(std::string_view, std::string_view)>&& f) {
-    updateContext.onBestmove = std::move(f);
-}
-
-void Engine::set_on_verify_networks(std::function<void(std::string_view)>&& f) {
-    onVerifyNetworks = std::move(f);
-}
-
-void Engine::wait_for_search_finished() { threads.main_thread()->wait_for_search_finished(); }
-
-void Engine::set_position(const std::string& fen, const std::vector<std::string>& moves) {
-    // Drop the old state and create a new one
-    states = StateListPtr(new std::deque<StateInfo>(1));
-    pos.set(fen, options["UCI_Chess960"], &states->back());
-
-    for (const auto& move : moves)
-    {
-        auto m = UCIEngine::to_move(pos, move);
-
-        if (m == Move::none())
-            break;
-
-        states->emplace_back();
-        pos.do_move(m, states->back());
-    }
-}
-
-// modifiers
-
-void Engine::set_numa_config_from_option(const std::string& o) {
-    if (o == "auto" || o == "system")
-    {
-        numaContext.set_numa_config(NumaConfig::from_system());
-    }
-    else if (o == "hardware")
-    {
-        // Don't respect affinity set in the system.
-        numaContext.set_numa_config(NumaConfig::from_system(false));
-    }
-    else if (o == "none")
-    {
-        numaContext.set_numa_config(NumaConfig{});
-    }
-    else
-    {
-        numaContext.set_numa_config(NumaConfig::from_string(o));
-    }
-
-    // Force reallocation of threads in case affinities need to change.
-    resize_threads();
-    threads.ensure_network_replicated();
-}
-
-void Engine::resize_threads() {
-    threads.wait_for_search_finished();
-    threads.set(numaContext.get_numa_config(), {options, threads, tt, networks}, updateContext);
-
-    // Reallocate the hash with the new threadpool size
-    set_tt_size(options["Hash"]);
-    threads.ensure_network_replicated();
-}
-
-void Engine::set_tt_size(size_t mb) {
-    wait_for_search_finished();
-    tt.resize(mb, threads);
-}
-
-void Engine::set_ponderhit(bool b) { threads.main_manager()->ponder = b; }
-
-// network related
-
-void Engine::verify_networks() const {
-    networks->big.verify(options["EvalFile"], onVerifyNetworks);
-    networks->small.verify(options["EvalFileSmall"], onVerifyNetworks);
-}
-
-void Engine::load_networks() {
-    networks.modify_and_replicate([this](NN::Networks& networks_) {
-        networks_.big.load(binaryDirectory, options["EvalFile"]);
-        networks_.small.load(binaryDirectory, options["EvalFileSmall"]);
-    });
-    threads.clear();
-    threads.ensure_network_replicated();
-}
-
-void Engine::load_big_network(const std::string& file) {
-    networks.modify_and_replicate(
-      [this, &file](NN::Networks& networks_) { networks_.big.load(binaryDirectory, file); });
-    threads.clear();
-    threads.ensure_network_replicated();
-}
-
-void Engine::load_small_network(const std::string& file) {
-    networks.modify_and_replicate(
-      [this, &file](NN::Networks& networks_) { networks_.small.load(binaryDirectory, file); });
-    threads.clear();
-    threads.ensure_network_replicated();
-}
-
-void Engine::save_network(const std::pair<std::optional<std::string>, std::string> files[2]) {
-    networks.modify_and_replicate([&files](NN::Networks& networks_) {
-        networks_.big.save(files[0].first);
-        networks_.small.save(files[1].first);
-    });
-}
-
-// utility functions
-
-void Engine::trace_eval() const {
-    StateListPtr trace_states(new std::deque<StateInfo>(1));
-    Position     p;
-    p.set(pos.fen(), options["UCI_Chess960"], &trace_states->back());
-
-    verify_networks();
-
-    sync_cout << "\n" << Eval::trace(p, *networks) << sync_endl;
-}
-
-const OptionsMap& Engine::get_options() const { return options; }
-OptionsMap&       Engine::get_options() { return options; }
-
-std::string Engine::fen() const { return pos.fen(); }
-
-void Engine::flip() { pos.flip(); }
-
-std::string Engine::visualize() const {
-    std::stringstream ss;
-    ss << pos;
-    return ss.str();
-}
-
-int Engine::get_hashfull(int maxAge) const { return tt.hashfull(maxAge); }
-
-std::vector<std::pair<size_t, size_t>> Engine::get_bound_thread_count_by_numa_node() const {
-    auto                                   counts = threads.get_bound_thread_count_by_numa_node();
-    const NumaConfig&                      cfg    = numaContext.get_numa_config();
-    std::vector<std::pair<size_t, size_t>> ratios;
-    NumaIndex                              n = 0;
-    for (; n < counts.size(); ++n)
-        ratios.emplace_back(counts[n], cfg.num_cpus_in_numa_node(n));
-    if (!counts.empty())
-        for (; n < cfg.num_numa_nodes(); ++n)
-            ratios.emplace_back(0, cfg.num_cpus_in_numa_node(n));
-    return ratios;
-}
-
-std::string Engine::get_numa_config_as_string() const {
-    return numaContext.get_numa_config().to_string();
-}
-
-std::string Engine::numa_config_information_as_string() const {
-    auto cfgStr = get_numa_config_as_string();
-    return "Available processors: " + cfgStr;
-}
-
-std::string Engine::thread_binding_information_as_string() const {
-    auto              boundThreadsByNode = get_bound_thread_count_by_numa_node();
-    std::stringstream ss;
-    if (boundThreadsByNode.empty())
-        return ss.str();
-
-    bool isFirst = true;
-
-    for (auto&& [current, total] : boundThreadsByNode)
-    {
-        if (!isFirst)
-            ss << ":";
-        ss << current << "/" << total;
-        isFirst = false;
-    }
-
-    return ss.str();
-}
-
-std::string Engine::thread_allocation_information_as_string() const {
-    std::stringstream ss;
-
-    size_t threadsSize = threads.size();
-    ss << "Using " << threadsSize << (threadsSize > 1 ? " threads" : " thread");
-
-    auto boundThreadsByNodeStr = thread_binding_information_as_string();
-    if (boundThreadsByNodeStr.empty())
-        return ss.str();
-
-    ss << " with NUMA node thread binding: ";
-    ss << boundThreadsByNodeStr;
-
-    return ss.str();
-}
-}
--- a/src/engine.h
+++ b/src/engine.h
@@ -1,130 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef ENGINE_H_INCLUDED
-#define ENGINE_H_INCLUDED
-
-#include <cstddef>
-#include <cstdint>
-#include <functional>
-#include <optional>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <vector>
-
-#include "nnue/network.h"
-#include "numa.h"
-#include "position.h"
-#include "search.h"
-#include "syzygy/tbprobe.h"  // for Stockfish::Depth
-#include "thread.h"
-#include "tt.h"
-#include "ucioption.h"
-
-namespace Stockfish {
-
-class Engine {
-   public:
-    using InfoShort = Search::InfoShort;
-    using InfoFull  = Search::InfoFull;
-    using InfoIter  = Search::InfoIteration;
-
-    Engine(std::optional<std::string> path = std::nullopt);
-
-    // Cannot be movable due to components holding backreferences to fields
-    Engine(const Engine&)            = delete;
-    Engine(Engine&&)                 = delete;
-    Engine& operator=(const Engine&) = delete;
-    Engine& operator=(Engine&&)      = delete;
-
-    ~Engine() { wait_for_search_finished(); }
-
-    std::uint64_t perft(const std::string& fen, Depth depth, bool isChess960);
-
-    // non blocking call to start searching
-    void go(Search::LimitsType&);
-    // non blocking call to stop searching
-    void stop();
-
-    // blocking call to wait for search to finish
-    void wait_for_search_finished();
-    // set a new position, moves are in UCI format
-    void set_position(const std::string& fen, const std::vector<std::string>& moves);
-
-    // modifiers
-
-    void set_numa_config_from_option(const std::string& o);
-    void resize_threads();
-    void set_tt_size(size_t mb);
-    void set_ponderhit(bool);
-    void search_clear();
-
-    void set_on_update_no_moves(std::function<void(const InfoShort&)>&&);
-    void set_on_update_full(std::function<void(const InfoFull&)>&&);
-    void set_on_iter(std::function<void(const InfoIter&)>&&);
-    void set_on_bestmove(std::function<void(std::string_view, std::string_view)>&&);
-    void set_on_verify_networks(std::function<void(std::string_view)>&&);
-
-    // network related
-
-    void verify_networks() const;
-    void load_networks();
-    void load_big_network(const std::string& file);
-    void load_small_network(const std::string& file);
-    void save_network(const std::pair<std::optional<std::string>, std::string> files[2]);
-
-    // utility functions
-
-    void trace_eval() const;
-
-    const OptionsMap& get_options() const;
-    OptionsMap&       get_options();
-
-    int get_hashfull(int maxAge = 0) const;
-
-    std::string                            fen() const;
-    void                                   flip();
-    std::string                            visualize() const;
-    std::vector<std::pair<size_t, size_t>> get_bound_thread_count_by_numa_node() const;
-    std::string                            get_numa_config_as_string() const;
-    std::string                            numa_config_information_as_string() const;
-    std::string                            thread_allocation_information_as_string() const;
-    std::string                            thread_binding_information_as_string() const;
-
-   private:
-    const std::string binaryDirectory;
-
-    NumaReplicationContext numaContext;
-
-    Position     pos;
-    StateListPtr states;
-
-    OptionsMap                               options;
-    ThreadPool                               threads;
-    TranspositionTable                       tt;
-    LazyNumaReplicated<Eval::NNUE::Networks> networks;
-
-    Search::SearchManager::UpdateContext  updateContext;
-    std::function<void(std::string_view)> onVerifyNetworks;
-};
-
-}  // namespace Stockfish
-
-
-#endif  // #ifndef ENGINE_H_INCLUDED
--- a/src/evaluate.cpp
+++ b/src/evaluate.cpp
--- a/src/evaluate.h
+++ b/src/evaluate.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -23,36 +23,27 @@

 #include "types.h"

-namespace Stockfish {
-
 class Position;

 namespace Eval {

-// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
-// for the build process (profile-build and fishtest) to work. Do not change the
-// name of the macro or the location where this macro is defined, as it is used
-// in the Makefile/Fishtest.
-#define EvalFileDefaultNameBig "nn-1c0000000000.nnue"
-#define EvalFileDefaultNameSmall "nn-37f18f62d772.nnue"
+  std::string trace(const Position& pos);
+  Value evaluate(const Position& pos);

-namespace NNUE {
-struct Networks;
-struct AccumulatorCaches;
-class AccumulatorStack;
-}
+  extern bool useNNUE;
+  extern std::string eval_file_loaded;
+  void init_NNUE();
+  void verify_NNUE();

-std::string trace(Position& pos, const Eval::NNUE::Networks& networks);
+  namespace NNUE {

-int   simple_eval(const Position& pos);
-bool  use_smallnet(const Position& pos);
-Value evaluate(const NNUE::Networks&          networks,
-               const Position&                pos,
-               Eval::NNUE::AccumulatorStack&  accumulators,
-               Eval::NNUE::AccumulatorCaches& caches,
-               int                            optimism);
-}  // namespace Eval
+    Value evaluate(const Position& pos);
+    Value compute_eval(const Position& pos);
+    void  update_eval(const Position& pos);
+    bool  load_eval_file(const std::string& evalFile);

-}  // namespace Stockfish
+  } // namespace NNUE

-#endif  // #ifndef EVALUATE_H_INCLUDED
+} // namespace Eval
+
+#endif // #ifndef EVALUATE_H_INCLUDED
--- a/src/history.h
+++ b/src/history.h
@@ -1,173 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef HISTORY_H_INCLUDED
-#define HISTORY_H_INCLUDED
-
-#include <algorithm>
-#include <array>
-#include <cassert>
-#include <cmath>
-#include <cstdint>
-#include <cstdlib>
-#include <limits>
-#include <type_traits>  // IWYU pragma: keep
-
-#include "misc.h"
-#include "position.h"
-
-namespace Stockfish {
-
-constexpr int PAWN_HISTORY_SIZE        = 512;    // has to be a power of 2
-constexpr int CORRECTION_HISTORY_SIZE  = 32768;  // has to be a power of 2
-constexpr int CORRECTION_HISTORY_LIMIT = 1024;
-constexpr int LOW_PLY_HISTORY_SIZE     = 5;
-
-static_assert((PAWN_HISTORY_SIZE & (PAWN_HISTORY_SIZE - 1)) == 0,
-              "PAWN_HISTORY_SIZE has to be a power of 2");
-
-static_assert((CORRECTION_HISTORY_SIZE & (CORRECTION_HISTORY_SIZE - 1)) == 0,
-              "CORRECTION_HISTORY_SIZE has to be a power of 2");
-
-enum PawnHistoryType {
-    Normal,
-    Correction
-};
-
-template<PawnHistoryType T = Normal>
-inline int pawn_structure_index(const Position& pos) {
-    return pos.pawn_key() & ((T == Normal ? PAWN_HISTORY_SIZE : CORRECTION_HISTORY_SIZE) - 1);
-}
-
-inline int minor_piece_index(const Position& pos) {
-    return pos.minor_piece_key() & (CORRECTION_HISTORY_SIZE - 1);
-}
-
-template<Color c>
-inline int non_pawn_index(const Position& pos) {
-    return pos.non_pawn_key(c) & (CORRECTION_HISTORY_SIZE - 1);
-}
-
-// StatsEntry is the container of various numerical statistics. We use a class
-// instead of a naked value to directly call history update operator<<() on
-// the entry. The first template parameter T is the base type of the array,
-// and the second template parameter D limits the range of updates in [-D, D]
-// when we update values with the << operator
-template<typename T, int D>
-class StatsEntry {
-
-    static_assert(std::is_arithmetic_v<T>, "Not an arithmetic type");
-    static_assert(D <= std::numeric_limits<T>::max(), "D overflows T");
-
-    T entry;
-
-   public:
-    StatsEntry& operator=(const T& v) {
-        entry = v;
-        return *this;
-    }
-    operator const T&() const { return entry; }
-
-    void operator<<(int bonus) {
-        // Make sure that bonus is in range [-D, D]
-        int clampedBonus = std::clamp(bonus, -D, D);
-        entry += clampedBonus - entry * std::abs(clampedBonus) / D;
-
-        assert(std::abs(entry) <= D);
-    }
-};
-
-enum StatsType {
-    NoCaptures,
-    Captures
-};
-
-template<typename T, int D, std::size_t... Sizes>
-using Stats = MultiArray<StatsEntry<T, D>, Sizes...>;
-
-// ButterflyHistory records how often quiet moves have been successful or unsuccessful
-// during the current search, and is used for reduction and move ordering decisions.
-// It uses 2 tables (one for each color) indexed by the move's from and to squares,
-// see https://www.chessprogramming.org/Butterfly_Boards (~11 elo)
-using ButterflyHistory = Stats<std::int16_t, 7183, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)>;
-
-// LowPlyHistory is adressed by play and move's from and to squares, used
-// to improve move ordering near the root
-using LowPlyHistory =
-  Stats<std::int16_t, 7183, LOW_PLY_HISTORY_SIZE, int(SQUARE_NB) * int(SQUARE_NB)>;
-
-// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type]
-using CapturePieceToHistory = Stats<std::int16_t, 10692, PIECE_NB, SQUARE_NB, PIECE_TYPE_NB>;
-
-// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to]
-using PieceToHistory = Stats<std::int16_t, 30000, PIECE_NB, SQUARE_NB>;
-
-// ContinuationHistory is the combined history of a given pair of moves, usually
-// the current one given a previous one. The nested history table is based on
-// PieceToHistory instead of ButterflyBoards.
-// (~63 elo)
-using ContinuationHistory = MultiArray<PieceToHistory, PIECE_NB, SQUARE_NB>;
-
-// PawnHistory is addressed by the pawn structure and a move's [piece][to]
-using PawnHistory = Stats<std::int16_t, 8192, PAWN_HISTORY_SIZE, PIECE_NB, SQUARE_NB>;
-
-// Correction histories record differences between the static evaluation of
-// positions and their search score. It is used to improve the static evaluation
-// used by some search heuristics.
-// see https://www.chessprogramming.org/Static_Evaluation_Correction_History
-enum CorrHistType {
-    Pawn,          // By color and pawn structure
-    Minor,         // By color and positions of minor pieces (Knight, Bishop)
-    NonPawn,       // By non-pawn material positions and color
-    PieceTo,       // By [piece][to] move
-    Continuation,  // Combined history of move pairs
-};
-
-namespace Detail {
-
-template<CorrHistType>
-struct CorrHistTypedef {
-    using type = Stats<std::int16_t, CORRECTION_HISTORY_LIMIT, CORRECTION_HISTORY_SIZE, COLOR_NB>;
-};
-
-template<>
-struct CorrHistTypedef<PieceTo> {
-    using type = Stats<std::int16_t, CORRECTION_HISTORY_LIMIT, PIECE_NB, SQUARE_NB>;
-};
-
-template<>
-struct CorrHistTypedef<Continuation> {
-    using type = MultiArray<CorrHistTypedef<PieceTo>::type, PIECE_NB, SQUARE_NB>;
-};
-
-template<>
-struct CorrHistTypedef<NonPawn> {
-    using type =
-      Stats<std::int16_t, CORRECTION_HISTORY_LIMIT, CORRECTION_HISTORY_SIZE, COLOR_NB, COLOR_NB>;
-};
-
-}
-
-template<CorrHistType T>
-using CorrectionHistory = typename Detail::CorrHistTypedef<T>::type;
-
-using TTMoveHistory = StatsEntry<std::int16_t, 8192>;
-
-}  // namespace Stockfish
-
-#endif  // #ifndef HISTORY_H_INCLUDED
--- a/src/incbin/UNLICENCE
+++ b/src/incbin/UNLICENCE
@@ -1,26 +0,0 @@
-The file "incbin.h" is free and unencumbered software released into
-the public domain by Dale Weiler, see:
-   <https://github.com/graphitemaster/incbin>
-
-Anyone is free to copy, modify, publish, use, compile, sell, or
-distribute this software, either in source code form or as a compiled
-binary, for any purpose, commercial or non-commercial, and by any
-means.
-
-In jurisdictions that recognize copyright laws, the author or authors
-of this software dedicate any and all copyright interest in the
-software to the public domain. We make this dedication for the benefit
-of the public at large and to the detriment of our heirs and
-successors. We intend this dedication to be an overt act of
-relinquishment in perpetuity of all present and future rights to this
-software under copyright law.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.
-
-For more information, please refer to <http://unlicense.org/>
--- a/src/incbin/incbin.h
+++ b/src/incbin/incbin.h
@@ -1,476 +0,0 @@
-/**
- * @file incbin.h
- * @author Dale Weiler
- * @brief Utility for including binary files
- *
- * Facilities for including binary files into the current translation unit and
- * making use from them externally in other translation units.
- */
-#ifndef INCBIN_HDR
-#define INCBIN_HDR
-#include <limits.h>
-#if   defined(__AVX512BW__) || \
-      defined(__AVX512CD__) || \
-      defined(__AVX512DQ__) || \
-      defined(__AVX512ER__) || \
-      defined(__AVX512PF__) || \
-      defined(__AVX512VL__) || \
-      defined(__AVX512F__)
-# define INCBIN_ALIGNMENT_INDEX 6
-#elif defined(__AVX__)      || \
-      defined(__AVX2__)
-# define INCBIN_ALIGNMENT_INDEX 5
-#elif defined(__SSE__)      || \
-      defined(__SSE2__)     || \
-      defined(__SSE3__)     || \
-      defined(__SSSE3__)    || \
-      defined(__SSE4_1__)   || \
-      defined(__SSE4_2__)   || \
-      defined(__neon__)     || \
-      defined(__ARM_NEON)   || \
-      defined(__ALTIVEC__)
-# define INCBIN_ALIGNMENT_INDEX 4
-#elif ULONG_MAX != 0xffffffffu
-# define INCBIN_ALIGNMENT_INDEX 3
-# else
-# define INCBIN_ALIGNMENT_INDEX 2
-#endif
-
-/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */
-#define INCBIN_ALIGN_SHIFT_0 1
-#define INCBIN_ALIGN_SHIFT_1 2
-#define INCBIN_ALIGN_SHIFT_2 4
-#define INCBIN_ALIGN_SHIFT_3 8
-#define INCBIN_ALIGN_SHIFT_4 16
-#define INCBIN_ALIGN_SHIFT_5 32
-#define INCBIN_ALIGN_SHIFT_6 64
-
-/* Actual alignment value */
-#define INCBIN_ALIGNMENT \
-    INCBIN_CONCATENATE( \
-        INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \
-        INCBIN_ALIGNMENT_INDEX)
-
-/* Stringize */
-#define INCBIN_STR(X) \
-    #X
-#define INCBIN_STRINGIZE(X) \
-    INCBIN_STR(X)
-/* Concatenate */
-#define INCBIN_CAT(X, Y) \
-    X ## Y
-#define INCBIN_CONCATENATE(X, Y) \
-    INCBIN_CAT(X, Y)
-/* Deferred macro expansion */
-#define INCBIN_EVAL(X) \
-    X
-#define INCBIN_INVOKE(N, ...) \
-    INCBIN_EVAL(N(__VA_ARGS__))
-/* Variable argument count for overloading by arity */
-#define INCBIN_VA_ARG_COUNTER(_1, _2, _3, N, ...) N
-#define INCBIN_VA_ARGC(...) INCBIN_VA_ARG_COUNTER(__VA_ARGS__, 3, 2, 1, 0)
-
-/* Green Hills uses a different directive for including binary data */
-#if defined(__ghs__)
-#  if (__ghs_asm == 2)
-#    define INCBIN_MACRO ".file"
-/* Or consider the ".myrawdata" entry in the ld file */
-#  else
-#    define INCBIN_MACRO "\tINCBIN"
-#  endif
-#else
-#  define INCBIN_MACRO ".incbin"
-#endif
-
-#ifndef _MSC_VER
-#  define INCBIN_ALIGN \
-    __attribute__((aligned(INCBIN_ALIGNMENT)))
-#else
-#  define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT))
-#endif
-
-#if defined(__arm__) || /* GNU C and RealView */ \
-    defined(__arm) || /* Diab */ \
-    defined(_ARM) /* ImageCraft */
-#  define INCBIN_ARM
-#endif
-
-#ifdef __GNUC__
-/* Utilize .balign where supported */
-#  define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
-#  define INCBIN_ALIGN_BYTE ".balign 1\n"
-#elif defined(INCBIN_ARM)
-/*
- * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is
- * the shift count. This is the value passed to `.align'
- */
-#  define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n"
-#  define INCBIN_ALIGN_BYTE ".align 0\n"
-#else
-/* We assume other inline assembler's treat `.align' as `.balign' */
-#  define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
-#  define INCBIN_ALIGN_BYTE ".align 1\n"
-#endif
-
-/* INCBIN_CONST is used by incbin.c generated files */
-#if defined(__cplusplus)
-#  define INCBIN_EXTERNAL extern "C"
-#  define INCBIN_CONST    extern const
-#else
-#  define INCBIN_EXTERNAL extern
-#  define INCBIN_CONST    const
-#endif
-
-/**
- * @brief Optionally override the linker section into which size and data is
- * emitted.
- * 
- * @warning If you use this facility, you might have to deal with
- * platform-specific linker output section naming on your own.
- */
-#if !defined(INCBIN_OUTPUT_SECTION)
-#  if defined(__APPLE__)
-#    define INCBIN_OUTPUT_SECTION ".const_data"
-#  else
-#    define INCBIN_OUTPUT_SECTION ".rodata"
-#  endif
-#endif
-
-/**
- * @brief Optionally override the linker section into which data is emitted.
- *
- * @warning If you use this facility, you might have to deal with
- * platform-specific linker output section naming on your own.
- */
-#if !defined(INCBIN_OUTPUT_DATA_SECTION)
-#  define INCBIN_OUTPUT_DATA_SECTION INCBIN_OUTPUT_SECTION
-#endif
-
-/**
- * @brief Optionally override the linker section into which size is emitted.
- *
- * @warning If you use this facility, you might have to deal with
- * platform-specific linker output section naming on your own.
- * 
- * @note This is useful for Harvard architectures where program memory cannot
- * be directly read from the program without special instructions. With this you
- * can chose to put the size variable in RAM rather than ROM.
- */
-#if !defined(INCBIN_OUTPUT_SIZE_SECTION)
-#  define INCBIN_OUTPUT_SIZE_SECTION INCBIN_OUTPUT_SECTION
-#endif
-
-#if defined(__APPLE__)
-#  include "TargetConditionals.h"
-#  if defined(TARGET_OS_IPHONE) && !defined(INCBIN_SILENCE_BITCODE_WARNING)
-#    warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
-#  endif
-/* The directives are different for Apple branded compilers */
-#  define INCBIN_SECTION         INCBIN_OUTPUT_SECTION "\n"
-#  define INCBIN_GLOBAL(NAME)    ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
-#  define INCBIN_INT             ".long "
-#  define INCBIN_MANGLE          "_"
-#  define INCBIN_BYTE            ".byte "
-#  define INCBIN_TYPE(...)
-#else
-#  define INCBIN_SECTION         ".section " INCBIN_OUTPUT_SECTION "\n"
-#  define INCBIN_GLOBAL(NAME)    ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
-#  if defined(__ghs__)
-#    define INCBIN_INT           ".word "
-#  else
-#    define INCBIN_INT           ".int "
-#  endif
-#  if defined(__USER_LABEL_PREFIX__)
-#    define INCBIN_MANGLE        INCBIN_STRINGIZE(__USER_LABEL_PREFIX__)
-#  else
-#    define INCBIN_MANGLE        ""
-#  endif
-#  if defined(INCBIN_ARM)
-/* On arm assemblers, `@' is used as a line comment token */
-#    define INCBIN_TYPE(NAME)    ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n"
-#  elif defined(__MINGW32__) || defined(__MINGW64__)
-/* Mingw doesn't support this directive either */
-#    define INCBIN_TYPE(NAME)
-#  else
-/* It's safe to use `@' on other architectures */
-#    define INCBIN_TYPE(NAME)    ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n"
-#  endif
-#  define INCBIN_BYTE            ".byte "
-#endif
-
-/* List of style types used for symbol names */
-#define INCBIN_STYLE_CAMEL 0
-#define INCBIN_STYLE_SNAKE 1
-
-/**
- * @brief Specify the prefix to use for symbol names.
- *
- * @note By default this is "g".
- *
- * @code
- * #define INCBIN_PREFIX incbin
- * #include "incbin.h"
- * INCBIN(Foo, "foo.txt");
- *
- * // Now you have the following symbols instead:
- * // const unsigned char incbinFoo<data>[];
- * // const unsigned char *const incbinFoo<end>;
- * // const unsigned int incbinFoo<size>;
- * @endcode
- */
-#if !defined(INCBIN_PREFIX)
-#  define INCBIN_PREFIX g
-#endif
-
-/**
- * @brief Specify the style used for symbol names.
- *
- * Possible options are
- * - INCBIN_STYLE_CAMEL "CamelCase"
- * - INCBIN_STYLE_SNAKE "snake_case"
- *
- * @note By default this is INCBIN_STYLE_CAMEL
- *
- * @code
- * #define INCBIN_STYLE INCBIN_STYLE_SNAKE
- * #include "incbin.h"
- * INCBIN(foo, "foo.txt");
- *
- * // Now you have the following symbols:
- * // const unsigned char <prefix>foo_data[];
- * // const unsigned char *const <prefix>foo_end;
- * // const unsigned int <prefix>foo_size;
- * @endcode
- */
-#if !defined(INCBIN_STYLE)
-#  define INCBIN_STYLE INCBIN_STYLE_CAMEL
-#endif
-
-/* Style lookup tables */
-#define INCBIN_STYLE_0_DATA Data
-#define INCBIN_STYLE_0_END End
-#define INCBIN_STYLE_0_SIZE Size
-#define INCBIN_STYLE_1_DATA _data
-#define INCBIN_STYLE_1_END _end
-#define INCBIN_STYLE_1_SIZE _size
-
-/* Style lookup: returning identifier */
-#define INCBIN_STYLE_IDENT(TYPE) \
-    INCBIN_CONCATENATE( \
-        INCBIN_STYLE_, \
-        INCBIN_CONCATENATE( \
-            INCBIN_EVAL(INCBIN_STYLE), \
-            INCBIN_CONCATENATE(_, TYPE)))
-
-/* Style lookup: returning string literal */
-#define INCBIN_STYLE_STRING(TYPE) \
-    INCBIN_STRINGIZE( \
-        INCBIN_STYLE_IDENT(TYPE)) \
-
-/* Generate the global labels by indirectly invoking the macro with our style
- * type and concatenating the name against them. */
-#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \
-    INCBIN_INVOKE( \
-        INCBIN_GLOBAL, \
-        INCBIN_CONCATENATE( \
-            NAME, \
-            INCBIN_INVOKE( \
-                INCBIN_STYLE_IDENT, \
-                TYPE))) \
-    INCBIN_INVOKE( \
-        INCBIN_TYPE, \
-        INCBIN_CONCATENATE( \
-            NAME, \
-            INCBIN_INVOKE( \
-                INCBIN_STYLE_IDENT, \
-                TYPE)))
-
-/**
- * @brief Externally reference binary data included in another translation unit.
- *
- * Produces three external symbols that reference the binary data included in
- * another translation unit.
- *
- * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
- * "Data", as well as "End" and "Size" after. An example is provided below.
- *
- * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
- * @param NAME The name given for the binary data
- *
- * @code
- * INCBIN_EXTERN(Foo);
- *
- * // Now you have the following symbols:
- * // extern const unsigned char <prefix>Foo<data>[];
- * // extern const unsigned char *const <prefix>Foo<end>;
- * // extern const unsigned int <prefix>Foo<size>;
- * @endcode
- * 
- * You may specify a custom optional data type as well as the first argument.
- * @code
- * INCBIN_EXTERN(custom_type, Foo);
- * 
- * // Now you have the following symbols:
- * // extern const custom_type <prefix>Foo<data>[];
- * // extern const custom_type *const <prefix>Foo<end>;
- * // extern const unsigned int <prefix>Foo<size>;
- * @endcode
- */
-#define INCBIN_EXTERN(...) \
-    INCBIN_CONCATENATE(INCBIN_EXTERN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
-#define INCBIN_EXTERN_1(NAME, ...) \
-    INCBIN_EXTERN_2(unsigned char, NAME)
-#define INCBIN_EXTERN_2(TYPE, NAME) \
-    INCBIN_EXTERNAL const INCBIN_ALIGN TYPE \
-        INCBIN_CONCATENATE( \
-            INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
-            INCBIN_STYLE_IDENT(DATA))[]; \
-    INCBIN_EXTERNAL const INCBIN_ALIGN TYPE *const \
-    INCBIN_CONCATENATE( \
-        INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
-        INCBIN_STYLE_IDENT(END)); \
-    INCBIN_EXTERNAL const unsigned int \
-        INCBIN_CONCATENATE( \
-            INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
-            INCBIN_STYLE_IDENT(SIZE))
-
-/**
- * @brief Externally reference textual data included in another translation unit.
- *
- * Produces three external symbols that reference the textual data included in
- * another translation unit.
- *
- * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
- * "Data", as well as "End" and "Size" after. An example is provided below.
- *
- * @param NAME The name given for the textual data
- *
- * @code
- * INCBIN_EXTERN(Foo);
- *
- * // Now you have the following symbols:
- * // extern const char <prefix>Foo<data>[];
- * // extern const char *const <prefix>Foo<end>;
- * // extern const unsigned int <prefix>Foo<size>;
- * @endcode
- */
-#define INCTXT_EXTERN(NAME) \
-    INCBIN_EXTERN_2(char, NAME)
-
-/**
- * @brief Include a binary file into the current translation unit.
- *
- * Includes a binary file into the current translation unit, producing three symbols
- * for objects that encode the data and size respectively.
- *
- * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
- * "Data", as well as "End" and "Size" after. An example is provided below.
- *
- * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
- * @param NAME The name to associate with this binary data (as an identifier.)
- * @param FILENAME The file to include (as a string literal.)
- *
- * @code
- * INCBIN(Icon, "icon.png");
- *
- * // Now you have the following symbols:
- * // const unsigned char <prefix>Icon<data>[];
- * // const unsigned char *const <prefix>Icon<end>;
- * // const unsigned int <prefix>Icon<size>;
- * @endcode
- * 
- * You may specify a custom optional data type as well as the first argument.
- * These macros are specialized by arity.
- * @code
- * INCBIN(custom_type, Icon, "icon.png");
- *
- * // Now you have the following symbols:
- * // const custom_type <prefix>Icon<data>[];
- * // const custom_type *const <prefix>Icon<end>;
- * // const unsigned int <prefix>Icon<size>;
- * @endcode
- *
- * @warning This must be used in global scope
- * @warning The identifiers may be different if INCBIN_STYLE is not default
- *
- * To externally reference the data included by this in another translation unit
- * please @see INCBIN_EXTERN.
- */
-#ifdef _MSC_VER
-#  define INCBIN(NAME, FILENAME) \
-      INCBIN_EXTERN(NAME)
-#else
-#  define INCBIN(...) \
-     INCBIN_CONCATENATE(INCBIN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
-#  if defined(__GNUC__)
-#    define INCBIN_1(...) _Pragma("GCC error \"Single argument INCBIN not allowed\"")
-#  elif defined(__clang__)
-#    define INCBIN_1(...) _Pragma("clang error \"Single argument INCBIN not allowed\"")
-#  else
-#    define INCBIN_1(...) /* Cannot do anything here */
-#  endif
-#  define INCBIN_2(NAME, FILENAME) \
-      INCBIN_3(unsigned char, NAME, FILENAME)
-#  define INCBIN_3(TYPE, NAME, FILENAME) INCBIN_COMMON(TYPE, NAME, FILENAME, /* No terminator for binary data */)
-#  define INCBIN_COMMON(TYPE, NAME, FILENAME, TERMINATOR) \
-    __asm__(INCBIN_SECTION \
-            INCBIN_GLOBAL_LABELS(NAME, DATA) \
-            INCBIN_ALIGN_HOST \
-            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
-            INCBIN_MACRO " \"" FILENAME "\"\n" \
-                TERMINATOR \
-            INCBIN_GLOBAL_LABELS(NAME, END) \
-            INCBIN_ALIGN_BYTE \
-            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
-                INCBIN_BYTE "1\n" \
-            INCBIN_GLOBAL_LABELS(NAME, SIZE) \
-            INCBIN_ALIGN_HOST \
-            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \
-                INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \
-                           INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \
-            INCBIN_ALIGN_HOST \
-            ".text\n" \
-    ); \
-    INCBIN_EXTERN(TYPE, NAME)
-#endif
-
-/**
- * @brief Include a textual file into the current translation unit.
- * 
- * This behaves the same as INCBIN except it produces char compatible arrays
- * and implicitly adds a null-terminator byte, thus the size of data included
- * by this is one byte larger than that of INCBIN.
- *
- * Includes a textual file into the current translation unit, producing three
- * symbols for objects that encode the data and size respectively.
- *
- * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
- * "Data", as well as "End" and "Size" after. An example is provided below.
- *
- * @param NAME The name to associate with this binary data (as an identifier.)
- * @param FILENAME The file to include (as a string literal.)
- *
- * @code
- * INCTXT(Readme, "readme.txt");
- *
- * // Now you have the following symbols:
- * // const char <prefix>Readme<data>[];
- * // const char *const <prefix>Readme<end>;
- * // const unsigned int <prefix>Readme<size>;
- * @endcode
- *
- * @warning This must be used in global scope
- * @warning The identifiers may be different if INCBIN_STYLE is not default
- *
- * To externally reference the data included by this in another translation unit
- * please @see INCBIN_EXTERN.
- */
-#if defined(_MSC_VER)
-#  define INCTXT(NAME, FILENAME) \
-     INCBIN_EXTERN(NAME)
-#else
-#  define INCTXT(NAME, FILENAME) \
-     INCBIN_COMMON(char, NAME, FILENAME, INCBIN_BYTE "0\n")
-#endif
-
-#endif
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,26 +19,35 @@
 #include <iostream>

 #include "bitboard.h"
-#include "misc.h"
+#include "endgame.h"
 #include "position.h"
-#include "types.h"
+#include "search.h"
+#include "thread.h"
+#include "tt.h"
 #include "uci.h"
-#include "tune.h"
+#include "syzygy/tbprobe.h"

-using namespace Stockfish;
+namespace PSQT {
+  void init();
+}

 int main(int argc, char* argv[]) {

-    std::cout << engine_info() << std::endl;
+  std::cout << engine_info() << std::endl;

-    Bitboards::init();
-    Position::init();
+  UCI::init(Options);
+  Tune::init();
+  PSQT::init();
+  Bitboards::init();
+  Position::init();
+  Bitbases::init();
+  Endgames::init();
+  Threads.set(size_t(Options["Threads"]));
+  Search::clear(); // After threads are up
+  Eval::init_NNUE();

-    UCIEngine uci(argc, argv);
+  UCI::loop(argc, argv);

-    Tune::init(uci.engine_options());
-
-    uci.loop();
-
-    return 0;
+  Threads.set(0);
+  return 0;
 }
--- a/src/material.cpp
+++ b/src/material.cpp
@@ -0,0 +1,220 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <cassert>
+#include <cstring>   // For std::memset
+
+#include "material.h"
+#include "thread.h"
+
+using namespace std;
+
+namespace {
+
+  // Polynomial material imbalance parameters
+
+  constexpr int QuadraticOurs[][PIECE_TYPE_NB] = {
+    //            OUR PIECES
+    // pair pawn knight bishop rook queen
+    {1438                               }, // Bishop pair
+    {  40,   38                         }, // Pawn
+    {  32,  255, -62                    }, // Knight      OUR PIECES
+    {   0,  104,   4,    0              }, // Bishop
+    { -26,   -2,  47,   105,  -208      }, // Rook
+    {-189,   24, 117,   133,  -134, -6  }  // Queen
+  };
+
+  constexpr int QuadraticTheirs[][PIECE_TYPE_NB] = {
+    //           THEIR PIECES
+    // pair pawn knight bishop rook queen
+    {                                   }, // Bishop pair
+    {  36,                              }, // Pawn
+    {   9,   63,                        }, // Knight      OUR PIECES
+    {  59,   65,  42,                   }, // Bishop
+    {  46,   39,  24,   -24,            }, // Rook
+    {  97,  100, -42,   137,  268,      }  // Queen
+  };
+
+  // Endgame evaluation and scaling functions are accessed directly and not through
+  // the function maps because they correspond to more than one material hash key.
+  Endgame<KXK>    EvaluateKXK[] = { Endgame<KXK>(WHITE),    Endgame<KXK>(BLACK) };
+
+  Endgame<KBPsK>  ScaleKBPsK[]  = { Endgame<KBPsK>(WHITE),  Endgame<KBPsK>(BLACK) };
+  Endgame<KQKRPs> ScaleKQKRPs[] = { Endgame<KQKRPs>(WHITE), Endgame<KQKRPs>(BLACK) };
+  Endgame<KPsK>   ScaleKPsK[]   = { Endgame<KPsK>(WHITE),   Endgame<KPsK>(BLACK) };
+  Endgame<KPKP>   ScaleKPKP[]   = { Endgame<KPKP>(WHITE),   Endgame<KPKP>(BLACK) };
+
+  // Helper used to detect a given material distribution
+  bool is_KXK(const Position& pos, Color us) {
+    return  !more_than_one(pos.pieces(~us))
+          && pos.non_pawn_material(us) >= RookValueMg;
+  }
+
+  bool is_KBPsK(const Position& pos, Color us) {
+    return   pos.non_pawn_material(us) == BishopValueMg
+          && pos.count<PAWN  >(us) >= 1;
+  }
+
+  bool is_KQKRPs(const Position& pos, Color us) {
+    return  !pos.count<PAWN>(us)
+          && pos.non_pawn_material(us) == QueenValueMg
+          && pos.count<ROOK>(~us) == 1
+          && pos.count<PAWN>(~us) >= 1;
+  }
+
+
+  /// imbalance() calculates the imbalance by comparing the piece count of each
+  /// piece type for both colors.
+
+  template<Color Us>
+  int imbalance(const int pieceCount[][PIECE_TYPE_NB]) {
+
+    constexpr Color Them = ~Us;
+
+    int bonus = 0;
+
+    // Second-degree polynomial material imbalance, by Tord Romstad
+    for (int pt1 = NO_PIECE_TYPE; pt1 <= QUEEN; ++pt1)
+    {
+        if (!pieceCount[Us][pt1])
+            continue;
+
+        int v = QuadraticOurs[pt1][pt1] * pieceCount[Us][pt1];
+
+        for (int pt2 = NO_PIECE_TYPE; pt2 < pt1; ++pt2)
+            v +=  QuadraticOurs[pt1][pt2] * pieceCount[Us][pt2]
+                + QuadraticTheirs[pt1][pt2] * pieceCount[Them][pt2];
+
+        bonus += pieceCount[Us][pt1] * v;
+    }
+
+    return bonus;
+  }
+
+} // namespace
+
+namespace Material {
+
+
+/// Material::probe() looks up the current position's material configuration in
+/// the material hash table. It returns a pointer to the Entry if the position
+/// is found. Otherwise a new Entry is computed and stored there, so we don't
+/// have to recompute all when the same material configuration occurs again.
+
+Entry* probe(const Position& pos) {
+
+  Key key = pos.material_key();
+  Entry* e = pos.this_thread()->materialTable[key];
+
+  if (e->key == key)
+      return e;
+
+  std::memset(e, 0, sizeof(Entry));
+  e->key = key;
+  e->factor[WHITE] = e->factor[BLACK] = (uint8_t)SCALE_FACTOR_NORMAL;
+
+  Value npm_w = pos.non_pawn_material(WHITE);
+  Value npm_b = pos.non_pawn_material(BLACK);
+  Value npm   = Utility::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit);
+
+  // Map total non-pawn material into [PHASE_ENDGAME, PHASE_MIDGAME]
+  e->gamePhase = Phase(((npm - EndgameLimit) * PHASE_MIDGAME) / (MidgameLimit - EndgameLimit));
+
+  // Check whether we have a specialized evaluation function for this particular
+  // material configuration. First we look for a fixed-configuration one, then
+  // for a generic one if that lookup failed.
+  if ((e->evaluationFunction = Endgames::probe<Value>(key)) != nullptr)
+      return e;
+
+  for (Color c : { WHITE, BLACK })
+      if (is_KXK(pos, c))
+      {
+          e->evaluationFunction = &EvaluateKXK[c];
+          return e;
+      }
+
+  // OK, we didn't find any special evaluation function for the current material
+  // configuration. Is there a suitable specialized scaling function?
+  const auto* sf = Endgames::probe<ScaleFactor>(key);
+
+  if (sf)
+  {
+      e->scalingFunction[sf->strongSide] = sf; // Only strong color assigned
+      return e;
+  }
+
+  // We didn't find any specialized scaling function, so fall back on generic
+  // ones that refer to more than one material distribution. Note that in this
+  // case we don't return after setting the function.
+  for (Color c : { WHITE, BLACK })
+  {
+    if (is_KBPsK(pos, c))
+        e->scalingFunction[c] = &ScaleKBPsK[c];
+
+    else if (is_KQKRPs(pos, c))
+        e->scalingFunction[c] = &ScaleKQKRPs[c];
+  }
+
+  if (npm_w + npm_b == VALUE_ZERO && pos.pieces(PAWN)) // Only pawns on the board
+  {
+      if (!pos.count<PAWN>(BLACK))
+      {
+          assert(pos.count<PAWN>(WHITE) >= 2);
+
+          e->scalingFunction[WHITE] = &ScaleKPsK[WHITE];
+      }
+      else if (!pos.count<PAWN>(WHITE))
+      {
+          assert(pos.count<PAWN>(BLACK) >= 2);
+
+          e->scalingFunction[BLACK] = &ScaleKPsK[BLACK];
+      }
+      else if (pos.count<PAWN>(WHITE) == 1 && pos.count<PAWN>(BLACK) == 1)
+      {
+          // This is a special case because we set scaling functions
+          // for both colors instead of only one.
+          e->scalingFunction[WHITE] = &ScaleKPKP[WHITE];
+          e->scalingFunction[BLACK] = &ScaleKPKP[BLACK];
+      }
+  }
+
+  // Having no pawns makes it difficult to win, even with a small material
+  // advantage. This catches some trivial draws like KK, KBK and KNK and gives a
+  // drawish scale factor for cases such as KRKBP and KmmKm (except for KBBKN).
+  if (!pos.count<PAWN>(WHITE) && npm_w - npm_b <= BishopValueMg)
+      e->factor[WHITE] = uint8_t(npm_w <  RookValueMg   ? SCALE_FACTOR_DRAW :
+                                 npm_b <= BishopValueMg ? 4 : 14);
+
+  if (!pos.count<PAWN>(BLACK) && npm_b - npm_w <= BishopValueMg)
+      e->factor[BLACK] = uint8_t(npm_b <  RookValueMg   ? SCALE_FACTOR_DRAW :
+                                 npm_w <= BishopValueMg ? 4 : 14);
+
+  // Evaluate the material imbalance. We use NO_PIECE_TYPE as a placeholder
+  // for the bishop pair "extended piece", which allows us to be more flexible
+  // in defining bishop pair bonuses.
+  const int pieceCount[COLOR_NB][PIECE_TYPE_NB] = {
+  { pos.count<BISHOP>(WHITE) > 1, pos.count<PAWN>(WHITE), pos.count<KNIGHT>(WHITE),
+    pos.count<BISHOP>(WHITE)    , pos.count<ROOK>(WHITE), pos.count<QUEEN >(WHITE) },
+  { pos.count<BISHOP>(BLACK) > 1, pos.count<PAWN>(BLACK), pos.count<KNIGHT>(BLACK),
+    pos.count<BISHOP>(BLACK)    , pos.count<ROOK>(BLACK), pos.count<QUEEN >(BLACK) } };
+
+  e->value = int16_t((imbalance<WHITE>(pieceCount) - imbalance<BLACK>(pieceCount)) / 16);
+  return e;
+}
+
+} // namespace Material
--- a/src/material.h
+++ b/src/material.h
@@ -0,0 +1,71 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef MATERIAL_H_INCLUDED
+#define MATERIAL_H_INCLUDED
+
+#include "endgame.h"
+#include "misc.h"
+#include "position.h"
+#include "types.h"
+
+namespace Material {
+
+/// Material::Entry contains various information about a material configuration.
+/// It contains a material imbalance evaluation, a pointer to a special endgame
+/// evaluation function (which in most cases is nullptr, meaning that the
+/// standard evaluation function will be used), and scale factors.
+///
+/// The scale factors are used to scale the evaluation score up or down. For
+/// instance, in KRB vs KR endgames, the score is scaled down by a factor of 4,
+/// which will result in scores of absolute value less than one pawn.
+
+struct Entry {
+
+  Score imbalance() const { return make_score(value, value); }
+  Phase game_phase() const { return gamePhase; }
+  bool specialized_eval_exists() const { return evaluationFunction != nullptr; }
+  Value evaluate(const Position& pos) const { return (*evaluationFunction)(pos); }
+
+  // scale_factor() takes a position and a color as input and returns a scale factor
+  // for the given color. We have to provide the position in addition to the color
+  // because the scale factor may also be a function which should be applied to
+  // the position. For instance, in KBP vs K endgames, the scaling function looks
+  // for rook pawns and wrong-colored bishops.
+  ScaleFactor scale_factor(const Position& pos, Color c) const {
+    ScaleFactor sf = scalingFunction[c] ? (*scalingFunction[c])(pos)
+                                        :  SCALE_FACTOR_NONE;
+    return sf != SCALE_FACTOR_NONE ? sf : ScaleFactor(factor[c]);
+  }
+
+  Key key;
+  const EndgameBase<Value>* evaluationFunction;
+  const EndgameBase<ScaleFactor>* scalingFunction[COLOR_NB]; // Could be one for each
+                                                             // side (e.g. KPKP, KBPsK)
+  int16_t value;
+  uint8_t factor[COLOR_NB];
+  Phase gamePhase;
+};
+
+typedef HashTable<Entry, 8192> Table;
+
+Entry* probe(const Position& pos);
+
+} // namespace Material
+
+#endif // #ifndef MATERIAL_H_INCLUDED
--- a/src/memory.cpp
+++ b/src/memory.cpp
@@ -1,268 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "memory.h"
-
-#include <cstdlib>
-
-#if __has_include("features.h")
-    #include <features.h>
-#endif
-
-#if defined(__linux__) && !defined(__ANDROID__)
-    #include <sys/mman.h>
-#endif
-
-#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \
-  || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \
-  || defined(__e2k__)
-    #define POSIXALIGNEDALLOC
-    #include <stdlib.h>
-#endif
-
-#ifdef _WIN32
-    #if _WIN32_WINNT < 0x0601
-        #undef _WIN32_WINNT
-        #define _WIN32_WINNT 0x0601  // Force to include needed API prototypes
-    #endif
-
-    #ifndef NOMINMAX
-        #define NOMINMAX
-    #endif
-
-    #include <ios>       // std::hex, std::dec
-    #include <iostream>  // std::cerr
-    #include <ostream>   // std::endl
-    #include <windows.h>
-
-// The needed Windows API for processor groups could be missed from old Windows
-// versions, so instead of calling them directly (forcing the linker to resolve
-// the calls at compile time), try to load them at runtime. To do this we need
-// first to define the corresponding function pointers.
-
-extern "C" {
-using OpenProcessToken_t      = bool (*)(HANDLE, DWORD, PHANDLE);
-using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
-using AdjustTokenPrivileges_t =
-  bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
-}
-#endif
-
-
-namespace Stockfish {
-
-// Wrappers for systems where the c++17 implementation does not guarantee the
-// availability of aligned_alloc(). Memory allocated with std_aligned_alloc()
-// must be freed with std_aligned_free().
-
-void* std_aligned_alloc(size_t alignment, size_t size) {
-#if defined(_ISOC11_SOURCE)
-    return aligned_alloc(alignment, size);
-#elif defined(POSIXALIGNEDALLOC)
-    void* mem = nullptr;
-    posix_memalign(&mem, alignment, size);
-    return mem;
-#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
-    return _mm_malloc(size, alignment);
-#elif defined(_WIN32)
-    return _aligned_malloc(size, alignment);
-#else
-    return std::aligned_alloc(alignment, size);
-#endif
-}
-
-void std_aligned_free(void* ptr) {
-
-#if defined(POSIXALIGNEDALLOC)
-    free(ptr);
-#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
-    _mm_free(ptr);
-#elif defined(_WIN32)
-    _aligned_free(ptr);
-#else
-    free(ptr);
-#endif
-}
-
-// aligned_large_pages_alloc() will return suitably aligned memory,
-// if possible using large pages.
-
-#if defined(_WIN32)
-
-static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) {
-
-    #if !defined(_WIN64)
-    return nullptr;
-    #else
-
-    HANDLE hProcessToken{};
-    LUID   luid{};
-    void*  mem = nullptr;
-
-    const size_t largePageSize = GetLargePageMinimum();
-    if (!largePageSize)
-        return nullptr;
-
-    // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges
-
-    HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));
-
-    if (!hAdvapi32)
-        hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));
-
-    auto OpenProcessToken_f =
-      OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
-    if (!OpenProcessToken_f)
-        return nullptr;
-    auto LookupPrivilegeValueA_f =
-      LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
-    if (!LookupPrivilegeValueA_f)
-        return nullptr;
-    auto AdjustTokenPrivileges_f =
-      AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
-    if (!AdjustTokenPrivileges_f)
-        return nullptr;
-
-    // We need SeLockMemoryPrivilege, so try to enable it for the process
-
-    if (!OpenProcessToken_f(  // OpenProcessToken()
-          GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
-        return nullptr;
-
-    if (LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
-    {
-        TOKEN_PRIVILEGES tp{};
-        TOKEN_PRIVILEGES prevTp{};
-        DWORD            prevTpLen = 0;
-
-        tp.PrivilegeCount           = 1;
-        tp.Privileges[0].Luid       = luid;
-        tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
-
-        // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges()
-        // succeeds, we still need to query GetLastError() to ensure that the privileges
-        // were actually obtained.
-
-        if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
-                                    &prevTpLen)
-            && GetLastError() == ERROR_SUCCESS)
-        {
-            // Round up size to full pages and allocate
-            allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
-            mem       = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
-                                     PAGE_READWRITE);
-
-            // Privilege no longer needed, restore previous state
-            AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
-        }
-    }
-
-    CloseHandle(hProcessToken);
-
-    return mem;
-
-    #endif
-}
-
-void* aligned_large_pages_alloc(size_t allocSize) {
-
-    // Try to allocate large pages
-    void* mem = aligned_large_pages_alloc_windows(allocSize);
-
-    // Fall back to regular, page-aligned, allocation if necessary
-    if (!mem)
-        mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
-
-    return mem;
-}
-
-#else
-
-void* aligned_large_pages_alloc(size_t allocSize) {
-
-    #if defined(__linux__)
-    constexpr size_t alignment = 2 * 1024 * 1024;  // 2MB page size assumed
-    #else
-    constexpr size_t alignment = 4096;  // small page size assumed
-    #endif
-
-    // Round up to multiples of alignment
-    size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
-    void*  mem  = std_aligned_alloc(alignment, size);
-    #if defined(MADV_HUGEPAGE)
-    madvise(mem, size, MADV_HUGEPAGE);
-    #endif
-    return mem;
-}
-
-#endif
-
-bool has_large_pages() {
-
-#if defined(_WIN32)
-
-    constexpr size_t page_size = 2 * 1024 * 1024;  // 2MB page size assumed
-    void*            mem       = aligned_large_pages_alloc_windows(page_size);
-    if (mem == nullptr)
-    {
-        return false;
-    }
-    else
-    {
-        aligned_large_pages_free(mem);
-        return true;
-    }
-
-#elif defined(__linux__)
-
-    #if defined(MADV_HUGEPAGE)
-    return true;
-    #else
-    return false;
-    #endif
-
-#else
-
-    return false;
-
-#endif
-}
-
-
-// aligned_large_pages_free() will free the previously memory allocated
-// by aligned_large_pages_alloc(). The effect is a nop if mem == nullptr.
-
-#if defined(_WIN32)
-
-void aligned_large_pages_free(void* mem) {
-
-    if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
-    {
-        DWORD err = GetLastError();
-        std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err
-                  << std::dec << std::endl;
-        exit(EXIT_FAILURE);
-    }
-}
-
-#else
-
-void aligned_large_pages_free(void* mem) { std_aligned_free(mem); }
-
-#endif
-}  // namespace Stockfish
--- a/src/memory.h
+++ b/src/memory.h
@@ -1,217 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef MEMORY_H_INCLUDED
-#define MEMORY_H_INCLUDED
-
-#include <algorithm>
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <new>
-#include <type_traits>
-#include <utility>
-
-#include "types.h"
-
-namespace Stockfish {
-
-void* std_aligned_alloc(size_t alignment, size_t size);
-void  std_aligned_free(void* ptr);
-
-// Memory aligned by page size, min alignment: 4096 bytes
-void* aligned_large_pages_alloc(size_t size);
-void  aligned_large_pages_free(void* mem);
-
-bool has_large_pages();
-
-// Frees memory which was placed there with placement new.
-// Works for both single objects and arrays of unknown bound.
-template<typename T, typename FREE_FUNC>
-void memory_deleter(T* ptr, FREE_FUNC free_func) {
-    if (!ptr)
-        return;
-
-    // Explicitly needed to call the destructor
-    if constexpr (!std::is_trivially_destructible_v<T>)
-        ptr->~T();
-
-    free_func(ptr);
-}
-
-// Frees memory which was placed there with placement new.
-// Works for both single objects and arrays of unknown bound.
-template<typename T, typename FREE_FUNC>
-void memory_deleter_array(T* ptr, FREE_FUNC free_func) {
-    if (!ptr)
-        return;
-
-
-    // Move back on the pointer to where the size is allocated
-    const size_t array_offset = std::max(sizeof(size_t), alignof(T));
-    char*        raw_memory   = reinterpret_cast<char*>(ptr) - array_offset;
-
-    if constexpr (!std::is_trivially_destructible_v<T>)
-    {
-        const size_t size = *reinterpret_cast<size_t*>(raw_memory);
-
-        // Explicitly call the destructor for each element in reverse order
-        for (size_t i = size; i-- > 0;)
-            ptr[i].~T();
-    }
-
-    free_func(raw_memory);
-}
-
-// Allocates memory for a single object and places it there with placement new
-template<typename T, typename ALLOC_FUNC, typename... Args>
-inline std::enable_if_t<!std::is_array_v<T>, T*> memory_allocator(ALLOC_FUNC alloc_func,
-                                                                  Args&&... args) {
-    void* raw_memory = alloc_func(sizeof(T));
-    ASSERT_ALIGNED(raw_memory, alignof(T));
-    return new (raw_memory) T(std::forward<Args>(args)...);
-}
-
-// Allocates memory for an array of unknown bound and places it there with placement new
-template<typename T, typename ALLOC_FUNC>
-inline std::enable_if_t<std::is_array_v<T>, std::remove_extent_t<T>*>
-memory_allocator(ALLOC_FUNC alloc_func, size_t num) {
-    using ElementType = std::remove_extent_t<T>;
-
-    const size_t array_offset = std::max(sizeof(size_t), alignof(ElementType));
-
-    // Save the array size in the memory location
-    char* raw_memory =
-      reinterpret_cast<char*>(alloc_func(array_offset + num * sizeof(ElementType)));
-    ASSERT_ALIGNED(raw_memory, alignof(T));
-
-    new (raw_memory) size_t(num);
-
-    for (size_t i = 0; i < num; ++i)
-        new (raw_memory + array_offset + i * sizeof(ElementType)) ElementType();
-
-    // Need to return the pointer at the start of the array so that
-    // the indexing in unique_ptr<T[]> works.
-    return reinterpret_cast<ElementType*>(raw_memory + array_offset);
-}
-
-//
-//
-// aligned large page unique ptr
-//
-//
-
-template<typename T>
-struct LargePageDeleter {
-    void operator()(T* ptr) const { return memory_deleter<T>(ptr, aligned_large_pages_free); }
-};
-
-template<typename T>
-struct LargePageArrayDeleter {
-    void operator()(T* ptr) const { return memory_deleter_array<T>(ptr, aligned_large_pages_free); }
-};
-
-template<typename T>
-using LargePagePtr =
-  std::conditional_t<std::is_array_v<T>,
-                     std::unique_ptr<T, LargePageArrayDeleter<std::remove_extent_t<T>>>,
-                     std::unique_ptr<T, LargePageDeleter<T>>>;
-
-// make_unique_large_page for single objects
-template<typename T, typename... Args>
-std::enable_if_t<!std::is_array_v<T>, LargePagePtr<T>> make_unique_large_page(Args&&... args) {
-    static_assert(alignof(T) <= 4096,
-                  "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
-
-    T* obj = memory_allocator<T>(aligned_large_pages_alloc, std::forward<Args>(args)...);
-
-    return LargePagePtr<T>(obj);
-}
-
-// make_unique_large_page for arrays of unknown bound
-template<typename T>
-std::enable_if_t<std::is_array_v<T>, LargePagePtr<T>> make_unique_large_page(size_t num) {
-    using ElementType = std::remove_extent_t<T>;
-
-    static_assert(alignof(ElementType) <= 4096,
-                  "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
-
-    ElementType* memory = memory_allocator<T>(aligned_large_pages_alloc, num);
-
-    return LargePagePtr<T>(memory);
-}
-
-//
-//
-// aligned unique ptr
-//
-//
-
-template<typename T>
-struct AlignedDeleter {
-    void operator()(T* ptr) const { return memory_deleter<T>(ptr, std_aligned_free); }
-};
-
-template<typename T>
-struct AlignedArrayDeleter {
-    void operator()(T* ptr) const { return memory_deleter_array<T>(ptr, std_aligned_free); }
-};
-
-template<typename T>
-using AlignedPtr =
-  std::conditional_t<std::is_array_v<T>,
-                     std::unique_ptr<T, AlignedArrayDeleter<std::remove_extent_t<T>>>,
-                     std::unique_ptr<T, AlignedDeleter<T>>>;
-
-// make_unique_aligned for single objects
-template<typename T, typename... Args>
-std::enable_if_t<!std::is_array_v<T>, AlignedPtr<T>> make_unique_aligned(Args&&... args) {
-    const auto func = [](size_t size) { return std_aligned_alloc(alignof(T), size); };
-    T*         obj  = memory_allocator<T>(func, std::forward<Args>(args)...);
-
-    return AlignedPtr<T>(obj);
-}
-
-// make_unique_aligned for arrays of unknown bound
-template<typename T>
-std::enable_if_t<std::is_array_v<T>, AlignedPtr<T>> make_unique_aligned(size_t num) {
-    using ElementType = std::remove_extent_t<T>;
-
-    const auto   func   = [](size_t size) { return std_aligned_alloc(alignof(ElementType), size); };
-    ElementType* memory = memory_allocator<T>(func, num);
-
-    return AlignedPtr<T>(memory);
-}
-
-
-// Get the first aligned element of an array.
-// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes,
-// where N is the number of elements in the array.
-template<uintptr_t Alignment, typename T>
-T* align_ptr_up(T* ptr) {
-    static_assert(alignof(T) < Alignment);
-
-    const uintptr_t ptrint = reinterpret_cast<uintptr_t>(reinterpret_cast<char*>(ptr));
-    return reinterpret_cast<T*>(
-      reinterpret_cast<char*>((ptrint + (Alignment - 1)) / Alignment * Alignment));
-}
-
-
-}  // namespace Stockfish
-
-#endif  // #ifndef MEMORY_H_INCLUDED
--- a/src/misc.cpp
+++ b/src/misc.cpp
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,509 +16,558 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include "misc.h"
+#ifdef _WIN32
+#if _WIN32_WINNT < 0x0601
+#undef  _WIN32_WINNT
+#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
+#endif
+
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+
+#include <windows.h>
+// The needed Windows API for processor groups could be missed from old Windows
+// versions, so instead of calling them directly (forcing the linker to resolve
+// the calls at compile time), try to load them at runtime. To do this we need
+// first to define the corresponding function pointers.
+extern "C" {
+typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP,
+                      PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
+typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY);
+typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
+}
+#endif

-#include <array>
-#include <atomic>
-#include <cassert>
-#include <cctype>
-#include <cmath>
-#include <cstdlib>
 #include <fstream>
 #include <iomanip>
 #include <iostream>
-#include <iterator>
-#include <limits>
-#include <mutex>
 #include <sstream>
-#include <string_view>
+#include <vector>
+#include <cstdlib>

-#include "types.h"
+#if defined(__linux__) && !defined(__ANDROID__)
+#include <stdlib.h>
+#include <sys/mman.h>
+#endif

-namespace Stockfish {
+#include "misc.h"
+#include "thread.h"
+
+using namespace std;

 namespace {

-// Version number or dev.
-constexpr std::string_view version = "dev";
+/// Version number. If Version is left empty, then the compile date, in the
+/// format DD-MM-YY, is shown in engine_info instead.
+const string Version = "";

-// Our fancy logging facility. The trick here is to replace cin.rdbuf() and
-// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We
-// can toggle the logging of std::cout and std:cin at runtime whilst preserving
-// usual I/O functionality, all without changing a single line of code!
-// Idea from http://groups.google.com/group/comp.lang.c++/msg/1d941c0f26ea0d81
+/// Our fancy logging facility. The trick here is to replace cin.rdbuf() and
+/// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We
+/// can toggle the logging of std::cout and std::cin at runtime whilst preserving
+/// usual I/O functionality, all without changing a single line of code!
+/// Idea from http://groups.google.com/group/comp.lang.c++/msg/1d941c0f26ea0d81

-struct Tie: public std::streambuf {  // MSVC requires split streambuf for cin and cout
+struct Tie: public streambuf { // MSVC requires split streambuf for cin and cout

-    Tie(std::streambuf* b, std::streambuf* l) :
-        buf(b),
-        logBuf(l) {}
+  Tie(streambuf* b, streambuf* l) : buf(b), logBuf(l) {}

-    int sync() override { return logBuf->pubsync(), buf->pubsync(); }
-    int overflow(int c) override { return log(buf->sputc(char(c)), "<< "); }
-    int underflow() override { return buf->sgetc(); }
-    int uflow() override { return log(buf->sbumpc(), ">> "); }
+  int sync() override { return logBuf->pubsync(), buf->pubsync(); }
+  int overflow(int c) override { return log(buf->sputc((char)c), "<< "); }
+  int underflow() override { return buf->sgetc(); }
+  int uflow() override { return log(buf->sbumpc(), ">> "); }

-    std::streambuf *buf, *logBuf;
+  streambuf *buf, *logBuf;

-    int log(int c, const char* prefix) {
+  int log(int c, const char* prefix) {

-        static int last = '\n';  // Single log file
+    static int last = '\n'; // Single log file

-        if (last == '\n')
-            logBuf->sputn(prefix, 3);
+    if (last == '\n')
+        logBuf->sputn(prefix, 3);

-        return last = logBuf->sputc(char(c));
-    }
+    return last = logBuf->sputc((char)c);
+  }
 };

 class Logger {

-    Logger() :
-        in(std::cin.rdbuf(), file.rdbuf()),
-        out(std::cout.rdbuf(), file.rdbuf()) {}
-    ~Logger() { start(""); }
+  Logger() : in(cin.rdbuf(), file.rdbuf()), out(cout.rdbuf(), file.rdbuf()) {}
+ ~Logger() { start(""); }

-    std::ofstream file;
-    Tie           in, out;
+  ofstream file;
+  Tie in, out;

-   public:
-    static void start(const std::string& fname) {
+public:
+  static void start(const std::string& fname) {

-        static Logger l;
+    static Logger l;

-        if (l.file.is_open())
-        {
-            std::cout.rdbuf(l.out.buf);
-            std::cin.rdbuf(l.in.buf);
-            l.file.close();
-        }
-
-        if (!fname.empty())
-        {
-            l.file.open(fname, std::ifstream::out);
-
-            if (!l.file.is_open())
-            {
-                std::cerr << "Unable to open debug log file " << fname << std::endl;
-                exit(EXIT_FAILURE);
-            }
-
-            std::cin.rdbuf(&l.in);
-            std::cout.rdbuf(&l.out);
-        }
-    }
-};
-
-}  // namespace
-
-
-// Returns the full name of the current Stockfish version.
-//
-// For local dev compiles we try to append the commit SHA and
-// commit date from git. If that fails only the local compilation
-// date is set and "nogit" is specified:
-//      Stockfish dev-YYYYMMDD-SHA
-//      or
-//      Stockfish dev-YYYYMMDD-nogit
-//
-// For releases (non-dev builds) we only include the version number:
-//      Stockfish version
-std::string engine_version_info() {
-    std::stringstream ss;
-    ss << "Stockfish " << version << std::setfill('0');
-
-    if constexpr (version == "dev")
+    if (!fname.empty() && !l.file.is_open())
    {
-        ss << "-";
-#ifdef GIT_DATE
-        ss << stringify(GIT_DATE);
-#else
-        constexpr std::string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");
+        l.file.open(fname, ifstream::out);

-        std::string       month, day, year;
-        std::stringstream date(__DATE__);  // From compiler, format is "Sep 21 2008"
+        if (!l.file.is_open())
+        {
+            cerr << "Unable to open debug log file " << fname << endl;
+            exit(EXIT_FAILURE);
+        }

-        date >> month >> day >> year;
-        ss << year << std::setw(2) << std::setfill('0') << (1 + months.find(month) / 4)
-           << std::setw(2) << std::setfill('0') << day;
-#endif
-
-        ss << "-";
-
-#ifdef GIT_SHA
-        ss << stringify(GIT_SHA);
-#else
-        ss << "nogit";
-#endif
+        cin.rdbuf(&l.in);
+        cout.rdbuf(&l.out);
    }
+    else if (fname.empty() && l.file.is_open())
+    {
+        cout.rdbuf(l.out.buf);
+        cin.rdbuf(l.in.buf);
+        l.file.close();
+    }
+  }
+};

-    return ss.str();
-}
+} // namespace

-std::string engine_info(bool to_uci) {
-    return engine_version_info() + (to_uci ? "\nid author " : " by ")
-         + "the Stockfish developers (see AUTHORS file)";
+/// engine_info() returns the full name of the current Stockfish version. This
+/// will be either "Stockfish <Tag> DD-MM-YY" (where DD-MM-YY is the date when
+/// the program was compiled) or "Stockfish <Version>", depending on whether
+/// Version is empty.
+
+const string engine_info(bool to_uci) {
+
+  // __DATE__ comes from the compiler in the form "Sep 21 2008"
+  stringstream out, stamp(__DATE__);
+  out << "Stockfish " << Version << setfill('0');
+
+  if (Version.empty())
+  {
+      // Month number = 1 + (offset of the abbreviation in this table) / 4
+      const string monthNames("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");
+      string mon, dd, yyyy;
+
+      stamp >> mon >> dd >> yyyy;
+      out << setw(2) << dd << setw(2) << (1 + monthNames.find(mon) / 4) << yyyy.substr(2);
+  }
+
+  out << (to_uci ? "\nid author " : " by ") << "the Stockfish developers (see AUTHORS file)";
+  return out.str();
 }


-// Returns a string trying to describe the compiler we use
-std::string compiler_info() {
+/// compiler_info() returns a string trying to describe the compiler we use

-#define make_version_string(major, minor, patch) \
-    stringify(major) "." stringify(minor) "." stringify(patch)
+const std::string compiler_info() {

-    // Predefined macros hell:
-    //
-    // __GNUC__                Compiler is GCC, Clang or ICX
-    // __clang__               Compiler is Clang or ICX
-    // __INTEL_LLVM_COMPILER   Compiler is ICX
-    // _MSC_VER                Compiler is MSVC
-    // _WIN32                  Building on Windows (any)
-    // _WIN64                  Building on Windows 64 bit
+  #define stringify2(x) #x
+  #define stringify(x) stringify2(x)
+  #define make_version_string(major, minor, patch) stringify(major) "." stringify(minor) "." stringify(patch)

-    std::string compiler = "\nCompiled by                : ";
+/// Predefined macros hell:
+///
+/// __GNUC__           Compiler is gcc, Clang or Intel on Linux
+/// __INTEL_COMPILER   Compiler is Intel
+/// _MSC_VER           Compiler is MSVC or Intel on Windows
+/// _WIN32             Building on Windows (any)
+/// _WIN64             Building on Windows 64 bit

-#if defined(__INTEL_LLVM_COMPILER)
-    compiler += "ICX ";
-    compiler += stringify(__INTEL_LLVM_COMPILER);
-#elif defined(__clang__)
-    compiler += "clang++ ";
-    compiler += make_version_string(__clang_major__, __clang_minor__, __clang_patchlevel__);
-#elif _MSC_VER
-    compiler += "MSVC ";
-    compiler += "(version ";
-    compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD);
-    compiler += ")";
-#elif defined(__e2k__) && defined(__LCC__)
-    #define dot_ver2(n) \
-        compiler += char('.'); \
-        compiler += char('0' + (n) / 10); \
-        compiler += char('0' + (n) % 10);
+  std::string compiler = "\nCompiled by ";

-    compiler += "MCST LCC ";
-    compiler += "(version ";
-    compiler += std::to_string(__LCC__ / 100);
-    dot_ver2(__LCC__ % 100) dot_ver2(__LCC_MINOR__) compiler += ")";
-#elif __GNUC__
-    compiler += "g++ (GNUC) ";
-    compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
-#else
-    compiler += "Unknown compiler ";
-    compiler += "(unknown version)";
-#endif
+  #ifdef __clang__
+     compiler += "clang++ ";
+     compiler += make_version_string(__clang_major__, __clang_minor__, __clang_patchlevel__);
+  #elif __INTEL_COMPILER
+     compiler += "Intel compiler ";
+     compiler += "(version ";
+     compiler += stringify(__INTEL_COMPILER) " update " stringify(__INTEL_COMPILER_UPDATE);
+     compiler += ")";
+  #elif _MSC_VER
+     compiler += "MSVC ";
+     compiler += "(version ";
+     compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD);
+     compiler += ")";
+  #elif __GNUC__
+     compiler += "g++ (GNUC) ";
+     compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
+  #else
+     compiler += "Unknown compiler ";
+     compiler += "(unknown version)";
+  #endif

-#if defined(__APPLE__)
-    compiler += " on Apple";
-#elif defined(__CYGWIN__)
-    compiler += " on Cygwin";
-#elif defined(__MINGW64__)
-    compiler += " on MinGW64";
-#elif defined(__MINGW32__)
-    compiler += " on MinGW32";
-#elif defined(__ANDROID__)
-    compiler += " on Android";
-#elif defined(__linux__)
-    compiler += " on Linux";
-#elif defined(_WIN64)
-    compiler += " on Microsoft Windows 64-bit";
-#elif defined(_WIN32)
-    compiler += " on Microsoft Windows 32-bit";
-#else
-    compiler += " on unknown system";
-#endif
+  #if defined(__APPLE__)
+     compiler += " on Apple";
+  #elif defined(__CYGWIN__)
+     compiler += " on Cygwin";
+  #elif defined(__MINGW64__)
+     compiler += " on MinGW64";
+  #elif defined(__MINGW32__)
+     compiler += " on MinGW32";
+  #elif defined(__ANDROID__)
+     compiler += " on Android";
+  #elif defined(__linux__)
+     compiler += " on Linux";
+  #elif defined(_WIN64)
+     compiler += " on Microsoft Windows 64-bit";
+  #elif defined(_WIN32)
+     compiler += " on Microsoft Windows 32-bit";
+  #else
+     compiler += " on unknown system";
+  #endif

-    compiler += "\nCompilation architecture   : ";
-#if defined(ARCH)
-    compiler += stringify(ARCH);
-#else
-    compiler += "(undefined architecture)";
-#endif
-
-    compiler += "\nCompilation settings       : ";
-    compiler += (Is64Bit ? "64bit" : "32bit");
-#if defined(USE_VNNI)
-    compiler += " VNNI";
-#endif
-#if defined(USE_AVX512)
+  compiler += "\nCompilation settings include: ";
+  compiler += (Is64Bit ? " 64bit" : " 32bit");
+  #if defined(USE_AVX512)
    compiler += " AVX512";
-#endif
-    compiler += (HasPext ? " BMI2" : "");
-#if defined(USE_AVX2)
+  #endif
+  #if defined(USE_AVX2)
    compiler += " AVX2";
-#endif
-#if defined(USE_SSE41)
+  #endif
+  #if defined(USE_SSE42)
+    compiler += " SSE42";
+  #endif
+  #if defined(USE_SSE41)
    compiler += " SSE41";
-#endif
-#if defined(USE_SSSE3)
+  #endif
+  #if defined(USE_SSSE3)
    compiler += " SSSE3";
-#endif
-#if defined(USE_SSE2)
-    compiler += " SSE2";
-#endif
+  #endif
+  #if defined(USE_SSE3)
+    compiler += " SSE3";
+  #endif
+    compiler += (HasPext ? " BMI2" : "");
    compiler += (HasPopCnt ? " POPCNT" : "");
-#if defined(USE_NEON_DOTPROD)
-    compiler += " NEON_DOTPROD";
-#elif defined(USE_NEON)
-    compiler += " NEON";
-#endif
-
-#if !defined(NDEBUG)
+  #if !defined(NDEBUG)
    compiler += " DEBUG";
-#endif
+  #endif

-    compiler += "\nCompiler __VERSION__ macro : ";
-#ifdef __VERSION__
-    compiler += __VERSION__;
-#else
-    compiler += "(undefined macro)";
-#endif
+  compiler += "\n__VERSION__ macro expands to: ";
+  #ifdef __VERSION__
+     compiler += __VERSION__;
+  #else
+     compiler += "(undefined macro)";
+  #endif
+  compiler += "\n";

-    compiler += "\n";
-
-    return compiler;
+  return compiler;
 }


-// Debug functions used mainly to collect run-time statistics
-constexpr int MaxDebugSlots = 32;
+/// Debug functions used mainly to collect run-time statistics
+static std::atomic<int64_t> hits[2], means[2];

-namespace {
-
-template<size_t N>
-struct DebugInfo {
-    std::array<std::atomic<int64_t>, N> data = {0};
-
-    [[nodiscard]] constexpr std::atomic<int64_t>& operator[](size_t index) {
-        assert(index < N);
-        return data[index];
-    }
-
-    constexpr DebugInfo& operator=(const DebugInfo& other) {
-        for (size_t i = 0; i < N; i++)
-            data[i].store(other.data[i].load());
-        return *this;
-    }
-};
-
-struct DebugExtremes: public DebugInfo<3> {
-    DebugExtremes() {
-        data[1] = std::numeric_limits<int64_t>::min();
-        data[2] = std::numeric_limits<int64_t>::max();
-    }
-};
-
-std::array<DebugInfo<2>, MaxDebugSlots>  hit;
-std::array<DebugInfo<2>, MaxDebugSlots>  mean;
-std::array<DebugInfo<3>, MaxDebugSlots>  stdev;
-std::array<DebugInfo<6>, MaxDebugSlots>  correl;
-std::array<DebugExtremes, MaxDebugSlots> extremes;
-
-}  // namespace
-
-void dbg_hit_on(bool cond, int slot) {
-
-    ++hit.at(slot)[0];
-    if (cond)
-        ++hit.at(slot)[1];
-}
-
-void dbg_mean_of(int64_t value, int slot) {
-
-    ++mean.at(slot)[0];
-    mean.at(slot)[1] += value;
-}
-
-void dbg_stdev_of(int64_t value, int slot) {
-
-    ++stdev.at(slot)[0];
-    stdev.at(slot)[1] += value;
-    stdev.at(slot)[2] += value * value;
-}
-
-void dbg_extremes_of(int64_t value, int slot) {
-    ++extremes.at(slot)[0];
-
-    int64_t current_max = extremes.at(slot)[1].load();
-    while (current_max < value && !extremes.at(slot)[1].compare_exchange_weak(current_max, value))
-    {}
-
-    int64_t current_min = extremes.at(slot)[2].load();
-    while (current_min > value && !extremes.at(slot)[2].compare_exchange_weak(current_min, value))
-    {}
-}
-
-void dbg_correl_of(int64_t value1, int64_t value2, int slot) {
-
-    ++correl.at(slot)[0];
-    correl.at(slot)[1] += value1;
-    correl.at(slot)[2] += value1 * value1;
-    correl.at(slot)[3] += value2;
-    correl.at(slot)[4] += value2 * value2;
-    correl.at(slot)[5] += value1 * value2;
-}
+void dbg_hit_on(bool b) { hits[0].fetch_add(1); if (b) hits[1].fetch_add(1); } // [0] counts calls, [1] calls with b set
+void dbg_hit_on(bool c, bool b) { if (!c) return; dbg_hit_on(b); } // record b only when c holds
+void dbg_mean_of(int v) { means[0].fetch_add(1); means[1].fetch_add(v); } // [0] counts samples, [1] sums them

 void dbg_print() {

-    int64_t n;
-    auto    E   = [&n](int64_t x) { return double(x) / n; };
-    auto    sqr = [](double x) { return x * x; };
+  if (hits[0])
+      cerr << "Total " << hits[0] << " Hits " << hits[1]
+           << " hit rate (%) " << 100 * hits[1] / hits[0] << endl;

-    for (int i = 0; i < MaxDebugSlots; ++i)
-        if ((n = hit[i][0]))
-            std::cerr << "Hit #" << i << ": Total " << n << " Hits " << hit[i][1]
-                      << " Hit Rate (%) " << 100.0 * E(hit[i][1]) << std::endl;
-
-    for (int i = 0; i < MaxDebugSlots; ++i)
-        if ((n = mean[i][0]))
-        {
-            std::cerr << "Mean #" << i << ": Total " << n << " Mean " << E(mean[i][1]) << std::endl;
-        }
-
-    for (int i = 0; i < MaxDebugSlots; ++i)
-        if ((n = stdev[i][0]))
-        {
-            double r = sqrt(E(stdev[i][2]) - sqr(E(stdev[i][1])));
-            std::cerr << "Stdev #" << i << ": Total " << n << " Stdev " << r << std::endl;
-        }
-
-    for (int i = 0; i < MaxDebugSlots; ++i)
-        if ((n = extremes[i][0]))
-        {
-            std::cerr << "Extremity #" << i << ": Total " << n << " Min " << extremes[i][2]
-                      << " Max " << extremes[i][1] << std::endl;
-        }
-
-    for (int i = 0; i < MaxDebugSlots; ++i)
-        if ((n = correl[i][0]))
-        {
-            double r = (E(correl[i][5]) - E(correl[i][1]) * E(correl[i][3]))
-                     / (sqrt(E(correl[i][2]) - sqr(E(correl[i][1])))
-                        * sqrt(E(correl[i][4]) - sqr(E(correl[i][3]))));
-            std::cerr << "Correl. #" << i << ": Total " << n << " Coefficient " << r << std::endl;
-        }
+  if (means[0])
+      cerr << "Total " << means[0] << " Mean "
+           << (double)means[1] / means[0] << endl;
 }

-void dbg_clear() {
-    hit.fill({});
-    mean.fill({});
-    stdev.fill({});
-    correl.fill({});
-    extremes.fill({});
-}

-// Used to serialize access to std::cout
-// to avoid multiple threads writing at the same time.
+/// Used to serialize access to std::cout to avoid multiple threads writing at
+/// the same time.
+
 std::ostream& operator<<(std::ostream& os, SyncCout sc) {

-    static std::mutex m;
+  static std::mutex m;

-    if (sc == IO_LOCK)
-        m.lock();
+  if (sc == IO_LOCK)
+      m.lock();

-    if (sc == IO_UNLOCK)
-        m.unlock();
+  if (sc == IO_UNLOCK)
+      m.unlock();

-    return os;
+  return os;
 }

-void sync_cout_start() { std::cout << IO_LOCK; }
-void sync_cout_end() { std::cout << IO_UNLOCK; }

-// Trampoline helper to avoid moving Logger to misc.h
+/// Trampoline helper to avoid moving Logger to misc.h
 void start_logger(const std::string& fname) { Logger::start(fname); }


+/// prefetch() preloads the given address in L1/L2 cache. This is a non-blocking
+/// function that doesn't stall the CPU waiting for data to be loaded from memory,
+/// which can be quite slow.
 #ifdef NO_PREFETCH

-void prefetch(const void*) {}
+void prefetch(void*) {}

 #else

-void prefetch(const void* addr) {
+void prefetch(void* addr) {

-    #if defined(_MSC_VER)
-    _mm_prefetch((char const*) addr, _MM_HINT_T0);
-    #else
-    __builtin_prefetch(addr);
-    #endif
+#  if defined(__INTEL_COMPILER)
+   // This hack prevents prefetches from being optimized away by the
+   // Intel compiler. Both MSVC and gcc seem not to be affected by this.
+   __asm__ ("");
+#  endif
+
+#  if defined(__INTEL_COMPILER) || defined(_MSC_VER)
+  _mm_prefetch((char*)addr, _MM_HINT_T0);
+#  else
+  __builtin_prefetch(addr);
+#  endif
 }

 #endif

-#ifdef _WIN32
-    #include <direct.h>
-    #define GETCWD _getcwd
+/// Wrappers for systems where the c++17 implementation doesn't guarantee the availability of aligned_alloc.
+/// Memory allocated with std_aligned_alloc must be freed with std_aligned_free.
+///
+
+void* std_aligned_alloc(size_t alignment, size_t size) {
+#if defined(__APPLE__)
+  return aligned_alloc(alignment, size);
+#elif defined(_WIN32)
+  return _mm_malloc(size, alignment);
 #else
-    #include <unistd.h>
-    #define GETCWD getcwd
+  return std::aligned_alloc(alignment, size);
+#endif
+}
+
+void std_aligned_free(void* ptr) {
+#if defined(__APPLE__)
+  free(ptr);
+#elif defined(_WIN32)
+  _mm_free(ptr);
+#else
+  free(ptr);
+#endif
+}
+
+/// aligned_ttmem_alloc() will return suitably aligned memory, and if possible use large pages.
+/// The returned pointer is the aligned one, while the mem argument is the one that needs
+/// to be passed to free. With c++17 some of this functionality could be simplified.
+
+#if defined(__linux__) && !defined(__ANDROID__)
+
+void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
+
+  constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page sizes
+  size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment
+  if (posix_memalign(&mem, alignment, size))
+     mem = nullptr;
+  madvise(mem, allocSize, MADV_HUGEPAGE);
+  return mem;
+}
+
+#elif defined(_WIN64)
+
+static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
+
+  HANDLE hProcessToken { };
+  LUID luid { };
+  void* mem = nullptr;
+
+  const size_t largePageSize = GetLargePageMinimum();
+  if (!largePageSize)
+      return nullptr;
+
+  // We need SeLockMemoryPrivilege, so try to enable it for the process
+  if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
+      return nullptr;
+
+  if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid))
+  {
+      TOKEN_PRIVILEGES tp { };
+      TOKEN_PRIVILEGES prevTp { };
+      DWORD prevTpLen = 0;
+
+      tp.PrivilegeCount = 1;
+      tp.Privileges[0].Luid = luid;
+      tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+      // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds,
+      // we still need to query GetLastError() to ensure that the privileges were actually obtained.
+      if (AdjustTokenPrivileges(
+              hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen) &&
+          GetLastError() == ERROR_SUCCESS)
+      {
+          // Round up size to full pages and allocate
+          allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
+          mem = VirtualAlloc(
+              NULL, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
+
+          // Privilege no longer needed, restore previous state
+          AdjustTokenPrivileges(hProcessToken, FALSE, &prevTp, 0, NULL, NULL);
+      }
+  }
+
+  CloseHandle(hProcessToken);
+
+  return mem;
+}
+
+void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
+
+  static bool firstCall = true;
+
+  // Try to allocate large pages
+  mem = aligned_ttmem_alloc_large_pages(allocSize);
+
+  // Suppress info strings on the first call. The first call occurs before 'uci'
+  // is received and in that case this output confuses some GUIs.
+  if (!firstCall)
+  {
+      if (mem)
+          sync_cout << "info string Hash table allocation: Windows large pages used." << sync_endl;
+      else
+          sync_cout << "info string Hash table allocation: Windows large pages not used." << sync_endl;
+  }
+  firstCall = false;
+
+  // Fall back to regular, page aligned, allocation if necessary
+  if (!mem)
+      mem = VirtualAlloc(NULL, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+
+  return mem;
+}
+
+#else
+
+void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
+
+  constexpr size_t alignment = 64; // assumed cache line size
+  size_t size = allocSize + alignment - 1; // allocate some extra space
+  mem = malloc(size);
+  void* ret = reinterpret_cast<void*>((uintptr_t(mem) + alignment - 1) & ~uintptr_t(alignment - 1));
+  return ret;
+}
+
 #endif

-size_t str_to_size_t(const std::string& s) {
-    unsigned long long value = std::stoull(s);
-    if (value > std::numeric_limits<size_t>::max())
-        std::exit(EXIT_FAILURE);
-    return static_cast<size_t>(value);
+
+/// aligned_ttmem_free() will free the previously allocated ttmem
+
+#if defined(_WIN64)
+
+void aligned_ttmem_free(void* mem) {
+
+  if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
+  {
+      DWORD err = GetLastError();
+      std::cerr << "Failed to free transposition table. Error code: 0x" <<
+          std::hex << err << std::dec << std::endl;
+      exit(EXIT_FAILURE);
+  }
 }

-std::optional<std::string> read_file_to_string(const std::string& path) {
-    std::ifstream f(path, std::ios_base::binary);
-    if (!f)
-        return std::nullopt;
-    return std::string(std::istreambuf_iterator<char>(f), std::istreambuf_iterator<char>());
-}
-
-void remove_whitespace(std::string& s) {
-    s.erase(std::remove_if(s.begin(), s.end(), [](char c) { return std::isspace(c); }), s.end());
-}
-
-bool is_whitespace(std::string_view s) {
-    return std::all_of(s.begin(), s.end(), [](char c) { return std::isspace(c); });
-}
-
-std::string CommandLine::get_binary_directory(std::string argv0) {
-    std::string pathSeparator;
-
-#ifdef _WIN32
-    pathSeparator = "\\";
-    #ifdef _MSC_VER
-    // Under windows argv[0] may not have the extension. Also _get_pgmptr() had
-    // issues in some Windows 10 versions, so check returned values carefully.
-    char* pgmptr = nullptr;
-    if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr)
-        argv0 = pgmptr;
-    #endif
 #else
-    pathSeparator = "/";
+
+void aligned_ttmem_free(void *mem) {
+  free(mem);
+}
+
 #endif

-    // Extract the working directory
-    auto workingDirectory = CommandLine::get_working_directory();

-    // Extract the binary directory path from argv0
-    auto   binaryDirectory = argv0;
-    size_t pos             = binaryDirectory.find_last_of("\\/");
-    if (pos == std::string::npos)
-        binaryDirectory = "." + pathSeparator;
-    else
-        binaryDirectory.resize(pos + 1);
+namespace WinProcGroup {

-    // Pattern replacement: "./" at the start of path is replaced by the working directory
-    if (binaryDirectory.find("." + pathSeparator) == 0)
-        binaryDirectory.replace(0, 1, workingDirectory);
+#ifndef _WIN32

-    return binaryDirectory;
-}
+void bindThisThread(size_t) {}

-std::string CommandLine::get_working_directory() {
-    std::string workingDirectory = "";
-    char        buff[40000];
-    char*       cwd = GETCWD(buff, 40000);
-    if (cwd)
-        workingDirectory = cwd;
+#else

-    return workingDirectory;
+/// best_group() retrieves logical processor information using Windows specific
+/// API and returns the best group id for the thread with index idx. Original
+/// code from Texel by Peter Österlund.
+
+int best_group(size_t idx) {
+
+  int threads = 0;
+  int nodes = 0;
+  int cores = 0;
+  DWORD returnLength = 0;
+  DWORD byteOffset = 0;
+
+  // Early exit if the needed API is not available at runtime
+  HMODULE k32 = GetModuleHandle("Kernel32.dll");
+  auto fun1 = (fun1_t)(void(*)())GetProcAddress(k32, "GetLogicalProcessorInformationEx");
+  if (!fun1)
+      return -1;
+
+  // First call to get returnLength. We expect it to fail due to null buffer
+  if (fun1(RelationAll, nullptr, &returnLength))
+      return -1;
+
+  // Once we know returnLength, allocate the buffer
+  SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr;
+  ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength);
+
+  // Second call, now we expect to succeed
+  if (!fun1(RelationAll, buffer, &returnLength))
+  {
+      free(buffer);
+      return -1;
+  }
+
+  while (byteOffset < returnLength)
+  {
+      if (ptr->Relationship == RelationNumaNode)
+          nodes++;
+
+      else if (ptr->Relationship == RelationProcessorCore)
+      {
+          cores++;
+          threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1;
+      }
+
+      assert(ptr->Size);
+      byteOffset += ptr->Size;
+      ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size);
+  }
+
+  free(buffer);
+
+  std::vector<int> groups;
+
+  // Run as many threads as possible on the same node until core limit is
+  // reached, then move on filling the next node.
+  for (int n = 0; n < nodes; n++)
+      for (int i = 0; i < cores / nodes; i++)
+          groups.push_back(n);
+
+  // In case a core has more than one logical processor (we assume 2) and we
+  // still have threads to allocate, then spread them evenly across available
+  // nodes.
+  for (int t = 0; t < threads - cores; t++)
+      groups.push_back(t % nodes);
+
+  // If we still have more threads than the total number of logical processors
+  // then return -1 and let the OS decide what to do.
+  return idx < groups.size() ? groups[idx] : -1;
 }


-}  // namespace Stockfish
+/// bindThisThread() sets the group affinity of the current thread
+
+void bindThisThread(size_t idx) {
+
+  // Use only local variables to be thread-safe
+  int group = best_group(idx);
+
+  if (group == -1)
+      return;
+
+  // Early exit if the needed APIs are not available at runtime
+  HMODULE k32 = GetModuleHandle("Kernel32.dll");
+  auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx");
+  auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity");
+
+  if (!fun2 || !fun3)
+      return;
+
+  GROUP_AFFINITY affinity;
+  if (fun2(group, &affinity))
+      fun3(GetCurrentThread(), &affinity, nullptr);
+}
+
+#endif
+
+} // namespace WinProcGroup
--- a/src/misc.h
+++ b/src/misc.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,320 +19,119 @@
 #ifndef MISC_H_INCLUDED
 #define MISC_H_INCLUDED

-#include <algorithm>
-#include <array>
 #include <cassert>
 #include <chrono>
-#include <cstddef>
-#include <cstdint>
-#include <cstdio>
-#include <iosfwd>
-#include <optional>
+#include <ostream>
 #include <string>
-#include <string_view>
 #include <vector>

-#define stringify2(x) #x
-#define stringify(x) stringify2(x)
-
-namespace Stockfish {
-
-std::string engine_version_info();
-std::string engine_info(bool to_uci = false);
-std::string compiler_info();
-
-// Preloads the given address in L1/L2 cache. This is a non-blocking
-// function that doesn't stall the CPU waiting for data to be loaded from memory,
-// which can be quite slow.
-void prefetch(const void* addr);
+#include "types.h"

+const std::string engine_info(bool to_uci = false);
+const std::string compiler_info();
+void prefetch(void* addr);
 void start_logger(const std::string& fname);
+void* std_aligned_alloc(size_t alignment, size_t size);
+void std_aligned_free(void* ptr);
+void* aligned_ttmem_alloc(size_t size, void*& mem);
+void aligned_ttmem_free(void* mem); // nop if mem == nullptr

-size_t str_to_size_t(const std::string& s);
-
-#if defined(__linux__)
-
-struct PipeDeleter {
-    void operator()(FILE* file) const {
-        if (file != nullptr)
-        {
-            pclose(file);
-        }
-    }
-};
-
-#endif
-
-// Reads the file as bytes.
-// Returns std::nullopt if the file does not exist.
-std::optional<std::string> read_file_to_string(const std::string& path);
-
-void dbg_hit_on(bool cond, int slot = 0);
-void dbg_mean_of(int64_t value, int slot = 0);
-void dbg_stdev_of(int64_t value, int slot = 0);
-void dbg_extremes_of(int64_t value, int slot = 0);
-void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0);
+void dbg_hit_on(bool b);
+void dbg_hit_on(bool c, bool b);
+void dbg_mean_of(int v);
 void dbg_print();
-void dbg_clear();

-using TimePoint = std::chrono::milliseconds::rep;  // A value in milliseconds
+typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds
+
 static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits");
+
 inline TimePoint now() {
-    return std::chrono::duration_cast<std::chrono::milliseconds>(
-             std::chrono::steady_clock::now().time_since_epoch())
-      .count();
+  return std::chrono::duration_cast<std::chrono::milliseconds>
+        (std::chrono::steady_clock::now().time_since_epoch()).count();
 }

-inline std::vector<std::string_view> split(std::string_view s, std::string_view delimiter) {
-    std::vector<std::string_view> res;
+template<class Entry, int Size>
+struct HashTable {
+  Entry* operator[](Key key) { return &table[(uint32_t)key & (Size - 1)]; }

-    if (s.empty())
-        return res;
-
-    size_t begin = 0;
-    for (;;)
-    {
-        const size_t end = s.find(delimiter, begin);
-        if (end == std::string::npos)
-            break;
-
-        res.emplace_back(s.substr(begin, end - begin));
-        begin = end + delimiter.size();
-    }
-
-    res.emplace_back(s.substr(begin));
-
-    return res;
-}
-
-void remove_whitespace(std::string& s);
-bool is_whitespace(std::string_view s);
-
-enum SyncCout {
-    IO_LOCK,
-    IO_UNLOCK
+private:
+  std::vector<Entry> table = std::vector<Entry>(Size); // Allocate on the heap
 };
+
+
+enum SyncCout { IO_LOCK, IO_UNLOCK };
 std::ostream& operator<<(std::ostream&, SyncCout);

 #define sync_cout std::cout << IO_LOCK
 #define sync_endl std::endl << IO_UNLOCK

-void sync_cout_start();
-void sync_cout_end();
+namespace Utility {

-// True if and only if the binary is compiled on a little-endian machine
-static inline const std::uint16_t Le             = 1;
-static inline const bool          IsLittleEndian = *reinterpret_cast<const char*>(&Le) == 1;
-
-
-template<typename T, std::size_t MaxSize>
-class ValueList {
-
-   public:
-    std::size_t size() const { return size_; }
-    void        push_back(const T& value) { values_[size_++] = value; }
-    const T*    begin() const { return values_; }
-    const T*    end() const { return values_ + size_; }
-    const T&    operator[](int index) const { return values_[index]; }
-
-   private:
-    T           values_[MaxSize];
-    std::size_t size_ = 0;
-};
-
-
-template<typename T, std::size_t Size, std::size_t... Sizes>
-class MultiArray;
-
-namespace Detail {
-
-template<typename T, std::size_t Size, std::size_t... Sizes>
-struct MultiArrayHelper {
-    using ChildType = MultiArray<T, Sizes...>;
-};
-
-template<typename T, std::size_t Size>
-struct MultiArrayHelper<T, Size> {
-    using ChildType = T;
-};
-
-template<typename To, typename From>
-constexpr bool is_strictly_assignable_v =
-  std::is_assignable_v<To&, From> && (std::is_same_v<To, From> || !std::is_convertible_v<From, To>);
+/// Clamp a value between lo and hi. Available in c++17.
+template<class T> constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
+  return v < lo ? lo : v > hi ? hi : v;
+}

 }

-// MultiArray is a generic N-dimensional array.
-// The template parameters (Size and Sizes) encode the dimensions of the array.
-template<typename T, std::size_t Size, std::size_t... Sizes>
-class MultiArray {
-    using ChildType = typename Detail::MultiArrayHelper<T, Size, Sizes...>::ChildType;
-    using ArrayType = std::array<ChildType, Size>;
-    ArrayType data_;
-
-   public:
-    using value_type             = typename ArrayType::value_type;
-    using size_type              = typename ArrayType::size_type;
-    using difference_type        = typename ArrayType::difference_type;
-    using reference              = typename ArrayType::reference;
-    using const_reference        = typename ArrayType::const_reference;
-    using pointer                = typename ArrayType::pointer;
-    using const_pointer          = typename ArrayType::const_pointer;
-    using iterator               = typename ArrayType::iterator;
-    using const_iterator         = typename ArrayType::const_iterator;
-    using reverse_iterator       = typename ArrayType::reverse_iterator;
-    using const_reverse_iterator = typename ArrayType::const_reverse_iterator;
-
-    constexpr auto&       at(size_type index) noexcept { return data_.at(index); }
-    constexpr const auto& at(size_type index) const noexcept { return data_.at(index); }
-
-    constexpr auto&       operator[](size_type index) noexcept { return data_[index]; }
-    constexpr const auto& operator[](size_type index) const noexcept { return data_[index]; }
-
-    constexpr auto&       front() noexcept { return data_.front(); }
-    constexpr const auto& front() const noexcept { return data_.front(); }
-    constexpr auto&       back() noexcept { return data_.back(); }
-    constexpr const auto& back() const noexcept { return data_.back(); }
-
-    auto*       data() { return data_.data(); }
-    const auto* data() const { return data_.data(); }
-
-    constexpr auto begin() noexcept { return data_.begin(); }
-    constexpr auto end() noexcept { return data_.end(); }
-    constexpr auto begin() const noexcept { return data_.begin(); }
-    constexpr auto end() const noexcept { return data_.end(); }
-    constexpr auto cbegin() const noexcept { return data_.cbegin(); }
-    constexpr auto cend() const noexcept { return data_.cend(); }
-
-    constexpr auto rbegin() noexcept { return data_.rbegin(); }
-    constexpr auto rend() noexcept { return data_.rend(); }
-    constexpr auto rbegin() const noexcept { return data_.rbegin(); }
-    constexpr auto rend() const noexcept { return data_.rend(); }
-    constexpr auto crbegin() const noexcept { return data_.crbegin(); }
-    constexpr auto crend() const noexcept { return data_.crend(); }
-
-    constexpr bool      empty() const noexcept { return data_.empty(); }
-    constexpr size_type size() const noexcept { return data_.size(); }
-    constexpr size_type max_size() const noexcept { return data_.max_size(); }
-
-    template<typename U>
-    void fill(const U& v) {
-        static_assert(Detail::is_strictly_assignable_v<T, U>,
-                      "Cannot assign fill value to entry type");
-        for (auto& ele : data_)
-        {
-            if constexpr (sizeof...(Sizes) == 0)
-                ele = v;
-            else
-                ele.fill(v);
-        }
-    }
-
-    constexpr void swap(MultiArray<T, Size, Sizes...>& other) noexcept { data_.swap(other.data_); }
-};
-
-
-// xorshift64star Pseudo-Random Number Generator
-// This class is based on original code written and dedicated
-// to the public domain by Sebastiano Vigna (2014).
-// It has the following characteristics:
-//
-//  -  Outputs 64-bit numbers
-//  -  Passes Dieharder and SmallCrush test batteries
-//  -  Does not require warm-up, no zeroland to escape
-//  -  Internal state is a single 64-bit integer
-//  -  Period is 2^64 - 1
-//  -  Speed: 1.60 ns/call (Core i7 @3.40GHz)
-//
-// For further analysis see
-//   <http://vigna.di.unimi.it/ftp/papers/xorshift.pdf>
+/// xorshift64star Pseudo-Random Number Generator
+/// This class is based on original code written and dedicated
+/// to the public domain by Sebastiano Vigna (2014).
+/// It has the following characteristics:
+///
+///  -  Outputs 64-bit numbers
+///  -  Passes Dieharder and SmallCrush test batteries
+///  -  Does not require warm-up, no zeroland to escape
+///  -  Internal state is a single 64-bit integer
+///  -  Period is 2^64 - 1
+///  -  Speed: 1.60 ns/call (Core i7 @3.40GHz)
+///
+/// For further analysis see
+///   <http://vigna.di.unimi.it/ftp/papers/xorshift.pdf>

 class PRNG {

-    uint64_t s;
+  uint64_t s;

-    uint64_t rand64() {
+  uint64_t rand64() {

-        s ^= s >> 12, s ^= s << 25, s ^= s >> 27;
-        return s * 2685821657736338717LL;
-    }
+    s ^= s >> 12, s ^= s << 25, s ^= s >> 27;
+    return s * 2685821657736338717LL;
+  }

-   public:
-    PRNG(uint64_t seed) :
-        s(seed) {
-        assert(seed);
-    }
+public:
+  PRNG(uint64_t seed) : s(seed) { assert(seed); }

-    template<typename T>
-    T rand() {
-        return T(rand64());
-    }
+  template<typename T> T rand() { return T(rand64()); }

-    // Special generator used to fast init magic numbers.
-    // Output values only have 1/8th of their bits set on average.
-    template<typename T>
-    T sparse_rand() {
-        return T(rand64() & rand64() & rand64());
-    }
+  /// Special generator used to fast init magic numbers.
+  /// Output values only have 1/8th of their bits set on average.
+  template<typename T> T sparse_rand()
+  { return T(rand64() & rand64() & rand64()); }
 };

 inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
 #if defined(__GNUC__) && defined(IS_64BIT)
-    __extension__ using uint128 = unsigned __int128;
-    return (uint128(a) * uint128(b)) >> 64;
+    __extension__ typedef unsigned __int128 uint128;
+    return ((uint128)a * (uint128)b) >> 64;
 #else
-    uint64_t aL = uint32_t(a), aH = a >> 32;
-    uint64_t bL = uint32_t(b), bH = b >> 32;
+    uint64_t aL = (uint32_t)a, aH = a >> 32;
+    uint64_t bL = (uint32_t)b, bH = b >> 32;
    uint64_t c1 = (aL * bL) >> 32;
    uint64_t c2 = aH * bL + c1;
-    uint64_t c3 = aL * bH + uint32_t(c2);
+    uint64_t c3 = aL * bH + (uint32_t)c2;
    return aH * bH + (c2 >> 32) + (c3 >> 32);
 #endif
 }

+/// Under Windows it is not possible for a process to run on more than one
+/// logical processor group. This usually means being limited to at most 64
+/// cores. To overcome this, some special platform specific API should be
+/// called to set group affinity for each thread. Original code from Texel by
+/// Peter Österlund.

-struct CommandLine {
-   public:
-    CommandLine(int _argc, char** _argv) :
-        argc(_argc),
-        argv(_argv) {}
-
-    static std::string get_binary_directory(std::string argv0);
-    static std::string get_working_directory();
-
-    int    argc;
-    char** argv;
-};
-
-namespace Utility {
-
-template<typename T, typename Predicate>
-void move_to_front(std::vector<T>& vec, Predicate pred) {
-    auto it = std::find_if(vec.begin(), vec.end(), pred);
-
-    if (it != vec.end())
-    {
-        std::rotate(vec.begin(), it, it + 1);
-    }
-}
+namespace WinProcGroup {
+  void bindThisThread(size_t idx);
 }

-#if defined(__GNUC__) && !defined(__clang__)
-    #if __GNUC__ >= 13
-        #define sf_assume(cond) __attribute__((assume(cond)))
-    #else
-        #define sf_assume(cond) \
-            do \
-            { \
-                if (!(cond)) \
-                    __builtin_unreachable(); \
-            } while (0)
-    #endif
-#else
-    // do nothing for other compilers
-    #define sf_assume(cond)
-#endif
-
-}  // namespace Stockfish
-
-#endif  // #ifndef MISC_H_INCLUDED
+#endif // #ifndef MISC_H_INCLUDED
--- a/src/movegen.cpp
+++ b/src/movegen.cpp
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,241 +16,352 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include "movegen.h"
-
 #include <cassert>
-#include <initializer_list>

-#include "bitboard.h"
+#include "movegen.h"
 #include "position.h"

-namespace Stockfish {
-
 namespace {

-template<GenType Type, Direction D, bool Enemy>
-ExtMove* make_promotions(ExtMove* moveList, [[maybe_unused]] Square to) {
+  template<GenType Type, Direction D>
+  ExtMove* make_promotions(ExtMove* moveList, Square to, Square ksq) {

-    constexpr bool all = Type == EVASIONS || Type == NON_EVASIONS;
-
-    if constexpr (Type == CAPTURES || all)
-        *moveList++ = Move::make<PROMOTION>(to - D, to, QUEEN);
-
-    if constexpr ((Type == CAPTURES && Enemy) || (Type == QUIETS && !Enemy) || all)
+    if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS)
    {
-        *moveList++ = Move::make<PROMOTION>(to - D, to, ROOK);
-        *moveList++ = Move::make<PROMOTION>(to - D, to, BISHOP);
-        *moveList++ = Move::make<PROMOTION>(to - D, to, KNIGHT);
+        *moveList++ = make<PROMOTION>(to - D, to, QUEEN);
+        if (attacks_bb<KNIGHT>(to) & ksq)
+            *moveList++ = make<PROMOTION>(to - D, to, KNIGHT);
+    }
+
+    if (Type == QUIETS || Type == EVASIONS || Type == NON_EVASIONS)
+    {
+        *moveList++ = make<PROMOTION>(to - D, to, ROOK);
+        *moveList++ = make<PROMOTION>(to - D, to, BISHOP);
+        if (!(attacks_bb<KNIGHT>(to) & ksq))
+            *moveList++ = make<PROMOTION>(to - D, to, KNIGHT);
    }

    return moveList;
-}
+  }


-template<Color Us, GenType Type>
-ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard target) {
+  template<Color Us, GenType Type>
+  ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard target) {

    constexpr Color     Them     = ~Us;
-    constexpr Bitboard  TRank7BB = (Us == WHITE ? Rank7BB : Rank2BB);
-    constexpr Bitboard  TRank3BB = (Us == WHITE ? Rank3BB : Rank6BB);
+    constexpr Bitboard  TRank7BB = (Us == WHITE ? Rank7BB    : Rank2BB);
+    constexpr Bitboard  TRank3BB = (Us == WHITE ? Rank3BB    : Rank6BB);
    constexpr Direction Up       = pawn_push(Us);
    constexpr Direction UpRight  = (Us == WHITE ? NORTH_EAST : SOUTH_WEST);
    constexpr Direction UpLeft   = (Us == WHITE ? NORTH_WEST : SOUTH_EAST);

-    const Bitboard emptySquares = ~pos.pieces();
-    const Bitboard enemies      = Type == EVASIONS ? pos.checkers() : pos.pieces(Them);
+    const Square ksq = pos.square<KING>(Them);
+    Bitboard emptySquares;

-    Bitboard pawnsOn7    = pos.pieces(Us, PAWN) & TRank7BB;
+    Bitboard pawnsOn7    = pos.pieces(Us, PAWN) &  TRank7BB;
    Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB;

+    Bitboard enemies = (Type == EVASIONS ? pos.pieces(Them) & target:
+                        Type == CAPTURES ? target : pos.pieces(Them));
+
    // Single and double pawn pushes, no promotions
-    if constexpr (Type != CAPTURES)
+    if (Type != CAPTURES)
    {
-        Bitboard b1 = shift<Up>(pawnsNotOn7) & emptySquares;
+        emptySquares = (Type == QUIETS || Type == QUIET_CHECKS ? target : ~pos.pieces());
+
+        Bitboard b1 = shift<Up>(pawnsNotOn7)   & emptySquares;
        Bitboard b2 = shift<Up>(b1 & TRank3BB) & emptySquares;

-        if constexpr (Type == EVASIONS)  // Consider only blocking squares
+        if (Type == EVASIONS) // Consider only blocking squares
        {
            b1 &= target;
            b2 &= target;
        }

+        if (Type == QUIET_CHECKS)
+        {
+            b1 &= pawn_attacks_bb(Them, ksq);
+            b2 &= pawn_attacks_bb(Them, ksq);
+
+            // Add pawn pushes which give discovered check. This is possible only
+            // if the pawn is not on the same file as the enemy king, because we
+            // don't generate captures. Note that a possible discovered-check
+            // promotion has already been generated amongst the captures.
+            Bitboard dcCandidateQuiets = pos.blockers_for_king(Them) & pawnsNotOn7;
+            if (dcCandidateQuiets)
+            {
+                Bitboard dc1 = shift<Up>(dcCandidateQuiets) & emptySquares & ~file_bb(ksq);
+                Bitboard dc2 = shift<Up>(dc1 & TRank3BB) & emptySquares;
+
+                b1 |= dc1;
+                b2 |= dc2;
+            }
+        }
+
        while (b1)
        {
-            Square to   = pop_lsb(b1);
-            *moveList++ = Move(to - Up, to);
+            Square to = pop_lsb(&b1);
+            *moveList++ = make_move(to - Up, to);
        }

        while (b2)
        {
-            Square to   = pop_lsb(b2);
-            *moveList++ = Move(to - Up - Up, to);
+            Square to = pop_lsb(&b2);
+            *moveList++ = make_move(to - Up - Up, to);
        }
    }

    // Promotions and underpromotions
    if (pawnsOn7)
    {
-        Bitboard b1 = shift<UpRight>(pawnsOn7) & enemies;
-        Bitboard b2 = shift<UpLeft>(pawnsOn7) & enemies;
-        Bitboard b3 = shift<Up>(pawnsOn7) & emptySquares;
+        if (Type == CAPTURES)
+            emptySquares = ~pos.pieces();

-        if constexpr (Type == EVASIONS)
-            b3 &= target;
+        if (Type == EVASIONS)
+            emptySquares &= target;
+
+        Bitboard b1 = shift<UpRight>(pawnsOn7) & enemies;
+        Bitboard b2 = shift<UpLeft >(pawnsOn7) & enemies;
+        Bitboard b3 = shift<Up     >(pawnsOn7) & emptySquares;

        while (b1)
-            moveList = make_promotions<Type, UpRight, true>(moveList, pop_lsb(b1));
+            moveList = make_promotions<Type, UpRight>(moveList, pop_lsb(&b1), ksq);

        while (b2)
-            moveList = make_promotions<Type, UpLeft, true>(moveList, pop_lsb(b2));
+            moveList = make_promotions<Type, UpLeft >(moveList, pop_lsb(&b2), ksq);

        while (b3)
-            moveList = make_promotions<Type, Up, false>(moveList, pop_lsb(b3));
+            moveList = make_promotions<Type, Up     >(moveList, pop_lsb(&b3), ksq);
    }

-    // Standard and en passant captures
-    if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS)
+    // Standard and en-passant captures
+    if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS)
    {
        Bitboard b1 = shift<UpRight>(pawnsNotOn7) & enemies;
-        Bitboard b2 = shift<UpLeft>(pawnsNotOn7) & enemies;
+        Bitboard b2 = shift<UpLeft >(pawnsNotOn7) & enemies;

        while (b1)
        {
-            Square to   = pop_lsb(b1);
-            *moveList++ = Move(to - UpRight, to);
+            Square to = pop_lsb(&b1);
+            *moveList++ = make_move(to - UpRight, to);
        }

        while (b2)
        {
-            Square to   = pop_lsb(b2);
-            *moveList++ = Move(to - UpLeft, to);
+            Square to = pop_lsb(&b2);
+            *moveList++ = make_move(to - UpLeft, to);
        }

        if (pos.ep_square() != SQ_NONE)
        {
            assert(rank_of(pos.ep_square()) == relative_rank(Us, RANK_6));

-            // An en passant capture cannot resolve a discovered check
-            if (Type == EVASIONS && (target & (pos.ep_square() + Up)))
+            // An en passant capture can be an evasion only if the checking piece
+            // is the double pushed pawn and so is in the target. Otherwise this
+            // is a discovered check, which an en passant capture cannot resolve.
+            if (Type == EVASIONS && !(target & (pos.ep_square() - Up)))
                return moveList;

-            b1 = pawnsNotOn7 & attacks_bb<PAWN>(pos.ep_square(), Them);
+            b1 = pawnsNotOn7 & pawn_attacks_bb(Them, pos.ep_square());

            assert(b1);

            while (b1)
-                *moveList++ = Move::make<EN_PASSANT>(pop_lsb(b1), pos.ep_square());
+                *moveList++ = make<ENPASSANT>(pop_lsb(&b1), pos.ep_square());
        }
    }

    return moveList;
-}
+  }


-template<Color Us, PieceType Pt>
-ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) {
+  template<Color Us, PieceType Pt, bool Checks>
+  ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) {

    static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()");

-    Bitboard bb = pos.pieces(Us, Pt);
+    const Square* pl = pos.squares<Pt>(Us);

-    while (bb)
+    for (Square from = *pl; from != SQ_NONE; from = *++pl)
    {
-        Square   from = pop_lsb(bb);
-        Bitboard b    = attacks_bb<Pt>(from, pos.pieces()) & target;
+        if (Checks)
+        {
+            if (    (Pt == BISHOP || Pt == ROOK || Pt == QUEEN)
+                && !(attacks_bb<Pt>(from) & target & pos.check_squares(Pt)))
+                continue;
+
+            if (pos.blockers_for_king(~Us) & from)
+                continue;
+        }
+
+        Bitboard b = attacks_bb<Pt>(from, pos.pieces()) & target;
+
+        if (Checks)
+            b &= pos.check_squares(Pt);

        while (b)
-            *moveList++ = Move(from, pop_lsb(b));
+            *moveList++ = make_move(from, pop_lsb(&b));
    }

    return moveList;
-}
+  }


-template<Color Us, GenType Type>
-ExtMove* generate_all(const Position& pos, ExtMove* moveList) {
+  template<Color Us, GenType Type>
+  ExtMove* generate_all(const Position& pos, ExtMove* moveList) {
+    constexpr bool Checks = Type == QUIET_CHECKS; // Reduce template instantiations
+    Bitboard target;

-    static_assert(Type != LEGAL, "Unsupported type in generate_all()");
-
-    const Square ksq = pos.square<KING>(Us);
-    Bitboard     target;
-
-    // Skip generating non-king moves when in double check
-    if (Type != EVASIONS || !more_than_one(pos.checkers()))
+    switch (Type)
    {
-        target = Type == EVASIONS     ? between_bb(ksq, lsb(pos.checkers()))
-               : Type == NON_EVASIONS ? ~pos.pieces(Us)
-               : Type == CAPTURES     ? pos.pieces(~Us)
-                                      : ~pos.pieces();  // QUIETS
-
-        moveList = generate_pawn_moves<Us, Type>(pos, moveList, target);
-        moveList = generate_moves<Us, KNIGHT>(pos, moveList, target);
-        moveList = generate_moves<Us, BISHOP>(pos, moveList, target);
-        moveList = generate_moves<Us, ROOK>(pos, moveList, target);
-        moveList = generate_moves<Us, QUEEN>(pos, moveList, target);
+        case CAPTURES:
+            target =  pos.pieces(~Us);
+            break;
+        case QUIETS:
+        case QUIET_CHECKS:
+            target = ~pos.pieces();
+            break;
+        case EVASIONS:
+        {
+            Square checksq = lsb(pos.checkers());
+            target = between_bb(pos.square<KING>(Us), checksq) | checksq;
+            break;
+        }
+        case NON_EVASIONS:
+            target = ~pos.pieces(Us);
+            break;
+        default:
+            static_assert(true, "Unsupported type in generate_all()");
    }

-    Bitboard b = attacks_bb<KING>(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target);
+    moveList = generate_pawn_moves<Us, Type>(pos, moveList, target);
+    moveList = generate_moves<Us, KNIGHT, Checks>(pos, moveList, target);
+    moveList = generate_moves<Us, BISHOP, Checks>(pos, moveList, target);
+    moveList = generate_moves<Us,   ROOK, Checks>(pos, moveList, target);
+    moveList = generate_moves<Us,  QUEEN, Checks>(pos, moveList, target);

-    while (b)
-        *moveList++ = Move(ksq, pop_lsb(b));
+    if (Type != QUIET_CHECKS && Type != EVASIONS)
+    {
+        Square ksq = pos.square<KING>(Us);
+        Bitboard b = attacks_bb<KING>(ksq) & target;
+        while (b)
+            *moveList++ = make_move(ksq, pop_lsb(&b));

-    if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING))
-        for (CastlingRights cr : {Us & KING_SIDE, Us & QUEEN_SIDE})
-            if (!pos.castling_impeded(cr) && pos.can_castle(cr))
-                *moveList++ = Move::make<CASTLING>(ksq, pos.castling_rook_square(cr));
+        if ((Type != CAPTURES) && pos.can_castle(Us & ANY_CASTLING))
+            for(CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } )
+                if (!pos.castling_impeded(cr) && pos.can_castle(cr))
+                    *moveList++ = make<CASTLING>(ksq, pos.castling_rook_square(cr));
+    }

    return moveList;
-}
+  }

-}  // namespace
+} // namespace


-// <CAPTURES>     Generates all pseudo-legal captures plus queen promotions
-// <QUIETS>       Generates all pseudo-legal non-captures and underpromotions
-// <EVASIONS>     Generates all pseudo-legal check evasions
-// <NON_EVASIONS> Generates all pseudo-legal captures and non-captures
-//
-// Returns a pointer to the end of the move list.
+/// <CAPTURES>     Generates all pseudo-legal captures plus queen and checking knight promotions
+/// <QUIETS>       Generates all pseudo-legal non-captures and underpromotions (except checking knight)
+/// <NON_EVASIONS> Generates all pseudo-legal captures and non-captures
+///
+/// Returns a pointer to the end of the move list.
+
 template<GenType Type>
 ExtMove* generate(const Position& pos, ExtMove* moveList) {

-    static_assert(Type != LEGAL, "Unsupported type in generate()");
-    assert((Type == EVASIONS) == bool(pos.checkers()));
+  static_assert(Type == CAPTURES || Type == QUIETS || Type == NON_EVASIONS, "Unsupported type in generate()");
+  assert(!pos.checkers());

-    Color us = pos.side_to_move();
+  Color us = pos.side_to_move();

-    return us == WHITE ? generate_all<WHITE, Type>(pos, moveList)
-                       : generate_all<BLACK, Type>(pos, moveList);
+  return us == WHITE ? generate_all<WHITE, Type>(pos, moveList)
+                     : generate_all<BLACK, Type>(pos, moveList);
 }

 // Explicit template instantiations
 template ExtMove* generate<CAPTURES>(const Position&, ExtMove*);
 template ExtMove* generate<QUIETS>(const Position&, ExtMove*);
-template ExtMove* generate<EVASIONS>(const Position&, ExtMove*);
 template ExtMove* generate<NON_EVASIONS>(const Position&, ExtMove*);


-// generate<LEGAL> generates all the legal moves in the given position
+/// generate<QUIET_CHECKS> generates pseudo-legal non-captures giving check.
+/// Returns a pointer to the end of the move list.
+template<>
+ExtMove* generate<QUIET_CHECKS>(const Position& pos, ExtMove* moveList) {
+
+  assert(!pos.checkers());
+
+  Color us = pos.side_to_move();
+  Bitboard dc = pos.blockers_for_king(~us) & pos.pieces(us) & ~pos.pieces(PAWN);
+
+  while (dc)
+  {
+     Square from = pop_lsb(&dc);
+     PieceType pt = type_of(pos.piece_on(from));
+
+     Bitboard b = attacks_bb(pt, from, pos.pieces()) & ~pos.pieces();
+
+     if (pt == KING)
+         b &= ~attacks_bb<QUEEN>(pos.square<KING>(~us));
+
+     while (b)
+         *moveList++ = make_move(from, pop_lsb(&b));
+  }
+
+  return us == WHITE ? generate_all<WHITE, QUIET_CHECKS>(pos, moveList)
+                     : generate_all<BLACK, QUIET_CHECKS>(pos, moveList);
+}
+
+
+/// generate<EVASIONS> generates all pseudo-legal check evasions when the side
+/// to move is in check. Returns a pointer to the end of the move list.
+template<>
+ExtMove* generate<EVASIONS>(const Position& pos, ExtMove* moveList) {
+
+  assert(pos.checkers());
+
+  Color us = pos.side_to_move();
+  Square ksq = pos.square<KING>(us);
+  Bitboard sliderAttacks = 0;
+  Bitboard sliders = pos.checkers() & ~pos.pieces(KNIGHT, PAWN);
+
+  // Find all the squares attacked by slider checkers. We will remove them from
+  // the king evasions in order to skip known illegal moves, which avoids any
+  // useless legality checks later on.
+  while (sliders)
+      sliderAttacks |= line_bb(ksq, pop_lsb(&sliders)) & ~pos.checkers();
+
+  // Generate evasions for king, capture and non capture moves
+  Bitboard b = attacks_bb<KING>(ksq) & ~pos.pieces(us) & ~sliderAttacks;
+  while (b)
+      *moveList++ = make_move(ksq, pop_lsb(&b));
+
+  if (more_than_one(pos.checkers()))
+      return moveList; // Double check, only a king move can save the day
+
+  // Generate blocking evasions or captures of the checking piece
+  return us == WHITE ? generate_all<WHITE, EVASIONS>(pos, moveList)
+                     : generate_all<BLACK, EVASIONS>(pos, moveList);
+}
+
+
+/// generate<LEGAL> generates all the legal moves in the given position

 template<>
 ExtMove* generate<LEGAL>(const Position& pos, ExtMove* moveList) {

-    Color    us     = pos.side_to_move();
-    Bitboard pinned = pos.blockers_for_king(us) & pos.pieces(us);
-    Square   ksq    = pos.square<KING>(us);
-    ExtMove* cur    = moveList;
+  Color us = pos.side_to_move();
+  Bitboard pinned = pos.blockers_for_king(us) & pos.pieces(us);
+  Square ksq = pos.square<KING>(us);
+  ExtMove* cur = moveList;

-    moveList =
-      pos.checkers() ? generate<EVASIONS>(pos, moveList) : generate<NON_EVASIONS>(pos, moveList);
-    while (cur != moveList)
-        if (((pinned & cur->from_sq()) || cur->from_sq() == ksq || cur->type_of() == EN_PASSANT)
-            && !pos.legal(*cur))
-            *cur = *(--moveList);
-        else
-            ++cur;
+  moveList = pos.checkers() ? generate<EVASIONS    >(pos, moveList)
+                            : generate<NON_EVASIONS>(pos, moveList);
+  while (cur != moveList)
+      if (   (pinned || from_sq(*cur) == ksq || type_of(*cur) == ENPASSANT)
+          && !pos.legal(*cur))
+          *cur = (--moveList)->move;
+      else
+          ++cur;

-    return moveList;
+  return moveList;
 }
-
-}  // namespace Stockfish
--- a/src/movegen.h
+++ b/src/movegen.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,55 +19,55 @@
 #ifndef MOVEGEN_H_INCLUDED
 #define MOVEGEN_H_INCLUDED

-#include <algorithm>  // IWYU pragma: keep
-#include <cstddef>
+#include <algorithm>

 #include "types.h"

-namespace Stockfish {
-
 class Position;

 enum GenType {
-    CAPTURES,
-    QUIETS,
-    EVASIONS,
-    NON_EVASIONS,
-    LEGAL
+  CAPTURES,
+  QUIETS,
+  QUIET_CHECKS,
+  EVASIONS,
+  NON_EVASIONS,
+  LEGAL
 };

-struct ExtMove: public Move {
-    int value;
+struct ExtMove {
+  Move move;
+  int value;

-    void operator=(Move m) { data = m.raw(); }
+  operator Move() const { return move; }
+  void operator=(Move m) { move = m; }

-    // Inhibit unwanted implicit conversions to Move
-    // with an ambiguity that yields to a compile error.
-    operator float() const = delete;
+  // Inhibit unwanted implicit conversions to Move
+  // with an ambiguity that yields a compile error.
+  operator float() const = delete;
 };

-inline bool operator<(const ExtMove& f, const ExtMove& s) { return f.value < s.value; }
+inline bool operator<(const ExtMove& f, const ExtMove& s) {
+  return f.value < s.value;
+}

 template<GenType>
 ExtMove* generate(const Position& pos, ExtMove* moveList);

-// The MoveList struct wraps the generate() function and returns a convenient
-// list of moves. Using MoveList is sometimes preferable to directly calling
-// the lower level generate() function.
+/// The MoveList struct is a simple wrapper around generate(). It sometimes comes
+/// in handy to use this class instead of the low level generate() function.
 template<GenType T>
 struct MoveList {

-    explicit MoveList(const Position& pos) :
-        last(generate<T>(pos, moveList)) {}
-    const ExtMove* begin() const { return moveList; }
-    const ExtMove* end() const { return last; }
-    size_t         size() const { return last - moveList; }
-    bool           contains(Move move) const { return std::find(begin(), end(), move) != end(); }
+  explicit MoveList(const Position& pos) : last(generate<T>(pos, moveList)) {}
+  const ExtMove* begin() const { return moveList; }
+  const ExtMove* end() const { return last; }
+  size_t size() const { return last - moveList; }
+  bool contains(Move move) const {
+    return std::find(begin(), end(), move) != end();
+  }

-   private:
-    ExtMove moveList[MAX_MOVES], *last;
+private:
+  ExtMove moveList[MAX_MOVES], *last;
 };

-}  // namespace Stockfish
-
-#endif  // #ifndef MOVEGEN_H_INCLUDED
+#endif // #ifndef MOVEGEN_H_INCLUDED
--- a/src/movepick.cpp
+++ b/src/movepick.cpp
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,308 +16,249 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include "movepick.h"
-
 #include <cassert>
-#include <limits>
-#include <utility>

-#include "bitboard.h"
-#include "misc.h"
-#include "position.h"
-
-namespace Stockfish {
+#include "movepick.h"

 namespace {

-enum Stages {
-    // generate main search moves
-    MAIN_TT,
-    CAPTURE_INIT,
-    GOOD_CAPTURE,
-    QUIET_INIT,
-    GOOD_QUIET,
-    BAD_CAPTURE,
-    BAD_QUIET,
+  enum Stages {
+    MAIN_TT, CAPTURE_INIT, GOOD_CAPTURE, REFUTATION, QUIET_INIT, QUIET, BAD_CAPTURE,
+    EVASION_TT, EVASION_INIT, EVASION,
+    PROBCUT_TT, PROBCUT_INIT, PROBCUT,
+    QSEARCH_TT, QCAPTURE_INIT, QCAPTURE, QCHECK_INIT, QCHECK
+  };

-    // generate evasion moves
-    EVASION_TT,
-    EVASION_INIT,
-    EVASION,
-
-    // generate probcut moves
-    PROBCUT_TT,
-    PROBCUT_INIT,
-    PROBCUT,
-
-    // generate qsearch moves
-    QSEARCH_TT,
-    QCAPTURE_INIT,
-    QCAPTURE
-};
-
-
-// Sort moves in descending order up to and including a given limit.
-// The order of moves smaller than the limit is left unspecified.
-void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) {
+  // partial_insertion_sort() sorts moves in descending order up to and including
+  // a given limit. The order of moves smaller than the limit is left unspecified.
+  void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) {

    for (ExtMove *sortedEnd = begin, *p = begin + 1; p < end; ++p)
        if (p->value >= limit)
        {
            ExtMove tmp = *p, *q;
-            *p          = *++sortedEnd;
+            *p = *++sortedEnd;
            for (q = sortedEnd; q != begin && *(q - 1) < tmp; --q)
                *q = *(q - 1);
            *q = tmp;
        }
+  }
+
+} // namespace
+
+
+/// Constructors of the MovePicker class. As arguments we pass information
+/// to help it to return the (presumably) good moves first, to decide which
+/// moves to return (in the quiescence search, for instance, we only want to
+/// search captures, promotions, and some checks) and how important good move
+/// ordering is at the current node.
+
+/// MovePicker constructor for the main search
+MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, const LowPlyHistory* lp,
+                       const CapturePieceToHistory* cph, const PieceToHistory** ch, Move cm, const Move* killers, int pl)
+           : pos(p), mainHistory(mh), lowPlyHistory(lp), captureHistory(cph), continuationHistory(ch),
+             ttMove(ttm), refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, depth(d), ply(pl) {
+
+  assert(d > 0);
+
+  stage = (pos.checkers() ? EVASION_TT : MAIN_TT) +
+          !(ttm && pos.pseudo_legal(ttm));
 }

-}  // namespace
+/// MovePicker constructor for quiescence search
+MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh,
+                       const CapturePieceToHistory* cph, const PieceToHistory** ch, Square rs)
+           : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), ttMove(ttm), recaptureSquare(rs), depth(d) {

+  assert(d <= 0);

-// Constructors of the MovePicker class. As arguments, we pass information
-// to decide which class of moves to emit, to help sorting the (presumably)
-// good moves first, and how important move ordering is at the current node.
-
-// MovePicker constructor for the main search and for the quiescence search
-MovePicker::MovePicker(const Position&              p,
-                       Move                         ttm,
-                       Depth                        d,
-                       const ButterflyHistory*      mh,
-                       const LowPlyHistory*         lph,
-                       const CapturePieceToHistory* cph,
-                       const PieceToHistory**       ch,
-                       const PawnHistory*           ph,
-                       int                          pl) :
-    pos(p),
-    mainHistory(mh),
-    lowPlyHistory(lph),
-    captureHistory(cph),
-    continuationHistory(ch),
-    pawnHistory(ph),
-    ttMove(ttm),
-    depth(d),
-    ply(pl) {
-
-    if (pos.checkers())
-        stage = EVASION_TT + !(ttm && pos.pseudo_legal(ttm));
-
-    else
-        stage = (depth > 0 ? MAIN_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm));
+  stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) +
+           !(ttm && (depth > DEPTH_QS_RECAPTURES || to_sq(ttm) == recaptureSquare)
+                 && pos.pseudo_legal(ttm));
 }

-// MovePicker constructor for ProbCut: we generate captures with Static Exchange
-// Evaluation (SEE) greater than or equal to the given threshold.
-MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceToHistory* cph) :
-    pos(p),
-    captureHistory(cph),
-    ttMove(ttm),
-    threshold(th) {
-    assert(!pos.checkers());
+/// MovePicker constructor for ProbCut: we generate captures with SEE greater
+/// than or equal to the given threshold.
+MovePicker::MovePicker(const Position& p, Move ttm, Value th, const CapturePieceToHistory* cph)
+           : pos(p), captureHistory(cph), ttMove(ttm), threshold(th) {

-    stage = PROBCUT_TT
-          + !(ttm && pos.capture_stage(ttm) && pos.pseudo_legal(ttm) && pos.see_ge(ttm, threshold));
+  assert(!pos.checkers());
+
+  stage = PROBCUT_TT + !(ttm && pos.capture(ttm)
+                             && pos.pseudo_legal(ttm)
+                             && pos.see_ge(ttm, threshold));
 }

-// Assigns a numerical value to each move in a list, used for sorting.
-// Captures are ordered by Most Valuable Victim (MVV), preferring captures
-// with a good history. Quiets moves are ordered using the history tables.
+/// MovePicker::score() assigns a numerical value to each move in a list, used
+/// for sorting. Captures are ordered by Most Valuable Victim (MVV), preferring
+/// captures with a good history. Quiet moves are ordered using the histories.
 template<GenType Type>
 void MovePicker::score() {

-    static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type");
+  static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type");

-    Color us = pos.side_to_move();
+  for (auto& m : *this)
+      if (Type == CAPTURES)
+          m.value =  int(PieceValue[MG][pos.piece_on(to_sq(m))]) * 6
+                   + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))];

-    [[maybe_unused]] Bitboard threatByLesser[QUEEN + 1];
-    if constexpr (Type == QUIETS)
-    {
-        threatByLesser[KNIGHT] = threatByLesser[BISHOP] = pos.attacks_by<PAWN>(~us);
-        threatByLesser[ROOK] =
-          pos.attacks_by<KNIGHT>(~us) | pos.attacks_by<BISHOP>(~us) | threatByLesser[KNIGHT];
-        threatByLesser[QUEEN] = pos.attacks_by<ROOK>(~us) | threatByLesser[ROOK];
-    }
+      else if (Type == QUIETS)
+          m.value =      (*mainHistory)[pos.side_to_move()][from_to(m)]
+                   + 2 * (*continuationHistory[0])[pos.moved_piece(m)][to_sq(m)]
+                   + 2 * (*continuationHistory[1])[pos.moved_piece(m)][to_sq(m)]
+                   + 2 * (*continuationHistory[3])[pos.moved_piece(m)][to_sq(m)]
+                   +     (*continuationHistory[5])[pos.moved_piece(m)][to_sq(m)]
+                   + (ply < MAX_LPH ? std::min(4, depth / 3) * (*lowPlyHistory)[ply][from_to(m)] : 0);

-    for (auto& m : *this)
-    {
-        const Square    from          = m.from_sq();
-        const Square    to            = m.to_sq();
-        const Piece     pc            = pos.moved_piece(m);
-        const PieceType pt            = type_of(pc);
-        const Piece     capturedPiece = pos.piece_on(to);
-
-        if constexpr (Type == CAPTURES)
-            m.value = (*captureHistory)[pc][to][type_of(capturedPiece)]
-                    + 7 * int(PieceValue[capturedPiece]) + 1024 * bool(pos.check_squares(pt) & to);
-
-        else if constexpr (Type == QUIETS)
-        {
-            // histories
-            m.value = 2 * (*mainHistory)[us][m.from_to()];
-            m.value += 2 * (*pawnHistory)[pawn_structure_index(pos)][pc][to];
-            m.value += (*continuationHistory[0])[pc][to];
-            m.value += (*continuationHistory[1])[pc][to];
-            m.value += (*continuationHistory[2])[pc][to];
-            m.value += (*continuationHistory[3])[pc][to];
-            m.value += (*continuationHistory[5])[pc][to];
-
-            // bonus for checks
-            m.value += (bool(pos.check_squares(pt) & to) && pos.see_ge(m, -75)) * 16384;
-
-            // penalty for moving to a square threatened by a lesser piece
-            // or bonus for escaping an attack by a lesser piece.
-            if (KNIGHT <= pt && pt <= QUEEN)
-            {
-                static constexpr int bonus[QUEEN + 1] = {0, 0, 144, 144, 256, 517};
-                int v = threatByLesser[pt] & to ? -95 : 100 * bool(threatByLesser[pt] & from);
-                m.value += bonus[pt] * v;
-            }
-
-            if (ply < LOW_PLY_HISTORY_SIZE)
-                m.value += 8 * (*lowPlyHistory)[ply][m.from_to()] / (1 + ply);
-        }
-
-        else  // Type == EVASIONS
-        {
-            if (pos.capture_stage(m))
-                m.value = PieceValue[capturedPiece] + (1 << 28);
-            else
-            {
-                m.value = (*mainHistory)[us][m.from_to()] + (*continuationHistory[0])[pc][to];
-                if (ply < LOW_PLY_HISTORY_SIZE)
-                    m.value += 2 * (*lowPlyHistory)[ply][m.from_to()] / (1 + ply);
-            }
-        }
-    }
+      else // Type == EVASIONS
+      {
+          if (pos.capture(m))
+              m.value =  PieceValue[MG][pos.piece_on(to_sq(m))]
+                       - Value(type_of(pos.moved_piece(m)));
+          else
+              m.value =  (*mainHistory)[pos.side_to_move()][from_to(m)]
+                       + (*continuationHistory[0])[pos.moved_piece(m)][to_sq(m)]
+                       - (1 << 28);
+      }
 }

-// Returns the next move satisfying a predicate function.
-// This never returns the TT move, as it was emitted before.
-template<typename Pred>
+/// MovePicker::select() returns the next move satisfying a predicate function.
+/// It never returns the TT move.
+template<MovePicker::PickType T, typename Pred>
 Move MovePicker::select(Pred filter) {

-    for (; cur < endCur; ++cur)
-        if (*cur != ttMove && filter())
-            return *cur++;
+  while (cur < endMoves)
+  {
+      if (T == Best)
+          std::swap(*cur, *std::max_element(cur, endMoves));

-    return Move::none();
+      if (*cur != ttMove && filter())
+          return *cur++;
+
+      cur++;
+  }
+  return MOVE_NONE;
 }

-// This is the most important method of the MovePicker class. We emit one
-// new pseudo-legal move on every call until there are no more moves left,
-// picking the move with the highest score from a list of generated moves.
-Move MovePicker::next_move() {
+/// MovePicker::next_move() is the most important method of the MovePicker class. It
+/// returns a new pseudo legal move every time it is called until there are no more
+/// moves left, picking the move with the highest score from a list of generated moves.
+Move MovePicker::next_move(bool skipQuiets) {

-    constexpr int goodQuietThreshold = -14000;
 top:
-    switch (stage)
-    {
+  switch (stage) {

-    case MAIN_TT :
-    case EVASION_TT :
-    case QSEARCH_TT :
-    case PROBCUT_TT :
-        ++stage;
-        return ttMove;
+  case MAIN_TT:
+  case EVASION_TT:
+  case QSEARCH_TT:
+  case PROBCUT_TT:
+      ++stage;
+      return ttMove;

-    case CAPTURE_INIT :
-    case PROBCUT_INIT :
-    case QCAPTURE_INIT :
-        cur = endBadCaptures = moves;
-        endCur = endCaptures = generate<CAPTURES>(pos, cur);
+  case CAPTURE_INIT:
+  case PROBCUT_INIT:
+  case QCAPTURE_INIT:
+      cur = endBadCaptures = moves;
+      endMoves = generate<CAPTURES>(pos, cur);

-        score<CAPTURES>();
-        partial_insertion_sort(cur, endCur, std::numeric_limits<int>::min());
-        ++stage;
-        goto top;
+      score<CAPTURES>();
+      ++stage;
+      goto top;

-    case GOOD_CAPTURE :
-        if (select([&]() {
-                if (pos.see_ge(*cur, -cur->value / 18))
-                    return true;
-                std::swap(*endBadCaptures++, *cur);
-                return false;
-            }))
-            return *(cur - 1);
+  case GOOD_CAPTURE:
+      if (select<Best>([&](){
+                       return pos.see_ge(*cur, Value(-69 * cur->value / 1024)) ?
+                              // Move losing capture to endBadCaptures to be tried later
+                              true : (*endBadCaptures++ = *cur, false); }))
+          return *(cur - 1);

-        ++stage;
-        [[fallthrough]];
+      // Prepare the pointers to loop over the refutations array
+      cur = std::begin(refutations);
+      endMoves = std::end(refutations);

-    case QUIET_INIT :
-        if (!skipQuiets)
-        {
-            endCur = endGenerated = generate<QUIETS>(pos, cur);
+      // If the countermove is the same as a killer, skip it
+      if (   refutations[0].move == refutations[2].move
+          || refutations[1].move == refutations[2].move)
+          --endMoves;

-            score<QUIETS>();
-            partial_insertion_sort(cur, endCur, -3560 * depth);
-        }
+      ++stage;
+      /* fallthrough */

-        ++stage;
-        [[fallthrough]];
+  case REFUTATION:
+      if (select<Next>([&](){ return    *cur != MOVE_NONE
+                                    && !pos.capture(*cur)
+                                    &&  pos.pseudo_legal(*cur); }))
+          return *(cur - 1);
+      ++stage;
+      /* fallthrough */

-    case GOOD_QUIET :
-        if (!skipQuiets && select([&]() { return cur->value > goodQuietThreshold; }))
-            return *(cur - 1);
+  case QUIET_INIT:
+      if (!skipQuiets)
+      {
+          cur = endBadCaptures;
+          endMoves = generate<QUIETS>(pos, cur);

-        // Prepare the pointers to loop over the bad captures
-        cur    = moves;
-        endCur = endBadCaptures;
+          score<QUIETS>();
+          partial_insertion_sort(cur, endMoves, -3000 * depth);
+      }

-        ++stage;
-        [[fallthrough]];
+      ++stage;
+      /* fallthrough */

-    case BAD_CAPTURE :
-        if (select([]() { return true; }))
-            return *(cur - 1);
+  case QUIET:
+      if (   !skipQuiets
+          && select<Next>([&](){return   *cur != refutations[0].move
+                                      && *cur != refutations[1].move
+                                      && *cur != refutations[2].move;}))
+          return *(cur - 1);

-        // Prepare the pointers to loop over quiets again
-        cur    = endCaptures;
-        endCur = endGenerated;
+      // Prepare the pointers to loop over the bad captures
+      cur = moves;
+      endMoves = endBadCaptures;

-        ++stage;
-        [[fallthrough]];
+      ++stage;
+      /* fallthrough */

-    case BAD_QUIET :
-        if (!skipQuiets)
-            return select([&]() { return cur->value <= goodQuietThreshold; });
+  case BAD_CAPTURE:
+      return select<Next>([](){ return true; });

-        return Move::none();
+  case EVASION_INIT:
+      cur = moves;
+      endMoves = generate<EVASIONS>(pos, cur);

-    case EVASION_INIT :
-        cur    = moves;
-        endCur = endGenerated = generate<EVASIONS>(pos, cur);
+      score<EVASIONS>();
+      ++stage;
+      /* fallthrough */

-        score<EVASIONS>();
-        partial_insertion_sort(cur, endCur, std::numeric_limits<int>::min());
-        ++stage;
-        [[fallthrough]];
+  case EVASION:
+      return select<Best>([](){ return true; });

-    case EVASION :
-    case QCAPTURE :
-        return select([]() { return true; });
+  case PROBCUT:
+      return select<Best>([&](){ return pos.see_ge(*cur, threshold); });

-    case PROBCUT :
-        return select([&]() { return pos.see_ge(*cur, threshold); });
-    }
+  case QCAPTURE:
+      if (select<Best>([&](){ return   depth > DEPTH_QS_RECAPTURES
+                                    || to_sq(*cur) == recaptureSquare; }))
+          return *(cur - 1);

-    assert(false);
-    return Move::none();  // Silence warning
+      // If we did not find any move and we do not try checks, we have finished
+      if (depth != DEPTH_QS_CHECKS)
+          return MOVE_NONE;
+
+      ++stage;
+      /* fallthrough */
+
+  case QCHECK_INIT:
+      cur = moves;
+      endMoves = generate<QUIET_CHECKS>(pos, cur);
+
+      ++stage;
+      /* fallthrough */
+
+  case QCHECK:
+      return select<Next>([](){ return true; });
+  }
+
+  assert(false);
+  return MOVE_NONE; // Silence warning
 }
-
-void MovePicker::skip_quiet_moves() { skipQuiets = true; }
-
-// this function must be called after all quiet moves and captures have been generated
-bool MovePicker::can_move_king_or_pawn() const {
-    // SEE negative captures shouldn't be returned in GOOD_CAPTURE stage
-    assert(stage > GOOD_CAPTURE && stage != EVASION_INIT);
-
-    for (const ExtMove* m = moves; m < endGenerated; ++m)
-    {
-        PieceType movedPieceType = type_of(pos.moved_piece(*m));
-        if ((movedPieceType == PAWN || movedPieceType == KING) && pos.legal(*m))
-            return true;
-    }
-    return false;
-}
-
-}  // namespace Stockfish
--- a/src/movepick.h
+++ b/src/movepick.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,63 +19,141 @@
 #ifndef MOVEPICK_H_INCLUDED
 #define MOVEPICK_H_INCLUDED

-#include "history.h"
+#include <array>
+#include <limits>
+#include <type_traits>
+
 #include "movegen.h"
+#include "position.h"
 #include "types.h"

-namespace Stockfish {
+/// StatsEntry stores the stat table value. It is usually a number but could
+/// be a move or even a nested history. We use a class instead of a naked value
+/// so that the history update operator<<() can be called directly on the entry,
+/// letting caller sites use stats tables as simple multi-dimensional arrays.
+template<typename T, int D>
+class StatsEntry {

-class Position;
+  T entry;

-// The MovePicker class is used to pick one pseudo-legal move at a time from the
-// current position. The most important method is next_move(), which emits one
-// new pseudo-legal move on every call, until there are no moves left, when
-// Move::none() is returned. In order to improve the efficiency of the alpha-beta
-// algorithm, MovePicker attempts to return the moves which are most likely to get
-// a cut-off first.
-class MovePicker {
+public:
+  void operator=(const T& v) { entry = v; }
+  T* operator&() { return &entry; }
+  T* operator->() { return &entry; }
+  operator const T&() const { return entry; }

-   public:
-    MovePicker(const MovePicker&)            = delete;
-    MovePicker& operator=(const MovePicker&) = delete;
-    MovePicker(const Position&,
-               Move,
-               Depth,
-               const ButterflyHistory*,
-               const LowPlyHistory*,
-               const CapturePieceToHistory*,
-               const PieceToHistory**,
-               const PawnHistory*,
-               int);
-    MovePicker(const Position&, Move, int, const CapturePieceToHistory*);
-    Move next_move();
-    void skip_quiet_moves();
-    bool can_move_king_or_pawn() const;
+  void operator<<(int bonus) {
+    assert(abs(bonus) <= D); // Ensure range is [-D, D]
+    static_assert(D <= std::numeric_limits<T>::max(), "D overflows T");

-   private:
-    template<typename Pred>
-    Move select(Pred);
-    template<GenType>
-    void     score();
-    ExtMove* begin() { return cur; }
-    ExtMove* end() { return endCur; }
+    entry += bonus - entry * abs(bonus) / D;

-    const Position&              pos;
-    const ButterflyHistory*      mainHistory;
-    const LowPlyHistory*         lowPlyHistory;
-    const CapturePieceToHistory* captureHistory;
-    const PieceToHistory**       continuationHistory;
-    const PawnHistory*           pawnHistory;
-    Move                         ttMove;
-    ExtMove *                    cur, *endCur, *endBadCaptures, *endCaptures, *endGenerated;
-    int                          stage;
-    int                          threshold;
-    Depth                        depth;
-    int                          ply;
-    bool                         skipQuiets = false;
-    ExtMove                      moves[MAX_MOVES];
+    assert(abs(entry) <= D);
+  }
 };

-}  // namespace Stockfish
+/// Stats is a generic N-dimensional array used to store various statistics.
+/// The first template parameter T is the base type of the array, the second
+/// template parameter D limits the range of updates in [-D, D] when we update
+/// values with the << operator, while the last parameters (Size and Sizes)
+/// encode the dimensions of the array.
+template <typename T, int D, int Size, int... Sizes>
+struct Stats : public std::array<Stats<T, D, Sizes...>, Size>
+{
+  typedef Stats<T, D, Size, Sizes...> stats;

-#endif  // #ifndef MOVEPICK_H_INCLUDED
+  void fill(const T& v) {
+
+    // For standard-layout 'this' points to first struct member
+    assert(std::is_standard_layout<stats>::value);
+
+    typedef StatsEntry<T, D> entry;
+    entry* p = reinterpret_cast<entry*>(this);
+    std::fill(p, p + sizeof(*this) / sizeof(entry), v);
+  }
+};
+
+template <typename T, int D, int Size>
+struct Stats<T, D, Size> : public std::array<StatsEntry<T, D>, Size> {};
+
+/// In stats table, D=0 means that the template parameter is not used
+enum StatsParams { NOT_USED = 0 };
+enum StatsType { NoCaptures, Captures };
+
+/// ButterflyHistory records how often quiet moves have been successful or
+/// unsuccessful during the current search, and is used for reduction and move
+/// ordering decisions. It uses 2 tables (one for each color) indexed by
+/// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards
+typedef Stats<int16_t, 10692, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)> ButterflyHistory;
+
+/// At higher depths LowPlyHistory records successful quiet moves near the root and quiet
+/// moves which are/were in the PV (ttPv)
+/// It is cleared with each new search and filled during iterative deepening
+constexpr int MAX_LPH = 4;
+typedef Stats<int16_t, 10692, MAX_LPH, int(SQUARE_NB) * int(SQUARE_NB)> LowPlyHistory;
+
+/// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous
+/// move, see www.chessprogramming.org/Countermove_Heuristic
+typedef Stats<Move, NOT_USED, PIECE_NB, SQUARE_NB> CounterMoveHistory;
+
+/// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type]
+typedef Stats<int16_t, 10692, PIECE_NB, SQUARE_NB, PIECE_TYPE_NB> CapturePieceToHistory;
+
+/// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to]
+typedef Stats<int16_t, 29952, PIECE_NB, SQUARE_NB> PieceToHistory;
+
+/// ContinuationHistory is the combined history of a given pair of moves, usually
+/// the current one given a previous one. The nested history table is based on
+/// PieceToHistory instead of ButterflyBoards.
+typedef Stats<PieceToHistory, NOT_USED, PIECE_NB, SQUARE_NB> ContinuationHistory;
+
+
+/// MovePicker class is used to pick one pseudo legal move at a time from the
+/// current position. The most important method is next_move(), which returns a
+/// new pseudo legal move each time it is called, until there are no moves left,
+/// when MOVE_NONE is returned. In order to improve the efficiency of the alpha
+/// beta algorithm, MovePicker attempts to return the moves which are most likely
+/// to get a cut-off first.
+class MovePicker {
+  // Mode for select(): Best first swaps the maximum remaining element to the front, Next keeps list order
+  enum PickType { Next, Best };
+
+public:
+  MovePicker(const MovePicker&) = delete;
+  MovePicker& operator=(const MovePicker&) = delete;
+  MovePicker(const Position&, Move, Value, const CapturePieceToHistory*);
+  MovePicker(const Position&, Move, Depth, const ButterflyHistory*,
+                                           const CapturePieceToHistory*,
+                                           const PieceToHistory**,
+                                           Square);
+  MovePicker(const Position&, Move, Depth, const ButterflyHistory*,
+                                           const LowPlyHistory*,
+                                           const CapturePieceToHistory*,
+                                           const PieceToHistory**,
+                                           Move,
+                                           const Move*,
+                                           int);
+  Move next_move(bool skipQuiets = false); // when true, the QUIET_INIT/QUIET stages generate and return nothing
+
+private:
+  template<PickType T, typename Pred> Move select(Pred);
+  template<GenType> void score();
+  ExtMove* begin() { return cur; }
+  ExtMove* end() { return endMoves; }
+
+  const Position& pos;
+  const ButterflyHistory* mainHistory;
+  const LowPlyHistory* lowPlyHistory;
+  const CapturePieceToHistory* captureHistory;
+  const PieceToHistory** continuationHistory;
+  Move ttMove;
+  ExtMove refutations[3], *cur, *endMoves, *endBadCaptures; // [cur, endMoves) is the range select() scans
+  int stage;
+  Square recaptureSquare;
+  Value threshold;
+  Depth depth;
+  int ply;
+  ExtMove moves[MAX_MOVES];
+};
+
+#endif // #ifndef MOVEPICK_H_INCLUDED
--- a/src/nnue/architectures/halfkp_256x2-32-32.h
+++ b/src/nnue/architectures/halfkp_256x2-32-32.h
@@ -0,0 +1,54 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of input features and network structure used in NNUE evaluation function
+
+#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
+#define NNUE_HALFKP_256X2_32_32_H_INCLUDED
+
+#include "../features/feature_set.h"
+#include "../features/half_kp.h"
+
+#include "../layers/input_slice.h"
+#include "../layers/affine_transform.h"
+#include "../layers/clipped_relu.h"
+
+namespace Eval::NNUE {
+
+// Input features used in evaluation function
+using RawFeatures = Features::FeatureSet<
+    Features::HalfKP<Features::Side::kFriend>>;
+
+// Number of input feature dimensions after conversion
+constexpr IndexType kTransformedFeatureDimensions = 256;
+
+namespace Layers {
+
+// Define network structure
+using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
+using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
+using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
+using OutputLayer = AffineTransform<HiddenLayer2, 1>;
+
+}  // namespace Layers
+
+using Network = Layers::OutputLayer;
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
--- a/src/nnue/evaluate_nnue.cpp
+++ b/src/nnue/evaluate_nnue.cpp
@@ -0,0 +1,178 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Code for calculating NNUE evaluation function
+
+#include <fstream>
+#include <iostream>
+#include <set>
+
+#include "../evaluate.h"
+#include "../position.h"
+#include "../misc.h"
+#include "../uci.h"
+
+#include "evaluate_nnue.h"
+
+ExtPieceSquare kpp_board_index[PIECE_NB] = {
+ // convention: W - us, B - them
+ // viewed from other side, W and B are reversed
+    { PS_NONE,     PS_NONE     },
+    { PS_W_PAWN,   PS_B_PAWN   },
+    { PS_W_KNIGHT, PS_B_KNIGHT },
+    { PS_W_BISHOP, PS_B_BISHOP },
+    { PS_W_ROOK,   PS_B_ROOK   },
+    { PS_W_QUEEN,  PS_B_QUEEN  },
+    { PS_W_KING,   PS_B_KING   },
+    { PS_NONE,     PS_NONE     },
+    { PS_NONE,     PS_NONE     },
+    { PS_B_PAWN,   PS_W_PAWN   },
+    { PS_B_KNIGHT, PS_W_KNIGHT },
+    { PS_B_BISHOP, PS_W_BISHOP },
+    { PS_B_ROOK,   PS_W_ROOK   },
+    { PS_B_QUEEN,  PS_W_QUEEN  },
+    { PS_B_KING,   PS_W_KING   },
+    { PS_NONE,     PS_NONE     }
+};
+
+
+namespace Eval::NNUE {
+
+  // Input feature converter
+  AlignedPtr<FeatureTransformer> feature_transformer;
+
+  // Evaluation function
+  AlignedPtr<Network> network;
+
+  // Evaluation function file name
+  std::string fileName;
+
+  namespace Detail {
+
+  // Allocate aligned storage for a T, hand it to 'pointer' and zero-fill it
+  template <typename T>
+  void Initialize(AlignedPtr<T>& pointer) {
+    // NOTE(review): the allocation result is not checked before the memset below
+    pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
+    std::memset(pointer.get(), 0, sizeof(T));
+  }
+
+  // Read one component: a 32-bit hash that must equal T::GetHashValue(), then T's own parameters
+  template <typename T>
+  bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
+    // The hash is copied as raw bytes from the stream into 'header'
+    std::uint32_t header;
+    stream.read(reinterpret_cast<char*>(&header), sizeof(header));
+    if (!stream || header != T::GetHashValue()) return false;
+    return pointer->ReadParameters(stream);
+  }
+
+  }  // namespace Detail
+
+  // Initialize the evaluation function parameters
+  void Initialize() {
+
+    Detail::Initialize(feature_transformer);
+    Detail::Initialize(network);
+  }
+
+  // Read network header; rejects a wrong version or an implausible description size
+  bool ReadHeader(std::istream& stream,
+    std::uint32_t* hash_value, std::string* architecture) {
+    constexpr std::uint32_t kMaxArchitectureSize = 1u << 20; // guards resize() below
+    std::uint32_t version, size;
+    stream.read(reinterpret_cast<char*>(&version), sizeof(version));
+    stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value));
+    stream.read(reinterpret_cast<char*>(&size), sizeof(size));
+    if (!stream || version != kVersion || size > kMaxArchitectureSize) return false;
+    architecture->resize(size);
+    stream.read(&(*architecture)[0], size);
+    return !stream.fail();
+  }
+
+  // Load header, feature transformer and network; the stream must end right after
+  bool ReadParameters(std::istream& stream) {
+    std::uint32_t header_hash;
+    std::string arch_name;
+    const bool loaded =
+           ReadHeader(stream, &header_hash, &arch_name)
+        && header_hash == kHashValue
+        && Detail::ReadParameters(stream, feature_transformer)
+        && Detail::ReadParameters(stream, network);
+    return loaded && stream && stream.peek() == std::ios::traits_type::eof();
+  }
+
+  // Proceed with the difference calculation if possible
+  static void UpdateAccumulatorIfPossible(const Position& pos) {
+
+    feature_transformer->UpdateAccumulatorIfPossible(pos);
+  }
+
+  // Return the network output for 'pos' as a Value, reusing the score cached
+  // in the accumulator unless 'refresh' is set or nothing has been cached yet
+  static Value ComputeScore(const Position& pos, bool refresh) {
+    auto& acc = pos.state()->accumulator;
+    const bool must_compute = refresh || !acc.computed_score;
+
+    if (must_compute) {
+      // Scratch areas for the transformed inputs and the layer outputs
+      alignas(kCacheLineSize) TransformedFeatureType
+          features[FeatureTransformer::kBufferSize];
+      alignas(kCacheLineSize) char scratch[Network::kBufferSize];
+
+      feature_transformer->Transform(pos, features, refresh);
+      const auto result = network->Propagate(features, scratch);
+
+      acc.score = static_cast<Value>(result[0] / FV_SCALE);
+      acc.computed_score = true;
+    }
+    return acc.score;
+  }
+
+  // Reset the network storage, remember the requested file name and load the
+  // parameters from that file. Returns true only if the whole file was read
+  // successfully; an unopenable file leaves the stream in a failed state and
+  // therefore also yields false.
+  bool load_eval_file(const std::string& evalFile) {
+    Initialize();
+    fileName = evalFile;
+
+    std::ifstream input(evalFile, std::ios::binary);
+
+    return ReadParameters(input);
+  }
+
+  // Evaluate 'pos' without forcing a refresh, keeping the result strictly
+  // inside the (VALUE_TB_LOSS_IN_MAX_PLY, VALUE_TB_WIN_IN_MAX_PLY) range
+  Value evaluate(const Position& pos) {
+    const auto lowest  = VALUE_TB_LOSS_IN_MAX_PLY + 1;
+    const auto highest = VALUE_TB_WIN_IN_MAX_PLY - 1;
+    return Utility::clamp(ComputeScore(pos, false), lowest, highest);
+  }
+
+  // Evaluation function. Perform full calculation.
+  Value compute_eval(const Position& pos) {
+    return ComputeScore(pos, true);
+  }
+
+  // Proceed with the difference calculation if possible
+  void update_eval(const Position& pos) {
+    UpdateAccumulatorIfPossible(pos);
+  }
+
+} // namespace Eval::NNUE
--- a/src/nnue/evaluate_nnue.h
+++ b/src/nnue/evaluate_nnue.h
@@ -0,0 +1,48 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Header used by the NNUE evaluation function
+
+#ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
+#define NNUE_EVALUATE_NNUE_H_INCLUDED
+
+#include "nnue_feature_transformer.h"
+
+#include <memory>
+
+namespace Eval::NNUE {
+
+  // Hash value of evaluation function structure
+  constexpr std::uint32_t kHashValue =
+      FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
+
+  // Deleter for AlignedPtr: destroys the object, then frees its aligned storage
+  template <typename T>
+  struct AlignedDeleter {
+    void operator()(T* ptr) const {
+      ptr->~T();               // run the destructor explicitly; no delete-expression is used
+      std_aligned_free(ptr);   // release the raw storage
+    }
+  };
+
+  template <typename T>
+  using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
--- a/src/nnue/features/feature_set.h
+++ b/src/nnue/features/feature_set.h
@@ -0,0 +1,135 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// A class template that represents the input feature set of the NNUE evaluation function
+
+#ifndef NNUE_FEATURE_SET_H_INCLUDED
+#define NNUE_FEATURE_SET_H_INCLUDED
+
+#include "features_common.h"
+#include <array>
+
+namespace Eval::NNUE::Features {
+
+  // Class template that represents a compile-time list of values of type T
+  template <typename T, T... Values>
+  struct CompileTimeList;
+
+  template <typename T, T First, T... Remaining>
+  struct CompileTimeList<T, First, Remaining...> {
+    static constexpr bool Contains(T value) {  // fold needs no empty-list specialization
+      return value == First || (... || (value == Remaining));
+    }
+    static constexpr std::array<T, sizeof...(Remaining) + 1>
+        kValues = {{First, Remaining...}};
+  };
+
+  // Base class of feature set: runs Derived's index collection once per perspective
+  template <typename Derived>
+  class FeatureSetBase {
+
+   public:
+    // Append the active feature indices for WHITE and BLACK to active[perspective]
+    template <typename IndexListType>
+    static void AppendActiveIndices(
+        const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
+
+      for (Color perspective : { WHITE, BLACK }) {
+        Derived::CollectActiveIndices(
+            pos, trigger, perspective, &active[perspective]);
+      }
+    }
+
+    // Append changed indices per perspective; reset[p] is set when p needs a full refresh
+    template <typename PositionType, typename IndexListType>
+    static void AppendChangedIndices(
+        const PositionType& pos, TriggerEvent trigger,
+        IndexListType removed[2], IndexListType added[2], bool reset[2]) {
+      // Define both out-flags first: the early return below must not leave them unset
+      reset[WHITE] = reset[BLACK] = false;
+      const auto& dp = pos.state()->dirtyPiece;
+      if (dp.dirty_num == 0) return;
+
+      for (Color perspective : { WHITE, BLACK }) {
+        switch (trigger) {
+          case TriggerEvent::kFriendKingMoved:
+            reset[perspective] =
+                dp.pieceId[0] == PIECE_ID_KING + perspective;
+            break;
+          default:
+            assert(false);
+            break;
+        }
+        if (reset[perspective]) {
+          Derived::CollectActiveIndices(
+              pos, trigger, perspective, &added[perspective]);
+        } else {
+          Derived::CollectChangedIndices(
+              pos, trigger, perspective,
+              &removed[perspective], &added[perspective]);
+        }
+      }
+    }
+  };
+
+  // FeatureSet specialization for a single feature type: forwards that type's constants and index collection
+  template <typename FeatureType>
+  class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
+
+   public:
+    // Hash value embedded in the evaluation file
+    static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
+    // Number of feature dimensions
+    static constexpr IndexType kDimensions = FeatureType::kDimensions;
+    // Maximum number of simultaneously active features
+    static constexpr IndexType kMaxActiveDimensions =
+        FeatureType::kMaxActiveDimensions;
+    // Single-element list holding the trigger that forces a full calculation instead of a difference calculation
+    using SortedTriggerSet =
+        CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
+    static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
+
+   private:
+    // Append FeatureType's active indices, but only if 'trigger' is its refresh trigger
+    static void CollectActiveIndices(
+        const Position& pos, const TriggerEvent trigger, const Color perspective,
+        IndexList* const active) {
+      if (FeatureType::kRefreshTrigger == trigger) {
+        FeatureType::AppendActiveIndices(pos, perspective, active);
+      }
+    }
+
+    // Append FeatureType's removed/added indices, again only if 'trigger' is its refresh trigger
+    static void CollectChangedIndices(
+        const Position& pos, const TriggerEvent trigger, const Color perspective,
+        IndexList* const removed, IndexList* const added) {
+
+      if (FeatureType::kRefreshTrigger == trigger) {
+        FeatureType::AppendChangedIndices(pos, perspective, removed, added);
+      }
+    }
+
+    // FeatureSetBase calls the private Collect* functions above; every FeatureSet instantiation is befriended too
+    friend class FeatureSetBase<FeatureSet>;
+    template <typename... FeatureTypes>
+    friend class FeatureSet;
+  };
+
+}  // namespace Eval::NNUE::Features
+
+#endif // #ifndef NNUE_FEATURE_SET_H_INCLUDED
--- a/src/nnue/features/features_common.h
+++ b/src/nnue/features/features_common.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,27 +16,30 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#ifndef BENCHMARK_H_INCLUDED
-#define BENCHMARK_H_INCLUDED
+// Common header for the input features of the NNUE evaluation function

-#include <iosfwd>
-#include <string>
-#include <vector>
+#ifndef NNUE_FEATURES_COMMON_H_INCLUDED
+#define NNUE_FEATURES_COMMON_H_INCLUDED

-namespace Stockfish::Benchmark {
+#include "../../evaluate.h"
+#include "../nnue_common.h"

-std::vector<std::string> setup_bench(const std::string&, std::istream&);
+namespace Eval::NNUE::Features {

-struct BenchmarkSetup {
-    int                      ttSize;
-    int                      threads;
-    std::vector<std::string> commands;
-    std::string              originalInvocation;
-    std::string              filledInvocation;
-};
+  class IndexList;

-BenchmarkSetup setup_benchmark(std::istream&);
+  template <typename... FeatureTypes>
+  class FeatureSet;

-}  // namespace Stockfish
+  // Trigger to perform full calculations instead of difference only
+  enum class TriggerEvent {
+    kFriendKingMoved // calculate full evaluation when own king moves
+  };

-#endif  // #ifndef BENCHMARK_H_INCLUDED
+  enum class Side {
+    kFriend // side to move
+  };
+
+}  // namespace Eval::NNUE::Features
+
+#endif // #ifndef NNUE_FEATURES_COMMON_H_INCLUDED
--- a/src/nnue/features/half_ka_v2_hm.cpp
+++ b/src/nnue/features/half_ka_v2_hm.cpp
@@ -1,86 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-//Definition of input features HalfKAv2_hm of NNUE evaluation function
-
-#include "half_ka_v2_hm.h"
-
-#include "../../bitboard.h"
-#include "../../position.h"
-#include "../../types.h"
-#include "../nnue_common.h"
-
-namespace Stockfish::Eval::NNUE::Features {
-
-// Index of a feature for a given king position and another piece on some square
-template<Color Perspective>
-inline IndexType HalfKAv2_hm::make_index(Square s, Piece pc, Square ksq) {
-    return IndexType((int(s) ^ OrientTBL[Perspective][ksq]) + PieceSquareIndex[Perspective][pc]
-                     + KingBuckets[Perspective][ksq]);
-}
-
-// Get a list of indices for active features
-template<Color Perspective>
-void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active) {
-    Square   ksq = pos.square<KING>(Perspective);
-    Bitboard bb  = pos.pieces();
-    while (bb)
-    {
-        Square s = pop_lsb(bb);
-        active.push_back(make_index<Perspective>(s, pos.piece_on(s), ksq));
-    }
-}
-
-// Explicit template instantiations
-template void HalfKAv2_hm::append_active_indices<WHITE>(const Position& pos, IndexList& active);
-template void HalfKAv2_hm::append_active_indices<BLACK>(const Position& pos, IndexList& active);
-template IndexType HalfKAv2_hm::make_index<WHITE>(Square s, Piece pc, Square ksq);
-template IndexType HalfKAv2_hm::make_index<BLACK>(Square s, Piece pc, Square ksq);
-
-// Get a list of indices for recently changed features
-template<Color Perspective>
-void HalfKAv2_hm::append_changed_indices(Square            ksq,
-                                         const DirtyPiece& dp,
-                                         IndexList&        removed,
-                                         IndexList&        added) {
-    removed.push_back(make_index<Perspective>(dp.from, dp.pc, ksq));
-    if (dp.to != SQ_NONE)
-        added.push_back(make_index<Perspective>(dp.to, dp.pc, ksq));
-
-    if (dp.remove_sq != SQ_NONE)
-        removed.push_back(make_index<Perspective>(dp.remove_sq, dp.remove_pc, ksq));
-
-    if (dp.add_sq != SQ_NONE)
-        added.push_back(make_index<Perspective>(dp.add_sq, dp.add_pc, ksq));
-}
-
-// Explicit template instantiations
-template void HalfKAv2_hm::append_changed_indices<WHITE>(Square            ksq,
-                                                         const DirtyPiece& dp,
-                                                         IndexList&        removed,
-                                                         IndexList&        added);
-template void HalfKAv2_hm::append_changed_indices<BLACK>(Square            ksq,
-                                                         const DirtyPiece& dp,
-                                                         IndexList&        removed,
-                                                         IndexList&        added);
-
-bool HalfKAv2_hm::requires_refresh(const DirtyPiece& dirtyPiece, Color perspective) {
-    return dirtyPiece.pc == make_piece(perspective, KING);
-}
-
-}  // namespace Stockfish::Eval::NNUE::Features
--- a/src/nnue/features/half_ka_v2_hm.h
+++ b/src/nnue/features/half_ka_v2_hm.h
@@ -1,144 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-//Definition of input features HalfKP of NNUE evaluation function
-
-#ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
-#define NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
-
-#include <cstdint>
-
-#include "../../misc.h"
-#include "../../types.h"
-#include "../nnue_common.h"
-
-namespace Stockfish {
-class Position;
-}
-
-namespace Stockfish::Eval::NNUE::Features {
-
-// Feature HalfKAv2_hm: Combination of the position of own king and the
-// position of pieces. Position mirrored such that king is always on e..h files.
-class HalfKAv2_hm {
-
-    // Unique number for each piece type on each square
-    enum {
-        PS_NONE     = 0,
-        PS_W_PAWN   = 0,
-        PS_B_PAWN   = 1 * SQUARE_NB,
-        PS_W_KNIGHT = 2 * SQUARE_NB,
-        PS_B_KNIGHT = 3 * SQUARE_NB,
-        PS_W_BISHOP = 4 * SQUARE_NB,
-        PS_B_BISHOP = 5 * SQUARE_NB,
-        PS_W_ROOK   = 6 * SQUARE_NB,
-        PS_B_ROOK   = 7 * SQUARE_NB,
-        PS_W_QUEEN  = 8 * SQUARE_NB,
-        PS_B_QUEEN  = 9 * SQUARE_NB,
-        PS_KING     = 10 * SQUARE_NB,
-        PS_NB       = 11 * SQUARE_NB
-    };
-
-    static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = {
-      // Convention: W - us, B - them
-      // Viewed from other side, W and B are reversed
-      {PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE,
-       PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE},
-      {PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE,
-       PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE}};
-
-   public:
-    // Feature name
-    static constexpr const char* Name = "HalfKAv2_hm(Friend)";
-
-    // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t HashValue = 0x7f234cb8u;
-
-    // Number of feature dimensions
-    static constexpr IndexType Dimensions =
-      static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_NB) / 2;
-
-#define B(v) (v * PS_NB)
-    // clang-format off
-    static constexpr int KingBuckets[COLOR_NB][SQUARE_NB] = {
-      { B(28), B(29), B(30), B(31), B(31), B(30), B(29), B(28),
-        B(24), B(25), B(26), B(27), B(27), B(26), B(25), B(24),
-        B(20), B(21), B(22), B(23), B(23), B(22), B(21), B(20),
-        B(16), B(17), B(18), B(19), B(19), B(18), B(17), B(16),
-        B(12), B(13), B(14), B(15), B(15), B(14), B(13), B(12),
-        B( 8), B( 9), B(10), B(11), B(11), B(10), B( 9), B( 8),
-        B( 4), B( 5), B( 6), B( 7), B( 7), B( 6), B( 5), B( 4),
-        B( 0), B( 1), B( 2), B( 3), B( 3), B( 2), B( 1), B( 0) },
-      { B( 0), B( 1), B( 2), B( 3), B( 3), B( 2), B( 1), B( 0),
-        B( 4), B( 5), B( 6), B( 7), B( 7), B( 6), B( 5), B( 4),
-        B( 8), B( 9), B(10), B(11), B(11), B(10), B( 9), B( 8),
-        B(12), B(13), B(14), B(15), B(15), B(14), B(13), B(12),
-        B(16), B(17), B(18), B(19), B(19), B(18), B(17), B(16),
-        B(20), B(21), B(22), B(23), B(23), B(22), B(21), B(20),
-        B(24), B(25), B(26), B(27), B(27), B(26), B(25), B(24),
-        B(28), B(29), B(30), B(31), B(31), B(30), B(29), B(28) }
-    };
-    // clang-format on
-#undef B
-    // clang-format off
-    // Orient a square according to perspective (rotates by 180 for black)
-    static constexpr int OrientTBL[COLOR_NB][SQUARE_NB] = {
-      { SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
-        SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
-        SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
-        SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
-        SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
-        SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
-        SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
-        SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1 },
-      { SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8,
-        SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8,
-        SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8,
-        SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8,
-        SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8,
-        SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8,
-        SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8,
-        SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8 }
-    };
-    // clang-format on
-
-    // Maximum number of simultaneously active features.
-    static constexpr IndexType MaxActiveDimensions = 32;
-    using IndexList                                = ValueList<IndexType, MaxActiveDimensions>;
-
-    // Index of a feature for a given king position and another piece on some square
-    template<Color Perspective>
-    static IndexType make_index(Square s, Piece pc, Square ksq);
-
-    // Get a list of indices for active features
-    template<Color Perspective>
-    static void append_active_indices(const Position& pos, IndexList& active);
-
-    // Get a list of indices for recently changed features
-    template<Color Perspective>
-    static void
-    append_changed_indices(Square ksq, const DirtyPiece& dp, IndexList& removed, IndexList& added);
-
-    // Returns whether the change stored in this DirtyPiece means
-    // that a full accumulator refresh is required.
-    static bool requires_refresh(const DirtyPiece& dirtyPiece, Color perspective);
-};
-
-}  // namespace Stockfish::Eval::NNUE::Features
-
-#endif  // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
--- a/src/nnue/features/half_kp.cpp
+++ b/src/nnue/features/half_kp.cpp
@@ -0,0 +1,92 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of the HalfKP input features of the NNUE evaluation function
+
+#include "half_kp.h"
+#include "index_list.h"
+
+namespace Eval::NNUE::Features {
+
+  // Feature index for king square sq_k and piece-square code p: PS_END * sq_k + p
+  template <Side AssociatedKing>
+  inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, PieceSquare p) {
+    return static_cast<IndexType>(PS_END) * static_cast<IndexType>(sq_k) + p;  // one PS_END-wide slice per king square
+  }
+
+  // Outputs the piece list for `perspective` (*pieces) and the square of the associated king (*sq_target_k)
+  template <Side AssociatedKing>
+  inline void HalfKP<AssociatedKing>::GetPieces(
+      const Position& pos, Color perspective,
+      PieceSquare** pieces, Square* sq_target_k) {
+    // Piece list selection: piece_list_fb() for BLACK, piece_list_fw() otherwise
+    *pieces = (perspective == BLACK) ?
+        pos.eval_list()->piece_list_fb() :
+        pos.eval_list()->piece_list_fw();
+    const PieceId target = (AssociatedKing == Side::kFriend) ?  // piece id of the king the features are keyed on
+        static_cast<PieceId>(PIECE_ID_KING + perspective) :  // kFriend: presumably the king of `perspective` - confirm id layout
+        static_cast<PieceId>(PIECE_ID_KING + ~perspective);  // otherwise: presumably the other colour's king - confirm
+    *sq_target_k = static_cast<Square>(((*pieces)[target] - PS_W_KING) % SQUARE_NB);  // subtract the PS_W_KING base, reduce modulo SQUARE_NB
+  }
+
+  // Appends to *active the index of every feature currently active for `perspective`
+  template <Side AssociatedKing>
+  void HalfKP<AssociatedKing>::AppendActiveIndices(
+      const Position& pos, Color perspective, IndexList* active) {
+
+    // Bail out when the raw feature list capacity is below this feature's maximum (also avoids a compiler warning)
+    if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
+
+    PieceSquare* pieces;
+    Square sq_target_k;
+    GetPieces(pos, perspective, &pieces, &sq_target_k);
+    for (PieceId i = PIECE_ID_ZERO; i < PIECE_ID_KING; ++i) {  // only piece ids below PIECE_ID_KING are visited
+      if (pieces[i] != PS_NONE) {  // PS_NONE entries contribute no feature
+        active->push_back(MakeIndex(sq_target_k, pieces[i]));
+      }
+    }
+  }
+
+  // Appends the feature indices that changed according to pos.state()->dirtyPiece to *removed and *added
+  template <Side AssociatedKing>
+  void HalfKP<AssociatedKing>::AppendChangedIndices(
+      const Position& pos, Color perspective,
+      IndexList* removed, IndexList* added) {
+    // Only sq_target_k is used below; the `pieces` pointer filled in by GetPieces is not read
+    PieceSquare* pieces;
+    Square sq_target_k;
+    GetPieces(pos, perspective, &pieces, &sq_target_k);
+    const auto& dp = pos.state()->dirtyPiece;
+    for (int i = 0; i < dp.dirty_num; ++i) {
+      if (dp.pieceId[i] >= PIECE_ID_KING) continue;  // skip piece ids at or above PIECE_ID_KING
+      const auto old_p = static_cast<PieceSquare>(
+          dp.old_piece[i].from[perspective]);  // previous piece-square code as seen from `perspective`
+      if (old_p != PS_NONE) {
+        removed->push_back(MakeIndex(sq_target_k, old_p));
+      }
+      const auto new_p = static_cast<PieceSquare>(
+          dp.new_piece[i].from[perspective]);  // new piece-square code as seen from `perspective`
+      if (new_p != PS_NONE) {
+        added->push_back(MakeIndex(sq_target_k, new_p));
+      }
+    }
+  }
+
+  template class HalfKP<Side::kFriend>;
+
+}  // namespace Eval::NNUE::Features
--- a/src/nnue/features/half_kp.h
+++ b/src/nnue/features/half_kp.h
@@ -0,0 +1,67 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of the HalfKP input features of the NNUE evaluation function
+
+#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
+#define NNUE_FEATURES_HALF_KP_H_INCLUDED
+
+#include "../../evaluate.h"
+#include "features_common.h"
+
+namespace Eval::NNUE::Features {
+
+  // Feature HalfKP: Combination of the position of own king
+  // and the position of pieces other than kings (all members are static)
+  template <Side AssociatedKing>
+  class HalfKP {
+
+   public:
+    // Feature name (a fixed literal; it does not vary with AssociatedKing)
+    static constexpr const char* kName = "HalfKP(Friend)";
+    // Hash value embedded in the evaluation file
+    static constexpr std::uint32_t kHashValue =
+        0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);  // XOR with 1 when AssociatedKing is kFriend
+    // Number of feature dimensions: SQUARE_NB king squares times PS_END piece-square codes
+    static constexpr IndexType kDimensions =
+        static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
+    // Maximum number of simultaneously active features
+    static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING;
+    // Trigger for full calculation instead of difference calculation
+    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
+
+    // Append the indices of the currently active features to *active
+    static void AppendActiveIndices(const Position& pos, Color perspective,
+                                    IndexList* active);
+
+    // Append the indices of recently changed features to *removed and *added
+    static void AppendChangedIndices(const Position& pos, Color perspective,
+                                     IndexList* removed, IndexList* added);
+
+    // Index of a feature for a given king position and another piece on some square
+    static IndexType MakeIndex(Square sq_k, PieceSquare p);
+
+   private:
+    // Output the piece list for `perspective` and the associated king's square
+    static void GetPieces(const Position& pos, Color perspective,
+                          PieceSquare** pieces, Square* sq_target_k);
+  };
+
+}  // namespace Eval::NNUE::Features
+
+#endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
--- a/src/nnue/features/index_list.h
+++ b/src/nnue/features/index_list.h
@@ -0,0 +1,64 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of index list of input features
+
+#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED
+#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED
+
+#include "../../position.h"
+#include "../nnue_architecture.h"
+
+namespace Eval::NNUE::Features {
+
+  // Fixed-capacity list of up to MaxSize values of type T, stored in a member array
+  template <typename T, std::size_t MaxSize>
+  class ValueList {
+
+   public:
+    std::size_t size() const { return size_; }  // number of elements currently in use
+    void resize(std::size_t size) { size_ = size; }  // only sets the count; elements are left untouched
+    void push_back(const T& value) { values_[size_++] = value; }  // no bounds check: caller must keep size() < MaxSize
+    T& operator[](std::size_t index) { return values_[index]; }  // unchecked access
+    T* begin() { return values_; }
+    T* end() { return values_ + size_; }  // one past the last element in use
+    const T& operator[](std::size_t index) const { return values_[index]; }  // unchecked access
+    const T* begin() const { return values_; }
+    const T* end() const { return values_ + size_; }
+    // Exchange contents with other: swaps the first max(size_, other.size_) slots, then the sizes
+    void swap(ValueList& other) {
+      const std::size_t max_size = std::max(size_, other.size_);  // NOTE(review): std::max/std::swap need <algorithm>/<utility>; presumably pulled in via the project includes - confirm
+      for (std::size_t i = 0; i < max_size; ++i) {
+        std::swap(values_[i], other.values_[i]);
+      }
+      std::swap(size_, other.size_);
+    }
+
+   private:
+    T values_[MaxSize];  // element storage; only the first size_ entries are in use
+    std::size_t size_ = 0;  // starts empty
+  };
+
+  // Feature index list type: a ValueList of IndexType with capacity RawFeatures::kMaxActiveDimensions
+  class IndexList
+      : public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
+  };
+
+}  // namespace Eval::NNUE::Features
+
+#endif // NNUE_FEATURES_INDEX_LIST_H_INCLUDED
--- a/src/nnue/layers/affine_transform.h
+++ b/src/nnue/layers/affine_transform.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,282 +21,195 @@
 #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
 #define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED

-#include <cstdint>
 #include <iostream>
-
 #include "../nnue_common.h"
-#include "../simd.h"

-/*
-  This file contains the definition for a fully connected layer (aka affine transform).
+namespace Eval::NNUE::Layers {

-    - expected use-case is for when PaddedInputDimensions == 32 and InputDimensions <= 32.
-      - that's why AVX512 is hard to implement
-    - expected use-case is small layers
-    - inputs are processed in chunks of 4, weights are respectively transposed
-    - accumulation happens directly to int32s
-*/
-
-namespace Stockfish::Eval::NNUE::Layers {
-
-#if defined(USE_SSSE3) || defined(USE_NEON_DOTPROD)
-    #define ENABLE_SEQ_OPT
-#endif
-
-// Fallback implementation for older/other architectures.
-// Requires the input to be padded to at least 16 values.
-#ifndef ENABLE_SEQ_OPT
-
-template<IndexType InputDimensions, IndexType PaddedInputDimensions, IndexType OutputDimensions>
-static void affine_transform_non_ssse3(std::int32_t*       output,
-                                       const std::int8_t*  weights,
-                                       const std::int32_t* biases,
-                                       const std::uint8_t* input) {
-    #if defined(USE_SSE2) || defined(USE_NEON)
-        #if defined(USE_SSE2)
-    // At least a multiple of 16, with SSE2.
-    constexpr IndexType NumChunks   = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
-    const __m128i       Zeros       = _mm_setzero_si128();
-    const auto          inputVector = reinterpret_cast<const __m128i*>(input);
-
-        #elif defined(USE_NEON)
-    constexpr IndexType NumChunks   = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
-    const auto          inputVector = reinterpret_cast<const int8x8_t*>(input);
-        #endif
-
-    for (IndexType i = 0; i < OutputDimensions; ++i)
-    {
-        const IndexType offset = i * PaddedInputDimensions;
-
-        #if defined(USE_SSE2)
-        __m128i    sumLo = _mm_cvtsi32_si128(biases[i]);
-        __m128i    sumHi = Zeros;
-        const auto row   = reinterpret_cast<const __m128i*>(&weights[offset]);
-        for (IndexType j = 0; j < NumChunks; ++j)
-        {
-            __m128i row_j           = _mm_load_si128(&row[j]);
-            __m128i input_j         = _mm_load_si128(&inputVector[j]);
-            __m128i extendedRowLo   = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
-            __m128i extendedRowHi   = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
-            __m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros);
-            __m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros);
-            __m128i productLo       = _mm_madd_epi16(extendedRowLo, extendedInputLo);
-            __m128i productHi       = _mm_madd_epi16(extendedRowHi, extendedInputHi);
-            sumLo                   = _mm_add_epi32(sumLo, productLo);
-            sumHi                   = _mm_add_epi32(sumHi, productHi);
-        }
-        __m128i sum           = _mm_add_epi32(sumLo, sumHi);
-        __m128i sumHigh_64    = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
-        sum                   = _mm_add_epi32(sum, sumHigh_64);
-        __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
-        sum                   = _mm_add_epi32(sum, sum_second_32);
-        output[i]             = _mm_cvtsi128_si32(sum);
-
-        #elif defined(USE_NEON)
-
-        int32x4_t  sum = {biases[i]};
-        const auto row = reinterpret_cast<const int8x8_t*>(&weights[offset]);
-        for (IndexType j = 0; j < NumChunks; ++j)
-        {
-            int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]);
-            product           = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]);
-            sum               = vpadalq_s16(sum, product);
-        }
-        output[i] = SIMD::neon_m128_reduce_add_epi32(sum);
-
-        #endif
-    }
-    #else
-    std::memcpy(output, biases, sizeof(std::int32_t) * OutputDimensions);
-
-    // Traverse weights in transpose order to take advantage of input sparsity
-    for (IndexType i = 0; i < InputDimensions; ++i)
-        if (input[i])
-        {
-            const std::int8_t* w  = &weights[i];
-            const int          in = input[i];
-            for (IndexType j = 0; j < OutputDimensions; ++j)
-                output[j] += w[j * PaddedInputDimensions] * in;
-        }
-    #endif
-}
-
-#endif  // !ENABLE_SEQ_OPT
-
-template<IndexType InDims, IndexType OutDims>
-class AffineTransform {
+  // Affine transformation layer
+  template <typename PreviousLayer, IndexType OutputDimensions>
+  class AffineTransform {
   public:
    // Input/output type
-    using InputType  = std::uint8_t;
+    using InputType = typename PreviousLayer::OutputType;
    using OutputType = std::int32_t;
+    static_assert(std::is_same<InputType, std::uint8_t>::value, "");

    // Number of input/output dimensions
-    static constexpr IndexType InputDimensions  = InDims;
-    static constexpr IndexType OutputDimensions = OutDims;
+    static constexpr IndexType kInputDimensions =
+        PreviousLayer::kOutputDimensions;
+    static constexpr IndexType kOutputDimensions = OutputDimensions;
+    static constexpr IndexType kPaddedInputDimensions =
+        CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);

-    static constexpr IndexType PaddedInputDimensions =
-      ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
-    static constexpr IndexType PaddedOutputDimensions =
-      ceil_to_multiple<IndexType>(OutputDimensions, MaxSimdWidth);
+    // Size of forward propagation buffer used in this layer
+    static constexpr std::size_t kSelfBufferSize =
+        CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);

-    using OutputBuffer = OutputType[PaddedOutputDimensions];
+    // Size of the forward propagation buffer used from the input layer to this layer
+    static constexpr std::size_t kBufferSize =
+        PreviousLayer::kBufferSize + kSelfBufferSize;

    // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
-        std::uint32_t hashValue = 0xCC03DAE4u;
-        hashValue += OutputDimensions;
-        hashValue ^= prevHash >> 1;
-        hashValue ^= prevHash << 31;
-        return hashValue;
+    static constexpr std::uint32_t GetHashValue() {
+      std::uint32_t hash_value = 0xCC03DAE4u;
+      hash_value += kOutputDimensions;
+      hash_value ^= PreviousLayer::GetHashValue() >> 1;
+      hash_value ^= PreviousLayer::GetHashValue() << 31;
+      return hash_value;
    }

-    static constexpr IndexType get_weight_index_scrambled(IndexType i) {
-        return (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4
-             + i / PaddedInputDimensions * 4 + i % 4;
+   // Read network parameters
+    bool ReadParameters(std::istream& stream) {
+      if (!previous_layer_.ReadParameters(stream)) return false;
+      stream.read(reinterpret_cast<char*>(biases_),
+                  kOutputDimensions * sizeof(BiasType));
+      stream.read(reinterpret_cast<char*>(weights_),
+                  kOutputDimensions * kPaddedInputDimensions *
+                  sizeof(WeightType));
+      return !stream.fail();
    }

-    static constexpr IndexType get_weight_index(IndexType i) {
-#ifdef ENABLE_SEQ_OPT
-        return get_weight_index_scrambled(i);
-#else
-        return i;
-#endif
-    }
-
-    // Read network parameters
-    bool read_parameters(std::istream& stream) {
-        read_little_endian<BiasType>(stream, biases, OutputDimensions);
-        for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
-            weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
-
-        return !stream.fail();
-    }
-
-    // Write network parameters
-    bool write_parameters(std::ostream& stream) const {
-        write_little_endian<BiasType>(stream, biases, OutputDimensions);
-
-        for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
-            write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
-
-        return !stream.fail();
-    }
    // Forward propagation
-    void propagate(const InputType* input, OutputType* output) const {
+    const OutputType* Propagate(
+        const TransformedFeatureType* transformed_features, char* buffer) const {
+      const auto input = previous_layer_.Propagate(
+          transformed_features, buffer + kSelfBufferSize);
+      const auto output = reinterpret_cast<OutputType*>(buffer);

-#ifdef ENABLE_SEQ_OPT
+  #if defined(USE_AVX512)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
+      const __m512i kOnes = _mm512_set1_epi16(1);
+      const auto input_vector = reinterpret_cast<const __m512i*>(input);

-        if constexpr (OutputDimensions > 1)
-        {
-    #if defined(USE_AVX512)
-            using vec_t = __m512i;
-        #define vec_set_32 _mm512_set1_epi32
-        #define vec_add_dpbusd_32 SIMD::m512_add_dpbusd_epi32
-    #elif defined(USE_AVX2)
-            using vec_t = __m256i;
-        #define vec_set_32 _mm256_set1_epi32
-        #define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32
-    #elif defined(USE_SSSE3)
-            using vec_t = __m128i;
-        #define vec_set_32 _mm_set1_epi32
-        #define vec_add_dpbusd_32 SIMD::m128_add_dpbusd_epi32
-    #elif defined(USE_NEON_DOTPROD)
-            using vec_t = int32x4_t;
-        #define vec_set_32 vdupq_n_s32
-        #define vec_add_dpbusd_32(acc, a, b) \
-            SIMD::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \
-                                                vreinterpretq_s8_s32(b))
-    #endif
+  #elif defined(USE_AVX2)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+      const __m256i kOnes = _mm256_set1_epi16(1);
+      const auto input_vector = reinterpret_cast<const __m256i*>(input);

-            static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType);
+  #elif defined(USE_SSSE3)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+      const __m128i kOnes = _mm_set1_epi16(1);
+      const auto input_vector = reinterpret_cast<const __m128i*>(input);

-            static_assert(OutputDimensions % OutputSimdWidth == 0);
+  #elif defined(USE_NEON)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+      const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
+  #endif

-            constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 8) / 4;
-            constexpr IndexType NumRegs   = OutputDimensions / OutputSimdWidth;
+      for (IndexType i = 0; i < kOutputDimensions; ++i) {
+        const IndexType offset = i * kPaddedInputDimensions;

-            const auto   input32 = reinterpret_cast<const std::int32_t*>(input);
-            const vec_t* biasvec = reinterpret_cast<const vec_t*>(biases);
-            vec_t        acc[NumRegs];
-            for (IndexType k = 0; k < NumRegs; ++k)
-                acc[k] = biasvec[k];
+  #if defined(USE_AVX512)
+        __m512i sum = _mm512_setzero_si512();
+        const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {

-            for (IndexType i = 0; i < NumChunks; ++i)
-            {
-                const vec_t in0 = vec_set_32(input32[i]);
-                const auto  col0 =
-                  reinterpret_cast<const vec_t*>(&weights[i * OutputDimensions * 4]);
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+            __m512i product = _mm512_maddubs_epi16(_mm512_loadu_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
+  #else
+            __m512i product = _mm512_maddubs_epi16(_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
+  #endif

-                for (IndexType k = 0; k < NumRegs; ++k)
-                    vec_add_dpbusd_32(acc[k], in0, col0[k]);
-            }
-
-            vec_t* outptr = reinterpret_cast<vec_t*>(output);
-            for (IndexType k = 0; k < NumRegs; ++k)
-                outptr[k] = acc[k];
-
-    #undef vec_set_32
-    #undef vec_add_dpbusd_32
+            product = _mm512_madd_epi16(product, kOnes);
+            sum = _mm512_add_epi32(sum, product);
        }
-        else if constexpr (OutputDimensions == 1)
+        output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
+
+        // Note: Changing kMaxSimdWidth from 32 to 64 would break loading existing networks.
+        // As a result, kPaddedInputDimensions may not be an exact multiple of 64 (512 bits),
+        // in which case one more 256-bit chunk has to be processed.
+        if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
        {
-    // We cannot use AVX512 for the last layer because there are only 32 inputs
-    // and the buffer is not padded to 64 elements.
-    #if defined(USE_AVX2)
-            using vec_t = __m256i;
-        #define vec_setzero() _mm256_setzero_si256()
-        #define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32
-        #define vec_hadd SIMD::m256_hadd
-    #elif defined(USE_SSSE3)
-            using vec_t = __m128i;
-        #define vec_setzero() _mm_setzero_si128()
-        #define vec_add_dpbusd_32 SIMD::m128_add_dpbusd_epi32
-        #define vec_hadd SIMD::m128_hadd
-    #elif defined(USE_NEON_DOTPROD)
-            using vec_t = int32x4_t;
-        #define vec_setzero() vdupq_n_s32(0)
-        #define vec_add_dpbusd_32(acc, a, b) \
-            SIMD::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \
-                                                vreinterpretq_s8_s32(b))
-        #define vec_hadd SIMD::neon_m128_hadd
-    #endif
+            const auto iv_256  = reinterpret_cast<const __m256i*>(input);
+            const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]);
+            int j = kNumChunks * 2;

-            const auto inputVector = reinterpret_cast<const vec_t*>(input);
+  #if defined(__MINGW32__) || defined(__MINGW64__)  // See HACK comment below in AVX2.
+            __m256i sum256 = _mm256_maddubs_epi16(_mm256_loadu_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
+  #else
+            __m256i sum256 = _mm256_maddubs_epi16(_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
+  #endif

-            static constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(InputType);
-
-            static_assert(PaddedInputDimensions % InputSimdWidth == 0);
-
-            constexpr IndexType NumChunks = PaddedInputDimensions / InputSimdWidth;
-            vec_t               sum0      = vec_setzero();
-            const auto          row0      = reinterpret_cast<const vec_t*>(&weights[0]);
-
-            for (int j = 0; j < int(NumChunks); ++j)
-            {
-                const vec_t in = inputVector[j];
-                vec_add_dpbusd_32(sum0, in, row0[j]);
-            }
-            output[0] = vec_hadd(sum0, biases[0]);
-
-    #undef vec_setzero
-    #undef vec_add_dpbusd_32
-    #undef vec_hadd
+            sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1));
+            sum256 = _mm256_hadd_epi32(sum256, sum256);
+            sum256 = _mm256_hadd_epi32(sum256, sum256);
+            const __m128i lo = _mm256_extracti128_si256(sum256, 0);
+            const __m128i hi = _mm256_extracti128_si256(sum256, 1);
+            output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
        }
-#else
-        // Use old implementation for the other architectures.
-        affine_transform_non_ssse3<InputDimensions, PaddedInputDimensions, OutputDimensions>(
-          output, weights, biases, input);
-#endif
+
+  #elif defined(USE_AVX2)
+        __m256i sum = _mm256_setzero_si256();
+        const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m256i product = _mm256_maddubs_epi16(
+
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+            // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256(), because the binary
+            //       compiled with g++ in MSYS2 crashes here: the output memory is not aligned
+            //       even though alignas is specified.
+            _mm256_loadu_si256
+  #else
+            _mm256_load_si256
+  #endif
+
+            (&input_vector[j]), _mm256_load_si256(&row[j]));
+          product = _mm256_madd_epi16(product, kOnes);
+          sum = _mm256_add_epi32(sum, product);
+        }
+        sum = _mm256_hadd_epi32(sum, sum);
+        sum = _mm256_hadd_epi32(sum, sum);
+        const __m128i lo = _mm256_extracti128_si256(sum, 0);
+        const __m128i hi = _mm256_extracti128_si256(sum, 1);
+        output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i];
+
+  #elif defined(USE_SSSE3)
+        __m128i sum = _mm_cvtsi32_si128(biases_[i]);
+        const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m128i product = _mm_maddubs_epi16(
+              _mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
+          product = _mm_madd_epi16(product, kOnes);
+          sum = _mm_add_epi32(sum, product);
+        }
+        sum = _mm_hadd_epi32(sum, sum);
+        sum = _mm_hadd_epi32(sum, sum);
+        output[i] = _mm_cvtsi128_si32(sum);
+
+  #elif defined(USE_NEON)
+        int32x4_t sum = {biases_[i]};
+        const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
+          product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
+          sum = vpadalq_s16(sum, product);
+        }
+        output[i] = sum[0] + sum[1] + sum[2] + sum[3];
+
+  #else
+        OutputType sum = biases_[i];
+        for (IndexType j = 0; j < kInputDimensions; ++j) {
+          sum += weights_[offset + j] * input[j];
+        }
+        output[i] = sum;
+  #endif
+
+      }
+      return output;
    }

   private:
-    using BiasType   = OutputType;
+    using BiasType = OutputType;
    using WeightType = std::int8_t;

-    alignas(CacheLineSize) BiasType biases[OutputDimensions];
-    alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
-};
+    PreviousLayer previous_layer_;

-}  // namespace Stockfish::Eval::NNUE::Layers
+    alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
+    alignas(kCacheLineSize)
+        WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
+  };

-#endif  // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
+}  // namespace Eval::NNUE::Layers
+
+#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
--- a/src/nnue/layers/affine_transform_sparse_input.h
+++ b/src/nnue/layers/affine_transform_sparse_input.h
@@ -1,292 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-// Definition of layer AffineTransformSparseInput of NNUE evaluation function
-
-#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED
-#define NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED
-
-#include <algorithm>
-#include <cstdint>
-#include <iostream>
-
-#include "../../bitboard.h"
-#include "../simd.h"
-#include "../nnue_common.h"
-
-/*
-  This file contains the definition for a fully connected layer (aka affine transform) with block sparse input.
-*/
-
-namespace Stockfish::Eval::NNUE::Layers {
-
-#if (USE_SSSE3 | (USE_NEON >= 8))
-static constexpr int lsb_index64[64] = {
-  0,  47, 1,  56, 48, 27, 2,  60, 57, 49, 41, 37, 28, 16, 3,  61, 54, 58, 35, 52, 50, 42,
-  21, 44, 38, 32, 29, 23, 17, 11, 4,  62, 46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43,
-  31, 22, 10, 45, 25, 39, 14, 33, 19, 30, 9,  24, 13, 18, 8,  12, 7,  6,  5,  63};
-
-constexpr int constexpr_lsb(uint64_t bb) {
-    assert(bb != 0);
-    constexpr uint64_t debruijn64 = 0x03F79D71B4CB0A89ULL;
-    return lsb_index64[((bb ^ (bb - 1)) * debruijn64) >> 58];
-}
-
-alignas(CacheLineSize) static constexpr struct OffsetIndices {
-
-    std::uint16_t offset_indices[256][8];
-
-    constexpr OffsetIndices() :
-        offset_indices() {
-        for (int i = 0; i < 256; ++i)
-        {
-            std::uint64_t j = i, k = 0;
-            while (j)
-            {
-                offset_indices[i][k++] = constexpr_lsb(j);
-                j &= j - 1;
-            }
-            while (k < 8)
-                offset_indices[i][k++] = 0;
-        }
-    }
-
-} Lookup;
-
-    #if defined(__GNUC__) || defined(__clang__)
-        #define RESTRICT __restrict__
-    #elif defined(_MSC_VER)
-        #define RESTRICT __restrict
-    #else
-        #define RESTRICT
-    #endif
-
-// Find indices of nonzero numbers in an int32_t array
-template<const IndexType InputDimensions>
-void find_nnz(const std::int32_t* RESTRICT input,
-              std::uint16_t* RESTRICT      out,
-              IndexType&                   count_out) {
-
-    #ifdef USE_AVX512
-    constexpr IndexType SimdWidth = 16;  // 512 bits / 32 bits
-    constexpr IndexType NumChunks = InputDimensions / SimdWidth;
-    const __m512i       increment = _mm512_set1_epi32(SimdWidth);
-    __m512i base = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-
-    IndexType count = 0;
-    for (IndexType i = 0; i < NumChunks; ++i)
-    {
-        const __m512i inputV = _mm512_load_si512(input + i * SimdWidth);
-
-        // Get a bitmask and gather non zero indices
-        const __mmask16 nnzMask = _mm512_test_epi32_mask(inputV, inputV);
-        const __m512i   nnzV    = _mm512_maskz_compress_epi32(nnzMask, base);
-        _mm512_mask_cvtepi32_storeu_epi16(out + count, 0xFFFF, nnzV);
-        count += popcount(nnzMask);
-        base = _mm512_add_epi32(base, increment);
-    }
-    count_out = count;
-
-    #else
-
-    using namespace SIMD;
-
-    constexpr IndexType InputSimdWidth = sizeof(vec_uint_t) / sizeof(std::int32_t);
-    // Inputs are processed InputSimdWidth at a time and outputs are processed 8 at a time so we process in chunks of max(InputSimdWidth, 8)
-    constexpr IndexType ChunkSize       = std::max<IndexType>(InputSimdWidth, 8);
-    constexpr IndexType NumChunks       = InputDimensions / ChunkSize;
-    constexpr IndexType InputsPerChunk  = ChunkSize / InputSimdWidth;
-    constexpr IndexType OutputsPerChunk = ChunkSize / 8;
-
-    const auto     inputVector = reinterpret_cast<const vec_uint_t*>(input);
-    IndexType      count       = 0;
-    vec128_t       base        = vec128_zero;
-    const vec128_t increment   = vec128_set_16(8);
-    for (IndexType i = 0; i < NumChunks; ++i)
-    {
-        // bitmask of nonzero values in this chunk
-        unsigned nnz = 0;
-        for (IndexType j = 0; j < InputsPerChunk; ++j)
-        {
-            const vec_uint_t inputChunk = inputVector[i * InputsPerChunk + j];
-            nnz |= unsigned(vec_nnz(inputChunk)) << (j * InputSimdWidth);
-        }
-        for (IndexType j = 0; j < OutputsPerChunk; ++j)
-        {
-            const unsigned lookup = (nnz >> (j * 8)) & 0xFF;
-            const vec128_t offsets =
-              vec128_load(reinterpret_cast<const vec128_t*>(&Lookup.offset_indices[lookup]));
-            vec128_storeu(reinterpret_cast<vec128_t*>(out + count), vec128_add(base, offsets));
-            count += popcount(lookup);
-            base = vec128_add(base, increment);
-        }
-    }
-    count_out = count;
-    #endif
-}
-
-#endif
-
-// Sparse input implementation
-template<IndexType InDims, IndexType OutDims>
-class AffineTransformSparseInput {
-   public:
-    // Input/output type
-    using InputType  = std::uint8_t;
-    using OutputType = std::int32_t;
-
-    // Number of input/output dimensions
-    static constexpr IndexType InputDimensions  = InDims;
-    static constexpr IndexType OutputDimensions = OutDims;
-
-    static_assert(OutputDimensions % 16 == 0,
-                  "Only implemented for OutputDimensions divisible by 16.");
-
-    static constexpr IndexType PaddedInputDimensions =
-      ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
-    static constexpr IndexType PaddedOutputDimensions =
-      ceil_to_multiple<IndexType>(OutputDimensions, MaxSimdWidth);
-
-#if (USE_SSSE3 | (USE_NEON >= 8))
-    static constexpr IndexType ChunkSize = 4;
-#else
-    static constexpr IndexType ChunkSize = 1;
-#endif
-
-    using OutputBuffer = OutputType[PaddedOutputDimensions];
-
-    // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
-        std::uint32_t hashValue = 0xCC03DAE4u;
-        hashValue += OutputDimensions;
-        hashValue ^= prevHash >> 1;
-        hashValue ^= prevHash << 31;
-        return hashValue;
-    }
-
-    static constexpr IndexType get_weight_index_scrambled(IndexType i) {
-        return (i / ChunkSize) % (PaddedInputDimensions / ChunkSize) * OutputDimensions * ChunkSize
-             + i / PaddedInputDimensions * ChunkSize + i % ChunkSize;
-    }
-
-    static constexpr IndexType get_weight_index(IndexType i) {
-#if (USE_SSSE3 | (USE_NEON >= 8))
-        return get_weight_index_scrambled(i);
-#else
-        return i;
-#endif
-    }
-
-    // Read network parameters
-    bool read_parameters(std::istream& stream) {
-        read_little_endian<BiasType>(stream, biases, OutputDimensions);
-        for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
-            weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
-
-        return !stream.fail();
-    }
-
-    // Write network parameters
-    bool write_parameters(std::ostream& stream) const {
-        write_little_endian<BiasType>(stream, biases, OutputDimensions);
-
-        for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
-            write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
-
-        return !stream.fail();
-    }
-    // Forward propagation
-    void propagate(const InputType* input, OutputType* output) const {
-
-#if (USE_SSSE3 | (USE_NEON >= 8))
-    #if defined(USE_AVX512)
-        using invec_t  = __m512i;
-        using outvec_t = __m512i;
-        #define vec_set_32 _mm512_set1_epi32
-        #define vec_add_dpbusd_32 SIMD::m512_add_dpbusd_epi32
-    #elif defined(USE_AVX2)
-        using invec_t  = __m256i;
-        using outvec_t = __m256i;
-        #define vec_set_32 _mm256_set1_epi32
-        #define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32
-    #elif defined(USE_SSSE3)
-        using invec_t  = __m128i;
-        using outvec_t = __m128i;
-        #define vec_set_32 _mm_set1_epi32
-        #define vec_add_dpbusd_32 SIMD::m128_add_dpbusd_epi32
-    #elif defined(USE_NEON_DOTPROD)
-        using invec_t  = int8x16_t;
-        using outvec_t = int32x4_t;
-        #define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a))
-        #define vec_add_dpbusd_32 SIMD::dotprod_m128_add_dpbusd_epi32
-    #elif defined(USE_NEON)
-        using invec_t  = int8x16_t;
-        using outvec_t = int32x4_t;
-        #define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a))
-        #define vec_add_dpbusd_32 SIMD::neon_m128_add_dpbusd_epi32
-    #endif
-        static constexpr IndexType OutputSimdWidth = sizeof(outvec_t) / sizeof(OutputType);
-
-        constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 8) / ChunkSize;
-        constexpr IndexType NumRegs   = OutputDimensions / OutputSimdWidth;
-        std::uint16_t       nnz[NumChunks];
-        IndexType           count;
-
-        const auto input32 = reinterpret_cast<const std::int32_t*>(input);
-
-        // Find indices of nonzero 32-bit blocks
-        find_nnz<NumChunks>(input32, nnz, count);
-
-        const outvec_t* biasvec = reinterpret_cast<const outvec_t*>(biases);
-        outvec_t        acc[NumRegs];
-        for (IndexType k = 0; k < NumRegs; ++k)
-            acc[k] = biasvec[k];
-
-        for (IndexType j = 0; j < count; ++j)
-        {
-            const auto    i  = nnz[j];
-            const invec_t in = vec_set_32(input32[i]);
-            const auto    col =
-              reinterpret_cast<const invec_t*>(&weights[i * OutputDimensions * ChunkSize]);
-            for (IndexType k = 0; k < NumRegs; ++k)
-                vec_add_dpbusd_32(acc[k], in, col[k]);
-        }
-
-        outvec_t* outptr = reinterpret_cast<outvec_t*>(output);
-        for (IndexType k = 0; k < NumRegs; ++k)
-            outptr[k] = acc[k];
-    #undef vec_set_32
-    #undef vec_add_dpbusd_32
-#else
-        // Use dense implementation for the other architectures.
-        affine_transform_non_ssse3<InputDimensions, PaddedInputDimensions, OutputDimensions>(
-          output, weights, biases, input);
-#endif
-    }
-
-   private:
-    using BiasType   = OutputType;
-    using WeightType = std::int8_t;
-
-    alignas(CacheLineSize) BiasType biases[OutputDimensions];
-    alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
-};
-
-}  // namespace Stockfish::Eval::NNUE::Layers
-
-#endif  // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED
--- a/src/nnue/layers/clipped_relu.h
+++ b/src/nnue/layers/clipped_relu.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,144 +21,166 @@
 #ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
 #define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED

-#include <algorithm>
-#include <cstdint>
-#include <iosfwd>
-
 #include "../nnue_common.h"

-namespace Stockfish::Eval::NNUE::Layers {
+namespace Eval::NNUE::Layers {

-// Clipped ReLU
-template<IndexType InDims>
-class ClippedReLU {
+  // Clipped ReLU
+  template <typename PreviousLayer>
+  class ClippedReLU {
   public:
    // Input/output type
-    using InputType  = std::int32_t;
+    using InputType = typename PreviousLayer::OutputType;
    using OutputType = std::uint8_t;
+    static_assert(std::is_same<InputType, std::int32_t>::value, "");

    // Number of input/output dimensions
-    static constexpr IndexType InputDimensions  = InDims;
-    static constexpr IndexType OutputDimensions = InputDimensions;
-    static constexpr IndexType PaddedOutputDimensions =
-      ceil_to_multiple<IndexType>(OutputDimensions, 32);
+    static constexpr IndexType kInputDimensions =
+        PreviousLayer::kOutputDimensions;
+    static constexpr IndexType kOutputDimensions = kInputDimensions;

-    using OutputBuffer = OutputType[PaddedOutputDimensions];
+    // Size of forward propagation buffer used in this layer
+    static constexpr std::size_t kSelfBufferSize =
+        CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
+
+    // Size of the forward propagation buffer used from the input layer to this layer
+    static constexpr std::size_t kBufferSize =
+        PreviousLayer::kBufferSize + kSelfBufferSize;

    // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
-        std::uint32_t hashValue = 0x538D24C7u;
-        hashValue += prevHash;
-        return hashValue;
+    static constexpr std::uint32_t GetHashValue() {
+      std::uint32_t hash_value = 0x538D24C7u;
+      hash_value += PreviousLayer::GetHashValue();
+      return hash_value;
    }

    // Read network parameters
-    bool read_parameters(std::istream&) { return true; }
-
-    // Write network parameters
-    bool write_parameters(std::ostream&) const { return true; }
+    bool ReadParameters(std::istream& stream) {
+      return previous_layer_.ReadParameters(stream);
+    }

    // Forward propagation
-    void propagate(const InputType* input, OutputType* output) const {
+    const OutputType* Propagate(
+        const TransformedFeatureType* transformed_features, char* buffer) const {
+      const auto input = previous_layer_.Propagate(
+          transformed_features, buffer + kSelfBufferSize);
+      const auto output = reinterpret_cast<OutputType*>(buffer);

-#if defined(USE_AVX2)
-        if constexpr (InputDimensions % SimdWidth == 0)
-        {
-            constexpr IndexType NumChunks = InputDimensions / SimdWidth;
-            const __m256i       Offsets   = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
-            const auto          in        = reinterpret_cast<const __m256i*>(input);
-            const auto          out       = reinterpret_cast<__m256i*>(output);
-            for (IndexType i = 0; i < NumChunks; ++i)
-            {
-                const __m256i words0 =
-                  _mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 0]),
-                                                        _mm256_load_si256(&in[i * 4 + 1])),
-                                    WeightScaleBits);
-                const __m256i words1 =
-                  _mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 2]),
-                                                        _mm256_load_si256(&in[i * 4 + 3])),
-                                    WeightScaleBits);
-                _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(
-                                              _mm256_packs_epi16(words0, words1), Offsets));
-            }
-        }
-        else
-        {
-            constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2);
-            const auto          in        = reinterpret_cast<const __m128i*>(input);
-            const auto          out       = reinterpret_cast<__m128i*>(output);
-            for (IndexType i = 0; i < NumChunks; ++i)
-            {
-                const __m128i words0 = _mm_srli_epi16(
-                  _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
-                  WeightScaleBits);
-                const __m128i words1 = _mm_srli_epi16(
-                  _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
-                  WeightScaleBits);
-                _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1));
-            }
-        }
-        constexpr IndexType Start = InputDimensions % SimdWidth == 0
-                                    ? InputDimensions / SimdWidth * SimdWidth
-                                    : InputDimensions / (SimdWidth / 2) * (SimdWidth / 2);
+  #if defined(USE_AVX2)
+      constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
+      const __m256i kZero = _mm256_setzero_si256();
+      const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
+      const auto in = reinterpret_cast<const __m256i*>(input);
+      const auto out = reinterpret_cast<__m256i*>(output);
+      for (IndexType i = 0; i < kNumChunks; ++i) {
+        const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(

-#elif defined(USE_SSE2)
-        constexpr IndexType NumChunks = InputDimensions / SimdWidth;
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+          // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256(), because the binary
+          //       compiled with g++ in MSYS2 crashes here: the output memory is not aligned
+          //       even though alignas is specified.
+          _mm256_loadu_si256
+  #else
+          _mm256_load_si256
+  #endif

-    #ifndef USE_SSE41
-        const __m128i k0x80s = _mm_set1_epi8(-128);
-    #endif
+          (&in[i * 4 + 0]),

-        const auto in  = reinterpret_cast<const __m128i*>(input);
-        const auto out = reinterpret_cast<__m128i*>(output);
-        for (IndexType i = 0; i < NumChunks; ++i)
-        {
-    #if defined(USE_SSE41)
-            const __m128i words0 = _mm_srli_epi16(
-              _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
-              WeightScaleBits);
-            const __m128i words1 = _mm_srli_epi16(
-              _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
-              WeightScaleBits);
-            _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1));
-    #else
-            const __m128i words0 = _mm_srai_epi16(
-              _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
-              WeightScaleBits);
-            const __m128i words1 = _mm_srai_epi16(
-              _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
-              WeightScaleBits);
-            const __m128i packedbytes = _mm_packs_epi16(words0, words1);
-            _mm_store_si128(&out[i], _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s));
-    #endif
-        }
-        constexpr IndexType Start = NumChunks * SimdWidth;
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+          _mm256_loadu_si256
+  #else
+          _mm256_load_si256
+  #endif

-#elif defined(USE_NEON)
-        constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2);
-        const int8x8_t      Zero      = {0};
-        const auto          in        = reinterpret_cast<const int32x4_t*>(input);
-        const auto          out       = reinterpret_cast<int8x8_t*>(output);
-        for (IndexType i = 0; i < NumChunks; ++i)
-        {
-            int16x8_t  shifted;
-            const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
-            pack[0]         = vqshrn_n_s32(in[i * 2 + 0], WeightScaleBits);
-            pack[1]         = vqshrn_n_s32(in[i * 2 + 1], WeightScaleBits);
-            out[i]          = vmax_s8(vqmovn_s16(shifted), Zero);
-        }
-        constexpr IndexType Start = NumChunks * (SimdWidth / 2);
-#else
-        constexpr IndexType Start = 0;
-#endif
+          (&in[i * 4 + 1])), kWeightScaleBits);
+        const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(

-        for (IndexType i = Start; i < InputDimensions; ++i)
-        {
-            output[i] = static_cast<OutputType>(std::clamp(input[i] >> WeightScaleBits, 0, 127));
-        }
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+          _mm256_loadu_si256
+  #else
+          _mm256_load_si256
+  #endif
+
+          (&in[i * 4 + 2]),
+
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+          _mm256_loadu_si256
+  #else
+          _mm256_load_si256
+  #endif
+
+          (&in[i * 4 + 3])), kWeightScaleBits);
+
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+        _mm256_storeu_si256
+  #else
+        _mm256_store_si256
+  #endif
+
+          (&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
+            _mm256_packs_epi16(words0, words1), kZero), kOffsets));
+      }
+      constexpr IndexType kStart = kNumChunks * kSimdWidth;
+
+  #elif defined(USE_SSSE3)
+      constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
+
+  #ifdef USE_SSE41
+      const __m128i kZero = _mm_setzero_si128();
+  #else
+      const __m128i k0x80s = _mm_set1_epi8(-128);
+  #endif
+
+      const auto in = reinterpret_cast<const __m128i*>(input);
+      const auto out = reinterpret_cast<__m128i*>(output);
+      for (IndexType i = 0; i < kNumChunks; ++i) {
+        const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
+            _mm_load_si128(&in[i * 4 + 0]),
+            _mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
+        const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
+            _mm_load_si128(&in[i * 4 + 2]),
+            _mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
+        const __m128i packedbytes = _mm_packs_epi16(words0, words1);
+        _mm_store_si128(&out[i],
+
+  #ifdef USE_SSE41
+          _mm_max_epi8(packedbytes, kZero)
+  #else
+          _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
+  #endif
+
+        );
+      }
+      constexpr IndexType kStart = kNumChunks * kSimdWidth;
+
+  #elif defined(USE_NEON)
+      constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
+      const int8x8_t kZero = {0};
+      const auto in = reinterpret_cast<const int32x4_t*>(input);
+      const auto out = reinterpret_cast<int8x8_t*>(output);
+      for (IndexType i = 0; i < kNumChunks; ++i) {
+        int16x8_t shifted;
+        const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
+        pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
+        pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
+        out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
+      }
+      constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);
+  #else
+      constexpr IndexType kStart = 0;
+  #endif
+
+      for (IndexType i = kStart; i < kInputDimensions; ++i) {
+        output[i] = static_cast<OutputType>(
+            std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
+      }
+      return output;
    }
-};

-}  // namespace Stockfish::Eval::NNUE::Layers
+   private:
+    PreviousLayer previous_layer_;
+  };

-#endif  // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
+}  // namespace Eval::NNUE::Layers
+
+#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
--- a/src/nnue/layers/input_slice.h
+++ b/src/nnue/layers/input_slice.h
@@ -0,0 +1,68 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of layer InputSlice of NNUE evaluation function
+
+#ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
+#define NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
+
+#include "../nnue_common.h"
+
+namespace Eval::NNUE::Layers {
+
+// Input layer: forwards the transformed features, starting at element Offset,
+// to the next layer. It reads no parameters and reserves no buffer space.
+template <IndexType OutputDimensions, IndexType Offset = 0>
+class InputSlice {
+ public:
+  // Need to maintain alignment: a slice may only start at a multiple of kMaxSimdWidth
+  static_assert(Offset % kMaxSimdWidth == 0, "Offset must be a multiple of kMaxSimdWidth");
+
+  // Output type
+  using OutputType = TransformedFeatureType;
+
+  // Output dimensionality
+  static constexpr IndexType kOutputDimensions = OutputDimensions;
+
+  // Size of forward propagation buffer used from the input layer to this layer
+  static constexpr std::size_t kBufferSize = 0;
+
+  // Hash value embedded in the evaluation file; mixes in kOutputDimensions and Offset
+  static constexpr std::uint32_t GetHashValue() {
+    std::uint32_t hash_value = 0xEC42E90Du;
+    hash_value ^= kOutputDimensions ^ (Offset << 10);
+    return hash_value;
+  }
+
+  // Read network parameters: this layer has none, so the stream is not touched
+  bool ReadParameters(std::istream& /*stream*/) {
+    return true;
+  }
+
+  // Forward propagation: returns a pointer Offset elements into transformed_features
+  const OutputType* Propagate(
+      const TransformedFeatureType* transformed_features,
+      char* /*buffer*/) const {
+    return transformed_features + Offset;
+  }
+
+};
+
+}  // namespace Eval::NNUE::Layers
+
+#endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
--- a/src/nnue/layers/sqr_clipped_relu.h
+++ b/src/nnue/layers/sqr_clipped_relu.h
@@ -1,103 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-// Definition of layer ClippedReLU of NNUE evaluation function
-
-#ifndef NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED
-#define NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED
-
-#include <algorithm>
-#include <cstdint>
-#include <iosfwd>
-
-#include "../nnue_common.h"
-
-namespace Stockfish::Eval::NNUE::Layers {
-
-// Clipped ReLU
-template<IndexType InDims>
-class SqrClippedReLU {
-   public:
-    // Input/output type
-    using InputType  = std::int32_t;
-    using OutputType = std::uint8_t;
-
-    // Number of input/output dimensions
-    static constexpr IndexType InputDimensions  = InDims;
-    static constexpr IndexType OutputDimensions = InputDimensions;
-    static constexpr IndexType PaddedOutputDimensions =
-      ceil_to_multiple<IndexType>(OutputDimensions, 32);
-
-    using OutputBuffer = OutputType[PaddedOutputDimensions];
-
-    // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
-        std::uint32_t hashValue = 0x538D24C7u;
-        hashValue += prevHash;
-        return hashValue;
-    }
-
-    // Read network parameters
-    bool read_parameters(std::istream&) { return true; }
-
-    // Write network parameters
-    bool write_parameters(std::ostream&) const { return true; }
-
-    // Forward propagation
-    void propagate(const InputType* input, OutputType* output) const {
-
-#if defined(USE_SSE2)
-        constexpr IndexType NumChunks = InputDimensions / 16;
-
-        static_assert(WeightScaleBits == 6);
-        const auto in  = reinterpret_cast<const __m128i*>(input);
-        const auto out = reinterpret_cast<__m128i*>(output);
-        for (IndexType i = 0; i < NumChunks; ++i)
-        {
-            __m128i words0 =
-              _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1]));
-            __m128i words1 =
-              _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3]));
-
-            // We shift by WeightScaleBits * 2 = 12 and divide by 128
-            // which is an additional shift-right of 7, meaning 19 in total.
-            // MulHi strips the lower 16 bits so we need to shift out 3 more to match.
-            words0 = _mm_srli_epi16(_mm_mulhi_epi16(words0, words0), 3);
-            words1 = _mm_srli_epi16(_mm_mulhi_epi16(words1, words1), 3);
-
-            _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1));
-        }
-        constexpr IndexType Start = NumChunks * 16;
-
-#else
-        constexpr IndexType Start = 0;
-#endif
-
-        for (IndexType i = Start; i < InputDimensions; ++i)
-        {
-            output[i] = static_cast<OutputType>(
-              // Really should be /127 but we need to make it fast so we right-shift
-              // by an extra 7 bits instead. Needs to be accounted for in the trainer.
-              std::min(127ll, ((long long) (input[i]) * input[i]) >> (2 * WeightScaleBits + 7)));
-        }
-    }
-};
-
-}  // namespace Stockfish::Eval::NNUE::Layers
-
-#endif  // NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED
--- a/src/nnue/network.cpp
+++ b/src/nnue/network.cpp
@@ -1,442 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "network.h"
-
-#include <cstdlib>
-#include <fstream>
-#include <iostream>
-#include <memory>
-#include <optional>
-#include <type_traits>
-#include <vector>
-
-#define INCBIN_SILENCE_BITCODE_WARNING
-#include "../incbin/incbin.h"
-
-#include "../evaluate.h"
-#include "../memory.h"
-#include "../misc.h"
-#include "../position.h"
-#include "../types.h"
-#include "nnue_architecture.h"
-#include "nnue_common.h"
-#include "nnue_misc.h"
-
-// Macro to embed the default efficiently updatable neural network (NNUE) file
-// data in the engine binary (using incbin.h, by Dale Weiler).
-// This macro invocation will declare the following three variables
-//     const unsigned char        gEmbeddedNNUEData[];  // a pointer to the embedded data
-//     const unsigned char *const gEmbeddedNNUEEnd;     // a marker to the end
-//     const unsigned int         gEmbeddedNNUESize;    // the size of the embedded file
-// Note that this does not work in Microsoft Visual Studio.
-#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF)
-INCBIN(EmbeddedNNUEBig, EvalFileDefaultNameBig);
-INCBIN(EmbeddedNNUESmall, EvalFileDefaultNameSmall);
-#else
-const unsigned char        gEmbeddedNNUEBigData[1]   = {0x0};
-const unsigned char* const gEmbeddedNNUEBigEnd       = &gEmbeddedNNUEBigData[1];
-const unsigned int         gEmbeddedNNUEBigSize      = 1;
-const unsigned char        gEmbeddedNNUESmallData[1] = {0x0};
-const unsigned char* const gEmbeddedNNUESmallEnd     = &gEmbeddedNNUESmallData[1];
-const unsigned int         gEmbeddedNNUESmallSize    = 1;
-#endif
-
-namespace {
-
-struct EmbeddedNNUE {
-    EmbeddedNNUE(const unsigned char* embeddedData,
-                 const unsigned char* embeddedEnd,
-                 const unsigned int   embeddedSize) :
-        data(embeddedData),
-        end(embeddedEnd),
-        size(embeddedSize) {}
-    const unsigned char* data;
-    const unsigned char* end;
-    const unsigned int   size;
-};
-
-using namespace Stockfish::Eval::NNUE;
-
-EmbeddedNNUE get_embedded(EmbeddedNNUEType type) {
-    if (type == EmbeddedNNUEType::BIG)
-        return EmbeddedNNUE(gEmbeddedNNUEBigData, gEmbeddedNNUEBigEnd, gEmbeddedNNUEBigSize);
-    else
-        return EmbeddedNNUE(gEmbeddedNNUESmallData, gEmbeddedNNUESmallEnd, gEmbeddedNNUESmallSize);
-}
-
-}
-
-
-namespace Stockfish::Eval::NNUE {
-
-
-namespace Detail {
-
-// Read evaluation function parameters
-template<typename T>
-bool read_parameters(std::istream& stream, T& reference) {
-
-    std::uint32_t header;
-    header = read_little_endian<std::uint32_t>(stream);
-    if (!stream || header != T::get_hash_value())
-        return false;
-    return reference.read_parameters(stream);
-}
-
-// Write evaluation function parameters
-template<typename T>
-bool write_parameters(std::ostream& stream, T& reference) {
-
-    write_little_endian<std::uint32_t>(stream, T::get_hash_value());
-    return reference.write_parameters(stream);
-}
-
-}  // namespace Detail
-
-template<typename Arch, typename Transformer>
-Network<Arch, Transformer>::Network(const Network<Arch, Transformer>& other) :
-    evalFile(other.evalFile),
-    embeddedType(other.embeddedType) {
-
-    if (other.featureTransformer)
-        featureTransformer = make_unique_large_page<Transformer>(*other.featureTransformer);
-
-    network = make_unique_aligned<Arch[]>(LayerStacks);
-
-    if (!other.network)
-        return;
-
-    for (std::size_t i = 0; i < LayerStacks; ++i)
-        network[i] = other.network[i];
-}
-
-template<typename Arch, typename Transformer>
-Network<Arch, Transformer>&
-Network<Arch, Transformer>::operator=(const Network<Arch, Transformer>& other) {
-    evalFile     = other.evalFile;
-    embeddedType = other.embeddedType;
-
-    if (other.featureTransformer)
-        featureTransformer = make_unique_large_page<Transformer>(*other.featureTransformer);
-
-    network = make_unique_aligned<Arch[]>(LayerStacks);
-
-    if (!other.network)
-        return *this;
-
-    for (std::size_t i = 0; i < LayerStacks; ++i)
-        network[i] = other.network[i];
-
-    return *this;
-}
-
-template<typename Arch, typename Transformer>
-void Network<Arch, Transformer>::load(const std::string& rootDirectory, std::string evalfilePath) {
-#if defined(DEFAULT_NNUE_DIRECTORY)
-    std::vector<std::string> dirs = {"<internal>", "", rootDirectory,
-                                     stringify(DEFAULT_NNUE_DIRECTORY)};
-#else
-    std::vector<std::string> dirs = {"<internal>", "", rootDirectory};
-#endif
-
-    if (evalfilePath.empty())
-        evalfilePath = evalFile.defaultName;
-
-    for (const auto& directory : dirs)
-    {
-        if (evalFile.current != evalfilePath)
-        {
-            if (directory != "<internal>")
-            {
-                load_user_net(directory, evalfilePath);
-            }
-
-            if (directory == "<internal>" && evalfilePath == evalFile.defaultName)
-            {
-                load_internal();
-            }
-        }
-    }
-}
-
-
-template<typename Arch, typename Transformer>
-bool Network<Arch, Transformer>::save(const std::optional<std::string>& filename) const {
-    std::string actualFilename;
-    std::string msg;
-
-    if (filename.has_value())
-        actualFilename = filename.value();
-    else
-    {
-        if (evalFile.current != evalFile.defaultName)
-        {
-            msg = "Failed to export a net. "
-                  "A non-embedded net can only be saved if the filename is specified";
-
-            sync_cout << msg << sync_endl;
-            return false;
-        }
-
-        actualFilename = evalFile.defaultName;
-    }
-
-    std::ofstream stream(actualFilename, std::ios_base::binary);
-    bool          saved = save(stream, evalFile.current, evalFile.netDescription);
-
-    msg = saved ? "Network saved successfully to " + actualFilename : "Failed to export a net";
-
-    sync_cout << msg << sync_endl;
-    return saved;
-}
-
-
-template<typename Arch, typename Transformer>
-NetworkOutput
-Network<Arch, Transformer>::evaluate(const Position&                         pos,
-                                     AccumulatorStack&                       accumulatorStack,
-                                     AccumulatorCaches::Cache<FTDimensions>* cache) const {
-
-    constexpr uint64_t alignment = CacheLineSize;
-
-    alignas(alignment)
-      TransformedFeatureType transformedFeatures[FeatureTransformer<FTDimensions>::BufferSize];
-
-    ASSERT_ALIGNED(transformedFeatures, alignment);
-
-    const int  bucket = (pos.count<ALL_PIECES>() - 1) / 4;
-    const auto psqt =
-      featureTransformer->transform(pos, accumulatorStack, cache, transformedFeatures, bucket);
-    const auto positional = network[bucket].propagate(transformedFeatures);
-    return {static_cast<Value>(psqt / OutputScale), static_cast<Value>(positional / OutputScale)};
-}
-
-
-template<typename Arch, typename Transformer>
-void Network<Arch, Transformer>::verify(std::string                                  evalfilePath,
-                                        const std::function<void(std::string_view)>& f) const {
-    if (evalfilePath.empty())
-        evalfilePath = evalFile.defaultName;
-
-    if (evalFile.current != evalfilePath)
-    {
-        if (f)
-        {
-            std::string msg1 =
-              "Network evaluation parameters compatible with the engine must be available.";
-            std::string msg2 = "The network file " + evalfilePath + " was not loaded successfully.";
-            std::string msg3 = "The UCI option EvalFile might need to specify the full path, "
-                               "including the directory name, to the network file.";
-            std::string msg4 = "The default net can be downloaded from: "
-                               "https://tests.stockfishchess.org/api/nn/"
-                             + evalFile.defaultName;
-            std::string msg5 = "The engine will be terminated now.";
-
-            std::string msg = "ERROR: " + msg1 + '\n' + "ERROR: " + msg2 + '\n' + "ERROR: " + msg3
-                            + '\n' + "ERROR: " + msg4 + '\n' + "ERROR: " + msg5 + '\n';
-
-            f(msg);
-        }
-
-        exit(EXIT_FAILURE);
-    }
-
-    if (f)
-    {
-        size_t size = sizeof(*featureTransformer) + sizeof(Arch) * LayerStacks;
-        f("NNUE evaluation using " + evalfilePath + " (" + std::to_string(size / (1024 * 1024))
-          + "MiB, (" + std::to_string(featureTransformer->InputDimensions) + ", "
-          + std::to_string(network[0].TransformedFeatureDimensions) + ", "
-          + std::to_string(network[0].FC_0_OUTPUTS) + ", " + std::to_string(network[0].FC_1_OUTPUTS)
-          + ", 1))");
-    }
-}
-
-
-template<typename Arch, typename Transformer>
-NnueEvalTrace
-Network<Arch, Transformer>::trace_evaluate(const Position&                         pos,
-                                           AccumulatorStack&                       accumulatorStack,
-                                           AccumulatorCaches::Cache<FTDimensions>* cache) const {
-
-    constexpr uint64_t alignment = CacheLineSize;
-
-    alignas(alignment)
-      TransformedFeatureType transformedFeatures[FeatureTransformer<FTDimensions>::BufferSize];
-
-    ASSERT_ALIGNED(transformedFeatures, alignment);
-
-    NnueEvalTrace t{};
-    t.correctBucket = (pos.count<ALL_PIECES>() - 1) / 4;
-    for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
-    {
-        const auto materialist =
-          featureTransformer->transform(pos, accumulatorStack, cache, transformedFeatures, bucket);
-        const auto positional = network[bucket].propagate(transformedFeatures);
-
-        t.psqt[bucket]       = static_cast<Value>(materialist / OutputScale);
-        t.positional[bucket] = static_cast<Value>(positional / OutputScale);
-    }
-
-    return t;
-}
-
-
-template<typename Arch, typename Transformer>
-void Network<Arch, Transformer>::load_user_net(const std::string& dir,
-                                               const std::string& evalfilePath) {
-    std::ifstream stream(dir + evalfilePath, std::ios::binary);
-    auto          description = load(stream);
-
-    if (description.has_value())
-    {
-        evalFile.current        = evalfilePath;
-        evalFile.netDescription = description.value();
-    }
-}
-
-
-template<typename Arch, typename Transformer>
-void Network<Arch, Transformer>::load_internal() {
-    // C++ way to prepare a buffer for a memory stream
-    class MemoryBuffer: public std::basic_streambuf<char> {
-       public:
-        MemoryBuffer(char* p, size_t n) {
-            setg(p, p, p + n);
-            setp(p, p + n);
-        }
-    };
-
-    const auto embedded = get_embedded(embeddedType);
-
-    MemoryBuffer buffer(const_cast<char*>(reinterpret_cast<const char*>(embedded.data)),
-                        size_t(embedded.size));
-
-    std::istream stream(&buffer);
-    auto         description = load(stream);
-
-    if (description.has_value())
-    {
-        evalFile.current        = evalFile.defaultName;
-        evalFile.netDescription = description.value();
-    }
-}
-
-
-template<typename Arch, typename Transformer>
-void Network<Arch, Transformer>::initialize() {
-    featureTransformer = make_unique_large_page<Transformer>();
-    network            = make_unique_aligned<Arch[]>(LayerStacks);
-}
-
-
-template<typename Arch, typename Transformer>
-bool Network<Arch, Transformer>::save(std::ostream&      stream,
-                                      const std::string& name,
-                                      const std::string& netDescription) const {
-    if (name.empty() || name == "None")
-        return false;
-
-    return write_parameters(stream, netDescription);
-}
-
-
-template<typename Arch, typename Transformer>
-std::optional<std::string> Network<Arch, Transformer>::load(std::istream& stream) {
-    initialize();
-    std::string description;
-
-    return read_parameters(stream, description) ? std::make_optional(description) : std::nullopt;
-}
-
-
-// Read network header
-template<typename Arch, typename Transformer>
-bool Network<Arch, Transformer>::read_header(std::istream&  stream,
-                                             std::uint32_t* hashValue,
-                                             std::string*   desc) const {
-    std::uint32_t version, size;
-
-    version    = read_little_endian<std::uint32_t>(stream);
-    *hashValue = read_little_endian<std::uint32_t>(stream);
-    size       = read_little_endian<std::uint32_t>(stream);
-    if (!stream || version != Version)
-        return false;
-    desc->resize(size);
-    stream.read(&(*desc)[0], size);
-    return !stream.fail();
-}
-
-
-// Write network header
-template<typename Arch, typename Transformer>
-bool Network<Arch, Transformer>::write_header(std::ostream&      stream,
-                                              std::uint32_t      hashValue,
-                                              const std::string& desc) const {
-    write_little_endian<std::uint32_t>(stream, Version);
-    write_little_endian<std::uint32_t>(stream, hashValue);
-    write_little_endian<std::uint32_t>(stream, std::uint32_t(desc.size()));
-    stream.write(&desc[0], desc.size());
-    return !stream.fail();
-}
-
-
-template<typename Arch, typename Transformer>
-bool Network<Arch, Transformer>::read_parameters(std::istream& stream,
-                                                 std::string&  netDescription) const {
-    std::uint32_t hashValue;
-    if (!read_header(stream, &hashValue, &netDescription))
-        return false;
-    if (hashValue != Network::hash)
-        return false;
-    if (!Detail::read_parameters(stream, *featureTransformer))
-        return false;
-    for (std::size_t i = 0; i < LayerStacks; ++i)
-    {
-        if (!Detail::read_parameters(stream, network[i]))
-            return false;
-    }
-    return stream && stream.peek() == std::ios::traits_type::eof();
-}
-
-
-template<typename Arch, typename Transformer>
-bool Network<Arch, Transformer>::write_parameters(std::ostream&      stream,
-                                                  const std::string& netDescription) const {
-    if (!write_header(stream, Network::hash, netDescription))
-        return false;
-    if (!Detail::write_parameters(stream, *featureTransformer))
-        return false;
-    for (std::size_t i = 0; i < LayerStacks; ++i)
-    {
-        if (!Detail::write_parameters(stream, network[i]))
-            return false;
-    }
-    return bool(stream);
-}
-
-// Explicit template instantiations
-
-template class Network<NetworkArchitecture<TransformedFeatureDimensionsBig, L2Big, L3Big>,
-                       FeatureTransformer<TransformedFeatureDimensionsBig>>;
-
-template class Network<NetworkArchitecture<TransformedFeatureDimensionsSmall, L2Small, L3Small>,
-                       FeatureTransformer<TransformedFeatureDimensionsSmall>>;
-
-}  // namespace Stockfish::Eval::NNUE
--- a/src/nnue/network.h
+++ b/src/nnue/network.h
@@ -1,137 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef NETWORK_H_INCLUDED
-#define NETWORK_H_INCLUDED
-
-#include <cstdint>
-#include <functional>
-#include <iostream>
-#include <optional>
-#include <string>
-#include <string_view>
-#include <tuple>
-#include <utility>
-
-#include "../memory.h"
-#include "../types.h"
-#include "nnue_accumulator.h"
-#include "nnue_architecture.h"
-#include "nnue_common.h"
-#include "nnue_feature_transformer.h"
-#include "nnue_misc.h"
-
-namespace Stockfish {
-class Position;
-}
-
-namespace Stockfish::Eval::NNUE {
-
-enum class EmbeddedNNUEType {
-    BIG,
-    SMALL,
-};
-
-using NetworkOutput = std::tuple<Value, Value>;
-
-template<typename Arch, typename Transformer>
-class Network {
-    static constexpr IndexType FTDimensions = Arch::TransformedFeatureDimensions;
-
-   public:
-    Network(EvalFile file, EmbeddedNNUEType type) :
-        evalFile(file),
-        embeddedType(type) {}
-
-    Network(const Network& other);
-    Network(Network&& other) = default;
-
-    Network& operator=(const Network& other);
-    Network& operator=(Network&& other) = default;
-
-    void load(const std::string& rootDirectory, std::string evalfilePath);
-    bool save(const std::optional<std::string>& filename) const;
-
-    NetworkOutput evaluate(const Position&                         pos,
-                           AccumulatorStack&                       accumulatorStack,
-                           AccumulatorCaches::Cache<FTDimensions>* cache) const;
-
-
-    void verify(std::string evalfilePath, const std::function<void(std::string_view)>&) const;
-    NnueEvalTrace trace_evaluate(const Position&                         pos,
-                                 AccumulatorStack&                       accumulatorStack,
-                                 AccumulatorCaches::Cache<FTDimensions>* cache) const;
-
-   private:
-    void load_user_net(const std::string&, const std::string&);
-    void load_internal();
-
-    void initialize();
-
-    bool                       save(std::ostream&, const std::string&, const std::string&) const;
-    std::optional<std::string> load(std::istream&);
-
-    bool read_header(std::istream&, std::uint32_t*, std::string*) const;
-    bool write_header(std::ostream&, std::uint32_t, const std::string&) const;
-
-    bool read_parameters(std::istream&, std::string&) const;
-    bool write_parameters(std::ostream&, const std::string&) const;
-
-    // Input feature converter
-    LargePagePtr<Transformer> featureTransformer;
-
-    // Evaluation function
-    AlignedPtr<Arch[]> network;
-
-    EvalFile         evalFile;
-    EmbeddedNNUEType embeddedType;
-
-    // Hash value of evaluation function structure
-    static constexpr std::uint32_t hash = Transformer::get_hash_value() ^ Arch::get_hash_value();
-
-    template<IndexType Size>
-    friend struct AccumulatorCaches::Cache;
-
-    friend class AccumulatorStack;
-};
-
-// Definitions of the network types
-using SmallFeatureTransformer = FeatureTransformer<TransformedFeatureDimensionsSmall>;
-using SmallNetworkArchitecture =
-  NetworkArchitecture<TransformedFeatureDimensionsSmall, L2Small, L3Small>;
-
-using BigFeatureTransformer  = FeatureTransformer<TransformedFeatureDimensionsBig>;
-using BigNetworkArchitecture = NetworkArchitecture<TransformedFeatureDimensionsBig, L2Big, L3Big>;
-
-using NetworkBig   = Network<BigNetworkArchitecture, BigFeatureTransformer>;
-using NetworkSmall = Network<SmallNetworkArchitecture, SmallFeatureTransformer>;
-
-
-struct Networks {
-    Networks(NetworkBig&& nB, NetworkSmall&& nS) :
-        big(std::move(nB)),
-        small(std::move(nS)) {}
-
-    NetworkBig   big;
-    NetworkSmall small;
-};
-
-
-}  // namespace Stockfish
-
-#endif
--- a/src/nnue/nnue_accumulator.cpp
+++ b/src/nnue/nnue_accumulator.cpp
@@ -1,531 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "nnue_accumulator.h"
-
-#include <cassert>
-#include <cstdint>
-#include <initializer_list>
-#include <type_traits>
-
-#include "../bitboard.h"
-#include "../misc.h"
-#include "../position.h"
-#include "../types.h"
-#include "nnue_architecture.h"
-#include "nnue_feature_transformer.h"  // IWYU pragma: keep
-#include "simd.h"
-
-namespace Stockfish::Eval::NNUE {
-
-using namespace SIMD;
-
-namespace {
-
-template<Color Perspective, IndexType TransformedFeatureDimensions>
-void double_inc_update(const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
-                       const Square                                            ksq,
-                       AccumulatorState&                                       middle_state,
-                       AccumulatorState&                                       target_state,
-                       const AccumulatorState&                                 computed);
-
-template<Color Perspective, bool Forward, IndexType TransformedFeatureDimensions>
-void update_accumulator_incremental(
-  const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
-  const Square                                            ksq,
-  AccumulatorState&                                       target_state,
-  const AccumulatorState&                                 computed);
-
-template<Color Perspective, IndexType Dimensions>
-void update_accumulator_refresh_cache(const FeatureTransformer<Dimensions>& featureTransformer,
-                                      const Position&                       pos,
-                                      AccumulatorState&                     accumulatorState,
-                                      AccumulatorCaches::Cache<Dimensions>& cache);
-
-}
-
-void AccumulatorState::reset(const DirtyPiece& dp) noexcept {
-    dirtyPiece = dp;
-    accumulatorBig.computed.fill(false);
-    accumulatorSmall.computed.fill(false);
-}
-
-const AccumulatorState& AccumulatorStack::latest() const noexcept { return accumulators[size - 1]; }
-
-AccumulatorState& AccumulatorStack::mut_latest() noexcept { return accumulators[size - 1]; }
-
-void AccumulatorStack::reset() noexcept {
-    accumulators[0].reset({});
-    size = 1;
-}
-
-void AccumulatorStack::push(const DirtyPiece& dirtyPiece) noexcept {
-    assert(size + 1 < accumulators.size());
-    accumulators[size].reset(dirtyPiece);
-    size++;
-}
-
-void AccumulatorStack::pop() noexcept {
-    assert(size > 1);
-    size--;
-}
-
-template<IndexType Dimensions>
-void AccumulatorStack::evaluate(const Position&                       pos,
-                                const FeatureTransformer<Dimensions>& featureTransformer,
-                                AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
-
-    evaluate_side<WHITE>(pos, featureTransformer, cache);
-    evaluate_side<BLACK>(pos, featureTransformer, cache);
-}
-
-template<Color Perspective, IndexType Dimensions>
-void AccumulatorStack::evaluate_side(const Position&                       pos,
-                                     const FeatureTransformer<Dimensions>& featureTransformer,
-                                     AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
-
-    const auto last_usable_accum = find_last_usable_accumulator<Perspective, Dimensions>();
-
-    if ((accumulators[last_usable_accum].template acc<Dimensions>()).computed[Perspective])
-        forward_update_incremental<Perspective>(pos, featureTransformer, last_usable_accum);
-
-    else
-    {
-        update_accumulator_refresh_cache<Perspective>(featureTransformer, pos, mut_latest(), cache);
-        backward_update_incremental<Perspective>(pos, featureTransformer, last_usable_accum);
-    }
-}
-
-// Find the earliest usable accumulator, this can either be a computed accumulator or the accumulator
-// state just before a change that requires full refresh.
-template<Color Perspective, IndexType Dimensions>
-std::size_t AccumulatorStack::find_last_usable_accumulator() const noexcept {
-
-    for (std::size_t curr_idx = size - 1; curr_idx > 0; curr_idx--)
-    {
-        if ((accumulators[curr_idx].template acc<Dimensions>()).computed[Perspective])
-            return curr_idx;
-
-        if (FeatureSet::requires_refresh(accumulators[curr_idx].dirtyPiece, Perspective))
-            return curr_idx;
-    }
-
-    return 0;
-}
-
-template<Color Perspective, IndexType Dimensions>
-void AccumulatorStack::forward_update_incremental(
-  const Position&                       pos,
-  const FeatureTransformer<Dimensions>& featureTransformer,
-  const std::size_t                     begin) noexcept {
-
-    assert(begin < accumulators.size());
-    assert((accumulators[begin].acc<Dimensions>()).computed[Perspective]);
-
-    const Square ksq = pos.square<KING>(Perspective);
-
-    for (std::size_t next = begin + 1; next < size; next++)
-    {
-        if (next + 1 < size)
-        {
-            DirtyPiece& dp1 = accumulators[next].dirtyPiece;
-            DirtyPiece& dp2 = accumulators[next + 1].dirtyPiece;
-
-            if (dp1.to != SQ_NONE && dp1.to == dp2.remove_sq)
-            {
-                const Square captureSq = dp1.to;
-                dp1.to = dp2.remove_sq = SQ_NONE;
-                double_inc_update<Perspective>(featureTransformer, ksq, accumulators[next],
-                                               accumulators[next + 1], accumulators[next - 1]);
-                dp1.to = dp2.remove_sq = captureSq;
-
-                next++;
-                continue;
-            }
-        }
-        update_accumulator_incremental<Perspective, true>(
-          featureTransformer, ksq, accumulators[next], accumulators[next - 1]);
-    }
-
-    assert((latest().acc<Dimensions>()).computed[Perspective]);
-}
-
-template<Color Perspective, IndexType Dimensions>
-void AccumulatorStack::backward_update_incremental(
-  const Position&                       pos,
-  const FeatureTransformer<Dimensions>& featureTransformer,
-  const std::size_t                     end) noexcept {
-
-    assert(end < accumulators.size());
-    assert(end < size);
-    assert((latest().acc<Dimensions>()).computed[Perspective]);
-
-    const Square ksq = pos.square<KING>(Perspective);
-
-    for (std::int64_t next = std::int64_t(size) - 2; next >= std::int64_t(end); next--)
-        update_accumulator_incremental<Perspective, false>(
-          featureTransformer, ksq, accumulators[next], accumulators[next + 1]);
-
-    assert((accumulators[end].acc<Dimensions>()).computed[Perspective]);
-}
-
-// Explicit template instantiations
-template void AccumulatorStack::evaluate<TransformedFeatureDimensionsBig>(
-  const Position&                                            pos,
-  const FeatureTransformer<TransformedFeatureDimensionsBig>& featureTransformer,
-  AccumulatorCaches::Cache<TransformedFeatureDimensionsBig>& cache) noexcept;
-template void AccumulatorStack::evaluate<TransformedFeatureDimensionsSmall>(
-  const Position&                                              pos,
-  const FeatureTransformer<TransformedFeatureDimensionsSmall>& featureTransformer,
-  AccumulatorCaches::Cache<TransformedFeatureDimensionsSmall>& cache) noexcept;
-
-
-namespace {
-
-template<typename VectorWrapper,
-         IndexType Width,
-         UpdateOperation... ops,
-         typename ElementType,
-         typename... Ts,
-         std::enable_if_t<is_all_same_v<ElementType, Ts...>, bool> = true>
-void fused_row_reduce(const ElementType* in, ElementType* out, const Ts* const... rows) {
-    constexpr IndexType size = Width * sizeof(ElementType) / sizeof(typename VectorWrapper::type);
-
-    auto* vecIn  = reinterpret_cast<const typename VectorWrapper::type*>(in);
-    auto* vecOut = reinterpret_cast<typename VectorWrapper::type*>(out);
-
-    for (IndexType i = 0; i < size; ++i)
-        vecOut[i] = fused<VectorWrapper, ops...>(
-          vecIn[i], reinterpret_cast<const typename VectorWrapper::type*>(rows)[i]...);
-}
-
-template<Color Perspective, IndexType Dimensions>
-struct AccumulatorUpdateContext {
-    const FeatureTransformer<Dimensions>& featureTransformer;
-    const AccumulatorState&               from;
-    AccumulatorState&                     to;
-
-    AccumulatorUpdateContext(const FeatureTransformer<Dimensions>& ft,
-                             const AccumulatorState&               accF,
-                             AccumulatorState&                     accT) noexcept :
-        featureTransformer{ft},
-        from{accF},
-        to{accT} {}
-
-    template<UpdateOperation... ops,
-             typename... Ts,
-             std::enable_if_t<is_all_same_v<IndexType, Ts...>, bool> = true>
-    void apply(const Ts... indices) {
-        auto to_weight_vector = [&](const IndexType index) {
-            return &featureTransformer.weights[index * Dimensions];
-        };
-
-        auto to_psqt_weight_vector = [&](const IndexType index) {
-            return &featureTransformer.psqtWeights[index * PSQTBuckets];
-        };
-
-        fused_row_reduce<Vec16Wrapper, Dimensions, ops...>(
-          (from.acc<Dimensions>()).accumulation[Perspective],
-          (to.acc<Dimensions>()).accumulation[Perspective], to_weight_vector(indices)...);
-
-        fused_row_reduce<Vec32Wrapper, PSQTBuckets, ops...>(
-          (from.acc<Dimensions>()).psqtAccumulation[Perspective],
-          (to.acc<Dimensions>()).psqtAccumulation[Perspective], to_psqt_weight_vector(indices)...);
-    }
-};
-
-template<Color Perspective, IndexType Dimensions>
-auto make_accumulator_update_context(const FeatureTransformer<Dimensions>& featureTransformer,
-                                     const AccumulatorState&               accumulatorFrom,
-                                     AccumulatorState&                     accumulatorTo) noexcept {
-    return AccumulatorUpdateContext<Perspective, Dimensions>{featureTransformer, accumulatorFrom,
-                                                             accumulatorTo};
-}
-
-template<Color Perspective, IndexType TransformedFeatureDimensions>
-void double_inc_update(const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
-                       const Square                                            ksq,
-                       AccumulatorState&                                       middle_state,
-                       AccumulatorState&                                       target_state,
-                       const AccumulatorState&                                 computed) {
-
-    assert(computed.acc<TransformedFeatureDimensions>().computed[Perspective]);
-    assert(!middle_state.acc<TransformedFeatureDimensions>().computed[Perspective]);
-    assert(!target_state.acc<TransformedFeatureDimensions>().computed[Perspective]);
-
-    FeatureSet::IndexList removed, added;
-    FeatureSet::append_changed_indices<Perspective>(ksq, middle_state.dirtyPiece, removed, added);
-    // you can't capture a piece that was just involved in castling since the rook ends up
-    // in a square that the king passed
-    assert(added.size() < 2);
-    FeatureSet::append_changed_indices<Perspective>(ksq, target_state.dirtyPiece, removed, added);
-
-    assert(added.size() == 1);
-    assert(removed.size() == 2 || removed.size() == 3);
-
-    // Workaround compiler warning for uninitialized variables, replicated on
-    // profile builds on windows with gcc 14.2.0.
-    // TODO remove once unneeded
-    sf_assume(added.size() == 1);
-    sf_assume(removed.size() == 2 || removed.size() == 3);
-
-    auto updateContext =
-      make_accumulator_update_context<Perspective>(featureTransformer, computed, target_state);
-
-    if (removed.size() == 2)
-    {
-        updateContext.template apply<Add, Sub, Sub>(added[0], removed[0], removed[1]);
-    }
-    else
-    {
-        updateContext.template apply<Add, Sub, Sub, Sub>(added[0], removed[0], removed[1],
-                                                         removed[2]);
-    }
-
-    target_state.acc<TransformedFeatureDimensions>().computed[Perspective] = true;
-}
-
-template<Color Perspective, bool Forward, IndexType TransformedFeatureDimensions>
-void update_accumulator_incremental(
-  const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
-  const Square                                            ksq,
-  AccumulatorState&                                       target_state,
-  const AccumulatorState&                                 computed) {
-
-    assert((computed.acc<TransformedFeatureDimensions>()).computed[Perspective]);
-    assert(!(target_state.acc<TransformedFeatureDimensions>()).computed[Perspective]);
-
-    // The size must be enough to contain the largest possible update.
-    // That might depend on the feature set and generally relies on the
-    // feature set's update cost calculation to be correct and never allow
-    // updates with more added/removed features than MaxActiveDimensions.
-    // In this case, the maximum size of both feature addition and removal
-    // is 2, since we are incrementally updating one move at a time.
-    FeatureSet::IndexList removed, added;
-    if constexpr (Forward)
-        FeatureSet::append_changed_indices<Perspective>(ksq, target_state.dirtyPiece, removed,
-                                                        added);
-    else
-        FeatureSet::append_changed_indices<Perspective>(ksq, computed.dirtyPiece, added, removed);
-
-    assert(added.size() == 1 || added.size() == 2);
-    assert(removed.size() == 1 || removed.size() == 2);
-    assert((Forward && added.size() <= removed.size())
-           || (!Forward && added.size() >= removed.size()));
-
-    // Workaround compiler warning for uninitialized variables, replicated on
-    // profile builds on windows with gcc 14.2.0.
-    // TODO remove once unneeded
-    sf_assume(added.size() == 1 || added.size() == 2);
-    sf_assume(removed.size() == 1 || removed.size() == 2);
-
-    auto updateContext =
-      make_accumulator_update_context<Perspective>(featureTransformer, computed, target_state);
-
-    if ((Forward && removed.size() == 1) || (!Forward && added.size() == 1))
-    {
-        assert(added.size() == 1 && removed.size() == 1);
-        updateContext.template apply<Add, Sub>(added[0], removed[0]);
-    }
-    else if (Forward && added.size() == 1)
-    {
-        assert(removed.size() == 2);
-        updateContext.template apply<Add, Sub, Sub>(added[0], removed[0], removed[1]);
-    }
-    else if (!Forward && removed.size() == 1)
-    {
-        assert(added.size() == 2);
-        updateContext.template apply<Add, Add, Sub>(added[0], added[1], removed[0]);
-    }
-    else
-    {
-        assert(added.size() == 2 && removed.size() == 2);
-        updateContext.template apply<Add, Add, Sub, Sub>(added[0], added[1], removed[0],
-                                                         removed[1]);
-    }
-
-    (target_state.acc<TransformedFeatureDimensions>()).computed[Perspective] = true;
-}
-
-template<Color Perspective, IndexType Dimensions>
-void update_accumulator_refresh_cache(const FeatureTransformer<Dimensions>& featureTransformer,
-                                      const Position&                       pos,
-                                      AccumulatorState&                     accumulatorState,
-                                      AccumulatorCaches::Cache<Dimensions>& cache) {
-
-    using Tiling [[maybe_unused]] = SIMDTiling<Dimensions, Dimensions, PSQTBuckets>;
-
-    const Square          ksq   = pos.square<KING>(Perspective);
-    auto&                 entry = cache[ksq][Perspective];
-    FeatureSet::IndexList removed, added;
-
-    for (Color c : {WHITE, BLACK})
-    {
-        for (PieceType pt = PAWN; pt <= KING; ++pt)
-        {
-            const Piece    piece    = make_piece(c, pt);
-            const Bitboard oldBB    = entry.byColorBB[c] & entry.byTypeBB[pt];
-            const Bitboard newBB    = pos.pieces(c, pt);
-            Bitboard       toRemove = oldBB & ~newBB;
-            Bitboard       toAdd    = newBB & ~oldBB;
-
-            while (toRemove)
-            {
-                Square sq = pop_lsb(toRemove);
-                removed.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
-            }
-            while (toAdd)
-            {
-                Square sq = pop_lsb(toAdd);
-                added.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
-            }
-        }
-    }
-
-    auto& accumulator                 = accumulatorState.acc<Dimensions>();
-    accumulator.computed[Perspective] = true;
-
-#ifdef VECTOR
-    vec_t      acc[Tiling::NumRegs];
-    psqt_vec_t psqt[Tiling::NumPsqtRegs];
-
-    for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j)
-    {
-        auto* accTile =
-          reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * Tiling::TileHeight]);
-        auto* entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * Tiling::TileHeight]);
-
-        for (IndexType k = 0; k < Tiling::NumRegs; ++k)
-            acc[k] = entryTile[k];
-
-        IndexType i = 0;
-        for (; i < std::min(removed.size(), added.size()); ++i)
-        {
-            IndexType       indexR  = removed[i];
-            const IndexType offsetR = Dimensions * indexR + j * Tiling::TileHeight;
-            auto* columnR = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR]);
-            IndexType       indexA  = added[i];
-            const IndexType offsetA = Dimensions * indexA + j * Tiling::TileHeight;
-            auto* columnA = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA]);
-
-            for (IndexType k = 0; k < Tiling::NumRegs; ++k)
-                acc[k] = fused<Vec16Wrapper, Add, Sub>(acc[k], columnA[k], columnR[k]);
-        }
-        for (; i < removed.size(); ++i)
-        {
-            IndexType       index  = removed[i];
-            const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
-            auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
-
-            for (IndexType k = 0; k < Tiling::NumRegs; ++k)
-                acc[k] = vec_sub_16(acc[k], column[k]);
-        }
-        for (; i < added.size(); ++i)
-        {
-            IndexType       index  = added[i];
-            const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
-            auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
-
-            for (IndexType k = 0; k < Tiling::NumRegs; ++k)
-                acc[k] = vec_add_16(acc[k], column[k]);
-        }
-
-        for (IndexType k = 0; k < Tiling::NumRegs; k++)
-            vec_store(&entryTile[k], acc[k]);
-        for (IndexType k = 0; k < Tiling::NumRegs; k++)
-            vec_store(&accTile[k], acc[k]);
-    }
-
-    for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j)
-    {
-        auto* accTilePsqt = reinterpret_cast<psqt_vec_t*>(
-          &accumulator.psqtAccumulation[Perspective][j * Tiling::PsqtTileHeight]);
-        auto* entryTilePsqt =
-          reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * Tiling::PsqtTileHeight]);
-
-        for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
-            psqt[k] = entryTilePsqt[k];
-
-        for (IndexType i = 0; i < removed.size(); ++i)
-        {
-            IndexType       index  = removed[i];
-            const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
-            auto*           columnPsqt =
-              reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offset]);
-
-            for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
-                psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
-        }
-        for (IndexType i = 0; i < added.size(); ++i)
-        {
-            IndexType       index  = added[i];
-            const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
-            auto*           columnPsqt =
-              reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offset]);
-
-            for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
-                psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
-        }
-
-        for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
-            vec_store_psqt(&entryTilePsqt[k], psqt[k]);
-        for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
-            vec_store_psqt(&accTilePsqt[k], psqt[k]);
-    }
-
-#else
-
-    for (const auto index : removed)
-    {
-        const IndexType offset = Dimensions * index;
-        for (IndexType j = 0; j < Dimensions; ++j)
-            entry.accumulation[j] -= featureTransformer.weights[offset + j];
-
-        for (std::size_t k = 0; k < PSQTBuckets; ++k)
-            entry.psqtAccumulation[k] -= featureTransformer.psqtWeights[index * PSQTBuckets + k];
-    }
-    for (const auto index : added)
-    {
-        const IndexType offset = Dimensions * index;
-        for (IndexType j = 0; j < Dimensions; ++j)
-            entry.accumulation[j] += featureTransformer.weights[offset + j];
-
-        for (std::size_t k = 0; k < PSQTBuckets; ++k)
-            entry.psqtAccumulation[k] += featureTransformer.psqtWeights[index * PSQTBuckets + k];
-    }
-
-    // The accumulator of the refresh entry has been updated.
-    // Now copy its content to the actual accumulator we were refreshing.
-
-    std::memcpy(accumulator.accumulation[Perspective], entry.accumulation,
-                sizeof(BiasType) * Dimensions);
-
-    std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation,
-                sizeof(int32_t) * PSQTBuckets);
-#endif
-
-    for (Color c : {WHITE, BLACK})
-        entry.byColorBB[c] = pos.pieces(c);
-
-    for (PieceType pt = PAWN; pt <= KING; ++pt)
-        entry.byTypeBB[pt] = pos.pieces(pt);
-}
-
-}
-
-}
--- a/src/nnue/nnue_accumulator.h
+++ b/src/nnue/nnue_accumulator.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,167 +21,19 @@
 #ifndef NNUE_ACCUMULATOR_H_INCLUDED
 #define NNUE_ACCUMULATOR_H_INCLUDED

-#include <array>
-#include <cstddef>
-#include <cstdint>
-#include <cstring>
-#include <vector>
-
-#include "../types.h"
 #include "nnue_architecture.h"
-#include "nnue_common.h"

-namespace Stockfish {
-class Position;
-}
+namespace Eval::NNUE {

-namespace Stockfish::Eval::NNUE {
+  // Class that holds the result of affine transformation of input features
+  struct alignas(32) Accumulator {
+    std::int16_t
+        accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
+    Value score;
+    bool computed_accumulation;
+    bool computed_score;
+  };

-template<IndexType Size>
-struct alignas(CacheLineSize) Accumulator;
+}  // namespace Eval::NNUE

-template<IndexType TransformedFeatureDimensions>
-class FeatureTransformer;
-
-// Class that holds the result of affine transformation of input features
-template<IndexType Size>
-struct alignas(CacheLineSize) Accumulator {
-    std::int16_t               accumulation[COLOR_NB][Size];
-    std::int32_t               psqtAccumulation[COLOR_NB][PSQTBuckets];
-    std::array<bool, COLOR_NB> computed;
-};
-
-
-// AccumulatorCaches struct provides per-thread accumulator caches, where each
-// cache contains multiple entries for each of the possible king squares.
-// When the accumulator needs to be refreshed, the cached entry is used to more
-// efficiently update the accumulator, instead of rebuilding it from scratch.
-// This idea, was first described by Luecx (author of Koivisto) and
-// is commonly referred to as "Finny Tables".
-struct AccumulatorCaches {
-
-    template<typename Networks>
-    AccumulatorCaches(const Networks& networks) {
-        clear(networks);
-    }
-
-    template<IndexType Size>
-    struct alignas(CacheLineSize) Cache {
-
-        struct alignas(CacheLineSize) Entry {
-            BiasType       accumulation[Size];
-            PSQTWeightType psqtAccumulation[PSQTBuckets];
-            Bitboard       byColorBB[COLOR_NB];
-            Bitboard       byTypeBB[PIECE_TYPE_NB];
-
-            // To initialize a refresh entry, we set all its bitboards empty,
-            // so we put the biases in the accumulation, without any weights on top
-            void clear(const BiasType* biases) {
-
-                std::memcpy(accumulation, biases, sizeof(accumulation));
-                std::memset((uint8_t*) this + offsetof(Entry, psqtAccumulation), 0,
-                            sizeof(Entry) - offsetof(Entry, psqtAccumulation));
-            }
-        };
-
-        template<typename Network>
-        void clear(const Network& network) {
-            for (auto& entries1D : entries)
-                for (auto& entry : entries1D)
-                    entry.clear(network.featureTransformer->biases);
-        }
-
-        std::array<Entry, COLOR_NB>& operator[](Square sq) { return entries[sq]; }
-
-        std::array<std::array<Entry, COLOR_NB>, SQUARE_NB> entries;
-    };
-
-    template<typename Networks>
-    void clear(const Networks& networks) {
-        big.clear(networks.big);
-        small.clear(networks.small);
-    }
-
-    Cache<TransformedFeatureDimensionsBig>   big;
-    Cache<TransformedFeatureDimensionsSmall> small;
-};
-
-
-struct AccumulatorState {
-    Accumulator<TransformedFeatureDimensionsBig>   accumulatorBig;
-    Accumulator<TransformedFeatureDimensionsSmall> accumulatorSmall;
-    DirtyPiece                                     dirtyPiece;
-
-    template<IndexType Size>
-    auto& acc() noexcept {
-        static_assert(Size == TransformedFeatureDimensionsBig
-                        || Size == TransformedFeatureDimensionsSmall,
-                      "Invalid size for accumulator");
-
-        if constexpr (Size == TransformedFeatureDimensionsBig)
-            return accumulatorBig;
-        else if constexpr (Size == TransformedFeatureDimensionsSmall)
-            return accumulatorSmall;
-    }
-
-    template<IndexType Size>
-    const auto& acc() const noexcept {
-        static_assert(Size == TransformedFeatureDimensionsBig
-                        || Size == TransformedFeatureDimensionsSmall,
-                      "Invalid size for accumulator");
-
-        if constexpr (Size == TransformedFeatureDimensionsBig)
-            return accumulatorBig;
-        else if constexpr (Size == TransformedFeatureDimensionsSmall)
-            return accumulatorSmall;
-    }
-
-    void reset(const DirtyPiece& dp) noexcept;
-};
-
-
-class AccumulatorStack {
-   public:
-    AccumulatorStack() :
-        accumulators(MAX_PLY + 1),
-        size{1} {}
-
-    [[nodiscard]] const AccumulatorState& latest() const noexcept;
-
-    void reset() noexcept;
-    void push(const DirtyPiece& dirtyPiece) noexcept;
-    void pop() noexcept;
-
-    template<IndexType Dimensions>
-    void evaluate(const Position&                       pos,
-                  const FeatureTransformer<Dimensions>& featureTransformer,
-                  AccumulatorCaches::Cache<Dimensions>& cache) noexcept;
-
-   private:
-    [[nodiscard]] AccumulatorState& mut_latest() noexcept;
-
-    template<Color Perspective, IndexType Dimensions>
-    void evaluate_side(const Position&                       pos,
-                       const FeatureTransformer<Dimensions>& featureTransformer,
-                       AccumulatorCaches::Cache<Dimensions>& cache) noexcept;
-
-    template<Color Perspective, IndexType Dimensions>
-    [[nodiscard]] std::size_t find_last_usable_accumulator() const noexcept;
-
-    template<Color Perspective, IndexType Dimensions>
-    void forward_update_incremental(const Position&                       pos,
-                                    const FeatureTransformer<Dimensions>& featureTransformer,
-                                    const std::size_t                     begin) noexcept;
-
-    template<Color Perspective, IndexType Dimensions>
-    void backward_update_incremental(const Position&                       pos,
-                                     const FeatureTransformer<Dimensions>& featureTransformer,
-                                     const std::size_t                     end) noexcept;
-
-    std::vector<AccumulatorState> accumulators;
-    std::size_t                   size;
-};
-
-}  // namespace Stockfish::Eval::NNUE
-
-#endif  // NNUE_ACCUMULATOR_H_INCLUDED
+#endif // NNUE_ACCUMULATOR_H_INCLUDED
--- a/src/nnue/nnue_architecture.h
+++ b/src/nnue/nnue_architecture.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,123 +21,18 @@
 #ifndef NNUE_ARCHITECTURE_H_INCLUDED
 #define NNUE_ARCHITECTURE_H_INCLUDED

-#include <cstdint>
-#include <cstring>
-#include <iosfwd>
+// Defines the network structure
+#include "architectures/halfkp_256x2-32-32.h"

-#include "features/half_ka_v2_hm.h"
-#include "layers/affine_transform.h"
-#include "layers/affine_transform_sparse_input.h"
-#include "layers/clipped_relu.h"
-#include "layers/sqr_clipped_relu.h"
-#include "nnue_common.h"
+namespace Eval::NNUE {

-namespace Stockfish::Eval::NNUE {
+  static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
+  static_assert(Network::kOutputDimensions == 1, "");
+  static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");

-// Input features used in evaluation function
-using FeatureSet = Features::HalfKAv2_hm;
+  // Trigger for full calculation instead of difference calculation
+  constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;

-// Number of input feature dimensions after conversion
-constexpr IndexType TransformedFeatureDimensionsBig = 3072;
-constexpr int       L2Big                           = 15;
-constexpr int       L3Big                           = 32;
+}  // namespace Eval::NNUE

-constexpr IndexType TransformedFeatureDimensionsSmall = 128;
-constexpr int       L2Small                           = 15;
-constexpr int       L3Small                           = 32;
-
-constexpr IndexType PSQTBuckets = 8;
-constexpr IndexType LayerStacks = 8;
-
-// If vector instructions are enabled, we update and refresh the
-// accumulator tile by tile such that each tile fits in the CPU's
-// vector registers.
-static_assert(PSQTBuckets % 8 == 0,
-              "Per feature PSQT values cannot be processed at granularity lower than 8 at a time.");
-
-template<IndexType L1, int L2, int L3>
-struct NetworkArchitecture {
-    static constexpr IndexType TransformedFeatureDimensions = L1;
-    static constexpr int       FC_0_OUTPUTS                 = L2;
-    static constexpr int       FC_1_OUTPUTS                 = L3;
-
-    Layers::AffineTransformSparseInput<TransformedFeatureDimensions, FC_0_OUTPUTS + 1> fc_0;
-    Layers::SqrClippedReLU<FC_0_OUTPUTS + 1>                                           ac_sqr_0;
-    Layers::ClippedReLU<FC_0_OUTPUTS + 1>                                              ac_0;
-    Layers::AffineTransform<FC_0_OUTPUTS * 2, FC_1_OUTPUTS>                            fc_1;
-    Layers::ClippedReLU<FC_1_OUTPUTS>                                                  ac_1;
-    Layers::AffineTransform<FC_1_OUTPUTS, 1>                                           fc_2;
-
-    // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t get_hash_value() {
-        // input slice hash
-        std::uint32_t hashValue = 0xEC42E90Du;
-        hashValue ^= TransformedFeatureDimensions * 2;
-
-        hashValue = decltype(fc_0)::get_hash_value(hashValue);
-        hashValue = decltype(ac_0)::get_hash_value(hashValue);
-        hashValue = decltype(fc_1)::get_hash_value(hashValue);
-        hashValue = decltype(ac_1)::get_hash_value(hashValue);
-        hashValue = decltype(fc_2)::get_hash_value(hashValue);
-
-        return hashValue;
-    }
-
-    // Read network parameters
-    bool read_parameters(std::istream& stream) {
-        return fc_0.read_parameters(stream) && ac_0.read_parameters(stream)
-            && fc_1.read_parameters(stream) && ac_1.read_parameters(stream)
-            && fc_2.read_parameters(stream);
-    }
-
-    // Write network parameters
-    bool write_parameters(std::ostream& stream) const {
-        return fc_0.write_parameters(stream) && ac_0.write_parameters(stream)
-            && fc_1.write_parameters(stream) && ac_1.write_parameters(stream)
-            && fc_2.write_parameters(stream);
-    }
-
-    std::int32_t propagate(const TransformedFeatureType* transformedFeatures) {
-        struct alignas(CacheLineSize) Buffer {
-            alignas(CacheLineSize) typename decltype(fc_0)::OutputBuffer fc_0_out;
-            alignas(CacheLineSize) typename decltype(ac_sqr_0)::OutputType
-              ac_sqr_0_out[ceil_to_multiple<IndexType>(FC_0_OUTPUTS * 2, 32)];
-            alignas(CacheLineSize) typename decltype(ac_0)::OutputBuffer ac_0_out;
-            alignas(CacheLineSize) typename decltype(fc_1)::OutputBuffer fc_1_out;
-            alignas(CacheLineSize) typename decltype(ac_1)::OutputBuffer ac_1_out;
-            alignas(CacheLineSize) typename decltype(fc_2)::OutputBuffer fc_2_out;
-
-            Buffer() { std::memset(this, 0, sizeof(*this)); }
-        };
-
-#if defined(__clang__) && (__APPLE__)
-        // workaround for a bug reported with xcode 12
-        static thread_local auto tlsBuffer = std::make_unique<Buffer>();
-        // Access TLS only once, cache result.
-        Buffer& buffer = *tlsBuffer;
-#else
-        alignas(CacheLineSize) static thread_local Buffer buffer;
-#endif
-
-        fc_0.propagate(transformedFeatures, buffer.fc_0_out);
-        ac_sqr_0.propagate(buffer.fc_0_out, buffer.ac_sqr_0_out);
-        ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out);
-        std::memcpy(buffer.ac_sqr_0_out + FC_0_OUTPUTS, buffer.ac_0_out,
-                    FC_0_OUTPUTS * sizeof(typename decltype(ac_0)::OutputType));
-        fc_1.propagate(buffer.ac_sqr_0_out, buffer.fc_1_out);
-        ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out);
-        fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out);
-
-        // buffer.fc_0_out[FC_0_OUTPUTS] is such that 1.0 is equal to 127*(1<<WeightScaleBits) in
-        // quantized form, but we want 1.0 to be equal to 600*OutputScale
-        std::int32_t fwdOut =
-          (buffer.fc_0_out[FC_0_OUTPUTS]) * (600 * OutputScale) / (127 * (1 << WeightScaleBits));
-        std::int32_t outputValue = buffer.fc_2_out[0] + fwdOut;
-
-        return outputValue;
-    }
-};
-
-}  // namespace Stockfish::Eval::NNUE
-
-#endif  // #ifndef NNUE_ARCHITECTURE_H_INCLUDED
+#endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED
--- a/src/nnue/nnue_common.h
+++ b/src/nnue/nnue_common.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,268 +21,57 @@
 #ifndef NNUE_COMMON_H_INCLUDED
 #define NNUE_COMMON_H_INCLUDED

-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-#include <iostream>
-#include <type_traits>
-
-#include "../misc.h"
-
 #if defined(USE_AVX2)
-    #include <immintrin.h>
+#include <immintrin.h>

 #elif defined(USE_SSE41)
-    #include <smmintrin.h>
+#include <smmintrin.h>

 #elif defined(USE_SSSE3)
-    #include <tmmintrin.h>
+#include <tmmintrin.h>

 #elif defined(USE_SSE2)
-    #include <emmintrin.h>
+#include <emmintrin.h>

 #elif defined(USE_NEON)
-    #include <arm_neon.h>
+#include <arm_neon.h>
 #endif

-namespace Stockfish::Eval::NNUE {
+namespace Eval::NNUE {

-using BiasType       = std::int16_t;
-using WeightType     = std::int16_t;
-using PSQTWeightType = std::int32_t;
-using IndexType      = std::uint32_t;
+  // Version of the evaluation file
+  constexpr std::uint32_t kVersion = 0x7AF32F16u;

-// Version of the evaluation file
-constexpr std::uint32_t Version = 0x7AF32F20u;
+  // Constant used in evaluation value calculation
+  constexpr int FV_SCALE = 16;
+  constexpr int kWeightScaleBits = 6;

-// Constant used in evaluation value calculation
-constexpr int OutputScale     = 16;
-constexpr int WeightScaleBits = 6;
+  // Size of cache line (in bytes)
+  constexpr std::size_t kCacheLineSize = 64;

-// Size of cache line (in bytes)
-constexpr std::size_t CacheLineSize = 64;
+  // SIMD width (in bytes)
+  #if defined(USE_AVX2)
+  constexpr std::size_t kSimdWidth = 32;

-constexpr const char        Leb128MagicString[]   = "COMPRESSED_LEB128";
-constexpr const std::size_t Leb128MagicStringSize = sizeof(Leb128MagicString) - 1;
+  #elif defined(USE_SSE2)
+  constexpr std::size_t kSimdWidth = 16;

-// SIMD width (in bytes)
-#if defined(USE_AVX2)
-constexpr std::size_t SimdWidth = 32;
+  #elif defined(USE_NEON)
+  constexpr std::size_t kSimdWidth = 16;
+  #endif

-#elif defined(USE_SSE2)
-constexpr std::size_t SimdWidth = 16;
+  constexpr std::size_t kMaxSimdWidth = 32;

-#elif defined(USE_NEON)
-constexpr std::size_t SimdWidth = 16;
-#endif
+  // Type of input feature after conversion
+  using TransformedFeatureType = std::uint8_t;
+  using IndexType = std::uint32_t;

-constexpr std::size_t MaxSimdWidth = 32;
-
-// Type of input feature after conversion
-using TransformedFeatureType = std::uint8_t;
-
-// Round n up to be a multiple of base
-template<typename IntType>
-constexpr IntType ceil_to_multiple(IntType n, IntType base) {
+  // Round n up to be a multiple of base
+  template <typename IntType>
+  constexpr IntType CeilToMultiple(IntType n, IntType base) {
    return (n + base - 1) / base * base;
-}
+  }

+}  // namespace Eval::NNUE

-// Utility to read an integer (signed or unsigned, any size)
-// from a stream in little-endian order. We swap the byte order after the read if
-// necessary to return a result with the byte ordering of the compiling machine.
-template<typename IntType>
-inline IntType read_little_endian(std::istream& stream) {
-    IntType result;
-
-    if (IsLittleEndian)
-        stream.read(reinterpret_cast<char*>(&result), sizeof(IntType));
-    else
-    {
-        std::uint8_t                  u[sizeof(IntType)];
-        std::make_unsigned_t<IntType> v = 0;
-
-        stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
-        for (std::size_t i = 0; i < sizeof(IntType); ++i)
-            v = (v << 8) | u[sizeof(IntType) - i - 1];
-
-        std::memcpy(&result, &v, sizeof(IntType));
-    }
-
-    return result;
-}
-
-
-// Utility to write an integer (signed or unsigned, any size)
-// to a stream in little-endian order. We swap the byte order before the write if
-// necessary to always write in little-endian order, independently of the byte
-// ordering of the compiling machine.
-template<typename IntType>
-inline void write_little_endian(std::ostream& stream, IntType value) {
-
-    if (IsLittleEndian)
-        stream.write(reinterpret_cast<const char*>(&value), sizeof(IntType));
-    else
-    {
-        std::uint8_t                  u[sizeof(IntType)];
-        std::make_unsigned_t<IntType> v = value;
-
-        std::size_t i = 0;
-        // if constexpr to silence the warning about shift by 8
-        if constexpr (sizeof(IntType) > 1)
-        {
-            for (; i + 1 < sizeof(IntType); ++i)
-            {
-                u[i] = std::uint8_t(v);
-                v >>= 8;
-            }
-        }
-        u[i] = std::uint8_t(v);
-
-        stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
-    }
-}
-
-
-// Read integers in bulk from a little-endian stream.
-// This reads N integers from stream s and puts them in array out.
-template<typename IntType>
-inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) {
-    if (IsLittleEndian)
-        stream.read(reinterpret_cast<char*>(out), sizeof(IntType) * count);
-    else
-        for (std::size_t i = 0; i < count; ++i)
-            out[i] = read_little_endian<IntType>(stream);
-}
-
-
-// Write integers in bulk to a little-endian stream.
-// This takes N integers from array values and writes them on stream s.
-template<typename IntType>
-inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) {
-    if (IsLittleEndian)
-        stream.write(reinterpret_cast<const char*>(values), sizeof(IntType) * count);
-    else
-        for (std::size_t i = 0; i < count; ++i)
-            write_little_endian<IntType>(stream, values[i]);
-}
-
-
-// Read N signed integers from the stream s, putting them in the array out.
-// The stream is assumed to be compressed using the signed LEB128 format.
-// See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme.
-template<typename IntType>
-inline void read_leb_128(std::istream& stream, IntType* out, std::size_t count) {
-
-    // Check the presence of our LEB128 magic string
-    char leb128MagicString[Leb128MagicStringSize];
-    stream.read(leb128MagicString, Leb128MagicStringSize);
-    assert(strncmp(Leb128MagicString, leb128MagicString, Leb128MagicStringSize) == 0);
-
-    static_assert(std::is_signed_v<IntType>, "Not implemented for unsigned types");
-
-    const std::uint32_t BUF_SIZE = 4096;
-    std::uint8_t        buf[BUF_SIZE];
-
-    auto bytes_left = read_little_endian<std::uint32_t>(stream);
-
-    std::uint32_t buf_pos = BUF_SIZE;
-    for (std::size_t i = 0; i < count; ++i)
-    {
-        IntType result = 0;
-        size_t  shift  = 0;
-        do
-        {
-            if (buf_pos == BUF_SIZE)
-            {
-                stream.read(reinterpret_cast<char*>(buf), std::min(bytes_left, BUF_SIZE));
-                buf_pos = 0;
-            }
-
-            std::uint8_t byte = buf[buf_pos++];
-            --bytes_left;
-            result |= (byte & 0x7f) << shift;
-            shift += 7;
-
-            if ((byte & 0x80) == 0)
-            {
-                out[i] = (sizeof(IntType) * 8 <= shift || (byte & 0x40) == 0)
-                         ? result
-                         : result | ~((1 << shift) - 1);
-                break;
-            }
-        } while (shift < sizeof(IntType) * 8);
-    }
-
-    assert(bytes_left == 0);
-}
-
-
-// Write signed integers to a stream with LEB128 compression.
-// This takes N integers from array values, compresses them with
-// the LEB128 algorithm and writes the result on the stream s.
-// See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme.
-template<typename IntType>
-inline void write_leb_128(std::ostream& stream, const IntType* values, std::size_t count) {
-
-    // Write our LEB128 magic string
-    stream.write(Leb128MagicString, Leb128MagicStringSize);
-
-    static_assert(std::is_signed_v<IntType>, "Not implemented for unsigned types");
-
-    std::uint32_t byte_count = 0;
-    for (std::size_t i = 0; i < count; ++i)
-    {
-        IntType      value = values[i];
-        std::uint8_t byte;
-        do
-        {
-            byte = value & 0x7f;
-            value >>= 7;
-            ++byte_count;
-        } while ((byte & 0x40) == 0 ? value != 0 : value != -1);
-    }
-
-    write_little_endian(stream, byte_count);
-
-    const std::uint32_t BUF_SIZE = 4096;
-    std::uint8_t        buf[BUF_SIZE];
-    std::uint32_t       buf_pos = 0;
-
-    auto flush = [&]() {
-        if (buf_pos > 0)
-        {
-            stream.write(reinterpret_cast<char*>(buf), buf_pos);
-            buf_pos = 0;
-        }
-    };
-
-    auto write = [&](std::uint8_t byte) {
-        buf[buf_pos++] = byte;
-        if (buf_pos == BUF_SIZE)
-            flush();
-    };
-
-    for (std::size_t i = 0; i < count; ++i)
-    {
-        IntType value = values[i];
-        while (true)
-        {
-            std::uint8_t byte = value & 0x7f;
-            value >>= 7;
-            if ((byte & 0x40) == 0 ? value == 0 : value == -1)
-            {
-                write(byte);
-                break;
-            }
-            write(byte | 0x80);
-        }
-    }
-
-    flush();
-}
-
-}  // namespace Stockfish::Eval::NNUE
-
-#endif  // #ifndef NNUE_COMMON_H_INCLUDED
+#endif // #ifndef NNUE_COMMON_H_INCLUDED
--- a/src/nnue/nnue_feature_transformer.h
+++ b/src/nnue/nnue_feature_transformer.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,292 +21,335 @@
 #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
 #define NNUE_FEATURE_TRANSFORMER_H_INCLUDED

-#include <algorithm>
-#include <cstdint>
-#include <cstring>
-#include <iosfwd>
-
-#include "../position.h"
-#include "../types.h"
-#include "nnue_accumulator.h"
-#include "nnue_architecture.h"
 #include "nnue_common.h"
-#include "simd.h"
+#include "nnue_architecture.h"
+#include "features/index_list.h"

-namespace Stockfish::Eval::NNUE {
+#include <cstring> // std::memset()

-// Returns the inverse of a permutation
-template<std::size_t Len>
-constexpr std::array<std::size_t, Len>
-invert_permutation(const std::array<std::size_t, Len>& order) {
-    std::array<std::size_t, Len> inverse{};
-    for (std::size_t i = 0; i < order.size(); i++)
-        inverse[order[i]] = i;
-    return inverse;
-}
+namespace Eval::NNUE {

-// Divide a byte region of size TotalSize to chunks of size
-// BlockSize, and permute the blocks by a given order
-template<std::size_t BlockSize, typename T, std::size_t N, std::size_t OrderSize>
-void permute(T (&data)[N], const std::array<std::size_t, OrderSize>& order) {
-    constexpr std::size_t TotalSize = N * sizeof(T);
-
-    static_assert(TotalSize % (BlockSize * OrderSize) == 0,
-                  "ChunkSize * OrderSize must perfectly divide TotalSize");
-
-    constexpr std::size_t ProcessChunkSize = BlockSize * OrderSize;
-
-    std::array<std::byte, ProcessChunkSize> buffer{};
-
-    std::byte* const bytes = reinterpret_cast<std::byte*>(data);
-
-    for (std::size_t i = 0; i < TotalSize; i += ProcessChunkSize)
-    {
-        std::byte* const values = &bytes[i];
-
-        for (std::size_t j = 0; j < OrderSize; j++)
-        {
-            auto* const buffer_chunk = &buffer[j * BlockSize];
-            auto* const value_chunk  = &values[order[j] * BlockSize];
-
-            std::copy(value_chunk, value_chunk + BlockSize, buffer_chunk);
-        }
-
-        std::copy(std::begin(buffer), std::end(buffer), values);
-    }
-}
-
-// Input feature converter
-template<IndexType TransformedFeatureDimensions>
-class FeatureTransformer {
+  // Input feature converter
+  class FeatureTransformer {

+   private:
    // Number of output dimensions for one side
-    static constexpr IndexType HalfDimensions = TransformedFeatureDimensions;
+    static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;

   public:
    // Output type
    using OutputType = TransformedFeatureType;

    // Number of input/output dimensions
-    static constexpr IndexType InputDimensions  = FeatureSet::Dimensions;
-    static constexpr IndexType OutputDimensions = HalfDimensions;
+    static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
+    static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;

    // Size of forward propagation buffer
-    static constexpr std::size_t BufferSize = OutputDimensions * sizeof(OutputType);
-
-    // Store the order by which 128-bit blocks of a 1024-bit data must
-    // be permuted so that calling packus on adjacent vectors of 16-bit
-    // integers loaded from the data results in the pre-permutation order
-    static constexpr auto PackusEpi16Order = []() -> std::array<std::size_t, 8> {
-#if defined(USE_AVX512)
-        // _mm512_packus_epi16 after permutation:
-        // |   0   |   2   |   4   |   6   | // Vector 0
-        // |   1   |   3   |   5   |   7   | // Vector 1
-        // | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | // Packed Result
-        return {0, 2, 4, 6, 1, 3, 5, 7};
-#elif defined(USE_AVX2)
-        // _mm256_packus_epi16 after permutation:
-        // |   0   |   2   |  |   4   |   6   | // Vector 0, 2
-        // |   1   |   3   |  |   5   |   7   | // Vector 1, 3
-        // | 0 | 1 | 2 | 3 |  | 4 | 5 | 6 | 7 | // Packed Result
-        return {0, 2, 1, 3, 4, 6, 5, 7};
-#else
-        return {0, 1, 2, 3, 4, 5, 6, 7};
-#endif
-    }();
-
-    static constexpr auto InversePackusEpi16Order = invert_permutation(PackusEpi16Order);
+    static constexpr std::size_t kBufferSize =
+        kOutputDimensions * sizeof(OutputType);

    // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t get_hash_value() {
-        return FeatureSet::HashValue ^ (OutputDimensions * 2);
-    }
-
-    void permute_weights() {
-        permute<16>(biases, PackusEpi16Order);
-        permute<16>(weights, PackusEpi16Order);
-    }
-
-    void unpermute_weights() {
-        permute<16>(biases, InversePackusEpi16Order);
-        permute<16>(weights, InversePackusEpi16Order);
-    }
-
-    inline void scale_weights(bool read) {
-        for (IndexType j = 0; j < InputDimensions; ++j)
-        {
-            WeightType* w = &weights[j * HalfDimensions];
-            for (IndexType i = 0; i < HalfDimensions; ++i)
-                w[i] = read ? w[i] * 2 : w[i] / 2;
-        }
-
-        for (IndexType i = 0; i < HalfDimensions; ++i)
-            biases[i] = read ? biases[i] * 2 : biases[i] / 2;
+    static constexpr std::uint32_t GetHashValue() {
+      return RawFeatures::kHashValue ^ kOutputDimensions;
    }

    // Read network parameters
-    bool read_parameters(std::istream& stream) {
-
-        read_leb_128<BiasType>(stream, biases, HalfDimensions);
-        read_leb_128<WeightType>(stream, weights, HalfDimensions * InputDimensions);
-        read_leb_128<PSQTWeightType>(stream, psqtWeights, PSQTBuckets * InputDimensions);
-
-        permute_weights();
-        scale_weights(true);
-        return !stream.fail();
+    bool ReadParameters(std::istream& stream) {
+      stream.read(reinterpret_cast<char*>(biases_),
+                  kHalfDimensions * sizeof(BiasType));
+      stream.read(reinterpret_cast<char*>(weights_),
+                  kHalfDimensions * kInputDimensions * sizeof(WeightType));
+      return !stream.fail();
    }

-    // Write network parameters
-    bool write_parameters(std::ostream& stream) {
-
-        unpermute_weights();
-        scale_weights(false);
-
-        write_leb_128<BiasType>(stream, biases, HalfDimensions);
-        write_leb_128<WeightType>(stream, weights, HalfDimensions * InputDimensions);
-        write_leb_128<PSQTWeightType>(stream, psqtWeights, PSQTBuckets * InputDimensions);
-
-        permute_weights();
-        scale_weights(true);
-        return !stream.fail();
+    // Proceed with the difference calculation if possible
+    bool UpdateAccumulatorIfPossible(const Position& pos) const {
+      const auto now = pos.state();
+      if (now->accumulator.computed_accumulation) {
+        return true;
+      }
+      const auto prev = now->previous;
+      if (prev && prev->accumulator.computed_accumulation) {
+        UpdateAccumulator(pos);
+        return true;
+      }
+      return false;
    }

    // Convert input features
-    std::int32_t transform(const Position&                           pos,
-                           AccumulatorStack&                         accumulatorStack,
-                           AccumulatorCaches::Cache<HalfDimensions>* cache,
-                           OutputType*                               output,
-                           int                                       bucket) const {
+    void Transform(const Position& pos, OutputType* output, bool refresh) const {
+      if (refresh || !UpdateAccumulatorIfPossible(pos)) {
+        RefreshAccumulator(pos);
+      }
+      const auto& accumulation = pos.state()->accumulator.accumulation;

-        using namespace SIMD;
+  #if defined(USE_AVX2)
+      constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
+      constexpr int kControl = 0b11011000;
+      const __m256i kZero = _mm256_setzero_si256();

-        accumulatorStack.evaluate(pos, *this, *cache);
-        const auto& accumulatorState = accumulatorStack.latest();
+  #elif defined(USE_SSSE3)
+      constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;

-        const Color perspectives[2]  = {pos.side_to_move(), ~pos.side_to_move()};
-        const auto& psqtAccumulation = (accumulatorState.acc<HalfDimensions>()).psqtAccumulation;
-        const auto  psqt =
-          (psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket])
-          / 2;
+  #ifdef USE_SSE41
+      const __m128i kZero = _mm_setzero_si128();
+  #else
+      const __m128i k0x80s = _mm_set1_epi8(-128);
+  #endif

-        const auto& accumulation = (accumulatorState.acc<HalfDimensions>()).accumulation;
+  #elif defined(USE_NEON)
+      constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+      const int8x8_t kZero = {0};
+  #endif

-        for (IndexType p = 0; p < 2; ++p)
-        {
-            const IndexType offset = (HalfDimensions / 2) * p;
+      const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
+      for (IndexType p = 0; p < 2; ++p) {
+        const IndexType offset = kHalfDimensions * p;

-#if defined(VECTOR)
+  #if defined(USE_AVX2)
+        auto out = reinterpret_cast<__m256i*>(&output[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m256i sum0 =

-            constexpr IndexType OutputChunkSize = MaxChunkSize;
-            static_assert((HalfDimensions / 2) % OutputChunkSize == 0);
-            constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize;
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+            // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256(), because binaries
+            //       compiled with g++ in MSYS2 crash here: the output memory is not aligned
+            //       even though alignas is specified.
+            _mm256_loadu_si256
+  #else
+            _mm256_load_si256
+  #endif

-            const vec_t Zero = vec_zero();
-            const vec_t One  = vec_set_16(127 * 2);
+            (&reinterpret_cast<const __m256i*>(
+              accumulation[perspectives[p]][0])[j * 2 + 0]);
+          __m256i sum1 =

-            const vec_t* in0 = reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][0]));
-            const vec_t* in1 =
-              reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][HalfDimensions / 2]));
-            vec_t* out = reinterpret_cast<vec_t*>(output + offset);
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+            _mm256_loadu_si256
+  #else
+            _mm256_load_si256
+  #endif

-            // Per the NNUE architecture, here we want to multiply pairs of
-            // clipped elements and divide the product by 128. To do this,
-            // we can naively perform min/max operation to clip each of the
-            // four int16 vectors, mullo pairs together, then pack them into
-            // one int8 vector. However, there exists a faster way.
+            (&reinterpret_cast<const __m256i*>(
+              accumulation[perspectives[p]][0])[j * 2 + 1]);

-            // The idea here is to use the implicit clipping from packus to
-            // save us two vec_max_16 instructions. This clipping works due
-            // to the fact that any int16 integer below zero will be zeroed
-            // on packus.
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+          _mm256_storeu_si256
+  #else
+          _mm256_store_si256
+  #endif

-            // Consider the case where the second element is negative.
-            // If we do standard clipping, that element will be zero, which
-            // means our pairwise product is zero. If we perform packus and
-            // remove the lower-side clip for the second element, then our
-            // product before packus will be negative, and is zeroed on pack.
-            // The two operation produce equivalent results, but the second
-            // one (using packus) saves one max operation per pair.
-
-            // But here we run into a problem: mullo does not preserve the
-            // sign of the multiplication. We can get around this by doing
-            // mulhi, which keeps the sign. But that requires an additional
-            // tweak.
-
-            // mulhi cuts off the last 16 bits of the resulting product,
-            // which is the same as performing a rightward shift of 16 bits.
-            // We can use this to our advantage. Recall that we want to
-            // divide the final product by 128, which is equivalent to a
-            // 7-bit right shift. Intuitively, if we shift the clipped
-            // value left by 9, and perform mulhi, which shifts the product
-            // right by 16 bits, then we will net a right shift of 7 bits.
-            // However, this won't work as intended. Since we clip the
-            // values to have a maximum value of 127, shifting it by 9 bits
-            // might occupy the signed bit, resulting in some positive
-            // values being interpreted as negative after the shift.
-
-            // There is a way, however, to get around this limitation. When
-            // loading the network, scale accumulator weights and biases by
-            // 2. To get the same pairwise multiplication result as before,
-            // we need to divide the product by 128 * 2 * 2 = 512, which
-            // amounts to a right shift of 9 bits. So now we only have to
-            // shift left by 7 bits, perform mulhi (shifts right by 16 bits)
-            // and net a 9 bit right shift. Since we scaled everything by
-            // two, the values are clipped at 127 * 2 = 254, which occupies
-            // 8 bits. Shifting it by 7 bits left will no longer occupy the
-            // signed bit, so we are safe.
-
-            // Note that on NEON processors, we shift left by 6 instead
-            // because the instruction "vqdmulhq_s16" also doubles the
-            // return value after the multiplication, adding an extra shift
-            // to the left by 1, so we compensate by shifting less before
-            // the multiplication.
-
-            constexpr int shift =
-    #if defined(USE_SSE2)
-              7;
-    #else
-              6;
-    #endif
-
-            for (IndexType j = 0; j < NumOutputChunks; ++j)
-            {
-                const vec_t sum0a =
-                  vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero), shift);
-                const vec_t sum0b =
-                  vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero), shift);
-                const vec_t sum1a = vec_min_16(in1[j * 2 + 0], One);
-                const vec_t sum1b = vec_min_16(in1[j * 2 + 1], One);
-
-                const vec_t pa = vec_mulhi_16(sum0a, sum1a);
-                const vec_t pb = vec_mulhi_16(sum0b, sum1b);
-
-                out[j] = vec_packus_16(pa, pb);
-            }
-
-#else
-
-            for (IndexType j = 0; j < HalfDimensions / 2; ++j)
-            {
-                BiasType sum0 = accumulation[static_cast<int>(perspectives[p])][j + 0];
-                BiasType sum1 =
-                  accumulation[static_cast<int>(perspectives[p])][j + HalfDimensions / 2];
-                sum0               = std::clamp<BiasType>(sum0, 0, 127 * 2);
-                sum1               = std::clamp<BiasType>(sum1, 0, 127 * 2);
-                output[offset + j] = static_cast<OutputType>(unsigned(sum0 * sum1) / 512);
-            }
-
-#endif
+          (&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
+              _mm256_packs_epi16(sum0, sum1), kZero), kControl));
        }

-        return psqt;
-    }  // end of function transform()
+  #elif defined(USE_SSSE3)
+        auto out = reinterpret_cast<__m128i*>(&output[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
+              accumulation[perspectives[p]][0])[j * 2 + 0]);
+          __m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
+              accumulation[perspectives[p]][0])[j * 2 + 1]);
+      const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);

-    alignas(CacheLineSize) BiasType biases[HalfDimensions];
-    alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions];
-    alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets];
-};
+          _mm_store_si128(&out[j],

-}  // namespace Stockfish::Eval::NNUE
+  #ifdef USE_SSE41
+            _mm_max_epi8(packedbytes, kZero)
+  #else
+            _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
+  #endif

-#endif  // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
+          );
+        }
+
+  #elif defined(USE_NEON)
+        const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          int16x8_t sum = reinterpret_cast<const int16x8_t*>(
+              accumulation[perspectives[p]][0])[j];
+          out[j] = vmax_s8(vqmovn_s16(sum), kZero);
+        }
+
+  #else
+        for (IndexType j = 0; j < kHalfDimensions; ++j) {
+          BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
+          output[offset + j] = static_cast<OutputType>(
+              std::max<int>(0, std::min<int>(127, sum)));
+        }
+  #endif
+
+      }
+    }
+
+   private:
+    // Rebuild the accumulator from scratch: biases plus the weight column of every active feature
+    void RefreshAccumulator(const Position& pos) const {
+      auto& accumulator = pos.state()->accumulator;
+      IndexType i = 0;  // only refresh trigger / accumulation slot 0 is handled here
+      Features::IndexList active_indices[2];  // one list per perspective
+      RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
+                                       active_indices);
+      for (Color perspective : { WHITE, BLACK }) {
+        std::memcpy(accumulator.accumulation[perspective][i], biases_,
+                   kHalfDimensions * sizeof(BiasType));  // start from the biases
+        for (const auto index : active_indices[perspective]) {
+          const IndexType offset = kHalfDimensions * index;  // start of this feature's weight column
+
+  #if defined(USE_AVX2)
+          auto accumulation = reinterpret_cast<__m256i*>(
+              &accumulator.accumulation[perspective][i][0]);
+          auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
+          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);  // kSimdWidth is in bytes, elements are 16-bit
+          for (IndexType j = 0; j < kNumChunks; ++j) {
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+            _mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j]));
+  #else
+            accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
+  #endif
+          }
+
+  #elif defined(USE_SSE2)
+          auto accumulation = reinterpret_cast<__m128i*>(
+              &accumulator.accumulation[perspective][i][0]);
+          auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
+          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);  // kSimdWidth is in bytes, elements are 16-bit
+          for (IndexType j = 0; j < kNumChunks; ++j) {
+            accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
+          }
+
+  #elif defined(USE_NEON)
+          auto accumulation = reinterpret_cast<int16x8_t*>(
+              &accumulator.accumulation[perspective][i][0]);
+          auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
+          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);  // kSimdWidth is in bytes, elements are 16-bit
+          for (IndexType j = 0; j < kNumChunks; ++j) {
+            accumulation[j] = vaddq_s16(accumulation[j], column[j]);
+          }
+
+  #else
+          for (IndexType j = 0; j < kHalfDimensions; ++j) {  // scalar fallback, one element at a time
+            accumulator.accumulation[perspective][i][j] += weights_[offset + j];
+          }
+  #endif
+
+        }
+      }
+
+      accumulator.computed_accumulation = true;  // accumulation is now valid for this state
+      accumulator.computed_score = false;  // flag any previously computed score as stale
+    }
+
+    // Update the accumulator incrementally from the previous state's one; pos.state()->previous must be valid
+    void UpdateAccumulator(const Position& pos) const {
+      const auto& prev_accumulator = pos.state()->previous->accumulator;  // read-only: bind by reference, do not copy
+      auto& accumulator = pos.state()->accumulator;
+      IndexType i = 0;  // only refresh trigger / accumulation slot 0 is handled here
+      Features::IndexList removed_indices[2], added_indices[2];  // one list per perspective
+      bool reset[2];  // filled in by AppendChangedIndices below
+      RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
+                                        removed_indices, added_indices, reset);
+      for (Color perspective : { WHITE, BLACK }) {
+
+  #if defined(USE_AVX2)
+        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);  // kSimdWidth is in bytes, elements are 16-bit
+        auto accumulation = reinterpret_cast<__m256i*>(
+            &accumulator.accumulation[perspective][i][0]);
+
+  #elif defined(USE_SSE2)
+        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);  // kSimdWidth is in bytes, elements are 16-bit
+        auto accumulation = reinterpret_cast<__m128i*>(
+            &accumulator.accumulation[perspective][i][0]);
+
+  #elif defined(USE_NEON)
+        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);  // kSimdWidth is in bytes, elements are 16-bit
+        auto accumulation = reinterpret_cast<int16x8_t*>(
+            &accumulator.accumulation[perspective][i][0]);
+  #endif
+
+        if (reset[perspective]) {  // start over from the biases; removed features are not subtracted
+          std::memcpy(accumulator.accumulation[perspective][i], biases_,
+                      kHalfDimensions * sizeof(BiasType));
+        } else {  // start from the previous state's values
+          std::memcpy(accumulator.accumulation[perspective][i],
+                      prev_accumulator.accumulation[perspective][i],
+                      kHalfDimensions * sizeof(BiasType));
+          // Subtract the weight column of every deactivated feature
+          for (const auto index : removed_indices[perspective]) {
+            const IndexType offset = kHalfDimensions * index;  // start of this feature's weight column
+
+  #if defined(USE_AVX2)
+            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
+            }
+
+  #elif defined(USE_SSE2)
+            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
+            }
+
+  #elif defined(USE_NEON)
+            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = vsubq_s16(accumulation[j], column[j]);
+            }
+
+  #else
+            for (IndexType j = 0; j < kHalfDimensions; ++j) {
+              accumulator.accumulation[perspective][i][j] -=
+                  weights_[offset + j];
+            }
+  #endif
+
+          }
+        }
+        { // Add the weight column of every activated feature (done in both the reset and non-reset case)
+          for (const auto index : added_indices[perspective]) {
+            const IndexType offset = kHalfDimensions * index;  // start of this feature's weight column
+
+  #if defined(USE_AVX2)
+            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
+            }
+
+  #elif defined(USE_SSE2)
+            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
+            }
+
+  #elif defined(USE_NEON)
+            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = vaddq_s16(accumulation[j], column[j]);
+            }
+
+  #else
+            for (IndexType j = 0; j < kHalfDimensions; ++j) {
+              accumulator.accumulation[perspective][i][j] +=
+                  weights_[offset + j];
+            }
+  #endif
+
+          }
+        }
+      }
+
+      accumulator.computed_accumulation = true;  // accumulation is now valid for this state
+      accumulator.computed_score = false;  // flag any previously computed score as stale
+    }
+
+    using BiasType = std::int16_t;
+    using WeightType = std::int16_t;
+
+    alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
+    alignas(kCacheLineSize)
+        WeightType weights_[kHalfDimensions * kInputDimensions];
+  };
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
--- a/src/nnue/nnue_misc.cpp
+++ b/src/nnue/nnue_misc.cpp
@@ -1,193 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-// Code for calculating NNUE evaluation function
-
-#include "nnue_misc.h"
-
-#include <cmath>
-#include <cstdlib>
-#include <cstring>
-#include <iomanip>
-#include <iosfwd>
-#include <iostream>
-#include <sstream>
-#include <string_view>
-#include <tuple>
-
-#include "../position.h"
-#include "../types.h"
-#include "../uci.h"
-#include "network.h"
-#include "nnue_accumulator.h"
-
-namespace Stockfish::Eval::NNUE {
-
-
-constexpr std::string_view PieceToChar(" PNBRQK  pnbrqk");
-
-
-namespace {
-// Converts a Value into (centi)pawns and writes it in a buffer.
-// The buffer must have capacity for at least 5 chars.
-void format_cp_compact(Value v, char* buffer, const Position& pos) {
-
-    buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' ');
-
-    int cp = std::abs(UCIEngine::to_cp(v, pos));
-    if (cp >= 10000)
-    {
-        buffer[1] = '0' + cp / 10000;
-        cp %= 10000;
-        buffer[2] = '0' + cp / 1000;
-        cp %= 1000;
-        buffer[3] = '0' + cp / 100;
-        buffer[4] = ' ';
-    }
-    else if (cp >= 1000)
-    {
-        buffer[1] = '0' + cp / 1000;
-        cp %= 1000;
-        buffer[2] = '0' + cp / 100;
-        cp %= 100;
-        buffer[3] = '.';
-        buffer[4] = '0' + cp / 10;
-    }
-    else
-    {
-        buffer[1] = '0' + cp / 100;
-        cp %= 100;
-        buffer[2] = '.';
-        buffer[3] = '0' + cp / 10;
-        cp %= 10;
-        buffer[4] = '0' + cp / 1;
-    }
-}
-
-
-// Converts a Value into pawns, always keeping two decimals
-void format_cp_aligned_dot(Value v, std::stringstream& stream, const Position& pos) {
-
-    const double pawns = std::abs(0.01 * UCIEngine::to_cp(v, pos));
-
-    stream << (v < 0   ? '-'
-               : v > 0 ? '+'
-                       : ' ')
-           << std::setiosflags(std::ios::fixed) << std::setw(6) << std::setprecision(2) << pawns;
-}
-}
-
-
-// Returns a string with the value of each piece on a board,
-// and a table for (PSQT, Layers) values bucket by bucket.
-std::string
-trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::AccumulatorCaches& caches) {
-
-    std::stringstream ss;
-
-    char board[3 * 8 + 1][8 * 8 + 2];
-    std::memset(board, ' ', sizeof(board));
-    for (int row = 0; row < 3 * 8 + 1; ++row)
-        board[row][8 * 8 + 1] = '\0';
-
-    // A lambda to output one box of the board
-    auto writeSquare = [&board, &pos](File file, Rank rank, Piece pc, Value value) {
-        const int x = int(file) * 8;
-        const int y = (7 - int(rank)) * 3;
-        for (int i = 1; i < 8; ++i)
-            board[y][x + i] = board[y + 3][x + i] = '-';
-        for (int i = 1; i < 3; ++i)
-            board[y + i][x] = board[y + i][x + 8] = '|';
-        board[y][x] = board[y][x + 8] = board[y + 3][x + 8] = board[y + 3][x] = '+';
-        if (pc != NO_PIECE)
-            board[y + 1][x + 4] = PieceToChar[pc];
-        if (is_valid(value))
-            format_cp_compact(value, &board[y + 2][x + 2], pos);
-    };
-
-    AccumulatorStack accumulators;
-
-    // We estimate the value of each piece by doing a differential evaluation from
-    // the current base eval, simulating the removal of the piece from its square.
-    auto [psqt, positional] = networks.big.evaluate(pos, accumulators, &caches.big);
-    Value base              = psqt + positional;
-    base                    = pos.side_to_move() == WHITE ? base : -base;
-
-    for (File f = FILE_A; f <= FILE_H; ++f)
-        for (Rank r = RANK_1; r <= RANK_8; ++r)
-        {
-            Square sq = make_square(f, r);
-            Piece  pc = pos.piece_on(sq);
-            Value  v  = VALUE_NONE;
-
-            if (pc != NO_PIECE && type_of(pc) != KING)
-            {
-                pos.remove_piece(sq);
-
-                accumulators.reset();
-                std::tie(psqt, positional) = networks.big.evaluate(pos, accumulators, &caches.big);
-                Value eval                 = psqt + positional;
-                eval                       = pos.side_to_move() == WHITE ? eval : -eval;
-                v                          = base - eval;
-
-                pos.put_piece(pc, sq);
-            }
-
-            writeSquare(f, r, pc, v);
-        }
-
-    ss << " NNUE derived piece values:\n";
-    for (int row = 0; row < 3 * 8 + 1; ++row)
-        ss << board[row] << '\n';
-    ss << '\n';
-
-    accumulators.reset();
-    auto t = networks.big.trace_evaluate(pos, accumulators, &caches.big);
-
-    ss << " NNUE network contributions "
-       << (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << std::endl
-       << "+------------+------------+------------+------------+\n"
-       << "|   Bucket   |  Material  | Positional |   Total    |\n"
-       << "|            |   (PSQT)   |  (Layers)  |            |\n"
-       << "+------------+------------+------------+------------+\n";
-
-    for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket)
-    {
-        ss << "|  " << bucket << "        "  //
-           << " |  ";
-        format_cp_aligned_dot(t.psqt[bucket], ss, pos);
-        ss << "  "  //
-           << " |  ";
-        format_cp_aligned_dot(t.positional[bucket], ss, pos);
-        ss << "  "  //
-           << " |  ";
-        format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss, pos);
-        ss << "  "  //
-           << " |";
-        if (bucket == t.correctBucket)
-            ss << " <-- this bucket is used";
-        ss << '\n';
-    }
-
-    ss << "+------------+------------+------------+------------+\n";
-
-    return ss.str();
-}
-
-
-}  // namespace Stockfish::Eval::NNUE
--- a/src/nnue/nnue_misc.h
+++ b/src/nnue/nnue_misc.h
@@ -1,61 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef NNUE_MISC_H_INCLUDED
-#define NNUE_MISC_H_INCLUDED
-
-#include <cstddef>
-#include <string>
-
-#include "../types.h"
-#include "nnue_architecture.h"
-
-namespace Stockfish {
-
-class Position;
-
-namespace Eval::NNUE {
-
-struct EvalFile {
-    // Default net name, will use one of the EvalFileDefaultName* macros defined
-    // in evaluate.h
-    std::string defaultName;
-    // Selected net name, either via uci option or default
-    std::string current;
-    // Net description extracted from the net file
-    std::string netDescription;
-};
-
-
-struct NnueEvalTrace {
-    static_assert(LayerStacks == PSQTBuckets);
-
-    Value       psqt[LayerStacks];
-    Value       positional[LayerStacks];
-    std::size_t correctBucket;
-};
-
-struct Networks;
-struct AccumulatorCaches;
-
-std::string trace(Position& pos, const Networks& networks, AccumulatorCaches& caches);
-
-}  // namespace Stockfish::Eval::NNUE
-}  // namespace Stockfish
-
-#endif  // #ifndef NNUE_MISC_H_INCLUDED
--- a/src/nnue/simd.h
+++ b/src/nnue/simd.h
@@ -1,406 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef NNUE_SIMD_H_INCLUDED
-#define NNUE_SIMD_H_INCLUDED
-
-#if defined(USE_AVX2)
-    #include <immintrin.h>
-
-#elif defined(USE_SSE41)
-    #include <smmintrin.h>
-
-#elif defined(USE_SSSE3)
-    #include <tmmintrin.h>
-
-#elif defined(USE_SSE2)
-    #include <emmintrin.h>
-
-#elif defined(USE_NEON)
-    #include <arm_neon.h>
-#endif
-
-#include "../types.h"
-#include "nnue_common.h"
-
-namespace Stockfish::Eval::NNUE::SIMD {
-
-// If vector instructions are enabled, we update and refresh the
-// accumulator tile by tile such that each tile fits in the CPU's
-// vector registers.
-#define VECTOR
-
-#ifdef USE_AVX512
-using vec_t      = __m512i;
-using vec128_t   = __m128i;
-using psqt_vec_t = __m256i;
-using vec_uint_t = __m512i;
-    #define vec_load(a) _mm512_load_si512(a)
-    #define vec_store(a, b) _mm512_store_si512(a, b)
-    #define vec_add_16(a, b) _mm512_add_epi16(a, b)
-    #define vec_sub_16(a, b) _mm512_sub_epi16(a, b)
-    #define vec_mulhi_16(a, b) _mm512_mulhi_epi16(a, b)
-    #define vec_zero() _mm512_setzero_epi32()
-    #define vec_set_16(a) _mm512_set1_epi16(a)
-    #define vec_max_16(a, b) _mm512_max_epi16(a, b)
-    #define vec_min_16(a, b) _mm512_min_epi16(a, b)
-    #define vec_slli_16(a, b) _mm512_slli_epi16(a, b)
-    // Inverse permuted at load time
-    #define vec_packus_16(a, b) _mm512_packus_epi16(a, b)
-    #define vec_load_psqt(a) _mm256_load_si256(a)
-    #define vec_store_psqt(a, b) _mm256_store_si256(a, b)
-    #define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b)
-    #define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b)
-    #define vec_zero_psqt() _mm256_setzero_si256()
-
-    #ifdef USE_SSSE3
-        #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512())
-    #endif
-
-    #define vec128_zero _mm_setzero_si128()
-    #define vec128_set_16(a) _mm_set1_epi16(a)
-    #define vec128_load(a) _mm_load_si128(a)
-    #define vec128_storeu(a, b) _mm_storeu_si128(a, b)
-    #define vec128_add(a, b) _mm_add_epi16(a, b)
-    #define NumRegistersSIMD 16
-    #define MaxChunkSize 64
-
-#elif USE_AVX2
-using vec_t      = __m256i;
-using vec128_t   = __m128i;
-using psqt_vec_t = __m256i;
-using vec_uint_t = __m256i;
-    #define vec_load(a) _mm256_load_si256(a)
-    #define vec_store(a, b) _mm256_store_si256(a, b)
-    #define vec_add_16(a, b) _mm256_add_epi16(a, b)
-    #define vec_sub_16(a, b) _mm256_sub_epi16(a, b)
-    #define vec_mulhi_16(a, b) _mm256_mulhi_epi16(a, b)
-    #define vec_zero() _mm256_setzero_si256()
-    #define vec_set_16(a) _mm256_set1_epi16(a)
-    #define vec_max_16(a, b) _mm256_max_epi16(a, b)
-    #define vec_min_16(a, b) _mm256_min_epi16(a, b)
-    #define vec_slli_16(a, b) _mm256_slli_epi16(a, b)
-    // Inverse permuted at load time
-    #define vec_packus_16(a, b) _mm256_packus_epi16(a, b)
-    #define vec_load_psqt(a) _mm256_load_si256(a)
-    #define vec_store_psqt(a, b) _mm256_store_si256(a, b)
-    #define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b)
-    #define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b)
-    #define vec_zero_psqt() _mm256_setzero_si256()
-
-    #ifdef USE_SSSE3
-        #if defined(USE_VNNI) && !defined(USE_AVXVNNI)
-            #define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256())
-        #else
-            #define vec_nnz(a) \
-                _mm256_movemask_ps( \
-                  _mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
-        #endif
-    #endif
-
-    #define vec128_zero _mm_setzero_si128()
-    #define vec128_set_16(a) _mm_set1_epi16(a)
-    #define vec128_load(a) _mm_load_si128(a)
-    #define vec128_storeu(a, b) _mm_storeu_si128(a, b)
-    #define vec128_add(a, b) _mm_add_epi16(a, b)
-
-    #define NumRegistersSIMD 16
-    #define MaxChunkSize 32
-
-#elif USE_SSE2
-using vec_t      = __m128i;
-using vec128_t   = __m128i;
-using psqt_vec_t = __m128i;
-using vec_uint_t = __m128i;
-    #define vec_load(a) (*(a))
-    #define vec_store(a, b) *(a) = (b)
-    #define vec_add_16(a, b) _mm_add_epi16(a, b)
-    #define vec_sub_16(a, b) _mm_sub_epi16(a, b)
-    #define vec_mulhi_16(a, b) _mm_mulhi_epi16(a, b)
-    #define vec_zero() _mm_setzero_si128()
-    #define vec_set_16(a) _mm_set1_epi16(a)
-    #define vec_max_16(a, b) _mm_max_epi16(a, b)
-    #define vec_min_16(a, b) _mm_min_epi16(a, b)
-    #define vec_slli_16(a, b) _mm_slli_epi16(a, b)
-    #define vec_packus_16(a, b) _mm_packus_epi16(a, b)
-    #define vec_load_psqt(a) (*(a))
-    #define vec_store_psqt(a, b) *(a) = (b)
-    #define vec_add_psqt_32(a, b) _mm_add_epi32(a, b)
-    #define vec_sub_psqt_32(a, b) _mm_sub_epi32(a, b)
-    #define vec_zero_psqt() _mm_setzero_si128()
-
-    #ifdef USE_SSSE3
-        #define vec_nnz(a) \
-            _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128())))
-    #endif
-
-    #define vec128_zero _mm_setzero_si128()
-    #define vec128_set_16(a) _mm_set1_epi16(a)
-    #define vec128_load(a) _mm_load_si128(a)
-    #define vec128_storeu(a, b) _mm_storeu_si128(a, b)
-    #define vec128_add(a, b) _mm_add_epi16(a, b)
-
-    #define NumRegistersSIMD (Is64Bit ? 16 : 8)
-    #define MaxChunkSize 16
-
-#elif USE_NEON
-using vec_t      = int16x8_t;
-using psqt_vec_t = int32x4_t;
-using vec128_t   = uint16x8_t;
-using vec_uint_t = uint32x4_t;
-    #define vec_load(a) (*(a))
-    #define vec_store(a, b) *(a) = (b)
-    #define vec_add_16(a, b) vaddq_s16(a, b)
-    #define vec_sub_16(a, b) vsubq_s16(a, b)
-    #define vec_mulhi_16(a, b) vqdmulhq_s16(a, b)
-    #define vec_zero() vec_t{0}
-    #define vec_set_16(a) vdupq_n_s16(a)
-    #define vec_max_16(a, b) vmaxq_s16(a, b)
-    #define vec_min_16(a, b) vminq_s16(a, b)
-    #define vec_slli_16(a, b) vshlq_s16(a, vec_set_16(b))
-    #define vec_packus_16(a, b) reinterpret_cast<vec_t>(vcombine_u8(vqmovun_s16(a), vqmovun_s16(b)))
-    #define vec_load_psqt(a) (*(a))
-    #define vec_store_psqt(a, b) *(a) = (b)
-    #define vec_add_psqt_32(a, b) vaddq_s32(a, b)
-    #define vec_sub_psqt_32(a, b) vsubq_s32(a, b)
-    #define vec_zero_psqt() psqt_vec_t{0}
-
-static constexpr std::uint32_t Mask[4] = {1, 2, 4, 8};
-    #define vec_nnz(a) vaddvq_u32(vandq_u32(vtstq_u32(a, a), vld1q_u32(Mask)))
-    #define vec128_zero vdupq_n_u16(0)
-    #define vec128_set_16(a) vdupq_n_u16(a)
-    #define vec128_load(a) vld1q_u16(reinterpret_cast<const std::uint16_t*>(a))
-    #define vec128_storeu(a, b) vst1q_u16(reinterpret_cast<std::uint16_t*>(a), b)
-    #define vec128_add(a, b) vaddq_u16(a, b)
-
-    #define NumRegistersSIMD 16
-    #define MaxChunkSize 16
-
-#else
-    #undef VECTOR
-
-#endif
-
-struct Vec16Wrapper {
-#ifdef VECTOR
-    using type = vec_t;
-    static type add(const type& lhs, const type& rhs) { return vec_add_16(lhs, rhs); }
-    static type sub(const type& lhs, const type& rhs) { return vec_sub_16(lhs, rhs); }
-#else
-    using type = BiasType;
-    static type add(const type& lhs, const type& rhs) { return lhs + rhs; }
-    static type sub(const type& lhs, const type& rhs) { return lhs - rhs; }
-#endif
-};
-
-struct Vec32Wrapper {
-#ifdef VECTOR
-    using type = psqt_vec_t;
-    static type add(const type& lhs, const type& rhs) { return vec_add_psqt_32(lhs, rhs); }
-    static type sub(const type& lhs, const type& rhs) { return vec_sub_psqt_32(lhs, rhs); }
-#else
-    using type = PSQTWeightType;
-    static type add(const type& lhs, const type& rhs) { return lhs + rhs; }
-    static type sub(const type& lhs, const type& rhs) { return lhs - rhs; }
-#endif
-};
-
-enum UpdateOperation {
-    Add,
-    Sub
-};
-
-template<typename VecWrapper,
-         UpdateOperation... ops,
-         std::enable_if_t<sizeof...(ops) == 0, bool> = true>
-typename VecWrapper::type fused(const typename VecWrapper::type& in) {
-    return in;
-}
-
-template<typename VecWrapper,
-         UpdateOperation update_op,
-         UpdateOperation... ops,
-         typename T,
-         typename... Ts,
-         std::enable_if_t<is_all_same_v<typename VecWrapper::type, T, Ts...>, bool> = true,
-         std::enable_if_t<sizeof...(ops) == sizeof...(Ts), bool>                    = true>
-typename VecWrapper::type
-fused(const typename VecWrapper::type& in, const T& operand, const Ts&... operands) {
-    switch (update_op)
-    {
-    case Add :
-        return fused<VecWrapper, ops...>(VecWrapper::add(in, operand), operands...);
-    case Sub :
-        return fused<VecWrapper, ops...>(VecWrapper::sub(in, operand), operands...);
-    default :
-        static_assert(update_op == Add || update_op == Sub,
-                      "Only Add and Sub are currently supported.");
-        return typename VecWrapper::type();
-    }
-}
-
-#if defined(USE_AVX512)
-
-[[maybe_unused]] static int m512_hadd(__m512i sum, int bias) {
-    return _mm512_reduce_add_epi32(sum) + bias;
-}
-
-[[maybe_unused]] static void m512_add_dpbusd_epi32(__m512i& acc, __m512i a, __m512i b) {
-
-    #if defined(USE_VNNI)
-    acc = _mm512_dpbusd_epi32(acc, a, b);
-    #else
-    __m512i product0 = _mm512_maddubs_epi16(a, b);
-    product0         = _mm512_madd_epi16(product0, _mm512_set1_epi16(1));
-    acc              = _mm512_add_epi32(acc, product0);
-    #endif
-}
-
-#endif
-
-#if defined(USE_AVX2)
-
-[[maybe_unused]] static int m256_hadd(__m256i sum, int bias) {
-    __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
-    sum128         = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
-    sum128         = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
-    return _mm_cvtsi128_si32(sum128) + bias;
-}
-
-[[maybe_unused]] static void m256_add_dpbusd_epi32(__m256i& acc, __m256i a, __m256i b) {
-
-    #if defined(USE_VNNI)
-    acc = _mm256_dpbusd_epi32(acc, a, b);
-    #else
-    __m256i product0 = _mm256_maddubs_epi16(a, b);
-    product0         = _mm256_madd_epi16(product0, _mm256_set1_epi16(1));
-    acc              = _mm256_add_epi32(acc, product0);
-    #endif
-}
-
-#endif
-
-#if defined(USE_SSSE3)
-
-[[maybe_unused]] static int m128_hadd(__m128i sum, int bias) {
-    sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E));  //_MM_PERM_BADC
-    sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1));  //_MM_PERM_CDAB
-    return _mm_cvtsi128_si32(sum) + bias;
-}
-
-[[maybe_unused]] static void m128_add_dpbusd_epi32(__m128i& acc, __m128i a, __m128i b) {
-
-    __m128i product0 = _mm_maddubs_epi16(a, b);
-    product0         = _mm_madd_epi16(product0, _mm_set1_epi16(1));
-    acc              = _mm_add_epi32(acc, product0);
-}
-
-#endif
-
-#if defined(USE_NEON_DOTPROD)
-
-[[maybe_unused]] static void
-dotprod_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) {
-
-    acc = vdotq_s32(acc, a, b);
-}
-#endif
-
-#if defined(USE_NEON)
-
-[[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) {
-    #if USE_NEON >= 8
-    return vaddvq_s32(s);
-    #else
-    return s[0] + s[1] + s[2] + s[3];
-    #endif
-}
-
-[[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) {
-    return neon_m128_reduce_add_epi32(sum) + bias;
-}
-
-#endif
-
-#if USE_NEON >= 8
-[[maybe_unused]] static void neon_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) {
-
-    int16x8_t product0 = vmull_s8(vget_low_s8(a), vget_low_s8(b));
-    int16x8_t product1 = vmull_high_s8(a, b);
-    int16x8_t sum      = vpaddq_s16(product0, product1);
-    acc                = vpadalq_s16(acc, sum);
-}
-#endif
-
-
-// Compute optimal SIMD register count for feature transformer accumulation.
-template<IndexType TransformedFeatureWidth, IndexType HalfDimensions, IndexType PSQTBuckets>
-class SIMDTiling {
-#ifdef VECTOR
-        // We use __m* types as template arguments, which causes GCC to emit warnings
-        // about losing some attribute information. This is irrelevant to us as we
-        // only take their size, so the following pragma are harmless.
-    #if defined(__GNUC__)
-        #pragma GCC diagnostic push
-        #pragma GCC diagnostic ignored "-Wignored-attributes"
-    #endif
-
-    template<typename SIMDRegisterType, typename LaneType, int NumLanes, int MaxRegisters>
-    static constexpr int BestRegisterCount() {
-        constexpr std::size_t RegisterSize = sizeof(SIMDRegisterType);
-        constexpr std::size_t LaneSize     = sizeof(LaneType);
-
-        static_assert(RegisterSize >= LaneSize);
-        static_assert(MaxRegisters <= NumRegistersSIMD);
-        static_assert(MaxRegisters > 0);
-        static_assert(NumRegistersSIMD > 0);
-        static_assert(RegisterSize % LaneSize == 0);
-        static_assert((NumLanes * LaneSize) % RegisterSize == 0);
-
-        const int ideal = (NumLanes * LaneSize) / RegisterSize;
-        if (ideal <= MaxRegisters)
-            return ideal;
-
-        // Look for the largest divisor of the ideal register count that is smaller than MaxRegisters
-        for (int divisor = MaxRegisters; divisor > 1; --divisor)
-            if (ideal % divisor == 0)
-                return divisor;
-
-        return 1;
-    }
-
-    #if defined(__GNUC__)
-        #pragma GCC diagnostic pop
-    #endif
-
-   public:
-    static constexpr int NumRegs =
-      BestRegisterCount<vec_t, WeightType, TransformedFeatureWidth, NumRegistersSIMD>();
-    static constexpr int NumPsqtRegs =
-      BestRegisterCount<psqt_vec_t, PSQTWeightType, PSQTBuckets, NumRegistersSIMD>();
-
-    static constexpr IndexType TileHeight     = NumRegs * sizeof(vec_t) / 2;
-    static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4;
-
-    static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions");
-    static_assert(PSQTBuckets % PsqtTileHeight == 0, "PsqtTileHeight must divide PSQTBuckets");
-#endif
-};
-}
-
-#endif
--- a/src/numa.h
+++ b/src/numa.h
--- a/src/pawns.cpp
+++ b/src/pawns.cpp
@@ -0,0 +1,281 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <algorithm>
+#include <cassert>
+
+#include "bitboard.h"
+#include "pawns.h"
+#include "position.h"
+#include "thread.h"
+
+namespace {
+
+  #define V Value
+  #define S(mg, eg) make_score(mg, eg)
+
+  // Pawn penalties, subtracted from the score in evaluate()
+  constexpr Score Backward      = S( 9, 24);
+  constexpr Score Doubled       = S(11, 56);
+  constexpr Score Isolated      = S( 5, 15);
+  constexpr Score WeakLever     = S( 0, 56);
+  constexpr Score WeakUnopposed = S(13, 27);
+
+  // Score added for a blocked pawn whose relative rank is above RANK_4, by [rank - 4]
+  constexpr Score BlockedPawn[2] = { S(-11, -4), S(-3, 4) };
+
+  constexpr Score BlockedStorm[RANK_NB] = {  // by [theirRank]; subtracted in evaluate_shelter()
+    S(0, 0), S(0, 0), S(76, 78), S(-10, 15), S(-7, 10), S(-4, 6), S(-1, 2)
+  };
+
+  // Connected pawn bonus by [relative rank]; scaled further in evaluate()
+  constexpr int Connected[RANK_NB] = { 0, 7, 8, 12, 29, 48, 86 };
+
+  // Strength of pawn shelter for our king by [distance from edge][rank].
+  // RANK_1 = 0 is used for files where we have no pawn, or pawn is behind our king.
+  constexpr Value ShelterStrength[int(FILE_NB) / 2][RANK_NB] = {
+    { V( -6), V( 81), V( 93), V( 58), V( 39), V( 18), V(  25) },
+    { V(-43), V( 61), V( 35), V(-49), V(-29), V(-11), V( -63) },
+    { V(-10), V( 75), V( 23), V( -2), V( 32), V(  3), V( -45) },
+    { V(-39), V(-13), V(-29), V(-52), V(-48), V(-67), V(-166) }
+  };
+
+  // Danger of enemy pawns moving toward our king by [distance from edge][rank].
+  // RANK_1 = 0 is used for files where the enemy has no pawn, or their pawn
+  // is behind our king. Note that UnblockedStorm[0][1-2] accommodate opponent pawn
+  // on edge, likely blocked by our king.
+  constexpr Value UnblockedStorm[int(FILE_NB) / 2][RANK_NB] = {
+    { V( 85), V(-289), V(-166), V(97), V(50), V( 45), V( 50) },
+    { V( 46), V( -25), V( 122), V(45), V(37), V(-10), V( 20) },
+    { V( -6), V(  51), V( 168), V(34), V(-2), V(-22), V(-14) },
+    { V(-15), V( -11), V( 101), V( 4), V(11), V(-15), V(-29) }
+  };
+
+  #undef S
+  #undef V
+
+
+  /// evaluate() returns the static pawn-structure score for side Us. As a side effect it
+  /// fills e's passedPawns, pawnAttacks and pawnAttacksSpan for Us, sets e->kingSquares[Us]
+  /// to SQ_NONE and adds to e->blockedCount. We cannot use the location of pieces or king
+  /// here, as the result is stored in a small cache and re-used even when pieces have moved.
+
+  template<Color Us>
+  Score evaluate(const Position& pos, Pawns::Entry* e) {
+
+    constexpr Color     Them = ~Us;
+    constexpr Direction Up   = pawn_push(Us);
+
+    Bitboard neighbours, stoppers, support, phalanx, opposed;
+    Bitboard lever, leverPush, blocked;
+    Square s;
+    bool backward, passed, doubled;
+    Score score = SCORE_ZERO;
+    const Square* pl = pos.squares<PAWN>(Us);
+
+    Bitboard ourPawns   = pos.pieces(  Us, PAWN);
+    Bitboard theirPawns = pos.pieces(Them, PAWN);
+
+    Bitboard doubleAttackThem = pawn_double_attacks_bb<Them>(theirPawns);
+
+    e->passedPawns[Us] = 0;
+    e->kingSquares[Us] = SQ_NONE;  // presumably invalidates the cached king safety (see Entry::king_safety)
+    e->pawnAttacks[Us] = e->pawnAttacksSpan[Us] = pawn_attacks_bb<Us>(ourPawns);
+    e->blockedCount += popcount(shift<Up>(ourPawns) & (theirPawns | doubleAttackThem));  // summed over both colors; probe() zeroes it first
+
+    // Loop through all pawns of the current color (list terminated by SQ_NONE) and score each pawn
+    while ((s = *pl++) != SQ_NONE)
+    {
+        assert(pos.piece_on(s) == make_piece(Us, PAWN));
+
+        Rank r = relative_rank(Us, s);
+
+        // Flag the pawn: collect the friendly and enemy pawns that matter for square s
+        opposed    = theirPawns & forward_file_bb(Us, s);
+        blocked    = theirPawns & (s + Up);
+        stoppers   = theirPawns & passed_pawn_span(Us, s);
+        lever      = theirPawns & pawn_attacks_bb(Us, s);
+        leverPush  = theirPawns & pawn_attacks_bb(Us, s + Up);
+        doubled    = ourPawns   & (s - Up);
+        neighbours = ourPawns   & adjacent_files_bb(s);
+        phalanx    = neighbours & rank_bb(s);
+        support    = neighbours & rank_bb(s - Up);
+
+        // A pawn is backward when it is behind all pawns of the same color on
+        // the adjacent files and cannot safely advance.
+        backward =  !(neighbours & forward_ranks_bb(Them, s + Up))
+                  && (leverPush | blocked);
+
+        // Compute additional span if the pawn is neither backward nor blocked
+        if (!backward && !blocked)
+            e->pawnAttacksSpan[Us] |= pawn_attack_span(Us, s);
+
+        // A pawn is passed if one of the three following conditions is true:
+        // (a) there are no stoppers except some levers
+        // (b) the only stoppers are the leverPush, but we outnumber them
+        // (c) there is only one front stopper which can be levered.
+        //     (Refined in Evaluation::passed)
+        passed =   !(stoppers ^ lever)
+                || (   !(stoppers ^ leverPush)
+                    && popcount(phalanx) >= popcount(leverPush))
+                || (   stoppers == blocked && r >= RANK_5
+                    && (shift<Up>(support) & ~(theirPawns | doubleAttackThem)));
+
+        passed &= !(forward_file_bb(Us, s) & ourPawns);
+
+        // Passed pawns will be properly scored later in evaluation when we have
+        // full attack info.
+        if (passed)
+            e->passedPawns[Us] |= s;
+
+        // Score this pawn: connected bonus, else isolated/doubled penalty, else backward penalty
+        if (support | phalanx)
+        {
+            int v =  Connected[r] * (2 + bool(phalanx) - bool(opposed))
+                   + 21 * popcount(support);
+
+            score += make_score(v, v * (r - 2) / 4);
+        }
+
+        else if (!neighbours)
+        {
+            if (     opposed
+                &&  (ourPawns & forward_file_bb(Them, s))
+                && !(theirPawns & adjacent_files_bb(s)))
+                score -= Doubled;
+            else
+                score -=  Isolated
+                        + WeakUnopposed * !opposed;
+        }
+
+        else if (backward)
+            score -=  Backward
+                    + WeakUnopposed * !opposed;
+
+        if (!support)
+            score -=  Doubled * doubled
+                    + WeakLever * more_than_one(lever);
+
+        if (blocked && r > RANK_4)
+            score += BlockedPawn[r-4];
+    }
+
+    return score;
+  }
+
+} // namespace
+
+namespace Pawns {
+
+
+/// Pawns::probe() looks up the current position's pawn key in the pawns hash table of
+/// the position's thread and returns a pointer to the Entry stored in that slot. If the
+/// stored key differs, the Entry is overwritten with a fresh evaluation for both colors,
+/// so we don't have to recompute all when the same pawns configuration occurs again.
+
+Entry* probe(const Position& pos) {
+
+  Key key = pos.pawn_key();
+  Entry* e = pos.this_thread()->pawnsTable[key];
+
+  if (e->key == key)  // slot already holds this pawn configuration
+      return e;
+
+  e->key = key;
+  e->blockedCount = 0;  // evaluate() adds to it once per color
+  e->scores[WHITE] = evaluate<WHITE>(pos, e);
+  e->scores[BLACK] = evaluate<BLACK>(pos, e);
+
+  return e;
+}
+
+
+/// Entry::evaluate_shelter() returns the shelter bonus minus the storm penalty for a king
+/// of color Us on ksq, summed over the (clamped) king file and the two files next to it.
+
+template<Color Us>
+Score Entry::evaluate_shelter(const Position& pos, Square ksq) const {
+
+  constexpr Color Them = ~Us;
+
+  Bitboard b = pos.pieces(PAWN) & ~forward_ranks_bb(Them, ksq);
+  Bitboard ourPawns = b & pos.pieces(Us) & ~pawnAttacks[Them];
+  Bitboard theirPawns = b & pos.pieces(Them);
+
+  Score bonus = make_score(5, 5);
+
+  File center = Utility::clamp(file_of(ksq), FILE_B, FILE_G);  // king file clamped to [FILE_B, FILE_G]
+  for (File f = File(center - 1); f <= File(center + 1); ++f)
+  {
+      b = ourPawns & file_bb(f);
+      int ourRank = b ? relative_rank(Us, frontmost_sq(Them, b)) : 0;  // 0 when we have no pawn on this file
+
+      b = theirPawns & file_bb(f);
+      int theirRank = b ? relative_rank(Us, frontmost_sq(Them, b)) : 0;  // 0 when they have no pawn on this file
+
+      int d = edge_distance(f);
+      bonus += make_score(ShelterStrength[d][ourRank], 0);
+
+      if (ourRank && (ourRank == theirRank - 1))  // their pawn stands directly in front of ours
+          bonus -= BlockedStorm[theirRank];
+      else
+          bonus -= make_score(UnblockedStorm[d][theirRank], 0);
+  }
+
+  return bonus;
+}
+
+
+/// Entry::do_king_safety() calculates the king safety score for Us and records the king square and
+/// castling rights it used. king_safety() calls it only when either changed (about 20% of calls).
+
+template<Color Us>
+Score Entry::do_king_safety(const Position& pos) {
+
+  Square ksq = pos.square<KING>(Us);
+  kingSquares[Us] = ksq;
+  castlingRights[Us] = pos.castling_rights(Us);
+  auto compare = [](Score a, Score b) { return mg_value(a) < mg_value(b); };  // order by middlegame value only
+
+  Score shelter = evaluate_shelter<Us>(pos, ksq);
+
+  // If we can castle, use the shelter score after castling if its middlegame value is bigger
+
+  if (pos.can_castle(Us & KING_SIDE))
+      shelter = std::max(shelter, evaluate_shelter<Us>(pos, relative_square(Us, SQ_G1)), compare);
+
+  if (pos.can_castle(Us & QUEEN_SIDE))
+      shelter = std::max(shelter, evaluate_shelter<Us>(pos, relative_square(Us, SQ_C1)), compare);
+
+  // In endgame we like to bring our king near our closest pawn
+  Bitboard pawns = pos.pieces(Us, PAWN);
+  int minPawnDist = 6;  // value kept when we have no pawns; also caps the distance
+
+  if (pawns & attacks_bb<KING>(ksq))
+      minPawnDist = 1;
+  else while (pawns)
+      minPawnDist = std::min(minPawnDist, distance(ksq, pop_lsb(&pawns)));
+
+  return shelter - make_score(0, 16 * minPawnDist);  // distance penalty hits the endgame part only
+}
+
+// Explicit template instantiation
+template Score Entry::do_king_safety<WHITE>(const Position& pos);
+template Score Entry::do_king_safety<BLACK>(const Position& pos);
+
+} // namespace Pawns
--- a/src/pawns.h
+++ b/src/pawns.h
@@ -0,0 +1,70 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef PAWNS_H_INCLUDED
+#define PAWNS_H_INCLUDED
+
+#include "misc.h"
+#include "position.h"
+#include "types.h"
+
+namespace Pawns {
+
+/// Pawns::Entry contains various information about a pawn structure. A lookup
+/// to the pawn hash table (performed by calling the probe function) returns a
+/// pointer to an Entry object.
+
+struct Entry {
+
+  Score pawn_score(Color c) const { return scores[c]; }
+  Bitboard pawn_attacks(Color c) const { return pawnAttacks[c]; }
+  Bitboard passed_pawns(Color c) const { return passedPawns[c]; }
+  Bitboard pawn_attacks_span(Color c) const { return pawnAttacksSpan[c]; }
+  int passed_count() const { return popcount(passedPawns[WHITE] | passedPawns[BLACK]); }  // both colors combined
+  int blocked_count() const { return blockedCount; }
+
+  template<Color Us>
+  Score king_safety(const Position& pos) {  // cached score is reused while king square and castling rights are unchanged
+    return  kingSquares[Us] == pos.square<KING>(Us) && castlingRights[Us] == pos.castling_rights(Us)
+          ? kingSafety[Us] : (kingSafety[Us] = do_king_safety<Us>(pos));  // otherwise recompute and store
+  }
+
+  template<Color Us>
+  Score do_king_safety(const Position& pos);
+
+  template<Color Us>
+  Score evaluate_shelter(const Position& pos, Square ksq) const;
+
+  Key key;
+  Score scores[COLOR_NB];
+  Bitboard passedPawns[COLOR_NB];
+  Bitboard pawnAttacks[COLOR_NB];
+  Bitboard pawnAttacksSpan[COLOR_NB];
+  Square kingSquares[COLOR_NB];  // king square kingSafety[] was last computed for
+  Score kingSafety[COLOR_NB];  // cached do_king_safety() result
+  int castlingRights[COLOR_NB];  // castling rights kingSafety[] was last computed for
+  int blockedCount;
+};
+
+typedef HashTable<Entry, 131072> Table;
+
+Entry* probe(const Position& pos);
+
+} // namespace Pawns
+
+#endif // #ifndef PAWNS_H_INCLUDED
--- a/src/perft.h
+++ b/src/perft.h
@@ -1,67 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef PERFT_H_INCLUDED
-#define PERFT_H_INCLUDED
-
-#include <cstdint>
-
-#include "movegen.h"
-#include "position.h"
-#include "types.h"
-#include "uci.h"
-
-namespace Stockfish::Benchmark {
-
-// Utility to verify move generation. All the leaf nodes up
-// to the given depth are generated and counted, and the sum is returned.
-template<bool Root>
-uint64_t perft(Position& pos, Depth depth) {
-
-    StateInfo st;
-
-    uint64_t   cnt, nodes = 0;
-    const bool leaf = (depth == 2);
-
-    for (const auto& m : MoveList<LEGAL>(pos))
-    {
-        if (Root && depth <= 1)
-            cnt = 1, nodes++;
-        else
-        {
-            pos.do_move(m, st);
-            cnt = leaf ? MoveList<LEGAL>(pos).size() : perft<false>(pos, depth - 1);
-            nodes += cnt;
-            pos.undo_move(m);
-        }
-        if (Root)
-            sync_cout << UCIEngine::move(m, pos.is_chess960()) << ": " << cnt << sync_endl;
-    }
-    return nodes;
-}
-
-inline uint64_t perft(const std::string& fen, Depth depth, bool isChess960) {
-    StateListPtr states(new std::deque<StateInfo>(1));
-    Position     p;
-    p.set(fen, isChess960, &states->back());
-
-    return perft<true>(p, depth);
-}
-}
-
-#endif  // PERFT_H_INCLUDED
--- a/src/position.cpp
+++ b/src/position.cpp
--- a/src/position.h
+++ b/src/position.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,352 +21,445 @@

 #include <cassert>
 #include <deque>
-#include <iosfwd>
-#include <memory>
+#include <memory> // For std::unique_ptr
 #include <string>

 #include "bitboard.h"
+#include "evaluate.h"
 #include "types.h"

-namespace Stockfish {
+#include "nnue/nnue_accumulator.h"

-class TranspositionTable;

-// StateInfo struct stores information needed to restore a Position object to
-// its previous state when we retract a move. Whenever a move is made on the
-// board (by calling Position::do_move), a StateInfo object must be passed.
+/// StateInfo struct stores information needed to restore a Position object to
+/// its previous state when we retract a move. Whenever a move is made on the
+/// board (by calling Position::do_move), a StateInfo object must be passed.

 struct StateInfo {

-    // Copied when making a move
-    Key    materialKey;
-    Key    pawnKey;
-    Key    minorPieceKey;
-    Key    nonPawnKey[COLOR_NB];
-    Value  nonPawnMaterial[COLOR_NB];
-    int    castlingRights;
-    int    rule50;
-    int    pliesFromNull;
-    Square epSquare;
+  // Copied when making a move
+  Key    pawnKey;
+  Key    materialKey;
+  Value  nonPawnMaterial[COLOR_NB];
+  int    castlingRights;
+  int    rule50;
+  int    pliesFromNull;
+  Square epSquare;

-    // Not copied when making a move (will be recomputed anyhow)
-    Key        key;
-    Bitboard   checkersBB;
-    StateInfo* previous;
-    Bitboard   blockersForKing[COLOR_NB];
-    Bitboard   pinners[COLOR_NB];
-    Bitboard   checkSquares[PIECE_TYPE_NB];
-    Piece      capturedPiece;
-    int        repetition;
+  // Not copied when making a move (will be recomputed anyhow)
+  Key        key;
+  Bitboard   checkersBB;
+  Piece      capturedPiece;
+  StateInfo* previous;
+  Bitboard   blockersForKing[COLOR_NB];
+  Bitboard   pinners[COLOR_NB];
+  Bitboard   checkSquares[PIECE_TYPE_NB];
+  int        repetition;
+
+  // Used by NNUE
+  Eval::NNUE::Accumulator accumulator;
+  DirtyPiece dirtyPiece;
 };


-// A list to keep track of the position states along the setup moves (from the
-// start position to the position just before the search starts). Needed by
-// 'draw by repetition' detection. Use a std::deque because pointers to
-// elements are not invalidated upon list resizing.
-using StateListPtr = std::unique_ptr<std::deque<StateInfo>>;
+/// A list to keep track of the position states along the setup moves (from the
+/// start position to the position just before the search starts). Needed by
+/// 'draw by repetition' detection. Use a std::deque because pointers to
+/// elements are not invalidated upon list resizing.
+typedef std::unique_ptr<std::deque<StateInfo>> StateListPtr;


-// Position class stores information regarding the board representation as
-// pieces, side to move, hash keys, castling info, etc. Important methods are
-// do_move() and undo_move(), used by the search to update node info when
-// traversing the search tree.
+/// Position class stores information regarding the board representation as
+/// pieces, side to move, hash keys, castling info, etc. Important methods are
+/// do_move() and undo_move(), used by the search to update node info when
+/// traversing the search tree.
+class Thread;
+
 class Position {
-   public:
-    static void init();
+public:
+  static void init();

-    Position()                           = default;
-    Position(const Position&)            = delete;
-    Position& operator=(const Position&) = delete;
+  Position() = default;
+  Position(const Position&) = delete;
+  Position& operator=(const Position&) = delete;

-    // FEN string input/output
-    Position&   set(const std::string& fenStr, bool isChess960, StateInfo* si);
-    Position&   set(const std::string& code, Color c, StateInfo* si);
-    std::string fen() const;
+  // FEN string input/output
+  Position& set(const std::string& fenStr, bool isChess960, StateInfo* si, Thread* th);
+  Position& set(const std::string& code, Color c, StateInfo* si);
+  const std::string fen() const;

-    // Position representation
-    Bitboard pieces() const;  // All pieces
-    template<typename... PieceTypes>
-    Bitboard pieces(PieceTypes... pts) const;
-    Bitboard pieces(Color c) const;
-    template<typename... PieceTypes>
-    Bitboard pieces(Color c, PieceTypes... pts) const;
-    Piece    piece_on(Square s) const;
-    Square   ep_square() const;
-    bool     empty(Square s) const;
-    template<PieceType Pt>
-    int count(Color c) const;
-    template<PieceType Pt>
-    int count() const;
-    template<PieceType Pt>
-    Square square(Color c) const;
+  // Position representation
+  Bitboard pieces(PieceType pt) const;
+  Bitboard pieces(PieceType pt1, PieceType pt2) const;
+  Bitboard pieces(Color c) const;
+  Bitboard pieces(Color c, PieceType pt) const;
+  Bitboard pieces(Color c, PieceType pt1, PieceType pt2) const;
+  Piece piece_on(Square s) const;
+  Square ep_square() const;
+  bool empty(Square s) const;
+  template<PieceType Pt> int count(Color c) const;
+  template<PieceType Pt> int count() const;
+  template<PieceType Pt> const Square* squares(Color c) const;
+  template<PieceType Pt> Square square(Color c) const;
+  bool is_on_semiopen_file(Color c, Square s) const;

-    // Castling
-    CastlingRights castling_rights(Color c) const;
-    bool           can_castle(CastlingRights cr) const;
-    bool           castling_impeded(CastlingRights cr) const;
-    Square         castling_rook_square(CastlingRights cr) const;
+  // Castling
+  CastlingRights castling_rights(Color c) const;
+  bool can_castle(CastlingRights cr) const;
+  bool castling_impeded(CastlingRights cr) const;
+  Square castling_rook_square(CastlingRights cr) const;

-    // Checking
-    Bitboard checkers() const;
-    Bitboard blockers_for_king(Color c) const;
-    Bitboard check_squares(PieceType pt) const;
-    Bitboard pinners(Color c) const;
+  // Checking
+  Bitboard checkers() const;
+  Bitboard blockers_for_king(Color c) const;
+  Bitboard check_squares(PieceType pt) const;
+  bool is_discovery_check_on_king(Color c, Move m) const;

-    // Attacks to/from a given square
-    Bitboard attackers_to(Square s) const;
-    Bitboard attackers_to(Square s, Bitboard occupied) const;
-    bool     attackers_to_exist(Square s, Bitboard occupied, Color c) const;
-    void     update_slider_blockers(Color c) const;
-    template<PieceType Pt>
-    Bitboard attacks_by(Color c) const;
+  // Attacks to/from a given square
+  Bitboard attackers_to(Square s) const;
+  Bitboard attackers_to(Square s, Bitboard occupied) const;
+  Bitboard slider_blockers(Bitboard sliders, Square s, Bitboard& pinners) const;

-    // Properties of moves
-    bool  legal(Move m) const;
-    bool  pseudo_legal(const Move m) const;
-    bool  capture(Move m) const;
-    bool  capture_stage(Move m) const;
-    bool  gives_check(Move m) const;
-    Piece moved_piece(Move m) const;
-    Piece captured_piece() const;
+  // Properties of moves
+  bool legal(Move m) const;
+  bool pseudo_legal(const Move m) const;
+  bool capture(Move m) const;
+  bool capture_or_promotion(Move m) const;
+  bool gives_check(Move m) const;
+  bool advanced_pawn_push(Move m) const;
+  Piece moved_piece(Move m) const;
+  Piece captured_piece() const;

-    // Doing and undoing moves
-    void       do_move(Move m, StateInfo& newSt, const TranspositionTable* tt);
-    DirtyPiece do_move(Move m, StateInfo& newSt, bool givesCheck, const TranspositionTable* tt);
-    void       undo_move(Move m);
-    void       do_null_move(StateInfo& newSt, const TranspositionTable& tt);
-    void       undo_null_move();
+  // Piece specific
+  bool pawn_passed(Color c, Square s) const;
+  bool opposite_bishops() const;
+  int  pawns_on_same_color_squares(Color c, Square s) const;

-    // Static Exchange Evaluation
-    bool see_ge(Move m, int threshold = 0) const;
+  // Doing and undoing moves
+  void do_move(Move m, StateInfo& newSt);
+  void do_move(Move m, StateInfo& newSt, bool givesCheck);
+  void undo_move(Move m);
+  void do_null_move(StateInfo& newSt);
+  void undo_null_move();

-    // Accessing hash keys
-    Key key() const;
-    Key material_key() const;
-    Key pawn_key() const;
-    Key minor_piece_key() const;
-    Key non_pawn_key(Color c) const;
+  // Static Exchange Evaluation
+  bool see_ge(Move m, Value threshold = VALUE_ZERO) const;

-    // Other properties of the position
-    Color side_to_move() const;
-    int   game_ply() const;
-    bool  is_chess960() const;
-    bool  is_draw(int ply) const;
-    bool  is_repetition(int ply) const;
-    bool  upcoming_repetition(int ply) const;
-    bool  has_repeated() const;
-    int   rule50_count() const;
-    Value non_pawn_material(Color c) const;
-    Value non_pawn_material() const;
+  // Accessing hash keys
+  Key key() const;
+  Key key_after(Move m) const;
+  Key material_key() const;
+  Key pawn_key() const;

-    // Position consistency check, for debugging
-    bool pos_is_ok() const;
-    void flip();
+  // Other properties of the position
+  Color side_to_move() const;
+  int game_ply() const;
+  bool is_chess960() const;
+  Thread* this_thread() const;
+  bool is_draw(int ply) const;
+  bool has_game_cycle(int ply) const;
+  bool has_repeated() const;
+  int rule50_count() const;
+  Score psq_score() const;
+  Value non_pawn_material(Color c) const;
+  Value non_pawn_material() const;

-    StateInfo* state() const;
+  // Position consistency check, for debugging
+  bool pos_is_ok() const;
+  void flip();

-    void put_piece(Piece pc, Square s);
-    void remove_piece(Square s);
+  // Used by NNUE
+  StateInfo* state() const;
+  const EvalList* eval_list() const;

-   private:
-    // Initialization helpers (used while setting up a position)
-    void set_castling_right(Color c, Square rfrom);
-    void set_state() const;
-    void set_check_info() const;
+private:
+  // Initialization helpers (used while setting up a position)
+  void set_castling_right(Color c, Square rfrom);
+  void set_state(StateInfo* si) const;
+  void set_check_info(StateInfo* si) const;

-    // Other helpers
-    void move_piece(Square from, Square to);
-    template<bool Do>
-    void do_castling(Color             us,
-                     Square            from,
-                     Square&           to,
-                     Square&           rfrom,
-                     Square&           rto,
-                     DirtyPiece* const dp = nullptr);
-    template<bool AfterMove>
-    Key adjust_key50(Key k) const;
+  // Other helpers
+  void put_piece(Piece pc, Square s);
+  void remove_piece(Square s);
+  void move_piece(Square from, Square to);
+  template<bool Do>
+  void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto);

-    // Data members
-    Piece      board[SQUARE_NB];
-    Bitboard   byTypeBB[PIECE_TYPE_NB];
-    Bitboard   byColorBB[COLOR_NB];
-    int        pieceCount[PIECE_NB];
-    int        castlingRightsMask[SQUARE_NB];
-    Square     castlingRookSquare[CASTLING_RIGHT_NB];
-    Bitboard   castlingPath[CASTLING_RIGHT_NB];
-    StateInfo* st;
-    int        gamePly;
-    Color      sideToMove;
-    bool       chess960;
+  // ID of a piece on a given square
+  PieceId piece_id_on(Square sq) const;
+
+  // Data members
+  Piece board[SQUARE_NB];
+  Bitboard byTypeBB[PIECE_TYPE_NB];
+  Bitboard byColorBB[COLOR_NB];
+  int pieceCount[PIECE_NB];
+  Square pieceList[PIECE_NB][16];
+  int index[SQUARE_NB];
+  int castlingRightsMask[SQUARE_NB];
+  Square castlingRookSquare[CASTLING_RIGHT_NB];
+  Bitboard castlingPath[CASTLING_RIGHT_NB];
+  int gamePly;
+  Color sideToMove;
+  Score psq;
+  Thread* thisThread;
+  StateInfo* st;
+  bool chess960;
+
+  // List of pieces used in NNUE evaluation function
+  EvalList evalList;
 };

-std::ostream& operator<<(std::ostream& os, const Position& pos);
+namespace PSQT {
+  extern Score psq[PIECE_NB][SQUARE_NB];
+}

-inline Color Position::side_to_move() const { return sideToMove; }
+extern std::ostream& operator<<(std::ostream& os, const Position& pos);
+
+inline Color Position::side_to_move() const {
+  return sideToMove;
+}

 inline Piece Position::piece_on(Square s) const {
-    assert(is_ok(s));
-    return board[s];
+  assert(is_ok(s));
+  return board[s];
 }

-inline bool Position::empty(Square s) const { return piece_on(s) == NO_PIECE; }
-
-inline Piece Position::moved_piece(Move m) const { return piece_on(m.from_sq()); }
-
-inline Bitboard Position::pieces() const { return byTypeBB[ALL_PIECES]; }
-
-template<typename... PieceTypes>
-inline Bitboard Position::pieces(PieceTypes... pts) const {
-    return (byTypeBB[pts] | ...);
+inline bool Position::empty(Square s) const {
+  return piece_on(s) == NO_PIECE;
 }

-inline Bitboard Position::pieces(Color c) const { return byColorBB[c]; }
-
-template<typename... PieceTypes>
-inline Bitboard Position::pieces(Color c, PieceTypes... pts) const {
-    return pieces(c) & pieces(pts...);
+inline Piece Position::moved_piece(Move m) const {
+  return piece_on(from_sq(m));
 }

-template<PieceType Pt>
-inline int Position::count(Color c) const {
-    return pieceCount[make_piece(c, Pt)];
+inline Bitboard Position::pieces(PieceType pt = ALL_PIECES) const {
+  return byTypeBB[pt];
 }

-template<PieceType Pt>
-inline int Position::count() const {
-    return count<Pt>(WHITE) + count<Pt>(BLACK);
+inline Bitboard Position::pieces(PieceType pt1, PieceType pt2) const {
+  return pieces(pt1) | pieces(pt2);
 }

-template<PieceType Pt>
-inline Square Position::square(Color c) const {
-    assert(count<Pt>(c) == 1);
-    return lsb(pieces(c, Pt));
+inline Bitboard Position::pieces(Color c) const {
+  return byColorBB[c];
 }

-inline Square Position::ep_square() const { return st->epSquare; }
+inline Bitboard Position::pieces(Color c, PieceType pt) const {
+  return pieces(c) & pieces(pt);
+}

-inline bool Position::can_castle(CastlingRights cr) const { return st->castlingRights & cr; }
+inline Bitboard Position::pieces(Color c, PieceType pt1, PieceType pt2) const {
+  return pieces(c) & (pieces(pt1) | pieces(pt2));
+}
+
+template<PieceType Pt> inline int Position::count(Color c) const {
+  return pieceCount[make_piece(c, Pt)];
+}
+
+template<PieceType Pt> inline int Position::count() const {
+  return count<Pt>(WHITE) + count<Pt>(BLACK);
+}
+
+template<PieceType Pt> inline const Square* Position::squares(Color c) const {
+  return pieceList[make_piece(c, Pt)];
+}
+
+template<PieceType Pt> inline Square Position::square(Color c) const {
+  assert(pieceCount[make_piece(c, Pt)] == 1);
+  return squares<Pt>(c)[0];
+}
+
+inline Square Position::ep_square() const {
+  return st->epSquare;
+}
+
+inline bool Position::is_on_semiopen_file(Color c, Square s) const {
+  return !(pieces(c, PAWN) & file_bb(s));
+}
+
+inline bool Position::can_castle(CastlingRights cr) const {
+  return st->castlingRights & cr;
+}

 inline CastlingRights Position::castling_rights(Color c) const {
-    return c & CastlingRights(st->castlingRights);
+  return c & CastlingRights(st->castlingRights);
 }

 inline bool Position::castling_impeded(CastlingRights cr) const {
-    assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO);
-    return pieces() & castlingPath[cr];
+  assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO);
+
+  return pieces() & castlingPath[cr];
 }

 inline Square Position::castling_rook_square(CastlingRights cr) const {
-    assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO);
-    return castlingRookSquare[cr];
+  assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO);
+
+  return castlingRookSquare[cr];
 }

-inline Bitboard Position::attackers_to(Square s) const { return attackers_to(s, pieces()); }
-
-template<PieceType Pt>
-inline Bitboard Position::attacks_by(Color c) const {
-
-    if constexpr (Pt == PAWN)
-        return c == WHITE ? pawn_attacks_bb<WHITE>(pieces(WHITE, PAWN))
-                          : pawn_attacks_bb<BLACK>(pieces(BLACK, PAWN));
-    else
-    {
-        Bitboard threats   = 0;
-        Bitboard attackers = pieces(c, Pt);
-        while (attackers)
-            threats |= attacks_bb<Pt>(pop_lsb(attackers), pieces());
-        return threats;
-    }
+inline Bitboard Position::attackers_to(Square s) const {
+  return attackers_to(s, pieces());
 }

-inline Bitboard Position::checkers() const { return st->checkersBB; }
-
-inline Bitboard Position::blockers_for_king(Color c) const { return st->blockersForKing[c]; }
-
-inline Bitboard Position::pinners(Color c) const { return st->pinners[c]; }
-
-inline Bitboard Position::check_squares(PieceType pt) const { return st->checkSquares[pt]; }
-
-inline Key Position::key() const { return adjust_key50<false>(st->key); }
-
-template<bool AfterMove>
-inline Key Position::adjust_key50(Key k) const {
-    return st->rule50 < 14 - AfterMove ? k : k ^ make_key((st->rule50 - (14 - AfterMove)) / 8);
+inline Bitboard Position::checkers() const {
+  return st->checkersBB;
 }

-inline Key Position::pawn_key() const { return st->pawnKey; }
+inline Bitboard Position::blockers_for_king(Color c) const {
+  return st->blockersForKing[c];
+}

-inline Key Position::material_key() const { return st->materialKey; }
+inline Bitboard Position::check_squares(PieceType pt) const {
+  return st->checkSquares[pt];
+}

-inline Key Position::minor_piece_key() const { return st->minorPieceKey; }
+inline bool Position::is_discovery_check_on_king(Color c, Move m) const {
+  return st->blockersForKing[c] & from_sq(m);  // is the move's origin square one of the blockers for c's king?
+}

-inline Key Position::non_pawn_key(Color c) const { return st->nonPawnKey[c]; }
+inline bool Position::pawn_passed(Color c, Square s) const {
+  return !(pieces(~c, PAWN) & passed_pawn_span(c, s));  // no pawn of color ~c inside passed_pawn_span(c, s)
+}

-inline Value Position::non_pawn_material(Color c) const { return st->nonPawnMaterial[c]; }
+inline bool Position::advanced_pawn_push(Move m) const {
+  return   type_of(moved_piece(m)) == PAWN  // a pawn move ...
+        && relative_rank(sideToMove, to_sq(m)) > RANK_5;  // ... landing beyond RANK_5 from the mover's point of view
+}
+
+inline int Position::pawns_on_same_color_squares(Color c, Square s) const {
+  return popcount(pieces(c, PAWN) & ((DarkSquares & s) ? DarkSquares : ~DarkSquares));  // c's pawns on the same square set (dark / not dark) as s
+}
+
+inline Key Position::key() const {
+  return st->key;
+}
+
+inline Key Position::pawn_key() const {
+  return st->pawnKey;
+}
+
+inline Key Position::material_key() const {
+  return st->materialKey;
+}
+
+inline Score Position::psq_score() const {
+  return psq;
+}
+
+inline Value Position::non_pawn_material(Color c) const {
+  return st->nonPawnMaterial[c];
+}

 inline Value Position::non_pawn_material() const {
-    return non_pawn_material(WHITE) + non_pawn_material(BLACK);
+  return non_pawn_material(WHITE) + non_pawn_material(BLACK);
 }

-inline int Position::game_ply() const { return gamePly; }
+inline int Position::game_ply() const {
+  return gamePly;
+}

-inline int Position::rule50_count() const { return st->rule50; }
+inline int Position::rule50_count() const {
+  return st->rule50;
+}

-inline bool Position::is_chess960() const { return chess960; }
+inline bool Position::opposite_bishops() const {  // each side has exactly one bishop and opposite_colors() holds for their squares
+  return   count<BISHOP>(WHITE) == 1  // the count checks come first: square<BISHOP>() asserts exactly one such piece
+        && count<BISHOP>(BLACK) == 1
+        && opposite_colors(square<BISHOP>(WHITE), square<BISHOP>(BLACK));
+}
+
+inline bool Position::is_chess960() const {
+  return chess960;
+}
+
+inline bool Position::capture_or_promotion(Move m) const {
+  assert(is_ok(m));
+  return type_of(m) != NORMAL ? type_of(m) != CASTLING : !empty(to_sq(m));  // non-NORMAL moves: all but castling; NORMAL moves: destination must be occupied
+}

 inline bool Position::capture(Move m) const {
-    assert(m.is_ok());
-    return (!empty(m.to_sq()) && m.type_of() != CASTLING) || m.type_of() == EN_PASSANT;
+  assert(is_ok(m));
+  // Castling is encoded as "king captures rook"
+  return (!empty(to_sq(m)) && type_of(m) != CASTLING) || type_of(m) == ENPASSANT;
 }

-// Returns true if a move is generated from the capture stage, having also
-// queen promotions covered, i.e. consistency with the capture stage move
-// generation is needed to avoid the generation of duplicate moves.
-inline bool Position::capture_stage(Move m) const {
-    assert(m.is_ok());
-    return capture(m) || m.promotion_type() == QUEEN;
+inline Piece Position::captured_piece() const {
+  return st->capturedPiece;
 }

-inline Piece Position::captured_piece() const { return st->capturedPiece; }
+inline Thread* Position::this_thread() const {
+  return thisThread;
+}

 inline void Position::put_piece(Piece pc, Square s) {

-    board[s] = pc;
-    byTypeBB[ALL_PIECES] |= byTypeBB[type_of(pc)] |= s;
-    byColorBB[color_of(pc)] |= s;
-    pieceCount[pc]++;
-    pieceCount[make_piece(color_of(pc), ALL_PIECES)]++;
+  board[s] = pc;
+  byTypeBB[ALL_PIECES] |= byTypeBB[type_of(pc)] |= s;
+  byColorBB[color_of(pc)] |= s;
+  index[s] = pieceCount[pc]++;  // slot of s inside pieceList[pc]; the new piece is appended at the end
+  pieceList[pc][index[s]] = s;
+  pieceCount[make_piece(color_of(pc), ALL_PIECES)]++;
+  psq += PSQT::psq[pc][s];  // keep the incrementally maintained piece-square score in sync
 }

 inline void Position::remove_piece(Square s) {

-    Piece pc = board[s];
-    byTypeBB[ALL_PIECES] ^= s;
-    byTypeBB[type_of(pc)] ^= s;
-    byColorBB[color_of(pc)] ^= s;
-    board[s] = NO_PIECE;
-    pieceCount[pc]--;
-    pieceCount[make_piece(color_of(pc), ALL_PIECES)]--;
+  // WARNING: This is not a reversible operation. If we remove a piece in
+  // do_move() and then put it back in undo_move(), it goes to the end of the
+  // list rather than its original slot, so index[] and pieceList[] are not
+  // invariant under a do_move() + undo_move() sequence.
+  Piece pc = board[s];
+  byTypeBB[ALL_PIECES] ^= s;
+  byTypeBB[type_of(pc)] ^= s;
+  byColorBB[color_of(pc)] ^= s;
+  /* board[s] = NO_PIECE;  Not needed, overwritten by the capturing one */
+  Square lastSquare = pieceList[pc][--pieceCount[pc]];  // the last list entry moves into the freed slot
+  index[lastSquare] = index[s];
+  pieceList[pc][index[lastSquare]] = lastSquare;
+  pieceList[pc][pieceCount[pc]] = SQ_NONE;  // clear the now-unused tail entry
+  pieceCount[make_piece(color_of(pc), ALL_PIECES)]--;
+  psq -= PSQT::psq[pc][s];  // keep the incrementally maintained piece-square score in sync
 }

 inline void Position::move_piece(Square from, Square to) {

-    Piece    pc     = board[from];
-    Bitboard fromTo = from | to;
-    byTypeBB[ALL_PIECES] ^= fromTo;
-    byTypeBB[type_of(pc)] ^= fromTo;
-    byColorBB[color_of(pc)] ^= fromTo;
-    board[from] = NO_PIECE;
-    board[to]   = pc;
+  // index[from] is not updated and becomes stale. This works as long as index[]
+  // is accessed just by known occupied squares.
+  Piece pc = board[from];
+  Bitboard fromTo = from | to;  // XOR mask: clears the from bit and sets the to bit in one step
+  byTypeBB[ALL_PIECES] ^= fromTo;
+  byTypeBB[type_of(pc)] ^= fromTo;
+  byColorBB[color_of(pc)] ^= fromTo;
+  board[from] = NO_PIECE;
+  board[to] = pc;
+  index[to] = index[from];  // the piece keeps its slot in pieceList[pc]
+  pieceList[pc][index[to]] = to;
+  psq += PSQT::psq[pc][to] - PSQT::psq[pc][from];  // swap the piece-square term of from for that of to
 }

-inline void Position::do_move(Move m, StateInfo& newSt, const TranspositionTable* tt = nullptr) {
-    do_move(m, newSt, gives_check(m), tt);
+inline void Position::do_move(Move m, StateInfo& newSt) {
+  do_move(m, newSt, gives_check(m));
 }

-inline StateInfo* Position::state() const { return st; }
+inline StateInfo* Position::state() const {

-}  // namespace Stockfish
+  return st;
+}

-#endif  // #ifndef POSITION_H_INCLUDED
+inline const EvalList* Position::eval_list() const {
+
+  return &evalList;
+}
+
+inline PieceId Position::piece_id_on(Square sq) const
+{
+
+  assert(piece_on(sq) != NO_PIECE);  // precondition: sq must be occupied
+
+  PieceId pid = evalList.piece_id_list[sq];  // the id comes from the NNUE evaluation list, looked up by square
+  assert(is_ok(pid));
+
+  return pid;
+}
+
+#endif // #ifndef POSITION_H_INCLUDED
--- a/src/psqt.cpp
+++ b/src/psqt.cpp
@@ -0,0 +1,122 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <algorithm>
+
+#include "types.h"
+#include "bitboard.h"
+
+namespace PSQT {
+
+#define S(mg, eg) make_score(mg, eg)
+
+// Bonus[PieceType][Rank][File edge distance] contains Piece-Square scores. For
+// each piece type on a given square a (middlegame, endgame) score pair is assigned.
+// The table is defined for files A..D and the white side: it is mirrored for the
+// second half of the files and for the black side.
+constexpr Score Bonus[][RANK_NB][int(FILE_NB) / 2] = {
+  { },
+  { },
+  { // Knight
+   { S(-175, -96), S(-92,-65), S(-74,-49), S(-73,-21) },
+   { S( -77, -67), S(-41,-54), S(-27,-18), S(-15,  8) },
+   { S( -61, -40), S(-17,-27), S(  6, -8), S( 12, 29) },
+   { S( -35, -35), S(  8, -2), S( 40, 13), S( 49, 28) },
+   { S( -34, -45), S( 13,-16), S( 44,  9), S( 51, 39) },
+   { S(  -9, -51), S( 22,-44), S( 58,-16), S( 53, 17) },
+   { S( -67, -69), S(-27,-50), S(  4,-51), S( 37, 12) },
+   { S(-201,-100), S(-83,-88), S(-56,-56), S(-26,-17) }
+  },
+  { // Bishop
+   { S(-53,-57), S( -5,-30), S( -8,-37), S(-23,-12) },
+   { S(-15,-37), S(  8,-13), S( 19,-17), S(  4,  1) },
+   { S( -7,-16), S( 21, -1), S( -5, -2), S( 17, 10) },
+   { S( -5,-20), S( 11, -6), S( 25,  0), S( 39, 17) },
+   { S(-12,-17), S( 29, -1), S( 22,-14), S( 31, 15) },
+   { S(-16,-30), S(  6,  6), S(  1,  4), S( 11,  6) },
+   { S(-17,-31), S(-14,-20), S(  5, -1), S(  0,  1) },
+   { S(-48,-46), S(  1,-42), S(-14,-37), S(-23,-24) }
+  },
+  { // Rook
+   { S(-31, -9), S(-20,-13), S(-14,-10), S(-5, -9) },
+   { S(-21,-12), S(-13, -9), S( -8, -1), S( 6, -2) },
+   { S(-25,  6), S(-11, -8), S( -1, -2), S( 3, -6) },
+   { S(-13, -6), S( -5,  1), S( -4, -9), S(-6,  7) },
+   { S(-27, -5), S(-15,  8), S( -4,  7), S( 3, -6) },
+   { S(-22,  6), S( -2,  1), S(  6, -7), S(12, 10) },
+   { S( -2,  4), S( 12,  5), S( 16, 20), S(18, -5) },
+   { S(-17, 18), S(-19,  0), S( -1, 19), S( 9, 13) }
+  },
+  { // Queen
+   { S( 3,-69), S(-5,-57), S(-5,-47), S( 4,-26) },
+   { S(-3,-55), S( 5,-31), S( 8,-22), S(12, -4) },
+   { S(-3,-39), S( 6,-18), S(13, -9), S( 7,  3) },
+   { S( 4,-23), S( 5, -3), S( 9, 13), S( 8, 24) },
+   { S( 0,-29), S(14, -6), S(12,  9), S( 5, 21) },
+   { S(-4,-38), S(10,-18), S( 6,-12), S( 8,  1) },
+   { S(-5,-50), S( 6,-27), S(10,-24), S( 8, -8) },
+   { S(-2,-75), S(-2,-52), S( 1,-43), S(-2,-36) }
+  },
+  { // King
+   { S(271,  1), S(327, 45), S(271, 85), S(198, 76) },
+   { S(278, 53), S(303,100), S(234,133), S(179,135) },
+   { S(195, 88), S(258,130), S(169,169), S(120,175) },
+   { S(164,103), S(190,156), S(138,172), S( 98,172) },
+   { S(154, 96), S(179,166), S(105,199), S( 70,199) },
+   { S(123, 92), S(145,172), S( 81,184), S( 31,191) },
+   { S( 88, 47), S(120,121), S( 65,116), S( 33,131) },
+   { S( 59, 11), S( 89, 59), S( 45, 73), S( -1, 78) }
+  }
+};
+
+constexpr Score PBonus[RANK_NB][FILE_NB] =
+  { // Pawn (asymmetric distribution)
+   { },
+   { S(  3,-10), S(  3, -6), S( 10, 10), S( 19,  0), S( 16, 14), S( 19,  7), S(  7, -5), S( -5,-19) },
+   { S( -9,-10), S(-15,-10), S( 11,-10), S( 15,  4), S( 32,  4), S( 22,  3), S(  5, -6), S(-22, -4) },
+   { S( -4,  6), S(-23, -2), S(  6, -8), S( 20, -4), S( 40,-13), S( 17,-12), S(  4,-10), S( -8, -9) },
+   { S( 13, 10), S(  0,  5), S(-13,  4), S(  1, -5), S( 11, -5), S( -2, -5), S(-13, 14), S(  5,  9) },
+   { S(  5, 28), S(-12, 20), S( -7, 21), S( 22, 28), S( -8, 30), S( -5,  7), S(-15,  6), S( -8, 13) },
+   { S( -7,  0), S(  7,-11), S( -3, 12), S(-13, 21), S(  5, 25), S(-16, 19), S( 10,  4), S( -8,  7) }
+  };
+
+#undef S
+
+Score psq[PIECE_NB][SQUARE_NB];
+
+
+// PSQT::init() initializes piece-square tables: the white halves of the tables are
+// copied from Bonus[] and PBonus[], adding the piece value, then the black halves of
+// the tables are initialized by flipping and changing the sign of the white scores.
+void init() {
+
+  for (Piece pc : {W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING})
+  {
+      Score score = make_score(PieceValue[MG][pc], PieceValue[EG][pc]);  // material value as an (mg, eg) pair
+
+      for (Square s = SQ_A1; s <= SQ_H8; ++s)
+      {
+          File f = File(edge_distance(file_of(s)));  // column index into the half-width Bonus table
+          psq[ pc][s] = score + (type_of(pc) == PAWN ? PBonus[rank_of(s)][file_of(s)]  // pawns use the full-width PBonus table
+                                                     : Bonus[pc][rank_of(s)][f]);
+          psq[~pc][flip_rank(s)] = -psq[pc][s];  // ~pc entry: rank-flipped square, negated score
+      }
+  }
+}
+
+} // namespace PSQT
--- a/src/score.cpp
+++ b/src/score.cpp
@@ -1,48 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "score.h"
-
-#include <cassert>
-#include <cmath>
-#include <cstdlib>
-
-#include "uci.h"
-
-namespace Stockfish {
-
-Score::Score(Value v, const Position& pos) {
-    assert(-VALUE_INFINITE < v && v < VALUE_INFINITE);
-
-    if (!is_decisive(v))
-    {
-        score = InternalUnits{UCIEngine::to_cp(v, pos)};
-    }
-    else if (std::abs(v) <= VALUE_TB)
-    {
-        auto distance = VALUE_TB - std::abs(v);
-        score         = (v > 0) ? Tablebase{distance, true} : Tablebase{-distance, false};
-    }
-    else
-    {
-        auto distance = VALUE_MATE - std::abs(v);
-        score         = (v > 0) ? Mate{distance} : Mate{-distance};
-    }
-}
-
-}
--- a/src/score.h
+++ b/src/score.h
@@ -1,70 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef SCORE_H_INCLUDED
-#define SCORE_H_INCLUDED
-
-#include <variant>
-#include <utility>
-
-#include "types.h"
-
-namespace Stockfish {
-
-class Position;
-
-class Score {
-   public:
-    struct Mate {
-        int plies;
-    };
-
-    struct Tablebase {
-        int  plies;
-        bool win;
-    };
-
-    struct InternalUnits {
-        int value;
-    };
-
-    Score() = default;
-    Score(Value v, const Position& pos);
-
-    template<typename T>
-    bool is() const {
-        return std::holds_alternative<T>(score);
-    }
-
-    template<typename T>
-    T get() const {
-        return std::get<T>(score);
-    }
-
-    template<typename F>
-    decltype(auto) visit(F&& f) const {
-        return std::visit(std::forward<F>(f), score);
-    }
-
-   private:
-    std::variant<Mate, Tablebase, InternalUnits> score;
-};
-
-}
-
-#endif  // #ifndef SCORE_H_INCLUDED
--- a/src/search.cpp
+++ b/src/search.cpp
--- a/src/search.h
+++ b/src/search.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,358 +19,90 @@
 #ifndef SEARCH_H_INCLUDED
 #define SEARCH_H_INCLUDED

-#include <algorithm>
-#include <array>
-#include <atomic>
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <functional>
-#include <memory>
-#include <string>
-#include <string_view>
 #include <vector>

-#include "history.h"
 #include "misc.h"
-#include "nnue/network.h"
-#include "nnue/nnue_accumulator.h"
-#include "numa.h"
-#include "position.h"
-#include "score.h"
-#include "syzygy/tbprobe.h"
-#include "timeman.h"
+#include "movepick.h"
 #include "types.h"

-namespace Stockfish {
-
-// Different node types, used as a template parameter
-enum NodeType {
-    NonPV,
-    PV,
-    Root
-};
-
-class TranspositionTable;
-class ThreadPool;
-class OptionsMap;
+class Position;

 namespace Search {

-// Stack struct keeps track of the information we need to remember from nodes
-// shallower and deeper in the tree during the search. Each search thread has
-// its own array of Stack objects, indexed by the current ply.
+/// Threshold used for countermoves based pruning
+constexpr int CounterMovePruneThreshold = 0;
+
+
+/// Stack struct keeps track of the information we need to remember from nodes
+/// shallower and deeper in the tree during the search. Each search thread has
+/// its own array of Stack objects, indexed by the current ply.
+
 struct Stack {
-    Move*                       pv;
-    PieceToHistory*             continuationHistory;
-    CorrectionHistory<PieceTo>* continuationCorrectionHistory;
-    int                         ply;
-    Move                        currentMove;
-    Move                        excludedMove;
-    Value                       staticEval;
-    int                         statScore;
-    int                         moveCount;
-    bool                        inCheck;
-    bool                        ttPv;
-    bool                        ttHit;
-    int                         cutoffCnt;
-    int                         reduction;
-    bool                        isPvNode;
-    int                         quietMoveStreak;
+  Move* pv;
+  PieceToHistory* continuationHistory;
+  int ply;
+  Move currentMove;
+  Move excludedMove;
+  Move killers[2];
+  Value staticEval;
+  int statScore;
+  int moveCount;
+  bool inCheck;
 };


-// RootMove struct is used for moves at the root of the tree. For each root move
-// we store a score and a PV (really a refutation in the case of moves which
-// fail low). Score is normally set at -VALUE_INFINITE for all non-pv moves.
+/// RootMove struct is used for moves at the root of the tree. For each root move
+/// we store a score and a PV (really a refutation in the case of moves which
+/// fail low). Score is normally set at -VALUE_INFINITE for all non-pv moves.
+
 struct RootMove {

-    explicit RootMove(Move m) :
-        pv(1, m) {}
-    bool extract_ponder_from_tt(const TranspositionTable& tt, Position& pos);
-    bool operator==(const Move& m) const { return pv[0] == m; }
-    // Sort in descending order
-    bool operator<(const RootMove& m) const {
-        return m.score != score ? m.score < score : m.previousScore < previousScore;
-    }
+  explicit RootMove(Move m) : pv(1, m) {}
+  bool extract_ponder_from_tt(Position& pos);
+  bool operator==(const Move& m) const { return pv[0] == m; }
+  bool operator<(const RootMove& m) const { // Sort in descending order
+    return m.score != score ? m.score < score
+                            : m.previousScore < previousScore;
+  }

-    uint64_t          effort           = 0;
-    Value             score            = -VALUE_INFINITE;
-    Value             previousScore    = -VALUE_INFINITE;
-    Value             averageScore     = -VALUE_INFINITE;
-    Value             meanSquaredScore = -VALUE_INFINITE * VALUE_INFINITE;
-    Value             uciScore         = -VALUE_INFINITE;
-    bool              scoreLowerbound  = false;
-    bool              scoreUpperbound  = false;
-    int               selDepth         = 0;
-    int               tbRank           = 0;
-    Value             tbScore;
-    std::vector<Move> pv;
+  Value score = -VALUE_INFINITE;
+  Value previousScore = -VALUE_INFINITE;
+  int selDepth = 0;
+  int tbRank = 0;
+  int bestMoveCount = 0;
+  Value tbScore;
+  std::vector<Move> pv;
 };

-using RootMoves = std::vector<RootMove>;
+typedef std::vector<RootMove> RootMoves;


-// LimitsType struct stores information sent by the caller about the analysis required.
+/// LimitsType struct stores information sent by GUI about available time to
+/// search the current move, maximum depth/time, or if we are in analysis mode.
+
 struct LimitsType {

-    // Init explicitly due to broken value-initialization of non POD in MSVC
-    LimitsType() {
-        time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0);
-        movestogo = depth = mate = perft = infinite = 0;
-        nodes                                       = 0;
-        ponderMode                                  = false;
-    }
+  LimitsType() { // Init explicitly due to broken value-initialization of non POD in MSVC
+    time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0);
+    movestogo = depth = mate = perft = infinite = 0;
+    nodes = 0;
+  }

-    bool use_time_management() const { return time[WHITE] || time[BLACK]; }
+  bool use_time_management() const {
+    return time[WHITE] || time[BLACK];
+  }

-    std::vector<std::string> searchmoves;
-    TimePoint                time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime;
-    int                      movestogo, depth, mate, perft, infinite;
-    uint64_t                 nodes;
-    bool                     ponderMode;
+  std::vector<Move> searchmoves;
+  TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime;
+  int movestogo, depth, mate, perft, infinite;
+  int64_t nodes;
 };

+extern LimitsType Limits;

-// The UCI stores the uci options, thread pool, and transposition table.
-// This struct is used to easily forward data to the Search::Worker class.
-struct SharedState {
-    SharedState(const OptionsMap&                               optionsMap,
-                ThreadPool&                                     threadPool,
-                TranspositionTable&                             transpositionTable,
-                const LazyNumaReplicated<Eval::NNUE::Networks>& nets) :
-        options(optionsMap),
-        threads(threadPool),
-        tt(transpositionTable),
-        networks(nets) {}
+void init();
+void clear();

-    const OptionsMap&                               options;
-    ThreadPool&                                     threads;
-    TranspositionTable&                             tt;
-    const LazyNumaReplicated<Eval::NNUE::Networks>& networks;
-};
+} // namespace Search

-class Worker;
-
-// Null Object Pattern, implement a common interface for the SearchManagers.
-// A Null Object will be given to non-mainthread workers.
-class ISearchManager {
-   public:
-    virtual ~ISearchManager() {}
-    virtual void check_time(Search::Worker&) = 0;
-};
-
-struct InfoShort {
-    int   depth;
-    Score score;
-};
-
-struct InfoFull: InfoShort {
-    int              selDepth;
-    size_t           multiPV;
-    std::string_view wdl;
-    std::string_view bound;
-    size_t           timeMs;
-    size_t           nodes;
-    size_t           nps;
-    size_t           tbHits;
-    std::string_view pv;
-    int              hashfull;
-};
-
-struct InfoIteration {
-    int              depth;
-    std::string_view currmove;
-    size_t           currmovenumber;
-};
-
-// Skill structure is used to implement strength limit. If we have a UCI_Elo,
-// we convert it to an appropriate skill level, anchored to the Stash engine.
-// This method is based on a fit of the Elo results for games played between
-// Stockfish at various skill levels and various versions of the Stash engine.
-// Skill 0 .. 19 now covers CCRL Blitz Elo from 1320 to 3190, approximately
-// Reference: https://github.com/vondele/Stockfish/commit/a08b8d4e9711c2
-struct Skill {
-    // Lowest and highest Elo ratings used in the skill level calculation
-    constexpr static int LowestElo  = 1320;
-    constexpr static int HighestElo = 3190;
-
-    Skill(int skill_level, int uci_elo) {
-        if (uci_elo)
-        {
-            double e = double(uci_elo - LowestElo) / (HighestElo - LowestElo);
-            level = std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0);
-        }
-        else
-            level = double(skill_level);
-    }
-    bool enabled() const { return level < 20.0; }
-    bool time_to_pick(Depth depth) const { return depth == 1 + int(level); }
-    Move pick_best(const RootMoves&, size_t multiPV);
-
-    double level;
-    Move   best = Move::none();
-};
-
-// SearchManager manages the search from the main thread. It is responsible for
-// keeping track of the time, and storing data strictly related to the main thread.
-class SearchManager: public ISearchManager {
-   public:
-    using UpdateShort    = std::function<void(const InfoShort&)>;
-    using UpdateFull     = std::function<void(const InfoFull&)>;
-    using UpdateIter     = std::function<void(const InfoIteration&)>;
-    using UpdateBestmove = std::function<void(std::string_view, std::string_view)>;
-
-    struct UpdateContext {
-        UpdateShort    onUpdateNoMoves;
-        UpdateFull     onUpdateFull;
-        UpdateIter     onIter;
-        UpdateBestmove onBestmove;
-    };
-
-
-    SearchManager(const UpdateContext& updateContext) :
-        updates(updateContext) {}
-
-    void check_time(Search::Worker& worker) override;
-
-    void pv(Search::Worker&           worker,
-            const ThreadPool&         threads,
-            const TranspositionTable& tt,
-            Depth                     depth);
-
-    Stockfish::TimeManagement tm;
-    double                    originalTimeAdjust;
-    int                       callsCnt;
-    std::atomic_bool          ponder;
-
-    std::array<Value, 4> iterValue;
-    double               previousTimeReduction;
-    Value                bestPreviousScore;
-    Value                bestPreviousAverageScore;
-    bool                 stopOnPonderhit;
-
-    size_t id;
-
-    const UpdateContext& updates;
-};
-
-class NullSearchManager: public ISearchManager {
-   public:
-    void check_time(Search::Worker&) override {}
-};
-
-
-// Search::Worker is the class that does the actual search.
-// It is instantiated once per thread, and it is responsible for keeping track
-// of the search history, and storing data required for the search.
-class Worker {
-   public:
-    Worker(SharedState&, std::unique_ptr<ISearchManager>, size_t, NumaReplicatedAccessToken);
-
-    // Called at instantiation to initialize reductions tables.
-    // Reset histories, usually before a new game.
-    void clear();
-
-    // Called when the program receives the UCI 'go' command.
-    // It searches from the root position and outputs the "bestmove".
-    void start_searching();
-
-    bool is_mainthread() const { return threadIdx == 0; }
-
-    void ensure_network_replicated();
-
-    // Public because they need to be updatable by the stats
-    ButterflyHistory mainHistory;
-    LowPlyHistory    lowPlyHistory;
-
-    CapturePieceToHistory captureHistory;
-    ContinuationHistory   continuationHistory[2][2];
-    PawnHistory           pawnHistory;
-
-    CorrectionHistory<Pawn>         pawnCorrectionHistory;
-    CorrectionHistory<Minor>        minorPieceCorrectionHistory;
-    CorrectionHistory<NonPawn>      nonPawnCorrectionHistory;
-    CorrectionHistory<Continuation> continuationCorrectionHistory;
-
-    TTMoveHistory ttMoveHistory;
-
-   private:
-    void iterative_deepening();
-
-    void do_move(Position& pos, const Move move, StateInfo& st);
-    void do_move(Position& pos, const Move move, StateInfo& st, const bool givesCheck);
-    void do_null_move(Position& pos, StateInfo& st);
-    void undo_move(Position& pos, const Move move);
-    void undo_null_move(Position& pos);
-
-    // This is the main search function, for both PV and non-PV nodes
-    template<NodeType nodeType>
-    Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode);
-
-    // Quiescence search function, which is called by the main search
-    template<NodeType nodeType>
-    Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta);
-
-    Depth reduction(bool i, Depth d, int mn, int delta) const;
-
-    // Pointer to the search manager, only allowed to be called by the main thread
-    SearchManager* main_manager() const {
-        assert(threadIdx == 0);
-        return static_cast<SearchManager*>(manager.get());
-    }
-
-    TimePoint elapsed() const;
-    TimePoint elapsed_time() const;
-
-    Value evaluate(const Position&);
-
-    LimitsType limits;
-
-    size_t                pvIdx, pvLast;
-    std::atomic<uint64_t> nodes, tbHits, bestMoveChanges;
-    int                   selDepth, nmpMinPly;
-
-    Value optimism[COLOR_NB];
-
-    Position  rootPos;
-    StateInfo rootState;
-    RootMoves rootMoves;
-    Depth     rootDepth, completedDepth;
-    Value     rootDelta;
-
-    size_t                    threadIdx;
-    NumaReplicatedAccessToken numaAccessToken;
-
-    // Reductions lookup table initialized at startup
-    std::array<int, MAX_MOVES> reductions;  // [depth or moveNumber]
-
-    // The main thread has a SearchManager, the others have a NullSearchManager
-    std::unique_ptr<ISearchManager> manager;
-
-    Tablebases::Config tbConfig;
-
-    const OptionsMap&                               options;
-    ThreadPool&                                     threads;
-    TranspositionTable&                             tt;
-    const LazyNumaReplicated<Eval::NNUE::Networks>& networks;
-
-    // Used by NNUE
-    Eval::NNUE::AccumulatorStack  accumulatorStack;
-    Eval::NNUE::AccumulatorCaches refreshTable;
-
-    friend class Stockfish::ThreadPool;
-    friend class SearchManager;
-};
-
-struct ConthistBonus {
-    int index;
-    int weight;
-};
-
-
-}  // namespace Search
-
-}  // namespace Stockfish
-
-#endif  // #ifndef SEARCH_H_INCLUDED
+#endif // #ifndef SEARCH_H_INCLUDED
--- a/src/syzygy/tbprobe.cpp
+++ b/src/syzygy/tbprobe.cpp
--- a/src/syzygy/tbprobe.h
+++ b/src/syzygy/tbprobe.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,60 +19,60 @@
 #ifndef TBPROBE_H
 #define TBPROBE_H

-#include <string>
-#include <vector>
+#include <ostream>

+#include "../search.h"

-namespace Stockfish {
-class Position;
-class OptionsMap;
-
-using Depth = int;
-
-namespace Search {
-struct RootMove;
-using RootMoves = std::vector<RootMove>;
-}
-}
-
-namespace Stockfish::Tablebases {
-
-struct Config {
-    int   cardinality = 0;
-    bool  rootInTB    = false;
-    bool  useRule50   = false;
-    Depth probeDepth  = 0;
-};
+namespace Tablebases {

 enum WDLScore {
-    WDLLoss        = -2,  // Loss
-    WDLBlessedLoss = -1,  // Loss, but draw under 50-move rule
-    WDLDraw        = 0,   // Draw
-    WDLCursedWin   = 1,   // Win, but draw under 50-move rule
-    WDLWin         = 2,   // Win
+    WDLLoss        = -2, // Loss
+    WDLBlessedLoss = -1, // Loss, but draw under 50-move rule
+    WDLDraw        =  0, // Draw
+    WDLCursedWin   =  1, // Win, but draw under 50-move rule
+    WDLWin         =  2, // Win
+
+    WDLScoreNone  = -1000
 };

 // Possible states after a probing operation
 enum ProbeState {
-    FAIL              = 0,   // Probe failed (missing file table)
-    OK                = 1,   // Probe successful
-    CHANGE_STM        = -1,  // DTZ should check the other side
-    ZEROING_BEST_MOVE = 2    // Best move zeroes DTZ (capture or pawn move)
+    FAIL              =  0, // Probe failed (missing file table)
+    OK                =  1, // Probe successful
+    CHANGE_STM        = -1, // DTZ should check the other side
+    ZEROING_BEST_MOVE =  2  // Best move zeroes DTZ (capture or pawn move)
 };

 extern int MaxCardinality;

-
-void     init(const std::string& paths);
+void init(const std::string& paths);
 WDLScore probe_wdl(Position& pos, ProbeState* result);
-int      probe_dtz(Position& pos, ProbeState* result);
-bool     root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50, bool rankDTZ);
-bool     root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, bool rule50);
-Config   rank_root_moves(const OptionsMap&  options,
-                         Position&          pos,
-                         Search::RootMoves& rootMoves,
-                         bool               rankDTZ = false);
+int probe_dtz(Position& pos, ProbeState* result);
+bool root_probe(Position& pos, Search::RootMoves& rootMoves);
+bool root_probe_wdl(Position& pos, Search::RootMoves& rootMoves);
+void rank_root_moves(Position& pos, Search::RootMoves& rootMoves);

-}  // namespace Stockfish::Tablebases
+inline std::ostream& operator<<(std::ostream& os, const WDLScore v) {
+
+    os << (v == WDLLoss        ? "Loss" :
+           v == WDLBlessedLoss ? "Blessed loss" :
+           v == WDLDraw        ? "Draw" :
+           v == WDLCursedWin   ? "Cursed win" :
+           v == WDLWin         ? "Win" : "None");
+
+    return os;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const ProbeState v) {
+
+    os << (v == FAIL              ? "Failed" :
+           v == OK                ? "Success" :
+           v == CHANGE_STM        ? "Probed opponent side" :
+           v == ZEROING_BEST_MOVE ? "Best move zeroes DTZ" : "None");
+
+    return os;
+}
+
+}

 #endif
--- a/src/thread.cpp
+++ b/src/thread.cpp
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,395 +16,259 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include "thread.h"
-
-#include <algorithm>
 #include <cassert>
-#include <deque>
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <utility>

+#include <algorithm> // For std::count
 #include "movegen.h"
 #include "search.h"
-#include "syzygy/tbprobe.h"
-#include "timeman.h"
-#include "types.h"
+#include "thread.h"
 #include "uci.h"
-#include "ucioption.h"
+#include "syzygy/tbprobe.h"
+#include "tt.h"

-namespace Stockfish {
+ThreadPool Threads; // Global object

-// Constructor launches the thread and waits until it goes to sleep
-// in idle_loop(). Note that 'searching' and 'exit' should be already set.
-Thread::Thread(Search::SharedState&                    sharedState,
-               std::unique_ptr<Search::ISearchManager> sm,
-               size_t                                  n,
-               OptionalThreadToNumaNodeBinder          binder) :
-    idx(n),
-    nthreads(sharedState.options["Threads"]),
-    stdThread(&Thread::idle_loop, this) {

-    wait_for_search_finished();
+/// Thread constructor launches the thread and waits until it goes to sleep
+/// in idle_loop(). Note that 'searching' and 'exit' should be already set.

-    run_custom_job([this, &binder, &sharedState, &sm, n]() {
-        // Use the binder to [maybe] bind the threads to a NUMA node before doing
-        // the Worker allocation. Ideally we would also allocate the SearchManager
-        // here, but that's minor.
-        this->numaAccessToken = binder();
-        this->worker =
-          std::make_unique<Search::Worker>(sharedState, std::move(sm), n, this->numaAccessToken);
-    });
+Thread::Thread(size_t n) : idx(n), stdThread(&Thread::idle_loop, this) {

-    wait_for_search_finished();
+  wait_for_search_finished();
 }


-// Destructor wakes up the thread in idle_loop() and waits
-// for its termination. Thread should be already waiting.
+/// Thread destructor wakes up the thread in idle_loop() and waits
+/// for its termination. Thread should be already waiting.
+
 Thread::~Thread() {

-    assert(!searching);
+  assert(!searching);

-    exit = true;
-    start_searching();
-    stdThread.join();
+  exit = true;
+  start_searching();
+  stdThread.join();
 }

-// Wakes up the thread that will start the search
+
+/// Thread::best_move_count(Move move) returns the best move counter for the given root move
+
+int Thread::best_move_count(Move move) const {
+
+  auto rm = std::find(rootMoves.begin() + pvIdx,
+                      rootMoves.begin() + pvLast, move);
+
+  return rm != rootMoves.begin() + pvLast ? rm->bestMoveCount : 0;
+}
+
+
+/// Thread::clear() resets histories, usually before a new game
+
+void Thread::clear() {
+
+  counterMoves.fill(MOVE_NONE);
+  mainHistory.fill(0);
+  lowPlyHistory.fill(0);
+  captureHistory.fill(0);
+
+  for (bool inCheck : { false, true })
+      for (StatsType c : { NoCaptures, Captures })
+      {
+          for (auto& to : continuationHistory[inCheck][c])
+                for (auto& h : to)
+                      h->fill(0);
+          continuationHistory[inCheck][c][NO_PIECE][0]->fill(Search::CounterMovePruneThreshold - 1);
+      }
+}
+
+
+/// Thread::start_searching() wakes up the thread that will start the search
+
 void Thread::start_searching() {
-    assert(worker != nullptr);
-    run_custom_job([this]() { worker->start_searching(); });
+
+  std::lock_guard<std::mutex> lk(mutex);
+  searching = true;
+  cv.notify_one(); // Wake up the thread in idle_loop()
 }

-// Clears the histories for the thread worker (usually before a new game)
-void Thread::clear_worker() {
-    assert(worker != nullptr);
-    run_custom_job([this]() { worker->clear(); });
-}

-// Blocks on the condition variable until the thread has finished searching
+/// Thread::wait_for_search_finished() blocks on the condition variable
+/// until the thread has finished searching.
+
 void Thread::wait_for_search_finished() {

-    std::unique_lock<std::mutex> lk(mutex);
-    cv.wait(lk, [&] { return !searching; });
+  std::unique_lock<std::mutex> lk(mutex);
+  cv.wait(lk, [&]{ return !searching; });
 }

-// Launching a function in the thread
-void Thread::run_custom_job(std::function<void()> f) {
-    {
-        std::unique_lock<std::mutex> lk(mutex);
-        cv.wait(lk, [&] { return !searching; });
-        jobFunc   = std::move(f);
-        searching = true;
-    }
-    cv.notify_one();
-}

-void Thread::ensure_network_replicated() { worker->ensure_network_replicated(); }
-
-// Thread gets parked here, blocked on the condition variable
-// when the thread has no work to do.
+/// Thread::idle_loop() is where the thread is parked, blocked on the
+/// condition variable, when it has no work to do.

 void Thread::idle_loop() {
-    while (true)
-    {
-        std::unique_lock<std::mutex> lk(mutex);
-        searching = false;
-        cv.notify_one();  // Wake up anyone waiting for search finished
-        cv.wait(lk, [&] { return searching; });

-        if (exit)
-            return;
+  // If OS already scheduled us on a different group than 0 then don't overwrite
+  // the choice: we may be one of many single-threaded processes running on
+  // some Windows NUMA hardware, for instance in fishtest. To make it simple,
+  // just check if running threads are below a threshold, in this case all this
+  // NUMA machinery is not needed.
+  if (Options["Threads"] > 8)
+      WinProcGroup::bindThisThread(idx);

-        std::function<void()> job = std::move(jobFunc);
-        jobFunc                   = nullptr;
+  while (true)
+  {
+      std::unique_lock<std::mutex> lk(mutex);
+      searching = false;
+      cv.notify_one(); // Wake up anyone waiting for search finished
+      cv.wait(lk, [&]{ return searching; });

-        lk.unlock();
+      if (exit)
+          return;

-        if (job)
-            job();
-    }
+      lk.unlock();
+
+      search();
+  }
 }

-Search::SearchManager* ThreadPool::main_manager() { return main_thread()->worker->main_manager(); }
+/// ThreadPool::set() creates/destroys threads to match the requested number.
+/// Created and launched threads will immediately go to sleep in idle_loop.
+/// Upon resizing, threads are recreated to allow for binding if necessary.

-uint64_t ThreadPool::nodes_searched() const { return accumulate(&Search::Worker::nodes); }
-uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits); }
+void ThreadPool::set(size_t requested) {

-// Creates/destroys threads to match the requested number.
-// Created and launched threads will immediately go to sleep in idle_loop.
-// Upon resizing, threads are recreated to allow for binding if necessary.
-void ThreadPool::set(const NumaConfig&                           numaConfig,
-                     Search::SharedState                         sharedState,
-                     const Search::SearchManager::UpdateContext& updateContext) {
+  if (size() > 0) { // destroy any existing thread(s)
+      main()->wait_for_search_finished();

-    if (threads.size() > 0)  // destroy any existing thread(s)
-    {
-        main_thread()->wait_for_search_finished();
+      while (size() > 0)
+          delete back(), pop_back();
+  }

-        threads.clear();
+  if (requested > 0) { // create new thread(s)
+      push_back(new MainThread(0));

-        boundThreadToNumaNode.clear();
-    }
+      while (size() < requested)
+          push_back(new Thread(size()));
+      clear();

-    const size_t requested = sharedState.options["Threads"];
+      // Reallocate the hash with the new threadpool size
+      TT.resize(size_t(Options["Hash"]));

-    if (requested > 0)  // create new thread(s)
-    {
-        // Binding threads may be problematic when there's multiple NUMA nodes and
-        // multiple Stockfish instances running. In particular, if each instance
-        // runs a single thread then they would all be mapped to the first NUMA node.
-        // This is undesirable, and so the default behaviour (i.e. when the user does not
-        // change the NumaConfig UCI setting) is to not bind the threads to processors
-        // unless we know for sure that we span NUMA nodes and replication is required.
-        const std::string numaPolicy(sharedState.options["NumaPolicy"]);
-        const bool        doBindThreads = [&]() {
-            if (numaPolicy == "none")
-                return false;
-
-            if (numaPolicy == "auto")
-                return numaConfig.suggests_binding_threads(requested);
-
-            // numaPolicy == "system", or explicitly set by the user
-            return true;
-        }();
-
-        boundThreadToNumaNode = doBindThreads
-                                ? numaConfig.distribute_threads_among_numa_nodes(requested)
-                                : std::vector<NumaIndex>{};
-
-        while (threads.size() < requested)
-        {
-            const size_t    threadId = threads.size();
-            const NumaIndex numaId   = doBindThreads ? boundThreadToNumaNode[threadId] : 0;
-            auto            manager  = threadId == 0 ? std::unique_ptr<Search::ISearchManager>(
-                                             std::make_unique<Search::SearchManager>(updateContext))
-                                                     : std::make_unique<Search::NullSearchManager>();
-
-            // When not binding threads we want to force all access to happen
-            // from the same NUMA node, because in case of NUMA replicated memory
-            // accesses we don't want to trash cache in case the threads get scheduled
-            // on the same NUMA node.
-            auto binder = doBindThreads ? OptionalThreadToNumaNodeBinder(numaConfig, numaId)
-                                        : OptionalThreadToNumaNodeBinder(numaId);
-
-            threads.emplace_back(
-              std::make_unique<Thread>(sharedState, std::move(manager), threadId, binder));
-        }
-
-        clear();
-
-        main_thread()->wait_for_search_finished();
-    }
+      // Init thread number dependent search params.
+      Search::init();
+  }
 }


-// Sets threadPool data to initial values
+/// ThreadPool::clear() sets threadPool data to initial values
+
 void ThreadPool::clear() {
-    if (threads.size() == 0)
-        return;

-    for (auto&& th : threads)
-        th->clear_worker();
+  for (Thread* th : *this)
+      th->clear();

-    for (auto&& th : threads)
-        th->wait_for_search_finished();
-
-    // These two affect the time taken on the first move of a game:
-    main_manager()->bestPreviousAverageScore = VALUE_INFINITE;
-    main_manager()->previousTimeReduction    = 0.85;
-
-    main_manager()->callsCnt           = 0;
-    main_manager()->bestPreviousScore  = VALUE_INFINITE;
-    main_manager()->originalTimeAdjust = -1;
-    main_manager()->tm.clear();
+  main()->callsCnt = 0;
+  main()->bestPreviousScore = VALUE_INFINITE;
+  main()->previousTimeReduction = 1.0;
 }

-void ThreadPool::run_on_thread(size_t threadId, std::function<void()> f) {
-    assert(threads.size() > threadId);
-    threads[threadId]->run_custom_job(std::move(f));
-}

-void ThreadPool::wait_on_thread(size_t threadId) {
-    assert(threads.size() > threadId);
-    threads[threadId]->wait_for_search_finished();
-}
+/// ThreadPool::start_thinking() wakes up main thread waiting in idle_loop() and
+/// returns immediately. Main thread will wake up other threads and start the search.

-size_t ThreadPool::num_threads() const { return threads.size(); }
+void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
+                                const Search::LimitsType& limits, bool ponderMode) {

+  main()->wait_for_search_finished();

-// Wakes up main thread waiting in idle_loop() and returns immediately.
-// Main thread will wake up other threads and start the search.
-void ThreadPool::start_thinking(const OptionsMap&  options,
-                                Position&          pos,
-                                StateListPtr&      states,
-                                Search::LimitsType limits) {
+  main()->stopOnPonderhit = stop = false;
+  increaseDepth = true;
+  main()->ponder = ponderMode;
+  Search::Limits = limits;
+  Search::RootMoves rootMoves;

-    main_thread()->wait_for_search_finished();
+  for (const auto& m : MoveList<LEGAL>(pos))
+      if (   limits.searchmoves.empty()
+          || std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m))
+          rootMoves.emplace_back(m);

-    main_manager()->stopOnPonderhit = stop = abortedSearch = false;
-    main_manager()->ponder                                 = limits.ponderMode;
+  if (!rootMoves.empty())
+      Tablebases::rank_root_moves(pos, rootMoves);

-    increaseDepth = true;
+  // After ownership transfer 'states' becomes empty, so if we stop the search
+  // and call 'go' again without setting a new position states.get() == NULL.
+  assert(states.get() || setupStates.get());

-    Search::RootMoves rootMoves;
-    const auto        legalmoves = MoveList<LEGAL>(pos);
+  if (states.get())
+      setupStates = std::move(states); // Ownership transfer, states is now empty

-    for (const auto& uciMove : limits.searchmoves)
-    {
-        auto move = UCIEngine::to_move(pos, uciMove);
+  // We use Position::set() to set root position across threads. But there are
+  // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
+  // be deduced from a FEN string, so set() clears them. To avoid losing that info
+  // we back up and later restore setupStates->back(). Note that setupStates
+  // is shared by threads but is accessed in read-only mode.
+  StateInfo tmp = setupStates->back();

-        if (std::find(legalmoves.begin(), legalmoves.end(), move) != legalmoves.end())
-            rootMoves.emplace_back(move);
-    }
+  for (Thread* th : *this)
+  {
+      th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0;
+      th->rootDepth = th->completedDepth = 0;
+      th->rootMoves = rootMoves;
+      th->rootPos.set(pos.fen(), pos.is_chess960(), &setupStates->back(), th);
+  }

-    if (rootMoves.empty())
-        for (const auto& m : legalmoves)
-            rootMoves.emplace_back(m);
+  setupStates->back() = tmp;

-    Tablebases::Config tbConfig = Tablebases::rank_root_moves(options, pos, rootMoves);
-
-    // After ownership transfer 'states' becomes empty, so if we stop the search
-    // and call 'go' again without setting a new position states.get() == nullptr.
-    assert(states.get() || setupStates.get());
-
-    if (states.get())
-        setupStates = std::move(states);  // Ownership transfer, states is now empty
-
-    // We use Position::set() to set root position across threads. But there are
-    // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
-    // be deduced from a fen string, so set() clears them and they are set from
-    // setupStates->back() later. The rootState is per thread, earlier states are
-    // shared since they are read-only.
-    for (auto&& th : threads)
-    {
-        th->run_custom_job([&]() {
-            th->worker->limits = limits;
-            th->worker->nodes = th->worker->tbHits = th->worker->nmpMinPly =
-              th->worker->bestMoveChanges          = 0;
-            th->worker->rootDepth = th->worker->completedDepth = 0;
-            th->worker->rootMoves                              = rootMoves;
-            th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState);
-            th->worker->rootState = setupStates->back();
-            th->worker->tbConfig  = tbConfig;
-        });
-    }
-
-    for (auto&& th : threads)
-        th->wait_for_search_finished();
-
-    main_thread()->start_searching();
+  main()->start_searching();
 }

 Thread* ThreadPool::get_best_thread() const {

-    Thread* bestThread = threads.front().get();
-    Value   minScore   = VALUE_NONE;
+    Thread* bestThread = front();
+    std::map<Move, int64_t> votes;
+    Value minScore = VALUE_NONE;

-    std::unordered_map<Move, int64_t, Move::MoveHash> votes(
-      2 * std::min(size(), bestThread->worker->rootMoves.size()));
-
-    // Find the minimum score of all threads
-    for (auto&& th : threads)
-        minScore = std::min(minScore, th->worker->rootMoves[0].score);
+    // Find minimum score of all threads
+    for (Thread* th: *this)
+        minScore = std::min(minScore, th->rootMoves[0].score);

    // Vote according to score and depth, and select the best thread
-    auto thread_voting_value = [minScore](Thread* th) {
-        return (th->worker->rootMoves[0].score - minScore + 14) * int(th->worker->completedDepth);
-    };
-
-    for (auto&& th : threads)
-        votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th.get());
-
-    for (auto&& th : threads)
+    for (Thread* th : *this)
    {
-        const auto bestThreadScore = bestThread->worker->rootMoves[0].score;
-        const auto newThreadScore  = th->worker->rootMoves[0].score;
+        votes[th->rootMoves[0].pv[0]] +=
+            (th->rootMoves[0].score - minScore + 14) * int(th->completedDepth);

-        const auto& bestThreadPV = bestThread->worker->rootMoves[0].pv;
-        const auto& newThreadPV  = th->worker->rootMoves[0].pv;
-
-        const auto bestThreadMoveVote = votes[bestThreadPV[0]];
-        const auto newThreadMoveVote  = votes[newThreadPV[0]];
-
-        const bool bestThreadInProvenWin = is_win(bestThreadScore);
-        const bool newThreadInProvenWin  = is_win(newThreadScore);
-
-        const bool bestThreadInProvenLoss =
-          bestThreadScore != -VALUE_INFINITE && is_loss(bestThreadScore);
-        const bool newThreadInProvenLoss =
-          newThreadScore != -VALUE_INFINITE && is_loss(newThreadScore);
-
-        // We make sure not to pick a thread with truncated principal variation
-        const bool betterVotingValue =
-          thread_voting_value(th.get()) * int(newThreadPV.size() > 2)
-          > thread_voting_value(bestThread) * int(bestThreadPV.size() > 2);
-
-        if (bestThreadInProvenWin)
-        {
-            // Make sure we pick the shortest mate / TB conversion
-            if (newThreadScore > bestThreadScore)
-                bestThread = th.get();
-        }
-        else if (bestThreadInProvenLoss)
-        {
-            // Make sure we pick the shortest mated / TB conversion
-            if (newThreadInProvenLoss && newThreadScore < bestThreadScore)
-                bestThread = th.get();
-        }
-        else if (newThreadInProvenWin || newThreadInProvenLoss
-                 || (!is_loss(newThreadScore)
-                     && (newThreadMoveVote > bestThreadMoveVote
-                         || (newThreadMoveVote == bestThreadMoveVote && betterVotingValue))))
-            bestThread = th.get();
+          if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY)
+          {
+              // Make sure we pick the shortest mate / TB conversion or stave off mate the longest
+              if (th->rootMoves[0].score > bestThread->rootMoves[0].score)
+                  bestThread = th;
+          }
+          else if (   th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY
+                   || (   th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY
+                       && votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]]))
+              bestThread = th;
    }

    return bestThread;
 }


-// Start non-main threads.
-// Will be invoked by main thread after it has started searching.
+/// Start non-main threads
+
 void ThreadPool::start_searching() {

-    for (auto&& th : threads)
-        if (th != threads.front())
+    for (Thread* th : *this)
+        if (th != front())
            th->start_searching();
 }


-// Wait for non-main threads
+/// Wait for non-main threads
+
 void ThreadPool::wait_for_search_finished() const {

-    for (auto&& th : threads)
-        if (th != threads.front())
+    for (Thread* th : *this)
+        if (th != front())
            th->wait_for_search_finished();
 }
-
-std::vector<size_t> ThreadPool::get_bound_thread_count_by_numa_node() const {
-    std::vector<size_t> counts;
-
-    if (!boundThreadToNumaNode.empty())
-    {
-        NumaIndex highestNumaNode = 0;
-        for (NumaIndex n : boundThreadToNumaNode)
-            if (n > highestNumaNode)
-                highestNumaNode = n;
-
-        counts.resize(highestNumaNode + 1, 0);
-
-        for (NumaIndex n : boundThreadToNumaNode)
-            counts[n] += 1;
-    }
-
-    return counts;
-}
-
-void ThreadPool::ensure_network_replicated() {
-    for (auto&& th : threads)
-        th->ensure_network_replicated();
-}
-
-}  // namespace Stockfish
--- a/src/thread.h
+++ b/src/thread.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,158 +21,110 @@

 #include <atomic>
 #include <condition_variable>
-#include <cstddef>
-#include <cstdint>
-#include <functional>
-#include <memory>
 #include <mutex>
+#include <thread>
 #include <vector>

-#include "numa.h"
+#include "material.h"
+#include "movepick.h"
+#include "pawns.h"
 #include "position.h"
 #include "search.h"
 #include "thread_win32_osx.h"

-namespace Stockfish {

+/// Thread class keeps together all the thread-related stuff. We use
+/// per-thread pawn and material hash tables so that once we get a
+/// pointer to an entry its lifetime is unlimited and we don't have
+/// to care about someone changing the entry under our feet.

-class OptionsMap;
-using Value = int;
-
-// Sometimes we don't want to actually bind the threads, but the recipient still
-// needs to think it runs on *some* NUMA node, such that it can access structures
-// that rely on NUMA node knowledge. This class encapsulates this optional process
-// such that the recipient does not need to know whether the binding happened or not.
-class OptionalThreadToNumaNodeBinder {
-   public:
-    OptionalThreadToNumaNodeBinder(NumaIndex n) :
-        numaConfig(nullptr),
-        numaId(n) {}
-
-    OptionalThreadToNumaNodeBinder(const NumaConfig& cfg, NumaIndex n) :
-        numaConfig(&cfg),
-        numaId(n) {}
-
-    NumaReplicatedAccessToken operator()() const {
-        if (numaConfig != nullptr)
-            return numaConfig->bind_current_thread_to_numa_node(numaId);
-        else
-            return NumaReplicatedAccessToken(numaId);
-    }
-
-   private:
-    const NumaConfig* numaConfig;
-    NumaIndex         numaId;
-};
-
-// Abstraction of a thread. It contains a pointer to the worker and a native thread.
-// After construction, the native thread is started with idle_loop()
-// waiting for a signal to start searching.
-// When the signal is received, the thread starts searching and when
-// the search is finished, it goes back to idle_loop() waiting for a new signal.
 class Thread {
-   public:
-    Thread(Search::SharedState&,
-           std::unique_ptr<Search::ISearchManager>,
-           size_t,
-           OptionalThreadToNumaNodeBinder);
-    virtual ~Thread();

-    void idle_loop();
-    void start_searching();
-    void clear_worker();
-    void run_custom_job(std::function<void()> f);
+  std::mutex mutex;
+  std::condition_variable cv;
+  size_t idx;
+  bool exit = false, searching = true; // Set before starting std::thread
+  NativeThread stdThread;

-    void ensure_network_replicated();
+public:
+  explicit Thread(size_t);
+  virtual ~Thread();
+  virtual void search();
+  void clear();
+  void idle_loop();
+  void start_searching();
+  void wait_for_search_finished();
+  int best_move_count(Move move) const;

-    // Thread has been slightly altered to allow running custom jobs, so
-    // this name is no longer correct. However, this class (and ThreadPool)
-    // require further work to make them properly generic while maintaining
-    // appropriate specificity regarding search, from the point of view of an
-    // outside user, so renaming of this function is left for whenever that happens.
-    void   wait_for_search_finished();
-    size_t id() const { return idx; }
+  Pawns::Table pawnsTable;
+  Material::Table materialTable;
+  size_t pvIdx, pvLast;
+  uint64_t ttHitAverage;
+  int selDepth, nmpMinPly;
+  Color nmpColor;
+  std::atomic<uint64_t> nodes, tbHits, bestMoveChanges;

-    std::unique_ptr<Search::Worker> worker;
-    std::function<void()>           jobFunc;
-
-   private:
-    std::mutex                mutex;
-    std::condition_variable   cv;
-    size_t                    idx, nthreads;
-    bool                      exit = false, searching = true;  // Set before starting std::thread
-    NativeThread              stdThread;
-    NumaReplicatedAccessToken numaAccessToken;
+  Position rootPos;
+  Search::RootMoves rootMoves;
+  Depth rootDepth, completedDepth;
+  CounterMoveHistory counterMoves;
+  ButterflyHistory mainHistory;
+  LowPlyHistory lowPlyHistory;
+  CapturePieceToHistory captureHistory;
+  ContinuationHistory continuationHistory[2][2];
+  Score contempt;
 };


-// ThreadPool struct handles all the threads-related stuff like init, starting,
-// parking and, most importantly, launching a thread. All the access to threads
-// is done through this class.
-class ThreadPool {
-   public:
-    ThreadPool() {}
+/// MainThread is a derived class specific for main thread

-    ~ThreadPool() {
-        // destroy any existing thread(s)
-        if (threads.size() > 0)
-        {
-            main_thread()->wait_for_search_finished();
+struct MainThread : public Thread {

-            threads.clear();
-        }
-    }
+  using Thread::Thread;

-    ThreadPool(const ThreadPool&) = delete;
-    ThreadPool(ThreadPool&&)      = delete;
+  void search() override;
+  void check_time();

-    ThreadPool& operator=(const ThreadPool&) = delete;
-    ThreadPool& operator=(ThreadPool&&)      = delete;
-
-    void   start_thinking(const OptionsMap&, Position&, StateListPtr&, Search::LimitsType);
-    void   run_on_thread(size_t threadId, std::function<void()> f);
-    void   wait_on_thread(size_t threadId);
-    size_t num_threads() const;
-    void   clear();
-    void   set(const NumaConfig& numaConfig,
-               Search::SharedState,
-               const Search::SearchManager::UpdateContext&);
-
-    Search::SearchManager* main_manager();
-    Thread*                main_thread() const { return threads.front().get(); }
-    uint64_t               nodes_searched() const;
-    uint64_t               tb_hits() const;
-    Thread*                get_best_thread() const;
-    void                   start_searching();
-    void                   wait_for_search_finished() const;
-
-    std::vector<size_t> get_bound_thread_count_by_numa_node() const;
-
-    void ensure_network_replicated();
-
-    std::atomic_bool stop, abortedSearch, increaseDepth;
-
-    auto cbegin() const noexcept { return threads.cbegin(); }
-    auto begin() noexcept { return threads.begin(); }
-    auto end() noexcept { return threads.end(); }
-    auto cend() const noexcept { return threads.cend(); }
-    auto size() const noexcept { return threads.size(); }
-    auto empty() const noexcept { return threads.empty(); }
-
-   private:
-    StateListPtr                         setupStates;
-    std::vector<std::unique_ptr<Thread>> threads;
-    std::vector<NumaIndex>               boundThreadToNumaNode;
-
-    uint64_t accumulate(std::atomic<uint64_t> Search::Worker::* member) const {
-
-        uint64_t sum = 0;
-        for (auto&& th : threads)
-            sum += (th->worker.get()->*member).load(std::memory_order_relaxed);
-        return sum;
-    }
+  double previousTimeReduction;
+  Value bestPreviousScore;
+  Value iterValue[4];
+  int callsCnt;
+  bool stopOnPonderhit;
+  std::atomic_bool ponder;
 };

-}  // namespace Stockfish

-#endif  // #ifndef THREAD_H_INCLUDED
+/// ThreadPool struct handles all the threads-related stuff like init, starting,
+/// parking and, most importantly, launching a thread. All the access to threads
+/// is done through this class.
+
+struct ThreadPool : public std::vector<Thread*> {
+
+  void start_thinking(Position&, StateListPtr&, const Search::LimitsType&, bool = false);
+  void clear();
+  void set(size_t);
+
+  MainThread* main()        const { return static_cast<MainThread*>(front()); }
+  uint64_t nodes_searched() const { return accumulate(&Thread::nodes); }
+  uint64_t tb_hits()        const { return accumulate(&Thread::tbHits); }
+  Thread* get_best_thread() const;
+  void start_searching();
+  void wait_for_search_finished() const;
+
+  std::atomic_bool stop, increaseDepth;
+
+private:
+  StateListPtr setupStates;
+
+  uint64_t accumulate(std::atomic<uint64_t> Thread::* member) const {
+
+    uint64_t sum = 0;
+    for (Thread* th : *this)
+        sum += (th->*member).load(std::memory_order_relaxed);
+    return sum;
+  }
+};
+
+extern ThreadPool Threads;
+
+#endif // #ifndef THREAD_H_INCLUDED
--- a/src/thread_win32_osx.h
+++ b/src/thread_win32_osx.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,58 +21,46 @@

 #include <thread>

-// On OSX threads other than the main thread are created with a reduced stack
-// size of 512KB by default, this is too low for deep searches, which require
-// somewhat more than 1MB stack, so adjust it to TH_STACK_SIZE.
-// The implementation calls pthread_create() with the stack size parameter
-// equal to the Linux 8MB default, on platforms that support it.
+/// On OSX threads other than the main thread are created with a reduced stack
+/// size of 512KB by default, this is too low for deep searches, which require
+/// somewhat more than 1MB stack, so adjust it to TH_STACK_SIZE.
+/// The implementation calls pthread_create() with the stack size parameter
+/// equal to the Linux 8MB default, on platforms that support it.

-#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) || defined(USE_PTHREADS)
+#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__)

-    #include <pthread.h>
-    #include <functional>
+#include <pthread.h>

-namespace Stockfish {
+static const size_t TH_STACK_SIZE = 8 * 1024 * 1024;
+
+template <class T, class P = std::pair<T*, void(T::*)()>>
+void* start_routine(void* ptr)
+{
+   P* p = reinterpret_cast<P*>(ptr);
+   (p->first->*(p->second))(); // Call member function pointer
+   delete p;
+   return NULL;
+}

 class NativeThread {
-    pthread_t thread;

-    static constexpr size_t TH_STACK_SIZE = 8 * 1024 * 1024;
+   pthread_t thread;

-   public:
-    template<class Function, class... Args>
-    explicit NativeThread(Function&& fun, Args&&... args) {
-        auto func = new std::function<void()>(
-          std::bind(std::forward<Function>(fun), std::forward<Args>(args)...));
-
-        pthread_attr_t attr_storage, *attr = &attr_storage;
-        pthread_attr_init(attr);
-        pthread_attr_setstacksize(attr, TH_STACK_SIZE);
-
-        auto start_routine = [](void* ptr) -> void* {
-            auto f = reinterpret_cast<std::function<void()>*>(ptr);
-            // Call the function
-            (*f)();
-            delete f;
-            return nullptr;
-        };
-
-        pthread_create(&thread, attr, start_routine, func);
-    }
-
-    void join() { pthread_join(thread, nullptr); }
+public:
+  template<class T, class P = std::pair<T*, void(T::*)()>>
+  explicit NativeThread(void(T::*fun)(), T* obj) {
+    pthread_attr_t attr_storage, *attr = &attr_storage;
+    pthread_attr_init(attr);
+    pthread_attr_setstacksize(attr, TH_STACK_SIZE);
+    pthread_create(&thread, attr, start_routine<T>, new P(obj, fun));
+  }
+  void join() { pthread_join(thread, NULL); }
 };

-}  // namespace Stockfish
+#else // Default case: use STL classes

-#else  // Default case: use STL classes
-
-namespace Stockfish {
-
-using NativeThread = std::thread;
-
-}  // namespace Stockfish
+typedef std::thread NativeThread;

 #endif

-#endif  // #ifndef THREAD_WIN32_OSX_H_INCLUDED
+#endif // #ifndef THREAD_WIN32_OSX_H_INCLUDED
--- a/src/timeman.cpp
+++ b/src/timeman.cpp
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,125 +16,82 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include "timeman.h"
-
 #include <algorithm>
-#include <cassert>
+#include <cfloat>
 #include <cmath>
-#include <cstdint>

 #include "search.h"
-#include "ucioption.h"
+#include "timeman.h"
+#include "uci.h"

-namespace Stockfish {
+TimeManagement Time; // Our global time management object

-TimePoint TimeManagement::optimum() const { return optimumTime; }
-TimePoint TimeManagement::maximum() const { return maximumTime; }

-void TimeManagement::clear() {
-    availableNodes = -1;  // When in 'nodes as time' mode
-}
-
-void TimeManagement::advance_nodes_time(std::int64_t nodes) {
-    assert(useNodesTime);
-    availableNodes = std::max(int64_t(0), availableNodes - nodes);
-}
-
-// Called at the beginning of the search and calculates
-// the bounds of time allowed for the current game ply. We currently support:
+/// TimeManagement::init() is called at the beginning of the search and calculates
+/// the bounds of time allowed for the current game ply. We currently support:
 //      1) x basetime (+ z increment)
 //      2) x moves in y seconds (+ z increment)
-void TimeManagement::init(Search::LimitsType& limits,
-                          Color               us,
-                          int                 ply,
-                          const OptionsMap&   options,
-                          double&             originalTimeAdjust) {
-    TimePoint npmsec = TimePoint(options["nodestime"]);

-    // If we have no time, we don't need to fully initialize TM.
-    // startTime is used by movetime and useNodesTime is used in elapsed calls.
-    startTime    = limits.startTime;
-    useNodesTime = npmsec != 0;
+void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {

-    if (limits.time[us] == 0)
-        return;
+  TimePoint moveOverhead    = TimePoint(Options["Move Overhead"]);
+  TimePoint slowMover       = TimePoint(Options["Slow Mover"]);
+  TimePoint npmsec          = TimePoint(Options["nodestime"]);

-    TimePoint moveOverhead = TimePoint(options["Move Overhead"]);
+  // opt_scale is the fraction of available time to use for the current move.
+  // max_scale is a multiplier applied to optimumTime.
+  double opt_scale, max_scale;

-    // optScale is a percentage of available time to use for the current move.
-    // maxScale is a multiplier applied to optimumTime.
-    double optScale, maxScale;
+  // If we have to play in 'nodes as time' mode, then convert from time
+  // to nodes, and use resulting values in time management formulas.
+  // WARNING: to avoid time losses, the given npmsec (nodes per millisecond)
+  // must be much lower than the real engine speed.
+  if (npmsec)
+  {
+      if (!availableNodes) // Only once at game start
+          availableNodes = npmsec * limits.time[us]; // Time is in msec

-    // If we have to play in 'nodes as time' mode, then convert from time
-    // to nodes, and use resulting values in time management formulas.
-    // WARNING: to avoid time losses, the given npmsec (nodes per millisecond)
-    // must be much lower than the real engine speed.
-    if (useNodesTime)
-    {
-        if (availableNodes == -1)                       // Only once at game start
-            availableNodes = npmsec * limits.time[us];  // Time is in msec
+      // Convert from milliseconds to nodes
+      limits.time[us] = TimePoint(availableNodes);
+      limits.inc[us] *= npmsec;
+      limits.npmsec = npmsec;
+  }

-        // Convert from milliseconds to nodes
-        limits.time[us] = TimePoint(availableNodes);
-        limits.inc[us] *= npmsec;
-        limits.npmsec = npmsec;
-        moveOverhead *= npmsec;
-    }
+  startTime = limits.startTime;

-    // These numbers are used where multiplications, divisions or comparisons
-    // with constants are involved.
-    const int64_t   scaleFactor = useNodesTime ? npmsec : 1;
-    const TimePoint scaledTime  = limits.time[us] / scaleFactor;
+  // Maximum move horizon of 50 moves
+  int mtg = limits.movestogo ? std::min(limits.movestogo, 50) : 50;

-    // Maximum move horizon
-    int centiMTG = limits.movestogo ? std::min(limits.movestogo * 100, 5000) : 5051;
+  // Make sure timeLeft is > 0 since we may use it as a divisor
+  TimePoint timeLeft =  std::max(TimePoint(1),
+      limits.time[us] + limits.inc[us] * (mtg - 1) - moveOverhead * (2 + mtg));

-    // If less than one second, gradually reduce mtg
-    if (scaledTime < 1000)
-        centiMTG = scaledTime * 5.051;
+  // A user may scale time usage by setting UCI option "Slow Mover"
+  // Default is 100 and changing this value will probably lose Elo.
+  timeLeft = slowMover * timeLeft / 100;

-    // Make sure timeLeft is > 0 since we may use it as a divisor
-    TimePoint timeLeft =
-      std::max(TimePoint(1),
-               limits.time[us]
-                 + (limits.inc[us] * (centiMTG - 100) - moveOverhead * (200 + centiMTG)) / 100);
+  // x basetime (+ z increment)
+  // If there is a healthy increment, timeLeft can exceed actual available
+  // game time for the current move, so also cap to 20% of available game time.
+  if (limits.movestogo == 0)
+  {
+      opt_scale = std::min(0.008 + std::pow(ply + 3.0, 0.5) / 250.0,
+                           0.2 * limits.time[us] / double(timeLeft));
+      max_scale = std::min(7.0, 4.0 + ply / 12.0);
+  }

-    // x basetime (+ z increment)
-    // If there is a healthy increment, timeLeft can exceed the actual available
-    // game time for the current move, so also cap to a percentage of available game time.
-    if (limits.movestogo == 0)
-    {
-        // Extra time according to timeLeft
-        if (originalTimeAdjust < 0)
-            originalTimeAdjust = 0.3128 * std::log10(timeLeft) - 0.4354;
+  // x moves in y seconds (+ z increment)
+  else
+  {
+      opt_scale = std::min((0.8 + ply / 128.0) / mtg,
+                            0.8 * limits.time[us] / double(timeLeft));
+      max_scale = std::min(6.3, 1.5 + 0.11 * mtg);
+  }

-        // Calculate time constants based on current time left.
-        double logTimeInSec = std::log10(scaledTime / 1000.0);
-        double optConstant  = std::min(0.0032116 + 0.000321123 * logTimeInSec, 0.00508017);
-        double maxConstant  = std::max(3.3977 + 3.03950 * logTimeInSec, 2.94761);
+  // Never use more than 80% of the available time for this move
+  optimumTime = TimePoint(opt_scale * timeLeft);
+  maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, max_scale * optimumTime));

-        optScale = std::min(0.0121431 + std::pow(ply + 2.94693, 0.461073) * optConstant,
-                            0.213035 * limits.time[us] / timeLeft)
-                 * originalTimeAdjust;
-
-        maxScale = std::min(6.67704, maxConstant + ply / 11.9847);
-    }
-
-    // x moves in y seconds (+ z increment)
-    else
-    {
-        optScale =
-          std::min((0.88 + ply / 116.4) / (centiMTG / 100.0), 0.88 * limits.time[us] / timeLeft);
-        maxScale = 1.3 + 0.11 * (centiMTG / 100.0);
-    }
-
-    // Limit the maximum possible time for this move
-    optimumTime = TimePoint(optScale * timeLeft);
-    maximumTime =
-      TimePoint(std::min(0.825179 * limits.time[us] - moveOverhead, maxScale * optimumTime)) - 10;
-
-    if (options["Ponder"])
-        optimumTime += optimumTime / 4;
+  if (Options["Ponder"])
+      optimumTime += optimumTime / 4;
 }
-
-}  // namespace Stockfish
--- a/src/timeman.h
+++ b/src/timeman.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,49 +19,29 @@
 #ifndef TIMEMAN_H_INCLUDED
 #define TIMEMAN_H_INCLUDED

-#include <cstdint>
-
 #include "misc.h"
+#include "search.h"
+#include "thread.h"

-namespace Stockfish {
+/// The TimeManagement class computes the optimal time to think depending on
+/// the maximum available time, the game move number and other parameters.

-class OptionsMap;
-enum Color : int8_t;
-
-namespace Search {
-struct LimitsType;
-}
-
-// The TimeManagement class computes the optimal time to think depending on
-// the maximum available time, the game move number, and other parameters.
 class TimeManagement {
-   public:
-    void init(Search::LimitsType& limits,
-              Color               us,
-              int                 ply,
-              const OptionsMap&   options,
-              double&             originalTimeAdjust);
+public:
+  void init(Search::LimitsType& limits, Color us, int ply);
+  TimePoint optimum() const { return optimumTime; }
+  TimePoint maximum() const { return maximumTime; }
+  TimePoint elapsed() const { return Search::Limits.npmsec ?
+                                     TimePoint(Threads.nodes_searched()) : now() - startTime; }

-    TimePoint optimum() const;
-    TimePoint maximum() const;
-    template<typename FUNC>
-    TimePoint elapsed(FUNC nodes) const {
-        return useNodesTime ? TimePoint(nodes()) : elapsed_time();
-    }
-    TimePoint elapsed_time() const { return now() - startTime; };
+  int64_t availableNodes; // When in 'nodes as time' mode

-    void clear();
-    void advance_nodes_time(std::int64_t nodes);
-
-   private:
-    TimePoint startTime;
-    TimePoint optimumTime;
-    TimePoint maximumTime;
-
-    std::int64_t availableNodes = -1;     // When in 'nodes as time' mode
-    bool         useNodesTime   = false;  // True if we are in 'nodes as time' mode
+private:
+  TimePoint startTime;
+  TimePoint optimumTime;
+  TimePoint maximumTime;
 };

-}  // namespace Stockfish
+extern TimeManagement Time;

-#endif  // #ifndef TIMEMAN_H_INCLUDED
+#endif // #ifndef TIMEMAN_H_INCLUDED
--- a/src/tt.cpp
+++ b/src/tt.cpp
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,238 +16,140 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include "tt.h"
-
-#include <cassert>
-#include <cstdint>
-#include <cstdlib>
-#include <cstring>
+#include <cstring>   // For std::memset
 #include <iostream>
+#include <thread>

-#include "memory.h"
+#include "bitboard.h"
 #include "misc.h"
-#include "syzygy/tbprobe.h"
 #include "thread.h"
+#include "tt.h"
+#include "uci.h"

-namespace Stockfish {
+TranspositionTable TT; // Our global transposition table

+/// TTEntry::save() populates the TTEntry with a new node's data, possibly
+/// overwriting an old position. Update is not atomic and can be racy.

-// TTEntry struct is the 10 bytes transposition table entry, defined as below:
-//
-// key        16 bit
-// depth       8 bit
-// generation  5 bit
-// pv node     1 bit
-// bound type  2 bit
-// move       16 bit
-// value      16 bit
-// evaluation 16 bit
-//
-// These fields are in the same order as accessed by TT::probe(), since memory is fastest sequentially.
-// Equally, the store order in save() matches this order.
+void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) {

-struct TTEntry {
+  // Preserve any existing move for the same position
+  if (m || (uint16_t)k != key16)
+      move16 = (uint16_t)m;

-    // Convert internal bitfields to external types
-    TTData read() const {
-        return TTData{Move(move16),           Value(value16),
-                      Value(eval16),          Depth(depth8 + DEPTH_ENTRY_OFFSET),
-                      Bound(genBound8 & 0x3), bool(genBound8 & 0x4)};
-    }
+  // Overwrite less valuable entries
+  if ((uint16_t)k != key16
+      || d - DEPTH_OFFSET > depth8 - 4
+      || b == BOUND_EXACT)
+  {
+      assert(d >= DEPTH_OFFSET);

-    bool is_occupied() const;
-    void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8);
-    // The returned age is a multiple of TranspositionTable::GENERATION_DELTA
-    uint8_t relative_age(const uint8_t generation8) const;
-
-   private:
-    friend class TranspositionTable;
-
-    uint16_t key16;
-    uint8_t  depth8;
-    uint8_t  genBound8;
-    Move     move16;
-    int16_t  value16;
-    int16_t  eval16;
-};
-
-// `genBound8` is where most of the details are. We use the following constants to manipulate 5 leading generation bits
-// and 3 trailing miscellaneous bits.
-
-// These bits are reserved for other things.
-static constexpr unsigned GENERATION_BITS = 3;
-// increment for generation field
-static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS);
-// cycle length
-static constexpr int GENERATION_CYCLE = 255 + GENERATION_DELTA;
-// mask to pull out generation number
-static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF;
-
-// DEPTH_ENTRY_OFFSET exists because 1) we use `bool(depth8)` as the occupancy check, but
-// 2) we need to store negative depths for QS. (`depth8` is the only field with "spare bits":
-// we sacrifice the ability to store depths greater than 1<<8 less the offset, as asserted in `save`.)
-bool TTEntry::is_occupied() const { return bool(depth8); }
-
-// Populates the TTEntry with a new node's data, possibly
-// overwriting an old position. The update is not atomic and can be racy.
-void TTEntry::save(
-  Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) {
-
-    // Preserve the old ttmove if we don't have a new one
-    if (m || uint16_t(k) != key16)
-        move16 = m;
-
-    // Overwrite less valuable entries (cheapest checks first)
-    if (b == BOUND_EXACT || uint16_t(k) != key16 || d - DEPTH_ENTRY_OFFSET + 2 * pv > depth8 - 4
-        || relative_age(generation8))
-    {
-        assert(d > DEPTH_ENTRY_OFFSET);
-        assert(d < 256 + DEPTH_ENTRY_OFFSET);
-
-        key16     = uint16_t(k);
-        depth8    = uint8_t(d - DEPTH_ENTRY_OFFSET);
-        genBound8 = uint8_t(generation8 | uint8_t(pv) << 2 | b);
-        value16   = int16_t(v);
-        eval16    = int16_t(ev);
-    }
-    else if (depth8 + DEPTH_ENTRY_OFFSET >= 5 && Bound(genBound8 & 0x3) != BOUND_EXACT)
-        depth8--;
+      key16     = (uint16_t)k;
+      value16   = (int16_t)v;
+      eval16    = (int16_t)ev;
+      genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b);
+      depth8    = (uint8_t)(d - DEPTH_OFFSET);
+  }
 }


-uint8_t TTEntry::relative_age(const uint8_t generation8) const {
-    // Due to our packed storage format for generation and its cyclic
-    // nature we add GENERATION_CYCLE (256 is the modulus, plus what
-    // is needed to keep the unrelated lowest n bits from affecting
-    // the result) to calculate the entry age correctly even after
-    // generation8 overflows into the next cycle.
-    return (GENERATION_CYCLE + generation8 - genBound8) & GENERATION_MASK;
+/// TranspositionTable::resize() sets the size of the transposition table,
+/// measured in megabytes. Transposition table consists of clusters and each
+/// cluster consists of ClusterSize number of TTEntry.
+
+void TranspositionTable::resize(size_t mbSize) {
+
+  Threads.main()->wait_for_search_finished();
+
+  aligned_ttmem_free(mem);
+
+  clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
+  table = static_cast<Cluster*>(aligned_ttmem_alloc(clusterCount * sizeof(Cluster), mem));
+  if (!mem)
+  {
+      std::cerr << "Failed to allocate " << mbSize
+                << "MB for transposition table." << std::endl;
+      exit(EXIT_FAILURE);
+  }
+
+  clear();
 }


-// TTWriter is but a very thin wrapper around the pointer
-TTWriter::TTWriter(TTEntry* tte) :
-    entry(tte) {}
+/// TranspositionTable::clear() initializes the entire transposition table to zero,
+/// in a multi-threaded way.

-void TTWriter::write(
-  Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) {
-    entry->save(k, v, pv, b, d, m, ev, generation8);
+void TranspositionTable::clear() {
+
+  std::vector<std::thread> threads;
+
+  for (size_t idx = 0; idx < Options["Threads"]; ++idx)
+  {
+      threads.emplace_back([this, idx]() {
+
+          // Thread binding gives faster search on systems with a first-touch policy
+          if (Options["Threads"] > 8)
+              WinProcGroup::bindThisThread(idx);
+
+          // Each thread will zero its part of the hash table
+          const size_t stride = size_t(clusterCount / Options["Threads"]),
+                       start  = size_t(stride * idx),
+                       len    = idx != Options["Threads"] - 1 ?
+                                stride : clusterCount - start;
+
+          std::memset(&table[start], 0, len * sizeof(Cluster));
+      });
+  }
+
+  for (std::thread& th : threads)
+      th.join();
 }


-// A TranspositionTable is an array of Cluster, of size clusterCount. Each cluster consists of ClusterSize number
-// of TTEntry. Each non-empty TTEntry contains information on exactly one position. The size of a Cluster should
-// divide the size of a cache line for best performance, as the cacheline is prefetched when possible.
+/// TranspositionTable::probe() looks up the current position in the transposition
+/// table. It sets 'found' to true and returns a pointer to the TTEntry if the position
+/// is found. Otherwise, it sets 'found' to false and returns a pointer to an empty or
+/// least valuable TTEntry to be replaced later. The replace value of an entry is
+/// calculated as its depth minus 8 times its relative age. TTEntry t1 is considered
+/// more valuable than TTEntry t2 if its replace value is greater than that of t2.

-static constexpr int ClusterSize = 3;
+TTEntry* TranspositionTable::probe(const Key key, bool& found) const {

-struct Cluster {
-    TTEntry entry[ClusterSize];
-    char    padding[2];  // Pad to 32 bytes
-};
+  TTEntry* const tte = first_entry(key);
+  const uint16_t key16 = (uint16_t)key;  // Use the low 16 bits as key inside the cluster

-static_assert(sizeof(Cluster) == 32, "Suboptimal Cluster size");
+  for (int i = 0; i < ClusterSize; ++i)
+      if (!tte[i].key16 || tte[i].key16 == key16)
+      {
+          tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & 0x7)); // Refresh

+          return found = (bool)tte[i].key16, &tte[i];
+      }

-// Sets the size of the transposition table,
-// measured in megabytes. Transposition table consists
-// of clusters and each cluster consists of ClusterSize number of TTEntry.
-void TranspositionTable::resize(size_t mbSize, ThreadPool& threads) {
-    aligned_large_pages_free(table);
+  // Find an entry to be replaced according to the replacement strategy
+  TTEntry* replace = tte;
+  for (int i = 1; i < ClusterSize; ++i)
+      // Due to our packed storage format for generation and its cyclic
+      // nature we add 263 (256 is the modulus plus 7 to keep the unrelated
+      // lowest three bits from affecting the result) to calculate the entry
+      // age correctly even after generation8 overflows into the next cycle.
+      if (  replace->depth8 - ((263 + generation8 - replace->genBound8) & 0xF8)
+          >   tte[i].depth8 - ((263 + generation8 -   tte[i].genBound8) & 0xF8))
+          replace = &tte[i];

-    clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
-
-    table = static_cast<Cluster*>(aligned_large_pages_alloc(clusterCount * sizeof(Cluster)));
-
-    if (!table)
-    {
-        std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." << std::endl;
-        exit(EXIT_FAILURE);
-    }
-
-    clear(threads);
+  return found = false, replace;
 }


-// Initializes the entire transposition table to zero,
-// in a multi-threaded way.
-void TranspositionTable::clear(ThreadPool& threads) {
-    generation8              = 0;
-    const size_t threadCount = threads.num_threads();
+/// TranspositionTable::hashfull() returns an approximation of the hashtable
+/// occupation during a search. The hash is x permill full, as per UCI protocol.

-    for (size_t i = 0; i < threadCount; ++i)
-    {
-        threads.run_on_thread(i, [this, i, threadCount]() {
-            // Each thread will zero its part of the hash table
-            const size_t stride = clusterCount / threadCount;
-            const size_t start  = stride * i;
-            const size_t len    = i + 1 != threadCount ? stride : clusterCount - start;
+int TranspositionTable::hashfull() const {

-            std::memset(&table[start], 0, len * sizeof(Cluster));
-        });
-    }
+  int cnt = 0;
+  for (int i = 0; i < 1000; ++i)
+      for (int j = 0; j < ClusterSize; ++j)
+          cnt += (table[i].entry[j].genBound8 & 0xF8) == generation8;

-    for (size_t i = 0; i < threadCount; ++i)
-        threads.wait_on_thread(i);
+  return cnt / ClusterSize;
 }
-
-
-// Returns an approximation of the hashtable
-// occupation during a search. The hash is x permill full, as per UCI protocol.
-// Only counts entries which match the current generation.
-int TranspositionTable::hashfull(int maxAge) const {
-    int maxAgeInternal = maxAge << GENERATION_BITS;
-    int cnt            = 0;
-    for (int i = 0; i < 1000; ++i)
-        for (int j = 0; j < ClusterSize; ++j)
-            cnt += table[i].entry[j].is_occupied()
-                && table[i].entry[j].relative_age(generation8) <= maxAgeInternal;
-
-    return cnt / ClusterSize;
-}
-
-
-void TranspositionTable::new_search() {
-    // increment by delta to keep lower bits as is
-    generation8 += GENERATION_DELTA;
-}
-
-
-uint8_t TranspositionTable::generation() const { return generation8; }
-
-
-// Looks up the current position in the transposition
-// table. It returns true if the position is found.
-// Otherwise, it returns false and a pointer to an empty or least valuable TTEntry
-// to be replaced later. The replace value of an entry is calculated as its depth
-// minus 8 times its relative age. TTEntry t1 is considered more valuable than
-// TTEntry t2 if its replace value is greater than that of t2.
-std::tuple<bool, TTData, TTWriter> TranspositionTable::probe(const Key key) const {
-
-    TTEntry* const tte   = first_entry(key);
-    const uint16_t key16 = uint16_t(key);  // Use the low 16 bits as key inside the cluster
-
-    for (int i = 0; i < ClusterSize; ++i)
-        if (tte[i].key16 == key16)
-            // This gap is the main place for read races.
-            // After `read()` completes that copy is final, but may be self-inconsistent.
-            return {tte[i].is_occupied(), tte[i].read(), TTWriter(&tte[i])};
-
-    // Find an entry to be replaced according to the replacement strategy
-    TTEntry* replace = tte;
-    for (int i = 1; i < ClusterSize; ++i)
-        if (replace->depth8 - replace->relative_age(generation8)
-            > tte[i].depth8 - tte[i].relative_age(generation8))
-            replace = &tte[i];
-
-    return {false,
-            TTData{Move::none(), VALUE_NONE, VALUE_NONE, DEPTH_ENTRY_OFFSET, BOUND_NONE, false},
-            TTWriter(replace)};
-}
-
-
-TTEntry* TranspositionTable::first_entry(const Key key) const {
-    return &table[mul_hi64(key, clusterCount)].entry[0];
-}
-
-}  // namespace Stockfish
--- a/src/tt.h
+++ b/src/tt.h
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,92 +19,80 @@
 #ifndef TT_H_INCLUDED
 #define TT_H_INCLUDED

-#include <cstddef>
-#include <cstdint>
-#include <tuple>
-
-#include "memory.h"
+#include "misc.h"
 #include "types.h"

-namespace Stockfish {
+/// TTEntry struct is the 10 bytes transposition table entry, defined as below:
+///
+/// key        16 bit
+/// move       16 bit
+/// value      16 bit
+/// eval value 16 bit
+/// generation  5 bit
+/// pv node     1 bit
+/// bound type  2 bit
+/// depth       8 bit

-class ThreadPool;
-struct TTEntry;
-struct Cluster;
+struct TTEntry {

-// There is only one global hash table for the engine and all its threads. For chess in particular, we even allow racy
-// updates between threads to and from the TT, as taking the time to synchronize access would cost thinking time and
-// thus elo. As a hash table, collisions are possible and may cause chess playing issues (bizarre blunders, faulty mate
-// reports, etc). Fixing these also loses elo; however such risk decreases quickly with larger TT size.
-//
-// `probe` is the primary method: given a board position, we lookup its entry in the table, and return a tuple of:
-//   1) whether the entry already has this position
-//   2) a copy of the prior data (if any) (may be inconsistent due to read races)
-//   3) a writer object to this entry
-// The copied data and the writer are separated to maintain clear boundaries between local vs global objects.
+  Move  move()  const { return (Move )move16; }
+  Value value() const { return (Value)value16; }
+  Value eval()  const { return (Value)eval16; }
+  Depth depth() const { return (Depth)depth8 + DEPTH_OFFSET; }
+  bool is_pv()  const { return (bool)(genBound8 & 0x4); }
+  Bound bound() const { return (Bound)(genBound8 & 0x3); }
+  void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev);

+private:
+  friend class TranspositionTable;

-// A copy of the data already in the entry (possibly collided). `probe` may be racy, resulting in inconsistent data.
-struct TTData {
-    Move  move;
-    Value value, eval;
-    Depth depth;
-    Bound bound;
-    bool  is_pv;
-
-    TTData() = delete;
-
-    // clang-format off
-    TTData(Move m, Value v, Value ev, Depth d, Bound b, bool pv) :
-        move(m),
-        value(v),
-        eval(ev),
-        depth(d),
-        bound(b),
-        is_pv(pv) {};
-    // clang-format on
+  uint16_t key16;
+  uint16_t move16;
+  int16_t  value16;
+  int16_t  eval16;
+  uint8_t  genBound8;
+  uint8_t  depth8;
 };


-// This is used to make racy writes to the global TT.
-struct TTWriter {
-   public:
-    void write(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8);
-
-   private:
-    friend class TranspositionTable;
-    TTEntry* entry;
-    TTWriter(TTEntry* tte);
-};
-
+/// A TranspositionTable is an array of Cluster, of size clusterCount. Each
+/// cluster consists of ClusterSize number of TTEntry. Each non-empty TTEntry
+/// contains information on exactly one position. The size of a Cluster should
+/// divide the size of a cache line for best performance, as the cacheline is
+/// prefetched when possible.

 class TranspositionTable {

-   public:
-    ~TranspositionTable() { aligned_large_pages_free(table); }
+  static constexpr int ClusterSize = 3;

-    void resize(size_t mbSize, ThreadPool& threads);  // Set TT size
-    void clear(ThreadPool& threads);                  // Re-initialize memory, multithreaded
-    int  hashfull(int maxAge = 0)
-      const;  // Approximate what fraction of entries (permille) have been written to during this root search
+  struct Cluster {
+    TTEntry entry[ClusterSize];
+    char padding[2]; // Pad to 32 bytes
+  };

-    void
-    new_search();  // This must be called at the beginning of each root search to track entry aging
-    uint8_t generation() const;  // The current age, used when writing new data to the TT
-    std::tuple<bool, TTData, TTWriter>
-    probe(const Key key) const;  // The main method, whose retvals separate local vs global objects
-    TTEntry* first_entry(const Key key)
-      const;  // This is the hash function; its only external use is memory prefetching.
+  static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size");

-   private:
-    friend struct TTEntry;
+public:
+ ~TranspositionTable() { aligned_ttmem_free(mem); }
+  void new_search() { generation8 += 8; } // Lower 3 bits are used by PV flag and Bound
+  TTEntry* probe(const Key key, bool& found) const;
+  int hashfull() const;
+  void resize(size_t mbSize);
+  void clear();

-    size_t   clusterCount;
-    Cluster* table = nullptr;
+  TTEntry* first_entry(const Key key) const {
+    return &table[mul_hi64(key, clusterCount)].entry[0];
+  }

-    uint8_t generation8 = 0;  // Size must be not bigger than TTEntry::genBound8
+private:
+  friend struct TTEntry;
+
+  size_t clusterCount;
+  Cluster* table;
+  void* mem;
+  uint8_t generation8; // Size must be not bigger than TTEntry::genBound8
 };

-}  // namespace Stockfish
+extern TranspositionTable TT;

-#endif  // #ifndef TT_H_INCLUDED
+#endif // #ifndef TT_H_INCLUDED
--- a/Show More
+++ b/Show More