mirror of
https://github.com/HChaZZY/Stockfish.git
synced 2025-12-06 10:53:50 +08:00
Compare commits
55 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b4ac3d6b96 | ||
|
|
6c19bec86e | ||
|
|
8c4ac26c8e | ||
|
|
a2b24e0030 | ||
|
|
66b4e7f080 | ||
|
|
04a0be956d | ||
|
|
f327096cfb | ||
|
|
e592dcb8e3 | ||
|
|
80eae02603 | ||
|
|
0ad7de3182 | ||
|
|
e0b4dc24c8 | ||
|
|
b79ec2e0b2 | ||
|
|
43c887d367 | ||
|
|
3a187b863b | ||
|
|
5fcd0e6f2a | ||
|
|
19129473f2 | ||
|
|
b706b91bb1 | ||
|
|
8ec5faa46e | ||
|
|
8a9d269855 | ||
|
|
0b3c13107a | ||
|
|
669074672c | ||
|
|
0fd0e4e849 | ||
|
|
85327828c9 | ||
|
|
4cdb6386d8 | ||
|
|
982880bd70 | ||
|
|
bf17a410ec | ||
|
|
5e7777e9d0 | ||
|
|
10a920d7d7 | ||
|
|
21819b7bf8 | ||
|
|
8c4338ae49 | ||
|
|
8a3f8e21ae | ||
|
|
267ca781cd | ||
|
|
ac43bef5c5 | ||
|
|
7a32d26d5f | ||
|
|
fb5c1f5bf5 | ||
|
|
87f0fa55a0 | ||
|
|
2f882309d5 | ||
|
|
86953b9392 | ||
|
|
ba1c639836 | ||
|
|
e526c5aa52 | ||
|
|
9cd2c817db | ||
|
|
54a0a228f6 | ||
|
|
1cd2c7861a | ||
|
|
7af3f4da7a | ||
|
|
271181bb31 | ||
|
|
66b2c6b9f1 | ||
|
|
2559c20c6e | ||
|
|
2659c407c4 | ||
|
|
3730ae1efb | ||
|
|
0d6cdc0c6d | ||
|
|
80afeb0d3b | ||
|
|
2405b38165 | ||
|
|
8a95d269eb | ||
|
|
3b7b632aa5 | ||
|
|
29c166a072 |
1
.github/ci/libcxx17.imp
vendored
1
.github/ci/libcxx17.imp
vendored
@@ -7,7 +7,6 @@
|
||||
{ include: [ "<__fwd/sstream.h>", private, "<iosfwd>", public ] },
|
||||
{ include: [ "<__fwd/streambuf.h>", private, "<iosfwd>", public ] },
|
||||
{ include: [ "<__fwd/string_view.h>", private, "<string_view>", public ] },
|
||||
{ include: [ "<__system_error/errc.h>", private, "<system_error>", public ] },
|
||||
|
||||
# Mappings for includes between public headers
|
||||
{ include: [ "<ios>", public, "<iostream>", public ] },
|
||||
|
||||
10
.github/workflows/arm_compilation.yml
vendored
10
.github/workflows/arm_compilation.yml
vendored
@@ -10,7 +10,7 @@ jobs:
|
||||
name: ${{ matrix.config.name }} ${{ matrix.binaries }}
|
||||
runs-on: ${{ matrix.config.os }}
|
||||
env:
|
||||
COMPCXX: ${{ matrix.config.compiler }}
|
||||
COMPILER: ${{ matrix.config.compiler }}
|
||||
COMP: ${{ matrix.config.comp }}
|
||||
EMU: ${{ matrix.config.emu }}
|
||||
EXT: ${{ matrix.config.ext }}
|
||||
@@ -26,7 +26,6 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Download required linux packages
|
||||
if: runner.os == 'Linux'
|
||||
@@ -63,7 +62,7 @@ jobs:
|
||||
if [ $COMP == ndk ]; then
|
||||
export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
|
||||
fi
|
||||
$COMPCXX -v
|
||||
$COMPILER -v
|
||||
|
||||
- name: Test help target
|
||||
run: make help
|
||||
@@ -92,7 +91,4 @@ jobs:
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }}
|
||||
path: |
|
||||
.
|
||||
!.git
|
||||
!.output
|
||||
path: .
|
||||
|
||||
12
.github/workflows/clang-format.yml
vendored
12
.github/workflows/clang-format.yml
vendored
@@ -11,10 +11,6 @@ on:
|
||||
paths:
|
||||
- "**.cpp"
|
||||
- "**.h"
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
Clang-Format:
|
||||
name: Clang-Format
|
||||
@@ -29,7 +25,7 @@ jobs:
|
||||
id: clang-format
|
||||
continue-on-error: true
|
||||
with:
|
||||
clang-format-version: "18"
|
||||
clang-format-version: "17"
|
||||
exclude-regex: "incbin"
|
||||
|
||||
- name: Comment on PR
|
||||
@@ -37,13 +33,12 @@ jobs:
|
||||
uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # @v2.5.0
|
||||
with:
|
||||
message: |
|
||||
clang-format 18 needs to be run on this PR.
|
||||
clang-format 17 needs to be run on this PR.
|
||||
If you do not have clang-format installed, the maintainer will run it when merging.
|
||||
For the exact version please see https://packages.ubuntu.com/noble/clang-format-18.
|
||||
For the exact version please see https://packages.ubuntu.com/mantic/clang-format-17.
|
||||
|
||||
_(execution **${{ github.run_id }}** / attempt **${{ github.run_attempt }}**)_
|
||||
comment_tag: execution
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Comment on PR
|
||||
if: steps.clang-format.outcome != 'failure'
|
||||
@@ -54,4 +49,3 @@ jobs:
|
||||
create_if_not_exists: false
|
||||
comment_tag: execution
|
||||
mode: delete
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
2
.github/workflows/codeql.yml
vendored
2
.github/workflows/codeql.yml
vendored
@@ -30,8 +30,6 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
|
||||
13
.github/workflows/compilation.yml
vendored
13
.github/workflows/compilation.yml
vendored
@@ -10,7 +10,7 @@ jobs:
|
||||
name: ${{ matrix.config.name }} ${{ matrix.binaries }}
|
||||
runs-on: ${{ matrix.config.os }}
|
||||
env:
|
||||
COMPCXX: ${{ matrix.config.compiler }}
|
||||
COMPILER: ${{ matrix.config.compiler }}
|
||||
COMP: ${{ matrix.config.comp }}
|
||||
EXT: ${{ matrix.config.ext }}
|
||||
NAME: ${{ matrix.config.simple_name }}
|
||||
@@ -25,8 +25,6 @@ jobs:
|
||||
shell: ${{ matrix.config.shell }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install fixed GCC on Linux
|
||||
if: runner.os == 'Linux'
|
||||
@@ -52,7 +50,7 @@ jobs:
|
||||
run: make net
|
||||
|
||||
- name: Check compiler
|
||||
run: $COMPCXX -v
|
||||
run: $COMPILER -v
|
||||
|
||||
- name: Test help target
|
||||
run: make help
|
||||
@@ -61,7 +59,7 @@ jobs:
|
||||
run: git --version
|
||||
|
||||
- name: Check compiler
|
||||
run: $COMPCXX -v
|
||||
run: $COMPILER -v
|
||||
|
||||
- name: Show g++ cpu info
|
||||
if: runner.os != 'macOS'
|
||||
@@ -88,7 +86,4 @@ jobs:
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }}
|
||||
path: |
|
||||
.
|
||||
!.git
|
||||
!.output
|
||||
path: .
|
||||
|
||||
43
.github/workflows/games.yml
vendored
43
.github/workflows/games.yml
vendored
@@ -1,43 +0,0 @@
|
||||
# This workflow will play games with a debug enabled SF using the PR
|
||||
|
||||
name: Games
|
||||
on:
|
||||
workflow_call:
|
||||
jobs:
|
||||
Matetrack:
|
||||
name: Games
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout SF repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
path: Stockfish
|
||||
persist-credentials: false
|
||||
|
||||
- name: build debug enabled version of SF
|
||||
working-directory: Stockfish/src
|
||||
run: make -j build debug=yes
|
||||
|
||||
- name: Checkout fast-chess repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: Disservin/fast-chess
|
||||
path: fast-chess
|
||||
ref: d54af1910d5479c669dc731f1f54f9108a251951
|
||||
persist-credentials: false
|
||||
|
||||
- name: fast-chess build
|
||||
working-directory: fast-chess
|
||||
run: make -j
|
||||
|
||||
- name: Run games
|
||||
working-directory: fast-chess
|
||||
run: |
|
||||
./fast-chess -rounds 4 -games 2 -repeat -concurrency 4 -openings file=app/tests/data/openings.epd format=epd order=random -srand $RANDOM\
|
||||
-engine name=sf1 cmd=/home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish\
|
||||
-engine name=sf2 cmd=/home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish\
|
||||
-ratinginterval 1 -report penta=true -each proto=uci tc=4+0.04 -log file=fast.log | tee fast.out
|
||||
cat fast.log
|
||||
! grep "Assertion" fast.log > /dev/null
|
||||
! grep "disconnect" fast.out > /dev/null
|
||||
2
.github/workflows/iwyu.yml
vendored
2
.github/workflows/iwyu.yml
vendored
@@ -14,7 +14,6 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
path: Stockfish
|
||||
persist-credentials: false
|
||||
|
||||
- name: Checkout include-what-you-use
|
||||
uses: actions/checkout@v4
|
||||
@@ -22,7 +21,6 @@ jobs:
|
||||
repository: include-what-you-use/include-what-you-use
|
||||
ref: f25caa280dc3277c4086ec345ad279a2463fea0f
|
||||
path: include-what-you-use
|
||||
persist-credentials: false
|
||||
|
||||
- name: Download required linux packages
|
||||
run: |
|
||||
|
||||
54
.github/workflows/matetrack.yml
vendored
54
.github/workflows/matetrack.yml
vendored
@@ -1,54 +0,0 @@
|
||||
# This workflow will run matetrack on the PR
|
||||
|
||||
name: Matetrack
|
||||
on:
|
||||
workflow_call:
|
||||
jobs:
|
||||
Matetrack:
|
||||
name: Matetrack
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout SF repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
path: Stockfish
|
||||
persist-credentials: false
|
||||
|
||||
- name: build SF
|
||||
working-directory: Stockfish/src
|
||||
run: make -j profile-build
|
||||
|
||||
- name: Checkout matetrack repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: vondele/matetrack
|
||||
path: matetrack
|
||||
ref: 814160f82e6428ed2f6522dc06c2a6fa539cd413
|
||||
persist-credentials: false
|
||||
|
||||
- name: matetrack install deps
|
||||
working-directory: matetrack
|
||||
run: pip install -r requirements.txt
|
||||
|
||||
- name: cache syzygy
|
||||
id: cache-syzygy
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
matetrack/3-4-5-wdl/
|
||||
matetrack/3-4-5-dtz/
|
||||
key: key-syzygy
|
||||
|
||||
- name: download syzygy 3-4-5 if needed
|
||||
working-directory: matetrack
|
||||
if: steps.cache-syzygy.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
wget --no-verbose -r -nH --cut-dirs=2 --no-parent --reject="index.html*" -e robots=off https://tablebase.lichess.ovh/tables/standard/3-4-5-wdl/
|
||||
wget --no-verbose -r -nH --cut-dirs=2 --no-parent --reject="index.html*" -e robots=off https://tablebase.lichess.ovh/tables/standard/3-4-5-dtz/
|
||||
|
||||
- name: Run matetrack
|
||||
working-directory: matetrack
|
||||
run: |
|
||||
python matecheck.py --syzygyPath 3-4-5-wdl/:3-4-5-dtz/ --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile mates2000.epd --nodes 100000 | tee matecheckout.out
|
||||
! grep "issues were detected" matecheckout.out > /dev/null
|
||||
17
.github/workflows/sanitizers.yml
vendored
17
.github/workflows/sanitizers.yml
vendored
@@ -6,7 +6,7 @@ jobs:
|
||||
name: ${{ matrix.sanitizers.name }}
|
||||
runs-on: ${{ matrix.config.os }}
|
||||
env:
|
||||
COMPCXX: ${{ matrix.config.compiler }}
|
||||
COMPILER: ${{ matrix.config.compiler }}
|
||||
COMP: ${{ matrix.config.comp }}
|
||||
CXXFLAGS: "-Werror"
|
||||
strategy:
|
||||
@@ -31,17 +31,12 @@ jobs:
|
||||
- name: Run under valgrind-thread
|
||||
make_option: ""
|
||||
instrumented_option: valgrind-thread
|
||||
- name: Run non-instrumented
|
||||
make_option: ""
|
||||
instrumented_option: none
|
||||
defaults:
|
||||
run:
|
||||
working-directory: src
|
||||
shell: ${{ matrix.config.shell }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Download required linux packages
|
||||
run: |
|
||||
@@ -52,7 +47,7 @@ jobs:
|
||||
run: make net
|
||||
|
||||
- name: Check compiler
|
||||
run: $COMPCXX -v
|
||||
run: $COMPILER -v
|
||||
|
||||
- name: Test help target
|
||||
run: make help
|
||||
@@ -60,14 +55,6 @@ jobs:
|
||||
- name: Check git
|
||||
run: git --version
|
||||
|
||||
# Since Linux Kernel 6.5 we are getting false positives from the ci,
|
||||
# lower the ASLR entropy to disable ASLR, which works as a temporary workaround.
|
||||
# https://github.com/google/sanitizers/issues/1716
|
||||
# https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2056762
|
||||
|
||||
- name: Lower ALSR entropy
|
||||
run: sudo sysctl -w vm.mmap_rnd_bits=28
|
||||
|
||||
# Sanitizers
|
||||
|
||||
- name: ${{ matrix.sanitizers.name }}
|
||||
|
||||
18
.github/workflows/stockfish.yml
vendored
18
.github/workflows/stockfish.yml
vendored
@@ -15,12 +15,8 @@ jobs:
|
||||
Prerelease:
|
||||
if: github.repository == 'official-stockfish/Stockfish' && (github.ref == 'refs/heads/master' || (startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag'))
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write # For deleting/creating a prerelease
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
# returns null if no pre-release exists
|
||||
- name: Get Commit SHA of Latest Pre-release
|
||||
@@ -70,8 +66,6 @@ jobs:
|
||||
arm_matrix: ${{ steps.set-arm-matrix.outputs.arm_matrix }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
persist-credentials: false
|
||||
- id: set-matrix
|
||||
run: |
|
||||
TASKS=$(echo $(cat .github/ci/matrix.json) )
|
||||
@@ -96,27 +90,15 @@ jobs:
|
||||
uses: ./.github/workflows/sanitizers.yml
|
||||
Tests:
|
||||
uses: ./.github/workflows/tests.yml
|
||||
Matetrack:
|
||||
uses: ./.github/workflows/matetrack.yml
|
||||
Games:
|
||||
uses: ./.github/workflows/games.yml
|
||||
Binaries:
|
||||
if: github.repository == 'official-stockfish/Stockfish'
|
||||
needs: [Matrix, Prerelease, Compilation]
|
||||
uses: ./.github/workflows/upload_binaries.yml
|
||||
with:
|
||||
matrix: ${{ needs.Matrix.outputs.matrix }}
|
||||
permissions:
|
||||
contents: write # For deleting/creating a (pre)release
|
||||
secrets:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
ARM_Binaries:
|
||||
if: github.repository == 'official-stockfish/Stockfish'
|
||||
needs: [Matrix, Prerelease, ARMCompilation]
|
||||
uses: ./.github/workflows/upload_binaries.yml
|
||||
with:
|
||||
matrix: ${{ needs.Matrix.outputs.arm_matrix }}
|
||||
permissions:
|
||||
contents: write # For deleting/creating a (pre)release
|
||||
secrets:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
9
.github/workflows/tests.yml
vendored
9
.github/workflows/tests.yml
vendored
@@ -6,7 +6,7 @@ jobs:
|
||||
name: ${{ matrix.config.name }}
|
||||
runs-on: ${{ matrix.config.os }}
|
||||
env:
|
||||
COMPCXX: ${{ matrix.config.compiler }}
|
||||
COMPILER: ${{ matrix.config.compiler }}
|
||||
COMP: ${{ matrix.config.comp }}
|
||||
CXXFLAGS: "-Werror"
|
||||
strategy:
|
||||
@@ -106,7 +106,6 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Download required linux packages
|
||||
if: runner.os == 'Linux'
|
||||
@@ -148,7 +147,7 @@ jobs:
|
||||
|
||||
- name: Download required macOS packages
|
||||
if: runner.os == 'macOS'
|
||||
run: brew install coreutils gcc@11
|
||||
run: brew install coreutils
|
||||
|
||||
- name: Setup msys and install required packages
|
||||
if: runner.os == 'Windows'
|
||||
@@ -173,9 +172,9 @@ jobs:
|
||||
if [ $COMP == ndk ]; then
|
||||
export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
|
||||
fi
|
||||
$COMPCXX -v
|
||||
$COMPILER -v
|
||||
else
|
||||
echo "$COMPCXX -v" > script.sh
|
||||
echo "$COMPILER -v" > script.sh
|
||||
docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}/src:/app sf_builder
|
||||
fi
|
||||
|
||||
|
||||
9
.github/workflows/upload_binaries.yml
vendored
9
.github/workflows/upload_binaries.yml
vendored
@@ -5,16 +5,13 @@ on:
|
||||
matrix:
|
||||
type: string
|
||||
required: true
|
||||
secrets:
|
||||
token:
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
Artifacts:
|
||||
name: ${{ matrix.config.name }} ${{ matrix.binaries }}
|
||||
runs-on: ${{ matrix.config.os }}
|
||||
env:
|
||||
COMPCXX: ${{ matrix.config.compiler }}
|
||||
COMPILER: ${{ matrix.config.compiler }}
|
||||
COMP: ${{ matrix.config.comp }}
|
||||
EXT: ${{ matrix.config.ext }}
|
||||
NAME: ${{ matrix.config.simple_name }}
|
||||
@@ -28,8 +25,6 @@ jobs:
|
||||
shell: ${{ matrix.config.shell }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Download artifact from compilation
|
||||
uses: actions/download-artifact@v4
|
||||
@@ -83,7 +78,6 @@ jobs:
|
||||
uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981
|
||||
with:
|
||||
files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }}
|
||||
token: ${{ secrets.token }}
|
||||
|
||||
- name: Get last commit sha
|
||||
id: last_commit
|
||||
@@ -110,4 +104,3 @@ jobs:
|
||||
tag_name: stockfish-dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }}
|
||||
prerelease: true
|
||||
files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }}
|
||||
token: ${{ secrets.token }}
|
||||
|
||||
6
AUTHORS
6
AUTHORS
@@ -20,7 +20,6 @@ Alexander Kure
|
||||
Alexander Pagel (Lolligerhans)
|
||||
Alfredo Menezes (lonfom169)
|
||||
Ali AlZhrani (Cooffe)
|
||||
Andreas Jan van der Meulen (Andyson007)
|
||||
Andreas Matthies (Matthies)
|
||||
Andrei Vetrov (proukornew)
|
||||
Andrew Grant (AndyGrant)
|
||||
@@ -47,7 +46,6 @@ Bryan Cross (crossbr)
|
||||
candirufish
|
||||
Chess13234
|
||||
Chris Cain (ceebo)
|
||||
Ciekce
|
||||
clefrks
|
||||
Clemens L. (rn5f107s2)
|
||||
Cody Ho (aesrentai)
|
||||
@@ -69,11 +67,9 @@ Douglas Matos Gomes (dsmsgms)
|
||||
Dubslow
|
||||
Eduardo Cáceres (eduherminio)
|
||||
Eelco de Groot (KingDefender)
|
||||
Ehsan Rashid (erashid)
|
||||
Elvin Liu (solarlight2)
|
||||
erbsenzaehler
|
||||
Ernesto Gatti
|
||||
evqsx
|
||||
Fabian Beuke (madnight)
|
||||
Fabian Fichter (ianfab)
|
||||
Fanael Linithien (Fanael)
|
||||
@@ -130,7 +126,6 @@ Kojirion
|
||||
Krystian Kuzniarek (kuzkry)
|
||||
Leonardo Ljubičić (ICCF World Champion)
|
||||
Leonid Pechenik (lp--)
|
||||
Li Ying (yl25946)
|
||||
Liam Keegan (lkeegan)
|
||||
Linmiao Xu (linrock)
|
||||
Linus Arver (listx)
|
||||
@@ -171,7 +166,6 @@ Niklas Fiekas (niklasf)
|
||||
Nikolay Kostov (NikolayIT)
|
||||
Norman Schmidt (FireFather)
|
||||
notruck
|
||||
Nour Berakdar (Nonlinear)
|
||||
Ofek Shochat (OfekShochat, ghostway)
|
||||
Ondrej Mosnáček (WOnder93)
|
||||
Ondřej Mišina (AndrovT)
|
||||
|
||||
@@ -59,7 +59,7 @@ discussion._
|
||||
|
||||
Changes to Stockfish C++ code should respect our coding style defined by
|
||||
[.clang-format](.clang-format). You can format your changes by running
|
||||
`make format`. This requires clang-format version 18 to be installed on your system.
|
||||
`make format`. This requires clang-format version 17 to be installed on your system.
|
||||
|
||||
## Navigate
|
||||
|
||||
|
||||
27
README.md
27
README.md
@@ -59,6 +59,33 @@ This distribution of Stockfish consists of the following files:
|
||||
* a file with the .nnue extension, storing the neural network for the NNUE
|
||||
evaluation. Binary distributions will have this file embedded.
|
||||
|
||||
## Stockfish on distributed memory systems
|
||||
|
||||
The cluster branch allows for running Stockfish on a cluster of servers (nodes)
|
||||
that are connected with a high-speed and low-latency network, using the message
|
||||
passing interface (MPI). In this case, one MPI process should be run per node,
|
||||
and UCI options can be used to set the number of threads/hash per node as usual.
|
||||
Typically, the engine will be invoked as
|
||||
```
|
||||
mpirun -np N /path/to/stockfish
|
||||
```
|
||||
where ```N``` stands for the number of MPI processes used (alternatives to ```mpirun```
|
||||
include ```mpiexec``` and ```srun```). Use 1 MPI rank per node, and employ threading
|
||||
according to the cores per node. To build the cluster
|
||||
branch, it is sufficient to specify ```COMPCXX=mpicxx``` (or e.g. CC depending on the name
|
||||
of the compiler providing MPI support) on the make command line, and do a clean build:
|
||||
```
|
||||
make -j ARCH=x86-64-modern clean build COMPCXX=mpicxx mpi=yes
|
||||
```
|
||||
Make sure that the MPI installation is configured to support ```MPI_THREAD_MULTIPLE```,
|
||||
this might require adding system specific compiler options to the Makefile. Stockfish employs
|
||||
non-blocking (asynchronous) communication, and benefits from an MPI
|
||||
implementation that efficiently supports this. Some MPI implementations might benefit
|
||||
from leaving 1 core/thread free for these asynchronous communications, and might require
|
||||
setting additional environment variables. ```mpirun``` should forward stdin/stdout
|
||||
to ```rank 0``` only (e.g. ```srun --input=0 --output=0```).
|
||||
Refer to your MPI documentation for more info.
|
||||
|
||||
## Contributing
|
||||
|
||||
__See [Contributing Guide](CONTRIBUTING.md).__
|
||||
|
||||
@@ -1,109 +1,106 @@
|
||||
Contributors to Fishtest with >10,000 CPU hours, as of 2024-08-31.
|
||||
Contributors to Fishtest with >10,000 CPU hours, as of 2024-02-24.
|
||||
Thank you!
|
||||
|
||||
Username CPU Hours Games played
|
||||
------------------------------------------------------------------
|
||||
noobpwnftw 40428649 3164740143
|
||||
technologov 23581394 1076895482
|
||||
vdv 19425375 718302718
|
||||
linrock 10034115 643194527
|
||||
noobpwnftw 39302472 3055513453
|
||||
technologov 20845762 994893444
|
||||
linrock 8616428 560281417
|
||||
mlang 3026000 200065824
|
||||
okrout 2572676 237511408
|
||||
pemo 1836785 62226157
|
||||
okrout 2332151 222639518
|
||||
pemo 1800019 60274069
|
||||
dew 1689162 100033738
|
||||
TueRens 1648780 77891164
|
||||
sebastronomy 1468328 60859092
|
||||
grandphish2 1466110 91776075
|
||||
JojoM 1130625 73666098
|
||||
olafm 1067009 74807270
|
||||
TueRens 1474943 75121774
|
||||
grandphish2 1463002 91616949
|
||||
JojoM 1109702 72927902
|
||||
olafm 978631 71037944
|
||||
sebastronomy 939955 44920556
|
||||
tvijlbrief 796125 51897690
|
||||
oz 781847 53910686
|
||||
rpngn 768460 49812975
|
||||
gvreuls 751085 52177668
|
||||
gvreuls 711320 49142318
|
||||
mibere 703840 46867607
|
||||
leszek 566598 42024615
|
||||
cw 519601 34988161
|
||||
oz 646268 46293638
|
||||
rpngn 572571 38928563
|
||||
leszek 531858 39316505
|
||||
cw 518116 34894291
|
||||
fastgm 503862 30260818
|
||||
CSU_Dynasty 468784 31385034
|
||||
maximmasiutin 439192 27893522
|
||||
ctoks 435148 28541909
|
||||
ctoks 434591 28520597
|
||||
maximmasiutin 429983 27066286
|
||||
crunchy 427414 27371625
|
||||
bcross 415724 29061187
|
||||
robal 371112 24642270
|
||||
mgrabiak 367963 26464704
|
||||
velislav 342588 22140902
|
||||
ncfish1 329039 20624527
|
||||
mgrabiak 338763 23999170
|
||||
Fisherman 327231 21829379
|
||||
robal 299836 20213182
|
||||
Dantist 296386 18031762
|
||||
tolkki963 262050 22049676
|
||||
Sylvain27 255595 8864404
|
||||
ncfish1 267604 17881149
|
||||
nordlandia 249322 16420192
|
||||
Fifis 237657 13065577
|
||||
marrco 234581 17714473
|
||||
Calis007 217537 14450582
|
||||
tolkki963 233490 19773930
|
||||
glinscott 208125 13277240
|
||||
drabel 204167 13930674
|
||||
mhoram 202894 12601997
|
||||
bking_US 198894 11876016
|
||||
Calis007 188631 12795784
|
||||
Thanar 179852 12365359
|
||||
javran 169679 13481966
|
||||
armo9494 162863 10937118
|
||||
Fifis 176209 10638245
|
||||
vdv 175544 9904472
|
||||
spams 157128 10319326
|
||||
DesolatedDodo 156683 10211206
|
||||
Wencey 152308 8375444
|
||||
DesolatedDodo 156659 10210328
|
||||
armo9494 155355 10566898
|
||||
sqrt2 147963 9724586
|
||||
vdbergh 140311 9225125
|
||||
jcAEie 140086 10603658
|
||||
vdbergh 139746 9172061
|
||||
CoffeeOne 137100 5024116
|
||||
malala 136182 8002293
|
||||
xoto 133759 9159372
|
||||
Dubslow 129614 8519312
|
||||
davar 129023 8376525
|
||||
DMBK 122960 8980062
|
||||
dsmith 122059 7570238
|
||||
CypressChess 120784 8672620
|
||||
sschnee 120526 7547722
|
||||
maposora 119734 10749710
|
||||
javran 121564 10144656
|
||||
amicic 119661 7938029
|
||||
Wolfgang 115713 8159062
|
||||
sschnee 118107 7389266
|
||||
Wolfgang 114616 8070494
|
||||
Data 113305 8220352
|
||||
BrunoBanani 112960 7436849
|
||||
markkulix 112897 9133168
|
||||
cuistot 109802 7121030
|
||||
Wencey 111502 5991676
|
||||
cuistot 108503 7006992
|
||||
CypressChess 108331 7759788
|
||||
skiminki 107583 7218170
|
||||
sterni1971 104431 5938282
|
||||
MaZePallas 102823 6633619
|
||||
sterni1971 100532 5880772
|
||||
sunu 100167 7040199
|
||||
zeryl 99331 6221261
|
||||
thirdlife 99156 2245320
|
||||
ElbertoOne 99028 7023771
|
||||
megaman7de 98456 6675076
|
||||
Goatminola 96765 8257832
|
||||
bigpen0r 94825 6529241
|
||||
Dubslow 98600 6903242
|
||||
markkulix 97010 7643900
|
||||
bigpen0r 94809 6529203
|
||||
brabos 92118 6186135
|
||||
Maxim 90818 3283364
|
||||
psk 89957 5984901
|
||||
megaman7de 88822 6052132
|
||||
racerschmacer 85805 6122790
|
||||
maposora 85710 7778146
|
||||
Vizvezdenec 83761 5344740
|
||||
0x3C33 82614 5271253
|
||||
szupaw 82495 7151686
|
||||
BRAVONE 81239 5054681
|
||||
nssy 76497 5259388
|
||||
cody 76126 4492126
|
||||
jromang 76106 5236025
|
||||
MarcusTullius 76103 5061991
|
||||
woutboat 76072 6022922
|
||||
Spprtr 75977 5252287
|
||||
teddybaer 75125 5407666
|
||||
Pking_cda 73776 5293873
|
||||
yurikvelo 73611 5046822
|
||||
Mineta 71130 4711422
|
||||
yurikvelo 73516 5036928
|
||||
MarcusTullius 71053 4803477
|
||||
Bobo1239 70579 4794999
|
||||
solarlight 70517 5028306
|
||||
dv8silencer 70287 3883992
|
||||
Spprtr 69646 4806763
|
||||
Mineta 66325 4537742
|
||||
manap 66273 4121774
|
||||
szupaw 65468 5669742
|
||||
tinker 64333 4268790
|
||||
qurashee 61208 3429862
|
||||
woutboat 59496 4906352
|
||||
AGI 58195 4329580
|
||||
robnjr 57262 4053117
|
||||
Freja 56938 3733019
|
||||
@@ -111,45 +108,39 @@ MaxKlaxxMiner 56879 3423958
|
||||
ttruscott 56010 3680085
|
||||
rkl 55132 4164467
|
||||
jmdana 54697 4012593
|
||||
notchris 53936 4184018
|
||||
renouve 53811 3501516
|
||||
notchris 52433 4044590
|
||||
finfish 51360 3370515
|
||||
eva42 51272 3599691
|
||||
eastorwest 51117 3454811
|
||||
Goatminola 51004 4432492
|
||||
rap 49985 3219146
|
||||
pb00067 49733 3298934
|
||||
GPUex 48686 3684998
|
||||
OuaisBla 48626 3445134
|
||||
ronaldjerum 47654 3240695
|
||||
biffhero 46564 3111352
|
||||
oryx 45639 3546530
|
||||
oryx 45533 3539290
|
||||
VoyagerOne 45476 3452465
|
||||
speedycpu 43842 3003273
|
||||
jbwiebe 43305 2805433
|
||||
Antihistamine 41788 2761312
|
||||
mhunt 41735 2691355
|
||||
jibarbosa 41640 4145702
|
||||
homyur 39893 2850481
|
||||
gri 39871 2515779
|
||||
DeepnessFulled 39020 3323102
|
||||
Garf 37741 2999686
|
||||
SC 37299 2731694
|
||||
Gaster319 37118 3279678
|
||||
naclosagc 36562 1279618
|
||||
Sylvain27 36520 1467082
|
||||
csnodgrass 36207 2688994
|
||||
Gaster319 35655 3149442
|
||||
strelock 34716 2074055
|
||||
gopeto 33717 2245606
|
||||
EthanOConnor 33370 2090311
|
||||
slakovv 32915 2021889
|
||||
jojo2357 32890 2826662
|
||||
shawnxu 32019 2802552
|
||||
gopeto 31884 2076712
|
||||
Gelma 31771 1551204
|
||||
vidar808 31560 1351810
|
||||
kdave 31157 2198362
|
||||
manapbk 30987 1810399
|
||||
ZacHFX 30966 2272416
|
||||
TataneSan 30713 1513402
|
||||
votoanthuan 30691 2460856
|
||||
ZacHFX 30551 2238078
|
||||
Prcuvu 30377 2170122
|
||||
anst 30301 2190091
|
||||
jkiiski 30136 1904470
|
||||
@@ -158,15 +149,14 @@ hyperbolic.tom 29840 2017394
|
||||
chuckstablers 29659 2093438
|
||||
Pyafue 29650 1902349
|
||||
belzedar94 28846 1811530
|
||||
mecevdimitar 27610 1721382
|
||||
votoanthuan 27978 2285818
|
||||
shawnxu 27438 2465810
|
||||
chriswk 26902 1868317
|
||||
xwziegtm 26897 2124586
|
||||
achambord 26582 1767323
|
||||
somethingintheshadows 26496 2186404
|
||||
Patrick_G 26276 1801617
|
||||
yorkman 26193 1992080
|
||||
srowen 25743 1490684
|
||||
Ulysses 25413 1702830
|
||||
Ulysses 25397 1701264
|
||||
Jopo12321 25227 1652482
|
||||
SFTUser 25182 1675689
|
||||
nabildanial 25068 1531665
|
||||
@@ -174,69 +164,66 @@ Sharaf_DG 24765 1786697
|
||||
rodneyc 24376 1416402
|
||||
jsys14 24297 1721230
|
||||
agg177 23890 1395014
|
||||
AndreasKrug 23754 1890115
|
||||
srowen 23842 1342508
|
||||
Ente 23752 1678188
|
||||
jojo2357 23479 2061238
|
||||
JanErik 23408 1703875
|
||||
Isidor 23388 1680691
|
||||
Norabor 23371 1603244
|
||||
WoodMan777 23253 2023048
|
||||
Nullvalue 23155 2022752
|
||||
cisco2015 22920 1763301
|
||||
Zirie 22542 1472937
|
||||
Nullvalue 22490 1970374
|
||||
AndreasKrug 22485 1769491
|
||||
team-oh 22272 1636708
|
||||
Roady 22220 1465606
|
||||
MazeOfGalious 21978 1629593
|
||||
sg4032 21950 1643373
|
||||
tsim67 21747 1330880
|
||||
sg4032 21947 1643353
|
||||
ianh2105 21725 1632562
|
||||
Skiff84 21711 1014212
|
||||
xor12 21628 1680365
|
||||
dex 21612 1467203
|
||||
nesoneg 21494 1463031
|
||||
user213718 21454 1404128
|
||||
Serpensin 21452 1790510
|
||||
sphinx 21211 1384728
|
||||
qoo_charly_cai 21136 1514927
|
||||
IslandLambda 21062 1220838
|
||||
qoo_charly_cai 21135 1514907
|
||||
jjoshua2 21001 1423089
|
||||
Zake9298 20938 1565848
|
||||
horst.prack 20878 1465656
|
||||
fishtester 20729 1348888
|
||||
0xB00B1ES 20590 1208666
|
||||
ols 20477 1195945
|
||||
Dinde 20459 1292774
|
||||
Serpensin 20487 1729674
|
||||
Dinde 20440 1292390
|
||||
j3corre 20405 941444
|
||||
Adrian.Schmidt123 20316 1281436
|
||||
wei 19973 1745989
|
||||
teenychess 19819 1762006
|
||||
fishtester 19617 1257388
|
||||
rstoesser 19569 1293588
|
||||
eudhan 19274 1283717
|
||||
vulcan 18871 1729392
|
||||
wizardassassin 18795 1376884
|
||||
Karpovbot 18766 1053178
|
||||
WoodMan777 18556 1628264
|
||||
jundery 18445 1115855
|
||||
mkstockfishtester 18350 1690676
|
||||
ville 17883 1384026
|
||||
chris 17698 1487385
|
||||
purplefishies 17595 1092533
|
||||
dju 17414 981289
|
||||
ols 17291 1042003
|
||||
iisiraider 17275 1049015
|
||||
Skiff84 17111 950248
|
||||
DragonLord 17014 1162790
|
||||
Karby 17008 1013160
|
||||
pirt 16965 1271519
|
||||
redstone59 16842 1461780
|
||||
Karby 16839 1010124
|
||||
Alb11747 16787 1213990
|
||||
pirt 16493 1237199
|
||||
Naven94 16414 951718
|
||||
scuzzi 16115 994341
|
||||
wizardassassin 16392 1148672
|
||||
IgorLeMasson 16064 1147232
|
||||
scuzzi 15757 968735
|
||||
ako027ako 15671 1173203
|
||||
infinigon 15285 965966
|
||||
Nikolay.IT 15154 1068349
|
||||
Andrew Grant 15114 895539
|
||||
OssumOpossum 14857 1007129
|
||||
LunaticBFF57 14525 1190310
|
||||
enedene 14476 905279
|
||||
Hjax 14394 1005013
|
||||
IslandLambda 14393 958196
|
||||
bpfliegel 14233 882523
|
||||
YELNAMRON 14230 1128094
|
||||
mpx86 14019 759568
|
||||
@@ -246,56 +233,54 @@ Nesa92 13806 1116101
|
||||
crocogoat 13803 1117422
|
||||
joster 13710 946160
|
||||
mbeier 13650 1044928
|
||||
Pablohn26 13552 1088532
|
||||
wxt9861 13550 1312306
|
||||
Hjax 13535 915487
|
||||
Dark_wizzie 13422 1007152
|
||||
Rudolphous 13244 883140
|
||||
Machariel 13010 863104
|
||||
nalanzeyu 12996 232590
|
||||
infinigon 12991 943216
|
||||
mabichito 12903 749391
|
||||
Jackfish 12895 868928
|
||||
thijsk 12886 722107
|
||||
AdrianSA 12860 804972
|
||||
Flopzee 12698 894821
|
||||
whelanh 12682 266404
|
||||
mschmidt 12644 863193
|
||||
korposzczur 12606 838168
|
||||
tsim67 12570 890180
|
||||
Jackfish 12553 836958
|
||||
fatmurphy 12547 853210
|
||||
Oakwen 12532 855759
|
||||
icewulf 12447 854878
|
||||
Oakwen 12503 853105
|
||||
SapphireBrand 12416 969604
|
||||
deflectooor 12386 579392
|
||||
modolief 12386 896470
|
||||
TataneSan 12358 609332
|
||||
Farseer 12249 694108
|
||||
Hongildong 12201 648712
|
||||
pgontarz 12151 848794
|
||||
dbernier 12103 860824
|
||||
szczur90 12035 942376
|
||||
FormazChar 12019 910409
|
||||
rensonthemove 11999 971993
|
||||
FormazChar 11989 907809
|
||||
stocky 11954 699440
|
||||
MooTheCow 11923 779432
|
||||
somethingintheshadows 11940 989472
|
||||
MooTheCow 11892 776126
|
||||
3cho 11842 1036786
|
||||
ckaz 11792 732276
|
||||
whelanh 11557 245188
|
||||
infinity 11470 727027
|
||||
aga 11412 695127
|
||||
torbjo 11395 729145
|
||||
Thomas A. Anderson 11372 732094
|
||||
savage84 11358 670860
|
||||
Def9Infinity 11345 696552
|
||||
d64 11263 789184
|
||||
ali-al-zhrani 11245 779246
|
||||
ImperiumAeternum 11155 952000
|
||||
ckaz 11170 680866
|
||||
snicolet 11106 869170
|
||||
dapper 11032 771402
|
||||
Ethnikoi 10993 945906
|
||||
Snuuka 10938 435504
|
||||
Karmatron 10871 678306
|
||||
Karmatron 10859 678058
|
||||
basepi 10637 744851
|
||||
jibarbosa 10628 857100
|
||||
Cubox 10621 826448
|
||||
gerbil 10519 971688
|
||||
mecevdimitar 10609 787318
|
||||
michaelrpg 10509 739239
|
||||
Def9Infinity 10427 686978
|
||||
OIVAS7572 10420 995586
|
||||
wxt9861 10412 1013864
|
||||
Garruk 10365 706465
|
||||
dzjp 10343 732529
|
||||
RickGroszkiewicz 10263 990798
|
||||
|
||||
37
src/Makefile
37
src/Makefile
@@ -53,9 +53,9 @@ PGOBENCH = $(WINE_PATH) ./$(EXE) bench
|
||||
|
||||
### Source and object files
|
||||
SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \
|
||||
misc.cpp movegen.cpp movepick.cpp position.cpp \
|
||||
misc.cpp movegen.cpp movepick.cpp position.cpp cluster.cpp \
|
||||
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
|
||||
nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp engine.cpp score.cpp memory.cpp
|
||||
nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp
|
||||
|
||||
HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h \
|
||||
nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/layers/affine_transform.h \
|
||||
@@ -63,7 +63,7 @@ HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h \
|
||||
nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h nnue/nnue_architecture.h \
|
||||
nnue/nnue_common.h nnue/nnue_feature_transformer.h position.h \
|
||||
search.h syzygy/tbprobe.h thread.h thread_win32_osx.h timeman.h \
|
||||
tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h numa.h memory.h
|
||||
tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h cluster.h
|
||||
|
||||
OBJS = $(notdir $(SRCS:.cpp=.o))
|
||||
|
||||
@@ -100,6 +100,7 @@ VPATH = syzygy:nnue:nnue/features
|
||||
# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512
|
||||
# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture
|
||||
# dotprod = yes/no --- -DUSE_NEON_DOTPROD --- Use ARM advanced SIMD Int8 dot product instructions
|
||||
# mpi = yes/no --- -DUSE_MPI --- Use Message Passing Interface
|
||||
#
|
||||
# Note that Makefile is space sensitive, so when adding new architectures
|
||||
# or modifying existing flags, you have to make sure there are no extra spaces
|
||||
@@ -149,12 +150,13 @@ avx512 = no
|
||||
vnni256 = no
|
||||
vnni512 = no
|
||||
neon = no
|
||||
mpi = no
|
||||
dotprod = no
|
||||
arm_version = 0
|
||||
STRIP = strip
|
||||
|
||||
ifneq ($(shell which clang-format-18 2> /dev/null),)
|
||||
CLANG-FORMAT = clang-format-18
|
||||
ifneq ($(shell which clang-format-17 2> /dev/null),)
|
||||
CLANG-FORMAT = clang-format-17
|
||||
else
|
||||
CLANG-FORMAT = clang-format
|
||||
endif
|
||||
@@ -489,8 +491,8 @@ ifeq ($(COMP),clang)
|
||||
endif
|
||||
|
||||
ifeq ($(KERNEL),Darwin)
|
||||
CXXFLAGS += -mmacosx-version-min=10.15
|
||||
LDFLAGS += -mmacosx-version-min=10.15
|
||||
CXXFLAGS += -mmacosx-version-min=10.14
|
||||
LDFLAGS += -mmacosx-version-min=10.14
|
||||
ifneq ($(arch),any)
|
||||
CXXFLAGS += -arch $(arch)
|
||||
LDFLAGS += -arch $(arch)
|
||||
@@ -546,6 +548,11 @@ else
|
||||
endif
|
||||
endif
|
||||
|
||||
### Travis CI script uses COMPILER to overwrite CXX
|
||||
ifdef COMPILER
|
||||
COMPCXX=$(COMPILER)
|
||||
endif
|
||||
|
||||
### Allow overwriting CXX from command line
|
||||
ifdef COMPCXX
|
||||
CXX=$(COMPCXX)
|
||||
@@ -786,6 +793,15 @@ ifeq ($(OS), Android)
|
||||
LDFLAGS += -fPIE -pie
|
||||
endif
|
||||
|
||||
### 3.10 MPI
|
||||
ifneq (,$(findstring mpi, $(CXX)))
|
||||
mpi = yes
|
||||
endif
|
||||
ifeq ($(mpi),yes)
|
||||
CXXFLAGS += -DUSE_MPI -Wno-cast-qual -fexceptions
|
||||
DEPENDFLAGS += -DUSE_MPI
|
||||
endif
|
||||
|
||||
### ==========================================================================
|
||||
### Section 4. Public Targets
|
||||
### ==========================================================================
|
||||
@@ -1008,6 +1024,7 @@ config-sanity: net
|
||||
@echo "vnni256: '$(vnni256)'"
|
||||
@echo "vnni512: '$(vnni512)'"
|
||||
@echo "neon: '$(neon)'"
|
||||
@echo "mpi: '$(mpi)'"
|
||||
@echo "dotprod: '$(dotprod)'"
|
||||
@echo "arm_version: '$(arm_version)'"
|
||||
@echo "target_windows: '$(target_windows)'"
|
||||
@@ -1051,14 +1068,14 @@ FORCE:
|
||||
|
||||
clang-profile-make:
|
||||
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
|
||||
EXTRACXXFLAGS='-fprofile-generate ' \
|
||||
EXTRALDFLAGS=' -fprofile-generate' \
|
||||
EXTRACXXFLAGS='-fprofile-instr-generate ' \
|
||||
EXTRALDFLAGS=' -fprofile-instr-generate' \
|
||||
all
|
||||
|
||||
clang-profile-use:
|
||||
$(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw
|
||||
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
|
||||
EXTRACXXFLAGS='-fprofile-use=stockfish.profdata' \
|
||||
EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \
|
||||
EXTRALDFLAGS='-fprofile-use ' \
|
||||
all
|
||||
|
||||
|
||||
@@ -23,6 +23,8 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "position.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// clang-format off
|
||||
@@ -93,7 +95,7 @@ const std::vector<std::string> Defaults = {
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace Stockfish::Benchmark {
|
||||
namespace Stockfish {
|
||||
|
||||
// Builds a list of UCI commands to be run by bench. There
|
||||
// are five parameters: TT size in MB, number of search threads that
|
||||
@@ -106,7 +108,7 @@ namespace Stockfish::Benchmark {
|
||||
// bench 64 1 100000 default nodes : search default positions for 100K nodes each
|
||||
// bench 64 4 5000 current movetime : search current position with 4 threads for 5 sec
|
||||
// bench 16 1 5 blah perft : run a perft 5 on positions in file "blah"
|
||||
std::vector<std::string> setup_bench(const std::string& currentFen, std::istream& is) {
|
||||
std::vector<std::string> setup_bench(const Position& current, std::istream& is) {
|
||||
|
||||
std::vector<std::string> fens, list;
|
||||
std::string go, token;
|
||||
@@ -124,7 +126,7 @@ std::vector<std::string> setup_bench(const std::string& currentFen, std::istream
|
||||
fens = Defaults;
|
||||
|
||||
else if (fenFile == "current")
|
||||
fens.push_back(currentFen);
|
||||
fens.push_back(current.fen());
|
||||
|
||||
else
|
||||
{
|
||||
|
||||
@@ -23,9 +23,11 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace Stockfish::Benchmark {
|
||||
namespace Stockfish {
|
||||
|
||||
std::vector<std::string> setup_bench(const std::string&, std::istream&);
|
||||
class Position;
|
||||
|
||||
std::vector<std::string> setup_bench(const Position&, std::istream&);
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
|
||||
@@ -124,14 +124,8 @@ Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) {
|
||||
for (Direction d : (pt == ROOK ? RookDirections : BishopDirections))
|
||||
{
|
||||
Square s = sq;
|
||||
while (safe_destination(s, d))
|
||||
{
|
||||
while (safe_destination(s, d) && !(occupied & s))
|
||||
attacks |= (s += d);
|
||||
if (occupied & s)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return attacks;
|
||||
|
||||
480
src/cluster.cpp
Normal file
480
src/cluster.cpp
Normal file
@@ -0,0 +1,480 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifdef USE_MPI
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <istream>
|
||||
#include <map>
|
||||
#include <mpi.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "cluster.h"
|
||||
#include "thread.h"
|
||||
#include "timeman.h"
|
||||
#include "tt.h"
|
||||
#include "search.h"
|
||||
|
||||
namespace Stockfish {
|
||||
namespace Cluster {
|
||||
|
||||
// Total number of ranks and rank within the communicator
|
||||
static int world_rank = MPI_PROC_NULL;
|
||||
static int world_size = 0;
|
||||
|
||||
// Signals between ranks exchange basic info using a dedicated communicator
|
||||
static MPI_Comm signalsComm = MPI_COMM_NULL;
|
||||
static MPI_Request reqSignals = MPI_REQUEST_NULL;
|
||||
static uint64_t signalsCallCounter = 0;
|
||||
|
||||
// Signals are the number of nodes searched, stop, table base hits, transposition table saves
|
||||
enum Signals : int {
|
||||
SIG_NODES = 0,
|
||||
SIG_STOP = 1,
|
||||
SIG_TB = 2,
|
||||
SIG_TTS = 3,
|
||||
SIG_NB = 4
|
||||
};
|
||||
static uint64_t signalsSend[SIG_NB] = {};
|
||||
static uint64_t signalsRecv[SIG_NB] = {};
|
||||
static uint64_t nodesSearchedOthers = 0;
|
||||
static uint64_t tbHitsOthers = 0;
|
||||
static uint64_t TTsavesOthers = 0;
|
||||
static uint64_t stopSignalsPosted = 0;
|
||||
|
||||
// The UCI threads of each rank exchange input over a dedicated communicator
|
||||
static MPI_Comm InputComm = MPI_COMM_NULL;
|
||||
|
||||
// bestMove requires MoveInfo communicators and data types
|
||||
static MPI_Comm MoveComm = MPI_COMM_NULL;
|
||||
static MPI_Datatype MIDatatype = MPI_DATATYPE_NULL;
|
||||
|
||||
// TT entries are communicated with a dedicated communicator.
|
||||
// The receive buffer is used to gather information from all ranks.
|
||||
// The TTCacheCounter tracks the number of local elements that are ready to be sent.
|
||||
static MPI_Comm TTComm = MPI_COMM_NULL;
|
||||
static std::array<std::vector<KeyedTTEntry>, 2> TTSendRecvBuffs;
|
||||
static std::array<MPI_Request, 2> reqsTTSendRecv = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};
|
||||
static uint64_t sendRecvPosted = 0;
|
||||
static std::atomic<uint64_t> TTCacheCounter = {};
|
||||
|
||||
/// Initialize MPI and associated data types. Note that the MPI library must be configured
|
||||
/// to support MPI_THREAD_MULTIPLE, since multiple threads access MPI simultaneously.
|
||||
void init() {
|
||||
|
||||
int thread_support;
|
||||
MPI_Init_thread(nullptr, nullptr, MPI_THREAD_MULTIPLE, &thread_support);
|
||||
if (thread_support < MPI_THREAD_MULTIPLE)
|
||||
{
|
||||
std::cerr << "Stockfish requires support for MPI_THREAD_MULTIPLE." << std::endl;
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
|
||||
|
||||
const std::array<MPI_Aint, 5> MIdisps = {offsetof(MoveInfo, move), offsetof(MoveInfo, ponder),
|
||||
offsetof(MoveInfo, depth), offsetof(MoveInfo, score),
|
||||
offsetof(MoveInfo, rank)};
|
||||
MPI_Type_create_hindexed_block(5, 1, MIdisps.data(), MPI_INT, &MIDatatype);
|
||||
MPI_Type_commit(&MIDatatype);
|
||||
|
||||
MPI_Comm_dup(MPI_COMM_WORLD, &InputComm);
|
||||
MPI_Comm_dup(MPI_COMM_WORLD, &TTComm);
|
||||
MPI_Comm_dup(MPI_COMM_WORLD, &MoveComm);
|
||||
MPI_Comm_dup(MPI_COMM_WORLD, &signalsComm);
|
||||
}
|
||||
|
||||
/// Finalize MPI and free the associated data types.
|
||||
void finalize() {
|
||||
|
||||
MPI_Type_free(&MIDatatype);
|
||||
|
||||
MPI_Comm_free(&InputComm);
|
||||
MPI_Comm_free(&TTComm);
|
||||
MPI_Comm_free(&MoveComm);
|
||||
MPI_Comm_free(&signalsComm);
|
||||
|
||||
MPI_Finalize();
|
||||
}
|
||||
|
||||
/// Return the total number of ranks
|
||||
int size() { return world_size; }
|
||||
|
||||
/// Return the rank (index) of the process
|
||||
int rank() { return world_rank; }
|
||||
|
||||
/// The receive buffer depends on the number of MPI ranks and threads, resize as needed
|
||||
void ttSendRecvBuff_resize(size_t nThreads) {
|
||||
|
||||
for (int i : {0, 1})
|
||||
{
|
||||
TTSendRecvBuffs[i].resize(TTCacheSize * world_size * nThreads);
|
||||
std::fill(TTSendRecvBuffs[i].begin(), TTSendRecvBuffs[i].end(), KeyedTTEntry());
|
||||
}
|
||||
}
|
||||
|
||||
/// As input is only received by the root (rank 0) of the cluster, this input must be relayed
|
||||
/// to the UCI threads of all ranks, in order to setup the position, etc. We do this with a
|
||||
/// dedicated getline implementation, where the root broadcasts to all other ranks the received
|
||||
/// information.
|
||||
bool getline(std::istream& input, std::string& str) {
|
||||
|
||||
int size;
|
||||
std::vector<char> vec;
|
||||
int state;
|
||||
|
||||
if (is_root())
|
||||
{
|
||||
state = static_cast<bool>(std::getline(input, str));
|
||||
vec.assign(str.begin(), str.end());
|
||||
size = vec.size();
|
||||
}
|
||||
|
||||
// Some MPI implementations use busy-wait polling, while we need yielding as otherwise
|
||||
// the UCI thread on the non-root ranks would be consuming resources.
|
||||
static MPI_Request reqInput = MPI_REQUEST_NULL;
|
||||
MPI_Ibcast(&size, 1, MPI_INT, 0, InputComm, &reqInput);
|
||||
if (is_root())
|
||||
MPI_Wait(&reqInput, MPI_STATUS_IGNORE);
|
||||
else
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
int flag;
|
||||
MPI_Test(&reqInput, &flag, MPI_STATUS_IGNORE);
|
||||
if (flag)
|
||||
break;
|
||||
else
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
}
|
||||
}
|
||||
|
||||
// Broadcast received string
|
||||
if (!is_root())
|
||||
vec.resize(size);
|
||||
MPI_Bcast(vec.data(), size, MPI_CHAR, 0, InputComm);
|
||||
if (!is_root())
|
||||
str.assign(vec.begin(), vec.end());
|
||||
MPI_Bcast(&state, 1, MPI_INT, 0, InputComm);
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
/// Sending part of the signal communication loop
|
||||
namespace {
|
||||
// Snapshot the local counters and the stop flag into signalsSend, start a non-blocking
// sum all-reduce of that array over signalsComm, and count the call.
void signals_send(const ThreadPool& threads) {

    signalsSend[SIG_NODES] = threads.nodes_searched();
    signalsSend[SIG_STOP]  = threads.stop;
    signalsSend[SIG_TB]    = threads.tb_hits();
    signalsSend[SIG_TTS]   = threads.TT_saves();

    MPI_Iallreduce(signalsSend, signalsRecv, SIG_NB, MPI_UINT64_T, MPI_SUM, signalsComm,
                   &reqSignals);

    ++signalsCallCounter;
}
|
||||
|
||||
|
||||
/// Processing part of the signal communication loop.
|
||||
/// For some counters (e.g. nodes) we only keep their sum on the other nodes
|
||||
/// allowing to add local counters at any time for more fine grained process,
|
||||
/// which is useful to indicate progress during early iterations, and to have
|
||||
/// node counts that exactly match the non-MPI code in the single rank case.
|
||||
/// This call also propagates the stop signal between ranks.
|
||||
void signals_process(ThreadPool& threads) {
|
||||
|
||||
nodesSearchedOthers = signalsRecv[SIG_NODES] - signalsSend[SIG_NODES];
|
||||
tbHitsOthers = signalsRecv[SIG_TB] - signalsSend[SIG_TB];
|
||||
TTsavesOthers = signalsRecv[SIG_TTS] - signalsSend[SIG_TTS];
|
||||
stopSignalsPosted = signalsRecv[SIG_STOP];
|
||||
if (signalsRecv[SIG_STOP] > 0)
|
||||
threads.stop = true;
|
||||
}
|
||||
|
||||
// Start the next round of the TT exchange: count it, post a non-blocking receive from
// the previous rank into one buffer and a non-blocking send of the other buffer to the
// next rank. The two buffers swap roles on every call.
void sendrecv_post() {

    ++sendRecvPosted;

    auto& recvBuff = TTSendRecvBuffs[sendRecvPosted % 2];
    auto& sendBuff = TTSendRecvBuffs[(sendRecvPosted + 1) % 2];

    const int prevRank = (rank() + size() - 1) % size();
    const int nextRank = (rank() + 1) % size();

    MPI_Irecv(recvBuff.data(), recvBuff.size() * sizeof(KeyedTTEntry), MPI_BYTE, prevRank, 42,
              TTComm, &reqsTTSendRecv[0]);
    MPI_Isend(sendBuff.data(), sendBuff.size() * sizeof(KeyedTTEntry), MPI_BYTE, nextRank, 42,
              TTComm, &reqsTTSendRecv[1]);
}
|
||||
}
|
||||
|
||||
/// During search, most message passing is asynchronous, but at the end of
|
||||
/// search it makes sense to bring them to a common, finalized state.
|
||||
void signals_sync(ThreadPool& threads) {
|
||||
|
||||
while (stopSignalsPosted < uint64_t(size()))
|
||||
signals_poll(threads);
|
||||
|
||||
// Finalize outstanding messages of the signal loops.
|
||||
// We might have issued one call less than needed on some ranks.
|
||||
uint64_t globalCounter;
|
||||
MPI_Allreduce(&signalsCallCounter, &globalCounter, 1, MPI_UINT64_T, MPI_MAX, MoveComm);
|
||||
if (signalsCallCounter < globalCounter)
|
||||
{
|
||||
MPI_Wait(&reqSignals, MPI_STATUS_IGNORE);
|
||||
signals_send(threads);
|
||||
}
|
||||
assert(signalsCallCounter == globalCounter);
|
||||
MPI_Wait(&reqSignals, MPI_STATUS_IGNORE);
|
||||
signals_process(threads);
|
||||
|
||||
// Finalize outstanding messages in the sendRecv loop
|
||||
MPI_Allreduce(&sendRecvPosted, &globalCounter, 1, MPI_UINT64_T, MPI_MAX, MoveComm);
|
||||
while (sendRecvPosted < globalCounter)
|
||||
{
|
||||
MPI_Waitall(reqsTTSendRecv.size(), reqsTTSendRecv.data(), MPI_STATUSES_IGNORE);
|
||||
sendrecv_post();
|
||||
}
|
||||
assert(sendRecvPosted == globalCounter);
|
||||
MPI_Waitall(reqsTTSendRecv.size(), reqsTTSendRecv.data(), MPI_STATUSES_IGNORE);
|
||||
}
|
||||
|
||||
/// Initialize signal counters to zero.
|
||||
void signals_init() {
|
||||
|
||||
stopSignalsPosted = tbHitsOthers = TTsavesOthers = nodesSearchedOthers = 0;
|
||||
|
||||
signalsSend[SIG_NODES] = signalsRecv[SIG_NODES] = 0;
|
||||
signalsSend[SIG_TB] = signalsRecv[SIG_TB] = 0;
|
||||
signalsSend[SIG_TTS] = signalsRecv[SIG_TTS] = 0;
|
||||
signalsSend[SIG_STOP] = signalsRecv[SIG_STOP] = 0;
|
||||
}
|
||||
|
||||
/// Poll the signal loop, and start next round as needed.
|
||||
void signals_poll(ThreadPool& threads) {
|
||||
|
||||
int flag;
|
||||
MPI_Test(&reqSignals, &flag, MPI_STATUS_IGNORE);
|
||||
if (flag)
|
||||
{
|
||||
signals_process(threads);
|
||||
signals_send(threads);
|
||||
}
|
||||
}
|
||||
|
||||
/// Provide basic info related the cluster performance, in particular, the number of signals send,
|
||||
/// signals per sounds (sps), the number of gathers, the number of positions gathered (per node and per second, gpps)
|
||||
/// The number of TT saves and TT saves per second. If gpps equals approximately TTSavesps the gather loop has enough bandwidth.
|
||||
void cluster_info(const ThreadPool& threads, Depth depth, TimePoint elapsed) {
|
||||
|
||||
// TimePoint elapsed = Time.elapsed() + 1;
|
||||
uint64_t TTSaves = TT_saves(threads);
|
||||
|
||||
sync_cout << "info depth " << depth << " cluster "
|
||||
<< " signals " << signalsCallCounter << " sps " << signalsCallCounter * 1000 / elapsed
|
||||
<< " sendRecvs " << sendRecvPosted << " srpps "
|
||||
<< TTSendRecvBuffs[0].size() * sendRecvPosted * 1000 / elapsed << " TTSaves "
|
||||
<< TTSaves << " TTSavesps " << TTSaves * 1000 / elapsed << sync_endl;
|
||||
}
|
||||
|
||||
/// When a TT entry is saved, additional steps are taken if the entry is of sufficient depth.
|
||||
/// If sufficient entries has been collected, a communication is initiated.
|
||||
/// If a communication has been completed, the received results are saved to the TT.
|
||||
void save(TranspositionTable& TT,
|
||||
ThreadPool& threads,
|
||||
Search::Worker* thread,
|
||||
TTEntry* tte,
|
||||
Key k,
|
||||
Value v,
|
||||
bool PvHit,
|
||||
Bound b,
|
||||
Depth d,
|
||||
Move m,
|
||||
Value ev,
|
||||
uint8_t generation8) {
|
||||
|
||||
// Standard save to the TT
|
||||
tte->save(k, v, PvHit, b, d, m, ev, generation8);
|
||||
|
||||
// If the entry is of sufficient depth to be worth communicating, take action.
|
||||
if (d > 3)
|
||||
{
|
||||
// count the TTsaves to information: this should be relatively similar
|
||||
// to the number of entries we can send/recv.
|
||||
thread->TTsaves.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
// Add to thread's send buffer, the locking here avoids races when the master thread
|
||||
// prepares the send buffer.
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(thread->ttCache.mutex);
|
||||
thread->ttCache.buffer.replace(KeyedTTEntry(k, *tte));
|
||||
++TTCacheCounter;
|
||||
}
|
||||
|
||||
size_t recvBuffPerRankSize = threads.size() * TTCacheSize;
|
||||
|
||||
// Communicate on main search thread, as soon the threads combined have collected
|
||||
// sufficient data to fill the send buffers.
|
||||
if (thread == threads.main_thread()->worker.get() && TTCacheCounter > recvBuffPerRankSize)
|
||||
{
|
||||
// Test communication status
|
||||
int flag;
|
||||
MPI_Testall(reqsTTSendRecv.size(), reqsTTSendRecv.data(), &flag, MPI_STATUSES_IGNORE);
|
||||
|
||||
// Current communication is complete
|
||||
if (flag)
|
||||
{
|
||||
// Save all received entries to TT, and store our TTCaches, ready for the next round of communication
|
||||
for (size_t irank = 0; irank < size_t(size()); ++irank)
|
||||
{
|
||||
if (irank
|
||||
== size_t(
|
||||
rank())) // this is our part, fill the part of the buffer for sending
|
||||
{
|
||||
// Copy from the thread caches to the right spot in the buffer
|
||||
size_t i = irank * recvBuffPerRankSize;
|
||||
for (auto&& th : threads)
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(th->worker->ttCache.mutex);
|
||||
|
||||
for (auto&& e : th->worker->ttCache.buffer)
|
||||
TTSendRecvBuffs[sendRecvPosted % 2][i++] = e;
|
||||
|
||||
// Reset thread's send buffer
|
||||
th->worker->ttCache.buffer = {};
|
||||
}
|
||||
|
||||
TTCacheCounter = 0;
|
||||
}
|
||||
else // process data received from the corresponding rank.
|
||||
for (size_t i = irank * recvBuffPerRankSize;
|
||||
i < (irank + 1) * recvBuffPerRankSize; ++i)
|
||||
{
|
||||
auto&& e = TTSendRecvBuffs[sendRecvPosted % 2][i];
|
||||
bool found;
|
||||
TTEntry* replace_tte;
|
||||
replace_tte = TT.probe(e.first, found);
|
||||
replace_tte->save(e.first, e.second.value(), e.second.is_pv(),
|
||||
e.second.bound(), e.second.depth(), e.second.move(),
|
||||
e.second.eval(), TT.generation());
|
||||
}
|
||||
}
|
||||
|
||||
// Start next communication
|
||||
sendrecv_post();
|
||||
|
||||
// Force check of time on the next occasion, the above actions might have taken some time.
|
||||
thread->main_manager()->callsCnt = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Picks the bestMove across ranks, and send the associated info and PV to the root of the cluster.
|
||||
/// Note that this bestMove and PV must be output by the root, the guarantee proper ordering of output.
|
||||
/// TODO update to the scheme in master.. can this use aggregation of votes?
|
||||
void pick_moves(MoveInfo& mi, std::string& PVLine) {
|
||||
|
||||
MoveInfo* pMoveInfo = NULL;
|
||||
if (is_root())
|
||||
{
|
||||
pMoveInfo = (MoveInfo*) malloc(sizeof(MoveInfo) * size());
|
||||
}
|
||||
MPI_Gather(&mi, 1, MIDatatype, pMoveInfo, 1, MIDatatype, 0, MoveComm);
|
||||
|
||||
if (is_root())
|
||||
{
|
||||
std::map<int, int> votes;
|
||||
int minScore = pMoveInfo[0].score;
|
||||
for (int i = 0; i < size(); ++i)
|
||||
{
|
||||
minScore = std::min(minScore, pMoveInfo[i].score);
|
||||
votes[pMoveInfo[i].move] = 0;
|
||||
}
|
||||
for (int i = 0; i < size(); ++i)
|
||||
{
|
||||
votes[pMoveInfo[i].move] += pMoveInfo[i].score - minScore + pMoveInfo[i].depth;
|
||||
}
|
||||
int bestVote = votes[pMoveInfo[0].move];
|
||||
for (int i = 0; i < size(); ++i)
|
||||
{
|
||||
if (votes[pMoveInfo[i].move] > bestVote)
|
||||
{
|
||||
bestVote = votes[pMoveInfo[i].move];
|
||||
mi = pMoveInfo[i];
|
||||
}
|
||||
}
|
||||
free(pMoveInfo);
|
||||
}
|
||||
|
||||
// Send around the final result
|
||||
MPI_Bcast(&mi, 1, MIDatatype, 0, MoveComm);
|
||||
|
||||
// Send PV line to root as needed
|
||||
if (mi.rank != 0 && mi.rank == rank())
|
||||
{
|
||||
int size;
|
||||
std::vector<char> vec;
|
||||
vec.assign(PVLine.begin(), PVLine.end());
|
||||
size = vec.size();
|
||||
MPI_Send(&size, 1, MPI_INT, 0, 42, MoveComm);
|
||||
MPI_Send(vec.data(), size, MPI_CHAR, 0, 42, MoveComm);
|
||||
}
|
||||
if (mi.rank != 0 && is_root())
|
||||
{
|
||||
int size;
|
||||
std::vector<char> vec;
|
||||
MPI_Recv(&size, 1, MPI_INT, mi.rank, 42, MoveComm, MPI_STATUS_IGNORE);
|
||||
vec.resize(size);
|
||||
MPI_Recv(vec.data(), size, MPI_CHAR, mi.rank, 42, MoveComm, MPI_STATUS_IGNORE);
|
||||
PVLine.assign(vec.begin(), vec.end());
|
||||
}
|
||||
}
|
||||
|
||||
/// Return nodes searched (lazily updated cluster wide in the signal loop)
|
||||
uint64_t nodes_searched(const ThreadPool& threads) {
|
||||
return nodesSearchedOthers + threads.nodes_searched();
|
||||
}
|
||||
|
||||
/// Return table base hits (lazily updated cluster wide in the signal loop)
|
||||
uint64_t tb_hits(const ThreadPool& threads) { return tbHitsOthers + threads.tb_hits(); }
|
||||
|
||||
/// Return the number of saves to the TT buffers, (lazily updated cluster wide in the signal loop)
|
||||
uint64_t TT_saves(const ThreadPool& threads) { return TTsavesOthers + threads.TT_saves(); }
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include "cluster.h"
|
||||
#include "thread.h"
|
||||
|
||||
namespace Stockfish {
|
||||
namespace Cluster {
|
||||
|
||||
uint64_t nodes_searched(const ThreadPool& threads) { return threads.nodes_searched(); }
|
||||
|
||||
uint64_t tb_hits(const ThreadPool& threads) { return threads.tb_hits(); }
|
||||
|
||||
uint64_t TT_saves(const ThreadPool& threads) { return threads.TT_saves(); }
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif // USE_MPI
|
||||
157
src/cluster.h
Normal file
157
src/cluster.h
Normal file
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef CLUSTER_H_INCLUDED
|
||||
#define CLUSTER_H_INCLUDED
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <istream>
|
||||
#include <string>
|
||||
|
||||
#include "tt.h"
|
||||
|
||||
namespace Stockfish {
|
||||
class Thread;
|
||||
class ThreadPool;
|
||||
|
||||
namespace Search {
|
||||
class Worker;
|
||||
}
|
||||
|
||||
/// The Cluster namespace contains functionality required to run on distributed
|
||||
/// memory architectures using MPI as the message passing interface. On a high level,
|
||||
/// a 'lazy SMP'-like scheme is implemented where TT saves of sufficient depth are
|
||||
/// collected on each rank and distributed to, and used by, all other ranks,
|
||||
/// which search essentially independently. The root (MPI rank 0) of the cluster
|
||||
/// is responsible for all I/O and time management, communicating this info to
|
||||
/// the other ranks as needed. UCI options such as Threads and Hash specify these
|
||||
/// quantities per MPI rank. It is recommended to have one rank (MPI process) per node.
|
||||
/// For the non-MPI case, wrappers that will be compiler-optimized away are provided.
|
||||
|
||||
namespace Cluster {
|
||||
|
||||
/// Basic info to find the cluster-wide bestMove.
/// One instance per rank is gathered on the root by Cluster::pick_moves(). With MPI
/// enabled the struct is transferred as five MPI_INT values located at the member
/// offsets, so all members have to remain of type int.
|
||||
struct MoveInfo {
|
||||
int move;    // candidate move; votes in pick_moves() are tallied per move value
|
||||
int ponder;  // NOTE(review): presumably the expected reply; not read in the cluster code shown
|
||||
int depth;   // added to the vote of this candidate's move
|
||||
int score;   // added to the vote, relative to the lowest score among all candidates
|
||||
int rank;    // rank that produced the candidate; the winning rank sends its PV to the root
|
||||
};
|
||||
|
||||
#ifdef USE_MPI
|
||||
|
||||
// store the TTEntry with its full key, so it can be saved on the receiver side
|
||||
using KeyedTTEntry = std::pair<Key, TTEntry>;
|
||||
constexpr std::size_t TTCacheSize = 16;
|
||||
|
||||
// Threads locally cache their high-depth TT entries till a batch can be send by MPI
|
||||
template<std::size_t N>
|
||||
class TTCache: public std::array<KeyedTTEntry, N> {
|
||||
|
||||
struct Compare {
|
||||
inline bool operator()(const KeyedTTEntry& lhs, const KeyedTTEntry& rhs) {
|
||||
return lhs.second.depth() > rhs.second.depth();
|
||||
}
|
||||
};
|
||||
Compare compare;
|
||||
|
||||
public:
|
||||
// Keep a heap of entries replacing low depth with high depth entries
|
||||
bool replace(const KeyedTTEntry& value) {
|
||||
|
||||
if (compare(value, this->front()))
|
||||
{
|
||||
std::pop_heap(this->begin(), this->end(), compare);
|
||||
this->back() = value;
|
||||
std::push_heap(this->begin(), this->end(), compare);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
void init();
|
||||
void finalize();
|
||||
bool getline(std::istream& input, std::string& str);
|
||||
int size();
|
||||
int rank();
|
||||
inline bool is_root() { return rank() == 0; }
|
||||
void save(TranspositionTable&,
|
||||
ThreadPool&,
|
||||
Search::Worker* thread,
|
||||
TTEntry* tte,
|
||||
Key k,
|
||||
Value v,
|
||||
bool PvHit,
|
||||
Bound b,
|
||||
Depth d,
|
||||
Move m,
|
||||
Value ev,
|
||||
uint8_t generation8);
|
||||
void pick_moves(MoveInfo& mi, std::string& PVLine);
|
||||
void ttSendRecvBuff_resize(size_t nThreads);
|
||||
uint64_t nodes_searched(const ThreadPool&);
|
||||
uint64_t tb_hits(const ThreadPool&);
|
||||
uint64_t TT_saves(const ThreadPool&);
|
||||
void cluster_info(const ThreadPool&, Depth depth, TimePoint elapsed);
|
||||
void signals_init();
|
||||
void signals_poll(ThreadPool& threads);
|
||||
void signals_sync(ThreadPool& threads);
|
||||
|
||||
#else
|
||||
|
||||
inline void init() {}
|
||||
inline void finalize() {}
|
||||
inline bool getline(std::istream& input, std::string& str) {
|
||||
return static_cast<bool>(std::getline(input, str));
|
||||
}
|
||||
constexpr int size() { return 1; }
|
||||
constexpr int rank() { return 0; }
|
||||
constexpr bool is_root() { return true; }
|
||||
inline void save(TranspositionTable&,
|
||||
ThreadPool&,
|
||||
Search::Worker*,
|
||||
TTEntry* tte,
|
||||
Key k,
|
||||
Value v,
|
||||
bool PvHit,
|
||||
Bound b,
|
||||
Depth d,
|
||||
Move m,
|
||||
Value ev,
|
||||
uint8_t generation8) {
|
||||
tte->save(k, v, PvHit, b, d, m, ev, generation8);
|
||||
}
|
||||
inline void pick_moves(MoveInfo&, std::string&) {}
|
||||
inline void ttSendRecvBuff_resize(size_t) {}
|
||||
uint64_t nodes_searched(const ThreadPool&);
|
||||
uint64_t tb_hits(const ThreadPool&);
|
||||
uint64_t TT_saves(const ThreadPool&);
|
||||
inline void cluster_info(const ThreadPool&, Depth, TimePoint) {}
|
||||
inline void signals_init() {}
|
||||
inline void signals_poll(ThreadPool& threads) {}
|
||||
inline void signals_sync(ThreadPool& threads) {}
|
||||
|
||||
#endif /* USE_MPI */
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif // #ifndef CLUSTER_H_INCLUDED
|
||||
335
src/engine.cpp
335
src/engine.cpp
@@ -1,335 +0,0 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "engine.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <deque>
|
||||
#include <iosfwd>
|
||||
#include <memory>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "evaluate.h"
|
||||
#include "misc.h"
|
||||
#include "nnue/network.h"
|
||||
#include "nnue/nnue_common.h"
|
||||
#include "perft.h"
|
||||
#include "position.h"
|
||||
#include "search.h"
|
||||
#include "syzygy/tbprobe.h"
|
||||
#include "types.h"
|
||||
#include "uci.h"
|
||||
#include "ucioption.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
namespace NN = Eval::NNUE;
|
||||
|
||||
constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1";
|
||||
constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048;
|
||||
|
||||
// Constructs the engine:
//  - binaryDirectory is derived from `path` via CommandLine::get_binary_directory(),
//  - the position is set to StartFEN,
//  - every UCI option is registered together with its on-change handler,
//  - finally the networks are loaded and the thread pool is (re)created.
// Handlers returning std::nullopt produce no message; the others return the
// text describing the new NUMA / thread configuration.
Engine::Engine(std::string path) :
    binaryDirectory(CommandLine::get_binary_directory(path)),
    numaContext(NumaConfig::from_system()),
    states(new std::deque<StateInfo>(1)),
    threads(),
    networks(numaContext,
             NN::Networks(NN::NetworkBig({EvalFileDefaultNameBig, "None", ""},
                                         NN::EmbeddedNNUEType::BIG),
                          NN::NetworkSmall({EvalFileDefaultNameSmall, "None", ""},
                                           NN::EmbeddedNNUEType::SMALL))) {

    // Initial position and "no capture square" marker
    pos.set(StartFEN, false, &states->back());
    capSq = SQ_NONE;

    // --- Logging -----------------------------------------------------------
    options["Debug Log File"] << Option("", [](const Option& opt) {
        start_logger(opt);
        return std::nullopt;
    });

    // --- Threading / NUMA --------------------------------------------------
    options["NumaPolicy"] << Option("auto", [this](const Option& opt) {
        set_numa_config_from_option(opt);
        return numa_config_information_as_string() + "\n"
             + thread_binding_information_as_string();
    });

    options["Threads"] << Option(1, 1, 1024, [this](const Option&) {
        resize_threads();
        return thread_binding_information_as_string();
    });

    // --- Transposition table -----------------------------------------------
    options["Hash"] << Option(16, 1, MaxHashMB, [this](const Option& opt) {
        set_tt_size(opt);
        return std::nullopt;
    });

    options["Clear Hash"] << Option([this](const Option&) {
        search_clear();
        return std::nullopt;
    });

    // --- Plain value options (no handler) ------------------------------------
    options["Ponder"] << Option(false);
    options["MultiPV"] << Option(1, 1, MAX_MOVES);
    options["Skill Level"] << Option(20, 0, 20);
    options["Move Overhead"] << Option(10, 0, 5000);
    options["nodestime"] << Option(0, 0, 10000);
    options["UCI_Chess960"] << Option(false);
    options["UCI_LimitStrength"] << Option(false);
    options["UCI_Elo"] << Option(Stockfish::Search::Skill::LowestElo,
                                 Stockfish::Search::Skill::LowestElo,
                                 Stockfish::Search::Skill::HighestElo);
    options["UCI_ShowWDL"] << Option(false);

    // --- Tablebases ----------------------------------------------------------
    options["SyzygyPath"] << Option("", [](const Option& opt) {
        Tablebases::init(opt);
        return std::nullopt;
    });
    options["SyzygyProbeDepth"] << Option(1, 1, 100);
    options["Syzygy50MoveRule"] << Option(true);
    options["SyzygyProbeLimit"] << Option(7, 0, 7);

    // --- Evaluation networks -------------------------------------------------
    options["EvalFile"] << Option(EvalFileDefaultNameBig, [this](const Option& opt) {
        load_big_network(opt);
        return std::nullopt;
    });
    options["EvalFileSmall"] << Option(EvalFileDefaultNameSmall, [this](const Option& opt) {
        load_small_network(opt);
        return std::nullopt;
    });

    // Bring the engine into a usable state with the default option values
    load_networks();
    resize_threads();
}
|
||||
|
||||
std::uint64_t Engine::perft(const std::string& fen, Depth depth, bool isChess960) {
|
||||
verify_networks();
|
||||
|
||||
return Benchmark::perft(fen, depth, isChess960);
|
||||
}
|
||||
|
||||
// Starts a search on the current position with the given limits.
// Perft requests must not come through here (asserted). The engine's current
// capSq is copied into `limits` before the thread pool is told to start.
void Engine::go(Search::LimitsType& limits) {
    assert(limits.perft == 0);

    verify_networks();

    limits.capSq = capSq;
    threads.start_thinking(options, pos, states, limits);
}
|
||||
// Raises the thread pool's stop flag; does not wait for anything.
void Engine::stop() {
    threads.stop = true;
}
|
||||
|
||||
// Waits for a running search to finish, then clears the transposition table
// and the thread pool state and re-initialises the tablebases.
void Engine::search_clear() {
    wait_for_search_finished();

    tt.clear(threads);
    threads.clear();

    // TODO: this won't work with multiple Engine instances.
    // Re-initialising with the current path frees the mapped files.
    Tablebases::init(options["SyzygyPath"]);
}
|
||||
|
||||
// Installs the callback stored in updateContext.onUpdateNoMoves
// (takes ownership of `f`).
void Engine::set_on_update_no_moves(std::function<void(const Engine::InfoShort&)>&& f) {
    auto& slot = updateContext.onUpdateNoMoves;
    slot       = std::move(f);
}
|
||||
|
||||
// Installs the callback stored in updateContext.onUpdateFull
// (takes ownership of `f`).
void Engine::set_on_update_full(std::function<void(const Engine::InfoFull&)>&& f) {
    auto& slot = updateContext.onUpdateFull;
    slot       = std::move(f);
}
|
||||
|
||||
// Installs the callback stored in updateContext.onIter
// (takes ownership of `f`).
void Engine::set_on_iter(std::function<void(const Engine::InfoIter&)>&& f) {
    auto& slot = updateContext.onIter;
    slot       = std::move(f);
}
|
||||
|
||||
// Installs the callback stored in updateContext.onBestmove
// (takes ownership of `f`).
void Engine::set_on_bestmove(std::function<void(std::string_view, std::string_view)>&& f) {
    auto& slot = updateContext.onBestmove;
    slot       = std::move(f);
}
|
||||
|
||||
// Blocks until the main thread reports that the search has finished.
void Engine::wait_for_search_finished() {
    threads.main_thread()->wait_for_search_finished();
}
|
||||
|
||||
void Engine::set_position(const std::string& fen, const std::vector<std::string>& moves) {
|
||||
// Drop the old state and create a new one
|
||||
states = StateListPtr(new std::deque<StateInfo>(1));
|
||||
pos.set(fen, options["UCI_Chess960"], &states->back());
|
||||
|
||||
capSq = SQ_NONE;
|
||||
for (const auto& move : moves)
|
||||
{
|
||||
auto m = UCIEngine::to_move(pos, move);
|
||||
|
||||
if (m == Move::none())
|
||||
break;
|
||||
|
||||
states->emplace_back();
|
||||
pos.do_move(m, states->back());
|
||||
|
||||
capSq = SQ_NONE;
|
||||
DirtyPiece& dp = states->back().dirtyPiece;
|
||||
if (dp.dirty_num > 1 && dp.to[1] == SQ_NONE)
|
||||
capSq = m.to_sq();
|
||||
}
|
||||
}
|
||||
|
||||
// modifiers
|
||||
|
||||
void Engine::set_numa_config_from_option(const std::string& o) {
|
||||
if (o == "auto" || o == "system")
|
||||
{
|
||||
numaContext.set_numa_config(NumaConfig::from_system());
|
||||
}
|
||||
else if (o == "hardware")
|
||||
{
|
||||
// Don't respect affinity set in the system.
|
||||
numaContext.set_numa_config(NumaConfig::from_system(false));
|
||||
}
|
||||
else if (o == "none")
|
||||
{
|
||||
numaContext.set_numa_config(NumaConfig{});
|
||||
}
|
||||
else
|
||||
{
|
||||
numaContext.set_numa_config(NumaConfig::from_string(o));
|
||||
}
|
||||
|
||||
// Force reallocation of threads in case affinities need to change.
|
||||
resize_threads();
|
||||
threads.ensure_network_replicated();
|
||||
}
|
||||
|
||||
// Re-creates the thread pool for the current NUMA configuration and options.
// Waits for a running search first; afterwards the hash table is resized via
// set_tt_size() and the networks are replicated again.
void Engine::resize_threads() {
    threads.wait_for_search_finished();

    const auto& numaConfig = numaContext.get_numa_config();
    threads.set(numaConfig, {options, threads, tt, networks}, updateContext);

    // The hash is reallocated with the new thread pool size
    set_tt_size(options["Hash"]);

    threads.ensure_network_replicated();
}
|
||||
|
||||
// Resizes the transposition table to `mb` once no search is running.
void Engine::set_tt_size(size_t mb) {
    // The table must not be resized while a search is running
    wait_for_search_finished();

    tt.resize(mb, threads);
}
|
||||
|
||||
// Writes `b` into the main search manager's `ponder` flag.
void Engine::set_ponderhit(bool b) {
    threads.main_manager()->ponder = b;
}
|
||||
|
||||
// network related
|
||||
|
||||
// Asks both networks to verify themselves against the file names currently
// configured in "EvalFile" and "EvalFileSmall".
void Engine::verify_networks() const {
    const auto& nets = *networks;

    nets.big.verify(options["EvalFile"]);
    nets.small.verify(options["EvalFileSmall"]);
}
|
||||
|
||||
void Engine::load_networks() {
|
||||
networks.modify_and_replicate([this](NN::Networks& networks_) {
|
||||
networks_.big.load(binaryDirectory, options["EvalFile"]);
|
||||
networks_.small.load(binaryDirectory, options["EvalFileSmall"]);
|
||||
});
|
||||
threads.clear();
|
||||
threads.ensure_network_replicated();
|
||||
}
|
||||
|
||||
void Engine::load_big_network(const std::string& file) {
|
||||
networks.modify_and_replicate(
|
||||
[this, &file](NN::Networks& networks_) { networks_.big.load(binaryDirectory, file); });
|
||||
threads.clear();
|
||||
threads.ensure_network_replicated();
|
||||
}
|
||||
|
||||
void Engine::load_small_network(const std::string& file) {
|
||||
networks.modify_and_replicate(
|
||||
[this, &file](NN::Networks& networks_) { networks_.small.load(binaryDirectory, file); });
|
||||
threads.clear();
|
||||
threads.ensure_network_replicated();
|
||||
}
|
||||
|
||||
void Engine::save_network(const std::pair<std::optional<std::string>, std::string> files[2]) {
|
||||
networks.modify_and_replicate([&files](NN::Networks& networks_) {
|
||||
networks_.big.save(files[0].first);
|
||||
networks_.small.save(files[1].first);
|
||||
});
|
||||
}
|
||||
|
||||
// utility functions
|
||||
|
||||
void Engine::trace_eval() const {
|
||||
StateListPtr trace_states(new std::deque<StateInfo>(1));
|
||||
Position p;
|
||||
p.set(pos.fen(), options["UCI_Chess960"], &trace_states->back());
|
||||
|
||||
verify_networks();
|
||||
|
||||
sync_cout << "\n" << Eval::trace(p, *networks) << sync_endl;
|
||||
}
|
||||
|
||||
// Read-only access to the engine's option map.
const OptionsMap& Engine::get_options() const {
    return options;
}
|
||||
// Mutable access to the engine's option map.
OptionsMap& Engine::get_options() {
    return options;
}
|
||||
|
||||
// Returns the FEN string of the current position.
std::string Engine::fen() const {
    return pos.fen();
}
|
||||
|
||||
// Forwards to Position::flip() on the current position.
void Engine::flip() {
    pos.flip();
}
|
||||
|
||||
// Returns whatever streaming the current position with operator<< produces.
std::string Engine::visualize() const {
    std::stringstream rendered;

    rendered << pos;

    return rendered.str();
}
|
||||
|
||||
std::vector<std::pair<size_t, size_t>> Engine::get_bound_thread_count_by_numa_node() const {
|
||||
auto counts = threads.get_bound_thread_count_by_numa_node();
|
||||
const NumaConfig& cfg = numaContext.get_numa_config();
|
||||
std::vector<std::pair<size_t, size_t>> ratios;
|
||||
NumaIndex n = 0;
|
||||
for (; n < counts.size(); ++n)
|
||||
ratios.emplace_back(counts[n], cfg.num_cpus_in_numa_node(n));
|
||||
if (!counts.empty())
|
||||
for (; n < cfg.num_numa_nodes(); ++n)
|
||||
ratios.emplace_back(0, cfg.num_cpus_in_numa_node(n));
|
||||
return ratios;
|
||||
}
|
||||
|
||||
std::string Engine::get_numa_config_as_string() const {
|
||||
return numaContext.get_numa_config().to_string();
|
||||
}
|
||||
|
||||
std::string Engine::numa_config_information_as_string() const {
|
||||
auto cfgStr = get_numa_config_as_string();
|
||||
return "Available processors: " + cfgStr;
|
||||
}
|
||||
|
||||
std::string Engine::thread_binding_information_as_string() const {
|
||||
auto boundThreadsByNode = get_bound_thread_count_by_numa_node();
|
||||
std::stringstream ss;
|
||||
|
||||
size_t threadsSize = threads.size();
|
||||
ss << "Using " << threadsSize << (threadsSize > 1 ? " threads" : " thread");
|
||||
|
||||
if (boundThreadsByNode.empty())
|
||||
return ss.str();
|
||||
|
||||
ss << " with NUMA node thread binding: ";
|
||||
|
||||
bool isFirst = true;
|
||||
|
||||
for (auto&& [current, total] : boundThreadsByNode)
|
||||
{
|
||||
if (!isFirst)
|
||||
ss << ":";
|
||||
ss << current << "/" << total;
|
||||
isFirst = false;
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
}
|
||||
128
src/engine.h
128
src/engine.h
@@ -1,128 +0,0 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef ENGINE_H_INCLUDED
|
||||
#define ENGINE_H_INCLUDED
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "nnue/network.h"
|
||||
#include "numa.h"
|
||||
#include "position.h"
|
||||
#include "search.h"
|
||||
#include "syzygy/tbprobe.h" // for Stockfish::Depth
|
||||
#include "thread.h"
|
||||
#include "tt.h"
|
||||
#include "ucioption.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
enum Square : int;
|
||||
|
||||
class Engine {
|
||||
public:
|
||||
using InfoShort = Search::InfoShort;
|
||||
using InfoFull = Search::InfoFull;
|
||||
using InfoIter = Search::InfoIteration;
|
||||
|
||||
Engine(std::string path = "");
|
||||
|
||||
// Cannot be movable due to components holding backreferences to fields
|
||||
Engine(const Engine&) = delete;
|
||||
Engine(Engine&&) = delete;
|
||||
Engine& operator=(const Engine&) = delete;
|
||||
Engine& operator=(Engine&&) = delete;
|
||||
|
||||
~Engine() { wait_for_search_finished(); }
|
||||
|
||||
std::uint64_t perft(const std::string& fen, Depth depth, bool isChess960);
|
||||
|
||||
// non blocking call to start searching
|
||||
void go(Search::LimitsType&);
|
||||
// non blocking call to stop searching
|
||||
void stop();
|
||||
|
||||
// blocking call to wait for search to finish
|
||||
void wait_for_search_finished();
|
||||
// set a new position, moves are in UCI format
|
||||
void set_position(const std::string& fen, const std::vector<std::string>& moves);
|
||||
|
||||
// modifiers
|
||||
|
||||
void set_numa_config_from_option(const std::string& o);
|
||||
void resize_threads();
|
||||
void set_tt_size(size_t mb);
|
||||
void set_ponderhit(bool);
|
||||
void search_clear();
|
||||
|
||||
void set_on_update_no_moves(std::function<void(const InfoShort&)>&&);
|
||||
void set_on_update_full(std::function<void(const InfoFull&)>&&);
|
||||
void set_on_iter(std::function<void(const InfoIter&)>&&);
|
||||
void set_on_bestmove(std::function<void(std::string_view, std::string_view)>&&);
|
||||
|
||||
// network related
|
||||
|
||||
void verify_networks() const;
|
||||
void load_networks();
|
||||
void load_big_network(const std::string& file);
|
||||
void load_small_network(const std::string& file);
|
||||
void save_network(const std::pair<std::optional<std::string>, std::string> files[2]);
|
||||
|
||||
// utility functions
|
||||
|
||||
void trace_eval() const;
|
||||
|
||||
const OptionsMap& get_options() const;
|
||||
OptionsMap& get_options();
|
||||
|
||||
std::string fen() const;
|
||||
void flip();
|
||||
std::string visualize() const;
|
||||
std::vector<std::pair<size_t, size_t>> get_bound_thread_count_by_numa_node() const;
|
||||
std::string get_numa_config_as_string() const;
|
||||
std::string numa_config_information_as_string() const;
|
||||
std::string thread_binding_information_as_string() const;
|
||||
|
||||
private:
|
||||
const std::string binaryDirectory;
|
||||
|
||||
NumaReplicationContext numaContext;
|
||||
|
||||
Position pos;
|
||||
StateListPtr states;
|
||||
Square capSq;
|
||||
|
||||
OptionsMap options;
|
||||
ThreadPool threads;
|
||||
TranspositionTable tt;
|
||||
LazyNumaReplicated<Eval::NNUE::Networks> networks;
|
||||
|
||||
Search::SearchManager::UpdateContext updateContext;
|
||||
};
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
|
||||
#endif // #ifndef ENGINE_H_INCLUDED
|
||||
@@ -24,16 +24,13 @@
|
||||
#include <cstdlib>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <tuple>
|
||||
|
||||
#include "nnue/network.h"
|
||||
#include "nnue/nnue_misc.h"
|
||||
#include "position.h"
|
||||
#include "types.h"
|
||||
#include "uci.h"
|
||||
#include "nnue/nnue_accumulator.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
@@ -45,49 +42,45 @@ int Eval::simple_eval(const Position& pos, Color c) {
|
||||
+ (pos.non_pawn_material(c) - pos.non_pawn_material(~c));
|
||||
}
|
||||
|
||||
bool Eval::use_smallnet(const Position& pos) {
|
||||
int simpleEval = simple_eval(pos, pos.side_to_move());
|
||||
return std::abs(simpleEval) > 962;
|
||||
}
|
||||
|
||||
// Evaluate is the evaluator for the outer world. It returns a static evaluation
|
||||
// of the position from the point of view of the side to move.
|
||||
Value Eval::evaluate(const Eval::NNUE::Networks& networks,
|
||||
const Position& pos,
|
||||
Eval::NNUE::AccumulatorCaches& caches,
|
||||
int optimism) {
|
||||
Value Eval::evaluate(const Eval::NNUE::Networks& networks, const Position& pos, int optimism) {
|
||||
|
||||
assert(!pos.checkers());
|
||||
|
||||
bool smallNet = use_smallnet(pos);
|
||||
int simpleEval = simple_eval(pos, pos.side_to_move());
|
||||
bool smallNet = std::abs(simpleEval) > SmallNetThreshold;
|
||||
bool psqtOnly = std::abs(simpleEval) > PsqtOnlyThreshold;
|
||||
int nnueComplexity;
|
||||
int v;
|
||||
|
||||
auto [psqt, positional] = smallNet ? networks.small.evaluate(pos, &caches.small)
|
||||
: networks.big.evaluate(pos, &caches.big);
|
||||
Value nnue = smallNet ? networks.small.evaluate(pos, true, &nnueComplexity, psqtOnly)
|
||||
: networks.big.evaluate(pos, true, &nnueComplexity, false);
|
||||
|
||||
Value nnue = (125 * psqt + 131 * positional) / 128;
|
||||
const auto adjustEval = [&](int optDiv, int nnueDiv, int pawnCountConstant, int pawnCountMul,
|
||||
int npmConstant, int evalDiv, int shufflingConstant,
|
||||
int shufflingDiv) {
|
||||
// Blend optimism and eval with nnue complexity and material imbalance
|
||||
optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / optDiv;
|
||||
nnue -= nnue * (nnueComplexity * 5 / 3) / nnueDiv;
|
||||
|
||||
// Re-evaluate the position when higher eval accuracy is worth the time spent
|
||||
if (smallNet && (nnue * psqt < 0 || std::abs(nnue) < 227))
|
||||
{
|
||||
std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big);
|
||||
nnue = (125 * psqt + 131 * positional) / 128;
|
||||
smallNet = false;
|
||||
}
|
||||
int npm = pos.non_pawn_material() / 64;
|
||||
v = (nnue * (npm + pawnCountConstant + pawnCountMul * pos.count<PAWN>())
|
||||
+ optimism * (npmConstant + npm))
|
||||
/ evalDiv;
|
||||
|
||||
// Blend optimism and eval with nnue complexity
|
||||
int nnueComplexity = std::abs(psqt - positional);
|
||||
optimism += optimism * nnueComplexity / (smallNet ? 433 : 453);
|
||||
nnue -= nnue * nnueComplexity / (smallNet ? 18815 : 17864);
|
||||
// Damp down the evaluation linearly when shuffling
|
||||
int shuffling = pos.rule50_count();
|
||||
v = v * (shufflingConstant - shuffling) / shufflingDiv;
|
||||
};
|
||||
|
||||
int material = (smallNet ? 553 : 532) * pos.count<PAWN>() + pos.non_pawn_material();
|
||||
v = (nnue * (73921 + material) + optimism * (8112 + material)) / (smallNet ? 68104 : 74715);
|
||||
|
||||
// Evaluation grain (to get more alpha-beta cuts) with randomization (for robustness)
|
||||
v = (v / 16) * 16 - 1 + (pos.key() & 0x2);
|
||||
|
||||
// Damp down the evaluation linearly when shuffling
|
||||
v -= v * pos.rule50_count() / 212;
|
||||
if (!smallNet)
|
||||
adjustEval(513, 32395, 919, 11, 145, 1036, 178, 204);
|
||||
else if (psqtOnly)
|
||||
adjustEval(517, 32857, 908, 7, 155, 1019, 224, 238);
|
||||
else
|
||||
adjustEval(499, 32793, 903, 9, 147, 1067, 208, 211);
|
||||
|
||||
// Guarantee evaluation does not hit the tablebase range
|
||||
v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
|
||||
@@ -104,22 +97,19 @@ std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {
|
||||
if (pos.checkers())
|
||||
return "Final evaluation: none (in check)";
|
||||
|
||||
auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>(networks);
|
||||
|
||||
std::stringstream ss;
|
||||
ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
|
||||
ss << '\n' << NNUE::trace(pos, networks, *caches) << '\n';
|
||||
ss << '\n' << NNUE::trace(pos, networks) << '\n';
|
||||
|
||||
ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);
|
||||
|
||||
auto [psqt, positional] = networks.big.evaluate(pos, &caches->big);
|
||||
Value v = psqt + positional;
|
||||
v = pos.side_to_move() == WHITE ? v : -v;
|
||||
ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n";
|
||||
Value v = networks.big.evaluate(pos, false);
|
||||
v = pos.side_to_move() == WHITE ? v : -v;
|
||||
ss << "NNUE evaluation " << 0.01 * UCI::to_cp(v, pos) << " (white side)\n";
|
||||
|
||||
v = evaluate(networks, pos, *caches, VALUE_ZERO);
|
||||
v = evaluate(networks, pos, VALUE_ZERO);
|
||||
v = pos.side_to_move() == WHITE ? v : -v;
|
||||
ss << "Final evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)";
|
||||
ss << "Final evaluation " << 0.01 * UCI::to_cp(v, pos) << " (white side)";
|
||||
ss << " [with scaled NNUE, ...]";
|
||||
ss << "\n";
|
||||
|
||||
|
||||
@@ -29,26 +29,25 @@ class Position;
|
||||
|
||||
namespace Eval {
|
||||
|
||||
constexpr inline int SmallNetThreshold = 1165, PsqtOnlyThreshold = 2500;
|
||||
|
||||
// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
|
||||
// for the build process (profile-build and fishtest) to work. Do not change the
|
||||
// name of the macro or the location where this macro is defined, as it is used
|
||||
// in the Makefile/Fishtest.
|
||||
#define EvalFileDefaultNameBig "nn-1111cefa1111.nnue"
|
||||
#define EvalFileDefaultNameSmall "nn-37f18f62d772.nnue"
|
||||
#define EvalFileDefaultNameBig "nn-ae6a388e4a1a.nnue"
|
||||
#define EvalFileDefaultNameSmall "nn-baff1ede1f90.nnue"
|
||||
|
||||
namespace NNUE {
|
||||
struct Networks;
|
||||
struct AccumulatorCaches;
|
||||
}
|
||||
|
||||
std::string trace(Position& pos, const Eval::NNUE::Networks& networks);
|
||||
|
||||
int simple_eval(const Position& pos, Color c);
|
||||
bool use_smallnet(const Position& pos);
|
||||
Value evaluate(const NNUE::Networks& networks,
|
||||
const Position& pos,
|
||||
Eval::NNUE::AccumulatorCaches& caches,
|
||||
int optimism);
|
||||
Value evaluate(const NNUE::Networks& networks, const Position& pos, int optimism);
|
||||
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
12
src/main.cpp
12
src/main.cpp
@@ -21,24 +21,28 @@
|
||||
#include "bitboard.h"
|
||||
#include "misc.h"
|
||||
#include "position.h"
|
||||
#include "tune.h"
|
||||
#include "types.h"
|
||||
#include "uci.h"
|
||||
#include "tune.h"
|
||||
|
||||
using namespace Stockfish;
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
|
||||
std::cout << engine_info() << std::endl;
|
||||
Cluster::init();
|
||||
if (Cluster::is_root())
|
||||
std::cout << engine_info() << std::endl;
|
||||
|
||||
Bitboards::init();
|
||||
Position::init();
|
||||
|
||||
UCIEngine uci(argc, argv);
|
||||
UCI uci(argc, argv);
|
||||
|
||||
Tune::init(uci.engine_options());
|
||||
Tune::init(uci.options);
|
||||
|
||||
uci.loop();
|
||||
|
||||
Cluster::finalize();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
237
src/memory.cpp
237
src/memory.cpp
@@ -1,237 +0,0 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "memory.h"
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#if __has_include("features.h")
|
||||
#include <features.h>
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) && !defined(__ANDROID__)
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \
|
||||
|| (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \
|
||||
|| defined(__e2k__)
|
||||
#define POSIXALIGNEDALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#if _WIN32_WINNT < 0x0601
|
||||
#undef _WIN32_WINNT
|
||||
#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
|
||||
#endif
|
||||
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
|
||||
#include <ios> // std::hex, std::dec
|
||||
#include <iostream> // std::cerr
|
||||
#include <ostream> // std::endl
|
||||
#include <windows.h>
|
||||
|
||||
// The needed Windows API for processor groups could be missing from old Windows
|
||||
// versions, so instead of calling them directly (forcing the linker to resolve
|
||||
// the calls at compile time), try to load them at runtime. To do this we need
|
||||
// first to define the corresponding function pointers.
|
||||
|
||||
extern "C" {
|
||||
using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE);
|
||||
using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
|
||||
using AdjustTokenPrivileges_t =
|
||||
bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
// Wrappers for systems where the c++17 implementation does not guarantee the
|
||||
// availability of aligned_alloc(). Memory allocated with std_aligned_alloc()
|
||||
// must be freed with std_aligned_free().
|
||||
|
||||
// Allocates `size` bytes aligned to `alignment` with whichever aligned
// allocation primitive the platform offers. Returns nullptr when the
// allocation fails. The result must be released with std_aligned_free().
void* std_aligned_alloc(size_t alignment, size_t size) {
#if defined(_ISOC11_SOURCE)
    return aligned_alloc(alignment, size);
#elif defined(POSIXALIGNEDALLOC)
    // posix_memalign() reports failure through its return value, so check it
    // explicitly rather than relying on `mem` being left untouched.
    void* mem = nullptr;
    return posix_memalign(&mem, alignment, size) == 0 ? mem : nullptr;
#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
    return _mm_malloc(size, alignment);
#elif defined(_WIN32)
    return _aligned_malloc(size, alignment);
#else
    return std::aligned_alloc(alignment, size);
#endif
}
|
||||
|
||||
// Releases memory obtained from std_aligned_alloc(). Only the Windows
// allocators need a dedicated deallocation function; every other
// configuration handled here ends up in free().
void std_aligned_free(void* ptr) {

#if defined(_WIN32) && !defined(POSIXALIGNEDALLOC)
    #if !defined(_M_ARM) && !defined(_M_ARM64)
    _mm_free(ptr);
    #else
    _aligned_free(ptr);
    #endif
#else
    free(ptr);
#endif
}
|
||||
|
||||
// aligned_large_pages_alloc() will return suitably aligned memory,
|
||||
// if possible using large pages.
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
// Tries to allocate `allocSize` bytes (rounded up to whole large pages) with
// MEM_LARGE_PAGES. Returns nullptr when large pages cannot be used: on
// non-64-bit Windows builds, when the system reports no large page size, when
// advapi32 or one of its entry points cannot be resolved, when the process
// token cannot be opened, or when SeLockMemoryPrivilege cannot be enabled.
static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) {

    #if !defined(_WIN64)
    return nullptr;
    #else

    HANDLE hProcessToken{};
    LUID   luid{};
    void*  mem = nullptr;

    const size_t largePageSize = GetLargePageMinimum();
    if (!largePageSize)
        return nullptr;

    // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges

    HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));

    if (!hAdvapi32)
        hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));

    // Without the module there is nothing to resolve; do not hand a null
    // handle to GetProcAddress().
    if (!hAdvapi32)
        return nullptr;

    auto OpenProcessToken_f =
      OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
    if (!OpenProcessToken_f)
        return nullptr;
    auto LookupPrivilegeValueA_f =
      LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
    if (!LookupPrivilegeValueA_f)
        return nullptr;
    auto AdjustTokenPrivileges_f =
      AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
    if (!AdjustTokenPrivileges_f)
        return nullptr;

    // We need SeLockMemoryPrivilege, so try to enable it for the process

    if (!OpenProcessToken_f(  // OpenProcessToken()
          GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
        return nullptr;

    if (LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
    {
        TOKEN_PRIVILEGES tp{};
        TOKEN_PRIVILEGES prevTp{};
        DWORD            prevTpLen = 0;

        tp.PrivilegeCount           = 1;
        tp.Privileges[0].Luid       = luid;
        tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;

        // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges()
        // succeeds, we still need to query GetLastError() to ensure that the privileges
        // were actually obtained. GetLastError() must therefore be read right after
        // the call, before any other API call can overwrite it.

        if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
                                    &prevTpLen)
            && GetLastError() == ERROR_SUCCESS)
        {
            // Round up size to full pages and allocate
            allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
            mem       = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
                                     PAGE_READWRITE);

            // Privilege no longer needed, restore previous state
            AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
        }
    }

    // The token was opened successfully above, so it is closed on every path
    // that reaches this point.
    CloseHandle(hProcessToken);

    return mem;

    #endif
}
|
||||
|
||||
void* aligned_large_pages_alloc(size_t allocSize) {
|
||||
|
||||
// Try to allocate large pages
|
||||
void* mem = aligned_large_pages_alloc_windows(allocSize);
|
||||
|
||||
// Fall back to regular, page-aligned, allocation if necessary
|
||||
if (!mem)
|
||||
mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
|
||||
|
||||
return mem;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void* aligned_large_pages_alloc(size_t allocSize) {
|
||||
|
||||
#if defined(__linux__)
|
||||
constexpr size_t alignment = 2 * 1024 * 1024; // 2MB page size assumed
|
||||
#else
|
||||
constexpr size_t alignment = 4096; // small page size assumed
|
||||
#endif
|
||||
|
||||
// Round up to multiples of alignment
|
||||
size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
|
||||
void* mem = std_aligned_alloc(alignment, size);
|
||||
#if defined(MADV_HUGEPAGE)
|
||||
madvise(mem, size, MADV_HUGEPAGE);
|
||||
#endif
|
||||
return mem;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// aligned_large_pages_free() frees memory previously allocated
|
||||
// by aligned_large_pages_alloc(). The effect is a nop if mem == nullptr.
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
void aligned_large_pages_free(void* mem) {
|
||||
|
||||
if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
|
||||
{
|
||||
DWORD err = GetLastError();
|
||||
std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err
|
||||
<< std::dec << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Non-Windows variant: the block came from std_aligned_alloc(), so it is
// handed back through std_aligned_free().
void aligned_large_pages_free(void* mem) {
    std_aligned_free(mem);
}
|
||||
|
||||
#endif
|
||||
} // namespace Stockfish
|
||||
216
src/memory.h
216
src/memory.h
@@ -1,216 +0,0 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef MEMORY_H_INCLUDED
|
||||
#define MEMORY_H_INCLUDED
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <new>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
#include "types.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
void* std_aligned_alloc(size_t alignment, size_t size);
|
||||
void std_aligned_free(void* ptr);
|
||||
|
||||
// Memory aligned by page size, min alignment: 4096 bytes
|
||||
void* aligned_large_pages_alloc(size_t size);
|
||||
void aligned_large_pages_free(void* mem);
|
||||
|
||||
// Destroys a single object that was constructed with placement new and then
// hands its storage to free_func. Passing a null ptr does nothing.
template<typename T, typename FREE_FUNC>
void memory_deleter(T* ptr, FREE_FUNC free_func) {
    if (ptr != nullptr)
    {
        // Objects built with placement new need their destructor invoked by
        // hand; trivially destructible types have nothing to run.
        if constexpr (!std::is_trivially_destructible_v<T>)
            ptr->~T();

        free_func(ptr);
    }
}
|
||||
|
||||
// Array counterpart of memory_deleter(). ptr must point at the first element
// of an array whose element count is stored in a size_t located
// max(sizeof(size_t), alignof(T)) bytes in front of it. free_func receives the
// address of that header, not ptr itself. Passing a null ptr does nothing.
template<typename T, typename FREE_FUNC>
void memory_deleter_array(T* ptr, FREE_FUNC free_func) {
    if (ptr == nullptr)
        return;

    // Step back from the first element to the start of the raw allocation
    const size_t header_bytes = std::max(sizeof(size_t), alignof(T));
    char*        block_start  = reinterpret_cast<char*>(ptr) - header_bytes;

    if constexpr (!std::is_trivially_destructible_v<T>)
    {
        // Destroy the elements back to front, last element first
        size_t remaining = *reinterpret_cast<size_t*>(block_start);
        while (remaining > 0)
        {
            --remaining;
            ptr[remaining].~T();
        }
    }

    free_func(block_start);
}
|
||||
|
||||
// Allocates memory for a single object and places it there with placement new
|
||||
template<typename T, typename ALLOC_FUNC, typename... Args>
|
||||
inline std::enable_if_t<!std::is_array_v<T>, T*> memory_allocator(ALLOC_FUNC alloc_func,
|
||||
Args&&... args) {
|
||||
void* raw_memory = alloc_func(sizeof(T));
|
||||
ASSERT_ALIGNED(raw_memory, alignof(T));
|
||||
return new (raw_memory) T(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
// Allocates memory for an array of unknown bound and places it there with placement new
|
||||
template<typename T, typename ALLOC_FUNC>
|
||||
inline std::enable_if_t<std::is_array_v<T>, std::remove_extent_t<T>*>
|
||||
memory_allocator(ALLOC_FUNC alloc_func, size_t num) {
|
||||
using ElementType = std::remove_extent_t<T>;
|
||||
|
||||
const size_t array_offset = std::max(sizeof(size_t), alignof(ElementType));
|
||||
|
||||
// Save the array size in the memory location
|
||||
char* raw_memory =
|
||||
reinterpret_cast<char*>(alloc_func(array_offset + num * sizeof(ElementType)));
|
||||
ASSERT_ALIGNED(raw_memory, alignof(T));
|
||||
|
||||
new (raw_memory) size_t(num);
|
||||
|
||||
for (size_t i = 0; i < num; ++i)
|
||||
new (raw_memory + array_offset + i * sizeof(ElementType)) ElementType();
|
||||
|
||||
// Need to return the pointer at the start of the array so that
|
||||
// the indexing in unique_ptr<T[]> works.
|
||||
return reinterpret_cast<ElementType*>(raw_memory + array_offset);
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
// aligned large page unique ptr
|
||||
//
|
||||
//
|
||||
|
||||
template<typename T>
|
||||
struct LargePageDeleter {
|
||||
void operator()(T* ptr) const { return memory_deleter<T>(ptr, aligned_large_pages_free); }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct LargePageArrayDeleter {
|
||||
void operator()(T* ptr) const { return memory_deleter_array<T>(ptr, aligned_large_pages_free); }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
using LargePagePtr =
|
||||
std::conditional_t<std::is_array_v<T>,
|
||||
std::unique_ptr<T, LargePageArrayDeleter<std::remove_extent_t<T>>>,
|
||||
std::unique_ptr<T, LargePageDeleter<T>>>;
|
||||
|
||||
// make_unique_large_page for single objects
|
||||
template<typename T, typename... Args>
|
||||
std::enable_if_t<!std::is_array_v<T>, LargePagePtr<T>> make_unique_large_page(Args&&... args) {
|
||||
static_assert(alignof(T) <= 4096,
|
||||
"aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
|
||||
|
||||
T* obj = memory_allocator<T>(aligned_large_pages_alloc, std::forward<Args>(args)...);
|
||||
|
||||
return LargePagePtr<T>(obj);
|
||||
}
|
||||
|
||||
// make_unique_large_page for arrays of unknown bound
|
||||
template<typename T>
|
||||
std::enable_if_t<std::is_array_v<T>, LargePagePtr<T>> make_unique_large_page(size_t num) {
|
||||
using ElementType = std::remove_extent_t<T>;
|
||||
|
||||
static_assert(alignof(ElementType) <= 4096,
|
||||
"aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
|
||||
|
||||
ElementType* memory = memory_allocator<T>(aligned_large_pages_alloc, num);
|
||||
|
||||
return LargePagePtr<T>(memory);
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
// aligned unique ptr
|
||||
//
|
||||
//
|
||||
|
||||
template<typename T>
|
||||
struct AlignedDeleter {
|
||||
void operator()(T* ptr) const { return memory_deleter<T>(ptr, std_aligned_free); }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct AlignedArrayDeleter {
|
||||
void operator()(T* ptr) const { return memory_deleter_array<T>(ptr, std_aligned_free); }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
using AlignedPtr =
|
||||
std::conditional_t<std::is_array_v<T>,
|
||||
std::unique_ptr<T, AlignedArrayDeleter<std::remove_extent_t<T>>>,
|
||||
std::unique_ptr<T, AlignedDeleter<T>>>;
|
||||
|
||||
// make_unique_aligned for single objects
|
||||
template<typename T, typename... Args>
|
||||
std::enable_if_t<!std::is_array_v<T>, AlignedPtr<T>> make_unique_aligned(Args&&... args) {
|
||||
const auto func = [](size_t size) { return std_aligned_alloc(alignof(T), size); };
|
||||
T* obj = memory_allocator<T>(func, std::forward<Args>(args)...);
|
||||
|
||||
return AlignedPtr<T>(obj);
|
||||
}
|
||||
|
||||
// make_unique_aligned for arrays of unknown bound
|
||||
template<typename T>
|
||||
std::enable_if_t<std::is_array_v<T>, AlignedPtr<T>> make_unique_aligned(size_t num) {
|
||||
using ElementType = std::remove_extent_t<T>;
|
||||
|
||||
const auto func = [](size_t size) { return std_aligned_alloc(alignof(ElementType), size); };
|
||||
ElementType* memory = memory_allocator<T>(func, num);
|
||||
|
||||
return AlignedPtr<T>(memory);
|
||||
}
|
||||
|
||||
|
||||
// Returns the first address at or after ptr that is a multiple of Alignment.
// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes,
// where N is the number of elements in the array.
template<uintptr_t Alignment, typename T>
T* align_ptr_up(T* ptr) {
    static_assert(alignof(T) < Alignment);

    const uintptr_t address = reinterpret_cast<uintptr_t>(reinterpret_cast<char*>(ptr));

    // Pad by Alignment - 1, then drop the remainder to land on a multiple
    const uintptr_t padded  = address + (Alignment - 1);
    const uintptr_t aligned = padded - padded % Alignment;

    return reinterpret_cast<T*>(reinterpret_cast<char*>(aligned));
}
|
||||
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
#endif // #ifndef MEMORY_H_INCLUDED
|
||||
446
src/misc.cpp
446
src/misc.cpp
@@ -18,27 +18,64 @@
|
||||
|
||||
#include "misc.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#if _WIN32_WINNT < 0x0601
|
||||
#undef _WIN32_WINNT
|
||||
#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
|
||||
#endif
|
||||
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
|
||||
#include <windows.h>
|
||||
// The Windows API needed for processor groups may be missing from old Windows
|
||||
// versions, so instead of calling the functions directly (forcing the linker to
|
||||
// resolve the calls at link time), try to load them at runtime. To do this we
|
||||
// first need to define the corresponding function pointers.
|
||||
extern "C" {
|
||||
using fun1_t = bool (*)(LOGICAL_PROCESSOR_RELATIONSHIP,
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX,
|
||||
PDWORD);
|
||||
using fun2_t = bool (*)(USHORT, PGROUP_AFFINITY);
|
||||
using fun3_t = bool (*)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
|
||||
using fun4_t = bool (*)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT);
|
||||
using fun5_t = WORD (*)();
|
||||
using fun6_t = bool (*)(HANDLE, DWORD, PHANDLE);
|
||||
using fun7_t = bool (*)(LPCSTR, LPCSTR, PLUID);
|
||||
using fun8_t = bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
|
||||
}
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <string_view>
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#if defined(__linux__) && !defined(__ANDROID__)
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \
|
||||
|| (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \
|
||||
|| defined(__e2k__)
|
||||
#define POSIXALIGNEDALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
namespace {
|
||||
|
||||
// Version number or dev.
|
||||
constexpr std::string_view version = "17";
|
||||
constexpr std::string_view version = "dev";
|
||||
|
||||
// Our fancy logging facility. The trick here is to replace cin.rdbuf() and
|
||||
// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We
|
||||
@@ -112,16 +149,14 @@ class Logger {
|
||||
|
||||
|
||||
// Returns the full name of the current Stockfish version.
|
||||
//
|
||||
// For local dev compiles we try to append the commit SHA and
|
||||
// commit date from git. If that fails only the local compilation
|
||||
// date is set and "nogit" is specified:
|
||||
// Stockfish dev-YYYYMMDD-SHA
|
||||
// or
|
||||
// Stockfish dev-YYYYMMDD-nogit
|
||||
// For local dev compiles we try to append the commit SHA and commit date
|
||||
// from git. If that fails, only the local compilation date is set and "nogit" is specified:
|
||||
// Stockfish dev-YYYYMMDD-SHA
|
||||
// or
|
||||
// Stockfish dev-YYYYMMDD-nogit
|
||||
//
|
||||
// For releases (non-dev builds) we only include the version number:
|
||||
// Stockfish version
|
||||
// Stockfish version
|
||||
std::string engine_info(bool to_uci) {
|
||||
std::stringstream ss;
|
||||
ss << "Stockfish " << version << std::setfill('0');
|
||||
@@ -133,9 +168,8 @@ std::string engine_info(bool to_uci) {
|
||||
ss << stringify(GIT_DATE);
|
||||
#else
|
||||
constexpr std::string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");
|
||||
|
||||
std::string month, day, year;
|
||||
std::stringstream date(__DATE__); // From compiler, format is "Sep 21 2008"
|
||||
std::string month, day, year;
|
||||
std::stringstream date(__DATE__); // From compiler, format is "Sep 21 2008"
|
||||
|
||||
date >> month >> day >> year;
|
||||
ss << year << std::setw(2) << std::setfill('0') << (1 + months.find(month) / 4)
|
||||
@@ -284,21 +318,13 @@ template<size_t N>
|
||||
struct DebugInfo {
|
||||
std::atomic<int64_t> data[N] = {0};
|
||||
|
||||
constexpr std::atomic<int64_t>& operator[](int index) { return data[index]; }
|
||||
constexpr inline std::atomic<int64_t>& operator[](int index) { return data[index]; }
|
||||
};
|
||||
|
||||
struct DebugExtremes: public DebugInfo<3> {
|
||||
DebugExtremes() {
|
||||
data[1] = std::numeric_limits<int64_t>::min();
|
||||
data[2] = std::numeric_limits<int64_t>::max();
|
||||
}
|
||||
};
|
||||
|
||||
DebugInfo<2> hit[MaxDebugSlots];
|
||||
DebugInfo<2> mean[MaxDebugSlots];
|
||||
DebugInfo<3> stdev[MaxDebugSlots];
|
||||
DebugInfo<6> correl[MaxDebugSlots];
|
||||
DebugExtremes extremes[MaxDebugSlots];
|
||||
DebugInfo<2> hit[MaxDebugSlots];
|
||||
DebugInfo<2> mean[MaxDebugSlots];
|
||||
DebugInfo<3> stdev[MaxDebugSlots];
|
||||
DebugInfo<6> correl[MaxDebugSlots];
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -322,18 +348,6 @@ void dbg_stdev_of(int64_t value, int slot) {
|
||||
stdev[slot][2] += value * value;
|
||||
}
|
||||
|
||||
// Records one sample in the given slot: bumps the sample counter (index 0) and
// raises the stored maximum (index 1) or lowers the stored minimum (index 2)
// when value exceeds them.
void dbg_extremes_of(int64_t value, int slot) {
    auto& stats = extremes[slot];
    ++stats[0];

    // On failure compare_exchange_weak() reloads the current value into its
    // first argument, so each retry re-checks against what is actually stored.
    int64_t seen_max = stats[1].load();
    while (seen_max < value)
        if (stats[1].compare_exchange_weak(seen_max, value))
            break;

    int64_t seen_min = stats[2].load();
    while (seen_min > value)
        if (stats[2].compare_exchange_weak(seen_min, value))
            break;
}
|
||||
|
||||
void dbg_correl_of(int64_t value1, int64_t value2, int slot) {
|
||||
|
||||
++correl[slot][0];
|
||||
@@ -368,13 +382,6 @@ void dbg_print() {
|
||||
std::cerr << "Stdev #" << i << ": Total " << n << " Stdev " << r << std::endl;
|
||||
}
|
||||
|
||||
for (int i = 0; i < MaxDebugSlots; ++i)
|
||||
if ((n = extremes[i][0]))
|
||||
{
|
||||
std::cerr << "Extremity #" << i << ": Total " << n << " Min " << extremes[i][2]
|
||||
<< " Max " << extremes[i][1] << std::endl;
|
||||
}
|
||||
|
||||
for (int i = 0; i < MaxDebugSlots; ++i)
|
||||
if ((n = correl[i][0]))
|
||||
{
|
||||
@@ -401,8 +408,6 @@ std::ostream& operator<<(std::ostream& os, SyncCout sc) {
|
||||
return os;
|
||||
}
|
||||
|
||||
void sync_cout_start() { std::cout << IO_LOCK; }
|
||||
void sync_cout_end() { std::cout << IO_UNLOCK; }
|
||||
|
||||
// Trampoline helper to avoid moving Logger to misc.h
|
||||
void start_logger(const std::string& fname) { Logger::start(fname); }
|
||||
@@ -410,14 +415,14 @@ void start_logger(const std::string& fname) { Logger::start(fname); }
|
||||
|
||||
#ifdef NO_PREFETCH
|
||||
|
||||
void prefetch(const void*) {}
|
||||
void prefetch(void*) {}
|
||||
|
||||
#else
|
||||
|
||||
void prefetch(const void* addr) {
|
||||
void prefetch(void* addr) {
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
_mm_prefetch((char const*) addr, _MM_HINT_T0);
|
||||
_mm_prefetch((char*) addr, _MM_HINT_T0);
|
||||
#else
|
||||
__builtin_prefetch(addr);
|
||||
#endif
|
||||
@@ -425,6 +430,291 @@ void prefetch(const void* addr) {
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// Aligned allocation for systems whose C++17 implementation does not guarantee
// the availability of aligned_alloc(). Returns nullptr when posix_memalign()
// reports an error. Memory obtained here must be released with
// std_aligned_free().
void* std_aligned_alloc(size_t alignment, size_t size) {

#if defined(POSIXALIGNEDALLOC)
    // posix_memalign() signals failure through a non-zero return value
    void* mem = nullptr;
    if (posix_memalign(&mem, alignment, size) != 0)
        return nullptr;
    return mem;
#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
    void* mem = _mm_malloc(size, alignment);
    return mem;
#elif defined(_WIN32)
    void* mem = _aligned_malloc(size, alignment);
    return mem;
#else
    void* mem = std::aligned_alloc(alignment, size);
    return mem;
#endif
}
|
||||
|
||||
// Releases memory with the deallocator that matches the branch
// std_aligned_alloc() selects under the same preprocessor conditions.
void std_aligned_free(void* ptr) {

#if defined(POSIXALIGNEDALLOC)
    // Counterpart of posix_memalign()
    std::free(ptr);
#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
    // Counterpart of _mm_malloc()
    _mm_free(ptr);
#elif defined(_WIN32)
    // Counterpart of _aligned_malloc()
    _aligned_free(ptr);
#else
    // Counterpart of std::aligned_alloc()
    std::free(ptr);
#endif
}
|
||||
|
||||
// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
// Tries to allocate allocSize bytes (rounded up to whole large pages) using
// Windows large pages. Returns nullptr when that is not possible: non-64-bit
// build, GetLargePageMinimum() reports 0, advapi32.dll or one of its entry
// points cannot be resolved, the process token cannot be opened, the
// SeLockMemoryPrivilege cannot be enabled, or VirtualAlloc() itself fails.
static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) {

#if !defined(_WIN64)
    return nullptr;
#else

    HANDLE hProcessToken{};
    LUID   luid{};
    void*  mem = nullptr;

    const size_t largePageSize = GetLargePageMinimum();
    if (!largePageSize)
        return nullptr;

    // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges

    HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));

    if (!hAdvapi32)
        hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));

    // Neither call produced a module handle: there is nothing to resolve the
    // privilege functions from, so give up on large pages instead of passing
    // a null handle to GetProcAddress().
    if (!hAdvapi32)
        return nullptr;

    auto fun6 = fun6_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
    if (!fun6)
        return nullptr;
    auto fun7 = fun7_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
    if (!fun7)
        return nullptr;
    auto fun8 = fun8_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
    if (!fun8)
        return nullptr;

    // We need SeLockMemoryPrivilege, so try to enable it for the process
    if (!fun6(  // OpenProcessToken()
          GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
        return nullptr;

    if (fun7(  // LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &luid)
          nullptr, "SeLockMemoryPrivilege", &luid))
    {
        TOKEN_PRIVILEGES tp{};
        TOKEN_PRIVILEGES prevTp{};
        DWORD            prevTpLen = 0;

        tp.PrivilegeCount           = 1;
        tp.Privileges[0].Luid       = luid;
        tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;

        // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds,
        // we still need to query GetLastError() to ensure that the privileges were actually obtained.
        if (fun8(  // AdjustTokenPrivileges()
              hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen)
            && GetLastError() == ERROR_SUCCESS)
        {
            // Round up size to full pages and allocate
            allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
            mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
                               PAGE_READWRITE);

            // Privilege no longer needed, restore previous state
            fun8(  // AdjustTokenPrivileges ()
              hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
        }
    }

    // The token handle is released on every path that opened it
    CloseHandle(hProcessToken);

    return mem;

#endif
}
|
||||
|
||||
void* aligned_large_pages_alloc(size_t allocSize) {
|
||||
|
||||
// Try to allocate large pages
|
||||
void* mem = aligned_large_pages_alloc_windows(allocSize);
|
||||
|
||||
// Fall back to regular, page-aligned, allocation if necessary
|
||||
if (!mem)
|
||||
mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
|
||||
|
||||
return mem;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void* aligned_large_pages_alloc(size_t allocSize) {
|
||||
|
||||
#if defined(__linux__)
|
||||
constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
|
||||
#else
|
||||
constexpr size_t alignment = 4096; // assumed small page size
|
||||
#endif
|
||||
|
||||
// Round up to multiples of alignment
|
||||
size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
|
||||
void* mem = std_aligned_alloc(alignment, size);
|
||||
#if defined(MADV_HUGEPAGE)
|
||||
madvise(mem, size, MADV_HUGEPAGE);
|
||||
#endif
|
||||
return mem;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// aligned_large_pages_free() frees memory previously allocated with aligned_large_pages_alloc()
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
void aligned_large_pages_free(void* mem) {
|
||||
|
||||
if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
|
||||
{
|
||||
DWORD err = GetLastError();
|
||||
std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err
|
||||
<< std::dec << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Non-Windows variant: simply forwards the pointer to std_aligned_free().
void aligned_large_pages_free(void* mem) {
    std_aligned_free(mem);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
namespace WinProcGroup {
|
||||
|
||||
#ifndef _WIN32
|
||||
|
||||
// Stub compiled when _WIN32 is not defined: ignores its argument and does nothing.
void bind_this_thread(size_t) {}
|
||||
|
||||
#else
|
||||
|
||||
namespace {
|
||||
// Retrieves logical processor information using Windows-specific
|
||||
// API and returns the best node id for the thread with index idx. Original
|
||||
// code from Texel by Peter Österlund.
|
||||
int best_node(size_t idx) {
|
||||
|
||||
int threads = 0;
|
||||
int nodes = 0;
|
||||
int cores = 0;
|
||||
DWORD returnLength = 0;
|
||||
DWORD byteOffset = 0;
|
||||
|
||||
// Early exit if the needed API is not available at runtime
|
||||
HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll"));
|
||||
auto fun1 = (fun1_t) (void (*)()) GetProcAddress(k32, "GetLogicalProcessorInformationEx");
|
||||
if (!fun1)
|
||||
return -1;
|
||||
|
||||
// First call to GetLogicalProcessorInformationEx() to get returnLength.
|
||||
// We expect the call to fail due to null buffer.
|
||||
if (fun1(RelationAll, nullptr, &returnLength))
|
||||
return -1;
|
||||
|
||||
// Once we know returnLength, allocate the buffer
|
||||
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr;
|
||||
ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*) malloc(returnLength);
|
||||
|
||||
// Second call to GetLogicalProcessorInformationEx(), now we expect to succeed
|
||||
if (!fun1(RelationAll, buffer, &returnLength))
|
||||
{
|
||||
free(buffer);
|
||||
return -1;
|
||||
}
|
||||
|
||||
while (byteOffset < returnLength)
|
||||
{
|
||||
if (ptr->Relationship == RelationNumaNode)
|
||||
nodes++;
|
||||
|
||||
else if (ptr->Relationship == RelationProcessorCore)
|
||||
{
|
||||
cores++;
|
||||
threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1;
|
||||
}
|
||||
|
||||
assert(ptr->Size);
|
||||
byteOffset += ptr->Size;
|
||||
ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*) (((char*) ptr) + ptr->Size);
|
||||
}
|
||||
|
||||
free(buffer);
|
||||
|
||||
std::vector<int> groups;
|
||||
|
||||
// Run as many threads as possible on the same node until the core limit is
|
||||
// reached, then move on to filling the next node.
|
||||
for (int n = 0; n < nodes; n++)
|
||||
for (int i = 0; i < cores / nodes; i++)
|
||||
groups.push_back(n);
|
||||
|
||||
// In case a core has more than one logical processor (we assume 2) and we
|
||||
// still have threads to allocate, spread them evenly across available nodes.
|
||||
for (int t = 0; t < threads - cores; t++)
|
||||
groups.push_back(t % nodes);
|
||||
|
||||
// If we still have more threads than the total number of logical processors
|
||||
// then return -1 and let the OS to decide what to do.
|
||||
return idx < groups.size() ? groups[idx] : -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Sets the group affinity of the current thread
|
||||
void bind_this_thread(size_t idx) {
|
||||
|
||||
// Use only local variables to be thread-safe
|
||||
int node = best_node(idx);
|
||||
|
||||
if (node == -1)
|
||||
return;
|
||||
|
||||
// Early exit if the needed API are not available at runtime
|
||||
HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll"));
|
||||
auto fun2 = fun2_t((void (*)()) GetProcAddress(k32, "GetNumaNodeProcessorMaskEx"));
|
||||
auto fun3 = fun3_t((void (*)()) GetProcAddress(k32, "SetThreadGroupAffinity"));
|
||||
auto fun4 = fun4_t((void (*)()) GetProcAddress(k32, "GetNumaNodeProcessorMask2"));
|
||||
auto fun5 = fun5_t((void (*)()) GetProcAddress(k32, "GetMaximumProcessorGroupCount"));
|
||||
|
||||
if (!fun2 || !fun3)
|
||||
return;
|
||||
|
||||
if (!fun4 || !fun5)
|
||||
{
|
||||
GROUP_AFFINITY affinity;
|
||||
if (fun2(node, &affinity)) // GetNumaNodeProcessorMaskEx
|
||||
fun3(GetCurrentThread(), &affinity, nullptr); // SetThreadGroupAffinity
|
||||
}
|
||||
else
|
||||
{
|
||||
// If a numa node has more than one processor group, we assume they are
|
||||
// sized equal and we spread threads evenly across the groups.
|
||||
USHORT elements, returnedElements;
|
||||
elements = fun5(); // GetMaximumProcessorGroupCount
|
||||
GROUP_AFFINITY* affinity = (GROUP_AFFINITY*) malloc(elements * sizeof(GROUP_AFFINITY));
|
||||
if (fun4(node, affinity, elements, &returnedElements)) // GetNumaNodeProcessorMask2
|
||||
fun3(GetCurrentThread(), &affinity[idx % returnedElements],
|
||||
nullptr); // SetThreadGroupAffinity
|
||||
free(affinity);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace WinProcGroup
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <direct.h>
|
||||
#define GETCWD _getcwd
|
||||
@@ -433,31 +723,14 @@ void prefetch(const void* addr) {
|
||||
#define GETCWD getcwd
|
||||
#endif
|
||||
|
||||
// Parses s with std::stoull() and returns the value as size_t. Terminates the
// process with EXIT_FAILURE if the parsed value does not fit in size_t.
// Exceptions thrown by std::stoull() propagate to the caller.
size_t str_to_size_t(const std::string& s) {
    const unsigned long long parsed = std::stoull(s);

    if (parsed <= std::numeric_limits<size_t>::max())
        return static_cast<size_t>(parsed);

    std::exit(EXIT_FAILURE);
}
|
||||
|
||||
// Opens path in binary mode and returns its entire contents as a string.
// Returns std::nullopt when the stream cannot be opened.
std::optional<std::string> read_file_to_string(const std::string& path) {
    std::ifstream input(path, std::ios_base::binary);
    if (input.fail())
        return std::nullopt;

    // A default-constructed istreambuf_iterator is the end-of-stream marker
    std::istreambuf_iterator<char> first(input);
    std::istreambuf_iterator<char> last;
    return std::string(first, last);
}
|
||||
|
||||
// Removes, in place, every character of s that std::isspace() classifies as
// whitespace.
void remove_whitespace(std::string& s) {
    // std::isspace() has undefined behaviour for negative arguments other than
    // EOF, so each char is converted to unsigned char before classification.
    s.erase(std::remove_if(s.begin(), s.end(),
                           [](unsigned char c) { return std::isspace(c) != 0; }),
            s.end());
}
|
||||
|
||||
// Returns true when every character of s is whitespace according to
// std::isspace(); an empty string therefore yields true.
bool is_whitespace(const std::string& s) {
    // std::isspace() has undefined behaviour for negative arguments other than
    // EOF, so each char is converted to unsigned char before classification.
    return std::all_of(s.begin(), s.end(),
                       [](unsigned char c) { return std::isspace(c) != 0; });
}
|
||||
|
||||
std::string CommandLine::get_binary_directory(std::string argv0) {
|
||||
CommandLine::CommandLine(int _argc, char** _argv) :
|
||||
argc(_argc),
|
||||
argv(_argv) {
|
||||
std::string pathSeparator;
|
||||
|
||||
// Extract the path+name of the executable binary
|
||||
std::string argv0 = argv[0];
|
||||
|
||||
#ifdef _WIN32
|
||||
pathSeparator = "\\";
|
||||
#ifdef _MSC_VER
|
||||
@@ -472,11 +745,15 @@ std::string CommandLine::get_binary_directory(std::string argv0) {
|
||||
#endif
|
||||
|
||||
// Extract the working directory
|
||||
auto workingDirectory = CommandLine::get_working_directory();
|
||||
workingDirectory = "";
|
||||
char buff[40000];
|
||||
char* cwd = GETCWD(buff, 40000);
|
||||
if (cwd)
|
||||
workingDirectory = cwd;
|
||||
|
||||
// Extract the binary directory path from argv0
|
||||
auto binaryDirectory = argv0;
|
||||
size_t pos = binaryDirectory.find_last_of("\\/");
|
||||
binaryDirectory = argv0;
|
||||
size_t pos = binaryDirectory.find_last_of("\\/");
|
||||
if (pos == std::string::npos)
|
||||
binaryDirectory = "." + pathSeparator;
|
||||
else
|
||||
@@ -485,19 +762,6 @@ std::string CommandLine::get_binary_directory(std::string argv0) {
|
||||
// Pattern replacement: "./" at the start of path is replaced by the working directory
|
||||
if (binaryDirectory.find("." + pathSeparator) == 0)
|
||||
binaryDirectory.replace(0, 1, workingDirectory);
|
||||
|
||||
return binaryDirectory;
|
||||
}
|
||||
|
||||
std::string CommandLine::get_working_directory() {
|
||||
std::string workingDirectory = "";
|
||||
char buff[40000];
|
||||
char* cwd = GETCWD(buff, 40000);
|
||||
if (cwd)
|
||||
workingDirectory = cwd;
|
||||
|
||||
return workingDirectory;
|
||||
}
|
||||
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
105
src/misc.h
105
src/misc.h
@@ -24,9 +24,8 @@
|
||||
#include <chrono>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <iosfwd>
|
||||
#include <optional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@@ -41,33 +40,43 @@ std::string compiler_info();
|
||||
// Preloads the given address in L1/L2 cache. This is a non-blocking
|
||||
// function that doesn't stall the CPU waiting for data to be loaded from memory,
|
||||
// which can be quite slow.
|
||||
void prefetch(const void* addr);
|
||||
void prefetch(void* addr);
|
||||
|
||||
void start_logger(const std::string& fname);
|
||||
void start_logger(const std::string& fname);
|
||||
void* std_aligned_alloc(size_t alignment, size_t size);
|
||||
void std_aligned_free(void* ptr);
|
||||
// memory aligned by page size, min alignment: 4096 bytes
|
||||
void* aligned_large_pages_alloc(size_t size);
|
||||
// nop if mem == nullptr
|
||||
void aligned_large_pages_free(void* mem);
|
||||
|
||||
size_t str_to_size_t(const std::string& s);
|
||||
|
||||
#if defined(__linux__)
|
||||
|
||||
struct PipeDeleter {
|
||||
void operator()(FILE* file) const {
|
||||
if (file != nullptr)
|
||||
{
|
||||
pclose(file);
|
||||
}
|
||||
// Deleter for automating release of memory area
|
||||
template<typename T>
|
||||
struct AlignedDeleter {
|
||||
void operator()(T* ptr) const {
|
||||
ptr->~T();
|
||||
std_aligned_free(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
template<typename T>
|
||||
struct LargePageDeleter {
|
||||
void operator()(T* ptr) const {
|
||||
ptr->~T();
|
||||
aligned_large_pages_free(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
|
||||
|
||||
template<typename T>
|
||||
using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
|
||||
|
||||
// Reads the file as bytes.
|
||||
// Returns std::nullopt if the file does not exist.
|
||||
std::optional<std::string> read_file_to_string(const std::string& path);
|
||||
|
||||
void dbg_hit_on(bool cond, int slot = 0);
|
||||
void dbg_mean_of(int64_t value, int slot = 0);
|
||||
void dbg_stdev_of(int64_t value, int slot = 0);
|
||||
void dbg_extremes_of(int64_t value, int slot = 0);
|
||||
void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0);
|
||||
void dbg_print();
|
||||
|
||||
@@ -79,30 +88,6 @@ inline TimePoint now() {
|
||||
.count();
|
||||
}
|
||||
|
||||
// Splits s at every occurrence of delimiter and returns the pieces in order.
// An empty s yields an empty vector; otherwise the text after the last
// delimiter (possibly empty) is always the final element.
inline std::vector<std::string> split(const std::string& s, const std::string& delimiter) {
    std::vector<std::string> tokens;
    if (s.empty())
        return tokens;

    size_t start = 0;
    size_t hit   = s.find(delimiter, start);
    while (hit != std::string::npos)
    {
        tokens.emplace_back(s.substr(start, hit - start));
        start = hit + delimiter.size();
        hit   = s.find(delimiter, start);
    }

    // Whatever follows the last delimiter
    tokens.emplace_back(s.substr(start));
    return tokens;
}
|
||||
|
||||
void remove_whitespace(std::string& s);
|
||||
bool is_whitespace(const std::string& s);
|
||||
|
||||
enum SyncCout {
|
||||
IO_LOCK,
|
||||
@@ -113,8 +98,19 @@ std::ostream& operator<<(std::ostream&, SyncCout);
|
||||
#define sync_cout std::cout << IO_LOCK
|
||||
#define sync_endl std::endl << IO_UNLOCK
|
||||
|
||||
void sync_cout_start();
|
||||
void sync_cout_end();
|
||||
|
||||
// Returns the first address at or after ptr that is a multiple of Alignment.
// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes,
// where N is the number of elements in the array.
template<uintptr_t Alignment, typename T>
T* align_ptr_up(T* ptr) {
    static_assert(alignof(T) < Alignment);

    const uintptr_t address = reinterpret_cast<uintptr_t>(reinterpret_cast<char*>(ptr));

    // Pad by Alignment - 1, then drop the remainder to land on a multiple
    const uintptr_t padded  = address + (Alignment - 1);
    const uintptr_t aligned = padded - padded % Alignment;

    return reinterpret_cast<T*>(reinterpret_cast<char*>(aligned));
}
|
||||
|
||||
|
||||
// True if and only if the binary is compiled on a little-endian machine
|
||||
static inline const union {
|
||||
@@ -198,18 +194,25 @@ inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Under Windows it is not possible for a process to run on more than one
|
||||
// logical processor group. This usually means being limited to using max 64
|
||||
// cores. To overcome this, some special platform-specific API should be
|
||||
// called to set group affinity for each thread. Original code from Texel by
|
||||
// Peter Österlund.
|
||||
namespace WinProcGroup {
|
||||
void bind_this_thread(size_t idx);
|
||||
}
|
||||
|
||||
|
||||
struct CommandLine {
|
||||
public:
|
||||
CommandLine(int _argc, char** _argv) :
|
||||
argc(_argc),
|
||||
argv(_argv) {}
|
||||
|
||||
static std::string get_binary_directory(std::string argv0);
|
||||
static std::string get_working_directory();
|
||||
CommandLine(int, char**);
|
||||
|
||||
int argc;
|
||||
char** argv;
|
||||
|
||||
std::string binaryDirectory; // path of the executable directory
|
||||
std::string workingDirectory; // path of the working directory
|
||||
};
|
||||
|
||||
namespace Utility {
|
||||
|
||||
@@ -75,6 +75,17 @@ ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard ta
|
||||
b2 &= target;
|
||||
}
|
||||
|
||||
if constexpr (Type == QUIET_CHECKS)
|
||||
{
|
||||
// To make a quiet check, you either make a direct check by pushing a pawn
|
||||
// or push a blocker pawn that is not on the same file as the enemy king.
|
||||
// Discovered check promotions have already been generated amongst the captures.
|
||||
Square ksq = pos.square<KING>(Them);
|
||||
Bitboard dcCandidatePawns = pos.blockers_for_king(Them) & ~file_bb(ksq);
|
||||
b1 &= pawn_attacks_bb(Them, ksq) | shift<Up>(dcCandidatePawns);
|
||||
b2 &= pawn_attacks_bb(Them, ksq) | shift<Up + Up>(dcCandidatePawns);
|
||||
}
|
||||
|
||||
while (b1)
|
||||
{
|
||||
Square to = pop_lsb(b1);
|
||||
@@ -147,7 +158,7 @@ ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard ta
|
||||
}
|
||||
|
||||
|
||||
template<Color Us, PieceType Pt>
|
||||
template<Color Us, PieceType Pt, bool Checks>
|
||||
ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) {
|
||||
|
||||
static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()");
|
||||
@@ -159,6 +170,10 @@ ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target)
|
||||
Square from = pop_lsb(bb);
|
||||
Bitboard b = attacks_bb<Pt>(from, pos.pieces()) & target;
|
||||
|
||||
// To give check, you either freely move a blocker or make a direct check.
|
||||
if (Checks && (Pt == QUEEN || !(pos.blockers_for_king(~Us) & from)))
|
||||
b &= pos.check_squares(Pt);
|
||||
|
||||
while (b)
|
||||
*moveList++ = Move(from, pop_lsb(b));
|
||||
}
|
||||
@@ -172,8 +187,9 @@ ExtMove* generate_all(const Position& pos, ExtMove* moveList) {
|
||||
|
||||
static_assert(Type != LEGAL, "Unsupported type in generate_all()");
|
||||
|
||||
const Square ksq = pos.square<KING>(Us);
|
||||
Bitboard target;
|
||||
constexpr bool Checks = Type == QUIET_CHECKS; // Reduce template instantiations
|
||||
const Square ksq = pos.square<KING>(Us);
|
||||
Bitboard target;
|
||||
|
||||
// Skip generating non-king moves when in double check
|
||||
if (Type != EVASIONS || !more_than_one(pos.checkers()))
|
||||
@@ -181,24 +197,29 @@ ExtMove* generate_all(const Position& pos, ExtMove* moveList) {
|
||||
target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers()))
|
||||
: Type == NON_EVASIONS ? ~pos.pieces(Us)
|
||||
: Type == CAPTURES ? pos.pieces(~Us)
|
||||
: ~pos.pieces(); // QUIETS
|
||||
: ~pos.pieces(); // QUIETS || QUIET_CHECKS
|
||||
|
||||
moveList = generate_pawn_moves<Us, Type>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, KNIGHT>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, BISHOP>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, ROOK>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, QUEEN>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, KNIGHT, Checks>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, BISHOP, Checks>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, ROOK, Checks>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, QUEEN, Checks>(pos, moveList, target);
|
||||
}
|
||||
|
||||
Bitboard b = attacks_bb<KING>(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target);
|
||||
if (!Checks || pos.blockers_for_king(~Us) & ksq)
|
||||
{
|
||||
Bitboard b = attacks_bb<KING>(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target);
|
||||
if (Checks)
|
||||
b &= ~attacks_bb<QUEEN>(pos.square<KING>(~Us));
|
||||
|
||||
while (b)
|
||||
*moveList++ = Move(ksq, pop_lsb(b));
|
||||
while (b)
|
||||
*moveList++ = Move(ksq, pop_lsb(b));
|
||||
|
||||
if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING))
|
||||
for (CastlingRights cr : {Us & KING_SIDE, Us & QUEEN_SIDE})
|
||||
if (!pos.castling_impeded(cr) && pos.can_castle(cr))
|
||||
*moveList++ = Move::make<CASTLING>(ksq, pos.castling_rook_square(cr));
|
||||
if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING))
|
||||
for (CastlingRights cr : {Us & KING_SIDE, Us & QUEEN_SIDE})
|
||||
if (!pos.castling_impeded(cr) && pos.can_castle(cr))
|
||||
*moveList++ = Move::make<CASTLING>(ksq, pos.castling_rook_square(cr));
|
||||
}
|
||||
|
||||
return moveList;
|
||||
}
|
||||
@@ -210,6 +231,8 @@ ExtMove* generate_all(const Position& pos, ExtMove* moveList) {
|
||||
// <QUIETS> Generates all pseudo-legal non-captures and underpromotions
|
||||
// <EVASIONS> Generates all pseudo-legal check evasions
|
||||
// <NON_EVASIONS> Generates all pseudo-legal captures and non-captures
|
||||
// <QUIET_CHECKS> Generates all pseudo-legal non-captures giving check,
|
||||
// except castling and promotions
|
||||
//
|
||||
// Returns a pointer to the end of the move list.
|
||||
template<GenType Type>
|
||||
@@ -228,6 +251,7 @@ ExtMove* generate(const Position& pos, ExtMove* moveList) {
|
||||
template ExtMove* generate<CAPTURES>(const Position&, ExtMove*);
|
||||
template ExtMove* generate<QUIETS>(const Position&, ExtMove*);
|
||||
template ExtMove* generate<EVASIONS>(const Position&, ExtMove*);
|
||||
template ExtMove* generate<QUIET_CHECKS>(const Position&, ExtMove*);
|
||||
template ExtMove* generate<NON_EVASIONS>(const Position&, ExtMove*);
|
||||
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ class Position;
|
||||
enum GenType {
|
||||
CAPTURES,
|
||||
QUIETS,
|
||||
QUIET_CHECKS,
|
||||
EVASIONS,
|
||||
NON_EVASIONS,
|
||||
LEGAL
|
||||
|
||||
130
src/movepick.cpp
130
src/movepick.cpp
@@ -20,6 +20,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <iterator>
|
||||
#include <utility>
|
||||
|
||||
#include "bitboard.h"
|
||||
@@ -34,6 +35,7 @@ enum Stages {
|
||||
MAIN_TT,
|
||||
CAPTURE_INIT,
|
||||
GOOD_CAPTURE,
|
||||
REFUTATION,
|
||||
QUIET_INIT,
|
||||
GOOD_QUIET,
|
||||
BAD_CAPTURE,
|
||||
@@ -52,11 +54,13 @@ enum Stages {
|
||||
// generate qsearch moves
|
||||
QSEARCH_TT,
|
||||
QCAPTURE_INIT,
|
||||
QCAPTURE
|
||||
QCAPTURE,
|
||||
QCHECK_INIT,
|
||||
QCHECK
|
||||
};
|
||||
|
||||
// Sort moves in descending order up to and including a given limit.
|
||||
// The order of moves smaller than the limit is left unspecified.
|
||||
// Sort moves in descending order up to and including
|
||||
// a given limit. The order of moves smaller than the limit is left unspecified.
|
||||
void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) {
|
||||
|
||||
for (ExtMove *sortedEnd = begin, *p = begin + 1; p < end; ++p)
|
||||
@@ -74,10 +78,35 @@ void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) {
|
||||
|
||||
|
||||
// Constructors of the MovePicker class. As arguments, we pass information
|
||||
// to decide which class of moves to emit, to help sorting the (presumably)
|
||||
// good moves first, and how important move ordering is at the current node.
|
||||
// to help it return the (presumably) good moves first, to decide which
|
||||
// moves to return (in the quiescence search, for instance, we only want to
|
||||
// search captures, promotions, and some checks) and how important a good
|
||||
// move ordering is at the current node.
|
||||
|
||||
// MovePicker constructor for the main search and for the quiescence search
|
||||
// MovePicker constructor for the main search
|
||||
MovePicker::MovePicker(const Position& p,
|
||||
Move ttm,
|
||||
Depth d,
|
||||
const ButterflyHistory* mh,
|
||||
const CapturePieceToHistory* cph,
|
||||
const PieceToHistory** ch,
|
||||
const PawnHistory* ph,
|
||||
Move cm,
|
||||
const Move* killers) :
|
||||
pos(p),
|
||||
mainHistory(mh),
|
||||
captureHistory(cph),
|
||||
continuationHistory(ch),
|
||||
pawnHistory(ph),
|
||||
ttMove(ttm),
|
||||
// Refutation slots: the two killer moves followed by the counter move,
// each stored with an initial value of 0.
refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}},
|
||||
depth(d) {
|
||||
// This overload is only valid for a positive depth.
assert(d > 0);
|
||||
|
||||
// Start in the evasion stages when the side to move is in check, otherwise
// in the main-search stages. The boolean term adds one to the starting
// stage when ttm is empty or is not pseudo-legal in this position.
stage = (pos.checkers() ? EVASION_TT : MAIN_TT) + !(ttm && pos.pseudo_legal(ttm));
|
||||
}
|
||||
|
||||
// Constructor for quiescence search
|
||||
MovePicker::MovePicker(const Position& p,
|
||||
Move ttm,
|
||||
Depth d,
|
||||
@@ -92,16 +121,13 @@ MovePicker::MovePicker(const Position& p,
|
||||
pawnHistory(ph),
|
||||
ttMove(ttm),
|
||||
depth(d) {
|
||||
assert(d <= 0);
|
||||
|
||||
if (pos.checkers())
|
||||
stage = EVASION_TT + !(ttm && pos.pseudo_legal(ttm));
|
||||
|
||||
else
|
||||
stage = (depth > 0 ? MAIN_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm));
|
||||
stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm));
|
||||
}
|
||||
|
||||
// MovePicker constructor for ProbCut: we generate captures with Static Exchange
|
||||
// Evaluation (SEE) greater than or equal to the given threshold.
|
||||
// Constructor for ProbCut: we generate captures with SEE greater
|
||||
// than or equal to the given threshold.
|
||||
MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceToHistory* cph) :
|
||||
pos(p),
|
||||
captureHistory(cph),
|
||||
@@ -113,9 +139,9 @@ MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceTo
|
||||
+ !(ttm && pos.capture_stage(ttm) && pos.pseudo_legal(ttm) && pos.see_ge(ttm, threshold));
|
||||
}
|
||||
|
||||
// Assigns a numerical value to each move in a list, used for sorting.
|
||||
// Captures are ordered by Most Valuable Victim (MVV), preferring captures
|
||||
// with a good history. Quiet moves are ordered using the history tables.
|
||||
// Assigns a numerical value to each move in a list, used
|
||||
// for sorting. Captures are ordered by Most Valuable Victim (MVV), preferring
|
||||
// captures with a good history. Quiet moves are ordered using the history tables.
|
||||
template<GenType Type>
|
||||
void MovePicker::score() {
|
||||
|
||||
@@ -152,11 +178,11 @@ void MovePicker::score() {
|
||||
Square to = m.to_sq();
|
||||
|
||||
// histories
|
||||
m.value = (*mainHistory)[pos.side_to_move()][m.from_to()];
|
||||
m.value = 2 * (*mainHistory)[pos.side_to_move()][m.from_to()];
|
||||
m.value += 2 * (*pawnHistory)[pawn_structure_index(pos)][pc][to];
|
||||
m.value += 2 * (*continuationHistory[0])[pc][to];
|
||||
m.value += (*continuationHistory[1])[pc][to];
|
||||
m.value += (*continuationHistory[2])[pc][to] / 3;
|
||||
m.value += (*continuationHistory[2])[pc][to] / 4;
|
||||
m.value += (*continuationHistory[3])[pc][to];
|
||||
m.value += (*continuationHistory[5])[pc][to];
|
||||
|
||||
@@ -164,16 +190,20 @@ void MovePicker::score() {
|
||||
m.value += bool(pos.check_squares(pt) & to) * 16384;
|
||||
|
||||
// bonus for escaping from capture
|
||||
m.value += threatenedPieces & from ? (pt == QUEEN && !(to & threatenedByRook) ? 51700
|
||||
: pt == ROOK && !(to & threatenedByMinor) ? 25600
|
||||
m.value += threatenedPieces & from ? (pt == QUEEN && !(to & threatenedByRook) ? 51000
|
||||
: pt == ROOK && !(to & threatenedByMinor) ? 24950
|
||||
: !(to & threatenedByPawn) ? 14450
|
||||
: 0)
|
||||
: 0;
|
||||
|
||||
// malus for putting piece en prise
|
||||
m.value -= (pt == QUEEN ? bool(to & threatenedByRook) * 49000
|
||||
: pt == ROOK ? bool(to & threatenedByMinor) * 24335
|
||||
: bool(to & threatenedByPawn) * 14900);
|
||||
m.value -= !(threatenedPieces & from)
|
||||
? (pt == QUEEN ? bool(to & threatenedByRook) * 48150
|
||||
+ bool(to & threatenedByMinor) * 10650
|
||||
: pt == ROOK ? bool(to & threatenedByMinor) * 24500
|
||||
: pt != PAWN ? bool(to & threatenedByPawn) * 14950
|
||||
: 0)
|
||||
: 0;
|
||||
}
|
||||
|
||||
else // Type == EVASIONS
|
||||
@@ -189,7 +219,7 @@ void MovePicker::score() {
|
||||
}
|
||||
|
||||
// Returns the next move satisfying a predicate function.
|
||||
// This never returns the TT move, as it was emitted before.
|
||||
// It never returns the TT move.
|
||||
template<MovePicker::PickType T, typename Pred>
|
||||
Move MovePicker::select(Pred filter) {
|
||||
|
||||
@@ -206,12 +236,12 @@ Move MovePicker::select(Pred filter) {
|
||||
return Move::none();
|
||||
}
|
||||
|
||||
// This is the most important method of the MovePicker class. We emit one
|
||||
// new pseudo-legal move on every call until there are no more moves left,
|
||||
// picking the move with the highest score from a list of generated moves.
|
||||
// Most important method of the MovePicker class. It
|
||||
// returns a new pseudo-legal move every time it is called until there are no more
|
||||
// moves left, picking the move with the highest score from a list of generated moves.
|
||||
Move MovePicker::next_move(bool skipQuiets) {
|
||||
|
||||
auto quiet_threshold = [](Depth d) { return -3560 * d; };
|
||||
auto quiet_threshold = [](Depth d) { return -3550 * d; };
|
||||
|
||||
top:
|
||||
switch (stage)
|
||||
@@ -243,6 +273,22 @@ top:
|
||||
}))
|
||||
return *(cur - 1);
|
||||
|
||||
// Prepare the pointers to loop over the refutations array
|
||||
cur = std::begin(refutations);
|
||||
endMoves = std::end(refutations);
|
||||
|
||||
// If the countermove is the same as a killer, skip it
|
||||
if (refutations[0] == refutations[2] || refutations[1] == refutations[2])
|
||||
--endMoves;
|
||||
|
||||
++stage;
|
||||
[[fallthrough]];
|
||||
|
||||
case REFUTATION :
|
||||
if (select<Next>([&]() {
|
||||
return *cur != Move::none() && !pos.capture_stage(*cur) && pos.pseudo_legal(*cur);
|
||||
}))
|
||||
return *(cur - 1);
|
||||
++stage;
|
||||
[[fallthrough]];
|
||||
|
||||
@@ -260,9 +306,11 @@ top:
|
||||
[[fallthrough]];
|
||||
|
||||
case GOOD_QUIET :
|
||||
if (!skipQuiets && select<Next>([]() { return true; }))
|
||||
if (!skipQuiets && select<Next>([&]() {
|
||||
return *cur != refutations[0] && *cur != refutations[1] && *cur != refutations[2];
|
||||
}))
|
||||
{
|
||||
if ((cur - 1)->value > -7998 || (cur - 1)->value <= quiet_threshold(depth))
|
||||
if ((cur - 1)->value > -8000 || (cur - 1)->value <= quiet_threshold(depth))
|
||||
return *(cur - 1);
|
||||
|
||||
// Remaining quiets are bad
|
||||
@@ -289,7 +337,9 @@ top:
|
||||
|
||||
case BAD_QUIET :
|
||||
if (!skipQuiets)
|
||||
return select<Next>([]() { return true; });
|
||||
return select<Next>([&]() {
|
||||
return *cur != refutations[0] && *cur != refutations[1] && *cur != refutations[2];
|
||||
});
|
||||
|
||||
return Move::none();
|
||||
|
||||
@@ -308,6 +358,24 @@ top:
|
||||
return select<Next>([&]() { return pos.see_ge(*cur, threshold); });
|
||||
|
||||
case QCAPTURE :
|
||||
if (select<Next>([]() { return true; }))
|
||||
return *(cur - 1);
|
||||
|
||||
// If we did not find any move and we do not try checks, we have finished
|
||||
if (depth != DEPTH_QS_CHECKS)
|
||||
return Move::none();
|
||||
|
||||
++stage;
|
||||
[[fallthrough]];
|
||||
|
||||
case QCHECK_INIT :
|
||||
cur = moves;
|
||||
endMoves = generate<QUIET_CHECKS>(pos, cur);
|
||||
|
||||
++stage;
|
||||
[[fallthrough]];
|
||||
|
||||
case QCHECK :
|
||||
return select<Next>([]() { return true; });
|
||||
}
|
||||
|
||||
|
||||
@@ -118,6 +118,10 @@ enum StatsType {
|
||||
// see www.chessprogramming.org/Butterfly_Boards (~11 elo)
|
||||
using ButterflyHistory = Stats<int16_t, 7183, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)>;
|
||||
|
||||
// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous
|
||||
// move, see www.chessprogramming.org/Countermove_Heuristic
|
||||
using CounterMoveHistory = Stats<Move, NOT_USED, PIECE_NB, SQUARE_NB>;
|
||||
|
||||
// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type]
|
||||
using CapturePieceToHistory = Stats<int16_t, 10692, PIECE_NB, SQUARE_NB, PIECE_TYPE_NB>;
|
||||
|
||||
@@ -137,12 +141,12 @@ using PawnHistory = Stats<int16_t, 8192, PAWN_HISTORY_SIZE, PIECE_NB, SQUARE_NB>
|
||||
using CorrectionHistory =
|
||||
Stats<int16_t, CORRECTION_HISTORY_LIMIT, COLOR_NB, CORRECTION_HISTORY_SIZE>;
|
||||
|
||||
// The MovePicker class is used to pick one pseudo-legal move at a time from the
|
||||
// current position. The most important method is next_move(), which emits one
|
||||
// new pseudo-legal move on every call, until there are no moves left, when
|
||||
// Move::none() is returned. In order to improve the efficiency of the alpha-beta
|
||||
// algorithm, MovePicker attempts to return the moves which are most likely to get
|
||||
// a cut-off first.
|
||||
// MovePicker class is used to pick one pseudo-legal move at a time from the
|
||||
// current position. The most important method is next_move(), which returns a
|
||||
// new pseudo-legal move each time it is called, until there are no moves left,
|
||||
// when Move::none() is returned. In order to improve the efficiency of the
|
||||
// alpha-beta algorithm, MovePicker attempts to return the moves which are most
|
||||
// likely to get a cut-off first.
|
||||
class MovePicker {
|
||||
|
||||
enum PickType {
|
||||
@@ -153,6 +157,15 @@ class MovePicker {
|
||||
public:
|
||||
MovePicker(const MovePicker&) = delete;
|
||||
MovePicker& operator=(const MovePicker&) = delete;
|
||||
MovePicker(const Position&,
|
||||
Move,
|
||||
Depth,
|
||||
const ButterflyHistory*,
|
||||
const CapturePieceToHistory*,
|
||||
const PieceToHistory**,
|
||||
const PawnHistory*,
|
||||
Move,
|
||||
const Move*);
|
||||
MovePicker(const Position&,
|
||||
Move,
|
||||
Depth,
|
||||
@@ -177,11 +190,11 @@ class MovePicker {
|
||||
const PieceToHistory** continuationHistory;
|
||||
const PawnHistory* pawnHistory;
|
||||
Move ttMove;
|
||||
ExtMove * cur, *endMoves, *endBadCaptures, *beginBadQuiets, *endBadQuiets;
|
||||
int stage;
|
||||
int threshold;
|
||||
Depth depth;
|
||||
ExtMove moves[MAX_MOVES];
|
||||
ExtMove refutations[3], *cur, *endMoves, *endBadCaptures, *beginBadQuiets, *endBadQuiets;
|
||||
int stage;
|
||||
int threshold;
|
||||
Depth depth;
|
||||
ExtMove moves[MAX_MOVES];
|
||||
};
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
#include "../../bitboard.h"
|
||||
#include "../../position.h"
|
||||
#include "../../types.h"
|
||||
#include "../nnue_accumulator.h"
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE::Features {
|
||||
|
||||
@@ -49,8 +49,6 @@ void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active)
|
||||
// Explicit template instantiations
|
||||
template void HalfKAv2_hm::append_active_indices<WHITE>(const Position& pos, IndexList& active);
|
||||
template void HalfKAv2_hm::append_active_indices<BLACK>(const Position& pos, IndexList& active);
|
||||
template IndexType HalfKAv2_hm::make_index<WHITE>(Square s, Piece pc, Square ksq);
|
||||
template IndexType HalfKAv2_hm::make_index<BLACK>(Square s, Piece pc, Square ksq);
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
template<Color Perspective>
|
||||
|
||||
@@ -63,6 +63,10 @@ class HalfKAv2_hm {
|
||||
{PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE,
|
||||
PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE}};
|
||||
|
||||
// Index of a feature for a given king position and another piece on some square
|
||||
template<Color Perspective>
|
||||
static IndexType make_index(Square s, Piece pc, Square ksq);
|
||||
|
||||
public:
|
||||
// Feature name
|
||||
static constexpr const char* Name = "HalfKAv2_hm(Friend)";
|
||||
@@ -122,10 +126,6 @@ class HalfKAv2_hm {
|
||||
static constexpr IndexType MaxActiveDimensions = 32;
|
||||
using IndexList = ValueList<IndexType, MaxActiveDimensions>;
|
||||
|
||||
// Index of a feature for a given king position and another piece on some square
|
||||
template<Color Perspective>
|
||||
static IndexType make_index(Square s, Piece pc, Square ksq);
|
||||
|
||||
// Get a list of indices for active features
|
||||
template<Color Perspective>
|
||||
static void append_active_indices(const Position& pos, IndexList& active);
|
||||
|
||||
@@ -39,26 +39,25 @@
|
||||
|
||||
namespace Stockfish::Eval::NNUE::Layers {
|
||||
|
||||
#if defined(USE_SSSE3) || defined(USE_NEON_DOTPROD)
|
||||
#define ENABLE_SEQ_OPT
|
||||
#endif
|
||||
|
||||
// Fallback implementation for older/other architectures.
|
||||
// Requires the input to be padded to at least 16 values.
|
||||
#ifndef ENABLE_SEQ_OPT
|
||||
|
||||
#if !defined(USE_SSSE3)
|
||||
template<IndexType InputDimensions, IndexType PaddedInputDimensions, IndexType OutputDimensions>
|
||||
static void affine_transform_non_ssse3(std::int32_t* output,
|
||||
const std::int8_t* weights,
|
||||
const std::int32_t* biases,
|
||||
const std::uint8_t* input) {
|
||||
#if defined(USE_SSE2) || defined(USE_NEON)
|
||||
#if defined(USE_SSE2) || defined(USE_NEON_DOTPROD) || defined(USE_NEON)
|
||||
#if defined(USE_SSE2)
|
||||
// At least a multiple of 16, with SSE2.
|
||||
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
|
||||
const __m128i Zeros = _mm_setzero_si128();
|
||||
const auto inputVector = reinterpret_cast<const __m128i*>(input);
|
||||
|
||||
#elif defined(USE_NEON_DOTPROD)
|
||||
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
|
||||
const auto inputVector = reinterpret_cast<const int8x16_t*>(input);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
|
||||
const auto inputVector = reinterpret_cast<const int8x8_t*>(input);
|
||||
@@ -92,8 +91,16 @@ static void affine_transform_non_ssse3(std::int32_t* output,
|
||||
sum = _mm_add_epi32(sum, sum_second_32);
|
||||
output[i] = _mm_cvtsi128_si32(sum);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
#elif defined(USE_NEON_DOTPROD)
|
||||
int32x4_t sum = {biases[i]};
|
||||
const auto row = reinterpret_cast<const int8x16_t*>(&weights[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j)
|
||||
{
|
||||
sum = vdotq_s32(sum, inputVector[j], row[j]);
|
||||
}
|
||||
output[i] = vaddvq_s32(sum);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
int32x4_t sum = {biases[i]};
|
||||
const auto row = reinterpret_cast<const int8x8_t*>(&weights[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j)
|
||||
@@ -120,8 +127,7 @@ static void affine_transform_non_ssse3(std::int32_t* output,
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // !ENABLE_SEQ_OPT
|
||||
#endif
|
||||
|
||||
template<IndexType InDims, IndexType OutDims>
|
||||
class AffineTransform {
|
||||
@@ -156,7 +162,7 @@ class AffineTransform {
|
||||
}
|
||||
|
||||
static constexpr IndexType get_weight_index(IndexType i) {
|
||||
#ifdef ENABLE_SEQ_OPT
|
||||
#if defined(USE_SSSE3)
|
||||
return get_weight_index_scrambled(i);
|
||||
#else
|
||||
return i;
|
||||
@@ -184,28 +190,29 @@ class AffineTransform {
|
||||
// Forward propagation
|
||||
void propagate(const InputType* input, OutputType* output) const {
|
||||
|
||||
#ifdef ENABLE_SEQ_OPT
|
||||
#if defined(USE_SSSE3)
|
||||
|
||||
if constexpr (OutputDimensions > 1)
|
||||
{
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
using vec_t = __m512i;
|
||||
#define vec_setzero _mm512_setzero_si512
|
||||
#define vec_set_32 _mm512_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32
|
||||
#define vec_hadd Simd::m512_hadd
|
||||
#elif defined(USE_AVX2)
|
||||
using vec_t = __m256i;
|
||||
#define vec_setzero _mm256_setzero_si256
|
||||
#define vec_set_32 _mm256_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
|
||||
#define vec_hadd Simd::m256_hadd
|
||||
#elif defined(USE_SSSE3)
|
||||
using vec_t = __m128i;
|
||||
#define vec_setzero _mm_setzero_si128
|
||||
#define vec_set_32 _mm_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
|
||||
#elif defined(USE_NEON_DOTPROD)
|
||||
using vec_t = int32x4_t;
|
||||
#define vec_set_32 vdupq_n_s32
|
||||
#define vec_add_dpbusd_32(acc, a, b) \
|
||||
Simd::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \
|
||||
vreinterpretq_s8_s32(b))
|
||||
#define vec_hadd Simd::m128_hadd
|
||||
#endif
|
||||
|
||||
static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType);
|
||||
@@ -235,33 +242,28 @@ class AffineTransform {
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
outptr[k] = acc[k];
|
||||
|
||||
#undef vec_setzero
|
||||
#undef vec_set_32
|
||||
#undef vec_add_dpbusd_32
|
||||
#undef vec_hadd
|
||||
}
|
||||
else if constexpr (OutputDimensions == 1)
|
||||
{
|
||||
|
||||
// We cannot use AVX512 for the last layer because there are only 32 inputs
|
||||
// and the buffer is not padded to 64 elements.
|
||||
#if defined(USE_AVX2)
|
||||
using vec_t = __m256i;
|
||||
#define vec_setzero() _mm256_setzero_si256()
|
||||
#define vec_setzero _mm256_setzero_si256
|
||||
#define vec_set_32 _mm256_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
|
||||
#define vec_hadd Simd::m256_hadd
|
||||
#elif defined(USE_SSSE3)
|
||||
using vec_t = __m128i;
|
||||
#define vec_setzero() _mm_setzero_si128()
|
||||
#define vec_setzero _mm_setzero_si128
|
||||
#define vec_set_32 _mm_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
|
||||
#define vec_hadd Simd::m128_hadd
|
||||
#elif defined(USE_NEON_DOTPROD)
|
||||
using vec_t = int32x4_t;
|
||||
#define vec_setzero() vdupq_n_s32(0)
|
||||
#define vec_set_32 vdupq_n_s32
|
||||
#define vec_add_dpbusd_32(acc, a, b) \
|
||||
Simd::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \
|
||||
vreinterpretq_s8_s32(b))
|
||||
#define vec_hadd Simd::neon_m128_hadd
|
||||
#endif
|
||||
|
||||
const auto inputVector = reinterpret_cast<const vec_t*>(input);
|
||||
|
||||
@@ -65,37 +65,41 @@ class ClippedReLU {
|
||||
if constexpr (InputDimensions % SimdWidth == 0)
|
||||
{
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
const __m256i Zero = _mm256_setzero_si256();
|
||||
const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
|
||||
const auto in = reinterpret_cast<const __m256i*>(input);
|
||||
const auto out = reinterpret_cast<__m256i*>(output);
|
||||
for (IndexType i = 0; i < NumChunks; ++i)
|
||||
{
|
||||
const __m256i words0 =
|
||||
_mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 0]),
|
||||
_mm256_load_si256(&in[i * 4 + 1])),
|
||||
_mm256_srai_epi16(_mm256_packs_epi32(_mm256_load_si256(&in[i * 4 + 0]),
|
||||
_mm256_load_si256(&in[i * 4 + 1])),
|
||||
WeightScaleBits);
|
||||
const __m256i words1 =
|
||||
_mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 2]),
|
||||
_mm256_load_si256(&in[i * 4 + 3])),
|
||||
_mm256_srai_epi16(_mm256_packs_epi32(_mm256_load_si256(&in[i * 4 + 2]),
|
||||
_mm256_load_si256(&in[i * 4 + 3])),
|
||||
WeightScaleBits);
|
||||
_mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(
|
||||
_mm256_packs_epi16(words0, words1), Offsets));
|
||||
_mm256_store_si256(
|
||||
&out[i], _mm256_permutevar8x32_epi32(
|
||||
_mm256_max_epi8(_mm256_packs_epi16(words0, words1), Zero), Offsets));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2);
|
||||
const __m128i Zero = _mm_setzero_si128();
|
||||
const auto in = reinterpret_cast<const __m128i*>(input);
|
||||
const auto out = reinterpret_cast<__m128i*>(output);
|
||||
for (IndexType i = 0; i < NumChunks; ++i)
|
||||
{
|
||||
const __m128i words0 = _mm_srli_epi16(
|
||||
_mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
|
||||
const __m128i words0 = _mm_srai_epi16(
|
||||
_mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
|
||||
WeightScaleBits);
|
||||
const __m128i words1 = _mm_srli_epi16(
|
||||
_mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
|
||||
const __m128i words1 = _mm_srai_epi16(
|
||||
_mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
|
||||
WeightScaleBits);
|
||||
_mm_store_si128(&out[i], _mm_packs_epi16(words0, words1));
|
||||
const __m128i packedbytes = _mm_packs_epi16(words0, words1);
|
||||
_mm_store_si128(&out[i], _mm_max_epi8(packedbytes, Zero));
|
||||
}
|
||||
}
|
||||
constexpr IndexType Start = InputDimensions % SimdWidth == 0
|
||||
@@ -105,7 +109,9 @@ class ClippedReLU {
|
||||
#elif defined(USE_SSE2)
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
|
||||
#ifndef USE_SSE41
|
||||
#ifdef USE_SSE41
|
||||
const __m128i Zero = _mm_setzero_si128();
|
||||
#else
|
||||
const __m128i k0x80s = _mm_set1_epi8(-128);
|
||||
#endif
|
||||
|
||||
@@ -113,15 +119,6 @@ class ClippedReLU {
|
||||
const auto out = reinterpret_cast<__m128i*>(output);
|
||||
for (IndexType i = 0; i < NumChunks; ++i)
|
||||
{
|
||||
#if defined(USE_SSE41)
|
||||
const __m128i words0 = _mm_srli_epi16(
|
||||
_mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
|
||||
WeightScaleBits);
|
||||
const __m128i words1 = _mm_srli_epi16(
|
||||
_mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
|
||||
WeightScaleBits);
|
||||
_mm_store_si128(&out[i], _mm_packs_epi16(words0, words1));
|
||||
#else
|
||||
const __m128i words0 = _mm_srai_epi16(
|
||||
_mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
|
||||
WeightScaleBits);
|
||||
@@ -129,8 +126,15 @@ class ClippedReLU {
|
||||
_mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
|
||||
WeightScaleBits);
|
||||
const __m128i packedbytes = _mm_packs_epi16(words0, words1);
|
||||
_mm_store_si128(&out[i], _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s));
|
||||
_mm_store_si128(&out[i],
|
||||
|
||||
#ifdef USE_SSE41
|
||||
_mm_max_epi8(packedbytes, Zero)
|
||||
#else
|
||||
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
|
||||
#endif
|
||||
|
||||
);
|
||||
}
|
||||
constexpr IndexType Start = NumChunks * SimdWidth;
|
||||
|
||||
|
||||
@@ -43,6 +43,39 @@ namespace Stockfish::Simd {
|
||||
return _mm512_reduce_add_epi32(sum) + bias;
|
||||
}
|
||||
|
||||
/*
|
||||
Parameters:
|
||||
sum0 = [zmm0.i128[0], zmm0.i128[1], zmm0.i128[2], zmm0.i128[3]]
|
||||
sum1 = [zmm1.i128[0], zmm1.i128[1], zmm1.i128[2], zmm1.i128[3]]
|
||||
sum2 = [zmm2.i128[0], zmm2.i128[1], zmm2.i128[2], zmm2.i128[3]]
|
||||
sum3 = [zmm3.i128[0], zmm3.i128[1], zmm3.i128[2], zmm3.i128[3]]
|
||||
|
||||
Returns:
|
||||
ret = [
|
||||
reduce_add_epi32(zmm0.i128[0]), reduce_add_epi32(zmm1.i128[0]), reduce_add_epi32(zmm2.i128[0]), reduce_add_epi32(zmm3.i128[0]),
|
||||
reduce_add_epi32(zmm0.i128[1]), reduce_add_epi32(zmm1.i128[1]), reduce_add_epi32(zmm2.i128[1]), reduce_add_epi32(zmm3.i128[1]),
|
||||
reduce_add_epi32(zmm0.i128[2]), reduce_add_epi32(zmm1.i128[2]), reduce_add_epi32(zmm2.i128[2]), reduce_add_epi32(zmm3.i128[2]),
|
||||
reduce_add_epi32(zmm0.i128[3]), reduce_add_epi32(zmm1.i128[3]), reduce_add_epi32(zmm2.i128[3]), reduce_add_epi32(zmm3.i128[3])
|
||||
]
|
||||
*/
|
||||
[[maybe_unused]] static __m512i
|
||||
m512_hadd128x16_interleave(__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) {
|
||||
|
||||
// Interleave the 32-bit elements of sum0 and sum1 within each 128-bit lane:
// the "lo" variant takes the lower two elements of each input, "hi" the upper two.
__m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1);
|
||||
__m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1);
|
||||
|
||||
// Same interleaving for sum2 and sum3.
__m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3);
|
||||
__m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3);
|
||||
|
||||
// After these adds each 128-bit lane holds two partial sums per input,
// alternating between the two inputs of the pair.
__m512i sum01 = _mm512_add_epi32(sum01a, sum01b);
|
||||
__m512i sum23 = _mm512_add_epi32(sum23a, sum23b);
|
||||
|
||||
// Interleave 64-bit halves so that the matching partial sums of all four
// inputs line up element by element.
__m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23);
|
||||
__m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23);
|
||||
|
||||
// Final add: each 128-bit lane now contains the lane totals of
// sum0, sum1, sum2 and sum3, in that order.
return _mm512_add_epi32(sum0123a, sum0123b);
|
||||
}
|
||||
|
||||
[[maybe_unused]] static void m512_add_dpbusd_epi32(__m512i& acc, __m512i a, __m512i b) {
|
||||
|
||||
#if defined(USE_VNNI)
|
||||
|
||||
@@ -18,17 +18,18 @@
|
||||
|
||||
#include "network.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "../cluster.h"
|
||||
#include "../evaluate.h"
|
||||
#include "../incbin/incbin.h"
|
||||
#include "../memory.h"
|
||||
#include "../misc.h"
|
||||
#include "../position.h"
|
||||
#include "../types.h"
|
||||
@@ -85,6 +86,23 @@ namespace Stockfish::Eval::NNUE {
|
||||
|
||||
namespace Detail {
|
||||
|
||||
// Initialize the evaluation function parameters
|
||||
template<typename T>
|
||||
void initialize(AlignedPtr<T>& pointer) {
|
||||
|
||||
pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void initialize(LargePagePtr<T>& pointer) {
|
||||
|
||||
static_assert(alignof(T) <= 4096,
|
||||
"aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
|
||||
pointer.reset(reinterpret_cast<T*>(aligned_large_pages_alloc(sizeof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
|
||||
// Read evaluation function parameters
|
||||
template<typename T>
|
||||
bool read_parameters(std::istream& stream, T& reference) {
|
||||
@@ -106,42 +124,6 @@ bool write_parameters(std::ostream& stream, const T& reference) {
|
||||
|
||||
} // namespace Detail
|
||||
|
||||
template<typename Arch, typename Transformer>
|
||||
Network<Arch, Transformer>::Network(const Network<Arch, Transformer>& other) :
|
||||
evalFile(other.evalFile),
|
||||
embeddedType(other.embeddedType) {
|
||||
|
||||
if (other.featureTransformer)
|
||||
featureTransformer = make_unique_large_page<Transformer>(*other.featureTransformer);
|
||||
|
||||
network = make_unique_aligned<Arch[]>(LayerStacks);
|
||||
|
||||
if (!other.network)
|
||||
return;
|
||||
|
||||
for (std::size_t i = 0; i < LayerStacks; ++i)
|
||||
network[i] = other.network[i];
|
||||
}
|
||||
|
||||
template<typename Arch, typename Transformer>
|
||||
Network<Arch, Transformer>&
|
||||
Network<Arch, Transformer>::operator=(const Network<Arch, Transformer>& other) {
|
||||
evalFile = other.evalFile;
|
||||
embeddedType = other.embeddedType;
|
||||
|
||||
if (other.featureTransformer)
|
||||
featureTransformer = make_unique_large_page<Transformer>(*other.featureTransformer);
|
||||
|
||||
network = make_unique_aligned<Arch[]>(LayerStacks);
|
||||
|
||||
if (!other.network)
|
||||
return *this;
|
||||
|
||||
for (std::size_t i = 0; i < LayerStacks; ++i)
|
||||
network[i] = other.network[i];
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename Arch, typename Transformer>
|
||||
void Network<Arch, Transformer>::load(const std::string& rootDirectory, std::string evalfilePath) {
|
||||
@@ -205,31 +187,42 @@ bool Network<Arch, Transformer>::save(const std::optional<std::string>& filename
|
||||
|
||||
|
||||
template<typename Arch, typename Transformer>
|
||||
NetworkOutput
|
||||
Network<Arch, Transformer>::evaluate(const Position& pos,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache) const {
|
||||
Value Network<Arch, Transformer>::evaluate(const Position& pos,
|
||||
bool adjusted,
|
||||
int* complexity,
|
||||
bool psqtOnly) const {
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
|
||||
constexpr uint64_t alignment = CacheLineSize;
|
||||
constexpr int delta = 24;
|
||||
|
||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||
TransformedFeatureType
|
||||
transformedFeaturesUnaligned[FeatureTransformer<FTDimensions, nullptr>::BufferSize
|
||||
+ alignment / sizeof(TransformedFeatureType)];
|
||||
TransformedFeatureType transformedFeaturesUnaligned
|
||||
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize
|
||||
+ alignment / sizeof(TransformedFeatureType)];
|
||||
|
||||
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
|
||||
#else
|
||||
alignas(alignment) TransformedFeatureType
|
||||
transformedFeatures[FeatureTransformer<FTDimensions, nullptr>::BufferSize];
|
||||
alignas(alignment) TransformedFeatureType transformedFeatures
|
||||
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize];
|
||||
#endif
|
||||
|
||||
ASSERT_ALIGNED(transformedFeatures, alignment);
|
||||
|
||||
const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
||||
const auto psqt = featureTransformer->transform(pos, cache, transformedFeatures, bucket);
|
||||
const auto positional = network[bucket].propagate(transformedFeatures);
|
||||
return {static_cast<Value>(psqt / OutputScale), static_cast<Value>(positional / OutputScale)};
|
||||
const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
||||
const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket, psqtOnly);
|
||||
const auto positional = !psqtOnly ? (network[bucket]->propagate(transformedFeatures)) : 0;
|
||||
|
||||
if (complexity)
|
||||
*complexity = !psqtOnly ? std::abs(psqt - positional) / OutputScale : 0;
|
||||
|
||||
// Give more value to positional evaluation when adjusted flag is set
|
||||
if (adjusted)
|
||||
return static_cast<Value>(((1024 - delta) * psqt + (1024 + delta) * positional)
|
||||
/ (1024 * OutputScale));
|
||||
else
|
||||
return static_cast<Value>((psqt + positional) / OutputScale);
|
||||
}
|
||||
|
||||
|
||||
@@ -258,37 +251,32 @@ void Network<Arch, Transformer>::verify(std::string evalfilePath) const {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
size_t size = sizeof(*featureTransformer) + sizeof(Arch) * LayerStacks;
|
||||
sync_cout << "info string NNUE evaluation using " << evalfilePath << " ("
|
||||
<< size / (1024 * 1024) << "MiB, (" << featureTransformer->InputDimensions << ", "
|
||||
<< network[0].TransformedFeatureDimensions << ", " << network[0].FC_0_OUTPUTS << ", "
|
||||
<< network[0].FC_1_OUTPUTS << ", 1))" << sync_endl;
|
||||
if (Cluster::is_root())
|
||||
sync_cout << "info string NNUE evaluation using " << evalfilePath << sync_endl;
|
||||
}
|
||||
|
||||
|
||||
template<typename Arch, typename Transformer>
|
||||
void Network<Arch, Transformer>::hint_common_access(
|
||||
const Position& pos, AccumulatorCaches::Cache<FTDimensions>* cache) const {
|
||||
featureTransformer->hint_common_access(pos, cache);
|
||||
void Network<Arch, Transformer>::hint_common_access(const Position& pos, bool psqtOnl) const {
|
||||
featureTransformer->hint_common_access(pos, psqtOnl);
|
||||
}
|
||||
|
||||
|
||||
template<typename Arch, typename Transformer>
|
||||
NnueEvalTrace
|
||||
Network<Arch, Transformer>::trace_evaluate(const Position& pos,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache) const {
|
||||
NnueEvalTrace Network<Arch, Transformer>::trace_evaluate(const Position& pos) const {
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
constexpr uint64_t alignment = CacheLineSize;
|
||||
|
||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||
TransformedFeatureType
|
||||
transformedFeaturesUnaligned[FeatureTransformer<FTDimensions, nullptr>::BufferSize
|
||||
+ alignment / sizeof(TransformedFeatureType)];
|
||||
TransformedFeatureType transformedFeaturesUnaligned
|
||||
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize
|
||||
+ alignment / sizeof(TransformedFeatureType)];
|
||||
|
||||
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
|
||||
#else
|
||||
alignas(alignment) TransformedFeatureType
|
||||
transformedFeatures[FeatureTransformer<FTDimensions, nullptr>::BufferSize];
|
||||
alignas(alignment) TransformedFeatureType transformedFeatures
|
||||
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize];
|
||||
#endif
|
||||
|
||||
ASSERT_ALIGNED(transformedFeatures, alignment);
|
||||
@@ -298,8 +286,8 @@ Network<Arch, Transformer>::trace_evaluate(const Position&
|
||||
for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
|
||||
{
|
||||
const auto materialist =
|
||||
featureTransformer->transform(pos, cache, transformedFeatures, bucket);
|
||||
const auto positional = network[bucket].propagate(transformedFeatures);
|
||||
featureTransformer->transform(pos, transformedFeatures, bucket, false);
|
||||
const auto positional = network[bucket]->propagate(transformedFeatures);
|
||||
|
||||
t.psqt[bucket] = static_cast<Value>(materialist / OutputScale);
|
||||
t.positional[bucket] = static_cast<Value>(positional / OutputScale);
|
||||
@@ -352,8 +340,9 @@ void Network<Arch, Transformer>::load_internal() {
|
||||
|
||||
template<typename Arch, typename Transformer>
|
||||
void Network<Arch, Transformer>::initialize() {
|
||||
featureTransformer = make_unique_large_page<Transformer>();
|
||||
network = make_unique_aligned<Arch[]>(LayerStacks);
|
||||
Detail::initialize(featureTransformer);
|
||||
for (std::size_t i = 0; i < LayerStacks; ++i)
|
||||
Detail::initialize(network[i]);
|
||||
}
|
||||
|
||||
|
||||
@@ -420,7 +409,7 @@ bool Network<Arch, Transformer>::read_parameters(std::istream& stream,
|
||||
return false;
|
||||
for (std::size_t i = 0; i < LayerStacks; ++i)
|
||||
{
|
||||
if (!Detail::read_parameters(stream, network[i]))
|
||||
if (!Detail::read_parameters(stream, *(network[i])))
|
||||
return false;
|
||||
}
|
||||
return stream && stream.peek() == std::ios::traits_type::eof();
|
||||
@@ -436,7 +425,7 @@ bool Network<Arch, Transformer>::write_parameters(std::ostream& stream,
|
||||
return false;
|
||||
for (std::size_t i = 0; i < LayerStacks; ++i)
|
||||
{
|
||||
if (!Detail::write_parameters(stream, network[i]))
|
||||
if (!Detail::write_parameters(stream, *(network[i])))
|
||||
return false;
|
||||
}
|
||||
return bool(stream);
|
||||
|
||||
@@ -23,54 +23,45 @@
|
||||
#include <iostream>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
#include "../memory.h"
|
||||
#include "../misc.h"
|
||||
#include "../position.h"
|
||||
#include "../types.h"
|
||||
#include "nnue_accumulator.h"
|
||||
#include "nnue_architecture.h"
|
||||
#include "nnue_feature_transformer.h"
|
||||
#include "nnue_misc.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
|
||||
enum class EmbeddedNNUEType {
|
||||
BIG,
|
||||
SMALL,
|
||||
};
|
||||
|
||||
using NetworkOutput = std::tuple<Value, Value>;
|
||||
|
||||
template<typename Arch, typename Transformer>
|
||||
class Network {
|
||||
static constexpr IndexType FTDimensions = Arch::TransformedFeatureDimensions;
|
||||
|
||||
public:
|
||||
Network(EvalFile file, EmbeddedNNUEType type) :
|
||||
evalFile(file),
|
||||
embeddedType(type) {}
|
||||
|
||||
Network(const Network& other);
|
||||
Network(Network&& other) = default;
|
||||
|
||||
Network& operator=(const Network& other);
|
||||
Network& operator=(Network&& other) = default;
|
||||
|
||||
void load(const std::string& rootDirectory, std::string evalfilePath);
|
||||
bool save(const std::optional<std::string>& filename) const;
|
||||
|
||||
NetworkOutput evaluate(const Position& pos,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache) const;
|
||||
|
||||
Value evaluate(const Position& pos,
|
||||
bool adjusted = false,
|
||||
int* complexity = nullptr,
|
||||
bool psqtOnly = false) const;
|
||||
|
||||
|
||||
void hint_common_access(const Position& pos,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache) const;
|
||||
void hint_common_access(const Position& pos, bool psqtOnl) const;
|
||||
|
||||
void verify(std::string evalfilePath) const;
|
||||
NnueEvalTrace trace_evaluate(const Position& pos,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache) const;
|
||||
NnueEvalTrace trace_evaluate(const Position& pos) const;
|
||||
|
||||
private:
|
||||
void load_user_net(const std::string&, const std::string&);
|
||||
@@ -91,16 +82,13 @@ class Network {
|
||||
LargePagePtr<Transformer> featureTransformer;
|
||||
|
||||
// Evaluation function
|
||||
AlignedPtr<Arch[]> network;
|
||||
AlignedPtr<Arch> network[LayerStacks];
|
||||
|
||||
EvalFile evalFile;
|
||||
EmbeddedNNUEType embeddedType;
|
||||
|
||||
// Hash value of evaluation function structure
|
||||
static constexpr std::uint32_t hash = Transformer::get_hash_value() ^ Arch::get_hash_value();
|
||||
|
||||
template<IndexType Size>
|
||||
friend struct AccumulatorCaches::Cache;
|
||||
};
|
||||
|
||||
// Definitions of the network types
|
||||
|
||||
@@ -28,76 +28,13 @@
|
||||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
using BiasType = std::int16_t;
|
||||
using PSQTWeightType = std::int32_t;
|
||||
using IndexType = std::uint32_t;
|
||||
|
||||
// Class that holds the result of affine transformation of input features
|
||||
template<IndexType Size>
|
||||
struct alignas(CacheLineSize) Accumulator {
|
||||
std::int16_t accumulation[COLOR_NB][Size];
|
||||
std::int32_t psqtAccumulation[COLOR_NB][PSQTBuckets];
|
||||
bool computed[COLOR_NB];
|
||||
};
|
||||
|
||||
|
||||
// AccumulatorCaches struct provides per-thread accumulator caches, where each
|
||||
// cache contains multiple entries for each of the possible king squares.
|
||||
// When the accumulator needs to be refreshed, the cached entry is used to more
|
||||
// efficiently update the accumulator, instead of rebuilding it from scratch.
|
||||
// This idea, was first described by Luecx (author of Koivisto) and
|
||||
// is commonly referred to as "Finny Tables".
|
||||
struct AccumulatorCaches {
|
||||
|
||||
template<typename Networks>
|
||||
AccumulatorCaches(const Networks& networks) {
|
||||
clear(networks);
|
||||
}
|
||||
|
||||
template<IndexType Size>
|
||||
struct alignas(CacheLineSize) Cache {
|
||||
|
||||
struct alignas(CacheLineSize) Entry {
|
||||
BiasType accumulation[Size];
|
||||
PSQTWeightType psqtAccumulation[PSQTBuckets];
|
||||
Bitboard byColorBB[COLOR_NB];
|
||||
Bitboard byTypeBB[PIECE_TYPE_NB];
|
||||
|
||||
// To initialize a refresh entry, we set all its bitboards empty,
|
||||
// so we put the biases in the accumulation, without any weights on top
|
||||
void clear(const BiasType* biases) {
|
||||
|
||||
std::memcpy(accumulation, biases, sizeof(accumulation));
|
||||
std::memset((uint8_t*) this + offsetof(Entry, psqtAccumulation), 0,
|
||||
sizeof(Entry) - offsetof(Entry, psqtAccumulation));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Network>
|
||||
void clear(const Network& network) {
|
||||
for (auto& entries1D : entries)
|
||||
for (auto& entry : entries1D)
|
||||
entry.clear(network.featureTransformer->biases);
|
||||
}
|
||||
|
||||
void clear(const BiasType* biases) {
|
||||
for (auto& entry : entries)
|
||||
entry.clear(biases);
|
||||
}
|
||||
|
||||
std::array<Entry, COLOR_NB>& operator[](Square sq) { return entries[sq]; }
|
||||
|
||||
std::array<std::array<Entry, COLOR_NB>, SQUARE_NB> entries;
|
||||
};
|
||||
|
||||
template<typename Networks>
|
||||
void clear(const Networks& networks) {
|
||||
big.clear(networks.big);
|
||||
small.clear(networks.small);
|
||||
}
|
||||
|
||||
Cache<TransformedFeatureDimensionsBig> big;
|
||||
Cache<TransformedFeatureDimensionsSmall> small;
|
||||
std::int16_t accumulation[2][Size];
|
||||
std::int32_t psqtAccumulation[2][PSQTBuckets];
|
||||
bool computed[2];
|
||||
bool computedPSQT[2];
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
@@ -55,14 +55,15 @@ using psqt_vec_t = __m256i;
|
||||
#define vec_store(a, b) _mm512_store_si512(a, b)
|
||||
#define vec_add_16(a, b) _mm512_add_epi16(a, b)
|
||||
#define vec_sub_16(a, b) _mm512_sub_epi16(a, b)
|
||||
#define vec_mulhi_16(a, b) _mm512_mulhi_epi16(a, b)
|
||||
#define vec_mul_16(a, b) _mm512_mullo_epi16(a, b)
|
||||
#define vec_zero() _mm512_setzero_epi32()
|
||||
#define vec_set_16(a) _mm512_set1_epi16(a)
|
||||
#define vec_max_16(a, b) _mm512_max_epi16(a, b)
|
||||
#define vec_min_16(a, b) _mm512_min_epi16(a, b)
|
||||
#define vec_slli_16(a, b) _mm512_slli_epi16(a, b)
|
||||
// Inverse permuted at load time
|
||||
#define vec_packus_16(a, b) _mm512_packus_epi16(a, b)
|
||||
inline vec_t vec_msb_pack_16(vec_t a, vec_t b) {
|
||||
vec_t compacted = _mm512_packs_epi16(_mm512_srli_epi16(a, 7), _mm512_srli_epi16(b, 7));
|
||||
return _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7), compacted);
|
||||
}
|
||||
#define vec_load_psqt(a) _mm256_load_si256(a)
|
||||
#define vec_store_psqt(a, b) _mm256_store_si256(a, b)
|
||||
#define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b)
|
||||
@@ -78,14 +79,15 @@ using psqt_vec_t = __m256i;
|
||||
#define vec_store(a, b) _mm256_store_si256(a, b)
|
||||
#define vec_add_16(a, b) _mm256_add_epi16(a, b)
|
||||
#define vec_sub_16(a, b) _mm256_sub_epi16(a, b)
|
||||
#define vec_mulhi_16(a, b) _mm256_mulhi_epi16(a, b)
|
||||
#define vec_mul_16(a, b) _mm256_mullo_epi16(a, b)
|
||||
#define vec_zero() _mm256_setzero_si256()
|
||||
#define vec_set_16(a) _mm256_set1_epi16(a)
|
||||
#define vec_max_16(a, b) _mm256_max_epi16(a, b)
|
||||
#define vec_min_16(a, b) _mm256_min_epi16(a, b)
|
||||
#define vec_slli_16(a, b) _mm256_slli_epi16(a, b)
|
||||
// Inverse permuted at load time
|
||||
#define vec_packus_16(a, b) _mm256_packus_epi16(a, b)
|
||||
inline vec_t vec_msb_pack_16(vec_t a, vec_t b) {
|
||||
vec_t compacted = _mm256_packs_epi16(_mm256_srli_epi16(a, 7), _mm256_srli_epi16(b, 7));
|
||||
return _mm256_permute4x64_epi64(compacted, 0b11011000);
|
||||
}
|
||||
#define vec_load_psqt(a) _mm256_load_si256(a)
|
||||
#define vec_store_psqt(a, b) _mm256_store_si256(a, b)
|
||||
#define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b)
|
||||
@@ -101,13 +103,12 @@ using psqt_vec_t = __m128i;
|
||||
#define vec_store(a, b) *(a) = (b)
|
||||
#define vec_add_16(a, b) _mm_add_epi16(a, b)
|
||||
#define vec_sub_16(a, b) _mm_sub_epi16(a, b)
|
||||
#define vec_mulhi_16(a, b) _mm_mulhi_epi16(a, b)
|
||||
#define vec_mul_16(a, b) _mm_mullo_epi16(a, b)
|
||||
#define vec_zero() _mm_setzero_si128()
|
||||
#define vec_set_16(a) _mm_set1_epi16(a)
|
||||
#define vec_max_16(a, b) _mm_max_epi16(a, b)
|
||||
#define vec_min_16(a, b) _mm_min_epi16(a, b)
|
||||
#define vec_slli_16(a, b) _mm_slli_epi16(a, b)
|
||||
#define vec_packus_16(a, b) _mm_packus_epi16(a, b)
|
||||
#define vec_msb_pack_16(a, b) _mm_packs_epi16(_mm_srli_epi16(a, 7), _mm_srli_epi16(b, 7))
|
||||
#define vec_load_psqt(a) (*(a))
|
||||
#define vec_store_psqt(a, b) *(a) = (b)
|
||||
#define vec_add_psqt_32(a, b) _mm_add_epi32(a, b)
|
||||
@@ -123,14 +124,18 @@ using psqt_vec_t = int32x4_t;
|
||||
#define vec_store(a, b) *(a) = (b)
|
||||
#define vec_add_16(a, b) vaddq_s16(a, b)
|
||||
#define vec_sub_16(a, b) vsubq_s16(a, b)
|
||||
#define vec_mulhi_16(a, b) vqdmulhq_s16(a, b)
|
||||
#define vec_mul_16(a, b) vmulq_s16(a, b)
|
||||
#define vec_zero() \
|
||||
vec_t { 0 }
|
||||
#define vec_set_16(a) vdupq_n_s16(a)
|
||||
#define vec_max_16(a, b) vmaxq_s16(a, b)
|
||||
#define vec_min_16(a, b) vminq_s16(a, b)
|
||||
#define vec_slli_16(a, b) vshlq_s16(a, vec_set_16(b))
|
||||
#define vec_packus_16(a, b) reinterpret_cast<vec_t>(vcombine_u8(vqmovun_s16(a), vqmovun_s16(b)))
|
||||
inline vec_t vec_msb_pack_16(vec_t a, vec_t b) {
|
||||
const int8x8_t shifta = vshrn_n_s16(a, 7);
|
||||
const int8x8_t shiftb = vshrn_n_s16(b, 7);
|
||||
const int8x16_t compacted = vcombine_s8(shifta, shiftb);
|
||||
return *reinterpret_cast<const vec_t*>(&compacted);
|
||||
}
|
||||
#define vec_load_psqt(a) (*(a))
|
||||
#define vec_store_psqt(a, b) *(a) = (b)
|
||||
#define vec_add_psqt_32(a, b) vaddq_s32(a, b)
|
||||
@@ -192,10 +197,10 @@ template<IndexType TransformedFeatureDimensions,
|
||||
Accumulator<TransformedFeatureDimensions> StateInfo::*accPtr>
|
||||
class FeatureTransformer {
|
||||
|
||||
private:
|
||||
// Number of output dimensions for one side
|
||||
static constexpr IndexType HalfDimensions = TransformedFeatureDimensions;
|
||||
|
||||
private:
|
||||
#ifdef VECTOR
|
||||
static constexpr int NumRegs =
|
||||
BestRegisterCount<vec_t, WeightType, TransformedFeatureDimensions, NumRegistersSIMD>();
|
||||
@@ -224,73 +229,6 @@ class FeatureTransformer {
|
||||
return FeatureSet::HashValue ^ (OutputDimensions * 2);
|
||||
}
|
||||
|
||||
static constexpr void order_packs([[maybe_unused]] uint64_t* v) {
|
||||
#if defined(USE_AVX512) // _mm512_packs_epi16 ordering
|
||||
uint64_t tmp0 = v[2], tmp1 = v[3];
|
||||
v[2] = v[8], v[3] = v[9];
|
||||
v[8] = v[4], v[9] = v[5];
|
||||
v[4] = tmp0, v[5] = tmp1;
|
||||
tmp0 = v[6], tmp1 = v[7];
|
||||
v[6] = v[10], v[7] = v[11];
|
||||
v[10] = v[12], v[11] = v[13];
|
||||
v[12] = tmp0, v[13] = tmp1;
|
||||
#elif defined(USE_AVX2) // _mm256_packs_epi16 ordering
|
||||
std::swap(v[2], v[4]);
|
||||
std::swap(v[3], v[5]);
|
||||
#endif
|
||||
}
|
||||
|
||||
static constexpr void inverse_order_packs([[maybe_unused]] uint64_t* v) {
|
||||
#if defined(USE_AVX512) // Inverse _mm512_packs_epi16 ordering
|
||||
uint64_t tmp0 = v[2], tmp1 = v[3];
|
||||
v[2] = v[4], v[3] = v[5];
|
||||
v[4] = v[8], v[5] = v[9];
|
||||
v[8] = tmp0, v[9] = tmp1;
|
||||
tmp0 = v[6], tmp1 = v[7];
|
||||
v[6] = v[12], v[7] = v[13];
|
||||
v[12] = v[10], v[13] = v[11];
|
||||
v[10] = tmp0, v[11] = tmp1;
|
||||
#elif defined(USE_AVX2) // Inverse _mm256_packs_epi16 ordering
|
||||
std::swap(v[2], v[4]);
|
||||
std::swap(v[3], v[5]);
|
||||
#endif
|
||||
}
|
||||
|
||||
void permute_weights([[maybe_unused]] void (*order_fn)(uint64_t*)) const {
|
||||
#if defined(USE_AVX2)
|
||||
#if defined(USE_AVX512)
|
||||
constexpr IndexType di = 16;
|
||||
#else
|
||||
constexpr IndexType di = 8;
|
||||
#endif
|
||||
uint64_t* b = reinterpret_cast<uint64_t*>(const_cast<BiasType*>(&biases[0]));
|
||||
for (IndexType i = 0; i < HalfDimensions * sizeof(BiasType) / sizeof(uint64_t); i += di)
|
||||
order_fn(&b[i]);
|
||||
|
||||
for (IndexType j = 0; j < InputDimensions; ++j)
|
||||
{
|
||||
uint64_t* w =
|
||||
reinterpret_cast<uint64_t*>(const_cast<WeightType*>(&weights[j * HalfDimensions]));
|
||||
for (IndexType i = 0; i < HalfDimensions * sizeof(WeightType) / sizeof(uint64_t);
|
||||
i += di)
|
||||
order_fn(&w[i]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void scale_weights(bool read) const {
|
||||
for (IndexType j = 0; j < InputDimensions; ++j)
|
||||
{
|
||||
WeightType* w = const_cast<WeightType*>(&weights[j * HalfDimensions]);
|
||||
for (IndexType i = 0; i < HalfDimensions; ++i)
|
||||
w[i] = read ? w[i] * 2 : w[i] / 2;
|
||||
}
|
||||
|
||||
BiasType* b = const_cast<BiasType*>(biases);
|
||||
for (IndexType i = 0; i < HalfDimensions; ++i)
|
||||
b[i] = read ? b[i] * 2 : b[i] / 2;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool read_parameters(std::istream& stream) {
|
||||
|
||||
@@ -298,33 +236,24 @@ class FeatureTransformer {
|
||||
read_leb_128<WeightType>(stream, weights, HalfDimensions * InputDimensions);
|
||||
read_leb_128<PSQTWeightType>(stream, psqtWeights, PSQTBuckets * InputDimensions);
|
||||
|
||||
permute_weights(inverse_order_packs);
|
||||
scale_weights(true);
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Write network parameters
|
||||
bool write_parameters(std::ostream& stream) const {
|
||||
|
||||
permute_weights(order_packs);
|
||||
scale_weights(false);
|
||||
|
||||
write_leb_128<BiasType>(stream, biases, HalfDimensions);
|
||||
write_leb_128<WeightType>(stream, weights, HalfDimensions * InputDimensions);
|
||||
write_leb_128<PSQTWeightType>(stream, psqtWeights, PSQTBuckets * InputDimensions);
|
||||
|
||||
permute_weights(inverse_order_packs);
|
||||
scale_weights(true);
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Convert input features
|
||||
std::int32_t transform(const Position& pos,
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache,
|
||||
OutputType* output,
|
||||
int bucket) const {
|
||||
update_accumulator<WHITE>(pos, cache);
|
||||
update_accumulator<BLACK>(pos, cache);
|
||||
std::int32_t
|
||||
transform(const Position& pos, OutputType* output, int bucket, bool psqtOnly) const {
|
||||
update_accumulator<WHITE>(pos, psqtOnly);
|
||||
update_accumulator<BLACK>(pos, psqtOnly);
|
||||
|
||||
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
|
||||
const auto& psqtAccumulation = (pos.state()->*accPtr).psqtAccumulation;
|
||||
@@ -332,6 +261,9 @@ class FeatureTransformer {
|
||||
(psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket])
|
||||
/ 2;
|
||||
|
||||
if (psqtOnly)
|
||||
return psqt;
|
||||
|
||||
const auto& accumulation = (pos.state()->*accPtr).accumulation;
|
||||
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
@@ -344,87 +276,25 @@ class FeatureTransformer {
|
||||
static_assert((HalfDimensions / 2) % OutputChunkSize == 0);
|
||||
constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize;
|
||||
|
||||
const vec_t Zero = vec_zero();
|
||||
const vec_t One = vec_set_16(127 * 2);
|
||||
vec_t Zero = vec_zero();
|
||||
vec_t One = vec_set_16(127);
|
||||
|
||||
const vec_t* in0 = reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][0]));
|
||||
const vec_t* in1 =
|
||||
reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][HalfDimensions / 2]));
|
||||
vec_t* out = reinterpret_cast<vec_t*>(output + offset);
|
||||
|
||||
// Per the NNUE architecture, here we want to multiply pairs of
|
||||
// clipped elements and divide the product by 128. To do this,
|
||||
// we can naively perform min/max operation to clip each of the
|
||||
// four int16 vectors, mullo pairs together, then pack them into
|
||||
// one int8 vector. However, there exists a faster way.
|
||||
|
||||
// The idea here is to use the implicit clipping from packus to
|
||||
// save us two vec_max_16 instructions. This clipping works due
|
||||
// to the fact that any int16 integer below zero will be zeroed
|
||||
// on packus.
|
||||
|
||||
// Consider the case where the second element is negative.
|
||||
// If we do standard clipping, that element will be zero, which
|
||||
// means our pairwise product is zero. If we perform packus and
|
||||
// remove the lower-side clip for the second element, then our
|
||||
// product before packus will be negative, and is zeroed on pack.
|
||||
// The two operation produce equivalent results, but the second
|
||||
// one (using packus) saves one max operation per pair.
|
||||
|
||||
// But here we run into a problem: mullo does not preserve the
|
||||
// sign of the multiplication. We can get around this by doing
|
||||
// mulhi, which keeps the sign. But that requires an additional
|
||||
// tweak.
|
||||
|
||||
// mulhi cuts off the last 16 bits of the resulting product,
|
||||
// which is the same as performing a rightward shift of 16 bits.
|
||||
// We can use this to our advantage. Recall that we want to
|
||||
// divide the final product by 128, which is equivalent to a
|
||||
// 7-bit right shift. Intuitively, if we shift the clipped
|
||||
// value left by 9, and perform mulhi, which shifts the product
|
||||
// right by 16 bits, then we will net a right shift of 7 bits.
|
||||
// However, this won't work as intended. Since we clip the
|
||||
// values to have a maximum value of 127, shifting it by 9 bits
|
||||
// might occupy the signed bit, resulting in some positive
|
||||
// values being interpreted as negative after the shift.
|
||||
|
||||
// There is a way, however, to get around this limitation. When
|
||||
// loading the network, scale accumulator weights and biases by
|
||||
// 2. To get the same pairwise multiplication result as before,
|
||||
// we need to divide the product by 128 * 2 * 2 = 512, which
|
||||
// amounts to a right shift of 9 bits. So now we only have to
|
||||
// shift left by 7 bits, perform mulhi (shifts right by 16 bits)
|
||||
// and net a 9 bit right shift. Since we scaled everything by
|
||||
// two, the values are clipped at 127 * 2 = 254, which occupies
|
||||
// 8 bits. Shifting it by 7 bits left will no longer occupy the
|
||||
// signed bit, so we are safe.
|
||||
|
||||
// Note that on NEON processors, we shift left by 6 instead
|
||||
// because the instruction "vqdmulhq_s16" also doubles the
|
||||
// return value after the multiplication, adding an extra shift
|
||||
// to the left by 1, so we compensate by shifting less before
|
||||
// the multiplication.
|
||||
|
||||
constexpr int shift =
|
||||
#if defined(USE_SSE2)
|
||||
7;
|
||||
#else
|
||||
6;
|
||||
#endif
|
||||
|
||||
for (IndexType j = 0; j < NumOutputChunks; ++j)
|
||||
{
|
||||
const vec_t sum0a =
|
||||
vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero), shift);
|
||||
const vec_t sum0b =
|
||||
vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero), shift);
|
||||
const vec_t sum1a = vec_min_16(in1[j * 2 + 0], One);
|
||||
const vec_t sum1b = vec_min_16(in1[j * 2 + 1], One);
|
||||
const vec_t sum0a = vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero);
|
||||
const vec_t sum0b = vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero);
|
||||
const vec_t sum1a = vec_max_16(vec_min_16(in1[j * 2 + 0], One), Zero);
|
||||
const vec_t sum1b = vec_max_16(vec_min_16(in1[j * 2 + 1], One), Zero);
|
||||
|
||||
const vec_t pa = vec_mulhi_16(sum0a, sum1a);
|
||||
const vec_t pb = vec_mulhi_16(sum0b, sum1b);
|
||||
const vec_t pa = vec_mul_16(sum0a, sum1a);
|
||||
const vec_t pb = vec_mul_16(sum0b, sum1b);
|
||||
|
||||
out[j] = vec_packus_16(pa, pb);
|
||||
out[j] = vec_msb_pack_16(pa, pb);
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -434,9 +304,9 @@ class FeatureTransformer {
|
||||
BiasType sum0 = accumulation[static_cast<int>(perspectives[p])][j + 0];
|
||||
BiasType sum1 =
|
||||
accumulation[static_cast<int>(perspectives[p])][j + HalfDimensions / 2];
|
||||
sum0 = std::clamp<BiasType>(sum0, 0, 127 * 2);
|
||||
sum1 = std::clamp<BiasType>(sum1, 0, 127 * 2);
|
||||
output[offset + j] = static_cast<OutputType>(unsigned(sum0 * sum1) / 512);
|
||||
sum0 = std::clamp<BiasType>(sum0, 0, 127);
|
||||
sum1 = std::clamp<BiasType>(sum1, 0, 127);
|
||||
output[offset + j] = static_cast<OutputType>(unsigned(sum0 * sum1) / 128);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -445,21 +315,22 @@ class FeatureTransformer {
|
||||
return psqt;
|
||||
} // end of function transform()
|
||||
|
||||
void hint_common_access(const Position& pos,
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache) const {
|
||||
hint_common_access_for_perspective<WHITE>(pos, cache);
|
||||
hint_common_access_for_perspective<BLACK>(pos, cache);
|
||||
void hint_common_access(const Position& pos, bool psqtOnly) const {
|
||||
hint_common_access_for_perspective<WHITE>(pos, psqtOnly);
|
||||
hint_common_access_for_perspective<BLACK>(pos, psqtOnly);
|
||||
}
|
||||
|
||||
private:
|
||||
template<Color Perspective>
|
||||
[[nodiscard]] std::pair<StateInfo*, StateInfo*>
|
||||
try_find_computed_accumulator(const Position& pos) const {
|
||||
try_find_computed_accumulator(const Position& pos, bool psqtOnly) const {
|
||||
// Look for a usable accumulator of an earlier position. We keep track
|
||||
// of the estimated gain in terms of features to be added/subtracted.
|
||||
StateInfo *st = pos.state(), *next = nullptr;
|
||||
int gain = FeatureSet::refresh_cost(pos);
|
||||
while (st->previous && !(st->*accPtr).computed[Perspective])
|
||||
while (st->previous
|
||||
&& (!(st->*accPtr).computedPSQT[Perspective]
|
||||
|| (!psqtOnly && !(st->*accPtr).computed[Perspective])))
|
||||
{
|
||||
// This governs when a full feature refresh is needed and how many
|
||||
// updates are better than just one full refresh.
|
||||
@@ -472,33 +343,32 @@ class FeatureTransformer {
|
||||
return {st, next};
|
||||
}
|
||||
|
||||
// NOTE: The parameter states_to_update is an array of position states.
|
||||
// All states must be sequential, that is states_to_update[i] must
|
||||
// either be reachable by repeatedly applying ->previous from
|
||||
// states_to_update[i+1], and computed_st must be reachable by
|
||||
// repeatedly applying ->previous on states_to_update[0].
|
||||
// NOTE: The parameter states_to_update is an array of position states, ending with nullptr.
|
||||
// All states must be sequential, that is states_to_update[i] must either be reachable
|
||||
// by repeatedly applying ->previous from states_to_update[i+1] or
|
||||
// states_to_update[i] == nullptr.
|
||||
// computed_st must be reachable by repeatedly applying ->previous on
|
||||
// states_to_update[0], if not nullptr.
|
||||
template<Color Perspective, size_t N>
|
||||
void update_accumulator_incremental(const Position& pos,
|
||||
StateInfo* computed_st,
|
||||
StateInfo* states_to_update[N]) const {
|
||||
StateInfo* states_to_update[N],
|
||||
bool psqtOnly) const {
|
||||
static_assert(N > 0);
|
||||
assert([&]() {
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
{
|
||||
if (states_to_update[i] == nullptr)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}());
|
||||
assert(states_to_update[N - 1] == nullptr);
|
||||
|
||||
#ifdef VECTOR
|
||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
||||
// is defined in the VECTOR code below, once in each branch.
|
||||
// is defined in the VECTOR code below, once in each branch
|
||||
vec_t acc[NumRegs];
|
||||
psqt_vec_t psqt[NumPsqtRegs];
|
||||
#endif
|
||||
|
||||
if (states_to_update[0] == nullptr)
|
||||
return;
|
||||
|
||||
// Update incrementally going back through states_to_update.
|
||||
|
||||
// Gather all features to be updated.
|
||||
const Square ksq = pos.square<KING>(Perspective);
|
||||
|
||||
@@ -506,54 +376,68 @@ class FeatureTransformer {
|
||||
// That might depend on the feature set and generally relies on the
|
||||
// feature set's update cost calculation to be correct and never allow
|
||||
// updates with more added/removed features than MaxActiveDimensions.
|
||||
FeatureSet::IndexList removed[N], added[N];
|
||||
FeatureSet::IndexList removed[N - 1], added[N - 1];
|
||||
|
||||
for (int i = N - 1; i >= 0; --i)
|
||||
{
|
||||
(states_to_update[i]->*accPtr).computed[Perspective] = true;
|
||||
int i =
|
||||
N
|
||||
- 2; // Last potential state to update. Skip last element because it must be nullptr.
|
||||
while (states_to_update[i] == nullptr)
|
||||
--i;
|
||||
|
||||
const StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1];
|
||||
StateInfo* st2 = states_to_update[i];
|
||||
|
||||
for (StateInfo* st2 = states_to_update[i]; st2 != end_state; st2 = st2->previous)
|
||||
FeatureSet::append_changed_indices<Perspective>(ksq, st2->dirtyPiece, removed[i],
|
||||
added[i]);
|
||||
for (; i >= 0; --i)
|
||||
{
|
||||
(states_to_update[i]->*accPtr).computed[Perspective] = !psqtOnly;
|
||||
(states_to_update[i]->*accPtr).computedPSQT[Perspective] = true;
|
||||
|
||||
const StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1];
|
||||
|
||||
for (; st2 != end_state; st2 = st2->previous)
|
||||
FeatureSet::append_changed_indices<Perspective>(ksq, st2->dirtyPiece,
|
||||
removed[i], added[i]);
|
||||
}
|
||||
}
|
||||
|
||||
StateInfo* st = computed_st;
|
||||
|
||||
// Now update the accumulators listed in states_to_update[],
|
||||
// where the last element is a sentinel.
|
||||
// Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
|
||||
#ifdef VECTOR
|
||||
|
||||
if (N == 1 && (removed[0].size() == 1 || removed[0].size() == 2) && added[0].size() == 1)
|
||||
if (states_to_update[1] == nullptr && (removed[0].size() == 1 || removed[0].size() == 2)
|
||||
&& added[0].size() == 1)
|
||||
{
|
||||
assert(states_to_update[0]);
|
||||
|
||||
auto accIn =
|
||||
reinterpret_cast<const vec_t*>(&(st->*accPtr).accumulation[Perspective][0]);
|
||||
auto accOut = reinterpret_cast<vec_t*>(
|
||||
&(states_to_update[0]->*accPtr).accumulation[Perspective][0]);
|
||||
|
||||
const IndexType offsetR0 = HalfDimensions * removed[0][0];
|
||||
auto columnR0 = reinterpret_cast<const vec_t*>(&weights[offsetR0]);
|
||||
const IndexType offsetA = HalfDimensions * added[0][0];
|
||||
auto columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
|
||||
|
||||
if (removed[0].size() == 1)
|
||||
if (!psqtOnly)
|
||||
{
|
||||
for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t);
|
||||
++k)
|
||||
accOut[k] = vec_add_16(vec_sub_16(accIn[k], columnR0[k]), columnA[k]);
|
||||
}
|
||||
else
|
||||
{
|
||||
const IndexType offsetR1 = HalfDimensions * removed[0][1];
|
||||
auto columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
|
||||
auto accIn =
|
||||
reinterpret_cast<const vec_t*>(&(st->*accPtr).accumulation[Perspective][0]);
|
||||
auto accOut = reinterpret_cast<vec_t*>(
|
||||
&(states_to_update[0]->*accPtr).accumulation[Perspective][0]);
|
||||
|
||||
for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t);
|
||||
++k)
|
||||
accOut[k] = vec_sub_16(vec_add_16(accIn[k], columnA[k]),
|
||||
vec_add_16(columnR0[k], columnR1[k]));
|
||||
const IndexType offsetR0 = HalfDimensions * removed[0][0];
|
||||
auto columnR0 = reinterpret_cast<const vec_t*>(&weights[offsetR0]);
|
||||
const IndexType offsetA = HalfDimensions * added[0][0];
|
||||
auto columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
|
||||
|
||||
if (removed[0].size() == 1)
|
||||
{
|
||||
for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t);
|
||||
++k)
|
||||
accOut[k] = vec_add_16(vec_sub_16(accIn[k], columnR0[k]), columnA[k]);
|
||||
}
|
||||
else
|
||||
{
|
||||
const IndexType offsetR1 = HalfDimensions * removed[0][1];
|
||||
auto columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
|
||||
|
||||
for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t);
|
||||
++k)
|
||||
accOut[k] = vec_sub_16(vec_add_16(accIn[k], columnA[k]),
|
||||
vec_add_16(columnR0[k], columnR1[k]));
|
||||
}
|
||||
}
|
||||
|
||||
auto accPsqtIn =
|
||||
@@ -587,41 +471,43 @@ class FeatureTransformer {
|
||||
}
|
||||
else
|
||||
{
|
||||
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
||||
{
|
||||
// Load accumulator
|
||||
auto accTileIn = reinterpret_cast<const vec_t*>(
|
||||
&(st->*accPtr).accumulation[Perspective][j * TileHeight]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_load(&accTileIn[k]);
|
||||
|
||||
for (IndexType i = 0; i < N; ++i)
|
||||
if (!psqtOnly)
|
||||
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
||||
{
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed[i])
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
// Difference calculation for the activated features
|
||||
for (const auto index : added[i])
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
// Store accumulator
|
||||
auto accTileOut = reinterpret_cast<vec_t*>(
|
||||
&(states_to_update[i]->*accPtr).accumulation[Perspective][j * TileHeight]);
|
||||
// Load accumulator
|
||||
auto accTileIn = reinterpret_cast<const vec_t*>(
|
||||
&(st->*accPtr).accumulation[Perspective][j * TileHeight]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
vec_store(&accTileOut[k], acc[k]);
|
||||
acc[k] = vec_load(&accTileIn[k]);
|
||||
|
||||
for (IndexType i = 0; states_to_update[i]; ++i)
|
||||
{
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed[i])
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
// Difference calculation for the activated features
|
||||
for (const auto index : added[i])
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
// Store accumulator
|
||||
auto accTileOut =
|
||||
reinterpret_cast<vec_t*>(&(states_to_update[i]->*accPtr)
|
||||
.accumulation[Perspective][j * TileHeight]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
vec_store(&accTileOut[k], acc[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
|
||||
{
|
||||
@@ -631,7 +517,7 @@ class FeatureTransformer {
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_load_psqt(&accTilePsqtIn[k]);
|
||||
|
||||
for (IndexType i = 0; i < N; ++i)
|
||||
for (IndexType i = 0; states_to_update[i]; ++i)
|
||||
{
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed[i])
|
||||
@@ -661,10 +547,12 @@ class FeatureTransformer {
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (IndexType i = 0; i < N; ++i)
|
||||
for (IndexType i = 0; states_to_update[i]; ++i)
|
||||
{
|
||||
std::memcpy((states_to_update[i]->*accPtr).accumulation[Perspective],
|
||||
(st->*accPtr).accumulation[Perspective], HalfDimensions * sizeof(BiasType));
|
||||
if (!psqtOnly)
|
||||
std::memcpy((states_to_update[i]->*accPtr).accumulation[Perspective],
|
||||
(st->*accPtr).accumulation[Perspective],
|
||||
HalfDimensions * sizeof(BiasType));
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
(states_to_update[i]->*accPtr).psqtAccumulation[Perspective][k] =
|
||||
@@ -675,9 +563,12 @@ class FeatureTransformer {
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed[i])
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
(st->*accPtr).accumulation[Perspective][j] -= weights[offset + j];
|
||||
if (!psqtOnly)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
(st->*accPtr).accumulation[Perspective][j] -= weights[offset + j];
|
||||
}
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
(st->*accPtr).psqtAccumulation[Perspective][k] -=
|
||||
@@ -687,9 +578,12 @@ class FeatureTransformer {
|
||||
// Difference calculation for the activated features
|
||||
for (const auto index : added[i])
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
(st->*accPtr).accumulation[Perspective][j] += weights[offset + j];
|
||||
if (!psqtOnly)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
(st->*accPtr).accumulation[Perspective][j] += weights[offset + j];
|
||||
}
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
(st->*accPtr).psqtAccumulation[Perspective][k] +=
|
||||
@@ -700,113 +594,82 @@ class FeatureTransformer {
|
||||
}
|
||||
|
||||
template<Color Perspective>
|
||||
void update_accumulator_refresh_cache(const Position& pos,
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache) const {
|
||||
assert(cache != nullptr);
|
||||
|
||||
Square ksq = pos.square<KING>(Perspective);
|
||||
auto& entry = (*cache)[ksq][Perspective];
|
||||
FeatureSet::IndexList removed, added;
|
||||
|
||||
for (Color c : {WHITE, BLACK})
|
||||
{
|
||||
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
||||
{
|
||||
const Piece piece = make_piece(c, pt);
|
||||
const Bitboard oldBB = entry.byColorBB[c] & entry.byTypeBB[pt];
|
||||
const Bitboard newBB = pos.pieces(c, pt);
|
||||
Bitboard toRemove = oldBB & ~newBB;
|
||||
Bitboard toAdd = newBB & ~oldBB;
|
||||
|
||||
while (toRemove)
|
||||
{
|
||||
Square sq = pop_lsb(toRemove);
|
||||
removed.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
|
||||
}
|
||||
while (toAdd)
|
||||
{
|
||||
Square sq = pop_lsb(toAdd);
|
||||
added.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto& accumulator = pos.state()->*accPtr;
|
||||
accumulator.computed[Perspective] = true;
|
||||
|
||||
void update_accumulator_refresh(const Position& pos, bool psqtOnly) const {
|
||||
#ifdef VECTOR
|
||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
||||
// is defined in the VECTOR code below, once in each branch
|
||||
vec_t acc[NumRegs];
|
||||
psqt_vec_t psqt[NumPsqtRegs];
|
||||
#endif
|
||||
|
||||
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
||||
{
|
||||
auto accTile =
|
||||
reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * TileHeight]);
|
||||
auto entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * TileHeight]);
|
||||
// Refresh the accumulator
|
||||
// Could be extracted to a separate function because it's done in 2 places,
|
||||
// but it's unclear if compilers would correctly handle register allocation.
|
||||
auto& accumulator = pos.state()->*accPtr;
|
||||
accumulator.computed[Perspective] = !psqtOnly;
|
||||
accumulator.computedPSQT[Perspective] = true;
|
||||
FeatureSet::IndexList active;
|
||||
FeatureSet::append_active_indices<Perspective>(pos, active);
|
||||
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = entryTile[k];
|
||||
|
||||
int i = 0;
|
||||
for (; i < int(std::min(removed.size(), added.size())); ++i)
|
||||
#ifdef VECTOR
|
||||
if (!psqtOnly)
|
||||
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
||||
{
|
||||
IndexType indexR = removed[i];
|
||||
const IndexType offsetR = HalfDimensions * indexR + j * TileHeight;
|
||||
auto columnR = reinterpret_cast<const vec_t*>(&weights[offsetR]);
|
||||
IndexType indexA = added[i];
|
||||
const IndexType offsetA = HalfDimensions * indexA + j * TileHeight;
|
||||
auto columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
|
||||
auto biasesTile = reinterpret_cast<const vec_t*>(&biases[j * TileHeight]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], vec_sub_16(columnA[k], columnR[k]));
|
||||
int i = 0;
|
||||
for (; i < int(active.size()) - 1; i += 2)
|
||||
{
|
||||
IndexType index0 = active[i];
|
||||
IndexType index1 = active[i + 1];
|
||||
const IndexType offset0 = HalfDimensions * index0 + j * TileHeight;
|
||||
const IndexType offset1 = HalfDimensions * index1 + j * TileHeight;
|
||||
auto column0 = reinterpret_cast<const vec_t*>(&weights[offset0]);
|
||||
auto column1 = reinterpret_cast<const vec_t*>(&weights[offset1]);
|
||||
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], vec_add_16(column0[k], column1[k]));
|
||||
}
|
||||
for (; i < int(active.size()); ++i)
|
||||
{
|
||||
IndexType index = active[i];
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
auto accTile =
|
||||
reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * TileHeight]);
|
||||
for (unsigned k = 0; k < NumRegs; k++)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
for (; i < int(removed.size()); ++i)
|
||||
{
|
||||
IndexType index = removed[i];
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
for (; i < int(added.size()); ++i)
|
||||
{
|
||||
IndexType index = added[i];
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < NumRegs; k++)
|
||||
vec_store(&entryTile[k], acc[k]);
|
||||
for (IndexType k = 0; k < NumRegs; k++)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
|
||||
for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
|
||||
{
|
||||
auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
|
||||
&accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
|
||||
auto entryTilePsqt =
|
||||
reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * PsqtTileHeight]);
|
||||
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] = entryTilePsqt[k];
|
||||
psqt[k] = vec_zero_psqt();
|
||||
|
||||
for (int i = 0; i < int(removed.size()); ++i)
|
||||
int i = 0;
|
||||
for (; i < int(active.size()) - 1; i += 2)
|
||||
{
|
||||
IndexType index = removed[i];
|
||||
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
|
||||
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
|
||||
IndexType index0 = active[i];
|
||||
IndexType index1 = active[i + 1];
|
||||
const IndexType offset0 = PSQTBuckets * index0 + j * PsqtTileHeight;
|
||||
const IndexType offset1 = PSQTBuckets * index1 + j * PsqtTileHeight;
|
||||
auto columnPsqt0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset0]);
|
||||
auto columnPsqt1 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset1]);
|
||||
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
|
||||
psqt[k] =
|
||||
vec_add_psqt_32(psqt[k], vec_add_psqt_32(columnPsqt0[k], columnPsqt1[k]));
|
||||
}
|
||||
for (int i = 0; i < int(added.size()); ++i)
|
||||
for (; i < int(active.size()); ++i)
|
||||
{
|
||||
IndexType index = added[i];
|
||||
IndexType index = active[i];
|
||||
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
|
||||
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
|
||||
|
||||
@@ -814,53 +677,38 @@ class FeatureTransformer {
|
||||
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
|
||||
}
|
||||
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
vec_store_psqt(&entryTilePsqt[k], psqt[k]);
|
||||
auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
|
||||
&accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
|
||||
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
|
||||
vec_store_psqt(&accTilePsqt[k], psqt[k]);
|
||||
}
|
||||
|
||||
#else
|
||||
if (!psqtOnly)
|
||||
std::memcpy(accumulator.accumulation[Perspective], biases,
|
||||
HalfDimensions * sizeof(BiasType));
|
||||
|
||||
for (const auto index : removed)
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
accumulator.psqtAccumulation[Perspective][k] = 0;
|
||||
|
||||
for (const auto index : active)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
entry.accumulation[j] -= weights[offset + j];
|
||||
if (!psqtOnly)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
accumulator.accumulation[Perspective][j] += weights[offset + j];
|
||||
}
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
entry.psqtAccumulation[k] -= psqtWeights[index * PSQTBuckets + k];
|
||||
accumulator.psqtAccumulation[Perspective][k] +=
|
||||
psqtWeights[index * PSQTBuckets + k];
|
||||
}
|
||||
for (const auto index : added)
|
||||
{
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
entry.accumulation[j] += weights[offset + j];
|
||||
|
||||
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
||||
entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k];
|
||||
}
|
||||
|
||||
// The accumulator of the refresh entry has been updated.
|
||||
// Now copy its content to the actual accumulator we were refreshing.
|
||||
|
||||
std::memcpy(accumulator.accumulation[Perspective], entry.accumulation,
|
||||
sizeof(BiasType) * HalfDimensions);
|
||||
|
||||
std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation,
|
||||
sizeof(int32_t) * PSQTBuckets);
|
||||
#endif
|
||||
|
||||
for (Color c : {WHITE, BLACK})
|
||||
entry.byColorBB[c] = pos.pieces(c);
|
||||
|
||||
for (PieceType pt = PAWN; pt <= KING; ++pt)
|
||||
entry.byTypeBB[pt] = pos.pieces(pt);
|
||||
}
|
||||
|
||||
template<Color Perspective>
|
||||
void hint_common_access_for_perspective(const Position& pos,
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache) const {
|
||||
void hint_common_access_for_perspective(const Position& pos, bool psqtOnly) const {
|
||||
|
||||
// Works like update_accumulator, but performs less work.
|
||||
// Updates ONLY the accumulator for pos.
|
||||
@@ -868,57 +716,50 @@ class FeatureTransformer {
|
||||
// Look for a usable accumulator of an earlier position. We keep track
|
||||
// of the estimated gain in terms of features to be added/subtracted.
|
||||
// Fast early exit.
|
||||
if ((pos.state()->*accPtr).computed[Perspective])
|
||||
if ((pos.state()->*accPtr).computed[Perspective]
|
||||
|| (psqtOnly && (pos.state()->*accPtr).computedPSQT[Perspective]))
|
||||
return;
|
||||
|
||||
auto [oldest_st, _] = try_find_computed_accumulator<Perspective>(pos);
|
||||
auto [oldest_st, _] = try_find_computed_accumulator<Perspective>(pos, psqtOnly);
|
||||
|
||||
if ((oldest_st->*accPtr).computed[Perspective])
|
||||
if ((oldest_st->*accPtr).computed[Perspective]
|
||||
|| (psqtOnly && (oldest_st->*accPtr).computedPSQT[Perspective]))
|
||||
{
|
||||
// Only update current position accumulator to minimize work
|
||||
StateInfo* states_to_update[1] = {pos.state()};
|
||||
update_accumulator_incremental<Perspective, 1>(pos, oldest_st, states_to_update);
|
||||
// Only update current position accumulator to minimize work.
|
||||
StateInfo* states_to_update[2] = {pos.state(), nullptr};
|
||||
update_accumulator_incremental<Perspective, 2>(pos, oldest_st, states_to_update,
|
||||
psqtOnly);
|
||||
}
|
||||
else
|
||||
update_accumulator_refresh_cache<Perspective>(pos, cache);
|
||||
update_accumulator_refresh<Perspective>(pos, psqtOnly);
|
||||
}
|
||||
|
||||
template<Color Perspective>
|
||||
void update_accumulator(const Position& pos,
|
||||
AccumulatorCaches::Cache<HalfDimensions>* cache) const {
|
||||
void update_accumulator(const Position& pos, bool psqtOnly) const {
|
||||
|
||||
auto [oldest_st, next] = try_find_computed_accumulator<Perspective>(pos);
|
||||
auto [oldest_st, next] = try_find_computed_accumulator<Perspective>(pos, psqtOnly);
|
||||
|
||||
if ((oldest_st->*accPtr).computed[Perspective])
|
||||
if ((oldest_st->*accPtr).computed[Perspective]
|
||||
|| (psqtOnly && (oldest_st->*accPtr).computedPSQT[Perspective]))
|
||||
{
|
||||
if (next == nullptr)
|
||||
return;
|
||||
|
||||
// Now update the accumulators listed in states_to_update[], where
|
||||
// the last element is a sentinel. Currently we update two accumulators:
|
||||
// Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
|
||||
// Currently we update 2 accumulators.
|
||||
// 1. for the current position
|
||||
// 2. the next accumulator after the computed one
|
||||
// The heuristic may change in the future.
|
||||
if (next == pos.state())
|
||||
{
|
||||
StateInfo* states_to_update[1] = {next};
|
||||
StateInfo* states_to_update[3] = {next, next == pos.state() ? nullptr : pos.state(),
|
||||
nullptr};
|
||||
|
||||
update_accumulator_incremental<Perspective, 1>(pos, oldest_st, states_to_update);
|
||||
}
|
||||
else
|
||||
{
|
||||
StateInfo* states_to_update[2] = {next, pos.state()};
|
||||
|
||||
update_accumulator_incremental<Perspective, 2>(pos, oldest_st, states_to_update);
|
||||
}
|
||||
update_accumulator_incremental<Perspective, 3>(pos, oldest_st, states_to_update,
|
||||
psqtOnly);
|
||||
}
|
||||
else
|
||||
update_accumulator_refresh_cache<Perspective>(pos, cache);
|
||||
update_accumulator_refresh<Perspective>(pos, psqtOnly);
|
||||
}
|
||||
|
||||
template<IndexType Size>
|
||||
friend struct AccumulatorCaches::Cache;
|
||||
|
||||
alignas(CacheLineSize) BiasType biases[HalfDimensions];
|
||||
alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions];
|
||||
alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets];
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string_view>
|
||||
#include <tuple>
|
||||
|
||||
#include "../evaluate.h"
|
||||
#include "../position.h"
|
||||
@@ -43,13 +42,13 @@ namespace Stockfish::Eval::NNUE {
|
||||
constexpr std::string_view PieceToChar(" PNBRQK pnbrqk");
|
||||
|
||||
|
||||
void hint_common_parent_position(const Position& pos,
|
||||
const Networks& networks,
|
||||
AccumulatorCaches& caches) {
|
||||
if (Eval::use_smallnet(pos))
|
||||
networks.small.hint_common_access(pos, &caches.small);
|
||||
void hint_common_parent_position(const Position& pos, const Networks& networks) {
|
||||
|
||||
int simpleEvalAbs = std::abs(simple_eval(pos, pos.side_to_move()));
|
||||
if (simpleEvalAbs > Eval::SmallNetThreshold)
|
||||
networks.small.hint_common_access(pos, simpleEvalAbs > Eval::PsqtOnlyThreshold);
|
||||
else
|
||||
networks.big.hint_common_access(pos, &caches.big);
|
||||
networks.big.hint_common_access(pos, false);
|
||||
}
|
||||
|
||||
namespace {
|
||||
@@ -59,7 +58,7 @@ void format_cp_compact(Value v, char* buffer, const Position& pos) {
|
||||
|
||||
buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' ');
|
||||
|
||||
int cp = std::abs(UCIEngine::to_cp(v, pos));
|
||||
int cp = std::abs(UCI::to_cp(v, pos));
|
||||
if (cp >= 10000)
|
||||
{
|
||||
buffer[1] = '0' + cp / 10000;
|
||||
@@ -93,7 +92,7 @@ void format_cp_compact(Value v, char* buffer, const Position& pos) {
|
||||
// Converts a Value into pawns, always keeping two decimals
|
||||
void format_cp_aligned_dot(Value v, std::stringstream& stream, const Position& pos) {
|
||||
|
||||
const double pawns = std::abs(0.01 * UCIEngine::to_cp(v, pos));
|
||||
const double pawns = std::abs(0.01 * UCI::to_cp(v, pos));
|
||||
|
||||
stream << (v < 0 ? '-'
|
||||
: v > 0 ? '+'
|
||||
@@ -105,8 +104,7 @@ void format_cp_aligned_dot(Value v, std::stringstream& stream, const Position& p
|
||||
|
||||
// Returns a string with the value of each piece on a board,
|
||||
// and a table for (PSQT, Layers) values bucket by bucket.
|
||||
std::string
|
||||
trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::AccumulatorCaches& caches) {
|
||||
std::string trace(Position& pos, const Eval::NNUE::Networks& networks) {
|
||||
|
||||
std::stringstream ss;
|
||||
|
||||
@@ -132,9 +130,8 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
|
||||
|
||||
// We estimate the value of each piece by doing a differential evaluation from
|
||||
// the current base eval, simulating the removal of the piece from its square.
|
||||
auto [psqt, positional] = networks.big.evaluate(pos, &caches.big);
|
||||
Value base = psqt + positional;
|
||||
base = pos.side_to_move() == WHITE ? base : -base;
|
||||
Value base = networks.big.evaluate(pos);
|
||||
base = pos.side_to_move() == WHITE ? base : -base;
|
||||
|
||||
for (File f = FILE_A; f <= FILE_H; ++f)
|
||||
for (Rank r = RANK_1; r <= RANK_8; ++r)
|
||||
@@ -148,15 +145,18 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
|
||||
auto st = pos.state();
|
||||
|
||||
pos.remove_piece(sq);
|
||||
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false;
|
||||
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] =
|
||||
st->accumulatorBig.computedPSQT[WHITE] = st->accumulatorBig.computedPSQT[BLACK] =
|
||||
false;
|
||||
|
||||
std::tie(psqt, positional) = networks.big.evaluate(pos, &caches.big);
|
||||
Value eval = psqt + positional;
|
||||
eval = pos.side_to_move() == WHITE ? eval : -eval;
|
||||
v = base - eval;
|
||||
Value eval = networks.big.evaluate(pos);
|
||||
eval = pos.side_to_move() == WHITE ? eval : -eval;
|
||||
v = base - eval;
|
||||
|
||||
pos.put_piece(pc, sq);
|
||||
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false;
|
||||
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] =
|
||||
st->accumulatorBig.computedPSQT[WHITE] = st->accumulatorBig.computedPSQT[BLACK] =
|
||||
false;
|
||||
}
|
||||
|
||||
writeSquare(f, r, pc, v);
|
||||
@@ -167,7 +167,7 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
|
||||
ss << board[row] << '\n';
|
||||
ss << '\n';
|
||||
|
||||
auto t = networks.big.trace_evaluate(pos, &caches.big);
|
||||
auto t = networks.big.trace_evaluate(pos);
|
||||
|
||||
ss << " NNUE network contributions "
|
||||
<< (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << std::endl
|
||||
@@ -178,16 +178,16 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
|
||||
|
||||
for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket)
|
||||
{
|
||||
ss << "| " << bucket << " " //
|
||||
<< " | ";
|
||||
ss << "| " << bucket << " ";
|
||||
ss << " | ";
|
||||
format_cp_aligned_dot(t.psqt[bucket], ss, pos);
|
||||
ss << " " //
|
||||
ss << " "
|
||||
<< " | ";
|
||||
format_cp_aligned_dot(t.positional[bucket], ss, pos);
|
||||
ss << " " //
|
||||
ss << " "
|
||||
<< " | ";
|
||||
format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss, pos);
|
||||
ss << " " //
|
||||
ss << " "
|
||||
<< " |";
|
||||
if (bucket == t.correctBucket)
|
||||
ss << " <-- this bucket is used";
|
||||
|
||||
@@ -50,13 +50,12 @@ struct NnueEvalTrace {
|
||||
std::size_t correctBucket;
|
||||
};
|
||||
|
||||
struct Networks;
|
||||
struct AccumulatorCaches;
|
||||
|
||||
std::string trace(Position& pos, const Networks& networks, AccumulatorCaches& caches);
|
||||
void hint_common_parent_position(const Position& pos,
|
||||
const Networks& networks,
|
||||
AccumulatorCaches& caches);
|
||||
struct Networks;
|
||||
|
||||
|
||||
std::string trace(Position& pos, const Networks& networks);
|
||||
void hint_common_parent_position(const Position& pos, const Networks& networks);
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
} // namespace Stockfish
|
||||
|
||||
1345
src/numa.h
1345
src/numa.h
File diff suppressed because it is too large
Load Diff
13
src/perft.h
13
src/perft.h
@@ -21,12 +21,13 @@
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "cluster.h"
|
||||
#include "movegen.h"
|
||||
#include "position.h"
|
||||
#include "types.h"
|
||||
#include "uci.h"
|
||||
|
||||
namespace Stockfish::Benchmark {
|
||||
namespace Stockfish {
|
||||
|
||||
// Utility to verify move generation. All the leaf nodes up
|
||||
// to the given depth are generated and counted, and the sum is returned.
|
||||
@@ -50,18 +51,20 @@ uint64_t perft(Position& pos, Depth depth) {
|
||||
nodes += cnt;
|
||||
pos.undo_move(m);
|
||||
}
|
||||
if (Root)
|
||||
sync_cout << UCIEngine::move(m, pos.is_chess960()) << ": " << cnt << sync_endl;
|
||||
if (Root && Cluster::is_root())
|
||||
sync_cout << UCI::move(m, pos.is_chess960()) << ": " << cnt << sync_endl;
|
||||
}
|
||||
return nodes;
|
||||
}
|
||||
|
||||
inline uint64_t perft(const std::string& fen, Depth depth, bool isChess960) {
|
||||
inline void perft(const std::string& fen, Depth depth, bool isChess960) {
|
||||
StateListPtr states(new std::deque<StateInfo>(1));
|
||||
Position p;
|
||||
p.set(fen, isChess960, &states->back());
|
||||
|
||||
return perft<true>(p, depth);
|
||||
uint64_t nodes = perft<true>(p, depth);
|
||||
if (Cluster::is_root())
|
||||
sync_cout << "\nNodes searched: " << nodes << "\n" << sync_endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ std::ostream& operator<<(std::ostream& os, const Position& pos) {
|
||||
<< std::setw(16) << pos.key() << std::setfill(' ') << std::dec << "\nCheckers: ";
|
||||
|
||||
for (Bitboard b = pos.checkers(); b;)
|
||||
os << UCIEngine::square(pop_lsb(b)) << " ";
|
||||
os << UCI::square(pop_lsb(b)) << " ";
|
||||
|
||||
if (int(Tablebases::MaxCardinality) >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
|
||||
{
|
||||
@@ -431,8 +431,8 @@ string Position::fen() const {
|
||||
if (!can_castle(ANY_CASTLING))
|
||||
ss << '-';
|
||||
|
||||
ss << (ep_square() == SQ_NONE ? " - " : " " + UCIEngine::square(ep_square()) + " ")
|
||||
<< st->rule50 << " " << 1 + (gamePly - (sideToMove == BLACK)) / 2;
|
||||
ss << (ep_square() == SQ_NONE ? " - " : " " + UCI::square(ep_square()) + " ") << st->rule50
|
||||
<< " " << 1 + (gamePly - (sideToMove == BLACK)) / 2;
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
@@ -680,8 +680,11 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
|
||||
++st->pliesFromNull;
|
||||
|
||||
// Used by NNUE
|
||||
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] =
|
||||
st->accumulatorSmall.computed[WHITE] = st->accumulatorSmall.computed[BLACK] = false;
|
||||
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] =
|
||||
st->accumulatorBig.computedPSQT[WHITE] = st->accumulatorBig.computedPSQT[BLACK] =
|
||||
st->accumulatorSmall.computed[WHITE] = st->accumulatorSmall.computed[BLACK] =
|
||||
st->accumulatorSmall.computedPSQT[WHITE] = st->accumulatorSmall.computedPSQT[BLACK] =
|
||||
false;
|
||||
|
||||
auto& dp = st->dirtyPiece;
|
||||
dp.dirty_num = 1;
|
||||
@@ -741,6 +744,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
|
||||
// Update board and piece lists
|
||||
remove_piece(capsq);
|
||||
|
||||
// Update material hash key and prefetch access to materialTable
|
||||
k ^= Zobrist::psq[captured][capsq];
|
||||
st->materialKey ^= Zobrist::psq[captured][pieceCount[captured]];
|
||||
|
||||
@@ -965,10 +969,13 @@ void Position::do_null_move(StateInfo& newSt, TranspositionTable& tt) {
|
||||
newSt.previous = st;
|
||||
st = &newSt;
|
||||
|
||||
st->dirtyPiece.dirty_num = 0;
|
||||
st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator()
|
||||
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] =
|
||||
st->accumulatorSmall.computed[WHITE] = st->accumulatorSmall.computed[BLACK] = false;
|
||||
st->dirtyPiece.dirty_num = 0;
|
||||
st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator()
|
||||
st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] =
|
||||
st->accumulatorBig.computedPSQT[WHITE] = st->accumulatorBig.computedPSQT[BLACK] =
|
||||
st->accumulatorSmall.computed[WHITE] = st->accumulatorSmall.computed[BLACK] =
|
||||
st->accumulatorSmall.computedPSQT[WHITE] = st->accumulatorSmall.computedPSQT[BLACK] =
|
||||
false;
|
||||
|
||||
if (st->epSquare != SQ_NONE)
|
||||
{
|
||||
@@ -1156,9 +1163,9 @@ bool Position::has_repeated() const {
|
||||
}
|
||||
|
||||
|
||||
// Tests if the position has a move which draws by repetition.
|
||||
// This function accurately matches the outcome of is_draw() over all legal moves.
|
||||
bool Position::upcoming_repetition(int ply) const {
|
||||
// Tests if the position has a move which draws by repetition,
|
||||
// or an earlier position has a move that directly reaches the current position.
|
||||
bool Position::has_game_cycle(int ply) const {
|
||||
|
||||
int j;
|
||||
|
||||
@@ -1169,16 +1176,10 @@ bool Position::upcoming_repetition(int ply) const {
|
||||
|
||||
Key originalKey = st->key;
|
||||
StateInfo* stp = st->previous;
|
||||
Key other = originalKey ^ stp->key ^ Zobrist::side;
|
||||
|
||||
for (int i = 3; i <= end; i += 2)
|
||||
{
|
||||
stp = stp->previous;
|
||||
other ^= stp->key ^ stp->previous->key ^ Zobrist::side;
|
||||
stp = stp->previous;
|
||||
|
||||
if (other != 0)
|
||||
continue;
|
||||
stp = stp->previous->previous;
|
||||
|
||||
Key moveKey = originalKey ^ stp->key;
|
||||
if ((j = H1(moveKey), cuckoo[j] == moveKey) || (j = H2(moveKey), cuckoo[j] == moveKey))
|
||||
@@ -1194,6 +1195,12 @@ bool Position::upcoming_repetition(int ply) const {
|
||||
|
||||
// For nodes before or at the root, check that the move is a
|
||||
// repetition rather than a move to the current position.
|
||||
// In the cuckoo table, both moves Rc1c5 and Rc5c1 are stored in
|
||||
// the same location, so we have to select which square to check.
|
||||
if (color_of(piece_on(empty(s1) ? s2 : s1)) != side_to_move())
|
||||
continue;
|
||||
|
||||
// For repetitions before or at the root, require one more
|
||||
if (stp->repetition)
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -156,7 +156,7 @@ class Position {
|
||||
int game_ply() const;
|
||||
bool is_chess960() const;
|
||||
bool is_draw(int ply) const;
|
||||
bool upcoming_repetition(int ply) const;
|
||||
bool has_game_cycle(int ply) const;
|
||||
bool has_repeated() const;
|
||||
int rule50_count() const;
|
||||
Value non_pawn_material(Color c) const;
|
||||
@@ -315,8 +315,8 @@ inline bool Position::capture(Move m) const {
|
||||
}
|
||||
|
||||
// Returns true if a move is generated from the capture stage, having also
|
||||
// queen promotions covered, i.e. consistency with the capture stage move
|
||||
// generation is needed to avoid the generation of duplicate moves.
|
||||
// queen promotions covered, i.e. consistency with the capture stage move generation
|
||||
// is needed to avoid the generation of duplicate moves.
|
||||
inline bool Position::capture_stage(Move m) const {
|
||||
assert(m.is_ok());
|
||||
return capture(m) || m.promotion_type() == QUEEN;
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "score.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "uci.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
Score::Score(Value v, const Position& pos) {
|
||||
assert(-VALUE_INFINITE < v && v < VALUE_INFINITE);
|
||||
|
||||
if (std::abs(v) < VALUE_TB_WIN_IN_MAX_PLY)
|
||||
{
|
||||
score = InternalUnits{UCIEngine::to_cp(v, pos)};
|
||||
}
|
||||
else if (std::abs(v) <= VALUE_TB)
|
||||
{
|
||||
auto distance = VALUE_TB - std::abs(v);
|
||||
score = (v > 0) ? Tablebase{distance, true} : Tablebase{-distance, false};
|
||||
}
|
||||
else
|
||||
{
|
||||
auto distance = VALUE_MATE - std::abs(v);
|
||||
score = (v > 0) ? Mate{distance} : Mate{-distance};
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
70
src/score.h
70
src/score.h
@@ -1,70 +0,0 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef SCORE_H_INCLUDED
|
||||
#define SCORE_H_INCLUDED
|
||||
|
||||
#include <variant>
|
||||
#include <utility>
|
||||
|
||||
#include "types.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
class Position;
|
||||
|
||||
class Score {
|
||||
public:
|
||||
struct Mate {
|
||||
int plies;
|
||||
};
|
||||
|
||||
struct Tablebase {
|
||||
int plies;
|
||||
bool win;
|
||||
};
|
||||
|
||||
struct InternalUnits {
|
||||
int value;
|
||||
};
|
||||
|
||||
Score() = default;
|
||||
Score(Value v, const Position& pos);
|
||||
|
||||
template<typename T>
|
||||
bool is() const {
|
||||
return std::holds_alternative<T>(score);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T get() const {
|
||||
return std::get<T>(score);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
decltype(auto) visit(F&& f) const {
|
||||
return std::visit(std::forward<F>(f), score);
|
||||
}
|
||||
|
||||
private:
|
||||
std::variant<Mate, Tablebase, InternalUnits> score;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // #ifndef SCORE_H_INCLUDED
|
||||
1310
src/search.cpp
1310
src/search.cpp
File diff suppressed because it is too large
Load Diff
188
src/search.h
188
src/search.h
@@ -19,31 +19,30 @@
|
||||
#ifndef SEARCH_H_INCLUDED
|
||||
#define SEARCH_H_INCLUDED
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
|
||||
#include "cluster.h"
|
||||
#include "misc.h"
|
||||
#include "movepick.h"
|
||||
#include "nnue/network.h"
|
||||
#include "nnue/nnue_accumulator.h"
|
||||
#include "numa.h"
|
||||
#include "position.h"
|
||||
#include "score.h"
|
||||
#include "syzygy/tbprobe.h"
|
||||
#include "timeman.h"
|
||||
#include "types.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
namespace Eval::NNUE {
|
||||
struct Networks;
|
||||
}
|
||||
|
||||
// Different node types, used as a template parameter
|
||||
enum NodeType {
|
||||
NonPV,
|
||||
@@ -66,12 +65,14 @@ struct Stack {
|
||||
int ply;
|
||||
Move currentMove;
|
||||
Move excludedMove;
|
||||
Move killers[2];
|
||||
Value staticEval;
|
||||
int statScore;
|
||||
int moveCount;
|
||||
bool inCheck;
|
||||
bool ttPv;
|
||||
bool ttHit;
|
||||
int multipleExtensions;
|
||||
int cutoffCnt;
|
||||
};
|
||||
|
||||
@@ -106,7 +107,8 @@ struct RootMove {
|
||||
using RootMoves = std::vector<RootMove>;
|
||||
|
||||
|
||||
// LimitsType struct stores information sent by the caller about the analysis required.
|
||||
// LimitsType struct stores information sent by GUI about available time to
|
||||
// search the current move, maximum depth/time, or if we are in analysis mode.
|
||||
struct LimitsType {
|
||||
|
||||
// Init explicitly due to broken value-initialization of non POD in MSVC
|
||||
@@ -117,33 +119,33 @@ struct LimitsType {
|
||||
ponderMode = false;
|
||||
}
|
||||
|
||||
bool use_time_management() const { return time[WHITE] || time[BLACK]; }
|
||||
bool use_time_management() const { return Cluster::is_root() && (time[WHITE] || time[BLACK]); }
|
||||
|
||||
std::vector<std::string> searchmoves;
|
||||
TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime;
|
||||
int movestogo, depth, mate, perft, infinite;
|
||||
uint64_t nodes;
|
||||
bool ponderMode;
|
||||
Square capSq;
|
||||
std::vector<Move> searchmoves;
|
||||
TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime;
|
||||
int movestogo, depth, mate, perft, infinite;
|
||||
uint64_t nodes;
|
||||
bool ponderMode;
|
||||
};
|
||||
|
||||
|
||||
// The UCI stores the uci options, thread pool, and transposition table.
|
||||
// This struct is used to easily forward data to the Search::Worker class.
|
||||
struct SharedState {
|
||||
SharedState(const OptionsMap& optionsMap,
|
||||
ThreadPool& threadPool,
|
||||
TranspositionTable& transpositionTable,
|
||||
const LazyNumaReplicated<Eval::NNUE::Networks>& nets) :
|
||||
SharedState(const OptionsMap& optionsMap,
|
||||
ThreadPool& threadPool,
|
||||
TranspositionTable& transpositionTable,
|
||||
const Eval::NNUE::Networks& nets) :
|
||||
options(optionsMap),
|
||||
threads(threadPool),
|
||||
tt(transpositionTable),
|
||||
networks(nets) {}
|
||||
|
||||
const OptionsMap& options;
|
||||
ThreadPool& threads;
|
||||
TranspositionTable& tt;
|
||||
const LazyNumaReplicated<Eval::NNUE::Networks>& networks;
|
||||
|
||||
const OptionsMap& options;
|
||||
ThreadPool& threads;
|
||||
TranspositionTable& tt;
|
||||
const Eval::NNUE::Networks& networks;
|
||||
};
|
||||
|
||||
class Worker;
|
||||
@@ -156,87 +158,18 @@ class ISearchManager {
|
||||
virtual void check_time(Search::Worker&) = 0;
|
||||
};
|
||||
|
||||
struct InfoShort {
|
||||
int depth;
|
||||
Score score;
|
||||
};
|
||||
|
||||
struct InfoFull: InfoShort {
|
||||
int selDepth;
|
||||
size_t multiPV;
|
||||
std::string_view wdl;
|
||||
std::string_view bound;
|
||||
size_t timeMs;
|
||||
size_t nodes;
|
||||
size_t nps;
|
||||
size_t tbHits;
|
||||
std::string_view pv;
|
||||
int hashfull;
|
||||
};
|
||||
|
||||
struct InfoIteration {
|
||||
int depth;
|
||||
std::string_view currmove;
|
||||
size_t currmovenumber;
|
||||
};
|
||||
|
||||
// Skill structure is used to implement strength limit. If we have a UCI_Elo,
|
||||
// we convert it to an appropriate skill level, anchored to the Stash engine.
|
||||
// This method is based on a fit of the Elo results for games played between
|
||||
// Stockfish at various skill levels and various versions of the Stash engine.
|
||||
// Skill 0 .. 19 now covers CCRL Blitz Elo from 1320 to 3190, approximately
|
||||
// Reference: https://github.com/vondele/Stockfish/commit/a08b8d4e9711c2
|
||||
struct Skill {
|
||||
// Lowest and highest Elo ratings used in the skill level calculation
|
||||
constexpr static int LowestElo = 1320;
|
||||
constexpr static int HighestElo = 3190;
|
||||
|
||||
Skill(int skill_level, int uci_elo) {
|
||||
if (uci_elo)
|
||||
{
|
||||
double e = double(uci_elo - LowestElo) / (HighestElo - LowestElo);
|
||||
level = std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0);
|
||||
}
|
||||
else
|
||||
level = double(skill_level);
|
||||
}
|
||||
bool enabled() const { return level < 20.0; }
|
||||
bool time_to_pick(Depth depth) const { return depth == 1 + int(level); }
|
||||
Move pick_best(const RootMoves&, size_t multiPV);
|
||||
|
||||
double level;
|
||||
Move best = Move::none();
|
||||
};
|
||||
|
||||
// SearchManager manages the search from the main thread. It is responsible for
|
||||
// keeping track of the time, and storing data strictly related to the main thread.
|
||||
class SearchManager: public ISearchManager {
|
||||
public:
|
||||
using UpdateShort = std::function<void(const InfoShort&)>;
|
||||
using UpdateFull = std::function<void(const InfoFull&)>;
|
||||
using UpdateIter = std::function<void(const InfoIteration&)>;
|
||||
using UpdateBestmove = std::function<void(std::string_view, std::string_view)>;
|
||||
|
||||
struct UpdateContext {
|
||||
UpdateShort onUpdateNoMoves;
|
||||
UpdateFull onUpdateFull;
|
||||
UpdateIter onIter;
|
||||
UpdateBestmove onBestmove;
|
||||
};
|
||||
|
||||
|
||||
SearchManager(const UpdateContext& updateContext) :
|
||||
updates(updateContext) {}
|
||||
|
||||
void check_time(Search::Worker& worker) override;
|
||||
|
||||
void pv(Search::Worker& worker,
|
||||
const ThreadPool& threads,
|
||||
const TranspositionTable& tt,
|
||||
Depth depth);
|
||||
std::string pv(const Search::Worker& worker,
|
||||
const ThreadPool& threads,
|
||||
const TranspositionTable& tt,
|
||||
Depth depth) const;
|
||||
|
||||
Stockfish::TimeManagement tm;
|
||||
double originalTimeAdjust;
|
||||
int callsCnt;
|
||||
std::atomic_bool ponder;
|
||||
|
||||
@@ -247,8 +180,6 @@ class SearchManager: public ISearchManager {
|
||||
bool stopOnPonderhit;
|
||||
|
||||
size_t id;
|
||||
|
||||
const UpdateContext& updates;
|
||||
};
|
||||
|
||||
class NullSearchManager: public ISearchManager {
|
||||
@@ -262,49 +193,68 @@ class NullSearchManager: public ISearchManager {
|
||||
// of the search history, and storing data required for the search.
|
||||
class Worker {
|
||||
public:
|
||||
Worker(SharedState&, std::unique_ptr<ISearchManager>, size_t, NumaReplicatedAccessToken);
|
||||
Worker(SharedState&, std::unique_ptr<ISearchManager>, size_t);
|
||||
|
||||
// Called at instantiation to initialize reductions tables.
|
||||
// Reset histories, usually before a new game.
|
||||
// Called at instantiation to initialize Reductions tables
|
||||
// Reset histories, usually before a new game
|
||||
void clear();
|
||||
|
||||
// Called when the program receives the UCI 'go' command.
|
||||
// It searches from the root position and outputs the "bestmove".
|
||||
void start_searching();
|
||||
|
||||
bool is_mainthread() const { return threadIdx == 0; }
|
||||
|
||||
void ensure_network_replicated();
|
||||
bool is_mainthread() const { return thread_idx == 0; }
|
||||
|
||||
// Public because they need to be updatable by the stats
|
||||
CounterMoveHistory counterMoves;
|
||||
ButterflyHistory mainHistory;
|
||||
CapturePieceToHistory captureHistory;
|
||||
ContinuationHistory continuationHistory[2][2];
|
||||
PawnHistory pawnHistory;
|
||||
CorrectionHistory correctionHistory;
|
||||
|
||||
#ifdef USE_MPI
|
||||
struct {
|
||||
std::mutex mutex;
|
||||
Cluster::TTCache<Cluster::TTCacheSize> buffer = {};
|
||||
} ttCache;
|
||||
#endif
|
||||
|
||||
std::atomic<uint64_t> TTsaves;
|
||||
|
||||
friend void Cluster::save(TranspositionTable&,
|
||||
ThreadPool&,
|
||||
Search::Worker*,
|
||||
TTEntry* tte,
|
||||
Key k,
|
||||
Value v,
|
||||
bool PvHit,
|
||||
Bound b,
|
||||
Depth d,
|
||||
Move m,
|
||||
Value ev,
|
||||
uint8_t generation8);
|
||||
|
||||
private:
|
||||
void iterative_deepening();
|
||||
|
||||
// This is the main search function, for both PV and non-PV nodes
|
||||
// Main search function for both PV and non-PV nodes
|
||||
template<NodeType nodeType>
|
||||
Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode);
|
||||
|
||||
// Quiescence search function, which is called by the main search
|
||||
template<NodeType nodeType>
|
||||
Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta);
|
||||
Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth = 0);
|
||||
|
||||
Depth reduction(bool i, Depth d, int mn, int delta) const;
|
||||
Depth reduction(bool i, Depth d, int mn, int delta);
|
||||
|
||||
// Pointer to the search manager, only allowed to be called by the main thread
|
||||
// Get a pointer to the search manager, only allowed to be called by the
|
||||
// main thread.
|
||||
SearchManager* main_manager() const {
|
||||
assert(threadIdx == 0);
|
||||
assert(thread_idx == 0);
|
||||
return static_cast<SearchManager*>(manager.get());
|
||||
}
|
||||
|
||||
TimePoint elapsed() const;
|
||||
TimePoint elapsed_time() const;
|
||||
|
||||
LimitsType limits;
|
||||
|
||||
size_t pvIdx, pvLast;
|
||||
@@ -319,8 +269,7 @@ class Worker {
|
||||
Depth rootDepth, completedDepth;
|
||||
Value rootDelta;
|
||||
|
||||
size_t threadIdx;
|
||||
NumaReplicatedAccessToken numaAccessToken;
|
||||
size_t thread_idx;
|
||||
|
||||
// Reductions lookup table initialized at startup
|
||||
std::array<int, MAX_MOVES> reductions; // [depth or moveNumber]
|
||||
@@ -330,13 +279,10 @@ class Worker {
|
||||
|
||||
Tablebases::Config tbConfig;
|
||||
|
||||
const OptionsMap& options;
|
||||
ThreadPool& threads;
|
||||
TranspositionTable& tt;
|
||||
const LazyNumaReplicated<Eval::NNUE::Networks>& networks;
|
||||
|
||||
// Used by NNUE
|
||||
Eval::NNUE::AccumulatorCaches refreshTable;
|
||||
const OptionsMap& options;
|
||||
ThreadPool& threads;
|
||||
TranspositionTable& tt;
|
||||
const Eval::NNUE::Networks& networks;
|
||||
|
||||
friend class Stockfish::ThreadPool;
|
||||
friend class SearchManager;
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "../bitboard.h"
|
||||
#include "../cluster.h"
|
||||
#include "../misc.h"
|
||||
#include "../movegen.h"
|
||||
#include "../position.h"
|
||||
@@ -66,7 +67,7 @@ namespace {
|
||||
|
||||
constexpr int TBPIECES = 7; // Max number of supported pieces
|
||||
constexpr int MAX_DTZ =
|
||||
1 << 18; // Max DTZ supported times 2, large enough to deal with the syzygy TB limit.
|
||||
1 << 18; // Max DTZ supported, large enough to deal with the syzygy TB limit.
|
||||
|
||||
enum {
|
||||
BigEndian,
|
||||
@@ -443,8 +444,6 @@ class TBTables {
|
||||
|
||||
std::deque<TBTable<WDL>> wdlTable;
|
||||
std::deque<TBTable<DTZ>> dtzTable;
|
||||
size_t foundDTZFiles = 0;
|
||||
size_t foundWDLFiles = 0;
|
||||
|
||||
void insert(Key key, TBTable<WDL>* wdl, TBTable<DTZ>* dtz) {
|
||||
uint32_t homeBucket = uint32_t(key) & (Size - 1);
|
||||
@@ -488,16 +487,9 @@ class TBTables {
|
||||
memset(hashTable, 0, sizeof(hashTable));
|
||||
wdlTable.clear();
|
||||
dtzTable.clear();
|
||||
foundDTZFiles = 0;
|
||||
foundWDLFiles = 0;
|
||||
}
|
||||
|
||||
void info() const {
|
||||
sync_cout << "info string Found " << foundWDLFiles << " WDL and " << foundDTZFiles
|
||||
<< " DTZ tablebase files (up to " << MaxCardinality << "-man)." << sync_endl;
|
||||
}
|
||||
|
||||
void add(const std::vector<PieceType>& pieces);
|
||||
size_t size() const { return wdlTable.size(); }
|
||||
void add(const std::vector<PieceType>& pieces);
|
||||
};
|
||||
|
||||
TBTables TBTables;
|
||||
@@ -510,22 +502,13 @@ void TBTables::add(const std::vector<PieceType>& pieces) {
|
||||
|
||||
for (PieceType pt : pieces)
|
||||
code += PieceToChar[pt];
|
||||
code.insert(code.find('K', 1), "v");
|
||||
|
||||
TBFile file_dtz(code + ".rtbz"); // KRK -> KRvK
|
||||
if (file_dtz.is_open())
|
||||
{
|
||||
file_dtz.close();
|
||||
foundDTZFiles++;
|
||||
}
|
||||
|
||||
TBFile file(code + ".rtbw"); // KRK -> KRvK
|
||||
TBFile file(code.insert(code.find('K', 1), "v") + ".rtbw"); // KRK -> KRvK
|
||||
|
||||
if (!file.is_open()) // Only WDL file is checked
|
||||
return;
|
||||
|
||||
file.close();
|
||||
foundWDLFiles++;
|
||||
|
||||
MaxCardinality = std::max(int(pieces.size()), MaxCardinality);
|
||||
|
||||
@@ -1344,7 +1327,7 @@ void Tablebases::init(const std::string& paths) {
|
||||
MaxCardinality = 0;
|
||||
TBFile::Paths = paths;
|
||||
|
||||
if (paths.empty())
|
||||
if (paths.empty() || paths == "<empty>")
|
||||
return;
|
||||
|
||||
// MapB1H1H7[] encodes a square below a1-h8 diagonal to 0..27
|
||||
@@ -1484,7 +1467,8 @@ void Tablebases::init(const std::string& paths) {
|
||||
}
|
||||
}
|
||||
|
||||
TBTables.info();
|
||||
if (Cluster::is_root())
|
||||
sync_cout << "info string Found " << TBTables.size() << " tablebases" << sync_endl;
|
||||
}
|
||||
|
||||
// Probe the WDL table for a particular position.
|
||||
@@ -1592,10 +1576,7 @@ int Tablebases::probe_dtz(Position& pos, ProbeState* result) {
|
||||
// Use the DTZ tables to rank root moves.
|
||||
//
|
||||
// A return value false indicates that not all probes were successful.
|
||||
bool Tablebases::root_probe(Position& pos,
|
||||
Search::RootMoves& rootMoves,
|
||||
bool rule50,
|
||||
bool rankDTZ) {
|
||||
bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50) {
|
||||
|
||||
ProbeState result = OK;
|
||||
StateInfo st;
|
||||
@@ -1606,7 +1587,7 @@ bool Tablebases::root_probe(Position& pos,
|
||||
// Check whether a position was repeated since the last zeroing move.
|
||||
bool rep = pos.has_repeated();
|
||||
|
||||
int dtz, bound = rule50 ? (MAX_DTZ / 2 - 100) : 1;
|
||||
int dtz, bound = rule50 ? (MAX_DTZ - 100) : 1;
|
||||
|
||||
// Probe and rank each move
|
||||
for (auto& m : rootMoves)
|
||||
@@ -1645,10 +1626,8 @@ bool Tablebases::root_probe(Position& pos,
|
||||
|
||||
// Better moves are ranked higher. Certain wins are ranked equally.
|
||||
// Losing moves are ranked equally unless a 50-move draw is in sight.
|
||||
int r = dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? MAX_DTZ - (rankDTZ ? dtz : 0)
|
||||
: MAX_DTZ / 2 - (dtz + cnt50))
|
||||
: dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -MAX_DTZ - (rankDTZ ? dtz : 0)
|
||||
: -MAX_DTZ / 2 + (-dtz + cnt50))
|
||||
int r = dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? MAX_DTZ : MAX_DTZ - (dtz + cnt50))
|
||||
: dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -MAX_DTZ : -MAX_DTZ + (-dtz + cnt50))
|
||||
: 0;
|
||||
m.tbRank = r;
|
||||
|
||||
@@ -1656,11 +1635,10 @@ bool Tablebases::root_probe(Position& pos,
|
||||
// 1 cp to cursed wins and let it grow to 49 cp as the positions gets
|
||||
// closer to a real win.
|
||||
m.tbScore = r >= bound ? VALUE_MATE - MAX_PLY - 1
|
||||
: r > 0 ? Value((std::max(3, r - (MAX_DTZ / 2 - 200)) * int(PawnValue)) / 200)
|
||||
: r == 0 ? VALUE_DRAW
|
||||
: r > -bound
|
||||
? Value((std::min(-3, r + (MAX_DTZ / 2 - 200)) * int(PawnValue)) / 200)
|
||||
: -VALUE_MATE + MAX_PLY + 1;
|
||||
: r > 0 ? Value((std::max(3, r - (MAX_DTZ - 200)) * int(PawnValue)) / 200)
|
||||
: r == 0 ? VALUE_DRAW
|
||||
: r > -bound ? Value((std::min(-3, r + (MAX_DTZ - 200)) * int(PawnValue)) / 200)
|
||||
: -VALUE_MATE + MAX_PLY + 1;
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -1707,8 +1685,7 @@ bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, boo
|
||||
|
||||
Config Tablebases::rank_root_moves(const OptionsMap& options,
|
||||
Position& pos,
|
||||
Search::RootMoves& rootMoves,
|
||||
bool rankDTZ) {
|
||||
Search::RootMoves& rootMoves) {
|
||||
Config config;
|
||||
|
||||
if (rootMoves.empty())
|
||||
@@ -1732,7 +1709,7 @@ Config Tablebases::rank_root_moves(const OptionsMap& options,
|
||||
if (config.cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
|
||||
{
|
||||
// Rank moves using DTZ tables
|
||||
config.rootInTB = root_probe(pos, rootMoves, options["Syzygy50MoveRule"], rankDTZ);
|
||||
config.rootInTB = root_probe(pos, rootMoves, options["Syzygy50MoveRule"]);
|
||||
|
||||
if (!config.rootInTB)
|
||||
{
|
||||
|
||||
@@ -66,12 +66,9 @@ extern int MaxCardinality;
|
||||
void init(const std::string& paths);
|
||||
WDLScore probe_wdl(Position& pos, ProbeState* result);
|
||||
int probe_dtz(Position& pos, ProbeState* result);
|
||||
bool root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50, bool rankDTZ);
|
||||
bool root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50);
|
||||
bool root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, bool rule50);
|
||||
Config rank_root_moves(const OptionsMap& options,
|
||||
Position& pos,
|
||||
Search::RootMoves& rootMoves,
|
||||
bool rankDTZ = false);
|
||||
Config rank_root_moves(const OptionsMap& options, Position& pos, Search::RootMoves& rootMoves);
|
||||
|
||||
} // namespace Stockfish::Tablebases
|
||||
|
||||
|
||||
258
src/thread.cpp
258
src/thread.cpp
@@ -22,16 +22,17 @@
|
||||
#include <cassert>
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
#include "cluster.h"
|
||||
#include "misc.h"
|
||||
#include "movegen.h"
|
||||
#include "search.h"
|
||||
#include "syzygy/tbprobe.h"
|
||||
#include "timeman.h"
|
||||
#include "tt.h"
|
||||
#include "types.h"
|
||||
#include "uci.h"
|
||||
#include "ucioption.h"
|
||||
|
||||
namespace Stockfish {
|
||||
@@ -40,24 +41,13 @@ namespace Stockfish {
|
||||
// in idle_loop(). Note that 'searching' and 'exit' should be already set.
|
||||
Thread::Thread(Search::SharedState& sharedState,
|
||||
std::unique_ptr<Search::ISearchManager> sm,
|
||||
size_t n,
|
||||
OptionalThreadToNumaNodeBinder binder) :
|
||||
size_t n) :
|
||||
worker(std::make_unique<Search::Worker>(sharedState, std::move(sm), n)),
|
||||
idx(n),
|
||||
nthreads(sharedState.options["Threads"]),
|
||||
stdThread(&Thread::idle_loop, this) {
|
||||
|
||||
wait_for_search_finished();
|
||||
|
||||
run_custom_job([this, &binder, &sharedState, &sm, n]() {
|
||||
// Use the binder to [maybe] bind the threads to a NUMA node before doing
|
||||
// the Worker allocation. Ideally we would also allocate the SearchManager
|
||||
// here, but that's minor.
|
||||
this->numaAccessToken = binder();
|
||||
this->worker =
|
||||
std::make_unique<Search::Worker>(sharedState, std::move(sm), n, this->numaAccessToken);
|
||||
});
|
||||
|
||||
wait_for_search_finished();
|
||||
}
|
||||
|
||||
|
||||
@@ -72,42 +62,38 @@ Thread::~Thread() {
|
||||
stdThread.join();
|
||||
}
|
||||
|
||||
|
||||
// Wakes up the thread that will start the search
|
||||
void Thread::start_searching() {
|
||||
assert(worker != nullptr);
|
||||
run_custom_job([this]() { worker->start_searching(); });
|
||||
mutex.lock();
|
||||
searching = true;
|
||||
mutex.unlock(); // Unlock before notifying saves a few CPU-cycles
|
||||
cv.notify_one(); // Wake up the thread in idle_loop()
|
||||
}
|
||||
|
||||
// Clears the histories for the thread worker (usually before a new game)
|
||||
void Thread::clear_worker() {
|
||||
assert(worker != nullptr);
|
||||
run_custom_job([this]() { worker->clear(); });
|
||||
}
|
||||
|
||||
// Blocks on the condition variable until the thread has finished searching
|
||||
// Blocks on the condition variable
|
||||
// until the thread has finished searching.
|
||||
void Thread::wait_for_search_finished() {
|
||||
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
cv.wait(lk, [&] { return !searching; });
|
||||
}
|
||||
|
||||
// Launching a function in the thread
|
||||
void Thread::run_custom_job(std::function<void()> f) {
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
cv.wait(lk, [&] { return !searching; });
|
||||
jobFunc = std::move(f);
|
||||
searching = true;
|
||||
}
|
||||
cv.notify_one();
|
||||
}
|
||||
|
||||
void Thread::ensure_network_replicated() { worker->ensure_network_replicated(); }
|
||||
|
||||
// Thread gets parked here, blocked on the condition variable
|
||||
// when the thread has no work to do.
|
||||
// Thread gets parked here, blocked on the
|
||||
// condition variable, when it has no work to do.
|
||||
|
||||
void Thread::idle_loop() {
|
||||
|
||||
// If OS already scheduled us on a different group than 0 then don't overwrite
|
||||
// the choice, eventually we are one of many one-threaded processes running on
|
||||
// some Windows NUMA hardware, for instance in fishtest. To make it simple,
|
||||
// just check if running threads are below a threshold, in this case, all this
|
||||
// NUMA machinery is not needed.
|
||||
if (nthreads > 8)
|
||||
WinProcGroup::bind_this_thread(idx);
|
||||
|
||||
while (true)
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
@@ -118,125 +104,74 @@ void Thread::idle_loop() {
|
||||
if (exit)
|
||||
return;
|
||||
|
||||
std::function<void()> job = std::move(jobFunc);
|
||||
jobFunc = nullptr;
|
||||
|
||||
lk.unlock();
|
||||
|
||||
if (job)
|
||||
job();
|
||||
worker->start_searching();
|
||||
}
|
||||
}
|
||||
|
||||
Search::SearchManager* ThreadPool::main_manager() { return main_thread()->worker->main_manager(); }
|
||||
Search::SearchManager* ThreadPool::main_manager() {
|
||||
return static_cast<Search::SearchManager*>(main_thread()->worker.get()->manager.get());
|
||||
}
|
||||
|
||||
uint64_t ThreadPool::nodes_searched() const { return accumulate(&Search::Worker::nodes); }
|
||||
uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits); }
|
||||
uint64_t ThreadPool::TT_saves() const { return accumulate(&Search::Worker::TTsaves); }
|
||||
|
||||
// Creates/destroys threads to match the requested number.
|
||||
// Created and launched threads will immediately go to sleep in idle_loop.
|
||||
// Upon resizing, threads are recreated to allow for binding if necessary.
|
||||
void ThreadPool::set(const NumaConfig& numaConfig,
|
||||
Search::SharedState sharedState,
|
||||
const Search::SearchManager::UpdateContext& updateContext) {
|
||||
void ThreadPool::set(Search::SharedState sharedState) {
|
||||
|
||||
if (threads.size() > 0) // destroy any existing thread(s)
|
||||
{
|
||||
main_thread()->wait_for_search_finished();
|
||||
|
||||
threads.clear();
|
||||
|
||||
boundThreadToNumaNode.clear();
|
||||
while (threads.size() > 0)
|
||||
delete threads.back(), threads.pop_back();
|
||||
}
|
||||
|
||||
const size_t requested = sharedState.options["Threads"];
|
||||
|
||||
if (requested > 0) // create new thread(s)
|
||||
{
|
||||
// Binding threads may be problematic when there's multiple NUMA nodes and
|
||||
// multiple Stockfish instances running. In particular, if each instance
|
||||
// runs a single thread then they would all be mapped to the first NUMA node.
|
||||
// This is undesirable, and so the default behaviour (i.e. when the user does not
|
||||
// change the NumaConfig UCI setting) is to not bind the threads to processors
|
||||
// unless we know for sure that we span NUMA nodes and replication is required.
|
||||
const std::string numaPolicy(sharedState.options["NumaPolicy"]);
|
||||
const bool doBindThreads = [&]() {
|
||||
if (numaPolicy == "none")
|
||||
return false;
|
||||
threads.push_back(new Thread(
|
||||
sharedState, std::unique_ptr<Search::ISearchManager>(new Search::SearchManager()), 0));
|
||||
|
||||
if (numaPolicy == "auto")
|
||||
return numaConfig.suggests_binding_threads(requested);
|
||||
|
||||
// numaPolicy == "system", or explicitly set by the user
|
||||
return true;
|
||||
}();
|
||||
|
||||
boundThreadToNumaNode = doBindThreads
|
||||
? numaConfig.distribute_threads_among_numa_nodes(requested)
|
||||
: std::vector<NumaIndex>{};
|
||||
|
||||
while (threads.size() < requested)
|
||||
{
|
||||
const size_t threadId = threads.size();
|
||||
const NumaIndex numaId = doBindThreads ? boundThreadToNumaNode[threadId] : 0;
|
||||
auto manager = threadId == 0 ? std::unique_ptr<Search::ISearchManager>(
|
||||
std::make_unique<Search::SearchManager>(updateContext))
|
||||
: std::make_unique<Search::NullSearchManager>();
|
||||
|
||||
// When not binding threads we want to force all access to happen
|
||||
// from the same NUMA node, because in case of NUMA replicated memory
|
||||
// accesses we don't want to trash cache in case the threads get scheduled
|
||||
// on the same NUMA node.
|
||||
auto binder = doBindThreads ? OptionalThreadToNumaNodeBinder(numaConfig, numaId)
|
||||
: OptionalThreadToNumaNodeBinder(numaId);
|
||||
|
||||
threads.emplace_back(
|
||||
std::make_unique<Thread>(sharedState, std::move(manager), threadId, binder));
|
||||
}
|
||||
|
||||
threads.push_back(new Thread(
|
||||
sharedState, std::unique_ptr<Search::ISearchManager>(new Search::NullSearchManager()),
|
||||
threads.size()));
|
||||
clear();
|
||||
|
||||
main_thread()->wait_for_search_finished();
|
||||
|
||||
// Reallocate the hash with the new threadpool size
|
||||
sharedState.tt.resize(sharedState.options["Hash"], requested);
|
||||
|
||||
// Adjust cluster buffers
|
||||
Cluster::ttSendRecvBuff_resize(requested);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Sets threadPool data to initial values
|
||||
void ThreadPool::clear() {
|
||||
if (threads.size() == 0)
|
||||
return;
|
||||
|
||||
for (auto&& th : threads)
|
||||
th->clear_worker();
|
||||
for (Thread* th : threads)
|
||||
th->worker->clear();
|
||||
|
||||
for (auto&& th : threads)
|
||||
th->wait_for_search_finished();
|
||||
|
||||
// These two affect the time taken on the first move of a game:
|
||||
main_manager()->callsCnt = 0;
|
||||
main_manager()->bestPreviousScore = VALUE_INFINITE;
|
||||
main_manager()->bestPreviousAverageScore = VALUE_INFINITE;
|
||||
main_manager()->previousTimeReduction = 0.85;
|
||||
|
||||
main_manager()->callsCnt = 0;
|
||||
main_manager()->bestPreviousScore = VALUE_INFINITE;
|
||||
main_manager()->originalTimeAdjust = -1;
|
||||
main_manager()->previousTimeReduction = 1.0;
|
||||
main_manager()->tm.clear();
|
||||
}
|
||||
|
||||
void ThreadPool::run_on_thread(size_t threadId, std::function<void()> f) {
|
||||
assert(threads.size() > threadId);
|
||||
threads[threadId]->run_custom_job(std::move(f));
|
||||
}
|
||||
|
||||
void ThreadPool::wait_on_thread(size_t threadId) {
|
||||
assert(threads.size() > threadId);
|
||||
threads[threadId]->wait_for_search_finished();
|
||||
}
|
||||
|
||||
size_t ThreadPool::num_threads() const { return threads.size(); }
|
||||
|
||||
|
||||
// Wakes up main thread waiting in idle_loop() and returns immediately.
|
||||
// Main thread will wake up other threads and start the search.
|
||||
// Wakes up main thread waiting in idle_loop() and
|
||||
// returns immediately. Main thread will wake up other threads and start the search.
|
||||
void ThreadPool::start_thinking(const OptionsMap& options,
|
||||
Position& pos,
|
||||
StateListPtr& states,
|
||||
@@ -250,18 +185,10 @@ void ThreadPool::start_thinking(const OptionsMap& options,
|
||||
increaseDepth = true;
|
||||
|
||||
Search::RootMoves rootMoves;
|
||||
const auto legalmoves = MoveList<LEGAL>(pos);
|
||||
|
||||
for (const auto& uciMove : limits.searchmoves)
|
||||
{
|
||||
auto move = UCIEngine::to_move(pos, uciMove);
|
||||
|
||||
if (std::find(legalmoves.begin(), legalmoves.end(), move) != legalmoves.end())
|
||||
rootMoves.emplace_back(move);
|
||||
}
|
||||
|
||||
if (rootMoves.empty())
|
||||
for (const auto& m : legalmoves)
|
||||
for (const auto& m : MoveList<LEGAL>(pos))
|
||||
if (limits.searchmoves.empty()
|
||||
|| std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m))
|
||||
rootMoves.emplace_back(m);
|
||||
|
||||
Tablebases::Config tbConfig = Tablebases::rank_root_moves(options, pos, rootMoves);
|
||||
@@ -276,38 +203,36 @@ void ThreadPool::start_thinking(const OptionsMap& options,
|
||||
// We use Position::set() to set root position across threads. But there are
|
||||
// some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
|
||||
// be deduced from a fen string, so set() clears them and they are set from
|
||||
// setupStates->back() later. The rootState is per thread, earlier states are
|
||||
// shared since they are read-only.
|
||||
for (auto&& th : threads)
|
||||
// setupStates->back() later. The rootState is per thread, earlier states are shared
|
||||
// since they are read-only.
|
||||
for (Thread* th : threads)
|
||||
{
|
||||
th->run_custom_job([&]() {
|
||||
th->worker->limits = limits;
|
||||
th->worker->nodes = th->worker->tbHits = th->worker->nmpMinPly =
|
||||
th->worker->bestMoveChanges = 0;
|
||||
th->worker->rootDepth = th->worker->completedDepth = 0;
|
||||
th->worker->rootMoves = rootMoves;
|
||||
th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState);
|
||||
th->worker->rootState = setupStates->back();
|
||||
th->worker->tbConfig = tbConfig;
|
||||
});
|
||||
th->worker->limits = limits;
|
||||
th->worker->nodes = th->worker->tbHits = th->worker->nmpMinPly =
|
||||
th->worker->bestMoveChanges = 0;
|
||||
th->worker->TTsaves = 0;
|
||||
th->worker->rootDepth = th->worker->completedDepth = 0;
|
||||
th->worker->rootMoves = rootMoves;
|
||||
th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState);
|
||||
th->worker->rootState = setupStates->back();
|
||||
th->worker->tbConfig = tbConfig;
|
||||
}
|
||||
|
||||
for (auto&& th : threads)
|
||||
th->wait_for_search_finished();
|
||||
Cluster::signals_init();
|
||||
|
||||
main_thread()->start_searching();
|
||||
}
|
||||
|
||||
Thread* ThreadPool::get_best_thread() const {
|
||||
|
||||
Thread* bestThread = threads.front().get();
|
||||
Thread* bestThread = threads.front();
|
||||
Value minScore = VALUE_NONE;
|
||||
|
||||
std::unordered_map<Move, int64_t, Move::MoveHash> votes(
|
||||
2 * std::min(size(), bestThread->worker->rootMoves.size()));
|
||||
|
||||
// Find the minimum score of all threads
|
||||
for (auto&& th : threads)
|
||||
for (Thread* th : threads)
|
||||
minScore = std::min(minScore, th->worker->rootMoves[0].score);
|
||||
|
||||
// Vote according to score and depth, and select the best thread
|
||||
@@ -315,10 +240,10 @@ Thread* ThreadPool::get_best_thread() const {
|
||||
return (th->worker->rootMoves[0].score - minScore + 14) * int(th->worker->completedDepth);
|
||||
};
|
||||
|
||||
for (auto&& th : threads)
|
||||
votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th.get());
|
||||
for (Thread* th : threads)
|
||||
votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th);
|
||||
|
||||
for (auto&& th : threads)
|
||||
for (Thread* th : threads)
|
||||
{
|
||||
const auto bestThreadScore = bestThread->worker->rootMoves[0].score;
|
||||
const auto newThreadScore = th->worker->rootMoves[0].score;
|
||||
@@ -337,74 +262,51 @@ Thread* ThreadPool::get_best_thread() const {
|
||||
const bool newThreadInProvenLoss =
|
||||
newThreadScore != -VALUE_INFINITE && newThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY;
|
||||
|
||||
// We make sure not to pick a thread with truncated principal variation
|
||||
// Note that we make sure not to pick a thread with truncated-PV for better viewer experience.
|
||||
const bool betterVotingValue =
|
||||
thread_voting_value(th.get()) * int(newThreadPV.size() > 2)
|
||||
thread_voting_value(th) * int(newThreadPV.size() > 2)
|
||||
> thread_voting_value(bestThread) * int(bestThreadPV.size() > 2);
|
||||
|
||||
if (bestThreadInProvenWin)
|
||||
{
|
||||
// Make sure we pick the shortest mate / TB conversion
|
||||
if (newThreadScore > bestThreadScore)
|
||||
bestThread = th.get();
|
||||
bestThread = th;
|
||||
}
|
||||
else if (bestThreadInProvenLoss)
|
||||
{
|
||||
// Make sure we pick the shortest mated / TB conversion
|
||||
if (newThreadInProvenLoss && newThreadScore < bestThreadScore)
|
||||
bestThread = th.get();
|
||||
bestThread = th;
|
||||
}
|
||||
else if (newThreadInProvenWin || newThreadInProvenLoss
|
||||
|| (newThreadScore > VALUE_TB_LOSS_IN_MAX_PLY
|
||||
&& (newThreadMoveVote > bestThreadMoveVote
|
||||
|| (newThreadMoveVote == bestThreadMoveVote && betterVotingValue))))
|
||||
bestThread = th.get();
|
||||
bestThread = th;
|
||||
}
|
||||
|
||||
return bestThread;
|
||||
}
|
||||
|
||||
|
||||
// Start non-main threads.
|
||||
// Will be invoked by main thread after it has started searching.
|
||||
// Start non-main threads
|
||||
// Will be invoked by main thread after it has started searching
|
||||
void ThreadPool::start_searching() {
|
||||
|
||||
for (auto&& th : threads)
|
||||
for (Thread* th : threads)
|
||||
if (th != threads.front())
|
||||
th->start_searching();
|
||||
}
|
||||
|
||||
|
||||
// Wait for non-main threads
|
||||
|
||||
void ThreadPool::wait_for_search_finished() const {
|
||||
|
||||
for (auto&& th : threads)
|
||||
for (Thread* th : threads)
|
||||
if (th != threads.front())
|
||||
th->wait_for_search_finished();
|
||||
}
|
||||
|
||||
std::vector<size_t> ThreadPool::get_bound_thread_count_by_numa_node() const {
|
||||
std::vector<size_t> counts;
|
||||
|
||||
if (!boundThreadToNumaNode.empty())
|
||||
{
|
||||
NumaIndex highestNumaNode = 0;
|
||||
for (NumaIndex n : boundThreadToNumaNode)
|
||||
if (n > highestNumaNode)
|
||||
highestNumaNode = n;
|
||||
|
||||
counts.resize(highestNumaNode + 1, 0);
|
||||
|
||||
for (NumaIndex n : boundThreadToNumaNode)
|
||||
counts[n] += 1;
|
||||
}
|
||||
|
||||
return counts;
|
||||
}
|
||||
|
||||
void ThreadPool::ensure_network_replicated() {
|
||||
for (auto&& th : threads)
|
||||
th->ensure_network_replicated();
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
97
src/thread.h
97
src/thread.h
@@ -23,12 +23,11 @@
|
||||
#include <condition_variable>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#include "numa.h"
|
||||
#include "movepick.h"
|
||||
#include "position.h"
|
||||
#include "search.h"
|
||||
#include "thread_win32_osx.h"
|
||||
@@ -39,32 +38,6 @@ namespace Stockfish {
|
||||
class OptionsMap;
|
||||
using Value = int;
|
||||
|
||||
// Sometimes we don't want to actually bind the threads, but the recipient still
|
||||
// needs to think it runs on *some* NUMA node, such that it can access structures
|
||||
// that rely on NUMA node knowledge. This class encapsulates this optional process
|
||||
// such that the recipient does not need to know whether the binding happened or not.
|
||||
class OptionalThreadToNumaNodeBinder {
|
||||
public:
|
||||
OptionalThreadToNumaNodeBinder(NumaIndex n) :
|
||||
numaConfig(nullptr),
|
||||
numaId(n) {}
|
||||
|
||||
OptionalThreadToNumaNodeBinder(const NumaConfig& cfg, NumaIndex n) :
|
||||
numaConfig(&cfg),
|
||||
numaId(n) {}
|
||||
|
||||
NumaReplicatedAccessToken operator()() const {
|
||||
if (numaConfig != nullptr)
|
||||
return numaConfig->bind_current_thread_to_numa_node(numaId);
|
||||
else
|
||||
return NumaReplicatedAccessToken(numaId);
|
||||
}
|
||||
|
||||
private:
|
||||
const NumaConfig* numaConfig;
|
||||
NumaIndex numaId;
|
||||
};
|
||||
|
||||
// Abstraction of a thread. It contains a pointer to the worker and a native thread.
|
||||
// After construction, the native thread is started with idle_loop()
|
||||
// waiting for a signal to start searching.
|
||||
@@ -72,37 +45,22 @@ class OptionalThreadToNumaNodeBinder {
|
||||
// the search is finished, it goes back to idle_loop() waiting for a new signal.
|
||||
class Thread {
|
||||
public:
|
||||
Thread(Search::SharedState&,
|
||||
std::unique_ptr<Search::ISearchManager>,
|
||||
size_t,
|
||||
OptionalThreadToNumaNodeBinder);
|
||||
Thread(Search::SharedState&, std::unique_ptr<Search::ISearchManager>, size_t);
|
||||
virtual ~Thread();
|
||||
|
||||
void idle_loop();
|
||||
void start_searching();
|
||||
void clear_worker();
|
||||
void run_custom_job(std::function<void()> f);
|
||||
|
||||
void ensure_network_replicated();
|
||||
|
||||
// Thread has been slightly altered to allow running custom jobs, so
|
||||
// this name is no longer correct. However, this class (and ThreadPool)
|
||||
// require further work to make them properly generic while maintaining
|
||||
// appropriate specificity regarding search, from the point of view of an
|
||||
// outside user, so renaming of this function is left for whenever that happens.
|
||||
void idle_loop();
|
||||
void start_searching();
|
||||
void wait_for_search_finished();
|
||||
size_t id() const { return idx; }
|
||||
|
||||
std::unique_ptr<Search::Worker> worker;
|
||||
std::function<void()> jobFunc;
|
||||
|
||||
private:
|
||||
std::mutex mutex;
|
||||
std::condition_variable cv;
|
||||
size_t idx, nthreads;
|
||||
bool exit = false, searching = true; // Set before starting std::thread
|
||||
NativeThread stdThread;
|
||||
NumaReplicatedAccessToken numaAccessToken;
|
||||
std::mutex mutex;
|
||||
std::condition_variable cv;
|
||||
size_t idx, nthreads;
|
||||
bool exit = false, searching = true; // Set before starting std::thread
|
||||
NativeThread stdThread;
|
||||
};
|
||||
|
||||
|
||||
@@ -110,46 +68,32 @@ class Thread {
|
||||
// parking and, most importantly, launching a thread. All the access to threads
|
||||
// is done through this class.
|
||||
class ThreadPool {
|
||||
public:
|
||||
ThreadPool() {}
|
||||
|
||||
public:
|
||||
~ThreadPool() {
|
||||
// destroy any existing thread(s)
|
||||
if (threads.size() > 0)
|
||||
{
|
||||
main_thread()->wait_for_search_finished();
|
||||
|
||||
threads.clear();
|
||||
while (threads.size() > 0)
|
||||
delete threads.back(), threads.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
ThreadPool(const ThreadPool&) = delete;
|
||||
ThreadPool(ThreadPool&&) = delete;
|
||||
|
||||
ThreadPool& operator=(const ThreadPool&) = delete;
|
||||
ThreadPool& operator=(ThreadPool&&) = delete;
|
||||
|
||||
void start_thinking(const OptionsMap&, Position&, StateListPtr&, Search::LimitsType);
|
||||
void run_on_thread(size_t threadId, std::function<void()> f);
|
||||
void wait_on_thread(size_t threadId);
|
||||
size_t num_threads() const;
|
||||
void clear();
|
||||
void set(const NumaConfig& numaConfig,
|
||||
Search::SharedState,
|
||||
const Search::SearchManager::UpdateContext&);
|
||||
void start_thinking(const OptionsMap&, Position&, StateListPtr&, Search::LimitsType);
|
||||
void clear();
|
||||
void set(Search::SharedState);
|
||||
|
||||
Search::SearchManager* main_manager();
|
||||
Thread* main_thread() const { return threads.front().get(); }
|
||||
Thread* main_thread() const { return threads.front(); }
|
||||
uint64_t nodes_searched() const;
|
||||
uint64_t tb_hits() const;
|
||||
uint64_t TT_saves() const;
|
||||
Thread* get_best_thread() const;
|
||||
void start_searching();
|
||||
void wait_for_search_finished() const;
|
||||
|
||||
std::vector<size_t> get_bound_thread_count_by_numa_node() const;
|
||||
|
||||
void ensure_network_replicated();
|
||||
|
||||
std::atomic_bool stop, abortedSearch, increaseDepth;
|
||||
|
||||
auto cbegin() const noexcept { return threads.cbegin(); }
|
||||
@@ -160,14 +104,13 @@ class ThreadPool {
|
||||
auto empty() const noexcept { return threads.empty(); }
|
||||
|
||||
private:
|
||||
StateListPtr setupStates;
|
||||
std::vector<std::unique_ptr<Thread>> threads;
|
||||
std::vector<NumaIndex> boundThreadToNumaNode;
|
||||
StateListPtr setupStates;
|
||||
std::vector<Thread*> threads;
|
||||
|
||||
uint64_t accumulate(std::atomic<uint64_t> Search::Worker::*member) const {
|
||||
|
||||
uint64_t sum = 0;
|
||||
for (auto&& th : threads)
|
||||
for (Thread* th : threads)
|
||||
sum += (th->worker.get()->*member).load(std::memory_order_relaxed);
|
||||
return sum;
|
||||
}
|
||||
|
||||
@@ -30,14 +30,17 @@ namespace Stockfish {
|
||||
|
||||
TimePoint TimeManagement::optimum() const { return optimumTime; }
|
||||
TimePoint TimeManagement::maximum() const { return maximumTime; }
|
||||
TimePoint TimeManagement::elapsed(size_t nodes) const {
|
||||
return useNodesTime ? TimePoint(nodes) : now() - startTime;
|
||||
}
|
||||
|
||||
void TimeManagement::clear() {
|
||||
availableNodes = -1; // When in 'nodes as time' mode
|
||||
availableNodes = 0; // When in 'nodes as time' mode
|
||||
}
|
||||
|
||||
void TimeManagement::advance_nodes_time(std::int64_t nodes) {
|
||||
assert(useNodesTime);
|
||||
availableNodes = std::max(int64_t(0), availableNodes - nodes);
|
||||
availableNodes += nodes;
|
||||
}
|
||||
|
||||
// Called at the beginning of the search and calculates
|
||||
@@ -47,19 +50,15 @@ void TimeManagement::advance_nodes_time(std::int64_t nodes) {
|
||||
void TimeManagement::init(Search::LimitsType& limits,
|
||||
Color us,
|
||||
int ply,
|
||||
const OptionsMap& options,
|
||||
double& originalTimeAdjust) {
|
||||
TimePoint npmsec = TimePoint(options["nodestime"]);
|
||||
|
||||
// If we have no time, we don't need to fully initialize TM.
|
||||
// startTime is used by movetime and useNodesTime is used in elapsed calls.
|
||||
startTime = limits.startTime;
|
||||
useNodesTime = npmsec != 0;
|
||||
|
||||
const OptionsMap& options) {
|
||||
// If we have no time, no need to initialize TM, except for the start time,
|
||||
// which is used by movetime.
|
||||
startTime = limits.startTime;
|
||||
if (limits.time[us] == 0)
|
||||
return;
|
||||
|
||||
TimePoint moveOverhead = TimePoint(options["Move Overhead"]);
|
||||
TimePoint npmsec = TimePoint(options["nodestime"]);
|
||||
|
||||
// optScale is a percentage of available time to use for the current move.
|
||||
// maxScale is a multiplier applied to optimumTime.
|
||||
@@ -69,31 +68,26 @@ void TimeManagement::init(Search::LimitsType& limits,
|
||||
// to nodes, and use resulting values in time management formulas.
|
||||
// WARNING: to avoid time losses, the given npmsec (nodes per millisecond)
|
||||
// must be much lower than the real engine speed.
|
||||
if (useNodesTime)
|
||||
if (npmsec)
|
||||
{
|
||||
if (availableNodes == -1) // Only once at game start
|
||||
useNodesTime = true;
|
||||
|
||||
if (!availableNodes) // Only once at game start
|
||||
availableNodes = npmsec * limits.time[us]; // Time is in msec
|
||||
|
||||
// Convert from milliseconds to nodes
|
||||
limits.time[us] = TimePoint(availableNodes);
|
||||
limits.inc[us] *= npmsec;
|
||||
limits.npmsec = npmsec;
|
||||
moveOverhead *= npmsec;
|
||||
}
|
||||
|
||||
// These numbers are used where multiplications, divisions or comparisons
|
||||
// with constants are involved.
|
||||
const int64_t scaleFactor = useNodesTime ? npmsec : 1;
|
||||
const TimePoint scaledTime = limits.time[us] / scaleFactor;
|
||||
const TimePoint scaledInc = limits.inc[us] / scaleFactor;
|
||||
|
||||
// Maximum move horizon of 50 moves
|
||||
int mtg = limits.movestogo ? std::min(limits.movestogo, 50) : 50;
|
||||
|
||||
// If less than one second, gradually reduce mtg
|
||||
if (scaledTime < 1000 && double(mtg) / scaledInc > 0.05)
|
||||
// if less than one second, gradually reduce mtg
|
||||
if (limits.time[us] < 1000 && (double(mtg) / limits.time[us] > 0.05))
|
||||
{
|
||||
mtg = scaledTime * 0.05;
|
||||
mtg = limits.time[us] * 0.05;
|
||||
}
|
||||
|
||||
// Make sure timeLeft is > 0 since we may use it as a divisor
|
||||
@@ -105,26 +99,24 @@ void TimeManagement::init(Search::LimitsType& limits,
|
||||
// game time for the current move, so also cap to a percentage of available game time.
|
||||
if (limits.movestogo == 0)
|
||||
{
|
||||
// Extra time according to timeLeft
|
||||
if (originalTimeAdjust < 0)
|
||||
originalTimeAdjust = 0.3285 * std::log10(timeLeft) - 0.4830;
|
||||
// Use extra time with larger increments
|
||||
double optExtra = limits.inc[us] < 500 ? 1.0 : 1.13;
|
||||
|
||||
// Calculate time constants based on current time left.
|
||||
double logTimeInSec = std::log10(scaledTime / 1000.0);
|
||||
double optConstant = std::min(0.00308 + 0.000319 * logTimeInSec, 0.00506);
|
||||
double maxConstant = std::max(3.39 + 3.01 * logTimeInSec, 2.93);
|
||||
double optConstant =
|
||||
std::min(0.00308 + 0.000319 * std::log10(limits.time[us] / 1000.0), 0.00506);
|
||||
double maxConstant = std::max(3.39 + 3.01 * std::log10(limits.time[us] / 1000.0), 2.93);
|
||||
|
||||
optScale = std::min(0.0122 + std::pow(ply + 2.95, 0.462) * optConstant,
|
||||
0.213 * limits.time[us] / timeLeft)
|
||||
* originalTimeAdjust;
|
||||
|
||||
0.213 * limits.time[us] / double(timeLeft))
|
||||
* optExtra;
|
||||
maxScale = std::min(6.64, maxConstant + ply / 12.0);
|
||||
}
|
||||
|
||||
// x moves in y seconds (+ z increment)
|
||||
else
|
||||
{
|
||||
optScale = std::min((0.88 + ply / 116.4) / mtg, 0.88 * limits.time[us] / timeLeft);
|
||||
optScale = std::min((0.88 + ply / 116.4) / mtg, 0.88 * limits.time[us] / double(timeLeft));
|
||||
maxScale = std::min(6.3, 1.5 + 0.11 * mtg);
|
||||
}
|
||||
|
||||
|
||||
@@ -19,10 +19,11 @@
|
||||
#ifndef TIMEMAN_H_INCLUDED
|
||||
#define TIMEMAN_H_INCLUDED
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "cluster.h"
|
||||
#include "misc.h"
|
||||
#include "types.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
@@ -36,19 +37,11 @@ struct LimitsType;
|
||||
// the maximum available time, the game move number, and other parameters.
|
||||
class TimeManagement {
|
||||
public:
|
||||
void init(Search::LimitsType& limits,
|
||||
Color us,
|
||||
int ply,
|
||||
const OptionsMap& options,
|
||||
double& originalTimeAdjust);
|
||||
void init(Search::LimitsType& limits, Color us, int ply, const OptionsMap& options);
|
||||
|
||||
TimePoint optimum() const;
|
||||
TimePoint maximum() const;
|
||||
template<typename FUNC>
|
||||
TimePoint elapsed(FUNC nodes) const {
|
||||
return useNodesTime ? TimePoint(nodes()) : elapsed_time();
|
||||
}
|
||||
TimePoint elapsed_time() const { return now() - startTime; };
|
||||
TimePoint elapsed(std::size_t nodes) const;
|
||||
|
||||
void clear();
|
||||
void advance_nodes_time(std::int64_t nodes);
|
||||
@@ -58,7 +51,7 @@ class TimeManagement {
|
||||
TimePoint optimumTime;
|
||||
TimePoint maximumTime;
|
||||
|
||||
std::int64_t availableNodes = -1; // When in 'nodes as time' mode
|
||||
std::int64_t availableNodes = 0; // When in 'nodes as time' mode
|
||||
bool useNodesTime = false; // True if we are in 'nodes as time' mode
|
||||
};
|
||||
|
||||
|
||||
206
src/tt.cpp
206
src/tt.cpp
@@ -23,89 +23,30 @@
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "memory.h"
|
||||
#include "misc.h"
|
||||
#include "syzygy/tbprobe.h"
|
||||
#include "thread.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
|
||||
// TTEntry struct is the 10 bytes transposition table entry, defined as below:
|
||||
//
|
||||
// key 16 bit
|
||||
// depth 8 bit
|
||||
// generation 5 bit
|
||||
// pv node 1 bit
|
||||
// bound type 2 bit
|
||||
// move 16 bit
|
||||
// value 16 bit
|
||||
// evaluation 16 bit
|
||||
//
|
||||
// These fields are in the same order as accessed by TT::probe(), since memory is fastest sequentially.
|
||||
// Equally, the store order in save() matches this order.
|
||||
|
||||
struct TTEntry {
|
||||
|
||||
// Convert internal bitfields to external types
|
||||
TTData read() const {
|
||||
return TTData{Move(move16), Value(value16),
|
||||
Value(eval16), Depth(depth8 + DEPTH_ENTRY_OFFSET),
|
||||
Bound(genBound8 & 0x3), bool(genBound8 & 0x4)};
|
||||
}
|
||||
|
||||
bool is_occupied() const;
|
||||
void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8);
|
||||
// The returned age is a multiple of TranspositionTable::GENERATION_DELTA
|
||||
uint8_t relative_age(const uint8_t generation8) const;
|
||||
|
||||
private:
|
||||
friend class TranspositionTable;
|
||||
|
||||
uint16_t key16;
|
||||
uint8_t depth8;
|
||||
uint8_t genBound8;
|
||||
Move move16;
|
||||
int16_t value16;
|
||||
int16_t eval16;
|
||||
};
|
||||
|
||||
// `genBound8` is where most of the details are. We use the following constants to manipulate 5 leading generation bits
|
||||
// and 3 trailing miscellaneous bits.
|
||||
|
||||
// These bits are reserved for other things.
|
||||
static constexpr unsigned GENERATION_BITS = 3;
|
||||
// increment for generation field
|
||||
static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS);
|
||||
// cycle length
|
||||
static constexpr int GENERATION_CYCLE = 255 + GENERATION_DELTA;
|
||||
// mask to pull out generation number
|
||||
static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF;
|
||||
|
||||
// DEPTH_ENTRY_OFFSET exists because 1) we use `bool(depth8)` as the occupancy check, but
|
||||
// 2) we need to store negative depths for QS. (`depth8` is the only field with "spare bits":
|
||||
// we sacrifice the ability to store depths greater than 1<<8 less the offset, as asserted in `save`.)
|
||||
bool TTEntry::is_occupied() const { return bool(depth8); }
|
||||
|
||||
// Populates the TTEntry with a new node's data, possibly
|
||||
// overwriting an old position. The update is not atomic and can be racy.
|
||||
void TTEntry::save(
|
||||
Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) {
|
||||
|
||||
// Preserve the old ttmove if we don't have a new one
|
||||
// Preserve any existing move for the same position
|
||||
if (m || uint16_t(k) != key16)
|
||||
move16 = m;
|
||||
|
||||
// Overwrite less valuable entries (cheapest checks first)
|
||||
if (b == BOUND_EXACT || uint16_t(k) != key16 || d - DEPTH_ENTRY_OFFSET + 2 * pv > depth8 - 4
|
||||
|| relative_age(generation8))
|
||||
if (b == BOUND_EXACT || uint16_t(k) != key16 || d - DEPTH_OFFSET + 2 * pv > depth8 - 4)
|
||||
{
|
||||
assert(d > DEPTH_ENTRY_OFFSET);
|
||||
assert(d < 256 + DEPTH_ENTRY_OFFSET);
|
||||
assert(d > DEPTH_OFFSET);
|
||||
assert(d < 256 + DEPTH_OFFSET);
|
||||
|
||||
key16 = uint16_t(k);
|
||||
depth8 = uint8_t(d - DEPTH_ENTRY_OFFSET);
|
||||
depth8 = uint8_t(d - DEPTH_OFFSET);
|
||||
genBound8 = uint8_t(generation8 | uint8_t(pv) << 2 | b);
|
||||
value16 = int16_t(v);
|
||||
eval16 = int16_t(ev);
|
||||
@@ -119,74 +60,85 @@ uint8_t TTEntry::relative_age(const uint8_t generation8) const {
|
||||
// is needed to keep the unrelated lowest n bits from affecting
|
||||
// the result) to calculate the entry age correctly even after
|
||||
// generation8 overflows into the next cycle.
|
||||
return (GENERATION_CYCLE + generation8 - genBound8) & GENERATION_MASK;
|
||||
|
||||
return (TranspositionTable::GENERATION_CYCLE + generation8 - genBound8)
|
||||
& TranspositionTable::GENERATION_MASK;
|
||||
}
|
||||
|
||||
|
||||
// TTWriter is but a very thin wrapper around the pointer
|
||||
TTWriter::TTWriter(TTEntry* tte) :
|
||||
entry(tte) {}
|
||||
|
||||
void TTWriter::write(
|
||||
Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) {
|
||||
entry->save(k, v, pv, b, d, m, ev, generation8);
|
||||
}
|
||||
|
||||
|
||||
// A TranspositionTable is an array of Cluster, of size clusterCount. Each cluster consists of ClusterSize number
|
||||
// of TTEntry. Each non-empty TTEntry contains information on exactly one position. The size of a Cluster should
|
||||
// divide the size of a cache line for best performance, as the cacheline is prefetched when possible.
|
||||
|
||||
static constexpr int ClusterSize = 3;
|
||||
|
||||
struct Cluster {
|
||||
TTEntry entry[ClusterSize];
|
||||
char padding[2]; // Pad to 32 bytes
|
||||
};
|
||||
|
||||
static_assert(sizeof(Cluster) == 32, "Suboptimal Cluster size");
|
||||
|
||||
|
||||
// Sets the size of the transposition table,
|
||||
// measured in megabytes. Transposition table consists
|
||||
// measured in megabytes. Transposition table consists of a power of 2 number
|
||||
// of clusters and each cluster consists of ClusterSize number of TTEntry.
|
||||
void TranspositionTable::resize(size_t mbSize, ThreadPool& threads) {
|
||||
void TranspositionTable::resize(size_t mbSize, int threadCount) {
|
||||
aligned_large_pages_free(table);
|
||||
|
||||
clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
|
||||
|
||||
table = static_cast<Cluster*>(aligned_large_pages_alloc(clusterCount * sizeof(Cluster)));
|
||||
|
||||
if (!table)
|
||||
{
|
||||
std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
clear(threads);
|
||||
clear(threadCount);
|
||||
}
|
||||
|
||||
|
||||
// Initializes the entire transposition table to zero,
|
||||
// in a multi-threaded way.
|
||||
void TranspositionTable::clear(ThreadPool& threads) {
|
||||
generation8 = 0;
|
||||
const size_t threadCount = threads.num_threads();
|
||||
void TranspositionTable::clear(size_t threadCount) {
|
||||
std::vector<std::thread> threads;
|
||||
|
||||
for (size_t i = 0; i < threadCount; ++i)
|
||||
for (size_t idx = 0; idx < size_t(threadCount); ++idx)
|
||||
{
|
||||
threads.run_on_thread(i, [this, i, threadCount]() {
|
||||
threads.emplace_back([this, idx, threadCount]() {
|
||||
// Thread binding gives faster search on systems with a first-touch policy
|
||||
if (threadCount > 8)
|
||||
WinProcGroup::bind_this_thread(idx);
|
||||
|
||||
// Each thread will zero its part of the hash table
|
||||
const size_t stride = clusterCount / threadCount;
|
||||
const size_t start = stride * i;
|
||||
const size_t len = i + 1 != threadCount ? stride : clusterCount - start;
|
||||
const size_t stride = size_t(clusterCount / threadCount), start = size_t(stride * idx),
|
||||
len = idx != size_t(threadCount) - 1 ? stride : clusterCount - start;
|
||||
|
||||
std::memset(&table[start], 0, len * sizeof(Cluster));
|
||||
});
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < threadCount; ++i)
|
||||
threads.wait_on_thread(i);
|
||||
for (std::thread& th : threads)
|
||||
th.join();
|
||||
}
|
||||
|
||||
|
||||
// Looks up the current position in the transposition
|
||||
// table. It returns true and a pointer to the TTEntry if the position is found.
|
||||
// Otherwise, it returns false and a pointer to an empty or least valuable TTEntry
|
||||
// to be replaced later. The replace value of an entry is calculated as its depth
|
||||
// minus 8 times its relative age. TTEntry t1 is considered more valuable than
|
||||
// TTEntry t2 if its replace value is greater than that of t2.
|
||||
TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
|
||||
|
||||
TTEntry* const tte = first_entry(key);
|
||||
const uint16_t key16 = uint16_t(key); // Use the low 16 bits as key inside the cluster
|
||||
|
||||
for (int i = 0; i < ClusterSize; ++i)
|
||||
if (tte[i].key16 == key16 || !tte[i].depth8)
|
||||
{
|
||||
constexpr uint8_t lowerBits = GENERATION_DELTA - 1;
|
||||
|
||||
// Refresh with new generation, keeping the lower bits the same.
|
||||
tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & lowerBits));
|
||||
return found = bool(tte[i].depth8), &tte[i];
|
||||
}
|
||||
|
||||
// Find an entry to be replaced according to the replacement strategy
|
||||
TTEntry* replace = tte;
|
||||
for (int i = 1; i < ClusterSize; ++i)
|
||||
if (replace->depth8 - replace->relative_age(generation8) * 2
|
||||
> tte[i].depth8 - tte[i].relative_age(generation8) * 2)
|
||||
replace = &tte[i];
|
||||
|
||||
return found = false, replace;
|
||||
}
|
||||
|
||||
|
||||
@@ -198,52 +150,10 @@ int TranspositionTable::hashfull() const {
|
||||
int cnt = 0;
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
for (int j = 0; j < ClusterSize; ++j)
|
||||
cnt += table[i].entry[j].is_occupied()
|
||||
cnt += table[i].entry[j].depth8
|
||||
&& (table[i].entry[j].genBound8 & GENERATION_MASK) == generation8;
|
||||
|
||||
return cnt / ClusterSize;
|
||||
}
|
||||
|
||||
|
||||
void TranspositionTable::new_search() {
|
||||
// increment by delta to keep lower bits as is
|
||||
generation8 += GENERATION_DELTA;
|
||||
}
|
||||
|
||||
|
||||
uint8_t TranspositionTable::generation() const { return generation8; }
|
||||
|
||||
|
||||
// Looks up the current position in the transposition
|
||||
// table. It returns true if the position is found.
|
||||
// Otherwise, it returns false and a pointer to an empty or least valuable TTEntry
|
||||
// to be replaced later. The replace value of an entry is calculated as its depth
|
||||
// minus 8 times its relative age. TTEntry t1 is considered more valuable than
|
||||
// TTEntry t2 if its replace value is greater than that of t2.
|
||||
std::tuple<bool, TTData, TTWriter> TranspositionTable::probe(const Key key) const {
|
||||
|
||||
TTEntry* const tte = first_entry(key);
|
||||
const uint16_t key16 = uint16_t(key); // Use the low 16 bits as key inside the cluster
|
||||
|
||||
for (int i = 0; i < ClusterSize; ++i)
|
||||
if (tte[i].key16 == key16)
|
||||
// This gap is the main place for read races.
|
||||
// After `read()` completes that copy is final, but may be self-inconsistent.
|
||||
return {tte[i].is_occupied(), tte[i].read(), TTWriter(&tte[i])};
|
||||
|
||||
// Find an entry to be replaced according to the replacement strategy
|
||||
TTEntry* replace = tte;
|
||||
for (int i = 1; i < ClusterSize; ++i)
|
||||
if (replace->depth8 - replace->relative_age(generation8) * 2
|
||||
> tte[i].depth8 - tte[i].relative_age(generation8) * 2)
|
||||
replace = &tte[i];
|
||||
|
||||
return {false, TTData(), TTWriter(replace)};
|
||||
}
|
||||
|
||||
|
||||
TTEntry* TranspositionTable::first_entry(const Key key) const {
|
||||
return &table[mul_hi64(key, clusterCount)].entry[0];
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
122
src/tt.h
122
src/tt.h
@@ -21,76 +21,108 @@
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <tuple>
|
||||
|
||||
#include "memory.h"
|
||||
#include "misc.h"
|
||||
#include "types.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
class ThreadPool;
|
||||
struct TTEntry;
|
||||
struct Cluster;
|
||||
namespace Cluster {
|
||||
void init();
|
||||
}
|
||||
|
||||
// There is only one global hash table for the engine and all its threads. For chess in particular, we even allow racy
|
||||
// updates between threads to and from the TT, as taking the time to synchronize access would cost thinking time and
|
||||
// thus elo. As a hash table, collisions are possible and may cause chess playing issues (bizarre blunders, faulty mate
|
||||
// reports, etc). Fixing these also loses elo; however such risk decreases quickly with larger TT size.
|
||||
//
|
||||
// `probe` is the primary method: given a board position, we look up its entry in the table, and return a tuple of:
|
||||
// 1) whether the entry already has this position
|
||||
// 2) a copy of the prior data (if any) (may be inconsistent due to read races)
|
||||
// 3) a writer object to this entry
|
||||
// The copied data and the writer are separated to maintain clear boundaries between local vs global objects.
|
||||
/// TTEntry struct is the 10 bytes transposition table entry, defined as below:
|
||||
///
|
||||
/// key 16 bit
|
||||
/// depth 8 bit
|
||||
/// generation 5 bit
|
||||
/// pv node 1 bit
|
||||
/// bound type 2 bit
|
||||
/// move 16 bit
|
||||
/// value 16 bit
|
||||
/// eval value 16 bit
|
||||
|
||||
struct TTEntry {
|
||||
|
||||
// A copy of the data already in the entry (possibly collided). `probe` may be racy, resulting in inconsistent data.
|
||||
struct TTData {
|
||||
Move move;
|
||||
Value value, eval;
|
||||
Depth depth;
|
||||
Bound bound;
|
||||
bool is_pv;
|
||||
};
|
||||
|
||||
|
||||
// This is used to make racy writes to the global TT.
|
||||
struct TTWriter {
|
||||
public:
|
||||
void write(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8);
|
||||
Move move() const { return Move(move16); }
|
||||
Value value() const { return Value(value16); }
|
||||
Value eval() const { return Value(eval16); }
|
||||
Depth depth() const { return Depth(depth8 + DEPTH_OFFSET); }
|
||||
bool is_pv() const { return bool(genBound8 & 0x4); }
|
||||
Bound bound() const { return Bound(genBound8 & 0x3); }
|
||||
void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8);
|
||||
// The returned age is a multiple of TranspositionTable::GENERATION_DELTA
|
||||
uint8_t relative_age(const uint8_t generation8) const;
|
||||
|
||||
private:
|
||||
friend class TranspositionTable;
|
||||
TTEntry* entry;
|
||||
TTWriter(TTEntry* tte);
|
||||
friend void Cluster::init();
|
||||
|
||||
|
||||
uint16_t key16;
|
||||
uint8_t depth8;
|
||||
uint8_t genBound8;
|
||||
Move move16;
|
||||
int16_t value16;
|
||||
int16_t eval16;
|
||||
};
|
||||
|
||||
|
||||
// A TranspositionTable is an array of Cluster, of size clusterCount. Each
|
||||
// cluster consists of ClusterSize number of TTEntry. Each non-empty TTEntry
|
||||
// contains information on exactly one position. The size of a Cluster should
|
||||
// divide the size of a cache line for best performance, as the cacheline is
|
||||
// prefetched when possible.
|
||||
class TranspositionTable {
|
||||
|
||||
friend void Cluster::init();
|
||||
|
||||
static constexpr int ClusterSize = 3;
|
||||
|
||||
struct Cluster {
|
||||
TTEntry entry[ClusterSize];
|
||||
char padding[2]; // Pad to 32 bytes
|
||||
};
|
||||
|
||||
static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size");
|
||||
|
||||
// Constants used to refresh the hash table periodically
|
||||
|
||||
// We have 8 bits available where the lowest 3 bits are
|
||||
// reserved for other things.
|
||||
static constexpr unsigned GENERATION_BITS = 3;
|
||||
// increment for generation field
|
||||
static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS);
|
||||
// cycle length
|
||||
static constexpr int GENERATION_CYCLE = 255 + GENERATION_DELTA;
|
||||
// mask to pull out generation number
|
||||
static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF;
|
||||
|
||||
public:
|
||||
~TranspositionTable() { aligned_large_pages_free(table); }
|
||||
|
||||
void resize(size_t mbSize, ThreadPool& threads); // Set TT size
|
||||
void clear(ThreadPool& threads); // Re-initialize memory, multithreaded
|
||||
int hashfull()
|
||||
const; // Approximate what fraction of entries (permille) have been written to during this root search
|
||||
void new_search() {
|
||||
// increment by delta to keep lower bits as is
|
||||
generation8 += GENERATION_DELTA;
|
||||
}
|
||||
|
||||
void
|
||||
new_search(); // This must be called at the beginning of each root search to track entry aging
|
||||
uint8_t generation() const; // The current age, used when writing new data to the TT
|
||||
std::tuple<bool, TTData, TTWriter>
|
||||
probe(const Key key) const; // The main method, whose retvals separate local vs global objects
|
||||
TTEntry* first_entry(const Key key)
|
||||
const; // This is the hash function; its only external use is memory prefetching.
|
||||
TTEntry* probe(const Key key, bool& found) const;
|
||||
int hashfull() const;
|
||||
void resize(size_t mbSize, int threadCount);
|
||||
void clear(size_t threadCount);
|
||||
|
||||
TTEntry* first_entry(const Key key) const {
|
||||
return &table[mul_hi64(key, clusterCount)].entry[0];
|
||||
}
|
||||
|
||||
uint8_t generation() const { return generation8; }
|
||||
|
||||
private:
|
||||
friend struct TTEntry;
|
||||
|
||||
size_t clusterCount;
|
||||
Cluster* table = nullptr;
|
||||
|
||||
uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8
|
||||
Cluster* table = nullptr;
|
||||
uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8
|
||||
};
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
26
src/tune.cpp
26
src/tune.cpp
@@ -21,7 +21,6 @@
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
@@ -34,19 +33,19 @@ namespace Stockfish {
|
||||
bool Tune::update_on_last;
|
||||
const Option* LastOption = nullptr;
|
||||
OptionsMap* Tune::options;
|
||||
|
||||
|
||||
namespace {
|
||||
std::map<std::string, int> TuneResults;
|
||||
|
||||
std::optional<std::string> on_tune(const Option& o) {
|
||||
void on_tune(const Option& o) {
|
||||
|
||||
if (!Tune::update_on_last || LastOption == &o)
|
||||
Tune::read_options();
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
void Tune::make_option(OptionsMap* opts, const string& n, int v, const SetRange& r) {
|
||||
|
||||
void make_option(OptionsMap* options, const string& n, int v, const SetRange& r) {
|
||||
|
||||
// Do not generate option when there is nothing to tune (i.e. min = max)
|
||||
if (r(v).first == r(v).second)
|
||||
@@ -55,17 +54,15 @@ void Tune::make_option(OptionsMap* opts, const string& n, int v, const SetRange&
|
||||
if (TuneResults.count(n))
|
||||
v = TuneResults[n];
|
||||
|
||||
(*opts)[n] << Option(v, r(v).first, r(v).second, on_tune);
|
||||
LastOption = &((*opts)[n]);
|
||||
(*options)[n] << Option(v, r(v).first, r(v).second, on_tune);
|
||||
LastOption = &((*options)[n]);
|
||||
|
||||
// Print formatted parameters, ready to be copy-pasted in Fishtest
|
||||
std::cout << n << "," //
|
||||
<< v << "," //
|
||||
<< r(v).first << "," //
|
||||
<< r(v).second << "," //
|
||||
<< (r(v).second - r(v).first) / 20.0 << "," //
|
||||
std::cout << n << "," << v << "," << r(v).first << "," << r(v).second << ","
|
||||
<< (r(v).second - r(v).first) / 20.0 << ","
|
||||
<< "0.0020" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
string Tune::next(string& names, bool pop) {
|
||||
|
||||
@@ -121,6 +118,7 @@ void Tune::Entry<Tune::PostUpdate>::read_option() {
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
void Tune::read_results() { /* ...insert your values here... */ }
|
||||
void Tune::read_results() { /* ...insert your values here... */
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
@@ -145,8 +145,6 @@ class Tune {
|
||||
return add(value, (next(names), std::move(names)), args...);
|
||||
}
|
||||
|
||||
static void make_option(OptionsMap* options, const std::string& n, int v, const SetRange& r);
|
||||
|
||||
std::vector<std::unique_ptr<EntryBase>> list;
|
||||
|
||||
public:
|
||||
@@ -160,7 +158,7 @@ class Tune {
|
||||
for (auto& e : instance().list)
|
||||
e->init_option();
|
||||
read_options();
|
||||
} // Deferred, due to UCIEngine::Options access
|
||||
} // Deferred, due to UCI::Options access
|
||||
static void read_options() {
|
||||
for (auto& e : instance().list)
|
||||
e->read_option();
|
||||
|
||||
34
src/types.h
34
src/types.h
@@ -137,9 +137,9 @@ enum Bound {
|
||||
BOUND_EXACT = BOUND_UPPER | BOUND_LOWER
|
||||
};
|
||||
|
||||
// Value is used as an alias for int, this is done to differentiate between a search
|
||||
// value and any other integer value. The values used in search are always supposed
|
||||
// to be in the range (-VALUE_NONE, VALUE_NONE] and should not exceed this range.
|
||||
// Value is used as an alias for int, this is done to differentiate between
|
||||
// a search value and any other integer value. The values used in search are always
|
||||
// supposed to be in the range (-VALUE_NONE, VALUE_NONE] and should not exceed this range.
|
||||
using Value = int;
|
||||
|
||||
constexpr Value VALUE_ZERO = 0;
|
||||
@@ -187,21 +187,12 @@ constexpr Value PieceValue[PIECE_NB] = {
|
||||
using Depth = int;
|
||||
|
||||
enum : int {
|
||||
// The following DEPTH_ constants are used for transposition table entries
|
||||
// and quiescence search move generation stages. In regular search, the
|
||||
// depth stored in the transposition table is literal: the search depth
|
||||
// (effort) used to make the corresponding transposition table value. In
|
||||
// quiescence search, however, the transposition table entries only store
|
||||
// the current quiescence move generation stage (which should thus compare
|
||||
// lower than any regular search depth).
|
||||
DEPTH_QS = 0,
|
||||
// For transposition table entries where no searching at all was done
|
||||
// (whether regular or qsearch) we use DEPTH_UNSEARCHED, which should thus
|
||||
// compare lower than any quiescence or regular depth. DEPTH_ENTRY_OFFSET
|
||||
// is used only for the transposition table entry occupancy check (see tt.cpp),
|
||||
// and should thus be lower than DEPTH_UNSEARCHED.
|
||||
DEPTH_UNSEARCHED = -2,
|
||||
DEPTH_ENTRY_OFFSET = -3
|
||||
DEPTH_QS_CHECKS = 0,
|
||||
DEPTH_QS_NO_CHECKS = -1,
|
||||
|
||||
DEPTH_NONE = -6,
|
||||
|
||||
DEPTH_OFFSET = -7 // value used only for TT entry occupancy check
|
||||
};
|
||||
|
||||
// clang-format off
|
||||
@@ -360,10 +351,9 @@ enum MoveType {
|
||||
// bit 14-15: special move flag: promotion (1), en passant (2), castling (3)
|
||||
// NOTE: en passant bit is set only when a pawn can be captured
|
||||
//
|
||||
// Special cases are Move::none() and Move::null(). We can sneak these in because
|
||||
// in any normal move the destination square and origin square are always different,
|
||||
// but Move::none() and Move::null() have the same origin and destination square.
|
||||
|
||||
// Special cases are Move::none() and Move::null(). We can sneak these in because in
|
||||
// any normal move destination square is always different from origin square
|
||||
// while Move::none() and Move::null() have the same origin and destination square.
|
||||
class Move {
|
||||
public:
|
||||
Move() = default;
|
||||
|
||||
368
src/uci.cpp
368
src/uci.cpp
@@ -19,65 +19,93 @@
|
||||
#include "uci.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <sstream>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "benchmark.h"
|
||||
#include "engine.h"
|
||||
#include "cluster.h"
|
||||
#include "evaluate.h"
|
||||
#include "movegen.h"
|
||||
#include "nnue/network.h"
|
||||
#include "nnue/nnue_common.h"
|
||||
#include "perft.h"
|
||||
#include "position.h"
|
||||
#include "score.h"
|
||||
#include "search.h"
|
||||
#include "syzygy/tbprobe.h"
|
||||
#include "types.h"
|
||||
#include "ucioption.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1";
|
||||
template<typename... Ts>
|
||||
struct overload: Ts... {
|
||||
using Ts::operator()...;
|
||||
};
|
||||
constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1";
|
||||
constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048;
|
||||
|
||||
template<typename... Ts>
|
||||
overload(Ts...) -> overload<Ts...>;
|
||||
|
||||
void UCIEngine::print_info_string(const std::string& str) {
|
||||
sync_cout_start();
|
||||
for (auto& line : split(str, "\n"))
|
||||
{
|
||||
if (!is_whitespace(line))
|
||||
{
|
||||
std::cout << "info string " << line << '\n';
|
||||
}
|
||||
}
|
||||
sync_cout_end();
|
||||
}
|
||||
namespace NN = Eval::NNUE;
|
||||
|
||||
UCIEngine::UCIEngine(int argc, char** argv) :
|
||||
engine(argv[0]),
|
||||
|
||||
UCI::UCI(int argc, char** argv) :
|
||||
networks(NN::Networks(
|
||||
NN::NetworkBig({EvalFileDefaultNameBig, "None", ""}, NN::EmbeddedNNUEType::BIG),
|
||||
NN::NetworkSmall({EvalFileDefaultNameSmall, "None", ""}, NN::EmbeddedNNUEType::SMALL))),
|
||||
cli(argc, argv) {
|
||||
|
||||
engine.get_options().add_info_listener([](const std::optional<std::string>& str) {
|
||||
if (str.has_value())
|
||||
print_info_string(*str);
|
||||
options["Debug Log File"] << Option("", [](const Option& o) { start_logger(o); });
|
||||
|
||||
options["Threads"] << Option(1, 1, 1024, [this](const Option&) {
|
||||
threads.set({options, threads, tt, networks});
|
||||
});
|
||||
|
||||
engine.set_on_iter([](const auto& i) { on_iter(i); });
|
||||
engine.set_on_update_no_moves([](const auto& i) { on_update_no_moves(i); });
|
||||
engine.set_on_update_full(
|
||||
[this](const auto& i) { on_update_full(i, engine.get_options()["UCI_ShowWDL"]); });
|
||||
engine.set_on_bestmove([](const auto& bm, const auto& p) { on_bestmove(bm, p); });
|
||||
options["Hash"] << Option(16, 1, MaxHashMB, [this](const Option& o) {
|
||||
threads.main_thread()->wait_for_search_finished();
|
||||
tt.resize(o, options["Threads"]);
|
||||
});
|
||||
|
||||
options["Clear Hash"] << Option([this](const Option&) { search_clear(); });
|
||||
options["Ponder"] << Option(false);
|
||||
options["MultiPV"] << Option(1, 1, MAX_MOVES);
|
||||
options["Skill Level"] << Option(20, 0, 20);
|
||||
options["Move Overhead"] << Option(10, 0, 5000);
|
||||
options["nodestime"] << Option(0, 0, 10000);
|
||||
options["UCI_Chess960"] << Option(false);
|
||||
options["UCI_LimitStrength"] << Option(false);
|
||||
options["UCI_Elo"] << Option(1320, 1320, 3190);
|
||||
options["UCI_ShowWDL"] << Option(false);
|
||||
options["SyzygyPath"] << Option("<empty>", [](const Option& o) { Tablebases::init(o); });
|
||||
options["SyzygyProbeDepth"] << Option(1, 1, 100);
|
||||
options["Syzygy50MoveRule"] << Option(true);
|
||||
options["SyzygyProbeLimit"] << Option(7, 0, 7);
|
||||
options["EvalFile"] << Option(EvalFileDefaultNameBig, [this](const Option& o) {
|
||||
networks.big.load(cli.binaryDirectory, o);
|
||||
});
|
||||
options["EvalFileSmall"] << Option(EvalFileDefaultNameSmall, [this](const Option& o) {
|
||||
networks.small.load(cli.binaryDirectory, o);
|
||||
});
|
||||
|
||||
networks.big.load(cli.binaryDirectory, options["EvalFile"]);
|
||||
networks.small.load(cli.binaryDirectory, options["EvalFileSmall"]);
|
||||
|
||||
threads.set({options, threads, tt, networks});
|
||||
|
||||
search_clear(); // After threads are up
|
||||
}
|
||||
|
||||
void UCIEngine::loop() {
|
||||
std::string token, cmd;
|
||||
void UCI::loop() {
|
||||
|
||||
Position pos;
|
||||
std::string token, cmd;
|
||||
StateListPtr states(new std::deque<StateInfo>(1));
|
||||
|
||||
pos.set(StartFEN, false, &states->back());
|
||||
|
||||
for (int i = 1; i < cli.argc; ++i)
|
||||
cmd += std::string(cli.argv[i]) + " ";
|
||||
@@ -85,7 +113,8 @@ void UCIEngine::loop() {
|
||||
do
|
||||
{
|
||||
if (cli.argc == 1
|
||||
&& !getline(std::cin, cmd)) // Wait for an input or an end-of-file (EOF) indication
|
||||
&& !Cluster::getline(std::cin,
|
||||
cmd)) // Wait for an input or an end-of-file (EOF) indication
|
||||
cmd = "quit";
|
||||
|
||||
std::istringstream is(cmd);
|
||||
@@ -94,52 +123,43 @@ void UCIEngine::loop() {
|
||||
is >> std::skipws >> token;
|
||||
|
||||
if (token == "quit" || token == "stop")
|
||||
engine.stop();
|
||||
threads.stop = true;
|
||||
|
||||
// The GUI sends 'ponderhit' to tell that the user has played the expected move.
|
||||
// So, 'ponderhit' is sent if pondering was done on the same move that the user
|
||||
// has played. The search should continue, but should also switch from pondering
|
||||
// to the normal search.
|
||||
else if (token == "ponderhit")
|
||||
engine.set_ponderhit(false);
|
||||
threads.main_manager()->ponder = false; // Switch to the normal search
|
||||
|
||||
else if (token == "uci")
|
||||
{
|
||||
else if (token == "uci" && Cluster::is_root())
|
||||
sync_cout << "id name " << engine_info(true) << "\n"
|
||||
<< engine.get_options() << sync_endl;
|
||||
|
||||
sync_cout << "uciok" << sync_endl;
|
||||
}
|
||||
<< options << "\nuciok" << sync_endl;
|
||||
|
||||
else if (token == "setoption")
|
||||
setoption(is);
|
||||
else if (token == "go")
|
||||
{
|
||||
// send info strings after the go command is sent for old GUIs and python-chess
|
||||
print_info_string(engine.numa_config_information_as_string());
|
||||
print_info_string(engine.thread_binding_information_as_string());
|
||||
go(is);
|
||||
}
|
||||
go(pos, is, states);
|
||||
else if (token == "position")
|
||||
position(is);
|
||||
position(pos, is, states);
|
||||
else if (token == "ucinewgame")
|
||||
engine.search_clear();
|
||||
else if (token == "isready")
|
||||
search_clear();
|
||||
else if (token == "isready" && Cluster::is_root())
|
||||
sync_cout << "readyok" << sync_endl;
|
||||
|
||||
// Add custom non-UCI commands, mainly for debugging purposes.
|
||||
// These commands must not be used during a search!
|
||||
else if (token == "flip")
|
||||
engine.flip();
|
||||
pos.flip();
|
||||
else if (token == "bench")
|
||||
bench(is);
|
||||
else if (token == "d")
|
||||
sync_cout << engine.visualize() << sync_endl;
|
||||
else if (token == "eval")
|
||||
engine.trace_eval();
|
||||
else if (token == "compiler")
|
||||
bench(pos, is, states);
|
||||
else if (token == "d" && Cluster::is_root())
|
||||
sync_cout << pos << sync_endl;
|
||||
else if (token == "eval" && Cluster::is_root())
|
||||
trace_eval(pos);
|
||||
else if (token == "compiler" && Cluster::is_root())
|
||||
sync_cout << compiler_info() << sync_endl;
|
||||
else if (token == "export_net")
|
||||
else if (token == "export_net" && Cluster::is_root())
|
||||
{
|
||||
std::pair<std::optional<std::string>, std::string> files[2];
|
||||
|
||||
@@ -149,9 +169,12 @@ void UCIEngine::loop() {
|
||||
if (is >> std::skipws >> files[1].second)
|
||||
files[1].first = files[1].second;
|
||||
|
||||
engine.save_network(files);
|
||||
networks.big.save(files[0].first);
|
||||
networks.small.save(files[1].first);
|
||||
}
|
||||
else if (token == "--help" || token == "help" || token == "--license" || token == "license")
|
||||
else if ((token == "--help" || token == "help" || token == "--license"
|
||||
|| token == "license")
|
||||
&& Cluster::is_root())
|
||||
sync_cout
|
||||
<< "\nStockfish is a powerful chess engine for playing and analyzing."
|
||||
"\nIt is released as free software licensed under the GNU GPLv3 License."
|
||||
@@ -160,14 +183,14 @@ void UCIEngine::loop() {
|
||||
"\nFor any further information, visit https://github.com/official-stockfish/Stockfish#readme"
|
||||
"\nor read the corresponding README.md and Copying.txt files distributed along with this program.\n"
|
||||
<< sync_endl;
|
||||
else if (!token.empty() && token[0] != '#')
|
||||
else if (!token.empty() && token[0] != '#' && Cluster::is_root())
|
||||
sync_cout << "Unknown command: '" << cmd << "'. Type help for more information."
|
||||
<< sync_endl;
|
||||
|
||||
} while (token != "quit" && cli.argc == 1); // The command-line arguments are one-shot
|
||||
}
|
||||
|
||||
Search::LimitsType UCIEngine::parse_limits(std::istream& is) {
|
||||
Search::LimitsType UCI::parse_limits(const Position& pos, std::istream& is) {
|
||||
Search::LimitsType limits;
|
||||
std::string token;
|
||||
|
||||
@@ -176,7 +199,7 @@ Search::LimitsType UCIEngine::parse_limits(std::istream& is) {
|
||||
while (is >> token)
|
||||
if (token == "searchmoves") // Needs to be the last command on the line
|
||||
while (is >> token)
|
||||
limits.searchmoves.push_back(to_lower(token));
|
||||
limits.searchmoves.push_back(to_move(pos, token));
|
||||
|
||||
else if (token == "wtime")
|
||||
is >> limits.time[WHITE];
|
||||
@@ -206,28 +229,27 @@ Search::LimitsType UCIEngine::parse_limits(std::istream& is) {
|
||||
return limits;
|
||||
}
|
||||
|
||||
void UCIEngine::go(std::istringstream& is) {
|
||||
void UCI::go(Position& pos, std::istringstream& is, StateListPtr& states) {
|
||||
|
||||
Search::LimitsType limits = parse_limits(is);
|
||||
Search::LimitsType limits = parse_limits(pos, is);
|
||||
|
||||
networks.big.verify(options["EvalFile"]);
|
||||
networks.small.verify(options["EvalFileSmall"]);
|
||||
|
||||
if (limits.perft)
|
||||
perft(limits);
|
||||
else
|
||||
engine.go(limits);
|
||||
{
|
||||
perft(pos.fen(), limits.perft, options["UCI_Chess960"]);
|
||||
return;
|
||||
}
|
||||
|
||||
threads.start_thinking(options, pos, states, limits);
|
||||
}
|
||||
|
||||
void UCIEngine::bench(std::istream& args) {
|
||||
void UCI::bench(Position& pos, std::istream& args, StateListPtr& states) {
|
||||
std::string token;
|
||||
uint64_t num, nodes = 0, cnt = 1;
|
||||
uint64_t nodesSearched = 0;
|
||||
const auto& options = engine.get_options();
|
||||
|
||||
engine.set_on_update_full([&](const auto& i) {
|
||||
nodesSearched = i.nodes;
|
||||
on_update_full(i, options["UCI_ShowWDL"]);
|
||||
});
|
||||
|
||||
std::vector<std::string> list = Benchmark::setup_bench(engine.fen(), args);
|
||||
std::vector<std::string> list = setup_bench(pos, args);
|
||||
|
||||
num = count_if(list.begin(), list.end(),
|
||||
[](const std::string& s) { return s.find("go ") == 0 || s.find("eval") == 0; });
|
||||
@@ -241,33 +263,25 @@ void UCIEngine::bench(std::istream& args) {
|
||||
|
||||
if (token == "go" || token == "eval")
|
||||
{
|
||||
std::cerr << "\nPosition: " << cnt++ << '/' << num << " (" << engine.fen() << ")"
|
||||
<< std::endl;
|
||||
if (Cluster::is_root())
|
||||
std::cerr << "\nPosition: " << cnt++ << '/' << num << " (" << pos.fen() << ")"
|
||||
<< std::endl;
|
||||
if (token == "go")
|
||||
{
|
||||
Search::LimitsType limits = parse_limits(is);
|
||||
|
||||
if (limits.perft)
|
||||
nodesSearched = perft(limits);
|
||||
else
|
||||
{
|
||||
engine.go(limits);
|
||||
engine.wait_for_search_finished();
|
||||
}
|
||||
|
||||
nodes += nodesSearched;
|
||||
nodesSearched = 0;
|
||||
go(pos, is, states);
|
||||
threads.main_thread()->wait_for_search_finished();
|
||||
nodes += Cluster::nodes_searched(threads);
|
||||
}
|
||||
else
|
||||
engine.trace_eval();
|
||||
else if (Cluster::is_root())
|
||||
trace_eval(pos);
|
||||
}
|
||||
else if (token == "setoption")
|
||||
setoption(is);
|
||||
else if (token == "position")
|
||||
position(is);
|
||||
position(pos, is, states);
|
||||
else if (token == "ucinewgame")
|
||||
{
|
||||
engine.search_clear(); // search_clear may take a while
|
||||
search_clear(); // Search::clear() may take a while
|
||||
elapsed = now();
|
||||
}
|
||||
}
|
||||
@@ -276,28 +290,39 @@ void UCIEngine::bench(std::istream& args) {
|
||||
|
||||
dbg_print();
|
||||
|
||||
std::cerr << "\n===========================" //
|
||||
<< "\nTotal time (ms) : " << elapsed //
|
||||
<< "\nNodes searched : " << nodes //
|
||||
<< "\nNodes/second : " << 1000 * nodes / elapsed << std::endl;
|
||||
|
||||
// reset callback, to not capture a dangling reference to nodesSearched
|
||||
engine.set_on_update_full([&](const auto& i) { on_update_full(i, options["UCI_ShowWDL"]); });
|
||||
if (Cluster::is_root())
|
||||
std::cerr << "\n==========================="
|
||||
<< "\nTotal time (ms) : " << elapsed << "\nNodes searched : " << nodes
|
||||
<< "\nNodes/second : " << 1000 * nodes / elapsed << std::endl;
|
||||
}
|
||||
|
||||
void UCI::trace_eval(Position& pos) {
|
||||
StateListPtr states(new std::deque<StateInfo>(1));
|
||||
Position p;
|
||||
p.set(pos.fen(), options["UCI_Chess960"], &states->back());
|
||||
|
||||
void UCIEngine::setoption(std::istringstream& is) {
|
||||
engine.wait_for_search_finished();
|
||||
engine.get_options().setoption(is);
|
||||
networks.big.verify(options["EvalFile"]);
|
||||
networks.small.verify(options["EvalFileSmall"]);
|
||||
|
||||
|
||||
sync_cout << "\n" << Eval::trace(p, networks) << sync_endl;
|
||||
}
|
||||
|
||||
std::uint64_t UCIEngine::perft(const Search::LimitsType& limits) {
|
||||
auto nodes = engine.perft(engine.fen(), limits.perft, engine.get_options()["UCI_Chess960"]);
|
||||
sync_cout << "\nNodes searched: " << nodes << "\n" << sync_endl;
|
||||
return nodes;
|
||||
void UCI::search_clear() {
|
||||
threads.main_thread()->wait_for_search_finished();
|
||||
|
||||
tt.clear(options["Threads"]);
|
||||
threads.clear();
|
||||
Tablebases::init(options["SyzygyPath"]); // Free mapped files
|
||||
}
|
||||
|
||||
void UCIEngine::position(std::istringstream& is) {
|
||||
void UCI::setoption(std::istringstream& is) {
|
||||
threads.main_thread()->wait_for_search_finished();
|
||||
options.setoption(is);
|
||||
}
|
||||
|
||||
void UCI::position(Position& pos, std::istringstream& is, StateListPtr& states) {
|
||||
Move m;
|
||||
std::string token, fen;
|
||||
|
||||
is >> token;
|
||||
@@ -313,14 +338,15 @@ void UCIEngine::position(std::istringstream& is) {
|
||||
else
|
||||
return;
|
||||
|
||||
std::vector<std::string> moves;
|
||||
states = StateListPtr(new std::deque<StateInfo>(1)); // Drop the old state and create a new one
|
||||
pos.set(fen, options["UCI_Chess960"], &states->back());
|
||||
|
||||
while (is >> token)
|
||||
// Parse the move list, if any
|
||||
while (is >> token && (m = to_move(pos, token)) != Move::none())
|
||||
{
|
||||
moves.push_back(token);
|
||||
states->emplace_back();
|
||||
pos.do_move(m, states->back());
|
||||
}
|
||||
|
||||
engine.set_position(fen, moves);
|
||||
}
|
||||
|
||||
namespace {
|
||||
@@ -335,12 +361,12 @@ WinRateParams win_rate_params(const Position& pos) {
|
||||
int material = pos.count<PAWN>() + 3 * pos.count<KNIGHT>() + 3 * pos.count<BISHOP>()
|
||||
+ 5 * pos.count<ROOK>() + 9 * pos.count<QUEEN>();
|
||||
|
||||
// The fitted model only uses data for material counts in [17, 78], and is anchored at count 58.
|
||||
double m = std::clamp(material, 17, 78) / 58.0;
|
||||
// The fitted model only uses data for material counts in [10, 78], and is anchored at count 58.
|
||||
double m = std::clamp(material, 10, 78) / 58.0;
|
||||
|
||||
// Return a = p_a(material) and b = p_b(material), see github.com/official-stockfish/WDL_model
|
||||
constexpr double as[] = {-37.45051876, 121.19101539, -132.78783573, 420.70576692};
|
||||
constexpr double bs[] = {90.26261072, -137.26549898, 71.10130540, 51.35259597};
|
||||
constexpr double as[] = {-185.71965483, 504.85014385, -438.58295743, 474.04604627};
|
||||
constexpr double bs[] = {89.23542728, -137.02141296, 73.28669021, 47.53376190};
|
||||
|
||||
double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
|
||||
double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
|
||||
@@ -359,30 +385,30 @@ int win_rate_model(Value v, const Position& pos) {
|
||||
}
|
||||
}
|
||||
|
||||
std::string UCIEngine::format_score(const Score& s) {
|
||||
constexpr int TB_CP = 20000;
|
||||
const auto format =
|
||||
overload{[](Score::Mate mate) -> std::string {
|
||||
auto m = (mate.plies > 0 ? (mate.plies + 1) : mate.plies) / 2;
|
||||
return std::string("mate ") + std::to_string(m);
|
||||
},
|
||||
[](Score::Tablebase tb) -> std::string {
|
||||
return std::string("cp ")
|
||||
+ std::to_string((tb.win ? TB_CP - tb.plies : -TB_CP - tb.plies));
|
||||
},
|
||||
[](Score::InternalUnits units) -> std::string {
|
||||
return std::string("cp ") + std::to_string(units.value);
|
||||
}};
|
||||
std::string UCI::to_score(Value v, const Position& pos) {
|
||||
assert(-VALUE_INFINITE < v && v < VALUE_INFINITE);
|
||||
|
||||
return s.visit(format);
|
||||
std::stringstream ss;
|
||||
|
||||
if (std::abs(v) < VALUE_TB_WIN_IN_MAX_PLY)
|
||||
ss << "cp " << to_cp(v, pos);
|
||||
else if (std::abs(v) <= VALUE_TB)
|
||||
{
|
||||
const int ply = VALUE_TB - std::abs(v); // recompute ss->ply
|
||||
ss << "cp " << (v > 0 ? 20000 - ply : -20000 + ply);
|
||||
}
|
||||
else
|
||||
ss << "mate " << (v > 0 ? VALUE_MATE - v + 1 : -VALUE_MATE - v) / 2;
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
// Turns a Value to an integer centipawn number,
|
||||
// without treatment of mate and similar special scores.
|
||||
int UCIEngine::to_cp(Value v, const Position& pos) {
|
||||
int UCI::to_cp(Value v, const Position& pos) {
|
||||
|
||||
// In general, the score can be defined via the WDL as
|
||||
// (log(1/L - 1) - log(1/W - 1)) / (log(1/L - 1) + log(1/W - 1)).
|
||||
// In general, the score can be defined via the the WDL as
|
||||
// (log(1/L - 1) - log(1/W - 1)) / ((log(1/L - 1) + log(1/W - 1))
|
||||
// Based on our win_rate_model, this simply yields v / a.
|
||||
|
||||
auto [a, b] = win_rate_params(pos);
|
||||
@@ -390,22 +416,22 @@ int UCIEngine::to_cp(Value v, const Position& pos) {
|
||||
return std::round(100 * int(v) / a);
|
||||
}
|
||||
|
||||
std::string UCIEngine::wdl(Value v, const Position& pos) {
|
||||
std::string UCI::wdl(Value v, const Position& pos) {
|
||||
std::stringstream ss;
|
||||
|
||||
int wdl_w = win_rate_model(v, pos);
|
||||
int wdl_l = win_rate_model(-v, pos);
|
||||
int wdl_d = 1000 - wdl_w - wdl_l;
|
||||
ss << wdl_w << " " << wdl_d << " " << wdl_l;
|
||||
ss << " wdl " << wdl_w << " " << wdl_d << " " << wdl_l;
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string UCIEngine::square(Square s) {
|
||||
std::string UCI::square(Square s) {
|
||||
return std::string{char('a' + file_of(s)), char('1' + rank_of(s))};
|
||||
}
|
||||
|
||||
std::string UCIEngine::move(Move m, bool chess960) {
|
||||
std::string UCI::move(Move m, bool chess960) {
|
||||
if (m == Move::none())
|
||||
return "(none)";
|
||||
|
||||
@@ -427,14 +453,9 @@ std::string UCIEngine::move(Move m, bool chess960) {
|
||||
}
|
||||
|
||||
|
||||
std::string UCIEngine::to_lower(std::string str) {
|
||||
std::transform(str.begin(), str.end(), str.begin(), [](auto c) { return std::tolower(c); });
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
Move UCIEngine::to_move(const Position& pos, std::string str) {
|
||||
str = to_lower(str);
|
||||
Move UCI::to_move(const Position& pos, std::string& str) {
|
||||
if (str.length() == 5)
|
||||
str[4] = char(tolower(str[4])); // The promotion piece character must be lowercased
|
||||
|
||||
for (const auto& m : MoveList<LEGAL>(pos))
|
||||
if (str == move(m, pos.is_chess960()))
|
||||
@@ -443,51 +464,4 @@ Move UCIEngine::to_move(const Position& pos, std::string str) {
|
||||
return Move::none();
|
||||
}
|
||||
|
||||
void UCIEngine::on_update_no_moves(const Engine::InfoShort& info) {
|
||||
sync_cout << "info depth " << info.depth << " score " << format_score(info.score) << sync_endl;
|
||||
}
|
||||
|
||||
void UCIEngine::on_update_full(const Engine::InfoFull& info, bool showWDL) {
|
||||
std::stringstream ss;
|
||||
|
||||
ss << "info";
|
||||
ss << " depth " << info.depth //
|
||||
<< " seldepth " << info.selDepth //
|
||||
<< " multipv " << info.multiPV //
|
||||
<< " score " << format_score(info.score); //
|
||||
|
||||
if (showWDL)
|
||||
ss << " wdl " << info.wdl;
|
||||
|
||||
if (!info.bound.empty())
|
||||
ss << " " << info.bound;
|
||||
|
||||
ss << " nodes " << info.nodes //
|
||||
<< " nps " << info.nps //
|
||||
<< " hashfull " << info.hashfull //
|
||||
<< " tbhits " << info.tbHits //
|
||||
<< " time " << info.timeMs //
|
||||
<< " pv " << info.pv; //
|
||||
|
||||
sync_cout << ss.str() << sync_endl;
|
||||
}
|
||||
|
||||
void UCIEngine::on_iter(const Engine::InfoIter& info) {
|
||||
std::stringstream ss;
|
||||
|
||||
ss << "info";
|
||||
ss << " depth " << info.depth //
|
||||
<< " currmove " << info.currmove //
|
||||
<< " currmovenumber " << info.currmovenumber; //
|
||||
|
||||
sync_cout << ss.str() << sync_endl;
|
||||
}
|
||||
|
||||
void UCIEngine::on_bestmove(std::string_view bestmove, std::string_view ponder) {
|
||||
sync_cout << "bestmove " << bestmove;
|
||||
if (!ponder.empty())
|
||||
std::cout << " ponder " << ponder;
|
||||
std::cout << sync_endl;
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
49
src/uci.h
49
src/uci.h
@@ -19,57 +19,54 @@
|
||||
#ifndef UCI_H_INCLUDED
|
||||
#define UCI_H_INCLUDED
|
||||
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "engine.h"
|
||||
#include "misc.h"
|
||||
#include "nnue/network.h"
|
||||
#include "position.h"
|
||||
#include "search.h"
|
||||
#include "thread.h"
|
||||
#include "tt.h"
|
||||
#include "ucioption.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
class Position;
|
||||
class Move;
|
||||
class Score;
|
||||
enum Square : int;
|
||||
using Value = int;
|
||||
|
||||
class UCIEngine {
|
||||
class UCI {
|
||||
public:
|
||||
UCIEngine(int argc, char** argv);
|
||||
UCI(int argc, char** argv);
|
||||
|
||||
void loop();
|
||||
|
||||
static int to_cp(Value v, const Position& pos);
|
||||
static std::string format_score(const Score& s);
|
||||
static std::string to_score(Value v, const Position& pos);
|
||||
static std::string square(Square s);
|
||||
static std::string move(Move m, bool chess960);
|
||||
static std::string wdl(Value v, const Position& pos);
|
||||
static std::string to_lower(std::string str);
|
||||
static Move to_move(const Position& pos, std::string str);
|
||||
static Move to_move(const Position& pos, std::string& str);
|
||||
|
||||
static Search::LimitsType parse_limits(std::istream& is);
|
||||
static Search::LimitsType parse_limits(const Position& pos, std::istream& is);
|
||||
|
||||
auto& engine_options() { return engine.get_options(); }
|
||||
const std::string& working_directory() const { return cli.workingDirectory; }
|
||||
|
||||
OptionsMap options;
|
||||
Eval::NNUE::Networks networks;
|
||||
|
||||
private:
|
||||
Engine engine;
|
||||
CommandLine cli;
|
||||
TranspositionTable tt;
|
||||
ThreadPool threads;
|
||||
CommandLine cli;
|
||||
|
||||
static void print_info_string(const std::string& str);
|
||||
|
||||
void go(std::istringstream& is);
|
||||
void bench(std::istream& args);
|
||||
void position(std::istringstream& is);
|
||||
void setoption(std::istringstream& is);
|
||||
std::uint64_t perft(const Search::LimitsType&);
|
||||
|
||||
static void on_update_no_moves(const Engine::InfoShort& info);
|
||||
static void on_update_full(const Engine::InfoFull& info, bool showWDL);
|
||||
static void on_iter(const Engine::InfoIter& info);
|
||||
static void on_bestmove(std::string_view bestmove, std::string_view ponder);
|
||||
void go(Position& pos, std::istringstream& is, StateListPtr& states);
|
||||
void bench(Position& pos, std::istream& args, StateListPtr& states);
|
||||
void position(Position& pos, std::istringstream& is, StateListPtr& states);
|
||||
void trace_eval(Position& pos);
|
||||
void search_clear();
|
||||
void setoption(std::istringstream& is);
|
||||
};
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
#include "cluster.h"
|
||||
#include "misc.h"
|
||||
|
||||
namespace Stockfish {
|
||||
@@ -36,8 +37,6 @@ bool CaseInsensitiveLess::operator()(const std::string& s1, const std::string& s
|
||||
[](char c1, char c2) { return std::tolower(c1) < std::tolower(c2); });
|
||||
}
|
||||
|
||||
void OptionsMap::add_info_listener(InfoListener&& message_func) { info = std::move(message_func); }
|
||||
|
||||
void OptionsMap::setoption(std::istringstream& is) {
|
||||
std::string token, name, value;
|
||||
|
||||
@@ -53,26 +52,19 @@ void OptionsMap::setoption(std::istringstream& is) {
|
||||
|
||||
if (options_map.count(name))
|
||||
options_map[name] = value;
|
||||
else
|
||||
else if (Cluster::is_root())
|
||||
sync_cout << "No such option: " << name << sync_endl;
|
||||
}
|
||||
|
||||
Option OptionsMap::operator[](const std::string& name) const {
|
||||
auto it = options_map.find(name);
|
||||
return it != options_map.end() ? it->second : Option(this);
|
||||
return it != options_map.end() ? it->second : Option();
|
||||
}
|
||||
|
||||
Option& OptionsMap::operator[](const std::string& name) {
|
||||
if (!options_map.count(name))
|
||||
options_map[name] = Option(this);
|
||||
return options_map[name];
|
||||
}
|
||||
Option& OptionsMap::operator[](const std::string& name) { return options_map[name]; }
|
||||
|
||||
std::size_t OptionsMap::count(const std::string& name) const { return options_map.count(name); }
|
||||
|
||||
Option::Option(const OptionsMap* map) :
|
||||
parent(map) {}
|
||||
|
||||
Option::Option(const char* v, OnChange f) :
|
||||
type("string"),
|
||||
min(0),
|
||||
@@ -127,8 +119,6 @@ bool Option::operator==(const char* s) const {
|
||||
return !CaseInsensitiveLess()(currentValue, s) && !CaseInsensitiveLess()(s, currentValue);
|
||||
}
|
||||
|
||||
bool Option::operator!=(const char* s) const { return !(*this == s); }
|
||||
|
||||
|
||||
// Inits options and assigns idx in the correct printing order
|
||||
|
||||
@@ -136,13 +126,11 @@ void Option::operator<<(const Option& o) {
|
||||
|
||||
static size_t insert_order = 0;
|
||||
|
||||
auto p = this->parent;
|
||||
*this = o;
|
||||
|
||||
this->parent = p;
|
||||
idx = insert_order++;
|
||||
*this = o;
|
||||
idx = insert_order++;
|
||||
}
|
||||
|
||||
|
||||
// Updates currentValue and triggers on_change() action. It's up to
|
||||
// the GUI to check for option's limits, but we could receive the new value
|
||||
// from the user by console window, so let's check the bounds anyway.
|
||||
@@ -166,18 +154,11 @@ Option& Option::operator=(const std::string& v) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
if (type == "string")
|
||||
currentValue = v == "<empty>" ? "" : v;
|
||||
else if (type != "button")
|
||||
if (type != "button")
|
||||
currentValue = v;
|
||||
|
||||
if (on_change)
|
||||
{
|
||||
const auto ret = on_change(*this);
|
||||
|
||||
if (ret && parent != nullptr && parent->info != nullptr)
|
||||
parent->info(ret);
|
||||
}
|
||||
on_change(*this);
|
||||
|
||||
return *this;
|
||||
}
|
||||
@@ -190,16 +171,10 @@ std::ostream& operator<<(std::ostream& os, const OptionsMap& om) {
|
||||
const Option& o = it.second;
|
||||
os << "\noption name " << it.first << " type " << o.type;
|
||||
|
||||
if (o.type == "check" || o.type == "combo")
|
||||
if (o.type == "string" || o.type == "check" || o.type == "combo")
|
||||
os << " default " << o.defaultValue;
|
||||
|
||||
else if (o.type == "string")
|
||||
{
|
||||
std::string defaultValue = o.defaultValue.empty() ? "<empty>" : o.defaultValue;
|
||||
os << " default " << defaultValue;
|
||||
}
|
||||
|
||||
else if (o.type == "spin")
|
||||
if (o.type == "spin")
|
||||
os << " default " << int(stof(o.defaultValue)) << " min " << o.min << " max "
|
||||
<< o.max;
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
#include <functional>
|
||||
#include <iosfwd>
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
|
||||
namespace Stockfish {
|
||||
@@ -32,72 +31,50 @@ struct CaseInsensitiveLess {
|
||||
bool operator()(const std::string&, const std::string&) const;
|
||||
};
|
||||
|
||||
class OptionsMap;
|
||||
|
||||
// The Option class implements each option as specified by the UCI protocol
|
||||
class Option {
|
||||
public:
|
||||
using OnChange = std::function<std::optional<std::string>(const Option&)>;
|
||||
|
||||
Option(const OptionsMap*);
|
||||
Option(OnChange = nullptr);
|
||||
Option(bool v, OnChange = nullptr);
|
||||
Option(const char* v, OnChange = nullptr);
|
||||
Option(double v, int minv, int maxv, OnChange = nullptr);
|
||||
Option(const char* v, const char* cur, OnChange = nullptr);
|
||||
|
||||
Option& operator=(const std::string&);
|
||||
operator int() const;
|
||||
operator std::string() const;
|
||||
bool operator==(const char*) const;
|
||||
bool operator!=(const char*) const;
|
||||
|
||||
friend std::ostream& operator<<(std::ostream&, const OptionsMap&);
|
||||
|
||||
private:
|
||||
friend class OptionsMap;
|
||||
friend class Engine;
|
||||
friend class Tune;
|
||||
|
||||
void operator<<(const Option&);
|
||||
|
||||
std::string defaultValue, currentValue, type;
|
||||
int min, max;
|
||||
size_t idx;
|
||||
OnChange on_change;
|
||||
const OptionsMap* parent = nullptr;
|
||||
};
|
||||
class Option;
|
||||
|
||||
class OptionsMap {
|
||||
public:
|
||||
using InfoListener = std::function<void(std::optional<std::string>)>;
|
||||
|
||||
OptionsMap() = default;
|
||||
OptionsMap(const OptionsMap&) = delete;
|
||||
OptionsMap(OptionsMap&&) = delete;
|
||||
OptionsMap& operator=(const OptionsMap&) = delete;
|
||||
OptionsMap& operator=(OptionsMap&&) = delete;
|
||||
|
||||
void add_info_listener(InfoListener&&);
|
||||
|
||||
void setoption(std::istringstream&);
|
||||
|
||||
friend std::ostream& operator<<(std::ostream&, const OptionsMap&);
|
||||
|
||||
Option operator[](const std::string&) const;
|
||||
Option& operator[](const std::string&);
|
||||
|
||||
std::size_t count(const std::string&) const;
|
||||
|
||||
private:
|
||||
friend class Engine;
|
||||
friend class Option;
|
||||
|
||||
friend std::ostream& operator<<(std::ostream&, const OptionsMap&);
|
||||
|
||||
// The options container is defined as a std::map
|
||||
using OptionsStore = std::map<std::string, Option, CaseInsensitiveLess>;
|
||||
|
||||
OptionsStore options_map;
|
||||
InfoListener info;
|
||||
};
|
||||
|
||||
// The Option class implements each option as specified by the UCI protocol
|
||||
class Option {
|
||||
public:
|
||||
using OnChange = std::function<void(const Option&)>;
|
||||
|
||||
Option(OnChange = nullptr);
|
||||
Option(bool v, OnChange = nullptr);
|
||||
Option(const char* v, OnChange = nullptr);
|
||||
Option(double v, int minv, int maxv, OnChange = nullptr);
|
||||
Option(const char* v, const char* cur, OnChange = nullptr);
|
||||
|
||||
Option& operator=(const std::string&);
|
||||
void operator<<(const Option&);
|
||||
operator int() const;
|
||||
operator std::string() const;
|
||||
bool operator==(const char*) const;
|
||||
|
||||
friend std::ostream& operator<<(std::ostream&, const OptionsMap&);
|
||||
|
||||
private:
|
||||
std::string defaultValue, currentValue, type;
|
||||
int min, max;
|
||||
size_t idx;
|
||||
OnChange on_change;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -8,20 +8,27 @@ error()
|
||||
}
|
||||
trap 'error ${LINENO}' ERR
|
||||
|
||||
# Since Linux Kernel 6.5 we are getting false positives from the ci,
|
||||
# lower the ALSR entropy to disable ALSR, which works as a temporary workaround.
|
||||
# https://github.com/google/sanitizers/issues/1716
|
||||
# https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2056762
|
||||
sudo sysctl -w vm.mmap_rnd_bits=28
|
||||
|
||||
|
||||
# define suitable post and prefixes for testing options
|
||||
case $1 in
|
||||
--valgrind)
|
||||
echo "valgrind testing started"
|
||||
prefix=''
|
||||
exeprefix='valgrind --error-exitcode=42 --errors-for-leak-kinds=all --leak-check=full'
|
||||
postfix=''
|
||||
postfix='1>/dev/null'
|
||||
threads="1"
|
||||
;;
|
||||
--valgrind-thread)
|
||||
echo "valgrind-thread testing started"
|
||||
prefix=''
|
||||
exeprefix='valgrind --fair-sched=try --error-exitcode=42'
|
||||
postfix=''
|
||||
postfix='1>/dev/null'
|
||||
threads="2"
|
||||
;;
|
||||
--sanitizer-undefined)
|
||||
@@ -39,8 +46,13 @@ case $1 in
|
||||
threads="2"
|
||||
|
||||
cat << EOF > tsan.supp
|
||||
race:Stockfish::TTEntry::read
|
||||
race:Stockfish::TTEntry::move
|
||||
race:Stockfish::TTEntry::depth
|
||||
race:Stockfish::TTEntry::bound
|
||||
race:Stockfish::TTEntry::save
|
||||
race:Stockfish::TTEntry::value
|
||||
race:Stockfish::TTEntry::eval
|
||||
race:Stockfish::TTEntry::is_pv
|
||||
|
||||
race:Stockfish::TranspositionTable::probe
|
||||
race:Stockfish::TranspositionTable::hashfull
|
||||
@@ -100,12 +112,7 @@ diff $network verify.nnue
|
||||
# more general testing, following an uci protocol exchange
|
||||
cat << EOF > game.exp
|
||||
set timeout 240
|
||||
# to correctly catch eof we need the following line
|
||||
# expect_before timeout { exit 2 } eof { exit 3 }
|
||||
expect_before timeout { exit 2 }
|
||||
|
||||
spawn $exeprefix ./stockfish
|
||||
expect "Stockfish"
|
||||
|
||||
send "uci\n"
|
||||
expect "uciok"
|
||||
@@ -118,106 +125,27 @@ cat << EOF > game.exp
|
||||
send "go nodes 1000\n"
|
||||
expect "bestmove"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "position startpos moves e2e4 e7e6\n"
|
||||
send "go nodes 1000\n"
|
||||
expect "bestmove"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n"
|
||||
send "go depth 10\n"
|
||||
expect "bestmove"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n"
|
||||
send "flip\n"
|
||||
send "go depth 10\n"
|
||||
expect "bestmove"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "position startpos\n"
|
||||
send "go depth 5\n"
|
||||
expect -re {info depth \d+ seldepth \d+ multipv \d+ score cp \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
|
||||
expect "bestmove"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "setoption name UCI_ShowWDL value true\n"
|
||||
send "position startpos\n"
|
||||
send "go depth 9\n"
|
||||
expect -re {info depth 1 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
|
||||
expect -re {info depth 2 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
|
||||
expect -re {info depth 3 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
|
||||
expect -re {info depth 4 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
|
||||
expect -re {info depth 5 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
|
||||
expect -re {info depth 6 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
|
||||
expect -re {info depth 7 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
|
||||
expect -re {info depth 8 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
|
||||
expect -re {info depth 9 seldepth \d+ multipv \d+ score cp \d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv}
|
||||
send "flip\n"
|
||||
send "go depth 5\n"
|
||||
expect "bestmove"
|
||||
|
||||
send "setoption name Skill Level value 10\n"
|
||||
send "position startpos\n"
|
||||
send "go depth 5\n"
|
||||
expect "bestmove"
|
||||
|
||||
send "setoption name Clear Hash\n"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "position fen 5K2/8/2qk4/2nPp3/3r4/6B1/B7/3R4 w - e6\n"
|
||||
send "go depth 18\n"
|
||||
expect "score mate 1"
|
||||
expect "pv d5e6"
|
||||
expect "bestmove d5e6"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "position fen 2brrb2/8/p7/Q7/1p1kpPp1/1P1pN1K1/3P4/8 b - -\n"
|
||||
send "go depth 18\n"
|
||||
expect "score mate -1"
|
||||
expect "bestmove"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "position fen 7K/P1p1p1p1/2P1P1Pk/6pP/3p2P1/1P6/3P4/8 w - - 0 1\n"
|
||||
send "go nodes 500000\n"
|
||||
expect "bestmove"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n"
|
||||
send "go depth 18 searchmoves c6d7\n"
|
||||
expect "score mate 2 * pv c6d7 * f7f5"
|
||||
expect "bestmove c6d7"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n"
|
||||
send "go mate 2 searchmoves c6d7\n"
|
||||
expect "score mate 2 * pv c6d7"
|
||||
expect "bestmove c6d7"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n"
|
||||
send "go nodes 500000 searchmoves c6d7\n"
|
||||
expect "score mate 2 * pv c6d7 * f7f5"
|
||||
expect "bestmove c6d7"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "position fen 1NR2B2/5p2/5p2/1p1kpp2/1P2rp2/2P1pB2/2P1P1K1/8 b - - \n"
|
||||
send "go depth 27\n"
|
||||
expect "score mate -2"
|
||||
expect "pv d5e6 c8d8"
|
||||
expect "bestmove d5e6"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - - moves c6d7 f2f1q\n"
|
||||
send "go depth 18\n"
|
||||
expect "score mate 1 * pv f7f5"
|
||||
expect "bestmove f7f5"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -\n"
|
||||
send "go depth 18 searchmoves c6d7\n"
|
||||
expect "score mate 2 * pv c6d7 * f7f5"
|
||||
expect "bestmove c6d7"
|
||||
|
||||
send "ucinewgame\n"
|
||||
send "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - - moves c6d7\n"
|
||||
send "go depth 18 searchmoves e3e2\n"
|
||||
expect "score mate -1 * pv e3e2 f7f5"
|
||||
expect "bestmove e3e2"
|
||||
|
||||
send "setoption name EvalFile value verify.nnue\n"
|
||||
send "position startpos\n"
|
||||
send "go depth 5\n"
|
||||
@@ -226,13 +154,6 @@ cat << EOF > game.exp
|
||||
send "setoption name MultiPV value 4\n"
|
||||
send "position startpos\n"
|
||||
send "go depth 5\n"
|
||||
expect "bestmove"
|
||||
|
||||
send "setoption name Skill Level value 10\n"
|
||||
send "position startpos\n"
|
||||
send "go depth 5\n"
|
||||
expect "bestmove"
|
||||
send "setoption name Skill Level value 20\n"
|
||||
|
||||
send "quit\n"
|
||||
expect eof
|
||||
@@ -250,30 +171,17 @@ fi
|
||||
|
||||
cat << EOF > syzygy.exp
|
||||
set timeout 240
|
||||
# to correctly catch eof we need the following line
|
||||
# expect_before timeout { exit 2 } eof { exit 3 }
|
||||
expect_before timeout { exit 2 }
|
||||
spawn $exeprefix ./stockfish
|
||||
expect "Stockfish"
|
||||
send "uci\n"
|
||||
send "setoption name SyzygyPath value ../tests/syzygy/\n"
|
||||
expect "info string Found 35 WDL and 35 DTZ tablebase files (up to 4-man)."
|
||||
expect "info string Found 35 tablebases" {} timeout {exit 1}
|
||||
send "bench 128 1 8 default depth\n"
|
||||
expect "Nodes searched :"
|
||||
send "ucinewgame\n"
|
||||
send "position fen 4k3/PP6/8/8/8/8/8/4K3 w - - 0 1\n"
|
||||
send "go depth 5\n"
|
||||
expect -re {score cp 20000|score mate}
|
||||
expect "bestmove"
|
||||
send "ucinewgame\n"
|
||||
send "position fen 8/1P6/2B5/8/4K3/8/6k1/8 w - - 0 1\n"
|
||||
send "go depth 5\n"
|
||||
expect -re {score cp 20000|score mate}
|
||||
expect "bestmove"
|
||||
send "ucinewgame\n"
|
||||
send "position fen 8/1P6/2B5/8/4K3/8/6k1/8 b - - 0 1\n"
|
||||
send "go depth 5\n"
|
||||
expect -re {score cp -20000|score mate}
|
||||
expect "bestmove"
|
||||
send "quit\n"
|
||||
expect eof
|
||||
@@ -286,9 +194,6 @@ EOF
|
||||
for exp in game.exp syzygy.exp
|
||||
do
|
||||
|
||||
echo "======== $exp =============="
|
||||
cat $exp
|
||||
echo "============================"
|
||||
echo "$prefix expect $exp $postfix"
|
||||
eval "$prefix expect $exp $postfix"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user