mirror of
https://github.com/HChaZZY/Stockfish.git
synced 2025-12-06 10:53:50 +08:00
Compare commits
124 Commits
stockfish-
...
ce73441f20
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ce73441f20 | ||
|
|
e695b9537e | ||
|
|
ce7254b5ea | ||
|
|
ea85a54fef | ||
|
|
84e2f3851d | ||
|
|
3a0fff96cf | ||
|
|
318c948c4d | ||
|
|
a7a56c41f6 | ||
|
|
34b75f1575 | ||
|
|
15555e8f4a | ||
|
|
5337edfdb6 | ||
|
|
9ac756695e | ||
|
|
254b6d5e85 | ||
|
|
259bdaaa9f | ||
|
|
c9af7674bc | ||
|
|
3747a19937 | ||
|
|
70ff5e3163 | ||
|
|
ddefd6eb6b | ||
|
|
8da3c2155a | ||
|
|
5695486db9 | ||
|
|
9debc540e5 | ||
|
|
d0212906bd | ||
|
|
29b0c07ac8 | ||
|
|
d27298d7dc | ||
|
|
dc85c5a4c9 | ||
|
|
9fd40b9ea8 | ||
|
|
dfa176fc7e | ||
|
|
9b79b75c9b | ||
|
|
73c55e8949 | ||
|
|
e3adfaf8fc | ||
|
|
bebffc5622 | ||
|
|
00b1540e01 | ||
|
|
805a2c1672 | ||
|
|
eb27d9420f | ||
|
|
fe7b9b14d2 | ||
|
|
e6ec4705a8 | ||
|
|
b1b5893a8e | ||
|
|
f58d923fe0 | ||
|
|
472cc764be | ||
|
|
2662d6bf35 | ||
|
|
c13c1d2c30 | ||
|
|
4f021cab3b | ||
|
|
e03898b57c | ||
|
|
54fb42ddf8 | ||
|
|
347e328fdb | ||
|
|
56ea1fadf1 | ||
|
|
ccfa651968 | ||
|
|
0f102f3692 | ||
|
|
009632c465 | ||
|
|
39942db3ff | ||
|
|
6e9b5af0f0 | ||
|
|
4f76768fcf | ||
|
|
1b6975ac41 | ||
|
|
6f445631ab | ||
|
|
e4b0f37493 | ||
|
|
6b7e05f0c5 | ||
|
|
07f6edf934 | ||
|
|
c4e2479a75 | ||
|
|
b5f11085dd | ||
|
|
1f9af9966f | ||
|
|
40ef7b1212 | ||
|
|
05e39527a8 | ||
|
|
d4b405a5a6 | ||
|
|
63a2ab1510 | ||
|
|
e9925b122f | ||
|
|
7afd9e859d | ||
|
|
b73c8982df | ||
|
|
81cc004060 | ||
|
|
63c6f22627 | ||
|
|
ed6b8d179a | ||
|
|
0f905b4e88 | ||
|
|
94e6c0498f | ||
|
|
af3692b2d0 | ||
|
|
3e26d3acc7 | ||
|
|
267fd8a3d5 | ||
|
|
e5aa4b48c6 | ||
|
|
f98c178960 | ||
|
|
37cc2293ef | ||
|
|
b0a7a34d3f | ||
|
|
f0de8dc034 | ||
|
|
fda269a299 | ||
|
|
4e49f8dff9 | ||
|
|
27428a61c2 | ||
|
|
4b58079485 | ||
|
|
7e6a0c464b | ||
|
|
5f32b3ed4b | ||
|
|
f590767b91 | ||
|
|
8b85290313 | ||
|
|
0dcfe096d6 | ||
|
|
88a524c552 | ||
|
|
7988de4aa3 | ||
|
|
f6b0d53a99 | ||
|
|
449a8b017e | ||
|
|
4176ad7b0a | ||
|
|
16cd38dba1 | ||
|
|
b915ed702a | ||
|
|
f2507d0562 | ||
|
|
f273eea71f | ||
|
|
f9459e4c8e | ||
|
|
3d18ad719b | ||
|
|
698c069bba | ||
|
|
2b4926e091 | ||
|
|
d2d046c2a4 | ||
|
|
44efbaddea | ||
|
|
904a016396 | ||
|
|
5f8e67a544 | ||
|
|
8d2eef2b1e | ||
|
|
2af64d581b | ||
|
|
cf8b3637a0 | ||
|
|
bb3eaf8def | ||
|
|
1577fa0470 | ||
|
|
fb6a3e04ec | ||
|
|
15f34560f2 | ||
|
|
7beff18ef0 | ||
|
|
d7c04a9429 | ||
|
|
d942e13398 | ||
|
|
3d61f932cb | ||
|
|
ed89817f62 | ||
|
|
1a395f1b56 | ||
|
|
d2cb927a04 | ||
|
|
dfef7e7520 | ||
|
|
ee35a51c40 | ||
|
|
c2ff7a95c3 | ||
|
|
0475c8653f |
@@ -9,14 +9,14 @@ AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortCaseLabelsOnASingleLine: false
|
||||
AllowShortEnumsOnASingleLine: false
|
||||
AllowShortIfStatementsOnASingleLine: false
|
||||
AlwaysBreakTemplateDeclarations: Yes
|
||||
BreakTemplateDeclarations: Yes
|
||||
BasedOnStyle: WebKit
|
||||
BitFieldColonSpacing: After
|
||||
BinPackParameters: false
|
||||
BreakBeforeBinaryOperators: NonAssignment
|
||||
BreakBeforeBraces: Custom
|
||||
BraceWrapping:
|
||||
AfterFunction: false
|
||||
AfterFunction: false
|
||||
AfterClass: false
|
||||
AfterControlStatement: true
|
||||
BeforeElse: true
|
||||
|
||||
12
.github/ci/arm_matrix.json
vendored
12
.github/ci/arm_matrix.json
vendored
@@ -4,7 +4,7 @@
|
||||
"name": "Android NDK aarch64",
|
||||
"os": "ubuntu-22.04",
|
||||
"simple_name": "android",
|
||||
"compiler": "aarch64-linux-android21-clang++",
|
||||
"compiler": "aarch64-linux-android29-clang++",
|
||||
"emu": "qemu-aarch64",
|
||||
"comp": "ndk",
|
||||
"shell": "bash",
|
||||
@@ -14,7 +14,7 @@
|
||||
"name": "Android NDK arm",
|
||||
"os": "ubuntu-22.04",
|
||||
"simple_name": "android",
|
||||
"compiler": "armv7a-linux-androideabi21-clang++",
|
||||
"compiler": "armv7a-linux-androideabi29-clang++",
|
||||
"emu": "qemu-arm",
|
||||
"comp": "ndk",
|
||||
"shell": "bash",
|
||||
@@ -26,25 +26,25 @@
|
||||
{
|
||||
"binaries": "armv8-dotprod",
|
||||
"config": {
|
||||
"compiler": "armv7a-linux-androideabi21-clang++"
|
||||
"compiler": "armv7a-linux-androideabi29-clang++"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "armv8",
|
||||
"config": {
|
||||
"compiler": "armv7a-linux-androideabi21-clang++"
|
||||
"compiler": "armv7a-linux-androideabi29-clang++"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "armv7",
|
||||
"config": {
|
||||
"compiler": "aarch64-linux-android21-clang++"
|
||||
"compiler": "aarch64-linux-android29-clang++"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "armv7-neon",
|
||||
"config": {
|
||||
"compiler": "aarch64-linux-android21-clang++"
|
||||
"compiler": "aarch64-linux-android29-clang++"
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
150
.github/ci/matrix.json
vendored
150
.github/ci/matrix.json
vendored
@@ -40,6 +40,18 @@
|
||||
"ext": ".exe",
|
||||
"sde": "/d/a/Stockfish/Stockfish/.output/sde-temp-files/sde-external-9.27.0-2023-09-13-win/sde.exe -future --",
|
||||
"archive_ext": "zip"
|
||||
},
|
||||
{
|
||||
"name": "Windows 11 Mingw-w64 Clang arm64",
|
||||
"os": "windows-11-arm",
|
||||
"simple_name": "windows",
|
||||
"compiler": "clang++",
|
||||
"comp": "clang",
|
||||
"msys_sys": "clangarm64",
|
||||
"msys_env": "clang-aarch64-clang",
|
||||
"shell": "msys2 {0}",
|
||||
"ext": ".exe",
|
||||
"archive_ext": "zip"
|
||||
}
|
||||
],
|
||||
"binaries": [
|
||||
@@ -51,7 +63,9 @@
|
||||
"x86-64-avx512",
|
||||
"x86-64-vnni256",
|
||||
"x86-64-vnni512",
|
||||
"apple-silicon"
|
||||
"apple-silicon",
|
||||
"armv8",
|
||||
"armv8-dotprod"
|
||||
],
|
||||
"exclude": [
|
||||
{
|
||||
@@ -84,12 +98,6 @@
|
||||
"os": "macos-14"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "x86-64-avxvnni",
|
||||
"config": {
|
||||
"os": "macos-14"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "x86-64-avx512",
|
||||
"config": {
|
||||
@@ -108,12 +116,6 @@
|
||||
"os": "macos-14"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "x86-64-avxvnni",
|
||||
"config": {
|
||||
"ubuntu-22.04": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "x86-64-avxvnni",
|
||||
"config": {
|
||||
@@ -138,6 +140,54 @@
|
||||
"os": "macos-13"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "x86-64",
|
||||
"config": {
|
||||
"os": "windows-11-arm"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "x86-64-sse41-popcnt",
|
||||
"config": {
|
||||
"os": "windows-11-arm"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "x86-64-avx2",
|
||||
"config": {
|
||||
"os": "windows-11-arm"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "x86-64-bmi2",
|
||||
"config": {
|
||||
"os": "windows-11-arm"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "x86-64-avxvnni",
|
||||
"config": {
|
||||
"os": "windows-11-arm"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "x86-64-avx512",
|
||||
"config": {
|
||||
"os": "windows-11-arm"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "x86-64-vnni256",
|
||||
"config": {
|
||||
"os": "windows-11-arm"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "x86-64-vnni512",
|
||||
"config": {
|
||||
"os": "windows-11-arm"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "apple-silicon",
|
||||
"config": {
|
||||
@@ -147,7 +197,13 @@
|
||||
{
|
||||
"binaries": "apple-silicon",
|
||||
"config": {
|
||||
"os": "macos-13"
|
||||
"os": "windows-11-arm"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "apple-silicon",
|
||||
"config": {
|
||||
"os": "ubuntu-20.04"
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -155,6 +211,72 @@
|
||||
"config": {
|
||||
"os": "ubuntu-22.04"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "apple-silicon",
|
||||
"config": {
|
||||
"os": "macos-13"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "armv8",
|
||||
"config": {
|
||||
"os": "windows-2022"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "armv8",
|
||||
"config": {
|
||||
"os": "ubuntu-20.04"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "armv8",
|
||||
"config": {
|
||||
"os": "ubuntu-22.04"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "armv8",
|
||||
"config": {
|
||||
"os": "macos-13"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "armv8",
|
||||
"config": {
|
||||
"os": "macos-14"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "armv8-dotprod",
|
||||
"config": {
|
||||
"os": "windows-2022"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "armv8-dotprod",
|
||||
"config": {
|
||||
"os": "ubuntu-20.04"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "armv8-dotprod",
|
||||
"config": {
|
||||
"os": "ubuntu-22.04"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "armv8-dotprod",
|
||||
"config": {
|
||||
"os": "macos-13"
|
||||
}
|
||||
},
|
||||
{
|
||||
"binaries": "armv8-dotprod",
|
||||
"config": {
|
||||
"os": "macos-14"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
2
.github/workflows/arm_compilation.yml
vendored
2
.github/workflows/arm_compilation.yml
vendored
@@ -38,7 +38,7 @@ jobs:
|
||||
if: runner.os == 'Linux'
|
||||
run: |
|
||||
if [ $COMP == ndk ]; then
|
||||
NDKV="21.4.7075529"
|
||||
NDKV="27.2.12479018"
|
||||
ANDROID_ROOT=/usr/local/lib/android
|
||||
ANDROID_SDK_ROOT=$ANDROID_ROOT/sdk
|
||||
SDKMANAGER=$ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager
|
||||
|
||||
8
.github/workflows/clang-format.yml
vendored
8
.github/workflows/clang-format.yml
vendored
@@ -25,11 +25,11 @@ jobs:
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
|
||||
- name: Run clang-format style check
|
||||
uses: jidicula/clang-format-action@f62da5e3d3a2d88ff364771d9d938773a618ab5e # @v4.11.0
|
||||
uses: jidicula/clang-format-action@4726374d1aa3c6aecf132e5197e498979588ebc8 # @v4.15.0
|
||||
id: clang-format
|
||||
continue-on-error: true
|
||||
with:
|
||||
clang-format-version: "18"
|
||||
clang-format-version: "20"
|
||||
exclude-regex: "incbin"
|
||||
|
||||
- name: Comment on PR
|
||||
@@ -37,9 +37,9 @@ jobs:
|
||||
uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # @v2.5.0
|
||||
with:
|
||||
message: |
|
||||
clang-format 18 needs to be run on this PR.
|
||||
clang-format 20 needs to be run on this PR.
|
||||
If you do not have clang-format installed, the maintainer will run it when merging.
|
||||
For the exact version please see https://packages.ubuntu.com/noble/clang-format-18.
|
||||
For the exact version please see https://packages.ubuntu.com/plucky/clang-format-20.
|
||||
|
||||
_(execution **${{ github.run_id }}** / attempt **${{ github.run_attempt }}**)_
|
||||
comment_tag: execution
|
||||
|
||||
2
.github/workflows/codeql.yml
vendored
2
.github/workflows/codeql.yml
vendored
@@ -47,7 +47,7 @@ jobs:
|
||||
|
||||
- name: Build
|
||||
working-directory: src
|
||||
run: make -j build ARCH=x86-64-modern
|
||||
run: make -j build
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v3
|
||||
|
||||
14
.github/workflows/compilation.yml
vendored
14
.github/workflows/compilation.yml
vendored
@@ -63,13 +63,13 @@ jobs:
|
||||
- name: Check compiler
|
||||
run: $COMPCXX -v
|
||||
|
||||
- name: Show g++ cpu info
|
||||
if: runner.os != 'macOS'
|
||||
run: g++ -Q -march=native --help=target
|
||||
|
||||
- name: Show clang++ cpu info
|
||||
if: runner.os == 'macOS'
|
||||
run: clang++ -E - -march=native -###
|
||||
- name: Show compiler cpu info
|
||||
run: |
|
||||
if [[ "$COMPCXX" == clang* ]]; then
|
||||
$COMPCXX -E - -march=native -###
|
||||
else
|
||||
$COMPCXX -Q -march=native --help=target
|
||||
fi
|
||||
|
||||
# x86-64 with newer extensions tests
|
||||
|
||||
|
||||
43
.github/workflows/tests.yml
vendored
43
.github/workflows/tests.yml
vendored
@@ -29,24 +29,25 @@ jobs:
|
||||
shell: bash
|
||||
- name: Android NDK aarch64
|
||||
os: ubuntu-22.04
|
||||
compiler: aarch64-linux-android21-clang++
|
||||
compiler: aarch64-linux-android29-clang++
|
||||
comp: ndk
|
||||
run_armv8_tests: true
|
||||
shell: bash
|
||||
- name: Android NDK arm
|
||||
os: ubuntu-22.04
|
||||
compiler: armv7a-linux-androideabi21-clang++
|
||||
compiler: armv7a-linux-androideabi29-clang++
|
||||
comp: ndk
|
||||
run_armv7_tests: true
|
||||
shell: bash
|
||||
- name: Linux GCC riscv64
|
||||
os: ubuntu-22.04
|
||||
compiler: g++
|
||||
comp: gcc
|
||||
run_riscv64_tests: true
|
||||
base_image: "riscv64/alpine:edge"
|
||||
platform: linux/riscv64
|
||||
shell: bash
|
||||
# Currently segfaults in the CI unrelated to a Stockfish change.
|
||||
# - name: Linux GCC riscv64
|
||||
# os: ubuntu-22.04
|
||||
# compiler: g++
|
||||
# comp: gcc
|
||||
# run_riscv64_tests: true
|
||||
# base_image: "riscv64/alpine:edge"
|
||||
# platform: linux/riscv64
|
||||
# shell: bash
|
||||
- name: Linux GCC ppc64
|
||||
os: ubuntu-22.04
|
||||
compiler: g++
|
||||
@@ -98,6 +99,14 @@ jobs:
|
||||
msys_sys: clang64
|
||||
msys_env: clang-x86_64-clang
|
||||
shell: msys2 {0}
|
||||
- name: Windows 11 Mingw-w64 Clang arm64
|
||||
os: windows-11-arm
|
||||
compiler: clang++
|
||||
comp: clang
|
||||
run_armv8_tests: true
|
||||
msys_sys: clangarm64
|
||||
msys_env: clang-aarch64-clang
|
||||
shell: msys2 {0}
|
||||
defaults:
|
||||
run:
|
||||
working-directory: src
|
||||
@@ -118,7 +127,7 @@ jobs:
|
||||
if: runner.os == 'Linux'
|
||||
run: |
|
||||
if [ $COMP == ndk ]; then
|
||||
NDKV="21.4.7075529"
|
||||
NDKV="27.2.12479018"
|
||||
ANDROID_ROOT=/usr/local/lib/android
|
||||
ANDROID_SDK_ROOT=$ANDROID_ROOT/sdk
|
||||
SDKMANAGER=$ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager
|
||||
@@ -302,8 +311,10 @@ jobs:
|
||||
- name: Test armv8 build
|
||||
if: matrix.config.run_armv8_tests
|
||||
run: |
|
||||
export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
|
||||
export LDFLAGS="-static -Wno-unused-command-line-argument"
|
||||
if [ $COMP == ndk ]; then
|
||||
export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
|
||||
export LDFLAGS="-static -Wno-unused-command-line-argument"
|
||||
fi
|
||||
make clean
|
||||
make -j4 ARCH=armv8 build
|
||||
../tests/signature.sh $benchref
|
||||
@@ -311,8 +322,10 @@ jobs:
|
||||
- name: Test armv8-dotprod build
|
||||
if: matrix.config.run_armv8_tests
|
||||
run: |
|
||||
export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
|
||||
export LDFLAGS="-static -Wno-unused-command-line-argument"
|
||||
if [ $COMP == ndk ]; then
|
||||
export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
|
||||
export LDFLAGS="-static -Wno-unused-command-line-argument"
|
||||
fi
|
||||
make clean
|
||||
make -j4 ARCH=armv8-dotprod build
|
||||
../tests/signature.sh $benchref
|
||||
|
||||
5
AUTHORS
5
AUTHORS
@@ -20,6 +20,7 @@ Alexander Kure
|
||||
Alexander Pagel (Lolligerhans)
|
||||
Alfredo Menezes (lonfom169)
|
||||
Ali AlZhrani (Cooffe)
|
||||
AliceRoselia
|
||||
Andreas Jan van der Meulen (Andyson007)
|
||||
Andreas Matthies (Matthies)
|
||||
Andrei Vetrov (proukornew)
|
||||
@@ -33,6 +34,7 @@ Artem Solopiy (EntityFX)
|
||||
Auguste Pop
|
||||
Balazs Szilagyi
|
||||
Balint Pfliegel
|
||||
Baptiste Rech (breatn)
|
||||
Ben Chaney (Chaneybenjamini)
|
||||
Ben Koshy (BKSpurgeon)
|
||||
Bill Henry (VoyagerOne)
|
||||
@@ -57,6 +59,7 @@ Dale Weiler (graphitemaster)
|
||||
Daniel Axtens (daxtens)
|
||||
Daniel Dugovic (ddugovic)
|
||||
Daniel Monroe (Ergodice)
|
||||
Daniel Samek (DanSamek)
|
||||
Dan Schmidt (dfannius)
|
||||
Dariusz Orzechowski (dorzechowski)
|
||||
David (dav1312)
|
||||
@@ -129,6 +132,7 @@ Kenneth Lee (kennethlee33)
|
||||
Kian E (KJE-98)
|
||||
kinderchocolate
|
||||
Kiran Panditrao (Krgp)
|
||||
Kirill Zaripov (kokodio)
|
||||
Kojirion
|
||||
Krisztián Peőcz
|
||||
Krystian Kuzniarek (kuzkry)
|
||||
@@ -145,6 +149,7 @@ Lucas Braesch (lucasart)
|
||||
Lyudmil Antonov (lantonov)
|
||||
Maciej Żenczykowski (zenczykowski)
|
||||
Malcolm Campbell (xoto10)
|
||||
Mark Marosi (Mapika)
|
||||
Mark Tenzer (31m059)
|
||||
marotear
|
||||
Mathias Parnaudeau (mparnaudeau)
|
||||
|
||||
@@ -59,7 +59,7 @@ discussion._
|
||||
|
||||
Changes to Stockfish C++ code should respect our coding style defined by
|
||||
[.clang-format](.clang-format). You can format your changes by running
|
||||
`make format`. This requires clang-format version 18 to be installed on your system.
|
||||
`make format`. This requires clang-format version 20 to be installed on your system.
|
||||
|
||||
## Navigate
|
||||
|
||||
|
||||
@@ -130,7 +130,13 @@ case $uname_s in
|
||||
esac
|
||||
file_ext='tar'
|
||||
;;
|
||||
'CYGWIN'*|'MINGW'*|'MSYS'*) # Windows system with POSIX compatibility layer
|
||||
'MINGW'*'ARM64'*) # Windows ARM64 system with POSIX compatibility layer
|
||||
# TODO: older chips might only support armv8, but we have no good way to detect this: /proc/cpuinfo shows x86 info
|
||||
file_os='windows'
|
||||
true_arch='armv8-dotprod'
|
||||
file_ext='zip'
|
||||
;;
|
||||
'CYGWIN'*|'MINGW'*|'MSYS'*) # Windows x86_64 system with POSIX compatibility layer
|
||||
get_flags
|
||||
check_znver_1_2
|
||||
set_arch_x86_64
|
||||
|
||||
51
src/Makefile
51
src/Makefile
@@ -60,9 +60,9 @@ SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \
|
||||
|
||||
HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h history.h \
|
||||
nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/layers/affine_transform.h \
|
||||
nnue/layers/affine_transform_sparse_input.h nnue/layers/clipped_relu.h nnue/layers/simd.h \
|
||||
nnue/layers/affine_transform_sparse_input.h nnue/layers/clipped_relu.h \
|
||||
nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h nnue/nnue_architecture.h \
|
||||
nnue/nnue_common.h nnue/nnue_feature_transformer.h position.h \
|
||||
nnue/nnue_common.h nnue/nnue_feature_transformer.h nnue/simd.h position.h \
|
||||
search.h syzygy/tbprobe.h thread.h thread_win32_osx.h timeman.h \
|
||||
tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h numa.h memory.h
|
||||
|
||||
@@ -163,8 +163,8 @@ lsx = no
|
||||
lasx = no
|
||||
STRIP = strip
|
||||
|
||||
ifneq ($(shell which clang-format-18 2> /dev/null),)
|
||||
CLANG-FORMAT = clang-format-18
|
||||
ifneq ($(shell which clang-format-20 2> /dev/null),)
|
||||
CLANG-FORMAT = clang-format-20
|
||||
else
|
||||
CLANG-FORMAT = clang-format
|
||||
endif
|
||||
@@ -533,14 +533,12 @@ ifeq ($(KERNEL),Darwin)
|
||||
XCRUN = xcrun
|
||||
endif
|
||||
|
||||
# To cross-compile for Android, NDK version r21 or later is recommended.
|
||||
# In earlier NDK versions, you'll need to pass -fno-addrsig if using GNU binutils.
|
||||
# Currently we don't know how to make PGO builds with the NDK yet.
|
||||
# To cross-compile for Android, use NDK version r27c or later.
|
||||
ifeq ($(COMP),ndk)
|
||||
CXXFLAGS += -stdlib=libc++ -fPIE
|
||||
CXXFLAGS += -stdlib=libc++
|
||||
comp=clang
|
||||
ifeq ($(arch),armv7)
|
||||
CXX=armv7a-linux-androideabi16-clang++
|
||||
CXX=armv7a-linux-androideabi29-clang++
|
||||
CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon
|
||||
ifneq ($(shell which arm-linux-androideabi-strip 2>/dev/null),)
|
||||
STRIP=arm-linux-androideabi-strip
|
||||
@@ -549,7 +547,7 @@ ifeq ($(COMP),ndk)
|
||||
endif
|
||||
endif
|
||||
ifeq ($(arch),armv8)
|
||||
CXX=aarch64-linux-android21-clang++
|
||||
CXX=aarch64-linux-android29-clang++
|
||||
ifneq ($(shell which aarch64-linux-android-strip 2>/dev/null),)
|
||||
STRIP=aarch64-linux-android-strip
|
||||
else
|
||||
@@ -557,14 +555,28 @@ ifeq ($(COMP),ndk)
|
||||
endif
|
||||
endif
|
||||
ifeq ($(arch),x86_64)
|
||||
CXX=x86_64-linux-android21-clang++
|
||||
CXX=x86_64-linux-android29-clang++
|
||||
ifneq ($(shell which x86_64-linux-android-strip 2>/dev/null),)
|
||||
STRIP=x86_64-linux-android-strip
|
||||
else
|
||||
STRIP=llvm-strip
|
||||
endif
|
||||
endif
|
||||
LDFLAGS += -static-libstdc++ -pie -lm -latomic
|
||||
LDFLAGS += -static-libstdc++
|
||||
endif
|
||||
|
||||
### Allow overwriting CXX from command line
|
||||
ifdef COMPCXX
|
||||
CXX=$(COMPCXX)
|
||||
endif
|
||||
|
||||
# llvm-profdata must be version compatible with the specified CXX (be it clang, or the gcc alias)
|
||||
# make -j profile-build CXX=clang++-20 COMP=clang
|
||||
# Locate the version in the same directory as the compiler used,
|
||||
# with fallback to a generic one if it can't be located
|
||||
LLVM_PROFDATA := $(dir $(realpath $(shell which $(CXX) 2> /dev/null)))llvm-profdata
|
||||
ifeq ($(wildcard $(LLVM_PROFDATA)),)
|
||||
LLVM_PROFDATA := llvm-profdata
|
||||
endif
|
||||
|
||||
ifeq ($(comp),icx)
|
||||
@@ -581,11 +593,6 @@ else
|
||||
endif
|
||||
endif
|
||||
|
||||
### Allow overwriting CXX from command line
|
||||
ifdef COMPCXX
|
||||
CXX=$(COMPCXX)
|
||||
endif
|
||||
|
||||
### Sometimes gcc is really clang
|
||||
ifeq ($(COMP),gcc)
|
||||
gccversion := $(shell $(CXX) --version 2>/dev/null)
|
||||
@@ -694,7 +701,7 @@ endif
|
||||
ifeq ($(avx512),yes)
|
||||
CXXFLAGS += -DUSE_AVX512
|
||||
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
|
||||
CXXFLAGS += -mavx512f -mavx512bw
|
||||
CXXFLAGS += -mavx512f -mavx512bw -mavx512dq -mavx512vl
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -989,10 +996,6 @@ net:
|
||||
format:
|
||||
$(CLANG-FORMAT) -i $(SRCS) $(HEADERS) -style=file
|
||||
|
||||
# default target
|
||||
default:
|
||||
help
|
||||
|
||||
### ==========================================================================
|
||||
### Section 5. Private Targets
|
||||
### ==========================================================================
|
||||
@@ -1081,7 +1084,7 @@ clang-profile-make:
|
||||
all
|
||||
|
||||
clang-profile-use:
|
||||
$(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw
|
||||
$(XCRUN) $(LLVM_PROFDATA) merge -output=stockfish.profdata *.profraw
|
||||
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
|
||||
EXTRACXXFLAGS='-fprofile-use=stockfish.profdata' \
|
||||
EXTRALDFLAGS='-fprofile-use ' \
|
||||
@@ -1118,6 +1121,6 @@ icx-profile-use:
|
||||
.depend: $(SRCS)
|
||||
-@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null
|
||||
|
||||
ifeq (, $(filter $(MAKECMDGOALS), help strip install clean net objclean profileclean config-sanity))
|
||||
ifeq (, $(filter $(MAKECMDGOALS), help strip install clean net objclean profileclean format config-sanity))
|
||||
-include .depend
|
||||
endif
|
||||
|
||||
@@ -32,7 +32,6 @@ uint8_t SquareDistance[SQUARE_NB][SQUARE_NB];
|
||||
Bitboard LineBB[SQUARE_NB][SQUARE_NB];
|
||||
Bitboard BetweenBB[SQUARE_NB][SQUARE_NB];
|
||||
Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB];
|
||||
Bitboard PawnAttacks[COLOR_NB][SQUARE_NB];
|
||||
|
||||
alignas(64) Magic Magics[SQUARE_NB][2];
|
||||
|
||||
@@ -86,8 +85,8 @@ void Bitboards::init() {
|
||||
|
||||
for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
|
||||
{
|
||||
PawnAttacks[WHITE][s1] = pawn_attacks_bb<WHITE>(square_bb(s1));
|
||||
PawnAttacks[BLACK][s1] = pawn_attacks_bb<BLACK>(square_bb(s1));
|
||||
PseudoAttacks[WHITE][s1] = pawn_attacks_bb<WHITE>(square_bb(s1));
|
||||
PseudoAttacks[BLACK][s1] = pawn_attacks_bb<BLACK>(square_bb(s1));
|
||||
|
||||
for (int step : {-9, -8, -7, -1, 1, 7, 8, 9})
|
||||
PseudoAttacks[KING][s1] |= safe_destination(s1, step);
|
||||
|
||||
@@ -62,7 +62,6 @@ extern uint8_t SquareDistance[SQUARE_NB][SQUARE_NB];
|
||||
extern Bitboard BetweenBB[SQUARE_NB][SQUARE_NB];
|
||||
extern Bitboard LineBB[SQUARE_NB][SQUARE_NB];
|
||||
extern Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB];
|
||||
extern Bitboard PawnAttacks[COLOR_NB][SQUARE_NB];
|
||||
|
||||
|
||||
// Magic holds all magic bitboards relevant data for a single square
|
||||
@@ -103,17 +102,17 @@ constexpr Bitboard square_bb(Square s) {
|
||||
// Overloads of bitwise operators between a Bitboard and a Square for testing
|
||||
// whether a given bit is set in a bitboard, and for setting and clearing bits.
|
||||
|
||||
inline Bitboard operator&(Bitboard b, Square s) { return b & square_bb(s); }
|
||||
inline Bitboard operator|(Bitboard b, Square s) { return b | square_bb(s); }
|
||||
inline Bitboard operator^(Bitboard b, Square s) { return b ^ square_bb(s); }
|
||||
inline Bitboard& operator|=(Bitboard& b, Square s) { return b |= square_bb(s); }
|
||||
inline Bitboard& operator^=(Bitboard& b, Square s) { return b ^= square_bb(s); }
|
||||
constexpr Bitboard operator&(Bitboard b, Square s) { return b & square_bb(s); }
|
||||
constexpr Bitboard operator|(Bitboard b, Square s) { return b | square_bb(s); }
|
||||
constexpr Bitboard operator^(Bitboard b, Square s) { return b ^ square_bb(s); }
|
||||
constexpr Bitboard& operator|=(Bitboard& b, Square s) { return b |= square_bb(s); }
|
||||
constexpr Bitboard& operator^=(Bitboard& b, Square s) { return b ^= square_bb(s); }
|
||||
|
||||
inline Bitboard operator&(Square s, Bitboard b) { return b & s; }
|
||||
inline Bitboard operator|(Square s, Bitboard b) { return b | s; }
|
||||
inline Bitboard operator^(Square s, Bitboard b) { return b ^ s; }
|
||||
constexpr Bitboard operator&(Square s, Bitboard b) { return b & s; }
|
||||
constexpr Bitboard operator|(Square s, Bitboard b) { return b | s; }
|
||||
constexpr Bitboard operator^(Square s, Bitboard b) { return b ^ s; }
|
||||
|
||||
inline Bitboard operator|(Square s1, Square s2) { return square_bb(s1) | s2; }
|
||||
constexpr Bitboard operator|(Square s1, Square s2) { return square_bb(s1) | s2; }
|
||||
|
||||
constexpr bool more_than_one(Bitboard b) { return b & (b - 1); }
|
||||
|
||||
@@ -155,11 +154,6 @@ constexpr Bitboard pawn_attacks_bb(Bitboard b) {
|
||||
: shift<SOUTH_WEST>(b) | shift<SOUTH_EAST>(b);
|
||||
}
|
||||
|
||||
inline Bitboard pawn_attacks_bb(Color c, Square s) {
|
||||
|
||||
assert(is_ok(s));
|
||||
return PawnAttacks[c][s];
|
||||
}
|
||||
|
||||
// Returns a bitboard representing an entire line (from board edge
|
||||
// to board edge) that intersects the two given squares. If the given squares
|
||||
@@ -216,10 +210,10 @@ inline int edge_distance(File f) { return std::min(f, File(FILE_H - f)); }
|
||||
// Returns the pseudo attacks of the given piece type
|
||||
// assuming an empty board.
|
||||
template<PieceType Pt>
|
||||
inline Bitboard attacks_bb(Square s) {
|
||||
inline Bitboard attacks_bb(Square s, Color c = COLOR_NB) {
|
||||
|
||||
assert((Pt != PAWN) && (is_ok(s)));
|
||||
return PseudoAttacks[Pt][s];
|
||||
assert((Pt != PAWN || c < COLOR_NB) && (is_ok(s)));
|
||||
return Pt == PAWN ? PseudoAttacks[c][s] : PseudoAttacks[Pt][s];
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -38,17 +38,15 @@
|
||||
namespace Stockfish {
|
||||
|
||||
// Returns a static, purely materialistic evaluation of the position from
|
||||
// the point of view of the given color. It can be divided by PawnValue to get
|
||||
// the point of view of the side to move. It can be divided by PawnValue to get
|
||||
// an approximation of the material advantage on the board in terms of pawns.
|
||||
int Eval::simple_eval(const Position& pos, Color c) {
|
||||
int Eval::simple_eval(const Position& pos) {
|
||||
Color c = pos.side_to_move();
|
||||
return PawnValue * (pos.count<PAWN>(c) - pos.count<PAWN>(~c))
|
||||
+ (pos.non_pawn_material(c) - pos.non_pawn_material(~c));
|
||||
}
|
||||
|
||||
bool Eval::use_smallnet(const Position& pos) {
|
||||
int simpleEval = simple_eval(pos, pos.side_to_move());
|
||||
return std::abs(simpleEval) > 962;
|
||||
}
|
||||
bool Eval::use_smallnet(const Position& pos) { return std::abs(simple_eval(pos)) > 962; }
|
||||
|
||||
// Evaluate is the evaluator for the outer world. It returns a static evaluation
|
||||
// of the position from the point of view of the side to move.
|
||||
@@ -103,8 +101,6 @@ std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {
|
||||
Eval::NNUE::AccumulatorStack accumulators;
|
||||
auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>(networks);
|
||||
|
||||
accumulators.reset(pos, networks, *caches);
|
||||
|
||||
std::stringstream ss;
|
||||
ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
|
||||
ss << '\n' << NNUE::trace(pos, networks, *caches) << '\n';
|
||||
|
||||
@@ -44,7 +44,7 @@ class AccumulatorStack;
|
||||
|
||||
std::string trace(Position& pos, const Eval::NNUE::Networks& networks);
|
||||
|
||||
int simple_eval(const Position& pos, Color c);
|
||||
int simple_eval(const Position& pos);
|
||||
bool use_smallnet(const Position& pos);
|
||||
Value evaluate(const NNUE::Networks& networks,
|
||||
const Position& pos,
|
||||
|
||||
@@ -36,7 +36,7 @@ namespace Stockfish {
|
||||
constexpr int PAWN_HISTORY_SIZE = 512; // has to be a power of 2
|
||||
constexpr int CORRECTION_HISTORY_SIZE = 32768; // has to be a power of 2
|
||||
constexpr int CORRECTION_HISTORY_LIMIT = 1024;
|
||||
constexpr int LOW_PLY_HISTORY_SIZE = 4;
|
||||
constexpr int LOW_PLY_HISTORY_SIZE = 5;
|
||||
|
||||
static_assert((PAWN_HISTORY_SIZE & (PAWN_HISTORY_SIZE - 1)) == 0,
|
||||
"PAWN_HISTORY_SIZE has to be a power of 2");
|
||||
@@ -166,6 +166,8 @@ struct CorrHistTypedef<NonPawn> {
|
||||
template<CorrHistType T>
|
||||
using CorrectionHistory = typename Detail::CorrHistTypedef<T>::type;
|
||||
|
||||
using TTMoveHistory = StatsEntry<std::int16_t, 8192>;
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
#endif // #ifndef HISTORY_H_INCLUDED
|
||||
|
||||
@@ -52,7 +52,6 @@ void memory_deleter(T* ptr, FREE_FUNC free_func) {
|
||||
ptr->~T();
|
||||
|
||||
free_func(ptr);
|
||||
return;
|
||||
}
|
||||
|
||||
// Frees memory which was placed there with placement new.
|
||||
|
||||
@@ -40,7 +40,7 @@ namespace Stockfish {
|
||||
namespace {
|
||||
|
||||
// Version number or dev.
|
||||
constexpr std::string_view version = "17.1";
|
||||
constexpr std::string_view version = "dev";
|
||||
|
||||
// Our fancy logging facility. The trick here is to replace cin.rdbuf() and
|
||||
// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We
|
||||
|
||||
16
src/misc.h
16
src/misc.h
@@ -317,6 +317,22 @@ void move_to_front(std::vector<T>& vec, Predicate pred) {
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
#if __GNUC__ >= 13
|
||||
#define sf_assume(cond) __attribute__((assume(cond)))
|
||||
#else
|
||||
#define sf_assume(cond) \
|
||||
do \
|
||||
{ \
|
||||
if (!(cond)) \
|
||||
__builtin_unreachable(); \
|
||||
} while (0)
|
||||
#endif
|
||||
#else
|
||||
// do nothing for other compilers
|
||||
#define sf_assume(cond)
|
||||
#endif
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
#endif // #ifndef MISC_H_INCLUDED
|
||||
|
||||
@@ -134,7 +134,7 @@ ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard ta
|
||||
if (Type == EVASIONS && (target & (pos.ep_square() + Up)))
|
||||
return moveList;
|
||||
|
||||
b1 = pawnsNotOn7 & pawn_attacks_bb(Them, pos.ep_square());
|
||||
b1 = pawnsNotOn7 & attacks_bb<PAWN>(pos.ep_square(), Them);
|
||||
|
||||
assert(b1);
|
||||
|
||||
|
||||
139
src/movepick.cpp
139
src/movepick.cpp
@@ -20,6 +20,7 @@
|
||||
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
|
||||
#include "bitboard.h"
|
||||
#include "misc.h"
|
||||
@@ -55,6 +56,7 @@ enum Stages {
|
||||
QCAPTURE
|
||||
};
|
||||
|
||||
|
||||
// Sort moves in descending order up to and including a given limit.
|
||||
// The order of moves smaller than the limit is left unspecified.
|
||||
void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) {
|
||||
@@ -125,74 +127,68 @@ void MovePicker::score() {
|
||||
|
||||
static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type");
|
||||
|
||||
[[maybe_unused]] Bitboard threatenedByPawn, threatenedByMinor, threatenedByRook,
|
||||
threatenedPieces;
|
||||
Color us = pos.side_to_move();
|
||||
|
||||
[[maybe_unused]] Bitboard threatByLesser[QUEEN + 1];
|
||||
if constexpr (Type == QUIETS)
|
||||
{
|
||||
Color us = pos.side_to_move();
|
||||
|
||||
threatenedByPawn = pos.attacks_by<PAWN>(~us);
|
||||
threatenedByMinor =
|
||||
pos.attacks_by<KNIGHT>(~us) | pos.attacks_by<BISHOP>(~us) | threatenedByPawn;
|
||||
threatenedByRook = pos.attacks_by<ROOK>(~us) | threatenedByMinor;
|
||||
|
||||
// Pieces threatened by pieces of lesser material value
|
||||
threatenedPieces = (pos.pieces(us, QUEEN) & threatenedByRook)
|
||||
| (pos.pieces(us, ROOK) & threatenedByMinor)
|
||||
| (pos.pieces(us, KNIGHT, BISHOP) & threatenedByPawn);
|
||||
threatByLesser[KNIGHT] = threatByLesser[BISHOP] = pos.attacks_by<PAWN>(~us);
|
||||
threatByLesser[ROOK] =
|
||||
pos.attacks_by<KNIGHT>(~us) | pos.attacks_by<BISHOP>(~us) | threatByLesser[KNIGHT];
|
||||
threatByLesser[QUEEN] = pos.attacks_by<ROOK>(~us) | threatByLesser[ROOK];
|
||||
}
|
||||
|
||||
for (auto& m : *this)
|
||||
{
|
||||
const Square from = m.from_sq();
|
||||
const Square to = m.to_sq();
|
||||
const Piece pc = pos.moved_piece(m);
|
||||
const PieceType pt = type_of(pc);
|
||||
const Piece capturedPiece = pos.piece_on(to);
|
||||
|
||||
if constexpr (Type == CAPTURES)
|
||||
m.value =
|
||||
7 * int(PieceValue[pos.piece_on(m.to_sq())])
|
||||
+ (*captureHistory)[pos.moved_piece(m)][m.to_sq()][type_of(pos.piece_on(m.to_sq()))];
|
||||
m.value = (*captureHistory)[pc][to][type_of(capturedPiece)]
|
||||
+ 7 * int(PieceValue[capturedPiece]) + 1024 * bool(pos.check_squares(pt) & to);
|
||||
|
||||
else if constexpr (Type == QUIETS)
|
||||
{
|
||||
Piece pc = pos.moved_piece(m);
|
||||
PieceType pt = type_of(pc);
|
||||
Square from = m.from_sq();
|
||||
Square to = m.to_sq();
|
||||
|
||||
// histories
|
||||
m.value = 2 * (*mainHistory)[pos.side_to_move()][m.from_to()];
|
||||
m.value = 2 * (*mainHistory)[us][m.from_to()];
|
||||
m.value += 2 * (*pawnHistory)[pawn_structure_index(pos)][pc][to];
|
||||
m.value += (*continuationHistory[0])[pc][to];
|
||||
m.value += (*continuationHistory[1])[pc][to];
|
||||
m.value += (*continuationHistory[2])[pc][to];
|
||||
m.value += (*continuationHistory[3])[pc][to];
|
||||
m.value += (*continuationHistory[4])[pc][to] / 3;
|
||||
m.value += (*continuationHistory[5])[pc][to];
|
||||
|
||||
// bonus for checks
|
||||
m.value += bool(pos.check_squares(pt) & to) * 16384;
|
||||
m.value += (bool(pos.check_squares(pt) & to) && pos.see_ge(m, -75)) * 16384;
|
||||
|
||||
// bonus for escaping from capture
|
||||
m.value += threatenedPieces & from ? (pt == QUEEN && !(to & threatenedByRook) ? 51700
|
||||
: pt == ROOK && !(to & threatenedByMinor) ? 25600
|
||||
: !(to & threatenedByPawn) ? 14450
|
||||
: 0)
|
||||
: 0;
|
||||
|
||||
// malus for putting piece en prise
|
||||
m.value -= (pt == QUEEN ? bool(to & threatenedByRook) * 49000
|
||||
: pt == ROOK && bool(to & threatenedByMinor) ? 24335
|
||||
: 0);
|
||||
// penalty for moving to a square threatened by a lesser piece
|
||||
// or bonus for escaping an attack by a lesser piece.
|
||||
if (KNIGHT <= pt && pt <= QUEEN)
|
||||
{
|
||||
static constexpr int bonus[QUEEN + 1] = {0, 0, 144, 144, 256, 517};
|
||||
int v = threatByLesser[pt] & to ? -95 : 100 * bool(threatByLesser[pt] & from);
|
||||
m.value += bonus[pt] * v;
|
||||
}
|
||||
|
||||
if (ply < LOW_PLY_HISTORY_SIZE)
|
||||
m.value += 8 * (*lowPlyHistory)[ply][m.from_to()] / (1 + 2 * ply);
|
||||
m.value += 8 * (*lowPlyHistory)[ply][m.from_to()] / (1 + ply);
|
||||
}
|
||||
|
||||
else // Type == EVASIONS
|
||||
{
|
||||
if (pos.capture_stage(m))
|
||||
m.value = PieceValue[pos.piece_on(m.to_sq())] + (1 << 28);
|
||||
m.value = PieceValue[capturedPiece] + (1 << 28);
|
||||
else
|
||||
m.value = (*mainHistory)[pos.side_to_move()][m.from_to()]
|
||||
+ (*continuationHistory[0])[pos.moved_piece(m)][m.to_sq()]
|
||||
+ (*pawnHistory)[pawn_structure_index(pos)][pos.moved_piece(m)][m.to_sq()];
|
||||
{
|
||||
m.value = (*mainHistory)[us][m.from_to()] + (*continuationHistory[0])[pc][to];
|
||||
if (ply < LOW_PLY_HISTORY_SIZE)
|
||||
m.value += 2 * (*lowPlyHistory)[ply][m.from_to()] / (1 + ply);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the next move satisfying a predicate function.
|
||||
@@ -200,7 +196,7 @@ void MovePicker::score() {
|
||||
template<typename Pred>
|
||||
Move MovePicker::select(Pred filter) {
|
||||
|
||||
for (; cur < endMoves; ++cur)
|
||||
for (; cur < endCur; ++cur)
|
||||
if (*cur != ttMove && filter())
|
||||
return *cur++;
|
||||
|
||||
@@ -212,8 +208,7 @@ Move MovePicker::select(Pred filter) {
|
||||
// picking the move with the highest score from a list of generated moves.
|
||||
Move MovePicker::next_move() {
|
||||
|
||||
auto quiet_threshold = [](Depth d) { return -3560 * d; };
|
||||
|
||||
constexpr int goodQuietThreshold = -14000;
|
||||
top:
|
||||
switch (stage)
|
||||
{
|
||||
@@ -229,18 +224,19 @@ top:
|
||||
case PROBCUT_INIT :
|
||||
case QCAPTURE_INIT :
|
||||
cur = endBadCaptures = moves;
|
||||
endMoves = generate<CAPTURES>(pos, cur);
|
||||
endCur = endCaptures = generate<CAPTURES>(pos, cur);
|
||||
|
||||
score<CAPTURES>();
|
||||
partial_insertion_sort(cur, endMoves, std::numeric_limits<int>::min());
|
||||
partial_insertion_sort(cur, endCur, std::numeric_limits<int>::min());
|
||||
++stage;
|
||||
goto top;
|
||||
|
||||
case GOOD_CAPTURE :
|
||||
if (select([&]() {
|
||||
// Move losing capture to endBadCaptures to be tried later
|
||||
return pos.see_ge(*cur, -cur->value / 18) ? true
|
||||
: (*endBadCaptures++ = *cur, false);
|
||||
if (pos.see_ge(*cur, -cur->value / 18))
|
||||
return true;
|
||||
std::swap(*endBadCaptures++, *cur);
|
||||
return false;
|
||||
}))
|
||||
return *(cur - 1);
|
||||
|
||||
@@ -250,29 +246,22 @@ top:
|
||||
case QUIET_INIT :
|
||||
if (!skipQuiets)
|
||||
{
|
||||
cur = endBadCaptures;
|
||||
endMoves = beginBadQuiets = endBadQuiets = generate<QUIETS>(pos, cur);
|
||||
endCur = endGenerated = generate<QUIETS>(pos, cur);
|
||||
|
||||
score<QUIETS>();
|
||||
partial_insertion_sort(cur, endMoves, quiet_threshold(depth));
|
||||
partial_insertion_sort(cur, endCur, -3560 * depth);
|
||||
}
|
||||
|
||||
++stage;
|
||||
[[fallthrough]];
|
||||
|
||||
case GOOD_QUIET :
|
||||
if (!skipQuiets && select([]() { return true; }))
|
||||
{
|
||||
if ((cur - 1)->value > -7998 || (cur - 1)->value <= quiet_threshold(depth))
|
||||
return *(cur - 1);
|
||||
|
||||
// Remaining quiets are bad
|
||||
beginBadQuiets = cur - 1;
|
||||
}
|
||||
if (!skipQuiets && select([&]() { return cur->value > goodQuietThreshold; }))
|
||||
return *(cur - 1);
|
||||
|
||||
// Prepare the pointers to loop over the bad captures
|
||||
cur = moves;
|
||||
endMoves = endBadCaptures;
|
||||
cur = moves;
|
||||
endCur = endBadCaptures;
|
||||
|
||||
++stage;
|
||||
[[fallthrough]];
|
||||
@@ -281,25 +270,25 @@ top:
|
||||
if (select([]() { return true; }))
|
||||
return *(cur - 1);
|
||||
|
||||
// Prepare the pointers to loop over the bad quiets
|
||||
cur = beginBadQuiets;
|
||||
endMoves = endBadQuiets;
|
||||
// Prepare the pointers to loop over quiets again
|
||||
cur = endCaptures;
|
||||
endCur = endGenerated;
|
||||
|
||||
++stage;
|
||||
[[fallthrough]];
|
||||
|
||||
case BAD_QUIET :
|
||||
if (!skipQuiets)
|
||||
return select([]() { return true; });
|
||||
return select([&]() { return cur->value <= goodQuietThreshold; });
|
||||
|
||||
return Move::none();
|
||||
|
||||
case EVASION_INIT :
|
||||
cur = moves;
|
||||
endMoves = generate<EVASIONS>(pos, cur);
|
||||
cur = moves;
|
||||
endCur = endGenerated = generate<EVASIONS>(pos, cur);
|
||||
|
||||
score<EVASIONS>();
|
||||
partial_insertion_sort(cur, endMoves, std::numeric_limits<int>::min());
|
||||
partial_insertion_sort(cur, endCur, std::numeric_limits<int>::min());
|
||||
++stage;
|
||||
[[fallthrough]];
|
||||
|
||||
@@ -317,4 +306,18 @@ top:
|
||||
|
||||
void MovePicker::skip_quiet_moves() { skipQuiets = true; }
|
||||
|
||||
// this function must be called after all quiet moves and captures have been generated
|
||||
bool MovePicker::can_move_king_or_pawn() const {
|
||||
// SEE negative captures shouldn't be returned in GOOD_CAPTURE stage
|
||||
assert(stage > GOOD_CAPTURE && stage != EVASION_INIT);
|
||||
|
||||
for (const ExtMove* m = moves; m < endGenerated; ++m)
|
||||
{
|
||||
PieceType movedPieceType = type_of(pos.moved_piece(*m));
|
||||
if ((movedPieceType == PAWN || movedPieceType == KING) && pos.legal(*m))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
@@ -50,6 +50,7 @@ class MovePicker {
|
||||
MovePicker(const Position&, Move, int, const CapturePieceToHistory*);
|
||||
Move next_move();
|
||||
void skip_quiet_moves();
|
||||
bool can_move_king_or_pawn() const;
|
||||
|
||||
private:
|
||||
template<typename Pred>
|
||||
@@ -57,7 +58,7 @@ class MovePicker {
|
||||
template<GenType>
|
||||
void score();
|
||||
ExtMove* begin() { return cur; }
|
||||
ExtMove* end() { return endMoves; }
|
||||
ExtMove* end() { return endCur; }
|
||||
|
||||
const Position& pos;
|
||||
const ButterflyHistory* mainHistory;
|
||||
@@ -66,7 +67,7 @@ class MovePicker {
|
||||
const PieceToHistory** continuationHistory;
|
||||
const PawnHistory* pawnHistory;
|
||||
Move ttMove;
|
||||
ExtMove * cur, *endMoves, *endBadCaptures, *beginBadQuiets, *endBadQuiets;
|
||||
ExtMove * cur, *endCur, *endBadCaptures, *endCaptures, *endGenerated;
|
||||
int stage;
|
||||
int threshold;
|
||||
Depth depth;
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
#include "../../bitboard.h"
|
||||
#include "../../position.h"
|
||||
#include "../../types.h"
|
||||
#include "../nnue_accumulator.h"
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE::Features {
|
||||
|
||||
@@ -58,13 +58,15 @@ void HalfKAv2_hm::append_changed_indices(Square ksq,
|
||||
const DirtyPiece& dp,
|
||||
IndexList& removed,
|
||||
IndexList& added) {
|
||||
for (int i = 0; i < dp.dirty_num; ++i)
|
||||
{
|
||||
if (dp.from[i] != SQ_NONE)
|
||||
removed.push_back(make_index<Perspective>(dp.from[i], dp.piece[i], ksq));
|
||||
if (dp.to[i] != SQ_NONE)
|
||||
added.push_back(make_index<Perspective>(dp.to[i], dp.piece[i], ksq));
|
||||
}
|
||||
removed.push_back(make_index<Perspective>(dp.from, dp.pc, ksq));
|
||||
if (dp.to != SQ_NONE)
|
||||
added.push_back(make_index<Perspective>(dp.to, dp.pc, ksq));
|
||||
|
||||
if (dp.remove_sq != SQ_NONE)
|
||||
removed.push_back(make_index<Perspective>(dp.remove_sq, dp.remove_pc, ksq));
|
||||
|
||||
if (dp.add_sq != SQ_NONE)
|
||||
added.push_back(make_index<Perspective>(dp.add_sq, dp.add_pc, ksq));
|
||||
}
|
||||
|
||||
// Explicit template instantiations
|
||||
@@ -78,7 +80,7 @@ template void HalfKAv2_hm::append_changed_indices<BLACK>(Square ksq,
|
||||
IndexList& added);
|
||||
|
||||
bool HalfKAv2_hm::requires_refresh(const DirtyPiece& dirtyPiece, Color perspective) {
|
||||
return dirtyPiece.piece[0] == make_piece(perspective, KING);
|
||||
return dirtyPiece.pc == make_piece(perspective, KING);
|
||||
}
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE::Features
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "../nnue_common.h"
|
||||
#include "simd.h"
|
||||
#include "../simd.h"
|
||||
|
||||
/*
|
||||
This file contains the definition for a fully connected layer (aka affine transform).
|
||||
@@ -102,7 +102,7 @@ static void affine_transform_non_ssse3(std::int32_t* output,
|
||||
product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]);
|
||||
sum = vpadalq_s16(sum, product);
|
||||
}
|
||||
output[i] = Simd::neon_m128_reduce_add_epi32(sum);
|
||||
output[i] = SIMD::neon_m128_reduce_add_epi32(sum);
|
||||
|
||||
#endif
|
||||
}
|
||||
@@ -191,20 +191,20 @@ class AffineTransform {
|
||||
#if defined(USE_AVX512)
|
||||
using vec_t = __m512i;
|
||||
#define vec_set_32 _mm512_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32
|
||||
#define vec_add_dpbusd_32 SIMD::m512_add_dpbusd_epi32
|
||||
#elif defined(USE_AVX2)
|
||||
using vec_t = __m256i;
|
||||
#define vec_set_32 _mm256_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
|
||||
#define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32
|
||||
#elif defined(USE_SSSE3)
|
||||
using vec_t = __m128i;
|
||||
#define vec_set_32 _mm_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
|
||||
#define vec_add_dpbusd_32 SIMD::m128_add_dpbusd_epi32
|
||||
#elif defined(USE_NEON_DOTPROD)
|
||||
using vec_t = int32x4_t;
|
||||
#define vec_set_32 vdupq_n_s32
|
||||
#define vec_add_dpbusd_32(acc, a, b) \
|
||||
Simd::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \
|
||||
SIMD::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \
|
||||
vreinterpretq_s8_s32(b))
|
||||
#endif
|
||||
|
||||
@@ -245,23 +245,20 @@ class AffineTransform {
|
||||
#if defined(USE_AVX2)
|
||||
using vec_t = __m256i;
|
||||
#define vec_setzero() _mm256_setzero_si256()
|
||||
#define vec_set_32 _mm256_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
|
||||
#define vec_hadd Simd::m256_hadd
|
||||
#define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32
|
||||
#define vec_hadd SIMD::m256_hadd
|
||||
#elif defined(USE_SSSE3)
|
||||
using vec_t = __m128i;
|
||||
#define vec_setzero() _mm_setzero_si128()
|
||||
#define vec_set_32 _mm_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
|
||||
#define vec_hadd Simd::m128_hadd
|
||||
#define vec_add_dpbusd_32 SIMD::m128_add_dpbusd_epi32
|
||||
#define vec_hadd SIMD::m128_hadd
|
||||
#elif defined(USE_NEON_DOTPROD)
|
||||
using vec_t = int32x4_t;
|
||||
#define vec_setzero() vdupq_n_s32(0)
|
||||
#define vec_set_32 vdupq_n_s32
|
||||
#define vec_add_dpbusd_32(acc, a, b) \
|
||||
Simd::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \
|
||||
SIMD::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \
|
||||
vreinterpretq_s8_s32(b))
|
||||
#define vec_hadd Simd::neon_m128_hadd
|
||||
#define vec_hadd SIMD::neon_m128_hadd
|
||||
#endif
|
||||
|
||||
const auto inputVector = reinterpret_cast<const vec_t*>(input);
|
||||
@@ -282,7 +279,6 @@ class AffineTransform {
|
||||
output[0] = vec_hadd(sum0, biases[0]);
|
||||
|
||||
#undef vec_setzero
|
||||
#undef vec_set_32
|
||||
#undef vec_add_dpbusd_32
|
||||
#undef vec_hadd
|
||||
}
|
||||
|
||||
@@ -22,14 +22,12 @@
|
||||
#define NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
|
||||
#include "../../bitboard.h"
|
||||
#include "../simd.h"
|
||||
#include "../nnue_common.h"
|
||||
#include "affine_transform.h"
|
||||
#include "simd.h"
|
||||
|
||||
/*
|
||||
This file contains the definition for a fully connected layer (aka affine transform) with block sparse input.
|
||||
@@ -51,11 +49,7 @@ constexpr int constexpr_lsb(uint64_t bb) {
|
||||
|
||||
alignas(CacheLineSize) static constexpr struct OffsetIndices {
|
||||
|
||||
#if (USE_SSE41)
|
||||
std::uint8_t offset_indices[256][8];
|
||||
#else
|
||||
std::uint16_t offset_indices[256][8];
|
||||
#endif
|
||||
|
||||
constexpr OffsetIndices() :
|
||||
offset_indices() {
|
||||
@@ -74,56 +68,52 @@ alignas(CacheLineSize) static constexpr struct OffsetIndices {
|
||||
|
||||
} Lookup;
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#define RESTRICT __restrict__
|
||||
#elif defined(_MSC_VER)
|
||||
#define RESTRICT __restrict
|
||||
#else
|
||||
#define RESTRICT
|
||||
#endif
|
||||
|
||||
// Find indices of nonzero numbers in an int32_t array
|
||||
template<const IndexType InputDimensions>
|
||||
void find_nnz(const std::int32_t* input, std::uint16_t* out, IndexType& count_out) {
|
||||
#if defined(USE_SSSE3)
|
||||
#if defined(USE_AVX512)
|
||||
using vec_t = __m512i;
|
||||
#define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512())
|
||||
#elif defined(USE_AVX2)
|
||||
using vec_t = __m256i;
|
||||
#if defined(USE_VNNI) && !defined(USE_AVXVNNI)
|
||||
#define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256())
|
||||
#else
|
||||
#define vec_nnz(a) \
|
||||
_mm256_movemask_ps( \
|
||||
_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
|
||||
#endif
|
||||
#elif defined(USE_SSSE3)
|
||||
using vec_t = __m128i;
|
||||
#define vec_nnz(a) \
|
||||
_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128())))
|
||||
#endif
|
||||
using vec128_t = __m128i;
|
||||
#define vec128_zero _mm_setzero_si128()
|
||||
#define vec128_set_16(a) _mm_set1_epi16(a)
|
||||
#if (USE_SSE41)
|
||||
#define vec128_load(a) _mm_cvtepu8_epi16(_mm_loadl_epi64(a))
|
||||
#else
|
||||
#define vec128_load(a) _mm_load_si128(a)
|
||||
#endif
|
||||
#define vec128_storeu(a, b) _mm_storeu_si128(a, b)
|
||||
#define vec128_add(a, b) _mm_add_epi16(a, b)
|
||||
#elif defined(USE_NEON)
|
||||
using vec_t = uint32x4_t;
|
||||
static const std::uint32_t Mask[4] = {1, 2, 4, 8};
|
||||
#define vec_nnz(a) vaddvq_u32(vandq_u32(vtstq_u32(a, a), vld1q_u32(Mask)))
|
||||
using vec128_t = uint16x8_t;
|
||||
#define vec128_zero vdupq_n_u16(0)
|
||||
#define vec128_set_16(a) vdupq_n_u16(a)
|
||||
#define vec128_load(a) vld1q_u16(reinterpret_cast<const std::uint16_t*>(a))
|
||||
#define vec128_storeu(a, b) vst1q_u16(reinterpret_cast<std::uint16_t*>(a), b)
|
||||
#define vec128_add(a, b) vaddq_u16(a, b)
|
||||
#endif
|
||||
constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(std::int32_t);
|
||||
void find_nnz(const std::int32_t* RESTRICT input,
|
||||
std::uint16_t* RESTRICT out,
|
||||
IndexType& count_out) {
|
||||
|
||||
#ifdef USE_AVX512
|
||||
constexpr IndexType SimdWidth = 16; // 512 bits / 32 bits
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
const __m512i increment = _mm512_set1_epi32(SimdWidth);
|
||||
__m512i base = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
||||
IndexType count = 0;
|
||||
for (IndexType i = 0; i < NumChunks; ++i)
|
||||
{
|
||||
const __m512i inputV = _mm512_load_si512(input + i * SimdWidth);
|
||||
|
||||
// Get a bitmask and gather non zero indices
|
||||
const __mmask16 nnzMask = _mm512_test_epi32_mask(inputV, inputV);
|
||||
const __m512i nnzV = _mm512_maskz_compress_epi32(nnzMask, base);
|
||||
_mm512_mask_cvtepi32_storeu_epi16(out + count, 0xFFFF, nnzV);
|
||||
count += popcount(nnzMask);
|
||||
base = _mm512_add_epi32(base, increment);
|
||||
}
|
||||
count_out = count;
|
||||
|
||||
#else
|
||||
|
||||
using namespace SIMD;
|
||||
|
||||
constexpr IndexType InputSimdWidth = sizeof(vec_uint_t) / sizeof(std::int32_t);
|
||||
// Inputs are processed InputSimdWidth at a time and outputs are processed 8 at a time so we process in chunks of max(InputSimdWidth, 8)
|
||||
constexpr IndexType ChunkSize = std::max<IndexType>(InputSimdWidth, 8);
|
||||
constexpr IndexType NumChunks = InputDimensions / ChunkSize;
|
||||
constexpr IndexType InputsPerChunk = ChunkSize / InputSimdWidth;
|
||||
constexpr IndexType OutputsPerChunk = ChunkSize / 8;
|
||||
|
||||
const auto inputVector = reinterpret_cast<const vec_t*>(input);
|
||||
const auto inputVector = reinterpret_cast<const vec_uint_t*>(input);
|
||||
IndexType count = 0;
|
||||
vec128_t base = vec128_zero;
|
||||
const vec128_t increment = vec128_set_16(8);
|
||||
@@ -133,7 +123,7 @@ void find_nnz(const std::int32_t* input, std::uint16_t* out, IndexType& count_ou
|
||||
unsigned nnz = 0;
|
||||
for (IndexType j = 0; j < InputsPerChunk; ++j)
|
||||
{
|
||||
const vec_t inputChunk = inputVector[i * InputsPerChunk + j];
|
||||
const vec_uint_t inputChunk = inputVector[i * InputsPerChunk + j];
|
||||
nnz |= unsigned(vec_nnz(inputChunk)) << (j * InputSimdWidth);
|
||||
}
|
||||
for (IndexType j = 0; j < OutputsPerChunk; ++j)
|
||||
@@ -147,13 +137,9 @@ void find_nnz(const std::int32_t* input, std::uint16_t* out, IndexType& count_ou
|
||||
}
|
||||
}
|
||||
count_out = count;
|
||||
#endif
|
||||
}
|
||||
#undef vec_nnz
|
||||
#undef vec128_zero
|
||||
#undef vec128_set_16
|
||||
#undef vec128_load
|
||||
#undef vec128_storeu
|
||||
#undef vec128_add
|
||||
|
||||
#endif
|
||||
|
||||
// Sparse input implementation
|
||||
@@ -232,27 +218,27 @@ class AffineTransformSparseInput {
|
||||
using invec_t = __m512i;
|
||||
using outvec_t = __m512i;
|
||||
#define vec_set_32 _mm512_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32
|
||||
#define vec_add_dpbusd_32 SIMD::m512_add_dpbusd_epi32
|
||||
#elif defined(USE_AVX2)
|
||||
using invec_t = __m256i;
|
||||
using outvec_t = __m256i;
|
||||
#define vec_set_32 _mm256_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
|
||||
#define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32
|
||||
#elif defined(USE_SSSE3)
|
||||
using invec_t = __m128i;
|
||||
using outvec_t = __m128i;
|
||||
#define vec_set_32 _mm_set1_epi32
|
||||
#define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
|
||||
#define vec_add_dpbusd_32 SIMD::m128_add_dpbusd_epi32
|
||||
#elif defined(USE_NEON_DOTPROD)
|
||||
using invec_t = int8x16_t;
|
||||
using outvec_t = int32x4_t;
|
||||
#define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a))
|
||||
#define vec_add_dpbusd_32 Simd::dotprod_m128_add_dpbusd_epi32
|
||||
#define vec_add_dpbusd_32 SIMD::dotprod_m128_add_dpbusd_epi32
|
||||
#elif defined(USE_NEON)
|
||||
using invec_t = int8x16_t;
|
||||
using outvec_t = int32x4_t;
|
||||
#define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a))
|
||||
#define vec_add_dpbusd_32 Simd::neon_m128_add_dpbusd_epi32
|
||||
#define vec_add_dpbusd_32 SIMD::neon_m128_add_dpbusd_epi32
|
||||
#endif
|
||||
static constexpr IndexType OutputSimdWidth = sizeof(outvec_t) / sizeof(OutputType);
|
||||
|
||||
|
||||
@@ -1,134 +0,0 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef STOCKFISH_SIMD_H_INCLUDED
|
||||
#define STOCKFISH_SIMD_H_INCLUDED
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
#include <immintrin.h>
|
||||
|
||||
#elif defined(USE_SSE41)
|
||||
#include <smmintrin.h>
|
||||
|
||||
#elif defined(USE_SSSE3)
|
||||
#include <tmmintrin.h>
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
namespace Stockfish::Simd {
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
|
||||
[[maybe_unused]] static int m512_hadd(__m512i sum, int bias) {
|
||||
return _mm512_reduce_add_epi32(sum) + bias;
|
||||
}
|
||||
|
||||
[[maybe_unused]] static void m512_add_dpbusd_epi32(__m512i& acc, __m512i a, __m512i b) {
|
||||
|
||||
#if defined(USE_VNNI)
|
||||
acc = _mm512_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
__m512i product0 = _mm512_maddubs_epi16(a, b);
|
||||
product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1));
|
||||
acc = _mm512_add_epi32(acc, product0);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
|
||||
[[maybe_unused]] static int m256_hadd(__m256i sum, int bias) {
|
||||
__m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
|
||||
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
|
||||
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
|
||||
return _mm_cvtsi128_si32(sum128) + bias;
|
||||
}
|
||||
|
||||
[[maybe_unused]] static void m256_add_dpbusd_epi32(__m256i& acc, __m256i a, __m256i b) {
|
||||
|
||||
#if defined(USE_VNNI)
|
||||
acc = _mm256_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
__m256i product0 = _mm256_maddubs_epi16(a, b);
|
||||
product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1));
|
||||
acc = _mm256_add_epi32(acc, product0);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(USE_SSSE3)
|
||||
|
||||
[[maybe_unused]] static int m128_hadd(__m128i sum, int bias) {
|
||||
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
|
||||
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
|
||||
return _mm_cvtsi128_si32(sum) + bias;
|
||||
}
|
||||
|
||||
[[maybe_unused]] static void m128_add_dpbusd_epi32(__m128i& acc, __m128i a, __m128i b) {
|
||||
|
||||
__m128i product0 = _mm_maddubs_epi16(a, b);
|
||||
product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1));
|
||||
acc = _mm_add_epi32(acc, product0);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(USE_NEON_DOTPROD)
|
||||
|
||||
[[maybe_unused]] static void
|
||||
dotprod_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) {
|
||||
|
||||
acc = vdotq_s32(acc, a, b);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(USE_NEON)
|
||||
|
||||
[[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) {
|
||||
#if USE_NEON >= 8
|
||||
return vaddvq_s32(s);
|
||||
#else
|
||||
return s[0] + s[1] + s[2] + s[3];
|
||||
#endif
|
||||
}
|
||||
|
||||
[[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) {
|
||||
return neon_m128_reduce_add_epi32(sum) + bias;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if USE_NEON >= 8
|
||||
[[maybe_unused]] static void neon_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) {
|
||||
|
||||
int16x8_t product0 = vmull_s8(vget_low_s8(a), vget_low_s8(b));
|
||||
int16x8_t product1 = vmull_high_s8(a, b);
|
||||
int16x8_t sum = vpaddq_s16(product0, product1);
|
||||
acc = vpadalq_s16(acc, sum);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // STOCKFISH_SIMD_H_INCLUDED
|
||||
@@ -212,21 +212,11 @@ NetworkOutput
|
||||
Network<Arch, Transformer>::evaluate(const Position& pos,
|
||||
AccumulatorStack& accumulatorStack,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache) const {
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
|
||||
constexpr uint64_t alignment = CacheLineSize;
|
||||
|
||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||
TransformedFeatureType
|
||||
transformedFeaturesUnaligned[FeatureTransformer<FTDimensions, nullptr>::BufferSize
|
||||
+ alignment / sizeof(TransformedFeatureType)];
|
||||
|
||||
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
|
||||
#else
|
||||
alignas(alignment) TransformedFeatureType
|
||||
transformedFeatures[FeatureTransformer<FTDimensions, nullptr>::BufferSize];
|
||||
#endif
|
||||
alignas(alignment)
|
||||
TransformedFeatureType transformedFeatures[FeatureTransformer<FTDimensions>::BufferSize];
|
||||
|
||||
ASSERT_ALIGNED(transformedFeatures, alignment);
|
||||
|
||||
@@ -284,20 +274,11 @@ NnueEvalTrace
|
||||
Network<Arch, Transformer>::trace_evaluate(const Position& pos,
|
||||
AccumulatorStack& accumulatorStack,
|
||||
AccumulatorCaches::Cache<FTDimensions>* cache) const {
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
|
||||
constexpr uint64_t alignment = CacheLineSize;
|
||||
|
||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||
TransformedFeatureType
|
||||
transformedFeaturesUnaligned[FeatureTransformer<FTDimensions, nullptr>::BufferSize
|
||||
+ alignment / sizeof(TransformedFeatureType)];
|
||||
|
||||
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
|
||||
#else
|
||||
alignas(alignment) TransformedFeatureType
|
||||
transformedFeatures[FeatureTransformer<FTDimensions, nullptr>::BufferSize];
|
||||
#endif
|
||||
alignas(alignment)
|
||||
TransformedFeatureType transformedFeatures[FeatureTransformer<FTDimensions>::BufferSize];
|
||||
|
||||
ASSERT_ALIGNED(transformedFeatures, alignment);
|
||||
|
||||
@@ -452,12 +433,10 @@ bool Network<Arch, Transformer>::write_parameters(std::ostream& stream,
|
||||
|
||||
// Explicit template instantiations
|
||||
|
||||
template class Network<
|
||||
NetworkArchitecture<TransformedFeatureDimensionsBig, L2Big, L3Big>,
|
||||
FeatureTransformer<TransformedFeatureDimensionsBig, &AccumulatorState::accumulatorBig>>;
|
||||
template class Network<NetworkArchitecture<TransformedFeatureDimensionsBig, L2Big, L3Big>,
|
||||
FeatureTransformer<TransformedFeatureDimensionsBig>>;
|
||||
|
||||
template class Network<
|
||||
NetworkArchitecture<TransformedFeatureDimensionsSmall, L2Small, L3Small>,
|
||||
FeatureTransformer<TransformedFeatureDimensionsSmall, &AccumulatorState::accumulatorSmall>>;
|
||||
template class Network<NetworkArchitecture<TransformedFeatureDimensionsSmall, L2Small, L3Small>,
|
||||
FeatureTransformer<TransformedFeatureDimensionsSmall>>;
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
#include "../types.h"
|
||||
#include "nnue_accumulator.h"
|
||||
#include "nnue_architecture.h"
|
||||
#include "nnue_common.h"
|
||||
#include "nnue_feature_transformer.h"
|
||||
#include "nnue_misc.h"
|
||||
|
||||
@@ -110,13 +111,11 @@ class Network {
|
||||
};
|
||||
|
||||
// Definitions of the network types
|
||||
using SmallFeatureTransformer =
|
||||
FeatureTransformer<TransformedFeatureDimensionsSmall, &AccumulatorState::accumulatorSmall>;
|
||||
using SmallFeatureTransformer = FeatureTransformer<TransformedFeatureDimensionsSmall>;
|
||||
using SmallNetworkArchitecture =
|
||||
NetworkArchitecture<TransformedFeatureDimensionsSmall, L2Small, L3Small>;
|
||||
|
||||
using BigFeatureTransformer =
|
||||
FeatureTransformer<TransformedFeatureDimensionsBig, &AccumulatorState::accumulatorBig>;
|
||||
using BigFeatureTransformer = FeatureTransformer<TransformedFeatureDimensionsBig>;
|
||||
using BigNetworkArchitecture = NetworkArchitecture<TransformedFeatureDimensionsBig, L2Big, L3Big>;
|
||||
|
||||
using NetworkBig = Network<BigNetworkArchitecture, BigFeatureTransformer>;
|
||||
|
||||
@@ -19,49 +19,43 @@
|
||||
#include "nnue_accumulator.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <initializer_list>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
||||
#include "../bitboard.h"
|
||||
#include "../misc.h"
|
||||
#include "../position.h"
|
||||
#include "../types.h"
|
||||
#include "network.h"
|
||||
#include "nnue_architecture.h"
|
||||
#include "nnue_common.h"
|
||||
#include "nnue_feature_transformer.h"
|
||||
#include "nnue_feature_transformer.h" // IWYU pragma: keep
|
||||
#include "simd.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
#define sf_assume(cond) \
|
||||
do \
|
||||
{ \
|
||||
if (!(cond)) \
|
||||
__builtin_unreachable(); \
|
||||
} while (0)
|
||||
#else
|
||||
// do nothing for other compilers
|
||||
#define sf_assume(cond)
|
||||
#endif
|
||||
using namespace SIMD;
|
||||
|
||||
namespace {
|
||||
|
||||
template<Color Perspective,
|
||||
IncUpdateDirection Direction = FORWARD,
|
||||
IndexType TransformedFeatureDimensions,
|
||||
Accumulator<TransformedFeatureDimensions> AccumulatorState::*accPtr>
|
||||
void update_accumulator_incremental(
|
||||
const FeatureTransformer<TransformedFeatureDimensions, accPtr>& featureTransformer,
|
||||
const Square ksq,
|
||||
AccumulatorState& target_state,
|
||||
const AccumulatorState& computed);
|
||||
template<Color Perspective, IndexType TransformedFeatureDimensions>
|
||||
void double_inc_update(const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
|
||||
const Square ksq,
|
||||
AccumulatorState& middle_state,
|
||||
AccumulatorState& target_state,
|
||||
const AccumulatorState& computed);
|
||||
|
||||
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void update_accumulator_refresh_cache(
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
const Position& pos,
|
||||
AccumulatorState& accumulatorState,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache);
|
||||
template<Color Perspective, bool Forward, IndexType TransformedFeatureDimensions>
|
||||
void update_accumulator_incremental(
|
||||
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
|
||||
const Square ksq,
|
||||
AccumulatorState& target_state,
|
||||
const AccumulatorState& computed);
|
||||
|
||||
template<Color Perspective, IndexType Dimensions>
|
||||
void update_accumulator_refresh_cache(const FeatureTransformer<Dimensions>& featureTransformer,
|
||||
const Position& pos,
|
||||
AccumulatorState& accumulatorState,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache);
|
||||
|
||||
}
|
||||
|
||||
@@ -71,63 +65,43 @@ void AccumulatorState::reset(const DirtyPiece& dp) noexcept {
|
||||
accumulatorSmall.computed.fill(false);
|
||||
}
|
||||
|
||||
const AccumulatorState& AccumulatorStack::latest() const noexcept {
|
||||
return m_accumulators[m_current_idx - 1];
|
||||
}
|
||||
const AccumulatorState& AccumulatorStack::latest() const noexcept { return accumulators[size - 1]; }
|
||||
|
||||
AccumulatorState& AccumulatorStack::mut_latest() noexcept {
|
||||
return m_accumulators[m_current_idx - 1];
|
||||
}
|
||||
AccumulatorState& AccumulatorStack::mut_latest() noexcept { return accumulators[size - 1]; }
|
||||
|
||||
void AccumulatorStack::reset(const Position& rootPos,
|
||||
const Networks& networks,
|
||||
AccumulatorCaches& caches) noexcept {
|
||||
m_current_idx = 1;
|
||||
|
||||
update_accumulator_refresh_cache<WHITE, TransformedFeatureDimensionsBig,
|
||||
&AccumulatorState::accumulatorBig>(
|
||||
*networks.big.featureTransformer, rootPos, m_accumulators[0], caches.big);
|
||||
update_accumulator_refresh_cache<BLACK, TransformedFeatureDimensionsBig,
|
||||
&AccumulatorState::accumulatorBig>(
|
||||
*networks.big.featureTransformer, rootPos, m_accumulators[0], caches.big);
|
||||
|
||||
update_accumulator_refresh_cache<WHITE, TransformedFeatureDimensionsSmall,
|
||||
&AccumulatorState::accumulatorSmall>(
|
||||
*networks.small.featureTransformer, rootPos, m_accumulators[0], caches.small);
|
||||
update_accumulator_refresh_cache<BLACK, TransformedFeatureDimensionsSmall,
|
||||
&AccumulatorState::accumulatorSmall>(
|
||||
*networks.small.featureTransformer, rootPos, m_accumulators[0], caches.small);
|
||||
void AccumulatorStack::reset() noexcept {
|
||||
accumulators[0].reset({});
|
||||
size = 1;
|
||||
}
|
||||
|
||||
void AccumulatorStack::push(const DirtyPiece& dirtyPiece) noexcept {
|
||||
assert(m_current_idx + 1 < m_accumulators.size());
|
||||
m_accumulators[m_current_idx].reset(dirtyPiece);
|
||||
m_current_idx++;
|
||||
assert(size + 1 < accumulators.size());
|
||||
accumulators[size].reset(dirtyPiece);
|
||||
size++;
|
||||
}
|
||||
|
||||
void AccumulatorStack::pop() noexcept {
|
||||
assert(m_current_idx > 1);
|
||||
m_current_idx--;
|
||||
assert(size > 1);
|
||||
size--;
|
||||
}
|
||||
|
||||
template<IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void AccumulatorStack::evaluate(const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
|
||||
template<IndexType Dimensions>
|
||||
void AccumulatorStack::evaluate(const Position& pos,
|
||||
const FeatureTransformer<Dimensions>& featureTransformer,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
|
||||
|
||||
evaluate_side<WHITE>(pos, featureTransformer, cache);
|
||||
evaluate_side<BLACK>(pos, featureTransformer, cache);
|
||||
}
|
||||
|
||||
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void AccumulatorStack::evaluate_side(
|
||||
const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
|
||||
template<Color Perspective, IndexType Dimensions>
|
||||
void AccumulatorStack::evaluate_side(const Position& pos,
|
||||
const FeatureTransformer<Dimensions>& featureTransformer,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
|
||||
|
||||
const auto last_usable_accum = find_last_usable_accumulator<Perspective, Dimensions, accPtr>();
|
||||
const auto last_usable_accum = find_last_usable_accumulator<Perspective, Dimensions>();
|
||||
|
||||
if ((m_accumulators[last_usable_accum].*accPtr).computed[Perspective])
|
||||
if ((accumulators[last_usable_accum].template acc<Dimensions>()).computed[Perspective])
|
||||
forward_update_incremental<Perspective>(pos, featureTransformer, last_usable_accum);
|
||||
|
||||
else
|
||||
@@ -139,91 +113,202 @@ void AccumulatorStack::evaluate_side(
|
||||
|
||||
// Find the earliest usable accumulator, this can either be a computed accumulator or the accumulator
|
||||
// state just before a change that requires full refresh.
|
||||
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
template<Color Perspective, IndexType Dimensions>
|
||||
std::size_t AccumulatorStack::find_last_usable_accumulator() const noexcept {
|
||||
|
||||
for (std::size_t curr_idx = m_current_idx - 1; curr_idx > 0; curr_idx--)
|
||||
for (std::size_t curr_idx = size - 1; curr_idx > 0; curr_idx--)
|
||||
{
|
||||
if ((m_accumulators[curr_idx].*accPtr).computed[Perspective])
|
||||
if ((accumulators[curr_idx].template acc<Dimensions>()).computed[Perspective])
|
||||
return curr_idx;
|
||||
|
||||
if (FeatureSet::requires_refresh(m_accumulators[curr_idx].dirtyPiece, Perspective))
|
||||
if (FeatureSet::requires_refresh(accumulators[curr_idx].dirtyPiece, Perspective))
|
||||
return curr_idx;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
template<Color Perspective, IndexType Dimensions>
|
||||
void AccumulatorStack::forward_update_incremental(
|
||||
const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
const std::size_t begin) noexcept {
|
||||
const Position& pos,
|
||||
const FeatureTransformer<Dimensions>& featureTransformer,
|
||||
const std::size_t begin) noexcept {
|
||||
|
||||
assert(begin < m_accumulators.size());
|
||||
assert((m_accumulators[begin].*accPtr).computed[Perspective]);
|
||||
assert(begin < accumulators.size());
|
||||
assert((accumulators[begin].acc<Dimensions>()).computed[Perspective]);
|
||||
|
||||
const Square ksq = pos.square<KING>(Perspective);
|
||||
|
||||
for (std::size_t next = begin + 1; next < m_current_idx; next++)
|
||||
update_accumulator_incremental<Perspective>(featureTransformer, ksq, m_accumulators[next],
|
||||
m_accumulators[next - 1]);
|
||||
for (std::size_t next = begin + 1; next < size; next++)
|
||||
{
|
||||
if (next + 1 < size)
|
||||
{
|
||||
DirtyPiece& dp1 = accumulators[next].dirtyPiece;
|
||||
DirtyPiece& dp2 = accumulators[next + 1].dirtyPiece;
|
||||
|
||||
assert((latest().*accPtr).computed[Perspective]);
|
||||
if (dp1.to != SQ_NONE && dp1.to == dp2.remove_sq)
|
||||
{
|
||||
const Square captureSq = dp1.to;
|
||||
dp1.to = dp2.remove_sq = SQ_NONE;
|
||||
double_inc_update<Perspective>(featureTransformer, ksq, accumulators[next],
|
||||
accumulators[next + 1], accumulators[next - 1]);
|
||||
dp1.to = dp2.remove_sq = captureSq;
|
||||
|
||||
next++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
update_accumulator_incremental<Perspective, true>(
|
||||
featureTransformer, ksq, accumulators[next], accumulators[next - 1]);
|
||||
}
|
||||
|
||||
assert((latest().acc<Dimensions>()).computed[Perspective]);
|
||||
}
|
||||
|
||||
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
template<Color Perspective, IndexType Dimensions>
|
||||
void AccumulatorStack::backward_update_incremental(
|
||||
const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
const std::size_t end) noexcept {
|
||||
const Position& pos,
|
||||
const FeatureTransformer<Dimensions>& featureTransformer,
|
||||
const std::size_t end) noexcept {
|
||||
|
||||
assert(end < m_accumulators.size());
|
||||
assert(end < m_current_idx);
|
||||
assert((latest().*accPtr).computed[Perspective]);
|
||||
assert(end < accumulators.size());
|
||||
assert(end < size);
|
||||
assert((latest().acc<Dimensions>()).computed[Perspective]);
|
||||
|
||||
const Square ksq = pos.square<KING>(Perspective);
|
||||
|
||||
for (std::size_t next = m_current_idx - 2; next >= end; next--)
|
||||
update_accumulator_incremental<Perspective, BACKWARDS>(
|
||||
featureTransformer, ksq, m_accumulators[next], m_accumulators[next + 1]);
|
||||
for (std::int64_t next = std::int64_t(size) - 2; next >= std::int64_t(end); next--)
|
||||
update_accumulator_incremental<Perspective, false>(
|
||||
featureTransformer, ksq, accumulators[next], accumulators[next + 1]);
|
||||
|
||||
assert((m_accumulators[end].*accPtr).computed[Perspective]);
|
||||
assert((accumulators[end].acc<Dimensions>()).computed[Perspective]);
|
||||
}
|
||||
|
||||
// Explicit template instantiations
|
||||
template void
|
||||
AccumulatorStack::evaluate<TransformedFeatureDimensionsBig, &AccumulatorState::accumulatorBig>(
|
||||
const Position& pos,
|
||||
const FeatureTransformer<TransformedFeatureDimensionsBig, &AccumulatorState::accumulatorBig>&
|
||||
featureTransformer,
|
||||
template void AccumulatorStack::evaluate<TransformedFeatureDimensionsBig>(
|
||||
const Position& pos,
|
||||
const FeatureTransformer<TransformedFeatureDimensionsBig>& featureTransformer,
|
||||
AccumulatorCaches::Cache<TransformedFeatureDimensionsBig>& cache) noexcept;
|
||||
template void
|
||||
AccumulatorStack::evaluate<TransformedFeatureDimensionsSmall, &AccumulatorState::accumulatorSmall>(
|
||||
const Position& pos,
|
||||
const FeatureTransformer<TransformedFeatureDimensionsSmall, &AccumulatorState::accumulatorSmall>&
|
||||
featureTransformer,
|
||||
template void AccumulatorStack::evaluate<TransformedFeatureDimensionsSmall>(
|
||||
const Position& pos,
|
||||
const FeatureTransformer<TransformedFeatureDimensionsSmall>& featureTransformer,
|
||||
AccumulatorCaches::Cache<TransformedFeatureDimensionsSmall>& cache) noexcept;
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
template<Color Perspective,
|
||||
IncUpdateDirection Direction,
|
||||
IndexType TransformedFeatureDimensions,
|
||||
Accumulator<TransformedFeatureDimensions> AccumulatorState::*accPtr>
|
||||
template<typename VectorWrapper,
|
||||
IndexType Width,
|
||||
UpdateOperation... ops,
|
||||
typename ElementType,
|
||||
typename... Ts,
|
||||
std::enable_if_t<is_all_same_v<ElementType, Ts...>, bool> = true>
|
||||
void fused_row_reduce(const ElementType* in, ElementType* out, const Ts* const... rows) {
|
||||
constexpr IndexType size = Width * sizeof(ElementType) / sizeof(typename VectorWrapper::type);
|
||||
|
||||
auto* vecIn = reinterpret_cast<const typename VectorWrapper::type*>(in);
|
||||
auto* vecOut = reinterpret_cast<typename VectorWrapper::type*>(out);
|
||||
|
||||
for (IndexType i = 0; i < size; ++i)
|
||||
vecOut[i] = fused<VectorWrapper, ops...>(
|
||||
vecIn[i], reinterpret_cast<const typename VectorWrapper::type*>(rows)[i]...);
|
||||
}
|
||||
|
||||
template<Color Perspective, IndexType Dimensions>
|
||||
struct AccumulatorUpdateContext {
|
||||
const FeatureTransformer<Dimensions>& featureTransformer;
|
||||
const AccumulatorState& from;
|
||||
AccumulatorState& to;
|
||||
|
||||
AccumulatorUpdateContext(const FeatureTransformer<Dimensions>& ft,
|
||||
const AccumulatorState& accF,
|
||||
AccumulatorState& accT) noexcept :
|
||||
featureTransformer{ft},
|
||||
from{accF},
|
||||
to{accT} {}
|
||||
|
||||
template<UpdateOperation... ops,
|
||||
typename... Ts,
|
||||
std::enable_if_t<is_all_same_v<IndexType, Ts...>, bool> = true>
|
||||
void apply(const Ts... indices) {
|
||||
auto to_weight_vector = [&](const IndexType index) {
|
||||
return &featureTransformer.weights[index * Dimensions];
|
||||
};
|
||||
|
||||
auto to_psqt_weight_vector = [&](const IndexType index) {
|
||||
return &featureTransformer.psqtWeights[index * PSQTBuckets];
|
||||
};
|
||||
|
||||
fused_row_reduce<Vec16Wrapper, Dimensions, ops...>(
|
||||
(from.acc<Dimensions>()).accumulation[Perspective],
|
||||
(to.acc<Dimensions>()).accumulation[Perspective], to_weight_vector(indices)...);
|
||||
|
||||
fused_row_reduce<Vec32Wrapper, PSQTBuckets, ops...>(
|
||||
(from.acc<Dimensions>()).psqtAccumulation[Perspective],
|
||||
(to.acc<Dimensions>()).psqtAccumulation[Perspective], to_psqt_weight_vector(indices)...);
|
||||
}
|
||||
};
|
||||
|
||||
template<Color Perspective, IndexType Dimensions>
|
||||
auto make_accumulator_update_context(const FeatureTransformer<Dimensions>& featureTransformer,
|
||||
const AccumulatorState& accumulatorFrom,
|
||||
AccumulatorState& accumulatorTo) noexcept {
|
||||
return AccumulatorUpdateContext<Perspective, Dimensions>{featureTransformer, accumulatorFrom,
|
||||
accumulatorTo};
|
||||
}
|
||||
|
||||
template<Color Perspective, IndexType TransformedFeatureDimensions>
|
||||
void double_inc_update(const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
|
||||
const Square ksq,
|
||||
AccumulatorState& middle_state,
|
||||
AccumulatorState& target_state,
|
||||
const AccumulatorState& computed) {
|
||||
|
||||
assert(computed.acc<TransformedFeatureDimensions>().computed[Perspective]);
|
||||
assert(!middle_state.acc<TransformedFeatureDimensions>().computed[Perspective]);
|
||||
assert(!target_state.acc<TransformedFeatureDimensions>().computed[Perspective]);
|
||||
|
||||
FeatureSet::IndexList removed, added;
|
||||
FeatureSet::append_changed_indices<Perspective>(ksq, middle_state.dirtyPiece, removed, added);
|
||||
// you can't capture a piece that was just involved in castling since the rook ends up
|
||||
// in a square that the king passed
|
||||
assert(added.size() < 2);
|
||||
FeatureSet::append_changed_indices<Perspective>(ksq, target_state.dirtyPiece, removed, added);
|
||||
|
||||
assert(added.size() == 1);
|
||||
assert(removed.size() == 2 || removed.size() == 3);
|
||||
|
||||
// Workaround compiler warning for uninitialized variables, replicated on
|
||||
// profile builds on windows with gcc 14.2.0.
|
||||
// TODO remove once unneeded
|
||||
sf_assume(added.size() == 1);
|
||||
sf_assume(removed.size() == 2 || removed.size() == 3);
|
||||
|
||||
auto updateContext =
|
||||
make_accumulator_update_context<Perspective>(featureTransformer, computed, target_state);
|
||||
|
||||
if (removed.size() == 2)
|
||||
{
|
||||
updateContext.template apply<Add, Sub, Sub>(added[0], removed[0], removed[1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
updateContext.template apply<Add, Sub, Sub, Sub>(added[0], removed[0], removed[1],
|
||||
removed[2]);
|
||||
}
|
||||
|
||||
target_state.acc<TransformedFeatureDimensions>().computed[Perspective] = true;
|
||||
}
|
||||
|
||||
template<Color Perspective, bool Forward, IndexType TransformedFeatureDimensions>
|
||||
void update_accumulator_incremental(
|
||||
const FeatureTransformer<TransformedFeatureDimensions, accPtr>& featureTransformer,
|
||||
const Square ksq,
|
||||
AccumulatorState& target_state,
|
||||
const AccumulatorState& computed) {
|
||||
[[maybe_unused]] constexpr bool Forward = Direction == FORWARD;
|
||||
[[maybe_unused]] constexpr bool Backwards = Direction == BACKWARDS;
|
||||
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
|
||||
const Square ksq,
|
||||
AccumulatorState& target_state,
|
||||
const AccumulatorState& computed) {
|
||||
|
||||
assert(Forward != Backwards);
|
||||
|
||||
assert((computed.*accPtr).computed[Perspective]);
|
||||
assert(!(target_state.*accPtr).computed[Perspective]);
|
||||
assert((computed.acc<TransformedFeatureDimensions>()).computed[Perspective]);
|
||||
assert(!(target_state.acc<TransformedFeatureDimensions>()).computed[Perspective]);
|
||||
|
||||
// The size must be enough to contain the largest possible update.
|
||||
// That might depend on the feature set and generally relies on the
|
||||
@@ -238,188 +323,52 @@ void update_accumulator_incremental(
|
||||
else
|
||||
FeatureSet::append_changed_indices<Perspective>(ksq, computed.dirtyPiece, added, removed);
|
||||
|
||||
if (removed.size() == 0 && added.size() == 0)
|
||||
assert(added.size() == 1 || added.size() == 2);
|
||||
assert(removed.size() == 1 || removed.size() == 2);
|
||||
assert((Forward && added.size() <= removed.size())
|
||||
|| (!Forward && added.size() >= removed.size()));
|
||||
|
||||
// Workaround compiler warning for uninitialized variables, replicated on
|
||||
// profile builds on windows with gcc 14.2.0.
|
||||
// TODO remove once unneeded
|
||||
sf_assume(added.size() == 1 || added.size() == 2);
|
||||
sf_assume(removed.size() == 1 || removed.size() == 2);
|
||||
|
||||
auto updateContext =
|
||||
make_accumulator_update_context<Perspective>(featureTransformer, computed, target_state);
|
||||
|
||||
if ((Forward && removed.size() == 1) || (!Forward && added.size() == 1))
|
||||
{
|
||||
std::memcpy((target_state.*accPtr).accumulation[Perspective],
|
||||
(computed.*accPtr).accumulation[Perspective],
|
||||
TransformedFeatureDimensions * sizeof(BiasType));
|
||||
std::memcpy((target_state.*accPtr).psqtAccumulation[Perspective],
|
||||
(computed.*accPtr).psqtAccumulation[Perspective],
|
||||
PSQTBuckets * sizeof(PSQTWeightType));
|
||||
assert(added.size() == 1 && removed.size() == 1);
|
||||
updateContext.template apply<Add, Sub>(added[0], removed[0]);
|
||||
}
|
||||
else if (Forward && added.size() == 1)
|
||||
{
|
||||
assert(removed.size() == 2);
|
||||
updateContext.template apply<Add, Sub, Sub>(added[0], removed[0], removed[1]);
|
||||
}
|
||||
else if (!Forward && removed.size() == 1)
|
||||
{
|
||||
assert(added.size() == 2);
|
||||
updateContext.template apply<Add, Add, Sub>(added[0], added[1], removed[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(added.size() == 1 || added.size() == 2);
|
||||
assert(removed.size() == 1 || removed.size() == 2);
|
||||
|
||||
if (Forward)
|
||||
assert(added.size() <= removed.size());
|
||||
else
|
||||
assert(removed.size() <= added.size());
|
||||
|
||||
// Workaround compiler warning for uninitialized variables, replicated on
|
||||
// profile builds on windows with gcc 14.2.0.
|
||||
// TODO remove once unneeded
|
||||
sf_assume(added.size() == 1 || added.size() == 2);
|
||||
sf_assume(removed.size() == 1 || removed.size() == 2);
|
||||
|
||||
#ifdef VECTOR
|
||||
auto* accIn =
|
||||
reinterpret_cast<const vec_t*>(&(computed.*accPtr).accumulation[Perspective][0]);
|
||||
auto* accOut =
|
||||
reinterpret_cast<vec_t*>(&(target_state.*accPtr).accumulation[Perspective][0]);
|
||||
|
||||
const IndexType offsetA0 = TransformedFeatureDimensions * added[0];
|
||||
auto* columnA0 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA0]);
|
||||
const IndexType offsetR0 = TransformedFeatureDimensions * removed[0];
|
||||
auto* columnR0 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR0]);
|
||||
|
||||
if ((Forward && removed.size() == 1) || (Backwards && added.size() == 1))
|
||||
{
|
||||
assert(added.size() == 1 && removed.size() == 1);
|
||||
for (IndexType i = 0;
|
||||
i < TransformedFeatureDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||
accOut[i] = vec_add_16(vec_sub_16(accIn[i], columnR0[i]), columnA0[i]);
|
||||
}
|
||||
else if (Forward && added.size() == 1)
|
||||
{
|
||||
assert(removed.size() == 2);
|
||||
const IndexType offsetR1 = TransformedFeatureDimensions * removed[1];
|
||||
auto* columnR1 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR1]);
|
||||
|
||||
for (IndexType i = 0;
|
||||
i < TransformedFeatureDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||
accOut[i] = vec_sub_16(vec_add_16(accIn[i], columnA0[i]),
|
||||
vec_add_16(columnR0[i], columnR1[i]));
|
||||
}
|
||||
else if (Backwards && removed.size() == 1)
|
||||
{
|
||||
assert(added.size() == 2);
|
||||
const IndexType offsetA1 = TransformedFeatureDimensions * added[1];
|
||||
auto* columnA1 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA1]);
|
||||
|
||||
for (IndexType i = 0;
|
||||
i < TransformedFeatureDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||
accOut[i] = vec_add_16(vec_add_16(accIn[i], columnA0[i]),
|
||||
vec_sub_16(columnA1[i], columnR0[i]));
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(added.size() == 2 && removed.size() == 2);
|
||||
const IndexType offsetA1 = TransformedFeatureDimensions * added[1];
|
||||
auto* columnA1 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA1]);
|
||||
const IndexType offsetR1 = TransformedFeatureDimensions * removed[1];
|
||||
auto* columnR1 = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR1]);
|
||||
|
||||
for (IndexType i = 0;
|
||||
i < TransformedFeatureDimensions * sizeof(WeightType) / sizeof(vec_t); ++i)
|
||||
accOut[i] = vec_add_16(accIn[i], vec_sub_16(vec_add_16(columnA0[i], columnA1[i]),
|
||||
vec_add_16(columnR0[i], columnR1[i])));
|
||||
}
|
||||
|
||||
auto* accPsqtIn =
|
||||
reinterpret_cast<const psqt_vec_t*>(&(computed.*accPtr).psqtAccumulation[Perspective][0]);
|
||||
auto* accPsqtOut =
|
||||
reinterpret_cast<psqt_vec_t*>(&(target_state.*accPtr).psqtAccumulation[Perspective][0]);
|
||||
|
||||
const IndexType offsetPsqtA0 = PSQTBuckets * added[0];
|
||||
auto* columnPsqtA0 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtA0]);
|
||||
const IndexType offsetPsqtR0 = PSQTBuckets * removed[0];
|
||||
auto* columnPsqtR0 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtR0]);
|
||||
|
||||
if ((Forward && removed.size() == 1)
|
||||
|| (Backwards && added.size() == 1)) // added.size() == removed.size() == 1
|
||||
{
|
||||
for (std::size_t i = 0; i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t);
|
||||
++i)
|
||||
accPsqtOut[i] =
|
||||
vec_add_psqt_32(vec_sub_psqt_32(accPsqtIn[i], columnPsqtR0[i]), columnPsqtA0[i]);
|
||||
}
|
||||
else if (Forward && added.size() == 1)
|
||||
{
|
||||
const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
|
||||
auto* columnPsqtR1 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtR1]);
|
||||
|
||||
for (std::size_t i = 0; i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t);
|
||||
++i)
|
||||
accPsqtOut[i] = vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA0[i]),
|
||||
vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i]));
|
||||
}
|
||||
else if (Backwards && removed.size() == 1)
|
||||
{
|
||||
const IndexType offsetPsqtA1 = PSQTBuckets * added[1];
|
||||
auto* columnPsqtA1 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtA1]);
|
||||
|
||||
for (std::size_t i = 0; i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t);
|
||||
++i)
|
||||
accPsqtOut[i] = vec_add_psqt_32(vec_add_psqt_32(accPsqtIn[i], columnPsqtA0[i]),
|
||||
vec_sub_psqt_32(columnPsqtA1[i], columnPsqtR0[i]));
|
||||
}
|
||||
else
|
||||
{
|
||||
const IndexType offsetPsqtA1 = PSQTBuckets * added[1];
|
||||
auto* columnPsqtA1 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtA1]);
|
||||
const IndexType offsetPsqtR1 = PSQTBuckets * removed[1];
|
||||
auto* columnPsqtR1 =
|
||||
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offsetPsqtR1]);
|
||||
|
||||
for (std::size_t i = 0; i < PSQTBuckets * sizeof(PSQTWeightType) / sizeof(psqt_vec_t);
|
||||
++i)
|
||||
accPsqtOut[i] = vec_add_psqt_32(
|
||||
accPsqtIn[i], vec_sub_psqt_32(vec_add_psqt_32(columnPsqtA0[i], columnPsqtA1[i]),
|
||||
vec_add_psqt_32(columnPsqtR0[i], columnPsqtR1[i])));
|
||||
}
|
||||
#else
|
||||
std::memcpy((target_state.*accPtr).accumulation[Perspective],
|
||||
(computed.*accPtr).accumulation[Perspective],
|
||||
TransformedFeatureDimensions * sizeof(BiasType));
|
||||
std::memcpy((target_state.*accPtr).psqtAccumulation[Perspective],
|
||||
(computed.*accPtr).psqtAccumulation[Perspective],
|
||||
PSQTBuckets * sizeof(PSQTWeightType));
|
||||
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed)
|
||||
{
|
||||
const IndexType offset = TransformedFeatureDimensions * index;
|
||||
for (IndexType i = 0; i < TransformedFeatureDimensions; ++i)
|
||||
(target_state.*accPtr).accumulation[Perspective][i] -=
|
||||
featureTransformer.weights[offset + i];
|
||||
|
||||
for (std::size_t i = 0; i < PSQTBuckets; ++i)
|
||||
(target_state.*accPtr).psqtAccumulation[Perspective][i] -=
|
||||
featureTransformer.psqtWeights[index * PSQTBuckets + i];
|
||||
}
|
||||
|
||||
// Difference calculation for the activated features
|
||||
for (const auto index : added)
|
||||
{
|
||||
const IndexType offset = TransformedFeatureDimensions * index;
|
||||
for (IndexType i = 0; i < TransformedFeatureDimensions; ++i)
|
||||
(target_state.*accPtr).accumulation[Perspective][i] +=
|
||||
featureTransformer.weights[offset + i];
|
||||
|
||||
for (std::size_t i = 0; i < PSQTBuckets; ++i)
|
||||
(target_state.*accPtr).psqtAccumulation[Perspective][i] +=
|
||||
featureTransformer.psqtWeights[index * PSQTBuckets + i];
|
||||
}
|
||||
#endif
|
||||
assert(added.size() == 2 && removed.size() == 2);
|
||||
updateContext.template apply<Add, Add, Sub, Sub>(added[0], added[1], removed[0],
|
||||
removed[1]);
|
||||
}
|
||||
|
||||
(target_state.*accPtr).computed[Perspective] = true;
|
||||
(target_state.acc<TransformedFeatureDimensions>()).computed[Perspective] = true;
|
||||
}
|
||||
|
||||
template<Color Perspective, IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void update_accumulator_refresh_cache(
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
const Position& pos,
|
||||
AccumulatorState& accumulatorState,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) {
|
||||
using Tiling [[maybe_unused]] = SIMDTiling<Dimensions, Dimensions>;
|
||||
template<Color Perspective, IndexType Dimensions>
|
||||
void update_accumulator_refresh_cache(const FeatureTransformer<Dimensions>& featureTransformer,
|
||||
const Position& pos,
|
||||
AccumulatorState& accumulatorState,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) {
|
||||
|
||||
using Tiling [[maybe_unused]] = SIMDTiling<Dimensions, Dimensions, PSQTBuckets>;
|
||||
|
||||
const Square ksq = pos.square<KING>(Perspective);
|
||||
auto& entry = cache[ksq][Perspective];
|
||||
@@ -448,12 +397,10 @@ void update_accumulator_refresh_cache(
|
||||
}
|
||||
}
|
||||
|
||||
auto& accumulator = accumulatorState.*accPtr;
|
||||
auto& accumulator = accumulatorState.acc<Dimensions>();
|
||||
accumulator.computed[Perspective] = true;
|
||||
|
||||
#ifdef VECTOR
|
||||
const bool combineLast3 =
|
||||
std::abs((int) removed.size() - (int) added.size()) == 1 && removed.size() + added.size() > 2;
|
||||
vec_t acc[Tiling::NumRegs];
|
||||
psqt_vec_t psqt[Tiling::NumPsqtRegs];
|
||||
|
||||
@@ -466,8 +413,8 @@ void update_accumulator_refresh_cache(
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = entryTile[k];
|
||||
|
||||
std::size_t i = 0;
|
||||
for (; i < std::min(removed.size(), added.size()) - combineLast3; ++i)
|
||||
IndexType i = 0;
|
||||
for (; i < std::min(removed.size(), added.size()); ++i)
|
||||
{
|
||||
IndexType indexR = removed[i];
|
||||
const IndexType offsetR = Dimensions * indexR + j * Tiling::TileHeight;
|
||||
@@ -477,60 +424,25 @@ void update_accumulator_refresh_cache(
|
||||
auto* columnA = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], vec_sub_16(columnA[k], columnR[k]));
|
||||
acc[k] = fused<Vec16Wrapper, Add, Sub>(acc[k], columnA[k], columnR[k]);
|
||||
}
|
||||
if (combineLast3)
|
||||
for (; i < removed.size(); ++i)
|
||||
{
|
||||
IndexType indexR = removed[i];
|
||||
const IndexType offsetR = Dimensions * indexR + j * Tiling::TileHeight;
|
||||
auto* columnR = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR]);
|
||||
IndexType indexA = added[i];
|
||||
const IndexType offsetA = Dimensions * indexA + j * Tiling::TileHeight;
|
||||
auto* columnA = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA]);
|
||||
IndexType index = removed[i];
|
||||
const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
|
||||
auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
|
||||
|
||||
if (removed.size() > added.size())
|
||||
{
|
||||
IndexType indexR2 = removed[i + 1];
|
||||
const IndexType offsetR2 = Dimensions * indexR2 + j * Tiling::TileHeight;
|
||||
auto* columnR2 =
|
||||
reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR2]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(vec_add_16(acc[k], columnA[k]),
|
||||
vec_add_16(columnR[k], columnR2[k]));
|
||||
}
|
||||
else
|
||||
{
|
||||
IndexType indexA2 = added[i + 1];
|
||||
const IndexType offsetA2 = Dimensions * indexA2 + j * Tiling::TileHeight;
|
||||
auto* columnA2 =
|
||||
reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA2]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_add_16(vec_sub_16(acc[k], columnR[k]),
|
||||
vec_add_16(columnA[k], columnA2[k]));
|
||||
}
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
else
|
||||
for (; i < added.size(); ++i)
|
||||
{
|
||||
for (; i < removed.size(); ++i)
|
||||
{
|
||||
IndexType index = removed[i];
|
||||
const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
|
||||
auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
|
||||
IndexType index = added[i];
|
||||
const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
|
||||
auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
for (; i < added.size(); ++i)
|
||||
{
|
||||
IndexType index = added[i];
|
||||
const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
|
||||
auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < Tiling::NumRegs; k++)
|
||||
@@ -546,10 +458,10 @@ void update_accumulator_refresh_cache(
|
||||
auto* entryTilePsqt =
|
||||
reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * Tiling::PsqtTileHeight]);
|
||||
|
||||
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
psqt[k] = entryTilePsqt[k];
|
||||
|
||||
for (std::size_t i = 0; i < removed.size(); ++i)
|
||||
for (IndexType i = 0; i < removed.size(); ++i)
|
||||
{
|
||||
IndexType index = removed[i];
|
||||
const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
|
||||
@@ -559,7 +471,7 @@ void update_accumulator_refresh_cache(
|
||||
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
|
||||
}
|
||||
for (std::size_t i = 0; i < added.size(); ++i)
|
||||
for (IndexType i = 0; i < added.size(); ++i)
|
||||
{
|
||||
IndexType index = added[i];
|
||||
const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
|
||||
@@ -570,9 +482,9 @@ void update_accumulator_refresh_cache(
|
||||
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
|
||||
}
|
||||
|
||||
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
vec_store_psqt(&entryTilePsqt[k], psqt[k]);
|
||||
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
|
||||
vec_store_psqt(&accTilePsqt[k], psqt[k]);
|
||||
}
|
||||
|
||||
|
||||
@@ -37,19 +37,10 @@ class Position;
|
||||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
using BiasType = std::int16_t;
|
||||
using PSQTWeightType = std::int32_t;
|
||||
using IndexType = std::uint32_t;
|
||||
|
||||
struct Networks;
|
||||
|
||||
template<IndexType Size>
|
||||
struct alignas(CacheLineSize) Accumulator;
|
||||
|
||||
struct AccumulatorState;
|
||||
|
||||
template<IndexType TransformedFeatureDimensions,
|
||||
Accumulator<TransformedFeatureDimensions> AccumulatorState::*accPtr>
|
||||
template<IndexType TransformedFeatureDimensions>
|
||||
class FeatureTransformer;
|
||||
|
||||
// Class that holds the result of affine transformation of input features
|
||||
@@ -121,6 +112,30 @@ struct AccumulatorState {
|
||||
Accumulator<TransformedFeatureDimensionsSmall> accumulatorSmall;
|
||||
DirtyPiece dirtyPiece;
|
||||
|
||||
// Returns a mutable reference to the accumulator member selected at compile
// time by Size: accumulatorBig for TransformedFeatureDimensionsBig,
// accumulatorSmall for TransformedFeatureDimensionsSmall. Any other Size is
// rejected by the static_assert, so one of the two branches is always taken.
template<IndexType Size>
|
||||
auto& acc() noexcept {
|
||||
static_assert(Size == TransformedFeatureDimensionsBig
|
||||
|| Size == TransformedFeatureDimensionsSmall,
|
||||
"Invalid size for accumulator");
|
||||
|
||||
if constexpr (Size == TransformedFeatureDimensionsBig)
|
||||
return accumulatorBig;
|
||||
else if constexpr (Size == TransformedFeatureDimensionsSmall)
|
||||
return accumulatorSmall;
|
||||
}
|
||||
|
||||
// Const overload: returns a read-only reference to the accumulator member
// selected at compile time by Size (accumulatorBig or accumulatorSmall).
// Sizes other than the two supported dimensions fail the static_assert.
template<IndexType Size>
|
||||
const auto& acc() const noexcept {
|
||||
static_assert(Size == TransformedFeatureDimensionsBig
|
||||
|| Size == TransformedFeatureDimensionsSmall,
|
||||
"Invalid size for accumulator");
|
||||
|
||||
if constexpr (Size == TransformedFeatureDimensionsBig)
|
||||
return accumulatorBig;
|
||||
else if constexpr (Size == TransformedFeatureDimensionsSmall)
|
||||
return accumulatorSmall;
|
||||
}
|
||||
|
||||
void reset(const DirtyPiece& dp) noexcept;
|
||||
};
|
||||
|
||||
@@ -128,54 +143,43 @@ struct AccumulatorState {
|
||||
class AccumulatorStack {
|
||||
public:
|
||||
AccumulatorStack() :
|
||||
m_accumulators(MAX_PLY + 1),
|
||||
m_current_idx{} {}
|
||||
accumulators(MAX_PLY + 1),
|
||||
size{1} {}
|
||||
|
||||
[[nodiscard]] const AccumulatorState& latest() const noexcept;
|
||||
|
||||
void
|
||||
reset(const Position& rootPos, const Networks& networks, AccumulatorCaches& caches) noexcept;
|
||||
void reset() noexcept;
|
||||
void push(const DirtyPiece& dirtyPiece) noexcept;
|
||||
void pop() noexcept;
|
||||
|
||||
template<IndexType Dimensions, Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void evaluate(const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) noexcept;
|
||||
template<IndexType Dimensions>
|
||||
void evaluate(const Position& pos,
|
||||
const FeatureTransformer<Dimensions>& featureTransformer,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) noexcept;
|
||||
|
||||
private:
|
||||
[[nodiscard]] AccumulatorState& mut_latest() noexcept;
|
||||
|
||||
template<Color Perspective,
|
||||
IndexType Dimensions,
|
||||
Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void evaluate_side(const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) noexcept;
|
||||
template<Color Perspective, IndexType Dimensions>
|
||||
void evaluate_side(const Position& pos,
|
||||
const FeatureTransformer<Dimensions>& featureTransformer,
|
||||
AccumulatorCaches::Cache<Dimensions>& cache) noexcept;
|
||||
|
||||
template<Color Perspective,
|
||||
IndexType Dimensions,
|
||||
Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
template<Color Perspective, IndexType Dimensions>
|
||||
[[nodiscard]] std::size_t find_last_usable_accumulator() const noexcept;
|
||||
|
||||
template<Color Perspective,
|
||||
IndexType Dimensions,
|
||||
Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void
|
||||
forward_update_incremental(const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
const std::size_t begin) noexcept;
|
||||
template<Color Perspective, IndexType Dimensions>
|
||||
void forward_update_incremental(const Position& pos,
|
||||
const FeatureTransformer<Dimensions>& featureTransformer,
|
||||
const std::size_t begin) noexcept;
|
||||
|
||||
template<Color Perspective,
|
||||
IndexType Dimensions,
|
||||
Accumulator<Dimensions> AccumulatorState::*accPtr>
|
||||
void
|
||||
backward_update_incremental(const Position& pos,
|
||||
const FeatureTransformer<Dimensions, accPtr>& featureTransformer,
|
||||
const std::size_t end) noexcept;
|
||||
template<Color Perspective, IndexType Dimensions>
|
||||
void backward_update_incremental(const Position& pos,
|
||||
const FeatureTransformer<Dimensions>& featureTransformer,
|
||||
const std::size_t end) noexcept;
|
||||
|
||||
std::vector<AccumulatorState> m_accumulators;
|
||||
std::size_t m_current_idx;
|
||||
std::vector<AccumulatorState> accumulators;
|
||||
std::size_t size;
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
@@ -49,6 +49,12 @@ constexpr int L3Small = 32;
|
||||
constexpr IndexType PSQTBuckets = 8;
|
||||
constexpr IndexType LayerStacks = 8;
|
||||
|
||||
// If vector instructions are enabled, we update and refresh the
|
||||
// accumulator tile by tile such that each tile fits in the CPU's
|
||||
// vector registers.
|
||||
static_assert(PSQTBuckets % 8 == 0,
|
||||
"Per feature PSQT values cannot be processed at granularity lower than 8 at a time.");
|
||||
|
||||
template<IndexType L1, int L2, int L3>
|
||||
struct NetworkArchitecture {
|
||||
static constexpr IndexType TransformedFeatureDimensions = L1;
|
||||
|
||||
@@ -48,6 +48,11 @@
|
||||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
using BiasType = std::int16_t;
|
||||
using WeightType = std::int16_t;
|
||||
using PSQTWeightType = std::int32_t;
|
||||
using IndexType = std::uint32_t;
|
||||
|
||||
// Version of the evaluation file
|
||||
constexpr std::uint32_t Version = 0x7AF32F20u;
|
||||
|
||||
@@ -76,7 +81,6 @@ constexpr std::size_t MaxSimdWidth = 32;
|
||||
|
||||
// Type of input feature after conversion
|
||||
using TransformedFeatureType = std::uint8_t;
|
||||
using IndexType = std::uint32_t;
|
||||
|
||||
// Round n up to be a multiple of base
|
||||
template<typename IntType>
|
||||
@@ -279,11 +283,6 @@ inline void write_leb_128(std::ostream& stream, const IntType* values, std::size
|
||||
flush();
|
||||
}
|
||||
|
||||
enum IncUpdateDirection {
|
||||
FORWARD,
|
||||
BACKWARDS
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
#endif // #ifndef NNUE_COMMON_H_INCLUDED
|
||||
|
||||
@@ -31,118 +31,10 @@
|
||||
#include "nnue_accumulator.h"
|
||||
#include "nnue_architecture.h"
|
||||
#include "nnue_common.h"
|
||||
#include "simd.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
using BiasType = std::int16_t;
|
||||
using WeightType = std::int16_t;
|
||||
using PSQTWeightType = std::int32_t;
|
||||
|
||||
// If vector instructions are enabled, we update and refresh the
|
||||
// accumulator tile by tile such that each tile fits in the CPU's
|
||||
// vector registers.
|
||||
#define VECTOR
|
||||
|
||||
static_assert(PSQTBuckets % 8 == 0,
|
||||
"Per feature PSQT values cannot be processed at granularity lower than 8 at a time.");
|
||||
|
||||
#ifdef USE_AVX512
|
||||
using vec_t = __m512i;
|
||||
using psqt_vec_t = __m256i;
|
||||
#define vec_load(a) _mm512_load_si512(a)
|
||||
#define vec_store(a, b) _mm512_store_si512(a, b)
|
||||
#define vec_add_16(a, b) _mm512_add_epi16(a, b)
|
||||
#define vec_sub_16(a, b) _mm512_sub_epi16(a, b)
|
||||
#define vec_mulhi_16(a, b) _mm512_mulhi_epi16(a, b)
|
||||
#define vec_zero() _mm512_setzero_epi32()
|
||||
#define vec_set_16(a) _mm512_set1_epi16(a)
|
||||
#define vec_max_16(a, b) _mm512_max_epi16(a, b)
|
||||
#define vec_min_16(a, b) _mm512_min_epi16(a, b)
|
||||
#define vec_slli_16(a, b) _mm512_slli_epi16(a, b)
|
||||
// Inverse permuted at load time
|
||||
#define vec_packus_16(a, b) _mm512_packus_epi16(a, b)
|
||||
#define vec_load_psqt(a) _mm256_load_si256(a)
|
||||
#define vec_store_psqt(a, b) _mm256_store_si256(a, b)
|
||||
#define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b)
|
||||
#define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b)
|
||||
#define vec_zero_psqt() _mm256_setzero_si256()
|
||||
#define NumRegistersSIMD 16
|
||||
#define MaxChunkSize 64
|
||||
|
||||
#elif USE_AVX2
|
||||
using vec_t = __m256i;
|
||||
using psqt_vec_t = __m256i;
|
||||
#define vec_load(a) _mm256_load_si256(a)
|
||||
#define vec_store(a, b) _mm256_store_si256(a, b)
|
||||
#define vec_add_16(a, b) _mm256_add_epi16(a, b)
|
||||
#define vec_sub_16(a, b) _mm256_sub_epi16(a, b)
|
||||
#define vec_mulhi_16(a, b) _mm256_mulhi_epi16(a, b)
|
||||
#define vec_zero() _mm256_setzero_si256()
|
||||
#define vec_set_16(a) _mm256_set1_epi16(a)
|
||||
#define vec_max_16(a, b) _mm256_max_epi16(a, b)
|
||||
#define vec_min_16(a, b) _mm256_min_epi16(a, b)
|
||||
#define vec_slli_16(a, b) _mm256_slli_epi16(a, b)
|
||||
// Inverse permuted at load time
|
||||
#define vec_packus_16(a, b) _mm256_packus_epi16(a, b)
|
||||
#define vec_load_psqt(a) _mm256_load_si256(a)
|
||||
#define vec_store_psqt(a, b) _mm256_store_si256(a, b)
|
||||
#define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b)
|
||||
#define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b)
|
||||
#define vec_zero_psqt() _mm256_setzero_si256()
|
||||
#define NumRegistersSIMD 16
|
||||
#define MaxChunkSize 32
|
||||
|
||||
#elif USE_SSE2
|
||||
using vec_t = __m128i;
|
||||
using psqt_vec_t = __m128i;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a, b) *(a) = (b)
|
||||
#define vec_add_16(a, b) _mm_add_epi16(a, b)
|
||||
#define vec_sub_16(a, b) _mm_sub_epi16(a, b)
|
||||
#define vec_mulhi_16(a, b) _mm_mulhi_epi16(a, b)
|
||||
#define vec_zero() _mm_setzero_si128()
|
||||
#define vec_set_16(a) _mm_set1_epi16(a)
|
||||
#define vec_max_16(a, b) _mm_max_epi16(a, b)
|
||||
#define vec_min_16(a, b) _mm_min_epi16(a, b)
|
||||
#define vec_slli_16(a, b) _mm_slli_epi16(a, b)
|
||||
#define vec_packus_16(a, b) _mm_packus_epi16(a, b)
|
||||
#define vec_load_psqt(a) (*(a))
|
||||
#define vec_store_psqt(a, b) *(a) = (b)
|
||||
#define vec_add_psqt_32(a, b) _mm_add_epi32(a, b)
|
||||
#define vec_sub_psqt_32(a, b) _mm_sub_epi32(a, b)
|
||||
#define vec_zero_psqt() _mm_setzero_si128()
|
||||
#define NumRegistersSIMD (Is64Bit ? 16 : 8)
|
||||
#define MaxChunkSize 16
|
||||
|
||||
#elif USE_NEON
|
||||
using vec_t = int16x8_t;
|
||||
using psqt_vec_t = int32x4_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a, b) *(a) = (b)
|
||||
#define vec_add_16(a, b) vaddq_s16(a, b)
|
||||
#define vec_sub_16(a, b) vsubq_s16(a, b)
|
||||
#define vec_mulhi_16(a, b) vqdmulhq_s16(a, b)
|
||||
#define vec_zero() \
|
||||
vec_t { 0 }
|
||||
#define vec_set_16(a) vdupq_n_s16(a)
|
||||
#define vec_max_16(a, b) vmaxq_s16(a, b)
|
||||
#define vec_min_16(a, b) vminq_s16(a, b)
|
||||
#define vec_slli_16(a, b) vshlq_s16(a, vec_set_16(b))
|
||||
#define vec_packus_16(a, b) reinterpret_cast<vec_t>(vcombine_u8(vqmovun_s16(a), vqmovun_s16(b)))
|
||||
#define vec_load_psqt(a) (*(a))
|
||||
#define vec_store_psqt(a, b) *(a) = (b)
|
||||
#define vec_add_psqt_32(a, b) vaddq_s32(a, b)
|
||||
#define vec_sub_psqt_32(a, b) vsubq_s32(a, b)
|
||||
#define vec_zero_psqt() \
|
||||
psqt_vec_t { 0 }
|
||||
#define NumRegistersSIMD 16
|
||||
#define MaxChunkSize 16
|
||||
|
||||
#else
|
||||
#undef VECTOR
|
||||
|
||||
#endif
|
||||
|
||||
// Returns the inverse of a permutation
|
||||
template<std::size_t Len>
|
||||
constexpr std::array<std::size_t, Len>
|
||||
@@ -184,64 +76,8 @@ void permute(T (&data)[N], const std::array<std::size_t, OrderSize>& order) {
|
||||
}
|
||||
}
|
||||
|
||||
// Compute optimal SIMD register count for feature transformer accumulation.
|
||||
template<IndexType TransformedFeatureWidth, IndexType HalfDimensions>
|
||||
class SIMDTiling {
|
||||
#ifdef VECTOR
|
||||
// We use __m* types as template arguments, which causes GCC to emit warnings
|
||||
// about losing some attribute information. This is irrelevant to us as we
|
||||
// only take their size, so the following pragma are harmless.
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wignored-attributes"
|
||||
#endif
|
||||
|
||||
template<typename SIMDRegisterType, typename LaneType, int NumLanes, int MaxRegisters>
|
||||
static constexpr int BestRegisterCount() {
|
||||
constexpr std::size_t RegisterSize = sizeof(SIMDRegisterType);
|
||||
constexpr std::size_t LaneSize = sizeof(LaneType);
|
||||
|
||||
static_assert(RegisterSize >= LaneSize);
|
||||
static_assert(MaxRegisters <= NumRegistersSIMD);
|
||||
static_assert(MaxRegisters > 0);
|
||||
static_assert(NumRegistersSIMD > 0);
|
||||
static_assert(RegisterSize % LaneSize == 0);
|
||||
static_assert((NumLanes * LaneSize) % RegisterSize == 0);
|
||||
|
||||
const int ideal = (NumLanes * LaneSize) / RegisterSize;
|
||||
if (ideal <= MaxRegisters)
|
||||
return ideal;
|
||||
|
||||
// Look for the largest divisor of the ideal register count that is smaller than MaxRegisters
|
||||
for (int divisor = MaxRegisters; divisor > 1; --divisor)
|
||||
if (ideal % divisor == 0)
|
||||
return divisor;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
public:
|
||||
static constexpr int NumRegs =
|
||||
BestRegisterCount<vec_t, WeightType, TransformedFeatureWidth, NumRegistersSIMD>();
|
||||
static constexpr int NumPsqtRegs =
|
||||
BestRegisterCount<psqt_vec_t, PSQTWeightType, PSQTBuckets, NumRegistersSIMD>();
|
||||
|
||||
static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2;
|
||||
static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4;
|
||||
|
||||
static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions");
|
||||
static_assert(PSQTBuckets % PsqtTileHeight == 0, "PsqtTileHeight must divide PSQTBuckets");
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
// Input feature converter
|
||||
template<IndexType TransformedFeatureDimensions,
|
||||
Accumulator<TransformedFeatureDimensions> AccumulatorState::*accPtr>
|
||||
template<IndexType TransformedFeatureDimensions>
|
||||
class FeatureTransformer {
|
||||
|
||||
// Number of output dimensions for one side
|
||||
@@ -342,16 +178,18 @@ class FeatureTransformer {
|
||||
OutputType* output,
|
||||
int bucket) const {
|
||||
|
||||
using namespace SIMD;
|
||||
|
||||
accumulatorStack.evaluate(pos, *this, *cache);
|
||||
const auto& accumulatorState = accumulatorStack.latest();
|
||||
|
||||
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
|
||||
const auto& psqtAccumulation = (accumulatorState.*accPtr).psqtAccumulation;
|
||||
const auto& psqtAccumulation = (accumulatorState.acc<HalfDimensions>()).psqtAccumulation;
|
||||
const auto psqt =
|
||||
(psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket])
|
||||
/ 2;
|
||||
|
||||
const auto& accumulation = (accumulatorState.*accPtr).accumulation;
|
||||
const auto& accumulation = (accumulatorState.acc<HalfDimensions>()).accumulation;
|
||||
|
||||
for (IndexType p = 0; p < 2; ++p)
|
||||
{
|
||||
|
||||
@@ -121,7 +121,6 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
|
||||
};
|
||||
|
||||
AccumulatorStack accumulators;
|
||||
accumulators.reset(pos, networks, caches);
|
||||
|
||||
// We estimate the value of each piece by doing a differential evaluation from
|
||||
// the current base eval, simulating the removal of the piece from its square.
|
||||
@@ -140,7 +139,7 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
|
||||
{
|
||||
pos.remove_piece(sq);
|
||||
|
||||
accumulators.reset(pos, networks, caches);
|
||||
accumulators.reset();
|
||||
std::tie(psqt, positional) = networks.big.evaluate(pos, accumulators, &caches.big);
|
||||
Value eval = psqt + positional;
|
||||
eval = pos.side_to_move() == WHITE ? eval : -eval;
|
||||
@@ -157,7 +156,7 @@ trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::Accumulat
|
||||
ss << board[row] << '\n';
|
||||
ss << '\n';
|
||||
|
||||
accumulators.reset(pos, networks, caches);
|
||||
accumulators.reset();
|
||||
auto t = networks.big.trace_evaluate(pos, accumulators, &caches.big);
|
||||
|
||||
ss << " NNUE network contributions "
|
||||
|
||||
406
src/nnue/simd.h
Normal file
406
src/nnue/simd.h
Normal file
@@ -0,0 +1,406 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef NNUE_SIMD_H_INCLUDED
|
||||
#define NNUE_SIMD_H_INCLUDED
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
#include <immintrin.h>
|
||||
|
||||
#elif defined(USE_SSE41)
|
||||
#include <smmintrin.h>
|
||||
|
||||
#elif defined(USE_SSSE3)
|
||||
#include <tmmintrin.h>
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#include "../types.h"
|
||||
#include "nnue_common.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE::SIMD {
|
||||
|
||||
// If vector instructions are enabled, we update and refresh the
|
||||
// accumulator tile by tile such that each tile fits in the CPU's
|
||||
// vector registers.
|
||||
#define VECTOR
|
||||
|
||||
#ifdef USE_AVX512
|
||||
using vec_t = __m512i;
|
||||
using vec128_t = __m128i;
|
||||
using psqt_vec_t = __m256i;
|
||||
using vec_uint_t = __m512i;
|
||||
#define vec_load(a) _mm512_load_si512(a)
|
||||
#define vec_store(a, b) _mm512_store_si512(a, b)
|
||||
#define vec_add_16(a, b) _mm512_add_epi16(a, b)
|
||||
#define vec_sub_16(a, b) _mm512_sub_epi16(a, b)
|
||||
#define vec_mulhi_16(a, b) _mm512_mulhi_epi16(a, b)
|
||||
#define vec_zero() _mm512_setzero_epi32()
|
||||
#define vec_set_16(a) _mm512_set1_epi16(a)
|
||||
#define vec_max_16(a, b) _mm512_max_epi16(a, b)
|
||||
#define vec_min_16(a, b) _mm512_min_epi16(a, b)
|
||||
#define vec_slli_16(a, b) _mm512_slli_epi16(a, b)
|
||||
// Inverse permuted at load time
|
||||
#define vec_packus_16(a, b) _mm512_packus_epi16(a, b)
|
||||
#define vec_load_psqt(a) _mm256_load_si256(a)
|
||||
#define vec_store_psqt(a, b) _mm256_store_si256(a, b)
|
||||
#define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b)
|
||||
#define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b)
|
||||
#define vec_zero_psqt() _mm256_setzero_si256()
|
||||
|
||||
#ifdef USE_SSSE3
|
||||
#define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512())
|
||||
#endif
|
||||
|
||||
#define vec128_zero _mm_setzero_si128()
|
||||
#define vec128_set_16(a) _mm_set1_epi16(a)
|
||||
#define vec128_load(a) _mm_load_si128(a)
|
||||
#define vec128_storeu(a, b) _mm_storeu_si128(a, b)
|
||||
#define vec128_add(a, b) _mm_add_epi16(a, b)
|
||||
#define NumRegistersSIMD 16
|
||||
#define MaxChunkSize 64
|
||||
|
||||
#elif USE_AVX2
|
||||
using vec_t = __m256i;
|
||||
using vec128_t = __m128i;
|
||||
using psqt_vec_t = __m256i;
|
||||
using vec_uint_t = __m256i;
|
||||
#define vec_load(a) _mm256_load_si256(a)
|
||||
#define vec_store(a, b) _mm256_store_si256(a, b)
|
||||
#define vec_add_16(a, b) _mm256_add_epi16(a, b)
|
||||
#define vec_sub_16(a, b) _mm256_sub_epi16(a, b)
|
||||
#define vec_mulhi_16(a, b) _mm256_mulhi_epi16(a, b)
|
||||
#define vec_zero() _mm256_setzero_si256()
|
||||
#define vec_set_16(a) _mm256_set1_epi16(a)
|
||||
#define vec_max_16(a, b) _mm256_max_epi16(a, b)
|
||||
#define vec_min_16(a, b) _mm256_min_epi16(a, b)
|
||||
#define vec_slli_16(a, b) _mm256_slli_epi16(a, b)
|
||||
// Inverse permuted at load time
|
||||
#define vec_packus_16(a, b) _mm256_packus_epi16(a, b)
|
||||
#define vec_load_psqt(a) _mm256_load_si256(a)
|
||||
#define vec_store_psqt(a, b) _mm256_store_si256(a, b)
|
||||
#define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b)
|
||||
#define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b)
|
||||
#define vec_zero_psqt() _mm256_setzero_si256()
|
||||
|
||||
#ifdef USE_SSSE3
|
||||
#if defined(USE_VNNI) && !defined(USE_AVXVNNI)
|
||||
#define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256())
|
||||
#else
|
||||
#define vec_nnz(a) \
|
||||
_mm256_movemask_ps( \
|
||||
_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define vec128_zero _mm_setzero_si128()
|
||||
#define vec128_set_16(a) _mm_set1_epi16(a)
|
||||
#define vec128_load(a) _mm_load_si128(a)
|
||||
#define vec128_storeu(a, b) _mm_storeu_si128(a, b)
|
||||
#define vec128_add(a, b) _mm_add_epi16(a, b)
|
||||
|
||||
#define NumRegistersSIMD 16
|
||||
#define MaxChunkSize 32
|
||||
|
||||
#elif USE_SSE2
|
||||
using vec_t = __m128i;
|
||||
using vec128_t = __m128i;
|
||||
using psqt_vec_t = __m128i;
|
||||
using vec_uint_t = __m128i;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a, b) *(a) = (b)
|
||||
#define vec_add_16(a, b) _mm_add_epi16(a, b)
|
||||
#define vec_sub_16(a, b) _mm_sub_epi16(a, b)
|
||||
#define vec_mulhi_16(a, b) _mm_mulhi_epi16(a, b)
|
||||
#define vec_zero() _mm_setzero_si128()
|
||||
#define vec_set_16(a) _mm_set1_epi16(a)
|
||||
#define vec_max_16(a, b) _mm_max_epi16(a, b)
|
||||
#define vec_min_16(a, b) _mm_min_epi16(a, b)
|
||||
#define vec_slli_16(a, b) _mm_slli_epi16(a, b)
|
||||
#define vec_packus_16(a, b) _mm_packus_epi16(a, b)
|
||||
#define vec_load_psqt(a) (*(a))
|
||||
#define vec_store_psqt(a, b) *(a) = (b)
|
||||
#define vec_add_psqt_32(a, b) _mm_add_epi32(a, b)
|
||||
#define vec_sub_psqt_32(a, b) _mm_sub_epi32(a, b)
|
||||
#define vec_zero_psqt() _mm_setzero_si128()
|
||||
|
||||
#ifdef USE_SSSE3
|
||||
#define vec_nnz(a) \
|
||||
_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128())))
|
||||
#endif
|
||||
|
||||
#define vec128_zero _mm_setzero_si128()
|
||||
#define vec128_set_16(a) _mm_set1_epi16(a)
|
||||
#define vec128_load(a) _mm_load_si128(a)
|
||||
#define vec128_storeu(a, b) _mm_storeu_si128(a, b)
|
||||
#define vec128_add(a, b) _mm_add_epi16(a, b)
|
||||
|
||||
#define NumRegistersSIMD (Is64Bit ? 16 : 8)
|
||||
#define MaxChunkSize 16
|
||||
|
||||
#elif USE_NEON
|
||||
using vec_t = int16x8_t;
|
||||
using psqt_vec_t = int32x4_t;
|
||||
using vec128_t = uint16x8_t;
|
||||
using vec_uint_t = uint32x4_t;
|
||||
#define vec_load(a) (*(a))
|
||||
#define vec_store(a, b) *(a) = (b)
|
||||
#define vec_add_16(a, b) vaddq_s16(a, b)
|
||||
#define vec_sub_16(a, b) vsubq_s16(a, b)
|
||||
#define vec_mulhi_16(a, b) vqdmulhq_s16(a, b)
|
||||
#define vec_zero() vec_t{0}
|
||||
#define vec_set_16(a) vdupq_n_s16(a)
|
||||
#define vec_max_16(a, b) vmaxq_s16(a, b)
|
||||
#define vec_min_16(a, b) vminq_s16(a, b)
|
||||
#define vec_slli_16(a, b) vshlq_s16(a, vec_set_16(b))
|
||||
#define vec_packus_16(a, b) reinterpret_cast<vec_t>(vcombine_u8(vqmovun_s16(a), vqmovun_s16(b)))
|
||||
#define vec_load_psqt(a) (*(a))
|
||||
#define vec_store_psqt(a, b) *(a) = (b)
|
||||
#define vec_add_psqt_32(a, b) vaddq_s32(a, b)
|
||||
#define vec_sub_psqt_32(a, b) vsubq_s32(a, b)
|
||||
#define vec_zero_psqt() psqt_vec_t{0}
|
||||
|
||||
static constexpr std::uint32_t Mask[4] = {1, 2, 4, 8};
|
||||
#define vec_nnz(a) vaddvq_u32(vandq_u32(vtstq_u32(a, a), vld1q_u32(Mask)))
|
||||
#define vec128_zero vdupq_n_u16(0)
|
||||
#define vec128_set_16(a) vdupq_n_u16(a)
|
||||
#define vec128_load(a) vld1q_u16(reinterpret_cast<const std::uint16_t*>(a))
|
||||
#define vec128_storeu(a, b) vst1q_u16(reinterpret_cast<std::uint16_t*>(a), b)
|
||||
#define vec128_add(a, b) vaddq_u16(a, b)
|
||||
|
||||
#define NumRegistersSIMD 16
|
||||
#define MaxChunkSize 16
|
||||
|
||||
#else
|
||||
#undef VECTOR
|
||||
|
||||
#endif
|
||||
|
||||
// Uniform add/sub interface used by fused() for the 16-bit accumulator values.
// With VECTOR defined, `type` is the SIMD register vec_t and the operations
// forward to the vec_add_16 / vec_sub_16 macros; otherwise `type` is the
// scalar BiasType and plain integer arithmetic is used.
struct Vec16Wrapper {
|
||||
#ifdef VECTOR
|
||||
using type = vec_t;
|
||||
static type add(const type& lhs, const type& rhs) { return vec_add_16(lhs, rhs); }
|
||||
static type sub(const type& lhs, const type& rhs) { return vec_sub_16(lhs, rhs); }
|
||||
#else
|
||||
using type = BiasType;
|
||||
static type add(const type& lhs, const type& rhs) { return lhs + rhs; }
|
||||
static type sub(const type& lhs, const type& rhs) { return lhs - rhs; }
|
||||
#endif
|
||||
};
|
||||
|
||||
// Uniform add/sub interface used by fused() for the 32-bit PSQT values.
// With VECTOR defined, `type` is the SIMD register psqt_vec_t and the
// operations forward to vec_add_psqt_32 / vec_sub_psqt_32; otherwise `type`
// is the scalar PSQTWeightType and plain integer arithmetic is used.
struct Vec32Wrapper {
|
||||
#ifdef VECTOR
|
||||
using type = psqt_vec_t;
|
||||
static type add(const type& lhs, const type& rhs) { return vec_add_psqt_32(lhs, rhs); }
|
||||
static type sub(const type& lhs, const type& rhs) { return vec_sub_psqt_32(lhs, rhs); }
|
||||
#else
|
||||
using type = PSQTWeightType;
|
||||
static type add(const type& lhs, const type& rhs) { return lhs + rhs; }
|
||||
static type sub(const type& lhs, const type& rhs) { return lhs - rhs; }
|
||||
#endif
|
||||
};
|
||||
|
||||
// Operation tags passed as template arguments to fused(): each tag selects
// whether the matching operand is added to or subtracted from the running value.
enum UpdateOperation {
|
||||
Add,
|
||||
Sub
|
||||
};
|
||||
|
||||
// Terminal overload of fused(): selected when the operation pack is empty
// (enforced by the enable_if on sizeof...(ops) == 0). There is nothing left
// to apply, so the input value is returned unchanged.
template<typename VecWrapper,
|
||||
UpdateOperation... ops,
|
||||
std::enable_if_t<sizeof...(ops) == 0, bool> = true>
|
||||
typename VecWrapper::type fused(const typename VecWrapper::type& in) {
|
||||
return in;
|
||||
}
|
||||
|
||||
// Applies a compile-time sequence of Add/Sub operations to `in`, one operand
// per operation, and returns the result.
//
// The first operation (update_op) is applied to `in` and `operand` through
// VecWrapper::add or VecWrapper::sub; the function then recurses with the
// remaining operations (ops...) and operands (operands...).
//
// Constraints enforced by the enable_if parameters:
//  - every operand type must be the same as VecWrapper::type (is_all_same_v);
//  - the number of remaining operations must equal the number of remaining
//    operands, i.e. exactly one operand is supplied per operation.
template<typename VecWrapper,
|
||||
UpdateOperation update_op,
|
||||
UpdateOperation... ops,
|
||||
typename T,
|
||||
typename... Ts,
|
||||
std::enable_if_t<is_all_same_v<typename VecWrapper::type, T, Ts...>, bool> = true,
|
||||
std::enable_if_t<sizeof...(ops) == sizeof...(Ts), bool> = true>
|
||||
typename VecWrapper::type
|
||||
fused(const typename VecWrapper::type& in, const T& operand, const Ts&... operands) {
|
||||
// update_op is a template argument, so the switch is resolved at compile time.
switch (update_op)
|
||||
{
|
||||
case Add :
|
||||
return fused<VecWrapper, ops...>(VecWrapper::add(in, operand), operands...);
|
||||
case Sub :
|
||||
return fused<VecWrapper, ops...>(VecWrapper::sub(in, operand), operands...);
|
||||
default :
|
||||
// Rejects, at compile time, any enumerator other than Add and Sub.
static_assert(update_op == Add || update_op == Sub,
|
||||
"Only Add and Sub are currently supported.");
|
||||
return typename VecWrapper::type();
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
|
||||
// Horizontal add: returns the sum of all 32-bit lanes of `sum`, plus `bias`.
[[maybe_unused]] static int m512_hadd(__m512i sum, int bias) {
|
||||
return _mm512_reduce_add_epi32(sum) + bias;
|
||||
}
|
||||
|
||||
// Accumulates into each 32-bit lane of `acc` the dot product of the
// corresponding four 8-bit elements of `a` and `b` (512-bit version).
//
// With USE_VNNI this is a single dpbusd instruction. Otherwise it is emulated:
// maddubs multiplies the bytes and sums adjacent pairs into 16-bit values,
// madd with a vector of ones sums adjacent 16-bit pairs into 32-bit values,
// and the result is added to `acc`.
// NOTE(review): maddubs saturates its 16-bit intermediate while dpbusd does
// not; presumably callers keep inputs in a range where this cannot differ -
// confirm against the callers.
[[maybe_unused]] static void m512_add_dpbusd_epi32(__m512i& acc, __m512i a, __m512i b) {
|
||||
|
||||
#if defined(USE_VNNI)
|
||||
acc = _mm512_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
__m512i product0 = _mm512_maddubs_epi16(a, b);
|
||||
product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1));
|
||||
acc = _mm512_add_epi32(acc, product0);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
|
||||
// Horizontal add: returns the sum of all 32-bit lanes of `sum`, plus `bias`.
// The 256-bit value is first folded to 128 bits by adding its lower and upper
// halves; two shuffle-and-add steps then fold the remaining four lanes so that
// the lowest lane, which is the one extracted, holds the total.
[[maybe_unused]] static int m256_hadd(__m256i sum, int bias) {
|
||||
__m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
|
||||
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
|
||||
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
|
||||
return _mm_cvtsi128_si32(sum128) + bias;
|
||||
}
|
||||
|
||||
// Accumulates into each 32-bit lane of `acc` the dot product of the
// corresponding four 8-bit elements of `a` and `b` (256-bit version).
//
// With USE_VNNI this is a single dpbusd instruction. Otherwise it is emulated:
// maddubs multiplies the bytes and sums adjacent pairs into 16-bit values,
// madd with a vector of ones sums adjacent 16-bit pairs into 32-bit values,
// and the result is added to `acc`.
// NOTE(review): maddubs saturates its 16-bit intermediate while dpbusd does
// not; presumably callers keep inputs in a range where this cannot differ -
// confirm against the callers.
[[maybe_unused]] static void m256_add_dpbusd_epi32(__m256i& acc, __m256i a, __m256i b) {
|
||||
|
||||
#if defined(USE_VNNI)
|
||||
acc = _mm256_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
__m256i product0 = _mm256_maddubs_epi16(a, b);
|
||||
product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1));
|
||||
acc = _mm256_add_epi32(acc, product0);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(USE_SSSE3)
|
||||
|
||||
// Horizontal add: returns the sum of the four 32-bit lanes of `sum`, plus
// `bias`. Two shuffle-and-add steps fold the lanes so that the lowest lane,
// which is the one extracted, holds the total. The shuffle controls are
// written as literals; the trailing comments give their symbolic names.
[[maybe_unused]] static int m128_hadd(__m128i sum, int bias) {
|
||||
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
|
||||
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
|
||||
return _mm_cvtsi128_si32(sum) + bias;
|
||||
}
|
||||
|
||||
// Accumulates into each 32-bit lane of `acc` the dot product of the
// corresponding four 8-bit elements of `a` and `b` (128-bit version).
// maddubs multiplies the bytes and sums adjacent pairs into 16-bit values,
// madd with a vector of ones sums adjacent 16-bit pairs into 32-bit values,
// and the result is added to `acc`.
[[maybe_unused]] static void m128_add_dpbusd_epi32(__m128i& acc, __m128i a, __m128i b) {
|
||||
|
||||
__m128i product0 = _mm_maddubs_epi16(a, b);
|
||||
product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1));
|
||||
acc = _mm_add_epi32(acc, product0);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(USE_NEON_DOTPROD)
|
||||
|
||||
// NEON dot-product variant: accumulates into each 32-bit lane of `acc` the dot
// product of the corresponding four signed 8-bit elements of `a` and `b`,
// using a single vdotq_s32 call.
[[maybe_unused]] static void
|
||||
dotprod_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) {
|
||||
|
||||
acc = vdotq_s32(acc, a, b);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(USE_NEON)
|
||||
|
||||
// Returns the sum of the four 32-bit lanes of `s`.
// When USE_NEON >= 8 the across-vector add intrinsic is used; otherwise the
// lanes are read by index and summed one by one.
// NOTE(review): USE_NEON >= 8 presumably identifies ARMv8 targets - confirm
// against the build configuration.
[[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) {
|
||||
#if USE_NEON >= 8
|
||||
return vaddvq_s32(s);
|
||||
#else
|
||||
return s[0] + s[1] + s[2] + s[3];
|
||||
#endif
|
||||
}
|
||||
|
||||
// Horizontal add: returns the sum of the four 32-bit lanes of `sum`, plus `bias`.
[[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) {
|
||||
return neon_m128_reduce_add_epi32(sum) + bias;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if USE_NEON >= 8
|
||||
// NEON version without the dot-product extension: accumulates into each 32-bit
// lane of `acc` the dot product of the corresponding four signed 8-bit
// elements of `a` and `b`.
// The low and high eight bytes are multiplied into widened 16-bit products,
// adjacent products are pairwise added, and the pairwise-add-accumulate step
// sums adjacent 16-bit values into the 32-bit lanes of `acc`.
[[maybe_unused]] static void neon_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) {
|
||||
|
||||
int16x8_t product0 = vmull_s8(vget_low_s8(a), vget_low_s8(b));
|
||||
int16x8_t product1 = vmull_high_s8(a, b);
|
||||
int16x8_t sum = vpaddq_s16(product0, product1);
|
||||
acc = vpadalq_s16(acc, sum);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// Compute optimal SIMD register count for feature transformer accumulation.
//
// Template parameters:
//   TransformedFeatureWidth - number of WeightType lanes to tile
//   HalfDimensions          - must be a multiple of the resulting TileHeight
//   PSQTBuckets             - number of PSQTWeightType lanes to tile
//
// Exposes NumRegs / NumPsqtRegs (registers used per tile) and TileHeight /
// PsqtTileHeight (elements covered by one tile). All members exist only when
// VECTOR is defined; without it the class is empty.
|
||||
template<IndexType TransformedFeatureWidth, IndexType HalfDimensions, IndexType PSQTBuckets>
|
||||
class SIMDTiling {
|
||||
#ifdef VECTOR
|
||||
// We use __m* types as template arguments, which causes GCC to emit warnings
|
||||
// about losing some attribute information. This is irrelevant to us as we
|
||||
// only take their size, so the following pragmas are harmless.
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wignored-attributes"
|
||||
#endif
|
||||
|
||||
// Returns how many registers of type SIMDRegisterType to use per tile when
// NumLanes elements of LaneType must be processed with at most MaxRegisters
// registers. The result always divides the "ideal" count, i.e. the number of
// registers needed to hold all lanes at once.
template<typename SIMDRegisterType, typename LaneType, int NumLanes, int MaxRegisters>
|
||||
static constexpr int BestRegisterCount() {
|
||||
constexpr std::size_t RegisterSize = sizeof(SIMDRegisterType);
|
||||
constexpr std::size_t LaneSize = sizeof(LaneType);
|
||||
|
||||
static_assert(RegisterSize >= LaneSize);
|
||||
static_assert(MaxRegisters <= NumRegistersSIMD);
|
||||
static_assert(MaxRegisters > 0);
|
||||
static_assert(NumRegistersSIMD > 0);
|
||||
static_assert(RegisterSize % LaneSize == 0);
|
||||
static_assert((NumLanes * LaneSize) % RegisterSize == 0);
|
||||
|
||||
// Number of registers needed to hold every lane simultaneously.
const int ideal = (NumLanes * LaneSize) / RegisterSize;
|
||||
if (ideal <= MaxRegisters)
|
||||
return ideal;
|
||||
|
||||
// Look for the largest divisor of the ideal register count that is no larger than MaxRegisters
|
||||
for (int divisor = MaxRegisters; divisor > 1; --divisor)
|
||||
if (ideal % divisor == 0)
|
||||
return divisor;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
public:
|
||||
static constexpr int NumRegs =
|
||||
BestRegisterCount<vec_t, WeightType, TransformedFeatureWidth, NumRegistersSIMD>();
|
||||
static constexpr int NumPsqtRegs =
|
||||
BestRegisterCount<psqt_vec_t, PSQTWeightType, PSQTBuckets, NumRegistersSIMD>();
|
||||
|
||||
// Elements covered by one tile: bytes held in the tile's registers divided by
// the element size. NOTE(review): the literals 2 and 4 presumably stand for
// sizeof(WeightType) and sizeof(PSQTWeightType) - confirm they stay in sync.
static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2;
|
||||
static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4;
|
||||
|
||||
static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions");
|
||||
static_assert(PSQTBuckets % PsqtTileHeight == 0, "PsqtTileHeight must divide PSQTBuckets");
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -54,8 +54,8 @@ namespace {
|
||||
|
||||
constexpr std::string_view PieceToChar(" PNBRQK pnbrqk");
|
||||
|
||||
constexpr Piece Pieces[] = {W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING,
|
||||
B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING};
|
||||
static constexpr Piece Pieces[] = {W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING,
|
||||
B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING};
|
||||
} // namespace
|
||||
|
||||
|
||||
@@ -270,7 +270,7 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si) {
|
||||
// a) side to move has a pawn threatening epSquare
|
||||
// b) there is an enemy pawn in front of epSquare
|
||||
// c) there is no piece on epSquare or behind epSquare
|
||||
enpassant = pawn_attacks_bb(~sideToMove, st->epSquare) & pieces(sideToMove, PAWN)
|
||||
enpassant = attacks_bb<PAWN>(st->epSquare, ~sideToMove) & pieces(sideToMove, PAWN)
|
||||
&& (pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove)))
|
||||
&& !(pieces() & (st->epSquare | (st->epSquare + pawn_push(sideToMove))));
|
||||
}
|
||||
@@ -321,7 +321,7 @@ void Position::set_check_info() const {
|
||||
|
||||
Square ksq = square<KING>(~sideToMove);
|
||||
|
||||
st->checkSquares[PAWN] = pawn_attacks_bb(~sideToMove, ksq);
|
||||
st->checkSquares[PAWN] = attacks_bb<PAWN>(ksq, ~sideToMove);
|
||||
st->checkSquares[KNIGHT] = attacks_bb<KNIGHT>(ksq);
|
||||
st->checkSquares[BISHOP] = attacks_bb<BISHOP>(ksq, pieces());
|
||||
st->checkSquares[ROOK] = attacks_bb<ROOK>(ksq, pieces());
|
||||
@@ -487,8 +487,8 @@ Bitboard Position::attackers_to(Square s, Bitboard occupied) const {
|
||||
|
||||
return (attacks_bb<ROOK>(s, occupied) & pieces(ROOK, QUEEN))
|
||||
| (attacks_bb<BISHOP>(s, occupied) & pieces(BISHOP, QUEEN))
|
||||
| (pawn_attacks_bb(BLACK, s) & pieces(WHITE, PAWN))
|
||||
| (pawn_attacks_bb(WHITE, s) & pieces(BLACK, PAWN))
|
||||
| (attacks_bb<PAWN>(s, BLACK) & pieces(WHITE, PAWN))
|
||||
| (attacks_bb<PAWN>(s, WHITE) & pieces(BLACK, PAWN))
|
||||
| (attacks_bb<KNIGHT>(s) & pieces(KNIGHT)) | (attacks_bb<KING>(s) & pieces(KING));
|
||||
}
|
||||
|
||||
@@ -498,7 +498,7 @@ bool Position::attackers_to_exist(Square s, Bitboard occupied, Color c) const {
|
||||
&& (attacks_bb<ROOK>(s, occupied) & pieces(c, ROOK, QUEEN)))
|
||||
|| ((attacks_bb<BISHOP>(s) & pieces(c, BISHOP, QUEEN))
|
||||
&& (attacks_bb<BISHOP>(s, occupied) & pieces(c, BISHOP, QUEEN)))
|
||||
|| (((pawn_attacks_bb(~c, s) & pieces(PAWN)) | (attacks_bb<KNIGHT>(s) & pieces(KNIGHT))
|
||||
|| (((attacks_bb<PAWN>(s, ~c) & pieces(PAWN)) | (attacks_bb<KNIGHT>(s) & pieces(KNIGHT))
|
||||
| (attacks_bb<KING>(s) & pieces(KING)))
|
||||
& pieces(c));
|
||||
}
|
||||
@@ -597,10 +597,14 @@ bool Position::pseudo_legal(const Move m) const {
|
||||
if ((Rank8BB | Rank1BB) & to)
|
||||
return false;
|
||||
|
||||
if (!(pawn_attacks_bb(us, from) & pieces(~us) & to) // Not a capture
|
||||
&& !((from + pawn_push(us) == to) && empty(to)) // Not a single push
|
||||
&& !((from + 2 * pawn_push(us) == to) // Not a double push
|
||||
&& (relative_rank(us, from) == RANK_2) && empty(to) && empty(to - pawn_push(us))))
|
||||
// Check if it's a valid capture, single push, or double push
|
||||
const bool isCapture = bool(attacks_bb<PAWN>(from, us) & pieces(~us) & to);
|
||||
const bool isSinglePush = (from + pawn_push(us) == to) && empty(to);
|
||||
const bool isDoublePush = (from + 2 * pawn_push(us) == to)
|
||||
&& (relative_rank(us, from) == RANK_2) && empty(to)
|
||||
&& empty(to - pawn_push(us));
|
||||
|
||||
if (!(isCapture || isSinglePush || isDoublePush))
|
||||
return false;
|
||||
}
|
||||
else if (!(attacks_bb(type_of(pc), from, pieces()) & to))
|
||||
@@ -698,7 +702,6 @@ DirtyPiece Position::do_move(Move m,
|
||||
// our state pointer to point to the new (ready to be updated) state.
|
||||
std::memcpy(&newSt, st, offsetof(StateInfo, key));
|
||||
newSt.previous = st;
|
||||
st->next = &newSt;
|
||||
st = &newSt;
|
||||
|
||||
// Increment ply counters. In particular, rule50 will be reset to zero later on
|
||||
@@ -707,9 +710,6 @@ DirtyPiece Position::do_move(Move m,
|
||||
++st->rule50;
|
||||
++st->pliesFromNull;
|
||||
|
||||
DirtyPiece dp;
|
||||
dp.dirty_num = 1;
|
||||
|
||||
Color us = sideToMove;
|
||||
Color them = ~us;
|
||||
Square from = m.from_sq();
|
||||
@@ -717,6 +717,12 @@ DirtyPiece Position::do_move(Move m,
|
||||
Piece pc = piece_on(from);
|
||||
Piece captured = m.type_of() == EN_PASSANT ? make_piece(them, PAWN) : piece_on(to);
|
||||
|
||||
DirtyPiece dp;
|
||||
dp.pc = pc;
|
||||
dp.from = from;
|
||||
dp.to = to;
|
||||
dp.add_sq = SQ_NONE;
|
||||
|
||||
assert(color_of(pc) == us);
|
||||
assert(captured == NO_PIECE || color_of(captured) == (m.type_of() != CASTLING ? them : us));
|
||||
assert(type_of(captured) != KING);
|
||||
@@ -733,8 +739,7 @@ DirtyPiece Position::do_move(Move m,
|
||||
st->nonPawnKey[us] ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto];
|
||||
captured = NO_PIECE;
|
||||
}
|
||||
|
||||
if (captured)
|
||||
else if (captured)
|
||||
{
|
||||
Square capsq = to;
|
||||
|
||||
@@ -764,10 +769,8 @@ DirtyPiece Position::do_move(Move m,
|
||||
st->minorPieceKey ^= Zobrist::psq[captured][capsq];
|
||||
}
|
||||
|
||||
dp.dirty_num = 2; // 1 piece moved, 1 piece captured
|
||||
dp.piece[1] = captured;
|
||||
dp.from[1] = capsq;
|
||||
dp.to[1] = SQ_NONE;
|
||||
dp.remove_pc = captured;
|
||||
dp.remove_sq = capsq;
|
||||
|
||||
// Update board and piece lists
|
||||
remove_piece(capsq);
|
||||
@@ -778,6 +781,8 @@ DirtyPiece Position::do_move(Move m,
|
||||
// Reset rule 50 counter
|
||||
st->rule50 = 0;
|
||||
}
|
||||
else
|
||||
dp.remove_sq = SQ_NONE;
|
||||
|
||||
// Update hash key
|
||||
k ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to];
|
||||
@@ -800,9 +805,6 @@ DirtyPiece Position::do_move(Move m,
|
||||
// Move the piece. The tricky Chess960 castling is handled earlier
|
||||
if (m.type_of() != CASTLING)
|
||||
{
|
||||
dp.piece[0] = pc;
|
||||
dp.from[0] = from;
|
||||
dp.to[0] = to;
|
||||
|
||||
move_piece(from, to);
|
||||
}
|
||||
@@ -812,7 +814,7 @@ DirtyPiece Position::do_move(Move m,
|
||||
{
|
||||
// Set en passant square if the moved pawn can be captured
|
||||
if ((int(to) ^ int(from)) == 16
|
||||
&& (pawn_attacks_bb(us, to - pawn_push(us)) & pieces(them, PAWN)))
|
||||
&& (attacks_bb<PAWN>(to - pawn_push(us), us) & pieces(them, PAWN)))
|
||||
{
|
||||
st->epSquare = to - pawn_push(us);
|
||||
k ^= Zobrist::enpassant[file_of(st->epSquare)];
|
||||
@@ -829,12 +831,9 @@ DirtyPiece Position::do_move(Move m,
|
||||
remove_piece(to);
|
||||
put_piece(promotion, to);
|
||||
|
||||
// Promoting pawn to SQ_NONE, promoted piece from SQ_NONE
|
||||
dp.to[0] = SQ_NONE;
|
||||
dp.piece[dp.dirty_num] = promotion;
|
||||
dp.from[dp.dirty_num] = SQ_NONE;
|
||||
dp.to[dp.dirty_num] = to;
|
||||
dp.dirty_num++;
|
||||
dp.add_pc = promotion;
|
||||
dp.add_sq = to;
|
||||
dp.to = SQ_NONE;
|
||||
|
||||
// Update hash keys
|
||||
// Zobrist::psq[pc][to] is zero, so we don't need to clear it
|
||||
@@ -901,6 +900,10 @@ DirtyPiece Position::do_move(Move m,
|
||||
|
||||
assert(pos_is_ok());
|
||||
|
||||
assert(dp.pc != NO_PIECE);
|
||||
assert(!(bool(captured) || m.type_of() == CASTLING) ^ (dp.remove_sq != SQ_NONE));
|
||||
assert(dp.from != SQ_NONE);
|
||||
assert(!(dp.add_sq != SQ_NONE) ^ (m.type_of() == PROMOTION || m.type_of() == CASTLING));
|
||||
return dp;
|
||||
}
|
||||
|
||||
@@ -983,13 +986,10 @@ void Position::do_castling(
|
||||
|
||||
if (Do)
|
||||
{
|
||||
dp->piece[0] = make_piece(us, KING);
|
||||
dp->from[0] = from;
|
||||
dp->to[0] = to;
|
||||
dp->piece[1] = make_piece(us, ROOK);
|
||||
dp->from[1] = rfrom;
|
||||
dp->to[1] = rto;
|
||||
dp->dirty_num = 2;
|
||||
dp->to = to;
|
||||
dp->remove_pc = dp->add_pc = make_piece(us, ROOK);
|
||||
dp->remove_sq = rfrom;
|
||||
dp->add_sq = rto;
|
||||
}
|
||||
|
||||
// Remove both pieces first since squares could overlap in Chess960
|
||||
@@ -1012,7 +1012,6 @@ void Position::do_null_move(StateInfo& newSt, const TranspositionTable& tt) {
|
||||
std::memcpy(&newSt, st, sizeof(StateInfo));
|
||||
|
||||
newSt.previous = st;
|
||||
st->next = &newSt;
|
||||
st = &newSt;
|
||||
|
||||
if (st->epSquare != SQ_NONE)
|
||||
|
||||
@@ -53,7 +53,6 @@ struct StateInfo {
|
||||
Key key;
|
||||
Bitboard checkersBB;
|
||||
StateInfo* previous;
|
||||
StateInfo* next;
|
||||
Bitboard blockersForKing[COLOR_NB];
|
||||
Bitboard pinners[COLOR_NB];
|
||||
Bitboard checkSquares[PIECE_TYPE_NB];
|
||||
@@ -87,9 +86,9 @@ class Position {
|
||||
std::string fen() const;
|
||||
|
||||
// Position representation
|
||||
Bitboard pieces(PieceType pt = ALL_PIECES) const;
|
||||
Bitboard pieces() const; // All pieces
|
||||
template<typename... PieceTypes>
|
||||
Bitboard pieces(PieceType pt, PieceTypes... pts) const;
|
||||
Bitboard pieces(PieceTypes... pts) const;
|
||||
Bitboard pieces(Color c) const;
|
||||
template<typename... PieceTypes>
|
||||
Bitboard pieces(Color c, PieceTypes... pts) const;
|
||||
@@ -165,7 +164,6 @@ class Position {
|
||||
bool pos_is_ok() const;
|
||||
void flip();
|
||||
|
||||
// Used by NNUE
|
||||
StateInfo* state() const;
|
||||
|
||||
void put_piece(Piece pc, Square s);
|
||||
@@ -216,11 +214,11 @@ inline bool Position::empty(Square s) const { return piece_on(s) == NO_PIECE; }
|
||||
|
||||
inline Piece Position::moved_piece(Move m) const { return piece_on(m.from_sq()); }
|
||||
|
||||
inline Bitboard Position::pieces(PieceType pt) const { return byTypeBB[pt]; }
|
||||
inline Bitboard Position::pieces() const { return byTypeBB[ALL_PIECES]; }
|
||||
|
||||
template<typename... PieceTypes>
|
||||
inline Bitboard Position::pieces(PieceType pt, PieceTypes... pts) const {
|
||||
return pieces(pt) | pieces(pts...);
|
||||
inline Bitboard Position::pieces(PieceTypes... pts) const {
|
||||
return (byTypeBB[pts] | ...);
|
||||
}
|
||||
|
||||
inline Bitboard Position::pieces(Color c) const { return byColorBB[c]; }
|
||||
|
||||
573
src/search.cpp
573
src/search.cpp
File diff suppressed because it is too large
Load Diff
@@ -75,7 +75,8 @@ struct Stack {
|
||||
bool ttHit;
|
||||
int cutoffCnt;
|
||||
int reduction;
|
||||
bool isTTMove;
|
||||
bool isPvNode;
|
||||
int quietMoveStreak;
|
||||
};
|
||||
|
||||
|
||||
@@ -292,6 +293,8 @@ class Worker {
|
||||
CorrectionHistory<NonPawn> nonPawnCorrectionHistory;
|
||||
CorrectionHistory<Continuation> continuationCorrectionHistory;
|
||||
|
||||
TTMoveHistory ttMoveHistory;
|
||||
|
||||
private:
|
||||
void iterative_deepening();
|
||||
|
||||
|
||||
@@ -164,7 +164,7 @@ class ThreadPool {
|
||||
std::vector<std::unique_ptr<Thread>> threads;
|
||||
std::vector<NumaIndex> boundThreadToNumaNode;
|
||||
|
||||
uint64_t accumulate(std::atomic<uint64_t> Search::Worker::*member) const {
|
||||
uint64_t accumulate(std::atomic<uint64_t> Search::Worker::* member) const {
|
||||
|
||||
uint64_t sum = 0;
|
||||
for (auto&& th : threads)
|
||||
|
||||
@@ -85,16 +85,13 @@ void TimeManagement::init(Search::LimitsType& limits,
|
||||
// with constants are involved.
|
||||
const int64_t scaleFactor = useNodesTime ? npmsec : 1;
|
||||
const TimePoint scaledTime = limits.time[us] / scaleFactor;
|
||||
const TimePoint scaledInc = limits.inc[us] / scaleFactor;
|
||||
|
||||
// Maximum move horizon
|
||||
int centiMTG = limits.movestogo ? std::min(limits.movestogo * 100, 5000) : 5051;
|
||||
|
||||
// If less than one second, gradually reduce mtg
|
||||
if (scaledTime < 1000 && double(centiMTG) / scaledInc > 5.051)
|
||||
{
|
||||
if (scaledTime < 1000)
|
||||
centiMTG = scaledTime * 5.051;
|
||||
}
|
||||
|
||||
// Make sure timeLeft is > 0 since we may use it as a divisor
|
||||
TimePoint timeLeft =
|
||||
|
||||
@@ -22,11 +22,11 @@
|
||||
#include <cstdint>
|
||||
|
||||
#include "misc.h"
|
||||
#include "types.h"
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
class OptionsMap;
|
||||
enum Color : int8_t;
|
||||
|
||||
namespace Search {
|
||||
struct LimitsType;
|
||||
|
||||
@@ -110,6 +110,8 @@ void TTEntry::save(
|
||||
value16 = int16_t(v);
|
||||
eval16 = int16_t(ev);
|
||||
}
|
||||
else if (depth8 + DEPTH_ENTRY_OFFSET >= 5 && Bound(genBound8 & 0x3) != BOUND_EXACT)
|
||||
depth8--;
|
||||
}
|
||||
|
||||
|
||||
@@ -234,8 +236,8 @@ std::tuple<bool, TTData, TTWriter> TranspositionTable::probe(const Key key) cons
|
||||
// Find an entry to be replaced according to the replacement strategy
|
||||
TTEntry* replace = tte;
|
||||
for (int i = 1; i < ClusterSize; ++i)
|
||||
if (replace->depth8 - replace->relative_age(generation8) * 2
|
||||
> tte[i].depth8 - tte[i].relative_age(generation8) * 2)
|
||||
if (replace->depth8 - replace->relative_age(generation8)
|
||||
> tte[i].depth8 - tte[i].relative_age(generation8))
|
||||
replace = &tte[i];
|
||||
|
||||
return {false,
|
||||
|
||||
104
src/types.h
104
src/types.h
@@ -37,7 +37,9 @@
|
||||
// | only in 64-bit mode and requires hardware with pext support.
|
||||
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
// Disable some silly and noisy warnings from MSVC compiler
|
||||
@@ -55,9 +57,15 @@
|
||||
// _WIN32 Building on Windows (any)
|
||||
// _WIN64 Building on Windows 64 bit
|
||||
|
||||
#if defined(__GNUC__) && (__GNUC__ < 9 || (__GNUC__ == 9 && __GNUC_MINOR__ <= 2)) \
|
||||
&& defined(_WIN32) && !defined(__clang__)
|
||||
#define ALIGNAS_ON_STACK_VARIABLES_BROKEN
|
||||
// Enforce minimum GCC version
|
||||
#if defined(__GNUC__) && !defined(__clang__) \
|
||||
&& (__GNUC__ < 9 || (__GNUC__ == 9 && __GNUC_MINOR__ < 3))
|
||||
#error "Stockfish requires GCC 9.3 or later for correct compilation"
|
||||
#endif
|
||||
|
||||
// Enforce minimum Clang version
|
||||
#if defined(__clang__) && (__clang_major__ < 10)
|
||||
#error "Stockfish requires Clang 10.0 or later for correct compilation"
|
||||
#endif
|
||||
|
||||
#define ASSERT_ALIGNED(ptr, alignment) assert(reinterpret_cast<uintptr_t>(ptr) % alignment == 0)
|
||||
@@ -108,13 +116,13 @@ using Bitboard = uint64_t;
|
||||
constexpr int MAX_MOVES = 256;
|
||||
constexpr int MAX_PLY = 246;
|
||||
|
||||
enum Color {
|
||||
enum Color : int8_t {
|
||||
WHITE,
|
||||
BLACK,
|
||||
COLOR_NB = 2
|
||||
};
|
||||
|
||||
enum CastlingRights {
|
||||
enum CastlingRights : int8_t {
|
||||
NO_CASTLING,
|
||||
WHITE_OO,
|
||||
WHITE_OOO = WHITE_OO << 1,
|
||||
@@ -130,7 +138,7 @@ enum CastlingRights {
|
||||
CASTLING_RIGHT_NB = 16
|
||||
};
|
||||
|
||||
enum Bound {
|
||||
enum Bound : int8_t {
|
||||
BOUND_NONE,
|
||||
BOUND_UPPER,
|
||||
BOUND_LOWER,
|
||||
@@ -181,13 +189,13 @@ constexpr Value QueenValue = 2538;
|
||||
|
||||
|
||||
// clang-format off
|
||||
enum PieceType {
|
||||
enum PieceType : std::int8_t {
|
||||
NO_PIECE_TYPE, PAWN, KNIGHT, BISHOP, ROOK, QUEEN, KING,
|
||||
ALL_PIECES = 0,
|
||||
PIECE_TYPE_NB = 8
|
||||
};
|
||||
|
||||
enum Piece {
|
||||
enum Piece : std::int8_t {
|
||||
NO_PIECE,
|
||||
W_PAWN = PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING,
|
||||
B_PAWN = PAWN + 8, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING,
|
||||
@@ -201,26 +209,24 @@ constexpr Value PieceValue[PIECE_NB] = {
|
||||
|
||||
using Depth = int;
|
||||
|
||||
enum : int {
|
||||
// The following DEPTH_ constants are used for transposition table entries
|
||||
// and quiescence search move generation stages. In regular search, the
|
||||
// depth stored in the transposition table is literal: the search depth
|
||||
// (effort) used to make the corresponding transposition table value. In
|
||||
// quiescence search, however, the transposition table entries only store
|
||||
// the current quiescence move generation stage (which should thus compare
|
||||
// lower than any regular search depth).
|
||||
DEPTH_QS = 0,
|
||||
// For transposition table entries where no searching at all was done
|
||||
// (whether regular or qsearch) we use DEPTH_UNSEARCHED, which should thus
|
||||
// compare lower than any quiescence or regular depth. DEPTH_ENTRY_OFFSET
|
||||
// is used only for the transposition table entry occupancy check (see tt.cpp),
|
||||
// and should thus be lower than DEPTH_UNSEARCHED.
|
||||
DEPTH_UNSEARCHED = -2,
|
||||
DEPTH_ENTRY_OFFSET = -3
|
||||
};
|
||||
// The following DEPTH_ constants are used for transposition table entries
|
||||
// and quiescence search move generation stages. In regular search, the
|
||||
// depth stored in the transposition table is literal: the search depth
|
||||
// (effort) used to make the corresponding transposition table value. In
|
||||
// quiescence search, however, the transposition table entries only store
|
||||
// the current quiescence move generation stage (which should thus compare
|
||||
// lower than any regular search depth).
|
||||
constexpr Depth DEPTH_QS = 0;
|
||||
// For transposition table entries where no searching at all was done
|
||||
// (whether regular or qsearch) we use DEPTH_UNSEARCHED, which should thus
|
||||
// compare lower than any quiescence or regular depth. DEPTH_ENTRY_OFFSET
|
||||
// is used only for the transposition table entry occupancy check (see tt.cpp),
|
||||
// and should thus be lower than DEPTH_UNSEARCHED.
|
||||
constexpr Depth DEPTH_UNSEARCHED = -2;
|
||||
constexpr Depth DEPTH_ENTRY_OFFSET = -3;
|
||||
|
||||
// clang-format off
|
||||
enum Square : int {
|
||||
enum Square : int8_t {
|
||||
SQ_A1, SQ_B1, SQ_C1, SQ_D1, SQ_E1, SQ_F1, SQ_G1, SQ_H1,
|
||||
SQ_A2, SQ_B2, SQ_C2, SQ_D2, SQ_E2, SQ_F2, SQ_G2, SQ_H2,
|
||||
SQ_A3, SQ_B3, SQ_C3, SQ_D3, SQ_E3, SQ_F3, SQ_G3, SQ_H3,
|
||||
@@ -236,7 +242,7 @@ enum Square : int {
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
enum Direction : int {
|
||||
enum Direction : int8_t {
|
||||
NORTH = 8,
|
||||
EAST = 1,
|
||||
SOUTH = -NORTH,
|
||||
@@ -248,7 +254,7 @@ enum Direction : int {
|
||||
NORTH_WEST = NORTH + WEST
|
||||
};
|
||||
|
||||
enum File : int {
|
||||
enum File : int8_t {
|
||||
FILE_A,
|
||||
FILE_B,
|
||||
FILE_C,
|
||||
@@ -260,7 +266,7 @@ enum File : int {
|
||||
FILE_NB
|
||||
};
|
||||
|
||||
enum Rank : int {
|
||||
enum Rank : int8_t {
|
||||
RANK_1,
|
||||
RANK_2,
|
||||
RANK_3,
|
||||
@@ -274,23 +280,19 @@ enum Rank : int {
|
||||
|
||||
// Keep track of what a move changes on the board (used by NNUE)
|
||||
struct DirtyPiece {
|
||||
Piece pc; // this is never allowed to be NO_PIECE
|
||||
Square from, to; // to should be SQ_NONE for promotions
|
||||
|
||||
// Number of changed pieces
|
||||
int dirty_num;
|
||||
|
||||
// Max 3 pieces can change in one move. A promotion with capture moves
|
||||
// both the pawn and the captured piece to SQ_NONE and the piece promoted
|
||||
// to from SQ_NONE to the capture square.
|
||||
Piece piece[3];
|
||||
|
||||
// From and to squares, which may be SQ_NONE
|
||||
Square from[3];
|
||||
Square to[3];
|
||||
// if {add,remove}_sq is SQ_NONE, {add,remove}_pc is allowed to be
|
||||
// uninitialized
|
||||
// castling uses add_sq and remove_sq to remove and add the rook
|
||||
Square remove_sq, add_sq;
|
||||
Piece remove_pc, add_pc;
|
||||
};
|
||||
|
||||
#define ENABLE_INCR_OPERATORS_ON(T) \
|
||||
inline T& operator++(T& d) { return d = T(int(d) + 1); } \
|
||||
inline T& operator--(T& d) { return d = T(int(d) - 1); }
|
||||
constexpr T& operator++(T& d) { return d = T(int(d) + 1); } \
|
||||
constexpr T& operator--(T& d) { return d = T(int(d) - 1); }
|
||||
|
||||
ENABLE_INCR_OPERATORS_ON(PieceType)
|
||||
ENABLE_INCR_OPERATORS_ON(Square)
|
||||
@@ -303,10 +305,10 @@ constexpr Direction operator+(Direction d1, Direction d2) { return Direction(int
|
||||
constexpr Direction operator*(int i, Direction d) { return Direction(i * int(d)); }
|
||||
|
||||
// Additional operators to add a Direction to a Square
|
||||
constexpr Square operator+(Square s, Direction d) { return Square(int(s) + int(d)); }
|
||||
constexpr Square operator-(Square s, Direction d) { return Square(int(s) - int(d)); }
|
||||
inline Square& operator+=(Square& s, Direction d) { return s = s + d; }
|
||||
inline Square& operator-=(Square& s, Direction d) { return s = s - d; }
|
||||
constexpr Square operator+(Square s, Direction d) { return Square(int(s) + int(d)); }
|
||||
constexpr Square operator-(Square s, Direction d) { return Square(int(s) - int(d)); }
|
||||
constexpr Square& operator+=(Square& s, Direction d) { return s = s + d; }
|
||||
constexpr Square& operator-=(Square& s, Direction d) { return s = s - d; }
|
||||
|
||||
// Toggle color
|
||||
constexpr Color operator~(Color c) { return Color(c ^ BLACK); }
|
||||
@@ -334,7 +336,7 @@ constexpr Piece make_piece(Color c, PieceType pt) { return Piece((c << 3) + pt);
|
||||
|
||||
constexpr PieceType type_of(Piece pc) { return PieceType(pc & 7); }
|
||||
|
||||
inline Color color_of(Piece pc) {
|
||||
constexpr Color color_of(Piece pc) {
|
||||
assert(pc != NO_PIECE);
|
||||
return Color(pc >> 3);
|
||||
}
|
||||
@@ -429,6 +431,14 @@ class Move {
|
||||
std::uint16_t data;
|
||||
};
|
||||
|
||||
// Type trait: value is true when every type in Ts is exactly T, and also when
// Ts is empty. Derives from std::bool_constant so it follows the standard
// trait protocol (value, type, value_type, conversion to bool).
template<typename T, typename... Ts>
struct is_all_same: std::bool_constant<(std::is_same_v<T, Ts> && ...)> {};

// Convenience variable template for is_all_same<Ts...>::value. At least one
// type must be supplied. Declared inline so that every translation unit
// including this header refers to one and the same entity.
template<typename... Ts>
inline constexpr bool is_all_same_v = is_all_same<Ts...>::value;
|
||||
|
||||
} // namespace Stockfish
|
||||
|
||||
#endif // #ifndef TYPES_H_INCLUDED
|
||||
|
||||
@@ -33,7 +33,7 @@ namespace Stockfish {
|
||||
class Position;
|
||||
class Move;
|
||||
class Score;
|
||||
enum Square : int;
|
||||
enum Square : int8_t;
|
||||
using Value = int;
|
||||
|
||||
class UCIEngine {
|
||||
|
||||
@@ -2,16 +2,26 @@
|
||||
# obtain and optionally verify Bench / signature
|
||||
# if no reference is given, the output is deliberately limited to just the signature
|
||||
|
||||
STDOUT_FILE=$(mktemp)
|
||||
STDERR_FILE=$(mktemp)
|
||||
|
||||
# Failure handler. $1 is the script line number that failed. Prints that line
# number, dumps the captured stdout and stderr of the bench run, removes both
# temporary files and terminates the script with exit status 1.
error()
{
  printf '%s\n' "running bench for signature failed on line $1"

  printf '%s\n' "===== STDOUT ====="
  cat "$STDOUT_FILE"

  printf '%s\n' "===== STDERR ====="
  cat "$STDERR_FILE"

  rm -f "$STDOUT_FILE" "$STDERR_FILE"
  exit 1
}
|
||||
trap 'error ${LINENO}' ERR
|
||||
|
||||
# obtain
|
||||
eval "$WINE_PATH ./stockfish bench" > "$STDOUT_FILE" 2> "$STDERR_FILE" || error ${LINENO}
|
||||
signature=$(grep "Nodes searched : " "$STDERR_FILE" | awk '{print $4}')
|
||||
|
||||
signature=`eval "$WINE_PATH ./stockfish bench 2>&1" | grep "Nodes searched : " | awk '{print $4}'`
|
||||
rm -f "$STDOUT_FILE" "$STDERR_FILE"
|
||||
|
||||
if [ $# -gt 0 ]; then
|
||||
# compare to given reference
|
||||
@@ -28,4 +38,4 @@ if [ $# -gt 0 ]; then
|
||||
else
|
||||
# just report signature
|
||||
echo $signature
|
||||
fi
|
||||
fi
|
||||
Reference in New Issue
Block a user