mirror of
https://github.com/HChaZZY/Stockfish.git
synced 2025-12-24 19:16:49 +08:00
Merge remote-tracking branch 'upstream/master' into tools_merge_20210513
This commit is contained in:
1
AUTHORS
1
AUTHORS
@@ -27,6 +27,7 @@ Andy Duplain
|
||||
Antoine Champion (antoinechampion)
|
||||
Aram Tumanian (atumanian)
|
||||
Arjun Temurnikar
|
||||
Artem Solopiy (EntityFX)
|
||||
Auguste Pop
|
||||
Balint Pfliegel
|
||||
Ben Koshy (BKSpurgeon)
|
||||
|
||||
36
README.md
36
README.md
@@ -33,9 +33,14 @@ This distribution of Stockfish consists of the following files:
|
||||
* a file with the .nnue extension, storing the neural network for the NNUE
|
||||
evaluation. Binary distributions will have this file embedded.
|
||||
|
||||
## UCI options
|
||||
## The UCI protocol and available options
|
||||
|
||||
Currently, Stockfish has the following UCI options:
|
||||
The Universal Chess Interface (UCI) is a standard protocol used to communicate with a chess engine,
|
||||
and is the recommended way to do so for typical graphical user interfaces (GUI) or chess tools.
|
||||
|
||||
Stockfish implements most commands as described in [the UCI protocol](https://www.shredderchess.com/download/div/uci.zip).
|
||||
|
||||
For users, the following UCI options, which can typically be set via a GUI, are available in Stockfish:
|
||||
|
||||
* #### Threads
|
||||
The number of CPU threads used for searching a position. For best performance, set
|
||||
@@ -136,6 +141,33 @@ Currently, Stockfish has the following UCI options:
|
||||
* #### Debug Log File
|
||||
Write all communication to and from the engine into a text file.
|
||||
|
||||
For developers, the following non-standard commands might be of interest, mainly useful for debugging:
|
||||
|
||||
* #### bench ttSize threads limit fenFile limitType evalType
|
||||
Performs a standard benchmark using various options. The signature or standard node
|
||||
count is obtained using all defaults. `bench` is currently `bench 16 1 13 default depth mixed`.
|
||||
|
||||
* #### compiler
|
||||
Give information about the compiler and environment used for building a binary.
|
||||
|
||||
* #### d
|
||||
Display the current position, with ASCII art and FEN.
|
||||
|
||||
* #### eval
|
||||
Return the evaluation of the current position.
|
||||
|
||||
* #### export_net [filename]
|
||||
Exports the currently loaded network to a file.
|
||||
If the currently loaded network is the embedded network and the filename
|
||||
is not specified then the network is saved to the file matching the name
|
||||
of the embedded network, as defined in evaluate.h.
|
||||
If the currently loaded network is not the embedded network (some net set
|
||||
through the UCI setoption) then the filename parameter is required and the
|
||||
network is saved into that file.
|
||||
|
||||
* #### flip
|
||||
Flips the side to move.
|
||||
|
||||
### Generating Training Data
|
||||
|
||||
To generate training data from the classic eval, use the generate_training_data command with the setting "Use NNUE" set to "false". The given example is generation in its simplest form. There are more commands.
|
||||
|
||||
15
src/Makefile
15
src/Makefile
@@ -107,6 +107,7 @@ ifeq ($(ARCH), $(filter $(ARCH), \
|
||||
x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
|
||||
x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
|
||||
x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \
|
||||
e2k \
|
||||
armv7 armv7-neon armv8 apple-silicon general-64 general-32))
|
||||
SUPPORTED_ARCH=true
|
||||
else
|
||||
@@ -301,6 +302,17 @@ ifeq ($(ARCH),ppc-64)
|
||||
prefetch = yes
|
||||
endif
|
||||
|
||||
ifeq ($(findstring e2k,$(ARCH)),e2k)
|
||||
arch = e2k
|
||||
mmx = yes
|
||||
bits = 64
|
||||
sse = yes
|
||||
sse2 = yes
|
||||
ssse3 = yes
|
||||
sse41 = yes
|
||||
popcnt = yes
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
### ==========================================================================
|
||||
@@ -524,7 +536,6 @@ ifeq ($(popcnt),yes)
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(avx2),yes)
|
||||
CXXFLAGS += -DUSE_AVX2
|
||||
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
|
||||
@@ -692,6 +703,7 @@ help:
|
||||
@echo "armv7 > ARMv7 32-bit"
|
||||
@echo "armv7-neon > ARMv7 32-bit with popcnt and neon"
|
||||
@echo "armv8 > ARMv8 64-bit with popcnt and neon"
|
||||
@echo "e2k > Elbrus 2000"
|
||||
@echo "apple-silicon > Apple silicon ARM64"
|
||||
@echo "general-64 > unspecified 64-bit"
|
||||
@echo "general-32 > unspecified 32-bit"
|
||||
@@ -841,6 +853,7 @@ config-sanity: net
|
||||
@test "$(SUPPORTED_ARCH)" = "true"
|
||||
@test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
|
||||
test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \
|
||||
test "$(arch)" = "e2k" || \
|
||||
test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64"
|
||||
@test "$(bits)" = "32" || test "$(bits)" = "64"
|
||||
@test "$(prefetch)" = "yes" || test "$(prefetch)" = "no"
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
#include "misc.h"
|
||||
#include "pawns.h"
|
||||
#include "thread.h"
|
||||
#include "timeman.h"
|
||||
#include "uci.h"
|
||||
#include "incbin/incbin.h"
|
||||
|
||||
@@ -126,8 +127,28 @@ namespace Eval {
|
||||
}
|
||||
}
|
||||
|
||||
/// NNUE::verify() verifies that the last net used was loaded successfully
|
||||
void verify() {
|
||||
void NNUE::export_net(const std::optional<std::string>& filename) {
|
||||
std::string actualFilename;
|
||||
if (filename.has_value()) {
|
||||
actualFilename = filename.value();
|
||||
} else {
|
||||
if (eval_file_loaded != EvalFileDefaultName) {
|
||||
sync_cout << "Failed to export a net. A non-embedded net can only be saved if the filename is specified." << sync_endl;
|
||||
return;
|
||||
}
|
||||
actualFilename = EvalFileDefaultName;
|
||||
}
|
||||
|
||||
ofstream stream(actualFilename, std::ios_base::binary);
|
||||
if (save_eval(stream)) {
|
||||
sync_cout << "Network saved successfully to " << actualFilename << "." << sync_endl;
|
||||
} else {
|
||||
sync_cout << "Failed to export a net." << sync_endl;
|
||||
}
|
||||
}
|
||||
|
||||
/// NNUE::verify() verifies that the last net used was loaded successfully
|
||||
void NNUE::verify() {
|
||||
|
||||
string eval_file = string(Options["EvalFile"]);
|
||||
|
||||
@@ -1120,7 +1141,7 @@ Value Eval::evaluate(const Position& pos) {
|
||||
+ material / 32
|
||||
- 4 * pos.rule50_count();
|
||||
|
||||
Value nnue = NNUE::evaluate(pos) * scale / 1024 + Tempo;
|
||||
Value nnue = NNUE::evaluate(pos) * scale / 1024 + Time.tempoNNUE;
|
||||
|
||||
if (pos.is_chess960())
|
||||
nnue += fix_FRC(pos);
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#define EVALUATE_H_INCLUDED
|
||||
|
||||
#include <string>
|
||||
#include <optional>
|
||||
|
||||
#include "types.h"
|
||||
|
||||
@@ -50,7 +51,9 @@ namespace Eval {
|
||||
|
||||
Value evaluate(const Position& pos);
|
||||
bool load_eval(std::string name, std::istream& stream);
|
||||
bool save_eval(std::ostream& stream);
|
||||
void init();
|
||||
void export_net(const std::optional<std::string>& filename);
|
||||
void verify();
|
||||
}
|
||||
|
||||
|
||||
14
src/misc.cpp
14
src/misc.cpp
@@ -51,7 +51,7 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
|
||||
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) || defined(__e2k__)
|
||||
#define POSIXALIGNEDALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
@@ -194,6 +194,18 @@ std::string compiler_info() {
|
||||
compiler += "(version ";
|
||||
compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD);
|
||||
compiler += ")";
|
||||
#elif defined(__e2k__) && defined(__LCC__)
|
||||
#define dot_ver2(n) \
|
||||
compiler += (char)'.'; \
|
||||
compiler += (char)('0' + (n) / 10); \
|
||||
compiler += (char)('0' + (n) % 10);
|
||||
|
||||
compiler += "MCST LCC ";
|
||||
compiler += "(version ";
|
||||
compiler += std::to_string(__LCC__ / 100);
|
||||
dot_ver2(__LCC__ % 100)
|
||||
dot_ver2(__LCC_MINOR__)
|
||||
compiler += ")";
|
||||
#elif __GNUC__
|
||||
compiler += "g++ (GNUC) ";
|
||||
compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
|
||||
|
||||
43
src/misc.h
43
src/misc.h
@@ -86,6 +86,49 @@ T* align_ptr_up(T* ptr)
|
||||
return reinterpret_cast<T*>(reinterpret_cast<char*>((ptrint + (Alignment - 1)) / Alignment * Alignment));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class ValueListInserter {
|
||||
public:
|
||||
ValueListInserter(T* v, std::size_t& s) :
|
||||
values(v),
|
||||
size(&s)
|
||||
{
|
||||
}
|
||||
|
||||
void push_back(const T& value) { values[(*size)++] = value; }
|
||||
private:
|
||||
T* values;
|
||||
std::size_t* size;
|
||||
};
|
||||
|
||||
template <typename T, std::size_t MaxSize>
|
||||
class ValueList {
|
||||
|
||||
public:
|
||||
std::size_t size() const { return size_; }
|
||||
void resize(std::size_t newSize) { size_ = newSize; }
|
||||
void push_back(const T& value) { values_[size_++] = value; }
|
||||
T& operator[](std::size_t index) { return values_[index]; }
|
||||
T* begin() { return values_; }
|
||||
T* end() { return values_ + size_; }
|
||||
const T& operator[](std::size_t index) const { return values_[index]; }
|
||||
const T* begin() const { return values_; }
|
||||
const T* end() const { return values_ + size_; }
|
||||
operator ValueListInserter<T>() { return ValueListInserter(values_, size_); }
|
||||
|
||||
void swap(ValueList& other) {
|
||||
const std::size_t maxSize = std::max(size_, other.size_);
|
||||
for (std::size_t i = 0; i < maxSize; ++i) {
|
||||
std::swap(values_[i], other.values_[i]);
|
||||
}
|
||||
std::swap(size_, other.size_);
|
||||
}
|
||||
|
||||
private:
|
||||
T values_[MaxSize];
|
||||
std::size_t size_ = 0;
|
||||
};
|
||||
|
||||
// This logger allows printing many parts in a region atomically
|
||||
// but doesn't block the threads trying to append to other regions.
|
||||
// Instead if some region tries to print while other region holds
|
||||
|
||||
158
src/movegen.cpp
158
src/movegen.cpp
@@ -58,19 +58,16 @@ namespace {
|
||||
constexpr Direction UpLeft = (Us == WHITE ? NORTH_WEST : SOUTH_EAST);
|
||||
|
||||
const Square ksq = pos.square<KING>(Them);
|
||||
Bitboard emptySquares;
|
||||
const Bitboard emptySquares = Type == QUIETS || Type == QUIET_CHECKS ? target : ~pos.pieces();
|
||||
const Bitboard enemies = Type == EVASIONS ? pos.checkers()
|
||||
: Type == CAPTURES ? target : pos.pieces(Them);
|
||||
|
||||
Bitboard pawnsOn7 = pos.pieces(Us, PAWN) & TRank7BB;
|
||||
Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB;
|
||||
|
||||
Bitboard enemies = (Type == EVASIONS ? pos.checkers():
|
||||
Type == CAPTURES ? target : pos.pieces(Them));
|
||||
|
||||
// Single and double pawn pushes, no promotions
|
||||
if (Type != CAPTURES)
|
||||
{
|
||||
emptySquares = (Type == QUIETS || Type == QUIET_CHECKS ? target : ~pos.pieces());
|
||||
|
||||
Bitboard b1 = shift<Up>(pawnsNotOn7) & emptySquares;
|
||||
Bitboard b2 = shift<Up>(b1 & TRank3BB) & emptySquares;
|
||||
|
||||
@@ -82,22 +79,12 @@ namespace {
|
||||
|
||||
if (Type == QUIET_CHECKS)
|
||||
{
|
||||
b1 &= pawn_attacks_bb(Them, ksq);
|
||||
b2 &= pawn_attacks_bb(Them, ksq);
|
||||
|
||||
// Add pawn pushes which give discovered check. This is possible only
|
||||
// if the pawn is not on the same file as the enemy king, because we
|
||||
// don't generate captures. Note that a possible discovered check
|
||||
// promotion has been already generated amongst the captures.
|
||||
Bitboard dcCandidateQuiets = pos.blockers_for_king(Them) & pawnsNotOn7;
|
||||
if (dcCandidateQuiets)
|
||||
{
|
||||
Bitboard dc1 = shift<Up>(dcCandidateQuiets) & emptySquares & ~file_bb(ksq);
|
||||
Bitboard dc2 = shift<Up>(dc1 & TRank3BB) & emptySquares;
|
||||
|
||||
b1 |= dc1;
|
||||
b2 |= dc2;
|
||||
}
|
||||
// To make a quiet check, you either make a direct check by pushing a pawn
|
||||
// or push a blocker pawn that is not on the same file as the enemy king.
|
||||
// Discovered check promotion has been already generated amongst the captures.
|
||||
Bitboard dcCandidatePawns = pos.blockers_for_king(Them) & ~file_bb(ksq);
|
||||
b1 &= pawn_attacks_bb(Them, ksq) | shift< Up>(dcCandidatePawns);
|
||||
b2 &= pawn_attacks_bb(Them, ksq) | shift<Up+Up>(dcCandidatePawns);
|
||||
}
|
||||
|
||||
while (b1)
|
||||
@@ -116,16 +103,13 @@ namespace {
|
||||
// Promotions and underpromotions
|
||||
if (pawnsOn7)
|
||||
{
|
||||
if (Type == CAPTURES)
|
||||
emptySquares = ~pos.pieces();
|
||||
|
||||
if (Type == EVASIONS)
|
||||
emptySquares &= target;
|
||||
|
||||
Bitboard b1 = shift<UpRight>(pawnsOn7) & enemies;
|
||||
Bitboard b2 = shift<UpLeft >(pawnsOn7) & enemies;
|
||||
Bitboard b3 = shift<Up >(pawnsOn7) & emptySquares;
|
||||
|
||||
if (Type == EVASIONS)
|
||||
b3 &= target;
|
||||
|
||||
while (b1)
|
||||
moveList = make_promotions<Type, UpRight>(moveList, pop_lsb(b1), ksq);
|
||||
|
||||
@@ -175,19 +159,20 @@ namespace {
|
||||
}
|
||||
|
||||
|
||||
template<PieceType Pt, bool Checks>
|
||||
ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard piecesToMove, Bitboard target) {
|
||||
template<Color Us, PieceType Pt, bool Checks>
|
||||
ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) {
|
||||
|
||||
static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()");
|
||||
|
||||
Bitboard bb = piecesToMove & pos.pieces(Pt);
|
||||
Bitboard bb = pos.pieces(Us, Pt);
|
||||
|
||||
while (bb)
|
||||
{
|
||||
Square from = pop_lsb(bb);
|
||||
|
||||
Bitboard b = attacks_bb<Pt>(from, pos.pieces()) & target;
|
||||
if constexpr (Checks)
|
||||
|
||||
// To check, you either move freely a blocker or make a direct check.
|
||||
if (Checks && (Pt == QUEEN || !(pos.blockers_for_king(~Us) & from)))
|
||||
b &= pos.check_squares(Pt);
|
||||
|
||||
while (b)
|
||||
@@ -204,42 +189,34 @@ namespace {
|
||||
static_assert(Type != LEGAL, "Unsupported type in generate_all()");
|
||||
|
||||
constexpr bool Checks = Type == QUIET_CHECKS; // Reduce template instantiations
|
||||
Bitboard target, piecesToMove = pos.pieces(Us);
|
||||
const Square ksq = pos.square<KING>(Us);
|
||||
Bitboard target;
|
||||
|
||||
if(Type == QUIET_CHECKS)
|
||||
piecesToMove &= ~pos.blockers_for_king(~Us);
|
||||
if (Type == EVASIONS && more_than_one(pos.checkers()))
|
||||
goto kingMoves; // Double check, only a king move can save the day
|
||||
|
||||
switch (Type)
|
||||
{
|
||||
case CAPTURES:
|
||||
target = pos.pieces(~Us);
|
||||
break;
|
||||
case QUIETS:
|
||||
case QUIET_CHECKS:
|
||||
target = ~pos.pieces();
|
||||
break;
|
||||
case EVASIONS:
|
||||
target = between_bb(pos.square<KING>(Us), lsb(pos.checkers()));
|
||||
break;
|
||||
case NON_EVASIONS:
|
||||
target = ~pos.pieces(Us);
|
||||
break;
|
||||
}
|
||||
target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers()))
|
||||
: Type == NON_EVASIONS ? ~pos.pieces( Us)
|
||||
: Type == CAPTURES ? pos.pieces(~Us)
|
||||
: ~pos.pieces( ); // QUIETS || QUIET_CHECKS
|
||||
|
||||
moveList = generate_pawn_moves<Us, Type>(pos, moveList, target);
|
||||
moveList = generate_moves<KNIGHT, Checks>(pos, moveList, piecesToMove, target);
|
||||
moveList = generate_moves<BISHOP, Checks>(pos, moveList, piecesToMove, target);
|
||||
moveList = generate_moves< ROOK, Checks>(pos, moveList, piecesToMove, target);
|
||||
moveList = generate_moves< QUEEN, Checks>(pos, moveList, piecesToMove, target);
|
||||
moveList = generate_moves<Us, KNIGHT, Checks>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, BISHOP, Checks>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, ROOK, Checks>(pos, moveList, target);
|
||||
moveList = generate_moves<Us, QUEEN, Checks>(pos, moveList, target);
|
||||
|
||||
if (Type != QUIET_CHECKS && Type != EVASIONS)
|
||||
kingMoves:
|
||||
if (!Checks || pos.blockers_for_king(~Us) & ksq)
|
||||
{
|
||||
Square ksq = pos.square<KING>(Us);
|
||||
Bitboard b = attacks_bb<KING>(ksq) & target;
|
||||
Bitboard b = attacks_bb<KING>(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target);
|
||||
if (Checks)
|
||||
b &= ~attacks_bb<QUEEN>(pos.square<KING>(~Us));
|
||||
|
||||
while (b)
|
||||
*moveList++ = make_move(ksq, pop_lsb(b));
|
||||
|
||||
if ((Type != CAPTURES) && pos.can_castle(Us & ANY_CASTLING))
|
||||
if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING))
|
||||
for (CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } )
|
||||
if (!pos.castling_impeded(cr) && pos.can_castle(cr))
|
||||
*moveList++ = make<CASTLING>(ksq, pos.castling_rook_square(cr));
|
||||
@@ -253,6 +230,8 @@ namespace {
|
||||
|
||||
/// <CAPTURES> Generates all pseudo-legal captures plus queen and checking knight promotions
|
||||
/// <QUIETS> Generates all pseudo-legal non-captures and underpromotions (except checking knight)
|
||||
/// <EVASIONS> Generates all pseudo-legal check evasions when the side to move is in check
|
||||
/// <QUIET_CHECKS> Generates all pseudo-legal non-captures giving check, except castling
|
||||
/// <NON_EVASIONS> Generates all pseudo-legal captures and non-captures
|
||||
///
|
||||
/// Returns a pointer to the end of the move list.
|
||||
@@ -260,8 +239,8 @@ namespace {
|
||||
template<GenType Type>
|
||||
ExtMove* generate(const Position& pos, ExtMove* moveList) {
|
||||
|
||||
static_assert(Type == CAPTURES || Type == QUIETS || Type == NON_EVASIONS, "Unsupported type in generate()");
|
||||
assert(!pos.checkers());
|
||||
static_assert(Type != LEGAL, "Unsupported type in generate()");
|
||||
assert((Type == EVASIONS) == (bool)pos.checkers());
|
||||
|
||||
Color us = pos.side_to_move();
|
||||
|
||||
@@ -272,62 +251,11 @@ ExtMove* generate(const Position& pos, ExtMove* moveList) {
|
||||
// Explicit template instantiations
|
||||
template ExtMove* generate<CAPTURES>(const Position&, ExtMove*);
|
||||
template ExtMove* generate<QUIETS>(const Position&, ExtMove*);
|
||||
template ExtMove* generate<EVASIONS>(const Position&, ExtMove*);
|
||||
template ExtMove* generate<QUIET_CHECKS>(const Position&, ExtMove*);
|
||||
template ExtMove* generate<NON_EVASIONS>(const Position&, ExtMove*);
|
||||
|
||||
|
||||
/// generate<QUIET_CHECKS> generates all pseudo-legal non-captures giving check,
|
||||
/// except castling. Returns a pointer to the end of the move list.
|
||||
template<>
|
||||
ExtMove* generate<QUIET_CHECKS>(const Position& pos, ExtMove* moveList) {
|
||||
|
||||
assert(!pos.checkers());
|
||||
|
||||
Color us = pos.side_to_move();
|
||||
Bitboard dc = pos.blockers_for_king(~us) & pos.pieces(us) & ~pos.pieces(PAWN);
|
||||
|
||||
while (dc)
|
||||
{
|
||||
Square from = pop_lsb(dc);
|
||||
PieceType pt = type_of(pos.piece_on(from));
|
||||
|
||||
Bitboard b = attacks_bb(pt, from, pos.pieces()) & ~pos.pieces();
|
||||
|
||||
if (pt == KING)
|
||||
b &= ~attacks_bb<QUEEN>(pos.square<KING>(~us));
|
||||
|
||||
while (b)
|
||||
*moveList++ = make_move(from, pop_lsb(b));
|
||||
}
|
||||
|
||||
return us == WHITE ? generate_all<WHITE, QUIET_CHECKS>(pos, moveList)
|
||||
: generate_all<BLACK, QUIET_CHECKS>(pos, moveList);
|
||||
}
|
||||
|
||||
|
||||
/// generate<EVASIONS> generates all pseudo-legal check evasions when the side
|
||||
/// to move is in check. Returns a pointer to the end of the move list.
|
||||
template<>
|
||||
ExtMove* generate<EVASIONS>(const Position& pos, ExtMove* moveList) {
|
||||
|
||||
assert(pos.checkers());
|
||||
|
||||
Color us = pos.side_to_move();
|
||||
Square ksq = pos.square<KING>(us);
|
||||
|
||||
// Generate evasions for king
|
||||
Bitboard b = attacks_bb<KING>(ksq) & ~pos.pieces(us);
|
||||
while (b)
|
||||
*moveList++ = make_move(ksq, pop_lsb(b));
|
||||
|
||||
if (more_than_one(pos.checkers()))
|
||||
return moveList; // Double check, only a king move can save the day
|
||||
|
||||
// Generate blocking interpositions or captures of the checking piece
|
||||
return us == WHITE ? generate_all<WHITE, EVASIONS>(pos, moveList)
|
||||
: generate_all<BLACK, EVASIONS>(pos, moveList);
|
||||
}
|
||||
|
||||
|
||||
/// generate<LEGAL> generates all the legal moves in the given position
|
||||
|
||||
template<>
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
|
||||
#define NNUE_HALFKP_256X2_32_32_H_INCLUDED
|
||||
|
||||
#include "../features/feature_set.h"
|
||||
#include "../features/half_kp.h"
|
||||
|
||||
#include "../layers/input_slice.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<
|
||||
Features::HalfKP<Features::Side::kFriend>>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// Define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
#endif // #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
|
||||
@@ -32,26 +32,27 @@
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
// Input feature converter
|
||||
LargePagePtr<FeatureTransformer> feature_transformer;
|
||||
LargePagePtr<FeatureTransformer> featureTransformer;
|
||||
|
||||
// Evaluation function
|
||||
AlignedPtr<Network> network;
|
||||
|
||||
// Evaluation function file name
|
||||
std::string fileName;
|
||||
std::string netDescription;
|
||||
|
||||
namespace Detail {
|
||||
|
||||
// Initialize the evaluation function parameters
|
||||
template <typename T>
|
||||
void Initialize(AlignedPtr<T>& pointer) {
|
||||
void initialize(AlignedPtr<T>& pointer) {
|
||||
|
||||
pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Initialize(LargePagePtr<T>& pointer) {
|
||||
void initialize(LargePagePtr<T>& pointer) {
|
||||
|
||||
static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
|
||||
pointer.reset(reinterpret_cast<T*>(aligned_large_pages_alloc(sizeof(T))));
|
||||
@@ -60,85 +61,120 @@ namespace Stockfish::Eval::NNUE {
|
||||
|
||||
// Read evaluation function parameters
|
||||
template <typename T>
|
||||
bool ReadParameters(std::istream& stream, T& reference) {
|
||||
bool read_parameters(std::istream& stream, T& reference) {
|
||||
|
||||
std::uint32_t header;
|
||||
header = read_little_endian<std::uint32_t>(stream);
|
||||
if (!stream || header != T::GetHashValue()) return false;
|
||||
return reference.ReadParameters(stream);
|
||||
if (!stream || header != T::get_hash_value()) return false;
|
||||
return reference.read_parameters(stream);
|
||||
}
|
||||
|
||||
// Write evaluation function parameters
|
||||
template <typename T>
|
||||
bool write_parameters(std::ostream& stream, const T& reference) {
|
||||
|
||||
write_little_endian<std::uint32_t>(stream, T::get_hash_value());
|
||||
return reference.write_parameters(stream);
|
||||
}
|
||||
|
||||
} // namespace Detail
|
||||
|
||||
// Initialize the evaluation function parameters
|
||||
void Initialize() {
|
||||
void initialize() {
|
||||
|
||||
Detail::Initialize(feature_transformer);
|
||||
Detail::Initialize(network);
|
||||
Detail::initialize(featureTransformer);
|
||||
Detail::initialize(network);
|
||||
}
|
||||
|
||||
// Read network header
|
||||
bool ReadHeader(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
|
||||
bool read_header(std::istream& stream, std::uint32_t* hashValue, std::string* desc)
|
||||
{
|
||||
std::uint32_t version, size;
|
||||
|
||||
version = read_little_endian<std::uint32_t>(stream);
|
||||
*hash_value = read_little_endian<std::uint32_t>(stream);
|
||||
*hashValue = read_little_endian<std::uint32_t>(stream);
|
||||
size = read_little_endian<std::uint32_t>(stream);
|
||||
if (!stream || version != kVersion) return false;
|
||||
architecture->resize(size);
|
||||
stream.read(&(*architecture)[0], size);
|
||||
if (!stream || version != Version) return false;
|
||||
desc->resize(size);
|
||||
stream.read(&(*desc)[0], size);
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Write network header
|
||||
bool write_header(std::ostream& stream, std::uint32_t hashValue, const std::string& desc)
|
||||
{
|
||||
write_little_endian<std::uint32_t>(stream, Version);
|
||||
write_little_endian<std::uint32_t>(stream, hashValue);
|
||||
write_little_endian<std::uint32_t>(stream, desc.size());
|
||||
stream.write(&desc[0], desc.size());
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
bool read_parameters(std::istream& stream) {
|
||||
|
||||
std::uint32_t hash_value;
|
||||
std::string architecture;
|
||||
if (!ReadHeader(stream, &hash_value, &architecture)) return false;
|
||||
if (hash_value != kHashValue) return false;
|
||||
if (!Detail::ReadParameters(stream, *feature_transformer)) return false;
|
||||
if (!Detail::ReadParameters(stream, *network)) return false;
|
||||
std::uint32_t hashValue;
|
||||
if (!read_header(stream, &hashValue, &netDescription)) return false;
|
||||
if (hashValue != HashValue) return false;
|
||||
if (!Detail::read_parameters(stream, *featureTransformer)) return false;
|
||||
if (!Detail::read_parameters(stream, *network)) return false;
|
||||
return stream && stream.peek() == std::ios::traits_type::eof();
|
||||
}
|
||||
|
||||
// Write network parameters
|
||||
bool write_parameters(std::ostream& stream) {
|
||||
|
||||
if (!write_header(stream, HashValue, netDescription)) return false;
|
||||
if (!Detail::write_parameters(stream, *featureTransformer)) return false;
|
||||
if (!Detail::write_parameters(stream, *network)) return false;
|
||||
return (bool)stream;
|
||||
}
|
||||
|
||||
// Evaluation function. Perform differential calculation.
|
||||
Value evaluate(const Position& pos) {
|
||||
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
|
||||
constexpr uint64_t alignment = kCacheLineSize;
|
||||
constexpr uint64_t alignment = CacheLineSize;
|
||||
|
||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||
TransformedFeatureType transformed_features_unaligned[
|
||||
FeatureTransformer::kBufferSize + alignment / sizeof(TransformedFeatureType)];
|
||||
char buffer_unaligned[Network::kBufferSize + alignment];
|
||||
TransformedFeatureType transformedFeaturesUnaligned[
|
||||
FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)];
|
||||
char bufferUnaligned[Network::BufferSize + alignment];
|
||||
|
||||
auto* transformed_features = align_ptr_up<alignment>(&transformed_features_unaligned[0]);
|
||||
auto* buffer = align_ptr_up<alignment>(&buffer_unaligned[0]);
|
||||
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
|
||||
auto* buffer = align_ptr_up<alignment>(&bufferUnaligned[0]);
|
||||
#else
|
||||
alignas(alignment)
|
||||
TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize];
|
||||
alignas(alignment) char buffer[Network::kBufferSize];
|
||||
TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];
|
||||
alignas(alignment) char buffer[Network::BufferSize];
|
||||
#endif
|
||||
|
||||
ASSERT_ALIGNED(transformed_features, alignment);
|
||||
ASSERT_ALIGNED(transformedFeatures, alignment);
|
||||
ASSERT_ALIGNED(buffer, alignment);
|
||||
|
||||
feature_transformer->Transform(pos, transformed_features);
|
||||
const auto output = network->Propagate(transformed_features, buffer);
|
||||
featureTransformer->transform(pos, transformedFeatures);
|
||||
const auto output = network->propagate(transformedFeatures, buffer);
|
||||
|
||||
return static_cast<Value>(output[0] / FV_SCALE);
|
||||
return static_cast<Value>(output[0] / OutputScale);
|
||||
}
|
||||
|
||||
// Load eval, from a file stream or a memory stream
|
||||
bool load_eval(std::string name, std::istream& stream) {
|
||||
|
||||
Initialize();
|
||||
initialize();
|
||||
fileName = name;
|
||||
return ReadParameters(stream);
|
||||
return read_parameters(stream);
|
||||
}
|
||||
|
||||
// Save eval, to a file stream or a memory stream
|
||||
bool save_eval(std::ostream& stream) {
|
||||
|
||||
if (fileName.empty())
|
||||
return false;
|
||||
|
||||
return write_parameters(stream);
|
||||
}
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
@@ -28,8 +28,8 @@
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
// Hash value of evaluation function structure
|
||||
constexpr std::uint32_t kHashValue =
|
||||
FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
|
||||
constexpr std::uint32_t HashValue =
|
||||
FeatureTransformer::get_hash_value() ^ Network::get_hash_value();
|
||||
|
||||
// Deleter for automating release of memory area
|
||||
template <typename T>
|
||||
|
||||
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// A class template that represents the input feature set of the NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_FEATURE_SET_H_INCLUDED
|
||||
#define NNUE_FEATURE_SET_H_INCLUDED
|
||||
|
||||
#include "features_common.h"
|
||||
#include <array>
|
||||
|
||||
namespace Stockfish::Eval::NNUE::Features {
|
||||
|
||||
// Class template that represents a list of values
|
||||
template <typename T, T... Values>
|
||||
struct CompileTimeList;
|
||||
|
||||
// Partial specialization for a non-empty list: `First` is the head value and
// `Remaining...` is the (possibly empty) tail.
// NOTE(review): no specialization for the empty list is visible in this chunk;
// Contains() recurses into CompileTimeList<T, Remaining...>, so using it needs
// such a specialization to exist elsewhere - confirm.
template <typename T, T First, T... Remaining>
|
||||
struct CompileTimeList<T, First, Remaining...> {
|
||||
// True if `value` equals the head or is contained in the tail.
static constexpr bool Contains(T value) {
|
||||
return value == First || CompileTimeList<T, Remaining...>::Contains(value);
|
||||
}
|
||||
// All values of the list, head first, as a std::array.
static constexpr std::array<T, sizeof...(Remaining) + 1>
|
||||
kValues = {{First, Remaining...}};
|
||||
};
|
||||
|
||||
// Base class of feature set
|
||||
template <typename Derived>
|
||||
class FeatureSetBase {
|
||||
|
||||
};
|
||||
|
||||
// Class template that represents the feature set
|
||||
template <typename FeatureType>
|
||||
class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
|
||||
|
||||
public:
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
|
||||
// Number of feature dimensions
|
||||
static constexpr IndexType kDimensions = FeatureType::kDimensions;
|
||||
// Maximum number of simultaneously active features
|
||||
static constexpr IndexType kMaxActiveDimensions =
|
||||
FeatureType::kMaxActiveDimensions;
|
||||
// Trigger for full calculation instead of difference calculation
|
||||
using SortedTriggerSet =
|
||||
CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
|
||||
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
|
||||
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE::Features
|
||||
|
||||
#endif // #ifndef NNUE_FEATURE_SET_H_INCLUDED
|
||||
@@ -1,45 +0,0 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//Common header of input features of NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_FEATURES_COMMON_H_INCLUDED
|
||||
#define NNUE_FEATURES_COMMON_H_INCLUDED
|
||||
|
||||
#include "../../evaluate.h"
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE::Features {
|
||||
|
||||
class IndexList;
|
||||
|
||||
template <typename... FeatureTypes>
|
||||
class FeatureSet;
|
||||
|
||||
// Trigger to perform full calculations instead of difference only
|
||||
enum class TriggerEvent {
|
||||
kFriendKingMoved // calculate full evaluation when own king moves
|
||||
};
|
||||
|
||||
enum class Side {
|
||||
kFriend // side to move
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE::Features
|
||||
|
||||
#endif // #ifndef NNUE_FEATURES_COMMON_H_INCLUDED
|
||||
@@ -19,69 +19,68 @@
|
||||
//Definition of input features HalfKP of NNUE evaluation function
|
||||
|
||||
#include "half_kp.h"
|
||||
#include "index_list.h"
|
||||
|
||||
#include "../../position.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE::Features {
|
||||
|
||||
// Orient a square according to perspective (rotates by 180 for black)
|
||||
inline Square orient(Color perspective, Square s) {
|
||||
inline Square HalfKP::orient(Color perspective, Square s) {
|
||||
return Square(int(s) ^ (bool(perspective) * 63));
|
||||
}
|
||||
|
||||
// Index of a feature for a given king position and another piece on some square
|
||||
inline IndexType make_index(Color perspective, Square s, Piece pc, Square ksq) {
|
||||
return IndexType(orient(perspective, s) + kpp_board_index[perspective][pc] + PS_END * ksq);
|
||||
inline IndexType HalfKP::make_index(Color perspective, Square s, Piece pc, Square ksq) {
|
||||
return IndexType(orient(perspective, s) + PieceSquareIndex[perspective][pc] + PS_NB * ksq);
|
||||
}
|
||||
|
||||
// Get a list of indices for active features
|
||||
template <Side AssociatedKing>
|
||||
void HalfKP<AssociatedKing>::AppendActiveIndices(
|
||||
const Position& pos, Color perspective, IndexList* active) {
|
||||
|
||||
void HalfKP::append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
ValueListInserter<IndexType> active
|
||||
) {
|
||||
Square ksq = orient(perspective, pos.square<KING>(perspective));
|
||||
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
|
||||
while (bb)
|
||||
{
|
||||
Square s = pop_lsb(bb);
|
||||
active->push_back(make_index(perspective, s, pos.piece_on(s), ksq));
|
||||
active.push_back(make_index(perspective, s, pos.piece_on(s), ksq));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// AppendChangedIndices() : get a list of indices for recently changed features
|
||||
// append_changed_indices() : get a list of indices for recently changed features
|
||||
|
||||
// IMPORTANT: The `pos` in this function is pretty much useless as it
|
||||
// is not always the position the features are updated to. The feature
|
||||
// transformer code right now can update multiple accumulators per move,
|
||||
// but since Stockfish only keeps the full state of the current leaf
|
||||
// search position it is not possible to always pass here the position for
|
||||
// which the accumulator is being updated. Therefore the only thing that
|
||||
// can be reliably extracted from `pos` is the king square for the king
|
||||
// of the `perspective` color (note: not even the other king's square will
|
||||
// match reality in all cases, this is also the reason why `dp` is passed
|
||||
// as a parameter and not extracted from pos.state()). This is of particular
|
||||
// problem for future nets with other feature sets, where updating the active
|
||||
// feature might require more information from the intermediate positions. In
|
||||
// this case the only easy solution is to remove the multiple updates from
|
||||
// the feature transformer update code and only update the accumulator for
|
||||
// the current leaf position (the position after the move).
|
||||
|
||||
template <Side AssociatedKing>
|
||||
void HalfKP<AssociatedKing>::AppendChangedIndices(
|
||||
const Position& pos, const DirtyPiece& dp, Color perspective,
|
||||
IndexList* removed, IndexList* added) {
|
||||
|
||||
Square ksq = orient(perspective, pos.square<KING>(perspective));
|
||||
void HalfKP::append_changed_indices(
|
||||
Square ksq,
|
||||
StateInfo* st,
|
||||
Color perspective,
|
||||
ValueListInserter<IndexType> removed,
|
||||
ValueListInserter<IndexType> added
|
||||
) {
|
||||
const auto& dp = st->dirtyPiece;
|
||||
Square oriented_ksq = orient(perspective, ksq);
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
Piece pc = dp.piece[i];
|
||||
if (type_of(pc) == KING) continue;
|
||||
if (dp.from[i] != SQ_NONE)
|
||||
removed->push_back(make_index(perspective, dp.from[i], pc, ksq));
|
||||
removed.push_back(make_index(perspective, dp.from[i], pc, oriented_ksq));
|
||||
if (dp.to[i] != SQ_NONE)
|
||||
added->push_back(make_index(perspective, dp.to[i], pc, ksq));
|
||||
added.push_back(make_index(perspective, dp.to[i], pc, oriented_ksq));
|
||||
}
|
||||
}
|
||||
|
||||
template class HalfKP<Side::kFriend>;
|
||||
// Cost estimate for an incremental update: the number of dirty-piece entries
// recorded in st->dirtyPiece.
int HalfKP::update_cost(StateInfo* st) {
|
||||
return st->dirtyPiece.dirty_num;
|
||||
}
|
||||
|
||||
// Cost estimate for a full refresh: the total piece count of `pos` minus 2.
// The 2 presumably stands for the two kings, which append_active_indices()
// masks out of the feature list - confirm.
int HalfKP::refresh_cost(const Position& pos) {
|
||||
return pos.count<ALL_PIECES>() - 2;
|
||||
}
|
||||
|
||||
// Returns true when the first dirty piece recorded in `st` is the king of
// `perspective`. Only piece[0] is inspected.
bool HalfKP::requires_refresh(StateInfo* st, Color perspective) {
|
||||
return st->dirtyPiece.piece[0] == make_piece(perspective, KING);
|
||||
}
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE::Features
|
||||
|
||||
@@ -21,37 +21,88 @@
|
||||
#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
|
||||
#define NNUE_FEATURES_HALF_KP_H_INCLUDED
|
||||
|
||||
#include "../nnue_common.h"
|
||||
|
||||
#include "../../evaluate.h"
|
||||
#include "features_common.h"
|
||||
#include "../../misc.h"
|
||||
|
||||
namespace Stockfish {
|
||||
struct StateInfo;
|
||||
}
|
||||
|
||||
namespace Stockfish::Eval::NNUE::Features {
|
||||
|
||||
// Feature HalfKP: Combination of the position of own king
|
||||
// and the position of pieces other than kings
|
||||
template <Side AssociatedKing>
|
||||
class HalfKP {
|
||||
|
||||
// unique number for each piece type on each square
|
||||
enum {
|
||||
PS_NONE = 0,
|
||||
PS_W_PAWN = 1,
|
||||
PS_B_PAWN = 1 * SQUARE_NB + 1,
|
||||
PS_W_KNIGHT = 2 * SQUARE_NB + 1,
|
||||
PS_B_KNIGHT = 3 * SQUARE_NB + 1,
|
||||
PS_W_BISHOP = 4 * SQUARE_NB + 1,
|
||||
PS_B_BISHOP = 5 * SQUARE_NB + 1,
|
||||
PS_W_ROOK = 6 * SQUARE_NB + 1,
|
||||
PS_B_ROOK = 7 * SQUARE_NB + 1,
|
||||
PS_W_QUEEN = 8 * SQUARE_NB + 1,
|
||||
PS_B_QUEEN = 9 * SQUARE_NB + 1,
|
||||
PS_NB = 10 * SQUARE_NB + 1
|
||||
};
|
||||
|
||||
static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = {
|
||||
// convention: W - us, B - them
|
||||
// viewed from other side, W and B are reversed
|
||||
{ PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_NONE, PS_NONE,
|
||||
PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_NONE, PS_NONE },
|
||||
{ PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_NONE, PS_NONE,
|
||||
PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_NONE, PS_NONE }
|
||||
};
|
||||
|
||||
// Orient a square according to perspective (rotates by 180 for black)
|
||||
static Square orient(Color perspective, Square s);
|
||||
|
||||
// Index of a feature for a given king position and another piece on some square
|
||||
static IndexType make_index(Color perspective, Square s, Piece pc, Square ksq);
|
||||
|
||||
public:
|
||||
// Feature name
|
||||
static constexpr const char* kName = "HalfKP(Friend)";
|
||||
static constexpr const char* Name = "HalfKP(Friend)";
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t kHashValue =
|
||||
0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
|
||||
static constexpr std::uint32_t HashValue = 0x5D69D5B8u;
|
||||
|
||||
// Number of feature dimensions
|
||||
static constexpr IndexType kDimensions =
|
||||
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
|
||||
// Maximum number of simultaneously active features
|
||||
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
|
||||
// Trigger for full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
|
||||
static constexpr IndexType Dimensions =
|
||||
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_NB);
|
||||
|
||||
// Maximum number of simultaneously active features. 30 because kings are not included.
|
||||
static constexpr IndexType MaxActiveDimensions = 30;
|
||||
|
||||
// Get a list of indices for active features
|
||||
static void AppendActiveIndices(const Position& pos, Color perspective,
|
||||
IndexList* active);
|
||||
static void append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
ValueListInserter<IndexType> active);
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
static void AppendChangedIndices(const Position& pos, const DirtyPiece& dp, Color perspective,
|
||||
IndexList* removed, IndexList* added);
|
||||
static void append_changed_indices(
|
||||
Square ksq,
|
||||
StateInfo* st,
|
||||
Color perspective,
|
||||
ValueListInserter<IndexType> removed,
|
||||
ValueListInserter<IndexType> added);
|
||||
|
||||
// Returns the cost of updating one perspective, the most costly one.
|
||||
// Assumes no refresh needed.
|
||||
static int update_cost(StateInfo* st);
|
||||
static int refresh_cost(const Position& pos);
|
||||
|
||||
// Returns whether the change stored in this StateInfo means that
|
||||
// a full accumulator refresh is required.
|
||||
static bool requires_refresh(StateInfo* st, Color perspective);
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE::Features
|
||||
|
||||
@@ -1,64 +0,0 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Definition of index list of input features
|
||||
|
||||
#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED
|
||||
#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED
|
||||
|
||||
#include "../../position.h"
|
||||
#include "../nnue_architecture.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE::Features {
|
||||
|
||||
// Class template used for feature index list
|
||||
// Fixed-capacity list: a T[MaxSize] array plus a count of the elements in use.
// None of the members check bounds; callers must keep size and indices within
// MaxSize.
template <typename T, std::size_t MaxSize>
|
||||
class ValueList {
|
||||
|
||||
public:
|
||||
// Number of elements currently in use.
std::size_t size() const { return size_; }
|
||||
// Sets the element count only; the stored values are left untouched.
void resize(std::size_t size) { size_ = size; }
|
||||
// Appends `value` at index size_ and increments the count (no capacity check).
void push_back(const T& value) { values_[size_++] = value; }
|
||||
// Unchecked element access and iteration over [values_, values_ + size_).
T& operator[](std::size_t index) { return values_[index]; }
|
||||
T* begin() { return values_; }
|
||||
T* end() { return values_ + size_; }
|
||||
const T& operator[](std::size_t index) const { return values_[index]; }
|
||||
const T* begin() const { return values_; }
|
||||
const T* end() const { return values_ + size_; }
|
||||
|
||||
// Exchanges contents with `other`: swaps the first max(size_, other.size_)
// slots element by element, then swaps the two counts.
void swap(ValueList& other) {
|
||||
const std::size_t max_size = std::max(size_, other.size_);
|
||||
for (std::size_t i = 0; i < max_size; ++i) {
|
||||
std::swap(values_[i], other.values_[i]);
|
||||
}
|
||||
std::swap(size_, other.size_);
|
||||
}
|
||||
|
||||
private:
|
||||
// Backing storage; only the first size_ entries are considered part of the list.
T values_[MaxSize];
|
||||
std::size_t size_ = 0;
|
||||
};
|
||||
|
||||
//Type of feature index list
|
||||
class IndexList
|
||||
: public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE::Features
|
||||
|
||||
#endif // NNUE_FEATURES_INDEX_LIST_H_INCLUDED
|
||||
@@ -27,7 +27,7 @@
|
||||
namespace Stockfish::Eval::NNUE::Layers {
|
||||
|
||||
// Affine transformation layer
|
||||
template <typename PreviousLayer, IndexType OutputDimensions>
|
||||
template <typename PreviousLayer, IndexType OutDims>
|
||||
class AffineTransform {
|
||||
public:
|
||||
// Input/output type
|
||||
@@ -36,64 +36,68 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
static_assert(std::is_same<InputType, std::uint8_t>::value, "");
|
||||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
PreviousLayer::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = OutputDimensions;
|
||||
static constexpr IndexType kPaddedInputDimensions =
|
||||
CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);
|
||||
static constexpr IndexType InputDimensions =
|
||||
PreviousLayer::OutputDimensions;
|
||||
static constexpr IndexType OutputDimensions = OutDims;
|
||||
static constexpr IndexType PaddedInputDimensions =
|
||||
ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
|
||||
#if defined (USE_AVX512)
|
||||
static constexpr const IndexType kOutputSimdWidth = kSimdWidth / 2;
|
||||
static constexpr const IndexType OutputSimdWidth = SimdWidth / 2;
|
||||
#elif defined (USE_SSSE3)
|
||||
static constexpr const IndexType kOutputSimdWidth = kSimdWidth / 4;
|
||||
static constexpr const IndexType OutputSimdWidth = SimdWidth / 4;
|
||||
#endif
|
||||
|
||||
// Size of forward propagation buffer used in this layer
|
||||
static constexpr std::size_t kSelfBufferSize =
|
||||
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
|
||||
static constexpr std::size_t SelfBufferSize =
|
||||
ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize);
|
||||
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
PreviousLayer::kBufferSize + kSelfBufferSize;
|
||||
static constexpr std::size_t BufferSize =
|
||||
PreviousLayer::BufferSize + SelfBufferSize;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0xCC03DAE4u;
|
||||
hash_value += kOutputDimensions;
|
||||
hash_value ^= PreviousLayer::GetHashValue() >> 1;
|
||||
hash_value ^= PreviousLayer::GetHashValue() << 31;
|
||||
return hash_value;
|
||||
static constexpr std::uint32_t get_hash_value() {
|
||||
std::uint32_t hashValue = 0xCC03DAE4u;
|
||||
hashValue += OutputDimensions;
|
||||
hashValue ^= PreviousLayer::get_hash_value() >> 1;
|
||||
hashValue ^= PreviousLayer::get_hash_value() << 31;
|
||||
return hashValue;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
if (!previous_layer_.ReadParameters(stream)) return false;
|
||||
for (std::size_t i = 0; i < kOutputDimensions; ++i)
|
||||
biases_[i] = read_little_endian<BiasType>(stream);
|
||||
for (std::size_t i = 0; i < kOutputDimensions * kPaddedInputDimensions; ++i)
|
||||
// Read network parameters
|
||||
bool read_parameters(std::istream& stream) {
|
||||
if (!previousLayer.read_parameters(stream)) return false;
|
||||
for (std::size_t i = 0; i < OutputDimensions; ++i)
|
||||
biases[i] = read_little_endian<BiasType>(stream);
|
||||
#if !defined (USE_SSSE3)
|
||||
weights_[i] = read_little_endian<WeightType>(stream);
|
||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||
weights[i] = read_little_endian<WeightType>(stream);
|
||||
#else
|
||||
weights_[
|
||||
(i / 4) % (kPaddedInputDimensions / 4) * kOutputDimensions * 4 +
|
||||
i / kPaddedInputDimensions * 4 +
|
||||
i % 4
|
||||
] = read_little_endian<WeightType>(stream);
|
||||
std::unique_ptr<uint32_t[]> indexMap = std::make_unique<uint32_t[]>(OutputDimensions * PaddedInputDimensions);
|
||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) {
|
||||
const uint32_t scrambledIdx =
|
||||
(i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
|
||||
i / PaddedInputDimensions * 4 +
|
||||
i % 4;
|
||||
weights[scrambledIdx] = read_little_endian<WeightType>(stream);
|
||||
indexMap[scrambledIdx] = i;
|
||||
}
|
||||
|
||||
// Determine if eights of weight and input products can be summed using 16bits
|
||||
// without saturation. We assume worst case combinations of 0 and 127 for all inputs.
|
||||
if (kOutputDimensions > 1 && !stream.fail())
|
||||
if (OutputDimensions > 1 && !stream.fail())
|
||||
{
|
||||
canSaturate16.count = 0;
|
||||
#if !defined(USE_VNNI)
|
||||
for (IndexType i = 0; i < kPaddedInputDimensions; i += 16)
|
||||
for (IndexType j = 0; j < kOutputDimensions; ++j)
|
||||
for (IndexType i = 0; i < PaddedInputDimensions; i += 16)
|
||||
for (IndexType j = 0; j < OutputDimensions; ++j)
|
||||
for (int x = 0; x < 2; ++x)
|
||||
{
|
||||
WeightType* w = &weights_[i * kOutputDimensions + j * 4 + x * 2];
|
||||
WeightType* w = &weights[i * OutputDimensions + j * 4 + x * 2];
|
||||
int sum[2] = {0, 0};
|
||||
for (int k = 0; k < 8; ++k)
|
||||
{
|
||||
IndexType idx = k / 2 * kOutputDimensions * 4 + k % 2;
|
||||
IndexType idx = k / 2 * OutputDimensions * 4 + k % 2;
|
||||
sum[w[idx] < 0] += w[idx];
|
||||
}
|
||||
for (int sign : { -1, 1 })
|
||||
@@ -102,14 +106,15 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
int maxK = 0, maxW = 0;
|
||||
for (int k = 0; k < 8; ++k)
|
||||
{
|
||||
IndexType idx = k / 2 * kOutputDimensions * 4 + k % 2;
|
||||
IndexType idx = k / 2 * OutputDimensions * 4 + k % 2;
|
||||
if (maxW < sign * w[idx])
|
||||
maxK = k, maxW = sign * w[idx];
|
||||
}
|
||||
|
||||
IndexType idx = maxK / 2 * kOutputDimensions * 4 + maxK % 2;
|
||||
IndexType idx = maxK / 2 * OutputDimensions * 4 + maxK % 2;
|
||||
sum[sign == -1] -= w[idx];
|
||||
canSaturate16.add(j, i + maxK / 2 * 4 + maxK % 2 + x * 2, w[idx]);
|
||||
const uint32_t scrambledIdx = idx + i * OutputDimensions + j * 4 + x * 2;
|
||||
canSaturate16.add(j, i + maxK / 2 * 4 + maxK % 2 + x * 2, w[idx], indexMap[scrambledIdx]);
|
||||
w[idx] = 0;
|
||||
}
|
||||
}
|
||||
@@ -125,15 +130,43 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Write network parameters
|
||||
// Serializes this layer after all preceding layers: first the biases, then the
// weights, each value written through write_little_endian.
// Returns false if a preceding layer fails or the stream is in a failed state
// after writing.
bool write_parameters(std::ostream& stream) const {
|
||||
// Preceding layers go first; stop immediately if any of them fails.
if (!previousLayer.write_parameters(stream)) return false;
|
||||
// One BiasType per output.
for (std::size_t i = 0; i < OutputDimensions; ++i)
|
||||
write_little_endian<BiasType>(stream, biases[i]);
|
||||
#if !defined (USE_SSSE3)
|
||||
// Without SSSE3 the weights are written in the order they are stored.
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||
write_little_endian<WeightType>(stream, weights[i]);
|
||||
#else
|
||||
// With SSSE3 read_parameters stores weights at a scrambled index. Rebuild
// the file order in a temporary buffer by mapping each file index i through
// the same index formula that read_parameters uses.
std::unique_ptr<WeightType[]> unscrambledWeights = std::make_unique<WeightType[]>(OutputDimensions * PaddedInputDimensions);
|
||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) {
|
||||
unscrambledWeights[i] =
|
||||
weights[
|
||||
(i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
|
||||
i / PaddedInputDimensions * 4 +
|
||||
i % 4
|
||||
];
|
||||
}
|
||||
// read_parameters zeroes the weights it hands over to canSaturate16, so put
// those values back. wIdx is presumably the file-order index recorded via
// indexMap when the entry was added - confirm against canSaturate16.add().
for (int i = 0; i < canSaturate16.count; ++i)
|
||||
unscrambledWeights[canSaturate16.ids[i].wIdx] = canSaturate16.ids[i].w;
|
||||
|
||||
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
||||
write_little_endian<WeightType>(stream, unscrambledWeights[i]);
|
||||
#endif
|
||||
|
||||
// Success only if the stream is not in a failed state.
return !stream.fail();
|
||||
}
|
||||
|
||||
// Forward propagation
|
||||
const OutputType* Propagate(
|
||||
const TransformedFeatureType* transformed_features, char* buffer) const {
|
||||
const auto input = previous_layer_.Propagate(
|
||||
transformed_features, buffer + kSelfBufferSize);
|
||||
const OutputType* propagate(
|
||||
const TransformedFeatureType* transformedFeatures, char* buffer) const {
|
||||
const auto input = previousLayer.propagate(
|
||||
transformedFeatures, buffer + SelfBufferSize);
|
||||
|
||||
#if defined (USE_AVX512)
|
||||
|
||||
[[maybe_unused]] const __m512i kOnes512 = _mm512_set1_epi16(1);
|
||||
[[maybe_unused]] const __m512i Ones512 = _mm512_set1_epi16(1);
|
||||
|
||||
[[maybe_unused]] auto m512_hadd = [](__m512i sum, int bias) -> int {
|
||||
return _mm512_reduce_add_epi32(sum) + bias;
|
||||
@@ -144,7 +177,7 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
acc = _mm512_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
__m512i product0 = _mm512_maddubs_epi16(a, b);
|
||||
product0 = _mm512_madd_epi16(product0, kOnes512);
|
||||
product0 = _mm512_madd_epi16(product0, Ones512);
|
||||
acc = _mm512_add_epi32(acc, product0);
|
||||
#endif
|
||||
};
|
||||
@@ -164,7 +197,7 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
product0 = _mm512_add_epi16(product0, product1);
|
||||
product2 = _mm512_add_epi16(product2, product3);
|
||||
product0 = _mm512_add_epi16(product0, product2);
|
||||
product0 = _mm512_madd_epi16(product0, kOnes512);
|
||||
product0 = _mm512_madd_epi16(product0, Ones512);
|
||||
acc = _mm512_add_epi32(acc, product0);
|
||||
#endif
|
||||
};
|
||||
@@ -172,7 +205,7 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
#endif
|
||||
#if defined (USE_AVX2)
|
||||
|
||||
[[maybe_unused]] const __m256i kOnes256 = _mm256_set1_epi16(1);
|
||||
[[maybe_unused]] const __m256i Ones256 = _mm256_set1_epi16(1);
|
||||
|
||||
[[maybe_unused]] auto m256_hadd = [](__m256i sum, int bias) -> int {
|
||||
__m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
|
||||
@@ -186,7 +219,7 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
acc = _mm256_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
__m256i product0 = _mm256_maddubs_epi16(a, b);
|
||||
product0 = _mm256_madd_epi16(product0, kOnes256);
|
||||
product0 = _mm256_madd_epi16(product0, Ones256);
|
||||
acc = _mm256_add_epi32(acc, product0);
|
||||
#endif
|
||||
};
|
||||
@@ -206,7 +239,7 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
product0 = _mm256_add_epi16(product0, product1);
|
||||
product2 = _mm256_add_epi16(product2, product3);
|
||||
product0 = _mm256_add_epi16(product0, product2);
|
||||
product0 = _mm256_madd_epi16(product0, kOnes256);
|
||||
product0 = _mm256_madd_epi16(product0, Ones256);
|
||||
acc = _mm256_add_epi32(acc, product0);
|
||||
#endif
|
||||
};
|
||||
@@ -214,7 +247,7 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
#endif
|
||||
#if defined (USE_SSSE3)
|
||||
|
||||
[[maybe_unused]] const __m128i kOnes128 = _mm_set1_epi16(1);
|
||||
[[maybe_unused]] const __m128i Ones128 = _mm_set1_epi16(1);
|
||||
|
||||
[[maybe_unused]] auto m128_hadd = [](__m128i sum, int bias) -> int {
|
||||
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
|
||||
@@ -224,7 +257,7 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
|
||||
[[maybe_unused]] auto m128_add_dpbusd_epi32 = [=](__m128i& acc, __m128i a, __m128i b) {
|
||||
__m128i product0 = _mm_maddubs_epi16(a, b);
|
||||
product0 = _mm_madd_epi16(product0, kOnes128);
|
||||
product0 = _mm_madd_epi16(product0, Ones128);
|
||||
acc = _mm_add_epi32(acc, product0);
|
||||
};
|
||||
|
||||
@@ -237,7 +270,7 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
product0 = _mm_add_epi16(product0, product1);
|
||||
product2 = _mm_add_epi16(product2, product3);
|
||||
product0 = _mm_add_epi16(product0, product2);
|
||||
product0 = _mm_madd_epi16(product0, kOnes128);
|
||||
product0 = _mm_madd_epi16(product0, Ones128);
|
||||
acc = _mm_add_epi32(acc, product0);
|
||||
};
|
||||
|
||||
@@ -269,71 +302,71 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
#if defined (USE_SSSE3)
|
||||
|
||||
const auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
const auto input_vector = reinterpret_cast<const vec_t*>(input);
|
||||
const auto inputVector = reinterpret_cast<const vec_t*>(input);
|
||||
|
||||
static_assert(kOutputDimensions % kOutputSimdWidth == 0 || kOutputDimensions == 1);
|
||||
static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1);
|
||||
|
||||
// kOutputDimensions is either 1 or a multiple of kSimdWidth
|
||||
// OutputDimensions is either 1 or a multiple of SimdWidth
|
||||
// because then it is also an input dimension.
|
||||
if constexpr (kOutputDimensions % kOutputSimdWidth == 0)
|
||||
if constexpr (OutputDimensions % OutputSimdWidth == 0)
|
||||
{
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / 4;
|
||||
constexpr IndexType NumChunks = PaddedInputDimensions / 4;
|
||||
|
||||
const auto input32 = reinterpret_cast<const std::int32_t*>(input);
|
||||
vec_t* outptr = reinterpret_cast<vec_t*>(output);
|
||||
std::memcpy(output, biases_, kOutputDimensions * sizeof(OutputType));
|
||||
std::memcpy(output, biases, OutputDimensions * sizeof(OutputType));
|
||||
|
||||
for (int i = 0; i < (int)kNumChunks - 3; i += 4)
|
||||
for (int i = 0; i < (int)NumChunks - 3; i += 4)
|
||||
{
|
||||
const vec_t in0 = vec_set_32(input32[i + 0]);
|
||||
const vec_t in1 = vec_set_32(input32[i + 1]);
|
||||
const vec_t in2 = vec_set_32(input32[i + 2]);
|
||||
const vec_t in3 = vec_set_32(input32[i + 3]);
|
||||
const auto col0 = reinterpret_cast<const vec_t*>(&weights_[(i + 0) * kOutputDimensions * 4]);
|
||||
const auto col1 = reinterpret_cast<const vec_t*>(&weights_[(i + 1) * kOutputDimensions * 4]);
|
||||
const auto col2 = reinterpret_cast<const vec_t*>(&weights_[(i + 2) * kOutputDimensions * 4]);
|
||||
const auto col3 = reinterpret_cast<const vec_t*>(&weights_[(i + 3) * kOutputDimensions * 4]);
|
||||
for (int j = 0; j * kOutputSimdWidth < kOutputDimensions; ++j)
|
||||
const auto col0 = reinterpret_cast<const vec_t*>(&weights[(i + 0) * OutputDimensions * 4]);
|
||||
const auto col1 = reinterpret_cast<const vec_t*>(&weights[(i + 1) * OutputDimensions * 4]);
|
||||
const auto col2 = reinterpret_cast<const vec_t*>(&weights[(i + 2) * OutputDimensions * 4]);
|
||||
const auto col3 = reinterpret_cast<const vec_t*>(&weights[(i + 3) * OutputDimensions * 4]);
|
||||
for (int j = 0; j * OutputSimdWidth < OutputDimensions; ++j)
|
||||
vec_add_dpbusd_32x4(outptr[j], in0, col0[j], in1, col1[j], in2, col2[j], in3, col3[j]);
|
||||
}
|
||||
for (int i = 0; i < canSaturate16.count; ++i)
|
||||
output[canSaturate16.ids[i].out] += input[canSaturate16.ids[i].in] * canSaturate16.ids[i].w;
|
||||
}
|
||||
else if constexpr (kOutputDimensions == 1)
|
||||
else if constexpr (OutputDimensions == 1)
|
||||
{
|
||||
#if defined (USE_AVX512)
|
||||
if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) != 0)
|
||||
if constexpr (PaddedInputDimensions % (SimdWidth * 2) != 0)
|
||||
{
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
const auto input_vector256 = reinterpret_cast<const __m256i*>(input);
|
||||
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
|
||||
const auto inputVector256 = reinterpret_cast<const __m256i*>(input);
|
||||
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights[0]);
|
||||
|
||||
for (int j = 0; j < (int)kNumChunks; ++j)
|
||||
for (int j = 0; j < (int)NumChunks; ++j)
|
||||
{
|
||||
const __m256i in = input_vector256[j];
|
||||
const __m256i in = inputVector256[j];
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
}
|
||||
output[0] = m256_hadd(sum0, biases_[0]);
|
||||
output[0] = m256_hadd(sum0, biases[0]);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
#if defined (USE_AVX512)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
|
||||
constexpr IndexType NumChunks = PaddedInputDimensions / (SimdWidth * 2);
|
||||
#else
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
|
||||
#endif
|
||||
vec_t sum0 = vec_setzero();
|
||||
const auto row0 = reinterpret_cast<const vec_t*>(&weights_[0]);
|
||||
const auto row0 = reinterpret_cast<const vec_t*>(&weights[0]);
|
||||
|
||||
for (int j = 0; j < (int)kNumChunks; ++j)
|
||||
for (int j = 0; j < (int)NumChunks; ++j)
|
||||
{
|
||||
const vec_t in = input_vector[j];
|
||||
const vec_t in = inputVector[j];
|
||||
vec_add_dpbusd_32(sum0, in, row0[j]);
|
||||
}
|
||||
output[0] = vec_hadd(sum0, biases_[0]);
|
||||
output[0] = vec_hadd(sum0, biases[0]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -344,80 +377,80 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
|
||||
#if defined(USE_SSE2)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
const __m128i kZeros = _mm_setzero_si128();
|
||||
const auto input_vector = reinterpret_cast<const __m128i*>(input);
|
||||
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
|
||||
const __m128i Zeros = _mm_setzero_si128();
|
||||
const auto inputVector = reinterpret_cast<const __m128i*>(input);
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
const __m64 kZeros = _mm_setzero_si64();
|
||||
const auto input_vector = reinterpret_cast<const __m64*>(input);
|
||||
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
|
||||
const __m64 Zeros = _mm_setzero_si64();
|
||||
const auto inputVector = reinterpret_cast<const __m64*>(input);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
|
||||
constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
|
||||
const auto inputVector = reinterpret_cast<const int8x8_t*>(input);
|
||||
#endif
|
||||
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const IndexType offset = i * kPaddedInputDimensions;
|
||||
for (IndexType i = 0; i < OutputDimensions; ++i) {
|
||||
const IndexType offset = i * PaddedInputDimensions;
|
||||
|
||||
#if defined(USE_SSE2)
|
||||
__m128i sum_lo = _mm_cvtsi32_si128(biases_[i]);
|
||||
__m128i sum_hi = kZeros;
|
||||
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m128i sumLo = _mm_cvtsi32_si128(biases[i]);
|
||||
__m128i sumHi = Zeros;
|
||||
const auto row = reinterpret_cast<const __m128i*>(&weights[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
__m128i row_j = _mm_load_si128(&row[j]);
|
||||
__m128i input_j = _mm_load_si128(&input_vector[j]);
|
||||
__m128i extended_row_lo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
|
||||
__m128i extended_row_hi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
|
||||
__m128i extended_input_lo = _mm_unpacklo_epi8(input_j, kZeros);
|
||||
__m128i extended_input_hi = _mm_unpackhi_epi8(input_j, kZeros);
|
||||
__m128i product_lo = _mm_madd_epi16(extended_row_lo, extended_input_lo);
|
||||
__m128i product_hi = _mm_madd_epi16(extended_row_hi, extended_input_hi);
|
||||
sum_lo = _mm_add_epi32(sum_lo, product_lo);
|
||||
sum_hi = _mm_add_epi32(sum_hi, product_hi);
|
||||
__m128i input_j = _mm_load_si128(&inputVector[j]);
|
||||
__m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
|
||||
__m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
|
||||
__m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros);
|
||||
__m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros);
|
||||
__m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo);
|
||||
__m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi);
|
||||
sumLo = _mm_add_epi32(sumLo, productLo);
|
||||
sumHi = _mm_add_epi32(sumHi, productHi);
|
||||
}
|
||||
__m128i sum = _mm_add_epi32(sum_lo, sum_hi);
|
||||
__m128i sum_high_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
sum = _mm_add_epi32(sum, sum_high_64);
|
||||
__m128i sum = _mm_add_epi32(sumLo, sumHi);
|
||||
__m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
sum = _mm_add_epi32(sum, sumHigh_64);
|
||||
__m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
sum = _mm_add_epi32(sum, sum_second_32);
|
||||
output[i] = _mm_cvtsi128_si32(sum);
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
__m64 sum_lo = _mm_cvtsi32_si64(biases_[i]);
|
||||
__m64 sum_hi = kZeros;
|
||||
const auto row = reinterpret_cast<const __m64*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m64 sumLo = _mm_cvtsi32_si64(biases[i]);
|
||||
__m64 sumHi = Zeros;
|
||||
const auto row = reinterpret_cast<const __m64*>(&weights[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
__m64 row_j = row[j];
|
||||
__m64 input_j = input_vector[j];
|
||||
__m64 extended_row_lo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8);
|
||||
__m64 extended_row_hi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8);
|
||||
__m64 extended_input_lo = _mm_unpacklo_pi8(input_j, kZeros);
|
||||
__m64 extended_input_hi = _mm_unpackhi_pi8(input_j, kZeros);
|
||||
__m64 product_lo = _mm_madd_pi16(extended_row_lo, extended_input_lo);
|
||||
__m64 product_hi = _mm_madd_pi16(extended_row_hi, extended_input_hi);
|
||||
sum_lo = _mm_add_pi32(sum_lo, product_lo);
|
||||
sum_hi = _mm_add_pi32(sum_hi, product_hi);
|
||||
__m64 input_j = inputVector[j];
|
||||
__m64 extendedRowLo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8);
|
||||
__m64 extendedRowHi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8);
|
||||
__m64 extendedInputLo = _mm_unpacklo_pi8(input_j, Zeros);
|
||||
__m64 extendedInputHi = _mm_unpackhi_pi8(input_j, Zeros);
|
||||
__m64 productLo = _mm_madd_pi16(extendedRowLo, extendedInputLo);
|
||||
__m64 productHi = _mm_madd_pi16(extendedRowHi, extendedInputHi);
|
||||
sumLo = _mm_add_pi32(sumLo, productLo);
|
||||
sumHi = _mm_add_pi32(sumHi, productHi);
|
||||
}
|
||||
__m64 sum = _mm_add_pi32(sum_lo, sum_hi);
|
||||
__m64 sum = _mm_add_pi32(sumLo, sumHi);
|
||||
sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
|
||||
output[i] = _mm_cvtsi64_si32(sum);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
int32x4_t sum = {biases_[i]};
|
||||
const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
|
||||
product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
|
||||
int32x4_t sum = {biases[i]};
|
||||
const auto row = reinterpret_cast<const int8x8_t*>(&weights[offset]);
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]);
|
||||
product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]);
|
||||
sum = vpadalq_s16(sum, product);
|
||||
}
|
||||
output[i] = sum[0] + sum[1] + sum[2] + sum[3];
|
||||
|
||||
#else
|
||||
OutputType sum = biases_[i];
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
sum += weights_[offset + j] * input[j];
|
||||
OutputType sum = biases[i];
|
||||
for (IndexType j = 0; j < InputDimensions; ++j) {
|
||||
sum += weights[offset + j] * input[j];
|
||||
}
|
||||
output[i] = sum;
|
||||
#endif
|
||||
@@ -436,20 +469,22 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
using BiasType = OutputType;
|
||||
using WeightType = std::int8_t;
|
||||
|
||||
PreviousLayer previous_layer_;
|
||||
PreviousLayer previousLayer;
|
||||
|
||||
alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
|
||||
alignas(kCacheLineSize) WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
|
||||
alignas(CacheLineSize) BiasType biases[OutputDimensions];
|
||||
alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
|
||||
#if defined (USE_SSSE3)
|
||||
struct CanSaturate {
|
||||
int count;
|
||||
struct Entry {
|
||||
uint32_t wIdx;
|
||||
uint16_t out;
|
||||
uint16_t in;
|
||||
int8_t w;
|
||||
} ids[kPaddedInputDimensions * kOutputDimensions * 3 / 4];
|
||||
} ids[PaddedInputDimensions * OutputDimensions * 3 / 4];
|
||||
|
||||
void add(int i, int j, int8_t w) {
|
||||
void add(int i, int j, int8_t w, uint32_t wIdx) {
|
||||
ids[count].wIdx = wIdx;
|
||||
ids[count].out = i;
|
||||
ids[count].in = j;
|
||||
ids[count].w = w;
|
||||
|
||||
@@ -35,130 +35,135 @@ namespace Stockfish::Eval::NNUE::Layers {
|
||||
static_assert(std::is_same<InputType, std::int32_t>::value, "");
|
||||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
PreviousLayer::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = kInputDimensions;
|
||||
static constexpr IndexType InputDimensions =
|
||||
PreviousLayer::OutputDimensions;
|
||||
static constexpr IndexType OutputDimensions = InputDimensions;
|
||||
|
||||
// Size of forward propagation buffer used in this layer
|
||||
static constexpr std::size_t kSelfBufferSize =
|
||||
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
|
||||
static constexpr std::size_t SelfBufferSize =
|
||||
ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize);
|
||||
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
PreviousLayer::kBufferSize + kSelfBufferSize;
|
||||
static constexpr std::size_t BufferSize =
|
||||
PreviousLayer::BufferSize + SelfBufferSize;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0x538D24C7u;
|
||||
hash_value += PreviousLayer::GetHashValue();
|
||||
return hash_value;
|
||||
static constexpr std::uint32_t get_hash_value() {
|
||||
std::uint32_t hashValue = 0x538D24C7u;
|
||||
hashValue += PreviousLayer::get_hash_value();
|
||||
return hashValue;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
return previous_layer_.ReadParameters(stream);
|
||||
bool read_parameters(std::istream& stream) {
|
||||
return previousLayer.read_parameters(stream);
|
||||
}
|
||||
|
||||
// Write network parameters
|
||||
bool write_parameters(std::ostream& stream) const {
|
||||
return previousLayer.write_parameters(stream);
|
||||
}
|
||||
|
||||
// Forward propagation
|
||||
const OutputType* Propagate(
|
||||
const TransformedFeatureType* transformed_features, char* buffer) const {
|
||||
const auto input = previous_layer_.Propagate(
|
||||
transformed_features, buffer + kSelfBufferSize);
|
||||
const OutputType* propagate(
|
||||
const TransformedFeatureType* transformedFeatures, char* buffer) const {
|
||||
const auto input = previousLayer.propagate(
|
||||
transformedFeatures, buffer + SelfBufferSize);
|
||||
const auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
|
||||
const __m256i kZero = _mm256_setzero_si256();
|
||||
const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
const __m256i Zero = _mm256_setzero_si256();
|
||||
const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
|
||||
const auto in = reinterpret_cast<const __m256i*>(input);
|
||||
const auto out = reinterpret_cast<__m256i*>(output);
|
||||
for (IndexType i = 0; i < kNumChunks; ++i) {
|
||||
for (IndexType i = 0; i < NumChunks; ++i) {
|
||||
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
|
||||
_mm256_load_si256(&in[i * 4 + 0]),
|
||||
_mm256_load_si256(&in[i * 4 + 1])), kWeightScaleBits);
|
||||
_mm256_load_si256(&in[i * 4 + 1])), WeightScaleBits);
|
||||
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
|
||||
_mm256_load_si256(&in[i * 4 + 2]),
|
||||
_mm256_load_si256(&in[i * 4 + 3])), kWeightScaleBits);
|
||||
_mm256_load_si256(&in[i * 4 + 3])), WeightScaleBits);
|
||||
_mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
|
||||
_mm256_packs_epi16(words0, words1), kZero), kOffsets));
|
||||
_mm256_packs_epi16(words0, words1), Zero), Offsets));
|
||||
}
|
||||
constexpr IndexType kStart = kNumChunks * kSimdWidth;
|
||||
constexpr IndexType Start = NumChunks * SimdWidth;
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
|
||||
#ifdef USE_SSE41
|
||||
const __m128i kZero = _mm_setzero_si128();
|
||||
const __m128i Zero = _mm_setzero_si128();
|
||||
#else
|
||||
const __m128i k0x80s = _mm_set1_epi8(-128);
|
||||
#endif
|
||||
|
||||
const auto in = reinterpret_cast<const __m128i*>(input);
|
||||
const auto out = reinterpret_cast<__m128i*>(output);
|
||||
for (IndexType i = 0; i < kNumChunks; ++i) {
|
||||
for (IndexType i = 0; i < NumChunks; ++i) {
|
||||
const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
|
||||
_mm_load_si128(&in[i * 4 + 0]),
|
||||
_mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
|
||||
_mm_load_si128(&in[i * 4 + 1])), WeightScaleBits);
|
||||
const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
|
||||
_mm_load_si128(&in[i * 4 + 2]),
|
||||
_mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
|
||||
_mm_load_si128(&in[i * 4 + 3])), WeightScaleBits);
|
||||
const __m128i packedbytes = _mm_packs_epi16(words0, words1);
|
||||
_mm_store_si128(&out[i],
|
||||
|
||||
#ifdef USE_SSE41
|
||||
_mm_max_epi8(packedbytes, kZero)
|
||||
_mm_max_epi8(packedbytes, Zero)
|
||||
#else
|
||||
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
|
||||
#endif
|
||||
|
||||
);
|
||||
}
|
||||
constexpr IndexType kStart = kNumChunks * kSimdWidth;
|
||||
constexpr IndexType Start = NumChunks * SimdWidth;
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
|
||||
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
||||
const __m64 k0x80s = _mm_set1_pi8(-128);
|
||||
const auto in = reinterpret_cast<const __m64*>(input);
|
||||
const auto out = reinterpret_cast<__m64*>(output);
|
||||
for (IndexType i = 0; i < kNumChunks; ++i) {
|
||||
for (IndexType i = 0; i < NumChunks; ++i) {
|
||||
const __m64 words0 = _mm_srai_pi16(
|
||||
_mm_packs_pi32(in[i * 4 + 0], in[i * 4 + 1]),
|
||||
kWeightScaleBits);
|
||||
WeightScaleBits);
|
||||
const __m64 words1 = _mm_srai_pi16(
|
||||
_mm_packs_pi32(in[i * 4 + 2], in[i * 4 + 3]),
|
||||
kWeightScaleBits);
|
||||
WeightScaleBits);
|
||||
const __m64 packedbytes = _mm_packs_pi16(words0, words1);
|
||||
out[i] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
|
||||
}
|
||||
_mm_empty();
|
||||
constexpr IndexType kStart = kNumChunks * kSimdWidth;
|
||||
constexpr IndexType Start = NumChunks * SimdWidth;
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
|
||||
const int8x8_t kZero = {0};
|
||||
constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2);
|
||||
const int8x8_t Zero = {0};
|
||||
const auto in = reinterpret_cast<const int32x4_t*>(input);
|
||||
const auto out = reinterpret_cast<int8x8_t*>(output);
|
||||
for (IndexType i = 0; i < kNumChunks; ++i) {
|
||||
for (IndexType i = 0; i < NumChunks; ++i) {
|
||||
int16x8_t shifted;
|
||||
const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
|
||||
pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
|
||||
pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
|
||||
out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
|
||||
pack[0] = vqshrn_n_s32(in[i * 2 + 0], WeightScaleBits);
|
||||
pack[1] = vqshrn_n_s32(in[i * 2 + 1], WeightScaleBits);
|
||||
out[i] = vmax_s8(vqmovn_s16(shifted), Zero);
|
||||
}
|
||||
constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);
|
||||
constexpr IndexType Start = NumChunks * (SimdWidth / 2);
|
||||
#else
|
||||
constexpr IndexType kStart = 0;
|
||||
constexpr IndexType Start = 0;
|
||||
#endif
|
||||
|
||||
for (IndexType i = kStart; i < kInputDimensions; ++i) {
|
||||
for (IndexType i = Start; i < InputDimensions; ++i) {
|
||||
output[i] = static_cast<OutputType>(
|
||||
std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
|
||||
std::max(0, std::min(127, input[i] >> WeightScaleBits)));
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
private:
|
||||
PreviousLayer previous_layer_;
|
||||
PreviousLayer previousLayer;
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE::Layers
|
||||
|
||||
@@ -26,37 +26,42 @@
|
||||
namespace Stockfish::Eval::NNUE::Layers {
|
||||
|
||||
// Input layer
|
||||
template <IndexType OutputDimensions, IndexType Offset = 0>
|
||||
template <IndexType OutDims, IndexType Offset = 0>
|
||||
class InputSlice {
|
||||
public:
|
||||
// Need to maintain alignment
|
||||
static_assert(Offset % kMaxSimdWidth == 0, "");
|
||||
static_assert(Offset % MaxSimdWidth == 0, "");
|
||||
|
||||
// Output type
|
||||
using OutputType = TransformedFeatureType;
|
||||
|
||||
// Output dimensionality
|
||||
static constexpr IndexType kOutputDimensions = OutputDimensions;
|
||||
static constexpr IndexType OutputDimensions = OutDims;
|
||||
|
||||
// Size of forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize = 0;
|
||||
static constexpr std::size_t BufferSize = 0;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0xEC42E90Du;
|
||||
hash_value ^= kOutputDimensions ^ (Offset << 10);
|
||||
return hash_value;
|
||||
static constexpr std::uint32_t get_hash_value() {
|
||||
std::uint32_t hashValue = 0xEC42E90Du;
|
||||
hashValue ^= OutputDimensions ^ (Offset << 10);
|
||||
return hashValue;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& /*stream*/) {
|
||||
bool read_parameters(std::istream& /*stream*/) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool write_parameters(std::ostream& /*stream*/) const {
|
||||
return true;
|
||||
}
|
||||
// Forward propagation
|
||||
const OutputType* Propagate(
|
||||
const TransformedFeatureType* transformed_features,
|
||||
const OutputType* propagate(
|
||||
const TransformedFeatureType* transformedFeatures,
|
||||
char* /*buffer*/) const {
|
||||
return transformed_features + Offset;
|
||||
return transformedFeatures + Offset;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
@@ -29,9 +29,9 @@ namespace Stockfish::Eval::NNUE {
|
||||
enum AccumulatorState { EMPTY, COMPUTED, INIT };
|
||||
|
||||
// Class that holds the result of affine transformation of input features
|
||||
struct alignas(kCacheLineSize) Accumulator {
|
||||
struct alignas(CacheLineSize) Accumulator {
|
||||
std::int16_t
|
||||
accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
|
||||
accumulation[2][TransformedFeatureDimensions];
|
||||
AccumulatorState state[2];
|
||||
};
|
||||
|
||||
|
||||
@@ -21,17 +21,37 @@
|
||||
#ifndef NNUE_ARCHITECTURE_H_INCLUDED
|
||||
#define NNUE_ARCHITECTURE_H_INCLUDED
|
||||
|
||||
// Defines the network structure
|
||||
#include "architectures/halfkp_256x2-32-32.h"
|
||||
#include "nnue_common.h"
|
||||
|
||||
#include "features/half_kp.h"
|
||||
|
||||
#include "layers/input_slice.h"
|
||||
#include "layers/affine_transform.h"
|
||||
#include "layers/clipped_relu.h"
|
||||
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
|
||||
static_assert(Network::kOutputDimensions == 1, "");
|
||||
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
|
||||
// Input features used in evaluation function
|
||||
using FeatureSet = Features::HalfKP;
|
||||
|
||||
// Trigger for full calculation instead of difference calculation
|
||||
constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType TransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// Define network structure
|
||||
using InputLayer = InputSlice<TransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
static_assert(TransformedFeatureDimensions % MaxSimdWidth == 0, "");
|
||||
static_assert(Network::OutputDimensions == 1, "");
|
||||
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
|
||||
@@ -48,58 +48,30 @@
|
||||
namespace Stockfish::Eval::NNUE {
|
||||
|
||||
// Version of the evaluation file
|
||||
constexpr std::uint32_t kVersion = 0x7AF32F16u;
|
||||
constexpr std::uint32_t Version = 0x7AF32F16u;
|
||||
|
||||
// Constant used in evaluation value calculation
|
||||
constexpr int FV_SCALE = 16;
|
||||
constexpr int kWeightScaleBits = 6;
|
||||
constexpr int OutputScale = 16;
|
||||
constexpr int WeightScaleBits = 6;
|
||||
|
||||
// Size of cache line (in bytes)
|
||||
constexpr std::size_t kCacheLineSize = 64;
|
||||
constexpr std::size_t CacheLineSize = 64;
|
||||
|
||||
// SIMD width (in bytes)
|
||||
#if defined(USE_AVX2)
|
||||
constexpr std::size_t kSimdWidth = 32;
|
||||
constexpr std::size_t SimdWidth = 32;
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
constexpr std::size_t kSimdWidth = 16;
|
||||
constexpr std::size_t SimdWidth = 16;
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
constexpr std::size_t kSimdWidth = 8;
|
||||
constexpr std::size_t SimdWidth = 8;
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
constexpr std::size_t kSimdWidth = 16;
|
||||
constexpr std::size_t SimdWidth = 16;
|
||||
#endif
|
||||
|
||||
constexpr std::size_t kMaxSimdWidth = 32;
|
||||
|
||||
// unique number for each piece type on each square
|
||||
enum {
|
||||
PS_NONE = 0,
|
||||
PS_W_PAWN = 1,
|
||||
PS_B_PAWN = 1 * SQUARE_NB + 1,
|
||||
PS_W_KNIGHT = 2 * SQUARE_NB + 1,
|
||||
PS_B_KNIGHT = 3 * SQUARE_NB + 1,
|
||||
PS_W_BISHOP = 4 * SQUARE_NB + 1,
|
||||
PS_B_BISHOP = 5 * SQUARE_NB + 1,
|
||||
PS_W_ROOK = 6 * SQUARE_NB + 1,
|
||||
PS_B_ROOK = 7 * SQUARE_NB + 1,
|
||||
PS_W_QUEEN = 8 * SQUARE_NB + 1,
|
||||
PS_B_QUEEN = 9 * SQUARE_NB + 1,
|
||||
PS_W_KING = 10 * SQUARE_NB + 1,
|
||||
PS_END = PS_W_KING, // pieces without kings (pawns included)
|
||||
PS_B_KING = 11 * SQUARE_NB + 1,
|
||||
PS_END2 = 12 * SQUARE_NB + 1
|
||||
};
|
||||
|
||||
constexpr uint32_t kpp_board_index[COLOR_NB][PIECE_NB] = {
|
||||
// convention: W - us, B - them
|
||||
// viewed from other side, W and B are reversed
|
||||
{ PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_W_KING, PS_NONE,
|
||||
PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_B_KING, PS_NONE },
|
||||
{ PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_B_KING, PS_NONE,
|
||||
PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_W_KING, PS_NONE }
|
||||
};
|
||||
constexpr std::size_t MaxSimdWidth = 32;
|
||||
|
||||
// Type of input feature after conversion
|
||||
using TransformedFeatureType = std::uint8_t;
|
||||
@@ -107,7 +79,7 @@ namespace Stockfish::Eval::NNUE {
|
||||
|
||||
// Round n up to be a multiple of base
|
||||
template <typename IntType>
|
||||
constexpr IntType CeilToMultiple(IntType n, IntType base) {
|
||||
constexpr IntType ceil_to_multiple(IntType n, IntType base) {
|
||||
return (n + base - 1) / base * base;
|
||||
}
|
||||
|
||||
@@ -129,6 +101,24 @@ namespace Stockfish::Eval::NNUE {
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename IntType>
|
||||
inline void write_little_endian(std::ostream& stream, IntType value) {
|
||||
|
||||
std::uint8_t u[sizeof(IntType)];
|
||||
typename std::make_unsigned<IntType>::type v = value;
|
||||
|
||||
std::size_t i = 0;
|
||||
// if constexpr to silence the warning about shift by 8
|
||||
if constexpr (sizeof(IntType) > 1) {
|
||||
for (; i + 1 < sizeof(IntType); ++i) {
|
||||
u[i] = v;
|
||||
v >>= 8;
|
||||
}
|
||||
}
|
||||
u[i] = v;
|
||||
|
||||
stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
|
||||
}
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
#endif // #ifndef NNUE_COMMON_H_INCLUDED
|
||||
|
||||
@@ -23,7 +23,8 @@
|
||||
|
||||
#include "nnue_common.h"
|
||||
#include "nnue_architecture.h"
|
||||
#include "features/index_list.h"
|
||||
|
||||
#include "../misc.h"
|
||||
|
||||
#include <cstring> // std::memset()
|
||||
|
||||
@@ -40,7 +41,7 @@ namespace Stockfish::Eval::NNUE {
|
||||
#define vec_store(a,b) _mm512_store_si512(a,b)
|
||||
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
|
||||
static constexpr IndexType kNumRegs = 8; // only 8 are needed
|
||||
static constexpr IndexType NumRegs = 8; // only 8 are needed
|
||||
|
||||
#elif USE_AVX2
|
||||
typedef __m256i vec_t;
|
||||
@@ -48,7 +49,7 @@ namespace Stockfish::Eval::NNUE {
|
||||
#define vec_store(a,b) _mm256_store_si256(a,b)
|
||||
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
|
||||
static constexpr IndexType kNumRegs = 16;
|
||||
static constexpr IndexType NumRegs = 16;
|
||||
|
||||
#elif USE_SSE2
|
||||
typedef __m128i vec_t;
|
||||
@@ -56,7 +57,7 @@ namespace Stockfish::Eval::NNUE {
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) _mm_add_epi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
|
||||
static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8;
|
||||
static constexpr IndexType NumRegs = Is64Bit ? 16 : 8;
|
||||
|
||||
#elif USE_MMX
|
||||
typedef __m64 vec_t;
|
||||
@@ -64,7 +65,7 @@ namespace Stockfish::Eval::NNUE {
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) _mm_add_pi16(a,b)
|
||||
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
|
||||
static constexpr IndexType kNumRegs = 8;
|
||||
static constexpr IndexType NumRegs = 8;
|
||||
|
||||
#elif USE_NEON
|
||||
typedef int16x8_t vec_t;
|
||||
@@ -72,7 +73,7 @@ namespace Stockfish::Eval::NNUE {
|
||||
#define vec_store(a,b) *(a)=(b)
|
||||
#define vec_add_16(a,b) vaddq_s16(a,b)
|
||||
#define vec_sub_16(a,b) vsubq_s16(a,b)
|
||||
static constexpr IndexType kNumRegs = 16;
|
||||
static constexpr IndexType NumRegs = 16;
|
||||
|
||||
#else
|
||||
#undef VECTOR
|
||||
@@ -84,11 +85,11 @@ namespace Stockfish::Eval::NNUE {
|
||||
|
||||
private:
|
||||
// Number of output dimensions for one side
|
||||
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
|
||||
static constexpr IndexType HalfDimensions = TransformedFeatureDimensions;
|
||||
|
||||
#ifdef VECTOR
|
||||
static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
|
||||
static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
|
||||
static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2;
|
||||
static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions");
|
||||
#endif
|
||||
|
||||
public:
|
||||
@@ -96,105 +97,111 @@ namespace Stockfish::Eval::NNUE {
|
||||
using OutputType = TransformedFeatureType;
|
||||
|
||||
// Number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
|
||||
static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
|
||||
static constexpr IndexType InputDimensions = FeatureSet::Dimensions;
|
||||
static constexpr IndexType OutputDimensions = HalfDimensions * 2;
|
||||
|
||||
// Size of forward propagation buffer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
kOutputDimensions * sizeof(OutputType);
|
||||
static constexpr std::size_t BufferSize =
|
||||
OutputDimensions * sizeof(OutputType);
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
|
||||
return RawFeatures::kHashValue ^ kOutputDimensions;
|
||||
static constexpr std::uint32_t get_hash_value() {
|
||||
return FeatureSet::HashValue ^ OutputDimensions;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
bool read_parameters(std::istream& stream) {
|
||||
for (std::size_t i = 0; i < HalfDimensions; ++i)
|
||||
biases[i] = read_little_endian<BiasType>(stream);
|
||||
for (std::size_t i = 0; i < HalfDimensions * InputDimensions; ++i)
|
||||
weights[i] = read_little_endian<WeightType>(stream);
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
for (std::size_t i = 0; i < kHalfDimensions; ++i)
|
||||
biases_[i] = read_little_endian<BiasType>(stream);
|
||||
for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
|
||||
weights_[i] = read_little_endian<WeightType>(stream);
|
||||
// Write network parameters
|
||||
bool write_parameters(std::ostream& stream) const {
|
||||
for (std::size_t i = 0; i < HalfDimensions; ++i)
|
||||
write_little_endian<BiasType>(stream, biases[i]);
|
||||
for (std::size_t i = 0; i < HalfDimensions * InputDimensions; ++i)
|
||||
write_little_endian<WeightType>(stream, weights[i]);
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Convert input features
|
||||
void Transform(const Position& pos, OutputType* output) const {
|
||||
|
||||
UpdateAccumulator(pos, WHITE);
|
||||
UpdateAccumulator(pos, BLACK);
|
||||
void transform(const Position& pos, OutputType* output) const {
|
||||
update_accumulator(pos, WHITE);
|
||||
update_accumulator(pos, BLACK);
|
||||
|
||||
const auto& accumulation = pos.state()->accumulator.accumulation;
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth * 2);
|
||||
static_assert(kHalfDimensions % (kSimdWidth * 2) == 0);
|
||||
const __m512i kControl = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7);
|
||||
const __m512i kZero = _mm512_setzero_si512();
|
||||
constexpr IndexType NumChunks = HalfDimensions / (SimdWidth * 2);
|
||||
static_assert(HalfDimensions % (SimdWidth * 2) == 0);
|
||||
const __m512i Control = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7);
|
||||
const __m512i Zero = _mm512_setzero_si512();
|
||||
|
||||
#elif defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
constexpr int kControl = 0b11011000;
|
||||
const __m256i kZero = _mm256_setzero_si256();
|
||||
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
|
||||
constexpr int Control = 0b11011000;
|
||||
const __m256i Zero = _mm256_setzero_si256();
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
|
||||
|
||||
#ifdef USE_SSE41
|
||||
const __m128i kZero = _mm_setzero_si128();
|
||||
const __m128i Zero = _mm_setzero_si128();
|
||||
#else
|
||||
const __m128i k0x80s = _mm_set1_epi8(-128);
|
||||
#endif
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
constexpr IndexType NumChunks = HalfDimensions / SimdWidth;
|
||||
const __m64 k0x80s = _mm_set1_pi8(-128);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
const int8x8_t kZero = {0};
|
||||
constexpr IndexType NumChunks = HalfDimensions / (SimdWidth / 2);
|
||||
const int8x8_t Zero = {0};
|
||||
#endif
|
||||
|
||||
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
|
||||
for (IndexType p = 0; p < 2; ++p) {
|
||||
const IndexType offset = kHalfDimensions * p;
|
||||
const IndexType offset = HalfDimensions * p;
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
auto out = reinterpret_cast<__m512i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
__m512i sum0 = _mm512_load_si512(
|
||||
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]])[j * 2 + 0]);
|
||||
__m512i sum1 = _mm512_load_si512(
|
||||
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
_mm512_store_si512(&out[j], _mm512_permutexvar_epi64(kControl,
|
||||
_mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), kZero)));
|
||||
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]])[j * 2 + 1]);
|
||||
_mm512_store_si512(&out[j], _mm512_permutexvar_epi64(Control,
|
||||
_mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), Zero)));
|
||||
}
|
||||
|
||||
#elif defined(USE_AVX2)
|
||||
auto out = reinterpret_cast<__m256i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
__m256i sum0 = _mm256_load_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]])[j * 2 + 0]);
|
||||
__m256i sum1 = _mm256_load_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]])[j * 2 + 1]);
|
||||
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
|
||||
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
|
||||
_mm256_packs_epi16(sum0, sum1), Zero), Control));
|
||||
}
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
auto out = reinterpret_cast<__m128i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
accumulation[perspectives[p]])[j * 2 + 0]);
|
||||
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
accumulation[perspectives[p]])[j * 2 + 1]);
|
||||
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
|
||||
|
||||
_mm_store_si128(&out[j],
|
||||
|
||||
#ifdef USE_SSE41
|
||||
_mm_max_epi8(packedbytes, kZero)
|
||||
_mm_max_epi8(packedbytes, Zero)
|
||||
#else
|
||||
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
|
||||
#endif
|
||||
@@ -204,26 +211,26 @@ namespace Stockfish::Eval::NNUE {
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
auto out = reinterpret_cast<__m64*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
__m64 sum0 = *(&reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
accumulation[perspectives[p]])[j * 2 + 0]);
|
||||
__m64 sum1 = *(&reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
accumulation[perspectives[p]])[j * 2 + 1]);
|
||||
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
|
||||
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
|
||||
}
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
for (IndexType j = 0; j < NumChunks; ++j) {
|
||||
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
|
||||
accumulation[perspectives[p]][0])[j];
|
||||
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
|
||||
accumulation[perspectives[p]])[j];
|
||||
out[j] = vmax_s8(vqmovn_s16(sum), Zero);
|
||||
}
|
||||
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j) {
|
||||
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j) {
|
||||
BiasType sum = accumulation[static_cast<int>(perspectives[p])][j];
|
||||
output[offset + j] = static_cast<OutputType>(
|
||||
std::max<int>(0, std::min<int>(127, sum)));
|
||||
}
|
||||
@@ -236,34 +243,36 @@ namespace Stockfish::Eval::NNUE {
|
||||
}
|
||||
|
||||
private:
|
||||
void UpdateAccumulator(const Position& pos, const Color c) const {
|
||||
void update_accumulator(const Position& pos, const Color perspective) const {
|
||||
|
||||
// The size must be enough to contain the largest possible update.
|
||||
// That might depend on the feature set and generally relies on the
|
||||
// feature set's update cost calculation to be correct and never
|
||||
// allow updates with more added/removed features than MaxActiveDimensions.
|
||||
using IndexList = ValueList<IndexType, FeatureSet::MaxActiveDimensions>;
|
||||
|
||||
#ifdef VECTOR
|
||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
||||
// is defined in the VECTOR code below, once in each branch
|
||||
vec_t acc[kNumRegs];
|
||||
vec_t acc[NumRegs];
|
||||
#endif
|
||||
|
||||
// Look for a usable accumulator of an earlier position. We keep track
|
||||
// of the estimated gain in terms of features to be added/subtracted.
|
||||
StateInfo *st = pos.state(), *next = nullptr;
|
||||
int gain = pos.count<ALL_PIECES>() - 2;
|
||||
while (st->accumulator.state[c] == EMPTY)
|
||||
int gain = FeatureSet::refresh_cost(pos);
|
||||
while (st->accumulator.state[perspective] == EMPTY)
|
||||
{
|
||||
auto& dp = st->dirtyPiece;
|
||||
// The first condition tests whether an incremental update is
|
||||
// possible at all: if this side's king has moved, it is not possible.
|
||||
static_assert(std::is_same_v<RawFeatures::SortedTriggerSet,
|
||||
Features::CompileTimeList<Features::TriggerEvent, Features::TriggerEvent::kFriendKingMoved>>,
|
||||
"Current code assumes that only kFriendlyKingMoved refresh trigger is being used.");
|
||||
if ( dp.piece[0] == make_piece(c, KING)
|
||||
|| (gain -= dp.dirty_num + 1) < 0)
|
||||
// This governs when a full feature refresh is needed and how many
|
||||
// updates are better than just one full refresh.
|
||||
if ( FeatureSet::requires_refresh(st, perspective)
|
||||
|| (gain -= FeatureSet::update_cost(st) + 1) < 0)
|
||||
break;
|
||||
next = st;
|
||||
st = st->previous;
|
||||
}
|
||||
|
||||
if (st->accumulator.state[c] == COMPUTED)
|
||||
if (st->accumulator.state[perspective] == COMPUTED)
|
||||
{
|
||||
if (next == nullptr)
|
||||
return;
|
||||
@@ -271,85 +280,83 @@ namespace Stockfish::Eval::NNUE {
|
||||
// Update incrementally in two steps. First, we update the "next"
|
||||
// accumulator. Then, we update the current accumulator (pos.state()).
|
||||
|
||||
// Gather all features to be updated. This code assumes HalfKP features
|
||||
// only and doesn't support refresh triggers.
|
||||
static_assert(std::is_same_v<Features::FeatureSet<Features::HalfKP<Features::Side::kFriend>>,
|
||||
RawFeatures>);
|
||||
Features::IndexList removed[2], added[2];
|
||||
Features::HalfKP<Features::Side::kFriend>::AppendChangedIndices(pos,
|
||||
next->dirtyPiece, c, &removed[0], &added[0]);
|
||||
// Gather all features to be updated.
|
||||
const Square ksq = pos.square<KING>(perspective);
|
||||
IndexList removed[2], added[2];
|
||||
FeatureSet::append_changed_indices(
|
||||
ksq, next, perspective, removed[0], added[0]);
|
||||
for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous)
|
||||
Features::HalfKP<Features::Side::kFriend>::AppendChangedIndices(pos,
|
||||
st2->dirtyPiece, c, &removed[1], &added[1]);
|
||||
FeatureSet::append_changed_indices(
|
||||
ksq, st2, perspective, removed[1], added[1]);
|
||||
|
||||
// Mark the accumulators as computed.
|
||||
next->accumulator.state[c] = COMPUTED;
|
||||
pos.state()->accumulator.state[c] = COMPUTED;
|
||||
next->accumulator.state[perspective] = COMPUTED;
|
||||
pos.state()->accumulator.state[perspective] = COMPUTED;
|
||||
|
||||
// Now update the accumulators listed in info[], where the last element is a sentinel.
|
||||
StateInfo *info[3] =
|
||||
// Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
|
||||
StateInfo *states_to_update[3] =
|
||||
{ next, next == pos.state() ? nullptr : pos.state(), nullptr };
|
||||
#ifdef VECTOR
|
||||
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j)
|
||||
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
||||
{
|
||||
// Load accumulator
|
||||
auto accTile = reinterpret_cast<vec_t*>(
|
||||
&st->accumulator.accumulation[c][0][j * kTileHeight]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
&st->accumulator.accumulation[perspective][j * TileHeight]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_load(&accTile[k]);
|
||||
|
||||
for (IndexType i = 0; info[i]; ++i)
|
||||
for (IndexType i = 0; states_to_update[i]; ++i)
|
||||
{
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed[i])
|
||||
{
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
// Difference calculation for the activated features
|
||||
for (const auto index : added[i])
|
||||
{
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
// Store accumulator
|
||||
accTile = reinterpret_cast<vec_t*>(
|
||||
&info[i]->accumulator.accumulation[c][0][j * kTileHeight]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
&states_to_update[i]->accumulator.accumulation[perspective][j * TileHeight]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
for (IndexType i = 0; info[i]; ++i)
|
||||
for (IndexType i = 0; states_to_update[i]; ++i)
|
||||
{
|
||||
std::memcpy(info[i]->accumulator.accumulation[c][0],
|
||||
st->accumulator.accumulation[c][0],
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
st = info[i];
|
||||
std::memcpy(states_to_update[i]->accumulator.accumulation[perspective],
|
||||
st->accumulator.accumulation[perspective],
|
||||
HalfDimensions * sizeof(BiasType));
|
||||
st = states_to_update[i];
|
||||
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed[i])
|
||||
{
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
st->accumulator.accumulation[c][0][j] -= weights_[offset + j];
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
st->accumulator.accumulation[perspective][j] -= weights[offset + j];
|
||||
}
|
||||
|
||||
// Difference calculation for the activated features
|
||||
for (const auto index : added[i])
|
||||
{
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
st->accumulator.accumulation[c][0][j] += weights_[offset + j];
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
st->accumulator.accumulation[perspective][j] += weights[offset + j];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -358,43 +365,43 @@ namespace Stockfish::Eval::NNUE {
|
||||
{
|
||||
// Refresh the accumulator
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
accumulator.state[c] = COMPUTED;
|
||||
Features::IndexList active;
|
||||
Features::HalfKP<Features::Side::kFriend>::AppendActiveIndices(pos, c, &active);
|
||||
accumulator.state[perspective] = COMPUTED;
|
||||
IndexList active;
|
||||
FeatureSet::append_active_indices(pos, perspective, active);
|
||||
|
||||
#ifdef VECTOR
|
||||
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j)
|
||||
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
|
||||
{
|
||||
auto biasesTile = reinterpret_cast<const vec_t*>(
|
||||
&biases_[j * kTileHeight]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
&biases[j * TileHeight]);
|
||||
for (IndexType k = 0; k < NumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
|
||||
for (const auto index : active)
|
||||
{
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
const IndexType offset = HalfDimensions * index + j * TileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
||||
|
||||
for (unsigned k = 0; k < kNumRegs; ++k)
|
||||
for (unsigned k = 0; k < NumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
auto accTile = reinterpret_cast<vec_t*>(
|
||||
&accumulator.accumulation[c][0][j * kTileHeight]);
|
||||
for (unsigned k = 0; k < kNumRegs; k++)
|
||||
&accumulator.accumulation[perspective][j * TileHeight]);
|
||||
for (unsigned k = 0; k < NumRegs; k++)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
|
||||
#else
|
||||
std::memcpy(accumulator.accumulation[c][0], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
std::memcpy(accumulator.accumulation[perspective], biases,
|
||||
HalfDimensions * sizeof(BiasType));
|
||||
|
||||
for (const auto index : active)
|
||||
{
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
const IndexType offset = HalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[c][0][j] += weights_[offset + j];
|
||||
for (IndexType j = 0; j < HalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][j] += weights[offset + j];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@@ -407,9 +414,8 @@ namespace Stockfish::Eval::NNUE {
|
||||
using BiasType = std::int16_t;
|
||||
using WeightType = std::int16_t;
|
||||
|
||||
alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
|
||||
alignas(kCacheLineSize)
|
||||
WeightType weights_[kHalfDimensions * kInputDimensions];
|
||||
alignas(CacheLineSize) BiasType biases[HalfDimensions];
|
||||
alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions];
|
||||
};
|
||||
|
||||
} // namespace Stockfish::Eval::NNUE
|
||||
|
||||
@@ -84,7 +84,7 @@ std::ostream& operator<<(std::ostream& os, const Position& pos) {
|
||||
&& !pos.can_castle(ANY_CASTLING))
|
||||
{
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize);
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
|
||||
|
||||
Position p;
|
||||
p.set(pos.fen(), pos.is_chess960(), &st, pos.this_thread());
|
||||
@@ -1337,7 +1337,7 @@ bool Position::pos_is_ok() const {
|
||||
assert(0 && "pos_is_ok: Bitboards");
|
||||
|
||||
StateInfo si = *st;
|
||||
ASSERT_ALIGNED(&si, Eval::NNUE::kCacheLineSize);
|
||||
ASSERT_ALIGNED(&si, Eval::NNUE::CacheLineSize);
|
||||
|
||||
set_state(&si);
|
||||
if (std::memcmp(&si, st, sizeof(StateInfo)))
|
||||
|
||||
@@ -118,7 +118,6 @@ public:
|
||||
Bitboard blockers_for_king(Color c) const;
|
||||
Bitboard check_squares(PieceType pt) const;
|
||||
Bitboard pinners(Color c) const;
|
||||
bool is_discovered_check_on_king(Color c, Move m) const;
|
||||
|
||||
// Attacks to/from a given square
|
||||
Bitboard attackers_to(Square s) const;
|
||||
@@ -328,10 +327,6 @@ inline Bitboard Position::check_squares(PieceType pt) const {
|
||||
return st->checkSquares[pt];
|
||||
}
|
||||
|
||||
inline bool Position::is_discovered_check_on_king(Color c, Move m) const {
|
||||
return st->blockersForKing[c] & from_sq(m);
|
||||
}
|
||||
|
||||
inline bool Position::pawn_passed(Color c, Square s) const {
|
||||
return !(pieces(~c, PAWN) & passed_pawn_span(c, s));
|
||||
}
|
||||
|
||||
108
src/search.cpp
108
src/search.cpp
@@ -96,49 +96,6 @@ namespace {
|
||||
Move best = MOVE_NONE;
|
||||
};
|
||||
|
||||
// Breadcrumbs are used to mark nodes as being searched by a given thread
|
||||
struct Breadcrumb {
|
||||
std::atomic<Thread*> thread;
|
||||
std::atomic<Key> key;
|
||||
};
|
||||
std::array<Breadcrumb, 1024> breadcrumbs;
|
||||
|
||||
// ThreadHolding structure keeps track of which thread left breadcrumbs at the given
|
||||
// node for potential reductions. A free node will be marked upon entering the moves
|
||||
// loop by the constructor, and unmarked upon leaving that loop by the destructor.
|
||||
struct ThreadHolding {
|
||||
explicit ThreadHolding(Thread* thisThread, Key posKey, int ply) {
|
||||
location = ply < 8 ? &breadcrumbs[posKey & (breadcrumbs.size() - 1)] : nullptr;
|
||||
otherThread = false;
|
||||
owning = false;
|
||||
if (location)
|
||||
{
|
||||
// See if another already marked this location, if not, mark it ourselves
|
||||
Thread* tmp = (*location).thread.load(std::memory_order_relaxed);
|
||||
if (tmp == nullptr)
|
||||
{
|
||||
(*location).thread.store(thisThread, std::memory_order_relaxed);
|
||||
(*location).key.store(posKey, std::memory_order_relaxed);
|
||||
owning = true;
|
||||
}
|
||||
else if ( tmp != thisThread
|
||||
&& (*location).key.load(std::memory_order_relaxed) == posKey)
|
||||
otherThread = true;
|
||||
}
|
||||
}
|
||||
|
||||
~ThreadHolding() {
|
||||
if (owning) // Free the marked location
|
||||
(*location).thread.store(nullptr, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
bool marked() { return otherThread; }
|
||||
|
||||
private:
|
||||
Breadcrumb* location;
|
||||
bool otherThread, owning;
|
||||
};
|
||||
|
||||
template <NodeType NT>
|
||||
Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode);
|
||||
|
||||
@@ -159,7 +116,7 @@ namespace {
|
||||
uint64_t perft(Position& pos, Depth depth) {
|
||||
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize);
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
|
||||
|
||||
uint64_t cnt, nodes = 0;
|
||||
const bool leaf = (depth == 2);
|
||||
@@ -594,7 +551,7 @@ namespace {
|
||||
|
||||
Move pv[MAX_PLY+1], capturesSearched[32], quietsSearched[64];
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize);
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
|
||||
|
||||
TTEntry* tte;
|
||||
Key posKey;
|
||||
@@ -1003,8 +960,12 @@ moves_loop: // When in check, search starts from here
|
||||
value = bestValue;
|
||||
singularQuietLMR = moveCountPruning = false;
|
||||
|
||||
// Mark this node as being searched
|
||||
ThreadHolding th(thisThread, posKey, ss->ply);
|
||||
// Indicate PvNodes that will probably fail low if the node was searched
|
||||
// at a depth equal or greater than the current depth, and the result of this search was a fail low.
|
||||
bool likelyFailLow = PvNode
|
||||
&& ttMove
|
||||
&& (tte->bound() & BOUND_UPPER)
|
||||
&& tte->depth() >= depth;
|
||||
|
||||
// Step 12. Loop through all pseudo-legal moves until no moves remain
|
||||
// or a beta cutoff occurs.
|
||||
@@ -1043,14 +1004,6 @@ moves_loop: // When in check, search starts from here
|
||||
movedPiece = pos.moved_piece(move);
|
||||
givesCheck = pos.gives_check(move);
|
||||
|
||||
// Indicate PvNodes that will probably fail low if node was searched with non-PV search
|
||||
// at depth equal or greater to current depth and result of this search was far below alpha
|
||||
bool likelyFailLow = PvNode
|
||||
&& ttMove
|
||||
&& (tte->bound() & BOUND_UPPER)
|
||||
&& ttValue < alpha + 200 + 100 * depth
|
||||
&& tte->depth() >= depth;
|
||||
|
||||
// Calculate new depth for this move
|
||||
newDepth = depth - 1;
|
||||
|
||||
@@ -1130,6 +1083,8 @@ moves_loop: // When in check, search starts from here
|
||||
{
|
||||
extension = 1;
|
||||
singularQuietLMR = !ttCapture;
|
||||
if (!PvNode && value < singularBeta - 140)
|
||||
extension = 2;
|
||||
}
|
||||
|
||||
// Multi-cut pruning
|
||||
@@ -1153,11 +1108,6 @@ moves_loop: // When in check, search starts from here
|
||||
}
|
||||
}
|
||||
|
||||
// Check extension (~2 Elo)
|
||||
else if ( givesCheck
|
||||
&& (pos.is_discovered_check_on_king(~us, move) || pos.see_ge(move)))
|
||||
extension = 1;
|
||||
|
||||
// Add extension to new depth
|
||||
newDepth += extension;
|
||||
|
||||
@@ -1185,7 +1135,8 @@ moves_loop: // When in check, search starts from here
|
||||
|| ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha
|
||||
|| cutNode
|
||||
|| (!PvNode && !formerPv && captureHistory[movedPiece][to_sq(move)][type_of(pos.captured_piece())] < 3678)
|
||||
|| thisThread->ttHitAverage < 432 * TtHitAverageResolution * TtHitAverageWindow / 1024))
|
||||
|| thisThread->ttHitAverage < 432 * TtHitAverageResolution * TtHitAverageWindow / 1024)
|
||||
&& (!PvNode || ss->ply > 1 || thisThread->id() % 4 != 3))
|
||||
{
|
||||
Depth r = reduction(improving, depth, moveCount);
|
||||
|
||||
@@ -1193,10 +1144,6 @@ moves_loop: // When in check, search starts from here
|
||||
if (thisThread->ttHitAverage > 537 * TtHitAverageResolution * TtHitAverageWindow / 1024)
|
||||
r--;
|
||||
|
||||
// Increase reduction if other threads are searching this position
|
||||
if (th.marked())
|
||||
r++;
|
||||
|
||||
// Decrease reduction if position is or has been on the PV
|
||||
// and node is not likely to fail low. (~10 Elo)
|
||||
if ( ss->ttPv
|
||||
@@ -1209,29 +1156,17 @@ moves_loop: // When in check, search starts from here
|
||||
&& thisThread->bestMoveChanges <= 2)
|
||||
r++;
|
||||
|
||||
// More reductions for late moves if position was not in previous PV
|
||||
if ( moveCountPruning
|
||||
&& !formerPv)
|
||||
r++;
|
||||
|
||||
// Decrease reduction if opponent's move count is high (~5 Elo)
|
||||
// Decrease reduction if opponent's move count is high (~1 Elo)
|
||||
if ((ss-1)->moveCount > 13)
|
||||
r--;
|
||||
|
||||
// Decrease reduction if ttMove has been singularly extended (~3 Elo)
|
||||
// Decrease reduction if ttMove has been singularly extended (~1 Elo)
|
||||
if (singularQuietLMR)
|
||||
r--;
|
||||
|
||||
if (captureOrPromotion)
|
||||
if (!captureOrPromotion)
|
||||
{
|
||||
// Increase reduction for non-checking captures likely to be bad
|
||||
if ( !givesCheck
|
||||
&& ss->staticEval + PieceValue[EG][pos.captured_piece()] + 210 * depth <= alpha)
|
||||
r++;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Increase reduction if ttMove is a capture (~5 Elo)
|
||||
// Increase reduction if ttMove is a capture (~3 Elo)
|
||||
if (ttCapture)
|
||||
r++;
|
||||
|
||||
@@ -1242,13 +1177,6 @@ moves_loop: // When in check, search starts from here
|
||||
if (cutNode)
|
||||
r += 2;
|
||||
|
||||
// Decrease reduction for moves that escape a capture. Filter out
|
||||
// castling moves, because they are coded as "king captures rook" and
|
||||
// hence break reverse_move() (~2 Elo)
|
||||
else if ( type_of(move) == NORMAL
|
||||
&& !pos.see_ge(reverse_move(move)))
|
||||
r -= 2 + ss->ttPv - (type_of(movedPiece) == PAWN);
|
||||
|
||||
ss->statScore = thisThread->mainHistory[us][from_to(move)]
|
||||
+ (*contHist[0])[movedPiece][to_sq(move)]
|
||||
+ (*contHist[1])[movedPiece][to_sq(move)]
|
||||
@@ -1458,7 +1386,7 @@ moves_loop: // When in check, search starts from here
|
||||
|
||||
Move pv[MAX_PLY+1];
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize);
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
|
||||
|
||||
TTEntry* tte;
|
||||
Key posKey;
|
||||
@@ -1964,7 +1892,7 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) {
|
||||
bool RootMove::extract_ponder_from_tt(Position& pos) {
|
||||
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize);
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
|
||||
|
||||
bool ttHit;
|
||||
|
||||
|
||||
@@ -74,6 +74,7 @@ public:
|
||||
void idle_loop();
|
||||
void start_searching();
|
||||
void wait_for_search_finished();
|
||||
int id() const { return idx; }
|
||||
void wait_for_worker_finished();
|
||||
size_t thread_idx() const { return idx; }
|
||||
|
||||
|
||||
@@ -94,6 +94,14 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {
|
||||
optimumTime = TimePoint(optScale * timeLeft);
|
||||
maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, maxScale * optimumTime));
|
||||
|
||||
if (Stockfish::Search::Limits.use_time_management())
|
||||
{
|
||||
int strength = std::log( std::max(1, int(optimumTime * Threads.size() / 10))) * 60;
|
||||
tempoNNUE = std::clamp( (strength + 264) / 24, 18, 30);
|
||||
}
|
||||
else
|
||||
tempoNNUE = 28; // default for no time given
|
||||
|
||||
if (Options["Ponder"])
|
||||
optimumTime += optimumTime / 4;
|
||||
}
|
||||
|
||||
@@ -37,6 +37,7 @@ public:
|
||||
TimePoint(Threads.nodes_searched()) : now() - startTime; }
|
||||
|
||||
int64_t availableNodes; // When in 'nodes as time' mode
|
||||
int tempoNNUE;
|
||||
|
||||
private:
|
||||
TimePoint startTime;
|
||||
|
||||
@@ -319,7 +319,14 @@ void UCI::loop(int argc, char* argv[]) {
|
||||
else if (token == "d") sync_cout << pos << sync_endl;
|
||||
else if (token == "eval") trace_eval(pos);
|
||||
else if (token == "compiler") sync_cout << compiler_info() << sync_endl;
|
||||
|
||||
else if (token == "export_net") {
|
||||
std::optional<std::string> filename;
|
||||
std::string f;
|
||||
if (is >> skipws >> f) {
|
||||
filename = f;
|
||||
}
|
||||
Eval::NNUE::export_net(filename);
|
||||
}
|
||||
else if (token == "generate_training_data") Tools::generate_training_data(is);
|
||||
else if (token == "generate_training_data") Tools::generate_training_data_nonpv(is);
|
||||
else if (token == "convert") Tools::convert(is);
|
||||
|
||||
Reference in New Issue
Block a user