diff --git a/src/Makefile b/src/Makefile index b021ba12..c1dab86b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -41,7 +41,7 @@ endif SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \ material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \ search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ - nnue/evaluate_nnue.cpp nnue/features/half_ka_v2.cpp + nnue/evaluate_nnue.cpp nnue/features/half_ka_v2_hm.cpp OBJS = $(notdir $(SRCS:.cpp=.o)) diff --git a/src/evaluate.h b/src/evaluate.h index 1ac224b6..4430a28c 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -39,7 +39,7 @@ namespace Eval { // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. - #define EvalFileDefaultName "nn-46832cfbead3.nnue" + #define EvalFileDefaultName "nn-e8321e467bf6.nnue" namespace NNUE { diff --git a/src/nnue/features/half_ka_v2.cpp b/src/nnue/features/half_ka_v2_hm.cpp similarity index 68% rename from src/nnue/features/half_ka_v2.cpp rename to src/nnue/features/half_ka_v2_hm.cpp index 57f43e50..098a6d60 100644 --- a/src/nnue/features/half_ka_v2.cpp +++ b/src/nnue/features/half_ka_v2_hm.cpp @@ -16,31 +16,32 @@ along with this program. If not, see . */ -//Definition of input features HalfKAv2 of NNUE evaluation function +//Definition of input features HalfKAv2_hm of NNUE evaluation function -#include "half_ka_v2.h" +#include "half_ka_v2_hm.h" #include "../../position.h" namespace Stockfish::Eval::NNUE::Features { // Orient a square according to perspective (rotates by 180 for black) - inline Square HalfKAv2::orient(Color perspective, Square s) { - return Square(int(s) ^ (bool(perspective) * 56)); + inline Square HalfKAv2_hm::orient(Color perspective, Square s, Square ksq) { + return Square(int(s) ^ (bool(perspective) * SQ_A8) ^ ((file_of(ksq) < FILE_E) * SQ_H1)); } // Index of a feature for a given king position and another piece on some square - inline IndexType HalfKAv2::make_index(Color perspective, Square s, Piece pc, Square ksq) { - return IndexType(orient(perspective, s) + PieceSquareIndex[perspective][pc] + PS_NB * ksq); + inline IndexType HalfKAv2_hm::make_index(Color perspective, Square s, Piece pc, Square ksq) { + Square o_ksq = orient(perspective, ksq, ksq); + return IndexType(orient(perspective, s, ksq) + PieceSquareIndex[perspective][pc] + PS_NB * KingBuckets[o_ksq]); } // Get a list of indices for active features - void HalfKAv2::append_active_indices( + void HalfKAv2_hm::append_active_indices( const Position& pos, Color perspective, ValueListInserter active ) { - Square ksq = orient(perspective, pos.square(perspective)); + Square ksq = pos.square(perspective); Bitboard bb = pos.pieces(); while (bb) { @@ -52,7 +53,7 @@ namespace Stockfish::Eval::NNUE::Features { // append_changed_indices() : get a list of indices for recently changed features - void HalfKAv2::append_changed_indices( + void HalfKAv2_hm::append_changed_indices( Square ksq, StateInfo* st, Color perspective, @@ -60,25 +61,24 @@ namespace Stockfish::Eval::NNUE::Features { ValueListInserter added ) { const auto& dp = st->dirtyPiece; - Square oriented_ksq = orient(perspective, ksq); for (int i = 0; i < dp.dirty_num; ++i) { Piece pc = dp.piece[i]; if (dp.from[i] != SQ_NONE) - removed.push_back(make_index(perspective, dp.from[i], pc, oriented_ksq)); + removed.push_back(make_index(perspective, dp.from[i], pc, ksq)); if (dp.to[i] != SQ_NONE) - added.push_back(make_index(perspective, dp.to[i], pc, oriented_ksq)); + added.push_back(make_index(perspective, dp.to[i], pc, ksq)); } } - int HalfKAv2::update_cost(StateInfo* st) { + int HalfKAv2_hm::update_cost(StateInfo* st) { return st->dirtyPiece.dirty_num; } - int HalfKAv2::refresh_cost(const Position& pos) { + int HalfKAv2_hm::refresh_cost(const Position& pos) { return pos.count(); } - bool HalfKAv2::requires_refresh(StateInfo* st, Color perspective) { + bool HalfKAv2_hm::requires_refresh(StateInfo* st, Color perspective) { return st->dirtyPiece.piece[0] == make_piece(perspective, KING); } diff --git a/src/nnue/features/half_ka_v2.h b/src/nnue/features/half_ka_v2_hm.h similarity index 80% rename from src/nnue/features/half_ka_v2.h rename to src/nnue/features/half_ka_v2_hm.h index e4b2edd9..2c1144f6 100644 --- a/src/nnue/features/half_ka_v2.h +++ b/src/nnue/features/half_ka_v2_hm.h @@ -18,8 +18,8 @@ //Definition of input features HalfKP of NNUE evaluation function -#ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED -#define NNUE_FEATURES_HALF_KA_V2_H_INCLUDED +#ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED +#define NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED #include "../nnue_common.h" @@ -32,9 +32,9 @@ namespace Stockfish { namespace Stockfish::Eval::NNUE::Features { - // Feature HalfKAv2: Combination of the position of own king - // and the position of pieces - class HalfKAv2 { + // Feature HalfKAv2_hm: Combination of the position of own king + // and the position of pieces. Position mirrored such that king always on e..h files. + class HalfKAv2_hm { // unique number for each piece type on each square enum { @@ -63,21 +63,32 @@ namespace Stockfish::Eval::NNUE::Features { }; // Orient a square according to perspective (rotates by 180 for black) - static Square orient(Color perspective, Square s); + static Square orient(Color perspective, Square s, Square ksq); // Index of a feature for a given king position and another piece on some square static IndexType make_index(Color perspective, Square s, Piece pc, Square ksq); public: // Feature name - static constexpr const char* Name = "HalfKAv2(Friend)"; + static constexpr const char* Name = "HalfKAv2_hm(Friend)"; // Hash value embedded in the evaluation file - static constexpr std::uint32_t HashValue = 0x5f234cb8u; + static constexpr std::uint32_t HashValue = 0x7f234cb8u; // Number of feature dimensions static constexpr IndexType Dimensions = - static_cast(SQUARE_NB) * static_cast(PS_NB); + static_cast(SQUARE_NB) * static_cast(PS_NB) / 2; + + static constexpr int KingBuckets[64] = { + -1, -1, -1, -1, 31, 30, 29, 28, + -1, -1, -1, -1, 27, 26, 25, 24, + -1, -1, -1, -1, 23, 22, 21, 20, + -1, -1, -1, -1, 19, 18, 17, 16, + -1, -1, -1, -1, 15, 14, 13, 12, + -1, -1, -1, -1, 11, 10, 9, 8, + -1, -1, -1, -1, 7, 6, 5, 4, + -1, -1, -1, -1, 3, 2, 1, 0 + }; // Maximum number of simultaneously active features. static constexpr IndexType MaxActiveDimensions = 32; @@ -108,4 +119,4 @@ namespace Stockfish::Eval::NNUE::Features { } // namespace Stockfish::Eval::NNUE::Features -#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED +#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 9a5f62c0..d1318368 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -46,6 +46,11 @@ namespace Stockfish::Eval::NNUE::Layers { #elif defined (USE_SSSE3) static constexpr const IndexType OutputSimdWidth = SimdWidth / 4; #endif +#if defined (USE_AVX512) + static constexpr const IndexType InputSimdWidth = SimdWidth * 2; +#elif defined (USE_SSSE3) + static constexpr const IndexType InputSimdWidth = SimdWidth; +#endif // Size of forward propagation buffer used in this layer static constexpr std::size_t SelfBufferSize = @@ -72,6 +77,15 @@ namespace Stockfish::Eval::NNUE::Layers { for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) #if !defined (USE_SSSE3) weights[i] = read_little_endian(stream); +#elif defined (USE_VNNI) || defined (USE_AVX512) + if constexpr (OutputDimensions <= 8 && OutputDimensions != 1) + weights[i] = read_little_endian(stream); + else + weights[ + (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 + + i / PaddedInputDimensions * 4 + + i % 4 + ] = read_little_endian(stream); #else weights[ (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 + @@ -108,7 +122,6 @@ namespace Stockfish::Eval::NNUE::Layers { return !stream.fail(); } - // Forward propagation const OutputType* propagate( const TransformedFeatureType* transformedFeatures, char* buffer) const { @@ -123,6 +136,40 @@ namespace Stockfish::Eval::NNUE::Layers { return _mm512_reduce_add_epi32(sum) + bias; }; + [[maybe_unused]] auto m512_hadd128x16_interleave = []( + __m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) -> __m512i { + + __m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1); + __m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1); + + __m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3); + __m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3); + + __m512i sum01 = _mm512_add_epi32(sum01a, sum01b); + __m512i sum23 = _mm512_add_epi32(sum23a, sum23b); + + __m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23); + __m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23); + + return _mm512_add_epi32(sum0123a, sum0123b); + }; + + [[maybe_unused]] auto m512_haddx4 = [m512_hadd128x16_interleave]( + __m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3, __m128i bias) -> __m128i { + + __m512i sum = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3); + + __m256i sum256lo = _mm512_castsi512_si256(sum); + __m256i sum256hi = _mm512_extracti64x4_epi64(sum, 1); + + sum256lo = _mm256_add_epi32(sum256lo, sum256hi); + + __m128i sum128lo = _mm256_castsi256_si128(sum256lo); + __m128i sum128hi = _mm256_extracti128_si256(sum256lo, 1); + + return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias); + }; + [[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) { #if defined (USE_VNNI) acc = _mm512_dpbusd_epi32(acc, a, b); @@ -133,6 +180,19 @@ namespace Stockfish::Eval::NNUE::Layers { #endif }; + [[maybe_unused]] auto m512_add_dpbusd_epi32x2 = [=](__m512i& acc, __m512i a0, __m512i b0, __m512i a1, __m512i b1) { +#if defined (USE_VNNI) + acc = _mm512_dpbusd_epi32(acc, a0, b0); + acc = _mm512_dpbusd_epi32(acc, a1, b1); +#else + __m512i product0 = _mm512_maddubs_epi16(a0, b0); + __m512i product1 = _mm512_maddubs_epi16(a1, b1); + product0 = _mm512_adds_epi16(product0, product1); + product0 = _mm512_madd_epi16(product0, Ones512); + acc = _mm512_add_epi32(acc, product0); +#endif + }; + [[maybe_unused]] auto m512_add_dpbusd_epi32x4 = [=](__m512i& acc, __m512i a0, __m512i b0, __m512i a1, __m512i b1, __m512i a2, __m512i b2, __m512i a3, __m512i b3) { #if defined (USE_VNNI) @@ -165,6 +225,18 @@ namespace Stockfish::Eval::NNUE::Layers { return _mm_cvtsi128_si32(sum128) + bias; }; + [[maybe_unused]] auto m256_haddx4 = [](__m256i sum0, __m256i sum1, __m256i sum2, __m256i sum3, __m128i bias) -> __m128i { + sum0 = _mm256_hadd_epi32(sum0, sum1); + sum2 = _mm256_hadd_epi32(sum2, sum3); + + sum0 = _mm256_hadd_epi32(sum0, sum2); + + __m128i sum128lo = _mm256_castsi256_si128(sum0); + __m128i sum128hi = _mm256_extracti128_si256(sum0, 1); + + return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias); + }; + [[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) { #if defined (USE_VNNI) acc = _mm256_dpbusd_epi32(acc, a, b); @@ -175,6 +247,19 @@ namespace Stockfish::Eval::NNUE::Layers { #endif }; + [[maybe_unused]] auto m256_add_dpbusd_epi32x2 = [=](__m256i& acc, __m256i a0, __m256i b0, __m256i a1, __m256i b1) { +#if defined (USE_VNNI) + acc = _mm256_dpbusd_epi32(acc, a0, b0); + acc = _mm256_dpbusd_epi32(acc, a1, b1); +#else + __m256i product0 = _mm256_maddubs_epi16(a0, b0); + __m256i product1 = _mm256_maddubs_epi16(a1, b1); + product0 = _mm256_adds_epi16(product0, product1); + product0 = _mm256_madd_epi16(product0, Ones256); + acc = _mm256_add_epi32(acc, product0); +#endif + }; + [[maybe_unused]] auto m256_add_dpbusd_epi32x4 = [=](__m256i& acc, __m256i a0, __m256i b0, __m256i a1, __m256i b1, __m256i a2, __m256i b2, __m256i a3, __m256i b3) { #if defined (USE_VNNI) @@ -206,12 +291,27 @@ namespace Stockfish::Eval::NNUE::Layers { return _mm_cvtsi128_si32(sum) + bias; }; + [[maybe_unused]] auto m128_haddx4 = [](__m128i sum0, __m128i sum1, __m128i sum2, __m128i sum3, __m128i bias) -> __m128i { + sum0 = _mm_hadd_epi32(sum0, sum1); + sum2 = _mm_hadd_epi32(sum2, sum3); + sum0 = _mm_hadd_epi32(sum0, sum2); + return _mm_add_epi32(sum0, bias); + }; + [[maybe_unused]] auto m128_add_dpbusd_epi32 = [=](__m128i& acc, __m128i a, __m128i b) { __m128i product0 = _mm_maddubs_epi16(a, b); product0 = _mm_madd_epi16(product0, Ones128); acc = _mm_add_epi32(acc, product0); }; + [[maybe_unused]] auto m128_add_dpbusd_epi32x2 = [=](__m128i& acc, __m128i a0, __m128i b0, __m128i a1, __m128i b1) { + __m128i product0 = _mm_maddubs_epi16(a0, b0); + __m128i product1 = _mm_maddubs_epi16(a1, b1); + product0 = _mm_adds_epi16(product0, product1); + product0 = _mm_madd_epi16(product0, Ones128); + acc = _mm_add_epi32(acc, product0); + }; + [[maybe_unused]] auto m128_add_dpbusd_epi32x4 = [=](__m128i& acc, __m128i a0, __m128i b0, __m128i a1, __m128i b1, __m128i a2, __m128i b2, __m128i a3, __m128i b3) { __m128i product0 = _mm_maddubs_epi16(a0, b0); @@ -231,33 +331,116 @@ namespace Stockfish::Eval::NNUE::Layers { using vec_t = __m512i; #define vec_setzero _mm512_setzero_si512 #define vec_set_32 _mm512_set1_epi32 - auto& vec_add_dpbusd_32 = m512_add_dpbusd_epi32; - auto& vec_add_dpbusd_32x4 = m512_add_dpbusd_epi32x4; - auto& vec_hadd = m512_hadd; + [[maybe_unused]] auto& vec_add_dpbusd_32 = m512_add_dpbusd_epi32; + [[maybe_unused]] auto& vec_add_dpbusd_32x2 = m512_add_dpbusd_epi32x2; + [[maybe_unused]] auto& vec_add_dpbusd_32x4 = m512_add_dpbusd_epi32x4; + [[maybe_unused]] auto& vec_hadd = m512_hadd; + [[maybe_unused]] auto& vec_haddx4 = m512_haddx4; #elif defined (USE_AVX2) using vec_t = __m256i; #define vec_setzero _mm256_setzero_si256 #define vec_set_32 _mm256_set1_epi32 - auto& vec_add_dpbusd_32 = m256_add_dpbusd_epi32; - auto& vec_add_dpbusd_32x4 = m256_add_dpbusd_epi32x4; - auto& vec_hadd = m256_hadd; + [[maybe_unused]] auto& vec_add_dpbusd_32 = m256_add_dpbusd_epi32; + [[maybe_unused]] auto& vec_add_dpbusd_32x2 = m256_add_dpbusd_epi32x2; + [[maybe_unused]] auto& vec_add_dpbusd_32x4 = m256_add_dpbusd_epi32x4; + [[maybe_unused]] auto& vec_hadd = m256_hadd; + [[maybe_unused]] auto& vec_haddx4 = m256_haddx4; #elif defined (USE_SSSE3) using vec_t = __m128i; #define vec_setzero _mm_setzero_si128 #define vec_set_32 _mm_set1_epi32 - auto& vec_add_dpbusd_32 = m128_add_dpbusd_epi32; - auto& vec_add_dpbusd_32x4 = m128_add_dpbusd_epi32x4; - auto& vec_hadd = m128_hadd; + [[maybe_unused]] auto& vec_add_dpbusd_32 = m128_add_dpbusd_epi32; + [[maybe_unused]] auto& vec_add_dpbusd_32x2 = m128_add_dpbusd_epi32x2; + [[maybe_unused]] auto& vec_add_dpbusd_32x4 = m128_add_dpbusd_epi32x4; + [[maybe_unused]] auto& vec_hadd = m128_hadd; + [[maybe_unused]] auto& vec_haddx4 = m128_haddx4; #endif #if defined (USE_SSSE3) const auto output = reinterpret_cast(buffer); const auto inputVector = reinterpret_cast(input); +#endif - static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1); +#if defined (USE_VNNI) || defined (USE_AVX512) + + static_assert(OutputDimensions == 1 || OutputDimensions % 4 == 0); // OutputDimensions is either 1 or a multiple of SimdWidth // because then it is also an input dimension. + if constexpr (OutputDimensions <= 8 && OutputDimensions != 1) + { + constexpr IndexType NumChunks = PaddedInputDimensions / InputSimdWidth; + + static_assert(NumChunks % 2 == 0); + + const auto input_vec = reinterpret_cast(input); + const auto bias_vec = reinterpret_cast(biases); + auto out_vec = reinterpret_cast<__m128i*>(output); + + vec_t regs[OutputDimensions]; + for (IndexType k = 0; k < OutputDimensions; ++k) + regs[k] = vec_setzero(); + + for (IndexType i = 0; i < NumChunks / 2; ++i) + { + const vec_t in0 = input_vec[i * 2 + 0]; + const vec_t in1 = input_vec[i * 2 + 1]; + for (IndexType k = 0; k < OutputDimensions; ++k) + { + const vec_t w0 = reinterpret_cast(&weights[k * PaddedInputDimensions])[i * 2 + 0]; + const vec_t w1 = reinterpret_cast(&weights[k * PaddedInputDimensions])[i * 2 + 1]; + vec_add_dpbusd_32(regs[k], in0, w0); + vec_add_dpbusd_32(regs[k], in1, w1); + } + } + + for (IndexType k = 0; k < OutputDimensions / 4; ++k) + { + out_vec[k] = vec_haddx4( + regs[k * 4 + 0], + regs[k * 4 + 1], + regs[k * 4 + 2], + regs[k * 4 + 3], + bias_vec[k] + ); + } + } + else if constexpr (InputDimensions == 8) + { + const auto input32 = reinterpret_cast(input); + __m256i* outptr = reinterpret_cast<__m256i*>(output); + std::memcpy(output, biases, OutputDimensions * sizeof(OutputType)); + + const __m256i in0 = _mm256_set1_epi32(input32[0]); + const __m256i in1 = _mm256_set1_epi32(input32[1]); + const auto col0 = reinterpret_cast(&weights[0]); + const auto col1 = reinterpret_cast(&weights[OutputDimensions * 4]); + for (IndexType j = 0; j * 8 < OutputDimensions; ++j) + m256_add_dpbusd_epi32x2(outptr[j], in0, col0[j], in1, col1[j]); + } + else + +#elif defined (USE_SSSE3) + + if constexpr (OutputDimensions % OutputSimdWidth == 0 && InputDimensions == 8) + { + const auto input32 = reinterpret_cast(input); + vec_t* outptr = reinterpret_cast(output); + std::memcpy(output, biases, OutputDimensions * sizeof(OutputType)); + + const vec_t in0 = vec_set_32(input32[0]); + const vec_t in1 = vec_set_32(input32[1]); + const auto col0 = reinterpret_cast(&weights[0]); + const auto col1 = reinterpret_cast(&weights[OutputDimensions * 4]); + for (IndexType j = 0; j * OutputSimdWidth < OutputDimensions; ++j) + vec_add_dpbusd_32x2(outptr[j], in0, col0[j], in1, col1[j]); + } + else + +#endif + +#if defined (USE_SSSE3) + if constexpr (OutputDimensions % OutputSimdWidth == 0) { static_assert(InputDimensions % 16 == 0); @@ -337,8 +520,8 @@ namespace Stockfish::Eval::NNUE::Layers { #if defined(USE_SSE2) // At least a multiple of 16, with SSE2. - static_assert(InputDimensions % SimdWidth == 0); - constexpr IndexType NumChunks = InputDimensions / SimdWidth; + static_assert(PaddedInputDimensions % SimdWidth == 0); + constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth; const __m128i Zeros = _mm_setzero_si128(); const auto inputVector = reinterpret_cast(input); @@ -349,8 +532,8 @@ namespace Stockfish::Eval::NNUE::Layers { const auto inputVector = reinterpret_cast(input); #elif defined(USE_NEON) - static_assert(InputDimensions % SimdWidth == 0); - constexpr IndexType NumChunks = InputDimensions / SimdWidth; + static_assert(PaddedInputDimensions % SimdWidth == 0); + constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth; const auto inputVector = reinterpret_cast(input); #endif @@ -423,6 +606,13 @@ namespace Stockfish::Eval::NNUE::Layers { _mm_empty(); #endif +#endif + +#if (!defined (USE_SSSE3) && defined (USE_SSE2)) || defined (USE_NEON) + static_assert(SimdWidth <= 16, "Otherwise we run outside of the padding for the output."); + if constexpr (SimdWidth > OutputDimensions && OutputDimensions != 1) + for (IndexType i = OutputDimensions; i < SimdWidth; ++i) + output[i] = 0; #endif return output; diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h index 879a39cd..193a197d 100644 --- a/src/nnue/nnue_architecture.h +++ b/src/nnue/nnue_architecture.h @@ -23,7 +23,7 @@ #include "nnue_common.h" -#include "features/half_ka_v2.h" +#include "features/half_ka_v2_hm.h" #include "layers/input_slice.h" #include "layers/affine_transform.h" @@ -32,10 +32,10 @@ namespace Stockfish::Eval::NNUE { // Input features used in evaluation function - using FeatureSet = Features::HalfKAv2; + using FeatureSet = Features::HalfKAv2_hm; // Number of input feature dimensions after conversion - constexpr IndexType TransformedFeatureDimensions = 512; + constexpr IndexType TransformedFeatureDimensions = 1024; constexpr IndexType PSQTBuckets = 8; constexpr IndexType LayerStacks = 8; @@ -43,7 +43,7 @@ namespace Stockfish::Eval::NNUE { // Define network structure using InputLayer = InputSlice; - using HiddenLayer1 = ClippedReLU>; + using HiddenLayer1 = ClippedReLU>; using HiddenLayer2 = ClippedReLU>; using OutputLayer = AffineTransform;