mirror of
https://github.com/HChaZZY/Stockfish.git
synced 2025-12-06 10:53:50 +08:00
Generate lookup indices at compile time.
Credit to @Disservin: the constexpr lsb is just the De Bruijn bitscan with the xor from the cpw https://www.chessprogramming.org/BitScan closes https://github.com/official-stockfish/Stockfish/pull/5801 No functional change
This commit is contained in:
committed by
Joost VandeVondele
parent
62ecdfe82c
commit
18b3465a86
@@ -38,99 +38,41 @@
|
||||
namespace Stockfish::Eval::NNUE::Layers {
|
||||
|
||||
#if (USE_SSSE3 | (USE_NEON >= 8))
|
||||
static constexpr int lsb_index64[64] = {
|
||||
0, 47, 1, 56, 48, 27, 2, 60, 57, 49, 41, 37, 28, 16, 3, 61, 54, 58, 35, 52, 50, 42,
|
||||
21, 44, 38, 32, 29, 23, 17, 11, 4, 62, 46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43,
|
||||
31, 22, 10, 45, 25, 39, 14, 33, 19, 30, 9, 24, 13, 18, 8, 12, 7, 6, 5, 63};
|
||||
|
||||
constexpr int constexpr_lsb(uint64_t bb) {
|
||||
assert(bb != 0);
|
||||
constexpr uint64_t debruijn64 = 0x03F79D71B4CB0A89ULL;
|
||||
return lsb_index64[((bb ^ (bb - 1)) * debruijn64) >> 58];
|
||||
}
|
||||
|
||||
alignas(CacheLineSize) static constexpr struct OffsetIndices {
|
||||
|
||||
#if (USE_SSE41)
|
||||
alignas(CacheLineSize) static constexpr std::uint8_t
|
||||
std::uint8_t offset_indices[256][8];
|
||||
#else
|
||||
alignas(CacheLineSize) static constexpr std::uint16_t
|
||||
std::uint16_t offset_indices[256][8];
|
||||
#endif
|
||||
lookup_indices[256][8] = {
|
||||
{0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}, {1, 0, 0, 0, 0, 0, 0, 0},
|
||||
{0, 1, 0, 0, 0, 0, 0, 0}, {2, 0, 0, 0, 0, 0, 0, 0}, {0, 2, 0, 0, 0, 0, 0, 0},
|
||||
{1, 2, 0, 0, 0, 0, 0, 0}, {0, 1, 2, 0, 0, 0, 0, 0}, {3, 0, 0, 0, 0, 0, 0, 0},
|
||||
{0, 3, 0, 0, 0, 0, 0, 0}, {1, 3, 0, 0, 0, 0, 0, 0}, {0, 1, 3, 0, 0, 0, 0, 0},
|
||||
{2, 3, 0, 0, 0, 0, 0, 0}, {0, 2, 3, 0, 0, 0, 0, 0}, {1, 2, 3, 0, 0, 0, 0, 0},
|
||||
{0, 1, 2, 3, 0, 0, 0, 0}, {4, 0, 0, 0, 0, 0, 0, 0}, {0, 4, 0, 0, 0, 0, 0, 0},
|
||||
{1, 4, 0, 0, 0, 0, 0, 0}, {0, 1, 4, 0, 0, 0, 0, 0}, {2, 4, 0, 0, 0, 0, 0, 0},
|
||||
{0, 2, 4, 0, 0, 0, 0, 0}, {1, 2, 4, 0, 0, 0, 0, 0}, {0, 1, 2, 4, 0, 0, 0, 0},
|
||||
{3, 4, 0, 0, 0, 0, 0, 0}, {0, 3, 4, 0, 0, 0, 0, 0}, {1, 3, 4, 0, 0, 0, 0, 0},
|
||||
{0, 1, 3, 4, 0, 0, 0, 0}, {2, 3, 4, 0, 0, 0, 0, 0}, {0, 2, 3, 4, 0, 0, 0, 0},
|
||||
{1, 2, 3, 4, 0, 0, 0, 0}, {0, 1, 2, 3, 4, 0, 0, 0}, {5, 0, 0, 0, 0, 0, 0, 0},
|
||||
{0, 5, 0, 0, 0, 0, 0, 0}, {1, 5, 0, 0, 0, 0, 0, 0}, {0, 1, 5, 0, 0, 0, 0, 0},
|
||||
{2, 5, 0, 0, 0, 0, 0, 0}, {0, 2, 5, 0, 0, 0, 0, 0}, {1, 2, 5, 0, 0, 0, 0, 0},
|
||||
{0, 1, 2, 5, 0, 0, 0, 0}, {3, 5, 0, 0, 0, 0, 0, 0}, {0, 3, 5, 0, 0, 0, 0, 0},
|
||||
{1, 3, 5, 0, 0, 0, 0, 0}, {0, 1, 3, 5, 0, 0, 0, 0}, {2, 3, 5, 0, 0, 0, 0, 0},
|
||||
{0, 2, 3, 5, 0, 0, 0, 0}, {1, 2, 3, 5, 0, 0, 0, 0}, {0, 1, 2, 3, 5, 0, 0, 0},
|
||||
{4, 5, 0, 0, 0, 0, 0, 0}, {0, 4, 5, 0, 0, 0, 0, 0}, {1, 4, 5, 0, 0, 0, 0, 0},
|
||||
{0, 1, 4, 5, 0, 0, 0, 0}, {2, 4, 5, 0, 0, 0, 0, 0}, {0, 2, 4, 5, 0, 0, 0, 0},
|
||||
{1, 2, 4, 5, 0, 0, 0, 0}, {0, 1, 2, 4, 5, 0, 0, 0}, {3, 4, 5, 0, 0, 0, 0, 0},
|
||||
{0, 3, 4, 5, 0, 0, 0, 0}, {1, 3, 4, 5, 0, 0, 0, 0}, {0, 1, 3, 4, 5, 0, 0, 0},
|
||||
{2, 3, 4, 5, 0, 0, 0, 0}, {0, 2, 3, 4, 5, 0, 0, 0}, {1, 2, 3, 4, 5, 0, 0, 0},
|
||||
{0, 1, 2, 3, 4, 5, 0, 0}, {6, 0, 0, 0, 0, 0, 0, 0}, {0, 6, 0, 0, 0, 0, 0, 0},
|
||||
{1, 6, 0, 0, 0, 0, 0, 0}, {0, 1, 6, 0, 0, 0, 0, 0}, {2, 6, 0, 0, 0, 0, 0, 0},
|
||||
{0, 2, 6, 0, 0, 0, 0, 0}, {1, 2, 6, 0, 0, 0, 0, 0}, {0, 1, 2, 6, 0, 0, 0, 0},
|
||||
{3, 6, 0, 0, 0, 0, 0, 0}, {0, 3, 6, 0, 0, 0, 0, 0}, {1, 3, 6, 0, 0, 0, 0, 0},
|
||||
{0, 1, 3, 6, 0, 0, 0, 0}, {2, 3, 6, 0, 0, 0, 0, 0}, {0, 2, 3, 6, 0, 0, 0, 0},
|
||||
{1, 2, 3, 6, 0, 0, 0, 0}, {0, 1, 2, 3, 6, 0, 0, 0}, {4, 6, 0, 0, 0, 0, 0, 0},
|
||||
{0, 4, 6, 0, 0, 0, 0, 0}, {1, 4, 6, 0, 0, 0, 0, 0}, {0, 1, 4, 6, 0, 0, 0, 0},
|
||||
{2, 4, 6, 0, 0, 0, 0, 0}, {0, 2, 4, 6, 0, 0, 0, 0}, {1, 2, 4, 6, 0, 0, 0, 0},
|
||||
{0, 1, 2, 4, 6, 0, 0, 0}, {3, 4, 6, 0, 0, 0, 0, 0}, {0, 3, 4, 6, 0, 0, 0, 0},
|
||||
{1, 3, 4, 6, 0, 0, 0, 0}, {0, 1, 3, 4, 6, 0, 0, 0}, {2, 3, 4, 6, 0, 0, 0, 0},
|
||||
{0, 2, 3, 4, 6, 0, 0, 0}, {1, 2, 3, 4, 6, 0, 0, 0}, {0, 1, 2, 3, 4, 6, 0, 0},
|
||||
{5, 6, 0, 0, 0, 0, 0, 0}, {0, 5, 6, 0, 0, 0, 0, 0}, {1, 5, 6, 0, 0, 0, 0, 0},
|
||||
{0, 1, 5, 6, 0, 0, 0, 0}, {2, 5, 6, 0, 0, 0, 0, 0}, {0, 2, 5, 6, 0, 0, 0, 0},
|
||||
{1, 2, 5, 6, 0, 0, 0, 0}, {0, 1, 2, 5, 6, 0, 0, 0}, {3, 5, 6, 0, 0, 0, 0, 0},
|
||||
{0, 3, 5, 6, 0, 0, 0, 0}, {1, 3, 5, 6, 0, 0, 0, 0}, {0, 1, 3, 5, 6, 0, 0, 0},
|
||||
{2, 3, 5, 6, 0, 0, 0, 0}, {0, 2, 3, 5, 6, 0, 0, 0}, {1, 2, 3, 5, 6, 0, 0, 0},
|
||||
{0, 1, 2, 3, 5, 6, 0, 0}, {4, 5, 6, 0, 0, 0, 0, 0}, {0, 4, 5, 6, 0, 0, 0, 0},
|
||||
{1, 4, 5, 6, 0, 0, 0, 0}, {0, 1, 4, 5, 6, 0, 0, 0}, {2, 4, 5, 6, 0, 0, 0, 0},
|
||||
{0, 2, 4, 5, 6, 0, 0, 0}, {1, 2, 4, 5, 6, 0, 0, 0}, {0, 1, 2, 4, 5, 6, 0, 0},
|
||||
{3, 4, 5, 6, 0, 0, 0, 0}, {0, 3, 4, 5, 6, 0, 0, 0}, {1, 3, 4, 5, 6, 0, 0, 0},
|
||||
{0, 1, 3, 4, 5, 6, 0, 0}, {2, 3, 4, 5, 6, 0, 0, 0}, {0, 2, 3, 4, 5, 6, 0, 0},
|
||||
{1, 2, 3, 4, 5, 6, 0, 0}, {0, 1, 2, 3, 4, 5, 6, 0}, {7, 0, 0, 0, 0, 0, 0, 0},
|
||||
{0, 7, 0, 0, 0, 0, 0, 0}, {1, 7, 0, 0, 0, 0, 0, 0}, {0, 1, 7, 0, 0, 0, 0, 0},
|
||||
{2, 7, 0, 0, 0, 0, 0, 0}, {0, 2, 7, 0, 0, 0, 0, 0}, {1, 2, 7, 0, 0, 0, 0, 0},
|
||||
{0, 1, 2, 7, 0, 0, 0, 0}, {3, 7, 0, 0, 0, 0, 0, 0}, {0, 3, 7, 0, 0, 0, 0, 0},
|
||||
{1, 3, 7, 0, 0, 0, 0, 0}, {0, 1, 3, 7, 0, 0, 0, 0}, {2, 3, 7, 0, 0, 0, 0, 0},
|
||||
{0, 2, 3, 7, 0, 0, 0, 0}, {1, 2, 3, 7, 0, 0, 0, 0}, {0, 1, 2, 3, 7, 0, 0, 0},
|
||||
{4, 7, 0, 0, 0, 0, 0, 0}, {0, 4, 7, 0, 0, 0, 0, 0}, {1, 4, 7, 0, 0, 0, 0, 0},
|
||||
{0, 1, 4, 7, 0, 0, 0, 0}, {2, 4, 7, 0, 0, 0, 0, 0}, {0, 2, 4, 7, 0, 0, 0, 0},
|
||||
{1, 2, 4, 7, 0, 0, 0, 0}, {0, 1, 2, 4, 7, 0, 0, 0}, {3, 4, 7, 0, 0, 0, 0, 0},
|
||||
{0, 3, 4, 7, 0, 0, 0, 0}, {1, 3, 4, 7, 0, 0, 0, 0}, {0, 1, 3, 4, 7, 0, 0, 0},
|
||||
{2, 3, 4, 7, 0, 0, 0, 0}, {0, 2, 3, 4, 7, 0, 0, 0}, {1, 2, 3, 4, 7, 0, 0, 0},
|
||||
{0, 1, 2, 3, 4, 7, 0, 0}, {5, 7, 0, 0, 0, 0, 0, 0}, {0, 5, 7, 0, 0, 0, 0, 0},
|
||||
{1, 5, 7, 0, 0, 0, 0, 0}, {0, 1, 5, 7, 0, 0, 0, 0}, {2, 5, 7, 0, 0, 0, 0, 0},
|
||||
{0, 2, 5, 7, 0, 0, 0, 0}, {1, 2, 5, 7, 0, 0, 0, 0}, {0, 1, 2, 5, 7, 0, 0, 0},
|
||||
{3, 5, 7, 0, 0, 0, 0, 0}, {0, 3, 5, 7, 0, 0, 0, 0}, {1, 3, 5, 7, 0, 0, 0, 0},
|
||||
{0, 1, 3, 5, 7, 0, 0, 0}, {2, 3, 5, 7, 0, 0, 0, 0}, {0, 2, 3, 5, 7, 0, 0, 0},
|
||||
{1, 2, 3, 5, 7, 0, 0, 0}, {0, 1, 2, 3, 5, 7, 0, 0}, {4, 5, 7, 0, 0, 0, 0, 0},
|
||||
{0, 4, 5, 7, 0, 0, 0, 0}, {1, 4, 5, 7, 0, 0, 0, 0}, {0, 1, 4, 5, 7, 0, 0, 0},
|
||||
{2, 4, 5, 7, 0, 0, 0, 0}, {0, 2, 4, 5, 7, 0, 0, 0}, {1, 2, 4, 5, 7, 0, 0, 0},
|
||||
{0, 1, 2, 4, 5, 7, 0, 0}, {3, 4, 5, 7, 0, 0, 0, 0}, {0, 3, 4, 5, 7, 0, 0, 0},
|
||||
{1, 3, 4, 5, 7, 0, 0, 0}, {0, 1, 3, 4, 5, 7, 0, 0}, {2, 3, 4, 5, 7, 0, 0, 0},
|
||||
{0, 2, 3, 4, 5, 7, 0, 0}, {1, 2, 3, 4, 5, 7, 0, 0}, {0, 1, 2, 3, 4, 5, 7, 0},
|
||||
{6, 7, 0, 0, 0, 0, 0, 0}, {0, 6, 7, 0, 0, 0, 0, 0}, {1, 6, 7, 0, 0, 0, 0, 0},
|
||||
{0, 1, 6, 7, 0, 0, 0, 0}, {2, 6, 7, 0, 0, 0, 0, 0}, {0, 2, 6, 7, 0, 0, 0, 0},
|
||||
{1, 2, 6, 7, 0, 0, 0, 0}, {0, 1, 2, 6, 7, 0, 0, 0}, {3, 6, 7, 0, 0, 0, 0, 0},
|
||||
{0, 3, 6, 7, 0, 0, 0, 0}, {1, 3, 6, 7, 0, 0, 0, 0}, {0, 1, 3, 6, 7, 0, 0, 0},
|
||||
{2, 3, 6, 7, 0, 0, 0, 0}, {0, 2, 3, 6, 7, 0, 0, 0}, {1, 2, 3, 6, 7, 0, 0, 0},
|
||||
{0, 1, 2, 3, 6, 7, 0, 0}, {4, 6, 7, 0, 0, 0, 0, 0}, {0, 4, 6, 7, 0, 0, 0, 0},
|
||||
{1, 4, 6, 7, 0, 0, 0, 0}, {0, 1, 4, 6, 7, 0, 0, 0}, {2, 4, 6, 7, 0, 0, 0, 0},
|
||||
{0, 2, 4, 6, 7, 0, 0, 0}, {1, 2, 4, 6, 7, 0, 0, 0}, {0, 1, 2, 4, 6, 7, 0, 0},
|
||||
{3, 4, 6, 7, 0, 0, 0, 0}, {0, 3, 4, 6, 7, 0, 0, 0}, {1, 3, 4, 6, 7, 0, 0, 0},
|
||||
{0, 1, 3, 4, 6, 7, 0, 0}, {2, 3, 4, 6, 7, 0, 0, 0}, {0, 2, 3, 4, 6, 7, 0, 0},
|
||||
{1, 2, 3, 4, 6, 7, 0, 0}, {0, 1, 2, 3, 4, 6, 7, 0}, {5, 6, 7, 0, 0, 0, 0, 0},
|
||||
{0, 5, 6, 7, 0, 0, 0, 0}, {1, 5, 6, 7, 0, 0, 0, 0}, {0, 1, 5, 6, 7, 0, 0, 0},
|
||||
{2, 5, 6, 7, 0, 0, 0, 0}, {0, 2, 5, 6, 7, 0, 0, 0}, {1, 2, 5, 6, 7, 0, 0, 0},
|
||||
{0, 1, 2, 5, 6, 7, 0, 0}, {3, 5, 6, 7, 0, 0, 0, 0}, {0, 3, 5, 6, 7, 0, 0, 0},
|
||||
{1, 3, 5, 6, 7, 0, 0, 0}, {0, 1, 3, 5, 6, 7, 0, 0}, {2, 3, 5, 6, 7, 0, 0, 0},
|
||||
{0, 2, 3, 5, 6, 7, 0, 0}, {1, 2, 3, 5, 6, 7, 0, 0}, {0, 1, 2, 3, 5, 6, 7, 0},
|
||||
{4, 5, 6, 7, 0, 0, 0, 0}, {0, 4, 5, 6, 7, 0, 0, 0}, {1, 4, 5, 6, 7, 0, 0, 0},
|
||||
{0, 1, 4, 5, 6, 7, 0, 0}, {2, 4, 5, 6, 7, 0, 0, 0}, {0, 2, 4, 5, 6, 7, 0, 0},
|
||||
{1, 2, 4, 5, 6, 7, 0, 0}, {0, 1, 2, 4, 5, 6, 7, 0}, {3, 4, 5, 6, 7, 0, 0, 0},
|
||||
{0, 3, 4, 5, 6, 7, 0, 0}, {1, 3, 4, 5, 6, 7, 0, 0}, {0, 1, 3, 4, 5, 6, 7, 0},
|
||||
{2, 3, 4, 5, 6, 7, 0, 0}, {0, 2, 3, 4, 5, 6, 7, 0}, {1, 2, 3, 4, 5, 6, 7, 0},
|
||||
{0, 1, 2, 3, 4, 5, 6, 7}};
|
||||
|
||||
constexpr OffsetIndices() :
|
||||
offset_indices() {
|
||||
for (int i = 0; i < 256; ++i)
|
||||
{
|
||||
std::uint64_t j = i, k = 0;
|
||||
while (j)
|
||||
{
|
||||
offset_indices[i][k++] = constexpr_lsb(j);
|
||||
j &= j - 1;
|
||||
}
|
||||
while (k < 8)
|
||||
offset_indices[i][k++] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
} Lookup;
|
||||
|
||||
// Find indices of nonzero numbers in an int32_t array
|
||||
template<const IndexType InputDimensions>
|
||||
@@ -196,9 +138,9 @@ void find_nnz(const std::int32_t* input, std::uint16_t* out, IndexType& count_ou
|
||||
}
|
||||
for (IndexType j = 0; j < OutputsPerChunk; ++j)
|
||||
{
|
||||
const auto lookup = (nnz >> (j * 8)) & 0xFF;
|
||||
const auto offsets =
|
||||
vec128_load(reinterpret_cast<const vec128_t*>(&lookup_indices[lookup]));
|
||||
const unsigned lookup = (nnz >> (j * 8)) & 0xFF;
|
||||
const vec128_t offsets =
|
||||
vec128_load(reinterpret_cast<const vec128_t*>(&Lookup.offset_indices[lookup]));
|
||||
vec128_storeu(reinterpret_cast<vec128_t*>(out + count), vec128_add(base, offsets));
|
||||
count += popcount(lookup);
|
||||
base = vec128_add(base, increment);
|
||||
|
||||
Reference in New Issue
Block a user