mirror of
https://github.com/HChaZZY/Stockfish.git
synced 2025-12-21 01:27:16 +08:00
Use 128 bit multiply for TT index
Remove the super cluster stuff from the TT and just use a 128 bit multiply.

STC: https://tests.stockfishchess.org/tests/view/5ee719b3aae8aec816ab7548
LLR: 2.94 (-2.94,2.94) {-1.50,0.50}
Total: 12736 W: 2502 L: 2333 D: 7901
Ptnml(0-2): 191, 1452, 2944, 1559, 222

LTC: https://tests.stockfishchess.org/tests/view/5ee732d1aae8aec816ab7556
LLR: 2.93 (-2.94,2.94) {-1.50,0.50}
Total: 27584 W: 3431 L: 3350 D: 20803
Ptnml(0-2): 173, 2500, 8400, 2511, 208

The scheme is back to being derived from https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/

Also, the default optimized version of the index calculation now uses fewer instructions: https://godbolt.org/z/Tktxbv
It might benefit from mulx (requires -mbmi2).

closes https://github.com/official-stockfish/Stockfish/pull/2744

bench: 4320954
This commit is contained in:
committed by
Joost VandeVondele
parent
995ee4b311
commit
1ea488d34c
13
src/misc.h
13
src/misc.h
@@ -110,6 +110,19 @@ public:
|
||||
{ return T(rand64() & rand64() & rand64()); }
|
||||
};
|
||||
|
||||
/// mul_hi64() returns the upper 64 bits of the full 128-bit product of the
/// unsigned 64-bit operands a and b, i.e. floor(a * b / 2^64).
///
/// When built with a GNU-compatible compiler and IS_64BIT is defined, the
/// product is formed directly in the compiler's unsigned __int128 type.
/// Otherwise the result is assembled from four 32x32 -> 64 bit partial
/// products, which needs nothing wider than uint64_t.

static inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
#if defined(__GNUC__) && defined(IS_64BIT)
    __extension__ typedef unsigned __int128 uint128;
    return ((uint128)a * (uint128)b) >> 64;
#else
    // Split each operand into 32-bit halves: x = xH * 2^32 + xL
    uint64_t aL = (uint32_t)a, aH = a >> 32;
    uint64_t bL = (uint32_t)b, bH = b >> 32;

    // Only the upper half of aL * bL can influence the high 64 bits
    uint64_t c1 = (aL * bL) >> 32;

    // First cross term plus that carry: at most (2^32-1)^2 + (2^32-1),
    // which still fits in 64 bits
    uint64_t c2 = aH * bL + c1;

    // Second cross term plus the low half of c2; the upper half of c2 is
    // added separately below so that this sum cannot overflow either
    uint64_t c3 = aL * bH + (uint32_t)c2;

    // High partial product plus the carries out of both cross terms
    return aH * bH + (c2 >> 32) + (c3 >> 32);
#endif
}
|
||||
|
||||
/// Under Windows it is not possible for a process to run on more than one
|
||||
/// logical processor group. This usually means to be limited to use max 64
|
||||
|
||||
Reference in New Issue
Block a user