mirror of
https://github.com/HChaZZY/Stockfish.git
synced 2025-12-23 10:36:26 +08:00
Use 128 bit multiply for TT index
Remove super cluster stuff from TT and just use a 128 bit multiply. STC https://tests.stockfishchess.org/tests/view/5ee719b3aae8aec816ab7548 LLR: 2.94 (-2.94,2.94) {-1.50,0.50} Total: 12736 W: 2502 L: 2333 D: 7901 Ptnml(0-2): 191, 1452, 2944, 1559, 222 LTC https://tests.stockfishchess.org/tests/view/5ee732d1aae8aec816ab7556 LLR: 2.93 (-2.94,2.94) {-1.50,0.50} Total: 27584 W: 3431 L: 3350 D: 20803 Ptnml(0-2): 173, 2500, 8400, 2511, 208 Scheme back to being derived from https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ Also the default optimized version of the index calculation now uses fewer instructions. https://godbolt.org/z/Tktxbv Might benefit from mulx (requires -mbmi2) closes https://github.com/official-stockfish/Stockfish/pull/2744 bench: 4320954
This commit is contained in:
committed by
Joost VandeVondele
parent
995ee4b311
commit
1ea488d34c
16
src/tt.cpp
16
src/tt.cpp
@@ -36,17 +36,17 @@ TranspositionTable TT; // Our global transposition table
|
||||
void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) {
|
||||
|
||||
// Preserve any existing move for the same position
|
||||
if (m || (k >> 48) != key16)
|
||||
if (m || (uint16_t)k != key16)
|
||||
move16 = (uint16_t)m;
|
||||
|
||||
// Overwrite less valuable entries
|
||||
if ( (k >> 48) != key16
|
||||
if ((uint16_t)k != key16
|
||||
|| d - DEPTH_OFFSET > depth8 - 4
|
||||
|| b == BOUND_EXACT)
|
||||
{
|
||||
assert(d >= DEPTH_OFFSET);
|
||||
|
||||
key16 = (uint16_t)(k >> 48);
|
||||
key16 = (uint16_t)k;
|
||||
value16 = (int16_t)v;
|
||||
eval16 = (int16_t)ev;
|
||||
genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b);
|
||||
@@ -65,10 +65,8 @@ void TranspositionTable::resize(size_t mbSize) {
|
||||
|
||||
aligned_ttmem_free(mem);
|
||||
|
||||
superClusterCount = mbSize * 1024 * 1024 / (sizeof(Cluster) * ClustersPerSuperCluster);
|
||||
|
||||
table = static_cast<Cluster*>(
|
||||
aligned_ttmem_alloc(superClusterCount * ClustersPerSuperCluster * sizeof(Cluster), mem));
|
||||
clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
|
||||
table = static_cast<Cluster*>(aligned_ttmem_alloc(clusterCount * sizeof(Cluster), mem));
|
||||
if (!mem)
|
||||
{
|
||||
std::cerr << "Failed to allocate " << mbSize
|
||||
@@ -91,8 +89,6 @@ void TranspositionTable::clear() {
|
||||
{
|
||||
threads.emplace_back([this, idx]() {
|
||||
|
||||
const size_t clusterCount = superClusterCount * ClustersPerSuperCluster;
|
||||
|
||||
// Thread binding gives faster search on systems with a first-touch policy
|
||||
if (Options["Threads"] > 8)
|
||||
WinProcGroup::bindThisThread(idx);
|
||||
@@ -121,7 +117,7 @@ void TranspositionTable::clear() {
|
||||
TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
|
||||
|
||||
TTEntry* const tte = first_entry(key);
|
||||
const uint16_t key16 = key >> 48; // Use the high 16 bits as key inside the cluster
|
||||
const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster
|
||||
|
||||
for (int i = 0; i < ClusterSize; ++i)
|
||||
if (!tte[i].key16 || tte[i].key16 == key16)
|
||||
|
||||
Reference in New Issue
Block a user