Cache line aligned TT

Let TT clusters (16*4=64 bytes) fit on a single cache line.
This avoids the need for the double prefetch.

Original patches by Lucas and Jean-Francois, who has also tested
them on his AMD FX:

BIG HASHTABLE

./stockfish bench 1024 1 18 > /dev/null

Before:
1437642 nps
1426519 nps
1438493 nps

After:
1474482 nps
1476375 nps
1475877 nps

SMALL HASHTABLE

./stockfish bench 128 1 18 > /dev/null

Before:
1435207 nps
1435586 nps
1433741 nps

After:
1479143 nps
1471042 nps
1472286 nps

No functional change.
This commit is contained in:
Marco Costalba
2013-04-26 18:45:54 +02:00
parent e508494a99
commit 083fe58124
4 changed files with 11 additions and 11 deletions

View File

@@ -237,10 +237,8 @@ void prefetch(char* addr) {
# if defined(__INTEL_COMPILER) || defined(_MSC_VER) # if defined(__INTEL_COMPILER) || defined(_MSC_VER)
_mm_prefetch(addr, _MM_HINT_T0); _mm_prefetch(addr, _MM_HINT_T0);
_mm_prefetch(addr+64, _MM_HINT_T0); // 64 bytes ahead
# else # else
__builtin_prefetch(addr); __builtin_prefetch(addr);
__builtin_prefetch(addr+64);
# endif # endif
} }

View File

@@ -39,18 +39,18 @@ void TranspositionTable::set_size(size_t mbSize) {
if (hashMask == size - ClusterSize) if (hashMask == size - ClusterSize)
return; return;
hashMask = size - ClusterSize; free(mem);
delete [] table; mem = malloc(size * sizeof(TTEntry) + (CACHE_LINE_SIZE - 1));
table = new (std::nothrow) TTEntry[size]; if (!mem)
if (!table)
{ {
std::cerr << "Failed to allocate " << mbSize std::cerr << "Failed to allocate " << mbSize
<< "MB for transposition table." << std::endl; << "MB for transposition table." << std::endl;
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
clear(); // Operator new is not guaranteed to initialize memory to zero table = (TTEntry*)((size_t(mem) + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1));
hashMask = size - ClusterSize;
clear(); // Newly allocated block of memory is not initialized
} }

View File

@@ -85,7 +85,7 @@ class TranspositionTable {
static const unsigned ClusterSize = 4; // A cluster is 64 Bytes static const unsigned ClusterSize = 4; // A cluster is 64 Bytes
public: public:
~TranspositionTable() { delete [] table; } ~TranspositionTable() { free(mem); }
void new_search() { generation++; } void new_search() { generation++; }
TTEntry* probe(const Key key) const; TTEntry* probe(const Key key) const;
@@ -98,6 +98,7 @@ public:
private: private:
uint32_t hashMask; uint32_t hashMask;
TTEntry* table; TTEntry* table;
void* mem;
uint8_t generation; // Size must be not bigger then TTEntry::generation8 uint8_t generation; // Size must be not bigger then TTEntry::generation8
}; };

View File

@@ -56,10 +56,11 @@
# include <xmmintrin.h> // Intel and Microsoft header for _mm_prefetch() # include <xmmintrin.h> // Intel and Microsoft header for _mm_prefetch()
# endif # endif
#define CACHE_LINE_SIZE 64
#if defined(_MSC_VER) || defined(__INTEL_COMPILER) #if defined(_MSC_VER) || defined(__INTEL_COMPILER)
# define CACHE_LINE_ALIGNMENT __declspec(align(64)) # define CACHE_LINE_ALIGNMENT __declspec(align(CACHE_LINE_SIZE))
#else #else
# define CACHE_LINE_ALIGNMENT __attribute__ ((aligned(64))) # define CACHE_LINE_ALIGNMENT __attribute__ ((aligned(CACHE_LINE_SIZE)))
#endif #endif
#if defined(_MSC_VER) #if defined(_MSC_VER)