mirror of
https://github.com/HChaZZY/Stockfish.git
synced 2025-12-25 11:36:51 +08:00
Add large page support for NNUE weights and simplify TT mem management
Use TT memory functions to allocate memory for the NNUE weights. This should provide a small speed-up on systems where large pages are not automatically used, including Windows and some Linux distributions. Further, since we now have a wrapper for std::aligned_alloc(), we can simplify the TT memory management a bit: - We no longer need to store separate pointers to the hash table and its underlying memory allocation. - We also get to merge the Linux-specific and default implementations of aligned_ttmem_alloc(). Finally, we'll enable the VirtualAlloc code path with large page support also for Win32. STC: https://tests.stockfishchess.org/tests/view/5f66595823a84a47b9036fba LLR: 2.94 (-2.94,2.94) {-0.25,1.25} Total: 14896 W: 1854 L: 1686 D: 11356 Ptnml(0-2): 65, 1224, 4742, 1312, 105 closes https://github.com/official-stockfish/Stockfish/pull/3081 No functional change.
This commit is contained in:
committed by
Joost VandeVondele
parent
16b4578cc1
commit
485d517c68
57
src/misc.cpp
57
src/misc.cpp
@@ -357,27 +357,11 @@ void std_aligned_free(void* ptr) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages.
|
||||
/// The returned pointer is the aligned one, while the mem argument is the one that needs
|
||||
/// to be passed to free. With c++17 some of this functionality could be simplified.
|
||||
/// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
|
||||
|
||||
#if defined(__linux__) && !defined(__ANDROID__)
|
||||
#if defined(_WIN32)
|
||||
|
||||
void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
|
||||
|
||||
constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page sizes
|
||||
size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment
|
||||
if (posix_memalign(&mem, alignment, size))
|
||||
mem = nullptr;
|
||||
#if defined(MADV_HUGEPAGE)
|
||||
madvise(mem, allocSize, MADV_HUGEPAGE);
|
||||
#endif
|
||||
return mem;
|
||||
}
|
||||
|
||||
#elif defined(_WIN64)
|
||||
|
||||
static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
|
||||
static void* aligned_large_pages_alloc_win(size_t allocSize) {
|
||||
|
||||
HANDLE hProcessToken { };
|
||||
LUID luid { };
|
||||
@@ -422,12 +406,13 @@ static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
|
||||
return mem;
|
||||
}
|
||||
|
||||
void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
|
||||
void* aligned_large_pages_alloc(size_t allocSize) {
|
||||
|
||||
static bool firstCall = true;
|
||||
void* mem;
|
||||
|
||||
// Try to allocate large pages
|
||||
mem = aligned_ttmem_alloc_large_pages(allocSize);
|
||||
mem = aligned_large_pages_alloc_win(allocSize);
|
||||
|
||||
// Suppress info strings on the first call. The first call occurs before 'uci'
|
||||
// is received and in that case this output confuses some GUIs.
|
||||
@@ -449,23 +434,31 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
|
||||
|
||||
#else
|
||||
|
||||
void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
|
||||
void* aligned_large_pages_alloc(size_t allocSize) {
|
||||
|
||||
constexpr size_t alignment = 64; // assumed cache line size
|
||||
size_t size = allocSize + alignment - 1; // allocate some extra space
|
||||
mem = malloc(size);
|
||||
void* ret = reinterpret_cast<void*>((uintptr_t(mem) + alignment - 1) & ~uintptr_t(alignment - 1));
|
||||
return ret;
|
||||
#if defined(__linux__)
|
||||
constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
|
||||
#else
|
||||
constexpr size_t alignment = 4096; // assumed small page size
|
||||
#endif
|
||||
|
||||
// round up to multiples of alignment
|
||||
size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
|
||||
void *mem = std_aligned_alloc(alignment, size);
|
||||
#if defined(MADV_HUGEPAGE)
|
||||
madvise(mem, size, MADV_HUGEPAGE);
|
||||
#endif
|
||||
return mem;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/// aligned_ttmem_free() will free the previously allocated ttmem
|
||||
/// aligned_large_pages_free() will free the previously allocated ttmem
|
||||
|
||||
#if defined(_WIN64)
|
||||
#if defined(_WIN32)
|
||||
|
||||
void aligned_ttmem_free(void* mem) {
|
||||
void aligned_large_pages_free(void* mem) {
|
||||
|
||||
if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
|
||||
{
|
||||
@@ -478,8 +471,8 @@ void aligned_ttmem_free(void* mem) {
|
||||
|
||||
#else
|
||||
|
||||
void aligned_ttmem_free(void *mem) {
|
||||
free(mem);
|
||||
void aligned_large_pages_free(void *mem) {
|
||||
std_aligned_free(mem);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user