Remove custom mutex implementation

As part of the investigation of the hang caused by an incorrect implementation of condition_variable in libwinpthread, it was realized that our custom Mutex implementation is no longer needed. Prior to lazySMP this custom implementation resulted in a 30% speedup, but now no speed difference can be measured as no mutex is used on the hot path in lazySMP. https://github.com/official-stockfish/Stockfish/issues/2291 https://github.com/official-stockfish/Stockfish/issues/2309#issuecomment-533733393 https://github.com/official-stockfish/Stockfish/issues/2309#issuecomment-533737515 The interest of this patch is that it removes platform-specific code, which is always less tested. No functional change.
2025-12-20 17:16:33 +08:00 · 2019-09-16 07:51:25 +02:00
parent 8726beba59
commit d703d2b5e7
5 changed files with 16 additions and 61 deletions
--- a/src/misc.cpp
+++ b/src/misc.cpp
@@ -168,7 +168,7 @@ void dbg_print() {
 std::ostream& operator<<(std::ostream& os, SyncCout sc) {
-  static Mutex m;
+  static std::mutex m;
  if (sc == IO_LOCK)
      m.lock();
--- a/src/syzygy/tbprobe.cpp
+++ b/src/syzygy/tbprobe.cpp
@@ -27,12 +27,12 @@
 #include <list>
 #include <sstream>
 #include <type_traits>
 #include <mutex>
 #include "../bitboard.h"
 #include "../movegen.h"
 #include "../position.h"
 #include "../search.h"
 #include "../thread_win32_osx.h"
 #include "../types.h"
 #include "../uci.h"
@@ -45,7 +45,9 @@
 #include <sys/stat.h>
 #else
 #define WIN32_LEAN_AND_MEAN
-#define NOMINMAX
+#ifndef NOMINMAX
 #  define NOMINMAX // Disable macros min() and max()
 #endif
 #include <windows.h>
 #endif
@@ -1124,14 +1126,14 @@ void set(T& e, uint8_t* data) {
 template<TBType Type>
 void* mapped(TBTable<Type>& e, const Position& pos) {
-    static Mutex mutex;
+    static std::mutex mutex;
    // Use 'acquire' to avoid a thread reading 'ready' == true while
    // another is still working. (compiler reordering may cause this).
    if (e.ready.load(std::memory_order_acquire))
        return e.baseAddress; // Could be nullptr if file does not exist
-    std::unique_lock<Mutex> lk(mutex);
+    std::unique_lock<std::mutex> lk(mutex);
    if (e.ready.load(std::memory_order_relaxed)) // Recheck under lock
        return e.baseAddress;
--- a/src/thread.cpp
+++ b/src/thread.cpp
@@ -81,7 +81,7 @@ void Thread::clear() {
 void Thread::start_searching() {
-  std::lock_guard<Mutex> lk(mutex);
+  std::lock_guard<std::mutex> lk(mutex);
  searching = true;
  cv.notify_one(); // Wake up the thread in idle_loop()
 }
@@ -92,7 +92,7 @@ void Thread::start_searching() {
 void Thread::wait_for_search_finished() {
-  std::unique_lock<Mutex> lk(mutex);
+  std::unique_lock<std::mutex> lk(mutex);
  cv.wait(lk, [&]{ return !searching; });
 }
@@ -112,7 +112,7 @@ void Thread::idle_loop() {
  while (true)
  {
-      std::unique_lock<Mutex> lk(mutex);
+      std::unique_lock<std::mutex> lk(mutex);
      searching = false;
      cv.notify_one(); // Wake up anyone waiting for search finished
      cv.wait(lk, [&]{ return searching; });
--- a/src/thread.h
+++ b/src/thread.h
@@ -42,8 +42,8 @@
 class Thread {
-  Mutex mutex;
+  std::mutex mutex;
-  ConditionVariable cv;
+  std::condition_variable cv;
  size_t idx;
  bool exit = false, searching = true; // Set before starting std::thread
  NativeThread stdThread;
--- a/src/thread_win32_osx.h
+++ b/src/thread_win32_osx.h
@@ -21,60 +21,13 @@
 #ifndef THREAD_WIN32_OSX_H_INCLUDED
 #define THREAD_WIN32_OSX_H_INCLUDED
 /// STL thread library used by mingw and gcc when cross compiling for Windows
 /// relies on libwinpthread. Currently libwinpthread implements mutexes directly
 /// on top of Windows semaphores. Semaphores, being kernel objects, require kernel
 /// mode transition in order to lock or unlock, which is very slow compared to
 /// interlocked operations (about 30% slower on bench test). To work around this
 /// issue, we define our wrappers to the low level Win32 calls. We use critical
 /// sections to support Windows XP and older versions. Unfortunately, cond_wait()
 /// is racy between unlock() and WaitForSingleObject() but they have the same
 /// speed performance as the SRW locks.
 #include <condition_variable>
 #include <mutex>
 #include <thread>
 #if defined(_WIN32) && !defined(_MSC_VER)
 #ifndef NOMINMAX
 #  define NOMINMAX // Disable macros min() and max()
 #endif
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 #undef WIN32_LEAN_AND_MEAN
 #undef NOMINMAX
 /// Mutex and ConditionVariable struct are wrappers of the low level locking
 /// machinery and are modeled after the corresponding C++11 classes.
 struct Mutex {
  Mutex() { InitializeCriticalSection(&cs); }
 ~Mutex() { DeleteCriticalSection(&cs); }
  void lock() { EnterCriticalSection(&cs); }
  void unlock() { LeaveCriticalSection(&cs); }
 private:
  CRITICAL_SECTION cs;
 };
 typedef std::condition_variable_any ConditionVariable;
 #else // Default case: use STL classes
 typedef std::mutex Mutex;
 typedef std::condition_variable ConditionVariable;
 #endif
 /// On OSX threads other than the main thread are created with a reduced stack
-/// size of 512KB by default, this is dangerously low for deep searches, so
+/// size of 512KB by default, this is too low for deep searches, which require
-/// adjust it to TH_STACK_SIZE. The implementation calls pthread_create() with
+/// somewhat more than 1MB stack, so adjust it to TH_STACK_SIZE.
-/// proper stack size parameter.
+/// The implementation calls pthread_create() with the stack size parameter
-
+/// equal to the linux 8MB default, on platforms that support it.
 /// On toolchains where pthread is always available, ensure minimum stack size
 /// instead of relying on linker defaults which may be platform-specific.
 #if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__)