Start all threads searching on root position and
use only the shared TT table as synching scheme.

It seems this scheme scales better than YBWC for
high number of threads.

Verified for nor regression at STC 3 threads
LLR: -2.95 (-2.94,2.94) [-3.00,1.00]
Total: 40232 W: 6908 L: 7130 D: 26194

Verified for nor regression at LTC 3 threads
LLR: 2.95 (-2.94,2.94) [-3.00,1.00]
Total: 28186 W: 3908 L: 3798 D: 20480

Verified for nor regression at STC 7 threads
LLR: 2.95 (-2.94,2.94) [-3.00,1.00]
Total: 3607 W: 674 L: 526 D: 2407

Verified for nor regression at LTC 7 threads
LLR: 2.95 (-2.94,2.94) [-3.00,1.00]
Total: 4235 W: 671 L: 528 D: 3036

Tested with fixed games at LTC with 20 threads
ELO: 44.75 +-7.6 (95%) LOS: 100.0%
Total: 2069 W: 407 L: 142 D: 1520

Tested with fixed games at XLTC (120secs) with 20 threads
ELO: 28.01 +-6.7 (95%) LOS: 100.0%
Total: 2275 W: 349 L: 166 D: 1760

Original patch of mbootsector, with additional work
from Ivan Ivec (log formula), Joerg Oster (id loop
simplification) and Marco Costalba (assorted formatting
and rework).

Bench: 8116244
This commit is contained in:
mbootsector
2015-10-06 08:15:17 +02:00
committed by Marco Costalba
parent 7ea5659c5f
commit ecc5ff6693
10 changed files with 365 additions and 750 deletions

View File

@@ -37,53 +37,6 @@
struct Thread;
const size_t MAX_THREADS = 128;
const size_t MAX_SPLITPOINTS_PER_THREAD = 8;
const size_t MAX_SLAVES_PER_SPLITPOINT = 4;
class Spinlock {
std::atomic_int lock;
public:
Spinlock() { lock = 1; } // Init here to workaround a bug with MSVC 2013
void acquire() {
while (lock.fetch_sub(1, std::memory_order_acquire) != 1)
while (lock.load(std::memory_order_relaxed) <= 0)
std::this_thread::yield(); // Be nice to hyperthreading
}
void release() { lock.store(1, std::memory_order_release); }
};
/// SplitPoint struct stores information shared by the threads searching in
/// parallel below the same split point. It is populated at splitting time.
struct SplitPoint {
// Const data after split point has been setup
const Position* pos;
Search::Stack* ss;
Thread* master;
Depth depth;
Value beta;
int nodeType;
bool cutNode;
// Const pointers to shared data
MovePicker* movePicker;
SplitPoint* parentSplitPoint;
// Shared variable data
Spinlock spinlock;
std::bitset<MAX_THREADS> slavesMask;
volatile bool allSlavesSearching;
volatile uint64_t nodes;
volatile Value alpha;
volatile Value bestValue;
volatile Move bestMove;
volatile int moveCount;
volatile bool cutoff;
};
/// ThreadBase struct is the base of the hierarchy from where we derive all the
@@ -94,10 +47,10 @@ struct ThreadBase : public std::thread {
virtual ~ThreadBase() = default;
virtual void idle_loop() = 0;
void notify_one();
void wait_for(volatile const bool& b);
void wait(volatile const bool& b);
void wait_while(volatile const bool& b);
Mutex mutex;
Spinlock spinlock;
ConditionVariable sleepCondition;
volatile bool exit = false;
};
@@ -112,22 +65,21 @@ struct Thread : public ThreadBase {
Thread();
virtual void idle_loop();
bool cutoff_occurred() const;
bool can_join(const SplitPoint* sp) const;
void search(bool isMainThread = false);
void split(Position& pos, Search::Stack* ss, Value alpha, Value beta, Value* bestValue, Move* bestMove,
Depth depth, int moveCount, MovePicker* movePicker, int nodeType, bool cutNode);
SplitPoint splitPoints[MAX_SPLITPOINTS_PER_THREAD];
Pawns::Table pawnsTable;
Material::Table materialTable;
Endgames endgames;
Position* activePosition;
size_t idx;
size_t idx, PVIdx;
int maxPly;
SplitPoint* volatile activeSplitPoint;
volatile size_t splitPointsSize;
volatile bool searching;
Position rootPos;
Search::RootMoveVector rootMoves;
Search::Stack stack[MAX_PLY+4];
HistoryStats History;
MovesStats Countermoves;
Depth depth;
};
@@ -137,6 +89,7 @@ struct Thread : public ThreadBase {
struct MainThread : public Thread {
virtual void idle_loop();
void join();
void think();
volatile bool thinking = true; // Avoid a race with start_thinking()
};
@@ -161,10 +114,8 @@ struct ThreadPool : public std::vector<Thread*> {
MainThread* main() { return static_cast<MainThread*>(at(0)); }
void read_uci_options();
Thread* available_slave(const SplitPoint* sp) const;
void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&);
Depth minimumSplitDepth;
int64_t nodes_searched();
TimerThread* timer;
};