Files
Stockfish/src/thread.h
Joona Kiiski d65f75c153 Improve smp performance for high number of threads
Balance threads between split points.

There are huge differences between different machines and autopurging makes it very difficult to measure the improvement in fishtest, but the following was recorded for 16 threads at 15+0.05:

    For Bravone (1000 games): 0 ELO
    For Glinscott (1000 games): +20 ELO
    For bKingUs (1000 games): +50 ELO
    For fastGM (1500 games): +50 ELO

The change was regression for no one, and a big improvement for some, so it should be fine to commit it.
Also for 8 threads at 15+0.05 we measured a statistically significant improvement:
ELO: 6.19 +-3.9 (95%) LOS: 99.9%
Total: 10325 W: 1824 L: 1640 D: 6861

Finally it was verified that there was no (significant) regression for

4 threads:
ELO: 0.09 +-2.8 (95%) LOS: 52.4%
Total: 19908 W: 3422 L: 3417 D: 13069

2 threads:
ELO: 0.38 +-3.0 (95%) LOS: 60.0%
Total: 19044 W: 3480 L: 3459 D: 12105

1 thread:
ELO: -1.27 +-2.1 (95%) LOS: 12.3%
Total: 40000 W: 7829 L: 7975 D: 24196

Resolves #258
2015-02-16 20:36:13 +00:00

190 lines
5.2 KiB
C++

/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef THREAD_H_INCLUDED
#define THREAD_H_INCLUDED
#include <bitset>
#include <vector>
#include "material.h"
#include "movepick.h"
#include "pawns.h"
#include "position.h"
#include "search.h"
struct Thread;
const int MAX_THREADS = 128;
const int MAX_SPLITPOINTS_PER_THREAD = 8;
const int MAX_SLAVES_PER_SPLITPOINT = 4;
/// Mutex and ConditionVariable struct are wrappers of the low level locking
/// machinery and are modeled after the corresponding C++11 classes.
struct Mutex {
Mutex() { lock_init(l); }
~Mutex() { lock_destroy(l); }
void lock() { lock_grab(l); }
void unlock() { lock_release(l); }
private:
friend struct ConditionVariable;
Lock l;
};
struct ConditionVariable {
ConditionVariable() { cond_init(c); }
~ConditionVariable() { cond_destroy(c); }
void wait(Mutex& m) { cond_wait(c, m.l); }
void wait_for(Mutex& m, int ms) { timed_wait(c, m.l, ms); }
void notify_one() { cond_signal(c); }
private:
WaitCondition c;
};
/// SplitPoint struct stores information shared by the threads searching in
/// parallel below the same split point. It is populated at splitting time.
struct SplitPoint {
// Const data after split point has been setup
const Position* pos;
Search::Stack* ss;
Thread* masterThread;
int spLevel;
Depth depth;
Value beta;
int nodeType;
bool cutNode;
// Const pointers to shared data
MovePicker* movePicker;
SplitPoint* parentSplitPoint;
// Shared variable data
Mutex mutex;
std::bitset<MAX_THREADS> slavesMask;
int slavesCount;
volatile bool allSlavesSearching;
volatile uint64_t nodes;
volatile Value alpha;
volatile Value bestValue;
volatile Move bestMove;
volatile int moveCount;
volatile bool cutoff;
};
/// ThreadBase struct is the base of the hierarchy from where we derive all the
/// specialized thread classes.
struct ThreadBase {
ThreadBase() : handle(NativeHandle()), exit(false) {}
virtual ~ThreadBase() {}
virtual void idle_loop() = 0;
void notify_one();
void wait_for(volatile const bool& b);
Mutex mutex;
ConditionVariable sleepCondition;
NativeHandle handle;
volatile bool exit;
};
/// Thread struct keeps together all the thread related stuff like locks, state
/// and especially split points. We also use per-thread pawn and material hash
/// tables so that once we get a pointer to an entry its life time is unlimited
/// and we don't have to care about someone changing the entry under our feet.
struct Thread : public ThreadBase {
Thread();
virtual void idle_loop();
bool cutoff_occurred() const;
bool available_to(const Thread* master) const;
void split(Position& pos, Search::Stack* ss, Value alpha, Value beta, Value* bestValue, Move* bestMove,
Depth depth, int moveCount, MovePicker* movePicker, int nodeType, bool cutNode);
SplitPoint splitPoints[MAX_SPLITPOINTS_PER_THREAD];
Pawns::Table pawnsTable;
Material::Table materialTable;
Endgames endgames;
Position* activePosition;
size_t idx;
int maxPly;
SplitPoint* volatile activeSplitPoint;
volatile int splitPointsSize;
volatile bool searching;
};
/// MainThread and TimerThread are derived classes used to characterize the two
/// special threads: the main one and the recurring timer.
struct MainThread : public Thread {
MainThread() : thinking(true) {} // Avoid a race with start_thinking()
virtual void idle_loop();
volatile bool thinking;
};
struct TimerThread : public ThreadBase {
static const int Resolution = 5; // Millisec between two check_time() calls
TimerThread() : run(false) {}
virtual void idle_loop();
bool run;
};
/// ThreadPool struct handles all the threads related stuff like init, starting,
/// parking and, most importantly, launching a slave thread at a split point.
/// All the access to shared thread data is done through this class.
struct ThreadPool : public std::vector<Thread*> {
void init(); // No c'tor and d'tor, threads rely on globals that should be
void exit(); // initialized and are valid during the whole thread lifetime.
MainThread* main() { return static_cast<MainThread*>(at(0)); }
void read_uci_options();
Thread* available_slave(const Thread* master) const;
void wait_for_think_finished();
void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&);
Depth minimumSplitDepth;
Mutex mutex;
ConditionVariable sleepCondition;
TimerThread* timer;
};
extern ThreadPool Threads;
#endif // #ifndef THREAD_H_INCLUDED