From 20a5c07472bf04f5d684141a09c120e9beaa54d4 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Sat, 14 Feb 2015 15:55:11 +0100 Subject: [PATCH 01/11] Further simplify KingDanger init And remove a tale whitespace while there. No functional change. --- src/evaluate.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 9d19ad75..18f5adc4 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -555,7 +555,7 @@ namespace { b &= ~pos.pieces() & ~ei.attackedBy[Them][PAWN] & (ei.attackedBy[Us][ALL_PIECES] | ~ei.attackedBy[Them][ALL_PIECES]); - + if (b) score += popcount(b) * PawnSafePush; @@ -917,14 +917,14 @@ namespace Eval { void init() { - const int MaxSlope = 87; - const int Peak = 12800; + const int MaxSlope = 8700; + const int Peak = 1280000; int t = 0; for (int i = 0; i < 400; ++i) { - t = std::min(Peak, std::min(i * i * 27 / 100, t + MaxSlope)); - KingDanger[i] = apply_weight(make_score(t / 10, 0), Weights[KingSafety]); + t = std::min(Peak, std::min(i * i * 27, t + MaxSlope)); + KingDanger[i] = apply_weight(make_score(t / 1000, 0), Weights[KingSafety]); } } From f8f5dcbb682830a66a37f68f3c192bbbfc84a33a Mon Sep 17 00:00:00 2001 From: lucasart Date: Mon, 16 Feb 2015 09:34:26 +0800 Subject: [PATCH 02/11] Compute checkers from scratch This micro-optimization only complicates the code and provides no benefit. Removing it is even a speedup on my machine (i7-3770k, linux, gcc 4.9.1): stat test master diff mean 2,403,118 2,390,904 12,214 stdev 12,043 10,620 3,677 speedup 0.51% P(speedup>0) 100.0% No functional change. --- src/position.cpp | 30 ++++-------------------------- src/position.h | 2 +- src/search.cpp | 8 ++++---- src/syzygy/tbprobe.cpp | 16 ++++++++-------- 4 files changed, 17 insertions(+), 39 deletions(-) diff --git a/src/position.cpp b/src/position.cpp index ef31d58d..eda26014 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -690,10 +690,10 @@ bool Position::gives_check(Move m, const CheckInfo& ci) const { void Position::do_move(Move m, StateInfo& newSt) { CheckInfo ci(*this); - do_move(m, newSt, ci, gives_check(m, ci)); + do_move(m, newSt, gives_check(m, ci)); } -void Position::do_move(Move m, StateInfo& newSt, const CheckInfo& ci, bool moveIsCheck) { +void Position::do_move(Move m, StateInfo& newSt, bool moveIsCheck) { assert(is_ok(m)); assert(&newSt != st); @@ -856,30 +856,8 @@ void Position::do_move(Move m, StateInfo& newSt, const CheckInfo& ci, bool moveI // Update the key with the final value st->key = k; - // Update checkers bitboard: piece must be already moved due to attacks_from() - st->checkersBB = 0; - - if (moveIsCheck) - { - if (type_of(m) != NORMAL) - st->checkersBB = attackers_to(king_square(them)) & pieces(us); - else - { - // Direct checks - if (ci.checkSq[pt] & to) - st->checkersBB |= to; - - // Discovered checks - if (ci.dcCandidates && (ci.dcCandidates & from)) - { - if (pt != ROOK) - st->checkersBB |= attacks_from(king_square(them)) & pieces(us, QUEEN, ROOK); - - if (pt != BISHOP) - st->checkersBB |= attacks_from(king_square(them)) & pieces(us, QUEEN, BISHOP); - } - } - } + // Calculate checkers bitboard (if move is check) + st->checkersBB = moveIsCheck ? attackers_to(king_square(them)) & pieces(us) : 0; sideToMove = ~sideToMove; diff --git a/src/position.h b/src/position.h index 447872ab..14ce8099 100644 --- a/src/position.h +++ b/src/position.h @@ -145,7 +145,7 @@ public: // Doing and undoing moves void do_move(Move m, StateInfo& st); - void do_move(Move m, StateInfo& st, const CheckInfo& ci, bool moveIsCheck); + void do_move(Move m, StateInfo& st, bool moveIsCheck); void undo_move(Move m); void do_null_move(StateInfo& st); void undo_null_move(); diff --git a/src/search.cpp b/src/search.cpp index eea52984..ffe36126 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -173,7 +173,7 @@ uint64_t Search::perft(Position& pos, Depth depth) { cnt = 1, nodes++; else { - pos.do_move(*it, st, ci, pos.gives_check(*it, ci)); + pos.do_move(*it, st, pos.gives_check(*it, ci)); cnt = leaf ? MoveList(pos).size() : perft(pos, depth - ONE_PLY); nodes += cnt; pos.undo_move(*it); @@ -702,7 +702,7 @@ namespace { if (pos.legal(move, ci.pinned)) { ss->currentMove = move; - pos.do_move(move, st, ci, pos.gives_check(move, ci)); + pos.do_move(move, st, pos.gives_check(move, ci)); value = -search(pos, ss+1, -rbeta, -rbeta+1, rdepth, !cutNode); pos.undo_move(move); if (value >= rbeta) @@ -894,7 +894,7 @@ moves_loop: // When in check and at SpNode search starts from here quietsSearched[quietCount++] = move; // Step 14. Make the move - pos.do_move(move, st, ci, givesCheck); + pos.do_move(move, st, givesCheck); // Step 15. Reduced depth search (LMR). If the move fails high it will be // re-searched at full depth. @@ -1255,7 +1255,7 @@ moves_loop: // When in check and at SpNode search starts from here ss->currentMove = move; // Make and search the move - pos.do_move(move, st, ci, givesCheck); + pos.do_move(move, st, givesCheck); value = givesCheck ? -qsearch(pos, ss+1, -beta, -alpha, depth - ONE_PLY) : -qsearch(pos, ss+1, -beta, -alpha, depth - ONE_PLY); pos.undo_move(move); diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index 0abd2b2e..17326970 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -367,7 +367,7 @@ static int probe_ab(Position& pos, int alpha, int beta, int *success) if (!pos.capture(capture) || type_of(capture) == ENPASSANT || !pos.legal(capture, ci.pinned)) continue; - pos.do_move(capture, st, ci, pos.gives_check(capture, ci)); + pos.do_move(capture, st, pos.gives_check(capture, ci)); v = -probe_ab(pos, -beta, -alpha, success); pos.undo_move(capture); if (*success == 0) return 0; @@ -430,7 +430,7 @@ int Tablebases::probe_wdl(Position& pos, int *success) if (type_of(capture) != ENPASSANT || !pos.legal(capture, ci.pinned)) continue; - pos.do_move(capture, st, ci, pos.gives_check(capture, ci)); + pos.do_move(capture, st, pos.gives_check(capture, ci)); int v0 = -probe_ab(pos, -2, 2, success); pos.undo_move(capture); if (*success == 0) return 0; @@ -493,7 +493,7 @@ static int probe_dtz_no_ep(Position& pos, int *success) if (type_of(pos.moved_piece(move)) != PAWN || pos.capture(move) || !pos.legal(move, ci.pinned)) continue; - pos.do_move(move, st, ci, pos.gives_check(move, ci)); + pos.do_move(move, st, pos.gives_check(move, ci)); int v = -probe_ab(pos, -2, -wdl + 1, success); pos.undo_move(move); if (*success == 0) return 0; @@ -515,7 +515,7 @@ static int probe_dtz_no_ep(Position& pos, int *success) if (pos.capture(move) || type_of(pos.moved_piece(move)) == PAWN || !pos.legal(move, ci.pinned)) continue; - pos.do_move(move, st, ci, pos.gives_check(move, ci)); + pos.do_move(move, st, pos.gives_check(move, ci)); int v = -Tablebases::probe_dtz(pos, success); pos.undo_move(move); if (*success == 0) return 0; @@ -534,7 +534,7 @@ static int probe_dtz_no_ep(Position& pos, int *success) Move move = moves->move; if (!pos.legal(move, ci.pinned)) continue; - pos.do_move(move, st, ci, pos.gives_check(move, ci)); + pos.do_move(move, st, pos.gives_check(move, ci)); if (st.rule50 == 0) { if (wdl == -2) v = -1; else { @@ -610,7 +610,7 @@ int Tablebases::probe_dtz(Position& pos, int *success) if (type_of(capture) != ENPASSANT || !pos.legal(capture, ci.pinned)) continue; - pos.do_move(capture, st, ci, pos.gives_check(capture, ci)); + pos.do_move(capture, st, pos.gives_check(capture, ci)); int v0 = -probe_ab(pos, -2, 2, success); pos.undo_move(capture); if (*success == 0) return 0; @@ -700,7 +700,7 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoveVector& rootMoves, Va // Probe each move. for (size_t i = 0; i < rootMoves.size(); i++) { Move move = rootMoves[i].pv[0]; - pos.do_move(move, st, ci, pos.gives_check(move, ci)); + pos.do_move(move, st, pos.gives_check(move, ci)); int v = 0; if (pos.checkers() && dtz > 0) { ExtMove s[192]; @@ -810,7 +810,7 @@ bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoveVector& rootMoves // Probe each move. for (size_t i = 0; i < rootMoves.size(); i++) { Move move = rootMoves[i].pv[0]; - pos.do_move(move, st, ci, pos.gives_check(move, ci)); + pos.do_move(move, st, pos.gives_check(move, ci)); int v = -Tablebases::probe_wdl(pos, &success); pos.undo_move(move); if (!success) return false; From d65f75c1532536f11a2dfbc0263c55e1beb88c2b Mon Sep 17 00:00:00 2001 From: Joona Kiiski Date: Sat, 14 Feb 2015 20:46:00 +0000 Subject: [PATCH 03/11] Improve smp performance for high number of threads Balance threads between split points. There are huge differences between different machines and autopurging makes it very difficult to measure the improvement in fishtest, but the following was recorded for 16 threads at 15+0.05: For Bravone (1000 games): 0 ELO For Glinscott (1000 games): +20 ELO For bKingUs (1000 games): +50 ELO For fastGM (1500 games): +50 ELO The change was regression for no one, and a big improvement for some, so it should be fine to commit it. Also for 8 threads at 15+0.05 we measured a statistically significant improvement: ELO: 6.19 +-3.9 (95%) LOS: 99.9% Total: 10325 W: 1824 L: 1640 D: 6861 Finally it was verified that there was no (significant) regression for 4 threads: ELO: 0.09 +-2.8 (95%) LOS: 52.4% Total: 19908 W: 3422 L: 3417 D: 13069 2 threads: ELO: 0.38 +-3.0 (95%) LOS: 60.0% Total: 19044 W: 3480 L: 3459 D: 12105 1 thread: ELO: -1.27 +-2.1 (95%) LOS: 12.3% Total: 40000 W: 7829 L: 7975 D: 24196 Resolves #258 --- src/search.cpp | 51 ++++++++++++++++++++++++++++++++++++-------------- src/thread.cpp | 6 +++++- src/thread.h | 3 +++ 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index ffe36126..29820a1f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1042,7 +1042,9 @@ moves_loop: // When in check and at SpNode search starts from here && Threads.size() >= 2 && depth >= Threads.minimumSplitDepth && ( !thisThread->activeSplitPoint - || !thisThread->activeSplitPoint->allSlavesSearching) + || !thisThread->activeSplitPoint->allSlavesSearching + || ( int(Threads.size()) > MAX_SLAVES_PER_SPLITPOINT + && thisThread->activeSplitPoint->slavesCount == MAX_SLAVES_PER_SPLITPOINT)) && thisThread->splitPointsSize < MAX_SPLITPOINTS_PER_THREAD) { assert(bestValue > -VALUE_INFINITE && bestValue < beta); @@ -1587,6 +1589,11 @@ void Thread::idle_loop() { // Try to late join to another split point if none of its slaves has // already finished. if (Threads.size() > 2) + { + SplitPoint *bestSp = NULL; + int bestThread = 0; + int bestScore = INT_MAX; + for (size_t i = 0; i < Threads.size(); ++i) { const int size = Threads[i]->splitPointsSize; // Local copy @@ -1594,26 +1601,42 @@ void Thread::idle_loop() { if ( sp && sp->allSlavesSearching + && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT && available_to(Threads[i])) { - // Recheck the conditions under lock protection - Threads.mutex.lock(); - sp->mutex.lock(); + int score = sp->spLevel * 256 * 256 + sp->slavesCount * 256 - sp->depth * 1; - if ( sp->allSlavesSearching - && available_to(Threads[i])) + if (score < bestScore) { - sp->slavesMask.set(idx); - activeSplitPoint = sp; - searching = true; + bestSp = sp; + bestThread = i; + bestScore = score; } - - sp->mutex.unlock(); - Threads.mutex.unlock(); - - break; // Just a single attempt } } + + if (bestSp) + { + sp = bestSp; + + // Recheck the conditions under lock protection + Threads.mutex.lock(); + sp->mutex.lock(); + + if ( sp->allSlavesSearching + && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT + && available_to(Threads[bestThread])) + { + sp->slavesMask.set(idx); + sp->slavesCount++; + activeSplitPoint = sp; + searching = true; + } + + sp->mutex.unlock(); + Threads.mutex.unlock(); + } + } } // Grab the lock to avoid races with Thread::notify_one() diff --git a/src/thread.cpp b/src/thread.cpp index b8571dcd..02cb4562 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -154,7 +154,9 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes sp.masterThread = this; sp.parentSplitPoint = activeSplitPoint; + sp.spLevel = activeSplitPoint ? activeSplitPoint->spLevel + 1 : 0; sp.slavesMask = 0, sp.slavesMask.set(idx); + sp.slavesCount = 1; sp.depth = depth; sp.bestValue = *bestValue; sp.bestMove = *bestMove; @@ -182,9 +184,11 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes Thread* slave; - while ((slave = Threads.available_slave(this)) != NULL) + while ( sp.slavesCount < MAX_SLAVES_PER_SPLITPOINT + && (slave = Threads.available_slave(this)) != NULL) { sp.slavesMask.set(slave->idx); + sp.slavesCount++; slave->activeSplitPoint = &sp; slave->searching = true; // Slave leaves idle_loop() slave->notify_one(); // Could be sleeping diff --git a/src/thread.h b/src/thread.h index 9750ed7b..949f87ad 100644 --- a/src/thread.h +++ b/src/thread.h @@ -33,6 +33,7 @@ struct Thread; const int MAX_THREADS = 128; const int MAX_SPLITPOINTS_PER_THREAD = 8; +const int MAX_SLAVES_PER_SPLITPOINT = 4; /// Mutex and ConditionVariable struct are wrappers of the low level locking /// machinery and are modeled after the corresponding C++11 classes. @@ -72,6 +73,7 @@ struct SplitPoint { const Position* pos; Search::Stack* ss; Thread* masterThread; + int spLevel; Depth depth; Value beta; int nodeType; @@ -84,6 +86,7 @@ struct SplitPoint { // Shared variable data Mutex mutex; std::bitset slavesMask; + int slavesCount; volatile bool allSlavesSearching; volatile uint64_t nodes; volatile Value alpha; From 6656ed8904ccf0e20321f9929f2d13fbb7b9223d Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Tue, 17 Feb 2015 08:23:35 +0100 Subject: [PATCH 04/11] Simplify attackUnits formula Use '/ 8' instead of '* 31 / 256' Passed STC LLR: 2.95 (-2.94,2.94) [-3.00,1.00] Total: 55077 W: 10999 L: 10940 D: 33138 And LTC LLR: 2.95 (-2.94,2.94) [-3.00,1.00] Total: 14751 W: 2530 L: 2400 D: 9821 bench: 7911944 --- src/evaluate.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 18f5adc4..7ff7dfc1 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -414,10 +414,10 @@ namespace { // attacked and undefended squares around our king and the quality of // the pawn shelter (current 'score' value). attackUnits = std::min(74, ei.kingAttackersCount[Them] * ei.kingAttackersWeight[Them]) - + 8 * ei.kingAdjacentZoneAttacksCount[Them] + + 8 * ei.kingAdjacentZoneAttacksCount[Them] + 25 * popcount(undefended) - + 11 * (ei.pinnedPieces[Us] != 0) - - mg_value(score) * 31 / 256 + + 11 * (ei.pinnedPieces[Us] != 0) + - mg_value(score) / 8 - !pos.count(Them) * 60; // Analyse the enemy's safe queen contact checks. Firstly, find the From dccaa145d2b57999aa3e368c7c9203731b4e9685 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Tue, 17 Feb 2015 10:10:58 +0100 Subject: [PATCH 05/11] Compute SplitPoint::spLevel on the fly And retire a redundant field. This is important also from a concept point of view becuase we want to keep SMP structures as simple as possible with the only strictly necessary data. Verified with dbg_hit_on(sp->spLevel != level) that the values are 100% the same out of more 50K samples. No functional change. --- src/search.cpp | 9 +++++++-- src/thread.cpp | 3 +-- src/thread.h | 1 - 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 29820a1f..dcb89d75 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1604,7 +1604,12 @@ void Thread::idle_loop() { && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT && available_to(Threads[i])) { - int score = sp->spLevel * 256 * 256 + sp->slavesCount * 256 - sp->depth * 1; + // Compute the recursive split points chain size + int level = -1; + for (SplitPoint* spp = Threads[i]->activeSplitPoint; spp; spp = spp->parentSplitPoint) + level++; + + int score = level * 256 * 256 + sp->slavesCount * 256 - sp->depth * 1; if (score < bestScore) { @@ -1618,7 +1623,7 @@ void Thread::idle_loop() { if (bestSp) { sp = bestSp; - + // Recheck the conditions under lock protection Threads.mutex.lock(); sp->mutex.lock(); diff --git a/src/thread.cpp b/src/thread.cpp index 02cb4562..5c013c6a 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -154,7 +154,6 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes sp.masterThread = this; sp.parentSplitPoint = activeSplitPoint; - sp.spLevel = activeSplitPoint ? activeSplitPoint->spLevel + 1 : 0; sp.slavesMask = 0, sp.slavesMask.set(idx); sp.slavesCount = 1; sp.depth = depth; @@ -184,7 +183,7 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes Thread* slave; - while ( sp.slavesCount < MAX_SLAVES_PER_SPLITPOINT + while ( sp.slavesCount < MAX_SLAVES_PER_SPLITPOINT && (slave = Threads.available_slave(this)) != NULL) { sp.slavesMask.set(slave->idx); diff --git a/src/thread.h b/src/thread.h index 949f87ad..0265ee67 100644 --- a/src/thread.h +++ b/src/thread.h @@ -73,7 +73,6 @@ struct SplitPoint { const Position* pos; Search::Stack* ss; Thread* masterThread; - int spLevel; Depth depth; Value beta; int nodeType; From 4f906a25897467ba8fc7c31aa634cefc1ec0dba9 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Thu, 19 Feb 2015 09:51:17 +0100 Subject: [PATCH 06/11] Remove useless condition in late join In case of Threads.size() == 2 we have that sp->allSlavesSearching is always false (because we have finished our search), bestSp is always NULL and we never late join, so there is no need to special case here. Tested with dbg_hit_on(sp && sp->allSlavesSearching) and verified it never fires. No functional change. --- src/search.cpp | 83 ++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 43 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index dcb89d75..1062c920 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1588,59 +1588,56 @@ void Thread::idle_loop() { // Try to late join to another split point if none of its slaves has // already finished. - if (Threads.size() > 2) + SplitPoint* bestSp = NULL; + int bestThread = 0; + int bestScore = INT_MAX; + + for (size_t i = 0; i < Threads.size(); ++i) { - SplitPoint *bestSp = NULL; - int bestThread = 0; - int bestScore = INT_MAX; + const int size = Threads[i]->splitPointsSize; // Local copy + sp = size ? &Threads[i]->splitPoints[size - 1] : NULL; - for (size_t i = 0; i < Threads.size(); ++i) + if ( sp + && sp->allSlavesSearching + && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT + && available_to(Threads[i])) { - const int size = Threads[i]->splitPointsSize; // Local copy - sp = size ? &Threads[i]->splitPoints[size - 1] : NULL; + // Compute the recursive split points chain size + int level = -1; + for (SplitPoint* spp = Threads[i]->activeSplitPoint; spp; spp = spp->parentSplitPoint) + level++; - if ( sp - && sp->allSlavesSearching - && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT - && available_to(Threads[i])) + int score = level * 256 * 256 + sp->slavesCount * 256 - sp->depth * 1; + + if (score < bestScore) { - // Compute the recursive split points chain size - int level = -1; - for (SplitPoint* spp = Threads[i]->activeSplitPoint; spp; spp = spp->parentSplitPoint) - level++; - - int score = level * 256 * 256 + sp->slavesCount * 256 - sp->depth * 1; - - if (score < bestScore) - { - bestSp = sp; - bestThread = i; - bestScore = score; - } + bestSp = sp; + bestThread = i; + bestScore = score; } } + } - if (bestSp) + if (bestSp) + { + sp = bestSp; + + // Recheck the conditions under lock protection + Threads.mutex.lock(); + sp->mutex.lock(); + + if ( sp->allSlavesSearching + && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT + && available_to(Threads[bestThread])) { - sp = bestSp; - - // Recheck the conditions under lock protection - Threads.mutex.lock(); - sp->mutex.lock(); - - if ( sp->allSlavesSearching - && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT - && available_to(Threads[bestThread])) - { - sp->slavesMask.set(idx); - sp->slavesCount++; - activeSplitPoint = sp; - searching = true; - } - - sp->mutex.unlock(); - Threads.mutex.unlock(); + sp->slavesMask.set(idx); + sp->slavesCount++; + activeSplitPoint = sp; + searching = true; } + + sp->mutex.unlock(); + Threads.mutex.unlock(); } } From 193a7ae35be740f8ff78e103fc605557a503ebbe Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Thu, 19 Feb 2015 10:08:29 +0100 Subject: [PATCH 07/11] Add a couple of asserts to late join Document and clarify that we cannot rejoin on ourselves and that we never late join if we are master and all slaves have finished, inded in this case we exit idle_loop. No functional change. --- src/search.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/search.cpp b/src/search.cpp index 1062c920..b1eac341 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1602,6 +1602,9 @@ void Thread::idle_loop() { && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT && available_to(Threads[i])) { + assert(this != Threads[i]); + assert(!(this_sp && this_sp->slavesMask.none())); + // Compute the recursive split points chain size int level = -1; for (SplitPoint* spp = Threads[i]->activeSplitPoint; spp; spp = spp->parentSplitPoint) From b9d4e6f7fd238edffd91ebbf4b944cddcd65a955 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Thu, 19 Feb 2015 10:18:24 +0100 Subject: [PATCH 08/11] Fix a warning under MSVC Assignment of size_t to int. No functional change. --- src/search.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index b1eac341..76d0350c 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1589,7 +1589,7 @@ void Thread::idle_loop() { // Try to late join to another split point if none of its slaves has // already finished. SplitPoint* bestSp = NULL; - int bestThread = 0; + Thread* bestThread = NULL; int bestScore = INT_MAX; for (size_t i = 0; i < Threads.size(); ++i) @@ -1615,7 +1615,7 @@ void Thread::idle_loop() { if (score < bestScore) { bestSp = sp; - bestThread = i; + bestThread = Threads[i]; bestScore = score; } } @@ -1631,7 +1631,7 @@ void Thread::idle_loop() { if ( sp->allSlavesSearching && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT - && available_to(Threads[bestThread])) + && available_to(bestThread)) { sp->slavesMask.set(idx); sp->slavesCount++; From 8d47caa16ec9d2efad44f2638ce7d7637216d281 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Thu, 19 Feb 2015 10:27:24 +0100 Subject: [PATCH 09/11] Retire redundant sp->slavesCount field It should be used slavesMask.count() instead. Verified 100% equivalent when sp->allSlavesSearching: dbg_hit_on(sp->allSlavesSearching, sp->slavesCount != sp->slavesMask.count()); No functional change. --- src/search.cpp | 11 +++++------ src/thread.cpp | 4 +--- src/thread.h | 3 +-- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 76d0350c..a0476653 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1043,8 +1043,8 @@ moves_loop: // When in check and at SpNode search starts from here && depth >= Threads.minimumSplitDepth && ( !thisThread->activeSplitPoint || !thisThread->activeSplitPoint->allSlavesSearching - || ( int(Threads.size()) > MAX_SLAVES_PER_SPLITPOINT - && thisThread->activeSplitPoint->slavesCount == MAX_SLAVES_PER_SPLITPOINT)) + || ( Threads.size() > MAX_SLAVES_PER_SPLITPOINT + && thisThread->activeSplitPoint->slavesMask.count() == MAX_SLAVES_PER_SPLITPOINT)) && thisThread->splitPointsSize < MAX_SPLITPOINTS_PER_THREAD) { assert(bestValue > -VALUE_INFINITE && bestValue < beta); @@ -1599,7 +1599,7 @@ void Thread::idle_loop() { if ( sp && sp->allSlavesSearching - && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT + && sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT && available_to(Threads[i])) { assert(this != Threads[i]); @@ -1610,7 +1610,7 @@ void Thread::idle_loop() { for (SplitPoint* spp = Threads[i]->activeSplitPoint; spp; spp = spp->parentSplitPoint) level++; - int score = level * 256 * 256 + sp->slavesCount * 256 - sp->depth * 1; + int score = level * 256 * 256 + (int)sp->slavesMask.count() * 256 - sp->depth * 1; if (score < bestScore) { @@ -1630,11 +1630,10 @@ void Thread::idle_loop() { sp->mutex.lock(); if ( sp->allSlavesSearching - && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT + && sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT && available_to(bestThread)) { sp->slavesMask.set(idx); - sp->slavesCount++; activeSplitPoint = sp; searching = true; } diff --git a/src/thread.cpp b/src/thread.cpp index 5c013c6a..8be7b721 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -155,7 +155,6 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes sp.masterThread = this; sp.parentSplitPoint = activeSplitPoint; sp.slavesMask = 0, sp.slavesMask.set(idx); - sp.slavesCount = 1; sp.depth = depth; sp.bestValue = *bestValue; sp.bestMove = *bestMove; @@ -183,11 +182,10 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes Thread* slave; - while ( sp.slavesCount < MAX_SLAVES_PER_SPLITPOINT + while ( sp.slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT && (slave = Threads.available_slave(this)) != NULL) { sp.slavesMask.set(slave->idx); - sp.slavesCount++; slave->activeSplitPoint = &sp; slave->searching = true; // Slave leaves idle_loop() slave->notify_one(); // Could be sleeping diff --git a/src/thread.h b/src/thread.h index 0265ee67..42c44aa9 100644 --- a/src/thread.h +++ b/src/thread.h @@ -33,7 +33,7 @@ struct Thread; const int MAX_THREADS = 128; const int MAX_SPLITPOINTS_PER_THREAD = 8; -const int MAX_SLAVES_PER_SPLITPOINT = 4; +const size_t MAX_SLAVES_PER_SPLITPOINT = 4; /// Mutex and ConditionVariable struct are wrappers of the low level locking /// machinery and are modeled after the corresponding C++11 classes. @@ -85,7 +85,6 @@ struct SplitPoint { // Shared variable data Mutex mutex; std::bitset slavesMask; - int slavesCount; volatile bool allSlavesSearching; volatile uint64_t nodes; volatile Value alpha; From 950c8436edc50857b83eb3e0cbaca06407764655 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Thu, 19 Feb 2015 10:43:28 +0100 Subject: [PATCH 10/11] Use size_t consistently across thread code No functional change. --- src/search.cpp | 4 ++-- src/thread.cpp | 5 +++-- src/thread.h | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index a0476653..12ed7599 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1594,7 +1594,7 @@ void Thread::idle_loop() { for (size_t i = 0; i < Threads.size(); ++i) { - const int size = Threads[i]->splitPointsSize; // Local copy + const size_t size = Threads[i]->splitPointsSize; // Local copy sp = size ? &Threads[i]->splitPoints[size - 1] : NULL; if ( sp @@ -1705,7 +1705,7 @@ void check_time() { // Loop across all split points and sum accumulated SplitPoint nodes plus // all the currently active positions nodes. for (size_t i = 0; i < Threads.size(); ++i) - for (int j = 0; j < Threads[i]->splitPointsSize; ++j) + for (size_t j = 0; j < Threads[i]->splitPointsSize; ++j) { SplitPoint& sp = Threads[i]->splitPoints[j]; diff --git a/src/thread.cpp b/src/thread.cpp index 8be7b721..18870021 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -89,7 +89,8 @@ void ThreadBase::wait_for(volatile const bool& condition) { Thread::Thread() /* : splitPoints() */ { // Initialization of non POD broken in MSVC searching = false; - maxPly = splitPointsSize = 0; + maxPly = 0; + splitPointsSize = 0; activeSplitPoint = NULL; activePosition = NULL; idx = Threads.size(); // Starts from 0 @@ -123,7 +124,7 @@ bool Thread::available_to(const Thread* master) const { // Make a local copy to be sure it doesn't become zero under our feet while // testing next condition and so leading to an out of bounds access. - const int size = splitPointsSize; + const size_t size = splitPointsSize; // No split points means that the thread is available as a slave for any // other thread otherwise apply the "helpful master" concept if possible. diff --git a/src/thread.h b/src/thread.h index 42c44aa9..04e9a370 100644 --- a/src/thread.h +++ b/src/thread.h @@ -31,8 +31,8 @@ struct Thread; -const int MAX_THREADS = 128; -const int MAX_SPLITPOINTS_PER_THREAD = 8; +const size_t MAX_THREADS = 128; +const size_t MAX_SPLITPOINTS_PER_THREAD = 8; const size_t MAX_SLAVES_PER_SPLITPOINT = 4; /// Mutex and ConditionVariable struct are wrappers of the low level locking @@ -136,7 +136,7 @@ struct Thread : public ThreadBase { size_t idx; int maxPly; SplitPoint* volatile activeSplitPoint; - volatile int splitPointsSize; + volatile size_t splitPointsSize; volatile bool searching; }; From 667f35073773653ef8d05260536516fffb2d3faa Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Thu, 19 Feb 2015 23:12:59 +0100 Subject: [PATCH 11/11] Clarify we don't late join with only 2 threads Thanks to Gary for pointing this out. No functional change. --- src/search.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 12ed7599..15b93a72 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1604,8 +1604,10 @@ void Thread::idle_loop() { { assert(this != Threads[i]); assert(!(this_sp && this_sp->slavesMask.none())); + assert(Threads.size() > 2); - // Compute the recursive split points chain size + // Prefer to join to SP with few parents to reduce the probability + // that a cut-off occurs above us, and hence we waste our work. int level = -1; for (SplitPoint* spp = Threads[i]->activeSplitPoint; spp; spp = spp->parentSplitPoint) level++;