Add our own BLAS-like routines that use Stockfish's thread pool for parallelization.

This commit is contained in:
Tomasz Sobczyk
2020-10-28 14:41:51 +01:00
committed by nodchip
parent ee0917a345
commit c56a4a36eb
5 changed files with 1202 additions and 0 deletions

View File

@@ -47,6 +47,7 @@ PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 1000 output_file_name $(PGO_TRAINING_
SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \
material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
extra/stockfish_blas.cpp \
nnue/evaluate_nnue.cpp \
nnue/evaluate_nnue_learner.cpp \
nnue/features/half_kp.cpp \

1033
src/extra/stockfish_blas.cpp Normal file

File diff suppressed because it is too large Load Diff

130
src/extra/stockfish_blas.h Normal file
View File

@@ -0,0 +1,130 @@
#ifndef _STOCKFISH_BLAS_H_
#define _STOCKFISH_BLAS_H_

// Public interface of the BLAS-like routines.
//
// Only declarations live here; the definitions are expected in the
// accompanying implementation file. The routine names and parameter lists
// follow the classic single-precision BLAS routines of the same name, so the
// per-routine notes below describe the BLAS operation each name refers to.
// NOTE(review): the implementation is not visible from this header; confirm
// that it matches the BLAS semantics quoted in the comments.

// Forward declaration is sufficient: the pool is only passed by reference.
struct ThreadPool;

// SF_BLAS_RESTRICT expands to the compiler-specific spelling of the
// "no aliasing" pointer qualifier.
#if defined (_MSC_VER)
#define SF_BLAS_RESTRICT __restrict
#elif defined (__INTEL_COMPILER)
#define SF_BLAS_RESTRICT restrict
#elif defined (__clang__)
#define SF_BLAS_RESTRICT __restrict__
#elif defined (__GNUC__)
#define SF_BLAS_RESTRICT __restrict__
#else
// Unknown compiler: the qualifier is only an optimization hint, so expand to
// nothing instead of leaving the macro undefined (which would make every
// declaration below fail to parse).
#define SF_BLAS_RESTRICT
#endif

namespace Blas {

    // Storage order of a matrix argument.
    // The numeric values coincide with CBLAS' CblasRowMajor / CblasColMajor.
    enum struct MatrixLayout {
        RowMajor = 101,
        ColMajor = 102
    };

    // Whether a matrix operand is used as-is or transposed.
    // The numeric values coincide with CBLAS' CblasNoTrans / CblasTrans.
    enum struct MatrixTranspose {
        NoTrans = 111,
        Trans = 112
    };

    // ---- scopy -----------------------------------------------------------
    // BLAS scopy copies a vector: Y := X, for N elements.
    // Overloads without incX/incY take no stride arguments (presumably
    // contiguous, i.e. stride 1 - confirm). Overloads taking a ThreadPool
    // presumably distribute the work over that pool - confirm.
    // X and Y are declared non-aliasing via SF_BLAS_RESTRICT.

    void scopy(
        const int N,
        const float * SF_BLAS_RESTRICT X,
        float * SF_BLAS_RESTRICT Y
    );

    void scopy(
        const int N,
        const float * SF_BLAS_RESTRICT X, const int incX,
        float * SF_BLAS_RESTRICT Y, const int incY
    );

    void scopy(
        ThreadPool& thread_pool,
        const int N,
        const float * SF_BLAS_RESTRICT X,
        float * SF_BLAS_RESTRICT Y
    );

    void scopy(
        ThreadPool& thread_pool,
        const int N,
        const float * SF_BLAS_RESTRICT X, const int incX,
        float * SF_BLAS_RESTRICT Y, const int incY
    );

    // ---- sscal -----------------------------------------------------------
    // BLAS sscal scales a vector in place: X := alpha * X, for N elements.
    // Same overload scheme as scopy (optional stride, optional thread pool).

    void sscal(
        const int N,
        const float alpha,
        float * SF_BLAS_RESTRICT X
    );

    void sscal(
        const int N,
        const float alpha,
        float * SF_BLAS_RESTRICT X, const int incX
    );

    void sscal(
        ThreadPool& thread_pool,
        const int N,
        const float alpha,
        float * SF_BLAS_RESTRICT X
    );

    void sscal(
        ThreadPool& thread_pool,
        const int N,
        const float alpha,
        float * SF_BLAS_RESTRICT X, const int incX
    );

    // ---- saxpy -----------------------------------------------------------
    // BLAS saxpy accumulates a scaled vector: Y := alpha * X + Y, for N
    // elements. Same overload scheme as scopy.

    void saxpy(
        const int N,
        const float alpha,
        const float * SF_BLAS_RESTRICT X,
        float * SF_BLAS_RESTRICT Y
    );

    void saxpy(
        const int N,
        const float alpha,
        const float * SF_BLAS_RESTRICT X, const int incX,
        float * SF_BLAS_RESTRICT Y, const int incY
    );

    void saxpy(
        ThreadPool& thread_pool,
        const int N,
        const float alpha,
        const float * SF_BLAS_RESTRICT X,
        float * SF_BLAS_RESTRICT Y
    );

    void saxpy(
        ThreadPool& thread_pool,
        const int N,
        const float alpha,
        const float * SF_BLAS_RESTRICT X, const int incX,
        float * SF_BLAS_RESTRICT Y, const int incY
    );

    // ---- sgemm -----------------------------------------------------------
    // The parameter list mirrors CBLAS' cblas_sgemm with a leading thread
    // pool argument. cblas_sgemm computes
    //     C := alpha * op(A) * op(B) + beta * C
    // where op(A) is M x K, op(B) is K x N, C is M x N, and lda/ldb/ldc are
    // the leading dimensions of the respective matrices.
    // A, B and C are declared non-aliasing via SF_BLAS_RESTRICT.

    void sgemm(
        ThreadPool& thread_pool,
        MatrixLayout layout, MatrixTranspose TransA, MatrixTranspose TransB,
        const int M, const int N, const int K,
        const float alpha,
        const float * SF_BLAS_RESTRICT A, const int lda,
        const float * SF_BLAS_RESTRICT B, const int ldb,
        const float beta,
        float * SF_BLAS_RESTRICT C, const int ldc
    );

    // Self-test entry point; what exactly is verified is determined by the
    // implementation file.
    void test(
        ThreadPool& thread_pool
    );

    // Benchmark entry point; what exactly is measured is determined by the
    // implementation file.
    void bench(
        ThreadPool& thread_pool
    );

}

#endif

View File

@@ -39,6 +39,15 @@
/// pointer to an entry its life time is unlimited and we don't have
/// to care about someone changing the entry under our feet.
namespace Detail {

    // Identity metafunction: TypeIdentity<T>::Type is exactly T.
    // Spelling a function parameter as `typename TypeIdentity<T>::Type`
    // puts it in a non-deduced context, so T is deduced from the other
    // arguments only and this one is merely converted to T.
    template <typename ValueT>
    struct TypeIdentity { using Type = ValueT; };

}
class Thread {
std::mutex mutex;
@@ -120,6 +129,26 @@ struct ThreadPool : public std::vector<Thread*> {
// to the state of the `worker` function object.
void execute_with_workers(const std::function<void(Thread&)>& worker);
// Calls `func(thread, i)` for indices i starting at `begin` and strictly
// below `end`, handing the indices out through a shared atomic counter to
// whatever threads execute_with_workers runs the worker on. Each index is
// claimed by exactly one fetch_add, so no index is processed twice.
//
// `end` is declared through Detail::TypeIdentity so that IndexT is deduced
// from `begin` alone.
//
// `end` and `func` are copied into the worker; the counter is captured by
// reference.
// NOTE(review): the by-reference capture of the local counter relies on the
// worker not being run after this function returns - confirm against
// execute_with_workers.
template <typename IndexT, typename FuncT>
void for_each_index_with_workers(
    IndexT begin,
    typename Detail::TypeIdentity<IndexT>::Type end,
    FuncT func)
{
    std::atomic<IndexT> next_index(begin);

    auto worker = [&next_index, end, func](Thread& th) mutable {
        while (true) {
            // Atomically claim the next unprocessed index.
            const auto claimed = next_index.fetch_add(1);

            // Everything below `end` has been handed out: this worker is done.
            if (claimed >= end)
                return;

            func(th, claimed);
        }
    };

    execute_with_workers(worker);
}
void start_thinking(Position&, StateListPtr&, const Search::LimitsType&, bool = false);
void clear();
void set(size_t);

View File

@@ -22,6 +22,7 @@
#include <sstream>
#include <string>
#include "extra/stockfish_blas.h"
#include "nnue/evaluate_nnue.h"
#include "evaluate.h"
#include "movegen.h"
@@ -354,6 +355,14 @@ void UCI::loop(int argc, char* argv[]) {
std::cout << th.thread_idx() << '\n';
});
}
// "blastest": run Blas::test on the Threads pool.
else if (token == "blastest")
{
Blas::test(Threads);
}
// "blasbench": run Blas::bench on the Threads pool.
else if (token == "blasbench")
{
Blas::bench(Threads);
}
// test command
else if (token == "test") test_cmd(pos, is);