mirror of
https://github.com/HChaZZY/Stockfish.git
synced 2025-12-25 19:46:55 +08:00
Add our own blas-like routines that use stockfish's thread pool for parallelization.
This commit is contained in:
@@ -47,6 +47,7 @@ PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 1000 output_file_name $(PGO_TRAINING_
|
||||
SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \
|
||||
material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
|
||||
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
|
||||
extra/stockfish_blas.cpp \
|
||||
nnue/evaluate_nnue.cpp \
|
||||
nnue/evaluate_nnue_learner.cpp \
|
||||
nnue/features/half_kp.cpp \
|
||||
|
||||
1033
src/extra/stockfish_blas.cpp
Normal file
1033
src/extra/stockfish_blas.cpp
Normal file
File diff suppressed because it is too large
Load Diff
130
src/extra/stockfish_blas.h
Normal file
130
src/extra/stockfish_blas.h
Normal file
@@ -0,0 +1,130 @@
|
||||
#ifndef STOCKFISH_BLAS_H_INCLUDED
#define STOCKFISH_BLAS_H_INCLUDED

// Declarations of single-precision, BLAS-like routines.
//
// Only the interface lives here; the definitions are in stockfish_blas.cpp.
// The names and parameter orders follow the classic BLAS/CBLAS routines of
// the same name, so the operation noted on each group below is the one the
// corresponding BLAS routine performs.
// NOTE(review): the implementation is not visible from this header -- confirm
// the exact semantics (strides, threading, edge cases) against the .cpp.
//
// Overload pattern used for scopy/sscal/saxpy:
//   - without incX/incY : presumably unit stride (contiguous arrays);
//   - with incX/incY    : explicit element strides;
//   - with ThreadPool&  : presumably splits the work across the given pool,
//                         the others presumably run on the calling thread.

struct ThreadPool;

// SF_BLAS_RESTRICT expands to the compiler's "no aliasing" pointer qualifier.
// Passing overlapping arrays through parameters marked with it is undefined
// behaviour on compilers that honour the qualifier.
#if defined (_MSC_VER)
#define SF_BLAS_RESTRICT __restrict
#elif defined (__INTEL_COMPILER)
// Bare `restrict` is a C99 keyword and is only accepted in C++ when an extra
// compiler option is given; `__restrict` needs no option.
#define SF_BLAS_RESTRICT __restrict
#elif defined (__clang__)
#define SF_BLAS_RESTRICT __restrict__
#elif defined (__GNUC__)
#define SF_BLAS_RESTRICT __restrict__
#else
// Unknown compiler: drop the hint instead of failing to compile. The
// qualifier is purely an optimisation aid, so omitting it is always safe.
#define SF_BLAS_RESTRICT
#endif

namespace Blas {

    // Storage order of a matrix. The numeric values are the same as the
    // CBLAS constants CblasRowMajor / CblasColMajor.
    enum struct MatrixLayout {
        RowMajor = 101,
        ColMajor = 102
    };

    // Whether a matrix operand is used as-is or transposed. The numeric
    // values are the same as the CBLAS constants CblasNoTrans / CblasTrans.
    enum struct MatrixTranspose {
        NoTrans = 111,
        Trans = 112
    };

    // scopy -- BLAS semantics: Y := X for N elements.
    void scopy(
        const int N,
        const float * SF_BLAS_RESTRICT X,
        float * SF_BLAS_RESTRICT Y
    );

    void scopy(
        const int N,
        const float * SF_BLAS_RESTRICT X, const int incX,
        float * SF_BLAS_RESTRICT Y, const int incY
    );

    void scopy(
        ThreadPool& thread_pool,
        const int N,
        const float * SF_BLAS_RESTRICT X,
        float * SF_BLAS_RESTRICT Y
    );

    void scopy(
        ThreadPool& thread_pool,
        const int N,
        const float * SF_BLAS_RESTRICT X, const int incX,
        float * SF_BLAS_RESTRICT Y, const int incY
    );

    // sscal -- BLAS semantics: X := alpha * X for N elements (in place).
    void sscal(
        const int N,
        const float alpha,
        float * SF_BLAS_RESTRICT X
    );

    void sscal(
        const int N,
        const float alpha,
        float * SF_BLAS_RESTRICT X, const int incX
    );

    void sscal(
        ThreadPool& thread_pool,
        const int N,
        const float alpha,
        float * SF_BLAS_RESTRICT X
    );

    void sscal(
        ThreadPool& thread_pool,
        const int N,
        const float alpha,
        float * SF_BLAS_RESTRICT X, const int incX
    );

    // saxpy -- BLAS semantics: Y := alpha * X + Y for N elements.
    void saxpy(
        const int N,
        const float alpha,
        const float * SF_BLAS_RESTRICT X,
        float * SF_BLAS_RESTRICT Y
    );

    void saxpy(
        const int N,
        const float alpha,
        const float * SF_BLAS_RESTRICT X, const int incX,
        float * SF_BLAS_RESTRICT Y, const int incY
    );

    void saxpy(
        ThreadPool& thread_pool,
        const int N,
        const float alpha,
        const float * SF_BLAS_RESTRICT X,
        float * SF_BLAS_RESTRICT Y
    );

    void saxpy(
        ThreadPool& thread_pool,
        const int N,
        const float alpha,
        const float * SF_BLAS_RESTRICT X, const int incX,
        float * SF_BLAS_RESTRICT Y, const int incY
    );

    // sgemm -- BLAS semantics: C := alpha * op(A) * op(B) + beta * C, where
    // op(A) is M x K, op(B) is K x N and C is M x N; op() is selected by
    // TransA / TransB and lda/ldb/ldc are the leading dimensions.
    // The parameter order mirrors cblas_sgemm, preceded by the thread pool.
    void sgemm(
        ThreadPool& thread_pool,
        MatrixLayout layout, MatrixTranspose TransA, MatrixTranspose TransB,
        const int M, const int N, const int K,
        const float alpha,
        const float * SF_BLAS_RESTRICT A, const int lda,
        const float * SF_BLAS_RESTRICT B, const int ldb,
        const float beta,
        float * SF_BLAS_RESTRICT C, const int ldc
    );

    // Self-test entry point for the routines above, run on `thread_pool`.
    // What exactly is checked is defined in stockfish_blas.cpp.
    void test(
        ThreadPool& thread_pool
    );

    // Benchmark entry point for the routines above, run on `thread_pool`.
    // What exactly is measured is defined in stockfish_blas.cpp.
    void bench(
        ThreadPool& thread_pool
    );
}

#endif // STOCKFISH_BLAS_H_INCLUDED
|
||||
29
src/thread.h
29
src/thread.h
@@ -39,6 +39,15 @@
|
||||
/// pointer to an entry its life time is unlimited and we don't have
|
||||
/// to care about someone changing the entry under our feet.
|
||||
|
||||
namespace Detail {

    // Identity type trait: TypeIdentity<T>::Type is exactly T.
    //
    // Writing a function parameter as `typename TypeIdentity<T>::Type`
    // places T in a non-deduced context, so T is deduced from the other
    // arguments only and this parameter is converted to match.
    template <class T>
    struct TypeIdentity
    {
        using Type = T;
    };

} // namespace Detail
|
||||
|
||||
class Thread {
|
||||
|
||||
std::mutex mutex;
|
||||
@@ -120,6 +129,26 @@ struct ThreadPool : public std::vector<Thread*> {
|
||||
// to the state of the `worker` function object.
|
||||
void execute_with_workers(const std::function<void(Thread&)>& worker);
|
||||
|
||||
template <typename IndexT, typename FuncT>
|
||||
void for_each_index_with_workers(
|
||||
IndexT begin,
|
||||
typename Detail::TypeIdentity<IndexT>::Type end,
|
||||
FuncT func)
|
||||
{
|
||||
std::atomic<IndexT> i_atomic = begin;
|
||||
|
||||
execute_with_workers(
|
||||
[&i_atomic, end, func](Thread& th) mutable {
|
||||
for(;;) {
|
||||
const auto i = i_atomic.fetch_add(1);
|
||||
if (i >= end)
|
||||
break;
|
||||
|
||||
func(th, i);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void start_thinking(Position&, StateListPtr&, const Search::LimitsType&, bool = false);
|
||||
void clear();
|
||||
void set(size_t);
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "extra/stockfish_blas.h"
|
||||
#include "nnue/evaluate_nnue.h"
|
||||
#include "evaluate.h"
|
||||
#include "movegen.h"
|
||||
@@ -354,6 +355,14 @@ void UCI::loop(int argc, char* argv[]) {
|
||||
std::cout << th.thread_idx() << '\n';
|
||||
});
|
||||
}
|
||||
else if (token == "blastest")
|
||||
{
|
||||
Blas::test(Threads);
|
||||
}
|
||||
else if (token == "blasbench")
|
||||
{
|
||||
Blas::bench(Threads);
|
||||
}
|
||||
|
||||
// test command
|
||||
else if (token == "test") test_cmd(pos, is);
|
||||
|
||||
Reference in New Issue
Block a user