Mirror of https://github.com/HChaZZY/Stockfish.git
Synced 2025-12-26 12:06:22 +08:00
Prepare input slice trainer.
@@ -34,15 +34,15 @@ namespace Eval::NNUE {
     // Set options such as hyperparameters
     void send_message(Message* message) {
-      if (num_calls_ == 0) {
+      if (num_calls_[0] == 0) {
         current_operation_ = Operation::kSendMessage;
         feature_transformer_trainer_->send_message(message);
       }

       assert(current_operation_ == Operation::kSendMessage);

-      if (++num_calls_ == num_referrers_) {
-        num_calls_ = 0;
+      if (++num_calls_[0] == num_referrers_) {
+        num_calls_[0] = 0;
         current_operation_ = Operation::kNone;
       }
     }

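The counting scheme above is a fan-in guard: the shared input layer is referenced by several slice trainers, so only the first caller in a step forwards the operation to the wrapped feature-transformer trainer, and the counter resets once all num_referrers_ callers have checked in. A minimal standalone sketch of the same guard, with made-up names (SharedGuard, run_once are illustrative, not part of this codebase):

    #include <cstdint>
    #include <functional>

    // Illustrative fan-in guard: the shared operation runs once per step,
    // no matter how many referrers call into the shared layer. Only the
    // counting scheme mirrors the diff; the names are invented.
    class SharedGuard {
     public:
      explicit SharedGuard(std::uint32_t num_referrers)
          : num_referrers_(num_referrers), num_calls_(0) {}

      void run_once(const std::function<void()>& op) {
        if (num_calls_ == 0)
          op();                    // first referrer performs the work
        if (++num_calls_ == num_referrers_)
          num_calls_ = 0;          // last referrer resets for the next step
      }

     private:
      std::uint32_t num_referrers_;
      std::uint32_t num_calls_;
    };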
@@ -50,55 +50,79 @@ namespace Eval::NNUE {
     // Initialize the parameters with random numbers
     template <typename RNG>
     void initialize(RNG& rng) {
-      if (num_calls_ == 0) {
+      if (num_calls_[0] == 0) {
         current_operation_ = Operation::kInitialize;
         feature_transformer_trainer_->initialize(rng);
       }

       assert(current_operation_ == Operation::kInitialize);

-      if (++num_calls_ == num_referrers_) {
-        num_calls_ = 0;
+      if (++num_calls_[0] == num_referrers_) {
+        num_calls_[0] = 0;
         current_operation_ = Operation::kNone;
       }
     }

-    // forward propagation
-    const LearnFloatType* propagate(ThreadPool& thread_pool, const std::vector<Example>& batch) {
-      if (gradients_.size() < kInputDimensions * batch.size()) {
-        gradients_.resize(kInputDimensions * batch.size());
+    const LearnFloatType* step_start(ThreadPool& thread_pool, const std::vector<Example>& combined_batch) {
+      if (gradients_.size() < kInputDimensions * combined_batch.size()) {
+        gradients_.resize(kInputDimensions * combined_batch.size());
       }

-      batch_size_ = static_cast<IndexType>(batch.size());
-
-      if (num_calls_ == 0) {
-        current_operation_ = Operation::kPropagate;
-        output_ = feature_transformer_trainer_->propagate(thread_pool, batch);
+      if (num_calls_.size() < thread_pool.size())
+      {
+        num_calls_.resize(thread_pool.size(), 0);
       }

-      assert(current_operation_ == Operation::kPropagate);
+      batch_size_ = static_cast<IndexType>(combined_batch.size());

-      if (++num_calls_ == num_referrers_) {
-        num_calls_ = 0;
+      if (num_calls_[0] == 0) {
+        current_operation_ = Operation::kStepStart;
+        output_ = feature_transformer_trainer_->step_start(thread_pool, combined_batch);
+      }
+
+      assert(current_operation_ == Operation::kStepStart);
+
+      if (++num_calls_[0] == num_referrers_) {
+        num_calls_[0] = 0;
         current_operation_ = Operation::kNone;
       }

       return output_;
     }

+    // forward propagation
+    void propagate(Thread& th, uint64_t offset, uint64_t count) {
+      const auto thread_id = th.thread_idx();
+
+      if (num_calls_[thread_id] == 0) {
+        current_operation_ = Operation::kPropagate;
+        feature_transformer_trainer_->propagate(th, offset, count);
+      }
+
+      assert(current_operation_ == Operation::kPropagate);
+
+      if (++num_calls_[thread_id] == num_referrers_) {
+        num_calls_[thread_id] = 0;
+        current_operation_ = Operation::kNone;
+      }
+    }
+
     // backpropagation
-    void backpropagate(ThreadPool& thread_pool,
+    void backpropagate(Thread& th,
                        const LearnFloatType* gradients,
-                       LearnFloatType learning_rate) {
+                       uint64_t offset,
+                       uint64_t count) {

+      const auto thread_id = th.thread_idx();
+
       if (num_referrers_ == 1) {
-        feature_transformer_trainer_->backpropagate(thread_pool, gradients, learning_rate);
+        feature_transformer_trainer_->backpropagate(th, gradients, offset, count);
         return;
       }

-      if (num_calls_ == 0) {
+      if (num_calls_[thread_id] == 0) {
         current_operation_ = Operation::kBackPropagate;
-        for (IndexType b = 0; b < batch_size_; ++b) {
+        for (IndexType b = offset; b < offset + count; ++b) {
           const IndexType batch_offset = kInputDimensions * b;
           for (IndexType i = 0; i < kInputDimensions; ++i) {
             gradients_[batch_offset + i] = static_cast<LearnFloatType>(0.0);
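Two shape changes land in this hunk: num_calls_ becomes a per-thread vector (indexed by th.thread_idx(), with slot 0 used by the single-threaded step_start), and the forward pass now works on a [offset, offset + count) slice of the combined batch so each worker touches disjoint samples. A sketch of the partitioning such an interface implies; partition is a hypothetical helper for illustration, not an API from this fork:

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Hypothetical helper: split a combined batch into contiguous
    // [offset, offset + count) slices, one per worker, as suggested by
    // the new propagate(Thread&, uint64_t offset, uint64_t count) shape.
    // Precondition: worker_count > 0.
    std::vector<std::pair<std::uint64_t, std::uint64_t>>
    partition(std::uint64_t batch_size, std::uint64_t worker_count) {
      std::vector<std::pair<std::uint64_t, std::uint64_t>> slices;
      const std::uint64_t base = batch_size / worker_count;
      const std::uint64_t rem  = batch_size % worker_count;
      std::uint64_t offset = 0;
      for (std::uint64_t w = 0; w < worker_count; ++w) {
        const std::uint64_t count = base + (w < rem ? 1 : 0);  // spread remainder
        slices.emplace_back(offset, count);  // worker w handles this slice
        offset += count;
      }
      return slices;
    }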
@@ -108,17 +132,31 @@ namespace Eval::NNUE {

       assert(current_operation_ == Operation::kBackPropagate);

-      for (IndexType b = 0; b < batch_size_; ++b) {
+      for (IndexType b = offset; b < offset + count; ++b) {
         const IndexType batch_offset = kInputDimensions * b;
         for (IndexType i = 0; i < kInputDimensions; ++i) {
           gradients_[batch_offset + i] += gradients[batch_offset + i];
         }
       }

-      if (++num_calls_ == num_referrers_) {
+      if (++num_calls_[thread_id] == num_referrers_) {
         feature_transformer_trainer_->backpropagate(
-            thread_pool, gradients_.data(), learning_rate);
-        num_calls_ = 0;
+            th, gradients_.data(), offset, count);
+        num_calls_[thread_id] = 0;
         current_operation_ = Operation::kNone;
       }
     }
+
+    void step_end(ThreadPool& thread_pool, LearnFloatType learning_rate) {
+      if (num_calls_[0] == 0) {
+        current_operation_ = Operation::kStepEnd;
+        feature_transformer_trainer_->step_end(thread_pool, learning_rate);
+      }
+
+      assert(current_operation_ == Operation::kStepEnd);
+
+      if (++num_calls_[0] == num_referrers_) {
+        num_calls_[0] = 0;
+        current_operation_ = Operation::kNone;
+      }
+    }
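With step_end in place, one optimizer step splits into three phases: step_start runs once over the combined batch, propagate and backpropagate run per worker over slices, and step_end applies the learning rate once at the end. A self-contained toy with the same phase structure; ToyTrainer and its two hard-coded workers are illustrative only, not the trainer API itself:

    #include <cstddef>
    #include <cstdint>
    #include <thread>
    #include <vector>

    // Toy three-phase step: set up once, work per-thread on disjoint
    // slices, update once. Only the phase structure mirrors the diff.
    struct ToyTrainer {
      std::vector<float> data;

      void step_start(std::size_t batch_size) { data.assign(batch_size, 0.0f); }

      void propagate(std::uint64_t offset, std::uint64_t count) {
        for (std::uint64_t i = offset; i < offset + count; ++i)
          data[i] += 1.0f;  // stand-in for per-slice forward work
      }

      void step_end(float learning_rate) {
        for (float& v : data)
          v *= learning_rate;  // stand-in for the single-threaded update
      }
    };

    int main() {
      ToyTrainer trainer;
      constexpr std::size_t batch_size = 8;
      trainer.step_start(batch_size);

      // Two workers process disjoint halves of the batch.
      std::thread a([&] { trainer.propagate(0, batch_size / 2); });
      std::thread b([&] { trainer.propagate(batch_size / 2, batch_size / 2); });
      a.join();
      b.join();

      trainer.step_end(0.5f);
    }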
@@ -128,7 +166,7 @@ namespace Eval::NNUE {
     SharedInputTrainer(FeatureTransformer* ft) :
         batch_size_(0),
         num_referrers_(0),
-        num_calls_(0),
+        num_calls_(1, 0),
         current_operation_(Operation::kNone),
         feature_transformer_trainer_(Trainer<FeatureTransformer>::create(
             ft)),
@@ -144,8 +182,10 @@ namespace Eval::NNUE {
       kNone,
       kSendMessage,
       kInitialize,
+      kStepStart,
       kPropagate,
       kBackPropagate,
+      kStepEnd,
     };

     // number of samples in mini-batch
@@ -155,7 +195,7 @@ namespace Eval::NNUE {
     std::uint32_t num_referrers_;

     // Number of times the current process has been called
-    std::uint32_t num_calls_;
+    std::vector<std::uint32_t> num_calls_;

     // current processing type
     Operation current_operation_;
@@ -197,74 +237,81 @@ namespace Eval::NNUE {
       shared_input_trainer_->initialize(rng);
     }

-    // forward propagation
-    const LearnFloatType* propagate(ThreadPool& thread_pool,const std::vector<Example>& batch) {
-      if (output_.size() < kOutputDimensions * batch.size()) {
-        output_.resize(kOutputDimensions * batch.size());
-        gradients_.resize(kInputDimensions * batch.size());
+    const LearnFloatType* step_start(ThreadPool& thread_pool, const std::vector<Example>& combined_batch) {
+      if (output_.size() < kOutputDimensions * combined_batch.size()) {
+        output_.resize(kOutputDimensions * combined_batch.size());
+        gradients_.resize(kInputDimensions * combined_batch.size());
       }

-      batch_size_ = static_cast<IndexType>(batch.size());
+      batch_size_ = static_cast<IndexType>(combined_batch.size());

-      const auto input = shared_input_trainer_->propagate(thread_pool, batch);
-      for (IndexType b = 0; b < batch_size_; ++b) {
+      input_ = shared_input_trainer_->step_start(thread_pool, combined_batch);
+
+      return output_.data();
+    }
+
+    // forward propagation
+    void propagate(Thread& th, uint64_t offset, uint64_t count) {
+
+      shared_input_trainer_->propagate(th, offset, count);
+
+      for (IndexType b = offset; b < offset + count; ++b) {
         const IndexType input_offset = kInputDimensions * b;
         const IndexType output_offset = kOutputDimensions * b;

 #if defined(USE_BLAS)

         cblas_scopy(
-          kOutputDimensions, &input[input_offset + Offset], 1,
+          kOutputDimensions, &input_[input_offset + Offset], 1,
           &output_[output_offset], 1
         );
 #else

         Blas::scopy(
           thread_pool,
-          kOutputDimensions, &input[input_offset + Offset], 1,
+          kOutputDimensions, &input_[input_offset + Offset], 1,
           &output_[output_offset], 1
         );

 #endif
       }

-      return output_.data();
     }

     // backpropagation
-    void backpropagate(ThreadPool& thread_pool,
+    void backpropagate(Thread& th,
                        const LearnFloatType* gradients,
-                       LearnFloatType learning_rate) {
+                       uint64_t offset,
+                       uint64_t count) {

-      thread_pool.for_each_index_with_workers(
-        0, batch_size_,
-        [&](Thread&, int b) {
-        const IndexType input_offset = kInputDimensions * b;
-        const IndexType output_offset = kOutputDimensions * b;
-
-        IndexType i = 0;
-        for (; i < Offset; ++i) {
-          gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
-        }
-
-        for (; i < Offset + kOutputDimensions; ++i) {
-          gradients_[input_offset + i] = gradients[output_offset + i - Offset];
-        }
-
-        for (; i < kInputDimensions; ++i)
-        {
-          gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
-        }
-      }
-      );
-      thread_pool.wait_for_workers_finished();
-
-      shared_input_trainer_->backpropagate(thread_pool, gradients_.data(), learning_rate);
+      for (IndexType b = offset; b < offset + count; ++b)
+      {
+        const IndexType input_offset = kInputDimensions * b;
+        const IndexType output_offset = kOutputDimensions * b;
+
+        IndexType i = 0;
+        for (; i < Offset; ++i) {
+          gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
+        }
+
+        for (; i < Offset + kOutputDimensions; ++i) {
+          gradients_[input_offset + i] = gradients[output_offset + i - Offset];
+        }
+
+        for (; i < kInputDimensions; ++i)
+        {
+          gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
+        }
+      }
+
+      shared_input_trainer_->backpropagate(th, gradients_.data(), offset, count);
     }

+    void step_end(ThreadPool& thread_pool, LearnFloatType learning_rate) {
+      shared_input_trainer_->step_end(thread_pool, learning_rate);
+    }
+
    private:
     // constructor
-    Trainer(FeatureTransformer* ft):
+    Trainer(FeatureTransformer* ft) :
         batch_size_(0),
         shared_input_trainer_(SharedInputTrainer::create(ft)) {
     }
@@ -278,6 +325,8 @@ namespace Eval::NNUE {
     // number of samples in mini-batch
     IndexType batch_size_;

+    const LearnFloatType* input_;
+
     // Trainer of shared input layer
     const std::shared_ptr<SharedInputTrainer> shared_input_trainer_;

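The new input_ member caches the pointer returned by the shared layer's step_start, so each worker's propagate can copy its slice without re-running the shared forward pass. The slice semantics themselves are unchanged: the forward pass copies a kOutputDimensions-wide window starting at Offset out of each input row, and the backward pass scatters the output gradients into that window of a zero-filled input-gradient row. A self-contained illustration with made-up dimensions (16-wide input row, 4-wide slice at offset 6):

    #include <algorithm>
    #include <array>
    #include <cstddef>

    // Illustrative slice copy and gradient scatter for one sample.
    // The dimensions are invented; only the layout mirrors the diff.
    constexpr std::size_t kInputDimensions  = 16;
    constexpr std::size_t kOutputDimensions = 4;
    constexpr std::size_t Offset            = 6;

    int main() {
      std::array<float, kInputDimensions>  input{};
      std::array<float, kOutputDimensions> output{};
      std::array<float, kOutputDimensions> output_gradients{};
      std::array<float, kInputDimensions>  input_gradients{};

      // Forward: the output is a window of the shared input (the scopy call).
      std::copy_n(input.begin() + Offset, kOutputDimensions, output.begin());

      // Backward: only the window receives gradients; the rest stays zero.
      input_gradients.fill(0.0f);
      std::copy_n(output_gradients.begin(), kOutputDimensions,
                  input_gradients.begin() + Offset);
    }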