From b5714c4084719cd089c2d70266404e4e36f0a129 Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Tue, 27 Oct 2020 18:41:17 +0100 Subject: [PATCH] Parallelize input slice trainer backprop. --- src/nnue/trainer/trainer_input_slice.h | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/nnue/trainer/trainer_input_slice.h b/src/nnue/trainer/trainer_input_slice.h index 03e9fec0..a93a3ea0 100644 --- a/src/nnue/trainer/trainer_input_slice.h +++ b/src/nnue/trainer/trainer_input_slice.h @@ -236,17 +236,29 @@ namespace Eval::NNUE { const LearnFloatType* gradients, LearnFloatType learning_rate) { - for (IndexType b = 0; b < batch_size_; ++b) { - const IndexType input_offset = kInputDimensions * b; - const IndexType output_offset = kOutputDimensions * b; - for (IndexType i = 0; i < kInputDimensions; ++i) { - if ((int)i < (int)Offset || i >= Offset + kOutputDimensions) { + thread_pool.for_each_index_with_workers( + 0, batch_size_, + [&](Thread&, int b) { + const IndexType input_offset = kInputDimensions * b; + const IndexType output_offset = kOutputDimensions * b; + + IndexType i = 0; + for (; i < Offset; ++i) { gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0); - } else { + } + + for (; i < Offset + kOutputDimensions; ++i) { gradients_[input_offset + i] = gradients[output_offset + i - Offset]; } + + for (; i < kInputDimensions; ++i) + { + gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0); + } } - } + ); + thread_pool.wait_for_workers_finished(); + shared_input_trainer_->backpropagate(thread_pool, gradients_.data(), learning_rate); }