PascalCase -> snake_case for consistency with the rest of the codebase.
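Concretely, a renamed accessor changes like this (names taken from the hunks below; the surrounding class is abbreviated for illustration):

    // Before: PascalCase member functions
    static constexpr IndexType GetDimensions() {
      return FeatureType::kDimensions;
    }

    // After: snake_case, matching the rest of the codebase
    static constexpr IndexType get_dimensions() {
      return FeatureType::kDimensions;
    }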
@@ -14,12 +14,12 @@ namespace Eval::NNUE::Features {
 class Factorizer {
  public:
   // Get the dimensionality of the learning feature
-  static constexpr IndexType GetDimensions() {
+  static constexpr IndexType get_dimensions() {
     return FeatureType::kDimensions;
   }

   // Get index of learning feature and scale of learning rate
-  static void AppendTrainingFeatures(
+  static void append_training_features(
       IndexType base_index, std::vector<TrainingFeature>* training_features) {

     assert(base_index < FeatureType::kDimensions);
@@ -35,7 +35,7 @@ namespace Eval::NNUE::Features {

 // Add the original input features to the learning features
 template <typename FeatureType>
-IndexType AppendBaseFeature(
+IndexType append_base_feature(
     FeatureProperties properties, IndexType base_index,
     std::vector<TrainingFeature>* training_features) {

@@ -47,7 +47,7 @@ namespace Eval::NNUE::Features {

 // If the learning rate scale is not 0, inherit other types of learning features
 template <typename FeatureType>
-IndexType InheritFeaturesIfRequired(
+IndexType inherit_features_if_required(
     IndexType index_offset, FeatureProperties properties, IndexType base_index,
     std::vector<TrainingFeature>* training_features) {

@@ -55,17 +55,17 @@ namespace Eval::NNUE::Features {
     return 0;
   }

-  assert(properties.dimensions == Factorizer<FeatureType>::GetDimensions());
+  assert(properties.dimensions == Factorizer<FeatureType>::get_dimensions());
   assert(base_index < FeatureType::kDimensions);

   const auto start = training_features->size();
-  Factorizer<FeatureType>::AppendTrainingFeatures(
+  Factorizer<FeatureType>::append_training_features(
       base_index, training_features);

   for (auto i = start; i < training_features->size(); ++i) {
     auto& feature = (*training_features)[i];
-    assert(feature.GetIndex() < Factorizer<FeatureType>::GetDimensions());
-    feature.ShiftIndex(index_offset);
+    assert(feature.get_index() < Factorizer<FeatureType>::get_dimensions());
+    feature.shift_index(index_offset);
   }

   return properties.dimensions;
@@ -73,7 +73,7 @@ namespace Eval::NNUE::Features {

 // Return the index difference as needed, without adding learning features
 // Call instead of InheritFeaturesIfRequired() if there are no corresponding features
-IndexType SkipFeatures(FeatureProperties properties) {
+IndexType skip_features(FeatureProperties properties) {
   if (!properties.active)
     return 0;

@@ -82,7 +82,7 @@ namespace Eval::NNUE::Features {

 // Get the dimensionality of the learning feature
 template <std::size_t N>
-constexpr IndexType GetActiveDimensions(
+constexpr IndexType get_active_dimensions(
     const FeatureProperties (&properties)[N]) {

   static_assert(N > 0, "");
@@ -100,7 +100,7 @@ namespace Eval::NNUE::Features {

 // get the number of elements in the array
 template <typename T, std::size_t N>
-constexpr std::size_t GetArrayLength(const T (&/*array*/)[N]) {
+constexpr std::size_t get_array_length(const T (&/*array*/)[N]) {
   return N;
 }

@@ -22,12 +22,12 @@ namespace Eval::NNUE::Features {
       FeatureSet<FirstFeatureType, RemainingFeatureTypes...>::kDimensions;

   // Get the dimensionality of the learning feature
-  static constexpr IndexType GetDimensions() {
-    return Head::GetDimensions() + Tail::GetDimensions();
+  static constexpr IndexType get_dimensions() {
+    return Head::get_dimensions() + Tail::get_dimensions();
   }

   // Get index of learning feature and scale of learning rate
-  static void AppendTrainingFeatures(
+  static void append_training_features(
       IndexType base_index, std::vector<TrainingFeature>* training_features,
       IndexType base_dimensions = kBaseDimensions) {

@@ -36,29 +36,29 @@ namespace Eval::NNUE::Features {
     constexpr auto boundary = FeatureSet<RemainingFeatureTypes...>::kDimensions;

     if (base_index < boundary) {
-      Tail::AppendTrainingFeatures(
+      Tail::append_training_features(
           base_index, training_features, base_dimensions);
     }
     else {
       const auto start = training_features->size();

-      Head::AppendTrainingFeatures(
+      Head::append_training_features(
           base_index - boundary, training_features, base_dimensions);

       for (auto i = start; i < training_features->size(); ++i) {
         auto& feature = (*training_features)[i];
-        const auto index = feature.GetIndex();
+        const auto index = feature.get_index();

-        assert(index < Head::GetDimensions() ||
+        assert(index < Head::get_dimensions() ||
               (index >= base_dimensions &&
                index < base_dimensions +
-                   Head::GetDimensions() - Head::kBaseDimensions));
+                   Head::get_dimensions() - Head::kBaseDimensions));

         if (index < Head::kBaseDimensions) {
-          feature.ShiftIndex(Tail::kBaseDimensions);
+          feature.shift_index(Tail::kBaseDimensions);
         }
         else {
-          feature.ShiftIndex(Tail::GetDimensions() - Tail::kBaseDimensions);
+          feature.shift_index(Tail::get_dimensions() - Tail::kBaseDimensions);
         }
       }
     }
@@ -74,12 +74,12 @@ namespace Eval::NNUE::Features {
   static constexpr IndexType kBaseDimensions = FeatureType::kDimensions;

   // Get the dimensionality of the learning feature
-  static constexpr IndexType GetDimensions() {
-    return Factorizer<FeatureType>::GetDimensions();
+  static constexpr IndexType get_dimensions() {
+    return Factorizer<FeatureType>::get_dimensions();
   }

   // Get index of learning feature and scale of learning rate
-  static void AppendTrainingFeatures(
+  static void append_training_features(
       IndexType base_index, std::vector<TrainingFeature>* training_features,
       IndexType base_dimensions = kBaseDimensions) {

@@ -87,14 +87,14 @@ namespace Eval::NNUE::Features {

     const auto start = training_features->size();

-    Factorizer<FeatureType>::AppendTrainingFeatures(
+    Factorizer<FeatureType>::append_training_features(
         base_index, training_features);

     for (auto i = start; i < training_features->size(); ++i) {
       auto& feature = (*training_features)[i];
-      assert(feature.GetIndex() < Factorizer<FeatureType>::GetDimensions());
-      if (feature.GetIndex() >= kBaseDimensions) {
-        feature.ShiftIndex(base_dimensions - kBaseDimensions);
+      assert(feature.get_index() < Factorizer<FeatureType>::get_dimensions());
+      if (feature.get_index() >= kBaseDimensions) {
+        feature.shift_index(base_dimensions - kBaseDimensions);
       }
     }
   }

@@ -37,25 +37,25 @@ namespace Eval::NNUE::Features {
     // kFeaturesHalfK
     {true, SQUARE_NB},
     // kFeaturesP
-    {true, Factorizer<P>::GetDimensions()},
+    {true, Factorizer<P>::get_dimensions()},
     // kFeaturesHalfRelativeKP
-    {true, Factorizer<HalfRelativeKP<AssociatedKing>>::GetDimensions()},
+    {true, Factorizer<HalfRelativeKP<AssociatedKing>>::get_dimensions()},
   };

-  static_assert(GetArrayLength(kProperties) == kNumTrainingFeatureTypes, "");
+  static_assert(get_array_length(kProperties) == kNumTrainingFeatureTypes, "");

  public:
   // Get the dimensionality of the learning feature
-  static constexpr IndexType GetDimensions() {
-    return GetActiveDimensions(kProperties);
+  static constexpr IndexType get_dimensions() {
+    return get_active_dimensions(kProperties);
   }

   // Get index of learning feature and scale of learning rate
-  static void AppendTrainingFeatures(
+  static void append_training_features(
       IndexType base_index, std::vector<TrainingFeature>* training_features) {

     // kFeaturesHalfKP
-    IndexType index_offset = AppendBaseFeature<FeatureType>(
+    IndexType index_offset = append_base_feature<FeatureType>(
         kProperties[kFeaturesHalfKP], base_index, training_features);

     const auto sq_k = static_cast<Square>(base_index / PS_END);
@@ -71,20 +71,20 @@ namespace Eval::NNUE::Features {
     }

     // kFeaturesP
-    index_offset += InheritFeaturesIfRequired<P>(
+    index_offset += inherit_features_if_required<P>(
         index_offset, kProperties[kFeaturesP], p, training_features);
     // kFeaturesHalfRelativeKP
     if (p >= PS_W_PAWN) {
-      index_offset += InheritFeaturesIfRequired<HalfRelativeKP<AssociatedKing>>(
+      index_offset += inherit_features_if_required<HalfRelativeKP<AssociatedKing>>(
           index_offset, kProperties[kFeaturesHalfRelativeKP],
-          HalfRelativeKP<AssociatedKing>::MakeIndex(sq_k, p),
+          HalfRelativeKP<AssociatedKing>::make_index(sq_k, p),
           training_features);
     }
     else {
-      index_offset += SkipFeatures(kProperties[kFeaturesHalfRelativeKP]);
+      index_offset += skip_features(kProperties[kFeaturesHalfRelativeKP]);
     }

-    assert(index_offset == GetDimensions());
+    assert(index_offset == get_dimensions());
   }
 };

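The kProperties table above pairs each training-feature type with an {active, dimensions} entry, and the renamed helpers consume it: get_array_length checks the table size, get_active_dimensions gives the total learning-feature dimensionality, and append_base_feature / inherit_features_if_required / skip_features advance index_offset until it equals get_dimensions(). A rough standalone sketch of that pattern, assuming get_active_dimensions simply sums the dimensions of the active entries (its body is not part of this diff):

    #include <cstddef>
    #include <cstdint>

    using IndexType = std::uint32_t;

    // Mirrors the {active, dimensions} entries in kProperties.
    struct FeatureProperties {
      bool active;
      IndexType dimensions;
    };

    // Number of elements in a C array (same shape as get_array_length in the diff).
    template <typename T, std::size_t N>
    constexpr std::size_t get_array_length(const T (&/*array*/)[N]) {
      return N;
    }

    // Assumed behaviour: total dimensionality contributed by the active entries.
    template <std::size_t N>
    constexpr IndexType get_active_dimensions(const FeatureProperties (&properties)[N]) {
      IndexType total = 0;
      for (const auto& p : properties)
        if (p.active)
          total += p.dimensions;
      return total;
    }

    // Hypothetical three-entry table, used only to exercise the helpers.
    constexpr FeatureProperties kExample[] = {{true, 64}, {false, 128}, {true, 32}};
    static_assert(get_array_length(kExample) == 3, "");
    static_assert(get_active_dimensions(kExample) == 96, "");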
@@ -37,22 +37,22 @@ namespace Eval::NNUE {
   }

   TrainingFeature& operator+=(const TrainingFeature& other) {
-    assert(other.GetIndex() == GetIndex());
-    assert(other.GetCount() + GetCount() < (1 << kCountBits));
-    index_and_count_ += other.GetCount();
+    assert(other.get_index() == get_index());
+    assert(other.get_count() + get_count() < (1 << kCountBits));
+    index_and_count_ += other.get_count();
     return *this;
   }

-  IndexType GetIndex() const {
+  IndexType get_index() const {
     return static_cast<IndexType>(index_and_count_ >> kCountBits);
   }

-  void ShiftIndex(IndexType offset) {
-    assert(GetIndex() + offset < (1 << kIndexBits));
+  void shift_index(IndexType offset) {
+    assert(get_index() + offset < (1 << kIndexBits));
     index_and_count_ += offset << kCountBits;
   }

-  IndexType GetCount() const {
+  IndexType get_count() const {
     return static_cast<IndexType>(index_and_count_ & ((1 << kCountBits) - 1));
   }

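These accessors imply a simple packing scheme: the feature index lives in the high bits of index_and_count_ and the occurrence count in the low kCountBits bits. A minimal standalone sketch of that scheme; the concrete bit widths (24/8) and the class name are assumptions for illustration, not taken from the diff:

    #include <cassert>
    #include <cstdint>

    using IndexType = std::uint32_t;

    class PackedFeature {  // illustrative stand-in for TrainingFeature
     public:
      static constexpr int kIndexBits = 24;  // assumed width
      static constexpr int kCountBits = 8;   // assumed width

      explicit PackedFeature(IndexType index)
          : index_and_count_((index << kCountBits) + 1) {}  // count starts at 1

      IndexType get_index() const { return index_and_count_ >> kCountBits; }
      IndexType get_count() const { return index_and_count_ & ((1 << kCountBits) - 1); }

      void shift_index(IndexType offset) {
        assert(get_index() + offset < (1u << kIndexBits));
        index_and_count_ += offset << kCountBits;  // count bits stay untouched
      }

      PackedFeature& operator+=(const PackedFeature& other) {
        assert(other.get_index() == get_index());
        assert(other.get_count() + get_count() < (1 << kCountBits));
        index_and_count_ += other.get_count();  // merge occurrence counts
        return *this;
      }

     private:
      std::uint32_t index_and_count_;
    };

    int main() {
      PackedFeature f(42), g(42);
      f += g;              // same index, counts add up
      f.shift_index(100);  // move into another feature block
      assert(f.get_index() == 142 && f.get_count() == 2);
    }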
@@ -86,7 +86,7 @@ namespace Eval::NNUE {
 };

 // determine whether to accept the message
-bool ReceiveMessage(const std::string& name, Message* message) {
+bool receive_message(const std::string& name, Message* message) {
   const auto subscript = "[" + std::to_string(message->num_peekers) + "]";

   if (message->name.substr(0, name.size() + 1) == name + "[") {
@@ -101,28 +101,15 @@ namespace Eval::NNUE {
   return false;
 }

-// split the string
-std::vector<std::string> Split(const std::string& input, char delimiter) {
-  std::istringstream stream(input);
-  std::string field;
-  std::vector<std::string> fields;
-
-  while (std::getline(stream, field, delimiter)) {
-    fields.push_back(field);
-  }
-
-  return fields;
-}
-
 // round a floating point number to an integer
 template <typename IntType>
-IntType Round(double value) {
+IntType round(double value) {
   return static_cast<IntType>(std::floor(value + 0.5));
 }

 // make_shared with alignment
 template <typename T, typename... ArgumentTypes>
-std::shared_ptr<T> MakeAlignedSharedPtr(ArgumentTypes&&... arguments) {
+std::shared_ptr<T> make_aligned_shared_ptr(ArgumentTypes&&... arguments) {
   const auto ptr = new(std_aligned_alloc(alignof(T), sizeof(T)))
       T(std::forward<ArgumentTypes>(arguments)...);

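One usage note on the rounding helper renamed above: std::floor(value + 0.5) rounds halfway cases toward positive infinity, which differs from std::lround for negative inputs. A small demo, with the helper re-declared under a hypothetical name to keep the snippet self-contained and avoid shadowing ::round:

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    // Same shape as the helper in the diff above.
    template <typename IntType>
    IntType round_half_up(double value) {
      return static_cast<IntType>(std::floor(value + 0.5));
    }

    int main() {
      assert(round_half_up<std::int16_t>(1.4) == 1);
      assert(round_half_up<std::int16_t>(1.5) == 2);
      assert(round_half_up<std::int16_t>(-2.5) == -2);  // half-up: toward +infinity
      assert(std::lround(-2.5) == -3);                  // lround: away from zero
    }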
@@ -21,7 +21,7 @@ namespace Eval::NNUE {

  public:
   // factory function
-  static std::shared_ptr<Trainer> Create(
+  static std::shared_ptr<Trainer> create(
       LayerType* target_layer, FeatureTransformer* ft) {

     return std::shared_ptr<Trainer>(
@@ -29,31 +29,31 @@ namespace Eval::NNUE {
   }

   // Set options such as hyperparameters
-  void SendMessage(Message* message) {
-    previous_layer_trainer_->SendMessage(message);
+  void send_message(Message* message) {
+    previous_layer_trainer_->send_message(message);

-    if (ReceiveMessage("momentum", message)) {
+    if (receive_message("momentum", message)) {
       momentum_ = static_cast<LearnFloatType>(std::stod(message->value));
     }

-    if (ReceiveMessage("learning_rate_scale", message)) {
+    if (receive_message("learning_rate_scale", message)) {
       learning_rate_scale_ =
           static_cast<LearnFloatType>(std::stod(message->value));
     }

-    if (ReceiveMessage("reset", message)) {
-      DequantizeParameters();
+    if (receive_message("reset", message)) {
+      dequantize_parameters();
     }

-    if (ReceiveMessage("quantize_parameters", message)) {
-      QuantizeParameters();
+    if (receive_message("quantize_parameters", message)) {
+      quantize_parameters();
     }
   }

   // Initialize the parameters with random numbers
   template <typename RNG>
-  void Initialize(RNG& rng) {
-    previous_layer_trainer_->Initialize(rng);
+  void initialize(RNG& rng) {
+    previous_layer_trainer_->initialize(rng);

     if (kIsOutputLayer) {
       // Initialize output layer with 0
@@ -80,18 +80,18 @@ namespace Eval::NNUE {
       }
     }

-    QuantizeParameters();
+    quantize_parameters();
   }

   // forward propagation
-  const LearnFloatType* Propagate(const std::vector<Example>& batch) {
+  const LearnFloatType* propagate(const std::vector<Example>& batch) {
     if (output_.size() < kOutputDimensions * batch.size()) {
       output_.resize(kOutputDimensions * batch.size());
       gradients_.resize(kInputDimensions * batch.size());
     }

     batch_size_ = static_cast<IndexType>(batch.size());
-    batch_input_ = previous_layer_trainer_->Propagate(batch);
+    batch_input_ = previous_layer_trainer_->propagate(batch);
 #if defined(USE_BLAS)
     for (IndexType b = 0; b < batch_size_; ++b) {
       const IndexType batch_offset = kOutputDimensions * b;
@@ -123,7 +123,7 @@ namespace Eval::NNUE {
   }

   // backpropagation
-  void Backpropagate(const LearnFloatType* gradients,
+  void backpropagate(const LearnFloatType* gradients,
                      LearnFloatType learning_rate) {

     const LearnFloatType local_learning_rate =
@@ -206,7 +206,7 @@ namespace Eval::NNUE {
     }

 #endif
-    previous_layer_trainer_->Backpropagate(gradients_.data(), learning_rate);
+    previous_layer_trainer_->backpropagate(gradients_.data(), learning_rate);
   }

  private:
@@ -214,7 +214,7 @@ namespace Eval::NNUE {
   Trainer(LayerType* target_layer, FeatureTransformer* ft) :
       batch_size_(0),
       batch_input_(nullptr),
-      previous_layer_trainer_(Trainer<PreviousLayer>::Create(
+      previous_layer_trainer_(Trainer<PreviousLayer>::create(
           &target_layer->previous_layer_, ft)),
       target_layer_(target_layer),
       biases_(),
@@ -224,11 +224,11 @@ namespace Eval::NNUE {
       momentum_(0.2),
       learning_rate_scale_(1.0) {

-    DequantizeParameters();
+    dequantize_parameters();
   }

   // Weight saturation and parameterization
-  void QuantizeParameters() {
+  void quantize_parameters() {
     for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
       weights_[i] = std::max(-kMaxWeightMagnitude,
                              std::min(+kMaxWeightMagnitude, weights_[i]));
@@ -236,7 +236,7 @@ namespace Eval::NNUE {

     for (IndexType i = 0; i < kOutputDimensions; ++i) {
       target_layer_->biases_[i] =
-          Round<typename LayerType::BiasType>(biases_[i] * kBiasScale);
+          round<typename LayerType::BiasType>(biases_[i] * kBiasScale);
     }

     for (IndexType i = 0; i < kOutputDimensions; ++i) {
@@ -244,14 +244,14 @@ namespace Eval::NNUE {
       const auto padded_offset = LayerType::kPaddedInputDimensions * i;
       for (IndexType j = 0; j < kInputDimensions; ++j) {
         target_layer_->weights_[padded_offset + j] =
-            Round<typename LayerType::WeightType>(
+            round<typename LayerType::WeightType>(
                 weights_[offset + j] * kWeightScale);
       }
     }
   }

   // read parameterized integer
-  void DequantizeParameters() {
+  void dequantize_parameters() {
     for (IndexType i = 0; i < kOutputDimensions; ++i) {
       biases_[i] = static_cast<LearnFloatType>(
           target_layer_->biases_[i] / kBiasScale);
@@ -19,7 +19,7 @@ namespace Eval::NNUE {

  public:
   // factory function
-  static std::shared_ptr<Trainer> Create(
+  static std::shared_ptr<Trainer> create(
       LayerType* target_layer, FeatureTransformer* ft) {

     return std::shared_ptr<Trainer>(
@@ -27,27 +27,27 @@ namespace Eval::NNUE {
   }

   // Set options such as hyperparameters
-  void SendMessage(Message* message) {
-    previous_layer_trainer_->SendMessage(message);
-    if (ReceiveMessage("check_health", message)) {
-      CheckHealth();
+  void send_message(Message* message) {
+    previous_layer_trainer_->send_message(message);
+    if (receive_message("check_health", message)) {
+      check_health();
     }
   }

   // Initialize the parameters with random numbers
   template <typename RNG>
-  void Initialize(RNG& rng) {
-    previous_layer_trainer_->Initialize(rng);
+  void initialize(RNG& rng) {
+    previous_layer_trainer_->initialize(rng);
   }

   // forward propagation
-  const LearnFloatType* Propagate(const std::vector<Example>& batch) {
+  const LearnFloatType* propagate(const std::vector<Example>& batch) {
     if (output_.size() < kOutputDimensions * batch.size()) {
       output_.resize(kOutputDimensions * batch.size());
       gradients_.resize(kInputDimensions * batch.size());
     }

-    const auto input = previous_layer_trainer_->Propagate(batch);
+    const auto input = previous_layer_trainer_->propagate(batch);
     batch_size_ = static_cast<IndexType>(batch.size());
     for (IndexType b = 0; b < batch_size_; ++b) {
       const IndexType batch_offset = kOutputDimensions * b;
@@ -63,7 +63,7 @@ namespace Eval::NNUE {
   }

   // backpropagation
-  void Backpropagate(const LearnFloatType* gradients,
+  void backpropagate(const LearnFloatType* gradients,
                      LearnFloatType learning_rate) {

     for (IndexType b = 0; b < batch_size_; ++b) {
@@ -75,14 +75,14 @@ namespace Eval::NNUE {
       }
     }

-    previous_layer_trainer_->Backpropagate(gradients_.data(), learning_rate);
+    previous_layer_trainer_->backpropagate(gradients_.data(), learning_rate);
   }

  private:
   // constructor
   Trainer(LayerType* target_layer, FeatureTransformer* ft) :
       batch_size_(0),
-      previous_layer_trainer_(Trainer<PreviousLayer>::Create(
+      previous_layer_trainer_(Trainer<PreviousLayer>::create(
           &target_layer->previous_layer_, ft)),
       target_layer_(target_layer) {

@@ -93,7 +93,7 @@ namespace Eval::NNUE {
   }

   // Check if there are any problems with learning
-  void CheckHealth() {
+  void check_health() {
     const auto largest_min_activation = *std::max_element(
         std::begin(min_activations_), std::end(min_activations_));
     const auto smallest_max_activation = *std::min_element(
@@ -34,44 +34,44 @@ namespace Eval::NNUE {
   friend struct AlignedDeleter;

   template <typename T, typename... ArgumentTypes>
-  friend std::shared_ptr<T> MakeAlignedSharedPtr(ArgumentTypes&&... arguments);
+  friend std::shared_ptr<T> make_aligned_shared_ptr(ArgumentTypes&&... arguments);

   // factory function
-  static std::shared_ptr<Trainer> Create(LayerType* target_layer) {
-    return MakeAlignedSharedPtr<Trainer>(target_layer);
+  static std::shared_ptr<Trainer> create(LayerType* target_layer) {
+    return make_aligned_shared_ptr<Trainer>(target_layer);
   }

   // Set options such as hyperparameters
-  void SendMessage(Message* message) {
-    if (ReceiveMessage("momentum", message)) {
+  void send_message(Message* message) {
+    if (receive_message("momentum", message)) {
       momentum_ = static_cast<LearnFloatType>(std::stod(message->value));
     }

-    if (ReceiveMessage("learning_rate_scale", message)) {
+    if (receive_message("learning_rate_scale", message)) {
       learning_rate_scale_ =
           static_cast<LearnFloatType>(std::stod(message->value));
     }

-    if (ReceiveMessage("reset", message)) {
-      DequantizeParameters();
+    if (receive_message("reset", message)) {
+      dequantize_parameters();
     }

-    if (ReceiveMessage("quantize_parameters", message)) {
-      QuantizeParameters();
+    if (receive_message("quantize_parameters", message)) {
+      quantize_parameters();
     }

-    if (ReceiveMessage("clear_unobserved_feature_weights", message)) {
-      ClearUnobservedFeatureWeights();
+    if (receive_message("clear_unobserved_feature_weights", message)) {
+      clear_unobserved_feature_weights();
     }

-    if (ReceiveMessage("check_health", message)) {
-      CheckHealth();
+    if (receive_message("check_health", message)) {
+      check_health();
     }
   }

   // Initialize the parameters with random numbers
   template <typename RNG>
-  void Initialize(RNG& rng) {
+  void initialize(RNG& rng) {
     std::fill(std::begin(weights_), std::end(weights_), +kZero);

     const double kSigma = 0.1 / std::sqrt(RawFeatures::kMaxActiveDimensions);
@@ -86,11 +86,11 @@ namespace Eval::NNUE {
       biases_[i] = static_cast<LearnFloatType>(0.5);
     }

-    QuantizeParameters();
+    quantize_parameters();
   }

   // forward propagation
-  const LearnFloatType* Propagate(const std::vector<Example>& batch) {
+  const LearnFloatType* propagate(const std::vector<Example>& batch) {
     if (output_.size() < kOutputDimensions * batch.size()) {
       output_.resize(kOutputDimensions * batch.size());
       gradients_.resize(kOutputDimensions * batch.size());
@@ -106,8 +106,8 @@ namespace Eval::NNUE {
 #if defined(USE_BLAS)
       cblas_scopy(kHalfDimensions, biases_, 1, &output_[output_offset], 1);
       for (const auto& feature : batch[b].training_features[c]) {
-        const IndexType weights_offset = kHalfDimensions * feature.GetIndex();
-        cblas_saxpy(kHalfDimensions, (float)feature.GetCount(),
+        const IndexType weights_offset = kHalfDimensions * feature.get_index();
+        cblas_saxpy(kHalfDimensions, (float)feature.get_count(),
                     &weights_[weights_offset], 1, &output_[output_offset], 1);
       }
 #else
@@ -115,10 +115,10 @@ namespace Eval::NNUE {
         output_[output_offset + i] = biases_[i];
       }
       for (const auto& feature : batch[b].training_features[c]) {
-        const IndexType weights_offset = kHalfDimensions * feature.GetIndex();
+        const IndexType weights_offset = kHalfDimensions * feature.get_index();
         for (IndexType i = 0; i < kHalfDimensions; ++i) {
           output_[output_offset + i] +=
-              feature.GetCount() * weights_[weights_offset + i];
+              feature.get_count() * weights_[weights_offset + i];
         }
       }
 #endif
@@ -143,7 +143,7 @@ namespace Eval::NNUE {
   }

   // backpropagation
-  void Backpropagate(const LearnFloatType* gradients,
+  void backpropagate(const LearnFloatType* gradients,
                      LearnFloatType learning_rate) {

     const LearnFloatType local_learning_rate =
@@ -188,13 +188,13 @@ namespace Eval::NNUE {
         const IndexType output_offset = batch_offset + kHalfDimensions * c;
         for (const auto& feature : (*batch_)[b].training_features[c]) {
 #if defined(_OPENMP)
-          if (feature.GetIndex() % num_threads != thread_index)
+          if (feature.get_index() % num_threads != thread_index)
             continue;
 #endif
           const IndexType weights_offset =
-              kHalfDimensions * feature.GetIndex();
+              kHalfDimensions * feature.get_index();
           const auto scale = static_cast<LearnFloatType>(
-              effective_learning_rate / feature.GetCount());
+              effective_learning_rate / feature.get_count());

           cblas_saxpy(kHalfDimensions, -scale,
                       &gradients_[output_offset], 1,
@@ -228,9 +228,9 @@ namespace Eval::NNUE {
       for (IndexType c = 0; c < 2; ++c) {
         const IndexType output_offset = batch_offset + kHalfDimensions * c;
         for (const auto& feature : (*batch_)[b].training_features[c]) {
-          const IndexType weights_offset = kHalfDimensions * feature.GetIndex();
+          const IndexType weights_offset = kHalfDimensions * feature.get_index();
           const auto scale = static_cast<LearnFloatType>(
-              effective_learning_rate / feature.GetCount());
+              effective_learning_rate / feature.get_count());

           for (IndexType i = 0; i < kHalfDimensions; ++i) {
             weights_[weights_offset + i] -=
@@ -244,7 +244,7 @@ namespace Eval::NNUE {
     for (IndexType b = 0; b < batch_->size(); ++b) {
       for (IndexType c = 0; c < 2; ++c) {
         for (const auto& feature : (*batch_)[b].training_features[c]) {
-          observed_features.set(feature.GetIndex());
+          observed_features.set(feature.get_index());
         }
       }
     }
@@ -269,14 +269,14 @@ namespace Eval::NNUE {
     std::fill(std::begin(max_activations_), std::end(max_activations_),
               std::numeric_limits<LearnFloatType>::lowest());

-    DequantizeParameters();
+    dequantize_parameters();
   }

   // Weight saturation and parameterization
-  void QuantizeParameters() {
+  void quantize_parameters() {
     for (IndexType i = 0; i < kHalfDimensions; ++i) {
       target_layer_->biases_[i] =
-          Round<typename LayerType::BiasType>(biases_[i] * kBiasScale);
+          round<typename LayerType::BiasType>(biases_[i] * kBiasScale);
     }

     std::vector<TrainingFeature> training_features;
@@ -284,23 +284,23 @@ namespace Eval::NNUE {
 #pragma omp parallel for private(training_features)
     for (IndexType j = 0; j < RawFeatures::kDimensions; ++j) {
       training_features.clear();
-      Features::Factorizer<RawFeatures>::AppendTrainingFeatures(
+      Features::Factorizer<RawFeatures>::append_training_features(
          j, &training_features);

       for (IndexType i = 0; i < kHalfDimensions; ++i) {
         double sum = 0.0;
         for (const auto& feature : training_features) {
-          sum += weights_[kHalfDimensions * feature.GetIndex() + i];
+          sum += weights_[kHalfDimensions * feature.get_index() + i];
         }

         target_layer_->weights_[kHalfDimensions * j + i] =
-            Round<typename LayerType::WeightType>(sum * kWeightScale);
+            round<typename LayerType::WeightType>(sum * kWeightScale);
       }
     }
   }

   // read parameterized integer
-  void DequantizeParameters() {
+  void dequantize_parameters() {
     for (IndexType i = 0; i < kHalfDimensions; ++i) {
       biases_[i] = static_cast<LearnFloatType>(
           target_layer_->biases_[i] / kBiasScale);
@@ -317,7 +317,7 @@ namespace Eval::NNUE {
   }

   // Set the weight corresponding to the feature that does not appear in the learning data to 0
-  void ClearUnobservedFeatureWeights() {
+  void clear_unobserved_feature_weights() {
     for (IndexType i = 0; i < kInputDimensions; ++i) {
       if (!observed_features.test(i)) {
         std::fill(std::begin(weights_) + kHalfDimensions * i,
@@ -325,11 +325,11 @@ namespace Eval::NNUE {
       }
     }

-    QuantizeParameters();
+    quantize_parameters();
   }

   // Check if there are any problems with learning
-  void CheckHealth() {
+  void check_health() {
     std::cout << "INFO: observed " << observed_features.count()
               << " (out of " << kInputDimensions << ") features" << std::endl;

@@ -359,7 +359,7 @@ namespace Eval::NNUE {

   // number of input/output dimensions
   static constexpr IndexType kInputDimensions =
-      Features::Factorizer<RawFeatures>::GetDimensions();
+      Features::Factorizer<RawFeatures>::get_dimensions();
   static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
   static constexpr IndexType kHalfDimensions = LayerType::kHalfDimensions;

@@ -14,7 +14,7 @@ namespace Eval::NNUE {
 class SharedInputTrainer {
  public:
   // factory function
-  static std::shared_ptr<SharedInputTrainer> Create(
+  static std::shared_ptr<SharedInputTrainer> create(
       FeatureTransformer* ft) {

     static std::shared_ptr<SharedInputTrainer> instance;
@@ -29,10 +29,10 @@ namespace Eval::NNUE {
   }

   // Set options such as hyperparameters
-  void SendMessage(Message* message) {
+  void send_message(Message* message) {
     if (num_calls_ == 0) {
       current_operation_ = Operation::kSendMessage;
-      feature_transformer_trainer_->SendMessage(message);
+      feature_transformer_trainer_->send_message(message);
     }

     assert(current_operation_ == Operation::kSendMessage);
@@ -45,10 +45,10 @@ namespace Eval::NNUE {

   // Initialize the parameters with random numbers
   template <typename RNG>
-  void Initialize(RNG& rng) {
+  void initialize(RNG& rng) {
     if (num_calls_ == 0) {
       current_operation_ = Operation::kInitialize;
-      feature_transformer_trainer_->Initialize(rng);
+      feature_transformer_trainer_->initialize(rng);
     }

     assert(current_operation_ == Operation::kInitialize);
@@ -60,7 +60,7 @@ namespace Eval::NNUE {
   }

   // forward propagation
-  const LearnFloatType* Propagate(const std::vector<Example>& batch) {
+  const LearnFloatType* propagate(const std::vector<Example>& batch) {
     if (gradients_.size() < kInputDimensions * batch.size()) {
       gradients_.resize(kInputDimensions * batch.size());
     }
@@ -69,7 +69,7 @@ namespace Eval::NNUE {

     if (num_calls_ == 0) {
       current_operation_ = Operation::kPropagate;
-      output_ = feature_transformer_trainer_->Propagate(batch);
+      output_ = feature_transformer_trainer_->propagate(batch);
     }

     assert(current_operation_ == Operation::kPropagate);
@@ -83,11 +83,11 @@ namespace Eval::NNUE {
   }

   // backpropagation
-  void Backpropagate(const LearnFloatType* gradients,
+  void backpropagate(const LearnFloatType* gradients,
                      LearnFloatType learning_rate) {

     if (num_referrers_ == 1) {
-      feature_transformer_trainer_->Backpropagate(gradients, learning_rate);
+      feature_transformer_trainer_->backpropagate(gradients, learning_rate);
       return;
     }

@@ -111,7 +111,7 @@ namespace Eval::NNUE {
     }

     if (++num_calls_ == num_referrers_) {
-      feature_transformer_trainer_->Backpropagate(
+      feature_transformer_trainer_->backpropagate(
           gradients_.data(), learning_rate);
       num_calls_ = 0;
       current_operation_ = Operation::kNone;
@@ -125,7 +125,7 @@ namespace Eval::NNUE {
       num_referrers_(0),
       num_calls_(0),
       current_operation_(Operation::kNone),
-      feature_transformer_trainer_(Trainer<FeatureTransformer>::Create(
+      feature_transformer_trainer_(Trainer<FeatureTransformer>::create(
           ft)),
       output_(nullptr) {
   }
@@ -175,25 +175,25 @@ namespace Eval::NNUE {

  public:
   // factory function
-  static std::shared_ptr<Trainer> Create(
+  static std::shared_ptr<Trainer> create(
       LayerType* /*target_layer*/, FeatureTransformer* ft) {

     return std::shared_ptr<Trainer>(new Trainer(ft));
   }

   // Set options such as hyperparameters
-  void SendMessage(Message* message) {
-    shared_input_trainer_->SendMessage(message);
+  void send_message(Message* message) {
+    shared_input_trainer_->send_message(message);
   }

   // Initialize the parameters with random numbers
   template <typename RNG>
-  void Initialize(RNG& rng) {
-    shared_input_trainer_->Initialize(rng);
+  void initialize(RNG& rng) {
+    shared_input_trainer_->initialize(rng);
   }

   // forward propagation
-  const LearnFloatType* Propagate(const std::vector<Example>& batch) {
+  const LearnFloatType* propagate(const std::vector<Example>& batch) {
     if (output_.size() < kOutputDimensions * batch.size()) {
       output_.resize(kOutputDimensions * batch.size());
       gradients_.resize(kInputDimensions * batch.size());
@@ -201,7 +201,7 @@ namespace Eval::NNUE {

     batch_size_ = static_cast<IndexType>(batch.size());

-    const auto input = shared_input_trainer_->Propagate(batch);
+    const auto input = shared_input_trainer_->propagate(batch);
     for (IndexType b = 0; b < batch_size_; ++b) {
       const IndexType input_offset = kInputDimensions * b;
       const IndexType output_offset = kOutputDimensions * b;
@@ -219,7 +219,7 @@ namespace Eval::NNUE {
   }

   // backpropagation
-  void Backpropagate(const LearnFloatType* gradients,
+  void backpropagate(const LearnFloatType* gradients,
                      LearnFloatType learning_rate) {

     for (IndexType b = 0; b < batch_size_; ++b) {
@@ -233,14 +233,14 @@ namespace Eval::NNUE {
         }
       }
     }
-    shared_input_trainer_->Backpropagate(gradients_.data(), learning_rate);
+    shared_input_trainer_->backpropagate(gradients_.data(), learning_rate);
   }

  private:
   // constructor
   Trainer(FeatureTransformer* ft):
       batch_size_(0),
-      shared_input_trainer_(SharedInputTrainer::Create(ft)) {
+      shared_input_trainer_(SharedInputTrainer::create(ft)) {
   }

   // number of input/output dimensions
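The SharedInputTrainer hunks above follow an accumulate-then-flush pattern: several input-slice trainers share one feature-transformer trainer, each backpropagate call adds its gradient into a shared buffer, and only the last caller (when num_calls_ reaches num_referrers_) forwards the sum and resets the counter. A stripped-down sketch of that control flow; the class name, buffer handling, and reset policy here are simplifying assumptions, not the diff's implementation:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    using LearnFloatType = float;

    // Simplified stand-in: accumulates gradients from N sharers, flushes once per round.
    class SharedInputSketch {
     public:
      SharedInputSketch(int num_referrers, std::size_t size)
          : num_referrers_(num_referrers), num_calls_(0), sum_(size, 0.0f) {}

      // Called once per sharing layer; forwards only on the last call of the round.
      bool backpropagate(const std::vector<LearnFloatType>& gradients) {
        assert(gradients.size() == sum_.size());
        for (std::size_t i = 0; i < sum_.size(); ++i)
          sum_[i] += gradients[i];
        if (++num_calls_ == num_referrers_) {
          // Here the real trainer would call
          // feature_transformer_trainer_->backpropagate(sum_.data(), learning_rate);
          num_calls_ = 0;
          std::fill(sum_.begin(), sum_.end(), 0.0f);  // start a fresh accumulation round
          return true;   // flushed
        }
        return false;    // still waiting for the other sharers
      }

     private:
      int num_referrers_;
      int num_calls_;
      std::vector<LearnFloatType> sum_;
    };

    int main() {
      SharedInputSketch shared(2, 3);
      assert(!shared.backpropagate({1, 1, 1}));  // first sharer: accumulate only
      assert(shared.backpropagate({2, 2, 2}));   // second sharer: flush
    }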
@@ -21,7 +21,7 @@ namespace Eval::NNUE {

  public:
   // factory function
-  static std::shared_ptr<Trainer> Create(
+  static std::shared_ptr<Trainer> create(
       LayerType* target_layer, FeatureTransformer* ft) {

     return std::shared_ptr<Trainer>(
@@ -29,26 +29,26 @@ namespace Eval::NNUE {
   }

   // Set options such as hyperparameters
-  void SendMessage(Message* message) {
+  void send_message(Message* message) {
     // The results of other member functions do not depend on the processing order, so
     // Tail is processed first for the purpose of simplifying the implementation, but
     // SendMessage processes Head first to make it easier to understand subscript correspondence
-    previous_layer_trainer_->SendMessage(message);
-    Tail::SendMessage(message);
+    previous_layer_trainer_->send_message(message);
+    Tail::send_message(message);
   }

   // Initialize the parameters with random numbers
   template <typename RNG>
-  void Initialize(RNG& rng) {
-    Tail::Initialize(rng);
-    previous_layer_trainer_->Initialize(rng);
+  void initialize(RNG& rng) {
+    Tail::initialize(rng);
+    previous_layer_trainer_->initialize(rng);
   }

   // forward propagation
-  /*const*/ LearnFloatType* Propagate(const std::vector<Example>& batch) {
+  /*const*/ LearnFloatType* propagate(const std::vector<Example>& batch) {
     batch_size_ = static_cast<IndexType>(batch.size());
-    auto output = Tail::Propagate(batch);
-    const auto head_output = previous_layer_trainer_->Propagate(batch);
+    auto output = Tail::propagate(batch);
+    const auto head_output = previous_layer_trainer_->propagate(batch);

 #if defined(USE_BLAS)
     cblas_saxpy(kOutputDimensions * batch_size_, 1.0,
@@ -66,11 +66,11 @@ namespace Eval::NNUE {
   }

   // backpropagation
-  void Backpropagate(const LearnFloatType* gradients,
+  void backpropagate(const LearnFloatType* gradients,
                      LearnFloatType learning_rate) {

-    Tail::Backpropagate(gradients, learning_rate);
-    previous_layer_trainer_->Backpropagate(gradients, learning_rate);
+    Tail::backpropagate(gradients, learning_rate);
+    previous_layer_trainer_->backpropagate(gradients, learning_rate);
   }

  private:
@@ -78,7 +78,7 @@ namespace Eval::NNUE {
   Trainer(LayerType* target_layer, FeatureTransformer* ft):
       Tail(target_layer, ft),
       batch_size_(0),
-      previous_layer_trainer_(Trainer<FirstPreviousLayer>::Create(
+      previous_layer_trainer_(Trainer<FirstPreviousLayer>::create(
           &target_layer->previous_layer_, ft)),
       target_layer_(target_layer) {
   }
@@ -110,7 +110,7 @@ namespace Eval::NNUE {

  public:
   // factory function
-  static std::shared_ptr<Trainer> Create(
+  static std::shared_ptr<Trainer> create(
       LayerType* target_layer, FeatureTransformer* ft) {

     return std::shared_ptr<Trainer>(
@@ -118,24 +118,24 @@ namespace Eval::NNUE {
   }

   // Set options such as hyperparameters
-  void SendMessage(Message* message) {
-    previous_layer_trainer_->SendMessage(message);
+  void send_message(Message* message) {
+    previous_layer_trainer_->send_message(message);
   }

   // Initialize the parameters with random numbers
   template <typename RNG>
-  void Initialize(RNG& rng) {
-    previous_layer_trainer_->Initialize(rng);
+  void initialize(RNG& rng) {
+    previous_layer_trainer_->initialize(rng);
   }

   // forward propagation
-  /*const*/ LearnFloatType* Propagate(const std::vector<Example>& batch) {
+  /*const*/ LearnFloatType* propagate(const std::vector<Example>& batch) {
     if (output_.size() < kOutputDimensions * batch.size()) {
       output_.resize(kOutputDimensions * batch.size());
     }

     batch_size_ = static_cast<IndexType>(batch.size());
-    const auto output = previous_layer_trainer_->Propagate(batch);
+    const auto output = previous_layer_trainer_->propagate(batch);

 #if defined(USE_BLAS)
     cblas_scopy(kOutputDimensions * batch_size_, output, 1, &output_[0], 1);
@@ -152,17 +152,17 @@ namespace Eval::NNUE {
   }

   // backpropagation
-  void Backpropagate(const LearnFloatType* gradients,
+  void backpropagate(const LearnFloatType* gradients,
                      LearnFloatType learning_rate) {

-    previous_layer_trainer_->Backpropagate(gradients, learning_rate);
+    previous_layer_trainer_->backpropagate(gradients, learning_rate);
   }

  private:
   // constructor
   Trainer(LayerType* target_layer, FeatureTransformer* ft) :
       batch_size_(0),
-      previous_layer_trainer_(Trainer<PreviousLayer>::Create(
+      previous_layer_trainer_(Trainer<PreviousLayer>::create(
           &target_layer->previous_layer_, ft)),
       target_layer_(target_layer) {
   }