diff --git a/src/nnue/trainer/trainer_affine_transform.h b/src/nnue/trainer/trainer_affine_transform.h
index f6d374ef..21e54f18 100644
--- a/src/nnue/trainer/trainer_affine_transform.h
+++ b/src/nnue/trainer/trainer_affine_transform.h
@@ -48,6 +48,10 @@ namespace Eval::NNUE {
             if (receive_message("quantize_parameters", message)) {
                 quantize_parameters();
             }
+
+            if (receive_message("check_health", message)) {
+                check_health();
+            }
         }
 
         // Initialize the parameters with random numbers
@@ -145,16 +149,11 @@
                             &gradients[batch_offset], 1, biases_diff_, 1);
             }
-            cblas_saxpy(kOutputDimensions, -local_learning_rate,
-                        biases_diff_, 1, biases_, 1);
-
             cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
                         kOutputDimensions, kInputDimensions, batch_size_, 1.0,
                         gradients, kOutputDimensions,
                         batch_input_, kInputDimensions,
                         momentum_, weights_diff_, kInputDimensions);
-            cblas_saxpy(kOutputDimensions * kInputDimensions, -local_learning_rate,
-                        weights_diff_, 1, weights_, 1);
 
 #else
 
             // backpropagate
@@ -196,16 +195,22 @@
                     }
                 }
             }
+#endif
 
             for (IndexType i = 0; i < kOutputDimensions; ++i) {
-                biases_[i] -= local_learning_rate * biases_diff_[i];
+                const double d = local_learning_rate * biases_diff_[i];
+                biases_[i] -= d;
+                abs_biases_diff_sum_ += std::abs(d);
             }
+            num_biases_diffs_ += kOutputDimensions;
 
             for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
-                weights_[i] -= local_learning_rate * weights_diff_[i];
+                const double d = local_learning_rate * weights_diff_[i];
+                weights_[i] -= d;
+                abs_weights_diff_sum_ += std::abs(d);
             }
+            num_weights_diffs_ += kOutputDimensions * kInputDimensions;
 
-#endif
             previous_layer_trainer_->backpropagate(gradients_.data(), learning_rate);
         }
 
@@ -227,6 +232,30 @@
             dequantize_parameters();
         }
 
+        void reset_stats() {
+            abs_biases_diff_sum_ = 0.0;
+            abs_weights_diff_sum_ = 0.0;
+            num_biases_diffs_ = 0;
+            num_weights_diffs_ = 0;
+        }
+
+        void check_health() {
+
+            auto out = sync_region_cout.new_region();
+
+            out << "INFO (check_health):"
+                << " layer " << LayerType::kLayerIndex
+                << " - " << LayerType::get_name()
+                << std::endl;
+
+            out << " - avg_abs_bias_diff = " << abs_biases_diff_sum_ / num_biases_diffs_ << std::endl;
+            out << " - avg_abs_weight_diff = " << abs_weights_diff_sum_ / num_weights_diffs_ << std::endl;
+
+            out.unlock();
+
+            reset_stats();
+        }
+
         // Weight saturation and parameterization
         void quantize_parameters() {
             for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
@@ -270,6 +299,8 @@
                       static_cast<LearnFloatType>(0.0));
             std::fill(std::begin(weights_diff_), std::end(weights_diff_),
                       static_cast<LearnFloatType>(0.0));
+
+            reset_stats();
         }
 
         // number of input/output dimensions
@@ -296,6 +327,11 @@
         // number of samples in mini-batch
         IndexType batch_size_;
 
+        double abs_biases_diff_sum_;
+        double abs_weights_diff_sum_;
+        uint64_t num_biases_diffs_;
+        uint64_t num_weights_diffs_;
+
         // Input mini batch
         const LearnFloatType* batch_input_;
 
diff --git a/src/nnue/trainer/trainer_clipped_relu.h b/src/nnue/trainer/trainer_clipped_relu.h
index f9bbd833..57e9bac4 100644
--- a/src/nnue/trainer/trainer_clipped_relu.h
+++ b/src/nnue/trainer/trainer_clipped_relu.h
@@ -70,10 +70,12 @@
                 const IndexType batch_offset = kOutputDimensions * b;
                 for (IndexType i = 0; i < kOutputDimensions; ++i) {
                     const IndexType index = batch_offset + i;
-                    gradients_[index] = gradients[index] *
-                        (output_[index] > kZero) * (output_[index] < kOne);
+                    const bool clipped = (output_[index] <= kZero) | (output_[index] >= kOne);
+                    gradients_[index] = gradients[index] * !clipped;
+                    num_clipped_ += clipped;
                 }
             }
+            num_total_ += batch_size_ * kOutputDimensions;
 
             previous_layer_trainer_->backpropagate(gradients_.data(), learning_rate);
         }
@@ -86,10 +88,17 @@
                 &target_layer->previous_layer_, ft)),
             target_layer_(target_layer) {
 
+            reset_stats();
+        }
+
+        void reset_stats() {
             std::fill(std::begin(min_activations_), std::end(min_activations_),
                       std::numeric_limits<LearnFloatType>::max());
             std::fill(std::begin(max_activations_), std::end(max_activations_),
                       std::numeric_limits<LearnFloatType>::lowest());
+
+            num_clipped_ = 0;
+            num_total_ = 0;
         }
 
         // Check if there are any problems with learning
@@ -111,12 +120,12 @@
                 << " , smallest max activation = " << smallest_max_activation
                 << std::endl;
 
+            out << " - clipped " << static_cast<double>(num_clipped_) / num_total_ * 100.0 << "% of outputs"
+                << std::endl;
+
             out.unlock();
 
-            std::fill(std::begin(min_activations_), std::end(min_activations_),
-                      std::numeric_limits<LearnFloatType>::max());
-            std::fill(std::begin(max_activations_), std::end(max_activations_),
-                      std::numeric_limits<LearnFloatType>::lowest());
+            reset_stats();
         }
 
         // number of input/output dimensions
@@ -130,6 +139,9 @@
         // number of samples in mini-batch
         IndexType batch_size_;
 
+        IndexType num_clipped_;
+        IndexType num_total_;
+
         // Trainer of the previous layer
         const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
 
diff --git a/src/nnue/trainer/trainer_feature_transformer.h b/src/nnue/trainer/trainer_feature_transformer.h
index ffde6eba..869ceb85 100644
--- a/src/nnue/trainer/trainer_feature_transformer.h
+++ b/src/nnue/trainer/trainer_feature_transformer.h
@@ -153,10 +153,12 @@
                 const IndexType batch_offset = kOutputDimensions * b;
                 for (IndexType i = 0; i < kOutputDimensions; ++i) {
                     const IndexType index = batch_offset + i;
-                    gradients_[index] = gradients[index] *
-                        ((output_[index] > kZero) * (output_[index] < kOne));
+                    const bool clipped = (output_[index] <= kZero) | (output_[index] >= kOne);
+                    gradients_[index] = gradients[index] * !clipped;
+                    num_clipped_ += clipped;
                 }
             }
+            num_total_ += batch_->size() * kOutputDimensions;
 
             // Since the weight matrix updates only the columns corresponding to the features that appeared in the input,
             // Correct the learning rate and adjust the scale without using momentum
@@ -261,14 +263,6 @@
             momentum_(0.2),
             learning_rate_scale_(1.0) {
 
-            min_pre_activation_ = std::numeric_limits<LearnFloatType>::max();
-            max_pre_activation_ = std::numeric_limits<LearnFloatType>::lowest();
-
-            std::fill(std::begin(min_activations_), std::end(min_activations_),
-                      std::numeric_limits<LearnFloatType>::max());
-            std::fill(std::begin(max_activations_), std::end(max_activations_),
-                      std::numeric_limits<LearnFloatType>::lowest());
-
             dequantize_parameters();
         }
 
@@ -299,6 +293,19 @@
             }
         }
 
+        void reset_stats() {
+            min_pre_activation_ = std::numeric_limits<LearnFloatType>::max();
+            max_pre_activation_ = std::numeric_limits<LearnFloatType>::lowest();
+
+            std::fill(std::begin(min_activations_), std::end(min_activations_),
+                      std::numeric_limits<LearnFloatType>::max());
+            std::fill(std::begin(max_activations_), std::end(max_activations_),
+                      std::numeric_limits<LearnFloatType>::lowest());
+
+            num_clipped_ = 0;
+            num_total_ = 0;
+        }
+
         // read parameterized integer
         void dequantize_parameters() {
             for (IndexType i = 0; i < kHalfDimensions; ++i) {
@@ -314,6 +321,8 @@
             }
             std::fill(std::begin(biases_diff_), std::end(biases_diff_),
                       +kZero);
+
+            reset_stats();
         }
 
         // Set the weight corresponding to the feature that does not appear in the learning data to 0
@@ -361,12 +370,12 @@
                 << " , smallest max activation = " << smallest_max_activation
                 << std::endl;
 
+            out << " - clipped " << static_cast<double>(num_clipped_) / num_total_ * 100.0 << "% of outputs"
+                << std::endl;
+
             out.unlock();
 
-            std::fill(std::begin(min_activations_), std::end(min_activations_),
-                      std::numeric_limits<LearnFloatType>::max());
-            std::fill(std::begin(max_activations_), std::end(max_activations_),
-                      std::numeric_limits<LearnFloatType>::lowest());
+            reset_stats();
         }
 
         // number of input/output dimensions
@@ -391,6 +400,9 @@
         // layer to learn
         LayerType* const target_layer_;
 
+        IndexType num_clipped_;
+        IndexType num_total_;
+
         // parameter
         alignas(kCacheLineSize) LearnFloatType biases_[kHalfDimensions];
         alignas(kCacheLineSize)