diff --git a/src/learn/learn.cpp b/src/learn/learn.cpp
index 7de359ef..e3d2fecf 100644
--- a/src/learn/learn.cpp
+++ b/src/learn/learn.cpp
@@ -599,19 +599,16 @@ namespace Learner
             // Evaluation value of deep search
             const auto deep_value = (Value)ps.score;
 
-            const Value shallow_value =
-                (rootColor == pos.side_to_move())
-                ? Eval::evaluate(pos)
-                : -Eval::evaluate(pos);
+            const Value shallow_value = Eval::evaluate(pos);
 
             const auto loss = calc_cross_entropy(
                 deep_value,
-                shallow_value,
+                (rootColor == pos.side_to_move()) ? shallow_value : -shallow_value,
                 ps);
 
             local_loss_sum += loss;
 
-            Eval::NNUE::add_example(pos, rootColor, ps, 1.0);
+            Eval::NNUE::add_example(pos, rootColor, shallow_value, ps, 1.0);
         };
 
         if (!pos.pseudo_legal((Move)ps.move) || !pos.legal((Move)ps.move))
diff --git a/src/nnue/evaluate_nnue_learner.cpp b/src/nnue/evaluate_nnue_learner.cpp
index 54525fe4..581e7928 100644
--- a/src/nnue/evaluate_nnue_learner.cpp
+++ b/src/nnue/evaluate_nnue_learner.cpp
@@ -118,8 +118,12 @@ namespace Eval::NNUE {
     }
 
     // Add 1 sample of learning data
-    void add_example(Position& pos, Color rootColor,
-        const Learner::PackedSfenValue& psv, double weight) {
+    void add_example(
+        Position& pos,
+        Color rootColor,
+        Value discrete_nn_eval,
+        const Learner::PackedSfenValue& psv,
+        double weight) {
 
         Example example;
         if (rootColor == pos.side_to_move()) {
@@ -128,6 +132,7 @@ namespace Eval::NNUE {
             example.sign = -1;
         }
 
+        example.discrete_nn_eval = discrete_nn_eval;
         example.psv = psv;
         example.weight = weight;
 
@@ -176,6 +181,13 @@ namespace Eval::NNUE {
        std::lock_guard<std::mutex> lock(examples_mutex);
 
        std::shuffle(examples.begin(), examples.end(), rng);
+
+       double abs_eval_diff_sum = 0.0;
+       double abs_discrete_eval_sum = 0.0;
+       double gradient_norm = 0.0;
+
+       bool is_first_batch = true;
+
        while (examples.size() >= batch_size) {
            std::vector<Example> batch(examples.end() - batch_size, examples.end());
            examples.resize(examples.size() - batch_size);
@@ -186,13 +198,39 @@ namespace Eval::NNUE {
            for (std::size_t b = 0; b < batch.size(); ++b) {
                const auto shallow = static_cast<Value>(round(
                    batch[b].sign * network_output[b] * kPonanzaConstant));
+               const auto discrete = batch[b].sign * batch[b].discrete_nn_eval;
                const auto& psv = batch[b].psv;
                const double gradient =
                    batch[b].sign * Learner::calc_grad(shallow, psv);
                gradients[b] = static_cast<LearnFloatType>(gradient * batch[b].weight);
+
+
+               // The discrete eval will only be valid before first backpropagation,
+               // that is only for the first batch.
+               // Similarly we want only gradients from one batch.
+               if (is_first_batch)
+               {
+                   abs_eval_diff_sum += std::abs(discrete - shallow);
+                   abs_discrete_eval_sum += std::abs(discrete);
+                   gradient_norm += std::abs(gradient);
+               }
            }
 
            trainer->backpropagate(gradients.data(), learning_rate);
+
+           is_first_batch = false;
        }
+
+       const double avg_abs_eval_diff = abs_eval_diff_sum / batch_size;
+       const double avg_abs_discrete_eval = abs_discrete_eval_sum / batch_size;
+
+       std::cout << "INFO (update_weights):"
+                 << " avg_abs(trainer_eval-nnue_eval) = " << avg_abs_eval_diff
+                 << " , avg_abs(nnue_eval) = " << avg_abs_discrete_eval
+                 << " , avg_relative_error = " << avg_abs_eval_diff / avg_abs_discrete_eval
+                 << " , batch_size = " << batch_size
+                 << " , grad_norm = " << gradient_norm
+                 << std::endl;
+
        send_messages({{"quantize_parameters"}});
    }
diff --git a/src/nnue/evaluate_nnue_learner.h b/src/nnue/evaluate_nnue_learner.h
index 431fb02e..48ab31b9 100644
--- a/src/nnue/evaluate_nnue_learner.h
+++ b/src/nnue/evaluate_nnue_learner.h
@@ -22,6 +22,7 @@ namespace Eval::NNUE {
     void add_example(
         Position& pos,
         Color rootColor,
+        Value discrete_nn_eval,
         const Learner::PackedSfenValue& psv,
         double weight);
 
diff --git a/src/nnue/trainer/trainer.h b/src/nnue/trainer/trainer.h
index 763bd5c8..973bc898 100644
--- a/src/nnue/trainer/trainer.h
+++ b/src/nnue/trainer/trainer.h
@@ -68,6 +68,7 @@ namespace Eval::NNUE {
     struct Example {
         std::vector<TrainingFeature> training_features[2];
         Learner::PackedSfenValue psv;
+        Value discrete_nn_eval;
         int sign;
         double weight;
     };