diff --git a/src/learn/learn.cpp b/src/learn/learn.cpp
index 4900ff79..450a80c6 100644
--- a/src/learn/learn.cpp
+++ b/src/learn/learn.cpp
@@ -52,7 +52,6 @@
 #include
 #include
 #include
-#include
 
 #if defined (_OPENMP)
 #include
@@ -98,65 +97,6 @@ namespace Learner
     // Using stockfish's WDL with win rate model instead of sigmoid
     static bool use_wdl = false;
 
-    struct Loss
-    {
-        double value() const
-        {
-            return m_loss.value;
-        }
-
-        double grad() const
-        {
-            return m_loss.grad;
-        }
-
-        uint64_t count() const
-        {
-            return m_count;
-        }
-
-        Loss& operator += (const ValueWithGrad& rhs)
-        {
-            std::unique_lock lock(m_mutex);
-
-            m_loss += rhs.abs();
-            m_count += 1;
-
-            return *this;
-        }
-
-        Loss& operator += (const Loss& rhs)
-        {
-            std::unique_lock lock(m_mutex);
-
-            m_loss += rhs.m_loss.abs();
-            m_count += rhs.m_count;
-
-            return *this;
-        }
-
-        void reset()
-        {
-            std::unique_lock lock(m_mutex);
-
-            m_loss = ValueWithGrad{ 0.0, 0.0 };
-            m_count = 0;
-        }
-
-        template
-        void print(const std::string& prefix, StreamT& s) const
-        {
-            s << " - " << prefix << "_loss = " << m_loss.value / (double)m_count << endl;
-            s << " - " << prefix << "_grad_norm = " << m_loss.grad / (double)m_count << endl;
-        }
-
-    private:
-        ValueWithGrad m_loss{ 0.0, 0.0 };
-        uint64_t m_count{0};
-        std::mutex m_mutex;
-
-    };
-
     static void append_files_from_dir(
         std::vector& filenames,
         const std::string& base_dir,
@@ -714,7 +654,6 @@
         const auto thread_id = th.thread_idx();
         auto& pos = th.rootPos;
 
-        Loss local_loss_sum{};
         std::vector> state(MAX_PLY);
 
         while(!stop_flag)
@@ -761,17 +700,8 @@
             auto pos_add_grad = [&]() {
 
                 // Evaluation value of deep search
-                const auto deep_value = (Value)ps.score;
-
                 const Value shallow_value = Eval::evaluate(pos);
 
-                const auto loss = get_loss(
-                    deep_value,
-                    (rootColor == pos.side_to_move()) ? shallow_value : -shallow_value,
-                    ps);
-
-                local_loss_sum += loss;
-
                 Eval::NNUE::add_example(pos, rootColor, shallow_value, ps, 1.0);
             };
 
@@ -809,8 +739,6 @@
                 // Since we have reached the end phase of PV, add the slope here.
                 pos_add_grad();
             }
-
-        learn_loss_sum += local_loss_sum;
     }
 
     void LearnerThink::update_weights(const PSVector& psv, uint64_t epoch)
@@ -819,7 +747,8 @@
         // should be no real issues happening since
        // the read/write phases are isolated.
         atomic_thread_fence(memory_order_seq_cst);
-        Eval::NNUE::update_parameters(Threads, epoch, params.verbose, params.learning_rate, params.max_grad, get_loss);
+        learn_loss_sum += Eval::NNUE::update_parameters(
+            Threads, epoch, params.verbose, params.learning_rate, params.max_grad, get_loss);
         atomic_thread_fence(memory_order_seq_cst);
 
         if (++save_count * params.mini_batch_size >= params.eval_save_interval)
@@ -899,11 +828,11 @@
         if (psv.size() && test_loss_sum.count() > 0)
         {
-            test_loss_sum.print("test", out);
+            test_loss_sum.print("val", out);
 
             if (learn_loss_sum.count() > 0)
             {
-                learn_loss_sum.print("learn", out);
+                learn_loss_sum.print("train", out);
             }
 
             out << " - norm = " << sum_norm << endl;
diff --git a/src/learn/learn.h b/src/learn/learn.h
index 4e8d8a02..552096b2 100644
--- a/src/learn/learn.h
+++ b/src/learn/learn.h
@@ -40,6 +40,8 @@ using LearnFloatType = float;
 
 #include
 #include
+#include
+#include
 
 namespace Learner
 {
@@ -69,6 +71,72 @@ namespace Learner
     void learn(std::istringstream& is);
 
     using CalcLossFunc = ValueWithGrad(Value, Value, int, int);
+
+    struct Loss
+    {
+        double value() const
+        {
+            return m_loss.value;
+        }
+
+        double grad() const
+        {
+            return m_loss.grad;
+        }
+
+        uint64_t count() const
+        {
+            return m_count;
+        }
+
+        Loss() = default;
+
+        Loss(const Loss& other) :
+            m_loss(other.m_loss),
+            m_count(other.m_count)
+        {
+        }
+
+        Loss& operator += (const ValueWithGrad& rhs)
+        {
+            std::unique_lock lock(m_mutex);
+
+            m_loss += rhs.abs();
+            m_count += 1;
+
+            return *this;
+        }
+
+        Loss& operator += (const Loss& rhs)
+        {
+            std::unique_lock lock(m_mutex);
+
+            m_loss += rhs.m_loss.abs();
+            m_count += rhs.m_count;
+
+            return *this;
+        }
+
+        void reset()
+        {
+            std::unique_lock lock(m_mutex);
+
+            m_loss = ValueWithGrad{ 0.0, 0.0 };
+            m_count = 0;
+        }
+
+        template
+        void print(const std::string& prefix, StreamT& s) const
+        {
+            s << " - " << prefix << "_loss = " << m_loss.value / (double)m_count << std::endl;
+            s << " - " << prefix << "_grad_norm = " << m_loss.grad / (double)m_count << std::endl;
+        }
+
+    private:
+        ValueWithGrad m_loss{ 0.0, 0.0 };
+        uint64_t m_count{0};
+        std::mutex m_mutex;
+    };
 }
 
 #endif // ifndef _LEARN_H_
diff --git a/src/nnue/evaluate_nnue_learner.cpp b/src/nnue/evaluate_nnue_learner.cpp
index 8c28e4f4..3061a4f4 100644
--- a/src/nnue/evaluate_nnue_learner.cpp
+++ b/src/nnue/evaluate_nnue_learner.cpp
@@ -190,7 +190,7 @@ namespace Eval::NNUE {
     }
 
     // update the evaluation function parameters
-    void update_parameters(
+    Learner::Loss update_parameters(
         ThreadPool& thread_pool,
         uint64_t epoch,
         bool verbose,
@@ -212,9 +212,12 @@
         bool collect_stats = verbose;
 
+        Learner::Loss loss_sum{};
+
         std::vector abs_eval_diff_sum_local(thread_pool.size(), 0.0);
         std::vector abs_discrete_eval_sum_local(thread_pool.size(), 0.0);
         std::vector gradient_norm_local(thread_pool.size(), 0.0);
+        std::vector loss_sum_local(thread_pool.size());
 
         auto prev_batch_begin = examples.end();
         while ((long)(prev_batch_begin - examples.begin()) >= (long)batch_size)
         {
@@ -237,11 +240,11 @@
                     e.sign * network_output[b] * kPonanzaConstant));
                 const auto discrete = e.sign * e.discrete_nn_eval;
                 const auto& psv = e.psv;
-                const auto loss = calc_loss(shallow, (Value)psv.score, psv.game_result, psv.gamePly);
-                const double gradient = std::clamp(
+                auto loss = calc_loss(shallow, (Value)psv.score, psv.game_result, psv.gamePly);
+                loss.grad = std::clamp(
                     loss.grad * e.sign * kPonanzaConstant * e.weight,
                     -max_grad, max_grad);
-                gradients[b] = static_cast(gradient);
-
+                gradients[b] = static_cast(loss.grad);
+                loss_sum_local[thread_id] += loss;
 
                 // The discrete eval will only be valid before first backpropagation,
                 // that is only for the first batch.
@@ -250,7 +253,7 @@
                 {
                     abs_eval_diff_sum_local[thread_id] += std::abs(discrete - shallow);
                     abs_discrete_eval_sum_local[thread_id] += std::abs(discrete);
-                    gradient_norm_local[thread_id] += std::abs(gradient);
+                    gradient_norm_local[thread_id] += std::abs(loss.grad);
                 }
             }
 
@@ -277,9 +280,7 @@
             abs_eval_diff_sum = std::accumulate(abs_eval_diff_sum_local.begin(), abs_eval_diff_sum_local.end(), 0.0);
             abs_discrete_eval_sum = std::accumulate(abs_discrete_eval_sum_local.begin(), abs_discrete_eval_sum_local.end(), 0.0);
             gradient_norm = std::accumulate(gradient_norm_local.begin(), gradient_norm_local.end(), 0.0);
-        }
-
         if (verbose)
         {
             const double avg_abs_eval_diff = abs_eval_diff_sum / batch_size;
             const double avg_abs_discrete_eval = abs_discrete_eval_sum / batch_size;
@@ -300,6 +301,13 @@
         }
 
         send_messages({{"quantize_parameters"}});
+
+        for(auto& loss : loss_sum_local)
+        {
+            loss_sum += loss;
+        }
+
+        return loss_sum;
     }
 
     // Check if there are any problems with learning
diff --git a/src/nnue/evaluate_nnue_learner.h b/src/nnue/evaluate_nnue_learner.h
index 5beca0a7..3d9f5b31 100644
--- a/src/nnue/evaluate_nnue_learner.h
+++ b/src/nnue/evaluate_nnue_learner.h
@@ -33,7 +33,7 @@ namespace Eval::NNUE {
         double weight);
 
     // update the evaluation function parameters
-    void update_parameters(
+    Learner::Loss update_parameters(
         ThreadPool& thread_pool,
         uint64_t epoch,
         bool verbose,