diff --git a/src/learn/learn.cpp b/src/learn/learn.cpp
index e3d2fecf..a56ac15f 100644
--- a/src/learn/learn.cpp
+++ b/src/learn/learn.cpp
@@ -432,6 +432,8 @@ namespace Learner
         // If true, do not dig the folder.
         bool save_only_once;
 
+        bool verbose;
+
         double newbob_decay;
         int newbob_num_trials;
         uint64_t auto_lr_drop;
@@ -644,7 +646,7 @@ namespace Learner
             // should be no real issues happening since
             // the read/write phases are isolated.
             atomic_thread_fence(memory_order_seq_cst);
-            Eval::NNUE::update_parameters();
+            Eval::NNUE::update_parameters(epoch, verbose);
             atomic_thread_fence(memory_order_seq_cst);
 
             if (++save_count * mini_batch_size >= eval_save_interval)
@@ -943,6 +945,8 @@ namespace Learner
         // Turn on if you want to pass a pre-shuffled file.
         bool no_shuffle = false;
 
+        bool verbose = false;
+
         global_learning_rate = 1.0;
 
         // elmo lambda
@@ -1070,6 +1074,7 @@ namespace Learner
                 UCI::setoption("PruneAtShallowDepth", "false");
                 UCI::setoption("EnableTranspositionTable", "false");
             }
+            else if (option == "verbose") verbose = true;
             else
             {
                 cout << "Unknown option: " << option << ". Ignoring.\n";
@@ -1191,6 +1196,8 @@ namespace Learner
         learn_think.mini_batch_size = mini_batch_size;
         learn_think.validation_set_file_name = validation_set_file_name;
 
+        learn_think.verbose = verbose;
+
         cout << "init done." << endl;
 
         // Start learning.
diff --git a/src/nnue/evaluate_nnue_learner.cpp b/src/nnue/evaluate_nnue_learner.cpp
index 581e7928..e0d2351d 100644
--- a/src/nnue/evaluate_nnue_learner.cpp
+++ b/src/nnue/evaluate_nnue_learner.cpp
@@ -173,7 +173,7 @@ namespace Eval::NNUE {
     }
 
     // update the evaluation function parameters
-    void update_parameters() {
+    void update_parameters(uint64_t epoch, bool verbose) {
         assert(batch_size > 0);
 
         const auto learning_rate = static_cast<LearnFloatType>(
@@ -186,7 +186,7 @@ namespace Eval::NNUE {
         double abs_discrete_eval_sum = 0.0;
         double gradient_norm = 0.0;
 
-        bool is_first_batch = true;
+        bool collect_stats = verbose;
 
         while (examples.size() >= batch_size) {
             std::vector<Example> batch(examples.end() - batch_size, examples.end());
@@ -207,7 +207,7 @@ namespace Eval::NNUE {
             // The discrete eval will only be valid before first backpropagation,
             // that is only for the first batch.
             // Similarily we want only gradients from one batch.
-            if (is_first_batch)
+            if (collect_stats)
             {
                 abs_eval_diff_sum += std::abs(discrete - shallow);
                 abs_discrete_eval_sum += std::abs(discrete);
@@ -217,19 +217,22 @@ namespace Eval::NNUE {
 
             trainer->backpropagate(gradients.data(), learning_rate);
 
-            is_first_batch = false;
+            collect_stats = false;
         }
 
-        const double avg_abs_eval_diff = abs_eval_diff_sum / batch_size;
-        const double avg_abs_discrete_eval = abs_discrete_eval_sum / batch_size;
+        if (verbose) {
+            const double avg_abs_eval_diff = abs_eval_diff_sum / batch_size;
+            const double avg_abs_discrete_eval = abs_discrete_eval_sum / batch_size;
 
-        std::cout << "INFO (update_weights):"
-                  << " avg_abs(trainer_eval-nnue_eval) = " << avg_abs_eval_diff
-                  << " , avg_abs(nnue_eval) = " << avg_abs_discrete_eval
-                  << " , avg_relative_error = " << avg_abs_eval_diff / avg_abs_discrete_eval
-                  << " , batch_size = " << batch_size
-                  << " , grad_norm = " << gradient_norm
-                  << std::endl;
+            std::cout << "INFO (update_parameters):"
+                      << " epoch = " << epoch
+                      << " , avg_abs(trainer_eval-nnue_eval) = " << avg_abs_eval_diff
+                      << " , avg_abs(nnue_eval) = " << avg_abs_discrete_eval
+                      << " , avg_relative_error = " << avg_abs_eval_diff / avg_abs_discrete_eval
+                      << " , batch_size = " << batch_size
+                      << " , grad_norm = " << gradient_norm
+                      << std::endl;
+        }
 
         send_messages({{"quantize_parameters"}});
     }
diff --git a/src/nnue/evaluate_nnue_learner.h b/src/nnue/evaluate_nnue_learner.h
index 48ab31b9..03a23c83 100644
--- a/src/nnue/evaluate_nnue_learner.h
+++ b/src/nnue/evaluate_nnue_learner.h
@@ -27,7 +27,7 @@ namespace Eval::NNUE {
                              double weight);
 
     // update the evaluation function parameters
-    void update_parameters();
+    void update_parameters(uint64_t epoch, bool verbose);
 
     // Check if there are any problems with learning
     void check_health();
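
Note on the gating above: collect_stats starts out equal to verbose and is cleared after the first batch, so the averages printed at the end are taken over exactly one batch (the discrete NNUE eval is only valid before the first backpropagation), and nothing is collected or printed unless the bare verbose token is passed to the learn command. The standalone sketch below illustrates the same control flow; the eval values and batch size are hypothetical stand-ins, not the trainer's real data or types.

// Minimal sketch of the verbose/first-batch statistics gating.
// All values are made up; only the control flow mirrors the patch.
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    constexpr std::size_t batch_size = 4;
    // Hypothetical per-position evals: "discrete" = quantized NNUE output,
    // "shallow" = the trainer's float output.
    const std::vector<double> discrete = {10.0, -20.0, 30.0, -40.0};
    const std::vector<double> shallow  = {12.0, -18.0, 29.0, -43.0};
    const bool verbose = true;

    bool collect_stats = verbose;        // gate the stats on verbosity, as in the patch
    double abs_eval_diff_sum = 0.0;
    double abs_discrete_eval_sum = 0.0;

    for (int batch = 0; batch < 3; ++batch) {   // several batches per update
        if (collect_stats)
            for (std::size_t i = 0; i < batch_size; ++i) {
                abs_eval_diff_sum     += std::abs(discrete[i] - shallow[i]);
                abs_discrete_eval_sum += std::abs(discrete[i]);
            }
        // ... backpropagation would run here ...
        collect_stats = false;           // later batches skip the bookkeeping
    }

    if (verbose)
        std::cout << "INFO (update_parameters):"
                  << " avg_relative_error = "
                  << (abs_eval_diff_sum / batch_size)
                     / (abs_discrete_eval_sum / batch_size)
                  << std::endl;
}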