diff --git a/src/learn/learn.cpp b/src/learn/learn.cpp index 109a43ea..e3bfe3a4 100644 --- a/src/learn/learn.cpp +++ b/src/learn/learn.cpp @@ -213,9 +213,9 @@ namespace Learner static thread_local auto outcome_loss_ = -(t_ * log(q_) + (1.0 - t_) * log(1.0 - q_)); static thread_local auto result_ = lambda_ * teacher_loss_ + (1.0 - lambda_) * outcome_loss_; static thread_local auto entropy_ = lambda_ * teacher_entropy_ + (1.0 - lambda_) * outcome_entropy_; - static thread_local auto loss_ = result_ - entropy_; + static thread_local auto cross_entropy_ = result_ - entropy_; - return loss_; + return cross_entropy_; } template @@ -247,23 +247,27 @@ namespace Learner return perf_; } - static ValueWithGrad get_loss(Value shallow, Value teacher_signal, int result, int ply) + static ValueWithGrad get_loss_noob(Value shallow, Value teacher_signal, int result, int /* ply */) { using namespace Learner::Autograd::UnivariateStatic; - /* - auto q_ = sigmoid(VariableParameter{} * winning_probability_coefficient); - auto p_ = sigmoid(ConstantParameter{} * winning_probability_coefficient); - auto t_ = (ConstantParameter{} + 1.0) * 0.5; - auto lambda_ = ConstantParameter{}; - auto loss_ = pow(lambda_ * (q_ - p_) + (1.0 - lambda_) * (q_ - t_), 2.0); - */ + static thread_local auto q_ = VariableParameter{}; + static thread_local auto p_ = ConstantParameter{}; + static thread_local auto loss_ = pow(q_ - p_, 2.0) * (1.0 / (2400.0 * 2.0 * 600.0)); - /* - auto q_ = VariableParameter{}; - auto p_ = ConstantParameter{}; - auto loss_ = pow(q_ - p_, 2.0) * (1.0 / (2400.0 * 2.0 * 600.0)); - */ + auto args = std::tuple( + (double)shallow, + (double)teacher_signal, + (double)result, + calculate_lambda(teacher_signal) + ); + + return loss_.eval(args); + } + + static ValueWithGrad get_loss_cross_entropy(Value shallow, Value teacher_signal, int result, int /* ply */) + { + using namespace Learner::Autograd::UnivariateStatic; static thread_local auto q_ = expected_perf_(VariableParameter{}); static thread_local auto p_ = expected_perf_(scale_score_(ConstantParameter{})); @@ -281,6 +285,13 @@ namespace Learner return loss_.eval(args); } + static auto get_loss(Value shallow, Value teacher_signal, int result, int ply) + { + using namespace Learner::Autograd::UnivariateStatic; + + return get_loss_cross_entropy(shallow, teacher_signal, result, ply); + } + static auto get_loss( Value teacher_signal, Value shallow,