Reintroduce use_wdl.

This commit is contained in:
Tomasz Sobczyk
2020-11-30 21:17:44 +01:00
committed by nodchip
parent cf6bc7ecaf
commit 99cb869db3

View File

@@ -237,6 +237,22 @@ namespace Learner
return scaled_;
}
// Linearly remaps a score from the source range
// [src_score_min_value, src_score_max_value] onto the destination range
// [dest_score_min_value, dest_score_max_value] and converts the result
// back to a Value.
static Value scale_score(Value v)
{
    const auto src_span = src_score_max_value - src_score_min_value;
    const auto dest_span = dest_score_max_value - dest_score_min_value;

    // Position of v inside the source range: 0.0 at the source minimum,
    // 1.0 at the source maximum (values outside the range are not clamped).
    const auto fraction = (static_cast<double>(v) - src_score_min_value) / src_span;

    // The same relative position expressed in the destination range.
    const auto remapped = fraction * dest_span + dest_score_min_value;

    return Value(remapped);
}
template <typename ValueT>
static auto& expected_perf_(ValueT&& v_)
{
@@ -247,7 +263,72 @@ namespace Learner
return perf_;
}
static ValueWithGrad<double> get_loss_noob(Value shallow, Value teacher_signal, int result, int /* ply */)
// Builds (once per thread and per template instantiation) the autograd
// expression for the expected game score under the win/draw/loss model and
// returns a reference to it.
//
// v_   - expression node for the evaluation being scored.
// ply_ - expression node for the game ply; it is divided by 64 before being
//        fed to the polynomials below.
// T    - scalar type of the coefficients, taken from ValueT::ValueType.
//
// Every intermediate is a function-local `static thread_local` object, so the
// expression is constructed from the arguments of the first call on each
// thread; later calls only return the already-built perf_ node.
template <typename ValueT, typename PlyT, typename T = typename ValueT::ValueType>
static auto& expected_perf_use_wdl_(
ValueT& v_,
PlyT&& ply_
)
{
using namespace Learner::Autograd::UnivariateStatic;
// Coefficients of a 3rd order polynomial fit based on fishtest data
// for two parameters needed to transform eval to the argument of a
// logistic function.
static constexpr T as[] = { -8.24404295, 64.23892342, -95.73056462, 153.86478679 };
static constexpr T bs[] = { -3.37154371, 28.44489198, -56.67657741, 72.05858751 };
// The model captures only up to 240 plies. No limiting happens in this
// function: it only rescales by 1/64. The visible caller
// (get_loss_cross_entropy_use_wdl) supplies std::min(240, ply) as the
// value bound to the ply parameter.
static thread_local auto m_ = std::forward<PlyT>(ply_) / 64.0;
// a_ and b_ are the two fitted cubics in m_, written in Horner form.
static thread_local auto a_ = (((as[0] * m_ + as[1]) * m_ + as[2]) * m_) + as[3];
static thread_local auto b_ = (((bs[0] * m_ + bs[1]) * m_ + bs[2]) * m_) + bs[3];
// Logistic arguments for the side to move (sv_) and for the opponent
// (svn_, same formula with the evaluation negated). No per-mille scaling
// is applied anywhere below; the results are raw sigmoid outputs.
static thread_local auto sv_ = (v_ - a_) / b_;
static thread_local auto svn_ = (-v_ - a_) / b_;
static thread_local auto win_pct_ = sigmoid(sv_);
static thread_local auto loss_pct_ = sigmoid(svn_);
// Whatever is left after win and loss is attributed to a draw.
static thread_local auto draw_pct_ = 1.0 - win_pct_ - loss_pct_;
// Expected score: a win counts 1, a draw counts 0.5, a loss counts 0.
static thread_local auto perf_ = win_pct_ + draw_pct_ * 0.5;
return perf_;
}
// Scalar (non-autograd) expected game score under the win/draw/loss model.
//
// v   - evaluation to convert.
// ply - game ply; values above 240 are treated as 240 because the fitted
//       polynomials are only valid up to that point.
//
// Returns win + 0.5 * draw, where win and loss come from a logistic function
// of the evaluation and draw is the remainder.
static double expected_perf_use_wdl(
    Value v,
    int ply
)
{
    // Coefficients of a 3rd order polynomial fit based on fishtest data
    // for two parameters needed to transform eval to the argument of a
    // logistic function.
    static constexpr double as[] = { -8.24404295, 64.23892342, -95.73056462, 153.86478679 };
    static constexpr double bs[] = { -3.37154371, 28.44489198, -56.67657741, 72.05858751 };

    // The model captures only up to 240 plies, so limit input (and rescale).
    // Without the clamp the cubic for b eventually turns negative for very
    // long games, which flips the sign of the logistic argument.
    const auto m = std::min(240, ply) / 64.0;
    const auto a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
    const auto b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];

    // Logistic arguments for the side to move and, with the evaluation
    // negated, for the opponent.
    const auto sv = (static_cast<double>(v) - a) / b;
    const auto svn = (static_cast<double>(-v) - a) / b;

    const auto win_pct = Math::sigmoid(sv);
    const auto loss_pct = Math::sigmoid(svn);
    const auto draw_pct = 1.0 - win_pct - loss_pct;

    // Expected score: a win counts 1, a draw counts 0.5, a loss counts 0.
    return win_pct + draw_pct * 0.5;
}
[[maybe_unused]] static ValueWithGrad<double> get_loss_noob(Value shallow, Value teacher_signal, int result, int /* ply */)
{
using namespace Learner::Autograd::UnivariateStatic;
@@ -285,11 +366,46 @@ namespace Learner
return loss_.eval(args);
}
// Cross-entropy loss (value and gradient) where the shallow evaluation is
// converted to an expected score through the win/draw/loss model.
//
// shallow        - evaluation being trained; the only VariableParameter.
// teacher_signal - teacher evaluation; rescaled with scale_score and turned
//                  into an expected score outside the autograd graph.
// result         - game result, remapped by (result + 1) * 0.5
//                  (presumably from {-1, 0, 1} to {0, 0.5, 1} - confirm).
// ply            - game ply of the position.
//
// The expression graph is held in function-local `static thread_local`
// objects, so it is built once per thread and then evaluated with the
// per-call values packed into `args`.
static ValueWithGrad<double> get_loss_cross_entropy_use_wdl(
Value shallow, Value teacher_signal, int result, int ply)
{
using namespace Learner::Autograd::UnivariateStatic;
// NOTE(review): the integer template arguments (0..4) presumably select the
// slot of the `args` tuple each parameter reads - the tuple below lists its
// values in that order. Confirm against the autograd header.
static thread_local auto ply_ = ConstantParameter<double, 4>{};
static thread_local auto shallow_ = VariableParameter<double, 0>{};
static thread_local auto q_ = expected_perf_use_wdl_(shallow_, ply_);
// We could do just this but MSVC crashes with an internal compiler error :(
// static thread_local auto scaled_teacher_ = scale_score_(ConstantParameter<double, 1>{});
// static thread_local auto p_ = expected_perf_use_wdl_(scaled_teacher_, ply_);
// Instead the teacher's expected score is supplied as a plain constant.
static thread_local auto p_ = ConstantParameter<double, 1>{};
static thread_local auto t_ = (ConstantParameter<double, 2>{} + 1.0) * 0.5;
static thread_local auto lambda_ = ConstantParameter<double, 3>{};
static thread_local auto loss_ = cross_entropy_(q_, p_, t_, lambda_);
auto args = std::tuple(
(double)shallow,
// This is required because otherwise MSVC crashes :(
// NOTE(review): the raw `ply` is passed here while the last tuple entry
// is limited to 240; any limiting for the teacher path has to happen
// inside expected_perf_use_wdl - verify the two paths agree.
expected_perf_use_wdl(scale_score(teacher_signal), ply),
(double)result,
calculate_lambda(teacher_signal),
// The ply fed to the autograd expression is capped at 240.
(double)std::min(240, ply)
);
return loss_.eval(args);
}
// Computes the training loss for a single position, dispatching on the
// use_wdl setting: the win/draw/loss based cross-entropy when it is set,
// the plain cross-entropy otherwise.
//
// shallow        - evaluation being trained.
// teacher_signal - teacher evaluation for the same position.
// result         - game result.
// ply            - game ply of the position.
static auto get_loss(Value shallow, Value teacher_signal, int result, int ply)
{
    using namespace Learner::Autograd::UnivariateStatic;

    // No unconditional return may precede this dispatch; otherwise the
    // use_wdl branch can never be reached.
    if (use_wdl)
    {
        return get_loss_cross_entropy_use_wdl(shallow, teacher_signal, result, ply);
    }
    else
    {
        return get_loss_cross_entropy(shallow, teacher_signal, result, ply);
    }
}
static auto get_loss(