Mirror of https://github.com/HChaZZY/Stockfish.git, synced 2025-12-25 11:36:51 +08:00
Reintroduce use_wdl.
@@ -237,6 +237,22 @@ namespace Learner
         return scaled_;
     }
 
+    static Value scale_score(Value v)
+    {
+        // Normalize to [0.0, 1.0].
+        auto normalized =
+            ((double)v - src_score_min_value)
+            / (src_score_max_value - src_score_min_value);
+
+        // Scale to [dest_score_min_value, dest_score_max_value].
+        auto scaled =
+            normalized
+            * (dest_score_max_value - dest_score_min_value)
+            + dest_score_min_value;
+
+        return Value(scaled);
+    }
+
     template <typename ValueT>
     static auto& expected_perf_(ValueT&& v_)
     {
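
For reference, scale_score is a plain affine remap: normalize the source score to [0, 1], then stretch it onto the destination range. A minimal standalone sketch of the same mapping, with hypothetical bounds (the real src_score_*/dest_score_* constants are defined elsewhere in this file; the values below are placeholders):

    #include <cstdio>

    // Hypothetical bounds; the real src_/dest_score_* constants are defined
    // elsewhere in this file.
    constexpr double src_min  = -1000.0, src_max  = 1000.0;
    constexpr double dest_min = -600.0,  dest_max = 600.0;

    // Same two-step mapping as scale_score: normalize to [0, 1], then
    // stretch onto the destination range.
    double scale(double v)
    {
        double normalized = (v - src_min) / (src_max - src_min);
        return normalized * (dest_max - dest_min) + dest_min;
    }

    int main()
    {
        // Endpoints map to endpoints; 0 stays at 0 because both ranges are symmetric.
        std::printf("%g %g %g\n", scale(-1000), scale(0), scale(1000)); // -600 0 600
    }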
@@ -247,7 +263,72 @@ namespace Learner
         return perf_;
     }
 
-    static ValueWithGrad<double> get_loss_noob(Value shallow, Value teacher_signal, int result, int /* ply */)
+    template <typename ValueT, typename PlyT, typename T = typename ValueT::ValueType>
+    static auto& expected_perf_use_wdl_(
+        ValueT& v_,
+        PlyT&& ply_
+    )
+    {
+        using namespace Learner::Autograd::UnivariateStatic;
+
+        // Coefficients of a 3rd order polynomial fit based on fishtest data
+        // for two parameters needed to transform eval to the argument of a
+        // logistic function.
+        static constexpr T as[] = { -8.24404295, 64.23892342, -95.73056462, 153.86478679 };
+        static constexpr T bs[] = { -3.37154371, 28.44489198, -56.67657741, 72.05858751 };
+
+        // The model captures only up to 240 plies; the caller clamps, so just rescale here.
+        static thread_local auto m_ = std::forward<PlyT>(ply_) / 64.0;
+
+        static thread_local auto a_ = (((as[0] * m_ + as[1]) * m_ + as[2]) * m_) + as[3];
+        static thread_local auto b_ = (((bs[0] * m_ + bs[1]) * m_ + bs[2]) * m_) + bs[3];
+
+        // Win/loss probabilities in [0, 1] from the logistic model.
+        static thread_local auto sv_ = (v_ - a_) / b_;
+        static thread_local auto svn_ = (-v_ - a_) / b_;
+
+        static thread_local auto win_pct_ = sigmoid(sv_);
+        static thread_local auto loss_pct_ = sigmoid(svn_);
+
+        static thread_local auto draw_pct_ = 1.0 - win_pct_ - loss_pct_;
+
+        static thread_local auto perf_ = win_pct_ + draw_pct_ * 0.5;
+
+        return perf_;
+    }
+
+    static double expected_perf_use_wdl(
+        Value v,
+        int ply
+    )
+    {
+        // Coefficients of a 3rd order polynomial fit based on fishtest data
+        // for two parameters needed to transform eval to the argument of a
+        // logistic function.
+        static constexpr double as[] = { -8.24404295, 64.23892342, -95.73056462, 153.86478679 };
+        static constexpr double bs[] = { -3.37154371, 28.44489198, -56.67657741, 72.05858751 };
+
+        // The model captures only up to 240 plies, so clamp the input (and rescale).
+        auto m = std::min(240, ply) / 64.0;
+
+        auto a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
+        auto b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
+
+        // Win/loss probabilities in [0, 1] from the logistic model.
+        auto sv = ((double)v - a) / b;
+        auto svn = ((double)-v - a) / b;
+
+        auto win_pct = Math::sigmoid(sv);
+        auto loss_pct = Math::sigmoid(svn);
+
+        auto draw_pct = 1.0 - win_pct - loss_pct;
+
+        auto perf = win_pct + draw_pct * 0.5;
+
+        return perf;
+    }
+
+    [[maybe_unused]] static ValueWithGrad<double> get_loss_noob(Value shallow, Value teacher_signal, int result, int /* ply */)
     {
         using namespace Learner::Autograd::UnivariateStatic;
 
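
To make the WDL model above concrete, here is a self-contained sketch of the same computation, assuming Math::sigmoid is the standard logistic 1 / (1 + exp(-x)); the sigmoid helper and the sample inputs below are illustrative, not taken from this codebase:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    // Standard logistic; assumed to match Math::sigmoid in this codebase.
    static double sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }

    // Same polynomial fit and WDL combination as expected_perf_use_wdl above.
    static double expected_perf(double v, int ply)
    {
        constexpr double as[] = { -8.24404295, 64.23892342, -95.73056462, 153.86478679 };
        constexpr double bs[] = { -3.37154371, 28.44489198, -56.67657741, 72.05858751 };

        // Clamp to the fitted range and rescale the ply count.
        double m = std::min(240, ply) / 64.0;
        double a = ((as[0] * m + as[1]) * m + as[2]) * m + as[3];
        double b = ((bs[0] * m + bs[1]) * m + bs[2]) * m + bs[3];

        double win  = sigmoid(( v - a) / b);
        double loss = sigmoid((-v - a) / b);
        double draw = 1.0 - win - loss;

        // Expected score: a draw counts as half a point.
        return win + draw * 0.5;
    }

    int main()
    {
        // A 100cp advantage at ply 30 gives roughly a 0.69 expected score
        // with these coefficients.
        std::printf("%.3f\n", expected_perf(100.0, 30));
        // At eval 0 the win and loss terms are equal, so the expected
        // score is exactly 0.5 at any ply.
        std::printf("%.3f\n", expected_perf(0.0, 30));
    }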
@@ -285,11 +366,46 @@ namespace Learner
         return loss_.eval(args);
     }
 
+    static ValueWithGrad<double> get_loss_cross_entropy_use_wdl(
+        Value shallow, Value teacher_signal, int result, int ply)
+    {
+        using namespace Learner::Autograd::UnivariateStatic;
+
+        static thread_local auto ply_ = ConstantParameter<double, 4>{};
+        static thread_local auto shallow_ = VariableParameter<double, 0>{};
+        static thread_local auto q_ = expected_perf_use_wdl_(shallow_, ply_);
+        // We could do just this but MSVC crashes with an internal compiler error :(
+        // static thread_local auto scaled_teacher_ = scale_score_(ConstantParameter<double, 1>{});
+        // static thread_local auto p_ = expected_perf_use_wdl_(scaled_teacher_, ply_);
+        static thread_local auto p_ = ConstantParameter<double, 1>{};
+        static thread_local auto t_ = (ConstantParameter<double, 2>{} + 1.0) * 0.5;
+        static thread_local auto lambda_ = ConstantParameter<double, 3>{};
+        static thread_local auto loss_ = cross_entropy_(q_, p_, t_, lambda_);
+
+        auto args = std::tuple(
+            (double)shallow,
+            // This is required because otherwise MSVC crashes :(
+            expected_perf_use_wdl(scale_score(teacher_signal), ply),
+            (double)result,
+            calculate_lambda(teacher_signal),
+            (double)std::min(240, ply)
+        );
+
+        return loss_.eval(args);
+    }
+
     static auto get_loss(Value shallow, Value teacher_signal, int result, int ply)
     {
         using namespace Learner::Autograd::UnivariateStatic;
 
-        return get_loss_cross_entropy(shallow, teacher_signal, result, ply);
+        if (use_wdl)
+        {
+            return get_loss_cross_entropy_use_wdl(shallow, teacher_signal, result, ply);
+        }
+        else
+        {
+            return get_loss_cross_entropy(shallow, teacher_signal, result, ply);
+        }
     }
 
     static auto get_loss(
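
In get_loss_cross_entropy_use_wdl, cross_entropy_ receives the predicted expected score q_, the teacher's expected score p_, the game result mapped to t = (result + 1) * 0.5, and a blending weight lambda from calculate_lambda. The authoritative definition of cross_entropy_ lives elsewhere in this file; the following is only a hypothetical sketch of a lambda-blended binary cross-entropy with that shape:

    #include <cmath>
    #include <cstdio>

    // Hypothetical sketch of a lambda-blended binary cross-entropy; the
    // real cross_entropy_ in this file is the authoritative definition.
    static double blended_cross_entropy(double q,       // predicted expected score
                                        double p,       // teacher expected score
                                        double t,       // game result mapped to [0, 1]
                                        double lambda)  // interpolation weight
    {
        constexpr double eps = 1e-12; // guard the logs against q == 0 or 1
        auto ce = [&](double target) {
            return -(target * std::log(q + eps)
                     + (1.0 - target) * std::log(1.0 - q + eps));
        };
        // Blend the teacher-signal term and the game-result term.
        return lambda * ce(p) + (1.0 - lambda) * ce(t);
    }

    int main()
    {
        // result = +1 (win) maps to t = (1 + 1) * 0.5 = 1.0, as in
        // get_loss_cross_entropy_use_wdl; the other values are illustrative.
        std::printf("%.4f\n", blended_cross_entropy(0.69, 0.72, 1.0, 0.7));
    }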