Revert "use winning_percentage_wdl in learn"

This reverts commit c964e902c5.

# Conflicts:
#	src/uci.cpp
This commit is contained in:
nodchip
2020-07-06 11:07:46 +09:00
parent 3b535b5ade
commit 85c802d0b9
3 changed files with 8 additions and 59 deletions

View File

@@ -116,7 +116,6 @@ bool use_draw_in_training_data_generation = false;
bool use_draw_in_training = false;
bool use_draw_in_validation = false;
bool use_hash_in_training = true;
bool use_wdl = false;
// -----------------------------------
// write phase file
@@ -1026,16 +1025,6 @@ double sigmoid(double x)
return 1.0 / (1.0 + std::exp(-x));
}
// A function that converts the evaluation value to the winning rate [0,1]
double winning_percentage_wdl(Value value, int ply)
{
double wdl_w = UCI::win_rate_model( value, ply);
double wdl_l = UCI::win_rate_model(-value, ply);
double wdl_d = 1000.0 - wdl_w - wdl_l;
return (wdl_w + wdl_d / 2.0) / 1000.0;
}
// A function that converts the evaluation value to the winning rate [0,1]
double winning_percentage(double value)
{
@@ -1044,18 +1033,6 @@ double winning_percentage(double value)
// = sigmoid(Eval/4*ln(10))
return sigmoid(value / PawnValueEg / 4.0 * log(10.0));
}
// A function that converts the evaluation value to the winning rate [0,1]
double winning_percentage(Value value, int ply)
{
if (use_wdl) {
return winning_percentage_wdl(value, ply);
}
else {
return winning_percentage(value);
}
}
double dsigmoid(double x)
{
// Sigmoid function
@@ -1092,8 +1069,8 @@ double calc_grad(Value deep, Value shallow, PackedSfenValue& psv)
// Also, the coefficient of 1/m is unnecessary if you use the update formula that has the automatic gradient adjustment function like Adam and AdaGrad.
// Therefore, it is not necessary to save it in memory.
double p = winning_percentage(deep, psv.gamePly);
double q = winning_percentage(shallow, psv.gamePly);
double p = winning_percentage(deep);
double q = winning_percentage(shallow);
return (q - p) * dsigmoid(double(shallow) / 600.0);
}
#endif
@@ -1118,8 +1095,8 @@ double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv)
// = ...
// = q-p.
double p = winning_percentage(deep, psv.gamePly);
double q = winning_percentage(shallow, psv.gamePly);
double p = winning_percentage(deep);
double q = winning_percentage(shallow);
return q - p;
}
@@ -1150,8 +1127,8 @@ double calc_grad(Value deep, Value shallow , const PackedSfenValue& psv)
// elmo (WCSC27) method
// Correct with the actual game wins and losses.
const double q = winning_percentage(shallow, psv.gamePly);
const double p = winning_percentage(deep, psv.gamePly);
const double q = winning_percentage(shallow);
const double p = winning_percentage(deep);
// Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw.
// game_result = 1,0,-1 so add 1 and divide by 2.
@@ -1173,8 +1150,8 @@ void calc_cross_entropy(Value deep, Value shallow, const PackedSfenValue& psv,
double& cross_entropy_eval, double& cross_entropy_win, double& cross_entropy,
double& entropy_eval, double& entropy_win, double& entropy)
{
const double p /* teacher_winrate */ = winning_percentage(deep, psv.gamePly);
const double q /* eval_winrate */ = winning_percentage(shallow, psv.gamePly);
const double p /* teacher_winrate */ = winning_percentage(deep);
const double q /* eval_winrate */ = winning_percentage(shallow);
const double t = double(psv.game_result + 1) / 2;
constexpr double epsilon = 0.000001;
@@ -2943,7 +2920,6 @@ void learn(Position&, istringstream& is)
else if (option == "use_draw_in_training") is >> use_draw_in_training;
else if (option == "use_draw_in_validation") is >> use_draw_in_validation;
else if (option == "use_hash_in_training") is >> use_hash_in_training;
else if (option == "use_wdl") is >> use_wdl;
// Discount rate
else if (option == "discount_rate") is >> discount_rate;