mirror of
https://github.com/HChaZZY/Stockfish.git
synced 2025-12-25 19:46:55 +08:00
280 lines
9.2 KiB
C++
280 lines
9.2 KiB
C++
#ifndef _LEARN_H_
|
|
#define _LEARN_H_
|
|
|
|
#if defined(EVAL_LEARN)
|
|
|
|
#include <vector>
|
|
|
|
// =====================
|
|
// Settings for learning
|
|
// =====================
|
|
|
|
// Selecting one of the presets below automatically selects the detailed settings that follow.
// If no preset is selected, each of the subsequent details must be set by hand.

// Learning preset: the elmo method. This is the default setting.
// For the standard "rag-wringing" style of learning (rendered as "squeeze diaphragm" by the
// original translation - TODO confirm), specify "lambda 1" with the learn command.
#define LEARN_ELMO_METHOD
|
|
|
|
|
|
// ----------------------
|
|
// update formula
|
|
// ----------------------
|
|
|
|
// Ada Grad. Recommended because it is stable.
|
|
// #define ADA_GRAD_UPDATE
|
|
|
|
// SGD looking only at the sign of the gradient. It requires less memory, but the accuracy is...
|
|
// #define SGD_UPDATE
|
|
|
|
// ----------------------
|
|
// Settings for learning
|
|
// ----------------------
|
|
|
|
// Mini-batch size.
// The gradient is computed over this many positions combined.
// A smaller value increases the number of update_weights() calls and speeds up convergence,
// but the gradient becomes less accurate.
// A larger value decreases the number of update_weights() calls and slows convergence,
// but the gradient comes out more accurately.
// In most cases this value does not need to be changed.

#define LEARN_MINI_BATCH_SIZE (1000 * 1000 * 1)
|
|
|
|
// Number of positions read from the file at one time. After reading this many, they are shuffled.
// A reasonably large size is better, but memory use is this number x 40 bytes x 3
// (i.e. 400MB x 3 for 10M positions).
// Must be a multiple of THREAD_BUFFER_SIZE (=10000).

#define LEARN_SFEN_READ_SIZE (1000 * 1000 * 10)
|
|
|
|
// Interval between saves of the evaluation function during learning:
// a save happens each time this many positions have been learned.
// Needless to say, the longer the saving interval, the shorter the learning time.
// The folder name is incremented for each save, like 0/, 1/, 2/...
// By default, once every 1 billion positions.
#define LEARN_EVAL_SAVE_INTERVAL (1000000000ULL)
|
|
|
|
|
|
// ----------------------
|
|
// Select the objective function
|
|
// ----------------------
|
|
|
|
// The objective function is the sum of squares of the difference in winning percentage
|
|
// See learner.cpp for more information.
|
|
|
|
//#define LOSS_FUNCTION_IS_WINNING_PERCENTAGE
|
|
|
|
// Objective function is cross entropy
|
|
// See learner.cpp for more information.
|
|
// This is the so-called ordinary "rag-wringing" learning.
|
|
//#define LOSS_FUNCTION_IS_CROSS_ENTOROPY
|
|
|
|
// A version in which the objective function is cross entropy, but the win rate function is not passed
|
|
// #define LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE
|
|
|
|
// elmo (WCSC27) method
|
|
// #define LOSS_FUNCTION_IS_ELMO_METHOD
|
|
|
|
// ※ Other things may be added.
|
|
|
|
|
|
// ----------------------
|
|
// debug settings for learning
|
|
// ----------------------
|
|
|
|
// Output rmse only once per this many times during learning.
// The rmse calculation runs on a single thread and takes some time, so reducing the output is effective.
#define LEARN_RMSE_OUTPUT_INTERVAL 1
|
|
|
|
|
|
// ----------------------
|
|
// learning from zero vector
|
|
// ----------------------
|
|
|
|
// Start learning the evaluation function parameters from the zero vector.
|
|
// Initialize to zero, generate games, and learn starting from the zero vector.
// Repeating game generation → learning should yield parameters that do not depend on professional games. (maybe)
|
|
// (very time consuming)
|
|
|
|
//#define RESET_TO_ZERO_VECTOR
|
|
|
|
|
|
// ----------------------
|
|
// Floating point for learning
|
|
// ----------------------
|
|
|
|
// Floating-point type used for learning.
// With double the calculation accuracy is higher, but the memory used by the weight arrays doubles.
// Currently, with float, the weight array is 4.5 times the size of the evaluation function file.
// (About 4.5GB with KPPT)
// Even with double there is almost no difference in convergence, so the type is fixed to float.

// when using float
typedef float LearnFloatType;

// when using double
//typedef double LearnFloatType;

// when using float16
//#include "half_float.h"
//typedef HalfFloat::float16 LearnFloatType;
|
|
|
|
// ----------------------
|
|
// save memory
|
|
// ----------------------
|
|
|
|
// Use a triangular array for the weight array (the KPP part of it) to save memory.
// With this enabled, the weight array for learning is about 3 times the size of the evaluation function file.

#define USE_TRIANGLE_WEIGHT_ARRAY
|
|
|
|
// ----------------------
|
|
// dimension down
|
|
// ----------------------
|
|
|
|
// Dimension reduction for mirrors (left/right symmetry) and inverse (forward/backward symmetry).
|
|
// All on by default.
|
|
|
|
// Dimension reduction using mirror and inverse for KK. (Unclear effect)
// USE_KK_MIRROR_WRITE must be on when USE_KK_INVERSE_WRITE is on.
#define USE_KK_MIRROR_WRITE
#define USE_KK_INVERSE_WRITE

// Enforce the constraint stated above at compile time, so that a misconfiguration
// is reported here rather than surfacing later.
#if defined(USE_KK_INVERSE_WRITE) && !defined(USE_KK_MIRROR_WRITE)
#error "USE_KK_INVERSE_WRITE requires USE_KK_MIRROR_WRITE"
#endif
|
|
|
|
// Dimension reduction using mirror and inverse for KKP. (Inverse is not so effective)
// When USE_KKP_INVERSE_WRITE is turned on, USE_KKP_MIRROR_WRITE must also be turned on.
#define USE_KKP_MIRROR_WRITE
#define USE_KKP_INVERSE_WRITE

// Enforce the constraint stated above at compile time, so that a misconfiguration
// is reported here rather than surfacing later.
#if defined(USE_KKP_INVERSE_WRITE) && !defined(USE_KKP_MIRROR_WRITE)
#error "USE_KKP_INVERSE_WRITE requires USE_KKP_MIRROR_WRITE"
#endif
|
|
|
|
// Dimension reduction using mirror for KPP. (Turning this off requires twice as many teacher positions)
// KPP has no inverse. (Because there is only K on the front side)
#define USE_KPP_MIRROR_WRITE
|
|
|
|
// Dimension reduction using mirror for KPPP. (Turning this off requires twice as many teacher positions)
// KPPP has no inverse. (Because there is only K on the front side)
#define USE_KPPP_MIRROR_WRITE
|
|
|
|
// Reduce the dimension by KPP for learning the KKPP component.
|
|
// Learning is very slow.
|
|
// Do not use as it is not debugged.
|
|
//#define USE_KKPP_LOWER_DIM
|
|
|
|
|
|
// ======================
|
|
// Settings for creating teacher phases
|
|
// ======================
|
|
|
|
// ----------------------
|
|
// write out the draw
|
|
// ----------------------
|
|
|
|
// When you reach a draw, write it out as a teacher position
|
|
// It's subtle whether it's better to do this.
|
|
// #define LEARN_GENSFEN_USE_DRAW_RESULT
|
|
|
|
|
|
// ======================
|
|
// configure
|
|
// ======================
|
|
|
|
// ----------------------
|
|
// Learning with the method of elmo (WCSC27)
|
|
// ----------------------
|
|
|
|
// When the elmo preset (LEARN_ELMO_METHOD) is selected, turn on the elmo loss
// function and the AdaGrad update formula. Nothing is defined otherwise.
#if defined(LEARN_ELMO_METHOD)
#define LOSS_FUNCTION_IS_ELMO_METHOD
#define ADA_GRAD_UPDATE
#endif
|
|
|
|
// Name of the selected update formula as a string. (Output for debugging.)
// Various update formulas were implemented, but the conclusion was that AdaGrad is the best
// in terms of speed and memory.
// LEARN_UPDATE is left undefined when neither ADA_GRAD_UPDATE nor SGD_UPDATE is defined.
#if defined(ADA_GRAD_UPDATE)
#define LEARN_UPDATE "AdaGrad"
#elif defined(SGD_UPDATE)
#define LEARN_UPDATE "SGD"
#endif
|
|
|
|
// Name of the selected objective (loss) function as a string.
// The first matching LOSS_FUNCTION_IS_* macro wins; LOSS_FUNCTION is left undefined
// when none of them is defined.
#if defined(LOSS_FUNCTION_IS_WINNING_PERCENTAGE)
#define LOSS_FUNCTION "WINNING_PERCENTAGE"
#elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY)
#define LOSS_FUNCTION "CROSS_ENTOROPY"
#elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE)
#define LOSS_FUNCTION "CROSS_ENTOROPY_FOR_VALUE"
#elif defined(LOSS_FUNCTION_IS_ELMO_METHOD)
#define LOSS_FUNCTION "ELMO_METHOD(WCSC27)"
#endif
|
|
|
|
// ----------------------
|
|
// Definition of struct used in Learner
|
|
// ----------------------
|
|
#include "../position.h"
|
|
|
|
namespace Learner
|
|
{
|
|
//Structure in which PackedSfen and evaluation value are integrated
|
|
// If you write different contents for each option, it will be a problem when reusing the teacher game
|
|
// For the time being, write all the following members regardless of the options.
|
|
struct PackedSfenValue
|
|
{
|
|
// phase
|
|
PackedSfen sfen;
|
|
|
|
// Evaluation value returned from Learner::search()
|
|
int16_t score;
|
|
|
|
// PV first move
|
|
// Used when finding the match rate with the teacher
|
|
uint16_t move;
|
|
|
|
// Trouble of the phase from the initial phase.
|
|
uint16_t gamePly;
|
|
|
|
// 1 if the player on this side ultimately wins the game. -1 if you are losing.
|
|
// 0 if a draw is reached.
|
|
// The draw is in the teacher position generation command gensfen,
|
|
// Only write if LEARN_GENSFEN_DRAW_RESULT is enabled.
|
|
int8_t game_result;
|
|
|
|
// When exchanging the file that wrote the teacher aspect with other people
|
|
//Because this structure size is not fixed, pad it so that it is 40 bytes in any environment.
|
|
uint8_t padding;
|
|
|
|
// 32 + 2 + 2 + 2 + 1 + 1 = 40bytes
|
|
};
|
|
|
|
// Type that returns the reading line and the evaluation value at that time
|
|
// Used in Learner::search(), Learner::qsearch().
|
|
typedef std::pair<Value, std::vector<Move> > ValueAndPV;
|
|
|
|
// Phase array: PSVector stands for packed sfen vector.
|
|
typedef std::vector<PackedSfenValue> PSVector;
|
|
|
|
// So far, only Yaneura King 2018 Otafuku has this stub
|
|
// This stub is required if EVAL_LEARN is defined.
|
|
extern Learner::ValueAndPV search(Position& pos, int depth , size_t multiPV = 1 , uint64_t NodesLimit = 0);
|
|
extern Learner::ValueAndPV qsearch(Position& pos);
|
|
|
|
double calc_grad(Value shallow, const PackedSfenValue& psv);
|
|
|
|
void convert_bin_from_pgn_extract(
|
|
const std::vector<std::string>& filenames,
|
|
const std::string& output_file_name,
|
|
const bool pgn_eval_side_to_move,
|
|
const bool convert_no_eval_fens_as_score_zero);
|
|
|
|
void convert_bin(
|
|
const std::vector<std::string>& filenames,
|
|
const std::string& output_file_name,
|
|
const int ply_minimum,
|
|
const int ply_maximum,
|
|
const int interpolate_eval,
|
|
const int src_score_min_value,
|
|
const int src_score_max_value,
|
|
const int dest_score_min_value,
|
|
const int dest_score_max_value,
|
|
const bool check_invalid_fen,
|
|
const bool check_illegal_move);
|
|
|
|
void convert_plain(
|
|
const std::vector<std::string>& filenames,
|
|
const std::string& output_file_name);
|
|
}
|
|
|
|
#endif
|
|
|
|
#endif // ifndef _LEARN_H_
|