From f3a158725d573753cf4b81fc5866c0f3bbdb1e88 Mon Sep 17 00:00:00 2001
From: nodchip
Date: Wed, 9 Sep 2020 00:07:09 +0900
Subject: [PATCH] Removed SGD_UPDATE macro.

---
 src/learn/learn.h          | 12 --------
 src/learn/learning_tools.h | 51 ++------------------------------------
 2 files changed, 2 insertions(+), 61 deletions(-)

diff --git a/src/learn/learn.h b/src/learn/learn.h
index 8fb6217f..91b40213 100644
--- a/src/learn/learn.h
+++ b/src/learn/learn.h
@@ -5,14 +5,6 @@
 
 #include <vector>
 
-// ----------------------
-// update formula
-// ----------------------
-
-// SGD looking only at the sign of the gradient. It requires less memory, but the accuracy is...
-// #define SGD_UPDATE
-
-
 // ----------------------
 // Select the objective function
 // ----------------------
@@ -134,10 +126,6 @@ typedef float LearnFloatType;
 
 #define LOSS_FUNCTION_IS_ELMO_METHOD
 
-#if defined(SGD_UPDATE)
-#define LEARN_UPDATE "SGD"
-#endif
-
 #if defined(LOSS_FUNCTION_IS_WINNING_PERCENTAGE)
 #define LOSS_FUNCTION "WINNING_PERCENTAGE"
 #elif defined(LOSS_FUNCTION_IS_CROSS_ENTOROPY)
diff --git a/src/learn/learning_tools.h b/src/learn/learning_tools.h
index 854133e4..348105b6 100644
--- a/src/learn/learning_tools.h
+++ b/src/learn/learning_tools.h
@@ -4,13 +4,12 @@
 // A set of machine learning tools related to the weight array used for machine learning of evaluation functions
 
 #include "learn.h"
+
 #if defined (EVAL_LEARN)
 
-#include <array>
-#if defined(SGD_UPDATE) || defined(USE_KPPP_MIRROR_WRITE)
 #include "../misc.h" // PRNG , my_insertion_sort
-#endif
+#include <array>
 #include <cmath>	// std::sqrt()
 
 namespace EvalLearningTools
 {
@@ -29,14 +28,6 @@ namespace EvalLearningTools
 		// cumulative value of one mini-batch gradient
 		LearnFloatType g = LearnFloatType(0);
 
-		// When ADA_GRAD_UPDATE. LearnFloatType == float,
-		// total 4*2 + 4*2 + 1*2 = 18 bytes
-		// It suffices to secure a Weight array that is 4.5 times the size of the evaluation function parameter of 1GB.
-		// However, sizeof(Weight)==20 code is generated if the structure alignment is in 4-byte units, so
-		// Specify pragma pack(2).
-
-		// For SGD_UPDATE, this structure is reduced by 10 bytes to 8 bytes.
-
 		// Learning rate η(eta) such as AdaGrad.
 		// It is assumed that eta1,2,3,eta1_epoch,eta2_epoch have been set by the time updateFV() is called.
 		// The epoch of update_weights() gradually changes from eta1 to eta2 until eta1_epoch.
@@ -76,44 +67,6 @@ namespace EvalLearningTools
 		template <typename T> void updateFV(T& v) { updateFV(v, 1.0); }
 
-#if defined(SGD_UPDATE)
-
-		// See only the sign of the gradient Update with SGD
-		// When executing this function, the value of g and the member do not change
-		// Guaranteed by the caller. It does not have to be an atomic operation.
-		template <typename T>
-		void updateFV(T & v , double k)
-		{
-			if (g == 0)
-				return;
-
-			// See only the sign of g and update.
-			// If g <0, add v a little.
-			// If g> 0, subtract v slightly.
-
-			// Since we only add integers, no decimal part is required.
-
-			// It's a good idea to move around 0-5.
-			// It is better to have a Gaussian distribution, so generate a 5-bit random number (each bit has a 1/2 probability of 1),
-			// Pop_count() it. At this time, it has a binomial distribution.
-			//int16_t diff = (int16_t)POPCNT32((u32)prng.rand(31));
-			// → If I do this with 80 threads, this AsyncPRNG::rand() locks, so I slowed down. This implementation is not good.
-			int16_t diff = 1;
-
-			double V = v;
-			if (g > 0.0)
-				V-= diff;
-			else
-				V+= diff;
-
-			V = (std::min)((double)(std::numeric_limits<T>::max)(), V);
-			V = (std::max)((double)(std::numeric_limits<T>::min)(), V);
-
-			v = (T)V;
-		}
-
-#endif
-
 		// grad setting
 		template <typename T> void set_grad(const T& g_) { g = g_; }
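
For reference, the comments in the deleted block describe the update they implement: look only at the sign of the accumulated gradient g and nudge the weight by a small integer step, ideally a binomially distributed step in 0..5 obtained by pop-counting five random bits; the code fell back to a fixed step of 1 because the shared AsyncPRNG locked under many threads. Below is a minimal standalone sketch of that idea, not part of the patch, assuming C++20 and a thread-local std::mt19937 in place of the project's PRNG; the name sgd_sign_update is illustrative only.

#include <algorithm>
#include <bit>       // std::popcount (C++20)
#include <cstdint>
#include <limits>
#include <random>

// Sign-only SGD step: ignore the gradient magnitude and move the weight a
// small amount against the sign of the accumulated gradient g.
// The step is binomially distributed in [0, 5] (popcount of 5 random bits).
template <typename T>
void sgd_sign_update(T& v, double g)
{
	if (g == 0.0)
		return;

	// Thread-local generator: no locking, unlike a shared PRNG.
	thread_local std::mt19937 gen{ std::random_device{}() };
	const auto diff = static_cast<int16_t>(std::popcount(gen() & 0x1Fu)); // 0..5

	double V = v;
	if (g > 0.0)
		V -= diff;   // positive gradient: decrease the weight
	else
		V += diff;   // negative gradient: increase the weight

	// Clamp to the representable range of T before writing back
	// (mirrors the deleted code; assumes an integral T such as int16_t).
	V = (std::min)(static_cast<double>((std::numeric_limits<T>::max)()), V);
	V = (std::max)(static_cast<double>((std::numeric_limits<T>::min)()), V);

	v = static_cast<T>(V);
}

The thread-local generator keeps the update lock-free, which is the property the deleted comment found lacking in AsyncPRNG::rand() when run with many threads.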