From 0271d707759117af6557beb93319aa51c07280aa Mon Sep 17 00:00:00 2001
From: nodchip
Date: Wed, 9 Sep 2020 00:01:53 +0900
Subject: [PATCH] Removed ADA_GRAD_UPDATE macro.

---
 src/learn/learn.h          | 10 +------
 src/learn/learning_tools.h | 54 +-------------------------------------
 2 files changed, 2 insertions(+), 62 deletions(-)

diff --git a/src/learn/learn.h b/src/learn/learn.h
index 7285f61a..8fb6217f 100644
--- a/src/learn/learn.h
+++ b/src/learn/learn.h
@@ -9,9 +9,6 @@
 // update formula
 // ----------------------
 
-// AdaGrad. Recommended because it is stable.
-// #define ADA_GRAD_UPDATE
-
 // SGD looking only at the sign of the gradient. It requires less memory, but the accuracy is...
 // #define SGD_UPDATE
 
@@ -136,13 +133,8 @@ typedef float LearnFloatType;
 // ----------------------
 
 #define LOSS_FUNCTION_IS_ELMO_METHOD
-#define ADA_GRAD_UPDATE
 
-// Character string corresponding to the update formula. (Output for debugging.)
-// Various update formulas were implemented, but the conclusion was that AdaGrad is the best in terms of speed and memory.
-#if defined(ADA_GRAD_UPDATE)
-#define LEARN_UPDATE "AdaGrad"
-#elif defined(SGD_UPDATE)
+#if defined(SGD_UPDATE)
 #define LEARN_UPDATE "SGD"
 #endif
 
diff --git a/src/learn/learning_tools.h b/src/learn/learning_tools.h
index 3c4be08a..854133e4 100644
--- a/src/learn/learning_tools.h
+++ b/src/learn/learning_tools.h
@@ -76,59 +76,7 @@ namespace EvalLearningTools
 	template <typename T> void updateFV(T& v) { updateFV(v, 1.0); }
 
-#if defined (ADA_GRAD_UPDATE)
-
-	// Since the maximum value that can be represented accurately with float is INT16_MAX*256-1,
-	// keep a smaller value than that as a marker.
-	const LearnFloatType V0_NOT_INIT = (INT16_MAX * 128);
-
-	// Internal storage for v. The previous implementation kept only the fractional part in fixed point to save memory,
-	// but its accuracy was doubtful and it was hard to read, so it was abolished.
-	LearnFloatType v0 = LearnFloatType(V0_NOT_INIT);
-
-	// AdaGrad g2
-	LearnFloatType g2 = LearnFloatType(0);
-
-	// Update with AdaGrad.
-	// While this function executes, the caller guarantees that g and the members do not change,
-	// so this does not have to be an atomic operation.
-	// k is a coefficient for eta. 1.0 is usually sufficient. To lower eta for the turn term, set this to 1/8.0 etc.
-	template <typename T>
-	void updateFV(T& v, double k)
-	{
-		// AdaGrad update formula:
-		// with gradient g, vector to be updated v, and constant η (eta),
-		//   g2 = g2 + g^2
-		//   v  = v - ηg/sqrt(g2)
-
-		constexpr double epsilon = 0.000001;
-
-		if (g == LearnFloatType(0))
-			return;
-
-		g2 += g * g;
-
-		// If v0 is V0_NOT_INIT, the value has not been initialized from the KK/KKP/KPP array,
-		// so in that case read the value of v from the argument instead.
-		double V = (v0 == V0_NOT_INIT) ? v : v0;
-
-		V -= k * eta * (double)g / sqrt((double)g2 + epsilon);
-
-		// Limit the value of V to the range of the type.
-		// windows.h defines min and max as macros; to avoid them,
-		// the calls are parenthesized here so they are not treated as function-like macros.
-		V = (std::min)((double)(std::numeric_limits<T>::max)(), V);
-		V = (std::max)((double)(std::numeric_limits<T>::min)(), V);
-
-		v0 = (LearnFloatType)V;
-		v = (T)round(V);
-
-		// Clear g because this element's update for the mini-batch is over.
-		// g[i] = 0;
-		// → Because of the dimensionality-reduction handling, this is left to the caller.
-	}
-
-#elif defined(SGD_UPDATE)
+#if defined(SGD_UPDATE)
 
	// See only the sign of the gradient; update with SGD.
	// While this function executes, the value of g and the members do not change
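
For reference, the updateFV() removed above implements the standard per-weight
AdaGrad rule: g2 += g^2, then v -= eta * g / sqrt(g2 + epsilon). A minimal
standalone C++ sketch of that rule follows; the AdaGradWeight struct, the
int16_t weight type, and the example eta value are illustrative assumptions,
not code from this repository:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <iostream>
    #include <limits>

    // Hypothetical per-weight AdaGrad state, mirroring what the removed
    // ADA_GRAD_UPDATE branch kept for each parameter.
    struct AdaGradWeight {
        float  g  = 0.0f; // gradient accumulated over the current mini-batch
        double g2 = 0.0;  // running sum of squared gradients (AdaGrad state)

        // One AdaGrad step: g2 += g^2, v -= eta * g / sqrt(g2 + epsilon).
        void update(std::int16_t& v, double eta, double epsilon = 1e-6) {
            if (g == 0.0f)
                return;
            g2 += double(g) * double(g);
            double V = double(v) - eta * double(g) / std::sqrt(g2 + epsilon);
            // Clamp to the representable range of the weight type, as the
            // removed code did with std::numeric_limits<T>.
            V = std::min(V, double(std::numeric_limits<std::int16_t>::max()));
            V = std::max(V, double(std::numeric_limits<std::int16_t>::min()));
            v = std::int16_t(std::round(V));
            g = 0.0f; // the patch leaves clearing g to the caller
        }
    };

    int main() {
        AdaGradWeight w;
        std::int16_t v = 100;
        w.g = 2.5f;                 // pretend one mini-batch produced this gradient
        w.update(v, /*eta=*/64.0);  // g2 = 6.25, step = 64 * 2.5 / 2.5 = 64
        std::cout << v << '\n';     // prints 36 (= 100 - 64)
    }

Because g2 only grows, the effective step size eta / sqrt(g2) shrinks for
frequently updated parameters while staying large for rarely touched ones,
which is the stability the removed "Recommended because it is stable" comment
referred to.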