Dual NNUE with L1-128 smallnet

Credit goes to @mstembera for:
- writing the code enabling dual NNUE:
  https://github.com/official-stockfish/Stockfish/pull/4898
- the idea of trying L1-128 trained exclusively on high simple eval
  positions

The L1-128 smallnet is:
- epoch 399 of a single-stage training from scratch
- trained only on positions from filtered data with high material
  difference
  - defined by abs(simple_eval) > 1000

```yaml
experiment-name: 128--S1-only-hse-v2

training-dataset:
  - /data/hse/S3/dfrc99-16tb7p-eval-filt-v2.min.high-simple-eval-1k.binpack
  - /data/hse/S3/leela96-filt-v2.min.high-simple-eval-1k.binpack
  - /data/hse/S3/test80-apr2022-16tb7p.min.high-simple-eval-1k.binpack

  - /data/hse/S7/test60-2020-2tb7p.v6-3072.high-simple-eval-1k.binpack
  - /data/hse/S7/test60-novdec2021-12tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.binpack

  - /data/hse/S7/test77-nov2021-2tb7p.v6-3072.min.high-simple-eval-1k.binpack
  - /data/hse/S7/test77-dec2021-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.binpack
  - /data/hse/S7/test77-jan2022-2tb7p.high-simple-eval-1k.binpack

  - /data/hse/S7/test78-jantomay2022-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.binpack
  - /data/hse/S7/test78-juntosep2022-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.binpack

  - /data/hse/S7/test79-apr2022-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.binpack
  - /data/hse/S7/test79-may2022-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.binpack

  # T80 2022
  - /data/hse/S7/test80-may2022-16tb7p.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-jun2022-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-jul2022-16tb7p.v6-dd.min.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-aug2022-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-sep2022-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-oct2022-16tb7p.v6-dd.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-nov2022-16tb7p-v6-dd.min.high-simple-eval-1k.binpack

  # T80 2023
  - /data/hse/S7/test80-jan2023-3of3-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-feb2023-16tb7p-filter-v6-dd.min-mar2023.unmin.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-mar2023-2tb7p.v6-sk16.min.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-apr2023-2tb7p-filter-v6-sk16.min.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-may2023-2tb7p.v6.min.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-jun2023-2tb7p.v6-3072.min.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-jul2023-2tb7p.v6-3072.min.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-aug2023-2tb7p.v6.min.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-sep2023-2tb7p.high-simple-eval-1k.binpack
  - /data/hse/S7/test80-oct2023-2tb7p.high-simple-eval-1k.binpack

start-from-engine-test-net: False

nnue-pytorch-branch: linrock/nnue-pytorch/L1-128
engine-test-branch: linrock/Stockfish/L1-128-nolazy
engine-base-branch: linrock/Stockfish/L1-128

num-epochs: 500
lambda: 1.0
```

Experiment yaml configs converted to easy_train.sh commands with:
https://github.com/linrock/nnue-tools/blob/4339954/yaml_easy_train.py

Binpacks interleaved at training time with:
https://github.com/official-stockfish/nnue-pytorch/pull/259

Data filtered for high simple eval positions with:
https://github.com/linrock/nnue-data/blob/32d6a68/filter_high_simple_eval_plain.py
https://github.com/linrock/Stockfish/blob/61dbfe/src/tools/transform.cpp#L626-L655

Training data can be found at:
https://robotmoon.com/nnue-training-data/

Local elo at 25k nodes per move of
L1-128 smallnet (nnue-only eval) vs. L1-128 trained on standard S1 data:
nn-epoch399.nnue : -318.1 +/- 2.1

Passed STC:
https://tests.stockfishchess.org/tests/view/6574cb9d95ea6ba1fcd49e3b
LLR: 2.93 (-2.94,2.94) <0.00,2.00>
Total: 62432 W: 15875 L: 15521 D: 31036
Ptnml(0-2): 177, 7331, 15872, 7633, 203

Passed LTC:
https://tests.stockfishchess.org/tests/view/6575da2d4d789acf40aaac6e
LLR: 2.94 (-2.94,2.94) <0.50,2.50>
Total: 64830 W: 16118 L: 15738 D: 32974
Ptnml(0-2): 43, 7129, 17697, 7497, 49

closes https://github.com/official-stockfish/Stockfish/pulls

Bench: 1330050

Co-Authored-By: mstembera <5421953+mstembera@users.noreply.github.com>
This commit is contained in:
Linmiao Xu
2023-12-02 17:50:32 -08:00
committed by Disservin
parent a5a76a6370
commit 584d9efedc
12 changed files with 293 additions and 194 deletions

View File

@@ -40,14 +40,18 @@
namespace Stockfish::Eval::NNUE {
// Input feature converter
LargePagePtr<FeatureTransformer> featureTransformer;
LargePagePtr<FeatureTransformer<TransformedFeatureDimensionsBig, &StateInfo::accumulatorBig>>
featureTransformerBig;
LargePagePtr<FeatureTransformer<TransformedFeatureDimensionsSmall, &StateInfo::accumulatorSmall>>
featureTransformerSmall;
// Evaluation function
AlignedPtr<Network> network[LayerStacks];
AlignedPtr<Network<TransformedFeatureDimensionsBig, L2Big, L3Big>> networkBig[LayerStacks];
AlignedPtr<Network<TransformedFeatureDimensionsSmall, L2Small, L3Small>> networkSmall[LayerStacks];
// Evaluation function file name
std::string fileName;
std::string netDescription;
// Evaluation function file names
std::string fileName[2];
std::string netDescription[2];
namespace Detail {
@@ -91,11 +95,20 @@ bool write_parameters(std::ostream& stream, const T& reference) {
// Initialize the evaluation function parameters for one net.
// netSize selects which set is initialized: when it equals Small, the small
// feature transformer and every entry of networkSmall[]; otherwise the big
// feature transformer and every entry of networkBig[]. The other net's
// storage is not touched by this call.
static void initialize() {
static void initialize(NetSize netSize) {
Detail::initialize(featureTransformer);
for (std::size_t i = 0; i < LayerStacks; ++i)
Detail::initialize(network[i]);
if (netSize == Small)
{
Detail::initialize(featureTransformerSmall);
for (std::size_t i = 0; i < LayerStacks; ++i)
Detail::initialize(networkSmall[i]);
}
else
{
Detail::initialize(featureTransformerBig);
for (std::size_t i = 0; i < LayerStacks; ++i)
Detail::initialize(networkBig[i]);
}
}
// Read network header
@@ -122,39 +135,57 @@ static bool write_header(std::ostream& stream, std::uint32_t hashValue, const st
}
// Read network parameters for the net selected by netSize.
// Order of data consumed from the stream: header (its description is stored
// in netDescription[netSize]), feature transformer, then one network per
// layer stack. Returns false if the header cannot be read, if the header hash
// differs from HashValue[netSize], or if any component fails to read.
// Returns true only when the stream is still good and nothing is left to
// read, i.e. trailing bytes after the last layer stack cause a failure.
static bool read_parameters(std::istream& stream) {
static bool read_parameters(std::istream& stream, NetSize netSize) {
std::uint32_t hashValue;
if (!read_header(stream, &hashValue, &netDescription))
if (!read_header(stream, &hashValue, &netDescription[netSize]))
return false;
if (hashValue != HashValue)
if (hashValue != HashValue[netSize])
return false;
if (!Detail::read_parameters(stream, *featureTransformer))
if (netSize == Big && !Detail::read_parameters(stream, *featureTransformerBig))
return false;
if (netSize == Small && !Detail::read_parameters(stream, *featureTransformerSmall))
return false;
for (std::size_t i = 0; i < LayerStacks; ++i)
if (!Detail::read_parameters(stream, *(network[i])))
{
if (netSize == Big && !Detail::read_parameters(stream, *(networkBig[i])))
return false;
if (netSize == Small && !Detail::read_parameters(stream, *(networkSmall[i])))
return false;
}
return stream && stream.peek() == std::ios::traits_type::eof();
}
// Write network parameters for the net selected by netSize.
// Emits the header (HashValue[netSize] and netDescription[netSize]), then the
// feature transformer, then one network per layer stack, in the same order
// the reader consumes them. Returns false as soon as any component fails to
// write; otherwise returns the final state of the stream.
static bool write_parameters(std::ostream& stream) {
static bool write_parameters(std::ostream& stream, NetSize netSize) {
if (!write_header(stream, HashValue, netDescription))
if (!write_header(stream, HashValue[netSize], netDescription[netSize]))
return false;
if (!Detail::write_parameters(stream, *featureTransformer))
if (netSize == Big && !Detail::write_parameters(stream, *featureTransformerBig))
return false;
if (netSize == Small && !Detail::write_parameters(stream, *featureTransformerSmall))
return false;
for (std::size_t i = 0; i < LayerStacks; ++i)
if (!Detail::write_parameters(stream, *(network[i])))
{
if (netSize == Big && !Detail::write_parameters(stream, *(networkBig[i])))
return false;
if (netSize == Small && !Detail::write_parameters(stream, *(networkSmall[i])))
return false;
}
return bool(stream);
}
// Forwards a common-access hint for pos to one of the two feature
// transformers. The choice is made from simple_eval() taken from the side to
// move: when its absolute value exceeds 1100 the small net's transformer is
// hinted, otherwise the big net's.
// NOTE(review): the 1100 threshold presumably has to match the net selection
// done by the caller of evaluate<>() - confirm against that call site.
void hint_common_parent_position(const Position& pos) {
featureTransformer->hint_common_access(pos);
int simpleEval = simple_eval(pos, pos.side_to_move());
if (abs(simpleEval) > 1100)
featureTransformerSmall->hint_common_access(pos);
else
featureTransformerBig->hint_common_access(pos);
}
// Evaluation function. Perform differential calculation.
template<NetSize Net_Size>
Value evaluate(const Position& pos, bool adjusted, int* complexity) {
// We manually align the arrays on the stack because with gcc < 9.3
@@ -165,19 +196,28 @@ Value evaluate(const Position& pos, bool adjusted, int* complexity) {
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
TransformedFeatureType
transformedFeaturesUnaligned[FeatureTransformer::BufferSize
+ alignment / sizeof(TransformedFeatureType)];
// Size the scratch buffer from the Net_Size template parameter, exactly as
// the alignas branch below does. Testing the bare enumerator `Small` made the
// dimension a constant independent of Net_Size, so both instantiations got
// the same buffer size and one of them was sized for the wrong net.
transformedFeaturesUnaligned[FeatureTransformer < Net_Size == Small ? TransformedFeatureDimensionsSmall
: TransformedFeatureDimensionsBig,
nullptr
> ::BufferSize + alignment / sizeof(TransformedFeatureType)];
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
#else
alignas(alignment) TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];
alignas(alignment) TransformedFeatureType
transformedFeatures[FeatureTransformer < Net_Size == Small ? TransformedFeatureDimensionsSmall
: TransformedFeatureDimensionsBig,
nullptr > ::BufferSize];
#endif
ASSERT_ALIGNED(transformedFeatures, alignment);
const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket);
const auto positional = network[bucket]->propagate(transformedFeatures);
const auto psqt = Net_Size == Small
? featureTransformerSmall->transform(pos, transformedFeatures, bucket)
: featureTransformerBig->transform(pos, transformedFeatures, bucket);
const auto positional = Net_Size == Small ? networkSmall[bucket]->propagate(transformedFeatures)
: networkBig[bucket]->propagate(transformedFeatures);
if (complexity)
*complexity = std::abs(psqt - positional) / OutputScale;
@@ -190,6 +230,9 @@ Value evaluate(const Position& pos, bool adjusted, int* complexity) {
return static_cast<Value>((psqt + positional) / OutputScale);
}
template Value evaluate<Big>(const Position& pos, bool adjusted, int* complexity);
template Value evaluate<Small>(const Position& pos, bool adjusted, int* complexity);
struct NnueEvalTrace {
static_assert(LayerStacks == PSQTBuckets);
@@ -205,13 +248,14 @@ static NnueEvalTrace trace_evaluate(const Position& pos) {
constexpr uint64_t alignment = CacheLineSize;
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
TransformedFeatureType
transformedFeaturesUnaligned[FeatureTransformer::BufferSize
+ alignment / sizeof(TransformedFeatureType)];
TransformedFeatureType transformedFeaturesUnaligned
[FeatureTransformer<TransformedFeatureDimensionsBig, nullptr>::BufferSize
+ alignment / sizeof(TransformedFeatureType)];
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
#else
alignas(alignment) TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];
alignas(alignment) TransformedFeatureType
transformedFeatures[FeatureTransformer<TransformedFeatureDimensionsBig, nullptr>::BufferSize];
#endif
ASSERT_ALIGNED(transformedFeatures, alignment);
@@ -220,8 +264,8 @@ static NnueEvalTrace trace_evaluate(const Position& pos) {
t.correctBucket = (pos.count<ALL_PIECES>() - 1) / 4;
for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
{
const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket);
const auto positional = network[bucket]->propagate(transformedFeatures);
const auto materialist = featureTransformerBig->transform(pos, transformedFeatures, bucket);
const auto positional = networkBig[bucket]->propagate(transformedFeatures);
t.psqt[bucket] = static_cast<Value>(materialist / OutputScale);
t.positional[bucket] = static_cast<Value>(positional / OutputScale);
@@ -310,7 +354,7 @@ std::string trace(Position& pos) {
// We estimate the value of each piece by doing a differential evaluation from
// the current base eval, simulating the removal of the piece from its square.
Value base = evaluate(pos);
Value base = evaluate<NNUE::Big>(pos);
base = pos.side_to_move() == WHITE ? base : -base;
for (File f = FILE_A; f <= FILE_H; ++f)
@@ -325,16 +369,16 @@ std::string trace(Position& pos) {
auto st = pos.state();
pos.remove_piece(sq);
st->accumulator.computed[WHITE] = false;
st->accumulator.computed[BLACK] = false;
st->accumulatorBig.computed[WHITE] = false;
st->accumulatorBig.computed[BLACK] = false;
Value eval = evaluate(pos);
Value eval = evaluate<NNUE::Big>(pos);
eval = pos.side_to_move() == WHITE ? eval : -eval;
v = base - eval;
pos.put_piece(pc, sq);
st->accumulator.computed[WHITE] = false;
st->accumulator.computed[BLACK] = false;
st->accumulatorBig.computed[WHITE] = false;
st->accumulatorBig.computed[BLACK] = false;
}
writeSquare(f, r, pc, v);
@@ -379,24 +423,24 @@ std::string trace(Position& pos) {
// Load eval, from a file stream or a memory stream.
// Initializes the parameter storage of the net selected by netSize, records
// name in fileName[netSize], and then reads the parameters from stream.
// Returns the result of the read. Note that fileName[netSize] is assigned
// before reading, so it is updated even when the read fails.
bool load_eval(std::string name, std::istream& stream) {
bool load_eval(const std::string name, std::istream& stream, NetSize netSize) {
initialize();
fileName = name;
return read_parameters(stream);
initialize(netSize);
fileName[netSize] = name;
return read_parameters(stream, netSize);
}
// Save eval, to a file stream or a memory stream.
// Writes the net selected by netSize to stream. Returns false without writing
// anything when fileName[netSize] is empty; otherwise returns the result of
// writing the parameters.
bool save_eval(std::ostream& stream) {
bool save_eval(std::ostream& stream, NetSize netSize) {
if (fileName.empty())
if (fileName[netSize].empty())
return false;
return write_parameters(stream);
return write_parameters(stream, netSize);
}
// Save eval, to a file given by its name
bool save_eval(const std::optional<std::string>& filename) {
bool save_eval(const std::optional<std::string>& filename, NetSize netSize) {
std::string actualFilename;
std::string msg;
@@ -405,7 +449,8 @@ bool save_eval(const std::optional<std::string>& filename) {
actualFilename = filename.value();
else
{
if (currentEvalFileName != EvalFileDefaultName)
if (currentEvalFileName[netSize]
!= (netSize == Small ? EvalFileDefaultNameSmall : EvalFileDefaultNameBig))
{
msg = "Failed to export a net. "
"A non-embedded net can only be saved if the filename is specified";
@@ -413,11 +458,11 @@ bool save_eval(const std::optional<std::string>& filename) {
sync_cout << msg << sync_endl;
return false;
}
actualFilename = EvalFileDefaultName;
actualFilename = (netSize == Small ? EvalFileDefaultNameSmall : EvalFileDefaultNameBig);
}
std::ofstream stream(actualFilename, std::ios_base::binary);
bool saved = save_eval(stream);
bool saved = save_eval(stream, netSize);
msg = saved ? "Network saved successfully to " + actualFilename : "Failed to export a net";