Spaces:
Running
Running
Upload 65 files
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff.
- src/Makefile +1184 -0
- src/benchmark.cpp +516 -0
- src/benchmark.h +42 -0
- src/bitboard.cpp +189 -0
- src/bitboard.h +458 -0
- src/engine.cpp +411 -0
- src/engine.h +134 -0
- src/evaluate.cpp +124 -0
- src/evaluate.h +58 -0
- src/history.h +273 -0
- src/incbin/UNLICENCE +26 -0
- src/incbin/incbin.h +476 -0
- src/main.cpp +43 -0
- src/memory.cpp +199 -0
- src/memory.h +333 -0
- src/misc.cpp +549 -0
- src/misc.h +538 -0
- src/movegen.cpp +312 -0
- src/movegen.h +73 -0
- src/movepick.cpp +313 -0
- src/movepick.h +80 -0
- src/nnue/features/full_threats.cpp +343 -0
- src/nnue/features/full_threats.h +106 -0
- src/nnue/features/half_ka_v2_hm.cpp +69 -0
- src/nnue/features/half_ka_v2_hm.h +128 -0
- src/nnue/layers/affine_transform.h +312 -0
- src/nnue/layers/affine_transform_sparse_input.h +379 -0
- src/nnue/layers/clipped_relu.h +170 -0
- src/nnue/layers/sqr_clipped_relu.h +109 -0
- src/nnue/network.cpp +415 -0
- src/nnue/network.h +161 -0
- src/nnue/nnue_accumulator.cpp +952 -0
- src/nnue/nnue_accumulator.h +206 -0
- src/nnue/nnue_architecture.h +165 -0
- src/nnue/nnue_common.h +298 -0
- src/nnue/nnue_feature_transformer.h +456 -0
- src/nnue/nnue_misc.cpp +193 -0
- src/nnue/nnue_misc.h +74 -0
- src/nnue/simd.h +440 -0
- src/numa.h +1718 -0
- src/perft.h +67 -0
- src/position.cpp +1566 -0
- src/position.h +414 -0
- src/score.cpp +48 -0
- src/score.h +70 -0
- src/search.cpp +2217 -0
- src/search.h +379 -0
- src/shm.h +634 -0
- src/shm_linux.h +672 -0
- src/syzygy/tbprobe.cpp +1776 -0
src/Makefile
ADDED
|
@@ -0,0 +1,1184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 2 |
+
# Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 3 |
+
#
|
| 4 |
+
# Stockfish is free software: you can redistribute it and/or modify
|
| 5 |
+
# it under the terms of the GNU General Public License as published by
|
| 6 |
+
# the Free Software Foundation, either version 3 of the License, or
|
| 7 |
+
# (at your option) any later version.
|
| 8 |
+
#
|
| 9 |
+
# Stockfish is distributed in the hope that it will be useful,
|
| 10 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 11 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 12 |
+
# GNU General Public License for more details.
|
| 13 |
+
#
|
| 14 |
+
# You should have received a copy of the GNU General Public License
|
| 15 |
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
### ==========================================================================
|
| 19 |
+
### Section 1. General Configuration
|
| 20 |
+
### ==========================================================================
|
| 21 |
+
|
| 22 |
+
### Establish the operating system name
|
| 23 |
+
KERNEL := $(shell uname -s)
|
| 24 |
+
ifeq ($(KERNEL),Linux)
|
| 25 |
+
OS := $(shell uname -o)
|
| 26 |
+
endif
|
| 27 |
+
|
| 28 |
+
### Command prefix to run the built executable (e.g. wine, sde, qemu)
|
| 29 |
+
### Backward-compatible alias: WINE_PATH (deprecated)
|
| 30 |
+
ifneq ($(strip $(WINE_PATH)),)
|
| 31 |
+
ifeq ($(strip $(RUN_PREFIX)),)
|
| 32 |
+
RUN_PREFIX := $(WINE_PATH)
|
| 33 |
+
endif
|
| 34 |
+
ifeq ($(MAKELEVEL),0)
|
| 35 |
+
ifneq ($(strip $(RUN_PREFIX)),$(strip $(WINE_PATH)))
|
| 36 |
+
$(warning *** Both RUN_PREFIX and WINE_PATH are set; ignoring WINE_PATH. ***)
|
| 37 |
+
else
|
| 38 |
+
$(warning *** WINE_PATH is deprecated; use RUN_PREFIX instead. ***)
|
| 39 |
+
endif
|
| 40 |
+
endif
|
| 41 |
+
endif
|
| 42 |
+
|
| 43 |
+
### Target Windows OS
|
| 44 |
+
ifeq ($(OS),Windows_NT)
|
| 45 |
+
ifneq ($(COMP),ndk)
|
| 46 |
+
target_windows = yes
|
| 47 |
+
endif
|
| 48 |
+
else ifeq ($(COMP),mingw)
|
| 49 |
+
target_windows = yes
|
| 50 |
+
ifeq ($(RUN_PREFIX),)
|
| 51 |
+
RUN_PREFIX := $(shell which wine)
|
| 52 |
+
endif
|
| 53 |
+
endif
|
| 54 |
+
|
| 55 |
+
### Executable name
|
| 56 |
+
ifeq ($(target_windows),yes)
|
| 57 |
+
EXE = stockfish.exe
|
| 58 |
+
else
|
| 59 |
+
EXE = stockfish
|
| 60 |
+
endif
|
| 61 |
+
|
| 62 |
+
### Installation dir definitions
|
| 63 |
+
PREFIX = /usr/local
|
| 64 |
+
BINDIR = $(PREFIX)/bin
|
| 65 |
+
|
| 66 |
+
### Built-in benchmark for pgo-builds
|
| 67 |
+
PGOBENCH = $(RUN_PREFIX) ./$(EXE) bench
|
| 68 |
+
|
| 69 |
+
### Source and object files
|
| 70 |
+
SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \
|
| 71 |
+
misc.cpp movegen.cpp movepick.cpp position.cpp \
|
| 72 |
+
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
|
| 73 |
+
nnue/nnue_accumulator.cpp nnue/nnue_misc.cpp nnue/network.cpp \
|
| 74 |
+
nnue/features/half_ka_v2_hm.cpp nnue/features/full_threats.cpp \
|
| 75 |
+
engine.cpp score.cpp memory.cpp
|
| 76 |
+
|
| 77 |
+
HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h history.h \
|
| 78 |
+
nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/features/full_threats.h \
|
| 79 |
+
nnue/layers/affine_transform.h nnue/layers/affine_transform_sparse_input.h \
|
| 80 |
+
nnue/layers/clipped_relu.h nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h \
|
| 81 |
+
nnue/nnue_architecture.h nnue/nnue_common.h nnue/nnue_feature_transformer.h nnue/simd.h \
|
| 82 |
+
position.h search.h syzygy/tbprobe.h thread.h thread_win32_osx.h timeman.h \
|
| 83 |
+
tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h numa.h memory.h shm.h shm_linux.h
|
| 84 |
+
|
| 85 |
+
OBJS = $(notdir $(SRCS:.cpp=.o))
|
| 86 |
+
|
| 87 |
+
VPATH = syzygy:nnue:nnue/features
|
| 88 |
+
|
| 89 |
+
### ==========================================================================
|
| 90 |
+
### Section 2. High-level Configuration
|
| 91 |
+
### ==========================================================================
|
| 92 |
+
#
|
| 93 |
+
# flag --- Comp switch --- Description
|
| 94 |
+
# ----------------------------------------------------------------------------
|
| 95 |
+
#
|
| 96 |
+
# debug = yes/no --- -DNDEBUG --- Enable/Disable debug mode
|
| 97 |
+
# sanitize = none/<sanitizer> ... (-fsanitize )
|
| 98 |
+
# --- ( undefined ) --- enable undefined behavior checks
|
| 99 |
+
# --- ( thread ) --- enable threading error checks
|
| 100 |
+
# --- ( address ) --- enable memory access checks
|
| 101 |
+
# --- ...etc... --- see compiler documentation for supported sanitizers
|
| 102 |
+
# optimize = yes/no --- (-O3/-fast etc.) --- Enable/Disable optimizations
|
| 103 |
+
# arch = (name) --- (-arch) --- Target architecture
|
| 104 |
+
# bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system
|
| 105 |
+
# prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction
|
| 106 |
+
# popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction
|
| 107 |
+
# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction
|
| 108 |
+
# sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions
|
| 109 |
+
# mmx = yes/no --- -mmmx --- Use Intel MMX instructions
|
| 110 |
+
# sse2 = yes/no --- -msse2 --- Use Intel Streaming SIMD Extensions 2
|
| 111 |
+
# ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3
|
| 112 |
+
# sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1
|
| 113 |
+
# avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2
|
| 114 |
+
# avxvnni = yes/no --- -mavxvnni --- Use Intel Vector Neural Network Instructions AVX
|
| 115 |
+
# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512
|
| 116 |
+
# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512
|
| 117 |
+
# avx512icl = yes/no --- ... multiple ... --- Use all AVX-512 features available on both Intel Ice Lake and AMD Zen 4
|
| 118 |
+
# altivec = yes/no --- -maltivec --- Use PowerPC Altivec SIMD extension
|
| 119 |
+
# vsx = yes/no --- -mvsx --- Use POWER VSX SIMD extension
|
| 120 |
+
# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture
|
| 121 |
+
# dotprod = yes/no --- -DUSE_NEON_DOTPROD --- Use ARM advanced SIMD Int8 dot product instructions
|
| 122 |
+
# lsx = yes/no --- -mlsx --- Use Loongson SIMD eXtension
|
| 123 |
+
# lasx = yes/no --- -mlasx --- Use Loongson Advanced SIMD eXtension
|
| 124 |
+
#
|
| 125 |
+
# Note that Makefile is space sensitive, so when adding new architectures
|
| 126 |
+
# or modifying existing flags, you have to make sure there are no extra spaces
|
| 127 |
+
# at the end of the line for flag values.
|
| 128 |
+
#
|
| 129 |
+
# Example of use for these flags:
|
| 130 |
+
# make build ARCH=x86-64-avx512 debug=yes sanitize="address undefined"
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
### 2.1 General and architecture defaults
|
| 134 |
+
|
| 135 |
+
ifeq ($(ARCH),)
|
| 136 |
+
ARCH = native
|
| 137 |
+
endif
|
| 138 |
+
|
| 139 |
+
ifeq ($(ARCH), native)
|
| 140 |
+
override ARCH := $(shell $(SHELL) ../scripts/get_native_properties.sh | cut -d " " -f 1)
|
| 141 |
+
endif
|
| 142 |
+
|
| 143 |
+
# explicitly check for the list of supported architectures (as listed with make help),
|
| 144 |
+
# the user can override with `make ARCH=x86-64-avx512icl SUPPORTED_ARCH=true`
|
| 145 |
+
ifeq ($(ARCH), $(filter $(ARCH), \
|
| 146 |
+
x86-64-avx512icl x86-64-vnni512 x86-64-avx512 x86-64-avxvnni \
|
| 147 |
+
x86-64-bmi2 x86-64-avx2 x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
|
| 148 |
+
x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-64-altivec ppc-64-vsx ppc-32 e2k \
|
| 149 |
+
armv7 armv7-neon armv8 armv8-dotprod apple-silicon general-64 general-32 riscv64 \
|
| 150 |
+
loongarch64 loongarch64-lsx loongarch64-lasx))
|
| 151 |
+
SUPPORTED_ARCH=true
|
| 152 |
+
else
|
| 153 |
+
SUPPORTED_ARCH=false
|
| 154 |
+
endif
|
| 155 |
+
|
| 156 |
+
optimize = yes
|
| 157 |
+
debug = no
|
| 158 |
+
sanitize = none
|
| 159 |
+
bits = 64
|
| 160 |
+
prefetch = no
|
| 161 |
+
popcnt = no
|
| 162 |
+
pext = no
|
| 163 |
+
sse = no
|
| 164 |
+
mmx = no
|
| 165 |
+
sse2 = no
|
| 166 |
+
ssse3 = no
|
| 167 |
+
sse41 = no
|
| 168 |
+
avx2 = no
|
| 169 |
+
avxvnni = no
|
| 170 |
+
avx512 = no
|
| 171 |
+
vnni512 = no
|
| 172 |
+
avx512icl = no
|
| 173 |
+
altivec = no
|
| 174 |
+
vsx = no
|
| 175 |
+
neon = no
|
| 176 |
+
dotprod = no
|
| 177 |
+
arm_version = 0
|
| 178 |
+
lsx = no
|
| 179 |
+
lasx = no
|
| 180 |
+
STRIP = strip
|
| 181 |
+
|
| 182 |
+
ifneq ($(shell which clang-format-20 2> /dev/null),)
|
| 183 |
+
CLANG-FORMAT = clang-format-20
|
| 184 |
+
else
|
| 185 |
+
CLANG-FORMAT = clang-format
|
| 186 |
+
endif
|
| 187 |
+
|
| 188 |
+
### 2.2 Architecture specific
|
| 189 |
+
|
| 190 |
+
ifeq ($(findstring x86,$(ARCH)),x86)
|
| 191 |
+
|
| 192 |
+
# x86-32/64
|
| 193 |
+
|
| 194 |
+
ifeq ($(findstring x86-32,$(ARCH)),x86-32)
|
| 195 |
+
arch = i386
|
| 196 |
+
bits = 32
|
| 197 |
+
sse = no
|
| 198 |
+
mmx = yes
|
| 199 |
+
else
|
| 200 |
+
arch = x86_64
|
| 201 |
+
sse = yes
|
| 202 |
+
sse2 = yes
|
| 203 |
+
endif
|
| 204 |
+
|
| 205 |
+
ifeq ($(findstring -sse,$(ARCH)),-sse)
|
| 206 |
+
sse = yes
|
| 207 |
+
endif
|
| 208 |
+
|
| 209 |
+
ifeq ($(findstring -popcnt,$(ARCH)),-popcnt)
|
| 210 |
+
popcnt = yes
|
| 211 |
+
endif
|
| 212 |
+
|
| 213 |
+
ifeq ($(findstring -mmx,$(ARCH)),-mmx)
|
| 214 |
+
mmx = yes
|
| 215 |
+
endif
|
| 216 |
+
|
| 217 |
+
ifeq ($(findstring -sse2,$(ARCH)),-sse2)
|
| 218 |
+
sse = yes
|
| 219 |
+
sse2 = yes
|
| 220 |
+
endif
|
| 221 |
+
|
| 222 |
+
ifeq ($(findstring -ssse3,$(ARCH)),-ssse3)
|
| 223 |
+
sse = yes
|
| 224 |
+
sse2 = yes
|
| 225 |
+
ssse3 = yes
|
| 226 |
+
endif
|
| 227 |
+
|
| 228 |
+
ifeq ($(findstring -sse41,$(ARCH)),-sse41)
|
| 229 |
+
sse = yes
|
| 230 |
+
sse2 = yes
|
| 231 |
+
ssse3 = yes
|
| 232 |
+
sse41 = yes
|
| 233 |
+
endif
|
| 234 |
+
|
| 235 |
+
ifeq ($(findstring -modern,$(ARCH)),-modern)
|
| 236 |
+
$(warning *** ARCH=$(ARCH) is deprecated, defaulting to ARCH=x86-64-sse41-popcnt. Execute `make help` for a list of available architectures. ***)
|
| 237 |
+
$(shell sleep 5)
|
| 238 |
+
popcnt = yes
|
| 239 |
+
sse = yes
|
| 240 |
+
sse2 = yes
|
| 241 |
+
ssse3 = yes
|
| 242 |
+
sse41 = yes
|
| 243 |
+
endif
|
| 244 |
+
|
| 245 |
+
ifeq ($(findstring -avx2,$(ARCH)),-avx2)
|
| 246 |
+
popcnt = yes
|
| 247 |
+
sse = yes
|
| 248 |
+
sse2 = yes
|
| 249 |
+
ssse3 = yes
|
| 250 |
+
sse41 = yes
|
| 251 |
+
avx2 = yes
|
| 252 |
+
endif
|
| 253 |
+
|
| 254 |
+
ifeq ($(findstring -avxvnni,$(ARCH)),-avxvnni)
|
| 255 |
+
popcnt = yes
|
| 256 |
+
sse = yes
|
| 257 |
+
sse2 = yes
|
| 258 |
+
ssse3 = yes
|
| 259 |
+
sse41 = yes
|
| 260 |
+
avx2 = yes
|
| 261 |
+
avxvnni = yes
|
| 262 |
+
pext = yes
|
| 263 |
+
endif
|
| 264 |
+
|
| 265 |
+
ifeq ($(findstring -bmi2,$(ARCH)),-bmi2)
|
| 266 |
+
popcnt = yes
|
| 267 |
+
sse = yes
|
| 268 |
+
sse2 = yes
|
| 269 |
+
ssse3 = yes
|
| 270 |
+
sse41 = yes
|
| 271 |
+
avx2 = yes
|
| 272 |
+
pext = yes
|
| 273 |
+
endif
|
| 274 |
+
|
| 275 |
+
ifeq ($(findstring -avx512,$(ARCH)),-avx512)
|
| 276 |
+
popcnt = yes
|
| 277 |
+
sse = yes
|
| 278 |
+
sse2 = yes
|
| 279 |
+
ssse3 = yes
|
| 280 |
+
sse41 = yes
|
| 281 |
+
avx2 = yes
|
| 282 |
+
pext = yes
|
| 283 |
+
avx512 = yes
|
| 284 |
+
endif
|
| 285 |
+
|
| 286 |
+
ifeq ($(findstring -vnni512,$(ARCH)),-vnni512)
|
| 287 |
+
popcnt = yes
|
| 288 |
+
sse = yes
|
| 289 |
+
sse2 = yes
|
| 290 |
+
ssse3 = yes
|
| 291 |
+
sse41 = yes
|
| 292 |
+
avx2 = yes
|
| 293 |
+
pext = yes
|
| 294 |
+
avx512 = yes
|
| 295 |
+
vnni512 = yes
|
| 296 |
+
endif
|
| 297 |
+
|
| 298 |
+
ifeq ($(findstring -avx512icl,$(ARCH)),-avx512icl)
|
| 299 |
+
popcnt = yes
|
| 300 |
+
sse = yes
|
| 301 |
+
sse2 = yes
|
| 302 |
+
ssse3 = yes
|
| 303 |
+
sse41 = yes
|
| 304 |
+
avx2 = yes
|
| 305 |
+
pext = yes
|
| 306 |
+
avx512 = yes
|
| 307 |
+
vnni512 = yes
|
| 308 |
+
avx512icl = yes
|
| 309 |
+
endif
|
| 310 |
+
|
| 311 |
+
ifeq ($(sse),yes)
|
| 312 |
+
prefetch = yes
|
| 313 |
+
endif
|
| 314 |
+
|
| 315 |
+
# 64-bit pext is not available on x86-32
|
| 316 |
+
ifeq ($(bits),32)
|
| 317 |
+
pext = no
|
| 318 |
+
endif
|
| 319 |
+
|
| 320 |
+
else
|
| 321 |
+
|
| 322 |
+
# all other architectures
|
| 323 |
+
|
| 324 |
+
ifeq ($(ARCH),general-32)
|
| 325 |
+
arch = any
|
| 326 |
+
bits = 32
|
| 327 |
+
endif
|
| 328 |
+
|
| 329 |
+
ifeq ($(ARCH),general-64)
|
| 330 |
+
arch = any
|
| 331 |
+
endif
|
| 332 |
+
|
| 333 |
+
ifeq ($(ARCH),armv7)
|
| 334 |
+
arch = armv7
|
| 335 |
+
prefetch = yes
|
| 336 |
+
bits = 32
|
| 337 |
+
arm_version = 7
|
| 338 |
+
endif
|
| 339 |
+
|
| 340 |
+
ifeq ($(ARCH),armv7-neon)
|
| 341 |
+
arch = armv7
|
| 342 |
+
prefetch = yes
|
| 343 |
+
popcnt = yes
|
| 344 |
+
neon = yes
|
| 345 |
+
bits = 32
|
| 346 |
+
arm_version = 7
|
| 347 |
+
endif
|
| 348 |
+
|
| 349 |
+
ifeq ($(ARCH),armv8)
|
| 350 |
+
arch = armv8
|
| 351 |
+
prefetch = yes
|
| 352 |
+
popcnt = yes
|
| 353 |
+
neon = yes
|
| 354 |
+
arm_version = 8
|
| 355 |
+
endif
|
| 356 |
+
|
| 357 |
+
ifeq ($(ARCH),armv8-dotprod)
|
| 358 |
+
arch = armv8
|
| 359 |
+
prefetch = yes
|
| 360 |
+
popcnt = yes
|
| 361 |
+
neon = yes
|
| 362 |
+
dotprod = yes
|
| 363 |
+
arm_version = 8
|
| 364 |
+
endif
|
| 365 |
+
|
| 366 |
+
ifeq ($(ARCH),apple-silicon)
|
| 367 |
+
arch = arm64
|
| 368 |
+
prefetch = yes
|
| 369 |
+
popcnt = yes
|
| 370 |
+
neon = yes
|
| 371 |
+
dotprod = yes
|
| 372 |
+
arm_version = 8
|
| 373 |
+
endif
|
| 374 |
+
|
| 375 |
+
ifeq ($(ARCH),ppc-32)
|
| 376 |
+
arch = ppc
|
| 377 |
+
bits = 32
|
| 378 |
+
endif
|
| 379 |
+
|
| 380 |
+
ifeq ($(ARCH),ppc-64)
|
| 381 |
+
arch = ppc64
|
| 382 |
+
popcnt = yes
|
| 383 |
+
prefetch = yes
|
| 384 |
+
endif
|
| 385 |
+
|
| 386 |
+
ifeq ($(ARCH),ppc-64-altivec)
|
| 387 |
+
arch = ppc64
|
| 388 |
+
popcnt = yes
|
| 389 |
+
prefetch = yes
|
| 390 |
+
altivec = yes
|
| 391 |
+
endif
|
| 392 |
+
|
| 393 |
+
ifeq ($(ARCH),ppc-64-vsx)
|
| 394 |
+
arch = ppc64
|
| 395 |
+
popcnt = yes
|
| 396 |
+
prefetch = yes
|
| 397 |
+
vsx = yes
|
| 398 |
+
endif
|
| 399 |
+
|
| 400 |
+
ifeq ($(findstring e2k,$(ARCH)),e2k)
|
| 401 |
+
arch = e2k
|
| 402 |
+
mmx = yes
|
| 403 |
+
bits = 64
|
| 404 |
+
sse = yes
|
| 405 |
+
sse2 = yes
|
| 406 |
+
ssse3 = yes
|
| 407 |
+
sse41 = yes
|
| 408 |
+
popcnt = yes
|
| 409 |
+
endif
|
| 410 |
+
|
| 411 |
+
ifeq ($(ARCH),riscv64)
|
| 412 |
+
arch = riscv64
|
| 413 |
+
endif
|
| 414 |
+
|
| 415 |
+
ifeq ($(findstring loongarch64,$(ARCH)),loongarch64)
|
| 416 |
+
arch = loongarch64
|
| 417 |
+
prefetch = yes
|
| 418 |
+
|
| 419 |
+
ifeq ($(findstring -lasx,$(ARCH)),-lasx)
|
| 420 |
+
lsx = yes
|
| 421 |
+
lasx = yes
|
| 422 |
+
endif
|
| 423 |
+
|
| 424 |
+
ifeq ($(findstring -lsx,$(ARCH)),-lsx)
|
| 425 |
+
lsx = yes
|
| 426 |
+
endif
|
| 427 |
+
|
| 428 |
+
endif
|
| 429 |
+
endif
|
| 430 |
+
|
| 431 |
+
|
| 432 |
+
### ==========================================================================
|
| 433 |
+
### Section 3. Low-level Configuration
|
| 434 |
+
### ==========================================================================
|
| 435 |
+
|
| 436 |
+
### 3.1 Selecting compiler (default = gcc)
|
| 437 |
+
ifeq ($(MAKELEVEL),0)
|
| 438 |
+
export ENV_CXXFLAGS := $(CXXFLAGS)
|
| 439 |
+
export ENV_DEPENDFLAGS := $(DEPENDFLAGS)
|
| 440 |
+
export ENV_LDFLAGS := $(LDFLAGS)
|
| 441 |
+
endif
|
| 442 |
+
|
| 443 |
+
CXXFLAGS = $(ENV_CXXFLAGS) -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS)
|
| 444 |
+
DEPENDFLAGS = $(ENV_DEPENDFLAGS) -std=c++17
|
| 445 |
+
LDFLAGS = $(ENV_LDFLAGS) $(EXTRALDFLAGS)
|
| 446 |
+
|
| 447 |
+
ifeq ($(COMP),)
|
| 448 |
+
COMP=gcc
|
| 449 |
+
endif
|
| 450 |
+
|
| 451 |
+
ifeq ($(COMP),gcc)
|
| 452 |
+
comp=gcc
|
| 453 |
+
CXX=g++
|
| 454 |
+
CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations
|
| 455 |
+
|
| 456 |
+
# Per-architecture word-size and libatomic flags for the gcc toolchain:
#  - armv7 / armv8 / riscv64: -m$(bits) is passed only when OS is Android;
#    ARCH=riscv64 additionally gets -latomic.
#  - loongarch64: gets -latomic only, never -m$(bits).
#  - every other arch: -m$(bits) is added to both CXXFLAGS and LDFLAGS.
# NOTE(review): -latomic is appended to CXXFLAGS, not LDFLAGS, while the link
# rule for $(EXE) passes only $(LDFLAGS). It therefore reaches the linker only
# where "LDFLAGS += $(CXXFLAGS)" runs (the LTO section, i.e. optimize=yes and
# debug=no) -- confirm that non-LTO riscv64/loongarch64 builds link without it.
ifeq ($(arch),$(filter $(arch),armv7 armv8 riscv64))
|
| 457 |
+
ifeq ($(OS),Android)
|
| 458 |
+
CXXFLAGS += -m$(bits)
|
| 459 |
+
LDFLAGS += -m$(bits)
|
| 460 |
+
endif
|
| 461 |
+
ifeq ($(ARCH),riscv64)
|
| 462 |
+
CXXFLAGS += -latomic
|
| 463 |
+
endif
|
| 464 |
+
else ifeq ($(arch),loongarch64)
|
| 465 |
+
CXXFLAGS += -latomic
|
| 466 |
+
else
|
| 467 |
+
CXXFLAGS += -m$(bits)
|
| 468 |
+
LDFLAGS += -m$(bits)
|
| 469 |
+
endif
|
| 470 |
+
|
| 471 |
+
ifeq ($(arch),$(filter $(arch),armv7))
|
| 472 |
+
LDFLAGS += -latomic
|
| 473 |
+
endif
|
| 474 |
+
|
| 475 |
+
ifneq ($(KERNEL),Darwin)
|
| 476 |
+
LDFLAGS += -Wl,--no-as-needed
|
| 477 |
+
endif
|
| 478 |
+
endif
|
| 479 |
+
|
| 480 |
+
ifeq ($(target_windows),yes)
|
| 481 |
+
LDFLAGS += -static
|
| 482 |
+
endif
|
| 483 |
+
|
| 484 |
+
ifeq ($(COMP),mingw)
|
| 485 |
+
comp=mingw
|
| 486 |
+
|
| 487 |
+
ifeq ($(bits),64)
|
| 488 |
+
ifeq ($(shell which x86_64-w64-mingw32-c++-posix 2> /dev/null),)
|
| 489 |
+
CXX=x86_64-w64-mingw32-c++
|
| 490 |
+
else
|
| 491 |
+
CXX=x86_64-w64-mingw32-c++-posix
|
| 492 |
+
endif
|
| 493 |
+
else
|
| 494 |
+
ifeq ($(shell which i686-w64-mingw32-c++-posix 2> /dev/null),)
|
| 495 |
+
CXX=i686-w64-mingw32-c++
|
| 496 |
+
else
|
| 497 |
+
CXX=i686-w64-mingw32-c++-posix
|
| 498 |
+
endif
|
| 499 |
+
endif
|
| 500 |
+
CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations
|
| 501 |
+
endif
|
| 502 |
+
|
| 503 |
+
ifeq ($(COMP),icx)
|
| 504 |
+
comp=icx
|
| 505 |
+
CXX=icpx
|
| 506 |
+
CXXFLAGS += --intel -pedantic -Wextra -Wshadow -Wmissing-prototypes \
|
| 507 |
+
-Wconditional-uninitialized -Wabi -Wdeprecated
|
| 508 |
+
endif
|
| 509 |
+
|
| 510 |
+
ifeq ($(COMP),clang)
|
| 511 |
+
comp=clang
|
| 512 |
+
CXX=clang++
|
| 513 |
+
ifeq ($(target_windows),yes)
|
| 514 |
+
CXX=x86_64-w64-mingw32-clang++
|
| 515 |
+
endif
|
| 516 |
+
|
| 517 |
+
CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-prototypes \
|
| 518 |
+
-Wconditional-uninitialized -flax-vector-conversions=none
|
| 519 |
+
|
| 520 |
+
ifeq ($(filter $(KERNEL),Darwin OpenBSD FreeBSD),)
|
| 521 |
+
ifeq ($(target_windows),)
|
| 522 |
+
ifneq ($(RTLIB),compiler-rt)
|
| 523 |
+
LDFLAGS += -latomic
|
| 524 |
+
endif
|
| 525 |
+
endif
|
| 526 |
+
endif
|
| 527 |
+
|
| 528 |
+
ifeq ($(arch),$(filter $(arch),armv7 armv8 riscv64))
|
| 529 |
+
ifeq ($(OS),Android)
|
| 530 |
+
CXXFLAGS += -m$(bits)
|
| 531 |
+
LDFLAGS += -m$(bits)
|
| 532 |
+
endif
|
| 533 |
+
ifeq ($(ARCH),riscv64)
|
| 534 |
+
CXXFLAGS += -latomic
|
| 535 |
+
endif
|
| 536 |
+
else ifeq ($(arch),loongarch64)
|
| 537 |
+
CXXFLAGS += -latomic
|
| 538 |
+
else
|
| 539 |
+
CXXFLAGS += -m$(bits)
|
| 540 |
+
LDFLAGS += -m$(bits)
|
| 541 |
+
endif
|
| 542 |
+
endif
|
| 543 |
+
|
| 544 |
+
ifeq ($(KERNEL),Darwin)
|
| 545 |
+
CXXFLAGS += -mmacosx-version-min=10.15
|
| 546 |
+
LDFLAGS += -mmacosx-version-min=10.15
|
| 547 |
+
ifneq ($(arch),any)
|
| 548 |
+
CXXFLAGS += -arch $(arch)
|
| 549 |
+
LDFLAGS += -arch $(arch)
|
| 550 |
+
endif
|
| 551 |
+
XCRUN = xcrun
|
| 552 |
+
endif
|
| 553 |
+
|
| 554 |
+
# To cross-compile for Android, use NDK version r27c or later.
|
| 555 |
+
ifeq ($(COMP),ndk)
|
| 556 |
+
CXXFLAGS += -stdlib=libc++
|
| 557 |
+
comp=clang
|
| 558 |
+
ifeq ($(arch),armv7)
|
| 559 |
+
CXX=armv7a-linux-androideabi29-clang++
|
| 560 |
+
CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon
|
| 561 |
+
ifneq ($(shell which arm-linux-androideabi-strip 2>/dev/null),)
|
| 562 |
+
STRIP=arm-linux-androideabi-strip
|
| 563 |
+
else
|
| 564 |
+
STRIP=llvm-strip
|
| 565 |
+
endif
|
| 566 |
+
endif
|
| 567 |
+
ifeq ($(arch),armv8)
|
| 568 |
+
CXX=aarch64-linux-android29-clang++
|
| 569 |
+
ifneq ($(shell which aarch64-linux-android-strip 2>/dev/null),)
|
| 570 |
+
STRIP=aarch64-linux-android-strip
|
| 571 |
+
else
|
| 572 |
+
STRIP=llvm-strip
|
| 573 |
+
endif
|
| 574 |
+
endif
|
| 575 |
+
ifeq ($(arch),x86_64)
|
| 576 |
+
CXX=x86_64-linux-android29-clang++
|
| 577 |
+
ifneq ($(shell which x86_64-linux-android-strip 2>/dev/null),)
|
| 578 |
+
STRIP=x86_64-linux-android-strip
|
| 579 |
+
else
|
| 580 |
+
STRIP=llvm-strip
|
| 581 |
+
endif
|
| 582 |
+
endif
|
| 583 |
+
LDFLAGS += -static-libstdc++
|
| 584 |
+
endif
|
| 585 |
+
|
| 586 |
+
### Allow overwriting CXX from command line
|
| 587 |
+
ifdef COMPCXX
|
| 588 |
+
CXX=$(COMPCXX)
|
| 589 |
+
endif
|
| 590 |
+
|
| 591 |
+
# llvm-profdata must be version compatible with the specified CXX (be it clang, or the gcc alias)
|
| 592 |
+
# make -j profile-build CXX=clang++-20 COMP=clang
|
| 593 |
+
# Locate the version in the same directory as the compiler used,
|
| 594 |
+
# with fallback to a generic one if it can't be located
|
| 595 |
+
LLVM_PROFDATA := $(dir $(realpath $(shell which $(CXX) 2> /dev/null)))llvm-profdata
|
| 596 |
+
# for icx
|
| 597 |
+
ifeq ($(wildcard $(LLVM_PROFDATA)),)
|
| 598 |
+
LLVM_PROFDATA := $(dir $(realpath $(shell which $(CXX) 2> /dev/null)))/compiler/llvm-profdata
|
| 599 |
+
endif
|
| 600 |
+
ifeq ($(wildcard $(LLVM_PROFDATA)),)
|
| 601 |
+
LLVM_PROFDATA := llvm-profdata
|
| 602 |
+
endif
|
| 603 |
+
|
| 604 |
+
ifeq ($(comp),icx)
|
| 605 |
+
profile_make = icx-profile-make
|
| 606 |
+
profile_use = icx-profile-use
|
| 607 |
+
else ifeq ($(comp),clang)
|
| 608 |
+
profile_make = clang-profile-make
|
| 609 |
+
profile_use = clang-profile-use
|
| 610 |
+
else
|
| 611 |
+
profile_make = gcc-profile-make
|
| 612 |
+
profile_use = gcc-profile-use
|
| 613 |
+
ifeq ($(KERNEL),Darwin)
|
| 614 |
+
EXTRAPROFILEFLAGS = -fvisibility=hidden
|
| 615 |
+
endif
|
| 616 |
+
endif
|
| 617 |
+
|
| 618 |
+
### Sometimes gcc is really clang
|
| 619 |
+
ifeq ($(COMP),gcc)
|
| 620 |
+
gccversion := $(shell $(CXX) --version 2>/dev/null)
|
| 621 |
+
gccisclang := $(findstring clang,$(gccversion))
|
| 622 |
+
ifneq ($(gccisclang),)
|
| 623 |
+
profile_make = clang-profile-make
|
| 624 |
+
profile_use = clang-profile-use
|
| 625 |
+
else
|
| 626 |
+
CXXFLAGS += -Wstack-usage=128000
|
| 627 |
+
endif
|
| 628 |
+
endif
|
| 629 |
+
|
| 630 |
+
### On mingw use Windows threads, otherwise POSIX
|
| 631 |
+
ifneq ($(comp),mingw)
|
| 632 |
+
CXXFLAGS += -DUSE_PTHREADS
|
| 633 |
+
# On Android Bionic's C library comes with its own pthread implementation bundled in
|
| 634 |
+
ifneq ($(OS),Android)
|
| 635 |
+
# Haiku has pthreads in its libroot, so only link it in on other platforms
|
| 636 |
+
ifneq ($(KERNEL),Haiku)
|
| 637 |
+
ifneq ($(COMP),ndk)
|
| 638 |
+
LDFLAGS += -lpthread
|
| 639 |
+
|
| 640 |
+
add_lrt = yes
|
| 641 |
+
ifeq ($(target_windows),yes)
|
| 642 |
+
add_lrt = no
|
| 643 |
+
endif
|
| 644 |
+
|
| 645 |
+
ifeq ($(KERNEL),Darwin)
|
| 646 |
+
add_lrt = no
|
| 647 |
+
endif
|
| 648 |
+
|
| 649 |
+
ifeq ($(add_lrt),yes)
|
| 650 |
+
LDFLAGS += -lrt
|
| 651 |
+
endif
|
| 652 |
+
endif
|
| 653 |
+
endif
|
| 654 |
+
endif
|
| 655 |
+
endif
|
| 656 |
+
|
| 657 |
+
### 3.2.1 Debugging
|
| 658 |
+
ifeq ($(debug),no)
|
| 659 |
+
CXXFLAGS += -DNDEBUG
|
| 660 |
+
else
|
| 661 |
+
CXXFLAGS += -g
|
| 662 |
+
CXXFLAGS += -D_GLIBCXX_ASSERTIONS -D_GLIBCXX_DEBUG
|
| 663 |
+
endif
|
| 664 |
+
|
| 665 |
+
### 3.2.2 Debugging with sanitizers (each entry of $(sanitize) is passed as -fsanitize=<entry>)
|
| 666 |
+
ifneq ($(sanitize),none)
|
| 667 |
+
CXXFLAGS += -g3 $(addprefix -fsanitize=,$(sanitize))
|
| 668 |
+
LDFLAGS += $(addprefix -fsanitize=,$(sanitize))
|
| 669 |
+
endif
|
| 670 |
+
|
| 671 |
+
### 3.3 Optimization
|
| 672 |
+
ifeq ($(optimize),yes)
|
| 673 |
+
|
| 674 |
+
CXXFLAGS += -O3 -funroll-loops
|
| 675 |
+
|
| 676 |
+
ifeq ($(comp),gcc)
|
| 677 |
+
ifeq ($(OS), Android)
|
| 678 |
+
CXXFLAGS += -fno-gcse -mthumb -march=armv7-a -mfloat-abi=softfp
|
| 679 |
+
endif
|
| 680 |
+
endif
|
| 681 |
+
|
| 682 |
+
ifeq ($(KERNEL),Darwin)
|
| 683 |
+
ifeq ($(comp),$(filter $(comp),clang icx))
|
| 684 |
+
CXXFLAGS += -mdynamic-no-pic
|
| 685 |
+
endif
|
| 686 |
+
|
| 687 |
+
ifeq ($(comp),gcc)
|
| 688 |
+
ifneq ($(arch),arm64)
|
| 689 |
+
CXXFLAGS += -mdynamic-no-pic
|
| 690 |
+
endif
|
| 691 |
+
endif
|
| 692 |
+
endif
|
| 693 |
+
|
| 694 |
+
# clang releases before 16 need -fexperimental-new-pass-manager to opt in to
# the new pass manager. The major version is taken from "$(CXX) -dumpversion".
# If the compiler cannot be run, clangmajorversion is empty and "expr \< 16"
# is a syntax error; its stderr is discarded so the comparison quietly yields
# an empty string (flag not added) instead of printing "expr: syntax error"
# on every make invocation.
ifeq ($(comp),clang)
|
| 695 |
+
clangmajorversion := $(shell $(CXX) -dumpversion 2>/dev/null | cut -f1 -d.)
|
| 696 |
+
ifeq ($(shell expr $(clangmajorversion) \< 16 2>/dev/null),1)
|
| 697 |
+
CXXFLAGS += -fexperimental-new-pass-manager
|
| 698 |
+
endif
|
| 699 |
+
endif
|
| 700 |
+
endif
|
| 701 |
+
|
| 702 |
+
### 3.4 Bits
|
| 703 |
+
ifeq ($(bits),64)
|
| 704 |
+
CXXFLAGS += -DIS_64BIT
|
| 705 |
+
endif
|
| 706 |
+
|
| 707 |
+
### 3.5 prefetch and popcount
|
| 708 |
+
ifeq ($(prefetch),yes)
|
| 709 |
+
ifeq ($(sse),yes)
|
| 710 |
+
CXXFLAGS += -msse
|
| 711 |
+
endif
|
| 712 |
+
else
|
| 713 |
+
CXXFLAGS += -DNO_PREFETCH
|
| 714 |
+
endif
|
| 715 |
+
|
| 716 |
+
ifeq ($(popcnt),yes)
|
| 717 |
+
ifeq ($(arch),$(filter $(arch),ppc64 ppc64-altivec ppc64-vsx armv7 armv8 arm64))
|
| 718 |
+
CXXFLAGS += -DUSE_POPCNT
|
| 719 |
+
else
|
| 720 |
+
CXXFLAGS += -msse3 -mpopcnt -DUSE_POPCNT
|
| 721 |
+
endif
|
| 722 |
+
endif
|
| 723 |
+
|
| 724 |
+
### 3.6 SIMD architectures
|
| 725 |
+
ifeq ($(avx2),yes)
|
| 726 |
+
CXXFLAGS += -DUSE_AVX2
|
| 727 |
+
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
|
| 728 |
+
CXXFLAGS += -mavx2 -mbmi
|
| 729 |
+
endif
|
| 730 |
+
endif
|
| 731 |
+
|
| 732 |
+
ifeq ($(avxvnni),yes)
|
| 733 |
+
CXXFLAGS += -DUSE_VNNI -DUSE_AVXVNNI
|
| 734 |
+
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
|
| 735 |
+
CXXFLAGS += -mavxvnni
|
| 736 |
+
endif
|
| 737 |
+
endif
|
| 738 |
+
|
| 739 |
+
ifeq ($(avx512),yes)
|
| 740 |
+
CXXFLAGS += -DUSE_AVX512
|
| 741 |
+
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
|
| 742 |
+
CXXFLAGS += -mavx512f -mavx512bw -mavx512dq -mavx512vl
|
| 743 |
+
endif
|
| 744 |
+
endif
|
| 745 |
+
|
| 746 |
+
ifeq ($(vnni512),yes)
|
| 747 |
+
CXXFLAGS += -DUSE_VNNI
|
| 748 |
+
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
|
| 749 |
+
CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl
|
| 750 |
+
endif
|
| 751 |
+
endif
|
| 752 |
+
|
| 753 |
+
ifeq ($(avx512icl),yes)
|
| 754 |
+
CXXFLAGS += -DUSE_AVX512 -DUSE_VNNI -DUSE_AVX512ICL
|
| 755 |
+
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
|
| 756 |
+
CXXFLAGS += -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx512vpopcntdq -mavx512bitalg -mavx512vnni -mvpclmulqdq -mgfni -mvaes
|
| 757 |
+
endif
|
| 758 |
+
endif
|
| 759 |
+
|
| 760 |
+
ifeq ($(sse41),yes)
|
| 761 |
+
CXXFLAGS += -DUSE_SSE41
|
| 762 |
+
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
|
| 763 |
+
CXXFLAGS += -msse4.1
|
| 764 |
+
endif
|
| 765 |
+
endif
|
| 766 |
+
|
| 767 |
+
ifeq ($(ssse3),yes)
|
| 768 |
+
CXXFLAGS += -DUSE_SSSE3
|
| 769 |
+
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
|
| 770 |
+
CXXFLAGS += -mssse3
|
| 771 |
+
endif
|
| 772 |
+
endif
|
| 773 |
+
|
| 774 |
+
ifeq ($(sse2),yes)
|
| 775 |
+
CXXFLAGS += -DUSE_SSE2
|
| 776 |
+
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
|
| 777 |
+
CXXFLAGS += -msse2
|
| 778 |
+
endif
|
| 779 |
+
endif
|
| 780 |
+
|
| 781 |
+
ifeq ($(mmx),yes)
|
| 782 |
+
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
|
| 783 |
+
CXXFLAGS += -mmmx
|
| 784 |
+
endif
|
| 785 |
+
endif
|
| 786 |
+
|
| 787 |
+
ifeq ($(altivec),yes)
|
| 788 |
+
CXXFLAGS += -maltivec
|
| 789 |
+
ifeq ($(COMP),gcc)
|
| 790 |
+
CXXFLAGS += -mabi=altivec
|
| 791 |
+
endif
|
| 792 |
+
endif
|
| 793 |
+
|
| 794 |
+
ifeq ($(vsx),yes)
|
| 795 |
+
CXXFLAGS += -mvsx
|
| 796 |
+
ifeq ($(COMP),gcc)
|
| 797 |
+
CXXFLAGS += -DNO_WARN_X86_INTRINSICS -DUSE_SSE2
|
| 798 |
+
endif
|
| 799 |
+
endif
|
| 800 |
+
|
| 801 |
+
ifeq ($(neon),yes)
|
| 802 |
+
CXXFLAGS += -DUSE_NEON=$(arm_version)
|
| 803 |
+
ifeq ($(KERNEL),Linux)
|
| 804 |
+
ifneq ($(COMP),ndk)
|
| 805 |
+
ifneq ($(arch),armv8)
|
| 806 |
+
CXXFLAGS += -mfpu=neon
|
| 807 |
+
endif
|
| 808 |
+
endif
|
| 809 |
+
endif
|
| 810 |
+
endif
|
| 811 |
+
|
| 812 |
+
ifeq ($(dotprod),yes)
|
| 813 |
+
CXXFLAGS += -march=armv8.2-a+dotprod -DUSE_NEON_DOTPROD
|
| 814 |
+
endif
|
| 815 |
+
|
| 816 |
+
ifeq ($(lasx),yes)
|
| 817 |
+
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
|
| 818 |
+
CXXFLAGS += -mlasx
|
| 819 |
+
endif
|
| 820 |
+
endif
|
| 821 |
+
|
| 822 |
+
ifeq ($(lsx),yes)
|
| 823 |
+
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
|
| 824 |
+
CXXFLAGS += -mlsx
|
| 825 |
+
endif
|
| 826 |
+
endif
|
| 827 |
+
|
| 828 |
+
### 3.7 pext
|
| 829 |
+
ifeq ($(pext),yes)
|
| 830 |
+
CXXFLAGS += -DUSE_PEXT
|
| 831 |
+
ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
|
| 832 |
+
CXXFLAGS += -mbmi2
|
| 833 |
+
endif
|
| 834 |
+
endif
|
| 835 |
+
|
| 836 |
+
### 3.8.1 Try to include git commit sha for versioning
|
| 837 |
+
GIT_SHA := $(shell git rev-parse HEAD 2>/dev/null | cut -c 1-8)
|
| 838 |
+
ifneq ($(GIT_SHA), )
|
| 839 |
+
CXXFLAGS += -DGIT_SHA=$(GIT_SHA)
|
| 840 |
+
endif
|
| 841 |
+
|
| 842 |
+
### 3.8.2 Try to include git commit date for versioning
|
| 843 |
+
GIT_DATE := $(shell git show -s --date=format:'%Y%m%d' --format=%cd HEAD 2>/dev/null)
|
| 844 |
+
ifneq ($(GIT_DATE), )
|
| 845 |
+
CXXFLAGS += -DGIT_DATE=$(GIT_DATE)
|
| 846 |
+
endif
|
| 847 |
+
|
| 848 |
+
### 3.8.3 Try to include architecture
|
| 849 |
+
ifneq ($(ARCH), )
|
| 850 |
+
CXXFLAGS += -DARCH=$(ARCH)
|
| 851 |
+
endif
|
| 852 |
+
|
| 853 |
+
### 3.9 Link Time Optimization
|
| 854 |
+
### This is a mix of compile and link time options because the lto link phase
|
| 855 |
+
### needs access to the optimization flags.
|
| 856 |
+
ifeq ($(optimize),yes)
|
| 857 |
+
ifeq ($(debug),no)
|
| 858 |
+
ifneq ($(KERNEL),Darwin)
|
| 859 |
+
LLD_BIN := $(shell command -v ld.lld 2>/dev/null)
|
| 860 |
+
ifeq ($(LLD_BIN),)
|
| 861 |
+
LLD_BIN := $(shell command -v lld 2>/dev/null)
|
| 862 |
+
endif
|
| 863 |
+
ifneq ($(LLD_BIN),)
|
| 864 |
+
ifeq ($(comp),clang)
|
| 865 |
+
LDFLAGS += -fuse-ld=lld
|
| 866 |
+
else ifeq ($(comp),gcc)
|
| 867 |
+
ifneq ($(gccisclang),)
|
| 868 |
+
LDFLAGS += -fuse-ld=lld
|
| 869 |
+
endif
|
| 870 |
+
endif
|
| 871 |
+
endif
|
| 872 |
+
endif
|
| 873 |
+
|
| 874 |
+
ifeq ($(comp),$(filter $(comp),clang icx))
|
| 875 |
+
CXXFLAGS += -flto=full
|
| 876 |
+
ifeq ($(comp),icx)
|
| 877 |
+
CXXFLAGS += -fwhole-program-vtables
|
| 878 |
+
endif
|
| 879 |
+
LDFLAGS += $(CXXFLAGS)
|
| 880 |
+
|
| 881 |
+
# GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be
|
| 882 |
+
# GCC on some systems.
|
| 883 |
+
else ifeq ($(comp),gcc)
|
| 884 |
+
ifeq ($(gccisclang),)
|
| 885 |
+
CXXFLAGS += -flto -flto-partition=one
|
| 886 |
+
LDFLAGS += $(CXXFLAGS) -flto=jobserver
|
| 887 |
+
else
|
| 888 |
+
CXXFLAGS += -flto=full
|
| 889 |
+
LDFLAGS += $(CXXFLAGS)
|
| 890 |
+
endif
|
| 891 |
+
|
| 892 |
+
# To use LTO and static linking on Windows,
|
| 893 |
+
# the tool chain requires gcc version 10.1 or later.
|
| 894 |
+
else ifeq ($(comp),mingw)
|
| 895 |
+
CXXFLAGS += -flto -flto-partition=one
|
| 896 |
+
LDFLAGS += $(CXXFLAGS) -save-temps
|
| 897 |
+
endif
|
| 898 |
+
endif
|
| 899 |
+
endif
|
| 900 |
+
|
| 901 |
+
### 3.10 Android 5 can only run position independent executables. Note that this
|
| 902 |
+
### breaks Android 4.0 and earlier.
|
| 903 |
+
ifeq ($(OS), Android)
|
| 904 |
+
CXXFLAGS += -fPIE
|
| 905 |
+
LDFLAGS += -fPIE -pie
|
| 906 |
+
endif
|
| 907 |
+
|
| 908 |
+
### 3.11 Inline settings
|
| 909 |
+
ifeq ($(optimize), yes)
|
| 910 |
+
ifeq ($(comp), clang)
|
| 911 |
+
CXXFLAGS += -Xclang -mllvm -Xclang -inline-threshold=500
|
| 912 |
+
endif
|
| 913 |
+
endif
|
| 914 |
+
|
| 915 |
+
### ==========================================================================
|
| 916 |
+
### Section 4. Public Targets
|
| 917 |
+
### ==========================================================================
|
| 918 |
+
|
| 919 |
+
help:
|
| 920 |
+
@echo "" && \
|
| 921 |
+
echo "To compile stockfish, type: " && \
|
| 922 |
+
echo "" && \
|
| 923 |
+
echo "make -j target [ARCH=arch] [COMP=compiler] [COMPCXX=cxx]" && \
|
| 924 |
+
echo "" && \
|
| 925 |
+
echo "Supported targets:" && \
|
| 926 |
+
echo "" && \
|
| 927 |
+
echo "help > Display architecture details" && \
|
| 928 |
+
echo "profile-build > standard build with profile-guided optimization" && \
|
| 929 |
+
echo "build > skip profile-guided optimization" && \
|
| 930 |
+
echo "net > Download the default nnue nets" && \
|
| 931 |
+
echo "strip > Strip executable" && \
|
| 932 |
+
echo "install > Install executable" && \
|
| 933 |
+
echo "clean > Clean up" && \
|
| 934 |
+
echo "" && \
|
| 935 |
+
echo "Supported archs:" && \
|
| 936 |
+
echo "" && \
|
| 937 |
+
echo "native > select the best architecture for the host processor (default)" && \
|
| 938 |
+
echo "x86-64-avx512icl > x86 64-bit with minimum avx512 support of Intel Ice Lake or AMD Zen 4" && \
|
| 939 |
+
echo "x86-64-vnni512 > x86 64-bit with vnni 512bit support" && \
|
| 940 |
+
echo "x86-64-avx512 > x86 64-bit with avx512 support" && \
|
| 941 |
+
echo "x86-64-avxvnni > x86 64-bit with vnni 256bit support" && \
|
| 942 |
+
echo "x86-64-bmi2 > x86 64-bit with bmi2 support" && \
|
| 943 |
+
echo "x86-64-avx2 > x86 64-bit with avx2 support" && \
|
| 944 |
+
echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support" && \
|
| 945 |
+
echo "x86-64-modern > deprecated, currently x86-64-sse41-popcnt" && \
|
| 946 |
+
echo "x86-64-ssse3 > x86 64-bit with ssse3 support" && \
|
| 947 |
+
echo "x86-64-sse3-popcnt > x86 64-bit with sse3 compile and popcnt support" && \
|
| 948 |
+
echo "x86-64 > x86 64-bit generic (with sse2 support)" && \
|
| 949 |
+
echo "x86-32-sse41-popcnt > x86 32-bit with sse41 and popcnt support" && \
|
| 950 |
+
echo "x86-32-sse2 > x86 32-bit with sse2 support" && \
|
| 951 |
+
echo "x86-32 > x86 32-bit generic (with mmx compile support)" && \
|
| 952 |
+
echo "ppc-64 > PPC 64-bit" && \
|
| 953 |
+
echo "ppc-64-altivec > PPC 64-bit with altivec support" && \
|
| 954 |
+
echo "ppc-64-vsx > PPC 64-bit with vsx support" && \
|
| 955 |
+
echo "ppc-32 > PPC 32-bit" && \
|
| 956 |
+
echo "armv7 > ARMv7 32-bit" && \
|
| 957 |
+
echo "armv7-neon > ARMv7 32-bit with popcnt and neon" && \
|
| 958 |
+
echo "armv8 > ARMv8 64-bit with popcnt and neon" && \
|
| 959 |
+
echo "armv8-dotprod > ARMv8 64-bit with popcnt, neon and dot product support" && \
|
| 960 |
+
echo "e2k > Elbrus 2000" && \
|
| 961 |
+
echo "apple-silicon > Apple silicon ARM64" && \
|
| 962 |
+
echo "general-64 > unspecified 64-bit" && \
|
| 963 |
+
echo "general-32 > unspecified 32-bit" && \
|
| 964 |
+
echo "riscv64 > RISC-V 64-bit" && \
|
| 965 |
+
echo "loongarch64 > LoongArch 64-bit" && \
|
| 966 |
+
echo "loongarch64-lsx > LoongArch 64-bit with SIMD eXtension" && \
|
| 967 |
+
echo "loongarch64-lasx > LoongArch 64-bit with Advanced SIMD eXtension" && \
|
| 968 |
+
echo "" && \
|
| 969 |
+
echo "Supported compilers:" && \
|
| 970 |
+
echo "" && \
|
| 971 |
+
echo "gcc > GNU compiler (default)" && \
|
| 972 |
+
echo "mingw > GNU compiler with MinGW under Windows" && \
|
| 973 |
+
echo "clang > LLVM Clang compiler" && \
|
| 974 |
+
echo "icx > Intel oneAPI DPC++/C++ Compiler" && \
|
| 975 |
+
echo "ndk > Google NDK to cross-compile for Android" && \
|
| 976 |
+
echo "" && \
|
| 977 |
+
echo "Simple examples. If you don't know what to do, you likely want to run one of: " && \
|
| 978 |
+
echo "" && \
|
| 979 |
+
echo "make -j profile-build ARCH=x86-64-avx2 # typically a fast compile for common systems " && \
|
| 980 |
+
echo "make -j profile-build ARCH=x86-64-sse41-popcnt # A more portable compile for 64-bit systems " && \
|
| 981 |
+
echo "make -j profile-build ARCH=x86-64 # A portable compile for 64-bit systems " && \
|
| 982 |
+
echo "" && \
|
| 983 |
+
echo "Advanced examples, for experienced users: " && \
|
| 984 |
+
echo "" && \
|
| 985 |
+
echo "make -j profile-build ARCH=x86-64-avxvnni" && \
|
| 986 |
+
echo "make -j profile-build ARCH=x86-64-avxvnni COMP=gcc COMPCXX=g++-12.0" && \
|
| 987 |
+
echo "make -j build ARCH=x86-64-ssse3 COMP=clang" && \
|
| 988 |
+
echo ""
|
| 989 |
+
ifneq ($(SUPPORTED_ARCH), true)
|
| 990 |
+
@echo "Specify a supported architecture with the ARCH option for more details"
|
| 991 |
+
@echo ""
|
| 992 |
+
endif
|
| 993 |
+
|
| 994 |
+
|
| 995 |
+
# Targets that do not name real files. "analyze" was listed twice; the
# duplicate is replaced by "all", the recipe-less aggregate target of
# Section 5, so a stray file called "all" can never shadow it.
.PHONY: help analyze build profile-build strip install clean net \
|
| 996 |
+
objclean profileclean config-sanity \
|
| 997 |
+
icx-profile-use icx-profile-make \
|
| 998 |
+
gcc-profile-use gcc-profile-make \
|
| 999 |
+
clang-profile-use clang-profile-make FORCE \
|
| 1000 |
+
format all
|
| 1001 |
+
|
| 1002 |
+
analyze: net config-sanity objclean
|
| 1003 |
+
$(MAKE) -k ARCH=$(ARCH) COMP=$(COMP) $(OBJS)
|
| 1004 |
+
|
| 1005 |
+
build: net config-sanity
|
| 1006 |
+
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
|
| 1007 |
+
|
| 1008 |
+
profile-build: net config-sanity objclean profileclean
|
| 1009 |
+
@echo ""
|
| 1010 |
+
@echo "Step 1/4. Building instrumented executable ..."
|
| 1011 |
+
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)
|
| 1012 |
+
@echo ""
|
| 1013 |
+
@echo "Step 2/4. Running benchmark for pgo-build ..."
|
| 1014 |
+
$(PGOBENCH) > PGOBENCH.out 2>&1
|
| 1015 |
+
tail -n 4 PGOBENCH.out
|
| 1016 |
+
@echo ""
|
| 1017 |
+
@echo "Step 3/4. Building optimized executable ..."
|
| 1018 |
+
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean
|
| 1019 |
+
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_use)
|
| 1020 |
+
@echo ""
|
| 1021 |
+
@echo "Step 4/4. Deleting profile data ..."
|
| 1022 |
+
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean
|
| 1023 |
+
|
| 1024 |
+
strip:
|
| 1025 |
+
$(STRIP) $(EXE)
|
| 1026 |
+
|
| 1027 |
+
install:
|
| 1028 |
+
-mkdir -p -m 755 $(BINDIR)
|
| 1029 |
+
-cp $(EXE) $(BINDIR)
|
| 1030 |
+
$(STRIP) $(BINDIR)/$(EXE)
|
| 1031 |
+
|
| 1032 |
+
# clean all
|
| 1033 |
+
clean: objclean profileclean
|
| 1034 |
+
@rm -f .depend *~ core
|
| 1035 |
+
|
| 1036 |
+
# clean binaries and objects
|
| 1037 |
+
objclean:
|
| 1038 |
+
@rm -f stockfish stockfish.exe *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o
|
| 1039 |
+
|
| 1040 |
+
# clean auxiliary profiling files
|
| 1041 |
+
profileclean:
|
| 1042 |
+
@rm -rf profdir
|
| 1043 |
+
@rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s PGOBENCH.out
|
| 1044 |
+
@rm -f stockfish.profdata *.profraw
|
| 1045 |
+
@rm -f stockfish.*args*
|
| 1046 |
+
@rm -f stockfish.*lt*
|
| 1047 |
+
@rm -f stockfish.res
|
| 1048 |
+
@rm -f ./-lstdc++.res
|
| 1049 |
+
|
| 1050 |
+
# evaluation network (nnue)
|
| 1051 |
+
net:
|
| 1052 |
+
@$(SHELL) ../scripts/net.sh
|
| 1053 |
+
|
| 1054 |
+
format:
|
| 1055 |
+
$(CLANG-FORMAT) -i $(SRCS) $(HEADERS) -style=file
|
| 1056 |
+
|
| 1057 |
+
### ==========================================================================
|
| 1058 |
+
### Section 5. Private Targets
|
| 1059 |
+
### ==========================================================================
|
| 1060 |
+
|
| 1061 |
+
all: $(EXE) .depend
|
| 1062 |
+
|
| 1063 |
+
config-sanity: net
|
| 1064 |
+
@echo ""
|
| 1065 |
+
@echo "Config:" && \
|
| 1066 |
+
echo "debug: '$(debug)'" && \
|
| 1067 |
+
echo "sanitize: '$(sanitize)'" && \
|
| 1068 |
+
echo "optimize: '$(optimize)'" && \
|
| 1069 |
+
echo "arch: '$(arch)'" && \
|
| 1070 |
+
echo "bits: '$(bits)'" && \
|
| 1071 |
+
echo "kernel: '$(KERNEL)'" && \
|
| 1072 |
+
echo "os: '$(OS)'" && \
|
| 1073 |
+
echo "prefetch: '$(prefetch)'" && \
|
| 1074 |
+
echo "popcnt: '$(popcnt)'" && \
|
| 1075 |
+
echo "pext: '$(pext)'" && \
|
| 1076 |
+
echo "sse: '$(sse)'" && \
|
| 1077 |
+
echo "mmx: '$(mmx)'" && \
|
| 1078 |
+
echo "sse2: '$(sse2)'" && \
|
| 1079 |
+
echo "ssse3: '$(ssse3)'" && \
|
| 1080 |
+
echo "sse41: '$(sse41)'" && \
|
| 1081 |
+
echo "avx2: '$(avx2)'" && \
|
| 1082 |
+
echo "avxvnni: '$(avxvnni)'" && \
|
| 1083 |
+
echo "avx512: '$(avx512)'" && \
|
| 1084 |
+
echo "vnni512: '$(vnni512)'" && \
|
| 1085 |
+
echo "avx512icl: '$(avx512icl)'" && \
|
| 1086 |
+
echo "altivec: '$(altivec)'" && \
|
| 1087 |
+
echo "vsx: '$(vsx)'" && \
|
| 1088 |
+
echo "neon: '$(neon)'" && \
|
| 1089 |
+
echo "dotprod: '$(dotprod)'" && \
|
| 1090 |
+
echo "arm_version: '$(arm_version)'" && \
|
| 1091 |
+
echo "lsx: '$(lsx)'" && \
|
| 1092 |
+
echo "lasx: '$(lasx)'" && \
|
| 1093 |
+
echo "target_windows: '$(target_windows)'" && \
|
| 1094 |
+
echo "" && \
|
| 1095 |
+
echo "Flags:" && \
|
| 1096 |
+
echo "CXX: $(CXX)" && \
|
| 1097 |
+
echo "CXXFLAGS: $(CXXFLAGS)" && \
|
| 1098 |
+
echo "LDFLAGS: $(LDFLAGS)" && \
|
| 1099 |
+
echo "" && \
|
| 1100 |
+
echo "Testing config sanity. If this fails, try 'make help' ..." && \
|
| 1101 |
+
echo "" && \
|
| 1102 |
+
(test "$(debug)" = "yes" || test "$(debug)" = "no") && \
|
| 1103 |
+
(test "$(optimize)" = "yes" || test "$(optimize)" = "no") && \
|
| 1104 |
+
(test "$(SUPPORTED_ARCH)" = "true") && \
|
| 1105 |
+
(test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
|
| 1106 |
+
test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "e2k" || \
|
| 1107 |
+
test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" || \
|
| 1108 |
+
test "$(arch)" = "riscv64" || test "$(arch)" = "loongarch64") && \
|
| 1109 |
+
(test "$(bits)" = "32" || test "$(bits)" = "64") && \
|
| 1110 |
+
(test "$(prefetch)" = "yes" || test "$(prefetch)" = "no") && \
|
| 1111 |
+
(test "$(popcnt)" = "yes" || test "$(popcnt)" = "no") && \
|
| 1112 |
+
(test "$(pext)" = "yes" || test "$(pext)" = "no") && \
|
| 1113 |
+
(test "$(sse)" = "yes" || test "$(sse)" = "no") && \
|
| 1114 |
+
(test "$(mmx)" = "yes" || test "$(mmx)" = "no") && \
|
| 1115 |
+
(test "$(sse2)" = "yes" || test "$(sse2)" = "no") && \
|
| 1116 |
+
(test "$(ssse3)" = "yes" || test "$(ssse3)" = "no") && \
|
| 1117 |
+
(test "$(sse41)" = "yes" || test "$(sse41)" = "no") && \
|
| 1118 |
+
(test "$(avx2)" = "yes" || test "$(avx2)" = "no") && \
|
| 1119 |
+
(test "$(avx512)" = "yes" || test "$(avx512)" = "no") && \
|
| 1120 |
+
(test "$(vnni512)" = "yes" || test "$(vnni512)" = "no") && \
|
| 1121 |
+
(test "$(avx512icl)" = "yes" || test "$(avx512icl)" = "no") && \
|
| 1122 |
+
(test "$(altivec)" = "yes" || test "$(altivec)" = "no") && \
|
| 1123 |
+
(test "$(vsx)" = "yes" || test "$(vsx)" = "no") && \
|
| 1124 |
+
(test "$(neon)" = "yes" || test "$(neon)" = "no") && \
|
| 1125 |
+
(test "$(lsx)" = "yes" || test "$(lsx)" = "no") && \
|
| 1126 |
+
(test "$(lasx)" = "yes" || test "$(lasx)" = "no") && \
|
| 1127 |
+
(test "$(comp)" = "gcc" || test "$(comp)" = "icx" || test "$(comp)" = "mingw" || \
|
| 1128 |
+
test "$(comp)" = "clang" || test "$(comp)" = "armv7a-linux-androideabi16-clang" || \
|
| 1129 |
+
test "$(comp)" = "aarch64-linux-android21-clang")
|
| 1130 |
+
|
| 1131 |
+
$(EXE): $(OBJS)
|
| 1132 |
+
+$(CXX) -o $@ $(OBJS) $(LDFLAGS)
|
| 1133 |
+
|
| 1134 |
+
# Force recompilation to ensure version info is up-to-date
|
| 1135 |
+
misc.o: FORCE
|
| 1136 |
+
FORCE:
|
| 1137 |
+
|
| 1138 |
+
clang-profile-make:
|
| 1139 |
+
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
|
| 1140 |
+
EXTRACXXFLAGS='-fprofile-generate ' \
|
| 1141 |
+
EXTRALDFLAGS=' -fprofile-generate' \
|
| 1142 |
+
all
|
| 1143 |
+
|
| 1144 |
+
clang-profile-use:
|
| 1145 |
+
$(XCRUN) $(LLVM_PROFDATA) merge -output=stockfish.profdata *.profraw
|
| 1146 |
+
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
|
| 1147 |
+
EXTRACXXFLAGS='-fprofile-use=stockfish.profdata' \
|
| 1148 |
+
EXTRALDFLAGS='-fprofile-use ' \
|
| 1149 |
+
all
|
| 1150 |
+
|
| 1151 |
+
gcc-profile-make:
|
| 1152 |
+
@mkdir -p profdir
|
| 1153 |
+
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
|
| 1154 |
+
EXTRACXXFLAGS='-fprofile-generate=profdir' \
|
| 1155 |
+
EXTRACXXFLAGS+=$(EXTRAPROFILEFLAGS) \
|
| 1156 |
+
EXTRALDFLAGS='-lgcov' \
|
| 1157 |
+
all
|
| 1158 |
+
|
| 1159 |
+
gcc-profile-use:
|
| 1160 |
+
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
|
| 1161 |
+
EXTRACXXFLAGS='-fprofile-use=profdir -fno-peel-loops -fno-tracer' \
|
| 1162 |
+
EXTRACXXFLAGS+=$(EXTRAPROFILEFLAGS) \
|
| 1163 |
+
EXTRALDFLAGS='-lgcov' \
|
| 1164 |
+
all
|
| 1165 |
+
|
| 1166 |
+
icx-profile-make:
|
| 1167 |
+
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
|
| 1168 |
+
EXTRACXXFLAGS='-fprofile-instr-generate ' \
|
| 1169 |
+
EXTRALDFLAGS=' -fprofile-instr-generate' \
|
| 1170 |
+
all
|
| 1171 |
+
|
| 1172 |
+
icx-profile-use:
|
| 1173 |
+
$(XCRUN) $(LLVM_PROFDATA) merge -output=stockfish.profdata *.profraw
|
| 1174 |
+
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
|
| 1175 |
+
EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \
|
| 1176 |
+
EXTRALDFLAGS='-fprofile-use ' \
|
| 1177 |
+
all
|
| 1178 |
+
|
| 1179 |
+
.depend: $(SRCS)
|
| 1180 |
+
-@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null
|
| 1181 |
+
|
| 1182 |
+
ifeq (, $(filter $(MAKECMDGOALS), help strip install clean net objclean profileclean format config-sanity))
|
| 1183 |
+
-include .depend
|
| 1184 |
+
endif
|
src/benchmark.cpp
ADDED
|
@@ -0,0 +1,516 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include "benchmark.h"
|
| 20 |
+
#include "numa.h"
|
| 21 |
+
|
| 22 |
+
#include <cstdlib>
|
| 23 |
+
#include <fstream>
|
| 24 |
+
#include <iostream>
|
| 25 |
+
#include <vector>
|
| 26 |
+
|
| 27 |
+
namespace {
|
| 28 |
+
|
| 29 |
+
// clang-format off
|
| 30 |
+
const std::vector<std::string> Defaults = {
|
| 31 |
+
"setoption name UCI_Chess960 value false",
|
| 32 |
+
"rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1",
|
| 33 |
+
"r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq - 0 10",
|
| 34 |
+
"8/2p5/3p4/KP5r/1R3p1k/8/4P1P1/8 w - - 0 11",
|
| 35 |
+
"4rrk1/pp1n3p/3q2pQ/2p1pb2/2PP4/2P3N1/P2B2PP/4RRK1 b - - 7 19",
|
| 36 |
+
"rq3rk1/ppp2ppp/1bnpb3/3N2B1/3NP3/7P/PPPQ1PP1/2KR3R w - - 7 14 moves d4e6",
|
| 37 |
+
"r1bq1r1k/1pp1n1pp/1p1p4/4p2Q/4Pp2/1BNP4/PPP2PPP/3R1RK1 w - - 2 14 moves g2g4",
|
| 38 |
+
"r3r1k1/2p2ppp/p1p1bn2/8/1q2P3/2NPQN2/PPP3PP/R4RK1 b - - 2 15",
|
| 39 |
+
"r1bbk1nr/pp3p1p/2n5/1N4p1/2Np1B2/8/PPP2PPP/2KR1B1R w kq - 0 13",
|
| 40 |
+
"r1bq1rk1/ppp1nppp/4n3/3p3Q/3P4/1BP1B3/PP1N2PP/R4RK1 w - - 1 16",
|
| 41 |
+
"4r1k1/r1q2ppp/ppp2n2/4P3/5Rb1/1N1BQ3/PPP3PP/R5K1 w - - 1 17",
|
| 42 |
+
"2rqkb1r/ppp2p2/2npb1p1/1N1Nn2p/2P1PP2/8/PP2B1PP/R1BQK2R b KQ - 0 11",
|
| 43 |
+
"r1bq1r1k/b1p1npp1/p2p3p/1p6/3PP3/1B2NN2/PP3PPP/R2Q1RK1 w - - 1 16",
|
| 44 |
+
"3r1rk1/p5pp/bpp1pp2/8/q1PP1P2/b3P3/P2NQRPP/1R2B1K1 b - - 6 22",
|
| 45 |
+
"r1q2rk1/2p1bppp/2Pp4/p6b/Q1PNp3/4B3/PP1R1PPP/2K4R w - - 2 18",
|
| 46 |
+
"4k2r/1pb2ppp/1p2p3/1R1p4/3P4/2r1PN2/P4PPP/1R4K1 b - - 3 22",
|
| 47 |
+
"3q2k1/pb3p1p/4pbp1/2r5/PpN2N2/1P2P2P/5PP1/Q2R2K1 b - - 4 26",
|
| 48 |
+
"6k1/6p1/6Pp/ppp5/3pn2P/1P3K2/1PP2P2/3N4 b - - 0 1",
|
| 49 |
+
"3b4/5kp1/1p1p1p1p/pP1PpP1P/P1P1P3/3KN3/8/8 w - - 0 1",
|
| 50 |
+
"2K5/p7/7P/5pR1/8/5k2/r7/8 w - - 0 1 moves g5g6 f3e3 g6g5 e3f3",
|
| 51 |
+
"8/6pk/1p6/8/PP3p1p/5P2/4KP1q/3Q4 w - - 0 1",
|
| 52 |
+
"7k/3p2pp/4q3/8/4Q3/5Kp1/P6b/8 w - - 0 1",
|
| 53 |
+
"8/2p5/8/2kPKp1p/2p4P/2P5/3P4/8 w - - 0 1",
|
| 54 |
+
"8/1p3pp1/7p/5P1P/2k3P1/8/2K2P2/8 w - - 0 1",
|
| 55 |
+
"8/pp2r1k1/2p1p3/3pP2p/1P1P1P1P/P5KR/8/8 w - - 0 1",
|
| 56 |
+
"8/3p4/p1bk3p/Pp6/1Kp1PpPp/2P2P1P/2P5/5B2 b - - 0 1",
|
| 57 |
+
"5k2/7R/4P2p/5K2/p1r2P1p/8/8/8 b - - 0 1",
|
| 58 |
+
"6k1/6p1/P6p/r1N5/5p2/7P/1b3PP1/4R1K1 w - - 0 1",
|
| 59 |
+
"1r3k2/4q3/2Pp3b/3Bp3/2Q2p2/1p1P2P1/1P2KP2/3N4 w - - 0 1",
|
| 60 |
+
"6k1/4pp1p/3p2p1/P1pPb3/R7/1r2P1PP/3B1P2/6K1 w - - 0 1",
|
| 61 |
+
"8/3p3B/5p2/5P2/p7/PP5b/k7/6K1 w - - 0 1",
|
| 62 |
+
"5rk1/q6p/2p3bR/1pPp1rP1/1P1Pp3/P3B1Q1/1K3P2/R7 w - - 93 90",
|
| 63 |
+
"4rrk1/1p1nq3/p7/2p1P1pp/3P2bp/3Q1Bn1/PPPB4/1K2R1NR w - - 40 21",
|
| 64 |
+
"r3k2r/3nnpbp/q2pp1p1/p7/Pp1PPPP1/4BNN1/1P5P/R2Q1RK1 w kq - 0 16",
|
| 65 |
+
"3Qb1k1/1r2ppb1/pN1n2q1/Pp1Pp1Pr/4P2p/4BP2/4B1R1/1R5K b - - 11 40",
|
| 66 |
+
"4k3/3q1r2/1N2r1b1/3ppN2/2nPP3/1B1R2n1/2R1Q3/3K4 w - - 5 1",
|
| 67 |
+
"1r6/1P4bk/3qr1p1/N6p/3pp2P/6R1/3Q1PP1/1R4K1 w - - 1 42",
|
| 68 |
+
|
| 69 |
+
// Positions with high numbers of changed threats
|
| 70 |
+
"k7/2n1n3/1nbNbn2/2NbRBn1/1nbRQR2/2NBRBN1/3N1N2/7K w - - 0 1",
|
| 71 |
+
"K7/8/8/BNQNQNB1/N5N1/R1Q1q2r/n5n1/bnqnqnbk w - - 0 1",
|
| 72 |
+
|
| 73 |
+
// 5-man positions
|
| 74 |
+
"8/8/8/8/5kp1/P7/8/1K1N4 w - - 0 1", // Kc2 - mate
|
| 75 |
+
"8/8/8/5N2/8/p7/8/2NK3k w - - 0 1", // Na2 - mate
|
| 76 |
+
"8/3k4/8/8/8/4B3/4KB2/2B5 w - - 0 1", // draw
|
| 77 |
+
|
| 78 |
+
// 6-man positions
|
| 79 |
+
"8/8/1P6/5pr1/8/4R3/7k/2K5 w - - 0 1", // Re5 - mate
|
| 80 |
+
"8/2p4P/8/kr6/6R1/8/8/1K6 w - - 0 1", // Ka2 - mate
|
| 81 |
+
"8/8/3P3k/8/1p6/8/1P6/1K3n2 b - - 0 1", // Nd2 - draw
|
| 82 |
+
|
| 83 |
+
// 7-man positions
|
| 84 |
+
"8/R7/2q5/8/6k1/8/1P5p/K6R w - - 0 124", // Draw
|
| 85 |
+
|
| 86 |
+
// Mate and stalemate positions
|
| 87 |
+
"6k1/3b3r/1p1p4/p1n2p2/1PPNpP1q/P3Q1p1/1R1RB1P1/5K2 b - - 0 1",
|
| 88 |
+
"r2r1n2/pp2bk2/2p1p2p/3q4/3PN1QP/2P3R1/P4PP1/5RK1 w - - 0 1",
|
| 89 |
+
"8/8/8/8/8/6k1/6p1/6K1 w - -",
|
| 90 |
+
"7k/7P/6K1/8/3B4/8/8/8 b - -",
|
| 91 |
+
|
| 92 |
+
// Chess 960
|
| 93 |
+
"setoption name UCI_Chess960 value true",
|
| 94 |
+
"bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w HFhf - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6",
|
| 95 |
+
"nqbnrkrb/pppppppp/8/8/8/8/PPPPPPPP/NQBNRKRB w KQkq - 0 1",
|
| 96 |
+
"setoption name UCI_Chess960 value false"
|
| 97 |
+
};
|
| 98 |
+
// clang-format on
|
| 99 |
+
|
| 100 |
+
// clang-format off
|
| 101 |
+
// 5 games with <60 moves, hand-picked at random from
|
| 102 |
+
// https://tests.stockfishchess.org/tests/view/665c71f9fd45fb0f907c21e0
|
| 103 |
+
// (each game lists only the positions where one side is to move)
|
| 104 |
+
const std::vector<std::vector<std::string>> BenchmarkPositions = {
|
| 105 |
+
{
|
| 106 |
+
"rnbq1k1r/ppp1bppp/4pn2/8/2B5/2NP1N2/PPP2PPP/R1BQR1K1 b - - 2 8",
|
| 107 |
+
"rnbq1k1r/pp2bppp/4pn2/2p5/2B2B2/2NP1N2/PPP2PPP/R2QR1K1 b - - 1 9",
|
| 108 |
+
"r1bq1k1r/pp2bppp/2n1pn2/2p5/2B1NB2/3P1N2/PPP2PPP/R2QR1K1 b - - 3 10",
|
| 109 |
+
"r1bq1k1r/pp2bppp/2n1p3/2p5/2B1PB2/5N2/PPP2PPP/R2QR1K1 b - - 0 11",
|
| 110 |
+
"r1b2k1r/pp2bppp/2n1p3/2p5/2B1PB2/5N2/PPP2PPP/3RR1K1 b - - 0 12",
|
| 111 |
+
"r1b1k2r/pp2bppp/2n1p3/2p5/2B1PB2/2P2N2/PP3PPP/3RR1K1 b - - 0 13",
|
| 112 |
+
"r1b1k2r/1p2bppp/p1n1p3/2p5/4PB2/2P2N2/PP2BPPP/3RR1K1 b - - 1 14",
|
| 113 |
+
"r1b1k2r/4bppp/p1n1p3/1pp5/P3PB2/2P2N2/1P2BPPP/3RR1K1 b - - 0 15",
|
| 114 |
+
"r1b1k2r/4bppp/p1n1p3/1P6/2p1PB2/2P2N2/1P2BPPP/3RR1K1 b - - 0 16",
|
| 115 |
+
"r1b1k2r/4bppp/2n1p3/1p6/2p1PB2/1PP2N2/4BPPP/3RR1K1 b - - 0 17",
|
| 116 |
+
"r3k2r/3bbppp/2n1p3/1p6/2P1PB2/2P2N2/4BPPP/3RR1K1 b - - 0 18",
|
| 117 |
+
"r3k2r/3bbppp/2n1p3/8/1pP1P3/2P2N2/3BBPPP/3RR1K1 b - - 1 19",
|
| 118 |
+
"1r2k2r/3bbppp/2n1p3/8/1pPNP3/2P5/3BBPPP/3RR1K1 b - - 3 20",
|
| 119 |
+
"1r2k2r/3bbppp/2n1p3/8/2PNP3/2B5/4BPPP/3RR1K1 b - - 0 21",
|
| 120 |
+
"1r2k2r/3bb1pp/2n1pp2/1N6/2P1P3/2B5/4BPPP/3RR1K1 b - - 1 22",
|
| 121 |
+
"1r2k2r/3b2pp/2n1pp2/1N6/1BP1P3/8/4BPPP/3RR1K1 b - - 0 23",
|
| 122 |
+
"1r2k2r/3b2pp/4pp2/1N6/1nP1P3/8/3RBPPP/4R1K1 b - - 1 24",
|
| 123 |
+
"1r5r/3bk1pp/4pp2/1N6/1nP1PP2/8/3RB1PP/4R1K1 b - - 0 25",
|
| 124 |
+
"1r5r/3bk1pp/2n1pp2/1N6/2P1PP2/8/3RBKPP/4R3 b - - 2 26",
|
| 125 |
+
"1r5r/3bk1pp/2n2p2/1N2p3/2P1PP2/6P1/3RBK1P/4R3 b - - 0 27",
|
| 126 |
+
"1r1r4/3bk1pp/2n2p2/1N2p3/2P1PP2/6P1/3RBK1P/R7 b - - 2 28",
|
| 127 |
+
"1r1r4/N3k1pp/2n1bp2/4p3/2P1PP2/6P1/3RBK1P/R7 b - - 4 29",
|
| 128 |
+
"1r1r4/3bk1pp/2N2p2/4p3/2P1PP2/6P1/3RBK1P/R7 b - - 0 30",
|
| 129 |
+
"1r1R4/4k1pp/2b2p2/4p3/2P1PP2/6P1/4BK1P/R7 b - - 0 31",
|
| 130 |
+
"3r4/4k1pp/2b2p2/4P3/2P1P3/6P1/4BK1P/R7 b - - 0 32",
|
| 131 |
+
"3r4/R3k1pp/2b5/4p3/2P1P3/6P1/4BK1P/8 b - - 1 33",
|
| 132 |
+
"8/3rk1pp/2b5/R3p3/2P1P3/6P1/4BK1P/8 b - - 3 34",
|
| 133 |
+
"8/3r2pp/2bk4/R1P1p3/4P3/6P1/4BK1P/8 b - - 0 35",
|
| 134 |
+
"8/2kr2pp/2b5/R1P1p3/4P3/4K1P1/4B2P/8 b - - 2 36",
|
| 135 |
+
"1k6/3r2pp/2b5/RBP1p3/4P3/4K1P1/7P/8 b - - 4 37",
|
| 136 |
+
"8/1k1r2pp/2b5/R1P1p3/4P3/3BK1P1/7P/8 b - - 6 38",
|
| 137 |
+
"1k6/3r2pp/2b5/2P1p3/4P3/3BK1P1/7P/R7 b - - 8 39",
|
| 138 |
+
"1k6/r5pp/2b5/2P1p3/4P3/3BK1P1/7P/5R2 b - - 10 40",
|
| 139 |
+
"1k3R2/6pp/2b5/2P1p3/4P3/r2BK1P1/7P/8 b - - 12 41",
|
| 140 |
+
"5R2/2k3pp/2b5/2P1p3/4P3/r2B2P1/3K3P/8 b - - 14 42",
|
| 141 |
+
"5R2/2k3pp/2b5/2P1p3/4P3/3BK1P1/r6P/8 b - - 16 43",
|
| 142 |
+
"5R2/2k3pp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 18 44",
|
| 143 |
+
"5R2/2k3pp/2b5/2P1p3/4P3/3B1KP1/r6P/8 b - - 20 45",
|
| 144 |
+
"8/2k2Rpp/2b5/2P1p3/4P3/r2B1KP1/7P/8 b - - 22 46",
|
| 145 |
+
"3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 24 47",
|
| 146 |
+
"3k4/5Rpp/2b5/2P1p3/4P3/3B1KP1/r6P/8 b - - 26 48",
|
| 147 |
+
"3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 28 49",
|
| 148 |
+
"3k4/5Rpp/2b5/2P1p3/4P3/3BK1P1/r6P/8 b - - 30 50",
|
| 149 |
+
"3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/3K3P/8 b - - 32 51",
|
| 150 |
+
"3k4/5Rpp/2b5/2P1p3/4P3/2KB2P1/r6P/8 b - - 34 52",
|
| 151 |
+
"3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/2K4P/8 b - - 36 53",
|
| 152 |
+
"3k4/5Rpp/2b5/2P1p3/4P3/1K1B2P1/r6P/8 b - - 38 54",
|
| 153 |
+
"3k4/6Rp/2b5/2P1p3/4P3/1K1B2P1/7r/8 b - - 0 55",
|
| 154 |
+
"3k4/8/2b3Rp/2P1p3/4P3/1K1B2P1/7r/8 b - - 1 56",
|
| 155 |
+
"8/2k3R1/2b4p/2P1p3/4P3/1K1B2P1/7r/8 b - - 3 57",
|
| 156 |
+
"3k4/8/2b3Rp/2P1p3/4P3/1K1B2P1/7r/8 b - - 5 58",
|
| 157 |
+
"8/2k5/2b3Rp/2P1p3/1K2P3/3B2P1/7r/8 b - - 7 59",
|
| 158 |
+
"8/2k5/2b3Rp/2P1p3/4P3/2KB2P1/3r4/8 b - - 9 60",
|
| 159 |
+
"8/2k5/2b3Rp/2P1p3/1K2P3/3B2P1/6r1/8 b - - 11 61",
|
| 160 |
+
"8/2k5/2b3Rp/2P1p3/4P3/2KB2P1/3r4/8 b - - 13 62",
|
| 161 |
+
"8/2k5/2b3Rp/2P1p3/2K1P3/3B2P1/6r1/8 b - - 15 63",
|
| 162 |
+
"4b3/2k3R1/7p/2P1p3/2K1P3/3B2P1/6r1/8 b - - 17 64",
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"r1bqkbnr/npp1pppp/p7/3P4/4pB2/2N5/PPP2PPP/R2QKBNR w KQkq - 1 6",
|
| 166 |
+
"r1bqkb1r/npp1pppp/p4n2/3P4/4pB2/2N5/PPP1QPPP/R3KBNR w KQkq - 3 7",
|
| 167 |
+
"r2qkb1r/npp1pppp/p4n2/3P1b2/4pB2/2N5/PPP1QPPP/2KR1BNR w kq - 5 8",
|
| 168 |
+
"r2qkb1r/1pp1pppp/p4n2/1n1P1b2/4pB2/2N4P/PPP1QPP1/2KR1BNR w kq - 1 9",
|
| 169 |
+
"r2qkb1r/1pp1pppp/5n2/1p1P1b2/4pB2/7P/PPP1QPP1/2KR1BNR w kq - 0 10",
|
| 170 |
+
"r2qkb1r/1ppbpppp/5n2/1Q1P4/4pB2/7P/PPP2PP1/2KR1BNR w kq - 1 11",
|
| 171 |
+
"3qkb1r/1Qpbpppp/5n2/3P4/4pB2/7P/rPP2PP1/2KR1BNR w k - 0 12",
|
| 172 |
+
"q3kb1r/1Qpbpppp/5n2/3P4/4pB2/7P/rPP2PP1/1K1R1BNR w k - 2 13",
|
| 173 |
+
"r3kb1r/2pbpppp/5n2/3P4/4pB2/7P/1PP2PP1/1K1R1BNR w k - 0 14",
|
| 174 |
+
"r3kb1r/2Bb1ppp/4pn2/3P4/4p3/7P/1PP2PP1/1K1R1BNR w k - 0 15",
|
| 175 |
+
"r3kb1r/2Bb2pp/4pn2/8/4p3/7P/1PP2PP1/1K1R1BNR w k - 0 16",
|
| 176 |
+
"r3k2r/2Bb2pp/4pn2/2b5/4p3/7P/1PP1NPP1/1K1R1B1R w k - 2 17",
|
| 177 |
+
"r6r/2Bbk1pp/4pn2/2b5/3Np3/7P/1PP2PP1/1K1R1B1R w - - 4 18",
|
| 178 |
+
"r6r/b2bk1pp/4pn2/4B3/3Np3/7P/1PP2PP1/1K1R1B1R w - - 6 19",
|
| 179 |
+
"r1r5/b2bk1pp/4pn2/4B3/2BNp3/7P/1PP2PP1/1K1R3R w - - 8 20",
|
| 180 |
+
"r7/b2bk1pp/4pn2/2r1B3/2BNp3/1P5P/2P2PP1/1K1R3R w - - 1 21",
|
| 181 |
+
"rb6/3bk1pp/4pn2/2r1B3/2BNpP2/1P5P/2P3P1/1K1R3R w - - 1 22",
|
| 182 |
+
"1r6/3bk1pp/4pn2/2r5/2BNpP2/1P5P/2P3P1/1K1R3R w - - 0 23",
|
| 183 |
+
"1r6/3bk1p1/4pn1p/2r5/2BNpP2/1P5P/2P3P1/2KR3R w - - 0 24",
|
| 184 |
+
"8/3bk1p1/1r2pn1p/2r5/2BNpP1P/1P6/2P3P1/2KR3R w - - 1 25",
|
| 185 |
+
"8/3bk3/1r2pnpp/2r5/2BNpP1P/1P6/2P3P1/2K1R2R w - - 0 26",
|
| 186 |
+
"2b5/4k3/1r2pnpp/2r5/2BNpP1P/1P4P1/2P5/2K1R2R w - - 1 27",
|
| 187 |
+
"8/1b2k3/1r2pnpp/2r5/2BNpP1P/1P4P1/2P5/2K1R1R1 w - - 3 28",
|
| 188 |
+
"8/1b1nk3/1r2p1pp/2r5/2BNpPPP/1P6/2P5/2K1R1R1 w - - 1 29",
|
| 189 |
+
"8/1b2k3/1r2p1pp/2r1nP2/2BNp1PP/1P6/2P5/2K1R1R1 w - - 1 30",
|
| 190 |
+
"8/1b2k3/1r2p1p1/2r1nPp1/2BNp2P/1P6/2P5/2K1R1R1 w - - 0 31",
|
| 191 |
+
"8/1b2k3/1r2p1n1/2r3p1/2BNp2P/1P6/2P5/2K1R1R1 w - - 0 32",
|
| 192 |
+
"8/1b2k3/1r2p1n1/6r1/2BNp2P/1P6/2P5/2K1R3 w - - 0 33",
|
| 193 |
+
"8/1b2k3/1r2p3/4n1P1/2BNp3/1P6/2P5/2K1R3 w - - 1 34",
|
| 194 |
+
"8/1b2k3/1r2p3/4n1P1/2BN4/1P2p3/2P5/2K4R w - - 0 35",
|
| 195 |
+
"8/1b2k3/1r2p2R/6P1/2nN4/1P2p3/2P5/2K5 w - - 0 36",
|
| 196 |
+
"8/1b2k3/3rp2R/6P1/2PN4/4p3/2P5/2K5 w - - 1 37",
|
| 197 |
+
"8/4k3/3rp2R/6P1/2PN4/2P1p3/6b1/2K5 w - - 1 38",
|
| 198 |
+
"8/4k3/r3p2R/2P3P1/3N4/2P1p3/6b1/2K5 w - - 1 39",
|
| 199 |
+
"8/3k4/r3p2R/2P2NP1/8/2P1p3/6b1/2K5 w - - 3 40",
|
| 200 |
+
"8/3k4/4p2R/2P3P1/8/2P1N3/6b1/r1K5 w - - 1 41",
|
| 201 |
+
"8/3k4/4p2R/2P3P1/8/2P1N3/3K2b1/6r1 w - - 3 42",
|
| 202 |
+
"8/3k4/4p2R/2P3P1/8/2PKNb2/8/6r1 w - - 5 43",
|
| 203 |
+
"8/4k3/4p1R1/2P3P1/8/2PKNb2/8/6r1 w - - 7 44",
|
| 204 |
+
"8/4k3/4p1R1/2P3P1/3K4/2P1N3/8/6rb w - - 9 45",
|
| 205 |
+
"8/3k4/4p1R1/2P1K1P1/8/2P1N3/8/6rb w - - 11 46",
|
| 206 |
+
"8/3k4/4p1R1/2P3P1/5K2/2P1N3/8/4r2b w - - 13 47",
|
| 207 |
+
"8/3k4/2b1p2R/2P3P1/5K2/2P1N3/8/4r3 w - - 15 48",
|
| 208 |
+
"8/3k4/2b1p3/2P3P1/5K2/2P1N2R/8/6r1 w - - 17 49",
|
| 209 |
+
"2k5/7R/2b1p3/2P3P1/5K2/2P1N3/8/6r1 w - - 19 50",
|
| 210 |
+
"2k5/7R/4p3/2P3P1/b1P2K2/4N3/8/6r1 w - - 1 51",
|
| 211 |
+
"2k5/3bR3/4p3/2P3P1/2P2K2/4N3/8/6r1 w - - 3 52",
|
| 212 |
+
"3k4/3b2R1/4p3/2P3P1/2P2K2/4N3/8/6r1 w - - 5 53",
|
| 213 |
+
"3kb3/6R1/4p1P1/2P5/2P2K2/4N3/8/6r1 w - - 1 54",
|
| 214 |
+
"3kb3/6R1/4p1P1/2P5/2P2KN1/8/8/2r5 w - - 3 55",
|
| 215 |
+
"3kb3/6R1/4p1P1/2P1N3/2P2K2/8/8/5r2 w - - 5 56",
|
| 216 |
+
"3kb3/6R1/4p1P1/2P1N3/2P5/4K3/8/4r3 w - - 7 57",
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"rnbq1rk1/ppp1npb1/4p1p1/3P3p/3PP3/2N2N2/PP2BPPP/R1BQ1RK1 b - - 0 8",
|
| 220 |
+
"rnbq1rk1/ppp1npb1/6p1/3pP2p/3P4/2N2N2/PP2BPPP/R1BQ1RK1 b - - 0 9",
|
| 221 |
+
"rn1q1rk1/ppp1npb1/6p1/3pP2p/3P2b1/2N2N2/PP2BPPP/R1BQR1K1 b - - 2 10",
|
| 222 |
+
"r2q1rk1/ppp1npb1/2n3p1/3pP2p/3P2bN/2N5/PP2BPPP/R1BQR1K1 b - - 4 11",
|
| 223 |
+
"r4rk1/pppqnpb1/2n3p1/3pP2p/3P2bN/2N4P/PP2BPP1/R1BQR1K1 b - - 0 12",
|
| 224 |
+
"r4rk1/pppqnpb1/2n3p1/3pP2p/3P3N/7P/PP2NPP1/R1BQR1K1 b - - 0 13",
|
| 225 |
+
"r4rk1/pppq1pb1/2n3p1/3pPN1p/3P4/7P/PP2NPP1/R1BQR1K1 b - - 0 14",
|
| 226 |
+
"r4rk1/ppp2pb1/2n3p1/3pPq1p/3P1N2/7P/PP3PP1/R1BQR1K1 b - - 1 15",
|
| 227 |
+
"r4rk1/pppq1pb1/2n3p1/3pP2p/P2P1N2/7P/1P3PP1/R1BQR1K1 b - - 0 16",
|
| 228 |
+
"r2n1rk1/pppq1pb1/6p1/3pP2p/P2P1N2/R6P/1P3PP1/2BQR1K1 b - - 2 17",
|
| 229 |
+
"r4rk1/pppq1pb1/4N1p1/3pP2p/P2P4/R6P/1P3PP1/2BQR1K1 b - - 0 18",
|
| 230 |
+
"r4rk1/ppp2pb1/4q1p1/3pP1Bp/P2P4/R6P/1P3PP1/3QR1K1 b - - 1 19",
|
| 231 |
+
"r3r1k1/ppp2pb1/4q1p1/3pP1Bp/P2P1P2/R6P/1P4P1/3QR1K1 b - - 0 20",
|
| 232 |
+
"r3r1k1/ppp3b1/4qpp1/3pP2p/P2P1P1B/R6P/1P4P1/3QR1K1 b - - 1 21",
|
| 233 |
+
"r3r1k1/ppp3b1/4q1p1/3pP2p/P4P1B/R6P/1P4P1/3QR1K1 b - - 0 22",
|
| 234 |
+
"r4rk1/ppp3b1/4q1p1/3pP1Bp/P4P2/R6P/1P4P1/3QR1K1 b - - 2 23",
|
| 235 |
+
"r4rk1/pp4b1/4q1p1/2ppP1Bp/P4P2/3R3P/1P4P1/3QR1K1 b - - 1 24",
|
| 236 |
+
"r4rk1/pp4b1/4q1p1/2p1P1Bp/P2p1PP1/3R3P/1P6/3QR1K1 b - - 0 25",
|
| 237 |
+
"r4rk1/pp4b1/4q1p1/2p1P1B1/P2p1PP1/3R4/1P6/3QR1K1 b - - 0 26",
|
| 238 |
+
"r5k1/pp3rb1/4q1p1/2p1P1B1/P2p1PP1/6R1/1P6/3QR1K1 b - - 2 27",
|
| 239 |
+
"5rk1/pp3rb1/4q1p1/2p1P1B1/P2pRPP1/6R1/1P6/3Q2K1 b - - 4 28",
|
| 240 |
+
"5rk1/1p3rb1/p3q1p1/P1p1P1B1/3pRPP1/6R1/1P6/3Q2K1 b - - 0 29",
|
| 241 |
+
"4r1k1/1p3rb1/p3q1p1/P1p1P1B1/3pRPP1/1P4R1/8/3Q2K1 b - - 0 30",
|
| 242 |
+
"4r1k1/5rb1/pP2q1p1/2p1P1B1/3pRPP1/1P4R1/8/3Q2K1 b - - 0 31",
|
| 243 |
+
"4r1k1/5rb1/pq4p1/2p1P1B1/3pRPP1/1P4R1/4Q3/6K1 b - - 1 32",
|
| 244 |
+
"4r1k1/1r4b1/pq4p1/2p1P1B1/3pRPP1/1P4R1/2Q5/6K1 b - - 3 33",
|
| 245 |
+
"4r1k1/1r4b1/1q4p1/p1p1P1B1/3p1PP1/1P4R1/2Q5/4R1K1 b - - 1 34",
|
| 246 |
+
"4r1k1/3r2b1/1q4p1/p1p1P1B1/2Qp1PP1/1P4R1/8/4R1K1 b - - 3 35",
|
| 247 |
+
"4r1k1/3r2b1/4q1p1/p1p1P1B1/2Qp1PP1/1P4R1/5K2/4R3 b - - 5 36",
|
| 248 |
+
"4r1k1/3r2b1/6p1/p1p1P1B1/2Pp1PP1/6R1/5K2/4R3 b - - 0 37",
|
| 249 |
+
"4r1k1/3r2b1/6p1/p1p1P1B1/2P2PP1/3p2R1/5K2/3R4 b - - 1 38",
|
| 250 |
+
"5rk1/3r2b1/6p1/p1p1P1B1/2P2PP1/3p2R1/8/3RK3 b - - 3 39",
|
| 251 |
+
"5rk1/6b1/6p1/p1p1P1B1/2Pr1PP1/3R4/8/3RK3 b - - 0 40",
|
| 252 |
+
"5rk1/3R2b1/6p1/p1p1P1B1/2r2PP1/8/8/3RK3 b - - 1 41",
|
| 253 |
+
"5rk1/3R2b1/6p1/p1p1P1B1/4rPP1/8/3K4/3R4 b - - 3 42",
|
| 254 |
+
"1r4k1/3R2b1/6p1/p1p1P1B1/4rPP1/2K5/8/3R4 b - - 5 43",
|
| 255 |
+
"1r4k1/3R2b1/6p1/p1p1P1B1/2K2PP1/4r3/8/3R4 b - - 7 44",
|
| 256 |
+
"1r3bk1/8/3R2p1/p1p1P1B1/2K2PP1/4r3/8/3R4 b - - 9 45",
|
| 257 |
+
"1r3bk1/8/6R1/2p1P1B1/p1K2PP1/4r3/8/3R4 b - - 0 46",
|
| 258 |
+
"1r3b2/5k2/R7/2p1P1B1/p1K2PP1/4r3/8/3R4 b - - 2 47",
|
| 259 |
+
"5b2/1r3k2/R7/2p1P1B1/p1K2PP1/4r3/8/7R b - - 4 48",
|
| 260 |
+
"5b2/5k2/R7/2pKP1B1/pr3PP1/4r3/8/7R b - - 6 49",
|
| 261 |
+
"5b2/5k2/R1K5/2p1P1B1/p2r1PP1/4r3/8/7R b - - 8 50",
|
| 262 |
+
"8/R4kb1/2K5/2p1P1B1/p2r1PP1/4r3/8/7R b - - 10 51",
|
| 263 |
+
"8/R5b1/2K3k1/2p1PPB1/p2r2P1/4r3/8/7R b - - 0 52",
|
| 264 |
+
"8/6R1/2K5/2p1PPk1/p2r2P1/4r3/8/7R b - - 0 53",
|
| 265 |
+
"8/6R1/2K5/2p1PP2/p2r1kP1/4r3/8/5R2 b - - 2 54",
|
| 266 |
+
"8/6R1/2K2P2/2p1P3/p2r2P1/4r1k1/8/5R2 b - - 0 55",
|
| 267 |
+
"8/5PR1/2K5/2p1P3/p2r2P1/4r3/6k1/5R2 b - - 0 56",
|
| 268 |
+
},
|
| 269 |
+
{
|
| 270 |
+
"rn1qkb1r/p1pbpppp/5n2/8/2pP4/2N5/1PQ1PPPP/R1B1KBNR w KQkq - 0 7",
|
| 271 |
+
"r2qkb1r/p1pbpppp/2n2n2/8/2pP4/2N2N2/1PQ1PPPP/R1B1KB1R w KQkq - 2 8",
|
| 272 |
+
"r2qkb1r/p1pbpppp/5n2/8/1npPP3/2N2N2/1PQ2PPP/R1B1KB1R w KQkq - 1 9",
|
| 273 |
+
"r2qkb1r/p1pb1ppp/4pn2/8/1npPP3/2N2N2/1P3PPP/R1BQKB1R w KQkq - 0 10",
|
| 274 |
+
"r2qk2r/p1pbbppp/4pn2/8/1nBPP3/2N2N2/1P3PPP/R1BQK2R w KQkq - 1 11",
|
| 275 |
+
"r2q1rk1/p1pbbppp/4pn2/8/1nBPP3/2N2N2/1P3PPP/R1BQ1RK1 w - - 3 12",
|
| 276 |
+
"r2q1rk1/2pbbppp/p3pn2/8/1nBPPB2/2N2N2/1P3PPP/R2Q1RK1 w - - 0 13",
|
| 277 |
+
"r2q1rk1/2p1bppp/p3pn2/1b6/1nBPPB2/2N2N2/1P3PPP/R2QR1K1 w - - 2 14",
|
| 278 |
+
"r2q1rk1/4bppp/p1p1pn2/1b6/1nBPPB2/1PN2N2/5PPP/R2QR1K1 w - - 0 15",
|
| 279 |
+
"r4rk1/3qbppp/p1p1pn2/1b6/1nBPPB2/1PN2N2/3Q1PPP/R3R1K1 w - - 2 16",
|
| 280 |
+
"r4rk1/1q2bppp/p1p1pn2/1b6/1nBPPB2/1PN2N1P/3Q1PP1/R3R1K1 w - - 1 17",
|
| 281 |
+
"r3r1k1/1q2bppp/p1p1pn2/1b6/1nBPPB2/1PN2N1P/4QPP1/R3R1K1 w - - 3 18",
|
| 282 |
+
"r3r1k1/1q1nbppp/p1p1p3/1b6/1nBPPB2/1PN2N1P/4QPP1/3RR1K1 w - - 5 19",
|
| 283 |
+
"r3rbk1/1q1n1ppp/p1p1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/4R1K1 w - - 7 20",
|
| 284 |
+
"r3rbk1/1q3ppp/pnp1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/4R2K w - - 9 21",
|
| 285 |
+
"2r1rbk1/1q3ppp/pnp1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/1R5K w - - 11 22",
|
| 286 |
+
"2r1rbk1/1q4pp/pnp1pp2/1b6/1nBPPB2/1PN2N1P/4QPP1/1R1R3K w - - 0 23",
|
| 287 |
+
"2r1rbk1/5qpp/pnp1pp2/1b6/1nBPP3/1PN1BN1P/4QPP1/1R1R3K w - - 2 24",
|
| 288 |
+
"2r1rbk1/5qp1/pnp1pp1p/1b6/1nBPP3/1PN1BN1P/4QPP1/1R1R2K1 w - - 0 25",
|
| 289 |
+
"2r1rbk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/n3QPP1/1R1R2K1 w - - 0 26",
|
| 290 |
+
"r3rbk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/Q4PP1/1R1R2K1 w - - 1 27",
|
| 291 |
+
"rr3bk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/Q4PP1/R2R2K1 w - - 3 28",
|
| 292 |
+
"rr2qbk1/6p1/pnp1pp1p/1b6/2BPP3/1P2BN1P/4QPP1/R2R2K1 w - - 5 29",
|
| 293 |
+
"rr2qbk1/6p1/1np1pp1p/pb6/2BPP3/1P1QBN1P/5PP1/R2R2K1 w - - 0 30",
|
| 294 |
+
"rr2qbk1/6p1/1n2pp1p/pp6/3PP3/1P1QBN1P/5PP1/R2R2K1 w - - 0 31",
|
| 295 |
+
"rr2qbk1/6p1/1n2pp1p/1p1P4/p3P3/1P1QBN1P/5PP1/R2R2K1 w - - 0 32",
|
| 296 |
+
"rr2qbk1/3n2p1/3Ppp1p/1p6/p3P3/1P1QBN1P/5PP1/R2R2K1 w - - 1 33",
|
| 297 |
+
"rr3bk1/3n2p1/3Ppp1p/1p5q/pP2P3/3QBN1P/5PP1/R2R2K1 w - - 1 34",
|
| 298 |
+
"rr3bk1/3n2p1/3Ppp1p/1p5q/1P2P3/p2QBN1P/5PP1/2RR2K1 w - - 0 35",
|
| 299 |
+
"1r3bk1/3n2p1/r2Ppp1p/1p5q/1P2P3/pQ2BN1P/5PP1/2RR2K1 w - - 2 36",
|
| 300 |
+
"1r2qbk1/2Rn2p1/r2Ppp1p/1p6/1P2P3/pQ2BN1P/5PP1/3R2K1 w - - 4 37",
|
| 301 |
+
"1r2qbk1/2Rn2p1/r2Ppp1p/1pB5/1P2P3/1Q3N1P/p4PP1/3R2K1 w - - 0 38",
|
| 302 |
+
"1r2q1k1/2Rn2p1/r2bpp1p/1pB5/1P2P3/1Q3N1P/p4PP1/R5K1 w - - 0 39",
|
| 303 |
+
"1r2q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/1Q3N1P/p4PP1/R5K1 w - - 0 40",
|
| 304 |
+
"2r1q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 1 41",
|
| 305 |
+
"1r2q1k1/1R1n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 3 42",
|
| 306 |
+
"2r1q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 5 43",
|
| 307 |
+
"1r2q1k1/1R1n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 7 44",
|
| 308 |
+
"1rq3k1/R2n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 9 45",
|
| 309 |
+
"2q3k1/Rr1n2p1/3rpp1p/1p6/1P2P3/5N1P/4QPP1/R5K1 w - - 11 46",
|
| 310 |
+
"Rrq3k1/3n2p1/3rpp1p/1p6/1P2P3/5N1P/4QPP1/R5K1 w - - 13 47",
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"rn1qkb1r/1pp2ppp/p4p2/3p1b2/5P2/1P2PN2/P1PP2PP/RN1QKB1R b KQkq - 1 6",
|
| 314 |
+
"r2qkb1r/1pp2ppp/p1n2p2/3p1b2/3P1P2/1P2PN2/P1P3PP/RN1QKB1R b KQkq - 0 7",
|
| 315 |
+
"r2qkb1r/1pp2ppp/p4p2/3p1b2/1n1P1P2/1P1BPN2/P1P3PP/RN1QK2R b KQkq - 2 8",
|
| 316 |
+
"r2qkb1r/1pp2ppp/p4p2/3p1b2/3P1P2/1P1PPN2/P5PP/RN1QK2R b KQkq - 0 9",
|
| 317 |
+
"r2qk2r/1pp2ppp/p2b1p2/3p1b2/3P1P2/1PNPPN2/P5PP/R2QK2R b KQkq - 2 10",
|
| 318 |
+
"r2qk2r/1p3ppp/p1pb1p2/3p1b2/3P1P2/1PNPPN2/P5PP/R2Q1RK1 b kq - 1 11",
|
| 319 |
+
"r2q1rk1/1p3ppp/p1pb1p2/3p1b2/3P1P2/1PNPPN2/P2Q2PP/R4RK1 b - - 3 12",
|
| 320 |
+
"r2qr1k1/1p3ppp/p1pb1p2/3p1b2/3P1P2/1P1PPN2/P2QN1PP/R4RK1 b - - 5 13",
|
| 321 |
+
"r3r1k1/1p3ppp/pqpb1p2/3p1b2/3P1P2/1P1PPNN1/P2Q2PP/R4RK1 b - - 7 14",
|
| 322 |
+
"r3r1k1/1p3ppp/pqp2p2/3p1b2/1b1P1P2/1P1PPNN1/P1Q3PP/R4RK1 b - - 9 15",
|
| 323 |
+
"r3r1k1/1p1b1ppp/pqp2p2/3p4/1b1P1P2/1P1PPNN1/P4QPP/R4RK1 b - - 11 16",
|
| 324 |
+
"2r1r1k1/1p1b1ppp/pqp2p2/3p4/1b1PPP2/1P1P1NN1/P4QPP/R4RK1 b - - 0 17",
|
| 325 |
+
"2r1r1k1/1p1b1ppp/pq3p2/2pp4/1b1PPP2/PP1P1NN1/5QPP/R4RK1 b - - 0 18",
|
| 326 |
+
"2r1r1k1/1p1b1ppp/pq3p2/2Pp4/4PP2/PPbP1NN1/5QPP/R4RK1 b - - 0 19",
|
| 327 |
+
"2r1r1k1/1p1b1ppp/p4p2/2Pp4/4PP2/PqbP1NN1/5QPP/RR4K1 b - - 1 20",
|
| 328 |
+
"2r1r1k1/1p1b1ppp/p4p2/2Pp4/q3PP2/P1bP1NN1/R4QPP/1R4K1 b - - 3 21",
|
| 329 |
+
"2r1r1k1/1p3ppp/p4p2/1bPP4/q4P2/P1bP1NN1/R4QPP/1R4K1 b - - 0 22",
|
| 330 |
+
"2r1r1k1/1p3ppp/p4p2/2PP4/q4P2/P1bb1NN1/R4QPP/2R3K1 b - - 1 23",
|
| 331 |
+
"2r1r1k1/1p3ppp/p2P1p2/2P5/2q2P2/P1bb1NN1/R4QPP/2R3K1 b - - 0 24",
|
| 332 |
+
"2rr2k1/1p3ppp/p2P1p2/2P5/2q2P2/P1bb1NN1/R4QPP/2R4K b - - 2 25",
|
| 333 |
+
"2rr2k1/1p3ppp/p2P1p2/2Q5/5P2/P1bb1NN1/R5PP/2R4K b - - 0 26",
|
| 334 |
+
"3r2k1/1p3ppp/p2P1p2/2r5/5P2/P1bb1N2/R3N1PP/2R4K b - - 1 27",
|
| 335 |
+
"3r2k1/1p3ppp/p2P1p2/2r5/5P2/P1b2N2/4R1PP/2R4K b - - 0 28",
|
| 336 |
+
"3r2k1/1p3ppp/p2P1p2/2r5/1b3P2/P4N2/4R1PP/3R3K b - - 2 29",
|
| 337 |
+
"3r2k1/1p2Rppp/p2P1p2/b1r5/5P2/P4N2/6PP/3R3K b - - 4 30",
|
| 338 |
+
"3r2k1/1R3ppp/p1rP1p2/b7/5P2/P4N2/6PP/3R3K b - - 0 31",
|
| 339 |
+
"3r2k1/1R3ppp/p2R1p2/b7/5P2/P4N2/6PP/7K b - - 0 32",
|
| 340 |
+
"6k1/1R3ppp/p2r1p2/b7/5P2/P4NP1/7P/7K b - - 0 33",
|
| 341 |
+
"6k1/1R3p1p/p2r1pp1/b7/5P1P/P4NP1/8/7K b - - 0 34",
|
| 342 |
+
"6k1/3R1p1p/pr3pp1/b7/5P1P/P4NP1/8/7K b - - 2 35",
|
| 343 |
+
"6k1/5p2/pr3pp1/b2R3p/5P1P/P4NP1/8/7K b - - 1 36",
|
| 344 |
+
"6k1/5p2/pr3pp1/7p/5P1P/P1bR1NP1/8/7K b - - 3 37",
|
| 345 |
+
"6k1/5p2/p1r2pp1/7p/5P1P/P1bR1NP1/6K1/8 b - - 5 38",
|
| 346 |
+
"6k1/5p2/p1r2pp1/b2R3p/5P1P/P4NP1/6K1/8 b - - 7 39",
|
| 347 |
+
"6k1/5p2/p4pp1/b2R3p/5P1P/P4NPK/2r5/8 b - - 9 40",
|
| 348 |
+
"6k1/2b2p2/p4pp1/7p/5P1P/P2R1NPK/2r5/8 b - - 11 41",
|
| 349 |
+
"6k1/2b2p2/5pp1/p6p/3N1P1P/P2R2PK/2r5/8 b - - 1 42",
|
| 350 |
+
"6k1/2b2p2/5pp1/p6p/3N1P1P/P1R3PK/r7/8 b - - 3 43",
|
| 351 |
+
"6k1/5p2/1b3pp1/p6p/5P1P/P1R3PK/r1N5/8 b - - 5 44",
|
| 352 |
+
"8/5pk1/1bR2pp1/p6p/5P1P/P5PK/r1N5/8 b - - 7 45",
|
| 353 |
+
"3b4/5pk1/2R2pp1/p4P1p/7P/P5PK/r1N5/8 b - - 0 46",
|
| 354 |
+
"8/4bpk1/2R2pp1/p4P1p/6PP/P6K/r1N5/8 b - - 0 47",
|
| 355 |
+
"8/5pk1/2R2pP1/p6p/6PP/b6K/r1N5/8 b - - 0 48",
|
| 356 |
+
"8/6k1/2R2pp1/p6P/7P/b6K/r1N5/8 b - - 0 49",
|
| 357 |
+
"8/6k1/2R2p2/p6p/7P/b5K1/r1N5/8 b - - 1 50",
|
| 358 |
+
"8/8/2R2pk1/p6p/7P/b4K2/r1N5/8 b - - 3 51",
|
| 359 |
+
"8/8/2R2pk1/p6p/7P/4NK2/rb6/8 b - - 5 52",
|
| 360 |
+
"2R5/8/5pk1/7p/p6P/4NK2/rb6/8 b - - 1 53",
|
| 361 |
+
"6R1/8/5pk1/7p/p6P/4NK2/1b6/r7 b - - 3 54",
|
| 362 |
+
"R7/5k2/5p2/7p/p6P/4NK2/1b6/r7 b - - 5 55",
|
| 363 |
+
"R7/5k2/5p2/7p/7P/p3N3/1b2K3/r7 b - - 1 56",
|
| 364 |
+
"8/R4k2/5p2/7p/7P/p3N3/1b2K3/7r b - - 3 57",
|
| 365 |
+
"8/8/5pk1/7p/R6P/p3N3/1b2K3/7r b - - 5 58",
|
| 366 |
+
"8/8/5pk1/7p/R6P/p7/4K3/2bN3r b - - 7 59",
|
| 367 |
+
"8/8/5pk1/7p/R6P/p7/4KN1r/2b5 b - - 9 60",
|
| 368 |
+
"8/8/5pk1/7p/R6P/p3K3/1b3N1r/8 b - - 11 61",
|
| 369 |
+
"8/8/R4pk1/7p/7P/p1b1K3/5N1r/8 b - - 13 62",
|
| 370 |
+
"8/8/5pk1/7p/7P/2b1K3/R4N1r/8 b - - 0 63",
|
| 371 |
+
"8/8/5pk1/7p/3K3P/8/R4N1r/4b3 b - - 2 64",
|
| 372 |
+
}
|
| 373 |
+
};
|
| 374 |
+
// clang-format on
|
| 375 |
+
|
| 376 |
+
} // namespace
|
| 377 |
+
|
| 378 |
+
namespace Stockfish::Benchmark {
|
| 379 |
+
|
| 380 |
+
// Builds a list of UCI commands to be run by bench. There
|
| 381 |
+
// are five parameters: TT size in MB, number of search threads that
|
| 382 |
+
// should be used, the limit value spent for each position, a file name
|
| 383 |
+
// where to look for positions in FEN format, and the type of the limit:
|
| 384 |
+
// depth, perft, nodes and movetime (in milliseconds). Examples:
|
| 385 |
+
//
|
| 386 |
+
// bench : search default positions up to depth 13
|
| 387 |
+
// bench 64 1 15 : search default positions up to depth 15 (TT = 64MB)
|
| 388 |
+
// bench 64 1 100000 default nodes : search default positions for 100K nodes each
|
| 389 |
+
// bench 64 4 5000 current movetime : search current position with 4 threads for 5 sec
|
| 390 |
+
// bench 16 1 5 blah perft : run a perft 5 on positions in file "blah"
|
| 391 |
+
std::vector<std::string> setup_bench(const std::string& currentFen, std::istream& is) {
|
| 392 |
+
|
| 393 |
+
std::vector<std::string> fens, list;
|
| 394 |
+
std::string go, token;
|
| 395 |
+
|
| 396 |
+
// Assign default values to missing arguments
|
| 397 |
+
std::string ttSize = (is >> token) ? token : "16";
|
| 398 |
+
std::string threads = (is >> token) ? token : "1";
|
| 399 |
+
std::string limit = (is >> token) ? token : "13";
|
| 400 |
+
std::string fenFile = (is >> token) ? token : "default";
|
| 401 |
+
std::string limitType = (is >> token) ? token : "depth";
|
| 402 |
+
|
| 403 |
+
go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit;
|
| 404 |
+
|
| 405 |
+
if (fenFile == "default")
|
| 406 |
+
fens = Defaults;
|
| 407 |
+
|
| 408 |
+
else if (fenFile == "current")
|
| 409 |
+
fens.push_back(currentFen);
|
| 410 |
+
|
| 411 |
+
else
|
| 412 |
+
{
|
| 413 |
+
std::string fen;
|
| 414 |
+
std::ifstream file(fenFile);
|
| 415 |
+
|
| 416 |
+
if (!file.is_open())
|
| 417 |
+
{
|
| 418 |
+
std::cerr << "Unable to open file " << fenFile << std::endl;
|
| 419 |
+
exit(EXIT_FAILURE);
|
| 420 |
+
}
|
| 421 |
+
|
| 422 |
+
while (getline(file, fen))
|
| 423 |
+
if (!fen.empty())
|
| 424 |
+
fens.push_back(fen);
|
| 425 |
+
|
| 426 |
+
file.close();
|
| 427 |
+
}
|
| 428 |
+
|
| 429 |
+
list.emplace_back("setoption name Threads value " + threads);
|
| 430 |
+
list.emplace_back("setoption name Hash value " + ttSize);
|
| 431 |
+
list.emplace_back("ucinewgame");
|
| 432 |
+
|
| 433 |
+
for (const std::string& fen : fens)
|
| 434 |
+
if (fen.find("setoption") != std::string::npos)
|
| 435 |
+
list.emplace_back(fen);
|
| 436 |
+
else
|
| 437 |
+
{
|
| 438 |
+
list.emplace_back("position fen " + fen);
|
| 439 |
+
list.emplace_back(go);
|
| 440 |
+
}
|
| 441 |
+
|
| 442 |
+
return list;
|
| 443 |
+
}
|
| 444 |
+
|
| 445 |
+
// Prepares the command script for the timed benchmark. Up to three optional
// integers are read from the stream, in this order: number of threads, TT
// size and desired total duration in seconds. Arguments that are present are
// echoed into originalInvocation; filledInvocation always holds all three
// values, with defaults substituted for the missing ones.
//
// For every game in BenchmarkPositions the script contains "ucinewgame"
// followed by a "position fen ..." / "go movetime ..." pair per position.
BenchmarkSetup setup_benchmark(std::istream& is) {
    // The TT size per thread is chosen such that roughly half of the hash is
    // used after all positions for the current sequence have been searched.
    constexpr int TtSizePerThread  = 128;
    constexpr int DefaultDurationS = 150;

    BenchmarkSetup result{};

    // Assign default values to missing arguments
    if (is >> result.threads)
        result.originalInvocation += std::to_string(result.threads);
    else
        result.threads = int(get_hardware_concurrency());

    if (is >> result.ttSize)
        result.originalInvocation += " " + std::to_string(result.ttSize);
    else
        result.ttSize = TtSizePerThread * result.threads;

    int durationS;

    if (is >> durationS)
        result.originalInvocation += " " + std::to_string(durationS);
    else
        durationS = DefaultDurationS;

    result.filledInvocation += std::to_string(result.threads) + " "
                             + std::to_string(result.ttSize) + " " + std::to_string(durationS);

    // Unscaled time per move, fit roughly based on LTC games:
    //   seconds = 50/{ply+15}
    //   ms      = 50000/{ply+15}
    // With this fit the 10th move gets 2000ms; the values are rescaled below
    // so that they add up to the desired duration.
    const auto rawMoveTimeMs = [](int ply) {
        return 50000.0 / (static_cast<double>(ply) + 15.0);
    };

    // Sum of the unscaled times over all positions of all games
    float rawTotalMs = 0;
    for (const auto& game : BenchmarkPositions)
    {
        const int plies = static_cast<int>(game.size());

        for (int ply = 1; ply <= plies; ++ply)
            rawTotalMs += float(rawMoveTimeMs(ply));
    }

    const float scale = static_cast<float>(durationS * 1000) / rawTotalMs;

    for (const auto& game : BenchmarkPositions)
    {
        result.commands.emplace_back("ucinewgame");

        int ply = 0;
        for (const std::string& fen : game)
        {
            ++ply;  // the first position of a game counts as ply 1

            result.commands.emplace_back("position fen " + fen);

            const int moveTimeMs = static_cast<int>(rawMoveTimeMs(ply) * scale);
            result.commands.emplace_back("go movetime " + std::to_string(moveTimeMs));
        }
    }

    return result;
}
|
| 515 |
+
|
| 516 |
+
} // namespace Stockfish
|
src/benchmark.h
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef BENCHMARK_H_INCLUDED
|
| 20 |
+
#define BENCHMARK_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <iosfwd>
|
| 23 |
+
#include <string>
|
| 24 |
+
#include <vector>
|
| 25 |
+
|
| 26 |
+
namespace Stockfish::Benchmark {
|
| 27 |
+
|
| 28 |
+
std::vector<std::string> setup_bench(const std::string&, std::istream&);
|
| 29 |
+
|
| 30 |
+
// Settings and command script produced by setup_benchmark().
// The numeric members are zero-initialized so that a default-constructed
// instance never carries indeterminate values.
struct BenchmarkSetup {
    int                      ttSize  = 0;  // transposition table size (presumably in MB - confirm)
    int                      threads = 0;  // number of search threads
    std::vector<std::string> commands;     // UCI commands to execute, in order
    std::string              originalInvocation;  // only the arguments that were actually supplied
    std::string              filledInvocation;    // all arguments, defaults filled in
};
|
| 37 |
+
|
| 38 |
+
BenchmarkSetup setup_benchmark(std::istream&);
|
| 39 |
+
|
| 40 |
+
} // namespace Stockfish
|
| 41 |
+
|
| 42 |
+
#endif // #ifndef BENCHMARK_H_INCLUDED
|
src/bitboard.cpp
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include "bitboard.h"
|
| 20 |
+
|
| 21 |
+
#include <algorithm>
|
| 22 |
+
#include <bitset>
|
| 23 |
+
#include <initializer_list>
|
| 24 |
+
|
| 25 |
+
#include "misc.h"
|
| 26 |
+
|
| 27 |
+
namespace Stockfish {
|
| 28 |
+
|
| 29 |
+
uint8_t PopCnt16[1 << 16];
|
| 30 |
+
uint8_t SquareDistance[SQUARE_NB][SQUARE_NB];
|
| 31 |
+
|
| 32 |
+
Bitboard LineBB[SQUARE_NB][SQUARE_NB];
|
| 33 |
+
Bitboard BetweenBB[SQUARE_NB][SQUARE_NB];
|
| 34 |
+
Bitboard RayPassBB[SQUARE_NB][SQUARE_NB];
|
| 35 |
+
|
| 36 |
+
alignas(64) Magic Magics[SQUARE_NB][2];
|
| 37 |
+
|
| 38 |
+
namespace {
|
| 39 |
+
|
| 40 |
+
Bitboard RookTable[0x19000]; // To store rook attacks
|
| 41 |
+
Bitboard BishopTable[0x1480]; // To store bishop attacks
|
| 42 |
+
|
| 43 |
+
void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]);
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
// Returns an ASCII representation of a bitboard suitable
|
| 47 |
+
// to be printed to standard output. Useful for debugging.
|
| 48 |
+
std::string Bitboards::pretty(Bitboard b) {
|
| 49 |
+
|
| 50 |
+
std::string s = "+---+---+---+---+---+---+---+---+\n";
|
| 51 |
+
|
| 52 |
+
for (Rank r = RANK_8;; --r)
|
| 53 |
+
{
|
| 54 |
+
for (File f = FILE_A; f <= FILE_H; ++f)
|
| 55 |
+
s += b & make_square(f, r) ? "| X " : "| ";
|
| 56 |
+
|
| 57 |
+
s += "| " + std::to_string(1 + r) + "\n+---+---+---+---+---+---+---+---+\n";
|
| 58 |
+
|
| 59 |
+
if (r == RANK_1)
|
| 60 |
+
break;
|
| 61 |
+
}
|
| 62 |
+
s += " a b c d e f g h\n";
|
| 63 |
+
|
| 64 |
+
return s;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
// Initializes various bitboard tables. It is called at
|
| 69 |
+
// startup and relies on global objects to be already zero-initialized.
|
| 70 |
+
void Bitboards::init() {
|
| 71 |
+
|
| 72 |
+
for (unsigned i = 0; i < (1 << 16); ++i)
|
| 73 |
+
PopCnt16[i] = uint8_t(std::bitset<16>(i).count());
|
| 74 |
+
|
| 75 |
+
for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
|
| 76 |
+
for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
|
| 77 |
+
SquareDistance[s1][s2] = std::max(distance<File>(s1, s2), distance<Rank>(s1, s2));
|
| 78 |
+
|
| 79 |
+
init_magics(ROOK, RookTable, Magics);
|
| 80 |
+
init_magics(BISHOP, BishopTable, Magics);
|
| 81 |
+
|
| 82 |
+
for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
|
| 83 |
+
{
|
| 84 |
+
for (PieceType pt : {BISHOP, ROOK})
|
| 85 |
+
for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
|
| 86 |
+
{
|
| 87 |
+
if (PseudoAttacks[pt][s1] & s2)
|
| 88 |
+
{
|
| 89 |
+
LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2;
|
| 90 |
+
BetweenBB[s1][s2] =
|
| 91 |
+
(attacks_bb(pt, s1, square_bb(s2)) & attacks_bb(pt, s2, square_bb(s1)));
|
| 92 |
+
RayPassBB[s1][s2] =
|
| 93 |
+
attacks_bb(pt, s1, 0) & (attacks_bb(pt, s2, square_bb(s1)) | s2);
|
| 94 |
+
}
|
| 95 |
+
BetweenBB[s1][s2] |= s2;
|
| 96 |
+
}
|
| 97 |
+
}
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
namespace {
|
| 101 |
+
// Computes all rook and bishop attacks at startup. Magic
|
| 102 |
+
// bitboards are used to look up attacks of sliding pieces. As a reference see
|
| 103 |
+
// https://www.chessprogramming.org/Magic_Bitboards. In particular, here we use
|
| 104 |
+
// the so called "fancy" approach.
|
| 105 |
+
void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]) {
|
| 106 |
+
|
| 107 |
+
#ifndef USE_PEXT
|
| 108 |
+
// Optimal PRNG seeds to pick the correct magics in the shortest time
|
| 109 |
+
int seeds[][RANK_NB] = {{8977, 44560, 54343, 38998, 5731, 95205, 104912, 17020},
|
| 110 |
+
{728, 10316, 55013, 32803, 12281, 15100, 16645, 255}};
|
| 111 |
+
|
| 112 |
+
Bitboard occupancy[4096];
|
| 113 |
+
int epoch[4096] = {}, cnt = 0;
|
| 114 |
+
#endif
|
| 115 |
+
Bitboard reference[4096];
|
| 116 |
+
int size = 0;
|
| 117 |
+
|
| 118 |
+
for (Square s = SQ_A1; s <= SQ_H8; ++s)
|
| 119 |
+
{
|
| 120 |
+
// Board edges are not considered in the relevant occupancies
|
| 121 |
+
Bitboard edges = ((Rank1BB | Rank8BB) & ~rank_bb(s)) | ((FileABB | FileHBB) & ~file_bb(s));
|
| 122 |
+
|
| 123 |
+
// Given a square 's', the mask is the bitboard of sliding attacks from
|
| 124 |
+
// 's' computed on an empty board. The index must be big enough to contain
|
| 125 |
+
// all the attacks for each possible subset of the mask and so is 2 power
|
| 126 |
+
// the number of 1s of the mask. Hence we deduce the size of the shift to
|
| 127 |
+
// apply to the 64 or 32 bits word to get the index.
|
| 128 |
+
Magic& m = magics[s][pt - BISHOP];
|
| 129 |
+
m.mask = Bitboards::sliding_attack(pt, s, 0) & ~edges;
|
| 130 |
+
#ifndef USE_PEXT
|
| 131 |
+
m.shift = (Is64Bit ? 64 : 32) - popcount(m.mask);
|
| 132 |
+
#endif
|
| 133 |
+
// Set the offset for the attacks table of the square. We have individual
|
| 134 |
+
// table sizes for each square with "Fancy Magic Bitboards".
|
| 135 |
+
m.attacks = s == SQ_A1 ? table : magics[s - 1][pt - BISHOP].attacks + size;
|
| 136 |
+
size = 0;
|
| 137 |
+
|
| 138 |
+
// Use Carry-Rippler trick to enumerate all subsets of masks[s] and
|
| 139 |
+
// store the corresponding sliding attack bitboard in reference[].
|
| 140 |
+
Bitboard b = 0;
|
| 141 |
+
do
|
| 142 |
+
{
|
| 143 |
+
#ifndef USE_PEXT
|
| 144 |
+
occupancy[size] = b;
|
| 145 |
+
#endif
|
| 146 |
+
reference[size] = Bitboards::sliding_attack(pt, s, b);
|
| 147 |
+
|
| 148 |
+
if (HasPext)
|
| 149 |
+
m.attacks[pext(b, m.mask)] = reference[size];
|
| 150 |
+
|
| 151 |
+
size++;
|
| 152 |
+
b = (b - m.mask) & m.mask;
|
| 153 |
+
} while (b);
|
| 154 |
+
|
| 155 |
+
#ifndef USE_PEXT
|
| 156 |
+
PRNG rng(seeds[Is64Bit][rank_of(s)]);
|
| 157 |
+
|
| 158 |
+
// Find a magic for square 's' picking up an (almost) random number
|
| 159 |
+
// until we find the one that passes the verification test.
|
| 160 |
+
for (int i = 0; i < size;)
|
| 161 |
+
{
|
| 162 |
+
for (m.magic = 0; popcount((m.magic * m.mask) >> 56) < 6;)
|
| 163 |
+
m.magic = rng.sparse_rand<Bitboard>();
|
| 164 |
+
|
| 165 |
+
// A good magic must map every possible occupancy to an index that
|
| 166 |
+
// looks up the correct sliding attack in the attacks[s] database.
|
| 167 |
+
// Note that we build up the database for square 's' as a side
|
| 168 |
+
// effect of verifying the magic. Keep track of the attempt count
|
| 169 |
+
// and save it in epoch[], little speed-up trick to avoid resetting
|
| 170 |
+
// m.attacks[] after every failed attempt.
|
| 171 |
+
for (++cnt, i = 0; i < size; ++i)
|
| 172 |
+
{
|
| 173 |
+
unsigned idx = m.index(occupancy[i]);
|
| 174 |
+
|
| 175 |
+
if (epoch[idx] < cnt)
|
| 176 |
+
{
|
| 177 |
+
epoch[idx] = cnt;
|
| 178 |
+
m.attacks[idx] = reference[i];
|
| 179 |
+
}
|
| 180 |
+
else if (m.attacks[idx] != reference[i])
|
| 181 |
+
break;
|
| 182 |
+
}
|
| 183 |
+
}
|
| 184 |
+
#endif
|
| 185 |
+
}
|
| 186 |
+
}
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
} // namespace Stockfish
|
src/bitboard.h
ADDED
|
@@ -0,0 +1,458 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef BITBOARD_H_INCLUDED
|
| 20 |
+
#define BITBOARD_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <algorithm>
|
| 23 |
+
#include <cassert>
|
| 24 |
+
#include <cmath>
|
| 25 |
+
#include <cstring>
|
| 26 |
+
#include <cstdint>
|
| 27 |
+
#include <cstdlib>
|
| 28 |
+
#include <string>
|
| 29 |
+
#include <initializer_list>
|
| 30 |
+
#include <array>
|
| 31 |
+
|
| 32 |
+
#include "types.h"
|
| 33 |
+
|
| 34 |
+
namespace Stockfish {
|
| 35 |
+
|
| 36 |
+
namespace Bitboards {
|
| 37 |
+
|
| 38 |
+
void init();
|
| 39 |
+
std::string pretty(Bitboard b);
|
| 40 |
+
|
| 41 |
+
} // namespace Stockfish::Bitboards
|
| 42 |
+
|
| 43 |
+
// One bitboard per file: file A has the lowest bit of every byte set,
// the other files are obtained by shifting it left.
constexpr Bitboard FileABB = 0x0101010101010101ULL;
constexpr Bitboard FileBBB = FileABB << 1;
constexpr Bitboard FileCBB = FileABB << 2;
constexpr Bitboard FileDBB = FileABB << 3;
constexpr Bitboard FileEBB = FileABB << 4;
constexpr Bitboard FileFBB = FileABB << 5;
constexpr Bitboard FileGBB = FileABB << 6;
constexpr Bitboard FileHBB = FileABB << 7;

// One bitboard per rank: rank 1 is the lowest byte, every further rank is
// one byte (8 bits) higher.
constexpr Bitboard Rank1BB = 0xFF;
constexpr Bitboard Rank2BB = Rank1BB << 8;
constexpr Bitboard Rank3BB = Rank1BB << 16;
constexpr Bitboard Rank4BB = Rank1BB << 24;
constexpr Bitboard Rank5BB = Rank1BB << 32;
constexpr Bitboard Rank6BB = Rank1BB << 40;
constexpr Bitboard Rank7BB = Rank1BB << 48;
constexpr Bitboard Rank8BB = Rank1BB << 56;
|
| 60 |
+
|
| 61 |
+
extern uint8_t PopCnt16[1 << 16];
|
| 62 |
+
extern uint8_t SquareDistance[SQUARE_NB][SQUARE_NB];
|
| 63 |
+
|
| 64 |
+
extern Bitboard BetweenBB[SQUARE_NB][SQUARE_NB];
|
| 65 |
+
extern Bitboard LineBB[SQUARE_NB][SQUARE_NB];
|
| 66 |
+
extern Bitboard RayPassBB[SQUARE_NB][SQUARE_NB];
|
| 67 |
+
|
| 68 |
+
// Magic holds all magic bitboards relevant data for a single square
|
| 69 |
+
struct Magic {
|
| 70 |
+
Bitboard mask;
|
| 71 |
+
Bitboard* attacks;
|
| 72 |
+
#ifndef USE_PEXT
|
| 73 |
+
Bitboard magic;
|
| 74 |
+
unsigned shift;
|
| 75 |
+
#endif
|
| 76 |
+
|
| 77 |
+
// Compute the attack's index using the 'magic bitboards' approach
|
| 78 |
+
unsigned index(Bitboard occupied) const {
|
| 79 |
+
|
| 80 |
+
#ifdef USE_PEXT
|
| 81 |
+
return unsigned(pext(occupied, mask));
|
| 82 |
+
#else
|
| 83 |
+
if (Is64Bit)
|
| 84 |
+
return unsigned(((occupied & mask) * magic) >> shift);
|
| 85 |
+
|
| 86 |
+
unsigned lo = unsigned(occupied) & unsigned(mask);
|
| 87 |
+
unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32);
|
| 88 |
+
return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift;
|
| 89 |
+
#endif
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
Bitboard attacks_bb(Bitboard occupied) const { return attacks[index(occupied)]; }
|
| 93 |
+
};
|
| 94 |
+
|
| 95 |
+
extern Magic Magics[SQUARE_NB][2];
|
| 96 |
+
|
| 97 |
+
// Returns the bitboard with only the bit of square 's' set
constexpr Bitboard square_bb(Square s) {
    assert(is_ok(s));
    return Bitboard(1) << s;
}
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
// Overloads of bitwise operators between a Bitboard and a Square for testing
|
| 104 |
+
// whether a given bit is set in a bitboard, and for setting and clearing bits.
|
| 105 |
+
|
| 106 |
+
constexpr Bitboard operator&(Bitboard b, Square s) { return b & square_bb(s); }
|
| 107 |
+
constexpr Bitboard operator|(Bitboard b, Square s) { return b | square_bb(s); }
|
| 108 |
+
constexpr Bitboard operator^(Bitboard b, Square s) { return b ^ square_bb(s); }
|
| 109 |
+
constexpr Bitboard& operator|=(Bitboard& b, Square s) { return b |= square_bb(s); }
|
| 110 |
+
constexpr Bitboard& operator^=(Bitboard& b, Square s) { return b ^= square_bb(s); }
|
| 111 |
+
|
| 112 |
+
constexpr Bitboard operator&(Square s, Bitboard b) { return b & s; }
|
| 113 |
+
constexpr Bitboard operator|(Square s, Bitboard b) { return b | s; }
|
| 114 |
+
constexpr Bitboard operator^(Square s, Bitboard b) { return b ^ s; }
|
| 115 |
+
|
| 116 |
+
constexpr Bitboard operator|(Square s1, Square s2) { return square_bb(s1) | s2; }
|
| 117 |
+
|
| 118 |
+
// True when at least two bits are set: clearing the lowest set bit
// (b & (b - 1)) must leave something behind.
constexpr bool more_than_one(Bitboard b) { return (b & (b - 1)) != 0; }
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
// rank_bb() and file_bb() return a bitboard representing all the squares on
|
| 122 |
+
// the given file or rank.
|
| 123 |
+
|
| 124 |
+
constexpr Bitboard rank_bb(Rank r) { return Rank1BB << (8 * r); }
|
| 125 |
+
|
| 126 |
+
constexpr Bitboard rank_bb(Square s) { return rank_bb(rank_of(s)); }
|
| 127 |
+
|
| 128 |
+
constexpr Bitboard file_bb(File f) { return FileABB << f; }
|
| 129 |
+
|
| 130 |
+
constexpr Bitboard file_bb(Square s) { return file_bb(file_of(s)); }
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
// Moves a bitboard one or two steps as specified by the direction D
|
| 134 |
+
template<Direction D>
|
| 135 |
+
constexpr Bitboard shift(Bitboard b) {
|
| 136 |
+
return D == NORTH ? b << 8
|
| 137 |
+
: D == SOUTH ? b >> 8
|
| 138 |
+
: D == NORTH + NORTH ? b << 16
|
| 139 |
+
: D == SOUTH + SOUTH ? b >> 16
|
| 140 |
+
: D == EAST ? (b & ~FileHBB) << 1
|
| 141 |
+
: D == WEST ? (b & ~FileABB) >> 1
|
| 142 |
+
: D == NORTH_EAST ? (b & ~FileHBB) << 9
|
| 143 |
+
: D == NORTH_WEST ? (b & ~FileABB) << 7
|
| 144 |
+
: D == SOUTH_EAST ? (b & ~FileHBB) >> 7
|
| 145 |
+
: D == SOUTH_WEST ? (b & ~FileABB) >> 9
|
| 146 |
+
: 0;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
// Returns the squares attacked by pawns of the given color
|
| 151 |
+
// from the squares in the given bitboard.
|
| 152 |
+
template<Color C>
|
| 153 |
+
constexpr Bitboard pawn_attacks_bb(Bitboard b) {
|
| 154 |
+
return C == WHITE ? shift<NORTH_WEST>(b) | shift<NORTH_EAST>(b)
|
| 155 |
+
: shift<SOUTH_WEST>(b) | shift<SOUTH_EAST>(b);
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
// Returns a bitboard representing an entire line (from board edge
|
| 160 |
+
// to board edge) that intersects the two given squares. If the given squares
|
| 161 |
+
// are not on a same file/rank/diagonal, the function returns 0. For instance,
|
| 162 |
+
// line_bb(SQ_C4, SQ_F7) will return a bitboard with the A2-G8 diagonal.
|
| 163 |
+
inline Bitboard line_bb(Square s1, Square s2) {
|
| 164 |
+
|
| 165 |
+
assert(is_ok(s1) && is_ok(s2));
|
| 166 |
+
return LineBB[s1][s2];
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
// Returns a bitboard representing the squares in the semi-open
|
| 171 |
+
// segment between the squares s1 and s2 (excluding s1 but including s2). If the
|
| 172 |
+
// given squares are not on a same file/rank/diagonal, it returns s2. For instance,
|
| 173 |
+
// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5, E6 and F7, but
|
| 174 |
+
// between_bb(SQ_E6, SQ_F8) will return a bitboard with the square F8. This trick
|
| 175 |
+
// allows to generate non-king evasion moves faster: the defending piece must either
|
| 176 |
+
// interpose itself to cover the check or capture the checking piece.
|
| 177 |
+
inline Bitboard between_bb(Square s1, Square s2) {
|
| 178 |
+
|
| 179 |
+
assert(is_ok(s1) && is_ok(s2));
|
| 180 |
+
return BetweenBB[s1][s2];
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
// distance() functions return the distance between x and y, defined as the
|
| 184 |
+
// number of steps for a king in x to reach y.
|
| 185 |
+
|
| 186 |
+
template<typename T1 = Square>
|
| 187 |
+
inline int distance(Square x, Square y);
|
| 188 |
+
|
| 189 |
+
template<>
|
| 190 |
+
inline int distance<File>(Square x, Square y) {
|
| 191 |
+
return std::abs(file_of(x) - file_of(y));
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
template<>
|
| 195 |
+
inline int distance<Rank>(Square x, Square y) {
|
| 196 |
+
return std::abs(rank_of(x) - rank_of(y));
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
template<>
|
| 200 |
+
inline int distance<Square>(Square x, Square y) {
|
| 201 |
+
return SquareDistance[x][y];
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
// Returns the smaller of 'f' and its mirror image 'FILE_H - f'
inline int edge_distance(File f) {
    const File mirrored = File(FILE_H - f);
    return mirrored < f ? mirrored : f;
}
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
// Counts the set bits of a bitboard with plain arithmetic only, so it can be
// evaluated at compile time: neighbouring bit counts are summed into 2-bit,
// then 4-bit, then 8-bit fields, and the final multiplication adds up all
// byte counts into the top byte.
constexpr int constexpr_popcount(Bitboard b) {
    constexpr Bitboard EveryOtherBit  = 0x5555555555555555ULL;
    constexpr Bitboard EveryOtherPair = 0x3333333333333333ULL;
    constexpr Bitboard LowNibbles     = 0x0F0F0F0F0F0F0F0FULL;
    constexpr Bitboard ByteOnes       = 0x0101010101010101ULL;

    b -= (b >> 1) & EveryOtherBit;
    b = (b & EveryOtherPair) + ((b >> 2) & EveryOtherPair);
    b = (b + (b >> 4)) & LowNibbles;

    return static_cast<int>((b * ByteOnes) >> 56);
}
|
| 213 |
+
|
| 214 |
+
// Counts the number of non-zero bits in a bitboard.
|
| 215 |
+
inline int popcount(Bitboard b) {
|
| 216 |
+
|
| 217 |
+
#ifndef USE_POPCNT
|
| 218 |
+
|
| 219 |
+
std::uint16_t indices[4];
|
| 220 |
+
std::memcpy(indices, &b, sizeof(b));
|
| 221 |
+
return PopCnt16[indices[0]] + PopCnt16[indices[1]] + PopCnt16[indices[2]]
|
| 222 |
+
+ PopCnt16[indices[3]];
|
| 223 |
+
|
| 224 |
+
#elif defined(_MSC_VER)
|
| 225 |
+
|
| 226 |
+
return int(_mm_popcnt_u64(b));
|
| 227 |
+
|
| 228 |
+
#else // Assumed gcc or compatible compiler
|
| 229 |
+
|
| 230 |
+
return __builtin_popcountll(b);
|
| 231 |
+
|
| 232 |
+
#endif
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
// Returns the least significant bit in a non-zero bitboard.
|
| 236 |
+
inline Square lsb(Bitboard b) {
|
| 237 |
+
assert(b);
|
| 238 |
+
|
| 239 |
+
#if defined(__GNUC__) // GCC, Clang, ICX
|
| 240 |
+
|
| 241 |
+
return Square(__builtin_ctzll(b));
|
| 242 |
+
|
| 243 |
+
#elif defined(_MSC_VER)
|
| 244 |
+
#ifdef _WIN64 // MSVC, WIN64
|
| 245 |
+
|
| 246 |
+
unsigned long idx;
|
| 247 |
+
_BitScanForward64(&idx, b);
|
| 248 |
+
return Square(idx);
|
| 249 |
+
|
| 250 |
+
#else // MSVC, WIN32
|
| 251 |
+
unsigned long idx;
|
| 252 |
+
|
| 253 |
+
if (b & 0xffffffff)
|
| 254 |
+
{
|
| 255 |
+
_BitScanForward(&idx, int32_t(b));
|
| 256 |
+
return Square(idx);
|
| 257 |
+
}
|
| 258 |
+
else
|
| 259 |
+
{
|
| 260 |
+
_BitScanForward(&idx, int32_t(b >> 32));
|
| 261 |
+
return Square(idx + 32);
|
| 262 |
+
}
|
| 263 |
+
#endif
|
| 264 |
+
#else // Compiler is neither GCC nor MSVC compatible
|
| 265 |
+
#error "Compiler not supported."
|
| 266 |
+
#endif
|
| 267 |
+
}
|
| 268 |
+
|
| 269 |
+
// Returns the most significant bit in a non-zero bitboard.
|
| 270 |
+
inline Square msb(Bitboard b) {
|
| 271 |
+
assert(b);
|
| 272 |
+
|
| 273 |
+
#if defined(__GNUC__) // GCC, Clang, ICX
|
| 274 |
+
|
| 275 |
+
return Square(63 ^ __builtin_clzll(b));
|
| 276 |
+
|
| 277 |
+
#elif defined(_MSC_VER)
|
| 278 |
+
#ifdef _WIN64 // MSVC, WIN64
|
| 279 |
+
|
| 280 |
+
unsigned long idx;
|
| 281 |
+
_BitScanReverse64(&idx, b);
|
| 282 |
+
return Square(idx);
|
| 283 |
+
|
| 284 |
+
#else // MSVC, WIN32
|
| 285 |
+
|
| 286 |
+
unsigned long idx;
|
| 287 |
+
|
| 288 |
+
if (b >> 32)
|
| 289 |
+
{
|
| 290 |
+
_BitScanReverse(&idx, int32_t(b >> 32));
|
| 291 |
+
return Square(idx + 32);
|
| 292 |
+
}
|
| 293 |
+
else
|
| 294 |
+
{
|
| 295 |
+
_BitScanReverse(&idx, int32_t(b));
|
| 296 |
+
return Square(idx);
|
| 297 |
+
}
|
| 298 |
+
#endif
|
| 299 |
+
#else // Compiler is neither GCC nor MSVC compatible
|
| 300 |
+
#error "Compiler not supported."
|
| 301 |
+
#endif
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
// Returns the bitboard of the least significant
|
| 305 |
+
// square of a non-zero bitboard. It is equivalent to square_bb(lsb(bb)).
|
| 306 |
+
inline Bitboard least_significant_square_bb(Bitboard b) {
|
| 307 |
+
assert(b);
|
| 308 |
+
return b & -b;
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
// Finds and clears the least significant bit in a non-zero bitboard.
|
| 312 |
+
inline Square pop_lsb(Bitboard& b) {
|
| 313 |
+
assert(b);
|
| 314 |
+
const Square s = lsb(b);
|
| 315 |
+
b &= b - 1;
|
| 316 |
+
return s;
|
| 317 |
+
}
|
| 318 |
+
|
| 319 |
+
namespace Bitboards {
|
| 320 |
+
// Returns the bitboard of target square for the given step
|
| 321 |
+
// from the given square. If the step is off the board, returns empty bitboard.
|
| 322 |
+
constexpr Bitboard safe_destination(Square s, int step) {
|
| 323 |
+
constexpr auto abs = [](int v) { return v < 0 ? -v : v; };
|
| 324 |
+
Square to = Square(s + step);
|
| 325 |
+
return is_ok(to) && abs(file_of(s) - file_of(to)) <= 2 ? square_bb(to) : Bitboard(0);
|
| 326 |
+
}
|
| 327 |
+
|
| 328 |
+
// Computes the attacks of a slider standing on 'sq' by walking every ray
// square by square. ROOK uses the four orthogonal directions, any other piece
// type the four diagonal ones. A ray stops at the board edge or on the first
// occupied square, which is itself still part of the attack set.
constexpr Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) {
    Bitboard  attacks             = 0;
    Direction RookDirections[4]   = {NORTH, SOUTH, EAST, WEST};
    Direction BishopDirections[4] = {NORTH_EAST, SOUTH_EAST, SOUTH_WEST, NORTH_WEST};

    auto& rays = (pt == ROOK ? RookDirections : BishopDirections);

    for (Direction ray : rays)
    {
        Square cursor = sq;

        while (safe_destination(cursor, ray))
        {
            cursor += ray;
            attacks |= cursor;

            if (occupied & cursor)
                break;
        }
    }

    return attacks;
}
|
| 348 |
+
|
| 349 |
+
// Collects the destinations of the eight knight steps from 'sq' that
// safe_destination() accepts.
constexpr Bitboard knight_attack(Square sq) {
    constexpr int KnightSteps[] = {-17, -15, -10, -6, 6, 10, 15, 17};

    Bitboard targets = {};
    for (const int step : KnightSteps)
        targets |= safe_destination(sq, step);

    return targets;
}
|
| 355 |
+
|
| 356 |
+
// Collects the destinations of the eight king steps from 'sq' that
// safe_destination() accepts.
constexpr Bitboard king_attack(Square sq) {
    constexpr int KingSteps[] = {-9, -8, -7, -1, 1, 7, 8, 9};

    Bitboard targets = {};
    for (const int step : KingSteps)
        targets |= safe_destination(sq, step);

    return targets;
}
|
| 362 |
+
|
| 363 |
+
// Returns the attacks of the given piece type from 'sq' on an empty board.
// The queen is the union of rook and bishop attacks. Any piece type other
// than rook, bishop, queen, knight and king is a programming error: it
// triggers the assert and yields 0.
constexpr Bitboard pseudo_attacks(PieceType pt, Square sq) {
    if (pt == PieceType::ROOK || pt == PieceType::BISHOP)
        return sliding_attack(pt, sq, 0);

    if (pt == PieceType::QUEEN)
        return sliding_attack(PieceType::ROOK, sq, 0) | sliding_attack(PieceType::BISHOP, sq, 0);

    if (pt == PieceType::KNIGHT)
        return knight_attack(sq);

    if (pt == PieceType::KING)
        return king_attack(sq);

    assert(false);
    return 0;
}
|
| 380 |
+
|
| 381 |
+
}
|
| 382 |
+
|
| 383 |
+
inline constexpr auto PseudoAttacks = []() constexpr {
|
| 384 |
+
std::array<std::array<Bitboard, SQUARE_NB>, PIECE_TYPE_NB> attacks{};
|
| 385 |
+
|
| 386 |
+
for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
|
| 387 |
+
{
|
| 388 |
+
attacks[WHITE][s1] = pawn_attacks_bb<WHITE>(square_bb(s1));
|
| 389 |
+
attacks[BLACK][s1] = pawn_attacks_bb<BLACK>(square_bb(s1));
|
| 390 |
+
|
| 391 |
+
attacks[KING][s1] = Bitboards::pseudo_attacks(KING, s1);
|
| 392 |
+
attacks[KNIGHT][s1] = Bitboards::pseudo_attacks(KNIGHT, s1);
|
| 393 |
+
attacks[QUEEN][s1] = attacks[BISHOP][s1] = Bitboards::pseudo_attacks(BISHOP, s1);
|
| 394 |
+
attacks[QUEEN][s1] |= attacks[ROOK][s1] = Bitboards::pseudo_attacks(ROOK, s1);
|
| 395 |
+
}
|
| 396 |
+
|
| 397 |
+
return attacks;
|
| 398 |
+
}();
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
// Returns the pseudo attacks of the given piece type
|
| 402 |
+
// assuming an empty board.
|
| 403 |
+
template<PieceType Pt>
|
| 404 |
+
inline Bitboard attacks_bb(Square s, Color c = COLOR_NB) {
|
| 405 |
+
|
| 406 |
+
assert((Pt != PAWN || c < COLOR_NB) && is_ok(s));
|
| 407 |
+
return Pt == PAWN ? PseudoAttacks[c][s] : PseudoAttacks[Pt][s];
|
| 408 |
+
}
|
| 409 |
+
|
| 410 |
+
|
| 411 |
+
// Returns the attacks by the given piece
|
| 412 |
+
// assuming the board is occupied according to the passed Bitboard.
|
| 413 |
+
// Sliding piece attacks do not continue passed an occupied square.
|
| 414 |
+
template<PieceType Pt>
|
| 415 |
+
inline Bitboard attacks_bb(Square s, Bitboard occupied) {
|
| 416 |
+
|
| 417 |
+
assert(Pt != PAWN && is_ok(s));
|
| 418 |
+
|
| 419 |
+
switch (Pt)
|
| 420 |
+
{
|
| 421 |
+
case BISHOP :
|
| 422 |
+
case ROOK :
|
| 423 |
+
return Magics[s][Pt - BISHOP].attacks_bb(occupied);
|
| 424 |
+
case QUEEN :
|
| 425 |
+
return attacks_bb<BISHOP>(s, occupied) | attacks_bb<ROOK>(s, occupied);
|
| 426 |
+
default :
|
| 427 |
+
return PseudoAttacks[Pt][s];
|
| 428 |
+
}
|
| 429 |
+
}
|
| 430 |
+
|
| 431 |
+
// Returns the attacks by the given piece
|
| 432 |
+
// assuming the board is occupied according to the passed Bitboard.
|
| 433 |
+
// Sliding piece attacks do not continue passed an occupied square.
|
| 434 |
+
inline Bitboard attacks_bb(PieceType pt, Square s, Bitboard occupied) {
|
| 435 |
+
|
| 436 |
+
assert(pt != PAWN && is_ok(s));
|
| 437 |
+
|
| 438 |
+
switch (pt)
|
| 439 |
+
{
|
| 440 |
+
case BISHOP :
|
| 441 |
+
return attacks_bb<BISHOP>(s, occupied);
|
| 442 |
+
case ROOK :
|
| 443 |
+
return attacks_bb<ROOK>(s, occupied);
|
| 444 |
+
case QUEEN :
|
| 445 |
+
return attacks_bb<BISHOP>(s, occupied) | attacks_bb<ROOK>(s, occupied);
|
| 446 |
+
default :
|
| 447 |
+
return PseudoAttacks[pt][s];
|
| 448 |
+
}
|
| 449 |
+
}
|
| 450 |
+
|
| 451 |
+
// Returns the attacks of a concrete piece: pawns use the PseudoAttacks row
// of their color, every other piece is forwarded to the PieceType overload.
inline Bitboard attacks_bb(Piece pc, Square s, Bitboard occupied) {
    if (type_of(pc) == PAWN)
        return PseudoAttacks[color_of(pc)][s];

    return attacks_bb(type_of(pc), s, occupied);
}
|
| 455 |
+
|
| 456 |
+
} // namespace Stockfish
|
| 457 |
+
|
| 458 |
+
#endif // #ifndef BITBOARD_H_INCLUDED
|
src/engine.cpp
ADDED
|
@@ -0,0 +1,411 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include "engine.h"
|
| 20 |
+
|
| 21 |
+
#include <algorithm>
|
| 22 |
+
#include <cassert>
|
| 23 |
+
#include <deque>
|
| 24 |
+
#include <iosfwd>
|
| 25 |
+
#include <memory>
|
| 26 |
+
#include <ostream>
|
| 27 |
+
#include <sstream>
|
| 28 |
+
#include <string_view>
|
| 29 |
+
#include <utility>
|
| 30 |
+
#include <vector>
|
| 31 |
+
|
| 32 |
+
#include "evaluate.h"
|
| 33 |
+
#include "misc.h"
|
| 34 |
+
#include "nnue/network.h"
|
| 35 |
+
#include "nnue/nnue_common.h"
|
| 36 |
+
#include "nnue/nnue_misc.h"
|
| 37 |
+
#include "numa.h"
|
| 38 |
+
#include "perft.h"
|
| 39 |
+
#include "position.h"
|
| 40 |
+
#include "search.h"
|
| 41 |
+
#include "shm.h"
|
| 42 |
+
#include "syzygy/tbprobe.h"
|
| 43 |
+
#include "types.h"
|
| 44 |
+
#include "uci.h"
|
| 45 |
+
#include "ucioption.h"
|
| 46 |
+
|
| 47 |
+
namespace Stockfish {
|
| 48 |
+
|
| 49 |
+
namespace NN = Eval::NNUE;
|
| 50 |
+
|
| 51 |
+
constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1";
|
| 52 |
+
constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048;
|
| 53 |
+
int MaxThreads = std::max(1024, 4 * int(get_hardware_concurrency()));
|
| 54 |
+
|
| 55 |
+
// The default configuration will attempt to group L3 domains up to 32 threads.
|
| 56 |
+
// This size was found to be a good balance between the Elo gain of increased
|
| 57 |
+
// history sharing and the speed loss from more cross-cache accesses (see
|
| 58 |
+
// PR#6526). The user can always explicitly override this behavior.
|
| 59 |
+
constexpr NumaAutoPolicy DefaultNumaPolicy = BundledL3Policy{32};
|
| 60 |
+
|
| 61 |
+
// Constructs the engine: resolves the binary directory (empty when no path is
// given), detects the NUMA configuration, loads the default networks, sets up
// the start position, registers every UCI option together with its on-change
// handler, and finally creates the thread pool.
Engine::Engine(std::optional<std::string> path) :
    binaryDirectory(path.has_value() ? CommandLine::get_binary_directory(*path) : ""),
    numaContext(NumaConfig::from_system(DefaultNumaPolicy)),
    states(new std::deque<StateInfo>(1)),
    threads(),
    networks(numaContext, get_default_networks()) {

    using Skill = Search::Skill;

    pos.set(StartFEN, false, &states->back());

    // Options are registered in a fixed order; handlers returning std::nullopt
    // produce no message, the others return an informational string.
    options.add("Debug Log File", Option("", [](const Option& logFile) {
                    start_logger(logFile);
                    return std::nullopt;
                }));

    options.add("NumaPolicy", Option("auto", [this](const Option& policy) {
                    set_numa_config_from_option(policy);
                    return numa_config_information_as_string() + "\n"
                         + thread_allocation_information_as_string();
                }));

    options.add("Threads", Option(1, 1, MaxThreads, [this](const Option&) {
                    resize_threads();
                    return thread_allocation_information_as_string();
                }));

    options.add("Hash", Option(16, 1, MaxHashMB, [this](const Option& megabytes) {
                    set_tt_size(megabytes);
                    return std::nullopt;
                }));

    options.add("Clear Hash", Option([this](const Option&) {
                    search_clear();
                    return std::nullopt;
                }));

    options.add("Ponder", Option(false));
    options.add("MultiPV", Option(1, 1, MAX_MOVES));
    options.add("Skill Level", Option(20, 0, 20));
    options.add("Move Overhead", Option(10, 0, 5000));
    options.add("nodestime", Option(0, 0, 10000));
    options.add("UCI_Chess960", Option(false));
    options.add("UCI_LimitStrength", Option(false));
    options.add("UCI_Elo", Option(Skill::LowestElo, Skill::LowestElo, Skill::HighestElo));
    options.add("UCI_ShowWDL", Option(false));

    options.add("SyzygyPath", Option("", [](const Option& tbPath) {
                    Tablebases::init(tbPath);
                    return std::nullopt;
                }));

    options.add("SyzygyProbeDepth", Option(1, 1, 100));
    options.add("Syzygy50MoveRule", Option(true));
    options.add("SyzygyProbeLimit", Option(7, 0, 7));

    options.add("EvalFile", Option(EvalFileDefaultNameBig, [this](const Option& netFile) {
                    load_big_network(netFile);
                    return std::nullopt;
                }));

    options.add("EvalFileSmall", Option(EvalFileDefaultNameSmall, [this](const Option& netFile) {
                    load_small_network(netFile);
                    return std::nullopt;
                }));

    threads.clear();
    threads.ensure_network_replicated();
    resize_threads();
}
|
| 151 |
+
|
| 152 |
+
std::uint64_t Engine::perft(const std::string& fen, Depth depth, bool isChess960) {
|
| 153 |
+
verify_networks();
|
| 154 |
+
|
| 155 |
+
return Benchmark::perft(fen, depth, isChess960);
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
// Starts a search on the current position with the given limits. Perft
// requests must not reach this function (asserted); networks are verified
// before the thread pool is asked to start thinking.
void Engine::go(Search::LimitsType& limits) {
    assert(limits.perft == 0);

    verify_networks();
    threads.start_thinking(options, pos, states, limits);
}
|
| 164 |
+
// Raises the thread pool's stop flag; does not wait for the search to end.
void Engine::stop() {
    threads.stop = true;
}
|
| 165 |
+
|
| 166 |
+
// Waits for any running search, then clears the transposition table and the
// thread pool state and re-initialises the tablebases from "SyzygyPath".
void Engine::search_clear() {
    wait_for_search_finished();

    tt.clear(threads);
    threads.clear();

    // @TODO won't work with multiple instances
    Tablebases::init(options["SyzygyPath"]);  // Free mapped files
}
|
| 175 |
+
|
| 176 |
+
// Installs the callback stored as updateContext.onUpdateNoMoves, taking
// ownership of the passed function object.
void Engine::set_on_update_no_moves(std::function<void(const Engine::InfoShort&)>&& f) {
    auto& slot = updateContext.onUpdateNoMoves;
    slot       = std::move(f);
}
|
| 179 |
+
|
| 180 |
+
// Installs the callback stored as updateContext.onUpdateFull, taking
// ownership of the passed function object.
void Engine::set_on_update_full(std::function<void(const Engine::InfoFull&)>&& f) {
    auto& slot = updateContext.onUpdateFull;
    slot       = std::move(f);
}
|
| 183 |
+
|
| 184 |
+
// Installs the callback stored as updateContext.onIter, taking ownership of
// the passed function object.
void Engine::set_on_iter(std::function<void(const Engine::InfoIter&)>&& f) {
    auto& slot = updateContext.onIter;
    slot       = std::move(f);
}
|
| 187 |
+
|
| 188 |
+
// Installs the callback stored as updateContext.onBestmove, taking ownership
// of the passed function object.
void Engine::set_on_bestmove(std::function<void(std::string_view, std::string_view)>&& f) {
    auto& slot = updateContext.onBestmove;
    slot       = std::move(f);
}
|
| 191 |
+
|
| 192 |
+
// Installs the callback that verify_networks() reports its messages through,
// taking ownership of the passed function object.
void Engine::set_on_verify_networks(std::function<void(std::string_view)>&& f) {
    auto& slot = onVerifyNetworks;
    slot       = std::move(f);
}
|
| 195 |
+
|
| 196 |
+
// Forwards to the main thread's wait_for_search_finished().
void Engine::wait_for_search_finished() {
    threads.main_thread()->wait_for_search_finished();
}
|
| 197 |
+
|
| 198 |
+
void Engine::set_position(const std::string& fen, const std::vector<std::string>& moves) {
|
| 199 |
+
// Drop the old state and create a new one
|
| 200 |
+
states = StateListPtr(new std::deque<StateInfo>(1));
|
| 201 |
+
pos.set(fen, options["UCI_Chess960"], &states->back());
|
| 202 |
+
|
| 203 |
+
for (const auto& move : moves)
|
| 204 |
+
{
|
| 205 |
+
auto m = UCIEngine::to_move(pos, move);
|
| 206 |
+
|
| 207 |
+
if (m == Move::none())
|
| 208 |
+
break;
|
| 209 |
+
|
| 210 |
+
states->emplace_back();
|
| 211 |
+
pos.do_move(m, states->back());
|
| 212 |
+
}
|
| 213 |
+
}
|
| 214 |
+
|
| 215 |
+
// modifiers
|
| 216 |
+
|
| 217 |
+
void Engine::set_numa_config_from_option(const std::string& o) {
|
| 218 |
+
if (o == "auto" || o == "system")
|
| 219 |
+
{
|
| 220 |
+
numaContext.set_numa_config(NumaConfig::from_system(DefaultNumaPolicy));
|
| 221 |
+
}
|
| 222 |
+
else if (o == "hardware")
|
| 223 |
+
{
|
| 224 |
+
// Don't respect affinity set in the system.
|
| 225 |
+
numaContext.set_numa_config(NumaConfig::from_system(DefaultNumaPolicy, false));
|
| 226 |
+
}
|
| 227 |
+
else if (o == "none")
|
| 228 |
+
{
|
| 229 |
+
numaContext.set_numa_config(NumaConfig{});
|
| 230 |
+
}
|
| 231 |
+
else
|
| 232 |
+
{
|
| 233 |
+
numaContext.set_numa_config(NumaConfig::from_string(o));
|
| 234 |
+
}
|
| 235 |
+
|
| 236 |
+
// Force reallocation of threads in case affinities need to change.
|
| 237 |
+
resize_threads();
|
| 238 |
+
threads.ensure_network_replicated();
|
| 239 |
+
}
|
| 240 |
+
|
| 241 |
+
// Rebuilds the thread pool for the current NUMA configuration and options
// once any running search has finished, then resizes the hash and makes sure
// the networks are replicated.
void Engine::resize_threads() {
    threads.wait_for_search_finished();

    threads.set(numaContext.get_numa_config(),
                {options, threads, tt, sharedHists, networks},
                updateContext);

    // Reallocate the hash with the new threadpool size
    set_tt_size(options["Hash"]);

    threads.ensure_network_replicated();
}
|
| 250 |
+
|
| 251 |
+
// Resizes the transposition table to `mb` (the value of the "Hash" option)
// after waiting for any running search to finish.
void Engine::set_tt_size(size_t mb) {
    wait_for_search_finished();

    tt.resize(mb, threads);
}
|
| 255 |
+
|
| 256 |
+
// Writes `b` to the main search manager's `ponder` flag.
void Engine::set_ponderhit(bool b) {
    threads.main_manager()->ponder = b;
}
|
| 257 |
+
|
| 258 |
+
// network related
|
| 259 |
+
|
| 260 |
+
void Engine::verify_networks() const {
|
| 261 |
+
networks->big.verify(options["EvalFile"], onVerifyNetworks);
|
| 262 |
+
networks->small.verify(options["EvalFileSmall"], onVerifyNetworks);
|
| 263 |
+
|
| 264 |
+
auto statuses = networks.get_status_and_errors();
|
| 265 |
+
for (size_t i = 0; i < statuses.size(); ++i)
|
| 266 |
+
{
|
| 267 |
+
const auto [status, error] = statuses[i];
|
| 268 |
+
std::string message = "Network replica " + std::to_string(i + 1) + ": ";
|
| 269 |
+
if (status == SystemWideSharedConstantAllocationStatus::NoAllocation)
|
| 270 |
+
{
|
| 271 |
+
message += "No allocation.";
|
| 272 |
+
}
|
| 273 |
+
else if (status == SystemWideSharedConstantAllocationStatus::LocalMemory)
|
| 274 |
+
{
|
| 275 |
+
message += "Local memory.";
|
| 276 |
+
}
|
| 277 |
+
else if (status == SystemWideSharedConstantAllocationStatus::SharedMemory)
|
| 278 |
+
{
|
| 279 |
+
message += "Shared memory.";
|
| 280 |
+
}
|
| 281 |
+
else
|
| 282 |
+
{
|
| 283 |
+
message += "Unknown status.";
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
if (error.has_value())
|
| 287 |
+
{
|
| 288 |
+
message += " " + *error;
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
onVerifyNetworks(message);
|
| 292 |
+
}
|
| 293 |
+
}
|
| 294 |
+
|
| 295 |
+
std::unique_ptr<Eval::NNUE::Networks> Engine::get_default_networks() const {
|
| 296 |
+
|
| 297 |
+
auto networks_ =
|
| 298 |
+
std::make_unique<NN::Networks>(NN::EvalFile{EvalFileDefaultNameBig, "None", ""},
|
| 299 |
+
NN::EvalFile{EvalFileDefaultNameSmall, "None", ""});
|
| 300 |
+
|
| 301 |
+
networks_->big.load(binaryDirectory, "");
|
| 302 |
+
networks_->small.load(binaryDirectory, "");
|
| 303 |
+
|
| 304 |
+
return networks_;
|
| 305 |
+
}
|
| 306 |
+
|
| 307 |
+
void Engine::load_big_network(const std::string& file) {
|
| 308 |
+
networks.modify_and_replicate(
|
| 309 |
+
[this, &file](NN::Networks& networks_) { networks_.big.load(binaryDirectory, file); });
|
| 310 |
+
threads.clear();
|
| 311 |
+
threads.ensure_network_replicated();
|
| 312 |
+
}
|
| 313 |
+
|
| 314 |
+
void Engine::load_small_network(const std::string& file) {
|
| 315 |
+
networks.modify_and_replicate(
|
| 316 |
+
[this, &file](NN::Networks& networks_) { networks_.small.load(binaryDirectory, file); });
|
| 317 |
+
threads.clear();
|
| 318 |
+
threads.ensure_network_replicated();
|
| 319 |
+
}
|
| 320 |
+
|
| 321 |
+
void Engine::save_network(const std::pair<std::optional<std::string>, std::string> files[2]) {
|
| 322 |
+
networks.modify_and_replicate([&files](NN::Networks& networks_) {
|
| 323 |
+
networks_.big.save(files[0].first);
|
| 324 |
+
networks_.small.save(files[1].first);
|
| 325 |
+
});
|
| 326 |
+
}
|
| 327 |
+
|
| 328 |
+
// utility functions
|
| 329 |
+
|
| 330 |
+
void Engine::trace_eval() const {
|
| 331 |
+
StateListPtr trace_states(new std::deque<StateInfo>(1));
|
| 332 |
+
Position p;
|
| 333 |
+
p.set(pos.fen(), options["UCI_Chess960"], &trace_states->back());
|
| 334 |
+
|
| 335 |
+
verify_networks();
|
| 336 |
+
|
| 337 |
+
sync_cout << "\n" << Eval::trace(p, *networks) << sync_endl;
|
| 338 |
+
}
|
| 339 |
+
|
| 340 |
+
// Read-only access to the engine's option map.
const OptionsMap& Engine::get_options() const {
    return options;
}

// Mutable access to the engine's option map.
OptionsMap& Engine::get_options() {
    return options;
}
|
| 342 |
+
|
| 343 |
+
// Returns the FEN string of the current position.
std::string Engine::fen() const {
    return pos.fen();
}
|
| 344 |
+
|
| 345 |
+
// Forwards to Position::flip() on the current position.
void Engine::flip() {
    pos.flip();
}
|
| 346 |
+
|
| 347 |
+
// Returns whatever the Position stream-insertion operator prints for the
// current position.
std::string Engine::visualize() const {
    std::ostringstream rendered;
    rendered << pos;
    return rendered.str();
}
|
| 352 |
+
|
| 353 |
+
// Forwards to the transposition table's hashfull() with the given maxAge.
int Engine::get_hashfull(int maxAge) const {
    return tt.hashfull(maxAge);
}
|
| 354 |
+
|
| 355 |
+
std::vector<std::pair<size_t, size_t>> Engine::get_bound_thread_count_by_numa_node() const {
|
| 356 |
+
auto counts = threads.get_bound_thread_count_by_numa_node();
|
| 357 |
+
const NumaConfig& cfg = numaContext.get_numa_config();
|
| 358 |
+
std::vector<std::pair<size_t, size_t>> ratios;
|
| 359 |
+
NumaIndex n = 0;
|
| 360 |
+
for (; n < counts.size(); ++n)
|
| 361 |
+
ratios.emplace_back(counts[n], cfg.num_cpus_in_numa_node(n));
|
| 362 |
+
if (!counts.empty())
|
| 363 |
+
for (; n < cfg.num_numa_nodes(); ++n)
|
| 364 |
+
ratios.emplace_back(0, cfg.num_cpus_in_numa_node(n));
|
| 365 |
+
return ratios;
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
std::string Engine::get_numa_config_as_string() const {
|
| 369 |
+
return numaContext.get_numa_config().to_string();
|
| 370 |
+
}
|
| 371 |
+
|
| 372 |
+
std::string Engine::numa_config_information_as_string() const {
|
| 373 |
+
auto cfgStr = get_numa_config_as_string();
|
| 374 |
+
return "Available processors: " + cfgStr;
|
| 375 |
+
}
|
| 376 |
+
|
| 377 |
+
std::string Engine::thread_binding_information_as_string() const {
|
| 378 |
+
auto boundThreadsByNode = get_bound_thread_count_by_numa_node();
|
| 379 |
+
std::stringstream ss;
|
| 380 |
+
if (boundThreadsByNode.empty())
|
| 381 |
+
return ss.str();
|
| 382 |
+
|
| 383 |
+
bool isFirst = true;
|
| 384 |
+
|
| 385 |
+
for (auto&& [current, total] : boundThreadsByNode)
|
| 386 |
+
{
|
| 387 |
+
if (!isFirst)
|
| 388 |
+
ss << ":";
|
| 389 |
+
ss << current << "/" << total;
|
| 390 |
+
isFirst = false;
|
| 391 |
+
}
|
| 392 |
+
|
| 393 |
+
return ss.str();
|
| 394 |
+
}
|
| 395 |
+
|
| 396 |
+
std::string Engine::thread_allocation_information_as_string() const {
|
| 397 |
+
std::stringstream ss;
|
| 398 |
+
|
| 399 |
+
size_t threadsSize = threads.size();
|
| 400 |
+
ss << "Using " << threadsSize << (threadsSize > 1 ? " threads" : " thread");
|
| 401 |
+
|
| 402 |
+
auto boundThreadsByNodeStr = thread_binding_information_as_string();
|
| 403 |
+
if (boundThreadsByNodeStr.empty())
|
| 404 |
+
return ss.str();
|
| 405 |
+
|
| 406 |
+
ss << " with NUMA node thread binding: ";
|
| 407 |
+
ss << boundThreadsByNodeStr;
|
| 408 |
+
|
| 409 |
+
return ss.str();
|
| 410 |
+
}
|
| 411 |
+
}
|
src/engine.h
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef ENGINE_H_INCLUDED
|
| 20 |
+
#define ENGINE_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <cstddef>
|
| 23 |
+
#include <cstdint>
|
| 24 |
+
#include <functional>
|
| 25 |
+
#include <map>
|
| 26 |
+
#include <memory>
|
| 27 |
+
#include <optional>
|
| 28 |
+
#include <string>
|
| 29 |
+
#include <string_view>
|
| 30 |
+
#include <utility>
|
| 31 |
+
#include <vector>
|
| 32 |
+
|
| 33 |
+
#include "history.h"
|
| 34 |
+
#include "nnue/network.h"
|
| 35 |
+
#include "numa.h"
|
| 36 |
+
#include "position.h"
|
| 37 |
+
#include "search.h"
|
| 38 |
+
#include "syzygy/tbprobe.h" // for Stockfish::Depth
|
| 39 |
+
#include "thread.h"
|
| 40 |
+
#include "tt.h"
|
| 41 |
+
#include "ucioption.h"
|
| 42 |
+
|
| 43 |
+
namespace Stockfish {
|
| 44 |
+
|
| 45 |
+
class Engine {
|
| 46 |
+
public:
|
| 47 |
+
using InfoShort = Search::InfoShort;
|
| 48 |
+
using InfoFull = Search::InfoFull;
|
| 49 |
+
using InfoIter = Search::InfoIteration;
|
| 50 |
+
|
| 51 |
+
Engine(std::optional<std::string> path = std::nullopt);
|
| 52 |
+
|
| 53 |
+
// Cannot be movable due to components holding backreferences to fields
|
| 54 |
+
Engine(const Engine&) = delete;
|
| 55 |
+
Engine(Engine&&) = delete;
|
| 56 |
+
Engine& operator=(const Engine&) = delete;
|
| 57 |
+
Engine& operator=(Engine&&) = delete;
|
| 58 |
+
|
| 59 |
+
~Engine() { wait_for_search_finished(); }
|
| 60 |
+
|
| 61 |
+
std::uint64_t perft(const std::string& fen, Depth depth, bool isChess960);
|
| 62 |
+
|
| 63 |
+
// non blocking call to start searching
|
| 64 |
+
void go(Search::LimitsType&);
|
| 65 |
+
// non blocking call to stop searching
|
| 66 |
+
void stop();
|
| 67 |
+
|
| 68 |
+
// blocking call to wait for search to finish
|
| 69 |
+
void wait_for_search_finished();
|
| 70 |
+
// set a new position, moves are in UCI format
|
| 71 |
+
void set_position(const std::string& fen, const std::vector<std::string>& moves);
|
| 72 |
+
|
| 73 |
+
// modifiers
|
| 74 |
+
|
| 75 |
+
void set_numa_config_from_option(const std::string& o);
|
| 76 |
+
void resize_threads();
|
| 77 |
+
void set_tt_size(size_t mb);
|
| 78 |
+
void set_ponderhit(bool);
|
| 79 |
+
void search_clear();
|
| 80 |
+
|
| 81 |
+
void set_on_update_no_moves(std::function<void(const InfoShort&)>&&);
|
| 82 |
+
void set_on_update_full(std::function<void(const InfoFull&)>&&);
|
| 83 |
+
void set_on_iter(std::function<void(const InfoIter&)>&&);
|
| 84 |
+
void set_on_bestmove(std::function<void(std::string_view, std::string_view)>&&);
|
| 85 |
+
void set_on_verify_networks(std::function<void(std::string_view)>&&);
|
| 86 |
+
|
| 87 |
+
// network related
|
| 88 |
+
|
| 89 |
+
void verify_networks() const;
|
| 90 |
+
std::unique_ptr<Eval::NNUE::Networks> get_default_networks() const;
|
| 91 |
+
void load_big_network(const std::string& file);
|
| 92 |
+
void load_small_network(const std::string& file);
|
| 93 |
+
void save_network(const std::pair<std::optional<std::string>, std::string> files[2]);
|
| 94 |
+
|
| 95 |
+
// utility functions
|
| 96 |
+
|
| 97 |
+
void trace_eval() const;
|
| 98 |
+
|
| 99 |
+
const OptionsMap& get_options() const;
|
| 100 |
+
OptionsMap& get_options();
|
| 101 |
+
|
| 102 |
+
int get_hashfull(int maxAge = 0) const;
|
| 103 |
+
|
| 104 |
+
std::string fen() const;
|
| 105 |
+
void flip();
|
| 106 |
+
std::string visualize() const;
|
| 107 |
+
std::vector<std::pair<size_t, size_t>> get_bound_thread_count_by_numa_node() const;
|
| 108 |
+
std::string get_numa_config_as_string() const;
|
| 109 |
+
std::string numa_config_information_as_string() const;
|
| 110 |
+
std::string thread_allocation_information_as_string() const;
|
| 111 |
+
std::string thread_binding_information_as_string() const;
|
| 112 |
+
|
| 113 |
+
private:
|
| 114 |
+
const std::string binaryDirectory;
|
| 115 |
+
|
| 116 |
+
NumaReplicationContext numaContext;
|
| 117 |
+
|
| 118 |
+
Position pos;
|
| 119 |
+
StateListPtr states;
|
| 120 |
+
|
| 121 |
+
OptionsMap options;
|
| 122 |
+
ThreadPool threads;
|
| 123 |
+
TranspositionTable tt;
|
| 124 |
+
LazyNumaReplicatedSystemWide<Eval::NNUE::Networks> networks;
|
| 125 |
+
|
| 126 |
+
Search::SearchManager::UpdateContext updateContext;
|
| 127 |
+
std::function<void(std::string_view)> onVerifyNetworks;
|
| 128 |
+
std::map<NumaIndex, SharedHistories> sharedHists;
|
| 129 |
+
};
|
| 130 |
+
|
| 131 |
+
} // namespace Stockfish
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
#endif // #ifndef ENGINE_H_INCLUDED
|
src/evaluate.cpp
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include "evaluate.h"
|
| 20 |
+
|
| 21 |
+
#include <algorithm>
|
| 22 |
+
#include <cassert>
|
| 23 |
+
#include <cmath>
|
| 24 |
+
#include <cstdlib>
|
| 25 |
+
#include <iomanip>
|
| 26 |
+
#include <iostream>
|
| 27 |
+
#include <memory>
|
| 28 |
+
#include <sstream>
|
| 29 |
+
#include <tuple>
|
| 30 |
+
|
| 31 |
+
#include "nnue/network.h"
|
| 32 |
+
#include "nnue/nnue_misc.h"
|
| 33 |
+
#include "position.h"
|
| 34 |
+
#include "types.h"
|
| 35 |
+
#include "uci.h"
|
| 36 |
+
#include "nnue/nnue_accumulator.h"
|
| 37 |
+
|
| 38 |
+
namespace Stockfish {
|
| 39 |
+
|
| 40 |
+
// Returns a static, purely materialistic evaluation of the position from
|
| 41 |
+
// the point of view of the side to move. It can be divided by PawnValue to get
|
| 42 |
+
// an approximation of the material advantage on the board in terms of pawns.
|
| 43 |
+
int Eval::simple_eval(const Position& pos) {
|
| 44 |
+
Color c = pos.side_to_move();
|
| 45 |
+
return PawnValue * (pos.count<PAWN>(c) - pos.count<PAWN>(~c)) + pos.non_pawn_material(c)
|
| 46 |
+
- pos.non_pawn_material(~c);
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
// True when the absolute material-only evaluation exceeds 962; evaluate()
// uses this to pick the small network for its first pass.
bool Eval::use_smallnet(const Position& pos) {
    const int material = simple_eval(pos);
    return std::abs(material) > 962;
}
|
| 50 |
+
|
| 51 |
+
// Evaluate is the evaluator for the outer world. It returns a static evaluation
|
| 52 |
+
// of the position from the point of view of the side to move.
|
| 53 |
+
Value Eval::evaluate(const Eval::NNUE::Networks& networks,
|
| 54 |
+
const Position& pos,
|
| 55 |
+
Eval::NNUE::AccumulatorStack& accumulators,
|
| 56 |
+
Eval::NNUE::AccumulatorCaches& caches,
|
| 57 |
+
int optimism) {
|
| 58 |
+
|
| 59 |
+
assert(!pos.checkers());
|
| 60 |
+
|
| 61 |
+
bool smallNet = use_smallnet(pos);
|
| 62 |
+
auto [psqt, positional] = smallNet ? networks.small.evaluate(pos, accumulators, caches.small)
|
| 63 |
+
: networks.big.evaluate(pos, accumulators, caches.big);
|
| 64 |
+
|
| 65 |
+
Value nnue = (125 * psqt + 131 * positional) / 128;
|
| 66 |
+
|
| 67 |
+
// Re-evaluate the position when higher eval accuracy is worth the time spent
|
| 68 |
+
if (smallNet && (std::abs(nnue) < 277))
|
| 69 |
+
{
|
| 70 |
+
std::tie(psqt, positional) = networks.big.evaluate(pos, accumulators, caches.big);
|
| 71 |
+
nnue = (125 * psqt + 131 * positional) / 128;
|
| 72 |
+
smallNet = false;
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
// Blend optimism and eval with nnue complexity
|
| 76 |
+
int nnueComplexity = std::abs(psqt - positional);
|
| 77 |
+
optimism += optimism * nnueComplexity / 476;
|
| 78 |
+
nnue -= nnue * nnueComplexity / 18236;
|
| 79 |
+
|
| 80 |
+
int material = 534 * pos.count<PAWN>() + pos.non_pawn_material();
|
| 81 |
+
int v = (nnue * (77871 + material) + optimism * (7191 + material)) / 77871;
|
| 82 |
+
|
| 83 |
+
// Damp down the evaluation linearly when shuffling
|
| 84 |
+
v -= v * pos.rule50_count() / 199;
|
| 85 |
+
|
| 86 |
+
// Guarantee evaluation does not hit the tablebase range
|
| 87 |
+
v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
|
| 88 |
+
|
| 89 |
+
return v;
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
// Like evaluate(), but instead of returning a value, it returns
|
| 93 |
+
// a string (suitable for outputting to stdout) that contains the detailed
|
| 94 |
+
// descriptions and values of each evaluation term. Useful for debugging.
|
| 95 |
+
// Trace scores are from white's point of view
|
| 96 |
+
std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {
|
| 97 |
+
|
| 98 |
+
if (pos.checkers())
|
| 99 |
+
return "Final evaluation: none (in check)";
|
| 100 |
+
|
| 101 |
+
auto accumulators = std::make_unique<Eval::NNUE::AccumulatorStack>();
|
| 102 |
+
auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>(networks);
|
| 103 |
+
|
| 104 |
+
std::stringstream ss;
|
| 105 |
+
ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
|
| 106 |
+
ss << '\n' << NNUE::trace(pos, networks, *caches) << '\n';
|
| 107 |
+
|
| 108 |
+
ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);
|
| 109 |
+
|
| 110 |
+
auto [psqt, positional] = networks.big.evaluate(pos, *accumulators, caches->big);
|
| 111 |
+
Value v = psqt + positional;
|
| 112 |
+
v = pos.side_to_move() == WHITE ? v : -v;
|
| 113 |
+
ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n";
|
| 114 |
+
|
| 115 |
+
v = evaluate(networks, pos, *accumulators, *caches, VALUE_ZERO);
|
| 116 |
+
v = pos.side_to_move() == WHITE ? v : -v;
|
| 117 |
+
ss << "Final evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)";
|
| 118 |
+
ss << " [with scaled NNUE, ...]";
|
| 119 |
+
ss << "\n";
|
| 120 |
+
|
| 121 |
+
return ss.str();
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
} // namespace Stockfish
|
src/evaluate.h
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef EVALUATE_H_INCLUDED
|
| 20 |
+
#define EVALUATE_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <string>
|
| 23 |
+
|
| 24 |
+
#include "types.h"
|
| 25 |
+
|
| 26 |
+
namespace Stockfish {
|
| 27 |
+
|
| 28 |
+
class Position;
|
| 29 |
+
|
| 30 |
+
namespace Eval {
|
| 31 |
+
|
| 32 |
+
// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
|
| 33 |
+
// for the build process (profile-build and fishtest) to work. Do not change the
|
| 34 |
+
// name of the macro or the location where this macro is defined, as it is used
|
| 35 |
+
// in the Makefile/Fishtest.
|
| 36 |
+
#define EvalFileDefaultNameBig "nn-9a0cc2a62c52.nnue"
|
| 37 |
+
#define EvalFileDefaultNameSmall "nn-47fc8b7fff06.nnue"
|
| 38 |
+
|
| 39 |
+
namespace NNUE {
|
| 40 |
+
struct Networks;
|
| 41 |
+
struct AccumulatorCaches;
|
| 42 |
+
class AccumulatorStack;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
std::string trace(Position& pos, const Eval::NNUE::Networks& networks);
|
| 46 |
+
|
| 47 |
+
int simple_eval(const Position& pos);
|
| 48 |
+
bool use_smallnet(const Position& pos);
|
| 49 |
+
Value evaluate(const NNUE::Networks& networks,
|
| 50 |
+
const Position& pos,
|
| 51 |
+
Eval::NNUE::AccumulatorStack& accumulators,
|
| 52 |
+
Eval::NNUE::AccumulatorCaches& caches,
|
| 53 |
+
int optimism);
|
| 54 |
+
} // namespace Eval
|
| 55 |
+
|
| 56 |
+
} // namespace Stockfish
|
| 57 |
+
|
| 58 |
+
#endif // #ifndef EVALUATE_H_INCLUDED
|
src/history.h
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef HISTORY_H_INCLUDED
|
| 20 |
+
#define HISTORY_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <algorithm>
|
| 23 |
+
#include <array>
|
| 24 |
+
#include <atomic>
|
| 25 |
+
#include <cassert>
|
| 26 |
+
#include <cmath>
|
| 27 |
+
#include <cstdint>
|
| 28 |
+
#include <cstdlib>
|
| 29 |
+
#include <limits>
|
| 30 |
+
#include <type_traits> // IWYU pragma: keep
|
| 31 |
+
|
| 32 |
+
#include "memory.h"
|
| 33 |
+
#include "misc.h"
|
| 34 |
+
#include "position.h"
|
| 35 |
+
|
| 36 |
+
namespace Stockfish {
|
| 37 |
+
|
| 38 |
+
constexpr int PAWN_HISTORY_BASE_SIZE = 8192; // has to be a power of 2
|
| 39 |
+
constexpr int UINT_16_HISTORY_SIZE = std::numeric_limits<uint16_t>::max() + 1;
|
| 40 |
+
constexpr int CORRHIST_BASE_SIZE = UINT_16_HISTORY_SIZE;
|
| 41 |
+
constexpr int CORRECTION_HISTORY_LIMIT = 1024;
|
| 42 |
+
constexpr int LOW_PLY_HISTORY_SIZE = 5;
|
| 43 |
+
|
| 44 |
+
static_assert((PAWN_HISTORY_BASE_SIZE & (PAWN_HISTORY_BASE_SIZE - 1)) == 0,
|
| 45 |
+
"PAWN_HISTORY_BASE_SIZE has to be a power of 2");
|
| 46 |
+
|
| 47 |
+
static_assert((CORRHIST_BASE_SIZE & (CORRHIST_BASE_SIZE - 1)) == 0,
|
| 48 |
+
"CORRHIST_BASE_SIZE has to be a power of 2");
|
| 49 |
+
|
| 50 |
+
// StatsEntry is the container of various numerical statistics. We use a class
|
| 51 |
+
// instead of a naked value to directly call history update operator<<() on
|
| 52 |
+
// the entry. The first template parameter T is the base type of the array,
|
| 53 |
+
// and the second template parameter D limits the range of updates in [-D, D]
|
| 54 |
+
// when we update values with the << operator
|
| 55 |
+
template<typename T, int D, bool Atomic = false>
struct StatsEntry {
    static_assert(std::is_arithmetic_v<T>, "Not an arithmetic type");
    // D is both the divisor and the clamp bound in operator<<(): a non-positive
    // D would divide by zero or hand std::clamp an inverted [lo, hi] range.
    static_assert(D > 0, "D has to be positive");

   private:
    // Plain T, or std::atomic<T> when Atomic is true.
    std::conditional_t<Atomic, std::atomic<T>, T> entry;

   public:
    // Overwrites the stored value; a relaxed store in the atomic case.
    void operator=(const T& v) {
        if constexpr (Atomic)
            entry.store(v, std::memory_order_relaxed);
        else
            entry = v;
    }

    // Reads the stored value; a relaxed load in the atomic case.
    operator T() const {
        if constexpr (Atomic)
            return entry.load(std::memory_order_relaxed);
        else
            return entry;
    }

    // Applies a history update. The bonus is first clamped to [-D, D]; the new
    // value is val + bonus - val * |bonus| / D, which the trailing assert
    // expects to stay within [-D, D].
    // In the atomic case this is a separate load followed by a store, not a
    // single atomic read-modify-write.
    void operator<<(int bonus) {
        // Make sure that bonus is in range [-D, D]
        const int clampedBonus = std::clamp(bonus, -D, D);
        const T   val          = *this;
        *this                  = val + clampedBonus - val * std::abs(clampedBonus) / D;

        assert(std::abs(T(*this)) <= D);
    }
};
|
| 86 |
+
|
| 87 |
+
// Two-valued selector; the enumerators keep their implicit numbering (0 and 1).
enum StatsType {
    NoCaptures = 0,
    Captures   = 1
};
|
| 91 |
+
|
| 92 |
+
template<typename T, int D, std::size_t... Sizes>
|
| 93 |
+
using Stats = MultiArray<StatsEntry<T, D>, Sizes...>;
|
| 94 |
+
|
| 95 |
+
template<typename T, int D, std::size_t... Sizes>
|
| 96 |
+
using AtomicStats = MultiArray<StatsEntry<T, D, true>, Sizes...>;
|
| 97 |
+
|
| 98 |
+
// DynStats is a dynamically sized array of Stats, used for thread-shared histories
|
| 99 |
+
// which should scale with the total number of threads. The SizeMultiplier gives
|
| 100 |
+
// the per-thread allocation count of T.
|
| 101 |
+
template<typename T, int SizeMultiplier>
|
| 102 |
+
struct DynStats {
|
| 103 |
+
explicit DynStats(size_t s) {
|
| 104 |
+
size = s * SizeMultiplier;
|
| 105 |
+
data = make_unique_large_page<T[]>(size);
|
| 106 |
+
}
|
| 107 |
+
// Sets all values in the range to 0
|
| 108 |
+
void clear_range(int value, size_t threadIdx, size_t numaTotal) {
|
| 109 |
+
size_t start = uint64_t(threadIdx) * size / numaTotal;
|
| 110 |
+
assert(start < size);
|
| 111 |
+
size_t end = threadIdx + 1 == numaTotal ? size : uint64_t(threadIdx + 1) * size / numaTotal;
|
| 112 |
+
|
| 113 |
+
while (start < end)
|
| 114 |
+
data[start++].fill(value);
|
| 115 |
+
}
|
| 116 |
+
size_t get_size() const { return size; }
|
| 117 |
+
T& operator[](size_t index) {
|
| 118 |
+
assert(index < size);
|
| 119 |
+
return data.get()[index];
|
| 120 |
+
}
|
| 121 |
+
const T& operator[](size_t index) const {
|
| 122 |
+
assert(index < size);
|
| 123 |
+
return data.get()[index];
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
private:
|
| 127 |
+
size_t size;
|
| 128 |
+
LargePagePtr<T[]> data;
|
| 129 |
+
};
|
| 130 |
+
|
| 131 |
+
// ButterflyHistory records how often quiet moves have been successful or unsuccessful
|
| 132 |
+
// during the current search, and is used for reduction and move ordering decisions.
|
| 133 |
+
// It uses 2 tables (one for each color) indexed by the move's from and to squares,
|
| 134 |
+
// see https://www.chessprogramming.org/Butterfly_Boards
|
| 135 |
+
using ButterflyHistory = Stats<std::int16_t, 7183, COLOR_NB, UINT_16_HISTORY_SIZE>;
|
| 136 |
+
|
| 137 |
+
// LowPlyHistory is addressed by ply and move's from and to squares, used
|
| 138 |
+
// to improve move ordering near the root
|
| 139 |
+
using LowPlyHistory = Stats<std::int16_t, 7183, LOW_PLY_HISTORY_SIZE, UINT_16_HISTORY_SIZE>;
|
| 140 |
+
|
| 141 |
+
// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type]
|
| 142 |
+
using CapturePieceToHistory = Stats<std::int16_t, 10692, PIECE_NB, SQUARE_NB, PIECE_TYPE_NB>;
|
| 143 |
+
|
| 144 |
+
// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to]
|
| 145 |
+
using PieceToHistory = Stats<std::int16_t, 30000, PIECE_NB, SQUARE_NB>;
|
| 146 |
+
|
| 147 |
+
// ContinuationHistory is the combined history of a given pair of moves, usually
|
| 148 |
+
// the current one given a previous one. The nested history table is based on
|
| 149 |
+
// PieceToHistory instead of ButterflyBoards.
|
| 150 |
+
using ContinuationHistory = MultiArray<PieceToHistory, PIECE_NB, SQUARE_NB>;
|
| 151 |
+
|
| 152 |
+
// PawnHistory is addressed by the pawn structure and a move's [piece][to]
|
| 153 |
+
using PawnHistory =
|
| 154 |
+
DynStats<AtomicStats<std::int16_t, 8192, PIECE_NB, SQUARE_NB>, PAWN_HISTORY_BASE_SIZE>;
|
| 155 |
+
|
| 156 |
+
// Correction histories record differences between the static evaluation of
|
| 157 |
+
// positions and their search score. It is used to improve the static evaluation
|
| 158 |
+
// used by some search heuristics.
|
| 159 |
+
// see https://www.chessprogramming.org/Static_Evaluation_Correction_History
|
| 160 |
+
// Selects a correction-history flavour; the enumerators keep their implicit
// numbering (0 through 4).
enum CorrHistType {
    Pawn         = 0,  // By color and pawn structure
    Minor        = 1,  // By color and positions of minor pieces (Knight, Bishop)
    NonPawn      = 2,  // By non-pawn material positions and color
    PieceTo      = 3,  // By [piece][to] move
    Continuation = 4,  // Combined history of move pairs
};
|
| 167 |
+
|
| 168 |
+
template<typename T, int D>
|
| 169 |
+
struct CorrectionBundle {
|
| 170 |
+
StatsEntry<T, D, true> pawn;
|
| 171 |
+
StatsEntry<T, D, true> minor;
|
| 172 |
+
StatsEntry<T, D, true> nonPawnWhite;
|
| 173 |
+
StatsEntry<T, D, true> nonPawnBlack;
|
| 174 |
+
|
| 175 |
+
void operator=(T val) {
|
| 176 |
+
pawn = val;
|
| 177 |
+
minor = val;
|
| 178 |
+
nonPawnWhite = val;
|
| 179 |
+
nonPawnBlack = val;
|
| 180 |
+
}
|
| 181 |
+
};
|
| 182 |
+
|
| 183 |
+
namespace Detail {
|
| 184 |
+
|
| 185 |
+
template<CorrHistType>
|
| 186 |
+
struct CorrHistTypedef {
|
| 187 |
+
using type =
|
| 188 |
+
DynStats<Stats<std::int16_t, CORRECTION_HISTORY_LIMIT, COLOR_NB>, CORRHIST_BASE_SIZE>;
|
| 189 |
+
};
|
| 190 |
+
|
| 191 |
+
template<>
|
| 192 |
+
struct CorrHistTypedef<PieceTo> {
|
| 193 |
+
using type = Stats<std::int16_t, CORRECTION_HISTORY_LIMIT, PIECE_NB, SQUARE_NB>;
|
| 194 |
+
};
|
| 195 |
+
|
| 196 |
+
template<>
|
| 197 |
+
struct CorrHistTypedef<Continuation> {
|
| 198 |
+
using type = MultiArray<CorrHistTypedef<PieceTo>::type, PIECE_NB, SQUARE_NB>;
|
| 199 |
+
};
|
| 200 |
+
|
| 201 |
+
template<>
|
| 202 |
+
struct CorrHistTypedef<NonPawn> {
|
| 203 |
+
using type = DynStats<Stats<std::int16_t, CORRECTION_HISTORY_LIMIT, COLOR_NB, COLOR_NB>,
|
| 204 |
+
CORRHIST_BASE_SIZE>;
|
| 205 |
+
};
|
| 206 |
+
|
| 207 |
+
}
|
| 208 |
+
|
| 209 |
+
using UnifiedCorrectionHistory =
|
| 210 |
+
DynStats<MultiArray<CorrectionBundle<std::int16_t, CORRECTION_HISTORY_LIMIT>, COLOR_NB>,
|
| 211 |
+
CORRHIST_BASE_SIZE>;
|
| 212 |
+
|
| 213 |
+
template<CorrHistType T>
|
| 214 |
+
using CorrectionHistory = typename Detail::CorrHistTypedef<T>::type;
|
| 215 |
+
|
| 216 |
+
using TTMoveHistory = StatsEntry<std::int16_t, 8192>;
|
| 217 |
+
|
| 218 |
+
// Set of histories shared between groups of threads. To avoid excessive
|
| 219 |
+
// cross-node data transfer, histories are shared only between threads
|
| 220 |
+
// on a given NUMA node. The passed size must be a power of two to make
|
| 221 |
+
// the indexing more efficient.
|
| 222 |
+
struct SharedHistories {
|
| 223 |
+
SharedHistories(size_t threadCount) :
|
| 224 |
+
correctionHistory(threadCount),
|
| 225 |
+
pawnHistory(threadCount) {
|
| 226 |
+
assert((threadCount & (threadCount - 1)) == 0 && threadCount != 0);
|
| 227 |
+
sizeMinus1 = correctionHistory.get_size() - 1;
|
| 228 |
+
pawnHistSizeMinus1 = pawnHistory.get_size() - 1;
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
size_t get_size() const { return sizeMinus1 + 1; }
|
| 232 |
+
|
| 233 |
+
auto& pawn_entry(const Position& pos) {
|
| 234 |
+
return pawnHistory[pos.pawn_key() & pawnHistSizeMinus1];
|
| 235 |
+
}
|
| 236 |
+
const auto& pawn_entry(const Position& pos) const {
|
| 237 |
+
return pawnHistory[pos.pawn_key() & pawnHistSizeMinus1];
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
auto& pawn_correction_entry(const Position& pos) {
|
| 241 |
+
return correctionHistory[pos.pawn_key() & sizeMinus1];
|
| 242 |
+
}
|
| 243 |
+
const auto& pawn_correction_entry(const Position& pos) const {
|
| 244 |
+
return correctionHistory[pos.pawn_key() & sizeMinus1];
|
| 245 |
+
}
|
| 246 |
+
|
| 247 |
+
auto& minor_piece_correction_entry(const Position& pos) {
|
| 248 |
+
return correctionHistory[pos.minor_piece_key() & sizeMinus1];
|
| 249 |
+
}
|
| 250 |
+
const auto& minor_piece_correction_entry(const Position& pos) const {
|
| 251 |
+
return correctionHistory[pos.minor_piece_key() & sizeMinus1];
|
| 252 |
+
}
|
| 253 |
+
|
| 254 |
+
template<Color c>
|
| 255 |
+
auto& nonpawn_correction_entry(const Position& pos) {
|
| 256 |
+
return correctionHistory[pos.non_pawn_key(c) & sizeMinus1];
|
| 257 |
+
}
|
| 258 |
+
template<Color c>
|
| 259 |
+
const auto& nonpawn_correction_entry(const Position& pos) const {
|
| 260 |
+
return correctionHistory[pos.non_pawn_key(c) & sizeMinus1];
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
UnifiedCorrectionHistory correctionHistory;
|
| 264 |
+
PawnHistory pawnHistory;
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
private:
|
| 268 |
+
size_t sizeMinus1, pawnHistSizeMinus1;
|
| 269 |
+
};
|
| 270 |
+
|
| 271 |
+
} // namespace Stockfish
|
| 272 |
+
|
| 273 |
+
#endif // #ifndef HISTORY_H_INCLUDED
|
src/incbin/UNLICENCE
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
The file "incbin.h" is free and unencumbered software released into
|
| 2 |
+
the public domain by Dale Weiler, see:
|
| 3 |
+
<https://github.com/graphitemaster/incbin>
|
| 4 |
+
|
| 5 |
+
Anyone is free to copy, modify, publish, use, compile, sell, or
|
| 6 |
+
distribute this software, either in source code form or as a compiled
|
| 7 |
+
binary, for any purpose, commercial or non-commercial, and by any
|
| 8 |
+
means.
|
| 9 |
+
|
| 10 |
+
In jurisdictions that recognize copyright laws, the author or authors
|
| 11 |
+
of this software dedicate any and all copyright interest in the
|
| 12 |
+
software to the public domain. We make this dedication for the benefit
|
| 13 |
+
of the public at large and to the detriment of our heirs and
|
| 14 |
+
successors. We intend this dedication to be an overt act of
|
| 15 |
+
relinquishment in perpetuity of all present and future rights to this
|
| 16 |
+
software under copyright law.
|
| 17 |
+
|
| 18 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
| 19 |
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
| 20 |
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
| 21 |
+
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
| 22 |
+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
| 23 |
+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
| 24 |
+
OTHER DEALINGS IN THE SOFTWARE.
|
| 25 |
+
|
| 26 |
+
For more information, please refer to <http://unlicense.org/>
|
src/incbin/incbin.h
ADDED
|
@@ -0,0 +1,476 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* @file incbin.h
|
| 3 |
+
* @author Dale Weiler
|
| 4 |
+
* @brief Utility for including binary files
|
| 5 |
+
*
|
| 6 |
+
* Facilities for including binary files into the current translation unit and
|
| 7 |
+
* making use of them externally in other translation units.
|
| 8 |
+
*/
|
| 9 |
+
#ifndef INCBIN_HDR
|
| 10 |
+
#define INCBIN_HDR
|
| 11 |
+
#include <limits.h>
|
| 12 |
+
#if defined(__AVX512BW__) || \
|
| 13 |
+
defined(__AVX512CD__) || \
|
| 14 |
+
defined(__AVX512DQ__) || \
|
| 15 |
+
defined(__AVX512ER__) || \
|
| 16 |
+
defined(__AVX512PF__) || \
|
| 17 |
+
defined(__AVX512VL__) || \
|
| 18 |
+
defined(__AVX512F__)
|
| 19 |
+
# define INCBIN_ALIGNMENT_INDEX 6
|
| 20 |
+
#elif defined(__AVX__) || \
|
| 21 |
+
defined(__AVX2__)
|
| 22 |
+
# define INCBIN_ALIGNMENT_INDEX 5
|
| 23 |
+
#elif defined(__SSE__) || \
|
| 24 |
+
defined(__SSE2__) || \
|
| 25 |
+
defined(__SSE3__) || \
|
| 26 |
+
defined(__SSSE3__) || \
|
| 27 |
+
defined(__SSE4_1__) || \
|
| 28 |
+
defined(__SSE4_2__) || \
|
| 29 |
+
defined(__neon__) || \
|
| 30 |
+
defined(__ARM_NEON) || \
|
| 31 |
+
defined(__ALTIVEC__)
|
| 32 |
+
# define INCBIN_ALIGNMENT_INDEX 4
|
| 33 |
+
#elif ULONG_MAX != 0xffffffffu
|
| 34 |
+
# define INCBIN_ALIGNMENT_INDEX 3
|
| 35 |
+
# else
|
| 36 |
+
# define INCBIN_ALIGNMENT_INDEX 2
|
| 37 |
+
#endif
|
| 38 |
+
|
| 39 |
+
/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */
|
| 40 |
+
#define INCBIN_ALIGN_SHIFT_0 1
|
| 41 |
+
#define INCBIN_ALIGN_SHIFT_1 2
|
| 42 |
+
#define INCBIN_ALIGN_SHIFT_2 4
|
| 43 |
+
#define INCBIN_ALIGN_SHIFT_3 8
|
| 44 |
+
#define INCBIN_ALIGN_SHIFT_4 16
|
| 45 |
+
#define INCBIN_ALIGN_SHIFT_5 32
|
| 46 |
+
#define INCBIN_ALIGN_SHIFT_6 64
|
| 47 |
+
|
| 48 |
+
/* Actual alignment value */
|
| 49 |
+
#define INCBIN_ALIGNMENT \
|
| 50 |
+
INCBIN_CONCATENATE( \
|
| 51 |
+
INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \
|
| 52 |
+
INCBIN_ALIGNMENT_INDEX)
|
| 53 |
+
|
| 54 |
+
/* Stringize */
|
| 55 |
+
#define INCBIN_STR(X) \
|
| 56 |
+
#X
|
| 57 |
+
#define INCBIN_STRINGIZE(X) \
|
| 58 |
+
INCBIN_STR(X)
|
| 59 |
+
/* Concatenate */
|
| 60 |
+
#define INCBIN_CAT(X, Y) \
|
| 61 |
+
X ## Y
|
| 62 |
+
#define INCBIN_CONCATENATE(X, Y) \
|
| 63 |
+
INCBIN_CAT(X, Y)
|
| 64 |
+
/* Deferred macro expansion */
|
| 65 |
+
#define INCBIN_EVAL(X) \
|
| 66 |
+
X
|
| 67 |
+
#define INCBIN_INVOKE(N, ...) \
|
| 68 |
+
INCBIN_EVAL(N(__VA_ARGS__))
|
| 69 |
+
/* Variable argument count for overloading by arity */
|
| 70 |
+
#define INCBIN_VA_ARG_COUNTER(_1, _2, _3, N, ...) N
|
| 71 |
+
#define INCBIN_VA_ARGC(...) INCBIN_VA_ARG_COUNTER(__VA_ARGS__, 3, 2, 1, 0)
|
| 72 |
+
|
| 73 |
+
/* Green Hills uses a different directive for including binary data */
|
| 74 |
+
#if defined(__ghs__)
|
| 75 |
+
# if (__ghs_asm == 2)
|
| 76 |
+
# define INCBIN_MACRO ".file"
|
| 77 |
+
/* Or consider the ".myrawdata" entry in the ld file */
|
| 78 |
+
# else
|
| 79 |
+
# define INCBIN_MACRO "\tINCBIN"
|
| 80 |
+
# endif
|
| 81 |
+
#else
|
| 82 |
+
# define INCBIN_MACRO ".incbin"
|
| 83 |
+
#endif
|
| 84 |
+
|
| 85 |
+
#ifndef _MSC_VER
|
| 86 |
+
# define INCBIN_ALIGN \
|
| 87 |
+
__attribute__((aligned(INCBIN_ALIGNMENT)))
|
| 88 |
+
#else
|
| 89 |
+
# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT))
|
| 90 |
+
#endif
|
| 91 |
+
|
| 92 |
+
#if defined(__arm__) || /* GNU C and RealView */ \
|
| 93 |
+
defined(__arm) || /* Diab */ \
|
| 94 |
+
defined(_ARM) /* ImageCraft */
|
| 95 |
+
# define INCBIN_ARM
|
| 96 |
+
#endif
|
| 97 |
+
|
| 98 |
+
#ifdef __GNUC__
|
| 99 |
+
/* Utilize .balign where supported */
|
| 100 |
+
# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
|
| 101 |
+
# define INCBIN_ALIGN_BYTE ".balign 1\n"
|
| 102 |
+
#elif defined(INCBIN_ARM)
|
| 103 |
+
/*
|
| 104 |
+
* On arm assemblers, the alignment value is calculated as (1 << n) where `n' is
|
| 105 |
+
* the shift count. This is the value passed to `.align'
|
| 106 |
+
*/
|
| 107 |
+
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n"
|
| 108 |
+
# define INCBIN_ALIGN_BYTE ".align 0\n"
|
| 109 |
+
#else
|
| 110 |
+
/* We assume other inline assemblers treat `.align' as `.balign' */
|
| 111 |
+
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
|
| 112 |
+
# define INCBIN_ALIGN_BYTE ".align 1\n"
|
| 113 |
+
#endif
|
| 114 |
+
|
| 115 |
+
/* INCBIN_CONST is used by incbin.c generated files */
|
| 116 |
+
#if defined(__cplusplus)
|
| 117 |
+
# define INCBIN_EXTERNAL extern "C"
|
| 118 |
+
# define INCBIN_CONST extern const
|
| 119 |
+
#else
|
| 120 |
+
# define INCBIN_EXTERNAL extern
|
| 121 |
+
# define INCBIN_CONST const
|
| 122 |
+
#endif
|
| 123 |
+
|
| 124 |
+
/**
|
| 125 |
+
* @brief Optionally override the linker section into which size and data is
|
| 126 |
+
* emitted.
|
| 127 |
+
*
|
| 128 |
+
* @warning If you use this facility, you might have to deal with
|
| 129 |
+
* platform-specific linker output section naming on your own.
|
| 130 |
+
*/
|
| 131 |
+
#if !defined(INCBIN_OUTPUT_SECTION)
|
| 132 |
+
# if defined(__APPLE__)
|
| 133 |
+
# define INCBIN_OUTPUT_SECTION ".const_data"
|
| 134 |
+
# else
|
| 135 |
+
# define INCBIN_OUTPUT_SECTION ".rodata"
|
| 136 |
+
# endif
|
| 137 |
+
#endif
|
| 138 |
+
|
| 139 |
+
/**
|
| 140 |
+
* @brief Optionally override the linker section into which data is emitted.
|
| 141 |
+
*
|
| 142 |
+
* @warning If you use this facility, you might have to deal with
|
| 143 |
+
* platform-specific linker output section naming on your own.
|
| 144 |
+
*/
|
| 145 |
+
#if !defined(INCBIN_OUTPUT_DATA_SECTION)
|
| 146 |
+
# define INCBIN_OUTPUT_DATA_SECTION INCBIN_OUTPUT_SECTION
|
| 147 |
+
#endif
|
| 148 |
+
|
| 149 |
+
/**
|
| 150 |
+
* @brief Optionally override the linker section into which size is emitted.
|
| 151 |
+
*
|
| 152 |
+
* @warning If you use this facility, you might have to deal with
|
| 153 |
+
* platform-specific linker output section naming on your own.
|
| 154 |
+
*
|
| 155 |
+
* @note This is useful for Harvard architectures where program memory cannot
|
| 156 |
+
* be directly read from the program without special instructions. With this you
|
| 157 |
+
* can choose to put the size variable in RAM rather than ROM.
|
| 158 |
+
*/
|
| 159 |
+
#if !defined(INCBIN_OUTPUT_SIZE_SECTION)
|
| 160 |
+
# define INCBIN_OUTPUT_SIZE_SECTION INCBIN_OUTPUT_SECTION
|
| 161 |
+
#endif
|
| 162 |
+
|
| 163 |
+
#if defined(__APPLE__)
|
| 164 |
+
# include "TargetConditionals.h"
|
| 165 |
+
# if defined(TARGET_OS_IPHONE) && !defined(INCBIN_SILENCE_BITCODE_WARNING)
|
| 166 |
+
# warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
|
| 167 |
+
# endif
|
| 168 |
+
/* The directives are different for Apple branded compilers */
|
| 169 |
+
# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n"
|
| 170 |
+
# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
|
| 171 |
+
# define INCBIN_INT ".long "
|
| 172 |
+
# define INCBIN_MANGLE "_"
|
| 173 |
+
# define INCBIN_BYTE ".byte "
|
| 174 |
+
# define INCBIN_TYPE(...)
|
| 175 |
+
#else
|
| 176 |
+
# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n"
|
| 177 |
+
# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
|
| 178 |
+
# if defined(__ghs__)
|
| 179 |
+
# define INCBIN_INT ".word "
|
| 180 |
+
# else
|
| 181 |
+
# define INCBIN_INT ".int "
|
| 182 |
+
# endif
|
| 183 |
+
# if defined(__USER_LABEL_PREFIX__)
|
| 184 |
+
# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__)
|
| 185 |
+
# else
|
| 186 |
+
# define INCBIN_MANGLE ""
|
| 187 |
+
# endif
|
| 188 |
+
# if defined(INCBIN_ARM)
|
| 189 |
+
/* On arm assemblers, `@' is used as a line comment token */
|
| 190 |
+
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n"
|
| 191 |
+
# elif defined(__MINGW32__) || defined(__MINGW64__)
|
| 192 |
+
/* Mingw doesn't support this directive either */
|
| 193 |
+
# define INCBIN_TYPE(NAME)
|
| 194 |
+
# else
|
| 195 |
+
/* It's safe to use `@' on other architectures */
|
| 196 |
+
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n"
|
| 197 |
+
# endif
|
| 198 |
+
# define INCBIN_BYTE ".byte "
|
| 199 |
+
#endif
|
| 200 |
+
|
| 201 |
+
/* List of style types used for symbol names */
|
| 202 |
+
#define INCBIN_STYLE_CAMEL 0
|
| 203 |
+
#define INCBIN_STYLE_SNAKE 1
|
| 204 |
+
|
| 205 |
+
/**
|
| 206 |
+
* @brief Specify the prefix to use for symbol names.
|
| 207 |
+
*
|
| 208 |
+
* @note By default this is "g".
|
| 209 |
+
*
|
| 210 |
+
* @code
|
| 211 |
+
* #define INCBIN_PREFIX incbin
|
| 212 |
+
* #include "incbin.h"
|
| 213 |
+
* INCBIN(Foo, "foo.txt");
|
| 214 |
+
*
|
| 215 |
+
* // Now you have the following symbols instead:
|
| 216 |
+
* // const unsigned char incbinFoo<data>[];
|
| 217 |
+
* // const unsigned char *const incbinFoo<end>;
|
| 218 |
+
* // const unsigned int incbinFoo<size>;
|
| 219 |
+
* @endcode
|
| 220 |
+
*/
|
| 221 |
+
#if !defined(INCBIN_PREFIX)
|
| 222 |
+
# define INCBIN_PREFIX g
|
| 223 |
+
#endif
|
| 224 |
+
|
| 225 |
+
/**
|
| 226 |
+
* @brief Specify the style used for symbol names.
|
| 227 |
+
*
|
| 228 |
+
* Possible options are
|
| 229 |
+
* - INCBIN_STYLE_CAMEL "CamelCase"
|
| 230 |
+
* - INCBIN_STYLE_SNAKE "snake_case"
|
| 231 |
+
*
|
| 232 |
+
* @note By default this is INCBIN_STYLE_CAMEL
|
| 233 |
+
*
|
| 234 |
+
* @code
|
| 235 |
+
* #define INCBIN_STYLE INCBIN_STYLE_SNAKE
|
| 236 |
+
* #include "incbin.h"
|
| 237 |
+
* INCBIN(foo, "foo.txt");
|
| 238 |
+
*
|
| 239 |
+
* // Now you have the following symbols:
|
| 240 |
+
* // const unsigned char <prefix>foo_data[];
|
| 241 |
+
* // const unsigned char *const <prefix>foo_end;
|
| 242 |
+
* // const unsigned int <prefix>foo_size;
|
| 243 |
+
* @endcode
|
| 244 |
+
*/
|
| 245 |
+
#if !defined(INCBIN_STYLE)
|
| 246 |
+
# define INCBIN_STYLE INCBIN_STYLE_CAMEL
|
| 247 |
+
#endif
|
| 248 |
+
|
| 249 |
+
/* Style lookup tables */
|
| 250 |
+
#define INCBIN_STYLE_0_DATA Data
|
| 251 |
+
#define INCBIN_STYLE_0_END End
|
| 252 |
+
#define INCBIN_STYLE_0_SIZE Size
|
| 253 |
+
#define INCBIN_STYLE_1_DATA _data
|
| 254 |
+
#define INCBIN_STYLE_1_END _end
|
| 255 |
+
#define INCBIN_STYLE_1_SIZE _size
|
| 256 |
+
|
| 257 |
+
/* Style lookup: returning identifier */
|
| 258 |
+
#define INCBIN_STYLE_IDENT(TYPE) \
|
| 259 |
+
INCBIN_CONCATENATE( \
|
| 260 |
+
INCBIN_STYLE_, \
|
| 261 |
+
INCBIN_CONCATENATE( \
|
| 262 |
+
INCBIN_EVAL(INCBIN_STYLE), \
|
| 263 |
+
INCBIN_CONCATENATE(_, TYPE)))
|
| 264 |
+
|
| 265 |
+
/* Style lookup: returning string literal */
|
| 266 |
+
#define INCBIN_STYLE_STRING(TYPE) \
|
| 267 |
+
INCBIN_STRINGIZE( \
|
| 268 |
+
INCBIN_STYLE_IDENT(TYPE)) \
|
| 269 |
+
|
| 270 |
+
/* Generate the global labels by indirectly invoking the macro with our style
|
| 271 |
+
* type and concatenating the name against them. */
|
| 272 |
+
#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \
|
| 273 |
+
INCBIN_INVOKE( \
|
| 274 |
+
INCBIN_GLOBAL, \
|
| 275 |
+
INCBIN_CONCATENATE( \
|
| 276 |
+
NAME, \
|
| 277 |
+
INCBIN_INVOKE( \
|
| 278 |
+
INCBIN_STYLE_IDENT, \
|
| 279 |
+
TYPE))) \
|
| 280 |
+
INCBIN_INVOKE( \
|
| 281 |
+
INCBIN_TYPE, \
|
| 282 |
+
INCBIN_CONCATENATE( \
|
| 283 |
+
NAME, \
|
| 284 |
+
INCBIN_INVOKE( \
|
| 285 |
+
INCBIN_STYLE_IDENT, \
|
| 286 |
+
TYPE)))
|
| 287 |
+
|
| 288 |
+
/**
|
| 289 |
+
* @brief Externally reference binary data included in another translation unit.
|
| 290 |
+
*
|
| 291 |
+
* Produces three external symbols that reference the binary data included in
|
| 292 |
+
* another translation unit.
|
| 293 |
+
*
|
| 294 |
+
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
|
| 295 |
+
* "Data", as well as "End" and "Size" after. An example is provided below.
|
| 296 |
+
*
|
| 297 |
+
* @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
|
| 298 |
+
* @param NAME The name given for the binary data
|
| 299 |
+
*
|
| 300 |
+
* @code
|
| 301 |
+
* INCBIN_EXTERN(Foo);
|
| 302 |
+
*
|
| 303 |
+
* // Now you have the following symbols:
|
| 304 |
+
* // extern const unsigned char <prefix>Foo<data>[];
|
| 305 |
+
* // extern const unsigned char *const <prefix>Foo<end>;
|
| 306 |
+
* // extern const unsigned int <prefix>Foo<size>;
|
| 307 |
+
* @endcode
|
| 308 |
+
*
|
| 309 |
+
* You may specify a custom optional data type as well as the first argument.
|
| 310 |
+
* @code
|
| 311 |
+
* INCBIN_EXTERN(custom_type, Foo);
|
| 312 |
+
*
|
| 313 |
+
* // Now you have the following symbols:
|
| 314 |
+
* // extern const custom_type <prefix>Foo<data>[];
|
| 315 |
+
* // extern const custom_type *const <prefix>Foo<end>;
|
| 316 |
+
* // extern const unsigned int <prefix>Foo<size>;
|
| 317 |
+
* @endcode
|
| 318 |
+
*/
|
| 319 |
+
#define INCBIN_EXTERN(...) \
|
| 320 |
+
INCBIN_CONCATENATE(INCBIN_EXTERN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
|
| 321 |
+
#define INCBIN_EXTERN_1(NAME, ...) \
|
| 322 |
+
INCBIN_EXTERN_2(unsigned char, NAME)
|
| 323 |
+
#define INCBIN_EXTERN_2(TYPE, NAME) \
|
| 324 |
+
INCBIN_EXTERNAL const INCBIN_ALIGN TYPE \
|
| 325 |
+
INCBIN_CONCATENATE( \
|
| 326 |
+
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
|
| 327 |
+
INCBIN_STYLE_IDENT(DATA))[]; \
|
| 328 |
+
INCBIN_EXTERNAL const INCBIN_ALIGN TYPE *const \
|
| 329 |
+
INCBIN_CONCATENATE( \
|
| 330 |
+
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
|
| 331 |
+
INCBIN_STYLE_IDENT(END)); \
|
| 332 |
+
INCBIN_EXTERNAL const unsigned int \
|
| 333 |
+
INCBIN_CONCATENATE( \
|
| 334 |
+
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
|
| 335 |
+
INCBIN_STYLE_IDENT(SIZE))
|
| 336 |
+
|
| 337 |
+
/**
|
| 338 |
+
* @brief Externally reference textual data included in another translation unit.
|
| 339 |
+
*
|
| 340 |
+
* Produces three external symbols that reference the textual data included in
|
| 341 |
+
* another translation unit.
|
| 342 |
+
*
|
| 343 |
+
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
|
| 344 |
+
* "Data", as well as "End" and "Size" after. An example is provided below.
|
| 345 |
+
*
|
| 346 |
+
* @param NAME The name given for the textual data
|
| 347 |
+
*
|
| 348 |
+
* @code
|
| 349 |
+
* INCTXT_EXTERN(Foo);
|
| 350 |
+
*
|
| 351 |
+
* // Now you have the following symbols:
|
| 352 |
+
* // extern const char <prefix>Foo<data>[];
|
| 353 |
+
* // extern const char *const <prefix>Foo<end>;
|
| 354 |
+
* // extern const unsigned int <prefix>Foo<size>;
|
| 355 |
+
* @endcode
|
| 356 |
+
*/
|
| 357 |
+
#define INCTXT_EXTERN(NAME) \
|
| 358 |
+
INCBIN_EXTERN_2(char, NAME)
|
| 359 |
+
|
| 360 |
+
/**
|
| 361 |
+
* @brief Include a binary file into the current translation unit.
|
| 362 |
+
*
|
| 363 |
+
* Includes a binary file into the current translation unit, producing three symbols
|
| 364 |
+
* for objects that encode the data and size respectively.
|
| 365 |
+
*
|
| 366 |
+
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
|
| 367 |
+
* "Data", as well as "End" and "Size" after. An example is provided below.
|
| 368 |
+
*
|
| 369 |
+
* @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
|
| 370 |
+
* @param NAME The name to associate with this binary data (as an identifier.)
|
| 371 |
+
* @param FILENAME The file to include (as a string literal.)
|
| 372 |
+
*
|
| 373 |
+
* @code
|
| 374 |
+
* INCBIN(Icon, "icon.png");
|
| 375 |
+
*
|
| 376 |
+
* // Now you have the following symbols:
|
| 377 |
+
* // const unsigned char <prefix>Icon<data>[];
|
| 378 |
+
* // const unsigned char *const <prefix>Icon<end>;
|
| 379 |
+
* // const unsigned int <prefix>Icon<size>;
|
| 380 |
+
* @endcode
|
| 381 |
+
*
|
| 382 |
+
* You may specify a custom optional data type as well as the first argument.
|
| 383 |
+
* These macros are specialized by arity.
|
| 384 |
+
* @code
|
| 385 |
+
* INCBIN(custom_type, Icon, "icon.png");
|
| 386 |
+
*
|
| 387 |
+
* // Now you have the following symbols:
|
| 388 |
+
* // const custom_type <prefix>Icon<data>[];
|
| 389 |
+
* // const custom_type *const <prefix>Icon<end>;
|
| 390 |
+
* // const unsigned int <prefix>Icon<size>;
|
| 391 |
+
* @endcode
|
| 392 |
+
*
|
| 393 |
+
* @warning This must be used in global scope
|
| 394 |
+
* @warning The identifiers may be different if INCBIN_STYLE is not default
|
| 395 |
+
*
|
| 396 |
+
* To externally reference the data included by this in another translation unit
|
| 397 |
+
* please @see INCBIN_EXTERN.
|
| 398 |
+
*/
|
| 399 |
+
/* MSVC branch: INCBIN only declares the symbols through INCBIN_EXTERN; nothing
 * in this branch emits the file contents.
 * NOTE(review): presumably the data is produced by a separate build step on
 * MSVC -- confirm. */
#ifdef _MSC_VER
|
| 400 |
+
# define INCBIN(NAME, FILENAME) \
|
| 401 |
+
INCBIN_EXTERN(NAME)
|
| 402 |
+
/* Every other compiler: INCBIN pastes "INCBIN_" with the result of
 * INCBIN_VA_ARGC(__VA_ARGS__) and forwards the arguments to that macro
 * (INCBIN_1, INCBIN_2 or INCBIN_3 are the ones defined below). */
#else
|
| 403 |
+
# define INCBIN(...) \
|
| 404 |
+
INCBIN_CONCATENATE(INCBIN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
|
| 405 |
+
/* INCBIN_1: a single argument is rejected with an error pragma where one is
 * available, otherwise it expands to nothing.
 * NOTE(review): Clang also defines __GNUC__, so the __clang__ branch looks
 * unreachable -- confirm. */
# if defined(__GNUC__)
|
| 406 |
+
# define INCBIN_1(...) _Pragma("GCC error \"Single argument INCBIN not allowed\"")
|
| 407 |
+
# elif defined(__clang__)
|
| 408 |
+
# define INCBIN_1(...) _Pragma("clang error \"Single argument INCBIN not allowed\"")
|
| 409 |
+
# else
|
| 410 |
+
# define INCBIN_1(...) /* Cannot do anything here */
|
| 411 |
+
# endif
|
| 412 |
+
/* INCBIN_2 supplies `unsigned char` as the element type; INCBIN_3 takes the
 * type explicitly and passes an empty TERMINATOR to INCBIN_COMMON. */
# define INCBIN_2(NAME, FILENAME) \
|
| 413 |
+
INCBIN_3(unsigned char, NAME, FILENAME)
|
| 414 |
+
# define INCBIN_3(TYPE, NAME, FILENAME) INCBIN_COMMON(TYPE, NAME, FILENAME, /* No terminator for binary data */)
|
| 415 |
+
/* INCBIN_COMMON builds one file-scope __asm__ statement out of adjacent string
 * literals and then declares the symbols with INCBIN_EXTERN(TYPE, NAME).
 * The assembler text, in order:
 *   1. INCBIN_SECTION, then the DATA label (global, INCBIN_ALIGN_HOST) followed
 *      by INCBIN_MACRO "FILENAME" and the TERMINATOR text;
 *   2. the END label (global, INCBIN_ALIGN_BYTE) followed by one byte "1";
 *   3. the SIZE label (global, INCBIN_ALIGN_HOST) holding the expression
 *      END - DATA as an INCBIN_INT;
 *   4. a final INCBIN_ALIGN_HOST and a switch back to ".text".
 * Because TERMINATOR sits before the END label, any terminator byte is counted
 * in SIZE. */
# define INCBIN_COMMON(TYPE, NAME, FILENAME, TERMINATOR) \
|
| 416 |
+
__asm__(INCBIN_SECTION \
|
| 417 |
+
INCBIN_GLOBAL_LABELS(NAME, DATA) \
|
| 418 |
+
INCBIN_ALIGN_HOST \
|
| 419 |
+
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
|
| 420 |
+
INCBIN_MACRO " \"" FILENAME "\"\n" \
|
| 421 |
+
TERMINATOR \
|
| 422 |
+
INCBIN_GLOBAL_LABELS(NAME, END) \
|
| 423 |
+
INCBIN_ALIGN_BYTE \
|
| 424 |
+
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
|
| 425 |
+
INCBIN_BYTE "1\n" \
|
| 426 |
+
INCBIN_GLOBAL_LABELS(NAME, SIZE) \
|
| 427 |
+
INCBIN_ALIGN_HOST \
|
| 428 |
+
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \
|
| 429 |
+
INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \
|
| 430 |
+
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \
|
| 431 |
+
INCBIN_ALIGN_HOST \
|
| 432 |
+
".text\n" \
|
| 433 |
+
); \
|
| 434 |
+
INCBIN_EXTERN(TYPE, NAME)
|
| 435 |
+
#endif
|
| 436 |
+
|
| 437 |
+
/**
|
| 438 |
+
* @brief Include a textual file into the current translation unit.
|
| 439 |
+
*
|
| 440 |
+
* This behaves the same as INCBIN except it produces char compatible arrays
|
| 441 |
+
* and implicitly adds a null-terminator byte, thus the size of data included
|
| 442 |
+
* by this is one byte larger than that of INCBIN.
|
| 443 |
+
*
|
| 444 |
+
* Includes a textual file into the current translation unit, producing three
|
| 445 |
+
* symbols for objects that encode the data, the end of the data, and its size respectively.
|
| 446 |
+
*
|
| 447 |
+
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
|
| 448 |
+
* "Data", as well as "End" and "Size" after. An example is provided below.
|
| 449 |
+
*
|
| 450 |
+
* @param NAME The name to associate with this binary data (as an identifier.)
|
| 451 |
+
* @param FILENAME The file to include (as a string literal.)
|
| 452 |
+
*
|
| 453 |
+
* @code
|
| 454 |
+
* INCTXT(Readme, "readme.txt");
|
| 455 |
+
*
|
| 456 |
+
* // Now you have the following symbols:
|
| 457 |
+
* // const char <prefix>Readme<data>[];
|
| 458 |
+
* // const char *const <prefix>Readme<end>;
|
| 459 |
+
* // const unsigned int <prefix>Readme<size>;
|
| 460 |
+
* @endcode
|
| 461 |
+
*
|
| 462 |
+
* @warning This must be used in global scope
|
| 463 |
+
* @warning The identifiers may be different if INCBIN_STYLE is not default
|
| 464 |
+
*
|
| 465 |
+
* To externally reference the data included by this in another translation unit
|
| 466 |
+
* please @see INCBIN_EXTERN.
|
| 467 |
+
*/
|
| 468 |
+
/* MSVC branch: INCTXT only declares the symbols; it expands to
 * INCBIN_EXTERN(NAME) and emits no data.
 * NOTE(review): this uses INCBIN_EXTERN rather than a char-typed declaration,
 * so the declared element type may differ from the non-MSVC branch -- confirm. */
#if defined(_MSC_VER)
|
| 469 |
+
# define INCTXT(NAME, FILENAME) \
|
| 470 |
+
INCBIN_EXTERN(NAME)
|
| 471 |
+
/* Other compilers: reuse INCBIN_COMMON with `char` elements and a terminator of
 * INCBIN_BYTE "0\n", i.e. a single zero byte placed after the file contents. */
#else
|
| 472 |
+
# define INCTXT(NAME, FILENAME) \
|
| 473 |
+
INCBIN_COMMON(char, NAME, FILENAME, INCBIN_BYTE "0\n")
|
| 474 |
+
#endif
|
| 475 |
+
|
| 476 |
+
#endif
|
src/main.cpp
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include <iostream>
|
| 20 |
+
#include <memory>
|
| 21 |
+
|
| 22 |
+
#include "bitboard.h"
|
| 23 |
+
#include "misc.h"
|
| 24 |
+
#include "position.h"
|
| 25 |
+
#include "tune.h"
|
| 26 |
+
#include "uci.h"
|
| 27 |
+
|
| 28 |
+
using namespace Stockfish;
|
| 29 |
+
|
| 30 |
+
int main(int argc, char* argv[]) {
|
| 31 |
+
std::cout << engine_info() << std::endl;
|
| 32 |
+
|
| 33 |
+
Bitboards::init();
|
| 34 |
+
Position::init();
|
| 35 |
+
|
| 36 |
+
auto uci = std::make_unique<UCIEngine>(argc, argv);
|
| 37 |
+
|
| 38 |
+
Tune::init(uci->engine_options());
|
| 39 |
+
|
| 40 |
+
uci->loop();
|
| 41 |
+
|
| 42 |
+
return 0;
|
| 43 |
+
}
|
src/memory.cpp
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include "memory.h"
|
| 20 |
+
|
| 21 |
+
#include <cstdlib>
|
| 22 |
+
|
| 23 |
+
#if __has_include("features.h")
|
| 24 |
+
#include <features.h>
|
| 25 |
+
#endif
|
| 26 |
+
|
| 27 |
+
#if defined(__linux__) && !defined(__ANDROID__)
|
| 28 |
+
#include <sys/mman.h>
|
| 29 |
+
#endif
|
| 30 |
+
|
| 31 |
+
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \
|
| 32 |
+
|| (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \
|
| 33 |
+
|| defined(__e2k__)
|
| 34 |
+
#define POSIXALIGNEDALLOC
|
| 35 |
+
#include <stdlib.h>
|
| 36 |
+
#endif
|
| 37 |
+
|
| 38 |
+
#ifdef _WIN32
|
| 39 |
+
#if _WIN32_WINNT < 0x0601
|
| 40 |
+
#undef _WIN32_WINNT
|
| 41 |
+
#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
|
| 42 |
+
#endif
|
| 43 |
+
|
| 44 |
+
#ifndef NOMINMAX
|
| 45 |
+
#define NOMINMAX
|
| 46 |
+
#endif
|
| 47 |
+
|
| 48 |
+
#include <ios> // std::hex, std::dec
|
| 49 |
+
#include <iostream> // std::cerr
|
| 50 |
+
#include <ostream> // std::endl
|
| 51 |
+
#include <windows.h>
|
| 52 |
+
|
| 53 |
+
// The needed Windows API for processor groups may be missing from old Windows
|
| 54 |
+
// versions, so instead of calling them directly (forcing the linker to resolve
|
| 55 |
+
// the calls at compile time), try to load them at runtime. To do this we need
|
| 56 |
+
// first to define the corresponding function pointers.
|
| 57 |
+
|
| 58 |
+
#endif
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
namespace Stockfish {
|
| 62 |
+
|
| 63 |
+
// Wrappers for systems where the C++17 implementation does not guarantee the
// availability of aligned_alloc(). Memory allocated with std_aligned_alloc()
// must be freed with std_aligned_free().
//
// Returns a block of at least `size` bytes aligned to `alignment`, or nullptr
// when the underlying allocator fails. The backend is selected at compile time:
// aligned_alloc(), posix_memalign(), _mm_malloc(), _aligned_malloc() or
// std::aligned_alloc().
void* std_aligned_alloc(size_t alignment, size_t size) {
#if defined(_ISOC11_SOURCE)
    return aligned_alloc(alignment, size);
#elif defined(POSIXALIGNEDALLOC)
    // posix_memalign() signals failure through its return value. Older POSIX
    // revisions leave the output pointer unspecified in that case, so map any
    // non-zero result to nullptr explicitly instead of trusting `mem`.
    void* mem = nullptr;
    return posix_memalign(&mem, alignment, size) == 0 ? mem : nullptr;
#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
    return _mm_malloc(size, alignment);
#elif defined(_WIN32)
    return _aligned_malloc(size, alignment);
#else
    return std::aligned_alloc(alignment, size);
#endif
}
|
| 82 |
+
|
| 83 |
+
// Releases memory obtained from std_aligned_alloc(). The Windows intrinsics
// allocators need their matching release function; every other configuration
// (including the posix_memalign one) uses plain free().
void std_aligned_free(void* ptr) {

#if !defined(POSIXALIGNEDALLOC) && defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
    _mm_free(ptr);
#elif !defined(POSIXALIGNEDALLOC) && defined(_WIN32)
    _aligned_free(ptr);
#else
    free(ptr);
#endif
}
|
| 95 |
+
|
| 96 |
+
// aligned_large_pages_alloc() will return suitably aligned memory,
|
| 97 |
+
// if possible using large pages.
|
| 98 |
+
|
| 99 |
+
#if defined(_WIN32)
|
| 100 |
+
|
| 101 |
+
static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) {
|
| 102 |
+
|
| 103 |
+
return windows_try_with_large_page_priviliges(
|
| 104 |
+
[&](size_t largePageSize) {
|
| 105 |
+
// Round up size to full pages and allocate
|
| 106 |
+
allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
|
| 107 |
+
return VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
|
| 108 |
+
PAGE_READWRITE);
|
| 109 |
+
},
|
| 110 |
+
[]() { return (void*) nullptr; });
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
void* aligned_large_pages_alloc(size_t allocSize) {
|
| 114 |
+
|
| 115 |
+
// Try to allocate large pages
|
| 116 |
+
void* mem = aligned_large_pages_alloc_windows(allocSize);
|
| 117 |
+
|
| 118 |
+
// Fall back to regular, page-aligned, allocation if necessary
|
| 119 |
+
if (!mem)
|
| 120 |
+
mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
|
| 121 |
+
|
| 122 |
+
return mem;
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
#else
|
| 126 |
+
|
| 127 |
+
void* aligned_large_pages_alloc(size_t allocSize) {
|
| 128 |
+
|
| 129 |
+
#if defined(__linux__)
|
| 130 |
+
constexpr size_t alignment = 2 * 1024 * 1024; // 2MB page size assumed
|
| 131 |
+
#else
|
| 132 |
+
constexpr size_t alignment = 4096; // small page size assumed
|
| 133 |
+
#endif
|
| 134 |
+
|
| 135 |
+
// Round up to multiples of alignment
|
| 136 |
+
size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
|
| 137 |
+
void* mem = std_aligned_alloc(alignment, size);
|
| 138 |
+
#if defined(MADV_HUGEPAGE)
|
| 139 |
+
madvise(mem, size, MADV_HUGEPAGE);
|
| 140 |
+
#endif
|
| 141 |
+
return mem;
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
#endif
|
| 145 |
+
|
| 146 |
+
// Reports whether large pages can be used. On Windows this is probed by
// actually allocating (and releasing) one large-page block; on Linux the
// answer only depends on MADV_HUGEPAGE being defined at compile time; all
// other platforms report false.
bool has_large_pages() {

#if defined(_WIN32)

    constexpr size_t probeSize = 2 * 1024 * 1024;  // 2MB page size assumed

    void* const probe = aligned_large_pages_alloc_windows(probeSize);
    if (!probe)
        return false;

    aligned_large_pages_free(probe);
    return true;

#elif defined(__linux__) && defined(MADV_HUGEPAGE)

    return true;

#else

    return false;

#endif
}
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
// aligned_large_pages_free() will free the previously memory allocated
|
| 179 |
+
// by aligned_large_pages_alloc(). The effect is a nop if mem == nullptr.
|
| 180 |
+
|
| 181 |
+
#if defined(_WIN32)
|
| 182 |
+
|
| 183 |
+
void aligned_large_pages_free(void* mem) {
|
| 184 |
+
|
| 185 |
+
if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
|
| 186 |
+
{
|
| 187 |
+
DWORD err = GetLastError();
|
| 188 |
+
std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err
|
| 189 |
+
<< std::dec << std::endl;
|
| 190 |
+
exit(EXIT_FAILURE);
|
| 191 |
+
}
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
#else
|
| 195 |
+
|
| 196 |
+
void aligned_large_pages_free(void* mem) { std_aligned_free(mem); }
|
| 197 |
+
|
| 198 |
+
#endif
|
| 199 |
+
} // namespace Stockfish
|
src/memory.h
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef MEMORY_H_INCLUDED
|
| 20 |
+
#define MEMORY_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <algorithm>
|
| 23 |
+
#include <cstdint>
|
| 24 |
+
#include <memory>
|
| 25 |
+
#include <new>
|
| 26 |
+
#include <type_traits>
|
| 27 |
+
#include <utility>
|
| 28 |
+
#include <cstring>
|
| 29 |
+
|
| 30 |
+
#include "types.h"
|
| 31 |
+
|
| 32 |
+
#if defined(_WIN64)
|
| 33 |
+
|
| 34 |
+
#if _WIN32_WINNT < 0x0601
|
| 35 |
+
#undef _WIN32_WINNT
|
| 36 |
+
#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
|
| 37 |
+
#endif
|
| 38 |
+
|
| 39 |
+
#if !defined(NOMINMAX)
|
| 40 |
+
#define NOMINMAX
|
| 41 |
+
#endif
|
| 42 |
+
#include <windows.h>
|
| 43 |
+
|
| 44 |
+
// Some Windows headers (RPC/old headers) define short macros such
|
| 45 |
+
// as 'small' expanding to 'char', which breaks identifiers in the code.
|
| 46 |
+
// Undefine those macros immediately after including <windows.h>.
|
| 47 |
+
#ifdef small
|
| 48 |
+
#undef small
|
| 49 |
+
#endif
|
| 50 |
+
|
| 51 |
+
#include <psapi.h>
|
| 52 |
+
|
| 53 |
+
extern "C" {
|
| 54 |
+
using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE);
|
| 55 |
+
using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
|
| 56 |
+
using AdjustTokenPrivileges_t =
|
| 57 |
+
bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
|
| 58 |
+
}
|
| 59 |
+
#endif
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
namespace Stockfish {
|
| 63 |
+
|
| 64 |
+
void* std_aligned_alloc(size_t alignment, size_t size);
|
| 65 |
+
void std_aligned_free(void* ptr);
|
| 66 |
+
|
| 67 |
+
// Memory aligned by page size, min alignment: 4096 bytes
|
| 68 |
+
void* aligned_large_pages_alloc(size_t size);
|
| 69 |
+
void aligned_large_pages_free(void* mem);
|
| 70 |
+
|
| 71 |
+
bool has_large_pages();
|
| 72 |
+
|
| 73 |
+
// Destroys a single object that was created with placement new and hands its
// storage to free_func. A null pointer is ignored. The destructor call is only
// emitted for types that are not trivially destructible.
template<typename T, typename FREE_FUNC>
void memory_deleter(T* ptr, FREE_FUNC free_func) {
    if (ptr != nullptr)
    {
        // Placement-new'ed objects need an explicit destructor call
        if constexpr (!std::is_trivially_destructible_v<T>)
            ptr->~T();

        free_func(ptr);
    }
}
|
| 86 |
+
|
| 87 |
+
// Destroys an array that was created by the array form of memory_allocator()
// and hands the underlying allocation to free_func. The element count is
// stored in a header located max(sizeof(size_t), alignof(T)) bytes before the
// first element; free_func receives the address of that header. A null
// pointer is ignored.
template<typename T, typename FREE_FUNC>
void memory_deleter_array(T* ptr, FREE_FUNC free_func) {
    if (ptr == nullptr)
        return;

    // Step back from the first element to the start of the allocation
    const size_t header_bytes = std::max(sizeof(size_t), alignof(T));
    char* const  block_start  = reinterpret_cast<char*>(ptr) - header_bytes;

    if constexpr (!std::is_trivially_destructible_v<T>)
    {
        // Destroy the elements from the last one down to the first
        size_t remaining = *reinterpret_cast<size_t*>(block_start);
        while (remaining > 0)
        {
            --remaining;
            ptr[remaining].~T();
        }
    }

    free_func(block_start);
}
|
| 110 |
+
|
| 111 |
+
// Allocates memory for a single object and places it there with placement new
|
| 112 |
+
template<typename T, typename ALLOC_FUNC, typename... Args>
|
| 113 |
+
inline std::enable_if_t<!std::is_array_v<T>, T*> memory_allocator(ALLOC_FUNC alloc_func,
|
| 114 |
+
Args&&... args) {
|
| 115 |
+
void* raw_memory = alloc_func(sizeof(T));
|
| 116 |
+
ASSERT_ALIGNED(raw_memory, alignof(T));
|
| 117 |
+
return new (raw_memory) T(std::forward<Args>(args)...);
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
// Allocates memory for an array of unknown bound and places it there with placement new
|
| 121 |
+
template<typename T, typename ALLOC_FUNC>
|
| 122 |
+
inline std::enable_if_t<std::is_array_v<T>, std::remove_extent_t<T>*>
|
| 123 |
+
memory_allocator(ALLOC_FUNC alloc_func, size_t num) {
|
| 124 |
+
using ElementType = std::remove_extent_t<T>;
|
| 125 |
+
|
| 126 |
+
const size_t array_offset = std::max(sizeof(size_t), alignof(ElementType));
|
| 127 |
+
|
| 128 |
+
// Save the array size in the memory location
|
| 129 |
+
char* raw_memory =
|
| 130 |
+
reinterpret_cast<char*>(alloc_func(array_offset + num * sizeof(ElementType)));
|
| 131 |
+
ASSERT_ALIGNED(raw_memory, alignof(T));
|
| 132 |
+
|
| 133 |
+
new (raw_memory) size_t(num);
|
| 134 |
+
|
| 135 |
+
for (size_t i = 0; i < num; ++i)
|
| 136 |
+
new (raw_memory + array_offset + i * sizeof(ElementType)) ElementType();
|
| 137 |
+
|
| 138 |
+
// Need to return the pointer at the start of the array so that
|
| 139 |
+
// the indexing in unique_ptr<T[]> works.
|
| 140 |
+
return reinterpret_cast<ElementType*>(raw_memory + array_offset);
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
//
|
| 144 |
+
//
|
| 145 |
+
// aligned large page unique ptr
|
| 146 |
+
//
|
| 147 |
+
//
|
| 148 |
+
|
| 149 |
+
template<typename T>
|
| 150 |
+
struct LargePageDeleter {
|
| 151 |
+
void operator()(T* ptr) const { return memory_deleter<T>(ptr, aligned_large_pages_free); }
|
| 152 |
+
};
|
| 153 |
+
|
| 154 |
+
template<typename T>
|
| 155 |
+
struct LargePageArrayDeleter {
|
| 156 |
+
void operator()(T* ptr) const { return memory_deleter_array<T>(ptr, aligned_large_pages_free); }
|
| 157 |
+
};
|
| 158 |
+
|
| 159 |
+
template<typename T>
|
| 160 |
+
using LargePagePtr =
|
| 161 |
+
std::conditional_t<std::is_array_v<T>,
|
| 162 |
+
std::unique_ptr<T, LargePageArrayDeleter<std::remove_extent_t<T>>>,
|
| 163 |
+
std::unique_ptr<T, LargePageDeleter<T>>>;
|
| 164 |
+
|
| 165 |
+
// make_unique_large_page for single objects
|
| 166 |
+
template<typename T, typename... Args>
|
| 167 |
+
std::enable_if_t<!std::is_array_v<T>, LargePagePtr<T>> make_unique_large_page(Args&&... args) {
|
| 168 |
+
static_assert(alignof(T) <= 4096,
|
| 169 |
+
"aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
|
| 170 |
+
|
| 171 |
+
T* obj = memory_allocator<T>(aligned_large_pages_alloc, std::forward<Args>(args)...);
|
| 172 |
+
|
| 173 |
+
return LargePagePtr<T>(obj);
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
// make_unique_large_page for arrays of unknown bound
|
| 177 |
+
template<typename T>
|
| 178 |
+
std::enable_if_t<std::is_array_v<T>, LargePagePtr<T>> make_unique_large_page(size_t num) {
|
| 179 |
+
using ElementType = std::remove_extent_t<T>;
|
| 180 |
+
|
| 181 |
+
static_assert(alignof(ElementType) <= 4096,
|
| 182 |
+
"aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
|
| 183 |
+
|
| 184 |
+
ElementType* memory = memory_allocator<T>(aligned_large_pages_alloc, num);
|
| 185 |
+
|
| 186 |
+
return LargePagePtr<T>(memory);
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
//
|
| 190 |
+
//
|
| 191 |
+
// aligned unique ptr
|
| 192 |
+
//
|
| 193 |
+
//
|
| 194 |
+
|
| 195 |
+
template<typename T>
|
| 196 |
+
struct AlignedDeleter {
|
| 197 |
+
void operator()(T* ptr) const { return memory_deleter<T>(ptr, std_aligned_free); }
|
| 198 |
+
};
|
| 199 |
+
|
| 200 |
+
template<typename T>
|
| 201 |
+
struct AlignedArrayDeleter {
|
| 202 |
+
void operator()(T* ptr) const { return memory_deleter_array<T>(ptr, std_aligned_free); }
|
| 203 |
+
};
|
| 204 |
+
|
| 205 |
+
template<typename T>
|
| 206 |
+
using AlignedPtr =
|
| 207 |
+
std::conditional_t<std::is_array_v<T>,
|
| 208 |
+
std::unique_ptr<T, AlignedArrayDeleter<std::remove_extent_t<T>>>,
|
| 209 |
+
std::unique_ptr<T, AlignedDeleter<T>>>;
|
| 210 |
+
|
| 211 |
+
// make_unique_aligned for single objects
|
| 212 |
+
template<typename T, typename... Args>
|
| 213 |
+
std::enable_if_t<!std::is_array_v<T>, AlignedPtr<T>> make_unique_aligned(Args&&... args) {
|
| 214 |
+
const auto func = [](size_t size) { return std_aligned_alloc(alignof(T), size); };
|
| 215 |
+
T* obj = memory_allocator<T>(func, std::forward<Args>(args)...);
|
| 216 |
+
|
| 217 |
+
return AlignedPtr<T>(obj);
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
// make_unique_aligned for arrays of unknown bound
|
| 221 |
+
template<typename T>
|
| 222 |
+
std::enable_if_t<std::is_array_v<T>, AlignedPtr<T>> make_unique_aligned(size_t num) {
|
| 223 |
+
using ElementType = std::remove_extent_t<T>;
|
| 224 |
+
|
| 225 |
+
const auto func = [](size_t size) { return std_aligned_alloc(alignof(ElementType), size); };
|
| 226 |
+
ElementType* memory = memory_allocator<T>(func, num);
|
| 227 |
+
|
| 228 |
+
return AlignedPtr<T>(memory);
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
// Returns the first address at or after ptr that is a multiple of Alignment.
// ptr must point into a buffer of at least `sizeof(T) * N + alignment` bytes,
// where N is the number of elements that will be used, so that the adjusted
// pointer still leaves room for them.
template<uintptr_t Alignment, typename T>
T* align_ptr_up(T* ptr) {
    static_assert(alignof(T) < Alignment);

    // Bump past the next boundary, then drop the remainder
    const uintptr_t address = reinterpret_cast<uintptr_t>(reinterpret_cast<char*>(ptr));
    const uintptr_t bumped  = address + (Alignment - 1);
    const uintptr_t aligned = bumped - bumped % Alignment;

    return reinterpret_cast<T*>(reinterpret_cast<char*>(aligned));
}
|
| 243 |
+
|
| 244 |
+
#if defined(_WIN32)
|
| 245 |
+
|
| 246 |
+
// Runs fyes(largePageSize) with SeLockMemoryPrivilege temporarily enabled for
// the current process and returns its result. If large pages are unsupported,
// the build is not 64-bit, or any step needed to obtain the privilege fails,
// fno() is called instead and its result is returned. Both callables must
// return the same type.
//
// The process token is closed on every exit path once it has been opened.
template<typename FuncYesT, typename FuncNoT>
auto windows_try_with_large_page_priviliges([[maybe_unused]] FuncYesT&& fyes, FuncNoT&& fno) {

#if !defined(_WIN64)
    return fno();
#else

    // Owns the process token handle and closes it when the function returns
    struct TokenGuard {
        HANDLE handle{};
        ~TokenGuard() {
            if (handle)
                CloseHandle(handle);
        }
    };

    const size_t largePageSize = GetLargePageMinimum();
    if (!largePageSize)
        return fno();

    // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges

    HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));

    if (!hAdvapi32)
        hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));

    // Without the module there is nothing to resolve
    if (!hAdvapi32)
        return fno();

    auto OpenProcessToken_f =
      OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
    if (!OpenProcessToken_f)
        return fno();
    auto LookupPrivilegeValueA_f =
      LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
    if (!LookupPrivilegeValueA_f)
        return fno();
    auto AdjustTokenPrivileges_f =
      AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
    if (!AdjustTokenPrivileges_f)
        return fno();

    // We need SeLockMemoryPrivilege, so try to enable it for the process

    TokenGuard token;
    if (!OpenProcessToken_f(  // OpenProcessToken()
          GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token.handle))
        return fno();

    LUID luid{};
    if (!LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
        return fno();

    TOKEN_PRIVILEGES tp{};
    TOKEN_PRIVILEGES prevTp{};
    DWORD            prevTpLen = 0;

    tp.PrivilegeCount           = 1;
    tp.Privileges[0].Luid       = luid;
    tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;

    // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges()
    // succeeds, we still need to query GetLastError() to ensure that the privileges
    // were actually obtained.

    if (!AdjustTokenPrivileges_f(token.handle, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
                                 &prevTpLen)
        || GetLastError() != ERROR_SUCCESS)
        return fno();

    auto&& ret = fyes(largePageSize);

    // Privilege no longer needed, restore previous state
    AdjustTokenPrivileges_f(token.handle, FALSE, &prevTp, 0, nullptr, nullptr);

    // The token itself is closed by TokenGuard after the result has been returned
    return std::forward<decltype(ret)>(ret);

#endif
}
|
| 317 |
+
|
| 318 |
+
#endif
|
| 319 |
+
|
| 320 |
+
// Reads a T from the first sizeof(T) bytes at buffer. The bytes are copied
// with memcpy, so buffer does not need to be aligned for T. ByteT must be a
// one-byte type and T must be trivially copyable.
template<typename T, typename ByteT>
T load_as(const ByteT* buffer) {
    static_assert(std::is_trivially_copyable_v<T>, "Type must be trivially copyable");
    static_assert(sizeof(ByteT) == 1);

    T result;
    std::memcpy(&result, buffer, sizeof(result));
    return result;
}
|
| 330 |
+
|
| 331 |
+
} // namespace Stockfish
|
| 332 |
+
|
| 333 |
+
#endif // #ifndef MEMORY_H_INCLUDED
|
src/misc.cpp
ADDED
|
@@ -0,0 +1,549 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include "misc.h"
|
| 20 |
+
|
| 21 |
+
#include <array>
|
| 22 |
+
#include <atomic>
|
| 23 |
+
#include <cassert>
|
| 24 |
+
#include <cctype>
|
| 25 |
+
#include <cmath>
|
| 26 |
+
#include <cstdlib>
|
| 27 |
+
#include <fstream>
|
| 28 |
+
#include <iomanip>
|
| 29 |
+
#include <iostream>
|
| 30 |
+
#include <iterator>
|
| 31 |
+
#include <limits>
|
| 32 |
+
#include <mutex>
|
| 33 |
+
#include <sstream>
|
| 34 |
+
#include <string_view>
|
| 35 |
+
|
| 36 |
+
#include "types.h"
|
| 37 |
+
|
| 38 |
+
namespace Stockfish {
|
| 39 |
+
|
| 40 |
+
namespace {
|
| 41 |
+
|
| 42 |
+
// Version number or dev.
|
| 43 |
+
constexpr std::string_view version = "dev";
|
| 44 |
+
|
| 45 |
+
// Our fancy logging facility. The trick here is to replace cin.rdbuf() and
|
| 46 |
+
// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We
|
| 47 |
+
// can toggle the logging of std::cout and std::cin at runtime whilst preserving
|
| 48 |
+
// usual I/O functionality, all without changing a single line of code!
|
| 49 |
+
// Idea from http://groups.google.com/group/comp.lang.c++/msg/1d941c0f26ea0d81
|
| 50 |
+
|
| 51 |
+
// Stream buffer that forwards every character to a wrapped buffer and also
// copies it into a log buffer, marking the start of each logged line with
// "<< " for output and ">> " for input.
struct Tie: public std::streambuf {  // MSVC requires split streambuf for cin and cout

    Tie(std::streambuf* b, std::streambuf* l) :
        buf(b),
        logBuf(l) {}

    // Flushes the log buffer first, then reports the wrapped buffer's status
    int sync() override {
        logBuf->pubsync();
        return buf->pubsync();
    }

    // Output path: write to the wrapped buffer, then log what was written
    int overflow(int c) override {
        const int written = buf->sputc(char(c));
        return log(written, "<< ");
    }

    // Peeking does not consume a character, so nothing is logged
    int underflow() override { return buf->sgetc(); }

    // Input path: consume one character from the wrapped buffer and log it
    int uflow() override {
        const int consumed = buf->sbumpc();
        return log(consumed, ">> ");
    }

    std::streambuf *buf, *logBuf;

    // Appends c to the log. The 3-character prefix is emitted whenever the
    // previously logged character was a newline.
    int log(int c, const char* prefix) {

        static int last = '\n';  // Single log file

        const bool atLineStart = (last == '\n');
        if (atLineStart)
            logBuf->sputn(prefix, 3);

        last = logBuf->sputc(char(c));
        return last;
    }
};
|
| 74 |
+
|
| 75 |
+
class Logger {
|
| 76 |
+
|
| 77 |
+
Logger() :
|
| 78 |
+
in(std::cin.rdbuf(), file.rdbuf()),
|
| 79 |
+
out(std::cout.rdbuf(), file.rdbuf()) {}
|
| 80 |
+
~Logger() { start(""); }
|
| 81 |
+
|
| 82 |
+
std::ofstream file;
|
| 83 |
+
Tie in, out;
|
| 84 |
+
|
| 85 |
+
public:
|
| 86 |
+
static void start(const std::string& fname) {
|
| 87 |
+
|
| 88 |
+
static Logger l;
|
| 89 |
+
|
| 90 |
+
if (l.file.is_open())
|
| 91 |
+
{
|
| 92 |
+
std::cout.rdbuf(l.out.buf);
|
| 93 |
+
std::cin.rdbuf(l.in.buf);
|
| 94 |
+
l.file.close();
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
if (!fname.empty())
|
| 98 |
+
{
|
| 99 |
+
l.file.open(fname, std::ifstream::out);
|
| 100 |
+
|
| 101 |
+
if (!l.file.is_open())
|
| 102 |
+
{
|
| 103 |
+
std::cerr << "Unable to open debug log file " << fname << std::endl;
|
| 104 |
+
exit(EXIT_FAILURE);
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
std::cin.rdbuf(&l.in);
|
| 108 |
+
std::cout.rdbuf(&l.out);
|
| 109 |
+
}
|
| 110 |
+
}
|
| 111 |
+
};
|
| 112 |
+
|
| 113 |
+
} // namespace
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
// Returns the full name of the current Stockfish version.
|
| 117 |
+
//
|
| 118 |
+
// For local dev compiles we try to append the commit SHA and
|
| 119 |
+
// commit date from git. If that fails only the local compilation
|
| 120 |
+
// date is set and "nogit" is specified:
|
| 121 |
+
// Stockfish dev-YYYYMMDD-SHA
|
| 122 |
+
// or
|
| 123 |
+
// Stockfish dev-YYYYMMDD-nogit
|
| 124 |
+
//
|
| 125 |
+
// For releases (non-dev builds) we only include the version number:
|
| 126 |
+
// Stockfish version
|
| 127 |
+
std::string engine_version_info() {
    std::stringstream out;
    out << "Stockfish " << version << std::setfill('0');

    if constexpr (version == "dev")
    {
        // Date part: the git date when provided, else the compilation date
        out << "-";
#ifdef GIT_DATE
        out << stringify(GIT_DATE);
#else
        constexpr std::string_view monthNames("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");

        // __DATE__ comes from the compiler, format is "Sep 21 2008"
        std::stringstream buildDate(__DATE__);
        std::string       monthName, dayOfMonth, yearDigits;
        buildDate >> monthName >> dayOfMonth >> yearDigits;

        // Every month name occupies four characters of monthNames
        const auto monthNumber = 1 + monthNames.find(monthName) / 4;

        out << yearDigits;
        out << std::setw(2) << std::setfill('0') << monthNumber;
        out << std::setw(2) << std::setfill('0') << dayOfMonth;
#endif

        // Revision part: the git SHA when provided, else "nogit"
        out << "-";

#ifdef GIT_SHA
        out << stringify(GIT_SHA);
#else
        out << "nogit";
#endif
    }

    return out.str();
}
|
| 158 |
+
|
| 159 |
+
std::string engine_info(bool to_uci) {
|
| 160 |
+
return engine_version_info() + (to_uci ? "\nid author " : " by ")
|
| 161 |
+
+ "the Stockfish developers (see AUTHORS file)";
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
// Returns a string trying to describe the compiler we use
|
| 166 |
+
std::string compiler_info() {
|
| 167 |
+
|
| 168 |
+
#define make_version_string(major, minor, patch) \
|
| 169 |
+
stringify(major) "." stringify(minor) "." stringify(patch)
|
| 170 |
+
|
| 171 |
+
// Predefined macros hell:
|
| 172 |
+
//
|
| 173 |
+
// __GNUC__ Compiler is GCC, Clang or ICX
|
| 174 |
+
// __clang__ Compiler is Clang or ICX
|
| 175 |
+
// __INTEL_LLVM_COMPILER Compiler is ICX
|
| 176 |
+
// _MSC_VER Compiler is MSVC
|
| 177 |
+
// _WIN32 Building on Windows (any)
|
| 178 |
+
// _WIN64 Building on Windows 64 bit
|
| 179 |
+
|
| 180 |
+
std::string compiler = "\nCompiled by : ";
|
| 181 |
+
|
| 182 |
+
#if defined(__INTEL_LLVM_COMPILER)
|
| 183 |
+
compiler += "ICX ";
|
| 184 |
+
compiler += stringify(__INTEL_LLVM_COMPILER);
|
| 185 |
+
#elif defined(__clang__)
|
| 186 |
+
compiler += "clang++ ";
|
| 187 |
+
compiler += make_version_string(__clang_major__, __clang_minor__, __clang_patchlevel__);
|
| 188 |
+
#elif _MSC_VER
|
| 189 |
+
compiler += "MSVC ";
|
| 190 |
+
compiler += "(version ";
|
| 191 |
+
compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD);
|
| 192 |
+
compiler += ")";
|
| 193 |
+
#elif defined(__e2k__) && defined(__LCC__)
|
| 194 |
+
#define dot_ver2(n) \
|
| 195 |
+
compiler += char('.'); \
|
| 196 |
+
compiler += char('0' + (n) / 10); \
|
| 197 |
+
compiler += char('0' + (n) % 10);
|
| 198 |
+
|
| 199 |
+
compiler += "MCST LCC ";
|
| 200 |
+
compiler += "(version ";
|
| 201 |
+
compiler += std::to_string(__LCC__ / 100);
|
| 202 |
+
dot_ver2(__LCC__ % 100) dot_ver2(__LCC_MINOR__) compiler += ")";
|
| 203 |
+
#elif __GNUC__
|
| 204 |
+
compiler += "g++ (GNUC) ";
|
| 205 |
+
compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
|
| 206 |
+
#else
|
| 207 |
+
compiler += "Unknown compiler ";
|
| 208 |
+
compiler += "(unknown version)";
|
| 209 |
+
#endif
|
| 210 |
+
|
| 211 |
+
#if defined(__APPLE__)
|
| 212 |
+
compiler += " on Apple";
|
| 213 |
+
#elif defined(__CYGWIN__)
|
| 214 |
+
compiler += " on Cygwin";
|
| 215 |
+
#elif defined(__MINGW64__)
|
| 216 |
+
compiler += " on MinGW64";
|
| 217 |
+
#elif defined(__MINGW32__)
|
| 218 |
+
compiler += " on MinGW32";
|
| 219 |
+
#elif defined(__ANDROID__)
|
| 220 |
+
compiler += " on Android";
|
| 221 |
+
#elif defined(__linux__)
|
| 222 |
+
compiler += " on Linux";
|
| 223 |
+
#elif defined(_WIN64)
|
| 224 |
+
compiler += " on Microsoft Windows 64-bit";
|
| 225 |
+
#elif defined(_WIN32)
|
| 226 |
+
compiler += " on Microsoft Windows 32-bit";
|
| 227 |
+
#else
|
| 228 |
+
compiler += " on unknown system";
|
| 229 |
+
#endif
|
| 230 |
+
|
| 231 |
+
compiler += "\nCompilation architecture : ";
|
| 232 |
+
#if defined(ARCH)
|
| 233 |
+
compiler += stringify(ARCH);
|
| 234 |
+
#else
|
| 235 |
+
compiler += "(undefined architecture)";
|
| 236 |
+
#endif
|
| 237 |
+
|
| 238 |
+
compiler += "\nCompilation settings : ";
|
| 239 |
+
compiler += (Is64Bit ? "64bit" : "32bit");
|
| 240 |
+
#if defined(USE_AVX512ICL)
|
| 241 |
+
compiler += " AVX512ICL";
|
| 242 |
+
#endif
|
| 243 |
+
#if defined(USE_VNNI)
|
| 244 |
+
compiler += " VNNI";
|
| 245 |
+
#endif
|
| 246 |
+
#if defined(USE_AVX512)
|
| 247 |
+
compiler += " AVX512";
|
| 248 |
+
#endif
|
| 249 |
+
compiler += (HasPext ? " BMI2" : "");
|
| 250 |
+
#if defined(USE_AVX2)
|
| 251 |
+
compiler += " AVX2";
|
| 252 |
+
#endif
|
| 253 |
+
#if defined(USE_SSE41)
|
| 254 |
+
compiler += " SSE41";
|
| 255 |
+
#endif
|
| 256 |
+
#if defined(USE_SSSE3)
|
| 257 |
+
compiler += " SSSE3";
|
| 258 |
+
#endif
|
| 259 |
+
#if defined(USE_SSE2)
|
| 260 |
+
compiler += " SSE2";
|
| 261 |
+
#endif
|
| 262 |
+
#if defined(USE_NEON_DOTPROD)
|
| 263 |
+
compiler += " NEON_DOTPROD";
|
| 264 |
+
#elif defined(USE_NEON)
|
| 265 |
+
compiler += " NEON";
|
| 266 |
+
#endif
|
| 267 |
+
compiler += (HasPopCnt ? " POPCNT" : "");
|
| 268 |
+
|
| 269 |
+
#if !defined(NDEBUG)
|
| 270 |
+
compiler += " DEBUG";
|
| 271 |
+
#endif
|
| 272 |
+
|
| 273 |
+
compiler += "\nCompiler __VERSION__ macro : ";
|
| 274 |
+
#ifdef __VERSION__
|
| 275 |
+
compiler += __VERSION__;
|
| 276 |
+
#else
|
| 277 |
+
compiler += "(undefined macro)";
|
| 278 |
+
#endif
|
| 279 |
+
|
| 280 |
+
compiler += "\n";
|
| 281 |
+
|
| 282 |
+
return compiler;
|
| 283 |
+
}
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
// Debug functions used mainly to collect run-time statistics
|
| 287 |
+
constexpr int MaxDebugSlots = 32;
|
| 288 |
+
|
| 289 |
+
namespace {
|
| 290 |
+
|
| 291 |
+
// Fixed group of N atomic 64-bit counters, all starting at zero.
template<size_t N>
struct DebugInfo {
    std::array<std::atomic<int64_t>, N> data = {0};

    // Access to one counter; the index is only checked by assert
    [[nodiscard]] constexpr std::atomic<int64_t>& operator[](size_t index) {
        assert(index < N);
        return data[index];
    }

    // std::atomic is not copy-assignable, so the values are copied one
    // counter at a time with a load followed by a store.
    constexpr DebugInfo& operator=(const DebugInfo& other) {
        size_t slot = 0;
        for (auto& counter : data)
            counter.store(other.data[slot++].load());
        return *this;
    }
};
|
| 306 |
+
|
| 307 |
+
// Counter group whose entry [1] starts at the lowest and entry [2] at the
// highest representable int64_t value; entry [0] keeps its zero start.
struct DebugExtremes: public DebugInfo<3> {
    DebugExtremes() {
        data[1].store(std::numeric_limits<int64_t>::min());
        data[2].store(std::numeric_limits<int64_t>::max());
    }
};
|
| 313 |
+
|
| 314 |
+
// One entry per debug slot. Counter layout inside an entry, as written by the
// dbg_*_of functions and read by dbg_print:
//   hit      [0] calls, [1] calls whose condition was true
//   mean     [0] samples, [1] sum of the values
//   stdev    [0] samples, [1] sum, [2] sum of squares
//   correl   [0] samples, [1] sum of value1, [2] sum of value1^2,
//            [3] sum of value2, [4] sum of value2^2, [5] sum of value1*value2
//   extremes [0] samples, [1] largest value seen, [2] smallest value seen
std::array<DebugInfo<2>, MaxDebugSlots> hit;
|
| 315 |
+
std::array<DebugInfo<2>, MaxDebugSlots> mean;
|
| 316 |
+
std::array<DebugInfo<3>, MaxDebugSlots> stdev;
|
| 317 |
+
std::array<DebugInfo<6>, MaxDebugSlots> correl;
|
| 318 |
+
std::array<DebugExtremes, MaxDebugSlots> extremes;
|
| 319 |
+
|
| 320 |
+
} // namespace
|
| 321 |
+
|
| 322 |
+
// Records one call in the given slot and, when cond is true, one hit.
// An out-of-range slot makes std::array::at throw.
void dbg_hit_on(bool cond, int slot) {

    auto& counters = hit.at(slot);

    ++counters[0];
    if (cond)
        ++counters[1];
}
|
| 328 |
+
|
| 329 |
+
// Adds one sample to the running count and sum kept for the given slot.
void dbg_mean_of(int64_t value, int slot) {

    auto& sums = mean.at(slot);

    ++sums[0];
    sums[1] += value;
}
|
| 334 |
+
|
| 335 |
+
// Adds one sample to the count, sum and sum of squares kept for the slot.
void dbg_stdev_of(int64_t value, int slot) {

    auto& sums = stdev.at(slot);

    ++sums[0];
    sums[1] += value;
    sums[2] += value * value;
}
|
| 341 |
+
|
| 342 |
+
// Counts the sample and raises the stored maximum / lowers the stored minimum
// when value lies beyond it.
void dbg_extremes_of(int64_t value, int slot) {
    auto& stats = extremes.at(slot);
    ++stats[0];

    // A failed compare-exchange refreshes 'seen' with the stored value, so
    // each loop stops as soon as the stored extreme is already good enough.
    auto& maxSeen = stats[1];
    for (int64_t seen = maxSeen.load(); seen < value;)
        if (maxSeen.compare_exchange_weak(seen, value))
            break;

    auto& minSeen = stats[2];
    for (int64_t seen = minSeen.load(); seen > value;)
        if (minSeen.compare_exchange_weak(seen, value))
            break;
}
|
| 353 |
+
|
| 354 |
+
// Adds one (value1, value2) pair to the sums from which dbg_print derives
// the correlation coefficient of the slot.
void dbg_correl_of(int64_t value1, int64_t value2, int slot) {

    auto& sums = correl.at(slot);

    ++sums[0];
    sums[1] += value1;
    sums[2] += value1 * value1;
    sums[3] += value2;
    sums[4] += value2 * value2;
    sums[5] += value1 * value2;
}
|
| 363 |
+
|
| 364 |
+
void dbg_print() {
|
| 365 |
+
|
| 366 |
+
int64_t n;
|
| 367 |
+
auto E = [&n](int64_t x) { return double(x) / n; };
|
| 368 |
+
auto sqr = [](double x) { return x * x; };
|
| 369 |
+
|
| 370 |
+
for (int i = 0; i < MaxDebugSlots; ++i)
|
| 371 |
+
if ((n = hit[i][0]))
|
| 372 |
+
std::cerr << "Hit #" << i << ": Total " << n << " Hits " << hit[i][1]
|
| 373 |
+
<< " Hit Rate (%) " << 100.0 * E(hit[i][1]) << std::endl;
|
| 374 |
+
|
| 375 |
+
for (int i = 0; i < MaxDebugSlots; ++i)
|
| 376 |
+
if ((n = mean[i][0]))
|
| 377 |
+
{
|
| 378 |
+
std::cerr << "Mean #" << i << ": Total " << n << " Mean " << E(mean[i][1]) << std::endl;
|
| 379 |
+
}
|
| 380 |
+
|
| 381 |
+
for (int i = 0; i < MaxDebugSlots; ++i)
|
| 382 |
+
if ((n = stdev[i][0]))
|
| 383 |
+
{
|
| 384 |
+
double r = sqrt(E(stdev[i][2]) - sqr(E(stdev[i][1])));
|
| 385 |
+
std::cerr << "Stdev #" << i << ": Total " << n << " Stdev " << r << std::endl;
|
| 386 |
+
}
|
| 387 |
+
|
| 388 |
+
for (int i = 0; i < MaxDebugSlots; ++i)
|
| 389 |
+
if ((n = extremes[i][0]))
|
| 390 |
+
{
|
| 391 |
+
std::cerr << "Extremity #" << i << ": Total " << n << " Min " << extremes[i][2]
|
| 392 |
+
<< " Max " << extremes[i][1] << std::endl;
|
| 393 |
+
}
|
| 394 |
+
|
| 395 |
+
for (int i = 0; i < MaxDebugSlots; ++i)
|
| 396 |
+
if ((n = correl[i][0]))
|
| 397 |
+
{
|
| 398 |
+
double r = (E(correl[i][5]) - E(correl[i][1]) * E(correl[i][3]))
|
| 399 |
+
/ (sqrt(E(correl[i][2]) - sqr(E(correl[i][1])))
|
| 400 |
+
* sqrt(E(correl[i][4]) - sqr(E(correl[i][3]))));
|
| 401 |
+
std::cerr << "Correl. #" << i << ": Total " << n << " Coefficient " << r << std::endl;
|
| 402 |
+
}
|
| 403 |
+
}
|
| 404 |
+
|
| 405 |
+
// Resets every debug slot by assigning it a freshly constructed entry.
void dbg_clear() {
    const auto reset = [](auto& slots) { slots.fill({}); };

    reset(hit);
    reset(mean);
    reset(stdev);
    reset(correl);
    reset(extremes);
}
|
| 412 |
+
|
| 413 |
+
// Used to serialize access to std::cout
|
| 414 |
+
// to avoid multiple threads writing at the same time.
|
| 415 |
+
std::ostream& operator<<(std::ostream& os, SyncCout sc) {
|
| 416 |
+
|
| 417 |
+
static std::mutex m;
|
| 418 |
+
|
| 419 |
+
if (sc == IO_LOCK)
|
| 420 |
+
m.lock();
|
| 421 |
+
|
| 422 |
+
if (sc == IO_UNLOCK)
|
| 423 |
+
m.unlock();
|
| 424 |
+
|
| 425 |
+
return os;
|
| 426 |
+
}
|
| 427 |
+
|
| 428 |
+
// Takes the output lock by inserting IO_LOCK into std::cout
void sync_cout_start() {
    std::cout << IO_LOCK;
}
|
| 429 |
+
// Releases the output lock by inserting IO_UNLOCK into std::cout
void sync_cout_end() {
    std::cout << IO_UNLOCK;
}
|
| 430 |
+
|
| 431 |
+
// Hash function based on public domain MurmurHash64A, by Austin Appleby.
|
| 432 |
+
// Returns a 64-bit hash of the 'size' bytes starting at 'data'.
// Complete 8-byte words are mixed first, then the remaining 1..7 tail bytes.
uint64_t hash_bytes(const char* data, size_t size) {
    const uint64_t m = 0xc6a4a7935bd1e995ull;
    const int      r = 47;

    uint64_t h = size * m;

    // End of the last complete 8-byte word
    const char* end = data + (size & ~(size_t) 7);

    for (const char* p = data; p != end; p += 8)
    {
        uint64_t k;
        std::memcpy(&k, p, sizeof(k));

        k *= m;
        k ^= k >> r;
        k *= m;

        h ^= k;
        h *= m;
    }

    if (const size_t tail = size & 7)
    {
        // Pack the tail bytes with the first one in the lowest position.
        // Each byte is widened through unsigned char: plain char may be
        // signed, and sign-extending a byte >= 0x80 would set every higher
        // bit of k and overwrite the tail bytes already shifted into it.
        uint64_t k = 0;
        for (size_t i = tail; i-- > 0;)
            k = (k << 8) | static_cast<unsigned char>(end[i]);

        h ^= k;
        h *= m;
    }

    h ^= h >> r;
    h *= m;
    h ^= h >> r;

    return h;
}
|
| 469 |
+
|
| 470 |
+
// Trampoline helper to avoid moving Logger to misc.h
|
| 471 |
+
void start_logger(const std::string& fname) {
    // Forwards to Logger, which is only visible inside this translation unit
    Logger::start(fname);
}
|
| 472 |
+
|
| 473 |
+
|
| 474 |
+
#ifdef _WIN32
|
| 475 |
+
#include <direct.h>
|
| 476 |
+
#define GETCWD _getcwd
|
| 477 |
+
#else
|
| 478 |
+
#include <unistd.h>
|
| 479 |
+
#define GETCWD getcwd
|
| 480 |
+
#endif
|
| 481 |
+
|
| 482 |
+
// Parses s with std::stoull and returns the result as size_t. Exits the
// program when the parsed number does not fit into size_t; exceptions thrown
// by std::stoull are not caught here.
size_t str_to_size_t(const std::string& s) {
    const unsigned long long parsed = std::stoull(s);

    if (parsed <= std::numeric_limits<size_t>::max())
        return static_cast<size_t>(parsed);

    std::exit(EXIT_FAILURE);
}
|
| 488 |
+
|
| 489 |
+
// Opens path in binary mode and returns its entire content, or std::nullopt
// when the stream could not be opened.
std::optional<std::string> read_file_to_string(const std::string& path) {
    std::ifstream input(path, std::ios_base::binary);

    if (input)
    {
        using ByteIterator = std::istreambuf_iterator<char>;
        return std::string(ByteIterator(input), ByteIterator());
    }

    return std::nullopt;
}
|
| 495 |
+
|
| 496 |
+
// Erases every character of s that std::isspace classifies as whitespace.
void remove_whitespace(std::string& s) {
    // std::isspace is only defined for EOF and values representable as
    // unsigned char, so each byte is converted before it is classified.
    // Passing a negative plain char is undefined behaviour.
    const auto isSpace = [](unsigned char c) { return std::isspace(c) != 0; };

    s.erase(std::remove_if(s.begin(), s.end(), isSpace), s.end());
}
|
| 499 |
+
|
| 500 |
+
// Returns true when every character of s is whitespace according to
// std::isspace; an empty view therefore yields true.
bool is_whitespace(std::string_view s) {
    // std::isspace is only defined for EOF and values representable as
    // unsigned char, so each byte is converted before it is classified.
    // Passing a negative plain char is undefined behaviour.
    const auto isSpace = [](unsigned char c) { return std::isspace(c) != 0; };

    return std::all_of(s.begin(), s.end(), isSpace);
}
|
| 503 |
+
|
| 504 |
+
// Derives the directory of the executable from argv0. The result ends with a
// path separator, and a leading "." + separator is replaced by the working
// directory.
std::string CommandLine::get_binary_directory(std::string argv0) {
#ifdef _WIN32
    const std::string pathSeparator = "\\";
    #ifdef _MSC_VER
    // Under windows argv[0] may not have the extension. Also _get_pgmptr() had
    // issues in some Windows 10 versions, so check returned values carefully.
    char* pgmptr = nullptr;
    if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr)
        argv0 = pgmptr;
    #endif
#else
    const std::string pathSeparator = "/";
#endif

    const std::string workingDirectory = CommandLine::get_working_directory();
    const std::string currentDirPrefix = "." + pathSeparator;

    // Keep everything up to and including the last separator of argv0. When
    // there is none, the binary is taken to be in the current directory.
    const size_t lastSeparator   = argv0.find_last_of("\\/");
    std::string  binaryDirectory = lastSeparator == std::string::npos
                                   ? currentDirPrefix
                                   : argv0.substr(0, lastSeparator + 1);

    // Only the '.' is replaced; the separator that follows it is kept
    if (binaryDirectory.rfind(currentDirPrefix, 0) == 0)
        binaryDirectory.replace(0, 1, workingDirectory);

    return binaryDirectory;
}
|
| 537 |
+
|
| 538 |
+
std::string CommandLine::get_working_directory() {
|
| 539 |
+
std::string workingDirectory = "";
|
| 540 |
+
char buff[40000];
|
| 541 |
+
char* cwd = GETCWD(buff, 40000);
|
| 542 |
+
if (cwd)
|
| 543 |
+
workingDirectory = cwd;
|
| 544 |
+
|
| 545 |
+
return workingDirectory;
|
| 546 |
+
}
|
| 547 |
+
|
| 548 |
+
|
| 549 |
+
} // namespace Stockfish
|
src/misc.h
ADDED
|
@@ -0,0 +1,538 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef MISC_H_INCLUDED
|
| 20 |
+
#define MISC_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <algorithm>
|
| 23 |
+
#include <array>
|
| 24 |
+
#include <cassert>
|
| 25 |
+
#include <chrono>
|
| 26 |
+
#include <cstdint>
|
| 27 |
+
#include <cstdio>
|
| 28 |
+
#include <exception> // IWYU pragma: keep
|
| 29 |
+
// IWYU pragma: no_include <__exception/terminate.h>
|
| 30 |
+
#include <functional>
|
| 31 |
+
#include <iosfwd>
|
| 32 |
+
#include <optional>
|
| 33 |
+
#include <cstring>
|
| 34 |
+
#include <memory>
|
| 35 |
+
#include <string>
|
| 36 |
+
#include <string_view>
|
| 37 |
+
#include <type_traits>
|
| 38 |
+
#include <vector>
|
| 39 |
+
|
| 40 |
+
#if !defined(NO_PREFETCH) && (defined(_MSC_VER) || defined(__INTEL_COMPILER))
|
| 41 |
+
#include <immintrin.h>
|
| 42 |
+
#endif
|
| 43 |
+
|
| 44 |
+
#define stringify2(x) #x
|
| 45 |
+
#define stringify(x) stringify2(x)
|
| 46 |
+
|
| 47 |
+
namespace Stockfish {
|
| 48 |
+
|
| 49 |
+
std::string engine_version_info();
|
| 50 |
+
std::string engine_info(bool to_uci = false);
|
| 51 |
+
std::string compiler_info();
|
| 52 |
+
|
| 53 |
+
// Prefetch hint enums for explicit call-site control.
|
| 54 |
+
// Access kind the prefetch is issued for. The numeric values are passed
// unchanged as the second argument of __builtin_prefetch.
enum class PrefetchRw {
    READ  = 0,
    WRITE = 1
};
|
| 58 |
+
|
| 59 |
+
// NOTE: PrefetchLoc controls locality / cache level, not whether a prefetch
|
| 60 |
+
// is issued. In particular, PrefetchLoc::NONE maps to a non-temporal /
|
| 61 |
+
// lowest-locality prefetch (Intel: _MM_HINT_NTA, GCC/Clang: locality = 0)
|
| 62 |
+
// and therefore still performs a prefetch. To completely disable
|
| 63 |
+
// prefetching, define NO_PREFETCH so that prefetch() becomes a no-op.
|
| 64 |
+
// Requested cache locality. The numeric values are passed unchanged as the
// third argument of __builtin_prefetch; the trailing notes name the hint
// get_intel_hint() selects for a read prefetch.
enum class PrefetchLoc {
    NONE     = 0,  // _MM_HINT_NTA: non-temporal, still issues a prefetch
    LOW      = 1,  // _MM_HINT_T2
    MODERATE = 2,  // _MM_HINT_T1
    HIGH     = 3   // _MM_HINT_T0
};
|
| 70 |
+
|
| 71 |
+
// Preloads the given address into cache. This is a non-blocking
|
| 72 |
+
// function that doesn't stall the CPU waiting for data to be loaded from memory,
|
| 73 |
+
// which can be quite slow.
|
| 74 |
+
#ifdef NO_PREFETCH
|
| 75 |
+
template<PrefetchRw RW = PrefetchRw::READ, PrefetchLoc LOC = PrefetchLoc::HIGH>
|
| 76 |
+
void prefetch(const void*) {}
|
| 77 |
+
#elif defined(_MSC_VER) || defined(__INTEL_COMPILER)
|
| 78 |
+
|
| 79 |
+
constexpr int get_intel_hint(PrefetchRw rw, PrefetchLoc loc) {
|
| 80 |
+
if (rw == PrefetchRw::WRITE)
|
| 81 |
+
{
|
| 82 |
+
#ifdef _MM_HINT_ET0
|
| 83 |
+
return _MM_HINT_ET0;
|
| 84 |
+
#else
|
| 85 |
+
// Fallback when write-prefetch hint is not available: use T0
|
| 86 |
+
return _MM_HINT_T0;
|
| 87 |
+
#endif
|
| 88 |
+
}
|
| 89 |
+
switch (loc)
|
| 90 |
+
{
|
| 91 |
+
case PrefetchLoc::NONE :
|
| 92 |
+
return _MM_HINT_NTA;
|
| 93 |
+
case PrefetchLoc::LOW :
|
| 94 |
+
return _MM_HINT_T2;
|
| 95 |
+
case PrefetchLoc::MODERATE :
|
| 96 |
+
return _MM_HINT_T1;
|
| 97 |
+
case PrefetchLoc::HIGH :
|
| 98 |
+
return _MM_HINT_T0;
|
| 99 |
+
default :
|
| 100 |
+
return _MM_HINT_T0;
|
| 101 |
+
}
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
template<PrefetchRw RW = PrefetchRw::READ, PrefetchLoc LOC = PrefetchLoc::HIGH>
|
| 105 |
+
void prefetch(const void* addr) {
|
| 106 |
+
_mm_prefetch(static_cast<const char*>(addr), get_intel_hint(RW, LOC));
|
| 107 |
+
}
|
| 108 |
+
#else
|
| 109 |
+
template<PrefetchRw RW = PrefetchRw::READ, PrefetchLoc LOC = PrefetchLoc::HIGH>
|
| 110 |
+
void prefetch(const void* addr) {
|
| 111 |
+
__builtin_prefetch(addr, static_cast<int>(RW), static_cast<int>(LOC));
|
| 112 |
+
}
|
| 113 |
+
#endif
|
| 114 |
+
|
| 115 |
+
void start_logger(const std::string& fname);
|
| 116 |
+
|
| 117 |
+
size_t str_to_size_t(const std::string& s);
|
| 118 |
+
|
| 119 |
+
#if defined(__linux__)
|
| 120 |
+
|
| 121 |
+
// Deleter functor that closes a FILE* with pclose; a null pointer is ignored.
struct PipeDeleter {
    void operator()(FILE* file) const {
        if (file == nullptr)
            return;

        pclose(file);
    }
};
|
| 129 |
+
|
| 130 |
+
#endif
|
| 131 |
+
|
| 132 |
+
// Reads the file as bytes.
|
| 133 |
+
// Returns std::nullopt if the file does not exist.
|
| 134 |
+
std::optional<std::string> read_file_to_string(const std::string& path);
|
| 135 |
+
|
| 136 |
+
void dbg_hit_on(bool cond, int slot = 0);
|
| 137 |
+
void dbg_mean_of(int64_t value, int slot = 0);
|
| 138 |
+
void dbg_stdev_of(int64_t value, int slot = 0);
|
| 139 |
+
void dbg_extremes_of(int64_t value, int slot = 0);
|
| 140 |
+
void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0);
|
| 141 |
+
void dbg_print();
|
| 142 |
+
void dbg_clear();
|
| 143 |
+
|
| 144 |
+
using TimePoint = std::chrono::milliseconds::rep;  // A value in milliseconds
static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits");

// Returns the steady_clock reading, measured from that clock's epoch, in
// whole milliseconds.
inline TimePoint now() {
    using namespace std::chrono;

    const auto sinceEpoch = steady_clock::now().time_since_epoch();
    return duration_cast<milliseconds>(sinceEpoch).count();
}
|
| 151 |
+
|
| 152 |
+
// Splits s at every occurrence of delimiter and returns the pieces as views
// into s. Empty pieces between adjacent delimiters and after a trailing
// delimiter are kept.
//
// An empty s yields an empty vector. An empty delimiter yields s as the only
// piece.
inline std::vector<std::string_view> split(std::string_view s, std::string_view delimiter) {
    std::vector<std::string_view> res;

    if (s.empty())
        return res;

    // An empty delimiter is found at every position without advancing
    // 'begin', which would append empty pieces forever.
    if (delimiter.empty())
    {
        res.emplace_back(s);
        return res;
    }

    size_t begin = 0;
    for (size_t end = s.find(delimiter, begin); end != std::string_view::npos;
         end        = s.find(delimiter, begin))
    {
        res.emplace_back(s.substr(begin, end - begin));
        begin = end + delimiter.size();
    }

    // Whatever follows the last delimiter, possibly empty
    res.emplace_back(s.substr(begin));

    return res;
}
|
| 173 |
+
|
| 174 |
+
void remove_whitespace(std::string& s);
|
| 175 |
+
bool is_whitespace(std::string_view s);
|
| 176 |
+
|
| 177 |
+
// Tag values for operator<<(std::ostream&, SyncCout), declared right after
// this enum: the sync_cout macro inserts IO_LOCK before the output and
// sync_endl inserts IO_UNLOCK after std::endl.
enum SyncCout {
|
| 178 |
+
IO_LOCK,
|
| 179 |
+
IO_UNLOCK
|
| 180 |
+
};
|
| 181 |
+
std::ostream& operator<<(std::ostream&, SyncCout);
|
| 182 |
+
|
| 183 |
+
#define sync_cout std::cout << IO_LOCK
|
| 184 |
+
#define sync_endl std::endl << IO_UNLOCK
|
| 185 |
+
|
| 186 |
+
void sync_cout_start();
|
| 187 |
+
void sync_cout_end();
|
| 188 |
+
|
| 189 |
+
// True if and only if the binary is compiled on a little-endian machine
|
| 190 |
+
static inline const std::uint16_t Le = 1;
|
| 191 |
+
static inline const bool IsLittleEndian = *reinterpret_cast<const char*>(&Le) == 1;
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
// Fixed-capacity list stored inline in an array of MaxSize elements.
// Capacity is only checked by assert.
template<typename T, std::size_t MaxSize>
class ValueList {

   public:
    std::size_t size() const { return size_; }
    int         ssize() const { return static_cast<int>(size_); }

    // Appends a copy of value
    void push_back(const T& value) {
        assert(size_ < MaxSize);
        values_[size_] = value;
        ++size_;
    }

    const T* begin() const { return values_; }
    const T* end() const { return begin() + size_; }
    const T& operator[](int index) const { return *(values_ + index); }

    // Grows the list by count elements and returns a pointer to the first
    // of them, for the caller to fill in.
    T* make_space(size_t count) {
        T* const firstNew = values_ + size_;
        size_ += count;
        assert(size_ <= MaxSize);
        return firstNew;
    }

   private:
    T           values_[MaxSize];
    std::size_t size_ = 0;
};
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
template<typename T, std::size_t Size, std::size_t... Sizes>
class MultiArray;

namespace Detail {

// Element type of one MultiArray level: while further dimensions remain it
// is the MultiArray over those remaining dimensions ...
template<typename T, std::size_t Size, std::size_t... Sizes>
struct MultiArrayHelper {
    using ChildType = MultiArray<T, Sizes...>;
};

// ... and for the last dimension it is T itself.
template<typename T, std::size_t Size>
struct MultiArrayHelper<T, Size> {
    using ChildType = T;
};

// True when a From can be assigned to a To and either both are the same type
// or From is not implicitly convertible to To.
template<typename To, typename From>
constexpr bool is_strictly_assignable_v = std::conjunction_v<
  std::is_assignable<To&, From>,
  std::disjunction<std::is_same<To, From>, std::negation<std::is_convertible<From, To>>>>;

}
|
| 241 |
+
|
| 242 |
+
// MultiArray is a generic N-dimensional array.
|
| 243 |
+
// The template parameters (Size and Sizes) encode the dimensions of the array.
|
| 244 |
+
template<typename T, std::size_t Size, std::size_t... Sizes>
|
| 245 |
+
class MultiArray {
|
| 246 |
+
using ChildType = typename Detail::MultiArrayHelper<T, Size, Sizes...>::ChildType;
|
| 247 |
+
using ArrayType = std::array<ChildType, Size>;
|
| 248 |
+
ArrayType data_;
|
| 249 |
+
|
| 250 |
+
public:
|
| 251 |
+
using value_type = typename ArrayType::value_type;
|
| 252 |
+
using size_type = typename ArrayType::size_type;
|
| 253 |
+
using difference_type = typename ArrayType::difference_type;
|
| 254 |
+
using reference = typename ArrayType::reference;
|
| 255 |
+
using const_reference = typename ArrayType::const_reference;
|
| 256 |
+
using pointer = typename ArrayType::pointer;
|
| 257 |
+
using const_pointer = typename ArrayType::const_pointer;
|
| 258 |
+
using iterator = typename ArrayType::iterator;
|
| 259 |
+
using const_iterator = typename ArrayType::const_iterator;
|
| 260 |
+
using reverse_iterator = typename ArrayType::reverse_iterator;
|
| 261 |
+
using const_reverse_iterator = typename ArrayType::const_reverse_iterator;
|
| 262 |
+
|
| 263 |
+
constexpr auto& at(size_type index) noexcept { return data_.at(index); }
|
| 264 |
+
constexpr const auto& at(size_type index) const noexcept { return data_.at(index); }
|
| 265 |
+
|
| 266 |
+
constexpr auto& operator[](size_type index) noexcept { return data_[index]; }
|
| 267 |
+
constexpr const auto& operator[](size_type index) const noexcept { return data_[index]; }
|
| 268 |
+
|
| 269 |
+
constexpr auto& front() noexcept { return data_.front(); }
|
| 270 |
+
constexpr const auto& front() const noexcept { return data_.front(); }
|
| 271 |
+
constexpr auto& back() noexcept { return data_.back(); }
|
| 272 |
+
constexpr const auto& back() const noexcept { return data_.back(); }
|
| 273 |
+
|
| 274 |
+
auto* data() { return data_.data(); }
|
| 275 |
+
const auto* data() const { return data_.data(); }
|
| 276 |
+
|
| 277 |
+
constexpr auto begin() noexcept { return data_.begin(); }
|
| 278 |
+
constexpr auto end() noexcept { return data_.end(); }
|
| 279 |
+
constexpr auto begin() const noexcept { return data_.begin(); }
|
| 280 |
+
constexpr auto end() const noexcept { return data_.end(); }
|
| 281 |
+
constexpr auto cbegin() const noexcept { return data_.cbegin(); }
|
| 282 |
+
constexpr auto cend() const noexcept { return data_.cend(); }
|
| 283 |
+
|
| 284 |
+
constexpr auto rbegin() noexcept { return data_.rbegin(); }
|
| 285 |
+
constexpr auto rend() noexcept { return data_.rend(); }
|
| 286 |
+
constexpr auto rbegin() const noexcept { return data_.rbegin(); }
|
| 287 |
+
constexpr auto rend() const noexcept { return data_.rend(); }
|
| 288 |
+
constexpr auto crbegin() const noexcept { return data_.crbegin(); }
|
| 289 |
+
constexpr auto crend() const noexcept { return data_.crend(); }
|
| 290 |
+
|
| 291 |
+
constexpr bool empty() const noexcept { return data_.empty(); }
|
| 292 |
+
constexpr size_type size() const noexcept { return data_.size(); }
|
| 293 |
+
constexpr size_type max_size() const noexcept { return data_.max_size(); }
|
| 294 |
+
|
| 295 |
+
template<typename U>
|
| 296 |
+
void fill(const U& v) {
|
| 297 |
+
static_assert(Detail::is_strictly_assignable_v<T, U>,
|
| 298 |
+
"Cannot assign fill value to entry type");
|
| 299 |
+
for (auto& ele : data_)
|
| 300 |
+
{
|
| 301 |
+
if constexpr (sizeof...(Sizes) == 0)
|
| 302 |
+
ele = v;
|
| 303 |
+
else
|
| 304 |
+
ele.fill(v);
|
| 305 |
+
}
|
| 306 |
+
}
|
| 307 |
+
|
| 308 |
+
constexpr void swap(MultiArray<T, Size, Sizes...>& other) noexcept { data_.swap(other.data_); }
|
| 309 |
+
};
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
// xorshift64star pseudo-random number generator.
//
// Based on original code written and dedicated to the public domain by
// Sebastiano Vigna (2014). Characteristics, as stated by the original notes:
//
//  - Outputs 64-bit numbers
//  - Passes Dieharder and SmallCrush test batteries
//  - Does not require warm-up, no zeroland to escape
//  - Internal state is a single 64-bit integer
//  - Period is 2^64 - 1
//  - Speed: 1.60 ns/call (Core i7 @3.40GHz)
//
// For further analysis see
//   <http://vigna.di.unimi.it/ftp/papers/xorshift.pdf>

class PRNG {

    uint64_t s;  // Generator state; the constructor asserts a non-zero seed

    // Advances the state with the three xorshift steps and returns the state
    // multiplied by the xorshift64star constant.
    uint64_t rand64() {

        s ^= s >> 12;
        s ^= s << 25;
        s ^= s >> 27;
        return s * 2685821657736338717LL;
    }

   public:
    PRNG(uint64_t seed) :
        s(seed) {
        assert(seed);
    }

    // Returns the next 64-bit output converted to T.
    template<typename T>
    T rand() {
        const uint64_t bits = rand64();
        return T(bits);
    }

    // Special generator used to fast init magic numbers.
    // Three consecutive outputs are AND-ed together, so the returned values
    // only have 1/8th of their bits set on average.
    template<typename T>
    T sparse_rand() {
        const uint64_t first  = rand64();
        const uint64_t second = rand64();
        const uint64_t third  = rand64();
        return T(first & second & third);
    }
};
|
| 355 |
+
|
| 356 |
+
// Returns the upper 64 bits of the full 128-bit product a * b.
inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
#if defined(__GNUC__) && defined(IS_64BIT)
    // The compiler provides a native 128-bit integer: multiply and shift.
    __extension__ using uint128 = unsigned __int128;
    const uint128 product       = uint128(a) * uint128(b);
    return product >> 64;
#else
    // Portable path: schoolbook multiplication on 32-bit halves, carrying the
    // overflow of the partial products into the high word.
    const uint64_t aLow  = uint32_t(a);
    const uint64_t aHigh = a >> 32;
    const uint64_t bLow  = uint32_t(b);
    const uint64_t bHigh = b >> 32;

    const uint64_t lowCarry = (aLow * bLow) >> 32;
    const uint64_t crossA   = aHigh * bLow + lowCarry;
    const uint64_t crossB   = aLow * bHigh + uint32_t(crossA);

    return aHigh * bHigh + (crossA >> 32) + (crossB >> 32);
#endif
}
|
| 369 |
+
|
| 370 |
+
// Declaration only; the definition is not in this header. Takes a pointer to
// a byte buffer and a byte count and returns a 64-bit value -- presumably a
// hash of those bytes; confirm the exact algorithm against the definition.
uint64_t hash_bytes(const char*, size_t);
|
| 371 |
+
|
| 372 |
+
template<typename T>
|
| 373 |
+
inline std::size_t get_raw_data_hash(const T& value) {
|
| 374 |
+
// We must have no padding bytes because we're reinterpreting as char
|
| 375 |
+
static_assert(std::has_unique_object_representations<T>());
|
| 376 |
+
|
| 377 |
+
return static_cast<std::size_t>(
|
| 378 |
+
hash_bytes(reinterpret_cast<const char*>(&value), sizeof(value)));
|
| 379 |
+
}
|
| 380 |
+
|
| 381 |
+
// Folds the hash of v into seed, in place.
template<typename T>
inline void hash_combine(std::size_t& seed, const T& v) {
    // For primitive (integral) types the value itself is used instead of the
    // default hasher, which may be nondeterministic across program invocations.
    const std::size_t hashed = [&v]() -> std::size_t {
        if constexpr (std::is_integral_v<T>)
            return static_cast<std::size_t>(v);
        else
            return std::hash<T>{}(v);
    }();

    const std::size_t mixed = hashed + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    seed ^= mixed;
}
|
| 392 |
+
|
| 393 |
+
// Hashes the characters of a string (all sv.size() of them) with hash_bytes().
inline std::uint64_t hash_string(const std::string& sv) {
    const char* const bytes = sv.data();
    const std::size_t count = sv.size();
    return hash_bytes(bytes, count);
}
|
| 394 |
+
|
| 395 |
+
// Fixed-capacity string stored inline in a char array (Capacity characters
// plus a terminating NUL). The buffer is kept NUL-terminated after every
// modification, and any operation that would exceed Capacity calls
// std::terminate().
template<std::size_t Capacity>
class FixedString {
   public:
    // Creates an empty string.
    FixedString() :
        length_(0) {
        data_[0] = '\0';
    }

    // Copies a NUL-terminated C string; terminates if it is longer than Capacity.
    FixedString(const char* str) :
        length_(0) {
        data_[0] = '\0';
        append(str, std::strlen(str));
    }

    // Copies all str.size() characters; terminates if that exceeds Capacity.
    FixedString(const std::string& str) :
        length_(0) {
        data_[0] = '\0';
        append(str.data(), str.size());
    }

    std::size_t size() const { return length_; }
    std::size_t capacity() const { return Capacity; }

    const char* c_str() const { return data_; }
    const char* data() const { return data_; }

    // Unchecked character access.
    char& operator[](std::size_t i) { return data_[i]; }

    const char& operator[](std::size_t i) const { return data_[i]; }

    // Appends a NUL-terminated C string; terminates if the result would not fit.
    FixedString& operator+=(const char* str) {
        append(str, std::strlen(str));
        return *this;
    }

    // Appends another FixedString. The operand's stored length is used rather
    // than strlen(), so characters after an embedded NUL are not dropped; this
    // keeps the operator consistent with size() and the std::string constructor.
    FixedString& operator+=(const FixedString& other) {
        append(other.data_, other.length_);
        return *this;
    }

    operator std::string() const { return std::string(data_, length_); }

    operator std::string_view() const { return std::string_view(data_, length_); }

    // Comparisons are performed on the std::string_view of this string.
    template<typename T>
    bool operator==(const T& other) const noexcept {
        return static_cast<std::string_view>(*this) == other;
    }

    template<typename T>
    bool operator!=(const T& other) const noexcept {
        return static_cast<std::string_view>(*this) != other;
    }

    // Resets to the empty string.
    void clear() {
        length_  = 0;
        data_[0] = '\0';
    }

   private:
    // Appends len bytes from src and re-terminates the buffer. Terminates the
    // program when the result would exceed Capacity. The check is written as a
    // subtraction so it cannot overflow (length_ never exceeds Capacity).
    void append(const char* src, std::size_t len) {
        if (len > Capacity - length_)
            std::terminate();
        std::memcpy(data_ + length_, src, len);
        length_ += len;
        data_[length_] = '\0';
    }

    char        data_[Capacity + 1];  // +1 for null terminator
    std::size_t length_;
};
|
| 465 |
+
|
| 466 |
+
// Holds the raw argument count and argument vector handed to the
// constructor. The two static helpers are only declared here; their
// definitions live elsewhere.
struct CommandLine {
    CommandLine(int _argc, char** _argv) :
        argc{_argc},
        argv{_argv} {}

    static std::string get_binary_directory(std::string argv0);
    static std::string get_working_directory();

    int    argc;  // Argument count as passed to the constructor
    char** argv;  // Argument vector as passed to the constructor (not copied)
};
|
| 478 |
+
|
| 479 |
+
namespace Utility {

// Moves the first element of vec that satisfies pred to the front, keeping
// the relative order of all other elements. Does nothing when no element
// matches.
template<typename T, typename Predicate>
void move_to_front(std::vector<T>& vec, Predicate pred) {
    const auto match = std::find_if(vec.begin(), vec.end(), pred);

    if (match == vec.end())
        return;

    // Rotating [begin, match] by one position brings the match to the front
    // and shifts the elements before it one slot to the right.
    std::rotate(vec.begin(), match, match + 1);
}
}
|
| 491 |
+
|
| 492 |
+
// sf_always_inline: compiler-specific spelling of a "force inlining" function
// attribute.
#ifdef __GNUC__
    #define sf_always_inline __attribute__((always_inline))
#elif defined(_MSC_VER)
    #define sf_always_inline __forceinline
#else
    // Any other compiler: the macro expands to nothing.
    #define sf_always_inline
#endif
|
| 500 |
+
|
| 501 |
+
// sf_assume(cond): passes cond to the compiler as an optimization assumption
// using whichever mechanism the compiler offers. Clang is tested first because
// it also defines __GNUC__.
#ifdef __clang__
    #define sf_assume(cond) __builtin_assume(cond)
#elif defined(__GNUC__) && __GNUC__ >= 13
    #define sf_assume(cond) __attribute__((assume(cond)))
#elif defined(__GNUC__)
    // Older GCC has no assume attribute: mark the failing path unreachable.
    #define sf_assume(cond) \
        do \
        { \
            if (!(cond)) \
                __builtin_unreachable(); \
        } while (0)
#elif defined(_MSC_VER)
    #define sf_assume(cond) __assume(cond)
#else
    // Any other compiler: the macro expands to nothing.
    #define sf_assume(cond)
#endif
|
| 520 |
+
|
| 521 |
+
// sf_unreachable(): marks a code path as unreachable for the optimizer.
#if defined(__GNUC__)
    #define sf_unreachable() __builtin_unreachable()
#elif defined(_MSC_VER)
    #define sf_unreachable() __assume(0)
#else
    // Any other compiler: the macro expands to nothing.
    #define sf_unreachable()
#endif
|
| 528 |
+
|
| 529 |
+
} // namespace Stockfish
|
| 530 |
+
|
| 531 |
+
template<std::size_t N>
|
| 532 |
+
struct std::hash<Stockfish::FixedString<N>> {
|
| 533 |
+
std::size_t operator()(const Stockfish::FixedString<N>& fstr) const noexcept {
|
| 534 |
+
return Stockfish::hash_bytes(fstr.data(), fstr.size());
|
| 535 |
+
}
|
| 536 |
+
};
|
| 537 |
+
|
| 538 |
+
#endif // #ifndef MISC_H_INCLUDED
|
src/movegen.cpp
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include "movegen.h"
|
| 20 |
+
|
| 21 |
+
#include <cassert>
|
| 22 |
+
#include <initializer_list>
|
| 23 |
+
|
| 24 |
+
#include "bitboard.h"
|
| 25 |
+
#include "position.h"
|
| 26 |
+
|
| 27 |
+
#if defined(USE_AVX512ICL)
|
| 28 |
+
#include <array>
|
| 29 |
+
#include <algorithm>
|
| 30 |
+
#include <immintrin.h>
|
| 31 |
+
#endif
|
| 32 |
+
|
| 33 |
+
namespace Stockfish {
|
| 34 |
+
|
| 35 |
+
namespace {
|
| 36 |
+
|
| 37 |
+
#if defined(USE_AVX512ICL)
|
| 38 |
+
|
| 39 |
+
// Packs the 16-bit lanes of vector selected by mask to the front (zeroing the
// rest), stores the whole 512-bit result at moveList and returns moveList
// advanced by the number of selected lanes.
// NOTE(review): the store always writes a full 64 bytes, so the destination
// presumably needs slack beyond the returned pointer -- confirm with callers.
inline Move* write_moves(Move* moveList, uint32_t mask, __m512i vector) {
    // Avoid _mm512_mask_compressstoreu_epi16() as it's 256 uOps on Zen4
    const __m512i packed = _mm512_maskz_compress_epi16(mask, vector);
    _mm512_storeu_si512(reinterpret_cast<__m512i*>(moveList), packed);

    return moveList + popcount(mask);
}
|
| 45 |
+
|
| 46 |
+
template<Direction offset>
|
| 47 |
+
inline Move* splat_pawn_moves(Move* moveList, Bitboard to_bb) {
|
| 48 |
+
alignas(64) static constexpr auto SPLAT_TABLE = [] {
|
| 49 |
+
std::array<Move, 64> table{};
|
| 50 |
+
for (int i = 0; i < 64; i++)
|
| 51 |
+
{
|
| 52 |
+
Square from{uint8_t(std::clamp(i - offset, 0, 63))};
|
| 53 |
+
table[i] = {Move(from, Square{uint8_t(i)})};
|
| 54 |
+
}
|
| 55 |
+
return table;
|
| 56 |
+
}();
|
| 57 |
+
|
| 58 |
+
auto table = reinterpret_cast<const __m512i*>(SPLAT_TABLE.data());
|
| 59 |
+
|
| 60 |
+
moveList =
|
| 61 |
+
write_moves(moveList, static_cast<uint32_t>(to_bb >> 0), _mm512_load_si512(table + 0));
|
| 62 |
+
moveList =
|
| 63 |
+
write_moves(moveList, static_cast<uint32_t>(to_bb >> 32), _mm512_load_si512(table + 1));
|
| 64 |
+
|
| 65 |
+
return moveList;
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
// AVX-512 version: emits one move from `from` to every set bit of to_bb.
// A compile-time table holds the moves SQUARE_ZERO -> to for all 64
// destinations; the raw encoding of the real origin is OR-ed into every lane
// before the selected lanes are written out.
inline Move* splat_moves(Move* moveList, Square from, Bitboard to_bb) {
    alignas(64) static constexpr auto SPLAT_TABLE = [] {
        std::array<Move, 64> entries{};
        for (uint8_t to = 0; to < 64; to++)
            entries[to] = {Move(SQUARE_ZERO, Square{to})};
        return entries;
    }();

    // Broadcast of the raw move (from -> SQUARE_ZERO) to all 16-bit lanes.
    const __m512i originBits = _mm512_set1_epi16(Move(from, SQUARE_ZERO).raw());

    const auto* halves = reinterpret_cast<const __m512i*>(SPLAT_TABLE.data());

    const __m512i lowMoves  = _mm512_or_si512(_mm512_load_si512(halves + 0), originBits);
    const __m512i highMoves = _mm512_or_si512(_mm512_load_si512(halves + 1), originBits);

    // Squares 0..31 first, then squares 32..63.
    moveList = write_moves(moveList, static_cast<uint32_t>(to_bb >> 0), lowMoves);
    moveList = write_moves(moveList, static_cast<uint32_t>(to_bb >> 32), highMoves);

    return moveList;
}
|
| 87 |
+
|
| 88 |
+
#else
|
| 89 |
+
|
| 90 |
+
template<Direction offset>
|
| 91 |
+
inline Move* splat_pawn_moves(Move* moveList, Bitboard to_bb) {
|
| 92 |
+
while (to_bb)
|
| 93 |
+
{
|
| 94 |
+
Square to = pop_lsb(to_bb);
|
| 95 |
+
*moveList++ = Move(to - offset, to);
|
| 96 |
+
}
|
| 97 |
+
return moveList;
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
// Generic version: writes one move from `from` to each set square of to_bb,
// least significant bit first, and returns the new end of the list.
inline Move* splat_moves(Move* moveList, Square from, Bitboard to_bb) {
    while (to_bb)
    {
        const Square dest = pop_lsb(to_bb);
        *moveList         = Move(from, dest);
        ++moveList;
    }
    return moveList;
}
|
| 105 |
+
|
| 106 |
+
#endif
|
| 107 |
+
|
| 108 |
+
template<GenType Type, Direction D, bool Enemy>
|
| 109 |
+
Move* make_promotions(Move* moveList, [[maybe_unused]] Square to) {
|
| 110 |
+
|
| 111 |
+
constexpr bool all = Type == EVASIONS || Type == NON_EVASIONS;
|
| 112 |
+
|
| 113 |
+
if constexpr (Type == CAPTURES || all)
|
| 114 |
+
*moveList++ = Move::make<PROMOTION>(to - D, to, QUEEN);
|
| 115 |
+
|
| 116 |
+
if constexpr ((Type == CAPTURES && Enemy) || (Type == QUIETS && !Enemy) || all)
|
| 117 |
+
{
|
| 118 |
+
*moveList++ = Move::make<PROMOTION>(to - D, to, ROOK);
|
| 119 |
+
*moveList++ = Move::make<PROMOTION>(to - D, to, BISHOP);
|
| 120 |
+
*moveList++ = Move::make<PROMOTION>(to - D, to, KNIGHT);
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
return moveList;
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
template<Color Us, GenType Type>
|
| 128 |
+
Move* generate_pawn_moves(const Position& pos, Move* moveList, Bitboard target) {
|
| 129 |
+
|
| 130 |
+
constexpr Color Them = ~Us;
|
| 131 |
+
constexpr Bitboard TRank7BB = (Us == WHITE ? Rank7BB : Rank2BB);
|
| 132 |
+
constexpr Bitboard TRank3BB = (Us == WHITE ? Rank3BB : Rank6BB);
|
| 133 |
+
constexpr Direction Up = pawn_push(Us);
|
| 134 |
+
constexpr Direction UpRight = (Us == WHITE ? NORTH_EAST : SOUTH_WEST);
|
| 135 |
+
constexpr Direction UpLeft = (Us == WHITE ? NORTH_WEST : SOUTH_EAST);
|
| 136 |
+
|
| 137 |
+
const Bitboard emptySquares = ~pos.pieces();
|
| 138 |
+
const Bitboard enemies = Type == EVASIONS ? pos.checkers() : pos.pieces(Them);
|
| 139 |
+
|
| 140 |
+
Bitboard pawnsOn7 = pos.pieces(Us, PAWN) & TRank7BB;
|
| 141 |
+
Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB;
|
| 142 |
+
|
| 143 |
+
// Single and double pawn pushes, no promotions
|
| 144 |
+
if constexpr (Type != CAPTURES)
|
| 145 |
+
{
|
| 146 |
+
Bitboard b1 = shift<Up>(pawnsNotOn7) & emptySquares;
|
| 147 |
+
Bitboard b2 = shift<Up>(b1 & TRank3BB) & emptySquares;
|
| 148 |
+
|
| 149 |
+
if constexpr (Type == EVASIONS) // Consider only blocking squares
|
| 150 |
+
{
|
| 151 |
+
b1 &= target;
|
| 152 |
+
b2 &= target;
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
moveList = splat_pawn_moves<Up>(moveList, b1);
|
| 156 |
+
moveList = splat_pawn_moves<Up + Up>(moveList, b2);
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
// Promotions and underpromotions
|
| 160 |
+
if (pawnsOn7)
|
| 161 |
+
{
|
| 162 |
+
Bitboard b1 = shift<UpRight>(pawnsOn7) & enemies;
|
| 163 |
+
Bitboard b2 = shift<UpLeft>(pawnsOn7) & enemies;
|
| 164 |
+
Bitboard b3 = shift<Up>(pawnsOn7) & emptySquares;
|
| 165 |
+
|
| 166 |
+
if constexpr (Type == EVASIONS)
|
| 167 |
+
b3 &= target;
|
| 168 |
+
|
| 169 |
+
while (b1)
|
| 170 |
+
moveList = make_promotions<Type, UpRight, true>(moveList, pop_lsb(b1));
|
| 171 |
+
|
| 172 |
+
while (b2)
|
| 173 |
+
moveList = make_promotions<Type, UpLeft, true>(moveList, pop_lsb(b2));
|
| 174 |
+
|
| 175 |
+
while (b3)
|
| 176 |
+
moveList = make_promotions<Type, Up, false>(moveList, pop_lsb(b3));
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
// Standard and en passant captures
|
| 180 |
+
if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS)
|
| 181 |
+
{
|
| 182 |
+
Bitboard b1 = shift<UpRight>(pawnsNotOn7) & enemies;
|
| 183 |
+
Bitboard b2 = shift<UpLeft>(pawnsNotOn7) & enemies;
|
| 184 |
+
|
| 185 |
+
moveList = splat_pawn_moves<UpRight>(moveList, b1);
|
| 186 |
+
moveList = splat_pawn_moves<UpLeft>(moveList, b2);
|
| 187 |
+
|
| 188 |
+
if (pos.ep_square() != SQ_NONE)
|
| 189 |
+
{
|
| 190 |
+
assert(rank_of(pos.ep_square()) == relative_rank(Us, RANK_6));
|
| 191 |
+
|
| 192 |
+
// An en passant capture cannot resolve a discovered check
|
| 193 |
+
if (Type == EVASIONS && (target & (pos.ep_square() + Up)))
|
| 194 |
+
return moveList;
|
| 195 |
+
|
| 196 |
+
b1 = pawnsNotOn7 & attacks_bb<PAWN>(pos.ep_square(), Them);
|
| 197 |
+
|
| 198 |
+
assert(b1);
|
| 199 |
+
|
| 200 |
+
while (b1)
|
| 201 |
+
*moveList++ = Move::make<EN_PASSANT>(pop_lsb(b1), pos.ep_square());
|
| 202 |
+
}
|
| 203 |
+
}
|
| 204 |
+
|
| 205 |
+
return moveList;
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
template<Color Us, PieceType Pt>
|
| 210 |
+
Move* generate_moves(const Position& pos, Move* moveList, Bitboard target) {
|
| 211 |
+
|
| 212 |
+
static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()");
|
| 213 |
+
|
| 214 |
+
Bitboard bb = pos.pieces(Us, Pt);
|
| 215 |
+
|
| 216 |
+
while (bb)
|
| 217 |
+
{
|
| 218 |
+
Square from = pop_lsb(bb);
|
| 219 |
+
Bitboard b = attacks_bb<Pt>(from, pos.pieces()) & target;
|
| 220 |
+
|
| 221 |
+
moveList = splat_moves(moveList, from, b);
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
return moveList;
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
template<Color Us, GenType Type>
|
| 229 |
+
Move* generate_all(const Position& pos, Move* moveList) {
|
| 230 |
+
|
| 231 |
+
static_assert(Type != LEGAL, "Unsupported type in generate_all()");
|
| 232 |
+
|
| 233 |
+
const Square ksq = pos.square<KING>(Us);
|
| 234 |
+
Bitboard target;
|
| 235 |
+
|
| 236 |
+
// Skip generating non-king moves when in double check
|
| 237 |
+
if (Type != EVASIONS || !more_than_one(pos.checkers()))
|
| 238 |
+
{
|
| 239 |
+
target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers()))
|
| 240 |
+
: Type == NON_EVASIONS ? ~pos.pieces(Us)
|
| 241 |
+
: Type == CAPTURES ? pos.pieces(~Us)
|
| 242 |
+
: ~pos.pieces(); // QUIETS
|
| 243 |
+
|
| 244 |
+
moveList = generate_pawn_moves<Us, Type>(pos, moveList, target);
|
| 245 |
+
moveList = generate_moves<Us, KNIGHT>(pos, moveList, target);
|
| 246 |
+
moveList = generate_moves<Us, BISHOP>(pos, moveList, target);
|
| 247 |
+
moveList = generate_moves<Us, ROOK>(pos, moveList, target);
|
| 248 |
+
moveList = generate_moves<Us, QUEEN>(pos, moveList, target);
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
Bitboard b = attacks_bb<KING>(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target);
|
| 252 |
+
|
| 253 |
+
moveList = splat_moves(moveList, ksq, b);
|
| 254 |
+
|
| 255 |
+
if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING))
|
| 256 |
+
for (CastlingRights cr : {Us & KING_SIDE, Us & QUEEN_SIDE})
|
| 257 |
+
if (!pos.castling_impeded(cr) && pos.can_castle(cr))
|
| 258 |
+
*moveList++ = Move::make<CASTLING>(ksq, pos.castling_rook_square(cr));
|
| 259 |
+
|
| 260 |
+
return moveList;
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
} // namespace
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
// <CAPTURES> Generates all pseudo-legal captures plus queen promotions
|
| 267 |
+
// <QUIETS> Generates all pseudo-legal non-captures and underpromotions
|
| 268 |
+
// <EVASIONS> Generates all pseudo-legal check evasions
|
| 269 |
+
// <NON_EVASIONS> Generates all pseudo-legal captures and non-captures
|
| 270 |
+
//
|
| 271 |
+
// Returns a pointer to the end of the move list.
|
| 272 |
+
template<GenType Type>
|
| 273 |
+
Move* generate(const Position& pos, Move* moveList) {
|
| 274 |
+
|
| 275 |
+
static_assert(Type != LEGAL, "Unsupported type in generate()");
|
| 276 |
+
assert((Type == EVASIONS) == bool(pos.checkers()));
|
| 277 |
+
|
| 278 |
+
Color us = pos.side_to_move();
|
| 279 |
+
|
| 280 |
+
return us == WHITE ? generate_all<WHITE, Type>(pos, moveList)
|
| 281 |
+
: generate_all<BLACK, Type>(pos, moveList);
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
// Explicit template instantiations
|
| 285 |
+
template Move* generate<CAPTURES>(const Position&, Move*);
|
| 286 |
+
template Move* generate<QUIETS>(const Position&, Move*);
|
| 287 |
+
template Move* generate<EVASIONS>(const Position&, Move*);
|
| 288 |
+
template Move* generate<NON_EVASIONS>(const Position&, Move*);
|
| 289 |
+
|
| 290 |
+
// generate<LEGAL> generates all the legal moves in the given position
|
| 291 |
+
|
| 292 |
+
template<>
|
| 293 |
+
Move* generate<LEGAL>(const Position& pos, Move* moveList) {
|
| 294 |
+
|
| 295 |
+
Color us = pos.side_to_move();
|
| 296 |
+
Bitboard pinned = pos.blockers_for_king(us) & pos.pieces(us);
|
| 297 |
+
Square ksq = pos.square<KING>(us);
|
| 298 |
+
Move* cur = moveList;
|
| 299 |
+
|
| 300 |
+
moveList =
|
| 301 |
+
pos.checkers() ? generate<EVASIONS>(pos, moveList) : generate<NON_EVASIONS>(pos, moveList);
|
| 302 |
+
while (cur != moveList)
|
| 303 |
+
if (((pinned & cur->from_sq()) || cur->from_sq() == ksq || cur->type_of() == EN_PASSANT)
|
| 304 |
+
&& !pos.legal(*cur))
|
| 305 |
+
*cur = *(--moveList);
|
| 306 |
+
else
|
| 307 |
+
++cur;
|
| 308 |
+
|
| 309 |
+
return moveList;
|
| 310 |
+
}
|
| 311 |
+
|
| 312 |
+
} // namespace Stockfish
|
src/movegen.h
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef MOVEGEN_H_INCLUDED
|
| 20 |
+
#define MOVEGEN_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <algorithm> // IWYU pragma: keep
|
| 23 |
+
#include <cstddef>
|
| 24 |
+
|
| 25 |
+
#include "types.h"
|
| 26 |
+
|
| 27 |
+
namespace Stockfish {
|
| 28 |
+
|
| 29 |
+
// Forward declaration: this header only uses Position by const reference.
class Position;
|
| 30 |
+
|
| 31 |
+
// Kinds of move lists that generate<>() can produce.
enum GenType {
    CAPTURES = 0,  // Pseudo-legal captures plus queen promotions
    QUIETS,        // Pseudo-legal non-captures and underpromotions
    EVASIONS,      // Pseudo-legal check evasions
    NON_EVASIONS,  // Pseudo-legal captures and non-captures
    LEGAL          // Legal moves
};
|
| 38 |
+
|
| 39 |
+
struct ExtMove: public Move {
|
| 40 |
+
int value;
|
| 41 |
+
|
| 42 |
+
void operator=(Move m) { data = m.raw(); }
|
| 43 |
+
|
| 44 |
+
// Inhibit unwanted implicit conversions to Move
|
| 45 |
+
// with an ambiguity that yields to a compile error.
|
| 46 |
+
operator float() const = delete;
|
| 47 |
+
};
|
| 48 |
+
|
| 49 |
+
// Orders ExtMoves by their value only; the move itself is not compared.
inline bool operator<(const ExtMove& f, const ExtMove& s) {
    return s.value > f.value;
}
|
| 50 |
+
|
| 51 |
+
template<GenType>
|
| 52 |
+
Move* generate(const Position& pos, Move* moveList);
|
| 53 |
+
|
| 54 |
+
// The MoveList struct wraps the generate() function and returns a convenient
|
| 55 |
+
// list of moves. Using MoveList is sometimes preferable to directly calling
|
| 56 |
+
// the lower level generate() function.
|
| 57 |
+
template<GenType T>
|
| 58 |
+
struct MoveList {
|
| 59 |
+
|
| 60 |
+
explicit MoveList(const Position& pos) :
|
| 61 |
+
last(generate<T>(pos, moveList)) {}
|
| 62 |
+
const Move* begin() const { return moveList; }
|
| 63 |
+
const Move* end() const { return last; }
|
| 64 |
+
size_t size() const { return last - moveList; }
|
| 65 |
+
bool contains(Move move) const { return std::find(begin(), end(), move) != end(); }
|
| 66 |
+
|
| 67 |
+
private:
|
| 68 |
+
Move moveList[MAX_MOVES], *last;
|
| 69 |
+
};
|
| 70 |
+
|
| 71 |
+
} // namespace Stockfish
|
| 72 |
+
|
| 73 |
+
#endif // #ifndef MOVEGEN_H_INCLUDED
|
src/movepick.cpp
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include "movepick.h"
|
| 20 |
+
|
| 21 |
+
#include <cassert>
|
| 22 |
+
#include <limits>
|
| 23 |
+
#include <utility>
|
| 24 |
+
|
| 25 |
+
#include "bitboard.h"
|
| 26 |
+
#include "misc.h"
|
| 27 |
+
#include "position.h"
|
| 28 |
+
|
| 29 |
+
namespace Stockfish {
|
| 30 |
+
|
| 31 |
+
namespace {
|
| 32 |
+
|
| 33 |
+
// Stages of the move picker. Every group begins with its *_TT stage; the
// MovePicker constructor starts one stage later when there is no usable
// transposition-table move, so the order inside each group matters.
enum Stages {
    // generate main search moves
    MAIN_TT = 0,
    CAPTURE_INIT,
    GOOD_CAPTURE,
    QUIET_INIT,
    GOOD_QUIET,
    BAD_CAPTURE,
    BAD_QUIET,

    // generate evasion moves
    EVASION_TT,
    EVASION_INIT,
    EVASION,

    // generate probcut moves
    PROBCUT_TT,
    PROBCUT_INIT,
    PROBCUT,

    // generate qsearch moves
    QSEARCH_TT,
    QCAPTURE_INIT,
    QCAPTURE
};
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
// Sort moves in descending order up to and including a given limit.
|
| 61 |
+
// The order of moves smaller than the limit is left unspecified.
|
| 62 |
+
void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) {
|
| 63 |
+
|
| 64 |
+
for (ExtMove *sortedEnd = begin, *p = begin + 1; p < end; ++p)
|
| 65 |
+
if (p->value >= limit)
|
| 66 |
+
{
|
| 67 |
+
ExtMove tmp = *p, *q;
|
| 68 |
+
*p = *++sortedEnd;
|
| 69 |
+
for (q = sortedEnd; q != begin && *(q - 1) < tmp; --q)
|
| 70 |
+
*q = *(q - 1);
|
| 71 |
+
*q = tmp;
|
| 72 |
+
}
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
} // namespace
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
// Constructors of the MovePicker class. As arguments, we pass information
|
| 79 |
+
// to decide which class of moves to emit, to help sorting the (presumably)
|
| 80 |
+
// good moves first, and how important move ordering is at the current node.
|
| 81 |
+
|
| 82 |
+
// MovePicker constructor for the main search and for the quiescence search
|
| 83 |
+
MovePicker::MovePicker(const Position& p,
|
| 84 |
+
Move ttm,
|
| 85 |
+
Depth d,
|
| 86 |
+
const ButterflyHistory* mh,
|
| 87 |
+
const LowPlyHistory* lph,
|
| 88 |
+
const CapturePieceToHistory* cph,
|
| 89 |
+
const PieceToHistory** ch,
|
| 90 |
+
const SharedHistories* sh,
|
| 91 |
+
int pl) :
|
| 92 |
+
pos(p),
|
| 93 |
+
mainHistory(mh),
|
| 94 |
+
lowPlyHistory(lph),
|
| 95 |
+
captureHistory(cph),
|
| 96 |
+
continuationHistory(ch),
|
| 97 |
+
sharedHistory(sh),
|
| 98 |
+
ttMove(ttm),
|
| 99 |
+
depth(d),
|
| 100 |
+
ply(pl) {
|
| 101 |
+
|
| 102 |
+
if (pos.checkers())
|
| 103 |
+
stage = EVASION_TT + !(ttm && pos.pseudo_legal(ttm));
|
| 104 |
+
|
| 105 |
+
else
|
| 106 |
+
stage = (depth > 0 ? MAIN_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm));
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
// MovePicker constructor for ProbCut: we generate captures with Static Exchange
|
| 110 |
+
// Evaluation (SEE) greater than or equal to the given threshold.
|
| 111 |
+
MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceToHistory* cph) :
|
| 112 |
+
pos(p),
|
| 113 |
+
captureHistory(cph),
|
| 114 |
+
ttMove(ttm),
|
| 115 |
+
threshold(th) {
|
| 116 |
+
assert(!pos.checkers());
|
| 117 |
+
|
| 118 |
+
stage = PROBCUT_TT + !(ttm && pos.capture_stage(ttm) && pos.pseudo_legal(ttm));
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
// Assigns a numerical value to each move in a list, used for sorting.
|
| 122 |
+
// Captures are ordered by Most Valuable Victim (MVV), preferring captures
|
| 123 |
+
// with a good history. Quiets moves are ordered using the history tables.
|
| 124 |
+
template<GenType Type>
|
| 125 |
+
ExtMove* MovePicker::score(MoveList<Type>& ml) {
|
| 126 |
+
|
| 127 |
+
static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type");
|
| 128 |
+
|
| 129 |
+
Color us = pos.side_to_move();
|
| 130 |
+
|
| 131 |
+
[[maybe_unused]] Bitboard threatByLesser[KING + 1];
|
| 132 |
+
if constexpr (Type == QUIETS)
|
| 133 |
+
{
|
| 134 |
+
threatByLesser[PAWN] = 0;
|
| 135 |
+
threatByLesser[KNIGHT] = threatByLesser[BISHOP] = pos.attacks_by<PAWN>(~us);
|
| 136 |
+
threatByLesser[ROOK] =
|
| 137 |
+
pos.attacks_by<KNIGHT>(~us) | pos.attacks_by<BISHOP>(~us) | threatByLesser[KNIGHT];
|
| 138 |
+
threatByLesser[QUEEN] = pos.attacks_by<ROOK>(~us) | threatByLesser[ROOK];
|
| 139 |
+
threatByLesser[KING] = 0;
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
ExtMove* it = cur;
|
| 143 |
+
for (auto move : ml)
|
| 144 |
+
{
|
| 145 |
+
ExtMove& m = *it++;
|
| 146 |
+
m = move;
|
| 147 |
+
|
| 148 |
+
const Square from = m.from_sq();
|
| 149 |
+
const Square to = m.to_sq();
|
| 150 |
+
const Piece pc = pos.moved_piece(m);
|
| 151 |
+
const PieceType pt = type_of(pc);
|
| 152 |
+
const Piece capturedPiece = pos.piece_on(to);
|
| 153 |
+
|
| 154 |
+
if constexpr (Type == CAPTURES)
|
| 155 |
+
m.value = (*captureHistory)[pc][to][type_of(capturedPiece)]
|
| 156 |
+
+ 7 * int(PieceValue[capturedPiece]);
|
| 157 |
+
|
| 158 |
+
else if constexpr (Type == QUIETS)
|
| 159 |
+
{
|
| 160 |
+
// histories
|
| 161 |
+
m.value = 2 * (*mainHistory)[us][m.raw()];
|
| 162 |
+
m.value += 2 * sharedHistory->pawn_entry(pos)[pc][to];
|
| 163 |
+
m.value += (*continuationHistory[0])[pc][to];
|
| 164 |
+
m.value += (*continuationHistory[1])[pc][to];
|
| 165 |
+
m.value += (*continuationHistory[2])[pc][to];
|
| 166 |
+
m.value += (*continuationHistory[3])[pc][to];
|
| 167 |
+
m.value += (*continuationHistory[5])[pc][to];
|
| 168 |
+
|
| 169 |
+
// bonus for checks
|
| 170 |
+
m.value += (bool(pos.check_squares(pt) & to) && pos.see_ge(m, -75)) * 16384;
|
| 171 |
+
|
| 172 |
+
// penalty for moving to a square threatened by a lesser piece
|
| 173 |
+
// or bonus for escaping an attack by a lesser piece.
|
| 174 |
+
int v = 20 * (bool(threatByLesser[pt] & from) - bool(threatByLesser[pt] & to));
|
| 175 |
+
m.value += PieceValue[pt] * v;
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
if (ply < LOW_PLY_HISTORY_SIZE)
|
| 179 |
+
m.value += 8 * (*lowPlyHistory)[ply][m.raw()] / (1 + ply);
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
else // Type == EVASIONS
|
| 183 |
+
{
|
| 184 |
+
if (pos.capture_stage(m))
|
| 185 |
+
m.value = PieceValue[capturedPiece] + (1 << 28);
|
| 186 |
+
else
|
| 187 |
+
m.value = (*mainHistory)[us][m.raw()] + (*continuationHistory[0])[pc][to];
|
| 188 |
+
}
|
| 189 |
+
}
|
| 190 |
+
return it;
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
// Returns the next move satisfying a predicate function.
|
| 194 |
+
// This never returns the TT move, as it was emitted before.
|
| 195 |
+
template<typename Pred>
|
| 196 |
+
Move MovePicker::select(Pred filter) {
|
| 197 |
+
|
| 198 |
+
for (; cur < endCur; ++cur)
|
| 199 |
+
if (*cur != ttMove && filter())
|
| 200 |
+
return *cur++;
|
| 201 |
+
|
| 202 |
+
return Move::none();
|
| 203 |
+
}
|
| 204 |
+
|
| 205 |
+
// This is the most important method of the MovePicker class. We emit one
|
| 206 |
+
// new pseudo-legal move on every call until there are no more moves left,
|
| 207 |
+
// picking the move with the highest score from a list of generated moves.
|
| 208 |
+
Move MovePicker::next_move() {
|
| 209 |
+
|
| 210 |
+
constexpr int goodQuietThreshold = -14000;
|
| 211 |
+
top:
|
| 212 |
+
switch (stage)
|
| 213 |
+
{
|
| 214 |
+
|
| 215 |
+
case MAIN_TT :
|
| 216 |
+
case EVASION_TT :
|
| 217 |
+
case QSEARCH_TT :
|
| 218 |
+
case PROBCUT_TT :
|
| 219 |
+
++stage;
|
| 220 |
+
return ttMove;
|
| 221 |
+
|
| 222 |
+
case CAPTURE_INIT :
|
| 223 |
+
case PROBCUT_INIT :
|
| 224 |
+
case QCAPTURE_INIT : {
|
| 225 |
+
MoveList<CAPTURES> ml(pos);
|
| 226 |
+
|
| 227 |
+
cur = endBadCaptures = moves;
|
| 228 |
+
endCur = endCaptures = score<CAPTURES>(ml);
|
| 229 |
+
|
| 230 |
+
partial_insertion_sort(cur, endCur, std::numeric_limits<int>::min());
|
| 231 |
+
++stage;
|
| 232 |
+
goto top;
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
case GOOD_CAPTURE :
|
| 236 |
+
if (select([&]() {
|
| 237 |
+
if (pos.see_ge(*cur, -cur->value / 18))
|
| 238 |
+
return true;
|
| 239 |
+
std::swap(*endBadCaptures++, *cur);
|
| 240 |
+
return false;
|
| 241 |
+
}))
|
| 242 |
+
return *(cur - 1);
|
| 243 |
+
|
| 244 |
+
++stage;
|
| 245 |
+
[[fallthrough]];
|
| 246 |
+
|
| 247 |
+
case QUIET_INIT :
|
| 248 |
+
if (!skipQuiets)
|
| 249 |
+
{
|
| 250 |
+
MoveList<QUIETS> ml(pos);
|
| 251 |
+
|
| 252 |
+
endCur = endGenerated = score<QUIETS>(ml);
|
| 253 |
+
|
| 254 |
+
partial_insertion_sort(cur, endCur, -3560 * depth);
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
++stage;
|
| 258 |
+
[[fallthrough]];
|
| 259 |
+
|
| 260 |
+
case GOOD_QUIET :
|
| 261 |
+
if (!skipQuiets && select([&]() { return cur->value > goodQuietThreshold; }))
|
| 262 |
+
return *(cur - 1);
|
| 263 |
+
|
| 264 |
+
// Prepare the pointers to loop over the bad captures
|
| 265 |
+
cur = moves;
|
| 266 |
+
endCur = endBadCaptures;
|
| 267 |
+
|
| 268 |
+
++stage;
|
| 269 |
+
[[fallthrough]];
|
| 270 |
+
|
| 271 |
+
case BAD_CAPTURE :
|
| 272 |
+
if (select([]() { return true; }))
|
| 273 |
+
return *(cur - 1);
|
| 274 |
+
|
| 275 |
+
// Prepare the pointers to loop over quiets again
|
| 276 |
+
cur = endCaptures;
|
| 277 |
+
endCur = endGenerated;
|
| 278 |
+
|
| 279 |
+
++stage;
|
| 280 |
+
[[fallthrough]];
|
| 281 |
+
|
| 282 |
+
case BAD_QUIET :
|
| 283 |
+
if (!skipQuiets)
|
| 284 |
+
return select([&]() { return cur->value <= goodQuietThreshold; });
|
| 285 |
+
|
| 286 |
+
return Move::none();
|
| 287 |
+
|
| 288 |
+
case EVASION_INIT : {
|
| 289 |
+
MoveList<EVASIONS> ml(pos);
|
| 290 |
+
|
| 291 |
+
cur = moves;
|
| 292 |
+
endCur = endGenerated = score<EVASIONS>(ml);
|
| 293 |
+
|
| 294 |
+
partial_insertion_sort(cur, endCur, std::numeric_limits<int>::min());
|
| 295 |
+
++stage;
|
| 296 |
+
[[fallthrough]];
|
| 297 |
+
}
|
| 298 |
+
|
| 299 |
+
case EVASION :
|
| 300 |
+
case QCAPTURE :
|
| 301 |
+
return select([]() { return true; });
|
| 302 |
+
|
| 303 |
+
case PROBCUT :
|
| 304 |
+
return select([&]() { return pos.see_ge(*cur, threshold); });
|
| 305 |
+
}
|
| 306 |
+
|
| 307 |
+
assert(false);
|
| 308 |
+
return Move::none(); // Silence warning
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
// From now on next_move() neither generates nor returns quiet moves: the
// QUIET_INIT, GOOD_QUIET and BAD_QUIET stages all check this flag.
void MovePicker::skip_quiet_moves() {
    skipQuiets = true;
}
|
| 312 |
+
|
| 313 |
+
} // namespace Stockfish
|
src/movepick.h
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef MOVEPICK_H_INCLUDED
|
| 20 |
+
#define MOVEPICK_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include "history.h"
|
| 23 |
+
#include "movegen.h"
|
| 24 |
+
#include "types.h"
|
| 25 |
+
|
| 26 |
+
namespace Stockfish {
|
| 27 |
+
|
| 28 |
+
class Position;
|
| 29 |
+
|
| 30 |
+
// The MovePicker class is used to pick one pseudo-legal move at a time from the
|
| 31 |
+
// current position. The most important method is next_move(), which emits one
|
| 32 |
+
// new pseudo-legal move on every call, until there are no moves left, when
|
| 33 |
+
// Move::none() is returned. In order to improve the efficiency of the alpha-beta
|
| 34 |
+
// algorithm, MovePicker attempts to return the moves which are most likely to get
|
| 35 |
+
// a cut-off first.
|
| 36 |
+
class MovePicker {
|
| 37 |
+
|
| 38 |
+
public:
|
| 39 |
+
MovePicker(const MovePicker&) = delete;
|
| 40 |
+
MovePicker& operator=(const MovePicker&) = delete;
|
| 41 |
+
MovePicker(const Position&,
|
| 42 |
+
Move,
|
| 43 |
+
Depth,
|
| 44 |
+
const ButterflyHistory*,
|
| 45 |
+
const LowPlyHistory*,
|
| 46 |
+
const CapturePieceToHistory*,
|
| 47 |
+
const PieceToHistory**,
|
| 48 |
+
const SharedHistories*,
|
| 49 |
+
int);
|
| 50 |
+
MovePicker(const Position&, Move, int, const CapturePieceToHistory*);
|
| 51 |
+
Move next_move();
|
| 52 |
+
void skip_quiet_moves();
|
| 53 |
+
|
| 54 |
+
private:
|
| 55 |
+
template<typename Pred>
|
| 56 |
+
Move select(Pred);
|
| 57 |
+
template<GenType T>
|
| 58 |
+
ExtMove* score(MoveList<T>&);
|
| 59 |
+
ExtMove* begin() { return cur; }
|
| 60 |
+
ExtMove* end() { return endCur; }
|
| 61 |
+
|
| 62 |
+
const Position& pos;
|
| 63 |
+
const ButterflyHistory* mainHistory;
|
| 64 |
+
const LowPlyHistory* lowPlyHistory;
|
| 65 |
+
const CapturePieceToHistory* captureHistory;
|
| 66 |
+
const PieceToHistory** continuationHistory;
|
| 67 |
+
const SharedHistories* sharedHistory;
|
| 68 |
+
Move ttMove;
|
| 69 |
+
ExtMove * cur, *endCur, *endBadCaptures, *endCaptures, *endGenerated;
|
| 70 |
+
int stage;
|
| 71 |
+
int threshold;
|
| 72 |
+
Depth depth;
|
| 73 |
+
int ply;
|
| 74 |
+
bool skipQuiets = false;
|
| 75 |
+
ExtMove moves[MAX_MOVES];
|
| 76 |
+
};
|
| 77 |
+
|
| 78 |
+
} // namespace Stockfish
|
| 79 |
+
|
| 80 |
+
#endif // #ifndef MOVEPICK_H_INCLUDED
|
src/nnue/features/full_threats.cpp
ADDED
|
@@ -0,0 +1,343 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
//Definition of input features FullThreats of NNUE evaluation function
|
| 20 |
+
|
| 21 |
+
#include "full_threats.h"
|
| 22 |
+
|
| 23 |
+
#include <array>
|
| 24 |
+
#include <cstddef>
|
| 25 |
+
#include <cstdint>
|
| 26 |
+
#include <initializer_list>
|
| 27 |
+
#include <utility>
|
| 28 |
+
|
| 29 |
+
#include "../../bitboard.h"
|
| 30 |
+
#include "../../misc.h"
|
| 31 |
+
#include "../../position.h"
|
| 32 |
+
#include "../../types.h"
|
| 33 |
+
#include "../nnue_common.h"
|
| 34 |
+
|
| 35 |
+
namespace Stockfish::Eval::NNUE::Features {
|
| 36 |
+
|
| 37 |
+
// Per-attacker bookkeeping produced by init_threat_offsets().
// Member order matters: the struct is brace-initialised positionally.
struct HelperOffsets {
    // Total number of attack squares of the piece, summed over all origin squares
    int cumulativePieceOffset;
    // Feature index at which this piece's block starts
    int cumulativeOffset;
};
|
| 40 |
+
|
| 41 |
+
// Every piece, white first; the table builders below iterate in this order
constexpr std::array<Piece, 12> AllPieces = {
  W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING,  //
  B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING,
};
|
| 45 |
+
|
| 46 |
+
template<PieceType PT>
|
| 47 |
+
constexpr auto make_piece_indices_type() {
|
| 48 |
+
static_assert(PT != PieceType::PAWN);
|
| 49 |
+
|
| 50 |
+
std::array<std::array<uint8_t, SQUARE_NB>, SQUARE_NB> out{};
|
| 51 |
+
|
| 52 |
+
for (Square from = SQ_A1; from <= SQ_H8; ++from)
|
| 53 |
+
{
|
| 54 |
+
Bitboard attacks = PseudoAttacks[PT][from];
|
| 55 |
+
|
| 56 |
+
for (Square to = SQ_A1; to <= SQ_H8; ++to)
|
| 57 |
+
{
|
| 58 |
+
out[from][to] = constexpr_popcount(((1ULL << to) - 1) & attacks);
|
| 59 |
+
}
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
return out;
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
template<Piece P>
|
| 66 |
+
constexpr auto make_piece_indices_piece() {
|
| 67 |
+
static_assert(type_of(P) == PieceType::PAWN);
|
| 68 |
+
|
| 69 |
+
std::array<std::array<uint8_t, SQUARE_NB>, SQUARE_NB> out{};
|
| 70 |
+
|
| 71 |
+
constexpr Color C = color_of(P);
|
| 72 |
+
|
| 73 |
+
for (Square from = SQ_A1; from <= SQ_H8; ++from)
|
| 74 |
+
{
|
| 75 |
+
Bitboard attacks = PseudoAttacks[C][from];
|
| 76 |
+
|
| 77 |
+
for (Square to = SQ_A1; to <= SQ_H8; ++to)
|
| 78 |
+
{
|
| 79 |
+
out[from][to] = constexpr_popcount(((1ULL << to) - 1) & attacks);
|
| 80 |
+
}
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
return out;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
constexpr auto index_lut2_array() {
|
| 87 |
+
constexpr auto KNIGHT_ATTACKS = make_piece_indices_type<PieceType::KNIGHT>();
|
| 88 |
+
constexpr auto BISHOP_ATTACKS = make_piece_indices_type<PieceType::BISHOP>();
|
| 89 |
+
constexpr auto ROOK_ATTACKS = make_piece_indices_type<PieceType::ROOK>();
|
| 90 |
+
constexpr auto QUEEN_ATTACKS = make_piece_indices_type<PieceType::QUEEN>();
|
| 91 |
+
constexpr auto KING_ATTACKS = make_piece_indices_type<PieceType::KING>();
|
| 92 |
+
|
| 93 |
+
std::array<std::array<std::array<uint8_t, SQUARE_NB>, SQUARE_NB>, PIECE_NB> indices{};
|
| 94 |
+
|
| 95 |
+
indices[W_PAWN] = make_piece_indices_piece<W_PAWN>();
|
| 96 |
+
indices[B_PAWN] = make_piece_indices_piece<B_PAWN>();
|
| 97 |
+
|
| 98 |
+
indices[W_KNIGHT] = KNIGHT_ATTACKS;
|
| 99 |
+
indices[B_KNIGHT] = KNIGHT_ATTACKS;
|
| 100 |
+
|
| 101 |
+
indices[W_BISHOP] = BISHOP_ATTACKS;
|
| 102 |
+
indices[B_BISHOP] = BISHOP_ATTACKS;
|
| 103 |
+
|
| 104 |
+
indices[W_ROOK] = ROOK_ATTACKS;
|
| 105 |
+
indices[B_ROOK] = ROOK_ATTACKS;
|
| 106 |
+
|
| 107 |
+
indices[W_QUEEN] = QUEEN_ATTACKS;
|
| 108 |
+
indices[B_QUEEN] = QUEEN_ATTACKS;
|
| 109 |
+
|
| 110 |
+
indices[W_KING] = KING_ATTACKS;
|
| 111 |
+
indices[B_KING] = KING_ATTACKS;
|
| 112 |
+
|
| 113 |
+
return indices;
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
constexpr auto init_threat_offsets() {
|
| 117 |
+
std::array<HelperOffsets, PIECE_NB> indices{};
|
| 118 |
+
std::array<std::array<IndexType, SQUARE_NB>, PIECE_NB> offsets{};
|
| 119 |
+
|
| 120 |
+
int cumulativeOffset = 0;
|
| 121 |
+
for (Piece piece : AllPieces)
|
| 122 |
+
{
|
| 123 |
+
int pieceIdx = piece;
|
| 124 |
+
int cumulativePieceOffset = 0;
|
| 125 |
+
|
| 126 |
+
for (Square from = SQ_A1; from <= SQ_H8; ++from)
|
| 127 |
+
{
|
| 128 |
+
offsets[pieceIdx][from] = cumulativePieceOffset;
|
| 129 |
+
|
| 130 |
+
if (type_of(piece) != PAWN)
|
| 131 |
+
{
|
| 132 |
+
Bitboard attacks = PseudoAttacks[type_of(piece)][from];
|
| 133 |
+
cumulativePieceOffset += constexpr_popcount(attacks);
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
else if (from >= SQ_A2 && from <= SQ_H7)
|
| 137 |
+
{
|
| 138 |
+
Bitboard attacks = (pieceIdx < 8) ? pawn_attacks_bb<WHITE>(square_bb(from))
|
| 139 |
+
: pawn_attacks_bb<BLACK>(square_bb(from));
|
| 140 |
+
cumulativePieceOffset += constexpr_popcount(attacks);
|
| 141 |
+
}
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
indices[pieceIdx] = {cumulativePieceOffset, cumulativeOffset};
|
| 145 |
+
|
| 146 |
+
cumulativeOffset += numValidTargets[pieceIdx] * cumulativePieceOffset;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
return std::pair{indices, offsets};
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
+
// Evaluate the table builder once and split its two results
constexpr auto threat_offset_tables = init_threat_offsets();

// Per-piece block sizes and block start indices
constexpr auto helper_offsets = threat_offset_tables.first;
// Lookup array for indexing threats: [piece][from]
constexpr auto offsets = threat_offset_tables.second;
|
| 155 |
+
|
| 156 |
+
constexpr auto init_index_luts() {
|
| 157 |
+
std::array<std::array<std::array<uint32_t, 2>, PIECE_NB>, PIECE_NB> indices{};
|
| 158 |
+
|
| 159 |
+
for (Piece attacker : AllPieces)
|
| 160 |
+
{
|
| 161 |
+
for (Piece attacked : AllPieces)
|
| 162 |
+
{
|
| 163 |
+
bool enemy = (attacker ^ attacked) == 8;
|
| 164 |
+
PieceType attackerType = type_of(attacker);
|
| 165 |
+
PieceType attackedType = type_of(attacked);
|
| 166 |
+
|
| 167 |
+
int map = FullThreats::map[attackerType - 1][attackedType - 1];
|
| 168 |
+
bool semi_excluded = attackerType == attackedType && (enemy || attackerType != PAWN);
|
| 169 |
+
IndexType feature = helper_offsets[attacker].cumulativeOffset
|
| 170 |
+
+ (color_of(attacked) * (numValidTargets[attacker] / 2) + map)
|
| 171 |
+
* helper_offsets[attacker].cumulativePieceOffset;
|
| 172 |
+
|
| 173 |
+
bool excluded = map < 0;
|
| 174 |
+
indices[attacker][attacked][0] = excluded ? FullThreats::Dimensions : feature;
|
| 175 |
+
indices[attacker][attacked][1] =
|
| 176 |
+
excluded || semi_excluded ? FullThreats::Dimensions : feature;
|
| 177 |
+
}
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
return indices;
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
// The final index is calculated from summing data found in these two LUTs, as well
|
| 184 |
+
// as offsets[attacker][from]
|
| 185 |
+
|
| 186 |
+
// [attacker][attacked][from < to]
|
| 187 |
+
constexpr auto index_lut1 = init_index_luts();
|
| 188 |
+
// [attacker][from][to]
|
| 189 |
+
constexpr auto index_lut2 = index_lut2_array();
|
| 190 |
+
|
| 191 |
+
// Index of a feature for a given king position and another piece on some square
|
| 192 |
+
inline sf_always_inline IndexType FullThreats::make_index(
|
| 193 |
+
Color perspective, Piece attacker, Square from, Square to, Piece attacked, Square ksq) {
|
| 194 |
+
const std::int8_t orientation = OrientTBL[ksq] ^ (56 * perspective);
|
| 195 |
+
unsigned from_oriented = uint8_t(from) ^ orientation;
|
| 196 |
+
unsigned to_oriented = uint8_t(to) ^ orientation;
|
| 197 |
+
|
| 198 |
+
std::int8_t swap = 8 * perspective;
|
| 199 |
+
unsigned attacker_oriented = attacker ^ swap;
|
| 200 |
+
unsigned attacked_oriented = attacked ^ swap;
|
| 201 |
+
|
| 202 |
+
return index_lut1[attacker_oriented][attacked_oriented][from_oriented < to_oriented]
|
| 203 |
+
+ offsets[attacker_oriented][from_oriented]
|
| 204 |
+
+ index_lut2[attacker_oriented][from_oriented][to_oriented];
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
// Get a list of indices for active features in ascending order
|
| 208 |
+
|
| 209 |
+
void FullThreats::append_active_indices(Color perspective, const Position& pos, IndexList& active) {
|
| 210 |
+
Square ksq = pos.square<KING>(perspective);
|
| 211 |
+
Bitboard occupied = pos.pieces();
|
| 212 |
+
|
| 213 |
+
for (Color color : {WHITE, BLACK})
|
| 214 |
+
{
|
| 215 |
+
for (PieceType pt = PAWN; pt < KING; ++pt)
|
| 216 |
+
{
|
| 217 |
+
Color c = Color(perspective ^ color);
|
| 218 |
+
Piece attacker = make_piece(c, pt);
|
| 219 |
+
Bitboard bb = pos.pieces(c, pt);
|
| 220 |
+
|
| 221 |
+
if (pt == PAWN)
|
| 222 |
+
{
|
| 223 |
+
auto right = (c == WHITE) ? NORTH_EAST : SOUTH_WEST;
|
| 224 |
+
auto left = (c == WHITE) ? NORTH_WEST : SOUTH_EAST;
|
| 225 |
+
auto attacks_left =
|
| 226 |
+
((c == WHITE) ? shift<NORTH_EAST>(bb) : shift<SOUTH_WEST>(bb)) & occupied;
|
| 227 |
+
auto attacks_right =
|
| 228 |
+
((c == WHITE) ? shift<NORTH_WEST>(bb) : shift<SOUTH_EAST>(bb)) & occupied;
|
| 229 |
+
|
| 230 |
+
while (attacks_left)
|
| 231 |
+
{
|
| 232 |
+
Square to = pop_lsb(attacks_left);
|
| 233 |
+
Square from = to - right;
|
| 234 |
+
Piece attacked = pos.piece_on(to);
|
| 235 |
+
IndexType index = make_index(perspective, attacker, from, to, attacked, ksq);
|
| 236 |
+
|
| 237 |
+
if (index < Dimensions)
|
| 238 |
+
active.push_back(index);
|
| 239 |
+
}
|
| 240 |
+
|
| 241 |
+
while (attacks_right)
|
| 242 |
+
{
|
| 243 |
+
Square to = pop_lsb(attacks_right);
|
| 244 |
+
Square from = to - left;
|
| 245 |
+
Piece attacked = pos.piece_on(to);
|
| 246 |
+
IndexType index = make_index(perspective, attacker, from, to, attacked, ksq);
|
| 247 |
+
|
| 248 |
+
if (index < Dimensions)
|
| 249 |
+
active.push_back(index);
|
| 250 |
+
}
|
| 251 |
+
}
|
| 252 |
+
else
|
| 253 |
+
{
|
| 254 |
+
while (bb)
|
| 255 |
+
{
|
| 256 |
+
Square from = pop_lsb(bb);
|
| 257 |
+
Bitboard attacks = (attacks_bb(pt, from, occupied)) & occupied;
|
| 258 |
+
|
| 259 |
+
while (attacks)
|
| 260 |
+
{
|
| 261 |
+
Square to = pop_lsb(attacks);
|
| 262 |
+
Piece attacked = pos.piece_on(to);
|
| 263 |
+
IndexType index =
|
| 264 |
+
make_index(perspective, attacker, from, to, attacked, ksq);
|
| 265 |
+
|
| 266 |
+
if (index < Dimensions)
|
| 267 |
+
active.push_back(index);
|
| 268 |
+
}
|
| 269 |
+
}
|
| 270 |
+
}
|
| 271 |
+
}
|
| 272 |
+
}
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
// Get a list of indices for recently changed features
|
| 276 |
+
|
| 277 |
+
void FullThreats::append_changed_indices(Color perspective,
|
| 278 |
+
Square ksq,
|
| 279 |
+
const DiffType& diff,
|
| 280 |
+
IndexList& removed,
|
| 281 |
+
IndexList& added,
|
| 282 |
+
FusedUpdateData* fusedData,
|
| 283 |
+
bool first,
|
| 284 |
+
const ThreatWeightType* prefetchBase,
|
| 285 |
+
IndexType prefetchStride) {
|
| 286 |
+
|
| 287 |
+
for (const auto& dirty : diff.list)
|
| 288 |
+
{
|
| 289 |
+
auto attacker = dirty.pc();
|
| 290 |
+
auto attacked = dirty.threatened_pc();
|
| 291 |
+
auto from = dirty.pc_sq();
|
| 292 |
+
auto to = dirty.threatened_sq();
|
| 293 |
+
auto add = dirty.add();
|
| 294 |
+
|
| 295 |
+
if (fusedData)
|
| 296 |
+
{
|
| 297 |
+
if (from == fusedData->dp2removed)
|
| 298 |
+
{
|
| 299 |
+
if (add)
|
| 300 |
+
{
|
| 301 |
+
if (first)
|
| 302 |
+
{
|
| 303 |
+
fusedData->dp2removedOriginBoard |= to;
|
| 304 |
+
continue;
|
| 305 |
+
}
|
| 306 |
+
}
|
| 307 |
+
else if (fusedData->dp2removedOriginBoard & to)
|
| 308 |
+
continue;
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
if (to != SQ_NONE && to == fusedData->dp2removed)
|
| 312 |
+
{
|
| 313 |
+
if (add)
|
| 314 |
+
{
|
| 315 |
+
if (first)
|
| 316 |
+
{
|
| 317 |
+
fusedData->dp2removedTargetBoard |= from;
|
| 318 |
+
continue;
|
| 319 |
+
}
|
| 320 |
+
}
|
| 321 |
+
else if (fusedData->dp2removedTargetBoard & from)
|
| 322 |
+
continue;
|
| 323 |
+
}
|
| 324 |
+
}
|
| 325 |
+
|
| 326 |
+
auto& insert = add ? added : removed;
|
| 327 |
+
const IndexType index = make_index(perspective, attacker, from, to, attacked, ksq);
|
| 328 |
+
|
| 329 |
+
if (index < Dimensions)
|
| 330 |
+
{
|
| 331 |
+
if (prefetchBase)
|
| 332 |
+
prefetch<PrefetchRw::READ, PrefetchLoc::LOW>(
|
| 333 |
+
prefetchBase + static_cast<std::ptrdiff_t>(index) * prefetchStride);
|
| 334 |
+
insert.push_back(index);
|
| 335 |
+
}
|
| 336 |
+
}
|
| 337 |
+
}
|
| 338 |
+
|
| 339 |
+
bool FullThreats::requires_refresh(const DiffType& diff, Color perspective) {
|
| 340 |
+
return perspective == diff.us && (int8_t(diff.ksq) & 0b100) != (int8_t(diff.prevKsq) & 0b100);
|
| 341 |
+
}
|
| 342 |
+
|
| 343 |
+
} // namespace Stockfish::Eval::NNUE::Features
|
src/nnue/features/full_threats.h
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 5 |
+
it under the terms of the GNU General Public License as published by
|
| 6 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 7 |
+
(at your option) any later version.
|
| 8 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 9 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 10 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 11 |
+
GNU General Public License for more details.
|
| 12 |
+
You should have received a copy of the GNU General Public License
|
| 13 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 14 |
+
*/
|
| 15 |
+
|
| 16 |
+
// Definition of input features FullThreats of NNUE evaluation function
|
| 17 |
+
|
| 18 |
+
#ifndef NNUE_FEATURES_FULL_THREATS_INCLUDED
|
| 19 |
+
#define NNUE_FEATURES_FULL_THREATS_INCLUDED
|
| 20 |
+
|
| 21 |
+
#include <cstdint>
|
| 22 |
+
|
| 23 |
+
#include "../../misc.h"
|
| 24 |
+
#include "../../types.h"
|
| 25 |
+
#include "../nnue_common.h"
|
| 26 |
+
|
| 27 |
+
namespace Stockfish {
|
| 28 |
+
class Position;
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
namespace Stockfish::Eval::NNUE::Features {
|
| 32 |
+
|
| 33 |
+
// Number of target slots per attacking piece, indexed by Piece. Each value is
// twice the count of non-negative entries in the attacker's row of
// FullThreats::map (one set of slots per colour of the attacked piece).
static constexpr int numValidTargets[PIECE_NB] = {
  0, 6, 10, 8, 8, 10, 0, 0,  // first eight piece codes
  0, 6, 10, 8, 8, 10, 0, 0,  // last eight piece codes
};
|
| 35 |
+
|
| 36 |
+
class FullThreats {
|
| 37 |
+
public:
|
| 38 |
+
// Feature name
|
| 39 |
+
static constexpr const char* Name = "Full_Threats(Friend)";
|
| 40 |
+
|
| 41 |
+
// Hash value embedded in the evaluation file
|
| 42 |
+
static constexpr std::uint32_t HashValue = 0x8f234cb8u;
|
| 43 |
+
|
| 44 |
+
// Number of feature dimensions
|
| 45 |
+
static constexpr IndexType Dimensions = 60144;
|
| 46 |
+
|
| 47 |
+
// clang-format off
|
| 48 |
+
// Orient a square according to perspective (rotates by 180 for black)
|
| 49 |
+
static constexpr std::int8_t OrientTBL[SQUARE_NB] = {
|
| 50 |
+
SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
|
| 51 |
+
SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
|
| 52 |
+
SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
|
| 53 |
+
SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
|
| 54 |
+
SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
|
| 55 |
+
SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
|
| 56 |
+
SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
|
| 57 |
+
SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
|
| 58 |
+
};
|
| 59 |
+
|
| 60 |
+
static constexpr int map[PIECE_TYPE_NB-2][PIECE_TYPE_NB-2] = {
|
| 61 |
+
{ 0, 1, -1, 2, -1, -1},
|
| 62 |
+
{ 0, 1, 2, 3, 4, -1},
|
| 63 |
+
{ 0, 1, 2, 3, -1, -1},
|
| 64 |
+
{ 0, 1, 2, 3, -1, -1},
|
| 65 |
+
{ 0, 1, 2, 3, 4, -1},
|
| 66 |
+
{-1, -1, -1, -1, -1, -1}
|
| 67 |
+
};
|
| 68 |
+
// clang-format on
|
| 69 |
+
|
| 70 |
+
struct FusedUpdateData {
|
| 71 |
+
Bitboard dp2removedOriginBoard = 0;
|
| 72 |
+
Bitboard dp2removedTargetBoard = 0;
|
| 73 |
+
|
| 74 |
+
Square dp2removed;
|
| 75 |
+
};
|
| 76 |
+
|
| 77 |
+
// Maximum number of simultaneously active features.
|
| 78 |
+
static constexpr IndexType MaxActiveDimensions = 128;
|
| 79 |
+
using IndexList = ValueList<IndexType, MaxActiveDimensions>;
|
| 80 |
+
using DiffType = DirtyThreats;
|
| 81 |
+
|
| 82 |
+
static IndexType
|
| 83 |
+
make_index(Color perspective, Piece attkr, Square from, Square to, Piece attkd, Square ksq);
|
| 84 |
+
|
| 85 |
+
// Get a list of indices for active features
|
| 86 |
+
static void append_active_indices(Color perspective, const Position& pos, IndexList& active);
|
| 87 |
+
|
| 88 |
+
// Get a list of indices for recently changed features
|
| 89 |
+
static void append_changed_indices(Color perspective,
|
| 90 |
+
Square ksq,
|
| 91 |
+
const DiffType& diff,
|
| 92 |
+
IndexList& removed,
|
| 93 |
+
IndexList& added,
|
| 94 |
+
FusedUpdateData* fd = nullptr,
|
| 95 |
+
bool first = false,
|
| 96 |
+
const ThreatWeightType* prefetchBase = nullptr,
|
| 97 |
+
IndexType prefetchStride = 0);
|
| 98 |
+
|
| 99 |
+
// Returns whether the change stored in this DirtyPiece means
|
| 100 |
+
// that a full accumulator refresh is required.
|
| 101 |
+
static bool requires_refresh(const DiffType& diff, Color perspective);
|
| 102 |
+
};
|
| 103 |
+
|
| 104 |
+
} // namespace Stockfish::Eval::NNUE::Features
|
| 105 |
+
|
| 106 |
+
#endif // #ifndef NNUE_FEATURES_FULL_THREATS_INCLUDED
|
src/nnue/features/half_ka_v2_hm.cpp
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
//Definition of input features HalfKAv2_hm of NNUE evaluation function
|
| 20 |
+
|
| 21 |
+
#include "half_ka_v2_hm.h"
|
| 22 |
+
|
| 23 |
+
#include "../../bitboard.h"
|
| 24 |
+
#include "../../position.h"
|
| 25 |
+
#include "../../types.h"
|
| 26 |
+
#include "../nnue_common.h"
|
| 27 |
+
|
| 28 |
+
namespace Stockfish::Eval::NNUE::Features {
|
| 29 |
+
|
| 30 |
+
// Index of a feature for a given king position and another piece on some square
|
| 31 |
+
|
| 32 |
+
IndexType HalfKAv2_hm::make_index(Color perspective, Square s, Piece pc, Square ksq) {
|
| 33 |
+
const IndexType flip = 56 * perspective;
|
| 34 |
+
return (IndexType(s) ^ OrientTBL[ksq] ^ flip) + PieceSquareIndex[perspective][pc]
|
| 35 |
+
+ KingBuckets[int(ksq) ^ flip];
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
// Get a list of indices for active features
|
| 39 |
+
|
| 40 |
+
void HalfKAv2_hm::append_active_indices(Color perspective, const Position& pos, IndexList& active) {
|
| 41 |
+
Square ksq = pos.square<KING>(perspective);
|
| 42 |
+
Bitboard bb = pos.pieces();
|
| 43 |
+
while (bb)
|
| 44 |
+
{
|
| 45 |
+
Square s = pop_lsb(bb);
|
| 46 |
+
active.push_back(make_index(perspective, s, pos.piece_on(s), ksq));
|
| 47 |
+
}
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
// Get a list of indices for recently changed features
|
| 51 |
+
|
| 52 |
+
void HalfKAv2_hm::append_changed_indices(
|
| 53 |
+
Color perspective, Square ksq, const DiffType& diff, IndexList& removed, IndexList& added) {
|
| 54 |
+
removed.push_back(make_index(perspective, diff.from, diff.pc, ksq));
|
| 55 |
+
if (diff.to != SQ_NONE)
|
| 56 |
+
added.push_back(make_index(perspective, diff.to, diff.pc, ksq));
|
| 57 |
+
|
| 58 |
+
if (diff.remove_sq != SQ_NONE)
|
| 59 |
+
removed.push_back(make_index(perspective, diff.remove_sq, diff.remove_pc, ksq));
|
| 60 |
+
|
| 61 |
+
if (diff.add_sq != SQ_NONE)
|
| 62 |
+
added.push_back(make_index(perspective, diff.add_sq, diff.add_pc, ksq));
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
bool HalfKAv2_hm::requires_refresh(const DiffType& diff, Color perspective) {
|
| 66 |
+
return diff.pc == make_piece(perspective, KING);
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
} // namespace Stockfish::Eval::NNUE::Features
|
src/nnue/features/half_ka_v2_hm.h
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
//Definition of input features HalfKAv2_hm of NNUE evaluation function
|
| 20 |
+
|
| 21 |
+
#ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
|
| 22 |
+
#define NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
|
| 23 |
+
|
| 24 |
+
#include <cstdint>
|
| 25 |
+
|
| 26 |
+
#include "../../misc.h"
|
| 27 |
+
#include "../../types.h"
|
| 28 |
+
#include "../nnue_common.h"
|
| 29 |
+
|
| 30 |
+
namespace Stockfish {
|
| 31 |
+
class Position;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
namespace Stockfish::Eval::NNUE::Features {
|
| 35 |
+
|
| 36 |
+
// Feature HalfKAv2_hm: Combination of the position of own king and the
|
| 37 |
+
// position of pieces. Position mirrored such that king is always on e..h files.
|
| 38 |
+
class HalfKAv2_hm {
|
| 39 |
+
|
| 40 |
+
// Unique number for each piece type on each square
|
| 41 |
+
enum {
|
| 42 |
+
PS_NONE = 0,
|
| 43 |
+
PS_W_PAWN = 0,
|
| 44 |
+
PS_B_PAWN = 1 * SQUARE_NB,
|
| 45 |
+
PS_W_KNIGHT = 2 * SQUARE_NB,
|
| 46 |
+
PS_B_KNIGHT = 3 * SQUARE_NB,
|
| 47 |
+
PS_W_BISHOP = 4 * SQUARE_NB,
|
| 48 |
+
PS_B_BISHOP = 5 * SQUARE_NB,
|
| 49 |
+
PS_W_ROOK = 6 * SQUARE_NB,
|
| 50 |
+
PS_B_ROOK = 7 * SQUARE_NB,
|
| 51 |
+
PS_W_QUEEN = 8 * SQUARE_NB,
|
| 52 |
+
PS_B_QUEEN = 9 * SQUARE_NB,
|
| 53 |
+
PS_KING = 10 * SQUARE_NB,
|
| 54 |
+
PS_NB = 11 * SQUARE_NB
|
| 55 |
+
};
|
| 56 |
+
|
| 57 |
+
static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = {
|
| 58 |
+
// Convention: W - us, B - them
|
| 59 |
+
// Viewed from other side, W and B are reversed
|
| 60 |
+
{PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE,
|
| 61 |
+
PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE},
|
| 62 |
+
{PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE,
|
| 63 |
+
PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE}};
|
| 64 |
+
|
| 65 |
+
public:
|
| 66 |
+
// Feature name
|
| 67 |
+
static constexpr const char* Name = "HalfKAv2_hm(Friend)";
|
| 68 |
+
|
| 69 |
+
// Hash value embedded in the evaluation file
|
| 70 |
+
static constexpr std::uint32_t HashValue = 0x7f234cb8u;
|
| 71 |
+
|
| 72 |
+
// Number of feature dimensions
|
| 73 |
+
static constexpr IndexType Dimensions =
|
| 74 |
+
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_NB) / 2;
|
| 75 |
+
|
| 76 |
+
#define B(v) (v * PS_NB)
|
| 77 |
+
// clang-format off
|
| 78 |
+
static constexpr IndexType KingBuckets[SQUARE_NB] = {
|
| 79 |
+
B(28), B(29), B(30), B(31), B(31), B(30), B(29), B(28),
|
| 80 |
+
B(24), B(25), B(26), B(27), B(27), B(26), B(25), B(24),
|
| 81 |
+
B(20), B(21), B(22), B(23), B(23), B(22), B(21), B(20),
|
| 82 |
+
B(16), B(17), B(18), B(19), B(19), B(18), B(17), B(16),
|
| 83 |
+
B(12), B(13), B(14), B(15), B(15), B(14), B(13), B(12),
|
| 84 |
+
B( 8), B( 9), B(10), B(11), B(11), B(10), B( 9), B( 8),
|
| 85 |
+
B( 4), B( 5), B( 6), B( 7), B( 7), B( 6), B( 5), B( 4),
|
| 86 |
+
B( 0), B( 1), B( 2), B( 3), B( 3), B( 2), B( 1), B( 0),
|
| 87 |
+
};
|
| 88 |
+
// clang-format on
|
| 89 |
+
#undef B
|
| 90 |
+
// clang-format off
|
| 91 |
+
// Orient a square according to perspective (rotates by 180 for black)
|
| 92 |
+
static constexpr IndexType OrientTBL[SQUARE_NB] = {
|
| 93 |
+
SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
|
| 94 |
+
SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
|
| 95 |
+
SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
|
| 96 |
+
SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
|
| 97 |
+
SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
|
| 98 |
+
SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
|
| 99 |
+
SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
|
| 100 |
+
SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1 ,
|
| 101 |
+
};
|
| 102 |
+
// clang-format on
|
| 103 |
+
|
| 104 |
+
// Maximum number of simultaneously active features.
|
| 105 |
+
static constexpr IndexType MaxActiveDimensions = 32;
|
| 106 |
+
using IndexList = ValueList<IndexType, MaxActiveDimensions>;
|
| 107 |
+
using DiffType = DirtyPiece;
|
| 108 |
+
|
| 109 |
+
// Index of a feature for a given king position and another piece on some square
|
| 110 |
+
|
| 111 |
+
static IndexType make_index(Color perspective, Square s, Piece pc, Square ksq);
|
| 112 |
+
|
| 113 |
+
// Get a list of indices for active features
|
| 114 |
+
|
| 115 |
+
static void append_active_indices(Color perspective, const Position& pos, IndexList& active);
|
| 116 |
+
|
| 117 |
+
// Get a list of indices for recently changed features
|
| 118 |
+
static void append_changed_indices(
|
| 119 |
+
Color perspective, Square ksq, const DiffType& diff, IndexList& removed, IndexList& added);
|
| 120 |
+
|
| 121 |
+
// Returns whether the change stored in this DirtyPiece means
|
| 122 |
+
// that a full accumulator refresh is required.
|
| 123 |
+
static bool requires_refresh(const DiffType& diff, Color perspective);
|
| 124 |
+
};
|
| 125 |
+
|
| 126 |
+
} // namespace Stockfish::Eval::NNUE::Features
|
| 127 |
+
|
| 128 |
+
#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
|
src/nnue/layers/affine_transform.h
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
// Definition of layer AffineTransform of NNUE evaluation function
|
| 20 |
+
|
| 21 |
+
#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
|
| 22 |
+
#define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
|
| 23 |
+
|
| 24 |
+
#include <cstdint>
|
| 25 |
+
#include <iostream>
|
| 26 |
+
|
| 27 |
+
#include "../../memory.h"
|
| 28 |
+
#include "../nnue_common.h"
|
| 29 |
+
#include "../simd.h"
|
| 30 |
+
|
| 31 |
+
/*
|
| 32 |
+
This file contains the definition for a fully connected layer (aka affine transform).
|
| 33 |
+
|
| 34 |
+
- expected use-case is for when PaddedInputDimensions == 32 and InputDimensions <= 32.
|
| 35 |
+
- that's why AVX512 is hard to implement
|
| 36 |
+
- expected use-case is small layers
|
| 37 |
+
- inputs are processed in chunks of 4, weights are respectively transposed
|
| 38 |
+
- accumulation happens directly to int32s
|
| 39 |
+
*/
|
| 40 |
+
|
| 41 |
+
namespace Stockfish::Eval::NNUE::Layers {
|
| 42 |
+
|
| 43 |
+
#if defined(USE_SSSE3) || defined(USE_NEON_DOTPROD)
|
| 44 |
+
#define ENABLE_SEQ_OPT
|
| 45 |
+
#endif
|
| 46 |
+
|
| 47 |
+
// Fallback implementation for older/other architectures.
|
| 48 |
+
// Requires the input to be padded to at least 16 values.
|
| 49 |
+
#ifndef ENABLE_SEQ_OPT
|
| 50 |
+
|
| 51 |
+
template<IndexType InputDimensions, IndexType PaddedInputDimensions, IndexType OutputDimensions>
|
| 52 |
+
static void affine_transform_non_ssse3(std::int32_t* output,
|
| 53 |
+
const std::int8_t* weights,
|
| 54 |
+
const std::int32_t* biases,
|
| 55 |
+
const std::uint8_t* input) {
|
| 56 |
+
#if defined(USE_SSE2) || defined(USE_NEON)
|
| 57 |
+
#if defined(USE_SSE2)
|
| 58 |
+
// At least a multiple of 16, with SSE2.
|
| 59 |
+
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
|
| 60 |
+
const __m128i Zeros = _mm_setzero_si128();
|
| 61 |
+
const auto inputVector = reinterpret_cast<const __m128i*>(input);
|
| 62 |
+
|
| 63 |
+
#elif defined(USE_NEON)
|
| 64 |
+
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
|
| 65 |
+
const auto inputVector = reinterpret_cast<const int8x8_t*>(input);
|
| 66 |
+
#endif
|
| 67 |
+
|
| 68 |
+
for (IndexType i = 0; i < OutputDimensions; ++i)
|
| 69 |
+
{
|
| 70 |
+
const IndexType offset = i * PaddedInputDimensions;
|
| 71 |
+
|
| 72 |
+
#if defined(USE_SSE2)
|
| 73 |
+
__m128i sumLo = _mm_cvtsi32_si128(biases[i]);
|
| 74 |
+
__m128i sumHi = Zeros;
|
| 75 |
+
const auto row = reinterpret_cast<const __m128i*>(&weights[offset]);
|
| 76 |
+
for (IndexType j = 0; j < NumChunks; ++j)
|
| 77 |
+
{
|
| 78 |
+
__m128i row_j = _mm_load_si128(&row[j]);
|
| 79 |
+
__m128i input_j = _mm_load_si128(&inputVector[j]);
|
| 80 |
+
__m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
|
| 81 |
+
__m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
|
| 82 |
+
__m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros);
|
| 83 |
+
__m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros);
|
| 84 |
+
__m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo);
|
| 85 |
+
__m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi);
|
| 86 |
+
sumLo = _mm_add_epi32(sumLo, productLo);
|
| 87 |
+
sumHi = _mm_add_epi32(sumHi, productHi);
|
| 88 |
+
}
|
| 89 |
+
__m128i sum = _mm_add_epi32(sumLo, sumHi);
|
| 90 |
+
__m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
|
| 91 |
+
sum = _mm_add_epi32(sum, sumHigh_64);
|
| 92 |
+
__m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
|
| 93 |
+
sum = _mm_add_epi32(sum, sum_second_32);
|
| 94 |
+
output[i] = _mm_cvtsi128_si32(sum);
|
| 95 |
+
|
| 96 |
+
#elif defined(USE_NEON)
|
| 97 |
+
|
| 98 |
+
int32x4_t sum = {biases[i]};
|
| 99 |
+
const auto row = reinterpret_cast<const SIMD::vec_i8x8_t*>(&weights[offset]);
|
| 100 |
+
for (IndexType j = 0; j < NumChunks; ++j)
|
| 101 |
+
{
|
| 102 |
+
int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]);
|
| 103 |
+
product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]);
|
| 104 |
+
sum = vpadalq_s16(sum, product);
|
| 105 |
+
}
|
| 106 |
+
output[i] = SIMD::neon_m128_reduce_add_epi32(sum);
|
| 107 |
+
|
| 108 |
+
#endif
|
| 109 |
+
}
|
| 110 |
+
#else
|
| 111 |
+
std::memcpy(output, biases, sizeof(std::int32_t) * OutputDimensions);
|
| 112 |
+
|
| 113 |
+
// Traverse weights in transpose order to take advantage of input sparsity
|
| 114 |
+
for (IndexType i = 0; i < InputDimensions; ++i)
|
| 115 |
+
if (input[i])
|
| 116 |
+
{
|
| 117 |
+
const std::int8_t* w = &weights[i];
|
| 118 |
+
const int in = input[i];
|
| 119 |
+
for (IndexType j = 0; j < OutputDimensions; ++j)
|
| 120 |
+
output[j] += w[j * PaddedInputDimensions] * in;
|
| 121 |
+
}
|
| 122 |
+
#endif
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
#endif // !ENABLE_SEQ_OPT
|
| 126 |
+
|
| 127 |
+
template<IndexType InDims, IndexType OutDims>
|
| 128 |
+
class AffineTransform {
|
| 129 |
+
public:
|
| 130 |
+
// Input/output type
|
| 131 |
+
using InputType = std::uint8_t;
|
| 132 |
+
using OutputType = std::int32_t;
|
| 133 |
+
|
| 134 |
+
// Number of input/output dimensions
|
| 135 |
+
static constexpr IndexType InputDimensions = InDims;
|
| 136 |
+
static constexpr IndexType OutputDimensions = OutDims;
|
| 137 |
+
|
| 138 |
+
static constexpr IndexType PaddedInputDimensions =
|
| 139 |
+
ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
|
| 140 |
+
static constexpr IndexType PaddedOutputDimensions =
|
| 141 |
+
ceil_to_multiple<IndexType>(OutputDimensions, MaxSimdWidth);
|
| 142 |
+
|
| 143 |
+
using OutputBuffer = OutputType[PaddedOutputDimensions];
|
| 144 |
+
|
| 145 |
+
// Hash value embedded in the evaluation file
|
| 146 |
+
static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
|
| 147 |
+
std::uint32_t hashValue = 0xCC03DAE4u;
|
| 148 |
+
hashValue += OutputDimensions;
|
| 149 |
+
hashValue ^= prevHash >> 1;
|
| 150 |
+
hashValue ^= prevHash << 31;
|
| 151 |
+
return hashValue;
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
static constexpr IndexType get_weight_index_scrambled(IndexType i) {
|
| 155 |
+
return (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4
|
| 156 |
+
+ i / PaddedInputDimensions * 4 + i % 4;
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
static constexpr IndexType get_weight_index(IndexType i) {
|
| 160 |
+
#ifdef ENABLE_SEQ_OPT
|
| 161 |
+
return get_weight_index_scrambled(i);
|
| 162 |
+
#else
|
| 163 |
+
return i;
|
| 164 |
+
#endif
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
// Read network parameters
|
| 168 |
+
bool read_parameters(std::istream& stream) {
|
| 169 |
+
read_little_endian<BiasType>(stream, biases, OutputDimensions);
|
| 170 |
+
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
| 171 |
+
weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
|
| 172 |
+
|
| 173 |
+
return !stream.fail();
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
// Write network parameters
|
| 177 |
+
bool write_parameters(std::ostream& stream) const {
|
| 178 |
+
write_little_endian<BiasType>(stream, biases, OutputDimensions);
|
| 179 |
+
|
| 180 |
+
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
| 181 |
+
write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
|
| 182 |
+
|
| 183 |
+
return !stream.fail();
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
std::size_t get_content_hash() const {
|
| 187 |
+
std::size_t h = 0;
|
| 188 |
+
hash_combine(h, get_raw_data_hash(biases));
|
| 189 |
+
hash_combine(h, get_raw_data_hash(weights));
|
| 190 |
+
hash_combine(h, get_hash_value(0));
|
| 191 |
+
return h;
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
// Forward propagation
|
| 195 |
+
void propagate(const InputType* input, OutputType* output) const {
|
| 196 |
+
|
| 197 |
+
#ifdef ENABLE_SEQ_OPT
|
| 198 |
+
|
| 199 |
+
if constexpr (OutputDimensions > 1)
|
| 200 |
+
{
|
| 201 |
+
#if defined(USE_AVX512)
|
| 202 |
+
using vec_t = __m512i;
|
| 203 |
+
#define vec_set_32 _mm512_set1_epi32
|
| 204 |
+
#define vec_add_dpbusd_32 SIMD::m512_add_dpbusd_epi32
|
| 205 |
+
#elif defined(USE_AVX2)
|
| 206 |
+
using vec_t = __m256i;
|
| 207 |
+
#define vec_set_32 _mm256_set1_epi32
|
| 208 |
+
#define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32
|
| 209 |
+
#elif defined(USE_SSSE3)
|
| 210 |
+
using vec_t = __m128i;
|
| 211 |
+
#define vec_set_32 _mm_set1_epi32
|
| 212 |
+
#define vec_add_dpbusd_32 SIMD::m128_add_dpbusd_epi32
|
| 213 |
+
#elif defined(USE_NEON_DOTPROD)
|
| 214 |
+
using vec_t = int32x4_t;
|
| 215 |
+
#define vec_set_32 vdupq_n_s32
|
| 216 |
+
#define vec_add_dpbusd_32(acc, a, b) \
|
| 217 |
+
SIMD::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \
|
| 218 |
+
vreinterpretq_s8_s32(b))
|
| 219 |
+
#endif
|
| 220 |
+
|
| 221 |
+
static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType);
|
| 222 |
+
|
| 223 |
+
static_assert(OutputDimensions % OutputSimdWidth == 0);
|
| 224 |
+
|
| 225 |
+
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 8) / 4;
|
| 226 |
+
constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth;
|
| 227 |
+
|
| 228 |
+
const vec_t* biasvec = reinterpret_cast<const vec_t*>(biases);
|
| 229 |
+
vec_t acc[NumRegs];
|
| 230 |
+
for (IndexType k = 0; k < NumRegs; ++k)
|
| 231 |
+
acc[k] = biasvec[k];
|
| 232 |
+
|
| 233 |
+
for (IndexType i = 0; i < NumChunks; ++i)
|
| 234 |
+
{
|
| 235 |
+
const vec_t in0 =
|
| 236 |
+
vec_set_32(load_as<std::int32_t>(input + i * sizeof(std::int32_t)));
|
| 237 |
+
const auto col0 =
|
| 238 |
+
reinterpret_cast<const vec_t*>(&weights[i * OutputDimensions * 4]);
|
| 239 |
+
|
| 240 |
+
for (IndexType k = 0; k < NumRegs; ++k)
|
| 241 |
+
vec_add_dpbusd_32(acc[k], in0, col0[k]);
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
vec_t* outptr = reinterpret_cast<vec_t*>(output);
|
| 245 |
+
for (IndexType k = 0; k < NumRegs; ++k)
|
| 246 |
+
outptr[k] = acc[k];
|
| 247 |
+
|
| 248 |
+
#undef vec_set_32
|
| 249 |
+
#undef vec_add_dpbusd_32
|
| 250 |
+
}
|
| 251 |
+
else if constexpr (OutputDimensions == 1)
|
| 252 |
+
{
|
| 253 |
+
// We cannot use AVX512 for the last layer because there are only 32 inputs
|
| 254 |
+
// and the buffer is not padded to 64 elements.
|
| 255 |
+
#if defined(USE_AVX2)
|
| 256 |
+
using vec_t = __m256i;
|
| 257 |
+
#define vec_setzero() _mm256_setzero_si256()
|
| 258 |
+
#define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32
|
| 259 |
+
#define vec_hadd SIMD::m256_hadd
|
| 260 |
+
#elif defined(USE_SSSE3)
|
| 261 |
+
using vec_t = __m128i;
|
| 262 |
+
#define vec_setzero() _mm_setzero_si128()
|
| 263 |
+
#define vec_add_dpbusd_32 SIMD::m128_add_dpbusd_epi32
|
| 264 |
+
#define vec_hadd SIMD::m128_hadd
|
| 265 |
+
#elif defined(USE_NEON_DOTPROD)
|
| 266 |
+
using vec_t = int32x4_t;
|
| 267 |
+
#define vec_setzero() vdupq_n_s32(0)
|
| 268 |
+
#define vec_add_dpbusd_32(acc, a, b) \
|
| 269 |
+
SIMD::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \
|
| 270 |
+
vreinterpretq_s8_s32(b))
|
| 271 |
+
#define vec_hadd SIMD::neon_m128_hadd
|
| 272 |
+
#endif
|
| 273 |
+
|
| 274 |
+
const auto inputVector = reinterpret_cast<const vec_t*>(input);
|
| 275 |
+
|
| 276 |
+
static constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(InputType);
|
| 277 |
+
|
| 278 |
+
static_assert(PaddedInputDimensions % InputSimdWidth == 0);
|
| 279 |
+
|
| 280 |
+
constexpr IndexType NumChunks = PaddedInputDimensions / InputSimdWidth;
|
| 281 |
+
vec_t sum0 = vec_setzero();
|
| 282 |
+
const auto row0 = reinterpret_cast<const vec_t*>(&weights[0]);
|
| 283 |
+
|
| 284 |
+
for (int j = 0; j < int(NumChunks); ++j)
|
| 285 |
+
{
|
| 286 |
+
const vec_t in = inputVector[j];
|
| 287 |
+
vec_add_dpbusd_32(sum0, in, row0[j]);
|
| 288 |
+
}
|
| 289 |
+
output[0] = vec_hadd(sum0, biases[0]);
|
| 290 |
+
|
| 291 |
+
#undef vec_setzero
|
| 292 |
+
#undef vec_add_dpbusd_32
|
| 293 |
+
#undef vec_hadd
|
| 294 |
+
}
|
| 295 |
+
#else
|
| 296 |
+
// Use old implementation for the other architectures.
|
| 297 |
+
affine_transform_non_ssse3<InputDimensions, PaddedInputDimensions, OutputDimensions>(
|
| 298 |
+
output, weights, biases, input);
|
| 299 |
+
#endif
|
| 300 |
+
}
|
| 301 |
+
|
| 302 |
+
private:
|
| 303 |
+
using BiasType = OutputType;
|
| 304 |
+
using WeightType = std::int8_t;
|
| 305 |
+
|
| 306 |
+
alignas(CacheLineSize) BiasType biases[OutputDimensions];
|
| 307 |
+
alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
|
| 308 |
+
};
|
| 309 |
+
|
| 310 |
+
} // namespace Stockfish::Eval::NNUE::Layers
|
| 311 |
+
|
| 312 |
+
#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
|
src/nnue/layers/affine_transform_sparse_input.h
ADDED
|
@@ -0,0 +1,379 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
// Definition of layer AffineTransformSparseInput of NNUE evaluation function
|
| 20 |
+
|
| 21 |
+
#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED
|
| 22 |
+
#define NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED
|
| 23 |
+
|
| 24 |
+
#include <algorithm>
|
| 25 |
+
#include <cstddef>
|
| 26 |
+
#include <cstdint>
|
| 27 |
+
#include <iostream>
|
| 28 |
+
|
| 29 |
+
#include "../../bitboard.h"
|
| 30 |
+
#include "../../memory.h"
|
| 31 |
+
#include "../simd.h"
|
| 32 |
+
#include "../nnue_common.h"
|
| 33 |
+
|
| 34 |
+
/*
|
| 35 |
+
This file contains the definition for a fully connected layer (aka affine transform) with block sparse input.
|
| 36 |
+
*/
|
| 37 |
+
|
| 38 |
+
namespace Stockfish::Eval::NNUE::Layers {
|
| 39 |
+
|
| 40 |
+
#if (USE_SSSE3 | (USE_NEON >= 8))
|
| 41 |
+
// De Bruijn lookup table used by constexpr_lsb(): maps the top 6 bits of
// ((bb ^ (bb - 1)) * debruijn64) to the index of the least significant set bit.
// Declared `inline` so that every translation unit including this header refers
// to the same object from the (implicitly inline) constexpr_lsb() below.
inline constexpr int lsb_index64[64] = {
  0,  47, 1,  56, 48, 27, 2,  60, 57, 49, 41, 37, 28, 16, 3,  61, 54, 58, 35, 52, 50, 42,
  21, 44, 38, 32, 29, 23, 17, 11, 4,  62, 46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43,
  31, 22, 10, 45, 25, 39, 14, 33, 19, 30, 9,  24, 13, 18, 8,  12, 7,  6,  5,  63};

// Returns the zero-based index of the least significant set bit of bb.
// Usable in constant expressions. bb must be nonzero (asserted).
constexpr int constexpr_lsb(std::uint64_t bb) {
    assert(bb != 0);
    constexpr std::uint64_t debruijn64 = 0x03F79D71B4CB0A89ULL;
    // bb ^ (bb - 1) sets every bit up to and including the lowest set bit;
    // multiplying by the De Bruijn constant makes the top 6 bits unique per index.
    return lsb_index64[((bb ^ (bb - 1)) * debruijn64) >> 58];
}
|
| 51 |
+
|
| 52 |
+
alignas(CacheLineSize) static constexpr struct OffsetIndices {
|
| 53 |
+
|
| 54 |
+
std::uint16_t offset_indices[256][8];
|
| 55 |
+
|
| 56 |
+
constexpr OffsetIndices() :
|
| 57 |
+
offset_indices() {
|
| 58 |
+
for (int i = 0; i < 256; ++i)
|
| 59 |
+
{
|
| 60 |
+
std::uint64_t j = i, k = 0;
|
| 61 |
+
while (j)
|
| 62 |
+
{
|
| 63 |
+
offset_indices[i][k++] = constexpr_lsb(j);
|
| 64 |
+
j &= j - 1;
|
| 65 |
+
}
|
| 66 |
+
while (k < 8)
|
| 67 |
+
offset_indices[i][k++] = 0;
|
| 68 |
+
}
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
} Lookup;
|
| 72 |
+
|
| 73 |
+
#if defined(__GNUC__) || defined(__clang__)
|
| 74 |
+
#define RESTRICT __restrict__
|
| 75 |
+
#elif defined(_MSC_VER)
|
| 76 |
+
#define RESTRICT __restrict
|
| 77 |
+
#else
|
| 78 |
+
#define RESTRICT
|
| 79 |
+
#endif
|
| 80 |
+
|
| 81 |
+
// Find indices of nonzero 32-bit values in a packed byte buffer.
|
| 82 |
+
// The input pointer addresses a sequence of 32-bit blocks stored in a
|
| 83 |
+
// std::uint8_t array.
|
| 84 |
+
template<const IndexType InputDimensions>
|
| 85 |
+
void find_nnz(const std::uint8_t* RESTRICT input,
|
| 86 |
+
std::uint16_t* RESTRICT out,
|
| 87 |
+
IndexType& count_out) {
|
| 88 |
+
|
| 89 |
+
#if defined(USE_AVX512ICL)
|
| 90 |
+
|
| 91 |
+
constexpr IndexType SimdWidthIn = 64; // 512 bits
|
| 92 |
+
constexpr IndexType SimdWidthOut = 32; // 512 bits / 16 bits
|
| 93 |
+
constexpr IndexType NumChunks = InputDimensions / SimdWidthOut;
|
| 94 |
+
const __m512i increment = _mm512_set1_epi16(SimdWidthOut);
|
| 95 |
+
__m512i base = _mm512_set_epi16( // Same permute order as _mm512_packus_epi32()
|
| 96 |
+
31, 30, 29, 28, 15, 14, 13, 12, 27, 26, 25, 24, 11, 10, 9, 8, 23, 22, 21, 20, 7, 6, 5, 4, 19,
|
| 97 |
+
18, 17, 16, 3, 2, 1, 0);
|
| 98 |
+
|
| 99 |
+
IndexType count = 0;
|
| 100 |
+
for (IndexType i = 0; i < NumChunks; ++i)
|
| 101 |
+
{
|
| 102 |
+
const __m512i inputV0 = _mm512_load_si512(input + i * 2 * SimdWidthIn);
|
| 103 |
+
const __m512i inputV1 = _mm512_load_si512(input + i * 2 * SimdWidthIn + SimdWidthIn);
|
| 104 |
+
|
| 105 |
+
// Get a bitmask and gather non zero indices
|
| 106 |
+
const __m512i inputV01 = _mm512_packus_epi32(inputV0, inputV1);
|
| 107 |
+
const __mmask32 nnzMask = _mm512_test_epi16_mask(inputV01, inputV01);
|
| 108 |
+
|
| 109 |
+
// Avoid _mm512_mask_compressstoreu_epi16() as it's 256 uOps on Zen4
|
| 110 |
+
__m512i nnz = _mm512_maskz_compress_epi16(nnzMask, base);
|
| 111 |
+
_mm512_storeu_si512(out + count, nnz);
|
| 112 |
+
|
| 113 |
+
count += popcount(nnzMask);
|
| 114 |
+
base = _mm512_add_epi16(base, increment);
|
| 115 |
+
}
|
| 116 |
+
count_out = count;
|
| 117 |
+
|
| 118 |
+
#elif defined(USE_AVX512)
|
| 119 |
+
|
| 120 |
+
constexpr IndexType SimdWidth = 16; // 512 bits / 32 bits
|
| 121 |
+
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
| 122 |
+
const __m512i increment = _mm512_set1_epi32(SimdWidth);
|
| 123 |
+
__m512i base = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
| 124 |
+
|
| 125 |
+
IndexType count = 0;
|
| 126 |
+
for (IndexType i = 0; i < NumChunks; ++i)
|
| 127 |
+
{
|
| 128 |
+
const __m512i inputV = _mm512_load_si512(input + i * SimdWidth * sizeof(std::uint32_t));
|
| 129 |
+
|
| 130 |
+
// Get a bitmask and gather non zero indices
|
| 131 |
+
const __mmask16 nnzMask = _mm512_test_epi32_mask(inputV, inputV);
|
| 132 |
+
const __m512i nnzV = _mm512_maskz_compress_epi32(nnzMask, base);
|
| 133 |
+
_mm512_mask_cvtepi32_storeu_epi16(out + count, 0xFFFF, nnzV);
|
| 134 |
+
count += popcount(nnzMask);
|
| 135 |
+
base = _mm512_add_epi32(base, increment);
|
| 136 |
+
}
|
| 137 |
+
count_out = count;
|
| 138 |
+
|
| 139 |
+
#else
|
| 140 |
+
|
| 141 |
+
using namespace SIMD;
|
| 142 |
+
|
| 143 |
+
constexpr IndexType InputSimdWidth = sizeof(vec_uint_t) / sizeof(std::int32_t);
|
| 144 |
+
// Outputs are processed 8 elements at a time, even if the SIMD width is narrower
|
| 145 |
+
constexpr IndexType ChunkSize = 8;
|
| 146 |
+
constexpr IndexType NumChunks = InputDimensions / ChunkSize;
|
| 147 |
+
constexpr IndexType InputsPerChunk = ChunkSize / InputSimdWidth;
|
| 148 |
+
|
| 149 |
+
static_assert(InputsPerChunk > 0 && "SIMD width too wide");
|
| 150 |
+
|
| 151 |
+
const auto inputVector = reinterpret_cast<const vec_uint_t*>(input);
|
| 152 |
+
IndexType count = 0;
|
| 153 |
+
vec128_t base = vec128_zero;
|
| 154 |
+
const vec128_t increment = vec128_set_16(8);
|
| 155 |
+
for (IndexType i = 0; i < NumChunks; ++i)
|
| 156 |
+
{
|
| 157 |
+
// bitmask of nonzero values in this chunk
|
| 158 |
+
unsigned nnz = 0;
|
| 159 |
+
for (IndexType j = 0; j < InputsPerChunk; ++j)
|
| 160 |
+
{
|
| 161 |
+
const vec_uint_t inputChunk = inputVector[i * InputsPerChunk + j];
|
| 162 |
+
nnz |= unsigned(vec_nnz(inputChunk)) << (j * InputSimdWidth);
|
| 163 |
+
}
|
| 164 |
+
const vec128_t offsets =
|
| 165 |
+
vec128_load(reinterpret_cast<const vec128_t*>(&Lookup.offset_indices[nnz]));
|
| 166 |
+
vec128_storeu(reinterpret_cast<vec128_t*>(out + count), vec128_add(base, offsets));
|
| 167 |
+
count += popcount(nnz);
|
| 168 |
+
base = vec128_add(base, increment);
|
| 169 |
+
}
|
| 170 |
+
count_out = count;
|
| 171 |
+
#endif
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
#endif
|
| 175 |
+
|
| 176 |
+
// Sparse input implementation
|
| 177 |
+
template<IndexType InDims, IndexType OutDims>
|
| 178 |
+
class AffineTransformSparseInput {
|
| 179 |
+
public:
|
| 180 |
+
// Input/output type
|
| 181 |
+
using InputType = std::uint8_t;
|
| 182 |
+
using OutputType = std::int32_t;
|
| 183 |
+
|
| 184 |
+
// Number of input/output dimensions
|
| 185 |
+
static constexpr IndexType InputDimensions = InDims;
|
| 186 |
+
static constexpr IndexType OutputDimensions = OutDims;
|
| 187 |
+
|
| 188 |
+
static_assert(OutputDimensions % 16 == 0,
|
| 189 |
+
"Only implemented for OutputDimensions divisible by 16.");
|
| 190 |
+
|
| 191 |
+
static constexpr IndexType PaddedInputDimensions =
|
| 192 |
+
ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
|
| 193 |
+
static constexpr IndexType PaddedOutputDimensions =
|
| 194 |
+
ceil_to_multiple<IndexType>(OutputDimensions, MaxSimdWidth);
|
| 195 |
+
|
| 196 |
+
#if (USE_SSSE3 | (USE_NEON >= 8))
|
| 197 |
+
static constexpr IndexType ChunkSize = 4;
|
| 198 |
+
#else
|
| 199 |
+
static constexpr IndexType ChunkSize = 1;
|
| 200 |
+
#endif
|
| 201 |
+
|
| 202 |
+
using OutputBuffer = OutputType[PaddedOutputDimensions];
|
| 203 |
+
|
| 204 |
+
// Hash value embedded in the evaluation file
|
| 205 |
+
static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
|
| 206 |
+
std::uint32_t hashValue = 0xCC03DAE4u;
|
| 207 |
+
hashValue += OutputDimensions;
|
| 208 |
+
hashValue ^= prevHash >> 1;
|
| 209 |
+
hashValue ^= prevHash << 31;
|
| 210 |
+
return hashValue;
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
static constexpr IndexType get_weight_index_scrambled(IndexType i) {
|
| 214 |
+
return (i / ChunkSize) % (PaddedInputDimensions / ChunkSize) * OutputDimensions * ChunkSize
|
| 215 |
+
+ i / PaddedInputDimensions * ChunkSize + i % ChunkSize;
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
static constexpr IndexType get_weight_index(IndexType i) {
|
| 219 |
+
#if (USE_SSSE3 | (USE_NEON >= 8))
|
| 220 |
+
return get_weight_index_scrambled(i);
|
| 221 |
+
#else
|
| 222 |
+
return i;
|
| 223 |
+
#endif
|
| 224 |
+
}
|
| 225 |
+
|
| 226 |
+
// Read network parameters
|
| 227 |
+
bool read_parameters(std::istream& stream) {
|
| 228 |
+
read_little_endian<BiasType>(stream, biases, OutputDimensions);
|
| 229 |
+
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
| 230 |
+
weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
|
| 231 |
+
|
| 232 |
+
return !stream.fail();
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
// Write network parameters
|
| 236 |
+
bool write_parameters(std::ostream& stream) const {
|
| 237 |
+
write_little_endian<BiasType>(stream, biases, OutputDimensions);
|
| 238 |
+
|
| 239 |
+
for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
|
| 240 |
+
write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
|
| 241 |
+
|
| 242 |
+
return !stream.fail();
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
std::size_t get_content_hash() const {
|
| 246 |
+
std::size_t h = 0;
|
| 247 |
+
hash_combine(h, get_raw_data_hash(biases));
|
| 248 |
+
hash_combine(h, get_raw_data_hash(weights));
|
| 249 |
+
hash_combine(h, get_hash_value(0));
|
| 250 |
+
return h;
|
| 251 |
+
}
|
| 252 |
+
|
| 253 |
+
// Forward propagation
|
| 254 |
+
void propagate(const InputType* input, OutputType* output) const {
|
| 255 |
+
|
| 256 |
+
#if (USE_SSSE3 | (USE_NEON >= 8))
|
| 257 |
+
#if defined(USE_AVX512)
|
| 258 |
+
using invec_t = __m512i;
|
| 259 |
+
using outvec_t = __m512i;
|
| 260 |
+
#define vec_add_32 _mm512_add_epi32
|
| 261 |
+
#define vec_set_32 _mm512_set1_epi32
|
| 262 |
+
#define vec_add_dpbusd_32 SIMD::m512_add_dpbusd_epi32
|
| 263 |
+
#elif defined(USE_AVX2)
|
| 264 |
+
using invec_t = __m256i;
|
| 265 |
+
using outvec_t = __m256i;
|
| 266 |
+
#define vec_add_32 _mm256_add_epi32
|
| 267 |
+
#define vec_set_32 _mm256_set1_epi32
|
| 268 |
+
#define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32
|
| 269 |
+
#elif defined(USE_SSSE3)
|
| 270 |
+
using invec_t = __m128i;
|
| 271 |
+
using outvec_t = __m128i;
|
| 272 |
+
#define vec_set_32 _mm_set1_epi32
|
| 273 |
+
#define vec_add_dpbusd_32 SIMD::m128_add_dpbusd_epi32
|
| 274 |
+
#elif defined(USE_NEON_DOTPROD)
|
| 275 |
+
using invec_t = int8x16_t;
|
| 276 |
+
using outvec_t = int32x4_t;
|
| 277 |
+
#define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a))
|
| 278 |
+
#define vec_add_dpbusd_32 SIMD::dotprod_m128_add_dpbusd_epi32
|
| 279 |
+
#elif defined(USE_NEON)
|
| 280 |
+
using invec_t = int8x16_t;
|
| 281 |
+
using outvec_t = int32x4_t;
|
| 282 |
+
#define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a))
|
| 283 |
+
#define vec_add_dpbusd_32 SIMD::neon_m128_add_dpbusd_epi32
|
| 284 |
+
#endif
|
| 285 |
+
constexpr IndexType OutputSimdWidth = sizeof(outvec_t) / sizeof(OutputType);
|
| 286 |
+
constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 8) / ChunkSize;
|
| 287 |
+
constexpr IndexType NumAccums = OutputDimensions / OutputSimdWidth;
|
| 288 |
+
// If we're using high-latency dot product instructions, split the accumulators
|
| 289 |
+
// to create 3 separate dependency chains and merge at the end
|
| 290 |
+
constexpr IndexType NumRegs =
|
| 291 |
+
#if defined(USE_VNNI)
|
| 292 |
+
3 * NumAccums;
|
| 293 |
+
#else
|
| 294 |
+
NumAccums;
|
| 295 |
+
#endif
|
| 296 |
+
std::uint16_t nnz[NumChunks];
|
| 297 |
+
IndexType count;
|
| 298 |
+
|
| 299 |
+
// Find indices of nonzero 32-bit blocks
|
| 300 |
+
find_nnz<NumChunks>(input, nnz, count);
|
| 301 |
+
|
| 302 |
+
const outvec_t* biasvec = reinterpret_cast<const outvec_t*>(biases);
|
| 303 |
+
outvec_t acc[NumRegs];
|
| 304 |
+
for (IndexType k = 0; k < NumAccums; ++k)
|
| 305 |
+
acc[k] = biasvec[k];
|
| 306 |
+
|
| 307 |
+
const auto* start = nnz;
|
| 308 |
+
const auto* end = nnz + count;
|
| 309 |
+
|
| 310 |
+
// convince GCC to not do weird pointer arithmetic in the following loop
|
| 311 |
+
const std::int8_t* weights_cp = weights;
|
| 312 |
+
#if defined(USE_VNNI)
|
| 313 |
+
for (IndexType k = NumAccums; k < NumRegs; ++k)
|
| 314 |
+
acc[k] = vec_zero();
|
| 315 |
+
|
| 316 |
+
while (start < end - 2)
|
| 317 |
+
{
|
| 318 |
+
const std::ptrdiff_t i0 = *start++;
|
| 319 |
+
const std::ptrdiff_t i1 = *start++;
|
| 320 |
+
const std::ptrdiff_t i2 = *start++;
|
| 321 |
+
const invec_t in0 =
|
| 322 |
+
vec_set_32(load_as<std::int32_t>(input + i0 * sizeof(std::int32_t)));
|
| 323 |
+
const invec_t in1 =
|
| 324 |
+
vec_set_32(load_as<std::int32_t>(input + i1 * sizeof(std::int32_t)));
|
| 325 |
+
const invec_t in2 =
|
| 326 |
+
vec_set_32(load_as<std::int32_t>(input + i2 * sizeof(std::int32_t)));
|
| 327 |
+
const auto col0 =
|
| 328 |
+
reinterpret_cast<const invec_t*>(&weights_cp[i0 * OutputDimensions * ChunkSize]);
|
| 329 |
+
const auto col1 =
|
| 330 |
+
reinterpret_cast<const invec_t*>(&weights_cp[i1 * OutputDimensions * ChunkSize]);
|
| 331 |
+
const auto col2 =
|
| 332 |
+
reinterpret_cast<const invec_t*>(&weights_cp[i2 * OutputDimensions * ChunkSize]);
|
| 333 |
+
for (IndexType k = 0; k < NumAccums; ++k)
|
| 334 |
+
{
|
| 335 |
+
vec_add_dpbusd_32(acc[k], in0, col0[k]);
|
| 336 |
+
vec_add_dpbusd_32(acc[k + NumAccums], in1, col1[k]);
|
| 337 |
+
vec_add_dpbusd_32(acc[k + 2 * NumAccums], in2, col2[k]);
|
| 338 |
+
}
|
| 339 |
+
}
|
| 340 |
+
for (IndexType k = 0; k < NumAccums; ++k)
|
| 341 |
+
acc[k] = vec_add_32(vec_add_32(acc[k], acc[k + NumAccums]), acc[k + 2 * NumAccums]);
|
| 342 |
+
#endif
|
| 343 |
+
while (start < end)
|
| 344 |
+
{
|
| 345 |
+
const std::ptrdiff_t i = *start++;
|
| 346 |
+
const invec_t in = vec_set_32(load_as<std::int32_t>(input + i * sizeof(std::int32_t)));
|
| 347 |
+
const auto col =
|
| 348 |
+
reinterpret_cast<const invec_t*>(&weights_cp[i * OutputDimensions * ChunkSize]);
|
| 349 |
+
for (IndexType k = 0; k < NumAccums; ++k)
|
| 350 |
+
vec_add_dpbusd_32(acc[k], in, col[k]);
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
outvec_t* outptr = reinterpret_cast<outvec_t*>(output);
|
| 354 |
+
for (IndexType k = 0; k < NumAccums; ++k)
|
| 355 |
+
outptr[k] = acc[k];
|
| 356 |
+
|
| 357 |
+
#undef vec_set_32
|
| 358 |
+
#undef vec_add_dpbusd_32
|
| 359 |
+
#ifdef vec_add_32
|
| 360 |
+
#undef vec_add_32
|
| 361 |
+
#endif
|
| 362 |
+
#else
|
| 363 |
+
// Use dense implementation for the other architectures.
|
| 364 |
+
affine_transform_non_ssse3<InputDimensions, PaddedInputDimensions, OutputDimensions>(
|
| 365 |
+
output, weights, biases, input);
|
| 366 |
+
#endif
|
| 367 |
+
}
|
| 368 |
+
|
| 369 |
+
private:
|
| 370 |
+
using BiasType = OutputType;
|
| 371 |
+
using WeightType = std::int8_t;
|
| 372 |
+
|
| 373 |
+
alignas(CacheLineSize) BiasType biases[OutputDimensions];
|
| 374 |
+
alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
|
| 375 |
+
};
|
| 376 |
+
|
| 377 |
+
} // namespace Stockfish::Eval::NNUE::Layers
|
| 378 |
+
|
| 379 |
+
#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED
|
src/nnue/layers/clipped_relu.h
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
// Definition of layer ClippedReLU of NNUE evaluation function
|
| 20 |
+
|
| 21 |
+
#ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
|
| 22 |
+
#define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
|
| 23 |
+
|
| 24 |
+
#include <algorithm>
|
| 25 |
+
#include <cstdint>
|
| 26 |
+
#include <iosfwd>
|
| 27 |
+
|
| 28 |
+
#include "../nnue_common.h"
|
| 29 |
+
|
| 30 |
+
namespace Stockfish::Eval::NNUE::Layers {
|
| 31 |
+
|
| 32 |
+
// Clipped ReLU
|
| 33 |
+
template<IndexType InDims>
|
| 34 |
+
class ClippedReLU {
|
| 35 |
+
public:
|
| 36 |
+
// Input/output type
|
| 37 |
+
using InputType = std::int32_t;
|
| 38 |
+
using OutputType = std::uint8_t;
|
| 39 |
+
|
| 40 |
+
// Number of input/output dimensions
|
| 41 |
+
static constexpr IndexType InputDimensions = InDims;
|
| 42 |
+
static constexpr IndexType OutputDimensions = InputDimensions;
|
| 43 |
+
static constexpr IndexType PaddedOutputDimensions =
|
| 44 |
+
ceil_to_multiple<IndexType>(OutputDimensions, 32);
|
| 45 |
+
|
| 46 |
+
using OutputBuffer = OutputType[PaddedOutputDimensions];
|
| 47 |
+
|
| 48 |
+
// Hash value embedded in the evaluation file
|
| 49 |
+
static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
|
| 50 |
+
std::uint32_t hashValue = 0x538D24C7u;
|
| 51 |
+
hashValue += prevHash;
|
| 52 |
+
return hashValue;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
// Read network parameters
|
| 56 |
+
bool read_parameters(std::istream&) { return true; }
|
| 57 |
+
|
| 58 |
+
// Write network parameters
|
| 59 |
+
bool write_parameters(std::ostream&) const { return true; }
|
| 60 |
+
|
| 61 |
+
std::size_t get_content_hash() const {
|
| 62 |
+
std::size_t h = 0;
|
| 63 |
+
hash_combine(h, get_hash_value(0));
|
| 64 |
+
return h;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
// Forward propagation
|
| 68 |
+
void propagate(const InputType* input, OutputType* output) const {
|
| 69 |
+
|
| 70 |
+
#if defined(USE_AVX2)
|
| 71 |
+
if constexpr (InputDimensions % SimdWidth == 0)
|
| 72 |
+
{
|
| 73 |
+
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
| 74 |
+
const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
|
| 75 |
+
const auto in = reinterpret_cast<const __m256i*>(input);
|
| 76 |
+
const auto out = reinterpret_cast<__m256i*>(output);
|
| 77 |
+
for (IndexType i = 0; i < NumChunks; ++i)
|
| 78 |
+
{
|
| 79 |
+
const __m256i words0 =
|
| 80 |
+
_mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 0]),
|
| 81 |
+
_mm256_load_si256(&in[i * 4 + 1])),
|
| 82 |
+
WeightScaleBits);
|
| 83 |
+
const __m256i words1 =
|
| 84 |
+
_mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 2]),
|
| 85 |
+
_mm256_load_si256(&in[i * 4 + 3])),
|
| 86 |
+
WeightScaleBits);
|
| 87 |
+
_mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(
|
| 88 |
+
_mm256_packs_epi16(words0, words1), Offsets));
|
| 89 |
+
}
|
| 90 |
+
}
|
| 91 |
+
else
|
| 92 |
+
{
|
| 93 |
+
constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2);
|
| 94 |
+
const auto in = reinterpret_cast<const __m128i*>(input);
|
| 95 |
+
const auto out = reinterpret_cast<__m128i*>(output);
|
| 96 |
+
for (IndexType i = 0; i < NumChunks; ++i)
|
| 97 |
+
{
|
| 98 |
+
const __m128i words0 = _mm_srli_epi16(
|
| 99 |
+
_mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
|
| 100 |
+
WeightScaleBits);
|
| 101 |
+
const __m128i words1 = _mm_srli_epi16(
|
| 102 |
+
_mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
|
| 103 |
+
WeightScaleBits);
|
| 104 |
+
_mm_store_si128(&out[i], _mm_packs_epi16(words0, words1));
|
| 105 |
+
}
|
| 106 |
+
}
|
| 107 |
+
constexpr IndexType Start = InputDimensions % SimdWidth == 0
|
| 108 |
+
? InputDimensions / SimdWidth * SimdWidth
|
| 109 |
+
: InputDimensions / (SimdWidth / 2) * (SimdWidth / 2);
|
| 110 |
+
|
| 111 |
+
#elif defined(USE_SSE2)
|
| 112 |
+
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
|
| 113 |
+
|
| 114 |
+
#ifndef USE_SSE41
|
| 115 |
+
const __m128i k0x80s = _mm_set1_epi8(-128);
|
| 116 |
+
#endif
|
| 117 |
+
|
| 118 |
+
const auto in = reinterpret_cast<const __m128i*>(input);
|
| 119 |
+
const auto out = reinterpret_cast<__m128i*>(output);
|
| 120 |
+
for (IndexType i = 0; i < NumChunks; ++i)
|
| 121 |
+
{
|
| 122 |
+
#if defined(USE_SSE41)
|
| 123 |
+
const __m128i words0 = _mm_srli_epi16(
|
| 124 |
+
_mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
|
| 125 |
+
WeightScaleBits);
|
| 126 |
+
const __m128i words1 = _mm_srli_epi16(
|
| 127 |
+
_mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
|
| 128 |
+
WeightScaleBits);
|
| 129 |
+
_mm_store_si128(&out[i], _mm_packs_epi16(words0, words1));
|
| 130 |
+
#else
|
| 131 |
+
const __m128i words0 = _mm_srai_epi16(
|
| 132 |
+
_mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
|
| 133 |
+
WeightScaleBits);
|
| 134 |
+
const __m128i words1 = _mm_srai_epi16(
|
| 135 |
+
_mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
|
| 136 |
+
WeightScaleBits);
|
| 137 |
+
const __m128i packedbytes = _mm_packs_epi16(words0, words1);
|
| 138 |
+
_mm_store_si128(&out[i], _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s));
|
| 139 |
+
#endif
|
| 140 |
+
}
|
| 141 |
+
constexpr IndexType Start = NumChunks * SimdWidth;
|
| 142 |
+
|
| 143 |
+
#elif defined(USE_NEON)
|
| 144 |
+
constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2);
|
| 145 |
+
const SIMD::vec_i8x8_t Zero = {0};
|
| 146 |
+
const auto in = reinterpret_cast<const SIMD::vec_i32x4_t*>(input);
|
| 147 |
+
const auto out = reinterpret_cast<SIMD::vec_i8x8_t*>(output);
|
| 148 |
+
for (IndexType i = 0; i < NumChunks; ++i)
|
| 149 |
+
{
|
| 150 |
+
int16x8_t shifted;
|
| 151 |
+
const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
|
| 152 |
+
pack[0] = vqshrn_n_s32(in[i * 2 + 0], WeightScaleBits);
|
| 153 |
+
pack[1] = vqshrn_n_s32(in[i * 2 + 1], WeightScaleBits);
|
| 154 |
+
out[i] = vmax_s8(vqmovn_s16(shifted), Zero);
|
| 155 |
+
}
|
| 156 |
+
constexpr IndexType Start = NumChunks * (SimdWidth / 2);
|
| 157 |
+
#else
|
| 158 |
+
constexpr IndexType Start = 0;
|
| 159 |
+
#endif
|
| 160 |
+
|
| 161 |
+
for (IndexType i = Start; i < InputDimensions; ++i)
|
| 162 |
+
{
|
| 163 |
+
output[i] = static_cast<OutputType>(std::clamp(input[i] >> WeightScaleBits, 0, 127));
|
| 164 |
+
}
|
| 165 |
+
}
|
| 166 |
+
};
|
| 167 |
+
|
| 168 |
+
} // namespace Stockfish::Eval::NNUE::Layers
|
| 169 |
+
|
| 170 |
+
#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
|
src/nnue/layers/sqr_clipped_relu.h
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
// Definition of layer SqrClippedReLU of NNUE evaluation function
|
| 20 |
+
|
| 21 |
+
#ifndef NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED
|
| 22 |
+
#define NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED
|
| 23 |
+
|
| 24 |
+
#include <algorithm>
|
| 25 |
+
#include <cstdint>
|
| 26 |
+
#include <iosfwd>
|
| 27 |
+
|
| 28 |
+
#include "../nnue_common.h"
|
| 29 |
+
|
| 30 |
+
namespace Stockfish::Eval::NNUE::Layers {
|
| 31 |
+
|
| 32 |
+
// Squared clipped ReLU
|
| 33 |
+
template<IndexType InDims>
|
| 34 |
+
class SqrClippedReLU {
|
| 35 |
+
public:
|
| 36 |
+
// Input/output type
|
| 37 |
+
using InputType = std::int32_t;
|
| 38 |
+
using OutputType = std::uint8_t;
|
| 39 |
+
|
| 40 |
+
// Number of input/output dimensions
|
| 41 |
+
static constexpr IndexType InputDimensions = InDims;
|
| 42 |
+
static constexpr IndexType OutputDimensions = InputDimensions;
|
| 43 |
+
static constexpr IndexType PaddedOutputDimensions =
|
| 44 |
+
ceil_to_multiple<IndexType>(OutputDimensions, 32);
|
| 45 |
+
|
| 46 |
+
using OutputBuffer = OutputType[PaddedOutputDimensions];
|
| 47 |
+
|
| 48 |
+
// Hash value embedded in the evaluation file
|
| 49 |
+
static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
|
| 50 |
+
std::uint32_t hashValue = 0x538D24C7u;
|
| 51 |
+
hashValue += prevHash;
|
| 52 |
+
return hashValue;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
// Read network parameters
|
| 56 |
+
bool read_parameters(std::istream&) { return true; }
|
| 57 |
+
|
| 58 |
+
// Write network parameters
|
| 59 |
+
bool write_parameters(std::ostream&) const { return true; }
|
| 60 |
+
|
| 61 |
+
std::size_t get_content_hash() const {
|
| 62 |
+
std::size_t h = 0;
|
| 63 |
+
hash_combine(h, get_hash_value(0));
|
| 64 |
+
return h;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
// Forward propagation
|
| 68 |
+
void propagate(const InputType* input, OutputType* output) const {
|
| 69 |
+
|
| 70 |
+
#if defined(USE_SSE2)
|
| 71 |
+
constexpr IndexType NumChunks = InputDimensions / 16;
|
| 72 |
+
|
| 73 |
+
static_assert(WeightScaleBits == 6);
|
| 74 |
+
const auto in = reinterpret_cast<const __m128i*>(input);
|
| 75 |
+
const auto out = reinterpret_cast<__m128i*>(output);
|
| 76 |
+
for (IndexType i = 0; i < NumChunks; ++i)
|
| 77 |
+
{
|
| 78 |
+
__m128i words0 =
|
| 79 |
+
_mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1]));
|
| 80 |
+
__m128i words1 =
|
| 81 |
+
_mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3]));
|
| 82 |
+
|
| 83 |
+
// We shift by WeightScaleBits * 2 = 12 and divide by 128
|
| 84 |
+
// which is an additional shift-right of 7, meaning 19 in total.
|
| 85 |
+
// MulHi strips the lower 16 bits so we need to shift out 3 more to match.
|
| 86 |
+
words0 = _mm_srli_epi16(_mm_mulhi_epi16(words0, words0), 3);
|
| 87 |
+
words1 = _mm_srli_epi16(_mm_mulhi_epi16(words1, words1), 3);
|
| 88 |
+
|
| 89 |
+
_mm_store_si128(&out[i], _mm_packs_epi16(words0, words1));
|
| 90 |
+
}
|
| 91 |
+
constexpr IndexType Start = NumChunks * 16;
|
| 92 |
+
|
| 93 |
+
#else
|
| 94 |
+
constexpr IndexType Start = 0;
|
| 95 |
+
#endif
|
| 96 |
+
|
| 97 |
+
for (IndexType i = Start; i < InputDimensions; ++i)
|
| 98 |
+
{
|
| 99 |
+
output[i] = static_cast<OutputType>(
|
| 100 |
+
// Really should be /127 but we need to make it fast so we right-shift
|
| 101 |
+
// by an extra 7 bits instead. Needs to be accounted for in the trainer.
|
| 102 |
+
std::min(127ll, ((long long) (input[i]) * input[i]) >> (2 * WeightScaleBits + 7)));
|
| 103 |
+
}
|
| 104 |
+
}
|
| 105 |
+
};
|
| 106 |
+
|
| 107 |
+
} // namespace Stockfish::Eval::NNUE::Layers
|
| 108 |
+
|
| 109 |
+
#endif // NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED
|
src/nnue/network.cpp
ADDED
|
@@ -0,0 +1,415 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include "network.h"
|
| 20 |
+
|
| 21 |
+
#include <cstdlib>
|
| 22 |
+
#include <fstream>
|
| 23 |
+
#include <iostream>
|
| 24 |
+
#include <optional>
|
| 25 |
+
#include <type_traits>
|
| 26 |
+
#include <vector>
|
| 27 |
+
|
| 28 |
+
#define INCBIN_SILENCE_BITCODE_WARNING
|
| 29 |
+
#include "../incbin/incbin.h"
|
| 30 |
+
|
| 31 |
+
#include "../evaluate.h"
|
| 32 |
+
#include "../misc.h"
|
| 33 |
+
#include "../position.h"
|
| 34 |
+
#include "../types.h"
|
| 35 |
+
#include "nnue_architecture.h"
|
| 36 |
+
#include "nnue_common.h"
|
| 37 |
+
#include "nnue_misc.h"
|
| 38 |
+
|
| 39 |
+
// Macro to embed the default efficiently updatable neural network (NNUE) files
// in the engine binary (using incbin.h, by Dale Weiler).
// Each INCBIN invocation below declares three variables; for the big net:
//     const unsigned char        gEmbeddedNNUEBigData[];  // the embedded data
//     const unsigned char *const gEmbeddedNNUEBigEnd;     // a marker to the end
//     const unsigned int         gEmbeddedNNUEBigSize;    // the size of the embedded file
// and likewise gEmbeddedNNUESmallData/End/Size for the small net.
// Note that this does not work in Microsoft Visual Studio.
|
| 46 |
+
#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF)
|
| 47 |
+
INCBIN(EmbeddedNNUEBig, EvalFileDefaultNameBig);
|
| 48 |
+
INCBIN(EmbeddedNNUESmall, EvalFileDefaultNameSmall);
|
| 49 |
+
#else
|
| 50 |
+
const unsigned char gEmbeddedNNUEBigData[1] = {0x0};
|
| 51 |
+
const unsigned char* const gEmbeddedNNUEBigEnd = &gEmbeddedNNUEBigData[1];
|
| 52 |
+
const unsigned int gEmbeddedNNUEBigSize = 1;
|
| 53 |
+
const unsigned char gEmbeddedNNUESmallData[1] = {0x0};
|
| 54 |
+
const unsigned char* const gEmbeddedNNUESmallEnd = &gEmbeddedNNUESmallData[1];
|
| 55 |
+
const unsigned int gEmbeddedNNUESmallSize = 1;
|
| 56 |
+
#endif
|
| 57 |
+
|
| 58 |
+
namespace {
|
| 59 |
+
|
| 60 |
+
struct EmbeddedNNUE {
|
| 61 |
+
EmbeddedNNUE(const unsigned char* embeddedData,
|
| 62 |
+
const unsigned char* embeddedEnd,
|
| 63 |
+
const unsigned int embeddedSize) :
|
| 64 |
+
data(embeddedData),
|
| 65 |
+
end(embeddedEnd),
|
| 66 |
+
size(embeddedSize) {}
|
| 67 |
+
const unsigned char* data;
|
| 68 |
+
const unsigned char* end;
|
| 69 |
+
const unsigned int size;
|
| 70 |
+
};
|
| 71 |
+
|
| 72 |
+
using namespace Stockfish::Eval::NNUE;
|
| 73 |
+
|
| 74 |
+
EmbeddedNNUE get_embedded(EmbeddedNNUEType type) {
|
| 75 |
+
if (type == EmbeddedNNUEType::BIG)
|
| 76 |
+
return EmbeddedNNUE(gEmbeddedNNUEBigData, gEmbeddedNNUEBigEnd, gEmbeddedNNUEBigSize);
|
| 77 |
+
else
|
| 78 |
+
return EmbeddedNNUE(gEmbeddedNNUESmallData, gEmbeddedNNUESmallEnd, gEmbeddedNNUESmallSize);
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
namespace Stockfish::Eval::NNUE {
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
namespace Detail {
|
| 88 |
+
|
| 89 |
+
// Read evaluation function parameters
|
| 90 |
+
template<typename T>
|
| 91 |
+
bool read_parameters(std::istream& stream, T& reference) {
|
| 92 |
+
std::uint32_t header;
|
| 93 |
+
header = read_little_endian<std::uint32_t>(stream);
|
| 94 |
+
if (!stream)
|
| 95 |
+
return false;
|
| 96 |
+
return reference.read_parameters(stream);
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
// Write evaluation function parameters
|
| 100 |
+
template<typename T>
|
| 101 |
+
bool write_parameters(std::ostream& stream, const T& reference) {
|
| 102 |
+
|
| 103 |
+
write_little_endian<std::uint32_t>(stream, T::get_hash_value());
|
| 104 |
+
return reference.write_parameters(stream);
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
} // namespace Detail
|
| 108 |
+
|
| 109 |
+
template<typename Arch, typename Transformer>
|
| 110 |
+
void Network<Arch, Transformer>::load(const std::string& rootDirectory, std::string evalfilePath) {
|
| 111 |
+
#if defined(DEFAULT_NNUE_DIRECTORY)
|
| 112 |
+
std::vector<std::string> dirs = {"<internal>", "", rootDirectory,
|
| 113 |
+
stringify(DEFAULT_NNUE_DIRECTORY)};
|
| 114 |
+
#else
|
| 115 |
+
std::vector<std::string> dirs = {"<internal>", "", rootDirectory};
|
| 116 |
+
#endif
|
| 117 |
+
|
| 118 |
+
if (evalfilePath.empty())
|
| 119 |
+
evalfilePath = evalFile.defaultName;
|
| 120 |
+
|
| 121 |
+
for (const auto& directory : dirs)
|
| 122 |
+
{
|
| 123 |
+
if (std::string(evalFile.current) != evalfilePath)
|
| 124 |
+
{
|
| 125 |
+
if (directory != "<internal>")
|
| 126 |
+
{
|
| 127 |
+
load_user_net(directory, evalfilePath);
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
if (directory == "<internal>" && evalfilePath == std::string(evalFile.defaultName))
|
| 131 |
+
{
|
| 132 |
+
load_internal();
|
| 133 |
+
}
|
| 134 |
+
}
|
| 135 |
+
}
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
template<typename Arch, typename Transformer>
|
| 140 |
+
bool Network<Arch, Transformer>::save(const std::optional<std::string>& filename) const {
|
| 141 |
+
std::string actualFilename;
|
| 142 |
+
std::string msg;
|
| 143 |
+
|
| 144 |
+
if (filename.has_value())
|
| 145 |
+
actualFilename = filename.value();
|
| 146 |
+
else
|
| 147 |
+
{
|
| 148 |
+
if (std::string(evalFile.current) != std::string(evalFile.defaultName))
|
| 149 |
+
{
|
| 150 |
+
msg = "Failed to export a net. "
|
| 151 |
+
"A non-embedded net can only be saved if the filename is specified";
|
| 152 |
+
|
| 153 |
+
sync_cout << msg << sync_endl;
|
| 154 |
+
return false;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
actualFilename = evalFile.defaultName;
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
std::ofstream stream(actualFilename, std::ios_base::binary);
|
| 161 |
+
bool saved = save(stream, evalFile.current, evalFile.netDescription);
|
| 162 |
+
|
| 163 |
+
msg = saved ? "Network saved successfully to " + actualFilename : "Failed to export a net";
|
| 164 |
+
|
| 165 |
+
sync_cout << msg << sync_endl;
|
| 166 |
+
return saved;
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
template<typename Arch, typename Transformer>
|
| 171 |
+
NetworkOutput
|
| 172 |
+
Network<Arch, Transformer>::evaluate(const Position& pos,
|
| 173 |
+
AccumulatorStack& accumulatorStack,
|
| 174 |
+
AccumulatorCaches::Cache<FTDimensions>& cache) const {
|
| 175 |
+
|
| 176 |
+
constexpr uint64_t alignment = CacheLineSize;
|
| 177 |
+
|
| 178 |
+
alignas(alignment)
|
| 179 |
+
TransformedFeatureType transformedFeatures[FeatureTransformer<FTDimensions>::BufferSize];
|
| 180 |
+
|
| 181 |
+
ASSERT_ALIGNED(transformedFeatures, alignment);
|
| 182 |
+
|
| 183 |
+
const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
| 184 |
+
const auto psqt =
|
| 185 |
+
featureTransformer.transform(pos, accumulatorStack, cache, transformedFeatures, bucket);
|
| 186 |
+
const auto positional = network[bucket].propagate(transformedFeatures);
|
| 187 |
+
return {static_cast<Value>(psqt / OutputScale), static_cast<Value>(positional / OutputScale)};
|
| 188 |
+
}
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
template<typename Arch, typename Transformer>
|
| 192 |
+
void Network<Arch, Transformer>::verify(std::string evalfilePath,
|
| 193 |
+
const std::function<void(std::string_view)>& f) const {
|
| 194 |
+
if (evalfilePath.empty())
|
| 195 |
+
evalfilePath = evalFile.defaultName;
|
| 196 |
+
|
| 197 |
+
if (std::string(evalFile.current) != evalfilePath)
|
| 198 |
+
{
|
| 199 |
+
if (f)
|
| 200 |
+
{
|
| 201 |
+
std::string msg1 =
|
| 202 |
+
"Network evaluation parameters compatible with the engine must be available.";
|
| 203 |
+
std::string msg2 = "The network file " + evalfilePath + " was not loaded successfully.";
|
| 204 |
+
std::string msg3 = "The UCI option EvalFile might need to specify the full path, "
|
| 205 |
+
"including the directory name, to the network file.";
|
| 206 |
+
std::string msg4 = "The default net can be downloaded from: "
|
| 207 |
+
"https://tests.stockfishchess.org/api/nn/"
|
| 208 |
+
+ std::string(evalFile.defaultName);
|
| 209 |
+
std::string msg5 = "The engine will be terminated now.";
|
| 210 |
+
|
| 211 |
+
std::string msg = "ERROR: " + msg1 + '\n' + "ERROR: " + msg2 + '\n' + "ERROR: " + msg3
|
| 212 |
+
+ '\n' + "ERROR: " + msg4 + '\n' + "ERROR: " + msg5 + '\n';
|
| 213 |
+
|
| 214 |
+
f(msg);
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
+
exit(EXIT_FAILURE);
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
if (f)
|
| 221 |
+
{
|
| 222 |
+
size_t size = sizeof(featureTransformer) + sizeof(Arch) * LayerStacks;
|
| 223 |
+
f("NNUE evaluation using " + evalfilePath + " (" + std::to_string(size / (1024 * 1024))
|
| 224 |
+
+ "MiB, (" + std::to_string(featureTransformer.TotalInputDimensions) + ", "
|
| 225 |
+
+ std::to_string(network[0].TransformedFeatureDimensions) + ", "
|
| 226 |
+
+ std::to_string(network[0].FC_0_OUTPUTS) + ", " + std::to_string(network[0].FC_1_OUTPUTS)
|
| 227 |
+
+ ", 1))");
|
| 228 |
+
}
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
template<typename Arch, typename Transformer>
|
| 233 |
+
NnueEvalTrace
|
| 234 |
+
Network<Arch, Transformer>::trace_evaluate(const Position& pos,
|
| 235 |
+
AccumulatorStack& accumulatorStack,
|
| 236 |
+
AccumulatorCaches::Cache<FTDimensions>& cache) const {
|
| 237 |
+
|
| 238 |
+
constexpr uint64_t alignment = CacheLineSize;
|
| 239 |
+
|
| 240 |
+
alignas(alignment)
|
| 241 |
+
TransformedFeatureType transformedFeatures[FeatureTransformer<FTDimensions>::BufferSize];
|
| 242 |
+
|
| 243 |
+
ASSERT_ALIGNED(transformedFeatures, alignment);
|
| 244 |
+
|
| 245 |
+
NnueEvalTrace t{};
|
| 246 |
+
t.correctBucket = (pos.count<ALL_PIECES>() - 1) / 4;
|
| 247 |
+
for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
|
| 248 |
+
{
|
| 249 |
+
const auto materialist =
|
| 250 |
+
featureTransformer.transform(pos, accumulatorStack, cache, transformedFeatures, bucket);
|
| 251 |
+
const auto positional = network[bucket].propagate(transformedFeatures);
|
| 252 |
+
|
| 253 |
+
t.psqt[bucket] = static_cast<Value>(materialist / OutputScale);
|
| 254 |
+
t.positional[bucket] = static_cast<Value>(positional / OutputScale);
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
return t;
|
| 258 |
+
}
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
template<typename Arch, typename Transformer>
|
| 262 |
+
void Network<Arch, Transformer>::load_user_net(const std::string& dir,
|
| 263 |
+
const std::string& evalfilePath) {
|
| 264 |
+
std::ifstream stream(dir + evalfilePath, std::ios::binary);
|
| 265 |
+
auto description = load(stream);
|
| 266 |
+
|
| 267 |
+
if (description.has_value())
|
| 268 |
+
{
|
| 269 |
+
evalFile.current = evalfilePath;
|
| 270 |
+
evalFile.netDescription = description.value();
|
| 271 |
+
}
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
template<typename Arch, typename Transformer>
|
| 276 |
+
void Network<Arch, Transformer>::load_internal() {
|
| 277 |
+
// C++ way to prepare a buffer for a memory stream
|
| 278 |
+
class MemoryBuffer: public std::basic_streambuf<char> {
|
| 279 |
+
public:
|
| 280 |
+
MemoryBuffer(char* p, size_t n) {
|
| 281 |
+
setg(p, p, p + n);
|
| 282 |
+
setp(p, p + n);
|
| 283 |
+
}
|
| 284 |
+
};
|
| 285 |
+
|
| 286 |
+
const auto embedded = get_embedded(embeddedType);
|
| 287 |
+
|
| 288 |
+
MemoryBuffer buffer(const_cast<char*>(reinterpret_cast<const char*>(embedded.data)),
|
| 289 |
+
size_t(embedded.size));
|
| 290 |
+
|
| 291 |
+
std::istream stream(&buffer);
|
| 292 |
+
auto description = load(stream);
|
| 293 |
+
|
| 294 |
+
if (description.has_value())
|
| 295 |
+
{
|
| 296 |
+
evalFile.current = evalFile.defaultName;
|
| 297 |
+
evalFile.netDescription = description.value();
|
| 298 |
+
}
|
| 299 |
+
}
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
template<typename Arch, typename Transformer>
|
| 303 |
+
void Network<Arch, Transformer>::initialize() {
|
| 304 |
+
initialized = true;
|
| 305 |
+
}
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
template<typename Arch, typename Transformer>
|
| 309 |
+
bool Network<Arch, Transformer>::save(std::ostream& stream,
|
| 310 |
+
const std::string& name,
|
| 311 |
+
const std::string& netDescription) const {
|
| 312 |
+
if (name.empty() || name == "None")
|
| 313 |
+
return false;
|
| 314 |
+
|
| 315 |
+
return write_parameters(stream, netDescription);
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
template<typename Arch, typename Transformer>
|
| 320 |
+
std::optional<std::string> Network<Arch, Transformer>::load(std::istream& stream) {
|
| 321 |
+
initialize();
|
| 322 |
+
std::string description;
|
| 323 |
+
|
| 324 |
+
return read_parameters(stream, description) ? std::make_optional(description) : std::nullopt;
|
| 325 |
+
}
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
template<typename Arch, typename Transformer>
|
| 329 |
+
std::size_t Network<Arch, Transformer>::get_content_hash() const {
|
| 330 |
+
if (!initialized)
|
| 331 |
+
return 0;
|
| 332 |
+
|
| 333 |
+
std::size_t h = 0;
|
| 334 |
+
hash_combine(h, featureTransformer);
|
| 335 |
+
for (auto&& layerstack : network)
|
| 336 |
+
hash_combine(h, layerstack);
|
| 337 |
+
hash_combine(h, evalFile);
|
| 338 |
+
hash_combine(h, static_cast<int>(embeddedType));
|
| 339 |
+
return h;
|
| 340 |
+
}
|
| 341 |
+
|
| 342 |
+
// Read network header
|
| 343 |
+
template<typename Arch, typename Transformer>
|
| 344 |
+
bool Network<Arch, Transformer>::read_header(std::istream& stream,
|
| 345 |
+
std::uint32_t* hashValue,
|
| 346 |
+
std::string* desc) const {
|
| 347 |
+
std::uint32_t magic;
|
| 348 |
+
|
| 349 |
+
magic = read_little_endian<std::uint32_t>(stream);
|
| 350 |
+
*hashValue = read_little_endian<std::uint32_t>(stream);
|
| 351 |
+
std::uint32_t size = read_little_endian<std::uint32_t>(stream);
|
| 352 |
+
if (!stream || magic != Version)
|
| 353 |
+
return false;
|
| 354 |
+
desc->resize(size);
|
| 355 |
+
stream.read(&(*desc)[0], size);
|
| 356 |
+
return !stream.fail();
|
| 357 |
+
}
|
| 358 |
+
|
| 359 |
+
|
| 360 |
+
// Write network header
|
| 361 |
+
template<typename Arch, typename Transformer>
|
| 362 |
+
bool Network<Arch, Transformer>::write_header(std::ostream& stream,
|
| 363 |
+
std::uint32_t hashValue,
|
| 364 |
+
const std::string& desc) const {
|
| 365 |
+
write_little_endian<std::uint32_t>(stream, Version);
|
| 366 |
+
write_little_endian<std::uint32_t>(stream, hashValue);
|
| 367 |
+
write_little_endian<std::uint32_t>(stream, std::uint32_t(desc.size()));
|
| 368 |
+
stream.write(&desc[0], desc.size());
|
| 369 |
+
return !stream.fail();
|
| 370 |
+
}
|
| 371 |
+
|
| 372 |
+
|
| 373 |
+
template<typename Arch, typename Transformer>
|
| 374 |
+
bool Network<Arch, Transformer>::read_parameters(std::istream& stream,
|
| 375 |
+
std::string& netDescription) {
|
| 376 |
+
std::uint32_t hashValue;
|
| 377 |
+
if (!read_header(stream, &hashValue, &netDescription))
|
| 378 |
+
return false;
|
| 379 |
+
if (false && hashValue != Network::hash)
|
| 380 |
+
return false;
|
| 381 |
+
if (!Detail::read_parameters(stream, featureTransformer))
|
| 382 |
+
return false;
|
| 383 |
+
for (std::size_t i = 0; i < LayerStacks; ++i)
|
| 384 |
+
{
|
| 385 |
+
if (!Detail::read_parameters(stream, network[i]))
|
| 386 |
+
return false;
|
| 387 |
+
}
|
| 388 |
+
return stream && stream.peek() == std::ios::traits_type::eof();
|
| 389 |
+
}
|
| 390 |
+
|
| 391 |
+
|
| 392 |
+
template<typename Arch, typename Transformer>
|
| 393 |
+
bool Network<Arch, Transformer>::write_parameters(std::ostream& stream,
|
| 394 |
+
const std::string& netDescription) const {
|
| 395 |
+
if (!write_header(stream, Network::hash, netDescription))
|
| 396 |
+
return false;
|
| 397 |
+
if (!Detail::write_parameters(stream, featureTransformer))
|
| 398 |
+
return false;
|
| 399 |
+
for (std::size_t i = 0; i < LayerStacks; ++i)
|
| 400 |
+
{
|
| 401 |
+
if (!Detail::write_parameters(stream, network[i]))
|
| 402 |
+
return false;
|
| 403 |
+
}
|
| 404 |
+
return bool(stream);
|
| 405 |
+
}
|
| 406 |
+
|
| 407 |
+
// Explicit template instantiations
|
| 408 |
+
|
| 409 |
+
template class Network<NetworkArchitecture<TransformedFeatureDimensionsBig, L2Big, L3Big>,
|
| 410 |
+
FeatureTransformer<TransformedFeatureDimensionsBig>>;
|
| 411 |
+
|
| 412 |
+
template class Network<NetworkArchitecture<TransformedFeatureDimensionsSmall, L2Small, L3Small>,
|
| 413 |
+
FeatureTransformer<TransformedFeatureDimensionsSmall>>;
|
| 414 |
+
|
| 415 |
+
} // namespace Stockfish::Eval::NNUE
|
src/nnue/network.h
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef NETWORK_H_INCLUDED
|
| 20 |
+
#define NETWORK_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <cstddef>
|
| 23 |
+
#include <cstdint>
|
| 24 |
+
#include <functional>
|
| 25 |
+
#include <iostream>
|
| 26 |
+
#include <memory>
|
| 27 |
+
#include <optional>
|
| 28 |
+
#include <string>
|
| 29 |
+
#include <string_view>
|
| 30 |
+
#include <tuple>
|
| 31 |
+
|
| 32 |
+
#include "../misc.h"
|
| 33 |
+
#include "../types.h"
|
| 34 |
+
#include "nnue_accumulator.h"
|
| 35 |
+
#include "nnue_architecture.h"
|
| 36 |
+
#include "nnue_common.h"
|
| 37 |
+
#include "nnue_feature_transformer.h"
|
| 38 |
+
#include "nnue_misc.h"
|
| 39 |
+
|
| 40 |
+
namespace Stockfish {
|
| 41 |
+
class Position;
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
namespace Stockfish::Eval::NNUE {
|
| 45 |
+
|
| 46 |
+
// Identifies which of the two network images embedded in the binary
// belongs to a Network instance.
enum class EmbeddedNNUEType {
    BIG,
    SMALL
};
|
| 50 |
+
|
| 51 |
+
using NetworkOutput = std::tuple<Value, Value>;
|
| 52 |
+
|
| 53 |
+
// The network must be a trivial type, i.e. the memory must be in-line.
|
| 54 |
+
// This is required to allow sharing the network via shared memory, as
|
| 55 |
+
// there is no way to run destructors.
|
| 56 |
+
template<typename Arch, typename Transformer>
|
| 57 |
+
class Network {
|
| 58 |
+
static constexpr IndexType FTDimensions = Arch::TransformedFeatureDimensions;
|
| 59 |
+
|
| 60 |
+
public:
|
| 61 |
+
Network(EvalFile file, EmbeddedNNUEType type) :
|
| 62 |
+
evalFile(file),
|
| 63 |
+
embeddedType(type) {}
|
| 64 |
+
|
| 65 |
+
Network(const Network& other) = default;
|
| 66 |
+
Network(Network&& other) = default;
|
| 67 |
+
|
| 68 |
+
Network& operator=(const Network& other) = default;
|
| 69 |
+
Network& operator=(Network&& other) = default;
|
| 70 |
+
|
| 71 |
+
void load(const std::string& rootDirectory, std::string evalfilePath);
|
| 72 |
+
bool save(const std::optional<std::string>& filename) const;
|
| 73 |
+
|
| 74 |
+
std::size_t get_content_hash() const;
|
| 75 |
+
|
| 76 |
+
NetworkOutput evaluate(const Position& pos,
|
| 77 |
+
AccumulatorStack& accumulatorStack,
|
| 78 |
+
AccumulatorCaches::Cache<FTDimensions>& cache) const;
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
void verify(std::string evalfilePath, const std::function<void(std::string_view)>&) const;
|
| 82 |
+
NnueEvalTrace trace_evaluate(const Position& pos,
|
| 83 |
+
AccumulatorStack& accumulatorStack,
|
| 84 |
+
AccumulatorCaches::Cache<FTDimensions>& cache) const;
|
| 85 |
+
|
| 86 |
+
private:
|
| 87 |
+
void load_user_net(const std::string&, const std::string&);
|
| 88 |
+
void load_internal();
|
| 89 |
+
|
| 90 |
+
void initialize();
|
| 91 |
+
|
| 92 |
+
bool save(std::ostream&, const std::string&, const std::string&) const;
|
| 93 |
+
std::optional<std::string> load(std::istream&);
|
| 94 |
+
|
| 95 |
+
bool read_header(std::istream&, std::uint32_t*, std::string*) const;
|
| 96 |
+
bool write_header(std::ostream&, std::uint32_t, const std::string&) const;
|
| 97 |
+
|
| 98 |
+
bool read_parameters(std::istream&, std::string&);
|
| 99 |
+
bool write_parameters(std::ostream&, const std::string&) const;
|
| 100 |
+
|
| 101 |
+
// Input feature converter
|
| 102 |
+
Transformer featureTransformer;
|
| 103 |
+
|
| 104 |
+
// Evaluation function
|
| 105 |
+
Arch network[LayerStacks];
|
| 106 |
+
|
| 107 |
+
EvalFile evalFile;
|
| 108 |
+
EmbeddedNNUEType embeddedType;
|
| 109 |
+
|
| 110 |
+
bool initialized = false;
|
| 111 |
+
|
| 112 |
+
// Hash value of evaluation function structure
|
| 113 |
+
static constexpr std::uint32_t hash = Transformer::get_hash_value() ^ Arch::get_hash_value();
|
| 114 |
+
|
| 115 |
+
template<IndexType Size>
|
| 116 |
+
friend struct AccumulatorCaches::Cache;
|
| 117 |
+
};
|
| 118 |
+
|
| 119 |
+
// Definitions of the network types
|
| 120 |
+
using SmallFeatureTransformer = FeatureTransformer<TransformedFeatureDimensionsSmall>;
|
| 121 |
+
using SmallNetworkArchitecture =
|
| 122 |
+
NetworkArchitecture<TransformedFeatureDimensionsSmall, L2Small, L3Small>;
|
| 123 |
+
|
| 124 |
+
using BigFeatureTransformer = FeatureTransformer<TransformedFeatureDimensionsBig>;
|
| 125 |
+
using BigNetworkArchitecture = NetworkArchitecture<TransformedFeatureDimensionsBig, L2Big, L3Big>;
|
| 126 |
+
|
| 127 |
+
using NetworkBig = Network<BigNetworkArchitecture, BigFeatureTransformer>;
|
| 128 |
+
using NetworkSmall = Network<SmallNetworkArchitecture, SmallFeatureTransformer>;
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
struct Networks {
|
| 132 |
+
Networks(EvalFile bigFile, EvalFile smallFile) :
|
| 133 |
+
big(bigFile, EmbeddedNNUEType::BIG),
|
| 134 |
+
small(smallFile, EmbeddedNNUEType::SMALL) {}
|
| 135 |
+
|
| 136 |
+
NetworkBig big;
|
| 137 |
+
NetworkSmall small;
|
| 138 |
+
};
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
}  // namespace Stockfish::Eval::NNUE
|
| 142 |
+
|
| 143 |
+
template<typename ArchT, typename FeatureTransformerT>
|
| 144 |
+
struct std::hash<Stockfish::Eval::NNUE::Network<ArchT, FeatureTransformerT>> {
|
| 145 |
+
std::size_t operator()(
|
| 146 |
+
const Stockfish::Eval::NNUE::Network<ArchT, FeatureTransformerT>& network) const noexcept {
|
| 147 |
+
return network.get_content_hash();
|
| 148 |
+
}
|
| 149 |
+
};
|
| 150 |
+
|
| 151 |
+
template<>
|
| 152 |
+
struct std::hash<Stockfish::Eval::NNUE::Networks> {
|
| 153 |
+
std::size_t operator()(const Stockfish::Eval::NNUE::Networks& networks) const noexcept {
|
| 154 |
+
std::size_t h = 0;
|
| 155 |
+
Stockfish::hash_combine(h, networks.big);
|
| 156 |
+
Stockfish::hash_combine(h, networks.small);
|
| 157 |
+
return h;
|
| 158 |
+
}
|
| 159 |
+
};
|
| 160 |
+
|
| 161 |
+
#endif
|
src/nnue/nnue_accumulator.cpp
ADDED
|
@@ -0,0 +1,952 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include "nnue_accumulator.h"
|
| 20 |
+
|
| 21 |
+
#include <cassert>
|
| 22 |
+
#include <cstdint>
|
| 23 |
+
#include <new>
|
| 24 |
+
#include <type_traits>
|
| 25 |
+
|
| 26 |
+
#include "../bitboard.h"
|
| 27 |
+
#include "../misc.h"
|
| 28 |
+
#include "../position.h"
|
| 29 |
+
#include "../types.h"
|
| 30 |
+
#include "features/half_ka_v2_hm.h"
|
| 31 |
+
#include "nnue_architecture.h"
|
| 32 |
+
#include "nnue_common.h"
|
| 33 |
+
#include "nnue_feature_transformer.h" // IWYU pragma: keep
|
| 34 |
+
#include "simd.h"
|
| 35 |
+
|
| 36 |
+
namespace Stockfish::Eval::NNUE {
|
| 37 |
+
|
| 38 |
+
using namespace SIMD;
|
| 39 |
+
|
| 40 |
+
namespace {
|
| 41 |
+
|
| 42 |
+
template<IndexType TransformedFeatureDimensions>
|
| 43 |
+
void double_inc_update(Color perspective,
|
| 44 |
+
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
|
| 45 |
+
const Square ksq,
|
| 46 |
+
AccumulatorState<PSQFeatureSet>& middle_state,
|
| 47 |
+
AccumulatorState<PSQFeatureSet>& target_state,
|
| 48 |
+
const AccumulatorState<PSQFeatureSet>& computed);
|
| 49 |
+
|
| 50 |
+
template<IndexType TransformedFeatureDimensions>
|
| 51 |
+
void double_inc_update(Color perspective,
|
| 52 |
+
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
|
| 53 |
+
const Square ksq,
|
| 54 |
+
AccumulatorState<ThreatFeatureSet>& middle_state,
|
| 55 |
+
AccumulatorState<ThreatFeatureSet>& target_state,
|
| 56 |
+
const AccumulatorState<ThreatFeatureSet>& computed,
|
| 57 |
+
const DirtyPiece& dp2);
|
| 58 |
+
|
| 59 |
+
template<bool Forward, typename FeatureSet, IndexType TransformedFeatureDimensions>
|
| 60 |
+
void update_accumulator_incremental(
|
| 61 |
+
Color perspective,
|
| 62 |
+
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
|
| 63 |
+
const Square ksq,
|
| 64 |
+
AccumulatorState<FeatureSet>& target_state,
|
| 65 |
+
const AccumulatorState<FeatureSet>& computed);
|
| 66 |
+
|
| 67 |
+
template<IndexType Dimensions>
|
| 68 |
+
void update_accumulator_refresh_cache(Color perspective,
|
| 69 |
+
const FeatureTransformer<Dimensions>& featureTransformer,
|
| 70 |
+
const Position& pos,
|
| 71 |
+
AccumulatorState<PSQFeatureSet>& accumulatorState,
|
| 72 |
+
AccumulatorCaches::Cache<Dimensions>& cache);
|
| 73 |
+
|
| 74 |
+
template<IndexType Dimensions>
|
| 75 |
+
void update_threats_accumulator_full(Color perspective,
|
| 76 |
+
const FeatureTransformer<Dimensions>& featureTransformer,
|
| 77 |
+
const Position& pos,
|
| 78 |
+
AccumulatorState<ThreatFeatureSet>& accumulatorState);
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
template<typename T>
|
| 82 |
+
const AccumulatorState<T>& AccumulatorStack::latest() const noexcept {
|
| 83 |
+
return accumulators<T>()[size - 1];
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
// Explicit template instantiations
|
| 87 |
+
template const AccumulatorState<PSQFeatureSet>& AccumulatorStack::latest() const noexcept;
|
| 88 |
+
template const AccumulatorState<ThreatFeatureSet>& AccumulatorStack::latest() const noexcept;
|
| 89 |
+
|
| 90 |
+
template<typename T>
|
| 91 |
+
AccumulatorState<T>& AccumulatorStack::mut_latest() noexcept {
|
| 92 |
+
return mut_accumulators<T>()[size - 1];
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
template<typename T>
|
| 96 |
+
const std::array<AccumulatorState<T>, AccumulatorStack::MaxSize>&
|
| 97 |
+
AccumulatorStack::accumulators() const noexcept {
|
| 98 |
+
static_assert(std::is_same_v<T, PSQFeatureSet> || std::is_same_v<T, ThreatFeatureSet>,
|
| 99 |
+
"Invalid Feature Set Type");
|
| 100 |
+
|
| 101 |
+
if constexpr (std::is_same_v<T, PSQFeatureSet>)
|
| 102 |
+
return psq_accumulators;
|
| 103 |
+
|
| 104 |
+
if constexpr (std::is_same_v<T, ThreatFeatureSet>)
|
| 105 |
+
return threat_accumulators;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
template<typename T>
|
| 109 |
+
std::array<AccumulatorState<T>, AccumulatorStack::MaxSize>&
|
| 110 |
+
AccumulatorStack::mut_accumulators() noexcept {
|
| 111 |
+
static_assert(std::is_same_v<T, PSQFeatureSet> || std::is_same_v<T, ThreatFeatureSet>,
|
| 112 |
+
"Invalid Feature Set Type");
|
| 113 |
+
|
| 114 |
+
if constexpr (std::is_same_v<T, PSQFeatureSet>)
|
| 115 |
+
return psq_accumulators;
|
| 116 |
+
|
| 117 |
+
if constexpr (std::is_same_v<T, ThreatFeatureSet>)
|
| 118 |
+
return threat_accumulators;
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
// Shrinks the stack back to a single entry and resets that bottom entry of
// both accumulator arrays with an empty diff.
void AccumulatorStack::reset() noexcept {
    psq_accumulators.front().reset({});
    threat_accumulators.front().reset({});

    size = 1;
}
|
| 126 |
+
|
| 127 |
+
std::pair<DirtyPiece&, DirtyThreats&> AccumulatorStack::push() noexcept {
|
| 128 |
+
assert(size < MaxSize);
|
| 129 |
+
auto& dp = psq_accumulators[size].reset();
|
| 130 |
+
auto& dts = threat_accumulators[size].reset();
|
| 131 |
+
new (&dts) DirtyThreats;
|
| 132 |
+
size++;
|
| 133 |
+
return {dp, dts};
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
// Drops the top entry of the stack. The bottom entry is never popped.
void AccumulatorStack::pop() noexcept {
    assert(size > 1);
    --size;
}
|
| 140 |
+
|
| 141 |
+
template<IndexType Dimensions>
|
| 142 |
+
void AccumulatorStack::evaluate(const Position& pos,
|
| 143 |
+
const FeatureTransformer<Dimensions>& featureTransformer,
|
| 144 |
+
AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
|
| 145 |
+
constexpr bool UseThreats = (Dimensions == TransformedFeatureDimensionsBig);
|
| 146 |
+
|
| 147 |
+
evaluate_side<PSQFeatureSet>(WHITE, pos, featureTransformer, cache);
|
| 148 |
+
|
| 149 |
+
if (UseThreats)
|
| 150 |
+
evaluate_side<ThreatFeatureSet>(WHITE, pos, featureTransformer, cache);
|
| 151 |
+
|
| 152 |
+
evaluate_side<PSQFeatureSet>(BLACK, pos, featureTransformer, cache);
|
| 153 |
+
|
| 154 |
+
if (UseThreats)
|
| 155 |
+
evaluate_side<ThreatFeatureSet>(BLACK, pos, featureTransformer, cache);
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
template<typename FeatureSet, IndexType Dimensions>
|
| 159 |
+
void AccumulatorStack::evaluate_side(Color perspective,
|
| 160 |
+
const Position& pos,
|
| 161 |
+
const FeatureTransformer<Dimensions>& featureTransformer,
|
| 162 |
+
AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
|
| 163 |
+
|
| 164 |
+
const auto last_usable_accum =
|
| 165 |
+
find_last_usable_accumulator<FeatureSet, Dimensions>(perspective);
|
| 166 |
+
|
| 167 |
+
if ((accumulators<FeatureSet>()[last_usable_accum].template acc<Dimensions>())
|
| 168 |
+
.computed[perspective])
|
| 169 |
+
forward_update_incremental<FeatureSet>(perspective, pos, featureTransformer,
|
| 170 |
+
last_usable_accum);
|
| 171 |
+
|
| 172 |
+
else
|
| 173 |
+
{
|
| 174 |
+
if constexpr (std::is_same_v<FeatureSet, PSQFeatureSet>)
|
| 175 |
+
update_accumulator_refresh_cache(perspective, featureTransformer, pos,
|
| 176 |
+
mut_latest<PSQFeatureSet>(), cache);
|
| 177 |
+
else
|
| 178 |
+
update_threats_accumulator_full(perspective, featureTransformer, pos,
|
| 179 |
+
mut_latest<ThreatFeatureSet>());
|
| 180 |
+
|
| 181 |
+
backward_update_incremental<FeatureSet>(perspective, pos, featureTransformer,
|
| 182 |
+
last_usable_accum);
|
| 183 |
+
}
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
// Find the earliest usable accumulator, this can either be a computed accumulator or the accumulator
|
| 187 |
+
// state just before a change that requires full refresh.
|
| 188 |
+
template<typename FeatureSet, IndexType Dimensions>
|
| 189 |
+
std::size_t AccumulatorStack::find_last_usable_accumulator(Color perspective) const noexcept {
|
| 190 |
+
|
| 191 |
+
for (std::size_t curr_idx = size - 1; curr_idx > 0; curr_idx--)
|
| 192 |
+
{
|
| 193 |
+
if ((accumulators<FeatureSet>()[curr_idx].template acc<Dimensions>()).computed[perspective])
|
| 194 |
+
return curr_idx;
|
| 195 |
+
|
| 196 |
+
if (FeatureSet::requires_refresh(accumulators<FeatureSet>()[curr_idx].diff, perspective))
|
| 197 |
+
return curr_idx;
|
| 198 |
+
}
|
| 199 |
+
|
| 200 |
+
return 0;
|
| 201 |
+
}
|
| 202 |
+
|
| 203 |
+
template<typename FeatureSet, IndexType Dimensions>
|
| 204 |
+
void AccumulatorStack::forward_update_incremental(
|
| 205 |
+
Color perspective,
|
| 206 |
+
const Position& pos,
|
| 207 |
+
const FeatureTransformer<Dimensions>& featureTransformer,
|
| 208 |
+
const std::size_t begin) noexcept {
|
| 209 |
+
|
| 210 |
+
assert(begin < accumulators<FeatureSet>().size());
|
| 211 |
+
assert((accumulators<FeatureSet>()[begin].template acc<Dimensions>()).computed[perspective]);
|
| 212 |
+
|
| 213 |
+
const Square ksq = pos.square<KING>(perspective);
|
| 214 |
+
|
| 215 |
+
for (std::size_t next = begin + 1; next < size; next++)
|
| 216 |
+
{
|
| 217 |
+
if (next + 1 < size)
|
| 218 |
+
{
|
| 219 |
+
DirtyPiece& dp1 = mut_accumulators<PSQFeatureSet>()[next].diff;
|
| 220 |
+
DirtyPiece& dp2 = mut_accumulators<PSQFeatureSet>()[next + 1].diff;
|
| 221 |
+
|
| 222 |
+
auto& accumulators = mut_accumulators<FeatureSet>();
|
| 223 |
+
|
| 224 |
+
if constexpr (std::is_same_v<FeatureSet, ThreatFeatureSet>)
|
| 225 |
+
{
|
| 226 |
+
if (dp2.remove_sq != SQ_NONE
|
| 227 |
+
&& (accumulators[next].diff.threateningSqs & square_bb(dp2.remove_sq)))
|
| 228 |
+
{
|
| 229 |
+
double_inc_update(perspective, featureTransformer, ksq, accumulators[next],
|
| 230 |
+
accumulators[next + 1], accumulators[next - 1], dp2);
|
| 231 |
+
next++;
|
| 232 |
+
continue;
|
| 233 |
+
}
|
| 234 |
+
}
|
| 235 |
+
|
| 236 |
+
if constexpr (std::is_same_v<FeatureSet, PSQFeatureSet>)
|
| 237 |
+
{
|
| 238 |
+
if (dp1.to != SQ_NONE && dp1.to == dp2.remove_sq)
|
| 239 |
+
{
|
| 240 |
+
const Square captureSq = dp1.to;
|
| 241 |
+
dp1.to = dp2.remove_sq = SQ_NONE;
|
| 242 |
+
double_inc_update(perspective, featureTransformer, ksq, accumulators[next],
|
| 243 |
+
accumulators[next + 1], accumulators[next - 1]);
|
| 244 |
+
dp1.to = dp2.remove_sq = captureSq;
|
| 245 |
+
next++;
|
| 246 |
+
continue;
|
| 247 |
+
}
|
| 248 |
+
}
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
update_accumulator_incremental<true>(perspective, featureTransformer, ksq,
|
| 252 |
+
mut_accumulators<FeatureSet>()[next],
|
| 253 |
+
accumulators<FeatureSet>()[next - 1]);
|
| 254 |
+
}
|
| 255 |
+
|
| 256 |
+
assert((latest<PSQFeatureSet>().acc<Dimensions>()).computed[perspective]);
|
| 257 |
+
}
|
| 258 |
+
|
| 259 |
+
template<typename FeatureSet, IndexType Dimensions>
|
| 260 |
+
void AccumulatorStack::backward_update_incremental(
|
| 261 |
+
Color perspective,
|
| 262 |
+
|
| 263 |
+
const Position& pos,
|
| 264 |
+
const FeatureTransformer<Dimensions>& featureTransformer,
|
| 265 |
+
const std::size_t end) noexcept {
|
| 266 |
+
|
| 267 |
+
assert(end < accumulators<FeatureSet>().size());
|
| 268 |
+
assert(end < size);
|
| 269 |
+
assert((latest<FeatureSet>().template acc<Dimensions>()).computed[perspective]);
|
| 270 |
+
|
| 271 |
+
const Square ksq = pos.square<KING>(perspective);
|
| 272 |
+
|
| 273 |
+
for (std::int64_t next = std::int64_t(size) - 2; next >= std::int64_t(end); next--)
|
| 274 |
+
update_accumulator_incremental<false>(perspective, featureTransformer, ksq,
|
| 275 |
+
mut_accumulators<FeatureSet>()[next],
|
| 276 |
+
accumulators<FeatureSet>()[next + 1]);
|
| 277 |
+
|
| 278 |
+
assert((accumulators<FeatureSet>()[end].template acc<Dimensions>()).computed[perspective]);
|
| 279 |
+
}
|
| 280 |
+
|
| 281 |
+
// Explicit template instantiations
|
| 282 |
+
template void AccumulatorStack::evaluate<TransformedFeatureDimensionsBig>(
|
| 283 |
+
const Position& pos,
|
| 284 |
+
const FeatureTransformer<TransformedFeatureDimensionsBig>& featureTransformer,
|
| 285 |
+
AccumulatorCaches::Cache<TransformedFeatureDimensionsBig>& cache) noexcept;
|
| 286 |
+
template void AccumulatorStack::evaluate<TransformedFeatureDimensionsSmall>(
|
| 287 |
+
const Position& pos,
|
| 288 |
+
const FeatureTransformer<TransformedFeatureDimensionsSmall>& featureTransformer,
|
| 289 |
+
AccumulatorCaches::Cache<TransformedFeatureDimensionsSmall>& cache) noexcept;
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
namespace {
|
| 293 |
+
|
| 294 |
+
template<typename VectorWrapper,
|
| 295 |
+
IndexType Width,
|
| 296 |
+
UpdateOperation... ops,
|
| 297 |
+
typename ElementType,
|
| 298 |
+
typename... Ts,
|
| 299 |
+
std::enable_if_t<is_all_same_v<ElementType, Ts...>, bool> = true>
|
| 300 |
+
void fused_row_reduce(const ElementType* in, ElementType* out, const Ts* const... rows) {
|
| 301 |
+
constexpr IndexType size = Width * sizeof(ElementType) / sizeof(typename VectorWrapper::type);
|
| 302 |
+
|
| 303 |
+
auto* vecIn = reinterpret_cast<const typename VectorWrapper::type*>(in);
|
| 304 |
+
auto* vecOut = reinterpret_cast<typename VectorWrapper::type*>(out);
|
| 305 |
+
|
| 306 |
+
for (IndexType i = 0; i < size; ++i)
|
| 307 |
+
vecOut[i] = fused<VectorWrapper, ops...>(
|
| 308 |
+
vecIn[i], reinterpret_cast<const typename VectorWrapper::type*>(rows)[i]...);
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
template<typename FeatureSet, IndexType Dimensions>
|
| 312 |
+
struct AccumulatorUpdateContext {
|
| 313 |
+
Color perspective;
|
| 314 |
+
const FeatureTransformer<Dimensions>& featureTransformer;
|
| 315 |
+
const AccumulatorState<FeatureSet>& from;
|
| 316 |
+
AccumulatorState<FeatureSet>& to;
|
| 317 |
+
|
| 318 |
+
AccumulatorUpdateContext(Color persp,
|
| 319 |
+
const FeatureTransformer<Dimensions>& ft,
|
| 320 |
+
const AccumulatorState<FeatureSet>& accF,
|
| 321 |
+
AccumulatorState<FeatureSet>& accT) noexcept :
|
| 322 |
+
perspective{persp},
|
| 323 |
+
featureTransformer{ft},
|
| 324 |
+
from{accF},
|
| 325 |
+
to{accT} {}
|
| 326 |
+
|
| 327 |
+
template<UpdateOperation... ops,
|
| 328 |
+
typename... Ts,
|
| 329 |
+
std::enable_if_t<is_all_same_v<IndexType, Ts...>, bool> = true>
|
| 330 |
+
void apply(const Ts... indices) {
|
| 331 |
+
auto to_weight_vector = [&](const IndexType index) {
|
| 332 |
+
return &featureTransformer.weights[index * Dimensions];
|
| 333 |
+
};
|
| 334 |
+
|
| 335 |
+
auto to_psqt_weight_vector = [&](const IndexType index) {
|
| 336 |
+
return &featureTransformer.psqtWeights[index * PSQTBuckets];
|
| 337 |
+
};
|
| 338 |
+
|
| 339 |
+
fused_row_reduce<Vec16Wrapper, Dimensions, ops...>(
|
| 340 |
+
(from.template acc<Dimensions>()).accumulation[perspective].data(),
|
| 341 |
+
(to.template acc<Dimensions>()).accumulation[perspective].data(),
|
| 342 |
+
to_weight_vector(indices)...);
|
| 343 |
+
|
| 344 |
+
fused_row_reduce<Vec32Wrapper, PSQTBuckets, ops...>(
|
| 345 |
+
(from.template acc<Dimensions>()).psqtAccumulation[perspective].data(),
|
| 346 |
+
(to.template acc<Dimensions>()).psqtAccumulation[perspective].data(),
|
| 347 |
+
to_psqt_weight_vector(indices)...);
|
| 348 |
+
}
|
| 349 |
+
|
| 350 |
+
void apply(const typename FeatureSet::IndexList& added,
|
| 351 |
+
const typename FeatureSet::IndexList& removed) {
|
| 352 |
+
const auto& fromAcc = from.template acc<Dimensions>().accumulation[perspective];
|
| 353 |
+
auto& toAcc = to.template acc<Dimensions>().accumulation[perspective];
|
| 354 |
+
|
| 355 |
+
const auto& fromPsqtAcc = from.template acc<Dimensions>().psqtAccumulation[perspective];
|
| 356 |
+
auto& toPsqtAcc = to.template acc<Dimensions>().psqtAccumulation[perspective];
|
| 357 |
+
|
| 358 |
+
#ifdef VECTOR
|
| 359 |
+
using Tiling = SIMDTiling<Dimensions, Dimensions, PSQTBuckets>;
|
| 360 |
+
vec_t acc[Tiling::NumRegs];
|
| 361 |
+
psqt_vec_t psqt[Tiling::NumPsqtRegs];
|
| 362 |
+
|
| 363 |
+
const auto* threatWeights = &featureTransformer.threatWeights[0];
|
| 364 |
+
|
| 365 |
+
for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j)
|
| 366 |
+
{
|
| 367 |
+
auto* fromTile = reinterpret_cast<const vec_t*>(&fromAcc[j * Tiling::TileHeight]);
|
| 368 |
+
auto* toTile = reinterpret_cast<vec_t*>(&toAcc[j * Tiling::TileHeight]);
|
| 369 |
+
|
| 370 |
+
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
| 371 |
+
acc[k] = fromTile[k];
|
| 372 |
+
|
| 373 |
+
for (int i = 0; i < removed.ssize(); ++i)
|
| 374 |
+
{
|
| 375 |
+
size_t index = removed[i];
|
| 376 |
+
const size_t offset = Dimensions * index;
|
| 377 |
+
auto* column = reinterpret_cast<const vec_i8_t*>(&threatWeights[offset]);
|
| 378 |
+
|
| 379 |
+
#ifdef USE_NEON
|
| 380 |
+
for (IndexType k = 0; k < Tiling::NumRegs; k += 2)
|
| 381 |
+
{
|
| 382 |
+
acc[k] = vec_sub_16(acc[k], vmovl_s8(vget_low_s8(column[k / 2])));
|
| 383 |
+
acc[k + 1] = vec_sub_16(acc[k + 1], vmovl_high_s8(column[k / 2]));
|
| 384 |
+
}
|
| 385 |
+
#else
|
| 386 |
+
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
| 387 |
+
acc[k] = vec_sub_16(acc[k], vec_convert_8_16(column[k]));
|
| 388 |
+
#endif
|
| 389 |
+
}
|
| 390 |
+
|
| 391 |
+
for (int i = 0; i < added.ssize(); ++i)
|
| 392 |
+
{
|
| 393 |
+
size_t index = added[i];
|
| 394 |
+
const size_t offset = Dimensions * index;
|
| 395 |
+
auto* column = reinterpret_cast<const vec_i8_t*>(&threatWeights[offset]);
|
| 396 |
+
|
| 397 |
+
#ifdef USE_NEON
|
| 398 |
+
for (IndexType k = 0; k < Tiling::NumRegs; k += 2)
|
| 399 |
+
{
|
| 400 |
+
acc[k] = vec_add_16(acc[k], vmovl_s8(vget_low_s8(column[k / 2])));
|
| 401 |
+
acc[k + 1] = vec_add_16(acc[k + 1], vmovl_high_s8(column[k / 2]));
|
| 402 |
+
}
|
| 403 |
+
#else
|
| 404 |
+
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
| 405 |
+
acc[k] = vec_add_16(acc[k], vec_convert_8_16(column[k]));
|
| 406 |
+
#endif
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
for (IndexType k = 0; k < Tiling::NumRegs; k++)
|
| 410 |
+
vec_store(&toTile[k], acc[k]);
|
| 411 |
+
|
| 412 |
+
threatWeights += Tiling::TileHeight;
|
| 413 |
+
}
|
| 414 |
+
|
| 415 |
+
for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j)
|
| 416 |
+
{
|
| 417 |
+
auto* fromTilePsqt =
|
| 418 |
+
reinterpret_cast<const psqt_vec_t*>(&fromPsqtAcc[j * Tiling::PsqtTileHeight]);
|
| 419 |
+
auto* toTilePsqt =
|
| 420 |
+
reinterpret_cast<psqt_vec_t*>(&toPsqtAcc[j * Tiling::PsqtTileHeight]);
|
| 421 |
+
|
| 422 |
+
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
|
| 423 |
+
psqt[k] = fromTilePsqt[k];
|
| 424 |
+
|
| 425 |
+
for (int i = 0; i < removed.ssize(); ++i)
|
| 426 |
+
{
|
| 427 |
+
size_t index = removed[i];
|
| 428 |
+
const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
|
| 429 |
+
auto* columnPsqt = reinterpret_cast<const psqt_vec_t*>(
|
| 430 |
+
&featureTransformer.threatPsqtWeights[offset]);
|
| 431 |
+
|
| 432 |
+
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
| 433 |
+
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
|
| 434 |
+
}
|
| 435 |
+
|
| 436 |
+
for (int i = 0; i < added.ssize(); ++i)
|
| 437 |
+
{
|
| 438 |
+
size_t index = added[i];
|
| 439 |
+
const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
|
| 440 |
+
auto* columnPsqt = reinterpret_cast<const psqt_vec_t*>(
|
| 441 |
+
&featureTransformer.threatPsqtWeights[offset]);
|
| 442 |
+
|
| 443 |
+
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
| 444 |
+
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
|
| 445 |
+
}
|
| 446 |
+
|
| 447 |
+
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
|
| 448 |
+
vec_store_psqt(&toTilePsqt[k], psqt[k]);
|
| 449 |
+
}
|
| 450 |
+
|
| 451 |
+
#else
|
| 452 |
+
|
| 453 |
+
toAcc = fromAcc;
|
| 454 |
+
toPsqtAcc = fromPsqtAcc;
|
| 455 |
+
|
| 456 |
+
for (const auto index : removed)
|
| 457 |
+
{
|
| 458 |
+
const IndexType offset = Dimensions * index;
|
| 459 |
+
|
| 460 |
+
for (IndexType j = 0; j < Dimensions; ++j)
|
| 461 |
+
toAcc[j] -= featureTransformer.threatWeights[offset + j];
|
| 462 |
+
|
| 463 |
+
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
| 464 |
+
toPsqtAcc[k] -= featureTransformer.threatPsqtWeights[index * PSQTBuckets + k];
|
| 465 |
+
}
|
| 466 |
+
|
| 467 |
+
for (const auto index : added)
|
| 468 |
+
{
|
| 469 |
+
const IndexType offset = Dimensions * index;
|
| 470 |
+
|
| 471 |
+
for (IndexType j = 0; j < Dimensions; ++j)
|
| 472 |
+
toAcc[j] += featureTransformer.threatWeights[offset + j];
|
| 473 |
+
|
| 474 |
+
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
| 475 |
+
toPsqtAcc[k] += featureTransformer.threatPsqtWeights[index * PSQTBuckets + k];
|
| 476 |
+
}
|
| 477 |
+
|
| 478 |
+
#endif
|
| 479 |
+
}
|
| 480 |
+
};
|
| 481 |
+
|
| 482 |
+
template<typename FeatureSet, IndexType Dimensions>
|
| 483 |
+
auto make_accumulator_update_context(Color perspective,
|
| 484 |
+
const FeatureTransformer<Dimensions>& featureTransformer,
|
| 485 |
+
const AccumulatorState<FeatureSet>& accumulatorFrom,
|
| 486 |
+
AccumulatorState<FeatureSet>& accumulatorTo) noexcept {
|
| 487 |
+
return AccumulatorUpdateContext<FeatureSet, Dimensions>{perspective, featureTransformer,
|
| 488 |
+
accumulatorFrom, accumulatorTo};
|
| 489 |
+
}
|
| 490 |
+
|
| 491 |
+
template<IndexType TransformedFeatureDimensions>
|
| 492 |
+
void double_inc_update(Color perspective,
|
| 493 |
+
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
|
| 494 |
+
const Square ksq,
|
| 495 |
+
AccumulatorState<PSQFeatureSet>& middle_state,
|
| 496 |
+
AccumulatorState<PSQFeatureSet>& target_state,
|
| 497 |
+
const AccumulatorState<PSQFeatureSet>& computed) {
|
| 498 |
+
|
| 499 |
+
assert(computed.acc<TransformedFeatureDimensions>().computed[perspective]);
|
| 500 |
+
assert(!middle_state.acc<TransformedFeatureDimensions>().computed[perspective]);
|
| 501 |
+
assert(!target_state.acc<TransformedFeatureDimensions>().computed[perspective]);
|
| 502 |
+
|
| 503 |
+
PSQFeatureSet::IndexList removed, added;
|
| 504 |
+
PSQFeatureSet::append_changed_indices(perspective, ksq, middle_state.diff, removed, added);
|
| 505 |
+
// you can't capture a piece that was just involved in castling since the rook ends up
|
| 506 |
+
// in a square that the king passed
|
| 507 |
+
assert(added.size() < 2);
|
| 508 |
+
PSQFeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added);
|
| 509 |
+
|
| 510 |
+
[[maybe_unused]] const int addedSize = added.ssize();
|
| 511 |
+
[[maybe_unused]] const int removedSize = removed.ssize();
|
| 512 |
+
|
| 513 |
+
assert(addedSize == 1);
|
| 514 |
+
assert(removedSize == 2 || removedSize == 3);
|
| 515 |
+
|
| 516 |
+
// Workaround compiler warning for uninitialized variables, replicated on
|
| 517 |
+
// profile builds on windows with gcc 14.2.0.
|
| 518 |
+
// Also helps with optimizations on some compilers.
|
| 519 |
+
|
| 520 |
+
sf_assume(addedSize == 1);
|
| 521 |
+
sf_assume(removedSize == 2 || removedSize == 3);
|
| 522 |
+
|
| 523 |
+
auto updateContext =
|
| 524 |
+
make_accumulator_update_context(perspective, featureTransformer, computed, target_state);
|
| 525 |
+
|
| 526 |
+
if (removedSize == 2)
|
| 527 |
+
{
|
| 528 |
+
updateContext.template apply<Add, Sub, Sub>(added[0], removed[0], removed[1]);
|
| 529 |
+
}
|
| 530 |
+
else
|
| 531 |
+
{
|
| 532 |
+
updateContext.template apply<Add, Sub, Sub, Sub>(added[0], removed[0], removed[1],
|
| 533 |
+
removed[2]);
|
| 534 |
+
}
|
| 535 |
+
|
| 536 |
+
target_state.acc<TransformedFeatureDimensions>().computed[perspective] = true;
|
| 537 |
+
}
|
| 538 |
+
|
| 539 |
+
template<IndexType TransformedFeatureDimensions>
|
| 540 |
+
void double_inc_update(Color perspective,
|
| 541 |
+
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
|
| 542 |
+
const Square ksq,
|
| 543 |
+
AccumulatorState<ThreatFeatureSet>& middle_state,
|
| 544 |
+
AccumulatorState<ThreatFeatureSet>& target_state,
|
| 545 |
+
const AccumulatorState<ThreatFeatureSet>& computed,
|
| 546 |
+
const DirtyPiece& dp2) {
|
| 547 |
+
|
| 548 |
+
assert(computed.acc<TransformedFeatureDimensions>().computed[perspective]);
|
| 549 |
+
assert(!middle_state.acc<TransformedFeatureDimensions>().computed[perspective]);
|
| 550 |
+
assert(!target_state.acc<TransformedFeatureDimensions>().computed[perspective]);
|
| 551 |
+
|
| 552 |
+
ThreatFeatureSet::FusedUpdateData fusedData;
|
| 553 |
+
|
| 554 |
+
fusedData.dp2removed = dp2.remove_sq;
|
| 555 |
+
|
| 556 |
+
ThreatFeatureSet::IndexList removed, added;
|
| 557 |
+
const auto* pfBase = &featureTransformer.threatWeights[0];
|
| 558 |
+
auto pfStride = static_cast<IndexType>(TransformedFeatureDimensions);
|
| 559 |
+
ThreatFeatureSet::append_changed_indices(perspective, ksq, middle_state.diff, removed, added,
|
| 560 |
+
&fusedData, true, pfBase, pfStride);
|
| 561 |
+
ThreatFeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added,
|
| 562 |
+
&fusedData, false, pfBase, pfStride);
|
| 563 |
+
|
| 564 |
+
auto updateContext =
|
| 565 |
+
make_accumulator_update_context(perspective, featureTransformer, computed, target_state);
|
| 566 |
+
|
| 567 |
+
updateContext.apply(added, removed);
|
| 568 |
+
|
| 569 |
+
target_state.acc<TransformedFeatureDimensions>().computed[perspective] = true;
|
| 570 |
+
}
|
| 571 |
+
|
| 572 |
+
template<bool Forward, typename FeatureSet, IndexType TransformedFeatureDimensions>
|
| 573 |
+
void update_accumulator_incremental(
|
| 574 |
+
Color perspective,
|
| 575 |
+
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
|
| 576 |
+
const Square ksq,
|
| 577 |
+
AccumulatorState<FeatureSet>& target_state,
|
| 578 |
+
const AccumulatorState<FeatureSet>& computed) {
|
| 579 |
+
|
| 580 |
+
assert((computed.template acc<TransformedFeatureDimensions>()).computed[perspective]);
|
| 581 |
+
assert(!(target_state.template acc<TransformedFeatureDimensions>()).computed[perspective]);
|
| 582 |
+
|
| 583 |
+
// The size must be enough to contain the largest possible update.
|
| 584 |
+
// That might depend on the feature set and generally relies on the
|
| 585 |
+
// feature set's update cost calculation to be correct and never allow
|
| 586 |
+
// updates with more added/removed features than MaxActiveDimensions.
|
| 587 |
+
// In this case, the maximum size of both feature addition and removal
|
| 588 |
+
// is 2, since we are incrementally updating one move at a time.
|
| 589 |
+
typename FeatureSet::IndexList removed, added;
|
| 590 |
+
if constexpr (std::is_same_v<FeatureSet, ThreatFeatureSet>)
|
| 591 |
+
{
|
| 592 |
+
const auto* pfBase = &featureTransformer.threatWeights[0];
|
| 593 |
+
auto pfStride = static_cast<IndexType>(TransformedFeatureDimensions);
|
| 594 |
+
if constexpr (Forward)
|
| 595 |
+
FeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added,
|
| 596 |
+
nullptr, false, pfBase, pfStride);
|
| 597 |
+
else
|
| 598 |
+
FeatureSet::append_changed_indices(perspective, ksq, computed.diff, added, removed,
|
| 599 |
+
nullptr, false, pfBase, pfStride);
|
| 600 |
+
}
|
| 601 |
+
else
|
| 602 |
+
{
|
| 603 |
+
if constexpr (Forward)
|
| 604 |
+
FeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added);
|
| 605 |
+
else
|
| 606 |
+
FeatureSet::append_changed_indices(perspective, ksq, computed.diff, added, removed);
|
| 607 |
+
}
|
| 608 |
+
|
| 609 |
+
auto updateContext =
|
| 610 |
+
make_accumulator_update_context(perspective, featureTransformer, computed, target_state);
|
| 611 |
+
|
| 612 |
+
if constexpr (std::is_same_v<FeatureSet, ThreatFeatureSet>)
|
| 613 |
+
updateContext.apply(added, removed);
|
| 614 |
+
else
|
| 615 |
+
{
|
| 616 |
+
[[maybe_unused]] const int addedSize = added.ssize();
|
| 617 |
+
[[maybe_unused]] const int removedSize = removed.ssize();
|
| 618 |
+
|
| 619 |
+
assert(addedSize == 1 || addedSize == 2);
|
| 620 |
+
assert(removedSize == 1 || removedSize == 2);
|
| 621 |
+
assert((Forward && addedSize <= removedSize) || (!Forward && addedSize >= removedSize));
|
| 622 |
+
|
| 623 |
+
// Workaround compiler warning for uninitialized variables, replicated
|
| 624 |
+
// on profile builds on windows with gcc 14.2.0.
|
| 625 |
+
// Also helps with optimizations on some compilers.
|
| 626 |
+
|
| 627 |
+
sf_assume(addedSize == 1 || addedSize == 2);
|
| 628 |
+
sf_assume(removedSize == 1 || removedSize == 2);
|
| 629 |
+
|
| 630 |
+
if (!(removedSize == 1 || removedSize == 2) || !(addedSize == 1 || addedSize == 2))
|
| 631 |
+
sf_unreachable();
|
| 632 |
+
|
| 633 |
+
if ((Forward && removedSize == 1) || (!Forward && addedSize == 1))
|
| 634 |
+
{
|
| 635 |
+
assert(addedSize == 1 && removedSize == 1);
|
| 636 |
+
updateContext.template apply<Add, Sub>(added[0], removed[0]);
|
| 637 |
+
}
|
| 638 |
+
else if (Forward && addedSize == 1)
|
| 639 |
+
{
|
| 640 |
+
assert(removedSize == 2);
|
| 641 |
+
updateContext.template apply<Add, Sub, Sub>(added[0], removed[0], removed[1]);
|
| 642 |
+
}
|
| 643 |
+
else if (!Forward && removedSize == 1)
|
| 644 |
+
{
|
| 645 |
+
assert(addedSize == 2);
|
| 646 |
+
updateContext.template apply<Add, Add, Sub>(added[0], added[1], removed[0]);
|
| 647 |
+
}
|
| 648 |
+
else
|
| 649 |
+
{
|
| 650 |
+
assert(addedSize == 2 && removedSize == 2);
|
| 651 |
+
updateContext.template apply<Add, Add, Sub, Sub>(added[0], added[1], removed[0],
|
| 652 |
+
removed[1]);
|
| 653 |
+
}
|
| 654 |
+
}
|
| 655 |
+
|
| 656 |
+
(target_state.template acc<TransformedFeatureDimensions>()).computed[perspective] = true;
|
| 657 |
+
}
|
| 658 |
+
|
| 659 |
+
// Returns a bitboard with one bit set for every square on which oldPieces and
// newPieces hold a different piece.
Bitboard get_changed_pieces(const std::array<Piece, SQUARE_NB>& oldPieces,
                            const std::array<Piece, SQUARE_NB>& newPieces) {
#if defined(USE_AVX512) || defined(USE_AVX2)
    static_assert(sizeof(Piece) == 1);

    // Byte-wise comparison of 32 squares starting at `base`; bit s of the
    // result is set when square base + s is unchanged.
    const auto unchanged_mask = [&](const int base) -> std::uint32_t {
        const __m256i before =
          _mm256_loadu_si256(reinterpret_cast<const __m256i*>(&oldPieces[base]));
        const __m256i after =
          _mm256_loadu_si256(reinterpret_cast<const __m256i*>(&newPieces[base]));
        return _mm256_movemask_epi8(_mm256_cmpeq_epi8(before, after));
    };

    Bitboard unchangedBB = 0;

    for (int base = 0; base < 64; base += 32)
        unchangedBB |= static_cast<Bitboard>(unchanged_mask(base)) << base;

    return ~unchangedBB;
#elif defined(USE_NEON)
    uint8x16x4_t before = vld4q_u8(reinterpret_cast<const uint8_t*>(oldPieces.data()));
    uint8x16x4_t after  = vld4q_u8(reinterpret_cast<const uint8_t*>(newPieces.data()));
    auto equal_lane = [=](const int lane) { return vceqq_u8(before.val[lane], after.val[lane]); };

    // The shift-right-insert steps fold the four comparison results into the
    // "unchanged squares" mask; the result is inverted on return.
    uint8x16_t eq01   = vsriq_n_u8(equal_lane(1), equal_lane(0), 1);
    uint8x16_t eq23   = vsriq_n_u8(equal_lane(3), equal_lane(2), 1);
    uint8x16_t packed = vsriq_n_u8(eq23, eq01, 2);
    packed            = vsriq_n_u8(packed, packed, 4);
    uint8x8_t unchangedBB = vshrn_n_u16(vreinterpretq_u16_u8(packed), 4);

    return ~vget_lane_u64(vreinterpret_u64_u8(unchangedBB), 0);
#else
    Bitboard diffBB = 0;

    for (Square sq = SQUARE_ZERO; sq < SQUARE_NB; ++sq)
    {
        const bool differs = oldPieces[sq] != newPieces[sq];
        diffBB |= static_cast<Bitboard>(differs) << sq;
    }

    return diffBB;
#endif
}
|
| 695 |
+
|
| 696 |
+
template<IndexType Dimensions>
|
| 697 |
+
void update_accumulator_refresh_cache(Color perspective,
|
| 698 |
+
const FeatureTransformer<Dimensions>& featureTransformer,
|
| 699 |
+
const Position& pos,
|
| 700 |
+
AccumulatorState<PSQFeatureSet>& accumulatorState,
|
| 701 |
+
AccumulatorCaches::Cache<Dimensions>& cache) {
|
| 702 |
+
|
| 703 |
+
using Tiling [[maybe_unused]] = SIMDTiling<Dimensions, Dimensions, PSQTBuckets>;
|
| 704 |
+
|
| 705 |
+
const Square ksq = pos.square<KING>(perspective);
|
| 706 |
+
auto& entry = cache[ksq][perspective];
|
| 707 |
+
PSQFeatureSet::IndexList removed, added;
|
| 708 |
+
|
| 709 |
+
const Bitboard changedBB = get_changed_pieces(entry.pieces, pos.piece_array());
|
| 710 |
+
Bitboard removedBB = changedBB & entry.pieceBB;
|
| 711 |
+
Bitboard addedBB = changedBB & pos.pieces();
|
| 712 |
+
|
| 713 |
+
while (removedBB)
|
| 714 |
+
{
|
| 715 |
+
Square sq = pop_lsb(removedBB);
|
| 716 |
+
removed.push_back(PSQFeatureSet::make_index(perspective, sq, entry.pieces[sq], ksq));
|
| 717 |
+
}
|
| 718 |
+
while (addedBB)
|
| 719 |
+
{
|
| 720 |
+
Square sq = pop_lsb(addedBB);
|
| 721 |
+
added.push_back(PSQFeatureSet::make_index(perspective, sq, pos.piece_on(sq), ksq));
|
| 722 |
+
}
|
| 723 |
+
|
| 724 |
+
entry.pieceBB = pos.pieces();
|
| 725 |
+
entry.pieces = pos.piece_array();
|
| 726 |
+
|
| 727 |
+
auto& accumulator = accumulatorState.acc<Dimensions>();
|
| 728 |
+
accumulator.computed[perspective] = true;
|
| 729 |
+
|
| 730 |
+
#ifdef VECTOR
|
| 731 |
+
vec_t acc[Tiling::NumRegs];
|
| 732 |
+
psqt_vec_t psqt[Tiling::NumPsqtRegs];
|
| 733 |
+
|
| 734 |
+
const auto* weights = &featureTransformer.weights[0];
|
| 735 |
+
|
| 736 |
+
for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j)
|
| 737 |
+
{
|
| 738 |
+
auto* accTile =
|
| 739 |
+
reinterpret_cast<vec_t*>(&accumulator.accumulation[perspective][j * Tiling::TileHeight]);
|
| 740 |
+
auto* entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * Tiling::TileHeight]);
|
| 741 |
+
|
| 742 |
+
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
| 743 |
+
acc[k] = entryTile[k];
|
| 744 |
+
|
| 745 |
+
int i = 0;
|
| 746 |
+
for (; i < std::min(removed.ssize(), added.ssize()); ++i)
|
| 747 |
+
{
|
| 748 |
+
size_t indexR = removed[i];
|
| 749 |
+
const size_t offsetR = Dimensions * indexR;
|
| 750 |
+
auto* columnR = reinterpret_cast<const vec_t*>(&weights[offsetR]);
|
| 751 |
+
size_t indexA = added[i];
|
| 752 |
+
const size_t offsetA = Dimensions * indexA;
|
| 753 |
+
auto* columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
|
| 754 |
+
|
| 755 |
+
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
| 756 |
+
acc[k] = fused<Vec16Wrapper, Add, Sub>(acc[k], columnA[k], columnR[k]);
|
| 757 |
+
}
|
| 758 |
+
for (; i < removed.ssize(); ++i)
|
| 759 |
+
{
|
| 760 |
+
size_t index = removed[i];
|
| 761 |
+
const size_t offset = Dimensions * index;
|
| 762 |
+
auto* column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
| 763 |
+
|
| 764 |
+
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
| 765 |
+
acc[k] = vec_sub_16(acc[k], column[k]);
|
| 766 |
+
}
|
| 767 |
+
for (; i < added.ssize(); ++i)
|
| 768 |
+
{
|
| 769 |
+
size_t index = added[i];
|
| 770 |
+
const size_t offset = Dimensions * index;
|
| 771 |
+
auto* column = reinterpret_cast<const vec_t*>(&weights[offset]);
|
| 772 |
+
|
| 773 |
+
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
| 774 |
+
acc[k] = vec_add_16(acc[k], column[k]);
|
| 775 |
+
}
|
| 776 |
+
|
| 777 |
+
for (IndexType k = 0; k < Tiling::NumRegs; k++)
|
| 778 |
+
vec_store(&entryTile[k], acc[k]);
|
| 779 |
+
for (IndexType k = 0; k < Tiling::NumRegs; k++)
|
| 780 |
+
vec_store(&accTile[k], acc[k]);
|
| 781 |
+
|
| 782 |
+
weights += Tiling::TileHeight;
|
| 783 |
+
}
|
| 784 |
+
|
| 785 |
+
for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j)
|
| 786 |
+
{
|
| 787 |
+
auto* accTilePsqt = reinterpret_cast<psqt_vec_t*>(
|
| 788 |
+
&accumulator.psqtAccumulation[perspective][j * Tiling::PsqtTileHeight]);
|
| 789 |
+
auto* entryTilePsqt =
|
| 790 |
+
reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * Tiling::PsqtTileHeight]);
|
| 791 |
+
|
| 792 |
+
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
|
| 793 |
+
psqt[k] = entryTilePsqt[k];
|
| 794 |
+
|
| 795 |
+
for (int i = 0; i < removed.ssize(); ++i)
|
| 796 |
+
{
|
| 797 |
+
size_t index = removed[i];
|
| 798 |
+
const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
|
| 799 |
+
auto* columnPsqt =
|
| 800 |
+
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offset]);
|
| 801 |
+
|
| 802 |
+
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
| 803 |
+
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
|
| 804 |
+
}
|
| 805 |
+
for (int i = 0; i < added.ssize(); ++i)
|
| 806 |
+
{
|
| 807 |
+
size_t index = added[i];
|
| 808 |
+
const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
|
| 809 |
+
auto* columnPsqt =
|
| 810 |
+
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offset]);
|
| 811 |
+
|
| 812 |
+
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
| 813 |
+
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
|
| 814 |
+
}
|
| 815 |
+
|
| 816 |
+
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
|
| 817 |
+
vec_store_psqt(&entryTilePsqt[k], psqt[k]);
|
| 818 |
+
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
|
| 819 |
+
vec_store_psqt(&accTilePsqt[k], psqt[k]);
|
| 820 |
+
}
|
| 821 |
+
|
| 822 |
+
#else
|
| 823 |
+
|
| 824 |
+
for (const auto index : removed)
|
| 825 |
+
{
|
| 826 |
+
const IndexType offset = Dimensions * index;
|
| 827 |
+
for (IndexType j = 0; j < Dimensions; ++j)
|
| 828 |
+
entry.accumulation[j] -= featureTransformer.weights[offset + j];
|
| 829 |
+
|
| 830 |
+
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
| 831 |
+
entry.psqtAccumulation[k] -= featureTransformer.psqtWeights[index * PSQTBuckets + k];
|
| 832 |
+
}
|
| 833 |
+
for (const auto index : added)
|
| 834 |
+
{
|
| 835 |
+
const IndexType offset = Dimensions * index;
|
| 836 |
+
for (IndexType j = 0; j < Dimensions; ++j)
|
| 837 |
+
entry.accumulation[j] += featureTransformer.weights[offset + j];
|
| 838 |
+
|
| 839 |
+
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
| 840 |
+
entry.psqtAccumulation[k] += featureTransformer.psqtWeights[index * PSQTBuckets + k];
|
| 841 |
+
}
|
| 842 |
+
|
| 843 |
+
// The accumulator of the refresh entry has been updated.
|
| 844 |
+
// Now copy its content to the actual accumulator we were refreshing.
|
| 845 |
+
accumulator.accumulation[perspective] = entry.accumulation;
|
| 846 |
+
accumulator.psqtAccumulation[perspective] = entry.psqtAccumulation;
|
| 847 |
+
#endif
|
| 848 |
+
}
|
| 849 |
+
|
| 850 |
+
template<IndexType Dimensions>
|
| 851 |
+
void update_threats_accumulator_full(Color perspective,
|
| 852 |
+
const FeatureTransformer<Dimensions>& featureTransformer,
|
| 853 |
+
const Position& pos,
|
| 854 |
+
AccumulatorState<ThreatFeatureSet>& accumulatorState) {
|
| 855 |
+
using Tiling [[maybe_unused]] = SIMDTiling<Dimensions, Dimensions, PSQTBuckets>;
|
| 856 |
+
|
| 857 |
+
ThreatFeatureSet::IndexList active;
|
| 858 |
+
ThreatFeatureSet::append_active_indices(perspective, pos, active);
|
| 859 |
+
|
| 860 |
+
auto& accumulator = accumulatorState.acc<Dimensions>();
|
| 861 |
+
accumulator.computed[perspective] = true;
|
| 862 |
+
|
| 863 |
+
#ifdef VECTOR
|
| 864 |
+
vec_t acc[Tiling::NumRegs];
|
| 865 |
+
psqt_vec_t psqt[Tiling::NumPsqtRegs];
|
| 866 |
+
|
| 867 |
+
const auto* threatWeights = &featureTransformer.threatWeights[0];
|
| 868 |
+
|
| 869 |
+
for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j)
|
| 870 |
+
{
|
| 871 |
+
auto* accTile =
|
| 872 |
+
reinterpret_cast<vec_t*>(&accumulator.accumulation[perspective][j * Tiling::TileHeight]);
|
| 873 |
+
|
| 874 |
+
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
| 875 |
+
acc[k] = vec_zero();
|
| 876 |
+
|
| 877 |
+
int i = 0;
|
| 878 |
+
|
| 879 |
+
for (; i < active.ssize(); ++i)
|
| 880 |
+
{
|
| 881 |
+
size_t index = active[i];
|
| 882 |
+
const size_t offset = Dimensions * index;
|
| 883 |
+
auto* column = reinterpret_cast<const vec_i8_t*>(&threatWeights[offset]);
|
| 884 |
+
|
| 885 |
+
#ifdef USE_NEON
|
| 886 |
+
for (IndexType k = 0; k < Tiling::NumRegs; k += 2)
|
| 887 |
+
{
|
| 888 |
+
acc[k] = vec_add_16(acc[k], vmovl_s8(vget_low_s8(column[k / 2])));
|
| 889 |
+
acc[k + 1] = vec_add_16(acc[k + 1], vmovl_high_s8(column[k / 2]));
|
| 890 |
+
}
|
| 891 |
+
#else
|
| 892 |
+
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
|
| 893 |
+
acc[k] = vec_add_16(acc[k], vec_convert_8_16(column[k]));
|
| 894 |
+
#endif
|
| 895 |
+
}
|
| 896 |
+
|
| 897 |
+
for (IndexType k = 0; k < Tiling::NumRegs; k++)
|
| 898 |
+
vec_store(&accTile[k], acc[k]);
|
| 899 |
+
|
| 900 |
+
threatWeights += Tiling::TileHeight;
|
| 901 |
+
}
|
| 902 |
+
|
| 903 |
+
for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j)
|
| 904 |
+
{
|
| 905 |
+
auto* accTilePsqt = reinterpret_cast<psqt_vec_t*>(
|
| 906 |
+
&accumulator.psqtAccumulation[perspective][j * Tiling::PsqtTileHeight]);
|
| 907 |
+
|
| 908 |
+
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
|
| 909 |
+
psqt[k] = vec_zero_psqt();
|
| 910 |
+
|
| 911 |
+
for (int i = 0; i < active.ssize(); ++i)
|
| 912 |
+
{
|
| 913 |
+
size_t index = active[i];
|
| 914 |
+
const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
|
| 915 |
+
auto* columnPsqt =
|
| 916 |
+
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.threatPsqtWeights[offset]);
|
| 917 |
+
|
| 918 |
+
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
|
| 919 |
+
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
|
| 920 |
+
}
|
| 921 |
+
|
| 922 |
+
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
|
| 923 |
+
vec_store_psqt(&accTilePsqt[k], psqt[k]);
|
| 924 |
+
}
|
| 925 |
+
|
| 926 |
+
#else
|
| 927 |
+
|
| 928 |
+
for (IndexType j = 0; j < Dimensions; ++j)
|
| 929 |
+
accumulator.accumulation[perspective][j] = 0;
|
| 930 |
+
|
| 931 |
+
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
| 932 |
+
accumulator.psqtAccumulation[perspective][k] = 0;
|
| 933 |
+
|
| 934 |
+
for (const auto index : active)
|
| 935 |
+
{
|
| 936 |
+
const IndexType offset = Dimensions * index;
|
| 937 |
+
|
| 938 |
+
for (IndexType j = 0; j < Dimensions; ++j)
|
| 939 |
+
accumulator.accumulation[perspective][j] +=
|
| 940 |
+
featureTransformer.threatWeights[offset + j];
|
| 941 |
+
|
| 942 |
+
for (std::size_t k = 0; k < PSQTBuckets; ++k)
|
| 943 |
+
accumulator.psqtAccumulation[perspective][k] +=
|
| 944 |
+
featureTransformer.threatPsqtWeights[index * PSQTBuckets + k];
|
| 945 |
+
}
|
| 946 |
+
|
| 947 |
+
#endif
|
| 948 |
+
}
|
| 949 |
+
|
| 950 |
+
}
|
| 951 |
+
|
| 952 |
+
}
|
src/nnue/nnue_accumulator.h
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
// Class for difference calculation of NNUE evaluation function
|
| 20 |
+
|
| 21 |
+
#ifndef NNUE_ACCUMULATOR_H_INCLUDED
|
| 22 |
+
#define NNUE_ACCUMULATOR_H_INCLUDED
|
| 23 |
+
|
| 24 |
+
#include <array>
|
| 25 |
+
#include <cstddef>
|
| 26 |
+
#include <cstdint>
|
| 27 |
+
#include <cstring>
|
| 28 |
+
#include <utility>
|
| 29 |
+
|
| 30 |
+
#include "../types.h"
|
| 31 |
+
#include "nnue_architecture.h"
|
| 32 |
+
#include "nnue_common.h"
|
| 33 |
+
|
| 34 |
+
namespace Stockfish {
|
| 35 |
+
class Position;
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
namespace Stockfish::Eval::NNUE {
|
| 39 |
+
|
| 40 |
+
template<IndexType Size>
|
| 41 |
+
struct alignas(CacheLineSize) Accumulator;
|
| 42 |
+
|
| 43 |
+
template<IndexType TransformedFeatureDimensions>
|
| 44 |
+
class FeatureTransformer;
|
| 45 |
+
|
| 46 |
+
// Class that holds the result of affine transformation of input features
|
| 47 |
+
// Size is the number of 16-bit accumulator values stored per color.
template<IndexType Size>
|
| 48 |
+
struct alignas(CacheLineSize) Accumulator {
|
| 49 |
+
// One row of Size 16-bit accumulated values for each color.
std::array<std::array<std::int16_t, Size>, COLOR_NB> accumulation;
|
| 50 |
+
// One 32-bit PSQT sum per PSQT bucket for each color.
std::array<std::array<std::int32_t, PSQTBuckets>, COLOR_NB> psqtAccumulation;
|
| 51 |
+
// Per-color flag, value-initialized to false; the update routines set it
// to true once the data for that color has been written.
std::array<bool, COLOR_NB> computed = {};
|
| 52 |
+
};
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
// AccumulatorCaches struct provides per-thread accumulator caches, where each
|
| 56 |
+
// cache contains multiple entries for each of the possible king squares.
|
| 57 |
+
// When the accumulator needs to be refreshed, the cached entry is used to more
|
| 58 |
+
// efficiently update the accumulator, instead of rebuilding it from scratch.
|
| 59 |
+
// This idea was first described by Luecx (author of Koivisto) and
|
| 60 |
+
// is commonly referred to as "Finny Tables".
|
| 61 |
+
struct AccumulatorCaches {
|
| 62 |
+
|
| 63 |
+
template<typename Networks>
|
| 64 |
+
AccumulatorCaches(const Networks& networks) {
|
| 65 |
+
clear(networks);
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
template<IndexType Size>
|
| 69 |
+
struct alignas(CacheLineSize) Cache {
|
| 70 |
+
|
| 71 |
+
struct alignas(CacheLineSize) Entry {
|
| 72 |
+
std::array<BiasType, Size> accumulation;
|
| 73 |
+
std::array<PSQTWeightType, PSQTBuckets> psqtAccumulation;
|
| 74 |
+
std::array<Piece, SQUARE_NB> pieces;
|
| 75 |
+
Bitboard pieceBB;
|
| 76 |
+
|
| 77 |
+
// To initialize a refresh entry, we set all its bitboards empty,
|
| 78 |
+
// so we put the biases in the accumulation, without any weights on top
|
| 79 |
+
void clear(const std::array<BiasType, Size>& biases) {
|
| 80 |
+
accumulation = biases;
|
| 81 |
+
std::memset(reinterpret_cast<std::byte*>(this) + offsetof(Entry, psqtAccumulation),
|
| 82 |
+
0, sizeof(Entry) - offsetof(Entry, psqtAccumulation));
|
| 83 |
+
}
|
| 84 |
+
};
|
| 85 |
+
|
| 86 |
+
template<typename Network>
|
| 87 |
+
void clear(const Network& network) {
|
| 88 |
+
for (auto& entries1D : entries)
|
| 89 |
+
for (auto& entry : entries1D)
|
| 90 |
+
entry.clear(network.featureTransformer.biases);
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
std::array<Entry, COLOR_NB>& operator[](Square sq) { return entries[sq]; }
|
| 94 |
+
|
| 95 |
+
std::array<std::array<Entry, COLOR_NB>, SQUARE_NB> entries;
|
| 96 |
+
};
|
| 97 |
+
|
| 98 |
+
template<typename Networks>
|
| 99 |
+
void clear(const Networks& networks) {
|
| 100 |
+
big.clear(networks.big);
|
| 101 |
+
small.clear(networks.small);
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
Cache<TransformedFeatureDimensionsBig> big;
|
| 105 |
+
Cache<TransformedFeatureDimensionsSmall> small;
|
| 106 |
+
};
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
template<typename FeatureSet>
|
| 110 |
+
struct AccumulatorState {
|
| 111 |
+
Accumulator<TransformedFeatureDimensionsBig> accumulatorBig;
|
| 112 |
+
Accumulator<TransformedFeatureDimensionsSmall> accumulatorSmall;
|
| 113 |
+
typename FeatureSet::DiffType diff;
|
| 114 |
+
|
| 115 |
+
template<IndexType Size>
|
| 116 |
+
auto& acc() noexcept {
|
| 117 |
+
static_assert(Size == TransformedFeatureDimensionsBig
|
| 118 |
+
|| Size == TransformedFeatureDimensionsSmall,
|
| 119 |
+
"Invalid size for accumulator");
|
| 120 |
+
|
| 121 |
+
if constexpr (Size == TransformedFeatureDimensionsBig)
|
| 122 |
+
return accumulatorBig;
|
| 123 |
+
else if constexpr (Size == TransformedFeatureDimensionsSmall)
|
| 124 |
+
return accumulatorSmall;
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
template<IndexType Size>
|
| 128 |
+
const auto& acc() const noexcept {
|
| 129 |
+
static_assert(Size == TransformedFeatureDimensionsBig
|
| 130 |
+
|| Size == TransformedFeatureDimensionsSmall,
|
| 131 |
+
"Invalid size for accumulator");
|
| 132 |
+
|
| 133 |
+
if constexpr (Size == TransformedFeatureDimensionsBig)
|
| 134 |
+
return accumulatorBig;
|
| 135 |
+
else if constexpr (Size == TransformedFeatureDimensionsSmall)
|
| 136 |
+
return accumulatorSmall;
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
void reset(const typename FeatureSet::DiffType& dp) noexcept {
|
| 140 |
+
diff = dp;
|
| 141 |
+
accumulatorBig.computed.fill(false);
|
| 142 |
+
accumulatorSmall.computed.fill(false);
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
typename FeatureSet::DiffType& reset() noexcept {
|
| 146 |
+
accumulatorBig.computed.fill(false);
|
| 147 |
+
accumulatorSmall.computed.fill(false);
|
| 148 |
+
return diff;
|
| 149 |
+
}
|
| 150 |
+
};
|
| 151 |
+
|
| 152 |
+
class AccumulatorStack {
|
| 153 |
+
public:
|
| 154 |
+
static constexpr std::size_t MaxSize = MAX_PLY + 1;
|
| 155 |
+
|
| 156 |
+
template<typename T>
|
| 157 |
+
[[nodiscard]] const AccumulatorState<T>& latest() const noexcept;
|
| 158 |
+
|
| 159 |
+
void reset() noexcept;
|
| 160 |
+
std::pair<DirtyPiece&, DirtyThreats&> push() noexcept;
|
| 161 |
+
void pop() noexcept;
|
| 162 |
+
|
| 163 |
+
template<IndexType Dimensions>
|
| 164 |
+
void evaluate(const Position& pos,
|
| 165 |
+
const FeatureTransformer<Dimensions>& featureTransformer,
|
| 166 |
+
AccumulatorCaches::Cache<Dimensions>& cache) noexcept;
|
| 167 |
+
|
| 168 |
+
private:
|
| 169 |
+
template<typename T>
|
| 170 |
+
[[nodiscard]] AccumulatorState<T>& mut_latest() noexcept;
|
| 171 |
+
|
| 172 |
+
template<typename T>
|
| 173 |
+
[[nodiscard]] const std::array<AccumulatorState<T>, MaxSize>& accumulators() const noexcept;
|
| 174 |
+
|
| 175 |
+
template<typename T>
|
| 176 |
+
[[nodiscard]] std::array<AccumulatorState<T>, MaxSize>& mut_accumulators() noexcept;
|
| 177 |
+
|
| 178 |
+
template<typename FeatureSet, IndexType Dimensions>
|
| 179 |
+
void evaluate_side(Color perspective,
|
| 180 |
+
const Position& pos,
|
| 181 |
+
const FeatureTransformer<Dimensions>& featureTransformer,
|
| 182 |
+
AccumulatorCaches::Cache<Dimensions>& cache) noexcept;
|
| 183 |
+
|
| 184 |
+
template<typename FeatureSet, IndexType Dimensions>
|
| 185 |
+
[[nodiscard]] std::size_t find_last_usable_accumulator(Color perspective) const noexcept;
|
| 186 |
+
|
| 187 |
+
template<typename FeatureSet, IndexType Dimensions>
|
| 188 |
+
void forward_update_incremental(Color perspective,
|
| 189 |
+
const Position& pos,
|
| 190 |
+
const FeatureTransformer<Dimensions>& featureTransformer,
|
| 191 |
+
const std::size_t begin) noexcept;
|
| 192 |
+
|
| 193 |
+
template<typename FeatureSet, IndexType Dimensions>
|
| 194 |
+
void backward_update_incremental(Color perspective,
|
| 195 |
+
const Position& pos,
|
| 196 |
+
const FeatureTransformer<Dimensions>& featureTransformer,
|
| 197 |
+
const std::size_t end) noexcept;
|
| 198 |
+
|
| 199 |
+
std::array<AccumulatorState<PSQFeatureSet>, MaxSize> psq_accumulators;
|
| 200 |
+
std::array<AccumulatorState<ThreatFeatureSet>, MaxSize> threat_accumulators;
|
| 201 |
+
std::size_t size = 1;
|
| 202 |
+
};
|
| 203 |
+
|
| 204 |
+
} // namespace Stockfish::Eval::NNUE
|
| 205 |
+
|
| 206 |
+
#endif // NNUE_ACCUMULATOR_H_INCLUDED
|
src/nnue/nnue_architecture.h
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
// Input features and network structure used in NNUE evaluation function
|
| 20 |
+
|
| 21 |
+
#ifndef NNUE_ARCHITECTURE_H_INCLUDED
|
| 22 |
+
#define NNUE_ARCHITECTURE_H_INCLUDED
|
| 23 |
+
|
| 24 |
+
#include <cstdint>
|
| 25 |
+
#include <cstring>
|
| 26 |
+
#include <iosfwd>
|
| 27 |
+
|
| 28 |
+
#include "features/half_ka_v2_hm.h"
|
| 29 |
+
#include "features/full_threats.h"
|
| 30 |
+
#include "layers/affine_transform.h"
|
| 31 |
+
#include "layers/affine_transform_sparse_input.h"
|
| 32 |
+
#include "layers/clipped_relu.h"
|
| 33 |
+
#include "layers/sqr_clipped_relu.h"
|
| 34 |
+
#include "nnue_common.h"
|
| 35 |
+
|
| 36 |
+
namespace Stockfish::Eval::NNUE {
|
| 37 |
+
|
| 38 |
+
// Input features used in evaluation function
|
| 39 |
+
using ThreatFeatureSet = Features::FullThreats;
|
| 40 |
+
using PSQFeatureSet = Features::HalfKAv2_hm;
|
| 41 |
+
|
| 42 |
+
// Number of input feature dimensions after conversion
|
| 43 |
+
constexpr IndexType TransformedFeatureDimensionsBig = 256;
|
| 44 |
+
constexpr int L2Big = 31;
|
| 45 |
+
constexpr int L3Big = 32;
|
| 46 |
+
|
| 47 |
+
constexpr IndexType TransformedFeatureDimensionsSmall = 128;
|
| 48 |
+
constexpr int L2Small = 15;
|
| 49 |
+
constexpr int L3Small = 32;
|
| 50 |
+
|
| 51 |
+
constexpr IndexType PSQTBuckets = 8;
|
| 52 |
+
constexpr IndexType LayerStacks = 8;
|
| 53 |
+
|
| 54 |
+
// If vector instructions are enabled, we update and refresh the
|
| 55 |
+
// accumulator tile by tile such that each tile fits in the CPU's
|
| 56 |
+
// vector registers.
|
| 57 |
+
static_assert(PSQTBuckets % 8 == 0,
|
| 58 |
+
"Per feature PSQT values cannot be processed at granularity lower than 8 at a time.");
|
| 59 |
+
|
| 60 |
+
template<IndexType L1, int L2, int L3>
|
| 61 |
+
struct NetworkArchitecture {
|
| 62 |
+
static constexpr IndexType TransformedFeatureDimensions = L1;
|
| 63 |
+
static constexpr int FC_0_OUTPUTS = L2;
|
| 64 |
+
static constexpr int FC_1_OUTPUTS = L3;
|
| 65 |
+
|
| 66 |
+
Layers::AffineTransformSparseInput<TransformedFeatureDimensions, FC_0_OUTPUTS + 1> fc_0;
|
| 67 |
+
Layers::SqrClippedReLU<FC_0_OUTPUTS + 1> ac_sqr_0;
|
| 68 |
+
Layers::ClippedReLU<FC_0_OUTPUTS + 1> ac_0;
|
| 69 |
+
Layers::AffineTransform<FC_0_OUTPUTS * 2, FC_1_OUTPUTS> fc_1;
|
| 70 |
+
Layers::ClippedReLU<FC_1_OUTPUTS> ac_1;
|
| 71 |
+
Layers::AffineTransform<FC_1_OUTPUTS, 1> fc_2;
|
| 72 |
+
|
| 73 |
+
// Hash value embedded in the evaluation file
|
| 74 |
+
static constexpr std::uint32_t get_hash_value() {
|
| 75 |
+
// input slice hash
|
| 76 |
+
std::uint32_t hashValue = 0xEC42E90Du;
|
| 77 |
+
hashValue ^= TransformedFeatureDimensions * 2;
|
| 78 |
+
|
| 79 |
+
hashValue = decltype(fc_0)::get_hash_value(hashValue);
|
| 80 |
+
hashValue = decltype(ac_0)::get_hash_value(hashValue);
|
| 81 |
+
hashValue = decltype(fc_1)::get_hash_value(hashValue);
|
| 82 |
+
hashValue = decltype(ac_1)::get_hash_value(hashValue);
|
| 83 |
+
hashValue = decltype(fc_2)::get_hash_value(hashValue);
|
| 84 |
+
|
| 85 |
+
return hashValue;
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
// Read network parameters
|
| 89 |
+
bool read_parameters(std::istream& stream) {
|
| 90 |
+
return fc_0.read_parameters(stream) && ac_0.read_parameters(stream)
|
| 91 |
+
&& fc_1.read_parameters(stream) && ac_1.read_parameters(stream)
|
| 92 |
+
&& fc_2.read_parameters(stream);
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
// Write network parameters
|
| 96 |
+
bool write_parameters(std::ostream& stream) const {
|
| 97 |
+
return fc_0.write_parameters(stream) && ac_0.write_parameters(stream)
|
| 98 |
+
&& fc_1.write_parameters(stream) && ac_1.write_parameters(stream)
|
| 99 |
+
&& fc_2.write_parameters(stream);
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
std::int32_t propagate(const TransformedFeatureType* transformedFeatures) const {
|
| 103 |
+
struct alignas(CacheLineSize) Buffer {
|
| 104 |
+
alignas(CacheLineSize) typename decltype(fc_0)::OutputBuffer fc_0_out;
|
| 105 |
+
alignas(CacheLineSize) typename decltype(ac_sqr_0)::OutputType
|
| 106 |
+
ac_sqr_0_out[ceil_to_multiple<IndexType>(FC_0_OUTPUTS * 2, 32)];
|
| 107 |
+
alignas(CacheLineSize) typename decltype(ac_0)::OutputBuffer ac_0_out;
|
| 108 |
+
alignas(CacheLineSize) typename decltype(fc_1)::OutputBuffer fc_1_out;
|
| 109 |
+
alignas(CacheLineSize) typename decltype(ac_1)::OutputBuffer ac_1_out;
|
| 110 |
+
alignas(CacheLineSize) typename decltype(fc_2)::OutputBuffer fc_2_out;
|
| 111 |
+
|
| 112 |
+
Buffer() { std::memset(this, 0, sizeof(*this)); }
|
| 113 |
+
};
|
| 114 |
+
|
| 115 |
+
#if defined(__clang__) && (__APPLE__)
|
| 116 |
+
// workaround for a bug reported with xcode 12
|
| 117 |
+
static thread_local auto tlsBuffer = std::make_unique<Buffer>();
|
| 118 |
+
// Access TLS only once, cache result.
|
| 119 |
+
Buffer& buffer = *tlsBuffer;
|
| 120 |
+
#else
|
| 121 |
+
alignas(CacheLineSize) static thread_local Buffer buffer;
|
| 122 |
+
#endif
|
| 123 |
+
|
| 124 |
+
fc_0.propagate(transformedFeatures, buffer.fc_0_out);
|
| 125 |
+
ac_sqr_0.propagate(buffer.fc_0_out, buffer.ac_sqr_0_out);
|
| 126 |
+
ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out);
|
| 127 |
+
std::memcpy(buffer.ac_sqr_0_out + FC_0_OUTPUTS, buffer.ac_0_out,
|
| 128 |
+
FC_0_OUTPUTS * sizeof(typename decltype(ac_0)::OutputType));
|
| 129 |
+
fc_1.propagate(buffer.ac_sqr_0_out, buffer.fc_1_out);
|
| 130 |
+
ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out);
|
| 131 |
+
fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out);
|
| 132 |
+
|
| 133 |
+
// buffer.fc_0_out[FC_0_OUTPUTS] is such that 1.0 is equal to 127*(1<<WeightScaleBits) in
|
| 134 |
+
// quantized form, but we want 1.0 to be equal to 600*OutputScale
|
| 135 |
+
std::int32_t fwdOut =
|
| 136 |
+
(buffer.fc_0_out[FC_0_OUTPUTS]) * (600 * OutputScale) / (127 * (1 << WeightScaleBits));
|
| 137 |
+
std::int32_t outputValue = buffer.fc_2_out[0] + fwdOut;
|
| 138 |
+
|
| 139 |
+
return outputValue;
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
std::size_t get_content_hash() const {
|
| 143 |
+
std::size_t h = 0;
|
| 144 |
+
hash_combine(h, fc_0.get_content_hash());
|
| 145 |
+
hash_combine(h, ac_sqr_0.get_content_hash());
|
| 146 |
+
hash_combine(h, ac_0.get_content_hash());
|
| 147 |
+
hash_combine(h, fc_1.get_content_hash());
|
| 148 |
+
hash_combine(h, ac_1.get_content_hash());
|
| 149 |
+
hash_combine(h, fc_2.get_content_hash());
|
| 150 |
+
hash_combine(h, get_hash_value());
|
| 151 |
+
return h;
|
| 152 |
+
}
|
| 153 |
+
};
|
| 154 |
+
|
| 155 |
+
} // namespace Stockfish::Eval::NNUE
|
| 156 |
+
|
| 157 |
+
template<Stockfish::Eval::NNUE::IndexType L1, int L2, int L3>
|
| 158 |
+
struct std::hash<Stockfish::Eval::NNUE::NetworkArchitecture<L1, L2, L3>> {
|
| 159 |
+
std::size_t
|
| 160 |
+
operator()(const Stockfish::Eval::NNUE::NetworkArchitecture<L1, L2, L3>& arch) const noexcept {
|
| 161 |
+
return arch.get_content_hash();
|
| 162 |
+
}
|
| 163 |
+
};
|
| 164 |
+
|
| 165 |
+
#endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED
|
src/nnue/nnue_common.h
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
// Constants used in NNUE evaluation function
|
| 20 |
+
|
| 21 |
+
#ifndef NNUE_COMMON_H_INCLUDED
|
| 22 |
+
#define NNUE_COMMON_H_INCLUDED
|
| 23 |
+
|
| 24 |
+
#include <algorithm>
|
| 25 |
+
#include <cassert>
|
| 26 |
+
#include <cstdint>
|
| 27 |
+
#include <cstring>
|
| 28 |
+
#include <iostream>
|
| 29 |
+
#include <type_traits>
|
| 30 |
+
|
| 31 |
+
#include "../misc.h"
|
| 32 |
+
|
| 33 |
+
#if defined(USE_AVX2)
|
| 34 |
+
#include <immintrin.h>
|
| 35 |
+
|
| 36 |
+
#elif defined(USE_SSE41)
|
| 37 |
+
#include <smmintrin.h>
|
| 38 |
+
|
| 39 |
+
#elif defined(USE_SSSE3)
|
| 40 |
+
#include <tmmintrin.h>
|
| 41 |
+
|
| 42 |
+
#elif defined(USE_SSE2)
|
| 43 |
+
#include <emmintrin.h>
|
| 44 |
+
|
| 45 |
+
#elif defined(USE_NEON)
|
| 46 |
+
#include <arm_neon.h>
|
| 47 |
+
#endif
|
| 48 |
+
|
| 49 |
+
namespace Stockfish::Eval::NNUE {
|
| 50 |
+
|
| 51 |
+
using BiasType = std::int16_t;
|
| 52 |
+
using ThreatWeightType = std::int8_t;
|
| 53 |
+
using WeightType = std::int16_t;
|
| 54 |
+
using PSQTWeightType = std::int32_t;
|
| 55 |
+
using IndexType = std::uint32_t;
|
| 56 |
+
|
| 57 |
+
// Version of the evaluation file
|
| 58 |
+
constexpr std::uint32_t Version = 0x7AF32F20u;
|
| 59 |
+
|
| 60 |
+
// Constant used in evaluation value calculation
|
| 61 |
+
constexpr int OutputScale = 16;
|
| 62 |
+
constexpr int WeightScaleBits = 6;
|
| 63 |
+
|
| 64 |
+
// Size of cache line (in bytes)
|
| 65 |
+
constexpr std::size_t CacheLineSize = 64;
|
| 66 |
+
|
| 67 |
+
constexpr const char Leb128MagicString[] = "COMPRESSED_LEB128";
|
| 68 |
+
constexpr const std::size_t Leb128MagicStringSize = sizeof(Leb128MagicString) - 1;
|
| 69 |
+
|
| 70 |
+
// SIMD width (in bytes)
|
| 71 |
+
#if defined(USE_AVX2)
|
| 72 |
+
constexpr std::size_t SimdWidth = 32;
|
| 73 |
+
|
| 74 |
+
#elif defined(USE_SSE2)
|
| 75 |
+
constexpr std::size_t SimdWidth = 16;
|
| 76 |
+
|
| 77 |
+
#elif defined(USE_NEON)
|
| 78 |
+
constexpr std::size_t SimdWidth = 16;
|
| 79 |
+
#endif
|
| 80 |
+
|
| 81 |
+
constexpr std::size_t MaxSimdWidth = 32;
|
| 82 |
+
|
| 83 |
+
// Type of input feature after conversion
|
| 84 |
+
using TransformedFeatureType = std::uint8_t;
|
| 85 |
+
|
| 86 |
+
// Rounds n up to the next multiple of base; a value that already is a
// multiple is returned unchanged (for non-negative n and positive base).
template<typename IntType>
constexpr IntType ceil_to_multiple(IntType n, IntType base) {
    // Number of base-sized steps needed to reach or pass n
    const auto steps = (n + base - 1) / base;
    return steps * base;
}
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
// Utility to read an integer (signed or unsigned, any size)
|
| 94 |
+
// from a stream in little-endian order. We swap the byte order after the read if
|
| 95 |
+
// necessary to return a result with the byte ordering of the compiling machine.
|
| 96 |
+
template<typename IntType>
|
| 97 |
+
inline IntType read_little_endian(std::istream& stream) {
|
| 98 |
+
IntType result;
|
| 99 |
+
|
| 100 |
+
if (IsLittleEndian)
|
| 101 |
+
stream.read(reinterpret_cast<char*>(&result), sizeof(IntType));
|
| 102 |
+
else
|
| 103 |
+
{
|
| 104 |
+
std::uint8_t u[sizeof(IntType)];
|
| 105 |
+
std::make_unsigned_t<IntType> v = 0;
|
| 106 |
+
|
| 107 |
+
stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
|
| 108 |
+
for (std::size_t i = 0; i < sizeof(IntType); ++i)
|
| 109 |
+
v = (v << 8) | u[sizeof(IntType) - i - 1];
|
| 110 |
+
|
| 111 |
+
std::memcpy(&result, &v, sizeof(IntType));
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
return result;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
// Utility to write an integer (signed or unsigned, any size)
|
| 119 |
+
// to a stream in little-endian order. We swap the byte order before the write if
|
| 120 |
+
// necessary to always write in little-endian order, independently of the byte
|
| 121 |
+
// ordering of the compiling machine.
|
| 122 |
+
template<typename IntType>
|
| 123 |
+
inline void write_little_endian(std::ostream& stream, IntType value) {
|
| 124 |
+
|
| 125 |
+
if (IsLittleEndian)
|
| 126 |
+
stream.write(reinterpret_cast<const char*>(&value), sizeof(IntType));
|
| 127 |
+
else
|
| 128 |
+
{
|
| 129 |
+
std::uint8_t u[sizeof(IntType)];
|
| 130 |
+
std::make_unsigned_t<IntType> v = value;
|
| 131 |
+
|
| 132 |
+
std::size_t i = 0;
|
| 133 |
+
// if constexpr to silence the warning about shift by 8
|
| 134 |
+
if constexpr (sizeof(IntType) > 1)
|
| 135 |
+
{
|
| 136 |
+
for (; i + 1 < sizeof(IntType); ++i)
|
| 137 |
+
{
|
| 138 |
+
u[i] = std::uint8_t(v);
|
| 139 |
+
v >>= 8;
|
| 140 |
+
}
|
| 141 |
+
}
|
| 142 |
+
u[i] = std::uint8_t(v);
|
| 143 |
+
|
| 144 |
+
stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
|
| 145 |
+
}
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
// Read integers in bulk from a little-endian stream.
|
| 150 |
+
// This reads N integers from stream s and puts them in array out.
|
| 151 |
+
template<typename IntType>
|
| 152 |
+
inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) {
|
| 153 |
+
if (IsLittleEndian)
|
| 154 |
+
stream.read(reinterpret_cast<char*>(out), sizeof(IntType) * count);
|
| 155 |
+
else
|
| 156 |
+
for (std::size_t i = 0; i < count; ++i)
|
| 157 |
+
out[i] = read_little_endian<IntType>(stream);
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
// Write integers in bulk to a little-endian stream.
|
| 162 |
+
// This takes N integers from array values and writes them on stream s.
|
| 163 |
+
template<typename IntType>
|
| 164 |
+
inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) {
|
| 165 |
+
if (IsLittleEndian)
|
| 166 |
+
stream.write(reinterpret_cast<const char*>(values), sizeof(IntType) * count);
|
| 167 |
+
else
|
| 168 |
+
for (std::size_t i = 0; i < count; ++i)
|
| 169 |
+
write_little_endian<IntType>(stream, values[i]);
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
// Reads Count signed integers from the stream, putting them in the array out.
// The payload is assumed to be compressed using the signed LEB128 format.
// See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme.
//
// The payload is pulled from the stream through a staging buffer, so that
// several arrays can be decoded back to back from the same payload:
//   bytes_left - payload bytes not decoded yet; decremented for every byte consumed
//   buf        - staging buffer, refilled with at most buf.size() bytes at a time
//   buf_pos    - index of the next unread byte in buf; buf.size() requests a refill
//
// If the payload runs out (bytes_left reaches 0) or the stream fails before
// Count values have been decoded, failbit is set on the stream and the function
// returns early, leaving the not yet decoded elements of out untouched. This
// keeps a truncated or inconsistent file from being decoded out of stale
// buffer contents.
template<typename BufType, typename IntType, std::size_t Count>
inline void read_leb_128_detail(std::istream&               stream,
                                std::array<IntType, Count>& out,
                                std::uint32_t&              bytes_left,
                                BufType&                    buf,
                                std::uint32_t&              buf_pos) {

    static_assert(std::is_signed_v<IntType>, "Not implemented for unsigned types");
    static_assert(sizeof(IntType) <= 4, "Not implemented for types larger than 32 bit");

    // Accumulate in an unsigned type: shifting payload bits into the top bit
    // is then well defined, and the final conversion keeps the low bits.
    std::uint32_t result = 0;
    std::size_t   shift = 0, i = 0;
    while (i < Count)
    {
        // The declared payload is shorter than what the arrays need
        if (bytes_left == 0)
        {
            stream.setstate(std::ios::failbit);
            return;
        }

        if (buf_pos == buf.size())
        {
            stream.read(reinterpret_cast<char*>(buf.data()),
                        std::min(std::size_t(bytes_left), buf.size()));

            // The stream ended before the declared payload size was reached;
            // the failed read has already set failbit.
            if (stream.fail())
                return;

            buf_pos = 0;
        }

        const std::uint8_t byte = buf[buf_pos++];
        --bytes_left;
        result |= std::uint32_t(byte & 0x7f) << (shift % 32);
        shift += 7;

        // A clear top bit marks the last byte of the current value
        if ((byte & 0x80) == 0)
        {
            // Bit 6 of the last byte is the sign: extend it over the bits
            // that were not transmitted.
            if (shift < 32 && (byte & 0x40) != 0)
                result |= ~((std::uint32_t(1) << shift) - 1);

            out[i++] = static_cast<IntType>(result);
            result   = 0;
            shift    = 0;
        }
    }
}
|
| 209 |
+
|
| 210 |
+
template<typename... Arrays>
|
| 211 |
+
inline void read_leb_128(std::istream& stream, Arrays&... outs) {
|
| 212 |
+
// Check the presence of our LEB128 magic string
|
| 213 |
+
char leb128MagicString[Leb128MagicStringSize];
|
| 214 |
+
stream.read(leb128MagicString, Leb128MagicStringSize);
|
| 215 |
+
if (stream.fail() || strncmp(Leb128MagicString, leb128MagicString, Leb128MagicStringSize) != 0)
|
| 216 |
+
{
|
| 217 |
+
stream.setstate(std::ios::failbit);
|
| 218 |
+
return;
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
auto bytes_left = read_little_endian<std::uint32_t>(stream);
|
| 222 |
+
std::array<std::uint8_t, 8192> buf;
|
| 223 |
+
std::uint32_t buf_pos = std::uint32_t(buf.size());
|
| 224 |
+
|
| 225 |
+
(read_leb_128_detail(stream, outs, bytes_left, buf, buf_pos), ...);
|
| 226 |
+
|
| 227 |
+
if (bytes_left != 0)
|
| 228 |
+
stream.setstate(std::ios::failbit);
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
// Write signed integers to a stream with LEB128 compression.
|
| 233 |
+
// This takes N integers from array values, compresses them with
|
| 234 |
+
// the LEB128 algorithm and writes the result on the stream s.
|
| 235 |
+
// See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme.
|
| 236 |
+
template<typename IntType, std::size_t Count>
|
| 237 |
+
inline void write_leb_128(std::ostream& stream, const std::array<IntType, Count>& values) {
|
| 238 |
+
|
| 239 |
+
// Write our LEB128 magic string
|
| 240 |
+
stream.write(Leb128MagicString, Leb128MagicStringSize);
|
| 241 |
+
|
| 242 |
+
static_assert(std::is_signed_v<IntType>, "Not implemented for unsigned types");
|
| 243 |
+
|
| 244 |
+
std::uint32_t byte_count = 0;
|
| 245 |
+
for (std::size_t i = 0; i < Count; ++i)
|
| 246 |
+
{
|
| 247 |
+
IntType value = values[i];
|
| 248 |
+
std::uint8_t byte;
|
| 249 |
+
do
|
| 250 |
+
{
|
| 251 |
+
byte = value & 0x7f;
|
| 252 |
+
value >>= 7;
|
| 253 |
+
++byte_count;
|
| 254 |
+
} while ((byte & 0x40) == 0 ? value != 0 : value != -1);
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
write_little_endian(stream, byte_count);
|
| 258 |
+
|
| 259 |
+
const std::uint32_t BUF_SIZE = 4096;
|
| 260 |
+
std::uint8_t buf[BUF_SIZE];
|
| 261 |
+
std::uint32_t buf_pos = 0;
|
| 262 |
+
|
| 263 |
+
auto flush = [&]() {
|
| 264 |
+
if (buf_pos > 0)
|
| 265 |
+
{
|
| 266 |
+
stream.write(reinterpret_cast<char*>(buf), buf_pos);
|
| 267 |
+
buf_pos = 0;
|
| 268 |
+
}
|
| 269 |
+
};
|
| 270 |
+
|
| 271 |
+
auto write = [&](std::uint8_t b) {
|
| 272 |
+
buf[buf_pos++] = b;
|
| 273 |
+
if (buf_pos == BUF_SIZE)
|
| 274 |
+
flush();
|
| 275 |
+
};
|
| 276 |
+
|
| 277 |
+
for (std::size_t i = 0; i < Count; ++i)
|
| 278 |
+
{
|
| 279 |
+
IntType value = values[i];
|
| 280 |
+
while (true)
|
| 281 |
+
{
|
| 282 |
+
std::uint8_t byte = value & 0x7f;
|
| 283 |
+
value >>= 7;
|
| 284 |
+
if ((byte & 0x40) == 0 ? value == 0 : value == -1)
|
| 285 |
+
{
|
| 286 |
+
write(byte);
|
| 287 |
+
break;
|
| 288 |
+
}
|
| 289 |
+
write(byte | 0x80);
|
| 290 |
+
}
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
flush();
|
| 294 |
+
}
|
| 295 |
+
|
| 296 |
+
} // namespace Stockfish::Eval::NNUE
|
| 297 |
+
|
| 298 |
+
#endif // #ifndef NNUE_COMMON_H_INCLUDED
|
src/nnue/nnue_feature_transformer.h
ADDED
|
@@ -0,0 +1,456 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
// A class that converts the input features of the NNUE evaluation function
|
| 20 |
+
|
| 21 |
+
#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
|
| 22 |
+
#define NNUE_FEATURE_TRANSFORMER_H_INCLUDED
|
| 23 |
+
|
| 24 |
+
#include <algorithm>
|
| 25 |
+
#include <cstdint>
|
| 26 |
+
#include <cstring>
|
| 27 |
+
#include <iosfwd>
|
| 28 |
+
#include <iterator>
|
| 29 |
+
|
| 30 |
+
#include "../position.h"
|
| 31 |
+
#include "../types.h"
|
| 32 |
+
#include "nnue_accumulator.h"
|
| 33 |
+
#include "nnue_architecture.h"
|
| 34 |
+
#include "nnue_common.h"
|
| 35 |
+
#include "simd.h"
|
| 36 |
+
|
| 37 |
+
namespace Stockfish::Eval::NNUE {
|
| 38 |
+
|
| 39 |
+
// Builds the inverse of a permutation: if order maps position i to the value
// order[i], the result maps order[i] back to i.
template<std::size_t Len>
constexpr std::array<std::size_t, Len>
invert_permutation(const std::array<std::size_t, Len>& order) {
    std::array<std::size_t, Len> inverse{};

    std::size_t position = 0;
    for (const std::size_t target : order)
        inverse[target] = position++;

    return inverse;
}
|
| 48 |
+
|
| 49 |
+
// Treats data as a region of TotalSize bytes split into groups of OrderSize
// blocks of BlockSize bytes each, and rearranges the blocks within every
// group: block j of a group receives the former block order[j] of that group.
template<std::size_t BlockSize, typename T, std::size_t N, std::size_t OrderSize>
void permute(std::array<T, N>& data, const std::array<std::size_t, OrderSize>& order) {
    constexpr std::size_t TotalSize = N * sizeof(T);
    constexpr std::size_t GroupSize = BlockSize * OrderSize;

    static_assert(TotalSize % GroupSize == 0,
                  "ChunkSize * OrderSize must perfectly divide TotalSize");

    // One group is assembled here in its new order, then copied back
    std::array<std::byte, GroupSize> scratch{};

    auto* const raw = reinterpret_cast<std::byte*>(data.data());

    for (std::size_t groupStart = 0; groupStart < TotalSize; groupStart += GroupSize)
    {
        std::byte* const group = raw + groupStart;

        std::size_t destination = 0;
        for (const std::size_t sourceBlock : order)
        {
            std::copy_n(group + sourceBlock * BlockSize, BlockSize,
                        scratch.begin() + destination);
            destination += BlockSize;
        }

        std::copy(scratch.begin(), scratch.end(), group);
    }
}
|
| 79 |
+
|
| 80 |
+
// Input feature converter
|
| 81 |
+
template<IndexType TransformedFeatureDimensions>
|
| 82 |
+
class FeatureTransformer {
|
| 83 |
+
static constexpr bool UseThreats =
|
| 84 |
+
(TransformedFeatureDimensions == TransformedFeatureDimensionsBig);
|
| 85 |
+
// Number of output dimensions for one side
|
| 86 |
+
static constexpr IndexType HalfDimensions = TransformedFeatureDimensions;
|
| 87 |
+
|
| 88 |
+
public:
|
| 89 |
+
// Output type
|
| 90 |
+
using OutputType = TransformedFeatureType;
|
| 91 |
+
|
| 92 |
+
// Number of input/output dimensions
|
| 93 |
+
static constexpr IndexType InputDimensions = PSQFeatureSet::Dimensions;
|
| 94 |
+
static constexpr IndexType ThreatInputDimensions = ThreatFeatureSet::Dimensions;
|
| 95 |
+
static constexpr IndexType TotalInputDimensions =
|
| 96 |
+
InputDimensions + (UseThreats ? ThreatInputDimensions : 0);
|
| 97 |
+
static constexpr IndexType OutputDimensions = HalfDimensions;
|
| 98 |
+
|
| 99 |
+
// Size of forward propagation buffer
|
| 100 |
+
static constexpr std::size_t BufferSize = OutputDimensions * sizeof(OutputType);
|
| 101 |
+
|
| 102 |
+
// Order in which the 128-bit blocks of every 1024 bits of data have to be
// stored so that calling packus on adjacent vectors of 16-bit integers loaded
// from that data yields the elements in their pre-permutation order.
// The order is selected at compile time from the enabled instruction set;
// without AVX-512 or AVX2 it is the identity.
static constexpr std::array<std::size_t, 8> PackusEpi16Order =
#if defined(USE_AVX512)
  // _mm512_packus_epi16 after permutation:
  // |   0   |   2   |   4   |   6   | // Vector 0
  // |   1   |   3   |   5   |   7   | // Vector 1
  // | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | // Packed Result
  {0, 2, 4, 6, 1, 3, 5, 7};
#elif defined(USE_AVX2)
  // _mm256_packus_epi16 after permutation:
  // |   0   |   2   |  |   4   |   6   | // Vector 0, 2
  // |   1   |   3   |  |   5   |   7   | // Vector 1, 3
  // | 0 | 1 | 2 | 3 |  | 4 | 5 | 6 | 7 | // Packed Result
  {0, 2, 1, 3, 4, 6, 5, 7};
#else
  {0, 1, 2, 3, 4, 5, 6, 7};
#endif
|
| 122 |
+
|
| 123 |
+
static constexpr auto InversePackusEpi16Order = invert_permutation(PackusEpi16Order);
|
| 124 |
+
|
| 125 |
+
// Folds several component hashes into one value: before each component is
// XOR-ed in, the running hash is rotated left by one bit. An empty list
// yields 0.
static constexpr std::uint32_t combine_hash(std::initializer_list<std::uint32_t> hashes) {
    std::uint32_t combined = 0;

    for (auto it = hashes.begin(); it != hashes.end(); ++it)
    {
        const std::uint32_t rotated = (combined << 1) | (combined >> 31);
        combined                    = rotated ^ *it;
    }

    return combined;
}
|
| 134 |
+
|
| 135 |
+
// Hash value embedded in the evaluation file
|
| 136 |
+
static constexpr std::uint32_t get_hash_value() {
|
| 137 |
+
return (UseThreats ? combine_hash({ThreatFeatureSet::HashValue, PSQFeatureSet::HashValue})
|
| 138 |
+
: PSQFeatureSet::HashValue)
|
| 139 |
+
^ (OutputDimensions * 2);
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
void permute_weights() {
|
| 143 |
+
permute<16>(biases, PackusEpi16Order);
|
| 144 |
+
permute<16>(weights, PackusEpi16Order);
|
| 145 |
+
|
| 146 |
+
if constexpr (UseThreats)
|
| 147 |
+
permute<8>(threatWeights, PackusEpi16Order);
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
void unpermute_weights() {
|
| 151 |
+
permute<16>(biases, InversePackusEpi16Order);
|
| 152 |
+
permute<16>(weights, InversePackusEpi16Order);
|
| 153 |
+
|
| 154 |
+
if constexpr (UseThreats)
|
| 155 |
+
permute<8>(threatWeights, InversePackusEpi16Order);
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
// Read network parameters
|
| 159 |
+
bool read_parameters(std::istream& stream) {
|
| 160 |
+
const std::streampos beginPos = stream.tellg();
|
| 161 |
+
|
| 162 |
+
if constexpr (UseThreats)
|
| 163 |
+
{
|
| 164 |
+
// Primary path: Full_Threats + HalfKAv2_hm^ export layout
|
| 165 |
+
read_leb_128(stream, biases);
|
| 166 |
+
read_little_endian<ThreatWeightType>(stream, threatWeights.data(),
|
| 167 |
+
ThreatInputDimensions * HalfDimensions);
|
| 168 |
+
read_leb_128(stream, weights);
|
| 169 |
+
read_leb_128(stream, threatPsqtWeights);
|
| 170 |
+
read_leb_128(stream, psqtWeights);
|
| 171 |
+
|
| 172 |
+
if (stream.fail())
|
| 173 |
+
{
|
| 174 |
+
// Fallback path: HalfKAv2_hm^ only export layout (no threat tensors)
|
| 175 |
+
stream.clear();
|
| 176 |
+
stream.seekg(beginPos);
|
| 177 |
+
std::fill(threatWeights.begin(), threatWeights.end(), 0);
|
| 178 |
+
std::fill(threatPsqtWeights.begin(), threatPsqtWeights.end(), 0);
|
| 179 |
+
|
| 180 |
+
read_leb_128(stream, biases);
|
| 181 |
+
read_leb_128(stream, weights);
|
| 182 |
+
read_leb_128(stream, psqtWeights);
|
| 183 |
+
}
|
| 184 |
+
}
|
| 185 |
+
else
|
| 186 |
+
{
|
| 187 |
+
read_leb_128(stream, biases);
|
| 188 |
+
read_leb_128(stream, weights);
|
| 189 |
+
read_leb_128(stream, psqtWeights);
|
| 190 |
+
}
|
| 191 |
+
|
| 192 |
+
if (stream.fail())
|
| 193 |
+
return false;
|
| 194 |
+
|
| 195 |
+
permute_weights();
|
| 196 |
+
return true;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
// Write network parameters
|
| 200 |
+
bool write_parameters(std::ostream& stream) const {
|
| 201 |
+
std::unique_ptr<FeatureTransformer> copy = std::make_unique<FeatureTransformer>(*this);
|
| 202 |
+
|
| 203 |
+
copy->unpermute_weights();
|
| 204 |
+
|
| 205 |
+
write_leb_128<BiasType>(stream, copy->biases);
|
| 206 |
+
|
| 207 |
+
if constexpr (UseThreats)
|
| 208 |
+
{
|
| 209 |
+
write_little_endian<ThreatWeightType>(stream, copy->threatWeights.data(),
|
| 210 |
+
ThreatInputDimensions * HalfDimensions);
|
| 211 |
+
write_leb_128<WeightType>(stream, copy->weights);
|
| 212 |
+
|
| 213 |
+
auto combinedPsqtWeights =
|
| 214 |
+
std::make_unique<std::array<PSQTWeightType, TotalInputDimensions * PSQTBuckets>>();
|
| 215 |
+
|
| 216 |
+
std::copy(std::begin(copy->threatPsqtWeights),
|
| 217 |
+
std::begin(copy->threatPsqtWeights) + ThreatInputDimensions * PSQTBuckets,
|
| 218 |
+
combinedPsqtWeights->begin());
|
| 219 |
+
|
| 220 |
+
std::copy(std::begin(copy->psqtWeights),
|
| 221 |
+
std::begin(copy->psqtWeights) + InputDimensions * PSQTBuckets,
|
| 222 |
+
combinedPsqtWeights->begin() + ThreatInputDimensions * PSQTBuckets);
|
| 223 |
+
|
| 224 |
+
write_leb_128<PSQTWeightType>(stream, *combinedPsqtWeights);
|
| 225 |
+
}
|
| 226 |
+
else
|
| 227 |
+
{
|
| 228 |
+
write_leb_128<WeightType>(stream, copy->weights);
|
| 229 |
+
write_leb_128<PSQTWeightType>(stream, copy->psqtWeights);
|
| 230 |
+
}
|
| 231 |
+
|
| 232 |
+
return !stream.fail();
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
std::size_t get_content_hash() const {
|
| 236 |
+
std::size_t h = 0;
|
| 237 |
+
|
| 238 |
+
hash_combine(h, get_raw_data_hash(biases));
|
| 239 |
+
hash_combine(h, get_raw_data_hash(weights));
|
| 240 |
+
hash_combine(h, get_raw_data_hash(psqtWeights));
|
| 241 |
+
|
| 242 |
+
if constexpr (UseThreats)
|
| 243 |
+
{
|
| 244 |
+
hash_combine(h, get_raw_data_hash(threatWeights));
|
| 245 |
+
hash_combine(h, get_raw_data_hash(threatPsqtWeights));
|
| 246 |
+
}
|
| 247 |
+
|
| 248 |
+
hash_combine(h, get_hash_value());
|
| 249 |
+
|
| 250 |
+
return h;
|
| 251 |
+
}
|
| 252 |
+
|
| 253 |
+
// Convert input features
|
| 254 |
+
std::int32_t transform(const Position& pos,
|
| 255 |
+
AccumulatorStack& accumulatorStack,
|
| 256 |
+
AccumulatorCaches::Cache<HalfDimensions>& cache,
|
| 257 |
+
OutputType* output,
|
| 258 |
+
int bucket) const {
|
| 259 |
+
|
| 260 |
+
using namespace SIMD;
|
| 261 |
+
accumulatorStack.evaluate(pos, *this, cache);
|
| 262 |
+
const auto& accumulatorState = accumulatorStack.latest<PSQFeatureSet>();
|
| 263 |
+
const auto& threatAccumulatorState = accumulatorStack.latest<ThreatFeatureSet>();
|
| 264 |
+
|
| 265 |
+
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
|
| 266 |
+
const auto& psqtAccumulation = (accumulatorState.acc<HalfDimensions>()).psqtAccumulation;
|
| 267 |
+
auto psqt =
|
| 268 |
+
(psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket]);
|
| 269 |
+
|
| 270 |
+
if constexpr (UseThreats)
|
| 271 |
+
{
|
| 272 |
+
const auto& threatPsqtAccumulation =
|
| 273 |
+
(threatAccumulatorState.acc<HalfDimensions>()).psqtAccumulation;
|
| 274 |
+
psqt = (psqt + threatPsqtAccumulation[perspectives[0]][bucket]
|
| 275 |
+
- threatPsqtAccumulation[perspectives[1]][bucket])
|
| 276 |
+
/ 2;
|
| 277 |
+
}
|
| 278 |
+
else
|
| 279 |
+
psqt /= 2;
|
| 280 |
+
|
| 281 |
+
const auto& accumulation = (accumulatorState.acc<HalfDimensions>()).accumulation;
|
| 282 |
+
const auto& threatAccumulation =
|
| 283 |
+
(threatAccumulatorState.acc<HalfDimensions>()).accumulation;
|
| 284 |
+
|
| 285 |
+
for (IndexType p = 0; p < 2; ++p)
|
| 286 |
+
{
|
| 287 |
+
const IndexType offset = (HalfDimensions / 2) * p;
|
| 288 |
+
|
| 289 |
+
#if defined(VECTOR)
|
| 290 |
+
|
| 291 |
+
constexpr IndexType OutputChunkSize = MaxChunkSize;
|
| 292 |
+
static_assert((HalfDimensions / 2) % OutputChunkSize == 0);
|
| 293 |
+
constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize;
|
| 294 |
+
|
| 295 |
+
const vec_t Zero = vec_zero();
|
| 296 |
+
const vec_t One = vec_set_16(255);
|
| 297 |
+
|
| 298 |
+
const vec_t* in0 = reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][0]));
|
| 299 |
+
const vec_t* in1 =
|
| 300 |
+
reinterpret_cast<const vec_t*>(&(accumulation[perspectives[p]][HalfDimensions / 2]));
|
| 301 |
+
vec_t* out = reinterpret_cast<vec_t*>(output + offset);
|
| 302 |
+
|
| 303 |
+
// Per the NNUE architecture, here we want to multiply pairs of
|
| 304 |
+
// clipped elements and divide the product by 128. To do this,
|
| 305 |
+
// we can naively perform min/max operation to clip each of the
|
| 306 |
+
// four int16 vectors, mullo pairs together, then pack them into
|
| 307 |
+
// one int8 vector. However, there exists a faster way.
|
| 308 |
+
|
| 309 |
+
// The idea here is to use the implicit clipping from packus to
|
| 310 |
+
// save us two vec_max_16 instructions. This clipping works due
|
| 311 |
+
// to the fact that any int16 integer below zero will be zeroed
|
| 312 |
+
// on packus.
|
| 313 |
+
|
| 314 |
+
// Consider the case where the second element is negative.
|
| 315 |
+
// If we do standard clipping, that element will be zero, which
|
| 316 |
+
// means our pairwise product is zero. If we perform packus and
|
| 317 |
+
// remove the lower-side clip for the second element, then our
|
| 318 |
+
// product before packus will be negative, and is zeroed on pack.
|
| 319 |
+
// The two operation produce equivalent results, but the second
|
| 320 |
+
// one (using packus) saves one max operation per pair.
|
| 321 |
+
|
| 322 |
+
// But here we run into a problem: mullo does not preserve the
|
| 323 |
+
// sign of the multiplication. We can get around this by doing
|
| 324 |
+
// mulhi, which keeps the sign. But that requires an additional
|
| 325 |
+
// tweak.
|
| 326 |
+
|
| 327 |
+
// mulhi cuts off the last 16 bits of the resulting product,
|
| 328 |
+
// which is the same as performing a rightward shift of 16 bits.
|
| 329 |
+
// We can use this to our advantage. Recall that we want to
|
| 330 |
+
// divide the final product by 128, which is equivalent to a
|
| 331 |
+
// 7-bit right shift. Intuitively, if we shift the clipped
|
| 332 |
+
// value left by 9, and perform mulhi, which shifts the product
|
| 333 |
+
// right by 16 bits, then we will net a right shift of 7 bits.
|
| 334 |
+
// However, this won't work as intended. Since we clip the
|
| 335 |
+
// values to have a maximum value of 127, shifting it by 9 bits
|
| 336 |
+
// might occupy the signed bit, resulting in some positive
|
| 337 |
+
// values being interpreted as negative after the shift.
|
| 338 |
+
|
| 339 |
+
// There is a way, however, to get around this limitation. When
|
| 340 |
+
// loading the network, scale accumulator weights and biases by
|
| 341 |
+
// 2. To get the same pairwise multiplication result as before,
|
| 342 |
+
// we need to divide the product by 128 * 2 * 2 = 512, which
|
| 343 |
+
// amounts to a right shift of 9 bits. So now we only have to
|
| 344 |
+
// shift left by 7 bits, perform mulhi (shifts right by 16 bits)
|
| 345 |
+
// and net a 9 bit right shift. Since we scaled everything by
|
| 346 |
+
// two, the values are clipped at 127 * 2 = 254, which occupies
|
| 347 |
+
// 8 bits. Shifting it by 7 bits left will no longer occupy the
|
| 348 |
+
// signed bit, so we are safe.
|
| 349 |
+
|
| 350 |
+
// Note that on NEON processors, we shift left by 6 instead
|
| 351 |
+
// because the instruction "vqdmulhq_s16" also doubles the
|
| 352 |
+
// return value after the multiplication, adding an extra shift
|
| 353 |
+
// to the left by 1, so we compensate by shifting less before
|
| 354 |
+
// the multiplication.
|
| 355 |
+
|
| 356 |
+
constexpr int shift =
|
| 357 |
+
#if defined(USE_SSE2)
|
| 358 |
+
7;
|
| 359 |
+
#else
|
| 360 |
+
6;
|
| 361 |
+
#endif
|
| 362 |
+
if constexpr (UseThreats)
|
| 363 |
+
{
|
| 364 |
+
const vec_t* tin0 =
|
| 365 |
+
reinterpret_cast<const vec_t*>(&(threatAccumulation[perspectives[p]][0]));
|
| 366 |
+
const vec_t* tin1 = reinterpret_cast<const vec_t*>(
|
| 367 |
+
&(threatAccumulation[perspectives[p]][HalfDimensions / 2]));
|
| 368 |
+
for (IndexType j = 0; j < NumOutputChunks; ++j)
|
| 369 |
+
{
|
| 370 |
+
const vec_t acc0a = vec_add_16(in0[j * 2 + 0], tin0[j * 2 + 0]);
|
| 371 |
+
const vec_t acc0b = vec_add_16(in0[j * 2 + 1], tin0[j * 2 + 1]);
|
| 372 |
+
const vec_t acc1a = vec_add_16(in1[j * 2 + 0], tin1[j * 2 + 0]);
|
| 373 |
+
const vec_t acc1b = vec_add_16(in1[j * 2 + 1], tin1[j * 2 + 1]);
|
| 374 |
+
|
| 375 |
+
const vec_t sum0a =
|
| 376 |
+
vec_slli_16(vec_max_16(vec_min_16(acc0a, One), Zero), shift);
|
| 377 |
+
const vec_t sum0b =
|
| 378 |
+
vec_slli_16(vec_max_16(vec_min_16(acc0b, One), Zero), shift);
|
| 379 |
+
const vec_t sum1a = vec_min_16(acc1a, One);
|
| 380 |
+
const vec_t sum1b = vec_min_16(acc1b, One);
|
| 381 |
+
|
| 382 |
+
const vec_t pa = vec_mulhi_16(sum0a, sum1a);
|
| 383 |
+
const vec_t pb = vec_mulhi_16(sum0b, sum1b);
|
| 384 |
+
|
| 385 |
+
out[j] = vec_packus_16(pa, pb);
|
| 386 |
+
}
|
| 387 |
+
}
|
| 388 |
+
else
|
| 389 |
+
{
|
| 390 |
+
for (IndexType j = 0; j < NumOutputChunks; ++j)
|
| 391 |
+
{
|
| 392 |
+
const vec_t sum0a =
|
| 393 |
+
vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero), shift);
|
| 394 |
+
const vec_t sum0b =
|
| 395 |
+
vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero), shift);
|
| 396 |
+
const vec_t sum1a = vec_min_16(in1[j * 2 + 0], One);
|
| 397 |
+
const vec_t sum1b = vec_min_16(in1[j * 2 + 1], One);
|
| 398 |
+
|
| 399 |
+
const vec_t pa = vec_mulhi_16(sum0a, sum1a);
|
| 400 |
+
const vec_t pb = vec_mulhi_16(sum0b, sum1b);
|
| 401 |
+
|
| 402 |
+
out[j] = vec_packus_16(pa, pb);
|
| 403 |
+
}
|
| 404 |
+
}
|
| 405 |
+
|
| 406 |
+
#else
|
| 407 |
+
|
| 408 |
+
for (IndexType j = 0; j < HalfDimensions / 2; ++j)
|
| 409 |
+
{
|
| 410 |
+
BiasType sum0 = accumulation[static_cast<int>(perspectives[p])][j + 0];
|
| 411 |
+
BiasType sum1 =
|
| 412 |
+
accumulation[static_cast<int>(perspectives[p])][j + HalfDimensions / 2];
|
| 413 |
+
|
| 414 |
+
if constexpr (UseThreats)
|
| 415 |
+
{
|
| 416 |
+
sum0 += threatAccumulation[static_cast<int>(perspectives[p])][j + 0];
|
| 417 |
+
sum1 +=
|
| 418 |
+
threatAccumulation[static_cast<int>(perspectives[p])][j + HalfDimensions / 2];
|
| 419 |
+
}
|
| 420 |
+
|
| 421 |
+
sum0 = std::clamp<BiasType>(sum0, 0, 255);
|
| 422 |
+
sum1 = std::clamp<BiasType>(sum1, 0, 255);
|
| 423 |
+
|
| 424 |
+
output[offset + j] = static_cast<OutputType>(unsigned(sum0 * sum1) / 512);
|
| 425 |
+
}
|
| 426 |
+
|
| 427 |
+
#endif
|
| 428 |
+
}
|
| 429 |
+
|
| 430 |
+
return psqt;
|
| 431 |
+
} // end of function transform()
|
| 432 |
+
|
| 433 |
+
alignas(CacheLineSize) std::array<BiasType, HalfDimensions> biases;
|
| 434 |
+
alignas(CacheLineSize) std::array<WeightType, HalfDimensions * InputDimensions> weights;
|
| 435 |
+
alignas(CacheLineSize)
|
| 436 |
+
std::array<ThreatWeightType,
|
| 437 |
+
UseThreats ? HalfDimensions * ThreatInputDimensions : 0> threatWeights;
|
| 438 |
+
alignas(CacheLineSize) std::array<PSQTWeightType, InputDimensions * PSQTBuckets> psqtWeights;
|
| 439 |
+
alignas(CacheLineSize)
|
| 440 |
+
std::array<PSQTWeightType,
|
| 441 |
+
UseThreats ? ThreatInputDimensions * PSQTBuckets : 0> threatPsqtWeights;
|
| 442 |
+
};
|
| 443 |
+
|
| 444 |
+
} // namespace Stockfish::Eval::NNUE
|
| 445 |
+
|
| 446 |
+
|
| 447 |
+
template<Stockfish::Eval::NNUE::IndexType TransformedFeatureDimensions>
|
| 448 |
+
struct std::hash<Stockfish::Eval::NNUE::FeatureTransformer<TransformedFeatureDimensions>> {
|
| 449 |
+
std::size_t
|
| 450 |
+
operator()(const Stockfish::Eval::NNUE::FeatureTransformer<TransformedFeatureDimensions>& ft)
|
| 451 |
+
const noexcept {
|
| 452 |
+
return ft.get_content_hash();
|
| 453 |
+
}
|
| 454 |
+
};
|
| 455 |
+
|
| 456 |
+
#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
|
src/nnue/nnue_misc.cpp
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
// Code for calculating NNUE evaluation function
|
| 20 |
+
|
| 21 |
+
#include "nnue_misc.h"
|
| 22 |
+
|
| 23 |
+
#include <cmath>
|
| 24 |
+
#include <cstdlib>
|
| 25 |
+
#include <cstring>
|
| 26 |
+
#include <iomanip>
|
| 27 |
+
#include <iosfwd>
|
| 28 |
+
#include <iostream>
|
| 29 |
+
#include <sstream>
|
| 30 |
+
#include <string_view>
|
| 31 |
+
#include <tuple>
|
| 32 |
+
|
| 33 |
+
#include "../position.h"
|
| 34 |
+
#include "../types.h"
|
| 35 |
+
#include "../uci.h"
|
| 36 |
+
#include "network.h"
|
| 37 |
+
#include "nnue_accumulator.h"
|
| 38 |
+
|
| 39 |
+
namespace Stockfish::Eval::NNUE {
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
// Character drawn for a piece in the trace() board; indexed directly by the Piece value.
// NOTE(review): the gap between the upper-case and lower-case runs must match the
// numbering of the Piece enum - confirm that no spaces were lost from this literal.
constexpr std::string_view PieceToChar(" PNBRQK pnbrqk");
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
namespace {
|
| 46 |
+
// Converts a Value into (centi)pawns and writes it in a buffer.
|
| 47 |
+
// The buffer must have capacity for at least 5 chars.
|
| 48 |
+
void format_cp_compact(Value v, char* buffer, const Position& pos) {
|
| 49 |
+
|
| 50 |
+
buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' ');
|
| 51 |
+
|
| 52 |
+
int cp = std::abs(UCIEngine::to_cp(v, pos));
|
| 53 |
+
if (cp >= 10000)
|
| 54 |
+
{
|
| 55 |
+
buffer[1] = '0' + cp / 10000;
|
| 56 |
+
cp %= 10000;
|
| 57 |
+
buffer[2] = '0' + cp / 1000;
|
| 58 |
+
cp %= 1000;
|
| 59 |
+
buffer[3] = '0' + cp / 100;
|
| 60 |
+
buffer[4] = ' ';
|
| 61 |
+
}
|
| 62 |
+
else if (cp >= 1000)
|
| 63 |
+
{
|
| 64 |
+
buffer[1] = '0' + cp / 1000;
|
| 65 |
+
cp %= 1000;
|
| 66 |
+
buffer[2] = '0' + cp / 100;
|
| 67 |
+
cp %= 100;
|
| 68 |
+
buffer[3] = '.';
|
| 69 |
+
buffer[4] = '0' + cp / 10;
|
| 70 |
+
}
|
| 71 |
+
else
|
| 72 |
+
{
|
| 73 |
+
buffer[1] = '0' + cp / 100;
|
| 74 |
+
cp %= 100;
|
| 75 |
+
buffer[2] = '.';
|
| 76 |
+
buffer[3] = '0' + cp / 10;
|
| 77 |
+
cp %= 10;
|
| 78 |
+
buffer[4] = '0' + cp / 1;
|
| 79 |
+
}
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
// Converts a Value into pawns, always keeping two decimals
|
| 84 |
+
void format_cp_aligned_dot(Value v, std::stringstream& stream, const Position& pos) {
|
| 85 |
+
|
| 86 |
+
const double pawns = std::abs(0.01 * UCIEngine::to_cp(v, pos));
|
| 87 |
+
|
| 88 |
+
stream << (v < 0 ? '-'
|
| 89 |
+
: v > 0 ? '+'
|
| 90 |
+
: ' ')
|
| 91 |
+
<< std::setiosflags(std::ios::fixed) << std::setw(6) << std::setprecision(2) << pawns;
|
| 92 |
+
}
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
// Returns a string with the value of each piece on a board,
|
| 97 |
+
// and a table for (PSQT, Layers) values bucket by bucket.
|
| 98 |
+
std::string
|
| 99 |
+
trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::AccumulatorCaches& caches) {
|
| 100 |
+
|
| 101 |
+
std::stringstream ss;
|
| 102 |
+
|
| 103 |
+
char board[3 * 8 + 1][8 * 8 + 2];
|
| 104 |
+
std::memset(board, ' ', sizeof(board));
|
| 105 |
+
for (int row = 0; row < 3 * 8 + 1; ++row)
|
| 106 |
+
board[row][8 * 8 + 1] = '\0';
|
| 107 |
+
|
| 108 |
+
// A lambda to output one box of the board
|
| 109 |
+
auto writeSquare = [&board, &pos](File file, Rank rank, Piece pc, Value value) {
|
| 110 |
+
const int x = int(file) * 8;
|
| 111 |
+
const int y = (7 - int(rank)) * 3;
|
| 112 |
+
for (int i = 1; i < 8; ++i)
|
| 113 |
+
board[y][x + i] = board[y + 3][x + i] = '-';
|
| 114 |
+
for (int i = 1; i < 3; ++i)
|
| 115 |
+
board[y + i][x] = board[y + i][x + 8] = '|';
|
| 116 |
+
board[y][x] = board[y][x + 8] = board[y + 3][x + 8] = board[y + 3][x] = '+';
|
| 117 |
+
if (pc != NO_PIECE)
|
| 118 |
+
board[y + 1][x + 4] = PieceToChar[pc];
|
| 119 |
+
if (is_valid(value))
|
| 120 |
+
format_cp_compact(value, &board[y + 2][x + 2], pos);
|
| 121 |
+
};
|
| 122 |
+
|
| 123 |
+
auto accumulators = std::make_unique<AccumulatorStack>();
|
| 124 |
+
|
| 125 |
+
// We estimate the value of each piece by doing a differential evaluation from
|
| 126 |
+
// the current base eval, simulating the removal of the piece from its square.
|
| 127 |
+
auto [psqt, positional] = networks.big.evaluate(pos, *accumulators, caches.big);
|
| 128 |
+
Value base = psqt + positional;
|
| 129 |
+
base = pos.side_to_move() == WHITE ? base : -base;
|
| 130 |
+
|
| 131 |
+
for (File f = FILE_A; f <= FILE_H; ++f)
|
| 132 |
+
for (Rank r = RANK_1; r <= RANK_8; ++r)
|
| 133 |
+
{
|
| 134 |
+
Square sq = make_square(f, r);
|
| 135 |
+
Piece pc = pos.piece_on(sq);
|
| 136 |
+
Value v = VALUE_NONE;
|
| 137 |
+
|
| 138 |
+
if (pc != NO_PIECE && type_of(pc) != KING)
|
| 139 |
+
{
|
| 140 |
+
pos.remove_piece(sq);
|
| 141 |
+
|
| 142 |
+
accumulators->reset();
|
| 143 |
+
std::tie(psqt, positional) = networks.big.evaluate(pos, *accumulators, caches.big);
|
| 144 |
+
Value eval = psqt + positional;
|
| 145 |
+
eval = pos.side_to_move() == WHITE ? eval : -eval;
|
| 146 |
+
v = base - eval;
|
| 147 |
+
|
| 148 |
+
pos.put_piece(pc, sq);
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
writeSquare(f, r, pc, v);
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
ss << " NNUE derived piece values:\n";
|
| 155 |
+
for (int row = 0; row < 3 * 8 + 1; ++row)
|
| 156 |
+
ss << board[row] << '\n';
|
| 157 |
+
ss << '\n';
|
| 158 |
+
|
| 159 |
+
accumulators->reset();
|
| 160 |
+
auto t = networks.big.trace_evaluate(pos, *accumulators, caches.big);
|
| 161 |
+
|
| 162 |
+
ss << " NNUE network contributions "
|
| 163 |
+
<< (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << std::endl
|
| 164 |
+
<< "+------------+------------+------------+------------+\n"
|
| 165 |
+
<< "| Bucket | Material | Positional | Total |\n"
|
| 166 |
+
<< "| | (PSQT) | (Layers) | |\n"
|
| 167 |
+
<< "+------------+------------+------------+------------+\n";
|
| 168 |
+
|
| 169 |
+
for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket)
|
| 170 |
+
{
|
| 171 |
+
ss << "| " << bucket << " " //
|
| 172 |
+
<< " | ";
|
| 173 |
+
format_cp_aligned_dot(t.psqt[bucket], ss, pos);
|
| 174 |
+
ss << " " //
|
| 175 |
+
<< " | ";
|
| 176 |
+
format_cp_aligned_dot(t.positional[bucket], ss, pos);
|
| 177 |
+
ss << " " //
|
| 178 |
+
<< " | ";
|
| 179 |
+
format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss, pos);
|
| 180 |
+
ss << " " //
|
| 181 |
+
<< " |";
|
| 182 |
+
if (bucket == t.correctBucket)
|
| 183 |
+
ss << " <-- this bucket is used";
|
| 184 |
+
ss << '\n';
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
ss << "+------------+------------+------------+------------+\n";
|
| 188 |
+
|
| 189 |
+
return ss.str();
|
| 190 |
+
}
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
} // namespace Stockfish::Eval::NNUE
|
src/nnue/nnue_misc.h
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef NNUE_MISC_H_INCLUDED
|
| 20 |
+
#define NNUE_MISC_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <cstddef>
|
| 23 |
+
#include <memory>
|
| 24 |
+
#include <string>
|
| 25 |
+
|
| 26 |
+
#include "../misc.h"
|
| 27 |
+
#include "../types.h"
|
| 28 |
+
#include "nnue_architecture.h"
|
| 29 |
+
|
| 30 |
+
namespace Stockfish {
|
| 31 |
+
|
| 32 |
+
class Position;
|
| 33 |
+
|
| 34 |
+
namespace Eval::NNUE {
|
| 35 |
+
|
| 36 |
+
// Names and description associated with a network file. All three fields take
// part in std::hash<EvalFile>.
// EvalFile uses fixed string types because it's part of the network structure which must be trivial.
|
| 37 |
+
struct EvalFile {
|
| 38 |
+
// Default net name, will use one of the EvalFileDefaultName* macros defined
|
| 39 |
+
// in evaluate.h
|
| 40 |
+
FixedString<256> defaultName;
|
| 41 |
+
// Selected net name, either via uci option or default
|
| 42 |
+
FixedString<256> current;
|
| 43 |
+
// Net description extracted from the net file
|
| 44 |
+
FixedString<256> netDescription;
|
| 45 |
+
};
|
| 46 |
+
|
| 47 |
+
// Per-bucket breakdown of an evaluation, as printed in table form by trace().
struct NnueEvalTrace {
|
| 48 |
+
// psqt is sized by LayerStacks rather than PSQTBuckets, so both counts must agree
static_assert(LayerStacks == PSQTBuckets);
|
| 49 |
+
|
| 50 |
+
// Value shown in the "Material (PSQT)" column, one entry per bucket
Value psqt[LayerStacks];
|
| 51 |
+
// Value shown in the "Positional (Layers)" column, one entry per bucket
Value positional[LayerStacks];
|
| 52 |
+
// Index of the bucket that trace() marks as "this bucket is used"
std::size_t correctBucket;
|
| 53 |
+
};
|
| 54 |
+
|
| 55 |
+
struct Networks;
|
| 56 |
+
struct AccumulatorCaches;
|
| 57 |
+
|
| 58 |
+
std::string trace(Position& pos, const Networks& networks, AccumulatorCaches& caches);
|
| 59 |
+
|
| 60 |
+
} // namespace Stockfish::Eval::NNUE
|
| 61 |
+
} // namespace Stockfish
|
| 62 |
+
|
| 63 |
+
template<>
|
| 64 |
+
struct std::hash<Stockfish::Eval::NNUE::EvalFile> {
|
| 65 |
+
std::size_t operator()(const Stockfish::Eval::NNUE::EvalFile& evalFile) const noexcept {
|
| 66 |
+
std::size_t h = 0;
|
| 67 |
+
Stockfish::hash_combine(h, evalFile.defaultName);
|
| 68 |
+
Stockfish::hash_combine(h, evalFile.current);
|
| 69 |
+
Stockfish::hash_combine(h, evalFile.netDescription);
|
| 70 |
+
return h;
|
| 71 |
+
}
|
| 72 |
+
};
|
| 73 |
+
|
| 74 |
+
#endif // #ifndef NNUE_MISC_H_INCLUDED
|
src/nnue/simd.h
ADDED
|
@@ -0,0 +1,440 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef NNUE_SIMD_H_INCLUDED
|
| 20 |
+
#define NNUE_SIMD_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#if defined(USE_AVX2)
|
| 23 |
+
#include <immintrin.h>
|
| 24 |
+
|
| 25 |
+
#elif defined(USE_SSE41)
|
| 26 |
+
#include <smmintrin.h>
|
| 27 |
+
|
| 28 |
+
#elif defined(USE_SSSE3)
|
| 29 |
+
#include <tmmintrin.h>
|
| 30 |
+
|
| 31 |
+
#elif defined(USE_SSE2)
|
| 32 |
+
#include <emmintrin.h>
|
| 33 |
+
|
| 34 |
+
#elif defined(USE_NEON)
|
| 35 |
+
#include <arm_neon.h>
|
| 36 |
+
#endif
|
| 37 |
+
|
| 38 |
+
#include "../types.h"
|
| 39 |
+
#include "nnue_common.h"
|
| 40 |
+
|
| 41 |
+
namespace Stockfish::Eval::NNUE::SIMD {
|
| 42 |
+
|
| 43 |
+
// If vector instructions are enabled, we update and refresh the
|
| 44 |
+
// accumulator tile by tile such that each tile fits in the CPU's
|
| 45 |
+
// vector registers.
|
| 46 |
+
#define VECTOR
|
| 47 |
+
|
| 48 |
+
#ifdef USE_AVX512
|
| 49 |
+
using vec_t = __m512i;
|
| 50 |
+
using vec_i8_t = __m256i;
|
| 51 |
+
using vec128_t = __m128i;
|
| 52 |
+
using psqt_vec_t = __m256i;
|
| 53 |
+
using vec_uint_t = __m512i;
|
| 54 |
+
#define vec_load(a) _mm512_load_si512(a)
|
| 55 |
+
#define vec_store(a, b) _mm512_store_si512(a, b)
|
| 56 |
+
#define vec_convert_8_16(a) _mm512_cvtepi8_epi16(a)
|
| 57 |
+
#define vec_add_16(a, b) _mm512_add_epi16(a, b)
|
| 58 |
+
#define vec_sub_16(a, b) _mm512_sub_epi16(a, b)
|
| 59 |
+
#define vec_mulhi_16(a, b) _mm512_mulhi_epi16(a, b)
|
| 60 |
+
#define vec_zero() _mm512_setzero_epi32()
|
| 61 |
+
#define vec_set_16(a) _mm512_set1_epi16(a)
|
| 62 |
+
#define vec_max_16(a, b) _mm512_max_epi16(a, b)
|
| 63 |
+
#define vec_min_16(a, b) _mm512_min_epi16(a, b)
|
| 64 |
+
#define vec_slli_16(a, b) _mm512_slli_epi16(a, b)
|
| 65 |
+
// Inverse permuted at load time
|
| 66 |
+
#define vec_packus_16(a, b) _mm512_packus_epi16(a, b)
|
| 67 |
+
#define vec_load_psqt(a) _mm256_load_si256(a)
|
| 68 |
+
#define vec_store_psqt(a, b) _mm256_store_si256(a, b)
|
| 69 |
+
#define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b)
|
| 70 |
+
#define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b)
|
| 71 |
+
#define vec_zero_psqt() _mm256_setzero_si256()
|
| 72 |
+
|
| 73 |
+
#ifdef USE_SSSE3
|
| 74 |
+
#define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512())
|
| 75 |
+
#endif
|
| 76 |
+
|
| 77 |
+
#define vec128_zero _mm_setzero_si128()
|
| 78 |
+
#define vec128_set_16(a) _mm_set1_epi16(a)
|
| 79 |
+
#define vec128_load(a) _mm_load_si128(a)
|
| 80 |
+
#define vec128_storeu(a, b) _mm_storeu_si128(a, b)
|
| 81 |
+
#define vec128_add(a, b) _mm_add_epi16(a, b)
|
| 82 |
+
#define NumRegistersSIMD 16
|
| 83 |
+
#define MaxChunkSize 64
|
| 84 |
+
|
| 85 |
+
#elif USE_AVX2
|
| 86 |
+
using vec_t = __m256i;
|
| 87 |
+
using vec_i8_t = __m128i;
|
| 88 |
+
using vec128_t = __m128i;
|
| 89 |
+
using psqt_vec_t = __m256i;
|
| 90 |
+
using vec_uint_t = __m256i;
|
| 91 |
+
#define vec_load(a) _mm256_load_si256(a)
|
| 92 |
+
#define vec_store(a, b) _mm256_store_si256(a, b)
|
| 93 |
+
#define vec_convert_8_16(a) _mm256_cvtepi8_epi16(a)
|
| 94 |
+
#define vec_add_16(a, b) _mm256_add_epi16(a, b)
|
| 95 |
+
#define vec_sub_16(a, b) _mm256_sub_epi16(a, b)
|
| 96 |
+
#define vec_mulhi_16(a, b) _mm256_mulhi_epi16(a, b)
|
| 97 |
+
#define vec_zero() _mm256_setzero_si256()
|
| 98 |
+
#define vec_set_16(a) _mm256_set1_epi16(a)
|
| 99 |
+
#define vec_max_16(a, b) _mm256_max_epi16(a, b)
|
| 100 |
+
#define vec_min_16(a, b) _mm256_min_epi16(a, b)
|
| 101 |
+
#define vec_slli_16(a, b) _mm256_slli_epi16(a, b)
|
| 102 |
+
// Inverse permuted at load time
|
| 103 |
+
#define vec_packus_16(a, b) _mm256_packus_epi16(a, b)
|
| 104 |
+
#define vec_load_psqt(a) _mm256_load_si256(a)
|
| 105 |
+
#define vec_store_psqt(a, b) _mm256_store_si256(a, b)
|
| 106 |
+
#define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b)
|
| 107 |
+
#define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b)
|
| 108 |
+
#define vec_zero_psqt() _mm256_setzero_si256()
|
| 109 |
+
|
| 110 |
+
#ifdef USE_SSSE3
|
| 111 |
+
#if defined(USE_VNNI) && !defined(USE_AVXVNNI)
|
| 112 |
+
#define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256())
|
| 113 |
+
#else
|
| 114 |
+
#define vec_nnz(a) \
|
| 115 |
+
_mm256_movemask_ps( \
|
| 116 |
+
_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
|
| 117 |
+
#endif
|
| 118 |
+
#endif
|
| 119 |
+
|
| 120 |
+
#define vec128_zero _mm_setzero_si128()
|
| 121 |
+
#define vec128_set_16(a) _mm_set1_epi16(a)
|
| 122 |
+
#define vec128_load(a) _mm_load_si128(a)
|
| 123 |
+
#define vec128_storeu(a, b) _mm_storeu_si128(a, b)
|
| 124 |
+
#define vec128_add(a, b) _mm_add_epi16(a, b)
|
| 125 |
+
|
| 126 |
+
#define NumRegistersSIMD 12
|
| 127 |
+
#define MaxChunkSize 32
|
| 128 |
+
|
| 129 |
+
#elif USE_SSE2
|
| 130 |
+
using vec_t = __m128i;
|
| 131 |
+
using vec_i8_t = std::uint64_t; // for the correct size -- will be loaded into an xmm reg
|
| 132 |
+
using vec128_t = __m128i;
|
| 133 |
+
using psqt_vec_t = __m128i;
|
| 134 |
+
using vec_uint_t = __m128i;
|
| 135 |
+
#define vec_load(a) (*(a))
|
| 136 |
+
#define vec_store(a, b) *(a) = (b)
|
| 137 |
+
#define vec_add_16(a, b) _mm_add_epi16(a, b)
|
| 138 |
+
#define vec_sub_16(a, b) _mm_sub_epi16(a, b)
|
| 139 |
+
#define vec_mulhi_16(a, b) _mm_mulhi_epi16(a, b)
|
| 140 |
+
#define vec_zero() _mm_setzero_si128()
|
| 141 |
+
#define vec_set_16(a) _mm_set1_epi16(a)
|
| 142 |
+
#define vec_max_16(a, b) _mm_max_epi16(a, b)
|
| 143 |
+
#define vec_min_16(a, b) _mm_min_epi16(a, b)
|
| 144 |
+
#define vec_slli_16(a, b) _mm_slli_epi16(a, b)
|
| 145 |
+
#define vec_packus_16(a, b) _mm_packus_epi16(a, b)
|
| 146 |
+
#define vec_load_psqt(a) (*(a))
|
| 147 |
+
#define vec_store_psqt(a, b) *(a) = (b)
|
| 148 |
+
#define vec_add_psqt_32(a, b) _mm_add_epi32(a, b)
|
| 149 |
+
#define vec_sub_psqt_32(a, b) _mm_sub_epi32(a, b)
|
| 150 |
+
#define vec_zero_psqt() _mm_setzero_si128()
|
| 151 |
+
|
| 152 |
+
#ifdef USE_SSSE3
|
| 153 |
+
#define vec_nnz(a) \
|
| 154 |
+
_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128())))
|
| 155 |
+
#endif
|
| 156 |
+
|
| 157 |
+
#ifdef __i386__
|
| 158 |
+
// Replacement for the _mm_cvtsi64_si128 intrinsic, compiled only when __i386__
// is defined: loads the 64-bit value into the low half of an XMM register.
inline __m128i _mm_cvtsi64_si128(int64_t val) {
    const auto* source = reinterpret_cast<const __m128i*>(&val);
    return _mm_loadl_epi64(source);
}
|
| 161 |
+
#endif
|
| 162 |
+
|
| 163 |
+
#ifdef USE_SSE41
|
| 164 |
+
#define vec_convert_8_16(a) _mm_cvtepi8_epi16(_mm_cvtsi64_si128(static_cast<int64_t>(a)))
|
| 165 |
+
#else
|
| 166 |
+
// Credit: Yoshie2000
|
| 167 |
+
// Sign-extends the eight signed bytes packed in x to eight 16-bit lanes using
// SSE2 only (fallback for builds without USE_SSE41).
inline __m128i vec_convert_8_16(uint64_t x) {
    const __m128i packedBytes = _mm_cvtsi64_si128(static_cast<int64_t>(x));
    // All-ones in every byte lane holding a negative value, zero elsewhere
    const __m128i signBytes = _mm_cmpgt_epi8(_mm_setzero_si128(), packedBytes);
    // Interleave each value byte with its sign byte to form 16-bit lanes
    return _mm_unpacklo_epi8(packedBytes, signBytes);
}
|
| 172 |
+
#endif
|
| 173 |
+
|
| 174 |
+
#define vec128_zero _mm_setzero_si128()
|
| 175 |
+
#define vec128_set_16(a) _mm_set1_epi16(a)
|
| 176 |
+
#define vec128_load(a) _mm_load_si128(a)
|
| 177 |
+
#define vec128_storeu(a, b) _mm_storeu_si128(a, b)
|
| 178 |
+
#define vec128_add(a, b) _mm_add_epi16(a, b)
|
| 179 |
+
|
| 180 |
+
#define NumRegistersSIMD (Is64Bit ? 12 : 6)
|
| 181 |
+
#define MaxChunkSize 16
|
| 182 |
+
|
| 183 |
+
#elif USE_NEON
|
| 184 |
+
using vec_i8x8_t __attribute__((may_alias)) = int8x8_t;
|
| 185 |
+
using vec_i16x8_t __attribute__((may_alias)) = int16x8_t;
|
| 186 |
+
using vec_i8x16_t __attribute__((may_alias)) = int8x16_t;
|
| 187 |
+
using vec_u16x8_t __attribute__((may_alias)) = uint16x8_t;
|
| 188 |
+
using vec_i32x4_t __attribute__((may_alias)) = int32x4_t;
|
| 189 |
+
|
| 190 |
+
using vec_t __attribute__((may_alias)) = int16x8_t;
|
| 191 |
+
using vec_i8_t __attribute__((may_alias)) = int8x16_t;
|
| 192 |
+
using psqt_vec_t __attribute__((may_alias)) = int32x4_t;
|
| 193 |
+
using vec128_t __attribute__((may_alias)) = uint16x8_t;
|
| 194 |
+
using vec_uint_t __attribute__((may_alias)) = uint32x4_t;
|
| 195 |
+
#define vec_load(a) (*(a))
|
| 196 |
+
#define vec_store(a, b) *(a) = (b)
|
| 197 |
+
#define vec_add_16(a, b) vaddq_s16(a, b)
|
| 198 |
+
#define vec_sub_16(a, b) vsubq_s16(a, b)
|
| 199 |
+
#define vec_mulhi_16(a, b) vqdmulhq_s16(a, b)
|
| 200 |
+
#define vec_zero() vec_t{0}
|
| 201 |
+
#define vec_set_16(a) vdupq_n_s16(a)
|
| 202 |
+
#define vec_max_16(a, b) vmaxq_s16(a, b)
|
| 203 |
+
#define vec_min_16(a, b) vminq_s16(a, b)
|
| 204 |
+
#define vec_slli_16(a, b) vshlq_s16(a, vec_set_16(b))
|
| 205 |
+
#define vec_packus_16(a, b) reinterpret_cast<vec_t>(vcombine_u8(vqmovun_s16(a), vqmovun_s16(b)))
|
| 206 |
+
#define vec_load_psqt(a) (*(a))
|
| 207 |
+
#define vec_store_psqt(a, b) *(a) = (b)
|
| 208 |
+
#define vec_add_psqt_32(a, b) vaddq_s32(a, b)
|
| 209 |
+
#define vec_sub_psqt_32(a, b) vsubq_s32(a, b)
|
| 210 |
+
#define vec_zero_psqt() psqt_vec_t{0}
|
| 211 |
+
|
| 212 |
+
static constexpr std::uint32_t Mask[4] = {1, 2, 4, 8};
|
| 213 |
+
#define vec_nnz(a) vaddvq_u32(vandq_u32(vtstq_u32(a, a), vld1q_u32(Mask)))
|
| 214 |
+
#define vec128_zero vdupq_n_u16(0)
|
| 215 |
+
#define vec128_set_16(a) vdupq_n_u16(a)
|
| 216 |
+
#define vec128_load(a) vld1q_u16(reinterpret_cast<const std::uint16_t*>(a))
|
| 217 |
+
#define vec128_storeu(a, b) vst1q_u16(reinterpret_cast<std::uint16_t*>(a), b)
|
| 218 |
+
#define vec128_add(a, b) vaddq_u16(a, b)
|
| 219 |
+
|
| 220 |
+
#define NumRegistersSIMD 16
|
| 221 |
+
#define MaxChunkSize 16
|
| 222 |
+
|
| 223 |
+
#ifndef __aarch64__
|
| 224 |
+
// Single instruction doesn't exist on 32-bit ARM
|
| 225 |
+
// Widens the upper eight signed bytes of val to 16-bit lanes; provided here
// only when __aarch64__ is not defined.
inline int16x8_t vmovl_high_s8(int8x16_t val) {
    const int8x8_t upperHalf = vget_high_s8(val);
    return vmovl_s8(upperHalf);
}
|
| 226 |
+
#endif
|
| 227 |
+
|
| 228 |
+
#else
|
| 229 |
+
#undef VECTOR
|
| 230 |
+
|
| 231 |
+
#endif
|
| 232 |
+
|
| 233 |
+
struct Vec16Wrapper {
|
| 234 |
+
#ifdef VECTOR
|
| 235 |
+
using type = vec_t;
|
| 236 |
+
static type add(const type& lhs, const type& rhs) { return vec_add_16(lhs, rhs); }
|
| 237 |
+
static type sub(const type& lhs, const type& rhs) { return vec_sub_16(lhs, rhs); }
|
| 238 |
+
#else
|
| 239 |
+
using type = BiasType;
|
| 240 |
+
static type add(const type& lhs, const type& rhs) { return lhs + rhs; }
|
| 241 |
+
static type sub(const type& lhs, const type& rhs) { return lhs - rhs; }
|
| 242 |
+
#endif
|
| 243 |
+
};
|
| 244 |
+
|
| 245 |
+
struct Vec32Wrapper {
|
| 246 |
+
#ifdef VECTOR
|
| 247 |
+
using type = psqt_vec_t;
|
| 248 |
+
static type add(const type& lhs, const type& rhs) { return vec_add_psqt_32(lhs, rhs); }
|
| 249 |
+
static type sub(const type& lhs, const type& rhs) { return vec_sub_psqt_32(lhs, rhs); }
|
| 250 |
+
#else
|
| 251 |
+
using type = PSQTWeightType;
|
| 252 |
+
static type add(const type& lhs, const type& rhs) { return lhs + rhs; }
|
| 253 |
+
static type sub(const type& lhs, const type& rhs) { return lhs - rhs; }
|
| 254 |
+
#endif
|
| 255 |
+
};
|
| 256 |
+
|
| 257 |
+
// Kind of update applied by one step of fused(): add the operand to the
// running value, or subtract it.
enum UpdateOperation { Add, Sub };
|
| 261 |
+
|
| 262 |
+
template<typename VecWrapper,
|
| 263 |
+
UpdateOperation... ops,
|
| 264 |
+
std::enable_if_t<sizeof...(ops) == 0, bool> = true>
|
| 265 |
+
typename VecWrapper::type fused(const typename VecWrapper::type& in) {
|
| 266 |
+
return in;
|
| 267 |
+
}
|
| 268 |
+
|
| 269 |
+
template<typename VecWrapper,
|
| 270 |
+
UpdateOperation update_op,
|
| 271 |
+
UpdateOperation... ops,
|
| 272 |
+
typename T,
|
| 273 |
+
typename... Ts,
|
| 274 |
+
std::enable_if_t<is_all_same_v<typename VecWrapper::type, T, Ts...>, bool> = true,
|
| 275 |
+
std::enable_if_t<sizeof...(ops) == sizeof...(Ts), bool> = true>
|
| 276 |
+
typename VecWrapper::type
|
| 277 |
+
fused(const typename VecWrapper::type& in, const T& operand, const Ts&... operands) {
|
| 278 |
+
switch (update_op)
|
| 279 |
+
{
|
| 280 |
+
case Add :
|
| 281 |
+
return fused<VecWrapper, ops...>(VecWrapper::add(in, operand), operands...);
|
| 282 |
+
case Sub :
|
| 283 |
+
return fused<VecWrapper, ops...>(VecWrapper::sub(in, operand), operands...);
|
| 284 |
+
default :
|
| 285 |
+
static_assert(update_op == Add || update_op == Sub,
|
| 286 |
+
"Only Add and Sub are currently supported.");
|
| 287 |
+
return typename VecWrapper::type();
|
| 288 |
+
}
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
#if defined(USE_AVX512)
|
| 292 |
+
|
| 293 |
+
// Returns the sum of all 32-bit lanes of sum, plus bias.
[[maybe_unused]] static int m512_hadd(__m512i sum, int bias) {
    const int laneTotal = _mm512_reduce_add_epi32(sum);
    return laneTotal + bias;
}
|
| 296 |
+
|
| 297 |
+
// Accumulates into each 32-bit lane of acc the dot product of the matching
// group of four bytes of a and b. With USE_VNNI this is a single dpbusd
// instruction; otherwise it is emulated with maddubs + madd + add.
[[maybe_unused]] static void m512_add_dpbusd_epi32(__m512i& acc, __m512i a, __m512i b) {

    #if defined(USE_VNNI)
    acc = _mm512_dpbusd_epi32(acc, a, b);
    #else
    // Byte products summed pairwise into 16-bit lanes
    const __m512i pairSums = _mm512_maddubs_epi16(a, b);
    // Multiplying by 1 and adding neighbours widens the pairs to 32-bit sums
    const __m512i quadSums = _mm512_madd_epi16(pairSums, _mm512_set1_epi16(1));
    acc                    = _mm512_add_epi32(acc, quadSums);
    #endif
}
|
| 307 |
+
|
| 308 |
+
#endif
|
| 309 |
+
|
| 310 |
+
#if defined(USE_AVX2)
|
| 311 |
+
|
| 312 |
+
// Returns the sum of the eight 32-bit lanes of sum, plus bias.
[[maybe_unused]] static int m256_hadd(__m256i sum, int bias) {
    // Fold the upper 128 bits onto the lower 128 bits
    const __m128i lowerHalf = _mm256_castsi256_si128(sum);
    const __m128i upperHalf = _mm256_extracti128_si256(sum, 1);
    __m128i       folded    = _mm_add_epi32(lowerHalf, upperHalf);

    // Then fold 4 lanes -> 2 -> 1; the total ends up in lane 0
    folded = _mm_add_epi32(folded, _mm_shuffle_epi32(folded, _MM_PERM_BADC));
    folded = _mm_add_epi32(folded, _mm_shuffle_epi32(folded, _MM_PERM_CDAB));
    return _mm_cvtsi128_si32(folded) + bias;
}
|
| 318 |
+
|
| 319 |
+
// Accumulates into each 32-bit lane of acc the dot product of the matching
// group of four bytes of a and b. With USE_VNNI this is a single dpbusd
// instruction; otherwise it is emulated with maddubs + madd + add.
[[maybe_unused]] static void m256_add_dpbusd_epi32(__m256i& acc, __m256i a, __m256i b) {

    #if defined(USE_VNNI)
    acc = _mm256_dpbusd_epi32(acc, a, b);
    #else
    // Byte products summed pairwise into 16-bit lanes
    const __m256i pairSums = _mm256_maddubs_epi16(a, b);
    // Multiplying by 1 and adding neighbours widens the pairs to 32-bit sums
    const __m256i quadSums = _mm256_madd_epi16(pairSums, _mm256_set1_epi16(1));
    acc                    = _mm256_add_epi32(acc, quadSums);
    #endif
}
|
| 329 |
+
|
| 330 |
+
#endif
|
| 331 |
+
|
| 332 |
+
#if defined(USE_SSSE3)
|
| 333 |
+
|
| 334 |
+
// Returns the sum of the four 32-bit lanes of sum, plus bias.
[[maybe_unused]] static int m128_hadd(__m128i sum, int bias) {
    const __m128i halvesSwapped = _mm_shuffle_epi32(sum, 0x4E);  //_MM_PERM_BADC
    const __m128i pairTotals    = _mm_add_epi32(sum, halvesSwapped);

    const __m128i neighboursSwapped = _mm_shuffle_epi32(pairTotals, 0xB1);  //_MM_PERM_CDAB
    const __m128i allTotals         = _mm_add_epi32(pairTotals, neighboursSwapped);

    return _mm_cvtsi128_si32(allTotals) + bias;
}
|
| 339 |
+
|
| 340 |
+
// Accumulates into each 32-bit lane of acc the dot product of the matching
// group of four bytes of a and b, built from maddubs + madd + add.
[[maybe_unused]] static void m128_add_dpbusd_epi32(__m128i& acc, __m128i a, __m128i b) {

    // Byte products summed pairwise into 16-bit lanes
    const __m128i pairSums = _mm_maddubs_epi16(a, b);
    // Multiplying by 1 and adding neighbours widens the pairs to 32-bit sums
    const __m128i quadSums = _mm_madd_epi16(pairSums, _mm_set1_epi16(1));
    acc                    = _mm_add_epi32(acc, quadSums);
}
|
| 346 |
+
|
| 347 |
+
#endif
|
| 348 |
+
|
| 349 |
+
#if defined(USE_NEON_DOTPROD)
|
| 350 |
+
|
| 351 |
+
[[maybe_unused]] static void
|
| 352 |
+
dotprod_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) {
|
| 353 |
+
|
| 354 |
+
acc = vdotq_s32(acc, a, b);
|
| 355 |
+
}
|
| 356 |
+
#endif
|
| 357 |
+
|
| 358 |
+
#if defined(USE_NEON)
|
| 359 |
+
|
| 360 |
+
// Returns the sum of the four 32-bit lanes of s. Uses the across-vector add
// when USE_NEON >= 8, and adds the lanes one by one otherwise.
[[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) {
    #if USE_NEON >= 8
    return vaddvq_s32(s);
    #else
    const int lowerPair = s[0] + s[1];
    return lowerPair + s[2] + s[3];
    #endif
}
|
| 367 |
+
|
| 368 |
+
// Returns the sum of the four 32-bit lanes of sum, plus bias.
[[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) {
    const int laneTotal = neon_m128_reduce_add_epi32(sum);
    return laneTotal + bias;
}
|
| 371 |
+
|
| 372 |
+
#endif
|
| 373 |
+
|
| 374 |
+
#if USE_NEON >= 8
|
| 375 |
+
// Accumulates into each 32-bit lane of acc the dot product of the matching
// group of four signed bytes of a and b, without the dot-product extension.
[[maybe_unused]] static void neon_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) {

    // Widening byte products of the lower and upper halves
    const int16x8_t lowerProducts = vmull_s8(vget_low_s8(a), vget_low_s8(b));
    const int16x8_t upperProducts = vmull_high_s8(a, b);
    // Pairwise add to 16-bit sums, then pairwise add-accumulate into 32 bits
    const int16x8_t pairSums = vpaddq_s16(lowerProducts, upperProducts);
    acc                      = vpadalq_s16(acc, pairSums);
}
|
| 382 |
+
#endif
|
| 383 |
+
|
| 384 |
+
|
| 385 |
+
// Compute optimal SIMD register count for feature transformer accumulation.
|
| 386 |
+
template<IndexType TransformedFeatureWidth, IndexType HalfDimensions, IndexType PSQTBuckets>
|
| 387 |
+
class SIMDTiling {
|
| 388 |
+
#ifdef VECTOR
|
| 389 |
+
// We use __m* types as template arguments, which causes GCC to emit warnings
|
| 390 |
+
// about losing some attribute information. This is irrelevant to us as we
|
| 391 |
+
// only take their size, so the following pragma are harmless.
|
| 392 |
+
#if defined(__GNUC__)
|
| 393 |
+
#pragma GCC diagnostic push
|
| 394 |
+
#pragma GCC diagnostic ignored "-Wignored-attributes"
|
| 395 |
+
#endif
|
| 396 |
+
|
| 397 |
+
template<typename SIMDRegisterType, typename LaneType, int NumLanes, int MaxRegisters>
|
| 398 |
+
static constexpr int BestRegisterCount() {
|
| 399 |
+
constexpr std::size_t RegisterSize = sizeof(SIMDRegisterType);
|
| 400 |
+
constexpr std::size_t LaneSize = sizeof(LaneType);
|
| 401 |
+
|
| 402 |
+
static_assert(RegisterSize >= LaneSize);
|
| 403 |
+
static_assert(MaxRegisters <= NumRegistersSIMD);
|
| 404 |
+
static_assert(MaxRegisters > 0);
|
| 405 |
+
static_assert(NumRegistersSIMD > 0);
|
| 406 |
+
static_assert(RegisterSize % LaneSize == 0);
|
| 407 |
+
static_assert((NumLanes * LaneSize) % RegisterSize == 0);
|
| 408 |
+
|
| 409 |
+
const int ideal = (NumLanes * LaneSize) / RegisterSize;
|
| 410 |
+
if (ideal <= MaxRegisters)
|
| 411 |
+
return ideal;
|
| 412 |
+
|
| 413 |
+
// Look for the largest divisor of the ideal register count that is smaller than MaxRegisters
|
| 414 |
+
for (int divisor = MaxRegisters; divisor > 1; --divisor)
|
| 415 |
+
if (ideal % divisor == 0)
|
| 416 |
+
return divisor;
|
| 417 |
+
|
| 418 |
+
return 1;
|
| 419 |
+
}
|
| 420 |
+
|
| 421 |
+
#if defined(__GNUC__)
|
| 422 |
+
#pragma GCC diagnostic pop
|
| 423 |
+
#endif
|
| 424 |
+
|
| 425 |
+
public:
|
| 426 |
+
static constexpr int NumRegs =
|
| 427 |
+
BestRegisterCount<vec_t, WeightType, TransformedFeatureWidth, NumRegistersSIMD>();
|
| 428 |
+
static constexpr int NumPsqtRegs =
|
| 429 |
+
BestRegisterCount<psqt_vec_t, PSQTWeightType, PSQTBuckets, NumRegistersSIMD>();
|
| 430 |
+
|
| 431 |
+
static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2;
|
| 432 |
+
static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4;
|
| 433 |
+
|
| 434 |
+
static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions");
|
| 435 |
+
static_assert(PSQTBuckets % PsqtTileHeight == 0, "PsqtTileHeight must divide PSQTBuckets");
|
| 436 |
+
#endif
|
| 437 |
+
};
|
| 438 |
+
}
|
| 439 |
+
|
| 440 |
+
#endif
|
src/numa.h
ADDED
|
@@ -0,0 +1,1718 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef NUMA_H_INCLUDED
|
| 20 |
+
#define NUMA_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <algorithm>
|
| 23 |
+
#include <atomic>
|
| 24 |
+
#include <cstdint>
|
| 25 |
+
#include <cstdlib>
|
| 26 |
+
#include <functional>
|
| 27 |
+
#include <iostream>
|
| 28 |
+
#include <limits>
|
| 29 |
+
#include <map>
|
| 30 |
+
#include <memory>
|
| 31 |
+
#include <mutex>
|
| 32 |
+
#include <set>
|
| 33 |
+
#include <sstream>
|
| 34 |
+
#include <string>
|
| 35 |
+
#include <thread>
|
| 36 |
+
#include <utility>
|
| 37 |
+
#include <vector>
|
| 38 |
+
#include <cstring>
|
| 39 |
+
|
| 40 |
+
#include "shm.h"
|
| 41 |
+
|
| 42 |
+
// We support linux very well, but we explicitly do NOT support Android,
|
| 43 |
+
// because there are no affected systems, so it is not worth maintaining.
|
| 44 |
+
#if defined(__linux__) && !defined(__ANDROID__)
|
| 45 |
+
#if !defined(_GNU_SOURCE)
|
| 46 |
+
#define _GNU_SOURCE
|
| 47 |
+
#endif
|
| 48 |
+
#include <sched.h>
|
| 49 |
+
#elif defined(_WIN64)
|
| 50 |
+
|
| 51 |
+
#if _WIN32_WINNT < 0x0601
|
| 52 |
+
#undef _WIN32_WINNT
|
| 53 |
+
#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
|
| 54 |
+
#endif
|
| 55 |
+
|
| 56 |
+
// On Windows each processor group can have up to 64 processors.
|
| 57 |
+
// https://learn.microsoft.com/en-us/windows/win32/procthread/processor-groups
|
| 58 |
+
static constexpr size_t WIN_PROCESSOR_GROUP_SIZE = 64;
|
| 59 |
+
|
| 60 |
+
#if !defined(NOMINMAX)
|
| 61 |
+
#define NOMINMAX
|
| 62 |
+
#endif
|
| 63 |
+
#include <windows.h>
|
| 64 |
+
#if defined small
|
| 65 |
+
#undef small
|
| 66 |
+
#endif
|
| 67 |
+
|
| 68 |
+
// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setthreadselectedcpusetmasks
|
| 69 |
+
using SetThreadSelectedCpuSetMasks_t = BOOL (*)(HANDLE, PGROUP_AFFINITY, USHORT);
|
| 70 |
+
|
| 71 |
+
// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getthreadselectedcpusetmasks
|
| 72 |
+
using GetThreadSelectedCpuSetMasks_t = BOOL (*)(HANDLE, PGROUP_AFFINITY, USHORT, PUSHORT);
|
| 73 |
+
|
| 74 |
+
#endif
|
| 75 |
+
|
| 76 |
+
#include "misc.h"
|
| 77 |
+
|
| 78 |
+
namespace Stockfish {
|
| 79 |
+
|
| 80 |
+
using CpuIndex = size_t;
|
| 81 |
+
using NumaIndex = size_t;
|
| 82 |
+
|
| 83 |
+
// Returns the number of logical processors reported for this machine.
inline CpuIndex get_hardware_concurrency() {
    const CpuIndex reportedByStd = std::thread::hardware_concurrency();

#ifdef _WIN64
    // On Windows hardware_concurrency() only counts the processors of the
    // first processor group (the only ones available to std::thread), so
    // also ask the OS for the total across all groups and take the larger.
    return std::max<CpuIndex>(reportedByStd, GetActiveProcessorCount(ALL_PROCESSOR_GROUPS));
#else
    return reportedByStd;
#endif
}
|
| 95 |
+
|
| 96 |
+
inline const CpuIndex SYSTEM_THREADS_NB = std::max<CpuIndex>(1, get_hardware_concurrency());
|
| 97 |
+
|
| 98 |
+
#if defined(_WIN64)
|
| 99 |
+
|
| 100 |
+
struct WindowsAffinity {
|
| 101 |
+
std::optional<std::set<CpuIndex>> oldApi;
|
| 102 |
+
std::optional<std::set<CpuIndex>> newApi;
|
| 103 |
+
|
| 104 |
+
// We also provide diagnostic for when the affinity is set to nullopt
|
| 105 |
+
// whether it was due to being indeterminate. If affinity is indeterminate
|
| 106 |
+
// it is best to assume it is not set at all, so consistent with the meaning
|
| 107 |
+
// of the nullopt affinity.
|
| 108 |
+
bool isNewDeterminate = true;
|
| 109 |
+
bool isOldDeterminate = true;
|
| 110 |
+
|
| 111 |
+
std::optional<std::set<CpuIndex>> get_combined() const {
|
| 112 |
+
if (!oldApi.has_value())
|
| 113 |
+
return newApi;
|
| 114 |
+
if (!newApi.has_value())
|
| 115 |
+
return oldApi;
|
| 116 |
+
|
| 117 |
+
std::set<CpuIndex> intersect;
|
| 118 |
+
std::set_intersection(oldApi->begin(), oldApi->end(), newApi->begin(), newApi->end(),
|
| 119 |
+
std::inserter(intersect, intersect.begin()));
|
| 120 |
+
return intersect;
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
// Since Windows 11 and Windows Server 2022 thread affinities can span
|
| 124 |
+
// processor groups and can be set as such by a new WinAPI function. However,
|
| 125 |
+
// we may need to force using the old API if we detect that the process has
|
| 126 |
+
// affinity set by the old API already and we want to override that. Due to the
|
| 127 |
+
// limitations of the old API we cannot detect its use reliably. There will be
|
| 128 |
+
// cases where we detect not use but it has actually been used and vice versa.
|
| 129 |
+
|
| 130 |
+
bool likely_used_old_api() const { return oldApi.has_value() || !isOldDeterminate; }
|
| 131 |
+
};
|
| 132 |
+
|
| 133 |
+
inline std::pair<BOOL, std::vector<USHORT>> get_process_group_affinity() {
|
| 134 |
+
|
| 135 |
+
// GetProcessGroupAffinity requires the GroupArray argument to be
|
| 136 |
+
// aligned to 4 bytes instead of just 2.
|
| 137 |
+
static constexpr size_t GroupArrayMinimumAlignment = 4;
|
| 138 |
+
static_assert(GroupArrayMinimumAlignment >= alignof(USHORT));
|
| 139 |
+
|
| 140 |
+
// The function should succeed the second time, but it may fail if the group
|
| 141 |
+
// affinity has changed between GetProcessGroupAffinity calls. In such case
|
| 142 |
+
// we consider this a hard error, as we Cannot work with unstable affinities
|
| 143 |
+
// anyway.
|
| 144 |
+
static constexpr int MAX_TRIES = 2;
|
| 145 |
+
USHORT GroupCount = 1;
|
| 146 |
+
for (int i = 0; i < MAX_TRIES; ++i)
|
| 147 |
+
{
|
| 148 |
+
auto GroupArray = std::make_unique<USHORT[]>(
|
| 149 |
+
GroupCount + (GroupArrayMinimumAlignment / alignof(USHORT) - 1));
|
| 150 |
+
|
| 151 |
+
USHORT* GroupArrayAligned = align_ptr_up<GroupArrayMinimumAlignment>(GroupArray.get());
|
| 152 |
+
|
| 153 |
+
const BOOL status =
|
| 154 |
+
GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount, GroupArrayAligned);
|
| 155 |
+
|
| 156 |
+
if (status == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
|
| 157 |
+
{
|
| 158 |
+
break;
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
if (status != 0)
|
| 162 |
+
{
|
| 163 |
+
return std::make_pair(status,
|
| 164 |
+
std::vector(GroupArrayAligned, GroupArrayAligned + GroupCount));
|
| 165 |
+
}
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
return std::make_pair(0, std::vector<USHORT>());
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
+
// On Windows there are two ways to set affinity, and therefore 2 ways to get it.
|
| 172 |
+
// These are not consistent, so we have to check both. In some cases it is actually
|
| 173 |
+
// not possible to determine affinity. For example when two different threads have
|
| 174 |
+
// affinity on different processor groups, set using SetThreadAffinityMask, we cannot
|
| 175 |
+
// retrieve the actual affinities.
|
| 176 |
+
// From documentation on GetProcessAffinityMask:
|
| 177 |
+
// > If the calling process contains threads in multiple groups,
|
| 178 |
+
// > the function returns zero for both affinity masks.
|
| 179 |
+
// In such cases we just give up and assume we have affinity for all processors.
|
| 180 |
+
// nullopt means no affinity is set, that is, all processors are allowed
|
| 181 |
+
inline WindowsAffinity get_process_affinity() {
|
| 182 |
+
HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll"));
|
| 183 |
+
auto GetThreadSelectedCpuSetMasks_f = GetThreadSelectedCpuSetMasks_t(
|
| 184 |
+
(void (*)()) GetProcAddress(k32, "GetThreadSelectedCpuSetMasks"));
|
| 185 |
+
|
| 186 |
+
BOOL status = 0;
|
| 187 |
+
|
| 188 |
+
WindowsAffinity affinity;
|
| 189 |
+
|
| 190 |
+
if (GetThreadSelectedCpuSetMasks_f != nullptr)
|
| 191 |
+
{
|
| 192 |
+
USHORT RequiredMaskCount;
|
| 193 |
+
status = GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), nullptr, 0, &RequiredMaskCount);
|
| 194 |
+
|
| 195 |
+
// We expect ERROR_INSUFFICIENT_BUFFER from GetThreadSelectedCpuSetMasks,
|
| 196 |
+
// but other failure is an actual error.
|
| 197 |
+
if (status == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
|
| 198 |
+
{
|
| 199 |
+
affinity.isNewDeterminate = false;
|
| 200 |
+
}
|
| 201 |
+
else if (RequiredMaskCount > 0)
|
| 202 |
+
{
|
| 203 |
+
// If RequiredMaskCount then these affinities were never set, but it's
|
| 204 |
+
// not consistent so GetProcessAffinityMask may still return some affinity.
|
| 205 |
+
auto groupAffinities = std::make_unique<GROUP_AFFINITY[]>(RequiredMaskCount);
|
| 206 |
+
|
| 207 |
+
status = GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), groupAffinities.get(),
|
| 208 |
+
RequiredMaskCount, &RequiredMaskCount);
|
| 209 |
+
|
| 210 |
+
if (status == 0)
|
| 211 |
+
{
|
| 212 |
+
affinity.isNewDeterminate = false;
|
| 213 |
+
}
|
| 214 |
+
else
|
| 215 |
+
{
|
| 216 |
+
std::set<CpuIndex> cpus;
|
| 217 |
+
|
| 218 |
+
for (USHORT i = 0; i < RequiredMaskCount; ++i)
|
| 219 |
+
{
|
| 220 |
+
const size_t procGroupIndex = groupAffinities[i].Group;
|
| 221 |
+
|
| 222 |
+
for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j)
|
| 223 |
+
{
|
| 224 |
+
if (groupAffinities[i].Mask & (KAFFINITY(1) << j))
|
| 225 |
+
cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j);
|
| 226 |
+
}
|
| 227 |
+
}
|
| 228 |
+
|
| 229 |
+
affinity.newApi = std::move(cpus);
|
| 230 |
+
}
|
| 231 |
+
}
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
+
// NOTE: There is no way to determine full affinity using the old API if
|
| 235 |
+
// individual threads set affinity on different processor groups.
|
| 236 |
+
|
| 237 |
+
DWORD_PTR proc, sys;
|
| 238 |
+
status = GetProcessAffinityMask(GetCurrentProcess(), &proc, &sys);
|
| 239 |
+
|
| 240 |
+
// If proc == 0 then we cannot determine affinity because it spans processor groups.
|
| 241 |
+
// On Windows 11 and Server 2022 it will instead
|
| 242 |
+
// > If, however, hHandle specifies a handle to the current process, the function
|
| 243 |
+
// > always uses the calling thread's primary group (which by default is the same
|
| 244 |
+
// > as the process' primary group) in order to set the
|
| 245 |
+
// > lpProcessAffinityMask and lpSystemAffinityMask.
|
| 246 |
+
// So it will never be indeterminate here. We can only make assumptions later.
|
| 247 |
+
if (status == 0 || proc == 0)
|
| 248 |
+
{
|
| 249 |
+
affinity.isOldDeterminate = false;
|
| 250 |
+
return affinity;
|
| 251 |
+
}
|
| 252 |
+
|
| 253 |
+
// If SetProcessAffinityMask was never called the affinity must span
|
| 254 |
+
// all processor groups, but if it was called it must only span one.
|
| 255 |
+
|
| 256 |
+
std::vector<USHORT> groupAffinity; // We need to capture this later and capturing
|
| 257 |
+
// from structured bindings requires c++20.
|
| 258 |
+
|
| 259 |
+
std::tie(status, groupAffinity) = get_process_group_affinity();
|
| 260 |
+
if (status == 0)
|
| 261 |
+
{
|
| 262 |
+
affinity.isOldDeterminate = false;
|
| 263 |
+
return affinity;
|
| 264 |
+
}
|
| 265 |
+
|
| 266 |
+
if (groupAffinity.size() == 1)
|
| 267 |
+
{
|
| 268 |
+
// We detect the case when affinity is set to all processors and correctly
|
| 269 |
+
// leave affinity.oldApi as nullopt.
|
| 270 |
+
if (GetActiveProcessorGroupCount() != 1 || proc != sys)
|
| 271 |
+
{
|
| 272 |
+
std::set<CpuIndex> cpus;
|
| 273 |
+
|
| 274 |
+
const size_t procGroupIndex = groupAffinity[0];
|
| 275 |
+
|
| 276 |
+
const uint64_t mask = static_cast<uint64_t>(proc);
|
| 277 |
+
for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j)
|
| 278 |
+
{
|
| 279 |
+
if (mask & (KAFFINITY(1) << j))
|
| 280 |
+
cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j);
|
| 281 |
+
}
|
| 282 |
+
|
| 283 |
+
affinity.oldApi = std::move(cpus);
|
| 284 |
+
}
|
| 285 |
+
}
|
| 286 |
+
else
|
| 287 |
+
{
|
| 288 |
+
// If we got here it means that either SetProcessAffinityMask was never set
|
| 289 |
+
// or we're on Windows 11/Server 2022.
|
| 290 |
+
|
| 291 |
+
// Since Windows 11 and Windows Server 2022 the behaviour of
|
| 292 |
+
// GetProcessAffinityMask changed:
|
| 293 |
+
// > If, however, hHandle specifies a handle to the current process,
|
| 294 |
+
// > the function always uses the calling thread's primary group
|
| 295 |
+
// > (which by default is the same as the process' primary group)
|
| 296 |
+
// > in order to set the lpProcessAffinityMask and lpSystemAffinityMask.
|
| 297 |
+
// In which case we can actually retrieve the full affinity.
|
| 298 |
+
|
| 299 |
+
if (GetThreadSelectedCpuSetMasks_f != nullptr)
|
| 300 |
+
{
|
| 301 |
+
std::thread th([&]() {
|
| 302 |
+
std::set<CpuIndex> cpus;
|
| 303 |
+
bool isAffinityFull = true;
|
| 304 |
+
|
| 305 |
+
for (auto procGroupIndex : groupAffinity)
|
| 306 |
+
{
|
| 307 |
+
const int numActiveProcessors =
|
| 308 |
+
GetActiveProcessorCount(static_cast<WORD>(procGroupIndex));
|
| 309 |
+
|
| 310 |
+
// We have to schedule to two different processors
|
| 311 |
+
// and & the affinities we get. Otherwise our processor
|
| 312 |
+
// choice could influence the resulting affinity.
|
| 313 |
+
// We assume the processor IDs within the group are
|
| 314 |
+
// filled sequentially from 0.
|
| 315 |
+
uint64_t procCombined = std::numeric_limits<uint64_t>::max();
|
| 316 |
+
uint64_t sysCombined = std::numeric_limits<uint64_t>::max();
|
| 317 |
+
|
| 318 |
+
for (int i = 0; i < std::min(numActiveProcessors, 2); ++i)
|
| 319 |
+
{
|
| 320 |
+
GROUP_AFFINITY GroupAffinity;
|
| 321 |
+
std::memset(&GroupAffinity, 0, sizeof(GROUP_AFFINITY));
|
| 322 |
+
GroupAffinity.Group = static_cast<WORD>(procGroupIndex);
|
| 323 |
+
|
| 324 |
+
GroupAffinity.Mask = static_cast<KAFFINITY>(1) << i;
|
| 325 |
+
|
| 326 |
+
status =
|
| 327 |
+
SetThreadGroupAffinity(GetCurrentThread(), &GroupAffinity, nullptr);
|
| 328 |
+
if (status == 0)
|
| 329 |
+
{
|
| 330 |
+
affinity.isOldDeterminate = false;
|
| 331 |
+
return;
|
| 332 |
+
}
|
| 333 |
+
|
| 334 |
+
SwitchToThread();
|
| 335 |
+
|
| 336 |
+
DWORD_PTR proc2, sys2;
|
| 337 |
+
status = GetProcessAffinityMask(GetCurrentProcess(), &proc2, &sys2);
|
| 338 |
+
if (status == 0)
|
| 339 |
+
{
|
| 340 |
+
affinity.isOldDeterminate = false;
|
| 341 |
+
return;
|
| 342 |
+
}
|
| 343 |
+
|
| 344 |
+
procCombined &= static_cast<uint64_t>(proc2);
|
| 345 |
+
sysCombined &= static_cast<uint64_t>(sys2);
|
| 346 |
+
}
|
| 347 |
+
|
| 348 |
+
if (procCombined != sysCombined)
|
| 349 |
+
isAffinityFull = false;
|
| 350 |
+
|
| 351 |
+
for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j)
|
| 352 |
+
{
|
| 353 |
+
if (procCombined & (KAFFINITY(1) << j))
|
| 354 |
+
cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j);
|
| 355 |
+
}
|
| 356 |
+
}
|
| 357 |
+
|
| 358 |
+
// We have to detect the case where the affinity was not set,
|
| 359 |
+
// or is set to all processors so that we correctly produce as
|
| 360 |
+
// std::nullopt result.
|
| 361 |
+
if (!isAffinityFull)
|
| 362 |
+
{
|
| 363 |
+
affinity.oldApi = std::move(cpus);
|
| 364 |
+
}
|
| 365 |
+
});
|
| 366 |
+
|
| 367 |
+
th.join();
|
| 368 |
+
}
|
| 369 |
+
}
|
| 370 |
+
|
| 371 |
+
return affinity;
|
| 372 |
+
}
|
| 373 |
+
|
| 374 |
+
// Detection trait used to emulate access to Cache->GroupCount:
// HasGroupCount<T>::value is true exactly when the expression
// `std::declval<T>().Cache.GroupCount` is well-formed.

// Primary template: chosen when the member access does not compile.
template<typename T, typename = void>
struct HasGroupCount: std::bool_constant<false> {};

// Partial specialization: viable only when T has Cache.GroupCount.
template<typename T>
struct HasGroupCount<T, std::void_t<decltype(std::declval<T>().Cache.GroupCount)>>:
    std::bool_constant<true> {};
|
| 382 |
+
|
| 383 |
+
// Overload for info structures that expose Cache.GroupCount / Cache.GroupMasks.
// Returns the global index of every processor that is set in one of the
// cache's group masks and accepted by is_cpu_allowed.
template<typename T, typename Pred, std::enable_if_t<HasGroupCount<T>::value, bool> = true>
std::set<CpuIndex> readCacheMembers(const T* info, Pred&& is_cpu_allowed) {
    std::set<CpuIndex> cpus;

    // On Windows 10 this will read a 0 because GroupCount doesn't exist,
    // hence at least one group mask is always processed.
    const int groupCount = std::max(info->Cache.GroupCount, WORD(1));

    for (WORD maskIdx = 0; maskIdx < groupCount; ++maskIdx)
    {
        const auto&    groupMask   = info->Cache.GroupMasks[maskIdx];
        const WORD     groupNumber = groupMask.Group;
        const CpuIndex firstInGroup =
          static_cast<CpuIndex>(groupNumber) * WIN_PROCESSOR_GROUP_SIZE;

        for (BYTE bit = 0; bit < WIN_PROCESSOR_GROUP_SIZE; ++bit)
        {
            const CpuIndex c = firstInGroup + static_cast<CpuIndex>(bit);

            // The predicate is consulted only for processors present in the mask.
            if ((groupMask.Mask & (1ULL << bit)) && is_cpu_allowed(c))
                cpus.insert(c);
        }
    }

    return cpus;
}
|
| 402 |
+
|
| 403 |
+
// Overload for info structures without Cache.GroupCount, which carry a single
// Cache.GroupMask. Returns the global index of every processor that is set in
// that mask and accepted by is_cpu_allowed.
template<typename T, typename Pred, std::enable_if_t<!HasGroupCount<T>::value, bool> = true>
std::set<CpuIndex> readCacheMembers(const T* info, Pred&& is_cpu_allowed) {
    std::set<CpuIndex> cpus;

    const auto&    groupMask    = info->Cache.GroupMask;
    const WORD     groupNumber  = groupMask.Group;
    const CpuIndex firstInGroup = static_cast<CpuIndex>(groupNumber) * WIN_PROCESSOR_GROUP_SIZE;

    for (BYTE bit = 0; bit < WIN_PROCESSOR_GROUP_SIZE; ++bit)
    {
        const CpuIndex c = firstInGroup + static_cast<CpuIndex>(bit);

        // The predicate is consulted only for processors present in the mask.
        if ((groupMask.Mask & (1ULL << bit)) && is_cpu_allowed(c))
            cpus.insert(c);
    }

    return cpus;
}
|
| 417 |
+
|
| 418 |
+
#endif
|
| 419 |
+
|
| 420 |
+
#if defined(__linux__) && !defined(__ANDROID__)
|
| 421 |
+
|
| 422 |
+
inline std::set<CpuIndex> get_process_affinity() {
|
| 423 |
+
|
| 424 |
+
std::set<CpuIndex> cpus;
|
| 425 |
+
|
| 426 |
+
// For unsupported systems, or in case of a soft error, we may assume
|
| 427 |
+
// all processors are available for use.
|
| 428 |
+
[[maybe_unused]] auto set_to_all_cpus = [&]() {
|
| 429 |
+
for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c)
|
| 430 |
+
cpus.insert(c);
|
| 431 |
+
};
|
| 432 |
+
|
| 433 |
+
// cpu_set_t by default holds 1024 entries. This may not be enough soon,
|
| 434 |
+
// but there is no easy way to determine how many threads there actually
|
| 435 |
+
// is. In this case we just choose a reasonable upper bound.
|
| 436 |
+
static constexpr CpuIndex MaxNumCpus = 1024 * 64;
|
| 437 |
+
|
| 438 |
+
cpu_set_t* mask = CPU_ALLOC(MaxNumCpus);
|
| 439 |
+
if (mask == nullptr)
|
| 440 |
+
std::exit(EXIT_FAILURE);
|
| 441 |
+
|
| 442 |
+
const size_t masksize = CPU_ALLOC_SIZE(MaxNumCpus);
|
| 443 |
+
|
| 444 |
+
CPU_ZERO_S(masksize, mask);
|
| 445 |
+
|
| 446 |
+
const int status = sched_getaffinity(0, masksize, mask);
|
| 447 |
+
|
| 448 |
+
if (status != 0)
|
| 449 |
+
{
|
| 450 |
+
CPU_FREE(mask);
|
| 451 |
+
std::exit(EXIT_FAILURE);
|
| 452 |
+
}
|
| 453 |
+
|
| 454 |
+
for (CpuIndex c = 0; c < MaxNumCpus; ++c)
|
| 455 |
+
if (CPU_ISSET_S(c, masksize, mask))
|
| 456 |
+
cpus.insert(c);
|
| 457 |
+
|
| 458 |
+
CPU_FREE(mask);
|
| 459 |
+
|
| 460 |
+
return cpus;
|
| 461 |
+
}
|
| 462 |
+
|
| 463 |
+
#endif
|
| 464 |
+
|
| 465 |
+
#if defined(__linux__) && !defined(__ANDROID__)
|
| 466 |
+
|
| 467 |
+
inline static const auto STARTUP_PROCESSOR_AFFINITY = get_process_affinity();
|
| 468 |
+
|
| 469 |
+
#elif defined(_WIN64)
|
| 470 |
+
|
| 471 |
+
inline static const auto STARTUP_PROCESSOR_AFFINITY = get_process_affinity();
|
| 472 |
+
inline static const auto STARTUP_USE_OLD_AFFINITY_API =
|
| 473 |
+
STARTUP_PROCESSOR_AFFINITY.likely_used_old_api();
|
| 474 |
+
|
| 475 |
+
#endif
|
| 476 |
+
|
| 477 |
+
// We want to abstract the purpose of storing the numa node index somewhat.
|
| 478 |
+
// Whoever is using this does not need to know the specifics of the replication
|
| 479 |
+
// machinery to be able to access NUMA replicated memory.
|
| 480 |
+
class NumaReplicatedAccessToken {
|
| 481 |
+
public:
|
| 482 |
+
NumaReplicatedAccessToken() :
|
| 483 |
+
n(0) {}
|
| 484 |
+
|
| 485 |
+
explicit NumaReplicatedAccessToken(NumaIndex idx) :
|
| 486 |
+
n(idx) {}
|
| 487 |
+
|
| 488 |
+
NumaIndex get_numa_index() const { return n; }
|
| 489 |
+
|
| 490 |
+
private:
|
| 491 |
+
NumaIndex n;
|
| 492 |
+
};
|
| 493 |
+
|
| 494 |
+
struct L3Domain {
|
| 495 |
+
NumaIndex systemNumaIndex{};
|
| 496 |
+
std::set<CpuIndex> cpus{};
|
| 497 |
+
};
|
| 498 |
+
|
| 499 |
+
// Policy: build the configuration from the NUMA nodes the system reports.
struct SystemNumaPolicy {};

// Policy: use each L3 domain reported by the system as its own node.
struct L3DomainsPolicy {};

// Policy: start from the system-reported L3 domains and group them together
// until a group reaches bundleSize processors.
struct BundledL3Policy {
    size_t bundleSize;
};

// The policies accepted by NumaConfig::from_system.
using NumaAutoPolicy =
  std::variant<SystemNumaPolicy, L3DomainsPolicy, BundledL3Policy>;
|
| 509 |
+
|
| 510 |
+
// Designed as immutable, because there is no good reason to alter an already
|
| 511 |
+
// existing config in a way that doesn't require recreating it completely, and
|
| 512 |
+
// it would be complex and expensive to maintain class invariants.
|
| 513 |
+
// The CPU (processor) numbers always correspond to the actual numbering used
|
| 514 |
+
// by the system. The NUMA node numbers MAY NOT correspond to the system's
|
| 515 |
+
// numbering of the NUMA nodes. In particular, by default, if the processor has
|
| 516 |
+
// non-uniform cache access within a NUMA node (i.e., a non-unified L3 cache structure),
|
| 517 |
+
// then L3 domains within a system NUMA node will be used to subdivide it
|
| 518 |
+
// into multiple logical NUMA nodes in the config. Additionally, empty nodes may
|
| 519 |
+
// be removed, or the user may create custom nodes.
|
| 520 |
+
//
|
| 521 |
+
// As a special case, when performing system-wide replication of read-only data
|
| 522 |
+
// (i.e., LazyNumaReplicatedSystemWide), the system NUMA node is used, rather than
|
| 523 |
+
// custom or L3-aware nodes. See that class's get_discriminator() function.
|
| 524 |
+
//
|
| 525 |
+
// It is guaranteed that NUMA nodes are NOT empty: every node exposed by NumaConfig
|
| 526 |
+
// has at least one processor assigned.
|
| 527 |
+
//
|
| 528 |
+
// We use the affinities captured at startup so that the behaviour does not change over time.
|
| 529 |
+
//
|
| 530 |
+
// Since Stockfish doesn't support exceptions all places where an exception
|
| 531 |
+
// should be thrown are replaced by std::exit.
|
| 532 |
+
class NumaConfig {
|
| 533 |
+
public:
|
| 534 |
+
NumaConfig() :
|
| 535 |
+
highestCpuIndex(0),
|
| 536 |
+
customAffinity(false) {
|
| 537 |
+
const auto numCpus = SYSTEM_THREADS_NB;
|
| 538 |
+
add_cpu_range_to_node(NumaIndex{0}, CpuIndex{0}, numCpus - 1);
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
+
// This function gets a NumaConfig based on the system's provided information.
|
| 542 |
+
// The available policies are documented above.
|
| 543 |
+
static NumaConfig from_system([[maybe_unused]] const NumaAutoPolicy& policy,
|
| 544 |
+
bool respectProcessAffinity = true) {
|
| 545 |
+
NumaConfig cfg = empty();
|
| 546 |
+
|
| 547 |
+
#if !((defined(__linux__) && !defined(__ANDROID__)) || defined(_WIN64))
|
| 548 |
+
// Fallback for unsupported systems.
|
| 549 |
+
for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c)
|
| 550 |
+
cfg.add_cpu_to_node(NumaIndex{0}, c);
|
| 551 |
+
#else
|
| 552 |
+
|
| 553 |
+
#if defined(_WIN64)
|
| 554 |
+
|
| 555 |
+
std::optional<std::set<CpuIndex>> allowedCpus;
|
| 556 |
+
|
| 557 |
+
if (respectProcessAffinity)
|
| 558 |
+
allowedCpus = STARTUP_PROCESSOR_AFFINITY.get_combined();
|
| 559 |
+
|
| 560 |
+
// The affinity cannot be determined in all cases on Windows,
|
| 561 |
+
// but we at least guarantee that the number of allowed processors
|
| 562 |
+
// is >= number of processors in the affinity mask. In case the user
|
| 563 |
+
// is not satisfied they must set the processor numbers explicitly.
|
| 564 |
+
auto is_cpu_allowed = [&allowedCpus](CpuIndex c) {
|
| 565 |
+
return !allowedCpus.has_value() || allowedCpus->count(c) == 1;
|
| 566 |
+
};
|
| 567 |
+
|
| 568 |
+
#elif defined(__linux__) && !defined(__ANDROID__)
|
| 569 |
+
|
| 570 |
+
std::set<CpuIndex> allowedCpus;
|
| 571 |
+
|
| 572 |
+
if (respectProcessAffinity)
|
| 573 |
+
allowedCpus = STARTUP_PROCESSOR_AFFINITY;
|
| 574 |
+
|
| 575 |
+
auto is_cpu_allowed = [respectProcessAffinity, &allowedCpus](CpuIndex c) {
|
| 576 |
+
return !respectProcessAffinity || allowedCpus.count(c) == 1;
|
| 577 |
+
};
|
| 578 |
+
|
| 579 |
+
#endif
|
| 580 |
+
|
| 581 |
+
bool l3Success = false;
|
| 582 |
+
if (!std::holds_alternative<SystemNumaPolicy>(policy))
|
| 583 |
+
{
|
| 584 |
+
size_t l3BundleSize = 0;
|
| 585 |
+
if (const auto* v = std::get_if<BundledL3Policy>(&policy))
|
| 586 |
+
{
|
| 587 |
+
l3BundleSize = v->bundleSize;
|
| 588 |
+
}
|
| 589 |
+
if (auto l3Cfg =
|
| 590 |
+
try_get_l3_aware_config(respectProcessAffinity, l3BundleSize, is_cpu_allowed))
|
| 591 |
+
{
|
| 592 |
+
cfg = std::move(*l3Cfg);
|
| 593 |
+
l3Success = true;
|
| 594 |
+
}
|
| 595 |
+
}
|
| 596 |
+
if (!l3Success)
|
| 597 |
+
cfg = from_system_numa(respectProcessAffinity, is_cpu_allowed);
|
| 598 |
+
|
| 599 |
+
#if defined(_WIN64)
|
| 600 |
+
// Split the NUMA nodes to be contained within a group if necessary.
|
| 601 |
+
// This is needed between Windows 10 Build 20348 and Windows 11, because
|
| 602 |
+
// the new NUMA allocation behaviour was introduced while there was
|
| 603 |
+
// still no way to set thread affinity spanning multiple processor groups.
|
| 604 |
+
// See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support
|
| 605 |
+
// We also do this is if need to force old API for some reason.
|
| 606 |
+
//
|
| 607 |
+
// 2024-08-26: It appears that we need to actually always force this behaviour.
|
| 608 |
+
// While Windows allows this to work now, such assignments have bad interaction
|
| 609 |
+
// with the scheduler - in particular it still prefers scheduling on the thread's
|
| 610 |
+
// "primary" node, even if it means scheduling SMT processors first.
|
| 611 |
+
// See https://github.com/official-stockfish/Stockfish/issues/5551
|
| 612 |
+
// See https://learn.microsoft.com/en-us/windows/win32/procthread/processor-groups
|
| 613 |
+
//
|
| 614 |
+
// Each process is assigned a primary group at creation, and by default all
|
| 615 |
+
// of its threads' primary group is the same. Each thread's ideal processor
|
| 616 |
+
// is in the thread's primary group, so threads will preferentially be
|
| 617 |
+
// scheduled to processors on their primary group, but they are able to
|
| 618 |
+
// be scheduled to processors on any other group.
|
| 619 |
+
//
|
| 620 |
+
// used to be guarded by if (STARTUP_USE_OLD_AFFINITY_API)
|
| 621 |
+
{
|
| 622 |
+
NumaConfig splitCfg = empty();
|
| 623 |
+
|
| 624 |
+
NumaIndex splitNodeIndex = 0;
|
| 625 |
+
for (const auto& cpus : cfg.nodes)
|
| 626 |
+
{
|
| 627 |
+
if (cpus.empty())
|
| 628 |
+
continue;
|
| 629 |
+
|
| 630 |
+
size_t lastProcGroupIndex = *(cpus.begin()) / WIN_PROCESSOR_GROUP_SIZE;
|
| 631 |
+
for (CpuIndex c : cpus)
|
| 632 |
+
{
|
| 633 |
+
const size_t procGroupIndex = c / WIN_PROCESSOR_GROUP_SIZE;
|
| 634 |
+
if (procGroupIndex != lastProcGroupIndex)
|
| 635 |
+
{
|
| 636 |
+
splitNodeIndex += 1;
|
| 637 |
+
lastProcGroupIndex = procGroupIndex;
|
| 638 |
+
}
|
| 639 |
+
splitCfg.add_cpu_to_node(splitNodeIndex, c);
|
| 640 |
+
}
|
| 641 |
+
splitNodeIndex += 1;
|
| 642 |
+
}
|
| 643 |
+
|
| 644 |
+
cfg = std::move(splitCfg);
|
| 645 |
+
}
|
| 646 |
+
#endif
|
| 647 |
+
|
| 648 |
+
#endif
|
| 649 |
+
|
| 650 |
+
// We have to ensure no empty NUMA nodes persist.
|
| 651 |
+
cfg.remove_empty_numa_nodes();
|
| 652 |
+
|
| 653 |
+
// If the user explicitly opts out from respecting the current process affinity
|
| 654 |
+
// then it may be inconsistent with the current affinity (obviously), so we
|
| 655 |
+
// consider it custom.
|
| 656 |
+
if (!respectProcessAffinity)
|
| 657 |
+
cfg.customAffinity = true;
|
| 658 |
+
|
| 659 |
+
return cfg;
|
| 660 |
+
}
|
| 661 |
+
|
| 662 |
+
// ':'-separated numa nodes
|
| 663 |
+
// ','-separated cpu indices
|
| 664 |
+
// supports "first-last" range syntax for cpu indices
|
| 665 |
+
// For example "0-15,128-143:16-31,144-159:32-47,160-175:48-63,176-191"
|
| 666 |
+
static NumaConfig from_string(const std::string& s) {
|
| 667 |
+
NumaConfig cfg = empty();
|
| 668 |
+
|
| 669 |
+
NumaIndex n = 0;
|
| 670 |
+
for (auto&& nodeStr : split(s, ":"))
|
| 671 |
+
{
|
| 672 |
+
auto indices = indices_from_shortened_string(std::string(nodeStr));
|
| 673 |
+
if (!indices.empty())
|
| 674 |
+
{
|
| 675 |
+
for (auto idx : indices)
|
| 676 |
+
{
|
| 677 |
+
if (!cfg.add_cpu_to_node(n, CpuIndex(idx)))
|
| 678 |
+
std::exit(EXIT_FAILURE);
|
| 679 |
+
}
|
| 680 |
+
|
| 681 |
+
n += 1;
|
| 682 |
+
}
|
| 683 |
+
}
|
| 684 |
+
|
| 685 |
+
cfg.customAffinity = true;
|
| 686 |
+
|
| 687 |
+
return cfg;
|
| 688 |
+
}
|
| 689 |
+
|
| 690 |
+
NumaConfig(const NumaConfig&) = delete;
|
| 691 |
+
NumaConfig(NumaConfig&&) = default;
|
| 692 |
+
NumaConfig& operator=(const NumaConfig&) = delete;
|
| 693 |
+
NumaConfig& operator=(NumaConfig&&) = default;
|
| 694 |
+
|
| 695 |
+
bool is_cpu_assigned(CpuIndex n) const { return nodeByCpu.count(n) == 1; }
|
| 696 |
+
|
| 697 |
+
NumaIndex num_numa_nodes() const { return nodes.size(); }
|
| 698 |
+
|
| 699 |
+
CpuIndex num_cpus_in_numa_node(NumaIndex n) const {
|
| 700 |
+
assert(n < nodes.size());
|
| 701 |
+
return nodes[n].size();
|
| 702 |
+
}
|
| 703 |
+
|
| 704 |
+
CpuIndex num_cpus() const { return nodeByCpu.size(); }
|
| 705 |
+
|
| 706 |
+
bool requires_memory_replication() const { return customAffinity || nodes.size() > 1; }
|
| 707 |
+
|
| 708 |
+
std::string to_string() const {
|
| 709 |
+
std::string str;
|
| 710 |
+
|
| 711 |
+
bool isFirstNode = true;
|
| 712 |
+
for (auto&& cpus : nodes)
|
| 713 |
+
{
|
| 714 |
+
if (!isFirstNode)
|
| 715 |
+
str += ":";
|
| 716 |
+
|
| 717 |
+
bool isFirstSet = true;
|
| 718 |
+
auto rangeStart = cpus.begin();
|
| 719 |
+
for (auto it = cpus.begin(); it != cpus.end(); ++it)
|
| 720 |
+
{
|
| 721 |
+
auto next = std::next(it);
|
| 722 |
+
if (next == cpus.end() || *next != *it + 1)
|
| 723 |
+
{
|
| 724 |
+
// cpus[i] is at the end of the range (may be of size 1)
|
| 725 |
+
if (!isFirstSet)
|
| 726 |
+
str += ",";
|
| 727 |
+
|
| 728 |
+
const CpuIndex last = *it;
|
| 729 |
+
|
| 730 |
+
if (it != rangeStart)
|
| 731 |
+
{
|
| 732 |
+
const CpuIndex first = *rangeStart;
|
| 733 |
+
|
| 734 |
+
str += std::to_string(first);
|
| 735 |
+
str += "-";
|
| 736 |
+
str += std::to_string(last);
|
| 737 |
+
}
|
| 738 |
+
else
|
| 739 |
+
str += std::to_string(last);
|
| 740 |
+
|
| 741 |
+
rangeStart = next;
|
| 742 |
+
isFirstSet = false;
|
| 743 |
+
}
|
| 744 |
+
}
|
| 745 |
+
|
| 746 |
+
isFirstNode = false;
|
| 747 |
+
}
|
| 748 |
+
|
| 749 |
+
return str;
|
| 750 |
+
}
|
| 751 |
+
|
| 752 |
+
// Heuristic telling whether `numThreads` threads should be distributed among
// the nodes and bound to them.
//
// Unbound threads can only use the NUMA-replicated objects of the first
// node, so performance is lost once the OS schedules them elsewhere. Binding
// is therefore advised when the threads likely do not fit in one node, or
// when there are enough of them to spread over the nodes with little
// disparity. Small nodes (and in particular empty ones) are mostly ignored.
bool suggests_binding_threads(CpuIndex numThreads) const {
    // A user-defined affinity may differ from the one given by the OS;
    // binding is then required for the threads to run on the right processors.
    if (customAffinity)
        return true;

    // A single thread cannot be distributed and is never bound.
    if (numThreads <= 1)
        return false;

    size_t biggestNode = 0;
    for (const auto& nodeCpus : nodes)
        biggestNode = nodeCpus.size() > biggestNode ? nodeCpus.size() : biggestNode;

    // A node counts as small when its size relative to the biggest node is at
    // or below this threshold.
    static constexpr double SmallNodeThreshold = 0.6;

    size_t significantNodes = 0;
    for (const auto& nodeCpus : nodes)
    {
        const double relativeSize =
          static_cast<double>(nodeCpus.size()) / static_cast<double>(biggestNode);
        if (!(relativeSize <= SmallNodeThreshold))
            significantNodes += 1;
    }

    const bool overflowsOneNode = numThreads > biggestNode / 2;
    const bool enoughToSpread   = numThreads >= significantNodes * 4;

    return (overflowsOneNode || enoughToSpread) && nodes.size() > 1;
}
|
| 791 |
+
|
| 792 |
+
// Chooses a node for each of `numThreads` threads and returns the choices in
// thread order (element i is the node index for thread i).
//
// Threads are placed one at a time on the node whose fill ratio
// (threads placed + 1) / (processors in node) would be lowest; ties go to
// the lowest node index.
std::vector<NumaIndex> distribute_threads_among_numa_nodes(CpuIndex numThreads) const {
    std::vector<NumaIndex> ns;

    if (nodes.size() <= 1)
    {
        // Nothing to balance with a single node. A configuration with no
        // nodes at all is handled here as well: the general path below would
        // otherwise write to occupation[0] of an empty vector.
        ns.resize(numThreads, NumaIndex{0});
        return ns;
    }

    ns.reserve(numThreads);

    // occupation[n] = number of threads already placed on node n.
    std::vector<size_t> occupation(nodes.size(), 0);
    for (CpuIndex c = 0; c < numThreads; ++c)
    {
        NumaIndex bestNode{0};
        float     bestNodeFill = std::numeric_limits<float>::max();
        for (NumaIndex n = 0; n < nodes.size(); ++n)
        {
            const float fill =
              static_cast<float>(occupation[n] + 1) / static_cast<float>(nodes[n].size());
            // NOTE: Do we want to perhaps fill the first available node
            //       up to 50% first before considering other nodes?
            //       Probably not, because it would interfere with running
            //       multiple instances. We basically shouldn't favor any
            //       particular node.
            if (fill < bestNodeFill)
            {
                bestNode     = n;
                bestNodeFill = fill;
            }
        }
        ns.emplace_back(bestNode);
        occupation[bestNode] += 1;
    }

    return ns;
}
|
| 830 |
+
|
| 831 |
+
// Restricts the calling thread to the processors of node `n` and returns an
// access token for that node. Terminates the program if `n` is not a valid,
// non-empty node or if the OS refuses the affinity change. On platforms
// other than non-Android Linux and 64-bit Windows no binding is performed
// and only the token is returned.
NumaReplicatedAccessToken bind_current_thread_to_numa_node(NumaIndex n) const {
    const bool usableNode = n < nodes.size() && nodes[n].size() != 0;
    if (!usableNode)
        std::exit(EXIT_FAILURE);

#if defined(__linux__) && !defined(__ANDROID__)

    // The mask only has to be large enough for the highest known processor.
    const CpuIndex maskCpuCount = highestCpuIndex + 1;

    cpu_set_t* const cpuMask = CPU_ALLOC(maskCpuCount);
    if (!cpuMask)
        std::exit(EXIT_FAILURE);

    const size_t maskBytes = CPU_ALLOC_SIZE(maskCpuCount);

    CPU_ZERO_S(maskBytes, cpuMask);

    for (CpuIndex cpu : nodes[n])
        CPU_SET_S(cpu, maskBytes, cpuMask);

    const int rc = sched_setaffinity(0, maskBytes, cpuMask);

    CPU_FREE(cpuMask);

    if (rc != 0)
        std::exit(EXIT_FAILURE);

    // Yield so the thread gets rescheduled under the new affinity. Purely
    // defensive; this code is not performance critical.
    sched_yield();

#elif defined(_WIN64)

    // SetThreadSelectedCpuSetMasks requires Windows 11. Before that there is
    // no good way to set a thread affinity spanning processor groups, so the
    // function is looked up at run time.
    HMODULE kernel32      = GetModuleHandle(TEXT("Kernel32.dll"));
    auto    setCpuSetMasks = SetThreadSelectedCpuSetMasks_t(
      (void (*)()) GetProcAddress(kernel32, "SetThreadSelectedCpuSetMasks"));

    // The new API is ALWAYS used when available: it has no downsides, and the
    // mask set here is kept a superset of whatever is set below through
    // SetThreadGroupAffinity.
    if (setCpuSetMasks != nullptr)
    {
        // Only available on Windows 11 and Windows Server 2022 onwards.
        const USHORT groupCount = USHORT(
          ((highestCpuIndex + 1) + WIN_PROCESSOR_GROUP_SIZE - 1) / WIN_PROCESSOR_GROUP_SIZE);
        auto perGroupMasks = std::make_unique<GROUP_AFFINITY[]>(groupCount);
        std::memset(perGroupMasks.get(), 0, sizeof(GROUP_AFFINITY) * groupCount);
        for (WORD g = 0; g < groupCount; ++g)
            perGroupMasks[g].Group = g;

        for (CpuIndex cpu : nodes[n])
        {
            const size_t group      = cpu / WIN_PROCESSOR_GROUP_SIZE;
            const size_t bitInGroup = cpu % WIN_PROCESSOR_GROUP_SIZE;
            perGroupMasks[group].Mask |= KAFFINITY(1) << bitInGroup;
        }

        HANDLE thisThread = GetCurrentThread();

        const BOOL ok = setCpuSetMasks(thisThread, perGroupMasks.get(), groupCount);
        if (ok == 0)
            std::exit(EXIT_FAILURE);

        // Yield so the thread gets rescheduled under the new affinity. Purely
        // defensive; this code is not performance critical.
        SwitchToThread();
    }

    // The old API is only used when there is no alternative or when it has to
    // be forced.
    if (setCpuSetMasks == nullptr || STARTUP_USE_OLD_AFFINITY_API)
    {
        // On earlier Windows versions (since Windows 7) a thread cannot run
        // on several processor groups, so the thread is restricted to the
        // group of the first processor of this node. Processors of the node
        // lying outside that group are not assigned to the thread.
        // Normally that is harmless, because Windows used to build NUMA nodes
        // that never span processor groups. Since Windows 10 Build 20348 this
        // changed, so a small window of versions before Windows 11 might not
        // utilize all processors.
        //
        // NumaConfig::from_system deals with this by splitting nodes manually
        // when no function for setting a group-spanning affinity is
        // available; otherwise the thread distribution could be suboptimal.
        //
        // See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support
        GROUP_AFFINITY groupMask;
        std::memset(&groupMask, 0, sizeof(GROUP_AFFINITY));

        // nodes[n] is an ordered set, so begin() is the smallest cpu number.
        const size_t chosenGroup = *(nodes[n].begin()) / WIN_PROCESSOR_GROUP_SIZE;
        groupMask.Group          = static_cast<WORD>(chosenGroup);
        for (CpuIndex cpu : nodes[n])
        {
            const size_t group      = cpu / WIN_PROCESSOR_GROUP_SIZE;
            const size_t bitInGroup = cpu % WIN_PROCESSOR_GROUP_SIZE;

            // Only processors of the chosen group can be part of the mask.
            // With a correct setup nothing is skipped here, but a badly
            // specified NUMA node has to be tolerated.
            if (group == chosenGroup)
                groupMask.Mask |= KAFFINITY(1) << bitInGroup;
        }

        HANDLE thisThread = GetCurrentThread();

        const BOOL ok = SetThreadGroupAffinity(thisThread, &groupMask, nullptr);
        if (ok == 0)
            std::exit(EXIT_FAILURE);

        // Yield so the thread gets rescheduled under the new affinity. Purely
        // defensive; this code is not performance critical.
        SwitchToThread();
    }

#endif

    return NumaReplicatedAccessToken(n);
}
|
| 952 |
+
|
| 953 |
+
template<typename FuncT>
|
| 954 |
+
void execute_on_numa_node(NumaIndex n, FuncT&& f) const {
|
| 955 |
+
std::thread th([this, &f, n]() {
|
| 956 |
+
bind_current_thread_to_numa_node(n);
|
| 957 |
+
std::forward<FuncT>(f)();
|
| 958 |
+
});
|
| 959 |
+
|
| 960 |
+
th.join();
|
| 961 |
+
}
|
| 962 |
+
|
| 963 |
+
std::vector<std::set<CpuIndex>> nodes;
|
| 964 |
+
std::map<CpuIndex, NumaIndex> nodeByCpu;
|
| 965 |
+
|
| 966 |
+
private:
|
| 967 |
+
CpuIndex highestCpuIndex;
|
| 968 |
+
|
| 969 |
+
bool customAffinity;
|
| 970 |
+
|
| 971 |
+
static NumaConfig empty() { return NumaConfig(EmptyNodeTag{}); }
|
| 972 |
+
|
| 973 |
+
struct EmptyNodeTag {};
|
| 974 |
+
|
| 975 |
+
NumaConfig(EmptyNodeTag) :
|
| 976 |
+
highestCpuIndex(0),
|
| 977 |
+
customAffinity(false) {}
|
| 978 |
+
|
| 979 |
+
void remove_empty_numa_nodes() {
|
| 980 |
+
std::vector<std::set<CpuIndex>> newNodes;
|
| 981 |
+
for (auto&& cpus : nodes)
|
| 982 |
+
if (!cpus.empty())
|
| 983 |
+
newNodes.emplace_back(std::move(cpus));
|
| 984 |
+
nodes = std::move(newNodes);
|
| 985 |
+
}
|
| 986 |
+
|
| 987 |
+
// Returns true if successful
|
| 988 |
+
// Returns false if failed, i.e. when the cpu is already present
|
| 989 |
+
// strong guarantee, the structure remains unmodified
|
| 990 |
+
bool add_cpu_to_node(NumaIndex n, CpuIndex c) {
|
| 991 |
+
if (is_cpu_assigned(c))
|
| 992 |
+
return false;
|
| 993 |
+
|
| 994 |
+
while (nodes.size() <= n)
|
| 995 |
+
nodes.emplace_back();
|
| 996 |
+
|
| 997 |
+
nodes[n].insert(c);
|
| 998 |
+
nodeByCpu[c] = n;
|
| 999 |
+
|
| 1000 |
+
if (c > highestCpuIndex)
|
| 1001 |
+
highestCpuIndex = c;
|
| 1002 |
+
|
| 1003 |
+
return true;
|
| 1004 |
+
}
|
| 1005 |
+
|
| 1006 |
+
// Returns true if successful
|
| 1007 |
+
// Returns false if failed, i.e. when any of the cpus is already present
|
| 1008 |
+
// strong guarantee, the structure remains unmodified
|
| 1009 |
+
bool add_cpu_range_to_node(NumaIndex n, CpuIndex cfirst, CpuIndex clast) {
|
| 1010 |
+
for (CpuIndex c = cfirst; c <= clast; ++c)
|
| 1011 |
+
if (is_cpu_assigned(c))
|
| 1012 |
+
return false;
|
| 1013 |
+
|
| 1014 |
+
while (nodes.size() <= n)
|
| 1015 |
+
nodes.emplace_back();
|
| 1016 |
+
|
| 1017 |
+
for (CpuIndex c = cfirst; c <= clast; ++c)
|
| 1018 |
+
{
|
| 1019 |
+
nodes[n].insert(c);
|
| 1020 |
+
nodeByCpu[c] = n;
|
| 1021 |
+
}
|
| 1022 |
+
|
| 1023 |
+
if (clast > highestCpuIndex)
|
| 1024 |
+
highestCpuIndex = clast;
|
| 1025 |
+
|
| 1026 |
+
return true;
|
| 1027 |
+
}
|
| 1028 |
+
|
| 1029 |
+
static std::vector<size_t> indices_from_shortened_string(const std::string& s) {
|
| 1030 |
+
std::vector<size_t> indices;
|
| 1031 |
+
|
| 1032 |
+
if (s.empty())
|
| 1033 |
+
return indices;
|
| 1034 |
+
|
| 1035 |
+
for (const auto& ss : split(s, ","))
|
| 1036 |
+
{
|
| 1037 |
+
if (ss.empty())
|
| 1038 |
+
continue;
|
| 1039 |
+
|
| 1040 |
+
auto parts = split(ss, "-");
|
| 1041 |
+
if (parts.size() == 1)
|
| 1042 |
+
{
|
| 1043 |
+
const CpuIndex c = CpuIndex{str_to_size_t(std::string(parts[0]))};
|
| 1044 |
+
indices.emplace_back(c);
|
| 1045 |
+
}
|
| 1046 |
+
else if (parts.size() == 2)
|
| 1047 |
+
{
|
| 1048 |
+
const CpuIndex cfirst = CpuIndex{str_to_size_t(std::string(parts[0]))};
|
| 1049 |
+
const CpuIndex clast = CpuIndex{str_to_size_t(std::string(parts[1]))};
|
| 1050 |
+
for (size_t c = cfirst; c <= clast; ++c)
|
| 1051 |
+
{
|
| 1052 |
+
indices.emplace_back(c);
|
| 1053 |
+
}
|
| 1054 |
+
}
|
| 1055 |
+
}
|
| 1056 |
+
|
| 1057 |
+
return indices;
|
| 1058 |
+
}
|
| 1059 |
+
|
| 1060 |
+
// This function queries the system for the mapping of processors to NUMA nodes.
|
| 1061 |
+
// On Linux we read from standardized kernel sysfs, with a fallback to single NUMA
|
| 1062 |
+
// node. On Windows we utilize GetNumaProcessorNodeEx, which has its quirks, see
|
| 1063 |
+
// comment for Windows implementation of get_process_affinity.
|
| 1064 |
+
template<typename Pred>
|
| 1065 |
+
static NumaConfig from_system_numa([[maybe_unused]] bool respectProcessAffinity,
|
| 1066 |
+
[[maybe_unused]] Pred&& is_cpu_allowed) {
|
| 1067 |
+
NumaConfig cfg = empty();
|
| 1068 |
+
|
| 1069 |
+
#if defined(__linux__) && !defined(__ANDROID__)
|
| 1070 |
+
|
| 1071 |
+
// On Linux things are straightforward, since there's no processor groups and
|
| 1072 |
+
// any thread can be scheduled on all processors.
|
| 1073 |
+
// We try to gather this information from the sysfs first
|
| 1074 |
+
// https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node
|
| 1075 |
+
|
| 1076 |
+
bool useFallback = false;
|
| 1077 |
+
auto fallback = [&]() {
|
| 1078 |
+
useFallback = true;
|
| 1079 |
+
cfg = empty();
|
| 1080 |
+
};
|
| 1081 |
+
|
| 1082 |
+
// /sys/devices/system/node/online contains information about active NUMA nodes
|
| 1083 |
+
auto nodeIdsStr = read_file_to_string("/sys/devices/system/node/online");
|
| 1084 |
+
if (!nodeIdsStr.has_value() || nodeIdsStr->empty())
|
| 1085 |
+
{
|
| 1086 |
+
fallback();
|
| 1087 |
+
}
|
| 1088 |
+
else
|
| 1089 |
+
{
|
| 1090 |
+
remove_whitespace(*nodeIdsStr);
|
| 1091 |
+
for (size_t n : indices_from_shortened_string(*nodeIdsStr))
|
| 1092 |
+
{
|
| 1093 |
+
// /sys/devices/system/node/node.../cpulist
|
| 1094 |
+
std::string path =
|
| 1095 |
+
std::string("/sys/devices/system/node/node") + std::to_string(n) + "/cpulist";
|
| 1096 |
+
auto cpuIdsStr = read_file_to_string(path);
|
| 1097 |
+
// Now, we only bail if the file does not exist. Some nodes may be
|
| 1098 |
+
// empty, that's fine. An empty node still has a file that appears
|
| 1099 |
+
// to have some whitespace, so we need to handle that.
|
| 1100 |
+
if (!cpuIdsStr.has_value())
|
| 1101 |
+
{
|
| 1102 |
+
fallback();
|
| 1103 |
+
break;
|
| 1104 |
+
}
|
| 1105 |
+
else
|
| 1106 |
+
{
|
| 1107 |
+
remove_whitespace(*cpuIdsStr);
|
| 1108 |
+
for (size_t c : indices_from_shortened_string(*cpuIdsStr))
|
| 1109 |
+
{
|
| 1110 |
+
if (is_cpu_allowed(c))
|
| 1111 |
+
cfg.add_cpu_to_node(n, c);
|
| 1112 |
+
}
|
| 1113 |
+
}
|
| 1114 |
+
}
|
| 1115 |
+
}
|
| 1116 |
+
|
| 1117 |
+
if (useFallback)
|
| 1118 |
+
{
|
| 1119 |
+
for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c)
|
| 1120 |
+
if (is_cpu_allowed(c))
|
| 1121 |
+
cfg.add_cpu_to_node(NumaIndex{0}, c);
|
| 1122 |
+
}
|
| 1123 |
+
|
| 1124 |
+
#elif defined(_WIN64)
|
| 1125 |
+
|
| 1126 |
+
WORD numProcGroups = GetActiveProcessorGroupCount();
|
| 1127 |
+
for (WORD procGroup = 0; procGroup < numProcGroups; ++procGroup)
|
| 1128 |
+
{
|
| 1129 |
+
for (BYTE number = 0; number < WIN_PROCESSOR_GROUP_SIZE; ++number)
|
| 1130 |
+
{
|
| 1131 |
+
PROCESSOR_NUMBER procnum;
|
| 1132 |
+
procnum.Group = procGroup;
|
| 1133 |
+
procnum.Number = number;
|
| 1134 |
+
procnum.Reserved = 0;
|
| 1135 |
+
USHORT nodeNumber;
|
| 1136 |
+
|
| 1137 |
+
const BOOL status = GetNumaProcessorNodeEx(&procnum, &nodeNumber);
|
| 1138 |
+
const CpuIndex c = static_cast<CpuIndex>(procGroup) * WIN_PROCESSOR_GROUP_SIZE
|
| 1139 |
+
+ static_cast<CpuIndex>(number);
|
| 1140 |
+
if (status != 0 && nodeNumber != std::numeric_limits<USHORT>::max()
|
| 1141 |
+
&& is_cpu_allowed(c))
|
| 1142 |
+
{
|
| 1143 |
+
cfg.add_cpu_to_node(nodeNumber, c);
|
| 1144 |
+
}
|
| 1145 |
+
}
|
| 1146 |
+
}
|
| 1147 |
+
|
| 1148 |
+
#else
|
| 1149 |
+
|
| 1150 |
+
abort(); // should not reach here
|
| 1151 |
+
|
| 1152 |
+
#endif
|
| 1153 |
+
|
| 1154 |
+
return cfg;
|
| 1155 |
+
}
|
| 1156 |
+
|
| 1157 |
+
template<typename Pred>
|
| 1158 |
+
static std::optional<NumaConfig> try_get_l3_aware_config(
|
| 1159 |
+
bool respectProcessAffinity, size_t bundleSize, [[maybe_unused]] Pred&& is_cpu_allowed) {
|
| 1160 |
+
// Get the normal system configuration so we know to which NUMA node
|
| 1161 |
+
// each L3 domain belongs.
|
| 1162 |
+
NumaConfig systemConfig =
|
| 1163 |
+
NumaConfig::from_system(SystemNumaPolicy{}, respectProcessAffinity);
|
| 1164 |
+
std::vector<L3Domain> l3Domains;
|
| 1165 |
+
|
| 1166 |
+
#if defined(__linux__) && !defined(__ANDROID__)
|
| 1167 |
+
|
| 1168 |
+
std::set<CpuIndex> seenCpus;
|
| 1169 |
+
auto nextUnseenCpu = [&seenCpus]() {
|
| 1170 |
+
for (CpuIndex i = 0;; ++i)
|
| 1171 |
+
if (!seenCpus.count(i))
|
| 1172 |
+
return i;
|
| 1173 |
+
};
|
| 1174 |
+
|
| 1175 |
+
while (true)
|
| 1176 |
+
{
|
| 1177 |
+
CpuIndex next = nextUnseenCpu();
|
| 1178 |
+
auto siblingsStr =
|
| 1179 |
+
read_file_to_string("/sys/devices/system/cpu/cpu" + std::to_string(next)
|
| 1180 |
+
+ "/cache/index3/shared_cpu_list");
|
| 1181 |
+
|
| 1182 |
+
if (!siblingsStr.has_value() || siblingsStr->empty())
|
| 1183 |
+
{
|
| 1184 |
+
break; // we have read all available CPUs
|
| 1185 |
+
}
|
| 1186 |
+
|
| 1187 |
+
L3Domain domain;
|
| 1188 |
+
for (size_t c : indices_from_shortened_string(*siblingsStr))
|
| 1189 |
+
{
|
| 1190 |
+
if (is_cpu_allowed(c))
|
| 1191 |
+
{
|
| 1192 |
+
domain.systemNumaIndex = systemConfig.nodeByCpu.at(c);
|
| 1193 |
+
domain.cpus.insert(c);
|
| 1194 |
+
}
|
| 1195 |
+
seenCpus.insert(c);
|
| 1196 |
+
}
|
| 1197 |
+
if (!domain.cpus.empty())
|
| 1198 |
+
{
|
| 1199 |
+
l3Domains.emplace_back(std::move(domain));
|
| 1200 |
+
}
|
| 1201 |
+
}
|
| 1202 |
+
|
| 1203 |
+
#elif defined(_WIN64)
|
| 1204 |
+
|
| 1205 |
+
DWORD bufSize = 0;
|
| 1206 |
+
GetLogicalProcessorInformationEx(RelationCache, nullptr, &bufSize);
|
| 1207 |
+
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
|
| 1208 |
+
return std::nullopt;
|
| 1209 |
+
|
| 1210 |
+
std::vector<char> buffer(bufSize);
|
| 1211 |
+
auto info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data());
|
| 1212 |
+
if (!GetLogicalProcessorInformationEx(RelationCache, info, &bufSize))
|
| 1213 |
+
return std::nullopt;
|
| 1214 |
+
|
| 1215 |
+
while (reinterpret_cast<char*>(info) < buffer.data() + bufSize)
|
| 1216 |
+
{
|
| 1217 |
+
info = std::launder(info);
|
| 1218 |
+
if (info->Relationship == RelationCache && info->Cache.Level == 3)
|
| 1219 |
+
{
|
| 1220 |
+
L3Domain domain{};
|
| 1221 |
+
domain.cpus = readCacheMembers(info, is_cpu_allowed);
|
| 1222 |
+
if (!domain.cpus.empty())
|
| 1223 |
+
{
|
| 1224 |
+
domain.systemNumaIndex = systemConfig.nodeByCpu.at(*domain.cpus.begin());
|
| 1225 |
+
l3Domains.push_back(std::move(domain));
|
| 1226 |
+
}
|
| 1227 |
+
}
|
| 1228 |
+
// Variable length data structure, advance to next
|
| 1229 |
+
info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(
|
| 1230 |
+
reinterpret_cast<char*>(info) + info->Size);
|
| 1231 |
+
}
|
| 1232 |
+
#endif
|
| 1233 |
+
|
| 1234 |
+
if (!l3Domains.empty())
|
| 1235 |
+
return {NumaConfig::from_l3_info(std::move(l3Domains), bundleSize)};
|
| 1236 |
+
|
| 1237 |
+
return std::nullopt;
|
| 1238 |
+
}
|
| 1239 |
+
|
| 1240 |
+
|
| 1241 |
+
// Builds a NumaConfig out of L3 cache domains.
//
// Domains are first bucketed by their systemNumaIndex. Inside each bucket, adjacent
// domains are folded together for as long as the combined CPU count of a neighbouring
// pair does not exceed bundleSize. Every domain that remains afterwards becomes one
// node of the returned config; node indices are handed out consecutively, bucket by
// bucket in ascending systemNumaIndex order (std::map iteration order).
//
// domains must not be empty (asserted). Its elements are moved from.
static NumaConfig from_l3_info(std::vector<L3Domain>&& domains, size_t bundleSize) {
    assert(!domains.empty());

    std::map<NumaIndex, std::vector<L3Domain>> domainsByNode;
    for (L3Domain& domain : domains)
        domainsByNode[domain.systemNumaIndex].emplace_back(std::move(domain));

    NumaConfig result    = empty();
    NumaIndex  nextIndex = 0;

    for (auto& bucket : domainsByNode)
    {
        std::vector<L3Domain>& group = bucket.second;

        // Sweep over neighbouring pairs until a full pass merges nothing. Each merge
        // shrinks the group by one element, so the number of passes is bounded.
        for (bool mergedAny = true; mergedAny;)
        {
            mergedAny = false;

            for (size_t i = 0; i + 1 < group.size(); ++i)
            {
                auto& left  = group[i].cpus;
                auto& right = group[i + 1].cpus;

                if (left.size() + right.size() > bundleSize)
                    continue;

                left.merge(right);
                group.erase(group.begin() + i + 1);
                mergedAny = true;
            }
        }

        // One output node per surviving domain.
        for (const L3Domain& merged : group)
        {
            for (CpuIndex cpu : merged.cpus)
                result.add_cpu_to_node(nextIndex, cpu);

            ++nextIndex;
        }
    }

    return result;
}
|
| 1280 |
+
};
|
| 1281 |
+
|
| 1282 |
+
class NumaReplicationContext;
|
| 1283 |
+
|
| 1284 |
+
// Instances of this class are tracked by the NumaReplicationContext instance.
|
| 1285 |
+
// NumaReplicationContext informs all tracked instances when NUMA configuration changes.
|
| 1286 |
+
class NumaReplicatedBase {
|
| 1287 |
+
public:
|
| 1288 |
+
NumaReplicatedBase(NumaReplicationContext& ctx);
|
| 1289 |
+
|
| 1290 |
+
NumaReplicatedBase(const NumaReplicatedBase&) = delete;
|
| 1291 |
+
NumaReplicatedBase(NumaReplicatedBase&& other) noexcept;
|
| 1292 |
+
|
| 1293 |
+
NumaReplicatedBase& operator=(const NumaReplicatedBase&) = delete;
|
| 1294 |
+
NumaReplicatedBase& operator=(NumaReplicatedBase&& other) noexcept;
|
| 1295 |
+
|
| 1296 |
+
virtual void on_numa_config_changed() = 0;
|
| 1297 |
+
virtual ~NumaReplicatedBase();
|
| 1298 |
+
|
| 1299 |
+
const NumaConfig& get_numa_config() const;
|
| 1300 |
+
|
| 1301 |
+
private:
|
| 1302 |
+
NumaReplicationContext* context;
|
| 1303 |
+
};
|
| 1304 |
+
|
| 1305 |
+
// We force boxing with a unique_ptr. If this becomes an issue due to added
|
| 1306 |
+
// indirection we may need to add an option for a custom boxing type. When the
|
| 1307 |
+
// NUMA config changes the value stored at the index 0 is replicated to other nodes.
|
| 1308 |
+
template<typename T>
|
| 1309 |
+
class NumaReplicated: public NumaReplicatedBase {
|
| 1310 |
+
public:
|
| 1311 |
+
using ReplicatorFuncType = std::function<T(const T&)>;
|
| 1312 |
+
|
| 1313 |
+
NumaReplicated(NumaReplicationContext& ctx) :
|
| 1314 |
+
NumaReplicatedBase(ctx) {
|
| 1315 |
+
replicate_from(T{});
|
| 1316 |
+
}
|
| 1317 |
+
|
| 1318 |
+
NumaReplicated(NumaReplicationContext& ctx, T&& source) :
|
| 1319 |
+
NumaReplicatedBase(ctx) {
|
| 1320 |
+
replicate_from(std::move(source));
|
| 1321 |
+
}
|
| 1322 |
+
|
| 1323 |
+
NumaReplicated(const NumaReplicated&) = delete;
|
| 1324 |
+
NumaReplicated(NumaReplicated&& other) noexcept :
|
| 1325 |
+
NumaReplicatedBase(std::move(other)),
|
| 1326 |
+
instances(std::exchange(other.instances, {})) {}
|
| 1327 |
+
|
| 1328 |
+
NumaReplicated& operator=(const NumaReplicated&) = delete;
|
| 1329 |
+
NumaReplicated& operator=(NumaReplicated&& other) noexcept {
|
| 1330 |
+
NumaReplicatedBase::operator=(*this, std::move(other));
|
| 1331 |
+
instances = std::exchange(other.instances, {});
|
| 1332 |
+
|
| 1333 |
+
return *this;
|
| 1334 |
+
}
|
| 1335 |
+
|
| 1336 |
+
NumaReplicated& operator=(T&& source) {
|
| 1337 |
+
replicate_from(std::move(source));
|
| 1338 |
+
|
| 1339 |
+
return *this;
|
| 1340 |
+
}
|
| 1341 |
+
|
| 1342 |
+
~NumaReplicated() override = default;
|
| 1343 |
+
|
| 1344 |
+
const T& operator[](NumaReplicatedAccessToken token) const {
|
| 1345 |
+
assert(token.get_numa_index() < instances.size());
|
| 1346 |
+
return *(instances[token.get_numa_index()]);
|
| 1347 |
+
}
|
| 1348 |
+
|
| 1349 |
+
const T& operator*() const { return *(instances[0]); }
|
| 1350 |
+
|
| 1351 |
+
const T* operator->() const { return instances[0].get(); }
|
| 1352 |
+
|
| 1353 |
+
template<typename FuncT>
|
| 1354 |
+
void modify_and_replicate(FuncT&& f) {
|
| 1355 |
+
auto source = std::move(instances[0]);
|
| 1356 |
+
std::forward<FuncT>(f)(*source);
|
| 1357 |
+
replicate_from(std::move(*source));
|
| 1358 |
+
}
|
| 1359 |
+
|
| 1360 |
+
void on_numa_config_changed() override {
|
| 1361 |
+
// Use the first one as the source. It doesn't matter which one we use,
|
| 1362 |
+
// because they all must be identical, but the first one is guaranteed to exist.
|
| 1363 |
+
auto source = std::move(instances[0]);
|
| 1364 |
+
replicate_from(std::move(*source));
|
| 1365 |
+
}
|
| 1366 |
+
|
| 1367 |
+
private:
|
| 1368 |
+
std::vector<std::unique_ptr<T>> instances;
|
| 1369 |
+
|
| 1370 |
+
void replicate_from(T&& source) {
|
| 1371 |
+
instances.clear();
|
| 1372 |
+
|
| 1373 |
+
const NumaConfig& cfg = get_numa_config();
|
| 1374 |
+
if (cfg.requires_memory_replication())
|
| 1375 |
+
{
|
| 1376 |
+
for (NumaIndex n = 0; n < cfg.num_numa_nodes(); ++n)
|
| 1377 |
+
{
|
| 1378 |
+
cfg.execute_on_numa_node(
|
| 1379 |
+
n, [this, &source]() { instances.emplace_back(std::make_unique<T>(source)); });
|
| 1380 |
+
}
|
| 1381 |
+
}
|
| 1382 |
+
else
|
| 1383 |
+
{
|
| 1384 |
+
assert(cfg.num_numa_nodes() == 1);
|
| 1385 |
+
// We take advantage of the fact that replication is not required
|
| 1386 |
+
// and reuse the source value, avoiding one copy operation.
|
| 1387 |
+
instances.emplace_back(std::make_unique<T>(std::move(source)));
|
| 1388 |
+
}
|
| 1389 |
+
}
|
| 1390 |
+
};
|
| 1391 |
+
|
| 1392 |
+
// We force boxing with a unique_ptr. If this becomes an issue due to added
|
| 1393 |
+
// indirection we may need to add an option for a custom boxing type.
|
| 1394 |
+
template<typename T>
|
| 1395 |
+
class LazyNumaReplicated: public NumaReplicatedBase {
|
| 1396 |
+
public:
|
| 1397 |
+
using ReplicatorFuncType = std::function<T(const T&)>;
|
| 1398 |
+
|
| 1399 |
+
LazyNumaReplicated(NumaReplicationContext& ctx) :
|
| 1400 |
+
NumaReplicatedBase(ctx) {
|
| 1401 |
+
prepare_replicate_from(T{});
|
| 1402 |
+
}
|
| 1403 |
+
|
| 1404 |
+
LazyNumaReplicated(NumaReplicationContext& ctx, T&& source) :
|
| 1405 |
+
NumaReplicatedBase(ctx) {
|
| 1406 |
+
prepare_replicate_from(std::move(source));
|
| 1407 |
+
}
|
| 1408 |
+
|
| 1409 |
+
LazyNumaReplicated(const LazyNumaReplicated&) = delete;
|
| 1410 |
+
LazyNumaReplicated(LazyNumaReplicated&& other) noexcept :
|
| 1411 |
+
NumaReplicatedBase(std::move(other)),
|
| 1412 |
+
instances(std::exchange(other.instances, {})) {}
|
| 1413 |
+
|
| 1414 |
+
LazyNumaReplicated& operator=(const LazyNumaReplicated&) = delete;
|
| 1415 |
+
LazyNumaReplicated& operator=(LazyNumaReplicated&& other) noexcept {
|
| 1416 |
+
NumaReplicatedBase::operator=(*this, std::move(other));
|
| 1417 |
+
instances = std::exchange(other.instances, {});
|
| 1418 |
+
|
| 1419 |
+
return *this;
|
| 1420 |
+
}
|
| 1421 |
+
|
| 1422 |
+
LazyNumaReplicated& operator=(T&& source) {
|
| 1423 |
+
prepare_replicate_from(std::move(source));
|
| 1424 |
+
|
| 1425 |
+
return *this;
|
| 1426 |
+
}
|
| 1427 |
+
|
| 1428 |
+
~LazyNumaReplicated() override = default;
|
| 1429 |
+
|
| 1430 |
+
const T& operator[](NumaReplicatedAccessToken token) const {
|
| 1431 |
+
assert(token.get_numa_index() < instances.size());
|
| 1432 |
+
ensure_present(token.get_numa_index());
|
| 1433 |
+
return *(instances[token.get_numa_index()]);
|
| 1434 |
+
}
|
| 1435 |
+
|
| 1436 |
+
const T& operator*() const { return *(instances[0]); }
|
| 1437 |
+
|
| 1438 |
+
const T* operator->() const { return instances[0].get(); }
|
| 1439 |
+
|
| 1440 |
+
template<typename FuncT>
|
| 1441 |
+
void modify_and_replicate(FuncT&& f) {
|
| 1442 |
+
auto source = std::move(instances[0]);
|
| 1443 |
+
std::forward<FuncT>(f)(*source);
|
| 1444 |
+
prepare_replicate_from(std::move(*source));
|
| 1445 |
+
}
|
| 1446 |
+
|
| 1447 |
+
void on_numa_config_changed() override {
|
| 1448 |
+
// Use the first one as the source. It doesn't matter which one we use,
|
| 1449 |
+
// because they all must be identical, but the first one is guaranteed to exist.
|
| 1450 |
+
auto source = std::move(instances[0]);
|
| 1451 |
+
prepare_replicate_from(std::move(*source));
|
| 1452 |
+
}
|
| 1453 |
+
|
| 1454 |
+
private:
|
| 1455 |
+
mutable std::vector<std::unique_ptr<T>> instances;
|
| 1456 |
+
mutable std::mutex mutex;
|
| 1457 |
+
|
| 1458 |
+
void ensure_present(NumaIndex idx) const {
|
| 1459 |
+
assert(idx < instances.size());
|
| 1460 |
+
|
| 1461 |
+
if (instances[idx] != nullptr)
|
| 1462 |
+
return;
|
| 1463 |
+
|
| 1464 |
+
assert(idx != 0);
|
| 1465 |
+
|
| 1466 |
+
std::unique_lock<std::mutex> lock(mutex);
|
| 1467 |
+
// Check again for races.
|
| 1468 |
+
if (instances[idx] != nullptr)
|
| 1469 |
+
return;
|
| 1470 |
+
|
| 1471 |
+
const NumaConfig& cfg = get_numa_config();
|
| 1472 |
+
cfg.execute_on_numa_node(
|
| 1473 |
+
idx, [this, idx]() { instances[idx] = std::make_unique<T>(*instances[0]); });
|
| 1474 |
+
}
|
| 1475 |
+
|
| 1476 |
+
void prepare_replicate_from(T&& source) {
|
| 1477 |
+
instances.clear();
|
| 1478 |
+
|
| 1479 |
+
const NumaConfig& cfg = get_numa_config();
|
| 1480 |
+
if (cfg.requires_memory_replication())
|
| 1481 |
+
{
|
| 1482 |
+
assert(cfg.num_numa_nodes() > 0);
|
| 1483 |
+
|
| 1484 |
+
// We just need to make sure the first instance is there.
|
| 1485 |
+
// Note that we cannot move here as we need to reallocate the data
|
| 1486 |
+
// on the correct NUMA node.
|
| 1487 |
+
cfg.execute_on_numa_node(
|
| 1488 |
+
0, [this, &source]() { instances.emplace_back(std::make_unique<T>(source)); });
|
| 1489 |
+
|
| 1490 |
+
// Prepare others for lazy init.
|
| 1491 |
+
instances.resize(cfg.num_numa_nodes());
|
| 1492 |
+
}
|
| 1493 |
+
else
|
| 1494 |
+
{
|
| 1495 |
+
assert(cfg.num_numa_nodes() == 1);
|
| 1496 |
+
// We take advantage of the fact that replication is not required
|
| 1497 |
+
// and reuse the source value, avoiding one copy operation.
|
| 1498 |
+
instances.emplace_back(std::make_unique<T>(std::move(source)));
|
| 1499 |
+
}
|
| 1500 |
+
}
|
| 1501 |
+
};
|
| 1502 |
+
|
| 1503 |
+
// Utilizes shared memory.
|
| 1504 |
+
template<typename T>
|
| 1505 |
+
class LazyNumaReplicatedSystemWide: public NumaReplicatedBase {
|
| 1506 |
+
public:
|
| 1507 |
+
using ReplicatorFuncType = std::function<T(const T&)>;
|
| 1508 |
+
|
| 1509 |
+
LazyNumaReplicatedSystemWide(NumaReplicationContext& ctx) :
|
| 1510 |
+
NumaReplicatedBase(ctx) {
|
| 1511 |
+
prepare_replicate_from(std::make_unique<T>());
|
| 1512 |
+
}
|
| 1513 |
+
|
| 1514 |
+
LazyNumaReplicatedSystemWide(NumaReplicationContext& ctx, std::unique_ptr<T>&& source) :
|
| 1515 |
+
NumaReplicatedBase(ctx) {
|
| 1516 |
+
prepare_replicate_from(std::move(source));
|
| 1517 |
+
}
|
| 1518 |
+
|
| 1519 |
+
LazyNumaReplicatedSystemWide(const LazyNumaReplicatedSystemWide&) = delete;
|
| 1520 |
+
LazyNumaReplicatedSystemWide(LazyNumaReplicatedSystemWide&& other) noexcept :
|
| 1521 |
+
NumaReplicatedBase(std::move(other)),
|
| 1522 |
+
instances(std::exchange(other.instances, {})) {}
|
| 1523 |
+
|
| 1524 |
+
LazyNumaReplicatedSystemWide& operator=(const LazyNumaReplicatedSystemWide&) = delete;
|
| 1525 |
+
LazyNumaReplicatedSystemWide& operator=(LazyNumaReplicatedSystemWide&& other) noexcept {
|
| 1526 |
+
NumaReplicatedBase::operator=(*this, std::move(other));
|
| 1527 |
+
instances = std::exchange(other.instances, {});
|
| 1528 |
+
|
| 1529 |
+
return *this;
|
| 1530 |
+
}
|
| 1531 |
+
|
| 1532 |
+
LazyNumaReplicatedSystemWide& operator=(std::unique_ptr<T>&& source) {
|
| 1533 |
+
prepare_replicate_from(std::move(source));
|
| 1534 |
+
|
| 1535 |
+
return *this;
|
| 1536 |
+
}
|
| 1537 |
+
|
| 1538 |
+
~LazyNumaReplicatedSystemWide() override = default;
|
| 1539 |
+
|
| 1540 |
+
const T& operator[](NumaReplicatedAccessToken token) const {
|
| 1541 |
+
assert(token.get_numa_index() < instances.size());
|
| 1542 |
+
ensure_present(token.get_numa_index());
|
| 1543 |
+
return *(instances[token.get_numa_index()]);
|
| 1544 |
+
}
|
| 1545 |
+
|
| 1546 |
+
const T& operator*() const { return *(instances[0]); }
|
| 1547 |
+
|
| 1548 |
+
const T* operator->() const { return &*instances[0]; }
|
| 1549 |
+
|
| 1550 |
+
std::vector<std::pair<SystemWideSharedConstantAllocationStatus, std::optional<std::string>>>
|
| 1551 |
+
get_status_and_errors() const {
|
| 1552 |
+
std::vector<std::pair<SystemWideSharedConstantAllocationStatus, std::optional<std::string>>>
|
| 1553 |
+
status;
|
| 1554 |
+
status.reserve(instances.size());
|
| 1555 |
+
|
| 1556 |
+
for (const auto& instance : instances)
|
| 1557 |
+
{
|
| 1558 |
+
status.emplace_back(instance.get_status(), instance.get_error_message());
|
| 1559 |
+
}
|
| 1560 |
+
|
| 1561 |
+
return status;
|
| 1562 |
+
}
|
| 1563 |
+
|
| 1564 |
+
template<typename FuncT>
|
| 1565 |
+
void modify_and_replicate(FuncT&& f) {
|
| 1566 |
+
auto source = std::make_unique<T>(*instances[0]);
|
| 1567 |
+
std::forward<FuncT>(f)(*source);
|
| 1568 |
+
prepare_replicate_from(std::move(source));
|
| 1569 |
+
}
|
| 1570 |
+
|
| 1571 |
+
void on_numa_config_changed() override {
|
| 1572 |
+
// Use the first one as the source. It doesn't matter which one we use,
|
| 1573 |
+
// because they all must be identical, but the first one is guaranteed to exist.
|
| 1574 |
+
auto source = std::make_unique<T>(*instances[0]);
|
| 1575 |
+
prepare_replicate_from(std::move(source));
|
| 1576 |
+
}
|
| 1577 |
+
|
| 1578 |
+
private:
|
| 1579 |
+
mutable std::vector<SystemWideSharedConstant<T>> instances;
|
| 1580 |
+
mutable std::mutex mutex;
|
| 1581 |
+
|
| 1582 |
+
std::size_t get_discriminator(NumaIndex idx) const {
|
| 1583 |
+
const NumaConfig& cfg = get_numa_config();
|
| 1584 |
+
const NumaConfig& cfg_sys = NumaConfig::from_system(SystemNumaPolicy{}, false);
|
| 1585 |
+
// as a discriminator, locate the hardware/system numadomain this cpuindex belongs to
|
| 1586 |
+
CpuIndex cpu = *cfg.nodes[idx].begin(); // get a CpuIndex from NumaIndex
|
| 1587 |
+
NumaIndex sys_idx = cfg_sys.is_cpu_assigned(cpu) ? cfg_sys.nodeByCpu.at(cpu) : 0;
|
| 1588 |
+
std::string s = cfg_sys.to_string() + "$" + std::to_string(sys_idx);
|
| 1589 |
+
return static_cast<std::size_t>(hash_string(s));
|
| 1590 |
+
}
|
| 1591 |
+
|
| 1592 |
+
void ensure_present(NumaIndex idx) const {
|
| 1593 |
+
assert(idx < instances.size());
|
| 1594 |
+
|
| 1595 |
+
if (instances[idx] != nullptr)
|
| 1596 |
+
return;
|
| 1597 |
+
|
| 1598 |
+
assert(idx != 0);
|
| 1599 |
+
|
| 1600 |
+
std::unique_lock<std::mutex> lock(mutex);
|
| 1601 |
+
// Check again for races.
|
| 1602 |
+
if (instances[idx] != nullptr)
|
| 1603 |
+
return;
|
| 1604 |
+
|
| 1605 |
+
const NumaConfig& cfg = get_numa_config();
|
| 1606 |
+
cfg.execute_on_numa_node(idx, [this, idx]() {
|
| 1607 |
+
instances[idx] = SystemWideSharedConstant<T>(*instances[0], get_discriminator(idx));
|
| 1608 |
+
});
|
| 1609 |
+
}
|
| 1610 |
+
|
| 1611 |
+
void prepare_replicate_from(std::unique_ptr<T>&& source) {
|
| 1612 |
+
instances.clear();
|
| 1613 |
+
|
| 1614 |
+
const NumaConfig& cfg = get_numa_config();
|
| 1615 |
+
// We just need to make sure the first instance is there.
|
| 1616 |
+
// Note that we cannot move here as we need to reallocate the data
|
| 1617 |
+
// on the correct NUMA node.
|
| 1618 |
+
// Even in the case of a single NUMA node we have to copy since it's shared memory.
|
| 1619 |
+
if (cfg.requires_memory_replication())
|
| 1620 |
+
{
|
| 1621 |
+
assert(cfg.num_numa_nodes() > 0);
|
| 1622 |
+
|
| 1623 |
+
cfg.execute_on_numa_node(0, [this, &source]() {
|
| 1624 |
+
instances.emplace_back(SystemWideSharedConstant<T>(*source, get_discriminator(0)));
|
| 1625 |
+
});
|
| 1626 |
+
|
| 1627 |
+
// Prepare others for lazy init.
|
| 1628 |
+
instances.resize(cfg.num_numa_nodes());
|
| 1629 |
+
}
|
| 1630 |
+
else
|
| 1631 |
+
{
|
| 1632 |
+
assert(cfg.num_numa_nodes() == 1);
|
| 1633 |
+
instances.emplace_back(SystemWideSharedConstant<T>(*source, get_discriminator(0)));
|
| 1634 |
+
}
|
| 1635 |
+
}
|
| 1636 |
+
};
|
| 1637 |
+
|
| 1638 |
+
class NumaReplicationContext {
|
| 1639 |
+
public:
|
| 1640 |
+
NumaReplicationContext(NumaConfig&& cfg) :
|
| 1641 |
+
config(std::move(cfg)) {}
|
| 1642 |
+
|
| 1643 |
+
NumaReplicationContext(const NumaReplicationContext&) = delete;
|
| 1644 |
+
NumaReplicationContext(NumaReplicationContext&&) = delete;
|
| 1645 |
+
|
| 1646 |
+
NumaReplicationContext& operator=(const NumaReplicationContext&) = delete;
|
| 1647 |
+
NumaReplicationContext& operator=(NumaReplicationContext&&) = delete;
|
| 1648 |
+
|
| 1649 |
+
~NumaReplicationContext() {
|
| 1650 |
+
// The context must outlive replicated objects
|
| 1651 |
+
if (!trackedReplicatedObjects.empty())
|
| 1652 |
+
std::exit(EXIT_FAILURE);
|
| 1653 |
+
}
|
| 1654 |
+
|
| 1655 |
+
void attach(NumaReplicatedBase* obj) {
|
| 1656 |
+
assert(trackedReplicatedObjects.count(obj) == 0);
|
| 1657 |
+
trackedReplicatedObjects.insert(obj);
|
| 1658 |
+
}
|
| 1659 |
+
|
| 1660 |
+
void detach(NumaReplicatedBase* obj) {
|
| 1661 |
+
assert(trackedReplicatedObjects.count(obj) == 1);
|
| 1662 |
+
trackedReplicatedObjects.erase(obj);
|
| 1663 |
+
}
|
| 1664 |
+
|
| 1665 |
+
// oldObj may be invalid at this point
|
| 1666 |
+
void move_attached([[maybe_unused]] NumaReplicatedBase* oldObj, NumaReplicatedBase* newObj) {
|
| 1667 |
+
assert(trackedReplicatedObjects.count(oldObj) == 1);
|
| 1668 |
+
assert(trackedReplicatedObjects.count(newObj) == 0);
|
| 1669 |
+
trackedReplicatedObjects.erase(oldObj);
|
| 1670 |
+
trackedReplicatedObjects.insert(newObj);
|
| 1671 |
+
}
|
| 1672 |
+
|
| 1673 |
+
void set_numa_config(NumaConfig&& cfg) {
|
| 1674 |
+
config = std::move(cfg);
|
| 1675 |
+
for (auto&& obj : trackedReplicatedObjects)
|
| 1676 |
+
obj->on_numa_config_changed();
|
| 1677 |
+
}
|
| 1678 |
+
|
| 1679 |
+
const NumaConfig& get_numa_config() const { return config; }
|
| 1680 |
+
|
| 1681 |
+
private:
|
| 1682 |
+
NumaConfig config;
|
| 1683 |
+
|
| 1684 |
+
// std::set uses std::less by default, which is required for pointer comparison
|
| 1685 |
+
std::set<NumaReplicatedBase*> trackedReplicatedObjects;
|
| 1686 |
+
};
|
| 1687 |
+
|
| 1688 |
+
// Remembers ctx as the owning context and registers this object with it.
inline NumaReplicatedBase::NumaReplicatedBase(NumaReplicationContext& ctx) :
    context(&ctx) {
    ctx.attach(this);
}
|
| 1692 |
+
|
| 1693 |
+
// Takes over the context of other (leaving other with a null context) and swaps the
// registry entry of other for one that points at this object.
inline NumaReplicatedBase::NumaReplicatedBase(NumaReplicatedBase&& other) noexcept :
    context(other.context) {
    other.context = nullptr;
    context->move_attached(&other, this);
}
|
| 1697 |
+
|
| 1698 |
+
// Move assignment: this object gives up its own registration and takes over the one
// held by other. Afterwards other has a null context, like any moved-from instance.
inline NumaReplicatedBase& NumaReplicatedBase::operator=(NumaReplicatedBase&& other) noexcept {
    if (this == &other)
        return *this;

    // The assignee is already registered with its current context. Remove that entry
    // first; otherwise a stale pointer would remain in the old context's registry
    // (and move_attached() would trip its "newObj not yet tracked" assertion).
    if (context != nullptr)
        context->detach(this);

    context = std::exchange(other.context, nullptr);

    // other may itself be moved-from, in which case there is no registration to take over.
    if (context != nullptr)
        context->move_attached(&other, this);

    return *this;
}
|
| 1705 |
+
|
| 1706 |
+
// Unregisters from the owning context. A moved-from object has no context and
// therefore nothing to unregister.
inline NumaReplicatedBase::~NumaReplicatedBase() {
    if (context == nullptr)
        return;

    context->detach(this);
}
|
| 1710 |
+
|
| 1711 |
+
// Forwards to the owning context; the context pointer must be non-null.
inline const NumaConfig& NumaReplicatedBase::get_numa_config() const {
    const NumaReplicationContext& owner = *context;
    return owner.get_numa_config();
}
|
| 1714 |
+
|
| 1715 |
+
} // namespace Stockfish
|
| 1716 |
+
|
| 1717 |
+
|
| 1718 |
+
#endif // #ifndef NUMA_H_INCLUDED
|
src/perft.h
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef PERFT_H_INCLUDED
|
| 20 |
+
#define PERFT_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <cstdint>
|
| 23 |
+
|
| 24 |
+
#include "movegen.h"
|
| 25 |
+
#include "position.h"
|
| 26 |
+
#include "types.h"
|
| 27 |
+
#include "uci.h"
|
| 28 |
+
|
| 29 |
+
namespace Stockfish::Benchmark {
|
| 30 |
+
|
| 31 |
+
// Utility to verify move generation. All the leaf nodes up
|
| 32 |
+
// to the given depth are generated and counted, and the sum is returned.
|
| 33 |
+
template<bool Root>
|
| 34 |
+
uint64_t perft(Position& pos, Depth depth) {
|
| 35 |
+
|
| 36 |
+
StateInfo st;
|
| 37 |
+
|
| 38 |
+
uint64_t cnt, nodes = 0;
|
| 39 |
+
const bool leaf = (depth == 2);
|
| 40 |
+
|
| 41 |
+
for (const auto& m : MoveList<LEGAL>(pos))
|
| 42 |
+
{
|
| 43 |
+
if (Root && depth <= 1)
|
| 44 |
+
cnt = 1, nodes++;
|
| 45 |
+
else
|
| 46 |
+
{
|
| 47 |
+
pos.do_move(m, st);
|
| 48 |
+
cnt = leaf ? MoveList<LEGAL>(pos).size() : perft<false>(pos, depth - 1);
|
| 49 |
+
nodes += cnt;
|
| 50 |
+
pos.undo_move(m);
|
| 51 |
+
}
|
| 52 |
+
if (Root)
|
| 53 |
+
sync_cout << UCIEngine::move(m, pos.is_chess960()) << ": " << cnt << sync_endl;
|
| 54 |
+
}
|
| 55 |
+
return nodes;
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
inline uint64_t perft(const std::string& fen, Depth depth, bool isChess960) {
|
| 59 |
+
StateInfo st;
|
| 60 |
+
Position p;
|
| 61 |
+
p.set(fen, isChess960, &st);
|
| 62 |
+
|
| 63 |
+
return perft<true>(p, depth);
|
| 64 |
+
}
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
#endif // PERFT_H_INCLUDED
|
src/position.cpp
ADDED
|
@@ -0,0 +1,1566 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include "position.h"
|
| 20 |
+
|
| 21 |
+
#include <algorithm>
|
| 22 |
+
#include <array>
|
| 23 |
+
#include <cassert>
|
| 24 |
+
#include <cctype>
|
| 25 |
+
#include <cstddef>
|
| 26 |
+
#include <cstring>
|
| 27 |
+
#include <initializer_list>
|
| 28 |
+
#include <iomanip>
|
| 29 |
+
#include <iostream>
|
| 30 |
+
#include <sstream>
|
| 31 |
+
#include <string_view>
|
| 32 |
+
#include <utility>
|
| 33 |
+
|
| 34 |
+
#include "bitboard.h"
|
| 35 |
+
#include "history.h"
|
| 36 |
+
#include "misc.h"
|
| 37 |
+
#include "movegen.h"
|
| 38 |
+
#include "syzygy/tbprobe.h"
|
| 39 |
+
#include "tt.h"
|
| 40 |
+
#include "uci.h"
|
| 41 |
+
|
| 42 |
+
using std::string;
|
| 43 |
+
|
| 44 |
+
namespace Stockfish {
|
| 45 |
+
|
| 46 |
+
namespace Zobrist {
|
| 47 |
+
|
| 48 |
+
Key psq[PIECE_NB][SQUARE_NB];
|
| 49 |
+
Key enpassant[FILE_NB];
|
| 50 |
+
Key castling[CASTLING_RIGHT_NB];
|
| 51 |
+
Key side, noPawns;
|
| 52 |
+
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
namespace {
|
| 56 |
+
|
| 57 |
+
constexpr std::string_view PieceToChar(" PNBRQK pnbrqk");
|
| 58 |
+
|
| 59 |
+
static constexpr Piece Pieces[] = {W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING,
|
| 60 |
+
B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING};
|
| 61 |
+
} // namespace
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
// Returns an ASCII representation of the position
|
| 65 |
+
std::ostream& operator<<(std::ostream& os, const Position& pos) {
|
| 66 |
+
|
| 67 |
+
os << "\n +---+---+---+---+---+---+---+---+\n";
|
| 68 |
+
|
| 69 |
+
for (Rank r = RANK_8;; --r)
|
| 70 |
+
{
|
| 71 |
+
for (File f = FILE_A; f <= FILE_H; ++f)
|
| 72 |
+
os << " | " << PieceToChar[pos.piece_on(make_square(f, r))];
|
| 73 |
+
|
| 74 |
+
os << " | " << (1 + r) << "\n +---+---+---+---+---+---+---+---+\n";
|
| 75 |
+
|
| 76 |
+
if (r == RANK_1)
|
| 77 |
+
break;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
os << " a b c d e f g h\n"
|
| 81 |
+
<< "\nFen: " << pos.fen() << "\nKey: " << std::hex << std::uppercase << std::setfill('0')
|
| 82 |
+
<< std::setw(16) << pos.key() << std::setfill(' ') << std::dec << "\nCheckers: ";
|
| 83 |
+
|
| 84 |
+
for (Bitboard b = pos.checkers(); b;)
|
| 85 |
+
os << UCIEngine::square(pop_lsb(b)) << " ";
|
| 86 |
+
|
| 87 |
+
if (Tablebases::MaxCardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
|
| 88 |
+
{
|
| 89 |
+
StateInfo st;
|
| 90 |
+
|
| 91 |
+
Position p;
|
| 92 |
+
p.set(pos.fen(), pos.is_chess960(), &st);
|
| 93 |
+
Tablebases::ProbeState s1, s2;
|
| 94 |
+
Tablebases::WDLScore wdl = Tablebases::probe_wdl(p, &s1);
|
| 95 |
+
int dtz = Tablebases::probe_dtz(p, &s2);
|
| 96 |
+
os << "\nTablebases WDL: " << std::setw(4) << wdl << " (" << s1 << ")"
|
| 97 |
+
<< "\nTablebases DTZ: " << std::setw(4) << dtz << " (" << s2 << ")";
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
return os;
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
// Implements Marcel van Kervinck's cuckoo algorithm to detect repetition of positions
|
| 105 |
+
// for 3-fold repetition draws. The algorithm uses two hash tables with Zobrist hashes
|
| 106 |
+
// to allow fast detection of recurring positions. For details see:
|
| 107 |
+
// http://web.archive.org/web/20201107002606/https://marcelk.net/2013-04-06/paper/upcoming-rep-v2.pdf
|
| 108 |
+
|
| 109 |
+
// First and second hash functions for indexing the cuckoo tables
|
| 110 |
+
// First cuckoo hash: the low 13 bits of the key (an index in [0, 8191])
inline int H1(Key h) { return int(h & 0x1fff); }
|
| 111 |
+
// Second cuckoo hash: bits 16..28 of the key (an index in [0, 8191])
inline int H2(Key h) { return int((h >> 16) & 0x1fff); }
|
| 112 |
+
|
| 113 |
+
// Cuckoo tables with Zobrist hashes of valid reversible moves, and the moves themselves
|
| 114 |
+
std::array<Key, 8192> cuckoo;
|
| 115 |
+
std::array<Move, 8192> cuckooMove;
|
| 116 |
+
|
| 117 |
+
// Initializes at startup the various arrays used to compute hash keys
|
| 118 |
+
void Position::init() {
|
| 119 |
+
|
| 120 |
+
PRNG rng(1070372);
|
| 121 |
+
|
| 122 |
+
for (Piece pc : Pieces)
|
| 123 |
+
for (Square s = SQ_A1; s <= SQ_H8; ++s)
|
| 124 |
+
Zobrist::psq[pc][s] = rng.rand<Key>();
|
| 125 |
+
// pawns on these squares will promote
|
| 126 |
+
std::fill_n(Zobrist::psq[W_PAWN] + SQ_A8, 8, 0);
|
| 127 |
+
std::fill_n(Zobrist::psq[B_PAWN], 8, 0);
|
| 128 |
+
|
| 129 |
+
for (File f = FILE_A; f <= FILE_H; ++f)
|
| 130 |
+
Zobrist::enpassant[f] = rng.rand<Key>();
|
| 131 |
+
|
| 132 |
+
for (int cr = NO_CASTLING; cr <= ANY_CASTLING; ++cr)
|
| 133 |
+
Zobrist::castling[cr] = rng.rand<Key>();
|
| 134 |
+
|
| 135 |
+
Zobrist::side = rng.rand<Key>();
|
| 136 |
+
Zobrist::noPawns = rng.rand<Key>();
|
| 137 |
+
|
| 138 |
+
// Prepare the cuckoo tables
|
| 139 |
+
cuckoo.fill(0);
|
| 140 |
+
cuckooMove.fill(Move::none());
|
| 141 |
+
[[maybe_unused]] int count = 0;
|
| 142 |
+
for (Piece pc : Pieces)
|
| 143 |
+
for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
|
| 144 |
+
for (Square s2 = Square(s1 + 1); s2 <= SQ_H8; ++s2)
|
| 145 |
+
if ((type_of(pc) != PAWN) && (attacks_bb(type_of(pc), s1, 0) & s2))
|
| 146 |
+
{
|
| 147 |
+
Move move = Move(s1, s2);
|
| 148 |
+
Key key = Zobrist::psq[pc][s1] ^ Zobrist::psq[pc][s2] ^ Zobrist::side;
|
| 149 |
+
int i = H1(key);
|
| 150 |
+
while (true)
|
| 151 |
+
{
|
| 152 |
+
std::swap(cuckoo[i], key);
|
| 153 |
+
std::swap(cuckooMove[i], move);
|
| 154 |
+
if (move == Move::none()) // Arrived at empty slot?
|
| 155 |
+
break;
|
| 156 |
+
i = (i == H1(key)) ? H2(key) : H1(key); // Push victim to alternative slot
|
| 157 |
+
}
|
| 158 |
+
count++;
|
| 159 |
+
}
|
| 160 |
+
assert(count == 3668);
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
// Initializes the position object with the given FEN string.
|
| 165 |
+
// This function is not very robust - make sure that input FENs are correct,
|
| 166 |
+
// this is assumed to be the responsibility of the GUI.
|
| 167 |
+
Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si) {
    /*
   A FEN string describes a position with ASCII characters only, as six fields
   separated by single spaces:

   1) Piece placement, seen from White's side. Ranks are listed from rank 8 down
      to rank 1 and, within a rank, squares from file A to file H. Pieces use
      their English initials: upper-case ("PNBRQK") for White, lower-case
      ("pnbrqk") for Black. A digit 1-8 stands for that many consecutive empty
      squares and "/" separates ranks.

   2) Active color: "w" if White moves next, "b" if Black does.

   3) Castling availability: "-" if nobody can castle, otherwise one or more of
      "K"/"Q" (White king side / queen side) and "k"/"q" (Black king side /
      queen side).

   4) En passant target square in algebraic notation, or "-" if there is none.
      After a 2-square pawn move this is the square "behind" the pawn. Following
      the X-FEN standard it is recorded only if a pawn is in position to make
      the en passant capture and a pawn might really have advanced two squares.

   5) Halfmove clock: the number of halfmoves since the last pawn advance or
      capture, used for the fifty-move rule.

   6) Fullmove number: starts at 1 and is incremented after Black's move.
*/

    unsigned char      epFile, epRank, ch;
    size_t             pieceIdx;
    Square             cursor = SQ_A8;
    std::istringstream input(fenStr);

    // Wipe the position and the supplied state before parsing
    std::memset(reinterpret_cast<char*>(this), 0, sizeof(Position));
    std::memset(si, 0, sizeof(StateInfo));
    st = si;

    // Whitespace is significant: it delimits the FEN fields
    input >> std::noskipws;

    // 1. Piece placement
    while ((input >> ch) && !isspace(ch))
    {
        if (isdigit(ch))
        {
            // Skip the given number of empty files
            cursor += (ch - '0') * EAST;
        }
        else if (ch == '/')
        {
            cursor += 2 * SOUTH;
        }
        else if ((pieceIdx = PieceToChar.find(ch)) != string::npos)
        {
            put_piece(Piece(pieceIdx), cursor);
            ++cursor;
        }
    }

    // 2. Active color, followed by the separating space
    input >> ch;
    sideToMove = (ch == 'w' ? WHITE : BLACK);
    input >> ch;

    // 3. Castling availability. Three notations are accepted: the normal FEN
    // letters KQkq, Shredder-FEN (file letters of the rooks' starting columns)
    // and X-FEN, which in Chess960 replaces the tag of an inner rook by that
    // rook's file letter, as Shredder-FEN does.
    while ((input >> ch) && !isspace(ch))
    {
        Square      rookSq;
        const Color c    = islower(ch) ? BLACK : WHITE;
        const Piece rook = make_piece(c, ROOK);

        ch = char(toupper(ch));

        if (ch == 'K')
        {
            // Scan from the h-file towards the a-file for the castling rook
            rookSq = relative_square(c, SQ_H1);
            while (piece_on(rookSq) != rook)
                --rookSq;
        }
        else if (ch == 'Q')
        {
            // Scan from the a-file towards the h-file for the castling rook
            rookSq = relative_square(c, SQ_A1);
            while (piece_on(rookSq) != rook)
                ++rookSq;
        }
        else if (ch >= 'A' && ch <= 'H')
        {
            rookSq = make_square(File(ch - 'A'), relative_rank(c, RANK_1));
        }
        else
        {
            continue;
        }

        set_castling_right(c, rookSq);
    }

    // 4. En passant square.
    // Ignored if the square is invalid or not on the side to move's relative rank 6.
    bool epPossible = false;
    bool epLegal    = false;

    if (((input >> epFile) && (epFile >= 'a' && epFile <= 'h'))
        && ((input >> epRank) && (epRank == (sideToMove == WHITE ? '6' : '3'))))
    {
        st->epSquare = make_square(File(epFile - 'a'), Rank(epRank - '1'));

        Bitboard       capturers = attacks_bb<PAWN>(st->epSquare, ~sideToMove) & pieces(sideToMove, PAWN);
        const Bitboard victim    = pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove));
        const Bitboard occ       = pieces() ^ victim ^ st->epSquare;

        // The en passant square is considered only if
        // a) the side to move has a pawn threatening epSquare
        // b) there is an enemy pawn in front of epSquare
        // c) there is no piece on epSquare or behind epSquare
        const Bitboard mustBeEmpty = st->epSquare | (st->epSquare + pawn_push(sideToMove));

        epPossible = capturers && victim && !(pieces() & mustBeEmpty);

        // The epSquare is recorded only if at least one pawn can capture en
        // passant without leaving its own king in check
        while (capturers)
        {
            const Bitboard occAfter = occ ^ pop_lsb(capturers);
            const Bitboard checkers =
              attackers_to(square<KING>(sideToMove), occAfter) & pieces(~sideToMove) & ~victim;

            epLegal |= !checkers;
        }
    }

    if (!epPossible || !epLegal)
        st->epSquare = SQ_NONE;

    // 5-6. Halfmove clock and fullmove number
    input >> std::skipws >> st->rule50 >> gamePly;

    // Convert the fullmove number (starting at 1) into a gamePly starting at 0;
    // this also copes with the common incorrect FEN that has fullmove = 0.
    gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK);

    chess960 = isChess960;
    set_state();

    assert(pos_is_ok());

    return *this;
}
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
// Helper function used to set castling
|
| 311 |
+
// rights given the corresponding color and the rook starting square.
|
| 312 |
+
void Position::set_castling_right(Color c, Square rfrom) {

    const Square kingSq = square<KING>(c);

    // The rook's side of the king decides between king-side and queen-side castling
    const CastlingRights right = c & (kingSq < rfrom ? KING_SIDE : QUEEN_SIDE);

    st->castlingRights |= right;

    // Record the right against the king square and the rook square
    castlingRightsMask[kingSq] |= right;
    castlingRightsMask[rfrom] |= right;
    castlingRookSquare[right] = rfrom;

    // Destination squares of the king and the rook once castling is done
    const bool   isKingSide = (right & KING_SIDE) != 0;
    const Square kingDest   = relative_square(c, isKingSide ? SQ_G1 : SQ_C1);
    const Square rookDest   = relative_square(c, isKingSide ? SQ_F1 : SQ_D1);

    // Squares between each piece and its destination, minus the squares the
    // king and the rook themselves start on
    const Bitboard travelled = between_bb(rfrom, rookDest) | between_bb(kingSq, kingDest);

    castlingPath[right] = travelled & ~(kingSq | rfrom);
}
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
// Sets king attacks to detect if a move gives check
|
| 330 |
+
void Position::set_check_info() const {
|
| 331 |
+
|
| 332 |
+
update_slider_blockers(WHITE);
|
| 333 |
+
update_slider_blockers(BLACK);
|
| 334 |
+
|
| 335 |
+
Square ksq = square<KING>(~sideToMove);
|
| 336 |
+
|
| 337 |
+
st->checkSquares[PAWN] = attacks_bb<PAWN>(ksq, ~sideToMove);
|
| 338 |
+
st->checkSquares[KNIGHT] = attacks_bb<KNIGHT>(ksq);
|
| 339 |
+
st->checkSquares[BISHOP] = attacks_bb<BISHOP>(ksq, pieces());
|
| 340 |
+
st->checkSquares[ROOK] = attacks_bb<ROOK>(ksq, pieces());
|
| 341 |
+
st->checkSquares[QUEEN] = st->checkSquares[BISHOP] | st->checkSquares[ROOK];
|
| 342 |
+
st->checkSquares[KING] = 0;
|
| 343 |
+
}
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
// Computes the hash keys of the position, and other
|
| 347 |
+
// data that once computed is updated incrementally as moves are made.
|
| 348 |
+
// The function is only used when a new position is set up
|
| 349 |
+
void Position::set_state() const {
|
| 350 |
+
|
| 351 |
+
st->key = 0;
|
| 352 |
+
st->minorPieceKey = 0;
|
| 353 |
+
st->nonPawnKey[WHITE] = st->nonPawnKey[BLACK] = 0;
|
| 354 |
+
st->pawnKey = Zobrist::noPawns;
|
| 355 |
+
st->nonPawnMaterial[WHITE] = st->nonPawnMaterial[BLACK] = VALUE_ZERO;
|
| 356 |
+
st->checkersBB = attackers_to(square<KING>(sideToMove)) & pieces(~sideToMove);
|
| 357 |
+
|
| 358 |
+
set_check_info();
|
| 359 |
+
|
| 360 |
+
for (Bitboard b = pieces(); b;)
|
| 361 |
+
{
|
| 362 |
+
Square s = pop_lsb(b);
|
| 363 |
+
Piece pc = piece_on(s);
|
| 364 |
+
st->key ^= Zobrist::psq[pc][s];
|
| 365 |
+
|
| 366 |
+
if (type_of(pc) == PAWN)
|
| 367 |
+
st->pawnKey ^= Zobrist::psq[pc][s];
|
| 368 |
+
|
| 369 |
+
else
|
| 370 |
+
{
|
| 371 |
+
st->nonPawnKey[color_of(pc)] ^= Zobrist::psq[pc][s];
|
| 372 |
+
|
| 373 |
+
if (type_of(pc) != KING)
|
| 374 |
+
{
|
| 375 |
+
st->nonPawnMaterial[color_of(pc)] += PieceValue[pc];
|
| 376 |
+
|
| 377 |
+
if (type_of(pc) <= BISHOP)
|
| 378 |
+
st->minorPieceKey ^= Zobrist::psq[pc][s];
|
| 379 |
+
}
|
| 380 |
+
}
|
| 381 |
+
}
|
| 382 |
+
|
| 383 |
+
if (st->epSquare != SQ_NONE)
|
| 384 |
+
st->key ^= Zobrist::enpassant[file_of(st->epSquare)];
|
| 385 |
+
|
| 386 |
+
if (sideToMove == BLACK)
|
| 387 |
+
st->key ^= Zobrist::side;
|
| 388 |
+
|
| 389 |
+
st->key ^= Zobrist::castling[st->castlingRights];
|
| 390 |
+
st->materialKey = compute_material_key();
|
| 391 |
+
}
|
| 392 |
+
|
| 393 |
+
// Builds the material key from the piece counts alone: for each piece, the
// psq entries at indices 8, 9, ... (one per piece of that kind present) are
// XOR-ed together.
Key Position::compute_material_key() const {

    Key materialKey = 0;

    for (Piece piece : Pieces)
    {
        const int howMany = pieceCount[piece];

        for (int n = 0; n < howMany; ++n)
            materialKey ^= Zobrist::psq[piece][8 + n];
    }

    return materialKey;
}
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
// Overload to initialize the position object with the given endgame code string
|
| 403 |
+
// like "KBPKN". It's mainly a helper to get the material key out of an endgame code.
|
| 404 |
+
// Builds a position from an endgame code such as "KBPKN" by turning the code
// into a FEN string and delegating to the FEN overload of set().
//   code - material description; must start with 'K' and contain a second 'K'
//          that starts the other side's pieces (a 'v' may separate the sides)
//   c    - index of the side whose letters are lower-cased, i.e. which side
//          ends up with the black pieces
//   si   - state object passed through to the FEN overload
// Returns *this.
Position& Position::set(const string& code, Color c, StateInfo* si) {

    assert(code[0] == 'K');

    string sides[] = {code.substr(code.find('K', 1)),                                  // Weak
                      code.substr(0, std::min(code.find('v'), code.find('K', 1)))};  // Strong

    assert(sides[0].length() > 0 && sides[0].length() < 8);
    assert(sides[1].length() > 0 && sides[1].length() < 8);

    // std::tolower is only defined for arguments representable as unsigned char
    // (or EOF), so go through unsigned char instead of handing it a plain char.
    // The lambda also avoids naming an overloaded function as an algorithm argument.
    std::transform(sides[c].begin(), sides[c].end(), sides[c].begin(),
                   [](unsigned char ch) { return char(std::tolower(ch)); });

    // Each side fills one rank from the a-file; the digit pads the rank to 8 squares
    string fenStr = "8/" + sides[0] + char(8 - sides[0].length() + '0') + "/8/8/8/8/" + sides[1]
                  + char(8 - sides[1].length() + '0') + "/8 w - - 0 10";

    return set(fenStr, false, si);
}
|
| 421 |
+
|
| 422 |
+
|
| 423 |
+
// Returns a FEN representation of the position. In case of
|
| 424 |
+
// Chess960 the Shredder-FEN notation is used. This is mainly a debugging function.
|
| 425 |
+
string Position::fen() const {

    std::ostringstream out;

    // Piece placement, from rank 8 down to rank 1
    for (Rank r = RANK_8;; --r)
    {
        File f = FILE_A;
        while (f <= FILE_H)
        {
            // Collapse a run of empty squares into a single digit
            int emptyRun = 0;
            while (f <= FILE_H && empty(make_square(f, r)))
            {
                ++emptyRun;
                ++f;
            }

            if (emptyRun)
                out << emptyRun;

            // The run stopped on an occupied square (not at the end of the rank)
            if (f <= FILE_H)
                out << PieceToChar[piece_on(make_square(f, r))];

            ++f;
        }

        if (r == RANK_1)
            break;
        out << '/';
    }

    out << (sideToMove == WHITE ? " w " : " b ");

    // Writes one castling tag if the right is available: the rook's file letter
    // in Chess960 (Shredder-FEN), the classic letter otherwise
    const auto writeCastlingTag = [&](CastlingRights cr, char fileBase, char classic) {
        if (can_castle(cr))
            out << (chess960 ? char(fileBase + file_of(castling_rook_square(cr))) : classic);
    };

    writeCastlingTag(WHITE_OO, 'A', 'K');
    writeCastlingTag(WHITE_OOO, 'A', 'Q');
    writeCastlingTag(BLACK_OO, 'a', 'k');
    writeCastlingTag(BLACK_OOO, 'a', 'q');

    if (!can_castle(ANY_CASTLING))
        out << '-';

    // En passant square, surrounded by the field separators
    if (ep_square() == SQ_NONE)
        out << " - ";
    else
        out << " " + UCIEngine::square(ep_square()) + " ";

    // Halfmove clock, then the fullmove number recovered from gamePly
    out << st->rule50 << " " << 1 + (gamePly - (sideToMove == BLACK)) / 2;

    return out.str();
}
|
| 471 |
+
|
| 472 |
+
// Calculates st->blockersForKing[c] and st->pinners[~c],
|
| 473 |
+
// which store respectively the pieces preventing king of color c from being in check
|
| 474 |
+
// and the slider pieces of color ~c pinning pieces of color c to the king.
|
| 475 |
+
void Position::update_slider_blockers(Color c) const {

    const Square kingSq = square<KING>(c);
    const Color  enemy  = ~c;

    st->blockersForKing[c] = 0;
    st->pinners[enemy]     = 0;

    // Candidate snipers: enemy sliders aligned with the king on an empty board
    const Bitboard onLines     = attacks_bb<ROOK>(kingSq) & pieces(QUEEN, ROOK);
    const Bitboard onDiagonals = attacks_bb<BISHOP>(kingSq) & pieces(QUEEN, BISHOP);

    Bitboard candidates = (onLines | onDiagonals) & pieces(enemy);

    // Occupancy with all the candidate snipers taken off the board
    const Bitboard others = pieces() ^ candidates;

    for (; candidates;)
    {
        const Square   sniperSq  = pop_lsb(candidates);
        const Bitboard inBetween = between_bb(kingSq, sniperSq) & others;

        // Only a single piece standing between the sniper and the king is a blocker
        if (!inBetween || more_than_one(inBetween))
            continue;

        st->blockersForKing[c] |= inBetween;

        // If that blocker is one of our own pieces, the sniper pins it
        if (inBetween & pieces(c))
            st->pinners[enemy] |= sniperSq;
    }
}
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
// Computes a bitboard of all pieces which attack a given square.
|
| 504 |
+
// Slider attacks use the occupied bitboard to indicate occupancy.
|
| 505 |
+
Bitboard Position::attackers_to(Square s, Bitboard occupied) const {

    // Sliders: rays from 's' are cut short by the supplied occupancy
    Bitboard attackers = attacks_bb<ROOK>(s, occupied) & pieces(ROOK, QUEEN);
    attackers |= attacks_bb<BISHOP>(s, occupied) & pieces(BISHOP, QUEEN);

    // Pawns: looked up from 's' with the opposite color's attack pattern
    attackers |= attacks_bb<PAWN>(s, BLACK) & pieces(WHITE, PAWN);
    attackers |= attacks_bb<PAWN>(s, WHITE) & pieces(BLACK, PAWN);

    // Knights and kings
    attackers |= attacks_bb<KNIGHT>(s) & pieces(KNIGHT);
    attackers |= attacks_bb<KING>(s) & pieces(KING);

    return attackers;
}
|
| 513 |
+
|
| 514 |
+
// Returns true if at least one piece of color 'c' attacks square 's', with
// slider rays computed against the 'occupied' bitboard. The piece types are
// tested in turn and the first hit ends the search.
bool Position::attackers_to_exist(Square s, Bitboard occupied, Color c) const {

    if (attacks_bb<ROOK>(s, occupied) & pieces(c, ROOK, QUEEN))
        return true;

    if (attacks_bb<BISHOP>(s, occupied) & pieces(c, BISHOP, QUEEN))
        return true;

    if (attacks_bb<PAWN>(s, ~c) & pieces(c, PAWN))
        return true;

    if (attacks_bb<KNIGHT>(s) & pieces(c, KNIGHT))
        return true;

    return bool(attacks_bb<KING>(s) & pieces(c, KING));
}
|
| 521 |
+
|
| 522 |
+
// Tests whether a pseudo-legal move is legal
|
| 523 |
+
bool Position::legal(Move m) const {

    assert(m.is_ok());

    const Color  us   = sideToMove;
    const Color  them = ~us;
    const Square from = m.from_sq();
    const Square to   = m.to_sq();

    assert(color_of(moved_piece(m)) == us);
    assert(piece_on(square<KING>(us)) == make_piece(us, KING));

    // En passant is rare and awkward, so it is checked the simple way: build
    // the occupancy after the capture and see whether a slider hits our king.
    if (m.type_of() == EN_PASSANT)
    {
        const Square   kingSq     = square<KING>(us);
        const Square   capturedSq = to - pawn_push(us);
        const Bitboard occAfter   = (pieces() ^ from ^ capturedSq) | to;

        assert(to == ep_square());
        assert(moved_piece(m) == make_piece(us, PAWN));
        assert(piece_on(capturedSq) == make_piece(them, PAWN));
        assert(piece_on(to) == NO_PIECE);

        if (attacks_bb<ROOK>(kingSq, occAfter) & pieces(them, QUEEN, ROOK))
            return false;

        return !(attacks_bb<BISHOP>(kingSq, occAfter) & pieces(them, QUEEN, BISHOP));
    }

    // Move generation does not verify that the castling path is free of enemy
    // attacks; that check is done here.
    if (m.type_of() == CASTLING)
    {
        // The king ends on the same square in Chess960 as in standard chess
        const Square    kingDest = relative_square(us, to > from ? SQ_G1 : SQ_C1);
        const Direction step     = kingDest > from ? WEST : EAST;

        // Walk from the destination back towards the king's origin
        for (Square s = kingDest; s != from; s += step)
        {
            if (attackers_to_exist(s, pieces(), them))
                return false;
        }

        // In Chess960 the castling rook may be shielding the king from a check,
        // e.g. an enemy queen on SQ_A1 with the castling rook on SQ_B1.
        if (!chess960)
            return true;

        return !(blockers_for_king(us) & m.to_sq());
    }

    // A king move is legal if the destination is not attacked once the king
    // has left its current square.
    if (type_of(piece_on(from)) == KING)
        return !(attackers_to_exist(to, pieces() ^ from, them));

    // Any other piece may move if it is not a blocker for our king...
    if (!(blockers_for_king(us) & from))
        return true;

    // ...or if it stays on the line through its origin and our king
    return bool(line_bb(from, to) & pieces(us, KING));
}
|
| 579 |
+
|
| 580 |
+
|
| 581 |
+
// Takes a random move and tests whether the move is
|
| 582 |
+
// pseudo-legal. It is used to validate moves from TT that can be corrupted
|
| 583 |
+
// due to SMP concurrent access or hash position key aliasing.
|
| 584 |
+
bool Position::pseudo_legal(const Move m) const {
|
| 585 |
+
|
| 586 |
+
Color us = sideToMove;
|
| 587 |
+
Square from = m.from_sq();
|
| 588 |
+
Square to = m.to_sq();
|
| 589 |
+
Piece pc = moved_piece(m);
|
| 590 |
+
|
| 591 |
+
// Use a slower but simpler function for uncommon cases
|
| 592 |
+
// yet we skip the legality check of MoveList<LEGAL>().
|
| 593 |
+
if (m.type_of() != NORMAL)
|
| 594 |
+
return checkers() ? MoveList<EVASIONS>(*this).contains(m)
|
| 595 |
+
: MoveList<NON_EVASIONS>(*this).contains(m);
|
| 596 |
+
|
| 597 |
+
// Is not a promotion, so the promotion piece must be empty
|
| 598 |
+
assert(m.promotion_type() - KNIGHT == NO_PIECE_TYPE);
|
| 599 |
+
|
| 600 |
+
// If the 'from' square is not occupied by a piece belonging to the side to
|
| 601 |
+
// move, the move is obviously not legal.
|
| 602 |
+
if (pc == NO_PIECE || color_of(pc) != us)
|
| 603 |
+
return false;
|
| 604 |
+
|
| 605 |
+
// The destination square cannot be occupied by a friendly piece
|
| 606 |
+
if (pieces(us) & to)
|
| 607 |
+
return false;
|
| 608 |
+
|
| 609 |
+
// Handle the special case of a pawn move
|
| 610 |
+
if (type_of(pc) == PAWN)
|
| 611 |
+
{
|
| 612 |
+
// We have already handled promotion moves, so destination cannot be on the 8th/1st rank
|
| 613 |
+
if ((Rank8BB | Rank1BB) & to)
|
| 614 |
+
return false;
|
| 615 |
+
|
| 616 |
+
// Check if it's a valid capture, single push, or double push
|
| 617 |
+
const bool isCapture = bool(attacks_bb<PAWN>(from, us) & pieces(~us) & to);
|
| 618 |
+
const bool isSinglePush = (from + pawn_push(us) == to) && empty(to);
|
| 619 |
+
const bool isDoublePush = (from + 2 * pawn_push(us) == to)
|
| 620 |
+
&& (relative_rank(us, from) == RANK_2) && empty(to)
|
| 621 |
+
&& empty(to - pawn_push(us));
|
| 622 |
+
|
| 623 |
+
if (!(isCapture || isSinglePush || isDoublePush))
|
| 624 |
+
return false;
|
| 625 |
+
}
|
| 626 |
+
else if (!(attacks_bb(type_of(pc), from, pieces()) & to))
|
| 627 |
+
return false;
|
| 628 |
+
|
| 629 |
+
// Evasions generator already takes care to avoid some kind of illegal moves
|
| 630 |
+
// and legal() relies on this. We therefore have to take care that the same
|
| 631 |
+
// kind of moves are filtered out here.
|
| 632 |
+
if (checkers())
|
| 633 |
+
{
|
| 634 |
+
if (type_of(pc) != KING)
|
| 635 |
+
{
|
| 636 |
+
// Double check? In this case, a king move is required
|
| 637 |
+
if (more_than_one(checkers()))
|
| 638 |
+
return false;
|
| 639 |
+
|
| 640 |
+
// Our move must be a blocking interposition or a capture of the checking piece
|
| 641 |
+
if (!(between_bb(square<KING>(us), lsb(checkers())) & to))
|
| 642 |
+
return false;
|
| 643 |
+
}
|
| 644 |
+
// In case of king moves under check we have to remove the king so as to catch
|
| 645 |
+
// invalid moves like b1a1 when opposite queen is on c1.
|
| 646 |
+
else if (attackers_to_exist(to, pieces() ^ from, ~us))
|
| 647 |
+
return false;
|
| 648 |
+
}
|
| 649 |
+
|
| 650 |
+
return true;
|
| 651 |
+
}
|
| 652 |
+
|
| 653 |
+
|
| 654 |
+
// Tests whether a pseudo-legal move gives a check
|
| 655 |
+
bool Position::gives_check(Move m) const {

    assert(m.is_ok());
    assert(color_of(moved_piece(m)) == sideToMove);

    const Color  them = ~sideToMove;
    const Square from = m.from_sq();
    const Square to   = m.to_sq();

    // Direct check: the moved piece lands on one of its checking squares
    if (check_squares(type_of(piece_on(from))) & to)
        return true;

    // Discovered check: the piece was shielding the enemy king and either
    // leaves the line through that king or the move is a castling move
    if (blockers_for_king(them) & from)
    {
        const bool staysOnLine = bool(line_bb(from, to) & pieces(them, KING));

        return !staysOnLine || m.type_of() == CASTLING;
    }

    const auto moveType = m.type_of();

    if (moveType == NORMAL)
        return false;

    if (moveType == PROMOTION)
    {
        // Attacks of the promoted piece, with the pawn removed from its origin
        return bool(attacks_bb(m.promotion_type(), to, pieces() ^ from) & pieces(them, KING));
    }

    if (moveType == EN_PASSANT)
    {
        // Direct checks and ordinary discovered checks are already covered; what
        // remains is a discovered check through the square of the captured pawn.
        const Square   enemyKsq   = square<KING>(them);
        const Square   capturedSq = make_square(file_of(to), rank_of(from));
        const Bitboard occAfter   = (pieces() ^ from ^ capturedSq) | to;

        const Bitboard straight = attacks_bb<ROOK>(enemyKsq, occAfter) & pieces(sideToMove, QUEEN, ROOK);
        const Bitboard diagonal =
          attacks_bb<BISHOP>(enemyKsq, occAfter) & pieces(sideToMove, QUEEN, BISHOP);

        return bool(straight | diagonal);
    }

    // CASTLING, which is encoded as 'king captures the rook': only the rook's
    // destination square can deliver the check
    const Square rookDest = relative_square(sideToMove, to > from ? SQ_F1 : SQ_D1);

    return bool(check_squares(ROOK) & rookDest);
}
|
| 699 |
+
|
| 700 |
+
|
| 701 |
+
// Makes a move, and saves all information necessary
|
| 702 |
+
// to a StateInfo object. The move is assumed to be legal. Pseudo-legal
|
| 703 |
+
// moves should be filtered out before this function is called.
|
| 704 |
+
// If a pointer to the TT table is passed, the entry for the new position
|
| 705 |
+
// will be prefetched, and likewise for shared history.
|
| 706 |
+
void Position::do_move(Move m,
|
| 707 |
+
StateInfo& newSt,
|
| 708 |
+
bool givesCheck,
|
| 709 |
+
DirtyPiece& dp,
|
| 710 |
+
DirtyThreats& dts,
|
| 711 |
+
const TranspositionTable* tt = nullptr,
|
| 712 |
+
const SharedHistories* history = nullptr) {
|
| 713 |
+
|
| 714 |
+
assert(m.is_ok());
|
| 715 |
+
assert(&newSt != st);
|
| 716 |
+
|
| 717 |
+
Key k = st->key ^ Zobrist::side;
|
| 718 |
+
|
| 719 |
+
// Copy some fields of the old state to our new StateInfo object except the
|
| 720 |
+
// ones which are going to be recalculated from scratch anyway and then switch
|
| 721 |
+
// our state pointer to point to the new (ready to be updated) state.
|
| 722 |
+
std::memcpy(&newSt, st, offsetof(StateInfo, key));
|
| 723 |
+
newSt.previous = st;
|
| 724 |
+
st = &newSt;
|
| 725 |
+
|
| 726 |
+
// Increment ply counters. In particular, rule50 will be reset to zero later on
|
| 727 |
+
// in case of a capture or a pawn move.
|
| 728 |
+
++gamePly;
|
| 729 |
+
++st->rule50;
|
| 730 |
+
++st->pliesFromNull;
|
| 731 |
+
|
| 732 |
+
Color us = sideToMove;
|
| 733 |
+
Color them = ~us;
|
| 734 |
+
Square from = m.from_sq();
|
| 735 |
+
Square to = m.to_sq();
|
| 736 |
+
Piece pc = piece_on(from);
|
| 737 |
+
Piece captured = m.type_of() == EN_PASSANT ? make_piece(them, PAWN) : piece_on(to);
|
| 738 |
+
|
| 739 |
+
dp.pc = pc;
|
| 740 |
+
dp.from = from;
|
| 741 |
+
dp.to = to;
|
| 742 |
+
dp.add_sq = SQ_NONE;
|
| 743 |
+
dts.us = us;
|
| 744 |
+
dts.prevKsq = square<KING>(us);
|
| 745 |
+
dts.threatenedSqs = dts.threateningSqs = 0;
|
| 746 |
+
|
| 747 |
+
assert(color_of(pc) == us);
|
| 748 |
+
assert(captured == NO_PIECE || color_of(captured) == (m.type_of() != CASTLING ? them : us));
|
| 749 |
+
assert(type_of(captured) != KING);
|
| 750 |
+
|
| 751 |
+
if (m.type_of() == CASTLING)
|
| 752 |
+
{
|
| 753 |
+
assert(pc == make_piece(us, KING));
|
| 754 |
+
assert(captured == make_piece(us, ROOK));
|
| 755 |
+
|
| 756 |
+
Square rfrom, rto;
|
| 757 |
+
do_castling<true>(us, from, to, rfrom, rto, &dts, &dp);
|
| 758 |
+
|
| 759 |
+
k ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto];
|
| 760 |
+
st->nonPawnKey[us] ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto];
|
| 761 |
+
captured = NO_PIECE;
|
| 762 |
+
}
|
| 763 |
+
else if (captured)
|
| 764 |
+
{
|
| 765 |
+
Square capsq = to;
|
| 766 |
+
|
| 767 |
+
// If the captured piece is a pawn, update pawn hash key, otherwise
|
| 768 |
+
// update non-pawn material.
|
| 769 |
+
if (type_of(captured) == PAWN)
|
| 770 |
+
{
|
| 771 |
+
if (m.type_of() == EN_PASSANT)
|
| 772 |
+
{
|
| 773 |
+
capsq -= pawn_push(us);
|
| 774 |
+
|
| 775 |
+
assert(pc == make_piece(us, PAWN));
|
| 776 |
+
assert(to == st->epSquare);
|
| 777 |
+
assert(relative_rank(us, to) == RANK_6);
|
| 778 |
+
assert(piece_on(to) == NO_PIECE);
|
| 779 |
+
assert(piece_on(capsq) == make_piece(them, PAWN));
|
| 780 |
+
|
| 781 |
+
// Update board and piece lists in ep case, normal captures are updated later
|
| 782 |
+
remove_piece(capsq, &dts);
|
| 783 |
+
}
|
| 784 |
+
|
| 785 |
+
st->pawnKey ^= Zobrist::psq[captured][capsq];
|
| 786 |
+
}
|
| 787 |
+
else
|
| 788 |
+
{
|
| 789 |
+
st->nonPawnMaterial[them] -= PieceValue[captured];
|
| 790 |
+
st->nonPawnKey[them] ^= Zobrist::psq[captured][capsq];
|
| 791 |
+
|
| 792 |
+
if (type_of(captured) <= BISHOP)
|
| 793 |
+
st->minorPieceKey ^= Zobrist::psq[captured][capsq];
|
| 794 |
+
}
|
| 795 |
+
|
| 796 |
+
dp.remove_pc = captured;
|
| 797 |
+
dp.remove_sq = capsq;
|
| 798 |
+
|
| 799 |
+
k ^= Zobrist::psq[captured][capsq];
|
| 800 |
+
st->materialKey ^=
|
| 801 |
+
Zobrist::psq[captured][8 + pieceCount[captured] - (m.type_of() != EN_PASSANT)];
|
| 802 |
+
|
| 803 |
+
// Reset rule 50 counter
|
| 804 |
+
st->rule50 = 0;
|
| 805 |
+
}
|
| 806 |
+
else
|
| 807 |
+
dp.remove_sq = SQ_NONE;
|
| 808 |
+
|
| 809 |
+
// Update hash key
|
| 810 |
+
k ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to];
|
| 811 |
+
|
| 812 |
+
// Reset en passant square
|
| 813 |
+
if (st->epSquare != SQ_NONE)
|
| 814 |
+
{
|
| 815 |
+
k ^= Zobrist::enpassant[file_of(st->epSquare)];
|
| 816 |
+
st->epSquare = SQ_NONE;
|
| 817 |
+
}
|
| 818 |
+
|
| 819 |
+
// Update castling rights.
|
| 820 |
+
k ^= Zobrist::castling[st->castlingRights];
|
| 821 |
+
st->castlingRights &= ~(castlingRightsMask[from] | castlingRightsMask[to]);
|
| 822 |
+
k ^= Zobrist::castling[st->castlingRights];
|
| 823 |
+
|
| 824 |
+
// Move the piece. The tricky Chess960 castling is handled earlier
|
| 825 |
+
if (m.type_of() != CASTLING)
|
| 826 |
+
{
|
| 827 |
+
if (captured && m.type_of() != EN_PASSANT)
|
| 828 |
+
{
|
| 829 |
+
remove_piece(from, &dts);
|
| 830 |
+
swap_piece(to, pc, &dts);
|
| 831 |
+
}
|
| 832 |
+
else
|
| 833 |
+
move_piece(from, to, &dts);
|
| 834 |
+
}
|
| 835 |
+
|
| 836 |
+
// If the moving piece is a pawn do some special extra work
|
| 837 |
+
if (type_of(pc) == PAWN)
|
| 838 |
+
{
|
| 839 |
+
// Check if the en passant square needs to be set. Accurate e.p. info is needed
|
| 840 |
+
// for correct zobrist key generation and 3-fold checking.
|
| 841 |
+
if ((int(to) ^ int(from)) == 16)
|
| 842 |
+
{
|
| 843 |
+
Square epSquare = to - pawn_push(us);
|
| 844 |
+
Bitboard pawns = attacks_bb<PAWN>(epSquare, us) & pieces(them, PAWN);
|
| 845 |
+
|
| 846 |
+
// If there are no pawns attacking the ep square, ep is not possible.
|
| 847 |
+
if (pawns)
|
| 848 |
+
{
|
| 849 |
+
Square ksq = square<KING>(them);
|
| 850 |
+
Bitboard notBlockers = ~st->previous->blockersForKing[them];
|
| 851 |
+
bool noDiscovery = (from & notBlockers) || file_of(from) == file_of(ksq);
|
| 852 |
+
|
| 853 |
+
// If the pawn gives discovered check, ep is never legal. Else, if at least one
|
| 854 |
+
// pawn was not a blocker for the enemy king or lies on the same line as the
|
| 855 |
+
// enemy king and en passant square, a legal capture exists.
|
| 856 |
+
if (noDiscovery && (pawns & (notBlockers | line_bb(epSquare, ksq))))
|
| 857 |
+
{
|
| 858 |
+
st->epSquare = epSquare;
|
| 859 |
+
k ^= Zobrist::enpassant[file_of(epSquare)];
|
| 860 |
+
}
|
| 861 |
+
}
|
| 862 |
+
}
|
| 863 |
+
|
| 864 |
+
else if (m.type_of() == PROMOTION)
|
| 865 |
+
{
|
| 866 |
+
Piece promotion = make_piece(us, m.promotion_type());
|
| 867 |
+
PieceType promotionType = type_of(promotion);
|
| 868 |
+
|
| 869 |
+
assert(relative_rank(us, to) == RANK_8);
|
| 870 |
+
assert(type_of(promotion) >= KNIGHT && type_of(promotion) <= QUEEN);
|
| 871 |
+
|
| 872 |
+
swap_piece(to, promotion, &dts);
|
| 873 |
+
|
| 874 |
+
dp.add_pc = promotion;
|
| 875 |
+
dp.add_sq = to;
|
| 876 |
+
dp.to = SQ_NONE;
|
| 877 |
+
|
| 878 |
+
// Update hash keys
|
| 879 |
+
// Zobrist::psq[pc][to] is zero, so we don't need to clear it
|
| 880 |
+
k ^= Zobrist::psq[promotion][to];
|
| 881 |
+
st->materialKey ^= Zobrist::psq[promotion][8 + pieceCount[promotion] - 1]
|
| 882 |
+
^ Zobrist::psq[pc][8 + pieceCount[pc]];
|
| 883 |
+
st->nonPawnKey[us] ^= Zobrist::psq[promotion][to];
|
| 884 |
+
|
| 885 |
+
if (promotionType <= BISHOP)
|
| 886 |
+
st->minorPieceKey ^= Zobrist::psq[promotion][to];
|
| 887 |
+
|
| 888 |
+
// Update material
|
| 889 |
+
st->nonPawnMaterial[us] += PieceValue[promotion];
|
| 890 |
+
}
|
| 891 |
+
|
| 892 |
+
// Update pawn hash key
|
| 893 |
+
st->pawnKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to];
|
| 894 |
+
|
| 895 |
+
// Reset rule 50 draw counter
|
| 896 |
+
st->rule50 = 0;
|
| 897 |
+
}
|
| 898 |
+
|
| 899 |
+
else
|
| 900 |
+
{
|
| 901 |
+
st->nonPawnKey[us] ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to];
|
| 902 |
+
|
| 903 |
+
if (type_of(pc) <= BISHOP)
|
| 904 |
+
st->minorPieceKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to];
|
| 905 |
+
}
|
| 906 |
+
|
| 907 |
+
// Update the key with the final value
|
| 908 |
+
st->key = k;
|
| 909 |
+
if (tt)
|
| 910 |
+
prefetch(tt->first_entry(key()));
|
| 911 |
+
|
| 912 |
+
if (history)
|
| 913 |
+
{
|
| 914 |
+
prefetch(&history->pawn_entry(*this)[pc][to]);
|
| 915 |
+
prefetch(&history->pawn_correction_entry(*this));
|
| 916 |
+
prefetch(&history->minor_piece_correction_entry(*this));
|
| 917 |
+
prefetch(&history->nonpawn_correction_entry<WHITE>(*this));
|
| 918 |
+
prefetch(&history->nonpawn_correction_entry<BLACK>(*this));
|
| 919 |
+
}
|
| 920 |
+
|
| 921 |
+
// Set capture piece
|
| 922 |
+
st->capturedPiece = captured;
|
| 923 |
+
|
| 924 |
+
// Calculate checkers bitboard (if move gives check)
|
| 925 |
+
st->checkersBB = givesCheck ? attackers_to(square<KING>(them)) & pieces(us) : 0;
|
| 926 |
+
|
| 927 |
+
sideToMove = ~sideToMove;
|
| 928 |
+
|
| 929 |
+
// Update king attacks used for fast check detection
|
| 930 |
+
set_check_info();
|
| 931 |
+
|
| 932 |
+
// Calculate the repetition info. It is the ply distance from the previous
|
| 933 |
+
// occurrence of the same position, negative in the 3-fold case, or zero
|
| 934 |
+
// if the position was not repeated.
|
| 935 |
+
st->repetition = 0;
|
| 936 |
+
int end = std::min(st->rule50, st->pliesFromNull);
|
| 937 |
+
if (end >= 4)
|
| 938 |
+
{
|
| 939 |
+
StateInfo* stp = st->previous->previous;
|
| 940 |
+
for (int i = 4; i <= end; i += 2)
|
| 941 |
+
{
|
| 942 |
+
stp = stp->previous->previous;
|
| 943 |
+
if (stp->key == st->key)
|
| 944 |
+
{
|
| 945 |
+
st->repetition = stp->repetition ? -i : i;
|
| 946 |
+
break;
|
| 947 |
+
}
|
| 948 |
+
}
|
| 949 |
+
}
|
| 950 |
+
|
| 951 |
+
dts.ksq = square<KING>(us);
|
| 952 |
+
|
| 953 |
+
assert(pos_is_ok());
|
| 954 |
+
|
| 955 |
+
assert(dp.pc != NO_PIECE);
|
| 956 |
+
assert(!(bool(captured) || m.type_of() == CASTLING) ^ (dp.remove_sq != SQ_NONE));
|
| 957 |
+
assert(dp.from != SQ_NONE);
|
| 958 |
+
assert(!(dp.add_sq != SQ_NONE) ^ (m.type_of() == PROMOTION || m.type_of() == CASTLING));
|
| 959 |
+
}
|
| 960 |
+
|
| 961 |
+
|
| 962 |
+
// Unmakes a move. When it returns, the position should
|
| 963 |
+
// be restored to exactly the same state as before the move was made.
|
| 964 |
+
void Position::undo_move(Move m) {
|
| 965 |
+
|
| 966 |
+
assert(m.is_ok());
|
| 967 |
+
|
| 968 |
+
sideToMove = ~sideToMove;
|
| 969 |
+
|
| 970 |
+
Color us = sideToMove;
|
| 971 |
+
Square from = m.from_sq();
|
| 972 |
+
Square to = m.to_sq();
|
| 973 |
+
Piece pc = piece_on(to);
|
| 974 |
+
|
| 975 |
+
assert(empty(from) || m.type_of() == CASTLING);
|
| 976 |
+
assert(type_of(st->capturedPiece) != KING);
|
| 977 |
+
|
| 978 |
+
if (m.type_of() == PROMOTION)
|
| 979 |
+
{
|
| 980 |
+
assert(relative_rank(us, to) == RANK_8);
|
| 981 |
+
assert(type_of(pc) == m.promotion_type());
|
| 982 |
+
assert(type_of(pc) >= KNIGHT && type_of(pc) <= QUEEN);
|
| 983 |
+
|
| 984 |
+
remove_piece(to);
|
| 985 |
+
pc = make_piece(us, PAWN);
|
| 986 |
+
put_piece(pc, to);
|
| 987 |
+
}
|
| 988 |
+
|
| 989 |
+
if (m.type_of() == CASTLING)
|
| 990 |
+
{
|
| 991 |
+
Square rfrom, rto;
|
| 992 |
+
do_castling<false>(us, from, to, rfrom, rto);
|
| 993 |
+
}
|
| 994 |
+
else
|
| 995 |
+
{
|
| 996 |
+
move_piece(to, from); // Put the piece back at the source square
|
| 997 |
+
|
| 998 |
+
if (st->capturedPiece)
|
| 999 |
+
{
|
| 1000 |
+
Square capsq = to;
|
| 1001 |
+
|
| 1002 |
+
if (m.type_of() == EN_PASSANT)
|
| 1003 |
+
{
|
| 1004 |
+
capsq -= pawn_push(us);
|
| 1005 |
+
|
| 1006 |
+
assert(type_of(pc) == PAWN);
|
| 1007 |
+
assert(to == st->previous->epSquare);
|
| 1008 |
+
assert(relative_rank(us, to) == RANK_6);
|
| 1009 |
+
assert(piece_on(capsq) == NO_PIECE);
|
| 1010 |
+
assert(st->capturedPiece == make_piece(~us, PAWN));
|
| 1011 |
+
}
|
| 1012 |
+
|
| 1013 |
+
put_piece(st->capturedPiece, capsq); // Restore the captured piece
|
| 1014 |
+
}
|
| 1015 |
+
}
|
| 1016 |
+
|
| 1017 |
+
// Finally point our state pointer back to the previous state
|
| 1018 |
+
st = st->previous;
|
| 1019 |
+
--gamePly;
|
| 1020 |
+
|
| 1021 |
+
assert(pos_is_ok());
|
| 1022 |
+
}
|
| 1023 |
+
|
| 1024 |
+
template<bool PutPiece>
|
| 1025 |
+
inline void add_dirty_threat(
|
| 1026 |
+
DirtyThreats* const dts, Piece pc, Piece threatened, Square s, Square threatenedSq) {
|
| 1027 |
+
if (PutPiece)
|
| 1028 |
+
{
|
| 1029 |
+
dts->threatenedSqs |= threatenedSq;
|
| 1030 |
+
dts->threateningSqs |= s;
|
| 1031 |
+
}
|
| 1032 |
+
|
| 1033 |
+
dts->list.push_back({pc, threatened, s, threatenedSq, PutPiece});
|
| 1034 |
+
}
|
| 1035 |
+
|
| 1036 |
+
#ifdef USE_AVX512ICL
// Vectorised emission of up to 16 DirtyThreat records in one go. For every
// square set in mask, a copy of dt_template is produced with that square
// OR-ed in at bit offset SqShift and the piece standing on it OR-ed in at bit
// offset PcShift. The records are written to space reserved in dts->list.
template<int SqShift, int PcShift>
void write_multiple_dirties(const Position& p,
                            Bitboard        mask,
                            DirtyThreat     dt_template,
                            DirtyThreats*   dts) {
    // One record must fill exactly one 32-bit lane
    static_assert(sizeof(DirtyThreat) == 4);

    const __m512i pieceBytes = _mm512_loadu_si512(p.piece_array().data());
    const __m512i squareIds  = _mm512_set_epi8(
      63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41,
      40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
      17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);

    const int count = popcount(mask);
    assert(count <= 16);

    const __m512i templateLanes = _mm512_set1_epi32(dt_template.raw());
    auto*         dst           = dts->list.make_space(count);

    // Pack the indices of the set squares into the low bytes, then widen the
    // first 16 of them to one 32-bit lane each (count <= 16, so none is lost).
    const __m512i packedSquares = _mm512_maskz_compress_epi8(mask, squareIds);
    const __m512i squareLanes   = _mm512_cvtepi8_epi32(_mm512_castsi512_si128(packedSquares));

    // Look up the piece on each square; only the low byte of every lane is kept
    const __m512i pieceLanes =
      _mm512_maskz_permutexvar_epi8(0x1111111111111111ULL, squareLanes, pieceBytes);

    // Move square and piece to their bit positions inside the record
    const __m512i shiftedSquares = _mm512_slli_epi32(squareLanes, SqShift);
    const __m512i shiftedPieces  = _mm512_slli_epi32(pieceLanes, PcShift);

    const __m512i records = _mm512_ternarylogic_epi32(templateLanes, shiftedSquares,
                                                      shiftedPieces, 254 /* A | B | C */);

    // NOTE(review): a full 64-byte vector is stored even when count < 16, so
    // the list presumably keeps enough slack past the reserved entries - confirm.
    _mm512_storeu_si512(dst, records);
}
#endif
|
| 1075 |
+
|
| 1076 |
+
template<bool PutPiece, bool ComputeRay>
|
| 1077 |
+
void Position::update_piece_threats(Piece pc,
|
| 1078 |
+
Square s,
|
| 1079 |
+
DirtyThreats* const dts,
|
| 1080 |
+
[[maybe_unused]] Bitboard noRaysContaining) const {
|
| 1081 |
+
const Bitboard occupied = pieces();
|
| 1082 |
+
const Bitboard rookQueens = pieces(ROOK, QUEEN);
|
| 1083 |
+
const Bitboard bishopQueens = pieces(BISHOP, QUEEN);
|
| 1084 |
+
const Bitboard rAttacks = attacks_bb<ROOK>(s, occupied);
|
| 1085 |
+
const Bitboard bAttacks = attacks_bb<BISHOP>(s, occupied);
|
| 1086 |
+
const Bitboard kings = pieces(KING);
|
| 1087 |
+
Bitboard occupiedNoK = occupied ^ kings;
|
| 1088 |
+
|
| 1089 |
+
Bitboard sliders = (rookQueens & rAttacks) | (bishopQueens & bAttacks);
|
| 1090 |
+
auto process_sliders = [&](bool addDirectAttacks) {
|
| 1091 |
+
while (sliders)
|
| 1092 |
+
{
|
| 1093 |
+
Square sliderSq = pop_lsb(sliders);
|
| 1094 |
+
Piece slider = piece_on(sliderSq);
|
| 1095 |
+
|
| 1096 |
+
const Bitboard ray = RayPassBB[sliderSq][s];
|
| 1097 |
+
const Bitboard discovered = ray & (rAttacks | bAttacks) & occupiedNoK;
|
| 1098 |
+
|
| 1099 |
+
assert(!more_than_one(discovered));
|
| 1100 |
+
if (discovered && (RayPassBB[sliderSq][s] & noRaysContaining) != noRaysContaining)
|
| 1101 |
+
{
|
| 1102 |
+
const Square threatenedSq = lsb(discovered);
|
| 1103 |
+
const Piece threatenedPc = piece_on(threatenedSq);
|
| 1104 |
+
add_dirty_threat<!PutPiece>(dts, slider, threatenedPc, sliderSq, threatenedSq);
|
| 1105 |
+
}
|
| 1106 |
+
|
| 1107 |
+
if (addDirectAttacks)
|
| 1108 |
+
add_dirty_threat<PutPiece>(dts, slider, pc, sliderSq, s);
|
| 1109 |
+
}
|
| 1110 |
+
};
|
| 1111 |
+
|
| 1112 |
+
if (type_of(pc) == KING)
|
| 1113 |
+
{
|
| 1114 |
+
if constexpr (ComputeRay)
|
| 1115 |
+
process_sliders(false);
|
| 1116 |
+
return;
|
| 1117 |
+
}
|
| 1118 |
+
|
| 1119 |
+
|
| 1120 |
+
const Bitboard knights = pieces(KNIGHT);
|
| 1121 |
+
const Bitboard whitePawns = pieces(WHITE, PAWN);
|
| 1122 |
+
const Bitboard blackPawns = pieces(BLACK, PAWN);
|
| 1123 |
+
|
| 1124 |
+
|
| 1125 |
+
Bitboard threatened = attacks_bb(pc, s, occupied) & occupiedNoK;
|
| 1126 |
+
Bitboard incoming_threats =
|
| 1127 |
+
(PseudoAttacks[KNIGHT][s] & knights) | (attacks_bb<PAWN>(s, WHITE) & blackPawns)
|
| 1128 |
+
| (attacks_bb<PAWN>(s, BLACK) & whitePawns) | (PseudoAttacks[KING][s] & kings);
|
| 1129 |
+
|
| 1130 |
+
#ifdef USE_AVX512ICL
|
| 1131 |
+
if constexpr (PutPiece)
|
| 1132 |
+
{
|
| 1133 |
+
dts->threatenedSqs |= threatened;
|
| 1134 |
+
// A bit may only be set if that square actually produces a threat, so we
|
| 1135 |
+
// must guard setting the square accordingly
|
| 1136 |
+
dts->threateningSqs |= Bitboard(bool(threatened)) << s;
|
| 1137 |
+
}
|
| 1138 |
+
|
| 1139 |
+
DirtyThreat dt_template{pc, NO_PIECE, s, Square(0), PutPiece};
|
| 1140 |
+
write_multiple_dirties<DirtyThreat::ThreatenedSqOffset, DirtyThreat::ThreatenedPcOffset>(
|
| 1141 |
+
*this, threatened, dt_template, dts);
|
| 1142 |
+
|
| 1143 |
+
Bitboard all_attackers = sliders | incoming_threats;
|
| 1144 |
+
|
| 1145 |
+
if constexpr (PutPiece)
|
| 1146 |
+
{
|
| 1147 |
+
dts->threatenedSqs |= Bitboard(bool(all_attackers)) << s; // same as above
|
| 1148 |
+
dts->threateningSqs |= all_attackers;
|
| 1149 |
+
}
|
| 1150 |
+
|
| 1151 |
+
dt_template = {NO_PIECE, pc, Square(0), s, PutPiece};
|
| 1152 |
+
write_multiple_dirties<DirtyThreat::PcSqOffset, DirtyThreat::PcOffset>(*this, all_attackers,
|
| 1153 |
+
dt_template, dts);
|
| 1154 |
+
#else
|
| 1155 |
+
while (threatened)
|
| 1156 |
+
{
|
| 1157 |
+
Square threatenedSq = pop_lsb(threatened);
|
| 1158 |
+
Piece threatenedPc = piece_on(threatenedSq);
|
| 1159 |
+
|
| 1160 |
+
assert(threatenedSq != s);
|
| 1161 |
+
assert(threatenedPc);
|
| 1162 |
+
|
| 1163 |
+
add_dirty_threat<PutPiece>(dts, pc, threatenedPc, s, threatenedSq);
|
| 1164 |
+
}
|
| 1165 |
+
#endif
|
| 1166 |
+
|
| 1167 |
+
if constexpr (ComputeRay)
|
| 1168 |
+
{
|
| 1169 |
+
#ifndef USE_AVX512ICL
|
| 1170 |
+
process_sliders(true);
|
| 1171 |
+
#else // for ICL, direct threats were processed earlier (all_attackers)
|
| 1172 |
+
process_sliders(false);
|
| 1173 |
+
#endif
|
| 1174 |
+
}
|
| 1175 |
+
else
|
| 1176 |
+
{
|
| 1177 |
+
incoming_threats |= sliders;
|
| 1178 |
+
}
|
| 1179 |
+
|
| 1180 |
+
#ifndef USE_AVX512ICL
|
| 1181 |
+
while (incoming_threats)
|
| 1182 |
+
{
|
| 1183 |
+
Square srcSq = pop_lsb(incoming_threats);
|
| 1184 |
+
Piece srcPc = piece_on(srcSq);
|
| 1185 |
+
|
| 1186 |
+
assert(srcSq != s);
|
| 1187 |
+
assert(srcPc != NO_PIECE);
|
| 1188 |
+
|
| 1189 |
+
add_dirty_threat<PutPiece>(dts, srcPc, pc, srcSq, s);
|
| 1190 |
+
}
|
| 1191 |
+
#endif
|
| 1192 |
+
}
|
| 1193 |
+
|
| 1194 |
+
// Helper used to do/undo a castling move. This is a bit
|
| 1195 |
+
// tricky in Chess960 where from/to squares can overlap.
|
| 1196 |
+
template<bool Do>
|
| 1197 |
+
void Position::do_castling(Color us,
|
| 1198 |
+
Square from,
|
| 1199 |
+
Square& to,
|
| 1200 |
+
Square& rfrom,
|
| 1201 |
+
Square& rto,
|
| 1202 |
+
DirtyThreats* const dts,
|
| 1203 |
+
DirtyPiece* const dp) {
|
| 1204 |
+
|
| 1205 |
+
bool kingSide = to > from;
|
| 1206 |
+
rfrom = to; // Castling is encoded as "king captures friendly rook"
|
| 1207 |
+
rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
|
| 1208 |
+
to = relative_square(us, kingSide ? SQ_G1 : SQ_C1);
|
| 1209 |
+
|
| 1210 |
+
assert(!Do || dp);
|
| 1211 |
+
|
| 1212 |
+
if (Do)
|
| 1213 |
+
{
|
| 1214 |
+
dp->to = to;
|
| 1215 |
+
dp->remove_pc = dp->add_pc = make_piece(us, ROOK);
|
| 1216 |
+
dp->remove_sq = rfrom;
|
| 1217 |
+
dp->add_sq = rto;
|
| 1218 |
+
}
|
| 1219 |
+
|
| 1220 |
+
// Remove both pieces first since squares could overlap in Chess960
|
| 1221 |
+
remove_piece(Do ? from : to, dts);
|
| 1222 |
+
remove_piece(Do ? rfrom : rto, dts);
|
| 1223 |
+
put_piece(make_piece(us, KING), Do ? to : from, dts);
|
| 1224 |
+
put_piece(make_piece(us, ROOK), Do ? rto : rfrom, dts);
|
| 1225 |
+
}
|
| 1226 |
+
|
| 1227 |
+
|
| 1228 |
+
// Used to do a "null move": it flips
|
| 1229 |
+
// the side to move without executing any move on the board.
|
| 1230 |
+
void Position::do_null_move(StateInfo& newSt) {
|
| 1231 |
+
|
| 1232 |
+
assert(!checkers());
|
| 1233 |
+
assert(&newSt != st);
|
| 1234 |
+
|
| 1235 |
+
std::memcpy(&newSt, st, sizeof(StateInfo));
|
| 1236 |
+
|
| 1237 |
+
newSt.previous = st;
|
| 1238 |
+
st = &newSt;
|
| 1239 |
+
|
| 1240 |
+
if (st->epSquare != SQ_NONE)
|
| 1241 |
+
{
|
| 1242 |
+
st->key ^= Zobrist::enpassant[file_of(st->epSquare)];
|
| 1243 |
+
st->epSquare = SQ_NONE;
|
| 1244 |
+
}
|
| 1245 |
+
|
| 1246 |
+
st->key ^= Zobrist::side;
|
| 1247 |
+
|
| 1248 |
+
st->pliesFromNull = 0;
|
| 1249 |
+
|
| 1250 |
+
sideToMove = ~sideToMove;
|
| 1251 |
+
|
| 1252 |
+
set_check_info();
|
| 1253 |
+
|
| 1254 |
+
st->repetition = 0;
|
| 1255 |
+
|
| 1256 |
+
assert(pos_is_ok());
|
| 1257 |
+
}
|
| 1258 |
+
|
| 1259 |
+
|
| 1260 |
+
// Must be used to undo a "null move"
|
| 1261 |
+
void Position::undo_null_move() {
|
| 1262 |
+
|
| 1263 |
+
assert(!checkers());
|
| 1264 |
+
|
| 1265 |
+
st = st->previous;
|
| 1266 |
+
sideToMove = ~sideToMove;
|
| 1267 |
+
}
|
| 1268 |
+
|
| 1269 |
+
|
| 1270 |
+
// Tests if the SEE (Static Exchange Evaluation)
|
| 1271 |
+
// value of move is greater or equal to the given threshold. We'll use an
|
| 1272 |
+
// algorithm similar to alpha-beta pruning with a null window.
|
| 1273 |
+
bool Position::see_ge(Move m, int threshold) const {
|
| 1274 |
+
|
| 1275 |
+
assert(m.is_ok());
|
| 1276 |
+
|
| 1277 |
+
// Only deal with normal moves, assume others pass a simple SEE
|
| 1278 |
+
if (m.type_of() != NORMAL)
|
| 1279 |
+
return VALUE_ZERO >= threshold;
|
| 1280 |
+
|
| 1281 |
+
Square from = m.from_sq(), to = m.to_sq();
|
| 1282 |
+
|
| 1283 |
+
assert(piece_on(from) != NO_PIECE);
|
| 1284 |
+
|
| 1285 |
+
int swap = PieceValue[piece_on(to)] - threshold;
|
| 1286 |
+
if (swap < 0)
|
| 1287 |
+
return false;
|
| 1288 |
+
|
| 1289 |
+
swap = PieceValue[piece_on(from)] - swap;
|
| 1290 |
+
if (swap <= 0)
|
| 1291 |
+
return true;
|
| 1292 |
+
|
| 1293 |
+
assert(color_of(piece_on(from)) == sideToMove);
|
| 1294 |
+
Bitboard occupied = pieces() ^ from ^ to; // xoring to is important for pinned piece logic
|
| 1295 |
+
Color stm = sideToMove;
|
| 1296 |
+
Bitboard attackers = attackers_to(to, occupied);
|
| 1297 |
+
Bitboard stmAttackers, bb;
|
| 1298 |
+
int res = 1;
|
| 1299 |
+
|
| 1300 |
+
while (true)
|
| 1301 |
+
{
|
| 1302 |
+
stm = ~stm;
|
| 1303 |
+
attackers &= occupied;
|
| 1304 |
+
|
| 1305 |
+
// If stm has no more attackers then give up: stm loses
|
| 1306 |
+
if (!(stmAttackers = attackers & pieces(stm)))
|
| 1307 |
+
break;
|
| 1308 |
+
|
| 1309 |
+
// Don't allow pinned pieces to attack as long as there are
|
| 1310 |
+
// pinners on their original square.
|
| 1311 |
+
if (pinners(~stm) & occupied)
|
| 1312 |
+
{
|
| 1313 |
+
stmAttackers &= ~blockers_for_king(stm);
|
| 1314 |
+
|
| 1315 |
+
if (!stmAttackers)
|
| 1316 |
+
break;
|
| 1317 |
+
}
|
| 1318 |
+
|
| 1319 |
+
res ^= 1;
|
| 1320 |
+
|
| 1321 |
+
// Locate and remove the next least valuable attacker, and add to
|
| 1322 |
+
// the bitboard 'attackers' any X-ray attackers behind it.
|
| 1323 |
+
if ((bb = stmAttackers & pieces(PAWN)))
|
| 1324 |
+
{
|
| 1325 |
+
if ((swap = PawnValue - swap) < res)
|
| 1326 |
+
break;
|
| 1327 |
+
occupied ^= least_significant_square_bb(bb);
|
| 1328 |
+
|
| 1329 |
+
attackers |= attacks_bb<BISHOP>(to, occupied) & pieces(BISHOP, QUEEN);
|
| 1330 |
+
}
|
| 1331 |
+
|
| 1332 |
+
else if ((bb = stmAttackers & pieces(KNIGHT)))
|
| 1333 |
+
{
|
| 1334 |
+
if ((swap = KnightValue - swap) < res)
|
| 1335 |
+
break;
|
| 1336 |
+
occupied ^= least_significant_square_bb(bb);
|
| 1337 |
+
}
|
| 1338 |
+
|
| 1339 |
+
else if ((bb = stmAttackers & pieces(BISHOP)))
|
| 1340 |
+
{
|
| 1341 |
+
if ((swap = BishopValue - swap) < res)
|
| 1342 |
+
break;
|
| 1343 |
+
occupied ^= least_significant_square_bb(bb);
|
| 1344 |
+
|
| 1345 |
+
attackers |= attacks_bb<BISHOP>(to, occupied) & pieces(BISHOP, QUEEN);
|
| 1346 |
+
}
|
| 1347 |
+
|
| 1348 |
+
else if ((bb = stmAttackers & pieces(ROOK)))
|
| 1349 |
+
{
|
| 1350 |
+
if ((swap = RookValue - swap) < res)
|
| 1351 |
+
break;
|
| 1352 |
+
occupied ^= least_significant_square_bb(bb);
|
| 1353 |
+
|
| 1354 |
+
attackers |= attacks_bb<ROOK>(to, occupied) & pieces(ROOK, QUEEN);
|
| 1355 |
+
}
|
| 1356 |
+
|
| 1357 |
+
else if ((bb = stmAttackers & pieces(QUEEN)))
|
| 1358 |
+
{
|
| 1359 |
+
swap = QueenValue - swap;
|
| 1360 |
+
// implies that the previous recapture was done by a higher rated piece than a Queen (King is excluded)
|
| 1361 |
+
assert(swap >= res);
|
| 1362 |
+
occupied ^= least_significant_square_bb(bb);
|
| 1363 |
+
|
| 1364 |
+
attackers |= (attacks_bb<BISHOP>(to, occupied) & pieces(BISHOP, QUEEN))
|
| 1365 |
+
| (attacks_bb<ROOK>(to, occupied) & pieces(ROOK, QUEEN));
|
| 1366 |
+
}
|
| 1367 |
+
|
| 1368 |
+
else // KING
|
| 1369 |
+
// If we "capture" with the king but the opponent still has attackers,
|
| 1370 |
+
// reverse the result.
|
| 1371 |
+
return (attackers & ~pieces(stm)) ? res ^ 1 : res;
|
| 1372 |
+
}
|
| 1373 |
+
|
| 1374 |
+
return bool(res);
|
| 1375 |
+
}
|
| 1376 |
+
|
| 1377 |
+
// Tests whether the position is drawn by 50-move rule
|
| 1378 |
+
// or by repetition. It does not detect stalemates.
|
| 1379 |
+
bool Position::is_draw(int ply) const {
|
| 1380 |
+
|
| 1381 |
+
if (st->rule50 > 99 && (!checkers() || MoveList<LEGAL>(*this).size()))
|
| 1382 |
+
return true;
|
| 1383 |
+
|
| 1384 |
+
return is_repetition(ply);
|
| 1385 |
+
}
|
| 1386 |
+
|
| 1387 |
+
// Return a draw score if a position repeats once earlier but strictly
|
| 1388 |
+
// after the root, or repeats twice before or at the root.
|
| 1389 |
+
bool Position::is_repetition(int ply) const { return st->repetition && st->repetition < ply; }
|
| 1390 |
+
|
| 1391 |
+
// Tests whether there has been at least one repetition
|
| 1392 |
+
// of positions since the last capture or pawn move.
|
| 1393 |
+
bool Position::has_repeated() const {
|
| 1394 |
+
|
| 1395 |
+
StateInfo* stc = st;
|
| 1396 |
+
int end = std::min(st->rule50, st->pliesFromNull);
|
| 1397 |
+
while (end-- >= 4)
|
| 1398 |
+
{
|
| 1399 |
+
if (stc->repetition)
|
| 1400 |
+
return true;
|
| 1401 |
+
|
| 1402 |
+
stc = stc->previous;
|
| 1403 |
+
}
|
| 1404 |
+
return false;
|
| 1405 |
+
}
|
| 1406 |
+
|
| 1407 |
+
|
| 1408 |
+
// Tests if the position has a move which draws by repetition.
|
| 1409 |
+
// This function accurately matches the outcome of is_draw() over all legal moves.
|
| 1410 |
+
bool Position::upcoming_repetition(int ply) const {
|
| 1411 |
+
|
| 1412 |
+
int j;
|
| 1413 |
+
|
| 1414 |
+
int end = std::min(st->rule50, st->pliesFromNull);
|
| 1415 |
+
|
| 1416 |
+
if (end < 3)
|
| 1417 |
+
return false;
|
| 1418 |
+
|
| 1419 |
+
Key originalKey = st->key;
|
| 1420 |
+
StateInfo* stp = st->previous;
|
| 1421 |
+
Key other = originalKey ^ stp->key ^ Zobrist::side;
|
| 1422 |
+
|
| 1423 |
+
for (int i = 3; i <= end; i += 2)
|
| 1424 |
+
{
|
| 1425 |
+
stp = stp->previous;
|
| 1426 |
+
other ^= stp->key ^ stp->previous->key ^ Zobrist::side;
|
| 1427 |
+
stp = stp->previous;
|
| 1428 |
+
|
| 1429 |
+
if (other != 0)
|
| 1430 |
+
continue;
|
| 1431 |
+
|
| 1432 |
+
Key moveKey = originalKey ^ stp->key;
|
| 1433 |
+
if ((j = H1(moveKey), cuckoo[j] == moveKey) || (j = H2(moveKey), cuckoo[j] == moveKey))
|
| 1434 |
+
{
|
| 1435 |
+
Move move = cuckooMove[j];
|
| 1436 |
+
Square s1 = move.from_sq();
|
| 1437 |
+
Square s2 = move.to_sq();
|
| 1438 |
+
|
| 1439 |
+
if (!((between_bb(s1, s2) ^ s2) & pieces()))
|
| 1440 |
+
{
|
| 1441 |
+
if (ply > i)
|
| 1442 |
+
return true;
|
| 1443 |
+
|
| 1444 |
+
// For nodes before or at the root, check that the move is a
|
| 1445 |
+
// repetition rather than a move to the current position.
|
| 1446 |
+
if (stp->repetition)
|
| 1447 |
+
return true;
|
| 1448 |
+
}
|
| 1449 |
+
}
|
| 1450 |
+
}
|
| 1451 |
+
return false;
|
| 1452 |
+
}
|
| 1453 |
+
|
| 1454 |
+
|
| 1455 |
+
// Flips position with the white and black sides reversed. This
|
| 1456 |
+
// is only useful for debugging e.g. for finding evaluation symmetry bugs.
|
| 1457 |
+
void Position::flip() {
|
| 1458 |
+
|
| 1459 |
+
string f, token;
|
| 1460 |
+
std::stringstream ss(fen());
|
| 1461 |
+
|
| 1462 |
+
for (Rank r = RANK_8;; --r) // Piece placement
|
| 1463 |
+
{
|
| 1464 |
+
std::getline(ss, token, r > RANK_1 ? '/' : ' ');
|
| 1465 |
+
f.insert(0, token + (f.empty() ? " " : "/"));
|
| 1466 |
+
|
| 1467 |
+
if (r == RANK_1)
|
| 1468 |
+
break;
|
| 1469 |
+
}
|
| 1470 |
+
|
| 1471 |
+
ss >> token; // Active color
|
| 1472 |
+
f += (token == "w" ? "B " : "W "); // Will be lowercased later
|
| 1473 |
+
|
| 1474 |
+
ss >> token; // Castling availability
|
| 1475 |
+
f += token + " ";
|
| 1476 |
+
|
| 1477 |
+
std::transform(f.begin(), f.end(), f.begin(),
|
| 1478 |
+
[](char c) { return char(islower(c) ? toupper(c) : tolower(c)); });
|
| 1479 |
+
|
| 1480 |
+
ss >> token; // En passant square
|
| 1481 |
+
f += (token == "-" ? token : token.replace(1, 1, token[1] == '3' ? "6" : "3"));
|
| 1482 |
+
|
| 1483 |
+
std::getline(ss, token); // Half and full moves
|
| 1484 |
+
f += token;
|
| 1485 |
+
|
| 1486 |
+
set(f, is_chess960(), st);
|
| 1487 |
+
|
| 1488 |
+
assert(pos_is_ok());
|
| 1489 |
+
}
|
| 1490 |
+
|
| 1491 |
+
|
| 1492 |
+
// Checks that the material key stored in the current state equals the one
// computed by compute_material_key().
bool Position::material_key_is_ok() const {

    const auto expected = compute_material_key();

    return expected == st->materialKey;
}
|
| 1493 |
+
|
| 1494 |
+
|
| 1495 |
+
// Performs some consistency checks for the position object
|
| 1496 |
+
// and raise an assert if something wrong is detected.
|
| 1497 |
+
// This is meant to be helpful when debugging.
|
| 1498 |
+
bool Position::pos_is_ok() const {
|
| 1499 |
+
|
| 1500 |
+
constexpr bool Fast = true; // Quick (default) or full check?
|
| 1501 |
+
|
| 1502 |
+
if ((sideToMove != WHITE && sideToMove != BLACK) || piece_on(square<KING>(WHITE)) != W_KING
|
| 1503 |
+
|| piece_on(square<KING>(BLACK)) != B_KING
|
| 1504 |
+
|| (ep_square() != SQ_NONE && relative_rank(sideToMove, ep_square()) != RANK_6))
|
| 1505 |
+
assert(0 && "pos_is_ok: Default");
|
| 1506 |
+
|
| 1507 |
+
if (Fast)
|
| 1508 |
+
return true;
|
| 1509 |
+
|
| 1510 |
+
if (pieceCount[W_KING] != 1 || pieceCount[B_KING] != 1
|
| 1511 |
+
|| attackers_to_exist(square<KING>(~sideToMove), pieces(), sideToMove))
|
| 1512 |
+
assert(0 && "pos_is_ok: Kings");
|
| 1513 |
+
|
| 1514 |
+
if ((pieces(PAWN) & (Rank1BB | Rank8BB)) || pieceCount[W_PAWN] > 8 || pieceCount[B_PAWN] > 8)
|
| 1515 |
+
assert(0 && "pos_is_ok: Pawns");
|
| 1516 |
+
|
| 1517 |
+
|
| 1518 |
+
if (ep_square() != SQ_NONE)
|
| 1519 |
+
{
|
| 1520 |
+
Square ksq = square<KING>(sideToMove);
|
| 1521 |
+
|
| 1522 |
+
Bitboard captured = (ep_square() + pawn_push(~sideToMove)) & pieces(~sideToMove, PAWN);
|
| 1523 |
+
Bitboard pawns = attacks_bb<PAWN>(ep_square(), ~sideToMove) & pieces(sideToMove, PAWN);
|
| 1524 |
+
Bitboard potentialCheckers = pieces(~sideToMove) ^ captured;
|
| 1525 |
+
|
| 1526 |
+
if (!captured || !pawns
|
| 1527 |
+
|| ((attackers_to(ksq, pieces() ^ captured ^ ep_square() ^ lsb(pawns))
|
| 1528 |
+
& potentialCheckers)
|
| 1529 |
+
&& (attackers_to(ksq, pieces() ^ captured ^ ep_square() ^ msb(pawns))
|
| 1530 |
+
& potentialCheckers)))
|
| 1531 |
+
assert(0 && "pos_is_ok: En passant square");
|
| 1532 |
+
}
|
| 1533 |
+
|
| 1534 |
+
if ((pieces(WHITE) & pieces(BLACK)) || (pieces(WHITE) | pieces(BLACK)) != pieces()
|
| 1535 |
+
|| popcount(pieces(WHITE)) > 16 || popcount(pieces(BLACK)) > 16)
|
| 1536 |
+
assert(0 && "pos_is_ok: Bitboards");
|
| 1537 |
+
|
| 1538 |
+
for (PieceType p1 = PAWN; p1 <= KING; ++p1)
|
| 1539 |
+
for (PieceType p2 = PAWN; p2 <= KING; ++p2)
|
| 1540 |
+
if (p1 != p2 && (pieces(p1) & pieces(p2)))
|
| 1541 |
+
assert(0 && "pos_is_ok: Bitboards");
|
| 1542 |
+
|
| 1543 |
+
|
| 1544 |
+
for (Piece pc : Pieces)
|
| 1545 |
+
if (pieceCount[pc] != popcount(pieces(color_of(pc), type_of(pc)))
|
| 1546 |
+
|| pieceCount[pc] != std::count(board.begin(), board.end(), pc))
|
| 1547 |
+
assert(0 && "pos_is_ok: Pieces");
|
| 1548 |
+
|
| 1549 |
+
for (Color c : {WHITE, BLACK})
|
| 1550 |
+
for (CastlingRights cr : {c & KING_SIDE, c & QUEEN_SIDE})
|
| 1551 |
+
{
|
| 1552 |
+
if (!can_castle(cr))
|
| 1553 |
+
continue;
|
| 1554 |
+
|
| 1555 |
+
if (piece_on(castlingRookSquare[cr]) != make_piece(c, ROOK)
|
| 1556 |
+
|| castlingRightsMask[castlingRookSquare[cr]] != cr
|
| 1557 |
+
|| (castlingRightsMask[square<KING>(c)] & cr) != cr)
|
| 1558 |
+
assert(0 && "pos_is_ok: Castling");
|
| 1559 |
+
}
|
| 1560 |
+
|
| 1561 |
+
assert(material_key_is_ok() && "pos_is_ok: materialKey");
|
| 1562 |
+
|
| 1563 |
+
return true;
|
| 1564 |
+
}
|
| 1565 |
+
|
| 1566 |
+
} // namespace Stockfish
|
src/position.h
ADDED
|
@@ -0,0 +1,414 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef POSITION_H_INCLUDED
|
| 20 |
+
#define POSITION_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <array>
|
| 23 |
+
#include <cassert>
|
| 24 |
+
#include <deque>
|
| 25 |
+
#include <iosfwd>
|
| 26 |
+
#include <memory>
|
| 27 |
+
#include <new>
|
| 28 |
+
#include <string>
|
| 29 |
+
|
| 30 |
+
#include "bitboard.h"
|
| 31 |
+
#include "types.h"
|
| 32 |
+
|
| 33 |
+
namespace Stockfish {
|
| 34 |
+
|
| 35 |
+
class TranspositionTable;
|
| 36 |
+
struct SharedHistories;
|
| 37 |
+
|
| 38 |
+
// StateInfo struct stores information needed to restore a Position object to
|
| 39 |
+
// its previous state when we retract a move. Whenever a move is made on the
|
| 40 |
+
// board (by calling Position::do_move), a StateInfo object must be passed.
|
| 41 |
+
|
| 42 |
+
struct StateInfo {
|
| 43 |
+
|
| 44 |
+
// Copied when making a move
|
| 45 |
+
Key materialKey;
|
| 46 |
+
Key pawnKey;
|
| 47 |
+
Key minorPieceKey;
|
| 48 |
+
Key nonPawnKey[COLOR_NB];
|
| 49 |
+
Value nonPawnMaterial[COLOR_NB];
|
| 50 |
+
int castlingRights;
|
| 51 |
+
int rule50;
|
| 52 |
+
int pliesFromNull;
|
| 53 |
+
Square epSquare;
|
| 54 |
+
|
| 55 |
+
// Not copied when making a move (will be recomputed anyhow)
|
| 56 |
+
Key key;
|
| 57 |
+
Bitboard checkersBB;
|
| 58 |
+
StateInfo* previous;
|
| 59 |
+
Bitboard blockersForKing[COLOR_NB];
|
| 60 |
+
Bitboard pinners[COLOR_NB];
|
| 61 |
+
Bitboard checkSquares[PIECE_TYPE_NB];
|
| 62 |
+
Piece capturedPiece;
|
| 63 |
+
int repetition;
|
| 64 |
+
};
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
// A list to keep track of the position states along the setup moves (from the
|
| 68 |
+
// start position to the position just before the search starts). Needed by
|
| 69 |
+
// 'draw by repetition' detection. Use a std::deque because pointers to
|
| 70 |
+
// elements are not invalidated upon list resizing.
|
| 71 |
+
using StateListPtr = std::unique_ptr<std::deque<StateInfo>>;
|
| 72 |
+
|
| 73 |
+
// Position class stores information regarding the board representation as
|
| 74 |
+
// pieces, side to move, hash keys, castling info, etc. Important methods are
|
| 75 |
+
// do_move() and undo_move(), used by the search to update node info when
|
| 76 |
+
// traversing the search tree.
|
| 77 |
+
class Position {
|
| 78 |
+
public:
|
| 79 |
+
static void init();
|
| 80 |
+
|
| 81 |
+
Position() = default;
|
| 82 |
+
Position(const Position&) = delete;
|
| 83 |
+
Position& operator=(const Position&) = delete;
|
| 84 |
+
|
| 85 |
+
// FEN string input/output
|
| 86 |
+
Position& set(const std::string& fenStr, bool isChess960, StateInfo* si);
|
| 87 |
+
Position& set(const std::string& code, Color c, StateInfo* si);
|
| 88 |
+
std::string fen() const;
|
| 89 |
+
|
| 90 |
+
// Position representation
|
| 91 |
+
Bitboard pieces() const; // All pieces
|
| 92 |
+
template<typename... PieceTypes>
|
| 93 |
+
Bitboard pieces(PieceTypes... pts) const;
|
| 94 |
+
Bitboard pieces(Color c) const;
|
| 95 |
+
template<typename... PieceTypes>
|
| 96 |
+
Bitboard pieces(Color c, PieceTypes... pts) const;
|
| 97 |
+
Piece piece_on(Square s) const;
|
| 98 |
+
const std::array<Piece, SQUARE_NB>& piece_array() const;
|
| 99 |
+
Square ep_square() const;
|
| 100 |
+
bool empty(Square s) const;
|
| 101 |
+
template<PieceType Pt>
|
| 102 |
+
int count(Color c) const;
|
| 103 |
+
template<PieceType Pt>
|
| 104 |
+
int count() const;
|
| 105 |
+
template<PieceType Pt>
|
| 106 |
+
Square square(Color c) const;
|
| 107 |
+
|
| 108 |
+
// Castling
|
| 109 |
+
bool can_castle(CastlingRights cr) const;
|
| 110 |
+
bool castling_impeded(CastlingRights cr) const;
|
| 111 |
+
Square castling_rook_square(CastlingRights cr) const;
|
| 112 |
+
|
| 113 |
+
// Checking
|
| 114 |
+
Bitboard checkers() const;
|
| 115 |
+
Bitboard blockers_for_king(Color c) const;
|
| 116 |
+
Bitboard check_squares(PieceType pt) const;
|
| 117 |
+
Bitboard pinners(Color c) const;
|
| 118 |
+
|
| 119 |
+
// Attacks to/from a given square
|
| 120 |
+
Bitboard attackers_to(Square s) const;
|
| 121 |
+
Bitboard attackers_to(Square s, Bitboard occupied) const;
|
| 122 |
+
bool attackers_to_exist(Square s, Bitboard occupied, Color c) const;
|
| 123 |
+
void update_slider_blockers(Color c) const;
|
| 124 |
+
template<PieceType Pt>
|
| 125 |
+
Bitboard attacks_by(Color c) const;
|
| 126 |
+
|
| 127 |
+
// Properties of moves
|
| 128 |
+
bool legal(Move m) const;
|
| 129 |
+
bool pseudo_legal(const Move m) const;
|
| 130 |
+
bool capture(Move m) const;
|
| 131 |
+
bool capture_stage(Move m) const;
|
| 132 |
+
bool gives_check(Move m) const;
|
| 133 |
+
Piece moved_piece(Move m) const;
|
| 134 |
+
Piece captured_piece() const;
|
| 135 |
+
|
| 136 |
+
// Doing and undoing moves
|
| 137 |
+
void do_move(Move m, StateInfo& newSt, const TranspositionTable* tt);
|
| 138 |
+
void do_move(Move m,
|
| 139 |
+
StateInfo& newSt,
|
| 140 |
+
bool givesCheck,
|
| 141 |
+
DirtyPiece& dp,
|
| 142 |
+
DirtyThreats& dts,
|
| 143 |
+
const TranspositionTable* tt,
|
| 144 |
+
const SharedHistories* worker);
|
| 145 |
+
void undo_move(Move m);
|
| 146 |
+
void do_null_move(StateInfo& newSt);
|
| 147 |
+
void undo_null_move();
|
| 148 |
+
|
| 149 |
+
// Static Exchange Evaluation
|
| 150 |
+
bool see_ge(Move m, int threshold = 0) const;
|
| 151 |
+
|
| 152 |
+
// Accessing hash keys
|
| 153 |
+
Key key() const;
|
| 154 |
+
Key material_key() const;
|
| 155 |
+
Key pawn_key() const;
|
| 156 |
+
Key minor_piece_key() const;
|
| 157 |
+
Key non_pawn_key(Color c) const;
|
| 158 |
+
|
| 159 |
+
// Other properties of the position
|
| 160 |
+
Color side_to_move() const;
|
| 161 |
+
int game_ply() const;
|
| 162 |
+
bool is_chess960() const;
|
| 163 |
+
bool is_draw(int ply) const;
|
| 164 |
+
bool is_repetition(int ply) const;
|
| 165 |
+
bool upcoming_repetition(int ply) const;
|
| 166 |
+
bool has_repeated() const;
|
| 167 |
+
int rule50_count() const;
|
| 168 |
+
Value non_pawn_material(Color c) const;
|
| 169 |
+
Value non_pawn_material() const;
|
| 170 |
+
|
| 171 |
+
// Position consistency check, for debugging
|
| 172 |
+
bool pos_is_ok() const;
|
| 173 |
+
bool material_key_is_ok() const;
|
| 174 |
+
void flip();
|
| 175 |
+
|
| 176 |
+
StateInfo* state() const;
|
| 177 |
+
|
| 178 |
+
void put_piece(Piece pc, Square s, DirtyThreats* const dts = nullptr);
|
| 179 |
+
void remove_piece(Square s, DirtyThreats* const dts = nullptr);
|
| 180 |
+
void swap_piece(Square s, Piece pc, DirtyThreats* const dts = nullptr);
|
| 181 |
+
|
| 182 |
+
private:
|
| 183 |
+
// Initialization helpers (used while setting up a position)
|
| 184 |
+
void set_castling_right(Color c, Square rfrom);
|
| 185 |
+
Key compute_material_key() const;
|
| 186 |
+
void set_state() const;
|
| 187 |
+
void set_check_info() const;
|
| 188 |
+
|
| 189 |
+
// Other helpers
|
| 190 |
+
template<bool PutPiece, bool ComputeRay = true>
|
| 191 |
+
void update_piece_threats(Piece pc,
|
| 192 |
+
Square s,
|
| 193 |
+
DirtyThreats* const dts,
|
| 194 |
+
Bitboard noRaysContaining = -1ULL) const;
|
| 195 |
+
void move_piece(Square from, Square to, DirtyThreats* const dts = nullptr);
|
| 196 |
+
template<bool Do>
|
| 197 |
+
void do_castling(Color us,
|
| 198 |
+
Square from,
|
| 199 |
+
Square& to,
|
| 200 |
+
Square& rfrom,
|
| 201 |
+
Square& rto,
|
| 202 |
+
DirtyThreats* const dts = nullptr,
|
| 203 |
+
DirtyPiece* const dp = nullptr);
|
| 204 |
+
Key adjust_key50(Key k) const;
|
| 205 |
+
|
| 206 |
+
// Data members
|
| 207 |
+
std::array<Piece, SQUARE_NB> board;
|
| 208 |
+
std::array<Bitboard, PIECE_TYPE_NB> byTypeBB;
|
| 209 |
+
std::array<Bitboard, COLOR_NB> byColorBB;
|
| 210 |
+
|
| 211 |
+
int pieceCount[PIECE_NB];
|
| 212 |
+
int castlingRightsMask[SQUARE_NB];
|
| 213 |
+
Square castlingRookSquare[CASTLING_RIGHT_NB];
|
| 214 |
+
Bitboard castlingPath[CASTLING_RIGHT_NB];
|
| 215 |
+
StateInfo* st;
|
| 216 |
+
int gamePly;
|
| 217 |
+
Color sideToMove;
|
| 218 |
+
bool chess960;
|
| 219 |
+
DirtyPiece scratch_dp;
|
| 220 |
+
DirtyThreats scratch_dts;
|
| 221 |
+
};
|
| 222 |
+
|
| 223 |
+
std::ostream& operator<<(std::ostream& os, const Position& pos);
|
| 224 |
+
|
| 225 |
+
// Returns the stored sideToMove member.
inline Color Position::side_to_move() const {
    return sideToMove;
}
|
| 226 |
+
|
| 227 |
+
// Looks up the board entry for square s. The square is validated with an
// assertion only.
inline Piece Position::piece_on(Square s) const {
    assert(is_ok(s));

    const Piece occupant = board[s];
    return occupant;
}
|
| 231 |
+
|
| 232 |
+
// Gives read-only access to the whole per-square piece array.
inline const std::array<Piece, SQUARE_NB>& Position::piece_array() const {
    return board;
}
|
| 233 |
+
|
| 234 |
+
// True when the board entry for square s is NO_PIECE.
inline bool Position::empty(Square s) const {
    const Piece occupant = piece_on(s);
    return occupant == NO_PIECE;
}
|
| 235 |
+
|
| 236 |
+
// Returns the piece currently standing on the origin square of move m.
inline Piece Position::moved_piece(Move m) const {
    const Square origin = m.from_sq();
    return piece_on(origin);
}
|
| 237 |
+
|
| 238 |
+
// Returns the ALL_PIECES entry of the per-type bitboard table.
inline Bitboard Position::pieces() const {
    return byTypeBB[ALL_PIECES];
}
|
| 239 |
+
|
| 240 |
+
template<typename... PieceTypes>
|
| 241 |
+
inline Bitboard Position::pieces(PieceTypes... pts) const {
|
| 242 |
+
return (byTypeBB[pts] | ...);
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
// Returns the per-colour bitboard entry for colour c.
inline Bitboard Position::pieces(Color c) const {
    return byColorBB[c];
}
|
| 246 |
+
|
| 247 |
+
template<typename... PieceTypes>
|
| 248 |
+
inline Bitboard Position::pieces(Color c, PieceTypes... pts) const {
|
| 249 |
+
return pieces(c) & pieces(pts...);
|
| 250 |
+
}
|
| 251 |
+
|
| 252 |
+
template<PieceType Pt>
|
| 253 |
+
inline int Position::count(Color c) const {
|
| 254 |
+
return pieceCount[make_piece(c, Pt)];
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
template<PieceType Pt>
|
| 258 |
+
inline int Position::count() const {
|
| 259 |
+
return count<Pt>(WHITE) + count<Pt>(BLACK);
|
| 260 |
+
}
|
| 261 |
+
|
| 262 |
+
template<PieceType Pt>
|
| 263 |
+
inline Square Position::square(Color c) const {
|
| 264 |
+
assert(count<Pt>(c) == 1);
|
| 265 |
+
return lsb(pieces(c, Pt));
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
// Returns the epSquare field of the current state.
inline Square Position::ep_square() const {
    return st->epSquare;
}
|
| 269 |
+
|
| 270 |
+
// True when the current state's castling rights share at least one bit with cr.
inline bool Position::can_castle(CastlingRights cr) const {
    return (st->castlingRights & cr) != 0;
}
|
| 271 |
+
|
| 272 |
+
// True when any piece stands on the stored castling path for cr. Only the
// four single castling rights are accepted (checked with an assertion).
inline bool Position::castling_impeded(CastlingRights cr) const {
    assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO);

    const Bitboard obstructions = pieces() & castlingPath[cr];
    return obstructions != 0;
}
|
| 276 |
+
|
| 277 |
+
// Returns the stored rook square for castling right cr. Only the four single
// castling rights are accepted (checked with an assertion).
inline Square Position::castling_rook_square(CastlingRights cr) const {
    assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO);

    const Square rookSquare = castlingRookSquare[cr];
    return rookSquare;
}
|
| 281 |
+
|
| 282 |
+
// Convenience overload: calls the two-argument attackers_to() with the
// current all-pieces bitboard as the occupancy.
inline Bitboard Position::attackers_to(Square s) const {
    const Bitboard occupied = pieces();
    return attackers_to(s, occupied);
}
|
| 283 |
+
|
| 284 |
+
template<PieceType Pt>
|
| 285 |
+
inline Bitboard Position::attacks_by(Color c) const {
|
| 286 |
+
|
| 287 |
+
if constexpr (Pt == PAWN)
|
| 288 |
+
return c == WHITE ? pawn_attacks_bb<WHITE>(pieces(WHITE, PAWN))
|
| 289 |
+
: pawn_attacks_bb<BLACK>(pieces(BLACK, PAWN));
|
| 290 |
+
else
|
| 291 |
+
{
|
| 292 |
+
Bitboard threats = 0;
|
| 293 |
+
Bitboard attackers = pieces(c, Pt);
|
| 294 |
+
while (attackers)
|
| 295 |
+
threats |= attacks_bb<Pt>(pop_lsb(attackers), pieces());
|
| 296 |
+
return threats;
|
| 297 |
+
}
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
// Returns the checkersBB field of the current state.
inline Bitboard Position::checkers() const {
    return st->checkersBB;
}
|
| 301 |
+
|
| 302 |
+
// Returns the blockersForKing entry of the current state for colour c.
inline Bitboard Position::blockers_for_king(Color c) const {
    return st->blockersForKing[c];
}
|
| 303 |
+
|
| 304 |
+
// Returns the pinners entry of the current state for colour c.
inline Bitboard Position::pinners(Color c) const {
    return st->pinners[c];
}
|
| 305 |
+
|
| 306 |
+
// Returns the checkSquares entry of the current state for piece type pt.
inline Bitboard Position::check_squares(PieceType pt) const {
    return st->checkSquares[pt];
}
|
| 307 |
+
|
| 308 |
+
// Returns the current state's key after it has been passed through
// adjust_key50().
inline Key Position::key() const {
    const Key rawKey = st->key;
    return adjust_key50(rawKey);
}
|
| 309 |
+
|
| 310 |
+
// Mixes the rule50 counter of the current state into key k. While the counter
// is below 14 the key is returned unchanged; from 14 onwards it is XOR-ed with
// make_key() of the counter's distance from 14, taken in steps of 8.
inline Key Position::adjust_key50(Key k) const {
    if (st->rule50 < 14)
        return k;

    return k ^ make_key((st->rule50 - 14) / 8);
}
|
| 313 |
+
|
| 314 |
+
// Returns the pawnKey field of the current state.
inline Key Position::pawn_key() const {
    return st->pawnKey;
}
|
| 315 |
+
|
| 316 |
+
// Returns the materialKey field of the current state.
inline Key Position::material_key() const {
    return st->materialKey;
}
|
| 317 |
+
|
| 318 |
+
// Returns the minorPieceKey field of the current state.
inline Key Position::minor_piece_key() const {
    return st->minorPieceKey;
}
|
| 319 |
+
|
| 320 |
+
// Returns the nonPawnKey entry of the current state for colour c.
inline Key Position::non_pawn_key(Color c) const {
    return st->nonPawnKey[c];
}
|
| 321 |
+
|
| 322 |
+
// Returns the nonPawnMaterial entry of the current state for colour c.
inline Value Position::non_pawn_material(Color c) const {
    return st->nonPawnMaterial[c];
}
|
| 323 |
+
|
| 324 |
+
// Returns the non-pawn material summed over both colours.
inline Value Position::non_pawn_material() const {
    const Value whiteMaterial = non_pawn_material(WHITE);
    const Value blackMaterial = non_pawn_material(BLACK);
    return whiteMaterial + blackMaterial;
}
|
| 327 |
+
|
| 328 |
+
// Returns the stored gamePly member.
inline int Position::game_ply() const {
    return gamePly;
}
|
| 329 |
+
|
| 330 |
+
// Returns the rule50 field of the current state.
inline int Position::rule50_count() const {
    return st->rule50;
}
|
| 331 |
+
|
| 332 |
+
// Returns the stored chess960 flag.
inline bool Position::is_chess960() const {
    return chess960;
}
|
| 333 |
+
|
| 334 |
+
// True when move m is an en-passant move, or when its destination square is
// occupied and the move is not a castling move.
inline bool Position::capture(Move m) const {
    assert(m.is_ok());

    const bool targetOccupied = !empty(m.to_sq());

    if (targetOccupied && m.type_of() != CASTLING)
        return true;

    return m.type_of() == EN_PASSANT;
}
|
| 338 |
+
|
| 339 |
+
// Returns true if a move is generated from the capture stage, having also
|
| 340 |
+
// queen promotions covered, i.e. consistency with the capture stage move
|
| 341 |
+
// generation is needed to avoid the generation of duplicate moves.
|
| 342 |
+
inline bool Position::capture_stage(Move m) const {
|
| 343 |
+
assert(m.is_ok());
|
| 344 |
+
return capture(m) || m.promotion_type() == QUEEN;
|
| 345 |
+
}
|
| 346 |
+
|
| 347 |
+
// Returns the capturedPiece field of the current state.
inline Piece Position::captured_piece() const {
    return st->capturedPiece;
}
|
| 348 |
+
|
| 349 |
+
// Places piece pc on square s: updates the board array, the per-type,
// all-pieces and per-colour bitboards, and the piece counters. When dts is
// non-null the threat update for the added piece is issued afterwards.
inline void Position::put_piece(Piece pc, Square s, DirtyThreats* const dts) {
    const Color owner = color_of(pc);

    board[s] = pc;

    // Set the square in the piece-type bitboard first, then fold that
    // bitboard into the all-pieces one.
    byTypeBB[type_of(pc)] |= s;
    byTypeBB[ALL_PIECES] |= byTypeBB[type_of(pc)];
    byColorBB[owner] |= s;

    ++pieceCount[pc];
    ++pieceCount[make_piece(owner, ALL_PIECES)];

    if (dts != nullptr)
        update_piece_threats<true>(pc, s, dts);
}
|
| 359 |
+
|
| 360 |
+
// Takes the piece standing on square s off the board: toggles the square in
// the all-pieces, per-type and per-colour bitboards, clears the board entry
// and decrements the piece counters. When dts is non-null the threat update
// is issued first, before any board data is modified.
inline void Position::remove_piece(Square s, DirtyThreats* const dts) {
    const Piece removed = board[s];

    if (dts != nullptr)
        update_piece_threats<false>(removed, s, dts);

    byTypeBB[ALL_PIECES] ^= s;
    byTypeBB[type_of(removed)] ^= s;
    byColorBB[color_of(removed)] ^= s;
    board[s] = NO_PIECE;

    --pieceCount[removed];
    --pieceCount[make_piece(color_of(removed), ALL_PIECES)];
}
|
| 373 |
+
|
| 374 |
+
// Moves the piece on `from` to `to` by toggling both squares in the
// all-pieces, per-type and per-colour bitboards and rewriting the two board
// entries. Piece counters are left untouched. When dts is non-null, the
// threat update for the removal is issued before the board changes and the
// one for the placement after it; both receive the from|to mask.
inline void Position::move_piece(Square from, Square to, DirtyThreats* const dts) {
    const Piece    mover        = board[from];
    const Bitboard toggled      = from | to;
    const bool     trackThreats = dts != nullptr;

    if (trackThreats)
        update_piece_threats<false>(mover, from, dts, toggled);

    byTypeBB[ALL_PIECES] ^= toggled;
    byTypeBB[type_of(mover)] ^= toggled;
    byColorBB[color_of(mover)] ^= toggled;

    board[from] = NO_PIECE;
    board[to]   = mover;

    if (trackThreats)
        update_piece_threats<true>(mover, to, dts, toggled);
}
|
| 390 |
+
|
| 391 |
+
// Replaces the piece on square s with pc. The removal and the placement are
// done without threat tracking; when dts is non-null the threat updates are
// issued separately, after each step, with ComputeRay set to false.
inline void Position::swap_piece(Square s, Piece pc, DirtyThreats* const dts) {
    const Piece previous     = board[s];
    const bool  trackThreats = dts != nullptr;

    remove_piece(s);
    if (trackThreats)
        update_piece_threats<false, false>(previous, s, dts);

    put_piece(pc, s);
    if (trackThreats)
        update_piece_threats<true, false>(pc, s, dts);
}
|
| 404 |
+
|
| 405 |
+
// Convenience overload of do_move(): works out whether the move gives check
// and forwards to the full overload, passing the position's own scratch
// DirtyPiece/DirtyThreats objects and a null SharedHistories pointer.
// The default value for tt is supplied here, on the definition.
inline void Position::do_move(Move m, StateInfo& newSt, const TranspositionTable* tt = nullptr) {
    // Construct a new DirtyThreats in place over the scratch object before
    // it is handed to the full overload.
    new (&scratch_dts) DirtyThreats;

    const bool checking = gives_check(m);
    do_move(m, newSt, checking, scratch_dp, scratch_dts, tt, nullptr);
}
|
| 409 |
+
|
| 410 |
+
// Returns the pointer to the current StateInfo.
inline StateInfo* Position::state() const {
    return st;
}
|
| 411 |
+
|
| 412 |
+
} // namespace Stockfish
|
| 413 |
+
|
| 414 |
+
#endif // #ifndef POSITION_H_INCLUDED
|
src/score.cpp
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include "score.h"
|
| 20 |
+
|
| 21 |
+
#include <cassert>
|
| 22 |
+
#include <cmath>
|
| 23 |
+
#include <cstdlib>
|
| 24 |
+
|
| 25 |
+
#include "uci.h"
|
| 26 |
+
|
| 27 |
+
namespace Stockfish {
|
| 28 |
+
|
| 29 |
+
// Builds a Score from an internal search value.
//  - Non-decisive values are stored as InternalUnits, converted by
//    UCIEngine::to_cp().
//  - Decisive values whose magnitude is at most VALUE_TB are stored as
//    Tablebase, with the distance measured from VALUE_TB.
//  - All remaining values are stored as Mate, with the distance measured
//    from VALUE_MATE.
// For negative values the stored distance is negated.
Score::Score(Value v, const Position& pos) {
    assert(-VALUE_INFINITE < v && v < VALUE_INFINITE);

    if (!is_decisive(v))
    {
        score = InternalUnits{UCIEngine::to_cp(v, pos)};
        return;
    }

    const bool positive = v > 0;

    if (std::abs(v) <= VALUE_TB)
    {
        auto distance = VALUE_TB - std::abs(v);

        if (positive)
            score = Tablebase{distance, true};
        else
            score = Tablebase{-distance, false};
    }
    else
    {
        auto distance = VALUE_MATE - std::abs(v);

        if (positive)
            score = Mate{distance};
        else
            score = Mate{-distance};
    }
}
|
| 47 |
+
|
| 48 |
+
}
|
src/score.h
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef SCORE_H_INCLUDED
|
| 20 |
+
#define SCORE_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <variant>
|
| 23 |
+
#include <utility>
|
| 24 |
+
|
| 25 |
+
#include "types.h"
|
| 26 |
+
|
| 27 |
+
namespace Stockfish {
|
| 28 |
+
|
| 29 |
+
class Position;
|
| 30 |
+
|
| 31 |
+
class Score {
|
| 32 |
+
public:
|
| 33 |
+
struct Mate {
|
| 34 |
+
int plies;
|
| 35 |
+
};
|
| 36 |
+
|
| 37 |
+
struct Tablebase {
|
| 38 |
+
int plies;
|
| 39 |
+
bool win;
|
| 40 |
+
};
|
| 41 |
+
|
| 42 |
+
struct InternalUnits {
|
| 43 |
+
int value;
|
| 44 |
+
};
|
| 45 |
+
|
| 46 |
+
Score() = default;
|
| 47 |
+
Score(Value v, const Position& pos);
|
| 48 |
+
|
| 49 |
+
template<typename T>
|
| 50 |
+
bool is() const {
|
| 51 |
+
return std::holds_alternative<T>(score);
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
template<typename T>
|
| 55 |
+
T get() const {
|
| 56 |
+
return std::get<T>(score);
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
template<typename F>
|
| 60 |
+
decltype(auto) visit(F&& f) const {
|
| 61 |
+
return std::visit(std::forward<F>(f), score);
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
private:
|
| 65 |
+
std::variant<Mate, Tablebase, InternalUnits> score;
|
| 66 |
+
};
|
| 67 |
+
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
#endif // #ifndef SCORE_H_INCLUDED
|
src/search.cpp
ADDED
|
@@ -0,0 +1,2217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include "search.h"
|
| 20 |
+
|
| 21 |
+
#include <algorithm>
|
| 22 |
+
#include <array>
|
| 23 |
+
#include <atomic>
|
| 24 |
+
#include <cassert>
|
| 25 |
+
#include <chrono>
|
| 26 |
+
#include <cmath>
|
| 27 |
+
#include <cstdint>
|
| 28 |
+
#include <cstdlib>
|
| 29 |
+
#include <initializer_list>
|
| 30 |
+
#include <iostream>
|
| 31 |
+
#include <list>
|
| 32 |
+
#include <ratio>
|
| 33 |
+
#include <string>
|
| 34 |
+
#include <utility>
|
| 35 |
+
|
| 36 |
+
#include "bitboard.h"
|
| 37 |
+
#include "evaluate.h"
|
| 38 |
+
#include "history.h"
|
| 39 |
+
#include "misc.h"
|
| 40 |
+
#include "movegen.h"
|
| 41 |
+
#include "movepick.h"
|
| 42 |
+
#include "nnue/network.h"
|
| 43 |
+
#include "nnue/nnue_accumulator.h"
|
| 44 |
+
#include "position.h"
|
| 45 |
+
#include "syzygy/tbprobe.h"
|
| 46 |
+
#include "thread.h"
|
| 47 |
+
#include "timeman.h"
|
| 48 |
+
#include "tt.h"
|
| 49 |
+
#include "types.h"
|
| 50 |
+
#include "uci.h"
|
| 51 |
+
#include "ucioption.h"
|
| 52 |
+
|
| 53 |
+
namespace Stockfish {
|
| 54 |
+
|
| 55 |
+
namespace TB = Tablebases;
|
| 56 |
+
|
| 57 |
+
void syzygy_extend_pv(const OptionsMap& options,
|
| 58 |
+
const Search::LimitsType& limits,
|
| 59 |
+
Stockfish::Position& pos,
|
| 60 |
+
Stockfish::Search::RootMove& rootMove,
|
| 61 |
+
Value& v);
|
| 62 |
+
|
| 63 |
+
using namespace Search;
|
| 64 |
+
|
| 65 |
+
namespace {
|
| 66 |
+
|
| 67 |
+
constexpr int SEARCHEDLIST_CAPACITY = 32;
|
| 68 |
+
using SearchedList = ValueList<Move, SEARCHEDLIST_CAPACITY>;
|
| 69 |
+
|
| 70 |
+
// (*Scalers):
|
| 71 |
+
// The values with Scaler asterisks have proven non-linear scaling.
|
| 72 |
+
// They are optimized to time controls of 180 + 1.8 and longer,
|
| 73 |
+
// so changing them or adding conditions that are similar requires
|
| 74 |
+
// tests at these types of time controls.
|
| 75 |
+
|
| 76 |
+
// (*Scaler) All parameters tuned at time controls shorter than the ones they
|
| 77 |
+
// are optimized for require verification at longer time controls.
|
| 78 |
+
|
| 79 |
+
int correction_value(const Worker& w, const Position& pos, const Stack* const ss) {
|
| 80 |
+
const Color us = pos.side_to_move();
|
| 81 |
+
const auto m = (ss - 1)->currentMove;
|
| 82 |
+
const auto& shared = w.sharedHistory;
|
| 83 |
+
const int pcv = shared.pawn_correction_entry(pos).at(us).pawn;
|
| 84 |
+
const int micv = shared.minor_piece_correction_entry(pos).at(us).minor;
|
| 85 |
+
const int wnpcv = shared.nonpawn_correction_entry<WHITE>(pos).at(us).nonPawnWhite;
|
| 86 |
+
const int bnpcv = shared.nonpawn_correction_entry<BLACK>(pos).at(us).nonPawnBlack;
|
| 87 |
+
const int cntcv =
|
| 88 |
+
m.is_ok() ? (*(ss - 2)->continuationCorrectionHistory)[pos.piece_on(m.to_sq())][m.to_sq()]
|
| 89 |
+
+ (*(ss - 4)->continuationCorrectionHistory)[pos.piece_on(m.to_sq())][m.to_sq()]
|
| 90 |
+
: 8;
|
| 91 |
+
|
| 92 |
+
return 11433 * pcv + 8823 * micv + 12749 * (wnpcv + bnpcv) + 8022 * cntcv;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
// Add correctionHistory value to raw staticEval and guarantee evaluation
|
| 96 |
+
// does not hit the tablebase range.
|
| 97 |
+
Value to_corrected_static_eval(const Value v, const int cv) {
|
| 98 |
+
return std::clamp(v + cv / 131072, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
void update_correction_history(const Position& pos,
|
| 102 |
+
Stack* const ss,
|
| 103 |
+
Search::Worker& workerThread,
|
| 104 |
+
const int bonus) {
|
| 105 |
+
const Move m = (ss - 1)->currentMove;
|
| 106 |
+
const Color us = pos.side_to_move();
|
| 107 |
+
|
| 108 |
+
constexpr int nonPawnWeight = 181;
|
| 109 |
+
auto& shared = workerThread.sharedHistory;
|
| 110 |
+
|
| 111 |
+
shared.pawn_correction_entry(pos).at(us).pawn << bonus;
|
| 112 |
+
shared.minor_piece_correction_entry(pos).at(us).minor << bonus * 155 / 128;
|
| 113 |
+
shared.nonpawn_correction_entry<WHITE>(pos).at(us).nonPawnWhite << bonus * nonPawnWeight / 128;
|
| 114 |
+
shared.nonpawn_correction_entry<BLACK>(pos).at(us).nonPawnBlack << bonus * nonPawnWeight / 128;
|
| 115 |
+
|
| 116 |
+
// Branchless: use mask to zero bonus when move is not ok
|
| 117 |
+
const int mask = int(m.is_ok());
|
| 118 |
+
const Square to = m.to_sq_unchecked();
|
| 119 |
+
const Piece pc = pos.piece_on(to);
|
| 120 |
+
const int bonus2 = (bonus * 129 / 128) * mask;
|
| 121 |
+
const int bonus4 = (bonus * 61 / 128) * mask;
|
| 122 |
+
(*(ss - 2)->continuationCorrectionHistory)[pc][to] << bonus2;
|
| 123 |
+
(*(ss - 4)->continuationCorrectionHistory)[pc][to] << bonus4;
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
// Add a small random component to draw evaluations to avoid 3-fold blindness
|
| 127 |
+
Value value_draw(size_t nodes) { return VALUE_DRAW - 1 + Value(nodes & 0x2); }
|
| 128 |
+
Value value_to_tt(Value v, int ply);
|
| 129 |
+
Value value_from_tt(Value v, int ply, int r50c);
|
| 130 |
+
void update_pv(Move* pv, Move move, const Move* childPv);
|
| 131 |
+
void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus);
|
| 132 |
+
void update_quiet_histories(
|
| 133 |
+
const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus);
|
| 134 |
+
void update_all_stats(const Position& pos,
|
| 135 |
+
Stack* ss,
|
| 136 |
+
Search::Worker& workerThread,
|
| 137 |
+
Move bestMove,
|
| 138 |
+
Square prevSq,
|
| 139 |
+
SearchedList& quietsSearched,
|
| 140 |
+
SearchedList& capturesSearched,
|
| 141 |
+
Depth depth,
|
| 142 |
+
Move ttMove);
|
| 143 |
+
|
| 144 |
+
// Reports whether `move` continues a back-and-forth pattern: it must be a
// non-capture-stage move with a rule50 counter of at least 11, more than 6
// plies since the last null move and a search ply of at least 19, and it must
// start on the square our move two plies ago arrived at, which in turn started
// on the square our move four plies ago arrived at.
bool is_shuffling(Move move, Stack* const ss, const Position& pos) {
    const bool quietAndLate = !pos.capture_stage(move) && pos.rule50_count() >= 11;
    if (!quietAndLate)
        return false;

    const bool deepEnough = pos.state()->pliesFromNull > 6 && ss->ply >= 19;
    if (!deepEnough)
        return false;

    const Move& twoPliesAgo = (ss - 2)->currentMove;
    if (move.from_sq() != twoPliesAgo.to_sq())
        return false;

    return twoPliesAgo.from_sq() == (ss - 4)->currentMove.to_sq();
}
|
| 152 |
+
|
| 153 |
+
} // namespace
|
| 154 |
+
|
| 155 |
+
// Constructs a search worker.
//   sharedState      - engine-wide state; its options, threads, tt and networks
//                      members are stored, and the shared history is selected by
//                      the NUMA index reported by `token`.
//   sm               - search manager, moved into `manager`.
//   threadId         - stored as threadIdx.
//   numaThreadId     - stored as numaThreadIdx.
//   numaTotalThreads - stored as numaTotal.
//   token            - NUMA access token; stored, and used to index `networks`
//                      when building the refresh table.
// The body calls clear(), so a freshly constructed worker starts with reset
// histories.
Search::Worker::Worker(SharedState&                    sharedState,
                       std::unique_ptr<ISearchManager> sm,
                       size_t                          threadId,
                       size_t                          numaThreadId,
                       size_t                          numaTotalThreads,
                       NumaReplicatedAccessToken       token) :
    // Unpack the SharedState struct into member variables
    sharedHistory(sharedState.sharedHistories.at(token.get_numa_index())),
    threadIdx(threadId),
    numaThreadIdx(numaThreadId),
    numaTotal(numaTotalThreads),
    numaAccessToken(token),
    manager(std::move(sm)),
    options(sharedState.options),
    threads(sharedState.threads),
    tt(sharedState.tt),
    networks(sharedState.networks),
    refreshTable(networks[token]) {
    clear();
}
|
| 175 |
+
|
| 176 |
+
// Touches this worker's network replica once so that its lazy initialization
// happens here rather than during search.
void Search::Worker::ensure_network_replicated() {
    // The result is intentionally discarded; only the access itself matters.
    static_cast<void>(networks[numaAccessToken]);
}
|
| 181 |
+
|
| 182 |
+
// Entry point of a search for this worker.
// Non-main workers reset their accumulator stack and run iterative_deepening().
// The main worker additionally initializes time management, starts a new TT
// search, launches the other threads, waits for the search to end, picks the
// best thread and reports "bestmove" (plus an optional ponder move) through
// the manager's update callbacks.
void Search::Worker::start_searching() {

    accumulatorStack.reset();

    // Non-main threads go directly to iterative_deepening()
    if (!is_mainthread())
    {
        iterative_deepening();
        return;
    }

    main_manager()->tm.init(limits, rootPos.side_to_move(), rootPos.game_ply(), options,
                            main_manager()->originalTimeAdjust);
    tt.new_search();

    if (rootMoves.empty())
    {
        // No root move to search: insert a placeholder move and report a mated
        // score when in check, a draw score otherwise.
        rootMoves.emplace_back(Move::none());
        main_manager()->updates.onUpdateNoMoves(
          {0, {rootPos.checkers() ? -VALUE_MATE : VALUE_DRAW, rootPos}});
    }
    else
    {
        threads.start_searching();  // start non-main threads
        iterative_deepening();      // main thread start searching
    }

    // When we reach the maximum depth, we can arrive here without a raise of
    // threads.stop. However, if we are pondering or in an infinite search,
    // the UCI protocol states that we shouldn't print the best move before the
    // GUI sends a "stop" or "ponderhit" command. We therefore simply wait here
    // until the GUI sends one of those commands.
    while (!threads.stop && (main_manager()->ponder || limits.infinite))
    {}  // Busy wait for a stop or a ponder reset

    // Stop the threads if not already stopped (also raise the stop if
    // "ponderhit" just reset the ponder flag)
    threads.stop = true;

    // Wait until all threads have finished
    threads.wait_for_search_finished();

    // When playing in 'nodes as time' mode, subtract the searched nodes from
    // the available ones before exiting.
    if (limits.npmsec)
        main_manager()->tm.advance_nodes_time(threads.nodes_searched()
                                              - limits.inc[rootPos.side_to_move()]);

    Worker* bestThread = this;
    Skill   skill =
      Skill(options["Skill Level"], options["UCI_LimitStrength"] ? int(options["UCI_Elo"]) : 0);

    // Only ask the thread pool for a best thread in a plain single-PV search
    // (no depth or mate limit, no skill handicap) that has a real first move.
    if (int(options["MultiPV"]) == 1 && !limits.depth && !limits.mate && !skill.enabled()
        && rootMoves[0].pv[0] != Move::none())
        bestThread = threads.get_best_thread()->worker.get();

    // Remember the chosen scores on the manager; iterative_deepening() reads
    // them back when seeding iterValue and computing fallingEval.
    main_manager()->bestPreviousScore        = bestThread->rootMoves[0].score;
    main_manager()->bestPreviousAverageScore = bestThread->rootMoves[0].averageScore;

    // Send again PV info if we have a new best thread
    if (bestThread != this)
        main_manager()->pv(*bestThread, threads, tt, bestThread->completedDepth);

    std::string ponder;

    // Use the second PV move as ponder move; if the PV has only one move, try
    // to extract a ponder move from the transposition table first.
    if (bestThread->rootMoves[0].pv.size() > 1
        || bestThread->rootMoves[0].extract_ponder_from_tt(tt, rootPos))
        ponder = UCIEngine::move(bestThread->rootMoves[0].pv[1], rootPos.is_chess960());

    auto bestmove = UCIEngine::move(bestThread->rootMoves[0].pv[0], rootPos.is_chess960());
    main_manager()->updates.onBestmove(bestmove, ponder);
}
|
| 254 |
+
|
| 255 |
+
// Main iterative deepening loop. It calls search()
// repeatedly with increasing depth until the allocated thinking time has been
// consumed, the user stops the search, or the maximum search depth is reached.
//
// Every worker runs this function. Only the main worker (mainThread != nullptr)
// reports PVs, applies the mate/skill/time-management logic and is bound by
// limits.depth.
void Search::Worker::iterative_deepening() {

    // Non-null only on the main thread; used below as the "is main" flag.
    SearchManager* mainThread = (is_mainthread() ? main_manager() : nullptr);

    Move pv[MAX_PLY + 1];

    // Last root best move whose PV/score was accepted, used as a fallback when
    // an aborted iteration leaves a losing score at the front.
    Depth lastBestMoveDepth = 0;
    Value lastBestScore     = -VALUE_INFINITE;
    auto  lastBestPV        = std::vector{Move::none()};

    Value  alpha, beta;
    Value  bestValue     = -VALUE_INFINITE;
    Color  us            = rootPos.side_to_move();
    double timeReduction = 1, totBestMoveChanges = 0;
    int    delta, iterIdx                        = 0;

    // Allocate stack with extra size to allow access from (ss - 7) to (ss + 2):
    // (ss - 7) is needed for update_continuation_histories(ss - 1) which accesses (ss - 6),
    // (ss + 2) is needed for initialization of cutoffCnt.
    Stack  stack[MAX_PLY + 10] = {};
    Stack* ss                  = stack + 7;

    for (int i = 7; i > 0; --i)
    {
        (ss - i)->continuationHistory =
          &continuationHistory[0][0][NO_PIECE][0];  // Use as a sentinel
        (ss - i)->continuationCorrectionHistory = &continuationCorrectionHistory[NO_PIECE][0];
        (ss - i)->staticEval                    = VALUE_NONE;
    }

    for (int i = 0; i <= MAX_PLY + 2; ++i)
        (ss + i)->ply = i;

    ss->pv = pv;

    if (mainThread)
    {
        // Seed the per-iteration value history: zeros when bestPreviousScore
        // still equals VALUE_INFINITE, otherwise the previous best score.
        if (mainThread->bestPreviousScore == VALUE_INFINITE)
            mainThread->iterValue.fill(VALUE_ZERO);
        else
            mainThread->iterValue.fill(mainThread->bestPreviousScore);
    }

    size_t multiPV = size_t(options["MultiPV"]);
    Skill skill(options["Skill Level"], options["UCI_LimitStrength"] ? int(options["UCI_Elo"]) : 0);

    // When playing with strength handicap enable MultiPV search that we will
    // use behind-the-scenes to retrieve a set of possible moves.
    if (skill.enabled())
        multiPV = std::max(multiPV, size_t(4));

    // Never search more PV lines than there are root moves.
    multiPV = std::min(multiPV, rootMoves.size());

    int searchAgainCounter = 0;

    lowPlyHistory.fill(100);

    // Scale every main-history entry by 778/1024 before this search starts.
    for (Color c : {WHITE, BLACK})
        for (int i = 0; i < UINT_16_HISTORY_SIZE; i++)
            mainHistory[c][i] = mainHistory[c][i] * 778 / 1024;

    // Iterative deepening loop until requested to stop or the target depth is reached
    // (the limits.depth bound is only checked on the main thread).
    while (++rootDepth < MAX_PLY && !threads.stop
           && !(limits.depth && mainThread && rootDepth > limits.depth))
    {
        // Age out PV variability metric
        if (mainThread)
            totBestMoveChanges /= 2;

        // Save the last iteration's scores before the first PV line is searched and
        // all the move scores except the (new) PV are set to -VALUE_INFINITE.
        for (RootMove& rm : rootMoves)
            rm.previousScore = rm.score;

        size_t pvFirst = 0;
        pvLast         = 0;

        if (!threads.increaseDepth)
            searchAgainCounter++;

        // MultiPV loop. We perform a full root search for each PV line
        for (pvIdx = 0; pvIdx < multiPV; ++pvIdx)
        {
            // When the current group is exhausted, advance [pvFirst, pvLast) to
            // the next run of root moves sharing the same tbRank.
            if (pvIdx == pvLast)
            {
                pvFirst = pvLast;
                for (pvLast++; pvLast < rootMoves.size(); pvLast++)
                    if (rootMoves[pvLast].tbRank != rootMoves[pvFirst].tbRank)
                        break;
            }

            // Reset UCI info selDepth for each depth and each PV line
            selDepth = 0;

            // Reset aspiration window starting size
            delta     = 5 + threadIdx % 8 + std::abs(rootMoves[pvIdx].meanSquaredScore) / 9968;
            Value avg = rootMoves[pvIdx].averageScore;
            alpha     = std::max(avg - delta, -VALUE_INFINITE);
            beta      = std::min(avg + delta, VALUE_INFINITE);

            // Adjust optimism based on root move's averageScore
            optimism[us]  = 142 * avg / (std::abs(avg) + 86);
            optimism[~us] = -optimism[us];

            // Start with a small aspiration window and, in the case of a fail
            // high/low, re-search with a bigger window until we don't fail
            // high/low anymore.
            int failedHighCnt = 0;
            while (true)
            {
                // Adjust the effective depth searched, but ensure at least one
                // effective increment for every four searchAgain steps (see issue #2717).
                Depth adjustedDepth =
                  std::max(1, rootDepth - failedHighCnt - 3 * (searchAgainCounter + 1) / 4);
                rootDelta = beta - alpha;
                bestValue = search<Root>(rootPos, ss, alpha, beta, adjustedDepth, false);

                // Bring the best move to the front. It is critical that sorting
                // is done with a stable algorithm because all the values but the
                // first and eventually the new best one is set to -VALUE_INFINITE
                // and we want to keep the same order for all the moves except the
                // new PV that goes to the front. Note that in the case of MultiPV
                // search the already searched PV lines are preserved.
                std::stable_sort(rootMoves.begin() + pvIdx, rootMoves.begin() + pvLast);

                // If search has been stopped, we break immediately. Sorting is
                // safe because RootMoves is still valid, although it refers to
                // the previous iteration.
                if (threads.stop)
                    break;

                // When failing high/low give some update before a re-search. To avoid
                // excessive output that could hang GUIs like Fritz 19, only start
                // at nodes > 10M (rather than depth N, which can be reached quickly)
                if (mainThread && multiPV == 1 && (bestValue <= alpha || bestValue >= beta)
                    && nodes > 10000000)
                    main_manager()->pv(*this, threads, tt, rootDepth);

                // In case of failing low/high increase aspiration window and re-search,
                // otherwise exit the loop.
                if (bestValue <= alpha)
                {
                    beta  = alpha;
                    alpha = std::max(bestValue - delta, -VALUE_INFINITE);

                    failedHighCnt = 0;
                    if (mainThread)
                        mainThread->stopOnPonderhit = false;
                }
                else if (bestValue >= beta)
                {
                    alpha = std::max(beta - delta, alpha);
                    beta  = std::min(bestValue + delta, VALUE_INFINITE);
                    ++failedHighCnt;
                }
                else
                    break;

                // Widen the window by a third for the next re-search.
                delta += delta / 3;

                assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE);
            }

            // Sort the PV lines searched so far and update the GUI
            std::stable_sort(rootMoves.begin() + pvFirst, rootMoves.begin() + pvIdx + 1);

            if (mainThread
                && (threads.stop || pvIdx + 1 == multiPV || nodes > 10000000)
                // A thread that aborted search can have a mated-in/TB-loss score and
                // PV that cannot be trusted, i.e. it can be delayed or refuted if we
                // would have had time to fully search other root-moves. Thus here we
                // suppress any exact mated-in/TB loss output and, if we do, below pick
                // the score/PV from the previously completed iteration with the most
                // recent bestmove change.
                && !(threads.stop && is_loss(rootMoves[0].uciScore)
                     && rootMoves[0].score == rootMoves[0].uciScore))
                main_manager()->pv(*this, threads, tt, rootDepth);

            if (threads.stop)
                break;
        }

        // An iteration only counts as completed when it was not interrupted.
        if (!threads.stop)
            completedDepth = rootDepth;

        // We make sure not to pick an unproven mated-in score,
        // in case this thread prematurely stopped search (aborted-search).
        if (completedDepth != rootDepth && rootMoves[0].score != -VALUE_INFINITE
            && is_loss(rootMoves[0].score))
        {
            // Bring the last best move to the front for best thread selection.
            Utility::move_to_front(rootMoves, [&lastBestPV = std::as_const(lastBestPV)](
                                                const auto& rm) { return rm == lastBestPV[0]; });
            rootMoves[0].pv    = lastBestPV;
            rootMoves[0].score = rootMoves[0].uciScore = lastBestScore;
        }
        else if (rootMoves[0].pv[0] != lastBestPV[0])
        {
            // The best move changed: remember its PV, score and depth.
            lastBestPV        = rootMoves[0].pv;
            lastBestScore     = rootMoves[0].score;
            lastBestMoveDepth = rootDepth;
        }

        // Everything below in this iteration is main-thread only.
        if (!mainThread)
            continue;

        // Have we found a "mate in x"?
        if (limits.mate && rootMoves[0].score == rootMoves[0].uciScore
            && ((rootMoves[0].score >= VALUE_MATE_IN_MAX_PLY
                 && VALUE_MATE - rootMoves[0].score <= 2 * limits.mate)
                || (rootMoves[0].score != -VALUE_INFINITE
                    && rootMoves[0].score <= VALUE_MATED_IN_MAX_PLY
                    && VALUE_MATE + rootMoves[0].score <= 2 * limits.mate)))
            threads.stop = true;

        // If the skill level is enabled and time is up, pick a sub-optimal best move
        if (skill.enabled() && skill.time_to_pick(rootDepth))
            skill.pick_best(rootMoves, multiPV);

        // Use part of the gained time from a previous stable move for the current move
        // (collects and resets the best-move-change counters of all threads).
        for (auto&& th : threads)
        {
            totBestMoveChanges += th->worker->bestMoveChanges;
            th->worker->bestMoveChanges = 0;
        }

        // Do we have time for the next iteration? Can we stop searching now?
        if (limits.use_time_management() && !threads.stop && !mainThread->stopOnPonderhit)
        {
            // Effort of the best root move relative to this worker's node
            // count, scaled by 100000.
            uint64_t nodesEffort =
              rootMoves[0].effort * 100000 / std::max(size_t(1), size_t(nodes));

            // Grows when the current best value is below the previous average
            // score and the value stored for this iteration slot; clamped to
            // [0.57, 1.70].
            double fallingEval = (11.85 + 2.24 * (mainThread->bestPreviousAverageScore - bestValue)
                                  + 0.93 * (mainThread->iterValue[iterIdx] - bestValue))
                               / 100.0;
            fallingEval = std::clamp(fallingEval, 0.57, 1.70);

            // If the bestMove is stable over several iterations, reduce time accordingly
            double k      = 0.51;
            double center = lastBestMoveDepth + 12.15;

            timeReduction = 0.66 + 0.85 / (0.98 + std::exp(-k * (completedDepth - center)));

            double reduction = (1.43 + mainThread->previousTimeReduction) / (2.28 * timeReduction);

            double bestMoveInstability = 1.02 + 2.14 * totBestMoveChanges / threads.size();

            double highBestMoveEffort = nodesEffort >= 93340 ? 0.76 : 1.0;

            double totalTime = mainThread->tm.optimum() * fallingEval * reduction
                             * bestMoveInstability * highBestMoveEffort;

            // Cap used time in case of a single legal move for a better viewer experience
            if (rootMoves.size() == 1)
                totalTime = std::min(502.0, totalTime);

            auto elapsedTime = elapsed();

            // Stop the search if we have exceeded the totalTime or maximum
            if (elapsedTime > std::min(totalTime, double(mainThread->tm.maximum())))
            {
                // If we are allowed to ponder do not stop the search now but
                // keep pondering until the GUI sends "ponderhit" or "stop".
                if (mainThread->ponder)
                    mainThread->stopOnPonderhit = true;
                else
                    threads.stop = true;
            }
            else
                threads.increaseDepth = mainThread->ponder || elapsedTime <= totalTime * 0.50;
        }

        // Record this iteration's value in a 4-slot ring buffer.
        mainThread->iterValue[iterIdx] = bestValue;
        iterIdx                        = (iterIdx + 1) & 3;
    }

    if (!mainThread)
        return;

    mainThread->previousTimeReduction = timeReduction;

    // If the skill level is enabled, swap the best PV line with the sub-optimal one
    if (skill.enabled())
        std::swap(rootMoves[0],
                  *std::find(rootMoves.begin(), rootMoves.end(),
                             skill.best ? skill.best : skill.pick_best(rootMoves, multiPV)));
}
|
| 545 |
+
|
| 546 |
+
|
| 547 |
+
// Convenience overload: determines whether `move` gives check and forwards to
// the overload that takes that flag explicitly.
void Search::Worker::do_move(Position& pos, const Move move, StateInfo& st, Stack* const ss) {
    const bool givesCheck = pos.gives_check(move);
    do_move(pos, move, st, givesCheck, ss);
}
|
| 550 |
+
|
| 551 |
+
// Plays `move` on `pos`, counting the node and pushing a new accumulator
// frame. When a stack entry is supplied, it also records the move and the
// continuation-history slots that correspond to it.
void Search::Worker::do_move(
  Position& pos, const Move move, StateInfo& st, const bool givesCheck, Stack* const ss) {
    // Must be queried before the move is made on the board.
    const bool isCapture = pos.capture_stage(move);

    // A relaxed load followed by a relaxed store is preferred over fetch_add
    // to avoid locking instructions.
    const auto visited = nodes.load(std::memory_order_relaxed);
    nodes.store(visited + 1, std::memory_order_relaxed);

    auto [dirtyPiece, dirtyThreats] = accumulatorStack.push();
    pos.do_move(move, st, givesCheck, dirtyPiece, dirtyThreats, &tt, &sharedHistory);

    if (ss == nullptr)
        return;

    const auto target = move.to_sq();

    ss->currentMove = move;
    ss->continuationHistory =
      &continuationHistory[ss->inCheck][isCapture][dirtyPiece.pc][target];
    ss->continuationCorrectionHistory = &continuationCorrectionHistory[dirtyPiece.pc][target];
}
|
| 569 |
+
|
| 570 |
+
// Plays a null move on `pos` and marks the stack entry accordingly: the
// current move becomes Move::null() and both continuation-history pointers
// are set to their NO_PIECE slots.
void Search::Worker::do_null_move(Position& pos, StateInfo& st, Stack* const ss) {
    pos.do_null_move(st);

    ss->currentMove                   = Move::null();
    ss->continuationCorrectionHistory = &continuationCorrectionHistory[NO_PIECE][0];
    ss->continuationHistory           = &continuationHistory[0][0][NO_PIECE][0];
}
|
| 576 |
+
|
| 577 |
+
// Takes `move` back on `pos` and then pops one frame from the accumulator
// stack.
void Search::Worker::undo_move(Position& pos, const Move move) {
    // Restore the board first, then discard the accumulator frame.
    pos.undo_move(move);
    accumulatorStack.pop();
}
|
| 581 |
+
|
| 582 |
+
// Takes back a null move; unlike undo_move() it does not touch the
// accumulator stack.
void Search::Worker::undo_null_move(Position& pos) {
    pos.undo_null_move();
}
|
| 583 |
+
|
| 584 |
+
|
| 585 |
+
// Reset histories, usually before a new game
|
| 586 |
+
void Search::Worker::clear() {
|
| 587 |
+
mainHistory.fill(0);
|
| 588 |
+
captureHistory.fill(-689);
|
| 589 |
+
|
| 590 |
+
// Each thread is responsible for clearing their part of shared history
|
| 591 |
+
sharedHistory.correctionHistory.clear_range(0, numaThreadIdx, numaTotal);
|
| 592 |
+
sharedHistory.pawnHistory.clear_range(-1238, numaThreadIdx, numaTotal);
|
| 593 |
+
|
| 594 |
+
ttMoveHistory = 0;
|
| 595 |
+
|
| 596 |
+
for (auto& to : continuationCorrectionHistory)
|
| 597 |
+
for (auto& h : to)
|
| 598 |
+
h.fill(7);
|
| 599 |
+
|
| 600 |
+
for (bool inCheck : {false, true})
|
| 601 |
+
for (StatsType c : {NoCaptures, Captures})
|
| 602 |
+
for (auto& to : continuationHistory[inCheck][c])
|
| 603 |
+
for (auto& h : to)
|
| 604 |
+
h.fill(-541);
|
| 605 |
+
|
| 606 |
+
for (size_t i = 1; i < reductions.size(); ++i)
|
| 607 |
+
reductions[i] = int(2809 / 128.0 * std::log(i));
|
| 608 |
+
|
| 609 |
+
refreshTable.clear(networks[numaAccessToken]);
|
| 610 |
+
}
|
| 611 |
+
|
| 612 |
+
|
| 613 |
+
// Main search function for both PV and non-PV nodes
|
| 614 |
+
template<NodeType nodeType>
|
| 615 |
+
Value Search::Worker::search(
|
| 616 |
+
Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) {
|
| 617 |
+
|
| 618 |
+
constexpr bool PvNode = nodeType != NonPV;
|
| 619 |
+
constexpr bool rootNode = nodeType == Root;
|
| 620 |
+
const bool allNode = !(PvNode || cutNode);
|
| 621 |
+
|
| 622 |
+
// Dive into quiescence search when the depth reaches zero
|
| 623 |
+
if (depth <= 0)
|
| 624 |
+
return qsearch<PvNode ? PV : NonPV>(pos, ss, alpha, beta);
|
| 625 |
+
|
| 626 |
+
// Limit the depth if extensions made it too large
|
| 627 |
+
depth = std::min(depth, MAX_PLY - 1);
|
| 628 |
+
|
| 629 |
+
// Check if we have an upcoming move that draws by repetition
|
| 630 |
+
if (!rootNode && alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply))
|
| 631 |
+
{
|
| 632 |
+
alpha = value_draw(nodes);
|
| 633 |
+
if (alpha >= beta)
|
| 634 |
+
return alpha;
|
| 635 |
+
}
|
| 636 |
+
|
| 637 |
+
assert(-VALUE_INFINITE <= alpha && alpha < beta && beta <= VALUE_INFINITE);
|
| 638 |
+
assert(PvNode || (alpha == beta - 1));
|
| 639 |
+
assert(0 < depth && depth < MAX_PLY);
|
| 640 |
+
assert(!(PvNode && cutNode));
|
| 641 |
+
|
| 642 |
+
Move pv[MAX_PLY + 1];
|
| 643 |
+
StateInfo st;
|
| 644 |
+
|
| 645 |
+
Key posKey;
|
| 646 |
+
Move move, excludedMove, bestMove;
|
| 647 |
+
Depth extension, newDepth;
|
| 648 |
+
Value bestValue, value, eval, maxValue, probCutBeta;
|
| 649 |
+
bool givesCheck, improving, priorCapture, opponentWorsening;
|
| 650 |
+
bool capture, ttCapture;
|
| 651 |
+
int priorReduction;
|
| 652 |
+
Piece movedPiece;
|
| 653 |
+
|
| 654 |
+
SearchedList capturesSearched;
|
| 655 |
+
SearchedList quietsSearched;
|
| 656 |
+
|
| 657 |
+
// Step 1. Initialize node
|
| 658 |
+
ss->inCheck = pos.checkers();
|
| 659 |
+
priorCapture = pos.captured_piece();
|
| 660 |
+
Color us = pos.side_to_move();
|
| 661 |
+
ss->moveCount = 0;
|
| 662 |
+
bestValue = -VALUE_INFINITE;
|
| 663 |
+
maxValue = VALUE_INFINITE;
|
| 664 |
+
|
| 665 |
+
// Check for the available remaining time
|
| 666 |
+
if (is_mainthread())
|
| 667 |
+
main_manager()->check_time(*this);
|
| 668 |
+
|
| 669 |
+
// Used to send selDepth info to GUI (selDepth counts from 1, ply from 0)
|
| 670 |
+
if (PvNode && selDepth < ss->ply + 1)
|
| 671 |
+
selDepth = ss->ply + 1;
|
| 672 |
+
|
| 673 |
+
if (!rootNode)
|
| 674 |
+
{
|
| 675 |
+
// Step 2. Check for aborted search and immediate draw
|
| 676 |
+
if (threads.stop.load(std::memory_order_relaxed) || pos.is_draw(ss->ply)
|
| 677 |
+
|| ss->ply >= MAX_PLY)
|
| 678 |
+
return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate(pos) : value_draw(nodes);
|
| 679 |
+
|
| 680 |
+
// Step 3. Mate distance pruning. Even if we mate at the next move our score
|
| 681 |
+
// would be at best mate_in(ss->ply + 1), but if alpha is already bigger because
|
| 682 |
+
// a shorter mate was found upward in the tree then there is no need to search
|
| 683 |
+
// because we will never beat the current alpha. Same logic but with reversed
|
| 684 |
+
// signs also applies in the opposite condition of being mated instead of giving
|
| 685 |
+
// mate. In this case, return a fail-high score.
|
| 686 |
+
alpha = std::max(mated_in(ss->ply), alpha);
|
| 687 |
+
beta = std::min(mate_in(ss->ply + 1), beta);
|
| 688 |
+
if (alpha >= beta)
|
| 689 |
+
return alpha;
|
| 690 |
+
}
|
| 691 |
+
|
| 692 |
+
assert(0 <= ss->ply && ss->ply < MAX_PLY);
|
| 693 |
+
|
| 694 |
+
Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE;
|
| 695 |
+
bestMove = Move::none();
|
| 696 |
+
priorReduction = (ss - 1)->reduction;
|
| 697 |
+
(ss - 1)->reduction = 0;
|
| 698 |
+
ss->statScore = 0;
|
| 699 |
+
(ss + 2)->cutoffCnt = 0;
|
| 700 |
+
|
| 701 |
+
// Step 4. Transposition table lookup
|
| 702 |
+
excludedMove = ss->excludedMove;
|
| 703 |
+
posKey = pos.key();
|
| 704 |
+
auto [ttHit, ttData, ttWriter] = tt.probe(posKey);
|
| 705 |
+
// Need further processing of the saved data
|
| 706 |
+
ss->ttHit = ttHit;
|
| 707 |
+
ttData.move = rootNode ? rootMoves[pvIdx].pv[0] : ttHit ? ttData.move : Move::none();
|
| 708 |
+
ttData.value = ttHit ? value_from_tt(ttData.value, ss->ply, pos.rule50_count()) : VALUE_NONE;
|
| 709 |
+
ss->ttPv = excludedMove ? ss->ttPv : PvNode || (ttHit && ttData.is_pv);
|
| 710 |
+
ttCapture = ttData.move && pos.capture_stage(ttData.move);
|
| 711 |
+
|
| 712 |
+
// Step 6. Static evaluation of the position
|
| 713 |
+
Value unadjustedStaticEval = VALUE_NONE;
|
| 714 |
+
const auto correctionValue = correction_value(*this, pos, ss);
|
| 715 |
+
// Skip early pruning when in check
|
| 716 |
+
if (ss->inCheck)
|
| 717 |
+
ss->staticEval = eval = (ss - 2)->staticEval;
|
| 718 |
+
else if (excludedMove)
|
| 719 |
+
unadjustedStaticEval = eval = ss->staticEval;
|
| 720 |
+
else if (ss->ttHit)
|
| 721 |
+
{
|
| 722 |
+
// Never assume anything about values stored in TT
|
| 723 |
+
unadjustedStaticEval = ttData.eval;
|
| 724 |
+
if (!is_valid(unadjustedStaticEval))
|
| 725 |
+
unadjustedStaticEval = evaluate(pos);
|
| 726 |
+
|
| 727 |
+
ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, correctionValue);
|
| 728 |
+
|
| 729 |
+
// ttValue can be used as a better position evaluation
|
| 730 |
+
if (is_valid(ttData.value)
|
| 731 |
+
&& (ttData.bound & (ttData.value > eval ? BOUND_LOWER : BOUND_UPPER)))
|
| 732 |
+
eval = ttData.value;
|
| 733 |
+
}
|
| 734 |
+
else
|
| 735 |
+
{
|
| 736 |
+
unadjustedStaticEval = evaluate(pos);
|
| 737 |
+
ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, correctionValue);
|
| 738 |
+
|
| 739 |
+
// Static evaluation is saved as it was before adjustment by correction history
|
| 740 |
+
ttWriter.write(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_UNSEARCHED, Move::none(),
|
| 741 |
+
unadjustedStaticEval, tt.generation());
|
| 742 |
+
}
|
| 743 |
+
|
| 744 |
+
// Set up the improving flag, which is true if current static evaluation is
|
| 745 |
+
// bigger than the previous static evaluation at our turn (if we were in
|
| 746 |
+
// check at our previous move we go back until we weren't in check) and is
|
| 747 |
+
// false otherwise. The improving flag is used in various pruning heuristics.
|
| 748 |
+
// Similarly, opponentWorsening is true if our static evaluation is better
|
| 749 |
+
// for us than at the last ply.
|
| 750 |
+
improving = ss->staticEval > (ss - 2)->staticEval;
|
| 751 |
+
opponentWorsening = ss->staticEval > -(ss - 1)->staticEval;
|
| 752 |
+
|
| 753 |
+
// Hindsight adjustment of reductions based on static evaluation difference.
|
| 754 |
+
if (priorReduction >= 3 && !opponentWorsening)
|
| 755 |
+
depth++;
|
| 756 |
+
if (priorReduction >= 2 && depth >= 2 && ss->staticEval + (ss - 1)->staticEval > 188)
|
| 757 |
+
depth--;
|
| 758 |
+
|
| 759 |
+
// At non-PV nodes we check for an early TT cutoff
|
| 760 |
+
if (!PvNode && !excludedMove && ttData.depth > depth - (ttData.value <= beta)
|
| 761 |
+
&& is_valid(ttData.value) // Can happen when !ttHit or when access race in probe()
|
| 762 |
+
&& (ttData.bound & (ttData.value >= beta ? BOUND_LOWER : BOUND_UPPER))
|
| 763 |
+
&& (cutNode == (ttData.value >= beta) || depth > 5))
|
| 764 |
+
{
|
| 765 |
+
// If ttMove is quiet, update move sorting heuristics on TT hit
|
| 766 |
+
if (ttData.move && ttData.value >= beta)
|
| 767 |
+
{
|
| 768 |
+
// Bonus for a quiet ttMove that fails high
|
| 769 |
+
if (!ttCapture)
|
| 770 |
+
update_quiet_histories(pos, ss, *this, ttData.move,
|
| 771 |
+
std::min(121 * depth - 75, 932));
|
| 772 |
+
|
| 773 |
+
// Extra penalty for early quiet moves of the previous ply
|
| 774 |
+
if (prevSq != SQ_NONE && (ss - 1)->moveCount < 4 && !priorCapture)
|
| 775 |
+
update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, -2104);
|
| 776 |
+
}
|
| 777 |
+
|
| 778 |
+
// Partial workaround for the graph history interaction problem
|
| 779 |
+
// For high rule50 counts don't produce transposition table cutoffs.
|
| 780 |
+
if (pos.rule50_count() < 96)
|
| 781 |
+
{
|
| 782 |
+
if (depth >= 7 && ttData.move && pos.pseudo_legal(ttData.move) && pos.legal(ttData.move)
|
| 783 |
+
&& !is_decisive(ttData.value))
|
| 784 |
+
{
|
| 785 |
+
pos.do_move(ttData.move, st);
|
| 786 |
+
Key nextPosKey = pos.key();
|
| 787 |
+
auto [ttHitNext, ttDataNext, ttWriterNext] = tt.probe(nextPosKey);
|
| 788 |
+
pos.undo_move(ttData.move);
|
| 789 |
+
|
| 790 |
+
// Check that the ttValue after the tt move would also trigger a cutoff
|
| 791 |
+
if (!is_valid(ttDataNext.value))
|
| 792 |
+
return ttData.value;
|
| 793 |
+
|
| 794 |
+
if ((ttData.value >= beta) == (-ttDataNext.value >= beta))
|
| 795 |
+
return ttData.value;
|
| 796 |
+
}
|
| 797 |
+
else
|
| 798 |
+
return ttData.value;
|
| 799 |
+
}
|
| 800 |
+
}
|
| 801 |
+
|
| 802 |
+
// Step 5. Tablebases probe
|
| 803 |
+
if (!rootNode && !excludedMove && tbConfig.cardinality)
|
| 804 |
+
{
|
| 805 |
+
int piecesCount = pos.count<ALL_PIECES>();
|
| 806 |
+
|
| 807 |
+
if (piecesCount <= tbConfig.cardinality
|
| 808 |
+
&& (piecesCount < tbConfig.cardinality || depth >= tbConfig.probeDepth)
|
| 809 |
+
&& pos.rule50_count() == 0 && !pos.can_castle(ANY_CASTLING))
|
| 810 |
+
{
|
| 811 |
+
TB::ProbeState err;
|
| 812 |
+
TB::WDLScore wdl = Tablebases::probe_wdl(pos, &err);
|
| 813 |
+
|
| 814 |
+
// Force check of time on the next occasion
|
| 815 |
+
if (is_mainthread())
|
| 816 |
+
main_manager()->callsCnt = 0;
|
| 817 |
+
|
| 818 |
+
if (err != TB::ProbeState::FAIL)
|
| 819 |
+
{
|
| 820 |
+
// Preferable over fetch_add to avoid locking instructions
|
| 821 |
+
tbHits.store(tbHits.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
|
| 822 |
+
|
| 823 |
+
int drawScore = tbConfig.useRule50 ? 1 : 0;
|
| 824 |
+
|
| 825 |
+
Value tbValue = VALUE_TB - ss->ply;
|
| 826 |
+
|
| 827 |
+
// Use the range VALUE_TB to VALUE_TB_WIN_IN_MAX_PLY to score
|
| 828 |
+
value = wdl < -drawScore ? -tbValue
|
| 829 |
+
: wdl > drawScore ? tbValue
|
| 830 |
+
: VALUE_DRAW + 2 * wdl * drawScore;
|
| 831 |
+
|
| 832 |
+
Bound b = wdl < -drawScore ? BOUND_UPPER
|
| 833 |
+
: wdl > drawScore ? BOUND_LOWER
|
| 834 |
+
: BOUND_EXACT;
|
| 835 |
+
|
| 836 |
+
if (b == BOUND_EXACT || (b == BOUND_LOWER ? value >= beta : value <= alpha))
|
| 837 |
+
{
|
| 838 |
+
ttWriter.write(posKey, value_to_tt(value, ss->ply), ss->ttPv, b,
|
| 839 |
+
std::min(MAX_PLY - 1, depth + 6), Move::none(), VALUE_NONE,
|
| 840 |
+
tt.generation());
|
| 841 |
+
|
| 842 |
+
return value;
|
| 843 |
+
}
|
| 844 |
+
|
| 845 |
+
if (PvNode)
|
| 846 |
+
{
|
| 847 |
+
if (b == BOUND_LOWER)
|
| 848 |
+
bestValue = value, alpha = std::max(alpha, bestValue);
|
| 849 |
+
else
|
| 850 |
+
maxValue = value;
|
| 851 |
+
}
|
| 852 |
+
}
|
| 853 |
+
}
|
| 854 |
+
}
|
| 855 |
+
|
| 856 |
+
if (ss->inCheck)
|
| 857 |
+
goto moves_loop;
|
| 858 |
+
|
| 859 |
+
// Use static evaluation difference to improve quiet move ordering
|
| 860 |
+
if (((ss - 1)->currentMove).is_ok() && !(ss - 1)->inCheck && !priorCapture)
|
| 861 |
+
{
|
| 862 |
+
int evalDiff = std::clamp(-int((ss - 1)->staticEval + ss->staticEval), -213, 175) + 59;
|
| 863 |
+
mainHistory[~us][((ss - 1)->currentMove).raw()] << evalDiff * 10;
|
| 864 |
+
if (!ttHit && type_of(pos.piece_on(prevSq)) != PAWN
|
| 865 |
+
&& ((ss - 1)->currentMove).type_of() != PROMOTION)
|
| 866 |
+
sharedHistory.pawn_entry(pos)[pos.piece_on(prevSq)][prevSq] << evalDiff * 13;
|
| 867 |
+
}
|
| 868 |
+
|
| 869 |
+
|
| 870 |
+
// Step 7. Razoring
|
| 871 |
+
// If eval is really low, skip search entirely and return the qsearch value.
|
| 872 |
+
// For PvNodes, we must have a guard against mates being returned.
|
| 873 |
+
if (!PvNode && eval < alpha - 507 - 312 * depth * depth)
|
| 874 |
+
return qsearch<NonPV>(pos, ss, alpha, beta);
|
| 875 |
+
|
| 876 |
+
// Step 8. Futility pruning: child node
|
| 877 |
+
// The depth condition is important for mate finding.
|
| 878 |
+
{
|
| 879 |
+
auto futility_margin = [&](Depth d) {
|
| 880 |
+
Value futilityMult = 77 - 22 * !ss->ttHit;
|
| 881 |
+
|
| 882 |
+
return futilityMult * d
|
| 883 |
+
- (2661 * improving + 355 * opponentWorsening) * futilityMult / 1024 //
|
| 884 |
+
+ std::abs(correctionValue) / 176900;
|
| 885 |
+
};
|
| 886 |
+
|
| 887 |
+
if (!ss->ttPv && depth < 16 && eval - futility_margin(depth) >= beta && eval >= beta
|
| 888 |
+
&& (!ttData.move || ttCapture) && !is_loss(beta) && !is_win(eval))
|
| 889 |
+
return (2 * beta + eval) / 3;
|
| 890 |
+
}
|
| 891 |
+
|
| 892 |
+
// Step 9. Null move search with verification search
|
| 893 |
+
if (cutNode && ss->staticEval >= beta - 17 * depth - 50 * improving + 359 && !excludedMove
|
| 894 |
+
&& pos.non_pawn_material(us) && ss->ply >= nmpMinPly && !is_loss(beta))
|
| 895 |
+
{
|
| 896 |
+
assert((ss - 1)->currentMove != Move::null());
|
| 897 |
+
|
| 898 |
+
// Null move dynamic reduction based on depth
|
| 899 |
+
Depth R = 7 + depth / 3;
|
| 900 |
+
do_null_move(pos, st, ss);
|
| 901 |
+
|
| 902 |
+
Value nullValue = -search<NonPV>(pos, ss + 1, -beta, -beta + 1, depth - R, false);
|
| 903 |
+
|
| 904 |
+
undo_null_move(pos);
|
| 905 |
+
|
| 906 |
+
// Do not return unproven mate or TB scores
|
| 907 |
+
if (nullValue >= beta && !is_win(nullValue))
|
| 908 |
+
{
|
| 909 |
+
if (nmpMinPly || depth < 16)
|
| 910 |
+
return nullValue;
|
| 911 |
+
|
| 912 |
+
assert(!nmpMinPly); // Recursive verification is not allowed
|
| 913 |
+
|
| 914 |
+
// Do verification search at high depths, with null move pruning disabled
|
| 915 |
+
// until ply exceeds nmpMinPly.
|
| 916 |
+
nmpMinPly = ss->ply + 3 * (depth - R) / 4;
|
| 917 |
+
|
| 918 |
+
Value v = search<NonPV>(pos, ss, beta - 1, beta, depth - R, false);
|
| 919 |
+
|
| 920 |
+
nmpMinPly = 0;
|
| 921 |
+
|
| 922 |
+
if (v >= beta)
|
| 923 |
+
return nullValue;
|
| 924 |
+
}
|
| 925 |
+
}
|
| 926 |
+
|
| 927 |
+
improving |= ss->staticEval >= beta;
|
| 928 |
+
|
| 929 |
+
// Step 10. Internal iterative reductions
|
| 930 |
+
// At sufficient depth, reduce depth for PV/Cut nodes without a TTMove.
|
| 931 |
+
// (*Scaler) Making IIR more aggressive scales poorly.
|
| 932 |
+
if (!allNode && depth >= 6 && !ttData.move && priorReduction <= 3)
|
| 933 |
+
depth--;
|
| 934 |
+
|
| 935 |
+
// Step 11. ProbCut
|
| 936 |
+
// If we have a good enough capture (or queen promotion) and a reduced search
|
| 937 |
+
// returns a value much above beta, we can (almost) safely prune the previous move.
|
| 938 |
+
probCutBeta = beta + 229 - 63 * improving;
|
| 939 |
+
if (depth >= 3
|
| 940 |
+
&& !is_decisive(beta)
|
| 941 |
+
// If value from transposition table is lower than probCutBeta, don't attempt
|
| 942 |
+
// probCut there
|
| 943 |
+
&& !(is_valid(ttData.value) && ttData.value < probCutBeta))
|
| 944 |
+
{
|
| 945 |
+
assert(probCutBeta < VALUE_INFINITE && probCutBeta > beta);
|
| 946 |
+
|
| 947 |
+
MovePicker mp(pos, ttData.move, probCutBeta - ss->staticEval, &captureHistory);
|
| 948 |
+
Depth probCutDepth = depth - 4;
|
| 949 |
+
|
| 950 |
+
while ((move = mp.next_move()) != Move::none())
|
| 951 |
+
{
|
| 952 |
+
assert(move.is_ok());
|
| 953 |
+
|
| 954 |
+
if (move == excludedMove || !pos.legal(move))
|
| 955 |
+
continue;
|
| 956 |
+
|
| 957 |
+
assert(pos.capture_stage(move));
|
| 958 |
+
|
| 959 |
+
do_move(pos, move, st, ss);
|
| 960 |
+
|
| 961 |
+
// Perform a preliminary qsearch to verify that the move holds
|
| 962 |
+
value = -qsearch<NonPV>(pos, ss + 1, -probCutBeta, -probCutBeta + 1);
|
| 963 |
+
|
| 964 |
+
// If the qsearch held, perform the regular search
|
| 965 |
+
if (value >= probCutBeta && probCutDepth > 0)
|
| 966 |
+
value = -search<NonPV>(pos, ss + 1, -probCutBeta, -probCutBeta + 1, probCutDepth,
|
| 967 |
+
!cutNode);
|
| 968 |
+
|
| 969 |
+
undo_move(pos, move);
|
| 970 |
+
|
| 971 |
+
if (value >= probCutBeta)
|
| 972 |
+
{
|
| 973 |
+
// Save ProbCut data into transposition table
|
| 974 |
+
ttWriter.write(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER,
|
| 975 |
+
probCutDepth + 1, move, unadjustedStaticEval, tt.generation());
|
| 976 |
+
|
| 977 |
+
if (!is_decisive(value))
|
| 978 |
+
return value - (probCutBeta - beta);
|
| 979 |
+
}
|
| 980 |
+
}
|
| 981 |
+
}
|
| 982 |
+
|
| 983 |
+
moves_loop: // When in check, search starts here
|
| 984 |
+
|
| 985 |
+
// Step 12. A small Probcut idea
|
| 986 |
+
probCutBeta = beta + 416;
|
| 987 |
+
if ((ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 4 && ttData.value >= probCutBeta
|
| 988 |
+
&& !is_decisive(beta) && is_valid(ttData.value) && !is_decisive(ttData.value))
|
| 989 |
+
return probCutBeta;
|
| 990 |
+
|
| 991 |
+
const PieceToHistory* contHist[] = {
|
| 992 |
+
(ss - 1)->continuationHistory, (ss - 2)->continuationHistory, (ss - 3)->continuationHistory,
|
| 993 |
+
(ss - 4)->continuationHistory, (ss - 5)->continuationHistory, (ss - 6)->continuationHistory};
|
| 994 |
+
|
| 995 |
+
|
| 996 |
+
MovePicker mp(pos, ttData.move, depth, &mainHistory, &lowPlyHistory, &captureHistory, contHist,
|
| 997 |
+
&sharedHistory, ss->ply);
|
| 998 |
+
|
| 999 |
+
value = bestValue;
|
| 1000 |
+
|
| 1001 |
+
int moveCount = 0;
|
| 1002 |
+
|
| 1003 |
+
// Step 13. Loop through all pseudo-legal moves until no moves remain
|
| 1004 |
+
// or a beta cutoff occurs.
|
| 1005 |
+
while ((move = mp.next_move()) != Move::none())
|
| 1006 |
+
{
|
| 1007 |
+
assert(move.is_ok());
|
| 1008 |
+
|
| 1009 |
+
if (move == excludedMove)
|
| 1010 |
+
continue;
|
| 1011 |
+
|
| 1012 |
+
// Check for legality
|
| 1013 |
+
if (!pos.legal(move))
|
| 1014 |
+
continue;
|
| 1015 |
+
|
| 1016 |
+
// At root obey the "searchmoves" option and skip moves not listed in Root
|
| 1017 |
+
// Move List. In MultiPV mode we also skip PV moves that have been already
|
| 1018 |
+
// searched and those of lower "TB rank" if we are in a TB root position.
|
| 1019 |
+
if (rootNode && !std::count(rootMoves.begin() + pvIdx, rootMoves.begin() + pvLast, move))
|
| 1020 |
+
continue;
|
| 1021 |
+
|
| 1022 |
+
ss->moveCount = ++moveCount;
|
| 1023 |
+
|
| 1024 |
+
if (rootNode && is_mainthread() && nodes > 10000000)
|
| 1025 |
+
{
|
| 1026 |
+
main_manager()->updates.onIter(
|
| 1027 |
+
{depth, UCIEngine::move(move, pos.is_chess960()), moveCount + pvIdx});
|
| 1028 |
+
}
|
| 1029 |
+
if (PvNode)
|
| 1030 |
+
(ss + 1)->pv = nullptr;
|
| 1031 |
+
|
| 1032 |
+
extension = 0;
|
| 1033 |
+
capture = pos.capture_stage(move);
|
| 1034 |
+
movedPiece = pos.moved_piece(move);
|
| 1035 |
+
givesCheck = pos.gives_check(move);
|
| 1036 |
+
|
| 1037 |
+
// Calculate new depth for this move
|
| 1038 |
+
newDepth = depth - 1;
|
| 1039 |
+
|
| 1040 |
+
int delta = beta - alpha;
|
| 1041 |
+
|
| 1042 |
+
Depth r = reduction(improving, depth, moveCount, delta);
|
| 1043 |
+
|
| 1044 |
+
// Increase reduction for ttPv nodes (*Scaler)
|
| 1045 |
+
// Larger values scale well
|
| 1046 |
+
if (ss->ttPv)
|
| 1047 |
+
r += 949;
|
| 1048 |
+
|
| 1049 |
+
// Step 14. Pruning at shallow depths.
|
| 1050 |
+
// Depth conditions are important for mate finding.
|
| 1051 |
+
if (!rootNode && pos.non_pawn_material(us) && !is_loss(bestValue))
|
| 1052 |
+
{
|
| 1053 |
+
// Skip quiet moves if movecount exceeds our FutilityMoveCount threshold
|
| 1054 |
+
if (moveCount >= (3 + depth * depth) / (2 - improving))
|
| 1055 |
+
mp.skip_quiet_moves();
|
| 1056 |
+
|
| 1057 |
+
// Reduced depth of the next LMR search
|
| 1058 |
+
int lmrDepth = newDepth - r / 1024;
|
| 1059 |
+
|
| 1060 |
+
if (capture || givesCheck)
|
| 1061 |
+
{
|
| 1062 |
+
Piece capturedPiece = pos.piece_on(move.to_sq());
|
| 1063 |
+
int captHist = captureHistory[movedPiece][move.to_sq()][type_of(capturedPiece)];
|
| 1064 |
+
|
| 1065 |
+
// Futility pruning for captures
|
| 1066 |
+
if (!givesCheck && lmrDepth < 7)
|
| 1067 |
+
{
|
| 1068 |
+
Value futilityValue = ss->staticEval + 235 + 211 * lmrDepth
|
| 1069 |
+
+ PieceValue[capturedPiece] + 126 * captHist / 1024;
|
| 1070 |
+
|
| 1071 |
+
if (futilityValue <= alpha)
|
| 1072 |
+
continue;
|
| 1073 |
+
}
|
| 1074 |
+
|
| 1075 |
+
// SEE based pruning for captures and checks
|
| 1076 |
+
// Avoid pruning sacrifices of our last piece for stalemate
|
| 1077 |
+
int margin = std::max(185 * depth + captHist / 28, 0);
|
| 1078 |
+
if ((alpha >= VALUE_DRAW || pos.non_pawn_material(us) != PieceValue[movedPiece])
|
| 1079 |
+
&& !pos.see_ge(move, -margin))
|
| 1080 |
+
continue;
|
| 1081 |
+
}
|
| 1082 |
+
else
|
| 1083 |
+
{
|
| 1084 |
+
int history = (*contHist[0])[movedPiece][move.to_sq()]
|
| 1085 |
+
+ (*contHist[1])[movedPiece][move.to_sq()]
|
| 1086 |
+
+ sharedHistory.pawn_entry(pos)[movedPiece][move.to_sq()];
|
| 1087 |
+
|
| 1088 |
+
// Continuation history based pruning
|
| 1089 |
+
if (history < -3826 * depth)
|
| 1090 |
+
continue;
|
| 1091 |
+
|
| 1092 |
+
history += 73 * mainHistory[us][move.raw()] / 32;
|
| 1093 |
+
|
| 1094 |
+
// (*Scaler): Generally, lower divisors scale well
|
| 1095 |
+
lmrDepth += history / 2917;
|
| 1096 |
+
|
| 1097 |
+
Value futilityValue = ss->staticEval + 42 + 157 * !bestMove + 120 * lmrDepth
|
| 1098 |
+
+ 86 * (ss->staticEval > alpha);
|
| 1099 |
+
|
| 1100 |
+
// Futility pruning: parent node
|
| 1101 |
+
// (*Scaler): Generally, more frequent futility pruning
|
| 1102 |
+
// scales well
|
| 1103 |
+
if (!ss->inCheck && lmrDepth < 13 && futilityValue <= alpha)
|
| 1104 |
+
{
|
| 1105 |
+
if (bestValue <= futilityValue && !is_decisive(bestValue)
|
| 1106 |
+
&& !is_win(futilityValue))
|
| 1107 |
+
bestValue = futilityValue;
|
| 1108 |
+
continue;
|
| 1109 |
+
}
|
| 1110 |
+
|
| 1111 |
+
lmrDepth = std::max(lmrDepth, 0);
|
| 1112 |
+
|
| 1113 |
+
// Prune moves with negative SEE
|
| 1114 |
+
if (!pos.see_ge(move, -25 * lmrDepth * lmrDepth))
|
| 1115 |
+
continue;
|
| 1116 |
+
}
|
| 1117 |
+
}
|
| 1118 |
+
|
| 1119 |
+
// Step 15. Extensions
|
| 1120 |
+
// Singular extension search. If all moves but one
|
| 1121 |
+
// fail low on a search of (alpha-s, beta-s), and just one fails high on
|
| 1122 |
+
// (alpha, beta), then that move is singular and should be extended. To
|
| 1123 |
+
// verify this we do a reduced search on the position excluding the ttMove
|
| 1124 |
+
// and if the result is lower than ttValue minus a margin, then we will
|
| 1125 |
+
// extend the ttMove. Recursive singular search is avoided.
|
| 1126 |
+
|
| 1127 |
+
// (*Scaler) Generally, higher singularBeta (i.e. closer to ttValue)
|
| 1128 |
+
// and lower extension margins scale well.
|
| 1129 |
+
if (!rootNode && move == ttData.move && !excludedMove && depth >= 6 + ss->ttPv
|
| 1130 |
+
&& is_valid(ttData.value) && !is_decisive(ttData.value) && (ttData.bound & BOUND_LOWER)
|
| 1131 |
+
&& ttData.depth >= depth - 3 && !is_shuffling(move, ss, pos))
|
| 1132 |
+
{
|
| 1133 |
+
Value singularBeta = ttData.value - (58 + 67 * (ss->ttPv && !PvNode)) * depth / 57;
|
| 1134 |
+
Depth singularDepth = newDepth / 2;
|
| 1135 |
+
|
| 1136 |
+
ss->excludedMove = move;
|
| 1137 |
+
value = search<NonPV>(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode);
|
| 1138 |
+
ss->excludedMove = Move::none();
|
| 1139 |
+
|
| 1140 |
+
if (value < singularBeta)
|
| 1141 |
+
{
|
| 1142 |
+
int corrValAdj = std::abs(correctionValue) / 220870;
|
| 1143 |
+
int doubleMargin = -4 + 213 * PvNode - 196 * !ttCapture - corrValAdj
|
| 1144 |
+
- 943 * ttMoveHistory / 123477 - (ss->ply > rootDepth) * 45;
|
| 1145 |
+
int tripleMargin = 73 + 324 * PvNode - 229 * !ttCapture + 87 * ss->ttPv - corrValAdj
|
| 1146 |
+
- (ss->ply > rootDepth) * 50;
|
| 1147 |
+
|
| 1148 |
+
extension =
|
| 1149 |
+
1 + (value < singularBeta - doubleMargin) + (value < singularBeta - tripleMargin);
|
| 1150 |
+
|
| 1151 |
+
depth++;
|
| 1152 |
+
}
|
| 1153 |
+
|
| 1154 |
+
// Multi-cut pruning
|
| 1155 |
+
// Our ttMove is assumed to fail high based on the bound of the TT entry,
|
| 1156 |
+
// and if after excluding the ttMove with a reduced search we fail high
|
| 1157 |
+
// over the original beta, we assume this expected cut-node is not
|
| 1158 |
+
// singular (multiple moves fail high), and we can prune the whole
|
| 1159 |
+
// subtree by returning a softbound.
|
| 1160 |
+
else if (value >= beta && !is_decisive(value))
|
| 1161 |
+
{
|
| 1162 |
+
ttMoveHistory << std::max(-394 - 105 * depth, -3692);
|
| 1163 |
+
return value;
|
| 1164 |
+
}
|
| 1165 |
+
|
| 1166 |
+
// Negative extensions
|
| 1167 |
+
// If other moves failed high over (ttValue - margin) without the
|
| 1168 |
+
// ttMove on a reduced search, but we cannot do multi-cut because
|
| 1169 |
+
// (ttValue - margin) is lower than the original beta, we do not know
|
| 1170 |
+
// if the ttMove is singular or can do a multi-cut, so we reduce the
|
| 1171 |
+
// ttMove in favor of other moves based on some conditions:
|
| 1172 |
+
|
| 1173 |
+
// If the ttMove is assumed to fail high over current beta
|
| 1174 |
+
else if (ttData.value >= beta)
|
| 1175 |
+
extension = -3;
|
| 1176 |
+
|
| 1177 |
+
// If we are on a cutNode but the ttMove is not assumed to fail high
|
| 1178 |
+
// over current beta
|
| 1179 |
+
else if (cutNode)
|
| 1180 |
+
extension = -2;
|
| 1181 |
+
}
|
| 1182 |
+
|
| 1183 |
+
// Step 16. Make the move
|
| 1184 |
+
do_move(pos, move, st, givesCheck, ss);
|
| 1185 |
+
|
| 1186 |
+
// Add extension to new depth
|
| 1187 |
+
newDepth += extension;
|
| 1188 |
+
uint64_t nodeCount = rootNode ? uint64_t(nodes) : 0;
|
| 1189 |
+
|
| 1190 |
+
// Decrease reduction for PvNodes (*Scaler)
|
| 1191 |
+
if (ss->ttPv)
|
| 1192 |
+
r -= 2823 + PvNode * 1013 + (ttData.value > alpha) * 910
|
| 1193 |
+
+ (ttData.depth >= depth) * (933 + cutNode * 979);
|
| 1194 |
+
|
| 1195 |
+
r += 690; // Base reduction offset to compensate for other tweaks
|
| 1196 |
+
r -= moveCount * 70;
|
| 1197 |
+
r -= std::abs(correctionValue) / 26878;
|
| 1198 |
+
|
| 1199 |
+
// Increase reduction for cut nodes
|
| 1200 |
+
if (cutNode)
|
| 1201 |
+
r += 3582 + 1015 * !ttData.move;
|
| 1202 |
+
|
| 1203 |
+
// Increase reduction if ttMove is a capture
|
| 1204 |
+
if (ttCapture)
|
| 1205 |
+
r += 1075;
|
| 1206 |
+
|
| 1207 |
+
// Increase reduction if next ply has a lot of fail highs
|
| 1208 |
+
if ((ss + 1)->cutoffCnt > 1)
|
| 1209 |
+
r += 249 + 1073 * ((ss + 1)->cutoffCnt > 2) + 1064 * allNode;
|
| 1210 |
+
|
| 1211 |
+
// For first picked move (ttMove) reduce reduction
|
| 1212 |
+
if (move == ttData.move)
|
| 1213 |
+
r -= 2069;
|
| 1214 |
+
|
| 1215 |
+
if (capture)
|
| 1216 |
+
ss->statScore = 892 * int(PieceValue[pos.captured_piece()]) / 128
|
| 1217 |
+
+ captureHistory[movedPiece][move.to_sq()][type_of(pos.captured_piece())];
|
| 1218 |
+
else
|
| 1219 |
+
ss->statScore = 2 * mainHistory[us][move.raw()]
|
| 1220 |
+
+ (*contHist[0])[movedPiece][move.to_sq()]
|
| 1221 |
+
+ (*contHist[1])[movedPiece][move.to_sq()];
|
| 1222 |
+
|
| 1223 |
+
// Decrease/increase reduction for moves with a good/bad history
|
| 1224 |
+
r -= ss->statScore * 454 / 4096;
|
| 1225 |
+
|
| 1226 |
+
// Scale up reductions for expected ALL nodes
|
| 1227 |
+
if (allNode)
|
| 1228 |
+
r += r * 276 / (256 * depth + 254);
|
| 1229 |
+
|
| 1230 |
+
// Step 17. Late moves reduction / extension (LMR)
|
| 1231 |
+
if (depth >= 2 && moveCount > 1)
|
| 1232 |
+
{
|
| 1233 |
+
// In general we want to cap the LMR depth search at newDepth, but when
|
| 1234 |
+
// reduction is negative, we allow this move a limited search extension
|
| 1235 |
+
// beyond the first move depth.
|
| 1236 |
+
// To prevent problems when the max value is less than the min value,
|
| 1237 |
+
// std::clamp has been replaced by a more robust implementation.
|
| 1238 |
+
Depth d = std::max(1, std::min(newDepth - r / 1024, newDepth + 2)) + PvNode;
|
| 1239 |
+
|
| 1240 |
+
ss->reduction = newDepth - d;
|
| 1241 |
+
value = -search<NonPV>(pos, ss + 1, -(alpha + 1), -alpha, d, true);
|
| 1242 |
+
ss->reduction = 0;
|
| 1243 |
+
|
| 1244 |
+
// Do a full-depth search when reduced LMR search fails high
|
| 1245 |
+
// (*Scaler) Shallower searches here don't scale well
|
| 1246 |
+
if (value > alpha)
|
| 1247 |
+
{
|
| 1248 |
+
// Adjust full-depth search based on LMR results - if the result was
|
| 1249 |
+
// good enough search deeper, if it was bad enough search shallower.
|
| 1250 |
+
const bool doDeeperSearch = d < newDepth && value > bestValue + 50;
|
| 1251 |
+
const bool doShallowerSearch = value < bestValue + 9;
|
| 1252 |
+
|
| 1253 |
+
newDepth += doDeeperSearch - doShallowerSearch;
|
| 1254 |
+
|
| 1255 |
+
if (newDepth > d)
|
| 1256 |
+
value = -search<NonPV>(pos, ss + 1, -(alpha + 1), -alpha, newDepth, !cutNode);
|
| 1257 |
+
|
| 1258 |
+
// Post LMR continuation history updates
|
| 1259 |
+
update_continuation_histories(ss, movedPiece, move.to_sq(), 1342);
|
| 1260 |
+
}
|
| 1261 |
+
}
|
| 1262 |
+
|
| 1263 |
+
// Step 18. Full-depth search when LMR is skipped
|
| 1264 |
+
else if (!PvNode || moveCount > 1)
|
| 1265 |
+
{
|
| 1266 |
+
// Increase reduction if ttMove is not present
|
| 1267 |
+
if (!ttData.move)
|
| 1268 |
+
r += 993;
|
| 1269 |
+
|
| 1270 |
+
// Note that if expected reduction is high, we reduce search depth here
|
| 1271 |
+
value = -search<NonPV>(pos, ss + 1, -(alpha + 1), -alpha,
|
| 1272 |
+
newDepth - (r > 4302) - (r > 5919 && newDepth > 2), !cutNode);
|
| 1273 |
+
}
|
| 1274 |
+
|
| 1275 |
+
// For PV nodes only, do a full PV search on the first move or after a fail high,
|
| 1276 |
+
// otherwise let the parent node fail low with value <= alpha and try another move.
|
| 1277 |
+
if (PvNode && (moveCount == 1 || value > alpha))
|
| 1278 |
+
{
|
| 1279 |
+
(ss + 1)->pv = pv;
|
| 1280 |
+
(ss + 1)->pv[0] = Move::none();
|
| 1281 |
+
|
| 1282 |
+
// Extend move from transposition table if we are about to dive into qsearch.
|
| 1283 |
+
// decisive score handling improves mate finding and retrograde analysis.
|
| 1284 |
+
if (move == ttData.move
|
| 1285 |
+
&& ((is_valid(ttData.value) && is_decisive(ttData.value) && ttData.depth > 0)
|
| 1286 |
+
|| ttData.depth > 1))
|
| 1287 |
+
newDepth = std::max(newDepth, 1);
|
| 1288 |
+
|
| 1289 |
+
value = -search<PV>(pos, ss + 1, -beta, -alpha, newDepth, false);
|
| 1290 |
+
}
|
| 1291 |
+
|
| 1292 |
+
// Step 19. Undo move
|
| 1293 |
+
undo_move(pos, move);
|
| 1294 |
+
|
| 1295 |
+
assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);
|
| 1296 |
+
|
| 1297 |
+
// Step 20. Check for a new best move
|
| 1298 |
+
// Finished searching the move. If a stop occurred, the return value of
|
| 1299 |
+
// the search cannot be trusted, and we return immediately without updating
|
| 1300 |
+
// best move, principal variation nor transposition table.
|
| 1301 |
+
if (threads.stop.load(std::memory_order_relaxed))
|
| 1302 |
+
return VALUE_ZERO;
|
| 1303 |
+
|
| 1304 |
+
if (rootNode)
|
| 1305 |
+
{
|
| 1306 |
+
RootMove& rm = *std::find(rootMoves.begin(), rootMoves.end(), move);
|
| 1307 |
+
|
| 1308 |
+
rm.effort += nodes - nodeCount;
|
| 1309 |
+
|
| 1310 |
+
rm.averageScore =
|
| 1311 |
+
rm.averageScore != -VALUE_INFINITE ? (value + rm.averageScore) / 2 : value;
|
| 1312 |
+
|
| 1313 |
+
rm.meanSquaredScore = rm.meanSquaredScore != -VALUE_INFINITE * VALUE_INFINITE
|
| 1314 |
+
? (value * std::abs(value) + rm.meanSquaredScore) / 2
|
| 1315 |
+
: value * std::abs(value);
|
| 1316 |
+
|
| 1317 |
+
// PV move or new best move?
|
| 1318 |
+
if (moveCount == 1 || value > alpha)
|
| 1319 |
+
{
|
| 1320 |
+
rm.score = rm.uciScore = value;
|
| 1321 |
+
rm.selDepth = selDepth;
|
| 1322 |
+
rm.scoreLowerbound = rm.scoreUpperbound = false;
|
| 1323 |
+
|
| 1324 |
+
if (value >= beta)
|
| 1325 |
+
{
|
| 1326 |
+
rm.scoreLowerbound = true;
|
| 1327 |
+
rm.uciScore = beta;
|
| 1328 |
+
}
|
| 1329 |
+
else if (value <= alpha)
|
| 1330 |
+
{
|
| 1331 |
+
rm.scoreUpperbound = true;
|
| 1332 |
+
rm.uciScore = alpha;
|
| 1333 |
+
}
|
| 1334 |
+
|
| 1335 |
+
rm.pv.resize(1);
|
| 1336 |
+
|
| 1337 |
+
assert((ss + 1)->pv);
|
| 1338 |
+
|
| 1339 |
+
for (Move* m = (ss + 1)->pv; *m != Move::none(); ++m)
|
| 1340 |
+
rm.pv.push_back(*m);
|
| 1341 |
+
|
| 1342 |
+
// We record how often the best move has been changed in each iteration.
|
| 1343 |
+
// This information is used for time management. In MultiPV mode,
|
| 1344 |
+
// we must take care to only do this for the first PV line.
|
| 1345 |
+
if (moveCount > 1 && !pvIdx)
|
| 1346 |
+
++bestMoveChanges;
|
| 1347 |
+
}
|
| 1348 |
+
else
|
| 1349 |
+
// All moves other than the PV are set to the lowest value: this
|
| 1350 |
+
// is not a problem when sorting because the sort is stable and the
|
| 1351 |
+
// move position in the list is preserved - just the PV is pushed up.
|
| 1352 |
+
rm.score = -VALUE_INFINITE;
|
| 1353 |
+
}
|
| 1354 |
+
|
| 1355 |
+
// In case we have an alternative move equal in eval to the current bestmove,
|
| 1356 |
+
// promote it to bestmove by pretending it just exceeds alpha (but not beta).
|
| 1357 |
+
int inc = (value == bestValue && ss->ply + 2 >= rootDepth && (int(nodes) & 14) == 0
|
| 1358 |
+
&& !is_win(std::abs(value) + 1));
|
| 1359 |
+
|
| 1360 |
+
if (value + inc > bestValue)
|
| 1361 |
+
{
|
| 1362 |
+
bestValue = value;
|
| 1363 |
+
|
| 1364 |
+
if (value + inc > alpha)
|
| 1365 |
+
{
|
| 1366 |
+
bestMove = move;
|
| 1367 |
+
|
| 1368 |
+
if (PvNode && !rootNode) // Update pv even in fail-high case
|
| 1369 |
+
update_pv(ss->pv, move, (ss + 1)->pv);
|
| 1370 |
+
|
| 1371 |
+
if (value >= beta)
|
| 1372 |
+
{
|
| 1373 |
+
// (*Scaler) Infrequent and small updates scale well
|
| 1374 |
+
ss->cutoffCnt += (extension < 2) || PvNode;
|
| 1375 |
+
assert(value >= beta); // Fail high
|
| 1376 |
+
break;
|
| 1377 |
+
}
|
| 1378 |
+
|
| 1379 |
+
// Reduce other moves if we have found at least one score improvement
|
| 1380 |
+
if (depth > 2 && depth < 14 && !is_decisive(value))
|
| 1381 |
+
depth -= 2;
|
| 1382 |
+
|
| 1383 |
+
assert(depth > 0);
|
| 1384 |
+
alpha = value; // Update alpha! Always alpha < beta
|
| 1385 |
+
}
|
| 1386 |
+
}
|
| 1387 |
+
|
| 1388 |
+
// If the move is worse than some previously searched move,
|
| 1389 |
+
// remember it, to update its stats later.
|
| 1390 |
+
if (move != bestMove && moveCount <= SEARCHEDLIST_CAPACITY)
|
| 1391 |
+
{
|
| 1392 |
+
if (capture)
|
| 1393 |
+
capturesSearched.push_back(move);
|
| 1394 |
+
else
|
| 1395 |
+
quietsSearched.push_back(move);
|
| 1396 |
+
}
|
| 1397 |
+
}
|
| 1398 |
+
|
| 1399 |
+
// Step 21. Check for mate and stalemate
|
| 1400 |
+
// All legal moves have been searched and if there are no legal moves, it
|
| 1401 |
+
// must be a mate or a stalemate. If we are in a singular extension search then
|
| 1402 |
+
// return a fail low score.
|
| 1403 |
+
|
| 1404 |
+
assert(moveCount || !ss->inCheck || excludedMove || !MoveList<LEGAL>(pos).size());
|
| 1405 |
+
|
| 1406 |
+
// Adjust best value for fail high cases
|
| 1407 |
+
if (bestValue >= beta && !is_decisive(bestValue) && !is_decisive(alpha))
|
| 1408 |
+
bestValue = (bestValue * depth + beta) / (depth + 1);
|
| 1409 |
+
|
| 1410 |
+
if (!moveCount)
|
| 1411 |
+
bestValue = excludedMove ? alpha : ss->inCheck ? mated_in(ss->ply) : VALUE_DRAW;
|
| 1412 |
+
|
| 1413 |
+
// If there is a move that produces search value greater than alpha,
|
| 1414 |
+
// we update the stats of searched moves.
|
| 1415 |
+
else if (bestMove)
|
| 1416 |
+
{
|
| 1417 |
+
update_all_stats(pos, ss, *this, bestMove, prevSq, quietsSearched, capturesSearched, depth,
|
| 1418 |
+
ttData.move);
|
| 1419 |
+
if (!PvNode)
|
| 1420 |
+
ttMoveHistory << (bestMove == ttData.move ? 804 : -860);
|
| 1421 |
+
}
|
| 1422 |
+
|
| 1423 |
+
// Bonus for prior quiet countermove that caused the fail low
|
| 1424 |
+
else if (!priorCapture && prevSq != SQ_NONE)
|
| 1425 |
+
{
|
| 1426 |
+
int bonusScale = -227;
|
| 1427 |
+
bonusScale -= (ss - 1)->statScore / 101;
|
| 1428 |
+
bonusScale += std::min(58 * depth, 488);
|
| 1429 |
+
bonusScale += 172 * ((ss - 1)->moveCount > 8);
|
| 1430 |
+
bonusScale += 150 * (!ss->inCheck && bestValue <= ss->staticEval - 113);
|
| 1431 |
+
bonusScale += 154 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 68);
|
| 1432 |
+
|
| 1433 |
+
bonusScale = std::max(bonusScale, 0);
|
| 1434 |
+
|
| 1435 |
+
// scaledBonus ranges from 0 to roughly 2.3M, overflows happen for multipliers larger than 900
|
| 1436 |
+
const int scaledBonus = std::min(137 * depth - 79, 1394) * bonusScale;
|
| 1437 |
+
|
| 1438 |
+
update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq,
|
| 1439 |
+
scaledBonus * 222 / 16384);
|
| 1440 |
+
|
| 1441 |
+
mainHistory[~us][((ss - 1)->currentMove).raw()] << scaledBonus * 221 / 32768;
|
| 1442 |
+
|
| 1443 |
+
if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION)
|
| 1444 |
+
sharedHistory.pawn_entry(pos)[pos.piece_on(prevSq)][prevSq] << scaledBonus * 286 / 8192;
|
| 1445 |
+
}
|
| 1446 |
+
|
| 1447 |
+
// Bonus for prior capture countermove that caused the fail low
|
| 1448 |
+
else if (priorCapture && prevSq != SQ_NONE)
|
| 1449 |
+
{
|
| 1450 |
+
Piece capturedPiece = pos.captured_piece();
|
| 1451 |
+
assert(capturedPiece != NO_PIECE);
|
| 1452 |
+
captureHistory[pos.piece_on(prevSq)][prevSq][type_of(capturedPiece)] << 993;
|
| 1453 |
+
}
|
| 1454 |
+
|
| 1455 |
+
if (PvNode)
|
| 1456 |
+
bestValue = std::min(bestValue, maxValue);
|
| 1457 |
+
|
| 1458 |
+
// If no good move is found and the previous position was ttPv, then the previous
|
| 1459 |
+
// opponent move is probably good and the new position is added to the search tree.
|
| 1460 |
+
if (bestValue <= alpha)
|
| 1461 |
+
ss->ttPv = ss->ttPv || (ss - 1)->ttPv;
|
| 1462 |
+
|
| 1463 |
+
// Write gathered information in transposition table. Note that the
|
| 1464 |
+
// static evaluation is saved as it was before correction history.
|
| 1465 |
+
if (!excludedMove && !(rootNode && pvIdx))
|
| 1466 |
+
ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv,
|
| 1467 |
+
bestValue >= beta ? BOUND_LOWER
|
| 1468 |
+
: PvNode && bestMove ? BOUND_EXACT
|
| 1469 |
+
: BOUND_UPPER,
|
| 1470 |
+
moveCount != 0 ? depth : std::min(MAX_PLY - 1, depth + 6), bestMove,
|
| 1471 |
+
unadjustedStaticEval, tt.generation());
|
| 1472 |
+
|
| 1473 |
+
// Adjust correction history if the best move is not a capture
|
| 1474 |
+
// and the error direction matches whether we are above/below bounds.
|
| 1475 |
+
if (!ss->inCheck && !(bestMove && pos.capture(bestMove))
|
| 1476 |
+
&& (bestValue > ss->staticEval) == bool(bestMove))
|
| 1477 |
+
{
|
| 1478 |
+
auto bonus = std::clamp(int(bestValue - ss->staticEval) * depth / (bestMove ? 10 : 8),
|
| 1479 |
+
-CORRECTION_HISTORY_LIMIT / 4, CORRECTION_HISTORY_LIMIT / 4);
|
| 1480 |
+
update_correction_history(pos, ss, *this, bonus);
|
| 1481 |
+
}
|
| 1482 |
+
|
| 1483 |
+
assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE);
|
| 1484 |
+
|
| 1485 |
+
return bestValue;
|
| 1486 |
+
}
|
| 1487 |
+
|
| 1488 |
+
|
| 1489 |
+
// Quiescence search function, which is called by the main search function with
|
| 1490 |
+
// depth zero, or recursively with further decreasing depth. With depth <= 0, we
|
| 1491 |
+
// "should" be using static eval only, but tactical moves may confuse the static eval.
|
| 1492 |
+
// To fight this horizon effect, we implement this qsearch of tactical moves.
|
| 1493 |
+
// See https://www.chessprogramming.org/Horizon_Effect
|
| 1494 |
+
// and https://www.chessprogramming.org/Quiescence_Search
|
| 1495 |
+
template<NodeType nodeType>
|
| 1496 |
+
Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) {
|
| 1497 |
+
|
| 1498 |
+
static_assert(nodeType != Root);
|
| 1499 |
+
constexpr bool PvNode = nodeType == PV;
|
| 1500 |
+
|
| 1501 |
+
assert(alpha >= -VALUE_INFINITE && alpha < beta && beta <= VALUE_INFINITE);
|
| 1502 |
+
assert(PvNode || (alpha == beta - 1));
|
| 1503 |
+
|
| 1504 |
+
// Check if we have an upcoming move that draws by repetition
|
| 1505 |
+
if (alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply))
|
| 1506 |
+
{
|
| 1507 |
+
alpha = value_draw(nodes);
|
| 1508 |
+
if (alpha >= beta)
|
| 1509 |
+
return alpha;
|
| 1510 |
+
}
|
| 1511 |
+
|
| 1512 |
+
Move pv[MAX_PLY + 1];
|
| 1513 |
+
StateInfo st;
|
| 1514 |
+
|
| 1515 |
+
Key posKey;
|
| 1516 |
+
Move move, bestMove;
|
| 1517 |
+
Value bestValue, value, futilityBase;
|
| 1518 |
+
bool pvHit, givesCheck, capture;
|
| 1519 |
+
int moveCount;
|
| 1520 |
+
|
| 1521 |
+
// Step 1. Initialize node
|
| 1522 |
+
if (PvNode)
|
| 1523 |
+
{
|
| 1524 |
+
(ss + 1)->pv = pv;
|
| 1525 |
+
ss->pv[0] = Move::none();
|
| 1526 |
+
}
|
| 1527 |
+
|
| 1528 |
+
bestMove = Move::none();
|
| 1529 |
+
ss->inCheck = pos.checkers();
|
| 1530 |
+
moveCount = 0;
|
| 1531 |
+
|
| 1532 |
+
// Used to send selDepth info to GUI (selDepth counts from 1, ply from 0)
|
| 1533 |
+
if (PvNode && selDepth < ss->ply + 1)
|
| 1534 |
+
selDepth = ss->ply + 1;
|
| 1535 |
+
|
| 1536 |
+
// Step 2. Check for an immediate draw or maximum ply reached
|
| 1537 |
+
if (pos.is_draw(ss->ply) || ss->ply >= MAX_PLY)
|
| 1538 |
+
return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate(pos) : VALUE_DRAW;
|
| 1539 |
+
|
| 1540 |
+
assert(0 <= ss->ply && ss->ply < MAX_PLY);
|
| 1541 |
+
|
| 1542 |
+
// Step 3. Transposition table lookup
|
| 1543 |
+
posKey = pos.key();
|
| 1544 |
+
auto [ttHit, ttData, ttWriter] = tt.probe(posKey);
|
| 1545 |
+
// Need further processing of the saved data
|
| 1546 |
+
ss->ttHit = ttHit;
|
| 1547 |
+
ttData.move = ttHit ? ttData.move : Move::none();
|
| 1548 |
+
ttData.value = ttHit ? value_from_tt(ttData.value, ss->ply, pos.rule50_count()) : VALUE_NONE;
|
| 1549 |
+
pvHit = ttHit && ttData.is_pv;
|
| 1550 |
+
|
| 1551 |
+
// At non-PV nodes we check for an early TT cutoff
|
| 1552 |
+
if (!PvNode && ttData.depth >= DEPTH_QS
|
| 1553 |
+
&& is_valid(ttData.value) // Can happen when !ttHit or when access race in probe()
|
| 1554 |
+
&& (ttData.bound & (ttData.value >= beta ? BOUND_LOWER : BOUND_UPPER)))
|
| 1555 |
+
return ttData.value;
|
| 1556 |
+
|
| 1557 |
+
// Step 4. Static evaluation of the position
|
| 1558 |
+
Value unadjustedStaticEval = VALUE_NONE;
|
| 1559 |
+
if (ss->inCheck)
|
| 1560 |
+
bestValue = futilityBase = -VALUE_INFINITE;
|
| 1561 |
+
else
|
| 1562 |
+
{
|
| 1563 |
+
const auto correctionValue = correction_value(*this, pos, ss);
|
| 1564 |
+
|
| 1565 |
+
if (ss->ttHit)
|
| 1566 |
+
{
|
| 1567 |
+
// Never assume anything about values stored in TT
|
| 1568 |
+
unadjustedStaticEval = ttData.eval;
|
| 1569 |
+
|
| 1570 |
+
if (!is_valid(unadjustedStaticEval))
|
| 1571 |
+
unadjustedStaticEval = evaluate(pos);
|
| 1572 |
+
|
| 1573 |
+
ss->staticEval = bestValue =
|
| 1574 |
+
to_corrected_static_eval(unadjustedStaticEval, correctionValue);
|
| 1575 |
+
|
| 1576 |
+
// ttValue can be used as a better position evaluation
|
| 1577 |
+
if (is_valid(ttData.value) && !is_decisive(ttData.value)
|
| 1578 |
+
&& (ttData.bound & (ttData.value > bestValue ? BOUND_LOWER : BOUND_UPPER)))
|
| 1579 |
+
bestValue = ttData.value;
|
| 1580 |
+
}
|
| 1581 |
+
else
|
| 1582 |
+
{
|
| 1583 |
+
unadjustedStaticEval = evaluate(pos);
|
| 1584 |
+
ss->staticEval = bestValue =
|
| 1585 |
+
to_corrected_static_eval(unadjustedStaticEval, correctionValue);
|
| 1586 |
+
}
|
| 1587 |
+
|
| 1588 |
+
// Stand pat. Return immediately if static value is at least beta
|
| 1589 |
+
if (bestValue >= beta)
|
| 1590 |
+
{
|
| 1591 |
+
if (!is_decisive(bestValue))
|
| 1592 |
+
bestValue = (bestValue + beta) / 2;
|
| 1593 |
+
|
| 1594 |
+
if (!ss->ttHit)
|
| 1595 |
+
ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER,
|
| 1596 |
+
DEPTH_UNSEARCHED, Move::none(), unadjustedStaticEval,
|
| 1597 |
+
tt.generation());
|
| 1598 |
+
return bestValue;
|
| 1599 |
+
}
|
| 1600 |
+
|
| 1601 |
+
if (bestValue > alpha)
|
| 1602 |
+
alpha = bestValue;
|
| 1603 |
+
|
| 1604 |
+
futilityBase = ss->staticEval + 351;
|
| 1605 |
+
}
|
| 1606 |
+
|
| 1607 |
+
const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory};
|
| 1608 |
+
|
| 1609 |
+
Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE;
|
| 1610 |
+
|
| 1611 |
+
// Initialize a MovePicker object for the current position, and prepare to search
|
| 1612 |
+
// the moves. We presently use two stages of move generator in quiescence search:
|
| 1613 |
+
// captures, or evasions only when in check.
|
| 1614 |
+
MovePicker mp(pos, ttData.move, DEPTH_QS, &mainHistory, &lowPlyHistory, &captureHistory,
|
| 1615 |
+
contHist, &sharedHistory, ss->ply);
|
| 1616 |
+
|
| 1617 |
+
// Step 5. Loop through all pseudo-legal moves until no moves remain or a beta
|
| 1618 |
+
// cutoff occurs.
|
| 1619 |
+
while ((move = mp.next_move()) != Move::none())
|
| 1620 |
+
{
|
| 1621 |
+
assert(move.is_ok());
|
| 1622 |
+
|
| 1623 |
+
if (!pos.legal(move))
|
| 1624 |
+
continue;
|
| 1625 |
+
|
| 1626 |
+
givesCheck = pos.gives_check(move);
|
| 1627 |
+
capture = pos.capture_stage(move);
|
| 1628 |
+
|
| 1629 |
+
moveCount++;
|
| 1630 |
+
|
| 1631 |
+
// Step 6. Pruning
|
| 1632 |
+
if (!is_loss(bestValue))
|
| 1633 |
+
{
|
| 1634 |
+
// Futility pruning and moveCount pruning
|
| 1635 |
+
if (!givesCheck && move.to_sq() != prevSq && !is_loss(futilityBase)
|
| 1636 |
+
&& move.type_of() != PROMOTION)
|
| 1637 |
+
{
|
| 1638 |
+
if (moveCount > 2)
|
| 1639 |
+
continue;
|
| 1640 |
+
|
| 1641 |
+
Value futilityValue = futilityBase + PieceValue[pos.piece_on(move.to_sq())];
|
| 1642 |
+
|
| 1643 |
+
// If static eval + value of piece we are going to capture is
|
| 1644 |
+
// much lower than alpha, we can prune this move.
|
| 1645 |
+
if (futilityValue <= alpha)
|
| 1646 |
+
{
|
| 1647 |
+
bestValue = std::max(bestValue, futilityValue);
|
| 1648 |
+
continue;
|
| 1649 |
+
}
|
| 1650 |
+
|
| 1651 |
+
// If static exchange evaluation is low enough
|
| 1652 |
+
// we can prune this move.
|
| 1653 |
+
if (!pos.see_ge(move, alpha - futilityBase))
|
| 1654 |
+
{
|
| 1655 |
+
bestValue = std::max(bestValue, std::min(alpha, futilityBase));
|
| 1656 |
+
continue;
|
| 1657 |
+
}
|
| 1658 |
+
}
|
| 1659 |
+
|
| 1660 |
+
// Skip non-captures
|
| 1661 |
+
if (!capture)
|
| 1662 |
+
continue;
|
| 1663 |
+
|
| 1664 |
+
// Do not search moves with bad enough SEE values
|
| 1665 |
+
if (!pos.see_ge(move, -72))
|
| 1666 |
+
continue;
|
| 1667 |
+
}
|
| 1668 |
+
|
| 1669 |
+
// Step 7. Make and search the move
|
| 1670 |
+
do_move(pos, move, st, givesCheck, ss);
|
| 1671 |
+
|
| 1672 |
+
value = -qsearch<nodeType>(pos, ss + 1, -beta, -alpha);
|
| 1673 |
+
undo_move(pos, move);
|
| 1674 |
+
|
| 1675 |
+
assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);
|
| 1676 |
+
|
| 1677 |
+
// Step 8. Check for a new best move
|
| 1678 |
+
if (value > bestValue)
|
| 1679 |
+
{
|
| 1680 |
+
bestValue = value;
|
| 1681 |
+
|
| 1682 |
+
if (value > alpha)
|
| 1683 |
+
{
|
| 1684 |
+
bestMove = move;
|
| 1685 |
+
|
| 1686 |
+
if (PvNode) // Update pv even in fail-high case
|
| 1687 |
+
update_pv(ss->pv, move, (ss + 1)->pv);
|
| 1688 |
+
|
| 1689 |
+
if (value < beta) // Update alpha here!
|
| 1690 |
+
alpha = value;
|
| 1691 |
+
else
|
| 1692 |
+
break; // Fail high
|
| 1693 |
+
}
|
| 1694 |
+
}
|
| 1695 |
+
}
|
| 1696 |
+
|
| 1697 |
+
// Step 9. Check for mate
|
| 1698 |
+
// All legal moves have been searched. A special case: if we are
|
| 1699 |
+
// in check and no legal moves were found, it is checkmate.
|
| 1700 |
+
if (ss->inCheck && bestValue == -VALUE_INFINITE)
|
| 1701 |
+
{
|
| 1702 |
+
assert(!MoveList<LEGAL>(pos).size());
|
| 1703 |
+
return mated_in(ss->ply); // Plies to mate from the root
|
| 1704 |
+
}
|
| 1705 |
+
|
| 1706 |
+
if (!is_decisive(bestValue) && bestValue > beta)
|
| 1707 |
+
bestValue = (bestValue + beta) / 2;
|
| 1708 |
+
|
| 1709 |
+
Color us = pos.side_to_move();
|
| 1710 |
+
if (!ss->inCheck && !moveCount && !pos.non_pawn_material(us)
|
| 1711 |
+
&& type_of(pos.captured_piece()) >= ROOK)
|
| 1712 |
+
{
|
| 1713 |
+
if (!((us == WHITE ? shift<NORTH>(pos.pieces(us, PAWN))
|
| 1714 |
+
: shift<SOUTH>(pos.pieces(us, PAWN)))
|
| 1715 |
+
& ~pos.pieces())) // no pawn pushes available
|
| 1716 |
+
{
|
| 1717 |
+
pos.state()->checkersBB = Rank1BB; // search for legal king-moves only
|
| 1718 |
+
if (!MoveList<LEGAL>(pos).size()) // stalemate
|
| 1719 |
+
bestValue = VALUE_DRAW;
|
| 1720 |
+
pos.state()->checkersBB = 0;
|
| 1721 |
+
}
|
| 1722 |
+
}
|
| 1723 |
+
|
| 1724 |
+
// Save gathered info in transposition table. The static evaluation
|
| 1725 |
+
// is saved as it was before adjustment by correction history.
|
| 1726 |
+
ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), pvHit,
|
| 1727 |
+
bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, DEPTH_QS, bestMove,
|
| 1728 |
+
unadjustedStaticEval, tt.generation());
|
| 1729 |
+
|
| 1730 |
+
assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE);
|
| 1731 |
+
|
| 1732 |
+
return bestValue;
|
| 1733 |
+
}
|
| 1734 |
+
|
| 1735 |
+
// Computes a reduction amount from the `reductions` table entries for `d` and
// `mn`, lowered in proportion to delta / rootDelta, raised by an extra share of
// the base scale when `i` is false, plus a constant offset.
// NOTE(review): `i` presumably is the "improving" flag and `mn` the move
// number - confirm against the callers.
Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) const {
    const int baseScale  = reductions[d] * reductions[mn];
    const int deltaTerm  = delta * 576 / rootDelta;
    const int extraTerm  = i ? 0 : baseScale * 217 / 512;

    return baseScale - deltaTerm + extraTerm + 1182;
}
|
| 1739 |
+
|
| 1740 |
+
// elapsed() returns the time elapsed since the search started. If the
|
| 1741 |
+
// 'nodestime' option is enabled, it will return the count of nodes searched
|
| 1742 |
+
// instead. This function is called to check whether the search should be
|
| 1743 |
+
// stopped based on predefined thresholds like time limits or nodes searched.
|
| 1744 |
+
//
|
| 1745 |
+
// elapsed_time() returns the actual time elapsed since the start of the search.
|
| 1746 |
+
// This function is intended for use only when printing PV outputs, and not used
|
| 1747 |
+
// for making decisions within the search algorithm itself.
|
| 1748 |
+
TimePoint Search::Worker::elapsed() const {
|
| 1749 |
+
return main_manager()->tm.elapsed([this]() { return threads.nodes_searched(); });
|
| 1750 |
+
}
|
| 1751 |
+
|
| 1752 |
+
TimePoint Search::Worker::elapsed_time() const { return main_manager()->tm.elapsed_time(); }
|
| 1753 |
+
|
| 1754 |
+
// Evaluates `pos` through Eval::evaluate() using this worker's own resources:
// the network selected by numaAccessToken, the accumulator stack, the refresh
// table and the optimism value of the side to move.
Value Search::Worker::evaluate(const Position& pos) {
    auto&&     network     = networks[numaAccessToken];
    const auto stmOptimism = optimism[pos.side_to_move()];

    return Eval::evaluate(network, pos, accumulatorStack, refreshTable, stmOptimism);
}
|
| 1758 |
+
|
| 1759 |
+
namespace {
|
| 1760 |
+
// Adjusts a mate or TB score from "plies to mate from the root" to
|
| 1761 |
+
// "plies to mate from the current position". Standard scores are unchanged.
|
| 1762 |
+
// The function is called before storing a value in the transposition table.
|
| 1763 |
+
Value value_to_tt(Value v, int ply) { return is_win(v) ? v + ply : is_loss(v) ? v - ply : v; }
|
| 1764 |
+
|
| 1765 |
+
|
| 1766 |
+
// Inverse of value_to_tt(): it adjusts a mate or TB score from the transposition
|
| 1767 |
+
// table (which refers to the plies to mate/be mated from current position) to
|
| 1768 |
+
// "plies to mate/be mated (TB win/loss) from the root". However, to avoid
|
| 1769 |
+
// potentially false mate or TB scores related to the 50 moves rule and the
|
| 1770 |
+
// graph history interaction, we return the highest non-TB score instead.
|
| 1771 |
+
Value value_from_tt(Value v, int ply, int r50c) {
|
| 1772 |
+
|
| 1773 |
+
if (!is_valid(v))
|
| 1774 |
+
return VALUE_NONE;
|
| 1775 |
+
|
| 1776 |
+
// handle TB win or better
|
| 1777 |
+
if (is_win(v))
|
| 1778 |
+
{
|
| 1779 |
+
// Downgrade a potentially false mate score
|
| 1780 |
+
if (v >= VALUE_MATE_IN_MAX_PLY && VALUE_MATE - v > 100 - r50c)
|
| 1781 |
+
return VALUE_TB_WIN_IN_MAX_PLY - 1;
|
| 1782 |
+
|
| 1783 |
+
// Downgrade a potentially false TB score.
|
| 1784 |
+
if (VALUE_TB - v > 100 - r50c)
|
| 1785 |
+
return VALUE_TB_WIN_IN_MAX_PLY - 1;
|
| 1786 |
+
|
| 1787 |
+
return v - ply;
|
| 1788 |
+
}
|
| 1789 |
+
|
| 1790 |
+
// handle TB loss or worse
|
| 1791 |
+
if (is_loss(v))
|
| 1792 |
+
{
|
| 1793 |
+
// Downgrade a potentially false mate score.
|
| 1794 |
+
if (v <= VALUE_MATED_IN_MAX_PLY && VALUE_MATE + v > 100 - r50c)
|
| 1795 |
+
return VALUE_TB_LOSS_IN_MAX_PLY + 1;
|
| 1796 |
+
|
| 1797 |
+
// Downgrade a potentially false TB score.
|
| 1798 |
+
if (VALUE_TB + v > 100 - r50c)
|
| 1799 |
+
return VALUE_TB_LOSS_IN_MAX_PLY + 1;
|
| 1800 |
+
|
| 1801 |
+
return v + ply;
|
| 1802 |
+
}
|
| 1803 |
+
|
| 1804 |
+
return v;
|
| 1805 |
+
}
|
| 1806 |
+
|
| 1807 |
+
|
| 1808 |
+
// Adds current move and appends child pv[]
|
| 1809 |
+
void update_pv(Move* pv, Move move, const Move* childPv) {
|
| 1810 |
+
|
| 1811 |
+
for (*pv++ = move; childPv && *childPv != Move::none();)
|
| 1812 |
+
*pv++ = *childPv++;
|
| 1813 |
+
*pv = Move::none();
|
| 1814 |
+
}
|
| 1815 |
+
|
| 1816 |
+
|
| 1817 |
+
// Updates stats at the end of search() when a bestMove is found
|
| 1818 |
+
void update_all_stats(const Position& pos,
|
| 1819 |
+
Stack* ss,
|
| 1820 |
+
Search::Worker& workerThread,
|
| 1821 |
+
Move bestMove,
|
| 1822 |
+
Square prevSq,
|
| 1823 |
+
SearchedList& quietsSearched,
|
| 1824 |
+
SearchedList& capturesSearched,
|
| 1825 |
+
Depth depth,
|
| 1826 |
+
Move ttMove) {
|
| 1827 |
+
|
| 1828 |
+
CapturePieceToHistory& captureHistory = workerThread.captureHistory;
|
| 1829 |
+
Piece movedPiece = pos.moved_piece(bestMove);
|
| 1830 |
+
PieceType capturedPiece;
|
| 1831 |
+
|
| 1832 |
+
int bonus =
|
| 1833 |
+
std::min(124 * depth - 84, 1376) + 349 * (bestMove == ttMove) + (ss - 1)->statScore / 32;
|
| 1834 |
+
int malus = std::min(872 * depth - 212, 2104);
|
| 1835 |
+
|
| 1836 |
+
if (!pos.capture_stage(bestMove))
|
| 1837 |
+
{
|
| 1838 |
+
update_quiet_histories(pos, ss, workerThread, bestMove, bonus * 810 / 1024);
|
| 1839 |
+
|
| 1840 |
+
int actualMalus = malus * 1159 / 1024;
|
| 1841 |
+
// Decrease stats for all non-best quiet moves
|
| 1842 |
+
for (Move move : quietsSearched)
|
| 1843 |
+
{
|
| 1844 |
+
actualMalus = actualMalus * 963 / 1024;
|
| 1845 |
+
update_quiet_histories(pos, ss, workerThread, move, -actualMalus);
|
| 1846 |
+
}
|
| 1847 |
+
}
|
| 1848 |
+
else
|
| 1849 |
+
{
|
| 1850 |
+
// Increase stats for the best move in case it was a capture move
|
| 1851 |
+
capturedPiece = type_of(pos.piece_on(bestMove.to_sq()));
|
| 1852 |
+
captureHistory[movedPiece][bestMove.to_sq()][capturedPiece] << bonus * 1290 / 1024;
|
| 1853 |
+
}
|
| 1854 |
+
|
| 1855 |
+
// Extra penalty for a quiet early move that was not a TT move in
|
| 1856 |
+
// previous ply when it gets refuted.
|
| 1857 |
+
if (prevSq != SQ_NONE && ((ss - 1)->moveCount == 1 + (ss - 1)->ttHit) && !pos.captured_piece())
|
| 1858 |
+
update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, -malus * 596 / 1024);
|
| 1859 |
+
|
| 1860 |
+
// Decrease stats for all non-best capture moves
|
| 1861 |
+
for (Move move : capturesSearched)
|
| 1862 |
+
{
|
| 1863 |
+
movedPiece = pos.moved_piece(move);
|
| 1864 |
+
capturedPiece = type_of(pos.piece_on(move.to_sq()));
|
| 1865 |
+
captureHistory[movedPiece][move.to_sq()][capturedPiece] << -malus * 1561 / 1024;
|
| 1866 |
+
}
|
| 1867 |
+
}
|
| 1868 |
+
|
| 1869 |
+
|
| 1870 |
+
// Updates histories of the move pairs formed by moves
|
| 1871 |
+
// at ply -1, -2, -3, -4, and -6 with current move.
|
| 1872 |
+
void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) {
|
| 1873 |
+
static constexpr std::array<ConthistBonus, 6> conthist_bonuses = {
|
| 1874 |
+
{{1, 1106}, {2, 705}, {3, 316}, {4, 572}, {5, 126}, {6, 427}}};
|
| 1875 |
+
|
| 1876 |
+
// Multipliers for positive history consistency
|
| 1877 |
+
constexpr int CMHCMultipliers[] = {87, 94, 106, 118, 114, 128, 128};
|
| 1878 |
+
int positiveCount = 0;
|
| 1879 |
+
|
| 1880 |
+
for (const auto [i, weight] : conthist_bonuses)
|
| 1881 |
+
{
|
| 1882 |
+
// Only update the first 2 continuation histories if we are in check
|
| 1883 |
+
if (ss->inCheck && i > 2)
|
| 1884 |
+
break;
|
| 1885 |
+
|
| 1886 |
+
if (((ss - i)->currentMove).is_ok())
|
| 1887 |
+
{
|
| 1888 |
+
auto& historyEntry = (*(ss - i)->continuationHistory)[pc][to];
|
| 1889 |
+
if (historyEntry > 0)
|
| 1890 |
+
positiveCount++;
|
| 1891 |
+
|
| 1892 |
+
int multiplier = CMHCMultipliers[positiveCount];
|
| 1893 |
+
historyEntry << (bonus * weight * multiplier / 131072) + 82 * (i < 2);
|
| 1894 |
+
}
|
| 1895 |
+
}
|
| 1896 |
+
}
|
| 1897 |
+
|
| 1898 |
+
// Updates move sorting heuristics
|
| 1899 |
+
|
| 1900 |
+
void update_quiet_histories(
|
| 1901 |
+
const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus) {
|
| 1902 |
+
|
| 1903 |
+
Color us = pos.side_to_move();
|
| 1904 |
+
workerThread.mainHistory[us][move.raw()] << bonus; // Untuned to prevent duplicate effort
|
| 1905 |
+
|
| 1906 |
+
if (ss->ply < LOW_PLY_HISTORY_SIZE)
|
| 1907 |
+
workerThread.lowPlyHistory[ss->ply][move.raw()] << bonus * 714 / 1024;
|
| 1908 |
+
|
| 1909 |
+
update_continuation_histories(ss, pos.moved_piece(move), move.to_sq(), bonus * 898 / 1024);
|
| 1910 |
+
|
| 1911 |
+
workerThread.sharedHistory.pawn_entry(pos)[pos.moved_piece(move)][move.to_sq()]
|
| 1912 |
+
<< bonus * (bonus > 0 ? 967 : 535) / 1024;
|
| 1913 |
+
}
|
| 1914 |
+
|
| 1915 |
+
}
|
| 1916 |
+
|
| 1917 |
+
// When playing with strength handicap, choose the best move among a set of
|
| 1918 |
+
// RootMoves using a statistical rule dependent on 'level'. Idea by Heinz van Saanen.
|
| 1919 |
+
Move Skill::pick_best(const RootMoves& rootMoves, size_t multiPV) {
|
| 1920 |
+
static PRNG rng(now()); // PRNG sequence should be non-deterministic
|
| 1921 |
+
|
| 1922 |
+
// RootMoves are already sorted by score in descending order
|
| 1923 |
+
Value topScore = rootMoves[0].score;
|
| 1924 |
+
int delta = std::min(topScore - rootMoves[multiPV - 1].score, int(PawnValue));
|
| 1925 |
+
int maxScore = -VALUE_INFINITE;
|
| 1926 |
+
double weakness = 120 - 2 * level;
|
| 1927 |
+
|
| 1928 |
+
// Choose best move. For each move score we add two terms, both dependent on
|
| 1929 |
+
// weakness. One is deterministic and bigger for weaker levels, and one is
|
| 1930 |
+
// random. Then we choose the move with the resulting highest score.
|
| 1931 |
+
for (size_t i = 0; i < multiPV; ++i)
|
| 1932 |
+
{
|
| 1933 |
+
// This is our magic formula
|
| 1934 |
+
int push = int(weakness * int(topScore - rootMoves[i].score)
|
| 1935 |
+
+ delta * (rng.rand<unsigned>() % int(weakness)))
|
| 1936 |
+
/ 128;
|
| 1937 |
+
|
| 1938 |
+
if (rootMoves[i].score + push >= maxScore)
|
| 1939 |
+
{
|
| 1940 |
+
maxScore = rootMoves[i].score + push;
|
| 1941 |
+
best = rootMoves[i].pv[0];
|
| 1942 |
+
}
|
| 1943 |
+
}
|
| 1944 |
+
|
| 1945 |
+
return best;
|
| 1946 |
+
}
|
| 1947 |
+
|
| 1948 |
+
// Used to print debug info and, more importantly, to detect
|
| 1949 |
+
// when we are out of available time and thus stop the search.
|
| 1950 |
+
void SearchManager::check_time(Search::Worker& worker) {
|
| 1951 |
+
if (--callsCnt > 0)
|
| 1952 |
+
return;
|
| 1953 |
+
|
| 1954 |
+
// When using nodes, ensure checking rate is not lower than 0.1% of nodes
|
| 1955 |
+
callsCnt = worker.limits.nodes ? std::min(512, int(worker.limits.nodes / 1024)) : 512;
|
| 1956 |
+
|
| 1957 |
+
static TimePoint lastInfoTime = now();
|
| 1958 |
+
|
| 1959 |
+
TimePoint elapsed = tm.elapsed([&worker]() { return worker.threads.nodes_searched(); });
|
| 1960 |
+
TimePoint tick = worker.limits.startTime + elapsed;
|
| 1961 |
+
|
| 1962 |
+
if (tick - lastInfoTime >= 1000)
|
| 1963 |
+
{
|
| 1964 |
+
lastInfoTime = tick;
|
| 1965 |
+
dbg_print();
|
| 1966 |
+
}
|
| 1967 |
+
|
| 1968 |
+
// We should not stop pondering until told so by the GUI
|
| 1969 |
+
if (ponder)
|
| 1970 |
+
return;
|
| 1971 |
+
|
| 1972 |
+
if (
|
| 1973 |
+
// Later we rely on the fact that we can at least use the mainthread previous
|
| 1974 |
+
// root-search score and PV in a multithreaded environment to prove mated-in scores.
|
| 1975 |
+
worker.completedDepth >= 1
|
| 1976 |
+
&& ((worker.limits.use_time_management() && (elapsed > tm.maximum() || stopOnPonderhit))
|
| 1977 |
+
|| (worker.limits.movetime && elapsed >= worker.limits.movetime)
|
| 1978 |
+
|| (worker.limits.nodes && worker.threads.nodes_searched() >= worker.limits.nodes)))
|
| 1979 |
+
worker.threads.stop = true;
|
| 1980 |
+
}
|
| 1981 |
+
|
| 1982 |
+
// Used to correct and extend PVs for moves that have a TB (but not a mate) score.
|
| 1983 |
+
// Keeps the search based PV for as long as it is verified to maintain the game
|
| 1984 |
+
// outcome, truncates afterwards. Finally, extends to mate the PV, providing a
|
| 1985 |
+
// possible continuation (but not a proven mating line).
|
| 1986 |
+
void syzygy_extend_pv(const OptionsMap& options,
|
| 1987 |
+
const Search::LimitsType& limits,
|
| 1988 |
+
Position& pos,
|
| 1989 |
+
RootMove& rootMove,
|
| 1990 |
+
Value& v) {
|
| 1991 |
+
|
| 1992 |
+
auto t_start = std::chrono::steady_clock::now();
|
| 1993 |
+
int moveOverhead = int(options["Move Overhead"]);
|
| 1994 |
+
bool rule50 = bool(options["Syzygy50MoveRule"]);
|
| 1995 |
+
|
| 1996 |
+
// Do not use more than moveOverhead / 2 time, if time management is active
|
| 1997 |
+
auto time_abort = [&t_start, &moveOverhead, &limits]() -> bool {
|
| 1998 |
+
auto t_end = std::chrono::steady_clock::now();
|
| 1999 |
+
return limits.use_time_management()
|
| 2000 |
+
&& 2 * std::chrono::duration<double, std::milli>(t_end - t_start).count()
|
| 2001 |
+
> moveOverhead;
|
| 2002 |
+
};
|
| 2003 |
+
|
| 2004 |
+
std::list<StateInfo> sts;
|
| 2005 |
+
|
| 2006 |
+
// Step 0, do the rootMove, no correction allowed, as needed for MultiPV in TB.
|
| 2007 |
+
auto& stRoot = sts.emplace_back();
|
| 2008 |
+
pos.do_move(rootMove.pv[0], stRoot);
|
| 2009 |
+
int ply = 1;
|
| 2010 |
+
|
| 2011 |
+
// Step 1, walk the PV to the last position in TB with correct decisive score
|
| 2012 |
+
while (size_t(ply) < rootMove.pv.size())
|
| 2013 |
+
{
|
| 2014 |
+
Move& pvMove = rootMove.pv[ply];
|
| 2015 |
+
|
| 2016 |
+
RootMoves legalMoves;
|
| 2017 |
+
for (const auto& m : MoveList<LEGAL>(pos))
|
| 2018 |
+
legalMoves.emplace_back(m);
|
| 2019 |
+
|
| 2020 |
+
Tablebases::Config config =
|
| 2021 |
+
Tablebases::rank_root_moves(options, pos, legalMoves, false, time_abort);
|
| 2022 |
+
RootMove& rm = *std::find(legalMoves.begin(), legalMoves.end(), pvMove);
|
| 2023 |
+
|
| 2024 |
+
if (legalMoves[0].tbRank != rm.tbRank)
|
| 2025 |
+
break;
|
| 2026 |
+
|
| 2027 |
+
ply++;
|
| 2028 |
+
|
| 2029 |
+
auto& st = sts.emplace_back();
|
| 2030 |
+
pos.do_move(pvMove, st);
|
| 2031 |
+
|
| 2032 |
+
// Do not allow for repetitions or drawing moves along the PV in TB regime
|
| 2033 |
+
if (config.rootInTB && ((rule50 && pos.is_draw(ply)) || pos.is_repetition(ply)))
|
| 2034 |
+
{
|
| 2035 |
+
pos.undo_move(pvMove);
|
| 2036 |
+
ply--;
|
| 2037 |
+
break;
|
| 2038 |
+
}
|
| 2039 |
+
|
| 2040 |
+
// Full PV shown will thus be validated and end in TB.
|
| 2041 |
+
// If we cannot validate the full PV in time, we do not show it.
|
| 2042 |
+
if (config.rootInTB && time_abort())
|
| 2043 |
+
break;
|
| 2044 |
+
}
|
| 2045 |
+
|
| 2046 |
+
// Resize the PV to the correct part
|
| 2047 |
+
rootMove.pv.resize(ply);
|
| 2048 |
+
|
| 2049 |
+
// Step 2, now extend the PV to mate, as if the user explored syzygy-tables.info
|
| 2050 |
+
// using top ranked moves (minimal DTZ), which gives optimal mates only for simple
|
| 2051 |
+
// endgames e.g. KRvK.
|
| 2052 |
+
while (!(rule50 && pos.is_draw(0)))
|
| 2053 |
+
{
|
| 2054 |
+
if (time_abort())
|
| 2055 |
+
break;
|
| 2056 |
+
|
| 2057 |
+
RootMoves legalMoves;
|
| 2058 |
+
for (const auto& m : MoveList<LEGAL>(pos))
|
| 2059 |
+
{
|
| 2060 |
+
auto& rm = legalMoves.emplace_back(m);
|
| 2061 |
+
StateInfo tmpSI;
|
| 2062 |
+
pos.do_move(m, tmpSI);
|
| 2063 |
+
// Give a score of each move to break DTZ ties restricting opponent mobility,
|
| 2064 |
+
// but not giving the opponent a capture.
|
| 2065 |
+
for (const auto& mOpp : MoveList<LEGAL>(pos))
|
| 2066 |
+
rm.tbRank -= pos.capture(mOpp) ? 100 : 1;
|
| 2067 |
+
pos.undo_move(m);
|
| 2068 |
+
}
|
| 2069 |
+
|
| 2070 |
+
// Mate found
|
| 2071 |
+
if (legalMoves.size() == 0)
|
| 2072 |
+
break;
|
| 2073 |
+
|
| 2074 |
+
// Sort moves according to their above assigned rank.
|
| 2075 |
+
// This will break ties for moves with equal DTZ in rank_root_moves.
|
| 2076 |
+
std::stable_sort(
|
| 2077 |
+
legalMoves.begin(), legalMoves.end(),
|
| 2078 |
+
[](const Search::RootMove& a, const Search::RootMove& b) { return a.tbRank > b.tbRank; });
|
| 2079 |
+
|
| 2080 |
+
// The winning side tries to minimize DTZ, the losing side maximizes it
|
| 2081 |
+
Tablebases::Config config =
|
| 2082 |
+
Tablebases::rank_root_moves(options, pos, legalMoves, true, time_abort);
|
| 2083 |
+
|
| 2084 |
+
// If DTZ is not available we might not find a mate, so we bail out
|
| 2085 |
+
if (!config.rootInTB || config.cardinality > 0)
|
| 2086 |
+
break;
|
| 2087 |
+
|
| 2088 |
+
ply++;
|
| 2089 |
+
|
| 2090 |
+
Move& pvMove = legalMoves[0].pv[0];
|
| 2091 |
+
rootMove.pv.push_back(pvMove);
|
| 2092 |
+
auto& st = sts.emplace_back();
|
| 2093 |
+
pos.do_move(pvMove, st);
|
| 2094 |
+
}
|
| 2095 |
+
|
| 2096 |
+
// Finding a draw in this function is an exceptional case, that cannot happen when rule50 is false or
|
| 2097 |
+
// during engine game play, since we have a winning score, and play correctly
|
| 2098 |
+
// with TB support. However, it can be that a position is draw due to the 50 move
|
| 2099 |
+
// rule if it has been been reached on the board with a non-optimal 50 move counter
|
| 2100 |
+
// (e.g. 8/8/6k1/3B4/3K4/4N3/8/8 w - - 54 106 ) which TB with dtz counter rounding
|
| 2101 |
+
// cannot always correctly rank. See also
|
| 2102 |
+
// https://github.com/official-stockfish/Stockfish/issues/5175#issuecomment-2058893495
|
| 2103 |
+
// We adjust the score to match the found PV. Note that a TB loss score can be
|
| 2104 |
+
// displayed if the engine did not find a drawing move yet, but eventually search
|
| 2105 |
+
// will figure it out (e.g. 1kq5/q2r4/5K2/8/8/8/8/7Q w - - 96 1 )
|
| 2106 |
+
if (pos.is_draw(0))
|
| 2107 |
+
v = VALUE_DRAW;
|
| 2108 |
+
|
| 2109 |
+
// Undo the PV moves
|
| 2110 |
+
for (auto it = rootMove.pv.rbegin(); it != rootMove.pv.rend(); ++it)
|
| 2111 |
+
pos.undo_move(*it);
|
| 2112 |
+
|
| 2113 |
+
// Inform if we couldn't get a full extension in time
|
| 2114 |
+
if (time_abort())
|
| 2115 |
+
sync_cout
|
| 2116 |
+
<< "info string Syzygy based PV extension requires more time, increase Move Overhead as needed."
|
| 2117 |
+
<< sync_endl;
|
| 2118 |
+
}
|
| 2119 |
+
|
| 2120 |
+
void SearchManager::pv(Search::Worker&           worker,
                       const ThreadPool&         threads,
                       const TranspositionTable& tt,
                       Depth                     depth) {
    // Builds one InfoFull record per reported MultiPV line and hands each one
    // to updates.onUpdateFull, in root-move order.

    const auto   searchedNodes = threads.nodes_searched();
    auto&        rootMoves     = worker.rootMoves;
    auto&        pos           = worker.rootPos;
    const size_t pvIdx         = worker.pvIdx;
    const size_t lineCount = std::min(size_t(worker.options["MultiPV"]), rootMoves.size());

    // When the root is in the tablebases every root move is counted as one extra hit
    const uint64_t tbHits =
      threads.tb_hits() + (worker.tbConfig.rootInTB ? rootMoves.size() : 0);

    for (size_t i = 0; i < lineCount; ++i)
    {
        auto& rm = rootMoves[i];

        // A score still at -VALUE_INFINITE means this line has not been
        // searched in the current iteration.
        const bool updated = rm.score != -VALUE_INFINITE;

        // At depth 1 only the first line is reported if it has no fresh score
        if (!updated && i > 0 && depth == 1)
            continue;

        // Not-yet-updated lines fall back to the previous iteration's data
        Depth d;
        Value v;
        if (updated)
        {
            d = depth;
            v = rm.uciScore;
        }
        else
        {
            d = std::max(1, depth - 1);
            v = rm.previousScore;
        }

        if (v == -VALUE_INFINITE)
            v = VALUE_ZERO;

        // Inside the tablebases, non-mate scores are replaced by the TB score
        const bool tb = worker.tbConfig.rootInTB && std::abs(v) <= VALUE_TB;
        if (tb)
            v = rm.tbScore;

        // tablebase- and previous-scores are exact
        const bool isExact = tb || !updated || i != pvIdx;

        // Potentially correct and extend the PV, and in exceptional cases v
        const bool noBoundFlag = !rm.scoreLowerbound && !rm.scoreUpperbound;
        if (is_decisive(v) && std::abs(v) < VALUE_MATE_IN_MAX_PLY && (noBoundFlag || isExact))
            syzygy_extend_pv(worker.options, worker.limits, pos, rm, v);

        // Space-separated move list; the trailing separator is dropped afterwards
        std::string pvText;
        for (Move m : rm.pv)
        {
            pvText += UCIEngine::move(m, pos.is_chess960());
            pvText += " ";
        }
        if (!pvText.empty())
            pvText.pop_back();

        std::string wdlText;
        if (worker.options["UCI_ShowWDL"])
            wdlText = UCIEngine::wdl(v, pos);

        const char* boundText = "";
        if (rm.scoreLowerbound)
            boundText = "lowerbound";
        else if (rm.scoreUpperbound)
            boundText = "upperbound";

        InfoFull info;

        info.depth    = d;
        info.selDepth = rm.selDepth;
        info.multiPV  = i + 1;
        info.score    = {v, pos};
        info.wdl      = wdlText;

        // Exact scores are reported without a bound marker
        if (!isExact)
            info.bound = boundText;

        // Clamped to at least 1 so the nps division below never divides by zero
        const TimePoint elapsedMs = std::max(TimePoint(1), tm.elapsed_time());

        info.timeMs   = elapsedMs;
        info.nodes    = searchedNodes;
        info.nps      = searchedNodes * 1000 / elapsedMs;
        info.tbHits   = tbHits;
        info.pv       = pvText;
        info.hashfull = tt.hashfull();

        // The string_view fields of info refer to locals of this iteration,
        // so the callback has to consume them before it returns.
        updates.onUpdateFull(info);
    }
}
|
| 2190 |
+
|
| 2191 |
+
// Called in case we have no ponder move before exiting the search,
|
| 2192 |
+
// for instance, in case we stop the search during a fail high at root.
|
| 2193 |
+
// We try hard to have a ponder move to return to the GUI,
|
| 2194 |
+
// otherwise in case of 'ponder on' we have nothing to think about.
|
| 2195 |
+
bool RootMove::extract_ponder_from_tt(const TranspositionTable& tt, Position& pos) {
|
| 2196 |
+
|
| 2197 |
+
StateInfo st;
|
| 2198 |
+
|
| 2199 |
+
assert(pv.size() == 1);
|
| 2200 |
+
if (pv[0] == Move::none())
|
| 2201 |
+
return false;
|
| 2202 |
+
|
| 2203 |
+
pos.do_move(pv[0], st, &tt);
|
| 2204 |
+
|
| 2205 |
+
auto [ttHit, ttData, ttWriter] = tt.probe(pos.key());
|
| 2206 |
+
if (ttHit)
|
| 2207 |
+
{
|
| 2208 |
+
if (MoveList<LEGAL>(pos).contains(ttData.move))
|
| 2209 |
+
pv.push_back(ttData.move);
|
| 2210 |
+
}
|
| 2211 |
+
|
| 2212 |
+
pos.undo_move(pv[0]);
|
| 2213 |
+
return pv.size() > 1;
|
| 2214 |
+
}
|
| 2215 |
+
|
| 2216 |
+
|
| 2217 |
+
} // namespace Stockfish
|
src/search.h
ADDED
|
@@ -0,0 +1,379 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef SEARCH_H_INCLUDED
|
| 20 |
+
#define SEARCH_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <algorithm>
|
| 23 |
+
#include <array>
|
| 24 |
+
#include <atomic>
|
| 25 |
+
#include <cassert>
|
| 26 |
+
#include <cstddef>
|
| 27 |
+
#include <cstdint>
|
| 28 |
+
#include <functional>
|
| 29 |
+
#include <map>
|
| 30 |
+
#include <memory>
|
| 31 |
+
#include <string>
|
| 32 |
+
#include <string_view>
|
| 33 |
+
#include <vector>
|
| 34 |
+
|
| 35 |
+
#include "history.h"
|
| 36 |
+
#include "misc.h"
|
| 37 |
+
#include "nnue/network.h"
|
| 38 |
+
#include "nnue/nnue_accumulator.h"
|
| 39 |
+
#include "numa.h"
|
| 40 |
+
#include "position.h"
|
| 41 |
+
#include "score.h"
|
| 42 |
+
#include "syzygy/tbprobe.h"
|
| 43 |
+
#include "timeman.h"
|
| 44 |
+
#include "types.h"
|
| 45 |
+
|
| 46 |
+
namespace Stockfish {
|
| 47 |
+
|
| 48 |
+
// Different node types, used as a template parameter
|
| 49 |
+
enum NodeType {
    NonPV,  // first enumerator, implicit value 0
    PV,     // implicit value 1
    Root    // implicit value 2
};
|
| 54 |
+
|
| 55 |
+
class TranspositionTable;
|
| 56 |
+
class ThreadPool;
|
| 57 |
+
class OptionsMap;
|
| 58 |
+
|
| 59 |
+
namespace Search {
|
| 60 |
+
|
| 61 |
+
// Stack struct keeps track of the information we need to remember from nodes
|
| 62 |
+
// shallower and deeper in the tree during the search. Each search thread has
|
| 63 |
+
// its own array of Stack objects, indexed by the current ply.
|
| 64 |
+
struct Stack {
|
| 65 |
+
Move* pv;
|
| 66 |
+
PieceToHistory* continuationHistory;
|
| 67 |
+
CorrectionHistory<PieceTo>* continuationCorrectionHistory;
|
| 68 |
+
int ply;
|
| 69 |
+
Move currentMove;
|
| 70 |
+
Move excludedMove;
|
| 71 |
+
Value staticEval;
|
| 72 |
+
int statScore;
|
| 73 |
+
int moveCount;
|
| 74 |
+
bool inCheck;
|
| 75 |
+
bool ttPv;
|
| 76 |
+
bool ttHit;
|
| 77 |
+
int cutoffCnt;
|
| 78 |
+
int reduction;
|
| 79 |
+
};
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
// RootMove struct is used for moves at the root of the tree. For each root move
|
| 83 |
+
// we store a score and a PV (really a refutation in the case of moves which
|
| 84 |
+
// fail low). Score is normally set at -VALUE_INFINITE for all non-pv moves.
|
| 85 |
+
struct RootMove {
|
| 86 |
+
|
| 87 |
+
explicit RootMove(Move m) :
|
| 88 |
+
pv(1, m) {}
|
| 89 |
+
bool extract_ponder_from_tt(const TranspositionTable& tt, Position& pos);
|
| 90 |
+
bool operator==(const Move& m) const { return pv[0] == m; }
|
| 91 |
+
// Sort in descending order
|
| 92 |
+
bool operator<(const RootMove& m) const {
|
| 93 |
+
return m.score != score ? m.score < score : m.previousScore < previousScore;
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
uint64_t effort = 0;
|
| 97 |
+
Value score = -VALUE_INFINITE;
|
| 98 |
+
Value previousScore = -VALUE_INFINITE;
|
| 99 |
+
Value averageScore = -VALUE_INFINITE;
|
| 100 |
+
Value meanSquaredScore = -VALUE_INFINITE * VALUE_INFINITE;
|
| 101 |
+
Value uciScore = -VALUE_INFINITE;
|
| 102 |
+
bool scoreLowerbound = false;
|
| 103 |
+
bool scoreUpperbound = false;
|
| 104 |
+
int selDepth = 0;
|
| 105 |
+
int tbRank = 0;
|
| 106 |
+
Value tbScore;
|
| 107 |
+
std::vector<Move> pv;
|
| 108 |
+
};
|
| 109 |
+
|
| 110 |
+
using RootMoves = std::vector<RootMove>;
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
// LimitsType struct stores information sent by the caller about the analysis required.
|
| 114 |
+
struct LimitsType {
|
| 115 |
+
|
| 116 |
+
// Init explicitly due to broken value-initialization of non POD in MSVC
|
| 117 |
+
LimitsType() {
|
| 118 |
+
time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0);
|
| 119 |
+
movestogo = depth = mate = perft = infinite = 0;
|
| 120 |
+
nodes = 0;
|
| 121 |
+
ponderMode = false;
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
bool use_time_management() const { return time[WHITE] || time[BLACK]; }
|
| 125 |
+
|
| 126 |
+
std::vector<std::string> searchmoves;
|
| 127 |
+
TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime;
|
| 128 |
+
int movestogo, depth, mate, perft, infinite;
|
| 129 |
+
uint64_t nodes;
|
| 130 |
+
bool ponderMode;
|
| 131 |
+
};
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
// The UCI stores the uci options, thread pool, and transposition table.
|
| 135 |
+
// This struct is used to easily forward data to the Search::Worker class.
|
| 136 |
+
struct SharedState {
|
| 137 |
+
SharedState(const OptionsMap& optionsMap,
|
| 138 |
+
ThreadPool& threadPool,
|
| 139 |
+
TranspositionTable& transpositionTable,
|
| 140 |
+
std::map<NumaIndex, SharedHistories>& sharedHists,
|
| 141 |
+
const LazyNumaReplicatedSystemWide<Eval::NNUE::Networks>& nets) :
|
| 142 |
+
options(optionsMap),
|
| 143 |
+
threads(threadPool),
|
| 144 |
+
tt(transpositionTable),
|
| 145 |
+
sharedHistories(sharedHists),
|
| 146 |
+
networks(nets) {}
|
| 147 |
+
|
| 148 |
+
const OptionsMap& options;
|
| 149 |
+
ThreadPool& threads;
|
| 150 |
+
TranspositionTable& tt;
|
| 151 |
+
std::map<NumaIndex, SharedHistories>& sharedHistories;
|
| 152 |
+
const LazyNumaReplicatedSystemWide<Eval::NNUE::Networks>& networks;
|
| 153 |
+
};
|
| 154 |
+
|
| 155 |
+
class Worker;
|
| 156 |
+
|
| 157 |
+
// Null Object Pattern, implement a common interface for the SearchManagers.
|
| 158 |
+
// A Null Object will be given to non-mainthread workers.
|
| 159 |
+
class ISearchManager {
|
| 160 |
+
public:
|
| 161 |
+
virtual ~ISearchManager() {}
|
| 162 |
+
virtual void check_time(Search::Worker&) = 0;
|
| 163 |
+
};
|
| 164 |
+
|
| 165 |
+
struct InfoShort {
|
| 166 |
+
int depth;
|
| 167 |
+
Score score;
|
| 168 |
+
};
|
| 169 |
+
|
| 170 |
+
// Full info record: extends InfoShort (depth, score) with the remaining
// per-line fields. The string_view members do not own their text, so whoever
// fills this record must keep the underlying strings alive while it is in use.
struct InfoFull: InfoShort {
    int              selDepth;
    size_t           multiPV;
    std::string_view wdl;
    std::string_view bound;
    size_t           timeMs;
    size_t           nodes;
    size_t           nps;
    size_t           tbHits;
    std::string_view pv;
    int              hashfull;
};
|
| 182 |
+
|
| 183 |
+
// Per-iteration info record: a depth, the text of the current move and its
// number. currmove is a non-owning view, so the viewed text must stay alive
// for as long as the record is used.
struct InfoIteration {
    int              depth;
    std::string_view currmove;
    size_t           currmovenumber;
};
|
| 188 |
+
|
| 189 |
+
// Skill structure is used to implement strength limit. If we have a UCI_Elo,
|
| 190 |
+
// we convert it to an appropriate skill level, anchored to the Stash engine.
|
| 191 |
+
// This method is based on a fit of the Elo results for games played between
|
| 192 |
+
// Stockfish at various skill levels and various versions of the Stash engine.
|
| 193 |
+
// Skill 0 .. 19 now covers CCRL Blitz Elo from 1320 to 3190, approximately
|
| 194 |
+
// Reference: https://github.com/vondele/Stockfish/commit/a08b8d4e9711c2
|
| 195 |
+
struct Skill {
|
| 196 |
+
// Lowest and highest Elo ratings used in the skill level calculation
|
| 197 |
+
constexpr static int LowestElo = 1320;
|
| 198 |
+
constexpr static int HighestElo = 3190;
|
| 199 |
+
|
| 200 |
+
Skill(int skill_level, int uci_elo) {
|
| 201 |
+
if (uci_elo)
|
| 202 |
+
{
|
| 203 |
+
double e = double(uci_elo - LowestElo) / (HighestElo - LowestElo);
|
| 204 |
+
level = std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0);
|
| 205 |
+
}
|
| 206 |
+
else
|
| 207 |
+
level = double(skill_level);
|
| 208 |
+
}
|
| 209 |
+
bool enabled() const { return level < 20.0; }
|
| 210 |
+
bool time_to_pick(Depth depth) const { return depth == 1 + int(level); }
|
| 211 |
+
Move pick_best(const RootMoves&, size_t multiPV);
|
| 212 |
+
|
| 213 |
+
double level;
|
| 214 |
+
Move best = Move::none();
|
| 215 |
+
};
|
| 216 |
+
|
| 217 |
+
// SearchManager manages the search from the main thread. It is responsible for
|
| 218 |
+
// keeping track of the time, and storing data strictly related to the main thread.
|
| 219 |
+
class SearchManager: public ISearchManager {
|
| 220 |
+
public:
|
| 221 |
+
using UpdateShort = std::function<void(const InfoShort&)>;
|
| 222 |
+
using UpdateFull = std::function<void(const InfoFull&)>;
|
| 223 |
+
using UpdateIter = std::function<void(const InfoIteration&)>;
|
| 224 |
+
using UpdateBestmove = std::function<void(std::string_view, std::string_view)>;
|
| 225 |
+
|
| 226 |
+
struct UpdateContext {
|
| 227 |
+
UpdateShort onUpdateNoMoves;
|
| 228 |
+
UpdateFull onUpdateFull;
|
| 229 |
+
UpdateIter onIter;
|
| 230 |
+
UpdateBestmove onBestmove;
|
| 231 |
+
};
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
SearchManager(const UpdateContext& updateContext) :
|
| 235 |
+
updates(updateContext) {}
|
| 236 |
+
|
| 237 |
+
void check_time(Search::Worker& worker) override;
|
| 238 |
+
|
| 239 |
+
void pv(Search::Worker& worker,
|
| 240 |
+
const ThreadPool& threads,
|
| 241 |
+
const TranspositionTable& tt,
|
| 242 |
+
Depth depth);
|
| 243 |
+
|
| 244 |
+
Stockfish::TimeManagement tm;
|
| 245 |
+
double originalTimeAdjust;
|
| 246 |
+
int callsCnt;
|
| 247 |
+
std::atomic_bool ponder;
|
| 248 |
+
|
| 249 |
+
std::array<Value, 4> iterValue;
|
| 250 |
+
double previousTimeReduction;
|
| 251 |
+
Value bestPreviousScore;
|
| 252 |
+
Value bestPreviousAverageScore;
|
| 253 |
+
bool stopOnPonderhit;
|
| 254 |
+
|
| 255 |
+
size_t id;
|
| 256 |
+
|
| 257 |
+
const UpdateContext& updates;
|
| 258 |
+
};
|
| 259 |
+
|
| 260 |
+
class NullSearchManager: public ISearchManager {
|
| 261 |
+
public:
|
| 262 |
+
void check_time(Search::Worker&) override {}
|
| 263 |
+
};
|
| 264 |
+
|
| 265 |
+
// Search::Worker is the class that does the actual search.
|
| 266 |
+
// It is instantiated once per thread, and it is responsible for keeping track
|
| 267 |
+
// of the search history, and storing data required for the search.
|
| 268 |
+
class Worker {
|
| 269 |
+
public:
|
| 270 |
+
Worker(SharedState&,
|
| 271 |
+
std::unique_ptr<ISearchManager>,
|
| 272 |
+
size_t,
|
| 273 |
+
size_t,
|
| 274 |
+
size_t,
|
| 275 |
+
NumaReplicatedAccessToken);
|
| 276 |
+
|
| 277 |
+
// Called at instantiation to initialize reductions tables.
|
| 278 |
+
// Reset histories, usually before a new game.
|
| 279 |
+
void clear();
|
| 280 |
+
|
| 281 |
+
// Called when the program receives the UCI 'go' command.
|
| 282 |
+
// It searches from the root position and outputs the "bestmove".
|
| 283 |
+
void start_searching();
|
| 284 |
+
|
| 285 |
+
bool is_mainthread() const { return threadIdx == 0; }
|
| 286 |
+
|
| 287 |
+
void ensure_network_replicated();
|
| 288 |
+
|
| 289 |
+
// Public because they need to be updatable by the stats
|
| 290 |
+
ButterflyHistory mainHistory;
|
| 291 |
+
LowPlyHistory lowPlyHistory;
|
| 292 |
+
|
| 293 |
+
CapturePieceToHistory captureHistory;
|
| 294 |
+
ContinuationHistory continuationHistory[2][2];
|
| 295 |
+
CorrectionHistory<Continuation> continuationCorrectionHistory;
|
| 296 |
+
|
| 297 |
+
TTMoveHistory ttMoveHistory;
|
| 298 |
+
SharedHistories& sharedHistory;
|
| 299 |
+
|
| 300 |
+
private:
|
| 301 |
+
void iterative_deepening();
|
| 302 |
+
|
| 303 |
+
void do_move(Position& pos, const Move move, StateInfo& st, Stack* const ss);
|
| 304 |
+
void
|
| 305 |
+
do_move(Position& pos, const Move move, StateInfo& st, const bool givesCheck, Stack* const ss);
|
| 306 |
+
void do_null_move(Position& pos, StateInfo& st, Stack* const ss);
|
| 307 |
+
void undo_move(Position& pos, const Move move);
|
| 308 |
+
void undo_null_move(Position& pos);
|
| 309 |
+
|
| 310 |
+
// This is the main search function, for both PV and non-PV nodes
|
| 311 |
+
template<NodeType nodeType>
|
| 312 |
+
Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode);
|
| 313 |
+
|
| 314 |
+
// Quiescence search function, which is called by the main search
|
| 315 |
+
template<NodeType nodeType>
|
| 316 |
+
Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta);
|
| 317 |
+
|
| 318 |
+
Depth reduction(bool i, Depth d, int mn, int delta) const;
|
| 319 |
+
|
| 320 |
+
// Pointer to the search manager, only allowed to be called by the main thread
|
| 321 |
+
SearchManager* main_manager() const {
|
| 322 |
+
assert(threadIdx == 0);
|
| 323 |
+
return static_cast<SearchManager*>(manager.get());
|
| 324 |
+
}
|
| 325 |
+
|
| 326 |
+
TimePoint elapsed() const;
|
| 327 |
+
TimePoint elapsed_time() const;
|
| 328 |
+
|
| 329 |
+
Value evaluate(const Position&);
|
| 330 |
+
|
| 331 |
+
LimitsType limits;
|
| 332 |
+
|
| 333 |
+
size_t pvIdx, pvLast;
|
| 334 |
+
std::atomic<uint64_t> nodes, tbHits, bestMoveChanges;
|
| 335 |
+
int selDepth, nmpMinPly;
|
| 336 |
+
|
| 337 |
+
Value optimism[COLOR_NB];
|
| 338 |
+
|
| 339 |
+
Position rootPos;
|
| 340 |
+
StateInfo rootState;
|
| 341 |
+
RootMoves rootMoves;
|
| 342 |
+
Depth rootDepth, completedDepth;
|
| 343 |
+
Value rootDelta;
|
| 344 |
+
|
| 345 |
+
size_t threadIdx, numaThreadIdx, numaTotal;
|
| 346 |
+
NumaReplicatedAccessToken numaAccessToken;
|
| 347 |
+
|
| 348 |
+
// Reductions lookup table initialized at startup
|
| 349 |
+
std::array<int, MAX_MOVES> reductions; // [depth or moveNumber]
|
| 350 |
+
|
| 351 |
+
// The main thread has a SearchManager, the others have a NullSearchManager
|
| 352 |
+
std::unique_ptr<ISearchManager> manager;
|
| 353 |
+
|
| 354 |
+
Tablebases::Config tbConfig;
|
| 355 |
+
|
| 356 |
+
const OptionsMap& options;
|
| 357 |
+
ThreadPool& threads;
|
| 358 |
+
TranspositionTable& tt;
|
| 359 |
+
const LazyNumaReplicatedSystemWide<Eval::NNUE::Networks>& networks;
|
| 360 |
+
|
| 361 |
+
// Used by NNUE
|
| 362 |
+
Eval::NNUE::AccumulatorStack accumulatorStack;
|
| 363 |
+
Eval::NNUE::AccumulatorCaches refreshTable;
|
| 364 |
+
|
| 365 |
+
friend class Stockfish::ThreadPool;
|
| 366 |
+
friend class SearchManager;
|
| 367 |
+
};
|
| 368 |
+
|
| 369 |
+
// Plain pair of integers: an index and a weight.
// NOTE(review): presumably pairs a continuation-history slot with the weight
// applied to its bonus - confirm against the users in search.cpp.
struct ConthistBonus {
    int index;
    int weight;
};
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
} // namespace Search
|
| 376 |
+
|
| 377 |
+
} // namespace Stockfish
|
| 378 |
+
|
| 379 |
+
#endif // #ifndef SEARCH_H_INCLUDED
|
src/shm.h
ADDED
|
@@ -0,0 +1,634 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef SHM_H_INCLUDED
|
| 20 |
+
#define SHM_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#include <algorithm>
|
| 23 |
+
#include <cinttypes>
|
| 24 |
+
#include <cstddef>
|
| 25 |
+
#include <cstdint>
|
| 26 |
+
#include <cstring>
|
| 27 |
+
#include <functional>
|
| 28 |
+
#include <iomanip>
|
| 29 |
+
#include <iostream>
|
| 30 |
+
#include <memory>
|
| 31 |
+
#include <new>
|
| 32 |
+
#include <optional>
|
| 33 |
+
#include <sstream>
|
| 34 |
+
#include <string>
|
| 35 |
+
#include <type_traits>
|
| 36 |
+
#include <utility>
|
| 37 |
+
#include <variant>
|
| 38 |
+
|
| 39 |
+
#if defined(__linux__) && !defined(__ANDROID__)
|
| 40 |
+
#include "shm_linux.h"
|
| 41 |
+
#endif
|
| 42 |
+
|
| 43 |
+
#if defined(__ANDROID__)
|
| 44 |
+
#include <limits.h>
|
| 45 |
+
#define SF_MAX_SEM_NAME_LEN NAME_MAX
|
| 46 |
+
#endif
|
| 47 |
+
|
| 48 |
+
#include "types.h"
|
| 49 |
+
|
| 50 |
+
#include "memory.h"
|
| 51 |
+
|
| 52 |
+
#if defined(_WIN32)
|
| 53 |
+
|
| 54 |
+
#if _WIN32_WINNT < 0x0601
|
| 55 |
+
#undef _WIN32_WINNT
|
| 56 |
+
#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
|
| 57 |
+
#endif
|
| 58 |
+
|
| 59 |
+
#if !defined(NOMINMAX)
|
| 60 |
+
#define NOMINMAX
|
| 61 |
+
#endif
|
| 62 |
+
#include <windows.h>
|
| 63 |
+
#elif defined(__linux__)
|
| 64 |
+
#include <cstring>
|
| 65 |
+
#include <fcntl.h>
|
| 66 |
+
#include <pthread.h>
|
| 67 |
+
#include <semaphore.h>
|
| 68 |
+
#include <sys/mman.h>
|
| 69 |
+
#include <sys/stat.h>
|
| 70 |
+
#include <unistd.h>
|
| 71 |
+
#endif
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
#if defined(__APPLE__)
|
| 75 |
+
#include <mach-o/dyld.h>
|
| 76 |
+
#include <sys/syslimits.h>
|
| 77 |
+
|
| 78 |
+
#elif defined(__sun)
|
| 79 |
+
#include <stdlib.h>
|
| 80 |
+
|
| 81 |
+
#elif defined(__FreeBSD__)
|
| 82 |
+
#include <sys/sysctl.h>
|
| 83 |
+
#include <sys/types.h>
|
| 84 |
+
#include <unistd.h>
|
| 85 |
+
|
| 86 |
+
#elif defined(__NetBSD__) || defined(__DragonFly__) || defined(__linux__)
|
| 87 |
+
#include <limits.h>
|
| 88 |
+
#include <unistd.h>
|
| 89 |
+
#endif
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
namespace Stockfish {
|
| 93 |
+
|
| 94 |
+
// argv[0] CANNOT be used because we need to identify the executable.
|
| 95 |
+
// argv[0] contains the command used to invoke it, which does not involve the full path.
|
| 96 |
+
// Just using a path is not fully resilient either, as the executable could
|
| 97 |
+
// have changed if it wasn't locked by the OS. Ideally we would hash the executable
|
| 98 |
+
// but it's not really that important at this point.
|
| 99 |
+
// If the path is longer than 4095 bytes the hash will be computed from an unspecified
|
| 100 |
+
// amount of bytes of the path; in particular it can be a hash of an empty string.
|
| 101 |
+
|
| 102 |
+
inline std::string getExecutablePathHash() {
    // Returns the bytes of the running executable's path as reported by the
    // platform, or an empty string when the lookup fails or no branch below
    // applies. Despite the name, no hashing is done in this function.

    // Zero-filled, so the buffer stays NUL-terminated whenever a platform call
    // writes fewer than sizeof(pathBuf) bytes.
    char        pathBuf[4096] = {0};
    std::size_t pathLen       = 0;

#if defined(_WIN32)
    pathLen = GetModuleFileNameA(NULL, pathBuf, sizeof(pathBuf));

#elif defined(__APPLE__)
    uint32_t capacity = sizeof(pathBuf);
    if (_NSGetExecutablePath(pathBuf, &capacity) == 0)
        pathLen = std::strlen(pathBuf);

#elif defined(__sun)  // Solaris
    if (const char* execName = getexecname())
    {
        // Copy at most size-1 bytes so the final zero byte is preserved
        std::strncpy(pathBuf, execName, sizeof(pathBuf) - 1);
        pathLen = std::strlen(pathBuf);
    }

#elif defined(__FreeBSD__)
    size_t capacity = sizeof(pathBuf);
    int    mib[4]   = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
    if (sysctl(mib, 4, pathBuf, &capacity, NULL, 0) == 0)
        pathLen = std::strlen(pathBuf);

#elif defined(__NetBSD__) || defined(__DragonFly__) || defined(__linux__)
    // Same procedure on all three systems, only the procfs link differs
    #if defined(__NetBSD__) || defined(__DragonFly__)
    const char* const exeLink = "/proc/curproc/exe";
    #else
    const char* const exeLink = "/proc/self/exe";
    #endif

    // readlink does not NUL-terminate; one byte is held back for the terminator
    const ssize_t linkLen = readlink(exeLink, pathBuf, sizeof(pathBuf) - 1);
    if (linkLen >= 0)
    {
        pathBuf[linkLen] = '\0';
        pathLen          = linkLen;
    }

#endif

    // In case of any error the path will be empty.
    return std::string(pathBuf, pathLen);
}
|
| 153 |
+
|
| 154 |
+
// Three-state status with implicit values 0, 1 and 2.
// NOTE(review): presumably reports where a system-wide shared constant ended
// up being stored - confirm against the code that produces these values.
enum class SystemWideSharedConstantAllocationStatus {
    NoAllocation,  // 0
    LocalMemory,   // 1
    SharedMemory   // 2
};
|
| 159 |
+
|
| 160 |
+
#if defined(_WIN32)
|
| 161 |
+
|
| 162 |
+
inline std::string GetLastErrorAsString(DWORD error) {
    // Converts a Win32 error code to the system's message text.
    // Error code 0 yields an empty string: no error message has been recorded.
    if (error == 0)
        return std::string();

    // ALLOCATE_BUFFER makes Win32 create the buffer that holds the message for
    // us (because we don't yet know how long the message string will be);
    // FROM_SYSTEM looks the code up in the system message table and
    // IGNORE_INSERTS leaves insert sequences in the text unexpanded.
    const DWORD formatFlags = FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM
                            | FORMAT_MESSAGE_IGNORE_INSERTS;

    // With ALLOCATE_BUFFER the lpBuffer argument is really the address of the
    // pointer that receives the allocation, hence the cast.
    LPSTR        systemText = nullptr;
    const size_t textLength =
      FormatMessageA(formatFlags, NULL, error, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
                     reinterpret_cast<LPSTR>(&systemText), 0, NULL);

    // Copy the text into a std::string, then free the Win32 string's buffer
    std::string message(systemText, textLength);
    LocalFree(systemText);

    return message;
}
|
| 187 |
+
|
| 188 |
+
// Utilizes shared memory to store the value. It is deduplicated system-wide (for the single user).
|
| 189 |
+
template<typename T>
|
| 190 |
+
class SharedMemoryBackend {
|
| 191 |
+
public:
|
| 192 |
+
enum class Status {
|
| 193 |
+
Success,
|
| 194 |
+
LargePageAllocationError,
|
| 195 |
+
FileMappingError,
|
| 196 |
+
MapViewError,
|
| 197 |
+
MutexCreateError,
|
| 198 |
+
MutexWaitError,
|
| 199 |
+
MutexReleaseError,
|
| 200 |
+
NotInitialized
|
| 201 |
+
};
|
| 202 |
+
|
| 203 |
+
static constexpr DWORD IS_INITIALIZED_VALUE = 1;
|
| 204 |
+
|
| 205 |
+
SharedMemoryBackend() :
|
| 206 |
+
status(Status::NotInitialized) {};
|
| 207 |
+
|
| 208 |
+
SharedMemoryBackend(const std::string& shm_name, const T& value) :
|
| 209 |
+
status(Status::NotInitialized) {
|
| 210 |
+
|
| 211 |
+
initialize(shm_name, value);
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
+
bool is_valid() const { return status == Status::Success; }
|
| 215 |
+
|
| 216 |
+
std::optional<std::string> get_error_message() const {
|
| 217 |
+
switch (status)
|
| 218 |
+
{
|
| 219 |
+
case Status::Success :
|
| 220 |
+
return std::nullopt;
|
| 221 |
+
case Status::LargePageAllocationError :
|
| 222 |
+
return "Failed to allocate large page memory";
|
| 223 |
+
case Status::FileMappingError :
|
| 224 |
+
return "Failed to create file mapping: " + last_error_message;
|
| 225 |
+
case Status::MapViewError :
|
| 226 |
+
return "Failed to map view: " + last_error_message;
|
| 227 |
+
case Status::MutexCreateError :
|
| 228 |
+
return "Failed to create mutex: " + last_error_message;
|
| 229 |
+
case Status::MutexWaitError :
|
| 230 |
+
return "Failed to wait on mutex: " + last_error_message;
|
| 231 |
+
case Status::MutexReleaseError :
|
| 232 |
+
return "Failed to release mutex: " + last_error_message;
|
| 233 |
+
case Status::NotInitialized :
|
| 234 |
+
return "Not initialized";
|
| 235 |
+
default :
|
| 236 |
+
return "Unknown error";
|
| 237 |
+
}
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
void* get() const { return is_valid() ? pMap : nullptr; }
|
| 241 |
+
|
| 242 |
+
~SharedMemoryBackend() { cleanup(); }
|
| 243 |
+
|
| 244 |
+
SharedMemoryBackend(const SharedMemoryBackend&) = delete;
|
| 245 |
+
SharedMemoryBackend& operator=(const SharedMemoryBackend&) = delete;
|
| 246 |
+
|
| 247 |
+
SharedMemoryBackend(SharedMemoryBackend&& other) noexcept :
|
| 248 |
+
pMap(other.pMap),
|
| 249 |
+
hMapFile(other.hMapFile),
|
| 250 |
+
status(other.status),
|
| 251 |
+
last_error_message(std::move(other.last_error_message)) {
|
| 252 |
+
|
| 253 |
+
other.pMap = nullptr;
|
| 254 |
+
other.hMapFile = 0;
|
| 255 |
+
other.status = Status::NotInitialized;
|
| 256 |
+
}
|
| 257 |
+
|
| 258 |
+
SharedMemoryBackend& operator=(SharedMemoryBackend&& other) noexcept {
|
| 259 |
+
if (this != &other)
|
| 260 |
+
{
|
| 261 |
+
cleanup();
|
| 262 |
+
pMap = other.pMap;
|
| 263 |
+
hMapFile = other.hMapFile;
|
| 264 |
+
status = other.status;
|
| 265 |
+
last_error_message = std::move(other.last_error_message);
|
| 266 |
+
|
| 267 |
+
other.pMap = nullptr;
|
| 268 |
+
other.hMapFile = 0;
|
| 269 |
+
other.status = Status::NotInitialized;
|
| 270 |
+
}
|
| 271 |
+
return *this;
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
SystemWideSharedConstantAllocationStatus get_status() const {
|
| 275 |
+
return status == Status::Success ? SystemWideSharedConstantAllocationStatus::SharedMemory
|
| 276 |
+
: SystemWideSharedConstantAllocationStatus::NoAllocation;
|
| 277 |
+
}
|
| 278 |
+
|
| 279 |
+
private:
|
| 280 |
+
void initialize(const std::string& shm_name, const T& value) {
|
| 281 |
+
const size_t total_size = sizeof(T) + sizeof(IS_INITIALIZED_VALUE);
|
| 282 |
+
|
| 283 |
+
// Try allocating with large pages first.
|
| 284 |
+
hMapFile = windows_try_with_large_page_priviliges(
|
| 285 |
+
[&](size_t largePageSize) {
|
| 286 |
+
const size_t total_size_aligned =
|
| 287 |
+
(total_size + largePageSize - 1) / largePageSize * largePageSize;
|
| 288 |
+
|
| 289 |
+
#if defined(_WIN64)
|
| 290 |
+
DWORD total_size_low = total_size_aligned & 0xFFFFFFFFu;
|
| 291 |
+
DWORD total_size_high = total_size_aligned >> 32u;
|
| 292 |
+
#else
|
| 293 |
+
DWORD total_size_low = total_size_aligned;
|
| 294 |
+
DWORD total_size_high = 0;
|
| 295 |
+
#endif
|
| 296 |
+
|
| 297 |
+
return CreateFileMappingA(INVALID_HANDLE_VALUE, NULL,
|
| 298 |
+
PAGE_READWRITE | SEC_COMMIT | SEC_LARGE_PAGES,
|
| 299 |
+
total_size_high, total_size_low, shm_name.c_str());
|
| 300 |
+
},
|
| 301 |
+
[]() { return (void*) nullptr; });
|
| 302 |
+
|
| 303 |
+
// Fallback to normal allocation if no large pages available.
|
| 304 |
+
if (!hMapFile)
|
| 305 |
+
{
|
| 306 |
+
hMapFile = CreateFileMappingA(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0,
|
| 307 |
+
static_cast<DWORD>(total_size), shm_name.c_str());
|
| 308 |
+
}
|
| 309 |
+
|
| 310 |
+
if (!hMapFile)
|
| 311 |
+
{
|
| 312 |
+
const DWORD err = GetLastError();
|
| 313 |
+
last_error_message = GetLastErrorAsString(err);
|
| 314 |
+
status = Status::FileMappingError;
|
| 315 |
+
return;
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
pMap = MapViewOfFile(hMapFile, FILE_MAP_ALL_ACCESS, 0, 0, total_size);
|
| 319 |
+
if (!pMap)
|
| 320 |
+
{
|
| 321 |
+
const DWORD err = GetLastError();
|
| 322 |
+
last_error_message = GetLastErrorAsString(err);
|
| 323 |
+
status = Status::MapViewError;
|
| 324 |
+
cleanup_partial();
|
| 325 |
+
return;
|
| 326 |
+
}
|
| 327 |
+
|
| 328 |
+
// Use named mutex to ensure only one initializer
|
| 329 |
+
std::string mutex_name = shm_name + "$mutex";
|
| 330 |
+
HANDLE hMutex = CreateMutexA(NULL, FALSE, mutex_name.c_str());
|
| 331 |
+
if (!hMutex)
|
| 332 |
+
{
|
| 333 |
+
const DWORD err = GetLastError();
|
| 334 |
+
last_error_message = GetLastErrorAsString(err);
|
| 335 |
+
status = Status::MutexCreateError;
|
| 336 |
+
cleanup_partial();
|
| 337 |
+
return;
|
| 338 |
+
}
|
| 339 |
+
|
| 340 |
+
DWORD wait_result = WaitForSingleObject(hMutex, INFINITE);
|
| 341 |
+
if (wait_result != WAIT_OBJECT_0)
|
| 342 |
+
{
|
| 343 |
+
const DWORD err = GetLastError();
|
| 344 |
+
last_error_message = GetLastErrorAsString(err);
|
| 345 |
+
status = Status::MutexWaitError;
|
| 346 |
+
CloseHandle(hMutex);
|
| 347 |
+
cleanup_partial();
|
| 348 |
+
return;
|
| 349 |
+
}
|
| 350 |
+
|
| 351 |
+
// Crucially, we place the object first to ensure alignment.
|
| 352 |
+
volatile DWORD* is_initialized =
|
| 353 |
+
std::launder(reinterpret_cast<DWORD*>(reinterpret_cast<char*>(pMap) + sizeof(T)));
|
| 354 |
+
T* object = std::launder(reinterpret_cast<T*>(pMap));
|
| 355 |
+
|
| 356 |
+
if (*is_initialized != IS_INITIALIZED_VALUE)
|
| 357 |
+
{
|
| 358 |
+
// First time initialization, message for debug purposes
|
| 359 |
+
new (object) T{value};
|
| 360 |
+
*is_initialized = IS_INITIALIZED_VALUE;
|
| 361 |
+
}
|
| 362 |
+
|
| 363 |
+
BOOL release_result = ReleaseMutex(hMutex);
|
| 364 |
+
CloseHandle(hMutex);
|
| 365 |
+
|
| 366 |
+
if (!release_result)
|
| 367 |
+
{
|
| 368 |
+
const DWORD err = GetLastError();
|
| 369 |
+
last_error_message = GetLastErrorAsString(err);
|
| 370 |
+
status = Status::MutexReleaseError;
|
| 371 |
+
cleanup_partial();
|
| 372 |
+
return;
|
| 373 |
+
}
|
| 374 |
+
|
| 375 |
+
status = Status::Success;
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
+
void cleanup_partial() {
|
| 379 |
+
if (pMap != nullptr)
|
| 380 |
+
{
|
| 381 |
+
UnmapViewOfFile(pMap);
|
| 382 |
+
pMap = nullptr;
|
| 383 |
+
}
|
| 384 |
+
if (hMapFile)
|
| 385 |
+
{
|
| 386 |
+
CloseHandle(hMapFile);
|
| 387 |
+
hMapFile = 0;
|
| 388 |
+
}
|
| 389 |
+
}
|
| 390 |
+
|
| 391 |
+
void cleanup() {
|
| 392 |
+
if (pMap != nullptr)
|
| 393 |
+
{
|
| 394 |
+
UnmapViewOfFile(pMap);
|
| 395 |
+
pMap = nullptr;
|
| 396 |
+
}
|
| 397 |
+
if (hMapFile)
|
| 398 |
+
{
|
| 399 |
+
CloseHandle(hMapFile);
|
| 400 |
+
hMapFile = 0;
|
| 401 |
+
}
|
| 402 |
+
}
|
| 403 |
+
|
| 404 |
+
void* pMap = nullptr;
|
| 405 |
+
HANDLE hMapFile = 0;
|
| 406 |
+
Status status = Status::NotInitialized;
|
| 407 |
+
std::string last_error_message;
|
| 408 |
+
};
|
| 409 |
+
|
| 410 |
+
#elif defined(__linux__) && !defined(__ANDROID__)
|
| 411 |
+
|
| 412 |
+
template<typename T>
|
| 413 |
+
class SharedMemoryBackend {
|
| 414 |
+
public:
|
| 415 |
+
SharedMemoryBackend() = default;
|
| 416 |
+
|
| 417 |
+
SharedMemoryBackend(const std::string& shm_name, const T& value) :
|
| 418 |
+
shm1(shm::create_shared<T>(shm_name, value)) {}
|
| 419 |
+
|
| 420 |
+
void* get() const {
|
| 421 |
+
const T* ptr = &shm1->get();
|
| 422 |
+
return reinterpret_cast<void*>(const_cast<T*>(ptr));
|
| 423 |
+
}
|
| 424 |
+
|
| 425 |
+
bool is_valid() const { return shm1 && shm1->is_open() && shm1->is_initialized(); }
|
| 426 |
+
|
| 427 |
+
SystemWideSharedConstantAllocationStatus get_status() const {
|
| 428 |
+
return is_valid() ? SystemWideSharedConstantAllocationStatus::SharedMemory
|
| 429 |
+
: SystemWideSharedConstantAllocationStatus::NoAllocation;
|
| 430 |
+
}
|
| 431 |
+
|
| 432 |
+
std::optional<std::string> get_error_message() const {
|
| 433 |
+
if (!shm1)
|
| 434 |
+
return "Shared memory not initialized";
|
| 435 |
+
|
| 436 |
+
if (!shm1->is_open())
|
| 437 |
+
return "Shared memory is not open";
|
| 438 |
+
|
| 439 |
+
if (!shm1->is_initialized())
|
| 440 |
+
return "Not initialized";
|
| 441 |
+
|
| 442 |
+
return std::nullopt;
|
| 443 |
+
}
|
| 444 |
+
|
| 445 |
+
private:
|
| 446 |
+
std::optional<shm::SharedMemory<T>> shm1;
|
| 447 |
+
};
|
| 448 |
+
|
| 449 |
+
#else
|
| 450 |
+
|
| 451 |
+
// For systems that don't have shared memory, or support is troublesome.
|
| 452 |
+
// The way fallback is done is that we need a dummy backend.
|
| 453 |
+
|
| 454 |
+
template<typename T>
|
| 455 |
+
class SharedMemoryBackend {
|
| 456 |
+
public:
|
| 457 |
+
SharedMemoryBackend() = default;
|
| 458 |
+
|
| 459 |
+
SharedMemoryBackend([[maybe_unused]] const std::string& shm_name,
|
| 460 |
+
[[maybe_unused]] const T& value) {}
|
| 461 |
+
|
| 462 |
+
void* get() const { return nullptr; }
|
| 463 |
+
|
| 464 |
+
bool is_valid() const { return false; }
|
| 465 |
+
|
| 466 |
+
SystemWideSharedConstantAllocationStatus get_status() const {
|
| 467 |
+
return SystemWideSharedConstantAllocationStatus::NoAllocation;
|
| 468 |
+
}
|
| 469 |
+
|
| 470 |
+
std::optional<std::string> get_error_message() const { return "Dummy SharedMemoryBackend"; }
|
| 471 |
+
};
|
| 472 |
+
|
| 473 |
+
#endif
|
| 474 |
+
|
| 475 |
+
template<typename T>
|
| 476 |
+
struct SharedMemoryBackendFallback {
|
| 477 |
+
SharedMemoryBackendFallback() = default;
|
| 478 |
+
|
| 479 |
+
SharedMemoryBackendFallback(const std::string&, const T& value) :
|
| 480 |
+
fallback_object(make_unique_large_page<T>(value)) {}
|
| 481 |
+
|
| 482 |
+
void* get() const { return fallback_object.get(); }
|
| 483 |
+
|
| 484 |
+
SharedMemoryBackendFallback(const SharedMemoryBackendFallback&) = delete;
|
| 485 |
+
SharedMemoryBackendFallback& operator=(const SharedMemoryBackendFallback&) = delete;
|
| 486 |
+
|
| 487 |
+
SharedMemoryBackendFallback(SharedMemoryBackendFallback&& other) noexcept :
|
| 488 |
+
fallback_object(std::move(other.fallback_object)) {}
|
| 489 |
+
|
| 490 |
+
SharedMemoryBackendFallback& operator=(SharedMemoryBackendFallback&& other) noexcept {
|
| 491 |
+
fallback_object = std::move(other.fallback_object);
|
| 492 |
+
return *this;
|
| 493 |
+
}
|
| 494 |
+
|
| 495 |
+
SystemWideSharedConstantAllocationStatus get_status() const {
|
| 496 |
+
return fallback_object == nullptr ? SystemWideSharedConstantAllocationStatus::NoAllocation
|
| 497 |
+
: SystemWideSharedConstantAllocationStatus::LocalMemory;
|
| 498 |
+
}
|
| 499 |
+
|
| 500 |
+
std::optional<std::string> get_error_message() const {
|
| 501 |
+
if (fallback_object == nullptr)
|
| 502 |
+
return "Not initialized";
|
| 503 |
+
|
| 504 |
+
return "Shared memory not supported by the OS. Local allocation fallback.";
|
| 505 |
+
}
|
| 506 |
+
|
| 507 |
+
private:
|
| 508 |
+
LargePagePtr<T> fallback_object;
|
| 509 |
+
};
|
| 510 |
+
|
| 511 |
+
// Platform-independent wrapper
|
| 512 |
+
template<typename T>
|
| 513 |
+
struct SystemWideSharedConstant {
|
| 514 |
+
private:
|
| 515 |
+
static std::string createHashString(const std::string& input) {
|
| 516 |
+
char buf[1024];
|
| 517 |
+
std::snprintf(buf, sizeof(buf), "%016" PRIx64, hash_string(input));
|
| 518 |
+
return buf;
|
| 519 |
+
}
|
| 520 |
+
|
| 521 |
+
public:
|
| 522 |
+
// We can't run the destructor because it may be in a completely different process.
|
| 523 |
+
// The object stored must also be obviously in-line but we can't check for that, other than some basic checks that cover most cases.
|
| 524 |
+
static_assert(std::is_trivially_destructible_v<T>);
|
| 525 |
+
static_assert(std::is_trivially_move_constructible_v<T>);
|
| 526 |
+
static_assert(std::is_trivially_copy_constructible_v<T>);
|
| 527 |
+
|
| 528 |
+
SystemWideSharedConstant() = default;
|
| 529 |
+
|
| 530 |
+
|
| 531 |
+
// Content is addressed by its hash. An additional discriminator can be added to account for differences
|
| 532 |
+
// that are not present in the content, for example NUMA node allocation.
|
| 533 |
+
SystemWideSharedConstant(const T& value, std::size_t discriminator = 0) {
|
| 534 |
+
std::size_t content_hash = std::hash<T>{}(value);
|
| 535 |
+
std::size_t executable_hash = hash_string(getExecutablePathHash());
|
| 536 |
+
|
| 537 |
+
char buf[1024];
|
| 538 |
+
std::snprintf(buf, sizeof(buf), "Local\\sf_%zu$%zu$%zu", content_hash, executable_hash,
|
| 539 |
+
discriminator);
|
| 540 |
+
std::string shm_name = buf;
|
| 541 |
+
|
| 542 |
+
#if defined(__linux__) && !defined(__ANDROID__)
|
| 543 |
+
// POSIX shared memory names must start with a slash
|
| 544 |
+
shm_name = "/sf_" + createHashString(shm_name);
|
| 545 |
+
|
| 546 |
+
// hash name and make sure it is not longer than SF_MAX_SEM_NAME_LEN
|
| 547 |
+
if (shm_name.size() > SF_MAX_SEM_NAME_LEN)
|
| 548 |
+
{
|
| 549 |
+
shm_name = shm_name.substr(0, SF_MAX_SEM_NAME_LEN - 1);
|
| 550 |
+
}
|
| 551 |
+
#endif
|
| 552 |
+
|
| 553 |
+
SharedMemoryBackend<T> shm_backend(shm_name, value);
|
| 554 |
+
|
| 555 |
+
if (shm_backend.is_valid())
|
| 556 |
+
{
|
| 557 |
+
backend = std::move(shm_backend);
|
| 558 |
+
}
|
| 559 |
+
else
|
| 560 |
+
{
|
| 561 |
+
backend = SharedMemoryBackendFallback<T>(shm_name, value);
|
| 562 |
+
}
|
| 563 |
+
}
|
| 564 |
+
|
| 565 |
+
SystemWideSharedConstant(const SystemWideSharedConstant&) = delete;
|
| 566 |
+
SystemWideSharedConstant& operator=(const SystemWideSharedConstant&) = delete;
|
| 567 |
+
|
| 568 |
+
SystemWideSharedConstant(SystemWideSharedConstant&& other) noexcept :
|
| 569 |
+
backend(std::move(other.backend)) {}
|
| 570 |
+
|
| 571 |
+
SystemWideSharedConstant& operator=(SystemWideSharedConstant&& other) noexcept {
|
| 572 |
+
backend = std::move(other.backend);
|
| 573 |
+
return *this;
|
| 574 |
+
}
|
| 575 |
+
|
| 576 |
+
const T& operator*() const { return *std::launder(reinterpret_cast<const T*>(get_ptr())); }
|
| 577 |
+
|
| 578 |
+
bool operator==(std::nullptr_t) const noexcept { return get_ptr() == nullptr; }
|
| 579 |
+
|
| 580 |
+
bool operator!=(std::nullptr_t) const noexcept { return get_ptr() != nullptr; }
|
| 581 |
+
|
| 582 |
+
SystemWideSharedConstantAllocationStatus get_status() const {
|
| 583 |
+
return std::visit(
|
| 584 |
+
[](const auto& end) -> SystemWideSharedConstantAllocationStatus {
|
| 585 |
+
if constexpr (std::is_same_v<std::decay_t<decltype(end)>, std::monostate>)
|
| 586 |
+
{
|
| 587 |
+
return SystemWideSharedConstantAllocationStatus::NoAllocation;
|
| 588 |
+
}
|
| 589 |
+
else
|
| 590 |
+
{
|
| 591 |
+
return end.get_status();
|
| 592 |
+
}
|
| 593 |
+
},
|
| 594 |
+
backend);
|
| 595 |
+
}
|
| 596 |
+
|
| 597 |
+
std::optional<std::string> get_error_message() const {
|
| 598 |
+
return std::visit(
|
| 599 |
+
[](const auto& end) -> std::optional<std::string> {
|
| 600 |
+
if constexpr (std::is_same_v<std::decay_t<decltype(end)>, std::monostate>)
|
| 601 |
+
{
|
| 602 |
+
return std::nullopt;
|
| 603 |
+
}
|
| 604 |
+
else
|
| 605 |
+
{
|
| 606 |
+
return end.get_error_message();
|
| 607 |
+
}
|
| 608 |
+
},
|
| 609 |
+
backend);
|
| 610 |
+
}
|
| 611 |
+
|
| 612 |
+
private:
|
| 613 |
+
auto get_ptr() const {
|
| 614 |
+
return std::visit(
|
| 615 |
+
[](const auto& end) -> void* {
|
| 616 |
+
if constexpr (std::is_same_v<std::decay_t<decltype(end)>, std::monostate>)
|
| 617 |
+
{
|
| 618 |
+
return nullptr;
|
| 619 |
+
}
|
| 620 |
+
else
|
| 621 |
+
{
|
| 622 |
+
return end.get();
|
| 623 |
+
}
|
| 624 |
+
},
|
| 625 |
+
backend);
|
| 626 |
+
}
|
| 627 |
+
|
| 628 |
+
std::variant<std::monostate, SharedMemoryBackend<T>, SharedMemoryBackendFallback<T>> backend;
|
| 629 |
+
};
|
| 630 |
+
|
| 631 |
+
|
| 632 |
+
} // namespace Stockfish
|
| 633 |
+
|
| 634 |
+
#endif // #ifndef SHM_H_INCLUDED
|
src/shm_linux.h
ADDED
|
@@ -0,0 +1,672 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef SHM_LINUX_H_INCLUDED
|
| 20 |
+
#define SHM_LINUX_H_INCLUDED
|
| 21 |
+
|
| 22 |
+
#if !defined(__linux__) || defined(__ANDROID__)
|
| 23 |
+
#error shm_linux.h should not be included on this platform.
|
| 24 |
+
#endif
|
| 25 |
+
|
| 26 |
+
#include <algorithm>
#include <atomic>
#include <cassert>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <mutex>
#include <new>
#include <optional>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include <dirent.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <pthread.h>
#include <signal.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
|
| 48 |
+
#define SF_MAX_SEM_NAME_LEN NAME_MAX
|
| 49 |
+
|
| 50 |
+
#include "misc.h"
|
| 51 |
+
|
| 52 |
+
namespace Stockfish::shm {
|
| 53 |
+
|
| 54 |
+
namespace detail {
|
| 55 |
+
|
| 56 |
+
// Bookkeeping data that accompanies the payload of a shared memory region.
// The order of the members determines the layout of the structure and must
// therefore not be changed.
struct ShmHeader {
    // Value a freshly constructed header carries in `magic`.
    static constexpr uint32_t SHM_MAGIC = 0xAD5F1A12;

    // Not initialized by this struct; presumably set up by the code that creates
    // the region (TODO confirm against setup_new_region).
    pthread_mutex_t mutex;

    // Number of users attached to the region.
    std::atomic<uint32_t> ref_count{0};

    // Starts out false.
    std::atomic<bool> initialized{false};

    uint32_t magic{SHM_MAGIC};
};
|
| 63 |
+
|
| 64 |
+
// Interface through which the registry handles shared memory objects without
// knowing their payload type.
class SharedMemoryBase {
   public:
    virtual ~SharedMemoryBase() = default;

    virtual void               close(bool skip_unmap = false) noexcept = 0;
    virtual const std::string& name() const noexcept                   = 0;
};

// Process-wide list of shared memory objects that want to be closed by
// cleanup_all(). Every operation takes the registry mutex.
// The registry stores plain pointers and does not own the objects.
class SharedMemoryRegistry {
   public:
    // Appends instance to the list (no check for duplicates is made).
    static void register_instance(SharedMemoryBase* instance) {
        const std::lock_guard<std::mutex> guard(registry_mutex_);
        active_instances_.push_back(instance);
    }

    // Removes every occurrence of instance; unknown pointers are ignored.
    static void unregister_instance(SharedMemoryBase* instance) {
        const std::lock_guard<std::mutex> guard(registry_mutex_);

        const auto first_removed =
          std::remove(active_instances_.begin(), active_instances_.end(), instance);
        active_instances_.erase(first_removed, active_instances_.end());
    }

    // Closes all listed objects in registration order, forwarding skip_unmap to
    // each of them, and empties the list afterwards.
    static void cleanup_all(bool skip_unmap = false) noexcept {
        const std::lock_guard<std::mutex> guard(registry_mutex_);

        for (SharedMemoryBase* entry : active_instances_)
            entry->close(skip_unmap);

        active_instances_.clear();
    }

   private:
    static std::mutex                     registry_mutex_;
    static std::vector<SharedMemoryBase*> active_instances_;
};

inline std::mutex                     SharedMemoryRegistry::registry_mutex_;
inline std::vector<SharedMemoryBase*> SharedMemoryRegistry::active_instances_;
|
| 99 |
+
|
| 100 |
+
class CleanupHooks {
|
| 101 |
+
private:
|
| 102 |
+
static std::once_flag register_once_;
|
| 103 |
+
|
| 104 |
+
static void handle_signal(int sig) noexcept {
|
| 105 |
+
// Search threads may still be running, so skip munmap (but still perform
|
| 106 |
+
// other cleanup actions). The memory mappings will be released on exit.
|
| 107 |
+
SharedMemoryRegistry::cleanup_all(true);
|
| 108 |
+
|
| 109 |
+
// Invoke the default handler, which will exit
|
| 110 |
+
struct sigaction sa;
|
| 111 |
+
sa.sa_handler = SIG_DFL;
|
| 112 |
+
sigemptyset(&sa.sa_mask);
|
| 113 |
+
sa.sa_flags = 0;
|
| 114 |
+
if (sigaction(sig, &sa, nullptr) == -1)
|
| 115 |
+
_Exit(128 + sig);
|
| 116 |
+
|
| 117 |
+
raise(sig);
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
static void register_signal_handlers() noexcept {
|
| 121 |
+
std::atexit([]() { SharedMemoryRegistry::cleanup_all(true); });
|
| 122 |
+
|
| 123 |
+
constexpr int signals[] = {SIGHUP, SIGINT, SIGQUIT, SIGILL, SIGABRT, SIGFPE,
|
| 124 |
+
SIGSEGV, SIGTERM, SIGBUS, SIGSYS, SIGXCPU, SIGXFSZ};
|
| 125 |
+
|
| 126 |
+
struct sigaction sa;
|
| 127 |
+
sa.sa_handler = handle_signal;
|
| 128 |
+
sigemptyset(&sa.sa_mask);
|
| 129 |
+
sa.sa_flags = 0;
|
| 130 |
+
|
| 131 |
+
for (int sig : signals)
|
| 132 |
+
sigaction(sig, &sa, nullptr);
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
public:
|
| 136 |
+
static void ensure_registered() noexcept {
|
| 137 |
+
std::call_once(register_once_, register_signal_handlers);
|
| 138 |
+
}
|
| 139 |
+
};
|
| 140 |
+
|
| 141 |
+
inline std::once_flag CleanupHooks::register_once_;
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
// Makes sure that storage for the byte range [offset, offset + length) of fd is
// allocated.
// Outside of Apple platforms this is exactly posix_fallocate(), including its
// return value. On Apple platforms the space is preallocated with fcntl()
// (contiguously if possible, otherwise in any form), the file is then sized with
// ftruncate(), and the result of the last call made is returned.
inline int portable_fallocate(int fd, off_t offset, off_t length) {
#ifdef __APPLE__
    fstore_t request = {F_ALLOCATECONTIG, F_PEOFPOSMODE, offset, length, 0};

    int status = fcntl(fd, F_PREALLOCATE, &request);
    if (status == -1)
    {
        // A contiguous allocation was not possible; accept a fragmented one.
        request.fst_flags = F_ALLOCATEALL;
        status            = fcntl(fd, F_PREALLOCATE, &request);
    }

    if (status == -1)
        return status;

    return ftruncate(fd, offset + length);
#else
    const int status = posix_fallocate(fd, offset, length);
    return status;
#endif
}
|
| 160 |
+
|
| 161 |
+
} // namespace detail
|
| 162 |
+
|
| 163 |
+
template<typename T>
|
| 164 |
+
class SharedMemory: public detail::SharedMemoryBase {
|
| 165 |
+
static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");
|
| 166 |
+
static_assert(!std::is_pointer_v<T>, "T cannot be a pointer type");
|
| 167 |
+
|
| 168 |
+
private:
|
| 169 |
+
std::string name_;
|
| 170 |
+
int fd_ = -1;
|
| 171 |
+
void* mapped_ptr_ = nullptr;
|
| 172 |
+
T* data_ptr_ = nullptr;
|
| 173 |
+
detail::ShmHeader* header_ptr_ = nullptr;
|
| 174 |
+
size_t total_size_ = 0;
|
| 175 |
+
std::string sentinel_base_;
|
| 176 |
+
std::string sentinel_path_;
|
| 177 |
+
|
| 178 |
+
static constexpr size_t calculate_total_size() noexcept {
|
| 179 |
+
return sizeof(T) + sizeof(detail::ShmHeader);
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
// Builds the string "sfshm_" followed by the hash of name, printed in decimal and
// zero-padded to at least 16 digits. The result is stored as sentinel_base_.
static std::string make_sentinel_base(const std::string& name) {
    // Using std::to_string here causes non-deterministic PGO builds.
    // snprintf, being part of libc, is insensitive to the formatted values.
    // 32 bytes suffice: 6 prefix characters, at most 20 digits and the terminator.
    char formatted[32];
    std::snprintf(formatted, sizeof(formatted), "sfshm_%016" PRIu64, hash_string(name));
    return std::string(formatted);
}
|
| 189 |
+
|
| 190 |
+
public:
|
| 191 |
+
explicit SharedMemory(const std::string& name) noexcept :
|
| 192 |
+
name_(name),
|
| 193 |
+
total_size_(calculate_total_size()),
|
| 194 |
+
sentinel_base_(make_sentinel_base(name)) {}
|
| 195 |
+
|
| 196 |
+
~SharedMemory() noexcept override {
|
| 197 |
+
detail::SharedMemoryRegistry::unregister_instance(this);
|
| 198 |
+
close();
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
SharedMemory(const SharedMemory&) = delete;
|
| 202 |
+
SharedMemory& operator=(const SharedMemory&) = delete;
|
| 203 |
+
|
| 204 |
+
// Move construction: takes over every member of other, replaces other by this
// object in the cleanup registry and finally resets other.
SharedMemory(SharedMemory&& other) noexcept :
    name_(std::move(other.name_)),
    fd_(other.fd_),
    mapped_ptr_(other.mapped_ptr_),
    data_ptr_(other.data_ptr_),
    header_ptr_(other.header_ptr_),
    total_size_(other.total_size_),
    sentinel_base_(std::move(other.sentinel_base_)),
    sentinel_path_(std::move(other.sentinel_path_)) {

    // The registry must refer to the object that now holds the state.
    // Note that this object is registered whether or not other was.
    detail::SharedMemoryRegistry::unregister_instance(&other);
    detail::SharedMemoryRegistry::register_instance(this);

    other.reset();
}
|
| 218 |
+
|
| 219 |
+
// Move assignment: closes whatever this object currently holds, takes over every
// member of other, replaces other by this object in the cleanup registry and
// finally resets other. Self-assignment is a no-op.
SharedMemory& operator=(SharedMemory&& other) noexcept {
    if (this == &other)
        return *this;

    // Let go of the current region first.
    detail::SharedMemoryRegistry::unregister_instance(this);
    close();

    name_          = std::move(other.name_);
    fd_            = other.fd_;
    mapped_ptr_    = other.mapped_ptr_;
    data_ptr_      = other.data_ptr_;
    header_ptr_    = other.header_ptr_;
    total_size_    = other.total_size_;
    sentinel_base_ = std::move(other.sentinel_base_);
    sentinel_path_ = std::move(other.sentinel_path_);

    // The registry must refer to the object that now holds the state.
    detail::SharedMemoryRegistry::unregister_instance(&other);
    detail::SharedMemoryRegistry::register_instance(this);

    other.reset();

    return *this;
}
|
| 241 |
+
|
| 242 |
+
[[nodiscard]] bool open(const T& initial_value) noexcept {
|
| 243 |
+
detail::CleanupHooks::ensure_registered();
|
| 244 |
+
|
| 245 |
+
bool retried_stale = false;
|
| 246 |
+
|
| 247 |
+
while (true)
|
| 248 |
+
{
|
| 249 |
+
if (is_open())
|
| 250 |
+
return false;
|
| 251 |
+
|
| 252 |
+
bool created_new = false;
|
| 253 |
+
fd_ = shm_open(name_.c_str(), O_CREAT | O_EXCL | O_RDWR, 0666);
|
| 254 |
+
|
| 255 |
+
if (fd_ == -1)
|
| 256 |
+
{
|
| 257 |
+
fd_ = shm_open(name_.c_str(), O_RDWR, 0666);
|
| 258 |
+
if (fd_ == -1)
|
| 259 |
+
return false;
|
| 260 |
+
}
|
| 261 |
+
else
|
| 262 |
+
created_new = true;
|
| 263 |
+
|
| 264 |
+
if (!lock_file(LOCK_EX))
|
| 265 |
+
{
|
| 266 |
+
::close(fd_);
|
| 267 |
+
reset();
|
| 268 |
+
return false;
|
| 269 |
+
}
|
| 270 |
+
|
| 271 |
+
bool invalid_header = false;
|
| 272 |
+
bool success =
|
| 273 |
+
created_new ? setup_new_region(initial_value) : setup_existing_region(invalid_header);
|
| 274 |
+
|
| 275 |
+
if (!success)
|
| 276 |
+
{
|
| 277 |
+
if (created_new || invalid_header)
|
| 278 |
+
shm_unlink(name_.c_str());
|
| 279 |
+
if (mapped_ptr_)
|
| 280 |
+
unmap_region();
|
| 281 |
+
unlock_file();
|
| 282 |
+
::close(fd_);
|
| 283 |
+
reset();
|
| 284 |
+
|
| 285 |
+
if (!created_new && invalid_header && !retried_stale)
|
| 286 |
+
{
|
| 287 |
+
retried_stale = true;
|
| 288 |
+
continue;
|
| 289 |
+
}
|
| 290 |
+
return false;
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
if (!lock_shared_mutex())
|
| 294 |
+
{
|
| 295 |
+
if (created_new)
|
| 296 |
+
shm_unlink(name_.c_str());
|
| 297 |
+
if (mapped_ptr_)
|
| 298 |
+
unmap_region();
|
| 299 |
+
unlock_file();
|
| 300 |
+
::close(fd_);
|
| 301 |
+
reset();
|
| 302 |
+
|
| 303 |
+
if (!created_new && !retried_stale)
|
| 304 |
+
{
|
| 305 |
+
retried_stale = true;
|
| 306 |
+
continue;
|
| 307 |
+
}
|
| 308 |
+
return false;
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
if (!create_sentinel_file_locked())
|
| 312 |
+
{
|
| 313 |
+
unlock_shared_mutex();
|
| 314 |
+
unmap_region();
|
| 315 |
+
if (created_new)
|
| 316 |
+
shm_unlink(name_.c_str());
|
| 317 |
+
unlock_file();
|
| 318 |
+
::close(fd_);
|
| 319 |
+
reset();
|
| 320 |
+
return false;
|
| 321 |
+
}
|
| 322 |
+
|
| 323 |
+
header_ptr_->ref_count.fetch_add(1, std::memory_order_acq_rel);
|
| 324 |
+
|
| 325 |
+
unlock_shared_mutex();
|
| 326 |
+
unlock_file();
|
| 327 |
+
detail::SharedMemoryRegistry::register_instance(this);
|
| 328 |
+
return true;
|
| 329 |
+
}
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
void close(bool skip_unmap = false) noexcept override {
|
| 333 |
+
if (fd_ == -1 && mapped_ptr_ == nullptr)
|
| 334 |
+
return;
|
| 335 |
+
|
| 336 |
+
bool remove_region = false;
|
| 337 |
+
bool file_locked = lock_file(LOCK_EX);
|
| 338 |
+
bool mutex_locked = false;
|
| 339 |
+
|
| 340 |
+
if (file_locked && header_ptr_ != nullptr)
|
| 341 |
+
mutex_locked = lock_shared_mutex();
|
| 342 |
+
|
| 343 |
+
if (mutex_locked)
|
| 344 |
+
{
|
| 345 |
+
if (header_ptr_)
|
| 346 |
+
{
|
| 347 |
+
header_ptr_->ref_count.fetch_sub(1, std::memory_order_acq_rel);
|
| 348 |
+
}
|
| 349 |
+
remove_sentinel_file();
|
| 350 |
+
remove_region = !has_other_live_sentinels_locked();
|
| 351 |
+
unlock_shared_mutex();
|
| 352 |
+
}
|
| 353 |
+
else
|
| 354 |
+
{
|
| 355 |
+
remove_sentinel_file();
|
| 356 |
+
decrement_refcount_relaxed();
|
| 357 |
+
}
|
| 358 |
+
|
| 359 |
+
if (skip_unmap)
|
| 360 |
+
mapped_ptr_ = nullptr;
|
| 361 |
+
else
|
| 362 |
+
unmap_region();
|
| 363 |
+
|
| 364 |
+
if (remove_region)
|
| 365 |
+
shm_unlink(name_.c_str());
|
| 366 |
+
|
| 367 |
+
if (file_locked)
|
| 368 |
+
unlock_file();
|
| 369 |
+
|
| 370 |
+
if (fd_ != -1)
|
| 371 |
+
{
|
| 372 |
+
::close(fd_);
|
| 373 |
+
fd_ = -1;
|
| 374 |
+
}
|
| 375 |
+
|
| 376 |
+
if (!skip_unmap)
|
| 377 |
+
reset();
|
| 378 |
+
}
|
| 379 |
+
|
| 380 |
+
// Returns a reference to the stored region name (name_).
const std::string& name() const noexcept override {
    return name_;
}
|
| 381 |
+
|
| 382 |
+
// True only when a descriptor is held and both the mapping and the data
// pointer are non-null.
[[nodiscard]] bool is_open() const noexcept {
    if (fd_ == -1)
        return false;

    return mapped_ptr_ && data_ptr_;
}
|
| 383 |
+
|
| 384 |
+
// Read-only access to the mapped object. Dereferences data_ptr_ without a
// null check.
[[nodiscard]] const T& get() const noexcept {
    return *data_ptr_;
}
|
| 385 |
+
|
| 386 |
+
// Pointer-style read-only access to the mapped object (returns data_ptr_).
[[nodiscard]] const T* operator->() const noexcept {
    return data_ptr_;
}
|
| 387 |
+
|
| 388 |
+
// Read-only dereference of the mapped object. Dereferences data_ptr_
// without a null check.
[[nodiscard]] const T& operator*() const noexcept {
    return *data_ptr_;
}
|
| 389 |
+
|
| 390 |
+
[[nodiscard]] uint32_t ref_count() const noexcept {
|
| 391 |
+
return header_ptr_ ? header_ptr_->ref_count.load(std::memory_order_acquire) : 0;
|
| 392 |
+
}
|
| 393 |
+
|
| 394 |
+
[[nodiscard]] bool is_initialized() const noexcept {
|
| 395 |
+
return header_ptr_ ? header_ptr_->initialized.load(std::memory_order_acquire) : false;
|
| 396 |
+
}
|
| 397 |
+
|
| 398 |
+
// Forwards to detail::SharedMemoryRegistry::cleanup_all().
static void cleanup_all_instances() noexcept {
    detail::SharedMemoryRegistry::cleanup_all();
}
|
| 399 |
+
|
| 400 |
+
private:
|
| 401 |
+
// Puts the handle back into its "nothing open" state. This only overwrites
// members; it does not close the descriptor or unmap anything.
void reset() noexcept {
    sentinel_path_.clear();

    header_ptr_ = nullptr;
    data_ptr_   = nullptr;
    mapped_ptr_ = nullptr;
    fd_         = -1;
}
|
| 408 |
+
|
| 409 |
+
void unmap_region() noexcept {
|
| 410 |
+
if (mapped_ptr_)
|
| 411 |
+
{
|
| 412 |
+
munmap(mapped_ptr_, total_size_);
|
| 413 |
+
mapped_ptr_ = nullptr;
|
| 414 |
+
data_ptr_ = nullptr;
|
| 415 |
+
header_ptr_ = nullptr;
|
| 416 |
+
}
|
| 417 |
+
}
|
| 418 |
+
|
| 419 |
+
[[nodiscard]] bool lock_file(int operation) noexcept {
|
| 420 |
+
if (fd_ == -1)
|
| 421 |
+
return false;
|
| 422 |
+
|
| 423 |
+
while (flock(fd_, operation) == -1)
|
| 424 |
+
{
|
| 425 |
+
if (errno == EINTR)
|
| 426 |
+
continue;
|
| 427 |
+
return false;
|
| 428 |
+
}
|
| 429 |
+
return true;
|
| 430 |
+
}
|
| 431 |
+
|
| 432 |
+
void unlock_file() noexcept {
|
| 433 |
+
if (fd_ == -1)
|
| 434 |
+
return;
|
| 435 |
+
|
| 436 |
+
while (flock(fd_, LOCK_UN) == -1)
|
| 437 |
+
{
|
| 438 |
+
if (errno == EINTR)
|
| 439 |
+
continue;
|
| 440 |
+
break;
|
| 441 |
+
}
|
| 442 |
+
}
|
| 443 |
+
|
| 444 |
+
// Builds the sentinel file path for a process:
// "/dev/shm/<sentinel_base_>.<pid>". The result is formatted into a fixed
// 1024-byte buffer, so longer paths are truncated by snprintf.
std::string sentinel_full_path(pid_t pid) const {
    char path[1024];

    // snprintf is used on purpose here; see the earlier snprintf comment in this file.
    std::snprintf(path, sizeof(path), "/dev/shm/%s.%ld", sentinel_base_.c_str(),
                  static_cast<long>(pid));

    return std::string(path);
}
|
| 450 |
+
|
| 451 |
+
void decrement_refcount_relaxed() noexcept {
|
| 452 |
+
if (!header_ptr_)
|
| 453 |
+
return;
|
| 454 |
+
|
| 455 |
+
uint32_t expected = header_ptr_->ref_count.load(std::memory_order_relaxed);
|
| 456 |
+
while (expected != 0
|
| 457 |
+
&& !header_ptr_->ref_count.compare_exchange_weak(
|
| 458 |
+
expected, expected - 1, std::memory_order_acq_rel, std::memory_order_relaxed))
|
| 459 |
+
{}
|
| 460 |
+
}
|
| 461 |
+
|
| 462 |
+
bool create_sentinel_file_locked() noexcept {
|
| 463 |
+
if (!header_ptr_)
|
| 464 |
+
return false;
|
| 465 |
+
|
| 466 |
+
const pid_t self_pid = getpid();
|
| 467 |
+
sentinel_path_ = sentinel_full_path(self_pid);
|
| 468 |
+
|
| 469 |
+
for (int attempt = 0; attempt < 2; ++attempt)
|
| 470 |
+
{
|
| 471 |
+
int fd = ::open(sentinel_path_.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, 0600);
|
| 472 |
+
if (fd != -1)
|
| 473 |
+
{
|
| 474 |
+
::close(fd);
|
| 475 |
+
return true;
|
| 476 |
+
}
|
| 477 |
+
|
| 478 |
+
if (errno == EEXIST)
|
| 479 |
+
{
|
| 480 |
+
::unlink(sentinel_path_.c_str());
|
| 481 |
+
decrement_refcount_relaxed();
|
| 482 |
+
continue;
|
| 483 |
+
}
|
| 484 |
+
|
| 485 |
+
break;
|
| 486 |
+
}
|
| 487 |
+
|
| 488 |
+
sentinel_path_.clear();
|
| 489 |
+
return false;
|
| 490 |
+
}
|
| 491 |
+
|
| 492 |
+
void remove_sentinel_file() noexcept {
|
| 493 |
+
if (!sentinel_path_.empty())
|
| 494 |
+
{
|
| 495 |
+
::unlink(sentinel_path_.c_str());
|
| 496 |
+
sentinel_path_.clear();
|
| 497 |
+
}
|
| 498 |
+
}
|
| 499 |
+
|
| 500 |
+
// Reports whether a process with the given pid exists, probing with
// kill(pid, 0). Non-positive pids are rejected up front. An EPERM failure
// is counted as alive; any other failure is counted as not alive.
static bool pid_is_alive(pid_t pid) noexcept {
    if (pid <= 0)
        return false;

    const bool signalable = (kill(pid, 0) == 0);

    // errno is only consulted when the probe failed.
    return signalable || errno == EPERM;
}
|
| 509 |
+
|
| 510 |
+
[[nodiscard]] bool initialize_shared_mutex() noexcept {
|
| 511 |
+
if (!header_ptr_)
|
| 512 |
+
return false;
|
| 513 |
+
|
| 514 |
+
pthread_mutexattr_t attr;
|
| 515 |
+
if (pthread_mutexattr_init(&attr) != 0)
|
| 516 |
+
return false;
|
| 517 |
+
|
| 518 |
+
bool success = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) == 0;
|
| 519 |
+
#if _POSIX_C_SOURCE >= 200809L
|
| 520 |
+
if (success)
|
| 521 |
+
success = pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) == 0;
|
| 522 |
+
#endif
|
| 523 |
+
|
| 524 |
+
if (success)
|
| 525 |
+
success = pthread_mutex_init(&header_ptr_->mutex, &attr) == 0;
|
| 526 |
+
|
| 527 |
+
pthread_mutexattr_destroy(&attr);
|
| 528 |
+
return success;
|
| 529 |
+
}
|
| 530 |
+
|
| 531 |
+
[[nodiscard]] bool lock_shared_mutex() noexcept {
|
| 532 |
+
if (!header_ptr_)
|
| 533 |
+
return false;
|
| 534 |
+
|
| 535 |
+
while (true)
|
| 536 |
+
{
|
| 537 |
+
int rc = pthread_mutex_lock(&header_ptr_->mutex);
|
| 538 |
+
if (rc == 0)
|
| 539 |
+
return true;
|
| 540 |
+
|
| 541 |
+
#if _POSIX_C_SOURCE >= 200809L
|
| 542 |
+
if (rc == EOWNERDEAD)
|
| 543 |
+
{
|
| 544 |
+
if (pthread_mutex_consistent(&header_ptr_->mutex) == 0)
|
| 545 |
+
return true;
|
| 546 |
+
return false;
|
| 547 |
+
}
|
| 548 |
+
#endif
|
| 549 |
+
|
| 550 |
+
if (rc == EINTR)
|
| 551 |
+
continue;
|
| 552 |
+
|
| 553 |
+
return false;
|
| 554 |
+
}
|
| 555 |
+
}
|
| 556 |
+
|
| 557 |
+
void unlock_shared_mutex() noexcept {
|
| 558 |
+
if (header_ptr_)
|
| 559 |
+
pthread_mutex_unlock(&header_ptr_->mutex);
|
| 560 |
+
}
|
| 561 |
+
|
| 562 |
+
bool has_other_live_sentinels_locked() const noexcept {
|
| 563 |
+
DIR* dir = opendir("/dev/shm");
|
| 564 |
+
if (!dir)
|
| 565 |
+
return false;
|
| 566 |
+
|
| 567 |
+
std::string prefix = sentinel_base_ + ".";
|
| 568 |
+
bool found = false;
|
| 569 |
+
|
| 570 |
+
while (dirent* entry = readdir(dir))
|
| 571 |
+
{
|
| 572 |
+
std::string name = entry->d_name;
|
| 573 |
+
if (name.rfind(prefix, 0) != 0)
|
| 574 |
+
continue;
|
| 575 |
+
|
| 576 |
+
auto pid_str = name.substr(prefix.size());
|
| 577 |
+
char* end = nullptr;
|
| 578 |
+
long value = std::strtol(pid_str.c_str(), &end, 10);
|
| 579 |
+
if (!end || *end != '\0')
|
| 580 |
+
continue;
|
| 581 |
+
|
| 582 |
+
pid_t pid = static_cast<pid_t>(value);
|
| 583 |
+
if (pid_is_alive(pid))
|
| 584 |
+
{
|
| 585 |
+
found = true;
|
| 586 |
+
break;
|
| 587 |
+
}
|
| 588 |
+
|
| 589 |
+
std::string stale_path = std::string("/dev/shm/") + name;
|
| 590 |
+
::unlink(stale_path.c_str());
|
| 591 |
+
const_cast<SharedMemory*>(this)->decrement_refcount_relaxed();
|
| 592 |
+
}
|
| 593 |
+
|
| 594 |
+
closedir(dir);
|
| 595 |
+
return found;
|
| 596 |
+
}
|
| 597 |
+
|
| 598 |
+
[[nodiscard]] bool setup_new_region(const T& initial_value) noexcept {
|
| 599 |
+
if (ftruncate(fd_, static_cast<off_t>(total_size_)) == -1)
|
| 600 |
+
return false;
|
| 601 |
+
|
| 602 |
+
if (detail::portable_fallocate(fd_, 0, static_cast<off_t>(total_size_)) != 0)
|
| 603 |
+
return false;
|
| 604 |
+
|
| 605 |
+
mapped_ptr_ = mmap(nullptr, total_size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0);
|
| 606 |
+
if (mapped_ptr_ == MAP_FAILED)
|
| 607 |
+
{
|
| 608 |
+
mapped_ptr_ = nullptr;
|
| 609 |
+
return false;
|
| 610 |
+
}
|
| 611 |
+
|
| 612 |
+
data_ptr_ = static_cast<T*>(mapped_ptr_);
|
| 613 |
+
header_ptr_ =
|
| 614 |
+
reinterpret_cast<detail::ShmHeader*>(static_cast<char*>(mapped_ptr_) + sizeof(T));
|
| 615 |
+
|
| 616 |
+
new (header_ptr_) detail::ShmHeader{};
|
| 617 |
+
new (data_ptr_) T{initial_value};
|
| 618 |
+
|
| 619 |
+
if (!initialize_shared_mutex())
|
| 620 |
+
return false;
|
| 621 |
+
|
| 622 |
+
header_ptr_->ref_count.store(0, std::memory_order_release);
|
| 623 |
+
header_ptr_->initialized.store(true, std::memory_order_release);
|
| 624 |
+
return true;
|
| 625 |
+
}
|
| 626 |
+
|
| 627 |
+
[[nodiscard]] bool setup_existing_region(bool& invalid_header) noexcept {
|
| 628 |
+
invalid_header = false;
|
| 629 |
+
|
| 630 |
+
struct stat st;
|
| 631 |
+
fstat(fd_, &st);
|
| 632 |
+
if (static_cast<size_t>(st.st_size) < total_size_)
|
| 633 |
+
{
|
| 634 |
+
invalid_header = true;
|
| 635 |
+
return false;
|
| 636 |
+
}
|
| 637 |
+
|
| 638 |
+
mapped_ptr_ = mmap(nullptr, total_size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0);
|
| 639 |
+
if (mapped_ptr_ == MAP_FAILED)
|
| 640 |
+
{
|
| 641 |
+
mapped_ptr_ = nullptr;
|
| 642 |
+
return false;
|
| 643 |
+
}
|
| 644 |
+
|
| 645 |
+
data_ptr_ = static_cast<T*>(mapped_ptr_);
|
| 646 |
+
header_ptr_ = std::launder(
|
| 647 |
+
reinterpret_cast<detail::ShmHeader*>(static_cast<char*>(mapped_ptr_) + sizeof(T)));
|
| 648 |
+
|
| 649 |
+
if (!header_ptr_->initialized.load(std::memory_order_acquire)
|
| 650 |
+
|| header_ptr_->magic != detail::ShmHeader::SHM_MAGIC)
|
| 651 |
+
{
|
| 652 |
+
invalid_header = true;
|
| 653 |
+
unmap_region();
|
| 654 |
+
return false;
|
| 655 |
+
}
|
| 656 |
+
|
| 657 |
+
return true;
|
| 658 |
+
}
|
| 659 |
+
};
|
| 660 |
+
|
| 661 |
+
template<typename T>
|
| 662 |
+
[[nodiscard]] std::optional<SharedMemory<T>> create_shared(const std::string& name,
|
| 663 |
+
const T& initial_value) noexcept {
|
| 664 |
+
SharedMemory<T> shm(name);
|
| 665 |
+
if (shm.open(initial_value))
|
| 666 |
+
return shm;
|
| 667 |
+
return std::nullopt;
|
| 668 |
+
}
|
| 669 |
+
|
| 670 |
+
} // namespace Stockfish::shm
|
| 671 |
+
|
| 672 |
+
#endif // #ifndef SHM_LINUX_H_INCLUDED
|
src/syzygy/tbprobe.cpp
ADDED
|
@@ -0,0 +1,1776 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
| 3 |
+
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
|
| 4 |
+
|
| 5 |
+
Stockfish is free software: you can redistribute it and/or modify
|
| 6 |
+
it under the terms of the GNU General Public License as published by
|
| 7 |
+
the Free Software Foundation, either version 3 of the License, or
|
| 8 |
+
(at your option) any later version.
|
| 9 |
+
|
| 10 |
+
Stockfish is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 13 |
+
GNU General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU General Public License
|
| 16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#include "tbprobe.h"
|
| 20 |
+
|
| 21 |
+
#include <algorithm>
|
| 22 |
+
#include <atomic>
|
| 23 |
+
#include <cassert>
|
| 24 |
+
#include <cstdint>
|
| 25 |
+
#include <cstdlib>
|
| 26 |
+
#include <cstring>
|
| 27 |
+
#include <deque>
|
| 28 |
+
#include <fstream>
|
| 29 |
+
#include <initializer_list>
|
| 30 |
+
#include <iostream>
|
| 31 |
+
#include <mutex>
|
| 32 |
+
#include <sstream>
|
| 33 |
+
#include <string_view>
|
| 34 |
+
#include <sys/stat.h>
|
| 35 |
+
#include <type_traits>
|
| 36 |
+
#include <utility>
|
| 37 |
+
#include <vector>
|
| 38 |
+
#include <array>
|
| 39 |
+
|
| 40 |
+
#include "../bitboard.h"
|
| 41 |
+
#include "../misc.h"
|
| 42 |
+
#include "../movegen.h"
|
| 43 |
+
#include "../position.h"
|
| 44 |
+
#include "../search.h"
|
| 45 |
+
#include "../types.h"
|
| 46 |
+
#include "../ucioption.h"
|
| 47 |
+
|
| 48 |
+
#ifndef _WIN32
|
| 49 |
+
#include <fcntl.h>
|
| 50 |
+
#include <sys/mman.h>
|
| 51 |
+
#include <unistd.h>
|
| 52 |
+
#else
|
| 53 |
+
#define WIN32_LEAN_AND_MEAN
|
| 54 |
+
#ifndef NOMINMAX
|
| 55 |
+
#define NOMINMAX // Disable macros min() and max()
|
| 56 |
+
#endif
|
| 57 |
+
#include <windows.h>
|
| 58 |
+
#endif
|
| 59 |
+
|
| 60 |
+
using namespace Stockfish::Tablebases;
|
| 61 |
+
|
| 62 |
+
int Stockfish::Tablebases::MaxCardinality;
|
| 63 |
+
|
| 64 |
+
namespace Stockfish {
|
| 65 |
+
|
| 66 |
+
namespace {
|
| 67 |
+
|
| 68 |
+
constexpr int TBPIECES = 7; // Max number of supported pieces
|
| 69 |
+
constexpr int MAX_DTZ =
|
| 70 |
+
1 << 18; // Max DTZ supported times 2, large enough to deal with the syzygy TB limit.
|
| 71 |
+
|
| 72 |
+
enum {
|
| 73 |
+
BigEndian,
|
| 74 |
+
LittleEndian
|
| 75 |
+
};
|
| 76 |
+
enum TBType {
|
| 77 |
+
WDL,
|
| 78 |
+
DTZ
|
| 79 |
+
}; // Used as template parameter
|
| 80 |
+
|
| 81 |
+
// Each table has a set of flags: all of them refer to DTZ tables, the last one to WDL tables
|
| 82 |
+
enum TBFlag {
|
| 83 |
+
STM = 1,
|
| 84 |
+
Mapped = 2,
|
| 85 |
+
WinPlies = 4,
|
| 86 |
+
LossPlies = 8,
|
| 87 |
+
Wide = 16,
|
| 88 |
+
SingleValue = 128
|
| 89 |
+
};
|
| 90 |
+
|
| 91 |
+
// Negates a WDL score by negating its integer value.
inline WDLScore operator-(WDLScore d) {
    const int negated = -static_cast<int>(d);
    return WDLScore(negated);
}
|
| 92 |
+
// XORs a square's integer value with `i` and returns the result as a Square.
inline Square operator^(Square s, int i) {
    const int flipped = static_cast<int>(s) ^ i;
    return Square(flipped);
}
|
| 93 |
+
|
| 94 |
+
constexpr std::string_view PieceToChar = " PNBRQK pnbrqk";
|
| 95 |
+
|
| 96 |
+
int MapPawns[SQUARE_NB];
|
| 97 |
+
int MapB1H1H7[SQUARE_NB];
|
| 98 |
+
int MapA1D1D4[SQUARE_NB];
|
| 99 |
+
int MapKK[10][SQUARE_NB]; // [MapA1D1D4][SQUARE_NB]
|
| 100 |
+
|
| 101 |
+
int Binomial[6][SQUARE_NB]; // [k][n] k elements from a set of n elements
|
| 102 |
+
int LeadPawnIdx[6][SQUARE_NB]; // [leadPawnsCnt][SQUARE_NB]
|
| 103 |
+
int LeadPawnsSize[6][4]; // [leadPawnsCnt][FILE_A..FILE_D]
|
| 104 |
+
|
| 105 |
+
// Comparison function to sort leading pawns in ascending MapPawns[] order
|
| 106 |
+
// Strict "less than" on MapPawns[] values, usable as a sort predicate.
bool pawns_comp(Square i, Square j) {
    const int lhs = MapPawns[i];
    const int rhs = MapPawns[j];
    return lhs < rhs;
}
|
| 107 |
+
// Rank minus file of a square: zero when rank and file are equal, and the
// sign tells which of the two is larger.
int off_A1H8(Square sq) {
    const int rank = int(rank_of(sq));
    return rank - file_of(sq);
}
|
| 108 |
+
|
| 109 |
+
constexpr Value WDL_to_value[] = {-VALUE_MATE + MAX_PLY + 1, VALUE_DRAW - 2, VALUE_DRAW,
|
| 110 |
+
VALUE_DRAW + 2, VALUE_MATE - MAX_PLY - 1};
|
| 111 |
+
|
| 112 |
+
// Reverses the byte order of an unsigned integer in place. Half is the
// number of byte pairs to exchange and End the index of the last byte;
// both default to values derived from sizeof(T).
template<typename T, int Half = sizeof(T) / 2, int End = sizeof(T) - 1>
inline void swap_endian(T& x) {
    static_assert(std::is_unsigned_v<T>, "Argument of swap_endian not unsigned");

    auto* bytes = reinterpret_cast<uint8_t*>(&x);

    // Walk inwards from both ends, exchanging mirrored bytes.
    for (int lo = 0, hi = End; lo < Half; ++lo, --hi)
        std::swap(bytes[lo], bytes[hi]);
}
// A single byte has no byte order: nothing to do.
template<>
inline void swap_endian<uint8_t>(uint8_t&) {}
|
| 122 |
+
|
| 123 |
+
template<typename T, int LE>
|
| 124 |
+
T number(void* addr) {
|
| 125 |
+
T v;
|
| 126 |
+
|
| 127 |
+
if (uintptr_t(addr) & (alignof(T) - 1)) // Unaligned pointer (very rare)
|
| 128 |
+
std::memcpy(&v, addr, sizeof(T));
|
| 129 |
+
else
|
| 130 |
+
v = *((T*) addr);
|
| 131 |
+
|
| 132 |
+
if (LE != IsLittleEndian)
|
| 133 |
+
swap_endian(v);
|
| 134 |
+
return v;
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
// DTZ tables don't store valid scores for moves that reset the rule50 counter
|
| 138 |
+
// like captures and pawn moves but we can easily recover the correct dtz of the
|
| 139 |
+
// previous move if we know the position's WDL score.
|
| 140 |
+
// Maps a WDL score to the DTZ value used for the move just before a
// zeroing move: +/-1 for a plain win/loss, +/-101 for a cursed win /
// blessed loss, and 0 for anything else.
int dtz_before_zeroing(WDLScore wdl) {
    switch (wdl)
    {
    case WDLWin :
        return 1;
    case WDLCursedWin :
        return 101;
    case WDLBlessedLoss :
        return -101;
    case WDLLoss :
        return -1;
    default :
        return 0;
    }
}
|
| 147 |
+
|
| 148 |
+
// Return the sign of a number (-1, 0, 1)
|
| 149 |
+
// Returns 1 for a value above zero, -1 for a value below zero and 0
// otherwise.
template<typename T>
int sign_of(T val) {
    const T   zero     = T(0);
    const int positive = zero < val;
    const int negative = val < zero;
    return positive - negative;
}
|
| 153 |
+
|
| 154 |
+
// Numbers in little-endian used by sparseIndex[] to point into blockLength[]
|
| 155 |
+
struct SparseEntry {
|
| 156 |
+
char block[4]; // Number of block
|
| 157 |
+
char offset[2]; // Offset within the block
|
| 158 |
+
};
|
| 159 |
+
|
| 160 |
+
static_assert(sizeof(SparseEntry) == 6, "SparseEntry must be 6 bytes");
|
| 161 |
+
|
| 162 |
+
using Sym = uint16_t;  // Huffman symbol

// One entry of the symbol tree: two 12-bit symbols packed into three bytes.
struct LR {
    enum Side {
        Left,
        Right
    };

    // Packed layout: the left symbol is lr[0] plus the low nibble of lr[1]
    // (as the top four bits); the right symbol is the high nibble of lr[1]
    // (as the low four bits) plus lr[2]. If the symbol has length 1, the
    // left-hand symbol is the stored value.
    uint8_t lr[3];

    // Extracts the requested 12-bit symbol.
    template<Side S>
    Sym get() {
        if constexpr (S == Left)
            return Sym(((lr[1] & 0xF) << 8) | lr[0]);
        else if constexpr (S == Right)
            return Sym((lr[2] << 4) | (lr[1] >> 4));
        else
        {
            assert(false);
            return Sym(-1);
        }
    }
};
|
| 180 |
+
|
| 181 |
+
static_assert(sizeof(LR) == 3, "LR tree entry must be 3 bytes");
|
| 182 |
+
|
| 183 |
+
// Tablebases data layout is structured as follows:
|
| 184 |
+
//
|
| 185 |
+
// TBFile: memory maps/unmaps the physical .rtbw and .rtbz files
|
| 186 |
+
// TBTable: one object for each file with corresponding indexing information
|
| 187 |
+
// TBTables: has ownership of TBTable objects, keeping a list and a hash
|
| 188 |
+
|
| 189 |
+
// class TBFile memory maps/unmaps the single .rtbw and .rtbz files. Files are
|
| 190 |
+
// memory mapped for best performance. Files are mapped at first access: at init
|
| 191 |
+
// time only existence of the file is checked.
|
| 192 |
+
class TBFile: public std::ifstream {
|
| 193 |
+
|
| 194 |
+
std::string fname;
|
| 195 |
+
|
| 196 |
+
public:
|
| 197 |
+
// Look for and open the file among the Paths directories where the .rtbw
|
| 198 |
+
// and .rtbz files can be found. Multiple directories are separated by ";"
|
| 199 |
+
// on Windows and by ":" on Unix-based operating systems.
|
| 200 |
+
//
|
| 201 |
+
// Example:
|
| 202 |
+
// C:\tb\wdl345;C:\tb\wdl6;D:\tb\dtz345;D:\tb\dtz6
|
| 203 |
+
static std::string Paths;
|
| 204 |
+
|
| 205 |
+
TBFile(const std::string& f) {
|
| 206 |
+
|
| 207 |
+
#ifndef _WIN32
|
| 208 |
+
constexpr char SepChar = ':';
|
| 209 |
+
#else
|
| 210 |
+
constexpr char SepChar = ';';
|
| 211 |
+
#endif
|
| 212 |
+
std::stringstream ss(Paths);
|
| 213 |
+
std::string path;
|
| 214 |
+
|
| 215 |
+
while (std::getline(ss, path, SepChar))
|
| 216 |
+
{
|
| 217 |
+
fname = path + "/" + f;
|
| 218 |
+
std::ifstream::open(fname);
|
| 219 |
+
if (is_open())
|
| 220 |
+
return;
|
| 221 |
+
}
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
// Memory map the file and check it.
|
| 225 |
+
uint8_t* map(void** baseAddress, uint64_t* mapping, TBType type) {
|
| 226 |
+
if (is_open())
|
| 227 |
+
close(); // Need to re-open to get native file descriptor
|
| 228 |
+
|
| 229 |
+
#ifndef _WIN32
|
| 230 |
+
struct stat statbuf;
|
| 231 |
+
int fd = ::open(fname.c_str(), O_RDONLY);
|
| 232 |
+
|
| 233 |
+
if (fd == -1)
|
| 234 |
+
return *baseAddress = nullptr, nullptr;
|
| 235 |
+
|
| 236 |
+
fstat(fd, &statbuf);
|
| 237 |
+
|
| 238 |
+
if (statbuf.st_size % 64 != 16)
|
| 239 |
+
{
|
| 240 |
+
std::cerr << "Corrupt tablebase file " << fname << std::endl;
|
| 241 |
+
exit(EXIT_FAILURE);
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
*mapping = statbuf.st_size;
|
| 245 |
+
*baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0);
|
| 246 |
+
#if defined(MADV_RANDOM)
|
| 247 |
+
madvise(*baseAddress, statbuf.st_size, MADV_RANDOM);
|
| 248 |
+
#endif
|
| 249 |
+
::close(fd);
|
| 250 |
+
|
| 251 |
+
if (*baseAddress == MAP_FAILED)
|
| 252 |
+
{
|
| 253 |
+
std::cerr << "Could not mmap() " << fname << std::endl;
|
| 254 |
+
exit(EXIT_FAILURE);
|
| 255 |
+
}
|
| 256 |
+
#else
|
| 257 |
+
// Note FILE_FLAG_RANDOM_ACCESS is only a hint to Windows and as such may get ignored.
|
| 258 |
+
HANDLE fd = CreateFileA(fname.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr,
|
| 259 |
+
OPEN_EXISTING, FILE_FLAG_RANDOM_ACCESS, nullptr);
|
| 260 |
+
|
| 261 |
+
if (fd == INVALID_HANDLE_VALUE)
|
| 262 |
+
return *baseAddress = nullptr, nullptr;
|
| 263 |
+
|
| 264 |
+
DWORD size_high;
|
| 265 |
+
DWORD size_low = GetFileSize(fd, &size_high);
|
| 266 |
+
|
| 267 |
+
if (size_low % 64 != 16)
|
| 268 |
+
{
|
| 269 |
+
std::cerr << "Corrupt tablebase file " << fname << std::endl;
|
| 270 |
+
exit(EXIT_FAILURE);
|
| 271 |
+
}
|
| 272 |
+
|
| 273 |
+
HANDLE mmap = CreateFileMapping(fd, nullptr, PAGE_READONLY, size_high, size_low, nullptr);
|
| 274 |
+
CloseHandle(fd);
|
| 275 |
+
|
| 276 |
+
if (!mmap)
|
| 277 |
+
{
|
| 278 |
+
std::cerr << "CreateFileMapping() failed" << std::endl;
|
| 279 |
+
exit(EXIT_FAILURE);
|
| 280 |
+
}
|
| 281 |
+
|
| 282 |
+
*mapping = uint64_t(mmap);
|
| 283 |
+
*baseAddress = MapViewOfFile(mmap, FILE_MAP_READ, 0, 0, 0);
|
| 284 |
+
|
| 285 |
+
if (!*baseAddress)
|
| 286 |
+
{
|
| 287 |
+
std::cerr << "MapViewOfFile() failed, name = " << fname
|
| 288 |
+
<< ", error = " << GetLastError() << std::endl;
|
| 289 |
+
exit(EXIT_FAILURE);
|
| 290 |
+
}
|
| 291 |
+
#endif
|
| 292 |
+
uint8_t* data = (uint8_t*) *baseAddress;
|
| 293 |
+
|
| 294 |
+
constexpr uint8_t Magics[][4] = {{0xD7, 0x66, 0x0C, 0xA5}, {0x71, 0xE8, 0x23, 0x5D}};
|
| 295 |
+
|
| 296 |
+
if (memcmp(data, Magics[type == WDL], 4))
|
| 297 |
+
{
|
| 298 |
+
std::cerr << "Corrupted table in file " << fname << std::endl;
|
| 299 |
+
unmap(*baseAddress, *mapping);
|
| 300 |
+
return *baseAddress = nullptr, nullptr;
|
| 301 |
+
}
|
| 302 |
+
|
| 303 |
+
return data + 4; // Skip Magics's header
|
| 304 |
+
}
|
| 305 |
+
|
| 306 |
+
// Releases a mapping created by map(). 'mapping' is whatever map() stored for
// it: the length passed to munmap() on Unix, or the file-mapping handle on
// Windows, which is closed once the view has been unmapped.
static void unmap(void* baseAddress, uint64_t mapping) {

#ifdef _WIN32
    UnmapViewOfFile(baseAddress);
    CloseHandle((HANDLE) mapping);
#else
    munmap(baseAddress, mapping);
#endif
}
|
| 315 |
+
};
|
| 316 |
+
|
| 317 |
+
// Out-of-class definition of the static TBFile::Paths member; it starts out empty.
std::string TBFile::Paths;
|
| 318 |
+
|
| 319 |
+
// struct PairsData contains low-level indexing information to access TB data.
|
| 320 |
+
// There are 8, 4, or 2 PairsData records for each TBTable, according to the type
|
| 321 |
+
// of table and if positions have pawns or not. It is populated at first access.
|
| 322 |
+
struct PairsData {
|
| 323 |
+
uint8_t flags; // Table flags, see enum TBFlag
|
| 324 |
+
uint8_t maxSymLen; // Maximum length in bits of the Huffman symbols
|
| 325 |
+
uint8_t minSymLen; // Minimum length in bits of the Huffman symbols
|
| 326 |
+
uint32_t blocksNum; // Number of blocks in the TB file
|
| 327 |
+
size_t sizeofBlock; // Block size in bytes
|
| 328 |
+
size_t span; // About every span values there is a SparseIndex[] entry
|
| 329 |
+
Sym* lowestSym; // lowestSym[l] is the symbol of length l with the lowest value
|
| 330 |
+
LR* btree; // btree[sym] stores the left and right symbols that expand sym
|
| 331 |
+
uint16_t* blockLength; // Number of stored positions (minus one) for each block: 1..65536
|
| 332 |
+
uint32_t blockLengthSize; // Size of blockLength[] table: padded so it's bigger than blocksNum
|
| 333 |
+
SparseEntry* sparseIndex; // Partial indices into blockLength[]
|
| 334 |
+
size_t sparseIndexSize; // Size of SparseIndex[] table
|
| 335 |
+
uint8_t* data; // Start of Huffman compressed data
|
| 336 |
+
std::vector<uint64_t>
|
| 337 |
+
base64; // base64[l - min_sym_len] is the 64bit-padded lowest symbol of length l
|
| 338 |
+
std::vector<uint8_t>
|
| 339 |
+
symlen; // Number of values (-1) represented by a given Huffman symbol: 1..256
|
| 340 |
+
Piece pieces[TBPIECES]; // Position pieces: the order of pieces defines the groups
|
| 341 |
+
uint64_t groupIdx[TBPIECES + 1]; // Start index used for the encoding of the group's pieces
|
| 342 |
+
int groupLen[TBPIECES + 1]; // Number of pieces in a given group: KRKN -> (3, 1)
|
| 343 |
+
uint16_t map_idx[4]; // WDLWin, WDLLoss, WDLCursedWin, WDLBlessedLoss (used in DTZ)
|
| 344 |
+
};
|
| 345 |
+
|
| 346 |
+
// struct TBTable contains indexing information to access the corresponding TBFile.
|
| 347 |
+
// There are 2 types of TBTable, corresponding to a WDL or a DTZ file. TBTable
|
| 348 |
+
// is populated at init time but the nested PairsData records are populated at
|
| 349 |
+
// first access, when the corresponding file is memory mapped.
|
| 350 |
+
template<TBType Type>
|
| 351 |
+
struct TBTable {
|
| 352 |
+
using Ret = std::conditional_t<Type == WDL, WDLScore, int>;
|
| 353 |
+
|
| 354 |
+
static constexpr int Sides = Type == WDL ? 2 : 1;
|
| 355 |
+
|
| 356 |
+
std::atomic_bool ready;
|
| 357 |
+
void* baseAddress;
|
| 358 |
+
uint8_t* map;
|
| 359 |
+
uint64_t mapping;
|
| 360 |
+
Key key;
|
| 361 |
+
Key key2;
|
| 362 |
+
int pieceCount;
|
| 363 |
+
bool hasPawns;
|
| 364 |
+
bool hasUniquePieces;
|
| 365 |
+
uint8_t pawnCount[2]; // [Lead color / other color]
|
| 366 |
+
PairsData items[Sides][4]; // [wtm / btm][FILE_A..FILE_D or 0]
|
| 367 |
+
|
| 368 |
+
PairsData* get(int stm, int f) { return &items[stm % Sides][hasPawns ? f : 0]; }
|
| 369 |
+
|
| 370 |
+
TBTable() :
|
| 371 |
+
ready(false),
|
| 372 |
+
baseAddress(nullptr) {}
|
| 373 |
+
explicit TBTable(const std::string& code);
|
| 374 |
+
explicit TBTable(const TBTable<WDL>& wdl);
|
| 375 |
+
|
| 376 |
+
~TBTable() {
|
| 377 |
+
if (baseAddress)
|
| 378 |
+
TBFile::unmap(baseAddress, mapping);
|
| 379 |
+
}
|
| 380 |
+
};
|
| 381 |
+
|
| 382 |
+
template<>
|
| 383 |
+
TBTable<WDL>::TBTable(const std::string& code) :
|
| 384 |
+
TBTable() {
|
| 385 |
+
|
| 386 |
+
StateInfo st;
|
| 387 |
+
Position pos;
|
| 388 |
+
|
| 389 |
+
key = pos.set(code, WHITE, &st).material_key();
|
| 390 |
+
pieceCount = pos.count<ALL_PIECES>();
|
| 391 |
+
hasPawns = pos.pieces(PAWN);
|
| 392 |
+
|
| 393 |
+
hasUniquePieces = false;
|
| 394 |
+
for (Color c : {WHITE, BLACK})
|
| 395 |
+
for (PieceType pt = PAWN; pt < KING; ++pt)
|
| 396 |
+
if (popcount(pos.pieces(c, pt)) == 1)
|
| 397 |
+
hasUniquePieces = true;
|
| 398 |
+
|
| 399 |
+
// Set the leading color. In case both sides have pawns the leading color
|
| 400 |
+
// is the side with fewer pawns because this leads to better compression.
|
| 401 |
+
bool c = !pos.count<PAWN>(BLACK)
|
| 402 |
+
|| (pos.count<PAWN>(WHITE) && pos.count<PAWN>(BLACK) >= pos.count<PAWN>(WHITE));
|
| 403 |
+
|
| 404 |
+
pawnCount[0] = pos.count<PAWN>(c ? WHITE : BLACK);
|
| 405 |
+
pawnCount[1] = pos.count<PAWN>(c ? BLACK : WHITE);
|
| 406 |
+
|
| 407 |
+
key2 = pos.set(code, BLACK, &st).material_key();
|
| 408 |
+
}
|
| 409 |
+
|
| 410 |
+
template<>
|
| 411 |
+
TBTable<DTZ>::TBTable(const TBTable<WDL>& wdl) :
|
| 412 |
+
TBTable() {
|
| 413 |
+
|
| 414 |
+
// Use the corresponding WDL table to avoid recalculating all from scratch
|
| 415 |
+
key = wdl.key;
|
| 416 |
+
key2 = wdl.key2;
|
| 417 |
+
pieceCount = wdl.pieceCount;
|
| 418 |
+
hasPawns = wdl.hasPawns;
|
| 419 |
+
hasUniquePieces = wdl.hasUniquePieces;
|
| 420 |
+
pawnCount[0] = wdl.pawnCount[0];
|
| 421 |
+
pawnCount[1] = wdl.pawnCount[1];
|
| 422 |
+
}
|
| 423 |
+
|
| 424 |
+
// class TBTables creates and keeps ownership of the TBTable objects, one for
|
| 425 |
+
// each TB file found. It supports a fast, hash-based, table lookup. Populated
|
| 426 |
+
// at init time, accessed at probe time.
|
| 427 |
+
class TBTables {
|
| 428 |
+
|
| 429 |
+
struct Entry {
|
| 430 |
+
Key key;
|
| 431 |
+
TBTable<WDL>* wdl;
|
| 432 |
+
TBTable<DTZ>* dtz;
|
| 433 |
+
|
| 434 |
+
template<TBType Type>
|
| 435 |
+
TBTable<Type>* get() const {
|
| 436 |
+
return (TBTable<Type>*) (Type == WDL ? (void*) wdl : (void*) dtz);
|
| 437 |
+
}
|
| 438 |
+
};
|
| 439 |
+
|
| 440 |
+
static constexpr int Size = 1 << 12; // 4K table, indexed by key's 12 lsb
|
| 441 |
+
static constexpr int Overflow = 1; // Number of elements allowed to map to the last bucket
|
| 442 |
+
|
| 443 |
+
Entry hashTable[Size + Overflow];
|
| 444 |
+
|
| 445 |
+
std::deque<TBTable<WDL>> wdlTable;
|
| 446 |
+
std::deque<TBTable<DTZ>> dtzTable;
|
| 447 |
+
size_t foundDTZFiles = 0;
|
| 448 |
+
size_t foundWDLFiles = 0;
|
| 449 |
+
|
| 450 |
+
void insert(Key key, TBTable<WDL>* wdl, TBTable<DTZ>* dtz) {
|
| 451 |
+
uint32_t homeBucket = uint32_t(key) & (Size - 1);
|
| 452 |
+
Entry entry{key, wdl, dtz};
|
| 453 |
+
|
| 454 |
+
// Ensure last element is empty to avoid overflow when looking up
|
| 455 |
+
for (uint32_t bucket = homeBucket; bucket < Size + Overflow - 1; ++bucket)
|
| 456 |
+
{
|
| 457 |
+
Key otherKey = hashTable[bucket].key;
|
| 458 |
+
if (otherKey == key || !hashTable[bucket].get<WDL>())
|
| 459 |
+
{
|
| 460 |
+
hashTable[bucket] = entry;
|
| 461 |
+
return;
|
| 462 |
+
}
|
| 463 |
+
|
| 464 |
+
// Robin Hood hashing: If we've probed for longer than this element,
|
| 465 |
+
// insert here and search for a new spot for the other element instead.
|
| 466 |
+
uint32_t otherHomeBucket = uint32_t(otherKey) & (Size - 1);
|
| 467 |
+
if (otherHomeBucket > homeBucket)
|
| 468 |
+
{
|
| 469 |
+
std::swap(entry, hashTable[bucket]);
|
| 470 |
+
key = otherKey;
|
| 471 |
+
homeBucket = otherHomeBucket;
|
| 472 |
+
}
|
| 473 |
+
}
|
| 474 |
+
std::cerr << "TB hash table size too low!" << std::endl;
|
| 475 |
+
exit(EXIT_FAILURE);
|
| 476 |
+
}
|
| 477 |
+
|
| 478 |
+
public:
|
| 479 |
+
template<TBType Type>
|
| 480 |
+
TBTable<Type>* get(Key key) {
|
| 481 |
+
for (const Entry* entry = &hashTable[uint32_t(key) & (Size - 1)];; ++entry)
|
| 482 |
+
{
|
| 483 |
+
if (entry->key == key || !entry->get<Type>())
|
| 484 |
+
return entry->get<Type>();
|
| 485 |
+
}
|
| 486 |
+
}
|
| 487 |
+
|
| 488 |
+
void clear() {
|
| 489 |
+
memset(hashTable, 0, sizeof(hashTable));
|
| 490 |
+
wdlTable.clear();
|
| 491 |
+
dtzTable.clear();
|
| 492 |
+
foundDTZFiles = 0;
|
| 493 |
+
foundWDLFiles = 0;
|
| 494 |
+
}
|
| 495 |
+
|
| 496 |
+
void info() const {
|
| 497 |
+
sync_cout << "info string Found " << foundWDLFiles << " WDL and " << foundDTZFiles
|
| 498 |
+
<< " DTZ tablebase files (up to " << MaxCardinality << "-man)." << sync_endl;
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
+
void add(const std::vector<PieceType>& pieces);
|
| 502 |
+
};
|
| 503 |
+
|
| 504 |
+
// Global instance of the table registry; note that the variable has the same name as its class.
TBTables TBTables;
|
| 505 |
+
|
| 506 |
+
// If the corresponding file exists two new objects TBTable<WDL> and TBTable<DTZ>
|
| 507 |
+
// are created and added to the lists and hash table. Called at init time.
|
| 508 |
+
void TBTables::add(const std::vector<PieceType>& pieces) {
|
| 509 |
+
|
| 510 |
+
std::string code;
|
| 511 |
+
|
| 512 |
+
for (PieceType pt : pieces)
|
| 513 |
+
code += PieceToChar[pt];
|
| 514 |
+
code.insert(code.find('K', 1), "v");
|
| 515 |
+
|
| 516 |
+
TBFile file_dtz(code + ".rtbz"); // KRK -> KRvK
|
| 517 |
+
if (file_dtz.is_open())
|
| 518 |
+
{
|
| 519 |
+
file_dtz.close();
|
| 520 |
+
foundDTZFiles++;
|
| 521 |
+
}
|
| 522 |
+
|
| 523 |
+
TBFile file(code + ".rtbw"); // KRK -> KRvK
|
| 524 |
+
|
| 525 |
+
if (!file.is_open()) // Only WDL file is checked
|
| 526 |
+
return;
|
| 527 |
+
|
| 528 |
+
file.close();
|
| 529 |
+
foundWDLFiles++;
|
| 530 |
+
|
| 531 |
+
MaxCardinality = std::max(int(pieces.size()), MaxCardinality);
|
| 532 |
+
|
| 533 |
+
wdlTable.emplace_back(code);
|
| 534 |
+
dtzTable.emplace_back(wdlTable.back());
|
| 535 |
+
|
| 536 |
+
// Insert into the hash keys for both colors: KRvK with KR white and black
|
| 537 |
+
insert(wdlTable.back().key, &wdlTable.back(), &dtzTable.back());
|
| 538 |
+
insert(wdlTable.back().key2, &wdlTable.back(), &dtzTable.back());
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
+
// TB tables are compressed with canonical Huffman code. The compressed data is divided into
|
| 542 |
+
// blocks of size d->sizeofBlock, and each block stores a variable number of symbols.
|
| 543 |
+
// Each symbol represents either a WDL or a (remapped) DTZ value, or a pair of other symbols
|
| 544 |
+
// (recursively). If you keep expanding the symbols in a block, you end up with up to 65536
|
| 545 |
+
// WDL or DTZ values. Each symbol represents up to 256 values and will correspond after
|
| 546 |
+
// Huffman coding to at least 1 bit. So a block of 32 bytes corresponds to at most
|
| 547 |
+
// 32 x 8 x 256 = 65536 values. This maximum is only reached for tables that consist mostly
|
| 548 |
+
// of draws or mostly of wins, but such tables are actually quite common. In principle, the
|
| 549 |
+
// blocks in WDL tables are 64 bytes long (and will be aligned on cache lines). But for
|
| 550 |
+
// mostly-draw or mostly-win tables this can leave many 64-byte blocks only half-filled, so
|
| 551 |
+
// in such cases blocks are 32 bytes long. The blocks of DTZ tables are up to 1024 bytes long.
|
| 552 |
+
// The generator picks the size that leads to the smallest table. The "book" of symbols and
|
| 553 |
+
// Huffman codes are the same for all blocks in the table. A non-symmetric pawnless TB file
|
| 554 |
+
// will have one table for wtm and one for btm, a TB file with pawns will have tables per
|
| 555 |
+
// file a,b,c,d also, in this case, one set for wtm and one for btm.
|
| 556 |
+
// Returns the value stored at position idx of the table described by d. For a
// table flagged SingleValue this is d->minSymLen; otherwise the block holding
// idx is located through the sparse index, its Huffman symbols are decoded
// until the one covering idx is found, and that symbol is expanded down to the
// single value requested.
int decompress_pairs(PairsData* d, uint64_t idx) {

    // Special case where all table positions store the same value
    if (d->flags & TBFlag::SingleValue)
        return d->minSymLen;

    // First we need to locate the right block that stores the value at index "idx".
    // Because each block n stores blockLength[n] + 1 values, the index i of the block
    // that contains the value at position idx is:
    //
    //                    for (i = -1, sum = 0; sum <= idx; i++)
    //                        sum += blockLength[i + 1] + 1;
    //
    // This can be slow, so we use SparseIndex[] populated with a set of SparseEntry that
    // point to known indices into blockLength[]. Namely SparseIndex[k] is a SparseEntry
    // that stores the blockLength[] index and the offset within that block of the value
    // with index I(k), where:
    //
    //       I(k) = k * d->span + d->span / 2      (1)

    // Get the 'k' of the I(k) nearest to our idx, using definition (1), and read
    // the corresponding SparseIndex[] entry.
    const uint32_t k      = uint32_t(idx / d->span);
    SparseEntry*   sparse = d->sparseIndex + k;

    uint32_t block  = number<uint32_t, LittleEndian>(&sparse->block);
    int      offset = number<uint16_t, LittleEndian>(&sparse->offset);

    // From the definition of k we know that
    //
    //       idx = k * d->span + idx % d->span    (2)
    //
    // so from (1) and (2) the difference idx - I(k) is the term added here,
    // which turns offset into the offset corresponding to our idx.
    offset += int(idx % d->span - d->span / 2);

    // Move to the previous/next block, until we reach the correct block that contains idx,
    // that is when 0 <= offset <= d->blockLength[block]
    while (offset < 0)
    {
        --block;
        offset += d->blockLength[block] + 1;
    }

    while (offset > d->blockLength[block])
    {
        offset -= d->blockLength[block] + 1;
        ++block;
    }

    // Start address of our block of canonical Huffman symbols
    uint32_t* next = (uint32_t*) (d->data + (uint64_t(block) * d->sizeofBlock));

    // Read the first 64 bits in our block, this is a (truncated) sequence of
    // unknown number of symbols of unknown length but we know the first one
    // is at the beginning of this 64-bit sequence.
    uint64_t bits = number<uint64_t, BigEndian>(next);
    next += 2;
    int bitsLeft = 64;
    Sym sym;

    for (;;)
    {
        int len = 0;  // This is the symbol length - d->min_sym_len

        // Now get the symbol length. For any symbol s64 of length l right-padded
        // to 64 bits we know that d->base64[l-1] >= s64 >= d->base64[l] so we
        // can find the symbol length iterating through base64[].
        while (bits < d->base64[len])
            ++len;

        // All the symbols of a given length are consecutive integers (numerical
        // sequence property), so we can compute the offset of our symbol of
        // length len, stored at the beginning of bits.
        sym = Sym((bits - d->base64[len]) >> (64 - len - d->minSymLen));

        // Now add the value of the lowest symbol of length len to get our symbol
        sym += number<Sym, LittleEndian>(&d->lowestSym[len]);

        // If our offset is within the number of values represented by symbol sym,
        // we are done...
        const int valuesInSym = d->symlen[sym] + 1;
        if (offset < valuesInSym)
            break;

        // ...otherwise update the offset and continue to iterate
        offset -= valuesInSym;
        len += d->minSymLen;  // Get the real length
        bits <<= len;         // Consume the just processed symbol
        bitsLeft -= len;

        if (bitsLeft <= 32)
        {  // Refill the buffer
            bitsLeft += 32;
            bits |= uint64_t(number<uint32_t, BigEndian>(next)) << (64 - bitsLeft);
            ++next;
        }
    }

    // Now we have our symbol that expands into d->symlen[sym] + 1 symbols.
    // We binary-search for our value recursively expanding into the left and
    // right child symbols until we reach a leaf node where symlen[sym] + 1 == 1
    // that will store the value we need.
    while (d->symlen[sym] != 0)
    {
        const Sym left         = d->btree[sym].get<LR::Left>();
        const int valuesInLeft = d->symlen[left] + 1;

        // If a symbol contains 36 sub-symbols (d->symlen[sym] + 1 = 36) and
        // expands in a pair (d->symlen[left] = 23, d->symlen[right] = 11), then
        // we know that, for instance, the tenth value (offset = 10) will be on
        // the left side because in Recursive Pairing child symbols are adjacent.
        if (offset >= valuesInLeft)
        {
            offset -= valuesInLeft;
            sym = d->btree[sym].get<LR::Right>();
        }
        else
            sym = left;
    }

    return d->btree[sym].get<LR::Left>();
}
|
| 671 |
+
|
| 672 |
+
// WDL overload: there is nothing to verify, so the check always succeeds
bool check_dtz_stm(TBTable<WDL>*, int, File) {
    return true;
}
|
| 673 |
+
|
| 674 |
+
// DTZ overload: succeeds when the STM flag of the selected PairsData record
// equals stm, or when the table has equal material keys and no pawns.
bool check_dtz_stm(TBTable<DTZ>* entry, int stm, File f) {

    const bool storesThisSide = (entry->get(stm, f)->flags & TBFlag::STM) == stm;
    if (storesThisSide)
        return true;

    return entry->key == entry->key2 && !entry->hasPawns;
}
|
| 679 |
+
|
| 680 |
+
// DTZ scores are sorted by frequency of occurrence and then assigned the
|
| 681 |
+
// values 0, 1, 2, ... in order of decreasing frequency. This is done for each
|
| 682 |
+
// of the four WDLScore values. The mapping information necessary to reconstruct
|
| 683 |
+
// the original values are stored in the TB file and read during map[] init.
|
| 684 |
+
// WDL overload: the decompressed value is converted to a WDLScore by subtracting 2
WDLScore map_score(TBTable<WDL>*, File, int value, WDLScore) {
    const int score = value - 2;
    return WDLScore(score);
}
|
| 685 |
+
|
| 686 |
+
// DTZ overload: turns the decompressed value into a distance in plies. If the
// table is flagged Mapped, the value is first looked up in entry->map (as
// 16-bit elements when the Wide flag is set) starting at the map_idx slot
// chosen by wdl. The result is then doubled when the table stores moves
// instead of plies for this outcome, and finally incremented by one.
int map_score(TBTable<DTZ>* entry, File f, int value, WDLScore wdl) {

    constexpr int WDLMap[] = {1, 3, 0, 2, 0};

    PairsData* d     = entry->get(0, f);
    const auto flags = d->flags;

    if (flags & TBFlag::Mapped)
    {
        const int slot = d->map_idx[WDLMap[wdl + 2]] + value;

        if (flags & TBFlag::Wide)
            value = ((uint16_t*) entry->map)[slot];
        else
            value = entry->map[slot];
    }

    // DTZ tables store distance to zero in number of moves or plies. We
    // want to return plies, so we have to convert to plies when needed.
    const bool winInMoves  = wdl == WDLWin && !(flags & TBFlag::WinPlies);
    const bool lossInMoves = wdl == WDLLoss && !(flags & TBFlag::LossPlies);
    const bool cursedOrBlessed = wdl == WDLCursedWin || wdl == WDLBlessedLoss;

    if (winInMoves || lossInMoves || cursedOrBlessed)
        value *= 2;

    return value + 1;
}
|
| 711 |
+
|
| 712 |
+
// A temporary fix for the compiler bug with vectorization. (#4450)
|
| 713 |
+
// DISABLE_CLANG_LOOP_VEC expands to a pragma that disables vectorization of the
// loop that follows when building with clang 15 or newer, and to nothing with
// any other compiler.
#if !defined(__clang__) || !defined(__clang_major__) || __clang_major__ < 15
    #define DISABLE_CLANG_LOOP_VEC
#else
    #define DISABLE_CLANG_LOOP_VEC _Pragma("clang loop vectorize(disable)")
#endif
|
| 718 |
+
|
| 719 |
+
// Compute a unique index out of a position and use it to probe the TB file. To
|
| 720 |
+
// encode k pieces of the same type and color, first sort the pieces by square in
|
| 721 |
+
// ascending order s1 <= s2 <= ... <= sk then compute the unique index as:
|
| 722 |
+
//
|
| 723 |
+
// idx = Binomial[1][s1] + Binomial[2][s2] + ... + Binomial[k][sk]
|
| 724 |
+
//
|
| 725 |
+
template<typename T, typename Ret = typename T::Ret>
|
| 726 |
+
Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* result) {
|
| 727 |
+
|
| 728 |
+
Square squares[TBPIECES];
|
| 729 |
+
Piece pieces[TBPIECES];
|
| 730 |
+
uint64_t idx;
|
| 731 |
+
int next = 0, size = 0, leadPawnsCnt = 0;
|
| 732 |
+
PairsData* d;
|
| 733 |
+
Bitboard b, leadPawns = 0;
|
| 734 |
+
File tbFile = FILE_A;
|
| 735 |
+
|
| 736 |
+
// A given TB entry like KRK has associated two material keys: KRvk and Kvkr.
|
| 737 |
+
// If both sides have the same pieces keys are equal. In this case TB tables
|
| 738 |
+
// only stores the 'white to move' case, so if the position to lookup has black
|
| 739 |
+
// to move, we need to switch the color and flip the squares before to lookup.
|
| 740 |
+
bool symmetricBlackToMove = (entry->key == entry->key2 && pos.side_to_move());
|
| 741 |
+
|
| 742 |
+
// TB files are calculated for white as the stronger side. For instance, we
|
| 743 |
+
// have KRvK, not KvKR. A position where the stronger side is white will have
|
| 744 |
+
// its material key == entry->key, otherwise we have to switch the color and
|
| 745 |
+
// flip the squares before to lookup.
|
| 746 |
+
bool blackStronger = (pos.material_key() != entry->key);
|
| 747 |
+
|
| 748 |
+
int flipColor = (symmetricBlackToMove || blackStronger) * 8;
|
| 749 |
+
int flipSquares = (symmetricBlackToMove || blackStronger) * 56;
|
| 750 |
+
int stm = (symmetricBlackToMove || blackStronger) ^ pos.side_to_move();
|
| 751 |
+
|
| 752 |
+
// For pawns, TB files store 4 separate tables according if leading pawn is on
|
| 753 |
+
// file a, b, c or d after reordering. The leading pawn is the one with maximum
|
| 754 |
+
// MapPawns[] value, that is the one most toward the edges and with lowest rank.
|
| 755 |
+
if (entry->hasPawns)
|
| 756 |
+
{
|
| 757 |
+
|
| 758 |
+
// In all the 4 tables, pawns are at the beginning of the piece sequence and
|
| 759 |
+
// their color is the reference one. So we just pick the first one.
|
| 760 |
+
Piece pc = Piece(entry->get(0, 0)->pieces[0] ^ flipColor);
|
| 761 |
+
|
| 762 |
+
assert(type_of(pc) == PAWN);
|
| 763 |
+
|
| 764 |
+
leadPawns = b = pos.pieces(color_of(pc), PAWN);
|
| 765 |
+
do
|
| 766 |
+
squares[size++] = pop_lsb(b) ^ flipSquares;
|
| 767 |
+
while (b);
|
| 768 |
+
|
| 769 |
+
leadPawnsCnt = size;
|
| 770 |
+
|
| 771 |
+
std::swap(squares[0], *std::max_element(squares, squares + leadPawnsCnt, pawns_comp));
|
| 772 |
+
|
| 773 |
+
tbFile = File(edge_distance(file_of(squares[0])));
|
| 774 |
+
}
|
| 775 |
+
|
| 776 |
+
// DTZ tables are one-sided, i.e. they store positions only for white to
|
| 777 |
+
// move or only for black to move, so check for side to move to be stm,
|
| 778 |
+
// early exit otherwise.
|
| 779 |
+
if (!check_dtz_stm(entry, stm, tbFile))
|
| 780 |
+
return *result = CHANGE_STM, Ret();
|
| 781 |
+
|
| 782 |
+
// Now we are ready to get all the position pieces (but the lead pawns) and
|
| 783 |
+
// directly map them to the correct color and square.
|
| 784 |
+
b = pos.pieces() ^ leadPawns;
|
| 785 |
+
do
|
| 786 |
+
{
|
| 787 |
+
Square s = pop_lsb(b);
|
| 788 |
+
squares[size] = s ^ flipSquares;
|
| 789 |
+
pieces[size++] = Piece(pos.piece_on(s) ^ flipColor);
|
| 790 |
+
} while (b);
|
| 791 |
+
|
| 792 |
+
assert(size >= 2);
|
| 793 |
+
|
| 794 |
+
d = entry->get(stm, tbFile);
|
| 795 |
+
|
| 796 |
+
// Then we reorder the pieces to have the same sequence as the one stored
|
| 797 |
+
// in pieces[i]: the sequence that ensures the best compression.
|
| 798 |
+
for (int i = leadPawnsCnt; i < size - 1; ++i)
|
| 799 |
+
for (int j = i + 1; j < size; ++j)
|
| 800 |
+
if (d->pieces[i] == pieces[j])
|
| 801 |
+
{
|
| 802 |
+
std::swap(pieces[i], pieces[j]);
|
| 803 |
+
std::swap(squares[i], squares[j]);
|
| 804 |
+
break;
|
| 805 |
+
}
|
| 806 |
+
|
| 807 |
+
// Now we map again the squares so that the square of the lead piece is in
|
| 808 |
+
// the triangle A1-D1-D4.
|
| 809 |
+
if (file_of(squares[0]) > FILE_D)
|
| 810 |
+
{
|
| 811 |
+
DISABLE_CLANG_LOOP_VEC
|
| 812 |
+
for (int i = 0; i < size; ++i)
|
| 813 |
+
squares[i] = flip_file(squares[i]);
|
| 814 |
+
}
|
| 815 |
+
|
| 816 |
+
// Encode leading pawns starting with the one with minimum MapPawns[] and
|
| 817 |
+
// proceeding in ascending order.
|
| 818 |
+
if (entry->hasPawns)
|
| 819 |
+
{
|
| 820 |
+
idx = LeadPawnIdx[leadPawnsCnt][squares[0]];
|
| 821 |
+
|
| 822 |
+
std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp);
|
| 823 |
+
|
| 824 |
+
for (int i = 1; i < leadPawnsCnt; ++i)
|
| 825 |
+
idx += Binomial[i][MapPawns[squares[i]]];
|
| 826 |
+
|
| 827 |
+
goto encode_remaining; // With pawns we have finished special treatments
|
| 828 |
+
}
|
| 829 |
+
|
| 830 |
+
// In positions without pawns, we further flip the squares to ensure leading
|
| 831 |
+
// piece is below RANK_5.
|
| 832 |
+
if (rank_of(squares[0]) > RANK_4)
|
| 833 |
+
{
|
| 834 |
+
DISABLE_CLANG_LOOP_VEC
|
| 835 |
+
for (int i = 0; i < size; ++i)
|
| 836 |
+
squares[i] = flip_rank(squares[i]);
|
| 837 |
+
}
|
| 838 |
+
|
| 839 |
+
// Look for the first piece of the leading group not on the A1-D4 diagonal
|
| 840 |
+
// and ensure it is mapped below the diagonal.
|
| 841 |
+
DISABLE_CLANG_LOOP_VEC
|
| 842 |
+
for (int i = 0; i < d->groupLen[0]; ++i)
|
| 843 |
+
{
|
| 844 |
+
if (!off_A1H8(squares[i]))
|
| 845 |
+
continue;
|
| 846 |
+
|
| 847 |
+
if (off_A1H8(squares[i]) > 0) // A1-H8 diagonal flip: SQ_A3 -> SQ_C1
|
| 848 |
+
{
|
| 849 |
+
DISABLE_CLANG_LOOP_VEC
|
| 850 |
+
for (int j = i; j < size; ++j)
|
| 851 |
+
squares[j] = Square(((squares[j] >> 3) | (squares[j] << 3)) & 63);
|
| 852 |
+
}
|
| 853 |
+
break;
|
| 854 |
+
}
|
| 855 |
+
|
| 856 |
+
// Encode the leading group.
|
| 857 |
+
//
|
| 858 |
+
// Suppose we have KRvK. Let's say the pieces are on square numbers wK, wR
|
| 859 |
+
// and bK (each 0...63). The simplest way to map this position to an index
|
| 860 |
+
// is like this:
|
| 861 |
+
//
|
| 862 |
+
// index = wK * 64 * 64 + wR * 64 + bK;
|
| 863 |
+
//
|
| 864 |
+
// But this way the TB is going to have 64*64*64 = 262144 positions, with
|
| 865 |
+
// lots of positions being equivalent (because they are mirrors of each
|
| 866 |
+
// other) and lots of positions being invalid (two pieces on one square,
|
| 867 |
+
// adjacent kings, etc.).
|
| 868 |
+
// Usually the first step is to take the wK and bK together. There are just
|
| 869 |
+
// 462 ways legal and not-mirrored ways to place the wK and bK on the board.
|
| 870 |
+
// Once we have placed the wK and bK, there are 62 squares left for the wR
|
| 871 |
+
// Mapping its square from 0..63 to available squares 0..61 can be done like:
|
| 872 |
+
//
|
| 873 |
+
// wR -= (wR > wK) + (wR > bK);
|
| 874 |
+
//
|
| 875 |
+
// In words: if wR "comes later" than wK, we deduct 1, and the same if wR
|
| 876 |
+
// "comes later" than bK. In case of two same pieces like KRRvK we want to
|
| 877 |
+
// place the two Rs "together". If we have 62 squares left, we can place two
|
| 878 |
+
// Rs "together" in 62 * 61 / 2 ways (we divide by 2 because rooks can be
|
| 879 |
+
// swapped and still get the same position.)
|
| 880 |
+
//
|
| 881 |
+
// In case we have at least 3 unique pieces (including kings) we encode them
|
| 882 |
+
// together.
|
| 883 |
+
if (entry->hasUniquePieces)
|
| 884 |
+
{
|
| 885 |
+
|
| 886 |
+
int adjust1 = squares[1] > squares[0];
|
| 887 |
+
int adjust2 = (squares[2] > squares[0]) + (squares[2] > squares[1]);
|
| 888 |
+
|
| 889 |
+
// First piece is below a1-h8 diagonal. MapA1D1D4[] maps the b1-d1-d3
|
| 890 |
+
// triangle to 0...5. There are 63 squares for second piece and 62
|
| 891 |
+
// (mapped to 0...61) for the third.
|
| 892 |
+
if (off_A1H8(squares[0]))
|
| 893 |
+
idx = (MapA1D1D4[squares[0]] * 63 + (squares[1] - adjust1)) * 62 + squares[2] - adjust2;
|
| 894 |
+
|
| 895 |
+
// First piece is on a1-h8 diagonal, second below: map this occurrence to
|
| 896 |
+
// 6 to differentiate from the above case, rank_of() maps a1-d4 diagonal
|
| 897 |
+
// to 0...3 and finally MapB1H1H7[] maps the b1-h1-h7 triangle to 0..27.
|
| 898 |
+
else if (off_A1H8(squares[1]))
|
| 899 |
+
idx = (6 * 63 + rank_of(squares[0]) * 28 + MapB1H1H7[squares[1]]) * 62 + squares[2]
|
| 900 |
+
- adjust2;
|
| 901 |
+
|
| 902 |
+
// First two pieces are on a1-h8 diagonal, third below
|
| 903 |
+
else if (off_A1H8(squares[2]))
|
| 904 |
+
idx = 6 * 63 * 62 + 4 * 28 * 62 + rank_of(squares[0]) * 7 * 28
|
| 905 |
+
+ (rank_of(squares[1]) - adjust1) * 28 + MapB1H1H7[squares[2]];
|
| 906 |
+
|
| 907 |
+
// All 3 pieces on the diagonal a1-h8
|
| 908 |
+
else
|
| 909 |
+
idx = 6 * 63 * 62 + 4 * 28 * 62 + 4 * 7 * 28 + rank_of(squares[0]) * 7 * 6
|
| 910 |
+
+ (rank_of(squares[1]) - adjust1) * 6 + (rank_of(squares[2]) - adjust2);
|
| 911 |
+
}
|
| 912 |
+
else
|
| 913 |
+
// We don't have at least 3 unique pieces, like in KRRvKBB, just map
|
| 914 |
+
// the kings.
|
| 915 |
+
idx = MapKK[MapA1D1D4[squares[0]]][squares[1]];
|
| 916 |
+
|
| 917 |
+
encode_remaining:
|
| 918 |
+
idx *= d->groupIdx[0];
|
| 919 |
+
Square* groupSq = squares + d->groupLen[0];
|
| 920 |
+
|
| 921 |
+
// Encode remaining pawns and then pieces according to square, in ascending order
|
| 922 |
+
bool remainingPawns = entry->hasPawns && entry->pawnCount[1];
|
| 923 |
+
|
| 924 |
+
while (d->groupLen[++next])
|
| 925 |
+
{
|
| 926 |
+
std::stable_sort(groupSq, groupSq + d->groupLen[next]);
|
| 927 |
+
uint64_t n = 0;
|
| 928 |
+
|
| 929 |
+
// Map down a square if "comes later" than a square in the previous
|
| 930 |
+
// groups (similar to what was done earlier for leading group pieces).
|
| 931 |
+
for (int i = 0; i < d->groupLen[next]; ++i)
|
| 932 |
+
{
|
| 933 |
+
auto f = [&](Square s) { return groupSq[i] > s; };
|
| 934 |
+
auto adjust = std::count_if(squares, groupSq, f);
|
| 935 |
+
n += Binomial[i + 1][groupSq[i] - adjust - 8 * remainingPawns];
|
| 936 |
+
}
|
| 937 |
+
|
| 938 |
+
remainingPawns = false;
|
| 939 |
+
idx += n * d->groupIdx[next];
|
| 940 |
+
groupSq += d->groupLen[next];
|
| 941 |
+
}
|
| 942 |
+
|
| 943 |
+
// Now that we have the index, decompress the pair and get the score
|
| 944 |
+
return map_score(entry, tbFile, decompress_pairs(d, idx), wdl);
|
| 945 |
+
}
|
| 946 |
+
|
| 947 |
+
// Group together pieces that will be encoded together. The general rule is that
|
| 948 |
+
// a group contains pieces of the same type and color. The exception is the leading
|
| 949 |
+
// group that, in case of positions without pawns, can be formed by 3 different
|
| 950 |
+
// pieces (default) or by the king pair when there is not a unique piece apart
|
| 951 |
+
// from the kings. When there are pawns, pawns are always first in pieces[].
|
| 952 |
+
//
|
| 953 |
+
// As example KRKN -> KRK + N, KNNK -> KK + NN, KPPKP -> P + PP + K + K
|
| 954 |
+
//
|
| 955 |
+
// The actual grouping depends on the TB generator and can be inferred from the
|
| 956 |
+
// sequence of pieces in piece[] array.
|
| 957 |
+
template<typename T>
|
| 958 |
+
void set_groups(T& e, PairsData* d, int order[], File f) {
|
| 959 |
+
|
| 960 |
+
int n = 0, firstLen = e.hasPawns ? 0 : e.hasUniquePieces ? 3 : 2;
|
| 961 |
+
d->groupLen[n] = 1;
|
| 962 |
+
|
| 963 |
+
// Number of pieces per group is stored in groupLen[], for instance in KRKN
|
| 964 |
+
// the encoder will default on '111', so groupLen[] will be (3, 1).
|
| 965 |
+
for (int i = 1; i < e.pieceCount; ++i)
|
| 966 |
+
if (--firstLen > 0 || d->pieces[i] == d->pieces[i - 1])
|
| 967 |
+
d->groupLen[n]++;
|
| 968 |
+
else
|
| 969 |
+
d->groupLen[++n] = 1;
|
| 970 |
+
|
| 971 |
+
d->groupLen[++n] = 0; // Zero-terminated
|
| 972 |
+
|
| 973 |
+
// The sequence in pieces[] defines the groups, but not the order in which
|
| 974 |
+
// they are encoded. If the pieces in a group g can be combined on the board
|
| 975 |
+
// in N(g) different ways, then the position encoding will be of the form:
|
| 976 |
+
//
|
| 977 |
+
// g1 * N(g2) * N(g3) + g2 * N(g3) + g3
|
| 978 |
+
//
|
| 979 |
+
// This ensures unique encoding for the whole position. The order of the
|
| 980 |
+
// groups is a per-table parameter and could not follow the canonical leading
|
| 981 |
+
// pawns/pieces -> remaining pawns -> remaining pieces. In particular the
|
| 982 |
+
// first group is at order[0] position and the remaining pawns, when present,
|
| 983 |
+
// are at order[1] position.
|
| 984 |
+
bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides
|
| 985 |
+
int next = pp ? 2 : 1;
|
| 986 |
+
int freeSquares = 64 - d->groupLen[0] - (pp ? d->groupLen[1] : 0);
|
| 987 |
+
uint64_t idx = 1;
|
| 988 |
+
|
| 989 |
+
for (int k = 0; next < n || k == order[0] || k == order[1]; ++k)
|
| 990 |
+
if (k == order[0]) // Leading pawns or pieces
|
| 991 |
+
{
|
| 992 |
+
d->groupIdx[0] = idx;
|
| 993 |
+
idx *= e.hasPawns ? LeadPawnsSize[d->groupLen[0]][f] : e.hasUniquePieces ? 31332 : 462;
|
| 994 |
+
}
|
| 995 |
+
else if (k == order[1]) // Remaining pawns
|
| 996 |
+
{
|
| 997 |
+
d->groupIdx[1] = idx;
|
| 998 |
+
idx *= Binomial[d->groupLen[1]][48 - d->groupLen[0]];
|
| 999 |
+
}
|
| 1000 |
+
else // Remaining pieces
|
| 1001 |
+
{
|
| 1002 |
+
d->groupIdx[next] = idx;
|
| 1003 |
+
idx *= Binomial[d->groupLen[next]][freeSquares];
|
| 1004 |
+
freeSquares -= d->groupLen[next++];
|
| 1005 |
+
}
|
| 1006 |
+
|
| 1007 |
+
d->groupIdx[n] = idx;
|
| 1008 |
+
}
|
| 1009 |
+
|
| 1010 |
+
// In Recursive Pairing each symbol represents a pair of children symbols. So
|
| 1011 |
+
// read d->btree[] symbols data and expand each one in his left and right child
|
| 1012 |
+
// symbol until reaching the leaves that represent the symbol value.
|
| 1013 |
+
uint8_t set_symlen(PairsData* d, Sym s, std::vector<bool>& visited) {
|
| 1014 |
+
|
| 1015 |
+
visited[s] = true; // We can set it now because tree is acyclic
|
| 1016 |
+
Sym sr = d->btree[s].get<LR::Right>();
|
| 1017 |
+
|
| 1018 |
+
if (sr == 0xFFF)
|
| 1019 |
+
return 0;
|
| 1020 |
+
|
| 1021 |
+
Sym sl = d->btree[s].get<LR::Left>();
|
| 1022 |
+
|
| 1023 |
+
if (!visited[sl])
|
| 1024 |
+
d->symlen[sl] = set_symlen(d, sl, visited);
|
| 1025 |
+
|
| 1026 |
+
if (!visited[sr])
|
| 1027 |
+
d->symlen[sr] = set_symlen(d, sr, visited);
|
| 1028 |
+
|
| 1029 |
+
return d->symlen[sl] + d->symlen[sr] + 1;
|
| 1030 |
+
}
|
| 1031 |
+
|
| 1032 |
+
// Read the size header of one PairsData record starting at 'data', fill in
// the derived tables (base64[], symlen[]) and return the pointer just past
// the consumed bytes.
uint8_t* set_sizes(PairsData* d, uint8_t* data) {

    d->flags = *data++;

    if (d->flags & TBFlag::SingleValue)
    {
        // Zeroed explicitly: the original code notes MSVC zero-init is broken
        d->blocksNum       = 0;
        d->blockLengthSize = 0;
        d->span            = 0;
        d->sparseIndexSize = 0;
        d->minSymLen       = *data++;  // Here we store the single value
        return data;
    }

    // groupLen[] is a zero-terminated list of group lengths; the groupIdx[]
    // element at the terminator position holds the biggest index, that is
    // the size of the table.
    const auto     terminator = std::find(d->groupLen, d->groupLen + 7, 0);
    const uint64_t tbSize     = d->groupIdx[terminator - d->groupLen];

    d->sizeofBlock     = 1ULL << *data++;
    d->span            = 1ULL << *data++;
    d->sparseIndexSize = size_t((tbSize + d->span - 1) / d->span);  // Round up

    const auto padding = number<uint8_t, LittleEndian>(data++);

    d->blocksNum = number<uint32_t, LittleEndian>(data);
    data += sizeof(uint32_t);

    // Padded to ensure SparseIndex[] does not point out of range
    d->blockLengthSize = d->blocksNum + padding;

    d->maxSymLen = *data++;
    d->minSymLen = *data++;
    d->lowestSym = reinterpret_cast<Sym*>(data);
    d->base64.resize(d->maxSymLen - d->minSymLen + 1);

    // See https://en.wikipedia.org/wiki/Huffman_coding
    // In the canonical code, symbols with a longer Huffman code have a lower
    // numeric value, so d->lowestSym[i] >= d->lowestSym[i+1] (read as
    // LittleEndian). From this we build base64[], indexed by symbol length
    // and holding 64 bit values such that d->base64[i] >= d->base64[i+1].

    // Implementation note: the unsigned base64.size() is first stored in a
    // signed int so that subtracting 2 raises no unsigned overflow warnings.
    const int base64_size = static_cast<int>(d->base64.size());

    for (int i = base64_size - 2; i >= 0; --i)
    {
        const auto lowHere = number<Sym, LittleEndian>(&d->lowestSym[i]);
        const auto lowNext = number<Sym, LittleEndian>(&d->lowestSym[i + 1]);

        d->base64[i] = (d->base64[i + 1] + lowHere - lowNext) / 2;

        assert(d->base64[i] * 2 >= d->base64[i + 1]);
    }

    // Left-shift so that d->base64[i] is shifted 1 bit more than
    // d->base64[i+1]; together with the assert above this guarantees
    // d->base64[i] >= d->base64[i+1]. Moreover, for any symbol s64 of length i
    // right-padded to 64 bits, d->base64[i-1] >= s64 >= d->base64[i] holds.
    for (int i = 0; i < base64_size; ++i)
        d->base64[i] <<= 64 - i - d->minSymLen;  // Right-padding to 64 bits

    data += base64_size * sizeof(Sym);

    d->symlen.resize(number<uint16_t, LittleEndian>(data));
    data += sizeof(uint16_t);

    d->btree = reinterpret_cast<LR*>(data);

    // The compression scheme is "Recursive Pairing": the most frequent
    // adjacent pair of symbols in the source message is replaced by a new
    // symbol, the pair frequencies are re-evaluated over the extended
    // alphabet, and the process repeats.
    // See https://web.archive.org/web/20201106232444/http://www.larsson.dogma.net/dcc99.pdf
    std::vector<bool> visited(d->symlen.size());

    for (Sym sym = 0; sym < d->symlen.size(); ++sym)
    {
        if (visited[sym])
            continue;

        d->symlen[sym] = set_symlen(d, sym, visited);
    }

    // Skip the btree entries plus one extra byte when their count is odd
    return data + d->symlen.size() * sizeof(LR) + (d->symlen.size() & 1);
}
|
| 1107 |
+
|
| 1108 |
+
// WDL overload: nothing is read here, the data pointer is returned unchanged.
uint8_t* set_dtz_map(TBTable<WDL>&, uint8_t* data, File) {
    return data;
}
|
| 1109 |
+
|
| 1110 |
+
// DTZ overload: remember where the map area starts and, for every file up to
// maxFile whose record has the Mapped flag, store the offsets of its four
// value maps in map_idx[]. Returns the word-aligned pointer past the maps.
uint8_t* set_dtz_map(TBTable<DTZ>& e, uint8_t* data, File maxFile) {

    e.map = data;

    for (File f = FILE_A; f <= maxFile; ++f)
    {
        auto* const record = e.get(0, f);
        const auto  flags  = record->flags;

        if (!(flags & TBFlag::Mapped))
            continue;

        if (flags & TBFlag::Wide)
        {
            data += uintptr_t(data) & 1;  // Word alignment, we may have a mixed table

            // Sequence like 3,x,x,x,1,x,0,2,x,x
            for (int i = 0; i < 4; ++i)
            {
                const auto offset = (uint16_t*) data - (uint16_t*) e.map + 1;

                record->map_idx[i] = uint16_t(offset);
                data += 2 * number<uint16_t, LittleEndian>(data) + 2;
            }
        }
        else
        {
            for (int i = 0; i < 4; ++i)
            {
                const auto offset = data - e.map + 1;

                record->map_idx[i] = uint16_t(offset);
                data += *data + 1;
            }
        }
    }

    data += uintptr_t(data) & 1;  // Word alignment
    return data;
}
|
| 1141 |
+
|
| 1142 |
+
// Populate entry's PairsData records with data from the just memory-mapped file.
|
| 1143 |
+
// Called at first access.
|
| 1144 |
+
template<typename T>
|
| 1145 |
+
void set(T& e, uint8_t* data) {
|
| 1146 |
+
|
| 1147 |
+
PairsData* d;
|
| 1148 |
+
|
| 1149 |
+
enum {
|
| 1150 |
+
Split = 1,
|
| 1151 |
+
HasPawns = 2
|
| 1152 |
+
};
|
| 1153 |
+
|
| 1154 |
+
assert(e.hasPawns == bool(*data & HasPawns));
|
| 1155 |
+
assert((e.key != e.key2) == bool(*data & Split));
|
| 1156 |
+
|
| 1157 |
+
data++; // First byte stores flags
|
| 1158 |
+
|
| 1159 |
+
const int sides = T::Sides == 2 && (e.key != e.key2) ? 2 : 1;
|
| 1160 |
+
const File maxFile = e.hasPawns ? FILE_D : FILE_A;
|
| 1161 |
+
|
| 1162 |
+
bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides
|
| 1163 |
+
|
| 1164 |
+
assert(!pp || e.pawnCount[0]);
|
| 1165 |
+
|
| 1166 |
+
for (File f = FILE_A; f <= maxFile; ++f)
|
| 1167 |
+
{
|
| 1168 |
+
|
| 1169 |
+
for (int i = 0; i < sides; i++)
|
| 1170 |
+
*e.get(i, f) = PairsData();
|
| 1171 |
+
|
| 1172 |
+
int order[][2] = {{*data & 0xF, pp ? *(data + 1) & 0xF : 0xF},
|
| 1173 |
+
{*data >> 4, pp ? *(data + 1) >> 4 : 0xF}};
|
| 1174 |
+
data += 1 + pp;
|
| 1175 |
+
|
| 1176 |
+
for (int k = 0; k < e.pieceCount; ++k, ++data)
|
| 1177 |
+
for (int i = 0; i < sides; i++)
|
| 1178 |
+
e.get(i, f)->pieces[k] = Piece(i ? *data >> 4 : *data & 0xF);
|
| 1179 |
+
|
| 1180 |
+
for (int i = 0; i < sides; ++i)
|
| 1181 |
+
set_groups(e, e.get(i, f), order[i], f);
|
| 1182 |
+
}
|
| 1183 |
+
|
| 1184 |
+
data += uintptr_t(data) & 1; // Word alignment
|
| 1185 |
+
|
| 1186 |
+
for (File f = FILE_A; f <= maxFile; ++f)
|
| 1187 |
+
for (int i = 0; i < sides; i++)
|
| 1188 |
+
data = set_sizes(e.get(i, f), data);
|
| 1189 |
+
|
| 1190 |
+
data = set_dtz_map(e, data, maxFile);
|
| 1191 |
+
|
| 1192 |
+
for (File f = FILE_A; f <= maxFile; ++f)
|
| 1193 |
+
for (int i = 0; i < sides; i++)
|
| 1194 |
+
{
|
| 1195 |
+
(d = e.get(i, f))->sparseIndex = (SparseEntry*) data;
|
| 1196 |
+
data += d->sparseIndexSize * sizeof(SparseEntry);
|
| 1197 |
+
}
|
| 1198 |
+
|
| 1199 |
+
for (File f = FILE_A; f <= maxFile; ++f)
|
| 1200 |
+
for (int i = 0; i < sides; i++)
|
| 1201 |
+
{
|
| 1202 |
+
(d = e.get(i, f))->blockLength = (uint16_t*) data;
|
| 1203 |
+
data += d->blockLengthSize * sizeof(uint16_t);
|
| 1204 |
+
}
|
| 1205 |
+
|
| 1206 |
+
for (File f = FILE_A; f <= maxFile; ++f)
|
| 1207 |
+
for (int i = 0; i < sides; i++)
|
| 1208 |
+
{
|
| 1209 |
+
data = (uint8_t*) ((uintptr_t(data) + 0x3F) & ~0x3F); // 64 byte alignment
|
| 1210 |
+
(d = e.get(i, f))->data = data;
|
| 1211 |
+
data += d->blocksNum * d->sizeofBlock;
|
| 1212 |
+
}
|
| 1213 |
+
}
|
| 1214 |
+
|
| 1215 |
+
// If the TB file corresponding to the given position is already memory-mapped
|
| 1216 |
+
// then return its base address, otherwise, try to memory map and init it. Called
|
| 1217 |
+
// at every probe, memory map, and init only at first access. Function is thread
|
| 1218 |
+
// safe and can be called concurrently.
|
| 1219 |
+
template<TBType Type>
|
| 1220 |
+
void* mapped(TBTable<Type>& e, const Position& pos) {
|
| 1221 |
+
|
| 1222 |
+
static std::mutex mutex;
|
| 1223 |
+
// Because TB is the only usage of materialKey, check it here in debug mode
|
| 1224 |
+
assert(pos.material_key_is_ok());
|
| 1225 |
+
|
| 1226 |
+
// Use 'acquire' to avoid a thread reading 'ready' == true while
|
| 1227 |
+
// another is still working. (compiler reordering may cause this).
|
| 1228 |
+
if (e.ready.load(std::memory_order_acquire))
|
| 1229 |
+
return e.baseAddress; // Could be nullptr if file does not exist
|
| 1230 |
+
|
| 1231 |
+
std::scoped_lock<std::mutex> lk(mutex);
|
| 1232 |
+
|
| 1233 |
+
if (e.ready.load(std::memory_order_relaxed)) // Recheck under lock
|
| 1234 |
+
return e.baseAddress;
|
| 1235 |
+
|
| 1236 |
+
// Pieces strings in decreasing order for each color, like ("KPP","KR")
|
| 1237 |
+
std::string fname, w, b;
|
| 1238 |
+
for (PieceType pt = KING; pt >= PAWN; --pt)
|
| 1239 |
+
{
|
| 1240 |
+
w += std::string(popcount(pos.pieces(WHITE, pt)), PieceToChar[pt]);
|
| 1241 |
+
b += std::string(popcount(pos.pieces(BLACK, pt)), PieceToChar[pt]);
|
| 1242 |
+
}
|
| 1243 |
+
|
| 1244 |
+
fname =
|
| 1245 |
+
(e.key == pos.material_key() ? w + 'v' + b : b + 'v' + w) + (Type == WDL ? ".rtbw" : ".rtbz");
|
| 1246 |
+
|
| 1247 |
+
uint8_t* data = TBFile(fname).map(&e.baseAddress, &e.mapping, Type);
|
| 1248 |
+
|
| 1249 |
+
if (data)
|
| 1250 |
+
set(e, data);
|
| 1251 |
+
|
| 1252 |
+
e.ready.store(true, std::memory_order_release);
|
| 1253 |
+
return e.baseAddress;
|
| 1254 |
+
}
|
| 1255 |
+
|
| 1256 |
+
template<TBType Type, typename Ret = typename TBTable<Type>::Ret>
|
| 1257 |
+
Ret probe_table(const Position& pos, ProbeState* result, WDLScore wdl = WDLDraw) {
|
| 1258 |
+
|
| 1259 |
+
if (pos.count<ALL_PIECES>() == 2) // KvK
|
| 1260 |
+
return Ret(WDLDraw);
|
| 1261 |
+
|
| 1262 |
+
TBTable<Type>* entry = TBTables.get<Type>(pos.material_key());
|
| 1263 |
+
|
| 1264 |
+
if (!entry || !mapped(*entry, pos))
|
| 1265 |
+
return *result = FAIL, Ret();
|
| 1266 |
+
|
| 1267 |
+
return do_probe_table(pos, entry, wdl, result);
|
| 1268 |
+
}
|
| 1269 |
+
|
| 1270 |
+
// For a position where the side to move has a winning capture it is not necessary
|
| 1271 |
+
// to store a winning value so the generator treats such positions as "don't care"
|
| 1272 |
+
// and tries to assign to it a value that improves the compression ratio. Similarly,
|
| 1273 |
+
// if the side to move has a drawing capture, then the position is at least drawn.
|
| 1274 |
+
// If the position is won, then the TB needs to store a win value. But if the
|
| 1275 |
+
// position is drawn, the TB may store a loss value if that is better for compression.
|
| 1276 |
+
// All of this means that during probing, the engine must look at captures and probe
|
| 1277 |
+
// their results and must probe the position itself. The "best" result of these
|
| 1278 |
+
// probes is the correct result for the position.
|
| 1279 |
+
// DTZ tables do not store values when a following move is a zeroing winning move
|
| 1280 |
+
// (winning capture or winning pawn move). Also, DTZ store wrong values for positions
|
| 1281 |
+
// where the best move is an ep-move (even if losing). So in all these cases set
|
| 1282 |
+
// the state to ZEROING_BEST_MOVE.
|
| 1283 |
+
template<bool CheckZeroingMoves>
|
| 1284 |
+
WDLScore search(Position& pos, ProbeState* result) {
|
| 1285 |
+
|
| 1286 |
+
WDLScore value, bestValue = WDLLoss;
|
| 1287 |
+
StateInfo st;
|
| 1288 |
+
|
| 1289 |
+
auto moveList = MoveList<LEGAL>(pos);
|
| 1290 |
+
size_t totalCount = moveList.size(), moveCount = 0;
|
| 1291 |
+
|
| 1292 |
+
for (const Move move : moveList)
|
| 1293 |
+
{
|
| 1294 |
+
if (!pos.capture(move) && (!CheckZeroingMoves || type_of(pos.moved_piece(move)) != PAWN))
|
| 1295 |
+
continue;
|
| 1296 |
+
|
| 1297 |
+
moveCount++;
|
| 1298 |
+
|
| 1299 |
+
pos.do_move(move, st);
|
| 1300 |
+
value = -search<false>(pos, result);
|
| 1301 |
+
pos.undo_move(move);
|
| 1302 |
+
|
| 1303 |
+
if (*result == FAIL)
|
| 1304 |
+
return WDLDraw;
|
| 1305 |
+
|
| 1306 |
+
if (value > bestValue)
|
| 1307 |
+
{
|
| 1308 |
+
bestValue = value;
|
| 1309 |
+
|
| 1310 |
+
if (value >= WDLWin)
|
| 1311 |
+
{
|
| 1312 |
+
*result = ZEROING_BEST_MOVE; // Winning DTZ-zeroing move
|
| 1313 |
+
return value;
|
| 1314 |
+
}
|
| 1315 |
+
}
|
| 1316 |
+
}
|
| 1317 |
+
|
| 1318 |
+
// In case we have already searched all the legal moves we don't have to probe
|
| 1319 |
+
// the TB because the stored score could be wrong. For instance TB tables
|
| 1320 |
+
// do not contain information on position with ep rights, so in this case
|
| 1321 |
+
// the result of probe_wdl_table is wrong. Also in case of only capture
|
| 1322 |
+
// moves, for instance here 4K3/4q3/6p1/2k5/6p1/8/8/8 w - - 0 7, we have to
|
| 1323 |
+
// return with ZEROING_BEST_MOVE set.
|
| 1324 |
+
bool noMoreMoves = (moveCount && moveCount == totalCount);
|
| 1325 |
+
|
| 1326 |
+
if (noMoreMoves)
|
| 1327 |
+
value = bestValue;
|
| 1328 |
+
else
|
| 1329 |
+
{
|
| 1330 |
+
value = probe_table<WDL>(pos, result);
|
| 1331 |
+
|
| 1332 |
+
if (*result == FAIL)
|
| 1333 |
+
return WDLDraw;
|
| 1334 |
+
}
|
| 1335 |
+
|
| 1336 |
+
// DTZ stores a "don't care" value if bestValue is a win
|
| 1337 |
+
if (bestValue >= value)
|
| 1338 |
+
return *result = (bestValue > WDLDraw || noMoreMoves ? ZEROING_BEST_MOVE : OK), bestValue;
|
| 1339 |
+
|
| 1340 |
+
return *result = OK, value;
|
| 1341 |
+
}
|
| 1342 |
+
|
| 1343 |
+
} // namespace
|
| 1344 |
+
|
| 1345 |
+
|
| 1346 |
+
// Called at startup and after every change to
|
| 1347 |
+
// "SyzygyPath" UCI option to (re)create the various tables. It is not thread
|
| 1348 |
+
// safe, nor it needs to be.
|
| 1349 |
+
void Tablebases::init(const std::string& paths) {
|
| 1350 |
+
|
| 1351 |
+
TBTables.clear();
|
| 1352 |
+
MaxCardinality = 0;
|
| 1353 |
+
TBFile::Paths = paths;
|
| 1354 |
+
|
| 1355 |
+
if (paths.empty())
|
| 1356 |
+
return;
|
| 1357 |
+
|
| 1358 |
+
// MapB1H1H7[] encodes a square below a1-h8 diagonal to 0..27
|
| 1359 |
+
int code = 0;
|
| 1360 |
+
for (Square s = SQ_A1; s <= SQ_H8; ++s)
|
| 1361 |
+
if (off_A1H8(s) < 0)
|
| 1362 |
+
MapB1H1H7[s] = code++;
|
| 1363 |
+
|
| 1364 |
+
// MapA1D1D4[] encodes a square in the a1-d1-d4 triangle to 0..9
|
| 1365 |
+
std::vector<Square> diagonal;
|
| 1366 |
+
code = 0;
|
| 1367 |
+
for (Square s = SQ_A1; s <= SQ_D4; ++s)
|
| 1368 |
+
if (off_A1H8(s) < 0 && file_of(s) <= FILE_D)
|
| 1369 |
+
MapA1D1D4[s] = code++;
|
| 1370 |
+
|
| 1371 |
+
else if (!off_A1H8(s) && file_of(s) <= FILE_D)
|
| 1372 |
+
diagonal.push_back(s);
|
| 1373 |
+
|
| 1374 |
+
// Diagonal squares are encoded as last ones
|
| 1375 |
+
for (auto s : diagonal)
|
| 1376 |
+
MapA1D1D4[s] = code++;
|
| 1377 |
+
|
| 1378 |
+
// MapKK[] encodes all the 462 possible legal positions of two kings where
|
| 1379 |
+
// the first is in the a1-d1-d4 triangle. If the first king is on the a1-d4
|
| 1380 |
+
// diagonal, the other one shall not be above the a1-h8 diagonal.
|
| 1381 |
+
std::vector<std::pair<int, Square>> bothOnDiagonal;
|
| 1382 |
+
code = 0;
|
| 1383 |
+
for (int idx = 0; idx < 10; idx++)
|
| 1384 |
+
for (Square s1 = SQ_A1; s1 <= SQ_D4; ++s1)
|
| 1385 |
+
if (MapA1D1D4[s1] == idx && (idx || s1 == SQ_B1)) // SQ_B1 is mapped to 0
|
| 1386 |
+
{
|
| 1387 |
+
for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
|
| 1388 |
+
if ((PseudoAttacks[KING][s1] | s1) & s2)
|
| 1389 |
+
continue; // Illegal position
|
| 1390 |
+
|
| 1391 |
+
else if (!off_A1H8(s1) && off_A1H8(s2) > 0)
|
| 1392 |
+
continue; // First on diagonal, second above
|
| 1393 |
+
|
| 1394 |
+
else if (!off_A1H8(s1) && !off_A1H8(s2))
|
| 1395 |
+
bothOnDiagonal.emplace_back(idx, s2);
|
| 1396 |
+
|
| 1397 |
+
else
|
| 1398 |
+
MapKK[idx][s2] = code++;
|
| 1399 |
+
}
|
| 1400 |
+
|
| 1401 |
+
// Legal positions with both kings on a diagonal are encoded as last ones
|
| 1402 |
+
for (auto p : bothOnDiagonal)
|
| 1403 |
+
MapKK[p.first][p.second] = code++;
|
| 1404 |
+
|
| 1405 |
+
// Binomial[] stores the Binomial Coefficients using Pascal rule. There
|
| 1406 |
+
// are Binomial[k][n] ways to choose k elements from a set of n elements.
|
| 1407 |
+
Binomial[0][0] = 1;
|
| 1408 |
+
|
| 1409 |
+
for (int n = 1; n < 64; n++) // Squares
|
| 1410 |
+
for (int k = 0; k < 6 && k <= n; ++k) // Pieces
|
| 1411 |
+
Binomial[k][n] =
|
| 1412 |
+
(k > 0 ? Binomial[k - 1][n - 1] : 0) + (k < n ? Binomial[k][n - 1] : 0);
|
| 1413 |
+
|
| 1414 |
+
// MapPawns[s] encodes squares a2-h7 to 0..47. This is the number of possible
|
| 1415 |
+
// available squares when the leading one is in 's'. Moreover the pawn with
|
| 1416 |
+
// highest MapPawns[] is the leading pawn, the one nearest the edge, and
|
| 1417 |
+
// among pawns with the same file, the one with the lowest rank.
|
| 1418 |
+
int availableSquares = 47; // Available squares when lead pawn is in a2
|
| 1419 |
+
|
| 1420 |
+
// Init the tables for the encoding of leading pawns group: with 7-men TB we
|
| 1421 |
+
// can have up to 5 leading pawns (KPPPPPK).
|
| 1422 |
+
for (int leadPawnsCnt = 1; leadPawnsCnt <= 5; ++leadPawnsCnt)
|
| 1423 |
+
for (File f = FILE_A; f <= FILE_D; ++f)
|
| 1424 |
+
{
|
| 1425 |
+
// Restart the index at every file because TB table is split
|
| 1426 |
+
// by file, so we can reuse the same index for different files.
|
| 1427 |
+
int idx = 0;
|
| 1428 |
+
|
| 1429 |
+
// Sum all possible combinations for a given file, starting with
|
| 1430 |
+
// the leading pawn on rank 2 and increasing the rank.
|
| 1431 |
+
for (Rank r = RANK_2; r <= RANK_7; ++r)
|
| 1432 |
+
{
|
| 1433 |
+
Square sq = make_square(f, r);
|
| 1434 |
+
|
| 1435 |
+
// Compute MapPawns[] at first pass.
|
| 1436 |
+
// If sq is the leading pawn square, any other pawn cannot be
|
| 1437 |
+
// below or more toward the edge of sq. There are 47 available
|
| 1438 |
+
// squares when sq = a2 and reduced by 2 for any rank increase
|
| 1439 |
+
// due to mirroring: sq == a3 -> no a2, h2, so MapPawns[a3] = 45
|
| 1440 |
+
if (leadPawnsCnt == 1)
|
| 1441 |
+
{
|
| 1442 |
+
MapPawns[sq] = availableSquares--;
|
| 1443 |
+
MapPawns[flip_file(sq)] = availableSquares--;
|
| 1444 |
+
}
|
| 1445 |
+
LeadPawnIdx[leadPawnsCnt][sq] = idx;
|
| 1446 |
+
idx += Binomial[leadPawnsCnt - 1][MapPawns[sq]];
|
| 1447 |
+
}
|
| 1448 |
+
// After a file is traversed, store the cumulated per-file index
|
| 1449 |
+
LeadPawnsSize[leadPawnsCnt][f] = idx;
|
| 1450 |
+
}
|
| 1451 |
+
|
| 1452 |
+
// Add entries in TB tables if the corresponding ".rtbw" file exists
|
| 1453 |
+
for (PieceType p1 = PAWN; p1 < KING; ++p1)
|
| 1454 |
+
{
|
| 1455 |
+
TBTables.add({KING, p1, KING});
|
| 1456 |
+
|
| 1457 |
+
for (PieceType p2 = PAWN; p2 <= p1; ++p2)
|
| 1458 |
+
{
|
| 1459 |
+
TBTables.add({KING, p1, p2, KING});
|
| 1460 |
+
TBTables.add({KING, p1, KING, p2});
|
| 1461 |
+
|
| 1462 |
+
for (PieceType p3 = PAWN; p3 < KING; ++p3)
|
| 1463 |
+
TBTables.add({KING, p1, p2, KING, p3});
|
| 1464 |
+
|
| 1465 |
+
for (PieceType p3 = PAWN; p3 <= p2; ++p3)
|
| 1466 |
+
{
|
| 1467 |
+
TBTables.add({KING, p1, p2, p3, KING});
|
| 1468 |
+
|
| 1469 |
+
for (PieceType p4 = PAWN; p4 <= p3; ++p4)
|
| 1470 |
+
{
|
| 1471 |
+
TBTables.add({KING, p1, p2, p3, p4, KING});
|
| 1472 |
+
|
| 1473 |
+
for (PieceType p5 = PAWN; p5 <= p4; ++p5)
|
| 1474 |
+
TBTables.add({KING, p1, p2, p3, p4, p5, KING});
|
| 1475 |
+
|
| 1476 |
+
for (PieceType p5 = PAWN; p5 < KING; ++p5)
|
| 1477 |
+
TBTables.add({KING, p1, p2, p3, p4, KING, p5});
|
| 1478 |
+
}
|
| 1479 |
+
|
| 1480 |
+
for (PieceType p4 = PAWN; p4 < KING; ++p4)
|
| 1481 |
+
{
|
| 1482 |
+
TBTables.add({KING, p1, p2, p3, KING, p4});
|
| 1483 |
+
|
| 1484 |
+
for (PieceType p5 = PAWN; p5 <= p4; ++p5)
|
| 1485 |
+
TBTables.add({KING, p1, p2, p3, KING, p4, p5});
|
| 1486 |
+
}
|
| 1487 |
+
}
|
| 1488 |
+
|
| 1489 |
+
for (PieceType p3 = PAWN; p3 <= p1; ++p3)
|
| 1490 |
+
for (PieceType p4 = PAWN; p4 <= (p1 == p3 ? p2 : p3); ++p4)
|
| 1491 |
+
TBTables.add({KING, p1, p2, KING, p3, p4});
|
| 1492 |
+
}
|
| 1493 |
+
}
|
| 1494 |
+
|
| 1495 |
+
TBTables.info();
|
| 1496 |
+
}
|
| 1497 |
+
|
| 1498 |
+
// Probe the WDL table for a particular position.
|
| 1499 |
+
// If *result != FAIL, the probe was successful.
|
| 1500 |
+
// The return value is from the point of view of the side to move:
|
| 1501 |
+
// -2 : loss
|
| 1502 |
+
// -1 : loss, but draw under 50-move rule
|
| 1503 |
+
// 0 : draw
|
| 1504 |
+
// 1 : win, but draw under 50-move rule
|
| 1505 |
+
// 2 : win
|
| 1506 |
+
WDLScore Tablebases::probe_wdl(Position& pos, ProbeState* result) {
|
| 1507 |
+
|
| 1508 |
+
*result = OK;
|
| 1509 |
+
return search<false>(pos, result);
|
| 1510 |
+
}
|
| 1511 |
+
|
| 1512 |
+
// Probe the DTZ table for a particular position.
|
| 1513 |
+
// If *result != FAIL, the probe was successful.
|
| 1514 |
+
// The return value is from the point of view of the side to move:
|
| 1515 |
+
// n < -100 : loss, but draw under 50-move rule
|
| 1516 |
+
// -100 <= n < -1 : loss in n ply (assuming 50-move counter == 0)
|
| 1517 |
+
// -1 : loss, the side to move is mated
|
| 1518 |
+
// 0 : draw
|
| 1519 |
+
// 1 < n <= 100 : win in n ply (assuming 50-move counter == 0)
|
| 1520 |
+
// 100 < n : win, but draw under 50-move rule
|
| 1521 |
+
//
|
| 1522 |
+
// The return value n can be off by 1: a return value -n can mean a loss
|
| 1523 |
+
// in n+1 ply and a return value +n can mean a win in n+1 ply. This
|
| 1524 |
+
// cannot happen for tables with positions exactly on the "edge" of
|
| 1525 |
+
// the 50-move rule.
|
| 1526 |
+
//
|
| 1527 |
+
// This implies that if dtz > 0 is returned, the position is certainly
|
| 1528 |
+
// a win if dtz + 50-move-counter <= 99. Care must be taken that the engine
|
| 1529 |
+
// picks moves that preserve dtz + 50-move-counter <= 99.
|
| 1530 |
+
//
|
| 1531 |
+
// If n = 100 immediately after a capture or pawn move, then the position
|
| 1532 |
+
// is also certainly a win, and during the whole phase until the next
|
| 1533 |
+
// capture or pawn move, the inequality to be preserved is
|
| 1534 |
+
// dtz + 50-move-counter <= 100.
|
| 1535 |
+
//
|
| 1536 |
+
// In short, if a move is available resulting in dtz + 50-move-counter <= 99,
|
| 1537 |
+
// then do not accept moves leading to dtz + 50-move-counter == 100.
|
| 1538 |
+
int Tablebases::probe_dtz(Position& pos, ProbeState* result) {
|
| 1539 |
+
|
| 1540 |
+
*result = OK;
|
| 1541 |
+
WDLScore wdl = search<true>(pos, result);
|
| 1542 |
+
|
| 1543 |
+
if (*result == FAIL || wdl == WDLDraw) // DTZ tables don't store draws
|
| 1544 |
+
return 0;
|
| 1545 |
+
|
| 1546 |
+
// DTZ stores a 'don't care value in this case, or even a plain wrong
|
| 1547 |
+
// one as in case the best move is a losing ep, so it cannot be probed.
|
| 1548 |
+
if (*result == ZEROING_BEST_MOVE)
|
| 1549 |
+
return dtz_before_zeroing(wdl);
|
| 1550 |
+
|
| 1551 |
+
int dtz = probe_table<DTZ>(pos, result, wdl);
|
| 1552 |
+
|
| 1553 |
+
if (*result == FAIL)
|
| 1554 |
+
return 0;
|
| 1555 |
+
|
| 1556 |
+
if (*result != CHANGE_STM)
|
| 1557 |
+
return (dtz + 100 * (wdl == WDLBlessedLoss || wdl == WDLCursedWin)) * sign_of(wdl);
|
| 1558 |
+
|
| 1559 |
+
// DTZ stores results for the other side, so we need to do a 1-ply search and
|
| 1560 |
+
// find the winning move that minimizes DTZ.
|
| 1561 |
+
StateInfo st;
|
| 1562 |
+
int minDTZ = 0xFFFF;
|
| 1563 |
+
|
| 1564 |
+
for (const Move move : MoveList<LEGAL>(pos))
|
| 1565 |
+
{
|
| 1566 |
+
bool zeroing = pos.capture(move) || type_of(pos.moved_piece(move)) == PAWN;
|
| 1567 |
+
|
| 1568 |
+
pos.do_move(move, st);
|
| 1569 |
+
|
| 1570 |
+
// For zeroing moves we want the dtz of the move _before_ doing it,
|
| 1571 |
+
// otherwise we will get the dtz of the next move sequence. Search the
|
| 1572 |
+
// position after the move to get the score sign (because even in a
|
| 1573 |
+
// winning position we could make a losing capture or go for a draw).
|
| 1574 |
+
dtz = zeroing ? -dtz_before_zeroing(search<false>(pos, result)) : -probe_dtz(pos, result);
|
| 1575 |
+
|
| 1576 |
+
// If the move mates, force minDTZ to 1
|
| 1577 |
+
if (dtz == 1 && pos.checkers() && MoveList<LEGAL>(pos).size() == 0)
|
| 1578 |
+
minDTZ = 1;
|
| 1579 |
+
|
| 1580 |
+
// Convert result from 1-ply search. Zeroing moves are already accounted
|
| 1581 |
+
// by dtz_before_zeroing() that returns the DTZ of the previous move.
|
| 1582 |
+
if (!zeroing)
|
| 1583 |
+
dtz += sign_of(dtz);
|
| 1584 |
+
|
| 1585 |
+
// Skip the draws and if we are winning only pick positive dtz
|
| 1586 |
+
if (dtz < minDTZ && sign_of(dtz) == sign_of(wdl))
|
| 1587 |
+
minDTZ = dtz;
|
| 1588 |
+
|
| 1589 |
+
pos.undo_move(move);
|
| 1590 |
+
|
| 1591 |
+
if (*result == FAIL)
|
| 1592 |
+
return 0;
|
| 1593 |
+
}
|
| 1594 |
+
|
| 1595 |
+
// When there are no legal moves, the position is mate: we return -1
|
| 1596 |
+
return minDTZ == 0xFFFF ? -1 : minDTZ;
|
| 1597 |
+
}
|
| 1598 |
+
|
| 1599 |
+
|
| 1600 |
+
// Use the DTZ tables to rank root moves.
|
| 1601 |
+
//
|
| 1602 |
+
// A return value false indicates that not all probes were successful.
|
| 1603 |
+
bool Tablebases::root_probe(Position& pos,
|
| 1604 |
+
Search::RootMoves& rootMoves,
|
| 1605 |
+
bool rule50,
|
| 1606 |
+
bool rankDTZ,
|
| 1607 |
+
const std::function<bool()>& time_abort) {
|
| 1608 |
+
|
| 1609 |
+
ProbeState result = OK;
|
| 1610 |
+
StateInfo st;
|
| 1611 |
+
|
| 1612 |
+
// Obtain 50-move counter for the root position
|
| 1613 |
+
int cnt50 = pos.rule50_count();
|
| 1614 |
+
|
| 1615 |
+
// Check whether a position was repeated since the last zeroing move.
|
| 1616 |
+
bool rep = pos.has_repeated();
|
| 1617 |
+
|
| 1618 |
+
int dtz, bound = rule50 ? (MAX_DTZ / 2 - 100) : 1;
|
| 1619 |
+
|
| 1620 |
+
// Probe and rank each move
|
| 1621 |
+
for (auto& m : rootMoves)
|
| 1622 |
+
{
|
| 1623 |
+
pos.do_move(m.pv[0], st);
|
| 1624 |
+
|
| 1625 |
+
// Calculate dtz for the current move counting from the root position
|
| 1626 |
+
if (pos.rule50_count() == 0)
|
| 1627 |
+
{
|
| 1628 |
+
// In case of a zeroing move, dtz is one of -101/-1/0/1/101
|
| 1629 |
+
WDLScore wdl = -probe_wdl(pos, &result);
|
| 1630 |
+
dtz = dtz_before_zeroing(wdl);
|
| 1631 |
+
}
|
| 1632 |
+
else if ((rule50 && pos.is_draw(1)) || pos.is_repetition(1))
|
| 1633 |
+
{
|
| 1634 |
+
// In case a root move leads to a draw by repetition or 50-move rule,
|
| 1635 |
+
// we set dtz to zero. Note: since we are only 1 ply from the root,
|
| 1636 |
+
// this must be a true 3-fold repetition inside the game history.
|
| 1637 |
+
dtz = 0;
|
| 1638 |
+
}
|
| 1639 |
+
else
|
| 1640 |
+
{
|
| 1641 |
+
// Otherwise, take dtz for the new position and correct by 1 ply
|
| 1642 |
+
dtz = -probe_dtz(pos, &result);
|
| 1643 |
+
dtz = dtz > 0 ? dtz + 1 : dtz < 0 ? dtz - 1 : dtz;
|
| 1644 |
+
}
|
| 1645 |
+
|
| 1646 |
+
// Make sure that a mating move is assigned a dtz value of 1
|
| 1647 |
+
if (pos.checkers() && dtz == 2 && MoveList<LEGAL>(pos).size() == 0)
|
| 1648 |
+
dtz = 1;
|
| 1649 |
+
|
| 1650 |
+
pos.undo_move(m.pv[0]);
|
| 1651 |
+
|
| 1652 |
+
if (time_abort() || result == FAIL)
|
| 1653 |
+
return false;
|
| 1654 |
+
|
| 1655 |
+
// Better moves are ranked higher. Certain wins are ranked equally.
|
| 1656 |
+
// Losing moves are ranked equally unless a 50-move draw is in sight.
|
| 1657 |
+
int r = dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? MAX_DTZ - (rankDTZ ? dtz : 0)
|
| 1658 |
+
: MAX_DTZ / 2 - (dtz + cnt50))
|
| 1659 |
+
: dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -MAX_DTZ - (rankDTZ ? dtz : 0)
|
| 1660 |
+
: -MAX_DTZ / 2 + (-dtz + cnt50))
|
| 1661 |
+
: 0;
|
| 1662 |
+
m.tbRank = r;
|
| 1663 |
+
|
| 1664 |
+
// Determine the score to be displayed for this move. Assign at least
|
| 1665 |
+
// 1 cp to cursed wins and let it grow to 49 cp as the positions gets
|
| 1666 |
+
// closer to a real win.
|
| 1667 |
+
m.tbScore = r >= bound ? VALUE_MATE - MAX_PLY - 1
|
| 1668 |
+
: r > 0 ? Value((std::max(3, r - (MAX_DTZ / 2 - 200)) * int(PawnValue)) / 200)
|
| 1669 |
+
: r == 0 ? VALUE_DRAW
|
| 1670 |
+
: r > -bound
|
| 1671 |
+
? Value((std::min(-3, r + (MAX_DTZ / 2 - 200)) * int(PawnValue)) / 200)
|
| 1672 |
+
: -VALUE_MATE + MAX_PLY + 1;
|
| 1673 |
+
}
|
| 1674 |
+
|
| 1675 |
+
return true;
|
| 1676 |
+
}
|
| 1677 |
+
|
| 1678 |
+
|
| 1679 |
+
// Use the WDL tables to rank root moves.
|
| 1680 |
+
// This is a fallback for the case that some or all DTZ tables are missing.
|
| 1681 |
+
//
|
| 1682 |
+
// A return value false indicates that not all probes were successful.
|
| 1683 |
+
bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, bool rule50) {
|
| 1684 |
+
|
| 1685 |
+
static const int WDL_to_rank[] = {-MAX_DTZ, -MAX_DTZ + 101, 0, MAX_DTZ - 101, MAX_DTZ};
|
| 1686 |
+
|
| 1687 |
+
ProbeState result = OK;
|
| 1688 |
+
StateInfo st;
|
| 1689 |
+
WDLScore wdl;
|
| 1690 |
+
|
| 1691 |
+
|
| 1692 |
+
// Probe and rank each move
|
| 1693 |
+
for (auto& m : rootMoves)
|
| 1694 |
+
{
|
| 1695 |
+
pos.do_move(m.pv[0], st);
|
| 1696 |
+
|
| 1697 |
+
if (pos.is_draw(1))
|
| 1698 |
+
wdl = WDLDraw;
|
| 1699 |
+
else
|
| 1700 |
+
wdl = -probe_wdl(pos, &result);
|
| 1701 |
+
|
| 1702 |
+
pos.undo_move(m.pv[0]);
|
| 1703 |
+
|
| 1704 |
+
if (result == FAIL)
|
| 1705 |
+
return false;
|
| 1706 |
+
|
| 1707 |
+
m.tbRank = WDL_to_rank[wdl + 2];
|
| 1708 |
+
|
| 1709 |
+
if (!rule50)
|
| 1710 |
+
wdl = wdl > WDLDraw ? WDLWin : wdl < WDLDraw ? WDLLoss : WDLDraw;
|
| 1711 |
+
m.tbScore = WDL_to_value[wdl + 2];
|
| 1712 |
+
}
|
| 1713 |
+
|
| 1714 |
+
return true;
|
| 1715 |
+
}
|
| 1716 |
+
|
| 1717 |
+
// Rank the root moves with the tablebases and return the probing
// configuration derived from the Syzygy options.
//
// DTZ tables are tried first via root_probe(); if that does not succeed and
// time_abort() does not signal an abort, root_probe_wdl() is used instead.
// On success the moves are stably sorted by descending tbRank; on failure
// every tbRank is reset to 0. An empty move list yields a default Config.
Config Tablebases::rank_root_moves(const OptionsMap&            options,
                                   Position&                    pos,
                                   Search::RootMoves&           rootMoves,
                                   bool                         rankDTZ,
                                   const std::function<bool()>& time_abort) {
    Config config;

    // Nothing to rank
    if (rootMoves.empty())
        return config;

    // Read the 50-move-rule option once and reuse it for both probes below
    const bool useRule50 = bool(options["Syzygy50MoveRule"]);

    config.rootInTB    = false;
    config.useRule50   = useRule50;
    config.probeDepth  = int(options["SyzygyProbeDepth"]);
    config.cardinality = int(options["SyzygyProbeLimit"]);

    // A probe limit beyond MaxCardinality is clamped, and in that case the
    // probe depth is reset to zero as well.
    if (config.cardinality > MaxCardinality)
    {
        config.cardinality = MaxCardinality;
        config.probeDepth  = 0;
    }

    // Set once the DTZ probe did not succeed and the WDL tables were used
    bool usedWdlFallback = false;

    // Only probe when the piece count is within the limit and no side can
    // still castle.
    if (config.cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
    {
        // First choice: DTZ-based ranking (may bail out through time_abort)
        config.rootInTB = root_probe(pos, rootMoves, useRule50, rankDTZ, time_abort);

        // Second choice: WDL-based ranking, skipped when we are out of time
        if (!config.rootInTB && !time_abort())
        {
            usedWdlFallback = true;
            config.rootInTB = root_probe_wdl(pos, rootMoves, useRule50);
        }
    }

    // Neither probe succeeded: wipe any ranks that were partially assigned
    if (!config.rootInTB)
    {
        for (auto& rootMove : rootMoves)
            rootMove.tbRank = 0;

        return config;
    }

    // Best TB rank first; equally ranked moves keep their relative order
    std::stable_sort(rootMoves.begin(), rootMoves.end(),
                     [](const Search::RootMove& lhs, const Search::RootMove& rhs) {
                         return lhs.tbRank > rhs.tbRank;
                     });

    // Keep probing during search only when the ranking came from WDL tables
    // and the best move scores above a draw; otherwise zero the cardinality.
    if (!usedWdlFallback || rootMoves[0].tbScore <= VALUE_DRAW)
        config.cardinality = 0;

    return config;
}
|
| 1776 |
+
} // namespace Stockfish
|