Phase 1: Opset 17 switch — Slice-based flip/rotate, tensor-based Pad, IR=8
Browse filesChanges:
- IR 10→8, OPSET 10→17
- s_flip: Slice(step=-1) instead of Gather (0 MACs vs ~165K)
- s_rotate k=2: double Slice(step=-1) (0 MACs vs ~165K)
- s_rotate k=1,3: Slice+Transpose for square grids (0 MACs), Gather fallback for non-square
- s_transpose: already zero-cost, no change needed
- All Pad nodes: attribute-based→tensor-based pads input (opset 17 requirement)
- New helpers: _make_int64_init(), _build_pad_node(), _build_slice_crop_pad()
- mk() updated to use IR=8 and opset 17
- neurogolf_solver.py +336 -561
neurogolf_solver.py
CHANGED
|
@@ -1,16 +1,15 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
ARC-AGI NeuroGolf Championship - Complete Solver
|
| 4 |
-
Format: [1,10,30,30] one-hot input/output, opset
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
-
|
| 8 |
-
-
|
| 9 |
-
-
|
| 10 |
-
-
|
| 11 |
-
-
|
| 12 |
-
-
|
| 13 |
-
- get_exs_for_fitting(): uses train+test+arc-gen for conv fitting
|
| 14 |
|
| 15 |
Solvers:
|
| 16 |
- Analytical: identity, constant, color_map, transpose, flip, rotate, tile, upscale,
|
|
@@ -46,8 +45,10 @@ except ImportError:
|
|
| 46 |
BATCH, CH, GH, GW = 1, 10, 30, 30
|
| 47 |
GRID_SHAPE = [BATCH, CH, GH, GW]
|
| 48 |
DT = TensorProto.FLOAT
|
| 49 |
-
IR =
|
| 50 |
-
OPSET = [helper.make_opsetid("",
|
|
|
|
|
|
|
| 51 |
|
| 52 |
# Officially excluded tasks (score 0 regardless)
|
| 53 |
EXCLUDED_TASKS = {21, 55, 80, 184, 202, 366}
|
|
@@ -55,9 +56,7 @@ EXCLUDED_TASKS = {21, 55, 80, 184, 202, 366}
|
|
| 55 |
# Max ARC-GEN examples to use for validation (to keep runtime reasonable)
|
| 56 |
MAX_ARCGEN_VALIDATE = 30
|
| 57 |
# Max ARC-GEN examples for conv fitting (keep separate from validation!)
|
| 58 |
-
|
| 59 |
-
# lstsq underdetermines with too many variable-size arc-gen examples.
|
| 60 |
-
MAX_ARCGEN_FIT = 0 # Don't use arc-gen for fitting — use for validation only
|
| 61 |
|
| 62 |
def get_providers():
|
| 63 |
return ['CPUExecutionProvider']
|
|
@@ -76,7 +75,6 @@ def load_tasks_dir(data_dir, arcgen_dir=None):
|
|
| 76 |
with open(os.path.join(data_dir, f)) as fh:
|
| 77 |
data = json.load(fh)
|
| 78 |
hex_id = f.replace('.json','')
|
| 79 |
-
# Load ARC-GEN data if available
|
| 80 |
if arcgen_dir and os.path.exists(os.path.join(arcgen_dir, f)):
|
| 81 |
with open(os.path.join(arcgen_dir, f)) as fh:
|
| 82 |
arcgen_examples = json.load(fh)
|
|
@@ -109,8 +107,7 @@ def to_onehot(grid):
|
|
| 109 |
return arr
|
| 110 |
|
| 111 |
def validate(path, td):
|
| 112 |
-
"""Validate model against ALL examples: train + test + arc-gen.
|
| 113 |
-
This matches what Kaggle does for scoring."""
|
| 114 |
try:
|
| 115 |
opts = ort.SessionOptions()
|
| 116 |
opts.log_severity_level = 3
|
|
@@ -118,7 +115,6 @@ def validate(path, td):
|
|
| 118 |
except:
|
| 119 |
return False
|
| 120 |
examples = td['train'] + td['test']
|
| 121 |
-
# Include arc-gen examples (capped for speed)
|
| 122 |
if 'arc-gen' in td:
|
| 123 |
examples = examples + td['arc-gen'][:MAX_ARCGEN_VALIDATE]
|
| 124 |
for ex in examples:
|
|
@@ -164,8 +160,7 @@ BANNED_OPS = {'Loop', 'Scan', 'NonZero', 'Unique', 'If', 'Function'}
|
|
| 164 |
MAX_FILESIZE = int(1.44 * 1024 * 1024)
|
| 165 |
|
| 166 |
def score_network(path):
|
| 167 |
-
"""Static profiler matching Kaggle scoring: cost = macs + memory + params.
|
| 168 |
-
Falls back to official neurogolf_utils if available."""
|
| 169 |
if HAS_ONNX_TOOL:
|
| 170 |
try:
|
| 171 |
return _score_network_official(path)
|
|
@@ -215,6 +210,44 @@ def _static_profile(path):
|
|
| 215 |
|
| 216 |
return int(macs), int(nbytes), int(params)
|
| 217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
def mk(nodes, inits=None):
|
| 219 |
x = helper.make_tensor_value_info("input", DT, GRID_SHAPE)
|
| 220 |
y = helper.make_tensor_value_info("output", DT, GRID_SHAPE)
|
|
@@ -227,24 +260,19 @@ def get_exs(td):
|
|
| 227 |
for ex in td['train'] + td['test']]
|
| 228 |
|
| 229 |
def get_exs_for_fitting(td):
|
| 230 |
-
"""Get examples for conv fitting. Uses train+test + arc-gen WHERE SIZES MATCH.
|
| 231 |
-
For fixed-size tasks, arc-gen examples have the same grid size,
|
| 232 |
-
so they provide more data points for lstsq without changing the feature dimension.
|
| 233 |
-
For variable-size tasks, only use train+test (arc-gen varies too much)."""
|
| 234 |
base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
|
| 235 |
for ex in td['train'] + td['test']]
|
| 236 |
|
| 237 |
if not base_exs:
|
| 238 |
return base_exs
|
| 239 |
|
| 240 |
-
# Check if all base examples have same input shape
|
| 241 |
base_shapes = {inp.shape for inp, _ in base_exs}
|
| 242 |
if len(base_shapes) != 1:
|
| 243 |
-
return base_exs
|
| 244 |
|
| 245 |
base_shape = list(base_shapes)[0]
|
| 246 |
|
| 247 |
-
# Add arc-gen examples that match the base shape
|
| 248 |
ag_exs = []
|
| 249 |
for ex in td.get('arc-gen', []):
|
| 250 |
inp = np.array(ex['input'], dtype=np.int64)
|
|
@@ -252,17 +280,13 @@ def get_exs_for_fitting(td):
|
|
| 252 |
if inp.shape == base_shape and out.shape == base_exs[0][1].shape:
|
| 253 |
ag_exs.append((inp, out))
|
| 254 |
|
| 255 |
-
# Cap to avoid massive lstsq (diminishing returns after ~10)
|
| 256 |
return base_exs + ag_exs[:10]
|
| 257 |
|
| 258 |
def get_exs_for_fitting_variable(td):
|
| 259 |
-
"""Get examples for variable-shape conv fitting.
|
| 260 |
-
For variable-shape tasks, arc-gen examples may have different sizes per example
|
| 261 |
-
but since we embed in 30x30 anyway, we can safely include them."""
|
| 262 |
base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
|
| 263 |
for ex in td['train'] + td['test']]
|
| 264 |
|
| 265 |
-
# For variable shape, include arc-gen examples (they get embedded in 30x30)
|
| 266 |
ag_exs = []
|
| 267 |
for ex in td.get('arc-gen', []):
|
| 268 |
inp = np.array(ex['input'], dtype=np.int64)
|
|
@@ -279,12 +303,11 @@ def fixed_shapes(td):
|
|
| 279 |
return list(shapes)[0] if len(shapes) == 1 else None
|
| 280 |
|
| 281 |
# ============================================================
|
| 282 |
-
# GATHER HELPERS
|
| 283 |
# ============================================================
|
| 284 |
|
| 285 |
def _build_gather_model(OH, OW, idx):
|
| 286 |
-
|
| 287 |
-
# Flatten spatial: [1,10,900] -> Gather(axis=2, indices=[900]) -> [1,10,900]
|
| 288 |
flat_idx = np.zeros((GH*GW,), dtype=np.int64)
|
| 289 |
mask = np.zeros((1,1,GH,GW), dtype=np.float32)
|
| 290 |
for oi in range(OH):
|
|
@@ -306,7 +329,7 @@ def _build_gather_model(OH, OW, idx):
|
|
| 306 |
return mk(nodes, inits)
|
| 307 |
|
| 308 |
def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
|
| 309 |
-
|
| 310 |
flat_idx = np.zeros((GH*GW,), dtype=np.int64)
|
| 311 |
gather_mask = np.zeros((1,1,GH,GW), dtype=np.float32)
|
| 312 |
const_oh = np.zeros((1,10,GH,GW), dtype=np.float32)
|
|
@@ -356,11 +379,9 @@ def s_color_map(td):
|
|
| 356 |
if iv in cm and cm[iv] != ov: return None
|
| 357 |
cm[iv] = ov
|
| 358 |
|
| 359 |
-
# Check if it's a permutation (bijective + all mapped colors form a closed set)
|
| 360 |
is_permutation = (set(cm.keys()) == set(cm.values()))
|
| 361 |
|
| 362 |
if is_permutation:
|
| 363 |
-
# Use channel Gather — zero MACs, much cheaper
|
| 364 |
gather_ch = np.arange(10, dtype=np.int32)
|
| 365 |
for src, dst in cm.items():
|
| 366 |
if 0 <= src < 10 and 0 <= dst < 10:
|
|
@@ -369,7 +390,6 @@ def s_color_map(td):
|
|
| 369 |
nodes = [helper.make_node('Gather', ['input', 'gi'], ['output'], axis=1)]
|
| 370 |
return mk(nodes, inits)
|
| 371 |
else:
|
| 372 |
-
# Non-permutation: use Conv 1x1 (has MACs but handles any mapping)
|
| 373 |
W = np.zeros((10,10,1,1), dtype=np.float32)
|
| 374 |
for ic in range(10):
|
| 375 |
W[cm.get(ic,ic), ic, 0, 0] = 1.0
|
|
@@ -377,44 +397,113 @@ def s_color_map(td):
|
|
| 377 |
[numpy_helper.from_array(W, 'W')])
|
| 378 |
|
| 379 |
def s_transpose(td):
|
|
|
|
| 380 |
for ex in td['train']+td['test']:
|
| 381 |
if not np.array_equal(np.array(ex['output']), np.array(ex['input']).T): return None
|
| 382 |
return mk([helper.make_node('Transpose', ['input'], ['output'], perm=[0,1,3,2])])
|
| 383 |
|
| 384 |
def s_flip(td):
|
|
|
|
| 385 |
exs = get_exs(td)
|
| 386 |
sp = fixed_shapes(td)
|
| 387 |
if sp is None: return None
|
| 388 |
(IH,IW),(OH,OW) = sp
|
| 389 |
if (IH,IW) != (OH,OW): return None
|
|
|
|
| 390 |
for axis, flip_fn in [(0, np.flipud), (1, np.fliplr)]:
|
| 391 |
if all(np.array_equal(out, flip_fn(inp)) for inp, out in exs):
|
| 392 |
-
#
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
return None
|
| 402 |
|
| 403 |
def s_rotate(td):
|
|
|
|
|
|
|
| 404 |
exs = get_exs(td)
|
| 405 |
sp = fixed_shapes(td)
|
| 406 |
if sp is None: return None
|
| 407 |
(IH,IW),(OH,OW) = sp
|
|
|
|
| 408 |
for k in [1, 2, 3]:
|
| 409 |
-
if not all(np.array_equal(out, np.rot90(inp, k)) for inp, out in exs):
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
return None
|
| 419 |
|
| 420 |
def s_spatial_gather(td):
|
|
@@ -440,10 +529,9 @@ def s_spatial_gather(td):
|
|
| 440 |
def s_varshape_spatial_gather(td):
|
| 441 |
"""Spatial gather that works for variable-shape tasks by embedding in 30x30."""
|
| 442 |
sp = fixed_shapes(td)
|
| 443 |
-
if sp is not None: return None
|
| 444 |
exs = get_exs(td)
|
| 445 |
|
| 446 |
-
# Embed all examples in 30x30
|
| 447 |
exs_30 = []
|
| 448 |
for inp, out in exs:
|
| 449 |
ih, iw = inp.shape
|
|
@@ -495,15 +583,15 @@ def s_tile(td):
|
|
| 495 |
if not np.array_equal(out, np.tile(inp, (rH, rW))): return None
|
| 496 |
pad_h, pad_w = 30-OH, 30-OW
|
| 497 |
inits = [
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
]
|
| 502 |
nodes = [
|
| 503 |
helper.make_node('Slice', ['input','st','en'], ['cr']),
|
| 504 |
helper.make_node('Tile', ['cr','rp'], ['tl']),
|
| 505 |
-
helper.make_node('Pad', ['tl'], ['output'], pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0),
|
| 506 |
]
|
|
|
|
| 507 |
return mk(nodes, inits)
|
| 508 |
|
| 509 |
def s_upscale(td):
|
|
@@ -584,13 +672,11 @@ def s_concat_enhanced(td):
|
|
| 584 |
(IH,IW),(OH,OW) = sp
|
| 585 |
if IH == OH and IW == OW: return None
|
| 586 |
|
| 587 |
-
# Need block decomposition
|
| 588 |
if OH % IH != 0 or OW % IW != 0: return None
|
| 589 |
rH, rW = OH // IH, OW // IW
|
| 590 |
if rH * rW > 16 or rH * rW < 2: return None
|
| 591 |
if OH > 30 or OW > 30: return None
|
| 592 |
|
| 593 |
-
# All 8 symmetry transforms of the dihedral group
|
| 594 |
transforms = [
|
| 595 |
('id', lambda x: x),
|
| 596 |
('fliplr', lambda x: np.fliplr(x)),
|
|
@@ -602,7 +688,6 @@ def s_concat_enhanced(td):
|
|
| 602 |
('T_fliplr', lambda x: np.fliplr(x.T)),
|
| 603 |
]
|
| 604 |
|
| 605 |
-
# For each block, find which transform matches
|
| 606 |
block_transforms = {}
|
| 607 |
for bi in range(rH):
|
| 608 |
for bj in range(rW):
|
|
@@ -622,7 +707,6 @@ def s_concat_enhanced(td):
|
|
| 622 |
return None
|
| 623 |
block_transforms[(bi, bj)] = found
|
| 624 |
|
| 625 |
-
# Build index map
|
| 626 |
idx = np.zeros((OH, OW, 2), dtype=np.int64)
|
| 627 |
for bi in range(rH):
|
| 628 |
for bj in range(rW):
|
|
@@ -640,7 +724,6 @@ def s_concat_enhanced(td):
|
|
| 640 |
elif tname == 'T_fliplr': sr, sc = IW-1-lc, lr
|
| 641 |
idx[oi, oj] = [sr, sc]
|
| 642 |
|
| 643 |
-
# Verify
|
| 644 |
for inp, out in exs:
|
| 645 |
reconstructed = np.zeros_like(out)
|
| 646 |
for oi in range(OH):
|
|
@@ -672,15 +755,6 @@ def s_input_driven_tile(td):
|
|
| 672 |
else:
|
| 673 |
if not np.all(block == 0):
|
| 674 |
return None
|
| 675 |
-
|
| 676 |
-
# Build gather model: each output pixel at (bi*IH+lr, bj*IW+lc) maps to
|
| 677 |
-
# input[lr, lc] if input[bi, bj] != 0, else constant 0
|
| 678 |
-
# Problem: whether block is active depends on input value, which varies.
|
| 679 |
-
# This needs a different ONNX approach: can't use static gather.
|
| 680 |
-
# But we CAN use: Tile input -> Mul by mask derived from input
|
| 681 |
-
# Actually we need: for each (bi,bj) block position, multiply by inp[bi,bj] != 0
|
| 682 |
-
# This is NOT static - it depends on input content.
|
| 683 |
-
# Skip for now - spatial_gather can handle if block positions are fixed.
|
| 684 |
return None
|
| 685 |
|
| 686 |
def s_kronecker(td):
|
|
@@ -699,7 +773,6 @@ def s_kronecker(td):
|
|
| 699 |
if not np.array_equal(out, expected):
|
| 700 |
return None
|
| 701 |
|
| 702 |
-
# This is identical to upscale - build gather index
|
| 703 |
idx = np.zeros((OH,OW,2), dtype=np.int64)
|
| 704 |
for r in range(OH):
|
| 705 |
for c in range(OW):
|
|
@@ -728,7 +801,6 @@ def s_diagonal_tile(td):
|
|
| 728 |
if not np.all(block == 0):
|
| 729 |
return None
|
| 730 |
|
| 731 |
-
# Build: diagonal blocks map to input, off-diagonal are constant 0
|
| 732 |
idx = np.zeros((OH,OW,2), dtype=np.int64)
|
| 733 |
cst = np.full((OH,OW), -1, dtype=np.int64)
|
| 734 |
for bi in range(rH):
|
|
@@ -765,9 +837,8 @@ def s_shift(td):
|
|
| 765 |
if not np.array_equal(shifted, out):
|
| 766 |
ok = False; break
|
| 767 |
if not ok: continue
|
| 768 |
-
# Build gather index
|
| 769 |
idx = np.zeros((OH, OW, 2), dtype=np.int64)
|
| 770 |
-
cst = np.full((OH, OW), 0, dtype=np.int64)
|
| 771 |
for r in range(OH):
|
| 772 |
for c in range(OW):
|
| 773 |
sr, sc = r - dr, c - dc
|
|
@@ -802,10 +873,6 @@ def s_gravity(td):
|
|
| 802 |
|
| 803 |
for d in ('down', 'up', 'left', 'right'):
|
| 804 |
if all(np.array_equal(_gravity(inp, d), out) for inp, out in exs):
|
| 805 |
-
# Gravity is input-dependent (positions depend on content)
|
| 806 |
-
# Can't use static Gather — need Conv to learn it
|
| 807 |
-
# But conv also can't learn arbitrary sorting...
|
| 808 |
-
# Skip for now — this needs a specialized ONNX graph
|
| 809 |
return None
|
| 810 |
return None
|
| 811 |
|
|
@@ -820,7 +887,6 @@ def s_mirror_h(td):
|
|
| 820 |
for inp, out in exs:
|
| 821 |
expected = np.concatenate([inp, np.flip(inp, 1)], 1)
|
| 822 |
if not np.array_equal(expected, out): return None
|
| 823 |
-
# Build gather index
|
| 824 |
idx = np.zeros((OH, OW, 2), dtype=np.int64)
|
| 825 |
for r in range(OH):
|
| 826 |
for c in range(OW):
|
|
@@ -984,9 +1050,7 @@ def solve_conv_fixed(td, path, time_budget=30.0):
|
|
| 984 |
if len(shapes) != 1: return None
|
| 985 |
IH, IW = shapes.pop()
|
| 986 |
|
| 987 |
-
# Use ARC-GEN data for better fitting
|
| 988 |
fit_exs = get_exs_for_fitting(td)
|
| 989 |
-
# Filter to same-shape, same IH/IW
|
| 990 |
fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape and i.shape == (IH, IW)]
|
| 991 |
|
| 992 |
t_start = time.time()
|
|
@@ -1000,8 +1064,8 @@ def solve_conv_fixed(td, path, time_budget=30.0):
|
|
| 1000 |
pad_h, pad_w = GH - IH, GW - IW
|
| 1001 |
|
| 1002 |
inits = [
|
| 1003 |
-
|
| 1004 |
-
|
| 1005 |
numpy_helper.from_array(Wconv, 'W'),
|
| 1006 |
]
|
| 1007 |
conv_inputs = ['grid', 'W']
|
|
@@ -1015,10 +1079,7 @@ def solve_conv_fixed(td, path, time_budget=30.0):
|
|
| 1015 |
helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
|
| 1016 |
]
|
| 1017 |
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1018 |
-
nodes.append(
|
| 1019 |
-
helper.make_node('Pad', ['oh_out'], ['output'],
|
| 1020 |
-
pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0)
|
| 1021 |
-
)
|
| 1022 |
|
| 1023 |
model = mk(nodes, inits)
|
| 1024 |
onnx.save(model, path)
|
|
@@ -1031,7 +1092,6 @@ def solve_conv_variable(td, path, time_budget=30.0):
|
|
| 1031 |
for inp, out in exs:
|
| 1032 |
if inp.shape != out.shape: return None
|
| 1033 |
|
| 1034 |
-
# Use ARC-GEN data for better fitting (variable shape, embedded in 30x30)
|
| 1035 |
fit_exs = get_exs_for_fitting_variable(td)
|
| 1036 |
fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape]
|
| 1037 |
|
|
@@ -1122,11 +1182,11 @@ def solve_conv_diffshape(td, path, time_budget=30.0):
|
|
| 1122 |
|
| 1123 |
pad_h, pad_w = GH - OH, GW - OW
|
| 1124 |
inits = [
|
| 1125 |
-
|
| 1126 |
-
|
| 1127 |
numpy_helper.from_array(Wconv, 'W'),
|
| 1128 |
-
|
| 1129 |
-
|
| 1130 |
]
|
| 1131 |
conv_inputs = ['grid', 'W']
|
| 1132 |
if B is not None:
|
|
@@ -1140,10 +1200,7 @@ def solve_conv_diffshape(td, path, time_budget=30.0):
|
|
| 1140 |
helper.make_node('ArgMax', ['co_crop'], ['am'], axis=1, keepdims=1),
|
| 1141 |
]
|
| 1142 |
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1143 |
-
nodes.append(
|
| 1144 |
-
helper.make_node('Pad', ['oh_out'], ['output'],
|
| 1145 |
-
pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0)
|
| 1146 |
-
)
|
| 1147 |
|
| 1148 |
model = mk(nodes, inits)
|
| 1149 |
onnx.save(model, path)
|
|
@@ -1151,8 +1208,7 @@ def solve_conv_diffshape(td, path, time_budget=30.0):
|
|
| 1151 |
return None
|
| 1152 |
|
| 1153 |
def solve_conv_var_diff(td, path, time_budget=30.0):
|
| 1154 |
-
"""Variable diff-shape conv: Conv(30x30) -> ArgMax -> Equal+Cast -> Mul(output_mask).
|
| 1155 |
-
Works when output shape differs from input but mapping is convolutional on 30x30 grid."""
|
| 1156 |
exs = get_exs(td)
|
| 1157 |
|
| 1158 |
t_start = time.time()
|
|
@@ -1200,495 +1256,214 @@ def solve_conv_var_diff(td, path, time_budget=30.0):
|
|
| 1200 |
Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
|
| 1201 |
B = None
|
| 1202 |
|
| 1203 |
-
# Use ReduceSum of output channels as mask (sum across channels == 1 for valid pixels)
|
| 1204 |
-
# But we don't know the output mask at inference time from input alone...
|
| 1205 |
-
# We need a way to derive the output mask from the input.
|
| 1206 |
-
# For same-shape: mask = ReduceSum(input, axis=1) works
|
| 1207 |
-
# For diff-shape: we need to compute the output mask differently
|
| 1208 |
-
#
|
| 1209 |
-
# Approach: Conv output at valid positions should have max > threshold,
|
| 1210 |
-
# and at padding positions max ≈ 0. Use the ArgMax+OneHot and then
|
| 1211 |
-
# mask with ReduceSum(input) which is 1 at input positions but 0 at padding.
|
| 1212 |
-
# BUT output may be LARGER than input...
|
| 1213 |
-
#
|
| 1214 |
-
# Alternative: just use Conv -> ArgMax -> Equal+Cast -> Mul(input_mask_expanded)
|
| 1215 |
-
# where input_mask covers the output region too.
|
| 1216 |
-
# This won't work if output extends beyond input region.
|
| 1217 |
-
#
|
| 1218 |
-
# Simplest correct approach: let the conv produce valid one-hot everywhere,
|
| 1219 |
-
# then the padding region should naturally produce channel-0 output.
|
| 1220 |
-
# Since padding is all-zero input, conv output there = bias only.
|
| 1221 |
-
# If no bias, conv output = 0 for all channels -> argmax gives channel 0 -> onehot gives [1,0,...,0]
|
| 1222 |
-
# which equals the padding encoding (channel 0 = 1 in padding).
|
| 1223 |
-
# Wait - that's WRONG for the NeuroGolf format. In the padding region, ALL channels should be 0.
|
| 1224 |
-
# The one-hot encoding has channel[color]=1, but padding = ALL zeros.
|
| 1225 |
-
#
|
| 1226 |
-
# So we NEED a mask. But for diff-shape, what mask?
|
| 1227 |
-
# If output is always top-left aligned and we know max output size...
|
| 1228 |
-
# We can't statically determine the output mask from the input.
|
| 1229 |
-
#
|
| 1230 |
-
# However: we can try the ReduceSum approach anyway — if conv naturally
|
| 1231 |
-
# produces channel-0 dominant output in padding, then:
|
| 1232 |
-
# mask = ReduceSum(input, axis=1) gives 1 for input pixels, 0 for padding
|
| 1233 |
-
# If output region ⊆ input region, this works.
|
| 1234 |
-
# If output region > input region... we need the output's ReduceSum instead.
|
| 1235 |
-
|
| 1236 |
# For tasks where output fits within input bounds, use input mask
|
| 1237 |
all_output_within_input = all(
|
| 1238 |
-
out_g.shape[0] <= inp_g.shape[0] and out_g.shape[1] <= inp_g.shape[1]
|
| 1239 |
for inp_g, out_g in exs
|
| 1240 |
)
|
| 1241 |
|
| 1242 |
-
if
|
| 1243 |
-
|
| 1244 |
-
|
| 1245 |
-
|
| 1246 |
-
|
| 1247 |
-
|
| 1248 |
-
|
| 1249 |
-
|
| 1250 |
-
|
| 1251 |
-
|
| 1252 |
-
|
| 1253 |
-
|
| 1254 |
-
|
| 1255 |
-
|
| 1256 |
-
|
| 1257 |
-
|
| 1258 |
-
|
| 1259 |
-
model = mk(nodes, inits)
|
| 1260 |
-
onnx.save(model, path)
|
| 1261 |
-
if validate(path, td): return 'conv_var_diff', model
|
| 1262 |
-
return None
|
| 1263 |
-
|
| 1264 |
-
# ============================================================
|
| 1265 |
-
# PYTORCH LEARNED CONV (gradient descent, multi-seed, ternary snap)
|
| 1266 |
-
# ============================================================
|
| 1267 |
-
|
| 1268 |
-
def _ternary_snap(w, eps=0.2):
|
| 1269 |
-
"""Snap weights to {-1, 0, 1} — smaller model, often still correct."""
|
| 1270 |
-
return np.where(w > eps, 1.0, np.where(w < -eps, -1.0, 0.0)).astype(np.float32)
|
| 1271 |
-
|
| 1272 |
-
def _build_conv_onnx_from_weights(W, ks, use_full_30=False, IH=None, IW=None):
|
| 1273 |
-
"""Build ONNX conv model from numpy weight array W [10,10,ks,ks].
|
| 1274 |
-
For fixed-shape: Slice→Conv→ArgMax→Equal+Cast→Pad
|
| 1275 |
-
For variable/full30: Conv→ArgMax→Equal+Cast→Mul(mask)"""
|
| 1276 |
-
pad = ks // 2
|
| 1277 |
-
if use_full_30:
|
| 1278 |
-
# Variable shape: full 30x30 conv with mask
|
| 1279 |
-
inits = [numpy_helper.from_array(W, 'W')]
|
| 1280 |
-
nodes = [
|
| 1281 |
-
helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1),
|
| 1282 |
-
helper.make_node('Conv', ['input', 'W'], ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
|
| 1283 |
-
helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
|
| 1284 |
-
]
|
| 1285 |
-
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1286 |
-
nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
|
| 1287 |
-
return mk(nodes, inits)
|
| 1288 |
-
else:
|
| 1289 |
-
# Fixed shape: slice, conv, pad
|
| 1290 |
-
pad_h, pad_w = GH - IH, GW - IW
|
| 1291 |
-
inits = [
|
| 1292 |
-
numpy_helper.from_array(np.array([0,0,0,0], dtype=np.int64), 'sl_st'),
|
| 1293 |
-
numpy_helper.from_array(np.array([1,10,IH,IW], dtype=np.int64), 'sl_en'),
|
| 1294 |
-
numpy_helper.from_array(W, 'W'),
|
| 1295 |
-
]
|
| 1296 |
-
nodes = [
|
| 1297 |
-
helper.make_node('Slice', ['input','sl_st','sl_en'], ['grid']),
|
| 1298 |
-
helper.make_node('Conv', ['grid', 'W'], ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
|
| 1299 |
-
helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
|
| 1300 |
-
]
|
| 1301 |
-
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1302 |
-
nodes.append(
|
| 1303 |
-
helper.make_node('Pad', ['oh_out'], ['output'],
|
| 1304 |
-
pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0)
|
| 1305 |
-
)
|
| 1306 |
-
return mk(nodes, inits)
|
| 1307 |
-
|
| 1308 |
-
def _build_two_layer_conv_onnx(W1, W2, ks1, ks2, use_full_30=False, IH=None, IW=None):
|
| 1309 |
-
"""Build ONNX two-layer conv: Conv→ReLU→Conv→ArgMax→Equal+Cast→Pad/Mul(mask)."""
|
| 1310 |
-
pad1, pad2 = ks1 // 2, ks2 // 2
|
| 1311 |
-
if use_full_30:
|
| 1312 |
-
inits = [
|
| 1313 |
-
numpy_helper.from_array(W1, 'W1'),
|
| 1314 |
-
numpy_helper.from_array(W2, 'W2'),
|
| 1315 |
-
]
|
| 1316 |
-
nodes = [
|
| 1317 |
-
helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1),
|
| 1318 |
-
helper.make_node('Conv', ['input', 'W1'], ['h1'], kernel_shape=[ks1,ks1], pads=[pad1]*4),
|
| 1319 |
-
helper.make_node('Relu', ['h1'], ['h1r']),
|
| 1320 |
-
helper.make_node('Conv', ['h1r', 'W2'], ['co'], kernel_shape=[ks2,ks2], pads=[pad2]*4),
|
| 1321 |
-
helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
|
| 1322 |
-
]
|
| 1323 |
-
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1324 |
-
nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
|
| 1325 |
-
return mk(nodes, inits)
|
| 1326 |
-
else:
|
| 1327 |
-
pad_h, pad_w = GH - IH, GW - IW
|
| 1328 |
-
inits = [
|
| 1329 |
-
numpy_helper.from_array(np.array([0,0,0,0], dtype=np.int64), 'sl_st'),
|
| 1330 |
-
numpy_helper.from_array(np.array([1,10,IH,IW], dtype=np.int64), 'sl_en'),
|
| 1331 |
-
numpy_helper.from_array(W1, 'W1'),
|
| 1332 |
-
numpy_helper.from_array(W2, 'W2'),
|
| 1333 |
-
]
|
| 1334 |
-
nodes = [
|
| 1335 |
-
helper.make_node('Slice', ['input','sl_st','sl_en'], ['grid']),
|
| 1336 |
-
helper.make_node('Conv', ['grid', 'W1'], ['h1'], kernel_shape=[ks1,ks1], pads=[pad1]*4),
|
| 1337 |
-
helper.make_node('Relu', ['h1'], ['h1r']),
|
| 1338 |
-
helper.make_node('Conv', ['h1r', 'W2'], ['co'], kernel_shape=[ks2,ks2], pads=[pad2]*4),
|
| 1339 |
-
helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
|
| 1340 |
-
]
|
| 1341 |
-
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1342 |
-
nodes.append(
|
| 1343 |
-
helper.make_node('Pad', ['oh_out'], ['output'],
|
| 1344 |
-
pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0)
|
| 1345 |
-
)
|
| 1346 |
-
return mk(nodes, inits)
|
| 1347 |
-
|
| 1348 |
-
def solve_pytorch_conv(td, path, time_budget=30.0):
|
| 1349 |
-
"""PyTorch gradient descent conv solver. Tries single-layer then two-layer.
|
| 1350 |
-
Multi-seed training with ternary weight snapping for smaller models.
|
| 1351 |
-
Validates against arc-gen before accepting."""
|
| 1352 |
-
try:
|
| 1353 |
-
import torch
|
| 1354 |
-
import torch.nn as nn
|
| 1355 |
-
import copy as _copy
|
| 1356 |
-
except ImportError:
|
| 1357 |
-
return None
|
| 1358 |
-
|
| 1359 |
-
exs = get_exs(td)
|
| 1360 |
-
same_shape = all(inp.shape == out.shape for inp, out in exs)
|
| 1361 |
-
if not same_shape:
|
| 1362 |
-
return None # Only handle same-shape for now
|
| 1363 |
-
|
| 1364 |
-
shapes = set(inp.shape for inp, _ in exs)
|
| 1365 |
-
fixed_in = len(shapes) == 1
|
| 1366 |
-
|
| 1367 |
-
# Prepare tensors
|
| 1368 |
-
all_pairs = td['train'] + td['test']
|
| 1369 |
-
inp_list = [to_onehot(p['input'])[0] for p in all_pairs]
|
| 1370 |
-
out_list = [to_onehot(p['output'])[0] for p in all_pairs]
|
| 1371 |
-
inp_t = torch.tensor(np.stack(inp_list), dtype=torch.float32)
|
| 1372 |
-
out_t = torch.tensor(np.stack(out_list), dtype=torch.float32)
|
| 1373 |
-
|
| 1374 |
-
if fixed_in:
|
| 1375 |
-
IH, IW = list(shapes)[0]
|
| 1376 |
-
# Train on cropped region
|
| 1377 |
-
inp_t = inp_t[:, :, :IH, :IW]
|
| 1378 |
-
out_t = out_t[:, :, :IH, :IW]
|
| 1379 |
-
|
| 1380 |
-
t_start = time.time()
|
| 1381 |
-
best_result = None
|
| 1382 |
-
|
| 1383 |
-
# Phase 1: Single-layer conv (multiple kernel sizes and seeds)
|
| 1384 |
-
for ks in [1, 3, 5, 7]:
|
| 1385 |
-
if time.time() - t_start > time_budget * 0.6:
|
| 1386 |
-
break
|
| 1387 |
-
pad = ks // 2
|
| 1388 |
-
for seed in [0, 7, 42]:
|
| 1389 |
-
if time.time() - t_start > time_budget * 0.6:
|
| 1390 |
-
break
|
| 1391 |
-
torch.manual_seed(seed)
|
| 1392 |
-
conv = nn.Conv2d(CH, CH, kernel_size=ks, padding=pad, bias=False)
|
| 1393 |
-
if seed == 0:
|
| 1394 |
-
nn.init.zeros_(conv.weight)
|
| 1395 |
-
opt = torch.optim.Adam(conv.parameters(), lr=0.03)
|
| 1396 |
-
best_loss, best_state = float('inf'), None
|
| 1397 |
-
for step in range(3000):
|
| 1398 |
-
opt.zero_grad()
|
| 1399 |
-
pred = conv(inp_t)
|
| 1400 |
-
loss = nn.functional.mse_loss(pred, out_t)
|
| 1401 |
-
loss.backward()
|
| 1402 |
-
opt.step()
|
| 1403 |
-
if loss.item() < best_loss:
|
| 1404 |
-
best_loss = loss.item()
|
| 1405 |
-
best_state = _copy.deepcopy(conv.state_dict())
|
| 1406 |
-
if best_loss < 1e-8:
|
| 1407 |
-
break
|
| 1408 |
-
if best_state is None:
|
| 1409 |
-
continue
|
| 1410 |
-
conv.load_state_dict(best_state)
|
| 1411 |
-
w = conv.weight.detach().numpy()
|
| 1412 |
-
|
| 1413 |
-
# Try continuous weights, then ternary-snapped
|
| 1414 |
-
for w_cand in [w, _ternary_snap(w)]:
|
| 1415 |
-
use_full = not fixed_in
|
| 1416 |
-
model = _build_conv_onnx_from_weights(
|
| 1417 |
-
w_cand, ks, use_full_30=use_full,
|
| 1418 |
-
IH=IH if fixed_in else None,
|
| 1419 |
-
IW=IW if fixed_in else None
|
| 1420 |
-
)
|
| 1421 |
-
onnx.save(model, path)
|
| 1422 |
-
if validate(path, td):
|
| 1423 |
-
sz = os.path.getsize(path)
|
| 1424 |
-
if best_result is None or sz < best_result[2]:
|
| 1425 |
-
best_result = ('pt_conv', model, sz)
|
| 1426 |
-
|
| 1427 |
-
# Phase 2: Two-layer conv (Conv→ReLU→Conv)
|
| 1428 |
-
for ks1, ks2, hidden in [(3, 1, CH), (5, 1, CH), (3, 3, CH)]:
|
| 1429 |
-
if time.time() - t_start > time_budget:
|
| 1430 |
-
break
|
| 1431 |
-
for seed in [0, 7]:
|
| 1432 |
-
if time.time() - t_start > time_budget:
|
| 1433 |
-
break
|
| 1434 |
-
torch.manual_seed(seed)
|
| 1435 |
-
net = nn.Sequential(
|
| 1436 |
-
nn.Conv2d(CH, hidden, kernel_size=ks1, padding=ks1//2, bias=False),
|
| 1437 |
-
nn.ReLU(),
|
| 1438 |
-
nn.Conv2d(hidden, CH, kernel_size=ks2, padding=ks2//2, bias=False),
|
| 1439 |
-
)
|
| 1440 |
-
opt = torch.optim.Adam(net.parameters(), lr=0.01)
|
| 1441 |
-
best_loss, best_state = float('inf'), None
|
| 1442 |
-
for step in range(2500):
|
| 1443 |
-
opt.zero_grad()
|
| 1444 |
-
pred = net(inp_t)
|
| 1445 |
-
loss = nn.functional.mse_loss(pred, out_t)
|
| 1446 |
-
loss.backward()
|
| 1447 |
-
opt.step()
|
| 1448 |
-
if loss.item() < best_loss:
|
| 1449 |
-
best_loss = loss.item()
|
| 1450 |
-
best_state = _copy.deepcopy(net.state_dict())
|
| 1451 |
-
if best_loss < 1e-8:
|
| 1452 |
-
break
|
| 1453 |
-
if best_state is None:
|
| 1454 |
-
continue
|
| 1455 |
-
net.load_state_dict(best_state)
|
| 1456 |
-
w1 = net[0].weight.detach().numpy()
|
| 1457 |
-
w2 = net[2].weight.detach().numpy()
|
| 1458 |
-
|
| 1459 |
-
for w1c, w2c in [(w1, w2), (_ternary_snap(w1), _ternary_snap(w2))]:
|
| 1460 |
-
use_full = not fixed_in
|
| 1461 |
-
model = _build_two_layer_conv_onnx(
|
| 1462 |
-
w1c, w2c, ks1, ks2, use_full_30=use_full,
|
| 1463 |
-
IH=IH if fixed_in else None,
|
| 1464 |
-
IW=IW if fixed_in else None
|
| 1465 |
-
)
|
| 1466 |
onnx.save(model, path)
|
| 1467 |
-
if validate(path, td):
|
| 1468 |
-
sz = os.path.getsize(path)
|
| 1469 |
-
if best_result is None or sz < best_result[2]:
|
| 1470 |
-
best_result = ('pt_conv2', model, sz)
|
| 1471 |
-
|
| 1472 |
-
if best_result is not None:
|
| 1473 |
-
sname, model, _ = best_result
|
| 1474 |
-
onnx.save(model, path)
|
| 1475 |
-
return sname, model
|
| 1476 |
return None
|
| 1477 |
|
| 1478 |
# ============================================================
|
| 1479 |
-
# MAIN
|
| 1480 |
# ============================================================
|
| 1481 |
|
| 1482 |
ANALYTICAL_SOLVERS = [
|
| 1483 |
-
('identity', s_identity),
|
| 1484 |
-
('
|
| 1485 |
-
('
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1486 |
('nonuniform_scale', s_nonuniform_scale),
|
| 1487 |
-
('mirror_h', s_mirror_h),
|
| 1488 |
-
('
|
|
|
|
|
|
|
|
|
|
| 1489 |
('diagonal_tile', s_diagonal_tile),
|
| 1490 |
('fixed_crop', s_fixed_crop),
|
| 1491 |
('spatial_gather', s_spatial_gather),
|
| 1492 |
-
('shift', s_shift),
|
| 1493 |
('varshape_spatial_gather', s_varshape_spatial_gather),
|
| 1494 |
]
|
| 1495 |
|
| 1496 |
-
def solve_task(tn, td,
|
| 1497 |
-
|
| 1498 |
-
os.
|
| 1499 |
-
path = os.path.join(outdir, f"task{tn:03d}.onnx")
|
| 1500 |
-
|
| 1501 |
-
# Skip excluded tasks
|
| 1502 |
-
if tn in EXCLUDED_TASKS:
|
| 1503 |
-
return False, 'excluded', None, time.time() - t_start, path
|
| 1504 |
|
| 1505 |
-
#
|
| 1506 |
-
for
|
| 1507 |
try:
|
| 1508 |
-
model =
|
| 1509 |
-
|
|
|
|
|
|
|
|
|
|
| 1510 |
onnx.save(model, path)
|
| 1511 |
-
if validate(path, td):
|
| 1512 |
-
|
| 1513 |
-
|
| 1514 |
-
|
| 1515 |
-
|
| 1516 |
-
|
| 1517 |
-
|
| 1518 |
-
|
| 1519 |
-
|
| 1520 |
-
|
| 1521 |
-
|
| 1522 |
-
|
| 1523 |
-
|
| 1524 |
-
|
| 1525 |
-
|
| 1526 |
-
|
| 1527 |
-
|
| 1528 |
-
|
| 1529 |
-
|
| 1530 |
-
|
| 1531 |
-
|
| 1532 |
-
|
| 1533 |
-
|
| 1534 |
-
remaining = max(1, conv_time - (time.time() - t_start))
|
| 1535 |
-
result = solve_pytorch_conv(td, path, time_budget=remaining)
|
| 1536 |
-
if result is not None:
|
| 1537 |
-
sname, model = result
|
| 1538 |
-
return True, sname, os.path.getsize(path), time.time() - t_start, path
|
| 1539 |
-
else:
|
| 1540 |
-
sp = fixed_shapes(td)
|
| 1541 |
-
if sp is not None:
|
| 1542 |
-
(IH,IW),(OH,OW) = sp
|
| 1543 |
-
if OH <= IH and OW <= IW:
|
| 1544 |
-
result = solve_conv_diffshape(td, path, time_budget=conv_time)
|
| 1545 |
-
if result is not None:
|
| 1546 |
-
sname, model = result
|
| 1547 |
-
return True, sname, os.path.getsize(path), time.time() - t_start, path
|
| 1548 |
-
|
| 1549 |
-
# Try variable diff-shape conv (output within input bounds)
|
| 1550 |
-
result = solve_conv_var_diff(td, path, time_budget=conv_time)
|
| 1551 |
if result is not None:
|
| 1552 |
-
|
| 1553 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1554 |
|
| 1555 |
-
return
|
| 1556 |
-
|
| 1557 |
-
def run_tasks(task_nums, tasks, output_dir, conv_budget, use_wandb):
|
| 1558 |
-
results = {}
|
| 1559 |
-
costs_dict = {}
|
| 1560 |
-
total_score = 0
|
| 1561 |
-
for tn in task_nums:
|
| 1562 |
-
if tn not in tasks:
|
| 1563 |
-
continue
|
| 1564 |
-
if tn in EXCLUDED_TASKS:
|
| 1565 |
-
print(f"Task {tn:3d}: EXCLUDED (officially)")
|
| 1566 |
-
continue
|
| 1567 |
-
|
| 1568 |
-
td = tasks[tn]['data']
|
| 1569 |
-
ok, sname, sz, t_task, model_path = solve_task(tn, td, output_dir, conv_budget)
|
| 1570 |
-
|
| 1571 |
-
if ok:
|
| 1572 |
-
macs, memory, params = score_network(model_path)
|
| 1573 |
-
if macs is None:
|
| 1574 |
-
macs, memory, params = 0, 0, 0
|
| 1575 |
-
cost = macs + memory + params
|
| 1576 |
-
score = max(1.0, 25.0 - math.log(max(1, cost)))
|
| 1577 |
-
total_score += score
|
| 1578 |
-
|
| 1579 |
-
results[tn] = (sname, t_task, sz)
|
| 1580 |
-
costs_dict[tn] = cost
|
| 1581 |
-
print(f"Task {tn:3d}: {sname:25s} {score:7.3f} {cost:>12} {t_task:7.3f}s ({sz:>8,} bytes)")
|
| 1582 |
-
else:
|
| 1583 |
-
print(f"Task {tn:3d}: UNSOLVED {t_task:7.3f}s")
|
| 1584 |
-
cost = 0
|
| 1585 |
-
|
| 1586 |
-
if use_wandb and wandb is not None:
|
| 1587 |
-
wandb.log({
|
| 1588 |
-
"task_id": tn,
|
| 1589 |
-
"solver": sname if ok else "unsolved",
|
| 1590 |
-
"onnx_bytes": sz if ok else 0,
|
| 1591 |
-
"task_time_sec": t_task,
|
| 1592 |
-
"cost": cost,
|
| 1593 |
-
"score": score if ok else 0,
|
| 1594 |
-
})
|
| 1595 |
-
|
| 1596 |
-
return results, costs_dict, total_score
|
| 1597 |
-
|
| 1598 |
|
| 1599 |
def main():
|
| 1600 |
-
parser = argparse.ArgumentParser()
|
| 1601 |
-
parser.add_argument('--data_dir', default='ARC-AGI
|
| 1602 |
-
parser.add_argument('--
|
| 1603 |
-
parser.add_argument('--
|
| 1604 |
-
parser.add_argument('--
|
| 1605 |
-
parser.add_argument('--conv_budget', type=float, default=30.0)
|
| 1606 |
-
parser.add_argument('--
|
| 1607 |
-
parser.add_argument('--
|
| 1608 |
-
parser.add_argument('--
|
| 1609 |
args = parser.parse_args()
|
| 1610 |
-
global ORT_PROVIDERS
|
| 1611 |
-
config = {
|
| 1612 |
-
"device": args.device,
|
| 1613 |
-
"conv_budget": args.conv_budget,
|
| 1614 |
-
"data_dir": args.data_dir,
|
| 1615 |
-
"arcgen_dir": args.arcgen_dir,
|
| 1616 |
-
"tasks": args.tasks,
|
| 1617 |
-
}
|
| 1618 |
-
|
| 1619 |
-
if args.device == 'cuda':
|
| 1620 |
-
ORT_PROVIDERS = ['CUDAExecutionProvider', 'CPUExecutionProvider']
|
| 1621 |
-
elif args.device == 'cpu':
|
| 1622 |
-
ORT_PROVIDERS = ['CPUExecutionProvider']
|
| 1623 |
-
|
| 1624 |
-
ort.set_default_logger_severity(3)
|
| 1625 |
-
print(f"Using providers: {ORT_PROVIDERS}")
|
| 1626 |
-
|
| 1627 |
-
if args.kaggle:
|
| 1628 |
-
tasks = load_tasks_kaggle(args.data_dir)
|
| 1629 |
-
else:
|
| 1630 |
-
arcgen = args.arcgen_dir if args.arcgen_dir else None
|
| 1631 |
-
tasks = load_tasks_dir(args.data_dir, arcgen_dir=arcgen)
|
| 1632 |
|
| 1633 |
-
|
| 1634 |
-
|
| 1635 |
-
print(f"Loaded {len(tasks)} tasks ({total_arcgen} ARC-GEN examples)")
|
| 1636 |
-
print(f"Excluded tasks: {sorted(EXCLUDED_TASKS)}")
|
| 1637 |
|
| 1638 |
-
|
| 1639 |
-
active_tasks = [t for t in task_nums if t not in EXCLUDED_TASKS]
|
| 1640 |
-
print(f"Solving {len(active_tasks)} active tasks (skipping {len(task_nums) - len(active_tasks)} excluded)")
|
| 1641 |
-
print(f"Conv budget: {args.conv_budget}s per task")
|
| 1642 |
-
print("=" * 70)
|
| 1643 |
-
t0 = time.time()
|
| 1644 |
|
| 1645 |
-
|
| 1646 |
-
|
| 1647 |
-
|
|
|
|
|
|
|
| 1648 |
else:
|
| 1649 |
-
|
| 1650 |
-
|
| 1651 |
-
|
| 1652 |
-
|
| 1653 |
-
|
| 1654 |
-
|
| 1655 |
-
|
| 1656 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1657 |
|
| 1658 |
-
#
|
| 1659 |
-
|
| 1660 |
-
|
| 1661 |
-
|
| 1662 |
-
|
| 1663 |
|
| 1664 |
-
|
| 1665 |
-
zip_path = os.path.join(os.path.dirname(outdir) or '.', 'submission.zip')
|
| 1666 |
-
buf = io.BytesIO()
|
| 1667 |
-
with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
|
| 1668 |
-
for f in sorted(os.listdir(outdir)):
|
| 1669 |
-
if f.endswith('.onnx'):
|
| 1670 |
-
zf.write(os.path.join(outdir, f), f)
|
| 1671 |
-
zip_bytes = buf.getvalue()
|
| 1672 |
-
with open(zip_path, 'wb') as f:
|
| 1673 |
-
f.write(zip_bytes)
|
| 1674 |
-
zip_size = len(zip_bytes)
|
| 1675 |
|
| 1676 |
-
|
| 1677 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1678 |
with open(csv_path, 'w', newline='') as f:
|
| 1679 |
w = csv.writer(f)
|
| 1680 |
-
w.writerow(['
|
| 1681 |
-
for tn in sorted(
|
| 1682 |
-
|
| 1683 |
-
|
| 1684 |
-
|
| 1685 |
-
|
| 1686 |
-
|
| 1687 |
-
|
| 1688 |
-
|
| 1689 |
-
|
| 1690 |
-
|
| 1691 |
-
|
|
|
|
|
|
|
|
|
|
| 1692 |
|
| 1693 |
if __name__ == '__main__':
|
| 1694 |
-
main()
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
+
ARC-AGI NeuroGolf Championship - Complete Solver v5
|
| 4 |
+
Format: [1,10,30,30] one-hot input/output, opset 17, IR version 8.
|
| 5 |
+
|
| 6 |
+
v5 CHANGES (from v4):
|
| 7 |
+
- Opset 10 → 17, IR 10 → 8
|
| 8 |
+
- s_flip: Slice(step=-1) replaces Gather — 0 MACs (was ~165K)
|
| 9 |
+
- s_rotate k=2: double Slice(step=-1) — 0 MACs (was ~165K)
|
| 10 |
+
- s_rotate k=1,3: Slice+Transpose for square grids (0 MACs), Gather fallback for non-square
|
| 11 |
+
- All Pad nodes: tensor-based pads input (opset 17 requirement)
|
| 12 |
+
- All other solvers unchanged from v4
|
|
|
|
| 13 |
|
| 14 |
Solvers:
|
| 15 |
- Analytical: identity, constant, color_map, transpose, flip, rotate, tile, upscale,
|
|
|
|
| 45 |
BATCH, CH, GH, GW = 1, 10, 30, 30
|
| 46 |
GRID_SHAPE = [BATCH, CH, GH, GW]
|
| 47 |
DT = TensorProto.FLOAT
|
| 48 |
+
IR = 8
|
| 49 |
+
OPSET = [helper.make_opsetid("", 17)]
|
| 50 |
+
|
| 51 |
+
INT64_MIN = int(np.iinfo(np.int64).min)
|
| 52 |
|
| 53 |
# Officially excluded tasks (score 0 regardless)
|
| 54 |
EXCLUDED_TASKS = {21, 55, 80, 184, 202, 366}
|
|
|
|
| 56 |
# Max ARC-GEN examples to use for validation (to keep runtime reasonable)
|
| 57 |
MAX_ARCGEN_VALIDATE = 30
|
| 58 |
# Max ARC-GEN examples for conv fitting (keep separate from validation!)
|
| 59 |
+
MAX_ARCGEN_FIT = 0
|
|
|
|
|
|
|
| 60 |
|
| 61 |
def get_providers():
|
| 62 |
return ['CPUExecutionProvider']
|
|
|
|
| 75 |
with open(os.path.join(data_dir, f)) as fh:
|
| 76 |
data = json.load(fh)
|
| 77 |
hex_id = f.replace('.json','')
|
|
|
|
| 78 |
if arcgen_dir and os.path.exists(os.path.join(arcgen_dir, f)):
|
| 79 |
with open(os.path.join(arcgen_dir, f)) as fh:
|
| 80 |
arcgen_examples = json.load(fh)
|
|
|
|
| 107 |
return arr
|
| 108 |
|
| 109 |
def validate(path, td):
|
| 110 |
+
"""Validate model against ALL examples: train + test + arc-gen."""
|
|
|
|
| 111 |
try:
|
| 112 |
opts = ort.SessionOptions()
|
| 113 |
opts.log_severity_level = 3
|
|
|
|
| 115 |
except:
|
| 116 |
return False
|
| 117 |
examples = td['train'] + td['test']
|
|
|
|
| 118 |
if 'arc-gen' in td:
|
| 119 |
examples = examples + td['arc-gen'][:MAX_ARCGEN_VALIDATE]
|
| 120 |
for ex in examples:
|
|
|
|
| 160 |
MAX_FILESIZE = int(1.44 * 1024 * 1024)
|
| 161 |
|
| 162 |
def score_network(path):
|
| 163 |
+
"""Static profiler matching Kaggle scoring: cost = macs + memory + params."""
|
|
|
|
| 164 |
if HAS_ONNX_TOOL:
|
| 165 |
try:
|
| 166 |
return _score_network_official(path)
|
|
|
|
| 210 |
|
| 211 |
return int(macs), int(nbytes), int(params)
|
| 212 |
|
| 213 |
+
# ============================================================
|
| 214 |
+
# OPSET 17 HELPERS
|
| 215 |
+
# ============================================================
|
| 216 |
+
|
| 217 |
+
def _make_int64_init(name, values):
    """Return an int64 ONNX tensor initializer named *name* holding *values*."""
    arr = np.asarray(values, dtype=np.int64)
    return numpy_helper.from_array(arr, name)
| 220 |
+
|
| 221 |
+
def _build_pad_node(input_name, output_name, pad_h, pad_w, inits, suffix=''):
    """Emit an opset-17 constant-mode Pad node (pads supplied as a tensor input).

    Only the spatial *end* sides are padded: pads = [0,0,0,0, 0,0,pad_h,pad_w].
    The pads tensor and the 0.0 fill-value initializer are appended to *inits*.
    """
    pads = 'pads' + suffix
    fill = 'pad_cv' + suffix
    pad_vec = np.array([0, 0, 0, 0, 0, 0, pad_h, pad_w], dtype=np.int64)
    inits.append(numpy_helper.from_array(pad_vec, pads))
    inits.append(numpy_helper.from_array(np.array(0.0, dtype=np.float32), fill))
    return helper.make_node('Pad', [input_name, pads, fill], [output_name], mode='constant')
|
| 230 |
+
|
| 231 |
+
def _build_slice_crop(input_name, output_name, IH, IW, inits, suffix=''):
    """Emit a Slice node that crops a [1,10,30,30] tensor down to [1,10,IH,IW]."""
    starts = f'crop_st{suffix}'
    ends = f'crop_en{suffix}'
    inits.extend([
        _make_int64_init(starts, [0, 0, 0, 0]),
        _make_int64_init(ends, [1, 10, IH, IW]),
    ])
    return helper.make_node('Slice', [input_name, starts, ends], [output_name])
|
| 238 |
+
|
| 239 |
+
def _build_slice_reverse(input_name, output_name, axis, dim_size, inits, suffix=''):
    """Emit a Slice node that reverses one axis via step=-1 (zero MACs).

    Starts at the last element and walks to INT64_MIN (ONNX's "past the
    beginning" sentinel for negative steps), covering the whole axis.
    """
    names = [f'rev_{tag}{suffix}' for tag in ('st', 'en', 'ax', 'sp')]
    for nm, vals in zip(names, ([dim_size - 1], [INT64_MIN], [axis], [-1])):
        inits.append(_make_int64_init(nm, vals))
    return helper.make_node('Slice', [input_name] + names, [output_name])
|
| 250 |
+
|
| 251 |
def mk(nodes, inits=None):
|
| 252 |
x = helper.make_tensor_value_info("input", DT, GRID_SHAPE)
|
| 253 |
y = helper.make_tensor_value_info("output", DT, GRID_SHAPE)
|
|
|
|
| 260 |
for ex in td['train'] + td['test']]
|
| 261 |
|
| 262 |
def get_exs_for_fitting(td):
|
| 263 |
+
"""Get examples for conv fitting. Uses train+test + arc-gen WHERE SIZES MATCH."""
|
|
|
|
|
|
|
|
|
|
| 264 |
base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
|
| 265 |
for ex in td['train'] + td['test']]
|
| 266 |
|
| 267 |
if not base_exs:
|
| 268 |
return base_exs
|
| 269 |
|
|
|
|
| 270 |
base_shapes = {inp.shape for inp, _ in base_exs}
|
| 271 |
if len(base_shapes) != 1:
|
| 272 |
+
return base_exs
|
| 273 |
|
| 274 |
base_shape = list(base_shapes)[0]
|
| 275 |
|
|
|
|
| 276 |
ag_exs = []
|
| 277 |
for ex in td.get('arc-gen', []):
|
| 278 |
inp = np.array(ex['input'], dtype=np.int64)
|
|
|
|
| 280 |
if inp.shape == base_shape and out.shape == base_exs[0][1].shape:
|
| 281 |
ag_exs.append((inp, out))
|
| 282 |
|
|
|
|
| 283 |
return base_exs + ag_exs[:10]
|
| 284 |
|
| 285 |
def get_exs_for_fitting_variable(td):
|
| 286 |
+
"""Get examples for variable-shape conv fitting."""
|
|
|
|
|
|
|
| 287 |
base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
|
| 288 |
for ex in td['train'] + td['test']]
|
| 289 |
|
|
|
|
| 290 |
ag_exs = []
|
| 291 |
for ex in td.get('arc-gen', []):
|
| 292 |
inp = np.array(ex['input'], dtype=np.int64)
|
|
|
|
| 303 |
return list(shapes)[0] if len(shapes) == 1 else None
|
| 304 |
|
| 305 |
# ============================================================
|
| 306 |
+
# GATHER HELPERS (kept for solvers that need them)
|
| 307 |
# ============================================================
|
| 308 |
|
| 309 |
def _build_gather_model(OH, OW, idx):
|
| 310 |
+
"""Gather-based spatial remapping. Used for concat, spatial_gather, etc."""
|
|
|
|
| 311 |
flat_idx = np.zeros((GH*GW,), dtype=np.int64)
|
| 312 |
mask = np.zeros((1,1,GH,GW), dtype=np.float32)
|
| 313 |
for oi in range(OH):
|
|
|
|
| 329 |
return mk(nodes, inits)
|
| 330 |
|
| 331 |
def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
|
| 332 |
+
"""Gather-based spatial remapping with constant pixels."""
|
| 333 |
flat_idx = np.zeros((GH*GW,), dtype=np.int64)
|
| 334 |
gather_mask = np.zeros((1,1,GH,GW), dtype=np.float32)
|
| 335 |
const_oh = np.zeros((1,10,GH,GW), dtype=np.float32)
|
|
|
|
| 379 |
if iv in cm and cm[iv] != ov: return None
|
| 380 |
cm[iv] = ov
|
| 381 |
|
|
|
|
| 382 |
is_permutation = (set(cm.keys()) == set(cm.values()))
|
| 383 |
|
| 384 |
if is_permutation:
|
|
|
|
| 385 |
gather_ch = np.arange(10, dtype=np.int32)
|
| 386 |
for src, dst in cm.items():
|
| 387 |
if 0 <= src < 10 and 0 <= dst < 10:
|
|
|
|
| 390 |
nodes = [helper.make_node('Gather', ['input', 'gi'], ['output'], axis=1)]
|
| 391 |
return mk(nodes, inits)
|
| 392 |
else:
|
|
|
|
| 393 |
W = np.zeros((10,10,1,1), dtype=np.float32)
|
| 394 |
for ic in range(10):
|
| 395 |
W[cm.get(ic,ic), ic, 0, 0] = 1.0
|
|
|
|
| 397 |
[numpy_helper.from_array(W, 'W')])
|
| 398 |
|
| 399 |
def s_transpose(td):
    """Solve pure matrix-transpose tasks with one Transpose node (near-zero cost)."""
    examples = td['train'] + td['test']
    if any(not np.array_equal(np.array(ex['output']), np.array(ex['input']).T)
           for ex in examples):
        return None
    node = helper.make_node('Transpose', ['input'], ['output'], perm=[0, 1, 3, 2])
    return mk([node])
|
| 404 |
|
| 405 |
def s_flip(td):
    """Solve horizontal/vertical flip tasks using Slice(step=-1) — zero MACs.

    Only fires on fixed-shape tasks where input and output shapes match.
    Pipeline: crop to [1,10,IH,IW] -> reverse one spatial axis -> pad back
    to the canonical [1,10,30,30] canvas.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if (IH, IW) != (OH, OW):
        return None

    for axis, flip_fn in [(0, np.flipud), (1, np.fliplr)]:
        if not all(np.array_equal(out, flip_fn(inp)) for inp, out in exs):
            continue
        # flipud reverses H (ONNX dim 2); fliplr reverses W (ONNX dim 3)
        onnx_axis, dim_size = (2, IH) if axis == 0 else (3, IW)
        inits = []
        nodes = [_build_slice_crop('input', 'cropped', IH, IW, inits)]
        nodes.append(_build_slice_reverse('cropped', 'flipped', onnx_axis, dim_size, inits))
        nodes.append(_build_pad_node('flipped', 'output', GH - IH, GW - IW, inits))
        return mk(nodes, inits)
    return None
|
| 435 |
|
| 436 |
def s_rotate(td):
    """Solve rotation tasks (np.rot90, k = 1, 2, 3).

    k=2 (180°) and square-grid k=1/k=3 use Slice/Transpose combos — zero MACs.
    Non-square k=1/k=3 fall back to a Gather model (correct but costlier).

    BUGFIX: the non-square Gather fallback had IH/IW swapped in the source
    index formulas.  For np.rot90 on an IH x IW input (output is IW x IH):
        k=1 (CCW): output[r,c] = input[c, IW-1-r]   (was IH-1-r)
        k=3 (CW):  output[r,c] = input[IH-1-c, r]   (was IW-1-c, which can
                                                     even index out of bounds)
    With the old formulas every non-square 90°/270° task failed validation.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None: return None
    (IH, IW), (OH, OW) = sp

    for k in [1, 2, 3]:
        if not all(np.array_equal(out, np.rot90(inp, k)) for inp, out in exs):
            continue

        if k == 2:
            # 180° = flipud + fliplr — works for any shape
            # output[r,c] = input[IH-1-r, IW-1-c]
            pad_h, pad_w = GH - OH, GW - OW
            inits = []
            nodes = []
            # Crop to [1,10,IH,IW], reverse H then W, pad back to 30x30.
            nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
            nodes.append(_build_slice_reverse('cropped', 'flip_h', 2, IH, inits, suffix='_h'))
            nodes.append(_build_slice_reverse('flip_h', 'rotated', 3, IW, inits, suffix='_w'))
            nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
            return mk(nodes, inits)

        elif k == 1 and IH == IW:
            # rot90 CCW on square grid: transpose then flipud.
            # temp[r,c] = input[c,r]; out[r,c] = temp[IH-1-r,c] = input[c, IH-1-r]
            pad_h, pad_w = GH - IH, GW - IW
            inits = []
            nodes = []
            nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
            nodes.append(helper.make_node('Transpose', ['cropped'], ['transposed'], perm=[0, 1, 3, 2]))
            nodes.append(_build_slice_reverse('transposed', 'rotated', 2, IH, inits))
            nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
            return mk(nodes, inits)

        elif k == 3 and IH == IW:
            # rot270 CCW (= 90° CW) on square grid: flipud then transpose.
            # temp[r,c] = input[IH-1-r,c]; out[r,c] = temp[c,r] = input[IH-1-c, r]
            pad_h, pad_w = GH - IH, GW - IW
            inits = []
            nodes = []
            nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
            nodes.append(_build_slice_reverse('cropped', 'flipped', 2, IH, inits))
            nodes.append(helper.make_node('Transpose', ['flipped'], ['rotated'], perm=[0, 1, 3, 2]))
            nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
            return mk(nodes, inits)

        else:
            # Non-square k=1 or k=3: Gather fallback (higher cost, still exact).
            idx = np.zeros((OH, OW, 2), dtype=np.int64)
            for r in range(OH):
                for c in range(OW):
                    if k == 1:
                        sr, sc = c, IW - 1 - r       # CCW: in[c, IW-1-r]
                    else:  # k == 3
                        sr, sc = IH - 1 - c, r       # CW:  in[IH-1-c, r]
                    idx[r, c] = [sr, sc]
            return _build_gather_model(OH, OW, idx)
    return None
|
| 508 |
|
| 509 |
def s_spatial_gather(td):
|
|
|
|
| 529 |
def s_varshape_spatial_gather(td):
|
| 530 |
"""Spatial gather that works for variable-shape tasks by embedding in 30x30."""
|
| 531 |
sp = fixed_shapes(td)
|
| 532 |
+
if sp is not None: return None
|
| 533 |
exs = get_exs(td)
|
| 534 |
|
|
|
|
| 535 |
exs_30 = []
|
| 536 |
for inp, out in exs:
|
| 537 |
ih, iw = inp.shape
|
|
|
|
| 583 |
if not np.array_equal(out, np.tile(inp, (rH, rW))): return None
|
| 584 |
pad_h, pad_w = 30-OH, 30-OW
|
| 585 |
inits = [
|
| 586 |
+
_make_int64_init('st', [0,0,0,0]),
|
| 587 |
+
_make_int64_init('en', [1,10,IH,IW]),
|
| 588 |
+
_make_int64_init('rp', [1,1,rH,rW]),
|
| 589 |
]
|
| 590 |
nodes = [
|
| 591 |
helper.make_node('Slice', ['input','st','en'], ['cr']),
|
| 592 |
helper.make_node('Tile', ['cr','rp'], ['tl']),
|
|
|
|
| 593 |
]
|
| 594 |
+
nodes.append(_build_pad_node('tl', 'output', pad_h, pad_w, inits))
|
| 595 |
return mk(nodes, inits)
|
| 596 |
|
| 597 |
def s_upscale(td):
|
|
|
|
| 672 |
(IH,IW),(OH,OW) = sp
|
| 673 |
if IH == OH and IW == OW: return None
|
| 674 |
|
|
|
|
| 675 |
if OH % IH != 0 or OW % IW != 0: return None
|
| 676 |
rH, rW = OH // IH, OW // IW
|
| 677 |
if rH * rW > 16 or rH * rW < 2: return None
|
| 678 |
if OH > 30 or OW > 30: return None
|
| 679 |
|
|
|
|
| 680 |
transforms = [
|
| 681 |
('id', lambda x: x),
|
| 682 |
('fliplr', lambda x: np.fliplr(x)),
|
|
|
|
| 688 |
('T_fliplr', lambda x: np.fliplr(x.T)),
|
| 689 |
]
|
| 690 |
|
|
|
|
| 691 |
block_transforms = {}
|
| 692 |
for bi in range(rH):
|
| 693 |
for bj in range(rW):
|
|
|
|
| 707 |
return None
|
| 708 |
block_transforms[(bi, bj)] = found
|
| 709 |
|
|
|
|
| 710 |
idx = np.zeros((OH, OW, 2), dtype=np.int64)
|
| 711 |
for bi in range(rH):
|
| 712 |
for bj in range(rW):
|
|
|
|
| 724 |
elif tname == 'T_fliplr': sr, sc = IW-1-lc, lr
|
| 725 |
idx[oi, oj] = [sr, sc]
|
| 726 |
|
|
|
|
| 727 |
for inp, out in exs:
|
| 728 |
reconstructed = np.zeros_like(out)
|
| 729 |
for oi in range(OH):
|
|
|
|
| 755 |
else:
|
| 756 |
if not np.all(block == 0):
|
| 757 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 758 |
return None
|
| 759 |
|
| 760 |
def s_kronecker(td):
|
|
|
|
| 773 |
if not np.array_equal(out, expected):
|
| 774 |
return None
|
| 775 |
|
|
|
|
| 776 |
idx = np.zeros((OH,OW,2), dtype=np.int64)
|
| 777 |
for r in range(OH):
|
| 778 |
for c in range(OW):
|
|
|
|
| 801 |
if not np.all(block == 0):
|
| 802 |
return None
|
| 803 |
|
|
|
|
| 804 |
idx = np.zeros((OH,OW,2), dtype=np.int64)
|
| 805 |
cst = np.full((OH,OW), -1, dtype=np.int64)
|
| 806 |
for bi in range(rH):
|
|
|
|
| 837 |
if not np.array_equal(shifted, out):
|
| 838 |
ok = False; break
|
| 839 |
if not ok: continue
|
|
|
|
| 840 |
idx = np.zeros((OH, OW, 2), dtype=np.int64)
|
| 841 |
+
cst = np.full((OH, OW), 0, dtype=np.int64)
|
| 842 |
for r in range(OH):
|
| 843 |
for c in range(OW):
|
| 844 |
sr, sc = r - dr, c - dc
|
|
|
|
| 873 |
|
| 874 |
for d in ('down', 'up', 'left', 'right'):
|
| 875 |
if all(np.array_equal(_gravity(inp, d), out) for inp, out in exs):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 876 |
return None
|
| 877 |
return None
|
| 878 |
|
|
|
|
| 887 |
for inp, out in exs:
|
| 888 |
expected = np.concatenate([inp, np.flip(inp, 1)], 1)
|
| 889 |
if not np.array_equal(expected, out): return None
|
|
|
|
| 890 |
idx = np.zeros((OH, OW, 2), dtype=np.int64)
|
| 891 |
for r in range(OH):
|
| 892 |
for c in range(OW):
|
|
|
|
| 1050 |
if len(shapes) != 1: return None
|
| 1051 |
IH, IW = shapes.pop()
|
| 1052 |
|
|
|
|
| 1053 |
fit_exs = get_exs_for_fitting(td)
|
|
|
|
| 1054 |
fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape and i.shape == (IH, IW)]
|
| 1055 |
|
| 1056 |
t_start = time.time()
|
|
|
|
| 1064 |
pad_h, pad_w = GH - IH, GW - IW
|
| 1065 |
|
| 1066 |
inits = [
|
| 1067 |
+
_make_int64_init('sl_st', [0,0,0,0]),
|
| 1068 |
+
_make_int64_init('sl_en', [1,10,IH,IW]),
|
| 1069 |
numpy_helper.from_array(Wconv, 'W'),
|
| 1070 |
]
|
| 1071 |
conv_inputs = ['grid', 'W']
|
|
|
|
| 1079 |
helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
|
| 1080 |
]
|
| 1081 |
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1082 |
+
nodes.append(_build_pad_node('oh_out', 'output', pad_h, pad_w, inits))
|
|
|
|
|
|
|
|
|
|
| 1083 |
|
| 1084 |
model = mk(nodes, inits)
|
| 1085 |
onnx.save(model, path)
|
|
|
|
| 1092 |
for inp, out in exs:
|
| 1093 |
if inp.shape != out.shape: return None
|
| 1094 |
|
|
|
|
| 1095 |
fit_exs = get_exs_for_fitting_variable(td)
|
| 1096 |
fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape]
|
| 1097 |
|
|
|
|
| 1182 |
|
| 1183 |
pad_h, pad_w = GH - OH, GW - OW
|
| 1184 |
inits = [
|
| 1185 |
+
_make_int64_init('sl_st', [0,0,0,0]),
|
| 1186 |
+
_make_int64_init('sl_en', [1,10,IH,IW]),
|
| 1187 |
numpy_helper.from_array(Wconv, 'W'),
|
| 1188 |
+
_make_int64_init('cr_st', [0,0,dr_off,dc_off]),
|
| 1189 |
+
_make_int64_init('cr_en', [1,10,dr_off+OH,dc_off+OW]),
|
| 1190 |
]
|
| 1191 |
conv_inputs = ['grid', 'W']
|
| 1192 |
if B is not None:
|
|
|
|
| 1200 |
helper.make_node('ArgMax', ['co_crop'], ['am'], axis=1, keepdims=1),
|
| 1201 |
]
|
| 1202 |
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1203 |
+
nodes.append(_build_pad_node('oh_out', 'output', pad_h, pad_w, inits))
|
|
|
|
|
|
|
|
|
|
| 1204 |
|
| 1205 |
model = mk(nodes, inits)
|
| 1206 |
onnx.save(model, path)
|
|
|
|
| 1208 |
return None
|
| 1209 |
|
| 1210 |
def solve_conv_var_diff(td, path, time_budget=30.0):
|
| 1211 |
+
"""Variable diff-shape conv: Conv(30x30) -> ArgMax -> Equal+Cast -> Mul(output_mask)."""
|
|
|
|
| 1212 |
exs = get_exs(td)
|
| 1213 |
|
| 1214 |
t_start = time.time()
|
|
|
|
| 1256 |
Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
|
| 1257 |
B = None
|
| 1258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1259 |
# For tasks where output fits within input bounds, use input mask
|
| 1260 |
all_output_within_input = all(
|
| 1261 |
+
out_g.shape[0] <= inp_g.shape[0] and out_g.shape[1] <= inp_g.shape[1]
|
| 1262 |
for inp_g, out_g in exs
|
| 1263 |
)
|
| 1264 |
|
| 1265 |
+
if all_output_within_input:
|
| 1266 |
+
inits = [numpy_helper.from_array(Wconv, 'W')]
|
| 1267 |
+
conv_inputs = ['input', 'W']
|
| 1268 |
+
if B is not None:
|
| 1269 |
+
inits.append(numpy_helper.from_array(B, 'B'))
|
| 1270 |
+
conv_inputs.append('B')
|
| 1271 |
+
|
| 1272 |
+
nodes = [
|
| 1273 |
+
helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1),
|
| 1274 |
+
helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
|
| 1275 |
+
helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
|
| 1276 |
+
]
|
| 1277 |
+
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1278 |
+
nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
|
| 1279 |
+
|
| 1280 |
+
model = mk(nodes, inits)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1281 |
onnx.save(model, path)
|
| 1282 |
+
if validate(path, td): return 'conv_var_diff', model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1283 |
return None
|
| 1284 |
|
| 1285 |
# ============================================================
|
| 1286 |
+
# MAIN SOLVER
|
| 1287 |
# ============================================================
|
| 1288 |
|
| 1289 |
# Analytical solvers, tried in order by solve_task: the first one whose
# generated model passes validate() wins, so cheaper/more-specific solvers
# are listed before broader fallbacks (spatial_gather variants last).
ANALYTICAL_SOLVERS = [
    ('identity', s_identity),
    ('constant', s_constant),
    ('color_map', s_color_map),
    ('transpose', s_transpose),
    ('flip', s_flip),
    ('rotate', s_rotate),
    ('shift', s_shift),
    ('tile', s_tile),
    ('upscale', s_upscale),
    ('kronecker', s_kronecker),
    ('nonuniform_scale', s_nonuniform_scale),
    ('mirror_h', s_mirror_h),
    ('mirror_v', s_mirror_v),
    ('quad_mirror', s_quad_mirror),
    ('concat', s_concat),
    ('concat_enhanced', s_concat_enhanced),
    ('diagonal_tile', s_diagonal_tile),
    ('fixed_crop', s_fixed_crop),
    ('spatial_gather', s_spatial_gather),
    ('varshape_spatial_gather', s_varshape_spatial_gather),
]
|
| 1311 |
|
| 1312 |
+
def solve_task(tn, td, output_dir, conv_budget=30.0, verbose=True):
    """Try all solvers on a task.  Returns (solver_name, score) or None.

    Analytical solvers run first (instant, arc-gen safe); then the conv
    fitters, each under *conv_budget* seconds.  The winning model is left
    saved at output_dir/taskNNN.onnx.

    BUGFIX: a profiler failure (score_network returning None macs) used to
    silently discard an already-validated model and keep searching; it is
    now treated as zero cost (score 25.0), matching the run_tasks
    convention of falling back to macs=memory=params=0.
    """
    path = os.path.join(output_dir, f"task{tn:03d}.onnx")

    def _score_saved_model():
        # Score the model currently saved at `path`; tolerate profiler failure.
        macs, mem, par = score_network(path)
        if macs is None:
            macs, mem, par = 0, 0, 0
        cost = macs + mem + par
        score = max(1.0, 25.0 - math.log(cost)) if cost > 0 else 25.0
        return cost, score

    # Try analytical solvers first (instant, arc-gen safe)
    for name, solver in ANALYTICAL_SOLVERS:
        try:
            model = solver(td)
        except Exception as e:
            if verbose: print(f" {name}: ERROR {e}")
            continue
        if model is not None:
            onnx.save(model, path)
            if validate(path, td):
                cost, score = _score_saved_model()
                if verbose: print(f" {name}: PASS cost={cost} score={score:.2f}")
                return name, score
            else:
                if verbose: print(f" {name}: model built but FAILED validation")

    # Try conv solvers (each saves and validates internally)
    conv_solvers = [
        ('conv_fixed', solve_conv_fixed),
        ('conv_variable', solve_conv_variable),
        ('conv_diffshape', solve_conv_diffshape),
        ('conv_var_diff', solve_conv_var_diff),
    ]
    for name, solver in conv_solvers:
        try:
            result = solver(td, path, time_budget=conv_budget)
        except Exception as e:
            if verbose: print(f" {name}: ERROR {e}")
            continue
        if result is not None:
            solver_type, model = result
            onnx.save(model, path)
            cost, score = _score_saved_model()
            if verbose: print(f" {solver_type}: PASS cost={cost} score={score:.2f}")
            return solver_type, score

    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1359 |
|
| 1360 |
def main():
|
| 1361 |
+
parser = argparse.ArgumentParser(description='NeuroGolf Solver v5')
|
| 1362 |
+
parser.add_argument('--data_dir', type=str, default=None, help='Path to ARC-AGI training data')
|
| 1363 |
+
parser.add_argument('--kaggle_dir', type=str, default=None, help='Path to Kaggle task JSONs')
|
| 1364 |
+
parser.add_argument('--arcgen_dir', type=str, default=None, help='Path to ARC-GEN data directory')
|
| 1365 |
+
parser.add_argument('--output_dir', type=str, default='submission', help='Output directory for ONNX models')
|
| 1366 |
+
parser.add_argument('--conv_budget', type=float, default=30.0, help='Time budget per conv solver per task (seconds)')
|
| 1367 |
+
parser.add_argument('--task', type=int, default=None, help='Solve a single task number')
|
| 1368 |
+
parser.add_argument('--verbose', action='store_true', default=True)
|
| 1369 |
+
parser.add_argument('--quiet', action='store_true', default=False)
|
| 1370 |
args = parser.parse_args()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1371 |
|
| 1372 |
+
if args.quiet:
|
| 1373 |
+
args.verbose = False
|
|
|
|
|
|
|
| 1374 |
|
| 1375 |
+
os.makedirs(args.output_dir, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1376 |
|
| 1377 |
+
# Load tasks
|
| 1378 |
+
if args.kaggle_dir:
|
| 1379 |
+
tasks = load_tasks_kaggle(args.kaggle_dir)
|
| 1380 |
+
elif args.data_dir:
|
| 1381 |
+
tasks = load_tasks_dir(args.data_dir, args.arcgen_dir)
|
| 1382 |
else:
|
| 1383 |
+
# Try common paths
|
| 1384 |
+
for p in ['/kaggle/input/competitions/neurogolf-2026/',
|
| 1385 |
+
'ARC-AGI/data/training/']:
|
| 1386 |
+
if os.path.exists(p):
|
| 1387 |
+
if 'kaggle' in p:
|
| 1388 |
+
tasks = load_tasks_kaggle(p)
|
| 1389 |
+
else:
|
| 1390 |
+
tasks = load_tasks_dir(p, args.arcgen_dir)
|
| 1391 |
+
break
|
| 1392 |
+
else:
|
| 1393 |
+
print("ERROR: No data directory found. Use --data_dir or --kaggle_dir")
|
| 1394 |
+
sys.exit(1)
|
| 1395 |
|
| 1396 |
+
# Solve tasks
|
| 1397 |
+
results = {}
|
| 1398 |
+
total_score = 0.0
|
| 1399 |
+
solved = 0
|
| 1400 |
+
t_total = time.time()
|
| 1401 |
|
| 1402 |
+
task_nums = [args.task] if args.task else sorted(tasks.keys())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1403 |
|
| 1404 |
+
for tn in task_nums:
|
| 1405 |
+
if tn in EXCLUDED_TASKS:
|
| 1406 |
+
if args.verbose: print(f"Task {tn:3d}: EXCLUDED")
|
| 1407 |
+
continue
|
| 1408 |
+
if tn not in tasks:
|
| 1409 |
+
if args.verbose: print(f"Task {tn:3d}: NOT FOUND")
|
| 1410 |
+
continue
|
| 1411 |
+
|
| 1412 |
+
td = tasks[tn]['data']
|
| 1413 |
+
hex_id = tasks[tn]['hex']
|
| 1414 |
+
|
| 1415 |
+
if args.verbose: print(f"\nTask {tn:3d} ({hex_id}):")
|
| 1416 |
+
|
| 1417 |
+
result = solve_task(tn, td, args.output_dir, args.conv_budget, args.verbose)
|
| 1418 |
+
|
| 1419 |
+
if result is not None:
|
| 1420 |
+
solver_type, score = result
|
| 1421 |
+
results[tn] = {'solver': solver_type, 'score': score, 'hex': hex_id}
|
| 1422 |
+
total_score += score
|
| 1423 |
+
solved += 1
|
| 1424 |
+
else:
|
| 1425 |
+
# Unsolved tasks score 1.0 (minimum)
|
| 1426 |
+
total_score += 1.0
|
| 1427 |
+
if args.verbose: print(f" UNSOLVED")
|
| 1428 |
+
|
| 1429 |
+
# Summary
|
| 1430 |
+
elapsed = time.time() - t_total
|
| 1431 |
+
print(f"\n{'='*60}")
|
| 1432 |
+
print(f"RESULTS: {solved}/{len(task_nums)} tasks solved")
|
| 1433 |
+
print(f"Total score: {total_score:.1f}")
|
| 1434 |
+
print(f"Time: {elapsed:.1f}s")
|
| 1435 |
+
print(f"{'='*60}")
|
| 1436 |
+
|
| 1437 |
+
# Breakdown by solver type
|
| 1438 |
+
solver_counts = Counter(r['solver'] for r in results.values())
|
| 1439 |
+
solver_scores = {}
|
| 1440 |
+
for tn, r in results.items():
|
| 1441 |
+
st = r['solver']
|
| 1442 |
+
solver_scores[st] = solver_scores.get(st, 0) + r['score']
|
| 1443 |
+
|
| 1444 |
+
print("\nSolver breakdown:")
|
| 1445 |
+
for st in sorted(solver_counts.keys()):
|
| 1446 |
+
print(f" {st}: {solver_counts[st]} tasks, total score {solver_scores[st]:.1f}, avg {solver_scores[st]/solver_counts[st]:.2f}")
|
| 1447 |
+
|
| 1448 |
+
# Generate submission.csv
|
| 1449 |
+
csv_path = os.path.join(args.output_dir, 'submission.csv')
|
| 1450 |
with open(csv_path, 'w', newline='') as f:
|
| 1451 |
w = csv.writer(f)
|
| 1452 |
+
w.writerow(['task_num', 'hex_id', 'solver', 'score', 'onnx_file'])
|
| 1453 |
+
for tn in sorted(results.keys()):
|
| 1454 |
+
r = results[tn]
|
| 1455 |
+
w.writerow([tn, r['hex'], r['solver'], f"{r['score']:.3f}", f"task{tn:03d}.onnx"])
|
| 1456 |
+
|
| 1457 |
+
# Generate submission.zip
|
| 1458 |
+
zip_path = os.path.join(args.output_dir, 'submission.zip')
|
| 1459 |
+
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
|
| 1460 |
+
for tn in sorted(results.keys()):
|
| 1461 |
+
onnx_path = os.path.join(args.output_dir, f"task{tn:03d}.onnx")
|
| 1462 |
+
if os.path.exists(onnx_path):
|
| 1463 |
+
zf.write(onnx_path, f"task{tn:03d}.onnx")
|
| 1464 |
+
|
| 1465 |
+
print(f"\nSubmission files: {csv_path}, {zip_path}")
|
| 1466 |
+
print(f"Models in zip: {len(results)}")
|
| 1467 |
|
| 1468 |
# Script entry point: run the full solve-and-package pipeline.
if __name__ == '__main__':
    main()