rogermt committed on
Commit
5fb5021
·
verified ·
1 Parent(s): 2427dfd

Phase 1: Opset 17 switch — Slice-based flip/rotate, tensor-based Pad, IR=8

Browse files

Changes:
- IR 10→8, OPSET 10→17
- s_flip: Slice(step=-1) instead of Gather (0 MACs vs ~165K)
- s_rotate k=2: double Slice(step=-1) (0 MACs vs ~165K)
- s_rotate k=1,3: Slice+Transpose for square grids (0 MACs), Gather fallback for non-square
- s_transpose: already zero-cost, no change needed
- All Pad nodes: attribute-based→tensor-based pads input (opset 17 requirement)
- New helpers: _make_int64_init(), _build_pad_node(), _build_slice_crop_pad()
- mk() updated to use IR=8 and opset 17

Files changed (1) hide show
  1. neurogolf_solver.py +336 -561
neurogolf_solver.py CHANGED
@@ -1,16 +1,15 @@
1
  #!/usr/bin/env python3
2
  """
3
- ARC-AGI NeuroGolf Championship - Complete Solver v4
4
- Format: [1,10,30,30] one-hot input/output, opset 10, IR version 10.
5
-
6
- v4 CRITICAL FIXES:
7
- - ARC-GEN data loaded and used for conv fitting (more data = better lstsq)
8
- - ARC-GEN validation: models validated against train+test+arc-gen
9
- - EXCLUDED tasks: {21, 55, 80, 184, 202, 366} skipped
10
- - submission.csv generation for Kaggle
11
- - s_flip fixed: GatherElements -> Gather (opset 10 compat)
12
- - Static profiler: no onnx_tool dependency for cost estimation
13
- - get_exs_for_fitting(): uses train+test+arc-gen for conv fitting
14
 
15
  Solvers:
16
  - Analytical: identity, constant, color_map, transpose, flip, rotate, tile, upscale,
@@ -46,8 +45,10 @@ except ImportError:
46
  BATCH, CH, GH, GW = 1, 10, 30, 30
47
  GRID_SHAPE = [BATCH, CH, GH, GW]
48
  DT = TensorProto.FLOAT
49
- IR = 10
50
- OPSET = [helper.make_opsetid("", 10)]
 
 
51
 
52
  # Officially excluded tasks (score 0 regardless)
53
  EXCLUDED_TASKS = {21, 55, 80, 184, 202, 366}
@@ -55,9 +56,7 @@ EXCLUDED_TASKS = {21, 55, 80, 184, 202, 366}
55
  # Max ARC-GEN examples to use for validation (to keep runtime reasonable)
56
  MAX_ARCGEN_VALIDATE = 30
57
  # Max ARC-GEN examples for conv fitting (keep separate from validation!)
58
- # NOTE: Conv fitting uses train+test only. ARC-GEN is for VALIDATION only.
59
- # lstsq underdetermines with too many variable-size arc-gen examples.
60
- MAX_ARCGEN_FIT = 0 # Don't use arc-gen for fitting — use for validation only
61
 
62
  def get_providers():
63
  return ['CPUExecutionProvider']
@@ -76,7 +75,6 @@ def load_tasks_dir(data_dir, arcgen_dir=None):
76
  with open(os.path.join(data_dir, f)) as fh:
77
  data = json.load(fh)
78
  hex_id = f.replace('.json','')
79
- # Load ARC-GEN data if available
80
  if arcgen_dir and os.path.exists(os.path.join(arcgen_dir, f)):
81
  with open(os.path.join(arcgen_dir, f)) as fh:
82
  arcgen_examples = json.load(fh)
@@ -109,8 +107,7 @@ def to_onehot(grid):
109
  return arr
110
 
111
  def validate(path, td):
112
- """Validate model against ALL examples: train + test + arc-gen.
113
- This matches what Kaggle does for scoring."""
114
  try:
115
  opts = ort.SessionOptions()
116
  opts.log_severity_level = 3
@@ -118,7 +115,6 @@ def validate(path, td):
118
  except:
119
  return False
120
  examples = td['train'] + td['test']
121
- # Include arc-gen examples (capped for speed)
122
  if 'arc-gen' in td:
123
  examples = examples + td['arc-gen'][:MAX_ARCGEN_VALIDATE]
124
  for ex in examples:
@@ -164,8 +160,7 @@ BANNED_OPS = {'Loop', 'Scan', 'NonZero', 'Unique', 'If', 'Function'}
164
  MAX_FILESIZE = int(1.44 * 1024 * 1024)
165
 
166
  def score_network(path):
167
- """Static profiler matching Kaggle scoring: cost = macs + memory + params.
168
- Falls back to official neurogolf_utils if available."""
169
  if HAS_ONNX_TOOL:
170
  try:
171
  return _score_network_official(path)
@@ -215,6 +210,44 @@ def _static_profile(path):
215
 
216
  return int(macs), int(nbytes), int(params)
217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  def mk(nodes, inits=None):
219
  x = helper.make_tensor_value_info("input", DT, GRID_SHAPE)
220
  y = helper.make_tensor_value_info("output", DT, GRID_SHAPE)
@@ -227,24 +260,19 @@ def get_exs(td):
227
  for ex in td['train'] + td['test']]
228
 
229
  def get_exs_for_fitting(td):
230
- """Get examples for conv fitting. Uses train+test + arc-gen WHERE SIZES MATCH.
231
- For fixed-size tasks, arc-gen examples have the same grid size,
232
- so they provide more data points for lstsq without changing the feature dimension.
233
- For variable-size tasks, only use train+test (arc-gen varies too much)."""
234
  base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
235
  for ex in td['train'] + td['test']]
236
 
237
  if not base_exs:
238
  return base_exs
239
 
240
- # Check if all base examples have same input shape
241
  base_shapes = {inp.shape for inp, _ in base_exs}
242
  if len(base_shapes) != 1:
243
- return base_exs # Variable sizes — don't add arc-gen
244
 
245
  base_shape = list(base_shapes)[0]
246
 
247
- # Add arc-gen examples that match the base shape
248
  ag_exs = []
249
  for ex in td.get('arc-gen', []):
250
  inp = np.array(ex['input'], dtype=np.int64)
@@ -252,17 +280,13 @@ def get_exs_for_fitting(td):
252
  if inp.shape == base_shape and out.shape == base_exs[0][1].shape:
253
  ag_exs.append((inp, out))
254
 
255
- # Cap to avoid massive lstsq (diminishing returns after ~10)
256
  return base_exs + ag_exs[:10]
257
 
258
  def get_exs_for_fitting_variable(td):
259
- """Get examples for variable-shape conv fitting.
260
- For variable-shape tasks, arc-gen examples may have different sizes per example
261
- but since we embed in 30x30 anyway, we can safely include them."""
262
  base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
263
  for ex in td['train'] + td['test']]
264
 
265
- # For variable shape, include arc-gen examples (they get embedded in 30x30)
266
  ag_exs = []
267
  for ex in td.get('arc-gen', []):
268
  inp = np.array(ex['input'], dtype=np.int64)
@@ -279,12 +303,11 @@ def fixed_shapes(td):
279
  return list(shapes)[0] if len(shapes) == 1 else None
280
 
281
  # ============================================================
282
- # GATHER HELPERS
283
  # ============================================================
284
 
285
  def _build_gather_model(OH, OW, idx):
286
- # Use Gather (opset 1) instead of GatherElements (opset 11)
287
- # Flatten spatial: [1,10,900] -> Gather(axis=2, indices=[900]) -> [1,10,900]
288
  flat_idx = np.zeros((GH*GW,), dtype=np.int64)
289
  mask = np.zeros((1,1,GH,GW), dtype=np.float32)
290
  for oi in range(OH):
@@ -306,7 +329,7 @@ def _build_gather_model(OH, OW, idx):
306
  return mk(nodes, inits)
307
 
308
  def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
309
- # Use Gather (opset 1) instead of GatherElements (opset 11)
310
  flat_idx = np.zeros((GH*GW,), dtype=np.int64)
311
  gather_mask = np.zeros((1,1,GH,GW), dtype=np.float32)
312
  const_oh = np.zeros((1,10,GH,GW), dtype=np.float32)
@@ -356,11 +379,9 @@ def s_color_map(td):
356
  if iv in cm and cm[iv] != ov: return None
357
  cm[iv] = ov
358
 
359
- # Check if it's a permutation (bijective + all mapped colors form a closed set)
360
  is_permutation = (set(cm.keys()) == set(cm.values()))
361
 
362
  if is_permutation:
363
- # Use channel Gather — zero MACs, much cheaper
364
  gather_ch = np.arange(10, dtype=np.int32)
365
  for src, dst in cm.items():
366
  if 0 <= src < 10 and 0 <= dst < 10:
@@ -369,7 +390,6 @@ def s_color_map(td):
369
  nodes = [helper.make_node('Gather', ['input', 'gi'], ['output'], axis=1)]
370
  return mk(nodes, inits)
371
  else:
372
- # Non-permutation: use Conv 1x1 (has MACs but handles any mapping)
373
  W = np.zeros((10,10,1,1), dtype=np.float32)
374
  for ic in range(10):
375
  W[cm.get(ic,ic), ic, 0, 0] = 1.0
@@ -377,44 +397,113 @@ def s_color_map(td):
377
  [numpy_helper.from_array(W, 'W')])
378
 
379
  def s_transpose(td):
 
380
  for ex in td['train']+td['test']:
381
  if not np.array_equal(np.array(ex['output']), np.array(ex['input']).T): return None
382
  return mk([helper.make_node('Transpose', ['input'], ['output'], perm=[0,1,3,2])])
383
 
384
  def s_flip(td):
 
385
  exs = get_exs(td)
386
  sp = fixed_shapes(td)
387
  if sp is None: return None
388
  (IH,IW),(OH,OW) = sp
389
  if (IH,IW) != (OH,OW): return None
 
390
  for axis, flip_fn in [(0, np.flipud), (1, np.fliplr)]:
391
  if all(np.array_equal(out, flip_fn(inp)) for inp, out in exs):
392
- # Build gather index map (using Gather, opset 1 compatible)
393
- idx = np.zeros((OH,OW,2), dtype=np.int64)
394
- for r in range(OH):
395
- for c in range(OW):
396
- if axis == 0:
397
- idx[r,c] = [IH-1-r, c]
398
- else:
399
- idx[r,c] = [r, IW-1-c]
400
- return _build_gather_model(OH, OW, idx)
 
 
 
 
 
 
 
 
 
 
401
  return None
402
 
403
  def s_rotate(td):
 
 
404
  exs = get_exs(td)
405
  sp = fixed_shapes(td)
406
  if sp is None: return None
407
  (IH,IW),(OH,OW) = sp
 
408
  for k in [1, 2, 3]:
409
- if not all(np.array_equal(out, np.rot90(inp, k)) for inp, out in exs): continue
410
- idx = np.zeros((OH,OW,2), dtype=np.int64)
411
- for r in range(OH):
412
- for c in range(OW):
413
- if k == 1: sr, sc = c, IH-1-r
414
- elif k == 2: sr, sc = IH-1-r, IW-1-c
415
- elif k == 3: sr, sc = IW-1-c, r
416
- idx[r,c] = [sr, sc]
417
- return _build_gather_model(OH, OW, idx)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
  return None
419
 
420
  def s_spatial_gather(td):
@@ -440,10 +529,9 @@ def s_spatial_gather(td):
440
  def s_varshape_spatial_gather(td):
441
  """Spatial gather that works for variable-shape tasks by embedding in 30x30."""
442
  sp = fixed_shapes(td)
443
- if sp is not None: return None # fixed shapes handled by s_spatial_gather
444
  exs = get_exs(td)
445
 
446
- # Embed all examples in 30x30
447
  exs_30 = []
448
  for inp, out in exs:
449
  ih, iw = inp.shape
@@ -495,15 +583,15 @@ def s_tile(td):
495
  if not np.array_equal(out, np.tile(inp, (rH, rW))): return None
496
  pad_h, pad_w = 30-OH, 30-OW
497
  inits = [
498
- numpy_helper.from_array(np.array([0,0,0,0], dtype=np.int64), 'st'),
499
- numpy_helper.from_array(np.array([1,10,IH,IW], dtype=np.int64), 'en'),
500
- numpy_helper.from_array(np.array([1,1,rH,rW], dtype=np.int64), 'rp'),
501
  ]
502
  nodes = [
503
  helper.make_node('Slice', ['input','st','en'], ['cr']),
504
  helper.make_node('Tile', ['cr','rp'], ['tl']),
505
- helper.make_node('Pad', ['tl'], ['output'], pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0),
506
  ]
 
507
  return mk(nodes, inits)
508
 
509
  def s_upscale(td):
@@ -584,13 +672,11 @@ def s_concat_enhanced(td):
584
  (IH,IW),(OH,OW) = sp
585
  if IH == OH and IW == OW: return None
586
 
587
- # Need block decomposition
588
  if OH % IH != 0 or OW % IW != 0: return None
589
  rH, rW = OH // IH, OW // IW
590
  if rH * rW > 16 or rH * rW < 2: return None
591
  if OH > 30 or OW > 30: return None
592
 
593
- # All 8 symmetry transforms of the dihedral group
594
  transforms = [
595
  ('id', lambda x: x),
596
  ('fliplr', lambda x: np.fliplr(x)),
@@ -602,7 +688,6 @@ def s_concat_enhanced(td):
602
  ('T_fliplr', lambda x: np.fliplr(x.T)),
603
  ]
604
 
605
- # For each block, find which transform matches
606
  block_transforms = {}
607
  for bi in range(rH):
608
  for bj in range(rW):
@@ -622,7 +707,6 @@ def s_concat_enhanced(td):
622
  return None
623
  block_transforms[(bi, bj)] = found
624
 
625
- # Build index map
626
  idx = np.zeros((OH, OW, 2), dtype=np.int64)
627
  for bi in range(rH):
628
  for bj in range(rW):
@@ -640,7 +724,6 @@ def s_concat_enhanced(td):
640
  elif tname == 'T_fliplr': sr, sc = IW-1-lc, lr
641
  idx[oi, oj] = [sr, sc]
642
 
643
- # Verify
644
  for inp, out in exs:
645
  reconstructed = np.zeros_like(out)
646
  for oi in range(OH):
@@ -672,15 +755,6 @@ def s_input_driven_tile(td):
672
  else:
673
  if not np.all(block == 0):
674
  return None
675
-
676
- # Build gather model: each output pixel at (bi*IH+lr, bj*IW+lc) maps to
677
- # input[lr, lc] if input[bi, bj] != 0, else constant 0
678
- # Problem: whether block is active depends on input value, which varies.
679
- # This needs a different ONNX approach: can't use static gather.
680
- # But we CAN use: Tile input -> Mul by mask derived from input
681
- # Actually we need: for each (bi,bj) block position, multiply by inp[bi,bj] != 0
682
- # This is NOT static - it depends on input content.
683
- # Skip for now - spatial_gather can handle if block positions are fixed.
684
  return None
685
 
686
  def s_kronecker(td):
@@ -699,7 +773,6 @@ def s_kronecker(td):
699
  if not np.array_equal(out, expected):
700
  return None
701
 
702
- # This is identical to upscale - build gather index
703
  idx = np.zeros((OH,OW,2), dtype=np.int64)
704
  for r in range(OH):
705
  for c in range(OW):
@@ -728,7 +801,6 @@ def s_diagonal_tile(td):
728
  if not np.all(block == 0):
729
  return None
730
 
731
- # Build: diagonal blocks map to input, off-diagonal are constant 0
732
  idx = np.zeros((OH,OW,2), dtype=np.int64)
733
  cst = np.full((OH,OW), -1, dtype=np.int64)
734
  for bi in range(rH):
@@ -765,9 +837,8 @@ def s_shift(td):
765
  if not np.array_equal(shifted, out):
766
  ok = False; break
767
  if not ok: continue
768
- # Build gather index
769
  idx = np.zeros((OH, OW, 2), dtype=np.int64)
770
- cst = np.full((OH, OW), 0, dtype=np.int64) # zeros for out-of-bounds
771
  for r in range(OH):
772
  for c in range(OW):
773
  sr, sc = r - dr, c - dc
@@ -802,10 +873,6 @@ def s_gravity(td):
802
 
803
  for d in ('down', 'up', 'left', 'right'):
804
  if all(np.array_equal(_gravity(inp, d), out) for inp, out in exs):
805
- # Gravity is input-dependent (positions depend on content)
806
- # Can't use static Gather — need Conv to learn it
807
- # But conv also can't learn arbitrary sorting...
808
- # Skip for now — this needs a specialized ONNX graph
809
  return None
810
  return None
811
 
@@ -820,7 +887,6 @@ def s_mirror_h(td):
820
  for inp, out in exs:
821
  expected = np.concatenate([inp, np.flip(inp, 1)], 1)
822
  if not np.array_equal(expected, out): return None
823
- # Build gather index
824
  idx = np.zeros((OH, OW, 2), dtype=np.int64)
825
  for r in range(OH):
826
  for c in range(OW):
@@ -984,9 +1050,7 @@ def solve_conv_fixed(td, path, time_budget=30.0):
984
  if len(shapes) != 1: return None
985
  IH, IW = shapes.pop()
986
 
987
- # Use ARC-GEN data for better fitting
988
  fit_exs = get_exs_for_fitting(td)
989
- # Filter to same-shape, same IH/IW
990
  fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape and i.shape == (IH, IW)]
991
 
992
  t_start = time.time()
@@ -1000,8 +1064,8 @@ def solve_conv_fixed(td, path, time_budget=30.0):
1000
  pad_h, pad_w = GH - IH, GW - IW
1001
 
1002
  inits = [
1003
- numpy_helper.from_array(np.array([0,0,0,0], dtype=np.int64), 'sl_st'),
1004
- numpy_helper.from_array(np.array([1,10,IH,IW], dtype=np.int64), 'sl_en'),
1005
  numpy_helper.from_array(Wconv, 'W'),
1006
  ]
1007
  conv_inputs = ['grid', 'W']
@@ -1015,10 +1079,7 @@ def solve_conv_fixed(td, path, time_budget=30.0):
1015
  helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
1016
  ]
1017
  add_onehot_block(nodes, inits, 'am', 'oh_out')
1018
- nodes.append(
1019
- helper.make_node('Pad', ['oh_out'], ['output'],
1020
- pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0)
1021
- )
1022
 
1023
  model = mk(nodes, inits)
1024
  onnx.save(model, path)
@@ -1031,7 +1092,6 @@ def solve_conv_variable(td, path, time_budget=30.0):
1031
  for inp, out in exs:
1032
  if inp.shape != out.shape: return None
1033
 
1034
- # Use ARC-GEN data for better fitting (variable shape, embedded in 30x30)
1035
  fit_exs = get_exs_for_fitting_variable(td)
1036
  fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape]
1037
 
@@ -1122,11 +1182,11 @@ def solve_conv_diffshape(td, path, time_budget=30.0):
1122
 
1123
  pad_h, pad_w = GH - OH, GW - OW
1124
  inits = [
1125
- numpy_helper.from_array(np.array([0,0,0,0], dtype=np.int64), 'sl_st'),
1126
- numpy_helper.from_array(np.array([1,10,IH,IW], dtype=np.int64), 'sl_en'),
1127
  numpy_helper.from_array(Wconv, 'W'),
1128
- numpy_helper.from_array(np.array([0,0,dr_off,dc_off], dtype=np.int64), 'cr_st'),
1129
- numpy_helper.from_array(np.array([1,10,dr_off+OH,dc_off+OW], dtype=np.int64), 'cr_en'),
1130
  ]
1131
  conv_inputs = ['grid', 'W']
1132
  if B is not None:
@@ -1140,10 +1200,7 @@ def solve_conv_diffshape(td, path, time_budget=30.0):
1140
  helper.make_node('ArgMax', ['co_crop'], ['am'], axis=1, keepdims=1),
1141
  ]
1142
  add_onehot_block(nodes, inits, 'am', 'oh_out')
1143
- nodes.append(
1144
- helper.make_node('Pad', ['oh_out'], ['output'],
1145
- pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0)
1146
- )
1147
 
1148
  model = mk(nodes, inits)
1149
  onnx.save(model, path)
@@ -1151,8 +1208,7 @@ def solve_conv_diffshape(td, path, time_budget=30.0):
1151
  return None
1152
 
1153
  def solve_conv_var_diff(td, path, time_budget=30.0):
1154
- """Variable diff-shape conv: Conv(30x30) -> ArgMax -> Equal+Cast -> Mul(output_mask).
1155
- Works when output shape differs from input but mapping is convolutional on 30x30 grid."""
1156
  exs = get_exs(td)
1157
 
1158
  t_start = time.time()
@@ -1200,495 +1256,214 @@ def solve_conv_var_diff(td, path, time_budget=30.0):
1200
  Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
1201
  B = None
1202
 
1203
- # Use ReduceSum of output channels as mask (sum across channels == 1 for valid pixels)
1204
- # But we don't know the output mask at inference time from input alone...
1205
- # We need a way to derive the output mask from the input.
1206
- # For same-shape: mask = ReduceSum(input, axis=1) works
1207
- # For diff-shape: we need to compute the output mask differently
1208
- #
1209
- # Approach: Conv output at valid positions should have max > threshold,
1210
- # and at padding positions max ≈ 0. Use the ArgMax+OneHot and then
1211
- # mask with ReduceSum(input) which is 1 at input positions but 0 at padding.
1212
- # BUT output may be LARGER than input...
1213
- #
1214
- # Alternative: just use Conv -> ArgMax -> Equal+Cast -> Mul(input_mask_expanded)
1215
- # where input_mask covers the output region too.
1216
- # This won't work if output extends beyond input region.
1217
- #
1218
- # Simplest correct approach: let the conv produce valid one-hot everywhere,
1219
- # then the padding region should naturally produce channel-0 output.
1220
- # Since padding is all-zero input, conv output there = bias only.
1221
- # If no bias, conv output = 0 for all channels -> argmax gives channel 0 -> onehot gives [1,0,...,0]
1222
- # which equals the padding encoding (channel 0 = 1 in padding).
1223
- # Wait - that's WRONG for the NeuroGolf format. In the padding region, ALL channels should be 0.
1224
- # The one-hot encoding has channel[color]=1, but padding = ALL zeros.
1225
- #
1226
- # So we NEED a mask. But for diff-shape, what mask?
1227
- # If output is always top-left aligned and we know max output size...
1228
- # We can't statically determine the output mask from the input.
1229
- #
1230
- # However: we can try the ReduceSum approach anyway — if conv naturally
1231
- # produces channel-0 dominant output in padding, then:
1232
- # mask = ReduceSum(input, axis=1) gives 1 for input pixels, 0 for padding
1233
- # If output region ⊆ input region, this works.
1234
- # If output region > input region... we need the output's ReduceSum instead.
1235
-
1236
  # For tasks where output fits within input bounds, use input mask
1237
  all_output_within_input = all(
1238
- out_g.shape[0] <= inp_g.shape[0] and out_g.shape[1] <= inp_g.shape[1]
1239
  for inp_g, out_g in exs
1240
  )
1241
 
1242
- if not all_output_within_input:
1243
- continue # Skip tasks where output extends beyond input
1244
-
1245
- inits = [numpy_helper.from_array(Wconv, 'W')]
1246
- conv_inputs = ['input', 'W']
1247
- if B is not None:
1248
- inits.append(numpy_helper.from_array(B, 'B'))
1249
- conv_inputs.append('B')
1250
-
1251
- nodes = [
1252
- helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1),
1253
- helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
1254
- helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
1255
- ]
1256
- add_onehot_block(nodes, inits, 'am', 'oh_out')
1257
- nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
1258
-
1259
- model = mk(nodes, inits)
1260
- onnx.save(model, path)
1261
- if validate(path, td): return 'conv_var_diff', model
1262
- return None
1263
-
1264
- # ============================================================
1265
- # PYTORCH LEARNED CONV (gradient descent, multi-seed, ternary snap)
1266
- # ============================================================
1267
-
1268
- def _ternary_snap(w, eps=0.2):
1269
- """Snap weights to {-1, 0, 1} — smaller model, often still correct."""
1270
- return np.where(w > eps, 1.0, np.where(w < -eps, -1.0, 0.0)).astype(np.float32)
1271
-
1272
- def _build_conv_onnx_from_weights(W, ks, use_full_30=False, IH=None, IW=None):
1273
- """Build ONNX conv model from numpy weight array W [10,10,ks,ks].
1274
- For fixed-shape: Slice→Conv→ArgMax→Equal+Cast→Pad
1275
- For variable/full30: Conv→ArgMax→Equal+Cast→Mul(mask)"""
1276
- pad = ks // 2
1277
- if use_full_30:
1278
- # Variable shape: full 30x30 conv with mask
1279
- inits = [numpy_helper.from_array(W, 'W')]
1280
- nodes = [
1281
- helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1),
1282
- helper.make_node('Conv', ['input', 'W'], ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
1283
- helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
1284
- ]
1285
- add_onehot_block(nodes, inits, 'am', 'oh_out')
1286
- nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
1287
- return mk(nodes, inits)
1288
- else:
1289
- # Fixed shape: slice, conv, pad
1290
- pad_h, pad_w = GH - IH, GW - IW
1291
- inits = [
1292
- numpy_helper.from_array(np.array([0,0,0,0], dtype=np.int64), 'sl_st'),
1293
- numpy_helper.from_array(np.array([1,10,IH,IW], dtype=np.int64), 'sl_en'),
1294
- numpy_helper.from_array(W, 'W'),
1295
- ]
1296
- nodes = [
1297
- helper.make_node('Slice', ['input','sl_st','sl_en'], ['grid']),
1298
- helper.make_node('Conv', ['grid', 'W'], ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
1299
- helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
1300
- ]
1301
- add_onehot_block(nodes, inits, 'am', 'oh_out')
1302
- nodes.append(
1303
- helper.make_node('Pad', ['oh_out'], ['output'],
1304
- pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0)
1305
- )
1306
- return mk(nodes, inits)
1307
-
1308
- def _build_two_layer_conv_onnx(W1, W2, ks1, ks2, use_full_30=False, IH=None, IW=None):
1309
- """Build ONNX two-layer conv: Conv→ReLU→Conv→ArgMax→Equal+Cast→Pad/Mul(mask)."""
1310
- pad1, pad2 = ks1 // 2, ks2 // 2
1311
- if use_full_30:
1312
- inits = [
1313
- numpy_helper.from_array(W1, 'W1'),
1314
- numpy_helper.from_array(W2, 'W2'),
1315
- ]
1316
- nodes = [
1317
- helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1),
1318
- helper.make_node('Conv', ['input', 'W1'], ['h1'], kernel_shape=[ks1,ks1], pads=[pad1]*4),
1319
- helper.make_node('Relu', ['h1'], ['h1r']),
1320
- helper.make_node('Conv', ['h1r', 'W2'], ['co'], kernel_shape=[ks2,ks2], pads=[pad2]*4),
1321
- helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
1322
- ]
1323
- add_onehot_block(nodes, inits, 'am', 'oh_out')
1324
- nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
1325
- return mk(nodes, inits)
1326
- else:
1327
- pad_h, pad_w = GH - IH, GW - IW
1328
- inits = [
1329
- numpy_helper.from_array(np.array([0,0,0,0], dtype=np.int64), 'sl_st'),
1330
- numpy_helper.from_array(np.array([1,10,IH,IW], dtype=np.int64), 'sl_en'),
1331
- numpy_helper.from_array(W1, 'W1'),
1332
- numpy_helper.from_array(W2, 'W2'),
1333
- ]
1334
- nodes = [
1335
- helper.make_node('Slice', ['input','sl_st','sl_en'], ['grid']),
1336
- helper.make_node('Conv', ['grid', 'W1'], ['h1'], kernel_shape=[ks1,ks1], pads=[pad1]*4),
1337
- helper.make_node('Relu', ['h1'], ['h1r']),
1338
- helper.make_node('Conv', ['h1r', 'W2'], ['co'], kernel_shape=[ks2,ks2], pads=[pad2]*4),
1339
- helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
1340
- ]
1341
- add_onehot_block(nodes, inits, 'am', 'oh_out')
1342
- nodes.append(
1343
- helper.make_node('Pad', ['oh_out'], ['output'],
1344
- pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0)
1345
- )
1346
- return mk(nodes, inits)
1347
-
1348
- def solve_pytorch_conv(td, path, time_budget=30.0):
1349
- """PyTorch gradient descent conv solver. Tries single-layer then two-layer.
1350
- Multi-seed training with ternary weight snapping for smaller models.
1351
- Validates against arc-gen before accepting."""
1352
- try:
1353
- import torch
1354
- import torch.nn as nn
1355
- import copy as _copy
1356
- except ImportError:
1357
- return None
1358
-
1359
- exs = get_exs(td)
1360
- same_shape = all(inp.shape == out.shape for inp, out in exs)
1361
- if not same_shape:
1362
- return None # Only handle same-shape for now
1363
-
1364
- shapes = set(inp.shape for inp, _ in exs)
1365
- fixed_in = len(shapes) == 1
1366
-
1367
- # Prepare tensors
1368
- all_pairs = td['train'] + td['test']
1369
- inp_list = [to_onehot(p['input'])[0] for p in all_pairs]
1370
- out_list = [to_onehot(p['output'])[0] for p in all_pairs]
1371
- inp_t = torch.tensor(np.stack(inp_list), dtype=torch.float32)
1372
- out_t = torch.tensor(np.stack(out_list), dtype=torch.float32)
1373
-
1374
- if fixed_in:
1375
- IH, IW = list(shapes)[0]
1376
- # Train on cropped region
1377
- inp_t = inp_t[:, :, :IH, :IW]
1378
- out_t = out_t[:, :, :IH, :IW]
1379
-
1380
- t_start = time.time()
1381
- best_result = None
1382
-
1383
- # Phase 1: Single-layer conv (multiple kernel sizes and seeds)
1384
- for ks in [1, 3, 5, 7]:
1385
- if time.time() - t_start > time_budget * 0.6:
1386
- break
1387
- pad = ks // 2
1388
- for seed in [0, 7, 42]:
1389
- if time.time() - t_start > time_budget * 0.6:
1390
- break
1391
- torch.manual_seed(seed)
1392
- conv = nn.Conv2d(CH, CH, kernel_size=ks, padding=pad, bias=False)
1393
- if seed == 0:
1394
- nn.init.zeros_(conv.weight)
1395
- opt = torch.optim.Adam(conv.parameters(), lr=0.03)
1396
- best_loss, best_state = float('inf'), None
1397
- for step in range(3000):
1398
- opt.zero_grad()
1399
- pred = conv(inp_t)
1400
- loss = nn.functional.mse_loss(pred, out_t)
1401
- loss.backward()
1402
- opt.step()
1403
- if loss.item() < best_loss:
1404
- best_loss = loss.item()
1405
- best_state = _copy.deepcopy(conv.state_dict())
1406
- if best_loss < 1e-8:
1407
- break
1408
- if best_state is None:
1409
- continue
1410
- conv.load_state_dict(best_state)
1411
- w = conv.weight.detach().numpy()
1412
-
1413
- # Try continuous weights, then ternary-snapped
1414
- for w_cand in [w, _ternary_snap(w)]:
1415
- use_full = not fixed_in
1416
- model = _build_conv_onnx_from_weights(
1417
- w_cand, ks, use_full_30=use_full,
1418
- IH=IH if fixed_in else None,
1419
- IW=IW if fixed_in else None
1420
- )
1421
- onnx.save(model, path)
1422
- if validate(path, td):
1423
- sz = os.path.getsize(path)
1424
- if best_result is None or sz < best_result[2]:
1425
- best_result = ('pt_conv', model, sz)
1426
-
1427
- # Phase 2: Two-layer conv (Conv→ReLU→Conv)
1428
- for ks1, ks2, hidden in [(3, 1, CH), (5, 1, CH), (3, 3, CH)]:
1429
- if time.time() - t_start > time_budget:
1430
- break
1431
- for seed in [0, 7]:
1432
- if time.time() - t_start > time_budget:
1433
- break
1434
- torch.manual_seed(seed)
1435
- net = nn.Sequential(
1436
- nn.Conv2d(CH, hidden, kernel_size=ks1, padding=ks1//2, bias=False),
1437
- nn.ReLU(),
1438
- nn.Conv2d(hidden, CH, kernel_size=ks2, padding=ks2//2, bias=False),
1439
- )
1440
- opt = torch.optim.Adam(net.parameters(), lr=0.01)
1441
- best_loss, best_state = float('inf'), None
1442
- for step in range(2500):
1443
- opt.zero_grad()
1444
- pred = net(inp_t)
1445
- loss = nn.functional.mse_loss(pred, out_t)
1446
- loss.backward()
1447
- opt.step()
1448
- if loss.item() < best_loss:
1449
- best_loss = loss.item()
1450
- best_state = _copy.deepcopy(net.state_dict())
1451
- if best_loss < 1e-8:
1452
- break
1453
- if best_state is None:
1454
- continue
1455
- net.load_state_dict(best_state)
1456
- w1 = net[0].weight.detach().numpy()
1457
- w2 = net[2].weight.detach().numpy()
1458
-
1459
- for w1c, w2c in [(w1, w2), (_ternary_snap(w1), _ternary_snap(w2))]:
1460
- use_full = not fixed_in
1461
- model = _build_two_layer_conv_onnx(
1462
- w1c, w2c, ks1, ks2, use_full_30=use_full,
1463
- IH=IH if fixed_in else None,
1464
- IW=IW if fixed_in else None
1465
- )
1466
  onnx.save(model, path)
1467
- if validate(path, td):
1468
- sz = os.path.getsize(path)
1469
- if best_result is None or sz < best_result[2]:
1470
- best_result = ('pt_conv2', model, sz)
1471
-
1472
- if best_result is not None:
1473
- sname, model, _ = best_result
1474
- onnx.save(model, path)
1475
- return sname, model
1476
  return None
1477
 
1478
  # ============================================================
1479
- # MAIN
1480
  # ============================================================
1481
 
1482
  ANALYTICAL_SOLVERS = [
1483
- ('identity', s_identity), ('constant', s_constant), ('color_map', s_color_map),
1484
- ('transpose', s_transpose), ('flip', s_flip), ('rotate', s_rotate),
1485
- ('tile', s_tile), ('upscale', s_upscale), ('kronecker', s_kronecker),
 
 
 
 
 
 
 
1486
  ('nonuniform_scale', s_nonuniform_scale),
1487
- ('mirror_h', s_mirror_h), ('mirror_v', s_mirror_v), ('quad_mirror', s_quad_mirror),
1488
- ('concat', s_concat), ('concat_enhanced', s_concat_enhanced),
 
 
 
1489
  ('diagonal_tile', s_diagonal_tile),
1490
  ('fixed_crop', s_fixed_crop),
1491
  ('spatial_gather', s_spatial_gather),
1492
- ('shift', s_shift),
1493
  ('varshape_spatial_gather', s_varshape_spatial_gather),
1494
  ]
1495
 
1496
- def solve_task(tn, td, outdir, conv_budget=30.0):
1497
- t_start = time.time()
1498
- os.makedirs(outdir, exist_ok=True)
1499
- path = os.path.join(outdir, f"task{tn:03d}.onnx")
1500
-
1501
- # Skip excluded tasks
1502
- if tn in EXCLUDED_TASKS:
1503
- return False, 'excluded', None, time.time() - t_start, path
1504
 
1505
- # 1. Try analytical solvers (fast, tiny models)
1506
- for sname, sfn in ANALYTICAL_SOLVERS:
1507
  try:
1508
- model = sfn(td)
1509
- if model is None: continue
 
 
 
1510
  onnx.save(model, path)
1511
- if validate(path, td):
1512
- return True, sname, os.path.getsize(path), time.time() - t_start, path
1513
- except: pass
1514
-
1515
- # 2. Determine task shape category and try conv solvers
1516
- exs = get_exs(td)
1517
- same_shape = all(inp.shape == out.shape for inp, out in exs)
1518
- shapes = set(inp.shape for inp, _ in exs)
1519
- fixed_in = len(shapes) == 1
1520
-
1521
- conv_time = conv_budget
1522
-
1523
- if same_shape:
1524
- if fixed_in:
1525
- result = solve_conv_fixed(td, path, time_budget=conv_time/2)
1526
- if result is not None:
1527
- sname, model = result
1528
- return True, sname, os.path.getsize(path), time.time() - t_start, path
1529
- result = solve_conv_variable(td, path, time_budget=conv_time)
1530
- if result is not None:
1531
- sname, model = result
1532
- return True, sname, os.path.getsize(path), time.time() - t_start, path
1533
- # 3. PyTorch learned conv as fallback for same-shape tasks
1534
- remaining = max(1, conv_time - (time.time() - t_start))
1535
- result = solve_pytorch_conv(td, path, time_budget=remaining)
1536
- if result is not None:
1537
- sname, model = result
1538
- return True, sname, os.path.getsize(path), time.time() - t_start, path
1539
- else:
1540
- sp = fixed_shapes(td)
1541
- if sp is not None:
1542
- (IH,IW),(OH,OW) = sp
1543
- if OH <= IH and OW <= IW:
1544
- result = solve_conv_diffshape(td, path, time_budget=conv_time)
1545
- if result is not None:
1546
- sname, model = result
1547
- return True, sname, os.path.getsize(path), time.time() - t_start, path
1548
-
1549
- # Try variable diff-shape conv (output within input bounds)
1550
- result = solve_conv_var_diff(td, path, time_budget=conv_time)
1551
  if result is not None:
1552
- sname, model = result
1553
- return True, sname, os.path.getsize(path), time.time() - t_start, path
 
 
 
 
 
 
1554
 
1555
- return False, None, None, time.time() - t_start, path
1556
-
1557
- def run_tasks(task_nums, tasks, output_dir, conv_budget, use_wandb):
1558
- results = {}
1559
- costs_dict = {}
1560
- total_score = 0
1561
- for tn in task_nums:
1562
- if tn not in tasks:
1563
- continue
1564
- if tn in EXCLUDED_TASKS:
1565
- print(f"Task {tn:3d}: EXCLUDED (officially)")
1566
- continue
1567
-
1568
- td = tasks[tn]['data']
1569
- ok, sname, sz, t_task, model_path = solve_task(tn, td, output_dir, conv_budget)
1570
-
1571
- if ok:
1572
- macs, memory, params = score_network(model_path)
1573
- if macs is None:
1574
- macs, memory, params = 0, 0, 0
1575
- cost = macs + memory + params
1576
- score = max(1.0, 25.0 - math.log(max(1, cost)))
1577
- total_score += score
1578
-
1579
- results[tn] = (sname, t_task, sz)
1580
- costs_dict[tn] = cost
1581
- print(f"Task {tn:3d}: {sname:25s} {score:7.3f} {cost:>12} {t_task:7.3f}s ({sz:>8,} bytes)")
1582
- else:
1583
- print(f"Task {tn:3d}: UNSOLVED {t_task:7.3f}s")
1584
- cost = 0
1585
-
1586
- if use_wandb and wandb is not None:
1587
- wandb.log({
1588
- "task_id": tn,
1589
- "solver": sname if ok else "unsolved",
1590
- "onnx_bytes": sz if ok else 0,
1591
- "task_time_sec": t_task,
1592
- "cost": cost,
1593
- "score": score if ok else 0,
1594
- })
1595
-
1596
- return results, costs_dict, total_score
1597
-
1598
 
1599
  def main():
1600
- parser = argparse.ArgumentParser()
1601
- parser.add_argument('--data_dir', default='ARC-AGI/data/training/')
1602
- parser.add_argument('--arcgen_dir', default='', help='Path to ARC-GEN-100K/ directory')
1603
- parser.add_argument('--output_dir', default='submission')
1604
- parser.add_argument('--kaggle', action='store_true')
1605
- parser.add_argument('--conv_budget', type=float, default=30.0)
1606
- parser.add_argument('--tasks', type=str, default='')
1607
- parser.add_argument('--device', type=str, default='auto', choices=['auto','cpu','cuda'])
1608
- parser.add_argument('--use_wandb', action='store_true')
1609
  args = parser.parse_args()
1610
- global ORT_PROVIDERS
1611
- config = {
1612
- "device": args.device,
1613
- "conv_budget": args.conv_budget,
1614
- "data_dir": args.data_dir,
1615
- "arcgen_dir": args.arcgen_dir,
1616
- "tasks": args.tasks,
1617
- }
1618
-
1619
- if args.device == 'cuda':
1620
- ORT_PROVIDERS = ['CUDAExecutionProvider', 'CPUExecutionProvider']
1621
- elif args.device == 'cpu':
1622
- ORT_PROVIDERS = ['CPUExecutionProvider']
1623
-
1624
- ort.set_default_logger_severity(3)
1625
- print(f"Using providers: {ORT_PROVIDERS}")
1626
-
1627
- if args.kaggle:
1628
- tasks = load_tasks_kaggle(args.data_dir)
1629
- else:
1630
- arcgen = args.arcgen_dir if args.arcgen_dir else None
1631
- tasks = load_tasks_dir(args.data_dir, arcgen_dir=arcgen)
1632
 
1633
- # Count arc-gen examples
1634
- total_arcgen = sum(len(t['data'].get('arc-gen', [])) for t in tasks.values())
1635
- print(f"Loaded {len(tasks)} tasks ({total_arcgen} ARC-GEN examples)")
1636
- print(f"Excluded tasks: {sorted(EXCLUDED_TASKS)}")
1637
 
1638
- task_nums = [int(t) for t in args.tasks.split(',')] if args.tasks else sorted(tasks.keys())
1639
- active_tasks = [t for t in task_nums if t not in EXCLUDED_TASKS]
1640
- print(f"Solving {len(active_tasks)} active tasks (skipping {len(task_nums) - len(active_tasks)} excluded)")
1641
- print(f"Conv budget: {args.conv_budget}s per task")
1642
- print("=" * 70)
1643
- t0 = time.time()
1644
 
1645
- if args.use_wandb and wandb is not None:
1646
- with wandb.init(project="neurogolf", name="solver_run", config=config):
1647
- results, costs_dict, total_score = run_tasks(task_nums, tasks, args.output_dir, args.conv_budget, use_wandb=True)
 
 
1648
  else:
1649
- results, costs_dict, total_score = run_tasks(task_nums, tasks, args.output_dir, args.conv_budget, use_wandb=False)
1650
-
1651
- elapsed = time.time() - t0
1652
- print(f"\n{'='*70}")
1653
- print(f"Solved: {len(results)}/{len(active_tasks)} active tasks in {elapsed:.0f}s")
1654
- solver_names = [v[0] for v in results.values()]
1655
- sc = Counter(solver_names)
1656
- for s, c in sc.most_common(): print(f" {s}: {c}")
 
 
 
 
1657
 
1658
- # Generate submission
1659
- outdir = args.output_dir
1660
- n_files = len([f for f in os.listdir(outdir) if f.endswith('.onnx')])
1661
- total_size = sum(os.path.getsize(os.path.join(outdir, f))
1662
- for f in os.listdir(outdir) if f.endswith('.onnx'))
1663
 
1664
- # Create submission.zip
1665
- zip_path = os.path.join(os.path.dirname(outdir) or '.', 'submission.zip')
1666
- buf = io.BytesIO()
1667
- with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
1668
- for f in sorted(os.listdir(outdir)):
1669
- if f.endswith('.onnx'):
1670
- zf.write(os.path.join(outdir, f), f)
1671
- zip_bytes = buf.getvalue()
1672
- with open(zip_path, 'wb') as f:
1673
- f.write(zip_bytes)
1674
- zip_size = len(zip_bytes)
1675
 
1676
- # Create submission.csv
1677
- csv_path = os.path.join(os.path.dirname(outdir) or '.', 'submission.csv')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1678
  with open(csv_path, 'w', newline='') as f:
1679
  w = csv.writer(f)
1680
- w.writerow(['task_id', 'total_cost'])
1681
- for tn in sorted(costs_dict.keys()):
1682
- w.writerow([f'task{tn:03d}', costs_dict[tn]])
1683
-
1684
- # Estimate LB score: solved tasks get their score, unsolved get 1.0
1685
- unsolved_count = len(active_tasks) - len(results)
1686
- est_lb = total_score + unsolved_count * 1.0
1687
-
1688
- print(f"\n{n_files} ONNX files, {total_size/1024:.1f} KB uncompressed")
1689
- print(f"ZIP size: {zip_size/1024:.1f} KB / {MAX_FILESIZE/1024:.0f} KB limit {'OK' if zip_size <= MAX_FILESIZE else 'OVER!'}")
1690
- print(f"Estimated LB score: {est_lb:.1f} (solved: {total_score:.1f} + unsolved: {unsolved_count}×1.0)")
1691
- print(f"Written: {zip_path} | {csv_path}")
 
 
 
1692
 
1693
  if __name__ == '__main__':
1694
- main()
 
1
  #!/usr/bin/env python3
2
  """
3
+ ARC-AGI NeuroGolf Championship - Complete Solver v5
4
+ Format: [1,10,30,30] one-hot input/output, opset 17, IR version 8.
5
+
6
+ v5 CHANGES (from v4):
7
+ - Opset 10 → 17, IR 10 → 8
8
+ - s_flip: Slice(step=-1) replaces Gather → 0 MACs (was ~165K)
9
+ - s_rotate k=2: double Slice(step=-1) → 0 MACs (was ~165K)
10
+ - s_rotate k=1,3: Slice+Transpose for square grids (0 MACs), Gather fallback for non-square
11
+ - All Pad nodes: tensor-based pads input (opset 17 requirement)
12
+ - All other solvers unchanged from v4
 
13
 
14
  Solvers:
15
  - Analytical: identity, constant, color_map, transpose, flip, rotate, tile, upscale,
 
45
  BATCH, CH, GH, GW = 1, 10, 30, 30
46
  GRID_SHAPE = [BATCH, CH, GH, GW]
47
  DT = TensorProto.FLOAT
48
+ IR = 8
49
+ OPSET = [helper.make_opsetid("", 17)]
50
+
51
+ INT64_MIN = int(np.iinfo(np.int64).min)
52
 
53
  # Officially excluded tasks (score 0 regardless)
54
  EXCLUDED_TASKS = {21, 55, 80, 184, 202, 366}
 
56
  # Max ARC-GEN examples to use for validation (to keep runtime reasonable)
57
  MAX_ARCGEN_VALIDATE = 30
58
  # Max ARC-GEN examples for conv fitting (keep separate from validation!)
59
+ MAX_ARCGEN_FIT = 0
 
 
60
 
61
  def get_providers():
62
  return ['CPUExecutionProvider']
 
75
  with open(os.path.join(data_dir, f)) as fh:
76
  data = json.load(fh)
77
  hex_id = f.replace('.json','')
 
78
  if arcgen_dir and os.path.exists(os.path.join(arcgen_dir, f)):
79
  with open(os.path.join(arcgen_dir, f)) as fh:
80
  arcgen_examples = json.load(fh)
 
107
  return arr
108
 
109
  def validate(path, td):
110
+ """Validate model against ALL examples: train + test + arc-gen."""
 
111
  try:
112
  opts = ort.SessionOptions()
113
  opts.log_severity_level = 3
 
115
  except:
116
  return False
117
  examples = td['train'] + td['test']
 
118
  if 'arc-gen' in td:
119
  examples = examples + td['arc-gen'][:MAX_ARCGEN_VALIDATE]
120
  for ex in examples:
 
160
  MAX_FILESIZE = int(1.44 * 1024 * 1024)
161
 
162
  def score_network(path):
163
+ """Static profiler matching Kaggle scoring: cost = macs + memory + params."""
 
164
  if HAS_ONNX_TOOL:
165
  try:
166
  return _score_network_official(path)
 
210
 
211
  return int(macs), int(nbytes), int(params)
212
 
213
+ # ============================================================
214
+ # OPSET 17 HELPERS
215
+ # ============================================================
216
+
217
+ def _make_int64_init(name, values):
218
+ """Create an int64 tensor initializer from a list of values."""
219
+ return numpy_helper.from_array(np.array(values, dtype=np.int64), name)
220
+
221
+ def _build_pad_node(input_name, output_name, pad_h, pad_w, inits, suffix=''):
222
+ """Build a Pad node with tensor-based pads input (opset 17).
223
+ Pads [0,0,0,0, 0,0,pad_h,pad_w] — only spatial end-padding."""
224
+ pads_name = f'pads{suffix}'
225
+ cv_name = f'pad_cv{suffix}'
226
+ pads_arr = np.array([0, 0, 0, 0, 0, 0, pad_h, pad_w], dtype=np.int64)
227
+ inits.append(numpy_helper.from_array(pads_arr, pads_name))
228
+ inits.append(numpy_helper.from_array(np.array(0.0, dtype=np.float32), cv_name))
229
+ return helper.make_node('Pad', [input_name, pads_name, cv_name], [output_name], mode='constant')
230
+
231
+ def _build_slice_crop(input_name, output_name, IH, IW, inits, suffix=''):
232
+ """Build Slice node to crop [1,10,30,30] to [1,10,IH,IW]."""
233
+ st_name = f'crop_st{suffix}'
234
+ en_name = f'crop_en{suffix}'
235
+ inits.append(_make_int64_init(st_name, [0, 0, 0, 0]))
236
+ inits.append(_make_int64_init(en_name, [1, 10, IH, IW]))
237
+ return helper.make_node('Slice', [input_name, st_name, en_name], [output_name])
238
+
239
+ def _build_slice_reverse(input_name, output_name, axis, dim_size, inits, suffix=''):
240
+ """Build Slice(step=-1) to reverse one axis. Zero MACs."""
241
+ st_name = f'rev_st{suffix}'
242
+ en_name = f'rev_en{suffix}'
243
+ ax_name = f'rev_ax{suffix}'
244
+ sp_name = f'rev_sp{suffix}'
245
+ inits.append(_make_int64_init(st_name, [dim_size - 1]))
246
+ inits.append(_make_int64_init(en_name, [INT64_MIN]))
247
+ inits.append(_make_int64_init(ax_name, [axis]))
248
+ inits.append(_make_int64_init(sp_name, [-1]))
249
+ return helper.make_node('Slice', [input_name, st_name, en_name, ax_name, sp_name], [output_name])
250
+
251
  def mk(nodes, inits=None):
252
  x = helper.make_tensor_value_info("input", DT, GRID_SHAPE)
253
  y = helper.make_tensor_value_info("output", DT, GRID_SHAPE)
 
260
  for ex in td['train'] + td['test']]
261
 
262
  def get_exs_for_fitting(td):
263
+ """Get examples for conv fitting. Uses train+test + arc-gen WHERE SIZES MATCH."""
 
 
 
264
  base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
265
  for ex in td['train'] + td['test']]
266
 
267
  if not base_exs:
268
  return base_exs
269
 
 
270
  base_shapes = {inp.shape for inp, _ in base_exs}
271
  if len(base_shapes) != 1:
272
+ return base_exs
273
 
274
  base_shape = list(base_shapes)[0]
275
 
 
276
  ag_exs = []
277
  for ex in td.get('arc-gen', []):
278
  inp = np.array(ex['input'], dtype=np.int64)
 
280
  if inp.shape == base_shape and out.shape == base_exs[0][1].shape:
281
  ag_exs.append((inp, out))
282
 
 
283
  return base_exs + ag_exs[:10]
284
 
285
  def get_exs_for_fitting_variable(td):
286
+ """Get examples for variable-shape conv fitting."""
 
 
287
  base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
288
  for ex in td['train'] + td['test']]
289
 
 
290
  ag_exs = []
291
  for ex in td.get('arc-gen', []):
292
  inp = np.array(ex['input'], dtype=np.int64)
 
303
  return list(shapes)[0] if len(shapes) == 1 else None
304
 
305
  # ============================================================
306
+ # GATHER HELPERS (kept for solvers that need them)
307
  # ============================================================
308
 
309
  def _build_gather_model(OH, OW, idx):
310
+ """Gather-based spatial remapping. Used for concat, spatial_gather, etc."""
 
311
  flat_idx = np.zeros((GH*GW,), dtype=np.int64)
312
  mask = np.zeros((1,1,GH,GW), dtype=np.float32)
313
  for oi in range(OH):
 
329
  return mk(nodes, inits)
330
 
331
  def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
332
+ """Gather-based spatial remapping with constant pixels."""
333
  flat_idx = np.zeros((GH*GW,), dtype=np.int64)
334
  gather_mask = np.zeros((1,1,GH,GW), dtype=np.float32)
335
  const_oh = np.zeros((1,10,GH,GW), dtype=np.float32)
 
379
  if iv in cm and cm[iv] != ov: return None
380
  cm[iv] = ov
381
 
 
382
  is_permutation = (set(cm.keys()) == set(cm.values()))
383
 
384
  if is_permutation:
 
385
  gather_ch = np.arange(10, dtype=np.int32)
386
  for src, dst in cm.items():
387
  if 0 <= src < 10 and 0 <= dst < 10:
 
390
  nodes = [helper.make_node('Gather', ['input', 'gi'], ['output'], axis=1)]
391
  return mk(nodes, inits)
392
  else:
 
393
  W = np.zeros((10,10,1,1), dtype=np.float32)
394
  for ic in range(10):
395
  W[cm.get(ic,ic), ic, 0, 0] = 1.0
 
397
  [numpy_helper.from_array(W, 'W')])
398
 
399
  def s_transpose(td):
400
+ """Transpose spatial dimensions. Already near-zero cost with Transpose node."""
401
  for ex in td['train']+td['test']:
402
  if not np.array_equal(np.array(ex['output']), np.array(ex['input']).T): return None
403
  return mk([helper.make_node('Transpose', ['input'], ['output'], perm=[0,1,3,2])])
404
 
405
  def s_flip(td):
406
+ """Flip using Slice(step=-1) — zero MACs, replaces old Gather approach."""
407
  exs = get_exs(td)
408
  sp = fixed_shapes(td)
409
  if sp is None: return None
410
  (IH,IW),(OH,OW) = sp
411
  if (IH,IW) != (OH,OW): return None
412
+
413
  for axis, flip_fn in [(0, np.flipud), (1, np.fliplr)]:
414
  if all(np.array_equal(out, flip_fn(inp)) for inp, out in exs):
415
+ # axis 0 = flipud = reverse dim 2 (H)
416
+ # axis 1 = fliplr = reverse dim 3 (W)
417
+ onnx_axis = 2 if axis == 0 else 3
418
+ dim_size = IH if axis == 0 else IW
419
+ pad_h, pad_w = GH - IH, GW - IW
420
+
421
+ inits = []
422
+ nodes = []
423
+
424
+ # Step 1: Crop input to [1,10,IH,IW]
425
+ nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
426
+
427
+ # Step 2: Reverse the target axis
428
+ nodes.append(_build_slice_reverse('cropped', 'flipped', onnx_axis, dim_size, inits))
429
+
430
+ # Step 3: Pad back to [1,10,30,30]
431
+ nodes.append(_build_pad_node('flipped', 'output', pad_h, pad_w, inits))
432
+
433
+ return mk(nodes, inits)
434
  return None
435
 
436
  def s_rotate(td):
437
+ """Rotate using Slice+Transpose combos — zero MACs for square grids and k=2.
438
+ Falls back to Gather for non-square k=1,3 rotations."""
439
  exs = get_exs(td)
440
  sp = fixed_shapes(td)
441
  if sp is None: return None
442
  (IH,IW),(OH,OW) = sp
443
+
444
  for k in [1, 2, 3]:
445
+ if not all(np.array_equal(out, np.rot90(inp, k)) for inp, out in exs):
446
+ continue
447
+
448
+ if k == 2:
449
+ # 180° = flipud + fliplr works for any shape
450
+ # output[r,c] = input[IH-1-r, IW-1-c]
451
+ pad_h, pad_w = GH - OH, GW - OW
452
+ inits = []
453
+ nodes = []
454
+
455
+ # Crop to [1,10,IH,IW]
456
+ nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
457
+ # Reverse axis 2 (H)
458
+ nodes.append(_build_slice_reverse('cropped', 'flip_h', 2, IH, inits, suffix='_h'))
459
+ # Reverse axis 3 (W)
460
+ nodes.append(_build_slice_reverse('flip_h', 'rotated', 3, IW, inits, suffix='_w'))
461
+ # Pad back
462
+ nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
463
+
464
+ return mk(nodes, inits)
465
+
466
+ elif k == 1 and IH == IW:
467
+ # rot90 CCW on square grid: transpose then flipud
468
+ # output[r,c] = input[c, IH-1-r]
469
+ # Step 1: Transpose [0,1,3,2]: temp[r,c] = input[c,r]
470
+ # Step 2: Reverse axis 2: out[r,c] = temp[IH-1-r,c] = input[c,IH-1-r] ✓
471
+ pad_h, pad_w = GH - IH, GW - IW
472
+ inits = []
473
+ nodes = []
474
+
475
+ nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
476
+ nodes.append(helper.make_node('Transpose', ['cropped'], ['transposed'], perm=[0,1,3,2]))
477
+ nodes.append(_build_slice_reverse('transposed', 'rotated', 2, IH, inits))
478
+ nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
479
+
480
+ return mk(nodes, inits)
481
+
482
+ elif k == 3 and IH == IW:
483
+ # rot270 CCW (= 90 CW) on square grid: flipud then transpose
484
+ # output[r,c] = input[IW-1-c, r]
485
+ # Step 1: Reverse axis 2: temp[r,c] = input[IH-1-r,c]
486
+ # Step 2: Transpose [0,1,3,2]: out[r,c] = temp[c,r] = input[IH-1-c,r] ✓ (IH=IW)
487
+ pad_h, pad_w = GH - IH, GW - IW
488
+ inits = []
489
+ nodes = []
490
+
491
+ nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
492
+ nodes.append(_build_slice_reverse('cropped', 'flipped', 2, IH, inits))
493
+ nodes.append(helper.make_node('Transpose', ['flipped'], ['rotated'], perm=[0,1,3,2]))
494
+ nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
495
+
496
+ return mk(nodes, inits)
497
+
498
+ else:
499
+ # Non-square k=1 or k=3: fall back to Gather (still correct, just higher cost)
500
+ idx = np.zeros((OH,OW,2), dtype=np.int64)
501
+ for r in range(OH):
502
+ for c in range(OW):
503
+ if k == 1: sr, sc = c, IH-1-r
504
+ elif k == 3: sr, sc = IW-1-c, r
505
+ idx[r,c] = [sr, sc]
506
+ return _build_gather_model(OH, OW, idx)
507
  return None
508
 
509
  def s_spatial_gather(td):
 
529
  def s_varshape_spatial_gather(td):
530
  """Spatial gather that works for variable-shape tasks by embedding in 30x30."""
531
  sp = fixed_shapes(td)
532
+ if sp is not None: return None
533
  exs = get_exs(td)
534
 
 
535
  exs_30 = []
536
  for inp, out in exs:
537
  ih, iw = inp.shape
 
583
  if not np.array_equal(out, np.tile(inp, (rH, rW))): return None
584
  pad_h, pad_w = 30-OH, 30-OW
585
  inits = [
586
+ _make_int64_init('st', [0,0,0,0]),
587
+ _make_int64_init('en', [1,10,IH,IW]),
588
+ _make_int64_init('rp', [1,1,rH,rW]),
589
  ]
590
  nodes = [
591
  helper.make_node('Slice', ['input','st','en'], ['cr']),
592
  helper.make_node('Tile', ['cr','rp'], ['tl']),
 
593
  ]
594
+ nodes.append(_build_pad_node('tl', 'output', pad_h, pad_w, inits))
595
  return mk(nodes, inits)
596
 
597
  def s_upscale(td):
 
672
  (IH,IW),(OH,OW) = sp
673
  if IH == OH and IW == OW: return None
674
 
 
675
  if OH % IH != 0 or OW % IW != 0: return None
676
  rH, rW = OH // IH, OW // IW
677
  if rH * rW > 16 or rH * rW < 2: return None
678
  if OH > 30 or OW > 30: return None
679
 
 
680
  transforms = [
681
  ('id', lambda x: x),
682
  ('fliplr', lambda x: np.fliplr(x)),
 
688
  ('T_fliplr', lambda x: np.fliplr(x.T)),
689
  ]
690
 
 
691
  block_transforms = {}
692
  for bi in range(rH):
693
  for bj in range(rW):
 
707
  return None
708
  block_transforms[(bi, bj)] = found
709
 
 
710
  idx = np.zeros((OH, OW, 2), dtype=np.int64)
711
  for bi in range(rH):
712
  for bj in range(rW):
 
724
  elif tname == 'T_fliplr': sr, sc = IW-1-lc, lr
725
  idx[oi, oj] = [sr, sc]
726
 
 
727
  for inp, out in exs:
728
  reconstructed = np.zeros_like(out)
729
  for oi in range(OH):
 
755
  else:
756
  if not np.all(block == 0):
757
  return None
 
 
 
 
 
 
 
 
 
758
  return None
759
 
760
  def s_kronecker(td):
 
773
  if not np.array_equal(out, expected):
774
  return None
775
 
 
776
  idx = np.zeros((OH,OW,2), dtype=np.int64)
777
  for r in range(OH):
778
  for c in range(OW):
 
801
  if not np.all(block == 0):
802
  return None
803
 
 
804
  idx = np.zeros((OH,OW,2), dtype=np.int64)
805
  cst = np.full((OH,OW), -1, dtype=np.int64)
806
  for bi in range(rH):
 
837
  if not np.array_equal(shifted, out):
838
  ok = False; break
839
  if not ok: continue
 
840
  idx = np.zeros((OH, OW, 2), dtype=np.int64)
841
+ cst = np.full((OH, OW), 0, dtype=np.int64)
842
  for r in range(OH):
843
  for c in range(OW):
844
  sr, sc = r - dr, c - dc
 
873
 
874
  for d in ('down', 'up', 'left', 'right'):
875
  if all(np.array_equal(_gravity(inp, d), out) for inp, out in exs):
 
 
 
 
876
  return None
877
  return None
878
 
 
887
  for inp, out in exs:
888
  expected = np.concatenate([inp, np.flip(inp, 1)], 1)
889
  if not np.array_equal(expected, out): return None
 
890
  idx = np.zeros((OH, OW, 2), dtype=np.int64)
891
  for r in range(OH):
892
  for c in range(OW):
 
1050
  if len(shapes) != 1: return None
1051
  IH, IW = shapes.pop()
1052
 
 
1053
  fit_exs = get_exs_for_fitting(td)
 
1054
  fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape and i.shape == (IH, IW)]
1055
 
1056
  t_start = time.time()
 
1064
  pad_h, pad_w = GH - IH, GW - IW
1065
 
1066
  inits = [
1067
+ _make_int64_init('sl_st', [0,0,0,0]),
1068
+ _make_int64_init('sl_en', [1,10,IH,IW]),
1069
  numpy_helper.from_array(Wconv, 'W'),
1070
  ]
1071
  conv_inputs = ['grid', 'W']
 
1079
  helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
1080
  ]
1081
  add_onehot_block(nodes, inits, 'am', 'oh_out')
1082
+ nodes.append(_build_pad_node('oh_out', 'output', pad_h, pad_w, inits))
 
 
 
1083
 
1084
  model = mk(nodes, inits)
1085
  onnx.save(model, path)
 
1092
  for inp, out in exs:
1093
  if inp.shape != out.shape: return None
1094
 
 
1095
  fit_exs = get_exs_for_fitting_variable(td)
1096
  fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape]
1097
 
 
1182
 
1183
  pad_h, pad_w = GH - OH, GW - OW
1184
  inits = [
1185
+ _make_int64_init('sl_st', [0,0,0,0]),
1186
+ _make_int64_init('sl_en', [1,10,IH,IW]),
1187
  numpy_helper.from_array(Wconv, 'W'),
1188
+ _make_int64_init('cr_st', [0,0,dr_off,dc_off]),
1189
+ _make_int64_init('cr_en', [1,10,dr_off+OH,dc_off+OW]),
1190
  ]
1191
  conv_inputs = ['grid', 'W']
1192
  if B is not None:
 
1200
  helper.make_node('ArgMax', ['co_crop'], ['am'], axis=1, keepdims=1),
1201
  ]
1202
  add_onehot_block(nodes, inits, 'am', 'oh_out')
1203
+ nodes.append(_build_pad_node('oh_out', 'output', pad_h, pad_w, inits))
 
 
 
1204
 
1205
  model = mk(nodes, inits)
1206
  onnx.save(model, path)
 
1208
  return None
1209
 
1210
  def solve_conv_var_diff(td, path, time_budget=30.0):
1211
+ """Variable diff-shape conv: Conv(30x30) -> ArgMax -> Equal+Cast -> Mul(output_mask)."""
 
1212
  exs = get_exs(td)
1213
 
1214
  t_start = time.time()
 
1256
  Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
1257
  B = None
1258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1259
  # For tasks where output fits within input bounds, use input mask
1260
  all_output_within_input = all(
1261
+ out_g.shape[0] <= inp_g.shape[0] and out_g.shape[1] <= inp_g.shape[1]
1262
  for inp_g, out_g in exs
1263
  )
1264
 
1265
+ if all_output_within_input:
1266
+ inits = [numpy_helper.from_array(Wconv, 'W')]
1267
+ conv_inputs = ['input', 'W']
1268
+ if B is not None:
1269
+ inits.append(numpy_helper.from_array(B, 'B'))
1270
+ conv_inputs.append('B')
1271
+
1272
+ nodes = [
1273
+ helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1),
1274
+ helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
1275
+ helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
1276
+ ]
1277
+ add_onehot_block(nodes, inits, 'am', 'oh_out')
1278
+ nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
1279
+
1280
+ model = mk(nodes, inits)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1281
  onnx.save(model, path)
1282
+ if validate(path, td): return 'conv_var_diff', model
 
 
 
 
 
 
 
 
1283
  return None
1284
 
1285
  # ============================================================
1286
+ # MAIN SOLVER
1287
  # ============================================================
1288
 
1289
  ANALYTICAL_SOLVERS = [
1290
+ ('identity', s_identity),
1291
+ ('constant', s_constant),
1292
+ ('color_map', s_color_map),
1293
+ ('transpose', s_transpose),
1294
+ ('flip', s_flip),
1295
+ ('rotate', s_rotate),
1296
+ ('shift', s_shift),
1297
+ ('tile', s_tile),
1298
+ ('upscale', s_upscale),
1299
+ ('kronecker', s_kronecker),
1300
  ('nonuniform_scale', s_nonuniform_scale),
1301
+ ('mirror_h', s_mirror_h),
1302
+ ('mirror_v', s_mirror_v),
1303
+ ('quad_mirror', s_quad_mirror),
1304
+ ('concat', s_concat),
1305
+ ('concat_enhanced', s_concat_enhanced),
1306
  ('diagonal_tile', s_diagonal_tile),
1307
  ('fixed_crop', s_fixed_crop),
1308
  ('spatial_gather', s_spatial_gather),
 
1309
  ('varshape_spatial_gather', s_varshape_spatial_gather),
1310
  ]
1311
 
1312
+ def solve_task(tn, td, output_dir, conv_budget=30.0, verbose=True):
1313
+ """Try all solvers on a task. Returns (solver_name, score) or None."""
1314
+ path = os.path.join(output_dir, f"task{tn:03d}.onnx")
 
 
 
 
 
1315
 
1316
+ # Try analytical solvers first (instant, arc-gen safe)
1317
+ for name, solver in ANALYTICAL_SOLVERS:
1318
  try:
1319
+ model = solver(td)
1320
+ except Exception as e:
1321
+ if verbose: print(f" {name}: ERROR {e}")
1322
+ continue
1323
+ if model is not None:
1324
  onnx.save(model, path)
1325
+ if validate(path, td):
1326
+ macs, mem, par = score_network(path)
1327
+ if macs is not None:
1328
+ cost = macs + mem + par
1329
+ score = max(1.0, 25.0 - math.log(cost)) if cost > 0 else 25.0
1330
+ if verbose: print(f" {name}: PASS cost={cost} score={score:.2f}")
1331
+ return name, score
1332
+ else:
1333
+ if verbose: print(f" {name}: model built but FAILED validation")
1334
+
1335
+ # Try conv solvers
1336
+ conv_solvers = [
1337
+ ('conv_fixed', solve_conv_fixed),
1338
+ ('conv_variable', solve_conv_variable),
1339
+ ('conv_diffshape', solve_conv_diffshape),
1340
+ ('conv_var_diff', solve_conv_var_diff),
1341
+ ]
1342
+ for name, solver in conv_solvers:
1343
+ try:
1344
+ result = solver(td, path, time_budget=conv_budget)
1345
+ except Exception as e:
1346
+ if verbose: print(f" {name}: ERROR {e}")
1347
+ continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1348
  if result is not None:
1349
+ solver_type, model = result
1350
+ onnx.save(model, path)
1351
+ macs, mem, par = score_network(path)
1352
+ if macs is not None:
1353
+ cost = macs + mem + par
1354
+ score = max(1.0, 25.0 - math.log(cost)) if cost > 0 else 25.0
1355
+ if verbose: print(f" {solver_type}: PASS cost={cost} score={score:.2f}")
1356
+ return solver_type, score
1357
 
1358
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1359
 
1360
  def main():
1361
+ parser = argparse.ArgumentParser(description='NeuroGolf Solver v5')
1362
+ parser.add_argument('--data_dir', type=str, default=None, help='Path to ARC-AGI training data')
1363
+ parser.add_argument('--kaggle_dir', type=str, default=None, help='Path to Kaggle task JSONs')
1364
+ parser.add_argument('--arcgen_dir', type=str, default=None, help='Path to ARC-GEN data directory')
1365
+ parser.add_argument('--output_dir', type=str, default='submission', help='Output directory for ONNX models')
1366
+ parser.add_argument('--conv_budget', type=float, default=30.0, help='Time budget per conv solver per task (seconds)')
1367
+ parser.add_argument('--task', type=int, default=None, help='Solve a single task number')
1368
+ parser.add_argument('--verbose', action='store_true', default=True)
1369
+ parser.add_argument('--quiet', action='store_true', default=False)
1370
  args = parser.parse_args()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1371
 
1372
+ if args.quiet:
1373
+ args.verbose = False
 
 
1374
 
1375
+ os.makedirs(args.output_dir, exist_ok=True)
 
 
 
 
 
1376
 
1377
+ # Load tasks
1378
+ if args.kaggle_dir:
1379
+ tasks = load_tasks_kaggle(args.kaggle_dir)
1380
+ elif args.data_dir:
1381
+ tasks = load_tasks_dir(args.data_dir, args.arcgen_dir)
1382
  else:
1383
+ # Try common paths
1384
+ for p in ['/kaggle/input/competitions/neurogolf-2026/',
1385
+ 'ARC-AGI/data/training/']:
1386
+ if os.path.exists(p):
1387
+ if 'kaggle' in p:
1388
+ tasks = load_tasks_kaggle(p)
1389
+ else:
1390
+ tasks = load_tasks_dir(p, args.arcgen_dir)
1391
+ break
1392
+ else:
1393
+ print("ERROR: No data directory found. Use --data_dir or --kaggle_dir")
1394
+ sys.exit(1)
1395
 
1396
+ # Solve tasks
1397
+ results = {}
1398
+ total_score = 0.0
1399
+ solved = 0
1400
+ t_total = time.time()
1401
 
1402
+ task_nums = [args.task] if args.task else sorted(tasks.keys())
 
 
 
 
 
 
 
 
 
 
1403
 
1404
+ for tn in task_nums:
1405
+ if tn in EXCLUDED_TASKS:
1406
+ if args.verbose: print(f"Task {tn:3d}: EXCLUDED")
1407
+ continue
1408
+ if tn not in tasks:
1409
+ if args.verbose: print(f"Task {tn:3d}: NOT FOUND")
1410
+ continue
1411
+
1412
+ td = tasks[tn]['data']
1413
+ hex_id = tasks[tn]['hex']
1414
+
1415
+ if args.verbose: print(f"\nTask {tn:3d} ({hex_id}):")
1416
+
1417
+ result = solve_task(tn, td, args.output_dir, args.conv_budget, args.verbose)
1418
+
1419
+ if result is not None:
1420
+ solver_type, score = result
1421
+ results[tn] = {'solver': solver_type, 'score': score, 'hex': hex_id}
1422
+ total_score += score
1423
+ solved += 1
1424
+ else:
1425
+ # Unsolved tasks score 1.0 (minimum)
1426
+ total_score += 1.0
1427
+ if args.verbose: print(f" UNSOLVED")
1428
+
1429
+ # Summary
1430
+ elapsed = time.time() - t_total
1431
+ print(f"\n{'='*60}")
1432
+ print(f"RESULTS: {solved}/{len(task_nums)} tasks solved")
1433
+ print(f"Total score: {total_score:.1f}")
1434
+ print(f"Time: {elapsed:.1f}s")
1435
+ print(f"{'='*60}")
1436
+
1437
+ # Breakdown by solver type
1438
+ solver_counts = Counter(r['solver'] for r in results.values())
1439
+ solver_scores = {}
1440
+ for tn, r in results.items():
1441
+ st = r['solver']
1442
+ solver_scores[st] = solver_scores.get(st, 0) + r['score']
1443
+
1444
+ print("\nSolver breakdown:")
1445
+ for st in sorted(solver_counts.keys()):
1446
+ print(f" {st}: {solver_counts[st]} tasks, total score {solver_scores[st]:.1f}, avg {solver_scores[st]/solver_counts[st]:.2f}")
1447
+
1448
+ # Generate submission.csv
1449
+ csv_path = os.path.join(args.output_dir, 'submission.csv')
1450
  with open(csv_path, 'w', newline='') as f:
1451
  w = csv.writer(f)
1452
+ w.writerow(['task_num', 'hex_id', 'solver', 'score', 'onnx_file'])
1453
+ for tn in sorted(results.keys()):
1454
+ r = results[tn]
1455
+ w.writerow([tn, r['hex'], r['solver'], f"{r['score']:.3f}", f"task{tn:03d}.onnx"])
1456
+
1457
+ # Generate submission.zip
1458
+ zip_path = os.path.join(args.output_dir, 'submission.zip')
1459
+ with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
1460
+ for tn in sorted(results.keys()):
1461
+ onnx_path = os.path.join(args.output_dir, f"task{tn:03d}.onnx")
1462
+ if os.path.exists(onnx_path):
1463
+ zf.write(onnx_path, f"task{tn:03d}.onnx")
1464
+
1465
+ print(f"\nSubmission files: {csv_path}, {zip_path}")
1466
+ print(f"Models in zip: {len(results)}")
1467
 
1468
  if __name__ == '__main__':
1469
+ main()