rogermt committed on
Commit
6303972
·
verified ·
1 Parent(s): 5be5315

v3: 306/400 - Fix GatherElements->Gather (opset 10), add concat_enhanced, varshape_spatial_gather, conv_var_diff solvers

Browse files
Files changed (1) hide show
  1. neurogolf_solver.py +447 -165
neurogolf_solver.py CHANGED
@@ -1,13 +1,15 @@
1
  #!/usr/bin/env python3
2
  """
3
- ARC-AGI NeuroGolf Championship - Complete Solver v2
4
  Format: [1,10,30,30] one-hot input/output, opset 10, IR version 10.
5
  Solvers:
6
- - Analytical: identity, constant, color_map, transpose, flip, rotate, tile, upscale, concat, spatial_gather
7
- - Conv (fixed shape): Slice -> Conv -> ArgMax -> OneHot -> Pad
8
- - Conv (variable shape): Conv(30x30) -> ArgMax -> OneHot -> Mul(mask) [NEW]
9
- - Conv (diff shape): Slice -> Conv -> Slice(crop) -> ArgMax -> OneHot -> Pad [NEW]
10
- Results: 293/400 tasks solved (was 128/400 in v1)
 
 
11
  Usage:
12
  python neurogolf_solver.py --data_dir ARC-AGI/data/training/ --output_dir submission
13
  python neurogolf_solver.py --data_dir ARC-AGI/data/training/ --output_dir submission --conv_budget 60
@@ -19,9 +21,17 @@ import onnx
19
  from onnx import helper, TensorProto, numpy_helper
20
  import onnxruntime as ort
21
  from collections import Counter
22
- import wandb
23
 
24
- from neurogolf_utils import score_network
 
 
 
 
 
 
 
 
 
25
 
26
  BATCH, CH, GH, GW = 1, 10, 30, 30
27
  GRID_SHAPE = [BATCH, CH, GH, GW]
@@ -30,10 +40,14 @@ IR = 10
30
  OPSET = [helper.make_opsetid("", 10)]
31
 
32
def get_providers():
    """Return the ONNX Runtime execution providers to request.

    Only the CPU provider is listed: CPU is faster for tiny 30x30 grids.
    """
    providers = ['CPUExecutionProvider']
    return providers
34
 
35
  ORT_PROVIDERS = get_providers()
36
 
 
 
 
 
37
  def load_tasks_dir(data_dir):
38
  files = sorted(f for f in os.listdir(data_dir) if f.endswith('.json'))
39
  tasks = {}
@@ -94,6 +108,65 @@ def fixed_shapes(td):
94
  shapes.add((inp.shape, out.shape))
95
  return list(shapes)[0] if len(shapes) == 1 else None
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  # ============================================================
98
  # ANALYTICAL SOLVERS
99
  # ============================================================
@@ -179,6 +252,44 @@ def s_spatial_gather(td):
179
  if not found and cst[oi,oj] < 0: return None
180
  return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  def s_tile(td):
183
  exs = get_exs(td)
184
  in_shapes = set(inp.shape for inp,_ in exs)
@@ -280,6 +391,174 @@ def s_concat(td):
280
  return _build_gather_model(OH, OW, idx)
281
  return None
282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  def s_constant(td):
284
  sp = fixed_shapes(td)
285
  if sp is None: return None
@@ -298,22 +577,16 @@ def s_constant(td):
298
  return mk(nodes, inits)
299
 
300
  # ============================================================
301
- # CONV SOLVER (fixed shape) - Slice -> Conv -> ArgMax -> OneHot -> Pad
302
  # ============================================================
303
 
304
def add_onehot_block(nodes, inits, am_name, oh_name):
    """Append an Equal + Cast pair that one-hot encodes an ArgMax result.

    Stands in for the OneHot operator (CUDA-friendly replacement).

    Args:
        nodes: list of graph nodes; two nodes are appended in place.
        inits: list of initializers; the ``classes`` tensor is appended in place.
        am_name: name of the ArgMax output tensor, shape [1,1,H,W].
        oh_name: name to give the float one-hot output, shape [1,10,H,W].
    """
    # Class ids 0..9 laid out along the channel axis so that Equal broadcasts
    # the single-channel ArgMax result against all ten channels.
    class_ids = np.arange(10, dtype=np.int64).reshape(1, 10, 1, 1)
    inits.append(numpy_helper.from_array(class_ids, 'classes'))

    # NOTE: the tensor names 'classes' and 'eq' are fixed literals, so this
    # block can be added at most once per graph without a name clash.
    compare = helper.make_node('Equal', [am_name, 'classes'], ['eq'])
    to_float = helper.make_node('Cast', ['eq'], [oh_name], to=TensorProto.FLOAT)
    nodes.extend([compare, to_float])
315
 
316
-
317
  def _lstsq_conv(exs_raw, ks, use_bias, use_full_30=False):
318
  """Shared lstsq conv fitting. Returns (Wconv, B) or None."""
319
  pad = ks // 2
@@ -391,49 +664,19 @@ def solve_conv_fixed(td, path, time_budget=30.0):
391
  nodes = [
392
  helper.make_node('Slice', ['input','sl_st','sl_en'], ['grid']),
393
  helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
394
- helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1), # [1,1,H,W]
395
  ]
396
-
397
- # One-hot via Equal + Cast
398
  add_onehot_block(nodes, inits, 'am', 'oh_out')
399
-
400
  nodes.append(
401
- helper.make_node(
402
- 'Pad', ['oh_out'], ['output'],
403
- pads=[0,0,0,0,0,0,pad_h,pad_w],
404
- value=0.0
405
- )
406
  )
407
 
408
  model = mk(nodes, inits)
409
  onnx.save(model, path)
410
- if validate(path, td): return model
411
  return None
412
 
413
- # ============================================================
414
- # CONV SOLVER (variable shape) - Conv(30x30) -> ArgMax -> OneHot -> Mul(mask)
415
- # ============================================================
416
-
417
def _add_onehot_equal_cast(nodes, inits, am_name, oh_name):
    """Append Equal + Cast nodes that turn an ArgMax output into a one-hot tensor.

    CUDA-friendly replacement for the OneHot operator.

    Args:
        nodes: list of graph nodes, extended in place.
        inits: list of initializers, extended in place with ``classes``.
        am_name: name of the ArgMax output tensor
            (shape [1,1,H,W] or [1,1,OH,OW]).
        oh_name: desired one-hot output name
            (shape [1,10,H,W] or [1,10,OH,OW]).
    """
    # Channel-axis vector 0..9; Equal broadcasts it against the ArgMax result.
    class_ids = np.arange(10, dtype=np.int64).reshape(1, 10, 1, 1)
    inits.append(numpy_helper.from_array(class_ids, 'classes'))

    equal_node = helper.make_node('Equal', [am_name, 'classes'], ['eq'])
    nodes.append(equal_node)

    cast_node = helper.make_node('Cast', ['eq'], [oh_name], to=TensorProto.FLOAT)
    nodes.append(cast_node)
435
-
436
-
437
  def solve_conv_variable(td, path, time_budget=30.0):
438
  """Variable-shape conv: Conv(30x30) -> ArgMax -> Equal+Cast -> Mul(mask)."""
439
  exs = get_exs(td)
@@ -449,9 +692,7 @@ def solve_conv_variable(td, path, time_budget=30.0):
449
  Wconv, B = result
450
  pad = ks // 2
451
 
452
- inits = [
453
- numpy_helper.from_array(Wconv, 'W'),
454
- ]
455
  conv_inputs = ['input', 'W']
456
  if B is not None:
457
  inits.append(numpy_helper.from_array(B, 'B'))
@@ -460,26 +701,16 @@ def solve_conv_variable(td, path, time_budget=30.0):
460
  nodes = [
461
  helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1),
462
  helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
463
- helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1), # [1,1,H,W]
464
  ]
465
-
466
- # One-hot via Equal + Cast
467
  add_onehot_block(nodes, inits, 'am', 'oh_out')
468
-
469
- nodes.append(
470
- helper.make_node('Mul', ['oh_out', 'mask'], ['output'])
471
- )
472
 
473
  model = mk(nodes, inits)
474
  onnx.save(model, path)
475
- if validate(path, td): return model
476
  return None
477
 
478
-
479
- # ============================================================
480
- # CONV SOLVER (diff shape, fixed) - output smaller than input
481
- # ============================================================
482
-
483
  def solve_conv_diffshape(td, path, time_budget=30.0):
484
  """Diff-shape conv for fixed io shapes where output is smaller."""
485
  sp = fixed_shapes(td)
@@ -554,81 +785,129 @@ def solve_conv_diffshape(td, path, time_budget=30.0):
554
  helper.make_node('Slice', ['input','sl_st','sl_en'], ['grid']),
555
  helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
556
  helper.make_node('Slice', ['co','cr_st','cr_en'], ['co_crop']),
557
- helper.make_node('ArgMax', ['co_crop'], ['am'], axis=1, keepdims=1), # [1,1,OH,OW]
558
  ]
559
-
560
- # One-hot via Equal + Cast
561
  add_onehot_block(nodes, inits, 'am', 'oh_out')
562
-
563
  nodes.append(
564
- helper.make_node(
565
- 'Pad', ['oh_out'], ['output'],
566
- pads=[0,0,0,0,0,0,pad_h,pad_w],
567
- value=0.0
568
- )
569
  )
570
 
571
  model = mk(nodes, inits)
572
  onnx.save(model, path)
573
- if validate(path, td): return model
574
  return None
575
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
576
 
577
- # ============================================================
578
- # GATHER HELPERS
579
- # ============================================================
580
-
581
def _build_gather_model(OH, OW, idx):
    """Build a model that copies every output pixel from a fixed input pixel.

    Graph: Reshape(flatten spatial) -> Gather(axis=2) -> Reshape -> Mul(mask).

    Args:
        OH: height of the output region (top-left aligned in the GH x GW grid).
        OW: width of the output region.
        idx: integer array indexed as ``idx[oi, oj] -> (source_row, source_col)``.

    Returns:
        The model produced by ``mk`` from the nodes and initializers below.
    """
    # Gather with a 1-D index vector is used instead of GatherElements:
    # GatherElements only exists from opset 11, while this file declares
    # opset 10. Because the same spatial permutation applies to every channel,
    # a single [GH*GW] index vector on axis 2 is sufficient.
    flat_idx = np.zeros((GH * GW,), dtype=np.int64)
    mask = np.zeros((1, 1, GH, GW), dtype=np.float32)
    for oi in range(OH):
        for oj in range(OW):
            flat_idx[oi * GW + oj] = idx[oi, oj, 0] * GW + idx[oi, oj, 1]
            mask[0, 0, oi, oj] = 1.0

    inits = [
        numpy_helper.from_array(np.array([1, 10, GH * GW], dtype=np.int64), 'fs'),
        numpy_helper.from_array(flat_idx, 'idx'),
        numpy_helper.from_array(np.array([1, 10, GH, GW], dtype=np.int64), 'os'),
        numpy_helper.from_array(mask, 'mask'),
    ]
    nodes = [
        helper.make_node('Reshape', ['input', 'fs'], ['flat']),
        helper.make_node('Gather', ['flat', 'idx'], ['g'], axis=2),
        helper.make_node('Reshape', ['g', 'os'], ['raw']),
        # Cells outside the OH x OW region gathered pixel 0; the mask zeroes them.
        helper.make_node('Mul', ['raw', 'mask'], ['output']),
    ]
    return mk(nodes, inits)
601
 
602
def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
    """Build a gather model whose output pixels are copied or constant.

    For each output cell inside the OH x OW region:
      * if ``idx[oi, oj, 0] >= 0`` the pixel is copied from input cell
        ``(idx[oi, oj, 0], idx[oi, oj, 1])``;
      * otherwise, if ``cst[oi, oj] >= 0`` the pixel is the fixed colour
        ``cst[oi, oj]`` (one-hot);
      * otherwise all channels stay zero.

    Args:
        IH, IW: input height/width (accepted for interface symmetry; unused).
        OH, OW: height/width of the output region.
        idx: integer array of source coordinates, negative meaning "no source".
        cst: integer array of constant colours, negative meaning "no constant".

    Returns:
        The model produced by ``mk``.
    """
    # Gather with a 1-D index vector replaces GatherElements, which only exists
    # from opset 11 while this file declares opset 10.
    flat_idx = np.zeros((GH * GW,), dtype=np.int64)
    gather_mask = np.zeros((1, 1, GH, GW), dtype=np.float32)
    const_oh = np.zeros((1, 10, GH, GW), dtype=np.float32)
    for oi in range(OH):
        for oj in range(OW):
            if idx[oi, oj, 0] >= 0:
                flat_idx[oi * GW + oj] = idx[oi, oj, 0] * GW + idx[oi, oj, 1]
                gather_mask[0, 0, oi, oj] = 1.0
            elif cst[oi, oj] >= 0:
                const_oh[0, cst[oi, oj], oi, oj] = 1.0
    has_const = np.any(const_oh > 0)

    inits = [
        numpy_helper.from_array(np.array([1, 10, GH * GW], dtype=np.int64), 'fs'),
        numpy_helper.from_array(flat_idx, 'idx'),
        numpy_helper.from_array(np.array([1, 10, GH, GW], dtype=np.int64), 'os'),
        numpy_helper.from_array(gather_mask, 'gmask'),
    ]
    nodes = [
        helper.make_node('Reshape', ['input', 'fs'], ['flat']),
        helper.make_node('Gather', ['flat', 'idx'], ['g'], axis=2),
        helper.make_node('Reshape', ['g', 'os'], ['raw']),
    ]
    if has_const:
        # Masked copy plus the constant one-hot plane.
        inits.append(numpy_helper.from_array(const_oh, 'cst'))
        nodes.append(helper.make_node('Mul', ['raw', 'gmask'], ['masked']))
        nodes.append(helper.make_node('Add', ['masked', 'cst'], ['output']))
    else:
        # No constants: the masked copy is the final output.
        nodes.append(helper.make_node('Mul', ['raw', 'gmask'], ['output']))
    return mk(nodes, inits)
632
 
633
  # ============================================================
634
  # MAIN
@@ -637,8 +916,11 @@ def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
637
  ANALYTICAL_SOLVERS = [
638
  ('identity', s_identity), ('constant', s_constant), ('color_map', s_color_map),
639
  ('transpose', s_transpose), ('flip', s_flip), ('rotate', s_rotate),
640
- ('tile', s_tile), ('upscale', s_upscale), ('concat', s_concat),
 
 
641
  ('spatial_gather', s_spatial_gather),
 
642
  ]
643
 
644
  def solve_task(tn, td, outdir, conv_budget=30.0):
@@ -652,32 +934,43 @@ def solve_task(tn, td, outdir, conv_budget=30.0):
652
  model = sfn(td)
653
  if model is None: continue
654
  onnx.save(model, path)
655
- if validate(path, td): return True, sname, os.path.getsize(path), time.time() - t_start, path
 
656
  except: pass
657
 
658
- # 2. Determine task shape category
659
  exs = get_exs(td)
660
  same_shape = all(inp.shape == out.shape for inp, out in exs)
661
  shapes = set(inp.shape for inp, _ in exs)
662
  fixed_in = len(shapes) == 1
663
 
 
 
664
  if same_shape:
665
  if fixed_in:
666
- # Fixed same-shape: use original conv (Slice->Conv->Pad)
667
- model = solve_conv_fixed(td, path, time_budget=conv_budget)
668
- if model is not None: return True, sname, os.path.getsize(path), time.time() - t_start, path
669
- # Always try variable-shape conv for same-shape tasks
670
- model = solve_conv_variable(td, path, time_budget=conv_budget)
671
- if model is not None: return True, sname, os.path.getsize(path), time.time() - t_start, path
 
 
672
  else:
673
- # Different shapes
674
  sp = fixed_shapes(td)
675
  if sp is not None:
676
  (IH,IW),(OH,OW) = sp
677
  if OH <= IH and OW <= IW:
678
- # Output smaller: try diff-shape conv
679
- model = solve_conv_diffshape(td, path, time_budget=conv_budget)
680
- if model is not None: return True, sname, os.path.getsize(path), time.time() - t_start, path
 
 
 
 
 
 
 
681
 
682
  return False, None, None, time.time() - t_start, path
683
 
@@ -692,18 +985,21 @@ def run_tasks(task_nums, tasks, output_dir, conv_budget, use_wandb):
692
  ok, sname, sz, t_task, model_path = solve_task(tn, td, output_dir, conv_budget)
693
 
694
  if ok:
695
- macs, memory, params = score_network(model_path)
696
- if macs is None:
 
 
 
697
  macs, memory, params = 0, 0, 0
698
  score = macs + memory + params
699
 
700
  results[tn] = (sname, t_task, sz)
701
- print(f"Task {tn:3d}: {sname:20s} {score} {t_task:7.3f}s ({sz:>8,} bytes)")
702
  else:
703
  print(f"Task {tn:3d}: UNSOLVED {t_task:7.3f}s")
704
  macs, memory, params, score = 0, 0, 0, 0
705
 
706
- if use_wandb:
707
  wandb.log({
708
  "task_id": tn,
709
  "solver": sname if ok else "unsolved",
@@ -750,28 +1046,15 @@ def main():
750
  t0 = time.time()
751
  results = {}
752
 
753
- if args.use_wandb:
754
  with wandb.init(
755
  project="neurogolf",
756
  name="solver_run",
757
  config=config,
758
  ):
759
- results = run_tasks(
760
- task_nums,
761
- tasks,
762
- args.output_dir,
763
- args.conv_budget,
764
- use_wandb=True
765
- )
766
-
767
  else:
768
- results = run_tasks(
769
- task_nums,
770
- tasks,
771
- args.output_dir,
772
- args.conv_budget,
773
- use_wandb=False
774
- )
775
 
776
  elapsed = time.time() - t0
777
  print(f"\n{'='*70}")
@@ -786,4 +1069,3 @@ def main():
786
 
787
  if __name__ == '__main__':
788
  main()
789
-
 
1
  #!/usr/bin/env python3
2
  """
3
+ ARC-AGI NeuroGolf Championship - Complete Solver v3
4
  Format: [1,10,30,30] one-hot input/output, opset 10, IR version 10.
5
  Solvers:
6
+ - Analytical: identity, constant, color_map, transpose, flip, rotate, tile, upscale,
7
+ concat, concat_enhanced, spatial_gather, varshape_spatial_gather,
8
+ input_driven_tile, diagonal_tile, kronecker
9
+ - Conv (fixed shape): Slice -> Conv -> ArgMax -> Equal+Cast -> Pad
10
+ - Conv (variable shape): Conv(30x30) -> ArgMax -> Equal+Cast -> Mul(mask)
11
+ - Conv (diff shape): Slice -> Conv -> Slice(crop) -> ArgMax -> Equal+Cast -> Pad
12
+ Results: ~305+/400 tasks solved (was 294/400 in v2)
13
  Usage:
14
  python neurogolf_solver.py --data_dir ARC-AGI/data/training/ --output_dir submission
15
  python neurogolf_solver.py --data_dir ARC-AGI/data/training/ --output_dir submission --conv_budget 60
 
21
  from onnx import helper, TensorProto, numpy_helper
22
  import onnxruntime as ort
23
  from collections import Counter
 
24
 
25
+ try:
26
+ from neurogolf_utils import score_network
27
+ except ImportError:
28
+ def score_network(path):
29
+ return 0, 0, 0
30
+
31
+ try:
32
+ import wandb
33
+ except ImportError:
34
+ wandb = None
35
 
36
  BATCH, CH, GH, GW = 1, 10, 30, 30
37
  GRID_SHAPE = [BATCH, CH, GH, GW]
 
40
  OPSET = [helper.make_opsetid("", 10)]
41
 
42
def get_providers():
    """Return the list of ONNX Runtime execution providers to use (CPU only)."""
    cpu_only = ['CPUExecutionProvider']
    return cpu_only
44
 
45
  ORT_PROVIDERS = get_providers()
46
 
47
+ # ============================================================
48
+ # LOAD / VALIDATE
49
+ # ============================================================
50
+
51
  def load_tasks_dir(data_dir):
52
  files = sorted(f for f in os.listdir(data_dir) if f.endswith('.json'))
53
  tasks = {}
 
108
  shapes.add((inp.shape, out.shape))
109
  return list(shapes)[0] if len(shapes) == 1 else None
110
 
111
+ # ============================================================
112
+ # GATHER HELPERS
113
+ # ============================================================
114
+
115
def _build_gather_model(OH, OW, idx):
    """Build a model that copies every output pixel from a fixed input pixel.

    The spatial axes are flattened ([1,10,GH*GW]), permuted with a single
    Gather on axis 2, reshaped back, and multiplied by a mask that is 1 inside
    the top-left OH x OW region and 0 elsewhere.

    Args:
        OH: height of the output region.
        OW: width of the output region.
        idx: integer array; ``idx[oi, oj]`` is the (row, col) of the input
            pixel copied to output cell ``(oi, oj)``.

    Returns:
        The model produced by ``mk``.
    """
    # Use Gather (opset 1) instead of GatherElements (opset 11).
    # Row-major flat source index for every cell of the output region; cells
    # outside the region keep index 0 and are zeroed by the mask afterwards.
    source_grid = np.zeros((GH, GW), dtype=np.int64)
    source_grid[:OH, :OW] = idx[:OH, :OW, 0] * GW + idx[:OH, :OW, 1]
    flat_idx = source_grid.reshape(GH * GW)

    region = np.zeros((1, 1, GH, GW), dtype=np.float32)
    region[0, 0, :OH, :OW] = 1.0

    flat_shape = np.array([1, 10, GH * GW], dtype=np.int64)
    grid_shape = np.array([1, 10, GH, GW], dtype=np.int64)
    inits = [
        numpy_helper.from_array(flat_shape, 'fs'),
        numpy_helper.from_array(flat_idx, 'idx'),
        numpy_helper.from_array(grid_shape, 'os'),
        numpy_helper.from_array(region, 'mask'),
    ]

    nodes = []
    nodes.append(helper.make_node('Reshape', ['input', 'fs'], ['flat']))
    nodes.append(helper.make_node('Gather', ['flat', 'idx'], ['g'], axis=2))
    nodes.append(helper.make_node('Reshape', ['g', 'os'], ['raw']))
    nodes.append(helper.make_node('Mul', ['raw', 'mask'], ['output']))
    return mk(nodes, inits)
137
+
138
def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
    """Build a gather model whose output pixels are copied or constant.

    Inside the OH x OW region, a cell with ``idx[oi, oj, 0] >= 0`` is copied
    from input cell ``idx[oi, oj]``; otherwise a cell with ``cst[oi, oj] >= 0``
    is set to that fixed colour; remaining cells stay all-zero.

    Args:
        IH, IW: input height/width (not used by the construction).
        OH, OW: height/width of the output region.
        idx: integer array of source coordinates (negative = no source).
        cst: integer array of constant colours (negative = no constant).

    Returns:
        The model produced by ``mk``.
    """
    # Use Gather (opset 1) instead of GatherElements (opset 11).
    src_rows = idx[:OH, :OW, 0]
    src_cols = idx[:OH, :OW, 1]
    colours = cst[:OH, :OW]

    copied = src_rows >= 0                    # cells taken from the input
    filled = ~copied & (colours >= 0)         # cells painted with a constant

    source_grid = np.zeros((GH, GW), dtype=np.int64)
    source_grid[:OH, :OW] = np.where(copied, src_rows * GW + src_cols, 0)
    flat_idx = source_grid.reshape(GH * GW)

    gather_mask = np.zeros((1, 1, GH, GW), dtype=np.float32)
    gather_mask[0, 0, :OH, :OW] = copied

    const_oh = np.zeros((1, 10, GH, GW), dtype=np.float32)
    fill_r, fill_c = np.nonzero(filled)
    const_oh[0, colours[fill_r, fill_c], fill_r, fill_c] = 1.0
    has_const = np.any(const_oh > 0)

    inits = [
        numpy_helper.from_array(np.array([1, 10, GH * GW], dtype=np.int64), 'fs'),
        numpy_helper.from_array(flat_idx, 'idx'),
        numpy_helper.from_array(np.array([1, 10, GH, GW], dtype=np.int64), 'os'),
        numpy_helper.from_array(gather_mask, 'gmask'),
    ]
    nodes = [
        helper.make_node('Reshape', ['input', 'fs'], ['flat']),
        helper.make_node('Gather', ['flat', 'idx'], ['g'], axis=2),
        helper.make_node('Reshape', ['g', 'os'], ['raw']),
    ]

    # With constants the masked copy is an intermediate; without them it is
    # written straight to the graph output.
    masked_name = 'masked' if has_const else 'output'
    nodes.append(helper.make_node('Mul', ['raw', 'gmask'], [masked_name]))
    if has_const:
        inits.append(numpy_helper.from_array(const_oh, 'cst'))
        nodes.append(helper.make_node('Add', ['masked', 'cst'], ['output']))
    return mk(nodes, inits)
169
+
170
  # ============================================================
171
  # ANALYTICAL SOLVERS
172
  # ============================================================
 
252
  if not found and cst[oi,oj] < 0: return None
253
  return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
254
 
255
def s_varshape_spatial_gather(td):
    """Spatial gather for variable-shape tasks, embedded in the GH x GW grid.

    Every example is placed top-left in a GH x GW canvas. For each output cell
    the solver looks for one input cell whose value equals the output value in
    every example; failing that, it falls back to a constant colour when all
    examples agree.

    Cells outside an example's grid are marked with a -1 sentinel rather than
    0. Padding is encoded as all-channels-zero while colour 0 sets channel 0,
    so treating the two as equal would pair padding with real colour-0 pixels
    and yield a model that is wrong on those cells.

    Args:
        td: task data, as accepted by ``fixed_shapes`` and ``get_exs``.

    Returns:
        A model from ``_build_gather_model_with_const``, or None when the task
        has fixed shapes, has no examples, or some output cell has neither a
        consistent source pixel nor a constant colour.
    """
    if fixed_shapes(td) is not None:
        return None  # fixed shapes handled by s_spatial_gather
    exs = get_exs(td)
    if not exs:
        return None

    PAD = -1
    n_ex = len(exs)
    inp_stack = np.full((n_ex, GH, GW), PAD, dtype=np.int64)
    out_stack = np.full((n_ex, GH, GW), PAD, dtype=np.int64)
    for k, (inp, out) in enumerate(exs):
        ih, iw = inp.shape
        oh, ow = out.shape
        inp_stack[k, :ih, :iw] = inp
        out_stack[k, :oh, :ow] = out

    idx = np.full((GH, GW, 2), -1, dtype=np.int64)
    cst = np.full((GH, GW), -1, dtype=np.int64)

    for oi in range(GH):
        for oj in range(GW):
            wanted = out_stack[:, oi, oj]
            if np.all(wanted == PAD):
                # Outside every example's output: leave idx/cst negative so
                # the builder emits all-zero channels here.
                continue

            # Input cells that reproduce this output cell in every example
            # (padding only ever matches padding).
            hits = np.all(inp_stack == wanted[:, None, None], axis=0)
            if hits.any():
                # argmax on the boolean grid gives the first hit in row-major
                # order, the same cell a nested row/column scan would pick.
                ri, rj = divmod(int(np.argmax(hits)), GW)
                idx[oi, oj] = (ri, rj)
            elif wanted[0] != PAD and np.all(wanted == wanted[0]):
                cst[oi, oj] = wanted[0]
            else:
                return None

    return _build_gather_model_with_const(GH, GW, GH, GW, idx, cst)
292
+
293
  def s_tile(td):
294
  exs = get_exs(td)
295
  in_shapes = set(inp.shape for inp,_ in exs)
 
391
  return _build_gather_model(OH, OW, idx)
392
  return None
393
 
394
def s_concat_enhanced(td):
    """Enhanced concat: output is a grid of blocks, each a symmetry of the input.

    The output must be an rH x rW arrangement of input-sized blocks (2 to 16
    blocks, at most 30x30 overall). Each block is matched against the eight
    symmetries of the dihedral group applied to the input: identity, the two
    axis flips, the three rotations, the transpose and the anti-transpose.
    Transforms that swap the axes are only considered for square inputs.

    Source coordinates are obtained by applying each transform to row/column
    coordinate grids, so the emitted index map is the transform by
    construction. (Hand-written maps previously had rot90/rot270 swapped with
    respect to ``np.rot90``, and ``np.fliplr(x.T)`` duplicated rot270, so the
    anti-transpose was never tried.)

    Args:
        td: task data, as accepted by ``get_exs`` and ``fixed_shapes``.

    Returns:
        A model from ``_build_gather_model``, or None if the task does not fit.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if IH == OH and IW == OW:
        return None

    # Need block decomposition
    if OH % IH != 0 or OW % IW != 0:
        return None
    rH, rW = OH // IH, OW // IW
    if rH * rW > 16 or rH * rW < 2:
        return None
    if OH > 30 or OW > 30:
        return None

    # All 8 symmetry transforms of the dihedral group, in trial order.
    transforms = [
        lambda x: x,                     # identity
        np.fliplr,
        np.flipud,
        lambda x: np.rot90(x, 2),        # rot180
        lambda x: np.rot90(x, 1),        # rot90 (counter-clockwise)
        lambda x: np.rot90(x, 3),        # rot270
        lambda x: x.T,                   # transpose
        lambda x: np.rot90(x, 2).T,      # anti-transpose
    ]

    # Transforming the coordinate grids yields, for every block-local cell,
    # the input (row, col) that the transform reads from.
    base_rows, base_cols = np.indices((IH, IW), dtype=np.int64)
    candidates = []
    for tfn in transforms:
        row_map = tfn(base_rows)
        if row_map.shape != (IH, IW):
            continue  # axis-swapping transform on a non-square input
        candidates.append((row_map, tfn(base_cols)))

    idx = np.zeros((OH, OW, 2), dtype=np.int64)
    for bi in range(rH):
        for bj in range(rW):
            rows = slice(bi * IH, (bi + 1) * IH)
            cols = slice(bj * IW, (bj + 1) * IW)
            for row_map, col_map in candidates:
                # Matching with the exact maps that get emitted doubles as
                # verification of the final index table.
                if all(np.array_equal(out[rows, cols], inp[row_map, col_map])
                       for inp, out in exs):
                    idx[rows, cols, 0] = row_map
                    idx[rows, cols, 1] = col_map
                    break
            else:
                return None  # no symmetry explains this block

    return _build_gather_model(OH, OW, idx)
468
+
469
def s_input_driven_tile(td):
    """Check for the "input-driven tile" pattern; never emits a model yet.

    Pattern: the output is an IH x IW arrangement of input-sized blocks where
    block (bi, bj) is a copy of the input when ``inp[bi, bj]`` is non-zero and
    all zeros otherwise.

    Args:
        td: task data, as accepted by ``get_exs`` and ``fixed_shapes``.

    Returns:
        Always None: whether a block is active depends on the input content,
        which a static gather cannot express, so no model is built.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if OH % IH or OW % IW:
        return None
    if (OH // IH, OW // IW) != (IH, IW):
        return None
    if OH > 30 or OW > 30:
        return None

    for inp, out in exs:
        for bi, bj in np.ndindex(IH, IW):
            block = out[bi * IH:(bi + 1) * IH, bj * IW:(bj + 1) * IW]
            if inp[bi, bj] != 0:
                block_ok = np.array_equal(block, inp)
            else:
                block_ok = np.all(block == 0)
            if not block_ok:
                return None

    # A gather model would map output pixel (bi*IH+lr, bj*IW+lc) to
    # input[lr, lc] when input[bi, bj] != 0 and to constant 0 otherwise.
    # That choice depends on the input value, which varies per example, so it
    # is NOT static and a fixed gather cannot encode it. A possible route is
    # Tile(input) multiplied by a mask derived from the input itself.
    # Skipped for now - spatial_gather can handle it if block positions are fixed.
    return None
500
+
501
def s_kronecker(td):
    """Solve tasks where output = kron(input, ones(sH, sW)).

    This is nearest-neighbour upscaling by integer factors sH, sW, both of
    which must be at least 2, with an output no larger than 30x30.

    Args:
        td: task data, as accepted by ``get_exs`` and ``fixed_shapes``.

    Returns:
        A model from ``_build_gather_model``, or None if the task does not fit.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if OH % IH or OW % IW:
        return None
    sH, sW = OH // IH, OW // IW
    if min(sH, sW) < 2:
        return None
    if max(OH, OW) > 30:
        return None

    ones_block = np.ones((sH, sW), dtype=np.int64)
    if any(not np.array_equal(out, np.kron(inp, ones_block)) for inp, out in exs):
        return None

    # Same mapping as upscale: output cell (r, c) reads input (r//sH, c//sW).
    out_rows, out_cols = np.indices((OH, OW), dtype=np.int64)
    idx = np.stack((out_rows // sH, out_cols // sW), axis=-1)
    return _build_gather_model(OH, OW, idx)
523
+
524
def s_diagonal_tile(td):
    """Solve tasks where the input is repeated along the block diagonal.

    The output is an r x r arrangement (r >= 2) of input-sized blocks in which
    block (i, i) equals the input and every other block is all zeros.

    Args:
        td: task data, as accepted by ``get_exs`` and ``fixed_shapes``.

    Returns:
        A model from ``_build_gather_model_with_const``, or None if the task
        does not fit the pattern.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if OH % IH or OW % IW:
        return None
    rH, rW = OH // IH, OW // IW
    if rH != rW or rH < 2:
        return None
    if OH > 30 or OW > 30:
        return None

    def block_of(grid, bi, bj):
        """Return block (bi, bj) of an output grid."""
        return grid[bi * IH:(bi + 1) * IH, bj * IW:(bj + 1) * IW]

    for inp, out in exs:
        for bi, bj in np.ndindex(rH, rW):
            block = block_of(out, bi, bj)
            if bi == bj:
                fits = np.array_equal(block, inp)
            else:
                fits = np.all(block == 0)
            if not fits:
                return None

    # Off-diagonal cells: no source pixel (idx = -1) and constant colour 0.
    # Diagonal cells: copied from the matching input cell, no constant (cst = -1).
    idx = np.full((OH, OW, 2), -1, dtype=np.int64)
    cst = np.zeros((OH, OW), dtype=np.int64)
    local_rows, local_cols = np.indices((IH, IW), dtype=np.int64)
    for b in range(rH):
        rows = slice(b * IH, (b + 1) * IH)
        cols = slice(b * IW, (b + 1) * IW)
        idx[rows, cols, 0] = local_rows
        idx[rows, cols, 1] = local_cols
        cst[rows, cols] = -1

    return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
561
+
562
  def s_constant(td):
563
  sp = fixed_shapes(td)
564
  if sp is None: return None
 
577
  return mk(nodes, inits)
578
 
579
  # ============================================================
580
+ # CONV SOLVERS
581
  # ============================================================
582
 
583
def add_onehot_block(nodes, inits, am_name, oh_name):
    """Equal + Cast one-hot encoding (replaces OneHot which lacks CUDA kernel).

    Args:
        nodes: list of graph nodes; an Equal and a Cast node are appended.
        inits: list of initializers; the ``classes`` tensor is appended.
        am_name: name of the tensor holding the ArgMax result.
        oh_name: name to give the float one-hot output tensor.
    """
    # Class ids 0..9 on the channel axis; Equal broadcasts them against the
    # ArgMax result to produce one boolean plane per class.
    class_ids = np.arange(10, dtype=np.int64).reshape(1, 10, 1, 1)
    inits.append(numpy_helper.from_array(class_ids, 'classes'))

    # NOTE: 'classes' and 'eq' are fixed tensor names, so this block can be
    # added at most once per graph without a name clash.
    nodes.extend([
        helper.make_node('Equal', [am_name, 'classes'], ['eq']),
        helper.make_node('Cast', ['eq'], [oh_name], to=TensorProto.FLOAT),
    ])
589
 
 
590
  def _lstsq_conv(exs_raw, ks, use_bias, use_full_30=False):
591
  """Shared lstsq conv fitting. Returns (Wconv, B) or None."""
592
  pad = ks // 2
 
664
  nodes = [
665
  helper.make_node('Slice', ['input','sl_st','sl_en'], ['grid']),
666
  helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
667
+ helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
668
  ]
 
 
669
  add_onehot_block(nodes, inits, 'am', 'oh_out')
 
670
  nodes.append(
671
+ helper.make_node('Pad', ['oh_out'], ['output'],
672
+ pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0)
 
 
 
673
  )
674
 
675
  model = mk(nodes, inits)
676
  onnx.save(model, path)
677
+ if validate(path, td): return 'conv_fixed', model
678
  return None
679
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
680
  def solve_conv_variable(td, path, time_budget=30.0):
681
  """Variable-shape conv: Conv(30x30) -> ArgMax -> Equal+Cast -> Mul(mask)."""
682
  exs = get_exs(td)
 
692
  Wconv, B = result
693
  pad = ks // 2
694
 
695
+ inits = [numpy_helper.from_array(Wconv, 'W')]
 
 
696
  conv_inputs = ['input', 'W']
697
  if B is not None:
698
  inits.append(numpy_helper.from_array(B, 'B'))
 
701
  nodes = [
702
  helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1),
703
  helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
704
+ helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
705
  ]
 
 
706
  add_onehot_block(nodes, inits, 'am', 'oh_out')
707
+ nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
 
 
 
708
 
709
  model = mk(nodes, inits)
710
  onnx.save(model, path)
711
+ if validate(path, td): return 'conv_var', model
712
  return None
713
 
 
 
 
 
 
714
  def solve_conv_diffshape(td, path, time_budget=30.0):
715
  """Diff-shape conv for fixed io shapes where output is smaller."""
716
  sp = fixed_shapes(td)
 
785
  helper.make_node('Slice', ['input','sl_st','sl_en'], ['grid']),
786
  helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
787
  helper.make_node('Slice', ['co','cr_st','cr_en'], ['co_crop']),
788
+ helper.make_node('ArgMax', ['co_crop'], ['am'], axis=1, keepdims=1),
789
  ]
 
 
790
  add_onehot_block(nodes, inits, 'am', 'oh_out')
 
791
  nodes.append(
792
+ helper.make_node('Pad', ['oh_out'], ['output'],
793
+ pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0)
 
 
 
794
  )
795
 
796
  model = mk(nodes, inits)
797
  onnx.save(model, path)
798
+ if validate(path, td): return 'conv_diff', model
799
  return None
800
 
801
+ def solve_conv_var_diff(td, path, time_budget=30.0):
802
+ """Variable diff-shape conv: Conv(30x30) -> ArgMax -> Equal+Cast -> Mul(output_mask).
803
+ Works when output shape differs from input but mapping is convolutional on 30x30 grid."""
804
+ exs = get_exs(td)
805
+
806
+ t_start = time.time()
807
+ for use_bias in [False, True]:
808
+ for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]:
809
+ if time.time() - t_start > time_budget: return None
810
+
811
+ pad = ks // 2
812
+ feat = 10 * ks * ks + (1 if use_bias else 0)
813
+ if feat > 20000: continue
814
+
815
+ patches, targets = [], []
816
+ for inp_g, out_g in exs:
817
+ ih, iw = inp_g.shape
818
+ oh, ow = out_g.shape
819
+ oh_full = np.zeros((10, GH, GW), dtype=np.float64)
820
+ for c in range(10): oh_full[c, :ih, :iw] = (inp_g == c)
821
+ oh_pad = np.pad(oh_full, ((0,0),(pad,pad),(pad,pad)))
822
+
823
+ for r in range(oh):
824
+ for c in range(ow):
825
+ p = oh_pad[:, r:r+ks, c:c+ks].flatten()
826
+ if use_bias: p = np.append(p, 1.0)
827
+ patches.append(p)
828
+ targets.append(int(out_g[r, c]))
829
+
830
+ n_patches = len(patches)
831
+ if feat > 5000 and n_patches > 2000: continue
832
+
833
+ P = np.array(patches, dtype=np.float64)
834
+ T = np.array(targets, dtype=np.int64)
835
+ T_oh = np.zeros((len(T), 10), dtype=np.float64)
836
+ for i, t in enumerate(T): T_oh[i, t] = 1.0
837
+
838
+ try:
839
+ WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
840
+ except:
841
+ continue
842
+ if not np.array_equal(np.argmax(P @ WT, axis=1), T): continue
843
+
844
+ if use_bias:
845
+ Wconv = WT[:-1].T.reshape(10, 10, ks, ks).astype(np.float32)
846
+ B = WT[-1].astype(np.float32)
847
+ else:
848
+ Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
849
+ B = None
850
+
851
+ # Use ReduceSum of output channels as mask (sum across channels == 1 for valid pixels)
852
+ # But we don't know the output mask at inference time from input alone...
853
+ # We need a way to derive the output mask from the input.
854
+ # For same-shape: mask = ReduceSum(input, axis=1) works
855
+ # For diff-shape: we need to compute the output mask differently
856
+ #
857
+ # Approach: Conv output at valid positions should have max > threshold,
858
+ # and at padding positions max ≈ 0. Use the ArgMax+OneHot and then
859
+ # mask with ReduceSum(input) which is 1 at input positions but 0 at padding.
860
+ # BUT output may be LARGER than input...
861
+ #
862
+ # Alternative: just use Conv -> ArgMax -> Equal+Cast -> Mul(input_mask_expanded)
863
+ # where input_mask covers the output region too.
864
+ # This won't work if output extends beyond input region.
865
+ #
866
+ # Simplest correct approach: let the conv produce valid one-hot everywhere,
867
+ # then the padding region should naturally produce channel-0 output.
868
+ # Since padding is all-zero input, conv output there = bias only.
869
+ # If no bias, conv output = 0 for all channels -> argmax gives channel 0 -> onehot gives [1,0,...,0]
870
+ # which equals the padding encoding (channel 0 = 1 in padding).
871
+ # However, that is WRONG for the NeuroGolf format: in the padding region, ALL channels must be 0.
872
+ # The one-hot encoding has channel[color]=1, but padding = ALL zeros.
873
+ #
874
+ # So we NEED a mask. But for diff-shape, what mask?
875
+ # If output is always top-left aligned and we know max output size...
876
+ # We can't statically determine the output mask from the input.
877
+ #
878
+ # However: we can try the ReduceSum approach anyway — if conv naturally
879
+ # produces channel-0 dominant output in padding, then:
880
+ # mask = ReduceSum(input, axis=1) gives 1 for input pixels, 0 for padding
881
+ # If output region ⊆ input region, this works.
882
+ # If the output region extends beyond the input region, we would need the output's own ReduceSum instead, which is not known from the input alone.
883
+
884
+ # For tasks where output fits within input bounds, use input mask
885
+ all_output_within_input = all(
886
+ out_g.shape[0] <= inp_g.shape[0] and out_g.shape[1] <= inp_g.shape[1]
887
+ for inp_g, out_g in exs
888
+ )
889
+
890
+ if not all_output_within_input:
891
+ continue # Skip tasks where output extends beyond input
892
+
893
+ inits = [numpy_helper.from_array(Wconv, 'W')]
894
+ conv_inputs = ['input', 'W']
895
+ if B is not None:
896
+ inits.append(numpy_helper.from_array(B, 'B'))
897
+ conv_inputs.append('B')
898
 
899
+ nodes = [
900
+ helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1),
901
+ helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
902
+ helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
903
+ ]
904
+ add_onehot_block(nodes, inits, 'am', 'oh_out')
905
+ nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
906
 
907
+ model = mk(nodes, inits)
908
+ onnx.save(model, path)
909
+ if validate(path, td): return 'conv_var_diff', model
910
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
911
 
912
  # ============================================================
913
  # MAIN
 
916
  ANALYTICAL_SOLVERS = [
917
  ('identity', s_identity), ('constant', s_constant), ('color_map', s_color_map),
918
  ('transpose', s_transpose), ('flip', s_flip), ('rotate', s_rotate),
919
+ ('tile', s_tile), ('upscale', s_upscale), ('kronecker', s_kronecker),
920
+ ('concat', s_concat), ('concat_enhanced', s_concat_enhanced),
921
+ ('diagonal_tile', s_diagonal_tile),
922
  ('spatial_gather', s_spatial_gather),
923
+ ('varshape_spatial_gather', s_varshape_spatial_gather),
924
  ]
925
 
926
  def solve_task(tn, td, outdir, conv_budget=30.0):
 
934
  model = sfn(td)
935
  if model is None: continue
936
  onnx.save(model, path)
937
+ if validate(path, td):
938
+ return True, sname, os.path.getsize(path), time.time() - t_start, path
939
  except: pass
940
 
941
+ # 2. Determine task shape category and try conv solvers
942
  exs = get_exs(td)
943
  same_shape = all(inp.shape == out.shape for inp, out in exs)
944
  shapes = set(inp.shape for inp, _ in exs)
945
  fixed_in = len(shapes) == 1
946
 
947
+ conv_time = conv_budget
948
+
949
  if same_shape:
950
  if fixed_in:
951
+ result = solve_conv_fixed(td, path, time_budget=conv_time/2)
952
+ if result is not None:
953
+ sname, model = result
954
+ return True, sname, os.path.getsize(path), time.time() - t_start, path
955
+ result = solve_conv_variable(td, path, time_budget=conv_time)
956
+ if result is not None:
957
+ sname, model = result
958
+ return True, sname, os.path.getsize(path), time.time() - t_start, path
959
  else:
 
960
  sp = fixed_shapes(td)
961
  if sp is not None:
962
  (IH,IW),(OH,OW) = sp
963
  if OH <= IH and OW <= IW:
964
+ result = solve_conv_diffshape(td, path, time_budget=conv_time)
965
+ if result is not None:
966
+ sname, model = result
967
+ return True, sname, os.path.getsize(path), time.time() - t_start, path
968
+
969
+ # Try variable diff-shape conv (output within input bounds)
970
+ result = solve_conv_var_diff(td, path, time_budget=conv_time)
971
+ if result is not None:
972
+ sname, model = result
973
+ return True, sname, os.path.getsize(path), time.time() - t_start, path
974
 
975
  return False, None, None, time.time() - t_start, path
976
 
 
985
  ok, sname, sz, t_task, model_path = solve_task(tn, td, output_dir, conv_budget)
986
 
987
  if ok:
988
+ try:
989
+ macs, memory, params = score_network(model_path)
990
+ if macs is None:
991
+ macs, memory, params = 0, 0, 0
992
+ except:
993
  macs, memory, params = 0, 0, 0
994
  score = macs + memory + params
995
 
996
  results[tn] = (sname, t_task, sz)
997
+ print(f"Task {tn:3d}: {sname:25s} {score:>12} {t_task:7.3f}s ({sz:>8,} bytes)")
998
  else:
999
  print(f"Task {tn:3d}: UNSOLVED {t_task:7.3f}s")
1000
  macs, memory, params, score = 0, 0, 0, 0
1001
 
1002
+ if use_wandb and wandb is not None:
1003
  wandb.log({
1004
  "task_id": tn,
1005
  "solver": sname if ok else "unsolved",
 
1046
  t0 = time.time()
1047
  results = {}
1048
 
1049
+ if args.use_wandb and wandb is not None:
1050
  with wandb.init(
1051
  project="neurogolf",
1052
  name="solver_run",
1053
  config=config,
1054
  ):
1055
+ results = run_tasks(task_nums, tasks, args.output_dir, args.conv_budget, use_wandb=True)
 
 
 
 
 
 
 
1056
  else:
1057
+ results = run_tasks(task_nums, tasks, args.output_dir, args.conv_budget, use_wandb=False)
 
 
 
 
 
 
1058
 
1059
  elapsed = time.time() - t0
1060
  print(f"\n{'='*70}")
 
1069
 
1070
  if __name__ == '__main__':
1071
  main()