Fix ReduceSum axes for opset 17 (axes must be tensor input, not attribute)
Browse filesThree locations fixed:
- s_constant: ReduceSum axes=[1,2,3] → tensor input
- solve_conv_variable: ReduceSum axes=[1] → tensor input
- solve_conv_var_diff: ReduceSum axes=[1] → tensor input
Also fixes solve_conv_var_diff which was truncated in previous upload.
- neurogolf_solver.py +65 -235
neurogolf_solver.py
CHANGED
|
@@ -9,6 +9,7 @@ v5 CHANGES (from v4):
|
|
| 9 |
- s_rotate k=2: double Slice(step=-1) — 0 MACs (was ~165K)
|
| 10 |
- s_rotate k=1,3: Slice+Transpose for square grids (0 MACs), Gather fallback for non-square
|
| 11 |
- All Pad nodes: tensor-based pads input (opset 17 requirement)
|
|
|
|
| 12 |
- All other solvers unchanged from v4
|
| 13 |
|
| 14 |
Solvers:
|
|
@@ -50,12 +51,8 @@ OPSET = [helper.make_opsetid("", 17)]
|
|
| 50 |
|
| 51 |
INT64_MIN = int(np.iinfo(np.int64).min)
|
| 52 |
|
| 53 |
-
# Officially excluded tasks (score 0 regardless)
|
| 54 |
EXCLUDED_TASKS = {21, 55, 80, 184, 202, 366}
|
| 55 |
-
|
| 56 |
-
# Max ARC-GEN examples to use for validation (to keep runtime reasonable)
|
| 57 |
MAX_ARCGEN_VALIDATE = 30
|
| 58 |
-
# Max ARC-GEN examples for conv fitting (keep separate from validation!)
|
| 59 |
MAX_ARCGEN_FIT = 0
|
| 60 |
|
| 61 |
def get_providers():
|
|
@@ -68,7 +65,6 @@ ORT_PROVIDERS = get_providers()
|
|
| 68 |
# ============================================================
|
| 69 |
|
| 70 |
def load_tasks_dir(data_dir, arcgen_dir=None):
|
| 71 |
-
"""Load ARC-AGI tasks and optionally merge ARC-GEN data."""
|
| 72 |
files = sorted(f for f in os.listdir(data_dir) if f.endswith('.json'))
|
| 73 |
tasks = {}
|
| 74 |
for i, f in enumerate(files):
|
|
@@ -86,7 +82,6 @@ def load_tasks_dir(data_dir, arcgen_dir=None):
|
|
| 86 |
return tasks
|
| 87 |
|
| 88 |
def load_tasks_kaggle(data_dir):
|
| 89 |
-
"""Load Kaggle format tasks (already have arc-gen embedded)."""
|
| 90 |
tasks = {}
|
| 91 |
for tn in range(1, 401):
|
| 92 |
path = os.path.join(data_dir, f"task{tn:03d}.json")
|
|
@@ -107,7 +102,6 @@ def to_onehot(grid):
|
|
| 107 |
return arr
|
| 108 |
|
| 109 |
def validate(path, td):
|
| 110 |
-
"""Validate model against ALL examples: train + test + arc-gen."""
|
| 111 |
try:
|
| 112 |
opts = ort.SessionOptions()
|
| 113 |
opts.log_severity_level = 3
|
|
@@ -130,7 +124,6 @@ def validate(path, td):
|
|
| 130 |
return True
|
| 131 |
|
| 132 |
def validate_raw(raw_bytes, td):
|
| 133 |
-
"""Validate model from raw bytes against ALL examples."""
|
| 134 |
try:
|
| 135 |
opts = ort.SessionOptions()
|
| 136 |
opts.log_severity_level = 3
|
|
@@ -153,14 +146,13 @@ def validate_raw(raw_bytes, td):
|
|
| 153 |
return True
|
| 154 |
|
| 155 |
# ============================================================
|
| 156 |
-
# STATIC PROFILER
|
| 157 |
# ============================================================
|
| 158 |
|
| 159 |
BANNED_OPS = {'Loop', 'Scan', 'NonZero', 'Unique', 'If', 'Function'}
|
| 160 |
MAX_FILESIZE = int(1.44 * 1024 * 1024)
|
| 161 |
|
| 162 |
def score_network(path):
|
| 163 |
-
"""Static profiler matching Kaggle scoring: cost = macs + memory + params."""
|
| 164 |
if HAS_ONNX_TOOL:
|
| 165 |
try:
|
| 166 |
return _score_network_official(path)
|
|
@@ -169,23 +161,19 @@ def score_network(path):
|
|
| 169 |
return _static_profile(path)
|
| 170 |
|
| 171 |
def _static_profile(path):
|
| 172 |
-
"""Compute cost without onnx_tool: params + nbytes + macs."""
|
| 173 |
try:
|
| 174 |
model = onnx.load(path)
|
| 175 |
except:
|
| 176 |
return None, None, None
|
| 177 |
-
|
| 178 |
tensors = {}
|
| 179 |
params = 0
|
| 180 |
nbytes = 0
|
| 181 |
macs = 0
|
| 182 |
-
|
| 183 |
for init in model.graph.initializer:
|
| 184 |
a = numpy_helper.to_array(init)
|
| 185 |
tensors[init.name] = a
|
| 186 |
params += a.size
|
| 187 |
nbytes += a.nbytes
|
| 188 |
-
|
| 189 |
for nd in model.graph.node:
|
| 190 |
if nd.op_type == 'Constant':
|
| 191 |
for attr in nd.attribute:
|
|
@@ -198,16 +186,13 @@ def _static_profile(path):
|
|
| 198 |
nbytes += a.nbytes
|
| 199 |
except:
|
| 200 |
pass
|
| 201 |
-
|
| 202 |
if nd.op_type in BANNED_OPS:
|
| 203 |
return None, None, None
|
| 204 |
-
|
| 205 |
if nd.op_type == 'Conv' and len(nd.input) >= 2 and nd.input[1] in tensors:
|
| 206 |
w = tensors[nd.input[1]]
|
| 207 |
if w.ndim == 4:
|
| 208 |
co, ci, kh, kw = w.shape
|
| 209 |
macs += co * ci * kh * kw * GH * GW
|
| 210 |
-
|
| 211 |
return int(macs), int(nbytes), int(params)
|
| 212 |
|
| 213 |
# ============================================================
|
|
@@ -215,12 +200,10 @@ def _static_profile(path):
|
|
| 215 |
# ============================================================
|
| 216 |
|
| 217 |
def _make_int64_init(name, values):
|
| 218 |
-
"""Create an int64 tensor initializer from a list of values."""
|
| 219 |
return numpy_helper.from_array(np.array(values, dtype=np.int64), name)
|
| 220 |
|
| 221 |
def _build_pad_node(input_name, output_name, pad_h, pad_w, inits, suffix=''):
|
| 222 |
-
"""
|
| 223 |
-
Pads [0,0,0,0, 0,0,pad_h,pad_w] — only spatial end-padding."""
|
| 224 |
pads_name = f'pads{suffix}'
|
| 225 |
cv_name = f'pad_cv{suffix}'
|
| 226 |
pads_arr = np.array([0, 0, 0, 0, 0, 0, pad_h, pad_w], dtype=np.int64)
|
|
@@ -229,7 +212,7 @@ def _build_pad_node(input_name, output_name, pad_h, pad_w, inits, suffix=''):
|
|
| 229 |
return helper.make_node('Pad', [input_name, pads_name, cv_name], [output_name], mode='constant')
|
| 230 |
|
| 231 |
def _build_slice_crop(input_name, output_name, IH, IW, inits, suffix=''):
|
| 232 |
-
"""
|
| 233 |
st_name = f'crop_st{suffix}'
|
| 234 |
en_name = f'crop_en{suffix}'
|
| 235 |
inits.append(_make_int64_init(st_name, [0, 0, 0, 0]))
|
|
@@ -237,7 +220,7 @@ def _build_slice_crop(input_name, output_name, IH, IW, inits, suffix=''):
|
|
| 237 |
return helper.make_node('Slice', [input_name, st_name, en_name], [output_name])
|
| 238 |
|
| 239 |
def _build_slice_reverse(input_name, output_name, axis, dim_size, inits, suffix=''):
|
| 240 |
-
"""
|
| 241 |
st_name = f'rev_st{suffix}'
|
| 242 |
en_name = f'rev_en{suffix}'
|
| 243 |
ax_name = f'rev_ax{suffix}'
|
|
@@ -248,6 +231,12 @@ def _build_slice_reverse(input_name, output_name, axis, dim_size, inits, suffix=
|
|
| 248 |
inits.append(_make_int64_init(sp_name, [-1]))
|
| 249 |
return helper.make_node('Slice', [input_name, st_name, en_name, ax_name, sp_name], [output_name])
|
| 250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
def mk(nodes, inits=None):
|
| 252 |
x = helper.make_tensor_value_info("input", DT, GRID_SHAPE)
|
| 253 |
y = helper.make_tensor_value_info("output", DT, GRID_SHAPE)
|
|
@@ -255,45 +244,35 @@ def mk(nodes, inits=None):
|
|
| 255 |
return helper.make_model(g, ir_version=IR, opset_imports=OPSET)
|
| 256 |
|
| 257 |
def get_exs(td):
|
| 258 |
-
"""Get examples for analytical solvers (train+test only)."""
|
| 259 |
return [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
|
| 260 |
for ex in td['train'] + td['test']]
|
| 261 |
|
| 262 |
def get_exs_for_fitting(td):
|
| 263 |
-
"""Get examples for conv fitting. Uses train+test + arc-gen WHERE SIZES MATCH."""
|
| 264 |
base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
|
| 265 |
for ex in td['train'] + td['test']]
|
| 266 |
-
|
| 267 |
if not base_exs:
|
| 268 |
return base_exs
|
| 269 |
-
|
| 270 |
base_shapes = {inp.shape for inp, _ in base_exs}
|
| 271 |
if len(base_shapes) != 1:
|
| 272 |
return base_exs
|
| 273 |
-
|
| 274 |
base_shape = list(base_shapes)[0]
|
| 275 |
-
|
| 276 |
ag_exs = []
|
| 277 |
for ex in td.get('arc-gen', []):
|
| 278 |
inp = np.array(ex['input'], dtype=np.int64)
|
| 279 |
out = np.array(ex['output'], dtype=np.int64)
|
| 280 |
if inp.shape == base_shape and out.shape == base_exs[0][1].shape:
|
| 281 |
ag_exs.append((inp, out))
|
| 282 |
-
|
| 283 |
return base_exs + ag_exs[:10]
|
| 284 |
|
| 285 |
def get_exs_for_fitting_variable(td):
|
| 286 |
-
"""Get examples for variable-shape conv fitting."""
|
| 287 |
base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
|
| 288 |
for ex in td['train'] + td['test']]
|
| 289 |
-
|
| 290 |
ag_exs = []
|
| 291 |
for ex in td.get('arc-gen', []):
|
| 292 |
inp = np.array(ex['input'], dtype=np.int64)
|
| 293 |
out = np.array(ex['output'], dtype=np.int64)
|
| 294 |
if inp.shape == out.shape and inp.shape[0] <= 30 and inp.shape[1] <= 30:
|
| 295 |
ag_exs.append((inp, out))
|
| 296 |
-
|
| 297 |
return base_exs + ag_exs[:20]
|
| 298 |
|
| 299 |
def fixed_shapes(td):
|
|
@@ -303,11 +282,10 @@ def fixed_shapes(td):
|
|
| 303 |
return list(shapes)[0] if len(shapes) == 1 else None
|
| 304 |
|
| 305 |
# ============================================================
|
| 306 |
-
# GATHER HELPERS
|
| 307 |
# ============================================================
|
| 308 |
|
| 309 |
def _build_gather_model(OH, OW, idx):
|
| 310 |
-
"""Gather-based spatial remapping. Used for concat, spatial_gather, etc."""
|
| 311 |
flat_idx = np.zeros((GH*GW,), dtype=np.int64)
|
| 312 |
mask = np.zeros((1,1,GH,GW), dtype=np.float32)
|
| 313 |
for oi in range(OH):
|
|
@@ -329,7 +307,6 @@ def _build_gather_model(OH, OW, idx):
|
|
| 329 |
return mk(nodes, inits)
|
| 330 |
|
| 331 |
def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
|
| 332 |
-
"""Gather-based spatial remapping with constant pixels."""
|
| 333 |
flat_idx = np.zeros((GH*GW,), dtype=np.int64)
|
| 334 |
gather_mask = np.zeros((1,1,GH,GW), dtype=np.float32)
|
| 335 |
const_oh = np.zeros((1,10,GH,GW), dtype=np.float32)
|
|
@@ -378,9 +355,7 @@ def s_color_map(td):
|
|
| 378 |
iv, ov = int(iv), int(ov)
|
| 379 |
if iv in cm and cm[iv] != ov: return None
|
| 380 |
cm[iv] = ov
|
| 381 |
-
|
| 382 |
is_permutation = (set(cm.keys()) == set(cm.values()))
|
| 383 |
-
|
| 384 |
if is_permutation:
|
| 385 |
gather_ch = np.arange(10, dtype=np.int32)
|
| 386 |
for src, dst in cm.items():
|
|
@@ -397,106 +372,71 @@ def s_color_map(td):
|
|
| 397 |
[numpy_helper.from_array(W, 'W')])
|
| 398 |
|
| 399 |
def s_transpose(td):
|
| 400 |
-
"""Transpose spatial dimensions. Already near-zero cost with Transpose node."""
|
| 401 |
for ex in td['train']+td['test']:
|
| 402 |
if not np.array_equal(np.array(ex['output']), np.array(ex['input']).T): return None
|
| 403 |
return mk([helper.make_node('Transpose', ['input'], ['output'], perm=[0,1,3,2])])
|
| 404 |
|
| 405 |
def s_flip(td):
|
| 406 |
-
"""Flip using Slice(step=-1) — zero MACs
|
| 407 |
exs = get_exs(td)
|
| 408 |
sp = fixed_shapes(td)
|
| 409 |
if sp is None: return None
|
| 410 |
(IH,IW),(OH,OW) = sp
|
| 411 |
if (IH,IW) != (OH,OW): return None
|
| 412 |
-
|
| 413 |
for axis, flip_fn in [(0, np.flipud), (1, np.fliplr)]:
|
| 414 |
if all(np.array_equal(out, flip_fn(inp)) for inp, out in exs):
|
| 415 |
-
# axis 0 = flipud = reverse dim 2 (H)
|
| 416 |
-
# axis 1 = fliplr = reverse dim 3 (W)
|
| 417 |
onnx_axis = 2 if axis == 0 else 3
|
| 418 |
dim_size = IH if axis == 0 else IW
|
| 419 |
pad_h, pad_w = GH - IH, GW - IW
|
| 420 |
-
|
| 421 |
inits = []
|
| 422 |
nodes = []
|
| 423 |
-
|
| 424 |
-
# Step 1: Crop input to [1,10,IH,IW]
|
| 425 |
nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
|
| 426 |
-
|
| 427 |
-
# Step 2: Reverse the target axis
|
| 428 |
nodes.append(_build_slice_reverse('cropped', 'flipped', onnx_axis, dim_size, inits))
|
| 429 |
-
|
| 430 |
-
# Step 3: Pad back to [1,10,30,30]
|
| 431 |
nodes.append(_build_pad_node('flipped', 'output', pad_h, pad_w, inits))
|
| 432 |
-
|
| 433 |
return mk(nodes, inits)
|
| 434 |
return None
|
| 435 |
|
| 436 |
def s_rotate(td):
|
| 437 |
-
"""Rotate using Slice+Transpose
|
| 438 |
-
|
| 439 |
exs = get_exs(td)
|
| 440 |
sp = fixed_shapes(td)
|
| 441 |
if sp is None: return None
|
| 442 |
(IH,IW),(OH,OW) = sp
|
| 443 |
-
|
| 444 |
for k in [1, 2, 3]:
|
| 445 |
if not all(np.array_equal(out, np.rot90(inp, k)) for inp, out in exs):
|
| 446 |
continue
|
| 447 |
-
|
| 448 |
if k == 2:
|
| 449 |
-
# 180° = flipud + fliplr — works for any shape
|
| 450 |
-
# output[r,c] = input[IH-1-r, IW-1-c]
|
| 451 |
pad_h, pad_w = GH - OH, GW - OW
|
| 452 |
inits = []
|
| 453 |
nodes = []
|
| 454 |
-
|
| 455 |
-
# Crop to [1,10,IH,IW]
|
| 456 |
nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
|
| 457 |
-
# Reverse axis 2 (H)
|
| 458 |
nodes.append(_build_slice_reverse('cropped', 'flip_h', 2, IH, inits, suffix='_h'))
|
| 459 |
-
# Reverse axis 3 (W)
|
| 460 |
nodes.append(_build_slice_reverse('flip_h', 'rotated', 3, IW, inits, suffix='_w'))
|
| 461 |
-
# Pad back
|
| 462 |
nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
|
| 463 |
-
|
| 464 |
return mk(nodes, inits)
|
| 465 |
-
|
| 466 |
elif k == 1 and IH == IW:
|
| 467 |
-
# rot90 CCW
|
| 468 |
-
# output[r,c] = input[c, IH-1-r]
|
| 469 |
-
# Step 1: Transpose [0,1,3,2]: temp[r,c] = input[c,r]
|
| 470 |
-
# Step 2: Reverse axis 2: out[r,c] = temp[IH-1-r,c] = input[c,IH-1-r] ✓
|
| 471 |
pad_h, pad_w = GH - IH, GW - IW
|
| 472 |
inits = []
|
| 473 |
nodes = []
|
| 474 |
-
|
| 475 |
nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
|
| 476 |
nodes.append(helper.make_node('Transpose', ['cropped'], ['transposed'], perm=[0,1,3,2]))
|
| 477 |
nodes.append(_build_slice_reverse('transposed', 'rotated', 2, IH, inits))
|
| 478 |
nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
|
| 479 |
-
|
| 480 |
return mk(nodes, inits)
|
| 481 |
-
|
| 482 |
elif k == 3 and IH == IW:
|
| 483 |
-
# rot270 CCW
|
| 484 |
-
# output[r,c] = input[IW-1-c, r]
|
| 485 |
-
# Step 1: Reverse axis 2: temp[r,c] = input[IH-1-r,c]
|
| 486 |
-
# Step 2: Transpose [0,1,3,2]: out[r,c] = temp[c,r] = input[IH-1-c,r] ✓ (IH=IW)
|
| 487 |
pad_h, pad_w = GH - IH, GW - IW
|
| 488 |
inits = []
|
| 489 |
nodes = []
|
| 490 |
-
|
| 491 |
nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
|
| 492 |
nodes.append(_build_slice_reverse('cropped', 'flipped', 2, IH, inits))
|
| 493 |
nodes.append(helper.make_node('Transpose', ['flipped'], ['rotated'], perm=[0,1,3,2]))
|
| 494 |
nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
|
| 495 |
-
|
| 496 |
return mk(nodes, inits)
|
| 497 |
-
|
| 498 |
else:
|
| 499 |
-
# Non-square k=1 or k=3:
|
| 500 |
idx = np.zeros((OH,OW,2), dtype=np.int64)
|
| 501 |
for r in range(OH):
|
| 502 |
for c in range(OW):
|
|
@@ -527,11 +467,9 @@ def s_spatial_gather(td):
|
|
| 527 |
return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
|
| 528 |
|
| 529 |
def s_varshape_spatial_gather(td):
|
| 530 |
-
"""Spatial gather that works for variable-shape tasks by embedding in 30x30."""
|
| 531 |
sp = fixed_shapes(td)
|
| 532 |
if sp is not None: return None
|
| 533 |
exs = get_exs(td)
|
| 534 |
-
|
| 535 |
exs_30 = []
|
| 536 |
for inp, out in exs:
|
| 537 |
ih, iw = inp.shape
|
|
@@ -541,10 +479,8 @@ def s_varshape_spatial_gather(td):
|
|
| 541 |
inp30[:ih, :iw] = inp
|
| 542 |
out30[:oh, :ow] = out
|
| 543 |
exs_30.append((inp30, out30))
|
| 544 |
-
|
| 545 |
idx = np.full((30, 30, 2), -1, dtype=np.int64)
|
| 546 |
cst = np.full((30, 30), -1, dtype=np.int64)
|
| 547 |
-
|
| 548 |
for oi in range(30):
|
| 549 |
for oj in range(30):
|
| 550 |
vals = set(int(out30[oi, oj]) for _, out30 in exs_30)
|
|
@@ -560,7 +496,6 @@ def s_varshape_spatial_gather(td):
|
|
| 560 |
if found: break
|
| 561 |
if not found and cst[oi, oj] < 0:
|
| 562 |
return None
|
| 563 |
-
|
| 564 |
return _build_gather_model_with_const(30, 30, 30, 30, idx, cst)
|
| 565 |
|
| 566 |
def s_tile(td):
|
|
@@ -665,29 +600,21 @@ def s_concat(td):
|
|
| 665 |
return None
|
| 666 |
|
| 667 |
def s_concat_enhanced(td):
|
| 668 |
-
"""Enhanced concat with all 8 dihedral group transforms."""
|
| 669 |
exs = get_exs(td)
|
| 670 |
sp = fixed_shapes(td)
|
| 671 |
if sp is None: return None
|
| 672 |
(IH,IW),(OH,OW) = sp
|
| 673 |
if IH == OH and IW == OW: return None
|
| 674 |
-
|
| 675 |
if OH % IH != 0 or OW % IW != 0: return None
|
| 676 |
rH, rW = OH // IH, OW // IW
|
| 677 |
if rH * rW > 16 or rH * rW < 2: return None
|
| 678 |
if OH > 30 or OW > 30: return None
|
| 679 |
-
|
| 680 |
transforms = [
|
| 681 |
-
('id', lambda x: x),
|
| 682 |
-
('
|
| 683 |
-
('
|
| 684 |
-
('
|
| 685 |
-
('rot90', lambda x: np.rot90(x, 1)),
|
| 686 |
-
('rot270', lambda x: np.rot90(x, 3)),
|
| 687 |
-
('T', lambda x: x.T),
|
| 688 |
-
('T_fliplr', lambda x: np.fliplr(x.T)),
|
| 689 |
]
|
| 690 |
-
|
| 691 |
block_transforms = {}
|
| 692 |
for bi in range(rH):
|
| 693 |
for bj in range(rW):
|
|
@@ -698,15 +625,11 @@ def s_concat_enhanced(td):
|
|
| 698 |
block = out[bi*IH:(bi+1)*IH, bj*IW:(bj+1)*IW]
|
| 699 |
expected = tfn(inp)
|
| 700 |
if expected.shape != (IH, IW) or not np.array_equal(block, expected):
|
| 701 |
-
ok = False
|
| 702 |
-
break
|
| 703 |
if ok:
|
| 704 |
-
found = (tidx, tname)
|
| 705 |
-
|
| 706 |
-
if found is None:
|
| 707 |
-
return None
|
| 708 |
block_transforms[(bi, bj)] = found
|
| 709 |
-
|
| 710 |
idx = np.zeros((OH, OW, 2), dtype=np.int64)
|
| 711 |
for bi in range(rH):
|
| 712 |
for bj in range(rW):
|
|
@@ -723,19 +646,15 @@ def s_concat_enhanced(td):
|
|
| 723 |
elif tname == 'T': sr, sc = lc, lr
|
| 724 |
elif tname == 'T_fliplr': sr, sc = IW-1-lc, lr
|
| 725 |
idx[oi, oj] = [sr, sc]
|
| 726 |
-
|
| 727 |
for inp, out in exs:
|
| 728 |
reconstructed = np.zeros_like(out)
|
| 729 |
for oi in range(OH):
|
| 730 |
for oj in range(OW):
|
| 731 |
reconstructed[oi,oj] = inp[idx[oi,oj,0], idx[oi,oj,1]]
|
| 732 |
-
if not np.array_equal(reconstructed, out):
|
| 733 |
-
return None
|
| 734 |
-
|
| 735 |
return _build_gather_model(OH, OW, idx)
|
| 736 |
|
| 737 |
def s_input_driven_tile(td):
|
| 738 |
-
"""Each non-zero input pixel controls a block that's a copy of the input."""
|
| 739 |
exs = get_exs(td)
|
| 740 |
sp = fixed_shapes(td)
|
| 741 |
if sp is None: return None
|
|
@@ -744,21 +663,17 @@ def s_input_driven_tile(td):
|
|
| 744 |
sH, sW = OH // IH, OW // IW
|
| 745 |
if sH != IH or sW != IW: return None
|
| 746 |
if OH > 30 or OW > 30: return None
|
| 747 |
-
|
| 748 |
for inp, out in exs:
|
| 749 |
for bi in range(IH):
|
| 750 |
for bj in range(IW):
|
| 751 |
block = out[bi*IH:(bi+1)*IH, bj*IW:(bj+1)*IW]
|
| 752 |
if inp[bi, bj] != 0:
|
| 753 |
-
if not np.array_equal(block, inp):
|
| 754 |
-
return None
|
| 755 |
else:
|
| 756 |
-
if not np.all(block == 0):
|
| 757 |
-
return None
|
| 758 |
return None
|
| 759 |
|
| 760 |
def s_kronecker(td):
|
| 761 |
-
"""output = kron(input, ones(sH,sW)) — nearest-neighbor upscaling."""
|
| 762 |
exs = get_exs(td)
|
| 763 |
sp = fixed_shapes(td)
|
| 764 |
if sp is None: return None
|
|
@@ -767,12 +682,8 @@ def s_kronecker(td):
|
|
| 767 |
sH, sW = OH // IH, OW // IW
|
| 768 |
if sH < 2 or sW < 2: return None
|
| 769 |
if OH > 30 or OW > 30: return None
|
| 770 |
-
|
| 771 |
for inp, out in exs:
|
| 772 |
-
|
| 773 |
-
if not np.array_equal(out, expected):
|
| 774 |
-
return None
|
| 775 |
-
|
| 776 |
idx = np.zeros((OH,OW,2), dtype=np.int64)
|
| 777 |
for r in range(OH):
|
| 778 |
for c in range(OW):
|
|
@@ -780,7 +691,6 @@ def s_kronecker(td):
|
|
| 780 |
return _build_gather_model(OH, OW, idx)
|
| 781 |
|
| 782 |
def s_diagonal_tile(td):
|
| 783 |
-
"""Input placed along diagonal: block[i,i] = input, rest = 0."""
|
| 784 |
exs = get_exs(td)
|
| 785 |
sp = fixed_shapes(td)
|
| 786 |
if sp is None: return None
|
|
@@ -789,18 +699,14 @@ def s_diagonal_tile(td):
|
|
| 789 |
rH, rW = OH // IH, OW // IW
|
| 790 |
if rH != rW or rH < 2: return None
|
| 791 |
if OH > 30 or OW > 30: return None
|
| 792 |
-
|
| 793 |
for inp, out in exs:
|
| 794 |
for bi in range(rH):
|
| 795 |
for bj in range(rW):
|
| 796 |
block = out[bi*IH:(bi+1)*IH, bj*IW:(bj+1)*IW]
|
| 797 |
if bi == bj:
|
| 798 |
-
if not np.array_equal(block, inp):
|
| 799 |
-
return None
|
| 800 |
else:
|
| 801 |
-
if not np.all(block == 0):
|
| 802 |
-
return None
|
| 803 |
-
|
| 804 |
idx = np.zeros((OH,OW,2), dtype=np.int64)
|
| 805 |
cst = np.full((OH,OW), -1, dtype=np.int64)
|
| 806 |
for bi in range(rH):
|
|
@@ -813,11 +719,9 @@ def s_diagonal_tile(td):
|
|
| 813 |
else:
|
| 814 |
idx[oi, oj] = [-1, -1]
|
| 815 |
cst[oi, oj] = 0
|
| 816 |
-
|
| 817 |
return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
|
| 818 |
|
| 819 |
def s_shift(td):
|
| 820 |
-
"""Detect constant spatial shift of the grid."""
|
| 821 |
exs = get_exs(td)
|
| 822 |
sp = fixed_shapes(td)
|
| 823 |
if sp is None: return None
|
|
@@ -850,13 +754,11 @@ def s_shift(td):
|
|
| 850 |
return None
|
| 851 |
|
| 852 |
def s_gravity(td):
|
| 853 |
-
"""Detect gravity-like compaction in one direction."""
|
| 854 |
exs = get_exs(td)
|
| 855 |
sp = fixed_shapes(td)
|
| 856 |
if sp is None: return None
|
| 857 |
(IH, IW), (OH, OW) = sp
|
| 858 |
if (IH, IW) != (OH, OW): return None
|
| 859 |
-
|
| 860 |
def _gravity(grid, direction):
|
| 861 |
r = np.zeros_like(grid); h, w = grid.shape
|
| 862 |
if direction in ('down', 'up'):
|
|
@@ -870,14 +772,12 @@ def s_gravity(td):
|
|
| 870 |
if direction == 'right': r[rr, w-len(nz):w] = nz
|
| 871 |
else: r[rr, :len(nz)] = nz
|
| 872 |
return r
|
| 873 |
-
|
| 874 |
for d in ('down', 'up', 'left', 'right'):
|
| 875 |
if all(np.array_equal(_gravity(inp, d), out) for inp, out in exs):
|
| 876 |
return None
|
| 877 |
return None
|
| 878 |
|
| 879 |
def s_mirror_h(td):
|
| 880 |
-
"""Output = input | flip(input, horizontal), doubling width."""
|
| 881 |
exs = get_exs(td)
|
| 882 |
sp = fixed_shapes(td)
|
| 883 |
if sp is None: return None
|
|
@@ -885,8 +785,7 @@ def s_mirror_h(td):
|
|
| 885 |
if OH != IH or OW != 2 * IW: return None
|
| 886 |
if OW > 30: return None
|
| 887 |
for inp, out in exs:
|
| 888 |
-
|
| 889 |
-
if not np.array_equal(expected, out): return None
|
| 890 |
idx = np.zeros((OH, OW, 2), dtype=np.int64)
|
| 891 |
for r in range(OH):
|
| 892 |
for c in range(OW):
|
|
@@ -895,7 +794,6 @@ def s_mirror_h(td):
|
|
| 895 |
return _build_gather_model(OH, OW, idx)
|
| 896 |
|
| 897 |
def s_mirror_v(td):
|
| 898 |
-
"""Output = input over flip(input, vertical), doubling height."""
|
| 899 |
exs = get_exs(td)
|
| 900 |
sp = fixed_shapes(td)
|
| 901 |
if sp is None: return None
|
|
@@ -903,8 +801,7 @@ def s_mirror_v(td):
|
|
| 903 |
if OW != IW or OH != 2 * IH: return None
|
| 904 |
if OH > 30: return None
|
| 905 |
for inp, out in exs:
|
| 906 |
-
|
| 907 |
-
if not np.array_equal(expected, out): return None
|
| 908 |
idx = np.zeros((OH, OW, 2), dtype=np.int64)
|
| 909 |
for r in range(OH):
|
| 910 |
for c in range(OW):
|
|
@@ -913,7 +810,6 @@ def s_mirror_v(td):
|
|
| 913 |
return _build_gather_model(OH, OW, idx)
|
| 914 |
|
| 915 |
def s_quad_mirror(td):
|
| 916 |
-
"""Output = 2x2 block of input with h/v flips."""
|
| 917 |
exs = get_exs(td)
|
| 918 |
sp = fixed_shapes(td)
|
| 919 |
if sp is None: return None
|
|
@@ -921,10 +817,8 @@ def s_quad_mirror(td):
|
|
| 921 |
if OH != 2 * IH or OW != 2 * IW: return None
|
| 922 |
if OH > 30 or OW > 30: return None
|
| 923 |
for inp, out in exs:
|
| 924 |
-
expected = np.block([
|
| 925 |
-
|
| 926 |
-
[np.flip(inp, 0), np.flip(np.flip(inp, 0), 1)]
|
| 927 |
-
])
|
| 928 |
if not np.array_equal(expected, out): return None
|
| 929 |
idx = np.zeros((OH, OW, 2), dtype=np.int64)
|
| 930 |
for r in range(OH):
|
|
@@ -935,7 +829,6 @@ def s_quad_mirror(td):
|
|
| 935 |
return _build_gather_model(OH, OW, idx)
|
| 936 |
|
| 937 |
def s_fixed_crop(td):
|
| 938 |
-
"""Output = fixed subregion of input."""
|
| 939 |
exs = get_exs(td)
|
| 940 |
sp = fixed_shapes(td)
|
| 941 |
if sp is None: return None
|
|
@@ -952,7 +845,6 @@ def s_fixed_crop(td):
|
|
| 952 |
return None
|
| 953 |
|
| 954 |
def s_nonuniform_scale(td):
|
| 955 |
-
"""Output = input scaled by different factors in h and w."""
|
| 956 |
exs = get_exs(td)
|
| 957 |
sp = fixed_shapes(td)
|
| 958 |
if sp is None: return None
|
|
@@ -969,6 +861,7 @@ def s_nonuniform_scale(td):
|
|
| 969 |
return None
|
| 970 |
|
| 971 |
def s_constant(td):
|
|
|
|
| 972 |
sp = fixed_shapes(td)
|
| 973 |
if sp is None: return None
|
| 974 |
exs = get_exs(td)
|
|
@@ -978,11 +871,16 @@ def s_constant(td):
|
|
| 978 |
for r, row in enumerate(outs[0]):
|
| 979 |
for c, v in enumerate(row):
|
| 980 |
const[0, int(v), r, c] = 1.0
|
| 981 |
-
inits = [
|
| 982 |
-
|
| 983 |
-
|
| 984 |
-
|
| 985 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 986 |
return mk(nodes, inits)
|
| 987 |
|
| 988 |
# ============================================================
|
|
@@ -990,18 +888,15 @@ def s_constant(td):
|
|
| 990 |
# ============================================================
|
| 991 |
|
| 992 |
def add_onehot_block(nodes, inits, am_name, oh_name):
|
| 993 |
-
"""Equal + Cast one-hot encoding (replaces OneHot which lacks CUDA kernel)."""
|
| 994 |
classes = np.arange(10, dtype=np.int64).reshape(1, 10, 1, 1)
|
| 995 |
inits.append(numpy_helper.from_array(classes, 'classes'))
|
| 996 |
nodes.append(helper.make_node('Equal', [am_name, 'classes'], ['eq']))
|
| 997 |
nodes.append(helper.make_node('Cast', ['eq'], [oh_name], to=TensorProto.FLOAT))
|
| 998 |
|
| 999 |
def _lstsq_conv(exs_raw, ks, use_bias, use_full_30=False):
|
| 1000 |
-
"""Shared lstsq conv fitting. Returns (Wconv, B) or None."""
|
| 1001 |
pad = ks // 2
|
| 1002 |
feat = 10 * ks * ks + (1 if use_bias else 0)
|
| 1003 |
if feat > 20000: return None
|
| 1004 |
-
|
| 1005 |
patches, targets = [], []
|
| 1006 |
for inp_g, out_g in exs_raw:
|
| 1007 |
ih, iw = inp_g.shape
|
|
@@ -1013,7 +908,6 @@ def _lstsq_conv(exs_raw, ks, use_bias, use_full_30=False):
|
|
| 1013 |
oh_enc = np.zeros((10, ih, iw), dtype=np.float64)
|
| 1014 |
for c in range(10): oh_enc[c] = (inp_g == c)
|
| 1015 |
oh_pad = np.pad(oh_enc, ((0,0),(pad,pad),(pad,pad)))
|
| 1016 |
-
|
| 1017 |
oh, ow = out_g.shape
|
| 1018 |
for r in range(oh):
|
| 1019 |
for c in range(ow):
|
|
@@ -1021,18 +915,14 @@ def _lstsq_conv(exs_raw, ks, use_bias, use_full_30=False):
|
|
| 1021 |
if use_bias: p = np.append(p, 1.0)
|
| 1022 |
patches.append(p)
|
| 1023 |
targets.append(int(out_g[r, c]))
|
| 1024 |
-
|
| 1025 |
n_patches = len(patches)
|
| 1026 |
if feat > 5000 and n_patches > 2000: return None
|
| 1027 |
-
|
| 1028 |
P = np.array(patches, dtype=np.float64)
|
| 1029 |
T = np.array(targets, dtype=np.int64)
|
| 1030 |
T_oh = np.zeros((len(T), 10), dtype=np.float64)
|
| 1031 |
for i, t in enumerate(T): T_oh[i, t] = 1.0
|
| 1032 |
-
|
| 1033 |
WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
|
| 1034 |
if not np.array_equal(np.argmax(P @ WT, axis=1), T): return None
|
| 1035 |
-
|
| 1036 |
if use_bias:
|
| 1037 |
Wconv = WT[:-1].T.reshape(10, 10, ks, ks).astype(np.float32)
|
| 1038 |
B = WT[-1].astype(np.float32)
|
|
@@ -1042,17 +932,14 @@ def _lstsq_conv(exs_raw, ks, use_bias, use_full_30=False):
|
|
| 1042 |
return Wconv, B
|
| 1043 |
|
| 1044 |
def solve_conv_fixed(td, path, time_budget=30.0):
|
| 1045 |
-
"""Fixed-shape conv: Slice -> Conv -> ArgMax -> Equal+Cast -> Pad."""
|
| 1046 |
exs = get_exs(td)
|
| 1047 |
for inp, out in exs:
|
| 1048 |
if inp.shape != out.shape: return None
|
| 1049 |
shapes = set(inp.shape for inp, _ in exs)
|
| 1050 |
if len(shapes) != 1: return None
|
| 1051 |
IH, IW = shapes.pop()
|
| 1052 |
-
|
| 1053 |
fit_exs = get_exs_for_fitting(td)
|
| 1054 |
fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape and i.shape == (IH, IW)]
|
| 1055 |
-
|
| 1056 |
t_start = time.time()
|
| 1057 |
for use_bias in [False, True]:
|
| 1058 |
for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]:
|
|
@@ -1062,7 +949,6 @@ def solve_conv_fixed(td, path, time_budget=30.0):
|
|
| 1062 |
Wconv, B = result
|
| 1063 |
pad = ks // 2
|
| 1064 |
pad_h, pad_w = GH - IH, GW - IW
|
| 1065 |
-
|
| 1066 |
inits = [
|
| 1067 |
_make_int64_init('sl_st', [0,0,0,0]),
|
| 1068 |
_make_int64_init('sl_en', [1,10,IH,IW]),
|
|
@@ -1072,7 +958,6 @@ def solve_conv_fixed(td, path, time_budget=30.0):
|
|
| 1072 |
if B is not None:
|
| 1073 |
inits.append(numpy_helper.from_array(B, 'B'))
|
| 1074 |
conv_inputs.append('B')
|
| 1075 |
-
|
| 1076 |
nodes = [
|
| 1077 |
helper.make_node('Slice', ['input','sl_st','sl_en'], ['grid']),
|
| 1078 |
helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
|
|
@@ -1080,21 +965,18 @@ def solve_conv_fixed(td, path, time_budget=30.0):
|
|
| 1080 |
]
|
| 1081 |
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1082 |
nodes.append(_build_pad_node('oh_out', 'output', pad_h, pad_w, inits))
|
| 1083 |
-
|
| 1084 |
model = mk(nodes, inits)
|
| 1085 |
onnx.save(model, path)
|
| 1086 |
if validate(path, td): return 'conv_fixed', model
|
| 1087 |
return None
|
| 1088 |
|
| 1089 |
def solve_conv_variable(td, path, time_budget=30.0):
|
| 1090 |
-
"""Variable-shape conv
|
| 1091 |
exs = get_exs(td)
|
| 1092 |
for inp, out in exs:
|
| 1093 |
if inp.shape != out.shape: return None
|
| 1094 |
-
|
| 1095 |
fit_exs = get_exs_for_fitting_variable(td)
|
| 1096 |
fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape]
|
| 1097 |
-
|
| 1098 |
t_start = time.time()
|
| 1099 |
for use_bias in [False, True]:
|
| 1100 |
for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]:
|
|
@@ -1103,38 +985,35 @@ def solve_conv_variable(td, path, time_budget=30.0):
|
|
| 1103 |
if result is None: continue
|
| 1104 |
Wconv, B = result
|
| 1105 |
pad = ks // 2
|
| 1106 |
-
|
| 1107 |
-
|
|
|
|
|
|
|
| 1108 |
conv_inputs = ['input', 'W']
|
| 1109 |
if B is not None:
|
| 1110 |
inits.append(numpy_helper.from_array(B, 'B'))
|
| 1111 |
conv_inputs.append('B')
|
| 1112 |
-
|
| 1113 |
nodes = [
|
| 1114 |
-
helper.make_node('ReduceSum', ['input'
|
| 1115 |
helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
|
| 1116 |
helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
|
| 1117 |
]
|
| 1118 |
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1119 |
nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
|
| 1120 |
-
|
| 1121 |
model = mk(nodes, inits)
|
| 1122 |
onnx.save(model, path)
|
| 1123 |
if validate(path, td): return 'conv_var', model
|
| 1124 |
return None
|
| 1125 |
|
| 1126 |
def solve_conv_diffshape(td, path, time_budget=30.0):
|
| 1127 |
-
"""Diff-shape conv for fixed io shapes where output is smaller."""
|
| 1128 |
sp = fixed_shapes(td)
|
| 1129 |
if sp is None: return None
|
| 1130 |
(IH, IW), (OH, OW) = sp
|
| 1131 |
if IH == OH and IW == OW: return None
|
| 1132 |
if OH > IH or OW > IW: return None
|
| 1133 |
if OH > 30 or OW > 30: return None
|
| 1134 |
-
|
| 1135 |
exs = get_exs(td)
|
| 1136 |
t_start = time.time()
|
| 1137 |
-
|
| 1138 |
for dr_off, dc_off in [(0, 0), ((IH-OH)//2, (IW-OW)//2)]:
|
| 1139 |
for use_bias in [False, True]:
|
| 1140 |
for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21]:
|
|
@@ -1142,7 +1021,6 @@ def solve_conv_diffshape(td, path, time_budget=30.0):
|
|
| 1142 |
pad = ks // 2
|
| 1143 |
feat = 10 * ks * ks + (1 if use_bias else 0)
|
| 1144 |
if feat > 10000: continue
|
| 1145 |
-
|
| 1146 |
patches, targets = [], []
|
| 1147 |
valid = True
|
| 1148 |
for inp_g, out_g in exs:
|
|
@@ -1161,25 +1039,20 @@ def solve_conv_diffshape(td, path, time_budget=30.0):
|
|
| 1161 |
if not valid: break
|
| 1162 |
if not valid: break
|
| 1163 |
if not valid: continue
|
| 1164 |
-
|
| 1165 |
n_patches = len(patches)
|
| 1166 |
if feat > 5000 and n_patches > 2000: continue
|
| 1167 |
-
|
| 1168 |
P = np.array(patches, dtype=np.float64)
|
| 1169 |
T = np.array(targets, dtype=np.int64)
|
| 1170 |
T_oh = np.zeros((len(T), 10), dtype=np.float64)
|
| 1171 |
for i, t in enumerate(T): T_oh[i, t] = 1.0
|
| 1172 |
-
|
| 1173 |
WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
|
| 1174 |
if not np.array_equal(np.argmax(P @ WT, axis=1), T): continue
|
| 1175 |
-
|
| 1176 |
if use_bias:
|
| 1177 |
Wconv = WT[:-1].T.reshape(10, 10, ks, ks).astype(np.float32)
|
| 1178 |
B = WT[-1].astype(np.float32)
|
| 1179 |
else:
|
| 1180 |
Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
|
| 1181 |
B = None
|
| 1182 |
-
|
| 1183 |
pad_h, pad_w = GH - OH, GW - OW
|
| 1184 |
inits = [
|
| 1185 |
_make_int64_init('sl_st', [0,0,0,0]),
|
|
@@ -1192,7 +1065,6 @@ def solve_conv_diffshape(td, path, time_budget=30.0):
|
|
| 1192 |
if B is not None:
|
| 1193 |
inits.append(numpy_helper.from_array(B, 'B'))
|
| 1194 |
conv_inputs.append('B')
|
| 1195 |
-
|
| 1196 |
nodes = [
|
| 1197 |
helper.make_node('Slice', ['input','sl_st','sl_en'], ['grid']),
|
| 1198 |
helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
|
|
@@ -1201,25 +1073,21 @@ def solve_conv_diffshape(td, path, time_budget=30.0):
|
|
| 1201 |
]
|
| 1202 |
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1203 |
nodes.append(_build_pad_node('oh_out', 'output', pad_h, pad_w, inits))
|
| 1204 |
-
|
| 1205 |
model = mk(nodes, inits)
|
| 1206 |
onnx.save(model, path)
|
| 1207 |
if validate(path, td): return 'conv_diff', model
|
| 1208 |
return None
|
| 1209 |
|
| 1210 |
def solve_conv_var_diff(td, path, time_budget=30.0):
|
| 1211 |
-
"""Variable diff-shape conv
|
| 1212 |
exs = get_exs(td)
|
| 1213 |
-
|
| 1214 |
t_start = time.time()
|
| 1215 |
for use_bias in [False, True]:
|
| 1216 |
for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]:
|
| 1217 |
if time.time() - t_start > time_budget: return None
|
| 1218 |
-
|
| 1219 |
pad = ks // 2
|
| 1220 |
feat = 10 * ks * ks + (1 if use_bias else 0)
|
| 1221 |
if feat > 20000: continue
|
| 1222 |
-
|
| 1223 |
patches, targets = [], []
|
| 1224 |
for inp_g, out_g in exs:
|
| 1225 |
ih, iw = inp_g.shape
|
|
@@ -1227,56 +1095,49 @@ def solve_conv_var_diff(td, path, time_budget=30.0):
|
|
| 1227 |
oh_full = np.zeros((10, GH, GW), dtype=np.float64)
|
| 1228 |
for c in range(10): oh_full[c, :ih, :iw] = (inp_g == c)
|
| 1229 |
oh_pad = np.pad(oh_full, ((0,0),(pad,pad),(pad,pad)))
|
| 1230 |
-
|
| 1231 |
for r in range(oh):
|
| 1232 |
for c in range(ow):
|
| 1233 |
p = oh_pad[:, r:r+ks, c:c+ks].flatten()
|
| 1234 |
if use_bias: p = np.append(p, 1.0)
|
| 1235 |
patches.append(p)
|
| 1236 |
targets.append(int(out_g[r, c]))
|
| 1237 |
-
|
| 1238 |
n_patches = len(patches)
|
| 1239 |
if feat > 5000 and n_patches > 2000: continue
|
| 1240 |
-
|
| 1241 |
P = np.array(patches, dtype=np.float64)
|
| 1242 |
T = np.array(targets, dtype=np.int64)
|
| 1243 |
T_oh = np.zeros((len(T), 10), dtype=np.float64)
|
| 1244 |
for i, t in enumerate(T): T_oh[i, t] = 1.0
|
| 1245 |
-
|
| 1246 |
try:
|
| 1247 |
WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
|
| 1248 |
except:
|
| 1249 |
continue
|
| 1250 |
if not np.array_equal(np.argmax(P @ WT, axis=1), T): continue
|
| 1251 |
-
|
| 1252 |
if use_bias:
|
| 1253 |
Wconv = WT[:-1].T.reshape(10, 10, ks, ks).astype(np.float32)
|
| 1254 |
B = WT[-1].astype(np.float32)
|
| 1255 |
else:
|
| 1256 |
Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
|
| 1257 |
B = None
|
| 1258 |
-
|
| 1259 |
-
# For tasks where output fits within input bounds, use input mask
|
| 1260 |
all_output_within_input = all(
|
| 1261 |
out_g.shape[0] <= inp_g.shape[0] and out_g.shape[1] <= inp_g.shape[1]
|
| 1262 |
for inp_g, out_g in exs
|
| 1263 |
)
|
| 1264 |
-
|
| 1265 |
if all_output_within_input:
|
| 1266 |
-
inits = [
|
|
|
|
|
|
|
|
|
|
| 1267 |
conv_inputs = ['input', 'W']
|
| 1268 |
if B is not None:
|
| 1269 |
inits.append(numpy_helper.from_array(B, 'B'))
|
| 1270 |
conv_inputs.append('B')
|
| 1271 |
-
|
| 1272 |
nodes = [
|
| 1273 |
-
helper.make_node('ReduceSum', ['input'
|
| 1274 |
helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
|
| 1275 |
helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
|
| 1276 |
]
|
| 1277 |
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1278 |
nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
|
| 1279 |
-
|
| 1280 |
model = mk(nodes, inits)
|
| 1281 |
onnx.save(model, path)
|
| 1282 |
if validate(path, td): return 'conv_var_diff', model
|
|
@@ -1310,10 +1171,7 @@ ANALYTICAL_SOLVERS = [
|
|
| 1310 |
]
|
| 1311 |
|
| 1312 |
def solve_task(tn, td, output_dir, conv_budget=30.0, verbose=True):
|
| 1313 |
-
"""Try all solvers on a task. Returns (solver_name, score) or None."""
|
| 1314 |
path = os.path.join(output_dir, f"task{tn:03d}.onnx")
|
| 1315 |
-
|
| 1316 |
-
# Try analytical solvers first (instant, arc-gen safe)
|
| 1317 |
for name, solver in ANALYTICAL_SOLVERS:
|
| 1318 |
try:
|
| 1319 |
model = solver(td)
|
|
@@ -1331,8 +1189,6 @@ def solve_task(tn, td, output_dir, conv_budget=30.0, verbose=True):
|
|
| 1331 |
return name, score
|
| 1332 |
else:
|
| 1333 |
if verbose: print(f" {name}: model built but FAILED validation")
|
| 1334 |
-
|
| 1335 |
-
# Try conv solvers
|
| 1336 |
conv_solvers = [
|
| 1337 |
('conv_fixed', solve_conv_fixed),
|
| 1338 |
('conv_variable', solve_conv_variable),
|
|
@@ -1354,35 +1210,28 @@ def solve_task(tn, td, output_dir, conv_budget=30.0, verbose=True):
|
|
| 1354 |
score = max(1.0, 25.0 - math.log(cost)) if cost > 0 else 25.0
|
| 1355 |
if verbose: print(f" {solver_type}: PASS cost={cost} score={score:.2f}")
|
| 1356 |
return solver_type, score
|
| 1357 |
-
|
| 1358 |
return None
|
| 1359 |
|
| 1360 |
def main():
|
| 1361 |
parser = argparse.ArgumentParser(description='NeuroGolf Solver v5')
|
| 1362 |
-
parser.add_argument('--data_dir', type=str, default=None
|
| 1363 |
-
parser.add_argument('--kaggle_dir', type=str, default=None
|
| 1364 |
-
parser.add_argument('--arcgen_dir', type=str, default=None
|
| 1365 |
-
parser.add_argument('--output_dir', type=str, default='submission'
|
| 1366 |
-
parser.add_argument('--conv_budget', type=float, default=30.0
|
| 1367 |
-
parser.add_argument('--task', type=int, default=None
|
| 1368 |
parser.add_argument('--verbose', action='store_true', default=True)
|
| 1369 |
parser.add_argument('--quiet', action='store_true', default=False)
|
| 1370 |
args = parser.parse_args()
|
| 1371 |
-
|
| 1372 |
if args.quiet:
|
| 1373 |
args.verbose = False
|
| 1374 |
-
|
| 1375 |
os.makedirs(args.output_dir, exist_ok=True)
|
| 1376 |
-
|
| 1377 |
-
# Load tasks
|
| 1378 |
if args.kaggle_dir:
|
| 1379 |
tasks = load_tasks_kaggle(args.kaggle_dir)
|
| 1380 |
elif args.data_dir:
|
| 1381 |
tasks = load_tasks_dir(args.data_dir, args.arcgen_dir)
|
| 1382 |
else:
|
| 1383 |
-
|
| 1384 |
-
for p in ['/kaggle/input/competitions/neurogolf-2026/',
|
| 1385 |
-
'ARC-AGI/data/training/']:
|
| 1386 |
if os.path.exists(p):
|
| 1387 |
if 'kaggle' in p:
|
| 1388 |
tasks = load_tasks_kaggle(p)
|
|
@@ -1392,15 +1241,11 @@ def main():
|
|
| 1392 |
else:
|
| 1393 |
print("ERROR: No data directory found. Use --data_dir or --kaggle_dir")
|
| 1394 |
sys.exit(1)
|
| 1395 |
-
|
| 1396 |
-
# Solve tasks
|
| 1397 |
results = {}
|
| 1398 |
total_score = 0.0
|
| 1399 |
solved = 0
|
| 1400 |
t_total = time.time()
|
| 1401 |
-
|
| 1402 |
task_nums = [args.task] if args.task else sorted(tasks.keys())
|
| 1403 |
-
|
| 1404 |
for tn in task_nums:
|
| 1405 |
if tn in EXCLUDED_TASKS:
|
| 1406 |
if args.verbose: print(f"Task {tn:3d}: EXCLUDED")
|
|
@@ -1408,44 +1253,32 @@ def main():
|
|
| 1408 |
if tn not in tasks:
|
| 1409 |
if args.verbose: print(f"Task {tn:3d}: NOT FOUND")
|
| 1410 |
continue
|
| 1411 |
-
|
| 1412 |
td = tasks[tn]['data']
|
| 1413 |
hex_id = tasks[tn]['hex']
|
| 1414 |
-
|
| 1415 |
if args.verbose: print(f"\nTask {tn:3d} ({hex_id}):")
|
| 1416 |
-
|
| 1417 |
result = solve_task(tn, td, args.output_dir, args.conv_budget, args.verbose)
|
| 1418 |
-
|
| 1419 |
if result is not None:
|
| 1420 |
solver_type, score = result
|
| 1421 |
results[tn] = {'solver': solver_type, 'score': score, 'hex': hex_id}
|
| 1422 |
total_score += score
|
| 1423 |
solved += 1
|
| 1424 |
else:
|
| 1425 |
-
# Unsolved tasks score 1.0 (minimum)
|
| 1426 |
total_score += 1.0
|
| 1427 |
if args.verbose: print(f" UNSOLVED")
|
| 1428 |
-
|
| 1429 |
-
# Summary
|
| 1430 |
elapsed = time.time() - t_total
|
| 1431 |
print(f"\n{'='*60}")
|
| 1432 |
print(f"RESULTS: {solved}/{len(task_nums)} tasks solved")
|
| 1433 |
print(f"Total score: {total_score:.1f}")
|
| 1434 |
print(f"Time: {elapsed:.1f}s")
|
| 1435 |
print(f"{'='*60}")
|
| 1436 |
-
|
| 1437 |
-
# Breakdown by solver type
|
| 1438 |
solver_counts = Counter(r['solver'] for r in results.values())
|
| 1439 |
solver_scores = {}
|
| 1440 |
for tn, r in results.items():
|
| 1441 |
st = r['solver']
|
| 1442 |
solver_scores[st] = solver_scores.get(st, 0) + r['score']
|
| 1443 |
-
|
| 1444 |
print("\nSolver breakdown:")
|
| 1445 |
for st in sorted(solver_counts.keys()):
|
| 1446 |
print(f" {st}: {solver_counts[st]} tasks, total score {solver_scores[st]:.1f}, avg {solver_scores[st]/solver_counts[st]:.2f}")
|
| 1447 |
-
|
| 1448 |
-
# Generate submission.csv
|
| 1449 |
csv_path = os.path.join(args.output_dir, 'submission.csv')
|
| 1450 |
with open(csv_path, 'w', newline='') as f:
|
| 1451 |
w = csv.writer(f)
|
|
@@ -1453,15 +1286,12 @@ def main():
|
|
| 1453 |
for tn in sorted(results.keys()):
|
| 1454 |
r = results[tn]
|
| 1455 |
w.writerow([tn, r['hex'], r['solver'], f"{r['score']:.3f}", f"task{tn:03d}.onnx"])
|
| 1456 |
-
|
| 1457 |
-
# Generate submission.zip
|
| 1458 |
zip_path = os.path.join(args.output_dir, 'submission.zip')
|
| 1459 |
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
|
| 1460 |
for tn in sorted(results.keys()):
|
| 1461 |
onnx_path = os.path.join(args.output_dir, f"task{tn:03d}.onnx")
|
| 1462 |
if os.path.exists(onnx_path):
|
| 1463 |
zf.write(onnx_path, f"task{tn:03d}.onnx")
|
| 1464 |
-
|
| 1465 |
print(f"\nSubmission files: {csv_path}, {zip_path}")
|
| 1466 |
print(f"Models in zip: {len(results)}")
|
| 1467 |
|
|
|
|
| 9 |
- s_rotate k=2: double Slice(step=-1) — 0 MACs (was ~165K)
|
| 10 |
- s_rotate k=1,3: Slice+Transpose for square grids (0 MACs), Gather fallback for non-square
|
| 11 |
- All Pad nodes: tensor-based pads input (opset 17 requirement)
|
| 12 |
+
- All ReduceSum nodes: axes as tensor input (opset 13+ requirement)
|
| 13 |
- All other solvers unchanged from v4
|
| 14 |
|
| 15 |
Solvers:
|
|
|
|
| 51 |
|
| 52 |
INT64_MIN = int(np.iinfo(np.int64).min)
|
| 53 |
|
|
|
|
| 54 |
EXCLUDED_TASKS = {21, 55, 80, 184, 202, 366}
|
|
|
|
|
|
|
| 55 |
MAX_ARCGEN_VALIDATE = 30
|
|
|
|
| 56 |
MAX_ARCGEN_FIT = 0
|
| 57 |
|
| 58 |
def get_providers():
|
|
|
|
| 65 |
# ============================================================
|
| 66 |
|
| 67 |
def load_tasks_dir(data_dir, arcgen_dir=None):
|
|
|
|
| 68 |
files = sorted(f for f in os.listdir(data_dir) if f.endswith('.json'))
|
| 69 |
tasks = {}
|
| 70 |
for i, f in enumerate(files):
|
|
|
|
| 82 |
return tasks
|
| 83 |
|
| 84 |
def load_tasks_kaggle(data_dir):
|
|
|
|
| 85 |
tasks = {}
|
| 86 |
for tn in range(1, 401):
|
| 87 |
path = os.path.join(data_dir, f"task{tn:03d}.json")
|
|
|
|
| 102 |
return arr
|
| 103 |
|
| 104 |
def validate(path, td):
|
|
|
|
| 105 |
try:
|
| 106 |
opts = ort.SessionOptions()
|
| 107 |
opts.log_severity_level = 3
|
|
|
|
| 124 |
return True
|
| 125 |
|
| 126 |
def validate_raw(raw_bytes, td):
|
|
|
|
| 127 |
try:
|
| 128 |
opts = ort.SessionOptions()
|
| 129 |
opts.log_severity_level = 3
|
|
|
|
| 146 |
return True
|
| 147 |
|
| 148 |
# ============================================================
|
| 149 |
+
# STATIC PROFILER
|
| 150 |
# ============================================================
|
| 151 |
|
| 152 |
BANNED_OPS = {'Loop', 'Scan', 'NonZero', 'Unique', 'If', 'Function'}
|
| 153 |
MAX_FILESIZE = int(1.44 * 1024 * 1024)
|
| 154 |
|
| 155 |
def score_network(path):
|
|
|
|
| 156 |
if HAS_ONNX_TOOL:
|
| 157 |
try:
|
| 158 |
return _score_network_official(path)
|
|
|
|
| 161 |
return _static_profile(path)
|
| 162 |
|
| 163 |
def _static_profile(path):
|
|
|
|
| 164 |
try:
|
| 165 |
model = onnx.load(path)
|
| 166 |
except:
|
| 167 |
return None, None, None
|
|
|
|
| 168 |
tensors = {}
|
| 169 |
params = 0
|
| 170 |
nbytes = 0
|
| 171 |
macs = 0
|
|
|
|
| 172 |
for init in model.graph.initializer:
|
| 173 |
a = numpy_helper.to_array(init)
|
| 174 |
tensors[init.name] = a
|
| 175 |
params += a.size
|
| 176 |
nbytes += a.nbytes
|
|
|
|
| 177 |
for nd in model.graph.node:
|
| 178 |
if nd.op_type == 'Constant':
|
| 179 |
for attr in nd.attribute:
|
|
|
|
| 186 |
nbytes += a.nbytes
|
| 187 |
except:
|
| 188 |
pass
|
|
|
|
| 189 |
if nd.op_type in BANNED_OPS:
|
| 190 |
return None, None, None
|
|
|
|
| 191 |
if nd.op_type == 'Conv' and len(nd.input) >= 2 and nd.input[1] in tensors:
|
| 192 |
w = tensors[nd.input[1]]
|
| 193 |
if w.ndim == 4:
|
| 194 |
co, ci, kh, kw = w.shape
|
| 195 |
macs += co * ci * kh * kw * GH * GW
|
|
|
|
| 196 |
return int(macs), int(nbytes), int(params)
|
| 197 |
|
| 198 |
# ============================================================
|
|
|
|
| 200 |
# ============================================================
|
| 201 |
|
| 202 |
def _make_int64_init(name, values):
    """Create a named int64 TensorProto initializer from *values*."""
    arr = np.asarray(values, dtype=np.int64)
    return numpy_helper.from_array(arr, name)
|
| 204 |
|
| 205 |
def _build_pad_node(input_name, output_name, pad_h, pad_w, inits, suffix=''):
|
| 206 |
+
"""Pad with tensor-based pads input (opset 11+)."""
|
|
|
|
| 207 |
pads_name = f'pads{suffix}'
|
| 208 |
cv_name = f'pad_cv{suffix}'
|
| 209 |
pads_arr = np.array([0, 0, 0, 0, 0, 0, pad_h, pad_w], dtype=np.int64)
|
|
|
|
| 212 |
return helper.make_node('Pad', [input_name, pads_name, cv_name], [output_name], mode='constant')
|
| 213 |
|
| 214 |
def _build_slice_crop(input_name, output_name, IH, IW, inits, suffix=''):
|
| 215 |
+
"""Slice to crop [1,10,30,30] to [1,10,IH,IW]."""
|
| 216 |
st_name = f'crop_st{suffix}'
|
| 217 |
en_name = f'crop_en{suffix}'
|
| 218 |
inits.append(_make_int64_init(st_name, [0, 0, 0, 0]))
|
|
|
|
| 220 |
return helper.make_node('Slice', [input_name, st_name, en_name], [output_name])
|
| 221 |
|
| 222 |
def _build_slice_reverse(input_name, output_name, axis, dim_size, inits, suffix=''):
|
| 223 |
+
"""Slice(step=-1) to reverse one axis. Zero MACs."""
|
| 224 |
st_name = f'rev_st{suffix}'
|
| 225 |
en_name = f'rev_en{suffix}'
|
| 226 |
ax_name = f'rev_ax{suffix}'
|
|
|
|
| 231 |
inits.append(_make_int64_init(sp_name, [-1]))
|
| 232 |
return helper.make_node('Slice', [input_name, st_name, en_name, ax_name, sp_name], [output_name])
|
| 233 |
|
| 234 |
+
def _build_reducesum(input_name, output_name, axes_list, inits, suffix=''):
    """Build a ReduceSum node with axes supplied as a tensor input (opset 13+).

    Opset 13 moved ReduceSum's axes from a node attribute to a second
    input tensor; the int64 axes initializer is appended to *inits*.
    keepdims stays 1 so the reduced dims are retained.
    """
    axes_name = 'rs_axes' + suffix
    inits.append(_make_int64_init(axes_name, axes_list))
    return helper.make_node('ReduceSum', [input_name, axes_name],
                            [output_name], keepdims=1)
|
| 239 |
+
|
| 240 |
def mk(nodes, inits=None):
|
| 241 |
x = helper.make_tensor_value_info("input", DT, GRID_SHAPE)
|
| 242 |
y = helper.make_tensor_value_info("output", DT, GRID_SHAPE)
|
|
|
|
| 244 |
return helper.make_model(g, ir_version=IR, opset_imports=OPSET)
|
| 245 |
|
| 246 |
def get_exs(td):
    """Return (input, output) int64 array pairs for every train+test example."""
    pairs = []
    for ex in td['train'] + td['test']:
        grid_in = np.array(ex['input'], dtype=np.int64)
        grid_out = np.array(ex['output'], dtype=np.int64)
        pairs.append((grid_in, grid_out))
    return pairs
|
| 249 |
|
| 250 |
def get_exs_for_fitting(td):
    """Return base train+test pairs plus up to 10 shape-matching arc-gen pairs.

    Arc-gen examples are appended only when all base inputs share one shape
    and the arc-gen pair matches both that input shape and the first base
    example's output shape.
    """
    def _pair(ex):
        return (np.array(ex['input'], dtype=np.int64),
                np.array(ex['output'], dtype=np.int64))

    base = [_pair(ex) for ex in td['train'] + td['test']]
    if not base:
        return base
    input_shapes = {inp.shape for inp, _ in base}
    if len(input_shapes) != 1:
        # Mixed input shapes: arc-gen augmentation is not applicable.
        return base
    want_in = next(iter(input_shapes))
    want_out = base[0][1].shape
    extra = []
    for ex in td.get('arc-gen', []):
        gi, go = _pair(ex)
        if gi.shape == want_in and go.shape == want_out:
            extra.append((gi, go))
    return base + extra[:10]
|
| 266 |
|
| 267 |
def get_exs_for_fitting_variable(td):
    """Return base pairs plus up to 20 arc-gen pairs with equal in/out shape.

    Arc-gen pairs are kept only when input and output shapes match and the
    grid fits inside 30x30.
    """
    base = [(np.array(ex['input'], dtype=np.int64),
             np.array(ex['output'], dtype=np.int64))
            for ex in td['train'] + td['test']]
    extra = []
    for ex in td.get('arc-gen', []):
        gi = np.array(ex['input'], dtype=np.int64)
        go = np.array(ex['output'], dtype=np.int64)
        if gi.shape != go.shape:
            continue
        if gi.shape[0] > 30 or gi.shape[1] > 30:
            continue
        extra.append((gi, go))
    return base + extra[:20]
|
| 277 |
|
| 278 |
def fixed_shapes(td):
|
|
|
|
| 282 |
return list(shapes)[0] if len(shapes) == 1 else None
|
| 283 |
|
| 284 |
# ============================================================
|
| 285 |
+
# GATHER HELPERS
|
| 286 |
# ============================================================
|
| 287 |
|
| 288 |
def _build_gather_model(OH, OW, idx):
|
|
|
|
| 289 |
flat_idx = np.zeros((GH*GW,), dtype=np.int64)
|
| 290 |
mask = np.zeros((1,1,GH,GW), dtype=np.float32)
|
| 291 |
for oi in range(OH):
|
|
|
|
| 307 |
return mk(nodes, inits)
|
| 308 |
|
| 309 |
def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
|
|
|
|
| 310 |
flat_idx = np.zeros((GH*GW,), dtype=np.int64)
|
| 311 |
gather_mask = np.zeros((1,1,GH,GW), dtype=np.float32)
|
| 312 |
const_oh = np.zeros((1,10,GH,GW), dtype=np.float32)
|
|
|
|
| 355 |
iv, ov = int(iv), int(ov)
|
| 356 |
if iv in cm and cm[iv] != ov: return None
|
| 357 |
cm[iv] = ov
|
|
|
|
| 358 |
is_permutation = (set(cm.keys()) == set(cm.values()))
|
|
|
|
| 359 |
if is_permutation:
|
| 360 |
gather_ch = np.arange(10, dtype=np.int32)
|
| 361 |
for src, dst in cm.items():
|
|
|
|
| 372 |
[numpy_helper.from_array(W, 'W')])
|
| 373 |
|
| 374 |
def s_transpose(td):
    """Solver: every example's output equals the input's matrix transpose."""
    for ex in td['train'] + td['test']:
        expected = np.array(ex['input']).T
        if not np.array_equal(np.array(ex['output']), expected):
            return None
    # Swap the two spatial axes of the [1,10,H,W] grid tensor.
    node = helper.make_node('Transpose', ['input'], ['output'], perm=[0, 1, 3, 2])
    return mk([node])
|
| 378 |
|
| 379 |
def s_flip(td):
    """Solver: output is a vertical or horizontal flip of the input.

    Built from Slice(step=-1) — zero MACs. Applies only to fixed-shape,
    shape-preserving tasks.
    """
    shapes = fixed_shapes(td)
    if shapes is None:
        return None
    (IH, IW), (OH, OW) = shapes
    if (IH, IW) != (OH, OW):
        return None
    exs = get_exs(td)
    for axis, flip_fn in ((0, np.flipud), (1, np.fliplr)):
        if not all(np.array_equal(out, flip_fn(inp)) for inp, out in exs):
            continue
        # NCHW layout: grid axis 0 -> tensor axis 2 (H), axis 1 -> 3 (W).
        onnx_axis = 2 + axis
        dim_size = (IH, IW)[axis]
        inits, nodes = [], []
        nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
        nodes.append(_build_slice_reverse('cropped', 'flipped', onnx_axis, dim_size, inits))
        nodes.append(_build_pad_node('flipped', 'output', GH - IH, GW - IW, inits))
        return mk(nodes, inits)
    return None
|
| 398 |
|
| 399 |
def s_rotate(td):
|
| 400 |
+
"""Rotate using Slice+Transpose — zero MACs for square grids and k=2.
|
| 401 |
+
Gather fallback for non-square k=1,3."""
|
| 402 |
exs = get_exs(td)
|
| 403 |
sp = fixed_shapes(td)
|
| 404 |
if sp is None: return None
|
| 405 |
(IH,IW),(OH,OW) = sp
|
|
|
|
| 406 |
for k in [1, 2, 3]:
|
| 407 |
if not all(np.array_equal(out, np.rot90(inp, k)) for inp, out in exs):
|
| 408 |
continue
|
|
|
|
| 409 |
if k == 2:
|
|
|
|
|
|
|
| 410 |
pad_h, pad_w = GH - OH, GW - OW
|
| 411 |
inits = []
|
| 412 |
nodes = []
|
|
|
|
|
|
|
| 413 |
nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
|
|
|
|
| 414 |
nodes.append(_build_slice_reverse('cropped', 'flip_h', 2, IH, inits, suffix='_h'))
|
|
|
|
| 415 |
nodes.append(_build_slice_reverse('flip_h', 'rotated', 3, IW, inits, suffix='_w'))
|
|
|
|
| 416 |
nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
|
|
|
|
| 417 |
return mk(nodes, inits)
|
|
|
|
| 418 |
elif k == 1 and IH == IW:
|
| 419 |
+
# rot90 CCW square: Transpose then flip axis 2
|
|
|
|
|
|
|
|
|
|
| 420 |
pad_h, pad_w = GH - IH, GW - IW
|
| 421 |
inits = []
|
| 422 |
nodes = []
|
|
|
|
| 423 |
nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
|
| 424 |
nodes.append(helper.make_node('Transpose', ['cropped'], ['transposed'], perm=[0,1,3,2]))
|
| 425 |
nodes.append(_build_slice_reverse('transposed', 'rotated', 2, IH, inits))
|
| 426 |
nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
|
|
|
|
| 427 |
return mk(nodes, inits)
|
|
|
|
| 428 |
elif k == 3 and IH == IW:
|
| 429 |
+
# rot270 CCW square: flip axis 2 then Transpose
|
|
|
|
|
|
|
|
|
|
| 430 |
pad_h, pad_w = GH - IH, GW - IW
|
| 431 |
inits = []
|
| 432 |
nodes = []
|
|
|
|
| 433 |
nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
|
| 434 |
nodes.append(_build_slice_reverse('cropped', 'flipped', 2, IH, inits))
|
| 435 |
nodes.append(helper.make_node('Transpose', ['flipped'], ['rotated'], perm=[0,1,3,2]))
|
| 436 |
nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
|
|
|
|
| 437 |
return mk(nodes, inits)
|
|
|
|
| 438 |
else:
|
| 439 |
+
# Non-square k=1 or k=3: Gather fallback
|
| 440 |
idx = np.zeros((OH,OW,2), dtype=np.int64)
|
| 441 |
for r in range(OH):
|
| 442 |
for c in range(OW):
|
|
|
|
| 467 |
return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
|
| 468 |
|
| 469 |
def s_varshape_spatial_gather(td):
|
|
|
|
| 470 |
sp = fixed_shapes(td)
|
| 471 |
if sp is not None: return None
|
| 472 |
exs = get_exs(td)
|
|
|
|
| 473 |
exs_30 = []
|
| 474 |
for inp, out in exs:
|
| 475 |
ih, iw = inp.shape
|
|
|
|
| 479 |
inp30[:ih, :iw] = inp
|
| 480 |
out30[:oh, :ow] = out
|
| 481 |
exs_30.append((inp30, out30))
|
|
|
|
| 482 |
idx = np.full((30, 30, 2), -1, dtype=np.int64)
|
| 483 |
cst = np.full((30, 30), -1, dtype=np.int64)
|
|
|
|
| 484 |
for oi in range(30):
|
| 485 |
for oj in range(30):
|
| 486 |
vals = set(int(out30[oi, oj]) for _, out30 in exs_30)
|
|
|
|
| 496 |
if found: break
|
| 497 |
if not found and cst[oi, oj] < 0:
|
| 498 |
return None
|
|
|
|
| 499 |
return _build_gather_model_with_const(30, 30, 30, 30, idx, cst)
|
| 500 |
|
| 501 |
def s_tile(td):
|
|
|
|
| 600 |
return None
|
| 601 |
|
| 602 |
def s_concat_enhanced(td):
|
|
|
|
| 603 |
exs = get_exs(td)
|
| 604 |
sp = fixed_shapes(td)
|
| 605 |
if sp is None: return None
|
| 606 |
(IH,IW),(OH,OW) = sp
|
| 607 |
if IH == OH and IW == OW: return None
|
|
|
|
| 608 |
if OH % IH != 0 or OW % IW != 0: return None
|
| 609 |
rH, rW = OH // IH, OW // IW
|
| 610 |
if rH * rW > 16 or rH * rW < 2: return None
|
| 611 |
if OH > 30 or OW > 30: return None
|
|
|
|
| 612 |
transforms = [
|
| 613 |
+
('id', lambda x: x), ('fliplr', lambda x: np.fliplr(x)),
|
| 614 |
+
('flipud', lambda x: np.flipud(x)), ('rot180', lambda x: np.rot90(x, 2)),
|
| 615 |
+
('rot90', lambda x: np.rot90(x, 1)), ('rot270', lambda x: np.rot90(x, 3)),
|
| 616 |
+
('T', lambda x: x.T), ('T_fliplr', lambda x: np.fliplr(x.T)),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 617 |
]
|
|
|
|
| 618 |
block_transforms = {}
|
| 619 |
for bi in range(rH):
|
| 620 |
for bj in range(rW):
|
|
|
|
| 625 |
block = out[bi*IH:(bi+1)*IH, bj*IW:(bj+1)*IW]
|
| 626 |
expected = tfn(inp)
|
| 627 |
if expected.shape != (IH, IW) or not np.array_equal(block, expected):
|
| 628 |
+
ok = False; break
|
|
|
|
| 629 |
if ok:
|
| 630 |
+
found = (tidx, tname); break
|
| 631 |
+
if found is None: return None
|
|
|
|
|
|
|
| 632 |
block_transforms[(bi, bj)] = found
|
|
|
|
| 633 |
idx = np.zeros((OH, OW, 2), dtype=np.int64)
|
| 634 |
for bi in range(rH):
|
| 635 |
for bj in range(rW):
|
|
|
|
| 646 |
elif tname == 'T': sr, sc = lc, lr
|
| 647 |
elif tname == 'T_fliplr': sr, sc = IW-1-lc, lr
|
| 648 |
idx[oi, oj] = [sr, sc]
|
|
|
|
| 649 |
for inp, out in exs:
|
| 650 |
reconstructed = np.zeros_like(out)
|
| 651 |
for oi in range(OH):
|
| 652 |
for oj in range(OW):
|
| 653 |
reconstructed[oi,oj] = inp[idx[oi,oj,0], idx[oi,oj,1]]
|
| 654 |
+
if not np.array_equal(reconstructed, out): return None
|
|
|
|
|
|
|
| 655 |
return _build_gather_model(OH, OW, idx)
|
| 656 |
|
| 657 |
def s_input_driven_tile(td):
|
|
|
|
| 658 |
exs = get_exs(td)
|
| 659 |
sp = fixed_shapes(td)
|
| 660 |
if sp is None: return None
|
|
|
|
| 663 |
sH, sW = OH // IH, OW // IW
|
| 664 |
if sH != IH or sW != IW: return None
|
| 665 |
if OH > 30 or OW > 30: return None
|
|
|
|
| 666 |
for inp, out in exs:
|
| 667 |
for bi in range(IH):
|
| 668 |
for bj in range(IW):
|
| 669 |
block = out[bi*IH:(bi+1)*IH, bj*IW:(bj+1)*IW]
|
| 670 |
if inp[bi, bj] != 0:
|
| 671 |
+
if not np.array_equal(block, inp): return None
|
|
|
|
| 672 |
else:
|
| 673 |
+
if not np.all(block == 0): return None
|
|
|
|
| 674 |
return None
|
| 675 |
|
| 676 |
def s_kronecker(td):
|
|
|
|
| 677 |
exs = get_exs(td)
|
| 678 |
sp = fixed_shapes(td)
|
| 679 |
if sp is None: return None
|
|
|
|
| 682 |
sH, sW = OH // IH, OW // IW
|
| 683 |
if sH < 2 or sW < 2: return None
|
| 684 |
if OH > 30 or OW > 30: return None
|
|
|
|
| 685 |
for inp, out in exs:
|
| 686 |
+
if not np.array_equal(out, np.kron(inp, np.ones((sH, sW), dtype=np.int64))): return None
|
|
|
|
|
|
|
|
|
|
| 687 |
idx = np.zeros((OH,OW,2), dtype=np.int64)
|
| 688 |
for r in range(OH):
|
| 689 |
for c in range(OW):
|
|
|
|
| 691 |
return _build_gather_model(OH, OW, idx)
|
| 692 |
|
| 693 |
def s_diagonal_tile(td):
|
|
|
|
| 694 |
exs = get_exs(td)
|
| 695 |
sp = fixed_shapes(td)
|
| 696 |
if sp is None: return None
|
|
|
|
| 699 |
rH, rW = OH // IH, OW // IW
|
| 700 |
if rH != rW or rH < 2: return None
|
| 701 |
if OH > 30 or OW > 30: return None
|
|
|
|
| 702 |
for inp, out in exs:
|
| 703 |
for bi in range(rH):
|
| 704 |
for bj in range(rW):
|
| 705 |
block = out[bi*IH:(bi+1)*IH, bj*IW:(bj+1)*IW]
|
| 706 |
if bi == bj:
|
| 707 |
+
if not np.array_equal(block, inp): return None
|
|
|
|
| 708 |
else:
|
| 709 |
+
if not np.all(block == 0): return None
|
|
|
|
|
|
|
| 710 |
idx = np.zeros((OH,OW,2), dtype=np.int64)
|
| 711 |
cst = np.full((OH,OW), -1, dtype=np.int64)
|
| 712 |
for bi in range(rH):
|
|
|
|
| 719 |
else:
|
| 720 |
idx[oi, oj] = [-1, -1]
|
| 721 |
cst[oi, oj] = 0
|
|
|
|
| 722 |
return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
|
| 723 |
|
| 724 |
def s_shift(td):
|
|
|
|
| 725 |
exs = get_exs(td)
|
| 726 |
sp = fixed_shapes(td)
|
| 727 |
if sp is None: return None
|
|
|
|
| 754 |
return None
|
| 755 |
|
| 756 |
def s_gravity(td):
|
|
|
|
| 757 |
exs = get_exs(td)
|
| 758 |
sp = fixed_shapes(td)
|
| 759 |
if sp is None: return None
|
| 760 |
(IH, IW), (OH, OW) = sp
|
| 761 |
if (IH, IW) != (OH, OW): return None
|
|
|
|
| 762 |
def _gravity(grid, direction):
|
| 763 |
r = np.zeros_like(grid); h, w = grid.shape
|
| 764 |
if direction in ('down', 'up'):
|
|
|
|
| 772 |
if direction == 'right': r[rr, w-len(nz):w] = nz
|
| 773 |
else: r[rr, :len(nz)] = nz
|
| 774 |
return r
|
|
|
|
| 775 |
for d in ('down', 'up', 'left', 'right'):
|
| 776 |
if all(np.array_equal(_gravity(inp, d), out) for inp, out in exs):
|
| 777 |
return None
|
| 778 |
return None
|
| 779 |
|
| 780 |
def s_mirror_h(td):
|
|
|
|
| 781 |
exs = get_exs(td)
|
| 782 |
sp = fixed_shapes(td)
|
| 783 |
if sp is None: return None
|
|
|
|
| 785 |
if OH != IH or OW != 2 * IW: return None
|
| 786 |
if OW > 30: return None
|
| 787 |
for inp, out in exs:
|
| 788 |
+
if not np.array_equal(np.concatenate([inp, np.flip(inp, 1)], 1), out): return None
|
|
|
|
| 789 |
idx = np.zeros((OH, OW, 2), dtype=np.int64)
|
| 790 |
for r in range(OH):
|
| 791 |
for c in range(OW):
|
|
|
|
| 794 |
return _build_gather_model(OH, OW, idx)
|
| 795 |
|
| 796 |
def s_mirror_v(td):
|
|
|
|
| 797 |
exs = get_exs(td)
|
| 798 |
sp = fixed_shapes(td)
|
| 799 |
if sp is None: return None
|
|
|
|
| 801 |
if OW != IW or OH != 2 * IH: return None
|
| 802 |
if OH > 30: return None
|
| 803 |
for inp, out in exs:
|
| 804 |
+
if not np.array_equal(np.concatenate([inp, np.flip(inp, 0)], 0), out): return None
|
|
|
|
| 805 |
idx = np.zeros((OH, OW, 2), dtype=np.int64)
|
| 806 |
for r in range(OH):
|
| 807 |
for c in range(OW):
|
|
|
|
| 810 |
return _build_gather_model(OH, OW, idx)
|
| 811 |
|
| 812 |
def s_quad_mirror(td):
|
|
|
|
| 813 |
exs = get_exs(td)
|
| 814 |
sp = fixed_shapes(td)
|
| 815 |
if sp is None: return None
|
|
|
|
| 817 |
if OH != 2 * IH or OW != 2 * IW: return None
|
| 818 |
if OH > 30 or OW > 30: return None
|
| 819 |
for inp, out in exs:
|
| 820 |
+
expected = np.block([[inp, np.flip(inp, 1)],
|
| 821 |
+
[np.flip(inp, 0), np.flip(np.flip(inp, 0), 1)]])
|
|
|
|
|
|
|
| 822 |
if not np.array_equal(expected, out): return None
|
| 823 |
idx = np.zeros((OH, OW, 2), dtype=np.int64)
|
| 824 |
for r in range(OH):
|
|
|
|
| 829 |
return _build_gather_model(OH, OW, idx)
|
| 830 |
|
| 831 |
def s_fixed_crop(td):
|
|
|
|
| 832 |
exs = get_exs(td)
|
| 833 |
sp = fixed_shapes(td)
|
| 834 |
if sp is None: return None
|
|
|
|
| 845 |
return None
|
| 846 |
|
| 847 |
def s_nonuniform_scale(td):
|
|
|
|
| 848 |
exs = get_exs(td)
|
| 849 |
sp = fixed_shapes(td)
|
| 850 |
if sp is None: return None
|
|
|
|
| 861 |
return None
|
| 862 |
|
| 863 |
def s_constant(td):
|
| 864 |
+
"""Constant output. Uses opset 17 ReduceSum with tensor axes input."""
|
| 865 |
sp = fixed_shapes(td)
|
| 866 |
if sp is None: return None
|
| 867 |
exs = get_exs(td)
|
|
|
|
| 871 |
for r, row in enumerate(outs[0]):
|
| 872 |
for c, v in enumerate(row):
|
| 873 |
const[0, int(v), r, c] = 1.0
|
| 874 |
+
inits = [
|
| 875 |
+
numpy_helper.from_array(np.array(0.0, dtype=np.float32), 'z'),
|
| 876 |
+
numpy_helper.from_array(const, 'c'),
|
| 877 |
+
_make_int64_init('rs_axes_cst', [1, 2, 3]),
|
| 878 |
+
]
|
| 879 |
+
nodes = [
|
| 880 |
+
helper.make_node('Mul', ['input','z'], ['zd']),
|
| 881 |
+
helper.make_node('ReduceSum', ['zd', 'rs_axes_cst'], ['s'], keepdims=1),
|
| 882 |
+
helper.make_node('Add', ['s','c'], ['output']),
|
| 883 |
+
]
|
| 884 |
return mk(nodes, inits)
|
| 885 |
|
| 886 |
# ============================================================
|
|
|
|
| 888 |
# ============================================================
|
| 889 |
|
| 890 |
def add_onehot_block(nodes, inits, am_name, oh_name):
    """Append nodes converting ArgMax indices (am_name) to a float one-hot (oh_name).

    Compares against a [1,10,1,1] class-index tensor via Equal, then casts
    the boolean result to float.
    """
    class_ids = np.arange(10, dtype=np.int64).reshape(1, 10, 1, 1)
    inits.append(numpy_helper.from_array(class_ids, 'classes'))
    nodes.append(helper.make_node('Equal', [am_name, 'classes'], ['eq']))
    nodes.append(helper.make_node('Cast', ['eq'], [oh_name], to=TensorProto.FLOAT))
|
| 895 |
|
| 896 |
def _lstsq_conv(exs_raw, ks, use_bias, use_full_30=False):
|
|
|
|
| 897 |
pad = ks // 2
|
| 898 |
feat = 10 * ks * ks + (1 if use_bias else 0)
|
| 899 |
if feat > 20000: return None
|
|
|
|
| 900 |
patches, targets = [], []
|
| 901 |
for inp_g, out_g in exs_raw:
|
| 902 |
ih, iw = inp_g.shape
|
|
|
|
| 908 |
oh_enc = np.zeros((10, ih, iw), dtype=np.float64)
|
| 909 |
for c in range(10): oh_enc[c] = (inp_g == c)
|
| 910 |
oh_pad = np.pad(oh_enc, ((0,0),(pad,pad),(pad,pad)))
|
|
|
|
| 911 |
oh, ow = out_g.shape
|
| 912 |
for r in range(oh):
|
| 913 |
for c in range(ow):
|
|
|
|
| 915 |
if use_bias: p = np.append(p, 1.0)
|
| 916 |
patches.append(p)
|
| 917 |
targets.append(int(out_g[r, c]))
|
|
|
|
| 918 |
n_patches = len(patches)
|
| 919 |
if feat > 5000 and n_patches > 2000: return None
|
|
|
|
| 920 |
P = np.array(patches, dtype=np.float64)
|
| 921 |
T = np.array(targets, dtype=np.int64)
|
| 922 |
T_oh = np.zeros((len(T), 10), dtype=np.float64)
|
| 923 |
for i, t in enumerate(T): T_oh[i, t] = 1.0
|
|
|
|
| 924 |
WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
|
| 925 |
if not np.array_equal(np.argmax(P @ WT, axis=1), T): return None
|
|
|
|
| 926 |
if use_bias:
|
| 927 |
Wconv = WT[:-1].T.reshape(10, 10, ks, ks).astype(np.float32)
|
| 928 |
B = WT[-1].astype(np.float32)
|
|
|
|
| 932 |
return Wconv, B
|
| 933 |
|
| 934 |
def solve_conv_fixed(td, path, time_budget=30.0):
|
|
|
|
| 935 |
exs = get_exs(td)
|
| 936 |
for inp, out in exs:
|
| 937 |
if inp.shape != out.shape: return None
|
| 938 |
shapes = set(inp.shape for inp, _ in exs)
|
| 939 |
if len(shapes) != 1: return None
|
| 940 |
IH, IW = shapes.pop()
|
|
|
|
| 941 |
fit_exs = get_exs_for_fitting(td)
|
| 942 |
fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape and i.shape == (IH, IW)]
|
|
|
|
| 943 |
t_start = time.time()
|
| 944 |
for use_bias in [False, True]:
|
| 945 |
for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]:
|
|
|
|
| 949 |
Wconv, B = result
|
| 950 |
pad = ks // 2
|
| 951 |
pad_h, pad_w = GH - IH, GW - IW
|
|
|
|
| 952 |
inits = [
|
| 953 |
_make_int64_init('sl_st', [0,0,0,0]),
|
| 954 |
_make_int64_init('sl_en', [1,10,IH,IW]),
|
|
|
|
| 958 |
if B is not None:
|
| 959 |
inits.append(numpy_helper.from_array(B, 'B'))
|
| 960 |
conv_inputs.append('B')
|
|
|
|
| 961 |
nodes = [
|
| 962 |
helper.make_node('Slice', ['input','sl_st','sl_en'], ['grid']),
|
| 963 |
helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
|
|
|
|
| 965 |
]
|
| 966 |
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 967 |
nodes.append(_build_pad_node('oh_out', 'output', pad_h, pad_w, inits))
|
|
|
|
| 968 |
model = mk(nodes, inits)
|
| 969 |
onnx.save(model, path)
|
| 970 |
if validate(path, td): return 'conv_fixed', model
|
| 971 |
return None
|
| 972 |
|
| 973 |
def solve_conv_variable(td, path, time_budget=30.0):
|
| 974 |
+
"""Variable-shape conv with opset 17 ReduceSum (axes as tensor input)."""
|
| 975 |
exs = get_exs(td)
|
| 976 |
for inp, out in exs:
|
| 977 |
if inp.shape != out.shape: return None
|
|
|
|
| 978 |
fit_exs = get_exs_for_fitting_variable(td)
|
| 979 |
fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape]
|
|
|
|
| 980 |
t_start = time.time()
|
| 981 |
for use_bias in [False, True]:
|
| 982 |
for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]:
|
|
|
|
| 985 |
if result is None: continue
|
| 986 |
Wconv, B = result
|
| 987 |
pad = ks // 2
|
| 988 |
+
inits = [
|
| 989 |
+
numpy_helper.from_array(Wconv, 'W'),
|
| 990 |
+
_make_int64_init('rs_axes_var', [1]),
|
| 991 |
+
]
|
| 992 |
conv_inputs = ['input', 'W']
|
| 993 |
if B is not None:
|
| 994 |
inits.append(numpy_helper.from_array(B, 'B'))
|
| 995 |
conv_inputs.append('B')
|
|
|
|
| 996 |
nodes = [
|
| 997 |
+
helper.make_node('ReduceSum', ['input', 'rs_axes_var'], ['mask'], keepdims=1),
|
| 998 |
helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
|
| 999 |
helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
|
| 1000 |
]
|
| 1001 |
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1002 |
nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
|
|
|
|
| 1003 |
model = mk(nodes, inits)
|
| 1004 |
onnx.save(model, path)
|
| 1005 |
if validate(path, td): return 'conv_var', model
|
| 1006 |
return None
|
| 1007 |
|
| 1008 |
def solve_conv_diffshape(td, path, time_budget=30.0):
|
|
|
|
| 1009 |
sp = fixed_shapes(td)
|
| 1010 |
if sp is None: return None
|
| 1011 |
(IH, IW), (OH, OW) = sp
|
| 1012 |
if IH == OH and IW == OW: return None
|
| 1013 |
if OH > IH or OW > IW: return None
|
| 1014 |
if OH > 30 or OW > 30: return None
|
|
|
|
| 1015 |
exs = get_exs(td)
|
| 1016 |
t_start = time.time()
|
|
|
|
| 1017 |
for dr_off, dc_off in [(0, 0), ((IH-OH)//2, (IW-OW)//2)]:
|
| 1018 |
for use_bias in [False, True]:
|
| 1019 |
for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21]:
|
|
|
|
| 1021 |
pad = ks // 2
|
| 1022 |
feat = 10 * ks * ks + (1 if use_bias else 0)
|
| 1023 |
if feat > 10000: continue
|
|
|
|
| 1024 |
patches, targets = [], []
|
| 1025 |
valid = True
|
| 1026 |
for inp_g, out_g in exs:
|
|
|
|
| 1039 |
if not valid: break
|
| 1040 |
if not valid: break
|
| 1041 |
if not valid: continue
|
|
|
|
| 1042 |
n_patches = len(patches)
|
| 1043 |
if feat > 5000 and n_patches > 2000: continue
|
|
|
|
| 1044 |
P = np.array(patches, dtype=np.float64)
|
| 1045 |
T = np.array(targets, dtype=np.int64)
|
| 1046 |
T_oh = np.zeros((len(T), 10), dtype=np.float64)
|
| 1047 |
for i, t in enumerate(T): T_oh[i, t] = 1.0
|
|
|
|
| 1048 |
WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
|
| 1049 |
if not np.array_equal(np.argmax(P @ WT, axis=1), T): continue
|
|
|
|
| 1050 |
if use_bias:
|
| 1051 |
Wconv = WT[:-1].T.reshape(10, 10, ks, ks).astype(np.float32)
|
| 1052 |
B = WT[-1].astype(np.float32)
|
| 1053 |
else:
|
| 1054 |
Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
|
| 1055 |
B = None
|
|
|
|
| 1056 |
pad_h, pad_w = GH - OH, GW - OW
|
| 1057 |
inits = [
|
| 1058 |
_make_int64_init('sl_st', [0,0,0,0]),
|
|
|
|
| 1065 |
if B is not None:
|
| 1066 |
inits.append(numpy_helper.from_array(B, 'B'))
|
| 1067 |
conv_inputs.append('B')
|
|
|
|
| 1068 |
nodes = [
|
| 1069 |
helper.make_node('Slice', ['input','sl_st','sl_en'], ['grid']),
|
| 1070 |
helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
|
|
|
|
| 1073 |
]
|
| 1074 |
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1075 |
nodes.append(_build_pad_node('oh_out', 'output', pad_h, pad_w, inits))
|
|
|
|
| 1076 |
model = mk(nodes, inits)
|
| 1077 |
onnx.save(model, path)
|
| 1078 |
if validate(path, td): return 'conv_diff', model
|
| 1079 |
return None
|
| 1080 |
|
| 1081 |
def solve_conv_var_diff(td, path, time_budget=30.0):
|
| 1082 |
+
"""Variable diff-shape conv with opset 17 ReduceSum."""
|
| 1083 |
exs = get_exs(td)
|
|
|
|
| 1084 |
t_start = time.time()
|
| 1085 |
for use_bias in [False, True]:
|
| 1086 |
for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]:
|
| 1087 |
if time.time() - t_start > time_budget: return None
|
|
|
|
| 1088 |
pad = ks // 2
|
| 1089 |
feat = 10 * ks * ks + (1 if use_bias else 0)
|
| 1090 |
if feat > 20000: continue
|
|
|
|
| 1091 |
patches, targets = [], []
|
| 1092 |
for inp_g, out_g in exs:
|
| 1093 |
ih, iw = inp_g.shape
|
|
|
|
| 1095 |
oh_full = np.zeros((10, GH, GW), dtype=np.float64)
|
| 1096 |
for c in range(10): oh_full[c, :ih, :iw] = (inp_g == c)
|
| 1097 |
oh_pad = np.pad(oh_full, ((0,0),(pad,pad),(pad,pad)))
|
|
|
|
| 1098 |
for r in range(oh):
|
| 1099 |
for c in range(ow):
|
| 1100 |
p = oh_pad[:, r:r+ks, c:c+ks].flatten()
|
| 1101 |
if use_bias: p = np.append(p, 1.0)
|
| 1102 |
patches.append(p)
|
| 1103 |
targets.append(int(out_g[r, c]))
|
|
|
|
| 1104 |
n_patches = len(patches)
|
| 1105 |
if feat > 5000 and n_patches > 2000: continue
|
|
|
|
| 1106 |
P = np.array(patches, dtype=np.float64)
|
| 1107 |
T = np.array(targets, dtype=np.int64)
|
| 1108 |
T_oh = np.zeros((len(T), 10), dtype=np.float64)
|
| 1109 |
for i, t in enumerate(T): T_oh[i, t] = 1.0
|
|
|
|
| 1110 |
try:
|
| 1111 |
WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
|
| 1112 |
except:
|
| 1113 |
continue
|
| 1114 |
if not np.array_equal(np.argmax(P @ WT, axis=1), T): continue
|
|
|
|
| 1115 |
if use_bias:
|
| 1116 |
Wconv = WT[:-1].T.reshape(10, 10, ks, ks).astype(np.float32)
|
| 1117 |
B = WT[-1].astype(np.float32)
|
| 1118 |
else:
|
| 1119 |
Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
|
| 1120 |
B = None
|
|
|
|
|
|
|
| 1121 |
all_output_within_input = all(
|
| 1122 |
out_g.shape[0] <= inp_g.shape[0] and out_g.shape[1] <= inp_g.shape[1]
|
| 1123 |
for inp_g, out_g in exs
|
| 1124 |
)
|
|
|
|
| 1125 |
if all_output_within_input:
|
| 1126 |
+
inits = [
|
| 1127 |
+
numpy_helper.from_array(Wconv, 'W'),
|
| 1128 |
+
_make_int64_init('rs_axes_vd', [1]),
|
| 1129 |
+
]
|
| 1130 |
conv_inputs = ['input', 'W']
|
| 1131 |
if B is not None:
|
| 1132 |
inits.append(numpy_helper.from_array(B, 'B'))
|
| 1133 |
conv_inputs.append('B')
|
|
|
|
| 1134 |
nodes = [
|
| 1135 |
+
helper.make_node('ReduceSum', ['input', 'rs_axes_vd'], ['mask'], keepdims=1),
|
| 1136 |
helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
|
| 1137 |
helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
|
| 1138 |
]
|
| 1139 |
add_onehot_block(nodes, inits, 'am', 'oh_out')
|
| 1140 |
nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
|
|
|
|
| 1141 |
model = mk(nodes, inits)
|
| 1142 |
onnx.save(model, path)
|
| 1143 |
if validate(path, td): return 'conv_var_diff', model
|
|
|
|
| 1171 |
]
|
| 1172 |
|
| 1173 |
def solve_task(tn, td, output_dir, conv_budget=30.0, verbose=True):
|
|
|
|
| 1174 |
path = os.path.join(output_dir, f"task{tn:03d}.onnx")
|
|
|
|
|
|
|
| 1175 |
for name, solver in ANALYTICAL_SOLVERS:
|
| 1176 |
try:
|
| 1177 |
model = solver(td)
|
|
|
|
| 1189 |
return name, score
|
| 1190 |
else:
|
| 1191 |
if verbose: print(f" {name}: model built but FAILED validation")
|
|
|
|
|
|
|
| 1192 |
conv_solvers = [
|
| 1193 |
('conv_fixed', solve_conv_fixed),
|
| 1194 |
('conv_variable', solve_conv_variable),
|
|
|
|
| 1210 |
score = max(1.0, 25.0 - math.log(cost)) if cost > 0 else 25.0
|
| 1211 |
if verbose: print(f" {solver_type}: PASS cost={cost} score={score:.2f}")
|
| 1212 |
return solver_type, score
|
|
|
|
| 1213 |
return None
|
| 1214 |
|
| 1215 |
def main():
|
| 1216 |
parser = argparse.ArgumentParser(description='NeuroGolf Solver v5')
|
| 1217 |
+
parser.add_argument('--data_dir', type=str, default=None)
|
| 1218 |
+
parser.add_argument('--kaggle_dir', type=str, default=None)
|
| 1219 |
+
parser.add_argument('--arcgen_dir', type=str, default=None)
|
| 1220 |
+
parser.add_argument('--output_dir', type=str, default='submission')
|
| 1221 |
+
parser.add_argument('--conv_budget', type=float, default=30.0)
|
| 1222 |
+
parser.add_argument('--task', type=int, default=None)
|
| 1223 |
parser.add_argument('--verbose', action='store_true', default=True)
|
| 1224 |
parser.add_argument('--quiet', action='store_true', default=False)
|
| 1225 |
args = parser.parse_args()
|
|
|
|
| 1226 |
if args.quiet:
|
| 1227 |
args.verbose = False
|
|
|
|
| 1228 |
os.makedirs(args.output_dir, exist_ok=True)
|
|
|
|
|
|
|
| 1229 |
if args.kaggle_dir:
|
| 1230 |
tasks = load_tasks_kaggle(args.kaggle_dir)
|
| 1231 |
elif args.data_dir:
|
| 1232 |
tasks = load_tasks_dir(args.data_dir, args.arcgen_dir)
|
| 1233 |
else:
|
| 1234 |
+
for p in ['/kaggle/input/competitions/neurogolf-2026/', 'ARC-AGI/data/training/']:
|
|
|
|
|
|
|
| 1235 |
if os.path.exists(p):
|
| 1236 |
if 'kaggle' in p:
|
| 1237 |
tasks = load_tasks_kaggle(p)
|
|
|
|
| 1241 |
else:
|
| 1242 |
print("ERROR: No data directory found. Use --data_dir or --kaggle_dir")
|
| 1243 |
sys.exit(1)
|
|
|
|
|
|
|
| 1244 |
results = {}
|
| 1245 |
total_score = 0.0
|
| 1246 |
solved = 0
|
| 1247 |
t_total = time.time()
|
|
|
|
| 1248 |
task_nums = [args.task] if args.task else sorted(tasks.keys())
|
|
|
|
| 1249 |
for tn in task_nums:
|
| 1250 |
if tn in EXCLUDED_TASKS:
|
| 1251 |
if args.verbose: print(f"Task {tn:3d}: EXCLUDED")
|
|
|
|
| 1253 |
if tn not in tasks:
|
| 1254 |
if args.verbose: print(f"Task {tn:3d}: NOT FOUND")
|
| 1255 |
continue
|
|
|
|
| 1256 |
td = tasks[tn]['data']
|
| 1257 |
hex_id = tasks[tn]['hex']
|
|
|
|
| 1258 |
if args.verbose: print(f"\nTask {tn:3d} ({hex_id}):")
|
|
|
|
| 1259 |
result = solve_task(tn, td, args.output_dir, args.conv_budget, args.verbose)
|
|
|
|
| 1260 |
if result is not None:
|
| 1261 |
solver_type, score = result
|
| 1262 |
results[tn] = {'solver': solver_type, 'score': score, 'hex': hex_id}
|
| 1263 |
total_score += score
|
| 1264 |
solved += 1
|
| 1265 |
else:
|
|
|
|
| 1266 |
total_score += 1.0
|
| 1267 |
if args.verbose: print(f" UNSOLVED")
|
|
|
|
|
|
|
| 1268 |
elapsed = time.time() - t_total
|
| 1269 |
print(f"\n{'='*60}")
|
| 1270 |
print(f"RESULTS: {solved}/{len(task_nums)} tasks solved")
|
| 1271 |
print(f"Total score: {total_score:.1f}")
|
| 1272 |
print(f"Time: {elapsed:.1f}s")
|
| 1273 |
print(f"{'='*60}")
|
|
|
|
|
|
|
| 1274 |
solver_counts = Counter(r['solver'] for r in results.values())
|
| 1275 |
solver_scores = {}
|
| 1276 |
for tn, r in results.items():
|
| 1277 |
st = r['solver']
|
| 1278 |
solver_scores[st] = solver_scores.get(st, 0) + r['score']
|
|
|
|
| 1279 |
print("\nSolver breakdown:")
|
| 1280 |
for st in sorted(solver_counts.keys()):
|
| 1281 |
print(f" {st}: {solver_counts[st]} tasks, total score {solver_scores[st]:.1f}, avg {solver_scores[st]/solver_counts[st]:.2f}")
|
|
|
|
|
|
|
| 1282 |
csv_path = os.path.join(args.output_dir, 'submission.csv')
|
| 1283 |
with open(csv_path, 'w', newline='') as f:
|
| 1284 |
w = csv.writer(f)
|
|
|
|
| 1286 |
for tn in sorted(results.keys()):
|
| 1287 |
r = results[tn]
|
| 1288 |
w.writerow([tn, r['hex'], r['solver'], f"{r['score']:.3f}", f"task{tn:03d}.onnx"])
|
|
|
|
|
|
|
| 1289 |
zip_path = os.path.join(args.output_dir, 'submission.zip')
|
| 1290 |
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
|
| 1291 |
for tn in sorted(results.keys()):
|
| 1292 |
onnx_path = os.path.join(args.output_dir, f"task{tn:03d}.onnx")
|
| 1293 |
if os.path.exists(onnx_path):
|
| 1294 |
zf.write(onnx_path, f"task{tn:03d}.onnx")
|
|
|
|
| 1295 |
print(f"\nSubmission files: {csv_path}, {zip_path}")
|
| 1296 |
print(f"Models in zip: {len(results)}")
|
| 1297 |
|