rogermt
/

neurogolf-solver

Model card Files Files and versions

xet

Community

rogermt committed 14 days ago

Commit

2427dfd

verified ·

1 Parent(s): 7c05244

Update neurogolf_solver.py

Browse files

Files changed (1) hide show

neurogolf_solver.py +488 -713

neurogolf_solver.py CHANGED Viewed

@@ -1,34 +1,31 @@
 #!/usr/bin/env python3
 """
-ARC-AGI NeuroGolf Championship - Complete Solver v5
-Format: [1,10,30,30] one-hot input/output, opset 17, IR version 10.
-v5 CHANGES:
-  - Switched to opset 17 (Kaggle-compatible) for cheaper analytical solvers
-  - Slice-based analytical solvers: rotation, flip, transpose (near-zero cost)
-  - LOOCV Ridge tuning in _lstsq_conv with condition number check + SVD-based λ auto-tune
-  - stride_tricks speedup for patch extraction
-  - Composition detectors: rotation+color, flip+color, transpose+color
-  - Channel reduction wrapper for tasks with <8 colors
-  - ARC-GEN validation, EXCLUDED tasks skipped, submission.csv generation
 Solvers:
   - Analytical: identity, constant, color_map, transpose, flip, rotate, tile, upscale,
                 concat, concat_enhanced, spatial_gather, varshape_spatial_gather,
-                diagonal_tile, kronecker, shift, mirror_h, mirror_v, quad_mirror,
-                fixed_crop, nonuniform_scale
-  - Composition: rotate+color_map, flip+color_map, transpose+color_map
   - Conv (fixed shape): Slice -> Conv -> ArgMax -> Equal+Cast -> Pad
   - Conv (variable shape): Conv(30x30) -> ArgMax -> Equal+Cast -> Mul(mask)
   - Conv (diff shape): Slice -> Conv -> Slice(crop) -> ArgMax -> Equal+Cast -> Pad
-  - Channel reduction: Conv1x1(10->N) -> transform -> Conv1x1(N->10)
 Usage:
   python neurogolf_solver.py --data_dir ARC-AGI/data/training/ --output_dir submission
   python neurogolf_solver.py --data_dir ARC-AGI/data/training/ --output_dir submission --conv_budget 60 --arcgen_dir ARC-GEN-100K/
 """
-import json, os, sys, math, time, argparse, csv, io, zipfile, warnings
 import numpy as np
 import onnx
 from onnx import helper, TensorProto, numpy_helper
@@ -50,17 +47,17 @@ BATCH, CH, GH, GW = 1, 10, 30, 30
 GRID_SHAPE = [BATCH, CH, GH, GW]
 DT = TensorProto.FLOAT
 IR = 10
-# v5: opset 17 for cheaper Slice-based transforms
-OPSET = [helper.make_opsetid("", 17)]
 # Officially excluded tasks (score 0 regardless)
 EXCLUDED_TASKS = {21, 55, 80, 184, 202, 366}
 # Max ARC-GEN examples to use for validation (to keep runtime reasonable)
 MAX_ARCGEN_VALIDATE = 30
-# Max ARC-GEN examples for conv fitting
-MAX_ARCGEN_FIT = 0
 def get_providers():
     return ['CPUExecutionProvider']
@@ -79,6 +76,7 @@ def load_tasks_dir(data_dir, arcgen_dir=None):
         with open(os.path.join(data_dir, f)) as fh:
             data = json.load(fh)
         hex_id = f.replace('.json','')
         if arcgen_dir and os.path.exists(os.path.join(arcgen_dir, f)):
             with open(os.path.join(arcgen_dir, f)) as fh:
                 arcgen_examples = json.load(fh)
@@ -111,7 +109,8 @@ def to_onehot(grid):
     return arr
 def validate(path, td):
-    """Validate model against ALL examples: train + test + arc-gen."""
     try:
         opts = ort.SessionOptions()
         opts.log_severity_level = 3
@@ -119,6 +118,7 @@ def validate(path, td):
     except:
         return False
     examples = td['train'] + td['test']
     if 'arc-gen' in td:
         examples = examples + td['arc-gen'][:MAX_ARCGEN_VALIDATE]
     for ex in examples:
@@ -164,7 +164,8 @@ BANNED_OPS = {'Loop', 'Scan', 'NonZero', 'Unique', 'If', 'Function'}
 MAX_FILESIZE = int(1.44 * 1024 * 1024)
 def score_network(path):
-    """Static profiler matching Kaggle scoring: cost = macs + memory + params."""
     if HAS_ONNX_TOOL:
         try:
             return _score_network_official(path)
@@ -226,19 +227,24 @@ def get_exs(td):
             for ex in td['train'] + td['test']]
 def get_exs_for_fitting(td):
-    """Get examples for conv fitting. Uses train+test + arc-gen WHERE SIZES MATCH."""
     base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
                 for ex in td['train'] + td['test']]
     if not base_exs:
         return base_exs
     base_shapes = {inp.shape for inp, _ in base_exs}
     if len(base_shapes) != 1:
-        return base_exs
     base_shape = list(base_shapes)[0]
     ag_exs = []
     for ex in td.get('arc-gen', []):
         inp = np.array(ex['input'], dtype=np.int64)
@@ -246,13 +252,17 @@ def get_exs_for_fitting(td):
         if inp.shape == base_shape and out.shape == base_exs[0][1].shape:
             ag_exs.append((inp, out))
     return base_exs + ag_exs[:10]
 def get_exs_for_fitting_variable(td):
-    """Get examples for variable-shape conv fitting."""
     base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
                 for ex in td['train'] + td['test']]
     ag_exs = []
     for ex in td.get('arc-gen', []):
         inp = np.array(ex['input'], dtype=np.int64)
@@ -269,11 +279,12 @@ def fixed_shapes(td):
     return list(shapes)[0] if len(shapes) == 1 else None
 # ============================================================
-# GATHER HELPERS (opset 17 compatible)
 # ============================================================
 def _build_gather_model(OH, OW, idx):
-    """Build Gather-based spatial remapping model."""
     flat_idx = np.zeros((GH*GW,), dtype=np.int64)
     mask = np.zeros((1,1,GH,GW), dtype=np.float32)
     for oi in range(OH):
@@ -295,7 +306,7 @@ def _build_gather_model(OH, OW, idx):
     return mk(nodes, inits)
 def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
-    """Build Gather model with constant fill for unmapped positions."""
     flat_idx = np.zeros((GH*GW,), dtype=np.int64)
     gather_mask = np.zeros((1,1,GH,GW), dtype=np.float32)
     const_oh = np.zeros((1,10,GH,GW), dtype=np.float32)
@@ -326,168 +337,6 @@ def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
         nodes[-1] = helper.make_node('Mul', ['raw','gmask'], ['output'])
     return mk(nodes, inits)
-# ============================================================
-# SLICE-BASED ANALYTICAL SOLVERS (opset 17, ~0 cost)
-# ============================================================
-def _build_pad_nodes(input_name, IH, IW, output_name='output', pad_name='pads'):
-    """Build Pad nodes to pad spatial dims to 30x30 (opset 17 with tensor pads).
-    Returns (pad_inits, pad_node)."""
-    pad_h, pad_w = GH - IH, GW - IW
-    if pad_h > 0 or pad_w > 0:
-        pads_arr = np.array([0, 0, 0, 0, 0, 0, pad_h, pad_w], dtype=np.int64)
-        pad_inits = [numpy_helper.from_array(pads_arr, pad_name)]
-        pad_node = helper.make_node('Pad', [input_name, pad_name], [output_name], mode='constant')
-        return pad_inits, pad_node
-    else:
-        return [], helper.make_node('Identity', [input_name], [output_name])
-def _build_slice_flip_model(axis, IH, IW):
-    """Build a Slice-based flip model using negative steps (opset 17).
-    Extracts content, applies flip, pads back to 30x30.
-    axis=0: vertical flip (reverse rows), axis=1: horizontal flip (reverse cols).
-    """
-    # Step 1: Extract content region [1,10,30,30] -> [1,10,IH,IW]
-    ex_st = np.array([0,0,0,0], dtype=np.int64)
-    ex_en = np.array([1,10,IH,IW], dtype=np.int64)
-    # Step 2: Flip with negative step Slice
-    if axis == 0:
-        starts = np.array([IH-1], dtype=np.int64)
-        ends = np.array([-IH-1], dtype=np.int64)
-        axes = np.array([2], dtype=np.int64)
-        steps = np.array([-1], dtype=np.int64)
-    else:
-        starts = np.array([IW-1], dtype=np.int64)
-        ends = np.array([-IW-1], dtype=np.int64)
-        axes = np.array([3], dtype=np.int64)
-        steps = np.array([-1], dtype=np.int64)
-    inits = [
-        numpy_helper.from_array(ex_st, 'ex_st'),
-        numpy_helper.from_array(ex_en, 'ex_en'),
-        numpy_helper.from_array(starts, 'sl_st'),
-        numpy_helper.from_array(ends, 'sl_en'),
-        numpy_helper.from_array(axes, 'sl_ax'),
-        numpy_helper.from_array(steps, 'sl_sp'),
-    ]
-    nodes = [
-        helper.make_node('Slice', ['input','ex_st','ex_en'], ['content']),
-        helper.make_node('Slice', ['content','sl_st','sl_en','sl_ax','sl_sp'], ['flipped']),
-    ]
-    # Step 3: Pad back to 30x30 if needed
-    pad_inits, pad_node = _build_pad_nodes('flipped', IH, IW)
-    inits.extend(pad_inits)
-    nodes.append(pad_node)
-    return mk(nodes, inits)
-def _build_slice_transpose_model(IH, IW):
-    """Build a Transpose-based transpose model (perm=[0,1,3,2]).
-    Extracts content, transposes, pads back to 30x30."""
-    # Step 1: Extract content [1,10,30,30] -> [1,10,IH,IW]
-    ex_st = np.array([0,0,0,0], dtype=np.int64)
-    ex_en = np.array([1,10,IH,IW], dtype=np.int64)
-    inits = [
-        numpy_helper.from_array(ex_st, 'ex_st'),
-        numpy_helper.from_array(ex_en, 'ex_en'),
-    ]
-    nodes = [
-        helper.make_node('Slice', ['input','ex_st','ex_en'], ['content']),
-        helper.make_node('Transpose', ['content'], ['transposed'], perm=[0,1,3,2]),
-    ]
-    # After transpose, shape is [1,10,IW,IH]. Need to pad to [1,10,30,30].
-    pad_inits, pad_node = _build_pad_nodes('transposed', IW, IH)
-    nodes.append(pad_node)
-    return mk(nodes, inits + pad_inits)
-def _build_slice_rotate_model(k, IH, IW):
-    """Build a rotation model using Transpose + Slice (opset 17).
-    Extracts content, applies rotation, pads back to 30x30.
-    Matches existing s_rotate behavior (np.rot90):
-    k=1: 90° CCW = Transpose then vflip (reverse rows)
-    k=2: 180° = hflip then vflip
-    k=3: 270° CCW = Transpose then hflip (reverse cols)
-    """
-    # Step 1: Extract content [1,10,30,30] -> [1,10,IH,IW]
-    ex_st = np.array([0,0,0,0], dtype=np.int64)
-    ex_en = np.array([1,10,IH,IW], dtype=np.int64)
-    inits = [
-        numpy_helper.from_array(ex_st, 'ex_st'),
-        numpy_helper.from_array(ex_en, 'ex_en'),
-    ]
-    nodes = [helper.make_node('Slice', ['input','ex_st','ex_en'], ['content'])]
-    current = 'content'
-    if k in (1, 3):
-        # Transpose: [1,10,IH,IW] -> [1,10,IW,IH]
-        nodes.append(helper.make_node('Transpose', [current], ['t'], perm=[0,1,3,2]))
-        current = 't'
-        new_IH, new_IW = IW, IH
-    else:
-        new_IH, new_IW = IH, IW
-    # Apply flips with negative step Slice
-    if k == 1:
-        # vflip (reverse rows, axis=2) after transpose
-        starts = np.array([new_IH-1], dtype=np.int64)
-        ends = np.array([-new_IH-1], dtype=np.int64)
-        axes = np.array([2], dtype=np.int64)
-        steps = np.array([-1], dtype=np.int64)
-    elif k == 2:
-        # 180° = hflip then vflip
-        starts_h = np.array([new_IW-1], dtype=np.int64)
-        ends_h = np.array([-new_IW-1], dtype=np.int64)
-        axes_h = np.array([3], dtype=np.int64)
-        steps_h = np.array([-1], dtype=np.int64)
-        inits.extend([
-            numpy_helper.from_array(starts_h, 'st_h'),
-            numpy_helper.from_array(ends_h, 'en_h'),
-            numpy_helper.from_array(axes_h, 'ax_h'),
-            numpy_helper.from_array(steps_h, 'sp_h'),
-        ])
-        nodes.append(helper.make_node('Slice', [current,'st_h','en_h','ax_h','sp_h'], ['fh']))
-        current = 'fh'
-        starts_v = np.array([new_IH-1], dtype=np.int64)
-        ends_v = np.array([-new_IH-1], dtype=np.int64)
-        axes_v = np.array([2], dtype=np.int64)
-        steps_v = np.array([-1], dtype=np.int64)
-        inits.extend([
-            numpy_helper.from_array(starts_v, 'st_v'),
-            numpy_helper.from_array(ends_v, 'en_v'),
-            numpy_helper.from_array(axes_v, 'ax_v'),
-            numpy_helper.from_array(steps_v, 'sp_v'),
-        ])
-        nodes.append(helper.make_node('Slice', [current,'st_v','en_v','ax_v','sp_v'], ['rot']))
-        current = 'rot'
-        pad_inits, pad_node = _build_pad_nodes(current, new_IH, new_IW)
-        nodes.append(pad_node)
-        return mk(nodes, inits + pad_inits)
-    elif k == 3:
-        # hflip (reverse cols, axis=3) after transpose
-        starts = np.array([new_IW-1], dtype=np.int64)
-        ends = np.array([-new_IW-1], dtype=np.int64)
-        axes = np.array([3], dtype=np.int64)
-        steps = np.array([-1], dtype=np.int64)
-    inits.extend([
-        numpy_helper.from_array(starts, 'sl_st'),
-        numpy_helper.from_array(ends, 'sl_en'),
-        numpy_helper.from_array(axes, 'sl_ax'),
-        numpy_helper.from_array(steps, 'sl_sp'),
-    ])
-    nodes.append(helper.make_node('Slice', [current,'sl_st','sl_en','sl_ax','sl_sp'], ['rot']))
-    current = 'rot'
-    # Pad back to 30x30
-    pad_inits, pad_node = _build_pad_nodes(current, new_IH, new_IW)
-    nodes.append(pad_node)
-    return mk(nodes, inits + pad_inits)
 # ============================================================
 # ANALYTICAL SOLVERS
 # ============================================================
@@ -497,8 +346,7 @@ def s_identity(td):
         if ex['input'] != ex['output']: return None
     return mk([helper.make_node('Identity', ['input'], ['output'])])
-def _get_color_map(td):
-    """Extract color map if consistent across all examples, or None."""
     cm = {}
     for ex in td['train']+td['test']:
         inp, out = np.array(ex['input']), np.array(ex['output'])
@@ -507,14 +355,12 @@ def _get_color_map(td):
             iv, ov = int(iv), int(ov)
             if iv in cm and cm[iv] != ov: return None
             cm[iv] = ov
-    return cm
-def _build_color_map_model(cm, is_permutation=None):
-    """Build ONNX model for a color map."""
-    if is_permutation is None:
-        is_permutation = (set(cm.keys()) == set(cm.values()))
     if is_permutation:
         gather_ch = np.arange(10, dtype=np.int32)
         for src, dst in cm.items():
             if 0 <= src < 10 and 0 <= dst < 10:
@@ -523,25 +369,17 @@ def _build_color_map_model(cm, is_permutation=None):
         nodes = [helper.make_node('Gather', ['input', 'gi'], ['output'], axis=1)]
         return mk(nodes, inits)
     else:
         W = np.zeros((10,10,1,1), dtype=np.float32)
         for ic in range(10):
             W[cm.get(ic,ic), ic, 0, 0] = 1.0
         return mk([helper.make_node('Conv', ['input','W'], ['output'], kernel_shape=[1,1])],
                   [numpy_helper.from_array(W, 'W')])
-def s_color_map(td):
-    cm = _get_color_map(td)
-    if cm is None: return None
-    is_permutation = (set(cm.keys()) == set(cm.values()))
-    return _build_color_map_model(cm, is_permutation)
 def s_transpose(td):
-    exs = get_exs(td)
-    sp = fixed_shapes(td)
-    if sp is None: return None
-    (IH,IW),(OH,OW) = sp
-    if not all(np.array_equal(out, inp.T) for inp, out in exs): return None
-    return _build_slice_transpose_model(IH, IW)
 def s_flip(td):
     exs = get_exs(td)
@@ -551,7 +389,15 @@ def s_flip(td):
     if (IH,IW) != (OH,OW): return None
     for axis, flip_fn in [(0, np.flipud), (1, np.fliplr)]:
         if all(np.array_equal(out, flip_fn(inp)) for inp, out in exs):
-            return _build_slice_flip_model(axis, IH, IW)
     return None
 def s_rotate(td):
@@ -560,8 +406,15 @@ def s_rotate(td):
     if sp is None: return None
     (IH,IW),(OH,OW) = sp
     for k in [1, 2, 3]:
-        if all(np.array_equal(out, np.rot90(inp, k)) for inp, out in exs):
-            return _build_slice_rotate_model(k, IH, IW)
     return None
 def s_spatial_gather(td):
@@ -587,9 +440,10 @@ def s_spatial_gather(td):
 def s_varshape_spatial_gather(td):
     """Spatial gather that works for variable-shape tasks by embedding in 30x30."""
     sp = fixed_shapes(td)
-    if sp is not None: return None
     exs = get_exs(td)
     exs_30 = []
     for inp, out in exs:
         ih, iw = inp.shape
@@ -645,14 +499,11 @@ def s_tile(td):
         numpy_helper.from_array(np.array([1,10,IH,IW], dtype=np.int64), 'en'),
         numpy_helper.from_array(np.array([1,1,rH,rW], dtype=np.int64), 'rp'),
     ]
-    pads_arr = np.array([0, 0, 0, 0, 0, 0, pad_h, pad_w], dtype=np.int64)
-    tile_pads = numpy_helper.from_array(pads_arr, 'tile_pads')
     nodes = [
         helper.make_node('Slice', ['input','st','en'], ['cr']),
         helper.make_node('Tile', ['cr','rp'], ['tl']),
-        helper.make_node('Pad', ['tl', 'tile_pads'], ['output'], mode='constant'),
     ]
-    inits.append(tile_pads)
     return mk(nodes, inits)
 def s_upscale(td):
@@ -732,18 +583,26 @@ def s_concat_enhanced(td):
     if sp is None: return None
     (IH,IW),(OH,OW) = sp
     if IH == OH and IW == OW: return None
     if OH % IH != 0 or OW % IW != 0: return None
     rH, rW = OH // IH, OW // IW
     if rH * rW > 16 or rH * rW < 2: return None
     if OH > 30 or OW > 30: return None
     transforms = [
-        ('id', lambda x: x), ('fliplr', lambda x: np.fliplr(x)),
-        ('flipud', lambda x: np.flipud(x)), ('rot180', lambda x: np.rot90(x, 2)),
-        ('rot90', lambda x: np.rot90(x, 1)), ('rot270', lambda x: np.rot90(x, 3)),
-        ('T', lambda x: x.T), ('T_fliplr', lambda x: np.fliplr(x.T)),
     ]
     block_transforms = {}
     for bi in range(rH):
         for bj in range(rW):
@@ -754,13 +613,16 @@ def s_concat_enhanced(td):
                     block = out[bi*IH:(bi+1)*IH, bj*IW:(bj+1)*IW]
                     expected = tfn(inp)
                     if expected.shape != (IH, IW) or not np.array_equal(block, expected):
-                        ok = False; break
                 if ok:
                     found = (tidx, tname)
                     break
-            if found is None: return None
             block_transforms[(bi, bj)] = found
     idx = np.zeros((OH, OW, 2), dtype=np.int64)
     for bi in range(rH):
         for bj in range(rW):
@@ -778,16 +640,51 @@ def s_concat_enhanced(td):
                     elif tname == 'T_fliplr': sr, sc = IW-1-lc, lr
                     idx[oi, oj] = [sr, sc]
     for inp, out in exs:
         reconstructed = np.zeros_like(out)
         for oi in range(OH):
             for oj in range(OW):
                 reconstructed[oi,oj] = inp[idx[oi,oj,0], idx[oi,oj,1]]
-        if not np.array_equal(reconstructed, out): return None
     return _build_gather_model(OH, OW, idx)
 def s_kronecker(td):
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
@@ -796,9 +693,13 @@ def s_kronecker(td):
     sH, sW = OH // IH, OW // IW
     if sH < 2 or sW < 2: return None
     if OH > 30 or OW > 30: return None
     for inp, out in exs:
         expected = np.kron(inp, np.ones((sH, sW), dtype=np.int64))
-        if not np.array_equal(out, expected): return None
     idx = np.zeros((OH,OW,2), dtype=np.int64)
     for r in range(OH):
         for c in range(OW):
@@ -806,6 +707,7 @@ def s_kronecker(td):
     return _build_gather_model(OH, OW, idx)
 def s_diagonal_tile(td):
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
@@ -814,14 +716,19 @@ def s_diagonal_tile(td):
     rH, rW = OH // IH, OW // IW
     if rH != rW or rH < 2: return None
     if OH > 30 or OW > 30: return None
     for inp, out in exs:
         for bi in range(rH):
             for bj in range(rW):
                 block = out[bi*IH:(bi+1)*IH, bj*IW:(bj+1)*IW]
                 if bi == bj:
-                    if not np.array_equal(block, inp): return None
                 else:
-                    if not np.all(block == 0): return None
     idx = np.zeros((OH,OW,2), dtype=np.int64)
     cst = np.full((OH,OW), -1, dtype=np.int64)
     for bi in range(rH):
@@ -829,11 +736,16 @@ def s_diagonal_tile(td):
             for lr in range(IH):
                 for lc in range(IW):
                     oi, oj = bi*IH + lr, bj*IW + lc
-                    if bi == bj: idx[oi, oj] = [lr, lc]
-                    else: idx[oi, oj] = [-1, -1]; cst[oi, oj] = 0
     return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
 def s_shift(td):
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
@@ -853,17 +765,52 @@ def s_shift(td):
                 if not np.array_equal(shifted, out):
                     ok = False; break
             if not ok: continue
             idx = np.zeros((OH, OW, 2), dtype=np.int64)
-            cst = np.full((OH, OW), 0, dtype=np.int64)
             for r in range(OH):
                 for c in range(OW):
                     sr, sc = r - dr, c - dc
-                    if 0 <= sr < IH and 0 <= sc < IW: idx[r, c] = [sr, sc]
-                    else: idx[r, c] = [-1, -1]
             return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
     return None
 def s_mirror_h(td):
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
@@ -873,6 +820,7 @@ def s_mirror_h(td):
     for inp, out in exs:
         expected = np.concatenate([inp, np.flip(inp, 1)], 1)
         if not np.array_equal(expected, out): return None
     idx = np.zeros((OH, OW, 2), dtype=np.int64)
     for r in range(OH):
         for c in range(OW):
@@ -881,6 +829,7 @@ def s_mirror_h(td):
     return _build_gather_model(OH, OW, idx)
 def s_mirror_v(td):
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
@@ -898,6 +847,7 @@ def s_mirror_v(td):
     return _build_gather_model(OH, OW, idx)
 def s_quad_mirror(td):
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
@@ -919,6 +869,7 @@ def s_quad_mirror(td):
     return _build_gather_model(OH, OW, idx)
 def s_fixed_crop(td):
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
@@ -935,6 +886,7 @@ def s_fixed_crop(td):
     return None
 def s_nonuniform_scale(td):
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
@@ -967,289 +919,8 @@ def s_constant(td):
              helper.make_node('Add', ['s','c'], ['output'])]
     return mk(nodes, inits)
-def _attr_to_dict(attr_proto):
-    """Convert ONNX AttributeProto to Python native type."""
-    from onnx import AttributeProto
-    if attr_proto.type == AttributeProto.INT:
-        return attr_proto.i
-    elif attr_proto.type == AttributeProto.INTS:
-        return list(attr_proto.ints)
-    elif attr_proto.type == AttributeProto.FLOAT:
-        return attr_proto.f
-    elif attr_proto.type == AttributeProto.FLOATS:
-        return list(attr_proto.floats)
-    elif attr_proto.type == AttributeProto.STRING:
-        return attr_proto.s.decode('utf-8')
-    elif attr_proto.type == AttributeProto.STRINGS:
-        return [s.decode('utf-8') for s in attr_proto.strings]
-    elif attr_proto.type == AttributeProto.TENSOR:
-        return numpy_helper.to_array(attr_proto.t)
-    else:
-        return None
 # ============================================================
-# COMPOSITION DETECTORS (transform + color_map)
-# ============================================================
-def _apply_transform(inp, transform_name):
-    """Apply a named transform to a numpy array."""
-    if transform_name == 'id': return inp
-    elif transform_name == 'fliplr': return np.fliplr(inp)
-    elif transform_name == 'flipud': return np.flipud(inp)
-    elif transform_name == 'rot90': return np.rot90(inp, 1)
-    elif transform_name == 'rot180': return np.rot90(inp, 2)
-    elif transform_name == 'rot270': return np.rot90(inp, 3)
-    elif transform_name == 'T': return inp.T
-    else: return inp
-def s_composition_rotate_color(td):
-    """Detect rotation + color_map composition."""
-    exs = get_exs(td)
-    sp = fixed_shapes(td)
-    if sp is None: return None
-    (IH,IW),(OH,OW) = sp
-    if (IH,IW) != (OH,OW): return None
-    for k in [1, 2, 3]:
-        # Try each rotation, then check if consistent color_map remains
-        cm = {}
-        valid = True
-        for inp, out in exs:
-            rotated = np.rot90(inp, k)
-            if rotated.shape != out.shape: valid = False; break
-            for iv, ov in zip(rotated.flat, out.flat):
-                iv, ov = int(iv), int(ov)
-                if iv in cm and cm[iv] != ov: valid = False; break
-                cm[iv] = ov
-            if not valid: break
-        if not valid: continue
-        # Build: rotate first (Slice-based), then color_map
-        rot_model = _build_slice_rotate_model(k, IH, IW)
-        # Extract nodes from rot_model, prepend to color_map
-        cm_model = _build_color_map_model(cm)
-        # Combine: input -> rot_nodes -> color_map -> output
-        # We need to chain the graphs
-        combined_nodes = []
-        combined_inits = []
-        # Add rotation nodes with renamed intermediates
-        for node in rot_model.graph.node:
-            if node.output[0] == 'output':
-                # Last node of rotation feeds into color map
-                new_node = helper.make_node(node.op_type, list(node.input), ['rot_out'],
-                                           **{attr.name: _attr_to_dict(attr) for attr in node.attribute})
-            else:
-                new_node = node
-            combined_nodes.append(new_node)
-        for init in rot_model.graph.initializer:
-            combined_inits.append(init)
-        # Add color map nodes with input = rot_out
-        for node in cm_model.graph.node:
-            if node.input[0] == 'input':
-                new_node = helper.make_node(node.op_type, ['rot_out'] + list(node.input[1:]), list(node.output),
-                                           **{attr.name: _attr_to_dict(attr) for attr in node.attribute})
-            else:
-                new_node = node
-            combined_nodes.append(new_node)
-        for init in cm_model.graph.initializer:
-            combined_inits.append(init)
-        return mk(combined_nodes, combined_inits)
-    return None
-def s_composition_flip_color(td):
-    """Detect flip + color_map composition."""
-    exs = get_exs(td)
-    sp = fixed_shapes(td)
-    if sp is None: return None
-    (IH,IW),(OH,OW) = sp
-    if (IH,IW) != (OH,OW): return None
-    for axis, flip_fn in [(0, np.flipud), (1, np.fliplr)]:
-        cm = {}
-        valid = True
-        for inp, out in exs:
-            flipped = flip_fn(inp)
-            if flipped.shape != out.shape: valid = False; break
-            for iv, ov in zip(flipped.flat, out.flat):
-                iv, ov = int(iv), int(ov)
-                if iv in cm and cm[iv] != ov: valid = False; break
-                cm[iv] = ov
-            if not valid: break
-        if not valid: continue
-        flip_model = _build_slice_flip_model(axis, IH, IW)
-        cm_model = _build_color_map_model(cm)
-        combined_nodes = []
-        combined_inits = []
-        for node in flip_model.graph.node:
-            if node.output[0] == 'output':
-                new_node = helper.make_node(node.op_type, list(node.input), ['flip_out'],
-                                           **{attr.name: _attr_to_dict(attr) for attr in node.attribute})
-            else:
-                new_node = node
-            combined_nodes.append(new_node)
-        for init in flip_model.graph.initializer:
-            combined_inits.append(init)
-        for node in cm_model.graph.node:
-            if node.input[0] == 'input':
-                new_node = helper.make_node(node.op_type, ['flip_out'] + list(node.input[1:]), list(node.output),
-                                           **{attr.name: _attr_to_dict(attr) for attr in node.attribute})
-            else:
-                new_node = node
-            combined_nodes.append(new_node)
-        for init in cm_model.graph.initializer:
-            combined_inits.append(init)
-        return mk(combined_nodes, combined_inits)
-    return None
-def s_composition_transpose_color(td):
-    """Detect transpose + color_map composition."""
-    exs = get_exs(td)
-    sp = fixed_shapes(td)
-    if sp is None: return None
-    (IH,IW),(OH,OW) = sp
-    cm = {}
-    valid = True
-    for inp, out in exs:
-        transposed = inp.T
-        if transposed.shape != out.shape: valid = False; break
-        for iv, ov in zip(transposed.flat, out.flat):
-            iv, ov = int(iv), int(ov)
-            if iv in cm and cm[iv] != ov: valid = False; break
-            cm[iv] = ov
-        if not valid: break
-    if not valid: return None
-    trans_model = _build_slice_transpose_model(IH, IW)
-    cm_model = _build_color_map_model(cm)
-    combined_nodes = []
-    combined_inits = []
-    for node in trans_model.graph.node:
-        if node.output[0] == 'output':
-            new_node = helper.make_node(node.op_type, list(node.input), ['trans_out'],
-                                       **{attr.name: _attr_to_dict(attr) for attr in node.attribute})
-        else:
-            new_node = node
-        combined_nodes.append(new_node)
-    for init in trans_model.graph.initializer:
-        combined_inits.append(init)
-    for node in cm_model.graph.node:
-        if node.input[0] == 'input':
-            new_node = helper.make_node(node.op_type, ['trans_out'] + list(node.input[1:]), list(node.output),
-                                       **{attr.name: _attr_to_dict(attr) for attr in node.attribute})
-        else:
-            new_node = node
-        combined_nodes.append(new_node)
-    for init in cm_model.graph.initializer:
-        combined_inits.append(init)
-    return mk(combined_nodes, combined_inits)
-# ============================================================
-# CHANNEL REDUCTION WRAPPER
-# ============================================================
-def _get_active_colors(td):
-    """Returns set of all colors appearing in inputs and outputs."""
-    colors = set()
-    for ex in td['train'] + td['test']:
-        for row in ex['input']:
-            colors.update(row)
-        for row in ex['output']:
-            colors.update(row)
-    return colors
-def _build_channel_reduced_model(inner_model, input_colors, output_colors):
-    """Wrap a model with channel reduction: Conv1x1(10->N) -> inner -> Conv1x1(N->10).
-    This saves MACs when N < 10."""
-    n_in = len(input_colors)
-    n_out = len(output_colors)
-    # Maps from full 10 channels to reduced set
-    in_map = sorted(input_colors)
-    out_map = sorted(output_colors)
-    # W_reduce: [n_in, 10, 1, 1] - maps 10 channels to n_in
-    W_reduce = np.zeros((n_in, 10, 1, 1), dtype=np.float32)
-    for i, c in enumerate(in_map):
-        W_reduce[i, c, 0, 0] = 1.0
-    # W_expand: [10, n_out, 1, 1] - maps n_out channels back to 10
-    W_expand = np.zeros((10, n_out, 1, 1), dtype=np.float32)
-    for i, c in enumerate(out_map):
-        W_expand[c, i, 0, 0] = 1.0
-    # Build the wrapped model
-    nodes = [
-        helper.make_node('Conv', ['input', 'W_reduce'], ['reduced'], kernel_shape=[1,1]),
-    ]
-    inits = [numpy_helper.from_array(W_reduce, 'W_reduce')]
-    # Add inner model nodes with input='reduced' and output renamed
-    for node in inner_model.graph.node:
-        if node.input[0] == 'input':
-            new_inputs = ['reduced'] + list(node.input[1:])
-        else:
-            new_inputs = list(node.input)
-        if node.output[0] == 'output':
-            new_outputs = ['inner_out']
-        else:
-            new_outputs = list(node.output)
-        new_node = helper.make_node(node.op_type, new_inputs, new_outputs,
-                                    **{attr.name: _attr_to_dict(attr) for attr in node.attribute})
-        nodes.append(new_node)
-    for init in inner_model.graph.initializer:
-        if init.name != 'W_reduce':  # avoid conflict
-            inits.append(init)
-    nodes.append(helper.make_node('Conv', ['inner_out', 'W_expand'], ['output'], kernel_shape=[1,1]))
-    inits.append(numpy_helper.from_array(W_expand, 'W_expand'))
-    return mk(nodes, inits)
-def _try_channel_reduction(solver_fn, td):
-    """Try a solver with channel reduction wrapper if it reduces cost.
-    NOTE: Currently disabled for Gather-based models (spatial_gather, etc.)
-    as they hardcode channel=10 in Reshape operations."""
-    model = solver_fn(td)
-    if model is None: return None
-    # DISABLED: Channel reduction breaks Gather-based models
-    # that reshape to [1,10,900]. Only applies to Conv-based models.
-    # colors = _get_active_colors(td)
-    # if len(colors) >= 8:
-    #     return model
-    # try:
-    #     wrapped = _build_channel_reduced_model(model, colors, colors)
-    #     return wrapped
-    # except Exception:
-    #     return model
-    return model
-# ============================================================
-# CONV SOLVERS WITH LOOCV RIDGE + STRIDE TRICKS
 # ============================================================
 def add_onehot_block(nodes, inits, am_name, oh_name):
@@ -1259,79 +930,13 @@ def add_onehot_block(nodes, inits, am_name, oh_name):
     nodes.append(helper.make_node('Equal', [am_name, 'classes'], ['eq']))
     nodes.append(helper.make_node('Cast', ['eq'], [oh_name], to=TensorProto.FLOAT))
-def _extract_patches_strided(oh_pad, ks, out_shape):
-    """Extract patches using stride_tricks for speedup.
-    oh_pad: [C, H+2p, W+2p] padded one-hot array
-    ks: kernel size
-    out_shape: (OH, OW) output shape
-    Returns: patches array [OH*OW, C*ks*ks]
-    Row r*OW + c holds the ks x ks window whose top-left corner is (r, c) in
-    oh_pad, flattened in (channel, row, col) order.
-    NOTE: as_strided performs no bounds checking. The base slice below is
-    clamped to (Hp, Wp), but the view shape is still built from OH/OW/ks, so
-    the caller must guarantee OH - 1 + ks <= Hp and OW - 1 + ks <= Wp;
-    otherwise the view addresses memory outside oh_pad.
-    """
-    C, Hp, Wp = oh_pad.shape
-    OH, OW = out_shape
-    # Use as_strided to create sliding window view over padded array
-    stride_c = oh_pad.strides[0]
-    stride_h = oh_pad.strides[1]
-    stride_w = oh_pad.strides[2]
-    # Ensure base covers all needed elements: up to (OH-1+ks, OW-1+ks)
-    # (min() only trims the base slice; it does not shrink the view shape below)
-    needed_h = min(OH - 1 + ks, Hp)
-    needed_w = min(OW - 1 + ks, Wp)
-    base = oh_pad[:, :needed_h, :needed_w]
-    # Shape: [OH, OW, C, ks, ks]
-    shape = (OH, OW, C, ks, ks)
-    # First (stride_h, stride_w) pair moves the window origin; the trailing
-    # pair steps inside the window, which is why the same strides appear twice.
-    strides = (stride_h, stride_w, stride_c, stride_h, stride_w)
-    patches_view = np.lib.stride_tricks.as_strided(base, shape=shape, strides=strides)
-    # Reshape to [OH*OW, C*ks*ks]
-    return patches_view.reshape(OH * OW, C * ks * ks)
-def _effective_rank(P):
-    """Compute effective rank r(Σ) = Tr(Σ) / ‖Σ‖ of the covariance Σ of P.
-    P: array with one observation per row and one feature per column
-       (np.cov is called with rowvar=False)
-    ‖Σ‖ is taken as the largest eigenvalue of Σ; both the trace and the norm
-    use only eigenvalues above 1e-12.
-    Returns: sum(evals) / max(evals), or 0 when no eigenvalue exceeds 1e-12
-    """
-    Sigma = np.cov(P, rowvar=False)
-    evals = np.linalg.eigvalsh(Sigma)
-    # Drop numerically-zero (and negative round-off) eigenvalues
-    evals = evals[evals > 1e-12]
-    if len(evals) == 0: return 0
-    return np.sum(evals) / np.max(evals)
-def _tune_ridge_loocv(P, T_oh, lambdas):
-    """Find best λ using efficient LOOCV via Hat Matrix diagonal (SVD shortcut).
-    Cawley & Talbot (2010), JMLR.
-    P: 2-D design matrix, one sample per row
-    T_oh: targets with one row per row of P (the caller passes an [n, 10]
-          one-hot matrix)
-    lambdas: iterable of candidate ridge penalties
-    Returns: the candidate with the lowest mean squared leave-one-out error
-             (the first one wins ties), or None when the SVD raises or no
-             candidate scores below infinity (e.g. lambdas is empty)
-    """
-    n, p = P.shape  # n, p are not used below; the unpack requires P to be 2-D
-    try:
-        U, s, Vt = np.linalg.svd(P, full_matrices=False)
-    except Exception:
-        return None
-    best_lambda, min_err = None, float('inf')
-    for lam in lambdas:
-        # Per-singular-value shrinkage factors of the ridge smoother
-        d = (s**2) / (s**2 + lam)
-        # Fitted values U diag(d) U^T T, computed without forming the hat matrix
-        y_hat = (U * d) @ (U.T @ T_oh)
-        # Ridge hat matrix diagonal: h_ii = Σ_j U_ij^2 * s_j^2 / (s_j^2 + λ)
-        h_ii = np.sum((U**2) * d[np.newaxis, :], axis=1)
-        # LOOCV shortcut: error_i = (y_i - ŷ_i) / (1 - h_ii)
-        denom = 1 - h_ii
-        # Guard against division by ~0 when a sample is fitted exactly
-        denom = np.where(np.abs(denom) < 1e-10, 1e-10, denom)
-        errors = (T_oh - y_hat) / denom[:, np.newaxis]
-        # Mean over every sample and every target column
-        mse = np.mean(errors**2)
-        if mse < min_err:
-            min_err, best_lambda = mse, lam
-    return best_lambda
-def _lstsq_conv(exs_raw, ks, use_bias, use_full_30=False, use_ridge=True):
-    """Shared lstsq conv fitting with optional LOOCV Ridge tuning.
-    Returns (Wconv, B) or None."""
     pad = ks // 2
     feat = 10 * ks * ks + (1 if use_bias else 0)
     if feat > 20000: return None
-    patches_list, targets = [], []
     for inp_g, out_g in exs_raw:
         ih, iw = inp_g.shape
         if use_full_30:
@@ -1344,65 +949,22 @@ def _lstsq_conv(exs_raw, ks, use_bias, use_full_30=False, use_ridge=True):
             oh_pad = np.pad(oh_enc, ((0,0),(pad,pad),(pad,pad)))
         oh, ow = out_g.shape
-        # Try stride_tricks for speedup
-        try:
-            patches = _extract_patches_strided(oh_pad, ks, (oh, ow))
-            if use_bias:
-                bias_col = np.ones((patches.shape[0], 1), dtype=np.float64)
-                patches = np.concatenate([patches, bias_col], axis=1)
-            patches_list.append(patches)
-            targets.append(out_g.flatten())
-        except Exception:
-            # Fallback to loop-based extraction
-            for r in range(oh):
-                for c in range(ow):
-                    p = oh_pad[:, r:r+ks, c:c+ks].flatten()
-                    if use_bias: p = np.append(p, 1.0)
-                    patches_list.append(p)
-                    targets.append(int(out_g[r, c]))
-    if len(patches_list) > 0 and isinstance(patches_list[0], np.ndarray) and patches_list[0].ndim == 2:
-        P = np.concatenate(patches_list, axis=0)
-        T = np.concatenate(targets)
-    else:
-        P = np.array(patches_list, dtype=np.float64)
-        T = np.array(targets, dtype=np.int64)
-    n_patches = P.shape[0]
     if feat > 5000 and n_patches > 2000: return None
     T_oh = np.zeros((len(T), 10), dtype=np.float64)
     for i, t in enumerate(T): T_oh[i, t] = 1.0
-    # Quick condition number estimate using norm ratio (cheaper than full SVD)
-    # Only skip if clearly pathological; otherwise try lstsq
-    cond_estimate = None
-    try:
-        # Use 2-norm estimate: cond ≈ ||P||_2 * ||P^+||_2 ≈ max_singular / min_singular
-        # We approximate with norm ratios for speed
-        p_norm = np.linalg.norm(P, 2)
-        if p_norm > 0:
-            # Estimate using power method approximation or just try lstsq
-            pass  # Don't waste time on condition number - lstsq will handle it
-    except Exception:
-        pass
-    if use_ridge and n_patches <= feat * 1.5:
-        # Use LOOCV Ridge tuning when system is underdetermined or near interpolation threshold
-        lambdas = np.logspace(-4, 2, 10)
-        best_lam = _tune_ridge_loocv(P, T_oh, lambdas)
-        if best_lam is not None:
-            # Ridge solve: (P^T P + λI)^-1 P^T T
-            try:
-                WT = np.linalg.solve(P.T @ P + best_lam * np.eye(P.shape[1]), P.T @ T_oh)
-            except Exception:
-                WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
-        else:
-            WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
-    else:
-        WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
     if not np.array_equal(np.argmax(P @ WT, axis=1), T): return None
     if use_bias:
@@ -1413,21 +975,6 @@ def _lstsq_conv(exs_raw, ks, use_bias, use_full_30=False, use_ridge=True):
         B = None
     return Wconv, B
-# ============================================================
-# CONV SOLVER WRAPPERS
-# ============================================================
-def _get_ks_for_budget(time_budget):
-    """Return kernel sizes to try based on time budget.
-    time_budget: per-solver budget; callers compare it against time.time()
-                 deltas, so it is in seconds
-    Returns: ascending list of odd kernel sizes. Budgets below 5, 10 and 20
-             cap the largest size at 5, 9 and 17; anything else goes up to 29.
-    """
-    if time_budget < 5:
-        return [1, 3, 5]
-    elif time_budget < 10:
-        return [1, 3, 5, 7, 9]
-    elif time_budget < 20:
-        return [1, 3, 5, 7, 9, 11, 13, 15, 17]
-    else:
-        return [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]
 def solve_conv_fixed(td, path, time_budget=30.0):
     """Fixed-shape conv: Slice -> Conv -> ArgMax -> Equal+Cast -> Pad."""
     exs = get_exs(td)
@@ -1437,12 +984,14 @@ def solve_conv_fixed(td, path, time_budget=30.0):
     if len(shapes) != 1: return None
     IH, IW = shapes.pop()
     fit_exs = get_exs_for_fitting(td)
     fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape and i.shape == (IH, IW)]
     t_start = time.time()
     for use_bias in [False, True]:
-        for ks in _get_ks_for_budget(time_budget):
             if time.time() - t_start > time_budget: return None
             result = _lstsq_conv(fit_exs, ks, use_bias, use_full_30=False)
             if result is None: continue
@@ -1466,10 +1015,9 @@ def solve_conv_fixed(td, path, time_budget=30.0):
                 helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
             ]
             add_onehot_block(nodes, inits, 'am', 'oh_out')
-            cf_pads = numpy_helper.from_array(np.array([0,0,0,0,0,0,pad_h,pad_w], dtype=np.int64), 'cf_pads')
-            inits.append(cf_pads)
             nodes.append(
-                helper.make_node('Pad', ['oh_out', 'cf_pads'], ['output'], mode='constant')
             )
             model = mk(nodes, inits)
@@ -1483,6 +1031,7 @@ def solve_conv_variable(td, path, time_budget=30.0):
     for inp, out in exs:
         if inp.shape != out.shape: return None
     fit_exs = get_exs_for_fitting_variable(td)
     fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape]
@@ -1591,10 +1140,9 @@ def solve_conv_diffshape(td, path, time_budget=30.0):
                     helper.make_node('ArgMax', ['co_crop'], ['am'], axis=1, keepdims=1),
                 ]
                 add_onehot_block(nodes, inits, 'am', 'oh_out')
-                diff_pads = numpy_helper.from_array(np.array([0,0,0,0,0,0,pad_h,pad_w], dtype=np.int64), 'diff_pads')
-                inits.append(diff_pads)
                 nodes.append(
-                    helper.make_node('Pad', ['oh_out', 'diff_pads'], ['output'], mode='constant')
                 )
                 model = mk(nodes, inits)
@@ -1603,7 +1151,8 @@ def solve_conv_diffshape(td, path, time_budget=30.0):
     return None
 def solve_conv_var_diff(td, path, time_budget=30.0):
-    """Variable diff-shape conv."""
     exs = get_exs(td)
     t_start = time.time()
@@ -1651,13 +1200,47 @@ def solve_conv_var_diff(td, path, time_budget=30.0):
                 Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
                 B = None
             all_output_within_input = all(
                 out_g.shape[0] <= inp_g.shape[0] and out_g.shape[1] <= inp_g.shape[1]
                 for inp_g, out_g in exs
             )
             if not all_output_within_input:
-                continue
             inits = [numpy_helper.from_array(Wconv, 'W')]
             conv_inputs = ['input', 'W']
@@ -1679,25 +1262,230 @@ def solve_conv_var_diff(td, path, time_budget=30.0):
     return None
 # ============================================================
-# MAIN SOLVER PIPELINE
 # ============================================================
 ANALYTICAL_SOLVERS = [
-    ('identity', s_identity),
-    ('constant', s_constant),
-    ('color_map', s_color_map),
-    ('transpose', s_transpose),
-    ('flip', s_flip),
-    ('rotate', s_rotate),
-    ('tile', s_tile),
-    ('upscale', s_upscale),
-    ('kronecker', s_kronecker),
     ('nonuniform_scale', s_nonuniform_scale),
-    ('mirror_h', s_mirror_h),
-    ('mirror_v', s_mirror_v),
-    ('quad_mirror', s_quad_mirror),
-    ('concat', s_concat),
-    ('concat_enhanced', s_concat_enhanced),
     ('diagonal_tile', s_diagonal_tile),
     ('fixed_crop', s_fixed_crop),
     ('spatial_gather', s_spatial_gather),
@@ -1705,46 +1493,26 @@ ANALYTICAL_SOLVERS = [
     ('varshape_spatial_gather', s_varshape_spatial_gather),
 ]
-COMPOSITION_SOLVERS = [
-    ('rotate_color', s_composition_rotate_color),
-    ('flip_color', s_composition_flip_color),
-    ('transpose_color', s_composition_transpose_color),
-]
-def solve_task(tn, td, outdir, conv_budget=30.0, use_channel_reduction=True):
     t_start = time.time()
     os.makedirs(outdir, exist_ok=True)
     path = os.path.join(outdir, f"task{tn:03d}.onnx")
     if tn in EXCLUDED_TASKS:
         return False, 'excluded', None, time.time() - t_start, path
     # 1. Try analytical solvers (fast, tiny models)
     for sname, sfn in ANALYTICAL_SOLVERS:
-        try:
-            if use_channel_reduction and sname in ('transpose', 'flip', 'rotate', 'mirror_h', 'mirror_v', 'quad_mirror', 'shift', 'spatial_gather', 'varshape_spatial_gather'):
-                model = _try_channel_reduction(sfn, td)
-            else:
-                model = sfn(td)
-            if model is None: continue
-            onnx.save(model, path)
-            if validate(path, td):
-                return True, sname, os.path.getsize(path), time.time() - t_start, path
-        except Exception as e:
-            pass
-    # 2. Try composition solvers
-    for sname, sfn in COMPOSITION_SOLVERS:
         try:
             model = sfn(td)
             if model is None: continue
             onnx.save(model, path)
-            if validate(path, td):
                 return True, sname, os.path.getsize(path), time.time() - t_start, path
-        except Exception:
-            pass
-    # 3. Determine task shape category and try conv solvers
     exs = get_exs(td)
     same_shape = all(inp.shape == out.shape for inp, out in exs)
     shapes = set(inp.shape for inp, _ in exs)
@@ -1762,6 +1530,12 @@ def solve_task(tn, td, outdir, conv_budget=30.0, use_channel_reduction=True):
         if result is not None:
             sname, model = result
             return True, sname, os.path.getsize(path), time.time() - t_start, path
     else:
         sp = fixed_shapes(td)
         if sp is not None:
@@ -1772,6 +1546,7 @@ def solve_task(tn, td, outdir, conv_budget=30.0, use_channel_reduction=True):
                     sname, model = result
                     return True, sname, os.path.getsize(path), time.time() - t_start, path
         result = solve_conv_var_diff(td, path, time_budget=conv_time)
         if result is not None:
             sname, model = result
@@ -1779,7 +1554,7 @@ def solve_task(tn, td, outdir, conv_budget=30.0, use_channel_reduction=True):
     return False, None, None, time.time() - t_start, path
-def run_tasks(task_nums, tasks, output_dir, conv_budget, use_wandb, use_channel_reduction=True):
     results = {}
     costs_dict = {}
     total_score = 0
@@ -1791,7 +1566,7 @@ def run_tasks(task_nums, tasks, output_dir, conv_budget, use_wandb, use_channel_
             continue
         td = tasks[tn]['data']
-        ok, sname, sz, t_task, model_path = solve_task(tn, td, output_dir, conv_budget, use_channel_reduction)
         if ok:
             macs, memory, params = score_network(model_path)
@@ -1831,7 +1606,6 @@ def main():
     parser.add_argument('--tasks', type=str, default='')
     parser.add_argument('--device', type=str, default='auto', choices=['auto','cpu','cuda'])
     parser.add_argument('--use_wandb', action='store_true')
-    parser.add_argument('--no_channel_reduction', action='store_true', help='Disable channel reduction wrapper')
     args = parser.parse_args()
     global ORT_PROVIDERS
     config = {
@@ -1849,7 +1623,6 @@ def main():
     ort.set_default_logger_severity(3)
     print(f"Using providers: {ORT_PROVIDERS}")
-    print(f"OPSET: 17 (v5)")
     if args.kaggle:
         tasks = load_tasks_kaggle(args.data_dir)
@@ -1857,6 +1630,7 @@ def main():
         arcgen = args.arcgen_dir if args.arcgen_dir else None
         tasks = load_tasks_dir(args.data_dir, arcgen_dir=arcgen)
     total_arcgen = sum(len(t['data'].get('arc-gen', [])) for t in tasks.values())
     print(f"Loaded {len(tasks)} tasks ({total_arcgen} ARC-GEN examples)")
     print(f"Excluded tasks: {sorted(EXCLUDED_TASKS)}")
@@ -1865,17 +1639,14 @@ def main():
     active_tasks = [t for t in task_nums if t not in EXCLUDED_TASKS]
     print(f"Solving {len(active_tasks)} active tasks (skipping {len(task_nums) - len(active_tasks)} excluded)")
     print(f"Conv budget: {args.conv_budget}s per task")
-    print(f"Channel reduction: {'enabled' if not args.no_channel_reduction else 'disabled'}")
     print("=" * 70)
     t0 = time.time()
-    use_ch_red = not args.no_channel_reduction
     if args.use_wandb and wandb is not None:
         with wandb.init(project="neurogolf", name="solver_run", config=config):
-            results, costs_dict, total_score = run_tasks(task_nums, tasks, args.output_dir, args.conv_budget, use_wandb=True, use_channel_reduction=use_ch_red)
     else:
-        results, costs_dict, total_score = run_tasks(task_nums, tasks, args.output_dir, args.conv_budget, use_wandb=False, use_channel_reduction=use_ch_red)
     elapsed = time.time() - t0
     print(f"\n{'='*70}")
@@ -1884,11 +1655,13 @@ def main():
     sc = Counter(solver_names)
     for s, c in sc.most_common(): print(f"  {s}: {c}")
     outdir = args.output_dir
     n_files = len([f for f in os.listdir(outdir) if f.endswith('.onnx')])
     total_size = sum(os.path.getsize(os.path.join(outdir, f))
                      for f in os.listdir(outdir) if f.endswith('.onnx'))
     zip_path = os.path.join(os.path.dirname(outdir) or '.', 'submission.zip')
     buf = io.BytesIO()
     with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
@@ -1900,6 +1673,7 @@ def main():
         f.write(zip_bytes)
     zip_size = len(zip_bytes)
     csv_path = os.path.join(os.path.dirname(outdir) or '.', 'submission.csv')
     with open(csv_path, 'w', newline='') as f:
         w = csv.writer(f)
@@ -1907,6 +1681,7 @@ def main():
         for tn in sorted(costs_dict.keys()):
             w.writerow([f'task{tn:03d}', costs_dict[tn]])
     unsolved_count = len(active_tasks) - len(results)
     est_lb = total_score + unsolved_count * 1.0
@@ -1916,4 +1691,4 @@ def main():
     print(f"Written: {zip_path} | {csv_path}")
 if __name__ == '__main__':
-    main()

 #!/usr/bin/env python3
 """
+ARC-AGI NeuroGolf Championship - Complete Solver v4
+Format: [1,10,30,30] one-hot input/output, opset 10, IR version 10.
+v4 CRITICAL FIXES:
+  - ARC-GEN data loaded and used for conv fitting (more data = better lstsq)
+  - ARC-GEN validation: models validated against train+test+arc-gen
+  - EXCLUDED tasks: {21, 55, 80, 184, 202, 366} skipped
+  - submission.csv generation for Kaggle
+  - s_flip fixed: GatherElements -> Gather (opset 10 compat)
+  - Static profiler: no onnx_tool dependency for cost estimation
+  - get_exs_for_fitting(): uses train+test+arc-gen for conv fitting
 Solvers:
   - Analytical: identity, constant, color_map, transpose, flip, rotate, tile, upscale,
                 concat, concat_enhanced, spatial_gather, varshape_spatial_gather,
+                diagonal_tile, kronecker
   - Conv (fixed shape): Slice -> Conv -> ArgMax -> Equal+Cast -> Pad
   - Conv (variable shape): Conv(30x30) -> ArgMax -> Equal+Cast -> Mul(mask)
   - Conv (diff shape): Slice -> Conv -> Slice(crop) -> ArgMax -> Equal+Cast -> Pad
 Usage:
   python neurogolf_solver.py --data_dir ARC-AGI/data/training/ --output_dir submission
   python neurogolf_solver.py --data_dir ARC-AGI/data/training/ --output_dir submission --conv_budget 60 --arcgen_dir ARC-GEN-100K/
 """
+import json, os, sys, math, time, argparse, csv, io, zipfile
 import numpy as np
 import onnx
 from onnx import helper, TensorProto, numpy_helper
 GRID_SHAPE = [BATCH, CH, GH, GW]
 DT = TensorProto.FLOAT
 IR = 10
+OPSET = [helper.make_opsetid("", 10)]
 # Officially excluded tasks (score 0 regardless)
 EXCLUDED_TASKS = {21, 55, 80, 184, 202, 366}
 # Max ARC-GEN examples to use for validation (to keep runtime reasonable)
 MAX_ARCGEN_VALIDATE = 30
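+# Max ARC-GEN examples for conv fitting (kept separate from MAX_ARCGEN_VALIDATE).
+# NOTE: The stated intent is that conv fitting uses train+test only and that
+# ARC-GEN is for VALIDATION only:
+# lstsq underdetermines with too many variable-size arc-gen examples.
+# NOTE(review): get_exs_for_fitting() below still appends up to 10
+# shape-matching arc-gen examples -- confirm where this constant is applied.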
+MAX_ARCGEN_FIT = 0  # Don't use arc-gen for fitting — use for validation only
 def get_providers():
     """Return the execution-provider list: always CPU only."""
     return ['CPUExecutionProvider']
         with open(os.path.join(data_dir, f)) as fh:
             data = json.load(fh)
         hex_id = f.replace('.json','')
+        # Load ARC-GEN data if available
         if arcgen_dir and os.path.exists(os.path.join(arcgen_dir, f)):
             with open(os.path.join(arcgen_dir, f)) as fh:
                 arcgen_examples = json.load(fh)
     return arr
 def validate(path, td):
+    """Validate model against ALL examples: train + test + arc-gen.
+    This matches what Kaggle does for scoring."""
     try:
         opts = ort.SessionOptions()
         opts.log_severity_level = 3
     except:
         return False
     examples = td['train'] + td['test']
+    # Include arc-gen examples (capped for speed)
     if 'arc-gen' in td:
         examples = examples + td['arc-gen'][:MAX_ARCGEN_VALIDATE]
     for ex in examples:
 MAX_FILESIZE = int(1.44 * 1024 * 1024)
 def score_network(path):
+    """Static profiler matching Kaggle scoring: cost = macs + memory + params.
+    Falls back to official neurogolf_utils if available."""
     if HAS_ONNX_TOOL:
         try:
             return _score_network_official(path)
             for ex in td['train'] + td['test']]
 def get_exs_for_fitting(td):
+    """Get examples for conv fitting. Uses train+test + arc-gen WHERE SIZES MATCH.
+    For fixed-size tasks, arc-gen examples have the same grid size,
+    so they provide more data points for lstsq without changing the feature dimension.
+    For variable-size tasks, only use train+test (arc-gen varies too much)."""
     base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
                 for ex in td['train'] + td['test']]
     if not base_exs:
         return base_exs
+    # Check if all base examples have same input shape
     base_shapes = {inp.shape for inp, _ in base_exs}
     if len(base_shapes) != 1:
+        return base_exs  # Variable sizes — don't add arc-gen
     base_shape = list(base_shapes)[0]
+    # Add arc-gen examples that match the base shape
     ag_exs = []
     for ex in td.get('arc-gen', []):
         inp = np.array(ex['input'], dtype=np.int64)
         if inp.shape == base_shape and out.shape == base_exs[0][1].shape:
             ag_exs.append((inp, out))
+    # Cap to avoid massive lstsq (diminishing returns after ~10)
     return base_exs + ag_exs[:10]
 def get_exs_for_fitting_variable(td):
+    """Get examples for variable-shape conv fitting.
+    For variable-shape tasks, arc-gen examples may have different sizes per example
+    but since we embed in 30x30 anyway, we can safely include them."""
     base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
                 for ex in td['train'] + td['test']]
+    # For variable shape, include arc-gen examples (they get embedded in 30x30)
     ag_exs = []
     for ex in td.get('arc-gen', []):
         inp = np.array(ex['input'], dtype=np.int64)
     return list(shapes)[0] if len(shapes) == 1 else None
 # ============================================================
+# GATHER HELPERS
 # ============================================================
 def _build_gather_model(OH, OW, idx):
+    # Use Gather (opset 1) instead of GatherElements (opset 11)
+    # Flatten spatial: [1,10,900] -> Gather(axis=2, indices=[900]) -> [1,10,900]
     flat_idx = np.zeros((GH*GW,), dtype=np.int64)
     mask = np.zeros((1,1,GH,GW), dtype=np.float32)
     for oi in range(OH):
     return mk(nodes, inits)
 def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
+    # Use Gather (opset 1) instead of GatherElements (opset 11)
     flat_idx = np.zeros((GH*GW,), dtype=np.int64)
     gather_mask = np.zeros((1,1,GH,GW), dtype=np.float32)
     const_oh = np.zeros((1,10,GH,GW), dtype=np.float32)
         nodes[-1] = helper.make_node('Mul', ['raw','gmask'], ['output'])
     return mk(nodes, inits)
 # ============================================================
 # ANALYTICAL SOLVERS
 # ============================================================
         if ex['input'] != ex['output']: return None
     return mk([helper.make_node('Identity', ['input'], ['output'])])
+def s_color_map(td):
     cm = {}
     for ex in td['train']+td['test']:
         inp, out = np.array(ex['input']), np.array(ex['output'])
             iv, ov = int(iv), int(ov)
             if iv in cm and cm[iv] != ov: return None
             cm[iv] = ov
+    # Check if it's a permutation (bijective + all mapped colors form a closed set)
+    is_permutation = (set(cm.keys()) == set(cm.values()))
     if is_permutation:
+        # Use channel Gather — zero MACs, much cheaper
         gather_ch = np.arange(10, dtype=np.int32)
         for src, dst in cm.items():
             if 0 <= src < 10 and 0 <= dst < 10:
         nodes = [helper.make_node('Gather', ['input', 'gi'], ['output'], axis=1)]
         return mk(nodes, inits)
     else:
+        # Non-permutation: use Conv 1x1 (has MACs but handles any mapping)
         W = np.zeros((10,10,1,1), dtype=np.float32)
         for ic in range(10):
             W[cm.get(ic,ic), ic, 0, 0] = 1.0
         return mk([helper.make_node('Conv', ['input','W'], ['output'], kernel_shape=[1,1])],
                   [numpy_helper.from_array(W, 'W')])
 def s_transpose(td):
+    """Solve tasks whose output grid is the transpose of the input grid.
+    Checks output == input.T on every train and test example (arc-gen examples
+    are not consulted here) and returns None on the first mismatch. Otherwise
+    builds a model with a single Transpose node that swaps the last two axes
+    (perm=[0,1,3,2]) of the 4-D input tensor.
+    """
+    for ex in td['train']+td['test']:
+        if not np.array_equal(np.array(ex['output']), np.array(ex['input']).T): return None
+    return mk([helper.make_node('Transpose', ['input'], ['output'], perm=[0,1,3,2])])
 def s_flip(td):
     exs = get_exs(td)
     if (IH,IW) != (OH,OW): return None
     for axis, flip_fn in [(0, np.flipud), (1, np.fliplr)]:
         if all(np.array_equal(out, flip_fn(inp)) for inp, out in exs):
+            # Build gather index map (using Gather, opset 1 compatible)
+            idx = np.zeros((OH,OW,2), dtype=np.int64)
+            for r in range(OH):
+                for c in range(OW):
+                    if axis == 0:
+                        idx[r,c] = [IH-1-r, c]
+                    else:
+                        idx[r,c] = [r, IW-1-c]
+            return _build_gather_model(OH, OW, idx)
     return None
 def s_rotate(td):
     if sp is None: return None
     (IH,IW),(OH,OW) = sp
     for k in [1, 2, 3]:
+        if not all(np.array_equal(out, np.rot90(inp, k)) for inp, out in exs): continue
+        idx = np.zeros((OH,OW,2), dtype=np.int64)
+        for r in range(OH):
+            for c in range(OW):
+                if k == 1: sr, sc = c, IH-1-r
+                elif k == 2: sr, sc = IH-1-r, IW-1-c
+                elif k == 3: sr, sc = IW-1-c, r
+                idx[r,c] = [sr, sc]
+        return _build_gather_model(OH, OW, idx)
     return None
 def s_spatial_gather(td):
 def s_varshape_spatial_gather(td):
     """Spatial gather that works for variable-shape tasks by embedding in 30x30."""
     sp = fixed_shapes(td)
+    if sp is not None: return None  # fixed shapes handled by s_spatial_gather
     exs = get_exs(td)
+    # Embed all examples in 30x30
     exs_30 = []
     for inp, out in exs:
         ih, iw = inp.shape
         numpy_helper.from_array(np.array([1,10,IH,IW], dtype=np.int64), 'en'),
         numpy_helper.from_array(np.array([1,1,rH,rW], dtype=np.int64), 'rp'),
     ]
     nodes = [
         helper.make_node('Slice', ['input','st','en'], ['cr']),
         helper.make_node('Tile', ['cr','rp'], ['tl']),
+        helper.make_node('Pad', ['tl'], ['output'], pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0),
     ]
     return mk(nodes, inits)
 def s_upscale(td):
     if sp is None: return None
     (IH,IW),(OH,OW) = sp
     if IH == OH and IW == OW: return None
+    # Need block decomposition
     if OH % IH != 0 or OW % IW != 0: return None
     rH, rW = OH // IH, OW // IW
     if rH * rW > 16 or rH * rW < 2: return None
     if OH > 30 or OW > 30: return None
+    # All 8 symmetry transforms of the dihedral group
     transforms = [
+        ('id', lambda x: x),
+        ('fliplr', lambda x: np.fliplr(x)),
+        ('flipud', lambda x: np.flipud(x)),
+        ('rot180', lambda x: np.rot90(x, 2)),
+        ('rot90', lambda x: np.rot90(x, 1)),
+        ('rot270', lambda x: np.rot90(x, 3)),
+        ('T', lambda x: x.T),
+        ('T_fliplr', lambda x: np.fliplr(x.T)),
     ]
+    # For each block, find which transform matches
     block_transforms = {}
     for bi in range(rH):
         for bj in range(rW):
                     block = out[bi*IH:(bi+1)*IH, bj*IW:(bj+1)*IW]
                     expected = tfn(inp)
                     if expected.shape != (IH, IW) or not np.array_equal(block, expected):
+                        ok = False
+                        break
                 if ok:
                     found = (tidx, tname)
                     break
+            if found is None:
+                return None
             block_transforms[(bi, bj)] = found
+    # Build index map
     idx = np.zeros((OH, OW, 2), dtype=np.int64)
     for bi in range(rH):
         for bj in range(rW):
                     elif tname == 'T_fliplr': sr, sc = IW-1-lc, lr
                     idx[oi, oj] = [sr, sc]
+    # Verify
     for inp, out in exs:
         reconstructed = np.zeros_like(out)
         for oi in range(OH):
             for oj in range(OW):
                 reconstructed[oi,oj] = inp[idx[oi,oj,0], idx[oi,oj,1]]
+        if not np.array_equal(reconstructed, out):
+            return None
     return _build_gather_model(OH, OW, idx)
+def s_input_driven_tile(td):
+    """Each non-zero input pixel controls a block that's a copy of the input.
+    Detection only: the pattern is verified on every example, but no ONNX
+    model is built yet, so this solver always returns None.
+    Requires fixed shapes with OH == IH*IH and OW == IW*IW, at most 30x30.
+    """
+    exs = get_exs(td)
+    sp = fixed_shapes(td)
+    if sp is None: return None
+    (IH,IW),(OH,OW) = sp
+    if OH % IH != 0 or OW % IW != 0: return None
+    sH, sW = OH // IH, OW // IW
+    # The block grid must have the same dimensions as the input itself
+    if sH != IH or sW != IW: return None
+    if OH > 30 or OW > 30: return None
+    for inp, out in exs:
+        for bi in range(IH):
+            for bj in range(IW):
+                block = out[bi*IH:(bi+1)*IH, bj*IW:(bj+1)*IW]
+                if inp[bi, bj] != 0:
+                    # Active block: must be an exact copy of the input
+                    if not np.array_equal(block, inp):
+                        return None
+                else:
+                    # Inactive block: must be entirely background (0)
+                    if not np.all(block == 0):
+                        return None
+    # Pattern confirmed, but not yet expressible with the static gather helper:
+    # output pixel (bi*IH+lr, bj*IW+lc) equals input[lr, lc] when
+    # input[bi, bj] != 0 and 0 otherwise, so whether a block is active depends
+    # on the input content and varies between examples.
+    # Possible approach: Tile the input, then Mul by a mask derived from the
+    # input (one mask value per (bi, bj) block, set where inp[bi, bj] != 0).
+    # Skip for now - spatial_gather can handle it if block positions are fixed.
+    return None
 def s_kronecker(td):
+    """output = kron(input, ones(sH,sW)) — nearest-neighbor upscaling."""
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
     sH, sW = OH // IH, OW // IW
     if sH < 2 or sW < 2: return None
     if OH > 30 or OW > 30: return None
     for inp, out in exs:
         expected = np.kron(inp, np.ones((sH, sW), dtype=np.int64))
+        if not np.array_equal(out, expected):
+            return None
+    # This is identical to upscale - build gather index
     idx = np.zeros((OH,OW,2), dtype=np.int64)
     for r in range(OH):
         for c in range(OW):
     return _build_gather_model(OH, OW, idx)
 def s_diagonal_tile(td):
+    """Input placed along diagonal: block[i,i] = input, rest = 0."""
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
     rH, rW = OH // IH, OW // IW
     if rH != rW or rH < 2: return None
     if OH > 30 or OW > 30: return None
     for inp, out in exs:
         for bi in range(rH):
             for bj in range(rW):
                 block = out[bi*IH:(bi+1)*IH, bj*IW:(bj+1)*IW]
                 if bi == bj:
+                    if not np.array_equal(block, inp):
+                        return None
                 else:
+                    if not np.all(block == 0):
+                        return None
+    # Build: diagonal blocks map to input, off-diagonal are constant 0
     idx = np.zeros((OH,OW,2), dtype=np.int64)
     cst = np.full((OH,OW), -1, dtype=np.int64)
     for bi in range(rH):
             for lr in range(IH):
                 for lc in range(IW):
                     oi, oj = bi*IH + lr, bj*IW + lc
+                    if bi == bj:
+                        idx[oi, oj] = [lr, lc]
+                    else:
+                        idx[oi, oj] = [-1, -1]
+                        cst[oi, oj] = 0
     return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
 def s_shift(td):
+    """Detect constant spatial shift of the grid."""
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
                 if not np.array_equal(shifted, out):
                     ok = False; break
             if not ok: continue
+            # Build gather index
             idx = np.zeros((OH, OW, 2), dtype=np.int64)
+            cst = np.full((OH, OW), 0, dtype=np.int64)  # zeros for out-of-bounds
             for r in range(OH):
                 for c in range(OW):
                     sr, sc = r - dr, c - dc
+                    if 0 <= sr < IH and 0 <= sc < IW:
+                        idx[r, c] = [sr, sc]
+                    else:
+                        idx[r, c] = [-1, -1]
             return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
     return None
def s_gravity(td):
    """Probe whether the task is a gravity-style compaction; never builds a model.

    For each of the four directions, the non-zero cells of every column
    (down/up) or row (left/right) are packed against that edge, keeping their
    relative order, and the result is compared with the expected output.

    Returns:
        Always None. Even when a direction matches every example, no ONNX
        graph is produced because the mapping depends on the grid contents.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (in_h, in_w), (out_h, out_w) = sp
    if in_h != out_h or in_w != out_w:
        return None

    def _settle(grid, direction):
        # Pack the non-zero entries of each line against the requested edge.
        settled = np.zeros_like(grid)
        n_rows, n_cols = grid.shape
        if direction in ('down', 'up'):
            for col in range(n_cols):
                column = grid[:, col]
                kept = column[column != 0]
                if direction == 'down':
                    settled[n_rows - len(kept):n_rows, col] = kept
                else:
                    settled[:len(kept), col] = kept
        else:
            for row in range(n_rows):
                line = grid[row, :]
                kept = line[line != 0]
                if direction == 'right':
                    settled[row, n_cols - len(kept):n_cols] = kept
                else:
                    settled[row, :len(kept)] = kept
        return settled

    for direction in ('down', 'up', 'left', 'right'):
        matches = all(
            np.array_equal(_settle(inp, direction), out) for inp, out in exs
        )
        if matches:
            # Gravity is input-dependent (target positions depend on content),
            # so a static Gather cannot express it, and a conv cannot learn
            # arbitrary sorting either. A specialised ONNX graph would be
            # needed; until one exists the detection result is discarded.
            return None
    return None
 def s_mirror_h(td):
+    """Output = input | flip(input, horizontal), doubling width."""
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
     for inp, out in exs:
         expected = np.concatenate([inp, np.flip(inp, 1)], 1)
         if not np.array_equal(expected, out): return None
+    # Build gather index
     idx = np.zeros((OH, OW, 2), dtype=np.int64)
     for r in range(OH):
         for c in range(OW):
     return _build_gather_model(OH, OW, idx)
 def s_mirror_v(td):
+    """Output = input over flip(input, vertical), doubling height."""
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
     return _build_gather_model(OH, OW, idx)
 def s_quad_mirror(td):
+    """Output = 2x2 block of input with h/v flips."""
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
     return _build_gather_model(OH, OW, idx)
 def s_fixed_crop(td):
+    """Output = fixed subregion of input."""
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
     return None
 def s_nonuniform_scale(td):
+    """Output = input scaled by different factors in h and w."""
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
              helper.make_node('Add', ['s','c'], ['output'])]
     return mk(nodes, inits)
 # ============================================================
+# CONV SOLVERS
 # ============================================================
 def add_onehot_block(nodes, inits, am_name, oh_name):
     """Append the Equal -> Cast pair that turns an index tensor into float one-hot.

     The tensor named ``am_name`` is compared with the graph tensor named
     'classes'; the boolean result (always named 'eq') is cast to FLOAT and
     written to ``oh_name``. ``nodes`` is mutated in place; ``inits`` is
     accepted but not touched here.
     NOTE(review): 'classes' is presumably registered elsewhere (e.g. by the
     model builder) - confirm.
     """
     equal_node = helper.make_node('Equal', [am_name, 'classes'], ['eq'])
     nodes.append(equal_node)
     cast_node = helper.make_node('Cast', ['eq'], [oh_name], to=TensorProto.FLOAT)
     nodes.append(cast_node)
+def _lstsq_conv(exs_raw, ks, use_bias, use_full_30=False):
+    """Shared lstsq conv fitting. Returns (Wconv, B) or None."""
     pad = ks // 2
     feat = 10 * ks * ks + (1 if use_bias else 0)
     if feat > 20000: return None
+    patches, targets = [], []
     for inp_g, out_g in exs_raw:
         ih, iw = inp_g.shape
         if use_full_30:
             oh_pad = np.pad(oh_enc, ((0,0),(pad,pad),(pad,pad)))
         oh, ow = out_g.shape
+        for r in range(oh):
+            for c in range(ow):
+                p = oh_pad[:, r:r+ks, c:c+ks].flatten()
+                if use_bias: p = np.append(p, 1.0)
+                patches.append(p)
+                targets.append(int(out_g[r, c]))
+    n_patches = len(patches)
     if feat > 5000 and n_patches > 2000: return None
+    P = np.array(patches, dtype=np.float64)
+    T = np.array(targets, dtype=np.int64)
     T_oh = np.zeros((len(T), 10), dtype=np.float64)
     for i, t in enumerate(T): T_oh[i, t] = 1.0
+    WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
     if not np.array_equal(np.argmax(P @ WT, axis=1), T): return None
     if use_bias:
         B = None
     return Wconv, B
 def solve_conv_fixed(td, path, time_budget=30.0):
     """Fixed-shape conv: Slice -> Conv -> ArgMax -> Equal+Cast -> Pad."""
     exs = get_exs(td)
     if len(shapes) != 1: return None
     IH, IW = shapes.pop()
+    # Use ARC-GEN data for better fitting
     fit_exs = get_exs_for_fitting(td)
+    # Filter to same-shape, same IH/IW
     fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape and i.shape == (IH, IW)]
     t_start = time.time()
     for use_bias in [False, True]:
+        for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]:
             if time.time() - t_start > time_budget: return None
             result = _lstsq_conv(fit_exs, ks, use_bias, use_full_30=False)
             if result is None: continue
                 helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
             ]
             add_onehot_block(nodes, inits, 'am', 'oh_out')
             nodes.append(
+                helper.make_node('Pad', ['oh_out'], ['output'],
+                    pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0)
             )
             model = mk(nodes, inits)
     for inp, out in exs:
         if inp.shape != out.shape: return None
+    # Use ARC-GEN data for better fitting (variable shape, embedded in 30x30)
     fit_exs = get_exs_for_fitting_variable(td)
     fit_exs = [(i,o) for i,o in fit_exs if i.shape == o.shape]
                     helper.make_node('ArgMax', ['co_crop'], ['am'], axis=1, keepdims=1),
                 ]
                 add_onehot_block(nodes, inits, 'am', 'oh_out')
                 nodes.append(
+                    helper.make_node('Pad', ['oh_out'], ['output'],
+                        pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0)
                 )
                 model = mk(nodes, inits)
     return None
 def solve_conv_var_diff(td, path, time_budget=30.0):
+    """Variable diff-shape conv: Conv(30x30) -> ArgMax -> Equal+Cast -> Mul(output_mask).
+    Works when output shape differs from input but mapping is convolutional on 30x30 grid."""
     exs = get_exs(td)
     t_start = time.time()
                 Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
                 B = None
+            # Use ReduceSum of output channels as mask (sum across channels == 1 for valid pixels)
+            # But we don't know the output mask at inference time from input alone...
+            # We need a way to derive the output mask from the input.
+            # For same-shape: mask = ReduceSum(input, axis=1) works
+            # For diff-shape: we need to compute the output mask differently
+            #
+            # Approach: Conv output at valid positions should have max > threshold,
+            # and at padding positions max ≈ 0. Use the ArgMax+OneHot and then
+            # mask with ReduceSum(input) which is 1 at input positions but 0 at padding.
+            # BUT output may be LARGER than input...
+            #
+            # Alternative: just use Conv -> ArgMax -> Equal+Cast -> Mul(input_mask_expanded)
+            # where input_mask covers the output region too.
+            # This won't work if output extends beyond input region.
+            #
+            # Simplest correct approach: let the conv produce valid one-hot everywhere,
+            # then the padding region should naturally produce channel-0 output.
+            # Since padding is all-zero input, conv output there = bias only.
+            # If no bias, conv output = 0 for all channels -> argmax gives channel 0 -> onehot gives [1,0,...,0]
+            # which equals the padding encoding (channel 0 = 1 in padding).
+            # Wait - that's WRONG for the NeuroGolf format. In the padding region, ALL channels should be 0.
+            # The one-hot encoding has channel[color]=1, but padding = ALL zeros.
+            #
+            # So we NEED a mask. But for diff-shape, what mask?
+            # If output is always top-left aligned and we know max output size...
+            # We can't statically determine the output mask from the input.
+            #
+            # However: we can try the ReduceSum approach anyway — if conv naturally
+            # produces channel-0 dominant output in padding, then:
+            # mask = ReduceSum(input, axis=1) gives 1 for input pixels, 0 for padding
+            # If output region ⊆ input region, this works.
+            # If output region > input region... we need the output's ReduceSum instead.
+            # For tasks where output fits within input bounds, use input mask
             all_output_within_input = all(
                 out_g.shape[0] <= inp_g.shape[0] and out_g.shape[1] <= inp_g.shape[1]
                 for inp_g, out_g in exs
             )
             if not all_output_within_input:
+                continue  # Skip tasks where output extends beyond input
             inits = [numpy_helper.from_array(Wconv, 'W')]
             conv_inputs = ['input', 'W']
     return None
 # ============================================================
+# PYTORCH LEARNED CONV (gradient descent, multi-seed, ternary snap)
+# ============================================================
def _ternary_snap(w, eps=0.2):
    """Snap weights to {-1, 0, 1} - smaller model, often still correct.

    Args:
        w: Array-like of weights (ndarray, nested list, or scalar).
        eps: Dead-zone half-width. Values strictly above ``eps`` become 1,
            values strictly below ``-eps`` become -1, everything else
            (including values exactly at +/-eps and NaN) becomes 0.

    Returns:
        float32 ndarray with the same shape as ``w``.
    """
    # Coerce first so plain Python lists work; a bare `list > float`
    # comparison would raise TypeError.
    w = np.asarray(w)
    return np.where(w > eps, 1.0, np.where(w < -eps, -1.0, 0.0)).astype(np.float32)
def _build_conv_onnx_from_weights(W, ks, use_full_30=False, IH=None, IW=None):
    """Wrap a single conv weight array in an ONNX classification graph.

    Args:
        W: Conv weights stored as initializer 'W' (callers pass [10,10,ks,ks]).
        ks: Square kernel size; spatial padding is ``ks // 2`` on every side.
        use_full_30: Selects the graph layout (see below).
        IH, IW: Grid height/width; only read when ``use_full_30`` is false.

    Layouts:
        fixed shape : Slice(input -> [1,10,IH,IW]) -> Conv -> ArgMax ->
                      Equal+Cast -> Pad back out to GH x GW
        full grid   : Conv(input) -> ArgMax -> Equal+Cast -> Mul(mask), with
                      mask = ReduceSum(input, axis=1)

    Returns:
        Whatever ``mk(nodes, inits)`` returns for the assembled graph.
    """
    half = ks // 2

    if not use_full_30:
        # Fixed shape: crop to the grid, convolve, then pad back.
        extra_rows, extra_cols = GH - IH, GW - IW
        slice_start = np.array([0, 0, 0, 0], dtype=np.int64)
        slice_end = np.array([1, 10, IH, IW], dtype=np.int64)
        inits = [
            numpy_helper.from_array(slice_start, 'sl_st'),
            numpy_helper.from_array(slice_end, 'sl_en'),
            numpy_helper.from_array(W, 'W'),
        ]
        nodes = [
            helper.make_node('Slice', ['input', 'sl_st', 'sl_en'], ['grid']),
            helper.make_node('Conv', ['grid', 'W'], ['co'],
                             kernel_shape=[ks, ks], pads=[half] * 4),
            helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
        ]
        add_onehot_block(nodes, inits, 'am', 'oh_out')
        # Only the trailing side of the two spatial axes is padded.
        restore = helper.make_node(
            'Pad', ['oh_out'], ['output'],
            pads=[0, 0, 0, 0, 0, 0, extra_rows, extra_cols], value=0.0)
        nodes.append(restore)
        return mk(nodes, inits)

    # Variable shape: convolve the whole input and zero out cells whose
    # channel sum is zero in the input.
    inits = [numpy_helper.from_array(W, 'W')]
    nodes = [
        helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1),
        helper.make_node('Conv', ['input', 'W'], ['co'],
                         kernel_shape=[ks, ks], pads=[half] * 4),
        helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
    ]
    add_onehot_block(nodes, inits, 'am', 'oh_out')
    nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
    return mk(nodes, inits)
def _build_two_layer_conv_onnx(W1, W2, ks1, ks2, use_full_30=False, IH=None, IW=None):
    """Wrap two conv weight arrays in a Conv -> Relu -> Conv ONNX classifier.

    Args:
        W1, W2: Weights of the first and second conv (initializers 'W1', 'W2').
        ks1, ks2: Square kernel sizes; each conv pads by ``ks // 2`` per side.
        use_full_30: Selects the graph layout (see below).
        IH, IW: Grid height/width; only read when ``use_full_30`` is false.

    Layouts:
        fixed shape : Slice(input -> [1,10,IH,IW]) -> Conv -> Relu -> Conv ->
                      ArgMax -> Equal+Cast -> Pad back out to GH x GW
        full grid   : Conv -> Relu -> Conv -> ArgMax -> Equal+Cast ->
                      Mul(mask), with mask = ReduceSum(input, axis=1)

    Returns:
        Whatever ``mk(nodes, inits)`` returns for the assembled graph.
    """
    half1 = ks1 // 2
    half2 = ks2 // 2

    if not use_full_30:
        # Fixed shape: crop, run both convs, then pad back.
        extra_rows, extra_cols = GH - IH, GW - IW
        slice_start = np.array([0, 0, 0, 0], dtype=np.int64)
        slice_end = np.array([1, 10, IH, IW], dtype=np.int64)
        inits = [
            numpy_helper.from_array(slice_start, 'sl_st'),
            numpy_helper.from_array(slice_end, 'sl_en'),
            numpy_helper.from_array(W1, 'W1'),
            numpy_helper.from_array(W2, 'W2'),
        ]
        nodes = [
            helper.make_node('Slice', ['input', 'sl_st', 'sl_en'], ['grid']),
            helper.make_node('Conv', ['grid', 'W1'], ['h1'],
                             kernel_shape=[ks1, ks1], pads=[half1] * 4),
            helper.make_node('Relu', ['h1'], ['h1r']),
            helper.make_node('Conv', ['h1r', 'W2'], ['co'],
                             kernel_shape=[ks2, ks2], pads=[half2] * 4),
            helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
        ]
        add_onehot_block(nodes, inits, 'am', 'oh_out')
        # Only the trailing side of the two spatial axes is padded.
        restore = helper.make_node(
            'Pad', ['oh_out'], ['output'],
            pads=[0, 0, 0, 0, 0, 0, extra_rows, extra_cols], value=0.0)
        nodes.append(restore)
        return mk(nodes, inits)

    # Variable shape: run on the whole input and mask with the channel sum.
    inits = [
        numpy_helper.from_array(W1, 'W1'),
        numpy_helper.from_array(W2, 'W2'),
    ]
    nodes = [
        helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1),
        helper.make_node('Conv', ['input', 'W1'], ['h1'],
                         kernel_shape=[ks1, ks1], pads=[half1] * 4),
        helper.make_node('Relu', ['h1'], ['h1r']),
        helper.make_node('Conv', ['h1r', 'W2'], ['co'],
                         kernel_shape=[ks2, ks2], pads=[half2] * 4),
        helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
    ]
    add_onehot_block(nodes, inits, 'am', 'oh_out')
    nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
    return mk(nodes, inits)
def _pt_fit_best_state(net, inp_t, out_t, lr, max_steps, deadline):
    """Fit ``net`` to ``out_t`` with Adam on MSE and return the best weights seen.

    Args:
        net: torch module to train in place.
        inp_t, out_t: Input and target tensors fed to the module / the loss.
        lr: Adam learning rate.
        max_steps: Upper bound on optimisation steps.
        deadline: Absolute ``time.time()`` value after which training stops.

    Returns:
        A deep copy of the ``state_dict`` that produced the lowest loss, or
        None if no loss was ever below +inf (e.g. the loss was always NaN).
    """
    import copy
    import torch

    opt = torch.optim.Adam(net.parameters(), lr=lr)
    best_loss, best_state = float('inf'), None
    for _ in range(max_steps):
        opt.zero_grad()
        loss = torch.nn.functional.mse_loss(net(inp_t), out_t)
        current = loss.item()
        if current < best_loss:
            # Snapshot BEFORE the optimizer step so the saved weights are the
            # ones that actually produced `current`.
            best_loss = current
            best_state = copy.deepcopy(net.state_dict())
        # Stop on convergence, or as soon as the time budget is exhausted so a
        # single long training run cannot overshoot it.
        if best_loss < 1e-8 or time.time() > deadline:
            break
        loss.backward()
        opt.step()
    return best_state


def solve_pytorch_conv(td, path, time_budget=30.0):
    """PyTorch gradient-descent conv solver: single-layer first, then two-layer.

    Trains with several seeds, tries both the continuous and the
    ternary-snapped weights, and keeps the smallest ONNX file for which
    ``validate(path, td)`` succeeds.

    Args:
        td: Task dict; its 'train' and 'test' pairs are both used for fitting.
        path: Destination ONNX path. Candidates are written here while being
            validated; on success it holds the winning model.
        time_budget: Wall-clock budget in seconds. Single-layer training may
            use up to 60% of it, two-layer training the remainder. Model
            building and validation are not interrupted mid-call.

    Returns:
        ``(solver_name, model)`` for the best validated model, or None when
        torch is unavailable, the task is not same-shape, or nothing validates.
        NOTE(review): when nothing validates, the last rejected candidate is
        still left on disk at ``path`` - confirm callers tolerate that.
    """
    try:
        import torch
        import torch.nn as nn
    except ImportError:
        return None

    exs = get_exs(td)
    if not all(inp.shape == out.shape for inp, out in exs):
        return None  # Only handle same-shape for now
    shapes = set(inp.shape for inp, _ in exs)
    fixed_in = len(shapes) == 1

    # Prepare tensors from every train and test pair.
    all_pairs = td['train'] + td['test']
    inp_t = torch.tensor(
        np.stack([to_onehot(p['input'])[0] for p in all_pairs]), dtype=torch.float32)
    out_t = torch.tensor(
        np.stack([to_onehot(p['output'])[0] for p in all_pairs]), dtype=torch.float32)

    IH = IW = None
    if fixed_in:
        IH, IW = list(shapes)[0]
        # Train on the cropped region only.
        inp_t = inp_t[:, :, :IH, :IW]
        out_t = out_t[:, :, :IH, :IW]
    use_full = not fixed_in

    t_start = time.time()
    best_result = None

    def _consider(sname, model):
        # Save, validate and keep the candidate if it is the smallest so far.
        nonlocal best_result
        onnx.save(model, path)
        if validate(path, td):
            sz = os.path.getsize(path)
            if best_result is None or sz < best_result[2]:
                best_result = (sname, model, sz)

    # Phase 1: single-layer conv (multiple kernel sizes and seeds).
    phase1_deadline = t_start + time_budget * 0.6
    for ks in (1, 3, 5, 7):
        if time.time() > phase1_deadline:
            break
        for seed in (0, 7, 42):
            if time.time() > phase1_deadline:
                break
            torch.manual_seed(seed)
            conv = nn.Conv2d(CH, CH, kernel_size=ks, padding=ks // 2, bias=False)
            if seed == 0:
                nn.init.zeros_(conv.weight)
            state = _pt_fit_best_state(
                conv, inp_t, out_t, lr=0.03, max_steps=3000, deadline=phase1_deadline)
            if state is None:
                continue
            conv.load_state_dict(state)
            w = conv.weight.detach().numpy()
            # Try continuous weights, then ternary-snapped.
            for w_cand in (w, _ternary_snap(w)):
                _consider('pt_conv', _build_conv_onnx_from_weights(
                    w_cand, ks, use_full_30=use_full, IH=IH, IW=IW))

    # Phase 2: two-layer conv (Conv -> ReLU -> Conv).
    phase2_deadline = t_start + time_budget
    for ks1, ks2, hidden in ((3, 1, CH), (5, 1, CH), (3, 3, CH)):
        if time.time() > phase2_deadline:
            break
        for seed in (0, 7):
            if time.time() > phase2_deadline:
                break
            torch.manual_seed(seed)
            net = nn.Sequential(
                nn.Conv2d(CH, hidden, kernel_size=ks1, padding=ks1 // 2, bias=False),
                nn.ReLU(),
                nn.Conv2d(hidden, CH, kernel_size=ks2, padding=ks2 // 2, bias=False),
            )
            state = _pt_fit_best_state(
                net, inp_t, out_t, lr=0.01, max_steps=2500, deadline=phase2_deadline)
            if state is None:
                continue
            net.load_state_dict(state)
            w1 = net[0].weight.detach().numpy()
            w2 = net[2].weight.detach().numpy()
            for w1c, w2c in ((w1, w2), (_ternary_snap(w1), _ternary_snap(w2))):
                _consider('pt_conv2', _build_two_layer_conv_onnx(
                    w1c, w2c, ks1, ks2, use_full_30=use_full, IH=IH, IW=IW))

    if best_result is None:
        return None
    sname, model, _ = best_result
    # Later rejected candidates may have overwritten the file; restore the winner.
    onnx.save(model, path)
    return sname, model
+# ============================================================
+# MAIN
 # ============================================================
 ANALYTICAL_SOLVERS = [
+    ('identity', s_identity), ('constant', s_constant), ('color_map', s_color_map),
+    ('transpose', s_transpose), ('flip', s_flip), ('rotate', s_rotate),
+    ('tile', s_tile), ('upscale', s_upscale), ('kronecker', s_kronecker),
     ('nonuniform_scale', s_nonuniform_scale),
+    ('mirror_h', s_mirror_h), ('mirror_v', s_mirror_v), ('quad_mirror', s_quad_mirror),
+    ('concat', s_concat), ('concat_enhanced', s_concat_enhanced),
     ('diagonal_tile', s_diagonal_tile),
     ('fixed_crop', s_fixed_crop),
     ('spatial_gather', s_spatial_gather),
     ('varshape_spatial_gather', s_varshape_spatial_gather),
 ]
+def solve_task(tn, td, outdir, conv_budget=30.0):
     t_start = time.time()
     os.makedirs(outdir, exist_ok=True)
     path = os.path.join(outdir, f"task{tn:03d}.onnx")
+    # Skip excluded tasks
     if tn in EXCLUDED_TASKS:
         return False, 'excluded', None, time.time() - t_start, path
     # 1. Try analytical solvers (fast, tiny models)
     for sname, sfn in ANALYTICAL_SOLVERS:
         try:
             model = sfn(td)
             if model is None: continue
             onnx.save(model, path)
+            if validate(path, td):
                 return True, sname, os.path.getsize(path), time.time() - t_start, path
+        except: pass
+    # 2. Determine task shape category and try conv solvers
     exs = get_exs(td)
     same_shape = all(inp.shape == out.shape for inp, out in exs)
     shapes = set(inp.shape for inp, _ in exs)
         if result is not None:
             sname, model = result
             return True, sname, os.path.getsize(path), time.time() - t_start, path
+        # 3. PyTorch learned conv as fallback for same-shape tasks
+        remaining = max(1, conv_time - (time.time() - t_start))
+        result = solve_pytorch_conv(td, path, time_budget=remaining)
+        if result is not None:
+            sname, model = result
+            return True, sname, os.path.getsize(path), time.time() - t_start, path
     else:
         sp = fixed_shapes(td)
         if sp is not None:
                     sname, model = result
                     return True, sname, os.path.getsize(path), time.time() - t_start, path
+        # Try variable diff-shape conv (output within input bounds)
         result = solve_conv_var_diff(td, path, time_budget=conv_time)
         if result is not None:
             sname, model = result
     return False, None, None, time.time() - t_start, path
+def run_tasks(task_nums, tasks, output_dir, conv_budget, use_wandb):
     results = {}
     costs_dict = {}
     total_score = 0
             continue
         td = tasks[tn]['data']
+        ok, sname, sz, t_task, model_path = solve_task(tn, td, output_dir, conv_budget)
         if ok:
             macs, memory, params = score_network(model_path)
     parser.add_argument('--tasks', type=str, default='')
     parser.add_argument('--device', type=str, default='auto', choices=['auto','cpu','cuda'])
     parser.add_argument('--use_wandb', action='store_true')
     args = parser.parse_args()
     global ORT_PROVIDERS
     config = {
     ort.set_default_logger_severity(3)
     print(f"Using providers: {ORT_PROVIDERS}")
     if args.kaggle:
         tasks = load_tasks_kaggle(args.data_dir)
         arcgen = args.arcgen_dir if args.arcgen_dir else None
         tasks = load_tasks_dir(args.data_dir, arcgen_dir=arcgen)
+    # Count arc-gen examples
     total_arcgen = sum(len(t['data'].get('arc-gen', [])) for t in tasks.values())
     print(f"Loaded {len(tasks)} tasks ({total_arcgen} ARC-GEN examples)")
     print(f"Excluded tasks: {sorted(EXCLUDED_TASKS)}")
     active_tasks = [t for t in task_nums if t not in EXCLUDED_TASKS]
     print(f"Solving {len(active_tasks)} active tasks (skipping {len(task_nums) - len(active_tasks)} excluded)")
     print(f"Conv budget: {args.conv_budget}s per task")
     print("=" * 70)
     t0 = time.time()
     if args.use_wandb and wandb is not None:
         with wandb.init(project="neurogolf", name="solver_run", config=config):
+            results, costs_dict, total_score = run_tasks(task_nums, tasks, args.output_dir, args.conv_budget, use_wandb=True)
     else:
+        results, costs_dict, total_score = run_tasks(task_nums, tasks, args.output_dir, args.conv_budget, use_wandb=False)
     elapsed = time.time() - t0
     print(f"\n{'='*70}")
     sc = Counter(solver_names)
     for s, c in sc.most_common(): print(f"  {s}: {c}")
+    # Generate submission
     outdir = args.output_dir
     n_files = len([f for f in os.listdir(outdir) if f.endswith('.onnx')])
     total_size = sum(os.path.getsize(os.path.join(outdir, f))
                      for f in os.listdir(outdir) if f.endswith('.onnx'))
+    # Create submission.zip
     zip_path = os.path.join(os.path.dirname(outdir) or '.', 'submission.zip')
     buf = io.BytesIO()
     with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
         f.write(zip_bytes)
     zip_size = len(zip_bytes)
+    # Create submission.csv
     csv_path = os.path.join(os.path.dirname(outdir) or '.', 'submission.csv')
     with open(csv_path, 'w', newline='') as f:
         w = csv.writer(f)
         for tn in sorted(costs_dict.keys()):
             w.writerow([f'task{tn:03d}', costs_dict[tn]])
+    # Estimate LB score: solved tasks get their score, unsolved get 1.0
     unsolved_count = len(active_tasks) - len(results)
     est_lb = total_score + unsolved_count * 1.0
     print(f"Written: {zip_path} | {csv_path}")
 if __name__ == '__main__':
+    main()