rogermt
/

neurogolf-solver

Model card Files Files and versions

xet

Community

rogermt committed 14 days ago

Commit

260c341

verified ·

1 Parent(s): 760f72a

Upload neurogolf_solver.py

Browse files

Files changed (1) hide show

neurogolf_solver.py +1 -1694

neurogolf_solver.py CHANGED Viewed

@@ -1,1694 +1 @@
-#!/usr/bin/env python3
-"""
-ARC-AGI NeuroGolf Championship - Complete Solver v4
-Format: [1,10,30,30] one-hot input/output, opset 10, IR version 10.
-v4 CRITICAL FIXES:
-  - ARC-GEN data loaded and used for conv fitting (more data = better lstsq)
-  - ARC-GEN validation: models validated against train+test+arc-gen
-  - EXCLUDED tasks: {21, 55, 80, 184, 202, 366} skipped
-  - submission.csv generation for Kaggle
-  - s_flip fixed: GatherElements -> Gather (opset 10 compat)
-  - Static profiler: no onnx_tool dependency for cost estimation
-  - get_exs_for_fitting(): uses train+test+arc-gen for conv fitting
-Solvers:
-  - Analytical: identity, constant, color_map, transpose, flip, rotate, tile, upscale,
-                concat, concat_enhanced, spatial_gather, varshape_spatial_gather,
-                diagonal_tile, kronecker
-  - Conv (fixed shape): Slice -> Conv -> ArgMax -> Equal+Cast -> Pad
-  - Conv (variable shape): Conv(30x30) -> ArgMax -> Equal+Cast -> Mul(mask)
-  - Conv (diff shape): Slice -> Conv -> Slice(crop) -> ArgMax -> Equal+Cast -> Pad
-Usage:
-  python neurogolf_solver.py --data_dir ARC-AGI/data/training/ --output_dir submission
-  python neurogolf_solver.py --data_dir ARC-AGI/data/training/ --output_dir submission --conv_budget 60 --arcgen_dir ARC-GEN-100K/
-"""
-import json, os, sys, math, time, argparse, csv, io, zipfile
-import numpy as np
-import onnx
-from onnx import helper, TensorProto, numpy_helper
-import onnxruntime as ort
-from collections import Counter
-try:
-    from neurogolf_utils import score_network as _score_network_official
-    HAS_ONNX_TOOL = True
-except ImportError:
-    HAS_ONNX_TOOL = False
-try:
-    import wandb
-except ImportError:
-    wandb = None
# Grid tensor layout used for every model input/output:
# [batch, channel (one per cell value), height, width].
BATCH, CH, GH, GW = 1, 10, 30, 30
GRID_SHAPE = [BATCH, CH, GH, GW]
DT = TensorProto.FLOAT
IR = 10
OPSET = [helper.make_opsetid("", 10)]
# Officially excluded tasks (score 0 regardless)
EXCLUDED_TASKS = {21, 55, 80, 184, 202, 366}
# Max ARC-GEN examples to use for validation (to keep runtime reasonable)
MAX_ARCGEN_VALIDATE = 30
# Max ARC-GEN examples for conv fitting, kept separate from the validation cap.
# 0 is meant to express "use ARC-GEN for validation only".
# NOTE(review): get_exs_for_fitting() and get_exs_for_fitting_variable() apply
# their own caps (10 and 20 examples) and do not read this constant, so ARC-GEN
# data *is* used for fitting there. Confirm which behaviour is intended.
MAX_ARCGEN_FIT = 0
def get_providers():
    """Return the ONNX Runtime execution providers to use (CPU only)."""
    providers = ['CPUExecutionProvider']
    return providers


# Resolved once at import time and shared by every inference session.
ORT_PROVIDERS = get_providers()
-# ============================================================
-# LOAD / VALIDATE
-# ============================================================
def load_tasks_dir(data_dir, arcgen_dir=None):
    """Load ARC-AGI task files from *data_dir*, optionally merging ARC-GEN data.

    Every ``*.json`` file in *data_dir* becomes one task. Tasks are numbered
    from 1 in sorted filename order.

    Args:
        data_dir: Directory containing one JSON file per task.
        arcgen_dir: Optional directory holding same-named JSON files. When a
            matching file exists and contains a list, that list is stored
            under the task's ``'arc-gen'`` key.

    Returns:
        dict mapping task number to ``{'hex': <file stem>, 'data': <task dict>}``.
        ``data['arc-gen']`` is always present (an empty list when no ARC-GEN
        examples were found).
    """
    suffix = '.json'
    names = sorted(name for name in os.listdir(data_dir) if name.endswith(suffix))
    tasks = {}
    for number, name in enumerate(names, start=1):
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(os.path.join(data_dir, name), encoding='utf-8') as fh:
            data = json.load(fh)
        if arcgen_dir:
            arcgen_path = os.path.join(arcgen_dir, name)
            if os.path.exists(arcgen_path):
                with open(arcgen_path, encoding='utf-8') as fh:
                    arcgen_examples = json.load(fh)
                # Anything other than a list of examples is ignored.
                if isinstance(arcgen_examples, list):
                    data['arc-gen'] = arcgen_examples
        data.setdefault('arc-gen', [])
        # Strip only the trailing extension; str.replace would also remove
        # any '.json' that happens to occur inside the stem.
        tasks[number] = {'hex': name[:-len(suffix)], 'data': data}
    return tasks
def load_tasks_kaggle(data_dir, num_tasks=400):
    """Load Kaggle-format task files ``task001.json`` .. ``task<num_tasks>.json``.

    Args:
        data_dir: Directory containing the ``taskNNN.json`` files.
        num_tasks: Highest task number to look for (default 400, the range
            the original code scanned).

    Returns:
        dict mapping task number to ``{'hex': 'taskNNN', 'data': <task dict>}``.
        Missing files are skipped. ``data['arc-gen']`` is always present,
        defaulting to an empty list when the file does not provide it.
    """
    tasks = {}
    for tn in range(1, num_tasks + 1):
        stem = f"task{tn:03d}"
        path = os.path.join(data_dir, stem + ".json")
        if not os.path.exists(path):
            continue
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        data.setdefault('arc-gen', [])
        tasks[tn] = {'hex': stem, 'data': data}
    return tasks
def to_onehot(grid):
    """Encode a 2-D grid of cell values as a ``(1, CH, GH, GW)`` float32 tensor.

    For every cell that lies inside the ``GH`` x ``GW`` canvas and whose value
    is in ``[0, CH)``, the channel equal to the value is set to 1.0 at that
    position. All other entries stay 0, so cells outside the grid are
    all-zero across channels.
    """
    onehot = np.zeros((1, CH, GH, GW), dtype=np.float32)
    for row_idx, row in enumerate(grid):
        for col_idx, value in enumerate(row):
            on_canvas = row_idx < GH and col_idx < GW
            if on_canvas and 0 <= value < CH:
                onehot[0, value, row_idx, col_idx] = 1.0
    return onehot
def _validate_model(model, td):
    """Run *model* on every example of task *td* and report whether all match.

    *model* is handed straight to ``ort.InferenceSession`` and may be a file
    path or the serialized model bytes.

    The checked examples are ``td['train'] + td['test']`` plus at most
    ``MAX_ARCGEN_VALIDATE`` entries of ``td['arc-gen']`` when that key exists.
    The model output is binarised with ``> 0.0`` and must equal the one-hot
    encoding of the expected output exactly.

    Returns:
        True when every example matches; False on the first mismatch or when
        the session cannot be created or run.
    """
    try:
        opts = ort.SessionOptions()
        opts.log_severity_level = 3
        sess = ort.InferenceSession(model, sess_options=opts, providers=ORT_PROVIDERS)
    except Exception:
        # An unloadable model is simply an invalid candidate. Unlike a bare
        # ``except:`` this still lets KeyboardInterrupt/SystemExit propagate.
        return False
    examples = td['train'] + td['test']
    # Include arc-gen examples (capped for speed)
    if 'arc-gen' in td:
        examples = examples + td['arc-gen'][:MAX_ARCGEN_VALIDATE]
    for ex in examples:
        inp = to_onehot(ex['input'])
        exp = to_onehot(ex['output'])
        try:
            out = sess.run(['output'], {'input': inp})[0]
            out = (out > 0.0).astype(np.float32)
        except Exception:
            return False
        if not np.array_equal(out, exp):
            return False
    return True


def validate(path, td):
    """Validate the model stored at *path* against train + test + arc-gen.

    Per the original author this mirrors what Kaggle does for scoring.
    Returns True only when every checked example is reproduced exactly.
    """
    return _validate_model(path, td)


def validate_raw(raw_bytes, td):
    """Validate a model given as serialized bytes against train + test + arc-gen.

    Same checks as ``validate``; only the model source differs.
    """
    return _validate_model(raw_bytes, td)
-# ============================================================
-# STATIC PROFILER (no onnx_tool dependency)
-# ============================================================
# Operator types that make the static profiler reject a model outright.
BANNED_OPS = {'Loop', 'Scan', 'NonZero', 'Unique', 'If', 'Function'}
# 1.44 MiB expressed in bytes.
MAX_FILESIZE = int(1.44 * 1024 * 1024)


def score_network(path):
    """Return the cost triple for the ONNX model at *path*.

    Uses the official ``neurogolf_utils.score_network`` when it was importable
    and succeeds; otherwise falls back to the built-in static profiler, which
    returns ``(macs, nbytes, params)`` or ``(None, None, None)``.
    """
    if HAS_ONNX_TOOL:
        try:
            return _score_network_official(path)
        except Exception:
            # Deliberate best-effort: any failure of the official scorer
            # falls through to the static estimate. KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            pass
    return _static_profile(path)
def _static_profile(path):
    """Estimate ``(macs, nbytes, params)`` for an ONNX file without onnx_tool.

    * ``params`` / ``nbytes``: element count and byte size summed over all
      graph initializers and all tensors embedded in ``Constant`` nodes.
    * ``macs``: for each ``Conv`` whose weight is a known 4-D tensor,
      ``co * ci * kh * kw * GH * GW`` - i.e. every convolution is costed as
      if it produced a full ``GH`` x ``GW`` output.

    Returns:
        A tuple of three ints, or ``(None, None, None)`` when the file cannot
        be loaded or the graph contains an operator from ``BANNED_OPS``.
    """
    try:
        model = onnx.load(path)
    except Exception:
        return None, None, None
    tensors = {}
    params = 0
    nbytes = 0
    macs = 0
    for init in model.graph.initializer:
        a = numpy_helper.to_array(init)
        tensors[init.name] = a
        params += a.size
        nbytes += a.nbytes
    for nd in model.graph.node:
        if nd.op_type in BANNED_OPS:
            return None, None, None
        if nd.op_type == 'Constant':
            for attr in nd.attribute:
                if not (attr.t and attr.t.ByteSize() > 0):
                    continue
                try:
                    a = numpy_helper.to_array(attr.t)
                except Exception:
                    # Best-effort: an undecodable constant is left uncounted.
                    continue
                if nd.output:
                    # Remember it so a Conv can look its weight up by name.
                    tensors[nd.output[0]] = a
                params += a.size
                nbytes += a.nbytes
        elif nd.op_type == 'Conv' and len(nd.input) >= 2 and nd.input[1] in tensors:
            w = tensors[nd.input[1]]
            if w.ndim == 4:
                co, ci, kh, kw = w.shape
                macs += co * ci * kh * kw * GH * GW
    return int(macs), int(nbytes), int(params)
def mk(nodes, inits=None):
    """Wrap *nodes* (and optional initializers) into a complete ONNX model.

    The graph is named ``"g"`` and has one input ``"input"`` and one output
    ``"output"``, both of type ``DT`` and shape ``GRID_SHAPE``. The model is
    created with IR version ``IR`` and the opset list ``OPSET``.
    """
    graph_input = helper.make_tensor_value_info("input", DT, GRID_SHAPE)
    graph_output = helper.make_tensor_value_info("output", DT, GRID_SHAPE)
    initializers = inits if inits else []
    graph = helper.make_graph(
        nodes, "g", [graph_input], [graph_output], initializer=initializers
    )
    return helper.make_model(graph, ir_version=IR, opset_imports=OPSET)
def get_exs(td):
    """Return the train + test examples as ``(input, output)`` int64 array pairs.

    ARC-GEN examples are not included; this is the example set used by the
    analytical solvers.
    """
    pairs = []
    for example in td['train'] + td['test']:
        grid_in = np.array(example['input'], dtype=np.int64)
        grid_out = np.array(example['output'], dtype=np.int64)
        pairs.append((grid_in, grid_out))
    return pairs
def get_exs_for_fitting(td, max_arcgen=10):
    """Return the examples used for fixed-shape conv fitting.

    The result always starts with the train + test examples. When all of them
    share one input shape, up to *max_arcgen* ARC-GEN examples are appended,
    restricted to those whose input shape equals that shared shape and whose
    output shape equals the first base example's output shape. When the base
    input shapes differ, no ARC-GEN example is added.

    Args:
        td: Task dict with ``'train'`` and ``'test'`` lists and an optional
            ``'arc-gen'`` list.
        max_arcgen: Maximum number of ARC-GEN examples to append (default 10,
            the cap the original code hard-coded).

    Returns:
        List of ``(input, output)`` int64 array pairs.
    """
    base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
                for ex in td['train'] + td['test']]
    if not base_exs:
        return base_exs
    base_shapes = {inp.shape for inp, _ in base_exs}
    if len(base_shapes) != 1:
        return base_exs  # Variable sizes - don't add arc-gen
    in_shape = next(iter(base_shapes))
    out_shape = base_exs[0][1].shape
    ag_exs = []
    for ex in td.get('arc-gen', []):
        # Stop as soon as the cap is reached instead of converting the whole
        # ARC-GEN list to arrays and truncating afterwards.
        if len(ag_exs) >= max_arcgen:
            break
        inp = np.array(ex['input'], dtype=np.int64)
        out = np.array(ex['output'], dtype=np.int64)
        if inp.shape == in_shape and out.shape == out_shape:
            ag_exs.append((inp, out))
    return base_exs + ag_exs
def get_exs_for_fitting_variable(td, max_arcgen=20):
    """Return the examples used for variable-shape conv fitting.

    The result starts with all train + test examples, followed by up to
    *max_arcgen* ARC-GEN examples whose input and output have the same 2-D
    shape and fit within 30 x 30.

    Args:
        td: Task dict with ``'train'`` and ``'test'`` lists and an optional
            ``'arc-gen'`` list.
        max_arcgen: Maximum number of ARC-GEN examples to append (default 20,
            the cap the original code hard-coded).

    Returns:
        List of ``(input, output)`` int64 array pairs.
    """
    base_exs = [(np.array(ex['input'], dtype=np.int64), np.array(ex['output'], dtype=np.int64))
                for ex in td['train'] + td['test']]
    ag_exs = []
    for ex in td.get('arc-gen', []):
        # Stop at the cap rather than converting every ARC-GEN example.
        if len(ag_exs) >= max_arcgen:
            break
        inp = np.array(ex['input'], dtype=np.int64)
        out = np.array(ex['output'], dtype=np.int64)
        # A non-2-D grid (e.g. an empty list) has no shape[1]; skip it
        # instead of raising IndexError.
        if inp.ndim != 2:
            continue
        if inp.shape == out.shape and inp.shape[0] <= 30 and inp.shape[1] <= 30:
            ag_exs.append((inp, out))
    return base_exs + ag_exs
def fixed_shapes(td):
    """Return ``(input_shape, output_shape)`` when all train+test examples agree.

    Returns None when the examples use more than one shape pair, or when
    there are no examples at all.
    """
    distinct = {(inp.shape, out.shape) for inp, out in get_exs(td)}
    if len(distinct) != 1:
        return None
    (only_pair,) = distinct
    return only_pair
-# ============================================================
-# GATHER HELPERS
-# ============================================================
def _build_gather_model(OH, OW, idx):
    """Build a model whose output cell (r, c) copies input cell ``idx[r, c]``.

    ``idx`` has shape ``(OH, OW, 2)`` holding the source (row, col) for each
    output cell. The graph flattens the spatial axes, applies one ``Gather``
    on that flat axis, reshapes back and multiplies by a mask that is 1 inside
    the ``OH`` x ``OW`` region and 0 elsewhere, so everything outside the
    output region is zeroed.

    Gather is used instead of GatherElements because the latter needs opset 11.
    """
    flat_idx = np.zeros((GH*GW,), dtype=np.int64)
    mask = np.zeros((1,1,GH,GW), dtype=np.float32)
    for oi, oj in np.ndindex(OH, OW):
        src_r, src_c = idx[oi, oj, 0], idx[oi, oj, 1]
        flat_idx[oi*GW + oj] = src_r*GW + src_c
        mask[0, 0, oi, oj] = 1.0
    flat_shape = np.array([1,10,GH*GW], dtype=np.int64)
    grid_shape = np.array([1,10,GH,GW], dtype=np.int64)
    inits = [
        numpy_helper.from_array(flat_shape, 'fs'),
        numpy_helper.from_array(flat_idx, 'idx'),
        numpy_helper.from_array(grid_shape, 'os'),
        numpy_helper.from_array(mask, 'mask'),
    ]
    nodes = [
        helper.make_node('Reshape', ['input','fs'], ['flat']),
        helper.make_node('Gather', ['flat','idx'], ['g'], axis=2),
        helper.make_node('Reshape', ['g','os'], ['raw']),
        helper.make_node('Mul', ['raw','mask'], ['output']),
    ]
    return mk(nodes, inits)
def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
    """Build a gather model in which some output cells are fixed constants.

    For each output cell (r, c) inside ``OH`` x ``OW``:

    * if ``idx[r, c, 0] >= 0`` the cell copies input cell ``idx[r, c]``;
    * otherwise, if ``cst[r, c] >= 0`` the cell is the constant value
      ``cst[r, c]`` (its one-hot channel is set to 1);
    * otherwise the cell stays all-zero.

    ``IH`` and ``IW`` are accepted for signature symmetry but not used here.
    The ``Add`` of the constant plane is emitted only when at least one
    constant cell exists.
    """
    flat_idx = np.zeros((GH*GW,), dtype=np.int64)
    gather_mask = np.zeros((1,1,GH,GW), dtype=np.float32)
    const_oh = np.zeros((1,10,GH,GW), dtype=np.float32)
    for oi, oj in np.ndindex(OH, OW):
        src_r, src_c = idx[oi, oj, 0], idx[oi, oj, 1]
        if src_r >= 0:
            flat_idx[oi*GW + oj] = src_r*GW + src_c
            gather_mask[0, 0, oi, oj] = 1.0
            continue
        value = cst[oi, oj]
        if value >= 0:
            const_oh[0, value, oi, oj] = 1.0
    needs_const = np.any(const_oh > 0)
    inits = [
        numpy_helper.from_array(np.array([1,10,GH*GW], dtype=np.int64), 'fs'),
        numpy_helper.from_array(flat_idx, 'idx'),
        numpy_helper.from_array(np.array([1,10,GH,GW], dtype=np.int64), 'os'),
        numpy_helper.from_array(gather_mask, 'gmask'),
    ]
    nodes = [
        helper.make_node('Reshape', ['input','fs'], ['flat']),
        helper.make_node('Gather', ['flat','idx'], ['g'], axis=2),
        helper.make_node('Reshape', ['g','os'], ['raw']),
    ]
    if needs_const:
        nodes.append(helper.make_node('Mul', ['raw','gmask'], ['masked']))
        inits.append(numpy_helper.from_array(const_oh, 'cst'))
        nodes.append(helper.make_node('Add', ['masked','cst'], ['output']))
    else:
        # No constants: the masked gather is already the final output.
        nodes.append(helper.make_node('Mul', ['raw','gmask'], ['output']))
    return mk(nodes, inits)
-# ============================================================
-# ANALYTICAL SOLVERS
-# ============================================================
def s_identity(td):
    """Return an Identity model when every train+test output equals its input.

    Returns None as soon as one example differs.
    """
    examples = td['train'] + td['test']
    if any(ex['input'] != ex['output'] for ex in examples):
        return None
    return mk([helper.make_node('Identity', ['input'], ['output'])])
def s_color_map(td):
    """Solve tasks that recolour each cell through a fixed value -> value map.

    The map is learned from all train+test examples; the solver gives up
    (returns None) when shapes differ or one input value maps to two outputs.

    If the learned keys and values form the same set, the map is a permutation
    and is realised as a channel ``Gather``. Otherwise a 1x1 ``Conv`` is used,
    with values never seen in the examples mapped to themselves.
    """
    mapping = {}
    for example in td['train'] + td['test']:
        src = np.array(example['input'])
        dst = np.array(example['output'])
        if src.shape != dst.shape:
            return None
        for a, b in zip(src.flat, dst.flat):
            a, b = int(a), int(b)
            # setdefault records a new pair or returns the existing target.
            if mapping.setdefault(a, b) != b:
                return None
    if set(mapping) == set(mapping.values()):
        # Output channel `dst` must read input channel `src`.
        channel_src = np.arange(10, dtype=np.int32)
        for src_value, dst_value in mapping.items():
            if 0 <= src_value < 10 and 0 <= dst_value < 10:
                channel_src[dst_value] = src_value
        gather = helper.make_node('Gather', ['input', 'gi'], ['output'], axis=1)
        return mk([gather], [numpy_helper.from_array(channel_src, 'gi')])
    # Non-permutation: a 1x1 Conv can merge several inputs into one output.
    weights = np.zeros((10,10,1,1), dtype=np.float32)
    for value in range(10):
        weights[mapping.get(value, value), value, 0, 0] = 1.0
    conv = helper.make_node('Conv', ['input','W'], ['output'], kernel_shape=[1,1])
    return mk([conv], [numpy_helper.from_array(weights, 'W')])
def s_transpose(td):
    """Return a Transpose model when every output is the transpose of its input.

    The emitted node swaps the two spatial axes (``perm=[0,1,3,2]``).
    """
    for example in td['train'] + td['test']:
        expected = np.array(example['input']).T
        if not np.array_equal(np.array(example['output']), expected):
            return None
    node = helper.make_node('Transpose', ['input'], ['output'], perm=[0,1,3,2])
    return mk([node])
def s_flip(td):
    """Solve fixed-shape tasks whose output is the input flipped on one axis.

    A vertical flip (``np.flipud``) is tried first, then a horizontal one
    (``np.fliplr``). Returns a gather model for the first flip that explains
    all examples, otherwise None.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if (IH, IW) != (OH, OW):
        return None
    rows, cols = np.indices((OH, OW))
    # (numpy flip, source rows, source cols) in the order they are tried.
    candidates = (
        (np.flipud, IH - 1 - rows, cols),
        (np.fliplr, rows, IW - 1 - cols),
    )
    for flip_fn, src_r, src_c in candidates:
        if all(np.array_equal(out, flip_fn(inp)) for inp, out in exs):
            idx = np.stack([src_r, src_c], axis=-1).astype(np.int64)
            return _build_gather_model(OH, OW, idx)
    return None
def s_rotate(td):
    """Solve fixed-shape tasks whose output is ``np.rot90(input, k)``, k in 1..3.

    Returns a gather model for the first matching ``k``, otherwise None.

    The source index map is obtained by rotating the input's own coordinate
    grids with the same ``np.rot90`` call. Output cell (r, c) therefore reads
    exactly the input cell that ``np.rot90`` places there, for square and
    non-square grids alike. The previous hand-written formulas swapped
    ``IH``/``IW`` for k=1 and k=3 and were only right for square grids.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    rows, cols = np.indices((IH, IW))
    for k in (1, 2, 3):
        if not all(np.array_equal(out, np.rot90(inp, k)) for inp, out in exs):
            continue
        # The match above guarantees the rotated shape equals (OH, OW).
        idx = np.stack([np.rot90(rows, k), np.rot90(cols, k)], axis=-1).astype(np.int64)
        return _build_gather_model(OH, OW, idx)
    return None
def s_spatial_gather(td):
    """Solve fixed-shape tasks where each output cell is a fixed input cell or constant.

    For every output position the solver records the constant value (when all
    examples agree on it) and the first input position, in row-major order,
    whose value equals the output value in every example. A position with
    neither makes the solver return None.
    """
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    exs = get_exs(td)
    inputs = np.stack([inp for inp, _ in exs])     # (n, IH, IW)
    outputs = np.stack([out for _, out in exs])    # (n, OH, OW)
    idx = np.full((OH, OW, 2), -1, dtype=np.int64)
    cst = np.full((OH, OW), -1, dtype=np.int64)
    for oi, oj in np.ndindex(OH, OW):
        wanted = outputs[:, oi, oj]
        if np.all(wanted == wanted[0]):
            cst[oi, oj] = wanted[0]
        # True where an input cell matches the wanted value in every example.
        agree = np.all(inputs == wanted[:, None, None], axis=0)
        if agree.any():
            # argmax on the flattened mask yields the first row-major hit.
            idx[oi, oj] = divmod(int(np.argmax(agree)), IW)
        elif cst[oi, oj] < 0:
            return None
    return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
def s_varshape_spatial_gather(td):
    """Spatial gather for variable-shape tasks, working on the padded canvas.

    Only runs when ``fixed_shapes`` reports no single shape pair (fixed
    shapes are handled by ``s_spatial_gather``). Every example is embedded in
    a ``GH`` x ``GW`` canvas and each canvas cell is explained either by a
    fixed source cell or by a constant value.

    Padding is marked with -1 rather than 0. In the one-hot encoding a cell
    outside the grid is all-zero, which differs from a cell holding value 0
    (channel 0 set), so the two must not be treated as equal when matching.
    A canvas cell that is padding in every example is left empty.

    Returns:
        A gather model, or None when the task has fixed shapes, has no
        examples, contains a grid larger than the canvas, or some cell cannot
        be explained.
    """
    sp = fixed_shapes(td)
    if sp is not None:
        return None  # fixed shapes handled by s_spatial_gather
    exs = get_exs(td)
    if not exs:
        return None
    pad = -1
    inputs = np.full((len(exs), GH, GW), pad, dtype=np.int64)
    outputs = np.full((len(exs), GH, GW), pad, dtype=np.int64)
    for k, (inp, out) in enumerate(exs):
        ih, iw = inp.shape
        oh, ow = out.shape
        if max(ih, oh) > GH or max(iw, ow) > GW:
            return None  # does not fit the canvas
        inputs[k, :ih, :iw] = inp
        outputs[k, :oh, :ow] = out
    idx = np.full((GH, GW, 2), -1, dtype=np.int64)
    cst = np.full((GH, GW), -1, dtype=np.int64)
    for oi, oj in np.ndindex(GH, GW):
        wanted = outputs[:, oi, oj]
        if np.all(wanted == pad):
            continue  # outside every output grid: stays all-zero
        if np.all(wanted == wanted[0]):
            cst[oi, oj] = wanted[0]
        # An input cell qualifies only if it equals the wanted value in every
        # example, with padding matching padding.
        agree = np.all(inputs == wanted[:, None, None], axis=0)
        if agree.any():
            # First hit in row-major order.
            idx[oi, oj] = divmod(int(np.argmax(agree)), GW)
        elif cst[oi, oj] < 0:
            return None
    return _build_gather_model_with_const(GH, GW, GH, GW, idx, cst)
def s_tile(td):
    """Solve tasks whose output is the input tiled a fixed number of times.

    Requires one common input shape, one common (rows, cols) repeat factor
    other than (1, 1), an output no larger than 30 x 30, and
    ``out == np.tile(inp, factor)`` for every example. The model crops the
    input region, tiles it, and zero-pads back to the 30 x 30 canvas.
    """
    exs = get_exs(td)
    in_shapes = {inp.shape for inp, _ in exs}
    if len(in_shapes) != 1:
        return None
    IH, IW = next(iter(in_shapes))
    factors = set()
    for _, out in exs:
        OH, OW = out.shape
        if OH % IH != 0 or OW % IW != 0:
            return None
        rH, rW = OH // IH, OW // IW
        if rH < 1 or rW < 1 or (rH, rW) == (1, 1):
            return None
        factors.add((rH, rW))
    if len(factors) != 1:
        return None
    rH, rW = next(iter(factors))
    OH, OW = IH * rH, IW * rW
    if OH > 30 or OW > 30:
        return None
    if any(not np.array_equal(out, np.tile(inp, (rH, rW))) for inp, out in exs):
        return None
    pad_h = 30 - OH
    pad_w = 30 - OW
    inits = [
        numpy_helper.from_array(np.array([0,0,0,0], dtype=np.int64), 'st'),
        numpy_helper.from_array(np.array([1,10,IH,IW], dtype=np.int64), 'en'),
        numpy_helper.from_array(np.array([1,1,rH,rW], dtype=np.int64), 'rp'),
    ]
    nodes = [
        helper.make_node('Slice', ['input','st','en'], ['cr']),
        helper.make_node('Tile', ['cr','rp'], ['tl']),
        helper.make_node('Pad', ['tl'], ['output'], pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0),
    ]
    return mk(nodes, inits)
def s_upscale(td):
    """Solve tasks whose output repeats every input cell as an sH x sW block.

    Requires one common input shape, one common integer scale of at least 2
    on both axes, an output no larger than 30 x 30, and an exact match with
    ``np.repeat`` applied on both axes.
    """
    exs = get_exs(td)
    in_shapes = {inp.shape for inp, _ in exs}
    if len(in_shapes) != 1:
        return None
    IH, IW = next(iter(in_shapes))
    scales = set()
    for _, out in exs:
        OH, OW = out.shape
        if OH % IH != 0 or OW % IW != 0:
            return None
        sH, sW = OH // IH, OW // IW
        if sH < 2 or sW < 2:
            return None
        scales.add((sH, sW))
    if len(scales) != 1:
        return None
    sH, sW = next(iter(scales))
    OH, OW = IH * sH, IW * sW
    if OH > 30 or OW > 30:
        return None
    for inp, out in exs:
        enlarged = np.repeat(np.repeat(inp, sH, 0), sW, 1)
        if not np.array_equal(out, enlarged):
            return None
    # Output cell (r, c) reads input cell (r // sH, c // sW).
    rows, cols = np.indices((OH, OW))
    idx = np.stack([rows // sH, cols // sW], axis=-1).astype(np.int64)
    return _build_gather_model(OH, OW, idx)
def s_concat(td):
    """Solve tasks whose output is 2-4 transformed input copies placed in a row or column.

    Each copy is one of: identity, left-right flip, up-down flip, 180-degree
    rotation. The horizontal layout is tried before the vertical one, and
    transform combinations are tried in ``itertools.product`` order; the first
    combination explaining all examples is turned into a gather model.
    """
    from itertools import product as iproduct
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    # Array transform and matching (local row, local col) -> source cell map.
    transforms = [
        (lambda x: x, lambda r, c: (r, c)),
        (lambda x: np.fliplr(x), lambda r, c: (r, IW - 1 - c)),
        (lambda x: np.flipud(x), lambda r, c: (IH - 1 - r, c)),
        (lambda x: np.rot90(x, 2), lambda r, c: (IH - 1 - r, IW - 1 - c)),
    ]
    layouts = []
    if OH == IH and OW % IW == 0 and OW > IW:
        layouts.append((1, OW // IW))   # copies side by side
    if OW == IW and OH % IH == 0 and OH > IH:
        layouts.append((0, OH // IH))   # copies stacked
    for axis, n in layouts:
        if not 2 <= n <= 4:
            continue
        for combo in iproduct(range(4), repeat=n):
            matches = all(
                np.array_equal(out, np.concatenate([transforms[t][0](inp) for t in combo], axis=axis))
                for inp, out in exs
            )
            if not matches:
                continue
            idx = np.zeros((OH, OW, 2), dtype=np.int64)
            for oi, oj in np.ndindex(OH, OW):
                if axis == 1:
                    block, lr, lc = oj // IW, oi, oj % IW
                else:
                    block, lr, lc = oi // IH, oi % IH, oj
                idx[oi, oj] = transforms[combo[block]][1](lr, lc)
            return _build_gather_model(OH, OW, idx)
    return None
def s_concat_enhanced(td):
    """Solve tasks whose output is a grid of blocks, each a symmetry of the input.

    The output must be an rH x rW arrangement (2 to 16 blocks, at most
    30 x 30) of input-sized blocks. Each block must equal one of the eight
    dihedral transforms of the input in every example: identity, the two
    flips, the three rotations, transpose and anti-transpose. The four
    transforms that swap the axes only apply to square inputs.

    Source indices are derived by applying the matched transform to the
    input's coordinate grids, so they agree with the transform by
    construction. This replaces hand-written formulas in which rot90 and
    rot270 were swapped, and a transform list whose last entry duplicated
    rot270 instead of providing the anti-transpose.

    Returns:
        A gather model, or None when the layout does not apply or some block
        matches no transform.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if IH == OH and IW == OW:
        return None
    if IH == 0 or IW == 0:
        return None  # avoid dividing by an empty input dimension
    # Need block decomposition
    if OH % IH != 0 or OW % IW != 0:
        return None
    rH, rW = OH // IH, OW // IW
    if rH * rW > 16 or rH * rW < 2:
        return None
    if OH > 30 or OW > 30:
        return None
    # All 8 symmetry transforms of the dihedral group, in trial order.
    transforms = [
        lambda x: x,
        lambda x: np.fliplr(x),
        lambda x: np.flipud(x),
        lambda x: np.rot90(x, 2),
        lambda x: np.rot90(x, 1),
        lambda x: np.rot90(x, 3),
        lambda x: x.T,
        lambda x: np.rot90(x, 2).T,   # anti-transpose
    ]
    rows, cols = np.indices((IH, IW))
    idx = np.zeros((OH, OW, 2), dtype=np.int64)
    for bi in range(rH):
        for bj in range(rW):
            rs = slice(bi * IH, (bi + 1) * IH)
            cs = slice(bj * IW, (bj + 1) * IW)
            for tfn in transforms:
                src_r = tfn(rows)
                if src_r.shape != (IH, IW):
                    continue  # axis-swapping transform on a non-square input
                if all(np.array_equal(out[rs, cs], tfn(inp)) for inp, out in exs):
                    # tfn(rows)[r, c] is the source row of output cell (r, c).
                    idx[rs, cs, 0] = src_r
                    idx[rs, cs, 1] = tfn(cols)
                    break
            else:
                return None
    # Defensive re-check that the index map reproduces every output.
    for inp, out in exs:
        if not np.array_equal(inp[idx[..., 0], idx[..., 1]], out):
            return None
    return _build_gather_model(OH, OW, idx)
def s_input_driven_tile(td):
    """Detect "each non-zero input pixel stamps a copy of the input" tasks.

    The checks require the output to be an IH x IW arrangement of input-sized
    blocks, where block (bi, bj) equals the input when ``inp[bi, bj] != 0``
    and is all zeros otherwise.

    No model is emitted yet: whether a block is active depends on the input
    content, which a static gather cannot express. Every path therefore
    returns None.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if OH % IH != 0 or OW % IW != 0:
        return None
    if OH // IH != IH or OW // IW != IW:
        return None
    if OH > 30 or OW > 30:
        return None
    for inp, out in exs:
        for bi, bj in np.ndindex(IH, IW):
            block = out[bi*IH:(bi+1)*IH, bj*IW:(bj+1)*IW]
            if inp[bi, bj] != 0:
                block_ok = np.array_equal(block, inp)
            else:
                block_ok = np.all(block == 0)
            if not block_ok:
                return None
    # Pattern confirmed, but it needs an input-dependent mask
    # (e.g. Tile(input) * mask derived from the input) that is not built here.
    return None
def s_kronecker(td):
    """Solve tasks where ``output == np.kron(input, ones((sH, sW)))``.

    This is nearest-neighbour upscaling by an integer factor of at least 2 on
    both axes, for fixed-shape tasks with an output no larger than 30 x 30.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if OH % IH != 0 or OW % IW != 0:
        return None
    sH, sW = OH // IH, OW // IW
    if sH < 2 or sW < 2:
        return None
    if OH > 30 or OW > 30:
        return None
    block_of_ones = np.ones((sH, sW), dtype=np.int64)
    if any(not np.array_equal(out, np.kron(inp, block_of_ones)) for inp, out in exs):
        return None
    # Same index map as upscale: output (r, c) reads input (r // sH, c // sW).
    rows, cols = np.indices((OH, OW))
    idx = np.stack([rows // sH, cols // sW], axis=-1).astype(np.int64)
    return _build_gather_model(OH, OW, idx)
def s_diagonal_tile(td):
    """Solve tasks that place the input on the block diagonal, zeros elsewhere.

    The output must be an r x r arrangement (r >= 2, at most 30 x 30) of
    input-sized blocks in which block (i, i) equals the input and every other
    block is all zeros.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if OH % IH != 0 or OW % IW != 0:
        return None
    rH, rW = OH // IH, OW // IW
    if rH != rW or rH < 2:
        return None
    if OH > 30 or OW > 30:
        return None
    for inp, out in exs:
        for bi, bj in np.ndindex(rH, rW):
            block = out[bi*IH:(bi+1)*IH, bj*IW:(bj+1)*IW]
            if bi == bj:
                block_ok = np.array_equal(block, inp)
            else:
                block_ok = np.all(block == 0)
            if not block_ok:
                return None
    # Diagonal blocks gather from the input; off-diagonal cells are constant 0.
    idx = np.zeros((OH, OW, 2), dtype=np.int64)
    cst = np.full((OH, OW), -1, dtype=np.int64)
    rows, cols = np.indices((IH, IW))
    for bi, bj in np.ndindex(rH, rW):
        rs = slice(bi * IH, (bi + 1) * IH)
        cs = slice(bj * IW, (bj + 1) * IW)
        if bi == bj:
            idx[rs, cs, 0] = rows
            idx[rs, cs, 1] = cols
        else:
            idx[rs, cs] = -1
            cst[rs, cs] = 0
    return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
def s_shift(td):
    """Detect a constant translation of the whole grid, filling with zeros.

    Offsets (dr, dc) with both components in -5..5, excluding (0, 0), are
    tried with dr as the outer loop. The first offset that maps every input
    to its output is turned into a gather model; cells whose source would lie
    outside the grid become constant 0.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if (IH, IW) != (OH, OW):
        return None

    def translate(grid, dr, dc):
        # Move `grid` by (dr, dc); vacated cells stay 0.
        moved = np.zeros_like(grid)
        r0, r1 = max(0, dr), min(IH, IH + dr)
        c0, c1 = max(0, dc), min(IW, IW + dc)
        if r1 > r0 and c1 > c0:
            sr0, sc0 = max(0, -dr), max(0, -dc)
            moved[r0:r1, c0:c1] = grid[sr0:sr0 + (r1 - r0), sc0:sc0 + (c1 - c0)]
        return moved

    offsets = [(dr, dc) for dr in range(-5, 6) for dc in range(-5, 6)
               if not (dr == 0 and dc == 0)]
    for dr, dc in offsets:
        if not all(np.array_equal(translate(inp, dr, dc), out) for inp, out in exs):
            continue
        rows, cols = np.indices((OH, OW))
        src_r, src_c = rows - dr, cols - dc
        inside = (src_r >= 0) & (src_r < IH) & (src_c >= 0) & (src_c < IW)
        idx = np.full((OH, OW, 2), -1, dtype=np.int64)
        idx[inside, 0] = src_r[inside]
        idx[inside, 1] = src_c[inside]
        cst = np.full((OH, OW), 0, dtype=np.int64)  # zeros for out-of-bounds
        return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
    return None
def s_gravity(td):
    """Detect gravity-like compaction of non-zero cells towards one side.

    Recognises the four directions down/up/left/right on fixed-shape,
    same-size tasks. No model is emitted yet: the target positions depend on
    the input content, which a static gather cannot express, so every path
    returns None.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if (IH, IW) != (OH, OW):
        return None

    def _gravity(grid, direction):
        # Pack the non-zero cells of each column/row against one edge,
        # keeping their relative order.
        settled = np.zeros_like(grid)
        height, width = grid.shape
        if direction in ('down', 'up'):
            for c in range(width):
                column = grid[:, c]
                solid = column[column != 0]
                if direction == 'down':
                    settled[height - len(solid):height, c] = solid
                else:
                    settled[:len(solid), c] = solid
        else:
            for rr in range(height):
                line = grid[rr, :]
                solid = line[line != 0]
                if direction == 'right':
                    settled[rr, width - len(solid):width] = solid
                else:
                    settled[rr, :len(solid)] = solid
        return settled

    for d in ('down', 'up', 'left', 'right'):
        if all(np.array_equal(_gravity(inp, d), out) for inp, out in exs):
            # Pattern confirmed, but it needs a specialised ONNX graph.
            return None
    return None
def s_mirror_h(td):
    """Solve tasks where output = input followed by its left-right flip.

    Requires fixed shapes with the output exactly twice as wide as the input
    (and at most 30 columns). Returns the gather model built from a static
    (row, col) source index, or None when the pattern does not hold.
    """
    pairs = get_exs(td)
    shape_info = fixed_shapes(td)
    if shape_info is None:
        return None
    (IH, IW), (OH, OW) = shape_info
    if (OH, OW) != (IH, 2 * IW) or OW > 30:
        return None

    def _mirrored(grid):
        return np.concatenate((grid, np.flip(grid, 1)), axis=1)

    if any(not np.array_equal(_mirrored(src), dst) for src, dst in pairs):
        return None

    # Output column c reads input column c on the left half and the
    # reflected column 2*IW-1-c on the right half; rows map to themselves.
    out_cols = np.arange(OW)
    src_cols = np.where(out_cols < IW, out_cols, 2 * IW - 1 - out_cols)
    idx = np.zeros((OH, OW, 2), dtype=np.int64)
    idx[..., 0] = np.arange(OH)[:, None]
    idx[..., 1] = src_cols[None, :]
    return _build_gather_model(OH, OW, idx)
def s_mirror_v(td):
    """Solve tasks where output = input stacked on top of its up-down flip.

    Requires fixed shapes with the output exactly twice as tall as the input
    (and at most 30 rows). Returns the gather model built from a static
    (row, col) source index, or None when the pattern does not hold.
    """
    pairs = get_exs(td)
    shape_info = fixed_shapes(td)
    if shape_info is None:
        return None
    (IH, IW), (OH, OW) = shape_info
    if (OH, OW) != (2 * IH, IW) or OH > 30:
        return None

    def _mirrored(grid):
        return np.concatenate((grid, np.flip(grid, 0)), axis=0)

    if any(not np.array_equal(_mirrored(src), dst) for src, dst in pairs):
        return None

    # Output row r reads input row r in the top half and the reflected
    # row 2*IH-1-r in the bottom half; columns map to themselves.
    out_rows = np.arange(OH)
    src_rows = np.where(out_rows < IH, out_rows, 2 * IH - 1 - out_rows)
    idx = np.zeros((OH, OW, 2), dtype=np.int64)
    idx[..., 0] = src_rows[:, None]
    idx[..., 1] = np.arange(OW)[None, :]
    return _build_gather_model(OH, OW, idx)
def s_quad_mirror(td):
    """Solve tasks where output is a 2x2 arrangement of the input and its flips.

    Layout: [input, h-flip] over [v-flip, hv-flip]. Requires fixed shapes
    with both output dimensions doubled (each at most 30). Returns the gather
    model for the static source index, or None when the pattern fails.
    """
    pairs = get_exs(td)
    shape_info = fixed_shapes(td)
    if shape_info is None:
        return None
    (IH, IW), (OH, OW) = shape_info
    if (OH, OW) != (2 * IH, 2 * IW):
        return None
    if OH > 30 or OW > 30:
        return None

    for src, dst in pairs:
        top = np.concatenate((src, np.flip(src, 1)), axis=1)
        # Reversing the rows of the top band yields [v-flip, hv-flip].
        expected = np.concatenate((top, np.flip(top, 0)), axis=0)
        if not np.array_equal(expected, dst):
            return None

    # Both axes reflect independently once they pass the input extent.
    out_rows, out_cols = np.arange(OH), np.arange(OW)
    src_rows = np.where(out_rows < IH, out_rows, 2 * IH - 1 - out_rows)
    src_cols = np.where(out_cols < IW, out_cols, 2 * IW - 1 - out_cols)
    idx = np.zeros((OH, OW, 2), dtype=np.int64)
    idx[..., 0] = src_rows[:, None]
    idx[..., 1] = src_cols[None, :]
    return _build_gather_model(OH, OW, idx)
def s_fixed_crop(td):
    """Solve tasks where the output is one fixed sub-rectangle of the input.

    Requires fixed shapes with the output no larger than the input in either
    dimension and strictly different in shape. Crop origins are scanned in
    row-major order; the first origin that reproduces every example wins.
    Returns the gather model for that crop, or None if no origin matches.
    """
    pairs = get_exs(td)
    shape_info = fixed_shapes(td)
    if shape_info is None:
        return None
    (IH, IW), (OH, OW) = shape_info
    if OH > IH or OW > IW:
        return None
    if OH == IH and OW == IW:
        return None

    origins = (
        (r0, c0)
        for r0 in range(IH - OH + 1)
        for c0 in range(IW - OW + 1)
    )
    for r0, c0 in origins:
        if any(not np.array_equal(src[r0:r0 + OH, c0:c0 + OW], dst)
               for src, dst in pairs):
            continue
        # Output cell (r, c) reads input cell (r0 + r, c0 + c).
        rows, cols = np.indices((OH, OW))
        idx = np.zeros((OH, OW, 2), dtype=np.int64)
        idx[..., 0] = rows + r0
        idx[..., 1] = cols + c0
        return _build_gather_model(OH, OW, idx)
    return None
def s_nonuniform_scale(td):
    """Solve tasks where the input is upscaled by different row/column factors.

    Only the (height, width) factor pairs listed below are tried, and only
    when the scaled shape equals the fixed output shape and fits in 30x30.
    Returns the gather model for the first matching pair, or None.
    """
    pairs = get_exs(td)
    shape_info = fixed_shapes(td)
    if shape_info is None:
        return None
    (IH, IW), (OH, OW) = shape_info

    factor_pairs = [(1, 2), (2, 1), (1, 3), (3, 1), (2, 3),
                    (3, 2), (1, 4), (4, 1), (2, 4), (4, 2)]
    for fh, fw in factor_pairs:
        shape_ok = OH == IH * fh and OW == IW * fw
        if not shape_ok or OH > 30 or OW > 30:
            continue

        def _scaled(grid, fh=fh, fw=fw):
            return np.repeat(np.repeat(grid, fh, 0), fw, 1)

        if any(not np.array_equal(_scaled(src), dst) for src, dst in pairs):
            continue
        # Each output cell reads the input cell its block was expanded from.
        rows, cols = np.indices((OH, OW))
        idx = np.zeros((OH, OW, 2), dtype=np.int64)
        idx[..., 0] = rows // fh
        idx[..., 1] = cols // fw
        return _build_gather_model(OH, OW, idx)
    return None
def s_constant(td):
    """Solve tasks whose output grid is identical for every example.

    The shared output is baked into a [1, 10, 30, 30] one-hot constant.
    The graph multiplies the input by zero, sums it over axes 1-3 and adds
    the constant, so the result equals the constant while still consuming
    the 'input' tensor. Returns None when shapes are not fixed or outputs
    differ.
    """
    if fixed_shapes(td) is None:
        return None
    outputs = [dst for _, dst in get_exs(td)]
    reference = outputs[0]
    if any(not np.array_equal(reference, other) for other in outputs[1:]):
        return None

    # One-hot encode the shared output; cells outside it stay all-zero.
    const = np.zeros((1, 10, 30, 30), dtype=np.float32)
    for (r, c), color in np.ndenumerate(np.asarray(reference)):
        const[0, int(color), r, c] = 1.0

    zero = np.array(0.0, dtype=np.float32)
    inits = [
        numpy_helper.from_array(zero, 'z'),
        numpy_helper.from_array(const, 'c'),
    ]
    nodes = [
        helper.make_node('Mul', ['input', 'z'], ['zd']),
        helper.make_node('ReduceSum', ['zd'], ['s'], axes=[1, 2, 3], keepdims=1),
        helper.make_node('Add', ['s', 'c'], ['output']),
    ]
    return mk(nodes, inits)
-# ============================================================
-# CONV SOLVERS
-# ============================================================
def add_onehot_block(nodes, inits, am_name, oh_name):
    """Append an Equal + Cast pair that one-hot encodes an ArgMax result.

    Used in place of the OneHot operator (which lacks a CUDA kernel).
    `am_name` is compared against a [1, 10, 1, 1] int64 tensor holding
    0..9, and the boolean result is cast to float under `oh_name`.
    Both lists are extended in place. The initializer name 'classes' and
    the intermediate name 'eq' are fixed.
    """
    class_ids = np.arange(10, dtype=np.int64).reshape(1, 10, 1, 1)
    inits.append(numpy_helper.from_array(class_ids, 'classes'))
    nodes.extend([
        helper.make_node('Equal', [am_name, 'classes'], ['eq']),
        helper.make_node('Cast', ['eq'], [oh_name], to=TensorProto.FLOAT),
    ])
def _lstsq_conv(exs_raw, ks, use_bias, use_full_30=False):
    """Fit a single 10->10 channel conv kernel by linear least squares.

    Every output cell contributes one training row: the flattened ks x ks
    one-hot input patch centred on that cell (zero padded at the borders),
    plus a trailing 1.0 when `use_bias` is set. The target is the one-hot
    colour of the output cell.

    Args:
        exs_raw: sequence of (input_grid, output_grid) 2-D integer arrays.
        ks: odd kernel size.
        use_bias: whether to fit a per-class bias term.
        use_full_30: encode each input on the full GH x GW canvas instead
            of at its own size.

    Returns:
        (Wconv, B) with Wconv of shape (10, 10, ks, ks) float32 and B either
        a (10,) float32 vector or None; or None when the problem is too
        large, has no training cells, an output does not fit inside the
        encoded input, the solver fails, or the fitted weights do not
        reproduce every training cell under argmax.
    """
    pad = ks // 2
    n_weights = 10 * ks * ks
    feat = n_weights + (1 if use_bias else 0)
    if feat > 20000:
        return None

    exs_raw = list(exs_raw)
    # Count rows up front so oversized or empty problems are rejected
    # before any patch is materialised.
    n_patches = 0
    for inp_g, out_g in exs_raw:
        grid_h, grid_w = (GH, GW) if use_full_30 else inp_g.shape
        out_h, out_w = out_g.shape
        if out_h > grid_h or out_w > grid_w:
            # Patches past the encoded grid would be truncated (ragged rows).
            return None
        n_patches += out_h * out_w
    if n_patches == 0:
        return None
    if feat > 5000 and n_patches > 2000:
        return None

    colors = np.arange(10).reshape(10, 1, 1)
    P = np.empty((n_patches, feat), dtype=np.float64)
    if use_bias:
        P[:, -1] = 1.0
    T = np.empty(n_patches, dtype=np.int64)
    row = 0
    for inp_g, out_g in exs_raw:
        inp_g = np.asarray(inp_g)
        ih, iw = inp_g.shape
        one_hot = (inp_g[None] == colors).astype(np.float64)
        if use_full_30:
            canvas = np.zeros((10, GH, GW), dtype=np.float64)
            canvas[:, :ih, :iw] = one_hot
            one_hot = canvas
        oh_pad = np.pad(one_hot, ((0, 0), (pad, pad), (pad, pad)))
        out_h, out_w = out_g.shape
        for r in range(out_h):
            for c in range(out_w):
                P[row, :n_weights] = oh_pad[:, r:r + ks, c:c + ks].reshape(-1)
                T[row] = int(out_g[r, c])
                row += 1

    T_oh = np.zeros((n_patches, 10), dtype=np.float64)
    T_oh[np.arange(n_patches), T] = 1.0
    try:
        WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
    except np.linalg.LinAlgError:
        # The SVD inside lstsq can fail to converge; treat as "no fit".
        return None
    # Accept only weights that classify every training cell correctly.
    if not np.array_equal(np.argmax(P @ WT, axis=1), T):
        return None

    if use_bias:
        Wconv = WT[:-1].T.reshape(10, 10, ks, ks).astype(np.float32)
        B = WT[-1].astype(np.float32)
    else:
        Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
        B = None
    return Wconv, B
def solve_conv_fixed(td, path, time_budget=30.0):
    """Fit a conv model for tasks whose grids all share one size.

    Graph: Slice (crop to IH x IW) -> Conv -> ArgMax -> Equal+Cast -> Pad
    (back to GH x GW). Kernel sizes 1..29 (odd) are tried without bias
    first, then with bias. Each fitted candidate is saved to `path` and
    checked with validate().

    Returns ('conv_fixed', model) for the first validated candidate, or
    None when shapes are unsuitable, nothing validates, or `time_budget`
    seconds elapse.
    """
    pairs = get_exs(td)
    if any(src.shape != dst.shape for src, dst in pairs):
        return None
    grid_shapes = {src.shape for src, _ in pairs}
    if len(grid_shapes) != 1:
        return None
    IH, IW = next(iter(grid_shapes))

    # Fit on the larger example pool, restricted to same-shape pairs of
    # exactly this grid size.
    fit_pairs = [
        (src, dst)
        for src, dst in get_exs_for_fitting(td)
        if src.shape == dst.shape and src.shape == (IH, IW)
    ]

    started = time.time()
    for use_bias in (False, True):
        for ks in range(1, 30, 2):
            if time.time() - started > time_budget:
                return None
            fitted = _lstsq_conv(fit_pairs, ks, use_bias, use_full_30=False)
            if fitted is None:
                continue
            Wconv, B = fitted
            half = ks // 2

            inits = [
                numpy_helper.from_array(np.array([0, 0, 0, 0], dtype=np.int64), 'sl_st'),
                numpy_helper.from_array(np.array([1, 10, IH, IW], dtype=np.int64), 'sl_en'),
                numpy_helper.from_array(Wconv, 'W'),
            ]
            conv_inputs = ['grid', 'W']
            if B is not None:
                inits.append(numpy_helper.from_array(B, 'B'))
                conv_inputs.append('B')

            nodes = [
                helper.make_node('Slice', ['input', 'sl_st', 'sl_en'], ['grid']),
                helper.make_node('Conv', conv_inputs, ['co'],
                                 kernel_shape=[ks, ks], pads=[half] * 4),
                helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
            ]
            add_onehot_block(nodes, inits, 'am', 'oh_out')
            # Pad only at the bottom/right to restore the GH x GW canvas.
            nodes.append(helper.make_node(
                'Pad', ['oh_out'], ['output'],
                pads=[0, 0, 0, 0, 0, 0, GH - IH, GW - IW], value=0.0))

            model = mk(nodes, inits)
            onnx.save(model, path)
            if validate(path, td):
                return 'conv_fixed', model
    return None
def solve_conv_variable(td, path, time_budget=30.0):
    """Fit a conv model for same-shape tasks whose grid size may vary.

    Graph: Conv on the full canvas -> ArgMax -> Equal+Cast -> Mul(mask),
    where mask is the channel-sum of the input, so cells with no input
    colour are zeroed in the output. Kernel sizes 1..29 (odd) are tried
    without bias first, then with bias. Each candidate is saved to `path`
    and checked with validate().

    Returns ('conv_var', model) for the first validated candidate, or None
    when an example changes shape, nothing validates, or `time_budget`
    seconds elapse.
    """
    if any(src.shape != dst.shape for src, dst in get_exs(td)):
        return None

    fit_pairs = [
        (src, dst)
        for src, dst in get_exs_for_fitting_variable(td)
        if src.shape == dst.shape
    ]

    started = time.time()
    for use_bias in (False, True):
        for ks in range(1, 30, 2):
            if time.time() - started > time_budget:
                return None
            fitted = _lstsq_conv(fit_pairs, ks, use_bias, use_full_30=True)
            if fitted is None:
                continue
            Wconv, B = fitted
            half = ks // 2

            inits = [numpy_helper.from_array(Wconv, 'W')]
            conv_inputs = ['input', 'W']
            if B is not None:
                inits.append(numpy_helper.from_array(B, 'B'))
                conv_inputs.append('B')

            nodes = [
                helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1),
                helper.make_node('Conv', conv_inputs, ['co'],
                                 kernel_shape=[ks, ks], pads=[half] * 4),
                helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
            ]
            add_onehot_block(nodes, inits, 'am', 'oh_out')
            nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))

            model = mk(nodes, inits)
            onnx.save(model, path)
            if validate(path, td):
                return 'conv_var', model
    return None
def solve_conv_diffshape(td, path, time_budget=30.0):
    """Fit a conv model for fixed-shape tasks whose output is a smaller window.

    Graph: Slice (crop to IH x IW) -> Conv -> Slice (crop OH x OW window)
    -> ArgMax -> Equal+Cast -> Pad (back to GH x GW). The window is tried
    at the top-left corner and centred; kernel sizes 1..21 (odd) are tried
    without bias first, then with bias. Each fitted candidate is saved to
    `path` and checked with validate().

    Returns ('conv_diff', model) for the first validated candidate, or None
    when shapes are unsuitable, nothing validates, or `time_budget` seconds
    elapse.
    """
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if IH == OH and IW == OW:
        return None
    if OH > IH or OW > IW:
        return None
    if OH > 30 or OW > 30:
        return None
    exs = get_exs(td)
    if not exs:
        return None

    # Input encodings and targets do not depend on offset, kernel size or
    # bias, so build them once.
    colors = np.arange(10).reshape(10, 1, 1)
    encoded = [(np.asarray(inp_g)[None] == colors).astype(np.float64)
               for inp_g, _ in exs]
    T = np.array([int(out_g[r, c])
                  for _, out_g in exs
                  for r in range(OH)
                  for c in range(OW)], dtype=np.int64)
    n_patches = len(T)
    if n_patches == 0:
        return None
    T_oh = np.zeros((n_patches, 10), dtype=np.float64)
    T_oh[np.arange(n_patches), T] = 1.0

    # The centred offset collapses to (0, 0) when the size difference is at
    # most one cell per axis; dropping the duplicate avoids repeating the
    # whole search.
    offsets = list(dict.fromkeys([(0, 0), ((IH - OH) // 2, (IW - OW) // 2)]))

    t_start = time.time()
    for dr_off, dc_off in offsets:
        for use_bias in (False, True):
            for ks in range(1, 22, 2):
                if time.time() - t_start > time_budget:
                    return None
                pad = ks // 2
                n_weights = 10 * ks * ks
                feat = n_weights + (1 if use_bias else 0)
                if feat > 10000:
                    continue
                if feat > 5000 and n_patches > 2000:
                    continue

                P = np.empty((n_patches, feat), dtype=np.float64)
                if use_bias:
                    P[:, -1] = 1.0
                row = 0
                for enc in encoded:
                    oh_pad = np.pad(enc, ((0, 0), (pad, pad), (pad, pad)))
                    for r in range(OH):
                        for c in range(OW):
                            # r + dr_off < IH and c + dc_off < IW always hold
                            # because OH <= IH, OW <= IW and the offsets are
                            # at most half the size difference.
                            sr, sc = r + dr_off, c + dc_off
                            P[row, :n_weights] = oh_pad[:, sr:sr + ks, sc:sc + ks].reshape(-1)
                            row += 1

                try:
                    WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
                except np.linalg.LinAlgError:
                    # SVD did not converge for this configuration; try the next.
                    continue
                if not np.array_equal(np.argmax(P @ WT, axis=1), T):
                    continue

                if use_bias:
                    Wconv = WT[:-1].T.reshape(10, 10, ks, ks).astype(np.float32)
                    B = WT[-1].astype(np.float32)
                else:
                    Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
                    B = None

                pad_h, pad_w = GH - OH, GW - OW
                inits = [
                    numpy_helper.from_array(np.array([0, 0, 0, 0], dtype=np.int64), 'sl_st'),
                    numpy_helper.from_array(np.array([1, 10, IH, IW], dtype=np.int64), 'sl_en'),
                    numpy_helper.from_array(Wconv, 'W'),
                    numpy_helper.from_array(
                        np.array([0, 0, dr_off, dc_off], dtype=np.int64), 'cr_st'),
                    numpy_helper.from_array(
                        np.array([1, 10, dr_off + OH, dc_off + OW], dtype=np.int64), 'cr_en'),
                ]
                conv_inputs = ['grid', 'W']
                if B is not None:
                    inits.append(numpy_helper.from_array(B, 'B'))
                    conv_inputs.append('B')
                nodes = [
                    helper.make_node('Slice', ['input', 'sl_st', 'sl_en'], ['grid']),
                    helper.make_node('Conv', conv_inputs, ['co'],
                                     kernel_shape=[ks, ks], pads=[pad] * 4),
                    helper.make_node('Slice', ['co', 'cr_st', 'cr_en'], ['co_crop']),
                    helper.make_node('ArgMax', ['co_crop'], ['am'], axis=1, keepdims=1),
                ]
                add_onehot_block(nodes, inits, 'am', 'oh_out')
                nodes.append(
                    helper.make_node('Pad', ['oh_out'], ['output'],
                                     pads=[0, 0, 0, 0, 0, 0, pad_h, pad_w], value=0.0)
                )
                model = mk(nodes, inits)
                onnx.save(model, path)
                if validate(path, td):
                    return 'conv_diff', model
    return None
def solve_conv_var_diff(td, path, time_budget=30.0):
    """Fit a full-canvas conv model for tasks whose output shape differs.

    Graph: Conv on the full canvas -> ArgMax -> Equal+Cast -> Mul(mask),
    with mask = channel-sum of the input. Kernel sizes 1..29 (odd) are
    tried without bias first, then with bias. Each fitted candidate is
    saved to `path` and checked with validate().

    Only tasks where every output fits inside its input (top-left aligned)
    are attempted: ArgMax always selects some channel, so cells outside the
    grid would be encoded as colour 0 instead of all-zero unless masked, and
    the only mask derivable from the input is the input's own footprint.

    Returns ('conv_var_diff', model) for the first validated candidate, or
    None when the task is unsuitable, nothing validates, or `time_budget`
    seconds elapse.
    """
    exs = get_exs(td)
    if not exs:
        return None
    # Loop-invariant precondition: check it before any fitting so that
    # unsupported tasks return immediately instead of burning the budget.
    if not all(out_g.shape[0] <= inp_g.shape[0] and out_g.shape[1] <= inp_g.shape[1]
               for inp_g, out_g in exs):
        return None

    # Canvas encodings and targets are independent of kernel size and bias.
    colors = np.arange(10).reshape(10, 1, 1)
    canvases = []
    for inp_g, _ in exs:
        ih, iw = inp_g.shape
        canvas = np.zeros((10, GH, GW), dtype=np.float64)
        canvas[:, :ih, :iw] = (np.asarray(inp_g)[None] == colors)
        canvases.append(canvas)
    T = np.concatenate(
        [np.asarray(out_g).reshape(-1) for _, out_g in exs]).astype(np.int64)
    n_patches = len(T)
    if n_patches == 0:
        return None
    T_oh = np.zeros((n_patches, 10), dtype=np.float64)
    T_oh[np.arange(n_patches), T] = 1.0

    t_start = time.time()
    for use_bias in (False, True):
        for ks in range(1, 30, 2):
            if time.time() - t_start > time_budget:
                return None
            pad = ks // 2
            n_weights = 10 * ks * ks
            feat = n_weights + (1 if use_bias else 0)
            if feat > 20000:
                continue
            if feat > 5000 and n_patches > 2000:
                continue

            P = np.empty((n_patches, feat), dtype=np.float64)
            if use_bias:
                P[:, -1] = 1.0
            row = 0
            for canvas, (_, out_g) in zip(canvases, exs):
                oh_pad = np.pad(canvas, ((0, 0), (pad, pad), (pad, pad)))
                oh, ow = out_g.shape
                for r in range(oh):
                    for c in range(ow):
                        P[row, :n_weights] = oh_pad[:, r:r + ks, c:c + ks].reshape(-1)
                        row += 1

            try:
                WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
            except (np.linalg.LinAlgError, ValueError):
                # Solver failure for this configuration; try the next one.
                continue
            if not np.array_equal(np.argmax(P @ WT, axis=1), T):
                continue

            if use_bias:
                Wconv = WT[:-1].T.reshape(10, 10, ks, ks).astype(np.float32)
                B = WT[-1].astype(np.float32)
            else:
                Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
                B = None

            inits = [numpy_helper.from_array(Wconv, 'W')]
            conv_inputs = ['input', 'W']
            if B is not None:
                inits.append(numpy_helper.from_array(B, 'B'))
                conv_inputs.append('B')
            nodes = [
                helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1),
                helper.make_node('Conv', conv_inputs, ['co'],
                                 kernel_shape=[ks, ks], pads=[pad] * 4),
                helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
            ]
            add_onehot_block(nodes, inits, 'am', 'oh_out')
            nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
            model = mk(nodes, inits)
            onnx.save(model, path)
            if validate(path, td):
                return 'conv_var_diff', model
    return None
-# ============================================================
-# PYTORCH LEARNED CONV (gradient descent, multi-seed, ternary snap)
-# ============================================================
def _ternary_snap(w, eps=0.2):
    """Snap weights to {-1, 0, 1} — smaller model, often still correct.

    Args:
        w: array-like of weights (ndarray, nested list, scalar, ...).
        eps: dead-zone half-width; values in [-eps, eps] become 0.

    Returns:
        float32 ndarray of the same shape holding 1.0 where w > eps,
        -1.0 where w < -eps and 0.0 elsewhere (including NaN entries).
    """
    values = np.asarray(w)  # accept plain sequences as well as ndarrays
    snapped = np.zeros(values.shape, dtype=np.float32)
    snapped[values > eps] = 1.0
    snapped[values < -eps] = -1.0
    return snapped
def _build_conv_onnx_from_weights(W, ks, use_full_30=False, IH=None, IW=None):
    """Wrap a single conv weight tensor W [10,10,ks,ks] in an ONNX model.

    use_full_30=True : Conv on the whole input -> ArgMax -> Equal+Cast
                       -> Mul with the channel-sum mask of the input.
    use_full_30=False: Slice to IH x IW -> Conv -> ArgMax -> Equal+Cast
                       -> Pad back to GH x GW (IH and IW are required).
    """
    half = ks // 2
    inits, nodes = [], []

    if use_full_30:
        conv_source = 'input'
        nodes.append(
            helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1))
    else:
        conv_source = 'grid'
        grow_h, grow_w = GH - IH, GW - IW
        inits.append(numpy_helper.from_array(
            np.array([0, 0, 0, 0], dtype=np.int64), 'sl_st'))
        inits.append(numpy_helper.from_array(
            np.array([1, 10, IH, IW], dtype=np.int64), 'sl_en'))
        nodes.append(
            helper.make_node('Slice', ['input', 'sl_st', 'sl_en'], ['grid']))

    inits.append(numpy_helper.from_array(W, 'W'))
    nodes.append(helper.make_node('Conv', [conv_source, 'W'], ['co'],
                                  kernel_shape=[ks, ks], pads=[half] * 4))
    nodes.append(helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1))
    add_onehot_block(nodes, inits, 'am', 'oh_out')

    if use_full_30:
        # Zero out cells where the input carries no colour.
        nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
    else:
        # Restore the canvas size by padding at the bottom/right only.
        nodes.append(helper.make_node(
            'Pad', ['oh_out'], ['output'],
            pads=[0, 0, 0, 0, 0, 0, grow_h, grow_w], value=0.0))
    return mk(nodes, inits)
def _build_two_layer_conv_onnx(W1, W2, ks1, ks2, use_full_30=False, IH=None, IW=None):
    """Wrap two conv weight tensors in an ONNX model: Conv -> Relu -> Conv.

    use_full_30=True : layers run on the whole input, then ArgMax ->
                       Equal+Cast -> Mul with the input's channel-sum mask.
    use_full_30=False: Slice to IH x IW first, then the layers, ArgMax ->
                       Equal+Cast -> Pad back to GH x GW (IH, IW required).
    """
    half1, half2 = ks1 // 2, ks2 // 2
    inits, nodes = [], []

    if use_full_30:
        first_input = 'input'
        nodes.append(
            helper.make_node('ReduceSum', ['input'], ['mask'], axes=[1], keepdims=1))
    else:
        first_input = 'grid'
        grow_h, grow_w = GH - IH, GW - IW
        inits.append(numpy_helper.from_array(
            np.array([0, 0, 0, 0], dtype=np.int64), 'sl_st'))
        inits.append(numpy_helper.from_array(
            np.array([1, 10, IH, IW], dtype=np.int64), 'sl_en'))
        nodes.append(
            helper.make_node('Slice', ['input', 'sl_st', 'sl_en'], ['grid']))

    inits.append(numpy_helper.from_array(W1, 'W1'))
    inits.append(numpy_helper.from_array(W2, 'W2'))
    nodes.append(helper.make_node('Conv', [first_input, 'W1'], ['h1'],
                                  kernel_shape=[ks1, ks1], pads=[half1] * 4))
    nodes.append(helper.make_node('Relu', ['h1'], ['h1r']))
    nodes.append(helper.make_node('Conv', ['h1r', 'W2'], ['co'],
                                  kernel_shape=[ks2, ks2], pads=[half2] * 4))
    nodes.append(helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1))
    add_onehot_block(nodes, inits, 'am', 'oh_out')

    if use_full_30:
        nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
    else:
        nodes.append(helper.make_node(
            'Pad', ['oh_out'], ['output'],
            pads=[0, 0, 0, 0, 0, 0, grow_h, grow_w], value=0.0))
    return mk(nodes, inits)
def solve_pytorch_conv(td, path, time_budget=30.0):
    """Gradient-descent conv solver for same-shape tasks.

    Phase 1 trains single-layer convs (kernel 1/3/5/7, seeds 0/7/42) until
    60% of `time_budget` is used; phase 2 trains Conv->ReLU->Conv nets until
    the budget is exhausted. Every trained net is exported twice (raw
    weights and ternary-snapped weights), saved to `path` and checked with
    validate(). Among validated candidates the one with the smallest file
    wins and is re-saved to `path`.

    Returns (solver_name, model) or None when torch is unavailable, an
    example changes shape, or nothing validates.
    """
    try:
        import torch
        import torch.nn as nn
        import copy as _copy
    except ImportError:
        return None

    exs = get_exs(td)
    if any(inp.shape != out.shape for inp, out in exs):
        return None  # Only handle same-shape for now
    shapes = {inp.shape for inp, _ in exs}
    fixed_in = len(shapes) == 1

    # Training tensors come from the task's own train + test pairs.
    all_pairs = td['train'] + td['test']
    inp_t = torch.tensor(
        np.stack([to_onehot(p['input'])[0] for p in all_pairs]), dtype=torch.float32)
    out_t = torch.tensor(
        np.stack([to_onehot(p['output'])[0] for p in all_pairs]), dtype=torch.float32)

    IH = IW = None
    if fixed_in:
        IH, IW = next(iter(shapes))
        # Train on the cropped region only.
        inp_t = inp_t[:, :, :IH, :IW]
        out_t = out_t[:, :, :IH, :IW]
    use_full = not fixed_in

    t_start = time.time()
    best_result = None

    def _train(module, lr, steps):
        """Run Adam on MSE loss; return the state saved at the lowest loss seen."""
        optimizer = torch.optim.Adam(module.parameters(), lr=lr)
        lowest, snapshot = float('inf'), None
        for _ in range(steps):
            optimizer.zero_grad()
            loss = nn.functional.mse_loss(module(inp_t), out_t)
            loss.backward()
            optimizer.step()
            current = loss.item()
            if current < lowest:
                lowest = current
                snapshot = _copy.deepcopy(module.state_dict())
            if lowest < 1e-8:
                break
        return snapshot

    def _consider(label, model):
        """Save and validate a candidate; keep it if it is the smallest so far."""
        nonlocal best_result
        onnx.save(model, path)
        if validate(path, td):
            size = os.path.getsize(path)
            if best_result is None or size < best_result[2]:
                best_result = (label, model, size)

    # Phase 1: single-layer conv.
    phase1_limit = time_budget * 0.6
    for ks in (1, 3, 5, 7):
        if time.time() - t_start > phase1_limit:
            break
        for seed in (0, 7, 42):
            if time.time() - t_start > phase1_limit:
                break
            torch.manual_seed(seed)
            conv = nn.Conv2d(CH, CH, kernel_size=ks, padding=ks // 2, bias=False)
            if seed == 0:
                nn.init.zeros_(conv.weight)
            state = _train(conv, 0.03, 3000)
            if state is None:
                continue
            conv.load_state_dict(state)
            w = conv.weight.detach().numpy()
            for w_cand in (w, _ternary_snap(w)):
                _consider('pt_conv', _build_conv_onnx_from_weights(
                    w_cand, ks, use_full_30=use_full, IH=IH, IW=IW))

    # Phase 2: two-layer conv (Conv -> ReLU -> Conv).
    for ks1, ks2, hidden in ((3, 1, CH), (5, 1, CH), (3, 3, CH)):
        if time.time() - t_start > time_budget:
            break
        for seed in (0, 7):
            if time.time() - t_start > time_budget:
                break
            torch.manual_seed(seed)
            net = nn.Sequential(
                nn.Conv2d(CH, hidden, kernel_size=ks1, padding=ks1 // 2, bias=False),
                nn.ReLU(),
                nn.Conv2d(hidden, CH, kernel_size=ks2, padding=ks2 // 2, bias=False),
            )
            state = _train(net, 0.01, 2500)
            if state is None:
                continue
            net.load_state_dict(state)
            w1 = net[0].weight.detach().numpy()
            w2 = net[2].weight.detach().numpy()
            for w1c, w2c in ((w1, w2), (_ternary_snap(w1), _ternary_snap(w2))):
                _consider('pt_conv2', _build_two_layer_conv_onnx(
                    w1c, w2c, ks1, ks2, use_full_30=use_full, IH=IH, IW=IW))

    if best_result is None:
        return None
    sname, model, _ = best_result
    # The file at `path` may hold a later, rejected candidate; rewrite it.
    onnx.save(model, path)
    return sname, model
-# ============================================================
-# MAIN
-# ============================================================
# Closed-form solvers as (name, function) pairs, tried in this order.
ANALYTICAL_SOLVERS = [
    # Whole-grid transforms
    ('identity', s_identity),
    ('constant', s_constant),
    ('color_map', s_color_map),
    ('transpose', s_transpose),
    ('flip', s_flip),
    ('rotate', s_rotate),
    # Repetition and scaling
    ('tile', s_tile),
    ('upscale', s_upscale),
    ('kronecker', s_kronecker),
    ('nonuniform_scale', s_nonuniform_scale),
    # Mirrored layouts
    ('mirror_h', s_mirror_h),
    ('mirror_v', s_mirror_v),
    ('quad_mirror', s_quad_mirror),
    # Concatenation and tiling variants
    ('concat', s_concat),
    ('concat_enhanced', s_concat_enhanced),
    ('diagonal_tile', s_diagonal_tile),
    # Static index-based mappings
    ('fixed_crop', s_fixed_crop),
    ('spatial_gather', s_spatial_gather),
    ('shift', s_shift),
    ('varshape_spatial_gather', s_varshape_spatial_gather),
]
def solve_task(tn, td, outdir, conv_budget=30.0):
    """Try every solver on one task and write the first validated model.

    Order: analytical solvers, then conv solvers chosen by shape category
    (same-shape: fixed conv, variable conv, PyTorch conv; otherwise:
    diff-shape conv, variable diff-shape conv).

    Args:
        tn: task number (used for the output file name).
        td: task data dict.
        outdir: directory for the .onnx file (created if missing).
        conv_budget: time budget in seconds passed to the conv solvers.

    Returns:
        (ok, solver_name, file_size, elapsed_seconds, path). For excluded
        tasks solver_name is 'excluded'; for unsolved tasks it is None.
        file_size is None unless ok is True.
    """
    t_start = time.time()
    os.makedirs(outdir, exist_ok=True)
    path = os.path.join(outdir, f"task{tn:03d}.onnx")

    def _solved(sname):
        return True, sname, os.path.getsize(path), time.time() - t_start, path

    # Skip excluded tasks
    if tn in EXCLUDED_TASKS:
        return False, 'excluded', None, time.time() - t_start, path

    # 1. Try analytical solvers (fast, tiny models)
    for sname, sfn in ANALYTICAL_SOLVERS:
        try:
            model = sfn(td)
            if model is None:
                continue
            onnx.save(model, path)
            if validate(path, td):
                return _solved(sname)
        except Exception:
            # A failing solver only means "not applicable"; move on.
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            continue

    # 2. Determine task shape category and try conv solvers
    exs = get_exs(td)
    same_shape = all(inp.shape == out.shape for inp, out in exs)
    fixed_in = len({inp.shape for inp, _ in exs}) == 1

    if same_shape:
        if fixed_in:
            result = solve_conv_fixed(td, path, time_budget=conv_budget / 2)
            if result is not None:
                return _solved(result[0])
        result = solve_conv_variable(td, path, time_budget=conv_budget)
        if result is not None:
            return _solved(result[0])
        # 3. PyTorch learned conv as fallback for same-shape tasks
        remaining = max(1, conv_budget - (time.time() - t_start))
        result = solve_pytorch_conv(td, path, time_budget=remaining)
        if result is not None:
            return _solved(result[0])
    else:
        sp = fixed_shapes(td)
        if sp is not None:
            (IH, IW), (OH, OW) = sp
            if OH <= IH and OW <= IW:
                result = solve_conv_diffshape(td, path, time_budget=conv_budget)
                if result is not None:
                    return _solved(result[0])
        # Try variable diff-shape conv (output within input bounds)
        result = solve_conv_var_diff(td, path, time_budget=conv_budget)
        if result is not None:
            return _solved(result[0])

    return False, None, None, time.time() - t_start, path
-def run_tasks(task_nums, tasks, output_dir, conv_budget, use_wandb):
-    """Solve the requested tasks one by one, printing one result line per task.
-    Task numbers missing from ``tasks`` are skipped silently; excluded tasks
-    are reported and skipped. Returns ``(results, costs_dict, total_score)``:
-    ``results`` maps task number to ``(solver_name, seconds, onnx_bytes)``,
-    ``costs_dict`` maps task number to ``macs + memory + params``, and
-    ``total_score`` is the sum of the per-task scores of the solved tasks.
-    """
-    solved = {}
-    cost_by_task = {}
-    score_sum = 0
-    for tn in task_nums:
-        if tn not in tasks:
-            continue
-        if tn in EXCLUDED_TASKS:
-            print(f"Task {tn:3d}: EXCLUDED (officially)")
-            continue
-        ok, sname, sz, t_task, model_path = solve_task(
-            tn, tasks[tn]['data'], output_dir, conv_budget)
-        # Defaults describe an unsolved task; overwritten below on success.
-        cost, score = 0, 0
-        if not ok:
-            print(f"Task {tn:3d}: UNSOLVED  {t_task:7.3f}s")
-        else:
-            macs, memory, params = score_network(model_path)
-            if macs is None:
-                # NOTE(review): a missing profile is counted as cost 0, which
-                # yields the maximum score of 25 below - confirm this is intended.
-                macs = memory = params = 0
-            cost = macs + memory + params
-            # Score is 25 - ln(cost), floored at 1.0.
-            score = max(1.0, 25.0 - math.log(max(1, cost)))
-            score_sum += score
-            solved[tn] = (sname, t_task, sz)
-            cost_by_task[tn] = cost
-            print(f"Task {tn:3d}: {sname:25s} {score:7.3f} {cost:>12} {t_task:7.3f}s  ({sz:>8,} bytes)")
-        if use_wandb and wandb is not None:
-            record = {
-                "task_id": tn,
-                "solver": sname if ok else "unsolved",
-                "onnx_bytes": sz if ok else 0,
-                "task_time_sec": t_task,
-                "cost": cost,
-                "score": score,
-            }
-            wandb.log(record)
-    return solved, cost_by_task, score_sum
-def main():
-    """Command-line entry point: load tasks, solve them, write the submission.
-    Parses the CLI flags, selects the ONNX Runtime providers, loads the tasks,
-    runs ``run_tasks`` (inside a wandb run when requested and available), then
-    writes ``submission.zip`` and ``submission.csv`` next to the output
-    directory and prints a summary with an estimated leaderboard score.
-    """
-    global ORT_PROVIDERS
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--data_dir', default='ARC-AGI/data/training/')
-    parser.add_argument('--arcgen_dir', default='', help='Path to ARC-GEN-100K/ directory')
-    parser.add_argument('--output_dir', default='submission')
-    parser.add_argument('--kaggle', action='store_true')
-    parser.add_argument('--conv_budget', type=float, default=30.0)
-    parser.add_argument('--tasks', type=str, default='')
-    parser.add_argument('--device', type=str, default='auto', choices=['auto','cpu','cuda'])
-    parser.add_argument('--use_wandb', action='store_true')
-    args = parser.parse_args()
-    config = {
-        "device": args.device,
-        "conv_budget": args.conv_budget,
-        "data_dir": args.data_dir,
-        "arcgen_dir": args.arcgen_dir,
-        "tasks": args.tasks,
-    }
-    # 'auto' keeps whatever provider list the module already defines.
-    if args.device == 'cuda':
-        ORT_PROVIDERS = ['CUDAExecutionProvider', 'CPUExecutionProvider']
-    elif args.device == 'cpu':
-        ORT_PROVIDERS = ['CPUExecutionProvider']
-    ort.set_default_logger_severity(3)
-    print(f"Using providers: {ORT_PROVIDERS}")
-    if args.kaggle:
-        tasks = load_tasks_kaggle(args.data_dir)
-    else:
-        arcgen = args.arcgen_dir if args.arcgen_dir else None
-        tasks = load_tasks_dir(args.data_dir, arcgen_dir=arcgen)
-    # Count arc-gen examples
-    total_arcgen = sum(len(t['data'].get('arc-gen', [])) for t in tasks.values())
-    print(f"Loaded {len(tasks)} tasks ({total_arcgen} ARC-GEN examples)")
-    print(f"Excluded tasks: {sorted(EXCLUDED_TASKS)}")
-    if args.tasks:
-        # Ignore empty tokens so inputs such as "1,2," or "1,,2" do not crash int().
-        task_nums = [int(t) for t in args.tasks.split(',') if t.strip()]
-    else:
-        task_nums = sorted(tasks.keys())
-    active_tasks = [t for t in task_nums if t not in EXCLUDED_TASKS]
-    print(f"Solving {len(active_tasks)} active tasks (skipping {len(task_nums) - len(active_tasks)} excluded)")
-    print(f"Conv budget: {args.conv_budget}s per task")
-    print("=" * 70)
-    t0 = time.time()
-    if args.use_wandb and wandb is not None:
-        with wandb.init(project="neurogolf", name="solver_run", config=config):
-            results, costs_dict, total_score = run_tasks(task_nums, tasks, args.output_dir, args.conv_budget, use_wandb=True)
-    else:
-        results, costs_dict, total_score = run_tasks(task_nums, tasks, args.output_dir, args.conv_budget, use_wandb=False)
-    elapsed = time.time() - t0
-    print(f"\n{'='*70}")
-    print(f"Solved: {len(results)}/{len(active_tasks)} active tasks in {elapsed:.0f}s")
-    solver_names = [v[0] for v in results.values()]
-    sc = Counter(solver_names)
-    for s, c in sc.most_common():
-        print(f"  {s}: {c}")
-    # Generate submission
-    outdir = args.output_dir
-    # The directory is otherwise only created by solve_task, which is never
-    # reached when every requested task is excluded or missing.
-    os.makedirs(outdir, exist_ok=True)
-    onnx_files = sorted(f for f in os.listdir(outdir) if f.endswith('.onnx'))
-    n_files = len(onnx_files)
-    total_size = sum(os.path.getsize(os.path.join(outdir, f)) for f in onnx_files)
-    # Create submission.zip (built in memory so its exact size is known)
-    zip_path = os.path.join(os.path.dirname(outdir) or '.', 'submission.zip')
-    buf = io.BytesIO()
-    with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
-        for f in onnx_files:
-            zf.write(os.path.join(outdir, f), f)
-    zip_bytes = buf.getvalue()
-    with open(zip_path, 'wb') as f:
-        f.write(zip_bytes)
-    zip_size = len(zip_bytes)
-    # Create submission.csv
-    csv_path = os.path.join(os.path.dirname(outdir) or '.', 'submission.csv')
-    with open(csv_path, 'w', newline='') as f:
-        w = csv.writer(f)
-        w.writerow(['task_id', 'total_cost'])
-        for tn in sorted(costs_dict.keys()):
-            w.writerow([f'task{tn:03d}', costs_dict[tn]])
-    # Estimate LB score: solved tasks get their score, unsolved get 1.0
-    unsolved_count = len(active_tasks) - len(results)
-    est_lb = total_score + unsolved_count * 1.0
-    print(f"\n{n_files} ONNX files, {total_size/1024:.1f} KB uncompressed")
-    print(f"ZIP size: {zip_size/1024:.1f} KB / {MAX_FILESIZE/1024:.0f} KB limit {'OK' if zip_size <= MAX_FILESIZE else 'OVER!'}")
-    print(f"Estimated LB score: {est_lb:.1f} (solved: {total_score:.1f} + unsolved: {unsolved_count}×1.0)")
-    print(f"Written: {zip_path} | {csv_path}")
-# Script entry point: run the solver CLI only when executed directly.
-if __name__ == "__main__":
-    main()


1	+ FILE_CONTENT_PLACEHOLDER