rogermt
/

neurogolf-solver

Model card Files Files and versions

xet

Community

rogermt committed 16 days ago

Commit

4a06c47

verified ·

1 Parent(s): 2cde25d

v2: add --device cpu/cuda/auto flag, fix providers

Browse files

Files changed (1) hide show

neurogolf_solver.py +67 -145

neurogolf_solver.py CHANGED Viewed

@@ -6,9 +6,7 @@ Pipeline: Slice -> Conv -> ArgMax -> OneHot -> Pad
 Usage:
   python neurogolf_solver.py --data_dir ARC-AGI/data/training/ --output_dir submission
-On Kaggle:
-  python neurogolf_solver.py --data_dir /kaggle/input/competitions/neurogolf-2026/ --output_dir submission --kaggle
 """
 import json, os, sys, math, time, argparse
@@ -18,16 +16,22 @@ from onnx import helper, TensorProto, numpy_helper
 import onnxruntime as ort
 from collections import Counter
-# Constants
 BATCH, CH, GH, GW = 1, 10, 30, 30
 GRID_SHAPE = [BATCH, CH, GH, GW]
 DT = TensorProto.FLOAT
 IR = 10
 OPSET = [helper.make_opsetid("", 10)]
 def load_tasks_dir(data_dir):
-    """Load tasks from directory of JSON files."""
     files = sorted(f for f in os.listdir(data_dir) if f.endswith('.json'))
     tasks = {}
     for i, f in enumerate(files):
@@ -35,9 +39,7 @@ def load_tasks_dir(data_dir):
             tasks[i+1] = {'hex': f.replace('.json',''), 'data': json.load(fh)}
     return tasks
 def load_tasks_kaggle(data_dir):
-    """Load tasks from Kaggle competition format (task001.json etc.)."""
     tasks = {}
     for tn in range(1, 401):
         path = os.path.join(data_dir, f"task{tn:03d}.json")
@@ -46,7 +48,6 @@ def load_tasks_kaggle(data_dir):
                 tasks[tn] = {'hex': f'task{tn:03d}', 'data': json.load(f)}
     return tasks
 def to_onehot(grid):
     arr = np.zeros((1, CH, GH, GW), dtype=np.float32)
     for r, row in enumerate(grid):
@@ -54,11 +55,9 @@ def to_onehot(grid):
             arr[0, v, r, c] = 1.0
     return arr
 def validate(path, td):
-    """Validate ONNX model against all train+test examples."""
     try:
-        sess = ort.InferenceSession(path, providers=['CPUExecutionProvider'])
     except:
         return False
     examples = td['train'] + td['test']
@@ -76,39 +75,32 @@ def validate(path, td):
             return False
     return True
 def mk(nodes, inits=None):
     """Wrap `nodes` (plus optional initializers) in a model with one "input" and one "output" tensor of GRID_SHAPE."""
     initializers = inits or []
     graph_inputs = [helper.make_tensor_value_info("input", DT, GRID_SHAPE)]
     graph_outputs = [helper.make_tensor_value_info("output", DT, GRID_SHAPE)]
     graph = helper.make_graph(nodes, "g", graph_inputs, graph_outputs, initializer=initializers)
     return helper.make_model(graph, ir_version=IR, opset_imports=OPSET)
 def get_exs(td):
     """Return every train and test example as an (input, output) tuple of int64 arrays."""
     pairs = []
     for example in td['train'] + td['test']:
         grid_in = np.array(example['input'], dtype=np.int64)
         grid_out = np.array(example['output'], dtype=np.int64)
         pairs.append((grid_in, grid_out))
     return pairs
 def fixed_shapes(td):
     """Return the one (input_shape, output_shape) pair shared by all examples, or None when they differ."""
     seen = {(inp.shape, out.shape) for inp, out in get_exs(td)}
     if len(seen) != 1:
         return None
     return next(iter(seen))
 # ============================================================
 # SOLVERS
 # ============================================================
 def s_identity(td):
-    """Return a one-node Identity model when every train/test output equals its input, else None."""
     for ex in td['train']+td['test']:
-        if ex['input'] != ex['output']:
-            return None
     return mk([helper.make_node('Identity', ['input'], ['output'])])
 def s_color_map(td):
-    """1x1 conv implementing color permutation."""
     cm = {}
     for ex in td['train']+td['test']:
         inp, out = np.array(ex['input']), np.array(ex['output'])
@@ -123,17 +115,12 @@ def s_color_map(td):
     return mk([helper.make_node('Conv', ['input','W'], ['output'], kernel_shape=[1,1])],
               [numpy_helper.from_array(W, 'W')])
 def s_transpose(td):
-    """Swap rows and columns: return a Transpose model (perm=[0,1,3,2]) when every output equals input.T, else None."""
     for ex in td['train']+td['test']:
-        if not np.array_equal(np.array(ex['output']), np.array(ex['input']).T):
-            return None
     return mk([helper.make_node('Transpose', ['input'], ['output'], perm=[0,1,3,2])])
 def s_flip(td):
-    """Flip vertically or horizontally using GatherElements."""
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
@@ -143,29 +130,22 @@ def s_flip(td):
         if all(np.array_equal(out, flip_fn(inp)) for inp, out in exs):
             if axis == 0:
                 idx = np.arange(GH).reshape(1,1,GH,1).repeat(CH,1).repeat(GW,3)
-                for r in range(IH):
-                    idx[0,:,r,:] = IH - 1 - r
             else:
                 idx = np.arange(GW).reshape(1,1,1,GW).repeat(CH,1).repeat(GH,2)
-                for c in range(IW):
-                    idx[0,:,:,c] = IW - 1 - c
             ax = 2 if axis == 0 else 3
-            return mk(
-                [helper.make_node('GatherElements', ['input','idx'], ['output'], axis=ax)],
-                [numpy_helper.from_array(idx.astype(np.int64), 'idx')]
-            )
     return None
 def s_rotate(td):
-    """Rotate 90/180/270 degrees."""
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
     (IH,IW),(OH,OW) = sp
     for k in [1, 2, 3]:
-        if not all(np.array_equal(out, np.rot90(inp, k)) for inp, out in exs):
-            continue
         idx = np.zeros((OH,OW,2), dtype=np.int64)
         for r in range(OH):
             for c in range(OW):
@@ -176,9 +156,7 @@ def s_rotate(td):
         return _build_gather_model(OH, OW, idx)
     return None
 def s_spatial_gather(td):
-    """Each output pixel copied from a fixed input pixel."""
     sp = fixed_shapes(td)
     if sp is None: return None
     (IH,IW),(OH,OW) = sp
@@ -188,23 +166,17 @@ def s_spatial_gather(td):
     for oi in range(OH):
         for oj in range(OW):
             vals = set(int(out[oi,oj]) for _,out in exs)
-            if len(vals) == 1:
-                cst[oi,oj] = vals.pop()
             found = False
             for ri in range(IH):
                 for rj in range(IW):
                     if all(int(inp[ri,rj]) == int(out[oi,oj]) for inp,out in exs):
-                        idx[oi,oj] = [ri, rj]
-                        found = True
-                        break
                 if found: break
-            if not found and cst[oi,oj] < 0:
-                return None
     return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
 def s_tile(td):
-    """Tile input NxM times."""
     exs = get_exs(td)
     in_shapes = set(inp.shape for inp,_ in exs)
     if len(in_shapes) != 1: return None
@@ -231,14 +203,11 @@ def s_tile(td):
     nodes = [
         helper.make_node('Slice', ['input','st','en'], ['cr']),
         helper.make_node('Tile', ['cr','rp'], ['tl']),
-        helper.make_node('Pad', ['tl'], ['output'],
-                         pads=[0,0,0,0, 0,0,pad_h,pad_w], value=0.0),
     ]
     return mk(nodes, inits)
 def s_upscale(td):
-    """Nearest-neighbor upscale by integer factor."""
     exs = get_exs(td)
     in_shapes = set(inp.shape for inp,_ in exs)
     if len(in_shapes) != 1: return None
@@ -255,27 +224,22 @@ def s_upscale(td):
     OH, OW = IH*sH, IW*sW
     if OH > 30 or OW > 30: return None
     for inp, out in exs:
-        if not np.array_equal(out, np.repeat(np.repeat(inp, sH, 0), sW, 1)):
-            return None
     idx = np.zeros((OH,OW,2), dtype=np.int64)
     for r in range(OH):
         for c in range(OW):
             idx[r,c] = [r//sH, c//sW]
     return _build_gather_model(OH, OW, idx)
 def s_concat(td):
-    """Output = concat of transformed copies of input."""
     from itertools import product as iproduct
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
     (IH,IW),(OH,OW) = sp
     transforms = [
-        ('id', lambda x: x),
-        ('fliplr', lambda x: np.fliplr(x)),
-        ('flipud', lambda x: np.flipud(x)),
-        ('rot180', lambda x: np.rot90(x, 2)),
     ]
     if OH == IH and OW % IW == 0 and OW > IW:
         n = OW // IW
@@ -286,8 +250,7 @@ def s_concat(td):
                     idx = np.zeros((OH,OW,2), dtype=np.int64)
                     for oi in range(OH):
                         for oj in range(OW):
-                            bj = oj // IW
-                            lr, lc = oi, oj % IW
                             t = transforms[combo[bj]][0]
                             if t == 'id': sr, sc = lr, lc
                             elif t == 'fliplr': sr, sc = lr, IW-1-lc
@@ -304,8 +267,7 @@ def s_concat(td):
                     idx = np.zeros((OH,OW,2), dtype=np.int64)
                     for oi in range(OH):
                         for oj in range(OW):
-                            bi = oi // IH
-                            lr, lc = oi % IH, oj
                             t = transforms[combo[bi]][0]
                             if t == 'id': sr, sc = lr, lc
                             elif t == 'fliplr': sr, sc = lr, IW-1-lc
@@ -315,9 +277,7 @@ def s_concat(td):
                     return _build_gather_model(OH, OW, idx)
     return None
 def s_constant(td):
-    """Output is always the same."""
     sp = fixed_shapes(td)
     if sp is None: return None
     exs = get_exs(td)
@@ -327,25 +287,18 @@ def s_constant(td):
     for r, row in enumerate(outs[0]):
         for c, v in enumerate(row):
             const[0, int(v), r, c] = 1.0
-    inits = [
-        numpy_helper.from_array(np.array(0.0, dtype=np.float32), 'z'),
-        numpy_helper.from_array(const, 'c'),
-    ]
-    nodes = [
-        helper.make_node('Mul', ['input','z'], ['zd']),
-        helper.make_node('ReduceSum', ['zd'], ['s'], axes=[1,2,3], keepdims=1),
-        helper.make_node('Add', ['s','c'], ['output']),
-    ]
     return mk(nodes, inits)
 # ============================================================
-# CONV SOLVER (the main workhorse)
 # ============================================================
 def solve_conv(td, path, time_budget=30.0, try_bias=True):
-    """Solve same-shape task with one-hot conv + ArgMax + OneHot.
-    Returns model or None."""
     exs = get_exs(td)
     for inp, out in exs:
         if inp.shape != out.shape: return None
@@ -355,43 +308,36 @@ def solve_conv(td, path, time_budget=30.0, try_bias=True):
     t_start = time.time()
     for use_bias in ([False, True] if try_bias else [False]):
         for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]:
-            if time.time() - t_start > time_budget:
-                return None
             pad = ks // 2
             feat = 10 * ks * ks + (1 if use_bias else 0)
             n_grid = sum(inp.size for inp, _ in exs)
-            if feat > 20000 or (feat > 5000 and n_grid > 2000):
-                continue
             patches, targets = [], []
             for inp_g, out_g in exs:
                 ih, iw = inp_g.shape
                 oh_enc = np.zeros((10, ih, iw), dtype=np.float64)
-                for c in range(10):
-                    oh_enc[c] = (inp_g == c)
                 oh_pad = np.pad(oh_enc, ((0,0),(pad,pad),(pad,pad)))
                 for r in range(ih):
                     for c in range(iw):
                         p = oh_pad[:, r:r+ks, c:c+ks].flatten()
-                        if use_bias:
-                            p = np.append(p, 1.0)
                         patches.append(p)
                         targets.append(int(out_g[r, c]))
             P = np.array(patches, dtype=np.float64)
             T = np.array(targets, dtype=np.int64)
             T_oh = np.zeros((len(T), 10), dtype=np.float64)
-            for i, t in enumerate(T):
-                T_oh[i, t] = 1.0
             WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
-            if not np.array_equal(np.argmax(P @ WT, axis=1), T):
-                continue
             if use_bias:
                 Wconv = WT[:-1].T.reshape(10, 10, ks, ks).astype(np.float32)
                 B = WT[-1].astype(np.float32)
             else:
                 Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
                 B = None
-            pad_h = GH - IH
-            pad_w = GW - IW
             inits = [
                 numpy_helper.from_array(np.array([0,0,0,0], dtype=np.int64), 'sl_st'),
                 numpy_helper.from_array(np.array([1,10,IH,IW], dtype=np.int64), 'sl_en'),
@@ -405,32 +351,26 @@ def solve_conv(td, path, time_budget=30.0, try_bias=True):
                 conv_inputs.append('B')
             nodes = [
                 helper.make_node('Slice', ['input','sl_st','sl_en'], ['grid']),
-                helper.make_node('Conv', conv_inputs, ['co'],
-                                 kernel_shape=[ks,ks], pads=[pad]*4),
                 helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=0),
                 helper.make_node('OneHot', ['am','depth','ohvals'], ['oh_out'], axis=1),
-                helper.make_node('Pad', ['oh_out'], ['output'],
-                                 pads=[0,0,0,0, 0,0,pad_h,pad_w], value=0.0),
             ]
             model = mk(nodes, inits)
             onnx.save(model, path)
-            if validate(path, td):
-                return model
     return None
 # ============================================================
 # GATHER HELPERS
 # ============================================================
 def _build_gather_model(OH, OW, idx):
-    """Build model from index array idx[OH,OW,2] -> (src_r, src_c)."""
     flat_idx = np.zeros((1,10,GH*GW), dtype=np.int64)
     mask = np.zeros((1,1,GH,GW), dtype=np.float32)
     for oi in range(OH):
         for oj in range(OW):
-            flat = idx[oi,oj,0]*GW + idx[oi,oj,1]
-            flat_idx[0,:,oi*GW+oj] = flat
             mask[0,0,oi,oj] = 1.0
     inits = [
         numpy_helper.from_array(np.array([1,10,GH*GW], dtype=np.int64), 'fs'),
@@ -446,17 +386,14 @@ def _build_gather_model(OH, OW, idx):
     ]
     return mk(nodes, inits)
 def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
-    """Build gather model with constant values for some positions."""
     flat_idx = np.zeros((1,10,GH*GW), dtype=np.int64)
     gather_mask = np.zeros((1,1,GH,GW), dtype=np.float32)
     const_oh = np.zeros((1,10,GH,GW), dtype=np.float32)
     for oi in range(OH):
         for oj in range(OW):
             if idx[oi,oj,0] >= 0:
-                flat = idx[oi,oj,0]*GW + idx[oi,oj,1]
-                flat_idx[0,:,oi*GW+oj] = flat
                 gather_mask[0,0,oi,oj] = 1.0
             elif cst[oi,oj] >= 0:
                 const_oh[0, cst[oi,oj], oi, oj] = 1.0
@@ -480,69 +417,56 @@ def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
         nodes[-1] = helper.make_node('Mul', ['raw','gmask'], ['output'])
     return mk(nodes, inits)
 # ============================================================
-# MAIN SOLVER
 # ============================================================
 ANALYTICAL_SOLVERS = [
-    ('identity', s_identity),
-    ('constant', s_constant),
-    ('color_map', s_color_map),
-    ('transpose', s_transpose),
-    ('flip', s_flip),
-    ('rotate', s_rotate),
-    ('tile', s_tile),
-    ('upscale', s_upscale),
-    ('concat', s_concat),
     ('spatial_gather', s_spatial_gather),
 ]
 def solve_task(tn, td, outdir, conv_budget=30.0):
-    """Solve one task. Returns (solved, solver_name, file_size).
-
-    Tries each entry of ANALYTICAL_SOLVERS in order, then falls back to
-    solve_conv. The model is written to <outdir>/task<NNN>.onnx; on failure
-    the result is (False, None, None).
-    """
     os.makedirs(outdir, exist_ok=True)
     path = os.path.join(outdir, f"task{tn:03d}.onnx")
     for sname, sfn in ANALYTICAL_SOLVERS:
         try:
             model = sfn(td)
-            if model is None:
-                continue
-            # The candidate is saved before it is validated, so a model that fails
-            # validation stays at `path` until a later solver overwrites it.
-            # NOTE(review): nothing in this function removes it if every solver fails.
             onnx.save(model, path)
-            if validate(path, td):
-                return True, sname, os.path.getsize(path)
-        # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit and
-        # genuine solver bugs; any failure just moves on to the next solver.
-        except:
-            pass
     model = solve_conv(td, path, time_budget=conv_budget)
-    if model is not None:
-        return True, 'conv', os.path.getsize(path)
     return False, None, None
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('--data_dir', default='ARC-AGI/data/training/')
     parser.add_argument('--output_dir', default='submission')
-    parser.add_argument('--kaggle', action='store_true', help='Use Kaggle format')
-    parser.add_argument('--conv_budget', type=float, default=30.0, help='Seconds per task for conv')
-    parser.add_argument('--tasks', type=str, default='', help='Comma-separated task numbers to solve')
     args = parser.parse_args()
-    if args.kaggle:
-        tasks = load_tasks_kaggle(args.data_dir)
-    else:
-        tasks = load_tasks_dir(args.data_dir)
-    if args.tasks:
-        task_nums = [int(t) for t in args.tasks.split(',')]
-    else:
-        task_nums = sorted(tasks.keys())
     print(f"Loaded {len(tasks)} tasks, solving {len(task_nums)}")
     print(f"Conv budget: {args.conv_budget}s per task")
     print("=" * 70)
     t0 = time.time()
     results = {}
     for tn in task_nums:
-        if tn not in tasks:
-            continue
         td = tasks[tn]['data']
         ok, sname, sz = solve_task(tn, td, args.output_dir, args.conv_budget)
         if ok:
@@ -554,13 +478,11 @@ def main():
     print(f"\n{'='*70}")
     print(f"Solved: {len(results)}/{len(task_nums)} in {elapsed:.0f}s")
     sc = Counter(results.values())
-    for s, c in sc.most_common():
-        print(f"  {s}: {c}")
     n_files = len([f for f in os.listdir(args.output_dir) if f.endswith('.onnx')])
     total_size = sum(os.path.getsize(os.path.join(args.output_dir, f))
                      for f in os.listdir(args.output_dir) if f.endswith('.onnx'))
     print(f"\n{n_files} ONNX files, total {total_size/1024:.1f} KB")
 if __name__ == '__main__':
     main()

 Usage:
   python neurogolf_solver.py --data_dir ARC-AGI/data/training/ --output_dir submission
+  python neurogolf_solver.py --data_dir ARC-AGI/data/training/ --output_dir submission --device cuda --conv_budget 60
 """
 import json, os, sys, math, time, argparse
 import onnxruntime as ort
 from collections import Counter
 BATCH, CH, GH, GW = 1, 10, 30, 30
 GRID_SHAPE = [BATCH, CH, GH, GW]
 DT = TensorProto.FLOAT
 IR = 10
 OPSET = [helper.make_opsetid("", 10)]
+def get_providers():
+    """Return the ONNX Runtime provider list: CUDA first, then CPU, when
+    CUDAExecutionProvider is reported as available; otherwise CPU only."""
+    available = ort.get_available_providers()
+    if 'CUDAExecutionProvider' in available:
+        return ['CUDAExecutionProvider', 'CPUExecutionProvider']
+    return ['CPUExecutionProvider']
+ORT_PROVIDERS = get_providers()
+print(f"ONNX Runtime providers: {ORT_PROVIDERS}")
 def load_tasks_dir(data_dir):
     files = sorted(f for f in os.listdir(data_dir) if f.endswith('.json'))
     tasks = {}
     for i, f in enumerate(files):
             tasks[i+1] = {'hex': f.replace('.json',''), 'data': json.load(fh)}
     return tasks
 def load_tasks_kaggle(data_dir):
     tasks = {}
     for tn in range(1, 401):
         path = os.path.join(data_dir, f"task{tn:03d}.json")
                 tasks[tn] = {'hex': f'task{tn:03d}', 'data': json.load(f)}
     return tasks
 def to_onehot(grid):
     arr = np.zeros((1, CH, GH, GW), dtype=np.float32)
     for r, row in enumerate(grid):
             arr[0, v, r, c] = 1.0
     return arr
 def validate(path, td):
     try:
+        sess = ort.InferenceSession(path, providers=ORT_PROVIDERS)
     except:
         return False
     examples = td['train'] + td['test']
             return False
     return True
 def mk(nodes, inits=None):
     """Wrap `nodes` (plus optional initializers) in a model with one "input" and one "output" tensor of GRID_SHAPE."""
     initializers = inits or []
     graph_inputs = [helper.make_tensor_value_info("input", DT, GRID_SHAPE)]
     graph_outputs = [helper.make_tensor_value_info("output", DT, GRID_SHAPE)]
     graph = helper.make_graph(nodes, "g", graph_inputs, graph_outputs, initializer=initializers)
     return helper.make_model(graph, ir_version=IR, opset_imports=OPSET)
 def get_exs(td):
     """Return every train and test example as an (input, output) tuple of int64 arrays."""
     pairs = []
     for example in td['train'] + td['test']:
         grid_in = np.array(example['input'], dtype=np.int64)
         grid_out = np.array(example['output'], dtype=np.int64)
         pairs.append((grid_in, grid_out))
     return pairs
 def fixed_shapes(td):
     """Return the one (input_shape, output_shape) pair shared by all examples, or None when they differ."""
     seen = {(inp.shape, out.shape) for inp, out in get_exs(td)}
     if len(seen) != 1:
         return None
     return next(iter(seen))
 # ============================================================
 # SOLVERS
 # ============================================================
 def s_identity(td):
+    """Return a one-node Identity model when every train/test output equals its input, else None."""
     for ex in td['train']+td['test']:
+        if ex['input'] != ex['output']: return None
     return mk([helper.make_node('Identity', ['input'], ['output'])])
 def s_color_map(td):
     cm = {}
     for ex in td['train']+td['test']:
         inp, out = np.array(ex['input']), np.array(ex['output'])
     return mk([helper.make_node('Conv', ['input','W'], ['output'], kernel_shape=[1,1])],
               [numpy_helper.from_array(W, 'W')])
 def s_transpose(td):
+    """Return a Transpose model (perm=[0,1,3,2]) when every output equals input.T, else None."""
     for ex in td['train']+td['test']:
+        if not np.array_equal(np.array(ex['output']), np.array(ex['input']).T): return None
     return mk([helper.make_node('Transpose', ['input'], ['output'], perm=[0,1,3,2])])
 def s_flip(td):
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
         if all(np.array_equal(out, flip_fn(inp)) for inp, out in exs):
             if axis == 0:
                 idx = np.arange(GH).reshape(1,1,GH,1).repeat(CH,1).repeat(GW,3)
+                for r in range(IH): idx[0,:,r,:] = IH - 1 - r
             else:
                 idx = np.arange(GW).reshape(1,1,1,GW).repeat(CH,1).repeat(GH,2)
+                for c in range(IW): idx[0,:,:,c] = IW - 1 - c
             ax = 2 if axis == 0 else 3
+            return mk([helper.make_node('GatherElements', ['input','idx'], ['output'], axis=ax)],
+                      [numpy_helper.from_array(idx.astype(np.int64), 'idx')])
     return None
 def s_rotate(td):
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
     (IH,IW),(OH,OW) = sp
     for k in [1, 2, 3]:
+        if not all(np.array_equal(out, np.rot90(inp, k)) for inp, out in exs): continue
         idx = np.zeros((OH,OW,2), dtype=np.int64)
         for r in range(OH):
             for c in range(OW):
         return _build_gather_model(OH, OW, idx)
     return None
 def s_spatial_gather(td):
     sp = fixed_shapes(td)
     if sp is None: return None
     (IH,IW),(OH,OW) = sp
     for oi in range(OH):
         for oj in range(OW):
             vals = set(int(out[oi,oj]) for _,out in exs)
+            if len(vals) == 1: cst[oi,oj] = vals.pop()
             found = False
             for ri in range(IH):
                 for rj in range(IW):
                     if all(int(inp[ri,rj]) == int(out[oi,oj]) for inp,out in exs):
+                        idx[oi,oj] = [ri, rj]; found = True; break
                 if found: break
+            if not found and cst[oi,oj] < 0: return None
     return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
 def s_tile(td):
     exs = get_exs(td)
     in_shapes = set(inp.shape for inp,_ in exs)
     if len(in_shapes) != 1: return None
     nodes = [
         helper.make_node('Slice', ['input','st','en'], ['cr']),
         helper.make_node('Tile', ['cr','rp'], ['tl']),
+        helper.make_node('Pad', ['tl'], ['output'], pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0),
     ]
     return mk(nodes, inits)
 def s_upscale(td):
     exs = get_exs(td)
     in_shapes = set(inp.shape for inp,_ in exs)
     if len(in_shapes) != 1: return None
     OH, OW = IH*sH, IW*sW
     if OH > 30 or OW > 30: return None
     for inp, out in exs:
+        if not np.array_equal(out, np.repeat(np.repeat(inp, sH, 0), sW, 1)): return None
     idx = np.zeros((OH,OW,2), dtype=np.int64)
     for r in range(OH):
         for c in range(OW):
             idx[r,c] = [r//sH, c//sW]
     return _build_gather_model(OH, OW, idx)
 def s_concat(td):
     from itertools import product as iproduct
     exs = get_exs(td)
     sp = fixed_shapes(td)
     if sp is None: return None
     (IH,IW),(OH,OW) = sp
     transforms = [
+        ('id', lambda x: x), ('fliplr', lambda x: np.fliplr(x)),
+        ('flipud', lambda x: np.flipud(x)), ('rot180', lambda x: np.rot90(x, 2)),
     ]
     if OH == IH and OW % IW == 0 and OW > IW:
         n = OW // IW
                     idx = np.zeros((OH,OW,2), dtype=np.int64)
                     for oi in range(OH):
                         for oj in range(OW):
+                            bj = oj // IW; lr, lc = oi, oj % IW
                             t = transforms[combo[bj]][0]
                             if t == 'id': sr, sc = lr, lc
                             elif t == 'fliplr': sr, sc = lr, IW-1-lc
                     idx = np.zeros((OH,OW,2), dtype=np.int64)
                     for oi in range(OH):
                         for oj in range(OW):
+                            bi = oi // IH; lr, lc = oi % IH, oj
                             t = transforms[combo[bi]][0]
                             if t == 'id': sr, sc = lr, lc
                             elif t == 'fliplr': sr, sc = lr, IW-1-lc
                     return _build_gather_model(OH, OW, idx)
     return None
 def s_constant(td):
     sp = fixed_shapes(td)
     if sp is None: return None
     exs = get_exs(td)
     for r, row in enumerate(outs[0]):
         for c, v in enumerate(row):
             const[0, int(v), r, c] = 1.0
+    inits = [numpy_helper.from_array(np.array(0.0, dtype=np.float32), 'z'),
+             numpy_helper.from_array(const, 'c')]
+    nodes = [helper.make_node('Mul', ['input','z'], ['zd']),
+             helper.make_node('ReduceSum', ['zd'], ['s'], axes=[1,2,3], keepdims=1),
+             helper.make_node('Add', ['s','c'], ['output'])]
     return mk(nodes, inits)
 # ============================================================
+# CONV SOLVER
 # ============================================================
 def solve_conv(td, path, time_budget=30.0, try_bias=True):
     exs = get_exs(td)
     for inp, out in exs:
         if inp.shape != out.shape: return None
     t_start = time.time()
     for use_bias in ([False, True] if try_bias else [False]):
         for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]:
+            if time.time() - t_start > time_budget: return None
             pad = ks // 2
             feat = 10 * ks * ks + (1 if use_bias else 0)
             n_grid = sum(inp.size for inp, _ in exs)
+            if feat > 20000 or (feat > 5000 and n_grid > 2000): continue
             patches, targets = [], []
             for inp_g, out_g in exs:
                 ih, iw = inp_g.shape
                 oh_enc = np.zeros((10, ih, iw), dtype=np.float64)
+                for c in range(10): oh_enc[c] = (inp_g == c)
                 oh_pad = np.pad(oh_enc, ((0,0),(pad,pad),(pad,pad)))
                 for r in range(ih):
                     for c in range(iw):
                         p = oh_pad[:, r:r+ks, c:c+ks].flatten()
+                        if use_bias: p = np.append(p, 1.0)
                         patches.append(p)
                         targets.append(int(out_g[r, c]))
             P = np.array(patches, dtype=np.float64)
             T = np.array(targets, dtype=np.int64)
             T_oh = np.zeros((len(T), 10), dtype=np.float64)
+            for i, t in enumerate(T): T_oh[i, t] = 1.0
             WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
+            if not np.array_equal(np.argmax(P @ WT, axis=1), T): continue
             if use_bias:
                 Wconv = WT[:-1].T.reshape(10, 10, ks, ks).astype(np.float32)
                 B = WT[-1].astype(np.float32)
             else:
                 Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
                 B = None
+            pad_h, pad_w = GH - IH, GW - IW
             inits = [
                 numpy_helper.from_array(np.array([0,0,0,0], dtype=np.int64), 'sl_st'),
                 numpy_helper.from_array(np.array([1,10,IH,IW], dtype=np.int64), 'sl_en'),
                 conv_inputs.append('B')
             nodes = [
                 helper.make_node('Slice', ['input','sl_st','sl_en'], ['grid']),
+                helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks,ks], pads=[pad]*4),
                 helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=0),
                 helper.make_node('OneHot', ['am','depth','ohvals'], ['oh_out'], axis=1),
+                helper.make_node('Pad', ['oh_out'], ['output'], pads=[0,0,0,0,0,0,pad_h,pad_w], value=0.0),
             ]
             model = mk(nodes, inits)
             onnx.save(model, path)
+            if validate(path, td): return model
     return None
 # ============================================================
 # GATHER HELPERS
 # ============================================================
 def _build_gather_model(OH, OW, idx):
     flat_idx = np.zeros((1,10,GH*GW), dtype=np.int64)
     mask = np.zeros((1,1,GH,GW), dtype=np.float32)
     for oi in range(OH):
         for oj in range(OW):
+            flat_idx[0,:,oi*GW+oj] = idx[oi,oj,0]*GW + idx[oi,oj,1]
             mask[0,0,oi,oj] = 1.0
     inits = [
         numpy_helper.from_array(np.array([1,10,GH*GW], dtype=np.int64), 'fs'),
     ]
     return mk(nodes, inits)
 def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
     flat_idx = np.zeros((1,10,GH*GW), dtype=np.int64)
     gather_mask = np.zeros((1,1,GH,GW), dtype=np.float32)
     const_oh = np.zeros((1,10,GH,GW), dtype=np.float32)
     for oi in range(OH):
         for oj in range(OW):
             if idx[oi,oj,0] >= 0:
+                flat_idx[0,:,oi*GW+oj] = idx[oi,oj,0]*GW + idx[oi,oj,1]
                 gather_mask[0,0,oi,oj] = 1.0
             elif cst[oi,oj] >= 0:
                 const_oh[0, cst[oi,oj], oi, oj] = 1.0
         nodes[-1] = helper.make_node('Mul', ['raw','gmask'], ['output'])
     return mk(nodes, inits)
 # ============================================================
+# MAIN
 # ============================================================
 ANALYTICAL_SOLVERS = [
+    ('identity', s_identity), ('constant', s_constant), ('color_map', s_color_map),
+    ('transpose', s_transpose), ('flip', s_flip), ('rotate', s_rotate),
+    ('tile', s_tile), ('upscale', s_upscale), ('concat', s_concat),
     ('spatial_gather', s_spatial_gather),
 ]
 def solve_task(tn, td, outdir, conv_budget=30.0):
+    """Solve one task and return (solved, solver_name, file_size).
+
+    Tries each entry of ANALYTICAL_SOLVERS in order, then falls back to
+    solve_conv. The model is written to <outdir>/task<NNN>.onnx; on failure
+    the result is (False, None, None).
+    """
     os.makedirs(outdir, exist_ok=True)
     path = os.path.join(outdir, f"task{tn:03d}.onnx")
     for sname, sfn in ANALYTICAL_SOLVERS:
         try:
             model = sfn(td)
+            if model is None: continue
+            # The candidate is saved before it is validated, so a model that fails
+            # validation stays at `path` until a later solver overwrites it.
+            # NOTE(review): nothing in this function removes it if every solver fails.
             onnx.save(model, path)
+            if validate(path, td): return True, sname, os.path.getsize(path)
+        # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit and
+        # genuine solver bugs; any failure just moves on to the next solver.
+        except: pass
     model = solve_conv(td, path, time_budget=conv_budget)
+    if model is not None: return True, 'conv', os.path.getsize(path)
     return False, None, None
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('--data_dir', default='ARC-AGI/data/training/')
     parser.add_argument('--output_dir', default='submission')
+    parser.add_argument('--kaggle', action='store_true')
+    parser.add_argument('--conv_budget', type=float, default=30.0)
+    parser.add_argument('--tasks', type=str, default='')
+    parser.add_argument('--device', type=str, default='auto', choices=['auto','cpu','cuda'])
     args = parser.parse_args()
+    global ORT_PROVIDERS
+    if args.device == 'cuda':
+        ORT_PROVIDERS = ['CUDAExecutionProvider', 'CPUExecutionProvider']
+    elif args.device == 'cpu':
+        ORT_PROVIDERS = ['CPUExecutionProvider']
+    print(f"Using providers: {ORT_PROVIDERS}")
+    if args.kaggle: tasks = load_tasks_kaggle(args.data_dir)
+    else: tasks = load_tasks_dir(args.data_dir)
+    task_nums = [int(t) for t in args.tasks.split(',')] if args.tasks else sorted(tasks.keys())
     print(f"Loaded {len(tasks)} tasks, solving {len(task_nums)}")
     print(f"Conv budget: {args.conv_budget}s per task")
     print("=" * 70)
     t0 = time.time()
     results = {}
     for tn in task_nums:
+        if tn not in tasks: continue
         td = tasks[tn]['data']
         ok, sname, sz = solve_task(tn, td, args.output_dir, args.conv_budget)
         if ok:
     print(f"\n{'='*70}")
     print(f"Solved: {len(results)}/{len(task_nums)} in {elapsed:.0f}s")
     sc = Counter(results.values())
+    for s, c in sc.most_common(): print(f"  {s}: {c}")
     n_files = len([f for f in os.listdir(args.output_dir) if f.endswith('.onnx')])
     total_size = sum(os.path.getsize(os.path.join(args.output_dir, f))
                      for f in os.listdir(args.output_dir) if f.endswith('.onnx'))
     print(f"\n{n_files} ONNX files, total {total_size/1024:.1f} KB")
 if __name__ == '__main__':
     main()