rogermt committed on
Commit
e54021c
·
verified ·
1 Parent(s): ad22281

Remove original files from root (now in own-solver/)

Browse files
neurogolf_solver/__init__.py DELETED
@@ -1,7 +0,0 @@
1
#!/usr/bin/env python3
"""ARC-AGI NeuroGolf Championship — complete solver v5, split into modular components."""

__version__ = '5.0.0'
 
 
 
 
 
 
 
 
neurogolf_solver/config.py DELETED
@@ -1,14 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Runtime configuration management."""
3
-
4
- from onnx import helper
5
-
6
def get_providers(device='auto'):
    """Return the ONNX Runtime execution-provider list for *device*.

    Only an explicit ``'cuda'`` request adds the CUDA provider; every other
    value (including the default ``'auto'``) falls back to CPU only.
    """
    cpu_only = ['CPUExecutionProvider']
    if device == 'cuda':
        return ['CUDAExecutionProvider'] + cpu_only
    return cpu_only
11
-
12
def make_opset(version=17):
    """Return a single-element opset_imports list for the default ("") ONNX domain."""
    opset_id = helper.make_opsetid("", version)
    return [opset_id]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/constants.py DELETED
@@ -1,26 +0,0 @@
1
#!/usr/bin/env python3
"""Constants and configuration values for ARC-AGI NeuroGolf Championship."""

import numpy as np
from onnx import TensorProto

# Canonical one-hot grid layout: batch x colour-channel x height x width.
BATCH, CH, GH, GW = 1, 10, 30, 30
GRID_SHAPE = [BATCH, CH, GH, GW]

# ONNX serialisation settings.
DT = TensorProto.FLOAT
IR = 8
OPSET_VERSION = 17

# Competition limits.
INT64_MIN = int(np.iinfo(np.int64).min)
BANNED_OPS = {'Loop', 'Scan', 'NonZero', 'Unique', 'Script', 'Function'}
MAX_ONNX_FILESIZE = int(1.44 * 1024 * 1024)  # per .onnx file, NOT submission zip

# Task exclusions — NONE. All 400 tasks count.
EXCLUDED_TASKS = set()

# ARC-GEN limits
MAX_ARCGEN_VALIDATE = 30
MAX_ARCGEN_FIT = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/data_loader.py DELETED
@@ -1,96 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Data loading utilities for ARC-AGI tasks."""
3
-
4
- import json
5
- import os
6
- import numpy as np
7
- from .constants import CH, GH, GW
8
-
9
-
10
def load_tasks_dir(data_dir, arcgen_dir=None):
    """Load ARC task JSON files from *data_dir*, keyed by 1-based position.

    When *arcgen_dir* contains a same-named JSON list for a task, that list
    is attached under the task's 'arc-gen' key; otherwise 'arc-gen' defaults
    to an empty list. Returns {index: {'hex': stem, 'data': task_dict}}.
    """
    tasks = {}
    json_names = sorted(n for n in os.listdir(data_dir) if n.endswith('.json'))
    for pos, name in enumerate(json_names, start=1):
        with open(os.path.join(data_dir, name)) as fh:
            data = json.load(fh)
        if arcgen_dir:
            ag_path = os.path.join(arcgen_dir, name)
            if os.path.exists(ag_path):
                with open(ag_path) as fh:
                    extra = json.load(fh)
                # Only a JSON list is a valid ARC-GEN example set.
                if isinstance(extra, list):
                    data['arc-gen'] = extra
        data.setdefault('arc-gen', [])
        tasks[pos] = {'hex': name.replace('.json', ''), 'data': data}
    return tasks
27
-
28
-
29
def load_tasks_kaggle(data_dir):
    """Load tasks stored as task001.json … task400.json (Kaggle layout).

    Missing task files are skipped; each loaded task gets an 'arc-gen'
    key defaulting to an empty list.
    """
    tasks = {}
    for tn in range(1, 401):
        path = os.path.join(data_dir, f"task{tn:03d}.json")
        if not os.path.exists(path):
            continue
        with open(path) as fh:
            data = json.load(fh)
        data.setdefault('arc-gen', [])
        tasks[tn] = {'hex': f'task{tn:03d}', 'data': data}
    return tasks
41
-
42
-
43
def to_onehot(grid):
    """One-hot encode *grid* into a (1, CH, GH, GW) float32 array.

    Cells outside the GHxGW canvas or with values outside [0, CH) are
    silently skipped, leaving all-zero channel vectors at those positions.
    """
    encoded = np.zeros((1, CH, GH, GW), dtype=np.float32)
    for r, row in enumerate(grid):
        if r >= GH:
            continue
        for c, v in enumerate(row):
            if c < GW and 0 <= v < CH:
                encoded[0, v, r, c] = 1.0
    return encoded
51
-
52
-
53
def get_exs(td):
    """Return (input, output) int64-array pairs for all train+test examples."""
    pairs = []
    for ex in td['train'] + td['test']:
        pairs.append((np.array(ex['input'], dtype=np.int64),
                      np.array(ex['output'], dtype=np.int64)))
    return pairs
57
-
58
-
59
def get_exs_for_fitting(td):
    """Return base examples plus up to 10 shape-compatible ARC-GEN pairs.

    ARC-GEN augmentation is applied only when every base input shares a
    single shape; each augmented pair must match that input shape and the
    shape of the first base output.
    """
    base = [(np.array(ex['input'], dtype=np.int64),
             np.array(ex['output'], dtype=np.int64))
            for ex in td['train'] + td['test']]
    if not base:
        return base
    input_shapes = {inp.shape for inp, _ in base}
    if len(input_shapes) != 1:
        return base
    want_in = next(iter(input_shapes))
    want_out = base[0][1].shape
    extra = []
    for ex in td.get('arc-gen', []):
        inp = np.array(ex['input'], dtype=np.int64)
        out = np.array(ex['output'], dtype=np.int64)
        if inp.shape == want_in and out.shape == want_out:
            extra.append((inp, out))
    # Cap augmentation so ARC-GEN cannot dominate the fit.
    return base + extra[:10]
76
-
77
-
78
def get_exs_for_fitting_variable(td):
    """Return base examples plus up to 20 ARC-GEN pairs for variable-shape fitting.

    An ARC-GEN pair qualifies when its input and output shapes are equal
    and fit within the 30x30 canvas.
    """
    base = [(np.array(ex['input'], dtype=np.int64),
             np.array(ex['output'], dtype=np.int64))
            for ex in td['train'] + td['test']]
    extra = []
    for ex in td.get('arc-gen', []):
        inp = np.array(ex['input'], dtype=np.int64)
        out = np.array(ex['output'], dtype=np.int64)
        if inp.shape == out.shape and inp.shape[0] <= 30 and inp.shape[1] <= 30:
            extra.append((inp, out))
    return base + extra[:20]
89
-
90
-
91
def fixed_shapes(td):
    """Return the single (input_shape, output_shape) pair, or None if they vary."""
    shape_pairs = {(inp.shape, out.shape) for inp, out in get_exs(td)}
    if len(shape_pairs) == 1:
        return next(iter(shape_pairs))
    return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/gather_helpers.py DELETED
@@ -1,63 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Gather-based model building utilities."""
3
-
4
- import numpy as np
5
- from onnx import numpy_helper, helper
6
- from .onnx_helpers import mk
7
- from .constants import GH, GW
8
-
9
-
10
def _build_gather_model(OH, OW, idx):
    """Build a zero-MAC Gather model realising a fixed pixel permutation.

    *idx* maps each output cell (oi, oj) inside the OHxOW window to a
    source (row, col); cells outside the window are zeroed by the mask.
    """
    flat_idx = np.zeros((GH * GW,), dtype=np.int64)
    mask = np.zeros((1, 1, GH, GW), dtype=np.float32)
    for oi in range(OH):
        for oj in range(OW):
            src_r, src_c = idx[oi, oj, 0], idx[oi, oj, 1]
            flat_idx[oi * GW + oj] = src_r * GW + src_c
            mask[0, 0, oi, oj] = 1.0
    inits = [
        numpy_helper.from_array(np.array([1, 10, GH * GW], dtype=np.int64), 'fs'),
        numpy_helper.from_array(flat_idx, 'idx'),
        numpy_helper.from_array(np.array([1, 10, GH, GW], dtype=np.int64), 'os'),
        numpy_helper.from_array(mask, 'mask'),
    ]
    nodes = [
        # Flatten spatial dims, gather per flattened index, reshape back, mask.
        helper.make_node('Reshape', ['input', 'fs'], ['flat']),
        helper.make_node('Gather', ['flat', 'idx'], ['g'], axis=2),
        helper.make_node('Reshape', ['g', 'os'], ['raw']),
        helper.make_node('Mul', ['raw', 'mask'], ['output']),
    ]
    return mk(nodes, inits)
31
-
32
-
33
def _build_gather_model_with_const(IH, IW, OH, OW, idx, cst):
    """Build a Gather model where some output cells are fixed constant colours.

    Cells with ``idx[oi, oj, 0] >= 0`` gather from the input; otherwise a
    cell with ``cst[oi, oj] >= 0`` is painted that colour by adding a
    one-hot constant tensor.
    NOTE(review): IH and IW are currently unused — kept for signature
    compatibility with callers.
    """
    flat_idx = np.zeros((GH * GW,), dtype=np.int64)
    gather_mask = np.zeros((1, 1, GH, GW), dtype=np.float32)
    const_oh = np.zeros((1, 10, GH, GW), dtype=np.float32)
    for oi in range(OH):
        for oj in range(OW):
            if idx[oi, oj, 0] >= 0:
                flat_idx[oi * GW + oj] = idx[oi, oj, 0] * GW + idx[oi, oj, 1]
                gather_mask[0, 0, oi, oj] = 1.0
            elif cst[oi, oj] >= 0:
                const_oh[0, cst[oi, oj], oi, oj] = 1.0
    inits = [
        numpy_helper.from_array(np.array([1, 10, GH * GW], dtype=np.int64), 'fs'),
        numpy_helper.from_array(flat_idx, 'idx'),
        numpy_helper.from_array(np.array([1, 10, GH, GW], dtype=np.int64), 'os'),
        numpy_helper.from_array(gather_mask, 'gmask'),
    ]
    nodes = [
        helper.make_node('Reshape', ['input', 'fs'], ['flat']),
        helper.make_node('Gather', ['flat', 'idx'], ['g'], axis=2),
        helper.make_node('Reshape', ['g', 'os'], ['raw']),
        helper.make_node('Mul', ['raw', 'gmask'], ['masked']),
    ]
    if np.any(const_oh > 0):
        inits.append(numpy_helper.from_array(const_oh, 'cst'))
        nodes.append(helper.make_node('Add', ['masked', 'cst'], ['output']))
    else:
        # No constant cells: retarget the final Mul directly at 'output'.
        nodes[-1] = helper.make_node('Mul', ['raw', 'gmask'], ['output'])
    return mk(nodes, inits)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/main.py DELETED
@@ -1,136 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- ARC-AGI NeuroGolf Championship - Main Entry Point
4
-
5
- Usage:
6
- python -m neurogolf_solver.main --data_dir ARC-AGI/data/training/ --output_dir submission
7
- python -m neurogolf_solver.main --kaggle --output_dir /kaggle/working/submission
8
- python -m neurogolf_solver.main --data_dir ARC-AGI/data/training/ --arcgen_dir ARC-GEN-100K/ --use_wandb
9
- """
10
-
11
- import argparse
12
- import os
13
- import sys
14
- import time
15
- import onnxruntime as ort
16
- from .config import get_providers
17
- from .data_loader import load_tasks_dir, load_tasks_kaggle
18
- from .submission import run_tasks, generate_submission, print_summary
19
- from .profiler import score_network
20
- from .constants import EXCLUDED_TASKS, MAX_ONNX_FILESIZE
21
-
22
- try:
23
- import wandb
24
- except ImportError:
25
- wandb = None
26
-
27
-
28
def check_all_models(output_dir, strict_size, strict_score):
    """Validate every .onnx file in *output_dir* for size and scoreability.

    strict_size=True  -> sys.exit(1) when any file exceeds MAX_ONNX_FILESIZE.
    strict_score=True -> sys.exit(1) when onnx_tool cannot score a model;
    otherwise unscorable models only produce a warning.
    """
    size_problems = []
    score_problems = []
    for name in sorted(os.listdir(output_dir)):
        if not name.endswith('.onnx'):
            continue
        path = os.path.join(output_dir, name)
        size = os.path.getsize(path)

        if size > MAX_ONNX_FILESIZE:
            size_problems.append((name, size))

        macs, memory, params = score_network(path)
        if macs is None or memory is None or params is None:
            score_problems.append(name)

    if size_problems:
        bar = '!' * 70
        print(f"\n{bar}")
        print(f"FATAL: {len(size_problems)} .onnx files exceed 1.44MB limit:")
        for name, size in size_problems:
            print(f" {name}: {size:,} bytes ({size/1024:.1f} KB)")
        print(bar)
        if strict_size:
            sys.exit(1)

    if score_problems:
        print(f"\nWARNING: {len(score_problems)} .onnx files unscorable by onnx_tool:")
        for name in score_problems:
            print(f" {name}")
        if strict_score:
            print("Stopping (--strict_score is on).")
            sys.exit(1)

    if not size_problems and not score_problems:
        print("\nAll .onnx files pass size and score checks.")
66
-
67
-
68
def str2bool(value):
    """argparse-friendly bool parser: 'true'/'1'/'yes' -> True, 'false'/'0'/'no' -> False.

    Needed because ``type=bool`` in argparse converts ANY non-empty string
    (including 'False') to True.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', '1', 'yes', 'y'):
        return True
    if lowered in ('false', '0', 'no', 'n'):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean, got {value!r}")


def main():
    """CLI entry point: load tasks, run solvers, validate models, emit submission."""
    parser = argparse.ArgumentParser(description='NeuroGolf Solver v5')
    parser.add_argument('--data_dir', default='ARC-AGI/data/training/')
    parser.add_argument('--arcgen_dir', default='', help='Path to ARC-GEN-100K/ directory')
    parser.add_argument('--output_dir', default='/kaggle/working/submission')
    parser.add_argument('--kaggle', action='store_true', help='Use Kaggle task format')
    parser.add_argument('--conv_budget', type=float, default=30.0, help='Seconds per conv solver per task')
    parser.add_argument('--tasks', type=str, default='', help='Comma-separated task numbers')
    parser.add_argument('--device', type=str, default='auto', choices=['auto', 'cpu', 'cuda'])
    parser.add_argument('--use_wandb', action='store_true', help='Enable W&B logging')
    # BUGFIX: was type=bool, which turned '--strict_size False' into True.
    parser.add_argument('--strict_size', type=str2bool, default=True,
                        help='Halt if any .onnx > 1.44MB (default: True)')
    parser.add_argument('--strict_score', type=str2bool, default=False,
                        help='Halt if any model unscorable (default: False)')
    args = parser.parse_args()

    providers = get_providers(args.device)

    config = {
        "device": args.device,
        "conv_budget": args.conv_budget,
        "data_dir": args.data_dir,
        "arcgen_dir": args.arcgen_dir,
        "tasks": args.tasks,
    }

    ort.set_default_logger_severity(3)
    print(f"Using providers: {providers}")
    print(f"Strict size: {args.strict_size} | Strict score: {args.strict_score}")
    print(f"Max .onnx file size: {MAX_ONNX_FILESIZE:,} bytes")

    # Load tasks
    if args.kaggle:
        tasks = load_tasks_kaggle(args.data_dir)
    else:
        arcgen = args.arcgen_dir if args.arcgen_dir else None
        tasks = load_tasks_dir(args.data_dir, arcgen_dir=arcgen)

    total_arcgen = sum(len(t['data'].get('arc-gen', [])) for t in tasks.values())
    print(f"Loaded {len(tasks)} tasks ({total_arcgen} ARC-GEN examples)")

    task_nums = [int(t) for t in args.tasks.split(',')] if args.tasks else sorted(tasks.keys())
    print(f"Solving {len(task_nums)} tasks")
    print(f"Conv budget: {args.conv_budget}s per task")
    print("=" * 70)

    t0 = time.time()

    # W&B is optional; the run behaves identically with or without it.
    if args.use_wandb and wandb is not None:
        with wandb.init(project="neurogolf", name="solver_run", config=config):
            results, costs_dict, total_score = run_tasks(
                task_nums, tasks, args.output_dir, providers,
                args.conv_budget, EXCLUDED_TASKS, use_wandb=True
            )
    else:
        results, costs_dict, total_score = run_tasks(
            task_nums, tasks, args.output_dir, providers,
            args.conv_budget, EXCLUDED_TASKS, use_wandb=False
        )

    elapsed = time.time() - t0

    # Check all output files BEFORE generating submission
    check_all_models(args.output_dir, args.strict_size, args.strict_score)

    submission_info = generate_submission(args.output_dir, results, costs_dict, task_nums)
    print_summary(results, submission_info, elapsed)


if __name__ == '__main__':
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/onnx_helpers.py DELETED
@@ -1,67 +0,0 @@
1
- #!/usr/bin/env python3
2
- """ONNX model building helper functions (opset 17)."""
3
-
4
- import numpy as np
5
- from onnx import helper, TensorProto, numpy_helper
6
- from .constants import DT, IR, GRID_SHAPE, INT64_MIN, GH, GW
7
- from .config import make_opset
8
-
9
-
10
def _make_int64_init(name, values):
    """Wrap *values* as a named int64 ONNX initializer."""
    arr = np.array(values, dtype=np.int64)
    return numpy_helper.from_array(arr, name)
13
-
14
-
15
def _build_pad_node(input_name, output_name, pad_h, pad_w, inits, suffix=''):
    """Append pad initializers to *inits* and return a constant-mode Pad node.

    Pads only the trailing side of the two spatial axes of an NCHW tensor
    (tensor-based pads input, opset 11+ layout: all begins, then all ends).
    """
    pads_name = f'pads{suffix}'
    cv_name = f'pad_cv{suffix}'
    pads_arr = np.array([0, 0, 0, 0, 0, 0, pad_h, pad_w], dtype=np.int64)
    inits.append(numpy_helper.from_array(pads_arr, pads_name))
    inits.append(numpy_helper.from_array(np.array(0.0, dtype=np.float32), cv_name))
    return helper.make_node('Pad', [input_name, pads_name, cv_name], [output_name], mode='constant')
23
-
24
-
25
def _build_slice_crop(input_name, output_name, IH, IW, inits, suffix=''):
    """Return a Slice node cropping [1,10,30,30] down to [1,10,IH,IW]."""
    st_name = f'crop_st{suffix}'
    en_name = f'crop_en{suffix}'
    inits.append(_make_int64_init(st_name, [0, 0, 0, 0]))
    inits.append(_make_int64_init(en_name, [1, 10, IH, IW]))
    return helper.make_node('Slice', [input_name, st_name, en_name], [output_name])
32
-
33
-
34
def _build_slice_reverse(input_name, output_name, axis, dim_size, inits, suffix=''):
    """Return a Slice(step=-1) node reversing one axis. Zero MACs.

    INT64_MIN as the end sentinel makes the reverse run through index 0.
    """
    names = {part: f'rev_{part}{suffix}' for part in ('st', 'en', 'ax', 'sp')}
    inits.append(_make_int64_init(names['st'], [dim_size - 1]))
    inits.append(_make_int64_init(names['en'], [INT64_MIN]))
    inits.append(_make_int64_init(names['ax'], [axis]))
    inits.append(_make_int64_init(names['sp'], [-1]))
    return helper.make_node(
        'Slice',
        [input_name, names['st'], names['en'], names['ax'], names['sp']],
        [output_name])
45
-
46
-
47
def _build_reducesum(input_name, output_name, axes_list, inits, suffix=''):
    """Return a keepdims=1 ReduceSum node with axes as a tensor input (opset 13+)."""
    axes_name = f'rs_axes{suffix}'
    inits.append(_make_int64_init(axes_name, axes_list))
    return helper.make_node('ReduceSum', [input_name, axes_name], [output_name], keepdims=1)
52
-
53
-
54
def mk(nodes, inits=None, opset_version=17):
    """Assemble an ONNX model with fixed GRID_SHAPE float input and output."""
    grid_in = helper.make_tensor_value_info("input", DT, GRID_SHAPE)
    grid_out = helper.make_tensor_value_info("output", DT, GRID_SHAPE)
    graph = helper.make_graph(nodes, "g", [grid_in], [grid_out], initializer=inits or [])
    return helper.make_model(graph, ir_version=IR, opset_imports=make_opset(opset_version))
60
-
61
-
62
def add_onehot_block(nodes, inits, am_name, oh_name):
    """Append Equal+Cast nodes converting ArgMax channel indices to a one-hot float tensor."""
    classes = np.arange(10, dtype=np.int64).reshape(1, 10, 1, 1)
    inits.append(numpy_helper.from_array(classes, 'classes'))
    # Broadcast-compare indices against 0..9, then cast the bool mask to float.
    nodes.append(helper.make_node('Equal', [am_name, 'classes'], ['eq']))
    nodes.append(helper.make_node('Cast', ['eq'], [oh_name], to=TensorProto.FLOAT))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/profiler.py DELETED
@@ -1,84 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Static profiling for ONNX models.
3
-
4
- Uses neurogolf_utils.score_network() (onnx_tool) when available — this is
5
- the ONLY scoring that matches Kaggle. The static fallback is approximate
6
- and prints a WARNING. If onnx_tool returns (None, None, None), the model
7
- is REJECTED — do not submit it.
8
- """
9
-
10
- import onnx
11
- from onnx import numpy_helper
12
- from .constants import BANNED_OPS, GH, GW
13
-
14
- try:
15
- from neurogolf_utils import score_network as _score_network_official
16
- HAS_ONNX_TOOL = True
17
- except ImportError:
18
- HAS_ONNX_TOOL = False
19
-
20
- _WARNED_NO_ONNX_TOOL = False
21
-
22
-
23
def score_network(path):
    """Score network. Returns (macs, memory, params) or (None, None, None).

    With onnx_tool installed the official scorer is used and its verdict is
    final — (None, None, None) means REJECTED. Without it, a one-time
    warning is printed and an approximate static profile is used instead.
    """
    global _WARNED_NO_ONNX_TOOL
    if not HAS_ONNX_TOOL:
        if not _WARNED_NO_ONNX_TOOL:
            print("WARNING: onnx_tool not installed. Scores are APPROXIMATE and may not match Kaggle.")
            print("WARNING: Models that fail onnx_tool profiling will be REJECTED on Kaggle.")
            print("WARNING: Run neurogolf_utils.verify_network() in a Kaggle notebook before submitting.")
            _WARNED_NO_ONNX_TOOL = True
        return _static_profile(path)
    # Official scorer — trust its result. Do NOT catch exceptions silently.
    try:
        return _score_network_official(path)
    except Exception as e:
        print(f"WARNING: onnx_tool score_network failed on {path}: {e}")
        return None, None, None
45
-
46
-
47
def _static_profile(path):
    """Static profiling fallback. APPROXIMATE — does not match Kaggle scoring.
    Only used when onnx_tool is not installed.

    Returns (macs, memory_bytes, params), or (None, None, None) when the
    model cannot be loaded or contains a banned op.
    """
    try:
        model = onnx.load(path)
    except Exception:  # BUGFIX: was a bare except (caught KeyboardInterrupt/SystemExit)
        return None, None, None
    # Banned op check is case-insensitive to match Kaggle; build the set once.
    banned_upper = {op.upper() for op in BANNED_OPS}
    tensors = {}
    params = 0
    nbytes = 0
    macs = 0
    for init in model.graph.initializer:
        a = numpy_helper.to_array(init)
        tensors[init.name] = a
        params += a.size
        nbytes += a.nbytes
    for nd in model.graph.node:
        if nd.op_type == 'Constant':
            for attr in nd.attribute:
                if attr.t and attr.t.ByteSize() > 0:
                    try:
                        a = numpy_helper.to_array(attr.t)
                        if nd.output:
                            tensors[nd.output[0]] = a
                            params += a.size
                            nbytes += a.nbytes
                    except Exception:  # BUGFIX: was bare except; malformed tensors are skipped
                        pass
        if nd.op_type.upper() in banned_upper:
            print(f"WARNING: Banned op '{nd.op_type}' found in {path}")
            return None, None, None
        # Count MACs for Conv nodes with a known 4-D weight: OCxICxKHxKW over a GHxGW grid.
        if nd.op_type == 'Conv' and len(nd.input) >= 2 and nd.input[1] in tensors:
            w = tensors[nd.input[1]]
            if w.ndim == 4:
                co, ci, kh, kw = w.shape
                macs += co * ci * kh * kw * GH * GW
    return int(macs), int(nbytes), int(params)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/solvers/WAVE2_SCAN.md DELETED
@@ -1,48 +0,0 @@
1
- # Wave 2 + Flood Fill Scan Results (2026-04-27)
2
-
3
- ## Wave 2 — Composition & Mode Extensions
4
-
5
- | Pattern | Matches |
6
- |---------|---------|
7
- | transform_then_recolor (flip/rot/transpose + color map) | 0 |
8
- | recolor_then_transform (reverse order) | 0 |
9
- | row_mode_fill (each row → dominant color) | 0 |
10
- | col_mode_fill (each col → dominant color) | 0 |
11
- | fill_bg_with_mode (zeros → global mode) | 0 |
12
- | fill_bg_with_color (zeros → fixed color, all examples) | 0 |
13
-
14
- ## Flood Fill
15
-
16
- | Pattern | Matches |
17
- |---------|---------|
18
- | flood_fill_replace (seed spreads into passable, all become fill_color) | 0 |
19
- | flood_fill_keep_seed (seed stays, passable neighbors become fill_color) | 0 |
20
-
21
- ## Pattern Inpainting
22
-
23
- | Pattern | Matches |
24
- |---------|---------|
25
- | Tile inpainting (output = perfect tile, input = tile with holes) | 0 |
26
-
27
- ## What the tasks ACTUALLY need (from manual inspection):
28
-
29
- - **Task 5**: Pattern stamping at positions indicated by markers
30
- - **Task 17**: Wallpaper defect restoration (NOT simple tile inpainting)
31
- - **Task 20**: Diamond symmetry completion with color-specific rules
32
- - **Task 27**: Shape-relative region filling (notch detection)
33
-
34
- These require **object-level reasoning**: detect shapes, understand spatial relationships
35
- between objects, apply context-dependent rules. Cannot be solved by pixel-level operations
36
- (flood fill, mode fill, color mapping) alone.
37
-
38
- ## Conclusion:
39
-
40
- Simple analytical solvers (Waves 1-2) and pixel-level propagation (flood fill)
41
- have reached their ceiling. The remaining 349 tasks need:
42
- 1. Object detection/segmentation
43
- 2. Spatial relationship reasoning
44
- 3. Context-dependent rule application
45
- 4. Pattern recognition beyond tiling
46
-
47
- These are fundamentally in the domain of learned models (conv lstsq already does this
48
- for some tasks) or much more complex hand-crafted solvers.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/solvers/__init__.py DELETED
@@ -1,6 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Solvers package for ARC-AGI NeuroGolf Championship."""
3
-
4
- from .solver_registry import ANALYTICAL_SOLVERS, solve_task
5
-
6
- __all__ = ['ANALYTICAL_SOLVERS', 'solve_task']
 
 
 
 
 
 
 
neurogolf_solver/solvers/analytical.py DELETED
@@ -1,78 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Basic analytical solvers: identity, constant, color_map, transpose."""
3
-
4
- import numpy as np
5
- from onnx import helper, numpy_helper, TensorProto
6
- from ..onnx_helpers import mk, _make_int64_init
7
- from ..data_loader import get_exs, fixed_shapes
8
-
9
-
10
- def s_identity(td):
11
- """Identity solver."""
12
- for ex in td['train'] + td['test']:
13
- if ex['input'] != ex['output']:
14
- return None
15
- return mk([helper.make_node('Identity', ['input'], ['output'])])
16
-
17
-
18
- def s_color_map(td):
19
- """Color mapping solver."""
20
- cm = {}
21
- for ex in td['train'] + td['test']:
22
- inp, out = np.array(ex['input']), np.array(ex['output'])
23
- if inp.shape != out.shape:
24
- return None
25
- for iv, ov in zip(inp.flat, out.flat):
26
- iv, ov = int(iv), int(ov)
27
- if iv in cm and cm[iv] != ov:
28
- return None
29
- cm[iv] = ov
30
- is_permutation = (set(cm.keys()) == set(cm.values()))
31
- if is_permutation:
32
- gather_ch = np.arange(10, dtype=np.int32)
33
- for src, dst in cm.items():
34
- if 0 <= src < 10 and 0 <= dst < 10:
35
- gather_ch[dst] = src
36
- inits = [numpy_helper.from_array(gather_ch, 'gi')]
37
- nodes = [helper.make_node('Gather', ['input', 'gi'], ['output'], axis=1)]
38
- return mk(nodes, inits)
39
- else:
40
- W = np.zeros((10, 10, 1, 1), dtype=np.float32)
41
- for ic in range(10):
42
- W[cm.get(ic, ic), ic, 0, 0] = 1.0
43
- return mk([helper.make_node('Conv', ['input', 'W'], ['output'], kernel_shape=[1, 1])],
44
- [numpy_helper.from_array(W, 'W')])
45
-
46
-
47
def s_transpose(td):
    """Emit a Transpose(perm=[0,1,3,2]) model iff every output is its input transposed."""
    examples = td['train'] + td['test']
    all_transposed = all(
        np.array_equal(np.array(ex['output']), np.array(ex['input']).T)
        for ex in examples)
    if not all_transposed:
        return None
    return mk([helper.make_node('Transpose', ['input'], ['output'], perm=[0, 1, 3, 2])])
53
-
54
-
55
- def s_constant(td):
56
- """Constant output solver using opset 17 ReduceSum."""
57
- sp = fixed_shapes(td)
58
- if sp is None:
59
- return None
60
- exs = get_exs(td)
61
- outs = [out for _, out in exs]
62
- if not all(np.array_equal(outs[0], o) for o in outs[1:]):
63
- return None
64
- const = np.zeros((1, 10, 30, 30), dtype=np.float32)
65
- for r, row in enumerate(outs[0]):
66
- for c, v in enumerate(row):
67
- const[0, int(v), r, c] = 1.0
68
- inits = [
69
- numpy_helper.from_array(np.array(0.0, dtype=np.float32), 'z'),
70
- numpy_helper.from_array(const, 'c'),
71
- _make_int64_init('rs_axes_cst', [1, 2, 3]),
72
- ]
73
- nodes = [
74
- helper.make_node('Mul', ['input', 'z'], ['zd']),
75
- helper.make_node('ReduceSum', ['zd', 'rs_axes_cst'], ['s'], keepdims=1),
76
- helper.make_node('Add', ['s', 'c'], ['output']),
77
- ]
78
- return mk(nodes, inits)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/solvers/conv.py DELETED
@@ -1,544 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Convolutional solvers with least squares fitting.
3
-
4
- v5.1: Refactored into composable primitives (_build_patch_matrix, _solve_weights,
5
- _extract_weights) + PCR (PCA regression) fallback via _solve_weights_pcr.
6
- PCR tested on 400 tasks: 0 new solves but no regressions. Code kept for
7
- future experiments (Lasso, Ridge can reuse the same _solve_weights interface).
8
- """
9
-
10
- import time
11
- import numpy as np
12
- import onnx
13
- from onnx import helper, numpy_helper
14
- from ..onnx_helpers import mk, _make_int64_init, _build_pad_node, add_onehot_block
15
- from ..data_loader import get_exs, get_exs_for_fitting, get_exs_for_fitting_variable, fixed_shapes
16
- from ..validators import validate
17
- from ..constants import GH, GW
18
-
19
-
20
- # ---------------------------------------------------------------------------
21
- # Core fitting primitives (composable: mix _build_patch_matrix with any solver)
22
- # ---------------------------------------------------------------------------
23
-
24
- def _build_patch_matrix(exs_raw, ks, use_bias, use_full_30=False):
25
- """Build patch matrix P and target matrix T_oh from examples.
26
- Returns (P, T, T_oh) or None if infeasible."""
27
- pad = ks // 2
28
- feat = 10 * ks * ks + (1 if use_bias else 0)
29
- if feat > 20000:
30
- return None
31
- patches, targets = [], []
32
- for inp_g, out_g in exs_raw:
33
- ih, iw = inp_g.shape
34
- if use_full_30:
35
- oh_full = np.zeros((10, GH, GW), dtype=np.float64)
36
- for c in range(10):
37
- oh_full[c, :ih, :iw] = (inp_g == c)
38
- oh_pad = np.pad(oh_full, ((0, 0), (pad, pad), (pad, pad)))
39
- else:
40
- oh_enc = np.zeros((10, ih, iw), dtype=np.float64)
41
- for c in range(10):
42
- oh_enc[c] = (inp_g == c)
43
- oh_pad = np.pad(oh_enc, ((0, 0), (pad, pad), (pad, pad)))
44
- oh, ow = out_g.shape
45
- for r in range(oh):
46
- for c in range(ow):
47
- p = oh_pad[:, r:r + ks, c:c + ks].flatten()
48
- if use_bias:
49
- p = np.append(p, 1.0)
50
- patches.append(p)
51
- targets.append(int(out_g[r, c]))
52
- n_patches = len(patches)
53
- if feat > 5000 and n_patches > 2000:
54
- return None
55
- P = np.array(patches, dtype=np.float64)
56
- T = np.array(targets, dtype=np.int64)
57
- T_oh = np.zeros((len(T), 10), dtype=np.float64)
58
- for i, t in enumerate(T):
59
- T_oh[i, t] = 1.0
60
- return P, T, T_oh
61
-
62
-
63
- def _solve_weights(P, T, T_oh):
64
- """Raw lstsq solve. Returns WT (p×10) or None."""
65
- try:
66
- WT = np.linalg.lstsq(P, T_oh, rcond=None)[0]
67
- except (np.linalg.LinAlgError, ValueError):
68
- return None
69
- if not np.array_equal(np.argmax(P @ WT, axis=1), T):
70
- return None
71
- return WT
72
-
73
-
74
- def _solve_weights_pcr(P, T, T_oh, var_thresholds=(0.999, 0.99, 0.95)):
75
- """PCA/Truncated SVD regression. Try multiple variance thresholds.
76
- Returns WT (p×10) or None.
77
- Only attempted when p/n > 0.5 (potential overfitting zone).
78
-
79
- Tested 2026-04-26: improves arc-gen accuracy by 3-9% on 4/345 unsolved
80
- tasks but never reaches 100% required for validation. Kept as fallback
81
- for marginal cases and for future combination with more arc-gen data."""
82
- n, p = P.shape
83
- if p / max(n, 1) <= 0.5:
84
- return None # lstsq is safe here, no need for PCR
85
- try:
86
- U, s, Vt = np.linalg.svd(P, full_matrices=False)
87
- except (np.linalg.LinAlgError, ValueError):
88
- return None
89
- cumvar = np.cumsum(s**2) / np.sum(s**2)
90
- for thresh in var_thresholds:
91
- k = int(np.searchsorted(cumvar, thresh)) + 1
92
- k = max(k, 5)
93
- k = min(k, min(n, p))
94
- P_red = U[:, :k] * s[:k]
95
- try:
96
- w_red = np.linalg.lstsq(P_red, T_oh, rcond=None)[0]
97
- except (np.linalg.LinAlgError, ValueError):
98
- continue
99
- if not np.array_equal(np.argmax(P_red @ w_red, axis=1), T):
100
- continue
101
- # Map back to full p-dimensional weights for ONNX conv
102
- WT = Vt[:k].T @ w_red
103
- # Verify full-space predictions match
104
- if np.array_equal(np.argmax(P @ WT, axis=1), T):
105
- return WT
106
- return None
107
-
108
-
109
- def _extract_weights(WT, ks, use_bias):
110
- """Extract Wconv and B from weight matrix WT."""
111
- if use_bias:
112
- Wconv = WT[:-1].T.reshape(10, 10, ks, ks).astype(np.float32)
113
- B = WT[-1].astype(np.float32)
114
- else:
115
- Wconv = WT.T.reshape(10, 10, ks, ks).astype(np.float32)
116
- B = None
117
- return Wconv, B
118
-
119
-
120
- # ---------------------------------------------------------------------------
121
- # Convenience wrappers (combine primitives into single-call fitting)
122
- # ---------------------------------------------------------------------------
123
-
124
- def _lstsq_conv(exs_raw, ks, use_bias, use_full_30=False):
125
- """Least squares convolutional weight fitting.
126
- Returns (Wconv, B) or None."""
127
- ptm = _build_patch_matrix(exs_raw, ks, use_bias, use_full_30)
128
- if ptm is None:
129
- return None
130
- P, T, T_oh = ptm
131
- WT = _solve_weights(P, T, T_oh)
132
- if WT is None:
133
- return None
134
- return _extract_weights(WT, ks, use_bias)
135
-
136
-
137
- def _lstsq_conv_pcr(exs_raw, ks, use_bias, use_full_30=False):
138
- """PCA regression convolutional weight fitting.
139
- Returns (Wconv, B) or None. Fallback when raw lstsq overfits."""
140
- ptm = _build_patch_matrix(exs_raw, ks, use_bias, use_full_30)
141
- if ptm is None:
142
- return None
143
- P, T, T_oh = ptm
144
- WT = _solve_weights_pcr(P, T, T_oh)
145
- if WT is None:
146
- return None
147
- return _extract_weights(WT, ks, use_bias)
148
-
149
-
150
- # ---------------------------------------------------------------------------
151
- # Solver functions (called from solver_registry.py)
152
- # ---------------------------------------------------------------------------
153
-
154
- def _build_and_validate_conv_fixed(fit_fn, fit_exs, ks, use_bias, IH, IW, td, path, providers):
155
- """Build ONNX model with given fit function, validate it. Returns (tag, model) or None."""
156
- result = fit_fn(fit_exs, ks, use_bias, use_full_30=False)
157
- if result is None:
158
- return None
159
- Wconv, B = result
160
- pad = ks // 2
161
- pad_h, pad_w = GH - IH, GW - IW
162
- inits = [
163
- _make_int64_init('sl_st', [0, 0, 0, 0]),
164
- _make_int64_init('sl_en', [1, 10, IH, IW]),
165
- numpy_helper.from_array(Wconv, 'W'),
166
- ]
167
- conv_inputs = ['grid', 'W']
168
- if B is not None:
169
- inits.append(numpy_helper.from_array(B, 'B'))
170
- conv_inputs.append('B')
171
- nodes = [
172
- helper.make_node('Slice', ['input', 'sl_st', 'sl_en'], ['grid']),
173
- helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks, ks], pads=[pad] * 4),
174
- helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
175
- ]
176
- add_onehot_block(nodes, inits, 'am', 'oh_out')
177
- nodes.append(_build_pad_node('oh_out', 'output', pad_h, pad_w, inits))
178
- model = mk(nodes, inits)
179
- onnx.save(model, path)
180
- if validate(path, td, providers):
181
- tag = 'conv_fixed' if fit_fn == _lstsq_conv else 'conv_fixed_pcr'
182
- return tag, model
183
- return None
184
-
185
-
186
- def solve_conv_fixed(td, path, providers, time_budget=30.0):
187
- """Fixed-shape convolutional solver. Tries lstsq first, PCR as second pass."""
188
- exs = get_exs(td)
189
- for inp, out in exs:
190
- if inp.shape != out.shape:
191
- return None
192
- shapes = set(inp.shape for inp, _ in exs)
193
- if len(shapes) != 1:
194
- return None
195
- IH, IW = shapes.pop()
196
- fit_exs = get_exs_for_fitting(td)
197
- fit_exs = [(i, o) for i, o in fit_exs if i.shape == o.shape and i.shape == (IH, IW)]
198
- t_start = time.time()
199
- # Pass 1: raw lstsq (same as baseline)
200
- failed_ks = [] # (ks, use_bias) pairs where lstsq fit train but failed validation
201
- for use_bias in [False, True]:
202
- for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]:
203
- if time.time() - t_start > time_budget:
204
- return None
205
- result = _lstsq_conv(fit_exs, ks, use_bias, use_full_30=False)
206
- if result is None:
207
- continue
208
- Wconv, B = result
209
- pad = ks // 2
210
- pad_h, pad_w = GH - IH, GW - IW
211
- inits = [
212
- _make_int64_init('sl_st', [0, 0, 0, 0]),
213
- _make_int64_init('sl_en', [1, 10, IH, IW]),
214
- numpy_helper.from_array(Wconv, 'W'),
215
- ]
216
- conv_inputs = ['grid', 'W']
217
- if B is not None:
218
- inits.append(numpy_helper.from_array(B, 'B'))
219
- conv_inputs.append('B')
220
- nodes = [
221
- helper.make_node('Slice', ['input', 'sl_st', 'sl_en'], ['grid']),
222
- helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks, ks], pads=[pad] * 4),
223
- helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
224
- ]
225
- add_onehot_block(nodes, inits, 'am', 'oh_out')
226
- nodes.append(_build_pad_node('oh_out', 'output', pad_h, pad_w, inits))
227
- model = mk(nodes, inits)
228
- onnx.save(model, path)
229
- if validate(path, td, providers):
230
- return 'conv_fixed', model
231
- # lstsq fit train but failed validation — candidate for PCR
232
- failed_ks.append((ks, use_bias))
233
- # Pass 2: PCR on failed ks values (only if time remains)
234
- for ks, use_bias in failed_ks:
235
- if time.time() - t_start > time_budget:
236
- return None
237
- r = _build_and_validate_conv_fixed(_lstsq_conv_pcr, fit_exs, ks, use_bias, IH, IW, td, path, providers)
238
- if r is not None:
239
- return r
240
- return None
241
-
242
-
243
- def _build_and_validate_conv_var(fit_fn, fit_exs, ks, use_bias, td, path, providers):
244
- """Build variable-shape ONNX model with given fit function. Returns (tag, model) or None."""
245
- result = fit_fn(fit_exs, ks, use_bias, use_full_30=True)
246
- if result is None:
247
- return None
248
- Wconv, B = result
249
- pad = ks // 2
250
- inits = [
251
- numpy_helper.from_array(Wconv, 'W'),
252
- _make_int64_init('rs_axes_var', [1]),
253
- ]
254
- conv_inputs = ['input', 'W']
255
- if B is not None:
256
- inits.append(numpy_helper.from_array(B, 'B'))
257
- conv_inputs.append('B')
258
- nodes = [
259
- helper.make_node('ReduceSum', ['input', 'rs_axes_var'], ['mask'], keepdims=1),
260
- helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks, ks], pads=[pad] * 4),
261
- helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
262
- ]
263
- add_onehot_block(nodes, inits, 'am', 'oh_out')
264
- nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
265
- model = mk(nodes, inits)
266
- onnx.save(model, path)
267
- if validate(path, td, providers):
268
- tag = 'conv_var' if fit_fn == _lstsq_conv else 'conv_var_pcr'
269
- return tag, model
270
- return None
271
-
272
-
273
- def solve_conv_variable(td, path, providers, time_budget=30.0):
274
- """Variable-shape conv. Tries lstsq first, PCR as second pass."""
275
- exs = get_exs(td)
276
- for inp, out in exs:
277
- if inp.shape != out.shape:
278
- return None
279
- fit_exs = get_exs_for_fitting_variable(td)
280
- fit_exs = [(i, o) for i, o in fit_exs if i.shape == o.shape]
281
- t_start = time.time()
282
- # Pass 1: raw lstsq
283
- failed_ks = []
284
- for use_bias in [False, True]:
285
- for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]:
286
- if time.time() - t_start > time_budget:
287
- return None
288
- result = _lstsq_conv(fit_exs, ks, use_bias, use_full_30=True)
289
- if result is None:
290
- continue
291
- Wconv, B = result
292
- pad = ks // 2
293
- inits = [
294
- numpy_helper.from_array(Wconv, 'W'),
295
- _make_int64_init('rs_axes_var', [1]),
296
- ]
297
- conv_inputs = ['input', 'W']
298
- if B is not None:
299
- inits.append(numpy_helper.from_array(B, 'B'))
300
- conv_inputs.append('B')
301
- nodes = [
302
- helper.make_node('ReduceSum', ['input', 'rs_axes_var'], ['mask'], keepdims=1),
303
- helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks, ks], pads=[pad] * 4),
304
- helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
305
- ]
306
- add_onehot_block(nodes, inits, 'am', 'oh_out')
307
- nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
308
- model = mk(nodes, inits)
309
- onnx.save(model, path)
310
- if validate(path, td, providers):
311
- return 'conv_var', model
312
- failed_ks.append((ks, use_bias))
313
- # Pass 2: PCR on failed ks values
314
- for ks, use_bias in failed_ks:
315
- if time.time() - t_start > time_budget:
316
- return None
317
- r = _build_and_validate_conv_var(_lstsq_conv_pcr, fit_exs, ks, use_bias, td, path, providers)
318
- if r is not None:
319
- return r
320
- return None
321
-
322
-
323
- def solve_conv_diffshape(td, path, providers, time_budget=30.0):
324
- """Different-shape convolutional solver. Tries lstsq first, PCR as second pass."""
325
- sp = fixed_shapes(td)
326
- if sp is None:
327
- return None
328
- (IH, IW), (OH, OW) = sp
329
- if IH == OH and IW == OW:
330
- return None
331
- if OH > IH or OW > IW:
332
- return None
333
- if OH > 30 or OW > 30:
334
- return None
335
- exs = get_exs(td)
336
- t_start = time.time()
337
- failed_configs = [] # (P, T, T_oh, ks, use_bias, dr_off, dc_off) for PCR retry
338
- for dr_off, dc_off in [(0, 0), ((IH - OH) // 2, (IW - OW) // 2)]:
339
- for use_bias in [False, True]:
340
- for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21]:
341
- if time.time() - t_start > time_budget:
342
- break
343
- pad = ks // 2
344
- feat = 10 * ks * ks + (1 if use_bias else 0)
345
- if feat > 10000:
346
- continue
347
- patches, targets = [], []
348
- valid = True
349
- for inp_g, out_g in exs:
350
- oh_enc = np.zeros((10, IH, IW), dtype=np.float64)
351
- for c in range(10):
352
- oh_enc[c] = (inp_g == c)
353
- oh_pad = np.pad(oh_enc, ((0, 0), (pad, pad), (pad, pad)))
354
- for r in range(OH):
355
- for c in range(OW):
356
- sr, sc = r + dr_off, c + dc_off
357
- if sr < 0 or sr >= IH or sc < 0 or sc >= IW:
358
- valid = False
359
- break
360
- p = oh_pad[:, sr:sr + ks, sc:sc + ks].flatten()
361
- if use_bias:
362
- p = np.append(p, 1.0)
363
- patches.append(p)
364
- targets.append(int(out_g[r, c]))
365
- if not valid:
366
- break
367
- if not valid:
368
- break
369
- if not valid:
370
- continue
371
- n_patches = len(patches)
372
- if feat > 5000 and n_patches > 2000:
373
- continue
374
- P = np.array(patches, dtype=np.float64)
375
- T = np.array(targets, dtype=np.int64)
376
- T_oh = np.zeros((len(T), 10), dtype=np.float64)
377
- for i, t in enumerate(T):
378
- T_oh[i, t] = 1.0
379
- # Pass 1: raw lstsq
380
- WT = _solve_weights(P, T, T_oh)
381
- if WT is None:
382
- continue
383
- Wconv, B = _extract_weights(WT, ks, use_bias)
384
- pad_h, pad_w = GH - OH, GW - OW
385
- inits = [
386
- _make_int64_init('sl_st', [0, 0, 0, 0]),
387
- _make_int64_init('sl_en', [1, 10, IH, IW]),
388
- numpy_helper.from_array(Wconv, 'W'),
389
- _make_int64_init('cr_st', [0, 0, dr_off, dc_off]),
390
- _make_int64_init('cr_en', [1, 10, dr_off + OH, dc_off + OW]),
391
- ]
392
- conv_inputs = ['grid', 'W']
393
- if B is not None:
394
- inits.append(numpy_helper.from_array(B, 'B'))
395
- conv_inputs.append('B')
396
- nodes = [
397
- helper.make_node('Slice', ['input', 'sl_st', 'sl_en'], ['grid']),
398
- helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks, ks], pads=[pad] * 4),
399
- helper.make_node('Slice', ['co', 'cr_st', 'cr_en'], ['co_crop']),
400
- helper.make_node('ArgMax', ['co_crop'], ['am'], axis=1, keepdims=1),
401
- ]
402
- add_onehot_block(nodes, inits, 'am', 'oh_out')
403
- nodes.append(_build_pad_node('oh_out', 'output', pad_h, pad_w, inits))
404
- model = mk(nodes, inits)
405
- onnx.save(model, path)
406
- if validate(path, td, providers):
407
- return 'conv_diff', model
408
- # Failed validation — save for PCR retry
409
- failed_configs.append((P, T, T_oh, ks, use_bias, dr_off, dc_off))
410
- # Pass 2: PCR on failed configs
411
- for P, T, T_oh, ks, use_bias, dr_off, dc_off in failed_configs:
412
- if time.time() - t_start > time_budget:
413
- return None
414
- WT = _solve_weights_pcr(P, T, T_oh)
415
- if WT is None:
416
- continue
417
- Wconv, B = _extract_weights(WT, ks, use_bias)
418
- pad_h, pad_w = GH - OH, GW - OW
419
- inits = [
420
- _make_int64_init('sl_st', [0, 0, 0, 0]),
421
- _make_int64_init('sl_en', [1, 10, IH, IW]),
422
- numpy_helper.from_array(Wconv, 'W'),
423
- _make_int64_init('cr_st', [0, 0, dr_off, dc_off]),
424
- _make_int64_init('cr_en', [1, 10, dr_off + OH, dc_off + OW]),
425
- ]
426
- conv_inputs = ['grid', 'W']
427
- if B is not None:
428
- inits.append(numpy_helper.from_array(B, 'B'))
429
- conv_inputs.append('B')
430
- nodes = [
431
- helper.make_node('Slice', ['input', 'sl_st', 'sl_en'], ['grid']),
432
- helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks, ks], pads=[pad] * 4),
433
- helper.make_node('Slice', ['co', 'cr_st', 'cr_en'], ['co_crop']),
434
- helper.make_node('ArgMax', ['co_crop'], ['am'], axis=1, keepdims=1),
435
- ]
436
- add_onehot_block(nodes, inits, 'am', 'oh_out')
437
- nodes.append(_build_pad_node('oh_out', 'output', pad_h, pad_w, inits))
438
- model = mk(nodes, inits)
439
- onnx.save(model, path)
440
- if validate(path, td, providers):
441
- return 'conv_diff_pcr', model
442
- return None
443
-
444
-
445
- def solve_conv_var_diff(td, path, providers, time_budget=30.0):
446
- """Variable diff-shape conv. Tries lstsq first, PCR as second pass."""
447
- exs = get_exs(td)
448
- t_start = time.time()
449
- failed_configs = [] # (P, T, T_oh, ks, use_bias) for PCR retry
450
- for use_bias in [False, True]:
451
- for ks in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]:
452
- if time.time() - t_start > time_budget:
453
- break
454
- pad = ks // 2
455
- feat = 10 * ks * ks + (1 if use_bias else 0)
456
- if feat > 20000:
457
- continue
458
- patches, targets = [], []
459
- for inp_g, out_g in exs:
460
- ih, iw = inp_g.shape
461
- oh, ow = out_g.shape
462
- oh_full = np.zeros((10, GH, GW), dtype=np.float64)
463
- for c in range(10):
464
- oh_full[c, :ih, :iw] = (inp_g == c)
465
- oh_pad = np.pad(oh_full, ((0, 0), (pad, pad), (pad, pad)))
466
- for r in range(oh):
467
- for c in range(ow):
468
- p = oh_pad[:, r:r + ks, c:c + ks].flatten()
469
- if use_bias:
470
- p = np.append(p, 1.0)
471
- patches.append(p)
472
- targets.append(int(out_g[r, c]))
473
- n_patches = len(patches)
474
- if feat > 5000 and n_patches > 2000:
475
- continue
476
- P = np.array(patches, dtype=np.float64)
477
- T = np.array(targets, dtype=np.int64)
478
- T_oh = np.zeros((len(T), 10), dtype=np.float64)
479
- for i, t in enumerate(T):
480
- T_oh[i, t] = 1.0
481
- # Pass 1: raw lstsq
482
- WT = _solve_weights(P, T, T_oh)
483
- if WT is None:
484
- continue
485
- Wconv, B = _extract_weights(WT, ks, use_bias)
486
- all_output_within_input = all(
487
- out_g.shape[0] <= inp_g.shape[0] and out_g.shape[1] <= inp_g.shape[1]
488
- for inp_g, out_g in exs
489
- )
490
- if all_output_within_input:
491
- inits = [
492
- numpy_helper.from_array(Wconv, 'W'),
493
- _make_int64_init('rs_axes_vd', [1]),
494
- ]
495
- conv_inputs = ['input', 'W']
496
- if B is not None:
497
- inits.append(numpy_helper.from_array(B, 'B'))
498
- conv_inputs.append('B')
499
- nodes = [
500
- helper.make_node('ReduceSum', ['input', 'rs_axes_vd'], ['mask'], keepdims=1),
501
- helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks, ks], pads=[pad] * 4),
502
- helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
503
- ]
504
- add_onehot_block(nodes, inits, 'am', 'oh_out')
505
- nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
506
- model = mk(nodes, inits)
507
- onnx.save(model, path)
508
- if validate(path, td, providers):
509
- return 'conv_var_diff', model
510
- # Failed validation — save for PCR
511
- failed_configs.append((P, T, T_oh, ks, use_bias))
512
- # Pass 2: PCR on failed configs
513
- for P, T, T_oh, ks, use_bias in failed_configs:
514
- if time.time() - t_start > time_budget:
515
- return None
516
- WT = _solve_weights_pcr(P, T, T_oh)
517
- if WT is None:
518
- continue
519
- Wconv, B = _extract_weights(WT, ks, use_bias)
520
- all_output_within_input = all(
521
- out_g.shape[0] <= inp_g.shape[0] and out_g.shape[1] <= inp_g.shape[1]
522
- for inp_g, out_g in exs
523
- )
524
- if all_output_within_input:
525
- inits = [
526
- numpy_helper.from_array(Wconv, 'W'),
527
- _make_int64_init('rs_axes_vd', [1]),
528
- ]
529
- conv_inputs = ['input', 'W']
530
- if B is not None:
531
- inits.append(numpy_helper.from_array(B, 'B'))
532
- conv_inputs.append('B')
533
- nodes = [
534
- helper.make_node('ReduceSum', ['input', 'rs_axes_vd'], ['mask'], keepdims=1),
535
- helper.make_node('Conv', conv_inputs, ['co'], kernel_shape=[ks, ks], pads=[pad] * 4),
536
- helper.make_node('ArgMax', ['co'], ['am'], axis=1, keepdims=1),
537
- ]
538
- add_onehot_block(nodes, inits, 'am', 'oh_out')
539
- nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
540
- model = mk(nodes, inits)
541
- onnx.save(model, path)
542
- if validate(path, td, providers):
543
- return 'conv_var_diff_pcr', model
544
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/solvers/edge.py DELETED
@@ -1,99 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Edge/boundary detection solver — Laplacian Conv.
3
-
4
- v5.2: 0 matches in current task set (edge definition too strict).
5
- Kept for future variants (per-color edges, interior-only edges, etc.).
6
- """
7
-
8
- import numpy as np
9
- from onnx import helper, numpy_helper
10
- from ..onnx_helpers import mk, _make_int64_init, _build_pad_node
11
- from ..data_loader import get_exs, fixed_shapes
12
- from ..constants import GH, GW
13
-
14
-
15
- def _has_edges(inp, out, edge_color, bg_color=0):
16
- """Check if output is edge detection of input."""
17
- h, w = inp.shape
18
- for r in range(h):
19
- for c in range(w):
20
- pix = inp[r, c]
21
- is_edge = False
22
- if pix != bg_color:
23
- for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
24
- nr, nc = r + dr, c + dc
25
- if 0 <= nr < h and 0 <= nc < w:
26
- if inp[nr, nc] != pix:
27
- is_edge = True
28
- break
29
- else:
30
- is_edge = True
31
- break
32
- expected = edge_color if is_edge else bg_color
33
- if out[r, c] != expected:
34
- return False
35
- return True
36
-
37
-
38
- def _build_edge_model(IH, IW, edge_color, bg_color=0):
39
- """Build ONNX model for edge detection via Laplacian conv."""
40
- pad_h, pad_w = GH - IH, GW - IW
41
-
42
- ch_sel = np.zeros((1, 10, 1, 1), dtype=np.float32)
43
- for c in range(10):
44
- if c != bg_color:
45
- ch_sel[0, c, 0, 0] = 1.0
46
-
47
- lap_k = np.array([[0, -1, 0],
48
- [-1, 4, -1],
49
- [0, -1, 0]], dtype=np.float32).reshape(1, 1, 3, 3)
50
-
51
- edge_oh = np.zeros((1, 10, 1, 1), dtype=np.float32)
52
- edge_oh[0, edge_color, 0, 0] = 1.0
53
- bg_oh = np.zeros((1, 10, 1, 1), dtype=np.float32)
54
- bg_oh[0, bg_color, 0, 0] = 1.0
55
-
56
- inits = [
57
- _make_int64_init('sl_st', [0, 0, 0, 0]),
58
- _make_int64_init('sl_en', [1, 10, IH, IW]),
59
- numpy_helper.from_array(ch_sel, 'ch_sel'),
60
- numpy_helper.from_array(lap_k, 'lap_k'),
61
- numpy_helper.from_array(np.float32(0.5), 'thresh'),
62
- numpy_helper.from_array(edge_oh, 'edge_oh'),
63
- numpy_helper.from_array(bg_oh, 'bg_oh'),
64
- ]
65
-
66
- nodes = [
67
- helper.make_node('Slice', ['input', 'sl_st', 'sl_en'], ['cropped']),
68
- helper.make_node('Conv', ['cropped', 'ch_sel'], ['occ'], kernel_shape=[1, 1]),
69
- helper.make_node('Conv', ['occ', 'lap_k'], ['lap_out'], kernel_shape=[3, 3], pads=[1, 1, 1, 1]),
70
- helper.make_node('Abs', ['lap_out'], ['lap_abs']),
71
- helper.make_node('Greater', ['lap_abs', 'thresh'], ['is_edge_raw']),
72
- helper.make_node('Greater', ['occ', 'thresh'], ['is_occ']),
73
- helper.make_node('And', ['is_edge_raw', 'is_occ'], ['is_edge']),
74
- helper.make_node('Where', ['is_edge', 'edge_oh', 'bg_oh'], ['result_small']),
75
- ]
76
- nodes.append(_build_pad_node('result_small', 'output', pad_h, pad_w, inits))
77
- return mk(nodes, inits)
78
-
79
-
80
- def s_edge_detect(td):
81
- """Edge detection solver: output = boundary pixels of input shapes."""
82
- exs = get_exs(td)
83
- sp = fixed_shapes(td)
84
- if sp is None:
85
- return None
86
- (IH, IW), (OH, OW) = sp
87
- if (IH, IW) != (OH, OW):
88
- return None
89
-
90
- for bg_color in [0]:
91
- out_colors = set()
92
- for _, out in exs:
93
- out_colors.update(out.flatten())
94
- for edge_color in out_colors:
95
- if edge_color == bg_color:
96
- continue
97
- if all(_has_edges(inp, out, edge_color, bg_color) for inp, out in exs):
98
- return _build_edge_model(IH, IW, edge_color, bg_color)
99
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/solvers/geometric.py DELETED
@@ -1,177 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Geometric transformation solvers: flip, rotate, shift, crop."""
3
-
4
- import numpy as np
5
- from onnx import helper
6
- from ..onnx_helpers import mk, _build_slice_crop, _build_slice_reverse, _build_pad_node
7
- from ..data_loader import get_exs, fixed_shapes
8
- from ..gather_helpers import _build_gather_model, _build_gather_model_with_const
9
- from ..constants import GH, GW
10
-
11
-
12
- def s_flip(td):
13
- """Flip using Slice(step=-1) — zero MACs."""
14
- exs = get_exs(td)
15
- sp = fixed_shapes(td)
16
- if sp is None:
17
- return None
18
- (IH, IW), (OH, OW) = sp
19
- if (IH, IW) != (OH, OW):
20
- return None
21
- for axis, flip_fn in [(0, np.flipud), (1, np.fliplr)]:
22
- if all(np.array_equal(out, flip_fn(inp)) for inp, out in exs):
23
- onnx_axis = 2 if axis == 0 else 3
24
- dim_size = IH if axis == 0 else IW
25
- pad_h, pad_w = GH - IH, GW - IW
26
- inits = []
27
- nodes = []
28
- nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
29
- nodes.append(_build_slice_reverse('cropped', 'flipped', onnx_axis, dim_size, inits))
30
- nodes.append(_build_pad_node('flipped', 'output', pad_h, pad_w, inits))
31
- return mk(nodes, inits)
32
- return None
33
-
34
-
35
- def s_rotate(td):
36
- """Rotate using Slice+Transpose — zero MACs for square grids and k=2.
37
- Gather fallback for non-square k=1,3."""
38
- exs = get_exs(td)
39
- sp = fixed_shapes(td)
40
- if sp is None:
41
- return None
42
- (IH, IW), (OH, OW) = sp
43
- for k in [1, 2, 3]:
44
- if not all(np.array_equal(out, np.rot90(inp, k)) for inp, out in exs):
45
- continue
46
- if k == 2:
47
- pad_h, pad_w = GH - OH, GW - OW
48
- inits = []
49
- nodes = []
50
- nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
51
- nodes.append(_build_slice_reverse('cropped', 'flip_h', 2, IH, inits, suffix='_h'))
52
- nodes.append(_build_slice_reverse('flip_h', 'rotated', 3, IW, inits, suffix='_w'))
53
- nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
54
- return mk(nodes, inits)
55
- elif k == 1 and IH == IW:
56
- pad_h, pad_w = GH - IH, GW - IW
57
- inits = []
58
- nodes = []
59
- nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
60
- nodes.append(helper.make_node('Transpose', ['cropped'], ['transposed'], perm=[0, 1, 3, 2]))
61
- nodes.append(_build_slice_reverse('transposed', 'rotated', 2, IH, inits))
62
- nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
63
- return mk(nodes, inits)
64
- elif k == 3 and IH == IW:
65
- pad_h, pad_w = GH - IH, GW - IW
66
- inits = []
67
- nodes = []
68
- nodes.append(_build_slice_crop('input', 'cropped', IH, IW, inits))
69
- nodes.append(_build_slice_reverse('cropped', 'flipped', 2, IH, inits))
70
- nodes.append(helper.make_node('Transpose', ['flipped'], ['rotated'], perm=[0, 1, 3, 2]))
71
- nodes.append(_build_pad_node('rotated', 'output', pad_h, pad_w, inits))
72
- return mk(nodes, inits)
73
- else:
74
- idx = np.zeros((OH, OW, 2), dtype=np.int64)
75
- for r in range(OH):
76
- for c in range(OW):
77
- if k == 1:
78
- sr, sc = c, IH - 1 - r
79
- elif k == 3:
80
- sr, sc = IW - 1 - c, r
81
- idx[r, c] = [sr, sc]
82
- return _build_gather_model(OH, OW, idx)
83
- return None
84
-
85
-
86
- def s_shift(td):
87
- """Shift transformation solver."""
88
- exs = get_exs(td)
89
- sp = fixed_shapes(td)
90
- if sp is None:
91
- return None
92
- (IH, IW), (OH, OW) = sp
93
- if (IH, IW) != (OH, OW):
94
- return None
95
- for dr in range(-5, 6):
96
- for dc in range(-5, 6):
97
- if dr == 0 and dc == 0:
98
- continue
99
- ok = True
100
- for inp, out in exs:
101
- shifted = np.zeros_like(inp)
102
- r0, r1 = max(0, dr), min(IH, IH + dr)
103
- c0, c1 = max(0, dc), min(IW, IW + dc)
104
- if r1 > r0 and c1 > c0:
105
- sr0, sc0 = max(0, -dr), max(0, -dc)
106
- shifted[r0:r1, c0:c1] = inp[sr0:sr0 + (r1 - r0), sc0:sc0 + (c1 - c0)]
107
- if not np.array_equal(shifted, out):
108
- ok = False
109
- break
110
- if not ok:
111
- continue
112
- idx = np.zeros((OH, OW, 2), dtype=np.int64)
113
- cst = np.full((OH, OW), 0, dtype=np.int64)
114
- for r in range(OH):
115
- for c in range(OW):
116
- sr, sc = r - dr, c - dc
117
- if 0 <= sr < IH and 0 <= sc < IW:
118
- idx[r, c] = [sr, sc]
119
- else:
120
- idx[r, c] = [-1, -1]
121
- return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
122
- return None
123
-
124
-
125
- def s_fixed_crop(td):
126
- """Fixed crop solver."""
127
- exs = get_exs(td)
128
- sp = fixed_shapes(td)
129
- if sp is None:
130
- return None
131
- (IH, IW), (OH, OW) = sp
132
- if OH > IH or OW > IW or (OH == IH and OW == IW):
133
- return None
134
- for r0 in range(IH - OH + 1):
135
- for c0 in range(IW - OW + 1):
136
- if all(np.array_equal(inp[r0:r0 + OH, c0:c0 + OW], out) for inp, out in exs):
137
- idx = np.zeros((OH, OW, 2), dtype=np.int64)
138
- for r in range(OH):
139
- for c in range(OW):
140
- idx[r, c] = [r0 + r, c0 + c]
141
- return _build_gather_model(OH, OW, idx)
142
- return None
143
-
144
-
145
- def s_gravity(td):
146
- """Detect gravity-like compaction (detection only, no ONNX model built)."""
147
- exs = get_exs(td)
148
- sp = fixed_shapes(td)
149
- if sp is None:
150
- return None
151
- (IH, IW), (OH, OW) = sp
152
- if (IH, IW) != (OH, OW):
153
- return None
154
-
155
- def _gravity(grid, direction):
156
- r = np.zeros_like(grid)
157
- h, w = grid.shape
158
- if direction in ('down', 'up'):
159
- for c in range(w):
160
- nz = grid[:, c][grid[:, c] != 0]
161
- if direction == 'down':
162
- r[h - len(nz):h, c] = nz
163
- else:
164
- r[:len(nz), c] = nz
165
- else:
166
- for rr in range(h):
167
- nz = grid[rr, :][grid[rr, :] != 0]
168
- if direction == 'right':
169
- r[rr, w - len(nz):w] = nz
170
- else:
171
- r[rr, :len(nz)] = nz
172
- return r
173
-
174
- for d in ('down', 'up', 'left', 'right'):
175
- if all(np.array_equal(_gravity(inp, d), out) for inp, out in exs):
176
- return None
177
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/solvers/gravity.py DELETED
@@ -1,140 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Gravity solver — unrolled bubble-sort via Conv + Where.
3
-
4
- v5.2: Solves Task 78 (direction=up, bg=0, score 8.399).
5
- Tries all 4 directions × 10 bg colors. Fixed-shape only.
6
- """
7
-
8
- import numpy as np
9
- from onnx import helper, numpy_helper
10
- from ..onnx_helpers import mk, _make_int64_init, _build_pad_node, add_onehot_block
11
- from ..data_loader import get_exs, fixed_shapes
12
- from ..constants import GH, GW
13
-
14
-
15
- def _gravity_np(grid, direction, bg_color=0):
16
- """Apply gravity in numpy for verification."""
17
- r = np.full_like(grid, bg_color)
18
- h, w = grid.shape
19
- if direction == 'down':
20
- for c in range(w):
21
- nz = grid[:, c][grid[:, c] != bg_color]
22
- r[h - len(nz):h, c] = nz
23
- elif direction == 'up':
24
- for c in range(w):
25
- nz = grid[:, c][grid[:, c] != bg_color]
26
- r[:len(nz), c] = nz
27
- elif direction == 'right':
28
- for rr in range(h):
29
- nz = grid[rr, :][grid[rr, :] != bg_color]
30
- r[rr, w - len(nz):w] = nz
31
- elif direction == 'left':
32
- for rr in range(h):
33
- nz = grid[rr, :][grid[rr, :] != bg_color]
34
- r[rr, :len(nz)] = nz
35
- return r
36
-
37
-
38
def _build_gravity_model(IH, IW, direction, bg_color=0):
    """Build ONNX model for gravity via unrolled bubble-sort.

    Each step compares adjacent cells and swaps if needed:
    - If current cell is bg AND source neighbor is non-bg → fill with source
    - If current cell is non-bg AND destination neighbor is bg → vacate to bg
    After max(IH,IW) passes, all non-bg pixels settle in the gravity direction.

    Args:
        IH, IW: task grid height/width (output shape equals input shape).
        direction: one of 'down', 'up', 'right', 'left'.
        bg_color: background color channel index (0-9).

    Returns:
        ONNX ModelProto assembled by mk(nodes, inits).
    """
    pad_h, pad_w = GH - IH, GW - IW
    # One pass per row/column lets any pixel travel the full grid span.
    n_steps = max(IH, IW)

    # Per-channel 3x3 identity kernels acting as one-cell shifts:
    # pull_src reads each cell's neighbor on the side pixels fall FROM,
    # pull_dst reads the neighbor on the side pixels fall TOWARD.
    pull_above = np.zeros((10, 10, 3, 3), dtype=np.float32)
    pull_below = np.zeros((10, 10, 3, 3), dtype=np.float32)
    for ch in range(10):
        if direction == 'down':
            pull_above[ch, ch, 0, 1] = 1.0
            pull_below[ch, ch, 2, 1] = 1.0
        elif direction == 'up':
            pull_above[ch, ch, 2, 1] = 1.0
            pull_below[ch, ch, 0, 1] = 1.0
        elif direction == 'right':
            pull_above[ch, ch, 1, 0] = 1.0
            pull_below[ch, ch, 1, 2] = 1.0
        elif direction == 'left':
            pull_above[ch, ch, 1, 2] = 1.0
            pull_below[ch, ch, 1, 0] = 1.0

    # bg_sel selects the background channel for masking; bg_oh is the
    # one-hot background pixel used when a cell is vacated.
    bg_sel = np.zeros((1, 10, 1, 1), dtype=np.float32)
    bg_sel[0, bg_color, 0, 0] = 1.0
    bg_oh = np.zeros((1, 10, 1, 1), dtype=np.float32)
    bg_oh[0, bg_color, 0, 0] = 1.0

    inits = [
        _make_int64_init('sl_st', [0, 0, 0, 0]),
        _make_int64_init('sl_en', [1, 10, IH, IW]),
        numpy_helper.from_array(pull_above, 'pull_src'),
        numpy_helper.from_array(pull_below, 'pull_dst'),
        numpy_helper.from_array(bg_sel, 'bg_sel'),
        numpy_helper.from_array(bg_oh, 'bg_oh'),
        numpy_helper.from_array(np.float32(0.5), 'half'),
    ]

    nodes = [
        # Crop the padded 30x30 input down to the task's IHxIW region.
        helper.make_node('Slice', ['input', 'sl_st', 'sl_en'], ['cur_0']),
    ]

    cur = 'cur_0'
    for i in range(n_steps):
        # src: one-hot grid shifted so each cell sees its source neighbor.
        src = f'src_{i}'
        nodes.append(helper.make_node('Conv', [cur, 'pull_src'], [src],
                                      kernel_shape=[3, 3], pads=[1, 1, 1, 1]))

        # cur_is_bg: the current cell's background-channel mass > 0.5.
        nodes.append(helper.make_node('Mul', [cur, 'bg_sel'], [f'cbg_{i}']))
        inits.append(_make_int64_init(f'ax1_{i}', [1]))
        nodes.append(helper.make_node('ReduceSum', [f'cbg_{i}', f'ax1_{i}'], [f'cbgsum_{i}'], keepdims=1))
        nodes.append(helper.make_node('Greater', [f'cbgsum_{i}', 'half'], [f'cur_is_bg_{i}']))

        # src_is_bg: same test on the shifted (source) grid.
        nodes.append(helper.make_node('Mul', [src, 'bg_sel'], [f'sbg_{i}']))
        inits.append(_make_int64_init(f'ax2_{i}', [1]))
        nodes.append(helper.make_node('ReduceSum', [f'sbg_{i}', f'ax2_{i}'], [f'sbgsum_{i}'], keepdims=1))
        nodes.append(helper.make_node('Not', [f'cur_is_bg_{i}'], [f'cur_not_bg_{i}']))

        # fill: current cell is bg AND its source neighbor is non-bg.
        nodes.append(helper.make_node('Greater', [f'sbgsum_{i}', 'half'], [f'src_is_bg_{i}']))
        nodes.append(helper.make_node('Not', [f'src_is_bg_{i}'], [f'src_not_bg_{i}']))
        nodes.append(helper.make_node('And', [f'cur_is_bg_{i}', f'src_not_bg_{i}'], [f'fill_{i}']))

        # vacate: current cell is non-bg AND its destination neighbor is bg.
        dst = f'dst_{i}'
        nodes.append(helper.make_node('Conv', [cur, 'pull_dst'], [dst],
                                      kernel_shape=[3, 3], pads=[1, 1, 1, 1]))
        nodes.append(helper.make_node('Mul', [dst, 'bg_sel'], [f'dbg_{i}']))
        inits.append(_make_int64_init(f'ax3_{i}', [1]))
        nodes.append(helper.make_node('ReduceSum', [f'dbg_{i}', f'ax3_{i}'], [f'dbgsum_{i}'], keepdims=1))
        nodes.append(helper.make_node('Greater', [f'dbgsum_{i}', 'half'], [f'dst_is_bg_{i}']))
        nodes.append(helper.make_node('And', [f'cur_not_bg_{i}', f'dst_is_bg_{i}'], [f'vacate_{i}']))

        # Apply fill, then vacate, to advance every pixel one cell.
        nxt = f'cur_{i+1}'
        nodes.append(helper.make_node('Where', [f'fill_{i}', src, cur], [f'tmp_{i}']))
        nodes.append(helper.make_node('Where', [f'vacate_{i}', 'bg_oh', f'tmp_{i}'], [nxt]))
        cur = nxt

    # Collapse one-hot → class index, re-one-hot, and pad back to 30x30.
    nodes.append(helper.make_node('ArgMax', [cur], ['grav_am'], axis=1, keepdims=1))
    add_onehot_block(nodes, inits, 'grav_am', 'grav_oh')
    nodes.append(_build_pad_node('grav_oh', 'output', pad_h, pad_w, inits))
    return mk(nodes, inits)
122
-
123
-
124
def s_gravity_unrolled(td):
    """Gravity solver with unrolled Conv+Where steps.

    Requires identical fixed input/output shapes.  Searches every
    (background color, direction) pair against the numpy reference
    implementation and builds the ONNX model for the first match.
    """
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if (IH, IW) != (OH, OW):
        return None

    exs = get_exs(td)
    for bg in range(10):
        for d in ('down', 'up', 'left', 'right'):
            matches = all(np.array_equal(_gravity_np(inp, d, bg), out)
                          for inp, out in exs)
            if matches:
                return _build_gravity_model(IH, IW, d, bg)
    return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/solvers/mode.py DELETED
@@ -1,63 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Mode fill solver — output = solid fill of most common input color.
3
-
4
- v5.2: Solves Task 129 (score 19.451).
5
- Uses runtime ReduceSum→ArgMax→Expand for variable mode across inputs.
6
- Falls through to s_constant when mode is fixed across all examples.
7
- """
8
-
9
- import numpy as np
10
- from onnx import helper, numpy_helper, TensorProto
11
- from ..onnx_helpers import mk, _make_int64_init, _build_pad_node
12
- from ..data_loader import get_exs, fixed_shapes
13
- from ..constants import GH, GW
14
-
15
-
16
def s_mode_fill(td):
    """Mode fill: output is entirely the most common color from input.
    Uses runtime ArgMax to handle variable mode across inputs.

    Returns an ONNX model, or None when the task does not match — including
    when the mode is the same in every example, which s_constant solves
    more cheaply.
    """
    exs = get_exs(td)

    # Every example must map to a solid grid of that input's modal color.
    for inp, out in exs:
        if inp.shape != out.shape:
            return None
        vals, counts = np.unique(inp, return_counts=True)
        mode = vals[np.argmax(counts)]
        if not np.all(out == mode):
            return None

    # Check if mode is always the same color
    modes = set()
    for inp, out in exs:
        vals, counts = np.unique(inp, return_counts=True)
        modes.add(vals[np.argmax(counts)])

    if len(modes) == 1:
        return None  # Let s_constant handle it

    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if (IH, IW) != (OH, OW):
        return None

    pad_h, pad_w = GH - IH, GW - IW

    inits = [
        _make_int64_init('sl_st', [0, 0, 0, 0]),
        _make_int64_init('sl_en', [1, 10, IH, IW]),
        # Sum over H and W (axes 2, 3) → per-color histogram of the one-hot input.
        _make_int64_init('rs_axes_mode', [2, 3]),
        numpy_helper.from_array(np.arange(10, dtype=np.int64).reshape(1, 10, 1, 1), 'classes'),
    ]

    nodes = [
        helper.make_node('Slice', ['input', 'sl_st', 'sl_en'], ['cropped']),
        # Histogram → ArgMax picks the modal color index at runtime.
        helper.make_node('ReduceSum', ['cropped', 'rs_axes_mode'], ['hist'], keepdims=1),
        helper.make_node('ArgMax', ['hist'], ['mode_idx'], axis=1, keepdims=1),
        # One-hot encode the mode and broadcast it across the whole grid.
        helper.make_node('Equal', ['mode_idx', 'classes'], ['eq']),
        helper.make_node('Cast', ['eq'], ['mode_oh'], to=TensorProto.FLOAT),
        helper.make_node('Expand', ['mode_oh', 'sl_en'], ['expanded']),
    ]
    nodes.append(_build_pad_node('expanded', 'output', pad_h, pad_w, inits))
    return mk(nodes, inits)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/solvers/solver_registry.py DELETED
@@ -1,163 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Solver registry and task solving orchestration."""
3
-
4
- import os
5
- import time
6
- import onnx
7
- from .analytical import s_identity, s_constant, s_color_map, s_transpose
8
- from .geometric import s_flip, s_rotate, s_shift, s_fixed_crop, s_gravity
9
- from .tiling import (s_tile, s_upscale, s_kronecker, s_nonuniform_scale, s_diagonal_tile,
10
- s_mirror_h, s_mirror_v, s_quad_mirror, s_concat, s_concat_enhanced,
11
- s_spatial_gather, s_varshape_spatial_gather)
12
- from .gravity import s_gravity_unrolled
13
- from .edge import s_edge_detect
14
- from .mode import s_mode_fill
15
- from .wave1 import (s_downsample_stride, s_symmetry_complete, s_extract_inner,
16
- s_add_border, s_sparse_fill, s_channel_filter)
17
- from .conv import solve_conv_fixed, solve_conv_variable, solve_conv_diffshape, solve_conv_var_diff
18
- from ..data_loader import get_exs, fixed_shapes
19
- from ..validators import validate
20
- from ..profiler import score_network
21
- from ..constants import EXCLUDED_TASKS, MAX_ONNX_FILESIZE
22
-
23
- # Analytical solvers registry — order matters (cheaper first)
24
ANALYTICAL_SOLVERS = [
    # Identity / constant / per-pixel recolor — cheapest checks first.
    ('identity', s_identity),
    ('constant', s_constant),
    ('color_map', s_color_map),
    # Whole-grid geometric transforms.
    ('transpose', s_transpose),
    ('flip', s_flip),
    ('rotate', s_rotate),
    ('shift', s_shift),
    # Tiling / scaling / mirroring / concatenation.
    ('tile', s_tile),
    ('upscale', s_upscale),
    ('kronecker', s_kronecker),
    ('nonuniform_scale', s_nonuniform_scale),
    ('mirror_h', s_mirror_h),
    ('mirror_v', s_mirror_v),
    ('quad_mirror', s_quad_mirror),
    ('concat', s_concat),
    ('concat_enhanced', s_concat_enhanced),
    ('diagonal_tile', s_diagonal_tile),
    # Cropping and generic static spatial remaps.
    ('fixed_crop', s_fixed_crop),
    ('spatial_gather', s_spatial_gather),
    ('varshape_spatial_gather', s_varshape_spatial_gather),
    # Heavier pattern-specific solvers.
    ('gravity_unrolled', s_gravity_unrolled),
    ('edge_detect', s_edge_detect),
    ('mode_fill', s_mode_fill),
    # Wave 1 static remapping solvers.
    ('downsample_stride', s_downsample_stride),
    ('symmetry_complete', s_symmetry_complete),
    ('extract_inner', s_extract_inner),
    ('add_border', s_add_border),
    ('sparse_fill', s_sparse_fill),
    ('channel_filter', s_channel_filter),
]
-
56
-
57
def _check_size(path):
    """Return True if file is within 1.44MB limit (MAX_ONNX_FILESIZE);
    a missing/unreadable file counts as failing the check."""
    try:
        size = os.path.getsize(path)
    except OSError:
        return False
    return size <= MAX_ONNX_FILESIZE
63
-
64
-
65
def _check_scoreable(path):
    """Return True if score_network returns valid scores (not None).
    A model that can't be scored will be REJECTED by Kaggle."""
    macs, memory, params = score_network(path)
    return macs is not None and memory is not None and params is not None
72
-
73
-
74
def _accept_model(path, td, providers):
    """Full acceptance check: size + validate (outputs) + scoreable.
    Returns True only if the model would be accepted by Kaggle.
    Short-circuits on the first failing (and cheapest) check."""
    return (_check_size(path)
            and validate(path, td, providers)
            and _check_scoreable(path))
84
-
85
-
86
- def _cleanup_failed(path):
87
- """Delete leftover .onnx file from failed solve attempts.
88
- Prevents bad files from ending up in submission zip."""
89
- try:
90
- if os.path.exists(path):
91
- os.remove(path)
92
- except OSError:
93
- pass
94
-
95
-
96
def solve_task(tn, td, outdir, providers, conv_budget=30.0, excluded_tasks=None):
    """Solve a single ARC-AGI task.

    Args:
        tn: task number — used for the output filename and exclusion check.
        td: task data (train/test examples, as consumed by get_exs).
        outdir: directory where the per-task .onnx model is written.
        providers: ONNX Runtime execution providers passed to validation.
        conv_budget: total time budget (seconds) shared by the conv solvers.
        excluded_tasks: optional override of the module-level exclusion set.

    Returns:
        (ok, solver_name, file_size, elapsed, model_path).
        If unsolved, deletes any leftover .onnx file.
    """
    if excluded_tasks is None:
        excluded_tasks = EXCLUDED_TASKS

    t_start = time.time()
    os.makedirs(outdir, exist_ok=True)
    path = os.path.join(outdir, f"task{tn:03d}.onnx")

    if tn in excluded_tasks:
        return False, 'excluded', None, time.time() - t_start, path

    # 1. Try analytical solvers (fast, tiny models)
    for sname, sfn in ANALYTICAL_SOLVERS:
        try:
            model = sfn(td)
            if model is None:
                continue
            onnx.save(model, path)
            if _accept_model(path, td, providers):
                return True, sname, os.path.getsize(path), time.time() - t_start, path
        except Exception:
            # Solvers are best-effort; a crash in one must not abort the task.
            # (Was a bare `except:`, which also swallowed KeyboardInterrupt
            # and SystemExit — fixed to Exception only.)
            pass

    # 2. Determine task shape category and try conv solvers
    exs = get_exs(td)
    same_shape = all(inp.shape == out.shape for inp, out in exs)
    shapes = set(inp.shape for inp, _ in exs)
    fixed_in = len(shapes) == 1

    conv_time = conv_budget

    def _accept_conv(result):
        # Post-check a conv solver result (conv solvers validate outputs
        # internally, so only size + scoreability are re-checked here).
        # Returns the success tuple, or None to keep trying other solvers.
        if result is None:
            return None
        if _check_size(path) and _check_scoreable(path):
            sname, _model = result
            return True, sname, os.path.getsize(path), time.time() - t_start, path
        return None

    if same_shape:
        if fixed_in:
            accepted = _accept_conv(
                solve_conv_fixed(td, path, providers, time_budget=conv_time / 2))
            if accepted is not None:
                return accepted
        accepted = _accept_conv(
            solve_conv_variable(td, path, providers, time_budget=conv_time))
        if accepted is not None:
            return accepted
    else:
        sp = fixed_shapes(td)
        if sp is not None:
            (IH, IW), (OH, OW) = sp
            # Diff-shape conv only handles shrinking outputs.
            if OH <= IH and OW <= IW:
                accepted = _accept_conv(
                    solve_conv_diffshape(td, path, providers, time_budget=conv_time))
                if accepted is not None:
                    return accepted

        accepted = _accept_conv(
            solve_conv_var_diff(td, path, providers, time_budget=conv_time))
        if accepted is not None:
            return accepted

    # All solvers failed — delete leftover .onnx file
    _cleanup_failed(path)
    return False, None, None, time.time() - t_start, path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/solvers/tiling.py DELETED
@@ -1,429 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Tiling, upscaling, mirror, concat, and spatial gather solvers."""
3
-
4
- import numpy as np
5
- from onnx import helper
6
- from itertools import product as iproduct
7
- from ..onnx_helpers import mk, _make_int64_init, _build_pad_node
8
- from ..data_loader import get_exs, fixed_shapes
9
- from ..gather_helpers import _build_gather_model, _build_gather_model_with_const
10
-
11
-
12
def s_tile(td):
    """Tiling solver: output is the input repeated rH×rW times.

    Requires a single fixed input shape and the same repeat factor across
    all examples; emits a Slice→Tile→Pad ONNX graph.
    """
    exs = get_exs(td)
    in_shapes = set(inp.shape for inp, _ in exs)
    if len(in_shapes) != 1:
        return None
    IH, IW = in_shapes.pop()
    tiles = set()
    for inp, out in exs:
        OH, OW = out.shape
        if OH % IH or OW % IW:
            return None
        rH, rW = OH // IH, OW // IW
        # Reject degenerate (1,1) tiling — that's the identity case.
        if rH < 1 or rW < 1 or (rH == 1 and rW == 1):
            return None
        tiles.add((rH, rW))
    if len(tiles) != 1:
        return None
    rH, rW = tiles.pop()
    OH, OW = IH * rH, IW * rW
    if OH > 30 or OW > 30:
        return None
    for inp, out in exs:
        if not np.array_equal(out, np.tile(inp, (rH, rW))):
            return None
    pad_h, pad_w = 30 - OH, 30 - OW
    inits = [
        _make_int64_init('st', [0, 0, 0, 0]),
        _make_int64_init('en', [1, 10, IH, IW]),
        # Tile repeats: batch and channel kept, H and W repeated rH/rW times.
        _make_int64_init('rp', [1, 1, rH, rW]),
    ]
    nodes = [
        helper.make_node('Slice', ['input', 'st', 'en'], ['cr']),
        helper.make_node('Tile', ['cr', 'rp'], ['tl']),
    ]
    nodes.append(_build_pad_node('tl', 'output', pad_h, pad_w, inits))
    return mk(nodes, inits)
49
-
50
-
51
def s_upscale(td):
    """Upscaling solver: every input pixel becomes a solid sH×sW block
    (both scale factors ≥ 2, identical across all examples)."""
    exs = get_exs(td)
    in_shapes = {inp.shape for inp, _ in exs}
    if len(in_shapes) != 1:
        return None
    IH, IW = in_shapes.pop()

    scale_set = set()
    for inp, out in exs:
        oh, ow = out.shape
        if oh % IH != 0 or ow % IW != 0:
            return None
        factor_h, factor_w = oh // IH, ow // IW
        if factor_h < 2 or factor_w < 2:
            return None
        scale_set.add((factor_h, factor_w))
    if len(scale_set) != 1:
        return None
    sH, sW = scale_set.pop()

    OH, OW = IH * sH, IW * sW
    if OH > 30 or OW > 30:
        return None
    if not all(np.array_equal(out, np.repeat(np.repeat(inp, sH, 0), sW, 1))
               for inp, out in exs):
        return None

    # Static gather map: output (r, c) reads input (r // sH, c // sW).
    idx = np.zeros((OH, OW, 2), dtype=np.int64)
    for r in range(OH):
        for c in range(OW):
            idx[r, c] = [r // sH, c // sW]
    return _build_gather_model(OH, OW, idx)
81
-
82
-
83
def s_kronecker(td):
    """Kronecker product solver: out == np.kron(inp, ones(sH, sW)).

    Functionally the same remap as pixel upscaling, but validated via
    np.kron and gated on the fixed-shapes path.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if OH % IH != 0 or OW % IW != 0:
        return None
    sH, sW = OH // IH, OW // IW
    if sH < 2 or sW < 2:
        return None
    if OH > 30 or OW > 30:
        return None
    for inp, out in exs:
        if not np.array_equal(out, np.kron(inp, np.ones((sH, sW), dtype=np.int64))):
            return None
    # Static gather: output (r, c) reads input (r // sH, c // sW).
    idx = np.zeros((OH, OW, 2), dtype=np.int64)
    for r in range(OH):
        for c in range(OW):
            idx[r, c] = [r // sH, c // sW]
    return _build_gather_model(OH, OW, idx)
105
-
106
-
107
def s_nonuniform_scale(td):
    """Non-uniform scaling solver: pixel repeat with different H/W factors.

    Tries a fixed menu of (fh, fw) factor pairs (covering the cases the
    uniform upscale solver rejects, e.g. scale-one-axis-only) and emits a
    static gather model for the first match.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    for fh, fw in [(1, 2), (2, 1), (1, 3), (3, 1), (2, 3), (3, 2), (1, 4), (4, 1), (2, 4), (4, 2)]:
        if OH != IH * fh or OW != IW * fw:
            continue
        if OH > 30 or OW > 30:
            continue
        if all(np.array_equal(np.repeat(np.repeat(inp, fh, 0), fw, 1), out) for inp, out in exs):
            # Static gather: output (r, c) reads input (r // fh, c // fw).
            idx = np.zeros((OH, OW, 2), dtype=np.int64)
            for r in range(OH):
                for c in range(OW):
                    idx[r, c] = [r // fh, c // fw]
            return _build_gather_model(OH, OW, idx)
    return None
126
-
127
-
128
def s_diagonal_tile(td):
    """Diagonal tiling solver: input copied along the block diagonal of an
    rH×rW block grid (rH == rW), with all off-diagonal blocks zero.

    Emits a gather model with a constant map: off-diagonal cells are
    constant 0, diagonal cells copy the corresponding input cell.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if OH % IH != 0 or OW % IW != 0:
        return None
    rH, rW = OH // IH, OW // IW
    if rH != rW or rH < 2:
        return None
    if OH > 30 or OW > 30:
        return None
    # Verify the diagonal structure on every example.
    for inp, out in exs:
        for bi in range(rH):
            for bj in range(rW):
                block = out[bi * IH:(bi + 1) * IH, bj * IW:(bj + 1) * IW]
                if bi == bj:
                    if not np.array_equal(block, inp):
                        return None
                else:
                    if not np.all(block == 0):
                        return None
    # Build the index/constant maps: -1 in idx means "use the constant".
    idx = np.zeros((OH, OW, 2), dtype=np.int64)
    cst = np.full((OH, OW), -1, dtype=np.int64)
    for bi in range(rH):
        for bj in range(rW):
            for lr in range(IH):
                for lc in range(IW):
                    oi, oj = bi * IH + lr, bj * IW + lc
                    if bi == bj:
                        idx[oi, oj] = [lr, lc]
                    else:
                        idx[oi, oj] = [-1, -1]
                        cst[oi, oj] = 0
    return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
165
-
166
-
167
def s_mirror_h(td):
    """Horizontal mirror solver: the output is the input with its
    left-right reflection appended on the right (width doubles)."""
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    # Shape gate: same height, exactly doubled width, within the 30 grid.
    if not (OH == IH and OW == 2 * IW and OW <= 30):
        return None
    for inp, out in get_exs(td):
        expected = np.concatenate([inp, np.flip(inp, 1)], 1)
        if not np.array_equal(expected, out):
            return None
    # Static index map: the right half reads mirrored input columns.
    idx = np.zeros((OH, OW, 2), dtype=np.int64)
    for row in range(OH):
        for col in range(OW):
            src_col = col if col < IW else 2 * IW - 1 - col
            idx[row, col] = [row, src_col]
    return _build_gather_model(OH, OW, idx)
187
-
188
-
189
def s_mirror_v(td):
    """Vertical mirror solver: the output is the input with its top-bottom
    reflection appended below (height doubles)."""
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    # Shape gate: same width, exactly doubled height, within the 30 grid.
    if not (OW == IW and OH == 2 * IH and OH <= 30):
        return None
    for inp, out in get_exs(td):
        expected = np.concatenate([inp, np.flip(inp, 0)], 0)
        if not np.array_equal(expected, out):
            return None
    # Static index map: the bottom half reads mirrored input rows.
    idx = np.zeros((OH, OW, 2), dtype=np.int64)
    for row in range(OH):
        src_row = row if row < IH else 2 * IH - 1 - row
        for col in range(OW):
            idx[row, col] = [src_row, col]
    return _build_gather_model(OH, OW, idx)
209
-
210
-
211
def s_quad_mirror(td):
    """Quad mirror solver: output is a 2×2 arrangement of the input —
    [[inp, fliplr], [flipud, rot180]] — doubling both dimensions.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if OH != 2 * IH or OW != 2 * IW:
        return None
    if OH > 30 or OW > 30:
        return None
    for inp, out in exs:
        expected = np.block([[inp, np.flip(inp, 1)],
                             [np.flip(inp, 0), np.flip(np.flip(inp, 0), 1)]])
        if not np.array_equal(expected, out):
            return None
    # Static index map: each quadrant mirrors rows/cols as needed.
    idx = np.zeros((OH, OW, 2), dtype=np.int64)
    for r in range(OH):
        for c in range(OW):
            sr = r if r < IH else 2 * IH - 1 - r
            sc = c if c < IW else 2 * IW - 1 - c
            idx[r, c] = [sr, sc]
    return _build_gather_model(OH, OW, idx)
234
-
235
-
236
def s_concat(td):
    """Concatenation solver with transformations.

    The output is 2–4 copies of the input concatenated along one axis, each
    copy independently transformed by identity / fliplr / flipud / rot180.
    Tries every transform combination (at most 4^4 = 256) and emits a
    static gather model for the first match.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    transforms = [
        ('id', lambda x: x), ('fliplr', lambda x: np.fliplr(x)),
        ('flipud', lambda x: np.flipud(x)), ('rot180', lambda x: np.rot90(x, 2)),
    ]
    # Horizontal concatenation: n transformed blocks side by side.
    if OH == IH and OW % IW == 0 and OW > IW:
        n = OW // IW
        if 2 <= n <= 4:
            for combo in iproduct(range(4), repeat=n):
                if all(np.array_equal(out, np.concatenate([transforms[t][1](inp) for t in combo], axis=1))
                       for inp, out in exs):
                    idx = np.zeros((OH, OW, 2), dtype=np.int64)
                    for oi in range(OH):
                        for oj in range(OW):
                            bj = oj // IW            # which horizontal block
                            lr, lc = oi, oj % IW     # local coords inside it
                            t = transforms[combo[bj]][0]
                            if t == 'id':
                                sr, sc = lr, lc
                            elif t == 'fliplr':
                                sr, sc = lr, IW - 1 - lc
                            elif t == 'flipud':
                                sr, sc = IH - 1 - lr, lc
                            elif t == 'rot180':
                                sr, sc = IH - 1 - lr, IW - 1 - lc
                            idx[oi, oj] = [sr, sc]
                    return _build_gather_model(OH, OW, idx)
    # Vertical concatenation: n transformed blocks stacked.
    if OW == IW and OH % IH == 0 and OH > IH:
        n = OH // IH
        if 2 <= n <= 4:
            for combo in iproduct(range(4), repeat=n):
                if all(np.array_equal(out, np.concatenate([transforms[t][1](inp) for t in combo], axis=0))
                       for inp, out in exs):
                    idx = np.zeros((OH, OW, 2), dtype=np.int64)
                    for oi in range(OH):
                        for oj in range(OW):
                            bi = oi // IH            # which vertical block
                            lr, lc = oi % IH, oj     # local coords inside it
                            t = transforms[combo[bi]][0]
                            if t == 'id':
                                sr, sc = lr, lc
                            elif t == 'fliplr':
                                sr, sc = lr, IW - 1 - lc
                            elif t == 'flipud':
                                sr, sc = IH - 1 - lr, lc
                            elif t == 'rot180':
                                sr, sc = IH - 1 - lr, IW - 1 - lc
                            idx[oi, oj] = [sr, sc]
                    return _build_gather_model(OH, OW, idx)
    return None
292
-
293
-
294
def s_concat_enhanced(td):
    """Enhanced concatenation with all 8 dihedral transforms.

    The output is an rH×rW grid of IH×IW blocks, each block one dihedral
    transform (identity / flips / rotations / transposes) of the input.
    Detects the per-block transform, builds a static gather index map, and
    re-verifies the map against every example before emitting the model.

    Fix: the rot90/rot270 index formulas were swapped, so any task with a
    rot90 or rot270 block failed the final verification and was dropped.
    The mappings now match np.rot90 exactly:
        rot90  (CCW): out[r, c] = x[c, IW - 1 - r]
        rot270 (CW):  out[r, c] = x[IH - 1 - c, r]
    (Rotation/transpose blocks only pass the shape check for square inputs.)

    Returns an ONNX gather model, or None if any block matches no transform.
    """
    exs = get_exs(td)
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    if IH == OH and IW == OW:
        return None
    if OH % IH != 0 or OW % IW != 0:
        return None
    rH, rW = OH // IH, OW // IW
    if rH * rW > 16 or rH * rW < 2:
        return None
    if OH > 30 or OW > 30:
        return None
    transforms = [
        ('id', lambda x: x), ('fliplr', lambda x: np.fliplr(x)),
        ('flipud', lambda x: np.flipud(x)), ('rot180', lambda x: np.rot90(x, 2)),
        ('rot90', lambda x: np.rot90(x, 1)), ('rot270', lambda x: np.rot90(x, 3)),
        ('T', lambda x: x.T), ('T_fliplr', lambda x: np.fliplr(x.T)),
    ]
    # 1. For each block position, find the first transform matching all examples.
    block_transforms = {}
    for bi in range(rH):
        for bj in range(rW):
            found = None
            for tidx, (tname, tfn) in enumerate(transforms):
                ok = True
                for inp, out in exs:
                    block = out[bi * IH:(bi + 1) * IH, bj * IW:(bj + 1) * IW]
                    expected = tfn(inp)
                    if expected.shape != (IH, IW) or not np.array_equal(block, expected):
                        ok = False
                        break
                if ok:
                    found = (tidx, tname)
                    break
            if found is None:
                return None
            block_transforms[(bi, bj)] = found
    # 2. Translate each block's transform into per-cell source coordinates.
    idx = np.zeros((OH, OW, 2), dtype=np.int64)
    for bi in range(rH):
        for bj in range(rW):
            _, tname = block_transforms[(bi, bj)]
            for lr in range(IH):
                for lc in range(IW):
                    oi, oj = bi * IH + lr, bj * IW + lc
                    if tname == 'id':
                        sr, sc = lr, lc
                    elif tname == 'fliplr':
                        sr, sc = lr, IW - 1 - lc
                    elif tname == 'flipud':
                        sr, sc = IH - 1 - lr, lc
                    elif tname == 'rot180':
                        sr, sc = IH - 1 - lr, IW - 1 - lc
                    elif tname == 'rot90':
                        # np.rot90(x, 1)[lr, lc] == x[lc, IW - 1 - lr]
                        sr, sc = lc, IW - 1 - lr
                    elif tname == 'rot270':
                        # np.rot90(x, 3)[lr, lc] == x[IH - 1 - lc, lr]
                        sr, sc = IH - 1 - lc, lr
                    elif tname == 'T':
                        sr, sc = lc, lr
                    elif tname == 'T_fliplr':
                        # np.fliplr(x.T)[lr, lc] == x[IH - 1 - lc, lr]
                        sr, sc = IH - 1 - lc, lr
                    idx[oi, oj] = [sr, sc]
    # 3. Safety net: replay the gather on every example before trusting it.
    for inp, out in exs:
        reconstructed = np.zeros_like(out)
        for oi in range(OH):
            for oj in range(OW):
                reconstructed[oi, oj] = inp[idx[oi, oj, 0], idx[oi, oj, 1]]
        if not np.array_equal(reconstructed, out):
            return None
    return _build_gather_model(OH, OW, idx)
366
-
367
-
368
def s_spatial_gather(td):
    """Spatial gather solver: every output cell either copies one fixed
    input cell (same position in all examples) or is a constant color.

    Builds a per-cell (row, col) index map, with a per-cell constant as
    fallback when the value never varies.  Returns None if any cell is
    neither a copy nor a constant.
    """
    sp = fixed_shapes(td)
    if sp is None:
        return None
    (IH, IW), (OH, OW) = sp
    exs = get_exs(td)
    idx = np.full((OH, OW, 2), -1, dtype=np.int64)  # [-1, -1] → use constant
    cst = np.full((OH, OW), -1, dtype=np.int64)     # -1 → no constant known
    for oi in range(OH):
        for oj in range(OW):
            # Record a constant if this output cell never varies.
            vals = set(int(out[oi, oj]) for _, out in exs)
            if len(vals) == 1:
                cst[oi, oj] = vals.pop()
            # Prefer a copy: find an input cell that always matches.
            found = False
            for ri in range(IH):
                for rj in range(IW):
                    if all(int(inp[ri, rj]) == int(out[oi, oj]) for inp, out in exs):
                        idx[oi, oj] = [ri, rj]
                        found = True
                        break
                if found:
                    break
            if not found and cst[oi, oj] < 0:
                return None
    return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
394
-
395
-
396
def s_varshape_spatial_gather(td):
    """Variable shape spatial gather solver.

    Handles tasks whose shapes vary between examples by zero-padding every
    example to the full 30×30 canvas and then running the same per-cell
    copy-or-constant search as s_spatial_gather over the whole canvas.
    Only runs when fixed_shapes fails (the fixed-shape variant is cheaper).
    """
    sp = fixed_shapes(td)
    if sp is not None:
        return None
    exs = get_exs(td)
    # Zero-pad all examples to 30×30 so one index map can cover them all.
    exs_30 = []
    for inp, out in exs:
        ih, iw = inp.shape
        oh, ow = out.shape
        inp30 = np.zeros((30, 30), dtype=np.int64)
        out30 = np.zeros((30, 30), dtype=np.int64)
        inp30[:ih, :iw] = inp
        out30[:oh, :ow] = out
        exs_30.append((inp30, out30))
    idx = np.full((30, 30, 2), -1, dtype=np.int64)  # [-1, -1] → use constant
    cst = np.full((30, 30), -1, dtype=np.int64)     # -1 → no constant known
    for oi in range(30):
        for oj in range(30):
            vals = set(int(out30[oi, oj]) for _, out30 in exs_30)
            if len(vals) == 1:
                cst[oi, oj] = vals.pop()
            # Prefer a copy: find an input cell that always matches.
            found = False
            for ri in range(30):
                for rj in range(30):
                    if all(int(inp30[ri, rj]) == int(out30[oi, oj]) for inp30, out30 in exs_30):
                        idx[oi, oj] = [ri, rj]
                        found = True
                        break
                if found:
                    break
            if not found and cst[oi, oj] < 0:
                return None
    return _build_gather_model_with_const(30, 30, 30, 30, idx, cst)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/solvers/wave1.py DELETED
@@ -1,277 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Wave 1 static spatial remapping solvers.
3
-
4
- A4: downsample_stride — strided sampling of input
5
- A7: symmetry_complete — mirror to complete L-R or T-B symmetry
6
- A1: extract_inner — remove border frame
7
- A2: add_border — add constant border
8
- A6: sparse_fill — pixel to block expansion
9
- B1: channel_filter — keep only certain colors
10
-
11
- Scan results (2026-04-27): 0 arc-gen validated matches.
12
- Kept for future tasks and as building blocks.
13
- """
14
-
15
- import numpy as np
16
- from ..data_loader import get_exs, fixed_shapes
17
- from ..gather_helpers import _build_gather_model, _build_gather_model_with_const
18
- from ..onnx_helpers import mk, _make_int64_init, _build_pad_node, add_onehot_block
19
- from ..constants import GH, GW
20
-
21
-
22
- def s_downsample_stride(td):
23
- """out[r,c] = inp[r*sH + oH, c*sW + oW] for integer strides."""
24
- exs = get_exs(td)
25
- sp = fixed_shapes(td)
26
- if sp is None:
27
- return None
28
- (IH, IW), (OH, OW) = sp
29
- if OH >= IH or OW >= IW:
30
- return None
31
-
32
- for sh in range(2, 6):
33
- for sw in range(2, 6):
34
- for oh_off in range(sh):
35
- for ow_off in range(sw):
36
- ok = True
37
- for inp, out in exs:
38
- sampled = inp[oh_off::sh, ow_off::sw]
39
- if sampled.shape != out.shape or not np.array_equal(sampled, out):
40
- ok = False
41
- break
42
- if ok:
43
- idx = np.zeros((OH, OW, 2), dtype=np.int64)
44
- for r in range(OH):
45
- for c in range(OW):
46
- idx[r, c] = [r * sh + oh_off, c * sw + ow_off]
47
- return _build_gather_model(OH, OW, idx)
48
- return None
49
-
50
-
51
- def s_symmetry_complete(td):
52
- """Complete partial T-B symmetry by adding mirrored + original via Gather."""
53
- from onnx import helper, numpy_helper
54
-
55
- exs = get_exs(td)
56
- sp = fixed_shapes(td)
57
- if sp is None:
58
- return None
59
- (IH, IW), (OH, OW) = sp
60
- if (IH, IW) != (OH, OW):
61
- return None
62
- if IH < 2:
63
- return None
64
-
65
- # T-B symmetry: out[r,c] = max(inp[r,c], inp[IH-1-r,c])
66
- ok = True
67
- for inp, out in exs:
68
- exp = inp.copy()
69
- for r in range(IH // 2):
70
- for c in range(IW):
71
- v = max(int(inp[r, c]), int(inp[IH - 1 - r, c]))
72
- exp[r, c] = v
73
- exp[IH - 1 - r, c] = v
74
- if not np.array_equal(out, exp):
75
- ok = False
76
- break
77
-
78
- if ok:
79
- # Build: Gather(self) + Gather(mirror) → Add → ArgMax → one-hot
80
- pad_h, pad_w = GH - OH, GW - OW
81
- mirror_idx = np.zeros((GH * GW,), dtype=np.int64)
82
- mask = np.zeros((1, 1, GH, GW), dtype=np.float32)
83
- self_idx = np.zeros((GH * GW,), dtype=np.int64)
84
- for r in range(OH):
85
- for c in range(OW):
86
- self_idx[r * GW + c] = r * GW + c
87
- mirror_idx[r * GW + c] = (IH - 1 - r) * GW + c
88
- mask[0, 0, r, c] = 1.0
89
-
90
- inits = [
91
- numpy_helper.from_array(np.array([1, 10, GH * GW], dtype=np.int64), 'fs'),
92
- numpy_helper.from_array(self_idx, 'self_idx'),
93
- numpy_helper.from_array(mirror_idx, 'mirror_idx'),
94
- numpy_helper.from_array(np.array([1, 10, GH, GW], dtype=np.int64), 'os'),
95
- numpy_helper.from_array(mask, 'mask'),
96
- ]
97
- nodes = [
98
- helper.make_node('Reshape', ['input', 'fs'], ['flat']),
99
- helper.make_node('Gather', ['flat', 'self_idx'], ['g_self'], axis=2),
100
- helper.make_node('Gather', ['flat', 'mirror_idx'], ['g_mirror'], axis=2),
101
- helper.make_node('Add', ['g_self', 'g_mirror'], ['combined']),
102
- helper.make_node('Reshape', ['combined', 'os'], ['combined_2d']),
103
- helper.make_node('ArgMax', ['combined_2d'], ['am'], axis=1, keepdims=1),
104
- ]
105
- add_onehot_block(nodes, inits, 'am', 'oh_out')
106
- nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
107
- return mk(nodes, inits)
108
-
109
- # L-R symmetry: out[r,c] = max(inp[r,c], inp[r,IW-1-c])
110
- if IW < 2:
111
- return None
112
- ok = True
113
- for inp, out in exs:
114
- exp = inp.copy()
115
- for r in range(IH):
116
- for c in range(IW // 2):
117
- v = max(int(inp[r, c]), int(inp[r, IW - 1 - c]))
118
- exp[r, c] = v
119
- exp[r, IW - 1 - c] = v
120
- if not np.array_equal(out, exp):
121
- ok = False
122
- break
123
-
124
- if ok:
125
- mirror_idx = np.zeros((GH * GW,), dtype=np.int64)
126
- mask = np.zeros((1, 1, GH, GW), dtype=np.float32)
127
- self_idx = np.zeros((GH * GW,), dtype=np.int64)
128
- for r in range(OH):
129
- for c in range(OW):
130
- self_idx[r * GW + c] = r * GW + c
131
- mirror_idx[r * GW + c] = r * GW + (IW - 1 - c)
132
- mask[0, 0, r, c] = 1.0
133
-
134
- inits = [
135
- numpy_helper.from_array(np.array([1, 10, GH * GW], dtype=np.int64), 'fs'),
136
- numpy_helper.from_array(self_idx, 'self_idx'),
137
- numpy_helper.from_array(mirror_idx, 'mirror_idx'),
138
- numpy_helper.from_array(np.array([1, 10, GH, GW], dtype=np.int64), 'os'),
139
- numpy_helper.from_array(mask, 'mask'),
140
- ]
141
- nodes = [
142
- helper.make_node('Reshape', ['input', 'fs'], ['flat']),
143
- helper.make_node('Gather', ['flat', 'self_idx'], ['g_self'], axis=2),
144
- helper.make_node('Gather', ['flat', 'mirror_idx'], ['g_mirror'], axis=2),
145
- helper.make_node('Add', ['g_self', 'g_mirror'], ['combined']),
146
- helper.make_node('Reshape', ['combined', 'os'], ['combined_2d']),
147
- helper.make_node('ArgMax', ['combined_2d'], ['am'], axis=1, keepdims=1),
148
- ]
149
- add_onehot_block(nodes, inits, 'am', 'oh_out')
150
- nodes.append(helper.make_node('Mul', ['oh_out', 'mask'], ['output']))
151
- return mk(nodes, inits)
152
-
153
- return None
154
-
155
-
156
- def s_extract_inner(td):
157
- """Remove N-pixel border frame → smaller output."""
158
- exs = get_exs(td)
159
- sp = fixed_shapes(td)
160
- if sp is None:
161
- return None
162
- (IH, IW), (OH, OW) = sp
163
-
164
- for b in range(1, min(IH, IW) // 2):
165
- if OH != IH - 2 * b or OW != IW - 2 * b:
166
- continue
167
- if all(np.array_equal(inp[b:IH-b, b:IW-b], out) for inp, out in exs):
168
- idx = np.zeros((OH, OW, 2), dtype=np.int64)
169
- for r in range(OH):
170
- for c in range(OW):
171
- idx[r, c] = [r + b, c + b]
172
- return _build_gather_model(OH, OW, idx)
173
- return None
174
-
175
-
176
- def s_add_border(td):
177
- """Add constant-color border frame → larger output."""
178
- exs = get_exs(td)
179
- sp = fixed_shapes(td)
180
- if sp is None:
181
- return None
182
- (IH, IW), (OH, OW) = sp
183
-
184
- for b in range(1, 5):
185
- if OH != IH + 2 * b or OW != IW + 2 * b:
186
- continue
187
- if OH > 30 or OW > 30:
188
- continue
189
- for bc in range(10):
190
- ok = True
191
- for inp, out in exs:
192
- exp = np.full((OH, OW), bc, dtype=np.int64)
193
- exp[b:b+IH, b:b+IW] = inp
194
- if not np.array_equal(out, exp):
195
- ok = False
196
- break
197
- if ok:
198
- idx = np.zeros((OH, OW, 2), dtype=np.int64)
199
- cst = np.full((OH, OW), -1, dtype=np.int64)
200
- for r in range(OH):
201
- for c in range(OW):
202
- if b <= r < b + IH and b <= c < b + IW:
203
- idx[r, c] = [r - b, c - b]
204
- else:
205
- idx[r, c] = [-1, -1]
206
- cst[r, c] = bc
207
- return _build_gather_model_with_const(IH, IW, OH, OW, idx, cst)
208
- return None
209
-
210
-
211
- def s_sparse_fill(td):
212
- """Each input pixel becomes an NxN block in output."""
213
- exs = get_exs(td)
214
- sp = fixed_shapes(td)
215
- if sp is None:
216
- return None
217
- (IH, IW), (OH, OW) = sp
218
-
219
- for bh in range(2, 10):
220
- for bw in range(2, 10):
221
- if OH != IH * bh or OW != IW * bw:
222
- continue
223
- if OH > 30 or OW > 30:
224
- continue
225
- ok = True
226
- for inp, out in exs:
227
- exp = np.zeros((OH, OW), dtype=np.int64)
228
- for r in range(IH):
229
- for c in range(IW):
230
- exp[r*bh:(r+1)*bh, c*bw:(c+1)*bw] = inp[r, c]
231
- if not np.array_equal(out, exp):
232
- ok = False
233
- break
234
- if ok:
235
- idx = np.zeros((OH, OW, 2), dtype=np.int64)
236
- for r in range(OH):
237
- for c in range(OW):
238
- idx[r, c] = [r // bh, c // bw]
239
- return _build_gather_model(OH, OW, idx)
240
- return None
241
-
242
-
243
- def s_channel_filter(td):
244
- """Keep only certain colors, rest → background (0)."""
245
- from onnx import helper, numpy_helper
246
-
247
- exs = get_exs(td)
248
- sp = fixed_shapes(td)
249
- if sp is None:
250
- return None
251
- (IH, IW), (OH, OW) = sp
252
- if (IH, IW) != (OH, OW):
253
- return None
254
-
255
- in_colors = set()
256
- out_colors = set()
257
- for inp, out in exs:
258
- in_colors.update(inp.flatten())
259
- out_colors.update(out.flatten())
260
-
261
- if not (out_colors < in_colors):
262
- return None
263
-
264
- keep = out_colors
265
- for inp, out in exs:
266
- exp = np.where(np.isin(inp, list(keep)), inp, 0)
267
- if not np.array_equal(out, exp):
268
- return None
269
-
270
- ch_mask = np.zeros((1, 10, 1, 1), dtype=np.float32)
271
- for c in keep:
272
- if 0 <= c < 10:
273
- ch_mask[0, c, 0, 0] = 1.0
274
-
275
- inits = [numpy_helper.from_array(ch_mask, 'ch_mask')]
276
- nodes = [helper.make_node('Mul', ['input', 'ch_mask'], ['output'])]
277
- return mk(nodes, inits)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/submission.py DELETED
@@ -1,150 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Submission file generation and task running with W&B logging."""
3
-
4
- import os
5
- import csv
6
- import io
7
- import math
8
- import zipfile
9
- from collections import Counter
10
- from .profiler import score_network
11
- from .constants import MAX_ONNX_FILESIZE, EXCLUDED_TASKS
12
-
13
- try:
14
- import wandb
15
- except ImportError:
16
- wandb = None
17
-
18
-
19
- def run_tasks(task_nums, tasks, output_dir, providers, conv_budget, excluded_tasks, use_wandb):
20
- """Run all tasks and collect results.
21
-
22
- Returns: (results, costs_dict, total_score)
23
- """
24
- from .solvers.solver_registry import solve_task
25
-
26
- results = {}
27
- costs_dict = {}
28
- total_score = 0
29
-
30
- for tn in task_nums:
31
- if tn not in tasks:
32
- continue
33
-
34
- td = tasks[tn]['data']
35
- ok, sname, sz, t_task, model_path = solve_task(
36
- tn, td, output_dir, providers, conv_budget, excluded_tasks
37
- )
38
-
39
- if ok:
40
- macs, memory, params = score_network(model_path)
41
- if macs is None:
42
- macs, memory, params = 0, 0, 0
43
- cost = macs + memory + params
44
- score = max(1.0, 25.0 - math.log(max(1, cost)))
45
- total_score += score
46
-
47
- # Check per-file size limit
48
- if sz and sz > MAX_ONNX_FILESIZE:
49
- print(f"Task {tn:3d}: {sname:25s} OVER SIZE LIMIT ({sz:,} > {MAX_ONNX_FILESIZE:,})")
50
- continue
51
-
52
- results[tn] = (sname, t_task, sz)
53
- costs_dict[tn] = cost
54
- print(f"Task {tn:3d}: {sname:25s} {score:7.3f} {cost:>12} {t_task:7.3f}s ({sz:>8,} bytes)")
55
- else:
56
- score = 0
57
- cost = 0
58
- print(f"Task {tn:3d}: UNSOLVED {t_task:7.3f}s")
59
-
60
- if use_wandb and wandb is not None:
61
- wandb.log({
62
- "task_id": tn,
63
- "solver": sname if ok else "unsolved",
64
- "onnx_bytes": sz if ok else 0,
65
- "task_time_sec": t_task,
66
- "cost": cost,
67
- "score": score,
68
- })
69
-
70
- return results, costs_dict, total_score
71
-
72
-
73
- def generate_submission(output_dir, results, costs_dict, active_tasks):
74
- """Generate submission.zip and submission.csv.
75
-
76
- Returns dict with submission info.
77
- """
78
- n_files = len([f for f in os.listdir(output_dir) if f.endswith('.onnx')])
79
- total_size = sum(os.path.getsize(os.path.join(output_dir, f))
80
- for f in os.listdir(output_dir) if f.endswith('.onnx'))
81
-
82
- # Check per-file size limits
83
- oversized = []
84
- for f in os.listdir(output_dir):
85
- if f.endswith('.onnx'):
86
- fsize = os.path.getsize(os.path.join(output_dir, f))
87
- if fsize > MAX_ONNX_FILESIZE:
88
- oversized.append((f, fsize))
89
-
90
- # Create submission.zip
91
- parent_dir = os.path.dirname(output_dir) or '/kaggle/working/'
92
- zip_path = os.path.join(parent_dir, 'submission.zip')
93
- buf = io.BytesIO()
94
- with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
95
- for f in sorted(os.listdir(output_dir)):
96
- if f.endswith('.onnx'):
97
- zf.write(os.path.join(output_dir, f), f)
98
- zip_bytes = buf.getvalue()
99
- with open(zip_path, 'wb') as f:
100
- f.write(zip_bytes)
101
- zip_size = len(zip_bytes)
102
-
103
- # Create submission.csv
104
- csv_path = os.path.join(parent_dir, 'submission.csv')
105
- with open(csv_path, 'w', newline='') as f:
106
- w = csv.writer(f)
107
- w.writerow(['task_id', 'total_cost'])
108
- for tn in sorted(costs_dict.keys()):
109
- w.writerow([f'task{tn:03d}', costs_dict[tn]])
110
-
111
- unsolved_count = len(active_tasks) - len(results)
112
- total_score = sum(max(1.0, 25.0 - math.log(max(1, cost))) for cost in costs_dict.values())
113
- total_cost = sum(costs_dict.values())
114
-
115
- return {
116
- 'n_files': n_files,
117
- 'total_size': total_size,
118
- 'zip_path': zip_path,
119
- 'zip_size': zip_size,
120
- 'csv_path': csv_path,
121
- 'total_score': total_score,
122
- 'total_cost': total_cost,
123
- 'unsolved_count': unsolved_count,
124
- 'oversized': oversized,
125
- }
126
-
127
-
128
- def print_summary(results, submission_info, elapsed):
129
- """Print summary statistics."""
130
- active_count = submission_info['unsolved_count'] + len(results)
131
-
132
- print(f"\n{'=' * 70}")
133
- print(f"Solved: {len(results)}/{active_count} tasks in {elapsed:.0f}s")
134
- solver_names = [v[0] for v in results.values()]
135
- sc = Counter(solver_names)
136
- for s, c in sc.most_common():
137
- print(f" {s}: {c}")
138
-
139
- print(f"\n{submission_info['n_files']} ONNX files, {submission_info['total_size'] / 1024:.1f} KB uncompressed")
140
- print(f"ZIP size: {submission_info['zip_size'] / 1024:.1f} KB")
141
-
142
- if submission_info['oversized']:
143
- print(f"WARNING: {len(submission_info['oversized'])} files exceed 1.44MB limit:")
144
- for f, sz in submission_info['oversized']:
145
- print(f" {f}: {sz / 1024:.1f} KB")
146
-
147
- print(f"\nEstimated LB score: {submission_info['total_score']:.1f}")
148
- print(f"Total cost: {submission_info['total_cost']:,}")
149
- print(f"Solved: {len(results)} | Unsolved: {submission_info['unsolved_count']}")
150
- print(f"Written: {submission_info['zip_path']} | {submission_info['csv_path']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neurogolf_solver/validators.py DELETED
@@ -1,125 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Model validation utilities.
3
-
4
- Validation order (matches Kaggle's verify_network):
5
- 1. File size ≤ 1.44MB
6
- 2. onnx.checker.check_model() — catches malformed graphs
7
- 3. No banned ops (UPPERCASE check to match Kaggle)
8
- 4. All tensor shapes are static (no dynamic dims)
9
- 5. onnxruntime.InferenceSession loads successfully
10
- 6. Correct outputs on train + test + arc-gen
11
- """
12
-
13
- import os
14
- import numpy as np
15
- import onnx
16
- import onnxruntime as ort
17
- from .data_loader import to_onehot
18
- from .constants import MAX_ARCGEN_VALIDATE, MAX_ONNX_FILESIZE, BANNED_OPS
19
-
20
- _BANNED_OPS_UPPER = {op.upper() for op in BANNED_OPS}
21
-
22
-
23
- def check_model_structure(path):
24
- """Check ONNX model structure: size, valid graph, no banned ops, static shapes.
25
- Returns (ok, error_message)."""
26
- # 1. File size
27
- try:
28
- fsize = os.path.getsize(path)
29
- except OSError:
30
- return False, f"File not found: {path}"
31
- if fsize > MAX_ONNX_FILESIZE:
32
- return False, f"File size {fsize} exceeds {MAX_ONNX_FILESIZE} ({fsize/1024:.1f} KB)"
33
-
34
- # 2. ONNX checker
35
- try:
36
- model = onnx.load(path)
37
- onnx.checker.check_model(model)
38
- except Exception as e:
39
- return False, f"onnx.checker failed: {e}"
40
-
41
- # 3. Banned ops (UPPERCASE comparison — matches Kaggle)
42
- for node in model.graph.node:
43
- if node.op_type.upper() in _BANNED_OPS_UPPER:
44
- return False, f"Banned op: {node.op_type}"
45
-
46
- # 4. Static shapes — all tensors must have fully defined shapes
47
- for inp in model.graph.input:
48
- if inp.type.HasField('tensor_type'):
49
- shape = inp.type.tensor_type.shape
50
- if shape:
51
- for dim in shape.dim:
52
- if not dim.dim_value and dim.dim_value != 0:
53
- if not dim.dim_param: # symbolic dim is also not static
54
- pass # dim_value=0 is valid (means unknown in some contexts)
55
- return False, f"Dynamic shape on input '{inp.name}': {[d.dim_value or d.dim_param for d in shape.dim]}"
56
-
57
- for out in model.graph.output:
58
- if out.type.HasField('tensor_type'):
59
- shape = out.type.tensor_type.shape
60
- if shape:
61
- for dim in shape.dim:
62
- if not dim.dim_value and dim.dim_value != 0:
63
- if not dim.dim_param:
64
- pass
65
- return False, f"Dynamic shape on output '{out.name}': {[d.dim_value or d.dim_param for d in shape.dim]}"
66
-
67
- return True, None
68
-
69
-
70
- def validate(path, td, providers):
71
- """Full validation: structure check + correct outputs on all splits.
72
- Returns False immediately if any check fails."""
73
- # Structure checks first
74
- ok, err = check_model_structure(path)
75
- if not ok:
76
- return False
77
-
78
- # Load and run inference
79
- try:
80
- opts = ort.SessionOptions()
81
- opts.log_severity_level = 3
82
- sess = ort.InferenceSession(path, sess_options=opts, providers=providers)
83
- except:
84
- return False
85
-
86
- examples = td['train'] + td['test']
87
- if 'arc-gen' in td:
88
- examples = examples + td['arc-gen'][:MAX_ARCGEN_VALIDATE]
89
- for ex in examples:
90
- inp = to_onehot(ex['input'])
91
- exp = to_onehot(ex['output'])
92
- try:
93
- out = sess.run(['output'], {'input': inp})[0]
94
- out = (out > 0.0).astype(np.float32)
95
- except:
96
- return False
97
- if not np.array_equal(out, exp):
98
- return False
99
- return True
100
-
101
-
102
- def validate_raw(raw_bytes, td, providers):
103
- """Validate ONNX model from raw bytes."""
104
- if len(raw_bytes) > MAX_ONNX_FILESIZE:
105
- return False
106
- try:
107
- opts = ort.SessionOptions()
108
- opts.log_severity_level = 3
109
- sess = ort.InferenceSession(raw_bytes, sess_options=opts, providers=providers)
110
- except:
111
- return False
112
- examples = td['train'] + td['test']
113
- if 'arc-gen' in td:
114
- examples = examples + td['arc-gen'][:MAX_ARCGEN_VALIDATE]
115
- for ex in examples:
116
- inp = to_onehot(ex['input'])
117
- exp = to_onehot(ex['output'])
118
- try:
119
- out = sess.run(['output'], {'input': inp})[0]
120
- out = (out > 0.0).astype(np.float32)
121
- except:
122
- return False
123
- if not np.array_equal(out, exp):
124
- return False
125
- return True