""" Run the PEMF solver on all ARC-AGI tasks and report solve rates. For each task, the solver tries every training pair. A task is "solved" if the solver achieves σ=0 on ALL training pairs. Usage: 1. Download the ARC dataset into arc_data/training/: git clone https://github.com/fchollet/ARC-AGI.git /tmp/arc cp -r /tmp/arc/data/training arc_data/training 2. Run: python scripts/run_all_arc.py Outputs: arc_results/summary.json — per-task results arc_results/report.txt — human-readable report """ import os, json, time, glob import numpy as np from itt_solver.solver_core import initialize_potential, sigma_l1 from itt_solver.beam_logging import beam_minimize_with_log from itt_solver.experiment_driver import default_atomic_factory ARC_DIR = os.environ.get("ARC_DIR", "arc_data/training") OUT_DIR = os.environ.get("OUT_DIR", "arc_results") os.makedirs(OUT_DIR, exist_ok=True) PARAMS = { 'beam_width': 8, 'max_depth': 2, 'lock_coeff': 0.0, 'max_fraction': 1.0, 'use_symmetry': True, 'use_gravity': True, 'use_color_ops': True, 'boundary_source': 'target', } def solve_pair(inp, out, params): """Run solver on one input→output pair. Returns (sigma, transform_name, time_s).""" h, w = len(out), len(out[0]) task = { 'name': 'pair', 'input': inp, 'target': out, 'target_shape': (h, w), } atomic_lib = default_atomic_factory(params, task) phi_in = initialize_potential(inp) phi_target = initialize_potential(out) start = time.time() T_best, phi_best, states, sigmas, logs = beam_minimize_with_log( phi_in, phi_target, atomic_lib, beam_width=params['beam_width'], max_depth=params['max_depth'], lock_coeff=params['lock_coeff'], max_fraction=params['max_fraction'], allowed_symbols=list(range(10)), enable_layer_minus_one=False, boundary_source=params['boundary_source'], ) elapsed = time.time() - start final_sigma = float(sigmas[-1]) if sigmas else float('inf') return final_sigma, repr(T_best), elapsed def run_all(): task_files = sorted(glob.glob(os.path.join(ARC_DIR, "*.json"))) print(f"Running solver on {len(task_files)} ARC training tasks...") print(f"Params: beam_width={PARAMS['beam_width']}, max_depth={PARAMS['max_depth']}") print() results = [] solved_count = 0 partial_count = 0 total_time = 0 for ti, tf in enumerate(task_files): task_id = os.path.basename(tf).replace('.json', '') with open(tf) as fh: task_data = json.load(fh) train_pairs = task_data.get('train', []) test_pairs = task_data.get('test', []) pair_results = [] all_zero = True best_sigma = float('inf') best_transform = None for pi, pair in enumerate(train_pairs): sigma, transform, elapsed = solve_pair(pair['input'], pair['output'], PARAMS) total_time += elapsed pair_results.append({ 'pair': pi, 'sigma': sigma, 'transform': transform, 'time_s': round(elapsed, 4), }) if sigma > 0: all_zero = False if sigma < best_sigma: best_sigma = sigma best_transform = transform test_results = [] test_solved = None for pi, pair in enumerate(test_pairs): if 'output' in pair: sigma, transform, elapsed = solve_pair(pair['input'], pair['output'], PARAMS) total_time += elapsed test_results.append({ 'pair': pi, 'sigma': sigma, 'transform': transform, 'time_s': round(elapsed, 4), }) if test_solved is None: test_solved = True if sigma > 0: test_solved = False status = "SOLVED" if all_zero else "PARTIAL" if best_sigma < float('inf') and best_sigma > 0 else "FAILED" if all_zero: solved_count += 1 elif best_sigma < float('inf'): partial_count += 1 results.append({ 'task_id': task_id, 'status': status, 'train_pairs': len(train_pairs), 'all_train_solved': all_zero, 'best_sigma': best_sigma, 'best_transform': best_transform, 'pair_results': pair_results, 'test_results': test_results, 'test_solved': test_solved, }) if (ti + 1) % 20 == 0 or all_zero: marker = "✅" if all_zero else " " print(f"[{ti+1:3d}/{len(task_files)}] {task_id}: {status} (best σ={best_sigma:.1f}) {marker}") failed_count = len(task_files) - solved_count - partial_count print(f"\n{'='*60}") print(f"RESULTS: {len(task_files)} tasks") print(f" SOLVED (σ=0 all train pairs): {solved_count} ({100*solved_count/len(task_files):.1f}%)") print(f" PARTIAL (σ>0 but finite): {partial_count}") print(f" FAILED: {failed_count}") print(f" Total time: {total_time:.1f}s ({total_time/len(task_files):.2f}s/task)") summary = { 'total_tasks': len(task_files), 'solved': solved_count, 'partial': partial_count, 'failed': failed_count, 'solve_rate': round(100 * solved_count / len(task_files), 2), 'params': PARAMS, 'total_time_s': round(total_time, 2), 'results': results, } with open(os.path.join(OUT_DIR, 'summary.json'), 'w') as fh: json.dump(summary, fh, indent=2) solved_tasks = [r for r in results if r['all_train_solved']] print(f"\nSolved tasks:") for r in solved_tasks: print(f" {r['task_id']}: {r['best_transform']}") partial_tasks = sorted( [r for r in results if not r['all_train_solved'] and r['best_sigma'] < float('inf')], key=lambda r: r['best_sigma'] ) print(f"\nTop 20 closest-to-solving:") for r in partial_tasks[:20]: print(f" {r['task_id']}: σ={r['best_sigma']:.1f} ({r['best_transform']})") with open(os.path.join(OUT_DIR, 'report.txt'), 'w') as fh: fh.write(f"PEMF Solver — ARC-AGI Training Set Results\n{'='*60}\n") fh.write(f"Total tasks: {len(task_files)}\n") fh.write(f"Solved: {solved_count} ({100*solved_count/len(task_files):.1f}%)\n") fh.write(f"Partial: {partial_count}\nFailed: {failed_count}\n") fh.write(f"Time: {total_time:.1f}s\n\n") fh.write(f"Params: {json.dumps(PARAMS, indent=2)}\n\n") fh.write(f"Solved tasks:\n") for r in solved_tasks: fh.write(f" {r['task_id']}: {r['best_transform']}\n") print(f"\nResults saved to {OUT_DIR}/") if __name__ == '__main__': run_all()