"""
PEMF ARC-AGI — LLM Program Synthesis via Ollama (Kaggle Edition)
================================================================

Self-contained script for Kaggle GPU notebooks.
Pulls a model via Ollama, runs LLM synthesis on unsolved ARC tasks.

Usage on Kaggle:
    1. Enable GPU (T4 x2 or P100)
    2. Enable internet access
    3. Upload this file + arc_data/ + already_solved.json
    4. Run all cells

The script:
    - Installs Ollama
    - Pulls the model (qwen2.5-coder:32b or smaller)
    - Loads ARC tasks
    - For each unsolved task: generates Python transform(), verifies against
      training pairs
    - Saves results to llm_results.json
"""

import subprocess
import sys
import os
import json
import time
import re
import signal
import numpy as np
from typing import Dict, List, Optional, Tuple
from collections import Counter
from pathlib import Path


# =============================================================================
# 1. OLLAMA SETUP
# =============================================================================

# Address at which this script expects `ollama serve` to answer HTTP requests.
OLLAMA_BASE_URL = "http://localhost:11434"


def install_ollama():
    """Install Ollama on Kaggle/Linux using the upstream install script.

    Raises:
        subprocess.CalledProcessError: the install pipeline exited non-zero.
        RuntimeError: the pipeline exited zero but no ``ollama`` executable
            can be found on PATH afterwards.
    """
    import shutil

    print("Installing Ollama...")
    # shell=True is needed for the pipe; the command is a fixed literal, so
    # no external input is interpolated into the shell string.
    subprocess.run("curl -fsSL https://ollama.com/install.sh | sh",
                   shell=True, check=True, capture_output=True)
    # A pipeline's exit status is that of its last command (`sh`), so a failed
    # download can still look like success. Check for the binary explicitly.
    if shutil.which("ollama") is None:
        raise RuntimeError("Ollama install finished but 'ollama' is not on PATH")
    print("Ollama installed.")


def start_ollama(startup_timeout_s: float = 30.0):
    """Start the Ollama server in the background and wait until it answers.

    Instead of sleeping for a fixed time, the HTTP endpoint is polled until it
    responds, the server process exits, or ``startup_timeout_s`` elapses.

    Args:
        startup_timeout_s: maximum number of seconds to wait for the server.

    Returns:
        The ``subprocess.Popen`` handle of the ``ollama serve`` process. It is
        returned even when readiness could not be confirmed; a warning is
        printed in that case.
    """
    import urllib.request

    print("Starting Ollama server...")
    proc = subprocess.Popen(
        ["ollama", "serve"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    deadline = time.time() + startup_timeout_s
    ready = False
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(OLLAMA_BASE_URL + "/", timeout=1):
                ready = True
                break
        except OSError:
            # URLError / connection refused / timeout: not listening yet.
            if proc.poll() is not None:
                break  # the server process exited; waiting longer is pointless
            time.sleep(0.5)

    if ready:
        print(f"Ollama server started (PID {proc.pid})")
    else:
        print(f"Warning: Ollama server (PID {proc.pid}) did not become ready "
              f"within {startup_timeout_s:.0f}s")
    return proc


def pull_model(model_name: str):
    """Pull a model via the ``ollama pull`` command line.

    Args:
        model_name: Ollama model tag, e.g. ``"qwen2.5-coder:32b"``.

    Raises:
        RuntimeError: ``ollama pull`` exited with a non-zero status.
        subprocess.TimeoutExpired: the pull took longer than 1800 seconds.
    """
    print(f"Pulling model {model_name}... (this may take several minutes)")
    result = subprocess.run(
        ["ollama", "pull", model_name],
        capture_output=True, text=True, timeout=1800
    )
    if result.returncode != 0:
        print(f"Pull failed: {result.stderr}")
        raise RuntimeError(f"Failed to pull {model_name}")
    print(f"Model {model_name} ready.")


def call_ollama(prompt: str, model: str = "qwen2.5-coder:32b",
                temperature: float = 0.7, timeout_s: int = 120) -> str:
    """Call the Ollama ``/api/generate`` endpoint and return the response text.

    Args:
        prompt: full prompt text sent to the model.
        model: Ollama model tag.
        temperature: sampling temperature passed in ``options``.
        timeout_s: socket timeout for the HTTP request, in seconds.

    Returns:
        The ``response`` field of the JSON reply. On any failure (network
        error, invalid JSON, or an ``error`` field in the reply) a string
        starting with ``"ERROR:"`` is returned instead of raising, which is
        the convention callers check for.
    """
    import urllib.request

    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {
            "temperature": temperature,
            "num_predict": 2048,
        }
    }

    data = json.dumps(payload).encode('utf-8')
    req = urllib.request.Request(
        OLLAMA_BASE_URL + "/api/generate",
        data=data,
        headers={"Content-Type": "application/json"},
        method='POST'
    )

    try:
        with urllib.request.urlopen(req, timeout=timeout_s) as resp:
            result = json.loads(resp.read().decode())
    except Exception as e:
        # Best-effort by design: the caller moves on to the next candidate.
        return f"ERROR: {e}"

    if isinstance(result, dict) and result.get('error'):
        # Report server-side failures instead of returning an empty string.
        return f"ERROR: {result['error']}"
    try:
        return result.get('response', '')
    except AttributeError as e:  # reply was valid JSON but not an object
        return f"ERROR: {e}"
# =============================================================================
# 2. PROMPT BUILDING
# =============================================================================

def build_prompt(task: Dict) -> str:
    """Build the LLM prompt for one ARC task.

    The prompt lists every training pair as JSON, adds a short analysis
    (whether shapes match, colors used, output/input shape ratios) and ends
    with the opening lines of a ``transform`` function inside a Python code
    fence for the model to complete.

    Args:
        task: ARC task dict; only ``task['train']`` (a list of
            ``{'input': grid, 'output': grid}`` dicts) is read.

    Returns:
        The prompt text.
    """
    train_pairs = task.get('train', [])

    examples = []
    for i, pair in enumerate(train_pairs):
        examples.append(
            f"Example {i+1}:\n"
            f" Input: {json.dumps(pair['input'])}\n"
            f" Output: {json.dumps(pair['output'])}"
        )
    examples_str = "\n".join(examples)

    # Basic analysis
    inputs = [np.array(p['input']) for p in train_pairs]
    outputs = [np.array(p['output']) for p in train_pairs]

    same_shape = all(i.shape == o.shape for i, o in zip(inputs, outputs))
    in_colors = sorted(set(c for i in inputs for c in np.unique(i).tolist()))
    out_colors = sorted(set(c for o in outputs for c in np.unique(o).tolist()))

    analysis = f" Same input/output shape: {same_shape}\n"
    analysis += f" Input colors: {in_colors}\n"
    analysis += f" Output colors: {out_colors}\n"

    if not same_shape:
        ratios = []
        for i, o in zip(inputs, outputs):
            # An empty or zero-sized grid has no meaningful ratio; record None
            # instead of raising IndexError / ZeroDivisionError.
            if i.ndim == 2 and o.ndim == 2 and i.shape[0] and i.shape[1]:
                ratios.append((o.shape[0] / i.shape[0], o.shape[1] / i.shape[1]))
            else:
                ratios.append(None)
        analysis += f" Shape ratios (h,w): {ratios}\n"

    prompt = f"""Solve this ARC-AGI puzzle. Write ONLY a Python function, no explanations.

{examples_str}

Analysis:
{analysis}
Write a complete Python function that transforms any input grid to its output.
The function MUST work correctly for ALL examples above.

```python
import numpy as np
from collections import Counter

def transform(grid: list[list[int]]) -> list[list[int]]:
    grid = np.array(grid)
"""
    return prompt
# =============================================================================
# 3. CODE EXTRACTION AND VERIFICATION
# =============================================================================

# A line that is an import statement ("import x" / "from x import y").
_IMPORT_LINE_RE = re.compile(r'^(?:import\s+\w|from\s+[\w.]+\s+import\s)')


class _ExecTimeout(BaseException):
    """Raised by the alarm handler when generated code exceeds its time limit.

    Derives from BaseException so an ``except Exception`` inside the generated
    code does not swallow it.
    """


# Everything that running untrusted generated code is allowed to fail with.
_UNTRUSTED_CODE_ERRORS = (Exception, SystemExit, _ExecTimeout)


def _import_block_start(text: str, def_idx: int) -> int:
    """Return the index where the code leading up to ``def transform`` starts.

    Finds the last import-statement line before ``def_idx`` and extends
    backwards over the contiguous run of import and blank lines above it.
    Returns ``def_idx`` when no import line precedes the definition.
    """
    lines = text[:def_idx].splitlines(keepends=True)
    offsets = []
    pos = 0
    for line in lines:
        offsets.append(pos)
        pos += len(line)

    first_import = None
    for k in range(len(lines) - 1, -1, -1):
        line = lines[k]
        if _IMPORT_LINE_RE.match(line):
            first_import = k
        elif first_import is not None and line.strip():
            break  # hit prose/other code above the import group
    return def_idx if first_import is None else offsets[first_import]


def _call_with_time_limit(func, seconds, *args):
    """Call ``func(*args)``, aborting with ``_ExecTimeout`` after ``seconds``.

    The limit uses ``signal.setitimer``/``SIGALRM``, which only works on
    platforms that provide them and only in the main thread. Elsewhere, or
    when ``seconds`` is falsy/non-positive, the call runs without a limit.
    """
    import threading

    can_alarm = (
        seconds is not None and seconds > 0
        and hasattr(signal, 'setitimer') and hasattr(signal, 'SIGALRM')
        and threading.current_thread() is threading.main_thread()
    )
    if not can_alarm:
        return func(*args)

    def _on_alarm(signum, frame):
        raise _ExecTimeout(f"execution exceeded {seconds}s")

    previous = signal.signal(signal.SIGALRM, _on_alarm)
    signal.setitimer(signal.ITIMER_REAL, seconds)
    try:
        return func(*args)
    finally:
        # Always disarm the timer and restore whatever handler was installed.
        signal.setitimer(signal.ITIMER_REAL, 0)
        signal.signal(signal.SIGALRM, previous)


def _load_transform(code: str, timeout_s: float):
    """Exec ``code`` in a fresh namespace and return its ``transform`` (or None).

    SECURITY NOTE: ``code`` is LLM-generated and therefore untrusted. ``exec``
    gives it full access to the interpreter; the time limit only guards
    against hangs. Run this script in a disposable environment.
    """
    namespace = {'np': np, 'numpy': np, 'Counter': Counter,
                 'collections': __import__('collections')}
    _call_with_time_limit(exec, timeout_s, code, namespace)
    return namespace.get('transform')


def extract_code(response: str) -> Optional[str]:
    """Extract the Python ``transform`` function from an LLM response.

    Tries, in order: fenced code blocks containing ``def transform``; a bare
    ``def transform`` together with the import statements preceding it; a
    response that starts with ``import`` or ``def transform``.

    Returns:
        The extracted source code, or None when nothing usable is found.
    """
    # Try ```python blocks
    for pattern in [r'```python\s*(.*?)```', r'```\s*(.*?)```']:
        matches = re.findall(pattern, response, re.DOTALL)
        for match in matches:
            if 'def transform' in match:
                return match.strip()

    # Try finding def transform directly
    idx = response.find('def transform')
    if idx >= 0:
        # Start at the first line of the import group that precedes the
        # function. Anchoring on whole lines keeps "from x import y" intact.
        code = response[_import_block_start(response, idx):]

        # Trim at the next ``` (e.g. the closing fence of an unmatched block)
        end = code.find('```')
        if end > 0:
            code = code[:end]
        return code.strip()

    # If response itself looks like code (starts with import or def)
    stripped = response.strip()
    if stripped.startswith('import') or stripped.startswith('def transform'):
        return stripped

    return None


def verify_program(code: str, train_pairs: List[Dict],
                   timeout_s: float = 10.0) -> bool:
    """Execute a candidate program and verify it against all training pairs.

    Args:
        code: Python source expected to define ``transform(grid)``.
        train_pairs: list of ``{'input': grid, 'output': grid}`` dicts.
        timeout_s: time limit for loading the code and for each
            ``transform`` call (applied where SIGALRM timers are usable).

    Returns:
        True only if ``transform`` reproduces every expected output exactly;
        False on any mismatch, error, or timeout.
    """
    try:
        transform_fn = _load_transform(code, timeout_s)
    except _UNTRUSTED_CODE_ERRORS:
        return False

    if transform_fn is None:
        return False

    for pair in train_pairs:
        try:
            inp = [row[:] for row in pair['input']]  # copy so the pair is not mutated
            result = _call_with_time_limit(transform_fn, timeout_s, inp)
            if result is None:
                return False
            result_arr = np.array(result, dtype=int)
            expected_arr = np.array(pair['output'], dtype=int)
            if result_arr.shape != expected_arr.shape:
                return False
            if not np.array_equal(result_arr, expected_arr):
                return False
        except _UNTRUSTED_CODE_ERRORS:
            return False

    return True


def apply_program(code: str, test_input: List[List[int]],
                  timeout_s: float = 10.0) -> Optional[List[List[int]]]:
    """Apply a verified program to a test input.

    Args:
        code: Python source defining ``transform(grid)``.
        test_input: input grid as a list of rows.
        timeout_s: time limit for loading the code and for the call
            (applied where SIGALRM timers are usable).

    Returns:
        The output grid as nested lists of ints, or None if the program
        fails, times out, or returns None.
    """
    try:
        transform_fn = _load_transform(code, timeout_s)
        if transform_fn is None:
            return None
        result = _call_with_time_limit(
            transform_fn, timeout_s, [row[:] for row in test_input])
        if result is not None:
            return [list(row) for row in np.array(result, dtype=int).tolist()]
    except _UNTRUSTED_CODE_ERRORS:
        # Best-effort: a program that verified on train may still fail on test.
        pass
    return None


# =============================================================================
# 4. SYNTHESIS ENGINE
# =============================================================================

def synthesize_task(task: Dict, model: str = "qwen2.5-coder:32b",
                    n_candidates: int = 8,
                    verbose: bool = False) -> Optional[Tuple[str, str]]:
    """
    Try to solve a task via LLM.

    Samples up to ``n_candidates`` programs (first at temperature 0.1, then
    0.6, 0.7, ... capped at 1.0) and returns the first one that reproduces
    all training outputs.

    Returns (rule_name, code) if successful, None otherwise. ``rule_name``
    encodes the candidate number and the temperature actually sent to the API.
    """
    train_pairs = task.get('train', [])
    if not train_pairs:
        return None

    prompt = build_prompt(task)

    for i in range(n_candidates):
        # First try low temp, then increase. Clamp once so the value sent to
        # the API and the one recorded in the rule name agree.
        temp = min(0.1 if i == 0 else 0.5 + 0.1 * i, 1.0)
        response = call_ollama(prompt, model=model, temperature=temp)

        if response.startswith("ERROR:"):
            if verbose:
                print(f" Candidate {i+1}: API error")
            continue

        code = extract_code(response)
        if code is None:
            if verbose:
                print(f" Candidate {i+1}: No code extracted")
            continue

        if verbose:
            print(f" Candidate {i+1}: {len(code)} chars", end="")

        if verify_program(code, train_pairs):
            if verbose:
                print(f" ✅")
            return (f"llm_c{i+1}_t{temp:.1f}", code)
        else:
            if verbose:
                print(f" ❌")

    return None
# =============================================================================
# 5. MAIN RUNNER
# =============================================================================

def _solve_rate(n_solved: int, n_total: int) -> float:
    """Return the percentage of solved tasks, or 0.0 when there are no tasks."""
    return 100 * n_solved / n_total if n_total else 0.0


def _save_report(path: str, report: Dict) -> None:
    """Write ``report`` to ``path`` as indented JSON (UTF-8)."""
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2)


def main():
    """Run LLM synthesis over every ARC task not already solved symbolically.

    Configuration is read from environment variables (``OLLAMA_MODEL``,
    ``N_CANDIDATES``, ``ARC_DIR``, ``ALREADY_SOLVED``, ``OUTPUT_FILE``).
    Results are written to the output file every 10 tasks and at the end.
    The Ollama server started here is terminated on exit, including when an
    exception aborts the run.
    """
    # --- Configuration ---
    MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:32b")
    # For smaller GPUs, use:
    #   MODEL = "qwen2.5-coder:14b"  (fits T4 16GB)
    #   MODEL = "qwen2.5-coder:7b"   (fits any GPU)

    N_CANDIDATES = int(os.environ.get("N_CANDIDATES", "8"))
    ARC_DIR = os.environ.get("ARC_DIR", "arc_data/training")
    ALREADY_SOLVED_FILE = os.environ.get("ALREADY_SOLVED", "already_solved.json")
    OUTPUT_FILE = os.environ.get("OUTPUT_FILE", "llm_results.json")

    print("=" * 60)
    print("PEMF ARC-AGI — LLM Program Synthesis (Kaggle/Ollama)")
    print("=" * 60)
    print(f"Model: {MODEL}")
    print(f"Candidates per task: {N_CANDIDATES}")
    print(f"ARC data: {ARC_DIR}")
    print()

    # --- Install & start Ollama ---
    try:
        subprocess.run(["ollama", "--version"], capture_output=True, check=True)
        print("Ollama already installed.")
    except (FileNotFoundError, subprocess.CalledProcessError):
        install_ollama()

    server = start_ollama()
    try:
        try:
            pull_model(MODEL)
        except Exception as e:
            print(f"Failed to pull {MODEL}: {e}")
            print("Trying smaller model...")
            MODEL = "qwen2.5-coder:7b"
            pull_model(MODEL)

        # --- Load already solved tasks ---
        already_solved = set()
        if os.path.exists(ALREADY_SOLVED_FILE):
            with open(ALREADY_SOLVED_FILE, encoding='utf-8') as f:
                already_solved = set(json.load(f))
        print(f"Already solved (symbolic): {len(already_solved)} tasks")

        # --- Load ARC tasks ---
        import glob
        task_files = sorted(glob.glob(os.path.join(ARC_DIR, "*.json")))
        print(f"Total ARC tasks: {len(task_files)}")

        unsolved_files = []
        for tf in task_files:
            # Strip only the extension; ".json" inside the name is kept.
            tid = os.path.splitext(os.path.basename(tf))[0]
            if tid not in already_solved:
                unsolved_files.append((tid, tf))

        print(f"Unsolved tasks to try: {len(unsolved_files)}")
        print()

        # --- Run synthesis ---
        results = {}
        solved = 0
        total_time = 0

        for idx, (tid, tf) in enumerate(unsolved_files):
            with open(tf, encoding='utf-8') as f:
                task = json.load(f)

            print(f"[{idx+1:3d}/{len(unsolved_files)}] {tid}:", end=" ", flush=True)

            start = time.time()
            result = synthesize_task(task, model=MODEL,
                                     n_candidates=N_CANDIDATES, verbose=False)
            elapsed = time.time() - start
            total_time += elapsed

            if result:
                rule_name, code = result
                solved += 1

                # Apply to test pairs (an entry is None when the program fails)
                test_outputs = []
                for test in task.get('test', []):
                    out = apply_program(code, test['input'])
                    test_outputs.append(out)

                results[tid] = {
                    'status': 'solved',
                    'rule': rule_name,
                    'code': code,
                    'test_outputs': test_outputs,
                    'time_s': round(elapsed, 2),
                }
                print(f"✅ {rule_name} ({elapsed:.1f}s)")
            else:
                results[tid] = {
                    'status': 'failed',
                    'time_s': round(elapsed, 2),
                }
                print(f"❌ ({elapsed:.1f}s)")

            # Save progress periodically so an interrupted run keeps results
            if (idx + 1) % 10 == 0:
                _save_report(OUTPUT_FILE, {
                    'model': MODEL,
                    'n_candidates': N_CANDIDATES,
                    'solved': solved,
                    'attempted': idx + 1,
                    'total_time_s': round(total_time, 1),
                    'results': results,
                })
                print(f" [Progress saved: {solved}/{idx+1} solved]")

        # --- Final save ---
        total_solved = len(already_solved) + solved
        # _solve_rate avoids ZeroDivisionError when ARC_DIR holds no tasks.
        rate = _solve_rate(total_solved, len(task_files))
        _save_report(OUTPUT_FILE, {
            'model': MODEL,
            'n_candidates': N_CANDIDATES,
            'solved': solved,
            'attempted': len(unsolved_files),
            'total_time_s': round(total_time, 1),
            'already_solved_symbolic': len(already_solved),
            'total_solved': total_solved,
            'total_tasks': len(task_files),
            'solve_rate': round(rate, 2),
            'results': results,
        })

        # --- Summary ---
        print()
        print("=" * 60)
        print("FINAL RESULTS")
        print("=" * 60)
        print(f"LLM solved: {solved}/{len(unsolved_files)} unsolved tasks")
        print(f"Symbolic solved: {len(already_solved)}")
        print(f"TOTAL SOLVED: {total_solved}/{len(task_files)} ({rate:.1f}%)")
        print(f"Total LLM time: {total_time:.0f}s ({total_time/max(1,len(unsolved_files)):.1f}s/task)")
        print(f"Results saved to: {OUTPUT_FILE}")
    finally:
        # Cleanup: stop the background server even if the run failed.
        server.terminate()
# =============================================================================
# 6. GENERATE already_solved.json FROM SYMBOLIC RESULTS
# =============================================================================

def generate_already_solved(summary_file: str, output_file: str = "already_solved.json"):
    """
    Generate already_solved.json from a v4 summary file.
    Run this BEFORE running on Kaggle.

    The summary must be a JSON object with a ``results`` list. The
    ``task_id`` of every entry whose ``all_train_solved`` value is truthy is
    written to ``output_file`` as a JSON list.

    Args:
        summary_file: path of the summary JSON to read.
        output_file: path of the JSON list to write.

    Raises:
        KeyError: the summary has no ``results`` key, or a solved entry has
            no ``task_id``.
    """
    # Explicit UTF-8 so the files read/write the same on every platform.
    with open(summary_file, encoding='utf-8') as f:
        data = json.load(f)

    solved = [r['task_id'] for r in data['results'] if r.get('all_train_solved')]

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(solved, f)

    print(f"Wrote {len(solved)} solved task IDs to {output_file}")


if __name__ == "__main__":
    # If run with --generate-solved, create the already_solved.json
    if len(sys.argv) > 1 and sys.argv[1] == "--generate-solved":
        summary = sys.argv[2] if len(sys.argv) > 2 else "arc_results/summary_v4.json"
        generate_already_solved(summary)
    else:
        main()