ARC-AGI / pemf /scripts /llm_solver_cloud.py
Roger MT
move files into pemf folder
feb08d1
"""
PEMF ARC-AGI — LLM Program Synthesis (Multi-Provider)
=====================================================
Supports:
- NVIDIA NIM (free — DeepSeek V4 Pro, GLM-4, Qwen, Llama)
- Google Gemini (free tier: 15 RPM)
- DeepSeek direct API (very cheap)
- GLM/Zhipu direct API (free tier)
- Ollama local (any model)
Usage:
# NVIDIA NIM — FREE, best option (GLM 4.7 default)
export LLM_PROVIDER=nvidia
export NVIDIA_API_KEY=nvapi-xxxxx
python llm_solver_cloud.py
# Get key: https://build.nvidia.com/settings/api-keys
# Default model: z-ai/glm4.7
# NVIDIA NIM with DeepSeek V4
export LLM_PROVIDER=nvidia
export NVIDIA_API_KEY=nvapi-xxxxx
export LLM_MODEL=deepseek-ai/deepseek-v4-pro
python llm_solver_cloud.py
# Gemini (free)
export LLM_PROVIDER=gemini
export GEMINI_API_KEY=your_key
python llm_solver_cloud.py
# Ollama local
export LLM_PROVIDER=ollama
export LLM_MODEL=qwen2.5-coder:32b
python llm_solver_cloud.py
"""
import os
import sys
import json
import time
import re
import glob
import numpy as np
from typing import Dict, List, Optional, Tuple
from collections import Counter
import urllib.request
# =============================================================================
# PROVIDER CONFIGS
# =============================================================================
# Registry of supported LLM back-ends, keyed by the value main() reads from
# the LLM_PROVIDER environment variable.
#
# Fields read by main():
#   name          - label printed in the run banner
#   default_model - model id used when LLM_MODEL is not set
#   env_key       - name of the environment variable that holds the API key
#                   (None for ollama, which main() exempts from the key check)
#   get_key_url   - printed as a hint when the API key is missing
#
# "base_url", "free_tier" and "models" are not read by any code visible in
# this file; the call_* helpers hardcode their own endpoint URLs.
PROVIDERS = {
    "nvidia": {
        "name": "NVIDIA NIM (free — DeepSeek V4, GLM 4.7, Qwen, Llama)",
        "base_url": "https://integrate.api.nvidia.com/v1/chat/completions",
        "default_model": "z-ai/glm4.7",
        "env_key": "NVIDIA_API_KEY",
        "free_tier": "Free for NVIDIA Developer Program members",
        "get_key_url": "https://build.nvidia.com/settings/api-keys",
        # Short alias -> full model id.
        "models": {
            "glm4.7": "z-ai/glm4.7",
            "deepseek-v4": "deepseek-ai/deepseek-v4-pro",
        },
    },
    "gemini": {
        "name": "Google Gemini",
        # "{model}" is a placeholder for the model id.
        "base_url": "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent",
        "default_model": "gemini-2.0-flash",
        "env_key": "GEMINI_API_KEY",
        "free_tier": "15 RPM, 1M tokens/day",
        "get_key_url": "https://aistudio.google.com/apikey",
    },
    "deepseek": {
        "name": "DeepSeek (direct API)",
        "base_url": "https://api.deepseek.com/v1/chat/completions",
        "default_model": "deepseek-chat",
        "env_key": "DEEPSEEK_API_KEY",
        "free_tier": "$0.07/M input, $0.27/M output",
        "get_key_url": "https://platform.deepseek.com/api_keys",
    },
    "glm": {
        "name": "GLM (Zhipu AI direct)",
        "base_url": "https://open.bigmodel.cn/api/paas/v4/chat/completions",
        "default_model": "glm-4-flash",
        "env_key": "GLM_API_KEY",
        "free_tier": "glm-4-flash is free",
        "get_key_url": "https://open.bigmodel.cn/usercenter/apikeys",
    },
    "ollama": {
        "name": "Ollama (local)",
        "base_url": "http://localhost:11434/api/generate",
        "default_model": "qwen2.5-coder:32b",
        # No API key; this entry also has no "free_tier" / "get_key_url".
        "env_key": None,
    },
}
# =============================================================================
# API CALLERS
# =============================================================================
def call_nvidia(prompt: str, api_key: str, model: str = "deepseek-ai/deepseek-v4-pro",
                temperature: float = 0.7) -> str:
    """Send one single-turn chat request to NVIDIA NIM (OpenAI-compatible API).

    Args:
        prompt: Text sent as the only user message.
        api_key: Token sent as ``Authorization: Bearer <api_key>``.
        model: Model identifier placed in the request payload.
        temperature: Sampling temperature placed in the request payload.

    Returns:
        The first choice's message content. This function never raises for
        request/parse failures: any exception, and a missing or null
        ``content`` field, is reported as a string starting with ``"ERROR:"``
        so callers can test ``response.startswith("ERROR:")``.
    """
    url = "https://integrate.api.nvidia.com/v1/chat/completions"
    body = json.dumps({
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 2048,
        "temperature": temperature,
    }).encode('utf-8')
    request = urllib.request.Request(
        url,
        data=body,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        },
        method='POST',
    )
    try:
        with urllib.request.urlopen(request, timeout=120) as resp:
            result = json.loads(resp.read().decode())
        content = result['choices'][0]['message']['content']
    except Exception as e:
        return f"ERROR: {e}"
    # "content" may be JSON null (e.g. the token budget ran out before any
    # answer text was produced). Returning None would break the "-> str"
    # contract and crash callers that call .startswith() on the result.
    if not isinstance(content, str):
        return "ERROR: No response content"
    return content
def call_gemini(prompt: str, api_key: str, model: str = "gemini-2.0-flash",
                temperature: float = 0.7) -> str:
    """Send one generateContent request to the Google Gemini REST API.

    Args:
        prompt: Text sent as the single content part.
        api_key: Gemini API key, sent in the ``x-goog-api-key`` header.
        model: Model identifier interpolated into the endpoint path.
        temperature: Sampling temperature placed in ``generationConfig``.

    Returns:
        The text of the first part of the first candidate. Failures never
        raise: an exception, or a response without usable text content, is
        reported as a string starting with ``"ERROR:"``.
    """
    # The key travels in a header rather than in the "?key=" query string so
    # it does not end up in URLs recorded by proxies, logs or tracebacks.
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
    body = json.dumps({
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {
            "temperature": temperature,
            "maxOutputTokens": 2048,
        },
    }).encode('utf-8')
    request = urllib.request.Request(
        url,
        data=body,
        headers={
            "Content-Type": "application/json",
            "x-goog-api-key": api_key,
        },
        method='POST',
    )
    try:
        with urllib.request.urlopen(request, timeout=120) as resp:
            result = json.loads(resp.read().decode())
        candidates = result.get('candidates', [])
        if candidates:
            parts = candidates[0].get('content', {}).get('parts', [])
            if parts:
                text = parts[0].get('text', '')
                # Keep the "-> str" contract even if "text" is null.
                if isinstance(text, str):
                    return text
        return "ERROR: No response content"
    except Exception as e:
        return f"ERROR: {e}"
def call_deepseek(prompt: str, api_key: str, model: str = "deepseek-chat",
                  temperature: float = 0.7) -> str:
    """Send one single-turn chat request to the DeepSeek API (OpenAI-compatible).

    Args:
        prompt: Text sent as the only user message.
        api_key: Token sent as ``Authorization: Bearer <api_key>``.
        model: Model identifier placed in the request payload.
        temperature: Sampling temperature placed in the request payload.

    Returns:
        The first choice's message content. Failures never raise: any
        exception, and a missing or null ``content`` field, is reported as a
        string starting with ``"ERROR:"``.
    """
    url = "https://api.deepseek.com/v1/chat/completions"
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 2048,
        "temperature": temperature,
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    request = urllib.request.Request(
        url, data=json.dumps(payload).encode('utf-8'), headers=headers, method='POST')
    try:
        with urllib.request.urlopen(request, timeout=120) as resp:
            result = json.loads(resp.read().decode())
        content = result['choices'][0]['message']['content']
    except Exception as e:
        return f"ERROR: {e}"
    # A JSON null "content" must not leak out as None: callers rely on
    # getting a str back (they call .startswith("ERROR:") on it).
    if not isinstance(content, str):
        return "ERROR: No response content"
    return content
def call_glm(prompt: str, api_key: str, model: str = "glm-4-flash",
             temperature: float = 0.7) -> str:
    """Send one single-turn chat request to the GLM/Zhipu API (OpenAI-compatible).

    Args:
        prompt: Text sent as the only user message.
        api_key: Token sent as ``Authorization: Bearer <api_key>``.
        model: Model identifier placed in the request payload.
        temperature: Sampling temperature placed in the request payload.

    Returns:
        The first choice's message content. Failures never raise: any
        exception, and a missing or null ``content`` field, is reported as a
        string starting with ``"ERROR:"``.
    """
    url = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 2048,
        "temperature": temperature,
    }
    request = urllib.request.Request(
        url,
        data=json.dumps(payload).encode('utf-8'),
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        },
        method='POST',
    )
    try:
        with urllib.request.urlopen(request, timeout=120) as resp:
            result = json.loads(resp.read().decode())
        content = result['choices'][0]['message']['content']
    except Exception as e:
        return f"ERROR: {e}"
    # Guard against a JSON null "content" so the declared str return type
    # holds and callers can safely call .startswith() on the result.
    if not isinstance(content, str):
        return "ERROR: No response content"
    return content
def call_ollama(prompt: str, model: str = "qwen2.5-coder:32b",
                temperature: float = 0.7) -> str:
    """Run one non-streaming generation against the Ollama server on localhost.

    Args:
        prompt: Prompt text placed in the request payload.
        model: Model name placed in the request payload.
        temperature: Sampling temperature, sent under ``options``.

    Returns:
        The ``response`` field of the reply ('' if the field is absent), or a
        string starting with ``"ERROR:"`` if the request or parsing raised.
    """
    endpoint = "http://localhost:11434/api/generate"
    generation_options = {"temperature": temperature, "num_predict": 2048}
    request_body = json.dumps({
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": generation_options,
    }).encode('utf-8')
    request = urllib.request.Request(
        endpoint,
        data=request_body,
        headers={"Content-Type": "application/json"},
        method='POST',
    )
    try:
        # Longer timeout than the cloud callers use (180s vs 120s).
        with urllib.request.urlopen(request, timeout=180) as reply:
            parsed = json.loads(reply.read().decode())
            return parsed.get('response', '')
    except Exception as e:
        return f"ERROR: {e}"
def call_llm(prompt: str, provider: str, api_key: str = "",
             model: str = "", temperature: float = 0.7) -> str:
    """Dispatch a prompt to the caller for the given provider.

    Args:
        prompt: Prompt text forwarded to the provider-specific caller.
        provider: One of "nvidia", "gemini", "deepseek", "glm", "ollama".
        api_key: Forwarded to every caller except ollama, which takes none.
        model: Model id; when empty, the provider's ``default_model`` from
            PROVIDERS is used.
        temperature: Sampling temperature forwarded to the caller.

    Returns:
        Whatever the provider-specific caller returns, or
        ``"ERROR: Unknown provider <provider>"`` for an unsupported provider.
    """
    if provider not in ("nvidia", "gemini", "deepseek", "glm", "ollama"):
        return f"ERROR: Unknown provider {provider}"

    # Take the fallback model from PROVIDERS so there is a single source of
    # truth. Previously the nvidia fallback here was
    # "deepseek-ai/deepseek-v4-pro" while PROVIDERS (and the module
    # docstring) declare "z-ai/glm4.7" as the default.
    chosen_model = model or PROVIDERS[provider]["default_model"]

    if provider == "ollama":
        # Local server: no API key argument.
        return call_ollama(prompt, chosen_model, temperature)
    if provider == "nvidia":
        return call_nvidia(prompt, api_key, chosen_model, temperature)
    if provider == "gemini":
        return call_gemini(prompt, api_key, chosen_model, temperature)
    if provider == "deepseek":
        return call_deepseek(prompt, api_key, chosen_model, temperature)
    return call_glm(prompt, api_key, chosen_model, temperature)
# =============================================================================
# PROMPT, EXTRACTION, VERIFICATION (same as before)
# =============================================================================
def build_prompt(task: Dict) -> str:
    """Build the synthesis prompt for one task.

    The prompt lists every training pair as JSON, adds a short analysis
    (whether all pairs keep their shape, which colors occur, and the first
    pair's shape change if shapes differ), and ends with an opened Python
    code fence containing the start of ``transform`` for the model to
    continue.

    Args:
        task: Task dict; ``task['train']`` is a list of dicts with 'input'
            and 'output' grids. A missing 'train' key is treated as empty.

    Returns:
        The prompt text.
    """
    train_pairs = task.get('train', [])

    examples_str = "\n".join(
        f"Example {number}:\n"
        f" Input: {json.dumps(pair['input'])}\n"
        f" Output: {json.dumps(pair['output'])}"
        for number, pair in enumerate(train_pairs, start=1)
    )

    inputs = [np.array(pair['input']) for pair in train_pairs]
    outputs = [np.array(pair['output']) for pair in train_pairs]
    same_shape = all(src.shape == dst.shape for src, dst in zip(inputs, outputs))
    in_colors = sorted({color for grid in inputs for color in np.unique(grid).tolist()})
    out_colors = sorted({color for grid in outputs for color in np.unique(grid).tolist()})

    analysis = f" Same input/output shape: {same_shape}\n"
    analysis += f" Input colors: {in_colors}, Output colors: {out_colors}\n"
    if not same_shape:
        # Only the first pair's shapes are reported.
        for src, dst in zip(inputs[:1], outputs[:1]):
            analysis += f" Shape: {src.shape} -> {dst.shape}\n"

    # The stub's body line is indented so the code the model is asked to
    # continue is syntactically valid Python; at column 0 it would not be
    # part of the function.
    return f"""Solve this ARC-AGI puzzle. Write ONLY a Python function, no explanations.
{examples_str}
Analysis:
{analysis}
```python
import numpy as np
from collections import Counter, deque
from scipy.ndimage import label
def transform(grid: list[list[int]]) -> list[list[int]]:
    grid = np.array(grid)
"""
# Matches a top-level import statement at the very start of a line.
_IMPORT_LINE_RE = re.compile(r'(?:import\s+\S|from\s+\S+\s+import\s)')


def _import_header_start(text: str, def_idx: int) -> int:
    """Return the offset where the import header preceding ``def_idx`` starts.

    Finds the nearest column-0 import line above the line containing
    ``def_idx``, then extends upward across directly preceding import,
    blank and comment lines. Returns ``def_idx`` if there is no import line.
    """
    def_line_start = text.rfind('\n', 0, def_idx) + 1
    preceding = text[:def_line_start].splitlines(keepends=True)

    # Character offset of each preceding line.
    offsets = []
    position = 0
    for line in preceding:
        offsets.append(position)
        position += len(line)

    nearest = next(
        (k for k in range(len(preceding) - 1, -1, -1)
         if _IMPORT_LINE_RE.match(preceding[k])),
        None,
    )
    if nearest is None:
        return def_idx

    first = nearest
    for k in range(nearest - 1, -1, -1):
        line = preceding[k]
        if _IMPORT_LINE_RE.match(line):
            first = k
        elif line.strip() and not line.lstrip().startswith('#'):
            break  # prose, a fence marker, or other code ends the header
    return offsets[first]


def extract_code(response: str) -> Optional[str]:
    """Pull the Python source that defines ``transform`` out of an LLM reply.

    Strategy, in order:
      1. The first fenced code block (python-tagged blocks first, then any
         fence) whose body contains ``def transform``.
      2. If no such fenced block exists (e.g. the closing fence is missing),
         the text from the import header preceding ``def transform`` up to
         the next fence marker or the end of the reply.
      3. The whole stripped reply if it starts with ``import``/``from``.

    Args:
        response: Raw text returned by the model.

    Returns:
        The extracted source with surrounding whitespace stripped, or None
        if nothing that looks like code was found.
    """
    fence_patterns = (
        # Accept "python", "python3" and "py" tags in any letter case so the
        # tag is not captured as the first line of the code.
        (r'```(?:python3?|py)\b\s*(.*?)```', re.DOTALL | re.IGNORECASE),
        (r'```\s*(.*?)```', re.DOTALL),
    )
    for pattern, flags in fence_patterns:
        for candidate in re.findall(pattern, response, flags):
            if 'def transform' in candidate:
                return candidate.strip()

    def_idx = response.find('def transform')
    if def_idx >= 0:
        # The previous max(rfind('import '), rfind('from ')) logic could land
        # in the middle of a "from X import Y" line (yielding "import Y") and
        # always dropped every import line except the last one.
        code = response[_import_header_start(response, def_idx):]
        fence = code.find('```')
        if fence > 0:
            code = code[:fence]
        return code.strip()

    stripped = response.strip()
    if stripped.startswith(('import', 'def transform', 'from')):
        return stripped
    return None
def verify_program(code: str, train_pairs: List[Dict]) -> bool:
    """Check that ``code`` defines a ``transform`` that reproduces every pair.

    SECURITY NOTE: ``code`` is executed with ``exec`` in this process, with
    no sandbox and no timeout. It is model-generated text; only run this
    where executing untrusted code is acceptable.

    Args:
        code: Python source expected to define ``transform(grid)``.
        train_pairs: Dicts with 'input' and 'output' grids.

    Returns:
        True only if the code executes, defines a callable ``transform``, and
        for every pair the result has the expected shape and equal values.
        Any exception results in False. With no pairs, the result is True as
        soon as ``transform`` is defined.
    """
    namespace = {'np': np, 'numpy': np, 'Counter': Counter,
                 'deque': __import__('collections').deque}
    # Expose scipy to the generated code when it is installed.
    try:
        import scipy.ndimage
        namespace['scipy'] = __import__('scipy')
    except ImportError:
        pass

    try:
        exec(code, namespace)
    except Exception:
        return False

    fn = namespace.get('transform')
    if not callable(fn):
        return False

    for pair in train_pairs:
        try:
            # Hand over a row-wise copy so the program cannot mutate the task.
            result = fn([row[:] for row in pair['input']])
            if result is None:
                return False
            # Compare the values as returned. Casting the result with
            # dtype=int first would truncate e.g. 2.5 to 2 and let a wrong
            # output pass; integral floats such as 2.0 still compare equal.
            actual = np.asarray(result)
            expected = np.array(pair['output'], dtype=int)
            if actual.shape != expected.shape or not np.array_equal(actual, expected):
                return False
        except Exception:
            return False
    return True
def apply_program(code: str, test_input):
    """Execute ``code`` and run its ``transform`` on one test grid.

    NOTE: ``code`` is run with ``exec`` in this process (no sandbox).

    Args:
        code: Python source expected to define ``transform(grid)``.
        test_input: Grid as a list of rows; a row-wise copy is passed on.

    Returns:
        The result converted to nested lists of ints, or None when
        ``transform`` returns None or anything raises along the way.
    """
    from collections import deque

    sandbox = {'np': np, 'numpy': np, 'Counter': Counter, 'deque': deque}

    # scipy is optional: expose it only when the import succeeds.
    try:
        import scipy.ndimage
    except ImportError:
        pass
    else:
        sandbox['scipy'] = __import__('scipy')

    try:
        exec(code, sandbox)
        grid_copy = [row[:] for row in test_input]
        output = sandbox['transform'](grid_copy)
        if output is None:
            return None
        return np.array(output, dtype=int).tolist()
    except Exception:
        # Best effort: a failing program simply yields no prediction.
        return None
# =============================================================================
# SYNTHESIS + MAIN
# =============================================================================
def synthesize_task(task, provider, api_key, model, n_candidates=8, verbose=False):
    """Sample candidate programs from the LLM until one passes verification.

    The first candidate is sampled at temperature 0.1; later ones at
    ``min(0.4 + 0.15 * i, 1.2)`` for candidate index ``i``.

    Args:
        task: Task dict; ``task['train']`` is used for verification.
        provider: Provider name forwarded to ``call_llm``.
        api_key: API key forwarded to ``call_llm``.
        model: Model id forwarded to ``call_llm``.
        n_candidates: Maximum number of candidates to sample.
        verbose: Print one progress line per candidate.

    Returns:
        ``("llm_c<k>", code)`` for the first candidate (1-based ``k``) that
        ``verify_program`` accepts, or None if none does.
    """
    prompt = build_prompt(task)
    # Extra attempts granted to a candidate whose request was rate limited.
    rate_limit_retries = 2

    for i in range(n_candidates):
        temp = 0.1 if i == 0 else min(0.4 + 0.15 * i, 1.2)

        response = "ERROR: No response content"
        for _attempt in range(1 + rate_limit_retries):
            response = call_llm(prompt, provider, api_key, model, temp)
            # Defensive: never call str methods on a None / non-str reply.
            if not isinstance(response, str):
                response = "ERROR: No response content"
            if not response.startswith("ERROR:"):
                break
            if verbose: print(f" C{i+1}: {response[:60]}")
            # Rate limit: wait, then retry this same candidate so the limit
            # does not silently consume the candidate budget.
            if "429" in response or "rate" in response.lower():
                time.sleep(5)
                continue
            break  # any other error: give up on this candidate

        if response.startswith("ERROR:"):
            continue

        code = extract_code(response)
        if code is None:
            if verbose: print(f" C{i+1}: no code")
            continue

        if verbose: print(f" C{i+1}: {len(code)}ch", end="")
        if verify_program(code, task['train']):
            if verbose: print(" ✅")
            return (f"llm_c{i+1}", code)
        if verbose: print(" ❌")

    return None
def main():
    """Run LLM synthesis over all unsolved tasks and write a JSON report.

    Configuration comes from environment variables:
        LLM_PROVIDER    provider key in PROVIDERS (default "gemini")
        LLM_MODEL       model id (default: the provider's default_model;
                        for ollama, OLLAMA_MODEL is honoured as a fallback)
        N_CANDIDATES    candidates sampled per task (default 8)
        ARC_DIR         directory of task *.json files
        ALREADY_SOLVED  JSON file with ids of tasks to skip
        OUTPUT_FILE     where the report is written

    The report is saved after every 10 tasks and once more at the end.
    """
    provider = os.environ.get("LLM_PROVIDER", "gemini")
    config = PROVIDERS.get(provider)
    if config is None:
        # Previously this fell through to a misleading "No API key! Set ???".
        print(f"Unknown LLM_PROVIDER {provider!r}. Choose one of: {', '.join(PROVIDERS)}")
        return

    env_key = config.get("env_key")
    api_key = os.environ.get(env_key, "") if env_key else ""

    model = os.environ.get("LLM_MODEL", "")
    if not model and provider == "ollama":
        # The module docstring's usage advertises OLLAMA_MODEL.
        model = os.environ.get("OLLAMA_MODEL", "")
    model = model or config.get("default_model", "")

    n_candidates = int(os.environ.get("N_CANDIDATES", "8"))
    arc_dir = os.environ.get("ARC_DIR", "arc_data/training")
    already_solved_path = os.environ.get("ALREADY_SOLVED", "already_solved.json")
    output_path = os.environ.get("OUTPUT_FILE", "llm_results.json")

    print("=" * 60)
    print(f"PEMF ARC-AGI — LLM Synthesis ({config.get('name', provider)})")
    print("=" * 60)
    print(f"Provider: {provider}")
    print(f"Model: {model}")
    print(f"Candidates/task: {n_candidates}")
    if not api_key and provider != "ollama":
        print(f"\n⚠️ No API key! Set {config.get('env_key', '???')}")
        print(f" Get key: {config.get('get_key_url', '?')}")
        return
    print()

    # Load already solved
    already_solved = set()
    if os.path.exists(already_solved_path):
        with open(already_solved_path) as f:
            already_solved = set(json.load(f))
    print(f"Symbolic solved: {len(already_solved)}")

    # Load tasks
    task_files = sorted(glob.glob(os.path.join(arc_dir, "*.json")))
    if not task_files:
        # Without this guard the summary below divides by zero.
        print(f"No task files (*.json) found in {arc_dir!r}; nothing to do.")
        return
    tasks = [(os.path.splitext(os.path.basename(tf))[0], tf) for tf in task_files]
    unsolved = [(tid, tf) for tid, tf in tasks if tid not in already_solved]
    print(f"Total tasks: {len(task_files)}, unsolved: {len(unsolved)}")
    print()

    # Seconds to pause after each task, per provider.
    pause_s = {"gemini": 4, "nvidia": 2, "deepseek": 1, "glm": 1}.get(provider, 0)

    # Run
    results = {}
    solved = 0
    total_time = 0
    for idx, (tid, tf) in enumerate(unsolved):
        with open(tf) as f:
            task = json.load(f)
        print(f"[{idx+1:3d}/{len(unsolved)}] {tid}:", end=" ", flush=True)

        start = time.time()
        result = synthesize_task(task, provider, api_key, model, n_candidates, verbose=False)
        elapsed = time.time() - start
        total_time += elapsed

        if result:
            rule, code = result
            solved += 1
            test_outputs = [apply_program(code, t['input']) for t in task.get('test', [])]
            results[tid] = {'status': 'solved', 'rule': rule, 'code': code,
                            'test_outputs': test_outputs, 'time_s': round(elapsed, 2)}
            print(f"✅ ({elapsed:.1f}s)")
        else:
            results[tid] = {'status': 'failed', 'time_s': round(elapsed, 2)}
            print(f"❌ ({elapsed:.1f}s)")

        # Rate limit respect. NOTE: this pause is per task, not per request;
        # a task may issue up to n_candidates requests back to back.
        if pause_s:
            time.sleep(pause_s)

        # Save every 10
        if (idx + 1) % 10 == 0:
            _save(output_path, provider, model, n_candidates, solved, idx+1,
                  total_time, already_solved, len(task_files), results)
            print(f" [Saved: {solved}/{idx+1}, total {len(already_solved)+solved}/{len(task_files)}]")

    # Final save
    _save(output_path, provider, model, n_candidates, solved, len(unsolved),
          total_time, already_solved, len(task_files), results)

    total_solved = len(already_solved) + solved
    print(f"\n{'='*60}")
    print(f"LLM solved: {solved}/{len(unsolved)}")
    print(f"Symbolic: {len(already_solved)}")
    print(f"TOTAL: {total_solved}/{len(task_files)} ({100*total_solved/len(task_files):.1f}%)")
    print(f"Saved: {output_path}")
def _save(path, provider, model, n_cand, solved, attempted, total_time,
          already_solved, total_tasks, results):
    """Write the run summary and per-task results to ``path`` as JSON.

    Args:
        path: Output file path (overwritten).
        provider: Provider name recorded in the report.
        model: Model id recorded in the report.
        n_cand: Candidates-per-task setting recorded in the report.
        solved: Number of tasks solved by the LLM so far.
        attempted: Number of tasks attempted so far.
        total_time: Accumulated synthesis time in seconds.
        already_solved: Collection of ids solved beforehand; only its length
            is used.
        total_tasks: Total number of tasks; the denominator of solve_rate.
        results: Per-task result dict, stored under 'results'.
    """
    symbolic = len(already_solved)
    combined = symbolic + solved
    # An empty task set must not raise ZeroDivisionError.
    solve_rate = round(100 * combined / total_tasks, 2) if total_tasks else 0.0

    # Serialise before opening the file so a failure here cannot leave a
    # truncated report behind.
    report = json.dumps({
        'provider': provider, 'model': model, 'n_candidates': n_cand,
        'llm_solved': solved, 'attempted': attempted,
        'total_time_s': round(total_time, 1),
        'symbolic_solved': symbolic,
        'total_solved': combined,
        'total_tasks': total_tasks,
        'solve_rate': solve_rate,
        'results': results,
    }, indent=2)
    with open(path, 'w') as f:
        f.write(report)
if __name__ == "__main__":
main()