""" Merge LLM results with symbolic results to get final solve count. Usage: python merge_results.py arc_results/summary_v4.json llm_results.json """ import json import sys def merge(symbolic_file: str, llm_file: str, output_file: str = "arc_results/summary_final.json"): with open(symbolic_file) as f: symbolic = json.load(f) with open(llm_file) as f: llm = json.load(f) symbolic_solved = {r['task_id'] for r in symbolic['results'] if r.get('all_train_solved')} llm_solved = {tid for tid, r in llm['results'].items() if r['status'] == 'solved'} total_solved = symbolic_solved | llm_solved new_from_llm = llm_solved - symbolic_solved print(f"Symbolic solved: {len(symbolic_solved)}") print(f"LLM solved: {len(llm_solved)}") print(f"New from LLM: {len(new_from_llm)}") print(f"TOTAL SOLVED: {len(total_solved)}/{symbolic['total_tasks']} ({100*len(total_solved)/symbolic['total_tasks']:.1f}%)") print(f"\nNew tasks solved by LLM:") for tid in sorted(new_from_llm): rule = llm['results'][tid].get('rule', '?') print(f" {tid}: {rule}") # Save merged merged = { 'total_tasks': symbolic['total_tasks'], 'symbolic_solved': len(symbolic_solved), 'llm_solved': len(llm_solved), 'new_from_llm': len(new_from_llm), 'total_solved': len(total_solved), 'solve_rate': round(100 * len(total_solved) / symbolic['total_tasks'], 2), 'symbolic_tasks': sorted(symbolic_solved), 'llm_tasks': sorted(llm_solved), 'new_llm_tasks': sorted(new_from_llm), } with open(output_file, 'w') as f: json.dump(merged, f, indent=2) print(f"\nMerged results saved to {output_file}") if __name__ == "__main__": sym = sys.argv[1] if len(sys.argv) > 1 else "arc_results/summary_v4.json" llm = sys.argv[2] if len(sys.argv) > 2 else "llm_results.json" merge(sym, llm)