| """ |
| Merge LLM results with symbolic results to get final solve count. |
| |
| Usage: |
| python merge_results.py arc_results/summary_v4.json llm_results.json |
| """ |
| import json |
| import sys |
|
|
|
|
def merge(symbolic_file: str, llm_file: str, output_file: str = "arc_results/summary_final.json"):
    """Combine symbolic-solver and LLM results into one solve summary.

    Reads both JSON files, prints a short report to stdout, and writes the
    merged summary as JSON to ``output_file``.

    Args:
        symbolic_file: JSON file holding a ``total_tasks`` count and a
            ``results`` list. An entry counts as solved when its
            ``all_train_solved`` value is truthy.
        llm_file: JSON file whose ``results`` maps task id to a record. A
            record counts as solved when its ``status`` equals ``"solved"``;
            records without a ``status`` are treated as unsolved.
        output_file: Destination path for the merged summary. Missing parent
            directories are created.

    Returns:
        The merged summary dict that was written to ``output_file``.
    """
    import os  # local import: only needed to prepare the output directory

    with open(symbolic_file, encoding="utf-8") as f:
        symbolic = json.load(f)
    with open(llm_file, encoding="utf-8") as f:
        llm = json.load(f)

    total_tasks = symbolic['total_tasks']
    llm_results = llm['results']

    symbolic_solved = {r['task_id'] for r in symbolic['results'] if r.get('all_train_solved')}
    # .get() so a record lacking 'status' is simply "not solved", not a crash.
    llm_solved = {tid for tid, r in llm_results.items() if r.get('status') == 'solved'}

    total_solved = symbolic_solved | llm_solved
    new_from_llm = llm_solved - symbolic_solved

    # Guard against an empty benchmark so the report never divides by zero.
    solve_pct = 100 * len(total_solved) / total_tasks if total_tasks else 0.0

    print(f"Symbolic solved: {len(symbolic_solved)}")
    print(f"LLM solved: {len(llm_solved)}")
    print(f"New from LLM: {len(new_from_llm)}")
    print(f"TOTAL SOLVED: {len(total_solved)}/{total_tasks} ({solve_pct:.1f}%)")

    print("\nNew tasks solved by LLM:")
    for tid in sorted(new_from_llm):
        rule = llm_results[tid].get('rule', '?')
        print(f" {tid}: {rule}")

    merged = {
        'total_tasks': total_tasks,
        'symbolic_solved': len(symbolic_solved),
        'llm_solved': len(llm_solved),
        'new_from_llm': len(new_from_llm),
        'total_solved': len(total_solved),
        'solve_rate': round(solve_pct, 2),
        'symbolic_tasks': sorted(symbolic_solved),
        'llm_tasks': sorted(llm_solved),
        'new_llm_tasks': sorted(new_from_llm),
    }

    # Create the target directory up front; otherwise the write fails with
    # FileNotFoundError after all the work above has already been done.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_file, 'w', encoding="utf-8") as f:
        json.dump(merged, f, indent=2)
    print(f"\nMerged results saved to {output_file}")

    return merged
|
|
|
|
if __name__ == "__main__":
    # Positional arguments are optional: any that are missing fall back to
    # the default paths, in order (symbolic summary first, LLM results second).
    default_paths = ["arc_results/summary_v4.json", "llm_results.json"]
    given = sys.argv[1:3]
    symbolic_path, llm_path = given + default_paths[len(given):]
    merge(symbolic_path, llm_path)
|
|