File size: 1,942 Bytes
feb08d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
"""
Merge LLM results with symbolic results to get final solve count.

Usage:
  python merge_results.py arc_results/summary_v4.json llm_results.json
"""
import json
import sys


def merge(symbolic_file: str, llm_file: str, output_file: str = "arc_results/summary_final.json") -> dict:
    """Combine symbolic and LLM solve results into one summary.

    Reads both JSON files, prints a short report to stdout, writes the merged
    summary to ``output_file`` as JSON and returns it.

    Args:
        symbolic_file: Path to a JSON file with a ``total_tasks`` value and a
            ``results`` list whose entries carry ``task_id`` and, optionally,
            ``all_train_solved``.
        llm_file: Path to a JSON file with a ``results`` mapping from task id
            to an entry that may carry ``status`` and ``rule``.
        output_file: Destination of the merged summary. Missing parent
            directories are created.

    Returns:
        The merged summary dictionary that was written to ``output_file``.

    Raises:
        OSError: If an input file cannot be read or the output cannot be written.
        json.JSONDecodeError: If an input file is not valid JSON.
        KeyError: If ``total_tasks`` / ``results`` (or a symbolic entry's
            ``task_id``) is missing.
    """
    import os

    with open(symbolic_file, encoding="utf-8") as f:
        symbolic = json.load(f)
    with open(llm_file, encoding="utf-8") as f:
        llm = json.load(f)

    total_tasks = symbolic['total_tasks']
    llm_results = llm['results']

    symbolic_solved = {r['task_id'] for r in symbolic['results'] if r.get('all_train_solved')}
    # .get(): an LLM entry without a status counts as unsolved instead of crashing.
    llm_solved = {tid for tid, r in llm_results.items() if r.get('status') == 'solved'}

    total_solved = symbolic_solved | llm_solved
    new_from_llm = llm_solved - symbolic_solved

    # Guard against an empty benchmark so the percentage never divides by zero.
    solve_pct = 100 * len(total_solved) / total_tasks if total_tasks else 0.0

    print(f"Symbolic solved: {len(symbolic_solved)}")
    print(f"LLM solved:      {len(llm_solved)}")
    print(f"New from LLM:    {len(new_from_llm)}")
    print(f"TOTAL SOLVED:    {len(total_solved)}/{total_tasks} ({solve_pct:.1f}%)")

    print("\nNew tasks solved by LLM:")
    for tid in sorted(new_from_llm):
        rule = llm_results[tid].get('rule', '?')
        print(f"  {tid}: {rule}")

    # Save merged
    merged = {
        'total_tasks': total_tasks,
        'symbolic_solved': len(symbolic_solved),
        'llm_solved': len(llm_solved),
        'new_from_llm': len(new_from_llm),
        'total_solved': len(total_solved),
        'solve_rate': round(solve_pct, 2),
        'symbolic_tasks': sorted(symbolic_solved),
        'llm_tasks': sorted(llm_solved),
        'new_llm_tasks': sorted(new_from_llm),
    }

    # The default output lives in a sub-directory that may not exist yet.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_file, 'w', encoding="utf-8") as f:
        json.dump(merged, f, indent=2)
    print(f"\nMerged results saved to {output_file}")

    return merged

if __name__ == "__main__":
    # Both positional arguments are optional; each falls back to its default path.
    cli_args = sys.argv[1:]
    sym = cli_args[0] if len(cli_args) >= 1 else "arc_results/summary_v4.json"
    llm = cli_args[1] if len(cli_args) >= 2 else "llm_results.json"
    merge(sym, llm)