rogermt committed on
Commit
ec20fec
·
verified ·
1 Parent(s): b9f62c4

Add scripts/merge_results.py

Browse files
Files changed (1) hide show
  1. scripts/merge_results.py +53 -0
scripts/merge_results.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Merge LLM results with symbolic results to get final solve count.
3
+
4
+ Usage:
5
+ python merge_results.py arc_results/summary_v4.json llm_results.json
6
+ """
7
+ import json
8
+ import sys
9
+
10
+
11
def merge(symbolic_file: str, llm_file: str, output_file: str = "arc_results/summary_final.json"):
    """Merge symbolic and LLM results into one summary and save it as JSON.

    A task counts as solved when either source solved it. A short report is
    printed to stdout and the merged summary is written to ``output_file``.

    Args:
        symbolic_file: Path to a JSON file with a ``total_tasks`` value and a
            ``results`` list of dicts; each dict has a ``task_id`` and may
            have a truthy ``all_train_solved`` flag.
        llm_file: Path to a JSON file with a ``results`` dict mapping task id
            to a dict that may have ``status`` (``'solved'`` marks success)
            and ``rule`` entries.
        output_file: Path of the merged JSON summary to write. Missing parent
            directories are created.

    Raises:
        OSError: If an input file cannot be read or the output cannot be
            written.
        KeyError: If a required key (``results``, ``total_tasks``,
            ``task_id``) is missing from the input data.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    # Local import: the module's top-level imports are only json and sys.
    import os

    # Read with an explicit encoding so parsing does not depend on the
    # platform's locale default.
    with open(symbolic_file, encoding="utf-8") as f:
        symbolic = json.load(f)
    with open(llm_file, encoding="utf-8") as f:
        llm = json.load(f)

    symbolic_solved = {r['task_id'] for r in symbolic['results'] if r.get('all_train_solved')}
    # .get(): an entry without a 'status' is treated as not solved instead of
    # aborting the whole merge with a KeyError.
    llm_solved = {tid for tid, r in llm['results'].items() if r.get('status') == 'solved'}

    total_solved = symbolic_solved | llm_solved
    new_from_llm = llm_solved - symbolic_solved

    total_tasks = symbolic['total_tasks']
    # Guard against an empty benchmark: report 0% rather than dividing by zero.
    solve_pct = 100 * len(total_solved) / total_tasks if total_tasks else 0.0

    print(f"Symbolic solved: {len(symbolic_solved)}")
    print(f"LLM solved: {len(llm_solved)}")
    print(f"New from LLM: {len(new_from_llm)}")
    print(f"TOTAL SOLVED: {len(total_solved)}/{total_tasks} ({solve_pct:.1f}%)")

    print("\nNew tasks solved by LLM:")
    for tid in sorted(new_from_llm):
        rule = llm['results'][tid].get('rule', '?')
        print(f" {tid}: {rule}")

    # Save merged
    merged = {
        'total_tasks': total_tasks,
        'symbolic_solved': len(symbolic_solved),
        'llm_solved': len(llm_solved),
        'new_from_llm': len(new_from_llm),
        'total_solved': len(total_solved),
        'solve_rate': round(solve_pct, 2),
        'symbolic_tasks': sorted(symbolic_solved),
        'llm_tasks': sorted(llm_solved),
        'new_llm_tasks': sorted(new_from_llm),
    }
    # Create the destination directory if needed so a fresh checkout does not
    # fail at the very last step.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_file, 'w', encoding="utf-8") as f:
        json.dump(merged, f, indent=2)
    print(f"\nMerged results saved to {output_file}")
48
+
49
+
50
if __name__ == "__main__":
    # Both positional arguments are optional; each falls back to its default
    # results path when not supplied on the command line.
    cli_args = sys.argv[1:]
    sym = cli_args[0] if cli_args else "arc_results/summary_v4.json"
    llm = cli_args[1] if len(cli_args) >= 2 else "llm_results.json"
    merge(sym, llm)