File size: 1,942 Bytes
"""
Merge LLM results with symbolic results to get final solve count.
Usage:
python merge_results.py arc_results/summary_v4.json llm_results.json
"""
import json
import os
import sys
def merge(
    symbolic_file: str,
    llm_file: str,
    output_file: str = "arc_results/summary_final.json",
) -> None:
    """Merge symbolic and LLM solve results, print a report, and save a summary.

    A task counts as solved symbolically when its entry in
    ``symbolic['results']`` has a truthy ``all_train_solved``; it counts as
    solved by the LLM when its entry in ``llm['results']`` has
    ``status == 'solved'``. The final solved set is the union of both.

    Args:
        symbolic_file: Path to a JSON file with a ``total_tasks`` value and a
            ``results`` list whose items carry ``task_id`` and, optionally,
            ``all_train_solved``.
        llm_file: Path to a JSON file with a ``results`` mapping of task id to
            an entry that may carry ``status`` and ``rule``.
        output_file: Path the merged JSON summary is written to. Missing
            parent directories are created.

    Raises:
        OSError: If an input file cannot be read or the output cannot be
            written.
        KeyError: If ``total_tasks``/``results`` or a ``task_id`` is missing.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    with open(symbolic_file, encoding="utf-8") as f:
        symbolic = json.load(f)
    with open(llm_file, encoding="utf-8") as f:
        llm = json.load(f)

    symbolic_solved = {
        r['task_id'] for r in symbolic['results'] if r.get('all_train_solved')
    }
    # Use .get so an entry without a 'status' key counts as unsolved instead
    # of aborting the whole merge with a KeyError.
    llm_solved = {
        tid for tid, r in llm['results'].items() if r.get('status') == 'solved'
    }
    total_solved = symbolic_solved | llm_solved
    new_from_llm = llm_solved - symbolic_solved

    total_tasks = symbolic['total_tasks']
    # Compute the percentage once; an empty benchmark reports 0% rather than
    # raising ZeroDivisionError.
    solve_pct = 100 * len(total_solved) / total_tasks if total_tasks else 0.0

    print(f"Symbolic solved: {len(symbolic_solved)}")
    print(f"LLM solved: {len(llm_solved)}")
    print(f"New from LLM: {len(new_from_llm)}")
    print(f"TOTAL SOLVED: {len(total_solved)}/{total_tasks} ({solve_pct:.1f}%)")
    print("\nNew tasks solved by LLM:")
    for tid in sorted(new_from_llm):
        rule = llm['results'][tid].get('rule', '?')
        print(f" {tid}: {rule}")

    # Save merged
    merged = {
        'total_tasks': total_tasks,
        'symbolic_solved': len(symbolic_solved),
        'llm_solved': len(llm_solved),
        'new_from_llm': len(new_from_llm),
        'total_solved': len(total_solved),
        'solve_rate': round(solve_pct, 2),
        'symbolic_tasks': sorted(symbolic_solved),
        'llm_tasks': sorted(llm_solved),
        'new_llm_tasks': sorted(new_from_llm),
    }
    # The default output path lives in a subdirectory; make sure it exists so
    # the write does not fail after all the work above.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_file, 'w', encoding="utf-8") as f:
        json.dump(merged, f, indent=2)
    print(f"\nMerged results saved to {output_file}")
if __name__ == "__main__":
    # Positional arguments are optional: the first is the symbolic summary,
    # the second the LLM results; each falls back to its default path.
    cli_args = sys.argv[1:]
    symbolic_path = cli_args[0] if len(cli_args) >= 1 else "arc_results/summary_v4.json"
    llm_path = cli_args[1] if len(cli_args) >= 2 else "llm_results.json"
    merge(symbolic_path, llm_path)
|