{ "base": { "results": [ { "seed": 42, "difficulty": "easy", "score": 0.1736, "correct_flags": 1, "false_positives": 0, "correct_approvals": 0, "missed_errors": 5, "total_errors": 6, "actions_taken": 4, "actions_parsed": 4 }, { "seed": 137, "difficulty": "easy", "score": 0.0167, "correct_flags": 0, "false_positives": 1, "correct_approvals": 0, "missed_errors": 6, "total_errors": 6, "actions_taken": 4, "actions_parsed": 4 }, { "seed": 256, "difficulty": "easy", "score": 0.0389, "correct_flags": 0, "false_positives": 0, "correct_approvals": 0, "missed_errors": 6, "total_errors": 6, "actions_taken": 7, "actions_parsed": 7 }, { "seed": 512, "difficulty": "easy", "score": 0.0333, "correct_flags": 0, "false_positives": 0, "correct_approvals": 0, "missed_errors": 6, "total_errors": 6, "actions_taken": 6, "actions_parsed": 6 }, { "seed": 1024, "difficulty": "easy", "score": 0.1736, "correct_flags": 1, "false_positives": 0, "correct_approvals": 0, "missed_errors": 5, "total_errors": 6, "actions_taken": 4, "actions_parsed": 4 }, { "seed": 42, "difficulty": "medium", "score": 0.0179, "correct_flags": 0, "false_positives": 0, "correct_approvals": 0, "missed_errors": 13, "total_errors": 13, "actions_taken": 7, "actions_parsed": 7 }, { "seed": 137, "difficulty": "medium", "score": 0.0256, "correct_flags": 0, "false_positives": 0, "correct_approvals": 0, "missed_errors": 13, "total_errors": 13, "actions_taken": 10, "actions_parsed": 10 }, { "seed": 256, "difficulty": "medium", "score": 0.01, "correct_flags": 0, "false_positives": 1, "correct_approvals": 0, "missed_errors": 13, "total_errors": 13, "actions_taken": 4, "actions_parsed": 4 }, { "seed": 512, "difficulty": "medium", "score": 0.0256, "correct_flags": 0, "false_positives": 0, "correct_approvals": 0, "missed_errors": 13, "total_errors": 13, "actions_taken": 10, "actions_parsed": 10 }, { "seed": 1024, "difficulty": "medium", "score": 0.01, "correct_flags": 0, "false_positives": 1, "correct_approvals": 0, "missed_errors": 13, "total_errors": 13, "actions_taken": 4, "actions_parsed": 4 }, { "seed": 42, "difficulty": "hard", "score": 0.01, "correct_flags": 0, "false_positives": 1, "correct_approvals": 0, "missed_errors": 16, "total_errors": 16, "actions_taken": 4, "actions_parsed": 4 }, { "seed": 137, "difficulty": "hard", "score": 0.01, "correct_flags": 0, "false_positives": 1, "correct_approvals": 0, "missed_errors": 17, "total_errors": 17, "actions_taken": 4, "actions_parsed": 4 }, { "seed": 256, "difficulty": "hard", "score": 0.01, "correct_flags": 0, "false_positives": 1, "correct_approvals": 0, "missed_errors": 14, "total_errors": 14, "actions_taken": 4, "actions_parsed": 4 }, { "seed": 512, "difficulty": "hard", "score": 0.0214, "correct_flags": 0, "false_positives": 0, "correct_approvals": 0, "missed_errors": 14, "total_errors": 14, "actions_taken": 9, "actions_parsed": 9 }, { "seed": 1024, "difficulty": "hard", "score": 0.0235, "correct_flags": 0, "false_positives": 0, "correct_approvals": 0, "missed_errors": 17, "total_errors": 17, "actions_taken": 12, "actions_parsed": 12 } ], "overall": 0.04 }, "trained": { "results": [ { "seed": 42, "difficulty": "easy", "score": 0.2958, "correct_flags": 1, "false_positives": 0, "correct_approvals": 2, "missed_errors": 5, "total_errors": 6, "actions_taken": 7, "actions_parsed": 7 }, { "seed": 137, "difficulty": "easy", "score": 0.25, "correct_flags": 0, "false_positives": 1, "correct_approvals": 2, "missed_errors": 6, "total_errors": 6, "actions_taken": 9, "actions_parsed": 9 }, { "seed": 256, "difficulty": "easy", "score": 0.3402, "correct_flags": 1, "false_positives": 0, "correct_approvals": 4, "missed_errors": 5, "total_errors": 6, "actions_taken": 9, "actions_parsed": 9 }, { "seed": 512, "difficulty": "easy", "score": 0.2712, "correct_flags": 1, "false_positives": 2, "correct_approvals": 1, "missed_errors": 5, "total_errors": 6, "actions_taken": 12, "actions_parsed": 12 }, { "seed": 1024, "difficulty": "easy", "score": 0.2791, "correct_flags": 1, "false_positives": 0, "correct_approvals": 1, "missed_errors": 5, "total_errors": 6, "actions_taken": 10, "actions_parsed": 10 }, { "seed": 42, "difficulty": "medium", "score": 0.1308, "correct_flags": 0, "false_positives": 1, "correct_approvals": 2, "missed_errors": 13, "total_errors": 13, "actions_taken": 12, "actions_parsed": 12 }, { "seed": 137, "difficulty": "medium", "score": 0.01, "correct_flags": 0, "false_positives": 0, "correct_approvals": 0, "missed_errors": 13, "total_errors": 13, "actions_taken": 0, "actions_parsed": 0 }, { "seed": 256, "difficulty": "medium", "score": 0.0923, "correct_flags": 0, "false_positives": 1, "correct_approvals": 1, "missed_errors": 13, "total_errors": 13, "actions_taken": 10, "actions_parsed": 10 }, { "seed": 512, "difficulty": "medium", "score": 0.2393, "correct_flags": 1, "false_positives": 0, "correct_approvals": 6, "missed_errors": 12, "total_errors": 13, "actions_taken": 12, "actions_parsed": 12 }, { "seed": 1024, "difficulty": "medium", "score": 0.1735, "correct_flags": 1, "false_positives": 3, "correct_approvals": 1, "missed_errors": 12, "total_errors": 13, "actions_taken": 16, "actions_parsed": 16 }, { "seed": 42, "difficulty": "hard", "score": 0.0271, "correct_flags": 0, "false_positives": 1, "correct_approvals": 0, "missed_errors": 16, "total_errors": 16, "actions_taken": 14, "actions_parsed": 14 }, { "seed": 137, "difficulty": "hard", "score": 0.0235, "correct_flags": 0, "false_positives": 1, "correct_approvals": 0, "missed_errors": 17, "total_errors": 17, "actions_taken": 13, "actions_parsed": 13 }, { "seed": 256, "difficulty": "hard", "score": 0.0262, "correct_flags": 0, "false_positives": 1, "correct_approvals": 0, "missed_errors": 14, "total_errors": 14, "actions_taken": 12, "actions_parsed": 12 }, { "seed": 512, "difficulty": "hard", "score": 0.0771, "correct_flags": 1, "false_positives": 0, "correct_approvals": 0, "missed_errors": 13, "total_errors": 14, "actions_taken": 4, "actions_parsed": 4 }, { "seed": 1024, "difficulty": "hard", "score": 0.0638, "correct_flags": 1, "false_positives": 0, "correct_approvals": 0, "missed_errors": 16, "total_errors": 17, "actions_taken": 4, "actions_parsed": 4 } ], "overall": 0.1533 }, "improvement": 283.25 }