Invalid JSON: Unexpected token 'N', ..." "mttr": NaN,
"... is not valid JSON
| { | |
| "policies": [ | |
| "random", | |
| "heuristic", | |
| "oracle", | |
| "trained" | |
| ], | |
| "tiers": [ | |
| "easy", | |
| "medium", | |
| "hard" | |
| ], | |
| "episodes_per_type": 5, | |
| "per_episode": [ | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 0, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 54.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 31, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -516.0, | |
| "wrong_fixes": 6, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 62, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -516.0, | |
| "wrong_fixes": 6, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 93, | |
| "resolved": true, | |
| "steps": 9, | |
| "cumulative_reward": -122.0, | |
| "wrong_fixes": 3, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 124, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -666.0, | |
| "wrong_fixes": 9, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 97, | |
| "resolved": true, | |
| "steps": 6, | |
| "cumulative_reward": -20.0, | |
| "wrong_fixes": 2, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 128, | |
| "resolved": true, | |
| "steps": 9, | |
| "cumulative_reward": -172.0, | |
| "wrong_fixes": 4, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 159, | |
| "resolved": true, | |
| "steps": 10, | |
| "cumulative_reward": -290.0, | |
| "wrong_fixes": 6, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 190, | |
| "resolved": true, | |
| "steps": 9, | |
| "cumulative_reward": -172.0, | |
| "wrong_fixes": 4, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 221, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -566.0, | |
| "wrong_fixes": 7, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 194, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -566.0, | |
| "wrong_fixes": 7, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 225, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -566.0, | |
| "wrong_fixes": 7, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 256, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 4.0, | |
| "wrong_fixes": 2, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 287, | |
| "resolved": true, | |
| "steps": 7, | |
| "cumulative_reward": -132.0, | |
| "wrong_fixes": 4, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 318, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -566.0, | |
| "wrong_fixes": 7, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 291, | |
| "resolved": true, | |
| "steps": 9, | |
| "cumulative_reward": -222.0, | |
| "wrong_fixes": 5, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 322, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -566.0, | |
| "wrong_fixes": 7, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 353, | |
| "resolved": true, | |
| "steps": 9, | |
| "cumulative_reward": -222.0, | |
| "wrong_fixes": 5, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 384, | |
| "resolved": true, | |
| "steps": 6, | |
| "cumulative_reward": -70.0, | |
| "wrong_fixes": 3, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 415, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -416.0, | |
| "wrong_fixes": 4, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 0, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 31, | |
| "resolved": true, | |
| "steps": 5, | |
| "cumulative_reward": 90.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 62, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 93, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 124, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 97, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 128, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 159, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 190, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 104.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 221, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 194, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 54.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 225, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 54.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 256, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 54.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 287, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 54.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 318, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 54.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 291, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 322, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 353, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 104.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 384, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 415, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 0, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 31, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 62, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 93, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 124, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 97, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 128, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 159, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 190, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 221, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 194, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 225, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 256, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 287, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 318, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 291, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 322, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 353, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 384, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 415, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 0, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -356.0, | |
| "wrong_fixes": 2, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 31, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -286.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 62, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -346.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 93, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -176.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "db_deadlock", | |
| "seed": 124, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -306.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 97, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -256.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 128, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -276.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 159, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -276.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 190, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -256.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "memory_leak", | |
| "seed": 221, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -256.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 194, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -196.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 225, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -176.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 256, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -256.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 287, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -216.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "bad_config_push", | |
| "seed": 318, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -226.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 291, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -226.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 322, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -196.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 353, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -276.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 384, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -216.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "failure_type": "dns_outage", | |
| "seed": 415, | |
| "resolved": false, | |
| "steps": 12, | |
| "cumulative_reward": -256.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 0, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -992.0, | |
| "wrong_fixes": 11, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 31, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -842.0, | |
| "wrong_fixes": 8, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 62, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -932.0, | |
| "wrong_fixes": 9, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 93, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -692.0, | |
| "wrong_fixes": 5, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 124, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -1082.0, | |
| "wrong_fixes": 12, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 97, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -802.0, | |
| "wrong_fixes": 8, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 128, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -952.0, | |
| "wrong_fixes": 11, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 159, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 4.0, | |
| "wrong_fixes": 2, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 190, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -1002.0, | |
| "wrong_fixes": 12, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 221, | |
| "resolved": true, | |
| "steps": 4, | |
| "cumulative_reward": 48.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 194, | |
| "resolved": true, | |
| "steps": 5, | |
| "cumulative_reward": -60.0, | |
| "wrong_fixes": 3, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 225, | |
| "resolved": true, | |
| "steps": 12, | |
| "cumulative_reward": -332.0, | |
| "wrong_fixes": 6, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 256, | |
| "resolved": true, | |
| "steps": 17, | |
| "cumulative_reward": -872.0, | |
| "wrong_fixes": 14, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 287, | |
| "resolved": true, | |
| "steps": 11, | |
| "cumulative_reward": -310.0, | |
| "wrong_fixes": 6, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 318, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -802.0, | |
| "wrong_fixes": 8, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 0, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 31, | |
| "resolved": true, | |
| "steps": 5, | |
| "cumulative_reward": 90.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 62, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 93, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 124, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 97, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 128, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 159, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 190, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 221, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 194, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 225, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 256, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 287, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 318, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 0, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 31, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 62, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 93, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 124, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 97, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "autoscaler" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 128, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "autoscaler" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 159, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "autoscaler" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 190, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "autoscaler" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 221, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "autoscaler" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 194, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 225, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 256, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 287, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 318, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 0, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -442.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 31, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -622.0, | |
| "wrong_fixes": 2, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 62, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -582.0, | |
| "wrong_fixes": 2, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 93, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -602.0, | |
| "wrong_fixes": 2, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "cascade", | |
| "seed": 124, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -602.0, | |
| "wrong_fixes": 2, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 97, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -382.0, | |
| "wrong_fixes": 3, | |
| "oversight_flags": [ | |
| "autoscaler", | |
| "autoscaler" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 128, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -542.0, | |
| "wrong_fixes": 4, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 159, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -512.0, | |
| "wrong_fixes": 4, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 190, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -482.0, | |
| "wrong_fixes": 4, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 221, | |
| "resolved": false, | |
| "steps": 18, | |
| "cumulative_reward": -482.0, | |
| "wrong_fixes": 3, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 194, | |
| "resolved": true, | |
| "steps": 7, | |
| "cumulative_reward": 78.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 225, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 256, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 287, | |
| "resolved": true, | |
| "steps": 7, | |
| "cumulative_reward": 48.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "failure_type": "disk_full", | |
| "seed": 318, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 0, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1510.0, | |
| "wrong_fixes": 16, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 31, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1210.0, | |
| "wrong_fixes": 10, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 62, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1510.0, | |
| "wrong_fixes": 16, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 93, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1160.0, | |
| "wrong_fixes": 9, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 124, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1560.0, | |
| "wrong_fixes": 17, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 97, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1190.0, | |
| "wrong_fixes": 8, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 128, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1480.0, | |
| "wrong_fixes": 13, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 159, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1340.0, | |
| "wrong_fixes": 11, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 190, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1590.0, | |
| "wrong_fixes": 16, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 221, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1600.0, | |
| "wrong_fixes": 17, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 194, | |
| "resolved": true, | |
| "steps": 10, | |
| "cumulative_reward": -240.0, | |
| "wrong_fixes": 5, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 225, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1510.0, | |
| "wrong_fixes": 16, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 256, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1710.0, | |
| "wrong_fixes": 20, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 287, | |
| "resolved": true, | |
| "steps": 22, | |
| "cumulative_reward": -1112.0, | |
| "wrong_fixes": 15, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 318, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1310.0, | |
| "wrong_fixes": 12, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 291, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1360.0, | |
| "wrong_fixes": 13, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 322, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1410.0, | |
| "wrong_fixes": 14, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 353, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1560.0, | |
| "wrong_fixes": 17, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 384, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1460.0, | |
| "wrong_fixes": 15, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 415, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1310.0, | |
| "wrong_fixes": 12, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 0, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1060.0, | |
| "wrong_fixes": 13, | |
| "oversight_flags": [ | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 31, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -760.0, | |
| "wrong_fixes": 7, | |
| "oversight_flags": [ | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 62, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1010.0, | |
| "wrong_fixes": 12, | |
| "oversight_flags": [ | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 93, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -860.0, | |
| "wrong_fixes": 9, | |
| "oversight_flags": [ | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 124, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -960.0, | |
| "wrong_fixes": 11, | |
| "oversight_flags": [ | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer", | |
| "load_balancer" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 97, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 128, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 159, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 190, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 104.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 221, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 194, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 225, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 256, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 287, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 318, | |
| "resolved": true, | |
| "steps": 1, | |
| "cumulative_reward": 110.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 291, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1060.0, | |
| "wrong_fixes": 13, | |
| "oversight_flags": [ | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 322, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -1010.0, | |
| "wrong_fixes": 12, | |
| "oversight_flags": [ | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 353, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -960.0, | |
| "wrong_fixes": 11, | |
| "oversight_flags": [ | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 384, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -960.0, | |
| "wrong_fixes": 11, | |
| "oversight_flags": [ | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 415, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -960.0, | |
| "wrong_fixes": 11, | |
| "oversight_flags": [ | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot", | |
| "deploy_bot" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 0, | |
| "resolved": true, | |
| "steps": 4, | |
| "cumulative_reward": 178.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "load_balancer" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 31, | |
| "resolved": true, | |
| "steps": 4, | |
| "cumulative_reward": 178.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "load_balancer" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 62, | |
| "resolved": true, | |
| "steps": 4, | |
| "cumulative_reward": 178.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "load_balancer" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 93, | |
| "resolved": true, | |
| "steps": 4, | |
| "cumulative_reward": 178.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "load_balancer" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 124, | |
| "resolved": true, | |
| "steps": 4, | |
| "cumulative_reward": 178.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "load_balancer" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 97, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 128, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 159, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 190, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 221, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 134.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 194, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "autoscaler" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 225, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "autoscaler" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 256, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "autoscaler" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 287, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "autoscaler" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 318, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "autoscaler" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 291, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "deploy_bot" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 322, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "deploy_bot" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 353, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "deploy_bot" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 384, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "deploy_bot" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 415, | |
| "resolved": true, | |
| "steps": 3, | |
| "cumulative_reward": 184.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [ | |
| "deploy_bot" | |
| ], | |
| "had_rogue": true, | |
| "rogue_caught": true, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 0, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -790.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 31, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -850.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 62, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -890.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 93, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -890.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "misrouted_traffic", | |
| "seed": 124, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -780.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 97, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -790.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 128, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -970.0, | |
| "wrong_fixes": 2, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 159, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -890.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 190, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -860.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "cascade", | |
| "seed": 221, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -900.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": false, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 194, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -830.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 225, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -680.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 256, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -860.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 287, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -810.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "autoscaler_cost_cut", | |
| "seed": 318, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -780.0, | |
| "wrong_fixes": 1, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 291, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -770.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 322, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -810.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 353, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -810.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 384, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -770.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "failure_type": "rogue_deploy_bot", | |
| "seed": 415, | |
| "resolved": false, | |
| "steps": 25, | |
| "cumulative_reward": -790.0, | |
| "wrong_fixes": 0, | |
| "oversight_flags": [], | |
| "had_rogue": true, | |
| "rogue_caught": false, | |
| "false_positive": false | |
| } | |
| ], | |
| "aggregates": [ | |
| { | |
| "policy": "heuristic", | |
| "tier": "easy", | |
| "episodes": 20, | |
| "success_rate": 1.0, | |
| "mttr": 1.9, | |
| "rogue_detection_rate": 0.0, | |
| "false_positive_rate": 0.0, | |
| "mean_reward": 94.4, | |
| "median_reward": 110.0, | |
| "mean_wrong_fixes": 0.25 | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "hard", | |
| "episodes": 20, | |
| "success_rate": 0.5, | |
| "mttr": 1.2, | |
| "rogue_detection_rate": 0.6666666666666666, | |
| "false_positive_rate": 0.0, | |
| "mean_reward": -425.3, | |
| "median_reward": -328.0, | |
| "mean_wrong_fixes": 5.5 | |
| }, | |
| { | |
| "policy": "heuristic", | |
| "tier": "medium", | |
| "episodes": 15, | |
| "success_rate": 1.0, | |
| "mttr": 1.2666666666666666, | |
| "rogue_detection_rate": 0.0, | |
| "false_positive_rate": 0.0, | |
| "mean_reward": 108.66666666666667, | |
| "median_reward": 110.0, | |
| "mean_wrong_fixes": 0 | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "easy", | |
| "episodes": 20, | |
| "success_rate": 1.0, | |
| "mttr": 3, | |
| "rogue_detection_rate": 0.0, | |
| "false_positive_rate": 0.0, | |
| "mean_reward": 134.0, | |
| "median_reward": 134.0, | |
| "mean_wrong_fixes": 0 | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "hard", | |
| "episodes": 20, | |
| "success_rate": 1.0, | |
| "mttr": 3.25, | |
| "rogue_detection_rate": 1.0, | |
| "false_positive_rate": 0.0, | |
| "mean_reward": 170.0, | |
| "median_reward": 181.0, | |
| "mean_wrong_fixes": 0 | |
| }, | |
| { | |
| "policy": "oracle", | |
| "tier": "medium", | |
| "episodes": 15, | |
| "success_rate": 1.0, | |
| "mttr": 3, | |
| "rogue_detection_rate": 1.0, | |
| "false_positive_rate": 0.0, | |
| "mean_reward": 150.66666666666666, | |
| "median_reward": 134.0, | |
| "mean_wrong_fixes": 0 | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "easy", | |
| "episodes": 20, | |
| "success_rate": 0.55, | |
| "mttr": 7.2727272727272725, | |
| "rogue_detection_rate": 0.0, | |
| "false_positive_rate": 0.0, | |
| "mean_reward": -315.4, | |
| "median_reward": -256.0, | |
| "mean_wrong_fixes": 4.95 | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "hard", | |
| "episodes": 20, | |
| "success_rate": 0.1, | |
| "mttr": 16, | |
| "rogue_detection_rate": 0.0, | |
| "false_positive_rate": 0.0, | |
| "mean_reward": -1356.6, | |
| "median_reward": -1435.0, | |
| "mean_wrong_fixes": 13.6 | |
| }, | |
| { | |
| "policy": "random", | |
| "tier": "medium", | |
| "episodes": 15, | |
| "success_rate": 0.4, | |
| "mttr": 8.666666666666666, | |
| "rogue_detection_rate": 0.0, | |
| "false_positive_rate": 0.0, | |
| "mean_reward": -641.3333333333334, | |
| "median_reward": -802.0, | |
| "mean_wrong_fixes": 7.733333333333333 | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "easy", | |
| "episodes": 20, | |
| "success_rate": 0.0, | |
| "mttr": NaN, | |
| "rogue_detection_rate": 0.0, | |
| "false_positive_rate": 0.0, | |
| "mean_reward": -251.5, | |
| "median_reward": -256.0, | |
| "mean_wrong_fixes": 0.25 | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "hard", | |
| "episodes": 20, | |
| "success_rate": 0.0, | |
| "mttr": NaN, | |
| "rogue_detection_rate": 0.0, | |
| "false_positive_rate": 0.0, | |
| "mean_reward": -826.0, | |
| "median_reward": -810.0, | |
| "mean_wrong_fixes": 0.3 | |
| }, | |
| { | |
| "policy": "trained", | |
| "tier": "medium", | |
| "episodes": 15, | |
| "success_rate": 0.3333333333333333, | |
| "mttr": 4.6, | |
| "rogue_detection_rate": 0.2, | |
| "false_positive_rate": 0.0, | |
| "mean_reward": -314.8, | |
| "median_reward": -482.0, | |
| "mean_wrong_fixes": 1.8666666666666667 | |
| } | |
| ] | |
| } |