{ "policies": [ "random", "heuristic", "oracle", "trained" ], "tiers": [ "easy", "medium", "hard" ], "episodes_per_type": 5, "per_episode": [ { "policy": "random", "tier": "easy", "failure_type": "db_deadlock", "seed": 0, "resolved": true, "steps": 3, "cumulative_reward": 54.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "db_deadlock", "seed": 31, "resolved": false, "steps": 12, "cumulative_reward": -516.0, "wrong_fixes": 6, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "db_deadlock", "seed": 62, "resolved": false, "steps": 12, "cumulative_reward": -516.0, "wrong_fixes": 6, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "db_deadlock", "seed": 93, "resolved": true, "steps": 9, "cumulative_reward": -122.0, "wrong_fixes": 3, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "db_deadlock", "seed": 124, "resolved": false, "steps": 12, "cumulative_reward": -666.0, "wrong_fixes": 9, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "memory_leak", "seed": 97, "resolved": true, "steps": 6, "cumulative_reward": -20.0, "wrong_fixes": 2, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "memory_leak", "seed": 128, "resolved": true, "steps": 9, "cumulative_reward": -172.0, "wrong_fixes": 4, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "memory_leak", "seed": 159, "resolved": true, "steps": 10, "cumulative_reward": -290.0, "wrong_fixes": 6, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "memory_leak", "seed": 190, "resolved": true, "steps": 9, "cumulative_reward": -172.0, "wrong_fixes": 4, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "memory_leak", "seed": 221, "resolved": false, "steps": 12, "cumulative_reward": -566.0, "wrong_fixes": 7, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "bad_config_push", "seed": 194, "resolved": false, "steps": 12, "cumulative_reward": -566.0, "wrong_fixes": 7, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "bad_config_push", "seed": 225, "resolved": false, "steps": 12, "cumulative_reward": -566.0, "wrong_fixes": 7, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "bad_config_push", "seed": 256, "resolved": true, "steps": 3, "cumulative_reward": 4.0, "wrong_fixes": 2, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "bad_config_push", "seed": 287, "resolved": true, "steps": 7, "cumulative_reward": -132.0, "wrong_fixes": 4, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "bad_config_push", "seed": 318, "resolved": false, "steps": 12, "cumulative_reward": -566.0, "wrong_fixes": 7, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "dns_outage", "seed": 291, "resolved": true, "steps": 9, "cumulative_reward": -222.0, "wrong_fixes": 5, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "dns_outage", "seed": 322, "resolved": false, "steps": 12, "cumulative_reward": -566.0, "wrong_fixes": 7, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "dns_outage", "seed": 353, "resolved": true, "steps": 9, "cumulative_reward": -222.0, "wrong_fixes": 5, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "dns_outage", "seed": 384, "resolved": true, "steps": 6, "cumulative_reward": -70.0, "wrong_fixes": 3, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "easy", "failure_type": "dns_outage", "seed": 415, "resolved": false, "steps": 12, "cumulative_reward": -416.0, "wrong_fixes": 4, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "db_deadlock", "seed": 0, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "db_deadlock", "seed": 31, "resolved": true, "steps": 5, "cumulative_reward": 90.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "db_deadlock", "seed": 62, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "db_deadlock", "seed": 93, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "db_deadlock", "seed": 124, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "memory_leak", "seed": 97, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "memory_leak", "seed": 128, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "memory_leak", "seed": 159, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "memory_leak", "seed": 190, "resolved": true, "steps": 3, "cumulative_reward": 104.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "memory_leak", "seed": 221, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "bad_config_push", "seed": 194, "resolved": true, "steps": 3, "cumulative_reward": 54.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "bad_config_push", "seed": 225, "resolved": true, "steps": 3, "cumulative_reward": 54.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "bad_config_push", "seed": 256, "resolved": true, "steps": 3, "cumulative_reward": 54.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "bad_config_push", "seed": 287, "resolved": true, "steps": 3, "cumulative_reward": 54.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "bad_config_push", "seed": 318, "resolved": true, "steps": 3, "cumulative_reward": 54.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "dns_outage", "seed": 291, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "dns_outage", "seed": 322, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "dns_outage", "seed": 353, "resolved": true, "steps": 3, "cumulative_reward": 104.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "dns_outage", "seed": 384, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "easy", "failure_type": "dns_outage", "seed": 415, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "db_deadlock", "seed": 0, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "db_deadlock", "seed": 31, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "db_deadlock", "seed": 62, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "db_deadlock", "seed": 93, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "db_deadlock", "seed": 124, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "memory_leak", "seed": 97, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "memory_leak", "seed": 128, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "memory_leak", "seed": 159, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "memory_leak", "seed": 190, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "memory_leak", "seed": 221, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "bad_config_push", "seed": 194, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "bad_config_push", "seed": 225, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "bad_config_push", "seed": 256, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "bad_config_push", "seed": 287, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "bad_config_push", "seed": 318, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "dns_outage", "seed": 291, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "dns_outage", "seed": 322, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "dns_outage", "seed": 353, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "dns_outage", "seed": 384, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "easy", "failure_type": "dns_outage", "seed": 415, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "db_deadlock", "seed": 0, "resolved": false, "steps": 12, "cumulative_reward": -356.0, "wrong_fixes": 2, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "db_deadlock", "seed": 31, "resolved": false, "steps": 12, "cumulative_reward": -286.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "db_deadlock", "seed": 62, "resolved": false, "steps": 12, "cumulative_reward": -346.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "db_deadlock", "seed": 93, "resolved": false, "steps": 12, "cumulative_reward": -176.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "db_deadlock", "seed": 124, "resolved": false, "steps": 12, "cumulative_reward": -306.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "memory_leak", "seed": 97, "resolved": false, "steps": 12, "cumulative_reward": -256.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "memory_leak", "seed": 128, "resolved": false, "steps": 12, "cumulative_reward": -276.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "memory_leak", "seed": 159, "resolved": false, "steps": 12, "cumulative_reward": -276.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "memory_leak", "seed": 190, "resolved": false, "steps": 12, "cumulative_reward": -256.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "memory_leak", "seed": 221, "resolved": false, "steps": 12, "cumulative_reward": -256.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "bad_config_push", "seed": 194, "resolved": false, "steps": 12, "cumulative_reward": -196.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "bad_config_push", "seed": 225, "resolved": false, "steps": 12, "cumulative_reward": -176.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "bad_config_push", "seed": 256, "resolved": false, "steps": 12, "cumulative_reward": -256.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "bad_config_push", "seed": 287, "resolved": false, "steps": 12, "cumulative_reward": -216.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "bad_config_push", "seed": 318, "resolved": false, "steps": 12, "cumulative_reward": -226.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "dns_outage", "seed": 291, "resolved": false, "steps": 12, "cumulative_reward": -226.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "dns_outage", "seed": 322, "resolved": false, "steps": 12, "cumulative_reward": -196.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "dns_outage", "seed": 353, "resolved": false, "steps": 12, "cumulative_reward": -276.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "dns_outage", "seed": 384, "resolved": false, "steps": 12, "cumulative_reward": -216.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "easy", "failure_type": "dns_outage", "seed": 415, "resolved": false, "steps": 12, "cumulative_reward": -256.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "cascade", "seed": 0, "resolved": false, "steps": 18, "cumulative_reward": -992.0, "wrong_fixes": 11, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "cascade", "seed": 31, "resolved": false, "steps": 18, "cumulative_reward": -842.0, "wrong_fixes": 8, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "cascade", "seed": 62, "resolved": false, "steps": 18, "cumulative_reward": -932.0, "wrong_fixes": 9, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "cascade", "seed": 93, "resolved": false, "steps": 18, "cumulative_reward": -692.0, "wrong_fixes": 5, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "cascade", "seed": 124, "resolved": false, "steps": 18, "cumulative_reward": -1082.0, "wrong_fixes": 12, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 97, "resolved": false, "steps": 18, "cumulative_reward": -802.0, "wrong_fixes": 8, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 128, "resolved": false, "steps": 18, "cumulative_reward": -952.0, "wrong_fixes": 11, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 159, "resolved": true, "steps": 3, "cumulative_reward": 4.0, "wrong_fixes": 2, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 190, "resolved": false, "steps": 18, "cumulative_reward": -1002.0, "wrong_fixes": 12, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 221, "resolved": true, "steps": 4, "cumulative_reward": 48.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "disk_full", "seed": 194, "resolved": true, "steps": 5, "cumulative_reward": -60.0, "wrong_fixes": 3, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "disk_full", "seed": 225, "resolved": true, "steps": 12, "cumulative_reward": -332.0, "wrong_fixes": 6, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "disk_full", "seed": 256, "resolved": true, "steps": 17, "cumulative_reward": -872.0, "wrong_fixes": 14, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "disk_full", "seed": 287, "resolved": true, "steps": 11, "cumulative_reward": -310.0, "wrong_fixes": 6, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "medium", "failure_type": "disk_full", "seed": 318, "resolved": false, "steps": 18, "cumulative_reward": -802.0, "wrong_fixes": 8, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "cascade", "seed": 0, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "cascade", "seed": 31, "resolved": true, "steps": 5, "cumulative_reward": 90.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "cascade", "seed": 62, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "cascade", "seed": 93, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "cascade", "seed": 124, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 97, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 128, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 159, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 190, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 221, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "disk_full", "seed": 194, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "disk_full", "seed": 225, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "disk_full", "seed": 256, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "disk_full", "seed": 287, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "medium", "failure_type": "disk_full", "seed": 318, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "cascade", "seed": 0, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "cascade", "seed": 31, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "cascade", "seed": 62, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "cascade", "seed": 93, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "cascade", "seed": 124, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 97, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "autoscaler" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 128, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "autoscaler" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 159, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "autoscaler" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 190, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "autoscaler" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 221, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "autoscaler" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "disk_full", "seed": 194, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "disk_full", "seed": 225, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "disk_full", "seed": 256, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "disk_full", "seed": 287, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "medium", "failure_type": "disk_full", "seed": 318, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "cascade", "seed": 0, "resolved": false, "steps": 18, "cumulative_reward": -442.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "cascade", "seed": 31, "resolved": false, "steps": 18, "cumulative_reward": -622.0, "wrong_fixes": 2, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "cascade", "seed": 62, "resolved": false, "steps": 18, "cumulative_reward": -582.0, "wrong_fixes": 2, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "cascade", "seed": 93, "resolved": false, "steps": 18, "cumulative_reward": -602.0, "wrong_fixes": 2, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "cascade", "seed": 124, "resolved": false, "steps": 18, "cumulative_reward": -602.0, "wrong_fixes": 2, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 97, "resolved": false, "steps": 18, "cumulative_reward": -382.0, "wrong_fixes": 3, "oversight_flags": [ "autoscaler", "autoscaler" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 128, "resolved": false, "steps": 18, "cumulative_reward": -542.0, "wrong_fixes": 4, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 159, "resolved": false, "steps": 18, "cumulative_reward": -512.0, "wrong_fixes": 4, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 190, "resolved": false, "steps": 18, "cumulative_reward": -482.0, "wrong_fixes": 4, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "autoscaler_cost_cut", "seed": 221, "resolved": false, "steps": 18, "cumulative_reward": -482.0, "wrong_fixes": 3, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "disk_full", "seed": 194, "resolved": true, "steps": 7, "cumulative_reward": 78.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "disk_full", "seed": 225, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "disk_full", "seed": 256, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "disk_full", "seed": 287, "resolved": true, "steps": 7, "cumulative_reward": 48.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "medium", "failure_type": "disk_full", "seed": 318, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 0, "resolved": false, "steps": 25, "cumulative_reward": -1510.0, "wrong_fixes": 16, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 31, "resolved": false, "steps": 25, "cumulative_reward": -1210.0, "wrong_fixes": 10, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 62, "resolved": false, "steps": 25, "cumulative_reward": -1510.0, "wrong_fixes": 16, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 93, "resolved": false, "steps": 25, "cumulative_reward": -1160.0, "wrong_fixes": 9, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 124, "resolved": false, "steps": 25, "cumulative_reward": -1560.0, "wrong_fixes": 17, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "cascade", "seed": 97, "resolved": false, "steps": 25, "cumulative_reward": -1190.0, "wrong_fixes": 8, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "cascade", "seed": 128, "resolved": false, "steps": 25, "cumulative_reward": -1480.0, "wrong_fixes": 13, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "cascade", "seed": 159, "resolved": false, "steps": 25, "cumulative_reward": -1340.0, "wrong_fixes": 11, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "cascade", "seed": 190, "resolved": false, "steps": 25, "cumulative_reward": -1590.0, "wrong_fixes": 16, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "cascade", "seed": 221, "resolved": false, "steps": 25, "cumulative_reward": -1600.0, "wrong_fixes": 17, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 194, "resolved": true, "steps": 10, "cumulative_reward": -240.0, "wrong_fixes": 5, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 225, "resolved": false, "steps": 25, "cumulative_reward": -1510.0, "wrong_fixes": 16, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 256, "resolved": false, "steps": 25, "cumulative_reward": -1710.0, "wrong_fixes": 20, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 287, "resolved": true, "steps": 22, "cumulative_reward": -1112.0, "wrong_fixes": 15, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 318, "resolved": false, "steps": 25, "cumulative_reward": -1310.0, "wrong_fixes": 12, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 291, "resolved": false, "steps": 25, "cumulative_reward": -1360.0, "wrong_fixes": 13, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 322, "resolved": false, "steps": 25, "cumulative_reward": -1410.0, "wrong_fixes": 14, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 353, "resolved": false, "steps": 25, "cumulative_reward": -1560.0, "wrong_fixes": 17, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 384, "resolved": false, "steps": 25, "cumulative_reward": -1460.0, "wrong_fixes": 15, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "random", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 415, "resolved": false, "steps": 25, "cumulative_reward": -1310.0, "wrong_fixes": 12, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 0, "resolved": false, "steps": 25, "cumulative_reward": -1060.0, "wrong_fixes": 13, "oversight_flags": [ "load_balancer", "load_balancer", "load_balancer", "load_balancer", "load_balancer", "load_balancer" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 31, "resolved": false, "steps": 25, "cumulative_reward": -760.0, "wrong_fixes": 7, "oversight_flags": [ "load_balancer", "load_balancer", "load_balancer", "load_balancer", "load_balancer", "load_balancer" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 62, "resolved": false, "steps": 25, "cumulative_reward": -1010.0, "wrong_fixes": 12, "oversight_flags": [ "load_balancer", "load_balancer", "load_balancer", "load_balancer", "load_balancer", "load_balancer" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 93, "resolved": false, "steps": 25, "cumulative_reward": -860.0, "wrong_fixes": 9, "oversight_flags": [ "load_balancer", "load_balancer", "load_balancer", "load_balancer", "load_balancer", "load_balancer" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 124, "resolved": false, "steps": 25, "cumulative_reward": -960.0, "wrong_fixes": 11, "oversight_flags": [ "load_balancer", "load_balancer", "load_balancer", "load_balancer", "load_balancer", "load_balancer" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "cascade", "seed": 97, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "cascade", "seed": 128, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "cascade", "seed": 159, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "cascade", "seed": 190, "resolved": true, "steps": 3, "cumulative_reward": 104.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "cascade", "seed": 221, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 194, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 225, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 256, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 287, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 318, "resolved": true, "steps": 1, "cumulative_reward": 110.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 291, "resolved": false, "steps": 25, "cumulative_reward": -1060.0, "wrong_fixes": 13, "oversight_flags": [ "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 322, "resolved": false, "steps": 25, "cumulative_reward": -1010.0, "wrong_fixes": 12, "oversight_flags": [ "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 353, "resolved": false, "steps": 25, "cumulative_reward": -960.0, "wrong_fixes": 11, "oversight_flags": [ "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 384, "resolved": false, "steps": 25, "cumulative_reward": -960.0, "wrong_fixes": 11, "oversight_flags": [ "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "heuristic", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 415, "resolved": false, "steps": 25, "cumulative_reward": -960.0, "wrong_fixes": 11, "oversight_flags": [ "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot", "deploy_bot" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 0, "resolved": true, "steps": 4, "cumulative_reward": 178.0, "wrong_fixes": 0, "oversight_flags": [ "load_balancer" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 31, "resolved": true, "steps": 4, "cumulative_reward": 178.0, "wrong_fixes": 0, "oversight_flags": [ "load_balancer" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 62, "resolved": true, "steps": 4, "cumulative_reward": 178.0, "wrong_fixes": 0, "oversight_flags": [ "load_balancer" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 93, "resolved": true, "steps": 4, "cumulative_reward": 178.0, "wrong_fixes": 0, "oversight_flags": [ "load_balancer" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 124, "resolved": true, "steps": 4, "cumulative_reward": 178.0, "wrong_fixes": 0, "oversight_flags": [ "load_balancer" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "cascade", "seed": 97, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "cascade", "seed": 128, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "cascade", "seed": 159, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "cascade", "seed": 190, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "cascade", "seed": 221, "resolved": true, "steps": 3, "cumulative_reward": 134.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 194, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "autoscaler" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 225, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "autoscaler" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 256, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "autoscaler" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 287, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "autoscaler" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 318, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "autoscaler" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 291, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "deploy_bot" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 322, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "deploy_bot" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 353, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "deploy_bot" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 384, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "deploy_bot" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "oracle", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 415, "resolved": true, "steps": 3, "cumulative_reward": 184.0, "wrong_fixes": 0, "oversight_flags": [ "deploy_bot" ], "had_rogue": true, "rogue_caught": true, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 0, "resolved": false, "steps": 25, "cumulative_reward": -790.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 31, "resolved": false, "steps": 25, "cumulative_reward": -850.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 62, "resolved": false, "steps": 25, "cumulative_reward": -890.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 93, "resolved": false, "steps": 25, "cumulative_reward": -890.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "misrouted_traffic", "seed": 124, "resolved": false, "steps": 25, "cumulative_reward": -780.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "cascade", "seed": 97, "resolved": false, "steps": 25, "cumulative_reward": -790.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "cascade", "seed": 128, "resolved": false, "steps": 25, "cumulative_reward": -970.0, "wrong_fixes": 2, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "cascade", "seed": 159, "resolved": false, "steps": 25, "cumulative_reward": -890.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "cascade", "seed": 190, "resolved": false, "steps": 25, "cumulative_reward": -860.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "cascade", "seed": 221, "resolved": false, "steps": 25, "cumulative_reward": -900.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": false, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 194, "resolved": false, "steps": 25, "cumulative_reward": -830.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 225, "resolved": false, "steps": 25, "cumulative_reward": -680.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 256, "resolved": false, "steps": 25, "cumulative_reward": -860.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 287, "resolved": false, "steps": 25, "cumulative_reward": -810.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "autoscaler_cost_cut", "seed": 318, "resolved": false, "steps": 25, "cumulative_reward": -780.0, "wrong_fixes": 1, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 291, "resolved": false, "steps": 25, "cumulative_reward": -770.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 322, "resolved": false, "steps": 25, "cumulative_reward": -810.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 353, "resolved": false, "steps": 25, "cumulative_reward": -810.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 384, "resolved": false, "steps": 25, "cumulative_reward": -770.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false }, { "policy": "trained", "tier": "hard", "failure_type": "rogue_deploy_bot", "seed": 415, "resolved": false, "steps": 25, "cumulative_reward": -790.0, "wrong_fixes": 0, "oversight_flags": [], "had_rogue": true, "rogue_caught": false, "false_positive": false } ], "aggregates": [ { "policy": "heuristic", "tier": "easy", "episodes": 20, "success_rate": 1.0, "mttr": 1.9, "rogue_detection_rate": 0.0, "false_positive_rate": 0.0, "mean_reward": 94.4, "median_reward": 110.0, "mean_wrong_fixes": 0.25 }, { "policy": "heuristic", "tier": "hard", "episodes": 20, "success_rate": 0.5, "mttr": 1.2, "rogue_detection_rate": 0.6666666666666666, "false_positive_rate": 0.0, "mean_reward": -425.3, "median_reward": -328.0, "mean_wrong_fixes": 5.5 }, { "policy": "heuristic", "tier": "medium", "episodes": 15, "success_rate": 1.0, "mttr": 1.2666666666666666, "rogue_detection_rate": 0.0, "false_positive_rate": 0.0, "mean_reward": 108.66666666666667, "median_reward": 110.0, "mean_wrong_fixes": 0 }, { "policy": "oracle", "tier": "easy", "episodes": 20, "success_rate": 1.0, "mttr": 3, "rogue_detection_rate": 0.0, "false_positive_rate": 0.0, "mean_reward": 134.0, "median_reward": 134.0, "mean_wrong_fixes": 0 }, { "policy": "oracle", "tier": "hard", "episodes": 20, "success_rate": 1.0, "mttr": 3.25, "rogue_detection_rate": 1.0, "false_positive_rate": 0.0, "mean_reward": 170.0, "median_reward": 181.0, "mean_wrong_fixes": 0 }, { "policy": "oracle", "tier": "medium", "episodes": 15, "success_rate": 1.0, "mttr": 3, "rogue_detection_rate": 1.0, "false_positive_rate": 0.0, "mean_reward": 150.66666666666666, "median_reward": 134.0, "mean_wrong_fixes": 0 }, { "policy": "random", "tier": "easy", "episodes": 20, "success_rate": 0.55, "mttr": 7.2727272727272725, "rogue_detection_rate": 0.0, "false_positive_rate": 0.0, "mean_reward": -315.4, "median_reward": -256.0, "mean_wrong_fixes": 4.95 }, { "policy": "random", "tier": "hard", "episodes": 20, "success_rate": 0.1, "mttr": 16, "rogue_detection_rate": 0.0, "false_positive_rate": 0.0, "mean_reward": -1356.6, "median_reward": -1435.0, "mean_wrong_fixes": 13.6 }, { "policy": "random", "tier": "medium", "episodes": 15, "success_rate": 0.4, "mttr": 8.666666666666666, "rogue_detection_rate": 0.0, "false_positive_rate": 0.0, "mean_reward": -641.3333333333334, "median_reward": -802.0, "mean_wrong_fixes": 7.733333333333333 }, { "policy": "trained", "tier": "easy", "episodes": 20, "success_rate": 0.0, "mttr": NaN, "rogue_detection_rate": 0.0, "false_positive_rate": 0.0, "mean_reward": -251.5, "median_reward": -256.0, "mean_wrong_fixes": 0.25 }, { "policy": "trained", "tier": "hard", "episodes": 20, "success_rate": 0.0, "mttr": NaN, "rogue_detection_rate": 0.0, "false_positive_rate": 0.0, "mean_reward": -826.0, "median_reward": -810.0, "mean_wrong_fixes": 0.3 }, { "policy": "trained", "tier": "medium", "episodes": 15, "success_rate": 0.3333333333333333, "mttr": 4.6, "rogue_detection_rate": 0.2, "false_positive_rate": 0.0, "mean_reward": -314.8, "median_reward": -482.0, "mean_wrong_fixes": 1.8666666666666667 } ] }