agent-cost-optimizer / eval /v8_results.json
narcolepticchicken's picture
Upload eval/v8_results.json with huggingface_hub
129ae86 verified
{
"always_frontier": {
"success": 0.8935,
"avg_cost": 1.0,
"unsafe_rate": 0.026,
"false_done": 0.0805,
"tier_dist": {
"4": 2000
}
},
"always_cheap": {
"success": 0.238,
"avg_cost": 0.04999999999999823,
"unsafe_rate": 0.762,
"false_done": 0.0,
"tier_dist": {
"1": 2000
}
},
"heuristic_static": {
"success": 0.825,
"avg_cost": 0.929275000000004,
"unsafe_rate": 0.058,
"false_done": 0.117,
"tier_dist": {
"4": 674,
"3": 646,
"5": 443,
"2": 237
}
},
"oracle": {
"success": 1.0,
"avg_cost": 0.46504999999999164,
"unsafe_rate": 0.0,
"false_done": 0.0,
"tier_dist": {
"3": 647,
"1": 476,
"2": 567,
"4": 258,
"5": 52
}
},
"v8_dynamic+ML": {
"success": 0.82,
"avg_cost": 0.922775000000004,
"unsafe_rate": 0.0615,
"false_done": 0.1185,
"tier_dist": {
"3": 754,
"4": 538,
"5": 471,
"2": 237
}
},
"v8_dynamic_only": {
"success": 0.82,
"avg_cost": 0.922775000000004,
"unsafe_rate": 0.0615,
"false_done": 0.1185,
"tier_dist": {
"3": 754,
"4": 538,
"5": 471,
"2": 237
}
}
}