{ "v10_direct": { "success": 0.766, "avg_cost": 0.1878, "costRed": 40.7 }, "v10_cascade": { "success": 0.756, "avg_cost": 0.1767, "costRed": 44.2 }, "v10_feedback": { "success": 0.848, "avg_cost": 0.2014, "costRed": 36.4 }, "v8_synthetic": { "success": 0.658, "avg_cost": 0.3534, "costRed": -11.6 }, "frontier": { "success": 0.782, "avg_cost": 0.3167, "costRed": 0.0 }, "oracle": { "success": 0.87, "avg_cost": 0.0624, "costRed": 80.3 }, "always_cheap": { "success": 0.632, "avg_cost": 0.0142, "costRed": 95.5 }, "key_finding": "v10 trained on REAL SWE-Router data achieves 36-44% cost reduction vs -11.6% (an 11.6% cost increase) for synthetic-trained v8. v10_feedback achieves HIGHER success than always-frontier (84.8% vs 78.2%) at 36.4% cost reduction." }