| { | |
| "v10_direct": { | |
| "success": 0.766, | |
| "avg_cost": 0.1878, | |
| "costRed": 40.7 | |
| }, | |
| "v10_cascade": { | |
| "success": 0.756, | |
| "avg_cost": 0.1767, | |
| "costRed": 44.2 | |
| }, | |
| "v10_feedback": { | |
| "success": 0.848, | |
| "avg_cost": 0.2014, | |
| "costRed": 36.4 | |
| }, | |
| "v8_synthetic": { | |
| "success": 0.658, | |
| "avg_cost": 0.3534, | |
| "costRed": -11.6 | |
| }, | |
| "frontier": { | |
| "success": 0.782, | |
| "avg_cost": 0.3167, | |
| "costRed": 0.0 | |
| }, | |
| "oracle": { | |
| "success": 0.87, | |
| "avg_cost": 0.0624, | |
| "costRed": 80.3 | |
| }, | |
| "always_cheap": { | |
| "success": 0.632, | |
| "avg_cost": 0.0142, | |
| "costRed": 95.5 | |
| }, | |
| "key_finding": "v10 trained on REAL SWE-Router data achieves a 36-44% cost reduction vs always-frontier, whereas synthetic-trained v8 costs 11.6% more than always-frontier (costRed -11.6). v10_feedback achieves HIGHER success than always-frontier (84.8% vs 78.2%) at a 36.4% cost reduction." | |
| } |