CollabReasoning / eval_results.json
Andrew Lara
Deploy landing page update to Space
ee91164
{
"uniform": {
"agent": "uniform",
"n_episodes": 50,
"mean_reward": 7.620243686,
"std_reward": 1.497929030271716,
"mean_accuracy": 0.78,
"std_accuracy": 0.13564659966250536,
"mean_budget_utilization": 1.0,
"episodes": [
{
"total_reward": 8.93117295,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.0047081500000001
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.0020814
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2800,
"reward": 1.00155605
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.0032822000000001
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.00200635
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.00801035
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": -0.10001795000000001
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0026818
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": 1.0032822000000001
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.0035824
}
]
},
{
"total_reward": 5.638689650000001,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": -0.10001425
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.0061341000000001
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2800,
"reward": 1.00575885
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.10001715
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2000,
"reward": 1.0073349
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.00365745
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": -0.10001825
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0071848
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": -0.10001660000000001
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0086858
}
]
},
{
"total_reward": 6.72990625,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0025317
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": -0.10001520000000001
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2800,
"reward": -0.10001840000000001
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.0056838
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.0028318999999999
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.0035824
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": 1.00440795
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.00695965
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": -0.10001520000000001
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00395765
}
]
},
{
"total_reward": 8.94896065,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.0041828
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.00816045
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.00515845
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.00650935
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.0037325
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.0035824
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1200,
"reward": 1.0083856
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.00440795
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": 1.00485825
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10001710000000001
}
]
},
{
"total_reward": 3.4224851499999995,
"accuracy": 0.4,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0077852
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10001735
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": -0.10001795000000001
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": -0.10001605000000001
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2000,
"reward": -0.1000168
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.0082355
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": -0.10001645
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0025317
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 400,
"reward": 1.0040327
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.10001535
}
]
},
{
"total_reward": 7.841082350000001,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.00635925
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0031321
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": -0.10001825
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.1000169
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.00395765
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.0028318999999999
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1200,
"reward": 1.0086107500000001
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0035824
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": 1.00425785
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0083856
}
]
},
{
"total_reward": 6.7296836,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3600,
"reward": -0.10001520000000001
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0043329
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2800,
"reward": -0.1000145
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.10001660000000001
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.0062091499999999
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.0023816
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1200,
"reward": 1.0022315
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.0052335000000001
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": 1.0032822000000001
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00605905
}
]
},
{
"total_reward": 8.936575950000002,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.00290695
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.00530855
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2800,
"reward": 1.0016311
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.00395765
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2000,
"reward": 1.00605905
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.00605905
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": 1.00260675
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.00485825
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": -0.10001855000000001
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.00320715
}
]
},
{
"total_reward": 10.0601402,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0016311
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0068846
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.00245665
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.00725985
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.0077852
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.00440795
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": 1.008986
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.0037325
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": 1.00801035
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.008986
}
]
},
{
"total_reward": 10.0479821,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.00725985
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.00320715
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.0064343
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0046331
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2000,
"reward": 1.00695965
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.0020814
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": 1.0025317
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.00725985
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": 1.0032822000000001
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.0043329
}
]
},
{
"total_reward": 7.836208800000001,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.00365745
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.00485825
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.0075600500000002
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.0041828
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2000,
"reward": 1.00380755
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": -0.10001495
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": -0.10001550000000001
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.00200635
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": 1.00485825
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.00530855
}
]
},
{
"total_reward": 6.7447692,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0080854000000001
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0083856
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.0049333
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.1000145
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.0088359
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.004483
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": 1.00320715
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0068846
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": -0.10001605000000001
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.10001520000000001
}
]
},
{
"total_reward": 6.730883350000001,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0023816
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.00680955
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": -0.10001555000000001
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.0041828
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2000,
"reward": 1.00200635
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.0047831999999999
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": 1.0061341000000001
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": -0.10001660000000001
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": 1.0046331
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10001520000000001
}
]
},
{
"total_reward": 7.8407082500000005,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.0050834
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.00635925
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": -0.10001675
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.0085357
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.0038826
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.0018562500000001
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": 1.00320715
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.00635925
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 400,
"reward": -0.10001725
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.00545865
}
]
},
{
"total_reward": 8.94701215,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3600,
"reward": 1.00545865
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.00500835
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.0077852
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.00680955
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2000,
"reward": 1.00440795
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.00440795
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": 1.0032822000000001
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.00695965
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": -0.1000143
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.00290695
}
]
},
{
"total_reward": 6.73463205,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.00365745
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": -0.10001795000000001
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 2800,
"reward": -0.10001660000000001
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.0082355
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.0035824
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.00710975
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": 1.0067345
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.00245665
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": 1.00290695
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": -0.10001660000000001
}
]
},
{
"total_reward": 8.946108400000002,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3600,
"reward": -0.10001745000000001
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.00260675
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.008986
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.0025317
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.0086107500000001
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.00530855
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1200,
"reward": 1.00170615
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0086858
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": 1.00440795
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0032822000000001
}
]
},
{
"total_reward": 8.934851,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0028318999999999
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.00320715
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.00440795
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.00425785
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2000,
"reward": 1.002982
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.00350735
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": 1.0050834
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0035824
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": -0.10001735
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.00500835
}
]
},
{
"total_reward": 6.740263100000001,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": -0.1000162
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": -0.10001660000000001
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.0077852
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.0062842
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.00605905
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.00545865
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1200,
"reward": 1.00350735
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.00365745
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": -0.10001605000000001
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0075600500000002
}
]
},
{
"total_reward": 6.732084499999999,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.007485
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": -0.10001555000000001
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.00590895
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.0028318999999999
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.0028318999999999
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": -0.10001455000000001
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": 1.00876085
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": -0.1000169
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": 1.0017812
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0025317
}
]
},
{
"total_reward": 8.9449069,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.0025317
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.0062091499999999
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2800,
"reward": 1.00440795
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.10001815
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.0033572499999999
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.00350735
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1200,
"reward": 1.0080854000000001
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.00801035
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 400,
"reward": 1.0064343
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0023816
}
]
},
{
"total_reward": 8.952115500000001,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3600,
"reward": 1.0056838
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10001435
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.0086858
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.00801035
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2000,
"reward": 1.0026818
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.0037325
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1200,
"reward": 1.00650935
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0047831999999999
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 400,
"reward": 1.0037325
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.00831055
}
]
},
{
"total_reward": 7.849942,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3600,
"reward": -0.1000149
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.00740995
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2800,
"reward": 1.0079353
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.0020814
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": -0.10001650000000001
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.008986
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1200,
"reward": 1.0086107500000001
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0034323
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": 1.0034323
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0080854000000001
}
]
},
{
"total_reward": 7.84551195,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.0079353
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0086858
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.0047081500000001
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.00425785
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.0037325
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": -0.1000182
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1200,
"reward": 1.00650935
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0047081500000001
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": 1.00500835
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.1000153
}
]
},
{
"total_reward": 8.9472363,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": -0.1000153
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0033572499999999
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2800,
"reward": 1.0056838
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0056838
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.00245665
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.0085357
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1200,
"reward": 1.0086858
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0028318999999999
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": 1.0025317
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.007485
}
]
},
{
"total_reward": 7.839136450000001,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.00650935
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.00440795
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.00155605
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.0079353
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.00440795
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.00260675
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": 1.00695965
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": -0.10001550000000001
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": 1.0047831999999999
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10001425
}
]
},
{
"total_reward": 10.042878700000001,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3600,
"reward": 1.0033572499999999
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.00380755
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2800,
"reward": 1.0018562500000001
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0086858
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2000,
"reward": 1.0016311
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.00155605
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": 1.0031321
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0077852
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": 1.0085357
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0025317
}
]
},
{
"total_reward": 5.6258552,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3600,
"reward": 1.00350735
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10001745000000001
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2800,
"reward": -0.10001475
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.00380755
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.0025317
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.00155605
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": -0.10001760000000001
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0065844
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": 1.0079353
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.10001735
}
]
},
{
"total_reward": 7.843411150000001,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3600,
"reward": 1.0086858
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10001435
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2800,
"reward": 1.00305705
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.00680955
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.00485825
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": -0.10001855000000001
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1200,
"reward": 1.00710975
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0049333
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": 1.00350735
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.004483
}
]
},
{
"total_reward": 7.838907149999999,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.0043329
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.00695965
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.00695965
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.10001855000000001
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2000,
"reward": -0.10001535
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.0050834
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": 1.0026818
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0056838
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": 1.00485825
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0023816
}
]
},
{
"total_reward": 7.836883499999999,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0037325
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0016311
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.00440795
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.1000169
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2000,
"reward": 1.0065844
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": -0.1000143
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1200,
"reward": 1.0085357
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.00350735
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": 1.0049333
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0035824
}
]
},
{
"total_reward": 6.7344062,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": -0.10001660000000001
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.00320715
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": -0.10001890000000001
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": -0.10001635
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.0025317
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.00876085
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": 1.008986
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0047831999999999
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": 1.0016311
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.00455805
}
]
},
{
"total_reward": 6.733280700000001,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": -0.10001605000000001
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0082355
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.0017812
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.00305705
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2000,
"reward": -0.10001755000000001
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": -0.10001800000000001
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": 1.00320715
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.00635925
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 400,
"reward": 1.00575885
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0049333
}
]
},
{
"total_reward": 8.956168,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.0075600500000002
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.0033572499999999
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.0086107500000001
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.00440795
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.00710975
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.0073349
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1200,
"reward": 1.0077852
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.00710975
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": 1.00290695
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.10001455000000001
}
]
},
{
"total_reward": 8.95031035,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.00771015
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.00635925
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.0080854000000001
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.0038826
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.0035824
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": -0.1000183
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": 1.00320715
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0038826
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 400,
"reward": 1.00816045
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.00545865
}
]
},
{
"total_reward": 5.637345049999999,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": -0.10001415000000001
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.0053836
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.00650935
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.10001535
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.0053836
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": -0.1000149
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": -0.10001555000000001
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0056838
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 400,
"reward": 1.0070347
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.00740995
}
]
},
{
"total_reward": 8.951287350000001,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.0073349
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.0034323
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": -0.10001695000000001
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.0043329
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.00245665
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.0070347
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1200,
"reward": 1.00575885
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.00831055
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": 1.0037325
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.00891095
}
]
},
{
"total_reward": 4.53682,
"accuracy": 0.5,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0070347
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": -0.1000143
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2800,
"reward": 1.0086858
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": -0.10001775
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2000,
"reward": -0.10001455000000001
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": -0.10001855000000001
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": 1.00635925
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0080854000000001
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": 1.0067345
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": -0.1000145
}
]
},
{
"total_reward": 6.734710550000001,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.0041828
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": -0.10001520000000001
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.00365745
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.00200635
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.0075600500000002
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.00575885
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": -0.1000143
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": -0.1000182
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 400,
"reward": 1.0038826
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00771015
}
]
},
{
"total_reward": 7.837031000000001,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.00740995
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": -0.10001825
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": -0.10001555000000001
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.0035824
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.0020814
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.00200635
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1200,
"reward": 1.0075600500000002
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.00305705
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": 1.0062842
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0050834
}
]
},
{
"total_reward": 7.8445363000000015,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0070347
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.00816045
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.00740995
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": -0.1000183
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.00260675
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.0043329
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1200,
"reward": 1.00395765
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": -0.10001520000000001
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 400,
"reward": 1.0023816
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0086858
}
]
},
{
"total_reward": 7.8411603,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0016311
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.0066594500000001
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.0052335000000001
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.00500835
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.00605905
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.004483
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": -0.10001470000000001
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": -0.10001755000000001
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": 1.00725985
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.00485825
}
]
},
{
"total_reward": 6.742589049999999,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.0043329
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.00155605
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": -0.10001520000000001
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.00831055
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.00771015
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.00500835
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": -0.10001605000000001
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0071848
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": -0.1000182
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.0085357
}
]
},
{
"total_reward": 6.73163405,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.0056838
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0023816
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2800,
"reward": 1.004483
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.0050834
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.0023816
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": -0.10001635
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1200,
"reward": 1.00771015
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": -0.10001435
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": -0.10001645
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00395765
}
]
},
{
"total_reward": 8.9411572,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.0043329
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0046331
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.0016311
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0037325
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2000,
"reward": 1.00876085
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": -0.10001535
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": 1.00320715
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.00740995
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": 1.0046331
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0028318999999999
}
]
},
{
"total_reward": 6.73613935,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0070347
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": -0.10001470000000001
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2800,
"reward": 1.00305705
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": -0.10001520000000001
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.00455805
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": -0.10001495
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": 1.0065844
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0047831999999999
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": 1.00365745
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.00650935
}
]
},
{
"total_reward": 3.4263921500000003,
"accuracy": 0.4,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3600,
"reward": 1.00725985
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": -0.10001470000000001
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2800,
"reward": -0.1000143
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.0053836
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2000,
"reward": -0.10001405000000001
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.0061341000000001
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": 1.00771015
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": -0.10001835
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": -0.1000187
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10001545
}
]
},
{
"total_reward": 6.735009150000001,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.00515845
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0062842
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.0035824
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": -0.1000168
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2000,
"reward": 1.00771015
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": -0.10001675
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1200,
"reward": 1.00410775
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": -0.10001575
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": 1.00425785
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00395765
}
]
},
{
"total_reward": 8.951962250000001,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.0047831999999999
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.00380755
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2800,
"reward": 1.00680955
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.0025317
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": -0.10001750000000001
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.0080854000000001
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1200,
"reward": 1.0075600500000002
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0077852
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 400,
"reward": 1.0028318999999999
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0077852
}
]
},
{
"total_reward": 8.94926085,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.00545865
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.0062091499999999
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2800,
"reward": 1.00725985
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.0064343
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2000,
"reward": 1.0052335000000001
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.0047081500000001
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": -0.10001710000000001
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0033572499999999
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": 1.0033572499999999
},
{
"step": 10,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.00725985
}
]
}
]
},
"greedy_max": {
"agent": "greedy_max",
"n_episodes": 50,
"mean_reward": 4.163493538,
"std_reward": 0.8606553919009542,
"mean_accuracy": 0.8399999999999999,
"std_accuracy": 0.15491933384829668,
"mean_budget_utilization": 1.0,
"episodes": [
{
"total_reward": 3.9370282500000005,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003655
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.0055137
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.01759675
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0058889500000001
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0080654
}
]
},
{
"total_reward": 2.83028465,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003635000000001
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.00964145
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.0142195
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0064893499999998
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.1000293
}
]
},
{
"total_reward": 5.0546615500000005,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.00363745
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0122682
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.01429455
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.00919115
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0152702
}
]
},
{
"total_reward": 3.9344796,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003350000000001
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.00633925
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.01294365
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.01114245
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00408775
}
]
},
{
"total_reward": 3.9645737,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.01624585
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.013469
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": -0.10003445000000001
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0176718
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.0172215
}
]
},
{
"total_reward": 1.7125787,
"accuracy": 0.4,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.1000352
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.00513845
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": -0.10002935
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": -0.10003525
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00754005
}
]
},
{
"total_reward": 5.04520525,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.0118179
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.01579555
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.00408775
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.00829055
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.0052135
}
]
},
{
"total_reward": 3.9325303500000004,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.01339395
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0032622
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": -0.10003145000000001
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0083656
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.00754005
}
]
},
{
"total_reward": 5.05931465,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.0165460499999999
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.01129255
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.01279355
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0150450500000001
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00363745
}
]
},
{
"total_reward": 2.8317869000000004,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0150450500000001
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.10003530000000001
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.0098666
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.00693965
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10002910000000001
}
]
},
{
"total_reward": 3.9469379,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0112175
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.008966
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.0155704
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": -0.10003350000000001
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.0112175
}
]
},
{
"total_reward": 5.049032799999999,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.0154203
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.0166211
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.0097165
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.00408775
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00318715
}
]
},
{
"total_reward": 2.8395168,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.0152702
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.0073899499999999
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.0169213
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": -0.10003350000000001
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10003115
}
]
},
{
"total_reward": 5.0682456,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0139193
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.01444465
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.0133189
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.01174285
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0148199
}
]
},
{
"total_reward": 3.92637765,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.008966
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.10003005000000001
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.00363745
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.00453805
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0092662000000001
}
]
},
{
"total_reward": 2.8245754499999998,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.01354405
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": -0.10003445000000001
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": -0.1000366
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0047632
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00633925
}
]
},
{
"total_reward": 3.9420648999999996,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": -0.10002825
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.01609575
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.00829055
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0145197
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00318715
}
]
},
{
"total_reward": 5.06576895,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.0152702
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0160206999999999
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.0082155
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0157205
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.01054205
}
]
},
{
"total_reward": 5.0346232,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0077652
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.00303705
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.0064143
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0082155
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.00919115
}
]
},
{
"total_reward": 5.0750001000000005,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0127185
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0169213
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.0107671999999999
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.01729655
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.01729655
}
]
},
{
"total_reward": 2.8305914,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003025
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0100167
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.0091161
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": -0.10002885
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0115177
}
]
},
{
"total_reward": 3.9369521499999998,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.01189295
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.0056638
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.013469
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": -0.1000376
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0059639999999999
}
]
},
{
"total_reward": 3.9525647499999996,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.01114245
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.10003540000000001
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.0067145
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.01744665
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.01729655
}
]
},
{
"total_reward": 5.04955815,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0040127
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.0058139
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.0165460499999999
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.01324385
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00994165
}
]
},
{
"total_reward": 5.058639199999999,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0115177
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.01204305
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.0139193
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.01189295
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0092662000000001
}
]
},
{
"total_reward": 5.038826,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.00528855
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.0092662000000001
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.01204305
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0040127
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0082155
}
]
},
{
"total_reward": 3.94558905,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.00303705
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0112175
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.0178969500000001
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.013469
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10003145000000001
}
]
},
{
"total_reward": 3.93027465,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.00663945
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.10003565
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.00348735
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.00708975
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.01309375
}
]
},
{
"total_reward": 5.0530855,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.013469
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.0056638
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.01054205
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0073899499999999
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.0160206999999999
}
]
},
{
"total_reward": 5.0443796999999995,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.00363745
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0049133
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.0125684
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.00814045
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0151201
}
]
},
{
"total_reward": 3.9214942,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.00408775
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.00468815
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.00558875
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": -0.10003525
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0071648
}
]
},
{
"total_reward": 2.8296829,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.01369415
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.10003530000000001
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.0056638
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.01039195
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": -0.1000317
}
]
},
{
"total_reward": 3.9426592,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0176718
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.0047632
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.00558875
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0146698
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.10003435000000001
}
]
},
{
"total_reward": 5.046406050000001,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.013469
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.00318715
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.00318715
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0092662000000001
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.01729655
}
]
},
{
"total_reward": 5.05180965,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.00453805
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.01444465
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.0112175
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0173716
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.00423785
}
]
},
{
"total_reward": 3.94130965,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003300000000001
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.00889095
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.0103169
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0151201
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.0070147
}
]
},
{
"total_reward": 5.04760685,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.01744665
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.008966
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.0070147
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0049133
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0092662000000001
}
]
},
{
"total_reward": 5.0503837,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.00663945
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.01489495
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.0073899499999999
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.0041628
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.01729655
}
]
},
{
"total_reward": 2.8441699000000003,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003350000000001
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0122682
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.01639595
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": -0.10003115
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0155704
}
]
},
{
"total_reward": 3.9362849499999997,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.00558875
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": -0.10002935
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.0169213
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.00573885
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.0080654
}
]
},
{
"total_reward": 5.0385257999999995,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.00663945
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.00633925
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.00558875
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0131688
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.00678955
}
]
},
{
"total_reward": 5.06862085,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.0165460499999999
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.01399435
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.00979155
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.01474485
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.01354405
}
]
},
{
"total_reward": 3.9379313999999996,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0049133
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.0124183
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.01129255
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": -0.10003400000000001
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0093412499999999
}
]
},
{
"total_reward": 3.9515129,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003655
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.01684625
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.0163209
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.01414445
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00423785
}
]
},
{
"total_reward": 3.9419063999999997,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0094163
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.0146698
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": -0.10003665
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.0038626
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.01399435
}
]
},
{
"total_reward": 3.9389795000000003,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.00663945
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": -0.1000366
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.0098666
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.0122682
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0102418499999999
}
]
},
{
"total_reward": 2.8365136,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.0050634
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": -0.1000366
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": -0.10002925
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.01744665
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0140694
}
]
},
{
"total_reward": 5.0585641500000005,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.01294365
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.0136191
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.00318715
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.0161708
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.0126434500000001
}
]
},
{
"total_reward": 3.9344063499999997,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0064893499999998
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0170713999999998
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.0052135
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": -0.1000317
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.0056638
}
]
},
{
"total_reward": 3.9408614,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0169213
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0100167
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": -0.10003095000000001
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0098666
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.00408775
}
]
}
]
},
"oracle": {
"agent": "oracle",
"n_episodes": 50,
"mean_reward": 6.932624968,
"std_reward": 1.50156485979675,
"mean_accuracy": 0.7282222222222221,
"std_accuracy": 0.13007519192972794,
"mean_budget_utilization": 0.9830249999999999,
"episodes": [
{
"total_reward": 7.896245749999999,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003655
},
{
"step": 2,
"tokens_allocated": 177,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3023,
"reward": 1.00704585
},
{
"step": 3,
"tokens_allocated": 283,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2740,
"reward": 1.00884175
},
{
"step": 4,
"tokens_allocated": 782,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1958,
"reward": 1.0319322
},
{
"step": 5,
"tokens_allocated": 163,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1795,
"reward": 1.00674635
},
{
"step": 6,
"tokens_allocated": 448,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1347,
"reward": 1.01161035
},
{
"step": 7,
"tokens_allocated": 420,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 927,
"reward": -0.10001795000000001
},
{
"step": 8,
"tokens_allocated": 618,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 309,
"reward": 1.0190318
},
{
"step": 9,
"tokens_allocated": 193,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 116,
"reward": 1.0092215
},
{
"step": 10,
"tokens_allocated": 116,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.00187045
}
]
},
{
"total_reward": 5.67526105,
"accuracy": 0.6,
"total_tokens_used": 3843,
"budget_utilization": 0.96075,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003635000000001
},
{
"step": 2,
"tokens_allocated": 177,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3023,
"reward": 1.0068207
},
{
"step": 3,
"tokens_allocated": 755,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2268,
"reward": 1.03238385
},
{
"step": 4,
"tokens_allocated": 648,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1620,
"reward": -0.10001715
},
{
"step": 5,
"tokens_allocated": 540,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1080,
"reward": 1.0178349
},
{
"step": 6,
"tokens_allocated": 108,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 972,
"reward": 1.0011203499999999
},
{
"step": 7,
"tokens_allocated": 303,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 669,
"reward": 1.0110172
},
{
"step": 8,
"tokens_allocated": 167,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 502,
"reward": -0.1000047
},
{
"step": 9,
"tokens_allocated": 188,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 314,
"reward": -0.10000400000000001
},
{
"step": 10,
"tokens_allocated": 157,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 157,
"reward": 1.00614625
}
]
},
{
"total_reward": 7.87083005,
"accuracy": 0.8,
"total_tokens_used": 3830,
"budget_utilization": 0.9575,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3800,
"reward": 1.00246665
},
{
"step": 2,
"tokens_allocated": 527,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3273,
"reward": -0.10001520000000001
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2473,
"reward": 1.01429455
},
{
"step": 4,
"tokens_allocated": 176,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2297,
"reward": 1.0064455
},
{
"step": 5,
"tokens_allocated": 191,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2106,
"reward": 1.00794575
},
{
"step": 6,
"tokens_allocated": 526,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1580,
"reward": 1.0130324
},
{
"step": 7,
"tokens_allocated": 493,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1087,
"reward": 1.01138295
},
{
"step": 8,
"tokens_allocated": 181,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 906,
"reward": 1.00779615
},
{
"step": 9,
"tokens_allocated": 566,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 340,
"reward": -0.10001520000000001
},
{
"step": 10,
"tokens_allocated": 170,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 170,
"reward": 1.0074965
}
]
},
{
"total_reward": 8.9928685,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003350000000001
},
{
"step": 2,
"tokens_allocated": 177,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3023,
"reward": 1.00719595
},
{
"step": 3,
"tokens_allocated": 188,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2835,
"reward": 1.0071954
},
{
"step": 4,
"tokens_allocated": 303,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2532,
"reward": 1.0120679
},
{
"step": 5,
"tokens_allocated": 211,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2321,
"reward": 1.0032166
},
{
"step": 6,
"tokens_allocated": 348,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1973,
"reward": 1.0133415000000001
},
{
"step": 7,
"tokens_allocated": 369,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1604,
"reward": 1.015667
},
{
"step": 8,
"tokens_allocated": 668,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 936,
"reward": 1.02450795
},
{
"step": 9,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 136,
"reward": 1.0059639999999999
},
{
"step": 10,
"tokens_allocated": 136,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0037457
}
]
},
{
"total_reward": 5.6425433499999995,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3800,
"reward": 1.00216645
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3000,
"reward": 1.013469
},
{
"step": 3,
"tokens_allocated": 468,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2532,
"reward": -0.10001795000000001
},
{
"step": 4,
"tokens_allocated": 271,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2261,
"reward": 1.00764155
},
{
"step": 5,
"tokens_allocated": 471,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1790,
"reward": -0.1000168
},
{
"step": 6,
"tokens_allocated": 268,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1522,
"reward": 1.00816705
},
{
"step": 7,
"tokens_allocated": 761,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 761,
"reward": -0.10001645
},
{
"step": 8,
"tokens_allocated": 190,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 571,
"reward": 1.00742045
},
{
"step": 9,
"tokens_allocated": 142,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 429,
"reward": 1.0037454
},
{
"step": 10,
"tokens_allocated": 429,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.10001535
}
]
},
{
"total_reward": 6.881022700000001,
"accuracy": 0.7777777777777778,
"total_tokens_used": 3986,
"budget_utilization": 0.9965,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.1000352
},
{
"step": 2,
"tokens_allocated": 711,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2489,
"reward": 1.0264571
},
{
"step": 3,
"tokens_allocated": 388,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2101,
"reward": 1.0173922
},
{
"step": 4,
"tokens_allocated": 600,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1501,
"reward": -0.1000169
},
{
"step": 5,
"tokens_allocated": 125,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1376,
"reward": 1.0041215
},
{
"step": 6,
"tokens_allocated": 137,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1239,
"reward": 1.00449615
},
{
"step": 7,
"tokens_allocated": 154,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1085,
"reward": 1.0059962999999998
},
{
"step": 8,
"tokens_allocated": 271,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 814,
"reward": 1.00689105
},
{
"step": 9,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 14,
"reward": 1.0157205
}
]
},
{
"total_reward": 5.7549208499999995,
"accuracy": 0.6666666666666666,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 500,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3500,
"reward": -0.10001520000000001
},
{
"step": 2,
"tokens_allocated": 291,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 3209,
"reward": -0.100008
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2409,
"reward": 1.00408775
},
{
"step": 4,
"tokens_allocated": 688,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1721,
"reward": -0.10001660000000001
},
{
"step": 5,
"tokens_allocated": 215,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1506,
"reward": 1.002616
},
{
"step": 6,
"tokens_allocated": 150,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1356,
"reward": 1.0042703499999999
},
{
"step": 7,
"tokens_allocated": 169,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1187,
"reward": 1.00644585
},
{
"step": 8,
"tokens_allocated": 494,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 693,
"reward": 1.0122835000000001
},
{
"step": 9,
"tokens_allocated": 693,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0252572
}
]
},
{
"total_reward": 7.91441255,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 300,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": -0.1000091
},
{
"step": 2,
"tokens_allocated": 513,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3187,
"reward": 1.01378355
},
{
"step": 3,
"tokens_allocated": 796,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2391,
"reward": 1.0313311
},
{
"step": 4,
"tokens_allocated": 170,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2221,
"reward": 1.00682105
},
{
"step": 5,
"tokens_allocated": 740,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1481,
"reward": 1.03155905
},
{
"step": 6,
"tokens_allocated": 148,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1333,
"reward": 1.00412035
},
{
"step": 7,
"tokens_allocated": 416,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 917,
"reward": 1.00380675
},
{
"step": 8,
"tokens_allocated": 611,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 306,
"reward": 1.02068325
},
{
"step": 9,
"tokens_allocated": 114,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 192,
"reward": 1.00232085
},
{
"step": 10,
"tokens_allocated": 192,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.1000043
}
]
},
{
"total_reward": 9.02716315,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3800,
"reward": 1.0035173499999999
},
{
"step": 2,
"tokens_allocated": 316,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3484,
"reward": 1.01236745
},
{
"step": 3,
"tokens_allocated": 544,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2940,
"reward": 1.01325665
},
{
"step": 4,
"tokens_allocated": 315,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2625,
"reward": 1.010116
},
{
"step": 5,
"tokens_allocated": 218,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 2407,
"reward": -0.1000076
},
{
"step": 6,
"tokens_allocated": 361,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2046,
"reward": 1.0151420500000001
},
{
"step": 7,
"tokens_allocated": 639,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1407,
"reward": 1.0269110000000001
},
{
"step": 8,
"tokens_allocated": 586,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 821,
"reward": 1.0176825
},
{
"step": 9,
"tokens_allocated": 513,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 308,
"reward": 1.01648535
},
{
"step": 10,
"tokens_allocated": 308,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.0116924
}
]
},
{
"total_reward": 8.013730599999999,
"accuracy": 0.8888888888888888,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 300,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": 1.008991
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2900,
"reward": -0.10003530000000001
},
{
"step": 3,
"tokens_allocated": 181,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2719,
"reward": 1.0065203
},
{
"step": 4,
"tokens_allocated": 485,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2234,
"reward": 1.0110081
},
{
"step": 5,
"tokens_allocated": 744,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1490,
"reward": 1.03275965
},
{
"step": 6,
"tokens_allocated": 149,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1341,
"reward": 1.00584645
},
{
"step": 7,
"tokens_allocated": 670,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 671,
"reward": 1.0227817
},
{
"step": 8,
"tokens_allocated": 279,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 392,
"reward": 1.00764115
},
{
"step": 9,
"tokens_allocated": 392,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.01821755
}
]
},
{
"total_reward": 6.7610722999999995,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 300,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": 1.0113926
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2900,
"reward": 1.008966
},
{
"step": 3,
"tokens_allocated": 453,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2447,
"reward": 1.01153505
},
{
"step": 4,
"tokens_allocated": 262,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2185,
"reward": 1.00681645
},
{
"step": 5,
"tokens_allocated": 455,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1730,
"reward": 1.00793255
},
{
"step": 6,
"tokens_allocated": 692,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1038,
"reward": -0.10001495
},
{
"step": 7,
"tokens_allocated": 519,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 519,
"reward": -0.10001550000000001
},
{
"step": 8,
"tokens_allocated": 346,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 173,
"reward": 1.01251605
},
{
"step": 9,
"tokens_allocated": 64,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 109,
"reward": -0.1000018
},
{
"step": 10,
"tokens_allocated": 109,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.00194585
}
]
},
{
"total_reward": 7.855514200000001,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3800,
"reward": 1.00201635
},
{
"step": 2,
"tokens_allocated": 316,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3484,
"reward": 1.01101655
},
{
"step": 3,
"tokens_allocated": 217,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3267,
"reward": 1.0036666
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2467,
"reward": 1.00408775
},
{
"step": 5,
"tokens_allocated": 205,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2262,
"reward": 1.0036672
},
{
"step": 6,
"tokens_allocated": 226,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2036,
"reward": 1.0061428000000001
},
{
"step": 7,
"tokens_allocated": 636,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1400,
"reward": 1.02090715
},
{
"step": 8,
"tokens_allocated": 233,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1167,
"reward": 1.00404105
},
{
"step": 9,
"tokens_allocated": 437,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 730,
"reward": -0.10001605000000001
},
{
"step": 10,
"tokens_allocated": 730,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.10001520000000001
}
]
},
{
"total_reward": 6.87944935,
"accuracy": 0.7777777777777778,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3800,
"reward": 1.0028419
},
{
"step": 2,
"tokens_allocated": 211,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3589,
"reward": 1.0036669
},
{
"step": 3,
"tokens_allocated": 560,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3029,
"reward": -0.10001555000000001
},
{
"step": 4,
"tokens_allocated": 324,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2705,
"reward": 1.01146645
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1905,
"reward": -0.10003115
},
{
"step": 6,
"tokens_allocated": 476,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1429,
"reward": 1.0104832
},
{
"step": 7,
"tokens_allocated": 714,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 715,
"reward": 1.0296841
},
{
"step": 8,
"tokens_allocated": 119,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 596,
"reward": 1.0020204
},
{
"step": 9,
"tokens_allocated": 596,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0193331
}
]
},
{
"total_reward": 7.9015055,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 300,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": 1.00966645
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2900,
"reward": 1.01444465
},
{
"step": 3,
"tokens_allocated": 453,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2447,
"reward": -0.10001675
},
{
"step": 4,
"tokens_allocated": 262,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2185,
"reward": 1.0083925
},
{
"step": 5,
"tokens_allocated": 182,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2003,
"reward": 1.0080963
},
{
"step": 6,
"tokens_allocated": 300,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1703,
"reward": 1.00846565
},
{
"step": 7,
"tokens_allocated": 532,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1171,
"reward": 1.01310715
},
{
"step": 8,
"tokens_allocated": 780,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 391,
"reward": 1.03485925
},
{
"step": 9,
"tokens_allocated": 97,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 294,
"reward": 1.00449815
},
{
"step": 10,
"tokens_allocated": 294,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.10000785000000001
}
]
},
{
"total_reward": 6.915772100000001,
"accuracy": 0.7777777777777778,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 500,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3500,
"reward": 1.01295865
},
{
"step": 2,
"tokens_allocated": 777,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2723,
"reward": 1.03328335
},
{
"step": 3,
"tokens_allocated": 170,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2553,
"reward": 1.0071963
},
{
"step": 4,
"tokens_allocated": 273,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2280,
"reward": 1.00809175
},
{
"step": 5,
"tokens_allocated": 760,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1520,
"reward": 1.03140795
},
{
"step": 6,
"tokens_allocated": 380,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1140,
"reward": -0.1000076
},
{
"step": 7,
"tokens_allocated": 356,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 784,
"reward": -0.10000935000000001
},
{
"step": 8,
"tokens_allocated": 522,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 262,
"reward": 1.01610965
},
{
"step": 9,
"tokens_allocated": 262,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0067414
}
]
},
{
"total_reward": 6.76834685,
"accuracy": 0.7,
"total_tokens_used": 3889,
"budget_utilization": 0.97225,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 300,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": -0.10000945
},
{
"step": 2,
"tokens_allocated": 513,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3187,
"reward": -0.10001795000000001
},
{
"step": 3,
"tokens_allocated": 298,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2889,
"reward": 1.01041705
},
{
"step": 4,
"tokens_allocated": 309,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2580,
"reward": 1.0112420500000001
},
{
"step": 5,
"tokens_allocated": 215,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2365,
"reward": 1.00254095
},
{
"step": 6,
"tokens_allocated": 354,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2011,
"reward": 1.01236555
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1211,
"reward": -0.1000326
},
{
"step": 8,
"tokens_allocated": 504,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 707,
"reward": 1.01025665
},
{
"step": 9,
"tokens_allocated": 265,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 442,
"reward": 1.00869255
},
{
"step": 10,
"tokens_allocated": 331,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 111,
"reward": 1.01289205
}
]
},
{
"total_reward": 8.9896424,
"accuracy": 0.9,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 500,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3500,
"reward": -0.10001745000000001
},
{
"step": 2,
"tokens_allocated": 486,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3014,
"reward": 1.00905675
},
{
"step": 3,
"tokens_allocated": 188,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2826,
"reward": 1.008096
},
{
"step": 4,
"tokens_allocated": 302,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2524,
"reward": 1.00891585
},
{
"step": 5,
"tokens_allocated": 210,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2314,
"reward": 1.0044925
},
{
"step": 6,
"tokens_allocated": 231,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2083,
"reward": 1.0063677
},
{
"step": 7,
"tokens_allocated": 260,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1823,
"reward": 1.00696665
},
{
"step": 8,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1023,
"reward": 1.0058139
},
{
"step": 9,
"tokens_allocated": 639,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 384,
"reward": 1.02233295
},
{
"step": 10,
"tokens_allocated": 384,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.01761755
}
]
},
{
"total_reward": 7.904428449999999,
"accuracy": 0.8,
"total_tokens_used": 3954,
"budget_utilization": 0.9885,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3800,
"reward": 1.00441795
},
{
"step": 2,
"tokens_allocated": 527,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3273,
"reward": 1.01273215
},
{
"step": 3,
"tokens_allocated": 204,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3069,
"reward": 1.0037422999999999
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2269,
"reward": 1.0157205
},
{
"step": 5,
"tokens_allocated": 756,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1513,
"reward": 1.029682
},
{
"step": 6,
"tokens_allocated": 226,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1287,
"reward": -0.10000740000000001
},
{
"step": 7,
"tokens_allocated": 643,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 644,
"reward": 1.0233084
},
{
"step": 8,
"tokens_allocated": 161,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 483,
"reward": 1.00509535
},
{
"step": 9,
"tokens_allocated": 301,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 182,
"reward": 1.00974145
},
{
"step": 10,
"tokens_allocated": 136,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 46,
"reward": -0.10000425
}
]
},
{
"total_reward": 6.7541784,
"accuracy": 0.7,
"total_tokens_used": 3629,
"budget_utilization": 0.90725,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 300,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": -0.1000081
},
{
"step": 2,
"tokens_allocated": 205,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3495,
"reward": 1.0047179
},
{
"step": 3,
"tokens_allocated": 218,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3277,
"reward": 1.0053927
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2477,
"reward": 1.0082155
},
{
"step": 5,
"tokens_allocated": 309,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2168,
"reward": 1.0105666
},
{
"step": 6,
"tokens_allocated": 542,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1626,
"reward": 1.01610865
},
{
"step": 7,
"tokens_allocated": 203,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1423,
"reward": 1.00404255
},
{
"step": 8,
"tokens_allocated": 237,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1186,
"reward": 1.0051666
},
{
"step": 9,
"tokens_allocated": 444,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 742,
"reward": -0.10001605000000001
},
{
"step": 10,
"tokens_allocated": 371,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 371,
"reward": -0.10000795000000001
}
]
},
{
"total_reward": 5.6360168999999996,
"accuracy": 0.6,
"total_tokens_used": 3946,
"budget_utilization": 0.9865,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0127185
},
{
"step": 2,
"tokens_allocated": 444,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2756,
"reward": -0.10001555000000001
},
{
"step": 3,
"tokens_allocated": 172,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2584,
"reward": 1.00697105
},
{
"step": 4,
"tokens_allocated": 184,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.0071956
},
{
"step": 5,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2200,
"reward": 1.0013409
},
{
"step": 6,
"tokens_allocated": 550,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1650,
"reward": -0.10001455000000001
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 850,
"reward": -0.1000309
},
{
"step": 8,
"tokens_allocated": 566,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 284,
"reward": -0.1000169
},
{
"step": 9,
"tokens_allocated": 177,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 107,
"reward": 1.00674565
},
{
"step": 10,
"tokens_allocated": 53,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 54,
"reward": 1.0011231
}
]
},
{
"total_reward": 6.756207800000001,
"accuracy": 0.7,
"total_tokens_used": 3737,
"budget_utilization": 0.93425,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003025
},
{
"step": 2,
"tokens_allocated": 444,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2756,
"reward": 1.00950915
},
{
"step": 3,
"tokens_allocated": 258,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2498,
"reward": 1.00741705
},
{
"step": 4,
"tokens_allocated": 713,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1785,
"reward": -0.10001815
},
{
"step": 5,
"tokens_allocated": 148,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1637,
"reward": 1.0057714500000001
},
{
"step": 6,
"tokens_allocated": 163,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1474,
"reward": 1.00569565
},
{
"step": 7,
"tokens_allocated": 276,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1198,
"reward": -0.10000895
},
{
"step": 8,
"tokens_allocated": 499,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 699,
"reward": 1.01543535
},
{
"step": 9,
"tokens_allocated": 174,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 525,
"reward": 1.0059953
},
{
"step": 10,
"tokens_allocated": 262,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 263,
"reward": 1.0064412
}
]
},
{
"total_reward": 7.8895138,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 500,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3500,
"reward": 1.0131838
},
{
"step": 2,
"tokens_allocated": 777,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2723,
"reward": -0.10001435
},
{
"step": 3,
"tokens_allocated": 425,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2298,
"reward": 1.0105608
},
{
"step": 4,
"tokens_allocated": 410,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1888,
"reward": 1.00876035
},
{
"step": 5,
"tokens_allocated": 629,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1259,
"reward": 1.0198568000000001
},
{
"step": 6,
"tokens_allocated": 314,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 945,
"reward": 1.01266775
},
{
"step": 7,
"tokens_allocated": 177,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 768,
"reward": -0.1000043
},
{
"step": 8,
"tokens_allocated": 192,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 576,
"reward": 1.0091465
},
{
"step": 9,
"tokens_allocated": 144,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 432,
"reward": 1.0046459
},
{
"step": 10,
"tokens_allocated": 432,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.01071055
}
]
},
{
"total_reward": 9.0091763,
"accuracy": 0.9,
"total_tokens_used": 3633,
"budget_utilization": 0.90825,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 500,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3500,
"reward": -0.1000149
},
{
"step": 2,
"tokens_allocated": 777,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2723,
"reward": 1.03568495
},
{
"step": 3,
"tokens_allocated": 255,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2468,
"reward": 1.00749225
},
{
"step": 4,
"tokens_allocated": 176,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2292,
"reward": 1.00667065
},
{
"step": 5,
"tokens_allocated": 191,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2101,
"reward": 1.0074204
},
{
"step": 6,
"tokens_allocated": 315,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1786,
"reward": 1.0099659
},
{
"step": 7,
"tokens_allocated": 223,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1563,
"reward": 1.00269065
},
{
"step": 8,
"tokens_allocated": 390,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1173,
"reward": 1.01731705
},
{
"step": 9,
"tokens_allocated": 439,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 734,
"reward": 1.0063573
},
{
"step": 10,
"tokens_allocated": 367,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 367,
"reward": 1.01559205
}
]
},
{
"total_reward": 6.79033245,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0040127
},
{
"step": 2,
"tokens_allocated": 711,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2489,
"reward": 1.0320108000000001
},
{
"step": 3,
"tokens_allocated": 388,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2101,
"reward": 1.0179926
},
{
"step": 4,
"tokens_allocated": 225,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1876,
"reward": 1.00584265
},
{
"step": 5,
"tokens_allocated": 156,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1720,
"reward": 1.0055459
},
{
"step": 6,
"tokens_allocated": 688,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1032,
"reward": -0.1000182
},
{
"step": 7,
"tokens_allocated": 193,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 839,
"reward": 1.0083209
},
{
"step": 8,
"tokens_allocated": 559,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 280,
"reward": 1.01663315
},
{
"step": 9,
"tokens_allocated": 175,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 105,
"reward": -0.10000445000000001
},
{
"step": 10,
"tokens_allocated": 105,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10000360000000001
}
]
},
{
"total_reward": 6.78769955,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0115177
},
{
"step": 2,
"tokens_allocated": 266,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2934,
"reward": 1.0058406
},
{
"step": 3,
"tokens_allocated": 733,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2201,
"reward": 1.0306587999999999
},
{
"step": 4,
"tokens_allocated": 393,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1808,
"reward": 1.0185176999999999
},
{
"step": 5,
"tokens_allocated": 150,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1658,
"reward": 1.00547115
},
{
"step": 6,
"tokens_allocated": 414,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1244,
"reward": 1.0095857
},
{
"step": 7,
"tokens_allocated": 155,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1089,
"reward": 1.00614635
},
{
"step": 8,
"tokens_allocated": 181,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 908,
"reward": -0.10000395000000001
},
{
"step": 9,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 108,
"reward": -0.10003025
},
{
"step": 10,
"tokens_allocated": 108,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10000425
}
]
},
{
"total_reward": 7.883722,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 300,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": 1.01176785
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2900,
"reward": 1.0092662000000001
},
{
"step": 3,
"tokens_allocated": 453,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2447,
"reward": 1.00553105
},
{
"step": 4,
"tokens_allocated": 699,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1748,
"reward": 1.0303603
},
{
"step": 5,
"tokens_allocated": 145,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1603,
"reward": 1.00554645
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1203,
"reward": 1.00260675
},
{
"step": 7,
"tokens_allocated": 375,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 828,
"reward": 1.0161920500000001
},
{
"step": 8,
"tokens_allocated": 552,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 276,
"reward": -0.10001550000000001
},
{
"step": 9,
"tokens_allocated": 103,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 173,
"reward": 1.0024715
},
{
"step": 10,
"tokens_allocated": 173,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10000465
}
]
},
{
"total_reward": 9.009530049999999,
"accuracy": 0.9,
"total_tokens_used": 3900,
"budget_utilization": 0.975,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 500,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3500,
"reward": 1.01085725
},
{
"step": 2,
"tokens_allocated": 486,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3014,
"reward": 1.01025755
},
{
"step": 3,
"tokens_allocated": 282,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2732,
"reward": 1.00711565
},
{
"step": 4,
"tokens_allocated": 487,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2245,
"reward": 1.0152108
},
{
"step": 5,
"tokens_allocated": 748,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1497,
"reward": 1.0277311
},
{
"step": 6,
"tokens_allocated": 224,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1273,
"reward": -0.10000925000000001
},
{
"step": 7,
"tokens_allocated": 636,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 637,
"reward": 1.0208321
},
{
"step": 8,
"tokens_allocated": 106,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 531,
"reward": 1.0020961000000002
},
{
"step": 9,
"tokens_allocated": 331,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 200,
"reward": 1.0137176
},
{
"step": 10,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 100,
"reward": 1.00172115
}
]
},
{
"total_reward": 5.77586915,
"accuracy": 0.6666666666666666,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 500,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3500,
"reward": 1.01100735
},
{
"step": 2,
"tokens_allocated": 777,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2723,
"reward": -0.10001745000000001
},
{
"step": 3,
"tokens_allocated": 680,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2043,
"reward": -0.10001475
},
{
"step": 4,
"tokens_allocated": 364,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1679,
"reward": 1.0160425
},
{
"step": 5,
"tokens_allocated": 139,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1540,
"reward": 1.00464615
},
{
"step": 6,
"tokens_allocated": 231,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1309,
"reward": 1.00554215
},
{
"step": 7,
"tokens_allocated": 409,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 900,
"reward": -0.10001760000000001
},
{
"step": 8,
"tokens_allocated": 150,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 750,
"reward": 1.0044955
},
{
"step": 9,
"tokens_allocated": 750,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0341852999999999
}
]
},
{
"total_reward": 6.768739500000001,
"accuracy": 0.7,
"total_tokens_used": 3891,
"budget_utilization": 0.97275,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 500,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3500,
"reward": 1.0161858000000001
},
{
"step": 2,
"tokens_allocated": 777,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2723,
"reward": -0.10001435
},
{
"step": 3,
"tokens_allocated": 255,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2468,
"reward": 1.0060663
},
{
"step": 4,
"tokens_allocated": 176,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2292,
"reward": 1.0078714500000001
},
{
"step": 5,
"tokens_allocated": 286,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2006,
"reward": 1.01071785
},
{
"step": 6,
"tokens_allocated": 300,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1706,
"reward": 1.0116928
},
{
"step": 7,
"tokens_allocated": 213,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1493,
"reward": 1.00539295
},
{
"step": 8,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 693,
"reward": 1.01084225
},
{
"step": 9,
"tokens_allocated": 259,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 434,
"reward": -0.10000740000000001
},
{
"step": 10,
"tokens_allocated": 325,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 109,
"reward": -0.10000815
}
]
},
{
"total_reward": 5.766700849999999,
"accuracy": 0.6666666666666666,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 300,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": 1.00966645
},
{
"step": 2,
"tokens_allocated": 513,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3187,
"reward": 1.01543465
},
{
"step": 3,
"tokens_allocated": 199,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2988,
"reward": 1.00914615
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2188,
"reward": 1.00814045
},
{
"step": 5,
"tokens_allocated": 729,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1459,
"reward": -0.10001535
},
{
"step": 6,
"tokens_allocated": 583,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 876,
"reward": 1.0188084
},
{
"step": 7,
"tokens_allocated": 438,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 438,
"reward": 1.0055318
},
{
"step": 8,
"tokens_allocated": 292,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 146,
"reward": -0.10000705
},
{
"step": 9,
"tokens_allocated": 146,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10000465
}
]
},
{
"total_reward": 6.877045400000001,
"accuracy": 0.7777777777777778,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3800,
"reward": 1.0023916
},
{
"step": 2,
"tokens_allocated": 316,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3484,
"reward": 1.01041615
},
{
"step": 3,
"tokens_allocated": 544,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2940,
"reward": 1.01520795
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2140,
"reward": -0.10003525
},
{
"step": 5,
"tokens_allocated": 713,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1427,
"reward": 1.0300594
},
{
"step": 6,
"tokens_allocated": 570,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 857,
"reward": -0.1000143
},
{
"step": 7,
"tokens_allocated": 160,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 697,
"reward": 1.00532055
},
{
"step": 8,
"tokens_allocated": 290,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 407,
"reward": 1.008241
},
{
"step": 9,
"tokens_allocated": 407,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0054583
}
]
},
{
"total_reward": 5.796034349999999,
"accuracy": 0.6666666666666666,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.01369415
},
{
"step": 2,
"tokens_allocated": 711,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2489,
"reward": 1.02653215
},
{
"step": 3,
"tokens_allocated": 388,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2101,
"reward": 1.018593
},
{
"step": 4,
"tokens_allocated": 375,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1726,
"reward": -0.10000715
},
{
"step": 5,
"tokens_allocated": 215,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1511,
"reward": -0.10000890000000001
},
{
"step": 6,
"tokens_allocated": 604,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 907,
"reward": 1.02406085
},
{
"step": 7,
"tokens_allocated": 283,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 624,
"reward": -0.10000735000000001
},
{
"step": 8,
"tokens_allocated": 156,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 468,
"reward": 1.0064465
},
{
"step": 9,
"tokens_allocated": 468,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0067310999999999
}
]
},
{
"total_reward": 7.86692335,
"accuracy": 0.8,
"total_tokens_used": 3787,
"budget_utilization": 0.94675,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 300,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": 1.00981655
},
{
"step": 2,
"tokens_allocated": 308,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3392,
"reward": 1.01116705
},
{
"step": 3,
"tokens_allocated": 212,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3180,
"reward": 1.00231595
},
{
"step": 4,
"tokens_allocated": 227,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2953,
"reward": 1.0041164
},
{
"step": 5,
"tokens_allocated": 615,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2338,
"reward": -0.10001755000000001
},
{
"step": 6,
"tokens_allocated": 350,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1988,
"reward": 1.0128911
},
{
"step": 7,
"tokens_allocated": 621,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1367,
"reward": 1.01978215
},
{
"step": 8,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 567,
"reward": -0.1000352
},
{
"step": 9,
"tokens_allocated": 141,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 426,
"reward": 1.0035203
},
{
"step": 10,
"tokens_allocated": 213,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 213,
"reward": 1.0033665999999999
}
]
},
{
"total_reward": 7.8743482999999985,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 300,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": 1.00966645
},
{
"step": 2,
"tokens_allocated": 513,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3187,
"reward": 1.0118322499999999
},
{
"step": 3,
"tokens_allocated": 199,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2988,
"reward": 1.0093713
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2188,
"reward": 1.0092662000000001
},
{
"step": 5,
"tokens_allocated": 182,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2006,
"reward": 1.00772105
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1206,
"reward": -0.1000293
},
{
"step": 7,
"tokens_allocated": 150,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1056,
"reward": 1.0056962999999999
},
{
"step": 8,
"tokens_allocated": 176,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 880,
"reward": 1.00727105
},
{
"step": 9,
"tokens_allocated": 330,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 550,
"reward": 1.0135675499999999
},
{
"step": 10,
"tokens_allocated": 550,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.10001455000000001
}
]
},
{
"total_reward": 8.9898032,
"accuracy": 0.9,
"total_tokens_used": 3821,
"budget_utilization": 0.95525,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 300,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": 1.0098916
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2900,
"reward": 1.01444465
},
{
"step": 3,
"tokens_allocated": 181,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2719,
"reward": 1.0065203
},
{
"step": 4,
"tokens_allocated": 194,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2525,
"reward": 1.0080957000000001
},
{
"step": 5,
"tokens_allocated": 315,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2210,
"reward": 1.01019105
},
{
"step": 6,
"tokens_allocated": 552,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1658,
"reward": -0.1000183
},
{
"step": 7,
"tokens_allocated": 518,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1140,
"reward": 1.01205715
},
{
"step": 8,
"tokens_allocated": 190,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 950,
"reward": 1.0086963
},
{
"step": 9,
"tokens_allocated": 237,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 713,
"reward": 1.0044161
},
{
"step": 10,
"tokens_allocated": 534,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 179,
"reward": 1.01550865
}
]
},
{
"total_reward": 5.638869999999999,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003300000000001
},
{
"step": 2,
"tokens_allocated": 444,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2756,
"reward": 1.0086836
},
{
"step": 3,
"tokens_allocated": 172,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2584,
"reward": 1.0066708500000001
},
{
"step": 4,
"tokens_allocated": 738,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1846,
"reward": -0.10001535
},
{
"step": 5,
"tokens_allocated": 230,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1616,
"reward": 1.003741
},
{
"step": 6,
"tokens_allocated": 404,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1212,
"reward": -0.1000149
},
{
"step": 7,
"tokens_allocated": 378,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 834,
"reward": -0.10000875000000001
},
{
"step": 8,
"tokens_allocated": 208,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 626,
"reward": 1.0017908
},
{
"step": 9,
"tokens_allocated": 156,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 470,
"reward": 1.0053958
},
{
"step": 10,
"tokens_allocated": 470,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.01265995
}
]
},
{
"total_reward": 7.9015086000000005,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.01744665
},
{
"step": 2,
"tokens_allocated": 177,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3023,
"reward": 1.0066706
},
{
"step": 3,
"tokens_allocated": 472,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2551,
"reward": -0.10001695000000001
},
{
"step": 4,
"tokens_allocated": 273,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2278,
"reward": 1.009968
},
{
"step": 5,
"tokens_allocated": 189,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2089,
"reward": 1.00839615
},
{
"step": 6,
"tokens_allocated": 313,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1776,
"reward": 1.0110167
},
{
"step": 7,
"tokens_allocated": 333,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1443,
"reward": -0.10000745000000001
},
{
"step": 8,
"tokens_allocated": 360,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1083,
"reward": 1.01416645
},
{
"step": 9,
"tokens_allocated": 406,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 677,
"reward": 1.0041825
},
{
"step": 10,
"tokens_allocated": 677,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.02968595
}
]
},
{
"total_reward": 3.5552238000000003,
"accuracy": 0.4444444444444444,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 3800,
"reward": -0.10000945
},
{
"step": 2,
"tokens_allocated": 527,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3273,
"reward": -0.1000143
},
{
"step": 3,
"tokens_allocated": 306,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2967,
"reward": 1.0116174500000001
},
{
"step": 4,
"tokens_allocated": 529,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2438,
"reward": -0.10001775
},
{
"step": 5,
"tokens_allocated": 507,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1931,
"reward": -0.10001455000000001
},
{
"step": 6,
"tokens_allocated": 289,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1642,
"reward": 1.0108678
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 842,
"reward": -0.1000352
},
{
"step": 8,
"tokens_allocated": 140,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 702,
"reward": 1.0034453
},
{
"step": 9,
"tokens_allocated": 702,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.0293845000000001
}
]
},
{
"total_reward": 4.5351288,
"accuracy": 0.5,
"total_tokens_used": 3929,
"budget_utilization": 0.98225,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003350000000001
},
{
"step": 2,
"tokens_allocated": 444,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2756,
"reward": -0.10001520000000001
},
{
"step": 3,
"tokens_allocated": 430,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2326,
"reward": 1.00590745
},
{
"step": 4,
"tokens_allocated": 664,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1662,
"reward": 1.02180635
},
{
"step": 5,
"tokens_allocated": 138,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1524,
"reward": 1.0041959
},
{
"step": 6,
"tokens_allocated": 381,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1143,
"reward": -0.1000071
},
{
"step": 7,
"tokens_allocated": 571,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 572,
"reward": -0.1000143
},
{
"step": 8,
"tokens_allocated": 381,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 191,
"reward": -0.100007
},
{
"step": 9,
"tokens_allocated": 50,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 141,
"reward": 1.00082305
},
{
"step": 10,
"tokens_allocated": 70,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 71,
"reward": 1.00247315
}
]
},
{
"total_reward": 5.65206235,
"accuracy": 0.6,
"total_tokens_used": 3968,
"budget_utilization": 0.992,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3800,
"reward": 1.0037425
},
{
"step": 2,
"tokens_allocated": 527,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3273,
"reward": -0.10001825
},
{
"step": 3,
"tokens_allocated": 511,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2762,
"reward": -0.10001555000000001
},
{
"step": 4,
"tokens_allocated": 197,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2565,
"reward": 1.00869595
},
{
"step": 5,
"tokens_allocated": 320,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2245,
"reward": 1.01296765
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1445,
"reward": 1.0056638
},
{
"step": 7,
"tokens_allocated": 270,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1175,
"reward": 1.00741645
},
{
"step": 8,
"tokens_allocated": 293,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 882,
"reward": -0.10000745000000001
},
{
"step": 9,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 82,
"reward": 1.0136191
},
{
"step": 10,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 32,
"reward": -0.10000185
}
]
},
{
"total_reward": 7.8643071,
"accuracy": 0.8,
"total_tokens_used": 3629,
"budget_utilization": 0.90725,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 3800,
"reward": -0.10000945
},
{
"step": 2,
"tokens_allocated": 211,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3589,
"reward": 1.0024661
},
{
"step": 3,
"tokens_allocated": 224,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3365,
"reward": 1.0055425
},
{
"step": 4,
"tokens_allocated": 240,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3125,
"reward": 1.0040407
},
{
"step": 5,
"tokens_allocated": 390,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2735,
"reward": 1.01611625
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1935,
"reward": 1.0094163
},
{
"step": 7,
"tokens_allocated": 241,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1694,
"reward": 1.0063672
},
{
"step": 8,
"tokens_allocated": 705,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 989,
"reward": -0.10001520000000001
},
{
"step": 9,
"tokens_allocated": 247,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 742,
"reward": 1.0063669
},
{
"step": 10,
"tokens_allocated": 371,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 371,
"reward": 1.0140158
}
]
},
{
"total_reward": 7.8762429,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3800,
"reward": 1.0035173499999999
},
{
"step": 2,
"tokens_allocated": 211,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3589,
"reward": 1.0047176
},
{
"step": 3,
"tokens_allocated": 224,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3365,
"reward": 1.0040415
},
{
"step": 4,
"tokens_allocated": 360,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3005,
"reward": 1.01446665
},
{
"step": 5,
"tokens_allocated": 250,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2755,
"reward": 1.00561625
},
{
"step": 6,
"tokens_allocated": 275,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2480,
"reward": 1.007116
},
{
"step": 7,
"tokens_allocated": 775,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1705,
"reward": -0.10001470000000001
},
{
"step": 8,
"tokens_allocated": 710,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 995,
"reward": -0.10001755000000001
},
{
"step": 9,
"tokens_allocated": 373,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 622,
"reward": 1.01529155
},
{
"step": 10,
"tokens_allocated": 622,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.02150825
}
]
},
{
"total_reward": 4.66363005,
"accuracy": 0.5555555555555556,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 300,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": 1.011993
},
{
"step": 2,
"tokens_allocated": 308,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3392,
"reward": 1.01131715
},
{
"step": 3,
"tokens_allocated": 530,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2862,
"reward": -0.10001520000000001
},
{
"step": 4,
"tokens_allocated": 511,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2351,
"reward": 1.01663555
},
{
"step": 5,
"tokens_allocated": 195,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2156,
"reward": 1.00824575
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1356,
"reward": 1.01549535
},
{
"step": 7,
"tokens_allocated": 423,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 933,
"reward": -0.10001605000000001
},
{
"step": 8,
"tokens_allocated": 233,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 700,
"reward": -0.10000730000000001
},
{
"step": 9,
"tokens_allocated": 700,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.1000182
}
]
},
{
"total_reward": 7.87246095,
"accuracy": 0.8,
"total_tokens_used": 3971,
"budget_utilization": 0.99275,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003655
},
{
"step": 2,
"tokens_allocated": 711,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2489,
"reward": 1.0257066000000001
},
{
"step": 3,
"tokens_allocated": 233,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2256,
"reward": 1.0041161
},
{
"step": 4,
"tokens_allocated": 161,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2095,
"reward": 1.0066714
},
{
"step": 5,
"tokens_allocated": 174,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1921,
"reward": 1.00607035
},
{
"step": 6,
"tokens_allocated": 480,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1441,
"reward": -0.10001635
},
{
"step": 7,
"tokens_allocated": 270,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1171,
"reward": 1.0086172500000001
},
{
"step": 8,
"tokens_allocated": 292,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 879,
"reward": 1.0095918
},
{
"step": 9,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 79,
"reward": 1.0106171
},
{
"step": 10,
"tokens_allocated": 50,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 29,
"reward": 1.00112325
}
]
},
{
"total_reward": 7.8952819000000005,
"accuracy": 0.8,
"total_tokens_used": 3985,
"budget_utilization": 0.99625,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0094163
},
{
"step": 2,
"tokens_allocated": 711,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2489,
"reward": 1.0279581
},
{
"step": 3,
"tokens_allocated": 388,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2101,
"reward": 1.01626645
},
{
"step": 4,
"tokens_allocated": 375,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1726,
"reward": 1.0170176
},
{
"step": 5,
"tokens_allocated": 359,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1367,
"reward": -0.10000890000000001
},
{
"step": 6,
"tokens_allocated": 341,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1026,
"reward": 1.01184085
},
{
"step": 7,
"tokens_allocated": 513,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 513,
"reward": 1.01168215
},
{
"step": 8,
"tokens_allocated": 342,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 171,
"reward": -0.1000086
},
{
"step": 9,
"tokens_allocated": 106,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 65,
"reward": 1.0008953
},
{
"step": 10,
"tokens_allocated": 50,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 15,
"reward": 1.00022265
}
]
},
{
"total_reward": 2.3343037,
"accuracy": 0.3,
"total_tokens_used": 3982,
"budget_utilization": 0.9955,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 3800,
"reward": -0.10000945
},
{
"step": 2,
"tokens_allocated": 527,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3273,
"reward": -0.10001470000000001
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2473,
"reward": 1.0098666
},
{
"step": 4,
"tokens_allocated": 441,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2032,
"reward": -0.10001520000000001
},
{
"step": 5,
"tokens_allocated": 169,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1863,
"reward": 1.00599555
},
{
"step": 6,
"tokens_allocated": 745,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1118,
"reward": -0.10001495
},
{
"step": 7,
"tokens_allocated": 559,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 559,
"reward": 1.0185094
},
{
"step": 8,
"tokens_allocated": 372,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 187,
"reward": -0.10000730000000001
},
{
"step": 9,
"tokens_allocated": 116,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 71,
"reward": -0.10000435
},
{
"step": 10,
"tokens_allocated": 53,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 18,
"reward": -0.1000019
}
]
},
{
"total_reward": 3.5414481000000007,
"accuracy": 0.4444444444444444,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 500,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3500,
"reward": 1.0147598500000001
},
{
"step": 2,
"tokens_allocated": 486,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3014,
"reward": -0.10001470000000001
},
{
"step": 3,
"tokens_allocated": 753,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2261,
"reward": -0.1000143
},
{
"step": 4,
"tokens_allocated": 161,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2100,
"reward": 1.00584585
},
{
"step": 5,
"tokens_allocated": 700,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1400,
"reward": -0.10001405000000001
},
{
"step": 6,
"tokens_allocated": 140,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1260,
"reward": 1.0040457
},
{
"step": 7,
"tokens_allocated": 393,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 867,
"reward": 1.0168666
},
{
"step": 8,
"tokens_allocated": 578,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 289,
"reward": -0.10001835
},
{
"step": 9,
"tokens_allocated": 289,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.1000085
}
]
},
{
"total_reward": 5.6448685,
"accuracy": 0.6,
"total_tokens_used": 3887,
"budget_utilization": 0.97175,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3800,
"reward": 1.0038926
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3000,
"reward": 1.0136191
},
{
"step": 3,
"tokens_allocated": 468,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2532,
"reward": 1.0086824
},
{
"step": 4,
"tokens_allocated": 452,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2080,
"reward": -0.1000168
},
{
"step": 5,
"tokens_allocated": 433,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1647,
"reward": 1.01018515
},
{
"step": 6,
"tokens_allocated": 411,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1236,
"reward": -0.10001675
},
{
"step": 7,
"tokens_allocated": 154,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1082,
"reward": 1.0056961
},
{
"step": 8,
"tokens_allocated": 721,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 361,
"reward": -0.10001575
},
{
"step": 9,
"tokens_allocated": 135,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 226,
"reward": -0.1000038
},
{
"step": 10,
"tokens_allocated": 113,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 113,
"reward": 1.00284625
}
]
},
{
"total_reward": 7.8907995,
"accuracy": 0.8,
"total_tokens_used": 3512,
"budget_utilization": 0.878,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 300,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": 1.0110924
},
{
"step": 2,
"tokens_allocated": 513,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3187,
"reward": 1.0122825500000001
},
{
"step": 3,
"tokens_allocated": 298,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2889,
"reward": 1.00861585
},
{
"step": 4,
"tokens_allocated": 309,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 2580,
"reward": -0.10000890000000001
},
{
"step": 5,
"tokens_allocated": 322,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2258,
"reward": 1.0119919
},
{
"step": 6,
"tokens_allocated": 338,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1920,
"reward": -0.10000895
},
{
"step": 7,
"tokens_allocated": 360,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1560,
"reward": 1.01416645
},
{
"step": 8,
"tokens_allocated": 260,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1300,
"reward": 1.00666645
},
{
"step": 9,
"tokens_allocated": 325,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 975,
"reward": 1.0116915499999999
},
{
"step": 10,
"tokens_allocated": 487,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 488,
"reward": 1.0143102
}
]
},
{
"total_reward": 7.8885411,
"accuracy": 0.8,
"total_tokens_used": 3896,
"budget_utilization": 0.974,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 300,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": 1.01191795
},
{
"step": 2,
"tokens_allocated": 513,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3187,
"reward": 1.01468415
},
{
"step": 3,
"tokens_allocated": 298,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2889,
"reward": 1.01146775
},
{
"step": 4,
"tokens_allocated": 206,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2683,
"reward": 1.00441765
},
{
"step": 5,
"tokens_allocated": 558,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2125,
"reward": 1.0170835
},
{
"step": 6,
"tokens_allocated": 531,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1594,
"reward": 1.01453315
},
{
"step": 7,
"tokens_allocated": 797,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 797,
"reward": -0.10001710000000001
},
{
"step": 8,
"tokens_allocated": 132,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 665,
"reward": 1.00457145
},
{
"step": 9,
"tokens_allocated": 249,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 416,
"reward": -0.10000840000000001
},
{
"step": 10,
"tokens_allocated": 312,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 104,
"reward": 1.0098909999999999
}
]
}
]
},
"bandit": {
"agent": "bandit",
"n_episodes": 50,
"mean_reward": 6.525903147,
"std_reward": 1.6575015872803898,
"mean_accuracy": 0.743531746031746,
"std_accuracy": 0.1592141678198517,
"mean_budget_utilization": 0.9884999999999999,
"episodes": [
{
"total_reward": 5.631424399999999,
"accuracy": 0.6,
"total_tokens_used": 3550,
"budget_utilization": 0.8875,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3950,
"reward": -0.1000023
},
{
"step": 2,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3850,
"reward": 1.00127085
},
{
"step": 3,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3750,
"reward": 1.0022465
},
{
"step": 4,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3650,
"reward": -0.10000375
},
{
"step": 5,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3450,
"reward": 1.0032922
},
{
"step": 6,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3250,
"reward": -0.10000765
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2850,
"reward": -0.10001795000000001
},
{
"step": 8,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2050,
"reward": 1.0059639999999999
},
{
"step": 9,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1250,
"reward": 1.0053636
},
{
"step": 10,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 450,
"reward": 1.0133189
}
]
},
{
"total_reward": 3.4211402,
"accuracy": 0.4,
"total_tokens_used": 3900,
"budget_utilization": 0.975,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003635000000001
},
{
"step": 2,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3000,
"reward": 1.00321715
},
{
"step": 3,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2800,
"reward": -0.1000072
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.10001715
},
{
"step": 5,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2200,
"reward": -0.10000925000000001
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1400,
"reward": 1.0137692
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 600,
"reward": -0.10002935
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 500,
"reward": -0.1000047
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 300,
"reward": 1.00306705
},
{
"step": 10,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 100,
"reward": 1.0011908
}
]
},
{
"total_reward": 6.7648755000000005,
"accuracy": 0.7,
"total_tokens_used": 3900,
"budget_utilization": 0.975,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3900,
"reward": 1.00082055
},
{
"step": 2,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3800,
"reward": -0.10000375
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3000,
"reward": 1.01429455
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2200,
"reward": 1.00919115
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1400,
"reward": 1.0152702
},
{
"step": 6,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1350,
"reward": -0.10000205000000001
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 550,
"reward": 1.01759675
},
{
"step": 8,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 350,
"reward": 1.00366745
},
{
"step": 9,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 300,
"reward": -0.10000205000000001
},
{
"step": 10,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 100,
"reward": 1.0040427
}
]
},
{
"total_reward": 5.639730100000001,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3900,
"reward": -0.10000450000000001
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3100,
"reward": 1.00633925
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2300,
"reward": 1.01294365
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1500,
"reward": 1.01114245
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 700,
"reward": 1.00408775
},
{
"step": 6,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 500,
"reward": 1.0022415
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 300,
"reward": 1.0029919999999999
},
{
"step": 8,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 100,
"reward": -0.1000076
},
{
"step": 9,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 50,
"reward": -0.10000205000000001
},
{
"step": 10,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10000235
}
]
},
{
"total_reward": 4.851217999999999,
"accuracy": 0.7142857142857143,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 7,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.01624585
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.013469
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": -0.10003445000000001
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.0176718
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": -0.1000168
},
{
"step": 6,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 200,
"reward": 1.00306705
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.00081555
}
]
},
{
"total_reward": 4.7170817,
"accuracy": 0.625,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 8,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.1000352
},
{
"step": 2,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3000,
"reward": 1.0026918
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2200,
"reward": -0.10002935
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1400,
"reward": -0.10003525
},
{
"step": 5,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1200,
"reward": 1.0040427
},
{
"step": 6,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1000,
"reward": 1.00246665
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0037425
},
{
"step": 8,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.00423785
}
]
},
{
"total_reward": 4.630049850000001,
"accuracy": 0.5555555555555556,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3900,
"reward": -0.10000445000000001
},
{
"step": 2,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 3700,
"reward": -0.100008
},
{
"step": 3,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": -0.10000395000000001
},
{
"step": 4,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3400,
"reward": -0.10000890000000001
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2600,
"reward": 1.0052135
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1800,
"reward": 1.00423785
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1000,
"reward": 1.0118179
},
{
"step": 8,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 200,
"reward": 1.00498835
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.00381755
}
]
},
{
"total_reward": 5.6293051,
"accuracy": 0.6,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 3900,
"reward": -0.10000360000000001
},
{
"step": 2,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3850,
"reward": -0.1000022
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3050,
"reward": -0.10003145000000001
},
{
"step": 4,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2850,
"reward": 1.00291695
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2050,
"reward": 1.00754005
},
{
"step": 6,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1850,
"reward": 1.00186625
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1050,
"reward": 1.0035624
},
{
"step": 8,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 250,
"reward": 1.0092662000000001
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 50,
"reward": 1.0041928
},
{
"step": 10,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.1000023
}
]
},
{
"total_reward": 9.055337000000002,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3900,
"reward": 1.00097065
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3100,
"reward": 1.01129255
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2300,
"reward": 1.01279355
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1500,
"reward": 1.0150450500000001
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 700,
"reward": 1.00363745
},
{
"step": 6,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 500,
"reward": 1.00306705
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 300,
"reward": 1.0035924
},
{
"step": 8,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 100,
"reward": 1.0038926
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.0010457000000001
}
]
},
{
"total_reward": 4.53231765,
"accuracy": 0.5,
"total_tokens_used": 3900,
"budget_utilization": 0.975,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.00725985
},
{
"step": 2,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3400,
"reward": -0.10000780000000001
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2600,
"reward": 1.0098666
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1800,
"reward": 1.00693965
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1000,
"reward": -0.10002910000000001
},
{
"step": 6,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": -0.10000875000000001
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 600,
"reward": -0.100009
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 500,
"reward": -0.10000435
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 300,
"reward": 1.00381755
},
{
"step": 10,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 100,
"reward": 1.004493
}
]
},
{
"total_reward": 3.7393624000000005,
"accuracy": 0.5714285714285714,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 7,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.00365745
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2800,
"reward": 1.008966
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2000,
"reward": 1.0155704
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1200,
"reward": -0.10003350000000001
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": 1.0112175
},
{
"step": 6,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 200,
"reward": -0.1000072
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10000825
}
]
},
{
"total_reward": 6.93710895,
"accuracy": 0.875,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 8,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0080854000000001
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2800,
"reward": 1.0166211
},
{
"step": 3,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2700,
"reward": 1.0015710500000001
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1900,
"reward": 1.00408775
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1100,
"reward": 1.00318715
},
{
"step": 6,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1000,
"reward": 1.00202135
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 200,
"reward": -0.1000309
},
{
"step": 8,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00156605
}
]
},
{
"total_reward": 10.05136435,
"accuracy": 1.0,
"total_tokens_used": 3900,
"budget_utilization": 0.975,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0023816
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.00680955
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0169213
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.0041828
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.00200635
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.00979155
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": 1.0061341000000001
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 300,
"reward": 1.0005954
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 200,
"reward": 1.0016461
},
{
"step": 10,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 100,
"reward": 1.0008956
}
]
},
{
"total_reward": 6.865171,
"accuracy": 0.7777777777777778,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.0050834
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.00635925
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0133189
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.0085357
},
{
"step": 5,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 1800,
"reward": -0.1000087
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1000,
"reward": 1.0178969500000001
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 200,
"reward": 1.0122682
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 100,
"reward": -0.10000385
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00172115
}
]
},
{
"total_reward": 8.0473817,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 8,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3600,
"reward": 1.00545865
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.00500835
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.0077852
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.00680955
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2000,
"reward": 1.00440795
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": 1.00558875
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": 1.0053636
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.00695965
}
]
},
{
"total_reward": 6.73081605,
"accuracy": 0.7,
"total_tokens_used": 3800,
"budget_utilization": 0.95,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.00365745
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": -0.10003445000000001
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": -0.10001660000000001
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.0082355
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1200,
"reward": 1.00633925
},
{
"step": 6,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1100,
"reward": 1.00052035
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 700,
"reward": 1.0067345
},
{
"step": 8,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 500,
"reward": 1.00231655
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": -0.10000355000000001
},
{
"step": 10,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 200,
"reward": 1.00306705
}
]
},
{
"total_reward": 5.753239799999999,
"accuracy": 0.6666666666666666,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3600,
"reward": -0.10001745000000001
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.00260675
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.008986
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.0145197
},
{
"step": 5,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1900,
"reward": 1.0019463
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1100,
"reward": 1.0115177
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 300,
"reward": 1.01369415
},
{
"step": 8,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 100,
"reward": -0.10000880000000001
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.10000455000000001
}
]
},
{
"total_reward": 8.051434400000002,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 8,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0028318999999999
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.0160206999999999
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.00440795
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2000,
"reward": 1.00425785
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.002982
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1200,
"reward": 1.00350735
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0050834
},
{
"step": 8,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.01234325
}
]
},
{
"total_reward": 6.8307278,
"accuracy": 0.7777777777777778,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": -0.1000162
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": -0.10001660000000001
},
{
"step": 3,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3100,
"reward": 1.0016461
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2700,
"reward": 1.0062842
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1900,
"reward": 1.00919115
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1100,
"reward": 1.00528855
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 300,
"reward": 1.0055137
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 200,
"reward": 1.00052035
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.00231655
}
]
},
{
"total_reward": 6.8515897,
"accuracy": 0.7777777777777778,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.007485
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": -0.10001555000000001
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.00590895
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2400,
"reward": 1.0028318999999999
},
{
"step": 5,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2200,
"reward": 1.0013409
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1400,
"reward": 1.01729655
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1000,
"reward": 1.00876085
},
{
"step": 8,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 200,
"reward": 1.00799035
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.10000925000000001
}
]
},
{
"total_reward": 6.731551549999999,
"accuracy": 0.7,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.0025317
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.0100167
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.0091161
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": -0.10001815
},
{
"step": 5,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1500,
"reward": 1.00217145
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 700,
"reward": 1.0055137
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 500,
"reward": -0.10000895
},
{
"step": 8,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 300,
"reward": -0.1000075
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 200,
"reward": 1.0004453
},
{
"step": 10,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0017912
}
]
},
{
"total_reward": 3.7282562499999994,
"accuracy": 0.5714285714285714,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 7,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3600,
"reward": 1.0056838
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10001435
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.013469
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": -0.1000376
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": 1.0026818
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": -0.10003575
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.00650935
}
]
},
{
"total_reward": 8.9628671,
"accuracy": 0.9,
"total_tokens_used": 3600,
"budget_utilization": 0.9,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 3600,
"reward": -0.1000149
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.00740995
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2800,
"reward": 1.0079353
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.01744665
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1200,
"reward": 1.01729655
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 800,
"reward": 1.008986
},
{
"step": 7,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 700,
"reward": 1.00187125
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 600,
"reward": 1.0007455
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 500,
"reward": 1.0007455
},
{
"step": 10,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 400,
"reward": 1.0004453
}
]
},
{
"total_reward": 5.748363149999999,
"accuracy": 0.6666666666666666,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.0079353
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0086858
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0165460499999999
},
{
"step": 4,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2200,
"reward": 1.00396765
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1400,
"reward": 1.00994165
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1000,
"reward": -0.1000182
},
{
"step": 7,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 900,
"reward": 1.0013459
},
{
"step": 8,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 100,
"reward": -0.10003655
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.10000445000000001
}
]
},
{
"total_reward": 5.7280397999999995,
"accuracy": 0.6666666666666666,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": -0.1000153
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.0033572499999999
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2800,
"reward": 1.0056838
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2000,
"reward": 1.01189295
},
{
"step": 5,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1900,
"reward": 1.00172115
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1100,
"reward": -0.10003345000000001
},
{
"step": 7,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1000,
"reward": 1.00202135
},
{
"step": 8,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.0034423
},
{
"step": 9,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10003025
}
]
},
{
"total_reward": 5.75136635,
"accuracy": 0.6666666666666666,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.00650935
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.00440795
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.01204305
},
{
"step": 4,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2300,
"reward": -0.10000395000000001
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1500,
"reward": 1.0082155
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 700,
"reward": 1.0166211
},
{
"step": 7,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 600,
"reward": -0.10000355000000001
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 200,
"reward": -0.10001550000000001
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.0035924
}
]
},
{
"total_reward": 10.058188900000001,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.00303705
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0112175
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.0178969500000001
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.013469
},
{
"step": 5,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 700,
"reward": 1.00082055
},
{
"step": 6,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 600,
"reward": 1.0017962
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 400,
"reward": 1.0026918
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 300,
"reward": 1.0016461
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 100,
"reward": 1.0038926
},
{
"step": 10,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00172115
}
]
},
{
"total_reward": 5.7356833,
"accuracy": 0.6666666666666666,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.00663945
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2800,
"reward": -0.10001745000000001
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": -0.10001475
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.00708975
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.01309375
},
{
"step": 6,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 600,
"reward": 1.00321715
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": 1.00366745
},
{
"step": 8,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 200,
"reward": 1.00201635
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.10000840000000001
}
]
},
{
"total_reward": 5.637050099999999,
"accuracy": 0.6,
"total_tokens_used": 3600,
"budget_utilization": 0.9,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.013469
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.0056638
},
{
"step": 3,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 2300,
"reward": -0.10000400000000001
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1500,
"reward": 1.0073899499999999
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1100,
"reward": 1.00485825
},
{
"step": 6,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1000,
"reward": 1.00127085
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 800,
"reward": 1.00441795
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 700,
"reward": -0.1000047
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 500,
"reward": -0.10000740000000001
},
{
"step": 10,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": -0.10000360000000001
}
]
},
{
"total_reward": 6.84070835,
"accuracy": 0.7777777777777778,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.0043329
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.0049133
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.0125684
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": -0.10001855000000001
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": -0.10001535
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0050834
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 600,
"reward": 1.0040427
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 200,
"reward": 1.0056838
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.00411775
}
]
},
{
"total_reward": 8.92735145,
"accuracy": 0.9,
"total_tokens_used": 3900,
"budget_utilization": 0.975,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.00408775
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2400,
"reward": 1.00468815
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.00558875
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": -0.1000169
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 800,
"reward": 1.0065844
},
{
"step": 6,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 600,
"reward": 1.0020914
},
{
"step": 7,
"tokens_allocated": 50,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 550,
"reward": 1.0005979
},
{
"step": 8,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 350,
"reward": 1.001491
},
{
"step": 9,
"tokens_allocated": 50,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 300,
"reward": 1.00082305
},
{
"step": 10,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 100,
"reward": 1.00141595
}
]
},
{
"total_reward": 6.8424329,
"accuracy": 0.7777777777777778,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": -0.10001660000000001
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.00320715
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.0056638
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.01039195
},
{
"step": 5,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1400,
"reward": -0.10000890000000001
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1000,
"reward": 1.00876085
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 200,
"reward": 1.0113676
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 100,
"reward": 1.0022465
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.00082055
}
]
},
{
"total_reward": 6.84093365,
"accuracy": 0.7777777777777778,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 3950,
"reward": -0.1000019
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3150,
"reward": 1.0047632
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2750,
"reward": 1.0017812
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1950,
"reward": 1.0146698
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1150,
"reward": -0.10003435000000001
},
{
"step": 6,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1050,
"reward": 1.001496
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 250,
"reward": 1.0160206999999999
},
{
"step": 8,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 50,
"reward": 1.00111575
},
{
"step": 9,
"tokens_allocated": 50,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00112325
}
]
},
{
"total_reward": 6.753542500000001,
"accuracy": 0.7,
"total_tokens_used": 3900,
"budget_utilization": 0.975,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.013469
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.00318715
},
{
"step": 3,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2300,
"reward": 1.0019463
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1500,
"reward": 1.0092662000000001
},
{
"step": 5,
"tokens_allocated": 50,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1450,
"reward": 1.0005979
},
{
"step": 6,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1400,
"reward": -0.1000023
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1000,
"reward": 1.0077852
},
{
"step": 8,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 200,
"reward": 1.01729655
},
{
"step": 9,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 150,
"reward": -0.10000175
},
{
"step": 10,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 100,
"reward": -0.10000175
}
]
},
{
"total_reward": 7.84911345,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3200,
"reward": 1.00453805
},
{
"step": 2,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3000,
"reward": -0.10000735000000001
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2200,
"reward": 1.0112175
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1400,
"reward": 1.0173716
},
{
"step": 5,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1300,
"reward": 1.0013459
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 500,
"reward": 1.0094163
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 300,
"reward": -0.10000705
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 200,
"reward": 1.0019463
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 100,
"reward": 1.00142095
},
{
"step": 10,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.00187125
}
]
},
{
"total_reward": 4.53276095,
"accuracy": 0.5,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": -0.10003300000000001
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": 1.00889095
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1600,
"reward": 1.0103169
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1200,
"reward": -0.10001535
},
{
"step": 5,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1150,
"reward": -0.10000200000000001
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 350,
"reward": 1.01114245
},
{
"step": 7,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 300,
"reward": -0.10000185
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 200,
"reward": 1.00127085
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 100,
"reward": 1.0011958
},
{
"step": 10,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.1000038
}
]
},
{
"total_reward": 7.84588485,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.0073349
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.008966
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2000,
"reward": 1.0070147
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1600,
"reward": 1.0043329
},
{
"step": 5,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1500,
"reward": 1.00172115
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1100,
"reward": 1.0070347
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 700,
"reward": 1.00575885
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 600,
"reward": -0.1000037
},
{
"step": 9,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 200,
"reward": 1.0037325
},
{
"step": 10,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": -0.10000715
}
]
},
{
"total_reward": 8.05361085,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 8,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0070347
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.01489495
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.0073899499999999
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": 1.0041628
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": 1.01729655
},
{
"step": 6,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 300,
"reward": 1.00127085
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 100,
"reward": 1.00111575
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.0004453
}
]
},
{
"total_reward": 8.9502416,
"accuracy": 0.9,
"total_tokens_used": 3900,
"budget_utilization": 0.975,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.0041828
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.0122682
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2000,
"reward": 1.01639595
},
{
"step": 4,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 1600,
"reward": 1.00200635
},
{
"step": 5,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1500,
"reward": 1.0013459
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 700,
"reward": 1.00889095
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 500,
"reward": 1.0020914
},
{
"step": 8,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 300,
"reward": -0.100007
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 200,
"reward": 1.0019463
},
{
"step": 10,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 100,
"reward": 1.00112075
}
]
},
{
"total_reward": 6.7311904500000015,
"accuracy": 0.7,
"total_tokens_used": 3850,
"budget_utilization": 0.9625,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.00558875
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": -0.10002935
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.0169213
},
{
"step": 4,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1500,
"reward": 1.00142095
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1100,
"reward": 1.0020814
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 700,
"reward": 1.00200635
},
{
"step": 7,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 600,
"reward": 1.0017962
},
{
"step": 8,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 550,
"reward": -0.10000205000000001
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 350,
"reward": -0.10000905
},
{
"step": 10,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 150,
"reward": 1.00141595
}
]
},
{
"total_reward": 7.8409295000000006,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0070347
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2800,
"reward": 1.00633925
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.00558875
},
{
"step": 4,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 1900,
"reward": -0.10000440000000001
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1100,
"reward": 1.00678955
},
{
"step": 6,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1000,
"reward": -0.10000350000000001
},
{
"step": 7,
"tokens_allocated": 50,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 950,
"reward": 1.00112325
},
{
"step": 8,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 150,
"reward": 1.0122682
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 50,
"reward": 1.00112075
},
{
"step": 10,
"tokens_allocated": 50,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00067295
}
]
},
{
"total_reward": 7.84731065,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 50,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3950,
"reward": 1.00067295
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3150,
"reward": 1.01399435
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2350,
"reward": 1.00979155
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 1550,
"reward": 1.01474485
},
{
"step": 5,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1450,
"reward": 1.00052035
},
{
"step": 6,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "gsm8k",
"remaining_budget": 1400,
"reward": -0.1000019
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 600,
"reward": -0.1000366
},
{
"step": 8,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": 1.0011908
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 200,
"reward": 1.00231655
},
{
"step": 10,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": 1.00411775
}
]
},
{
"total_reward": 4.744839,
"accuracy": 0.625,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 8,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.0043329
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2800,
"reward": 1.0124183
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2000,
"reward": 1.01129255
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 1200,
"reward": -0.10003400000000001
},
{
"step": 5,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1100,
"reward": 1.00112075
},
{
"step": 6,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 900,
"reward": -0.10000730000000001
},
{
"step": 7,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 100,
"reward": 1.0157205
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": -0.1000047
}
]
},
{
"total_reward": 8.047381699999999,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 8,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3600,
"reward": 1.0056838
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3200,
"reward": 1.0023816
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2800,
"reward": 1.004483
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 2000,
"reward": 1.01414445
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1200,
"reward": 1.00423785
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 400,
"reward": 1.01039195
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 200,
"reward": 1.00336725
},
{
"step": 8,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.0026918
}
]
},
{
"total_reward": 4.6349301,
"accuracy": 0.5555555555555556,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 3950,
"reward": -0.10000185
},
{
"step": 2,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 3550,
"reward": 1.0046331
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2750,
"reward": -0.10003665
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1950,
"reward": 1.0038626
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1150,
"reward": 1.01399435
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 350,
"reward": 1.01204305
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 150,
"reward": -0.10000780000000001
},
{
"step": 8,
"tokens_allocated": 50,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 100,
"reward": -0.10000200000000001
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.0004453
}
]
},
{
"total_reward": 7.839810100000001,
"accuracy": 0.8,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 10,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3600,
"reward": 1.0070347
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": -0.1000366
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.00305705
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.0122682
},
{
"step": 5,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1500,
"reward": 1.00082055
},
{
"step": 6,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1100,
"reward": -0.10001495
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 700,
"reward": 1.0065844
},
{
"step": 8,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 300,
"reward": 1.0047831999999999
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 100,
"reward": 1.0038926
},
{
"step": 10,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 0,
"reward": 1.00142095
}
]
},
{
"total_reward": 3.5236087000000005,
"accuracy": 0.4444444444444444,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 3200,
"reward": 1.0050634
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l3",
"remaining_budget": 2400,
"reward": -0.1000366
},
{
"step": 3,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 2000,
"reward": -0.1000143
},
{
"step": 4,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1900,
"reward": 1.00127085
},
{
"step": 5,
"tokens_allocated": 400,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 1500,
"reward": -0.10001405000000001
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 700,
"reward": 1.00964145
},
{
"step": 7,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 300,
"reward": 1.00771015
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 200,
"reward": -0.1000037
},
{
"step": 9,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 0,
"reward": -0.1000085
}
]
},
{
"total_reward": 5.0585641500000005,
"accuracy": 1.0,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 5,
"per_step": [
{
"step": 1,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 3200,
"reward": 1.01294365
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l4_l5",
"remaining_budget": 2400,
"reward": 1.0136191
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1600,
"reward": 1.00318715
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 800,
"reward": 1.0161708
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 0,
"reward": 1.0126434500000001
}
]
},
{
"total_reward": 7.9392846,
"accuracy": 0.8888888888888888,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 9,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.0047831999999999
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.0170713999999998
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": 1.0052135
},
{
"step": 4,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 1200,
"reward": -0.1000317
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 400,
"reward": 1.0056638
},
{
"step": 6,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 300,
"reward": 1.0016461
},
{
"step": 7,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 200,
"reward": 1.0017962
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 100,
"reward": 1.00187125
},
{
"step": 9,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00127085
}
]
},
{
"total_reward": 5.838685699999999,
"accuracy": 0.75,
"total_tokens_used": 4000,
"budget_utilization": 1.0,
"steps": 8,
"per_step": [
{
"step": 1,
"tokens_allocated": 400,
"was_correct": true,
"difficulty": "math_l1_l2",
"remaining_budget": 3600,
"reward": 1.00545865
},
{
"step": 2,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 2800,
"reward": 1.0100167
},
{
"step": 3,
"tokens_allocated": 800,
"was_correct": false,
"difficulty": "math_l1_l2",
"remaining_budget": 2000,
"reward": -0.10003095000000001
},
{
"step": 4,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 1900,
"reward": 1.0004453
},
{
"step": 5,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 1100,
"reward": 1.00408775
},
{
"step": 6,
"tokens_allocated": 800,
"was_correct": true,
"difficulty": "math_l3",
"remaining_budget": 300,
"reward": 1.0165460499999999
},
{
"step": 7,
"tokens_allocated": 200,
"was_correct": false,
"difficulty": "math_l4_l5",
"remaining_budget": 100,
"reward": -0.10000925000000001
},
{
"step": 8,
"tokens_allocated": 100,
"was_correct": true,
"difficulty": "gsm8k",
"remaining_budget": 0,
"reward": 1.00217145
}
]
}
]
}
}