Spaces:
Sleeping
Sleeping
| { | |
| "uniform": { | |
| "agent": "uniform", | |
| "n_episodes": 50, | |
| "mean_reward": 7.620243686, | |
| "std_reward": 1.497929030271716, | |
| "mean_accuracy": 0.78, | |
| "std_accuracy": 0.13564659966250536, | |
| "mean_budget_utilization": 1.0, | |
| "episodes": [ | |
| { | |
| "total_reward": 8.93117295, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.0047081500000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.0020814 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2800, | |
| "reward": 1.00155605 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.0032822000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.00200635 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.00801035 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": -0.10001795000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0026818 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": 1.0032822000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.0035824 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.638689650000001, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": -0.10001425 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.0061341000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2800, | |
| "reward": 1.00575885 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.10001715 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2000, | |
| "reward": 1.0073349 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.00365745 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": -0.10001825 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0071848 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": -0.10001660000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0086858 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.72990625, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0025317 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001520000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2800, | |
| "reward": -0.10001840000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.0056838 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.0028318999999999 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.0035824 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.00695965 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": -0.10001520000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00395765 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.94896065, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.0041828 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.00816045 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.00515845 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.00650935 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.0037325 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.0035824 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1200, | |
| "reward": 1.0083856 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": 1.00485825 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10001710000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.4224851499999995, | |
| "accuracy": 0.4, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0077852 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001735 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": -0.10001795000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": -0.10001605000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2000, | |
| "reward": -0.1000168 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.0082355 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": -0.10001645 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0025317 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 400, | |
| "reward": 1.0040327 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.10001535 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.841082350000001, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.00635925 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0031321 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": -0.10001825 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.1000169 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.00395765 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.0028318999999999 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1200, | |
| "reward": 1.0086107500000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0035824 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": 1.00425785 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0083856 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.7296836, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3600, | |
| "reward": -0.10001520000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0043329 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2800, | |
| "reward": -0.1000145 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.10001660000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.0062091499999999 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.0023816 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1200, | |
| "reward": 1.0022315 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.0052335000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": 1.0032822000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00605905 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.936575950000002, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.00290695 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.00530855 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2800, | |
| "reward": 1.0016311 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.00395765 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2000, | |
| "reward": 1.00605905 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.00605905 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": 1.00260675 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.00485825 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": -0.10001855000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.00320715 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 10.0601402, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0016311 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0068846 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.00245665 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.00725985 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.0077852 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": 1.008986 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.0037325 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": 1.00801035 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.008986 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 10.0479821, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.00725985 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.00320715 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.0064343 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0046331 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2000, | |
| "reward": 1.00695965 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.0020814 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": 1.0025317 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.00725985 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": 1.0032822000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.0043329 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.836208800000001, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.00365745 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.00485825 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.0075600500000002 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.0041828 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2000, | |
| "reward": 1.00380755 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": -0.10001495 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": -0.10001550000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.00200635 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": 1.00485825 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.00530855 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.7447692, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0080854000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0083856 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.0049333 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.1000145 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.0088359 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.004483 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": 1.00320715 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0068846 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": -0.10001605000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.10001520000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.730883350000001, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0023816 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.00680955 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": -0.10001555000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.0041828 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2000, | |
| "reward": 1.00200635 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.0047831999999999 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": 1.0061341000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": -0.10001660000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": 1.0046331 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10001520000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.8407082500000005, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.0050834 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.00635925 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": -0.10001675 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.0085357 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.0038826 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.0018562500000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": 1.00320715 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.00635925 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 400, | |
| "reward": -0.10001725 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.00545865 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.94701215, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3600, | |
| "reward": 1.00545865 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.00500835 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.0077852 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.00680955 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2000, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": 1.0032822000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.00695965 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": -0.1000143 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.00290695 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.73463205, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.00365745 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001795000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2800, | |
| "reward": -0.10001660000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.0082355 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.0035824 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.00710975 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": 1.0067345 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.00245665 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": 1.00290695 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": -0.10001660000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.946108400000002, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3600, | |
| "reward": -0.10001745000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.00260675 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.008986 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.0025317 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.0086107500000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.00530855 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1200, | |
| "reward": 1.00170615 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0086858 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0032822000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.934851, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0028318999999999 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.00320715 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.00425785 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2000, | |
| "reward": 1.002982 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.00350735 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": 1.0050834 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0035824 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": -0.10001735 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.00500835 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.740263100000001, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": -0.1000162 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001660000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.0077852 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.0062842 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.00605905 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.00545865 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1200, | |
| "reward": 1.00350735 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.00365745 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": -0.10001605000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0075600500000002 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.732084499999999, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.007485 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001555000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.00590895 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.0028318999999999 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.0028318999999999 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": -0.10001455000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": 1.00876085 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": -0.1000169 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": 1.0017812 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0025317 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.9449069, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.0025317 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.0062091499999999 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2800, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.10001815 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.0033572499999999 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.00350735 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1200, | |
| "reward": 1.0080854000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.00801035 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 400, | |
| "reward": 1.0064343 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0023816 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.952115500000001, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3600, | |
| "reward": 1.0056838 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001435 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.0086858 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.00801035 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2000, | |
| "reward": 1.0026818 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.0037325 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1200, | |
| "reward": 1.00650935 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0047831999999999 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 400, | |
| "reward": 1.0037325 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.00831055 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.849942, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3600, | |
| "reward": -0.1000149 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.00740995 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2800, | |
| "reward": 1.0079353 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.0020814 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": -0.10001650000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.008986 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1200, | |
| "reward": 1.0086107500000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0034323 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": 1.0034323 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0080854000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.84551195, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.0079353 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0086858 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.0047081500000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.00425785 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.0037325 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": -0.1000182 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1200, | |
| "reward": 1.00650935 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0047081500000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": 1.00500835 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.1000153 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.9472363, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": -0.1000153 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0033572499999999 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2800, | |
| "reward": 1.0056838 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0056838 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.00245665 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.0085357 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1200, | |
| "reward": 1.0086858 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0028318999999999 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": 1.0025317 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.007485 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.839136450000001, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.00650935 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.00155605 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.0079353 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.00260675 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": 1.00695965 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": -0.10001550000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": 1.0047831999999999 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10001425 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 10.042878700000001, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3600, | |
| "reward": 1.0033572499999999 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.00380755 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2800, | |
| "reward": 1.0018562500000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0086858 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2000, | |
| "reward": 1.0016311 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.00155605 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": 1.0031321 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0077852 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": 1.0085357 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0025317 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.6258552, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3600, | |
| "reward": 1.00350735 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001745000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2800, | |
| "reward": -0.10001475 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.00380755 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.0025317 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.00155605 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": -0.10001760000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0065844 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": 1.0079353 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.10001735 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.843411150000001, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3600, | |
| "reward": 1.0086858 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001435 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2800, | |
| "reward": 1.00305705 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.00680955 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.00485825 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": -0.10001855000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1200, | |
| "reward": 1.00710975 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0049333 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": 1.00350735 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.004483 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.838907149999999, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.0043329 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.00695965 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.00695965 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.10001855000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2000, | |
| "reward": -0.10001535 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.0050834 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": 1.0026818 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0056838 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": 1.00485825 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0023816 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.836883499999999, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0037325 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0016311 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.1000169 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2000, | |
| "reward": 1.0065844 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": -0.1000143 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1200, | |
| "reward": 1.0085357 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.00350735 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": 1.0049333 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0035824 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.7344062, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": -0.10001660000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.00320715 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": -0.10001890000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": -0.10001635 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.0025317 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.00876085 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": 1.008986 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0047831999999999 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": 1.0016311 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.00455805 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.733280700000001, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": -0.10001605000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0082355 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.0017812 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.00305705 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2000, | |
| "reward": -0.10001755000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": -0.10001800000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": 1.00320715 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.00635925 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 400, | |
| "reward": 1.00575885 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0049333 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.956168, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.0075600500000002 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.0033572499999999 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.0086107500000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.00710975 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.0073349 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1200, | |
| "reward": 1.0077852 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.00710975 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": 1.00290695 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.10001455000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.95031035, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.00771015 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.00635925 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.0080854000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.0038826 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.0035824 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": -0.1000183 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": 1.00320715 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0038826 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 400, | |
| "reward": 1.00816045 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.00545865 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.637345049999999, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": -0.10001415000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.0053836 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.00650935 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.10001535 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.0053836 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": -0.1000149 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": -0.10001555000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0056838 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 400, | |
| "reward": 1.0070347 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.00740995 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.951287350000001, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.0073349 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.0034323 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": -0.10001695000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.0043329 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.00245665 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.0070347 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1200, | |
| "reward": 1.00575885 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.00831055 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": 1.0037325 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.00891095 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 4.53682, | |
| "accuracy": 0.5, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0070347 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": -0.1000143 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2800, | |
| "reward": 1.0086858 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": -0.10001775 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2000, | |
| "reward": -0.10001455000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": -0.10001855000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": 1.00635925 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0080854000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": 1.0067345 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": -0.1000145 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.734710550000001, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.0041828 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001520000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.00365745 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.00200635 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.0075600500000002 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.00575885 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": -0.1000143 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": -0.1000182 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 400, | |
| "reward": 1.0038826 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00771015 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.837031000000001, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.00740995 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001825 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": -0.10001555000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.0035824 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.0020814 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.00200635 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1200, | |
| "reward": 1.0075600500000002 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.00305705 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": 1.0062842 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0050834 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.8445363000000015, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0070347 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.00816045 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.00740995 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": -0.1000183 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.00260675 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.0043329 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1200, | |
| "reward": 1.00395765 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": -0.10001520000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 400, | |
| "reward": 1.0023816 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0086858 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.8411603, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0016311 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.0066594500000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.0052335000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.00500835 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.00605905 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.004483 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": -0.10001470000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": -0.10001755000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": 1.00725985 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.00485825 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.742589049999999, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.0043329 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.00155605 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": -0.10001520000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.00831055 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.00771015 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.00500835 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": -0.10001605000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0071848 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": -0.1000182 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.0085357 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.73163405, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.0056838 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0023816 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2800, | |
| "reward": 1.004483 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.0050834 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.0023816 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": -0.10001635 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1200, | |
| "reward": 1.00771015 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": -0.10001435 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": -0.10001645 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00395765 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.9411572, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.0043329 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0046331 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.0016311 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0037325 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2000, | |
| "reward": 1.00876085 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": -0.10001535 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": 1.00320715 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.00740995 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": 1.0046331 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0028318999999999 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.73613935, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0070347 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001470000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2800, | |
| "reward": 1.00305705 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": -0.10001520000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.00455805 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": -0.10001495 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": 1.0065844 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0047831999999999 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": 1.00365745 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.00650935 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.4263921500000003, | |
| "accuracy": 0.4, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3600, | |
| "reward": 1.00725985 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001470000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2800, | |
| "reward": -0.1000143 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.0053836 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2000, | |
| "reward": -0.10001405000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.0061341000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": 1.00771015 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": -0.10001835 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": -0.1000187 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10001545 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.735009150000001, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.00515845 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0062842 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.0035824 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": -0.1000168 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2000, | |
| "reward": 1.00771015 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": -0.10001675 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1200, | |
| "reward": 1.00410775 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": -0.10001575 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": 1.00425785 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00395765 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.951962250000001, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.0047831999999999 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.00380755 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2800, | |
| "reward": 1.00680955 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.0025317 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": -0.10001750000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.0080854000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1200, | |
| "reward": 1.0075600500000002 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0077852 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 400, | |
| "reward": 1.0028318999999999 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0077852 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.94926085, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.00545865 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.0062091499999999 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2800, | |
| "reward": 1.00725985 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.0064343 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2000, | |
| "reward": 1.0052335000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.0047081500000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": -0.10001710000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0033572499999999 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": 1.0033572499999999 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.00725985 | |
| } | |
| ] | |
| } | |
| ] | |
| }, | |
| "greedy_max": { | |
| "agent": "greedy_max", | |
| "n_episodes": 50, | |
| "mean_reward": 4.163493538, | |
| "std_reward": 0.8606553919009542, | |
| "mean_accuracy": 0.8399999999999999, | |
| "std_accuracy": 0.15491933384829668, | |
| "mean_budget_utilization": 1.0, | |
| "episodes": [ | |
| { | |
| "total_reward": 3.9370282500000005, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003655 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.0055137 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.01759675 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0058889500000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0080654 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 2.83028465, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003635000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.00964145 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.0142195 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0064893499999998 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.1000293 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.0546615500000005, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.00363745 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0122682 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.01429455 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.00919115 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0152702 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9344796, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003350000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.00633925 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.01294365 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.01114245 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00408775 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9645737, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.01624585 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.013469 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": -0.10003445000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0176718 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.0172215 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 1.7125787, | |
| "accuracy": 0.4, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.1000352 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.00513845 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": -0.10002935 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": -0.10003525 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00754005 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.04520525, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.0118179 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.01579555 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.00408775 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.00829055 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.0052135 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9325303500000004, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.01339395 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0032622 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": -0.10003145000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0083656 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.00754005 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.05931465, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.0165460499999999 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.01129255 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.01279355 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0150450500000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00363745 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 2.8317869000000004, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0150450500000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.10003530000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.0098666 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.00693965 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10002910000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9469379, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0112175 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.008966 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.0155704 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": -0.10003350000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.0112175 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.049032799999999, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.0154203 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.0166211 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.0097165 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.00408775 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00318715 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 2.8395168, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.0152702 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.0073899499999999 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.0169213 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": -0.10003350000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10003115 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.0682456, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0139193 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.01444465 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.0133189 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.01174285 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0148199 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.92637765, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.008966 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.10003005000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.00363745 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.00453805 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0092662000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 2.8245754499999998, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.01354405 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": -0.10003445000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": -0.1000366 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0047632 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00633925 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9420648999999996, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": -0.10002825 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.01609575 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.00829055 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0145197 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00318715 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.06576895, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.0152702 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0160206999999999 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.0082155 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0157205 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.01054205 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.0346232, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0077652 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.00303705 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.0064143 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0082155 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.00919115 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.0750001000000005, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0127185 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0169213 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.0107671999999999 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.01729655 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.01729655 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 2.8305914, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003025 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0100167 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.0091161 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": -0.10002885 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0115177 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9369521499999998, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.01189295 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.0056638 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.013469 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": -0.1000376 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0059639999999999 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9525647499999996, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.01114245 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.10003540000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.0067145 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.01744665 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.01729655 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.04955815, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0040127 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.0058139 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.0165460499999999 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.01324385 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00994165 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.058639199999999, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0115177 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.01204305 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.0139193 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.01189295 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0092662000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.038826, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.00528855 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.0092662000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.01204305 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0040127 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0082155 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.94558905, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.00303705 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0112175 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.0178969500000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.013469 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10003145000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.93027465, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.00663945 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.10003565 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.00348735 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.00708975 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.01309375 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.0530855, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.013469 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.0056638 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.01054205 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0073899499999999 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.0160206999999999 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.0443796999999995, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.00363745 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0049133 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.0125684 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.00814045 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0151201 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9214942, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.00408775 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.00468815 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.00558875 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": -0.10003525 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0071648 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 2.8296829, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.01369415 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.10003530000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.0056638 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.01039195 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": -0.1000317 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9426592, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0176718 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.0047632 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.00558875 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0146698 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.10003435000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.046406050000001, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.013469 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.00318715 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.00318715 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0092662000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.01729655 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.05180965, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.00453805 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.01444465 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.0112175 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0173716 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.00423785 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.94130965, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003300000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.00889095 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.0103169 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0151201 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.0070147 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.04760685, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.01744665 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.008966 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.0070147 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0049133 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0092662000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.0503837, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.00663945 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.01489495 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.0073899499999999 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.0041628 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.01729655 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 2.8441699000000003, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003350000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0122682 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.01639595 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": -0.10003115 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0155704 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9362849499999997, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.00558875 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": -0.10002935 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.0169213 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.00573885 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.0080654 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.0385257999999995, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.00663945 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.00633925 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.00558875 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0131688 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.00678955 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.06862085, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.0165460499999999 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.01399435 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.00979155 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.01474485 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.01354405 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9379313999999996, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0049133 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.0124183 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.01129255 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": -0.10003400000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0093412499999999 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9515129, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003655 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.01684625 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.0163209 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.01414445 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00423785 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9419063999999997, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0094163 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.0146698 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": -0.10003665 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.0038626 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.01399435 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9389795000000003, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.00663945 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": -0.1000366 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.0098666 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.0122682 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0102418499999999 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 2.8365136, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.0050634 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": -0.1000366 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": -0.10002925 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.01744665 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0140694 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.0585641500000005, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.01294365 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.0136191 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.00318715 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.0161708 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.0126434500000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9344063499999997, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0064893499999998 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0170713999999998 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.0052135 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": -0.1000317 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.0056638 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.9408614, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0169213 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0100167 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": -0.10003095000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0098666 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.00408775 | |
| } | |
| ] | |
| } | |
| ] | |
| }, | |
| "oracle": { | |
| "agent": "oracle", | |
| "n_episodes": 50, | |
| "mean_reward": 6.932624968, | |
| "std_reward": 1.50156485979675, | |
| "mean_accuracy": 0.7282222222222221, | |
| "std_accuracy": 0.13007519192972794, | |
| "mean_budget_utilization": 0.9830249999999999, | |
| "episodes": [ | |
| { | |
| "total_reward": 7.896245749999999, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003655 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 177, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3023, | |
| "reward": 1.00704585 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 283, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2740, | |
| "reward": 1.00884175 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 782, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1958, | |
| "reward": 1.0319322 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 163, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1795, | |
| "reward": 1.00674635 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 448, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1347, | |
| "reward": 1.01161035 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 420, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 927, | |
| "reward": -0.10001795000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 618, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 309, | |
| "reward": 1.0190318 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 193, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 116, | |
| "reward": 1.0092215 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 116, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.00187045 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.67526105, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 3843, | |
| "budget_utilization": 0.96075, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003635000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 177, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3023, | |
| "reward": 1.0068207 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 755, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2268, | |
| "reward": 1.03238385 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 648, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1620, | |
| "reward": -0.10001715 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 540, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1080, | |
| "reward": 1.0178349 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 108, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 972, | |
| "reward": 1.0011203499999999 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 303, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 669, | |
| "reward": 1.0110172 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 167, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 502, | |
| "reward": -0.1000047 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 188, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 314, | |
| "reward": -0.10000400000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 157, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 157, | |
| "reward": 1.00614625 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.87083005, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 3830, | |
| "budget_utilization": 0.9575, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3800, | |
| "reward": 1.00246665 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 527, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3273, | |
| "reward": -0.10001520000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2473, | |
| "reward": 1.01429455 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 176, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2297, | |
| "reward": 1.0064455 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 191, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2106, | |
| "reward": 1.00794575 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 526, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1580, | |
| "reward": 1.0130324 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 493, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1087, | |
| "reward": 1.01138295 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 181, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 906, | |
| "reward": 1.00779615 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 566, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 340, | |
| "reward": -0.10001520000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 170, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 170, | |
| "reward": 1.0074965 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.9928685, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003350000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 177, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3023, | |
| "reward": 1.00719595 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 188, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2835, | |
| "reward": 1.0071954 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 303, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2532, | |
| "reward": 1.0120679 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 211, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2321, | |
| "reward": 1.0032166 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 348, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1973, | |
| "reward": 1.0133415000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 369, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1604, | |
| "reward": 1.015667 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 668, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 936, | |
| "reward": 1.02450795 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 136, | |
| "reward": 1.0059639999999999 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 136, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0037457 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.6425433499999995, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3800, | |
| "reward": 1.00216645 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3000, | |
| "reward": 1.013469 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 468, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2532, | |
| "reward": -0.10001795000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 271, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2261, | |
| "reward": 1.00764155 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 471, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1790, | |
| "reward": -0.1000168 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 268, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1522, | |
| "reward": 1.00816705 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 761, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 761, | |
| "reward": -0.10001645 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 190, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 571, | |
| "reward": 1.00742045 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 142, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 429, | |
| "reward": 1.0037454 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 429, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.10001535 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.881022700000001, | |
| "accuracy": 0.7777777777777778, | |
| "total_tokens_used": 3986, | |
| "budget_utilization": 0.9965, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.1000352 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 711, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2489, | |
| "reward": 1.0264571 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 388, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2101, | |
| "reward": 1.0173922 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 600, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1501, | |
| "reward": -0.1000169 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 125, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1376, | |
| "reward": 1.0041215 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 137, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1239, | |
| "reward": 1.00449615 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 154, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1085, | |
| "reward": 1.0059962999999998 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 271, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 814, | |
| "reward": 1.00689105 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 14, | |
| "reward": 1.0157205 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.7549208499999995, | |
| "accuracy": 0.6666666666666666, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 500, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3500, | |
| "reward": -0.10001520000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 291, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3209, | |
| "reward": -0.100008 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2409, | |
| "reward": 1.00408775 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 688, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1721, | |
| "reward": -0.10001660000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 215, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1506, | |
| "reward": 1.002616 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 150, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1356, | |
| "reward": 1.0042703499999999 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 169, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1187, | |
| "reward": 1.00644585 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 494, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 693, | |
| "reward": 1.0122835000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 693, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0252572 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.91441255, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 300, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": -0.1000091 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 513, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3187, | |
| "reward": 1.01378355 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 796, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2391, | |
| "reward": 1.0313311 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 170, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2221, | |
| "reward": 1.00682105 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 740, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1481, | |
| "reward": 1.03155905 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 148, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1333, | |
| "reward": 1.00412035 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 416, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 917, | |
| "reward": 1.00380675 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 611, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 306, | |
| "reward": 1.02068325 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 114, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 192, | |
| "reward": 1.00232085 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 192, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.1000043 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 9.02716315, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3800, | |
| "reward": 1.0035173499999999 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 316, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3484, | |
| "reward": 1.01236745 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 544, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2940, | |
| "reward": 1.01325665 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 315, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2625, | |
| "reward": 1.010116 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 218, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2407, | |
| "reward": -0.1000076 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 361, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2046, | |
| "reward": 1.0151420500000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 639, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1407, | |
| "reward": 1.0269110000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 586, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 821, | |
| "reward": 1.0176825 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 513, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 308, | |
| "reward": 1.01648535 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 308, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.0116924 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.013730599999999, | |
| "accuracy": 0.8888888888888888, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 300, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": 1.008991 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2900, | |
| "reward": -0.10003530000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 181, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2719, | |
| "reward": 1.0065203 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 485, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2234, | |
| "reward": 1.0110081 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 744, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1490, | |
| "reward": 1.03275965 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 149, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1341, | |
| "reward": 1.00584645 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 670, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 671, | |
| "reward": 1.0227817 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 279, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 392, | |
| "reward": 1.00764115 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 392, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.01821755 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.7610722999999995, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 300, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": 1.0113926 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2900, | |
| "reward": 1.008966 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 453, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2447, | |
| "reward": 1.01153505 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 262, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2185, | |
| "reward": 1.00681645 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 455, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1730, | |
| "reward": 1.00793255 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 692, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1038, | |
| "reward": -0.10001495 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 519, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 519, | |
| "reward": -0.10001550000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 346, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 173, | |
| "reward": 1.01251605 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 64, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 109, | |
| "reward": -0.1000018 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 109, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.00194585 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.855514200000001, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3800, | |
| "reward": 1.00201635 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 316, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3484, | |
| "reward": 1.01101655 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 217, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3267, | |
| "reward": 1.0036666 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2467, | |
| "reward": 1.00408775 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 205, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2262, | |
| "reward": 1.0036672 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 226, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2036, | |
| "reward": 1.0061428000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 636, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1400, | |
| "reward": 1.02090715 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 233, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1167, | |
| "reward": 1.00404105 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 437, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 730, | |
| "reward": -0.10001605000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 730, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.10001520000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.87944935, | |
| "accuracy": 0.7777777777777778, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3800, | |
| "reward": 1.0028419 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 211, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3589, | |
| "reward": 1.0036669 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 560, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3029, | |
| "reward": -0.10001555000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 324, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2705, | |
| "reward": 1.01146645 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1905, | |
| "reward": -0.10003115 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 476, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1429, | |
| "reward": 1.0104832 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 714, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 715, | |
| "reward": 1.0296841 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 119, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 596, | |
| "reward": 1.0020204 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 596, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0193331 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.9015055, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 300, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": 1.00966645 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2900, | |
| "reward": 1.01444465 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 453, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2447, | |
| "reward": -0.10001675 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 262, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2185, | |
| "reward": 1.0083925 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 182, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2003, | |
| "reward": 1.0080963 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 300, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1703, | |
| "reward": 1.00846565 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 532, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1171, | |
| "reward": 1.01310715 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 780, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 391, | |
| "reward": 1.03485925 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 97, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 294, | |
| "reward": 1.00449815 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 294, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.10000785000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.915772100000001, | |
| "accuracy": 0.7777777777777778, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 500, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3500, | |
| "reward": 1.01295865 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 777, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2723, | |
| "reward": 1.03328335 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 170, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2553, | |
| "reward": 1.0071963 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 273, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2280, | |
| "reward": 1.00809175 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 760, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1520, | |
| "reward": 1.03140795 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 380, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1140, | |
| "reward": -0.1000076 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 356, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 784, | |
| "reward": -0.10000935000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 522, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 262, | |
| "reward": 1.01610965 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 262, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0067414 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.76834685, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 3889, | |
| "budget_utilization": 0.97225, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 300, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": -0.10000945 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 513, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3187, | |
| "reward": -0.10001795000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 298, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2889, | |
| "reward": 1.01041705 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 309, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2580, | |
| "reward": 1.0112420500000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 215, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2365, | |
| "reward": 1.00254095 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 354, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2011, | |
| "reward": 1.01236555 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1211, | |
| "reward": -0.1000326 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 504, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 707, | |
| "reward": 1.01025665 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 265, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 442, | |
| "reward": 1.00869255 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 331, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 111, | |
| "reward": 1.01289205 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.9896424, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 500, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3500, | |
| "reward": -0.10001745000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 486, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3014, | |
| "reward": 1.00905675 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 188, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2826, | |
| "reward": 1.008096 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 302, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2524, | |
| "reward": 1.00891585 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 210, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2314, | |
| "reward": 1.0044925 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 231, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2083, | |
| "reward": 1.0063677 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 260, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1823, | |
| "reward": 1.00696665 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1023, | |
| "reward": 1.0058139 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 639, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 384, | |
| "reward": 1.02233295 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 384, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.01761755 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.904428449999999, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 3954, | |
| "budget_utilization": 0.9885, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3800, | |
| "reward": 1.00441795 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 527, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3273, | |
| "reward": 1.01273215 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 204, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3069, | |
| "reward": 1.0037422999999999 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2269, | |
| "reward": 1.0157205 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 756, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1513, | |
| "reward": 1.029682 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 226, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1287, | |
| "reward": -0.10000740000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 643, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 644, | |
| "reward": 1.0233084 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 161, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 483, | |
| "reward": 1.00509535 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 301, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 182, | |
| "reward": 1.00974145 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 136, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 46, | |
| "reward": -0.10000425 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.7541784, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 3629, | |
| "budget_utilization": 0.90725, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 300, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": -0.1000081 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 205, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3495, | |
| "reward": 1.0047179 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 218, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3277, | |
| "reward": 1.0053927 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2477, | |
| "reward": 1.0082155 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 309, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2168, | |
| "reward": 1.0105666 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 542, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1626, | |
| "reward": 1.01610865 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 203, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1423, | |
| "reward": 1.00404255 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 237, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1186, | |
| "reward": 1.0051666 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 444, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 742, | |
| "reward": -0.10001605000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 371, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 371, | |
| "reward": -0.10000795000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.6360168999999996, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 3946, | |
| "budget_utilization": 0.9865, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0127185 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 444, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2756, | |
| "reward": -0.10001555000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 172, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2584, | |
| "reward": 1.00697105 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 184, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.0071956 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2200, | |
| "reward": 1.0013409 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 550, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1650, | |
| "reward": -0.10001455000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 850, | |
| "reward": -0.1000309 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 566, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 284, | |
| "reward": -0.1000169 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 177, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 107, | |
| "reward": 1.00674565 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 53, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 54, | |
| "reward": 1.0011231 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.756207800000001, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 3737, | |
| "budget_utilization": 0.93425, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003025 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 444, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2756, | |
| "reward": 1.00950915 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 258, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2498, | |
| "reward": 1.00741705 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 713, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1785, | |
| "reward": -0.10001815 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 148, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1637, | |
| "reward": 1.0057714500000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 163, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1474, | |
| "reward": 1.00569565 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 276, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1198, | |
| "reward": -0.10000895 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 499, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 699, | |
| "reward": 1.01543535 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 174, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 525, | |
| "reward": 1.0059953 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 262, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 263, | |
| "reward": 1.0064412 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.8895138, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 500, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3500, | |
| "reward": 1.0131838 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 777, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2723, | |
| "reward": -0.10001435 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 425, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2298, | |
| "reward": 1.0105608 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 410, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1888, | |
| "reward": 1.00876035 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 629, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1259, | |
| "reward": 1.0198568000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 314, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 945, | |
| "reward": 1.01266775 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 177, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 768, | |
| "reward": -0.1000043 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 192, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 576, | |
| "reward": 1.0091465 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 144, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 432, | |
| "reward": 1.0046459 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 432, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.01071055 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 9.0091763, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 3633, | |
| "budget_utilization": 0.90825, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 500, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3500, | |
| "reward": -0.1000149 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 777, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2723, | |
| "reward": 1.03568495 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 255, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2468, | |
| "reward": 1.00749225 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 176, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2292, | |
| "reward": 1.00667065 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 191, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2101, | |
| "reward": 1.0074204 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 315, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1786, | |
| "reward": 1.0099659 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 223, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1563, | |
| "reward": 1.00269065 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 390, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1173, | |
| "reward": 1.01731705 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 439, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 734, | |
| "reward": 1.0063573 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 367, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 367, | |
| "reward": 1.01559205 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.79033245, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0040127 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 711, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2489, | |
| "reward": 1.0320108000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 388, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2101, | |
| "reward": 1.0179926 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 225, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1876, | |
| "reward": 1.00584265 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 156, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1720, | |
| "reward": 1.0055459 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 688, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1032, | |
| "reward": -0.1000182 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 193, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 839, | |
| "reward": 1.0083209 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 559, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 280, | |
| "reward": 1.01663315 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 175, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 105, | |
| "reward": -0.10000445000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 105, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10000360000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.78769955, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0115177 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 266, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2934, | |
| "reward": 1.0058406 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 733, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2201, | |
| "reward": 1.0306587999999999 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 393, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1808, | |
| "reward": 1.0185176999999999 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 150, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1658, | |
| "reward": 1.00547115 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 414, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1244, | |
| "reward": 1.0095857 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 155, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1089, | |
| "reward": 1.00614635 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 181, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 908, | |
| "reward": -0.10000395000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 108, | |
| "reward": -0.10003025 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 108, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10000425 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.883722, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 300, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": 1.01176785 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2900, | |
| "reward": 1.0092662000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 453, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2447, | |
| "reward": 1.00553105 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 699, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1748, | |
| "reward": 1.0303603 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 145, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1603, | |
| "reward": 1.00554645 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1203, | |
| "reward": 1.00260675 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 375, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 828, | |
| "reward": 1.0161920500000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 552, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 276, | |
| "reward": -0.10001550000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 103, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 173, | |
| "reward": 1.0024715 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 173, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10000465 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 9.009530049999999, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 3900, | |
| "budget_utilization": 0.975, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 500, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3500, | |
| "reward": 1.01085725 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 486, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3014, | |
| "reward": 1.01025755 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 282, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2732, | |
| "reward": 1.00711565 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 487, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2245, | |
| "reward": 1.0152108 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 748, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1497, | |
| "reward": 1.0277311 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 224, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1273, | |
| "reward": -0.10000925000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 636, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 637, | |
| "reward": 1.0208321 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 106, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 531, | |
| "reward": 1.0020961000000002 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 331, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 200, | |
| "reward": 1.0137176 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 100, | |
| "reward": 1.00172115 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.77586915, | |
| "accuracy": 0.6666666666666666, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 500, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3500, | |
| "reward": 1.01100735 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 777, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2723, | |
| "reward": -0.10001745000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 680, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2043, | |
| "reward": -0.10001475 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 364, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1679, | |
| "reward": 1.0160425 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 139, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1540, | |
| "reward": 1.00464615 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 231, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1309, | |
| "reward": 1.00554215 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 409, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 900, | |
| "reward": -0.10001760000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 150, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 750, | |
| "reward": 1.0044955 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 750, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0341852999999999 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.768739500000001, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 3891, | |
| "budget_utilization": 0.97275, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 500, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3500, | |
| "reward": 1.0161858000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 777, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2723, | |
| "reward": -0.10001435 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 255, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2468, | |
| "reward": 1.0060663 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 176, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2292, | |
| "reward": 1.0078714500000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 286, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2006, | |
| "reward": 1.01071785 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 300, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1706, | |
| "reward": 1.0116928 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 213, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1493, | |
| "reward": 1.00539295 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 693, | |
| "reward": 1.01084225 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 259, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 434, | |
| "reward": -0.10000740000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 325, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 109, | |
| "reward": -0.10000815 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.766700849999999, | |
| "accuracy": 0.6666666666666666, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 300, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": 1.00966645 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 513, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3187, | |
| "reward": 1.01543465 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 199, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2988, | |
| "reward": 1.00914615 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2188, | |
| "reward": 1.00814045 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 729, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1459, | |
| "reward": -0.10001535 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 583, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 876, | |
| "reward": 1.0188084 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 438, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 438, | |
| "reward": 1.0055318 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 292, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 146, | |
| "reward": -0.10000705 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 146, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10000465 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.877045400000001, | |
| "accuracy": 0.7777777777777778, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3800, | |
| "reward": 1.0023916 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 316, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3484, | |
| "reward": 1.01041615 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 544, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2940, | |
| "reward": 1.01520795 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2140, | |
| "reward": -0.10003525 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 713, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1427, | |
| "reward": 1.0300594 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 570, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 857, | |
| "reward": -0.1000143 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 160, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 697, | |
| "reward": 1.00532055 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 290, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 407, | |
| "reward": 1.008241 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 407, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0054583 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.796034349999999, | |
| "accuracy": 0.6666666666666666, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.01369415 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 711, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2489, | |
| "reward": 1.02653215 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 388, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2101, | |
| "reward": 1.018593 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 375, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1726, | |
| "reward": -0.10000715 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 215, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1511, | |
| "reward": -0.10000890000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 604, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 907, | |
| "reward": 1.02406085 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 283, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 624, | |
| "reward": -0.10000735000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 156, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 468, | |
| "reward": 1.0064465 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 468, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0067310999999999 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.86692335, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 3787, | |
| "budget_utilization": 0.94675, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 300, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": 1.00981655 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 308, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3392, | |
| "reward": 1.01116705 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 212, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3180, | |
| "reward": 1.00231595 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 227, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2953, | |
| "reward": 1.0041164 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 615, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2338, | |
| "reward": -0.10001755000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 350, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1988, | |
| "reward": 1.0128911 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 621, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1367, | |
| "reward": 1.01978215 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 567, | |
| "reward": -0.1000352 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 141, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 426, | |
| "reward": 1.0035203 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 213, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 213, | |
| "reward": 1.0033665999999999 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.8743482999999985, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 300, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": 1.00966645 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 513, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3187, | |
| "reward": 1.0118322499999999 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 199, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2988, | |
| "reward": 1.0093713 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2188, | |
| "reward": 1.0092662000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 182, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2006, | |
| "reward": 1.00772105 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1206, | |
| "reward": -0.1000293 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 150, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1056, | |
| "reward": 1.0056962999999999 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 176, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 880, | |
| "reward": 1.00727105 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 330, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 550, | |
| "reward": 1.0135675499999999 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 550, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.10001455000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.9898032, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 3821, | |
| "budget_utilization": 0.95525, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 300, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": 1.0098916 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2900, | |
| "reward": 1.01444465 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 181, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2719, | |
| "reward": 1.0065203 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 194, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2525, | |
| "reward": 1.0080957000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 315, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2210, | |
| "reward": 1.01019105 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 552, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1658, | |
| "reward": -0.1000183 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 518, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1140, | |
| "reward": 1.01205715 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 190, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 950, | |
| "reward": 1.0086963 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 237, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 713, | |
| "reward": 1.0044161 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 534, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 179, | |
| "reward": 1.01550865 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.638869999999999, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003300000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 444, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2756, | |
| "reward": 1.0086836 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 172, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2584, | |
| "reward": 1.0066708500000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 738, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1846, | |
| "reward": -0.10001535 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 230, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1616, | |
| "reward": 1.003741 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 404, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1212, | |
| "reward": -0.1000149 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 378, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 834, | |
| "reward": -0.10000875000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 208, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 626, | |
| "reward": 1.0017908 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 156, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 470, | |
| "reward": 1.0053958 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 470, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.01265995 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.9015086000000005, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.01744665 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 177, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3023, | |
| "reward": 1.0066706 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 472, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2551, | |
| "reward": -0.10001695000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 273, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2278, | |
| "reward": 1.009968 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 189, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2089, | |
| "reward": 1.00839615 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 313, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1776, | |
| "reward": 1.0110167 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 333, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1443, | |
| "reward": -0.10000745000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 360, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1083, | |
| "reward": 1.01416645 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 406, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 677, | |
| "reward": 1.0041825 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 677, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.02968595 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.5552238000000003, | |
| "accuracy": 0.4444444444444444, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3800, | |
| "reward": -0.10000945 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 527, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3273, | |
| "reward": -0.1000143 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 306, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2967, | |
| "reward": 1.0116174500000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 529, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2438, | |
| "reward": -0.10001775 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 507, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1931, | |
| "reward": -0.10001455000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 289, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1642, | |
| "reward": 1.0108678 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 842, | |
| "reward": -0.1000352 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 140, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 702, | |
| "reward": 1.0034453 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 702, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.0293845000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 4.5351288, | |
| "accuracy": 0.5, | |
| "total_tokens_used": 3929, | |
| "budget_utilization": 0.98225, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003350000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 444, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2756, | |
| "reward": -0.10001520000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 430, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2326, | |
| "reward": 1.00590745 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 664, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1662, | |
| "reward": 1.02180635 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 138, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1524, | |
| "reward": 1.0041959 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 381, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1143, | |
| "reward": -0.1000071 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 571, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 572, | |
| "reward": -0.1000143 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 381, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 191, | |
| "reward": -0.100007 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 50, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 141, | |
| "reward": 1.00082305 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 70, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 71, | |
| "reward": 1.00247315 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.65206235, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 3968, | |
| "budget_utilization": 0.992, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3800, | |
| "reward": 1.0037425 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 527, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3273, | |
| "reward": -0.10001825 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 511, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2762, | |
| "reward": -0.10001555000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 197, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2565, | |
| "reward": 1.00869595 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 320, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2245, | |
| "reward": 1.01296765 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1445, | |
| "reward": 1.0056638 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 270, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1175, | |
| "reward": 1.00741645 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 293, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 882, | |
| "reward": -0.10000745000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 82, | |
| "reward": 1.0136191 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 32, | |
| "reward": -0.10000185 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.8643071, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 3629, | |
| "budget_utilization": 0.90725, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3800, | |
| "reward": -0.10000945 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 211, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3589, | |
| "reward": 1.0024661 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 224, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3365, | |
| "reward": 1.0055425 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 240, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3125, | |
| "reward": 1.0040407 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 390, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2735, | |
| "reward": 1.01611625 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1935, | |
| "reward": 1.0094163 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 241, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1694, | |
| "reward": 1.0063672 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 705, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 989, | |
| "reward": -0.10001520000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 247, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 742, | |
| "reward": 1.0063669 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 371, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 371, | |
| "reward": 1.0140158 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.8762429, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3800, | |
| "reward": 1.0035173499999999 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 211, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3589, | |
| "reward": 1.0047176 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 224, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3365, | |
| "reward": 1.0040415 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 360, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3005, | |
| "reward": 1.01446665 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 250, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2755, | |
| "reward": 1.00561625 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 275, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2480, | |
| "reward": 1.007116 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 775, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1705, | |
| "reward": -0.10001470000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 710, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 995, | |
| "reward": -0.10001755000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 373, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 622, | |
| "reward": 1.01529155 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 622, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.02150825 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 4.66363005, | |
| "accuracy": 0.5555555555555556, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 300, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": 1.011993 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 308, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3392, | |
| "reward": 1.01131715 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 530, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2862, | |
| "reward": -0.10001520000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 511, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2351, | |
| "reward": 1.01663555 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 195, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2156, | |
| "reward": 1.00824575 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1356, | |
| "reward": 1.01549535 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 423, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 933, | |
| "reward": -0.10001605000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 233, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 700, | |
| "reward": -0.10000730000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 700, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.1000182 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.87246095, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 3971, | |
| "budget_utilization": 0.99275, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003655 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 711, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2489, | |
| "reward": 1.0257066000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 233, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2256, | |
| "reward": 1.0041161 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 161, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2095, | |
| "reward": 1.0066714 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 174, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1921, | |
| "reward": 1.00607035 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 480, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1441, | |
| "reward": -0.10001635 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 270, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1171, | |
| "reward": 1.0086172500000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 292, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 879, | |
| "reward": 1.0095918 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 79, | |
| "reward": 1.0106171 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 50, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 29, | |
| "reward": 1.00112325 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.8952819000000005, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 3985, | |
| "budget_utilization": 0.99625, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0094163 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 711, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2489, | |
| "reward": 1.0279581 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 388, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2101, | |
| "reward": 1.01626645 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 375, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1726, | |
| "reward": 1.0170176 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 359, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1367, | |
| "reward": -0.10000890000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 341, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1026, | |
| "reward": 1.01184085 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 513, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 513, | |
| "reward": 1.01168215 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 342, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 171, | |
| "reward": -0.1000086 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 106, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 65, | |
| "reward": 1.0008953 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 50, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 15, | |
| "reward": 1.00022265 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 2.3343037, | |
| "accuracy": 0.3, | |
| "total_tokens_used": 3982, | |
| "budget_utilization": 0.9955, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3800, | |
| "reward": -0.10000945 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 527, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3273, | |
| "reward": -0.10001470000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2473, | |
| "reward": 1.0098666 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 441, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2032, | |
| "reward": -0.10001520000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 169, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1863, | |
| "reward": 1.00599555 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 745, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1118, | |
| "reward": -0.10001495 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 559, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 559, | |
| "reward": 1.0185094 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 372, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 187, | |
| "reward": -0.10000730000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 116, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 71, | |
| "reward": -0.10000435 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 53, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 18, | |
| "reward": -0.1000019 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.5414481000000007, | |
| "accuracy": 0.4444444444444444, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 500, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3500, | |
| "reward": 1.0147598500000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 486, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3014, | |
| "reward": -0.10001470000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 753, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2261, | |
| "reward": -0.1000143 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 161, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2100, | |
| "reward": 1.00584585 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 700, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1400, | |
| "reward": -0.10001405000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 140, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1260, | |
| "reward": 1.0040457 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 393, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 867, | |
| "reward": 1.0168666 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 578, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 289, | |
| "reward": -0.10001835 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 289, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.1000085 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.6448685, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 3887, | |
| "budget_utilization": 0.97175, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3800, | |
| "reward": 1.0038926 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3000, | |
| "reward": 1.0136191 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 468, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2532, | |
| "reward": 1.0086824 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 452, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2080, | |
| "reward": -0.1000168 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 433, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1647, | |
| "reward": 1.01018515 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 411, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1236, | |
| "reward": -0.10001675 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 154, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1082, | |
| "reward": 1.0056961 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 721, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 361, | |
| "reward": -0.10001575 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 135, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 226, | |
| "reward": -0.1000038 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 113, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 113, | |
| "reward": 1.00284625 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.8907995, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 3512, | |
| "budget_utilization": 0.878, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 300, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": 1.0110924 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 513, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3187, | |
| "reward": 1.0122825500000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 298, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2889, | |
| "reward": 1.00861585 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 309, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2580, | |
| "reward": -0.10000890000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 322, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2258, | |
| "reward": 1.0119919 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 338, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1920, | |
| "reward": -0.10000895 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 360, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1560, | |
| "reward": 1.01416645 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 260, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1300, | |
| "reward": 1.00666645 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 325, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 975, | |
| "reward": 1.0116915499999999 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 487, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 488, | |
| "reward": 1.0143102 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.8885411, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 3896, | |
| "budget_utilization": 0.974, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 300, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": 1.01191795 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 513, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3187, | |
| "reward": 1.01468415 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 298, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2889, | |
| "reward": 1.01146775 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 206, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2683, | |
| "reward": 1.00441765 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 558, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2125, | |
| "reward": 1.0170835 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 531, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1594, | |
| "reward": 1.01453315 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 797, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 797, | |
| "reward": -0.10001710000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 132, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 665, | |
| "reward": 1.00457145 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 249, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 416, | |
| "reward": -0.10000840000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 312, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 104, | |
| "reward": 1.0098909999999999 | |
| } | |
| ] | |
| } | |
| ] | |
| }, | |
| "bandit": { | |
| "agent": "bandit", | |
| "n_episodes": 50, | |
| "mean_reward": 6.525903147, | |
| "std_reward": 1.6575015872803898, | |
| "mean_accuracy": 0.743531746031746, | |
| "std_accuracy": 0.1592141678198517, | |
| "mean_budget_utilization": 0.9884999999999999, | |
| "episodes": [ | |
| { | |
| "total_reward": 5.631424399999999, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 3550, | |
| "budget_utilization": 0.8875, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3950, | |
| "reward": -0.1000023 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3850, | |
| "reward": 1.00127085 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3750, | |
| "reward": 1.0022465 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3650, | |
| "reward": -0.10000375 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3450, | |
| "reward": 1.0032922 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3250, | |
| "reward": -0.10000765 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2850, | |
| "reward": -0.10001795000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2050, | |
| "reward": 1.0059639999999999 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1250, | |
| "reward": 1.0053636 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 450, | |
| "reward": 1.0133189 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.4211402, | |
| "accuracy": 0.4, | |
| "total_tokens_used": 3900, | |
| "budget_utilization": 0.975, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003635000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3000, | |
| "reward": 1.00321715 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2800, | |
| "reward": -0.1000072 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.10001715 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2200, | |
| "reward": -0.10000925000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1400, | |
| "reward": 1.0137692 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 600, | |
| "reward": -0.10002935 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 500, | |
| "reward": -0.1000047 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 300, | |
| "reward": 1.00306705 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 100, | |
| "reward": 1.0011908 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.7648755000000005, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 3900, | |
| "budget_utilization": 0.975, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3900, | |
| "reward": 1.00082055 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3800, | |
| "reward": -0.10000375 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3000, | |
| "reward": 1.01429455 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2200, | |
| "reward": 1.00919115 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1400, | |
| "reward": 1.0152702 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1350, | |
| "reward": -0.10000205000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 550, | |
| "reward": 1.01759675 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 350, | |
| "reward": 1.00366745 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 300, | |
| "reward": -0.10000205000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 100, | |
| "reward": 1.0040427 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.639730100000001, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3900, | |
| "reward": -0.10000450000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3100, | |
| "reward": 1.00633925 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2300, | |
| "reward": 1.01294365 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1500, | |
| "reward": 1.01114245 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 700, | |
| "reward": 1.00408775 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 500, | |
| "reward": 1.0022415 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 300, | |
| "reward": 1.0029919999999999 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 100, | |
| "reward": -0.1000076 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 50, | |
| "reward": -0.10000205000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10000235 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 4.851217999999999, | |
| "accuracy": 0.7142857142857143, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 7, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.01624585 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.013469 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": -0.10003445000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.0176718 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": -0.1000168 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 200, | |
| "reward": 1.00306705 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.00081555 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 4.7170817, | |
| "accuracy": 0.625, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 8, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.1000352 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3000, | |
| "reward": 1.0026918 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2200, | |
| "reward": -0.10002935 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1400, | |
| "reward": -0.10003525 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1200, | |
| "reward": 1.0040427 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1000, | |
| "reward": 1.00246665 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0037425 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.00423785 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 4.630049850000001, | |
| "accuracy": 0.5555555555555556, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3900, | |
| "reward": -0.10000445000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3700, | |
| "reward": -0.100008 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": -0.10000395000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3400, | |
| "reward": -0.10000890000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2600, | |
| "reward": 1.0052135 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1800, | |
| "reward": 1.00423785 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1000, | |
| "reward": 1.0118179 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 200, | |
| "reward": 1.00498835 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.00381755 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.6293051, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3900, | |
| "reward": -0.10000360000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3850, | |
| "reward": -0.1000022 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3050, | |
| "reward": -0.10003145000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2850, | |
| "reward": 1.00291695 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2050, | |
| "reward": 1.00754005 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1850, | |
| "reward": 1.00186625 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1050, | |
| "reward": 1.0035624 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 250, | |
| "reward": 1.0092662000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 50, | |
| "reward": 1.0041928 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.1000023 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 9.055337000000002, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3900, | |
| "reward": 1.00097065 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3100, | |
| "reward": 1.01129255 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2300, | |
| "reward": 1.01279355 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1500, | |
| "reward": 1.0150450500000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 700, | |
| "reward": 1.00363745 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 500, | |
| "reward": 1.00306705 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 300, | |
| "reward": 1.0035924 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 100, | |
| "reward": 1.0038926 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.0010457000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 4.53231765, | |
| "accuracy": 0.5, | |
| "total_tokens_used": 3900, | |
| "budget_utilization": 0.975, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.00725985 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3400, | |
| "reward": -0.10000780000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2600, | |
| "reward": 1.0098666 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1800, | |
| "reward": 1.00693965 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1000, | |
| "reward": -0.10002910000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": -0.10000875000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 600, | |
| "reward": -0.100009 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 500, | |
| "reward": -0.10000435 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 300, | |
| "reward": 1.00381755 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 100, | |
| "reward": 1.004493 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.7393624000000005, | |
| "accuracy": 0.5714285714285714, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 7, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.00365745 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2800, | |
| "reward": 1.008966 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2000, | |
| "reward": 1.0155704 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1200, | |
| "reward": -0.10003350000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": 1.0112175 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 200, | |
| "reward": -0.1000072 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10000825 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.93710895, | |
| "accuracy": 0.875, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 8, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0080854000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2800, | |
| "reward": 1.0166211 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2700, | |
| "reward": 1.0015710500000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1900, | |
| "reward": 1.00408775 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1100, | |
| "reward": 1.00318715 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1000, | |
| "reward": 1.00202135 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 200, | |
| "reward": -0.1000309 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00156605 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 10.05136435, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 3900, | |
| "budget_utilization": 0.975, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0023816 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.00680955 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0169213 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.0041828 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.00200635 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.00979155 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": 1.0061341000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 300, | |
| "reward": 1.0005954 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 200, | |
| "reward": 1.0016461 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 100, | |
| "reward": 1.0008956 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.865171, | |
| "accuracy": 0.7777777777777778, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.0050834 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.00635925 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0133189 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.0085357 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1800, | |
| "reward": -0.1000087 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1000, | |
| "reward": 1.0178969500000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 200, | |
| "reward": 1.0122682 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 100, | |
| "reward": -0.10000385 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00172115 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.0473817, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 8, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3600, | |
| "reward": 1.00545865 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.00500835 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.0077852 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.00680955 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2000, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": 1.00558875 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": 1.0053636 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.00695965 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.73081605, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 3800, | |
| "budget_utilization": 0.95, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.00365745 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": -0.10003445000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": -0.10001660000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.0082355 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1200, | |
| "reward": 1.00633925 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1100, | |
| "reward": 1.00052035 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 700, | |
| "reward": 1.0067345 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 500, | |
| "reward": 1.00231655 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": -0.10000355000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 200, | |
| "reward": 1.00306705 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.753239799999999, | |
| "accuracy": 0.6666666666666666, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3600, | |
| "reward": -0.10001745000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.00260675 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.008986 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.0145197 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1900, | |
| "reward": 1.0019463 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1100, | |
| "reward": 1.0115177 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 300, | |
| "reward": 1.01369415 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 100, | |
| "reward": -0.10000880000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.10000455000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.051434400000002, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 8, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0028318999999999 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.0160206999999999 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2000, | |
| "reward": 1.00425785 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.002982 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1200, | |
| "reward": 1.00350735 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0050834 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.01234325 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.8307278, | |
| "accuracy": 0.7777777777777778, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": -0.1000162 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001660000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3100, | |
| "reward": 1.0016461 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2700, | |
| "reward": 1.0062842 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1900, | |
| "reward": 1.00919115 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1100, | |
| "reward": 1.00528855 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 300, | |
| "reward": 1.0055137 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 200, | |
| "reward": 1.00052035 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.00231655 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.8515897, | |
| "accuracy": 0.7777777777777778, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.007485 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001555000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.00590895 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2400, | |
| "reward": 1.0028318999999999 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2200, | |
| "reward": 1.0013409 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1400, | |
| "reward": 1.01729655 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1000, | |
| "reward": 1.00876085 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 200, | |
| "reward": 1.00799035 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.10000925000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.731551549999999, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.0025317 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.0100167 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.0091161 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": -0.10001815 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1500, | |
| "reward": 1.00217145 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 700, | |
| "reward": 1.0055137 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 500, | |
| "reward": -0.10000895 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 300, | |
| "reward": -0.1000075 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 200, | |
| "reward": 1.0004453 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0017912 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.7282562499999994, | |
| "accuracy": 0.5714285714285714, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 7, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3600, | |
| "reward": 1.0056838 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10001435 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.013469 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": -0.1000376 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": 1.0026818 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": -0.10003575 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.00650935 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.9628671, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 3600, | |
| "budget_utilization": 0.9, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3600, | |
| "reward": -0.1000149 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.00740995 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2800, | |
| "reward": 1.0079353 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.01744665 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1200, | |
| "reward": 1.01729655 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 800, | |
| "reward": 1.008986 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 700, | |
| "reward": 1.00187125 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 600, | |
| "reward": 1.0007455 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 500, | |
| "reward": 1.0007455 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 400, | |
| "reward": 1.0004453 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.748363149999999, | |
| "accuracy": 0.6666666666666666, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.0079353 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0086858 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0165460499999999 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2200, | |
| "reward": 1.00396765 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1400, | |
| "reward": 1.00994165 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1000, | |
| "reward": -0.1000182 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 900, | |
| "reward": 1.0013459 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 100, | |
| "reward": -0.10003655 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.10000445000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.7280397999999995, | |
| "accuracy": 0.6666666666666666, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": -0.1000153 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.0033572499999999 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2800, | |
| "reward": 1.0056838 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2000, | |
| "reward": 1.01189295 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1900, | |
| "reward": 1.00172115 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1100, | |
| "reward": -0.10003345000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1000, | |
| "reward": 1.00202135 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.0034423 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10003025 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.75136635, | |
| "accuracy": 0.6666666666666666, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.00650935 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.00440795 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.01204305 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2300, | |
| "reward": -0.10000395000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1500, | |
| "reward": 1.0082155 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 700, | |
| "reward": 1.0166211 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 600, | |
| "reward": -0.10000355000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 200, | |
| "reward": -0.10001550000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.0035924 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 10.058188900000001, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.00303705 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0112175 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.0178969500000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.013469 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 700, | |
| "reward": 1.00082055 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 600, | |
| "reward": 1.0017962 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 400, | |
| "reward": 1.0026918 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 300, | |
| "reward": 1.0016461 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 100, | |
| "reward": 1.0038926 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00172115 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.7356833, | |
| "accuracy": 0.6666666666666666, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.00663945 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2800, | |
| "reward": -0.10001745000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": -0.10001475 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.00708975 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.01309375 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 600, | |
| "reward": 1.00321715 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": 1.00366745 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 200, | |
| "reward": 1.00201635 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.10000840000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.637050099999999, | |
| "accuracy": 0.6, | |
| "total_tokens_used": 3600, | |
| "budget_utilization": 0.9, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.013469 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.0056638 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2300, | |
| "reward": -0.10000400000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1500, | |
| "reward": 1.0073899499999999 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1100, | |
| "reward": 1.00485825 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1000, | |
| "reward": 1.00127085 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 800, | |
| "reward": 1.00441795 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 700, | |
| "reward": -0.1000047 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 500, | |
| "reward": -0.10000740000000001 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": -0.10000360000000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.84070835, | |
| "accuracy": 0.7777777777777778, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.0043329 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.0049133 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.0125684 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": -0.10001855000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": -0.10001535 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0050834 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 600, | |
| "reward": 1.0040427 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 200, | |
| "reward": 1.0056838 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.00411775 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.92735145, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 3900, | |
| "budget_utilization": 0.975, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.00408775 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2400, | |
| "reward": 1.00468815 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.00558875 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": -0.1000169 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 800, | |
| "reward": 1.0065844 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 600, | |
| "reward": 1.0020914 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 50, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 550, | |
| "reward": 1.0005979 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 350, | |
| "reward": 1.001491 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 50, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 300, | |
| "reward": 1.00082305 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 100, | |
| "reward": 1.00141595 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.8424329, | |
| "accuracy": 0.7777777777777778, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": -0.10001660000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.00320715 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.0056638 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.01039195 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1400, | |
| "reward": -0.10000890000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1000, | |
| "reward": 1.00876085 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 200, | |
| "reward": 1.0113676 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 100, | |
| "reward": 1.0022465 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.00082055 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.84093365, | |
| "accuracy": 0.7777777777777778, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3950, | |
| "reward": -0.1000019 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3150, | |
| "reward": 1.0047632 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2750, | |
| "reward": 1.0017812 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1950, | |
| "reward": 1.0146698 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1150, | |
| "reward": -0.10003435000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1050, | |
| "reward": 1.001496 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 250, | |
| "reward": 1.0160206999999999 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 50, | |
| "reward": 1.00111575 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 50, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00112325 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.753542500000001, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 3900, | |
| "budget_utilization": 0.975, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.013469 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.00318715 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2300, | |
| "reward": 1.0019463 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1500, | |
| "reward": 1.0092662000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 50, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1450, | |
| "reward": 1.0005979 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1400, | |
| "reward": -0.1000023 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1000, | |
| "reward": 1.0077852 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 200, | |
| "reward": 1.01729655 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 150, | |
| "reward": -0.10000175 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 100, | |
| "reward": -0.10000175 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.84911345, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3200, | |
| "reward": 1.00453805 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3000, | |
| "reward": -0.10000735000000001 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2200, | |
| "reward": 1.0112175 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1400, | |
| "reward": 1.0173716 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1300, | |
| "reward": 1.0013459 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 500, | |
| "reward": 1.0094163 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 300, | |
| "reward": -0.10000705 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 200, | |
| "reward": 1.0019463 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 100, | |
| "reward": 1.00142095 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.00187125 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 4.53276095, | |
| "accuracy": 0.5, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": -0.10003300000000001 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": 1.00889095 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1600, | |
| "reward": 1.0103169 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1200, | |
| "reward": -0.10001535 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1150, | |
| "reward": -0.10000200000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 350, | |
| "reward": 1.01114245 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 300, | |
| "reward": -0.10000185 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 200, | |
| "reward": 1.00127085 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 100, | |
| "reward": 1.0011958 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.1000038 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.84588485, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.0073349 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.008966 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2000, | |
| "reward": 1.0070147 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1600, | |
| "reward": 1.0043329 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1500, | |
| "reward": 1.00172115 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1100, | |
| "reward": 1.0070347 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 700, | |
| "reward": 1.00575885 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 600, | |
| "reward": -0.1000037 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 200, | |
| "reward": 1.0037325 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": -0.10000715 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.05361085, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 8, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0070347 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.01489495 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.0073899499999999 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": 1.0041628 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": 1.01729655 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 300, | |
| "reward": 1.00127085 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 100, | |
| "reward": 1.00111575 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.0004453 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.9502416, | |
| "accuracy": 0.9, | |
| "total_tokens_used": 3900, | |
| "budget_utilization": 0.975, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.0041828 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.0122682 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2000, | |
| "reward": 1.01639595 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1600, | |
| "reward": 1.00200635 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1500, | |
| "reward": 1.0013459 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 700, | |
| "reward": 1.00889095 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 500, | |
| "reward": 1.0020914 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 300, | |
| "reward": -0.100007 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 200, | |
| "reward": 1.0019463 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 100, | |
| "reward": 1.00112075 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 6.7311904500000015, | |
| "accuracy": 0.7, | |
| "total_tokens_used": 3850, | |
| "budget_utilization": 0.9625, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.00558875 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": -0.10002935 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.0169213 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1500, | |
| "reward": 1.00142095 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1100, | |
| "reward": 1.0020814 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 700, | |
| "reward": 1.00200635 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 600, | |
| "reward": 1.0017962 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 550, | |
| "reward": -0.10000205000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 350, | |
| "reward": -0.10000905 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 150, | |
| "reward": 1.00141595 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.8409295000000006, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0070347 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2800, | |
| "reward": 1.00633925 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.00558875 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1900, | |
| "reward": -0.10000440000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1100, | |
| "reward": 1.00678955 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1000, | |
| "reward": -0.10000350000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 50, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 950, | |
| "reward": 1.00112325 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 150, | |
| "reward": 1.0122682 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 50, | |
| "reward": 1.00112075 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 50, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00067295 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.84731065, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 50, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3950, | |
| "reward": 1.00067295 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3150, | |
| "reward": 1.01399435 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2350, | |
| "reward": 1.00979155 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1550, | |
| "reward": 1.01474485 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1450, | |
| "reward": 1.00052035 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1400, | |
| "reward": -0.1000019 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 600, | |
| "reward": -0.1000366 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": 1.0011908 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 200, | |
| "reward": 1.00231655 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": 1.00411775 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 4.744839, | |
| "accuracy": 0.625, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 8, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.0043329 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2800, | |
| "reward": 1.0124183 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2000, | |
| "reward": 1.01129255 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1200, | |
| "reward": -0.10003400000000001 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1100, | |
| "reward": 1.00112075 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 900, | |
| "reward": -0.10000730000000001 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 100, | |
| "reward": 1.0157205 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": -0.1000047 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 8.047381699999999, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 8, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3600, | |
| "reward": 1.0056838 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3200, | |
| "reward": 1.0023816 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2800, | |
| "reward": 1.004483 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 2000, | |
| "reward": 1.01414445 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1200, | |
| "reward": 1.00423785 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 400, | |
| "reward": 1.01039195 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 200, | |
| "reward": 1.00336725 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.0026918 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 4.6349301, | |
| "accuracy": 0.5555555555555556, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3950, | |
| "reward": -0.10000185 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 3550, | |
| "reward": 1.0046331 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2750, | |
| "reward": -0.10003665 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1950, | |
| "reward": 1.0038626 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1150, | |
| "reward": 1.01399435 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 350, | |
| "reward": 1.01204305 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 150, | |
| "reward": -0.10000780000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 50, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 100, | |
| "reward": -0.10000200000000001 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.0004453 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.839810100000001, | |
| "accuracy": 0.8, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 10, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3600, | |
| "reward": 1.0070347 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": -0.1000366 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.00305705 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.0122682 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1500, | |
| "reward": 1.00082055 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1100, | |
| "reward": -0.10001495 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 700, | |
| "reward": 1.0065844 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 300, | |
| "reward": 1.0047831999999999 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 100, | |
| "reward": 1.0038926 | |
| }, | |
| { | |
| "step": 10, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 0, | |
| "reward": 1.00142095 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 3.5236087000000005, | |
| "accuracy": 0.4444444444444444, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 3200, | |
| "reward": 1.0050634 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2400, | |
| "reward": -0.1000366 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2000, | |
| "reward": -0.1000143 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1900, | |
| "reward": 1.00127085 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 400, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 1500, | |
| "reward": -0.10001405000000001 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 700, | |
| "reward": 1.00964145 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 300, | |
| "reward": 1.00771015 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 200, | |
| "reward": -0.1000037 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 0, | |
| "reward": -0.1000085 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.0585641500000005, | |
| "accuracy": 1.0, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 5, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 3200, | |
| "reward": 1.01294365 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 2400, | |
| "reward": 1.0136191 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1600, | |
| "reward": 1.00318715 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 800, | |
| "reward": 1.0161708 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 0, | |
| "reward": 1.0126434500000001 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 7.9392846, | |
| "accuracy": 0.8888888888888888, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 9, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.0047831999999999 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.0170713999999998 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": 1.0052135 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 1200, | |
| "reward": -0.1000317 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 400, | |
| "reward": 1.0056638 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 300, | |
| "reward": 1.0016461 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 200, | |
| "reward": 1.0017962 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 100, | |
| "reward": 1.00187125 | |
| }, | |
| { | |
| "step": 9, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00127085 | |
| } | |
| ] | |
| }, | |
| { | |
| "total_reward": 5.838685699999999, | |
| "accuracy": 0.75, | |
| "total_tokens_used": 4000, | |
| "budget_utilization": 1.0, | |
| "steps": 8, | |
| "per_step": [ | |
| { | |
| "step": 1, | |
| "tokens_allocated": 400, | |
| "was_correct": true, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 3600, | |
| "reward": 1.00545865 | |
| }, | |
| { | |
| "step": 2, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 2800, | |
| "reward": 1.0100167 | |
| }, | |
| { | |
| "step": 3, | |
| "tokens_allocated": 800, | |
| "was_correct": false, | |
| "difficulty": "math_l1_l2", | |
| "remaining_budget": 2000, | |
| "reward": -0.10003095000000001 | |
| }, | |
| { | |
| "step": 4, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 1900, | |
| "reward": 1.0004453 | |
| }, | |
| { | |
| "step": 5, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 1100, | |
| "reward": 1.00408775 | |
| }, | |
| { | |
| "step": 6, | |
| "tokens_allocated": 800, | |
| "was_correct": true, | |
| "difficulty": "math_l3", | |
| "remaining_budget": 300, | |
| "reward": 1.0165460499999999 | |
| }, | |
| { | |
| "step": 7, | |
| "tokens_allocated": 200, | |
| "was_correct": false, | |
| "difficulty": "math_l4_l5", | |
| "remaining_budget": 100, | |
| "reward": -0.10000925000000001 | |
| }, | |
| { | |
| "step": 8, | |
| "tokens_allocated": 100, | |
| "was_correct": true, | |
| "difficulty": "gsm8k", | |
| "remaining_budget": 0, | |
| "reward": 1.00217145 | |
| } | |
| ] | |
| } | |
| ] | |
| } | |
| } |