test-rl-hackathon-budget / outputs /ppo_hard_multi_eval.json
Akshay Babbar
chore: HF Space export (size filter)
98a5a8c
{
"timestamp": "20260426_141609",
"ppo_mean": 0.69635,
"ppo_std": 0.03555975409238808,
"ppo_ci": [
0.6709138516281651,
0.721786148371835
],
"heu_mean": 0.60779,
"heu_std": 0.038213652301529906,
"heu_ci": [
0.5804554991132379,
0.6351245008867622
],
"delta": 0.08855999999999997,
"delta_pct": 14.570822158969374,
"win_rate": 1.0,
"episodes": [
{
"seed": 0,
"ppo": {
"overall_score": 0.688,
"success_score": 0.7,
"latency_score": 0.6432,
"budget_score": 0.0909,
"sla_score": 1.0,
"adaptation_score": 0.9286
},
"heuristic": {
"overall_score": 0.5569,
"success_score": 0.65,
"latency_score": 0.4686,
"budget_score": 0.0364,
"sla_score": 0.9474,
"adaptation_score": 0.6032
}
},
{
"seed": 1,
"ppo": {
"overall_score": 0.731,
"success_score": 0.75,
"latency_score": 0.6778,
"budget_score": 0.1364,
"sla_score": 1.0,
"adaptation_score": 1.0
},
"heuristic": {
"overall_score": 0.6077,
"success_score": 0.7,
"latency_score": 0.5213,
"budget_score": 0.0455,
"sla_score": 1.0,
"adaptation_score": 0.6833
}
},
{
"seed": 2,
"ppo": {
"overall_score": 0.7226,
"success_score": 0.75,
"latency_score": 0.6358,
"budget_score": 0.1364,
"sla_score": 1.0,
"adaptation_score": 1.0
},
"heuristic": {
"overall_score": 0.6165,
"success_score": 0.7,
"latency_score": 0.4967,
"budget_score": 0.2,
"sla_score": 1.0,
"adaptation_score": 0.6357
}
},
{
"seed": 3,
"ppo": {
"overall_score": 0.7411,
"success_score": 0.75,
"latency_score": 0.7281,
"budget_score": 0.1364,
"sla_score": 1.0,
"adaptation_score": 1.0
},
"heuristic": {
"overall_score": 0.6289,
"success_score": 0.7,
"latency_score": 0.5416,
"budget_score": 0.2091,
"sla_score": 0.95,
"adaptation_score": 0.6833
}
},
{
"seed": 4,
"ppo": {
"overall_score": 0.6556,
"success_score": 0.65,
"latency_score": 0.6307,
"budget_score": 0.0909,
"sla_score": 1.0,
"adaptation_score": 0.8542
},
"heuristic": {
"overall_score": 0.5933,
"success_score": 0.65,
"latency_score": 0.5175,
"budget_score": 0.0818,
"sla_score": 1.0,
"adaptation_score": 0.6625
}
},
{
"seed": 5,
"ppo": {
"overall_score": 0.6704,
"success_score": 0.65,
"latency_score": 0.6214,
"budget_score": 0.0909,
"sla_score": 1.0,
"adaptation_score": 0.9375
},
"heuristic": {
"overall_score": 0.607,
"success_score": 0.65,
"latency_score": 0.5142,
"budget_score": 0.0364,
"sla_score": 0.9412,
"adaptation_score": 0.8125
}
},
{
"seed": 6,
"ppo": {
"overall_score": 0.7384,
"success_score": 0.75,
"latency_score": 0.7149,
"budget_score": 0.1364,
"sla_score": 1.0,
"adaptation_score": 1.0
},
"heuristic": {
"overall_score": 0.6546,
"success_score": 0.65,
"latency_score": 0.5611,
"budget_score": 0.4545,
"sla_score": 1.0,
"adaptation_score": 0.6458
}
},
{
"seed": 7,
"ppo": {
"overall_score": 0.6844,
"success_score": 0.7,
"latency_score": 0.6252,
"budget_score": 0.0909,
"sla_score": 1.0,
"adaptation_score": 0.9286
},
"heuristic": {
"overall_score": 0.6477,
"success_score": 0.7,
"latency_score": 0.5613,
"budget_score": 0.0364,
"sla_score": 1.0,
"adaptation_score": 0.85
}
},
{
"seed": 8,
"ppo": {
"overall_score": 0.6402,
"success_score": 0.65,
"latency_score": 0.5536,
"budget_score": 0.0909,
"sla_score": 1.0,
"adaptation_score": 0.8542
},
"heuristic": {
"overall_score": 0.5338,
"success_score": 0.6,
"latency_score": 0.4135,
"budget_score": 0.2,
"sla_score": 0.85,
"adaptation_score": 0.5682
}
},
{
"seed": 9,
"ppo": {
"overall_score": 0.6918,
"success_score": 0.7,
"latency_score": 0.607,
"budget_score": 0.1364,
"sla_score": 1.0,
"adaptation_score": 0.95
},
"heuristic": {
"overall_score": 0.6315,
"success_score": 0.7,
"latency_score": 0.5336,
"budget_score": 0.0818,
"sla_score": 1.0,
"adaptation_score": 0.7625
}
}
]
}