# test-rl-hackathon-budget/train/gen_outputs.py
# Akshay Babbar
# chore: HF Space export (size filter)
# 98a5a8c
"""
Generate sample episode output JSON files for the easy and hard_multi scenarios (both with seed 42).

Usage:
    uv run python train/gen_outputs.py

Output:
    outputs/episode_easy_seed42.json
    outputs/episode_hard_multi_seed42.json
"""
from __future__ import annotations
import json
from pathlib import Path
from budget_router.environment import BudgetRouterEnv
from budget_router.models import Action
from budget_router.policies import heuristic_baseline_policy
from budget_router.reward import grade_episode
from budget_router.tasks import EASY, HARD_MULTI
# Observation attributes recorded for every step, in the order they appear in the JSON output.
_OBSERVATION_FIELDS = (
    "provider_a_status",
    "provider_b_status",
    "provider_c_status",
    "budget_remaining",
    "queue_backlog",
    "system_latency",
    "step_count",
)


def _observation_snapshot(obs) -> dict[str, float]:
    """Copy the recorded observation attributes of ``obs`` into a dict of floats."""
    return {name: float(getattr(obs, name)) for name in _OBSERVATION_FIELDS}


def _json_safe_metadata(metadata) -> dict:
    """Return a copy of ``metadata`` whose values can be passed to ``json.dumps``.

    Values that are int, float, bool, str or None are kept as-is; anything else
    is replaced by ``str(value)``. A ``None`` (or empty) ``metadata`` yields ``{}``.
    """
    json_native = (int, float, bool, str, type(None))
    return {
        key: value if isinstance(value, json_native) else str(value)
        for key, value in (metadata or {}).items()
    }


def capture_episode(scenario, seed: int) -> dict:
    """Run one episode with the heuristic baseline policy and record every step.

    Args:
        scenario: Scenario object passed to ``BudgetRouterEnv.reset``; its
            ``name`` attribute is stored in the result.
        seed: Seed passed to ``BudgetRouterEnv.reset``.

    Returns:
        A dict with the keys ``scenario``, ``seed``, ``policy``, ``total_steps``,
        ``grader`` (the ``grade_episode`` result with values cast to float) and
        ``steps`` (one record per environment step holding the action, the
        observation before and after the step, the reward, the done flag and
        the JSON-safe metadata).
    """
    env = BudgetRouterEnv()
    obs = env.reset(seed=seed, scenario=scenario)
    steps = []
    while not obs.done:
        action: Action = heuristic_baseline_policy(obs)
        # Snapshot the pre-step observation before ``obs`` is rebound below.
        obs_before = _observation_snapshot(obs)
        obs = env.step(action)
        meta = _json_safe_metadata(obs.metadata)
        steps.append({
            # Use the step number reported in the metadata when present,
            # otherwise fall back to a 1-based running count.
            "step": int(meta.get("step", len(steps) + 1)),
            "action": action.action_type.value,
            "observation_before": obs_before,
            "observation_after": _observation_snapshot(obs),
            "reward": float(obs.reward),
            "done": bool(obs.done),
            "metadata": meta,
        })
    # NOTE(review): reads the environment's private ``_internal.history``;
    # switch to a public accessor if the environment offers one.
    grader_raw = grade_episode(env._internal.history)
    grader = {k: float(v) for k, v in grader_raw.items()}
    return {
        "scenario": scenario.name,
        "seed": seed,
        "policy": "heuristic",
        "total_steps": len(steps),
        "grader": grader,
        "steps": steps,
    }
def main() -> None:
    """Capture the easy and hard_multi episodes (seed 42) and write them as JSON.

    Creates the relative ``outputs`` directory if it is missing, writes one
    indented JSON file per scenario into it, and prints a progress line and a
    short summary (step count and the grader's ``overall_score``) for each.
    """
    target_dir = Path("outputs")
    target_dir.mkdir(exist_ok=True)

    # (progress banner, scenario, output file name) for each episode to export.
    jobs = (
        ("Capturing easy seed=42 ...", EASY, "episode_easy_seed42.json"),
        ("Capturing hard_multi seed=42 ...", HARD_MULTI, "episode_hard_multi_seed42.json"),
    )
    for banner, scenario, file_name in jobs:
        print(banner)
        episode = capture_episode(scenario, 42)
        serialized = json.dumps(episode, indent=2)
        (target_dir / file_name).write_text(serialized)
        print(f" total_steps={episode['total_steps']} grader={episode['grader']['overall_score']:.4f}")

    print("✅ Done — outputs/ contains 2 valid JSON files")


# Run the export only when this file is executed as a script.
if __name__ == "__main__":
    main()