til-26-ae-agent / session_state.json
E-Rong's picture
Update session_state: Phase 2 complete, Phase 3 pending
9da3da4 verified
raw
history blame
2.69 kB
{
"project": "TIL-26-AE Bomberman Agent",
"repo": "E-Rong/til-26-ae-agent",
"space": "e-rong/til-26-ae",
"last_updated": "2026-05-14T11:30:00Z",
"current_phase": 3,
"phases": {
"1": {
"status": "COMPLETE",
"timesteps": 500352,
"checkpoint": "phase1_final.zip",
"eval_win_rate": "92.0%",
"eval_avg_reward": 180.1,
"eval_survival": "100.0%",
"completed_at": "2026-05-14T04:30:00Z"
},
"2": {
"status": "COMPLETE",
"job_id": "6a058adfe48bea4538b9c767",
"timesteps": 1001760,
"checkpoint": "phase2_final.zip",
"eval_win_rate": "93.0%",
"eval_avg_reward": 153.4,
"eval_avg_bombs": 20.1,
"completed_at": "2026-05-14T11:30:00Z",
"shaping": {
"method": "visit_count_adaptive",
"k": 1.2,
"base_weight": 0.5
},
"note": "Exploration shaping successful. Reward decreased (180→153) but win rate increased (92%→93%), indicating more robust exploration behavior."
},
"3": {
"status": "PENDING",
"duration": 1000000,
"opponents": "rule_based_curriculum",
"teams": 3,
"script": "phase3_curriculum.py"
}
},
"mistakes_log": [
{
"date": "2026-05-14",
"mistake": "Used sandboxes for 3+ hour training runs",
"cost": "~$4.87",
"why_wrong": "Sandboxes are interactive dev environments, not batch compute. They recycle/timeout and keep billing when empty.",
"fix": "Use HF Jobs for any training >30 minutes"
},
{
"date": "2026-05-14",
"mistake": "git clone private repo in HF Job without auth",
"cost": "~$0.10",
"why_wrong": "HF Jobs have HF_TOKEN env var, but git clone doesn't use it automatically. Need snapshot_download or token-in-URL.",
"fix": "Use huggingface_hub.snapshot_download() which auto-uses HF_TOKEN"
},
{
"date": "2026-05-14",
"mistake": "No session state persistence on Hub",
"cost": "Time lost reconstructing state",
"why_wrong": "Relied on ephemeral /app files instead of pushing state to Hub repo after every milestone.",
"fix": "Push session_state.json to Hub after every milestone. Read it at start of every session."
}
],
"scripts": {
"phase1_training": "completed",
"phase2_training": "completed (phase2_resume.py)",
"phase3_training": "phase3_curriculum.py ready to submit",
"inference": "ae_manager.py in Hub repo"
},
"next_steps": [
"Submit Phase 3 HF Job using phase3_curriculum.py",
"Phase 3: 5-stage rule-based curriculum (static→random→simple_bomb→evasive→mixed), 1M steps",
"Monitor Phase 3 until completion"
]
}