{ "project": "TIL-26-AE Bomberman Agent", "repo": "E-Rong/til-26-ae-agent", "space": "e-rong/til-26-ae", "last_updated": "2026-05-14", "current_phase": 2, "phases": { "1": { "status": "COMPLETE", "timesteps": 500352, "checkpoint": "phase1_final.zip", "eval_win_rate": "92.0%", "eval_avg_reward": 180.1, "eval_survival": "100.0%", "completed_at": "2026-05-14T04:30:00Z" }, "2": { "status": "IN_PROGRESS", "started_from": "phase1_final.zip", "latest_checkpoint": "phase2_ckpt_600352.zip", "latest_timestep": 600352, "target_timestep": 1000352, "remaining_steps": 400000, "shaping": { "method": "visit_count_adaptive", "k": 1.2, "base_weight": 0.5 }, "note": "Training crashed during sandbox session. Need to resume from checkpoint.", "blockers": [ "HF Job git clone auth failure for private TIL repo", "Sandbox process died undetected; the empty sandbox kept billing" ] }, "3": { "status": "PENDING", "duration": 1000000, "opponents": "rule_based_curriculum", "teams": 3 } }, "mistakes_log": [ { "date": "2026-05-14", "mistake": "Used sandboxes for 3+ hour training runs", "cost": "~$4.87", "why_wrong": "Sandboxes are interactive dev environments, not batch compute. They recycle or time out, and keep billing even when nothing is running.", "fix": "Use HF Jobs for any training >30 minutes" }, { "date": "2026-05-14", "mistake": "git clone private repo in HF Job without auth", "cost": "~$0.10", "why_wrong": "HF Jobs have an HF_TOKEN env var, but git clone doesn't use it automatically. Use snapshot_download or a token-in-URL clone instead.", "fix": "Use huggingface_hub.snapshot_download() which auto-uses HF_TOKEN" }, { "date": "2026-05-14", "mistake": "No session state persistence on Hub", "cost": "Time lost reconstructing state", "why_wrong": "Relied on ephemeral /app files instead of pushing state to Hub repo after every milestone.", "fix": "This file. Push/pull session_state.json from Hub at every session boundary." 
} ], "scripts": { "phase1_training": "/app/phase1_script.py (lost in sandbox reset)", "phase2_training": "phase2_job.py in Hub repo", "inference": "ae_manager.py in Hub repo (also in e-rong/til-26-ae ae/src/)" }, "next_steps": [ "Fix HF Job script to use snapshot_download instead of git clone", "Test with 5-minute smoke job before full submission", "Resume Phase 2 from phase2_ckpt_600352.zip to 1,000,352 steps", "Run evaluation vs random opponents", "Proceed to Phase 3 curriculum" ] }