| { |
| "project": "TIL-26-AE Bomberman Agent", |
| "repo": "E-Rong/til-26-ae-agent", |
| "space": "e-rong/til-26-ae", |
| "last_updated": "2026-05-14T11:30:00Z", |
| "current_phase": 3, |
| "phases": { |
| "1": { |
| "status": "COMPLETE", |
| "timesteps": 500352, |
| "checkpoint": "phase1_final.zip", |
| "eval_win_rate": "92.0%", |
| "eval_avg_reward": 180.1, |
| "eval_survival": "100.0%", |
| "completed_at": "2026-05-14T04:30:00Z" |
| }, |
| "2": { |
| "status": "COMPLETE", |
| "job_id": "6a058adfe48bea4538b9c767", |
| "timesteps": 1001760, |
| "checkpoint": "phase2_final.zip", |
| "eval_win_rate": "93.0%", |
| "eval_avg_reward": 153.4, |
| "eval_avg_bombs": 20.1, |
| "completed_at": "2026-05-14T11:30:00Z", |
| "shaping": { |
| "method": "visit_count_adaptive", |
| "k": 1.2, |
| "base_weight": 0.5 |
| }, |
| "note": "Exploration shaping successful. Average eval reward decreased (180.1→153.4) but win rate increased (92%→93%), indicating more robust exploration behavior." |
| }, |
| "3": { |
| "status": "PENDING", |
| "duration": 1000000, |
| "opponents": "rule_based_curriculum", |
| "teams": 3, |
| "script": "phase3_curriculum.py" |
| } |
| }, |
| "mistakes_log": [ |
| { |
| "date": "2026-05-14", |
| "mistake": "Used sandboxes for 3+ hour training runs", |
| "cost": "~$4.87", |
| "why_wrong": "Sandboxes are interactive dev environments, not batch compute. They recycle or time out, and they keep billing while idle.", |
| "fix": "Use HF Jobs for any training >30 minutes" |
| }, |
| { |
| "date": "2026-05-14", |
| "mistake": "git clone private repo in HF Job without auth", |
| "cost": "~$0.10", |
| "why_wrong": "HF Jobs expose an HF_TOKEN env var, but git clone doesn't use it automatically. Cloning a private repo needs snapshot_download or a token-in-URL remote.", |
| "fix": "Use huggingface_hub.snapshot_download() which auto-uses HF_TOKEN" |
| }, |
| { |
| "date": "2026-05-14", |
| "mistake": "No session state persistence on Hub", |
| "cost": "Time lost reconstructing state", |
| "why_wrong": "Relied on ephemeral /app files instead of pushing state to Hub repo after every milestone.", |
| "fix": "Push session_state.json to Hub after every milestone. Read it at start of every session." |
| } |
| ], |
| "scripts": { |
| "phase1_training": "completed", |
| "phase2_training": "completed (phase2_resume.py)", |
| "phase3_training": "phase3_curriculum.py ready to submit", |
| "inference": "ae_manager.py in Hub repo" |
| }, |
| "next_steps": [ |
| "Submit Phase 3 HF Job using phase3_curriculum.py", |
| "Phase 3: 5-stage rule-based curriculum (static→random→simple_bomb→evasive→mixed), 1M steps", |
| "Monitor Phase 3 until completion" |
| ] |
| } |
|
|