File size: 2,741 Bytes
{
"project": "TIL-26-AE Bomberman Agent",
"repo": "E-Rong/til-26-ae-agent",
"space": "e-rong/til-26-ae",
"last_updated": "2026-05-14",
"current_phase": 2,
"phases": {
"1": {
"status": "COMPLETE",
"timesteps": 500352,
"checkpoint": "phase1_final.zip",
"eval_win_rate": "92.0%",
"eval_avg_reward": 180.1,
"eval_survival": "100.0%",
"completed_at": "2026-05-14T04:30:00Z"
},
"2": {
"status": "IN_PROGRESS",
"started_from": "phase1_final.zip",
"latest_checkpoint": "phase2_ckpt_600352.zip",
"latest_timestep": 600352,
"target_timestep": 1000352,
"remaining_steps": 400000,
"shaping": {
"method": "visit_count_adaptive",
"k": 1.2,
"base_weight": 0.5
},
"note": "Training crashed during sandbox session. Need to resume from checkpoint.",
"blockers": [
"HF Job git clone auth failure for private TIL repo",
"Sandbox process died without detection and the empty sandbox kept billing"
]
},
"3": {
"status": "PENDING",
"duration": 1000000,
"opponents": "rule_based_curriculum",
"teams": 3
}
},
"mistakes_log": [
{
"date": "2026-05-14",
"mistake": "Used sandboxes for 3+ hour training runs",
"cost": "~$4.87",
"why_wrong": "Sandboxes are interactive dev environments, not batch compute. They recycle or time out, and they keep billing even when empty.",
"fix": "Use HF Jobs for any training >30 minutes"
},
{
"date": "2026-05-14",
"mistake": "git clone private repo in HF Job without auth",
"cost": "~$0.10",
"why_wrong": "HF Jobs have HF_TOKEN env var, but git clone doesn't use it automatically. Need snapshot_download or token-in-URL.",
"fix": "Use huggingface_hub.snapshot_download() which auto-uses HF_TOKEN"
},
{
"date": "2026-05-14",
"mistake": "No session state persistence on Hub",
"cost": "Time lost reconstructing state",
"why_wrong": "Relied on ephemeral /app files instead of pushing state to Hub repo after every milestone.",
"fix": "This file. Push session_state.json to the Hub at the end of every session and pull it from the Hub at the start of the next."
}
],
"scripts": {
"phase1_training": "/app/phase1_script.py (lost in sandbox reset)",
"phase2_training": "phase2_job.py in Hub repo",
"inference": "ae_manager.py in Hub repo (also in e-rong/til-26-ae ae/src/)"
},
"next_steps": [
"Fix HF Job script to use snapshot_download instead of git clone",
"Test with 5-minute smoke job before full submission",
"Resume Phase 2 from phase2_ckpt_600352.zip to 1,000,352 steps",
"Run evaluation vs random opponents",
"Proceed to Phase 3 curriculum"
]
}