E-Rong
/

til-26-ae-agent

ml-intern

Model card Files Files and versions

xet

Community

E-Rong committed on about 19 hours ago

Commit

69d8b50

verified ·

1 Parent(s): 2f3c7cd

Add session_state.json for cross-session persistence

Browse files

Files changed (1) hide show

session_state.json +77 -0

session_state.json ADDED Viewed

	@@ -0,0 +1,77 @@

+{
+  "project": "TIL-26-AE Bomberman Agent",
+  "repo": "E-Rong/til-26-ae-agent",
+  "space": "e-rong/til-26-ae",
+  "last_updated": "2026-05-14",
+  "current_phase": 2,
+  "phases": {
+    "1": {
+      "status": "COMPLETE",
+      "timesteps": 500352,
+      "checkpoint": "phase1_final.zip",
+      "eval_win_rate": "92.0%",
+      "eval_avg_reward": 180.1,
+      "eval_survival": "100.0%",
+      "completed_at": "2026-05-14T04:30:00Z"
+    },
+    "2": {
+      "status": "IN_PROGRESS",
+      "started_from": "phase1_final.zip",
+      "latest_checkpoint": "phase2_ckpt_600352.zip",
+      "latest_timestep": 600352,
+      "target_timestep": 1000352,
+      "remaining_steps": 400000,
+      "shaping": {
+        "method": "visit_count_adaptive",
+        "k": 1.2,
+        "base_weight": 0.5
+      },
+      "note": "Training crashed during sandbox session. Need to resume from checkpoint.",
+      "blockers": [
+        "HF Job git clone auth failure for private TIL repo",
+        "Sandbox process died without detection and kept billing while empty"
+      ]
+    },
+    "3": {
+      "status": "PENDING",
+      "duration": 1000000,
+      "opponents": "rule_based_curriculum",
+      "teams": 3
+    }
+  },
+  "mistakes_log": [
+    {
+      "date": "2026-05-14",
+      "mistake": "Used sandboxes for 3+ hour training runs",
+      "cost": "~$4.87",
+      "why_wrong": "Sandboxes are interactive dev environments, not batch compute. They recycle/time out and keep billing when empty.",
+      "fix": "Use HF Jobs for any training >30 minutes"
+    },
+    {
+      "date": "2026-05-14",
+      "mistake": "git clone private repo in HF Job without auth",
+      "cost": "~$0.10",
+      "why_wrong": "HF Jobs have HF_TOKEN env var, but git clone doesn't use it automatically. Need snapshot_download or token-in-URL.",
+      "fix": "Use huggingface_hub.snapshot_download() which auto-uses HF_TOKEN"
+    },
+    {
+      "date": "2026-05-14",
+      "mistake": "No session state persistence on Hub",
+      "cost": "Time lost reconstructing state",
+      "why_wrong": "Relied on ephemeral /app files instead of pushing state to Hub repo after every milestone.",
+      "fix": "This file. Push/pull session_state.json from Hub at every session boundary."
+    }
+  ],
+  "scripts": {
+    "phase1_training": "/app/phase1_script.py (lost in sandbox reset)",
+    "phase2_training": "phase2_job.py in Hub repo",
+    "inference": "ae_manager.py in Hub repo (also in e-rong/til-26-ae ae/src/)"
+  },
+  "next_steps": [
+    "Fix HF Job script to use snapshot_download instead of git clone",
+    "Test with 5-minute smoke job before full submission",
+    "Resume Phase 2 from phase2_ckpt_600352.zip to 1,000,352 steps",
+    "Run evaluation vs random opponents",
+    "Proceed to Phase 3 curriculum"
+  ]
+}