E-Rong
/

til-26-ae-agent

ml-intern

Model card Files and versions

xet

Community

E-Rong committed about 5 hours ago

Commit

9da3da4

verified ·

1 Parent(s): 1823ab5

Update session_state: Phase 2 complete, Phase 3 pending

Browse files

Files changed (1) hide show

session_state.json +18 -16

session_state.json CHANGED Viewed

@@ -2,8 +2,8 @@
   "project": "TIL-26-AE Bomberman Agent",
   "repo": "E-Rong/til-26-ae-agent",
   "space": "e-rong/til-26-ae",
-  "last_updated": "2026-05-14",
-  "current_phase": 2,
   "phases": {
     "1": {
       "status": "COMPLETE",
@@ -15,25 +15,27 @@
       "completed_at": "2026-05-14T04:30:00Z"
     },
     "2": {
-      "status": "RUNNING_IN_HF_JOB",
       "job_id": "6a058adfe48bea4538b9c767",
-      "started_from": "phase2_ckpt_600352.zip",
-      "latest_checkpoint": "phase2_ckpt_600352.zip",
-      "latest_timestep": 600352,
-      "target_timestep": 1000352,
-      "remaining_steps": 400000,
       "shaping": {
         "method": "visit_count_adaptive",
         "k": 1.2,
         "base_weight": 0.5
       },
-      "note": "Resumed via HF Job at 2026-05-14 08:42 UTC. Using snapshot_download for auth."
     },
     "3": {
       "status": "PENDING",
       "duration": 1000000,
       "opponents": "rule_based_curriculum",
-      "teams": 3
     }
   },
   "mistakes_log": [
@@ -61,13 +63,13 @@
   ],
   "scripts": {
     "phase1_training": "completed",
-    "phase2_training": "phase2_resume.py in Hub repo",
-    "inference": "ae_manager.py in Hub repo (also in e-rong/til-26-ae ae/src/)"
   },
   "next_steps": [
-    "Monitor Phase 2 HF Job (6a058adfe48bea4538b9c767) until completion",
-    "Verify phase2_final.zip pushed to Hub",
-    "Run evaluation vs random opponents",
-    "Proceed to Phase 3 curriculum"
   ]
 }

   "project": "TIL-26-AE Bomberman Agent",
   "repo": "E-Rong/til-26-ae-agent",
   "space": "e-rong/til-26-ae",
+  "last_updated": "2026-05-14T11:30:00Z",
+  "current_phase": 3,
   "phases": {
     "1": {
       "status": "COMPLETE",
       "completed_at": "2026-05-14T04:30:00Z"
     },
     "2": {
+      "status": "COMPLETE",
       "job_id": "6a058adfe48bea4538b9c767",
+      "timesteps": 1001760,
+      "checkpoint": "phase2_final.zip",
+      "eval_win_rate": "93.0%",
+      "eval_avg_reward": 153.4,
+      "eval_avg_bombs": 20.1,
+      "completed_at": "2026-05-14T11:30:00Z",
       "shaping": {
         "method": "visit_count_adaptive",
         "k": 1.2,
         "base_weight": 0.5
       },
+      "note": "Exploration shaping successful. Reward decreased (180→153) but win rate increased (92%→93%), indicating more robust exploration behavior."
     },
     "3": {
       "status": "PENDING",
       "duration": 1000000,
       "opponents": "rule_based_curriculum",
+      "teams": 3,
+      "script": "phase3_curriculum.py"
     }
   },
   "mistakes_log": [
   ],
   "scripts": {
     "phase1_training": "completed",
+    "phase2_training": "completed (phase2_resume.py)",
+    "phase3_training": "phase3_curriculum.py ready to submit",
+    "inference": "ae_manager.py in Hub repo"
   },
   "next_steps": [
+    "Submit Phase 3 HF Job using phase3_curriculum.py",
+    "Phase 3: 5-stage rule-based curriculum (static→random→simple_bomb→evasive→mixed), 1M steps",
+    "Monitor Phase 3 until completion"
   ]
 }