E-Rong committed on
Commit
9da3da4
·
verified ·
1 Parent(s): 1823ab5

Update session_state: Phase 2 complete, Phase 3 pending

Browse files
Files changed (1) hide show
  1. session_state.json +18 -16
session_state.json CHANGED
@@ -2,8 +2,8 @@
2
  "project": "TIL-26-AE Bomberman Agent",
3
  "repo": "E-Rong/til-26-ae-agent",
4
  "space": "e-rong/til-26-ae",
5
- "last_updated": "2026-05-14",
6
- "current_phase": 2,
7
  "phases": {
8
  "1": {
9
  "status": "COMPLETE",
@@ -15,25 +15,27 @@
15
  "completed_at": "2026-05-14T04:30:00Z"
16
  },
17
  "2": {
18
- "status": "RUNNING_IN_HF_JOB",
19
  "job_id": "6a058adfe48bea4538b9c767",
20
- "started_from": "phase2_ckpt_600352.zip",
21
- "latest_checkpoint": "phase2_ckpt_600352.zip",
22
- "latest_timestep": 600352,
23
- "target_timestep": 1000352,
24
- "remaining_steps": 400000,
 
25
  "shaping": {
26
  "method": "visit_count_adaptive",
27
  "k": 1.2,
28
  "base_weight": 0.5
29
  },
30
- "note": "Resumed via HF Job at 2026-05-14 08:42 UTC. Using snapshot_download for auth."
31
  },
32
  "3": {
33
  "status": "PENDING",
34
  "duration": 1000000,
35
  "opponents": "rule_based_curriculum",
36
- "teams": 3
 
37
  }
38
  },
39
  "mistakes_log": [
@@ -61,13 +63,13 @@
61
  ],
62
  "scripts": {
63
  "phase1_training": "completed",
64
- "phase2_training": "phase2_resume.py in Hub repo",
65
- "inference": "ae_manager.py in Hub repo (also in e-rong/til-26-ae ae/src/)"
 
66
  },
67
  "next_steps": [
68
- "Monitor Phase 2 HF Job (6a058adfe48bea4538b9c767) until completion",
69
- "Verify phase2_final.zip pushed to Hub",
70
- "Run evaluation vs random opponents",
71
- "Proceed to Phase 3 curriculum"
72
  ]
73
  }
 
2
  "project": "TIL-26-AE Bomberman Agent",
3
  "repo": "E-Rong/til-26-ae-agent",
4
  "space": "e-rong/til-26-ae",
5
+ "last_updated": "2026-05-14T11:30:00Z",
6
+ "current_phase": 3,
7
  "phases": {
8
  "1": {
9
  "status": "COMPLETE",
 
15
  "completed_at": "2026-05-14T04:30:00Z"
16
  },
17
  "2": {
18
+ "status": "COMPLETE",
19
  "job_id": "6a058adfe48bea4538b9c767",
20
+ "timesteps": 1001760,
21
+ "checkpoint": "phase2_final.zip",
22
+ "eval_win_rate": "93.0%",
23
+ "eval_avg_reward": 153.4,
24
+ "eval_avg_bombs": 20.1,
25
+ "completed_at": "2026-05-14T11:30:00Z",
26
  "shaping": {
27
  "method": "visit_count_adaptive",
28
  "k": 1.2,
29
  "base_weight": 0.5
30
  },
31
+ "note": "Exploration shaping successful. Reward decreased (180→153) but win rate increased (92%→93%), indicating more robust exploration behavior."
32
  },
33
  "3": {
34
  "status": "PENDING",
35
  "duration": 1000000,
36
  "opponents": "rule_based_curriculum",
37
+ "teams": 3,
38
+ "script": "phase3_curriculum.py"
39
  }
40
  },
41
  "mistakes_log": [
 
63
  ],
64
  "scripts": {
65
  "phase1_training": "completed",
66
+ "phase2_training": "completed (phase2_resume.py)",
67
+ "phase3_training": "phase3_curriculum.py ready to submit",
68
+ "inference": "ae_manager.py in Hub repo"
69
  },
70
  "next_steps": [
71
+ "Submit Phase 3 HF Job using phase3_curriculum.py",
72
+ "Phase 3: 5-stage rule-based curriculum (static→random→simple_bomb→evasive→mixed), 1M steps",
73
+ "Monitor Phase 3 until completion"
 
74
  ]
75
  }