Update session_state: Phase 2 complete, Phase 3 pending
Browse files
- session_state.json +18 -16
session_state.json
CHANGED
|
@@ -2,8 +2,8 @@
|
|
| 2 |
"project": "TIL-26-AE Bomberman Agent",
|
| 3 |
"repo": "E-Rong/til-26-ae-agent",
|
| 4 |
"space": "e-rong/til-26-ae",
|
| 5 |
-
"last_updated": "2026-05-
|
| 6 |
-
"current_phase":
|
| 7 |
"phases": {
|
| 8 |
"1": {
|
| 9 |
"status": "COMPLETE",
|
|
@@ -15,25 +15,27 @@
|
|
| 15 |
"completed_at": "2026-05-14T04:30:00Z"
|
| 16 |
},
|
| 17 |
"2": {
|
| 18 |
-
"status": "
|
| 19 |
"job_id": "6a058adfe48bea4538b9c767",
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
-
"
|
| 23 |
-
"
|
| 24 |
-
"
|
|
|
|
| 25 |
"shaping": {
|
| 26 |
"method": "visit_count_adaptive",
|
| 27 |
"k": 1.2,
|
| 28 |
"base_weight": 0.5
|
| 29 |
},
|
| 30 |
-
"note": "
|
| 31 |
},
|
| 32 |
"3": {
|
| 33 |
"status": "PENDING",
|
| 34 |
"duration": 1000000,
|
| 35 |
"opponents": "rule_based_curriculum",
|
| 36 |
-
"teams": 3
|
|
|
|
| 37 |
}
|
| 38 |
},
|
| 39 |
"mistakes_log": [
|
|
@@ -61,13 +63,13 @@
|
|
| 61 |
],
|
| 62 |
"scripts": {
|
| 63 |
"phase1_training": "completed",
|
| 64 |
-
"phase2_training": "phase2_resume.py
|
| 65 |
-
"
|
|
|
|
| 66 |
},
|
| 67 |
"next_steps": [
|
| 68 |
-
"
|
| 69 |
-
"
|
| 70 |
-
"
|
| 71 |
-
"Proceed to Phase 3 curriculum"
|
| 72 |
]
|
| 73 |
}
|
|
|
|
| 2 |
"project": "TIL-26-AE Bomberman Agent",
|
| 3 |
"repo": "E-Rong/til-26-ae-agent",
|
| 4 |
"space": "e-rong/til-26-ae",
|
| 5 |
+
"last_updated": "2026-05-14T11:30:00Z",
|
| 6 |
+
"current_phase": 3,
|
| 7 |
"phases": {
|
| 8 |
"1": {
|
| 9 |
"status": "COMPLETE",
|
|
|
|
| 15 |
"completed_at": "2026-05-14T04:30:00Z"
|
| 16 |
},
|
| 17 |
"2": {
|
| 18 |
+
"status": "COMPLETE",
|
| 19 |
"job_id": "6a058adfe48bea4538b9c767",
|
| 20 |
+
"timesteps": 1001760,
|
| 21 |
+
"checkpoint": "phase2_final.zip",
|
| 22 |
+
"eval_win_rate": "93.0%",
|
| 23 |
+
"eval_avg_reward": 153.4,
|
| 24 |
+
"eval_avg_bombs": 20.1,
|
| 25 |
+
"completed_at": "2026-05-14T11:30:00Z",
|
| 26 |
"shaping": {
|
| 27 |
"method": "visit_count_adaptive",
|
| 28 |
"k": 1.2,
|
| 29 |
"base_weight": 0.5
|
| 30 |
},
|
| 31 |
+
"note": "Exploration shaping successful. Reward decreased (180→153) but win rate increased (92%→93%), indicating more robust exploration behavior."
|
| 32 |
},
|
| 33 |
"3": {
|
| 34 |
"status": "PENDING",
|
| 35 |
"duration": 1000000,
|
| 36 |
"opponents": "rule_based_curriculum",
|
| 37 |
+
"teams": 3,
|
| 38 |
+
"script": "phase3_curriculum.py"
|
| 39 |
}
|
| 40 |
},
|
| 41 |
"mistakes_log": [
|
|
|
|
| 63 |
],
|
| 64 |
"scripts": {
|
| 65 |
"phase1_training": "completed",
|
| 66 |
+
"phase2_training": "completed (phase2_resume.py)",
|
| 67 |
+
"phase3_training": "phase3_curriculum.py ready to submit",
|
| 68 |
+
"inference": "ae_manager.py in Hub repo"
|
| 69 |
},
|
| 70 |
"next_steps": [
|
| 71 |
+
"Submit Phase 3 HF Job using phase3_curriculum.py",
|
| 72 |
+
"Phase 3: 5-stage rule-based curriculum (static→random→simple_bomb→evasive→mixed), 1M steps",
|
| 73 |
+
"Monitor Phase 3 until completion"
|
|
|
|
| 74 |
]
|
| 75 |
}
|