File size: 2,687 Bytes
69d8b50
 
 
 
9da3da4
 
69d8b50
 
 
 
 
 
 
 
 
 
 
9da3da4
67d546f
9da3da4
 
 
 
 
 
69d8b50
 
 
 
 
9da3da4
69d8b50
 
 
 
 
9da3da4
 
69d8b50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67d546f
69d8b50
 
 
67d546f
9da3da4
 
 
69d8b50
 
9da3da4
 
 
69d8b50
67d546f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
{
  "project": "TIL-26-AE Bomberman Agent",
  "repo": "E-Rong/til-26-ae-agent",
  "space": "e-rong/til-26-ae",
  "last_updated": "2026-05-14T11:30:00Z",
  "current_phase": 3,
  "phases": {
    "1": {
      "status": "COMPLETE",
      "timesteps": 500352,
      "checkpoint": "phase1_final.zip",
      "eval_win_rate": "92.0%",
      "eval_avg_reward": 180.1,
      "eval_survival": "100.0%",
      "completed_at": "2026-05-14T04:30:00Z"
    },
    "2": {
      "status": "COMPLETE",
      "job_id": "6a058adfe48bea4538b9c767",
      "timesteps": 1001760,
      "checkpoint": "phase2_final.zip",
      "eval_win_rate": "93.0%",
      "eval_avg_reward": 153.4,
      "eval_avg_bombs": 20.1,
      "completed_at": "2026-05-14T11:30:00Z",
      "shaping": {
        "method": "visit_count_adaptive",
        "k": 1.2,
        "base_weight": 0.5
      },
      "note": "Exploration shaping successful. Average eval reward decreased (180.1→153.4) while win rate rose slightly (92.0%→93.0%), suggesting more robust exploration behavior."
    },
    "3": {
      "status": "PENDING",
      "duration": 1000000,
      "opponents": "rule_based_curriculum",
      "teams": 3,
      "script": "phase3_curriculum.py"
    }
  },
  "mistakes_log": [
    {
      "date": "2026-05-14",
      "mistake": "Used sandboxes for 3+ hour training runs",
      "cost": "~$4.87",
      "why_wrong": "Sandboxes are interactive dev environments, not batch compute. They recycle or time out, and keep billing when empty.",
      "fix": "Use HF Jobs for any training >30 minutes"
    },
    {
      "date": "2026-05-14",
      "mistake": "git clone private repo in HF Job without auth",
      "cost": "~$0.10",
      "why_wrong": "HF Jobs expose an HF_TOKEN env var, but git clone doesn't use it automatically. Use snapshot_download or a token-in-URL clone instead.",
      "fix": "Use huggingface_hub.snapshot_download() which auto-uses HF_TOKEN"
    },
    {
      "date": "2026-05-14",
      "mistake": "No session state persistence on Hub",
      "cost": "Time lost reconstructing state",
      "why_wrong": "Relied on ephemeral /app files instead of pushing state to Hub repo after every milestone.",
      "fix": "Push session_state.json to Hub after every milestone. Read it at start of every session."
    }
  ],
  "scripts": {
    "phase1_training": "completed",
    "phase2_training": "completed (phase2_resume.py)",
    "phase3_training": "phase3_curriculum.py ready to submit",
    "inference": "ae_manager.py in Hub repo"
  },
  "next_steps": [
    "Submit Phase 3 HF Job using phase3_curriculum.py",
    "Phase 3: 5-stage rule-based curriculum (static→random→simple_bomb→evasive→mixed), 1M steps",
    "Monitor Phase 3 until completion"
  ]
}