File size: 2,741 Bytes
69d8b50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
{
  "project": "TIL-26-AE Bomberman Agent",
  "repo": "E-Rong/til-26-ae-agent",
  "space": "e-rong/til-26-ae",
  "last_updated": "2026-05-14",
  "current_phase": 2,
  "phases": {
    "1": {
      "status": "COMPLETE",
      "timesteps": 500352,
      "checkpoint": "phase1_final.zip",
      "eval_win_rate": "92.0%",
      "eval_avg_reward": 180.1,
      "eval_survival": "100.0%",
      "completed_at": "2026-05-14T04:30:00Z"
    },
    "2": {
      "status": "IN_PROGRESS",
      "started_from": "phase1_final.zip",
      "latest_checkpoint": "phase2_ckpt_600352.zip",
      "latest_timestep": 600352,
      "target_timestep": 1000352,
      "remaining_steps": 400000,
      "shaping": {
        "method": "visit_count_adaptive",
        "k": 1.2,
        "base_weight": 0.5
      },
      "note": "Training crashed during sandbox session. Need to resume from checkpoint.",
      "blockers": [
        "HF Job git clone auth failure for private TIL repo",
        "Sandbox process died without detection; the empty sandbox kept billing"
      ]
    },
    "3": {
      "status": "PENDING",
      "duration": 1000000,
      "opponents": "rule_based_curriculum",
      "teams": 3
    }
  },
  "mistakes_log": [
    {
      "date": "2026-05-14",
      "mistake": "Used sandboxes for 3+ hour training runs",
      "cost": "~$4.87",
      "why_wrong": "Sandboxes are interactive dev environments, not batch compute. They recycle/time out and keep billing when empty.",
      "fix": "Use HF Jobs for any training >30 minutes"
    },
    {
      "date": "2026-05-14",
      "mistake": "git clone private repo in HF Job without auth",
      "cost": "~$0.10",
      "why_wrong": "HF Jobs have HF_TOKEN env var, but git clone doesn't use it automatically. Need snapshot_download or token-in-URL.",
      "fix": "Use huggingface_hub.snapshot_download() which auto-uses HF_TOKEN"
    },
    {
      "date": "2026-05-14",
      "mistake": "No session state persistence on Hub",
      "cost": "Time lost reconstructing state",
      "why_wrong": "Relied on ephemeral /app files instead of pushing state to Hub repo after every milestone.",
      "fix": "This file. Push/pull session_state.json from Hub at every session boundary."
    }
  ],
  "scripts": {
    "phase1_training": "/app/phase1_script.py (lost in sandbox reset)",
    "phase2_training": "phase2_job.py in Hub repo",
    "inference": "ae_manager.py in Hub repo (also in e-rong/til-26-ae ae/src/)"
  },
  "next_steps": [
    "Fix HF Job script to use snapshot_download instead of git clone",
    "Test with 5-minute smoke job before full submission",
    "Resume Phase 2 from phase2_ckpt_600352.zip to 1,000,352 steps",
    "Run evaluation vs random opponents",
    "Proceed to Phase 3 curriculum"
  ]
}