E-Rong
/

til-26-ae-agent

Model card Files Files and versions

til-26-ae-agent / session_state.json

E-Rong's picture

Upload session_state.json

67d546f verified 1 day ago

2.54 kB

	{
	"project": "TIL-26-AE Bomberman Agent",
	"repo": "E-Rong/til-26-ae-agent",
	"space": "e-rong/til-26-ae",
	"last_updated": "2026-05-14",
	"current_phase": 2,
	"phases": {
	"1": {
	"status": "COMPLETE",
	"timesteps": 500352,
	"checkpoint": "phase1_final.zip",
	"eval_win_rate": "92.0%",
	"eval_avg_reward": 180.1,
	"eval_survival": "100.0%",
	"completed_at": "2026-05-14T04:30:00Z"
	},
	"2": {
	"status": "RUNNING_IN_HF_JOB",
	"job_id": "6a058adfe48bea4538b9c767",
	"started_from": "phase2_ckpt_600352.zip",
	"latest_checkpoint": "phase2_ckpt_600352.zip",
	"latest_timestep": 600352,
	"target_timestep": 1000352,
	"remaining_steps": 400000,
	"shaping": {
	"method": "visit_count_adaptive",
	"k": 1.2,
	"base_weight": 0.5
	},
	"note": "Resumed via HF Job at 2026-05-14 08:42 UTC. Using snapshot_download for auth."
	},
	"3": {
	"status": "PENDING",
	"duration": 1000000,
	"opponents": "rule_based_curriculum",
	"teams": 3
	}
	},
	"mistakes_log": [
	{
	"date": "2026-05-14",
	"mistake": "Used sandboxes for 3+ hour training runs",
	"cost": "~$4.87",
	"why_wrong": "Sandboxes are interactive dev environments, not batch compute. They recycle/timeout and keep billing when empty.",
	"fix": "Use HF Jobs for any training >30 minutes"
	},
	{
	"date": "2026-05-14",
	"mistake": "git clone private repo in HF Job without auth",
	"cost": "~$0.10",
	"why_wrong": "HF Jobs have HF_TOKEN env var, but git clone doesn't use it automatically. Need snapshot_download or token-in-URL.",
	"fix": "Use huggingface_hub.snapshot_download() which auto-uses HF_TOKEN"
	},
	{
	"date": "2026-05-14",
	"mistake": "No session state persistence on Hub",
	"cost": "Time lost reconstructing state",
	"why_wrong": "Relied on ephemeral /app files instead of pushing state to Hub repo after every milestone.",
	"fix": "Push session_state.json to Hub after every milestone. Read it at start of every session."
	}
	],
	"scripts": {
	"phase1_training": "completed",
	"phase2_training": "phase2_resume.py in Hub repo",
	"inference": "ae_manager.py in Hub repo (also in e-rong/til-26-ae ae/src/)"
	},
	"next_steps": [
	"Monitor Phase 2 HF Job (6a058adfe48bea4538b9c767) until completion",
	"Verify phase2_final.zip pushed to Hub",
	"Run evaluation vs random opponents",
	"Proceed to Phase 3 curriculum"
	]
	}