| { | |
| "agent_id": "cmpatino-0", | |
| "timestamp": "2026-04-30T17:11:00Z", | |
| "experiment": "Pure single-LR AdamW @ baseline-stated hparams (negative result)", | |
| "optimizer": "AdamW", | |
| "steps_to_3_28": null, | |
| "final_val_loss": 3.39869, | |
| "num_runs": 1, | |
| "key_hparams": { | |
| "lr": 0.0015, | |
| "wd": 0.1, | |
| "betas": [0.9, 0.95], | |
| "warmup": 250, | |
| "cooldown_frac": 0.7, | |
| "train_steps": 5625, | |
| "scheme": "single-LR (all params lr=0.0015)", | |
| "init": "Muon-style: proj zeroed, embed normal_(), other weights normal_(std=0.33**0.5/n**0.5)" | |
| }, | |
| "notes": "Did not reach 3.28. Root cause identified by reading upstream reference log: the 'AdamW baseline' on the leaderboard actually uses a multi-LR scheme (embed lr=0.3, proj lr=1/320, ndim<2 lr=0.01, blocks lr=0.0015) with only proj zeroed at init. Single-LR with lr=0.0015 for everything is severely undertuned for embed/proj/scalars. v2 reproduction launched at artifacts/adamw_baseline_v2_cmpatino-0/." | |
| } | |
Xet Storage Details
- Size:
- 966 Bytes
- Xet hash:
- e62e7dae32054b4946f37f3add0e01ec0d75a7e68da443af8829b189779b5d14
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.