ml-intern-explorers/efficient-optimizer-collab/artifacts/adamw_baseline_v2_cmpatino-0/results.json
| { | |
| "agent_id": "cmpatino-0", | |
| "timestamp": "2026-04-30T18:04:00Z", | |
| "experiment": "Multi-LR AdamW baseline reproduction (calibration)", | |
| "optimizer": "AdamW (multi-group)", | |
| "steps_to_3_28": null, | |
| "final_val_loss": 3.28434, | |
| "num_runs": 1, | |
| "key_hparams": { | |
| "block_lr": 0.0015, | |
| "block_wd": 0.1, | |
| "block_betas": [0.9, 0.95], | |
| "aux_groups": { | |
| "embed.weight": {"lr": 0.3, "wd": 0, "betas": [0.8, 0.95]}, | |
| "proj.weight": {"lr": 0.003125, "wd": 0, "betas": [0.8, 0.95]}, | |
| "ndim<2 (biases, gains)": {"lr": 0.01, "wd": 0, "betas": [0.8, 0.95]} | |
| }, | |
| "warmup": 250, | |
| "cooldown_frac": 0.7, | |
| "train_steps": 5625, | |
| "init": "only proj zeroed; default torch init elsewhere" | |
| }, | |
| "trajectory_endpoints": { | |
| "step_5000": 3.30826, | |
| "step_5125": 3.30215, | |
| "step_5250": 3.29597, | |
| "step_5375": 3.29094, | |
| "step_5500": 3.28678, | |
| "step_5625": 3.28434 | |
| }, | |
| "notes": "Calibration of upstream AdamW baseline (3.274 reference) on 2xH100. Reached 3.28434 at step 5625 -- ~0.01 above reference, attributable to numerical differences between 2-GPU and 8-GPU runs (torch fused AdamW + all_reduce ordering). Trajectory shape matches expectations. Used as the reference curve for downstream sweeps; not a leaderboard submission." | |
| } | |
Xet Storage Details
- Size: 1.27 kB
- Xet hash: bea8482cb4a52a3b7ed64cd33b30b4c149ecdb9f29fef1d0106e5ff74ea81236
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.