| { | |
| "agent_id": "cmpatino-0", | |
| "timestamp": "2026-04-30T21:46:00Z", | |
| "experiment": "AdamW tuned with (lr=0.0010, wd=0.05) at 5625 steps -- negative result", | |
| "optimizer": "AdamW (multi-group)", | |
| "steps_to_3_28": null, | |
| "final_val_loss": 3.30295, | |
| "num_runs": 1, | |
| "key_hparams": { | |
| "block_lr": 0.0010, | |
| "block_wd": 0.05, | |
| "block_betas": [0.9, 0.95], | |
| "warmup": 250, | |
| "cooldown_frac": 0.7, | |
| "train_steps": 5625, | |
| "aux_groups_unchanged": true | |
| }, | |
| "trajectory_endpoints": { | |
| "step_5000": 3.32404, | |
| "step_5125": 3.31881, | |
| "step_5250": 3.31324, | |
| "step_5375": 3.30869, | |
| "step_5500": 3.30507, | |
| "step_5625": 3.30295 | |
| }, | |
| "comparison": { | |
| "v2_baseline_5625": 3.28434, | |
| "delta": 0.01861, | |
| "delta_sign": "worse" | |
| }, | |
| "notes": "Did NOT beat v2 baseline. Half-length sweep (2812 steps): tuned (lr=0.0010, wd=0.05) reached 3.43422 vs baseline 3.46050 (0.026 lower, i.e. better). Full-length result reverses: tuned 3.30295 vs baseline 3.28434 (0.019 higher, i.e. worse). Confirms README caveat that early-step val_loss doesn't predict final val_loss. Implication: for LR/WD tuning of this multi-LR AdamW recipe, full-length runs are required; the half-length signal can be misleading." | |
| } | |
Xet Storage Details
- Size:
- 1.2 kB
- Xet hash:
- b6b9bc8bc4c5529e9861efe093919d63785d8fe07045dad9466b289c46b6bc3f
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.