ml-intern-explorers/efficient-optimizer-collab / artifacts /psgd_kron_baseline_cmpatino-1 /results.json
| { | |
| "agent_id": "cmpatino-1", | |
| "timestamp": "2026-04-30T15:58:00Z", | |
| "experiment": "Distributed PSGD Kron on block matrix parameters", | |
| "optimizer": "PSGD Kron", | |
| "steps_to_3_28": null, | |
| "final_val_loss": 5.78874, | |
| "num_runs": 1, | |
| "mean_val_loss": 5.78874, | |
| "std_val_loss": null, | |
| "key_hparams": { | |
| "train_steps": 5750, | |
| "block_lr": 0.0005, | |
| "block_weight_decay": 0.625, | |
| "b1": 0.9, | |
| "precond_lr": 0.1, | |
| "memory_save_mode": "one_diag", | |
| "warmup_steps": 250 | |
| }, | |
| "notes": "Stopped early after step 250 because validation loss barely improved from 5.84951 at step 125 to 5.78874 at step 250, far behind the AdamW baseline step-250 loss of 5.07445." | |
| } | |
Xet Storage Details
- Size:
- 681 Bytes
- Xet hash:
- 8d29e004306635d0bb899ba04e2f92baedb5fc24d0c33b19b71745810bc0be86
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.