| { |
| "SignSGD_ConfigC_T32": { |
| "config": "SignSGD_ConfigC_T32", |
| "n_params": 1668128, |
| "model_mem_mb": 6.36, |
| "optimizer_mem_mb": 6.36, |
| "peak_vram_mb": 388.2, |
| "final_loss_avg100": 1.7113, |
| "min_loss": 1.5921, |
| "loss_1000": 1.8099, |
| "loss_2500": 1.6944, |
| "loss_5000": 1.6944, |
| "avg_step_ms": 11.2 |
| }, |
| "SignSGD_ConfigE_T32": { |
| "config": "SignSGD_ConfigE_T32", |
| "n_params": 1668128, |
| "model_mem_mb": 6.36, |
| "optimizer_mem_mb": 6.36, |
| "peak_vram_mb": 388.2, |
| "final_loss_avg100": 1.6149, |
| "min_loss": 1.5157, |
| "loss_1000": 1.7146, |
| "loss_2500": 1.5861, |
| "loss_5000": 1.5861, |
| "avg_step_ms": 11.2 |
| }, |
| "Lion_bf16_T32": { |
| "config": "Lion_bf16_T32", |
| "n_params": 1668128, |
| "model_mem_mb": 3.18, |
| "optimizer_mem_mb": 9.55, |
| "peak_vram_mb": 388.2, |
| "final_loss_avg100": 2.5521, |
| "min_loss": 2.4668, |
| "loss_1000": 2.5683, |
| "loss_2500": 2.5486, |
| "loss_5000": 2.5486, |
| "avg_step_ms": 11.2 |
| }, |
| "Lion_FP32_T32": { |
| "config": "Lion_FP32_T32", |
| "n_params": 1668128, |
| "model_mem_mb": 6.36, |
| "optimizer_mem_mb": 12.73, |
| "peak_vram_mb": 388.2, |
| "final_loss_avg100": 3.428, |
| "min_loss": 2.4911, |
| "loss_1000": 3.0947, |
| "loss_2500": 3.4403, |
| "loss_5000": 3.4403, |
| "avg_step_ms": 11.2 |
| }, |
| "Adam_bf16_T32": { |
| "config": "Adam_bf16_T32", |
| "n_params": 1668128, |
| "model_mem_mb": 3.18, |
| "optimizer_mem_mb": 9.55, |
| "peak_vram_mb": 388.2, |
| "final_loss_avg100": 2.3487, |
| "min_loss": 2.248, |
| "loss_1000": 2.3016, |
| "loss_2500": 2.3237, |
| "loss_5000": 2.3237, |
| "avg_step_ms": 11.2 |
| }, |
| "Adam_FP32_T32": { |
| "config": "Adam_FP32_T32", |
| "n_params": 1668128, |
| "model_mem_mb": 6.36, |
| "optimizer_mem_mb": 19.09, |
| "peak_vram_mb": 388.2, |
| "final_loss_avg100": 2.7597, |
| "min_loss": 2.2049, |
| "loss_1000": 2.3037, |
| "loss_2500": 2.6991, |
| "loss_5000": 2.6991, |
| "avg_step_ms": 11.2 |
| } |
| } |