File size: 1,952 Bytes
d8bc908 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | {
"SignSGD_ConfigC_T32": {
"config": "SignSGD_ConfigC_T32",
"n_params": 1668128,
"model_mem_mb": 6.36,
"optimizer_mem_mb": 6.36,
"peak_vram_mb": 388.2,
"final_loss_avg100": 1.7113,
"min_loss": 1.5921,
"loss_1000": 1.8099,
"loss_2500": 1.6944,
"loss_5000": 1.6944,
"avg_step_ms": 11.2
},
"SignSGD_ConfigE_T32": {
"config": "SignSGD_ConfigE_T32",
"n_params": 1668128,
"model_mem_mb": 6.36,
"optimizer_mem_mb": 6.36,
"peak_vram_mb": 388.2,
"final_loss_avg100": 1.6149,
"min_loss": 1.5157,
"loss_1000": 1.7146,
"loss_2500": 1.5861,
"loss_5000": 1.5861,
"avg_step_ms": 11.2
},
"Lion_bf16_T32": {
"config": "Lion_bf16_T32",
"n_params": 1668128,
"model_mem_mb": 3.18,
"optimizer_mem_mb": 9.55,
"peak_vram_mb": 388.2,
"final_loss_avg100": 2.5521,
"min_loss": 2.4668,
"loss_1000": 2.5683,
"loss_2500": 2.5486,
"loss_5000": 2.5486,
"avg_step_ms": 11.2
},
"Lion_FP32_T32": {
"config": "Lion_FP32_T32",
"n_params": 1668128,
"model_mem_mb": 6.36,
"optimizer_mem_mb": 12.73,
"peak_vram_mb": 388.2,
"final_loss_avg100": 3.428,
"min_loss": 2.4911,
"loss_1000": 3.0947,
"loss_2500": 3.4403,
"loss_5000": 3.4403,
"avg_step_ms": 11.2
},
"Adam_bf16_T32": {
"config": "Adam_bf16_T32",
"n_params": 1668128,
"model_mem_mb": 3.18,
"optimizer_mem_mb": 9.55,
"peak_vram_mb": 388.2,
"final_loss_avg100": 2.3487,
"min_loss": 2.248,
"loss_1000": 2.3016,
"loss_2500": 2.3237,
"loss_5000": 2.3237,
"avg_step_ms": 11.2
},
"Adam_FP32_T32": {
"config": "Adam_FP32_T32",
"n_params": 1668128,
"model_mem_mb": 6.36,
"optimizer_mem_mb": 19.09,
"peak_vram_mb": 388.2,
"final_loss_avg100": 2.7597,
"min_loss": 2.2049,
"loss_1000": 2.3037,
"loss_2500": 2.6991,
"loss_5000": 2.6991,
"avg_step_ms": 11.2
}
} |