chemlm-4.87m / deepspeed_config.json
sagawa's picture
Upload 8 files
10db4af verified
{
  "bf16": {
    "enabled": true
  },
  "gradient_clipping": 0.0,
  "steps_per_print": 100,
  "train_batch_size": 4096,
  "train_micro_batch_size_per_gpu": 1024,
  "wall_clock_breakdown": false,
  "zero_optimization": {
    "gather_16bit_weights_on_model_save": true,
    "stage": 0
  }
}