{
  "bf16": {
    "enabled": true
  },
  "gradient_clipping": 0.0,
  "steps_per_print": 100,
  "train_batch_size": 4096,
  "train_micro_batch_size_per_gpu": 128,
  "wall_clock_breakdown": false,
  "zero_optimization": {
    "gather_16bit_weights_on_model_save": true,
    "stage": 0
  }
}