{
  "bf16": {
    "enabled": true
  },
  "zero_optimization": {
    "stage": 3,
    "offload_optimizer": {
      "device": "cpu",
      "pin_memory": true
    },
    "offload_param": {
      "device": "cpu",
      "pin_memory": true
    },
    "overlap_comm": true,
    "contiguous_gradients": true,
    "sub_group_size": 1e9,
    "reduce_bucket_size": 5e8,
    "stage3_prefetch_bucket_size": 5e8,
    "stage3_param_persistence_threshold": 1e6,
    "stage3_max_live_parameters": 1e9,
    "stage3_max_reuse_distance": 1e9,
    "stage3_gather_16bit_weights_on_model_save": true
  },
  "gradient_accumulation_steps": 32,
  "gradient_clipping": 1.0,
  "steps_per_print": 10,
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "wall_clock_breakdown": false,
  "communication_data_type": "bf16",
  "prescale_gradients": false,
  "sparse_gradients": false,
  "compression_training": {
    "weight_quantization": {
      "shared_parameters": {},
      "different_groups": {}
    },
    "activation_quantization": {
      "shared_parameters": {},
      "different_groups": {}
    },
    "sparse_pruning": {
      "shared_parameters": {},
      "different_groups": {}
    }
  },
  "flops_profiler": {
    "enabled": false,
    "profile_step": 1,
    "module_depth": -1,
    "top_modules": 1,
    "detailed": true,
    "output_file": null
  },
  "tensorboard": {
    "enabled": true,
    "output_path": "./logs/tensorboard",
    "job_name": "helion_v2_training"
  }
}