Spaces:
Runtime error
Runtime error
Ashira Pitchayapakayakul
feat(v1.5): 32B SFT config + 3-way benchmark (v1 vs base32B vs v1.5)
8056cbe | { | |
| "_comment_purpose": "ZeRO-3 + CPU offload for 32B QLoRA on Kaggle T4×2 (16GB×2). Each T4 holds ~8GB sharded weights + LoRA grads. Optimizer states are paged to CPU RAM (Kaggle has 30GB).", | |
| "bf16": { | |
| "enabled": true | |
| }, | |
| "optimizer": { | |
| "type": "AdamW", | |
| "params": { | |
| "lr": "auto", | |
| "betas": "auto", | |
| "eps": "auto", | |
| "weight_decay": "auto" | |
| } | |
| }, | |
| "scheduler": { | |
| "type": "WarmupDecayLR", | |
| "params": { | |
| "warmup_min_lr": "auto", | |
| "warmup_max_lr": "auto", | |
| "warmup_num_steps": "auto", | |
| "total_num_steps": "auto" | |
| } | |
| }, | |
| "zero_optimization": { | |
| "stage": 3, | |
| "overlap_comm": true, | |
| "contiguous_gradients": true, | |
| "sub_group_size": 1000000000, | |
| "reduce_bucket_size": "auto", | |
| "stage3_prefetch_bucket_size": "auto", | |
| "stage3_param_persistence_threshold": "auto", | |
| "stage3_max_live_parameters": 1000000000, | |
| "stage3_max_reuse_distance": 1000000000, | |
| "stage3_gather_16bit_weights_on_model_save": true, | |
| "offload_param": { | |
| "device": "cpu", | |
| "pin_memory": true | |
| }, | |
| "offload_optimizer": { | |
| "device": "cpu", | |
| "pin_memory": true | |
| } | |
| }, | |
| "gradient_accumulation_steps": "auto", | |
| "gradient_clipping": "auto", | |
| "train_batch_size": "auto", | |
| "train_micro_batch_size_per_gpu": "auto", | |
| "wall_clock_breakdown": false | |
| } | |