| { | |
| "experiment": "ssd-Qwen3.6-35B-A3B-r16-full-20260505-184155", | |
| "model": "Qwen/Qwen3.6-35B-A3B", | |
| "config": { | |
| "model_name": "Qwen/Qwen3.6-35B-A3B", | |
| "max_seq_length": 2048, | |
| "load_in_4bit": false, | |
| "load_in_8bit": false, | |
| "lora_r": 16, | |
| "lora_alpha": 16, | |
| "lora_dropout": 0.0, | |
| "lora_bias": "none", | |
| "use_rslora": false, | |
| "optim": "adamw_8bit", | |
| "batch_size": 4, | |
| "gradient_accumulation_steps": 8, | |
| "packing": false, | |
| "learning_rate": 0.0002, | |
| "lr_scheduler_type": "cosine", | |
| "warmup_steps": 0.06, | |
| "weight_decay": 0.01, | |
| "max_steps": 150, | |
| "save_steps": 50, | |
| "eval_steps": 25, | |
| "logging_steps": 5, | |
| "seed": 42, | |
| "experiment_name": "ssd-Qwen3.6-35B-A3B-r16-full-20260505-184155", | |
| "skip_eval": false, | |
| "smoke_test": false | |
| }, | |
| "results": { | |
| "train_loss": 0.5228066476186116, | |
| "steps": 150, | |
| "epoch": null | |
| }, | |
| "timings": { | |
| "model_load_sec": 31.11, | |
| "data_load_sec": 22.24, | |
| "lora_setup_sec": 9.57, | |
| "training_sec": 4606.66, | |
| "training_loss": 0.5228, | |
| "training_steps": 150, | |
| "save_sec": 1.65, | |
| "step_timing": { | |
| "total_steps": 150, | |
| "avg_step_sec": 29.347, | |
| "min_step_sec": 14.719, | |
| "max_step_sec": 33.735, | |
| "total_step_sec": 4402.09, | |
| "eval_count": 5, | |
| "total_eval_sec": 3821.16, | |
| "save_count": 3, | |
| "total_save_sec": 10.16, | |
| "peak_gpu_gb": 64.7 | |
| }, | |
| "total_sec": 4690.74, | |
| "total_min": 78.18, | |
| "total_params": 34679779968, | |
| "trainable_params": 19169280, | |
| "trainable_pct": 0.0553 | |
| } | |
| } |