| { |
| "experiment": "ssd-Qwen3.6-35B-A3B-r16-full-20260505-184155", |
| "base_model": "Qwen/Qwen3.6-35B-A3B", |
| "adapter_path": "/checkpoints/experiments/ssd-Qwen3.6-35B-A3B-r16-full-20260505-184155/final_adapter", |
| "merged_at": "2026-05-06T07:14:33", |
| "dtype": "bfloat16", |
| "merge_method": "PeftModel.merge_and_unload()", |
| "training_metadata": { |
| "experiment": "ssd-Qwen3.6-35B-A3B-r16-full-20260505-184155", |
| "model": "Qwen/Qwen3.6-35B-A3B", |
| "config": { |
| "model_name": "Qwen/Qwen3.6-35B-A3B", |
| "max_seq_length": 2048, |
| "load_in_4bit": false, |
| "load_in_8bit": false, |
| "lora_r": 16, |
| "lora_alpha": 16, |
| "lora_dropout": 0.0, |
| "lora_bias": "none", |
| "use_rslora": false, |
| "optim": "adamw_8bit", |
| "batch_size": 4, |
| "gradient_accumulation_steps": 8, |
| "packing": false, |
| "learning_rate": 0.0002, |
| "lr_scheduler_type": "cosine", |
| "warmup_steps": 0.06, |
| "weight_decay": 0.01, |
| "max_steps": 150, |
| "save_steps": 50, |
| "eval_steps": 25, |
| "logging_steps": 5, |
| "seed": 42, |
| "experiment_name": "ssd-Qwen3.6-35B-A3B-r16-full-20260505-184155", |
| "skip_eval": false, |
| "smoke_test": false |
| }, |
| "results": { |
| "train_loss": 0.5228066476186116, |
| "steps": 150, |
| "epoch": null |
| }, |
| "timings": { |
| "model_load_sec": 31.11, |
| "data_load_sec": 22.24, |
| "lora_setup_sec": 9.57, |
| "training_sec": 4606.66, |
| "training_loss": 0.5228, |
| "training_steps": 150, |
| "save_sec": 1.65, |
| "step_timing": { |
| "total_steps": 150, |
| "avg_step_sec": 29.347, |
| "min_step_sec": 14.719, |
| "max_step_sec": 33.735, |
| "total_step_sec": 4402.09, |
| "eval_count": 5, |
| "total_eval_sec": 3821.16, |
| "save_count": 3, |
| "total_save_sec": 10.16, |
| "peak_gpu_gb": 64.7 |
| }, |
| "total_sec": 4690.74, |
| "total_min": 78.18, |
| "total_params": 34679779968, |
| "trainable_params": 19169280, |
| "trainable_pct": 0.0553 |
| } |
| } |
| } |