{ "experiment": "ssd-Qwen3.6-35B-A3B-r16-full-20260505-184155", "base_model": "Qwen/Qwen3.6-35B-A3B", "adapter_path": "/checkpoints/experiments/ssd-Qwen3.6-35B-A3B-r16-full-20260505-184155/final_adapter", "merged_at": "2026-05-06T07:14:33", "dtype": "bfloat16", "merge_method": "PeftModel.merge_and_unload()", "training_metadata": { "experiment": "ssd-Qwen3.6-35B-A3B-r16-full-20260505-184155", "model": "Qwen/Qwen3.6-35B-A3B", "config": { "model_name": "Qwen/Qwen3.6-35B-A3B", "max_seq_length": 2048, "load_in_4bit": false, "load_in_8bit": false, "lora_r": 16, "lora_alpha": 16, "lora_dropout": 0.0, "lora_bias": "none", "use_rslora": false, "optim": "adamw_8bit", "batch_size": 4, "gradient_accumulation_steps": 8, "packing": false, "learning_rate": 0.0002, "lr_scheduler_type": "cosine", "warmup_steps": 0.06, "weight_decay": 0.01, "max_steps": 150, "save_steps": 50, "eval_steps": 25, "logging_steps": 5, "seed": 42, "experiment_name": "ssd-Qwen3.6-35B-A3B-r16-full-20260505-184155", "skip_eval": false, "smoke_test": false }, "results": { "train_loss": 0.5228066476186116, "steps": 150, "epoch": null }, "timings": { "model_load_sec": 31.11, "data_load_sec": 22.24, "lora_setup_sec": 9.57, "training_sec": 4606.66, "training_loss": 0.5228, "training_steps": 150, "save_sec": 1.65, "step_timing": { "total_steps": 150, "avg_step_sec": 29.347, "min_step_sec": 14.719, "max_step_sec": 33.735, "total_step_sec": 4402.09, "eval_count": 5, "total_eval_sec": 3821.16, "save_count": 3, "total_save_sec": 10.16, "peak_gpu_gb": 64.7 }, "total_sec": 4690.74, "total_min": 78.18, "total_params": 34679779968, "trainable_params": 19169280, "trainable_pct": 0.0553 } } }