{
  "experiment": "ssd-Qwen3.6-35B-A3B-r16-full-20260505-184155",
  "model": "Qwen/Qwen3.6-35B-A3B",
  "config": {
    "model_name": "Qwen/Qwen3.6-35B-A3B",
    "max_seq_length": 2048,
    "load_in_4bit": false,
    "load_in_8bit": false,
    "lora_r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0.0,
    "lora_bias": "none",
    "use_rslora": false,
    "optim": "adamw_8bit",
    "batch_size": 4,
    "gradient_accumulation_steps": 8,
    "packing": false,
    "learning_rate": 0.0002,
    "lr_scheduler_type": "cosine",
    "warmup_steps": 0.06,
    "weight_decay": 0.01,
    "max_steps": 150,
    "save_steps": 50,
    "eval_steps": 25,
    "logging_steps": 5,
    "seed": 42,
    "experiment_name": "ssd-Qwen3.6-35B-A3B-r16-full-20260505-184155",
    "skip_eval": false,
    "smoke_test": false
  },
  "results": {
    "train_loss": 0.5228066476186116,
    "steps": 150,
    "epoch": null
  },
  "timings": {
    "model_load_sec": 31.11,
    "data_load_sec": 22.24,
    "lora_setup_sec": 9.57,
    "training_sec": 4606.66,
    "training_loss": 0.5228,
    "training_steps": 150,
    "save_sec": 1.65,
    "step_timing": {
      "total_steps": 150,
      "avg_step_sec": 29.347,
      "min_step_sec": 14.719,
      "max_step_sec": 33.735,
      "total_step_sec": 4402.09,
      "eval_count": 5,
      "total_eval_sec": 3821.16,
      "save_count": 3,
      "total_save_sec": 10.16,
      "peak_gpu_gb": 64.7
    },
    "total_sec": 4690.74,
    "total_min": 78.18,
    "total_params": 34679779968,
    "trainable_params": 19169280,
    "trainable_pct": 0.0553
  }
}