{ "_comment": "Excerpt of the training configuration; see adapter_config.json for the base model required by PEFT.", "task_type": "causal_lm", "torch_dtype": "bfloat16", "max_length": 8192, "max_new_tokens": 64, "tuner": { "type": "lora", "lora_rank": 32, "lora_alpha": 64, "lora_dropout": 0.05, "lora_bias": "none", "target_modules": "all-linear (language model only; vision merger limited to linear_fc1/linear_fc2)", "use_dora": false, "use_rslora": false, "freeze_vit": true, "freeze_aligner": false }, "optimizer": { "name": "adamw_torch_fused", "learning_rate": 1e-4, "weight_decay": 0.1, "adam_beta1": 0.9, "adam_beta2": 0.95, "adam_epsilon": 1e-8, "max_grad_norm": 1.0, "lr_scheduler_type": "cosine", "warmup_ratio": 0.05, "aligner_lr": 2e-6 }, "training": { "num_train_epochs": 1.0, "per_device_train_batch_size": 1, "gradient_accumulation_steps": 8, "world_size": 4, "global_batch_size": 32, "bf16": true, "gradient_checkpointing": true, "seed": 42, "data_seed": 42, "deepspeed_zero_stage": 2, "total_steps": 294, "best_eval_loss": 0.1063, "best_step": 294 }, "framework": { "ms_swift_version": "4.1.2", "peft_version": "0.19.1" } }