---
# SFT / preference-training configuration for Llama-3.1-8B-Instruct on the
# mixed safety-reasoning dataset. Reconstructed into valid block YAML — the
# original file had all keys collapsed onto one line, which does not parse.

# Model
model_id: /home/ec2-user/_Zhengyue/download/models/Meta-Llama-3.1-8B-Instruct
model_family: llama-3.1

# LoRA settings
# NOTE(review): r: 0 alongside alpha: 128 — rank 0 typically disables the
# LoRA adapter entirely; confirm this is intentional (full fine-tune?).
LoRA:
  r: 0
  alpha: 128
  dropout: 0.05

used_policy: true
used_strategy: true

# Delimiter tokens. Quoted: a bare `,` starts with a YAML indicator
# character and is not a valid plain scalar.
step_token: ","
answer_token: ","

loss_type: grad_ascent

# Which submodules receive gradient updates
tune_vision_tower: false
tune_mm_projector: true
tune_language_model: true

# Data
data_path: /home/ec2-user/_Zhengyue/workspace/Step-DPO/sft/Safety-Reasoning/safety_reasoning-mixed_benign4k_beh7k_jailbreak6k_vanilla_6k_helpful23k.json
split: with_strategy_policy

# Training schedule
batch_size: 1
gradient_accumulation_steps: 32
max_grad_norm: 1.0
num_epochs: 3
max_length: 4096

# Checkpointing — ${...} interpolations resolved by the config loader
# (presumably OmegaConf/Hydra; verify against the training script).
save_dir: models/llama8b_${num_epochs}_epochs_lr${lr}_${model_family}_${split}
save_steps: 100

# Optimizer
lr: 5.0e-06
weight_decay: 0.01
seed: 233
workers: 6
lr_scheduler_type: cosine
warmup_ratio: 0.0
# -1 means train for num_epochs rather than a fixed step count — TODO confirm
# against the trainer's handling of this sentinel.
max_train_steps: -1

# Logging / resume
report_to: wandb
resume_from_checkpoint: ''