model_id: /home/ec2-user/_Zhengyue/download/models/Meta-Llama-3.1-8B-Instruct
model_family: llama-3.1
LoRA:
  r: 0
  alpha: 128
  dropout: 0.05
used_policy: true
used_strategy: true
step_token: <step>,</step>
answer_token: <answer>,</answer>
loss_type: grad_ascent
tune_vision_tower: false
tune_mm_projector: true
tune_language_model: true
data_path: /home/ec2-user/_Zhengyue/workspace/Step-DPO/sft/Safety-Reasoning/safety_reasoning-mixed_benign4k_beh7k_jailbreak6k_vanilla_6k_helpful23k.json
split: with_strategy_policy
batch_size: 1
gradient_accumulation_steps: 32
max_grad_norm: 1.0
num_epochs: 3
max_length: 4096
save_dir: models/llama8b_${num_epochs}_epochs_lr${lr}_${model_family}_${split}
save_steps: 100
lr: 5.0e-06
weight_decay: 0.01
seed: 233
workers: 6
lr_scheduler_type: cosine
warmup_ratio: 0.0
max_train_steps: -1
report_to: wandb
resume_from_checkpoint: ''