---
# LLaMA-Factory supervised fine-tuning (SFT) config — LoRA on Qwen2.5-1.5B-Instruct.
# NOTE(review): original file was collapsed onto one line (invalid YAML);
# restored to one key per line. All values are unchanged.

# Run control
stage: sft
do_train: true
max_samples: 100000
do_eval: false

# Checkpointing / logging
save_strategy: steps
save_steps: 5
logging_steps: 10  # NOTE(review): exceeds max_steps below, so at most one log line — confirm intended

# Precision
fp16: false
bf16: true

# Optimizer
adam_beta1: 0.9
adam_beta2: 0.95

overwrite_output_dir: true

# Batching / memory
per_device_train_batch_size: 1
gradient_accumulation_steps: 1
gradient_checkpointing: true

# Schedule
learning_rate: 1.0e-06
lr_scheduler_type: cosine
num_train_epochs: 2
warmup_ratio: 0.05
weight_decay: 0.0001

# Prompt template for the Qwen chat format
template: qwen

# NOTE(review): presumably caps training at 10 optimizer steps regardless of
# num_train_epochs (HF Trainer semantics) — looks like a smoke-test run; confirm.
max_steps: 10

# Data preprocessing
preprocessing_num_workers: 16
overwrite_cache: true

# Model
model_name_or_path: Qwen/Qwen2.5-1.5B-Instruct
finetuning_type: lora
trust_remote_code: true

# Dataset
dataset: TAUR_dev_D_SFT_C_ours_cd3arg_10responses_reflections10_formats_C_full
dataset_dir: /scratch/zrs2020/LlamaFactoryHelper/LLaMA-Factory/data
cutoff_len: 16192
tokenized_path: /scratch/zrs2020/.cache/llamafactory/tokenized/TAUR_dev_D_SFT_C_ours_cd3arg_10responses_reflections10_formats_C_full

# Output
output_dir: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints