---
# Reconstructed from a whitespace-mangled (single-line) dump: indentation was
# lost, so the nesting below is inferred from key semantics. The `columns:` and
# `tags:` sub-keys match LLaMA-Factory's dataset_info schema, which anchors the
# dataset stanza; NOTE(review): confirm the `finetuning:` nesting against the
# consuming loader. All scalar values are unchanged from the original.
_target_: null

# Job identity and launch mode.
job:
  name: testing__pvv2_lora
  mode: local
  dry_run: false
  work_dir: null

# SLURM submission parameters (all unset except CPU count).
slurm:
  time_limit: null
  constraint: null
  memory: null
  cpus_per_task: 16
  partition: null
  mail_user: null

# Hardware layout for the run.
execution:
  nodes: 1
  gpus_per_node: 2
  num_gpus: null
  hostfile: null

secrets_file: ./secrets.env

# Base model to fine-tune.
model:
  name_or_path: Qwen/Qwen2.5-1.5B-Instruct
  finetuning_type: lora

# Dataset definition (LLaMA-Factory dataset_info style).
dataset:
  name: TAUR_dev_D_SFT_C_ours_cd3arg_10responses_reflections10_formats_C_full
  dir: null
  info_json: null
  template: qwen
  cutoff_len: 16192
  val_size: 0.0
  tokenized_path: /scratch/zrs2020/.cache/llamafactory/tokenized/TAUR_dev_D_SFT_C_ours_cd3arg_10responses_reflections10_formats_C_full
  hf_hub_url: TAUR-dev/D-SFT_C-ours_cd3arg_10responses_reflections10_formats-C_full
  formatting: sharegpt
  ranking: false
  subset: null
  split: train
  folder: null
  num_samples: null
  # Column-name remapping; null means "use the format's default".
  columns:
    prompt: null
    query: null
    response: null
    history: null
    messages: conversations
    system: null
    tools: null
    images: null
    videos: null
    audios: null
    chosen: null
    rejected: null
    kto_tag: null
  # Role/content tag names inside each sharegpt conversation turn.
  tags:
    role: role
    content: content
    user: user
    assistant: assistant
    observation: null
    function: null
    system: null

# Where experiment artifacts are written.
output:
  experiment_dir: ./experiments

# Weights & Biases logging (project unset).
wandb:
  project: null
  run_name: testing__pvv2_lora
  entity: null

# Hugging Face Hub upload target.
hf:
  repo_id: TAUR-dev/testing__pvv2_lora
  private: false
  upload_artifacts: true

# Post-run cleanup toggles.
cleanup:
  checkpoints: false
  merged: false

# Primary training arguments (HF Trainer style).
training:
  stage: sft
  do_train: true
  max_samples: 100000
  do_eval: false
  save_strategy: steps
  save_steps: 5
  logging_steps: 10
  fp16: false
  bf16: true
  adam_beta1: 0.9
  adam_beta2: 0.95
  overwrite_output_dir: true
  per_device_train_batch_size: 1
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 1.0e-06
  lr_scheduler_type: cosine
  num_train_epochs: 2
  warmup_ratio: 0.05
  weight_decay: 0.0001
  template: qwen
  max_steps: 10
  preprocessing_num_workers: 16
  overwrite_cache: true

# LoRA fine-tuning stanza. NOTE(review): several keys here (learning_rate,
# num_train_epochs, save_steps, val_size, do_eval) differ from the `training:`
# stanza above — presumably this block takes precedence for the LoRA run;
# confirm against the consumer before relying on either set.
finetuning:
  training:
    stage: sft
    do_train: true
    finetuning_type: lora
    lora_rank: 8
    lora_alpha: 16
    lora_dropout: 0.05
    lora_target: all
    overwrite_cache: true
    preprocessing_num_workers: 16
    dataloader_num_workers: 4
    logging_steps: 10
    save_steps: 500
    plot_loss: true
    overwrite_output_dir: true
    save_only_model: false
    report_to: none
    per_device_train_batch_size: 1
    gradient_accumulation_steps: 8
    learning_rate: 0.0001
    num_train_epochs: 3.0
    lr_scheduler_type: cosine
    warmup_ratio: 0.1
    bf16: true
    ddp_timeout: 180000000
    resume_from_checkpoint: null
    val_size: 0.1
    per_device_eval_batch_size: 1
    eval_strategy: steps
    eval_steps: 500
    do_eval: true
  # Options for merging/exporting the LoRA adapter into the base model.
  merge:
    export_dir: null
    export_size: 5
    export_device: cpu
    export_legacy_format: false