---
run:
  run_dir: ./runs/dpo_run_14b_v1
  seed: 42
  wandb:
    enabled: true
    project: dpo-training
    entity: null
    name: null
    tags:
      - dpo-lora
      - preference-optimization
    notes: null
model:
  repo_id: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
  revision: null
  base_local_dir: base_model
  trust_remote_code: true
  tokenizer_use_fast: true
  device_map: auto
  torch_dtype: bfloat16
  use_4bit: false
  bnb_4bit_quant_type: nf4
  bnb_4bit_use_double_quant: false
  bnb_4bit_compute_dtype: bfloat16
  attn_implementation: null
data:
  train_jsonl: dpo_pairs_generated.jsonl
  eval_jsonl: null
  eval_split_ratio: 0.1
  prompt_field: prompt
  chosen_field: chosen
  rejected_field: rejected
  score_field: f1_score
  format_type: chatml
  # Literal block scalar: preserves newlines exactly; string ends with a
  # single trailing newline (clip chomping), matching the original value.
  system_prompt: |
    You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.

    ## Output Format

    ##OUTPUT
    Explain the data flow and why each component must change:
    - Flow: [Input → Processing → Output with arrows]
    - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
    - Explain coupling between components

    ##SELECT
    modify::crates/path/to/file.rs::impl::ComponentName
    add::crates/another/file.rs::function::AnotherComponent
    <EOS>

    ## Rules

    1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
    2. Use `::` for nested items: `status::StructName::Type::Name`
    3. Always explain "must change because" and "without this"
    4. Types of components: function, struct, enum, impl, trait
    5. If there is extra information (e.g., enum variants), include that too.
    6. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
  max_length: 2048
  shuffle: true
  num_proc: 4
peft:
  enabled: true
  r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  bias: none
  target_modules: auto
dpo:
  beta: 0.1
  label_smoothing: 0.0
  loss_type: sigmoid
  use_reference_model: true
  reference_free: false
train:
  num_train_epochs: 3
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 8
  # Written with an explicit dot: bare `5e-5` loads as a *string* under
  # YAML 1.1 resolvers (e.g. PyYAML), whose float regex requires a dot.
  learning_rate: 5.0e-5
  weight_decay: 0.0
  warmup_ratio: 0.1
  lr_scheduler_type: cosine
  optim: adamw_torch
  max_grad_norm: 1.0
  gradient_checkpointing: true
  logging_steps: 2
  save_strategy: steps
  save_steps: 100
  save_total_limit: 10
  evaluation_strategy: steps
  eval_steps: 25
  load_best_model_at_end: true
  early_stopping:
    enabled: true
    patience: 5
    min_delta: 0.001
    metric: eval_loss
    mode: min
  resume_from_checkpoint: auto
merge:
  enabled: true
  merged_dtype: float16
  # Quoted so the value is unambiguously a string for the consumer.
  max_shard_size: "2GB"
  output_dir: ./merged_14b_dpo_lora