| # Qwen2.5-Math-1.5B model configuration | |
| # Hugging Face model identifier | |
| model_id: Qwen/Qwen2.5-Math-1.5B | |
| # Device mapping strategy | |
| device_map: cuda | |
| # LoRA configuration | |
| lora: | |
| r: 32 | |
| lora_alpha: 64 | |
| target_modules: | |
| - q_proj | |
| - k_proj | |
| - v_proj | |
| - o_proj | |
| - gate_proj | |
| - up_proj | |
| - down_proj | |
| lora_dropout: 0.05 | |
| bias: none | |
| task_type: CAUSAL_LM | |