# Qwen2.5-Math-1.5B model configuration

# Hugging Face model identifier, written as `<organization>/<repository>`.
# Presumably handed to the model/tokenizer loader to select the base
# checkpoint — confirm against the code that reads this file.
model_id: Qwen/Qwen2.5-Math-1.5B

# Device mapping strategy.
# NOTE(review): presumably forwarded unchanged as the loader's `device_map`
# argument, in which case `cuda` would place the whole model on a single
# CUDA device rather than sharding it — confirm with the loading code.
device_map: cuda

# LoRA configuration
# The key names below match the field names of PEFT's `LoraConfig`;
# presumably this mapping is passed through to it — confirm against the
# code that loads this file.
lora:
  # Adapter rank (assumed PEFT meaning — confirm).
  r: 32
  # Scaling numerator; with `r: 32` above, the alpha / r ratio is 2.
  lora_alpha: 64
  target_modules:  # names of the submodules to adapt (assumed — confirm)
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
  # Presumably the dropout probability applied inside the adapters — confirm.
  lora_dropout: 0.05
  # The plain scalar `none` loads as the string "none", not as YAML null
  # (only `null` / `~` are null), so the consumer receives a string here.
  bias: none
  # Loads as the plain string "CAUSAL_LM".
  task_type: CAUSAL_LM