grpo-countdown-problem / src /config /sft /model /qwen2.5-3b.yaml
Dat1710's picture
Upload folder using huggingface_hub
00db46c verified
# Qwen2.5-3B model configuration
#
# NOTE(review): this file is named for (and headed as) Qwen2.5-3B, but the
# model_id below points at Qwen/Qwen2.5-Math-1.5B. Confirm which checkpoint is
# intended, then either update model_id or rename the file/header to match.

# Hugging Face model identifier
model_id: Qwen/Qwen2.5-Math-1.5B

# Device mapping strategy
device_map: cuda

# LoRA configuration
# The keys below must be nested under `lora`; at column 0 they would parse as
# top-level keys and leave `lora` null.
# NOTE(review): key names match the fields of peft.LoraConfig — presumably the
# loader forwards this mapping to it; verify against the consuming code.
lora:
  # Adapter rank
  r: 32
  # Adapter alpha (here 2x the rank)
  lora_alpha: 64
  # Module names that receive adapters
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
  lora_dropout: 0.05
  # Plain string "none" (YAML does not treat lowercase `none` as null)
  bias: none
  task_type: CAUSAL_LM