# /// script
# dependencies = ["trl>=0.12.0", "peft>=0.7.0", "trackio", "transformers", "datasets", "torch"]
# ///
"""
Fine-tune Qwen2.5-0.5B on open-r1/codeforces-cots for instruction following.
Production-ready script with LoRA, Trackio monitoring, and Hub saving.
"""
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from transformers import AutoTokenizer
import trackio
# Load dataset - using the "messages" field for chat format
print("πŸ“š Loading dataset: open-r1/codeforces-cots")
dataset = load_dataset("open-r1/codeforces-cots", "solutions", split="train")
# For demo purposes, use a subset. Remove this line for full training.
dataset = dataset.select(range(min(1000, len(dataset))))
print(f"πŸ“Š Training on {len(dataset)} examples")
# Load tokenizer to apply chat template
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B", trust_remote_code=True)
# Define formatting function to convert messages to text using chat template
def formatting_func(example):
    """Convert the messages field to formatted text using the tokenizer's chat template."""
    if "messages" in example and example["messages"]:
        # Apply chat template to convert messages to text
        text = tokenizer.apply_chat_template(
            example["messages"],
            tokenize=False,
            add_generation_prompt=False
        )
        return {"text": text}
    return {"text": ""}
# Apply formatting to dataset
print("πŸ”„ Formatting dataset with chat template...")
dataset = dataset.map(formatting_func, remove_columns=dataset.column_names)
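# For reference: Qwen2.5's bundled chat template emits a ChatML-style layout,
# roughly like the sketch below (exact special tokens and any default system
# turn come from the tokenizer, so treat this as illustrative):
#   <|im_start|>user
#   ...problem statement...<|im_end|>
#   <|im_start|>assistant
#   ...solution...<|im_end|>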
# Create train/eval split for monitoring
dataset_split = dataset.train_test_split(test_size=0.1, seed=42)
# Configure LoRA for efficient training
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
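# Note: lora_alpha / r sets the adapter's effective scaling (32 / 16 = 2.0 here),
# and targeting every attention and MLP projection keeps only a small fraction
# of the 0.5B base weights trainable.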
# Initialize trainer with SFT configuration
print("🎯 Initializing SFTTrainer")
trainer = SFTTrainer(
    model="Qwen/Qwen2.5-0.5B",
    train_dataset=dataset_split["train"],
    eval_dataset=dataset_split["test"],
    peft_config=lora_config,
    args=SFTConfig(
        # Dataset configuration - using default "text" field from formatting_func
        # Output and Hub settings
        output_dir="qwen-codeforces-sft",
        push_to_hub=True,
        hub_model_id="nathens/qwen-codeforces-sft",
        hub_strategy="every_save",
        # Training hyperparameters
        num_train_epochs=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        # Evaluation and logging
        eval_strategy="steps",
        eval_steps=50,
        logging_steps=10,
        save_steps=100,
        save_total_limit=2,
        # Optimization settings
        bf16=True,
        gradient_checkpointing=True,
        optim="adamw_torch",
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
        # Monitoring with Trackio
        report_to="trackio",
        project="codeforces-instruction-tuning",
        run_name="qwen-codeforces-v1",
    )
)
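# Optional sanity check (assumes TRL wraps the base model in a PeftModel when
# peft_config is supplied, in which case this prints the LoRA parameter count):
if hasattr(trainer.model, "print_trainable_parameters"):
    trainer.model.print_trainable_parameters()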
# Train
print("πŸ‹οΈ Starting training...")
trainer.train()
# Save final model
print("πŸ’Ύ Saving final model to Hub")
trainer.push_to_hub()
print("βœ… Training complete!")
print(f"πŸ“ Model available at: https://huggingface.co/nathens/qwen-codeforces-sft")