# /// script
# dependencies = ["trl>=0.12.0", "peft>=0.7.0", "trackio", "transformers", "datasets", "torch"]
# ///
"""
Fine-tune Qwen2.5-0.5B on open-r1/codeforces-cots for instruction following.

Production-ready script with LoRA, Trackio monitoring, and Hub saving.
"""

from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoTokenizer
from trl import SFTConfig, SFTTrainer

# Load the dataset; the "solutions" subset stores conversations in a "messages" field.
print("📚 Loading dataset: open-r1/codeforces-cots")
dataset = load_dataset("open-r1/codeforces-cots", "solutions", split="train")

# For demo purposes, train on a subset. Remove this line for full training.
dataset = dataset.select(range(min(1000, len(dataset))))
print(f"📊 Training on {len(dataset)} examples")

# Load the tokenizer so its chat template can be applied during preprocessing.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B")


def formatting_func(example):
    """Convert the "messages" field to formatted text via the tokenizer's chat template."""
    if example.get("messages"):
        text = tokenizer.apply_chat_template(
            example["messages"],
            tokenize=False,
            add_generation_prompt=False,
        )
        return {"text": text}
    return {"text": ""}


# Render every conversation to a single "text" column (SFTTrainer's default field).
print("🔄 Formatting dataset with chat template...")
dataset = dataset.map(formatting_func, remove_columns=dataset.column_names)

# Hold out 10% as an eval split so training can be monitored.
dataset_split = dataset.train_test_split(test_size=0.1, seed=42)

# Configure LoRA for parameter-efficient training.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Initialize the trainer; passing a model id string lets SFTTrainer load the model itself.
print("🎯 Initializing SFTTrainer")
trainer = SFTTrainer(
    model="Qwen/Qwen2.5-0.5B",
    train_dataset=dataset_split["train"],
    eval_dataset=dataset_split["test"],
    peft_config=lora_config,
    args=SFTConfig(
        # Dataset: uses the default "text" field produced by formatting_func
        # Output and Hub settings
        output_dir="qwen-codeforces-sft",
        push_to_hub=True,
        hub_model_id="nathens/qwen-codeforces-sft",
        hub_strategy="every_save",
        # Training hyperparameters
        num_train_epochs=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        # Evaluation and logging
        eval_strategy="steps",
        eval_steps=50,
        logging_steps=10,
        save_steps=100,
        save_total_limit=2,
        # Optimization settings
        bf16=True,
        gradient_checkpointing=True,
        optim="adamw_torch",
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
        # Monitoring with Trackio
        report_to="trackio",
        project="codeforces-instruction-tuning",
        run_name="qwen-codeforces-v1",
    ),
)

# Train
print("🏋️ Starting training...")
trainer.train()

# Push the final adapter (and tokenizer) to the Hub.
print("💾 Saving final model to Hub")
trainer.push_to_hub()

print("✅ Training complete!")
print("📁 Model available at: https://huggingface.co/nathens/qwen-codeforces-sft")
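
# How to run: the "# /// script" header above is PEP 723 inline metadata, so a
# tool that understands it (e.g. uv) resolves the listed dependencies on launch:
#
#   uv run train_sft.py  # filename is illustrative; use this file's actual name
#
# Quick smoke test after training -- a minimal sketch, assuming the Hub repo
# nathens/qwen-codeforces-sft exists and holds the LoRA adapter and tokenizer
# pushed above:
#
#   from peft import AutoPeftModelForCausalLM
#   from transformers import AutoTokenizer
#
#   model = AutoPeftModelForCausalLM.from_pretrained("nathens/qwen-codeforces-sft")
#   tokenizer = AutoTokenizer.from_pretrained("nathens/qwen-codeforces-sft")
#   prompt = tokenizer.apply_chat_template(
#       [{"role": "user", "content": "Write a two-sum solution in Python."}],
#       tokenize=False,
#       add_generation_prompt=True,
#   )
#   inputs = tokenizer(prompt, return_tensors="pt")
#   print(tokenizer.decode(model.generate(**inputs, max_new_tokens=200)[0]))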