# /// script
# dependencies = [
#     "trl>=0.12.0",
#     "peft>=0.7.0",
#     "transformers>=4.36.0",
#     "accelerate>=0.24.0",
#     "trackio",
#     "bitsandbytes",
#     "datasets",
#     "huggingface_hub",
# ]
# ///
"""Fine-tune Qwen3-8B on Vyvo Life CoPilot conversations dataset.

Downloads the JSONL conversation dataset from the Hugging Face Hub,
converts it to the chat-``messages`` format expected by TRL's SFTTrainer,
runs LoRA supervised fine-tuning on Qwen/Qwen3-8B, and pushes the
resulting adapter to the Hub.
"""

import json

from datasets import Dataset
from huggingface_hub import hf_hub_download
from peft import LoraConfig
from trl import SFTConfig, SFTTrainer

print("📦 Downloading dataset from Hub...")
data_path = hf_hub_download(
    repo_id="Codyfederer/vyvo-text-conversations",
    filename="text_conversations.jsonl",
    repo_type="dataset",
)

# Load JSONL manually to avoid schema inference issues
print("🔄 Loading and converting to messages format...")
conversations = []
with open(data_path, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines in the JSONL file instead of
            # crashing on json.loads("").
            continue
        item = json.loads(line)
        # Each source record carries a list of turns; map them onto the
        # {'role': ..., 'content': ...} shape TRL's chat template expects.
        messages = [
            {"role": turn["role"], "content": turn["content"]}
            for turn in item["turns"]
        ]
        conversations.append({"messages": messages})

dataset = Dataset.from_list(conversations)
print(f"✅ Converted {len(dataset)} conversations")

# Create train/eval split
print("🔀 Creating train/eval split...")
dataset_split = dataset.train_test_split(test_size=0.05, seed=42)
train_dataset = dataset_split["train"]
eval_dataset = dataset_split["test"]
print(f"   Train: {len(train_dataset)} examples")
print(f"   Eval: {len(eval_dataset)} examples")

# Training configuration - optimized for memory on A10G
config = SFTConfig(
    # Hub settings
    output_dir="qwen3-8b-vyvo-copilot",
    push_to_hub=True,
    hub_model_id="Codyfederer/qwen3-8b-vyvo-copilot",
    hub_strategy="every_save",
    hub_private_repo=False,
    # Training parameters - reduced for memory
    num_train_epochs=3,
    per_device_train_batch_size=1,  # Reduced from 2
    gradient_accumulation_steps=16,  # Increased to maintain effective batch size
    learning_rate=2e-4,
    max_length=1024,  # Reduced from 2048 to save memory
    # Memory optimization
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    bf16=True,
    optim="adamw_8bit",  # Use 8-bit optimizer to save memory
    # Logging & checkpointing
    logging_steps=10,
    save_strategy="steps",
    save_steps=200,
    save_total_limit=2,
    # Evaluation - skip eval during training to save memory
    eval_strategy="no",
    # Optimization
    warmup_ratio=0.05,
    lr_scheduler_type="cosine",
    weight_decay=0.01,
    # Monitoring
    report_to="trackio",
    project="vyvo-copilot-training",
    run_name="qwen3-8b-sft-v1",
)

# LoRA configuration - optimized for Qwen3
peft_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

# Initialize and train
print("🎯 Initializing trainer with Qwen/Qwen3-8B...")
trainer = SFTTrainer(
    model="Qwen/Qwen3-8B",
    train_dataset=train_dataset,
    # eval_strategy is "no", so this does not evaluate during training;
    # wiring it in makes trainer.evaluate() usable after the run.
    eval_dataset=eval_dataset,
    args=config,
    peft_config=peft_config,
)

print("🚀 Starting training...")
trainer.train()

print("💾 Pushing final model to Hub...")
trainer.push_to_hub()

print("✅ Training complete!")
print("📦 Model saved to: https://huggingface.co/Codyfederer/qwen3-8b-vyvo-copilot")
print("📊 View metrics at: https://huggingface.co/spaces/Codyfederer/trackio")