""" Agent Q3 [Evo] — Training Pipeline Unsloth LoRA fine-tuning on Llama-3.2-3B-Instruct. Reads from HF dataset madDegen/agent-q3, pushes adapter to madDegen/agent-q3-loras. """ import os from datasets import load_dataset from unsloth import FastLanguageModel from trl import SFTTrainer from transformers import TrainingArguments BASE_MODEL = os.getenv("BASE_MODEL", "unsloth/Llama-3.2-3B-Instruct-bnb-4bit") HF_DATASET = os.getenv("HF_DATASET", "madDegen/agent-q3") ADAPTER_REPO = os.getenv("ADAPTER_REPO","madDegen/agent-q3-loras") MAX_SEQ_LEN = int(os.getenv("MAX_SEQ_LEN", 2048)) LORA_RANK = int(os.getenv("LORA_RANK", 16)) def run(): model, tokenizer = FastLanguageModel.from_pretrained( model_name=BASE_MODEL, max_seq_length=MAX_SEQ_LEN, load_in_4bit=True, ) model = FastLanguageModel.get_peft_model( model, r=LORA_RANK, target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"], lora_alpha=16, lora_dropout=0, bias="none", use_gradient_checkpointing="unsloth", ) dataset = load_dataset(HF_DATASET, split="train") trainer = SFTTrainer( model=model, tokenizer=tokenizer, train_dataset=dataset, dataset_text_field="text", max_seq_length=MAX_SEQ_LEN, args=TrainingArguments( per_device_train_batch_size=2, gradient_accumulation_steps=4, warmup_steps=10, max_steps=100, learning_rate=2e-4, fp16=True, logging_steps=10, output_dir="./evo_checkpoints", optim="adamw_8bit", seed=42, ), ) trainer.train() model.push_to_hub(ADAPTER_REPO, token=os.getenv("HF_TOKEN")) tokenizer.push_to_hub(ADAPTER_REPO, token=os.getenv("HF_TOKEN")) print(f"Adapter pushed to {ADAPTER_REPO}") if __name__ == "__main__": run()