"""
Agent Q3 [Evo] — Training Pipeline

Unsloth LoRA fine-tuning on Llama-3.2-3B-Instruct.
Reads the training data from the Hugging Face dataset madDegen/agent-q3 and
pushes the trained adapter to madDegen/agent-q3-loras.
"""
import os

# NOTE: keep unsloth ahead of trl/transformers; do not re-sort alphabetically.
from datasets import load_dataset
from unsloth import FastLanguageModel, is_bfloat16_supported
from trl import SFTTrainer
from transformers import TrainingArguments
|
|
# Pipeline configuration. Every value can be overridden through an environment
# variable of the same name; the literals below are the fallbacks.
BASE_MODEL = os.getenv("BASE_MODEL", "unsloth/Llama-3.2-3B-Instruct-bnb-4bit")
HF_DATASET = os.getenv("HF_DATASET", "madDegen/agent-q3")
ADAPTER_REPO = os.getenv("ADAPTER_REPO", "madDegen/agent-q3-loras")
# Environment values are strings, so the fallbacks are strings too and the
# result is converted to int in one place.
MAX_SEQ_LEN = int(os.getenv("MAX_SEQ_LEN", "2048"))
LORA_RANK = int(os.getenv("LORA_RANK", "16"))
|
|
def run():
    """Fine-tune the base model with LoRA and publish the adapter to the Hub.

    Steps, in order:

    1. Load ``BASE_MODEL`` in 4-bit through Unsloth with a context window of
       ``MAX_SEQ_LEN`` tokens.
    2. Attach LoRA adapters of rank ``LORA_RANK`` to the attention and MLP
       projection layers.
    3. Load the ``train`` split of ``HF_DATASET``; the trainer reads the
       ``"text"`` column of each record.
    4. Train for 100 optimizer steps (effective batch size 2 x 4 = 8 per
       device).
    5. Save the adapter and tokenizer locally, then push both to
       ``ADAPTER_REPO``.

    The Hub token is read from the ``HF_TOKEN`` environment variable. When it
    is unset, ``None`` is passed to ``push_to_hub``.
    """
    output_dir = "./evo_checkpoints"
    # Read the token once so both uploads use the same credential.
    hf_token = os.getenv("HF_TOKEN")

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=BASE_MODEL,
        max_seq_length=MAX_SEQ_LEN,
        load_in_4bit=True,
    )
    model = FastLanguageModel.get_peft_model(
        model,
        r=LORA_RANK,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
        # NOTE(review): alpha stays at 16 even when LORA_RANK is overridden,
        # so the effective LoRA scaling (alpha / r) changes with the rank.
        lora_alpha=16,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
    )

    dataset = load_dataset(HF_DATASET, split="train")

    # No dtype is passed to from_pretrained, so Unsloth picks the compute
    # precision for the GPU. Match the trainer's mixed-precision mode to the
    # hardware instead of forcing fp16, which conflicts with a bf16 model.
    use_bf16 = is_bfloat16_supported()

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=MAX_SEQ_LEN,
        args=TrainingArguments(
            per_device_train_batch_size=2,
            gradient_accumulation_steps=4,
            warmup_steps=10,
            max_steps=100,
            learning_rate=2e-4,
            fp16=not use_bf16,
            bf16=use_bf16,
            logging_steps=10,
            output_dir=output_dir,
            optim="adamw_8bit",
            seed=42,
        ),
    )
    trainer.train()

    # Keep a local copy before uploading. This run is shorter than the
    # default checkpoint interval, so without it a failed push (network,
    # auth) would lose the trained adapter.
    adapter_dir = os.path.join(output_dir, "final_adapter")
    model.save_pretrained(adapter_dir)
    tokenizer.save_pretrained(adapter_dir)

    model.push_to_hub(ADAPTER_REPO, token=hf_token)
    tokenizer.push_to_hub(ADAPTER_REPO, token=hf_token)
    print(f"Adapter pushed to {ADAPTER_REPO}")
|
|
# Start training only when the file is executed as a script, not on import.
if __name__ == "__main__":
    run()
|
|