# stevenkhan's picture
# Add training script
# e65ccc2 verified
"""
Pokemon Showdown Battle Strategist - SFT Training Script
Fine-tunes Llama 3.1 8B Instruct on 500K expert battle decisions.
"""
import os
import torch
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
# Base checkpoint to fine-tune and the Hub dataset holding the training examples.
MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
DATASET_ID = "stevenkhan/pokemon-showdown-battle-sft"

# Local output directory and the Hub repository the run pushes to.
OUTPUT_DIR = "/data/pokemon-showdown-strategist"
HUB_MODEL_ID = "stevenkhan/pokemon-showdown-strategist"

# Project name for the Trackio logger, supplied through its environment variable.
os.environ.update(TRACKIO_PROJECT="pokemon-showdown-strategist")
# Load the full training split from the Hub.
print("Loading dataset...")
dataset = load_dataset(DATASET_ID, split="train")
num_examples = len(dataset)
print(f"Dataset loaded: {num_examples} examples")

# Shuffle with a fixed seed, then hold out 1% of the rows for evaluation.
# The same seed is used for both steps so the split is reproducible.
dataset = dataset.shuffle(seed=42)
split = dataset.train_test_split(test_size=0.01, seed=42)
train_dataset, eval_dataset = split["train"], split["test"]
print(f"Train: {len(train_dataset)}, Eval: {len(eval_dataset)}")
# LoRA adapter settings, later handed to the trainer as ``peft_config``.
# Per the PEFT documentation the adapter update is scaled by lora_alpha / r
# (when rsLoRA is not enabled), i.e. 16 / 128 = 0.125 with the values below.
# NOTE(review): that ratio is lower than the commonly used alpha >= r;
# confirm it is intentional for this run.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    # PEFT shorthand for adapting the model's linear layers; see the PEFT
    # docs for which modules the shorthand excludes.
    target_modules="all-linear",
    r=128,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)
# Hyperparameters and run settings for the supervised fine-tuning job.
training_args = SFTConfig(
    # --- Output location and Hub upload ---
    output_dir=OUTPUT_DIR,
    push_to_hub=True,
    hub_model_id=HUB_MODEL_ID,
    hub_strategy="every_save",
    # --- Sequence handling and loss masking ---
    # NOTE(review): assistant_only_loss presumably requires a conversational
    # dataset and a chat template able to mark assistant tokens; confirm the
    # tokenizer for MODEL_ID supports this.
    max_length=2048,
    assistant_only_loss=True,
    # --- Optimisation ---
    # 2 sequences x 8 accumulation steps = 16 sequences per optimizer step
    # on each device.
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    weight_decay=0.01,
    max_grad_norm=1.0,
    # --- Precision and memory ---
    bf16=True,
    gradient_checkpointing=True,
    model_init_kwargs={"torch_dtype": torch.bfloat16},
    # --- Evaluation and checkpointing share one 500-step cadence ---
    eval_strategy="steps",
    eval_steps=500,
    per_device_eval_batch_size=2,
    save_strategy="steps",
    save_steps=500,
    save_total_limit=3,
    # Best checkpoint is chosen by eval_loss, where lower is better.
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    # --- Logging ---
    logging_steps=10,
    logging_first_step=True,
    disable_tqdm=True,
    report_to="trackio",
    run_name="pokemon-showdown-strategist-llama31-8b-lora",
)
print("Initializing trainer...")
# The model is passed as a Hub ID string rather than an instantiated model
# object; the LoRA settings travel alongside it as ``peft_config``.
trainer = SFTTrainer(
    model=MODEL_ID,
    peft_config=peft_config,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)
# Run the fine-tuning job.
print("Starting training...")
train_result = trainer.train()

# Write the final model to the output directory, then upload it to the Hub.
print("Saving final model...")
trainer.save_model()
trainer.push_to_hub(commit_message="Final Pokemon Showdown Strategist model")

# Log and save the training metrics through the trainer's own helpers.
metrics = train_result.metrics
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)

model_url = f"https://huggingface.co/{HUB_MODEL_ID}"
print(f"Training complete! Model: {model_url}")
# Falls back to "N/A" when the metrics dict has no "train_loss" entry.
final_loss = metrics.get("train_loss", "N/A")
print(f"Training loss: {final_loss}")