# tenacious-bench-adapter / train_judge.py
# rafiakedir's picture
# feat: add model card, inference example, and training scripts
# e1374e8 verified
#!/usr/bin/env python3
"""
Day 5 — Tenacious-Bench ORPO Judge Training Script
Trains Qwen2.5-1.5B-Instruct with LoRA using ORPO (reference-free preference optimization).
Run on Colab T4 or locally with sufficient VRAM.
All hyperparameters are read from hyperparams.json (next to this script) and logged at startup for auditability.
Usage:
python train_judge.py [--data-path PATH] [--output-dir DIR] [--hub-token TOKEN]
"""
import os
import sys
import json
import random
import logging
import datetime
import argparse
from pathlib import Path
import numpy as np
# Filesystem layout, anchored on the directory that holds this script.
_SCRIPT_DIR = Path(__file__).parent
ROOT = _SCRIPT_DIR.parent
HYPERPARAMS_PATH = _SCRIPT_DIR / "hyperparams.json"
DATA_PATH = ROOT / "training_data/preference_pairs.jsonl"
OUTPUT_DIR = _SCRIPT_DIR / "adapter"
# Run logs are written next to the script itself.
LOG_DIR = _SCRIPT_DIR

# One seed shared by the RNG seeding, the LoRA init and the trainer config.
SEED = 42
def set_seed(seed: int):
    """Seed the random number generators used by this script.

    Seeds Python's ``random`` module and NumPy's global generator, then
    PyTorch (the CPU generator, plus every CUDA device when CUDA is
    available). PyTorch is treated as optional: if it cannot be imported,
    only the first two generators are seeded.

    Args:
        seed: The integer seed handed to every generator.
    """
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(seed)

    try:
        import torch

        torch.manual_seed(seed)
        cuda_present = torch.cuda.is_available()
        if cuda_present:
            torch.cuda.manual_seed_all(seed)
    except ImportError:
        # torch is not installed; there is nothing further to seed.
        pass
def setup_logging(log_path: Path):
    """Configure root logging to write to ``log_path`` and to stdout.

    Both handlers use the format ``<time> [<LEVEL>] <message>`` at INFO
    level.

    Args:
        log_path: File that receives a copy of every log record.

    Returns:
        The logger named after this module; its records propagate to the
        root handlers configured here.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[
            # Explicit UTF-8: log messages contain non-ASCII characters and
            # the platform default encoding is not guaranteed to handle them.
            logging.FileHandler(str(log_path), encoding="utf-8"),
            logging.StreamHandler(sys.stdout),
        ],
        # Without force=True, basicConfig silently does nothing when the root
        # logger already has handlers (notebook runtimes, test runners, or
        # libraries that configure logging on import), which would leave the
        # run log empty. force=True (Python 3.8+) replaces those handlers.
        force=True,
    )
    return logging.getLogger(__name__)
def detect_precision():
    """Choose mixed-precision flags for the current CUDA device.

    Returns:
        A new dict with boolean keys ``"bf16"`` and ``"fp16"``:

        * CUDA device with compute capability major version >= 8: bf16.
        * Any other CUDA device: fp16.
        * No CUDA device, torch not importable, or the device query failed:
          both flags are False.
    """
    try:
        import torch

        if torch.cuda.is_available():
            cap = torch.cuda.get_device_capability()
            name = torch.cuda.get_device_name()
            if cap[0] >= 8:  # A100, A10, 4090 — bf16 capable
                logging.info(f"GPU {name} (compute {cap[0]}.{cap[1]}) supports bf16")
                return {"bf16": True, "fp16": False}
            # T4, V100 — fp16 only
            logging.info(f"GPU {name} (compute {cap[0]}.{cap[1]}) using fp16")
            return {"bf16": False, "fp16": True}
    except ImportError:
        # torch is optional for detection; fall through to the default.
        pass
    except Exception as err:
        # Detection stays best-effort, but a failing CUDA query is reported
        # instead of being silently swallowed.
        logging.warning(
            "GPU precision detection failed (%s: %s); using neither bf16 nor fp16",
            type(err).__name__,
            err,
        )
    return {"bf16": False, "fp16": False}
def load_dataset(data_path: Path, logger):
    """Read preference pairs from a JSONL file into a ``datasets.Dataset``.

    Each non-blank line is parsed as one JSON object. The ``task_id`` and
    ``dimension`` keys are dropped from every record if present; all other
    keys are passed through unchanged (presumably the prompt/chosen/rejected
    fields the trainer expects — the schema is not visible in this file).

    Args:
        data_path: Path to the JSONL file.
        logger: Logger used to report how many pairs were loaded.

    Returns:
        A ``datasets.Dataset`` built from the parsed records.

    Raises:
        json.JSONDecodeError: If a line is not valid JSON; the message names
            the file and the 1-based line number.
    """
    from datasets import Dataset

    pairs = []
    # JSONL is read as UTF-8 explicitly so parsing does not depend on the
    # platform's default encoding.
    with open(data_path, encoding="utf-8") as f:
        for line_no, raw in enumerate(f, start=1):
            raw = raw.strip()
            if not raw:
                continue
            try:
                record = json.loads(raw)
            except json.JSONDecodeError as err:
                # Same exception type as before, with the location added so a
                # bad record can be found in a large file.
                raise json.JSONDecodeError(
                    f"{data_path}, line {line_no}: {err.msg}", err.doc, err.pos
                ) from err
            pairs.append(record)
    logger.info(f"Loaded {len(pairs)} preference pairs from {data_path}")

    # Drop bookkeeping keys before building the dataset.
    for p in pairs:
        p.pop("task_id", None)
        p.pop("dimension", None)
    return Dataset.from_list(pairs)
def main():
    """Run the ORPO LoRA training pipeline end to end.

    Steps, in order: parse CLI arguments, seed the RNGs, set up logging,
    load ``hyperparams.json``, load the 4-bit base model through Unsloth,
    attach LoRA adapters, load the preference pairs, train with TRL's
    ``ORPOTrainer``, save the adapter and tokenizer, optionally push both to
    the HuggingFace Hub, and finally copy the timestamped run log to
    ``training_run.log``.

    CLI arguments:
        --data-path: JSONL file of preference pairs (default ``DATA_PATH``).
        --output-dir: Directory for the saved adapter (default ``OUTPUT_DIR``).
        --hub-token: HuggingFace token; defaults to the ``HF_TOKEN``
            environment variable. The push is skipped when no token is set.

    Returns:
        The ``metrics`` attribute of the object returned by
        ``trainer.train()``.
    """
    import inspect
    import shutil

    parser = argparse.ArgumentParser()
    parser.add_argument("--data-path", type=str, default=str(DATA_PATH))
    parser.add_argument("--output-dir", type=str, default=str(OUTPUT_DIR))
    parser.add_argument("--hub-token", type=str, default=os.environ.get("HF_TOKEN", ""))
    args = parser.parse_args()

    set_seed(SEED)
    timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%S")
    log_path = LOG_DIR / f"training_run_seed{SEED}_{timestamp}.log"
    logger = setup_logging(log_path)

    with open(HYPERPARAMS_PATH, encoding="utf-8") as f:
        hp = json.load(f)
    logger.info(f"Hyperparameters: {json.dumps(hp, indent=2)}")

    precision = detect_precision()
    logger.info(f"Precision: {precision}")

    # Load Unsloth model
    logger.info("Loading Unsloth Qwen2.5-1.5B-Instruct with 4-bit quantization...")
    from unsloth import FastLanguageModel

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=hp["model_id"],
        max_seq_length=hp["orpo_trainer"]["max_length"],
        dtype=None,  # auto-detect
        load_in_4bit=True,
    )

    # Apply LoRA
    logger.info(f"Applying LoRA: r={hp['lora']['r']}, alpha={hp['lora']['lora_alpha']}, "
                f"targets={hp['lora']['target_modules']}")
    model = FastLanguageModel.get_peft_model(
        model,
        r=hp["lora"]["r"],
        target_modules=hp["lora"]["target_modules"],
        lora_alpha=hp["lora"]["lora_alpha"],
        lora_dropout=hp["lora"]["lora_dropout"],
        bias=hp["lora"]["bias"],
        use_gradient_checkpointing="unsloth",
        random_state=SEED,
    )

    # Load dataset
    dataset = load_dataset(Path(args.data_path), logger)
    logger.info(f"Dataset size: {len(dataset)}")

    # Training arguments
    from trl import ORPOConfig, ORPOTrainer

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    training_args = ORPOConfig(
        output_dir=str(output_dir),
        learning_rate=hp["orpo_trainer"]["learning_rate"],
        per_device_train_batch_size=hp["orpo_trainer"]["per_device_train_batch_size"],
        gradient_accumulation_steps=hp["orpo_trainer"]["gradient_accumulation_steps"],
        num_train_epochs=hp["orpo_trainer"]["num_train_epochs"],
        warmup_ratio=hp["orpo_trainer"]["warmup_ratio"],
        lr_scheduler_type=hp["orpo_trainer"]["lr_scheduler_type"],
        beta=hp["orpo_trainer"]["beta"],
        max_length=hp["orpo_trainer"]["max_length"],
        max_prompt_length=hp["orpo_trainer"]["max_prompt_length"],
        logging_steps=hp["orpo_trainer"]["logging_steps"],
        save_steps=hp["orpo_trainer"]["save_steps"],
        seed=SEED,
        bf16=precision["bf16"],
        fp16=precision["fp16"],
        report_to="none",
        remove_unused_columns=False,
    )

    # TRL renamed the trainer's `tokenizer` argument to `processing_class`.
    # Pass the tokenizer under whichever name the installed version declares,
    # defaulting to the original `tokenizer` keyword.
    trainer_params = inspect.signature(ORPOTrainer.__init__).parameters
    tokenizer_kwarg = "processing_class" if "processing_class" in trainer_params else "tokenizer"
    trainer = ORPOTrainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
        **{tokenizer_kwarg: tokenizer},
    )

    logger.info("Starting ORPO training...")
    train_result = trainer.train()
    logger.info(f"Training complete. Metrics: {train_result.metrics}")

    # Save adapter locally
    logger.info(f"Saving LoRA adapter to {output_dir}")
    model.save_pretrained(str(output_dir))
    tokenizer.save_pretrained(str(output_dir))

    standard_log = LOG_DIR / "training_run.log"
    try:
        # Push to HuggingFace
        hub_model_id = hp.get("hub_model_id", "rafiakedir/tenacious-bench-adapter")
        hub_token = args.hub_token or os.environ.get("HF_TOKEN", "")
        if hub_token:
            logger.info(f"Pushing adapter to HuggingFace: {hub_model_id}")
            model.push_to_hub(hub_model_id, token=hub_token)
            tokenizer.push_to_hub(hub_model_id, token=hub_token)
            logger.info(f"Adapter pushed to https://huggingface.co/{hub_model_id}")
        else:
            logger.warning("HF_TOKEN not set — skipping HuggingFace push")
    except Exception:
        # Record the traceback in the run log before propagating the failure.
        logger.exception("HuggingFace push failed")
        raise
    else:
        logger.info("=== TRAINING COMPLETE ===")
        logger.info(f"Adapter saved to: {output_dir}")
        logger.info(f"Log: {standard_log}")
    finally:
        # Copy the run log to its standard name last, so the copy includes the
        # push outcome and completion messages. It also runs when the push
        # fails, since the adapter has already been saved locally.
        shutil.copy(str(log_path), str(standard_log))
        logger.info(f"Training log copied to {standard_log}")

    return train_result.metrics


if __name__ == "__main__":
    main()