Safetensors
English
qwen3_5
judge
b2b-sales
orpo
lora
preference-learning
tenacious-bench
evaluation
qwen2.5
unsloth
Instructions to use rafiakedir/tenacious-bench-adapter with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Local Apps
- Unsloth Studio new
How to use rafiakedir/tenacious-bench-adapter with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
```sh
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for rafiakedir/tenacious-bench-adapter to start chatting
```
Install Unsloth Studio (Windows)
```powershell
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for rafiakedir/tenacious-bench-adapter to start chatting
```
Using HuggingFace Spaces for Unsloth
```sh
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for rafiakedir/tenacious-bench-adapter to start chatting
```
Load model with FastModel
```sh
pip install unsloth
```
```python
from unsloth import FastModel

model, tokenizer = FastModel.from_pretrained(
    model_name="rafiakedir/tenacious-bench-adapter",
    max_seq_length=2048,
)
```
#!/usr/bin/env python3
"""
Day 5 — Tenacious-Bench ORPO Judge Training Script

Trains Qwen2.5-1.5B-Instruct with LoRA using ORPO (reference-free preference optimization).
Run on a Colab T4 or locally with sufficient VRAM.
All hyperparameters are read from hyperparams.json and logged at startup for auditability.

Usage:
    python train_judge.py [--data-path PATH] [--output-dir DIR] [--hub-token TOKEN]
"""
| import os | |
| import sys | |
| import json | |
| import random | |
| import logging | |
| import datetime | |
| import argparse | |
| from pathlib import Path | |
| import numpy as np | |
# Default locations, all derived from the directory that holds this script.
_SCRIPT_DIR = Path(__file__).parent

# ROOT is the parent of the script directory; the training data lives under it.
ROOT = _SCRIPT_DIR.parent
HYPERPARAMS_PATH = _SCRIPT_DIR / "hyperparams.json"
DATA_PATH = ROOT / "training_data" / "preference_pairs.jsonl"
OUTPUT_DIR = _SCRIPT_DIR / "adapter"
LOG_DIR = _SCRIPT_DIR

# Single seed shared by the RNG setup, the LoRA initialisation and the trainer.
SEED = 42
def set_seed(seed: int):
    """Seed every random number generator this script may touch.

    Seeds Python's ``random`` module and NumPy's global generator, then
    PyTorch (CPU, plus all CUDA devices when CUDA is available). PyTorch is
    optional: if it cannot be imported the torch step is skipped silently.

    Args:
        seed: Value passed unchanged to each generator's seeding function.
    """
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(seed)

    try:
        import torch

        torch.manual_seed(seed)
        cuda_present = torch.cuda.is_available()
        if cuda_present:
            torch.cuda.manual_seed_all(seed)
    except ImportError:
        # torch is not installed; the stdlib and NumPy seeds are still in effect.
        return
def setup_logging(log_path: Path):
    """Configure root logging to write to both *log_path* and stdout.

    ``logging.basicConfig`` does nothing when the root logger already has
    handlers (for example when a notebook runtime, a test runner or an
    imported library configured logging first). In that situation the log
    file would never be created. ``force=True`` removes and closes any
    pre-existing root handlers so the file and stdout handlers requested here
    are always installed.

    Args:
        log_path: File that receives every log record at INFO level or above.
            It is opened as UTF-8 so non-ASCII log text is written the same
            way on every platform.

    Returns:
        The logger named after this module; its records propagate to the
        root handlers configured here.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[
            logging.FileHandler(str(log_path), encoding="utf-8"),
            logging.StreamHandler(sys.stdout),
        ],
        # Without this, an already-configured root logger makes the call a no-op.
        force=True,
    )
    return logging.getLogger(__name__)
def detect_precision():
    """Choose mixed-precision flags from the current CUDA device.

    Returns:
        A dict with boolean ``"bf16"`` and ``"fp16"`` keys:

        * compute capability major version >= 8: ``bf16`` is enabled;
        * any other CUDA device: ``fp16`` is enabled;
        * no CUDA device, torch not importable, or the device query failed:
          both are ``False``.

    This function never raises. A failed device query is reported with a
    warning instead of being swallowed silently, so an unexpected fallback
    to full precision is visible in the log.
    """
    full_precision = {"bf16": False, "fp16": False}
    try:
        import torch

        if not torch.cuda.is_available():
            return full_precision
        cap = torch.cuda.get_device_capability()
        name = torch.cuda.get_device_name()
    except ImportError:
        # torch is optional here; without it there is nothing to detect.
        return full_precision
    except Exception as exc:
        logging.warning(
            "Could not query the CUDA device (%s: %s); using neither bf16 nor fp16",
            type(exc).__name__,
            exc,
        )
        return full_precision

    if cap[0] >= 8:  # A100, A10, 4090 — bf16 capable
        logging.info(f"GPU {name} (compute {cap[0]}.{cap[1]}) supports bf16")
        return {"bf16": True, "fp16": False}

    # T4, V100 — fp16 only
    logging.info(f"GPU {name} (compute {cap[0]}.{cap[1]}) using fp16")
    return {"bf16": False, "fp16": True}
def load_dataset(data_path: Path, logger):
    """Read a JSONL file of preference pairs into a ``datasets.Dataset``.

    Each non-blank line is parsed as one JSON record. The ``task_id`` and
    ``dimension`` keys are removed from every record when present; all other
    keys are passed through unchanged.

    Args:
        data_path: JSONL file to read. It is decoded as UTF-8 regardless of
            the platform's default encoding.
        logger: Logger used to report how many records were loaded.

    Returns:
        The result of ``Dataset.from_list`` on the cleaned records.

    Raises:
        json.JSONDecodeError: A line is not valid JSON. The message names the
            file and the 1-based line number of the offending record.
        OSError: The file cannot be opened.
    """
    from datasets import Dataset

    pairs = []
    with open(data_path, encoding="utf-8") as f:
        for lineno, raw in enumerate(f, start=1):
            raw = raw.strip()
            if not raw:
                continue
            try:
                pairs.append(json.loads(raw))
            except json.JSONDecodeError as exc:
                # Same exception type as before, with the location added so a
                # bad record can be found in a large file.
                raise json.JSONDecodeError(
                    f"{data_path}, file line {lineno}: {exc.msg}",
                    exc.doc,
                    exc.pos,
                ) from exc
    logger.info(f"Loaded {len(pairs)} preference pairs from {data_path}")

    # Drop the two bookkeeping keys so they do not become dataset columns.
    for record in pairs:
        record.pop("task_id", None)
        record.pop("dimension", None)
    return Dataset.from_list(pairs)
def _copy_training_log(log_path, standard_log, logger):
    """Copy the timestamped run log to *standard_log*; warn and return False on failure."""
    import shutil

    try:
        shutil.copy(str(log_path), str(standard_log))
    except OSError as exc:
        # A missing or unreadable log file must not abort a finished training run.
        logger.warning(f"Could not copy training log to {standard_log}: {exc}")
        return False
    return True


def main():
    """Run the full ORPO training pipeline and return the training metrics.

    Steps, in order:

    1. Parse ``--data-path``, ``--output-dir`` and ``--hub-token`` (the token
       defaults to the ``HF_TOKEN`` environment variable).
    2. Seed the RNGs, start a timestamped log file in ``LOG_DIR`` and load
       ``hyperparams.json``.
    3. Load the base model through Unsloth with 4-bit loading and wrap it
       with LoRA adapters.
    4. Train with TRL's ``ORPOTrainer`` on the preference-pair dataset.
    5. Save the adapter and tokenizer to the output directory.
    6. Copy the run log to ``training_run.log``.
    7. Push adapter and tokenizer to the Hugging Face Hub when a token is
       available; otherwise log a warning and skip the push.

    The log copy is best-effort: a failure is logged as a warning and does
    not stop the Hub push. The copy is refreshed after the final messages so
    ``training_run.log`` also contains the push outcome and completion lines.

    Returns:
        ``train_result.metrics`` as produced by ``trainer.train()``.

    Raises:
        KeyError: ``hyperparams.json`` lacks a required section or key. The
            two sections are looked up before the model is loaded so a bad
            config fails fast.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--data-path", type=str, default=str(DATA_PATH))
    parser.add_argument("--output-dir", type=str, default=str(OUTPUT_DIR))
    parser.add_argument("--hub-token", type=str, default=os.environ.get("HF_TOKEN", ""))
    args = parser.parse_args()

    set_seed(SEED)

    timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%S")
    log_path = LOG_DIR / f"training_run_seed{SEED}_{timestamp}.log"
    logger = setup_logging(log_path)

    with open(HYPERPARAMS_PATH) as f:
        hp = json.load(f)
    logger.info(f"Hyperparameters: {json.dumps(hp, indent=2)}")

    # Resolve both config sections now so a missing section is reported
    # before the (slow) model download and load.
    lora_cfg = hp["lora"]
    orpo_cfg = hp["orpo_trainer"]

    precision = detect_precision()
    logger.info(f"Precision: {precision}")

    # Load Unsloth model
    logger.info("Loading Unsloth Qwen2.5-1.5B-Instruct with 4-bit quantization...")
    from unsloth import FastLanguageModel

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=hp["model_id"],
        max_seq_length=orpo_cfg["max_length"],
        dtype=None,  # auto-detect
        load_in_4bit=True,
    )

    # Apply LoRA
    logger.info(f"Applying LoRA: r={lora_cfg['r']}, alpha={lora_cfg['lora_alpha']}, "
                f"targets={lora_cfg['target_modules']}")
    model = FastLanguageModel.get_peft_model(
        model,
        r=lora_cfg["r"],
        target_modules=lora_cfg["target_modules"],
        lora_alpha=lora_cfg["lora_alpha"],
        lora_dropout=lora_cfg["lora_dropout"],
        bias=lora_cfg["bias"],
        use_gradient_checkpointing="unsloth",
        random_state=SEED,
    )

    # Load dataset
    dataset = load_dataset(Path(args.data_path), logger)
    logger.info(f"Dataset size: {len(dataset)}")

    # Training arguments
    from trl import ORPOConfig, ORPOTrainer

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    training_args = ORPOConfig(
        output_dir=str(output_dir),
        learning_rate=orpo_cfg["learning_rate"],
        per_device_train_batch_size=orpo_cfg["per_device_train_batch_size"],
        gradient_accumulation_steps=orpo_cfg["gradient_accumulation_steps"],
        num_train_epochs=orpo_cfg["num_train_epochs"],
        warmup_ratio=orpo_cfg["warmup_ratio"],
        lr_scheduler_type=orpo_cfg["lr_scheduler_type"],
        beta=orpo_cfg["beta"],
        max_length=orpo_cfg["max_length"],
        max_prompt_length=orpo_cfg["max_prompt_length"],
        logging_steps=orpo_cfg["logging_steps"],
        save_steps=orpo_cfg["save_steps"],
        seed=SEED,
        bf16=precision["bf16"],
        fp16=precision["fp16"],
        report_to="none",
        remove_unused_columns=False,
    )
    trainer = ORPOTrainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
        tokenizer=tokenizer,
    )

    logger.info("Starting ORPO training...")
    train_result = trainer.train()
    logger.info(f"Training complete. Metrics: {train_result.metrics}")

    # Save adapter locally
    logger.info(f"Saving LoRA adapter to {output_dir}")
    model.save_pretrained(str(output_dir))
    tokenizer.save_pretrained(str(output_dir))

    # Snapshot the run log under its standard name before the network step,
    # so a failed push still leaves a usable training_run.log behind.
    standard_log = LOG_DIR / "training_run.log"
    if _copy_training_log(log_path, standard_log, logger):
        logger.info(f"Training log copied to {standard_log}")

    # Push to HuggingFace
    hub_model_id = hp.get("hub_model_id", "rafiakedir/tenacious-bench-adapter")
    hub_token = args.hub_token or os.environ.get("HF_TOKEN", "")
    if hub_token:
        logger.info(f"Pushing adapter to HuggingFace: {hub_model_id}")
        model.push_to_hub(hub_model_id, token=hub_token)
        tokenizer.push_to_hub(hub_model_id, token=hub_token)
        logger.info(f"Adapter pushed to https://huggingface.co/{hub_model_id}")
    else:
        logger.warning("HF_TOKEN not set — skipping HuggingFace push")

    logger.info("=== TRAINING COMPLETE ===")
    logger.info(f"Adapter saved to: {output_dir}")
    logger.info(f"Log: {standard_log}")

    # Refresh the standard log so it also holds the push and completion lines
    # written after the first snapshot.
    _copy_training_log(log_path, standard_log, logger)
    return train_result.metrics
# Run the training pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()