# Orb / train_alkaid_a.py
# pennydoesdev's picture
# Add training script
# 7040a6f verified
#!/usr/bin/env python3
"""
Alkaid A β€” Fine-tuning Script
Base Model: Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled
Framework: Unsloth + TRL (SFTTrainer)
Method: LoRA (16-bit) with train_on_responses_only
Requirements:
pip install unsloth transformers trl datasets peft accelerate bitsandbytes --break-system-packages
Usage:
# 1. Login to Hugging Face first:
huggingface-cli login --token YOUR_HF_TOKEN
# 2. Run training:
python train_alkaid_a.py
# 3. Push to Hugging Face Hub:
python train_alkaid_a.py --push --hub_id "YourUsername/Alkaid-A"
"""
import argparse
import json
import os
from pathlib import Path
# =============================================================================
# CONFIGURATION — Edit these values for your setup
# =============================================================================
CONFIG = {
    # Model
    "base_model": "Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled",
    "max_seq_length": 4096,  # max tokens per training example
    "load_in_4bit": True,  # Set False if you have 56GB+ VRAM for 16-bit
    # LoRA
    "lora_r": 16,  # adapter rank
    "lora_alpha": 16,  # LoRA scaling factor (here equal to lora_r)
    "lora_dropout": 0,
    # Attention + MLP projection layers that receive LoRA adapters
    "target_modules": [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    # Training
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 4,  # effective batch size = 1 x 4
    "warmup_steps": 10,
    "num_train_epochs": 3,
    "max_steps": -1,  # Set to positive number to override epochs
    "learning_rate": 2e-4,
    "optim": "adamw_8bit",  # 8-bit AdamW to reduce optimizer VRAM
    "lr_scheduler_type": "cosine",
    "fp16": False,
    "bf16": True,  # NOTE(review): assumes bf16-capable GPU (Ampere+) — confirm hardware
    "logging_steps": 10,
    "save_steps": 50,
    "seed": 42,
    # Data
    "example_dataset": "nohurry/Opus-4.6-Reasoning-3000x-filtered",  # HF Hub dataset id
    "custom_data_path": "alkaid_a_training_data.jsonl",  # optional local JSONL in messages format
    # Output
    "output_dir": "./alkaid_a_checkpoints",  # intermediate trainer checkpoints
    "final_model_dir": "./alkaid_a_final",  # final adapters + merged/GGUF exports
}
# =============================================================================
# DATA PREPARATION
# =============================================================================
def format_example_dataset(example):
    """Map one row of the Opus reasoning dataset to chat-messages format.

    The source columns (problem, thinking, solution) become a three-turn
    conversation: a fixed system prompt, the user's problem, and an
    assistant reply whose reasoning is wrapped in <think> tags to match
    the base model's output format.

    Args:
        example: dict with "problem", "thinking", and "solution" keys.

    Returns:
        dict with a single "messages" key holding the conversation list.
    """
    persona = (
        "You are Alkaid A, an advanced AI coding and deployment assistant. "
        "You follow a rigorous multi-phase workflow including code review, "
        "iterative debugging, deployment planning, security audits, versioned "
        "releases, and comprehensive documentation."
    )
    # Reasoning goes inside <think> tags, followed by the final solution.
    reply = "<think>\n{}\n</think>\n\n{}".format(
        example["thinking"], example["solution"]
    )
    conversation = [
        {"role": "system", "content": persona},
        {"role": "user", "content": example["problem"]},
        {"role": "assistant", "content": reply},
    ]
    return {"messages": conversation}
def load_custom_data(path):
    """Load custom JSONL training data (already in messages format).

    Args:
        path: Path to a JSON Lines file; each non-blank line is one JSON
            object (expected to carry a "messages" list, but not validated).

    Returns:
        list of parsed JSON objects, in file order.

    Raises:
        FileNotFoundError: if *path* does not exist.
        json.JSONDecodeError: if any non-blank line is not valid JSON.
    """
    data = []
    # Explicit UTF-8: JSONL is UTF-8 by convention, and the platform default
    # encoding may differ (e.g. cp1252 on Windows), corrupting the read.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:  # skip blank lines so trailing newlines don't crash json.loads
                data.append(json.loads(line))
    return data
def prepare_datasets(tokenizer):
    """Build the training set: Hub example dataset plus optional custom JSONL.

    Custom examples, when the local file exists, are repeated 3x before the
    shuffle so they carry more weight than the public dataset.

    Args:
        tokenizer: tokenizer providing apply_chat_template().

    Returns:
        A shuffled `datasets.Dataset` with a rendered "text" column.
    """
    from datasets import Dataset, concatenate_datasets, load_dataset

    # --- Example dataset from the Hugging Face Hub ---
    print("πŸ“¦ Loading example dataset: nohurry/Opus-4.6-Reasoning-3000x-filtered")
    hub_ds = load_dataset(CONFIG["example_dataset"], split="train")
    # Keep coding/reasoning categories (empty string = uncategorized rows).
    keep = {"code", "math", "reasoning", "logic", ""}
    hub_ds = hub_ds.filter(lambda row: row.get("category", "") in keep)
    print(f" β†’ {len(hub_ds)} examples after filtering")
    # Convert columns to chat-messages format, dropping the originals.
    hub_ds = hub_ds.map(format_example_dataset, remove_columns=hub_ds.column_names)

    # --- Optional local JSONL (already in messages format) ---
    jsonl_path = CONFIG["custom_data_path"]
    custom_ds = None
    if os.path.exists(jsonl_path):
        print(f"πŸ“¦ Loading custom data: {jsonl_path}")
        custom_ds = Dataset.from_list(load_custom_data(jsonl_path))
        print(f" β†’ {len(custom_ds)} custom examples loaded")
    else:
        print(f"⚠️ Custom data not found at {jsonl_path}, using example dataset only")

    # --- Render each conversation with the model's chat template ---
    def to_text(row):
        rendered = tokenizer.apply_chat_template(
            row["messages"],
            tokenize=False,
            add_generation_prompt=False,
        )
        return {"text": rendered}

    hub_ds = hub_ds.map(to_text)
    if custom_ds is None:
        combined = hub_ds
    else:
        custom_ds = custom_ds.map(to_text)
        # Upsample custom data 3x to increase its weight in training.
        combined = concatenate_datasets([hub_ds] + [custom_ds] * 3)

    combined = combined.shuffle(seed=CONFIG["seed"])
    print(f"βœ… Total training examples: {len(combined)}")
    return combined
# =============================================================================
# MODEL SETUP
# =============================================================================
def setup_model():
    """Load the base model via Unsloth and attach LoRA adapters.

    Returns:
        (model, tokenizer) tuple ready for SFT training; the model is the
        PEFT-wrapped version with gradient checkpointing enabled.
    """
    from unsloth import FastLanguageModel

    print(f"πŸ”§ Loading model: {CONFIG['base_model']}")
    base, tokenizer = FastLanguageModel.from_pretrained(
        model_name=CONFIG["base_model"],
        max_seq_length=CONFIG["max_seq_length"],
        load_in_4bit=CONFIG["load_in_4bit"],
        dtype=None,  # let Unsloth auto-detect the best dtype for the GPU
    )

    print("πŸ”§ Attaching LoRA adapters")
    peft_model = FastLanguageModel.get_peft_model(
        base,
        r=CONFIG["lora_r"],
        target_modules=CONFIG["target_modules"],
        lora_alpha=CONFIG["lora_alpha"],
        lora_dropout=CONFIG["lora_dropout"],
        bias="none",
        use_gradient_checkpointing="unsloth",  # Unsloth checkpointing: ~30% less VRAM
        random_state=CONFIG["seed"],
    )
    return peft_model, tokenizer
# =============================================================================
# TRAINING
# =============================================================================
def train(model, tokenizer, dataset):
    """Run supervised fine-tuning with TRL's SFTTrainer.

    Args:
        model: PEFT-wrapped model from setup_model().
        tokenizer: matching tokenizer.
        dataset: dataset with a "text" column (see prepare_datasets).

    Returns:
        The SFTTrainer instance after training completes.
    """
    from trl import SFTTrainer, SFTConfig

    print("πŸš€ Starting training...")
    # These CONFIG keys map one-to-one onto SFTConfig fields.
    hp_keys = (
        "per_device_train_batch_size",
        "gradient_accumulation_steps",
        "warmup_steps",
        "num_train_epochs",
        "max_steps",
        "learning_rate",
        "optim",
        "lr_scheduler_type",
        "fp16",
        "bf16",
        "logging_steps",
        "save_steps",
        "seed",
        "max_seq_length",
    )
    sft_args = SFTConfig(
        output_dir=CONFIG["output_dir"],
        save_total_limit=3,  # keep only the 3 most recent checkpoints
        dataset_text_field="text",
        report_to="none",  # Set to "wandb" if you use Weights & Biases
        **{key: CONFIG[key] for key in hp_keys},
    )
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=sft_args,
    )
    result = trainer.train()
    print(f"βœ… Training complete! Loss: {result.training_loss:.4f}")
    return trainer
# =============================================================================
# EXPORT & PUSH
# =============================================================================
def save_model(model, tokenizer, push=False, hub_id=None):
    """Persist the trained model locally and optionally publish it.

    Writes three artifacts: LoRA adapters, a merged 16-bit checkpoint, and
    (best effort) a Q4_K_M GGUF export. With push=True and a hub_id, the
    merged model is also uploaded to the Hugging Face Hub.

    Args:
        model: trained (PEFT-wrapped) model.
        tokenizer: matching tokenizer.
        push: upload to the Hub when True.
        hub_id: Hub repo ID, e.g. "YourName/Alkaid-A"; required with push.
    """
    from unsloth import FastLanguageModel

    adapter_dir = CONFIG["final_model_dir"]

    # 1) LoRA adapters — small and quick to write.
    print(f"πŸ’Ύ Saving LoRA adapters to {adapter_dir}")
    model.save_pretrained(adapter_dir)
    tokenizer.save_pretrained(adapter_dir)

    # 2) Full merged weights in 16-bit for deployment.
    merged_dir = f"{adapter_dir}_merged_16bit"
    print(f"πŸ’Ύ Saving merged 16-bit model to {merged_dir}")
    model.save_pretrained_merged(merged_dir, tokenizer, save_method="merged_16bit")

    # 3) GGUF for llama.cpp / Ollama / LM Studio; failure here is tolerated.
    gguf_dir = f"{adapter_dir}_gguf"
    print(f"πŸ’Ύ Exporting GGUF (Q4_K_M) to {gguf_dir}")
    try:
        model.save_pretrained_gguf(gguf_dir, tokenizer, quantization_method="q4_k_m")
    except Exception as e:
        print(f"⚠️ GGUF export failed (non-critical): {e}")

    # 4) Optional Hub upload.
    if push:
        if hub_id:
            print(f"πŸš€ Pushing to Hugging Face Hub: {hub_id}")
            model.push_to_hub(hub_id, tokenizer, save_method="merged_16bit")
            print(f"βœ… Model live at: https://huggingface.co/{hub_id}")
        else:
            print("⚠️ --push requires --hub_id (e.g., --hub_id YourName/Alkaid-A)")
# =============================================================================
# MAIN
# =============================================================================
def main():
    """CLI entry point: load model, prepare data, train, then save/export."""
    parser = argparse.ArgumentParser(description="Train Alkaid A")
    parser.add_argument("--push", action="store_true", help="Push to Hugging Face Hub")
    parser.add_argument("--hub_id", type=str, default=None, help="Hub repo ID (e.g., YourName/Alkaid-A)")
    cli = parser.parse_args()

    model, tokenizer = setup_model()       # Step 1: base model + LoRA
    dataset = prepare_datasets(tokenizer)  # Step 2: training corpus
    train(model, tokenizer, dataset)       # Step 3: SFT run
    save_model(model, tokenizer, push=cli.push, hub_id=cli.hub_id)  # Step 4: export

    banner = "=" * 60
    print("\n" + banner)
    print("πŸŽ‰ Alkaid A training pipeline complete!")
    print(banner)
    print(f" Checkpoints: {CONFIG['output_dir']}")
    print(f" Final model: {CONFIG['final_model_dir']}")
    print(f" GGUF export: {CONFIG['final_model_dir']}_gguf")
    if cli.push and cli.hub_id:
        print(f" Hub: https://huggingface.co/{cli.hub_id}")
    print(banner)


if __name__ == "__main__":
    main()