# Orb / train_alkaid_a.py
# pennydoesdev's picture
# Add training script
# 7040a6f verified
#!/usr/bin/env python3
"""
Alkaid A β€” Fine-tuning Script
Base Model: Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled
Framework: Unsloth + TRL (SFTTrainer)
Method: LoRA (16-bit) with train_on_responses_only
Requirements:
pip install unsloth transformers trl datasets peft accelerate bitsandbytes --break-system-packages
Usage:
# 1. Login to Hugging Face first:
huggingface-cli login --token YOUR_HF_TOKEN
# 2. Run training:
python train_alkaid_a.py
# 3. Push to Hugging Face Hub:
python train_alkaid_a.py --push --hub_id "YourUsername/Alkaid-A"
"""
import argparse
import json
import os
from pathlib import Path
# =============================================================================
# CONFIGURATION — Edit these values for your setup
# =============================================================================
CONFIG = {
    # Model
    "base_model": "Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled",
    "max_seq_length": 4096,  # max tokens per training example
    "load_in_4bit": True,  # Set False if you have 56GB+ VRAM for 16-bit
    # LoRA
    "lora_r": 16,  # adapter rank
    "lora_alpha": 16,  # LoRA scaling factor (here equal to lora_r)
    "lora_dropout": 0,
    # Attention + MLP projection layers that receive LoRA adapters
    "target_modules": [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    # Training
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 4,  # effective batch size = 1 x 4
    "warmup_steps": 10,
    "num_train_epochs": 3,
    "max_steps": -1,  # Set to positive number to override epochs
    "learning_rate": 2e-4,
    "optim": "adamw_8bit",  # 8-bit AdamW to reduce optimizer VRAM
    "lr_scheduler_type": "cosine",
    "fp16": False,
    "bf16": True,  # NOTE(review): assumes bf16-capable GPU (Ampere+) — confirm hardware
    "logging_steps": 10,
    "save_steps": 50,
    "seed": 42,
    # Data
    "example_dataset": "nohurry/Opus-4.6-Reasoning-3000x-filtered",  # HF Hub dataset id
    "custom_data_path": "alkaid_a_training_data.jsonl",  # optional local JSONL in messages format
    # Output
    "output_dir": "./alkaid_a_checkpoints",  # intermediate trainer checkpoints
    "final_model_dir": "./alkaid_a_final",  # final adapters + merged/GGUF exports
}
# =============================================================================
# DATA PREPARATION
# =============================================================================
def format_example_dataset(example):
    """Map one row of the Opus reasoning dataset to chat-messages format.

    The source columns (problem, thinking, solution) become a three-turn
    conversation: a fixed system prompt, the user's problem, and an
    assistant reply whose reasoning is wrapped in <think> tags to match
    the base model's output format.

    Args:
        example: dict with "problem", "thinking", and "solution" keys.

    Returns:
        dict with a single "messages" key holding the conversation list.
    """
    persona = (
        "You are Alkaid A, an advanced AI coding and deployment assistant. "
        "You follow a rigorous multi-phase workflow including code review, "
        "iterative debugging, deployment planning, security audits, versioned "
        "releases, and comprehensive documentation."
    )
    # Reasoning goes inside <think> tags, followed by the final solution.
    reply = "<think>\n{}\n</think>\n\n{}".format(
        example["thinking"], example["solution"]
    )
    conversation = [
        {"role": "system", "content": persona},
        {"role": "user", "content": example["problem"]},
        {"role": "assistant", "content": reply},
    ]
    return {"messages": conversation}
def load_custom_data(path):
    """Load custom JSONL training data (already in messages format).

    Args:
        path: Path to a JSON Lines file; each non-blank line is one JSON
            object (expected to carry a "messages" list, but not validated).

    Returns:
        list of parsed JSON objects, in file order.

    Raises:
        FileNotFoundError: if *path* does not exist.
        json.JSONDecodeError: if any non-blank line is not valid JSON.
    """
    data = []
    # Explicit UTF-8: JSONL is UTF-8 by convention, and the platform default
    # encoding may differ (e.g. cp1252 on Windows), corrupting the read.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:  # skip blank lines so trailing newlines don't crash json.loads
                data.append(json.loads(line))
    return data
def prepare_datasets(tokenizer):
    """Build the training set: Hub example dataset plus optional custom JSONL.

    Custom examples, when the local file exists, are repeated 3x before the
    shuffle so they carry more weight than the public dataset.

    Args:
        tokenizer: tokenizer providing apply_chat_template().

    Returns:
        A shuffled `datasets.Dataset` with a rendered "text" column.
    """
    from datasets import Dataset, concatenate_datasets, load_dataset

    # --- Example dataset from the Hugging Face Hub ---
    print("πŸ“¦ Loading example dataset: nohurry/Opus-4.6-Reasoning-3000x-filtered")
    hub_ds = load_dataset(CONFIG["example_dataset"], split="train")
    # Keep coding/reasoning categories (empty string = uncategorized rows).
    keep = {"code", "math", "reasoning", "logic", ""}
    hub_ds = hub_ds.filter(lambda row: row.get("category", "") in keep)
    print(f" β†’ {len(hub_ds)} examples after filtering")
    # Convert columns to chat-messages format, dropping the originals.
    hub_ds = hub_ds.map(format_example_dataset, remove_columns=hub_ds.column_names)

    # --- Optional local JSONL (already in messages format) ---
    jsonl_path = CONFIG["custom_data_path"]
    custom_ds = None
    if os.path.exists(jsonl_path):
        print(f"πŸ“¦ Loading custom data: {jsonl_path}")
        custom_ds = Dataset.from_list(load_custom_data(jsonl_path))
        print(f" β†’ {len(custom_ds)} custom examples loaded")
    else:
        print(f"⚠️ Custom data not found at {jsonl_path}, using example dataset only")

    # --- Render each conversation with the model's chat template ---
    def to_text(row):
        rendered = tokenizer.apply_chat_template(
            row["messages"],
            tokenize=False,
            add_generation_prompt=False,
        )
        return {"text": rendered}

    hub_ds = hub_ds.map(to_text)
    if custom_ds is None:
        combined = hub_ds
    else:
        custom_ds = custom_ds.map(to_text)
        # Upsample custom data 3x to increase its weight in training.
        combined = concatenate_datasets([hub_ds] + [custom_ds] * 3)

    combined = combined.shuffle(seed=CONFIG["seed"])
    print(f"βœ… Total training examples: {len(combined)}")
    return combined
# =============================================================================
# MODEL SETUP
# =============================================================================
def setup_model():
    """Load the base model via Unsloth and attach LoRA adapters.

    Returns:
        (model, tokenizer) tuple ready for SFT training; the model is the
        PEFT-wrapped version with gradient checkpointing enabled.
    """
    from unsloth import FastLanguageModel

    print(f"πŸ”§ Loading model: {CONFIG['base_model']}")
    base, tokenizer = FastLanguageModel.from_pretrained(
        model_name=CONFIG["base_model"],
        max_seq_length=CONFIG["max_seq_length"],
        load_in_4bit=CONFIG["load_in_4bit"],
        dtype=None,  # let Unsloth auto-detect the best dtype for the GPU
    )

    print("πŸ”§ Attaching LoRA adapters")
    peft_model = FastLanguageModel.get_peft_model(
        base,
        r=CONFIG["lora_r"],
        target_modules=CONFIG["target_modules"],
        lora_alpha=CONFIG["lora_alpha"],
        lora_dropout=CONFIG["lora_dropout"],
        bias="none",
        use_gradient_checkpointing="unsloth",  # Unsloth checkpointing: ~30% less VRAM
        random_state=CONFIG["seed"],
    )
    return peft_model, tokenizer
# =============================================================================
# TRAINING
# =============================================================================
def train(model, tokenizer, dataset):
    """Run supervised fine-tuning with TRL's SFTTrainer.

    Args:
        model: PEFT-wrapped model from setup_model().
        tokenizer: matching tokenizer.
        dataset: dataset with a "text" column (see prepare_datasets).

    Returns:
        The SFTTrainer instance after training completes.
    """
    from trl import SFTTrainer, SFTConfig

    print("πŸš€ Starting training...")
    # These CONFIG keys map one-to-one onto SFTConfig fields.
    hp_keys = (
        "per_device_train_batch_size",
        "gradient_accumulation_steps",
        "warmup_steps",
        "num_train_epochs",
        "max_steps",
        "learning_rate",
        "optim",
        "lr_scheduler_type",
        "fp16",
        "bf16",
        "logging_steps",
        "save_steps",
        "seed",
        "max_seq_length",
    )
    sft_args = SFTConfig(
        output_dir=CONFIG["output_dir"],
        save_total_limit=3,  # keep only the 3 most recent checkpoints
        dataset_text_field="text",
        report_to="none",  # Set to "wandb" if you use Weights & Biases
        **{key: CONFIG[key] for key in hp_keys},
    )
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=sft_args,
    )
    result = trainer.train()
    print(f"βœ… Training complete! Loss: {result.training_loss:.4f}")
    return trainer
# =============================================================================
# EXPORT & PUSH
# =============================================================================
def save_model(model, tokenizer, push=False, hub_id=None):
    """Persist the trained model locally and optionally publish it.

    Writes three artifacts: LoRA adapters, a merged 16-bit checkpoint, and
    (best effort) a Q4_K_M GGUF export. With push=True and a hub_id, the
    merged model is also uploaded to the Hugging Face Hub.

    Args:
        model: trained (PEFT-wrapped) model.
        tokenizer: matching tokenizer.
        push: upload to the Hub when True.
        hub_id: Hub repo ID, e.g. "YourName/Alkaid-A"; required with push.
    """
    from unsloth import FastLanguageModel

    adapter_dir = CONFIG["final_model_dir"]

    # 1) LoRA adapters — small and quick to write.
    print(f"πŸ’Ύ Saving LoRA adapters to {adapter_dir}")
    model.save_pretrained(adapter_dir)
    tokenizer.save_pretrained(adapter_dir)

    # 2) Full merged weights in 16-bit for deployment.
    merged_dir = f"{adapter_dir}_merged_16bit"
    print(f"πŸ’Ύ Saving merged 16-bit model to {merged_dir}")
    model.save_pretrained_merged(merged_dir, tokenizer, save_method="merged_16bit")

    # 3) GGUF for llama.cpp / Ollama / LM Studio; failure here is tolerated.
    gguf_dir = f"{adapter_dir}_gguf"
    print(f"πŸ’Ύ Exporting GGUF (Q4_K_M) to {gguf_dir}")
    try:
        model.save_pretrained_gguf(gguf_dir, tokenizer, quantization_method="q4_k_m")
    except Exception as e:
        print(f"⚠️ GGUF export failed (non-critical): {e}")

    # 4) Optional Hub upload.
    if push:
        if hub_id:
            print(f"πŸš€ Pushing to Hugging Face Hub: {hub_id}")
            model.push_to_hub(hub_id, tokenizer, save_method="merged_16bit")
            print(f"βœ… Model live at: https://huggingface.co/{hub_id}")
        else:
            print("⚠️ --push requires --hub_id (e.g., --hub_id YourName/Alkaid-A)")
# =============================================================================
# MAIN
# =============================================================================
def main():
    """CLI entry point: load model, prepare data, train, then save/export."""
    parser = argparse.ArgumentParser(description="Train Alkaid A")
    parser.add_argument("--push", action="store_true", help="Push to Hugging Face Hub")
    parser.add_argument("--hub_id", type=str, default=None, help="Hub repo ID (e.g., YourName/Alkaid-A)")
    cli = parser.parse_args()

    model, tokenizer = setup_model()       # Step 1: base model + LoRA
    dataset = prepare_datasets(tokenizer)  # Step 2: training corpus
    train(model, tokenizer, dataset)       # Step 3: SFT run
    save_model(model, tokenizer, push=cli.push, hub_id=cli.hub_id)  # Step 4: export

    banner = "=" * 60
    print("\n" + banner)
    print("πŸŽ‰ Alkaid A training pipeline complete!")
    print(banner)
    print(f" Checkpoints: {CONFIG['output_dir']}")
    print(f" Final model: {CONFIG['final_model_dir']}")
    print(f" GGUF export: {CONFIG['final_model_dir']}_gguf")
    if cli.push and cli.hub_id:
        print(f" Hub: https://huggingface.co/{cli.hub_id}")
    print(banner)


if __name__ == "__main__":
    main()