#!/usr/bin/env python3
"""
Alkaid A — Fine-tuning Script

Base Model: Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled
Framework: Unsloth + TRL (SFTTrainer)
Method: LoRA (16-bit) with train_on_responses_only

Requirements:
    pip install unsloth transformers trl datasets peft accelerate bitsandbytes --break-system-packages

Usage:
    # 1. Login to Hugging Face first:
    huggingface-cli login --token YOUR_HF_TOKEN

    # 2. Run training:
    python train_alkaid_a.py

    # 3. Push to Hugging Face Hub:
    python train_alkaid_a.py --push --hub_id "YourUsername/Alkaid-A"
"""

import argparse
import json
import os
from pathlib import Path

# =============================================================================
# CONFIGURATION — Edit these values for your setup
# =============================================================================

CONFIG = {
    # Model
    "base_model": "Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled",
    "max_seq_length": 4096,
    "load_in_4bit": True,  # Set False if you have 56GB+ VRAM for 16-bit

    # LoRA
    "lora_r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0,
    "target_modules": [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],

    # Training
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 4,
    "warmup_steps": 10,
    "num_train_epochs": 3,
    "max_steps": -1,  # Set to positive number to override epochs
    "learning_rate": 2e-4,
    "optim": "adamw_8bit",
    "lr_scheduler_type": "cosine",
    "fp16": False,
    "bf16": True,
    "logging_steps": 10,
    "save_steps": 50,
    "seed": 42,

    # Data
    "example_dataset": "nohurry/Opus-4.6-Reasoning-3000x-filtered",
    "custom_data_path": "alkaid_a_training_data.jsonl",

    # Output
    "output_dir": "./alkaid_a_checkpoints",
    "final_model_dir": "./alkaid_a_final",
}

# =============================================================================
# DATA PREPARATION
# =============================================================================


def format_example_dataset(example):
    """Convert one Opus reasoning example into chat ``messages`` format.

    Input columns: ``problem``, ``thinking``, ``solution``.
    Returns a dict with a single ``messages`` key: system/user/assistant
    turns, where the assistant turn wraps the chain-of-thought in
    ``<think>`` tags to match the base model's reasoning output format.
    """
    system_msg = (
        "You are Alkaid A, an advanced AI coding and deployment assistant. "
        "You follow a rigorous multi-phase workflow including code review, "
        "iterative debugging, deployment planning, security audits, versioned "
        "releases, and comprehensive documentation."
    )
    # Build assistant response with thinking tags (matching the base model's
    # format). FIX: the original f-string had the newline skeleton but the
    # <think>/</think> tags themselves were missing, so the model would never
    # learn the delimiters its own comment promised.
    assistant_content = (
        f"<think>\n{example['thinking']}\n</think>\n\n{example['solution']}"
    )
    return {
        "messages": [
            {"role": "system", "content": system_msg},
            {"role": "user", "content": example["problem"]},
            {"role": "assistant", "content": assistant_content},
        ]
    }


def load_custom_data(path):
    """Load custom JSONL training data (already in ``messages`` format).

    Blank lines are skipped; every other line must be a valid JSON object.
    Raises ``json.JSONDecodeError`` on malformed lines so bad data fails
    loudly before training starts.
    """
    data = []
    # Explicit UTF-8: JSONL is UTF-8 by convention; don't depend on the
    # platform's locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                data.append(json.loads(line))
    return data


def prepare_datasets(tokenizer):
    """Combine example dataset + custom data into a single training set.

    The Hub dataset is filtered to coding/reasoning categories, converted to
    chat format, and rendered through the tokenizer's chat template into a
    ``text`` column. Custom data (if present) is upweighted 3x by repetition.
    """
    from datasets import Dataset, concatenate_datasets, load_dataset

    # --- Load example dataset from Hugging Face ---
    print("📦 Loading example dataset: nohurry/Opus-4.6-Reasoning-3000x-filtered")
    example_ds = load_dataset(CONFIG["example_dataset"], split="train")

    # Filter to coding/reasoning examples for best alignment.
    # "" is kept so rows with no category label are not silently dropped.
    example_ds = example_ds.filter(
        lambda x: x.get("category", "") in ["code", "math", "reasoning", "logic", ""]
    )
    print(f" → {len(example_ds)} examples after filtering")

    # Convert to chat format (drop original columns so only `messages` remains)
    example_ds = example_ds.map(
        format_example_dataset, remove_columns=example_ds.column_names
    )

    # --- Load custom data ---
    custom_path = CONFIG["custom_data_path"]
    if os.path.exists(custom_path):
        print(f"📦 Loading custom data: {custom_path}")
        custom_data = load_custom_data(custom_path)
        custom_ds = Dataset.from_list(custom_data)
        print(f" → {len(custom_ds)} custom examples loaded")
    else:
        print(f"⚠️ Custom data not found at {custom_path}, using example dataset only")
        custom_ds = None

    # --- Apply chat template ---
    def apply_template(example):
        text = tokenizer.apply_chat_template(
            example["messages"],
            tokenize=False,
            add_generation_prompt=False,  # training text ends with the answer
        )
        return {"text": text}

    example_ds = example_ds.map(apply_template)
    if custom_ds is not None:
        custom_ds = custom_ds.map(apply_template)
        # Combine: custom data is repeated 3x to increase its weight
        combined = concatenate_datasets([example_ds, custom_ds, custom_ds, custom_ds])
    else:
        combined = example_ds

    combined = combined.shuffle(seed=CONFIG["seed"])
    print(f"✅ Total training examples: {len(combined)}")
    return combined


# =============================================================================
# MODEL SETUP
# =============================================================================


def setup_model():
    """Load the base model with Unsloth optimizations and attach LoRA.

    Returns ``(model, tokenizer)`` ready for SFT training.
    """
    from unsloth import FastLanguageModel

    print(f"🔧 Loading model: {CONFIG['base_model']}")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=CONFIG["base_model"],
        max_seq_length=CONFIG["max_seq_length"],
        load_in_4bit=CONFIG["load_in_4bit"],
        dtype=None,  # Auto-detect
    )

    print("🔧 Attaching LoRA adapters")
    model = FastLanguageModel.get_peft_model(
        model,
        r=CONFIG["lora_r"],
        target_modules=CONFIG["target_modules"],
        lora_alpha=CONFIG["lora_alpha"],
        lora_dropout=CONFIG["lora_dropout"],
        bias="none",
        use_gradient_checkpointing="unsloth",  # 30% less VRAM
        random_state=CONFIG["seed"],
    )
    return model, tokenizer


# =============================================================================
# TRAINING
# =============================================================================


def train(model, tokenizer, dataset):
    """Run SFT training with TRL's SFTTrainer.

    NOTE(review): the module docstring advertises ``train_on_responses_only``
    (masking the prompt tokens from the loss), but it is not applied here —
    confirm whether that is intentional before a production run.
    """
    from trl import SFTTrainer, SFTConfig

    print("🚀 Starting training...")
    training_args = SFTConfig(
        output_dir=CONFIG["output_dir"],
        per_device_train_batch_size=CONFIG["per_device_train_batch_size"],
        gradient_accumulation_steps=CONFIG["gradient_accumulation_steps"],
        warmup_steps=CONFIG["warmup_steps"],
        num_train_epochs=CONFIG["num_train_epochs"],
        max_steps=CONFIG["max_steps"],
        learning_rate=CONFIG["learning_rate"],
        optim=CONFIG["optim"],
        lr_scheduler_type=CONFIG["lr_scheduler_type"],
        fp16=CONFIG["fp16"],
        bf16=CONFIG["bf16"],
        logging_steps=CONFIG["logging_steps"],
        save_steps=CONFIG["save_steps"],
        save_total_limit=3,
        seed=CONFIG["seed"],
        max_seq_length=CONFIG["max_seq_length"],
        dataset_text_field="text",
        report_to="none",  # Set to "wandb" if you use Weights & Biases
    )

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=training_args,
    )

    # Train
    stats = trainer.train()
    print(f"✅ Training complete! Loss: {stats.training_loss:.4f}")
    return trainer


# =============================================================================
# EXPORT & PUSH
# =============================================================================


def save_model(model, tokenizer, push=False, hub_id=None):
    """Save locally and optionally push to Hugging Face Hub.

    Saves (1) LoRA adapters, (2) a merged 16-bit model, (3) a best-effort
    GGUF export. With ``push=True`` and a ``hub_id``, also uploads the
    merged model to the Hub.
    """
    from unsloth import FastLanguageModel

    final_dir = CONFIG["final_model_dir"]

    # Save LoRA adapters (small, fast)
    print(f"💾 Saving LoRA adapters to {final_dir}")
    model.save_pretrained(final_dir)
    tokenizer.save_pretrained(final_dir)

    # Save merged model in 16-bit (for deployment)
    merged_dir = f"{final_dir}_merged_16bit"
    print(f"💾 Saving merged 16-bit model to {merged_dir}")
    model.save_pretrained_merged(merged_dir, tokenizer, save_method="merged_16bit")

    # Export GGUF for local inference (llama.cpp / Ollama / LM Studio)
    gguf_dir = f"{final_dir}_gguf"
    print(f"💾 Exporting GGUF (Q4_K_M) to {gguf_dir}")
    try:
        model.save_pretrained_gguf(gguf_dir, tokenizer, quantization_method="q4_k_m")
    except Exception as e:
        # Deliberate best-effort: GGUF conversion needs llama.cpp tooling
        # that may be absent; training artifacts above are already safe.
        print(f"⚠️ GGUF export failed (non-critical): {e}")

    # Push to Hub
    if push and hub_id:
        print(f"🚀 Pushing to Hugging Face Hub: {hub_id}")
        # FIX: Unsloth's API for uploading a merged model is
        # push_to_hub_merged (mirrors save_pretrained_merged above);
        # plain push_to_hub does not take (tokenizer, save_method=...).
        model.push_to_hub_merged(hub_id, tokenizer, save_method="merged_16bit")
        print(f"✅ Model live at: https://huggingface.co/{hub_id}")
    elif push:
        print("⚠️ --push requires --hub_id (e.g., --hub_id YourName/Alkaid-A)")


# =============================================================================
# MAIN
# =============================================================================


def main():
    """CLI entry point: parse args, then load → prepare → train → save."""
    parser = argparse.ArgumentParser(description="Train Alkaid A")
    parser.add_argument("--push", action="store_true", help="Push to Hugging Face Hub")
    parser.add_argument("--hub_id", type=str, default=None,
                        help="Hub repo ID (e.g., YourName/Alkaid-A)")
    args = parser.parse_args()

    # Step 1: Load model
    model, tokenizer = setup_model()

    # Step 2: Prepare data
    dataset = prepare_datasets(tokenizer)

    # Step 3: Train
    trainer = train(model, tokenizer, dataset)

    # Step 4: Save & export
    save_model(model, tokenizer, push=args.push, hub_id=args.hub_id)

    print("\n" + "=" * 60)
    print("🎉 Alkaid A training pipeline complete!")
    print("=" * 60)
    print(f" Checkpoints: {CONFIG['output_dir']}")
    print(f" Final model: {CONFIG['final_model_dir']}")
    print(f" GGUF export: {CONFIG['final_model_dir']}_gguf")
    if args.push and args.hub_id:
        print(f" Hub: https://huggingface.co/{args.hub_id}")
    print("=" * 60)


if __name__ == "__main__":
    main()