pennydoesdev committed on
Commit
7040a6f
·
verified ·
1 Parent(s): 6a4a555

Add training script

Browse files
Files changed (1) hide show
  1. train_alkaid_a.py +308 -0
train_alkaid_a.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
Alkaid A — Fine-tuning Script
Base Model: Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled
Framework: Unsloth + TRL (SFTTrainer)
Method: LoRA (16-bit) with train_on_responses_only

Requirements:
    pip install unsloth transformers trl datasets peft accelerate bitsandbytes --break-system-packages

Usage:
    # 1. Login to Hugging Face first:
    huggingface-cli login --token YOUR_HF_TOKEN

    # 2. Run training:
    python train_alkaid_a.py

    # 3. Push to Hugging Face Hub:
    python train_alkaid_a.py --push --hub_id "YourUsername/Alkaid-A"
"""

import argparse
import json
import os
from pathlib import Path
26
+
27
+ # =============================================================================
28
+ # CONFIGURATION β€” Edit these values for your setup
29
+ # =============================================================================
30
+
31
# Central configuration for the training run. Edit values here rather than
# scattering literals through the script.
CONFIG = {
    # --- Base model ---
    "base_model": "Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled",
    "max_seq_length": 4096,
    "load_in_4bit": True,  # switch to False only with 56GB+ VRAM for 16-bit

    # --- LoRA adapter hyperparameters ---
    "lora_r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0,
    "target_modules": [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],

    # --- Trainer hyperparameters ---
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 4,
    "warmup_steps": 10,
    "num_train_epochs": 3,
    "max_steps": -1,  # a positive value overrides num_train_epochs
    "learning_rate": 2e-4,
    "optim": "adamw_8bit",
    "lr_scheduler_type": "cosine",
    "fp16": False,
    "bf16": True,
    "logging_steps": 10,
    "save_steps": 50,
    "seed": 42,

    # --- Data sources ---
    "example_dataset": "nohurry/Opus-4.6-Reasoning-3000x-filtered",
    "custom_data_path": "alkaid_a_training_data.jsonl",

    # --- Output locations ---
    "output_dir": "./alkaid_a_checkpoints",
    "final_model_dir": "./alkaid_a_final",
}
69
+
70
+
71
+ # =============================================================================
72
+ # DATA PREPARATION
73
+ # =============================================================================
74
+
75
def format_example_dataset(example):
    """Map one row of the Opus reasoning dataset into chat ``messages`` format.

    The row's ``problem``/``thinking``/``solution`` columns become a
    system/user/assistant conversation; the reasoning is wrapped in
    ``<think>`` tags to match the base model's output format.
    """
    system_msg = (
        "You are Alkaid A, an advanced AI coding and deployment assistant. "
        "You follow a rigorous multi-phase workflow including code review, "
        "iterative debugging, deployment planning, security audits, versioned "
        "releases, and comprehensive documentation."
    )

    # Assistant turn: reasoning inside <think>...</think>, then the answer.
    reply = "<think>\n{}\n</think>\n\n{}".format(
        example["thinking"], example["solution"]
    )

    conversation = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": example["problem"]},
        {"role": "assistant", "content": reply},
    ]
    return {"messages": conversation}
97
+
98
+
99
def load_custom_data(path):
    """Load custom JSONL training data (already in chat ``messages`` format).

    Blank lines are skipped; every other line must be a complete JSON object.

    Args:
        path: Path to a JSONL file.

    Returns:
        list of parsed records, one per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    data = []
    # Explicit encoding so the data parses identically on every platform —
    # the locale default may not be UTF-8 (e.g. on Windows).
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                data.append(json.loads(line))
    return data
108
+
109
+
110
def prepare_datasets(tokenizer):
    """Combine the example dataset and optional custom data into one training set.

    Args:
        tokenizer: Tokenizer providing ``apply_chat_template`` — used to render
            each ``messages`` list into a single ``text`` field for SFT.

    Returns:
        A shuffled ``datasets.Dataset`` with a ``text`` column. Custom examples
        (if present) are repeated 3x to increase their weight vs. the example set.
    """
    from datasets import Dataset, concatenate_datasets, load_dataset

    # --- Load example dataset from Hugging Face ---
    print("📦 Loading example dataset: nohurry/Opus-4.6-Reasoning-3000x-filtered")
    example_ds = load_dataset(CONFIG["example_dataset"], split="train")

    # Keep coding/reasoning examples for best alignment; "" keeps rows with
    # no category column value.
    example_ds = example_ds.filter(
        lambda x: x.get("category", "") in ["code", "math", "reasoning", "logic", ""]
    )
    print(f" → {len(example_ds)} examples after filtering")

    # Convert to chat format; drop the original columns so only "messages" remains.
    example_ds = example_ds.map(format_example_dataset, remove_columns=example_ds.column_names)

    # --- Load custom data (optional) ---
    custom_path = CONFIG["custom_data_path"]
    if os.path.exists(custom_path):
        print(f"📦 Loading custom data: {custom_path}")
        custom_data = load_custom_data(custom_path)
        custom_ds = Dataset.from_list(custom_data)
        print(f" → {len(custom_ds)} custom examples loaded")
    else:
        print(f"⚠️ Custom data not found at {custom_path}, using example dataset only")
        custom_ds = None

    # --- Render each conversation to a single training string ---
    def apply_template(example):
        text = tokenizer.apply_chat_template(
            example["messages"],
            tokenize=False,
            add_generation_prompt=False,  # full conversations, not prompts
        )
        return {"text": text}

    example_ds = example_ds.map(apply_template)

    if custom_ds is not None:
        custom_ds = custom_ds.map(apply_template)
        # Combine: custom data is repeated 3x to increase its weight
        combined = concatenate_datasets([example_ds, custom_ds, custom_ds, custom_ds])
    else:
        combined = example_ds

    combined = combined.shuffle(seed=CONFIG["seed"])
    print(f"✅ Total training examples: {len(combined)}")
    return combined
159
+
160
+
161
+ # =============================================================================
162
+ # MODEL SETUP
163
+ # =============================================================================
164
+
165
def setup_model():
    """Load the base model with Unsloth optimizations and attach LoRA adapters.

    Returns:
        (model, tokenizer) — the PEFT-wrapped model and its tokenizer, ready
        for SFT training.
    """
    from unsloth import FastLanguageModel

    print(f"🔧 Loading model: {CONFIG['base_model']}")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=CONFIG["base_model"],
        max_seq_length=CONFIG["max_seq_length"],
        load_in_4bit=CONFIG["load_in_4bit"],
        dtype=None,  # auto-detect (bf16 on supported GPUs)
    )

    print("🔧 Attaching LoRA adapters")
    model = FastLanguageModel.get_peft_model(
        model,
        r=CONFIG["lora_r"],
        target_modules=CONFIG["target_modules"],
        lora_alpha=CONFIG["lora_alpha"],
        lora_dropout=CONFIG["lora_dropout"],
        bias="none",
        use_gradient_checkpointing="unsloth",  # Unsloth claims ~30% less VRAM
        random_state=CONFIG["seed"],
    )

    return model, tokenizer
190
+
191
+
192
+ # =============================================================================
193
+ # TRAINING
194
+ # =============================================================================
195
+
196
def train(model, tokenizer, dataset):
    """Run SFT training with TRL's SFTTrainer.

    Args:
        model: PEFT-wrapped model from ``setup_model``.
        tokenizer: Matching tokenizer.
        dataset: Dataset with a ``text`` column from ``prepare_datasets``.

    Returns:
        The trained ``SFTTrainer`` instance (checkpoints land in
        ``CONFIG["output_dir"]``).
    """
    from trl import SFTTrainer, SFTConfig

    print("🚀 Starting training...")

    training_args = SFTConfig(
        output_dir=CONFIG["output_dir"],
        per_device_train_batch_size=CONFIG["per_device_train_batch_size"],
        gradient_accumulation_steps=CONFIG["gradient_accumulation_steps"],
        warmup_steps=CONFIG["warmup_steps"],
        num_train_epochs=CONFIG["num_train_epochs"],
        max_steps=CONFIG["max_steps"],  # -1 defers to num_train_epochs
        learning_rate=CONFIG["learning_rate"],
        optim=CONFIG["optim"],
        lr_scheduler_type=CONFIG["lr_scheduler_type"],
        fp16=CONFIG["fp16"],
        bf16=CONFIG["bf16"],
        logging_steps=CONFIG["logging_steps"],
        save_steps=CONFIG["save_steps"],
        save_total_limit=3,  # keep only the 3 most recent checkpoints
        seed=CONFIG["seed"],
        max_seq_length=CONFIG["max_seq_length"],
        dataset_text_field="text",
        report_to="none",  # Set to "wandb" if you use Weights & Biases
    )

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=training_args,
    )

    stats = trainer.train()
    print(f"✅ Training complete! Loss: {stats.training_loss:.4f}")

    return trainer
235
+
236
+
237
+ # =============================================================================
238
+ # EXPORT & PUSH
239
+ # =============================================================================
240
+
241
def save_model(model, tokenizer, push=False, hub_id=None):
    """Save the trained model locally and optionally push it to the Hub.

    Produces three artifacts under ``CONFIG["final_model_dir"]``:
    LoRA adapters, a merged 16-bit model, and (best-effort) a Q4_K_M GGUF.

    Args:
        model: Trained Unsloth/PEFT model (provides ``save_pretrained_merged``
            and ``save_pretrained_gguf``).
        tokenizer: Matching tokenizer.
        push: If True, also push the merged model to the Hugging Face Hub.
        hub_id: Hub repo ID, e.g. ``"YourName/Alkaid-A"``; required when pushing.
    """
    final_dir = CONFIG["final_model_dir"]

    # Save LoRA adapters (small, fast)
    print(f"💾 Saving LoRA adapters to {final_dir}")
    model.save_pretrained(final_dir)
    tokenizer.save_pretrained(final_dir)

    # Save merged model in 16-bit (for deployment)
    merged_dir = f"{final_dir}_merged_16bit"
    print(f"💾 Saving merged 16-bit model to {merged_dir}")
    model.save_pretrained_merged(merged_dir, tokenizer, save_method="merged_16bit")

    # Export GGUF for local inference (llama.cpp / Ollama / LM Studio).
    # Best-effort: GGUF conversion needs extra tooling, so failure is non-fatal.
    gguf_dir = f"{final_dir}_gguf"
    print(f"💾 Exporting GGUF (Q4_K_M) to {gguf_dir}")
    try:
        model.save_pretrained_gguf(gguf_dir, tokenizer, quantization_method="q4_k_m")
    except Exception as e:
        print(f"⚠️ GGUF export failed (non-critical): {e}")

    # Push to Hub
    if push and hub_id:
        print(f"🚀 Pushing to Hugging Face Hub: {hub_id}")
        model.push_to_hub(hub_id, tokenizer, save_method="merged_16bit")
        print(f"✅ Model live at: https://huggingface.co/{hub_id}")
    elif push:
        print("⚠️ --push requires --hub_id (e.g., --hub_id YourName/Alkaid-A)")
272
+
273
+
274
+ # =============================================================================
275
+ # MAIN
276
+ # =============================================================================
277
+
278
def main():
    """CLI entry point: load model, prepare data, train, then save/export."""
    parser = argparse.ArgumentParser(description="Train Alkaid A")
    parser.add_argument("--push", action="store_true", help="Push to Hugging Face Hub")
    parser.add_argument("--hub_id", type=str, default=None, help="Hub repo ID (e.g., YourName/Alkaid-A)")
    args = parser.parse_args()

    # Step 1: Load model
    model, tokenizer = setup_model()

    # Step 2: Prepare data (needs the tokenizer for chat templating)
    dataset = prepare_datasets(tokenizer)

    # Step 3: Train
    trainer = train(model, tokenizer, dataset)

    # Step 4: Save & export
    save_model(model, tokenizer, push=args.push, hub_id=args.hub_id)

    print("\n" + "=" * 60)
    print("🎉 Alkaid A training pipeline complete!")
    print("=" * 60)
    print(f" Checkpoints: {CONFIG['output_dir']}")
    print(f" Final model: {CONFIG['final_model_dir']}")
    print(f" GGUF export: {CONFIG['final_model_dir']}_gguf")
    if args.push and args.hub_id:
        print(f" Hub: https://huggingface.co/{args.hub_id}")
    print("=" * 60)
305
+
306
+
307
# Run the pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()