"""
Alkaid A — Fine-tuning Script
Base Model: Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled
Framework: Unsloth + TRL (SFTTrainer)
Method: LoRA (16-bit) with train_on_responses_only
    NOTE: CONFIG["load_in_4bit"] is currently True, so the base weights are
    loaded 4-bit quantized; set it to False for LoRA on 16-bit weights.

Requirements:
    pip install unsloth transformers trl datasets peft accelerate bitsandbytes --break-system-packages

Usage:
    # 1. Login to Hugging Face first:
    huggingface-cli login --token YOUR_HF_TOKEN

    # 2. Run training:
    python train_alkaid_a.py

    # 3. Push to Hugging Face Hub:
    python train_alkaid_a.py --push --hub_id "YourUsername/Alkaid-A"
"""

import argparse
import json
import os
from pathlib import Path


# Central configuration for the whole pipeline. Every function in this file
# reads its settings from this dict, so this is the only place to tune a run.
CONFIG = {
    # --- Model ---------------------------------------------------------
    "base_model": "Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled",
    "max_seq_length": 4096,
    # Forwarded to FastLanguageModel.from_pretrained(load_in_4bit=...).
    "load_in_4bit": True,

    # --- LoRA ----------------------------------------------------------
    "lora_r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0,
    # Attention projections plus the MLP projections.
    "target_modules": [
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],

    # --- Training (forwarded to trl.SFTConfig) ---------------------------
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 4,
    "warmup_steps": 10,
    "num_train_epochs": 3,
    "max_steps": -1,
    "learning_rate": 2e-4,
    "optim": "adamw_8bit",
    "lr_scheduler_type": "cosine",
    "fp16": False,
    "bf16": True,
    "logging_steps": 10,
    "save_steps": 50,
    # Also used for LoRA initialisation and dataset shuffling.
    "seed": 42,

    # --- Data ----------------------------------------------------------
    "example_dataset": "nohurry/Opus-4.6-Reasoning-3000x-filtered",
    # Optional JSONL file; the pipeline falls back to the example dataset
    # alone when this path does not exist.
    "custom_data_path": "alkaid_a_training_data.jsonl",

    # --- Output --------------------------------------------------------
    "output_dir": "./alkaid_a_checkpoints",
    # Base path; the merged and GGUF exports append a suffix to it.
    "final_model_dir": "./alkaid_a_final",
}


def format_example_dataset(example):
    """Convert one row of the Opus reasoning dataset into chat format.

    Columns: problem, thinking, solution → system/user/assistant messages.

    Args:
        example: Mapping with ``problem`` and ``solution`` entries and an
            optional ``thinking`` entry.

    Returns:
        ``{"messages": [...]}`` holding a system, a user and an assistant
        message. The assistant content is the reasoning wrapped in
        ``<think>`` tags followed by the solution.

    Raises:
        KeyError: If ``problem`` or ``solution`` is missing.
    """
    system_msg = (
        "You are Alkaid A, an advanced AI coding and deployment assistant. "
        "You follow a rigorous multi-phase workflow including code review, "
        "iterative debugging, deployment planning, security audits, versioned "
        "releases, and comprehensive documentation."
    )

    # A null/missing reasoning trace must become an empty think block;
    # formatting None directly would train the model to emit the text "None".
    thinking = example.get("thinking") or ""
    assistant_content = f"<think>\n{thinking}\n</think>\n\n{example['solution']}"

    return {
        "messages": [
            {"role": "system", "content": system_msg},
            {"role": "user", "content": example["problem"]},
            {"role": "assistant", "content": assistant_content},
        ]
    }


def load_custom_data(path):
    """Load custom JSONL training data (already in messages format).

    Args:
        path: Location of a JSON Lines file with one JSON value per line.
            Blank and whitespace-only lines are skipped.

    Returns:
        A list with one decoded value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a line is not valid JSON. The message is
            prefixed with the file path and the 1-based line number.
    """
    records = []
    # JSONL is UTF-8 by convention; never fall back to the platform locale.
    with open(path, "r", encoding="utf-8") as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError as err:
                # Same exception type as before, but pointing at the
                # offending line of the file rather than just "line 1".
                raise json.JSONDecodeError(
                    f"{path}, line {line_no}: {err.msg}", err.doc, err.pos
                ) from err
    return records


def prepare_datasets(tokenizer):
    """Combine example dataset + custom data into a single training set.

    Steps:
      1. Load ``CONFIG["example_dataset"]`` and keep rows whose ``category``
         is code/math/reasoning/logic or empty/absent.
      2. Convert each row to chat messages with ``format_example_dataset``.
      3. If ``CONFIG["custom_data_path"]`` exists and is non-empty, load it
         with ``load_custom_data`` and mix it in several times (3 copies
         unless ``CONFIG["custom_data_repeats"]`` says otherwise).
      4. Render every conversation to a ``text`` column with the tokenizer's
         chat template, then shuffle with ``CONFIG["seed"]``.

    Args:
        tokenizer: Object exposing ``apply_chat_template``.

    Returns:
        A shuffled ``datasets.Dataset`` with ``messages`` and ``text``
        columns.

    Raises:
        ValueError: If a custom record is not an object with a ``messages``
            key.
    """
    from datasets import Dataset, concatenate_datasets, load_dataset

    allowed_categories = {"code", "math", "reasoning", "logic", ""}
    # Custom examples are upsampled so they are not drowned out by the much
    # larger example dataset.
    custom_repeats = CONFIG.get("custom_data_repeats", 3)

    def apply_template(example):
        text = tokenizer.apply_chat_template(
            example["messages"],
            tokenize=False,
            add_generation_prompt=False,
        )
        return {"text": text}

    # --- Example dataset ---------------------------------------------------
    print(f"📦 Loading example dataset: {CONFIG['example_dataset']}")
    example_ds = load_dataset(CONFIG["example_dataset"], split="train")

    # `or ""` keeps rows whose category is present but null, matching the
    # treatment of rows that have no category at all.
    example_ds = example_ds.filter(
        lambda row: (row.get("category") or "") in allowed_categories
    )
    print(f"   → {len(example_ds)} examples after filtering")

    example_ds = example_ds.map(
        format_example_dataset, remove_columns=example_ds.column_names
    )
    example_ds = example_ds.map(apply_template)

    # --- Custom dataset (optional) -----------------------------------------
    custom_ds = None
    custom_path = CONFIG["custom_data_path"]
    if os.path.exists(custom_path):
        print(f"📦 Loading custom data: {custom_path}")
        records = load_custom_data(custom_path)
        if records:
            for index, record in enumerate(records, start=1):
                if not isinstance(record, dict) or "messages" not in record:
                    raise ValueError(
                        f"{custom_path}: record #{index} has no 'messages' key"
                    )
            # Keep only `messages` so both datasets share the same columns
            # when they are concatenated below.
            custom_ds = Dataset.from_list(
                [{"messages": record["messages"]} for record in records]
            )
            print(f"   → {len(custom_ds)} custom examples loaded")
            custom_ds = custom_ds.map(apply_template)
        else:
            print(f"⚠️ Custom data at {custom_path} is empty, using example dataset only")
    else:
        print(f"⚠️ Custom data not found at {custom_path}, using example dataset only")

    # --- Combine and shuffle -----------------------------------------------
    if custom_ds is not None:
        combined = concatenate_datasets([example_ds] + [custom_ds] * custom_repeats)
    else:
        combined = example_ds

    combined = combined.shuffle(seed=CONFIG["seed"])
    print(f"✅ Total training examples: {len(combined)}")
    return combined


def setup_model():
    """Load the base model with Unsloth optimizations and attach LoRA.

    All settings come from ``CONFIG``: the checkpoint name, maximum sequence
    length and 4-bit flag for loading, and rank/alpha/dropout/target modules
    for the adapters.

    Returns:
        A ``(model, tokenizer)`` tuple, where ``model`` already has the LoRA
        adapters attached.
    """
    # Imported lazily so that merely importing this file does not pull in
    # Unsloth.
    from unsloth import FastLanguageModel

    print(f"🔧 Loading model: {CONFIG['base_model']}")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=CONFIG["base_model"],
        max_seq_length=CONFIG["max_seq_length"],
        load_in_4bit=CONFIG["load_in_4bit"],
        dtype=None,  # let Unsloth choose the compute dtype
    )

    print("🔧 Attaching LoRA adapters")
    model = FastLanguageModel.get_peft_model(
        model,
        r=CONFIG["lora_r"],
        target_modules=CONFIG["target_modules"],
        lora_alpha=CONFIG["lora_alpha"],
        lora_dropout=CONFIG["lora_dropout"],
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=CONFIG["seed"],
    )

    return model, tokenizer


def train(model, tokenizer, dataset):
    """Run SFT training with TRL's SFTTrainer.

    Hyper-parameters are read from ``CONFIG``. Unless
    ``CONFIG["train_on_responses_only"]`` is set to a falsy value, the trainer
    is wrapped with Unsloth's ``train_on_responses_only`` so the loss is
    computed on assistant turns only, as the file header describes.

    Args:
        model: LoRA-wrapped model returned by ``setup_model``.
        tokenizer: Matching tokenizer.
        dataset: Dataset with a ``text`` column of rendered conversations.

    Returns:
        The trainer after ``train()`` has completed.
    """
    from trl import SFTConfig, SFTTrainer

    print("🚀 Starting training...")

    # NOTE(review): `max_seq_length=` here and `tokenizer=` below are the
    # argument names the original script used; newer TRL releases rename
    # them — confirm against the pinned TRL/Unsloth versions.
    training_args = SFTConfig(
        output_dir=CONFIG["output_dir"],
        per_device_train_batch_size=CONFIG["per_device_train_batch_size"],
        gradient_accumulation_steps=CONFIG["gradient_accumulation_steps"],
        warmup_steps=CONFIG["warmup_steps"],
        num_train_epochs=CONFIG["num_train_epochs"],
        max_steps=CONFIG["max_steps"],
        learning_rate=CONFIG["learning_rate"],
        optim=CONFIG["optim"],
        lr_scheduler_type=CONFIG["lr_scheduler_type"],
        fp16=CONFIG["fp16"],
        bf16=CONFIG["bf16"],
        logging_steps=CONFIG["logging_steps"],
        save_steps=CONFIG["save_steps"],
        save_total_limit=3,
        seed=CONFIG["seed"],
        max_seq_length=CONFIG["max_seq_length"],
        dataset_text_field="text",
        report_to="none",
    )

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=training_args,
    )

    if CONFIG.get("train_on_responses_only", True):
        from unsloth.chat_templates import train_on_responses_only

        # Mask system/user tokens out of the loss. The default markers are
        # the ChatML turn headers; override them through CONFIG if the base
        # model's chat template uses different ones.
        trainer = train_on_responses_only(
            trainer,
            instruction_part=CONFIG.get("instruction_part", "<|im_start|>user\n"),
            response_part=CONFIG.get("response_part", "<|im_start|>assistant\n"),
        )

    stats = trainer.train()
    print(f"✅ Training complete! Loss: {stats.training_loss:.4f}")

    return trainer


def save_model(model, tokenizer, push=False, hub_id=None):
    """Save locally and optionally push to Hugging Face Hub.

    Three artefacts are written, all derived from
    ``CONFIG["final_model_dir"]``:
      * ``<dir>``              – LoRA adapters plus tokenizer
      * ``<dir>_merged_16bit`` – adapters merged into 16-bit weights
      * ``<dir>_gguf``         – Q4_K_M GGUF export (best effort)

    Args:
        model: Trained Unsloth/PEFT model.
        tokenizer: Matching tokenizer.
        push: When true, also upload the merged 16-bit model to the Hub.
        hub_id: Target repository id, e.g. ``"YourName/Alkaid-A"``. Required
            when ``push`` is true; otherwise a warning is printed and nothing
            is uploaded.
    """
    final_dir = CONFIG["final_model_dir"]

    # 1. LoRA adapters
    print(f"💾 Saving LoRA adapters to {final_dir}")
    model.save_pretrained(final_dir)
    tokenizer.save_pretrained(final_dir)

    # 2. Merged 16-bit weights
    merged_dir = f"{final_dir}_merged_16bit"
    print(f"💾 Saving merged 16-bit model to {merged_dir}")
    model.save_pretrained_merged(merged_dir, tokenizer, save_method="merged_16bit")

    # 3. GGUF export. Deliberately best-effort: a failure here is reported
    #    but must not discard the artefacts already written above.
    gguf_dir = f"{final_dir}_gguf"
    print(f"💾 Exporting GGUF (Q4_K_M) to {gguf_dir}")
    try:
        model.save_pretrained_gguf(gguf_dir, tokenizer, quantization_method="q4_k_m")
    except Exception as e:
        print(f"⚠️ GGUF export failed (non-critical): {e}")

    # 4. Optional upload
    if not push:
        return
    if not hub_id:
        print("⚠️ --push requires --hub_id (e.g., --hub_id YourName/Alkaid-A)")
        return

    print(f"🚀 Pushing to Hugging Face Hub: {hub_id}")
    # push_to_hub_merged is the Unsloth method that takes the tokenizer and
    # `save_method`; plain push_to_hub does not merge the adapters.
    model.push_to_hub_merged(hub_id, tokenizer, save_method="merged_16bit")
    print(f"✅ Model live at: https://huggingface.co/{hub_id}")


def main():
    """Command-line entry point: load the model, build data, train, save.

    Flags:
        --push      Upload the merged model to the Hugging Face Hub.
        --hub_id    Target repository id; mandatory together with --push.

    Exits with status 2 (argparse usage error) before any model is loaded
    when ``--push`` is given without ``--hub_id``.
    """
    parser = argparse.ArgumentParser(description="Train Alkaid A")
    parser.add_argument("--push", action="store_true", help="Push to Hugging Face Hub")
    parser.add_argument("--hub_id", type=str, default=None, help="Hub repo ID (e.g., YourName/Alkaid-A)")
    args = parser.parse_args()

    # Fail fast: otherwise the mistake only surfaces after the whole training
    # run, and the upload is silently skipped.
    if args.push and not args.hub_id:
        parser.error("--push requires --hub_id (e.g., --hub_id YourName/Alkaid-A)")

    # 1. Model + LoRA adapters
    model, tokenizer = setup_model()

    # 2. Training data (needs the tokenizer for its chat template)
    dataset = prepare_datasets(tokenizer)

    # 3. Fine-tune
    train(model, tokenizer, dataset)

    # 4. Persist and optionally publish
    save_model(model, tokenizer, push=args.push, hub_id=args.hub_id)

    rule = "=" * 60
    print("\n" + rule)
    print("🎉 Alkaid A training pipeline complete!")
    print(rule)
    print(f"  Checkpoints: {CONFIG['output_dir']}")
    print(f"  Final model: {CONFIG['final_model_dir']}")
    print(f"  GGUF export: {CONFIG['final_model_dir']}_gguf")
    if args.push and args.hub_id:
        print(f"  Hub: https://huggingface.co/{args.hub_id}")
    print(rule)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()