pennydoesdev committed on
Commit
7040a6f
·
verified ·
1 Parent(s): 6a4a555

Add training script

Browse files
Files changed (1) hide show
  1. train_alkaid_a.py +308 -0
train_alkaid_a.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
Alkaid A — Fine-tuning Script
Base Model: Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled
Framework: Unsloth + TRL (SFTTrainer)
Method: LoRA (16-bit) with train_on_responses_only

Requirements:
    pip install unsloth transformers trl datasets peft accelerate bitsandbytes --break-system-packages

Usage:
    # 1. Login to Hugging Face first:
    huggingface-cli login --token YOUR_HF_TOKEN

    # 2. Run training:
    python train_alkaid_a.py

    # 3. Push to Hugging Face Hub:
    python train_alkaid_a.py --push --hub_id "YourUsername/Alkaid-A"
"""

import argparse
import json
import os
from pathlib import Path
26
+
27
+ # =============================================================================
28
+ # CONFIGURATION β€” Edit these values for your setup
29
+ # =============================================================================
30
+
31
# Central configuration for the training run. Edit values here rather than
# scattering literals through the script.
CONFIG = {
    # --- Base model ---
    "base_model": "Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled",
    "max_seq_length": 4096,
    "load_in_4bit": True,  # switch to False only with 56GB+ VRAM for 16-bit

    # --- LoRA adapter hyperparameters ---
    "lora_r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0,
    "target_modules": [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],

    # --- Trainer hyperparameters ---
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 4,
    "warmup_steps": 10,
    "num_train_epochs": 3,
    "max_steps": -1,  # a positive value overrides num_train_epochs
    "learning_rate": 2e-4,
    "optim": "adamw_8bit",
    "lr_scheduler_type": "cosine",
    "fp16": False,
    "bf16": True,
    "logging_steps": 10,
    "save_steps": 50,
    "seed": 42,

    # --- Data sources ---
    "example_dataset": "nohurry/Opus-4.6-Reasoning-3000x-filtered",
    "custom_data_path": "alkaid_a_training_data.jsonl",

    # --- Output locations ---
    "output_dir": "./alkaid_a_checkpoints",
    "final_model_dir": "./alkaid_a_final",
}
69
+
70
+
71
+ # =============================================================================
72
+ # DATA PREPARATION
73
+ # =============================================================================
74
+
75
def format_example_dataset(example):
    """Map one row of the Opus reasoning dataset into chat ``messages`` format.

    The row's ``problem``/``thinking``/``solution`` columns become a
    system/user/assistant conversation; the reasoning is wrapped in
    ``<think>`` tags to match the base model's output format.
    """
    system_msg = (
        "You are Alkaid A, an advanced AI coding and deployment assistant. "
        "You follow a rigorous multi-phase workflow including code review, "
        "iterative debugging, deployment planning, security audits, versioned "
        "releases, and comprehensive documentation."
    )

    # Assistant turn: reasoning inside <think>...</think>, then the answer.
    reply = "<think>\n{}\n</think>\n\n{}".format(
        example["thinking"], example["solution"]
    )

    conversation = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": example["problem"]},
        {"role": "assistant", "content": reply},
    ]
    return {"messages": conversation}
97
+
98
+
99
def load_custom_data(path):
    """Load custom JSONL training data (already in chat ``messages`` format).

    Blank lines are skipped; every other line must be a complete JSON object.

    Args:
        path: Path to a JSONL file.

    Returns:
        list of parsed records, one per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    data = []
    # Explicit encoding so the data parses identically on every platform —
    # the locale default may not be UTF-8 (e.g. on Windows).
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                data.append(json.loads(line))
    return data
108
+
109
+
110
def prepare_datasets(tokenizer):
    """Combine the example dataset and optional custom data into one training set.

    Args:
        tokenizer: Tokenizer providing ``apply_chat_template`` — used to render
            each ``messages`` list into a single ``text`` field for SFT.

    Returns:
        A shuffled ``datasets.Dataset`` with a ``text`` column. Custom examples
        (if present) are repeated 3x to increase their weight vs. the example set.
    """
    from datasets import Dataset, concatenate_datasets, load_dataset

    # --- Load example dataset from Hugging Face ---
    print("📦 Loading example dataset: nohurry/Opus-4.6-Reasoning-3000x-filtered")
    example_ds = load_dataset(CONFIG["example_dataset"], split="train")

    # Keep coding/reasoning examples for best alignment; "" keeps rows with
    # no category column value.
    example_ds = example_ds.filter(
        lambda x: x.get("category", "") in ["code", "math", "reasoning", "logic", ""]
    )
    print(f" → {len(example_ds)} examples after filtering")

    # Convert to chat format; drop the original columns so only "messages" remains.
    example_ds = example_ds.map(format_example_dataset, remove_columns=example_ds.column_names)

    # --- Load custom data (optional) ---
    custom_path = CONFIG["custom_data_path"]
    if os.path.exists(custom_path):
        print(f"📦 Loading custom data: {custom_path}")
        custom_data = load_custom_data(custom_path)
        custom_ds = Dataset.from_list(custom_data)
        print(f" → {len(custom_ds)} custom examples loaded")
    else:
        print(f"⚠️ Custom data not found at {custom_path}, using example dataset only")
        custom_ds = None

    # --- Render each conversation to a single training string ---
    def apply_template(example):
        text = tokenizer.apply_chat_template(
            example["messages"],
            tokenize=False,
            add_generation_prompt=False,  # full conversations, not prompts
        )
        return {"text": text}

    example_ds = example_ds.map(apply_template)

    if custom_ds is not None:
        custom_ds = custom_ds.map(apply_template)
        # Combine: custom data is repeated 3x to increase its weight
        combined = concatenate_datasets([example_ds, custom_ds, custom_ds, custom_ds])
    else:
        combined = example_ds

    combined = combined.shuffle(seed=CONFIG["seed"])
    print(f"✅ Total training examples: {len(combined)}")
    return combined
159
+
160
+
161
+ # =============================================================================
162
+ # MODEL SETUP
163
+ # =============================================================================
164
+
165
def setup_model():
    """Load the base model with Unsloth optimizations and attach LoRA adapters.

    Returns:
        (model, tokenizer) — the PEFT-wrapped model and its tokenizer, ready
        for SFT training.
    """
    from unsloth import FastLanguageModel

    print(f"🔧 Loading model: {CONFIG['base_model']}")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=CONFIG["base_model"],
        max_seq_length=CONFIG["max_seq_length"],
        load_in_4bit=CONFIG["load_in_4bit"],
        dtype=None,  # auto-detect (bf16 on supported GPUs)
    )

    print("🔧 Attaching LoRA adapters")
    model = FastLanguageModel.get_peft_model(
        model,
        r=CONFIG["lora_r"],
        target_modules=CONFIG["target_modules"],
        lora_alpha=CONFIG["lora_alpha"],
        lora_dropout=CONFIG["lora_dropout"],
        bias="none",
        use_gradient_checkpointing="unsloth",  # Unsloth claims ~30% less VRAM
        random_state=CONFIG["seed"],
    )

    return model, tokenizer
190
+
191
+
192
+ # =============================================================================
193
+ # TRAINING
194
+ # =============================================================================
195
+
196
def train(model, tokenizer, dataset):
    """Run SFT training with TRL's SFTTrainer.

    Args:
        model: PEFT-wrapped model from ``setup_model``.
        tokenizer: Matching tokenizer.
        dataset: Dataset with a ``text`` column from ``prepare_datasets``.

    Returns:
        The trained ``SFTTrainer`` instance (checkpoints land in
        ``CONFIG["output_dir"]``).
    """
    from trl import SFTTrainer, SFTConfig

    print("🚀 Starting training...")

    training_args = SFTConfig(
        output_dir=CONFIG["output_dir"],
        per_device_train_batch_size=CONFIG["per_device_train_batch_size"],
        gradient_accumulation_steps=CONFIG["gradient_accumulation_steps"],
        warmup_steps=CONFIG["warmup_steps"],
        num_train_epochs=CONFIG["num_train_epochs"],
        max_steps=CONFIG["max_steps"],  # -1 defers to num_train_epochs
        learning_rate=CONFIG["learning_rate"],
        optim=CONFIG["optim"],
        lr_scheduler_type=CONFIG["lr_scheduler_type"],
        fp16=CONFIG["fp16"],
        bf16=CONFIG["bf16"],
        logging_steps=CONFIG["logging_steps"],
        save_steps=CONFIG["save_steps"],
        save_total_limit=3,  # keep only the 3 most recent checkpoints
        seed=CONFIG["seed"],
        max_seq_length=CONFIG["max_seq_length"],
        dataset_text_field="text",
        report_to="none",  # Set to "wandb" if you use Weights & Biases
    )

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=training_args,
    )

    stats = trainer.train()
    print(f"✅ Training complete! Loss: {stats.training_loss:.4f}")

    return trainer
235
+
236
+
237
+ # =============================================================================
238
+ # EXPORT & PUSH
239
+ # =============================================================================
240
+
241
def save_model(model, tokenizer, push=False, hub_id=None):
    """Save the trained model locally and optionally push it to the Hub.

    Produces three artifacts under ``CONFIG["final_model_dir"]``:
    LoRA adapters, a merged 16-bit model, and (best-effort) a Q4_K_M GGUF.

    Args:
        model: Trained Unsloth/PEFT model (provides ``save_pretrained_merged``
            and ``save_pretrained_gguf``).
        tokenizer: Matching tokenizer.
        push: If True, also push the merged model to the Hugging Face Hub.
        hub_id: Hub repo ID, e.g. ``"YourName/Alkaid-A"``; required when pushing.
    """
    final_dir = CONFIG["final_model_dir"]

    # Save LoRA adapters (small, fast)
    print(f"💾 Saving LoRA adapters to {final_dir}")
    model.save_pretrained(final_dir)
    tokenizer.save_pretrained(final_dir)

    # Save merged model in 16-bit (for deployment)
    merged_dir = f"{final_dir}_merged_16bit"
    print(f"💾 Saving merged 16-bit model to {merged_dir}")
    model.save_pretrained_merged(merged_dir, tokenizer, save_method="merged_16bit")

    # Export GGUF for local inference (llama.cpp / Ollama / LM Studio).
    # Best-effort: GGUF conversion needs extra tooling, so failure is non-fatal.
    gguf_dir = f"{final_dir}_gguf"
    print(f"💾 Exporting GGUF (Q4_K_M) to {gguf_dir}")
    try:
        model.save_pretrained_gguf(gguf_dir, tokenizer, quantization_method="q4_k_m")
    except Exception as e:
        print(f"⚠️ GGUF export failed (non-critical): {e}")

    # Push to Hub
    if push and hub_id:
        print(f"🚀 Pushing to Hugging Face Hub: {hub_id}")
        model.push_to_hub(hub_id, tokenizer, save_method="merged_16bit")
        print(f"✅ Model live at: https://huggingface.co/{hub_id}")
    elif push:
        print("⚠️ --push requires --hub_id (e.g., --hub_id YourName/Alkaid-A)")
272
+
273
+
274
+ # =============================================================================
275
+ # MAIN
276
+ # =============================================================================
277
+
278
def main():
    """CLI entry point: load model, prepare data, train, then save/export."""
    parser = argparse.ArgumentParser(description="Train Alkaid A")
    parser.add_argument("--push", action="store_true", help="Push to Hugging Face Hub")
    parser.add_argument("--hub_id", type=str, default=None, help="Hub repo ID (e.g., YourName/Alkaid-A)")
    args = parser.parse_args()

    # Step 1: Load model
    model, tokenizer = setup_model()

    # Step 2: Prepare data (needs the tokenizer for chat templating)
    dataset = prepare_datasets(tokenizer)

    # Step 3: Train
    trainer = train(model, tokenizer, dataset)

    # Step 4: Save & export
    save_model(model, tokenizer, push=args.push, hub_id=args.hub_id)

    print("\n" + "=" * 60)
    print("🎉 Alkaid A training pipeline complete!")
    print("=" * 60)
    print(f" Checkpoints: {CONFIG['output_dir']}")
    print(f" Final model: {CONFIG['final_model_dir']}")
    print(f" GGUF export: {CONFIG['final_model_dir']}_gguf")
    if args.push and args.hub_id:
        print(f" Hub: https://huggingface.co/{args.hub_id}")
    print("=" * 60)
305
+
306
+
307
# Run the pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()