Spaces:

pennydoesdev
/

Orb-Studio

Running

App Files Files Community

Orb-Studio / app.py

pennydoesdev

Deploy Alkaid A Studio — training + inference UI

8250859 verified 8 days ago

raw

history blame contribute delete

17.6 kB

	"""
	Alkaid A — Hugging Face Spaces App
	Training + Inference UI hosted entirely on Hugging Face

	Space Type: Docker (GPU required — A10G or A100 recommended)
	"""

	import os
	import json
	import threading
	import time
	import gradio as gr
	from pathlib import Path

	# =============================================================================
	# CONFIGURATION
	# =============================================================================

	# Hub ID of the checkpoint that run_training() loads (model and tokenizer).
	BASE_MODEL = "Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled"
	# Passed to SFTConfig(output_dir=...) during training.
	OUTPUT_DIR = "/home/user/app/alkaid_a_checkpoints"
	# Where run_training() saves the trained model and tokenizer; also what the
	# inference loader uses when the source is "Local checkpoint".
	FINAL_DIR = "/home/user/app/alkaid_a_final"
	# NOTE(review): DATA_DIR and LOG_FILE are not referenced anywhere in the
	# visible code — confirm whether they are still needed.
	DATA_DIR = "/home/user/app/data"
	LOG_FILE = "/home/user/app/training.log"

	# System message placed first in every formatted Opus training example and
	# used by generate_response() when no override is supplied.
	SYSTEM_PROMPT = (
	"You are Alkaid A, an advanced AI coding and deployment assistant. "
	"You follow a rigorous multi-phase workflow: (1) Provide detailed feedback "
	"with pros/cons on code or plans, identifying weak points and breaks. "
	"(2) Guide through a detailed debug phase. (3) Outline a deployment strategy "
	"ready for production. (4) Repeat debugging with variations across five iterations. "
	"(5) Conduct deep dives on integration covering security, scalability, and compliance. "
	"(6) Test all API endpoints and set monitoring. (7) Scrape help docs, check tool "
	"compatibility, infer issues, and adjust on the fly. (8) Ensure every version is "
	"backed up in a GitHub releases folder, starting at 00.00.00 and incrementing by "
	"00.00.01. (9) Guide through pushing changes to a GitHub repository. "
	"(10) Include user testing, performance benchmarking, and hardening. "
	"(11) Add documentation for future developers and automated testing. "
	"(12) Summarize what went well and acknowledge progress."
	)

	# Global state
	# training_status: "running" is checked and set by run_training() to refuse a
	# second run; "progress" holds the most recent log message; "log" accumulates
	# every message of the current run.
	training_status = {"running": False, "progress": "", "log": ""}
	# loaded_model: filled in by load_model_for_inference() and read by
	# generate_response(); "model" stays None until a load succeeds.
	loaded_model = {"model": None, "tokenizer": None}


	# =============================================================================
	# TRAINING TAB
	# =============================================================================

	def format_opus_example(example):
	    """Turn one Opus dataset row into a three-message chat record.

	    The row's ``thinking`` text is wrapped in ``<think>`` tags and placed
	    ahead of its ``solution`` to form the assistant turn. ``problem`` becomes
	    the user turn and the module-level ``SYSTEM_PROMPT`` the system turn.

	    Returns:
	        A dict with a single ``"messages"`` key holding the system, user and
	        assistant messages in that order.
	    """
	    reasoning = example["thinking"]
	    answer = example["solution"]
	    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
	    user_turn = {"role": "user", "content": example["problem"]}
	    assistant_turn = {
	        "role": "assistant",
	        "content": f"<think>\n{reasoning}\n</think>\n\n{answer}",
	    }
	    return {"messages": [system_turn, user_turn, assistant_turn]}


	def _parse_custom_rows(custom_data_text, log):
	    """Parse the JSONL text box into a list of ``{"messages": [...]}`` rows.

	    Blank lines are ignored. A line is skipped, with a warning sent through
	    *log*, when it is not valid JSON or when its value is not an object with a
	    list under ``"messages"``: the chat-template step reads
	    ``example["messages"]`` from every row, so such a line would otherwise
	    abort the run after the model has already been loaded. Only the
	    ``"messages"`` key is kept so the rows share the column layout of the
	    formatted Opus dataset they are concatenated with.
	    """
	    rows = []
	    if not custom_data_text or not custom_data_text.strip():
	        return rows
	    for line_no, raw_line in enumerate(custom_data_text.strip().split("\n"), start=1):
	        line = raw_line.strip()
	        if not line:
	            continue
	        try:
	            record = json.loads(line)
	        except json.JSONDecodeError:
	            log(f" Warning: Skipping invalid JSON line {line_no}")
	            continue
	        if not isinstance(record, dict) or not isinstance(record.get("messages"), list):
	            log(f" Warning: Skipping line {line_no} (expected an object with a 'messages' list)")
	            continue
	        rows.append({"messages": record["messages"]})
	    return rows


	def run_training(
	    hf_token,
	    hub_repo_id,
	    learning_rate,
	    num_epochs,
	    lora_rank,
	    max_seq_length,
	    batch_size,
	    use_4bit,
	    custom_data_text,
	    progress=gr.Progress(track_tqdm=True),
	):
	    """Run the full training pipeline and return the accumulated log text.

	    Steps: optional Hub login, load BASE_MODEL (optionally 4-bit), attach
	    LoRA adapters, build the dataset (Opus rows plus custom rows repeated
	    three times), train with TRL's SFTTrainer, save to FINAL_DIR, and push to
	    the Hub when both a token and a repo id were given.

	    Args:
	        hf_token: Hugging Face token; empty/None means "save locally only".
	        hub_repo_id: Target Hub repo for the push; empty/None skips the push.
	        learning_rate: Converted with ``float()`` for SFTConfig.
	        num_epochs: Converted with ``int()`` for SFTConfig.
	        lora_rank: Converted with ``int()``; used for both ``r`` and ``lora_alpha``.
	        max_seq_length: Converted with ``int()`` for SFTConfig.
	        batch_size: Converted with ``int()``; per-device train batch size.
	        use_4bit: When truthy, load the base model with an NF4 BitsAndBytes config.
	        custom_data_text: JSONL text, one object per line, each with a
	            ``"messages"`` list.
	        progress: Gradio progress object; not referenced in the body
	            (presumably only there so Gradio mirrors tqdm bars — confirm).

	    Returns:
	        The full log of the run, including the error message and traceback if
	        any step raised, or a short "already running" notice when another run
	        holds the ``training_status["running"]`` flag.
	    """
	    if training_status["running"]:
	        return "Training is already running. Please wait."

	    training_status["running"] = True
	    training_status["log"] = ""

	    def log(msg):
	        training_status["log"] += msg + "\n"
	        training_status["progress"] = msg
	        print(msg)

	    try:
	        # Text boxes can carry stray whitespace; treat blank as "not provided".
	        hf_token = (hf_token or "").strip()
	        hub_repo_id = (hub_repo_id or "").strip()

	        # --- Login ---
	        log("Step 1/7: Authenticating with Hugging Face...")
	        if hf_token:
	            from huggingface_hub import login
	            login(token=hf_token)
	            log(" Logged in successfully.")
	        else:
	            log(" No token provided — will save locally only.")

	        # --- Load Model ---
	        log(f"Step 2/7: Loading base model ({BASE_MODEL})...")
	        from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
	        import torch

	        load_kwargs = {
	            "device_map": "auto",
	            "torch_dtype": torch.bfloat16,
	            "trust_remote_code": True,
	        }
	        if use_4bit:
	            load_kwargs["quantization_config"] = BitsAndBytesConfig(
	                load_in_4bit=True,
	                bnb_4bit_quant_type="nf4",
	                bnb_4bit_compute_dtype=torch.bfloat16,
	                bnb_4bit_use_double_quant=True,
	            )
	        model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **load_kwargs)

	        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
	        if tokenizer.pad_token is None:
	            tokenizer.pad_token = tokenizer.eos_token
	        log(" Model loaded.")

	        # --- Attach LoRA ---
	        log(f"Step 3/7: Attaching LoRA (rank={lora_rank})...")
	        from peft import LoraConfig, get_peft_model, TaskType

	        # NOTE(review): PEFT's quantization guide calls
	        # prepare_model_for_kbit_training() before get_peft_model() on k-bit
	        # models — confirm whether it is intentionally omitted here.
	        lora_config = LoraConfig(
	            r=int(lora_rank),
	            lora_alpha=int(lora_rank),
	            target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
	            lora_dropout=0.0,
	            bias="none",
	            task_type=TaskType.CAUSAL_LM,
	        )
	        model = get_peft_model(model, lora_config)
	        trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
	        total = sum(p.numel() for p in model.parameters())
	        log(f" LoRA attached. Trainable: {trainable:,} / {total:,} ({100*trainable/total:.2f}%)")

	        # --- Prepare Data ---
	        log("Step 4/7: Preparing training data...")
	        from datasets import load_dataset, Dataset, concatenate_datasets

	        # Load Opus dataset
	        opus_ds = load_dataset("nohurry/Opus-4.6-Reasoning-3000x-filtered", split="train")
	        opus_ds = opus_ds.map(format_opus_example, remove_columns=opus_ds.column_names)
	        log(f" Opus dataset: {len(opus_ds)} examples")

	        # Process custom data if provided
	        custom_rows = _parse_custom_rows(custom_data_text, log)
	        if custom_rows:
	            custom_ds = Dataset.from_list(custom_rows)
	            # Weight custom data 3x
	            all_ds = concatenate_datasets([opus_ds, custom_ds, custom_ds, custom_ds])
	            log(f" Custom data: {len(custom_rows)} examples (weighted 3x)")
	        else:
	            all_ds = opus_ds

	        # Apply chat template
	        def apply_template(example):
	            text = tokenizer.apply_chat_template(
	                example["messages"], tokenize=False, add_generation_prompt=False
	            )
	            return {"text": text}

	        all_ds = all_ds.map(apply_template)
	        all_ds = all_ds.shuffle(seed=42)
	        log(f" Total training examples: {len(all_ds)}")

	        # --- Train ---
	        log(f"Step 5/7: Training ({num_epochs} epochs, lr={learning_rate})...")
	        from trl import SFTTrainer, SFTConfig

	        # NOTE(review): the `max_seq_length=` and `tokenizer=` spellings depend
	        # on the installed TRL release — confirm against the pinned version.
	        training_args = SFTConfig(
	            output_dir=OUTPUT_DIR,
	            per_device_train_batch_size=int(batch_size),
	            gradient_accumulation_steps=4,
	            warmup_steps=10,
	            num_train_epochs=int(num_epochs),
	            learning_rate=float(learning_rate),
	            optim="adamw_8bit",
	            lr_scheduler_type="cosine",
	            bf16=True,
	            fp16=False,
	            logging_steps=5,
	            save_steps=50,
	            save_total_limit=2,
	            max_seq_length=int(max_seq_length),
	            dataset_text_field="text",
	            report_to="none",
	            seed=42,
	        )

	        trainer = SFTTrainer(
	            model=model,
	            tokenizer=tokenizer,
	            train_dataset=all_ds,
	            args=training_args,
	        )

	        result = trainer.train()
	        log(f" Training complete! Final loss: {result.training_loss:.4f}")

	        # --- Save ---
	        log("Step 6/7: Saving model...")
	        model.save_pretrained(FINAL_DIR)
	        tokenizer.save_pretrained(FINAL_DIR)
	        log(f" Saved to {FINAL_DIR}")

	        # --- Push to Hub ---
	        if hf_token and hub_repo_id:
	            log(f"Step 7/7: Pushing to {hub_repo_id}...")
	            # `token=` is the current keyword; `use_auth_token=` is deprecated.
	            model.push_to_hub(hub_repo_id, token=hf_token)
	            tokenizer.push_to_hub(hub_repo_id, token=hf_token)
	            log(f" Live at: https://huggingface.co/{hub_repo_id}")
	        else:
	            log("Step 7/7: Skipped push (no token or repo ID).")

	        log("\n" + "=" * 50)
	        log("TRAINING COMPLETE!")
	        log("=" * 50)

	    except Exception as e:
	        import traceback
	        log(f"\nERROR: {str(e)}")
	        log(traceback.format_exc())

	    finally:
	        # Always release the flag, whichever way the run ended, so a failed
	        # or interrupted run can never block every later attempt.
	        training_status["running"] = False

	    return training_status["log"]


	# =============================================================================
	# INFERENCE TAB
	# =============================================================================

	def load_model_for_inference(model_source, hf_token):
	    """Load a 4-bit quantized model and tokenizer into ``loaded_model``.

	    Args:
	        model_source: The literal ``"Local checkpoint"`` to load from
	            FINAL_DIR; any other non-blank value is handed to
	            ``from_pretrained`` as the source (after trimming whitespace).
	        hf_token: Optional Hugging Face token; when non-blank it is passed to
	            ``huggingface_hub.login`` before loading.

	    Returns:
	        ``"Model loaded from: <source>"`` on success, otherwise a string
	        starting with ``"Error loading model: "``. Exceptions are not
	        propagated to the caller.
	    """
	    try:
	        from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
	        import torch

	        requested = (model_source or "").strip()
	        if not requested:
	            return "Error loading model: no model source provided."
	        source = FINAL_DIR if requested == "Local checkpoint" else requested

	        hf_token = (hf_token or "").strip()
	        if hf_token:
	            from huggingface_hub import login
	            login(token=hf_token)

	        bnb_config = BitsAndBytesConfig(
	            load_in_4bit=True,
	            bnb_4bit_quant_type="nf4",
	            bnb_4bit_compute_dtype=torch.bfloat16,
	        )

	        # Load into locals first: if the model load fails, the previously
	        # loaded tokenizer/model pair must stay untouched and consistent.
	        tokenizer = AutoTokenizer.from_pretrained(source, trust_remote_code=True)
	        model = AutoModelForCausalLM.from_pretrained(
	            source,
	            quantization_config=bnb_config,
	            device_map="auto",
	            torch_dtype=torch.bfloat16,
	            trust_remote_code=True,
	        )

	        if tokenizer.pad_token is None:
	            tokenizer.pad_token = tokenizer.eos_token

	        # Publish both halves together only after everything succeeded.
	        loaded_model["tokenizer"] = tokenizer
	        loaded_model["model"] = model

	        return f"Model loaded from: {source}"

	    except Exception as e:
	        return f"Error loading model: {str(e)}"


	def generate_response(user_message, temperature, max_tokens, system_override):
	    """Generate a single-turn reply with the model held in ``loaded_model``.

	    Args:
	        user_message: Text of the user turn.
	        temperature: Sampling temperature; converted with ``float()``.
	        max_tokens: Upper bound on newly generated tokens; converted with ``int()``.
	        system_override: Replacement system prompt; when blank or None the
	            module-level SYSTEM_PROMPT is used.

	    Returns:
	        The decoded continuation (prompt tokens and special tokens removed),
	        a "please load a model" notice when nothing is loaded, or a string
	        starting with ``"Generation error: "`` if anything raised.
	    """
	    if loaded_model["model"] is None:
	        return "Please load a model first using the 'Load Model' button."

	    try:
	        import torch

	        model = loaded_model["model"]
	        tokenizer = loaded_model["tokenizer"]

	        # Tolerate None as well as blank text for the optional override.
	        has_override = bool((system_override or "").strip())
	        sys_prompt = system_override if has_override else SYSTEM_PROMPT

	        messages = [
	            {"role": "system", "content": sys_prompt},
	            {"role": "user", "content": user_message},
	        ]

	        # add_generation_prompt=True appends the tokens that open the
	        # assistant turn; without it the prompt ends after the user message
	        # and the model is not cued to answer. return_dict=True also yields
	        # the attention mask, which generate() cannot infer reliably when the
	        # pad token is the EOS token.
	        encoded = tokenizer.apply_chat_template(
	            messages,
	            add_generation_prompt=True,
	            tokenize=True,
	            return_dict=True,
	            return_tensors="pt",
	        ).to(model.device)
	        prompt_len = encoded["input_ids"].shape[-1]

	        with torch.no_grad():
	            output = model.generate(
	                input_ids=encoded["input_ids"],
	                attention_mask=encoded["attention_mask"],
	                max_new_tokens=int(max_tokens),
	                temperature=float(temperature),
	                top_p=0.9,
	                do_sample=True,
	                pad_token_id=tokenizer.pad_token_id,
	            )

	        # Drop the echoed prompt so only the newly generated text is returned.
	        response = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
	        return response

	    except Exception as e:
	        return f"Generation error: {str(e)}"


	# =============================================================================
	# GRADIO UI
	# =============================================================================

	# Load default custom data
	# The text read here is used as the initial value of the custom-data editor;
	# it stays empty when the bundled JSONL file is absent.
	default_custom_data = ""
	custom_data_path = Path("/home/user/app/alkaid_a_training_data.jsonl")
	if custom_data_path.exists():
	    # Decode explicitly as UTF-8 so the result does not depend on the
	    # locale's default encoding.
	    default_custom_data = custom_data_path.read_text(encoding="utf-8")

	# Page shell: theme and CSS are built first, then every component is declared
	# inside the Blocks context in display order.
	_ui_theme = gr.themes.Soft(primary_hue="indigo", neutral_hue="slate")
	_ui_css = """
	.main-title { text-align: center; margin-bottom: 0; }
	.subtitle { text-align: center; color: #6b7280; margin-top: 4px; }
	"""

	with gr.Blocks(title="Alkaid A — Train & Deploy", theme=_ui_theme, css=_ui_css) as app:

	    gr.Markdown("# Alkaid A", elem_classes="main-title")
	    gr.Markdown(
	        "Fine-tuned from Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled",
	        elem_classes="subtitle",
	    )

	    with gr.Tabs():

	        # ---- TRAINING TAB ----
	        with gr.Tab("Train", id="train"):
	            gr.Markdown("### Train Your Model")
	            gr.Markdown(
	                "This will fine-tune the base model using LoRA on the Opus reasoning "
	                "dataset plus your custom data, then push the result to your Hugging Face repo."
	            )

	            with gr.Row():
	                # Left column: Hub credentials and destination.
	                with gr.Column(scale=1):
	                    hf_token_train = gr.Textbox(
	                        label="Hugging Face Token (Write access)",
	                        type="password",
	                        placeholder="hf_...",
	                    )
	                    hub_repo = gr.Textbox(
	                        label="Hub Repo ID (e.g., YourName/Alkaid-A)",
	                        placeholder="YourUsername/Alkaid-A",
	                    )

	                # Right column: hyperparameters, two per row.
	                with gr.Column(scale=1):
	                    with gr.Row():
	                        lr = gr.Number(label="Learning Rate", value=2e-4)
	                        epochs = gr.Number(label="Epochs", value=3, precision=0)
	                    with gr.Row():
	                        rank = gr.Number(label="LoRA Rank", value=16, precision=0)
	                        seq_len = gr.Number(label="Max Seq Length", value=2048, precision=0)
	                    with gr.Row():
	                        bs = gr.Number(label="Batch Size", value=1, precision=0)
	                        fourbit = gr.Checkbox(label="4-bit Quantization", value=True)

	            custom_data = gr.Code(
	                label="Custom Training Data (JSONL — one JSON object per line)",
	                value=default_custom_data,
	                language="json",
	                lines=10,
	            )

	            train_btn = gr.Button("Start Training", variant="primary", size="lg")
	            train_output = gr.Textbox(label="Training Log", lines=20, interactive=False)

	            # Order must match run_training's positional parameters.
	            _training_inputs = [
	                hf_token_train,
	                hub_repo,
	                lr,
	                epochs,
	                rank,
	                seq_len,
	                bs,
	                fourbit,
	                custom_data,
	            ]
	            train_btn.click(fn=run_training, inputs=_training_inputs, outputs=train_output)

	        # ---- INFERENCE TAB ----
	        with gr.Tab("Chat", id="chat"):
	            gr.Markdown("### Chat with Alkaid A")

	            with gr.Row():
	                model_source = gr.Textbox(
	                    label="Model Source",
	                    value="Local checkpoint",
	                    placeholder="Local checkpoint OR HuggingFace repo ID",
	                )
	                hf_token_infer = gr.Textbox(
	                    label="HF Token (if loading from Hub)",
	                    type="password",
	                    placeholder="hf_...",
	                )
	                load_btn = gr.Button("Load Model")

	            load_status = gr.Textbox(label="Status", interactive=False)
	            load_btn.click(
	                fn=load_model_for_inference,
	                inputs=[model_source, hf_token_infer],
	                outputs=load_status,
	            )

	            system_box = gr.Textbox(
	                label="System Prompt (optional override)",
	                value="",
	                placeholder="Leave empty to use default Alkaid A system prompt",
	                lines=3,
	            )

	            chatbot_input = gr.Textbox(
	                label="Your Message",
	                placeholder="Paste your code or describe your plan...",
	                lines=6,
	            )

	            with gr.Row():
	                temp = gr.Slider(
	                    label="Temperature",
	                    minimum=0.1,
	                    maximum=1.5,
	                    value=0.7,
	                    step=0.1,
	                )
	                max_tok = gr.Slider(
	                    label="Max Tokens",
	                    minimum=256,
	                    maximum=4096,
	                    value=2048,
	                    step=256,
	                )

	            gen_btn = gr.Button("Generate", variant="primary")
	            response_box = gr.Textbox(label="Alkaid A Response", lines=20, interactive=False)

	            # Order must match generate_response's positional parameters.
	            _generation_inputs = [chatbot_input, temp, max_tok, system_box]
	            gen_btn.click(fn=generate_response, inputs=_generation_inputs, outputs=response_box)

	        # ---- ABOUT TAB ----
	        with gr.Tab("About", id="about"):
	            gr.Markdown("""
	### Alkaid A

	Base Model: [Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled](https://huggingface.co/Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled)

	Training Dataset: [nohurry/Opus-4.6-Reasoning-3000x-filtered](https://huggingface.co/datasets/nohurry/Opus-4.6-Reasoning-3000x-filtered) (2,326 reasoning examples)

	Method: LoRA SFT with 4-bit quantization

	Alkaid A's Workflow:
	1. Detailed code/plan feedback with pros and cons
	2. Guided debug phase
	3. Production deployment strategy
	4. 5x debug iterations with variations
	5. Security, scalability, compliance deep dive
	6. API endpoint testing and monitoring setup
	7. Help doc scraping and compatibility checks
	8. GitHub versioned releases (00.00.XX)
	9. Guided repository push
	10. User testing, benchmarking, hardening
	11. Developer documentation and automated tests
	12. Progress summary and acknowledgment

	License: Apache 2.0
	""")


	if __name__ == "__main__":
	    # Serve on all interfaces at port 7860, without a public share link.
	    app.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	        share=False,
	    )