Spaces:

Optitransfer
/

borg-merge-v1-chat

Sleeping

App Files Files Community

Optitransfer committed 24 days ago

Commit

69fc7ff

verified ·

1 Parent(s): 4526530

Add Gradio chat interface

Browse files

Files changed (1) hide show

app.py +89 -0

app.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+# Hub repository id used for both the tokenizer and the model weights below.
+MODEL_ID = "Optitransfer/Qwen2.5-7B-Instruct-borg-merge-v1"
+# Load the tokenizer and model once, at import time; respond() reuses these
+# module-level objects for every chat request.
+# NOTE(review): trust_remote_code=True permits Python code shipped in the model
+# repository to execute locally — confirm this checkpoint actually requires it.
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.float16,  # load the weights in half precision
+    device_map="auto",  # presumably lets the library pick device placement — TODO confirm
+    trust_remote_code=True,
+)
+# System message that respond() places first in every conversation it builds.
+SYSTEM_PROMPT = (
+    "You are a helpful, harmless, and honest assistant created by Optitransfer. "
+    "You are a merged model combining knowledge from 9 source models spanning "
+    "4 architecture families. Answer questions clearly and step by step when "
+    "reasoning is needed."
+)
+def respond(message, history):
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+    for user_msg, bot_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if bot_msg:
+            messages.append({"role": "assistant", "content": bot_msg})
+    messages.append({"role": "user", "content": message})
+    text = tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    inputs = tokenizer(text, return_tensors="pt").to(model.device)
+    with torch.no_grad():
+        output_ids = model.generate(
+            **inputs,
+            max_new_tokens=1024,
+            temperature=0.7,
+            top_p=0.9,
+            repetition_penalty=1.1,
+            do_sample=True,
+        )
+    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
+    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
+    return response
+# Markdown text handed to the chat interface below as its `description`.
+DESCRIPTION = """
+# 🤖 Qwen2.5-7B-Instruct — Borg Merge v1
+**A single checkpoint created by merging 9 models from 4 architecture families.**
+This model was created using training-free cross-family weight merging — no fine-tuning,
+no distillation, no router. It lifts **GSM8K by +3.3 pp**, **ARC-Challenge by +3.2 pp**,
+and **IFEval by +2.6 pp** over the unmerged Qwen2.5-7B-Instruct anchor.
+Try asking it reasoning questions, math problems, or instruction-following tasks!
+| Task | Anchor | This Model | Lift |
+|---|---|---|---|
+| GSM8K | 0.812 | **0.845** | **+3.3 pp** |
+| ARC-Challenge | 0.526 | **0.557** | **+3.2 pp** |
+| IFEval | 0.655 | **0.681** | **+2.6 pp** |
+"""
+# Sample prompts handed to the chat interface below as its `examples`.
+EXAMPLES = [
+    "Solve step by step: A store offers 30% off, then an additional 20% off the sale price. What's the total discount percentage?",
+    "Explain the difference between supervised and unsupervised learning. Give a real-world example of each.",
+    "Write a Python function that finds the longest common subsequence of two strings.",
+    "If 5 machines can produce 100 widgets in 4 hours, how many widgets can 8 machines produce in 6 hours?",
+    "What are three key advantages of renewable energy over fossil fuels? Be specific.",
+]
+# Chat UI object: respond() is the callback, and the constants above supply the
+# description and example prompts.
+demo = gr.ChatInterface(
+    fn=respond,
+    title="Borg Merge v1 — Cross-Family Merged Model",
+    description=DESCRIPTION,
+    examples=EXAMPLES,
+    theme=gr.themes.Soft(),
+    analytics_enabled=False,
+    chatbot=gr.Chatbot(height=500),
+)
+# Launch the app only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    demo.launch()