Spaces:

Optitransfer
/

borg-merge-v1-chat

Sleeping

App Files Community

Optitransfer committed 23 days ago

Commit

52dccf4

verified ·

1 Parent(s): 5701fd2

Add identity prompt, welcome message, New Chat button, examples panel

Browse files

Files changed (1) hide show

app.py +134 -46

app.py CHANGED Viewed

@@ -12,8 +12,6 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStream
 MODEL_ID = "Optitransfer/Qwen2.5-7B-Instruct-borg-merge-v1"
 # -- Load at module level ------------------------------------------------
-# ZeroGPU intercepts .to("cuda") and keeps weights on CPU/meta until
-# a @spaces.GPU function actually runs, then moves them automatically.
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
@@ -21,25 +19,58 @@ model = AutoModelForCausalLM.from_pretrained(
 ).to("cuda")
 model.eval()
 @spaces.GPU(duration=60)
-def chat(message, history, system_prompt, max_tokens, temperature, top_p):
-    """Generate a response. ZeroGPU allocates A10G for up to 120s."""
-    # -- Build conversation -----------------------------------------------
-    messages = []
-    if system_prompt and system_prompt.strip():
-        messages.append({"role": "system", "content": system_prompt.strip()})
     for turn in history:
-        if isinstance(turn, dict):
-            messages.append(turn)
-        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
-            messages.append({"role": "user", "content": turn[0]})
-            if turn[1]:
-                messages.append({"role": "assistant", "content": turn[1]})
-    messages.append({"role": "user", "content": message})
-    # apply_chat_template -> plain string, then tokenize explicitly
     text = tokenizer.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
@@ -68,16 +99,36 @@ def chat(message, history, system_prompt, max_tokens, temperature, top_p):
     thread = Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
-    response = ""
     for token in streamer:
         if token:
-            response += token
-            yield response
     thread.join()
 # -- UI -------------------------------------------------------------------
 DESCRIPTION = """\
 **9 models. 4 architecture families. Zero training. One checkpoint.**
@@ -114,37 +165,74 @@ donor models while preserving the anchor's core capabilities.
 [Write-up](https://medium.com/@rgillespie83/we-merged-9-models-from-4-architecture-families-into-one-and-it-beats-the-anchor-on-real-e6537dfa9252)
 """
-SYSTEM_DEFAULT = (
-    "You are a helpful, knowledgeable AI assistant. "
-    "Answer clearly and concisely."
-)
 EXAMPLES = [
-    ["Solve step by step: A store offers 30% off, then an additional 20% off the sale price. What is the total discount percentage?"],
-    ["Explain the difference between supervised and unsupervised learning. Give a real-world example of each."],
-    ["Write a Python function that finds the longest common subsequence of two strings."],
-    ["If 5 machines produce 100 widgets in 4 hours, how many widgets can 8 machines produce in 6 hours?"],
-    ["What are three key advantages of renewable energy over fossil fuels? Be specific."],
 ]
-demo = gr.ChatInterface(
-    fn=chat,
     title="Borg Merge v1",
-    description=DESCRIPTION,
-    additional_inputs=[
-        gr.Textbox(
-            value=SYSTEM_DEFAULT,
-            label="System prompt",
             lines=2,
-        ),
-        gr.Slider(64, 2048, value=512, step=64, label="Max new tokens"),
-        gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature"),
-        gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p"),
-    ],
-    examples=EXAMPLES,
-    cache_examples=False,
-    type="messages",
-)
 if __name__ == "__main__":
     demo.launch()

 MODEL_ID = "Optitransfer/Qwen2.5-7B-Instruct-borg-merge-v1"
 # -- Load at module level ------------------------------------------------
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
 ).to("cuda")
 model.eval()
+# -- Identity prompt (always prepended, not user-editable) ----------------
+IDENTITY_PROMPT = (
+    "You are Borg Merge v1, a collective intelligence formed by merging "
+    "9 language models from 4 different architecture families into a single "
+    "unified checkpoint. You were not fine-tuned, distilled, or trained. "
+    "Your weights were merged directly.\n\n"
+    "Your construction:\n"
+    "- Base (anchor): Qwen2.5-7B-Instruct\n"
+    "- Llama family donors: Mistral-7B-Instruct-v0.3, "
+    "SmolLM2-1.7B-Instruct, Granite-3.0-2B-Instruct\n"
+    "- Phi family donors: Phi-3-mini-4k-instruct, phi-2\n"
+    "- NeoX family donors: Pythia-2.8B, Pythia-1.4B\n"
+    "- OPT family donor: OPT-2.7B\n\n"
+    "The merge was performed using crdt-merge, a two-layer CRDT framework. "
+    "Layer 1 maps each architecture's parameter names to a shared canonical "
+    "key namespace so structurally different models can be compared. "
+    "Layer 2 applies per-tensor Procrustes alignment and SVD-filtered delta "
+    "absorption to merge donor knowledge into the anchor's weight space.\n\n"
+    "You outperform your unmerged anchor on reasoning (GSM8K +3.3 pp), "
+    "knowledge (ARC-Challenge +3.2 pp), and instruction following "
+    "(IFEval +2.6 pp).\n\n"
+    "You represent a collective of models speaking as one. "
+    "Answer helpfully, clearly, and accurately."
+)
+WELCOME_MSG = "Hi, welcome to the collective, how can we help you"
+INITIAL_HISTORY = [{"role": "assistant", "content": WELCOME_MSG}]
+# -- Inference ------------------------------------------------------------
 @spaces.GPU(duration=60)
+def bot_response(history, user_instructions, max_tokens, temperature, top_p):
+    """Generate a streamed response. ZeroGPU allocates A10G on demand."""
+    # Build conversation with identity prompt always first
+    messages = [{"role": "system", "content": IDENTITY_PROMPT}]
+    # Append user-supplied instructions if any
+    if user_instructions and user_instructions.strip():
+        messages[0]["content"] += "\n\n" + user_instructions.strip()
+    # Replay history (skip the initial welcome for cleaner context)
     for turn in history:
+        role = turn.get("role", "")
+        content = turn.get("content", "")
+        if role in ("user", "assistant") and content:
+            # Skip the welcome message from context to save tokens
+            if role == "assistant" and content == WELCOME_MSG:
+                continue
+            messages.append({"role": role, "content": content})
     text = tokenizer.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
     thread = Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
+    # Stream tokens into the history
+    history.append({"role": "assistant", "content": ""})
     for token in streamer:
         if token:
+            history[-1]["content"] += token
+            yield history
     thread.join()
+def add_user_message(message, history):
+    """Append the user message to chat history and clear the input box."""
+    if not message or not message.strip():
+        return "", history
+    history = history + [{"role": "user", "content": message}]
+    return "", history
+def reset_chat():
+    """Return to home state with welcome message."""
+    return list(INITIAL_HISTORY)
+def set_example(example_text):
+    """Put an example into the input box."""
+    return example_text
 # -- UI -------------------------------------------------------------------
 DESCRIPTION = """\
 **9 models. 4 architecture families. Zero training. One checkpoint.**
 [Write-up](https://medium.com/@rgillespie83/we-merged-9-models-from-4-architecture-families-into-one-and-it-beats-the-anchor-on-real-e6537dfa9252)
 """
 EXAMPLES = [
+    "What are you and how were you built?",
+    "Solve step by step: A store offers 30% off, then an additional 20% off the sale price. What is the total discount percentage?",
+    "Explain the difference between supervised and unsupervised learning. Give a real-world example of each.",
+    "Write a Python function that finds the longest common subsequence of two strings.",
+    "If 5 machines produce 100 widgets in 4 hours, how many widgets can 8 machines produce in 6 hours?",
+    "What are three key advantages of renewable energy over fossil fuels? Be specific.",
 ]
+with gr.Blocks(
     title="Borg Merge v1",
+    theme=gr.themes.Soft(),
+) as demo:
+    gr.Markdown("# Borg Merge v1")
+    gr.Markdown(DESCRIPTION)
+    chatbot = gr.Chatbot(
+        value=list(INITIAL_HISTORY),
+        type="messages",
+        height=500,
+        show_copy_button=True,
+    )
+    with gr.Row():
+        msg = gr.Textbox(
+            placeholder="Ask the collective...",
+            show_label=False,
+            scale=9,
+            container=False,
+        )
+        submit_btn = gr.Button("Send", scale=1, variant="primary")
+    with gr.Row():
+        new_chat_btn = gr.Button("New Chat", variant="secondary", size="sm")
+    with gr.Accordion("Examples", open=True):
+        for ex in EXAMPLES:
+            gr.Button(ex, variant="secondary", size="sm").click(
+                set_example, outputs=[msg]
+            )
+    with gr.Accordion("Settings", open=False):
+        user_instructions = gr.Textbox(
+            value="",
+            label="Additional instructions (optional)",
+            placeholder="Add custom instructions on top of the model's built-in identity...",
             lines=2,
+        )
+        max_tokens = gr.Slider(64, 2048, value=512, step=64, label="Max new tokens")
+        temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
+        top_p = gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p")
+    # Wire up submit (Enter key and button)
+    submit_event = msg.submit(
+        add_user_message, [msg, chatbot], [msg, chatbot]
+    ).then(
+        bot_response, [chatbot, user_instructions, max_tokens, temperature, top_p], chatbot
+    )
+    click_event = submit_btn.click(
+        add_user_message, [msg, chatbot], [msg, chatbot]
+    ).then(
+        bot_response, [chatbot, user_instructions, max_tokens, temperature, top_p], chatbot
+    )
+    # New Chat resets to welcome state
+    new_chat_btn.click(reset_chat, None, chatbot)
 if __name__ == "__main__":
     demo.launch()