Spaces:

Optitransfer
/

borg-merge-v1-chat

Sleeping

App Files Files Community

Optitransfer committed 23 days ago

Commit

be58545

verified ·

1 Parent(s): 467f9c1

Switch to gr.Blocks: welcome message, New Chat button, examples

Browse files

Files changed (1) hide show

app.py +113 -43

app.py CHANGED Viewed

@@ -156,31 +156,55 @@ IDENTITY_PROMPT = (
     "Model card: https://huggingface.co/Optitransfer/Qwen2.5-7B-Instruct-borg-merge-v1\n"
     "Paper: https://ssrn.com/abstract=6545518\n"
     "crdt-merge: https://github.com/mgillr/crdt-merge\n"
-    "Write-up: https://medium.com/@rgillespie83/we-merged-9-models-from-4-architecture-families-into-one-and-it-beats-the-anchor-on-real-e6537dfa9252"
 )
 @spaces.GPU(duration=60)
-def chat(message, history, extra_instructions, max_tokens, temperature, top_p):
-    """Generate a response. ZeroGPU allocates A10G for up to 60s."""
-    # Always start with the identity prompt
     system_content = IDENTITY_PROMPT
     if extra_instructions and extra_instructions.strip():
         system_content += "\n\n" + extra_instructions.strip()
     messages = [{"role": "system", "content": system_content}]
-    for turn in history:
-        if isinstance(turn, dict):
-            messages.append(turn)
-        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
-            messages.append({"role": "user", "content": turn[0]})
-            if turn[1]:
-                messages.append({"role": "assistant", "content": turn[1]})
-    messages.append({"role": "user", "content": message})
-    # apply_chat_template -> plain string, then tokenize explicitly
     text = tokenizer.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
@@ -209,16 +233,25 @@ def chat(message, history, extra_instructions, max_tokens, temperature, top_p):
     thread = Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
-    response = ""
     for token in streamer:
         if token:
-            response += token
-            yield response
     thread.join()
-# -- UI -------------------------------------------------------------------
 DESCRIPTION = """\
 **9 models. 4 architecture families. Zero training. One checkpoint.**
@@ -255,35 +288,72 @@ donor models while preserving the anchor's core capabilities.
 [Write-up](https://medium.com/@rgillespie83/we-merged-9-models-from-4-architecture-families-into-one-and-it-beats-the-anchor-on-real-e6537dfa9252)
 """
-EXAMPLES = [
-    ["What are you and how were you built?"],
-    ["Explain the crdt-merge paper and its technical details"],
-    ["Solve step by step: A store offers 30% off, then an additional 20% off the sale price. What is the total discount percentage?"],
-    ["Explain the difference between supervised and unsupervised learning. Give a real-world example of each."],
-    ["Write a Python function that finds the longest common subsequence of two strings."],
-    ["If 5 machines produce 100 widgets in 4 hours, how many widgets can 8 machines produce in 6 hours?"],
-    ["What are three key advantages of renewable energy over fossil fuels? Be specific."],
-]
-demo = gr.ChatInterface(
-    fn=chat,
-    title="Borg Merge v1",
-    description=DESCRIPTION,
-    additional_inputs=[
-        gr.Textbox(
             value="",
             label="Additional instructions (optional)",
-            placeholder="Add custom instructions on top of the built-in identity...",
             lines=2,
-        ),
-        gr.Slider(64, 2048, value=512, step=64, label="Max new tokens"),
-        gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature"),
-        gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p"),
-    ],
-    examples=EXAMPLES,
-    cache_examples=False,
-    type="messages",
-)
 if __name__ == "__main__":
     demo.launch()

     "Model card: https://huggingface.co/Optitransfer/Qwen2.5-7B-Instruct-borg-merge-v1\n"
     "Paper: https://ssrn.com/abstract=6545518\n"
     "crdt-merge: https://github.com/mgillr/crdt-merge\n"
+    "Write-up: https://medium.com/@rgillespie83/we-merged-9-models-from-4-"
+    "architecture-families-into-one-and-it-beats-the-anchor-on-real-e6537dfa9252"
 )
+# -- Welcome state --------------------------------------------------------
+WELCOME = [
+    {"role": "assistant", "content": "Hi, welcome to the collective, how can we help you"},
+]
+EXAMPLE_LIST = [
+    "What are you and how were you built?",
+    "Explain the crdt-merge paper and its technical details",
+    "Solve step by step: A store offers 30% off, then an additional 20% off the sale price. What is the total discount percentage?",
+    "Explain the difference between supervised and unsupervised learning. Give a real-world example of each.",
+    "Write a Python function that finds the longest common subsequence of two strings.",
+    "If 5 machines produce 100 widgets in 4 hours, how many widgets can 8 machines produce in 6 hours?",
+    "What are three key advantages of renewable energy over fossil fuels? Be specific.",
+]
+# -- Inference functions (identical logic to proven baseline) --------------
+def add_user_message(message, history):
+    """Append user message to history, clear input box."""
+    if not message or not message.strip():
+        return "", history
+    history = history + [{"role": "user", "content": message.strip()}]
+    return "", history
 @spaces.GPU(duration=60)
+def respond(history, extra_instructions, max_tokens, temperature, top_p):
+    """Generate a streamed response. ZeroGPU allocates A10G for up to 60s."""
+    if not history or history[-1]["role"] != "user":
+        yield history
+        return
+    # Build messages -- identity prompt always first
     system_content = IDENTITY_PROMPT
     if extra_instructions and extra_instructions.strip():
         system_content += "\n\n" + extra_instructions.strip()
     messages = [{"role": "system", "content": system_content}]
+    for msg in history:
+        if msg["role"] in ("user", "assistant") and msg.get("content"):
+            messages.append({"role": msg["role"], "content": msg["content"]})
+    # Tokenize (same approach as proven baseline)
     text = tokenizer.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
     thread = Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
+    # Stream tokens -- yield full history with growing assistant response
+    history = history + [{"role": "assistant", "content": ""}]
     for token in streamer:
         if token:
+            history[-1] = {
+                "role": "assistant",
+                "content": history[-1]["content"] + token,
+            }
+            yield history
     thread.join()
+def new_chat():
+    """Reset conversation to welcome state."""
+    return [{"role": "assistant", "content": "Hi, welcome to the collective, how can we help you"}], ""
+# -- UI description -------------------------------------------------------
 DESCRIPTION = """\
 **9 models. 4 architecture families. Zero training. One checkpoint.**
 [Write-up](https://medium.com/@rgillespie83/we-merged-9-models-from-4-architecture-families-into-one-and-it-beats-the-anchor-on-real-e6537dfa9252)
 """
+# -- Build UI with gr.Blocks ---------------------------------------------
+with gr.Blocks(title="Borg Merge v1", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# Borg Merge v1")
+    gr.Markdown(DESCRIPTION)
+    chatbot = gr.Chatbot(
+        value=list(WELCOME),
+        type="messages",
+        height=500,
+        show_copy_button=True,
+    )
+    with gr.Row():
+        msg = gr.Textbox(
+            show_label=False,
+            placeholder="Ask the collective anything...",
+            scale=8,
+            container=False,
+        )
+        send_btn = gr.Button("Send", variant="primary", scale=1)
+    with gr.Row():
+        new_chat_btn = gr.Button("New Chat", variant="secondary", size="sm")
+    with gr.Accordion("Settings", open=False):
+        extra = gr.Textbox(
             value="",
             label="Additional instructions (optional)",
+            placeholder="Custom instructions appended to the built-in identity...",
             lines=2,
+        )
+        max_tokens = gr.Slider(
+            64, 2048, value=512, step=64, label="Max new tokens"
+        )
+        temperature = gr.Slider(
+            0.0, 1.5, value=0.7, step=0.05, label="Temperature"
+        )
+        top_p = gr.Slider(
+            0.0, 1.0, value=0.9, step=0.05, label="Top-p"
+        )
+    gr.Examples(
+        examples=EXAMPLE_LIST,
+        inputs=msg,
+        label="Try these examples",
+    )
+    # -- Wire events -------------------------------------------------------
+    gen_inputs = [chatbot, extra, max_tokens, temperature, top_p]
+    msg.submit(
+        add_user_message, [msg, chatbot], [msg, chatbot]
+    ).then(
+        respond, gen_inputs, chatbot
+    )
+    send_btn.click(
+        add_user_message, [msg, chatbot], [msg, chatbot]
+    ).then(
+        respond, gen_inputs, chatbot
+    )
+    new_chat_btn.click(new_chat, outputs=[chatbot, msg])
 if __name__ == "__main__":
     demo.launch()