Spaces:

Optitransfer
/

borg-merge-v1-chat

Sleeping

App Files Community

Optitransfer committed 23 days ago

Commit

079ddc1

verified ·

1 Parent(s): be58545

Revert to ChatInterface base: add welcome message, rename clear to New Chat

Browse files

Files changed (1) hide show

app.py +50 -107

app.py CHANGED Viewed

@@ -161,50 +161,29 @@ IDENTITY_PROMPT = (
 )
-# -- Welcome state --------------------------------------------------------
-WELCOME = [
-    {"role": "assistant", "content": "Hi, welcome to the collective, how can we help you"},
-]
-EXAMPLE_LIST = [
-    "What are you and how were you built?",
-    "Explain the crdt-merge paper and its technical details",
-    "Solve step by step: A store offers 30% off, then an additional 20% off the sale price. What is the total discount percentage?",
-    "Explain the difference between supervised and unsupervised learning. Give a real-world example of each.",
-    "Write a Python function that finds the longest common subsequence of two strings.",
-    "If 5 machines produce 100 widgets in 4 hours, how many widgets can 8 machines produce in 6 hours?",
-    "What are three key advantages of renewable energy over fossil fuels? Be specific.",
-]
-# -- Inference functions (identical logic to proven baseline) --------------
-def add_user_message(message, history):
-    """Append user message to history, clear input box."""
-    if not message or not message.strip():
-        return "", history
-    history = history + [{"role": "user", "content": message.strip()}]
-    return "", history
 @spaces.GPU(duration=60)
-def respond(history, extra_instructions, max_tokens, temperature, top_p):
-    """Generate a streamed response. ZeroGPU allocates A10G for up to 60s."""
-    if not history or history[-1]["role"] != "user":
-        yield history
-        return
-    # Build messages -- identity prompt always first
     system_content = IDENTITY_PROMPT
     if extra_instructions and extra_instructions.strip():
         system_content += "\n\n" + extra_instructions.strip()
     messages = [{"role": "system", "content": system_content}]
-    for msg in history:
-        if msg["role"] in ("user", "assistant") and msg.get("content"):
-            messages.append({"role": msg["role"], "content": msg["content"]})
-    # Tokenize (same approach as proven baseline)
     text = tokenizer.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
@@ -233,25 +212,16 @@ def respond(history, extra_instructions, max_tokens, temperature, top_p):
     thread = Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
-    # Stream tokens -- yield full history with growing assistant response
-    history = history + [{"role": "assistant", "content": ""}]
     for token in streamer:
         if token:
-            history[-1] = {
-                "role": "assistant",
-                "content": history[-1]["content"] + token,
-            }
-            yield history
     thread.join()
-def new_chat():
-    """Reset conversation to welcome state."""
-    return [{"role": "assistant", "content": "Hi, welcome to the collective, how can we help you"}], ""
-# -- UI description -------------------------------------------------------
 DESCRIPTION = """\
 **9 models. 4 architecture families. Zero training. One checkpoint.**
@@ -288,72 +258,45 @@ donor models while preserving the anchor's core capabilities.
 [Write-up](https://medium.com/@rgillespie83/we-merged-9-models-from-4-architecture-families-into-one-and-it-beats-the-anchor-on-real-e6537dfa9252)
 """
-# -- Build UI with gr.Blocks ---------------------------------------------
-with gr.Blocks(title="Borg Merge v1", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# Borg Merge v1")
-    gr.Markdown(DESCRIPTION)
-    chatbot = gr.Chatbot(
         value=list(WELCOME),
-        type="messages",
-        height=500,
         show_copy_button=True,
-    )
-    with gr.Row():
-        msg = gr.Textbox(
-            show_label=False,
-            placeholder="Ask the collective anything...",
-            scale=8,
-            container=False,
-        )
-        send_btn = gr.Button("Send", variant="primary", scale=1)
-    with gr.Row():
-        new_chat_btn = gr.Button("New Chat", variant="secondary", size="sm")
-    with gr.Accordion("Settings", open=False):
-        extra = gr.Textbox(
             value="",
             label="Additional instructions (optional)",
-            placeholder="Custom instructions appended to the built-in identity...",
             lines=2,
-        )
-        max_tokens = gr.Slider(
-            64, 2048, value=512, step=64, label="Max new tokens"
-        )
-        temperature = gr.Slider(
-            0.0, 1.5, value=0.7, step=0.05, label="Temperature"
-        )
-        top_p = gr.Slider(
-            0.0, 1.0, value=0.9, step=0.05, label="Top-p"
-        )
-    gr.Examples(
-        examples=EXAMPLE_LIST,
-        inputs=msg,
-        label="Try these examples",
-    )
-    # -- Wire events -------------------------------------------------------
-    gen_inputs = [chatbot, extra, max_tokens, temperature, top_p]
-    msg.submit(
-        add_user_message, [msg, chatbot], [msg, chatbot]
-    ).then(
-        respond, gen_inputs, chatbot
-    )
-    send_btn.click(
-        add_user_message, [msg, chatbot], [msg, chatbot]
-    ).then(
-        respond, gen_inputs, chatbot
-    )
-    new_chat_btn.click(new_chat, outputs=[chatbot, msg])
 if __name__ == "__main__":
     demo.launch()

 )
+# -- Inference (identical to proven baseline) -----------------------------
 @spaces.GPU(duration=60)
+def chat(message, history, extra_instructions, max_tokens, temperature, top_p):
+    """Generate a response. ZeroGPU allocates A10G for up to 60s."""
+    # Always start with the identity prompt
     system_content = IDENTITY_PROMPT
     if extra_instructions and extra_instructions.strip():
         system_content += "\n\n" + extra_instructions.strip()
     messages = [{"role": "system", "content": system_content}]
+    for turn in history:
+        if isinstance(turn, dict):
+            messages.append(turn)
+        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
+            messages.append({"role": "user", "content": turn[0]})
+            if turn[1]:
+                messages.append({"role": "assistant", "content": turn[1]})
+    messages.append({"role": "user", "content": message})
+    # apply_chat_template -> plain string, then tokenize explicitly
     text = tokenizer.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
     thread = Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
+    response = ""
     for token in streamer:
         if token:
+            response += token
+            yield response
     thread.join()
+# -- UI -------------------------------------------------------------------
 DESCRIPTION = """\
 **9 models. 4 architecture families. Zero training. One checkpoint.**
 [Write-up](https://medium.com/@rgillespie83/we-merged-9-models-from-4-architecture-families-into-one-and-it-beats-the-anchor-on-real-e6537dfa9252)
 """
+WELCOME = [
+    {"role": "assistant", "content": "Hi, welcome to the collective, how can we help you"},
+]
+EXAMPLES = [
+    ["What are you and how were you built?"],
+    ["Explain the crdt-merge paper and its technical details"],
+    ["Solve step by step: A store offers 30% off, then an additional 20% off the sale price. What is the total discount percentage?"],
+    ["Explain the difference between supervised and unsupervised learning. Give a real-world example of each."],
+    ["Write a Python function that finds the longest common subsequence of two strings."],
+    ["If 5 machines produce 100 widgets in 4 hours, how many widgets can 8 machines produce in 6 hours?"],
+    ["What are three key advantages of renewable energy over fossil fuels? Be specific."],
+]
+demo = gr.ChatInterface(
+    fn=chat,
+    title="Borg Merge v1",
+    description=DESCRIPTION,
+    chatbot=gr.Chatbot(
         value=list(WELCOME),
         show_copy_button=True,
+        height=500,
+    ),
+    additional_inputs=[
+        gr.Textbox(
             value="",
             label="Additional instructions (optional)",
+            placeholder="Add custom instructions on top of the built-in identity...",
             lines=2,
+        ),
+        gr.Slider(64, 2048, value=512, step=64, label="Max new tokens"),
+        gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature"),
+        gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p"),
+    ],
+    examples=EXAMPLES,
+    cache_examples=False,
+    type="messages",
+    clear_btn="New Chat",
+)
 if __name__ == "__main__":
     demo.launch()