Spaces:

SupraLabs
/

Supra-50M-Instruct

Running

App Files Files Community

AxionLab-official committed 2 days ago

Commit

de13e30

verified ·

1 Parent(s): 560025e

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -51

app.py CHANGED Viewed

@@ -13,7 +13,6 @@ hf_logging.set_verbosity_error()
 # ── Config ────────────────────────────────────────────────────────────────────
 MODEL_ID = "SupraLabs/Supra-50M-Instruct"
-DEVICE = "cpu"
 # ── Load model ────────────────────────────────────────────────────────────────
@@ -27,56 +26,66 @@ print("[+] Model ready.")
 # ── Prompt builder (Alpaca format) ────────────────────────────────────────────
-def build_prompt(history: list[dict], system: str) -> str:
-    """Convert chat history into the Alpaca instruct format the model expects."""
     parts = []
     if system.strip():
         parts.append(
             "Below is an instruction that describes a task. "
             "Write a response that appropriately completes the request.\n\n"
-            f"### Instruction:\n{system}\n\n### Response:\nUnderstood.\n"
         )
     for msg in history:
-        role, content = msg["role"], msg["content"]
         if role == "user":
             parts.append(
                 "Below is an instruction that describes a task. "
                 "Write a response that appropriately completes the request.\n\n"
                 f"### Instruction:\n{content}\n\n### Response:\n"
             )
-        elif role == "assistant":
-            parts.append(content + "\n")
     return "".join(parts)
-# ── Generation ────────────────────────────────────────────────────────────────
-def generate_response(
     message: str,
-    history: list[dict],
     system_prompt: str,
     max_new_tokens: int,
     temperature: float,
     top_p: float,
     repetition_penalty: float,
 ) -> str:
-    history = history + [{"role": "user", "content": message}]
-    prompt = build_prompt(history, system_prompt)
-    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
     with torch.no_grad():
         output_ids = model.generate(
             **inputs,
-            max_new_tokens=max_new_tokens,
-            do_sample=temperature > 0,
-            temperature=temperature if temperature > 0 else 1.0,
-            top_p=top_p,
             top_k=50,
-            repetition_penalty=repetition_penalty,
             pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
             eos_token_id=tokenizer.eos_token_id,
         )
@@ -87,33 +96,22 @@ def generate_response(
 # ── UI ────────────────────────────────────────────────────────────────────────
-DESCRIPTION = """
-<div style="text-align:center; padding: 8px 0 4px;">
-  <h1 style="font-size:2rem; margin:0;">🦅 Supra-50M Instruct</h1>
-  <p style="color:#888; margin:4px 0 0;">50M-parameter chat model by <a href="https://huggingface.co/SupraLabs" target="_blank">SupraLabs</a> — running on CPU</p>
-</div>
-"""
-with gr.Blocks(title="Supra-50M Instruct", theme=gr.themes.Soft()) as demo:
-    gr.HTML(DESCRIPTION)
     with gr.Row():
         with gr.Column(scale=3):
-            chatbot = gr.Chatbot(
-                label="Chat",
-                type="messages",
-                height=480,
-                show_copy_button=True,
             )
-            with gr.Row():
-                msg_box = gr.Textbox(
-                    placeholder="Type your message…",
-                    show_label=False,
-                    scale=5,
-                    lines=1,
-                    max_lines=4,
-                    submit_btn=True,
-                )
         with gr.Column(scale=1, min_width=220):
             gr.Markdown("### ⚙️ Parameters")
@@ -138,15 +136,13 @@ with gr.Blocks(title="Supra-50M Instruct", theme=gr.themes.Soft()) as demo:
     # ── State & wiring ────────────────────────────────────────────────────────
-    chat_history = gr.State([])
     def on_submit(message, history, system, max_tok, temp, top_p_val, rep_pen):
         if not message.strip():
             return history, history, ""
-        response = generate_response(
-            message, history, system, max_tok, temp, top_p_val, rep_pen
-        )
         history = history + [
             {"role": "user", "content": message},
@@ -156,19 +152,19 @@ with gr.Blocks(title="Supra-50M Instruct", theme=gr.themes.Soft()) as demo:
     msg_box.submit(
         fn=on_submit,
-        inputs=[msg_box, chat_history, system_prompt, max_new_tokens, temperature, top_p, repetition_penalty],
-        outputs=[chatbot, chat_history, msg_box],
     )
     clear_btn.click(
         fn=lambda: ([], [], ""),
-        outputs=[chatbot, chat_history, msg_box],
     )
     gr.Markdown(
-        "<p style='text-align:center; color:#aaa; font-size:0.8rem; margin-top:12px;'>"
-        "Model: <a href='https://huggingface.co/SupraLabs/Supra-50M-Instruct' target='_blank'>SupraLabs/Supra-50M-Instruct</a> — "
-        "Apache 2.0 License — © SupraLabs 2026</p>"
     )

 # ── Config ────────────────────────────────────────────────────────────────────
 MODEL_ID = "SupraLabs/Supra-50M-Instruct"
 # ── Load model ────────────────────────────────────────────────────────────────
 # ── Prompt builder (Alpaca format) ────────────────────────────────────────────
# Fixed instruction header that opens every Alpaca-format turn.
_ALPACA_PREAMBLE = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
)


def _alpaca_turn(instruction: str) -> str:
    """Return one Alpaca block for *instruction*, ending at an open response slot."""
    return f"{_ALPACA_PREAMBLE}### Instruction:\n{instruction}\n\n### Response:\n"


def build_prompt(history: list, system: str, new_message: str) -> str:
    """Convert chat history + new message into Alpaca instruct format.

    Args:
        history: Previous turns. Each entry is either a
            ``{"role": ..., "content": ...}`` dict or an indexable
            ``(role, content)`` pair. ``None`` is treated as no history.
        system: Optional system instruction. When it is non-blank it is
            emitted as a first turn answered with a canned "Understood.".
            ``None`` or whitespace-only text is skipped.
        new_message: The user message the model should answer next.

    Returns:
        The concatenated prompt, ending with an open ``### Response:`` slot
        for ``new_message``.
    """
    parts = []

    # The blank check uses the stripped text, but the prompt embeds the
    # system text exactly as given.
    if system and system.strip():
        parts.append(_alpaca_turn(system) + "Understood.\n\n")

    for msg in history or ():
        if isinstance(msg, dict):
            role, content = msg["role"], msg["content"]
        else:
            # Non-dict entries are read positionally as (role, content).
            role, content = msg[0], msg[1]

        if role == "user":
            parts.append(_alpaca_turn(content))
        elif role == "assistant" and content:
            # Empty assistant turns are dropped; any other role is ignored.
            parts.append(content + "\n\n")

    # The new user message always closes the prompt.
    parts.append(_alpaca_turn(new_message))
    return "".join(parts)
+# ── Generate ──────────────────────────────────────────────────────────────────
+def chat(
     message: str,
+    history: list,
     system_prompt: str,
     max_new_tokens: int,
     temperature: float,
     top_p: float,
     repetition_penalty: float,
 ) -> str:
+    if not message.strip():
+        return ""
+    prompt = build_prompt(history, system_prompt, message)
+    inputs = tokenizer(prompt, return_tensors="pt")
     with torch.no_grad():
         output_ids = model.generate(
             **inputs,
+            max_new_tokens=int(max_new_tokens),
+            do_sample=temperature > 0.01,
+            temperature=float(temperature),
+            top_p=float(top_p),
             top_k=50,
+            repetition_penalty=float(repetition_penalty),
             pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
             eos_token_id=tokenizer.eos_token_id,
         )
 # ── UI ────────────────────────────────────────────────────────────────────────
+with gr.Blocks(title="Supra-50M Instruct") as demo:
+    gr.Markdown(
+        "# 🦅 Supra-50M Instruct\n"
+        "50M-parameter chat model by [SupraLabs](https://huggingface.co/SupraLabs) — running on CPU"
+    )
     with gr.Row():
         with gr.Column(scale=3):
+            chatbot = gr.Chatbot(label="Chat", height=480)
+            msg_box = gr.Textbox(
+                placeholder="Type your message and press Enter…",
+                show_label=False,
+                lines=1,
+                max_lines=4,
+                submit_btn=True,
             )
         with gr.Column(scale=1, min_width=220):
             gr.Markdown("### ⚙️ Parameters")
     # ── State & wiring ────────────────────────────────────────────────────────
+    chat_state = gr.State([])
     def on_submit(message, history, system, max_tok, temp, top_p_val, rep_pen):
         if not message.strip():
             return history, history, ""
+        response = chat(message, history, system, max_tok, temp, top_p_val, rep_pen)
         history = history + [
             {"role": "user", "content": message},
     msg_box.submit(
         fn=on_submit,
+        inputs=[msg_box, chat_state, system_prompt, max_new_tokens, temperature, top_p, repetition_penalty],
+        outputs=[chatbot, chat_state, msg_box],
     )
     clear_btn.click(
         fn=lambda: ([], [], ""),
+        outputs=[chatbot, chat_state, msg_box],
     )
     gr.Markdown(
+        "<p style='text-align:center; color:#aaa; font-size:0.8rem; margin-top:8px;'>"
+        "Model: <a href='https://huggingface.co/SupraLabs/Supra-50M-Instruct' target='_blank'>"
+        "SupraLabs/Supra-50M-Instruct</a> — Apache 2.0 — © SupraLabs 2026</p>"
     )