Spaces:

Rishi2455
/

Assistant-End-Turn

Sleeping

App Files Files Community

Rishi2455 committed about 1 month ago

Commit

1ef6c6d

verified ·

1 Parent(s): 72915e4

Upload 3 files

Browse files

Files changed (3) hide show

README.md +36 -0
app.py +193 -0
requirements.txt +5 -0

README.md ADDED Viewed

	@@ -0,0 +1,36 @@

+---
+title: Assistant End-Turn Detector
+emoji: 🗣️
+colorFrom: blue
+colorTo: pink
+sdk: gradio
+sdk_version: 6.13.0
+app_file: app.py
+pinned: false
+---
+# 🗣️ Assistant End-Turn Detector
+This Hugging Face Space hosts a **Sequence Classification** model designed to detect when a user wants an AI assistant to **STOP** or **CONTINUE** speaking during a real-time conversation.
+## 🚀 Overview
+- **Architecture:** `LlamaForSequenceClassification`
+- **Base Model:** [SmolLM2-135M-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct)
+- **Task:** Binary classification (0: CONTINUE, 1: STOP)
+- **Use Case:** Real-time turn-taking and interruption handling for voice/chat bots.
+## 🛠️ Implementation
+The model was fine-tuned on custom dialogue datasets where users either provide back-channeling (encouraging continuation) or interruptions (asking questions, changing topics).
+### How to use locally
+1. Install dependencies:
+   ```bash
+   pip install -r requirements.txt
+   ```
+2. Run the application:
+   ```bash
+   python app.py
+   ```

app.py ADDED Viewed

	@@ -0,0 +1,193 @@

+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import os
+import time
+# --- Configuration ---
+# Hugging Face Hub repository ID. load_model() falls back to it when no local
+# copy of the model is found, and the UI shows it under "Technical Details".
+MODEL_ID = "Rishi2455/Assistant-End-Turn"
+# Maps the predicted class index to the human-readable label shown in the UI.
+LABEL_MAP = {0: "CONTINUE ✅", 1: "STOP 🛑"}
+# --- Model Loading ---
+def load_model():
+    print(f"🚀 Loading model: {MODEL_ID}")
+    try:
+        # Priority 1: Check if model files are in the same directory (Space upload)
+        if os.path.exists("./config.json"):
+            path = "./"
+        # Priority 2: Check standard local path
+        elif os.path.exists("./models/ETDv8"):
+            path = "./models/ETDv8"
+        # Priority 3: Fetch from Hugging Face Hub
+        else:
+            path = MODEL_ID
+        tokenizer = AutoTokenizer.from_pretrained(path)
+        # Determine device and torch dtype
+        if torch.cuda.is_available():
+            device = "cuda"
+            dtype = torch.bfloat16
+        elif torch.backends.mps.is_available():
+            device = "mps"
+            dtype = torch.float16
+        else:
+            device = "cpu"
+            dtype = torch.float32
+        model = AutoModelForSequenceClassification.from_pretrained(
+            path, torch_dtype=dtype
+        ).to(device)
+        model.eval()
+        return model, tokenizer, device, None
+    except Exception as e:
+        return None, None, "cpu", str(e)
+# Global model data for lazy loading inside Gradio
+model_data = {"model": None, "tokenizer": None, "device": "cpu", "error": None}
+def get_model():
+    if model_data["model"] is None:
+        model_data["model"], model_data["tokenizer"], model_data["device"], model_data["error"] = load_model()
+    return model_data["model"], model_data["tokenizer"], model_data["device"], model_data["error"]
+# --- Inference Logic ---
+def detect_turn_end(history):
+    if not history or history[-1]["role"] != "user":
+        return "<div style='color: #64748b; text-align: center; padding: 20px;'>Last message should be from user</div>"
+    model, tokenizer, device, error = get_model()
+    if model is None:
+        return f"<div style='color: #ef4444; padding: 10px; border: 1px solid #ef4444; border-radius: 5px;'><b>❌ Model Error:</b> {error if error else 'Unknown error'}</div>"
+    # 1. Prepare Dialogue History
+    dialogue = history.copy()
+    if not any(m["role"] == "system" for m in dialogue):
+        dialogue.insert(0, {"role": "system", "content": "You are a helpful AI assistant named SmolLM, trained by Hugging Face"})
+    # 2. Apply Chat Template
+    try:
+        input_text = tokenizer.apply_chat_template(dialogue, tokenize=False, add_generation_prompt=False)
+    except:
+        # Manual fallback if template missing
+        input_text = "".join([f"<|im_start|>{m['role']}\n{m['content']}<|im_end|>\n" for m in dialogue])
+    # 3. Predict
+    tokens = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=1024).to(device)
+    start_time = time.time()
+    with torch.no_grad():
+        outputs = model(**tokens)
+    latency = (time.time() - start_time) * 1000
+    # Get highest confidence prediction
+    probs = torch.softmax(outputs.logits, dim=-1).squeeze()
+    pred_idx = torch.argmax(probs).item()
+    confidence = probs[pred_idx].item()
+    label = LABEL_MAP.get(pred_idx, "UNKNOWN")
+    color = "#10b981" if pred_idx == 0 else "#ef4444"
+    bg_color = "rgba(16, 185, 129, 0.15)" if pred_idx == 0 else "rgba(239, 68, 68, 0.15)"
+    result_html = f"""
+    <div style="padding: 24px; border-radius: 12px; background-color: {bg_color}; border: 2px solid {color}; backdrop-filter: blur(8px);">
+        <div style="display: flex; justify-content: space-between; align-items: center;">
+            <h1 style="margin: 0; color: white; font-size: 2em; letter-spacing: 1px;">{label}</h1>
+            <div style="text-align: right;">
+                <p style="margin: 0; color: #94a3b8; font-size: 0.9em;">CONFIDENCE</p>
+                <b style="color: {color}; font-size: 1.4em;">{confidence:.2%}</b>
+            </div>
+        </div>
+        <div style="margin-top: 15px; padding-top: 15px; border-top: 1px solid rgba(255,255,255,0.1); display: flex; gap: 20px;">
+            <p style="margin: 0; color: #cbd5e1; font-size: 0.85em;">Latency: <b>{latency:.1f}ms</b></p>
+            <p style="margin: 0; color: #cbd5e1; font-size: 0.85em;">Device: <b>{device.upper()}</b></p>
+        </div>
+    </div>
+    """
+    return result_html
+# --- UI Layout ---
+with gr.Blocks(theme=gr.themes.Default(primary_hue="indigo", font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui"])) as demo:
+    with gr.Column(elem_id="container"):
+        gr.Markdown("# 🤖 AI Turn-End Detector")
+        gr.Markdown("""
+        Predict if the AI assistant should **STOP** or **CONTINUE** speaking based on the latest user interaction.
+        Built with SmolLM2-135M and Fine-tuned for real-time turn detection.
+        """)
+        with gr.Row():
+            with gr.Column(scale=2):
+                chat = gr.Chatbot(type="messages", label="Dialogue Stream", height=450, bubble_full_width=False, show_label=False)
+                with gr.Row():
+                    txt = gr.Textbox(
+                        label="User Input",
+                        placeholder="Type a message or an interruption...",
+                        scale=9,
+                        container=False
+                    )
+                    btn = gr.Button("🔮 Predict", variant="primary", scale=1)
+                with gr.Row():
+                    clear = gr.Button("🗑️ Clear Context")
+                    undo = gr.Button("🔙 Undo Last")
+            with gr.Column(scale=1):
+                gr.Markdown("### 🔍 Model Decision")
+                status_box = gr.HTML("<div style='height: 150px; display: flex; align-items: center; justify-content: center; border: 2px dashed #334155; border-radius: 12px; color: #64748b; text-align: center;'>Send a message to see the model's prediction</div>")
+                with gr.Accordion("Technical Details", open=True):
+                    gr.Markdown(f"""
+                    - **Architecture:** Llama-based Sequence Classification
+                    - **Base Model:** SmolLM2-135M-Instruct
+                    - **Target:** Real-time Interruption Detection
+                    - **HF Repo:** `{MODEL_ID}`
+                    """)
+                gr.Examples(
+                    examples=[
+                        ["Can you please...", "User stops mid-sentence (interruption)"],
+                        ["Yes, tell me more.", "Positive feedback (continue)"],
+                        ["Wait, I didn't get that part.", "Question (stop)"],
+                        ["Okay.", "Short affirmative (stop)"]
+                    ],
+                    inputs=[txt]
+                )
+    # Logic
+    def user_action(message, history):
+        if not message.strip():
+            return "", history
+        history.append({"role": "user", "content": message})
+        return "", history
+    def perform_inference(history):
+        return detect_turn_end(history)
+    # Trigger Chain
+    txt.submit(user_action, [txt, chat], [txt, chat]).then(
+        perform_inference, [chat], [status_box]
+    )
+    btn.click(user_action, [txt, chat], [txt, chat]).then(
+        perform_inference, [chat], [status_box]
+    )
+    clear.click(lambda: ([], "<div style='height: 150px; display: flex; align-items: center; justify-content: center; border: 2px dashed #334155; border-radius: 12px; color: #64748b; text-align: center;'>History Cleared</div>"), None, [chat, status_box])
+    undo.click(lambda h: h[:-1] if h else [], [chat], [chat]).then(
+        perform_inference, [chat], [status_box]
+    )
+# Custom Premium Styling
+demo.css = """
+body { background-color: #0f172a !important; color: #f8fafc !important; }
+#container { max-width: 1100px; margin: auto; padding: 20px; }
+.gr-chatbot { border-radius: 12px !important; border: 1px solid #1e293b !important; background-color: #1e293b !important; }
+.message-row { transition: all 0.2s ease-in-out; }
+.message-row:hover { transform: scale(1.01); }
+footer { display: none !important; }
+"""
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+transformers
+torch
+gradio
+accelerate
+numpy