fluent communication part :: rakib

Browse files

Files changed (6) hide show

.env +2 -0
app.py +8 -27
core/backend.py +272 -251
frontend/index.html +1 -0
frontend/script.js +97 -102
services/streaming.py +178 -113

.env CHANGED Viewed

@@ -5,6 +5,8 @@ LANGCHAIN_ENDPOINT='https://api.smith.langchain.com'
 LANGCHAIN_API_KEY='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'
 LANGCHAIN_PROJECT='Default'
 # TWILIO_ACCOUNT_SID="ACfafc0d2d007bdf14b21bb3e14a7a7b31"
 # TWILIO_AUTH_TOKEN="ed15fa98748c8c3d3d02cb54e431a187"
 # TWILIO_PHONE_NUMBER="+14343375085"

 LANGCHAIN_API_KEY='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'
 LANGCHAIN_PROJECT='Default'
+GOOGLE_API_KEY="AIzaSyA9sqz4YKQHKXR9TU1imw0DPOghzHOMiBo"
 # TWILIO_ACCOUNT_SID="ACfafc0d2d007bdf14b21bb3e14a7a7b31"
 # TWILIO_AUTH_TOKEN="ed15fa98748c8c3d3d02cb54e431a187"
 # TWILIO_PHONE_NUMBER="+14343375085"

app.py CHANGED Viewed

@@ -1,19 +1,3 @@
-"""
-app.py — FastAPI entry point
-Fixes applied
-─────────────
-1. STT is now fully async (stt.transcribe is a coroutine) — no more
-   asyncio.to_thread wrapper needed in the WS handler.
-2. BARGE-IN: when the client sends a new audio blob while TTS is still
-   playing, the running tts_streamer is cancelled before starting a new
-   turn.  The client enforces isProcessing so this should be rare, but
-   the server now handles it gracefully.
-3. Per-session cancel token stored in `_active_streamer` so any new
-   utterance from the same WS cleanly aborts the previous one.
-4. All other logic (ping/pong, safe send helpers, chat WS) is unchanged.
-"""
 import asyncio
 import json
 import os
@@ -55,7 +39,6 @@ async def root():
     return HTMLResponse("<h2>index.html not found</h2>", status_code=404)
-# ── Helpers ────────────────────────────────────────────────────────────────────
 def _ws_open(ws: WebSocket) -> bool:
     return ws.client_state == WebSocketState.CONNECTED
@@ -80,7 +63,6 @@ async def _safe_bytes(ws: WebSocket, data: bytes) -> bool:
         return False
-# ── Text chat WebSocket ────────────────────────────────────────────────────────
 @app.websocket("/ws/chat")
 async def ws_chat(ws: WebSocket):
     await ws.accept()
@@ -118,7 +100,6 @@ async def ws_chat(ws: WebSocket):
             print(f"[CHAT] WS error: {exc}")
-# ── Voice WebSocket ────────────────────────────────────────────────────────────
 @app.websocket("/ws/voice")
 async def ws_voice(ws: WebSocket):
     await ws.accept()
@@ -126,7 +107,7 @@ async def ws_voice(ws: WebSocket):
     stt             = STTProcessor()
     user_id         = "voice_user"
-    _active_streamer: ParallelTTSStreamer | None = None   # barge-in handle
     try:
         while True:
@@ -146,18 +127,18 @@ async def ws_voice(ws: WebSocket):
                     print(f"[VOICE] Receive error: {exc}")
                 break
-            # ── Audio blob from client VAD ──────────────────────────────────
             if "bytes" in data and data["bytes"]:
                 audio_bytes = data["bytes"]
                 print(f"[VOICE] Received utterance: {len(audio_bytes):,} bytes")
-                # ── Barge-in: cancel any running TTS turn ───────────────────
                 if _active_streamer is not None:
                     print("[VOICE] Barge-in — cancelling previous TTS.")
                     await _active_streamer.cancel()
                     _active_streamer = None
-                # 1. STT — now a native coroutine (GPU semaphore inside)
                 transcript = await stt.transcribe(audio_bytes)
                 if not transcript:
@@ -172,7 +153,7 @@ async def ws_voice(ws: WebSocket):
                 if not await _safe_text(ws, {"type": "stt", "text": transcript}):
                     break
-                # 2. AI + TTS pipeline
                 tts_streamer    = ParallelTTSStreamer()
                 _active_streamer = tts_streamer
@@ -198,17 +179,17 @@ async def ws_voice(ws: WebSocket):
                 await asyncio.gather(run_ai_and_tts(), stream_tts_audio())
                 _active_streamer = None
-                # Signal end-of-turn → client resumes VAD
                 await _safe_text(ws, {"type": "end"})
-            # ── Control messages ────────────────────────────────────────────
             elif "text" in data and data["text"]:
                 try:
                     msg = json.loads(data["text"])
                     if msg.get("type") == "ping":
                         await _safe_text(ws, {"type": "pong"})
-                    # Client can send {"type":"cancel"} to abort TTS mid-turn
                     elif msg.get("type") == "cancel":
                         if _active_streamer is not None:
                             print("[VOICE] Client cancel signal received.")

 import asyncio
 import json
 import os
     return HTMLResponse("<h2>index.html not found</h2>", status_code=404)
 def _ws_open(ws: WebSocket) -> bool:
     return ws.client_state == WebSocketState.CONNECTED
         return False
 @app.websocket("/ws/chat")
 async def ws_chat(ws: WebSocket):
     await ws.accept()
             print(f"[CHAT] WS error: {exc}")
 @app.websocket("/ws/voice")
 async def ws_voice(ws: WebSocket):
     await ws.accept()
     stt             = STTProcessor()
     user_id         = "voice_user"
+    _active_streamer: ParallelTTSStreamer | None = None
     try:
         while True:
                     print(f"[VOICE] Receive error: {exc}")
                 break
             if "bytes" in data and data["bytes"]:
                 audio_bytes = data["bytes"]
                 print(f"[VOICE] Received utterance: {len(audio_bytes):,} bytes")
                 if _active_streamer is not None:
                     print("[VOICE] Barge-in — cancelling previous TTS.")
                     await _active_streamer.cancel()
                     _active_streamer = None
                 transcript = await stt.transcribe(audio_bytes)
                 if not transcript:
                 if not await _safe_text(ws, {"type": "stt", "text": transcript}):
                     break
                 tts_streamer    = ParallelTTSStreamer()
                 _active_streamer = tts_streamer
                 await asyncio.gather(run_ai_and_tts(), stream_tts_audio())
                 _active_streamer = None
                 await _safe_text(ws, {"type": "end"})
             elif "text" in data and data["text"]:
                 try:
                     msg = json.loads(data["text"])
                     if msg.get("type") == "ping":
                         await _safe_text(ws, {"type": "pong"})
                     elif msg.get("type") == "cancel":
                         if _active_streamer is not None:
                             print("[VOICE] Client cancel signal received.")

core/backend.py CHANGED Viewed

@@ -1,36 +1,43 @@
-from langgraph.graph import StateGraph, START, END
-from typing import TypedDict, Annotated
-from langchain_core.messages import BaseMessage
-from langgraph.graph.message import add_messages
 from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
-from langchain_ollama import ChatOllama
 from langgraph.prebuilt import ToolNode, tools_condition
-from langchain_community.tools import DuckDuckGoSearchRun
-from langchain_core.tools import tool
-from langchain_core.messages import HumanMessage, AIMessage, ToolMessage, RemoveMessage, SystemMessage
-import aiosqlite, uuid, os, httpx, asyncio
 from twilio.rest import Client
-from dotenv import load_dotenv
-import json, pytz
-from datetime import datetime
-######################### STATE #########################
 class ChatState(TypedDict):
-    messages: Annotated[list[BaseMessage], add_messages]
     summary: str
-######################### TOOLS #########################
-# After imports, before STATE class
-def get_db_path():
     return os.path.join(os.path.dirname(__file__), "daa.db")
-def send_sms(to_number: str, message: str):
-    client = Client(os.getenv("TWILIO_ACCOUNT_SID"), os.getenv("TWILIO_AUTH_TOKEN"))
-    client.messages.create(
-        body=message,
-        from_=os.getenv("TWILIO_PHONE_NUMBER"),
-        to=to_number
-    )
 def format_bd_number(num: str) -> str:
     num = num.strip().replace(" ", "")
@@ -38,36 +45,50 @@ def format_bd_number(num: str) -> str:
         return "+88" + num
     if num.startswith("8801"):
         return "+" + num
-    return num  # already formatted or unknown
 @tool
 def get_bd_time() -> str:
-    """
-    Get current Bangladesh time (Asia/Dhaka) with weekday name
-    """
-    tz = pytz.timezone("Asia/Dhaka")
     now = datetime.now(tz)
     return now.strftime("%Y-%m-%d %H:%M:%S (%A, Bangladesh Time)")
 @tool
-async def search_doctor(name: str = "", category: str = "", visiting_days: str = "") -> str:
     """
-    Search doctors by name, category, or visiting_days from SQLite database.
-    Any combination of filters is supported (OR logic for each field).
     """
-    db_path = get_db_path()
-    query = "SELECT * FROM doctors WHERE 1=1"
-    params = []
-    conditions = []
     if name:
         conditions.append("LOWER(doctor_name) LIKE ?")
         params.append(f"%{name.lower()}%")
     if category:
         conditions.append("LOWER(category) LIKE ?")
         params.append(f"%{category.lower()}%")
     if visiting_days:
         conditions.append("LOWER(visiting_days) LIKE ?")
         params.append(f"%{visiting_days.lower()}%")
@@ -78,119 +99,89 @@ async def search_doctor(name: str = "", category: str = "", visiting_days: str =
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
         cursor = await db.execute(query, params)
-        rows = await cursor.fetchall()
     if not rows:
-        return json.dumps({
-            "success": False,
-            "message": "No doctors found matching your search.",
-            "data": []
-        })
-    return json.dumps({
-        "success": True,
-        "count": len(rows),
-        "data": [dict(r) for r in rows]
-    })
 @tool
 async def search_appointment_by_phone(patient_num: str) -> str:
-    """
-    Search all appointments using patient phone number.
-    """
-    db_path = get_db_path()
     patient_num = format_bd_number(patient_num)
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
-        cursor = await db.execute("""
-            SELECT * FROM patients
-            WHERE patient_num = ?
-            ORDER BY visiting_date ASC
-        """, (patient_num,))
         rows = await cursor.fetchall()
     if not rows:
         return json.dumps({
             "success": False,
             "message": "No appointments found for this phone number.",
-            "data": []
         })
-    return json.dumps({
-        "success": True,
-        "count": len(rows),
-        "data": [dict(r) for r in rows]
-    })
 @tool
-async def book_appointment(doctor_id: int, patient_name: str, patient_age: str, patient_num: str, visiting_date: str) -> str:
     """
     Book a doctor appointment and save it to the patients table.
     Args:
-        doctor_id: Doctor's ID from search_doctor results.
-        patient_name: Full name of the patient.
-        patient_age: Age of the patient (e.g. "32").
-        patient_num: Contact phone number of the patient.
         visiting_date: Date of visit in YYYY-MM-DD format (e.g. 2025-06-15).
-    Returns a booking confirmation with the new record ID.
     """
-    db_path = get_db_path()
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
-        patient_num = format_bd_number(patient_num)
-        # Verify doctor exists
         cursor = await db.execute("SELECT * FROM doctors WHERE id = ?", (doctor_id,))
         doctor = await cursor.fetchone()
         if not doctor:
             return f"No doctor found with ID {doctor_id}. Please search for a doctor first."
-        doctor_data = dict(doctor)
-        doctor_name = doctor_data.get("doctor_name", "Unknown")
-        doctor_category = doctor_data.get("doctor_category", "Unknown")
-        # Check for conflicting booking (same doctor + same date)
         cursor = await db.execute(
             """SELECT id FROM patients
-            WHERE doctor_name = ? AND visiting_date = ? AND patient_num = ?""",
             (doctor_name, visiting_date, patient_num),
         )
-        conflict = await cursor.fetchone()
-        if conflict:
             return (
                 f"A booking for {patient_name} with Dr. {doctor_name} "
                 f"on {visiting_date} already exists."
             )
-        # Insert into patients table
-        cursor = await db.execute(
-            """INSERT INTO patients (doctor_name, doctor_category, patient_name, patient_age, patient_num, visiting_date)
-            VALUES (?, ?, ?, ?, ?, ?)""",
             (doctor_name, doctor_category, patient_name, patient_age, patient_num, visiting_date),
         )
         await db.commit()
-    # Send SMS confirmation
-    sms_message = (
-        f"✅ Appointment Confirmed!\n"
-        f"Doctor     : {doctor_name}\n"
-        f"Patient    : {patient_name}\n"
-        f"Visit Date : {visiting_date}\n"
-        f"Please arrive 10 minutes early."
-    )
-    # try:
-    #     send_sms(to_number=patient_num, message=sms_message)
-    #     sms_status = "📱 SMS confirmation sent."
-    # except Exception as e:
-    #     sms_status = f"⚠️ SMS failed: {str(e)}"
     return (
         f"✅ Appointment Booked!\n"
         f"━━━━━━━━━━━━━━━━━━━━━━\n"
@@ -201,182 +192,203 @@ async def book_appointment(doctor_id: int, patient_name: str, patient_age: str,
         f"Contact      : {patient_num}\n"
         f"━━━━━━━━━━━━━━━━━━━━━━\n"
         f"Please arrive 10 minutes early."
-        # f"{sms_status}"
     )
 async def delete_appointment(patient_num: str, doctor_name: str) -> str:
-    """
-    Delete an appointment using patient phone number and doctor name.
-    """
-    db_path = get_db_path()
-    # normalize phone number
     patient_num = format_bd_number(patient_num)
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
-        # check if appointment exists first
-        cursor = await db.execute("""
-            SELECT * FROM patients
-            WHERE patient_num = ?
-            AND LOWER(doctor_name) = LOWER(?)
-        """, (patient_num, doctor_name))
-        row = await cursor.fetchone()
-        if not row:
-            return json.dumps({
-                "success": False,
-                "message": "No matching appointment found to delete."
-            })
-        # delete appointment
-        await db.execute("""
-            DELETE FROM patients
-            WHERE patient_num = ?
-            AND LOWER(doctor_name) = LOWER(?)
-        """, (patient_num, doctor_name))
         await db.commit()
     return json.dumps({
         "success": True,
-        "message": f"Appointment with Dr. {doctor_name} deleted successfully."
     })
-######################### MAIN AGENT CLASS #########################
 class AIBackend:
-    def __init__(self):
         load_dotenv()
-        os.environ["LANGCHAIN_PROJECT"] = "Doctor Appointment Automation"
-        self.llm = ChatOllama(model="gemma4:e4b", streaming=True) # qwen2.5:3b, gemma4:e4b
-        self.tools = [search_doctor, book_appointment, get_bd_time, search_appointment_by_phone, delete_appointment]
-        self.tool_node = ToolNode(self.tools)
         self.llm_with_tools = self.llm.bind_tools(self.tools)
-    async def async_setup(self):
-        db_path = os.path.join(os.path.dirname(__file__), "daa.db")
-        self.conn = await aiosqlite.connect(db_path)
         self.checkpointer = AsyncSqliteSaver(self.conn)
-        await self._create_user_table()
-        self.graph = self._build_graph()
         self.summary_graph = self._build_summary_graph()
-    async def _create_user_table(self):
         await self.conn.execute("""
             CREATE TABLE IF NOT EXISTS userid_threadid (
-                userId  TEXT UNIQUE NOT NULL,
                 threadId TEXT UNIQUE NOT NULL
             )
-            """)
         await self.conn.commit()
-    ######################### SUMMARIZE NODE #########################
     async def summarize_conversation(self, state: ChatState):
-        existing_summary = state.get("summary", "")
         messages = state["messages"]
-        prompt = (
-                f"""
-                You are maintaining a long-term conversation memory for a chatbot.
-                Existing summary:
-                {existing_summary}
-                Update and extend the summary using ONLY the new conversation messages above.
-                Instructions:
-                - Preserve important existing context.
-                - Add new facts, decisions, preferences, goals, issues, and ongoing tasks.
-                - Keep technical details concise but meaningful.
-                - Track unresolved problems or follow-up actions.
-                - Avoid repetition and remove outdated or redundant information when appropriate.
-                - Maintain chronological consistency.
-                - Write the summary in clear bullet points.
-                - Focus on information useful for future conversations and contextual continuity.
-                - Do NOT include casual greetings or temporary small talk unless important.
-                - Keep the summary compact but information-dense.
-                """
-                    if existing_summary
-                    else
-                    """
-                You are creating a long-term conversation memory summary for a chatbot.
-                Summarize the conversation above.
-                Instructions:
-                - Capture important user information, goals, preferences, projects, and decisions.
-                - Include technical issues, debugging progress, and solutions discussed.
-                - Track ongoing tasks or unresolved questions.
-                - Ignore casual greetings and low-value chatter.
-                - Write concise, structured bullet points.
-                - Keep the summary compact but highly informative for future context retention.
-                """
-                )
-        messages_for_summary = messages + [HumanMessage(content=prompt)]
-        response = await self.llm.ainvoke(messages_for_summary)
         return {
             "summary": response.content,
             "messages": [RemoveMessage(id=m.id) for m in messages[:-2]],
         }
-    async def should_summarize(self, state: ChatState):
-        if len(state["messages"]) > 10:
-            return "summarize_node"
-        return "chat_node"
-    ######################### CHAT NODE #########################
     async def chat_node(self, state: ChatState):
-        summary = state.get("summary", "")
         messages = state["messages"]
-        print('#'*50)
         print(">>>>>>>>>> CHAT NODE START <<<<<<<<<<")
-        if summary:
-            print(f"[SUMMARY]:\n{summary}\n")
         else:
-            print("[NO SUMMARY YET]\n")
-        print('$'*50)
-        print("[MESSAGES]:")
-        for m in messages:
-            role = m.__class__.__name__
-            print(f"  [{role}]: {m.content[:200]}")
-        print('$'*50,'\n')
-        if summary:
-            summary_message = SystemMessage(
-                content=(
-                        "You are a Bangla voice assistant. You are provided with a condensed memory of previous conversations.\n\n"
-                        f"Conversation Memory:\n{summary}\n\n"
-                        "Instructions:\n"
-                        "- Always respond in Bangla (বাংলা)"
-                        "- Keep sentences short for speech"
-                        "- No English unless necessary"
-                        "- Use this memory as long-term conversational context.\n"
-                        "- Maintain continuity with the user's previous discussions, projects, goals, and preferences.\n"
-                        "- Prioritize recent and relevant information when generating responses.\n"
-                        "- Do not repeat the summary unless necessary.\n"
-                        "- If new information conflicts with old memory, prefer the latest context.\n"
-                        "- Use the memory naturally to improve personalization, reasoning, and follow-up responses.\n"
-                        "- Treat unresolved issues, active projects, and pending tasks as ongoing unless stated otherwise."
-                        )
-                    )
-            messages = [summary_message] + messages
-        response = await self.llm_with_tools.ainvoke(messages)
-        print(f"Final [{response.__class__.__name__}]: {response.content[:200]}")
         print(">>>>>>>>>> CHAT NODE END <<<<<<<<<<")
-        print('#'*50)
         return {"messages": [response]}
-    ######################### GRAPH #########################
     def _build_graph(self):
         g = StateGraph(ChatState)
         g.add_node("chat_node", self.chat_node)
-        g.add_node("tools", self.tool_node)
         g.add_edge(START, "chat_node")
         g.add_conditional_edges("chat_node", tools_condition)
         g.add_edge("tools", "chat_node")
         return g.compile(checkpointer=self.checkpointer)
     def _build_summary_graph(self):
@@ -386,40 +398,49 @@ class AIBackend:
         g.add_edge("summarize_node", END)
         return g.compile(checkpointer=self.checkpointer)
-    ######################### STREAMING #########################
     async def ai_only_stream(self, initial_state: dict, config: dict):
-        async for message_chunk, metadata in self.graph.astream(initial_state, config=config, stream_mode="messages"):
-            if isinstance(message_chunk, AIMessage) and message_chunk.content:
-                yield message_chunk.content
-        # Auto Summarization Execute
-        current_state = await self.graph.aget_state(config)
-        if len(current_state.values.get("messages", [])) > 10:
             asyncio.create_task(
-                self.summary_graph.ainvoke(current_state.values, config=config)
             )
-            print('@'*20,'Summarization Execute','@'*20)
-    ######################### THREAD ID #########################
     @staticmethod
     def generate_thread_id() -> str:
         return str(uuid.uuid4())
-    ######################### RETRIEVE ALL THREADS #########################
-    async def retrieve_all_threads(self):
-        all_threads = set()
-        async for checkpoint in self.checkpointer.alist(None):
-            all_threads.add(checkpoint.config["configurable"]["thread_id"])
-        return list(all_threads)
-    ######################### MAIN ENTRY POINT #########################
     async def main(self, user_id: str, user_query: str):
         async with self.conn.execute(
-            "SELECT userId, threadId FROM userid_threadid WHERE userId = ?", (user_id,)
         ) as cursor:
-            result = await cursor.fetchone()
-        if result is None:
             thread_id = user_id + self.generate_thread_id()
             await self.conn.execute(
                 "INSERT INTO userid_threadid (userId, threadId) VALUES (?, ?)",
@@ -427,12 +448,12 @@ class AIBackend:
             )
             await self.conn.commit()
         else:
-            thread_id = result[1]
         initial_state = {"messages": [HumanMessage(content=user_query)]}
         config = {
             "configurable": {"thread_id": thread_id},
-            "metadata": {"thread_id": thread_id},
-            "run_name": "chat_turn",
         }
         return self.ai_only_stream(initial_state, config)

+from __future__ import annotations
+import asyncio
+import json
+import os
+import uuid
+import aiosqlite
+import pytz
+from datetime import datetime
+from dotenv import load_dotenv
+from langchain_core.messages import (
+    AIMessage, AIMessageChunk, HumanMessage, RemoveMessage,
+    SystemMessage, ToolMessage,
+)
+from langchain_core.tools import tool
+from langchain_google_genai import ChatGoogleGenerativeAI
 from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
+from langgraph.graph import END, START, StateGraph
+from langgraph.graph.message import add_messages
 from langgraph.prebuilt import ToolNode, tools_condition
 from twilio.rest import Client
+from typing import Annotated, TypedDict
+# ═══════════════════════════════════════════════════════════════════════════════
+#  STATE
+# ═══════════════════════════════════════════════════════════════════════════════
 class ChatState(TypedDict):
+    messages: Annotated[list, add_messages]
     summary: str
+# ═══════════════════════════════════════════════════════════════════════════════
+#  HELPERS
+# ═══════════════════════════════════════════════════════════════════════════════
def get_db_path() -> str:
    """Return the path of the ``daa.db`` SQLite file that sits next to this module."""
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, "daa.db")
 def format_bd_number(num: str) -> str:
     num = num.strip().replace(" ", "")
         return "+88" + num
     if num.startswith("8801"):
         return "+" + num
+    return num
def send_sms(to_number: str, message: str) -> None:
    """Send ``message`` as an SMS to ``to_number`` through the Twilio REST client.

    The account SID, auth token and sender number are read from the
    ``TWILIO_ACCOUNT_SID``, ``TWILIO_AUTH_TOKEN`` and ``TWILIO_PHONE_NUMBER``
    environment variables. Nothing is returned; any error raised by the
    Twilio client propagates to the caller.
    """
    account_sid = os.getenv("TWILIO_ACCOUNT_SID")
    auth_token = os.getenv("TWILIO_AUTH_TOKEN")
    sender_number = os.getenv("TWILIO_PHONE_NUMBER")

    twilio_client = Client(account_sid, auth_token)
    twilio_client.messages.create(body=message, from_=sender_number, to=to_number)
+# ═══════════════════════════════════════════════════════════════════════════════
+#  TOOLS
+# ═══════════════════════════════════════════════════════════════════════════════
 @tool
 def get_bd_time() -> str:
+    """Get current Bangladesh time (Asia/Dhaka) with weekday name."""
+    tz  = pytz.timezone("Asia/Dhaka")
     now = datetime.now(tz)
     return now.strftime("%Y-%m-%d %H:%M:%S (%A, Bangladesh Time)")
 @tool
+async def search_doctor(
+    name: str = "",
+    category: str = "",
+    visiting_days: str = "",
+) -> str:
     """
+    Search doctors by name, category, or visiting_days from the SQLite database.
+    Any combination of filters is supported (OR logic across fields).
     """
+    db_path    = get_db_path()
+    query      = "SELECT * FROM doctors WHERE 1=1"
+    params: list = []
+    conditions: list[str] = []
     if name:
         conditions.append("LOWER(doctor_name) LIKE ?")
         params.append(f"%{name.lower()}%")
     if category:
         conditions.append("LOWER(category) LIKE ?")
         params.append(f"%{category.lower()}%")
     if visiting_days:
         conditions.append("LOWER(visiting_days) LIKE ?")
         params.append(f"%{visiting_days.lower()}%")
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
         cursor = await db.execute(query, params)
+        rows   = await cursor.fetchall()
     if not rows:
+        return json.dumps({"success": False, "message": "No doctors found.", "data": []})
+    return json.dumps({"success": True, "count": len(rows), "data": [dict(r) for r in rows]})
 @tool
 async def search_appointment_by_phone(patient_num: str) -> str:
+    """Search all appointments using the patient's phone number."""
+    db_path     = get_db_path()
     patient_num = format_bd_number(patient_num)
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
+        cursor = await db.execute(
+            "SELECT * FROM patients WHERE patient_num = ? ORDER BY visiting_date ASC",
+            (patient_num,),
+        )
         rows = await cursor.fetchall()
     if not rows:
         return json.dumps({
             "success": False,
             "message": "No appointments found for this phone number.",
+            "data": [],
         })
+    return json.dumps({"success": True, "count": len(rows), "data": [dict(r) for r in rows]})
 @tool
+async def book_appointment(
+    doctor_id: int,
+    patient_name: str,
+    patient_age: str,
+    patient_num: str,
+    visiting_date: str,
+) -> str:
     """
     Book a doctor appointment and save it to the patients table.
     Args:
+        doctor_id:     Doctor's ID from search_doctor results.
+        patient_name:  Full name of the patient.
+        patient_age:   Age of the patient (e.g. "32").
+        patient_num:   Contact phone number of the patient.
         visiting_date: Date of visit in YYYY-MM-DD format (e.g. 2025-06-15).
     """
+    db_path     = get_db_path()
+    patient_num = format_bd_number(patient_num)
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
         cursor = await db.execute("SELECT * FROM doctors WHERE id = ?", (doctor_id,))
         doctor = await cursor.fetchone()
         if not doctor:
             return f"No doctor found with ID {doctor_id}. Please search for a doctor first."
+        doctor_data     = dict(doctor)
+        doctor_name     = doctor_data.get("doctor_name", "Unknown")
+        doctor_category = doctor_data.get("category", "Unknown")
         cursor = await db.execute(
             """SELECT id FROM patients
+               WHERE doctor_name = ? AND visiting_date = ? AND patient_num = ?""",
             (doctor_name, visiting_date, patient_num),
         )
+        if await cursor.fetchone():
             return (
                 f"A booking for {patient_name} with Dr. {doctor_name} "
                 f"on {visiting_date} already exists."
             )
+        await db.execute(
+            """INSERT INTO patients
+               (doctor_name, doctor_category, patient_name, patient_age, patient_num, visiting_date)
+               VALUES (?, ?, ?, ?, ?, ?)""",
             (doctor_name, doctor_category, patient_name, patient_age, patient_num, visiting_date),
         )
         await db.commit()
     return (
         f"✅ Appointment Booked!\n"
         f"━━━━━━━━━━━━━━━━━━━━━━\n"
         f"Contact      : {patient_num}\n"
         f"━━━━━━━━━━━━━━━━━━━━━━\n"
         f"Please arrive 10 minutes early."
     )
@tool
async def delete_appointment(patient_num: str, doctor_name: str) -> str:
    """Delete an appointment using the patient's phone number and doctor name."""
    # The docstring is kept word-for-word: langchain's @tool decorator exposes
    # it to the model as the tool description.
    #
    # patient_num: patient phone number; normalised with format_bd_number
    #              before matching.
    # doctor_name: matched case-insensitively against patients.doctor_name.
    # Returns a JSON string {"success": bool, "message": str}.
    db_path = get_db_path()
    patient_num = format_bd_number(patient_num)

    async with aiosqlite.connect(db_path) as db:
        # A single DELETE is enough: the cursor's rowcount reports how many
        # rows matched, so no separate existence probe is needed and there is
        # no window between "check" and "delete".
        cursor = await db.execute(
            """DELETE FROM patients
               WHERE patient_num = ? AND LOWER(doctor_name) = LOWER(?)""",
            (patient_num, doctor_name),
        )
        if cursor.rowcount < 1:
            # Nothing matched, so there is nothing to commit.
            return json.dumps({"success": False, "message": "No matching appointment found."})
        await db.commit()

    return json.dumps({
        "success": True,
        "message": f"Appointment with Dr. {doctor_name} deleted successfully.",
    })
+# ═══════════════════════════════════════════════════════════════════════════════
+#  SYSTEM PROMPT
+# ═══════════════════════════════════════════════════════════════════════════════
# Base instructions given to the model on every turn.
BASE_SYSTEM = "".join([
    "You are a helpful Bangla voice assistant for a doctor appointment system.\n",
    "Rules:\n",
    "- Always respond in Bangla (বাংলা).\n",
    "- Keep sentences short and natural for text-to-speech playback.\n",
    "- Avoid markdown, bullet points, or long lists in voice responses.\n",
    "- Use tools when needed to search doctors or manage appointments.\n",
    "- Be polite, concise, and clear.\n",
    "- Do not use English unless a proper noun requires it.\n",
])

# Base instructions plus a slot for the condensed conversation memory.
# NOTE(review): "{summary}" looks like a str.format placeholder filled in by
# the caller -- confirm at the call site.
SUMMARY_SYSTEM = "".join([
    BASE_SYSTEM,
    "\nYou also have a condensed memory of previous conversations:\n\n",
    "{summary}\n\n",
    "Use this memory for continuity. Do not repeat it unless asked.",
])
+# ═══════════════════════════════════════════════════════════════════════════════
+#  AGENT
+# ═══════════════════════════════════════════════════════════════════════════════
 class AIBackend:
+    def __init__(self) -> None:
         load_dotenv()
+        os.environ.setdefault("LANGCHAIN_PROJECT", "Doctor Appointment Automation")
+        self.llm = ChatGoogleGenerativeAI(
+            model="gemini-2.0-flash",
+            temperature=0.3,
+        )
+        self.tools          = [
+            search_doctor,
+            book_appointment,
+            get_bd_time,
+            search_appointment_by_phone,
+            delete_appointment,
+        ]
+        self.tool_node      = ToolNode(self.tools)
         self.llm_with_tools = self.llm.bind_tools(self.tools)
+    # ── Setup ──────────────────────────────────────────────────────────────────
+    async def async_setup(self) -> None:
+        db_path           = get_db_path()
+        self.conn         = await aiosqlite.connect(db_path)
         self.checkpointer = AsyncSqliteSaver(self.conn)
+        await self._create_tables()
+        self.graph         = self._build_graph()
         self.summary_graph = self._build_summary_graph()
+    async def _create_tables(self) -> None:
         await self.conn.execute("""
             CREATE TABLE IF NOT EXISTS userid_threadid (
+                userId   TEXT UNIQUE NOT NULL,
                 threadId TEXT UNIQUE NOT NULL
             )
+        """)
+        await self.conn.execute("""
+            CREATE TABLE IF NOT EXISTS doctors (
+                id            INTEGER PRIMARY KEY AUTOINCREMENT,
+                doctor_name   TEXT NOT NULL,
+                category      TEXT NOT NULL,
+                visiting_days TEXT NOT NULL,
+                chamber       TEXT,
+                fee           TEXT
+            )
+        """)
+        await self.conn.execute("""
+            CREATE TABLE IF NOT EXISTS patients (
+                id              INTEGER PRIMARY KEY AUTOINCREMENT,
+                doctor_name     TEXT NOT NULL,
+                doctor_category TEXT,
+                patient_name    TEXT NOT NULL,
+                patient_age     TEXT,
+                patient_num     TEXT NOT NULL,
+                visiting_date   TEXT NOT NULL
+            )
+        """)
         await self.conn.commit()
+    # ── Summarise node ─────────────────────────────────────────────────────────
     async def summarize_conversation(self, state: ChatState):
         """Graph node: fold the message history into a running summary.

         Asks the plain (tool-free) LLM to create or update the summary, then
         returns a state update holding the new summary plus ``RemoveMessage``
         markers for every message except the last two.
         """
         existing = state.get("summary", "")
         history = state["messages"]

         if not existing:
             prompt = (
                 "Summarise this conversation. "
                 "Capture goals, decisions, preferences, and unresolved questions. "
                 "Be concise and use bullet points."
             )
         else:
             prompt = (
                 f"Existing summary:\n{existing}\n\n"
                 "Update the summary with the new messages above. "
                 "Keep it concise, bullet-pointed, and information-dense. "
                 "Preserve unresolved issues and ongoing tasks."
             )

         request = HumanMessage(content=prompt)
         response = await self.llm.ainvoke(history + [request])

         # Everything but the two most recent messages is dropped from the state.
         removals = [RemoveMessage(id=old.id) for old in history[:-2]]
         return {"summary": response.content, "messages": removals}
+    async def should_summarize(self, state: ChatState) -> str:
+        return "summarize_node" if len(state["messages"]) > 10 else "chat_node"
+    # ── Chat node — streaming version ──────────────────────────────────────────
     async def chat_node(self, state: ChatState):
         """Graph node: run one tool-enabled LLM turn, consuming it as a stream.

         The model is called through ``astream()`` rather than ``ainvoke()`` so
         that a caller streaming the graph with ``stream_mode='messages'`` can
         receive tokens while they are generated.  The chunks are folded into
         one message, which is returned as the state update so that tool-call
         detection and checkpointing see a single AI message.
         """
         summary  = state.get("summary", "")
         messages = state["messages"]

         # Debug trace of what the model is about to see.
         print("#" * 50)
         print(">>>>>>>>>> CHAT NODE START <<<<<<<<<<")
         print(f"[SUMMARY]: {summary[:120] if summary else 'None'}")
         for m in messages:
             print(f"  [{m.__class__.__name__}]: {str(m.content)[:160]}")
         print("#" * 50)

         if summary:
             system_text = SUMMARY_SYSTEM.format(summary=summary)
         else:
             system_text = BASE_SYSTEM
         prompt_messages = [SystemMessage(content=system_text), *messages]

         # Fold chunks left-to-right as they arrive; chunk addition merges both
         # text content and tool calls.
         response = None
         async for piece in self.llm_with_tools.astream(prompt_messages):
             response = piece if response is None else response + piece
         if response is None:
             # The model produced no chunks at all.
             response = AIMessage(content="")

         print(f"[AI]: {str(response.content)[:200]}")
         print(">>>>>>>>>> CHAT NODE END <<<<<<<<<<")
         return {"messages": [response]}
+    # ── Graph ──────────────────────────────────────────────────────────────────
     def _build_graph(self):
         """Compile the chat graph: START -> chat_node, looping through tools.

         ``tools_condition`` decides after each chat turn whether to route to
         the tool node; tool results always flow back into ``chat_node``.
         """
         builder = StateGraph(ChatState)
         for node_name, node in (("chat_node", self.chat_node), ("tools", self.tool_node)):
             builder.add_node(node_name, node)
         builder.add_edge(START, "chat_node")
         builder.add_conditional_edges("chat_node", tools_condition)
         builder.add_edge("tools", "chat_node")
         return builder.compile(checkpointer=self.checkpointer)
     def _build_summary_graph(self):
         """Compile the one-node graph that runs ``summarize_conversation``.

         The graph is START -> summarize_node -> END and shares the chat
         graph's checkpointer, so a summary written for a thread is visible to
         later chat turns on that thread.
         """
         # The builder, the node registration and the START edge were missing
         # from this method as seen in review (``g`` was used without being
         # defined); they are restored here using the node name the method
         # already referenced.
         g = StateGraph(ChatState)
         g.add_node("summarize_node", self.summarize_conversation)
         g.add_edge(START, "summarize_node")
         g.add_edge("summarize_node", END)
         return g.compile(checkpointer=self.checkpointer)
+    # ── Streaming ──────────────────────────────────────────────────────────────
     async def ai_only_stream(self, initial_state: dict, config: dict):
         """Async generator yielding AI text as the chat graph produces it.

         Args:
             initial_state: state update to feed the graph (the new user message).
             config: LangGraph run config; its thread id selects the checkpoint.

         Yields:
             The non-empty ``content`` of every AI message chunk streamed by
             the graph; other message types (e.g. tool results) are skipped.

         After the stream is exhausted, a background summarisation run is
         started when the thread holds more than 10 messages.
         """
         async for chunk, _meta in self.graph.astream(
             initial_state, config=config, stream_mode="messages"
         ):
             if isinstance(chunk, AIMessage) and chunk.content:
                 yield chunk.content

         # Auto-summarise in background when history grows long
         current = await self.graph.aget_state(config)
         if len(current.values.get("messages", [])) > 10:
             # The event loop only keeps weak references to tasks, so a
             # fire-and-forget task can be garbage-collected before it finishes.
             # Hold a strong reference until the task completes.
             pending = getattr(self, "_background_tasks", None)
             if pending is None:
                 pending = self._background_tasks = set()

             def _finished(done: asyncio.Task) -> None:
                 pending.discard(done)
                 # Retrieve the exception so a failure is reported here instead
                 # of surfacing as "Task exception was never retrieved".
                 if not done.cancelled() and done.exception() is not None:
                     print(f"[SUMMARY] background summarisation failed: {done.exception()}")

             task = asyncio.create_task(
                 self.summary_graph.ainvoke(current.values, config=config)
             )
             pending.add(task)
             task.add_done_callback(_finished)
             print("@" * 20, "Summarisation triggered", "@" * 20)
+    # ── Thread management ──────────────────────────────────────────────────────
     @staticmethod
     def generate_thread_id() -> str:
         """Return a fresh random (version 4) UUID rendered as a string."""
         fresh = uuid.uuid4()
         return str(fresh)
+    async def retrieve_all_threads(self) -> list[str]:
+        threads: set[str] = set()
+        async for cp in self.checkpointer.alist(None):
+            threads.add(cp.config["configurable"]["thread_id"])
+        return list(threads)
+    # ── Public entry point ─────────────────────────────────────────────────────
     async def main(self, user_id: str, user_query: str):
         """Return an async generator of AI text tokens.

         Looks up the conversation thread mapped to ``user_id`` (creating and
         persisting a new one on first contact) and starts a chat turn on it.

         Args:
             user_id: key into the ``userid_threadid`` table.
             user_query: the user's message for this turn.

         Returns:
             The async generator produced by ``ai_only_stream``.
         """
         async with self.conn.execute(
             "SELECT threadId FROM userid_threadid WHERE userId = ?", (user_id,)
         ) as cursor:
             row = await cursor.fetchone()

         if row is None:
             thread_id = user_id + self.generate_thread_id()
             # Both placeholders must be bound; executing the statement without
             # parameters raises instead of storing the mapping.
             await self.conn.execute(
                 "INSERT INTO userid_threadid (userId, threadId) VALUES (?, ?)",
                 (user_id, thread_id),
             )
             await self.conn.commit()
         else:
             thread_id = row[0]

         initial_state = {"messages": [HumanMessage(content=user_query)]}
         config = {
             "configurable": {"thread_id": thread_id},
             "metadata":     {"thread_id": thread_id},
             "run_name":     "chat_turn",
         }
         return self.ai_only_stream(initial_state, config)

frontend/index.html CHANGED Viewed

@@ -45,3 +45,4 @@
 <script src="script.js"></script>
 </body>
 </html>

 <script src="script.js"></script>
 </body>
 </html>

frontend/script.js CHANGED Viewed

@@ -1,22 +1,3 @@
-/* ─────────────────────────────────────────────────────────────────────────────
-   script.js — Voice + text chat client
-   Fixes applied
-   ─────────────
-   1. DOUBLE-SEND BUG: silenceTimer is now explicitly cleared whenever
-      isProcessing is set to true, so a timer that was already ticking
-      can't fire a second stopRecorder() call.
-   2. TTS INTERRUPT / BARGE-IN: stopAllAudio() cancels the current
-      HTMLAudioElement and sends {"type":"cancel"} to the server so the
-      TTS pipeline also aborts server-side.
-   3. MARKDOWN RENDERING: AI bubble uses marked.parse() instead of
-      textContent so Bangla markdown (bold, lists, headings) renders
-      correctly in the chat.
-   4. VAD barge-in path: if the user starts speaking while TTS is playing
-      the audio stops immediately, isProcessing resets, and the new
-      utterance is captured normally.
-───────────────────────────────────────────────────────────────────────────── */
 const chatBox = document.getElementById('chat-box');
 const sendBtn = document.getElementById('send-btn');
 const textInput = document.getElementById('text-input');
@@ -24,12 +5,10 @@ const micBtn = document.getElementById('mic-btn');
 const userId = 'walid';
-// ── WebSockets ────────────────────────────────────────────────────────────────
 const chatSocket = new WebSocket('ws://127.0.0.1:8679/ws/chat');
 const voiceSocket = new WebSocket('ws://127.0.0.1:8679/ws/voice');
 voiceSocket.binaryType = 'arraybuffer';
-// ── State ─────────────────────────────────────────────────────────────────────
 let micStream = null;
 let audioContext = null;
 let analyser = null;
@@ -39,18 +18,98 @@ let isListening = false;
 let isSpeaking = false;
 let silenceTimer = null;
 let vadInterval = null;
-let isProcessing = false; // true while server is processing / TTS playing
 let currentAIMessage = null;
-let currentAudio = null; // the HTMLAudioElement currently playing
-let playbackChain = Promise.resolve();
-// ── VAD config ────────────────────────────────────────────────────────────────
-const SILENCE_THRESHOLD_DB = -45; // dBFS
-const SILENCE_TIMEOUT_MS = 3000; // ms of silence before sending utterance
 const VAD_POLL_MS = 100;
-// ── Text chat ─────────────────────────────────────────────────────────────────
 sendBtn.onclick = sendTextMessage;
 textInput.addEventListener('keydown', (e) => {
   if (e.key === 'Enter') sendTextMessage();
@@ -77,7 +136,6 @@ chatSocket.onmessage = (e) => {
 chatSocket.onerror = (e) => console.error('Chat WS error:', e);
 chatSocket.onclose = () => console.log('Chat WS closed');
-// ── Voice WebSocket events ────────────────────────────────────────────────────
 voiceSocket.onopen = () => console.log('[WS] Voice connected');
 voiceSocket.onclose = () => {
   console.log('[WS] Voice closed');
@@ -86,7 +144,6 @@ voiceSocket.onclose = () => {
 voiceSocket.onerror = (e) => console.error('[WS] Voice error:', e);
 voiceSocket.onmessage = (event) => {
-  // Binary → audio playback
   if (event.data instanceof ArrayBuffer) {
     enqueueAudio(event.data);
     return;
@@ -106,25 +163,22 @@ voiceSocket.onmessage = (event) => {
       break;
     case 'llm_token':
-      // FIX: stream tokens into a div; final markdown render happens on 'end'
       if (!currentAIMessage) {
         currentAIMessage = appendMessage('', 'ai');
         currentAIMessage._raw = '';
       }
       currentAIMessage._raw += msg.token;
-      // Live preview: render markdown progressively
       currentAIMessage.innerHTML = marked.parse(currentAIMessage._raw);
       chatBox.scrollTop = chatBox.scrollHeight;
       break;
     case 'end':
-      // Ensure final markdown render
       if (currentAIMessage && currentAIMessage._raw) {
         currentAIMessage.innerHTML = marked.parse(currentAIMessage._raw);
       }
       currentAIMessage = null;
-      isProcessing = false;
-      if (isListening) setMicStatus('listening');
       break;
     case 'error':
@@ -137,70 +191,18 @@ voiceSocket.onmessage = (event) => {
       break;
     default:
-      console.log('[WS] Unknown msg:', msg.type);
   }
 };
-// ── Audio playback ─────────────────────────────────────────────────────────────
-function enqueueAudio(buffer) {
-  playbackChain = playbackChain.then(() => playBuffer(buffer));
-}
-function playBuffer(buffer) {
-  return new Promise((resolve) => {
-    if (isProcessing === false) {
-      resolve();
-      return;
-    } // cancelled mid-chain
-    const blob = new Blob([buffer], { type: 'audio/mpeg' });
-    const url = URL.createObjectURL(blob);
-    const audio = new Audio(url);
-    currentAudio = audio;
-    const done = () => {
-      URL.revokeObjectURL(url);
-      currentAudio = null;
-      resolve();
-    };
-    audio.onended = done;
-    audio.onerror = () => {
-      console.warn('[AUDIO] playback error');
-      done();
-    };
-    audio.play().catch(() => done());
-  });
-}
-/**
- * Stop all queued and current audio immediately.
- * Also sends a cancel signal to the server so TTS generation stops.
- */
-function stopAllAudio() {
-  // Replace the chain with an already-resolved promise so queued buffers
-  // that haven't started yet are silently dropped.
-  playbackChain = Promise.resolve();
-  if (currentAudio) {
-    currentAudio.pause();
-    currentAudio.src = '';
-    currentAudio = null;
-  }
-  // Tell server to abort TTS pipeline
-  if (voiceSocket.readyState === WebSocket.OPEN) {
-    voiceSocket.send(JSON.stringify({ type: 'cancel' }));
-  }
-}
-// ── Mic button ────────────────────────────────────────────────────────────────
 micBtn.onclick = async () => {
   if (!isListening) await startListening();
   else stopListening();
 };
-// ── Start continuous listening with VAD ───────────────────────────────────────
 async function startListening() {
   try {
     micStream = await navigator.mediaDevices.getUserMedia({
       audio: {
@@ -228,14 +230,12 @@ async function startListening() {
   vadInterval = setInterval(vadTick, VAD_POLL_MS);
 }
-// ── Stop everything ───────────────────────────────────────────────────────────
 function stopListening() {
   clearInterval(vadInterval);
   clearTimeout(silenceTimer);
   vadInterval = silenceTimer = null;
-  if (isSpeaking) stopRecorder(true); // discard in-progress utterance
   stopAllAudio();
   micStream?.getTracks().forEach((t) => t.stop());
@@ -246,7 +246,6 @@ function stopListening() {
   setMicStatus('off');
 }
-// ── VAD polling ───────────────────────────────────────────────────────────────
 function vadTick() {
   if (!analyser) return;
@@ -258,19 +257,18 @@ function vadTick() {
   const speaking = db > SILENCE_THRESHOLD_DB;
   if (speaking) {
-    // FIX: barge-in — user started talking while TTS is playing
     if (isProcessing) {
-      console.log('[VAD] Barge-in detected — stopping TTS.');
       stopAllAudio();
       isProcessing = false;
     }
-    // FIX: clear any pending silence timer so it can't double-fire
     clearTimeout(silenceTimer);
     silenceTimer = null;
     if (!isSpeaking) {
       isSpeaking = true;
       startRecorder();
       setMicStatus('recording');
     }
@@ -280,9 +278,9 @@ function vadTick() {
         silenceTimer = null;
         isSpeaking = false;
-        // FIX: set isProcessing *before* stopping the recorder so that
-        // if vadTick fires again during onstop it sees the flag and skips.
         isProcessing = true;
         stopRecorder(false);
         setMicStatus('processing');
       }, SILENCE_TIMEOUT_MS);
@@ -290,7 +288,6 @@ function vadTick() {
   }
 }
-// ── Recorder ──────────────────────────────────────────────────────────────────
 function startRecorder() {
   if (!micStream) return;
   audioChunks = [];
@@ -335,7 +332,6 @@ function stopRecorder(discard = false) {
   mediaRecorder = null;
 }
-// ── UI helpers ────────────────────────────────────────────────────────────────
 function setMicStatus(state) {
   const labels = {
     off: '🎤 Start Voice',
@@ -352,7 +348,6 @@ function appendMessage(text, sender) {
   div.className = `message ${sender}`;
   if (sender === 'ai' && typeof marked !== 'undefined') {
-    // FIX: render Bangla markdown (bold, lists, headings) properly
     div.innerHTML = marked.parse(text);
   } else {
     div.textContent = text;

 const chatBox = document.getElementById('chat-box');
 const sendBtn = document.getElementById('send-btn');
 const textInput = document.getElementById('text-input');
 const userId = 'walid';
 const chatSocket = new WebSocket('ws://127.0.0.1:8679/ws/chat');
 const voiceSocket = new WebSocket('ws://127.0.0.1:8679/ws/voice');
 voiceSocket.binaryType = 'arraybuffer';
 let micStream = null;
 let audioContext = null;
 let analyser = null;
 let isSpeaking = false;
 let silenceTimer = null;
 let vadInterval = null;
+let isProcessing = false;
 let currentAIMessage = null;
+let _playbackCancelled = false;
+const SILENCE_THRESHOLD_DB = -45;
+const SILENCE_TIMEOUT_MS = 1200;
 const VAD_POLL_MS = 100;
+let _playCtx = null;
+let _schedEndTime = 0;
+let _endTimer = null;
+function _getPlayCtx() {
+  if (!_playCtx || _playCtx.state === 'closed') {
+    _playCtx = new (window.AudioContext || window.webkitAudioContext)();
+    _schedEndTime = 0;
+  }
+  if (_playCtx.state === 'suspended') _playCtx.resume();
+  return _playCtx;
+}
+async function enqueueAudio(buffer) {
+  if (_playbackCancelled) return;
+  const ctx = _getPlayCtx();
+  let decoded;
+  try {
+    decoded = await ctx.decodeAudioData(buffer.slice(0));
+  } catch (err) {
+    console.warn('[AUDIO] decode error:', err);
+    return;
+  }
+  if (_playbackCancelled) return;
+  const src = ctx.createBufferSource();
+  src.buffer = decoded;
+  src.connect(ctx.destination);
+  const now = ctx.currentTime;
+  const startAt = Math.max(now + 0.02, _schedEndTime);
+  src.start(startAt);
+  _schedEndTime = startAt + decoded.duration;
+}
+/**
+ * Called once the server sends `{type:"end"}`.
+ * We know all audio is enqueued; schedule the "processing done" callback
+ * to fire when the last chunk finishes playing.
+ */
+function _schedulePlaybackEnd() {
+  clearTimeout(_endTimer);
+  const ctx = _playCtx;
+  if (!ctx || ctx.state === 'closed') {
+    _onPlaybackFinished();
+    return;
+  }
+  const remaining = Math.max(0, (_schedEndTime - ctx.currentTime) * 1000) + 120;
+  _endTimer = setTimeout(() => {
+    if (!_playbackCancelled) _onPlaybackFinished();
+  }, remaining);
+}
+function _onPlaybackFinished() {
   // Called by _schedulePlaybackEnd() once the turn's audio is considered done:
   // clears the processing flag and, if the mic session is still active,
   // switches the mic status back to 'listening'.
+  isProcessing = false;
+  if (isListening) setMicStatus('listening');
+}
+/**
+ * Stop all queued and currently-playing audio immediately.
+ * Closes the AudioContext so future-scheduled nodes are silenced too.
+ */
+function stopAllAudio() {
+  _playbackCancelled = true;
+  clearTimeout(_endTimer);
+  _endTimer = null;
+  if (_playCtx && _playCtx.state !== 'closed') {
+    _playCtx.close().catch(() => {});
+  }
+  _playCtx = null;
+  _schedEndTime = 0;
+  if (voiceSocket.readyState === WebSocket.OPEN) {
+    voiceSocket.send(JSON.stringify({ type: 'cancel' }));
+  }
+}
 sendBtn.onclick = sendTextMessage;
 textInput.addEventListener('keydown', (e) => {
   if (e.key === 'Enter') sendTextMessage();
 chatSocket.onerror = (e) => console.error('Chat WS error:', e);
 chatSocket.onclose = () => console.log('Chat WS closed');
 voiceSocket.onopen = () => console.log('[WS] Voice connected');
 voiceSocket.onclose = () => {
   console.log('[WS] Voice closed');
 voiceSocket.onerror = (e) => console.error('[WS] Voice error:', e);
 voiceSocket.onmessage = (event) => {
   if (event.data instanceof ArrayBuffer) {
     enqueueAudio(event.data);
     return;
       break;
     case 'llm_token':
       if (!currentAIMessage) {
         currentAIMessage = appendMessage('', 'ai');
         currentAIMessage._raw = '';
       }
       currentAIMessage._raw += msg.token;
       currentAIMessage.innerHTML = marked.parse(currentAIMessage._raw);
       chatBox.scrollTop = chatBox.scrollHeight;
       break;
     case 'end':
       if (currentAIMessage && currentAIMessage._raw) {
         currentAIMessage.innerHTML = marked.parse(currentAIMessage._raw);
       }
       currentAIMessage = null;
+      _schedulePlaybackEnd();
       break;
     case 'error':
       break;
     default:
+      console.log('[WS] Unknown msg type:', msg.type);
   }
 };
 micBtn.onclick = async () => {
   if (!isListening) await startListening();
   else stopListening();
 };
 async function startListening() {
+  _getPlayCtx();
   try {
     micStream = await navigator.mediaDevices.getUserMedia({
       audio: {
   vadInterval = setInterval(vadTick, VAD_POLL_MS);
 }
 function stopListening() {
   clearInterval(vadInterval);
   clearTimeout(silenceTimer);
   vadInterval = silenceTimer = null;
+  if (isSpeaking) stopRecorder(true);
   stopAllAudio();
   micStream?.getTracks().forEach((t) => t.stop());
   setMicStatus('off');
 }
 function vadTick() {
   if (!analyser) return;
   const speaking = db > SILENCE_THRESHOLD_DB;
   if (speaking) {
     if (isProcessing) {
+      console.log('[VAD] Barge-in — stopping TTS.');
       stopAllAudio();
       isProcessing = false;
     }
     clearTimeout(silenceTimer);
     silenceTimer = null;
     if (!isSpeaking) {
       isSpeaking = true;
+      _playbackCancelled = false;
       startRecorder();
       setMicStatus('recording');
     }
         silenceTimer = null;
         isSpeaking = false;
         isProcessing = true;
+        _playbackCancelled = false;
         stopRecorder(false);
         setMicStatus('processing');
       }, SILENCE_TIMEOUT_MS);
   }
 }
 function startRecorder() {
   if (!micStream) return;
   audioChunks = [];
   mediaRecorder = null;
 }
 function setMicStatus(state) {
   const labels = {
     off: '🎤 Start Voice',
   div.className = `message ${sender}`;
   if (sender === 'ai' && typeof marked !== 'undefined') {
     div.innerHTML = marked.parse(text);
   } else {
     div.textContent = text;

services/streaming.py CHANGED Viewed

@@ -1,172 +1,237 @@
-"""
-services/streaming.py — Parallel + ordered TTS streamer
-Fixes applied
-─────────────
-1. BUFFER RACE — self.buffer is now only mutated while holding
-   self._flush_lock, so add_token() and _schedule_flush() can never
-   interleave partial writes.
-2. CANCELLATION — ParallelTTSStreamer.cancel() drops all pending tasks
-   and poisons the queue with a sentinel so stream_audio() exits
-   immediately.  app.py calls cancel() when the user starts speaking
-   mid-playback, giving true barge-in / interrupt behaviour.
-3. Markdown stripping (_clean_for_tts) is unchanged.
-4. Audio ordering guarantee is unchanged (task-chain pattern).
-"""
 from __future__ import annotations
 import asyncio
 import re
 import edge_tts
-VOICE          = "bn-BD-NabanitaNeural"
-FLUSH_LEN      = 80          # chars before forced flush
-MIN_CHARS      = 5           # skip tiny fragments
-FLUSH_TRIGGERS = frozenset(".!?।,;:\n—–")
-# ── Markdown → plain text ──────────────────────────────────────────────────────
 def _clean_for_tts(text: str) -> str:
-    text = re.sub(r"\*{1,3}", "", text)
-    text = re.sub(r"#+\s*", "", text)
-    text = re.sub(r"^\s*[-•]\s*", "", text, flags=re.MULTILINE)
     text = re.sub(r"^\s*[\d০-৯]+[.)]\s*", "", text, flags=re.MULTILINE)
-    text = re.sub(r"`+", "", text)
-    text = re.sub(r"\n{2,}", "\n", text)
     return text.strip()
-# ── Streamer ───────────────────────────────────────────────────────────────────
 class ParallelTTSStreamer:
     """
-    Collects LLM tokens → prosodic chunks → parallel edge-tts calls →
-    serialised audio queue.
     Usage
     ─────
         streamer = ParallelTTSStreamer()
-        # producer
         await streamer.add_token(token)
-        await streamer.flush()          # call once when LLM finishes
-        # consumer (run concurrently with producer)
-        async for chunk in streamer.stream_audio():
-            await ws.send_bytes(chunk)
-        # interrupt (call from any coroutine)
         await streamer.cancel()
     """
     def __init__(self, voice: str = VOICE) -> None:
         self.voice       = voice
         self.buffer      = ""
-        self.queue: asyncio.Queue[bytes | None] = asyncio.Queue()
-        self._prev_task: asyncio.Task | None    = None
-        self._flush_lock = asyncio.Lock()
         self._cancelled  = False
-        self._tasks: list[asyncio.Task] = []    # track all live tasks
-    # ── Token intake ───────────────────────────────────────────────────────────
     async def add_token(self, token: str) -> None:
         if not token or self._cancelled:
             return
-        # FIX: hold the lock for the buffer write too, not just the flush
-        async with self._flush_lock:
-            self.buffer += token
-            should_flush = (
-                any(ch in FLUSH_TRIGGERS for ch in token)
-                or len(self.buffer) >= FLUSH_LEN
-            )
-        if should_flush:
-            await self._schedule_flush()
-    # ── Flush scheduling ───────────────────────────────────────────────────────
-    async def _schedule_flush(self) -> None:
         if self._cancelled:
             return
-        async with self._flush_lock:
-            raw          = self.buffer.strip()
-            self.buffer  = ""
         text = _clean_for_tts(raw)
         if len(text) < MIN_CHARS:
             return
-        prev = self._prev_task
-        task = asyncio.create_task(self._tts_ordered(text, prev))
-        self._prev_task = task
         self._tasks.append(task)
-        task.add_done_callback(lambda t: self._tasks.remove(t) if t in self._tasks else None)
-    # ── Ordered TTS task ───────────────────────────────────────────────────────
-    async def _tts_ordered(self, text: str, wait_for: asyncio.Task | None) -> None:
-        # Step 1 — synthesise (may run in parallel with other chunks)
-        audio_chunks: list[bytes] = []
-        if not self._cancelled:
-            try:
-                communicate = edge_tts.Communicate(text, self.voice)
-                async for chunk in communicate.stream():
-                    if self._cancelled:
-                        break
-                    if chunk["type"] == "audio":
-                        audio_chunks.append(chunk["data"])
-            except Exception as exc:
-                print(f"[TTS] edge-tts error for '{text[:40]}': {exc}")
-        # Step 2 — wait for predecessor to finish queuing (preserves order)
-        if wait_for and not wait_for.done():
-            try:
-                await wait_for
-            except Exception:
-                pass
-        # Step 3 — write to queue (skipped if cancelled)
-        if not self._cancelled:
-            for data in audio_chunks:
-                await self.queue.put(data)
-    # ── Flush remaining buffer ─────────────────────────────────────────────────
     async def flush(self) -> None:
-        """Call once after the LLM stream ends."""
-        await self._schedule_flush()
-        if self._prev_task:
-            try:
-                await self._prev_task
-            except Exception:
-                pass
-        await self.queue.put(None)          # end-of-stream sentinel
-    # ── Interrupt / barge-in ───────────────────────────────────────────────────
     async def cancel(self) -> None:
         """
-        Immediately abort all in-flight TTS tasks and unblock stream_audio().
-        Safe to call from any coroutine while stream_audio() is running.
         """
         self._cancelled = True
-        # Cancel all pending asyncio tasks
         for task in list(self._tasks):
             task.cancel()
-        # Drain and poison the queue so stream_audio() exits
-        while not self.queue.empty():
-            try:
-                self.queue.get_nowait()
-            except asyncio.QueueEmpty:
-                break
-        await self.queue.put(None)          # sentinel → stream_audio exits
-    # ── Audio consumer ─────────────────────────────────────────────────────────
     async def stream_audio(self):
-        """Async generator — yields ordered audio bytes until cancelled/done."""
         while True:
-            chunk = await self.queue.get()
-            if chunk is None:
-                break
-            yield chunk

 from __future__ import annotations
 import asyncio
 import re
+from dataclasses import dataclass, field
+from typing import Optional
 import edge_tts
+VOICE = "bn-BD-NabanitaNeural"
 # VOICE (above) is the default ``voice`` argument of ParallelTTSStreamer.
 #
 # Flush thresholds, measured in characters of buffered text and read by
 # _should_flush(). *_BOUNDARY_MIN is the minimum length at which a sentence
 # boundary triggers a flush; *_HARD forces a flush regardless of punctuation.
 # The FIRST_* pair applies to the first chunk, SUBSEQUENT_* to later ones.
+FIRST_FLUSH_BOUNDARY_MIN = 25
+FIRST_FLUSH_HARD         = 70
+SUBSEQUENT_FLUSH_BOUNDARY_MIN = 40
+SUBSEQUENT_FLUSH_HARD        = 110
 # Cleaned chunks shorter than MIN_CHARS are not sent to TTS.
+MIN_CHARS = 4
 # Characters that _should_flush() treats as sentence ends / clause breaks.
+SENTENCE_BOUNDARIES = frozenset(".!?।॥\n")
+CLAUSE_BOUNDARIES   = frozenset(",;:—–")
 def _clean_for_tts(text: str) -> str:
+    text = re.sub(r"\*{1,3}",      "",  text)
+    text = re.sub(r"#+\s*",        "",  text)
+    text = re.sub(r"^\s*[-•]\s*",  "",  text, flags=re.MULTILINE)
     text = re.sub(r"^\s*[\d০-৯]+[.)]\s*", "", text, flags=re.MULTILINE)
+    text = re.sub(r"`+",           "",  text)
+    text = re.sub(r"\n{2,}",       "\n", text)
     return text.strip()
+def _should_flush(buffer: str, first_chunk: bool) -> bool:
+    """
+    Return True if the buffer is ready to be sent to TTS.
+    Flushing strategy (per chunk):
+      1. If we hit a sentence boundary and have enough chars → flush.
+      2. If we're at the hard limit (even mid-sentence) → flush.
+      3. If we hit a clause boundary near the hard limit → flush early.
+    """
+    n = len(buffer)
+    boundary_min  = FIRST_FLUSH_BOUNDARY_MIN if first_chunk else SUBSEQUENT_FLUSH_BOUNDARY_MIN
+    hard_limit    = FIRST_FLUSH_HARD         if first_chunk else SUBSEQUENT_FLUSH_HARD
+    if n == 0:
+        return False
+    if n >= hard_limit:
+        return True
+    last_char = buffer[-1] if buffer else ""
+    if last_char in SENTENCE_BOUNDARIES and n >= boundary_min:
+        return True
+    if last_char in CLAUSE_BOUNDARIES and n >= hard_limit * 0.8:
+        return True
+    return False
+@dataclass
+class _AudioSlot:
+    """Holds synthesised audio for one TTS chunk. Delivered in slot order."""
     # Position of this chunk in the order chunks were scheduled.
+    index:  int
     # Set by the synthesis task when it is finished with the slot, whether it
     # succeeded, failed or was cancelled.
+    ready:  asyncio.Event          = field(default_factory=asyncio.Event)
     # Audio payloads collected from the TTS stream, in arrival order.
+    chunks: list[bytes]            = field(default_factory=list)
     # True when synthesis was cancelled or raised.
+    error:  bool                   = False
 class ParallelTTSStreamer:
     """
+    Collects LLM tokens → prosodic sentence chunks → parallel edge-tts
+    synthesis → slot-ordered audio delivery.
     Usage
     ─────
         streamer = ParallelTTSStreamer()
         await streamer.add_token(token)
+        await streamer.flush()
+        async for audio_bytes in streamer.stream_audio():
+            await ws.send_bytes(audio_bytes)
         await streamer.cancel()
     """
     def __init__(self, voice: str = VOICE) -> None:
         # Voice name handed to edge_tts.Communicate for every chunk.
         self.voice       = voice
         # Text accumulated from add_token() that has not been scheduled yet.
         self.buffer      = ""
         # Once True, add_token(), _schedule_chunk() and _synthesise() stop
         # doing work; it is set by cancel().
         self._cancelled  = False
         # True until the first chunk is flushed; selects the FIRST_* limits.
+        self._first_chunk = True
         # Index assigned to the next _AudioSlot.
+        self._slot_index  = 0
         # Slots in scheduling order, guarded by _slots_lock when appended to.
+        self._slots: list[_AudioSlot] = []
+        self._slots_lock  = asyncio.Lock()
         # Synthesis tasks that have not completed yet.
+        self._tasks: list[asyncio.Task] = []
         # Set by flush() after all outstanding synthesis tasks have finished.
+        self._done_event  = asyncio.Event()
     async def add_token(self, token: str) -> None:
         """Append one LLM token to the buffer and schedule a chunk when it is ripe.

         Empty tokens, and any token arriving after cancellation, are ignored.
         """
         accept = bool(token) and not self._cancelled
         if not accept:
             return
         self.buffer += token
         if not _should_flush(self.buffer, self._first_chunk):
             return
         # From now on the (larger) subsequent-chunk limits apply.
         self._first_chunk = False
         await self._schedule_chunk()
+    async def _schedule_chunk(self) -> None:
         if self._cancelled:
+            self.buffer = ""
             return
+        raw  = self.buffer.strip()
+        self.buffer = ""
         text = _clean_for_tts(raw)
         if len(text) < MIN_CHARS:
             return
+        async with self._slots_lock:
+            slot = _AudioSlot(index=self._slot_index)
+            self._slot_index += 1
+            self._slots.append(slot)
+        task = asyncio.create_task(self._synthesise(text, slot))
         self._tasks.append(task)
+        task.add_done_callback(
+            lambda t: self._tasks.remove(t) if t in self._tasks else None
+        )
+    async def _synthesise(self, text: str, slot: _AudioSlot) -> None:
+        if self._cancelled:
+            slot.error = True
+            slot.ready.set()
+            return
+        try:
+            communicate = edge_tts.Communicate(text, self.voice)
+            async for chunk in communicate.stream():
+                if self._cancelled:
+                    slot.error = True
+                    slot.ready.set()
+                    return
+                if chunk["type"] == "audio":
+                    slot.chunks.append(chunk["data"])
+        except asyncio.CancelledError:
+            slot.error = True
+        except Exception as exc:
+            print(f"[TTS] edge-tts error for '{text[:50]}': {exc}")
+            slot.error = True
+        finally:
+            slot.ready.set()
     async def flush(self) -> None:
+        if self.buffer.strip():
+            await self._schedule_chunk()
+        if self._tasks:
+            await asyncio.gather(*self._tasks, return_exceptions=True)
+        self._done_event.set()
     async def cancel(self) -> None:
         """
+        Immediately abort all in-flight synthesis tasks.
+        Marks all pending slots as errored so stream_audio() exits promptly.
+        Idempotent.
         """
         self._cancelled = True
         for task in list(self._tasks):
             task.cancel()
+        self._tasks.clear()
+        async with self._slots_lock:
+            for slot in self._slots:
+                if not slot.ready.is_set():
+                    slot.error = True
+                    slot.ready.set()
+        self._done_event.set()
     async def stream_audio(self):
+        """
+        Yields ordered audio bytes.  Slots are consumed in creation order;
+        each slot is awaited individually so synthesis of slot N+1 can
+        proceed in parallel while the consumer is yielding slot N's bytes.
+        """
+        delivered = 0
         while True:
+            async with self._slots_lock:
+                if delivered < len(self._slots):
+                    slot = self._slots[delivered]
+                else:
+                    slot = None
+            if slot is None:
+                if self._done_event.is_set():
+                    break
+                await asyncio.sleep(0.005)
+                continue
+            await slot.ready.wait()
+            if not self._cancelled and not slot.error:
+                for audio_bytes in slot.chunks:
+                    yield audio_bytes
+            delivered += 1
+    def reset(self) -> None:
+        self._cancelled   = False
+        self._first_chunk = True
+        self.buffer       = ""
+        self._slot_index  = 0
+        self._slots.clear()
+        self._tasks.clear()
+        self._done_event.clear()