structure ready :: rakib

Browse files

Files changed (12) hide show

.env +8 -8
.vscode/settings.json +4 -0
app.py +178 -71
core/backend.py +58 -53
frontend/index.html +1 -1
frontend/script.js +259 -131
frontend/style.css +1 -1
requirements.txt +45 -20
services/streaming.py +262 -0
services/stt.py +132 -57
services/tts.py +25 -8
services/vad.py +50 -0

.env CHANGED Viewed

@@ -5,13 +5,13 @@ LANGCHAIN_ENDPOINT='https://api.smith.langchain.com'
 LANGCHAIN_API_KEY='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'
 LANGCHAIN_PROJECT='Default'
-TWILIO_ACCOUNT_SID="ACfafc0d2d007bdf14b21bb3e14a7a7b31"
-TWILIO_AUTH_TOKEN="ed15fa98748c8c3d3d02cb54e431a187"
-TWILIO_PHONE_NUMBER="+14343375085"
-LIVEKIT_URL=wss://demo-wqwzjgsv.livekit.cloud
-LIVEKIT_API_KEY=APIesfzMFdhmrb6
-LIVEKIT_API_SECRET=kb7jLghH6Q3qLXxUHoYwREpYJdgX8qgAOHBDOG7q40G
-GROQ_API_KEY=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
-CARTESIA_API_KEY=sk_car_h3oyy6jPSJzx8KnEGJ1m5f

 LANGCHAIN_API_KEY='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'
 LANGCHAIN_PROJECT='Default'
+# TWILIO_ACCOUNT_SID="ACfafc0d2d007bdf14b21bb3e14a7a7b31"
+# TWILIO_AUTH_TOKEN="ed15fa98748c8c3d3d02cb54e431a187"
+# TWILIO_PHONE_NUMBER="+14343375085"
+# LIVEKIT_URL=wss://demo-wqwzjgsv.livekit.cloud
+# LIVEKIT_API_KEY=APIesfzMFdhmrb6
+# LIVEKIT_API_SECRET=kb7jLghH6Q3qLXxUHoYwREpYJdgX8qgAOHBDOG7q40G
+# GROQ_API_KEY=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
+# CARTESIA_API_KEY=sk_car_h3oyy6jPSJzx8KnEGJ1m5f

.vscode/settings.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "python-envs.defaultEnvManager": "ms-python.python:conda",
+  "python-envs.defaultPackageManager": "ms-python.python:conda"
+}

app.py CHANGED Viewed

@@ -1,95 +1,202 @@
-from fastapi import FastAPI
-from fastapi.responses import StreamingResponse
 from contextlib import asynccontextmanager
-from pydantic import BaseModel
-from core.backend import AIBackend
-import uvicorn, json, os
-from fastapi.middleware.cors import CORSMiddleware
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
-from services.stt import StreamingSTT
-from services.tts import text_to_speech_stream
 from fastapi.staticfiles import StaticFiles
-chatbot_obj = AIBackend()
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    await chatbot_obj.async_setup()
     yield
-    if chatbot_obj.conn:
-        await chatbot_obj.conn.close()
 app = FastAPI(lifespan=lifespan)
-class UserRequest(BaseModel):
-    user_id: str
-    user_query: str
-@app.post("/chat")
-async def chat(request: UserRequest):
-    stream = await chatbot_obj.main(
-        user_id=request.user_id,
-        user_query=request.user_query,
-    )
-    return StreamingResponse(stream, media_type="text/event-stream")
 @app.websocket("/ws/chat")
-async def websocket_chat(websocket: WebSocket):
-    await websocket.accept()
     try:
         while True:
-            # receive frontend message
-            data = await websocket.receive_text()
-            payload = json.loads(data)
-            user_id = payload["user_id"]
-            user_query = payload["user_query"]
-            # stream AI response
-            stream = await chatbot_obj.main(
-                user_id=user_id,
-                user_query=user_query
-            )
-            async for chunk in stream:
-                await websocket.send_text(chunk)
-            # notify frontend response finished
-            await websocket.send_text("[[END]]")
     except WebSocketDisconnect:
-        print("Client disconnected")
 @app.websocket("/ws/voice")
-async def voice_ws(websocket: WebSocket):
-    await websocket.accept()
-    stt = StreamingSTT()
     try:
         while True:
-            message = await websocket.receive()
-            # 🎤 AUDIO INPUT
-            if "bytes" in message:
-                audio_chunk = message["bytes"]
-                stt.add_audio(audio_chunk)
-                text = stt.transcribe_if_ready()
-                if not text:
                     continue
-                await websocket.send_text(f"[STT]: {text}")
-                # 🤖 LLM STREAM
-                stream = chatbot_obj.main(
-                    user_id="voice_user",
-                    user_query=text
-                )
-                full_response = ""
-                async for token in stream:
-                    full_response += token
-                    await websocket.send_text(f"[LLM]: {token}")
-                # 🔊 TTS STREAM
-                async for audio_chunk in text_to_speech_stream(full_response):
-                    await websocket.send_bytes(audio_chunk)
-                await websocket.send_text("[END]")
-    except WebSocketDisconnect:
-        print("Voice client disconnected")
-if __name__ == "__main__":
-    uvicorn.run("app:app", host="127.0.0.1", port=8679, reload=True)

+import asyncio
+import json
+import os
 from contextlib import asynccontextmanager
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
+from fastapi.responses import FileResponse, HTMLResponse
 from fastapi.staticfiles import StaticFiles
+from starlette.websockets import WebSocketState
+from core.backend import AIBackend
+from services.stt import STTProcessor
+from services.streaming import ParallelTTSStreamer
+ai = AIBackend()
 @asynccontextmanager
 async def lifespan(app: FastAPI):
+    """Lifespan handler passed to ``FastAPI(lifespan=...)``.
+
+    Runs ``ai.async_setup()`` before yielding control to the application and,
+    once control returns after the ``yield``, closes the backend connection.
+    """
+    await ai.async_setup()
+    print("[APP] AI backend ready.")
     yield
+    # Close the backend connection only if `ai` has a truthy `conn` attribute.
+    if hasattr(ai, "conn") and ai.conn:
+        await ai.conn.close()
 app = FastAPI(lifespan=lifespan)
+try:
+    app.mount("/static", StaticFiles(directory="."), name="static")
+except Exception:
+    pass
+@app.get("/")
+async def root():
+    if os.path.exists("index.html"):
+        return FileResponse("index.html")
+    return HTMLResponse("<h2>index.html not found</h2>", status_code=404)
+# ── Helpers ───────────────────────────────────────────────────────────────────
+def _ws_open(ws: WebSocket) -> bool:
+    """Return True if the WebSocket connection is still alive."""
+    return ws.client_state == WebSocketState.CONNECTED
+async def _safe_text(ws: WebSocket, payload: dict) -> bool:
+    if not _ws_open(ws):
+        return False
+    try:
+        await ws.send_text(json.dumps(payload))
+        return True
+    except Exception:
+        return False
+async def _safe_bytes(ws: WebSocket, data: bytes) -> bool:
+    if not _ws_open(ws):
+        return False
+    try:
+        await ws.send_bytes(data)
+        return True
+    except Exception:
+        return False
+# ── Text chat WebSocket ───────────────────────────────────────────────────────
 @app.websocket("/ws/chat")
+async def ws_chat(ws: WebSocket):
+    await ws.accept()
+    print("[CHAT] Client connected")
     try:
         while True:
+            raw = await ws.receive_text()
+            try:
+                data = json.loads(raw)
+            except json.JSONDecodeError:
+                await _safe_text(ws, {"type": "error", "text": "Invalid JSON"})
+                continue
+            user_id    = data.get("user_id", "default_user")
+            user_query = data.get("user_query", "").strip()
+            if not user_query:
+                continue
+            full_response = ""
+            try:
+                stream = await ai.main(user_id, user_query)
+                async for token in stream:
+                    full_response += token
+                await _safe_text(ws, {"type": "chat", "text": full_response})
+            except Exception as e:
+                print(f"[CHAT] AI error: {e}")
+                await _safe_text(ws, {"type": "error", "text": str(e)})
+            await _safe_text(ws, {"type": "end"})
     except WebSocketDisconnect:
+        print("[CHAT] Client disconnected")
+    except Exception as e:
+        if "disconnect" not in str(e).lower():
+            print(f"[CHAT] WS error: {e}")
+# ── Voice WebSocket ───────────────────────────────────────────────────────────
 @app.websocket("/ws/voice")
+async def ws_voice(ws: WebSocket):
+    """Voice endpoint: audio utterance in, transcript + LLM tokens + TTS audio out.
+
+    Per received frame:
+      * non-empty binary frame -> transcribed with ``stt.transcribe`` in a
+        worker thread. An empty transcript yields an ``error`` message and an
+        ``end`` message. Otherwise an ``stt`` message is sent, then the AI
+        token stream (``llm_token`` messages) and the TTS audio stream (binary
+        frames) run concurrently, and the turn is closed with ``end``.
+      * non-empty text frame -> parsed as JSON; ``{"type": "ping"}`` is
+        answered with ``{"type": "pong"}``; anything else is ignored.
+
+    The loop exits when the socket is no longer connected or a receive/send
+    fails.
+    """
+    await ws.accept()
+    print("[VOICE] Client connected")
+    stt     = STTProcessor()
+    # NOTE(review): every voice connection uses this same user id, so they
+    # presumably share whatever state ai.main() keys on it — confirm intended.
+    user_id = "voice_user"
     try:
         while True:
+            # ── FIX: Check connection state before every receive ──────────────
+            # The previous crash "Cannot call receive once a disconnect message
+            # has been received" happened because we called ws.receive() after
+            # the client had already disconnected. Now we check first.
+            if not _ws_open(ws):
+                print("[VOICE] Connection dropped, exiting handler.")
+                break
+            try:
+                data = await ws.receive()
+            except WebSocketDisconnect:
+                print("[VOICE] Client disconnected.")
+                break
+            except Exception as e:
+                # Catches starlette's internal disconnect errors gracefully
+                if "disconnect" in str(e).lower():
+                    print("[VOICE] Client disconnected (recv error).")
+                else:
+                    print(f"[VOICE] Receive error: {e}")
+                break
+            # ── Audio blob from VAD ───────────────────────────────────────────
+            if "bytes" in data and data["bytes"]:
+                audio_bytes = data["bytes"]
+                print(f"[VOICE] Received utterance: {len(audio_bytes):,} bytes")
+                # 1. STT — in thread so event loop isn't blocked
+                transcript = await asyncio.to_thread(stt.transcribe, audio_bytes)
+                if not transcript:
+                    await _safe_text(ws, {
+                        "type": "error",
+                        "text": "কথা বুঝতে পারিনি, আবার বলুন।"
+                    })
+                    # Send 'end' so client's isProcessing resets and VAD resumes
+                    await _safe_text(ws, {"type": "end"})
                     continue
+                print(f"[VOICE] STT: {transcript}")
+                # A failed send means the client is gone; stop the handler.
+                if not await _safe_text(ws, {"type": "stt", "text": transcript}):
+                    break
+                # 2. AI + TTS pipeline
+                # A fresh streamer per utterance; tokens go in, audio chunks come out.
+                tts_streamer = ParallelTTSStreamer()
+                async def run_ai_and_tts():
+                    # Producer: forward each LLM token to the client and to TTS.
+                    try:
+                        stream = await ai.main(user_id, transcript)
+                        async for token in stream:
+                            if not token:
+                                continue
+                            if not await _safe_text(ws, {"type": "llm_token", "token": token}):
+                                break
+                            await tts_streamer.add_token(token)
+                    except Exception as e:
+                        print(f"[VOICE] AI error: {e}")
+                    finally:
+                        # Always flush, even after an error or early break —
+                        # presumably this lets stream_audio() finish; confirm
+                        # in services/streaming.py.
+                        await tts_streamer.flush()
+                async def stream_tts_audio():
+                    # Consumer: relay synthesized audio until the socket closes.
+                    async for chunk in tts_streamer.stream_audio():
+                        if not await _safe_bytes(ws, chunk):
+                            break
+                # Run producer and consumer concurrently so audio can start
+                # before the LLM has finished generating.
+                await asyncio.gather(run_ai_and_tts(), stream_tts_audio())
+                # Signal end of turn → client resumes VAD
+                await _safe_text(ws, {"type": "end"})
+            # ── Control messages ──────────────────────────────────────────────
+            elif "text" in data and data["text"]:
+                try:
+                    msg = json.loads(data["text"])
+                    if msg.get("type") == "ping":
+                        await _safe_text(ws, {"type": "pong"})
+                except json.JSONDecodeError:
+                    # Non-JSON control text is ignored on purpose.
+                    pass
+    except WebSocketDisconnect:
+        print("[VOICE] Client disconnected (outer)")
+    except Exception as e:
+        # Disconnect-flavoured errors are expected noise; log anything else.
+        if "disconnect" not in str(e).lower():
+            print(f"[VOICE] WS error: {e}")
+    finally:
+        print("[VOICE] Handler exiting cleanly.")

core/backend.py CHANGED Viewed

@@ -20,6 +20,7 @@ class ChatState(TypedDict):
     summary: str
 ######################### TOOLS #########################
 def get_db_path():
     return os.path.join(os.path.dirname(__file__), "daa.db")
@@ -58,34 +59,34 @@ async def search_doctor(name: str = "", category: str = "", visiting_days: str =
     query = "SELECT * FROM doctors WHERE 1=1"
     params = []
     conditions = []
     if name:
         conditions.append("LOWER(doctor_name) LIKE ?")
         params.append(f"%{name.lower()}%")
     if category:
         conditions.append("LOWER(category) LIKE ?")
         params.append(f"%{category.lower()}%")
     if visiting_days:
         conditions.append("LOWER(visiting_days) LIKE ?")
         params.append(f"%{visiting_days.lower()}%")
     if conditions:
         query += " AND (" + " OR ".join(conditions) + ")"
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
         cursor = await db.execute(query, params)
         rows = await cursor.fetchall()
     if not rows:
         return json.dumps({
             "success": False,
             "message": "No doctors found matching your search.",
             "data": []
         })
     return json.dumps({
         "success": True,
         "count": len(rows),
@@ -99,25 +100,25 @@ async def search_appointment_by_phone(patient_num: str) -> str:
     """
     db_path = get_db_path()
     patient_num = format_bd_number(patient_num)
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
         cursor = await db.execute("""
             SELECT * FROM patients
             WHERE patient_num = ?
             ORDER BY visiting_date ASC
         """, (patient_num,))
         rows = await cursor.fetchall()
     if not rows:
         return json.dumps({
             "success": False,
             "message": "No appointments found for this phone number.",
             "data": []
         })
     return json.dumps({
         "success": True,
         "count": len(rows),
@@ -128,32 +129,33 @@ async def search_appointment_by_phone(patient_num: str) -> str:
 async def book_appointment(doctor_id: int, patient_name: str, patient_age: str, patient_num: str, visiting_date: str) -> str:
     """
     Book a doctor appointment and save it to the patients table.
     Args:
         doctor_id: Doctor's ID from search_doctor results.
         patient_name: Full name of the patient.
         patient_age: Age of the patient (e.g. "32").
         patient_num: Contact phone number of the patient.
         visiting_date: Date of visit in YYYY-MM-DD format (e.g. 2025-06-15).
     Returns a booking confirmation with the new record ID.
     """
     db_path = get_db_path()
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
         patient_num = format_bd_number(patient_num)
         # Verify doctor exists
         cursor = await db.execute("SELECT * FROM doctors WHERE id = ?", (doctor_id,))
         doctor = await cursor.fetchone()
         if not doctor:
             return f"No doctor found with ID {doctor_id}. Please search for a doctor first."
         doctor_data = dict(doctor)
         doctor_name = doctor_data.get("doctor_name", "Unknown")
         doctor_category = doctor_data.get("doctor_category", "Unknown")
         # Check for conflicting booking (same doctor + same date)
         cursor = await db.execute(
             """SELECT id FROM patients
@@ -166,7 +168,7 @@ async def book_appointment(doctor_id: int, patient_name: str, patient_age: str,
                 f"A booking for {patient_name} with Dr. {doctor_name} "
                 f"on {visiting_date} already exists."
             )
         # Insert into patients table
         cursor = await db.execute(
             """INSERT INTO patients (doctor_name, doctor_category, patient_name, patient_age, patient_num, visiting_date)
@@ -174,7 +176,7 @@ async def book_appointment(doctor_id: int, patient_name: str, patient_age: str,
             (doctor_name, doctor_category, patient_name, patient_age, patient_num, visiting_date),
         )
         await db.commit()
     # Send SMS confirmation
     sms_message = (
         f"✅ Appointment Confirmed!\n"
@@ -188,7 +190,7 @@ async def book_appointment(doctor_id: int, patient_name: str, patient_age: str,
     #     sms_status = "📱 SMS confirmation sent."
     # except Exception as e:
     #     sms_status = f"⚠️ SMS failed: {str(e)}"
     return (
         f"✅ Appointment Booked!\n"
         f"━━━━━━━━━━━━━━━━━━━━━━\n"
@@ -209,33 +211,33 @@ async def delete_appointment(patient_num: str, doctor_name: str) -> str:
     db_path = get_db_path()
     # normalize phone number
     patient_num = format_bd_number(patient_num)
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
         # check if appointment exists first
         cursor = await db.execute("""
             SELECT * FROM patients
             WHERE patient_num = ?
             AND LOWER(doctor_name) = LOWER(?)
         """, (patient_num, doctor_name))
         row = await cursor.fetchone()
         if not row:
             return json.dumps({
                 "success": False,
                 "message": "No matching appointment found to delete."
             })
         # delete appointment
         await db.execute("""
             DELETE FROM patients
             WHERE patient_num = ?
             AND LOWER(doctor_name) = LOWER(?)
         """, (patient_num, doctor_name))
         await db.commit()
     return json.dumps({
         "success": True,
         "message": f"Appointment with Dr. {doctor_name} deleted successfully."
@@ -250,7 +252,7 @@ class AIBackend:
         self.tools = [search_doctor, book_appointment, get_bd_time, search_appointment_by_phone, delete_appointment]
         self.tool_node = ToolNode(self.tools)
         self.llm_with_tools = self.llm.bind_tools(self.tools)
     async def async_setup(self):
         db_path = os.path.join(os.path.dirname(__file__), "daa.db")
         self.conn = await aiosqlite.connect(db_path)
@@ -258,7 +260,7 @@ class AIBackend:
         await self._create_user_table()
         self.graph = self._build_graph()
         self.summary_graph = self._build_summary_graph()
     async def _create_user_table(self):
         await self.conn.execute("""
             CREATE TABLE IF NOT EXISTS userid_threadid (
@@ -267,7 +269,7 @@ class AIBackend:
             )
             """)
         await self.conn.commit()
     ######################### SUMMARIZE NODE #########################
     async def summarize_conversation(self, state: ChatState):
         existing_summary = state.get("summary", "")
@@ -275,12 +277,12 @@ class AIBackend:
         prompt = (
                 f"""
                 You are maintaining a long-term conversation memory for a chatbot.
                 Existing summary:
                 {existing_summary}
                 Update and extend the summary using ONLY the new conversation messages above.
                 Instructions:
                 - Preserve important existing context.
                 - Add new facts, decisions, preferences, goals, issues, and ongoing tasks.
@@ -297,9 +299,9 @@ class AIBackend:
                     else
                     """
                 You are creating a long-term conversation memory summary for a chatbot.
                 Summarize the conversation above.
                 Instructions:
                 - Capture important user information, goals, preferences, projects, and decisions.
                 - Include technical issues, debugging progress, and solutions discussed.
@@ -315,37 +317,40 @@ class AIBackend:
             "summary": response.content,
             "messages": [RemoveMessage(id=m.id) for m in messages[:-2]],
         }
     async def should_summarize(self, state: ChatState):
         if len(state["messages"]) > 10:
             return "summarize_node"
         return "chat_node"
     ######################### CHAT NODE #########################
     async def chat_node(self, state: ChatState):
         summary = state.get("summary", "")
         messages = state["messages"]
         print('#'*50)
         print(">>>>>>>>>> CHAT NODE START <<<<<<<<<<")
         if summary:
             print(f"[SUMMARY]:\n{summary}\n")
         else:
             print("[NO SUMMARY YET]\n")
         print('$'*50)
         print("[MESSAGES]:")
         for m in messages:
             role = m.__class__.__name__
             print(f"  [{role}]: {m.content[:200]}")
         print('$'*50,'\n')
         if summary:
             summary_message = SystemMessage(
                 content=(
-                        "You are provided with a condensed memory of previous conversations.\n\n"
                         f"Conversation Memory:\n{summary}\n\n"
                         "Instructions:\n"
                         "- Use this memory as long-term conversational context.\n"
                         "- Maintain continuity with the user's previous discussions, projects, goals, and preferences.\n"
                         "- Prioritize recent and relevant information when generating responses.\n"
@@ -361,32 +366,32 @@ class AIBackend:
         print(">>>>>>>>>> CHAT NODE END <<<<<<<<<<")
         print('#'*50)
         return {"messages": [response]}
     ######################### GRAPH #########################
     def _build_graph(self):
         g = StateGraph(ChatState)
         g.add_node("chat_node", self.chat_node)
         g.add_node("tools", self.tool_node)
         g.add_edge(START, "chat_node")
         g.add_conditional_edges("chat_node", tools_condition)
         g.add_edge("tools", "chat_node")
         return g.compile(checkpointer=self.checkpointer)
     def _build_summary_graph(self):
         g = StateGraph(ChatState)
         g.add_node("summarize_node", self.summarize_conversation)
         g.add_edge(START, "summarize_node")
         g.add_edge("summarize_node", END)
         return g.compile(checkpointer=self.checkpointer)
     ######################### STREAMING #########################
     async def ai_only_stream(self, initial_state: dict, config: dict):
         async for message_chunk, metadata in self.graph.astream(initial_state, config=config, stream_mode="messages"):
             if isinstance(message_chunk, AIMessage) and message_chunk.content:
                 yield message_chunk.content
         # Auto Summarization Execute
         current_state = await self.graph.aget_state(config)
         if len(current_state.values.get("messages", [])) > 10:
@@ -394,26 +399,26 @@ class AIBackend:
                 self.summary_graph.ainvoke(current_state.values, config=config)
             )
             print('@'*20,'Summarization Execute','@'*20)
     ######################### THREAD ID #########################
     @staticmethod
     def generate_thread_id() -> str:
         return str(uuid.uuid4())
     ######################### RETRIEVE ALL THREADS #########################
     async def retrieve_all_threads(self):
         all_threads = set()
         async for checkpoint in self.checkpointer.alist(None):
             all_threads.add(checkpoint.config["configurable"]["thread_id"])
         return list(all_threads)
     ######################### MAIN ENTRY POINT #########################
     async def main(self, user_id: str, user_query: str):
         async with self.conn.execute(
             "SELECT userId, threadId FROM userid_threadid WHERE userId = ?", (user_id,)
         ) as cursor:
             result = await cursor.fetchone()
         if result is None:
             thread_id = user_id + self.generate_thread_id()
             await self.conn.execute(
@@ -423,11 +428,11 @@ class AIBackend:
             await self.conn.commit()
         else:
             thread_id = result[1]
         initial_state = {"messages": [HumanMessage(content=user_query)]}
         config = {
             "configurable": {"thread_id": thread_id},
             "metadata": {"thread_id": thread_id},
             "run_name": "chat_turn",
         }
-        return self.ai_only_stream(initial_state, config)

     summary: str
 ######################### TOOLS #########################
+# After imports, before STATE class
 def get_db_path():
     """Return the path of ``daa.db`` located in this module's directory."""
     module_dir = os.path.dirname(__file__)
     return os.path.join(module_dir, "daa.db")
     query = "SELECT * FROM doctors WHERE 1=1"
     params = []
     conditions = []
     if name:
         conditions.append("LOWER(doctor_name) LIKE ?")
         params.append(f"%{name.lower()}%")
     if category:
         conditions.append("LOWER(category) LIKE ?")
         params.append(f"%{category.lower()}%")
     if visiting_days:
         conditions.append("LOWER(visiting_days) LIKE ?")
         params.append(f"%{visiting_days.lower()}%")
     if conditions:
         query += " AND (" + " OR ".join(conditions) + ")"
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
         cursor = await db.execute(query, params)
         rows = await cursor.fetchall()
     if not rows:
         return json.dumps({
             "success": False,
             "message": "No doctors found matching your search.",
             "data": []
         })
     return json.dumps({
         "success": True,
         "count": len(rows),
     """
     db_path = get_db_path()
     patient_num = format_bd_number(patient_num)
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
         cursor = await db.execute("""
             SELECT * FROM patients
             WHERE patient_num = ?
             ORDER BY visiting_date ASC
         """, (patient_num,))
         rows = await cursor.fetchall()
     if not rows:
         return json.dumps({
             "success": False,
             "message": "No appointments found for this phone number.",
             "data": []
         })
     return json.dumps({
         "success": True,
         "count": len(rows),
 async def book_appointment(doctor_id: int, patient_name: str, patient_age: str, patient_num: str, visiting_date: str) -> str:
     """
     Book a doctor appointment and save it to the patients table.
     Args:
         doctor_id: Doctor's ID from search_doctor results.
         patient_name: Full name of the patient.
         patient_age: Age of the patient (e.g. "32").
         patient_num: Contact phone number of the patient.
         visiting_date: Date of visit in YYYY-MM-DD format (e.g. 2025-06-15).
     Returns a booking confirmation with the new record ID.
     """
     db_path = get_db_path()
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
         patient_num = format_bd_number(patient_num)
         # Verify doctor exists
         cursor = await db.execute("SELECT * FROM doctors WHERE id = ?", (doctor_id,))
         doctor = await cursor.fetchone()
         if not doctor:
             return f"No doctor found with ID {doctor_id}. Please search for a doctor first."
         doctor_data = dict(doctor)
         doctor_name = doctor_data.get("doctor_name", "Unknown")
         doctor_category = doctor_data.get("doctor_category", "Unknown")
         # Check for conflicting booking (same doctor + same date)
         cursor = await db.execute(
             """SELECT id FROM patients
                 f"A booking for {patient_name} with Dr. {doctor_name} "
                 f"on {visiting_date} already exists."
             )
         # Insert into patients table
         cursor = await db.execute(
             """INSERT INTO patients (doctor_name, doctor_category, patient_name, patient_age, patient_num, visiting_date)
             (doctor_name, doctor_category, patient_name, patient_age, patient_num, visiting_date),
         )
         await db.commit()
     # Send SMS confirmation
     sms_message = (
         f"✅ Appointment Confirmed!\n"
     #     sms_status = "📱 SMS confirmation sent."
     # except Exception as e:
     #     sms_status = f"⚠️ SMS failed: {str(e)}"
     return (
         f"✅ Appointment Booked!\n"
         f"━━━━━━━━━━━━━━━━━━━━━━\n"
     db_path = get_db_path()
     # normalize phone number
     patient_num = format_bd_number(patient_num)
     async with aiosqlite.connect(db_path) as db:
         db.row_factory = aiosqlite.Row
         # check if appointment exists first
         cursor = await db.execute("""
             SELECT * FROM patients
             WHERE patient_num = ?
             AND LOWER(doctor_name) = LOWER(?)
         """, (patient_num, doctor_name))
         row = await cursor.fetchone()
         if not row:
             return json.dumps({
                 "success": False,
                 "message": "No matching appointment found to delete."
             })
         # delete appointment
         await db.execute("""
             DELETE FROM patients
             WHERE patient_num = ?
             AND LOWER(doctor_name) = LOWER(?)
         """, (patient_num, doctor_name))
         await db.commit()
     return json.dumps({
         "success": True,
         "message": f"Appointment with Dr. {doctor_name} deleted successfully."
         self.tools = [search_doctor, book_appointment, get_bd_time, search_appointment_by_phone, delete_appointment]
         self.tool_node = ToolNode(self.tools)
         self.llm_with_tools = self.llm.bind_tools(self.tools)
     async def async_setup(self):
         db_path = os.path.join(os.path.dirname(__file__), "daa.db")
         self.conn = await aiosqlite.connect(db_path)
         await self._create_user_table()
         self.graph = self._build_graph()
         self.summary_graph = self._build_summary_graph()
     async def _create_user_table(self):
         await self.conn.execute("""
             CREATE TABLE IF NOT EXISTS userid_threadid (
             )
             """)
         await self.conn.commit()
     ######################### SUMMARIZE NODE #########################
     async def summarize_conversation(self, state: ChatState):
         existing_summary = state.get("summary", "")
         prompt = (
                 f"""
                 You are maintaining a long-term conversation memory for a chatbot.
                 Existing summary:
                 {existing_summary}
                 Update and extend the summary using ONLY the new conversation messages above.
                 Instructions:
                 - Preserve important existing context.
                 - Add new facts, decisions, preferences, goals, issues, and ongoing tasks.
                     else
                     """
                 You are creating a long-term conversation memory summary for a chatbot.
                 Summarize the conversation above.
                 Instructions:
                 - Capture important user information, goals, preferences, projects, and decisions.
                 - Include technical issues, debugging progress, and solutions discussed.
             "summary": response.content,
             "messages": [RemoveMessage(id=m.id) for m in messages[:-2]],
         }
     async def should_summarize(self, state: ChatState):
         """Choose the next node: summarize once more than 10 messages are stored."""
         message_count = len(state["messages"])
         return "summarize_node" if message_count > 10 else "chat_node"
     ######################### CHAT NODE #########################
     async def chat_node(self, state: ChatState):
         summary = state.get("summary", "")
         messages = state["messages"]
         print('#'*50)
         print(">>>>>>>>>> CHAT NODE START <<<<<<<<<<")
         if summary:
             print(f"[SUMMARY]:\n{summary}\n")
         else:
             print("[NO SUMMARY YET]\n")
         print('$'*50)
         print("[MESSAGES]:")
         for m in messages:
             role = m.__class__.__name__
             print(f"  [{role}]: {m.content[:200]}")
         print('$'*50,'\n')
         if summary:
             summary_message = SystemMessage(
                 content=(
+                        "You are a Bangla voice assistant. You are provided with a condensed memory of previous conversations.\n\n"
                         f"Conversation Memory:\n{summary}\n\n"
                         "Instructions:\n"
+                        "- Always respond in Bangla (বাংলা)"
+                        "- Keep sentences short for speech"
+                        "- No English unless necessary"
                         "- Use this memory as long-term conversational context.\n"
                         "- Maintain continuity with the user's previous discussions, projects, goals, and preferences.\n"
                         "- Prioritize recent and relevant information when generating responses.\n"
         print(">>>>>>>>>> CHAT NODE END <<<<<<<<<<")
         print('#'*50)
         return {"messages": [response]}
     ######################### GRAPH #########################
     def _build_graph(self):
         """Compile the chat graph: START -> chat_node, with a tools round-trip.

         ``tools_condition`` decides where to go after ``chat_node``; the
         ``tools`` node always hands control back to ``chat_node``.
         """
         builder = StateGraph(ChatState)
         # Nodes
         builder.add_node("chat_node", self.chat_node)
         builder.add_node("tools", self.tool_node)
         # Edges
         builder.add_edge(START, "chat_node")
         builder.add_conditional_edges("chat_node", tools_condition)
         builder.add_edge("tools", "chat_node")
         compiled = builder.compile(checkpointer=self.checkpointer)
         return compiled
     def _build_summary_graph(self):
         """Compile the one-node summary graph: START -> summarize_node -> END."""
         builder = StateGraph(ChatState)
         builder.add_node("summarize_node", self.summarize_conversation)
         builder.add_edge(START, "summarize_node")
         builder.add_edge("summarize_node", END)
         compiled = builder.compile(checkpointer=self.checkpointer)
         return compiled
     ######################### STREAMING #########################
     async def ai_only_stream(self, initial_state: dict, config: dict):
         async for message_chunk, metadata in self.graph.astream(initial_state, config=config, stream_mode="messages"):
             if isinstance(message_chunk, AIMessage) and message_chunk.content:
                 yield message_chunk.content
         # Auto Summarization Execute
         current_state = await self.graph.aget_state(config)
         if len(current_state.values.get("messages", [])) > 10:
                 self.summary_graph.ainvoke(current_state.values, config=config)
             )
             print('@'*20,'Summarization Execute','@'*20)
     ######################### THREAD ID #########################
     @staticmethod
     def generate_thread_id() -> str:
         return str(uuid.uuid4())
     ######################### RETRIEVE ALL THREADS #########################
     async def retrieve_all_threads(self):
         """Return the distinct thread ids found across all checkpoints, as a list."""
         thread_ids = {
             entry.config["configurable"]["thread_id"]
             async for entry in self.checkpointer.alist(None)
         }
         return list(thread_ids)
     ######################### MAIN ENTRY POINT #########################
     async def main(self, user_id: str, user_query: str):
         async with self.conn.execute(
             "SELECT userId, threadId FROM userid_threadid WHERE userId = ?", (user_id,)
         ) as cursor:
             result = await cursor.fetchone()
         if result is None:
             thread_id = user_id + self.generate_thread_id()
             await self.conn.execute(
             await self.conn.commit()
         else:
             thread_id = result[1]
         initial_state = {"messages": [HumanMessage(content=user_query)]}
         config = {
             "configurable": {"thread_id": thread_id},
             "metadata": {"thread_id": thread_id},
             "run_name": "chat_turn",
         }
+        return self.ai_only_stream(initial_state, config)

frontend/index.html CHANGED Viewed

@@ -44,4 +44,4 @@
 <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
 <script src="script.js"></script>
 </body>
-</html>

 <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
 <script src="script.js"></script>
 </body>
+</html>

frontend/script.js CHANGED Viewed

@@ -1,166 +1,294 @@
-const chatBox = document.getElementById("chat-box");
-const sendBtn = document.getElementById("send-btn");
-const textInput = document.getElementById("text-input");
-const micBtn = document.getElementById("mic-btn");
-const userId = "walid";
-// =======================
-// CHAT WEBSOCKET
-// =======================
-const chatSocket = new WebSocket("ws://127.0.0.1:8679/ws/chat");
-chatSocket.onmessage = (event) => {
-    const data = event.data;
-    if (data === "[[END]]") {
-        return;
-    }
-    appendMessage(data, "ai");
-};
-sendBtn.onclick = () => {
-    sendTextMessage();
-};
-textInput.addEventListener("keydown", (e) => {
-    if (e.key === "Enter") {
-        sendTextMessage();
-    }
 });
 function sendTextMessage() {
-    const message = textInput.value.trim();
-    if (!message) return;
-    appendMessage(message, "user");
-    chatSocket.send(JSON.stringify({
-        user_id: userId,
-        user_query: message
-    }));
-    textInput.value = "";
 }
-// =======================
-// VOICE WEBSOCKET
-// =======================
-const voiceSocket = new WebSocket("ws://127.0.0.1:8679/ws/voice");
-voiceSocket.binaryType = "arraybuffer";
-let mediaRecorder;
-let audioChunks = [];
-let isRecording = false;
-voiceSocket.onmessage = async (event) => {
-    // TEXT MESSAGE
-    if (typeof event.data === "string") {
-        const text = event.data;
-        if (text.startsWith("[STT]:")) {
-            appendMessage("🎤 " + text.replace("[STT]:", ""), "user");
-        }
-        else if (text.startsWith("[LLM]:")) {
-            appendMessage(
-                text.replace("[LLM]:", ""),
-                "ai"
-            );
-        }
-        return;
     }
-    // AUDIO MESSAGE
-    const audioBlob = new Blob([event.data], { type: "audio/mp3" });
-    const audioUrl = URL.createObjectURL(audioBlob);
-    const audio = new Audio(audioUrl);
-    audio.play();
-};
-micBtn.onclick = async () => {
-    if (!isRecording) {
-        startRecording();
     } else {
-        stopRecording();
     }
-};
-async function startRecording() {
-    const stream = await navigator.mediaDevices.getUserMedia({
-        audio: true
-    });
-    mediaRecorder = new MediaRecorder(stream, {
-        mimeType: "audio/webm"
-    });
-    mediaRecorder.start(250);
-    mediaRecorder.ondataavailable = async (event) => {
-        if (event.data.size > 0 &&
-            voiceSocket.readyState === WebSocket.OPEN) {
-            const arrayBuffer = await event.data.arrayBuffer();
-            voiceSocket.send(arrayBuffer);
-        }
     };
-    isRecording = true;
-    micBtn.innerText = "⏹ Stop Voice";
-    micBtn.classList.add("recording");
 }
-function stopRecording() {
-    mediaRecorder.stop();
-    isRecording = false;
-    micBtn.innerText = "🎤 Start Voice";
-    micBtn.classList.remove("recording");
 }
-// =======================
-// UI
-// =======================
 function appendMessage(text, sender) {
-    const div = document.createElement("div");
-    div.classList.add("message");
-    div.classList.add(sender);
-    div.innerHTML = marked.parse(text);
-    chatBox.appendChild(div);
-    chatBox.scrollTop = chatBox.scrollHeight;
-}

+const chatBox = document.getElementById('chat-box');
+const sendBtn = document.getElementById('send-btn');
+const textInput = document.getElementById('text-input');
+const micBtn = document.getElementById('mic-btn');
+const userId = 'walid';
+// ── WebSockets ────────────────────────────────────────────────────────────────
+const chatSocket = new WebSocket('ws://127.0.0.1:8679/ws/chat');
+const voiceSocket = new WebSocket('ws://127.0.0.1:8679/ws/voice');
+voiceSocket.binaryType = 'arraybuffer';
+// ── State ─────────────────────────────────────────────────────────────────────
+let micStream = null;
+let audioContext = null;
+let analyser = null;
+let mediaRecorder = null;
+let audioChunks = [];
+let isListening = false;
+let isSpeaking = false;
+let silenceTimer = null;
+let vadInterval = null;
+let isProcessing = false; // true while server is processing an utterance
+let currentAIMessage = null;
+let playbackChain = Promise.resolve();
+// ── VAD config ────────────────────────────────────────────────────────────────
+const SILENCE_THRESHOLD_DB = -45; // dBFS; lower = more sensitive
+const SILENCE_TIMEOUT_MS = 3000; // 3 s silence → send utterance
+const VAD_POLL_MS = 100;
+// ── Text chat ─────────────────────────────────────────────────────────────────
+sendBtn.onclick = sendTextMessage;
+textInput.addEventListener('keydown', (e) => {
+  if (e.key === 'Enter') sendTextMessage();
 });
 function sendTextMessage() {
+  const msg = textInput.value.trim();
+  if (!msg) return;
+  appendMessage(msg, 'user');
+  chatSocket.send(JSON.stringify({ user_id: userId, user_query: msg }));
+  textInput.value = '';
+}
+// Chat WS now sends JSON: {"type":"chat","text":"..."} or {"type":"end"}
+// Handle one JSON frame from the chat socket; frames that are not valid JSON
+// are ignored.
+chatSocket.onmessage = (e) => {
+  let payload;
+  try {
+    payload = JSON.parse(e.data);
+  } catch {
+    return;
+  }
+  switch (payload.type) {
+    case 'chat':
+      // Empty chat text produces no bubble.
+      if (payload.text) appendMessage(payload.text, 'ai');
+      break;
+    case 'error':
+      appendMessage('⚠️ ' + payload.text, 'system');
+      break;
+    default:
+      // 'end' and anything else: nothing to do for text chat.
+      break;
+  }
+};
+chatSocket.onerror = (e) => console.error('Chat WS error:', e);
+chatSocket.onclose = () => console.log('Chat WS closed');
+// ── Voice WebSocket events ────────────────────────────────────────────────────
+voiceSocket.onopen = () => console.log('[WS] Voice connected');
+voiceSocket.onclose = () => {
+  console.log('[WS] Voice closed');
+  stopListening();
+};
+voiceSocket.onerror = (e) => console.error('[WS] Voice error:', e);
+// Dispatch one frame from the voice socket.
+// Binary frames are queued for playback; text frames are parsed as JSON and
+// routed by their `type` field. Text frames that are not valid JSON are ignored.
+voiceSocket.onmessage = (event) => {
+  // Binary → audio playback
+  if (event.data instanceof ArrayBuffer) {
+    enqueueAudio(event.data);
+    return;
+  }
+  let msg;
+  try {
+    msg = JSON.parse(event.data);
+  } catch {
+    return;
+  }
+  switch (msg.type) {
+    case 'stt':
+      // Show transcribed Bangla text as user bubble
+      appendMessage('🎤 ' + msg.text, 'user');
+      // Forget the previous AI bubble so the next token starts a new one.
+      currentAIMessage = null;
+      break;
+    case 'llm_token':
+      // Stream AI tokens into growing bubble
+      if (!currentAIMessage) currentAIMessage = appendMessage('', 'ai');
+      currentAIMessage.textContent += msg.token;
+      chatBox.scrollTop = chatBox.scrollHeight;
+      break;
+    case 'end':
+      // Server finished this turn → resume VAD listening
+      // (vadTick does nothing while isProcessing is true).
+      currentAIMessage = null;
+      isProcessing = false;
+      if (isListening) setMicStatus('listening');
+      break;
+    case 'error':
+      appendMessage('⚠️ ' + msg.text, 'system');
+      // Still need to reset so VAD resumes
+      isProcessing = false;
+      if (isListening) setMicStatus('listening');
+      break;
+    case 'pong':
+      // Recognised but intentionally ignored.
+      break;
+    default:
+      console.log('[WS] Unknown msg:', msg.type);
+  }
+};
+// ── Audio playback: sequential, no overlap ────────────────────────────────────
+// Append a clip to the playback chain so it starts only after every
+// previously queued clip has finished.
+function enqueueAudio(buffer) {
+  const previous = playbackChain;
+  playbackChain = previous.then(() => playBuffer(buffer));
+}
+// Play one audio buffer and resolve when playback ends or fails.
+// The returned promise never rejects, and the object URL is always revoked.
+function playBuffer(buffer) {
+  return new Promise((resolve) => {
+    const url = URL.createObjectURL(new Blob([buffer], { type: 'audio/mpeg' }));
+    const player = new Audio(url);
+    const finish = () => {
+      URL.revokeObjectURL(url);
+      resolve();
+    };
+    player.onerror = () => {
+      console.warn('[AUDIO] playback error');
+      finish();
+    };
+    player.onended = finish;
+    // A rejected play() is treated like a finished clip.
+    player.play().catch(() => finish());
+  });
 }
+// ── Mic button ────────────────────────────────────────────────────────────────
+// Toggle continuous listening on each click.
+micBtn.onclick = async () => {
+  if (isListening) {
+    stopListening();
+    return;
+  }
+  await startListening();
+};
+// ── Start continuous listening with VAD ───────────────────────────────────────
+// Open the microphone, attach an analyser node, and start the VAD poll.
+// When microphone access fails, a system message is shown and nothing starts.
+async function startListening() {
+  const micConstraints = {
+    audio: {
+      echoCancellation: true,
+      noiseSuppression: true,
+      autoGainControl: true,
+      channelCount: 1,
+      sampleRate: 16000,
+    },
+  };
+  try {
+    micStream = await navigator.mediaDevices.getUserMedia(micConstraints);
+  } catch (err) {
+    console.error('Mic error:', err);
+    appendMessage('⚠️ Microphone access denied.', 'system');
+    return;
+  }
+  audioContext = new AudioContext();
+  analyser = audioContext.createAnalyser();
+  analyser.fftSize = 512;
+  audioContext.createMediaStreamSource(micStream).connect(analyser);
+  isListening = true;
+  setMicStatus('listening');
+  vadInterval = setInterval(vadTick, VAD_POLL_MS);
+}
+// ── Stop everything ───────────────────────────────────────────────────────────
+// Tear down timers, recorder, microphone tracks and audio graph, then reset
+// every listening-related flag and the mic button.
+function stopListening() {
+  clearInterval(vadInterval);
+  clearTimeout(silenceTimer);
+  vadInterval = null;
+  silenceTimer = null;
+  // An utterance still being recorded is thrown away, not sent.
+  if (isSpeaking) stopRecorder(true);
+  if (micStream) {
+    for (const track of micStream.getTracks()) track.stop();
+  }
+  if (audioContext) audioContext.close();
+  micStream = null;
+  audioContext = null;
+  analyser = null;
+  isSpeaking = false;
+  isListening = false;
+  isProcessing = false;
+  setMicStatus('off');
+}
+// ── VAD polling ───────────────────────────────────────────────────────────────
+// One VAD poll: measure the current input level and drive the
+// speaking / silence state machine. Does nothing while there is no analyser
+// or while an utterance is still being processed.
+function vadTick() {
+  if (!analyser || isProcessing) return;
+  // Read analyser.frequencyBinCount time-domain samples for the level estimate.
+  const data = new Float32Array(analyser.frequencyBinCount);
+  analyser.getFloatTimeDomainData(data);
+  // RMS → dBFS
+  const rms = Math.sqrt(data.reduce((s, v) => s + v * v, 0) / data.length);
+  const db = rms > 0 ? 20 * Math.log10(rms) : -Infinity;
+  const speaking = db > SILENCE_THRESHOLD_DB;
+  if (speaking) {
+    // Any sound cancels a pending end-of-utterance timer.
+    clearTimeout(silenceTimer);
+    silenceTimer = null;
+    if (!isSpeaking) {
+      // First loud tick of a new utterance: start recording.
+      isSpeaking = true;
+      startRecorder();
+      setMicStatus('recording');
     }
+  } else {
+    // Start the silence countdown once per quiet stretch; it is cancelled
+    // above if sound returns before SILENCE_TIMEOUT_MS elapses.
+    if (isSpeaking && !silenceTimer) {
+      silenceTimer = setTimeout(() => {
+        silenceTimer = null;
+        isSpeaking = false;
+        // Set before stopping the recorder so later ticks are ignored
+        // until this flag is cleared again.
+        isProcessing = true;
+        stopRecorder(false); // send the utterance
+        setMicStatus('processing');
+      }, SILENCE_TIMEOUT_MS);
+    }
+  }
+}
+// ── Recorder ──────────────────────────────────────────────────────────────────
+// Start recording one utterance from the open microphone stream.
+// When the recorder stops, the collected chunks are sent to the voice socket
+// as a single binary frame. If nothing can be sent (no data captured, or the
+// socket is not open) the processing flag is cleared here, because no server
+// reply will arrive to clear it.
+function startRecorder() {
+  if (!micStream) return;
+  audioChunks = [];
+  const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus')
+    ? 'audio/webm;codecs=opus'
+    : 'audio/webm';
+  mediaRecorder = new MediaRecorder(micStream, { mimeType });
+  mediaRecorder.ondataavailable = (e) => {
+    if (e.data.size > 0) audioChunks.push(e.data);
+  };
+  // Return to the listening state when an utterance could not be sent.
+  const resumeListening = () => {
+    isProcessing = false;
+    if (isListening) setMicStatus('listening');
+  };
+  mediaRecorder.onstop = async () => {
+    if (!audioChunks.length) {
+      // Previously this returned with isProcessing still true, which left
+      // the VAD loop stuck in the "processing" state.
+      resumeListening();
+      return;
+    }
+    const blob = new Blob(audioChunks, { type: mimeType });
+    const buffer = await blob.arrayBuffer();
+    audioChunks = [];
+    if (voiceSocket.readyState === WebSocket.OPEN) {
+      console.log(`[VAD] Sending utterance: ${buffer.byteLength} bytes`);
+      voiceSocket.send(buffer);
     } else {
+      console.warn('[VAD] WS not open, utterance discarded');
+      resumeListening();
     }
+  };
+  mediaRecorder.start();
+}
+// Stop the active recorder, if any. With discard = true the recorder's
+// handlers are replaced first so the captured audio is dropped, not sent.
+function stopRecorder(discard = false) {
+  const recorder = mediaRecorder;
+  if (!recorder || recorder.state === 'inactive') return;
+  if (discard) {
+    recorder.ondataavailable = () => {};
+    recorder.onstop = () => {
+      audioChunks = [];
     };
+  }
+  recorder.stop();
+  mediaRecorder = null;
 }
+// ── UI ────────────────────────────────────────────────────────────────────────
+// Update the mic button's label and CSS class for the given state.
+// Unknown states fall back to the "off" label; every state except 'off'
+// gets a `mic-<state>` class.
+function setMicStatus(state) {
+  const labelByState = {
+    off: '🎤 Start Voice',
+    listening: '🟢 Listening… (click to stop)',
+    recording: '🔴 Speaking…',
+    processing: '⏳ Processing…',
+  };
+  const label = labelByState[state] ?? '🎤 Start Voice';
+  const cssClass = state === 'off' ? '' : `mic-${state}`;
+  micBtn.innerText = label;
+  micBtn.className = cssClass;
 }
 function appendMessage(text, sender) {
+  const div = document.createElement('div');
+  div.className = `message ${sender}`;
+  div.textContent = text;
+  chatBox.appendChild(div);
+  chatBox.scrollTop = chatBox.scrollHeight;
+  return div;
+}

frontend/style.css CHANGED Viewed

@@ -149,4 +149,4 @@ button:hover {
     flex-direction: column;
     padding: 10px;
     gap: 6px;
-}

     flex-direction: column;
     padding: 10px;
     gap: 6px;
+}

requirements.txt CHANGED Viewed

@@ -1,35 +1,60 @@
 python-dotenv
 fastapi
 uvicorn
-requests
 langchain
-langchain-chroma
-langchain-classic
-langchain-community
 langchain-core
 langchain-experimental
-langchain-google-genai
 langchain-huggingface
 langchain-mcp-adapters
-langchain-ollama
-langchain-openai
-langchain-protocol
-langchain-text-splitters
 langgraph
 langgraph-checkpoint
 langgraph-checkpoint-sqlite
-langgraph-prebuilt
-langgraph-sdk
 langsmith
-aiosqlite
-colorama
 faster-whisper
 mcp
-numpy
-ollama
-pydantic
-twilio
-uuid_utils
-uv
-uvicorn

+# ===== Core =====
 python-dotenv
+requests
+numpy
+pydantic
+colorama
+uuid_utils
+# ===== Web Framework =====
 fastapi
 uvicorn
+websockets
+# ===== Async / DB =====
+aiosqlite
+# ===== LangChain Ecosystem =====
 langchain
 langchain-core
+langchain-community
 langchain-experimental
+langchain-text-splitters
+langchain-chroma
+langchain-classic
+langchain-protocol
+langchain-openai
+langchain-ollama
 langchain-huggingface
+langchain-google-genai
 langchain-mcp-adapters
+# ===== LangGraph Ecosystem =====
 langgraph
+langgraph-sdk
+langgraph-prebuilt
 langgraph-checkpoint
 langgraph-checkpoint-sqlite
+# ===== Observability / Tracking =====
 langsmith
+# ===== AI / LLM / STT / TTS =====
+ollama
 faster-whisper
+edge-tts
+google-generativeai
+# ===== Audio / Media =====
+pydub
+Pillow
+# ===== MCP =====
 mcp
+# ===== Utility =====
+uv
+pytz

services/streaming.py ADDED Viewed

	@@ -0,0 +1,262 @@

+# import asyncio
+# import edge_tts
+# # ── Voice ─────────────────────────────────────────────────────────────────────
+# VOICE = "bn-BD-NabanitaNeural"
+# # Flush when buffer reaches this many characters (even without punctuation)
+# FLUSH_LEN = 50
+# # Don't send a TTS request for fewer than this many characters
+# MIN_CHARS = 3
+# # Punctuation marks that trigger an immediate flush
+# FLUSH_TRIGGERS = frozenset(".!?।,;:\n—–")
+# class ParallelTTSStreamer:
+#     """
+#     Collects LLM tokens, splits them into prosodic chunks, and converts each
+#     chunk to audio via edge-tts.
+#     FIX: Audio chunks are now guaranteed to arrive IN ORDER by chaining each
+#     TTS task so it only writes to the queue after the previous task finishes.
+#     This prevents audio chunks from chunk-2 overtaking chunk-1 during playback.
+#     """
+#     def __init__(self, voice: str = VOICE):
+#         self.voice   = voice
+#         self.buffer  = ""
+#         self.queue: asyncio.Queue[bytes | None] = asyncio.Queue()
+#         # Tracks the last scheduled task so each new task waits for it first
+#         self._prev_task: asyncio.Task | None = None
+#         self._flush_lock = asyncio.Lock()
+#     async def add_token(self, token: str) -> None:
+#         """Feed a single LLM output token into the streamer."""
+#         if not token:
+#             return
+#         self.buffer += token
+#         should_flush = (
+#             any(ch in FLUSH_TRIGGERS for ch in token)
+#             or len(self.buffer) >= FLUSH_LEN
+#         )
+#         if should_flush:
+#             await self._schedule_flush()
+#     async def _schedule_flush(self) -> None:
+#         """Snapshot the buffer and schedule an ordered TTS task."""
+#         async with self._flush_lock:
+#             text = self.buffer.strip()
+#             self.buffer = ""
+#         if len(text) < MIN_CHARS:
+#             return
+#         # Each task waits for the previous one before pushing to queue,
+#         # guaranteeing in-order audio delivery.
+#         prev = self._prev_task
+#         task = asyncio.create_task(self._tts_ordered(text, prev))
+#         self._prev_task = task
+#     async def _tts_ordered(self, text: str, wait_for: asyncio.Task | None) -> None:
+#         """
+#         1. First synthesise audio bytes (can run in parallel with other chunks).
+#         2. Then wait for the previous chunk to finish writing to queue.
+#         3. Then push our bytes to the queue in order.
+#         """
+#         # Step 1: synthesise concurrently (no queue writes yet)
+#         audio_chunks: list[bytes] = []
+#         try:
+#             communicate = edge_tts.Communicate(text, self.voice)
+#             async for chunk in communicate.stream():
+#                 if chunk["type"] == "audio":
+#                     audio_chunks.append(chunk["data"])
+#         except Exception as e:
+#             print(f"[TTS] edge-tts error for '{text[:30]}…': {e}")
+#             # Still need to chain — wait for prev even on error
+#             if wait_for and not wait_for.done():
+#                 await wait_for
+#             return
+#         # Step 2: wait for the previous chunk to have finished queuing
+#         if wait_for and not wait_for.done():
+#             try:
+#                 await wait_for
+#             except Exception:
+#                 pass
+#         # Step 3: push our audio bytes in order
+#         for data in audio_chunks:
+#             await self.queue.put(data)
+#     async def flush(self) -> None:
+#         """
+#         Flush remaining buffer, wait for all in-flight TTS tasks, then
+#         signal end-of-stream with sentinel None.
+#         """
+#         await self._schedule_flush()
+#         # Wait for the last chained task (which transitively waits for all)
+#         if self._prev_task:
+#             try:
+#                 await self._prev_task
+#             except Exception:
+#                 pass
+#         await self.queue.put(None)
+#     async def stream_audio(self):
+#         """
+#         Async generator that yields audio bytes in order.
+#         Stops when the sentinel None is received.
+#         """
+#         while True:
+#             chunk = await self.queue.get()
+#             if chunk is None:
+#                 break
+#             yield chunk
+import re
+import asyncio
+import edge_tts
+VOICE = "bn-BD-NabanitaNeural"
+FLUSH_LEN     = 80        # chars before forced flush (longer = more natural speech)
+MIN_CHARS     = 5         # don't TTS tiny fragments
+FLUSH_TRIGGERS = frozenset(".!?।,;:\n—–")
+def _clean_for_tts(text: str) -> str:
+    """
+    Return *text* with markdown decoration removed so it can be spoken.
+    The substitutions run in a fixed order: bold/italic asterisks, heading
+    hashes, leading bullet markers, leading numbered-list markers (digits
+    followed by "." or ")"), backticks, and finally runs of blank lines are
+    collapsed to a single newline. The result is stripped of surrounding
+    whitespace; a chunk made only of such markers becomes an empty string.
+    """
+    # (pattern, replacement, flags) — applied top to bottom.
+    substitutions = (
+        (r'\*{1,3}', '', 0),                              # bold / italic markers
+        (r'#+\s*', '', 0),                                # heading markers
+        (r'^\s*[-•]\s*', '', re.MULTILINE),               # bullet list markers
+        (r'^\s*[\d০-৯]+[.)]\s*', '', re.MULTILINE),       # numbered list markers
+        (r'`+', '', 0),                                   # leftover backticks
+        (r'\n{2,}', '\n', 0),                             # blank-line runs
+    )
+    cleaned = text
+    for pattern, replacement, flags in substitutions:
+        cleaned = re.sub(pattern, replacement, cleaned, flags=flags)
+    return cleaned.strip()
+class ParallelTTSStreamer:
+    """
+    Collect LLM tokens, split them into speakable chunks, synthesise each
+    chunk with edge-tts, and stream the audio bytes back IN ORDER.
+    Synthesis of different chunks may overlap, but each chunk's task only
+    writes to the queue after the previous chunk's task has finished, so the
+    consumer of ``stream_audio`` receives chunk 1 before chunk 2.
+    Fragments shorter than ``MIN_CHARS`` are not synthesised on their own:
+    they stay in the buffer and are merged with the text that follows. The
+    final ``flush`` speaks whatever is left, however short, as long as it
+    contains at least one letter or digit.
+    """
+    def __init__(self, voice: str = VOICE):
+        """Create an empty streamer that synthesises with *voice*."""
+        self.voice      = voice
+        self.buffer     = ""
+        # Audio bytes in playback order; ``None`` marks end-of-stream.
+        self.queue: asyncio.Queue[bytes | None] = asyncio.Queue()
+        # Most recently scheduled chunk task; the next task waits on it.
+        self._prev_task: asyncio.Task | None = None
+        self._flush_lock = asyncio.Lock()
+    async def add_token(self, token: str) -> None:
+        """Append *token* to the buffer and flush on punctuation or length."""
+        if not token:
+            return
+        self.buffer += token
+        if any(ch in FLUSH_TRIGGERS for ch in token) or len(self.buffer) >= FLUSH_LEN:
+            await self._schedule_flush()
+    async def _schedule_flush(self, final: bool = False) -> None:
+        """
+        Turn the buffered text into a TTS task.
+        Args:
+            final: True when no more tokens will arrive. A fragment shorter
+                than ``MIN_CHARS`` is then spoken instead of being held back.
+        """
+        async with self._flush_lock:
+            text = _clean_for_tts(self.buffer.strip())
+            if len(text) < MIN_CHARS and not final:
+                # Too short to synthesise alone. Leave the buffer untouched so
+                # the fragment is spoken together with the next chunk instead
+                # of being dropped.
+                return
+            self.buffer = ""
+        # Nothing speakable (empty or punctuation only): skip the TTS request.
+        if not any(ch.isalnum() for ch in text):
+            return
+        prev = self._prev_task
+        task = asyncio.create_task(self._tts_ordered(text, prev))
+        self._prev_task = task
+    async def _tts_ordered(self, text: str, wait_for: asyncio.Task | None) -> None:
+        """Synthesise audio (parallel), then write to queue in order."""
+        # Step 1 — synthesise concurrently
+        audio_chunks: list[bytes] = []
+        try:
+            communicate = edge_tts.Communicate(text, self.voice)
+            async for chunk in communicate.stream():
+                if chunk["type"] == "audio":
+                    audio_chunks.append(chunk["data"])
+        except Exception as e:
+            # Best effort: log and continue so later chunks are not blocked;
+            # audio received before the error is still delivered below.
+            print(f"[TTS] edge-tts error for '{text[:40]}': {e}")
+        # Step 2 — wait for previous chunk to finish queuing
+        if wait_for and not wait_for.done():
+            try:
+                await wait_for
+            except Exception:
+                # A failed predecessor must not prevent this chunk from playing.
+                pass
+        # Step 3 — write to queue in order
+        for data in audio_chunks:
+            await self.queue.put(data)
+    async def flush(self) -> None:
+        """Flush remaining buffer, await all tasks, send end sentinel."""
+        await self._schedule_flush(final=True)
+        # The last task transitively waits for every earlier one.
+        if self._prev_task:
+            try:
+                await self._prev_task
+            except Exception:
+                pass
+        await self.queue.put(None)
+    async def stream_audio(self):
+        """Yield queued audio bytes until the ``None`` sentinel is received."""
+        while True:
+            chunk = await self.queue.get()
+            if chunk is None:
+                break
+            yield chunk

services/stt.py CHANGED Viewed

@@ -1,73 +1,148 @@
-# from faster_whisper import WhisperModel
-# import tempfile
-# model = WhisperModel("small", device="cpu", compute_type="int8")
-# class StreamingSTT:
-#     def __init__(self):
-#         self.audio_buffer = bytearray()
-#     def add_audio(self, chunk: bytes):
-#         self.audio_buffer.extend(chunk)
-#     def transcribe_if_ready(self):
-#         # simple chunk trigger (1.5–3 sec buffer recommended)
-#         if len(self.audio_buffer) < 48000 * 2 * 2:
-#             return None
-#         with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as f:
-#             f.write(self.audio_buffer)
-#             f.flush()
-#             segments, _ = model.transcribe(f.name, language="bn", task="translate", beam_size=1)
-#             text = " ".join([s.text for s in segments])
-#         self.audio_buffer.clear()
-#         return text
-from faster_whisper import WhisperModel
-import tempfile
-model = WhisperModel(
-    "small",
-    device="cpu",
-    compute_type="int8"
-)
-class StreamingSTT:
-    def __init__(self):
-        self.audio_buffer = bytearray()
-    def add_audio(self, chunk: bytes):
-        self.audio_buffer.extend(chunk)
-    def transcribe_if_ready(self):
-        # wait enough audio
-        if len(self.audio_buffer) < 50000:
             return None
-        # SAVE AS WEBM
-        with tempfile.NamedTemporaryFile(
-            suffix=".webm",
-            delete=True
-        ) as f:
-            f.write(self.audio_buffer)
-            f.flush()
-            segments, _ = model.transcribe(
-                f.name,
-                language="bn",
-                beam_size=1
-            )
-            text = " ".join(
-                [segment.text for segment in segments]
             )
-        self.audio_buffer.clear()
-        return text.strip()

+import os
+import re
+import subprocess
+import tempfile
+from faster_whisper import WhisperModel
+model = WhisperModel("large-v3", device="cuda", compute_type="int8_float32")
+BANGLA_PATTERN = re.compile(r'[\u0980-\u09FF]')
+# Scripts we consider "wrong" — Arabic, Urdu, Devanagari (when expecting Bangla)
+WRONG_SCRIPT_PATTERN = re.compile(
+    r'[\u0600-\u06FF'   # Arabic / Urdu
+    r'\u0750-\u077F'   # Arabic Supplement
+    r'\uFB50-\uFDFF'   # Arabic Presentation Forms
+    r'\uFE70-\uFEFF]'  # Arabic Presentation Forms-B
+)
+def _is_valid_bangla(text: str) -> bool:
+    """
+    Decide whether a transcript plausibly is Bangla.
+    Rules, checked in order:
+    1. Text without any alphabetic character (digits/punctuation only) is
+       accepted.
+    2. Text is rejected when the number of characters matching
+       WRONG_SCRIPT_PATTERN exceeds 30% of the alphabetic characters.
+    3. Text with more than 5 alphabetic characters is rejected when it has no
+       character matching BANGLA_PATTERN.
+    Everything else is accepted.
+    """
+    alpha_count = sum(1 for ch in text if ch.isalpha())
+    if not alpha_count:
+        return True   # digits/punctuation only — let it through
+    foreign_count = len(WRONG_SCRIPT_PATTERN.findall(text))
+    if foreign_count / alpha_count > 0.30:
+        return False
+    bangla_count = len(BANGLA_PATTERN.findall(text))
+    # Longer transcripts must contain at least one Bangla character.
+    return not (alpha_count > 5 and bangla_count == 0)
+class STTProcessor:
+    """Transcribe one recorded utterance (browser WebM audio) to Bangla text.
+    The audio is first converted to a 16 kHz mono WAV with ffmpeg, then passed
+    to the module-level Whisper ``model``. Every temporary file created on the
+    way is removed again, on success and on failure.
+    """
+    # Inputs smaller than this many bytes are skipped without running ffmpeg.
+    MIN_INPUT_BYTES = 3_000
+    @staticmethod
+    def _remove_quietly(path: str | None) -> None:
+        """Delete *path* if it exists; filesystem errors are ignored (best-effort cleanup)."""
+        if path and os.path.exists(path):
+            try:
+                os.remove(path)
+            except OSError:
+                pass
+    def _to_wav(self, audio_bytes: bytes) -> str | None:
+        """Convert browser WebM/opus to 16 kHz mono WAV with loudness normalization.
+        Returns:
+            Path of the WAV file (the caller must delete it), or None when
+            the conversion failed. On failure no temporary file is left behind.
+        """
+        in_path = out_path = None
+        converted = False
+        try:
+            with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as f:
+                f.write(audio_bytes)
+                in_path = f.name
+            # Reserve a name for ffmpeg's output; ffmpeg overwrites it ("-y").
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+                out_path = f.name
+            result = subprocess.run([
+                "ffmpeg", "-y", "-loglevel", "warning",
+                "-i", in_path,
+                "-ar", "16000", "-ac", "1",
+                "-af", "loudnorm",
+                "-f", "wav", out_path,
+            ], stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
+            if result.returncode != 0:
+                print("[STT] ffmpeg error:", result.stderr.decode(errors="replace").strip())
+                return None
+            if not os.path.exists(out_path) or os.path.getsize(out_path) < 500:
+                print("[STT] ffmpeg produced empty WAV.")
+                return None
+            print(f"[STT] WAV ready: {os.path.getsize(out_path):,} bytes")
+            converted = True
+            return out_path
+        except Exception as e:
+            print(f"[STT] _to_wav: {e}")
+            return None
+        finally:
+            # The input copy is never needed again. The output is handed to
+            # the caller only on success; on every failure path it must be
+            # removed here, otherwise each failed utterance leaks a temp file.
+            self._remove_quietly(in_path)
+            if not converted:
+                self._remove_quietly(out_path)
+    def transcribe(self, audio_bytes: bytes) -> str | None:
+        """Return the Bangla transcript of *audio_bytes*, or None.
+        None is returned when the input is too short, conversion fails, the
+        transcript is empty, looks like a repetition hallucination, fails the
+        Bangla script check, or any exception occurs.
+        """
+        if len(audio_bytes) < self.MIN_INPUT_BYTES:
+            print(f"[STT] Too short ({len(audio_bytes)} B), skipping.")
             return None
+        wav_path = None
+        try:
+            wav_path = self._to_wav(audio_bytes)
+            if not wav_path:
+                return None
+            # segments, info = model.transcribe(wav_path, language="bn", task="translate", beam_size=5)
+            segments, info = model.transcribe(
+                wav_path,
+                language="bn",
+                beam_size=5,
+                vad_filter=False,               # loudnorm handles quiet audio
+                condition_on_previous_text=False,
+                temperature=0,
+                suppress_tokens=[-1],
+                no_speech_threshold=0.5,
+                log_prob_threshold=-1.0,
+                # task="translate"
+                # NO initial_prompt — causes hallucination loops on base model
             )
+            text = " ".join(seg.text.strip() for seg in segments).strip()
+            print(f"[STT] Lang={info.language} prob={info.language_probability:.2f}")
+            if not text:
+                print("[STT] Empty transcript.")
+                return None
+            # ── Hallucination guard: repeated words ───────────────────────────
+            # More than 5 words with under 25% distinct words is treated as a loop.
+            words = text.split()
+            if len(words) > 5 and (len(set(words)) / len(words)) < 0.25:
+                print(f"[STT] Hallucination (repetition) discarded: {text[:60]}")
+                return None
+            # ── Script validation: must be Bangla Unicode ─────────────────────
+            # NOTE(review): the messages below mention the 'base'/'small'
+            # models; confirm they still match the module-level model.
+            if not _is_valid_bangla(text):
+                print(f"[STT] Wrong script (Arabic/Urdu output from base model) "
+                      f"discarded: {text[:60]}")
+                print("[STT] ⚠ If this keeps happening, ensure you're using "
+                      "model='small' not 'base'.")
+                return None
+            print(f"[STT] Transcript: {text}")
+            return text
+        except Exception as e:
+            print(f"[STT] transcribe: {e}")
+            import traceback
+            traceback.print_exc()
+            return None
+        finally:
+            self._remove_quietly(wav_path)

services/tts.py CHANGED Viewed

@@ -1,12 +1,29 @@
 import edge_tts
-import asyncio
-import tempfile
-VOICE = "en-US-AriaNeural"
-async def text_to_speech_stream(text: str):
-    communicate = edge_tts.Communicate(text, VOICE)
-    async for chunk in communicate.stream():
-        if chunk["type"] == "audio":
-            yield chunk["data"]

 import edge_tts
+VOICE = "bn-BD-NabanitaNeural"
+async def text_to_speech_stream(text: str, voice: str = VOICE):
+    """
+    Synthesise *text* with edge-tts and yield the audio as it arrives.
+    Args:
+        text:  Text to speak; surrounding whitespace is ignored and an
+               empty result yields nothing.
+        voice: edge-tts voice name. Defaults to the module-level VOICE.
+    Yields:
+        bytes — the ``data`` of every chunk whose ``type`` is ``"audio"``.
+    Errors raised during synthesis are printed and end the stream; they are
+    not propagated to the caller.
+    """
+    payload = text.strip()
+    if payload:
+        try:
+            synthesiser = edge_tts.Communicate(payload, voice)
+            async for part in synthesiser.stream():
+                if part["type"] != "audio":
+                    continue
+                yield part["data"]
+        except Exception as e:
+            print(f"[TTS] text_to_speech_stream error: {e}")

services/vad.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import webrtcvad
+class VADDetector:
+    """Per-frame speech detector built on ``webrtcvad.Vad``.
+    Frames must be exactly ``frame_size`` bytes long; ``frame_size`` is
+    computed as samples-per-frame times 2.
+    """
+    def __init__(self, sample_rate=16000, frame_ms=30, aggressiveness=2):
+        """Create the detector and derive the expected frame size in bytes."""
+        self.vad = webrtcvad.Vad(aggressiveness)
+        self.sample_rate = sample_rate
+        self.frame_ms = frame_ms
+        self.frame_size = int(sample_rate * frame_ms / 1000) * 2
+    def is_valid(self, frame: bytes) -> bool:
+        """Return True when *frame* has exactly the expected byte length."""
+        return len(frame) == self.frame_size
+    def is_speech(self, frame: bytes) -> bool:
+        """Return True when the VAD classifies *frame* as speech.
+        Frames of the wrong size, and frames the VAD raises an error on, are
+        reported as non-speech.
+        """
+        if not self.is_valid(frame):
+            return False
+        try:
+            return self.vad.is_speech(frame, self.sample_rate)
+        except Exception:
+            # Deliberately not a bare ``except:`` — KeyboardInterrupt and
+            # SystemExit must still propagate.
+            return False
+class VADSegmenter:
+    """Group speech frames into utterances separated by silence.
+    Only frames classified as speech are stored. Once an utterance is active,
+    non-speech frames are counted; when the count exceeds ``silence_limit``
+    the buffered speech is returned and the segmenter resets.
+    """
+    def __init__(self, vad: VADDetector, silence_limit=8):
+        """Store the detector and start with an empty, inactive segment."""
+        self.vad = vad
+        self.silence_limit = silence_limit
+        self.buffer = bytearray()
+        self.silence = 0
+        self.active = False
+    def add_frame(self, frame: bytes):
+        """Feed one frame; return the finished utterance as bytes, else None."""
+        if self.vad.is_speech(frame):
+            self.buffer.extend(frame)
+            self.active = True
+            self.silence = 0
+        elif self.active:
+            # Silence only counts once an utterance has started.
+            self.silence += 1
+        utterance_done = self.active and self.silence > self.silence_limit
+        if not utterance_done:
+            return None
+        audio = bytes(self.buffer)
+        self.buffer.clear()
+        self.silence = 0
+        self.active = False
+        return audio