Commit ·
ed5b8b8
1
Parent(s): f2ea5fc
structure ready :: rakib
Browse files- .env +8 -8
- .vscode/settings.json +4 -0
- app.py +178 -71
- core/backend.py +58 -53
- frontend/index.html +1 -1
- frontend/script.js +259 -131
- frontend/style.css +1 -1
- requirements.txt +45 -20
- services/streaming.py +262 -0
- services/stt.py +132 -57
- services/tts.py +25 -8
- services/vad.py +50 -0
.env
CHANGED
|
@@ -5,13 +5,13 @@ LANGCHAIN_ENDPOINT='https://api.smith.langchain.com'
|
|
| 5 |
LANGCHAIN_API_KEY='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'
|
| 6 |
LANGCHAIN_PROJECT='Default'
|
| 7 |
|
| 8 |
-
TWILIO_ACCOUNT_SID="ACfafc0d2d007bdf14b21bb3e14a7a7b31"
|
| 9 |
-
TWILIO_AUTH_TOKEN="ed15fa98748c8c3d3d02cb54e431a187"
|
| 10 |
-
TWILIO_PHONE_NUMBER="+14343375085"
|
| 11 |
|
| 12 |
-
LIVEKIT_URL=wss://demo-wqwzjgsv.livekit.cloud
|
| 13 |
-
LIVEKIT_API_KEY=APIesfzMFdhmrb6
|
| 14 |
-
LIVEKIT_API_SECRET=kb7jLghH6Q3qLXxUHoYwREpYJdgX8qgAOHBDOG7q40G
|
| 15 |
|
| 16 |
-
GROQ_API_KEY=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
|
| 17 |
-
CARTESIA_API_KEY=sk_car_h3oyy6jPSJzx8KnEGJ1m5f
|
|
|
|
| 5 |
LANGCHAIN_API_KEY='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'
|
| 6 |
LANGCHAIN_PROJECT='Default'
|
| 7 |
|
| 8 |
+
# TWILIO_ACCOUNT_SID="ACfafc0d2d007bdf14b21bb3e14a7a7b31"
|
| 9 |
+
# TWILIO_AUTH_TOKEN="ed15fa98748c8c3d3d02cb54e431a187"
|
| 10 |
+
# TWILIO_PHONE_NUMBER="+14343375085"
|
| 11 |
|
| 12 |
+
# LIVEKIT_URL=wss://demo-wqwzjgsv.livekit.cloud
|
| 13 |
+
# LIVEKIT_API_KEY=APIesfzMFdhmrb6
|
| 14 |
+
# LIVEKIT_API_SECRET=kb7jLghH6Q3qLXxUHoYwREpYJdgX8qgAOHBDOG7q40G
|
| 15 |
|
| 16 |
+
# GROQ_API_KEY=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
|
| 17 |
+
# CARTESIA_API_KEY=sk_car_h3oyy6jPSJzx8KnEGJ1m5f
|
.vscode/settings.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"python-envs.defaultEnvManager": "ms-python.python:conda",
|
| 3 |
+
"python-envs.defaultPackageManager": "ms-python.python:conda"
|
| 4 |
+
}
|
app.py
CHANGED
|
@@ -1,95 +1,202 @@
|
|
| 1 |
-
|
| 2 |
-
|
|
|
|
|
|
|
| 3 |
from contextlib import asynccontextmanager
|
| 4 |
-
|
| 5 |
-
from core.backend import AIBackend
|
| 6 |
-
import uvicorn, json, os
|
| 7 |
-
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
| 9 |
-
from
|
| 10 |
-
from services.tts import text_to_speech_stream
|
| 11 |
from fastapi.staticfiles import StaticFiles
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
|
|
|
| 13 |
|
| 14 |
-
chatbot_obj = AIBackend()
|
| 15 |
|
| 16 |
@asynccontextmanager
|
| 17 |
async def lifespan(app: FastAPI):
|
| 18 |
-
await
|
|
|
|
| 19 |
yield
|
| 20 |
-
if
|
| 21 |
-
await
|
|
|
|
| 22 |
|
| 23 |
app = FastAPI(lifespan=lifespan)
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
@app.post("/chat")
|
| 30 |
-
async def chat(request: UserRequest):
|
| 31 |
-
stream = await chatbot_obj.main(
|
| 32 |
-
user_id=request.user_id,
|
| 33 |
-
user_query=request.user_query,
|
| 34 |
-
)
|
| 35 |
-
return StreamingResponse(stream, media_type="text/event-stream")
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
@app.websocket("/ws/chat")
|
| 38 |
-
async def
|
| 39 |
-
await
|
|
|
|
| 40 |
try:
|
| 41 |
while True:
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
await
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
except WebSocketDisconnect:
|
| 61 |
-
print("Client disconnected")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
|
|
|
| 63 |
@app.websocket("/ws/voice")
|
| 64 |
-
async def
|
| 65 |
-
await
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
try:
|
| 68 |
while True:
|
| 69 |
-
|
| 70 |
-
#
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
continue
|
| 77 |
-
await websocket.send_text(f"[STT]: {text}")
|
| 78 |
-
# 🤖 LLM STREAM
|
| 79 |
-
stream = chatbot_obj.main(
|
| 80 |
-
user_id="voice_user",
|
| 81 |
-
user_query=text
|
| 82 |
-
)
|
| 83 |
-
full_response = ""
|
| 84 |
-
async for token in stream:
|
| 85 |
-
full_response += token
|
| 86 |
-
await websocket.send_text(f"[LLM]: {token}")
|
| 87 |
-
# 🔊 TTS STREAM
|
| 88 |
-
async for audio_chunk in text_to_speech_stream(full_response):
|
| 89 |
-
await websocket.send_bytes(audio_chunk)
|
| 90 |
-
await websocket.send_text("[END]")
|
| 91 |
-
except WebSocketDisconnect:
|
| 92 |
-
print("Voice client disconnected")
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import asyncio
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
from contextlib import asynccontextmanager
|
| 6 |
+
|
|
|
|
|
|
|
|
|
|
| 7 |
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
| 8 |
+
from fastapi.responses import FileResponse, HTMLResponse
|
|
|
|
| 9 |
from fastapi.staticfiles import StaticFiles
|
| 10 |
+
from starlette.websockets import WebSocketState
|
| 11 |
+
|
| 12 |
+
from core.backend import AIBackend
|
| 13 |
+
from services.stt import STTProcessor
|
| 14 |
+
from services.streaming import ParallelTTSStreamer
|
| 15 |
|
| 16 |
+
ai = AIBackend()
|
| 17 |
|
|
|
|
| 18 |
|
| 19 |
@asynccontextmanager
|
| 20 |
async def lifespan(app: FastAPI):
|
| 21 |
+
await ai.async_setup()
|
| 22 |
+
print("[APP] AI backend ready.")
|
| 23 |
yield
|
| 24 |
+
if hasattr(ai, "conn") and ai.conn:
|
| 25 |
+
await ai.conn.close()
|
| 26 |
+
|
| 27 |
|
| 28 |
app = FastAPI(lifespan=lifespan)
|
| 29 |
|
| 30 |
+
try:
|
| 31 |
+
app.mount("/static", StaticFiles(directory="."), name="static")
|
| 32 |
+
except Exception:
|
| 33 |
+
pass
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@app.get("/")
|
| 37 |
+
async def root():
|
| 38 |
+
if os.path.exists("index.html"):
|
| 39 |
+
return FileResponse("index.html")
|
| 40 |
+
return HTMLResponse("<h2>index.html not found</h2>", status_code=404)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# ── Helpers ───────────────────────────────────────────────────────────────────
|
| 44 |
+
def _ws_open(ws: WebSocket) -> bool:
|
| 45 |
+
"""Return True if the WebSocket connection is still alive."""
|
| 46 |
+
return ws.client_state == WebSocketState.CONNECTED
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
+
async def _safe_text(ws: WebSocket, payload: dict) -> bool:
|
| 50 |
+
if not _ws_open(ws):
|
| 51 |
+
return False
|
| 52 |
+
try:
|
| 53 |
+
await ws.send_text(json.dumps(payload))
|
| 54 |
+
return True
|
| 55 |
+
except Exception:
|
| 56 |
+
return False
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
async def _safe_bytes(ws: WebSocket, data: bytes) -> bool:
|
| 60 |
+
if not _ws_open(ws):
|
| 61 |
+
return False
|
| 62 |
+
try:
|
| 63 |
+
await ws.send_bytes(data)
|
| 64 |
+
return True
|
| 65 |
+
except Exception:
|
| 66 |
+
return False
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
# ── Text chat WebSocket ───────────────────────────────────────────────────────
|
| 70 |
@app.websocket("/ws/chat")
|
| 71 |
+
async def ws_chat(ws: WebSocket):
|
| 72 |
+
await ws.accept()
|
| 73 |
+
print("[CHAT] Client connected")
|
| 74 |
try:
|
| 75 |
while True:
|
| 76 |
+
raw = await ws.receive_text()
|
| 77 |
+
try:
|
| 78 |
+
data = json.loads(raw)
|
| 79 |
+
except json.JSONDecodeError:
|
| 80 |
+
await _safe_text(ws, {"type": "error", "text": "Invalid JSON"})
|
| 81 |
+
continue
|
| 82 |
+
|
| 83 |
+
user_id = data.get("user_id", "default_user")
|
| 84 |
+
user_query = data.get("user_query", "").strip()
|
| 85 |
+
if not user_query:
|
| 86 |
+
continue
|
| 87 |
+
|
| 88 |
+
full_response = ""
|
| 89 |
+
try:
|
| 90 |
+
stream = await ai.main(user_id, user_query)
|
| 91 |
+
async for token in stream:
|
| 92 |
+
full_response += token
|
| 93 |
+
await _safe_text(ws, {"type": "chat", "text": full_response})
|
| 94 |
+
except Exception as e:
|
| 95 |
+
print(f"[CHAT] AI error: {e}")
|
| 96 |
+
await _safe_text(ws, {"type": "error", "text": str(e)})
|
| 97 |
+
|
| 98 |
+
await _safe_text(ws, {"type": "end"})
|
| 99 |
+
|
| 100 |
except WebSocketDisconnect:
|
| 101 |
+
print("[CHAT] Client disconnected")
|
| 102 |
+
except Exception as e:
|
| 103 |
+
if "disconnect" not in str(e).lower():
|
| 104 |
+
print(f"[CHAT] WS error: {e}")
|
| 105 |
+
|
| 106 |
|
| 107 |
+
# ── Voice WebSocket ───────────────────────────────────────────────────────────
|
| 108 |
@app.websocket("/ws/voice")
|
| 109 |
+
async def ws_voice(ws: WebSocket):
|
| 110 |
+
await ws.accept()
|
| 111 |
+
print("[VOICE] Client connected")
|
| 112 |
+
|
| 113 |
+
stt = STTProcessor()
|
| 114 |
+
user_id = "voice_user"
|
| 115 |
+
|
| 116 |
try:
|
| 117 |
while True:
|
| 118 |
+
# ── FIX: Check connection state before every receive ──────────────
|
| 119 |
+
# The previous crash "Cannot call receive once a disconnect message
|
| 120 |
+
# has been received" happened because we called ws.receive() after
|
| 121 |
+
# the client had already disconnected. Now we check first.
|
| 122 |
+
if not _ws_open(ws):
|
| 123 |
+
print("[VOICE] Connection dropped, exiting handler.")
|
| 124 |
+
break
|
| 125 |
+
|
| 126 |
+
try:
|
| 127 |
+
data = await ws.receive()
|
| 128 |
+
except WebSocketDisconnect:
|
| 129 |
+
print("[VOICE] Client disconnected.")
|
| 130 |
+
break
|
| 131 |
+
except Exception as e:
|
| 132 |
+
# Catches starlette's internal disconnect errors gracefully
|
| 133 |
+
if "disconnect" in str(e).lower():
|
| 134 |
+
print("[VOICE] Client disconnected (recv error).")
|
| 135 |
+
else:
|
| 136 |
+
print(f"[VOICE] Receive error: {e}")
|
| 137 |
+
break
|
| 138 |
+
|
| 139 |
+
# ── Audio blob from VAD ───────────────────────────────────────────
|
| 140 |
+
if "bytes" in data and data["bytes"]:
|
| 141 |
+
audio_bytes = data["bytes"]
|
| 142 |
+
print(f"[VOICE] Received utterance: {len(audio_bytes):,} bytes")
|
| 143 |
+
|
| 144 |
+
# 1. STT — in thread so event loop isn't blocked
|
| 145 |
+
transcript = await asyncio.to_thread(stt.transcribe, audio_bytes)
|
| 146 |
+
|
| 147 |
+
if not transcript:
|
| 148 |
+
await _safe_text(ws, {
|
| 149 |
+
"type": "error",
|
| 150 |
+
"text": "কথা বুঝতে পারিনি, আবার বলুন।"
|
| 151 |
+
})
|
| 152 |
+
# Send 'end' so client's isProcessing resets and VAD resumes
|
| 153 |
+
await _safe_text(ws, {"type": "end"})
|
| 154 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
+
print(f"[VOICE] STT: {transcript}")
|
| 157 |
+
if not await _safe_text(ws, {"type": "stt", "text": transcript}):
|
| 158 |
+
break
|
| 159 |
+
|
| 160 |
+
# 2. AI + TTS pipeline
|
| 161 |
+
tts_streamer = ParallelTTSStreamer()
|
| 162 |
+
|
| 163 |
+
async def run_ai_and_tts():
|
| 164 |
+
try:
|
| 165 |
+
stream = await ai.main(user_id, transcript)
|
| 166 |
+
async for token in stream:
|
| 167 |
+
if not token:
|
| 168 |
+
continue
|
| 169 |
+
if not await _safe_text(ws, {"type": "llm_token", "token": token}):
|
| 170 |
+
break
|
| 171 |
+
await tts_streamer.add_token(token)
|
| 172 |
+
except Exception as e:
|
| 173 |
+
print(f"[VOICE] AI error: {e}")
|
| 174 |
+
finally:
|
| 175 |
+
await tts_streamer.flush()
|
| 176 |
+
|
| 177 |
+
async def stream_tts_audio():
|
| 178 |
+
async for chunk in tts_streamer.stream_audio():
|
| 179 |
+
if not await _safe_bytes(ws, chunk):
|
| 180 |
+
break
|
| 181 |
+
|
| 182 |
+
await asyncio.gather(run_ai_and_tts(), stream_tts_audio())
|
| 183 |
+
|
| 184 |
+
# Signal end of turn → client resumes VAD
|
| 185 |
+
await _safe_text(ws, {"type": "end"})
|
| 186 |
+
|
| 187 |
+
# ── Control messages ──────────────────────────────────────────────
|
| 188 |
+
elif "text" in data and data["text"]:
|
| 189 |
+
try:
|
| 190 |
+
msg = json.loads(data["text"])
|
| 191 |
+
if msg.get("type") == "ping":
|
| 192 |
+
await _safe_text(ws, {"type": "pong"})
|
| 193 |
+
except json.JSONDecodeError:
|
| 194 |
+
pass
|
| 195 |
+
|
| 196 |
+
except WebSocketDisconnect:
|
| 197 |
+
print("[VOICE] Client disconnected (outer)")
|
| 198 |
+
except Exception as e:
|
| 199 |
+
if "disconnect" not in str(e).lower():
|
| 200 |
+
print(f"[VOICE] WS error: {e}")
|
| 201 |
+
finally:
|
| 202 |
+
print("[VOICE] Handler exiting cleanly.")
|
core/backend.py
CHANGED
|
@@ -20,6 +20,7 @@ class ChatState(TypedDict):
|
|
| 20 |
summary: str
|
| 21 |
|
| 22 |
######################### TOOLS #########################
|
|
|
|
| 23 |
def get_db_path():
|
| 24 |
return os.path.join(os.path.dirname(__file__), "daa.db")
|
| 25 |
|
|
@@ -58,34 +59,34 @@ async def search_doctor(name: str = "", category: str = "", visiting_days: str =
|
|
| 58 |
query = "SELECT * FROM doctors WHERE 1=1"
|
| 59 |
params = []
|
| 60 |
conditions = []
|
| 61 |
-
|
| 62 |
if name:
|
| 63 |
conditions.append("LOWER(doctor_name) LIKE ?")
|
| 64 |
params.append(f"%{name.lower()}%")
|
| 65 |
-
|
| 66 |
if category:
|
| 67 |
conditions.append("LOWER(category) LIKE ?")
|
| 68 |
params.append(f"%{category.lower()}%")
|
| 69 |
-
|
| 70 |
if visiting_days:
|
| 71 |
conditions.append("LOWER(visiting_days) LIKE ?")
|
| 72 |
params.append(f"%{visiting_days.lower()}%")
|
| 73 |
-
|
| 74 |
if conditions:
|
| 75 |
query += " AND (" + " OR ".join(conditions) + ")"
|
| 76 |
-
|
| 77 |
async with aiosqlite.connect(db_path) as db:
|
| 78 |
db.row_factory = aiosqlite.Row
|
| 79 |
cursor = await db.execute(query, params)
|
| 80 |
rows = await cursor.fetchall()
|
| 81 |
-
|
| 82 |
if not rows:
|
| 83 |
return json.dumps({
|
| 84 |
"success": False,
|
| 85 |
"message": "No doctors found matching your search.",
|
| 86 |
"data": []
|
| 87 |
})
|
| 88 |
-
|
| 89 |
return json.dumps({
|
| 90 |
"success": True,
|
| 91 |
"count": len(rows),
|
|
@@ -99,25 +100,25 @@ async def search_appointment_by_phone(patient_num: str) -> str:
|
|
| 99 |
"""
|
| 100 |
db_path = get_db_path()
|
| 101 |
patient_num = format_bd_number(patient_num)
|
| 102 |
-
|
| 103 |
async with aiosqlite.connect(db_path) as db:
|
| 104 |
db.row_factory = aiosqlite.Row
|
| 105 |
-
|
| 106 |
cursor = await db.execute("""
|
| 107 |
SELECT * FROM patients
|
| 108 |
WHERE patient_num = ?
|
| 109 |
ORDER BY visiting_date ASC
|
| 110 |
""", (patient_num,))
|
| 111 |
-
|
| 112 |
rows = await cursor.fetchall()
|
| 113 |
-
|
| 114 |
if not rows:
|
| 115 |
return json.dumps({
|
| 116 |
"success": False,
|
| 117 |
"message": "No appointments found for this phone number.",
|
| 118 |
"data": []
|
| 119 |
})
|
| 120 |
-
|
| 121 |
return json.dumps({
|
| 122 |
"success": True,
|
| 123 |
"count": len(rows),
|
|
@@ -128,32 +129,33 @@ async def search_appointment_by_phone(patient_num: str) -> str:
|
|
| 128 |
async def book_appointment(doctor_id: int, patient_name: str, patient_age: str, patient_num: str, visiting_date: str) -> str:
|
| 129 |
"""
|
| 130 |
Book a doctor appointment and save it to the patients table.
|
| 131 |
-
|
| 132 |
Args:
|
| 133 |
doctor_id: Doctor's ID from search_doctor results.
|
| 134 |
patient_name: Full name of the patient.
|
| 135 |
patient_age: Age of the patient (e.g. "32").
|
| 136 |
patient_num: Contact phone number of the patient.
|
| 137 |
visiting_date: Date of visit in YYYY-MM-DD format (e.g. 2025-06-15).
|
| 138 |
-
|
| 139 |
Returns a booking confirmation with the new record ID.
|
| 140 |
"""
|
| 141 |
db_path = get_db_path()
|
| 142 |
-
|
| 143 |
async with aiosqlite.connect(db_path) as db:
|
| 144 |
db.row_factory = aiosqlite.Row
|
|
|
|
| 145 |
patient_num = format_bd_number(patient_num)
|
| 146 |
-
|
| 147 |
# Verify doctor exists
|
| 148 |
cursor = await db.execute("SELECT * FROM doctors WHERE id = ?", (doctor_id,))
|
| 149 |
doctor = await cursor.fetchone()
|
| 150 |
if not doctor:
|
| 151 |
return f"No doctor found with ID {doctor_id}. Please search for a doctor first."
|
| 152 |
-
|
| 153 |
doctor_data = dict(doctor)
|
| 154 |
doctor_name = doctor_data.get("doctor_name", "Unknown")
|
| 155 |
doctor_category = doctor_data.get("doctor_category", "Unknown")
|
| 156 |
-
|
| 157 |
# Check for conflicting booking (same doctor + same date)
|
| 158 |
cursor = await db.execute(
|
| 159 |
"""SELECT id FROM patients
|
|
@@ -166,7 +168,7 @@ async def book_appointment(doctor_id: int, patient_name: str, patient_age: str,
|
|
| 166 |
f"A booking for {patient_name} with Dr. {doctor_name} "
|
| 167 |
f"on {visiting_date} already exists."
|
| 168 |
)
|
| 169 |
-
|
| 170 |
# Insert into patients table
|
| 171 |
cursor = await db.execute(
|
| 172 |
"""INSERT INTO patients (doctor_name, doctor_category, patient_name, patient_age, patient_num, visiting_date)
|
|
@@ -174,7 +176,7 @@ async def book_appointment(doctor_id: int, patient_name: str, patient_age: str,
|
|
| 174 |
(doctor_name, doctor_category, patient_name, patient_age, patient_num, visiting_date),
|
| 175 |
)
|
| 176 |
await db.commit()
|
| 177 |
-
|
| 178 |
# Send SMS confirmation
|
| 179 |
sms_message = (
|
| 180 |
f"✅ Appointment Confirmed!\n"
|
|
@@ -188,7 +190,7 @@ async def book_appointment(doctor_id: int, patient_name: str, patient_age: str,
|
|
| 188 |
# sms_status = "📱 SMS confirmation sent."
|
| 189 |
# except Exception as e:
|
| 190 |
# sms_status = f"⚠️ SMS failed: {str(e)}"
|
| 191 |
-
|
| 192 |
return (
|
| 193 |
f"✅ Appointment Booked!\n"
|
| 194 |
f"━━━━━━━━━━━━━━━━━━━━━━\n"
|
|
@@ -209,33 +211,33 @@ async def delete_appointment(patient_num: str, doctor_name: str) -> str:
|
|
| 209 |
db_path = get_db_path()
|
| 210 |
# normalize phone number
|
| 211 |
patient_num = format_bd_number(patient_num)
|
| 212 |
-
|
| 213 |
async with aiosqlite.connect(db_path) as db:
|
| 214 |
db.row_factory = aiosqlite.Row
|
| 215 |
-
|
| 216 |
# check if appointment exists first
|
| 217 |
cursor = await db.execute("""
|
| 218 |
SELECT * FROM patients
|
| 219 |
WHERE patient_num = ?
|
| 220 |
AND LOWER(doctor_name) = LOWER(?)
|
| 221 |
""", (patient_num, doctor_name))
|
| 222 |
-
|
| 223 |
row = await cursor.fetchone()
|
| 224 |
if not row:
|
| 225 |
return json.dumps({
|
| 226 |
"success": False,
|
| 227 |
"message": "No matching appointment found to delete."
|
| 228 |
})
|
| 229 |
-
|
| 230 |
# delete appointment
|
| 231 |
await db.execute("""
|
| 232 |
DELETE FROM patients
|
| 233 |
WHERE patient_num = ?
|
| 234 |
AND LOWER(doctor_name) = LOWER(?)
|
| 235 |
""", (patient_num, doctor_name))
|
| 236 |
-
|
| 237 |
await db.commit()
|
| 238 |
-
|
| 239 |
return json.dumps({
|
| 240 |
"success": True,
|
| 241 |
"message": f"Appointment with Dr. {doctor_name} deleted successfully."
|
|
@@ -250,7 +252,7 @@ class AIBackend:
|
|
| 250 |
self.tools = [search_doctor, book_appointment, get_bd_time, search_appointment_by_phone, delete_appointment]
|
| 251 |
self.tool_node = ToolNode(self.tools)
|
| 252 |
self.llm_with_tools = self.llm.bind_tools(self.tools)
|
| 253 |
-
|
| 254 |
async def async_setup(self):
|
| 255 |
db_path = os.path.join(os.path.dirname(__file__), "daa.db")
|
| 256 |
self.conn = await aiosqlite.connect(db_path)
|
|
@@ -258,7 +260,7 @@ class AIBackend:
|
|
| 258 |
await self._create_user_table()
|
| 259 |
self.graph = self._build_graph()
|
| 260 |
self.summary_graph = self._build_summary_graph()
|
| 261 |
-
|
| 262 |
async def _create_user_table(self):
|
| 263 |
await self.conn.execute("""
|
| 264 |
CREATE TABLE IF NOT EXISTS userid_threadid (
|
|
@@ -267,7 +269,7 @@ class AIBackend:
|
|
| 267 |
)
|
| 268 |
""")
|
| 269 |
await self.conn.commit()
|
| 270 |
-
|
| 271 |
######################### SUMMARIZE NODE #########################
|
| 272 |
async def summarize_conversation(self, state: ChatState):
|
| 273 |
existing_summary = state.get("summary", "")
|
|
@@ -275,12 +277,12 @@ class AIBackend:
|
|
| 275 |
prompt = (
|
| 276 |
f"""
|
| 277 |
You are maintaining a long-term conversation memory for a chatbot.
|
| 278 |
-
|
| 279 |
Existing summary:
|
| 280 |
{existing_summary}
|
| 281 |
-
|
| 282 |
Update and extend the summary using ONLY the new conversation messages above.
|
| 283 |
-
|
| 284 |
Instructions:
|
| 285 |
- Preserve important existing context.
|
| 286 |
- Add new facts, decisions, preferences, goals, issues, and ongoing tasks.
|
|
@@ -297,9 +299,9 @@ class AIBackend:
|
|
| 297 |
else
|
| 298 |
"""
|
| 299 |
You are creating a long-term conversation memory summary for a chatbot.
|
| 300 |
-
|
| 301 |
Summarize the conversation above.
|
| 302 |
-
|
| 303 |
Instructions:
|
| 304 |
- Capture important user information, goals, preferences, projects, and decisions.
|
| 305 |
- Include technical issues, debugging progress, and solutions discussed.
|
|
@@ -315,37 +317,40 @@ class AIBackend:
|
|
| 315 |
"summary": response.content,
|
| 316 |
"messages": [RemoveMessage(id=m.id) for m in messages[:-2]],
|
| 317 |
}
|
| 318 |
-
|
| 319 |
async def should_summarize(self, state: ChatState):
|
| 320 |
if len(state["messages"]) > 10:
|
| 321 |
return "summarize_node"
|
| 322 |
return "chat_node"
|
| 323 |
-
|
| 324 |
######################### CHAT NODE #########################
|
| 325 |
async def chat_node(self, state: ChatState):
|
| 326 |
summary = state.get("summary", "")
|
| 327 |
messages = state["messages"]
|
| 328 |
-
|
| 329 |
print('#'*50)
|
| 330 |
print(">>>>>>>>>> CHAT NODE START <<<<<<<<<<")
|
| 331 |
if summary:
|
| 332 |
print(f"[SUMMARY]:\n{summary}\n")
|
| 333 |
else:
|
| 334 |
print("[NO SUMMARY YET]\n")
|
| 335 |
-
|
| 336 |
print('$'*50)
|
| 337 |
print("[MESSAGES]:")
|
| 338 |
for m in messages:
|
| 339 |
role = m.__class__.__name__
|
| 340 |
print(f" [{role}]: {m.content[:200]}")
|
| 341 |
print('$'*50,'\n')
|
| 342 |
-
|
| 343 |
if summary:
|
| 344 |
summary_message = SystemMessage(
|
| 345 |
content=(
|
| 346 |
-
"You are provided with a condensed memory of previous conversations.\n\n"
|
| 347 |
f"Conversation Memory:\n{summary}\n\n"
|
| 348 |
"Instructions:\n"
|
|
|
|
|
|
|
|
|
|
| 349 |
"- Use this memory as long-term conversational context.\n"
|
| 350 |
"- Maintain continuity with the user's previous discussions, projects, goals, and preferences.\n"
|
| 351 |
"- Prioritize recent and relevant information when generating responses.\n"
|
|
@@ -361,32 +366,32 @@ class AIBackend:
|
|
| 361 |
print(">>>>>>>>>> CHAT NODE END <<<<<<<<<<")
|
| 362 |
print('#'*50)
|
| 363 |
return {"messages": [response]}
|
| 364 |
-
|
| 365 |
######################### GRAPH #########################
|
| 366 |
def _build_graph(self):
|
| 367 |
g = StateGraph(ChatState)
|
| 368 |
g.add_node("chat_node", self.chat_node)
|
| 369 |
g.add_node("tools", self.tool_node)
|
| 370 |
-
|
| 371 |
g.add_edge(START, "chat_node")
|
| 372 |
g.add_conditional_edges("chat_node", tools_condition)
|
| 373 |
g.add_edge("tools", "chat_node")
|
| 374 |
-
|
| 375 |
return g.compile(checkpointer=self.checkpointer)
|
| 376 |
-
|
| 377 |
def _build_summary_graph(self):
|
| 378 |
g = StateGraph(ChatState)
|
| 379 |
g.add_node("summarize_node", self.summarize_conversation)
|
| 380 |
g.add_edge(START, "summarize_node")
|
| 381 |
g.add_edge("summarize_node", END)
|
| 382 |
return g.compile(checkpointer=self.checkpointer)
|
| 383 |
-
|
| 384 |
######################### STREAMING #########################
|
| 385 |
async def ai_only_stream(self, initial_state: dict, config: dict):
|
| 386 |
async for message_chunk, metadata in self.graph.astream(initial_state, config=config, stream_mode="messages"):
|
| 387 |
if isinstance(message_chunk, AIMessage) and message_chunk.content:
|
| 388 |
yield message_chunk.content
|
| 389 |
-
|
| 390 |
# Auto Summarization Execute
|
| 391 |
current_state = await self.graph.aget_state(config)
|
| 392 |
if len(current_state.values.get("messages", [])) > 10:
|
|
@@ -394,26 +399,26 @@ class AIBackend:
|
|
| 394 |
self.summary_graph.ainvoke(current_state.values, config=config)
|
| 395 |
)
|
| 396 |
print('@'*20,'Summarization Execute','@'*20)
|
| 397 |
-
|
| 398 |
######################### THREAD ID #########################
|
| 399 |
@staticmethod
|
| 400 |
def generate_thread_id() -> str:
|
| 401 |
return str(uuid.uuid4())
|
| 402 |
-
|
| 403 |
######################### RETRIEVE ALL THREADS #########################
|
| 404 |
async def retrieve_all_threads(self):
|
| 405 |
all_threads = set()
|
| 406 |
async for checkpoint in self.checkpointer.alist(None):
|
| 407 |
all_threads.add(checkpoint.config["configurable"]["thread_id"])
|
| 408 |
return list(all_threads)
|
| 409 |
-
|
| 410 |
######################### MAIN ENTRY POINT #########################
|
| 411 |
async def main(self, user_id: str, user_query: str):
|
| 412 |
async with self.conn.execute(
|
| 413 |
"SELECT userId, threadId FROM userid_threadid WHERE userId = ?", (user_id,)
|
| 414 |
) as cursor:
|
| 415 |
result = await cursor.fetchone()
|
| 416 |
-
|
| 417 |
if result is None:
|
| 418 |
thread_id = user_id + self.generate_thread_id()
|
| 419 |
await self.conn.execute(
|
|
@@ -423,11 +428,11 @@ class AIBackend:
|
|
| 423 |
await self.conn.commit()
|
| 424 |
else:
|
| 425 |
thread_id = result[1]
|
| 426 |
-
|
| 427 |
initial_state = {"messages": [HumanMessage(content=user_query)]}
|
| 428 |
config = {
|
| 429 |
"configurable": {"thread_id": thread_id},
|
| 430 |
"metadata": {"thread_id": thread_id},
|
| 431 |
"run_name": "chat_turn",
|
| 432 |
}
|
| 433 |
-
return self.ai_only_stream(initial_state, config)
|
|
|
|
| 20 |
summary: str
|
| 21 |
|
| 22 |
######################### TOOLS #########################
|
| 23 |
+
# After imports, before STATE class
|
| 24 |
def get_db_path():
|
| 25 |
return os.path.join(os.path.dirname(__file__), "daa.db")
|
| 26 |
|
|
|
|
| 59 |
query = "SELECT * FROM doctors WHERE 1=1"
|
| 60 |
params = []
|
| 61 |
conditions = []
|
| 62 |
+
|
| 63 |
if name:
|
| 64 |
conditions.append("LOWER(doctor_name) LIKE ?")
|
| 65 |
params.append(f"%{name.lower()}%")
|
| 66 |
+
|
| 67 |
if category:
|
| 68 |
conditions.append("LOWER(category) LIKE ?")
|
| 69 |
params.append(f"%{category.lower()}%")
|
| 70 |
+
|
| 71 |
if visiting_days:
|
| 72 |
conditions.append("LOWER(visiting_days) LIKE ?")
|
| 73 |
params.append(f"%{visiting_days.lower()}%")
|
| 74 |
+
|
| 75 |
if conditions:
|
| 76 |
query += " AND (" + " OR ".join(conditions) + ")"
|
| 77 |
+
|
| 78 |
async with aiosqlite.connect(db_path) as db:
|
| 79 |
db.row_factory = aiosqlite.Row
|
| 80 |
cursor = await db.execute(query, params)
|
| 81 |
rows = await cursor.fetchall()
|
| 82 |
+
|
| 83 |
if not rows:
|
| 84 |
return json.dumps({
|
| 85 |
"success": False,
|
| 86 |
"message": "No doctors found matching your search.",
|
| 87 |
"data": []
|
| 88 |
})
|
| 89 |
+
|
| 90 |
return json.dumps({
|
| 91 |
"success": True,
|
| 92 |
"count": len(rows),
|
|
|
|
| 100 |
"""
|
| 101 |
db_path = get_db_path()
|
| 102 |
patient_num = format_bd_number(patient_num)
|
| 103 |
+
|
| 104 |
async with aiosqlite.connect(db_path) as db:
|
| 105 |
db.row_factory = aiosqlite.Row
|
| 106 |
+
|
| 107 |
cursor = await db.execute("""
|
| 108 |
SELECT * FROM patients
|
| 109 |
WHERE patient_num = ?
|
| 110 |
ORDER BY visiting_date ASC
|
| 111 |
""", (patient_num,))
|
| 112 |
+
|
| 113 |
rows = await cursor.fetchall()
|
| 114 |
+
|
| 115 |
if not rows:
|
| 116 |
return json.dumps({
|
| 117 |
"success": False,
|
| 118 |
"message": "No appointments found for this phone number.",
|
| 119 |
"data": []
|
| 120 |
})
|
| 121 |
+
|
| 122 |
return json.dumps({
|
| 123 |
"success": True,
|
| 124 |
"count": len(rows),
|
|
|
|
| 129 |
async def book_appointment(doctor_id: int, patient_name: str, patient_age: str, patient_num: str, visiting_date: str) -> str:
|
| 130 |
"""
|
| 131 |
Book a doctor appointment and save it to the patients table.
|
| 132 |
+
|
| 133 |
Args:
|
| 134 |
doctor_id: Doctor's ID from search_doctor results.
|
| 135 |
patient_name: Full name of the patient.
|
| 136 |
patient_age: Age of the patient (e.g. "32").
|
| 137 |
patient_num: Contact phone number of the patient.
|
| 138 |
visiting_date: Date of visit in YYYY-MM-DD format (e.g. 2025-06-15).
|
| 139 |
+
|
| 140 |
Returns a booking confirmation with the new record ID.
|
| 141 |
"""
|
| 142 |
db_path = get_db_path()
|
| 143 |
+
|
| 144 |
async with aiosqlite.connect(db_path) as db:
|
| 145 |
db.row_factory = aiosqlite.Row
|
| 146 |
+
|
| 147 |
patient_num = format_bd_number(patient_num)
|
| 148 |
+
|
| 149 |
# Verify doctor exists
|
| 150 |
cursor = await db.execute("SELECT * FROM doctors WHERE id = ?", (doctor_id,))
|
| 151 |
doctor = await cursor.fetchone()
|
| 152 |
if not doctor:
|
| 153 |
return f"No doctor found with ID {doctor_id}. Please search for a doctor first."
|
| 154 |
+
|
| 155 |
doctor_data = dict(doctor)
|
| 156 |
doctor_name = doctor_data.get("doctor_name", "Unknown")
|
| 157 |
doctor_category = doctor_data.get("doctor_category", "Unknown")
|
| 158 |
+
|
| 159 |
# Check for conflicting booking (same doctor + same date)
|
| 160 |
cursor = await db.execute(
|
| 161 |
"""SELECT id FROM patients
|
|
|
|
| 168 |
f"A booking for {patient_name} with Dr. {doctor_name} "
|
| 169 |
f"on {visiting_date} already exists."
|
| 170 |
)
|
| 171 |
+
|
| 172 |
# Insert into patients table
|
| 173 |
cursor = await db.execute(
|
| 174 |
"""INSERT INTO patients (doctor_name, doctor_category, patient_name, patient_age, patient_num, visiting_date)
|
|
|
|
| 176 |
(doctor_name, doctor_category, patient_name, patient_age, patient_num, visiting_date),
|
| 177 |
)
|
| 178 |
await db.commit()
|
| 179 |
+
|
| 180 |
# Send SMS confirmation
|
| 181 |
sms_message = (
|
| 182 |
f"✅ Appointment Confirmed!\n"
|
|
|
|
| 190 |
# sms_status = "📱 SMS confirmation sent."
|
| 191 |
# except Exception as e:
|
| 192 |
# sms_status = f"⚠️ SMS failed: {str(e)}"
|
| 193 |
+
|
| 194 |
return (
|
| 195 |
f"✅ Appointment Booked!\n"
|
| 196 |
f"━━━━━━━━━━━━━━━━━━━━━━\n"
|
|
|
|
| 211 |
db_path = get_db_path()
|
| 212 |
# normalize phone number
|
| 213 |
patient_num = format_bd_number(patient_num)
|
| 214 |
+
|
| 215 |
async with aiosqlite.connect(db_path) as db:
|
| 216 |
db.row_factory = aiosqlite.Row
|
| 217 |
+
|
| 218 |
# check if appointment exists first
|
| 219 |
cursor = await db.execute("""
|
| 220 |
SELECT * FROM patients
|
| 221 |
WHERE patient_num = ?
|
| 222 |
AND LOWER(doctor_name) = LOWER(?)
|
| 223 |
""", (patient_num, doctor_name))
|
| 224 |
+
|
| 225 |
row = await cursor.fetchone()
|
| 226 |
if not row:
|
| 227 |
return json.dumps({
|
| 228 |
"success": False,
|
| 229 |
"message": "No matching appointment found to delete."
|
| 230 |
})
|
| 231 |
+
|
| 232 |
# delete appointment
|
| 233 |
await db.execute("""
|
| 234 |
DELETE FROM patients
|
| 235 |
WHERE patient_num = ?
|
| 236 |
AND LOWER(doctor_name) = LOWER(?)
|
| 237 |
""", (patient_num, doctor_name))
|
| 238 |
+
|
| 239 |
await db.commit()
|
| 240 |
+
|
| 241 |
return json.dumps({
|
| 242 |
"success": True,
|
| 243 |
"message": f"Appointment with Dr. {doctor_name} deleted successfully."
|
|
|
|
| 252 |
self.tools = [search_doctor, book_appointment, get_bd_time, search_appointment_by_phone, delete_appointment]
|
| 253 |
self.tool_node = ToolNode(self.tools)
|
| 254 |
self.llm_with_tools = self.llm.bind_tools(self.tools)
|
| 255 |
+
|
| 256 |
async def async_setup(self):
|
| 257 |
db_path = os.path.join(os.path.dirname(__file__), "daa.db")
|
| 258 |
self.conn = await aiosqlite.connect(db_path)
|
|
|
|
| 260 |
await self._create_user_table()
|
| 261 |
self.graph = self._build_graph()
|
| 262 |
self.summary_graph = self._build_summary_graph()
|
| 263 |
+
|
| 264 |
async def _create_user_table(self):
|
| 265 |
await self.conn.execute("""
|
| 266 |
CREATE TABLE IF NOT EXISTS userid_threadid (
|
|
|
|
| 269 |
)
|
| 270 |
""")
|
| 271 |
await self.conn.commit()
|
| 272 |
+
|
| 273 |
######################### SUMMARIZE NODE #########################
|
| 274 |
async def summarize_conversation(self, state: ChatState):
|
| 275 |
existing_summary = state.get("summary", "")
|
|
|
|
| 277 |
prompt = (
|
| 278 |
f"""
|
| 279 |
You are maintaining a long-term conversation memory for a chatbot.
|
| 280 |
+
|
| 281 |
Existing summary:
|
| 282 |
{existing_summary}
|
| 283 |
+
|
| 284 |
Update and extend the summary using ONLY the new conversation messages above.
|
| 285 |
+
|
| 286 |
Instructions:
|
| 287 |
- Preserve important existing context.
|
| 288 |
- Add new facts, decisions, preferences, goals, issues, and ongoing tasks.
|
|
|
|
| 299 |
else
|
| 300 |
"""
|
| 301 |
You are creating a long-term conversation memory summary for a chatbot.
|
| 302 |
+
|
| 303 |
Summarize the conversation above.
|
| 304 |
+
|
| 305 |
Instructions:
|
| 306 |
- Capture important user information, goals, preferences, projects, and decisions.
|
| 307 |
- Include technical issues, debugging progress, and solutions discussed.
|
|
|
|
| 317 |
"summary": response.content,
|
| 318 |
"messages": [RemoveMessage(id=m.id) for m in messages[:-2]],
|
| 319 |
}
|
| 320 |
+
|
| 321 |
async def should_summarize(self, state: ChatState):
|
| 322 |
if len(state["messages"]) > 10:
|
| 323 |
return "summarize_node"
|
| 324 |
return "chat_node"
|
| 325 |
+
|
| 326 |
######################### CHAT NODE #########################
|
| 327 |
async def chat_node(self, state: ChatState):
|
| 328 |
summary = state.get("summary", "")
|
| 329 |
messages = state["messages"]
|
| 330 |
+
|
| 331 |
print('#'*50)
|
| 332 |
print(">>>>>>>>>> CHAT NODE START <<<<<<<<<<")
|
| 333 |
if summary:
|
| 334 |
print(f"[SUMMARY]:\n{summary}\n")
|
| 335 |
else:
|
| 336 |
print("[NO SUMMARY YET]\n")
|
| 337 |
+
|
| 338 |
print('$'*50)
|
| 339 |
print("[MESSAGES]:")
|
| 340 |
for m in messages:
|
| 341 |
role = m.__class__.__name__
|
| 342 |
print(f" [{role}]: {m.content[:200]}")
|
| 343 |
print('$'*50,'\n')
|
| 344 |
+
|
| 345 |
if summary:
|
| 346 |
summary_message = SystemMessage(
|
| 347 |
content=(
|
| 348 |
+
"You are a Bangla voice assistant. You are provided with a condensed memory of previous conversations.\n\n"
|
| 349 |
f"Conversation Memory:\n{summary}\n\n"
|
| 350 |
"Instructions:\n"
|
| 351 |
+
"- Always respond in Bangla (বাংলা)"
|
| 352 |
+
"- Keep sentences short for speech"
|
| 353 |
+
"- No English unless necessary"
|
| 354 |
"- Use this memory as long-term conversational context.\n"
|
| 355 |
"- Maintain continuity with the user's previous discussions, projects, goals, and preferences.\n"
|
| 356 |
"- Prioritize recent and relevant information when generating responses.\n"
|
|
|
|
| 366 |
print(">>>>>>>>>> CHAT NODE END <<<<<<<<<<")
|
| 367 |
print('#'*50)
|
| 368 |
return {"messages": [response]}
|
| 369 |
+
|
| 370 |
######################### GRAPH #########################
|
| 371 |
def _build_graph(self):
|
| 372 |
g = StateGraph(ChatState)
|
| 373 |
g.add_node("chat_node", self.chat_node)
|
| 374 |
g.add_node("tools", self.tool_node)
|
| 375 |
+
|
| 376 |
g.add_edge(START, "chat_node")
|
| 377 |
g.add_conditional_edges("chat_node", tools_condition)
|
| 378 |
g.add_edge("tools", "chat_node")
|
| 379 |
+
|
| 380 |
return g.compile(checkpointer=self.checkpointer)
|
| 381 |
+
|
| 382 |
def _build_summary_graph(self):
|
| 383 |
g = StateGraph(ChatState)
|
| 384 |
g.add_node("summarize_node", self.summarize_conversation)
|
| 385 |
g.add_edge(START, "summarize_node")
|
| 386 |
g.add_edge("summarize_node", END)
|
| 387 |
return g.compile(checkpointer=self.checkpointer)
|
| 388 |
+
|
| 389 |
######################### STREAMING #########################
|
| 390 |
async def ai_only_stream(self, initial_state: dict, config: dict):
|
| 391 |
async for message_chunk, metadata in self.graph.astream(initial_state, config=config, stream_mode="messages"):
|
| 392 |
if isinstance(message_chunk, AIMessage) and message_chunk.content:
|
| 393 |
yield message_chunk.content
|
| 394 |
+
|
| 395 |
# Auto Summarization Execute
|
| 396 |
current_state = await self.graph.aget_state(config)
|
| 397 |
if len(current_state.values.get("messages", [])) > 10:
|
|
|
|
| 399 |
self.summary_graph.ainvoke(current_state.values, config=config)
|
| 400 |
)
|
| 401 |
print('@'*20,'Summarization Execute','@'*20)
|
| 402 |
+
|
| 403 |
######################### THREAD ID #########################
|
| 404 |
@staticmethod
|
| 405 |
def generate_thread_id() -> str:
|
| 406 |
return str(uuid.uuid4())
|
| 407 |
+
|
| 408 |
######################### RETRIEVE ALL THREADS #########################
|
| 409 |
async def retrieve_all_threads(self):
|
| 410 |
all_threads = set()
|
| 411 |
async for checkpoint in self.checkpointer.alist(None):
|
| 412 |
all_threads.add(checkpoint.config["configurable"]["thread_id"])
|
| 413 |
return list(all_threads)
|
| 414 |
+
|
| 415 |
######################### MAIN ENTRY POINT #########################
|
| 416 |
async def main(self, user_id: str, user_query: str):
|
| 417 |
async with self.conn.execute(
|
| 418 |
"SELECT userId, threadId FROM userid_threadid WHERE userId = ?", (user_id,)
|
| 419 |
) as cursor:
|
| 420 |
result = await cursor.fetchone()
|
| 421 |
+
|
| 422 |
if result is None:
|
| 423 |
thread_id = user_id + self.generate_thread_id()
|
| 424 |
await self.conn.execute(
|
|
|
|
| 428 |
await self.conn.commit()
|
| 429 |
else:
|
| 430 |
thread_id = result[1]
|
| 431 |
+
|
| 432 |
initial_state = {"messages": [HumanMessage(content=user_query)]}
|
| 433 |
config = {
|
| 434 |
"configurable": {"thread_id": thread_id},
|
| 435 |
"metadata": {"thread_id": thread_id},
|
| 436 |
"run_name": "chat_turn",
|
| 437 |
}
|
| 438 |
+
return self.ai_only_stream(initial_state, config)
|
frontend/index.html
CHANGED
|
@@ -44,4 +44,4 @@
|
|
| 44 |
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 45 |
<script src="script.js"></script>
|
| 46 |
</body>
|
| 47 |
-
</html>
|
|
|
|
| 44 |
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 45 |
<script src="script.js"></script>
|
| 46 |
</body>
|
| 47 |
+
</html>
|
frontend/script.js
CHANGED
|
@@ -1,166 +1,294 @@
|
|
| 1 |
-
const chatBox = document.getElementById(
|
| 2 |
-
const sendBtn = document.getElementById(
|
| 3 |
-
const textInput = document.getElementById(
|
| 4 |
-
const micBtn = document.getElementById(
|
| 5 |
-
|
| 6 |
-
const userId =
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
| 35 |
});
|
| 36 |
|
| 37 |
-
|
| 38 |
function sendTextMessage() {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
}
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
-
//
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
-
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
|
| 67 |
-
|
|
|
|
|
|
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
|
|
|
| 71 |
|
| 72 |
-
|
|
|
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
"ai"
|
| 82 |
-
);
|
| 83 |
-
}
|
| 84 |
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
| 86 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
|
| 93 |
-
|
|
|
|
|
|
|
| 94 |
|
| 95 |
-
|
| 96 |
-
};
|
| 97 |
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
-
if (
|
| 102 |
-
|
|
|
|
| 103 |
} else {
|
| 104 |
-
|
|
|
|
|
|
|
| 105 |
}
|
| 106 |
-
};
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
async function startRecording() {
|
| 110 |
-
|
| 111 |
-
const stream = await navigator.mediaDevices.getUserMedia({
|
| 112 |
-
audio: true
|
| 113 |
-
});
|
| 114 |
-
|
| 115 |
-
mediaRecorder = new MediaRecorder(stream, {
|
| 116 |
-
mimeType: "audio/webm"
|
| 117 |
-
});
|
| 118 |
-
|
| 119 |
-
mediaRecorder.start(250);
|
| 120 |
-
|
| 121 |
-
mediaRecorder.ondataavailable = async (event) => {
|
| 122 |
-
|
| 123 |
-
if (event.data.size > 0 &&
|
| 124 |
-
voiceSocket.readyState === WebSocket.OPEN) {
|
| 125 |
|
| 126 |
-
|
|
|
|
| 127 |
|
| 128 |
-
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
};
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
micBtn.innerText = "⏹ Stop Voice";
|
| 135 |
-
micBtn.classList.add("recording");
|
| 136 |
}
|
| 137 |
|
| 138 |
-
|
| 139 |
-
function
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
|
|
|
| 147 |
}
|
| 148 |
|
| 149 |
-
|
| 150 |
-
// =======================
|
| 151 |
-
// UI
|
| 152 |
-
// =======================
|
| 153 |
-
|
| 154 |
function appendMessage(text, sender) {
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
chatBox.appendChild(div);
|
| 164 |
-
|
| 165 |
-
chatBox.scrollTop = chatBox.scrollHeight;
|
| 166 |
-
}
|
|
|
|
| 1 |
+
const chatBox = document.getElementById('chat-box');
|
| 2 |
+
const sendBtn = document.getElementById('send-btn');
|
| 3 |
+
const textInput = document.getElementById('text-input');
|
| 4 |
+
const micBtn = document.getElementById('mic-btn');
|
| 5 |
+
|
| 6 |
+
const userId = 'walid';
|
| 7 |
+
|
| 8 |
+
// ── WebSockets ────────────────────────────────────────────────────────────────
|
| 9 |
+
const chatSocket = new WebSocket('ws://127.0.0.1:8679/ws/chat');
|
| 10 |
+
const voiceSocket = new WebSocket('ws://127.0.0.1:8679/ws/voice');
|
| 11 |
+
voiceSocket.binaryType = 'arraybuffer';
|
| 12 |
+
|
| 13 |
+
// ── State ─────────────────────────────────────────────────────────────────────
|
| 14 |
+
let micStream = null;
|
| 15 |
+
let audioContext = null;
|
| 16 |
+
let analyser = null;
|
| 17 |
+
let mediaRecorder = null;
|
| 18 |
+
let audioChunks = [];
|
| 19 |
+
let isListening = false;
|
| 20 |
+
let isSpeaking = false;
|
| 21 |
+
let silenceTimer = null;
|
| 22 |
+
let vadInterval = null;
|
| 23 |
+
let isProcessing = false; // true while server is processing an utterance
|
| 24 |
+
|
| 25 |
+
let currentAIMessage = null;
|
| 26 |
+
let playbackChain = Promise.resolve();
|
| 27 |
+
|
| 28 |
+
// ── VAD config ────────────────────────────────────────────────────────────────
|
| 29 |
+
const SILENCE_THRESHOLD_DB = -45; // dBFS; lower = more sensitive
|
| 30 |
+
const SILENCE_TIMEOUT_MS = 3000; // 3 s silence → send utterance
|
| 31 |
+
const VAD_POLL_MS = 100;
|
| 32 |
+
|
| 33 |
+
// ── Text chat ─────────────────────────────────────────────────────────────────
|
| 34 |
+
sendBtn.onclick = sendTextMessage;
|
| 35 |
+
textInput.addEventListener('keydown', (e) => {
|
| 36 |
+
if (e.key === 'Enter') sendTextMessage();
|
| 37 |
});
|
| 38 |
|
|
|
|
| 39 |
function sendTextMessage() {
|
| 40 |
+
const msg = textInput.value.trim();
|
| 41 |
+
if (!msg) return;
|
| 42 |
+
appendMessage(msg, 'user');
|
| 43 |
+
chatSocket.send(JSON.stringify({ user_id: userId, user_query: msg }));
|
| 44 |
+
textInput.value = '';
|
| 45 |
+
}
|
| 46 |
|
| 47 |
+
// Chat WS now sends JSON: {"type":"chat","text":"..."} or {"type":"end"}
|
| 48 |
+
chatSocket.onmessage = (e) => {
|
| 49 |
+
let msg;
|
| 50 |
+
try {
|
| 51 |
+
msg = JSON.parse(e.data);
|
| 52 |
+
} catch {
|
| 53 |
+
return;
|
| 54 |
+
}
|
| 55 |
+
if (msg.type === 'chat' && msg.text) appendMessage(msg.text, 'ai');
|
| 56 |
+
if (msg.type === 'error') appendMessage('⚠️ ' + msg.text, 'system');
|
| 57 |
+
// 'end' — nothing to do for text chat
|
| 58 |
+
};
|
| 59 |
+
chatSocket.onerror = (e) => console.error('Chat WS error:', e);
|
| 60 |
+
chatSocket.onclose = () => console.log('Chat WS closed');
|
| 61 |
+
|
| 62 |
+
// ── Voice WebSocket events ────────────────────────────────────────────────────
|
| 63 |
+
voiceSocket.onopen = () => console.log('[WS] Voice connected');
|
| 64 |
+
voiceSocket.onclose = () => {
|
| 65 |
+
console.log('[WS] Voice closed');
|
| 66 |
+
stopListening();
|
| 67 |
+
};
|
| 68 |
+
voiceSocket.onerror = (e) => console.error('[WS] Voice error:', e);
|
| 69 |
+
|
| 70 |
+
voiceSocket.onmessage = (event) => {
|
| 71 |
+
// Binary → audio playback
|
| 72 |
+
if (event.data instanceof ArrayBuffer) {
|
| 73 |
+
enqueueAudio(event.data);
|
| 74 |
+
return;
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
let msg;
|
| 78 |
+
try {
|
| 79 |
+
msg = JSON.parse(event.data);
|
| 80 |
+
} catch {
|
| 81 |
+
return;
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
switch (msg.type) {
|
| 85 |
+
case 'stt':
|
| 86 |
+
// Show transcribed Bangla text as user bubble
|
| 87 |
+
appendMessage('🎤 ' + msg.text, 'user');
|
| 88 |
+
currentAIMessage = null;
|
| 89 |
+
break;
|
| 90 |
+
|
| 91 |
+
case 'llm_token':
|
| 92 |
+
// Stream AI tokens into growing bubble
|
| 93 |
+
if (!currentAIMessage) currentAIMessage = appendMessage('', 'ai');
|
| 94 |
+
currentAIMessage.textContent += msg.token;
|
| 95 |
+
chatBox.scrollTop = chatBox.scrollHeight;
|
| 96 |
+
break;
|
| 97 |
+
|
| 98 |
+
case 'end':
|
| 99 |
+
// Server finished this turn → resume VAD listening
|
| 100 |
+
currentAIMessage = null;
|
| 101 |
+
isProcessing = false;
|
| 102 |
+
if (isListening) setMicStatus('listening');
|
| 103 |
+
break;
|
| 104 |
+
|
| 105 |
+
case 'error':
|
| 106 |
+
appendMessage('⚠️ ' + msg.text, 'system');
|
| 107 |
+
// Still need to reset so VAD resumes
|
| 108 |
+
isProcessing = false;
|
| 109 |
+
if (isListening) setMicStatus('listening');
|
| 110 |
+
break;
|
| 111 |
+
|
| 112 |
+
case 'pong':
|
| 113 |
+
break;
|
| 114 |
+
|
| 115 |
+
default:
|
| 116 |
+
console.log('[WS] Unknown msg:', msg.type);
|
| 117 |
+
}
|
| 118 |
+
};
|
| 119 |
|
| 120 |
+
// ── Audio playback: sequential, no overlap ────────────────────────────────────
|
| 121 |
+
function enqueueAudio(buffer) {
|
| 122 |
+
playbackChain = playbackChain.then(() => playBuffer(buffer));
|
| 123 |
+
}
|
| 124 |
|
| 125 |
+
function playBuffer(buffer) {
|
| 126 |
+
return new Promise((resolve) => {
|
| 127 |
+
const blob = new Blob([buffer], { type: 'audio/mpeg' });
|
| 128 |
+
const url = URL.createObjectURL(blob);
|
| 129 |
+
const audio = new Audio(url);
|
| 130 |
+
const done = () => {
|
| 131 |
+
URL.revokeObjectURL(url);
|
| 132 |
+
resolve();
|
| 133 |
+
};
|
| 134 |
+
audio.onended = done;
|
| 135 |
+
audio.onerror = () => {
|
| 136 |
+
console.warn('[AUDIO] playback error');
|
| 137 |
+
done();
|
| 138 |
+
};
|
| 139 |
+
audio.play().catch(() => done());
|
| 140 |
+
});
|
| 141 |
}
|
| 142 |
|
| 143 |
+
// ── Mic button ────────────────────────────────────────────────────────────────
|
| 144 |
+
micBtn.onclick = async () => {
|
| 145 |
+
if (!isListening) await startListening();
|
| 146 |
+
else stopListening();
|
| 147 |
+
};
|
| 148 |
|
| 149 |
+
// ── Start continuous listening with VAD ───────────────────────────────────────
|
| 150 |
+
async function startListening() {
|
| 151 |
+
try {
|
| 152 |
+
micStream = await navigator.mediaDevices.getUserMedia({
|
| 153 |
+
audio: {
|
| 154 |
+
echoCancellation: true,
|
| 155 |
+
noiseSuppression: true,
|
| 156 |
+
autoGainControl: true,
|
| 157 |
+
channelCount: 1,
|
| 158 |
+
sampleRate: 16000,
|
| 159 |
+
},
|
| 160 |
+
});
|
| 161 |
+
} catch (e) {
|
| 162 |
+
console.error('Mic error:', e);
|
| 163 |
+
appendMessage('⚠️ Microphone access denied.', 'system');
|
| 164 |
+
return;
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
audioContext = new AudioContext();
|
| 168 |
+
const source = audioContext.createMediaStreamSource(micStream);
|
| 169 |
+
analyser = audioContext.createAnalyser();
|
| 170 |
+
analyser.fftSize = 512;
|
| 171 |
+
source.connect(analyser);
|
| 172 |
+
|
| 173 |
+
isListening = true;
|
| 174 |
+
setMicStatus('listening');
|
| 175 |
+
|
| 176 |
+
vadInterval = setInterval(vadTick, VAD_POLL_MS);
|
| 177 |
+
}
|
| 178 |
|
| 179 |
+
// ── Stop everything ───────────────────────────────────────────────────────────
|
| 180 |
+
function stopListening() {
|
| 181 |
+
clearInterval(vadInterval);
|
| 182 |
+
clearTimeout(silenceTimer);
|
| 183 |
+
vadInterval = silenceTimer = null;
|
| 184 |
|
| 185 |
+
if (isSpeaking) stopRecorder(true); // discard in-progress utterance
|
| 186 |
|
| 187 |
+
micStream?.getTracks().forEach((t) => t.stop());
|
| 188 |
+
audioContext?.close();
|
| 189 |
+
micStream = audioContext = analyser = null;
|
| 190 |
|
| 191 |
+
isSpeaking = isListening = isProcessing = false;
|
| 192 |
+
setMicStatus('off');
|
| 193 |
+
}
|
| 194 |
|
| 195 |
+
// ── VAD polling ───────────────────────────────────────────────────────────────
|
| 196 |
+
function vadTick() {
|
| 197 |
+
if (!analyser || isProcessing) return;
|
| 198 |
|
| 199 |
+
const data = new Float32Array(analyser.frequencyBinCount);
|
| 200 |
+
analyser.getFloatTimeDomainData(data);
|
| 201 |
|
| 202 |
+
// RMS → dBFS
|
| 203 |
+
const rms = Math.sqrt(data.reduce((s, v) => s + v * v, 0) / data.length);
|
| 204 |
+
const db = rms > 0 ? 20 * Math.log10(rms) : -Infinity;
|
| 205 |
+
const speaking = db > SILENCE_THRESHOLD_DB;
|
| 206 |
|
| 207 |
+
if (speaking) {
|
| 208 |
+
clearTimeout(silenceTimer);
|
| 209 |
+
silenceTimer = null;
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
+
if (!isSpeaking) {
|
| 212 |
+
isSpeaking = true;
|
| 213 |
+
startRecorder();
|
| 214 |
+
setMicStatus('recording');
|
| 215 |
}
|
| 216 |
+
} else {
|
| 217 |
+
if (isSpeaking && !silenceTimer) {
|
| 218 |
+
silenceTimer = setTimeout(() => {
|
| 219 |
+
silenceTimer = null;
|
| 220 |
+
isSpeaking = false;
|
| 221 |
+
isProcessing = true;
|
| 222 |
+
stopRecorder(false); // send the utterance
|
| 223 |
+
setMicStatus('processing');
|
| 224 |
+
}, SILENCE_TIMEOUT_MS);
|
| 225 |
+
}
|
| 226 |
+
}
|
| 227 |
+
}
|
| 228 |
|
| 229 |
+
// ── Recorder ──────────────────────────────────────────────────────────────────
|
| 230 |
+
function startRecorder() {
|
| 231 |
+
if (!micStream) return;
|
| 232 |
+
audioChunks = [];
|
| 233 |
|
| 234 |
+
const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus')
|
| 235 |
+
? 'audio/webm;codecs=opus'
|
| 236 |
+
: 'audio/webm';
|
| 237 |
|
| 238 |
+
mediaRecorder = new MediaRecorder(micStream, { mimeType });
|
|
|
|
| 239 |
|
| 240 |
+
mediaRecorder.ondataavailable = (e) => {
|
| 241 |
+
if (e.data.size > 0) audioChunks.push(e.data);
|
| 242 |
+
};
|
| 243 |
|
| 244 |
+
mediaRecorder.onstop = async () => {
|
| 245 |
+
if (!audioChunks.length) return;
|
| 246 |
+
const blob = new Blob(audioChunks, { type: mimeType });
|
| 247 |
+
const buffer = await blob.arrayBuffer();
|
| 248 |
+
audioChunks = [];
|
| 249 |
|
| 250 |
+
if (voiceSocket.readyState === WebSocket.OPEN) {
|
| 251 |
+
console.log(`[VAD] Sending utterance: ${buffer.byteLength} bytes`);
|
| 252 |
+
voiceSocket.send(buffer);
|
| 253 |
} else {
|
| 254 |
+
console.warn('[VAD] WS not open, utterance discarded');
|
| 255 |
+
isProcessing = false;
|
| 256 |
+
if (isListening) setMicStatus('listening');
|
| 257 |
}
|
| 258 |
+
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
|
| 260 |
+
mediaRecorder.start();
|
| 261 |
+
}
|
| 262 |
|
| 263 |
+
function stopRecorder(discard = false) {
|
| 264 |
+
if (!mediaRecorder || mediaRecorder.state === 'inactive') return;
|
| 265 |
+
if (discard) {
|
| 266 |
+
mediaRecorder.ondataavailable = () => {};
|
| 267 |
+
mediaRecorder.onstop = () => {
|
| 268 |
+
audioChunks = [];
|
| 269 |
};
|
| 270 |
+
}
|
| 271 |
+
mediaRecorder.stop();
|
| 272 |
+
mediaRecorder = null;
|
|
|
|
|
|
|
| 273 |
}
|
| 274 |
|
| 275 |
+
// ── UI ────────────────────────────────────────────────────────────────────────
|
| 276 |
+
function setMicStatus(state) {
|
| 277 |
+
const labels = {
|
| 278 |
+
off: '🎤 Start Voice',
|
| 279 |
+
listening: '🟢 Listening… (click to stop)',
|
| 280 |
+
recording: '🔴 Speaking…',
|
| 281 |
+
processing: '⏳ Processing…',
|
| 282 |
+
};
|
| 283 |
+
micBtn.innerText = labels[state] ?? '🎤 Start Voice';
|
| 284 |
+
micBtn.className = state === 'off' ? '' : `mic-${state}`;
|
| 285 |
}
|
| 286 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
function appendMessage(text, sender) {
|
| 288 |
+
const div = document.createElement('div');
|
| 289 |
+
div.className = `message ${sender}`;
|
| 290 |
+
div.textContent = text;
|
| 291 |
+
chatBox.appendChild(div);
|
| 292 |
+
chatBox.scrollTop = chatBox.scrollHeight;
|
| 293 |
+
return div;
|
| 294 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
frontend/style.css
CHANGED
|
@@ -149,4 +149,4 @@ button:hover {
|
|
| 149 |
flex-direction: column;
|
| 150 |
padding: 10px;
|
| 151 |
gap: 6px;
|
| 152 |
-
}
|
|
|
|
| 149 |
flex-direction: column;
|
| 150 |
padding: 10px;
|
| 151 |
gap: 6px;
|
| 152 |
+
}
|
requirements.txt
CHANGED
|
@@ -1,35 +1,60 @@
|
|
|
|
|
| 1 |
python-dotenv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
fastapi
|
| 3 |
uvicorn
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
langchain
|
| 6 |
-
langchain-chroma
|
| 7 |
-
langchain-classic
|
| 8 |
-
langchain-community
|
| 9 |
langchain-core
|
|
|
|
| 10 |
langchain-experimental
|
| 11 |
-
langchain-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
langchain-huggingface
|
|
|
|
| 13 |
langchain-mcp-adapters
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
langchain-protocol
|
| 17 |
-
langchain-text-splitters
|
| 18 |
langgraph
|
|
|
|
|
|
|
| 19 |
langgraph-checkpoint
|
| 20 |
langgraph-checkpoint-sqlite
|
| 21 |
-
|
| 22 |
-
|
| 23 |
langsmith
|
| 24 |
-
|
| 25 |
-
|
|
|
|
| 26 |
faster-whisper
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
mcp
|
| 28 |
-
numpy
|
| 29 |
-
ollama
|
| 30 |
-
pydantic
|
| 31 |
-
twilio
|
| 32 |
-
uuid_utils
|
| 33 |
-
uv
|
| 34 |
-
uvicorn
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ===== Core =====
|
| 2 |
python-dotenv
|
| 3 |
+
requests
|
| 4 |
+
numpy
|
| 5 |
+
pydantic
|
| 6 |
+
colorama
|
| 7 |
+
uuid_utils
|
| 8 |
+
|
| 9 |
+
# ===== Web Framework =====
|
| 10 |
fastapi
|
| 11 |
uvicorn
|
| 12 |
+
websockets
|
| 13 |
+
|
| 14 |
+
# ===== Async / DB =====
|
| 15 |
+
aiosqlite
|
| 16 |
+
|
| 17 |
+
# ===== LangChain Ecosystem =====
|
| 18 |
langchain
|
|
|
|
|
|
|
|
|
|
| 19 |
langchain-core
|
| 20 |
+
langchain-community
|
| 21 |
langchain-experimental
|
| 22 |
+
langchain-text-splitters
|
| 23 |
+
langchain-chroma
|
| 24 |
+
langchain-classic
|
| 25 |
+
langchain-protocol
|
| 26 |
+
|
| 27 |
+
langchain-openai
|
| 28 |
+
langchain-ollama
|
| 29 |
langchain-huggingface
|
| 30 |
+
langchain-google-genai
|
| 31 |
langchain-mcp-adapters
|
| 32 |
+
|
| 33 |
+
# ===== LangGraph Ecosystem =====
|
|
|
|
|
|
|
| 34 |
langgraph
|
| 35 |
+
langgraph-sdk
|
| 36 |
+
langgraph-prebuilt
|
| 37 |
langgraph-checkpoint
|
| 38 |
langgraph-checkpoint-sqlite
|
| 39 |
+
|
| 40 |
+
# ===== Observability / Tracking =====
|
| 41 |
langsmith
|
| 42 |
+
|
| 43 |
+
# ===== AI / LLM / STT / TTS =====
|
| 44 |
+
ollama
|
| 45 |
faster-whisper
|
| 46 |
+
edge-tts
|
| 47 |
+
google-generativeai
|
| 48 |
+
|
| 49 |
+
# ===== Audio / Media =====
|
| 50 |
+
pydub
|
| 51 |
+
Pillow
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# ===== MCP =====
|
| 56 |
mcp
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
+
# ===== Utility =====
|
| 59 |
+
uv
|
| 60 |
+
pytz
|
services/streaming.py
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# import asyncio
|
| 2 |
+
# import edge_tts
|
| 3 |
+
|
| 4 |
+
# # ── Voice ─────────────────────────────────────────────────────────────────────
|
| 5 |
+
# VOICE = "bn-BD-NabanitaNeural"
|
| 6 |
+
|
| 7 |
+
# # Flush when buffer reaches this many characters (even without punctuation)
|
| 8 |
+
# FLUSH_LEN = 50
|
| 9 |
+
|
| 10 |
+
# # Don't send a TTS request for fewer than this many characters
|
| 11 |
+
# MIN_CHARS = 3
|
| 12 |
+
|
| 13 |
+
# # Punctuation marks that trigger an immediate flush
|
| 14 |
+
# FLUSH_TRIGGERS = frozenset(".!?।,;:\n—–")
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# class ParallelTTSStreamer:
|
| 18 |
+
# """
|
| 19 |
+
# Collects LLM tokens, splits them into prosodic chunks, and converts each
|
| 20 |
+
# chunk to audio via edge-tts.
|
| 21 |
+
|
| 22 |
+
# FIX: Audio chunks are now guaranteed to arrive IN ORDER by chaining each
|
| 23 |
+
# TTS task so it only writes to the queue after the previous task finishes.
|
| 24 |
+
# This prevents audio chunks from chunk-2 overtaking chunk-1 during playback.
|
| 25 |
+
# """
|
| 26 |
+
|
| 27 |
+
# def __init__(self, voice: str = VOICE):
|
| 28 |
+
# self.voice = voice
|
| 29 |
+
# self.buffer = ""
|
| 30 |
+
# self.queue: asyncio.Queue[bytes | None] = asyncio.Queue()
|
| 31 |
+
# # Tracks the last scheduled task so each new task waits for it first
|
| 32 |
+
# self._prev_task: asyncio.Task | None = None
|
| 33 |
+
# self._flush_lock = asyncio.Lock()
|
| 34 |
+
|
| 35 |
+
# async def add_token(self, token: str) -> None:
|
| 36 |
+
# """Feed a single LLM output token into the streamer."""
|
| 37 |
+
# if not token:
|
| 38 |
+
# return
|
| 39 |
+
|
| 40 |
+
# self.buffer += token
|
| 41 |
+
|
| 42 |
+
# should_flush = (
|
| 43 |
+
# any(ch in FLUSH_TRIGGERS for ch in token)
|
| 44 |
+
# or len(self.buffer) >= FLUSH_LEN
|
| 45 |
+
# )
|
| 46 |
+
|
| 47 |
+
# if should_flush:
|
| 48 |
+
# await self._schedule_flush()
|
| 49 |
+
|
| 50 |
+
# async def _schedule_flush(self) -> None:
|
| 51 |
+
# """Snapshot the buffer and schedule an ordered TTS task."""
|
| 52 |
+
# async with self._flush_lock:
|
| 53 |
+
# text = self.buffer.strip()
|
| 54 |
+
# self.buffer = ""
|
| 55 |
+
|
| 56 |
+
# if len(text) < MIN_CHARS:
|
| 57 |
+
# return
|
| 58 |
+
|
| 59 |
+
# # Each task waits for the previous one before pushing to queue,
|
| 60 |
+
# # guaranteeing in-order audio delivery.
|
| 61 |
+
# prev = self._prev_task
|
| 62 |
+
# task = asyncio.create_task(self._tts_ordered(text, prev))
|
| 63 |
+
# self._prev_task = task
|
| 64 |
+
|
| 65 |
+
# async def _tts_ordered(self, text: str, wait_for: asyncio.Task | None) -> None:
|
| 66 |
+
# """
|
| 67 |
+
# 1. First synthesise audio bytes (can run in parallel with other chunks).
|
| 68 |
+
# 2. Then wait for the previous chunk to finish writing to queue.
|
| 69 |
+
# 3. Then push our bytes to the queue in order.
|
| 70 |
+
# """
|
| 71 |
+
# # Step 1: synthesise concurrently (no queue writes yet)
|
| 72 |
+
# audio_chunks: list[bytes] = []
|
| 73 |
+
# try:
|
| 74 |
+
# communicate = edge_tts.Communicate(text, self.voice)
|
| 75 |
+
# async for chunk in communicate.stream():
|
| 76 |
+
# if chunk["type"] == "audio":
|
| 77 |
+
# audio_chunks.append(chunk["data"])
|
| 78 |
+
# except Exception as e:
|
| 79 |
+
# print(f"[TTS] edge-tts error for '{text[:30]}…': {e}")
|
| 80 |
+
# # Still need to chain — wait for prev even on error
|
| 81 |
+
# if wait_for and not wait_for.done():
|
| 82 |
+
# await wait_for
|
| 83 |
+
# return
|
| 84 |
+
|
| 85 |
+
# # Step 2: wait for the previous chunk to have finished queuing
|
| 86 |
+
# if wait_for and not wait_for.done():
|
| 87 |
+
# try:
|
| 88 |
+
# await wait_for
|
| 89 |
+
# except Exception:
|
| 90 |
+
# pass
|
| 91 |
+
|
| 92 |
+
# # Step 3: push our audio bytes in order
|
| 93 |
+
# for data in audio_chunks:
|
| 94 |
+
# await self.queue.put(data)
|
| 95 |
+
|
| 96 |
+
# async def flush(self) -> None:
|
| 97 |
+
# """
|
| 98 |
+
# Flush remaining buffer, wait for all in-flight TTS tasks, then
|
| 99 |
+
# signal end-of-stream with sentinel None.
|
| 100 |
+
# """
|
| 101 |
+
# await self._schedule_flush()
|
| 102 |
+
|
| 103 |
+
# # Wait for the last chained task (which transitively waits for all)
|
| 104 |
+
# if self._prev_task:
|
| 105 |
+
# try:
|
| 106 |
+
# await self._prev_task
|
| 107 |
+
# except Exception:
|
| 108 |
+
# pass
|
| 109 |
+
|
| 110 |
+
# await self.queue.put(None)
|
| 111 |
+
|
| 112 |
+
# async def stream_audio(self):
|
| 113 |
+
# """
|
| 114 |
+
# Async generator that yields audio bytes in order.
|
| 115 |
+
# Stops when the sentinel None is received.
|
| 116 |
+
# """
|
| 117 |
+
# while True:
|
| 118 |
+
# chunk = await self.queue.get()
|
| 119 |
+
# if chunk is None:
|
| 120 |
+
# break
|
| 121 |
+
# yield chunk
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
import re
|
| 155 |
+
import asyncio
|
| 156 |
+
import edge_tts
|
| 157 |
+
|
| 158 |
+
VOICE = "bn-BD-NabanitaNeural"
|
| 159 |
+
FLUSH_LEN = 80 # chars before forced flush (longer = more natural speech)
|
| 160 |
+
MIN_CHARS = 5 # don't TTS tiny fragments
|
| 161 |
+
FLUSH_TRIGGERS = frozenset(".!?।,;:\n—–")
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def _clean_for_tts(text: str) -> str:
|
| 165 |
+
"""
|
| 166 |
+
Strip markdown and other non-speech symbols before sending to edge-tts.
|
| 167 |
+
|
| 168 |
+
The LLM outputs markdown (**, *, #, -, numbers+dot lists) which edge-tts
|
| 169 |
+
either reads aloud awkwardly ("asterisk asterisk") or returns 'No audio
|
| 170 |
+
was received' on punctuation-only chunks like '**' or '-)'.
|
| 171 |
+
"""
|
| 172 |
+
# Remove bold/italic markers
|
| 173 |
+
text = re.sub(r'\*{1,3}', '', text)
|
| 174 |
+
# Remove heading markers
|
| 175 |
+
text = re.sub(r'#+\s*', '', text)
|
| 176 |
+
# Remove list markers like "১.", "1.", "-", "•"
|
| 177 |
+
text = re.sub(r'^\s*[-•]\s*', '', text, flags=re.MULTILINE)
|
| 178 |
+
text = re.sub(r'^\s*[\d০-৯]+[.)]\s*', '', text, flags=re.MULTILINE)
|
| 179 |
+
# Remove leftover backticks
|
| 180 |
+
text = re.sub(r'`+', '', text)
|
| 181 |
+
# Collapse extra whitespace / blank lines
|
| 182 |
+
text = re.sub(r'\n{2,}', '\n', text)
|
| 183 |
+
text = text.strip()
|
| 184 |
+
return text
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
class ParallelTTSStreamer:
|
| 188 |
+
"""
|
| 189 |
+
Collects LLM tokens → splits into prosodic chunks → converts to audio
|
| 190 |
+
via edge-tts in parallel → streams audio bytes IN ORDER.
|
| 191 |
+
|
| 192 |
+
Audio ordering is guaranteed by a task chain: each chunk task synthesises
|
| 193 |
+
audio freely (parallel) but only writes to the queue after the previous
|
| 194 |
+
chunk finishes, so the client always hears chunk-1 before chunk-2.
|
| 195 |
+
"""
|
| 196 |
+
|
| 197 |
+
def __init__(self, voice: str = VOICE):
|
| 198 |
+
self.voice = voice
|
| 199 |
+
self.buffer = ""
|
| 200 |
+
self.queue: asyncio.Queue[bytes | None] = asyncio.Queue()
|
| 201 |
+
self._prev_task: asyncio.Task | None = None
|
| 202 |
+
self._flush_lock = asyncio.Lock()
|
| 203 |
+
|
| 204 |
+
async def add_token(self, token: str) -> None:
|
| 205 |
+
if not token:
|
| 206 |
+
return
|
| 207 |
+
self.buffer += token
|
| 208 |
+
if any(ch in FLUSH_TRIGGERS for ch in token) or len(self.buffer) >= FLUSH_LEN:
|
| 209 |
+
await self._schedule_flush()
|
| 210 |
+
|
| 211 |
+
async def _schedule_flush(self) -> None:
|
| 212 |
+
async with self._flush_lock:
|
| 213 |
+
raw = self.buffer.strip()
|
| 214 |
+
self.buffer = ""
|
| 215 |
+
|
| 216 |
+
text = _clean_for_tts(raw)
|
| 217 |
+
if len(text) < MIN_CHARS:
|
| 218 |
+
return
|
| 219 |
+
|
| 220 |
+
prev = self._prev_task
|
| 221 |
+
task = asyncio.create_task(self._tts_ordered(text, prev))
|
| 222 |
+
self._prev_task = task
|
| 223 |
+
|
| 224 |
+
async def _tts_ordered(self, text: str, wait_for: asyncio.Task | None) -> None:
|
| 225 |
+
"""Synthesise audio (parallel), then write to queue in order."""
|
| 226 |
+
# Step 1 — synthesise concurrently
|
| 227 |
+
audio_chunks: list[bytes] = []
|
| 228 |
+
try:
|
| 229 |
+
communicate = edge_tts.Communicate(text, self.voice)
|
| 230 |
+
async for chunk in communicate.stream():
|
| 231 |
+
if chunk["type"] == "audio":
|
| 232 |
+
audio_chunks.append(chunk["data"])
|
| 233 |
+
except Exception as e:
|
| 234 |
+
print(f"[TTS] edge-tts error for '{text[:40]}': {e}")
|
| 235 |
+
|
| 236 |
+
# Step 2 — wait for previous chunk to finish queuing
|
| 237 |
+
if wait_for and not wait_for.done():
|
| 238 |
+
try:
|
| 239 |
+
await wait_for
|
| 240 |
+
except Exception:
|
| 241 |
+
pass
|
| 242 |
+
|
| 243 |
+
# Step 3 — write to queue in order
|
| 244 |
+
for data in audio_chunks:
|
| 245 |
+
await self.queue.put(data)
|
| 246 |
+
|
| 247 |
+
async def flush(self) -> None:
|
| 248 |
+
"""Flush remaining buffer, await all tasks, send end sentinel."""
|
| 249 |
+
await self._schedule_flush()
|
| 250 |
+
if self._prev_task:
|
| 251 |
+
try:
|
| 252 |
+
await self._prev_task
|
| 253 |
+
except Exception:
|
| 254 |
+
pass
|
| 255 |
+
await self.queue.put(None)
|
| 256 |
+
|
| 257 |
+
async def stream_audio(self):
|
| 258 |
+
while True:
|
| 259 |
+
chunk = await self.queue.get()
|
| 260 |
+
if chunk is None:
|
| 261 |
+
break
|
| 262 |
+
yield chunk
|
services/stt.py
CHANGED
|
@@ -1,73 +1,148 @@
|
|
| 1 |
-
# from faster_whisper import WhisperModel
|
| 2 |
-
# import tempfile
|
| 3 |
-
|
| 4 |
-
# model = WhisperModel("small", device="cpu", compute_type="int8")
|
| 5 |
-
# class StreamingSTT:
|
| 6 |
-
# def __init__(self):
|
| 7 |
-
# self.audio_buffer = bytearray()
|
| 8 |
-
|
| 9 |
-
# def add_audio(self, chunk: bytes):
|
| 10 |
-
# self.audio_buffer.extend(chunk)
|
| 11 |
-
|
| 12 |
-
# def transcribe_if_ready(self):
|
| 13 |
-
# # simple chunk trigger (1.5–3 sec buffer recommended)
|
| 14 |
-
# if len(self.audio_buffer) < 48000 * 2 * 2:
|
| 15 |
-
# return None
|
| 16 |
-
|
| 17 |
-
# with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as f:
|
| 18 |
-
# f.write(self.audio_buffer)
|
| 19 |
-
# f.flush()
|
| 20 |
-
# segments, _ = model.transcribe(f.name, language="bn", task="translate", beam_size=1)
|
| 21 |
-
# text = " ".join([s.text for s in segments])
|
| 22 |
-
|
| 23 |
-
# self.audio_buffer.clear()
|
| 24 |
-
# return text
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
-
|
| 30 |
-
import tempfile
|
| 31 |
|
| 32 |
-
model = WhisperModel(
|
| 33 |
-
"small",
|
| 34 |
-
device="cpu",
|
| 35 |
-
compute_type="int8"
|
| 36 |
-
)
|
| 37 |
|
| 38 |
-
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
def add_audio(self, chunk: bytes):
|
| 44 |
-
self.audio_buffer.extend(chunk)
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
return None
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
|
| 57 |
-
f.write(self.audio_buffer)
|
| 58 |
-
f.flush()
|
| 59 |
|
| 60 |
-
segments,
|
| 61 |
-
f.name,
|
| 62 |
-
language="bn",
|
| 63 |
-
beam_size=1
|
| 64 |
-
)
|
| 65 |
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
)
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
self.audio_buffer.clear()
|
| 72 |
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
|
| 2 |
+
import os
|
| 3 |
+
import re
|
| 4 |
+
import subprocess
|
| 5 |
+
import tempfile
|
| 6 |
|
| 7 |
+
from faster_whisper import WhisperModel
|
| 8 |
|
| 9 |
|
| 10 |
+
model = WhisperModel("large-v3", device="cuda", compute_type="int8_float32")
|
|
|
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
+
BANGLA_PATTERN = re.compile(r'[\u0980-\u09FF]')
|
| 14 |
|
| 15 |
+
# Scripts we consider "wrong" — Arabic, Urdu, Devanagari (when expecting Bangla)
|
| 16 |
+
WRONG_SCRIPT_PATTERN = re.compile(
|
| 17 |
+
r'[\u0600-\u06FF' # Arabic / Urdu
|
| 18 |
+
r'\u0750-\u077F' # Arabic Supplement
|
| 19 |
+
r'\uFB50-\uFDFF' # Arabic Presentation Forms
|
| 20 |
+
r'\uFE70-\uFEFF]' # Arabic Presentation Forms-B
|
| 21 |
+
)
|
| 22 |
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
def _is_valid_bangla(text: str) -> bool:
|
| 25 |
+
"""
|
| 26 |
+
Return True if the transcript looks like real Bangla.
|
| 27 |
+
|
| 28 |
+
A valid transcript must:
|
| 29 |
+
1. Contain at least one Bangla Unicode character, OR be very short
|
| 30 |
+
(some valid responses are single digits/punctuation).
|
| 31 |
+
2. NOT be dominated by Arabic/Urdu script (Whisper wrong-script error).
|
| 32 |
+
"""
|
| 33 |
+
bangla_chars = len(BANGLA_PATTERN.findall(text))
|
| 34 |
+
wrong_chars = len(WRONG_SCRIPT_PATTERN.findall(text))
|
| 35 |
+
total_alpha = sum(1 for c in text if c.isalpha())
|
| 36 |
+
|
| 37 |
+
if total_alpha == 0:
|
| 38 |
+
return True # digits/punctuation only — let it through
|
| 39 |
+
|
| 40 |
+
# If more than 30% of alphabetic chars are Arabic/Urdu script, reject
|
| 41 |
+
if total_alpha > 0 and (wrong_chars / total_alpha) > 0.30:
|
| 42 |
+
return False
|
| 43 |
+
|
| 44 |
+
# Must have at least some Bangla characters for long responses
|
| 45 |
+
if total_alpha > 5 and bangla_chars == 0:
|
| 46 |
+
return False
|
| 47 |
+
|
| 48 |
+
return True
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class STTProcessor:
|
| 52 |
+
MIN_INPUT_BYTES = 3_000
|
| 53 |
+
|
| 54 |
+
def _to_wav(self, audio_bytes: bytes) -> str | None:
|
| 55 |
+
"""Convert browser WebM/opus to 16 kHz mono WAV with loudness normalization."""
|
| 56 |
+
in_path = out_path = None
|
| 57 |
+
try:
|
| 58 |
+
with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as f:
|
| 59 |
+
f.write(audio_bytes)
|
| 60 |
+
in_path = f.name
|
| 61 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
|
| 62 |
+
out_path = f.name
|
| 63 |
+
|
| 64 |
+
result = subprocess.run([
|
| 65 |
+
"ffmpeg", "-y", "-loglevel", "warning",
|
| 66 |
+
"-i", in_path,
|
| 67 |
+
"-ar", "16000", "-ac", "1",
|
| 68 |
+
"-af", "loudnorm",
|
| 69 |
+
"-f", "wav", out_path,
|
| 70 |
+
], stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
|
| 71 |
+
|
| 72 |
+
if result.returncode != 0:
|
| 73 |
+
print("[STT] ffmpeg error:", result.stderr.decode(errors="replace").strip())
|
| 74 |
+
return None
|
| 75 |
+
if not os.path.exists(out_path) or os.path.getsize(out_path) < 500:
|
| 76 |
+
print("[STT] ffmpeg produced empty WAV.")
|
| 77 |
+
return None
|
| 78 |
+
|
| 79 |
+
print(f"[STT] WAV ready: {os.path.getsize(out_path):,} bytes")
|
| 80 |
+
return out_path
|
| 81 |
+
except Exception as e:
|
| 82 |
+
print(f"[STT] _to_wav: {e}")
|
| 83 |
+
return None
|
| 84 |
+
finally:
|
| 85 |
+
if in_path and os.path.exists(in_path):
|
| 86 |
+
try: os.remove(in_path)
|
| 87 |
+
except OSError: pass
|
| 88 |
+
|
| 89 |
+
def transcribe(self, audio_bytes: bytes) -> str | None:
|
| 90 |
+
if len(audio_bytes) < self.MIN_INPUT_BYTES:
|
| 91 |
+
print(f"[STT] Too short ({len(audio_bytes)} B), skipping.")
|
| 92 |
return None
|
| 93 |
|
| 94 |
+
wav_path = None
|
| 95 |
+
try:
|
| 96 |
+
wav_path = self._to_wav(audio_bytes)
|
| 97 |
+
if not wav_path:
|
| 98 |
+
return None
|
| 99 |
|
|
|
|
|
|
|
| 100 |
|
| 101 |
+
# segments, info = model.transcribe(wav_path, language="bn", task="translate", beam_size=5)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
+
segments, info = model.transcribe(
|
| 104 |
+
wav_path,
|
| 105 |
+
language="bn",
|
| 106 |
+
beam_size=5,
|
| 107 |
+
vad_filter=False, # loudnorm handles quiet audio
|
| 108 |
+
condition_on_previous_text=False,
|
| 109 |
+
temperature=0,
|
| 110 |
+
suppress_tokens=[-1],
|
| 111 |
+
no_speech_threshold=0.5,
|
| 112 |
+
log_prob_threshold=-1.0,
|
| 113 |
+
# task="translate"
|
| 114 |
+
# NO initial_prompt — causes hallucination loops on base model
|
| 115 |
)
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
+
text = " ".join(seg.text.strip() for seg in segments).strip()
|
| 118 |
+
print(f"[STT] Lang={info.language} prob={info.language_probability:.2f}")
|
| 119 |
+
|
| 120 |
+
if not text:
|
| 121 |
+
print("[STT] Empty transcript.")
|
| 122 |
+
return None
|
| 123 |
+
|
| 124 |
+
# ── Hallucination guard: repeated words ───────────────────────────
|
| 125 |
+
words = text.split()
|
| 126 |
+
if len(words) > 5 and (len(set(words)) / len(words)) < 0.25:
|
| 127 |
+
print(f"[STT] Hallucination (repetition) discarded: {text[:60]}")
|
| 128 |
+
return None
|
| 129 |
+
|
| 130 |
+
# ── Script validation: must be Bangla Unicode ─────────────────────
|
| 131 |
+
if not _is_valid_bangla(text):
|
| 132 |
+
print(f"[STT] Wrong script (Arabic/Urdu output from base model) "
|
| 133 |
+
f"discarded: {text[:60]}")
|
| 134 |
+
print("[STT] ⚠ If this keeps happening, ensure you're using "
|
| 135 |
+
"model='small' not 'base'.")
|
| 136 |
+
return None
|
| 137 |
+
|
| 138 |
+
print(f"[STT] Transcript: {text}")
|
| 139 |
+
return text
|
| 140 |
+
|
| 141 |
+
except Exception as e:
|
| 142 |
+
print(f"[STT] transcribe: {e}")
|
| 143 |
+
import traceback; traceback.print_exc()
|
| 144 |
+
return None
|
| 145 |
+
finally:
|
| 146 |
+
if wav_path and os.path.exists(wav_path):
|
| 147 |
+
try: os.remove(wav_path)
|
| 148 |
+
except OSError: pass
|
services/tts.py
CHANGED
|
@@ -1,12 +1,29 @@
|
|
|
|
|
| 1 |
import edge_tts
|
| 2 |
-
import asyncio
|
| 3 |
-
import tempfile
|
| 4 |
|
| 5 |
-
VOICE = "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
import edge_tts
|
|
|
|
|
|
|
| 3 |
|
| 4 |
+
VOICE = "bn-BD-NabanitaNeural"
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
async def text_to_speech_stream(text: str, voice: str = VOICE):
|
| 8 |
+
"""
|
| 9 |
+
Async generator that converts *text* to Bangla audio and yields
|
| 10 |
+
raw MP3 bytes chunk-by-chunk as they arrive from edge-tts.
|
| 11 |
+
|
| 12 |
+
Args:
|
| 13 |
+
text: The Bangla (or mixed) text to synthesise.
|
| 14 |
+
voice: edge-tts voice name. Defaults to bn-BD-NabanitaNeural.
|
| 15 |
|
| 16 |
+
Yields:
|
| 17 |
+
bytes — raw MP3 audio data ready to send over WebSocket.
|
| 18 |
+
"""
|
| 19 |
+
text = text.strip()
|
| 20 |
+
if not text:
|
| 21 |
+
return
|
| 22 |
|
| 23 |
+
try:
|
| 24 |
+
communicate = edge_tts.Communicate(text, voice)
|
| 25 |
+
async for chunk in communicate.stream():
|
| 26 |
+
if chunk["type"] == "audio":
|
| 27 |
+
yield chunk["data"]
|
| 28 |
+
except Exception as e:
|
| 29 |
+
print(f"[TTS] text_to_speech_stream error: {e}")
|
services/vad.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import webrtcvad
|
| 3 |
+
|
| 4 |
+
class VADDetector:
|
| 5 |
+
def __init__(self, sample_rate=16000, frame_ms=30, aggressiveness=2):
|
| 6 |
+
self.vad = webrtcvad.Vad(aggressiveness)
|
| 7 |
+
self.sample_rate = sample_rate
|
| 8 |
+
self.frame_ms = frame_ms
|
| 9 |
+
self.frame_size = int(sample_rate * frame_ms / 1000) * 2
|
| 10 |
+
|
| 11 |
+
def is_valid(self, frame: bytes):
|
| 12 |
+
return len(frame) == self.frame_size
|
| 13 |
+
|
| 14 |
+
def is_speech(self, frame: bytes) -> bool:
|
| 15 |
+
if not self.is_valid(frame):
|
| 16 |
+
return False
|
| 17 |
+
try:
|
| 18 |
+
return self.vad.is_speech(frame, self.sample_rate)
|
| 19 |
+
except:
|
| 20 |
+
return False
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class VADSegmenter:
|
| 24 |
+
def __init__(self, vad: VADDetector, silence_limit=8):
|
| 25 |
+
self.vad = vad
|
| 26 |
+
self.silence_limit = silence_limit
|
| 27 |
+
|
| 28 |
+
self.buffer = bytearray()
|
| 29 |
+
self.silence = 0
|
| 30 |
+
self.active = False
|
| 31 |
+
|
| 32 |
+
def add_frame(self, frame: bytes):
|
| 33 |
+
speech = self.vad.is_speech(frame)
|
| 34 |
+
|
| 35 |
+
if speech:
|
| 36 |
+
self.buffer.extend(frame)
|
| 37 |
+
self.active = True
|
| 38 |
+
self.silence = 0
|
| 39 |
+
else:
|
| 40 |
+
if self.active:
|
| 41 |
+
self.silence += 1
|
| 42 |
+
|
| 43 |
+
if self.active and self.silence > self.silence_limit:
|
| 44 |
+
audio = bytes(self.buffer)
|
| 45 |
+
self.buffer.clear()
|
| 46 |
+
self.silence = 0
|
| 47 |
+
self.active = False
|
| 48 |
+
return audio
|
| 49 |
+
|
| 50 |
+
return None
|