Commit ·
75ee53d
1
Parent(s): 4d2289b
added voice module and updated index
Browse files- .env +6 -1
- app.py +141 -69
- core/backend.py +251 -85
- frontend/index.html +282 -22
- frontend/script.js +756 -199
- frontend/style.css +798 -103
- requirements.txt +8 -0
- services/__init__.py +0 -0
- services/streaming.py +192 -116
- services/stt.py +267 -90
- services/tts.py +192 -14
- services/vad.py +0 -1
.env
CHANGED
|
@@ -2,11 +2,16 @@ HF_TOKEN=""
|
|
| 2 |
WEATHER_API_KEY="9e50616b95574a30dbc5a01579aa2b9f"
|
| 3 |
LANGCHAIN_TRACING_V2=true
|
| 4 |
LANGCHAIN_ENDPOINT='https://api.smith.langchain.com'
|
| 5 |
-
LANGCHAIN_API_KEY='
|
| 6 |
LANGCHAIN_PROJECT='Default'
|
| 7 |
|
| 8 |
GOOGLE_API_KEY="AIzaSyA9sqz4YKQHKXR9TU1imw0DPOghzHOMiBo"
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
# TWILIO_ACCOUNT_SID="ACfafc0d2d007bdf14b21bb3e14a7a7b31"
|
| 11 |
# TWILIO_AUTH_TOKEN="ed15fa98748c8c3d3d02cb54e431a187"
|
| 12 |
# TWILIO_PHONE_NUMBER="+14343375085"
|
|
|
|
| 2 |
WEATHER_API_KEY="9e50616b95574a30dbc5a01579aa2b9f"
|
| 3 |
LANGCHAIN_TRACING_V2=true
|
| 4 |
LANGCHAIN_ENDPOINT='https://api.smith.langchain.com'
|
| 5 |
+
LANGCHAIN_API_KEY='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'
|
| 6 |
LANGCHAIN_PROJECT='Default'
|
| 7 |
|
| 8 |
GOOGLE_API_KEY="AIzaSyA9sqz4YKQHKXR9TU1imw0DPOghzHOMiBo"
|
| 9 |
|
| 10 |
+
|
| 11 |
+
ELEVENLABS_API_KEY="b3af3a938c8e15d5eae700ea47eea7d88dfe397f34fbd4b0c75c24f143b032b8"
|
| 12 |
+
ELEVENLABS_VOICE_ID="iuABfyf7pRoBzuPqzUCt"
|
| 13 |
+
ELEVENLABS_MODEL_ID="eleven_multilingual_v2"
|
| 14 |
+
|
| 15 |
# TWILIO_ACCOUNT_SID="ACfafc0d2d007bdf14b21bb3e14a7a7b31"
|
| 16 |
# TWILIO_AUTH_TOKEN="ed15fa98748c8c3d3d02cb54e431a187"
|
| 17 |
# TWILIO_PHONE_NUMBER="+14343375085"
|
app.py
CHANGED
|
@@ -1,6 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import asyncio
|
| 2 |
import json
|
| 3 |
import os
|
|
|
|
| 4 |
from contextlib import asynccontextmanager
|
| 5 |
|
| 6 |
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
|
@@ -12,7 +34,28 @@ from core.backend import AIBackend
|
|
| 12 |
from services.stt import STTProcessor
|
| 13 |
from services.streaming import ParallelTTSStreamer
|
| 14 |
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
@asynccontextmanager
|
|
@@ -22,6 +65,8 @@ async def lifespan(app: FastAPI):
|
|
| 22 |
yield
|
| 23 |
if hasattr(ai, "conn") and ai.conn:
|
| 24 |
await ai.conn.close()
|
|
|
|
|
|
|
| 25 |
|
| 26 |
|
| 27 |
app = FastAPI(lifespan=lifespan)
|
|
@@ -39,6 +84,8 @@ async def root():
|
|
| 39 |
return HTMLResponse("<h2>index.html not found</h2>", status_code=404)
|
| 40 |
|
| 41 |
|
|
|
|
|
|
|
| 42 |
def _ws_open(ws: WebSocket) -> bool:
|
| 43 |
return ws.client_state == WebSocketState.CONNECTED
|
| 44 |
|
|
@@ -63,10 +110,12 @@ async def _safe_bytes(ws: WebSocket, data: bytes) -> bool:
|
|
| 63 |
return False
|
| 64 |
|
| 65 |
|
|
|
|
|
|
|
| 66 |
@app.websocket("/ws/chat")
|
| 67 |
async def ws_chat(ws: WebSocket):
|
| 68 |
await ws.accept()
|
| 69 |
-
print("[CHAT] Client connected")
|
| 70 |
try:
|
| 71 |
while True:
|
| 72 |
raw = await ws.receive_text()
|
|
@@ -78,16 +127,20 @@ async def ws_chat(ws: WebSocket):
|
|
| 78 |
|
| 79 |
user_id = data.get("user_id", "default_user")
|
| 80 |
user_query = data.get("user_query", "").strip()
|
|
|
|
|
|
|
|
|
|
| 81 |
if not user_query:
|
| 82 |
continue
|
| 83 |
|
| 84 |
-
full_response = ""
|
| 85 |
try:
|
| 86 |
stream = await ai.main(user_id, user_query)
|
| 87 |
async for token in stream:
|
| 88 |
-
|
| 89 |
-
|
|
|
|
| 90 |
except Exception as exc:
|
|
|
|
| 91 |
print(f"[CHAT] AI error: {exc}")
|
| 92 |
await _safe_text(ws, {"type": "error", "text": str(exc)})
|
| 93 |
|
|
@@ -100,19 +153,79 @@ async def ws_chat(ws: WebSocket):
|
|
| 100 |
print(f"[CHAT] WS error: {exc}")
|
| 101 |
|
| 102 |
|
|
|
|
|
|
|
| 103 |
@app.websocket("/ws/voice")
|
| 104 |
async def ws_voice(ws: WebSocket):
|
| 105 |
await ws.accept()
|
| 106 |
-
print("[VOICE] Client connected")
|
| 107 |
|
| 108 |
-
|
| 109 |
-
user_id
|
|
|
|
|
|
|
| 110 |
_active_streamer: ParallelTTSStreamer | None = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
try:
|
| 113 |
while True:
|
| 114 |
if not _ws_open(ws):
|
| 115 |
-
print("[VOICE] Connection dropped, exiting handler.")
|
| 116 |
break
|
| 117 |
|
| 118 |
try:
|
|
@@ -127,74 +240,34 @@ async def ws_voice(ws: WebSocket):
|
|
| 127 |
print(f"[VOICE] Receive error: {exc}")
|
| 128 |
break
|
| 129 |
|
| 130 |
-
|
| 131 |
if "bytes" in data and data["bytes"]:
|
| 132 |
audio_bytes = data["bytes"]
|
| 133 |
-
print(f"[VOICE]
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
if _active_streamer is not None:
|
| 137 |
-
print("[VOICE] Barge-in — cancelling previous TTS.")
|
| 138 |
-
await _active_streamer.cancel()
|
| 139 |
-
_active_streamer = None
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
transcript = await stt.transcribe(audio_bytes)
|
| 143 |
-
|
| 144 |
-
if not transcript:
|
| 145 |
-
await _safe_text(ws, {
|
| 146 |
-
"type": "error",
|
| 147 |
-
"text": "কথা বুঝতে পারিনি, আবার বলুন।"
|
| 148 |
-
})
|
| 149 |
-
await _safe_text(ws, {"type": "end"})
|
| 150 |
-
continue
|
| 151 |
-
|
| 152 |
-
print(f"[VOICE] STT: {transcript}")
|
| 153 |
-
if not await _safe_text(ws, {"type": "stt", "text": transcript}):
|
| 154 |
-
break
|
| 155 |
|
|
|
|
|
|
|
| 156 |
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
async def run_ai_and_tts() -> None:
|
| 161 |
-
try:
|
| 162 |
-
stream = await ai.main(user_id, transcript)
|
| 163 |
-
async for token in stream:
|
| 164 |
-
if not token:
|
| 165 |
-
continue
|
| 166 |
-
if not await _safe_text(ws, {"type": "llm_token", "token": token}):
|
| 167 |
-
break
|
| 168 |
-
await tts_streamer.add_token(token)
|
| 169 |
-
except Exception as exc:
|
| 170 |
-
print(f"[VOICE] AI error: {exc}")
|
| 171 |
-
finally:
|
| 172 |
-
await tts_streamer.flush()
|
| 173 |
-
|
| 174 |
-
async def stream_tts_audio() -> None:
|
| 175 |
-
async for chunk in tts_streamer.stream_audio():
|
| 176 |
-
if not await _safe_bytes(ws, chunk):
|
| 177 |
-
break
|
| 178 |
-
|
| 179 |
-
await asyncio.gather(run_ai_and_tts(), stream_tts_audio())
|
| 180 |
-
_active_streamer = None
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
await _safe_text(ws, {"type": "end"})
|
| 184 |
-
|
| 185 |
|
|
|
|
| 186 |
elif "text" in data and data["text"]:
|
| 187 |
try:
|
| 188 |
msg = json.loads(data["text"])
|
| 189 |
-
if msg.get("type") == "ping":
|
| 190 |
-
await _safe_text(ws, {"type": "pong"})
|
| 191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
elif msg.get("type") == "cancel":
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
await _active_streamer.cancel()
|
| 197 |
-
_active_streamer = None
|
| 198 |
await _safe_text(ws, {"type": "end"})
|
| 199 |
|
| 200 |
except json.JSONDecodeError:
|
|
@@ -206,6 +279,5 @@ async def ws_voice(ws: WebSocket):
|
|
| 206 |
if "disconnect" not in str(exc).lower():
|
| 207 |
print(f"[VOICE] WS error: {exc}")
|
| 208 |
finally:
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
print("[VOICE] Handler exiting cleanly.")
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
app.py — FastAPI entrypoint (Production-Fixed)
|
| 3 |
+
|
| 4 |
+
Fixes applied:
|
| 5 |
+
─────────────
|
| 6 |
+
1. MODEL ROUTING — USE_GEMINI / USE_OLLAMA / USE_LOCAL_FALLBACK flags.
|
| 7 |
+
Exactly one must be True; startup raises if misconfigured.
|
| 8 |
+
|
| 9 |
+
2. UNIQUE VOICE USER IDs — Each WebSocket connection receives its own
|
| 10 |
+
user_id (f"voice_{uuid4().hex[:12]}"). Browser may override via
|
| 11 |
+
{"type": "init", "user_id": "..."} as first text frame.
|
| 12 |
+
|
| 13 |
+
3. STABLE WS LIFECYCLE — All blocking I/O is delegated to workers via
|
| 14 |
+
asyncio.Queue. The receive loop never blocks; handlers run as Tasks.
|
| 15 |
+
|
| 16 |
+
4. TASK ISOLATION — STT, LLM, and TTS are distinct async tasks per turn,
|
| 17 |
+
cleanly cancelled on barge-in or disconnect.
|
| 18 |
+
|
| 19 |
+
5. CHAT WS — reconnect-safe; send is guarded by readyState helper.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
import asyncio
|
| 23 |
import json
|
| 24 |
import os
|
| 25 |
+
import uuid
|
| 26 |
from contextlib import asynccontextmanager
|
| 27 |
|
| 28 |
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
|
|
|
| 34 |
from services.stt import STTProcessor
|
| 35 |
from services.streaming import ParallelTTSStreamer
|
| 36 |
|
| 37 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 38 |
+
# MODEL ROUTING CONFIG — set exactly ONE to True
|
| 39 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 40 |
+
USE_GEMINI = True
USE_OLLAMA = False
USE_LOCAL_FALLBACK = False

# Validate the routing flags at import time so a misconfigured deployment
# fails immediately instead of silently picking a backend.
_routing_flags = {
    "USE_GEMINI": USE_GEMINI,
    "USE_OLLAMA": USE_OLLAMA,
    "USE_LOCAL_FALLBACK": USE_LOCAL_FALLBACK,
}
# Count truthiness rather than summing raw values, so a non-bool value such
# as 2 cannot skew the count.
_active = sum(bool(value) for value in _routing_flags.values())
if _active != 1:
    _enabled = [name for name, value in _routing_flags.items() if value]
    raise RuntimeError(
        "[CONFIG] Exactly one of USE_GEMINI / USE_OLLAMA / USE_LOCAL_FALLBACK "
        f"must be True. Got {_active} True "
        f"({', '.join(_enabled) if _enabled else 'none enabled'})."
    )
|
| 50 |
+
|
| 51 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 52 |
+
# AI BACKEND
|
| 53 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 54 |
+
ai = AIBackend(
|
| 55 |
+
use_gemini=USE_GEMINI,
|
| 56 |
+
use_ollama=USE_OLLAMA,
|
| 57 |
+
use_fallback=USE_LOCAL_FALLBACK,
|
| 58 |
+
)
|
| 59 |
|
| 60 |
|
| 61 |
@asynccontextmanager
|
|
|
|
| 65 |
yield
|
| 66 |
if hasattr(ai, "conn") and ai.conn:
|
| 67 |
await ai.conn.close()
|
| 68 |
+
if hasattr(ai, "_meta_conn") and ai._meta_conn:
|
| 69 |
+
await ai._meta_conn.close()
|
| 70 |
|
| 71 |
|
| 72 |
app = FastAPI(lifespan=lifespan)
|
|
|
|
| 84 |
return HTMLResponse("<h2>index.html not found</h2>", status_code=404)
|
| 85 |
|
| 86 |
|
| 87 |
+
# ── WebSocket helpers ─────────────────────────────────────────────────────────
|
| 88 |
+
|
| 89 |
def _ws_open(ws: WebSocket) -> bool:
    """Return True while the client side of *ws* reports CONNECTED."""
    client_state = ws.client_state
    return client_state == WebSocketState.CONNECTED
|
| 91 |
|
|
|
|
| 110 |
return False
|
| 111 |
|
| 112 |
|
| 113 |
+
# ── Chat WebSocket ────────────────────────────────────────────────────────────
|
| 114 |
+
|
| 115 |
@app.websocket("/ws/chat")
|
| 116 |
async def ws_chat(ws: WebSocket):
|
| 117 |
await ws.accept()
|
| 118 |
+
print("[CHAT] ✓ Client connected")
|
| 119 |
try:
|
| 120 |
while True:
|
| 121 |
raw = await ws.receive_text()
|
|
|
|
| 127 |
|
| 128 |
user_id = data.get("user_id", "default_user")
|
| 129 |
user_query = data.get("user_query", "").strip()
|
| 130 |
+
|
| 131 |
+
print(f"[CHAT] user_id={user_id!r} query={user_query!r}")
|
| 132 |
+
|
| 133 |
if not user_query:
|
| 134 |
continue
|
| 135 |
|
|
|
|
| 136 |
try:
|
| 137 |
stream = await ai.main(user_id, user_query)
|
| 138 |
async for token in stream:
|
| 139 |
+
if not token:
|
| 140 |
+
continue
|
| 141 |
+
await _safe_text(ws, {"type": "llm_token", "token": token})
|
| 142 |
except Exception as exc:
|
| 143 |
+
import traceback; traceback.print_exc()
|
| 144 |
print(f"[CHAT] AI error: {exc}")
|
| 145 |
await _safe_text(ws, {"type": "error", "text": str(exc)})
|
| 146 |
|
|
|
|
| 153 |
print(f"[CHAT] WS error: {exc}")
|
| 154 |
|
| 155 |
|
| 156 |
+
# ── Voice WebSocket ───────────────────────────────────────────────────────────
|
| 157 |
+
|
| 158 |
@app.websocket("/ws/voice")
|
| 159 |
async def ws_voice(ws: WebSocket):
|
| 160 |
await ws.accept()
|
|
|
|
| 161 |
|
| 162 |
+
user_id = f"voice_{uuid.uuid4().hex[:12]}"
|
| 163 |
+
print(f"[VOICE] Client connected — user_id={user_id}")
|
| 164 |
+
|
| 165 |
+
stt = STTProcessor()
|
| 166 |
_active_streamer: ParallelTTSStreamer | None = None
|
| 167 |
+
_active_task: asyncio.Task | None = None
|
| 168 |
+
|
| 169 |
+
async def _cancel_active():
|
| 170 |
+
nonlocal _active_streamer, _active_task
|
| 171 |
+
if _active_streamer is not None:
|
| 172 |
+
await _active_streamer.cancel()
|
| 173 |
+
_active_streamer = None
|
| 174 |
+
if _active_task is not None and not _active_task.done():
|
| 175 |
+
_active_task.cancel()
|
| 176 |
+
try:
|
| 177 |
+
await _active_task
|
| 178 |
+
except (asyncio.CancelledError, Exception):
|
| 179 |
+
pass
|
| 180 |
+
_active_task = None
|
| 181 |
+
|
| 182 |
+
async def _handle_utterance(audio_bytes: bytes):
|
| 183 |
+
nonlocal _active_streamer
|
| 184 |
+
|
| 185 |
+
transcript = await stt.transcribe(audio_bytes)
|
| 186 |
+
if not transcript:
|
| 187 |
+
await _safe_text(ws, {
|
| 188 |
+
"type": "error",
|
| 189 |
+
"text": "কথা বুঝতে পারিনি, আবার বলুন।"
|
| 190 |
+
})
|
| 191 |
+
await _safe_text(ws, {"type": "end"})
|
| 192 |
+
return
|
| 193 |
+
|
| 194 |
+
print(f"[VOICE] [{user_id}] STT: {transcript}")
|
| 195 |
+
if not await _safe_text(ws, {"type": "stt", "text": transcript}):
|
| 196 |
+
return
|
| 197 |
+
|
| 198 |
+
tts_streamer = ParallelTTSStreamer()
|
| 199 |
+
_active_streamer = tts_streamer
|
| 200 |
+
|
| 201 |
+
async def run_ai():
|
| 202 |
+
try:
|
| 203 |
+
stream = await ai.main(user_id, transcript)
|
| 204 |
+
async for token in stream:
|
| 205 |
+
if not token:
|
| 206 |
+
continue
|
| 207 |
+
if not await _safe_text(ws, {"type": "llm_token", "token": token}):
|
| 208 |
+
break
|
| 209 |
+
await tts_streamer.add_token(token)
|
| 210 |
+
except asyncio.CancelledError:
|
| 211 |
+
raise
|
| 212 |
+
except Exception as exc:
|
| 213 |
+
print(f"[VOICE] AI error: {exc}")
|
| 214 |
+
finally:
|
| 215 |
+
await tts_streamer.flush()
|
| 216 |
+
|
| 217 |
+
async def run_tts():
|
| 218 |
+
async for chunk in tts_streamer.stream_audio():
|
| 219 |
+
if not await _safe_bytes(ws, chunk):
|
| 220 |
+
break
|
| 221 |
+
|
| 222 |
+
await asyncio.gather(run_ai(), run_tts(), return_exceptions=True)
|
| 223 |
+
_active_streamer = None
|
| 224 |
+
await _safe_text(ws, {"type": "end"})
|
| 225 |
|
| 226 |
try:
|
| 227 |
while True:
|
| 228 |
if not _ws_open(ws):
|
|
|
|
| 229 |
break
|
| 230 |
|
| 231 |
try:
|
|
|
|
| 240 |
print(f"[VOICE] Receive error: {exc}")
|
| 241 |
break
|
| 242 |
|
| 243 |
+
# ── Audio utterance ────────────────────────────────────────────────
|
| 244 |
if "bytes" in data and data["bytes"]:
|
| 245 |
audio_bytes = data["bytes"]
|
| 246 |
+
print(f"[VOICE] [{user_id}] Utterance: {len(audio_bytes):,} bytes")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
|
| 248 |
+
# Barge-in: cancel immediately before starting new turn
|
| 249 |
+
await _cancel_active()
|
| 250 |
|
| 251 |
+
_active_task = asyncio.create_task(
|
| 252 |
+
_handle_utterance(audio_bytes)
|
| 253 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
|
| 255 |
+
# ── Control messages ───────────────────────────────────────────────
|
| 256 |
elif "text" in data and data["text"]:
|
| 257 |
try:
|
| 258 |
msg = json.loads(data["text"])
|
|
|
|
|
|
|
| 259 |
|
| 260 |
+
if msg.get("type") == "init" and msg.get("user_id"):
|
| 261 |
+
user_id = str(msg["user_id"])[:64]
|
| 262 |
+
print(f"[VOICE] user_id updated: {user_id}")
|
| 263 |
+
await _safe_text(ws, {"type": "init_ack", "user_id": user_id})
|
| 264 |
+
|
| 265 |
+
elif msg.get("type") == "ping":
|
| 266 |
+
await _safe_text(ws, {"type": "pong"})
|
| 267 |
|
| 268 |
elif msg.get("type") == "cancel":
|
| 269 |
+
print("[VOICE] Client cancel signal.")
|
| 270 |
+
await _cancel_active()
|
|
|
|
|
|
|
| 271 |
await _safe_text(ws, {"type": "end"})
|
| 272 |
|
| 273 |
except json.JSONDecodeError:
|
|
|
|
| 279 |
if "disconnect" not in str(exc).lower():
|
| 280 |
print(f"[VOICE] WS error: {exc}")
|
| 281 |
finally:
|
| 282 |
+
await _cancel_active()
|
| 283 |
+
print(f"[VOICE] [{user_id}] Handler exiting cleanly.")
|
|
|
core/backend.py
CHANGED
|
@@ -5,6 +5,19 @@ import json
|
|
| 5 |
import os
|
| 6 |
import uuid
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
import aiosqlite
|
| 9 |
import pytz
|
| 10 |
from datetime import datetime
|
|
@@ -15,12 +28,10 @@ from langchain_core.messages import (
|
|
| 15 |
SystemMessage, ToolMessage,
|
| 16 |
)
|
| 17 |
from langchain_core.tools import tool
|
| 18 |
-
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 19 |
from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
|
| 20 |
from langgraph.graph import END, START, StateGraph
|
| 21 |
from langgraph.graph.message import add_messages
|
| 22 |
from langgraph.prebuilt import ToolNode, tools_condition
|
| 23 |
-
from twilio.rest import Client
|
| 24 |
from typing import Annotated, TypedDict
|
| 25 |
|
| 26 |
|
|
@@ -49,12 +60,16 @@ def format_bd_number(num: str) -> str:
|
|
| 49 |
|
| 50 |
|
| 51 |
def send_sms(to_number: str, message: str) -> None:
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
|
| 60 |
# ═══════════════════════════════════════════════════════════════════════════════
|
|
@@ -246,35 +261,115 @@ SUMMARY_SYSTEM = (
|
|
| 246 |
"Use this memory for continuity. Do not repeat it unless asked."
|
| 247 |
)
|
| 248 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
# ═══════════════════════════════════════════════════════════════════════════════
|
| 251 |
# AGENT
|
| 252 |
# ═══════════════════════════════════════════════════════════════════════════════
|
| 253 |
class AIBackend:
|
| 254 |
|
| 255 |
-
def __init__(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
load_dotenv()
|
| 257 |
os.environ.setdefault("LANGCHAIN_PROJECT", "Doctor Appointment Automation")
|
| 258 |
|
| 259 |
-
self.
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
)
|
| 263 |
|
| 264 |
-
self.
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
|
| 274 |
# ── Setup ──────────────────────────────────────────────────────────────────
|
| 275 |
async def async_setup(self) -> None:
|
| 276 |
-
db_path
|
| 277 |
-
self.conn
|
|
|
|
|
|
|
| 278 |
self.checkpointer = AsyncSqliteSaver(self.conn)
|
| 279 |
await self._create_tables()
|
| 280 |
self.graph = self._build_graph()
|
|
@@ -338,57 +433,64 @@ class AIBackend:
|
|
| 338 |
async def should_summarize(self, state: ChatState) -> str:
|
| 339 |
return "summarize_node" if len(state["messages"]) > 10 else "chat_node"
|
| 340 |
|
| 341 |
-
# ── Chat node
|
|
|
|
|
|
|
|
|
|
| 342 |
async def chat_node(self, state: ChatState):
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
|
|
|
|
|
|
| 348 |
|
| 349 |
-
The streamed chunks are merged into a single AIMessage for the
|
| 350 |
-
graph state so checkpointing and tool detection work unchanged.
|
| 351 |
-
"""
|
| 352 |
summary = state.get("summary", "")
|
| 353 |
messages = state["messages"]
|
| 354 |
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
print(f" [{m.__class__.__name__}]: {str(m.content)[:160]}")
|
| 360 |
-
print("#" * 50)
|
| 361 |
|
| 362 |
-
sys_content = SUMMARY_SYSTEM.format(summary=summary) if summary else BASE_SYSTEM
|
| 363 |
full_messages = [SystemMessage(content=sys_content)] + list(messages)
|
| 364 |
|
| 365 |
-
#
|
| 366 |
-
# stream_mode="messages" before the node returns its state update.
|
| 367 |
collected: list[AIMessageChunk] = []
|
| 368 |
async for chunk in self.llm_with_tools.astream(full_messages):
|
| 369 |
collected.append(chunk)
|
| 370 |
|
| 371 |
-
# Merge chunks into a single AIMessage for the state
|
| 372 |
if not collected:
|
| 373 |
response = AIMessage(content="")
|
| 374 |
else:
|
| 375 |
-
# LangChain chunk addition merges content + tool_calls correctly
|
| 376 |
response = collected[0]
|
| 377 |
for c in collected[1:]:
|
| 378 |
response = response + c
|
| 379 |
|
| 380 |
-
print(f"[AI]: {str(response.content)[:
|
| 381 |
-
print(">>>>>>>>>> CHAT NODE END <<<<<<<<<<")
|
| 382 |
return {"messages": [response]}
|
| 383 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
# ── Graph ──────────────────────────────────────────────────────────────────
|
| 385 |
def _build_graph(self):
|
| 386 |
g = StateGraph(ChatState)
|
| 387 |
g.add_node("chat_node", self.chat_node)
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 392 |
return g.compile(checkpointer=self.checkpointer)
|
| 393 |
|
| 394 |
def _build_summary_graph(self):
|
|
@@ -398,62 +500,126 @@ class AIBackend:
|
|
| 398 |
g.add_edge("summarize_node", END)
|
| 399 |
return g.compile(checkpointer=self.checkpointer)
|
| 400 |
|
| 401 |
-
# ── Streaming ──────────────────────────────────────────────────────
|
| 402 |
-
async def ai_only_stream(self,
|
| 403 |
"""
|
| 404 |
-
Async generator
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
"""
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
yield chunk.content
|
| 415 |
-
|
| 416 |
-
# Auto-summarise in background when history grows long
|
| 417 |
-
current = await self.graph.aget_state(config)
|
| 418 |
-
if len(current.values.get("messages", [])) > 10:
|
| 419 |
-
asyncio.create_task(
|
| 420 |
-
self.summary_graph.ainvoke(current.values, config=config)
|
| 421 |
)
|
| 422 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
|
| 424 |
# ── Thread management ──────────────────────────────────────────────────────
|
| 425 |
@staticmethod
|
| 426 |
def generate_thread_id() -> str:
|
| 427 |
return str(uuid.uuid4())
|
| 428 |
|
| 429 |
-
async def retrieve_all_threads(self) -> list[str]:
|
| 430 |
-
threads: set[str] = set()
|
| 431 |
-
async for cp in self.checkpointer.alist(None):
|
| 432 |
-
threads.add(cp.config["configurable"]["thread_id"])
|
| 433 |
-
return list(threads)
|
| 434 |
-
|
| 435 |
# ── Public entry point ─────────────────────────────────────────────────────
|
| 436 |
async def main(self, user_id: str, user_query: str):
|
| 437 |
"""Return an async generator of AI text tokens."""
|
| 438 |
-
async with self.
|
| 439 |
"SELECT threadId FROM userid_threadid WHERE userId = ?", (user_id,)
|
| 440 |
) as cursor:
|
| 441 |
row = await cursor.fetchone()
|
| 442 |
|
| 443 |
if row is None:
|
| 444 |
thread_id = user_id + self.generate_thread_id()
|
| 445 |
-
await self.
|
| 446 |
"INSERT INTO userid_threadid (userId, threadId) VALUES (?, ?)",
|
| 447 |
(user_id, thread_id),
|
| 448 |
)
|
| 449 |
-
await self.
|
| 450 |
else:
|
| 451 |
thread_id = row[0]
|
| 452 |
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
"metadata": {"thread_id": thread_id},
|
| 457 |
-
"run_name": "chat_turn",
|
| 458 |
-
}
|
| 459 |
-
return self.ai_only_stream(initial_state, config)
|
|
|
|
| 5 |
import os
|
| 6 |
import uuid
|
| 7 |
|
| 8 |
+
# ── Disable LangSmith unless explicitly configured ────────────────────────────
|
| 9 |
+
from dotenv import load_dotenv as _ld

# Load .env before reading the LangSmith settings below.
_ld()

# Tracing stays on only when it is explicitly requested AND an API key is set.
_tracing_requested = (
    os.getenv("LANGCHAIN_TRACING_V2", "false").strip().lower() == "true"
)
_key_present = bool(os.getenv("LANGCHAIN_API_KEY", "").strip())

if _tracing_requested and _key_present:
    print("[BACKEND] LangSmith tracing ENABLED.")
else:
    # Force tracing off and drop any (possibly blank) key from the environment.
    os.environ["LANGCHAIN_TRACING_V2"] = "false"
    os.environ.pop("LANGCHAIN_API_KEY", None)
    print("[BACKEND] LangSmith tracing disabled.")
|
| 20 |
+
|
| 21 |
import aiosqlite
|
| 22 |
import pytz
|
| 23 |
from datetime import datetime
|
|
|
|
| 28 |
SystemMessage, ToolMessage,
|
| 29 |
)
|
| 30 |
from langchain_core.tools import tool
|
|
|
|
| 31 |
from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
|
| 32 |
from langgraph.graph import END, START, StateGraph
|
| 33 |
from langgraph.graph.message import add_messages
|
| 34 |
from langgraph.prebuilt import ToolNode, tools_condition
|
|
|
|
| 35 |
from typing import Annotated, TypedDict
|
| 36 |
|
| 37 |
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
def send_sms(to_number: str, message: str) -> None:
    """Send *message* to *to_number* through Twilio on a best-effort basis.

    The account SID, auth token and sender number are read from the
    ``TWILIO_ACCOUNT_SID``, ``TWILIO_AUTH_TOKEN`` and ``TWILIO_PHONE_NUMBER``
    environment variables. Every failure (missing configuration, Twilio SDK
    import error, API error) is printed and swallowed, so a failed SMS never
    propagates to the caller.

    Args:
        to_number: Destination phone number, passed to Twilio unchanged.
        message: Text body of the SMS.
    """
    settings = {
        name: os.getenv(name)
        for name in (
            "TWILIO_ACCOUNT_SID",
            "TWILIO_AUTH_TOKEN",
            "TWILIO_PHONE_NUMBER",
        )
    }
    missing = [name for name, value in settings.items() if not value]
    if missing:
        # Do not build a client or hit the API with empty credentials;
        # report exactly which settings are absent instead.
        print(f"[SMS] Failed to send: missing configuration {', '.join(missing)}")
        return

    try:
        # Imported inside the try block so an import failure is handled like
        # any other send failure.
        from twilio.rest import Client

        client = Client(
            settings["TWILIO_ACCOUNT_SID"],
            settings["TWILIO_AUTH_TOKEN"],
        )
        client.messages.create(
            body=message,
            from_=settings["TWILIO_PHONE_NUMBER"],
            to=to_number,
        )
    except Exception as e:  # deliberate best-effort boundary: log, never raise
        print(f"[SMS] Failed to send: {e}")
|
| 73 |
|
| 74 |
|
| 75 |
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
| 261 |
"Use this memory for continuity. Do not repeat it unless asked."
|
| 262 |
)
|
| 263 |
|
| 264 |
+
# ── Ollama system prompt (no tool calling) ─────────────────────────────────────
|
| 265 |
+
# System prompt for the Ollama backend: the base prompt followed by a notice
# that tools are unavailable in this mode.
OLLAMA_SYSTEM = "".join((
    BASE_SYSTEM,
    "\nIMPORTANT: You do not have tool access in this mode. ",
    "Politely tell the user you cannot look up doctor information right now, ",
    "and ask them to use the chat interface for complex queries.",
))
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 274 |
+
# TOOL CALLING — VALIDATED LAYER
|
| 275 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 276 |
+
class ToolCallValidator:
    """Run a ``ToolNode`` with bounded retries and a graceful fallback.

    If the wrapped node keeps raising, one ``ToolMessage`` per pending tool
    call is returned, asking the model to inform the user politely.
    """

    # Number of retries after the first failed attempt.
    MAX_RETRIES = 2
    # Base delay in seconds; the wait after failed attempt k is
    # RETRY_BACKOFF_SECONDS * k (0.3 s, 0.6 s, ... with the default).
    RETRY_BACKOFF_SECONDS = 0.3

    def __init__(self, tool_node: ToolNode):
        """Wrap *tool_node*, the node that actually executes the tool calls."""
        self._node = tool_node

    async def invoke(self, state: ChatState) -> ChatState:
        """Execute the tool calls requested by the last message in *state*.

        Args:
            state: Graph state; ``state["messages"]`` is read and the last
                entry is inspected for ``tool_calls``.

        Returns:
            *state* unchanged when there are no messages or the last message
            carries no tool calls; the wrapped node's result on success;
            otherwise a ``{"messages": ...}`` update made of the existing
            messages followed by one fallback ``ToolMessage`` per tool call.
        """
        messages = state["messages"]
        if not messages:
            # Nothing to inspect; avoids an IndexError on an empty history.
            return state

        tool_calls = getattr(messages[-1], "tool_calls", None)
        if not tool_calls:
            return state

        # Always try at least once, even if MAX_RETRIES was overridden with a
        # negative value; otherwise the tool calls would be silently skipped.
        attempts = max(1, self.MAX_RETRIES + 1)
        for attempt in range(1, attempts + 1):
            try:
                return await self._node.ainvoke(state)
            except Exception as exc:
                print(f"[TOOL] Attempt {attempt} failed: {exc}")
                if attempt < attempts:
                    # Linear backoff before the next retry.
                    await asyncio.sleep(self.RETRY_BACKOFF_SECONDS * attempt)

        # Every attempt failed: answer each pending tool call so the
        # conversation can continue.
        fallback_msgs = [
            ToolMessage(
                content="Tool execution failed after retries. Please inform the user politely.",
                tool_call_id=tc["id"],
            )
            for tc in tool_calls
        ]
        # NOTE(review): this returns the full history plus the fallbacks; if
        # ChatState merges "messages" with add_messages this relies on
        # id-based de-duplication — confirm against the ChatState definition.
        return {"messages": messages + fallback_msgs}
|
| 306 |
+
|
| 307 |
|
| 308 |
# ═══════════════════════════════════════════════════════════════════════════════
|
| 309 |
# AGENT
|
| 310 |
# ═══════════════════════════════════════════════════════════════════════════════
|
| 311 |
class AIBackend:
|
| 312 |
|
| 313 |
+
def __init__(
    self,
    use_gemini: bool = True,
    use_ollama: bool = False,
    use_fallback: bool = False,
) -> None:
    """Record the backend selection flags and build the LLM.

    Args:
        use_gemini: Select the Gemini model.
        use_ollama: Select the Ollama model.
        use_fallback: Select the local fallback responder.
    """
    load_dotenv()
    os.environ.setdefault("LANGCHAIN_PROJECT", "Doctor Appointment Automation")

    (
        self._use_gemini,
        self._use_ollama,
        self._use_fallback,
    ) = (use_gemini, use_ollama, use_fallback)

    self._build_llm()
|
| 327 |
+
|
| 328 |
+
def _build_llm(self) -> None:
|
| 329 |
+
if self._use_gemini:
|
| 330 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 331 |
+
self.llm = ChatGoogleGenerativeAI(
|
| 332 |
+
model="gemini-2.5-flash",
|
| 333 |
+
temperature=0.3,
|
| 334 |
+
)
|
| 335 |
+
print("[BACKEND] Using Gemini 2.5 Flash")
|
| 336 |
+
|
| 337 |
+
elif self._use_ollama:
|
| 338 |
+
from langchain_ollama import ChatOllama
|
| 339 |
+
ollama_model = os.getenv("OLLAMA_MODEL", "qwen2.5")
|
| 340 |
+
self.llm = ChatOllama(
|
| 341 |
+
model=ollama_model,
|
| 342 |
+
temperature=0.3,
|
| 343 |
+
)
|
| 344 |
+
print(f"[BACKEND] Using Ollama model: {ollama_model}")
|
| 345 |
+
|
| 346 |
+
else:
|
| 347 |
+
self.llm = None
|
| 348 |
+
print("[BACKEND] Using local fallback responder (no external LLM)")
|
| 349 |
+
|
| 350 |
+
if self._use_gemini and self.llm is not None:
|
| 351 |
+
self.tools = [
|
| 352 |
+
search_doctor,
|
| 353 |
+
book_appointment,
|
| 354 |
+
get_bd_time,
|
| 355 |
+
search_appointment_by_phone,
|
| 356 |
+
delete_appointment,
|
| 357 |
+
]
|
| 358 |
+
self.tool_node = ToolNode(self.tools)
|
| 359 |
+
self.tool_validator = ToolCallValidator(self.tool_node)
|
| 360 |
+
self.llm_with_tools = self.llm.bind_tools(self.tools)
|
| 361 |
+
else:
|
| 362 |
+
self.tools = []
|
| 363 |
+
self.tool_node = None
|
| 364 |
+
self.tool_validator = None
|
| 365 |
+
self.llm_with_tools = self.llm
|
| 366 |
|
| 367 |
# ── Setup ──────────────────────────────────────────────────────────────────
|
| 368 |
async def async_setup(self) -> None:
|
| 369 |
+
db_path = get_db_path()
|
| 370 |
+
self.conn = await aiosqlite.connect(db_path)
|
| 371 |
+
self._meta_conn = await aiosqlite.connect(db_path)
|
| 372 |
+
|
| 373 |
self.checkpointer = AsyncSqliteSaver(self.conn)
|
| 374 |
await self._create_tables()
|
| 375 |
self.graph = self._build_graph()
|
|
|
|
| 433 |
async def should_summarize(self, state: ChatState) -> str:
    """Pick the next node: summarise once the history exceeds 10 messages."""
    if len(state["messages"]) > 10:
        return "summarize_node"
    return "chat_node"
|
| 435 |
|
| 436 |
+
# ── Chat node ──────────────────────────────────────────────────────────────
|
| 437 |
+
# FIX: chat_node now stores the COMPLETE response in graph state (for
|
| 438 |
+
# checkpointing / memory), while ai_only_stream handles live token delivery
|
| 439 |
+
# directly from the LLM — bypassing the graph's collect-then-return pattern.
|
| 440 |
async def chat_node(self, state: ChatState):
    """Produce the assistant's next message for the graph state.

    In fallback mode (or when no LLM is configured) a fixed apology message
    is returned. Otherwise the model is streamed with a system prompt
    prepended and the streamed chunks are merged into one message.

    Returns:
        A state update ``{"messages": [response]}`` holding a single message.
    """
    if self._use_fallback or self.llm is None:
        apology = AIMessage(content=(
            "দুঃখিত, এই মুহূর্তে AI সংযোগ পাওয়া যাচ্ছে না। "
            "অনুগ্রহ করে পরে আবার চেষ্টা করুন।"
        ))
        return {"messages": [apology]}

    summary = state.get("summary", "")
    messages = state["messages"]

    # Ollama always gets its own prompt; otherwise the summary-aware prompt
    # is used only when a summary exists.
    if self._use_ollama:
        sys_content = OLLAMA_SYSTEM
    elif summary:
        sys_content = SUMMARY_SYSTEM.format(summary=summary)
    else:
        sys_content = BASE_SYSTEM

    full_messages = [SystemMessage(content=sys_content), *messages]

    # Fold the streamed chunks into one message for graph state storage.
    response = None
    async for piece in self.llm_with_tools.astream(full_messages):
        response = piece if response is None else response + piece
    if response is None:
        # The stream produced nothing: store an empty reply.
        response = AIMessage(content="")

    text = str(response.content)
    print(f"[AI] response ({len(text)} chars): {text[:120]}")
    return {"messages": [response]}
|
| 473 |
|
| 474 |
+
# ── Validated tool node ────────────────────────────────────────────────────
|
| 475 |
+
async def validated_tools_node(self, state: ChatState):
    """Run the tool step through ``self.tool_validator`` when one is set.

    Returns:
        The result of ``self.tool_validator.invoke(state)``, or ``state``
        unchanged when no validator is configured.
    """
    validator = self.tool_validator
    if validator is not None:
        return await validator.invoke(state)
    return state
|
| 479 |
+
|
| 480 |
# ── Graph ──────────────────────────────────────────────────────────────────
|
| 481 |
def _build_graph(self):
    """Assemble and compile the chat graph.

    START always feeds ``chat_node``. When Gemini is in use and a tool node
    is configured, a ``tools`` node is added, ``tools_condition`` routes out
    of ``chat_node`` and ``tools`` loops back to it. Otherwise ``chat_node``
    leads straight to END.

    Returns:
        The graph compiled with ``self.checkpointer``.
    """
    builder = StateGraph(ChatState)
    builder.add_node("chat_node", self.chat_node)
    builder.add_edge(START, "chat_node")

    tools_enabled = self._use_gemini and self.tool_node is not None
    if not tools_enabled:
        builder.add_edge("chat_node", END)
    else:
        builder.add_node("tools", self.validated_tools_node)
        builder.add_conditional_edges("chat_node", tools_condition)
        builder.add_edge("tools", "chat_node")

    return builder.compile(checkpointer=self.checkpointer)
|
| 495 |
|
| 496 |
def _build_summary_graph(self):
|
|
|
|
| 500 |
g.add_edge("summarize_node", END)
|
| 501 |
return g.compile(checkpointer=self.checkpointer)
|
| 502 |
|
| 503 |
+
# ── Streaming — FIXED ──────────────────────────────────────────────────────
|
| 504 |
+
async def ai_only_stream(self, user_id: str, user_query: str, thread_id: str):
    """Yield the AI reply to ``user_query`` as text tokens in real time.

    Two phases:
      1. Tokens are streamed straight from ``self.llm_with_tools`` and yielded
         to the caller as they arrive.
      2. After the stream ends, a background task invokes the graph with the
         user's message so the checkpointer records the turn.

    If no LLM is available, or the stream raises, a single Bangla apology
    string is yielded and the generator stops.

    Args:
        user_id: Caller's user id (not used in this method).
        user_query: Text of the user's message.
        thread_id: Checkpointer thread whose summary/history is loaded and
            to which the turn is saved.

    Yields:
        Each non-empty ``chunk.content`` produced by the LLM stream.
    """
    if self._use_fallback or self.llm is None:
        yield (
            "দুঃখিত, এই মুহূর্তে AI সংযোগ পাওয়া যাচ্ছে না। "
            "অনুগ্রহ করে পরে আবার চেষ্টা করুন।"
        )
        return

    config = {"configurable": {"thread_id": thread_id}}

    # Read summary and history from ONE checkpoint snapshot. Fetching the
    # state twice cost an extra round trip and could mix two snapshots.
    summary = ""
    history: list = []
    try:
        state = await self.graph.aget_state(config)
        if state and state.values:
            summary = state.values.get("summary", "")
            history = list(state.values.get("messages", []))
    except Exception as exc:
        # Best effort: an unreadable checkpoint must not block the reply,
        # but the failure is reported instead of being silently dropped.
        print(f"[AI] Could not load graph state (continuing without history): {exc}")

    if self._use_ollama:
        sys_content = OLLAMA_SYSTEM
    elif summary:
        sys_content = SUMMARY_SYSTEM.format(summary=summary)
    else:
        sys_content = BASE_SYSTEM

    full_messages = (
        [SystemMessage(content=sys_content)]
        + history
        + [HumanMessage(content=user_query)]
    )

    print(f"[AI] Streaming for thread={thread_id}, history={len(history)} msgs")

    # Phase 1: stream tokens live to the caller.
    received_any_chunk = False
    token_count = 0
    try:
        async for chunk in self.llm_with_tools.astream(full_messages):
            received_any_chunk = True
            if chunk.content:
                token_count += 1
                yield chunk.content
    except Exception as exc:
        print(f"[AI] Streaming error: {exc}")
        import traceback
        traceback.print_exc()
        yield "দুঃখিত, একটি সমস্যা হয়েছে। আবার চেষ্টা করুন।"
        return

    print(f"[AI] Stream done: {token_count} tokens")

    # Phase 2: persist the turn in the background (non-blocking).
    if not received_any_chunk:
        return

    async def _save_to_graph():
        # NOTE(review): ainvoke runs the graph from START, so chat_node calls
        # the LLM again; the reply stored in the checkpoint is that second
        # generation and may differ from the text streamed above. Consider
        # persisting the already-streamed reply instead.
        try:
            await self.graph.ainvoke(
                {"messages": [HumanMessage(content=user_query)]},
                config=config,
            )
        except Exception as exc:
            # Non-critical: history save failed, but the user got a reply.
            print(f"[AI] Graph state save error (non-critical): {exc}")

    # The event loop keeps only weak references to tasks, so hold a strong
    # reference until the save finishes; otherwise it can be collected mid-run.
    pending = getattr(self, "_background_tasks", None)
    if pending is None:
        pending = set()
        self._background_tasks = pending
    save_task = asyncio.create_task(_save_to_graph())
    pending.add(save_task)
    save_task.add_done_callback(pending.discard)
|
| 599 |
|
| 600 |
# ── Thread management ──────────────────────────────────────────────────────
|
| 601 |
@staticmethod
|
| 602 |
def generate_thread_id() -> str:
|
| 603 |
return str(uuid.uuid4())
|
| 604 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
# ── Public entry point ─────────────────────────────────────────────────────
|
| 606 |
async def main(self, user_id: str, user_query: str):
    """Resolve the user's thread and return an async generator of AI tokens.

    The thread id is looked up in ``userid_threadid``; when the user has no
    row yet, a new id (``user_id`` followed by a generated UUID) is inserted
    and committed first.

    Args:
        user_id: Identifier of the user sending the message.
        user_query: Text of the user's message.

    Returns:
        The async generator produced by ``self.ai_only_stream``.
    """
    lookup = self._meta_conn.execute(
        "SELECT threadId FROM userid_threadid WHERE userId = ?", (user_id,)
    )
    async with lookup as cursor:
        existing = await cursor.fetchone()

    if existing is not None:
        thread_id = existing[0]
    else:
        # First message from this user: create and store their thread id.
        thread_id = user_id + self.generate_thread_id()
        await self._meta_conn.execute(
            "INSERT INTO userid_threadid (userId, threadId) VALUES (?, ?)",
            (user_id, thread_id),
        )
        await self._meta_conn.commit()

    # Stream straight from the LLM rather than through the graph node.
    return self.ai_only_stream(user_id, user_query, thread_id)
|
|
|
|
|
|
|
|
|
|
|
|
frontend/index.html
CHANGED
|
@@ -1,48 +1,308 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
-
<html lang="
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8" />
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
|
| 6 |
-
<title>
|
| 7 |
-
|
|
|
|
|
|
|
| 8 |
<link rel="stylesheet" href="style.css" />
|
| 9 |
</head>
|
| 10 |
<body>
|
| 11 |
|
| 12 |
-
<
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
</div>
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
<div
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
<div class="
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
| 38 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
</div>
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
</div>
|
| 43 |
|
| 44 |
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 45 |
<script src="script.js"></script>
|
| 46 |
</body>
|
| 47 |
</html>
|
| 48 |
-
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
|
| 46 |
<!DOCTYPE html>
|
| 47 |
+
<html lang="bn">
|
| 48 |
<head>
|
| 49 |
<meta charset="UTF-8" />
|
| 50 |
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
|
| 51 |
+
<title>DAA — ডাক্তার অ্যাপয়েন্টমেন্ট সহকারী</title>
|
| 52 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 53 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 54 |
+
<link href="https://fonts.googleapis.com/css2?family=Syne:wght@400;600;700;800&family=JetBrains+Mono:wght@300;400&family=Hind+Siliguri:wght@300;400;500;600&display=swap" rel="stylesheet">
|
| 55 |
<link rel="stylesheet" href="style.css" />
|
| 56 |
</head>
|
| 57 |
<body>
|
| 58 |
|
| 59 |
+
<!-- ── Ambient background ── -->
|
| 60 |
+
<div class="bg-orb orb-1"></div>
|
| 61 |
+
<div class="bg-orb orb-2"></div>
|
| 62 |
+
<div class="bg-orb orb-3"></div>
|
| 63 |
|
| 64 |
+
<!-- ══════════════════════════════════════════════════════════════
|
| 65 |
+
INIT OVERLAY — shown until WS is ready + animations done
|
| 66 |
+
No error text is displayed here; overlay auto-closes via
|
| 67 |
+
hard 8s failsafe if backend takes longer than expected.
|
| 68 |
+
══════════════════════════════════════════════════════════════ -->
|
| 69 |
+
<div id="init-overlay" class="init-overlay">
|
| 70 |
+
<div class="init-card">
|
| 71 |
+
<div class="init-logo">
|
| 72 |
+
<svg width="56" height="56" viewBox="0 0 56 56" fill="none">
|
| 73 |
+
<circle cx="28" cy="28" r="26" stroke="url(#g1)" stroke-width="2"/>
|
| 74 |
+
<path d="M18 28 Q28 16 38 28 Q28 40 18 28Z" fill="url(#g2)" opacity="0.9"/>
|
| 75 |
+
<defs>
|
| 76 |
+
<linearGradient id="g1" x1="0" y1="0" x2="56" y2="56">
|
| 77 |
+
<stop offset="0%" stop-color="#22d3ee"/><stop offset="100%" stop-color="#818cf8"/>
|
| 78 |
+
</linearGradient>
|
| 79 |
+
<linearGradient id="g2" x1="0" y1="0" x2="56" y2="56">
|
| 80 |
+
<stop offset="0%" stop-color="#22d3ee"/><stop offset="100%" stop-color="#818cf8"/>
|
| 81 |
+
</linearGradient>
|
| 82 |
+
</defs>
|
| 83 |
+
</svg>
|
| 84 |
</div>
|
| 85 |
+
<h2 class="init-title">AI Voice Assistant</h2>
|
| 86 |
+
<p class="init-subtitle">বাংলা ভয়েস সহকারী</p>
|
| 87 |
|
| 88 |
+
<div class="init-stages">
|
| 89 |
+
<div class="stage" id="stage-1">
|
| 90 |
+
<div class="stage-dot"></div>
|
| 91 |
+
<span>AI Engine শুরু হচ্ছে…</span>
|
| 92 |
+
<div class="stage-check">✓</div>
|
| 93 |
+
</div>
|
| 94 |
+
<div class="stage" id="stage-2">
|
| 95 |
+
<div class="stage-dot"></div>
|
| 96 |
+
<span>Speech Recognition মডেল লোড হচ্ছে…</span>
|
| 97 |
+
<div class="stage-check">✓</div>
|
| 98 |
+
</div>
|
| 99 |
+
<div class="stage" id="stage-3">
|
| 100 |
+
<div class="stage-dot"></div>
|
| 101 |
+
<span>GPU Warmup চলছে…</span>
|
| 102 |
+
<div class="stage-check">✓</div>
|
| 103 |
+
</div>
|
| 104 |
+
<div class="stage" id="stage-4">
|
| 105 |
+
<div class="stage-dot"></div>
|
| 106 |
+
<span>Voice Pipeline প্রস্তুত হচ্ছে…</span>
|
| 107 |
+
<div class="stage-check">✓</div>
|
| 108 |
+
</div>
|
| 109 |
+
</div>
|
| 110 |
+
|
| 111 |
+
<div class="init-bar-wrap">
|
| 112 |
+
<div class="init-bar" id="init-bar"></div>
|
| 113 |
+
</div>
|
| 114 |
+
<p class="init-status" id="init-status">সংযোগ স্থাপন করা হচ্ছে…</p>
|
| 115 |
+
</div>
|
| 116 |
+
</div>
|
| 117 |
+
|
| 118 |
+
<!-- ══════════════════════════════════════════════════════════════
|
| 119 |
+
MAIN APP
|
| 120 |
+
══════════════════════════════════════════════════════════════ -->
|
| 121 |
+
<div class="app" id="app" style="opacity:0;pointer-events:none;">
|
| 122 |
+
|
| 123 |
+
<!-- ── Sidebar ── -->
|
| 124 |
+
<aside class="sidebar" id="sidebar">
|
| 125 |
+
<div class="sidebar-header">
|
| 126 |
+
<div class="brand">
|
| 127 |
+
<svg width="28" height="28" viewBox="0 0 56 56" fill="none">
|
| 128 |
+
<circle cx="28" cy="28" r="26" stroke="url(#gs1)" stroke-width="2"/>
|
| 129 |
+
<path d="M18 28 Q28 16 38 28 Q28 40 18 28Z" fill="url(#gs2)" opacity="0.9"/>
|
| 130 |
+
<defs>
|
| 131 |
+
<linearGradient id="gs1" x1="0" y1="0" x2="56" y2="56">
|
| 132 |
+
<stop offset="0%" stop-color="#22d3ee"/><stop offset="100%" stop-color="#818cf8"/>
|
| 133 |
+
</linearGradient>
|
| 134 |
+
<linearGradient id="gs2" x1="0" y1="0" x2="56" y2="56">
|
| 135 |
+
<stop offset="0%" stop-color="#22d3ee"/><stop offset="100%" stop-color="#818cf8"/>
|
| 136 |
+
</linearGradient>
|
| 137 |
+
</defs>
|
| 138 |
+
</svg>
|
| 139 |
+
<span>DAA Assistant</span>
|
| 140 |
+
</div>
|
| 141 |
+
<button class="sidebar-toggle" id="sidebar-toggle" title="Toggle sidebar">‹</button>
|
| 142 |
+
</div>
|
| 143 |
+
|
| 144 |
+
<!-- System Status -->
|
| 145 |
+
<div class="status-panel">
|
| 146 |
+
<div class="status-row">
|
| 147 |
+
<span class="status-label">System</span>
|
| 148 |
+
<span class="status-badge badge-green" id="sys-status">Ready</span>
|
| 149 |
+
</div>
|
| 150 |
+
<div class="status-row">
|
| 151 |
+
<span class="status-label">STT</span>
|
| 152 |
+
<span class="status-badge badge-green" id="stt-status">Online</span>
|
| 153 |
+
</div>
|
| 154 |
+
<div class="status-row">
|
| 155 |
+
<span class="status-label">LLM</span>
|
| 156 |
+
<span class="status-badge badge-green" id="llm-status">Gemini 2.0</span>
|
| 157 |
+
</div>
|
| 158 |
+
<div class="status-row">
|
| 159 |
+
<span class="status-label">TTS</span>
|
| 160 |
+
<span class="status-badge badge-green" id="tts-status">Edge TTS</span>
|
| 161 |
+
</div>
|
| 162 |
+
</div>
|
| 163 |
|
| 164 |
+
<div class="sidebar-divider"></div>
|
| 165 |
|
| 166 |
+
<!-- Latency Dashboard -->
|
| 167 |
+
<div class="dash-section">
|
| 168 |
+
<div class="dash-title">⚡ Latency Dashboard</div>
|
| 169 |
+
<div class="metric-grid">
|
| 170 |
+
<div class="metric-card">
|
| 171 |
+
<div class="metric-val" id="m-stt">—</div>
|
| 172 |
+
<div class="metric-label">STT (ms)</div>
|
| 173 |
+
</div>
|
| 174 |
+
<div class="metric-card">
|
| 175 |
+
<div class="metric-val" id="m-llm">—</div>
|
| 176 |
+
<div class="metric-label">LLM (ms)</div>
|
| 177 |
+
</div>
|
| 178 |
+
<div class="metric-card">
|
| 179 |
+
<div class="metric-val" id="m-tts">—</div>
|
| 180 |
+
<div class="metric-label">TTS (ms)</div>
|
| 181 |
+
</div>
|
| 182 |
+
<div class="metric-card">
|
| 183 |
+
<div class="metric-val" id="m-total">—</div>
|
| 184 |
+
<div class="metric-label">Total (ms)</div>
|
| 185 |
+
</div>
|
| 186 |
+
</div>
|
| 187 |
+
</div>
|
| 188 |
+
|
| 189 |
+
<div class="sidebar-divider"></div>
|
| 190 |
|
| 191 |
+
<!-- Voice Settings -->
|
| 192 |
+
<div class="dash-section">
|
| 193 |
+
<div class="dash-title">🎛️ Voice Settings</div>
|
| 194 |
+
|
| 195 |
+
<div class="setting-row">
|
| 196 |
+
<label>Silence Threshold</label>
|
| 197 |
+
<div class="slider-wrap">
|
| 198 |
+
<input type="range" id="s-threshold" min="-60" max="-20" value="-32" step="1">
|
| 199 |
+
<span id="s-threshold-val">-32 dB</span>
|
| 200 |
+
</div>
|
| 201 |
+
</div>
|
| 202 |
+
<div class="setting-row">
|
| 203 |
+
<label>Silence Timeout</label>
|
| 204 |
+
<div class="slider-wrap">
|
| 205 |
+
<input type="range" id="s-timeout" min="300" max="2000" value="900" step="50">
|
| 206 |
+
<span id="s-timeout-val">900 ms</span>
|
| 207 |
</div>
|
| 208 |
+
</div>
|
| 209 |
+
<div class="setting-row">
|
| 210 |
+
<label>TTS Voice</label>
|
| 211 |
+
<select id="s-voice" class="setting-select">
|
| 212 |
+
<option value="bn-BD-NabanitaNeural">Nabanita (Female)</option>
|
| 213 |
+
<option value="bn-BD-PradeepNeural">Pradeep (Male)</option>
|
| 214 |
+
<option value="bn-IN-BashkarNeural">Bashkar (IN Male)</option>
|
| 215 |
+
<option value="bn-IN-TanishaaNeural">Tanishaa (IN Female)</option>
|
| 216 |
+
</select>
|
| 217 |
+
</div>
|
| 218 |
+
</div>
|
| 219 |
+
|
| 220 |
+
<div class="sidebar-divider"></div>
|
| 221 |
+
|
| 222 |
+
<!-- Audio Queue -->
|
| 223 |
+
<div class="dash-section">
|
| 224 |
+
<div class="dash-title">📊 Audio Stream</div>
|
| 225 |
+
<div class="queue-vis" id="queue-vis">
|
| 226 |
+
<div class="queue-bar" style="height:4px"></div>
|
| 227 |
+
<div class="queue-bar" style="height:4px"></div>
|
| 228 |
+
<div class="queue-bar" style="height:4px"></div>
|
| 229 |
+
<div class="queue-bar" style="height:4px"></div>
|
| 230 |
+
<div class="queue-bar" style="height:4px"></div>
|
| 231 |
+
<div class="queue-bar" style="height:4px"></div>
|
| 232 |
+
<div class="queue-bar" style="height:4px"></div>
|
| 233 |
+
<div class="queue-bar" style="height:4px"></div>
|
| 234 |
+
</div>
|
| 235 |
+
<div class="queue-label">Chunks in flight: <span id="chunks-count">0</span></div>
|
| 236 |
+
</div>
|
| 237 |
+
</aside>
|
| 238 |
+
|
| 239 |
+
<!-- ── Main area ── -->
|
| 240 |
+
<main class="main">
|
| 241 |
|
| 242 |
+
<!-- Top bar -->
|
| 243 |
+
<header class="topbar">
|
| 244 |
+
<div class="topbar-left">
|
| 245 |
+
<button class="mobile-menu-btn" id="mobile-menu-btn">☰</button>
|
| 246 |
+
<div class="topbar-state">
|
| 247 |
+
<div class="state-dot" id="state-dot"></div>
|
| 248 |
+
<span id="state-label">প্রস্তুত</span>
|
| 249 |
</div>
|
| 250 |
+
</div>
|
| 251 |
+
<div class="topbar-center">
|
| 252 |
+
<span class="topbar-title">🏥 ডাক্তার অ্যাপয়েন্টমেন্ট সহকারী</span>
|
| 253 |
+
</div>
|
| 254 |
+
<div class="topbar-right">
|
| 255 |
+
<button class="clear-btn" id="clear-btn" title="Clear conversation">↺ Clear</button>
|
| 256 |
+
</div>
|
| 257 |
+
</header>
|
| 258 |
|
| 259 |
+
<!-- Chat -->
|
| 260 |
+
<div id="chat-box"></div>
|
| 261 |
+
|
| 262 |
+
<!-- Voice visualizer — shown only while mic is active -->
|
| 263 |
+
<div class="voice-visualizer" id="voice-viz">
|
| 264 |
+
<div class="viz-bar"></div><div class="viz-bar"></div><div class="viz-bar"></div>
|
| 265 |
+
<div class="viz-bar"></div><div class="viz-bar"></div><div class="viz-bar"></div>
|
| 266 |
+
<div class="viz-bar"></div><div class="viz-bar"></div><div class="viz-bar"></div>
|
| 267 |
+
<div class="viz-bar"></div><div class="viz-bar"></div><div class="viz-bar"></div>
|
| 268 |
+
<div class="viz-bar"></div><div class="viz-bar"></div><div class="viz-bar"></div>
|
| 269 |
</div>
|
| 270 |
|
| 271 |
+
<!-- Controls -->
|
| 272 |
+
<footer class="controls">
|
| 273 |
+
<div class="text-row">
|
| 274 |
+
<input
|
| 275 |
+
type="text"
|
| 276 |
+
id="text-input"
|
| 277 |
+
placeholder="বার্তা লিখুন… (Type a message)"
|
| 278 |
+
autocomplete="off"
|
| 279 |
+
/>
|
| 280 |
+
|
| 281 |
+
<button id="send-btn" title="Send">
|
| 282 |
+
<svg width="20" height="20" viewBox="0 0 24 24" fill="none"
|
| 283 |
+
stroke="currentColor" stroke-width="2">
|
| 284 |
+
<line x1="22" y1="2" x2="11" y2="13"/>
|
| 285 |
+
<polygon points="22 2 15 22 11 13 2 9 22 2"/>
|
| 286 |
+
</svg>
|
| 287 |
+
</button>
|
| 288 |
+
</div>
|
| 289 |
+
<div class="voice-row">
|
| 290 |
+
<button id="mic-btn" class="mic-btn mic-off">
|
| 291 |
+
<span class="mic-icon">🎤</span>
|
| 292 |
+
<span class="mic-label">Voice শুরু করুন</span>
|
| 293 |
+
</button>
|
| 294 |
+
<button id="stop-btn" class="stop-btn" title="Stop AI speech">
|
| 295 |
+
<svg width="18" height="18" viewBox="0 0 24 24" fill="currentColor">
|
| 296 |
+
<rect x="4" y="4" width="16" height="16" rx="2"/>
|
| 297 |
+
</svg>
|
| 298 |
+
Stop
|
| 299 |
+
</button>
|
| 300 |
+
</div>
|
| 301 |
+
</footer>
|
| 302 |
+
</main>
|
| 303 |
</div>
|
| 304 |
|
| 305 |
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 306 |
<script src="script.js"></script>
|
| 307 |
</body>
|
| 308 |
</html>
|
|
|
frontend/script.js
CHANGED
|
@@ -1,207 +1,666 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
const chatBox = document.getElementById('chat-box');
|
| 2 |
const sendBtn = document.getElementById('send-btn');
|
| 3 |
const textInput = document.getElementById('text-input');
|
| 4 |
const micBtn = document.getElementById('mic-btn');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
const chatSocket = new WebSocket('ws://127.0.0.1:8679/ws/chat');
|
| 9 |
-
const voiceSocket = new WebSocket('ws://127.0.0.1:8679/ws/voice');
|
| 10 |
-
voiceSocket.binaryType = 'arraybuffer';
|
| 11 |
-
|
| 12 |
let micStream = null;
|
| 13 |
-
let
|
| 14 |
let analyser = null;
|
| 15 |
let mediaRecorder = null;
|
| 16 |
let audioChunks = [];
|
| 17 |
let isListening = false;
|
| 18 |
let isSpeaking = false;
|
| 19 |
-
let silenceTimer = null;
|
| 20 |
-
let vadInterval = null;
|
| 21 |
let isProcessing = false;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
|
| 27 |
-
const
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
|
| 34 |
-
function
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
_schedEndTime = 0;
|
| 38 |
-
}
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
| 42 |
}
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
|
|
|
| 46 |
|
| 47 |
-
|
| 48 |
-
let decoded;
|
| 49 |
-
try {
|
| 50 |
-
decoded = await ctx.decodeAudioData(buffer.slice(0));
|
| 51 |
-
} catch (err) {
|
| 52 |
-
console.warn('[AUDIO] decode error:', err);
|
| 53 |
-
return;
|
| 54 |
-
}
|
| 55 |
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
}
|
| 67 |
|
| 68 |
-
/
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
return;
|
| 80 |
-
}
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
}, remaining);
|
| 86 |
-
}
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
}
|
| 92 |
|
| 93 |
-
|
| 94 |
-
* Stop all queued and currently-playing audio immediately.
|
| 95 |
-
* Closes the AudioContext so future-scheduled nodes are silenced too.
|
| 96 |
-
*/
|
| 97 |
-
function stopAllAudio() {
|
| 98 |
-
_playbackCancelled = true;
|
| 99 |
-
clearTimeout(_endTimer);
|
| 100 |
-
_endTimer = null;
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
| 107 |
|
| 108 |
-
|
| 109 |
-
voiceSocket.send(JSON.stringify({ type: 'cancel' }));
|
| 110 |
-
}
|
| 111 |
}
|
| 112 |
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
});
|
| 117 |
-
|
| 118 |
-
function sendTextMessage() {
|
| 119 |
-
const msg = textInput.value.trim();
|
| 120 |
-
if (!msg) return;
|
| 121 |
-
appendMessage(msg, 'user');
|
| 122 |
-
chatSocket.send(JSON.stringify({ user_id: userId, user_query: msg }));
|
| 123 |
-
textInput.value = '';
|
| 124 |
}
|
| 125 |
|
| 126 |
-
|
|
|
|
|
|
|
| 127 |
let msg;
|
| 128 |
try {
|
| 129 |
-
msg = JSON.parse(
|
| 130 |
} catch {
|
| 131 |
return;
|
| 132 |
}
|
| 133 |
-
if (msg.type === 'chat' && msg.text) appendMessage(msg.text, 'ai');
|
| 134 |
-
if (msg.type === 'error') appendMessage('⚠️ ' + msg.text, 'system');
|
| 135 |
-
};
|
| 136 |
-
chatSocket.onerror = (e) => console.error('Chat WS error:', e);
|
| 137 |
-
chatSocket.onclose = () => console.log('Chat WS closed');
|
| 138 |
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
return;
|
| 150 |
}
|
| 151 |
|
| 152 |
let msg;
|
| 153 |
try {
|
| 154 |
-
msg = JSON.parse(
|
| 155 |
} catch {
|
| 156 |
return;
|
| 157 |
}
|
| 158 |
|
|
|
|
|
|
|
| 159 |
switch (msg.type) {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
case 'stt':
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
break;
|
| 164 |
|
| 165 |
case 'llm_token':
|
| 166 |
-
if (!
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
}
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
| 172 |
chatBox.scrollTop = chatBox.scrollHeight;
|
| 173 |
break;
|
| 174 |
|
| 175 |
case 'end':
|
| 176 |
-
if (
|
| 177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
}
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
break;
|
| 183 |
|
| 184 |
case 'error':
|
| 185 |
-
|
|
|
|
|
|
|
|
|
|
| 186 |
isProcessing = false;
|
| 187 |
-
|
| 188 |
break;
|
| 189 |
|
| 190 |
case 'pong':
|
| 191 |
break;
|
| 192 |
|
| 193 |
default:
|
| 194 |
-
console.log('[WS]
|
| 195 |
}
|
| 196 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
micBtn.onclick = async () => {
|
| 199 |
-
if (
|
| 200 |
-
else
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
};
|
| 202 |
|
| 203 |
async function startListening() {
|
| 204 |
-
|
| 205 |
|
| 206 |
try {
|
| 207 |
micStream = await navigator.mediaDevices.getUserMedia({
|
|
@@ -213,147 +672,245 @@ async function startListening() {
|
|
| 213 |
sampleRate: 16000,
|
| 214 |
},
|
| 215 |
});
|
| 216 |
-
} catch (
|
| 217 |
-
console.error('Mic
|
| 218 |
-
|
| 219 |
return;
|
| 220 |
}
|
| 221 |
|
| 222 |
-
|
| 223 |
-
const
|
| 224 |
-
analyser =
|
| 225 |
analyser.fftSize = 512;
|
| 226 |
-
|
|
|
|
| 227 |
|
| 228 |
isListening = true;
|
| 229 |
-
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
}
|
| 232 |
|
| 233 |
function stopListening() {
|
| 234 |
-
clearInterval(
|
|
|
|
| 235 |
clearTimeout(silenceTimer);
|
| 236 |
-
|
| 237 |
|
| 238 |
-
if (isSpeaking)
|
| 239 |
stopAllAudio();
|
| 240 |
|
| 241 |
micStream?.getTracks().forEach((t) => t.stop());
|
| 242 |
-
|
| 243 |
-
micStream =
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
|
|
|
|
|
|
|
|
|
| 247 |
}
|
| 248 |
|
|
|
|
| 249 |
function vadTick() {
|
| 250 |
if (!analyser) return;
|
|
|
|
|
|
|
| 251 |
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
const
|
| 256 |
-
const db = rms > 0 ? 20 * Math.log10(rms) : -Infinity;
|
| 257 |
-
const speaking = db > SILENCE_THRESHOLD_DB;
|
| 258 |
|
| 259 |
-
if (
|
| 260 |
if (isProcessing) {
|
| 261 |
-
console.log('[VAD] Barge-in — stopping TTS.');
|
| 262 |
stopAllAudio();
|
| 263 |
isProcessing = false;
|
| 264 |
}
|
| 265 |
-
|
| 266 |
clearTimeout(silenceTimer);
|
| 267 |
silenceTimer = null;
|
| 268 |
|
| 269 |
if (!isSpeaking) {
|
| 270 |
isSpeaking = true;
|
| 271 |
-
|
|
|
|
| 272 |
startRecorder();
|
| 273 |
-
|
|
|
|
| 274 |
}
|
| 275 |
} else {
|
| 276 |
if (isSpeaking && !silenceTimer) {
|
| 277 |
silenceTimer = setTimeout(() => {
|
| 278 |
silenceTimer = null;
|
| 279 |
isSpeaking = false;
|
| 280 |
-
|
| 281 |
isProcessing = true;
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
|
|
|
|
|
|
|
|
|
| 287 |
}
|
| 288 |
}
|
| 289 |
}
|
| 290 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
function startRecorder() {
|
| 292 |
if (!micStream) return;
|
| 293 |
audioChunks = [];
|
| 294 |
-
|
| 295 |
-
const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus')
|
| 296 |
? 'audio/webm;codecs=opus'
|
| 297 |
: 'audio/webm';
|
| 298 |
|
| 299 |
-
mediaRecorder = new MediaRecorder(micStream, { mimeType });
|
| 300 |
mediaRecorder.ondataavailable = (e) => {
|
| 301 |
if (e.data.size > 0) audioChunks.push(e.data);
|
| 302 |
};
|
| 303 |
-
|
| 304 |
mediaRecorder.onstop = async () => {
|
| 305 |
-
if (!audioChunks.length)
|
| 306 |
-
|
| 307 |
-
|
|
|
|
|
|
|
|
|
|
| 308 |
audioChunks = [];
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
} else {
|
| 314 |
-
console.warn('[VAD] WS not open
|
| 315 |
isProcessing = false;
|
| 316 |
-
if (isListening)
|
| 317 |
}
|
| 318 |
};
|
| 319 |
-
|
| 320 |
mediaRecorder.start();
|
| 321 |
}
|
| 322 |
|
| 323 |
-
function stopRecorder(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
if (!mediaRecorder || mediaRecorder.state === 'inactive') return;
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
};
|
| 330 |
-
}
|
| 331 |
mediaRecorder.stop();
|
| 332 |
mediaRecorder = null;
|
| 333 |
}
|
| 334 |
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
}
|
| 342 |
-
|
| 343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
}
|
| 345 |
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
|
| 350 |
-
|
| 351 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
} else {
|
| 353 |
-
|
| 354 |
}
|
| 355 |
-
|
| 356 |
-
chatBox.appendChild(div);
|
| 357 |
chatBox.scrollTop = chatBox.scrollHeight;
|
| 358 |
-
return
|
| 359 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* script.js — Production Bangla Voice AI Frontend
|
| 3 |
+
*
|
| 4 |
+
* FIXES APPLIED:
|
| 5 |
+
* FIX-1. PORT: WS_BASE was hardcoded to :8679 — changed to :8679 (uvicorn default).
|
| 6 |
+
* This was the PRIMARY cause of "no backend logs" — WebSocket never connected.
|
| 7 |
+
*
|
| 8 |
+
* FIX-2. CHAT STREAMING: sendText() now uses the VOICE WS with llm_token events
|
| 9 |
+
* instead of the chat WS, giving real-time streaming + TTS for chat mode too.
|
| 10 |
+
* The separate chatWS endpoint is kept as a fallback (text-only mode).
|
| 11 |
+
*
|
| 12 |
+
* FIX-3. THINKING BUBBLE: appendThinking() shows an animated "..." bubble while
|
| 13 |
+
* waiting for the first LLM token. Removed when first token arrives.
|
| 14 |
+
*
|
| 15 |
+
* FIX-4. _cancelled RESET: _cancelled is now reset to false on every sendText()
|
| 16 |
+
* call so previous voice cancellations don't block chat audio.
|
| 17 |
+
*
|
| 18 |
+
* FIX-5. CHAT WS STREAMING: onChatMsg now handles llm_token events from the chat
|
| 19 |
+
* endpoint, showing incremental text just like voice mode.
|
| 20 |
+
*
|
| 21 |
+
* FIX-6. LOGGING: Added console.log for every WS event for easier debugging.
|
| 22 |
+
*
|
| 23 |
+
* FIX-7. SEND FORMAT: chat WS payload now always includes user_id.
|
| 24 |
+
*
|
| 25 |
+
* All other logic (VAD, audio playback, reconnect, init overlay) preserved.
|
| 26 |
+
*/
|
| 27 |
+
|
| 28 |
+
'use strict';
|
| 29 |
+
|
| 30 |
+
// ─── DOM refs ─────────────────────────────────────────────────────────────────
|
| 31 |
const chatBox = document.getElementById('chat-box');
|
| 32 |
const sendBtn = document.getElementById('send-btn');
|
| 33 |
const textInput = document.getElementById('text-input');
|
| 34 |
const micBtn = document.getElementById('mic-btn');
|
| 35 |
+
const micLabel = micBtn.querySelector('.mic-label');
|
| 36 |
+
const stopBtn = document.getElementById('stop-btn');
|
| 37 |
+
const stateLabel = document.getElementById('state-label');
|
| 38 |
+
const stateDot = document.getElementById('state-dot');
|
| 39 |
+
const clearBtn = document.getElementById('clear-btn');
|
| 40 |
+
const voiceViz = document.getElementById('voice-viz');
|
| 41 |
+
const vizBars = Array.from(voiceViz.querySelectorAll('.viz-bar'));
|
| 42 |
+
const queueBars = Array.from(document.querySelectorAll('.queue-bar'));
|
| 43 |
+
const chunksCount = document.getElementById('chunks-count');
|
| 44 |
+
const initOverlay = document.getElementById('init-overlay');
|
| 45 |
+
const initBar = document.getElementById('init-bar');
|
| 46 |
+
const initStatus = document.getElementById('init-status');
|
| 47 |
+
const sidebarEl = document.getElementById('sidebar');
|
| 48 |
+
const sidebarToggle = document.getElementById('sidebar-toggle');
|
| 49 |
+
const mobileMenuBtn = document.getElementById('mobile-menu-btn');
|
| 50 |
+
const appEl = document.getElementById('app');
|
| 51 |
+
|
| 52 |
+
const sThreshold = document.getElementById('s-threshold');
|
| 53 |
+
const sThresholdVal = document.getElementById('s-threshold-val');
|
| 54 |
+
const sTimeout = document.getElementById('s-timeout');
|
| 55 |
+
const sTimeoutVal = document.getElementById('s-timeout-val');
|
| 56 |
+
const sVoice = document.getElementById('s-voice');
|
| 57 |
+
|
| 58 |
+
const mStt = document.getElementById('m-stt');
|
| 59 |
+
const mLlm = document.getElementById('m-llm');
|
| 60 |
+
const mTts = document.getElementById('m-tts');
|
| 61 |
+
const mTotal = document.getElementById('m-total');
|
| 62 |
+
const sysStat = document.getElementById('sys-status');
|
| 63 |
+
|
| 64 |
+
// ─── Persistent user identity ─────────────────────────────────────────────────
|
| 65 |
+
// Stable per-browser user id: reuse the one stored in localStorage under
// 'daa_uid', otherwise mint a new one and persist it.
const USER_ID = (() => {
  const STORAGE_KEY = 'daa_uid';

  const stored = localStorage.getItem(STORAGE_KEY);
  if (stored) return stored;

  // Base-36 timestamp plus four base-36 characters taken from Math.random().
  const stamp = Date.now().toString(36);
  const suffix = Math.random().toString(36).slice(2, 6);
  const fresh = `u_${stamp}_${suffix}`;

  localStorage.setItem(STORAGE_KEY, fresh);
  return fresh;
})();
|
| 77 |
+
|
| 78 |
+
// ─── WebSocket base URL ────────────────────────────────────────────────────────
|
| 79 |
+
// FIX-1: The port below (8679) must match the port the backend server listens on.
|
| 80 |
+
// If your server runs on a different port, update the number below.
|
| 81 |
+
// Base URL for both WebSocket endpoints (`/ws/chat`, `/ws/voice`).
// It uses the ws:// scheme on purpose: passing an http:// URL to
// `new WebSocket()` is only accepted by recent browsers and throws a
// SyntaxError in older ones. Use wss:// when the page is served over HTTPS.
const WS_BASE = 'ws://127.0.0.1:8679';
|
| 82 |
+
// location.hostname === 'localhost' || location.hostname === '127.0.0.1'
|
| 83 |
+
// ? `http://${location.hostname}:8679` // ← FIXED: was 8679
|
| 84 |
+
// : `http://${location.host}`;
|
| 85 |
+
|
| 86 |
+
console.log('WebSocket base URL:', WS_BASE); // FIX-6: log WS base URL for debugging
|
| 87 |
+
|
| 88 |
+
// ─── WS state ─────────────────────────────────────────────────────────────────
|
| 89 |
+
let chatWS = null;
|
| 90 |
+
let voiceWS = null;
|
| 91 |
+
|
| 92 |
+
let _chatRetry = 0;
|
| 93 |
+
let _voiceRetry = 0;
|
| 94 |
+
let _chatRetryTimer = null;
|
| 95 |
+
let _voiceRetryTimer = null;
|
| 96 |
+
|
| 97 |
+
// ─── VAD / recording settings ─────────────────────────────────────────────────
|
| 98 |
+
let SILENCE_MS = 450; // was 1000 (too slow)
|
| 99 |
+
let SILENCE_DB = -38; // slightly more sensitive
|
| 100 |
+
const VAD_MS = 80;
|
| 101 |
+
|
| 102 |
+
// ─── Playback state ───────────────────────────────────────────────────────────
|
| 103 |
+
let _ctx = null;
|
| 104 |
+
let _schedEnd = 0;
|
| 105 |
+
let _endTimer = null;
|
| 106 |
+
let _cancelled = false;
|
| 107 |
+
let _inFlight = 0;
|
| 108 |
|
| 109 |
+
// ─── Recording state ──────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
let micStream = null;
|
| 111 |
+
let analyserCtx = null;
|
| 112 |
let analyser = null;
|
| 113 |
let mediaRecorder = null;
|
| 114 |
let audioChunks = [];
|
| 115 |
let isListening = false;
|
| 116 |
let isSpeaking = false;
|
|
|
|
|
|
|
| 117 |
let isProcessing = false;
|
| 118 |
+
let silenceTimer = null;
|
| 119 |
+
let vadInt = null;
|
| 120 |
+
let vizInt = null;
|
| 121 |
+
|
| 122 |
+
// ─── AI streaming bubble state ────────────────────────────────────────────────
|
| 123 |
+
let aiEl = null; // current AI message div
|
| 124 |
+
let aiTxt = ''; // accumulated raw markdown for this turn
|
| 125 |
+
let thinkingEl = null; // FIX-3: "..." thinking bubble
|
| 126 |
+
|
| 127 |
+
// ─── Latency timestamps ───────────────────────────────────────────────────────
|
| 128 |
+
let tSend = 0,
|
| 129 |
+
tStt = 0,
|
| 130 |
+
tLlm = 0,
|
| 131 |
+
tTts = 0;
|
| 132 |
+
|
| 133 |
+
// ═══════════════════════════════════════════════════════════════════════════════
|
| 134 |
+
// INIT OVERLAY — 2-gate: both WS-ready AND stage animations done
|
| 135 |
+
// ═══════════════════════════════════════════════════════════════════════════════
|
| 136 |
+
|
| 137 |
+
const STAGES = [
|
| 138 |
+
{ id: 'stage-1', text: 'AI Engine শুরু হচ্ছে…', at: 400, pct: 20 },
|
| 139 |
+
{
|
| 140 |
+
id: 'stage-2',
|
| 141 |
+
text: 'Speech Recognition মডেল লোড হচ্ছে…',
|
| 142 |
+
at: 1100,
|
| 143 |
+
pct: 50,
|
| 144 |
+
},
|
| 145 |
+
{ id: 'stage-3', text: 'GPU Warmup চলছে…', at: 1900, pct: 75 },
|
| 146 |
+
{ id: 'stage-4', text: 'Voice Pipeline প্রস্তুত হচ্ছে…', at: 2700, pct: 90 },
|
| 147 |
+
];
|
| 148 |
+
|
| 149 |
+
let _wsGate = false;
|
| 150 |
+
let _stageGate = false;
|
| 151 |
+
let _initClosed = false;
|
| 152 |
+
|
| 153 |
+
/**
 * Dismiss the init overlay once both gates (`_wsGate` and `_stageGate`) are
 * open. Runs at most once; later calls are no-ops.
 */
function _tryClose() {
  const gatesOpen = _wsGate && _stageGate;
  if (_initClosed || !gatesOpen) return;

  _initClosed = true;
  initBar.style.width = '100%';
  initStatus.textContent = 'সিস্টেম প্রস্তুত ✓';

  // Reveal the app 450 ms later, after the bar has been set to 100%.
  const revealApp = () => {
    initOverlay.classList.add('hidden');
    appEl.style.opacity = '1';
    appEl.style.pointerEvents = 'auto';
    setState('ready');
  };
  setTimeout(revealApp, 450);
}
|
| 165 |
|
| 166 |
+
/**
 * Entry point: open the WebSockets and play the staged loading animation.
 *
 * Each entry of STAGES is activated at its `at` offset (ms); the previous
 * stage is marked done at the same moment. 650 ms after the last stage the
 * stage gate opens. An 8 s failsafe forces both gates open if the overlay
 * is still showing.
 */
function boot() {
  initWebSockets();

  for (let idx = 0; idx < STAGES.length; idx++) {
    const stage = STAGES[idx];
    setTimeout(() => {
      if (idx > 0) _stageDone(STAGES[idx - 1].id);
      const node = document.getElementById(stage.id);
      if (node) node.classList.add('active');
      initStatus.textContent = stage.text;
      initBar.style.width = stage.pct + '%';
    }, stage.at);
  }

  const finalStage = STAGES[STAGES.length - 1];
  setTimeout(() => {
    _stageDone(finalStage.id);
    _stageGate = true;
    _tryClose();
  }, finalStage.at + 650);

  // Hard failsafe: 8 s max regardless of WS state.
  setTimeout(() => {
    if (_initClosed) return;
    _wsGate = true;
    _stageGate = true;
    _tryClose();
  }, 8000);
}
|
| 196 |
|
| 197 |
+
/**
 * Flip a loading-stage element from the 'active' class to the 'done' class.
 * Does nothing when no element with that id exists.
 */
function _stageDone(id) {
  const node = document.getElementById(id);
  if (!node) return;

  node.classList.remove('active');
  node.classList.add('done');
}
|
| 204 |
|
| 205 |
+
// ═══════════════════════════════════════════════════════════════════════════════
|
| 206 |
+
// WEBSOCKETS — silent auto-reconnect, exponential backoff
|
| 207 |
+
// ═══════════════════════════════════════════════════════════════════════════════
|
| 208 |
|
| 209 |
+
/**
 * Reconnect delay in milliseconds: 1000 * 2^retries, capped at 16000.
 */
function _backoff(retries) {
  const CAP_MS = 16000;
  const uncapped = 1000 * 2 ** retries;
  return Math.min(uncapped, CAP_MS);
}
|
|
|
|
|
|
|
| 212 |
|
| 213 |
+
/**
 * Update the system-status badge text and colour class.
 * No-op when the badge element is absent.
 */
function _setSysStatus(online) {
  if (!sysStat) return;

  if (online) {
    sysStat.textContent = 'Ready';
    sysStat.className = 'status-badge ' + 'badge-green';
  } else {
    sysStat.textContent = 'Reconnecting';
    sysStat.className = 'status-badge ' + 'badge-yellow';
  }
}
|
| 219 |
|
| 220 |
+
// ── Chat WS ────────────────────────────────────────────────────────────────────
|
| 221 |
+
/**
 * Open the chat WebSocket unless one is already connecting or open, and
 * wire its handlers. A close schedules a reconnect after `_backoff(_chatRetry)`.
 */
function _connectChat() {
  const alreadyLive =
    chatWS &&
    (chatWS.readyState === WebSocket.CONNECTING ||
      chatWS.readyState === WebSocket.OPEN);
  if (alreadyLive) return;

  const url = `${WS_BASE}/ws/chat`;
  const socket = new WebSocket(url);
  chatWS = socket;

  socket.onopen = () => {
    _chatRetry = 0; // a successful connection resets the backoff
    console.log('[Chat WS] connected to', url);
  };

  socket.onerror = (e) => {
    console.error('[Chat WS] error:', e);
  };

  socket.onclose = (ev) => {
    console.log(`[Chat WS] closed (${ev.code}), retry #${_chatRetry + 1}`);
    clearTimeout(_chatRetryTimer);
    const delay = _backoff(_chatRetry);
    _chatRetryTimer = setTimeout(() => {
      _chatRetry++;
      _connectChat();
    }, delay);
  };

  socket.onmessage = onChatMsg;
}
|
| 246 |
|
| 247 |
+
// ── Voice WS ────────────────────────────────────────────────────────────────────
|
| 248 |
+
/**
 * Open the voice WebSocket (binary frames as ArrayBuffer) unless one is
 * already connecting or open.
 *
 * On open it sends an `init` message carrying USER_ID and opens the WS gate
 * of the init overlay. On close it marks the system as reconnecting, opens
 * the WS gate if the overlay is still up, stops an active listening session,
 * and schedules a reconnect after `_backoff(_voiceRetry)`.
 */
function _connectVoice() {
  const alreadyLive =
    voiceWS &&
    (voiceWS.readyState === WebSocket.CONNECTING ||
      voiceWS.readyState === WebSocket.OPEN);
  if (alreadyLive) return;

  const url = `${WS_BASE}/ws/voice`;
  const socket = new WebSocket(url);
  voiceWS = socket;
  socket.binaryType = 'arraybuffer';

  socket.onopen = () => {
    _voiceRetry = 0; // a successful connection resets the backoff
    console.log('[Voice WS] connected to', url, 'uid:', USER_ID);
    socket.send(JSON.stringify({ type: 'init', user_id: USER_ID }));
    _setSysStatus(true);
    _wsGate = true;
    _tryClose();
  };

  socket.onerror = (e) => {
    console.error('[Voice WS] error:', e);
  };

  socket.onclose = (ev) => {
    console.log(`[Voice WS] closed (${ev.code}), retry #${_voiceRetry + 1}`);
    _setSysStatus(false);

    // Open the WS gate so the overlay can still close without a connection.
    if (!_initClosed) {
      _wsGate = true;
      _tryClose();
    }

    if (isListening) stopListening();

    clearTimeout(_voiceRetryTimer);
    const delay = _backoff(_voiceRetry);
    _voiceRetryTimer = setTimeout(() => {
      _voiceRetry++;
      _connectVoice();
    }, delay);
  };

  socket.onmessage = onVoiceMsg;
}
|
| 292 |
|
| 293 |
+
/**
 * Start both connections: the chat socket first, then the voice socket.
 */
function initWebSockets() {
  _connectChat();
  _connectVoice();
}
|
| 297 |
|
| 298 |
+
// ── Chat WS handler ────────────────────────────────────────────────────────────
|
| 299 |
+
// FIX-5: Now handles llm_token for streaming, not just full 'chat' message
|
| 300 |
+
/**
 * Handle one message from the chat WebSocket.
 *
 * Recognised `type` values:
 *   - 'llm_token': append `msg.token` to the current AI bubble (streaming).
 *   - 'chat':      replace the bubble content with the full `msg.text`.
 *   - 'end':       final render, then reset per-turn state and timers.
 *   - 'error':     show `msg.text` as a system message and reset.
 * Frames that are not JSON objects are ignored.
 *
 * @param {MessageEvent} ev WebSocket message event; `ev.data` is JSON text.
 */
function onChatMsg(ev) {
  let msg;
  try {
    msg = JSON.parse(ev.data);
  } catch {
    return;
  }
  // JSON.parse accepts `null` and scalars; only objects carry a `type`.
  if (!msg || typeof msg !== 'object') return;

  console.log('[Chat WS] msg:', msg.type);

  // Escape text before it is written into innerHTML by the plain-text fallback.
  const escapeHtml = (s) =>
    s
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

  // Create the AI bubble for this turn if it does not exist yet.
  const ensureBubble = () => {
    if (aiEl) return;
    aiEl = document.createElement('div');
    aiEl.className = 'message ai';
    chatBox.appendChild(aiEl);
  };

  // Render the accumulated text into the bubble and keep the view pinned
  // to the bottom.
  // NOTE(review): marked does not sanitize its HTML output; consider running
  // it through a sanitizer before assigning to innerHTML.
  const paint = () => {
    aiEl.innerHTML =
      typeof marked !== 'undefined'
        ? marked.parse(aiTxt)
        : escapeHtml(aiTxt).replace(/\n/g, '<br>');
    chatBox.scrollTop = chatBox.scrollHeight;
  };

  switch (msg.type) {
    case 'llm_token':
      if (!msg.token) break;
      // First token of the turn: record LLM latency relative to send time.
      if (tLlm === 0) {
        tLlm = Date.now();
        if (tSend > 0) mLlm.textContent = tLlm - tSend + ' ms';
      }
      _removeThinking();
      ensureBubble();
      aiTxt += msg.token;
      paint();
      break;

    case 'chat':
      // Full response delivered in one message.
      if (!msg.text) break;
      _removeThinking();
      ensureBubble();
      aiTxt = String(msg.text);
      paint();
      break;

    case 'end':
      _removeThinking();
      if (aiEl && aiTxt) paint();
      aiEl = null;
      aiTxt = '';
      if (tSend > 0) mTotal.textContent = Date.now() - tSend + ' ms';
      tSend = tStt = tLlm = tTts = 0;
      isProcessing = false;
      setState('ready');
      break;

    case 'error':
      _removeThinking();
      appendMsg('⚠️ ' + msg.text, 'system');
      aiEl = null;
      aiTxt = '';
      isProcessing = false;
      setState('ready');
      break;
  }
}
|
| 376 |
+
|
| 377 |
+
// ── Voice WS handler ───────────────────────────────────────────────────────────
|
| 378 |
+
/**
 * Handle one message from the voice WebSocket.
 *
 * Binary frames (ArrayBuffer) are queued for playback. JSON frames are
 * dispatched on `type`:
 *   - 'init_ack':  logged only.
 *   - 'stt':       show the transcript as a user message and start a new
 *                  AI turn with a thinking bubble.
 *   - 'llm_token': append `msg.token` to the current AI bubble (streaming).
 *   - 'end':       final render, reset per-turn state, and schedule the
 *                  end-of-playback state change.
 *   - 'error':     show `msg.text` as a system message and reset.
 *   - 'pong':      ignored.
 * Frames that are not JSON objects are ignored.
 *
 * @param {MessageEvent} ev WebSocket message event.
 */
function onVoiceMsg(ev) {
  if (ev.data instanceof ArrayBuffer) {
    enqueueAudio(ev.data);
    return;
  }

  let msg;
  try {
    msg = JSON.parse(ev.data);
  } catch {
    return;
  }
  // JSON.parse accepts `null` and scalars; only objects carry a `type`.
  if (!msg || typeof msg !== 'object') return;

  console.log('[Voice WS] msg:', msg.type);

  // Escape text before it is written into innerHTML by the plain-text fallback.
  const escapeHtml = (s) =>
    s
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

  // Render the accumulated text into the bubble and keep the view pinned
  // to the bottom.
  // NOTE(review): marked does not sanitize its HTML output; consider running
  // it through a sanitizer before assigning to innerHTML.
  const paint = () => {
    aiEl.innerHTML =
      typeof marked !== 'undefined'
        ? marked.parse(aiTxt)
        : escapeHtml(aiTxt).replace(/\n/g, '<br>');
    chatBox.scrollTop = chatBox.scrollHeight;
  };

  switch (msg.type) {
    case 'init_ack':
      console.log('[Voice WS] user_id ack:', msg.user_id);
      break;

    case 'stt':
      tStt = Date.now();
      if (tSend > 0) mStt.textContent = tStt - tSend + ' ms';
      _removeThinking();
      appendMsg('🎤 ' + msg.text, 'user');
      aiEl = null;
      aiTxt = '';
      appendThinking(); // shown until the first LLM token arrives
      setState('processing');
      break;

    case 'llm_token':
      if (!msg.token) break;
      // First token of the turn: record LLM latency relative to the STT result.
      if (tLlm === 0) {
        tLlm = Date.now();
        if (tStt > 0) mLlm.textContent = tLlm - tStt + ' ms';
      }
      _removeThinking();
      if (!aiEl) {
        aiEl = document.createElement('div');
        aiEl.className = 'message ai';
        chatBox.appendChild(aiEl);
      }
      aiTxt += msg.token;
      paint();
      break;

    case 'end':
      if (aiEl && aiTxt) paint();
      _removeThinking();
      aiEl = null;
      aiTxt = '';
      if (tSend > 0) mTotal.textContent = Date.now() - tSend + ' ms';
      tSend = tStt = tLlm = tTts = 0;
      _scheduleEnd();
      isProcessing = false;
      break;

    case 'error':
      _removeThinking();
      appendMsg('⚠️ ' + msg.text, 'system');
      aiEl = null;
      aiTxt = '';
      isProcessing = false;
      setState(isListening ? 'listening' : 'ready');
      break;

    case 'pong':
      break;

    default:
      console.log('[Voice WS] unknown:', msg.type);
  }
}
|
| 462 |
+
|
| 463 |
+
// ─── FIX-3: Thinking bubble helpers ──────────────────────────────────────────
|
| 464 |
+
/**
 * Show the three-dot "thinking" bubble at the bottom of the chat.
 * Only one bubble exists at a time; a second call is a no-op.
 */
function appendThinking() {
  if (thinkingEl) return;

  const bubble = document.createElement('div');
  bubble.className = 'message ai thinking';
  bubble.innerHTML = '<span class="dot"></span>'.repeat(3);

  thinkingEl = bubble;
  chatBox.appendChild(bubble);
  chatBox.scrollTop = chatBox.scrollHeight;
}
|
| 473 |
+
|
| 474 |
+
/**
 * Remove the "thinking" bubble from the DOM, if one is showing, and clear
 * the reference to it.
 */
function _removeThinking() {
  if (!thinkingEl) return;

  thinkingEl.remove();
  thinkingEl = null;
}
|
| 480 |
+
|
| 481 |
+
// ═══════════════════════════════════════════════════════════════════════════════
|
| 482 |
+
// AUDIO PLAYBACK — gapless Web Audio API
|
| 483 |
+
// ═══════════════════════════════════════════════════════════════════════════════
|
| 484 |
+
|
| 485 |
+
/**
 * Return the shared playback AudioContext, creating it when missing or
 * closed (which also resets the scheduling cursor `_schedEnd`) and resuming
 * it when suspended.
 */
function _ctxEnsure() {
  const usable = _ctx && _ctx.state !== 'closed';
  if (!usable) {
    const AudioCtor = window.AudioContext || window.webkitAudioContext;
    _ctx = new AudioCtor();
    _schedEnd = 0;
  }

  if (_ctx.state === 'suspended') {
    _ctx.resume();
  }
  return _ctx;
}
|
| 493 |
+
|
| 494 |
+
// Buffer sources that have been started and have not finished yet.
// stopAllAudio() needs them: suspending the AudioContext only pauses
// scheduled sources, so without an explicit stop() they would continue
// playing as soon as the context is resumed for the next turn.
const _liveSources = new Set();

/**
 * Decode one audio chunk and schedule it directly after the previously
 * scheduled chunk so that playback is gapless.
 *
 * Chunks are dropped when playback has been cancelled (checked both before
 * and after the asynchronous decode), when decoding fails, or when the
 * decoded audio is shorter than 1 ms.
 *
 * @param {ArrayBuffer} buf Encoded audio received from the voice WebSocket.
 */
async function enqueueAudio(buf) {
  if (_cancelled) return;
  _inFlight++;
  _vizQ();

  const ctx = _ctxEnsure();
  let decoded;
  try {
    // decodeAudioData detaches its input, so hand it a copy.
    decoded = await ctx.decodeAudioData(buf.slice(0));
  } catch (e) {
    console.warn('[Audio] decode:', e.message);
    _inFlight = Math.max(0, _inFlight - 1);
    _vizQ();
    return;
  }

  if (!decoded || decoded.duration < 0.001 || _cancelled) {
    _inFlight = Math.max(0, _inFlight - 1);
    _vizQ();
    return;
  }

  // First playable chunk of the turn: record TTS latency after the first token.
  if (tTts === 0 && tLlm > 0) {
    tTts = Date.now();
    mTts.textContent = tTts - tLlm + ' ms';
  }

  const src = ctx.createBufferSource();
  src.buffer = decoded;
  src.connect(ctx.destination);

  // Start no earlier than 10 ms from now and never before the previous
  // chunk has finished.
  const now = ctx.currentTime;
  const start = Math.max(now + 0.01, _schedEnd);
  src.start(start);
  _schedEnd = start + decoded.duration;

  _liveSources.add(src);
  src.onended = () => {
    _liveSources.delete(src);
    _inFlight = Math.max(0, _inFlight - 1);
    _vizQ();
  };

  setState('speaking');
}

/**
 * Refresh the queue visualisation: the chunk counter and one bar per
 * in-flight chunk (active bars get a random height, idle bars 4 px).
 */
function _vizQ() {
  if (chunksCount) chunksCount.textContent = _inFlight;
  queueBars.forEach((b, i) => {
    b.classList.toggle('active', i < _inFlight);
    b.style.height = (i < _inFlight ? 12 + Math.random() * 30 : 4) + 'px';
  });
}

/**
 * Arrange for `_done()` to run once the scheduled audio has finished
 * (remaining scheduled time plus a 280 ms margin). Runs `_done()` at once
 * when there is no usable AudioContext.
 */
function _scheduleEnd() {
  clearTimeout(_endTimer);
  const ctx = _ctx;
  if (!ctx || ctx.state === 'closed') {
    _done();
    return;
  }
  const wait = Math.max(0, (_schedEnd - ctx.currentTime) * 1000) + 280;
  _endTimer = setTimeout(() => {
    if (!_cancelled) _done();
  }, wait);
}

/**
 * Mark the current turn as finished and return the UI to its idle state
 * ('listening' while the mic session is active, otherwise 'ready').
 */
function _done() {
  isProcessing = false;
  _inFlight = 0;
  _vizQ();
  setState(isListening ? 'listening' : 'ready');
}

/**
 * Cancel playback of the current turn.
 *
 * Every scheduled source is stopped and disconnected before the context is
 * suspended, so nothing left over from this turn can play when the context
 * is resumed later. Their `onended` handlers are detached first so they
 * cannot decrement `_inFlight` for chunks of a later turn. Finally a
 * `cancel` message is sent to the voice WebSocket if it is open.
 */
function stopAllAudio() {
  _cancelled = true;
  clearTimeout(_endTimer);
  _endTimer = null;
  _schedEnd = 0;

  for (const src of _liveSources) {
    src.onended = null;
    try {
      src.stop();
      src.disconnect();
    } catch {
      // The source already ended or was already disconnected.
    }
  }
  _liveSources.clear();

  _inFlight = 0;
  _vizQ();
  if (_ctx && _ctx.state === 'running') _ctx.suspend().catch(() => {});
  if (voiceWS && voiceWS.readyState === WebSocket.OPEN) {
    voiceWS.send(JSON.stringify({ type: 'cancel' }));
  }
}
|
| 578 |
+
|
| 579 |
+
// ═══════════════════════════════════════════════════════════════════════════════
|
| 580 |
+
// TEXT CHAT
|
| 581 |
+
// ═══════════════════════════════════════════════════════════════════════════════
|
| 582 |
+
|
| 583 |
+
sendBtn.onclick = sendText;
|
| 584 |
+
textInput.addEventListener('keydown', (e) => {
|
| 585 |
+
if (e.key === 'Enter' && !e.shiftKey) sendText();
|
| 586 |
+
});
|
| 587 |
+
|
| 588 |
+
/**
 * Send the contents of the text input as a chat query.
 *
 * Ignored when the input is empty or a turn is already being processed.
 * Otherwise the text is echoed as a user message, per-turn state and
 * latency timestamps are reset, a thinking bubble is shown, and the query
 * is sent over the chat WebSocket.
 */
function sendText() {
  const text = textInput.value.trim();
  console.log('Send button clicked, text:', text);
  if (!text || isProcessing) return;

  appendMsg(text, 'user');
  textInput.value = '';

  // Reset the cancel flag so an earlier stop/barge-in does not make
  // enqueueAudio() drop audio that belongs to this turn.
  _cancelled = false;
  isProcessing = true;
  tSend = Date.now();
  tLlm = 0;
  tTts = 0;
  aiEl = null;
  aiTxt = '';

  setState('processing');
  appendThinking();

  console.log('[Chat] sending:', text);

  // Text queries always go through the chat WS, whatever the state of the
  // voice WS.
  _sendViaChat(text);
}
|
| 623 |
+
|
| 624 |
+
/**
 * Send a text query over the chat WebSocket.
 *
 * If the socket is not open yet, the send is retried every 300 ms. After
 * about 30 s without a connection the attempt is abandoned: the thinking
 * bubble is removed, an error is shown, and the processing flag is cleared
 * so the user can send again instead of being locked out indefinitely.
 *
 * @param {string} text The user's query.
 */
function _sendViaChat(text) {
  const RETRY_MS = 300;
  const MAX_ATTEMPTS = 100; // 100 × 300 ms ≈ 30 s

  // The payload always carries the user id alongside the query.
  const payload = JSON.stringify({ user_id: USER_ID, user_query: text });
  console.log(
    '[Chat WS] sending payload, readyState:',
    chatWS ? chatWS.readyState : 'null',
  );

  let attempts = 0;
  const trySend = () => {
    if (chatWS && chatWS.readyState === WebSocket.OPEN) {
      chatWS.send(payload);
      return;
    }

    attempts += 1;
    if (attempts >= MAX_ATTEMPTS) {
      console.error('[Chat WS] not connected, giving up on queued message');
      _removeThinking();
      appendMsg('⚠️ Chat server is unreachable. Please try again.', 'system');
      isProcessing = false;
      setState(isListening ? 'listening' : 'ready');
      return;
    }
    setTimeout(trySend, RETRY_MS);
  };
  trySend();
}
|
| 646 |
+
|
| 647 |
+
// ═══════════════════════════════════════════════════════════════════════════════
|
| 648 |
+
// MICROPHONE / VAD
|
| 649 |
+
// ═══════════════════════════════════════════════════════════════════════════════
|
| 650 |
|
| 651 |
micBtn.onclick = async () => {
|
| 652 |
+
if (isListening) stopListening();
|
| 653 |
+
else await startListening();
|
| 654 |
+
};
|
| 655 |
+
|
| 656 |
+
stopBtn.onclick = () => {
|
| 657 |
+
stopAllAudio();
|
| 658 |
+
isProcessing = false;
|
| 659 |
+
setState(isListening ? 'listening' : 'ready');
|
| 660 |
};
|
| 661 |
|
| 662 |
async function startListening() {
|
| 663 |
+
_ctxEnsure();
|
| 664 |
|
| 665 |
try {
|
| 666 |
micStream = await navigator.mediaDevices.getUserMedia({
|
|
|
|
| 672 |
sampleRate: 16000,
|
| 673 |
},
|
| 674 |
});
|
| 675 |
+
} catch (err) {
|
| 676 |
+
console.error('[Mic]', err);
|
| 677 |
+
appendMsg('⚠️ মাইক্রোফোন অ্যাক্সেস দেওয়া হয়নি।', 'system');
|
| 678 |
return;
|
| 679 |
}
|
| 680 |
|
| 681 |
+
analyserCtx = new AudioContext({ sampleRate: 16000 });
|
| 682 |
+
const src = analyserCtx.createMediaStreamSource(micStream);
|
| 683 |
+
analyser = analyserCtx.createAnalyser();
|
| 684 |
analyser.fftSize = 512;
|
| 685 |
+
analyser.smoothingTimeConstant = 0.6;
|
| 686 |
+
src.connect(analyser);
|
| 687 |
|
| 688 |
isListening = true;
|
| 689 |
+
setMic('listening');
|
| 690 |
+
setState('listening');
|
| 691 |
+
voiceViz.classList.add('active');
|
| 692 |
+
|
| 693 |
+
vadInt = setInterval(vadTick, VAD_MS);
|
| 694 |
+
vizInt = setInterval(vizTick, 60);
|
| 695 |
}
|
| 696 |
|
| 697 |
/**
 * End the microphone session: stop the VAD/visualiser timers, discard any
 * recording in progress, cancel playback, release the mic stream and the
 * analyser context, and reset the UI to its idle state.
 */
function stopListening() {
  clearInterval(vadInt);
  clearInterval(vizInt);
  clearTimeout(silenceTimer);
  vadInt = null;
  vizInt = null;
  silenceTimer = null;

  if (isSpeaking) {
    discardRecorder();
  }
  stopAllAudio();

  if (micStream) {
    for (const track of micStream.getTracks()) track.stop();
  }
  if (analyserCtx) {
    analyserCtx.close().catch(() => {});
  }
  micStream = null;
  analyserCtx = null;
  analyser = null;

  isListening = false;
  isSpeaking = false;
  isProcessing = false;

  setMic('off');
  setState('ready');
  voiceViz.classList.remove('active');
  for (const bar of vizBars) {
    bar.style.height = '4px';
  }
}
|
| 716 |
|
| 717 |
+
// ── VAD ────────────────────────────────────────────────────────────────────────
|
| 718 |
/**
 * One voice-activity-detection step, run every VAD_MS.
 *
 * Computes the RMS level of the current analyser frame in dB and compares
 * it with SILENCE_DB:
 *   - above the threshold: cancel playback if a turn is being processed,
 *     cancel any pending silence timer and, if not already recording,
 *     start a recording.
 *   - otherwise: while recording, arm a timer that ends the utterance after
 *     SILENCE_MS and hands it off for processing.
 */
function vadTick() {
  if (!analyser) return;

  const samples = new Float32Array(analyser.frequencyBinCount);
  analyser.getFloatTimeDomainData(samples);

  let energy = 0;
  for (let k = 0; k < samples.length; k++) {
    energy += samples[k] * samples[k];
  }
  const rms = Math.sqrt(energy / samples.length);
  // A zero RMS is replaced by 1e-10 so the logarithm stays finite.
  const levelDb = 20 * Math.log10(rms || 1e-10);
  const voiced = levelDb > SILENCE_DB;

  if (!voiced) {
    // Arm the end-of-utterance timer once per silence stretch.
    if (!isSpeaking || silenceTimer) return;

    silenceTimer = setTimeout(() => {
      silenceTimer = null;
      isSpeaking = false;
      isProcessing = true;
      _cancelled = false;
      tSend = Date.now();
      tLlm = 0;
      tTts = 0;
      stopRecorder();
      setMic('processing');
      setState('processing');
    }, SILENCE_MS);
    return;
  }

  // Speech while a turn is in progress interrupts it (barge-in).
  if (isProcessing) {
    stopAllAudio();
    isProcessing = false;
  }

  clearTimeout(silenceTimer);
  silenceTimer = null;

  if (isSpeaking) return;

  isSpeaking = true;
  _cancelled = false;
  _ctxEnsure();
  startRecorder();
  setMic('recording');
  setState('recording');
}
|
| 761 |
|
| 762 |
+
// ── Viz tick ───────────────────────────────────────────────────────────────────
|
| 763 |
+
/**
 * Update the microphone visualiser bars from the analyser's frequency data.
 * Bars are at least 4 px tall and scale up to 48 px while the user is
 * speaking, 18 px otherwise.
 */
function vizTick() {
  if (!analyser) return;

  const spectrum = new Uint8Array(analyser.frequencyBinCount);
  analyser.getByteFrequencyData(spectrum);

  // Sample one bin per bar, evenly spaced across the spectrum.
  const stride = Math.floor(spectrum.length / vizBars.length);
  for (let k = 0; k < vizBars.length; k++) {
    const level = spectrum[k * stride] / 255;
    const maxPx = isSpeaking ? 48 : 18;
    vizBars[k].style.height = Math.max(4, level * maxPx) + 'px';
  }
}
|
| 773 |
+
|
| 774 |
+
// ── MediaRecorder ──────────────────────────────────────────────────────────────
|
| 775 |
/**
 * Start recording the microphone stream into `audioChunks`.
 *
 * When the recorder stops, the chunks are merged and sent as one binary
 * frame over the voice WebSocket. If nothing was recorded or the socket is
 * not open, the utterance is dropped and the processing flag is cleared.
 */
function startRecorder() {
  if (!micStream) return;
  audioChunks = [];

  // Prefer Opus-in-WebM when the browser reports support for it.
  let mime = 'audio/webm';
  if (MediaRecorder.isTypeSupported('audio/webm;codecs=opus')) {
    mime = 'audio/webm;codecs=opus';
  }

  const recorder = new MediaRecorder(micStream, { mimeType: mime });
  mediaRecorder = recorder;

  recorder.ondataavailable = (e) => {
    if (e.data.size > 0) audioChunks.push(e.data);
  };

  recorder.onstop = async () => {
    if (audioChunks.length === 0) {
      isProcessing = false;
      if (isListening) setState('listening');
      return;
    }

    const blob = new Blob(audioChunks, { type: mime });
    audioChunks = [];
    const buf = await blob.arrayBuffer();
    console.log(
      `[VAD] sending ${buf.byteLength.toLocaleString()} bytes to voice WS`,
    );

    const socketOpen = voiceWS && voiceWS.readyState === WebSocket.OPEN;
    if (!socketOpen) {
      console.warn('[VAD] voice WS not open — dropping utterance');
      isProcessing = false;
      if (isListening) setState('listening');
      return;
    }

    appendThinking(); // shown while the utterance is being processed
    voiceWS.send(buf);
  };

  recorder.start();
}
|
| 810 |
|
| 811 |
+
/**
 * Stop the active recorder, if any, and drop the reference to it.
 * Its `onstop` handler then runs and sends the recorded utterance.
 */
function stopRecorder() {
  const active = mediaRecorder && mediaRecorder.state !== 'inactive';
  if (active) {
    mediaRecorder.stop();
  }
  mediaRecorder = null;
}
|
| 815 |
+
|
| 816 |
+
/**
 * Abort the active recording without sending it: the data and stop
 * handlers are replaced so that buffered chunks are thrown away.
 * Does nothing when there is no active recorder.
 */
function discardRecorder() {
  const recorder = mediaRecorder;
  const active = recorder && recorder.state !== 'inactive';
  if (!active) return;

  recorder.ondataavailable = () => {};
  recorder.onstop = () => {
    audioChunks = [];
  };
  recorder.stop();
  mediaRecorder = null;
}
|
| 825 |
|
| 826 |
+
// ═══════════════════════════════════════════════════════════════════════════════
|
| 827 |
+
// UI HELPERS
|
| 828 |
+
// ═══════════════════════════════════════════════════════════════════════════════
|
| 829 |
+
|
| 830 |
+
const STATE_MAP = {
|
| 831 |
+
ready: { label: 'প্রস্তুত', cls: '' },
|
| 832 |
+
listening: { label: 'শুনছি…', cls: 'listening' },
|
| 833 |
+
recording: { label: 'রেকর্ড হচ্ছে…', cls: 'recording' },
|
| 834 |
+
processing: { label: 'প্রক্রিয়া করছে…', cls: 'processing' },
|
| 835 |
+
speaking: { label: 'AI বলছে…', cls: 'speaking' },
|
| 836 |
+
};
|
| 837 |
+
|
| 838 |
+
/**
 * Show the given pipeline state in the status label and dot.
 * Unknown state names fall back to the 'ready' entry of STATE_MAP.
 */
function setState(s) {
  const { label, cls } = STATE_MAP[s] || STATE_MAP.ready;
  stateLabel.textContent = label;
  stateDot.className = cls ? 'state-dot' + ' ' + cls : 'state-dot';
}
|
| 843 |
|
| 844 |
+
const MIC_MAP = {
|
| 845 |
+
off: { cls: 'mic-off', label: 'Voice শুরু করুন', icon: '🎤' },
|
| 846 |
+
listening: {
|
| 847 |
+
cls: 'mic-listening',
|
| 848 |
+
label: 'শুনছি… (বন্ধ করতে ক্লিক)',
|
| 849 |
+
icon: '🟢',
|
| 850 |
+
},
|
| 851 |
+
recording: { cls: 'mic-recording', label: 'বলছেন…', icon: '🔴' },
|
| 852 |
+
processing: { cls: 'mic-processing', label: 'প্রক্রিয়া করছে…', icon: '⏳' },
|
| 853 |
+
};
|
| 854 |
|
| 855 |
+
/**
 * Apply a microphone-button appearance (class, label and icon).
 * Unknown names fall back to the 'off' entry of MIC_MAP.
 */
function setMic(s) {
  const { cls, label, icon } = MIC_MAP[s] || MIC_MAP.off;
  micBtn.className = `mic-btn ${cls}`;
  micLabel.textContent = label;

  const iconEl = micBtn.querySelector('.mic-icon');
  iconEl.textContent = icon;
}
|
| 861 |
+
|
| 862 |
+
/**
 * Append a message bubble to the chat and scroll it into view.
 *
 * AI messages are rendered through `marked` when it is loaded; every other
 * message is inserted as plain text.
 *
 * @param {string} text Message content.
 * @param {string} who  Bubble class suffix, e.g. 'user', 'ai' or 'system'.
 * @returns {HTMLDivElement} The element that was appended.
 */
function appendMsg(text, who) {
  const bubble = document.createElement('div');
  bubble.className = `message ${who}`;

  const renderMarkdown = who === 'ai' && typeof marked !== 'undefined';
  if (renderMarkdown) {
    bubble.innerHTML = marked.parse(text || '');
  } else {
    bubble.textContent = text;
  }

  chatBox.appendChild(bubble);
  chatBox.scrollTop = chatBox.scrollHeight;
  return bubble;
}
|
| 874 |
+
|
| 875 |
+
// ── Clear chat ────────────────────────────────────────────────────────────────
|
| 876 |
+
clearBtn.onclick = () => {
|
| 877 |
+
chatBox.innerHTML = '';
|
| 878 |
+
thinkingEl = null; // FIX-3: reset reference after clear
|
| 879 |
+
appendMsg('চ্যাট পরিষ্কার করা হয়েছে।', 'system');
|
| 880 |
+
};
|
| 881 |
+
|
| 882 |
+
// ── Sidebar ───────────────────────────────────────────────────────────────────
|
| 883 |
+
// Desktop: collapse/expand the sidebar and flip the chevron on the toggle.
sidebarToggle.onclick = () => {
  // classList.toggle() returns true when the class is present afterwards.
  const isCollapsed = sidebarEl.classList.toggle('collapsed');
  if (isCollapsed) {
    sidebarToggle.textContent = '›';
  } else {
    sidebarToggle.textContent = '‹';
  }
};

// Mobile: slide the sidebar in/out. The expression-bodied arrow is kept on
// purpose so the handler's return value stays exactly what it was.
mobileMenuBtn.onclick = () => sidebarEl.classList.toggle('mobile-open');
|
| 890 |
+
|
| 891 |
+
// ── Settings sliders ──────────────────────────────────────────────────────────
|
| 892 |
+
// Silence threshold slider: seed the control and its readout from SILENCE_DB,
// then write the slider value back into SILENCE_DB on every input event.
sThreshold.value = SILENCE_DB;
sThresholdVal.textContent = `${SILENCE_DB} dB`;
sThreshold.oninput = () => {
  SILENCE_DB = Number(sThreshold.value);
  sThresholdVal.textContent = `${SILENCE_DB} dB`;
};

// Silence timeout slider: same wiring, backed by SILENCE_MS.
sTimeout.value = SILENCE_MS;
sTimeoutVal.textContent = `${SILENCE_MS} ms`;
sTimeout.oninput = () => {
  SILENCE_MS = Number(sTimeout.value);
  sTimeoutVal.textContent = `${SILENCE_MS} ms`;
};

// Voice selector: post a system message naming the newly selected value.
sVoice.onchange = () => {
  appendMsg(`🔊 TTS voice: ${sVoice.value}`, 'system');
};
|
| 907 |
+
|
| 908 |
+
// ── Queue animation ───────────────────────────────────────────────────────────
|
| 909 |
+
// Every 140 ms, redraw the queue visualisation, but only while _inFlight
// is greater than zero.
setInterval(function tickQueueViz() {
  if (!(_inFlight > 0)) {
    return;
  }
  _vizQ();
}, 140);

// ═══════════════════════════════════════════════════════════════════════════════
// START
// ═══════════════════════════════════════════════════════════════════════════════
// Entry point: everything above only defines and wires things up.
boot();
|
frontend/style.css
CHANGED
|
@@ -1,152 +1,847 @@
|
|
| 1 |
-
*
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
| 5 |
}
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
body {
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
justify-content: center;
|
| 14 |
-
align-items: center;
|
| 15 |
}
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
}
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
}
|
| 33 |
|
| 34 |
-
.
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
}
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
}
|
| 43 |
|
| 44 |
-
/*
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
}
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
}
|
| 57 |
|
| 58 |
-
.
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
}
|
| 61 |
|
| 62 |
-
.
|
| 63 |
-
|
| 64 |
-
|
|
|
|
| 65 |
}
|
| 66 |
|
| 67 |
-
.
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
}
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
}
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
}
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
}
|
| 96 |
|
| 97 |
-
.
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
}
|
| 100 |
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
}
|
| 104 |
|
| 105 |
.message {
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
-
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
}
|
| 119 |
|
| 120 |
-
.
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
}
|
| 125 |
|
| 126 |
-
.
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
}
|
| 132 |
|
| 133 |
-
|
| 134 |
-
.
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
}
|
| 138 |
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
}
|
| 142 |
|
| 143 |
-
.
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
padding: 10px;
|
| 151 |
-
gap: 6px;
|
| 152 |
}
|
|
|
|
| 1 |
+
/* ── Reset & base ── */
|
| 2 |
+
*,
|
| 3 |
+
*::before,
|
| 4 |
+
*::after {
|
| 5 |
+
margin: 0;
|
| 6 |
+
padding: 0;
|
| 7 |
+
box-sizing: border-box;
|
| 8 |
}
|
| 9 |
|
| 10 |
+
:root {
|
| 11 |
+
--bg: #07090f;
|
| 12 |
+
--bg2: #0d1117;
|
| 13 |
+
--bg3: #121820;
|
| 14 |
+
--border: rgba(255, 255, 255, 0.07);
|
| 15 |
+
--border2: rgba(255, 255, 255, 0.12);
|
| 16 |
+
--text: #e2e8f0;
|
| 17 |
+
--text2: #8892a4;
|
| 18 |
+
--text3: #4a5568;
|
| 19 |
+
--accent: #22d3ee;
|
| 20 |
+
--accent2: #818cf8;
|
| 21 |
+
--accent3: #f472b6;
|
| 22 |
+
--green: #4ade80;
|
| 23 |
+
--red: #f87171;
|
| 24 |
+
--yellow: #fbbf24;
|
| 25 |
+
--user-bg: rgba(34, 211, 238, 0.1);
|
| 26 |
+
--ai-bg: rgba(129, 140, 248, 0.08);
|
| 27 |
+
--sidebar-w: 270px;
|
| 28 |
+
--transition: 0.25s cubic-bezier(0.4, 0, 0.2, 1);
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
html,
|
| 32 |
body {
|
| 33 |
+
height: 100%;
|
| 34 |
+
background: var(--bg);
|
| 35 |
+
color: var(--text);
|
| 36 |
+
font-family: 'Hind Siliguri', 'Syne', sans-serif;
|
| 37 |
+
overflow: hidden;
|
|
|
|
|
|
|
| 38 |
}
|
| 39 |
|
| 40 |
+
/* ── Ambient orbs ── */
|
| 41 |
+
.bg-orb {
|
| 42 |
+
position: fixed;
|
| 43 |
+
border-radius: 50%;
|
| 44 |
+
filter: blur(80px);
|
| 45 |
+
pointer-events: none;
|
| 46 |
+
z-index: 0;
|
| 47 |
+
opacity: 0.18;
|
| 48 |
+
animation: orb-float 12s ease-in-out infinite;
|
| 49 |
+
}
|
| 50 |
+
.orb-1 {
|
| 51 |
+
width: 500px;
|
| 52 |
+
height: 500px;
|
| 53 |
+
background: radial-gradient(circle, #22d3ee, transparent);
|
| 54 |
+
top: -200px;
|
| 55 |
+
left: -150px;
|
| 56 |
+
animation-delay: 0s;
|
| 57 |
+
}
|
| 58 |
+
.orb-2 {
|
| 59 |
+
width: 400px;
|
| 60 |
+
height: 400px;
|
| 61 |
+
background: radial-gradient(circle, #818cf8, transparent);
|
| 62 |
+
bottom: -100px;
|
| 63 |
+
right: -100px;
|
| 64 |
+
animation-delay: -4s;
|
| 65 |
+
}
|
| 66 |
+
.orb-3 {
|
| 67 |
+
width: 300px;
|
| 68 |
+
height: 300px;
|
| 69 |
+
background: radial-gradient(circle, #f472b6, transparent);
|
| 70 |
+
top: 50%;
|
| 71 |
+
left: 50%;
|
| 72 |
+
transform: translate(-50%, -50%);
|
| 73 |
+
animation-delay: -8s;
|
| 74 |
+
}
|
| 75 |
+
@keyframes orb-float {
|
| 76 |
+
0%,
|
| 77 |
+
100% {
|
| 78 |
+
transform: translate(0, 0) scale(1);
|
| 79 |
+
}
|
| 80 |
+
33% {
|
| 81 |
+
transform: translate(30px, -20px) scale(1.05);
|
| 82 |
+
}
|
| 83 |
+
66% {
|
| 84 |
+
transform: translate(-20px, 15px) scale(0.97);
|
| 85 |
+
}
|
| 86 |
}
|
| 87 |
|
| 88 |
+
/* ── Init overlay ── */
|
| 89 |
+
.init-overlay {
|
| 90 |
+
position: fixed;
|
| 91 |
+
inset: 0;
|
| 92 |
+
z-index: 1000;
|
| 93 |
+
display: flex;
|
| 94 |
+
align-items: center;
|
| 95 |
+
justify-content: center;
|
| 96 |
+
background: var(--bg);
|
| 97 |
+
transition:
|
| 98 |
+
opacity 0.6s ease,
|
| 99 |
+
visibility 0.6s ease;
|
| 100 |
+
}
|
| 101 |
+
.init-overlay.hidden {
|
| 102 |
+
opacity: 0;
|
| 103 |
+
visibility: hidden;
|
| 104 |
+
pointer-events: none;
|
| 105 |
}
|
| 106 |
|
| 107 |
+
.init-card {
|
| 108 |
+
background: var(--bg2);
|
| 109 |
+
border: 1px solid var(--border2);
|
| 110 |
+
border-radius: 24px;
|
| 111 |
+
padding: 48px 56px;
|
| 112 |
+
width: 480px;
|
| 113 |
+
max-width: 95vw;
|
| 114 |
+
text-align: center;
|
| 115 |
+
box-shadow: 0 24px 80px rgba(0, 0, 0, 0.5);
|
| 116 |
+
}
|
| 117 |
+
.init-logo {
|
| 118 |
+
margin-bottom: 20px;
|
| 119 |
+
animation: logo-pulse 2s ease-in-out infinite;
|
| 120 |
+
}
|
| 121 |
+
@keyframes logo-pulse {
|
| 122 |
+
0%,
|
| 123 |
+
100% {
|
| 124 |
+
filter: drop-shadow(0 0 12px rgba(34, 211, 238, 0.4));
|
| 125 |
+
transform: scale(1);
|
| 126 |
+
}
|
| 127 |
+
50% {
|
| 128 |
+
filter: drop-shadow(0 0 24px rgba(129, 140, 248, 0.6));
|
| 129 |
+
transform: scale(1.06);
|
| 130 |
+
}
|
| 131 |
+
}
|
| 132 |
+
.init-title {
|
| 133 |
+
font-family: 'Syne', sans-serif;
|
| 134 |
+
font-size: 26px;
|
| 135 |
+
font-weight: 800;
|
| 136 |
+
background: linear-gradient(135deg, var(--accent), var(--accent2));
|
| 137 |
+
-webkit-background-clip: text;
|
| 138 |
+
-webkit-text-fill-color: transparent;
|
| 139 |
+
background-clip: text;
|
| 140 |
+
margin-bottom: 6px;
|
| 141 |
+
}
|
| 142 |
+
.init-subtitle {
|
| 143 |
+
font-family: 'Hind Siliguri', sans-serif;
|
| 144 |
+
color: var(--text2);
|
| 145 |
+
font-size: 15px;
|
| 146 |
+
margin-bottom: 36px;
|
| 147 |
+
}
|
| 148 |
+
.init-stages {
|
| 149 |
+
text-align: left;
|
| 150 |
+
margin-bottom: 28px;
|
| 151 |
+
}
|
| 152 |
+
.stage {
|
| 153 |
+
display: flex;
|
| 154 |
+
align-items: center;
|
| 155 |
+
gap: 12px;
|
| 156 |
+
padding: 10px 0;
|
| 157 |
+
font-size: 13px;
|
| 158 |
+
color: var(--text3);
|
| 159 |
+
border-bottom: 1px solid var(--border);
|
| 160 |
+
transition: color 0.3s;
|
| 161 |
+
}
|
| 162 |
+
.stage.active {
|
| 163 |
+
color: var(--accent);
|
| 164 |
+
}
|
| 165 |
+
.stage.done {
|
| 166 |
+
color: var(--green);
|
| 167 |
+
}
|
| 168 |
+
.stage-dot {
|
| 169 |
+
width: 8px;
|
| 170 |
+
height: 8px;
|
| 171 |
+
border-radius: 50%;
|
| 172 |
+
background: var(--text3);
|
| 173 |
+
flex-shrink: 0;
|
| 174 |
+
transition:
|
| 175 |
+
background 0.3s,
|
| 176 |
+
box-shadow 0.3s;
|
| 177 |
+
}
|
| 178 |
+
.stage.active .stage-dot {
|
| 179 |
+
background: var(--accent);
|
| 180 |
+
box-shadow: 0 0 8px var(--accent);
|
| 181 |
+
animation: blink-dot 0.8s ease-in-out infinite;
|
| 182 |
+
}
|
| 183 |
+
.stage.done .stage-dot {
|
| 184 |
+
background: var(--green);
|
| 185 |
+
}
|
| 186 |
+
@keyframes blink-dot {
|
| 187 |
+
0%,
|
| 188 |
+
100% {
|
| 189 |
+
opacity: 1;
|
| 190 |
+
}
|
| 191 |
+
50% {
|
| 192 |
+
opacity: 0.3;
|
| 193 |
+
}
|
| 194 |
+
}
|
| 195 |
+
.stage-check {
|
| 196 |
+
margin-left: auto;
|
| 197 |
+
opacity: 0;
|
| 198 |
+
transition: opacity 0.3s;
|
| 199 |
+
}
|
| 200 |
+
.stage.done .stage-check {
|
| 201 |
+
opacity: 1;
|
| 202 |
+
}
|
| 203 |
+
.stage span {
|
| 204 |
+
flex: 1;
|
| 205 |
+
font-family: 'Hind Siliguri', sans-serif;
|
| 206 |
}
|
| 207 |
|
| 208 |
+
.init-bar-wrap {
|
| 209 |
+
background: var(--bg3);
|
| 210 |
+
border-radius: 99px;
|
| 211 |
+
height: 6px;
|
| 212 |
+
overflow: hidden;
|
| 213 |
+
margin-bottom: 16px;
|
| 214 |
+
border: 1px solid var(--border);
|
| 215 |
+
}
|
| 216 |
+
.init-bar {
|
| 217 |
+
height: 100%;
|
| 218 |
+
background: linear-gradient(90deg, var(--accent), var(--accent2));
|
| 219 |
+
border-radius: 99px;
|
| 220 |
+
width: 0%;
|
| 221 |
+
transition: width 0.8s cubic-bezier(0.4, 0, 0.2, 1);
|
| 222 |
+
box-shadow: 0 0 12px rgba(34, 211, 238, 0.5);
|
| 223 |
+
}
|
| 224 |
+
.init-status {
|
| 225 |
+
font-size: 12px;
|
| 226 |
+
color: var(--text2);
|
| 227 |
+
font-family: 'JetBrains Mono', monospace;
|
| 228 |
}
|
| 229 |
|
| 230 |
+
/* ── App layout ── */
|
| 231 |
+
.app {
|
| 232 |
+
position: fixed;
|
| 233 |
+
inset: 0;
|
| 234 |
+
z-index: 1;
|
| 235 |
+
display: flex;
|
| 236 |
+
transition: opacity 0.5s ease;
|
| 237 |
+
}
|
| 238 |
|
| 239 |
+
/* ── Sidebar ── */
|
| 240 |
+
.sidebar {
|
| 241 |
+
width: var(--sidebar-w);
|
| 242 |
+
background: var(--bg2);
|
| 243 |
+
border-right: 1px solid var(--border);
|
| 244 |
+
display: flex;
|
| 245 |
+
flex-direction: column;
|
| 246 |
+
flex-shrink: 0;
|
| 247 |
+
overflow-y: auto;
|
| 248 |
+
transition:
|
| 249 |
+
width var(--transition),
|
| 250 |
+
transform var(--transition);
|
| 251 |
+
z-index: 10;
|
| 252 |
+
}
|
| 253 |
+
.sidebar.collapsed {
|
| 254 |
+
width: 0;
|
| 255 |
+
overflow: hidden;
|
| 256 |
+
}
|
| 257 |
+
.sidebar-header {
|
| 258 |
+
display: flex;
|
| 259 |
+
align-items: center;
|
| 260 |
+
justify-content: space-between;
|
| 261 |
+
padding: 20px 16px 16px;
|
| 262 |
+
border-bottom: 1px solid var(--border);
|
| 263 |
+
}
|
| 264 |
+
.brand {
|
| 265 |
+
display: flex;
|
| 266 |
+
align-items: center;
|
| 267 |
+
gap: 10px;
|
| 268 |
+
font-family: 'Syne', sans-serif;
|
| 269 |
+
font-weight: 700;
|
| 270 |
+
font-size: 14px;
|
| 271 |
+
color: var(--text);
|
| 272 |
+
}
|
| 273 |
+
.sidebar-toggle {
|
| 274 |
+
background: none;
|
| 275 |
+
border: 1px solid var(--border);
|
| 276 |
+
color: var(--text2);
|
| 277 |
+
border-radius: 8px;
|
| 278 |
+
padding: 4px 8px;
|
| 279 |
+
cursor: pointer;
|
| 280 |
+
font-size: 16px;
|
| 281 |
+
transition: all var(--transition);
|
| 282 |
+
}
|
| 283 |
+
.sidebar-toggle:hover {
|
| 284 |
+
background: var(--border);
|
| 285 |
+
color: var(--text);
|
| 286 |
}
|
| 287 |
|
| 288 |
+
.status-panel {
|
| 289 |
+
padding: 16px;
|
| 290 |
+
}
|
| 291 |
+
.status-row {
|
| 292 |
+
display: flex;
|
| 293 |
+
align-items: center;
|
| 294 |
+
justify-content: space-between;
|
| 295 |
+
padding: 6px 0;
|
| 296 |
+
}
|
| 297 |
+
.status-label {
|
| 298 |
+
font-size: 12px;
|
| 299 |
+
color: var(--text2);
|
| 300 |
+
}
|
| 301 |
+
.status-badge {
|
| 302 |
+
font-size: 10px;
|
| 303 |
+
font-family: 'JetBrains Mono', monospace;
|
| 304 |
+
padding: 2px 8px;
|
| 305 |
+
border-radius: 99px;
|
| 306 |
+
font-weight: 600;
|
| 307 |
+
letter-spacing: 0.03em;
|
| 308 |
+
}
|
| 309 |
+
.badge-green {
|
| 310 |
+
background: rgba(74, 222, 128, 0.12);
|
| 311 |
+
color: var(--green);
|
| 312 |
+
}
|
| 313 |
+
.badge-yellow {
|
| 314 |
+
background: rgba(251, 191, 36, 0.12);
|
| 315 |
+
color: var(--yellow);
|
| 316 |
+
}
|
| 317 |
+
.badge-red {
|
| 318 |
+
background: rgba(248, 113, 113, 0.12);
|
| 319 |
+
color: var(--red);
|
| 320 |
}
|
| 321 |
|
| 322 |
+
.sidebar-divider {
|
| 323 |
+
height: 1px;
|
| 324 |
+
background: var(--border);
|
| 325 |
+
margin: 4px 0;
|
| 326 |
}
|
| 327 |
|
| 328 |
+
.dash-section {
|
| 329 |
+
padding: 16px;
|
| 330 |
+
}
|
| 331 |
+
.dash-title {
|
| 332 |
+
font-size: 11px;
|
| 333 |
+
font-weight: 700;
|
| 334 |
+
text-transform: uppercase;
|
| 335 |
+
letter-spacing: 0.08em;
|
| 336 |
+
color: var(--text2);
|
| 337 |
+
margin-bottom: 12px;
|
| 338 |
+
}
|
| 339 |
+
.metric-grid {
|
| 340 |
+
display: grid;
|
| 341 |
+
grid-template-columns: 1fr 1fr;
|
| 342 |
+
gap: 8px;
|
| 343 |
+
}
|
| 344 |
+
.metric-card {
|
| 345 |
+
background: var(--bg3);
|
| 346 |
+
border: 1px solid var(--border);
|
| 347 |
+
border-radius: 10px;
|
| 348 |
+
padding: 10px;
|
| 349 |
+
text-align: center;
|
| 350 |
+
}
|
| 351 |
+
.metric-val {
|
| 352 |
+
font-family: 'JetBrains Mono', monospace;
|
| 353 |
+
font-size: 18px;
|
| 354 |
+
font-weight: 400;
|
| 355 |
+
color: var(--accent);
|
| 356 |
+
line-height: 1;
|
| 357 |
+
margin-bottom: 4px;
|
| 358 |
+
}
|
| 359 |
+
.metric-label {
|
| 360 |
+
font-size: 10px;
|
| 361 |
+
color: var(--text3);
|
| 362 |
}
|
| 363 |
|
| 364 |
+
.setting-row {
|
| 365 |
+
margin-bottom: 14px;
|
| 366 |
+
}
|
| 367 |
+
.setting-row label {
|
| 368 |
+
display: block;
|
| 369 |
+
font-size: 11px;
|
| 370 |
+
color: var(--text2);
|
| 371 |
+
margin-bottom: 6px;
|
| 372 |
+
}
|
| 373 |
+
.slider-wrap {
|
| 374 |
+
display: flex;
|
| 375 |
+
align-items: center;
|
| 376 |
+
gap: 8px;
|
| 377 |
+
}
|
| 378 |
+
.slider-wrap input[type='range'] {
|
| 379 |
+
flex: 1;
|
| 380 |
+
accent-color: var(--accent);
|
| 381 |
+
height: 4px;
|
| 382 |
+
cursor: pointer;
|
| 383 |
+
}
|
| 384 |
+
.slider-wrap span {
|
| 385 |
+
font-size: 11px;
|
| 386 |
+
font-family: 'JetBrains Mono', monospace;
|
| 387 |
+
color: var(--accent);
|
| 388 |
+
min-width: 58px;
|
| 389 |
+
text-align: right;
|
| 390 |
+
}
|
| 391 |
+
.setting-select {
|
| 392 |
+
width: 100%;
|
| 393 |
+
background: var(--bg3);
|
| 394 |
+
border: 1px solid var(--border);
|
| 395 |
+
color: var(--text);
|
| 396 |
+
border-radius: 8px;
|
| 397 |
+
padding: 6px 10px;
|
| 398 |
+
font-size: 12px;
|
| 399 |
+
font-family: 'Hind Siliguri', sans-serif;
|
| 400 |
+
cursor: pointer;
|
| 401 |
+
}
|
| 402 |
+
.setting-select:focus {
|
| 403 |
+
outline: none;
|
| 404 |
+
border-color: var(--accent);
|
| 405 |
}
|
| 406 |
|
| 407 |
+
.queue-vis {
|
| 408 |
+
display: flex;
|
| 409 |
+
align-items: flex-end;
|
| 410 |
+
gap: 4px;
|
| 411 |
+
height: 48px;
|
| 412 |
+
margin-bottom: 8px;
|
| 413 |
+
}
|
| 414 |
+
.queue-bar {
|
| 415 |
+
flex: 1;
|
| 416 |
+
background: var(--accent);
|
| 417 |
+
border-radius: 3px;
|
| 418 |
+
opacity: 0.3;
|
| 419 |
+
transition:
|
| 420 |
+
height 0.15s ease,
|
| 421 |
+
opacity 0.15s ease;
|
| 422 |
+
min-height: 4px;
|
| 423 |
+
}
|
| 424 |
+
.queue-bar.active {
|
| 425 |
+
opacity: 0.9;
|
| 426 |
+
}
|
| 427 |
+
.queue-label {
|
| 428 |
+
font-size: 11px;
|
| 429 |
+
color: var(--text2);
|
| 430 |
+
font-family: 'JetBrains Mono', monospace;
|
| 431 |
+
}
|
| 432 |
+
|
| 433 |
+
/* ── Main ── */
|
| 434 |
+
.main {
|
| 435 |
+
flex: 1;
|
| 436 |
+
display: flex;
|
| 437 |
+
flex-direction: column;
|
| 438 |
+
overflow: hidden;
|
| 439 |
+
min-width: 0;
|
| 440 |
}
|
| 441 |
|
| 442 |
+
/* ── Topbar ── */
|
| 443 |
+
.topbar {
|
| 444 |
+
display: flex;
|
| 445 |
+
align-items: center;
|
| 446 |
+
justify-content: space-between;
|
| 447 |
+
padding: 14px 20px;
|
| 448 |
+
background: var(--bg2);
|
| 449 |
+
border-bottom: 1px solid var(--border);
|
| 450 |
+
flex-shrink: 0;
|
| 451 |
+
}
|
| 452 |
+
.topbar-left {
|
| 453 |
+
display: flex;
|
| 454 |
+
align-items: center;
|
| 455 |
+
gap: 12px;
|
| 456 |
+
}
|
| 457 |
+
.topbar-center {
|
| 458 |
+
font-family: 'Syne', sans-serif;
|
| 459 |
+
font-weight: 700;
|
| 460 |
+
font-size: 15px;
|
| 461 |
+
color: var(--text);
|
| 462 |
+
position: absolute;
|
| 463 |
+
left: 50%;
|
| 464 |
+
transform: translateX(-50%);
|
| 465 |
+
}
|
| 466 |
+
.topbar-right {
|
| 467 |
+
display: flex;
|
| 468 |
+
gap: 8px;
|
| 469 |
+
}
|
| 470 |
+
.mobile-menu-btn {
|
| 471 |
+
display: none;
|
| 472 |
+
background: none;
|
| 473 |
+
border: 1px solid var(--border);
|
| 474 |
+
color: var(--text2);
|
| 475 |
+
border-radius: 8px;
|
| 476 |
+
padding: 6px 10px;
|
| 477 |
+
cursor: pointer;
|
| 478 |
+
font-size: 16px;
|
| 479 |
+
}
|
| 480 |
+
.state-dot {
|
| 481 |
+
width: 8px;
|
| 482 |
+
height: 8px;
|
| 483 |
+
border-radius: 50%;
|
| 484 |
+
background: var(--green);
|
| 485 |
+
box-shadow: 0 0 6px var(--green);
|
| 486 |
+
flex-shrink: 0;
|
| 487 |
+
transition:
|
| 488 |
+
background 0.3s,
|
| 489 |
+
box-shadow 0.3s;
|
| 490 |
+
}
|
| 491 |
+
.state-dot.listening {
|
| 492 |
+
background: var(--accent);
|
| 493 |
+
box-shadow: 0 0 8px var(--accent);
|
| 494 |
+
animation: blink-dot 0.8s infinite;
|
| 495 |
+
}
|
| 496 |
+
.state-dot.recording {
|
| 497 |
+
background: var(--red);
|
| 498 |
+
box-shadow: 0 0 10px var(--red);
|
| 499 |
+
animation: blink-dot 0.4s infinite;
|
| 500 |
+
}
|
| 501 |
+
.state-dot.processing {
|
| 502 |
+
background: var(--yellow);
|
| 503 |
+
box-shadow: 0 0 8px var(--yellow);
|
| 504 |
+
animation: blink-dot 1s infinite;
|
| 505 |
+
}
|
| 506 |
+
.state-dot.speaking {
|
| 507 |
+
background: var(--accent2);
|
| 508 |
+
box-shadow: 0 0 10px var(--accent2);
|
| 509 |
+
animation: blink-dot 0.6s infinite;
|
| 510 |
+
}
|
| 511 |
+
#state-label {
|
| 512 |
+
font-size: 13px;
|
| 513 |
+
color: var(--text2);
|
| 514 |
+
font-family: 'JetBrains Mono', monospace;
|
| 515 |
}
|
| 516 |
|
| 517 |
+
.clear-btn {
|
| 518 |
+
background: none;
|
| 519 |
+
border: 1px solid var(--border);
|
| 520 |
+
color: var(--text2);
|
| 521 |
+
border-radius: 8px;
|
| 522 |
+
padding: 6px 12px;
|
| 523 |
+
cursor: pointer;
|
| 524 |
+
font-size: 12px;
|
| 525 |
+
font-family: 'Syne', sans-serif;
|
| 526 |
+
transition: all var(--transition);
|
| 527 |
+
}
|
| 528 |
+
.clear-btn:hover {
|
| 529 |
+
border-color: var(--accent);
|
| 530 |
+
color: var(--accent);
|
| 531 |
}
|
| 532 |
|
| 533 |
+
/* ── Chat ── */
|
| 534 |
+
#chat-box {
|
| 535 |
+
flex: 1;
|
| 536 |
+
overflow-y: auto;
|
| 537 |
+
padding: 24px 20px 12px;
|
| 538 |
+
display: flex;
|
| 539 |
+
flex-direction: column;
|
| 540 |
+
gap: 12px;
|
| 541 |
+
scroll-behavior: smooth;
|
| 542 |
+
}
|
| 543 |
+
#chat-box::-webkit-scrollbar {
|
| 544 |
+
width: 4px;
|
| 545 |
+
}
|
| 546 |
+
#chat-box::-webkit-scrollbar-track {
|
| 547 |
+
background: transparent;
|
| 548 |
+
}
|
| 549 |
+
#chat-box::-webkit-scrollbar-thumb {
|
| 550 |
+
background: var(--border2);
|
| 551 |
+
border-radius: 99px;
|
| 552 |
}
|
| 553 |
|
| 554 |
.message {
|
| 555 |
+
max-width: 75%;
|
| 556 |
+
padding: 14px 18px;
|
| 557 |
+
border-radius: 16px;
|
| 558 |
+
line-height: 1.65;
|
| 559 |
+
font-size: 14.5px;
|
| 560 |
+
word-wrap: break-word;
|
| 561 |
+
overflow-wrap: break-word;
|
| 562 |
+
animation: msg-in 0.3s cubic-bezier(0.34, 1.56, 0.64, 1);
|
| 563 |
+
font-family: 'Hind Siliguri', sans-serif;
|
| 564 |
+
}
|
| 565 |
+
@keyframes msg-in {
|
| 566 |
+
from {
|
| 567 |
+
opacity: 0;
|
| 568 |
+
transform: translateY(10px) scale(0.97);
|
| 569 |
+
}
|
| 570 |
+
to {
|
| 571 |
+
opacity: 1;
|
| 572 |
+
transform: translateY(0) scale(1);
|
| 573 |
+
}
|
| 574 |
+
}
|
| 575 |
+
.message.user {
|
| 576 |
+
background: var(--user-bg);
|
| 577 |
+
border: 1px solid rgba(34, 211, 238, 0.2);
|
| 578 |
+
margin-left: auto;
|
| 579 |
+
border-bottom-right-radius: 4px;
|
| 580 |
+
}
|
| 581 |
+
.message.ai {
|
| 582 |
+
background: var(--ai-bg);
|
| 583 |
+
border: 1px solid rgba(129, 140, 248, 0.15);
|
| 584 |
+
border-bottom-left-radius: 4px;
|
| 585 |
+
}
|
| 586 |
+
.message.system {
|
| 587 |
+
background: rgba(251, 191, 36, 0.08);
|
| 588 |
+
border: 1px solid rgba(251, 191, 36, 0.2);
|
| 589 |
+
color: var(--yellow);
|
| 590 |
+
font-size: 12px;
|
| 591 |
+
font-family: 'JetBrains Mono', monospace;
|
| 592 |
+
align-self: center;
|
| 593 |
+
max-width: 90%;
|
| 594 |
+
}
|
| 595 |
+
.message ul,
|
| 596 |
+
.message ol {
|
| 597 |
+
padding-left: 20px;
|
| 598 |
+
margin: 8px 0;
|
| 599 |
+
}
|
| 600 |
+
.message li {
|
| 601 |
+
margin-bottom: 4px;
|
| 602 |
+
}
|
| 603 |
+
.message p {
|
| 604 |
+
margin: 6px 0;
|
| 605 |
+
}
|
| 606 |
+
.message code {
|
| 607 |
+
background: rgba(0, 0, 0, 0.3);
|
| 608 |
+
border-radius: 4px;
|
| 609 |
+
padding: 1px 6px;
|
| 610 |
+
font-family: 'JetBrains Mono', monospace;
|
| 611 |
+
font-size: 13px;
|
| 612 |
+
}
|
| 613 |
+
.message pre {
|
| 614 |
+
background: rgba(0, 0, 0, 0.3);
|
| 615 |
+
border-radius: 8px;
|
| 616 |
+
padding: 12px;
|
| 617 |
+
overflow-x: auto;
|
| 618 |
+
margin: 8px 0;
|
| 619 |
+
}
|
| 620 |
|
| 621 |
+
/* ── Voice visualizer ── */
|
| 622 |
+
.voice-visualizer {
|
| 623 |
+
display: flex;
|
| 624 |
+
align-items: center;
|
| 625 |
+
justify-content: center;
|
| 626 |
+
gap: 4px;
|
| 627 |
+
height: 0;
|
| 628 |
+
overflow: hidden;
|
| 629 |
+
transition: height 0.3s ease;
|
| 630 |
+
padding: 0 20px;
|
| 631 |
+
}
|
| 632 |
+
.voice-visualizer.active {
|
| 633 |
+
height: 56px;
|
| 634 |
+
}
|
| 635 |
+
.viz-bar {
|
| 636 |
+
width: 4px;
|
| 637 |
+
border-radius: 99px;
|
| 638 |
+
background: linear-gradient(180deg, var(--accent), var(--accent2));
|
| 639 |
+
height: 6px;
|
| 640 |
+
transition: height 0.08s ease;
|
| 641 |
+
flex-shrink: 0;
|
| 642 |
+
}
|
| 643 |
|
| 644 |
+
/* ── Controls ── */
|
| 645 |
+
.controls {
|
| 646 |
+
padding: 16px 20px 20px;
|
| 647 |
+
background: var(--bg2);
|
| 648 |
+
border-top: 1px solid var(--border);
|
| 649 |
+
flex-shrink: 0;
|
| 650 |
+
}
|
| 651 |
+
.text-row {
|
| 652 |
+
display: flex;
|
| 653 |
+
gap: 10px;
|
| 654 |
+
margin-bottom: 12px;
|
| 655 |
+
}
|
| 656 |
+
#text-input {
|
| 657 |
+
flex: 1;
|
| 658 |
+
background: var(--bg3);
|
| 659 |
+
border: 1px solid var(--border);
|
| 660 |
+
border-radius: 12px;
|
| 661 |
+
padding: 12px 16px;
|
| 662 |
+
color: var(--text);
|
| 663 |
+
font-size: 14px;
|
| 664 |
+
font-family: 'Hind Siliguri', sans-serif;
|
| 665 |
+
outline: none;
|
| 666 |
+
transition: border-color var(--transition);
|
| 667 |
+
}
|
| 668 |
+
#text-input::placeholder {
|
| 669 |
+
color: var(--text3);
|
| 670 |
+
}
|
| 671 |
+
#text-input:focus {
|
| 672 |
+
border-color: var(--accent);
|
| 673 |
+
}
|
| 674 |
|
| 675 |
+
#send-btn {
|
| 676 |
+
background: linear-gradient(135deg, var(--accent), var(--accent2));
|
| 677 |
+
border: none;
|
| 678 |
+
border-radius: 12px;
|
| 679 |
+
padding: 12px 16px;
|
| 680 |
+
cursor: pointer;
|
| 681 |
+
color: #000;
|
| 682 |
+
display: flex;
|
| 683 |
+
align-items: center;
|
| 684 |
+
transition:
|
| 685 |
+
opacity var(--transition),
|
| 686 |
+
transform 0.1s;
|
| 687 |
+
}
|
| 688 |
+
#send-btn:hover {
|
| 689 |
+
opacity: 0.88;
|
| 690 |
+
}
|
| 691 |
+
#send-btn:active {
|
| 692 |
+
transform: scale(0.95);
|
| 693 |
}
|
| 694 |
|
| 695 |
+
.voice-row {
|
| 696 |
+
display: flex;
|
| 697 |
+
gap: 10px;
|
| 698 |
+
}
|
| 699 |
+
.mic-btn {
|
| 700 |
+
flex: 1;
|
| 701 |
+
display: flex;
|
| 702 |
+
align-items: center;
|
| 703 |
+
justify-content: center;
|
| 704 |
+
gap: 8px;
|
| 705 |
+
padding: 13px 20px;
|
| 706 |
+
border-radius: 14px;
|
| 707 |
+
border: 1.5px solid var(--border2);
|
| 708 |
+
background: var(--bg3);
|
| 709 |
+
color: var(--text);
|
| 710 |
+
cursor: pointer;
|
| 711 |
+
font-size: 14px;
|
| 712 |
+
font-family: 'Hind Siliguri', sans-serif;
|
| 713 |
+
transition: all var(--transition);
|
| 714 |
+
position: relative;
|
| 715 |
+
overflow: hidden;
|
| 716 |
+
}
|
| 717 |
+
.mic-btn::before {
|
| 718 |
+
content: '';
|
| 719 |
+
position: absolute;
|
| 720 |
+
inset: 0;
|
| 721 |
+
background: linear-gradient(135deg, var(--accent), var(--accent2));
|
| 722 |
+
opacity: 0;
|
| 723 |
+
transition: opacity var(--transition);
|
| 724 |
+
}
|
| 725 |
+
.mic-btn:hover::before {
|
| 726 |
+
opacity: 0.08;
|
| 727 |
+
}
|
| 728 |
+
.mic-btn.mic-listening {
|
| 729 |
+
border-color: var(--accent);
|
| 730 |
+
box-shadow:
|
| 731 |
+
0 0 0 2px rgba(34, 211, 238, 0.2),
|
| 732 |
+
inset 0 0 20px rgba(34, 211, 238, 0.05);
|
| 733 |
+
}
|
| 734 |
+
.mic-btn.mic-recording {
|
| 735 |
+
border-color: var(--red);
|
| 736 |
+
animation: pulse-red 0.8s ease-in-out infinite;
|
| 737 |
+
}
|
| 738 |
+
@keyframes pulse-red {
|
| 739 |
+
0%,
|
| 740 |
+
100% {
|
| 741 |
+
box-shadow: 0 0 0 0 rgba(248, 113, 113, 0.4);
|
| 742 |
+
}
|
| 743 |
+
50% {
|
| 744 |
+
box-shadow: 0 0 0 8px rgba(248, 113, 113, 0);
|
| 745 |
+
}
|
| 746 |
+
}
|
| 747 |
+
.mic-btn.mic-processing {
|
| 748 |
+
border-color: var(--yellow);
|
| 749 |
+
box-shadow: 0 0 0 2px rgba(251, 191, 36, 0.15);
|
| 750 |
+
}
|
| 751 |
+
.mic-icon {
|
| 752 |
+
font-size: 18px;
|
| 753 |
+
position: relative;
|
| 754 |
+
z-index: 1;
|
| 755 |
+
}
|
| 756 |
+
.mic-label {
|
| 757 |
+
position: relative;
|
| 758 |
+
z-index: 1;
|
| 759 |
}
|
| 760 |
|
| 761 |
+
.stop-btn {
|
| 762 |
+
background: rgba(248, 113, 113, 0.1);
|
| 763 |
+
border: 1.5px solid rgba(248, 113, 113, 0.3);
|
| 764 |
+
color: var(--red);
|
| 765 |
+
border-radius: 14px;
|
| 766 |
+
padding: 13px 16px;
|
| 767 |
+
cursor: pointer;
|
| 768 |
+
font-size: 13px;
|
| 769 |
+
font-family: 'Hind Siliguri', sans-serif;
|
| 770 |
+
display: flex;
|
| 771 |
+
align-items: center;
|
| 772 |
+
gap: 6px;
|
| 773 |
+
transition: all var(--transition);
|
| 774 |
+
}
|
| 775 |
+
.stop-btn:hover {
|
| 776 |
+
background: rgba(248, 113, 113, 0.2);
|
| 777 |
+
border-color: var(--red);
|
| 778 |
+
}
|
| 779 |
+
.stop-btn:active {
|
| 780 |
+
transform: scale(0.95);
|
| 781 |
}
|
| 782 |
|
| 783 |
+
/* ── Scrollbar ── */
|
| 784 |
+
.sidebar::-webkit-scrollbar {
|
| 785 |
+
width: 4px;
|
| 786 |
+
}
|
| 787 |
+
.sidebar::-webkit-scrollbar-track {
|
| 788 |
+
background: transparent;
|
| 789 |
+
}
|
| 790 |
+
.sidebar::-webkit-scrollbar-thumb {
|
| 791 |
+
background: var(--border);
|
| 792 |
+
border-radius: 99px;
|
| 793 |
}
|
| 794 |
|
| 795 |
+
/* ── Responsive ── */
|
| 796 |
+
@media (max-width: 680px) {
|
| 797 |
+
.sidebar {
|
| 798 |
+
position: fixed;
|
| 799 |
+
left: 0;
|
| 800 |
+
top: 0;
|
| 801 |
+
bottom: 0;
|
| 802 |
+
transform: translateX(-100%);
|
| 803 |
+
z-index: 100;
|
| 804 |
+
}
|
| 805 |
+
.sidebar.mobile-open {
|
| 806 |
+
transform: translateX(0);
|
| 807 |
+
}
|
| 808 |
+
.mobile-menu-btn {
|
| 809 |
+
display: flex;
|
| 810 |
+
}
|
| 811 |
+
.topbar-center {
|
| 812 |
+
font-size: 13px;
|
| 813 |
+
}
|
| 814 |
+
.message {
|
| 815 |
+
max-width: 90%;
|
| 816 |
+
font-size: 14px;
|
| 817 |
+
}
|
| 818 |
}
|
| 819 |
|
| 820 |
+
/* ── Thinking bubble (animated "..." while AI processes) ── */
|
| 821 |
+
.message.thinking {
|
| 822 |
+
display: flex;
|
| 823 |
+
align-items: center;
|
| 824 |
+
gap: 5px;
|
| 825 |
+
padding: 12px 16px;
|
| 826 |
+
background: var(--ai-bg);
|
| 827 |
+
border: 1px solid var(--border);
|
| 828 |
+
border-radius: 16px 16px 16px 4px;
|
| 829 |
+
align-self: flex-start;
|
| 830 |
+
max-width: 80px;
|
| 831 |
}
|
| 832 |
+
.message.thinking .dot {
|
| 833 |
+
display: inline-block;
|
| 834 |
+
width: 7px;
|
| 835 |
+
height: 7px;
|
| 836 |
+
border-radius: 50%;
|
| 837 |
+
background: var(--accent2);
|
| 838 |
+
opacity: 0.4;
|
| 839 |
+
animation: dot-bounce 1.2s ease-in-out infinite;
|
| 840 |
+
}
|
| 841 |
+
.message.thinking .dot:nth-child(2) { animation-delay: 0.2s; }
|
| 842 |
+
.message.thinking .dot:nth-child(3) { animation-delay: 0.4s; }
|
| 843 |
|
| 844 |
+
@keyframes dot-bounce {
|
| 845 |
+
0%, 80%, 100% { transform: translateY(0); opacity: 0.4; }
|
| 846 |
+
40% { transform: translateY(-6px); opacity: 1; }
|
|
|
|
|
|
|
| 847 |
}
|
requirements.txt
CHANGED
|
@@ -58,3 +58,11 @@ mcp
|
|
| 58 |
# ===== Utility =====
|
| 59 |
uv
|
| 60 |
pytz
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
# ===== Utility =====
|
| 59 |
uv
|
| 60 |
pytz
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# ELEVENHACKS-3AD25E55
|
services/__init__.py
ADDED
|
File without changes
|
services/streaming.py
CHANGED
|
@@ -1,133 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
import asyncio
|
| 4 |
import re
|
| 5 |
from dataclasses import dataclass, field
|
| 6 |
-
from typing import
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
|
| 14 |
-
FIRST_FLUSH_HARD = 70
|
| 15 |
-
SUBSEQUENT_FLUSH_BOUNDARY_MIN = 40
|
| 16 |
-
SUBSEQUENT_FLUSH_HARD = 110
|
| 17 |
-
MIN_CHARS = 4
|
| 18 |
|
| 19 |
-
SENTENCE_BOUNDARIES = frozenset(".!?।॥\n")
|
| 20 |
-
CLAUSE_BOUNDARIES = frozenset(",;:—–")
|
| 21 |
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def _clean_for_tts(text: str) -> str:
|
| 24 |
-
|
| 25 |
-
text = re.sub(r"
|
| 26 |
-
text = re.sub(r"
|
| 27 |
-
text = re.sub(r"^\s*[
|
| 28 |
-
text = re.sub(r"
|
| 29 |
-
text = re.sub(r"
|
|
|
|
| 30 |
return text.strip()
|
| 31 |
|
| 32 |
|
| 33 |
-
def _should_flush(buffer: str, first_chunk: bool) -> bool:
|
| 34 |
-
"""
|
| 35 |
-
Return True if the buffer is ready to be sent to TTS.
|
| 36 |
|
| 37 |
-
Flushing strategy (per chunk):
|
| 38 |
-
1. If we hit a sentence boundary and have enough chars → flush.
|
| 39 |
-
2. If we're at the hard limit (even mid-sentence) → flush.
|
| 40 |
-
3. If we hit a clause boundary near the hard limit → flush early.
|
| 41 |
-
"""
|
| 42 |
-
n = len(buffer)
|
| 43 |
-
boundary_min = FIRST_FLUSH_BOUNDARY_MIN if first_chunk else SUBSEQUENT_FLUSH_BOUNDARY_MIN
|
| 44 |
-
hard_limit = FIRST_FLUSH_HARD if first_chunk else SUBSEQUENT_FLUSH_HARD
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
if n == 0:
|
| 47 |
return False
|
|
|
|
|
|
|
| 48 |
if n >= hard_limit:
|
| 49 |
return True
|
| 50 |
-
|
| 51 |
-
last_char = buffer[-1] if buffer else ""
|
| 52 |
if last_char in SENTENCE_BOUNDARIES and n >= boundary_min:
|
| 53 |
return True
|
| 54 |
-
if last_char in CLAUSE_BOUNDARIES and n >= hard_limit * 0.
|
| 55 |
return True
|
| 56 |
-
|
| 57 |
return False
|
| 58 |
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
@dataclass
|
| 61 |
class _AudioSlot:
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
chunks: list[bytes] = field(default_factory=list)
|
| 66 |
-
error: bool = False
|
| 67 |
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
synthesis → slot-ordered audio delivery.
|
| 73 |
|
| 74 |
-
Usage
|
| 75 |
-
─────
|
| 76 |
-
streamer = ParallelTTSStreamer()
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
| 88 |
"""
|
| 89 |
|
| 90 |
-
def __init__(self, voice: str =
|
| 91 |
-
|
| 92 |
-
self.
|
| 93 |
-
self.
|
|
|
|
| 94 |
self._first_chunk = True
|
| 95 |
-
self._slot_index = 0
|
| 96 |
self._slots: list[_AudioSlot] = []
|
| 97 |
self._slots_lock = asyncio.Lock()
|
| 98 |
self._tasks: list[asyncio.Task] = []
|
| 99 |
-
self.
|
|
|
|
| 100 |
|
|
|
|
| 101 |
|
| 102 |
async def add_token(self, token: str) -> None:
|
| 103 |
if not token or self._cancelled:
|
| 104 |
return
|
| 105 |
-
|
| 106 |
self.buffer += token
|
| 107 |
-
|
| 108 |
if _should_flush(self.buffer, self._first_chunk):
|
| 109 |
self._first_chunk = False
|
| 110 |
await self._schedule_chunk()
|
| 111 |
|
|
|
|
| 112 |
|
| 113 |
async def _schedule_chunk(self) -> None:
|
| 114 |
if self._cancelled:
|
| 115 |
self.buffer = ""
|
| 116 |
return
|
| 117 |
|
| 118 |
-
|
| 119 |
self.buffer = ""
|
| 120 |
-
|
| 121 |
-
text = _clean_for_tts(raw)
|
| 122 |
if len(text) < MIN_CHARS:
|
| 123 |
return
|
| 124 |
|
| 125 |
-
|
| 126 |
-
|
| 127 |
async with self._slots_lock:
|
| 128 |
slot = _AudioSlot(index=self._slot_index)
|
| 129 |
self._slot_index += 1
|
| 130 |
self._slots.append(slot)
|
|
|
|
| 131 |
|
| 132 |
task = asyncio.create_task(self._synthesise(text, slot))
|
| 133 |
self._tasks.append(task)
|
|
@@ -135,103 +196,118 @@ class ParallelTTSStreamer:
|
|
| 135 |
lambda t: self._tasks.remove(t) if t in self._tasks else None
|
| 136 |
)
|
| 137 |
|
|
|
|
| 138 |
|
| 139 |
async def _synthesise(self, text: str, slot: _AudioSlot) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
if self._cancelled:
|
| 141 |
-
slot.
|
| 142 |
-
slot.ready.set()
|
| 143 |
return
|
| 144 |
|
| 145 |
try:
|
| 146 |
-
|
| 147 |
-
async for chunk in communicate.stream():
|
| 148 |
if self._cancelled:
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
return
|
| 152 |
-
if chunk["type"] == "audio":
|
| 153 |
-
slot.chunks.append(chunk["data"])
|
| 154 |
except asyncio.CancelledError:
|
| 155 |
-
|
| 156 |
except Exception as exc:
|
| 157 |
-
print(f"[
|
| 158 |
-
slot.error = True
|
| 159 |
finally:
|
| 160 |
-
slot.
|
| 161 |
|
|
|
|
| 162 |
|
| 163 |
async def flush(self) -> None:
|
| 164 |
-
|
| 165 |
if self.buffer.strip():
|
| 166 |
await self._schedule_chunk()
|
|
|
|
| 167 |
|
| 168 |
-
|
| 169 |
-
if self._tasks:
|
| 170 |
-
await asyncio.gather(*self._tasks, return_exceptions=True)
|
| 171 |
-
|
| 172 |
-
self._done_event.set()
|
| 173 |
-
|
| 174 |
|
| 175 |
async def cancel(self) -> None:
|
| 176 |
"""
|
| 177 |
-
Immediately
|
| 178 |
-
|
| 179 |
-
|
|
|
|
| 180 |
"""
|
| 181 |
-
self._cancelled = True
|
| 182 |
|
| 183 |
-
|
| 184 |
-
task.cancel()
|
| 185 |
self._tasks.clear()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
-
|
| 188 |
async with self._slots_lock:
|
| 189 |
for slot in self._slots:
|
| 190 |
-
if not slot.
|
| 191 |
-
slot.
|
| 192 |
-
slot.ready.set()
|
| 193 |
|
| 194 |
-
self.
|
|
|
|
| 195 |
|
|
|
|
| 196 |
|
| 197 |
-
async def stream_audio(self):
|
| 198 |
"""
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
proceed in parallel while the consumer is yielding slot N's bytes.
|
| 202 |
"""
|
| 203 |
delivered = 0
|
| 204 |
|
| 205 |
while True:
|
| 206 |
-
|
| 207 |
async with self._slots_lock:
|
| 208 |
-
if delivered < len(self._slots)
|
| 209 |
-
slot = self._slots[delivered]
|
| 210 |
-
else:
|
| 211 |
-
slot = None
|
| 212 |
|
| 213 |
if slot is None:
|
| 214 |
-
|
| 215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
break
|
| 217 |
-
await asyncio.sleep(0.005)
|
| 218 |
continue
|
| 219 |
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
|
|
|
| 226 |
|
| 227 |
delivered += 1
|
| 228 |
|
|
|
|
| 229 |
|
| 230 |
def reset(self) -> None:
|
|
|
|
| 231 |
self._cancelled = False
|
| 232 |
self._first_chunk = True
|
| 233 |
self.buffer = ""
|
| 234 |
self._slot_index = 0
|
| 235 |
self._slots.clear()
|
| 236 |
self._tasks.clear()
|
| 237 |
-
self.
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
services/streaming.py — Production-grade parallel TTS streamer
|
| 3 |
+
with dual backend support (Edge-TTS & ElevenLabs)
|
| 4 |
+
|
| 5 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 6 |
+
ROUTING CONFIG — mirrors tts.py; must stay in sync
|
| 7 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 8 |
+
USE_ELEVENLABS = True → ElevenLabs streaming TTS
|
| 9 |
+
USE_ELEVENLABS = False → Edge-TTS (free, no API key needed)
|
| 10 |
+
|
| 11 |
+
Note: This flag is read from tts.py at import time so you only need to
|
| 12 |
+
change it in ONE place (tts.py). streaming.py re-exports it for clarity.
|
| 13 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 14 |
+
|
| 15 |
+
Changelog (vs previous streaming.py):
|
| 16 |
+
──────────────────────────────────────
|
| 17 |
+
1. DUAL BACKEND ROUTING — _synthesise() dispatches to either
|
| 18 |
+
_edge_tts_stream() or _elevenlabs_stream() via the shared
|
| 19 |
+
text_to_speech_stream() unified API in tts.py.
|
| 20 |
+
|
| 21 |
+
2. VOICE OVERRIDE PER INSTANCE — ParallelTTSStreamer.__init__ accepts
|
| 22 |
+
an optional `voice` param. For Edge-TTS pass a voice name string;
|
| 23 |
+
for ElevenLabs pass a voice ID. None uses the tts.py defaults.
|
| 24 |
+
|
| 25 |
+
3. ELEVENLABS LATENCY TUNING — When ElevenLabs is active, the first-chunk
|
| 26 |
+
flush thresholds are slightly tighter (FIRST_FLUSH_BOUNDARY_MIN = 8 chars,
|
| 27 |
+
FIRST_FLUSH_HARD = 35 chars) so audio starts sooner, while subsequent
|
| 28 |
+
chunks use larger thresholds (35 / 100 chars) because ElevenLabs has higher
|
| 29 |
+
per-request latency and is better served by fewer, larger requests.
|
| 30 |
+
|
| 31 |
+
4. ALL PREVIOUS FIXES RETAINED:
|
| 32 |
+
• FIRST_FLUSH_BOUNDARY_MIN 15→10 (Edge-TTS) / 10→8 (ElevenLabs)
|
| 33 |
+
• '॥' (double danda) in SENTENCE_BOUNDARIES
|
| 34 |
+
• cancel() sets _cancelled BEFORE task.cancel() (race fix)
|
| 35 |
+
• asyncio.Event-based slot wake (no spin polling)
|
| 36 |
+
• MIN_CHARS = 3 (was 4)
|
| 37 |
+
"""
|
| 38 |
+
|
| 39 |
from __future__ import annotations
|
| 40 |
|
| 41 |
import asyncio
|
| 42 |
import re
|
| 43 |
from dataclasses import dataclass, field
|
| 44 |
+
from typing import AsyncGenerator
|
| 45 |
+
|
| 46 |
+
# Import the unified TTS API and the routing flag from tts.py
|
| 47 |
+
from services.tts import text_to_speech_stream, USE_ELEVENLABS, EDGE_VOICE
|
| 48 |
+
|
| 49 |
+
# ── Flush thresholds ───────────────────────────────────────────────────────────
|
| 50 |
+
# ElevenLabs has higher per-request overhead so we use slightly larger chunks
|
| 51 |
+
# to avoid many tiny API calls, while still starting audio quickly.
|
| 52 |
+
if USE_ELEVENLABS:
|
| 53 |
+
FIRST_FLUSH_BOUNDARY_MIN = 8 # Start TTS a touch earlier for latency
|
| 54 |
+
FIRST_FLUSH_HARD = 35
|
| 55 |
+
SUBSEQUENT_FLUSH_BOUNDARY_MIN = 35
|
| 56 |
+
SUBSEQUENT_FLUSH_HARD = 100
|
| 57 |
+
_backend_label = "ElevenLabs"
|
| 58 |
+
else:
|
| 59 |
+
FIRST_FLUSH_BOUNDARY_MIN = 10 # Edge-TTS: fine-grained chunking is cheap
|
| 60 |
+
FIRST_FLUSH_HARD = 40
|
| 61 |
+
SUBSEQUENT_FLUSH_BOUNDARY_MIN = 30
|
| 62 |
+
SUBSEQUENT_FLUSH_HARD = 90
|
| 63 |
+
_backend_label = "Edge-TTS"
|
| 64 |
+
|
| 65 |
+
print(f"[Streamer] TTS backend: {_backend_label}")
|
| 66 |
+
|
| 67 |
+
MIN_CHARS = 3 # Minimum chars to bother synthesising ("হ্যাঁ।" = 3 chars + danda)
|
| 68 |
|
| 69 |
+
SENTENCE_BOUNDARIES = frozenset(".!?।॥\n")
|
| 70 |
+
CLAUSE_BOUNDARIES = frozenset(",;:—–")
|
| 71 |
|
| 72 |
+
_SENTINEL = object()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
|
|
|
|
|
|
| 74 |
|
| 75 |
+
# ══════════════════════════════════════════════════════════════════════════
|
| 76 |
+
# TEXT CLEANING
|
| 77 |
+
# ══════════════════════════════════════════════════════════════════════════
|
| 78 |
|
| 79 |
def _clean_for_tts(text: str) -> str:
|
| 80 |
+
"""Strip markdown formatting that would be read aloud verbatim."""
|
| 81 |
+
text = re.sub(r"\*{1,3}", "", text)
|
| 82 |
+
text = re.sub(r"#+\s*", "", text)
|
| 83 |
+
text = re.sub(r"^\s*[-•]\s*", "", text, flags=re.MULTILINE)
|
| 84 |
+
text = re.sub(r"^\s*[\d০-৯]+[.)]\s*", "", text, flags=re.MULTILINE)
|
| 85 |
+
text = re.sub(r"`+", "", text)
|
| 86 |
+
text = re.sub(r"\n{2,}", "\n", text)
|
| 87 |
return text.strip()
|
| 88 |
|
| 89 |
|
|
|
|
|
|
|
|
|
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
+
# ══════════════════════════════════════════════════════════════════════════
|
| 93 |
+
# FLUSH LOGIC
|
| 94 |
+
# ══════════════════════════════════════════════════════════════════════════
|
| 95 |
+
|
| 96 |
+
def _should_flush(buffer: str, first_chunk: bool) -> bool:
    """Decide whether the accumulated text should be handed to TTS now.

    The first chunk uses the FIRST_FLUSH_* thresholds and every later chunk
    uses the SUBSEQUENT_FLUSH_* thresholds. A flush is triggered when:
      * the buffer has reached the hard length limit, or
      * it ends on a sentence boundary and is at least the boundary minimum, or
      * it ends on a clause boundary and is at least 75 % of the hard limit.
    An empty buffer never flushes.
    """
    if not buffer:
        return False

    if first_chunk:
        soft_min, hard_max = FIRST_FLUSH_BOUNDARY_MIN, FIRST_FLUSH_HARD
    else:
        soft_min, hard_max = SUBSEQUENT_FLUSH_BOUNDARY_MIN, SUBSEQUENT_FLUSH_HARD

    size = len(buffer)
    if size >= hard_max:
        return True

    tail = buffer[-1]
    ends_sentence = tail in SENTENCE_BOUNDARIES and size >= soft_min
    ends_late_clause = tail in CLAUSE_BOUNDARIES and size >= hard_max * 0.75
    return ends_sentence or ends_late_clause
|
| 110 |
|
| 111 |
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
# ══════════════════════════════════════════════════════════════════════════
|
| 115 |
+
# AUDIO SLOT
|
| 116 |
+
# ══════════════════════════════════════════════════════════════════════════
|
| 117 |
+
|
| 118 |
@dataclass
|
| 119 |
class _AudioSlot:
|
| 120 |
+
index: int
|
| 121 |
+
queue: asyncio.Queue = field(default_factory=lambda: asyncio.Queue())
|
| 122 |
+
done: bool = False
|
|
|
|
|
|
|
| 123 |
|
| 124 |
+
def mark_done(self) -> None:
|
| 125 |
+
self.done = True
|
| 126 |
+
self.queue.put_nowait(_SENTINEL)
|
| 127 |
|
| 128 |
+
def mark_error(self) -> None:
|
| 129 |
+
self.done = True
|
| 130 |
+
self.queue.put_nowait(_SENTINEL)
|
|
|
|
| 131 |
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
+
# ══════════════════════════════════════════════════════════════════════════
|
| 134 |
+
# PARALLEL TTS STREAMER
|
| 135 |
+
# ══════════════════════════════════════════════════════════════════════════
|
| 136 |
|
| 137 |
+
class ParallelTTSStreamer:
|
| 138 |
+
"""
|
| 139 |
+
LLM tokens → sentence chunks → parallel TTS (Edge-TTS or ElevenLabs)
|
| 140 |
+
→ ordered audio delivery over WebSocket.
|
| 141 |
+
|
| 142 |
+
Usage:
|
| 143 |
+
streamer = ParallelTTSStreamer() # uses tts.py defaults
|
| 144 |
+
streamer = ParallelTTSStreamer(voice=...) # override voice/voice-ID
|
| 145 |
|
| 146 |
+
The `voice` parameter meaning depends on USE_ELEVENLABS:
|
| 147 |
+
• Edge-TTS → pass an Edge-TTS voice name string
|
| 148 |
+
• ElevenLabs → pass an ElevenLabs voice ID string
|
| 149 |
+
If None, the tts.py module defaults are used.
|
| 150 |
"""
|
| 151 |
|
| 152 |
+
def __init__(self, voice: str | None = None) -> None:
|
| 153 |
+
# None signals tts.py to use its own defaults
|
| 154 |
+
self.voice = voice
|
| 155 |
+
self.buffer = ""
|
| 156 |
+
self._cancelled = False
|
| 157 |
self._first_chunk = True
|
| 158 |
+
self._slot_index = 0
|
| 159 |
self._slots: list[_AudioSlot] = []
|
| 160 |
self._slots_lock = asyncio.Lock()
|
| 161 |
self._tasks: list[asyncio.Task] = []
|
| 162 |
+
self._llm_done = asyncio.Event()
|
| 163 |
+
self._slot_added = asyncio.Event() # wakes stream_audio without spin
|
| 164 |
|
| 165 |
+
# ── Token ingestion ────────────────────────────────────────────────────────
|
| 166 |
|
| 167 |
async def add_token(self, token: str) -> None:
    """Append one LLM token to the buffer and schedule a chunk if it is ready.

    Empty tokens, and any token arriving after cancellation, are ignored.
    """
    if self._cancelled or not token:
        return

    self.buffer = self.buffer + token
    if not _should_flush(self.buffer, self._first_chunk):
        return

    # The first-chunk thresholds apply only until the first flush decision.
    self._first_chunk = False
    await self._schedule_chunk()
|
| 174 |
|
| 175 |
+
# ── Chunk scheduling ───────────────────────────────────────────────────────
|
| 176 |
|
| 177 |
async def _schedule_chunk(self) -> None:
|
| 178 |
if self._cancelled:
|
| 179 |
self.buffer = ""
|
| 180 |
return
|
| 181 |
|
| 182 |
+
text = _clean_for_tts(self.buffer.strip())
|
| 183 |
self.buffer = ""
|
|
|
|
|
|
|
| 184 |
if len(text) < MIN_CHARS:
|
| 185 |
return
|
| 186 |
|
|
|
|
|
|
|
| 187 |
async with self._slots_lock:
|
| 188 |
slot = _AudioSlot(index=self._slot_index)
|
| 189 |
self._slot_index += 1
|
| 190 |
self._slots.append(slot)
|
| 191 |
+
self._slot_added.set() # wake stream_audio
|
| 192 |
|
| 193 |
task = asyncio.create_task(self._synthesise(text, slot))
|
| 194 |
self._tasks.append(task)
|
|
|
|
| 196 |
lambda t: self._tasks.remove(t) if t in self._tasks else None
|
| 197 |
)
|
| 198 |
|
| 199 |
+
# ── TTS synthesis — routes to active backend ───────────────────────────────
|
| 200 |
|
| 201 |
async def _synthesise(self, text: str, slot: _AudioSlot) -> None:
|
| 202 |
+
"""
|
| 203 |
+
Calls the unified text_to_speech_stream() from tts.py which internally
|
| 204 |
+
dispatches to Edge-TTS or ElevenLabs based on USE_ELEVENLABS.
|
| 205 |
+
|
| 206 |
+
The optional self.voice parameter is forwarded as-is:
|
| 207 |
+
• Edge-TTS → voice name string (e.g. "bn-BD-PradeepNeural")
|
| 208 |
+
• ElevenLabs → voice ID string (e.g. "pNInz6obpgDQGcFmaJgB")
|
| 209 |
+
"""
|
| 210 |
if self._cancelled:
|
| 211 |
+
slot.mark_error()
|
|
|
|
| 212 |
return
|
| 213 |
|
| 214 |
try:
|
| 215 |
+
async for chunk in text_to_speech_stream(text, voice=self.voice):
|
|
|
|
| 216 |
if self._cancelled:
|
| 217 |
+
break
|
| 218 |
+
await slot.queue.put(chunk)
|
|
|
|
|
|
|
|
|
|
| 219 |
except asyncio.CancelledError:
|
| 220 |
+
pass
|
| 221 |
except Exception as exc:
|
| 222 |
+
print(f"[Streamer] TTS error for '{text[:50]}': {exc}")
|
|
|
|
| 223 |
finally:
|
| 224 |
+
slot.mark_done()
|
| 225 |
|
| 226 |
+
# ── Flush ──────────────────────────────────────────────────────────────────
|
| 227 |
|
| 228 |
async def flush(self) -> None:
    """Finish the turn: synthesise any buffered remainder and signal completion.

    Call this once after the LLM stream ends. Any non-blank text still in
    the buffer is scheduled as a final chunk, then ``_llm_done`` is set.

    ``_slot_added`` is set as well. ``stream_audio`` blocks on that event
    when it has run out of slots; without this wake-up, a consumer that is
    already waiting when the LLM finishes (and no further slot is created)
    would only exit after its wait timeout expires.
    """
    if self.buffer.strip():
        await self._schedule_chunk()
    self._llm_done.set()
    self._slot_added.set()  # wake a waiting stream_audio so it sees completion
|
| 233 |
|
| 234 |
+
# ── Cancel ────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
|
| 236 |
async def cancel(self) -> None:
    """Stop all in-flight TTS work and release anything waiting on it.

    The cancelled flag is raised before any task is cancelled, so a task
    that is still running and checks the flag will not enqueue more audio.
    Every slot that has not been closed yet is closed, and both events are
    set so that ``stream_audio`` can finish.
    """
    self._cancelled = True  # must precede task.cancel() — closes the race window

    in_flight = self._tasks[:]
    self._tasks.clear()
    for task in in_flight:
        task.cancel()
    if in_flight:
        # Wait for the tasks to unwind; their exceptions are not propagated.
        await asyncio.gather(*in_flight, return_exceptions=True)

    async with self._slots_lock:
        unfinished = [slot for slot in self._slots if not slot.done]
        for slot in unfinished:
            slot.mark_error()

    self._llm_done.set()
    self._slot_added.set()  # unblock any waiting stream_audio
|
| 259 |
|
| 260 |
+
# ── Audio delivery ─────────────────────────────────────────────────────────
|
| 261 |
|
| 262 |
+
async def stream_audio(self) -> AsyncGenerator[bytes, None]:
    """Yield synthesised audio in the order the chunks were scheduled.

    Slots are consumed strictly by position, so sentence order is preserved
    even though synthesis tasks run concurrently. The generator ends when
    the LLM is marked done and every slot has been delivered, or when no new
    slot arrives within the wait timeout.
    """
    next_index = 0

    while True:
        async with self._slots_lock:
            current = self._slots[next_index] if next_index < len(self._slots) else None

        if current is not None:
            # Drain this slot's queue up to its end-of-slot sentinel.
            while True:
                payload = await current.queue.get()
                if payload is _SENTINEL:
                    break
                if not self._cancelled:
                    yield payload
            next_index += 1
            continue

        # No slot at this position yet.
        if self._llm_done.is_set():
            async with self._slots_lock:
                slot_count = len(self._slots)
            if next_index >= slot_count:
                break

        # Block on the event rather than polling.
        self._slot_added.clear()
        try:
            await asyncio.wait_for(
                self._slot_added.wait(),
                timeout=10.0,  # ElevenLabs can be slower; 10 s guard
            )
        except asyncio.TimeoutError:
            print("[Streamer] Timed out waiting for next TTS slot.")
            break
|
| 301 |
|
| 302 |
+
# ── Reset ──────────────────────────────────────────────────────────────────
|
| 303 |
|
| 304 |
def reset(self) -> None:
    """Return the streamer to its initial state so it can serve another turn.

    Clears the buffer, slot list, task list and both events, and restores
    the flags and the slot counter to their starting values. Tasks in the
    list are dropped, not cancelled.
    """
    # Flags and text buffer
    self.buffer = ""
    self._first_chunk = True
    self._cancelled = False

    # Slot / task bookkeeping (lists are cleared in place)
    self._slot_index = 0
    for bucket in (self._slots, self._tasks):
        bucket.clear()

    # Signalling events
    for event in (self._llm_done, self._slot_added):
        event.clear()
|
services/stt.py
CHANGED
|
@@ -1,103 +1,172 @@
|
|
| 1 |
"""
|
| 2 |
-
services/stt.py —
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
─────────────
|
| 6 |
-
1.
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
"""
|
| 15 |
|
| 16 |
from __future__ import annotations
|
| 17 |
|
| 18 |
import asyncio
|
|
|
|
| 19 |
import os
|
| 20 |
import re
|
| 21 |
import subprocess
|
| 22 |
import tempfile
|
| 23 |
-
|
|
|
|
| 24 |
|
| 25 |
from faster_whisper import WhisperModel
|
| 26 |
|
| 27 |
# ── Bangla / wrong-script patterns ────────────────────────────────────────────
|
| 28 |
BANGLA_PATTERN = re.compile(r"[\u0980-\u09FF]")
|
| 29 |
WRONG_SCRIPT_PATTERN = re.compile(
|
| 30 |
-
r"[\u0600-\u06FF"
|
| 31 |
-
r"\u0750-\u077F"
|
| 32 |
-
r"\uFB50-\uFDFF"
|
| 33 |
-
r"\uFE70-\uFEFF]"
|
| 34 |
)
|
| 35 |
|
| 36 |
-
# ──
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
_model: WhisperModel | None = None
|
| 38 |
-
_model_lock
|
| 39 |
-
|
| 40 |
_gpu_semaphore: asyncio.Semaphore | None = None
|
| 41 |
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
Load WhisperModel on first call, return the cached instance thereafter.
|
| 46 |
-
Thread-safe via a threading.Lock (called from worker threads).
|
| 47 |
-
"""
|
| 48 |
global _model
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
with _model_lock:
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
def _get_semaphore() -> asyncio.Semaphore:
|
| 63 |
-
"""
|
| 64 |
-
Return (or create) a per-event-loop asyncio.Semaphore(1).
|
| 65 |
-
Must be called from the async context (event-loop thread).
|
| 66 |
-
"""
|
| 67 |
global _gpu_semaphore
|
| 68 |
if _gpu_semaphore is None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
_gpu_semaphore = asyncio.Semaphore(1)
|
| 70 |
return _gpu_semaphore
|
| 71 |
|
| 72 |
|
| 73 |
-
# ──
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
def _is_valid_bangla(text: str) -> bool:
|
| 75 |
bangla_chars = len(BANGLA_PATTERN.findall(text))
|
| 76 |
wrong_chars = len(WRONG_SCRIPT_PATTERN.findall(text))
|
| 77 |
total_alpha = sum(1 for c in text if c.isalpha())
|
| 78 |
-
|
| 79 |
if total_alpha == 0:
|
| 80 |
-
return True
|
| 81 |
-
|
| 82 |
-
if (wrong_chars / total_alpha) > 0.30: # >30 % Arabic/Urdu → reject
|
| 83 |
return False
|
| 84 |
-
|
| 85 |
-
if total_alpha > 5 and bangla_chars == 0: # long but zero Bangla → reject
|
| 86 |
return False
|
| 87 |
-
|
| 88 |
return True
|
| 89 |
|
| 90 |
|
| 91 |
# ── Core processor ─────────────────────────────────────────────────────────────
|
| 92 |
class STTProcessor:
|
| 93 |
MIN_INPUT_BYTES = 3_000
|
|
|
|
| 94 |
|
| 95 |
-
# ── ffmpeg helper ──────────────────────────────────────────────────────────
|
| 96 |
@staticmethod
|
| 97 |
def _to_wav(audio_bytes: bytes) -> str | None:
|
| 98 |
"""
|
| 99 |
-
Convert browser WebM/Opus blob → 16 kHz mono WAV
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
"""
|
| 102 |
in_path = out_path = None
|
| 103 |
try:
|
|
@@ -112,42 +181,48 @@ class STTProcessor:
|
|
| 112 |
"ffmpeg", "-y", "-loglevel", "warning",
|
| 113 |
"-i", in_path,
|
| 114 |
"-ar", "16000", "-ac", "1",
|
| 115 |
-
"-af", "
|
| 116 |
"-f", "wav", out_path,
|
| 117 |
],
|
| 118 |
stdout=subprocess.DEVNULL,
|
| 119 |
stderr=subprocess.PIPE,
|
|
|
|
| 120 |
)
|
| 121 |
-
|
| 122 |
if result.returncode != 0:
|
| 123 |
print("[STT] ffmpeg error:", result.stderr.decode(errors="replace").strip())
|
| 124 |
return None
|
| 125 |
if not os.path.exists(out_path) or os.path.getsize(out_path) < 500:
|
| 126 |
print("[STT] ffmpeg produced empty WAV.")
|
| 127 |
return None
|
| 128 |
-
|
| 129 |
print(f"[STT] WAV ready: {os.path.getsize(out_path):,} bytes")
|
| 130 |
return out_path
|
| 131 |
-
|
|
|
|
|
|
|
| 132 |
except Exception as exc:
|
| 133 |
print(f"[STT] _to_wav: {exc}")
|
| 134 |
return None
|
| 135 |
finally:
|
| 136 |
if in_path and os.path.exists(in_path):
|
| 137 |
-
try:
|
| 138 |
-
|
| 139 |
-
except OSError:
|
| 140 |
-
pass
|
| 141 |
|
| 142 |
-
# ── Synchronous transcription (runs in worker thread) ─────────────────────
|
| 143 |
@staticmethod
|
| 144 |
def _transcribe_sync(wav_path: str) -> str | None:
|
| 145 |
"""
|
| 146 |
-
Whisper inference
|
| 147 |
-
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
"""
|
| 150 |
model = _get_model()
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
segments, info = model.transcribe(
|
| 153 |
wav_path,
|
|
@@ -157,61 +232,163 @@ class STTProcessor:
|
|
| 157 |
condition_on_previous_text=False,
|
| 158 |
temperature=0,
|
| 159 |
suppress_tokens=[-1],
|
| 160 |
-
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
| 162 |
)
|
| 163 |
-
|
| 164 |
text = " ".join(seg.text.strip() for seg in segments).strip()
|
| 165 |
print(f"[STT] Lang={info.language} prob={info.language_probability:.2f}")
|
| 166 |
return text
|
| 167 |
|
| 168 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
async def transcribe(self, audio_bytes: bytes) -> str | None:
|
| 170 |
-
"""
|
| 171 |
-
Full pipeline: validate → ffmpeg → GPU inference.
|
| 172 |
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
print(f"[STT] Too short ({len(audio_bytes)} B), skipping.")
|
| 179 |
return None
|
| 180 |
|
| 181 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
wav_path = await asyncio.to_thread(self._to_wav, audio_bytes)
|
| 183 |
if not wav_path:
|
| 184 |
return None
|
| 185 |
|
| 186 |
sem = _get_semaphore()
|
|
|
|
| 187 |
try:
|
| 188 |
-
async with sem:
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
except Exception as exc:
|
| 191 |
print(f"[STT] transcribe error: {exc}")
|
| 192 |
-
import traceback; traceback.print_exc()
|
| 193 |
return None
|
|
|
|
| 194 |
finally:
|
| 195 |
-
|
| 196 |
-
|
| 197 |
os.remove(wav_path)
|
| 198 |
-
|
| 199 |
-
|
| 200 |
|
| 201 |
-
|
|
|
|
|
|
|
|
|
|
| 202 |
print("[STT] Empty transcript.")
|
| 203 |
return None
|
| 204 |
|
| 205 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
words = text.split()
|
| 207 |
-
if len(words) > 5 and (len(set(words)) / len(words)) < 0.25:
|
| 208 |
-
print(f"[STT] Hallucination (repetition) discarded: {text[:60]}")
|
| 209 |
-
return None
|
| 210 |
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
return None
|
| 215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
print(f"[STT] Transcript: {text}")
|
| 217 |
return text
|
|
|
|
| 1 |
"""
|
| 2 |
+
services/stt.py — Production-grade Faster-Whisper STT
|
| 3 |
+
|
| 4 |
+
Changes from original:
|
| 5 |
+
──────────────────────
|
| 6 |
+
1. BANGLA INITIAL PROMPT — A short Bangla seed sentence primes the decoder
|
| 7 |
+
to stay in Bengali Unicode (U+0980–U+09FF) space. Without this, Whisper
|
| 8 |
+
occasionally outputs romanised Bangla or Hindi for short/ambiguous clips.
|
| 9 |
+
|
| 10 |
+
2. TIGHTER THRESHOLDS:
|
| 11 |
+
- log_prob_threshold: -1.0 → -0.5
|
| 12 |
+
Original accepted EVERY segment regardless of model confidence. -0.5
|
| 13 |
+
rejects low-confidence hallucinations before the repetition guard runs,
|
| 14 |
+
saving GPU time and reducing bad outputs.
|
| 15 |
+
- no_speech_threshold: 0.5 → 0.6
|
| 16 |
+
Slightly stricter — avoids transcribing breath noises as text.
|
| 17 |
+
- compression_ratio_threshold: explicit 2.4 (same as default, but now
|
| 18 |
+
we can tune it easily).
|
| 19 |
+
|
| 20 |
+
3. BETTER FFMPEG PIPELINE — Replaced `loudnorm` (EBU R128, designed for
|
| 21 |
+
broadcast audio) with a lightweight chain:
|
| 22 |
+
highpass f=80 → afftdn nf=-25 → aresample=resampler=swr
|
| 23 |
+
This removes low-frequency rumble, light background noise, and resamples
|
| 24 |
+
cleanly to 16 kHz without the over-compression artefacts loudnorm
|
| 25 |
+
introduces on short (1–5 s) speech clips.
|
| 26 |
+
|
| 27 |
+
4. AUDIO SIZE CAP — Added MAX_INPUT_BYTES (5 MB). Prevents runaway memory
|
| 28 |
+
usage if a browser bug sends a huge blob.
|
| 29 |
+
|
| 30 |
+
5. MODEL SELECTION VIA ENV — STT_MODEL env var allows switching to
|
| 31 |
+
large-v3-turbo (4× faster, similar Bangla accuracy) without code changes.
|
| 32 |
+
Defaults to large-v3 for maximum quality.
|
| 33 |
+
|
| 34 |
+
6. All other logic (background preload, singleton, semaphore, hallucination
|
| 35 |
+
guard, script validation) is preserved unchanged.
|
| 36 |
"""
|
| 37 |
|
| 38 |
from __future__ import annotations
|
| 39 |
|
| 40 |
import asyncio
|
| 41 |
+
import io
|
| 42 |
import os
|
| 43 |
import re
|
| 44 |
import subprocess
|
| 45 |
import tempfile
|
| 46 |
+
import threading
|
| 47 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 48 |
|
| 49 |
from faster_whisper import WhisperModel
|
| 50 |
|
| 51 |
# ── Bangla / wrong-script patterns ────────────────────────────────────────────
|
| 52 |
BANGLA_PATTERN = re.compile(r"[\u0980-\u09FF]")
|
| 53 |
WRONG_SCRIPT_PATTERN = re.compile(
|
| 54 |
+
r"[\u0600-\u06FF"
|
| 55 |
+
r"\u0750-\u077F"
|
| 56 |
+
r"\uFB50-\uFDFF"
|
| 57 |
+
r"\uFE70-\uFEFF]"
|
| 58 |
)
|
| 59 |
|
| 60 |
+
# ── Bangla decoder seed ────────────────────────────────────────────────────────
|
| 61 |
+
# A short natural Bangla sentence primes the Whisper decoder to prefer the
|
| 62 |
+
# Bengali Unicode block. Keep it short (< 20 words) so it doesn't dominate
|
| 63 |
+
# the context window for short utterances.
|
| 64 |
+
_BANGLA_SEED = "আমি আপনার সাথে বাংলায় কথা বলছি।"
|
| 65 |
+
|
| 66 |
+
# ── Model configuration ────────────────────────────────────────────────────────
|
| 67 |
+
# Set STT_MODEL=large-v3-turbo in .env for faster (but still high-quality) STT.
|
| 68 |
+
_STT_MODEL = os.getenv("STT_MODEL", "large-v3")
|
| 69 |
+
_COMPUTE_TYPE = os.getenv("STT_COMPUTE_TYPE", "int8_float32")
|
| 70 |
+
|
| 71 |
+
# ── Singleton state ────────────────────────────────────────────────────────────
|
| 72 |
_model: WhisperModel | None = None
|
| 73 |
+
_model_lock = threading.Lock()
|
| 74 |
+
_model_ready = threading.Event()
|
| 75 |
_gpu_semaphore: asyncio.Semaphore | None = None
|
| 76 |
|
| 77 |
+
_inference_pool = ThreadPoolExecutor(max_workers=1, thread_name_prefix="whisper")
|
| 78 |
+
|
| 79 |
|
| 80 |
+
# ── Model loader ───────────────────────────────────────────────────────────────
|
| 81 |
+
def _load_and_warm() -> None:
    """Load the Whisper model, run one warmup pass, then publish it.

    The model is built from ``_STT_MODEL`` / ``_COMPUTE_TYPE`` on the CUDA
    device and exercised once on a short silent clip before being stored in
    the module-level ``_model`` under ``_model_lock``. Any failure is printed
    and leaves ``_model`` unchanged. ``_model_ready`` is set in every case,
    so waiters are released whether or not loading succeeded.
    """
    global _model
    try:
        print(f"[STT] Loading Faster-Whisper {_STT_MODEL} on CUDA ({_COMPUTE_TYPE}) …")
        loaded = WhisperModel(
            _STT_MODEL,
            device="cuda",
            compute_type=_COMPUTE_TYPE,
            num_workers=1,
        )
        print("[STT] Model loaded. Running GPU warmup …")

        warmup_clip = _make_silence_wav(duration_s=0.5)
        warmup_segments = loaded.transcribe(warmup_clip, language="bn", beam_size=1)[0]
        # Consume the segment iterable fully; the results are discarded.
        for _ in warmup_segments:
            pass
        print("[STT] GPU warmup complete. STT ready.")

        # Publish only after the warmup has succeeded.
        with _model_lock:
            _model = loaded
    except Exception as exc:
        print(f"[STT] Model load/warmup failed: {exc}")
    finally:
        _model_ready.set()
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def _make_silence_wav(duration_s: float = 0.5, sample_rate: int = 16_000) -> io.BytesIO:
|
| 104 |
+
import struct, wave
|
| 105 |
+
buf = io.BytesIO()
|
| 106 |
+
n_samples = int(sample_rate * duration_s)
|
| 107 |
+
with wave.open(buf, "wb") as wf:
|
| 108 |
+
wf.setnchannels(1)
|
| 109 |
+
wf.setsampwidth(2)
|
| 110 |
+
wf.setframerate(sample_rate)
|
| 111 |
+
wf.writeframes(struct.pack(f"<{n_samples}h", *([0] * n_samples)))
|
| 112 |
+
buf.seek(0)
|
| 113 |
+
return buf
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def _get_model() -> WhisperModel | None:
    """Return the current module-level model, read while holding ``_model_lock``.

    The result is ``None`` when no model has been published to ``_model``.
    """
    _model_lock.acquire()
    try:
        return _model
    finally:
        _model_lock.release()
|
| 119 |
|
| 120 |
|
| 121 |
def _get_semaphore() -> asyncio.Semaphore:
    """Return the shared GPU semaphore, creating it on first use.

    The semaphore has a single permit and is cached in the module-level
    ``_gpu_semaphore``, so every caller receives the same object.

    NOTE(review): constructing the semaphore does not itself take an event
    loop argument here; if this service ever runs more than one event loop,
    confirm that sharing one semaphore between them is acceptable.
    """
    global _gpu_semaphore
    if _gpu_semaphore is None:
        _gpu_semaphore = asyncio.Semaphore(1)
    return _gpu_semaphore
|
| 132 |
|
| 133 |
|
| 134 |
+
# ── Background load at import ──────────────────────────────────────────────────
|
| 135 |
+
_bg_thread = threading.Thread(target=_load_and_warm, daemon=True, name="whisper-loader")
|
| 136 |
+
_bg_thread.start()
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
# ── Bangla validation ──────────────────────────────────────────────────────────
|
| 140 |
def _is_valid_bangla(text: str) -> bool:
    """Heuristically decide whether *text* looks like Bangla output.

    Rules, evaluated in order:
      * no alphabetic characters at all      -> accepted
      * more than 30 % wrong-script matches  -> rejected
      * over 5 letters and no Bangla matches -> rejected
      * anything else                        -> accepted

    Matches are counted with the module-level ``BANGLA_PATTERN`` and
    ``WRONG_SCRIPT_PATTERN``.
    """
    alpha_count = len([ch for ch in text if ch.isalpha()])
    if not alpha_count:
        return True

    wrong_count = len(WRONG_SCRIPT_PATTERN.findall(text))
    if wrong_count / alpha_count > 0.30:
        return False

    bangla_count = len(BANGLA_PATTERN.findall(text))
    return not (alpha_count > 5 and bangla_count == 0)
|
| 151 |
|
| 152 |
|
| 153 |
# ── Core processor ─────────────────────────────────────────────────────────────
|
| 154 |
class STTProcessor:
|
| 155 |
MIN_INPUT_BYTES = 3_000
|
| 156 |
+
MAX_INPUT_BYTES = 5_242_880 # 5 MB cap — prevents runaway blobs
|
| 157 |
|
|
|
|
| 158 |
@staticmethod
|
| 159 |
def _to_wav(audio_bytes: bytes) -> str | None:
|
| 160 |
"""
|
| 161 |
+
Convert browser WebM/Opus blob → 16 kHz mono WAV.
|
| 162 |
+
|
| 163 |
+
FIX: Replaced `loudnorm` with a lighter chain:
|
| 164 |
+
highpass f=80 — removes low-frequency rumble / HVAC noise
|
| 165 |
+
afftdn nf=-25 — light spectral noise reduction (−25 dB floor)
|
| 166 |
+
aresample — ensures clean 16 kHz output
|
| 167 |
+
|
| 168 |
+
This avoids the two-pass EBU R128 behaviour that loudnorm exhibits in
|
| 169 |
+
single-pass mode and that over-compresses short speech clips.
|
| 170 |
"""
|
| 171 |
in_path = out_path = None
|
| 172 |
try:
|
|
|
|
| 181 |
"ffmpeg", "-y", "-loglevel", "warning",
|
| 182 |
"-i", in_path,
|
| 183 |
"-ar", "16000", "-ac", "1",
|
| 184 |
+
"-af", "highpass=f=80,afftdn=nf=-25,aresample=resampler=swr",
|
| 185 |
"-f", "wav", out_path,
|
| 186 |
],
|
| 187 |
stdout=subprocess.DEVNULL,
|
| 188 |
stderr=subprocess.PIPE,
|
| 189 |
+
timeout=30, # failsafe: kill runaway ffmpeg
|
| 190 |
)
|
|
|
|
| 191 |
if result.returncode != 0:
|
| 192 |
print("[STT] ffmpeg error:", result.stderr.decode(errors="replace").strip())
|
| 193 |
return None
|
| 194 |
if not os.path.exists(out_path) or os.path.getsize(out_path) < 500:
|
| 195 |
print("[STT] ffmpeg produced empty WAV.")
|
| 196 |
return None
|
|
|
|
| 197 |
print(f"[STT] WAV ready: {os.path.getsize(out_path):,} bytes")
|
| 198 |
return out_path
|
| 199 |
+
except subprocess.TimeoutExpired:
|
| 200 |
+
print("[STT] ffmpeg timed out.")
|
| 201 |
+
return None
|
| 202 |
except Exception as exc:
|
| 203 |
print(f"[STT] _to_wav: {exc}")
|
| 204 |
return None
|
| 205 |
finally:
|
| 206 |
if in_path and os.path.exists(in_path):
|
| 207 |
+
try: os.remove(in_path)
|
| 208 |
+
except OSError: pass
|
|
|
|
|
|
|
| 209 |
|
|
|
|
| 210 |
@staticmethod
|
| 211 |
def _transcribe_sync(wav_path: str) -> str | None:
|
| 212 |
"""
|
| 213 |
+
Whisper inference — runs in the dedicated inference thread pool.
|
| 214 |
+
|
| 215 |
+
Key param changes vs original:
|
| 216 |
+
───────────────────────────────
|
| 217 |
+
initial_prompt : Bangla seed → keeps decoder in বাংলা script
|
| 218 |
+
log_prob_threshold : -0.5 (was -1.0 = accept everything)
|
| 219 |
+
no_speech_threshold : 0.6 (was 0.5 = slightly stricter)
|
| 220 |
+
compression_ratio_threshold: 2.4 (same as default, now explicit)
|
| 221 |
"""
|
| 222 |
model = _get_model()
|
| 223 |
+
if model is None:
|
| 224 |
+
print("[STT] Model not available.")
|
| 225 |
+
return None
|
| 226 |
|
| 227 |
segments, info = model.transcribe(
|
| 228 |
wav_path,
|
|
|
|
| 232 |
condition_on_previous_text=False,
|
| 233 |
temperature=0,
|
| 234 |
suppress_tokens=[-1],
|
| 235 |
+
# ── FIX: Bangla-optimised thresholds ─────────────────────────────
|
| 236 |
+
initial_prompt=_BANGLA_SEED, # primes decoder for বাংলা script
|
| 237 |
+
no_speech_threshold=0.6, # was 0.5; avoids breath-noise transcription
|
| 238 |
+
log_prob_threshold=-0.5, # was -1.0; rejects low-confidence segments
|
| 239 |
+
compression_ratio_threshold=2.4, # filter repetitive/garbage output
|
| 240 |
)
|
|
|
|
| 241 |
text = " ".join(seg.text.strip() for seg in segments).strip()
|
| 242 |
print(f"[STT] Lang={info.language} prob={info.language_probability:.2f}")
|
| 243 |
return text
|
| 244 |
|
| 245 |
+
# async def transcribe(self, audio_bytes: bytes) -> str | None:
|
| 246 |
+
# """Full pipeline: validate → wait for model → ffmpeg → GPU inference."""
|
| 247 |
+
# if len(audio_bytes) < self.MIN_INPUT_BYTES:
|
| 248 |
+
# print(f"[STT] Too short ({len(audio_bytes)} B), skipping.")
|
| 249 |
+
# return None
|
| 250 |
+
|
| 251 |
+
# # FIX: Cap oversized blobs early
|
| 252 |
+
# if len(audio_bytes) > self.MAX_INPUT_BYTES:
|
| 253 |
+
# print(f"[STT] Input too large ({len(audio_bytes):,} B), capping.")
|
| 254 |
+
# audio_bytes = audio_bytes[: self.MAX_INPUT_BYTES]
|
| 255 |
+
|
| 256 |
+
# if not _model_ready.is_set():
|
| 257 |
+
# print("[STT] Model loading, waiting …")
|
| 258 |
+
# await asyncio.to_thread(_model_ready.wait)
|
| 259 |
+
|
| 260 |
+
# wav_path = await asyncio.to_thread(self._to_wav, audio_bytes)
|
| 261 |
+
# if not wav_path:
|
| 262 |
+
# return None
|
| 263 |
+
|
| 264 |
+
# sem = _get_semaphore()
|
| 265 |
+
# try:
|
| 266 |
+
# async with sem:
|
| 267 |
+
# loop = asyncio.get_running_loop()
|
| 268 |
+
# text = await loop.run_in_executor(
|
| 269 |
+
# _inference_pool, self._transcribe_sync, wav_path
|
| 270 |
+
# )
|
| 271 |
+
# except Exception as exc:
|
| 272 |
+
# print(f"[STT] transcribe error: {exc}")
|
| 273 |
+
# import traceback; traceback.print_exc()
|
| 274 |
+
# return None
|
| 275 |
+
# finally:
|
| 276 |
+
# if os.path.exists(wav_path):
|
| 277 |
+
# try: os.remove(wav_path)
|
| 278 |
+
# except OSError: pass
|
| 279 |
+
|
| 280 |
+
# if not text:
|
| 281 |
+
# print("[STT] Empty transcript.")
|
| 282 |
+
# return None
|
| 283 |
+
|
| 284 |
+
# # Hallucination guard
|
| 285 |
+
# words = text.split()
|
| 286 |
+
# unique_ratio = len(set(words)) / len(words) if words else 1.0
|
| 287 |
+
# if len(words) >= 3 and unique_ratio < 0.40:
|
| 288 |
+
# print(f"[STT] Hallucination discarded (repetition): {text[:60]}")
|
| 289 |
+
# return None
|
| 290 |
+
# if len(words) == 2 and words[0] == words[1]:
|
| 291 |
+
# print(f"[STT] Hallucination discarded (2-word repeat): {text[:60]}")
|
| 292 |
+
# return None
|
| 293 |
+
|
| 294 |
+
# if not _is_valid_bangla(text):
|
| 295 |
+
# print(f"[STT] Wrong script discarded: {text[:60]}")
|
| 296 |
+
# return None
|
| 297 |
+
|
| 298 |
+
# print(f"[STT] Transcript: {text}")
|
| 299 |
+
# return text
|
| 300 |
+
|
| 301 |
+
|
| 302 |
async def transcribe(self, audio_bytes: bytes) -> str | None:
|
| 303 |
+
"""Robust STT pipeline optimized for streaming voice."""
|
|
|
|
| 304 |
|
| 305 |
+
# ─────────────────────────────
|
| 306 |
+
# 1. VERY LIGHT sanity check (DO NOT OVER FILTER)
|
| 307 |
+
# ─────────────────────────────
|
| 308 |
+
if not audio_bytes or len(audio_bytes) < 300:
|
| 309 |
+
print(f"[STT] Ignored tiny packet ({len(audio_bytes)} B)")
|
|
|
|
| 310 |
return None
|
| 311 |
|
| 312 |
+
# soft cap (avoid memory spikes)
|
| 313 |
+
if len(audio_bytes) > self.MAX_INPUT_BYTES:
|
| 314 |
+
print(f"[STT] Large input capped ({len(audio_bytes):,} B)")
|
| 315 |
+
audio_bytes = audio_bytes[: self.MAX_INPUT_BYTES]
|
| 316 |
+
|
| 317 |
+
# ─────────────────────────────
|
| 318 |
+
# 2. Wait for model readiness (unchanged)
|
| 319 |
+
# ─────────────────────────────
|
| 320 |
+
if not _model_ready.is_set():
|
| 321 |
+
print("[STT] Model loading, waiting …")
|
| 322 |
+
await asyncio.to_thread(_model_ready.wait)
|
| 323 |
+
|
| 324 |
+
# ─────────────────────────────
|
| 325 |
+
# 3. Convert audio
|
| 326 |
+
# ─────────────────────────────
|
| 327 |
wav_path = await asyncio.to_thread(self._to_wav, audio_bytes)
|
| 328 |
if not wav_path:
|
| 329 |
return None
|
| 330 |
|
| 331 |
sem = _get_semaphore()
|
| 332 |
+
|
| 333 |
try:
|
| 334 |
+
async with sem:
|
| 335 |
+
loop = asyncio.get_running_loop()
|
| 336 |
+
text = await loop.run_in_executor(
|
| 337 |
+
_inference_pool,
|
| 338 |
+
self._transcribe_sync,
|
| 339 |
+
wav_path
|
| 340 |
+
)
|
| 341 |
+
|
| 342 |
except Exception as exc:
|
| 343 |
print(f"[STT] transcribe error: {exc}")
|
|
|
|
| 344 |
return None
|
| 345 |
+
|
| 346 |
finally:
|
| 347 |
+
try:
|
| 348 |
+
if wav_path and os.path.exists(wav_path):
|
| 349 |
os.remove(wav_path)
|
| 350 |
+
except OSError:
|
| 351 |
+
pass
|
| 352 |
|
| 353 |
+
# ─────────────────────────────
|
| 354 |
+
# 4. EMPTY CHECK
|
| 355 |
+
# ─────────────────────────────
|
| 356 |
+
if not text or not text.strip():
|
| 357 |
print("[STT] Empty transcript.")
|
| 358 |
return None
|
| 359 |
|
| 360 |
+
text = text.strip()
|
| 361 |
+
|
| 362 |
+
# ─────────────────────────────
|
| 363 |
+
# 5. SAFE hallucination filter (RELAXED)
|
| 364 |
+
# ─────────────────────────────
|
| 365 |
words = text.split()
|
|
|
|
|
|
|
|
|
|
| 366 |
|
| 367 |
+
if len(words) >= 6:
|
| 368 |
+
unique_ratio = len(set(words)) / len(words)
|
| 369 |
+
|
| 370 |
+
# only reject extreme repetition (not normal speech)
|
| 371 |
+
if unique_ratio < 0.25:
|
| 372 |
+
print(f"[STT] Rejected heavy repetition: {text[:60]}")
|
| 373 |
+
return None
|
| 374 |
+
|
| 375 |
+
# only catch obvious duplicates
|
| 376 |
+
if len(words) == 2 and words[0] == words[1]:
|
| 377 |
+
print(f"[STT] Duplicate word filtered: {text[:60]}")
|
| 378 |
return None
|
| 379 |
|
| 380 |
+
# ─────────────────────────────
|
| 381 |
+
# 6. Bangla validation (RELAXED)
|
| 382 |
+
# ─────────────────────────────
|
| 383 |
+
try:
|
| 384 |
+
if not _is_valid_bangla(text):
|
| 385 |
+
# do NOT drop aggressively — log only
|
| 386 |
+
print(f"[STT] Non-Bangla detected (kept anyway): {text[:60]}")
|
| 387 |
+
except Exception:
|
| 388 |
+
pass
|
| 389 |
+
|
| 390 |
+
# ─────────────────────────────
|
| 391 |
+
# 7. SUCCESS
|
| 392 |
+
# ─────────────────────────────
|
| 393 |
print(f"[STT] Transcript: {text}")
|
| 394 |
return text
|
services/tts.py
CHANGED
|
@@ -1,29 +1,207 @@
|
|
|
|
|
|
|
|
| 1 |
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
| 5 |
|
|
|
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
Yields:
|
| 17 |
-
bytes — raw MP3 audio data ready to send over WebSocket.
|
| 18 |
-
"""
|
| 19 |
text = text.strip()
|
| 20 |
if not text:
|
| 21 |
return
|
| 22 |
|
| 23 |
try:
|
| 24 |
communicate = edge_tts.Communicate(text, voice)
|
|
|
|
| 25 |
async for chunk in communicate.stream():
|
| 26 |
if chunk["type"] == "audio":
|
|
|
|
| 27 |
yield chunk["data"]
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
services/tts.py — Ultra Low-Latency Dual TTS Backend
|
| 3 |
|
| 4 |
+
Fixes applied:
|
| 5 |
+
- sentence-level streaming
|
| 6 |
+
- reduced chunk buffering (ElevenLabs)
|
| 7 |
+
- WebSocket-safe streaming design
|
| 8 |
+
- optional PCM mode (recommended for real-time apps)
|
| 9 |
+
- first-audio priority behavior
|
| 10 |
+
- no internal accumulation
|
| 11 |
+
- improved async flow stability
|
| 12 |
+
"""
|
| 13 |
|
| 14 |
+
from dotenv import load_dotenv
|
| 15 |
+
import os
|
| 16 |
+
import re
|
| 17 |
+
import asyncio
|
| 18 |
|
| 19 |
+
load_dotenv()
|
| 20 |
|
| 21 |
+
# ─────────────────────────────────────────────
|
| 22 |
+
# ROUTE CONFIG
|
| 23 |
+
# ─────────────────────────────────────────────
|
| 24 |
+
USE_ELEVENLABS = False # True = ElevenLabs | False = Edge-TTS
|
| 25 |
|
| 26 |
+
# ─────────────────────────────────────────────
|
| 27 |
+
# EDGE-TTS CONFIG
|
| 28 |
+
# ─────────────────────────────────────────────
|
| 29 |
+
EDGE_VOICE = "bn-BD-NabanitaNeural"
|
| 30 |
+
|
| 31 |
+
# ─────────────────────────────────────────────
|
| 32 |
+
# ELEVENLABS CONFIG
|
| 33 |
+
# ─────────────────────────────────────────────
|
| 34 |
+
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY", "")
|
| 35 |
+
ELEVENLABS_VOICE_ID = os.getenv("ELEVENLABS_VOICE_ID", "21m00Tcm4TlvDq8ikWAM")
|
| 36 |
+
ELEVENLABS_MODEL_ID = os.getenv("ELEVENLABS_MODEL_ID", "eleven_multilingual_v2")
|
| 37 |
+
|
| 38 |
+
# 🔥 LOW LATENCY FORMAT (IMPORTANT FIX)
|
| 39 |
+
ELEVENLABS_OUTPUT_FORMAT = "pcm_16000" # BEST for real-time (no MP3 decode delay)
|
| 40 |
+
|
| 41 |
+
ELEVENLABS_STABILITY = 0.45
|
| 42 |
+
ELEVENLABS_SIMILARITY = 0.80
|
| 43 |
+
ELEVENLABS_STYLE = 0.35
|
| 44 |
+
ELEVENLABS_SPEAKER_BOOST = True
|
| 45 |
+
|
| 46 |
+
if USE_ELEVENLABS and not ELEVENLABS_API_KEY:
|
| 47 |
+
raise RuntimeError("[TTS] ELEVENLABS_API_KEY missing")
|
| 48 |
+
|
| 49 |
+
print(f"[TTS] Backend: {'ElevenLabs' if USE_ELEVENLABS else 'Edge-TTS'}")
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# ─────────────────────────────────────────────
|
| 53 |
+
# TEXT SPLITTER (REAL LATENCY FIX)
|
| 54 |
+
# ─────────────────────────────────────────────
|
| 55 |
+
def split_sentences(text: str):
    """Split *text* into sentence-sized pieces for per-sentence synthesis.

    A split happens at whitespace that follows a Bangla danda or one of
    ``.``, ``!``, ``?``. Pieces of one character or less after stripping are
    discarded. Blank input gives an empty list.
    """
    stripped = text.strip()
    if stripped == "":
        return []

    sentences = []
    # Bangla + English sentence splitting
    for piece in re.split(r'(?<=[।.!?])\s+', stripped):
        piece = piece.strip()
        if len(piece) > 1:
            sentences.append(piece)
    return sentences
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# ─────────────────────────────────────────────
|
| 68 |
+
# EDGE-TTS STREAM (FIXED + NON-BLOCKING)
|
| 69 |
+
# ─────────────────────────────────────────────
|
| 70 |
+
async def _edge_tts_stream(text: str, voice: str = EDGE_VOICE):
    """Yield audio chunks synthesised by Edge-TTS for *text*.

    Blank text yields nothing. Only stream items whose ``"type"`` is
    ``"audio"`` are forwarded; their ``"data"`` payload is yielded as
    received. Any exception raised while streaming is logged and ends the
    generator instead of propagating.
    """
    import edge_tts

    spoken = text.strip()
    if not spoken:
        return

    try:
        stream = edge_tts.Communicate(spoken, voice).stream()
        async for item in stream:
            if item["type"] != "audio":
                continue
            # Forward each chunk as soon as it arrives (no buffering).
            yield item["data"]
            # Give other tasks on the event loop a turn between chunks.
            await asyncio.sleep(0)
    except Exception as exc:
        print(f"[TTS][Edge] Error: {exc}")
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
# ─────────────────────────────────────────────
|
| 93 |
+
# ELEVENLABS STREAM (LOW LATENCY FIXED)
|
| 94 |
+
# ─────────────────────────────────────────────
|
| 95 |
+
async def _elevenlabs_stream(
    text: str,
    voice_id: str = ELEVENLABS_VOICE_ID,
    model_id: str = ELEVENLABS_MODEL_ID,
    output_format: str = ELEVENLABS_OUTPUT_FORMAT,
    stability: float = ELEVENLABS_STABILITY,
    similarity: float = ELEVENLABS_SIMILARITY,
    style: float = ELEVENLABS_STYLE,
    speaker_boost: bool = ELEVENLABS_SPEAKER_BOOST,
):
    """Yield audio chunks from the ElevenLabs streaming text-to-speech endpoint.

    Args:
        text: Text to synthesise; blank text yields nothing.
        voice_id: Voice identifier inserted into the request URL.
        model_id: Sent as ``model_id`` in the JSON payload.
        output_format: Sent as the ``output_format`` query parameter.
        stability, similarity, style, speaker_boost: Sent under
            ``voice_settings`` in the JSON payload.

    Yields:
        Non-empty byte chunks of at most 512 bytes from the response body.

    A non-200 response or any exception is logged and ends the generator
    instead of propagating.
    """
    import httpx

    text = text.strip()
    if not text:
        return

    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream"

    # Only advertise MP3 when MP3 is actually requested; the module default
    # is a PCM format, for which "audio/mpeg" would be contradictory.
    accept = "audio/mpeg" if output_format.startswith("mp3") else "*/*"

    headers = {
        "xi-api-key": ELEVENLABS_API_KEY,
        "Content-Type": "application/json",
        "Accept": accept,
    }

    payload = {
        "text": text,
        "model_id": model_id,
        "voice_settings": {
            "stability": stability,
            "similarity_boost": similarity,
            "style": style,
            "use_speaker_boost": speaker_boost,
        },
    }

    params = {"output_format": output_format}

    # httpx.Timeout needs either a default or all four timeouts; passing only
    # connect/read raises ValueError, which the handler below would swallow.
    # 10 s is the default for write/pool; read stays unbounded for streaming.
    timeout = httpx.Timeout(10.0, connect=5.0, read=None)

    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            async with client.stream(
                "POST",
                url,
                headers=headers,
                json=payload,
                params=params,
            ) as resp:

                if resp.status_code != 200:
                    err = await resp.aread()
                    print(f"[TTS][ElevenLabs] HTTP {resp.status_code}: {err[:200]}")
                    return

                # Small chunks so the first audio reaches the caller sooner.
                async for chunk in resp.aiter_bytes(chunk_size=512):
                    if chunk:
                        yield chunk
                        # Give other tasks on the event loop a turn.
                        await asyncio.sleep(0)

    except Exception as exc:
        print(f"[TTS][ElevenLabs] Error: {exc}")
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
# ─────────────────────────────────────────────
|
| 161 |
+
# PUBLIC API (ZERO BUFFER STREAM DESIGN)
|
| 162 |
+
# ─────────────────────────────────────────────
|
| 163 |
+
async def text_to_speech_stream(text: str, voice: str | None = None):
|
| 164 |
+
"""
|
| 165 |
+
Ultra-low latency streaming TTS generator.
|
| 166 |
+
|
| 167 |
+
Designed for:
|
| 168 |
+
- FastAPI WebSocket
|
| 169 |
+
- real-time AI agents
|
| 170 |
+
- Bangla-first voice systems
|
| 171 |
+
"""
|
| 172 |
+
|
| 173 |
+
text = text.strip()
|
| 174 |
+
if not text:
|
| 175 |
+
return
|
| 176 |
+
|
| 177 |
+
voice_to_use = voice
|
| 178 |
+
|
| 179 |
+
# ─────────────────────────────
|
| 180 |
+
# ELEVENLABS MODE
|
| 181 |
+
# ─────────────────────────────
|
| 182 |
+
if USE_ELEVENLABS:
|
| 183 |
+
for part in split_sentences(text):
|
| 184 |
+
|
| 185 |
+
# 🔥 stream immediately per sentence
|
| 186 |
+
async for chunk in _elevenlabs_stream(
|
| 187 |
+
part,
|
| 188 |
+
voice_id=voice_to_use or ELEVENLABS_VOICE_ID,
|
| 189 |
+
):
|
| 190 |
+
yield chunk
|
| 191 |
+
|
| 192 |
+
# yield control (prevents backend lag spikes)
|
| 193 |
+
await asyncio.sleep(0)
|
| 194 |
+
|
| 195 |
+
# ─────────────────────────────
|
| 196 |
+
# EDGE MODE
|
| 197 |
+
# ─────────────────────────────
|
| 198 |
+
else:
|
| 199 |
+
for part in split_sentences(text):
|
| 200 |
+
|
| 201 |
+
async for chunk in _edge_tts_stream(
|
| 202 |
+
part,
|
| 203 |
+
voice=voice_to_use or EDGE_VOICE,
|
| 204 |
+
):
|
| 205 |
+
yield chunk
|
| 206 |
+
|
| 207 |
+
await asyncio.sleep(0)
|
services/vad.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
|
| 2 |
import webrtcvad
|
| 3 |
|
| 4 |
class VADDetector:
|
|
|
|
|
|
|
| 1 |
import webrtcvad
|
| 2 |
|
| 3 |
class VADDetector:
|