HawkEyesAI
/

Voice-AI-Agent

rakib72642 committed on 3 days ago

Commit

e33d11d

1 Parent(s): bed58cc

fixed stt and added whisper and elevenlabs stt + updated ++

Files changed (1) hide show

services/tts.py CHANGED Viewed

@@ -7,8 +7,7 @@ FIX-ISSUE4 (Normal-speed TTS):
     em-dashes) in addition to sentence endings, so synthesis tasks are
     smaller and start sooner. This pairs with streaming.py's 2–3 word
     flush threshold for maximum low-latency playback.
-  • Parallel synthesis of all parts preserved (all parts synthesised
-    concurrently; delivered in order).
 """
 from dotenv import load_dotenv
@@ -138,8 +137,8 @@ async def text_to_speech_stream(
     """
     Stream TTS audio for `text`.
-    Splits text into small clause-level parts, synthesises all in parallel,
-    yields one complete audio blob per part in order.
     IMPORTANT:
       The browser playback path uses decodeAudioData(), which expects a
@@ -202,19 +201,12 @@ async def text_to_speech_stream(
                 print(f"[TTS] no audio produced for chunk: {part[:60]!r}")
             await q.put(_SENT)
-    # Create one queue per part, synthesise all in parallel
-    queues = [asyncio.Queue() for _ in parts]
-    tasks  = [asyncio.create_task(_synth_part(p, q)) for p, q in zip(parts, queues)]
-    # Deliver in part order
-    try:
-        for q in queues:
-            while True:
-                chunk = await q.get()
-                if chunk is _SENT:
-                    break
-                yield chunk
-    finally:
-        for t in tasks:
-            t.cancel()
-        await asyncio.gather(*tasks, return_exceptions=True)

     em-dashes) in addition to sentence endings, so synthesis tasks are
     smaller and start sooner. This pairs with streaming.py's 2–3 word
     flush threshold for maximum low-latency playback.
+  • Parts are synthesised sequentially to guarantee word order in playback.
 """
 from dotenv import load_dotenv
     """
     Stream TTS audio for `text`.
+    Splits text into small clause-level parts, synthesises each part in order,
+    and yields one complete audio blob per part in order.
     IMPORTANT:
       The browser playback path uses decodeAudioData(), which expects a
                 print(f"[TTS] no audio produced for chunk: {part[:60]!r}")
             await q.put(_SENT)
+    # Sequential synthesis guarantees exact playback order.
+    for part in parts:
+        q: asyncio.Queue = asyncio.Queue()
+        await _synth_part(part, q)
+        while True:
+            chunk = await q.get()
+            if chunk is _SENT:
+                break
+            yield chunk