Commit ·
e33d11d
1
Parent(s): bed58cc
Fixed STT; added Whisper and ElevenLabs STT; plus miscellaneous updates
Browse files
- services/tts.py +12 -20
services/tts.py
CHANGED
|
@@ -7,8 +7,7 @@ FIX-ISSUE4 (Normal-speed TTS):
|
|
| 7 |
em-dashes) in addition to sentence endings, so synthesis tasks are
|
| 8 |
smaller and start sooner. This pairs with streaming.py's 2–3 word
|
| 9 |
flush threshold for maximum low-latency playback.
|
| 10 |
-
•
|
| 11 |
-
concurrently; delivered in order).
|
| 12 |
"""
|
| 13 |
|
| 14 |
from dotenv import load_dotenv
|
|
@@ -138,8 +137,8 @@ async def text_to_speech_stream(
|
|
| 138 |
"""
|
| 139 |
Stream TTS audio for `text`.
|
| 140 |
|
| 141 |
-
Splits text into small clause-level parts, synthesises
|
| 142 |
-
yields one complete audio blob per part in order.
|
| 143 |
|
| 144 |
IMPORTANT:
|
| 145 |
The browser playback path uses decodeAudioData(), which expects a
|
|
@@ -202,19 +201,12 @@ async def text_to_speech_stream(
|
|
| 202 |
print(f"[TTS] no audio produced for chunk: {part[:60]!r}")
|
| 203 |
await q.put(_SENT)
|
| 204 |
|
| 205 |
-
#
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
if chunk is _SENT:
|
| 215 |
-
break
|
| 216 |
-
yield chunk
|
| 217 |
-
finally:
|
| 218 |
-
for t in tasks:
|
| 219 |
-
t.cancel()
|
| 220 |
-
await asyncio.gather(*tasks, return_exceptions=True)
|
|
|
|
| 7 |
em-dashes) in addition to sentence endings, so synthesis tasks are
|
| 8 |
smaller and start sooner. This pairs with streaming.py's 2–3 word
|
| 9 |
flush threshold for maximum low-latency playback.
|
| 10 |
+
• Parts are synthesised sequentially to guarantee word order in playback.
|
|
|
|
| 11 |
"""
|
| 12 |
|
| 13 |
from dotenv import load_dotenv
|
|
|
|
| 137 |
"""
|
| 138 |
Stream TTS audio for `text`.
|
| 139 |
|
| 140 |
+
Splits text into small clause-level parts, synthesises each part in order,
|
| 141 |
+
and yields one complete audio blob per part in order.
|
| 142 |
|
| 143 |
IMPORTANT:
|
| 144 |
The browser playback path uses decodeAudioData(), which expects a
|
|
|
|
| 201 |
print(f"[TTS] no audio produced for chunk: {part[:60]!r}")
|
| 202 |
await q.put(_SENT)
|
| 203 |
|
| 204 |
+
# Sequential synthesis guarantees exact playback order.
|
| 205 |
+
for part in parts:
|
| 206 |
+
q: asyncio.Queue = asyncio.Queue()
|
| 207 |
+
await _synth_part(part, q)
|
| 208 |
+
while True:
|
| 209 |
+
chunk = await q.get()
|
| 210 |
+
if chunk is _SENT:
|
| 211 |
+
break
|
| 212 |
+
yield chunk
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|