rakib72642 committed on
Commit
e33d11d
·
1 Parent(s): bed58cc

Fixed STT and added Whisper and ElevenLabs STT, plus further updates

Browse files
Files changed (1) hide show
  1. services/tts.py +12 -20
services/tts.py CHANGED
@@ -7,8 +7,7 @@ FIX-ISSUE4 (Normal-speed TTS):
7
  em-dashes) in addition to sentence endings, so synthesis tasks are
8
  smaller and start sooner. This pairs with streaming.py's 2–3 word
9
  flush threshold for maximum low-latency playback.
10
- Parallel synthesis of all parts preserved (all parts synthesised
11
- concurrently; delivered in order).
12
  """
13
 
14
  from dotenv import load_dotenv
@@ -138,8 +137,8 @@ async def text_to_speech_stream(
138
  """
139
  Stream TTS audio for `text`.
140
 
141
- Splits text into small clause-level parts, synthesises all in parallel,
142
- yields one complete audio blob per part in order.
143
 
144
  IMPORTANT:
145
  The browser playback path uses decodeAudioData(), which expects a
@@ -202,19 +201,12 @@ async def text_to_speech_stream(
202
  print(f"[TTS] no audio produced for chunk: {part[:60]!r}")
203
  await q.put(_SENT)
204
 
205
- # Create one queue per part, synthesise all in parallel
206
- queues = [asyncio.Queue() for _ in parts]
207
- tasks = [asyncio.create_task(_synth_part(p, q)) for p, q in zip(parts, queues)]
208
-
209
- # Deliver in part order
210
- try:
211
- for q in queues:
212
- while True:
213
- chunk = await q.get()
214
- if chunk is _SENT:
215
- break
216
- yield chunk
217
- finally:
218
- for t in tasks:
219
- t.cancel()
220
- await asyncio.gather(*tasks, return_exceptions=True)
 
7
  em-dashes) in addition to sentence endings, so synthesis tasks are
8
  smaller and start sooner. This pairs with streaming.py's 2–3 word
9
  flush threshold for maximum low-latency playback.
10
+ Parts are synthesised sequentially to guarantee word order in playback.
 
11
  """
12
 
13
  from dotenv import load_dotenv
 
137
  """
138
  Stream TTS audio for `text`.
139
 
140
+ Splits text into small clause-level parts, synthesises each part in order,
141
+ and yields one complete audio blob per part in order.
142
 
143
  IMPORTANT:
144
  The browser playback path uses decodeAudioData(), which expects a
 
201
  print(f"[TTS] no audio produced for chunk: {part[:60]!r}")
202
  await q.put(_SENT)
203
 
204
+ # Sequential synthesis guarantees exact playback order.
205
+ for part in parts:
206
+ q: asyncio.Queue = asyncio.Queue()
207
+ await _synth_part(part, q)
208
+ while True:
209
+ chunk = await q.get()
210
+ if chunk is _SENT:
211
+ break
212
+ yield chunk