rakib72642 commited on
Commit
77a79ae
·
1 Parent(s): 16676c4

checkpoint 2

Browse files
Files changed (3) hide show
  1. app.py +3 -1
  2. core/backend.py +8 -7
  3. services/webrtc_pipeline.py +5 -1
app.py CHANGED
@@ -514,7 +514,6 @@ async def ws_voice(ws: WebSocket):
514
  full_text += token
515
  if not await _safe_text(ws, {"type": "llm_token", "token": token, "turn": my_turn}):
516
  break
517
- await tts_streamer.add_token(token)
518
  except asyncio.CancelledError:
519
  raise
520
  except Exception as exc:
@@ -524,6 +523,9 @@ async def ws_voice(ws: WebSocket):
524
  # recover if it missed any streamed tokens.
525
  if full_text:
526
  await _safe_text(ws, {"type": "llm_full", "text": _normalize_ai_text(full_text), "turn": my_turn})
 
 
 
527
  await tts_streamer.flush()
528
 
529
  async def run_tts_framed():
 
514
  full_text += token
515
  if not await _safe_text(ws, {"type": "llm_token", "token": token, "turn": my_turn}):
516
  break
 
517
  except asyncio.CancelledError:
518
  raise
519
  except Exception as exc:
 
523
  # recover if it missed any streamed tokens.
524
  if full_text:
525
  await _safe_text(ws, {"type": "llm_full", "text": _normalize_ai_text(full_text), "turn": my_turn})
526
+ # Voice synthesis uses the completed response so TTS gets
527
+ # full sentence context instead of fragmentary token chunks.
528
+ await tts_streamer.add_token(full_text)
529
  await tts_streamer.flush()
530
 
531
  async def run_tts_framed():
core/backend.py CHANGED
@@ -1430,7 +1430,7 @@ Important booking rules:
1430
  - If the user has already confirmed the details, book immediately.
1431
 
1432
  Important update rules:
1433
- - First ask what the user wants to change (date/doctor/time/phone/email).
1434
  - Ask whether to keep the rest of the existing appointment unchanged.
1435
  - If multiple appointments exist for a phone number, ask for the doctor name to select the correct one.
1436
  - Email is REQUIRED to update. If the existing record has no email, ask for it.
@@ -1664,16 +1664,17 @@ class AIBackend:
1664
  async for chunk, _meta in self.graph.astream(
1665
  initial_state, config=config, stream_mode="messages"
1666
  ):
1667
- # Only yield text content from AI messages.
1668
- # Exclude ToolMessage (tool execution results) they contain
1669
- # raw JSON that should not be streamed directly to the user.
 
1670
  if (
1671
  isinstance(chunk, (AIMessage, AIMessageChunk))
1672
  and not isinstance(chunk, ToolMessage)
1673
- and isinstance(chunk.content, str)
1674
- and chunk.content
1675
  ):
1676
- yield chunk.content
 
 
1677
 
1678
  # Auto-summarise in background when history grows long
1679
  try:
 
1430
  - If the user has already confirmed the details, book immediately.
1431
 
1432
  Important update rules:
1433
+ - First ask what the user wants to change (date/doctor/time).
1434
  - Ask whether to keep the rest of the existing appointment unchanged.
1435
  - If multiple appointments exist for a phone number, ask for the doctor name to select the correct one.
1436
  - Email is REQUIRED to update. If the existing record has no email, ask for it.
 
1664
  async for chunk, _meta in self.graph.astream(
1665
  initial_state, config=config, stream_mode="messages"
1666
  ):
1667
+ # Only yield assistant text. Gemini may return structured content
1668
+ # as a list of text parts, so flatten it before streaming.
1669
+ # Exclude ToolMessage (tool execution results) they contain raw
1670
+ # JSON that should not be streamed directly to the user.
1671
  if (
1672
  isinstance(chunk, (AIMessage, AIMessageChunk))
1673
  and not isinstance(chunk, ToolMessage)
 
 
1674
  ):
1675
+ text = _message_text(chunk.content)
1676
+ if text:
1677
+ yield text
1678
 
1679
  # Auto-summarise in background when history grows long
1680
  try:
services/webrtc_pipeline.py CHANGED
@@ -216,20 +216,24 @@ class _TurnPipeline:
216
 
217
  async def _run_llm(self, user_id: str, transcript: str) -> None:
218
  """Stream LLM tokens → TTS streamer (concurrent with audio delivery)."""
 
219
  try:
220
  stream = await self._ai.main(user_id, transcript)
221
  async for token in stream:
222
  if self._cancelled or not token:
223
  break
 
224
  if self._on_token:
225
  self._on_token(token)
226
  self._send_ctrl({"type": "llm_token", "token": token})
227
- await self._streamer.add_token(token)
228
  except asyncio.CancelledError:
229
  raise
230
  except Exception as exc:
231
  print(f"[Pipeline] LLM error: {exc}")
232
  finally:
 
 
 
233
  await self._streamer.flush()
234
 
235
  async def _run_tts_delivery(self) -> None:
 
216
 
217
  async def _run_llm(self, user_id: str, transcript: str) -> None:
218
  """Stream LLM tokens → TTS streamer (concurrent with audio delivery)."""
219
+ full_text = ""
220
  try:
221
  stream = await self._ai.main(user_id, transcript)
222
  async for token in stream:
223
  if self._cancelled or not token:
224
  break
225
+ full_text += token
226
  if self._on_token:
227
  self._on_token(token)
228
  self._send_ctrl({"type": "llm_token", "token": token})
 
229
  except asyncio.CancelledError:
230
  raise
231
  except Exception as exc:
232
  print(f"[Pipeline] LLM error: {exc}")
233
  finally:
234
+ # Feed the completed response to TTS for more reliable synthesis.
235
+ if full_text and not self._cancelled:
236
+ await self._streamer.add_token(full_text)
237
  await self._streamer.flush()
238
 
239
  async def _run_tts_delivery(self) -> None: