Spaces:

ShadowHunter222
/

hello

Sleeping

App Files Files Community

ShadowHunter222 committed on 11 days ago

Commit

b725430

verified ·

1 Parent(s): d61edf1

Upload 10 files

Browse files

Files changed (10) hide show

.gitattributes +4 -0
3cpo_prompt.wav +3 -0
Dockerfile +2 -1
aave_female_prompt.wav +3 -0
app.py +443 -126
chatterbox_wrapper.py +203 -6
config.py +9 -3
her_prompt.wav +3 -0
ivr_female_prompt.wav +3 -0
text_processor.py +193 -51

.gitattributes CHANGED Viewed

@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+3cpo_prompt.wav filter=lfs diff=lfs merge=lfs -text
+aave_female_prompt.wav filter=lfs diff=lfs merge=lfs -text
+her_prompt.wav filter=lfs diff=lfs merge=lfs -text
+ivr_female_prompt.wav filter=lfs diff=lfs merge=lfs -text

3cpo_prompt.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a830bbf5494096e593dcfb6e099cfa334cb8b0b34d1403c69d36c02649c5ab15
+size 513452

Dockerfile CHANGED Viewed

@@ -17,8 +17,9 @@ RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/wh
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-# Copy application code
 COPY config.py text_processor.py chatterbox_wrapper.py app.py ./
 # Pre-download ONNX models + tokenizer at build time
 RUN python -c "\

 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
+# Copy application code + local built-in voice samples from repo root
 COPY config.py text_processor.py chatterbox_wrapper.py app.py ./
+COPY *.wav ./
 # Pre-download ONNX models + tokenizer at build time
 RUN python -c "\

aave_female_prompt.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:971a3568a5a1521612bff565ed416aea62e30da3e00a53d771ff2c26da78276d
+size 1217636

app.py CHANGED Viewed

@@ -1,32 +1,16 @@
-"""
-Chatterbox Turbo TTS -- FastAPI Server
-======================================
-Production-ready API with true real-time MP3 streaming,
-in-memory voice cloning, and fully non-blocking inference.
-Endpoints:
-  GET  /health              -> health check + optional warmup
-  GET  /info                -> model info, supported tags, parameters
-  POST /tts                 -> full audio response (WAV/MP3/FLAC)
-  POST /tts/stream          -> chunked MP3 streaming (MediaSource-ready)
-  POST /tts/true-stream     -> alias for /tts/stream (Kokoro compat)
-  POST /tts/stop/{stream_id}-> cancel a specific active stream
-  POST /tts/stop            -> cancel ALL active streams
-  POST /v1/audio/speech     -> OpenAI-compatible streaming
-"""
 import asyncio
 import io
 import json
 import logging
 import queue as stdlib_queue
 import threading
 import time
-import urllib.error
 import urllib.parse
-import urllib.request
 import uuid
 from concurrent.futures import ThreadPoolExecutor
-from typing import Generator, Optional
 import numpy as np
 import soundfile as sf
@@ -111,11 +95,15 @@ async def cors_middleware(request: Request, call_next):
 async def _resolve_voice(
     voice_ref: Optional[UploadFile],
     wrapper: ChatterboxWrapper,
 ) -> VoiceProfile:
-    """Return a VoiceProfile from uploaded audio or the default voice."""
     if voice_ref is None or voice_ref.filename == "":
-        return wrapper.default_voice
     audio_bytes = await voice_ref.read()
     if len(audio_bytes) > Config.MAX_VOICE_UPLOAD_BYTES:
@@ -164,32 +152,165 @@ def _encode_mp3_chunk(audio: np.ndarray) -> bytes:
     return data
-def _build_helper_endpoint(base_url: str, path: str) -> str:
-    return f"{base_url.rstrip('/')}{path}"
-def _internal_headers() -> dict[str, str]:
-    headers = {"Content-Type": "application/json", "Accept": "audio/mpeg"}
     if Config.INTERNAL_SHARED_SECRET:
         headers["X-Internal-Secret"] = Config.INTERNAL_SHARED_SECRET
     return headers
 def _helper_request_chunk(
     helper_base_url: str,
     payload: dict,
     timeout_sec: float,
 ) -> bytes:
-    url = _build_helper_endpoint(helper_base_url, "/internal/chunk/synthesize")
-    body = json.dumps(payload).encode("utf-8")
-    req = urllib.request.Request(
-        url=url,
-        data=body,
-        headers=_internal_headers(),
-        method="POST",
-    )
-    with urllib.request.urlopen(req, timeout=timeout_sec) as resp:
-        return resp.read()
 def _helper_register_voice(
@@ -197,44 +318,45 @@ def _helper_register_voice(
     stream_id: str,
     audio_bytes: bytes,
     timeout_sec: float,
 ) -> str:
     """Register reference voice on helper once, return voice_key for chunk calls."""
-    query = urllib.parse.urlencode({"stream_id": stream_id})
-    url = _build_helper_endpoint(helper_base_url, f"/internal/voice/register?{query}")
-    headers = {"Content-Type": "application/octet-stream", "Accept": "application/json"}
-    if Config.INTERNAL_SHARED_SECRET:
-        headers["X-Internal-Secret"] = Config.INTERNAL_SHARED_SECRET
-    req = urllib.request.Request(
-        url=url,
-        data=audio_bytes,
-        headers=headers,
-        method="POST",
-    )
-    with urllib.request.urlopen(req, timeout=timeout_sec) as resp:
-        data = json.loads(resp.read().decode("utf-8"))
-    voice_key = (data.get("voice_key") or "").strip()
-    if not voice_key:
-        raise RuntimeError("helper voice registration returned no voice_key")
-    return voice_key
 def _helper_cancel_stream(helper_base_url: str, stream_id: str):
     """Best-effort cancellation signal to helper."""
     try:
-        url = _build_helper_endpoint(helper_base_url, f"/internal/chunk/cancel/{stream_id}")
-        req = urllib.request.Request(
-            url=url,
-            data=b"",
-            headers=_internal_headers(),
-            method="POST",
-        )
-        with urllib.request.urlopen(req, timeout=3.0):
-            pass
     except Exception:
         pass
 # ═══════════════════════════════════════════════════════════════════
 # Endpoints
 # ═══════════════════════════════════════════════════════════════════
@@ -242,12 +364,19 @@ def _helper_cancel_stream(helper_base_url: str, stream_id: str):
 @app.get("/health")
 async def health(warm_up: bool = False):
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
     status = {
         "status": "healthy" if wrapper else "loading",
         "model_loaded": wrapper is not None,
         "model_dtype": Config.MODEL_DTYPE,
         "streaming_supported": True,
         "voice_cache_entries": wrapper._voice_cache.size if wrapper else 0,
     }
     if warm_up and wrapper:
         try:
@@ -259,27 +388,22 @@ async def health(warm_up: bool = False):
     return status
-@app.get("/info")
-async def info():
     return {
-        "model": Config.MODEL_ID,
-        "dtype": Config.MODEL_DTYPE,
-        "sample_rate": Config.SAMPLE_RATE,
-        "paralinguistic_tags": list(Config.PARALINGUISTIC_TAGS),
-        "tag_usage": "Insert tags directly in text, e.g. 'That is so funny! [laugh] Anyway…'",
-        "parameters": {
-            "max_new_tokens": {"default": Config.MAX_NEW_TOKENS, "range": "64–2048"},
-            "repetition_penalty": {"default": Config.REPETITION_PENALTY, "range": "1.0–2.0"},
-        },
-        "voice_cloning": {
-            "description": "Upload 3–30s reference WAV/MP3 as 'voice_ref' field",
-            "max_upload_mb": Config.MAX_VOICE_UPLOAD_BYTES // (1024 * 1024),
-        },
-        "parallel_mode": {
-            "enabled": Config.ENABLE_PARALLEL_MODE,
-            "helper_configured": bool(Config.HELPER_BASE_URL),
-            "helper_base_url": Config.HELPER_BASE_URL or None,
-            "supports_voice_ref": True,
         },
     }
@@ -290,6 +414,7 @@ async def info():
 async def text_to_speech(
     text: str = Form(...),
     voice_ref: Optional[UploadFile] = File(None),
     output_format: str = Form("wav"),
     max_new_tokens: int = Form(Config.MAX_NEW_TOKENS),
     repetition_penalty: float = Form(Config.REPETITION_PENALTY),
@@ -302,7 +427,7 @@ async def text_to_speech(
     if not text or not text.strip():
         raise HTTPException(400, "Text is required")
-    voice = await _resolve_voice(voice_ref, wrapper)
     loop = asyncio.get_running_loop()
     try:
@@ -329,9 +454,51 @@ async def text_to_speech(
 # ═══════════════════════════════════════════════════════════════════
 _active_streams: dict[str, threading.Event] = {}
-_internal_cancelled_streams: set[str] = set()
 _internal_cancel_lock = threading.Lock()
-_internal_stream_voice_keys: dict[str, set[str]] = {}
 # ═══════════════════════════════════════════════════════════════════
@@ -431,7 +598,7 @@ def _pipeline_stream_generator(
         _active_streams.pop(stream_id, None)
-def _parallel_odd_even_stream_generator(
     wrapper: ChatterboxWrapper,
     text: str,
     local_voice: VoiceProfile,
@@ -441,26 +608,43 @@ def _parallel_odd_even_stream_generator(
     stream_id: str,
     helper_base_url: str,
 ) -> Generator[bytes, None, None]:
-    """Additive odd/even split streamer (primary handles odd, helper handles even)."""
     cancel_event = threading.Event()
     _active_streams[stream_id] = cancel_event
     clean_text = text_processor.sanitize(text.strip()[: Config.MAX_TEXT_LENGTH])
     chunks = text_processor.split_for_streaming(clean_text)
     total_chunks = len(chunks)
     if total_chunks == 0:
         _active_streams.pop(stream_id, None)
         return
     lock = threading.Lock()
     cond = threading.Condition(lock)
-    ready: dict[int, bytes] = {}
     first_error: Optional[Exception] = None
     workers_done = 0
-    def _publish(idx: int, data: bytes):
         with cond:
-            ready[idx] = data
             cond.notify_all()
     def _set_error(err: Exception):
@@ -485,23 +669,46 @@ def _parallel_odd_even_stream_generator(
         )
         return _encode_mp3_chunk(audio)
-    def _odd_worker():
         try:
             for idx in range(0, total_chunks, 2):
                 if cancel_event.is_set():
                     break
                 data = _synth_local(chunks[idx])
-                _publish(idx, data)
         except Exception as e:
             _set_error(e)
         finally:
             _worker_done()
-    def _even_worker():
-        helper_available = True
         helper_voice_key: Optional[str] = None
         try:
-            if helper_voice_bytes:
                 attempts = 2 if Config.HELPER_RETRY_ONCE else 1
                 last_err: Optional[Exception] = None
                 for _ in range(attempts):
@@ -510,19 +717,25 @@ def _parallel_odd_even_stream_generator(
                             helper_base_url=helper_base_url,
                             stream_id=stream_id,
                             audio_bytes=helper_voice_bytes,
-                            timeout_sec=max(1.0, Config.HELPER_TIMEOUT_SEC),
                         )
                         last_err = None
                         break
                     except Exception as reg_err:
                         last_err = reg_err
                         continue
                 if last_err is not None:
                     helper_available = False
                     logger.warning(
-                        f"[{stream_id}] Helper voice registration failed; "
-                        "falling back to local synthesis for even chunks"
                     )
             for idx in range(1, total_chunks, 2):
                 if cancel_event.is_set():
@@ -547,9 +760,17 @@ def _parallel_odd_even_stream_generator(
                             helper_data = _helper_request_chunk(
                                 helper_base_url=helper_base_url,
                                 payload=payload,
-                                timeout_sec=max(1.0, Config.HELPER_TIMEOUT_SEC),
                             )
-                            _publish(idx, helper_data)
                             last_err = None
                             break
                         except Exception as helper_err:
@@ -561,22 +782,31 @@ def _parallel_odd_even_stream_generator(
                     helper_available = False
                     logger.warning(
-                        f"[{stream_id}] Helper failed at chunk {idx}; "
-                        "falling back to local synthesis for remaining even chunks"
                     )
-                # Local fallback for even chunks
                 data = _synth_local(chunks[idx])
-                _publish(idx, data)
         except Exception as e:
             _set_error(e)
         finally:
             _worker_done()
-    odd_thread = threading.Thread(target=_odd_worker, daemon=True)
-    even_thread = threading.Thread(target=_even_worker, daemon=True)
-    odd_thread.start()
-    even_thread.start()
     next_idx = 0
     try:
@@ -586,7 +816,7 @@ def _parallel_odd_even_stream_generator(
                     next_idx not in ready
                     and first_error is None
                     and not cancel_event.is_set()
-                    and workers_done < 2
                 ):
                     cond.wait(timeout=0.1)
@@ -594,11 +824,12 @@ def _parallel_odd_even_stream_generator(
                     break
                 if next_idx in ready:
-                    data = ready.pop(next_idx)
                 elif first_error is not None:
                     logger.error(f"[{stream_id}] Parallel stream error: {first_error}")
                     break
-                elif workers_done >= 2:
                     logger.error(
                         f"[{stream_id}] Parallel stream ended with missing chunk index {next_idx}"
                     )
@@ -606,13 +837,39 @@ def _parallel_odd_even_stream_generator(
                 else:
                     continue
-            yield data
             next_idx += 1
     finally:
         cancel_event.set()
-        _helper_cancel_stream(helper_base_url, stream_id)
-        odd_thread.join(timeout=1.0)
-        even_thread.join(timeout=1.0)
         _active_streams.pop(stream_id, None)
@@ -623,6 +880,7 @@ def _parallel_odd_even_stream_generator(
 async def stream_text_to_speech(
     text: str = Form(...),
     voice_ref: Optional[UploadFile] = File(None),
     max_new_tokens: int = Form(Config.MAX_NEW_TOKENS),
     repetition_penalty: float = Form(Config.REPETITION_PENALTY),
 ):
@@ -638,7 +896,7 @@ async def stream_text_to_speech(
     if not text or not text.strip():
         raise HTTPException(400, "Text is required")
-    voice = await _resolve_voice(voice_ref, wrapper)
     stream_id = uuid.uuid4().hex[:12]
     return StreamingResponse(
@@ -660,11 +918,12 @@ async def stream_text_to_speech(
 async def parallel_stream_text_to_speech(
     text: str = Form(...),
     voice_ref: Optional[UploadFile] = File(None),
     max_new_tokens: int = Form(Config.MAX_NEW_TOKENS),
     repetition_penalty: float = Form(Config.REPETITION_PENALTY),
     helper_url: Optional[str] = Form(None),
 ):
-    """Additive odd/even split stream mode (primary + helper)."""
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
     if not wrapper:
         raise HTTPException(503, "Model not loaded")
@@ -689,17 +948,32 @@ async def parallel_stream_text_to_speech(
         except Exception as e:
             logger.error(f"Parallel voice encoding failed: {e}")
             raise HTTPException(400, "Could not process voice file for parallel mode")
     resolved_helper = (helper_url or Config.HELPER_BASE_URL).strip()
     if not resolved_helper:
         raise HTTPException(
             400,
-            "Helper URL not configured. Set CB_HELPER_BASE_URL or pass helper_url.",
         )
     stream_id = uuid.uuid4().hex[:12]
     return StreamingResponse(
-        _parallel_odd_even_stream_generator(
             wrapper=wrapper,
             text=text,
             local_voice=local_voice,
@@ -714,7 +988,7 @@ async def parallel_stream_text_to_speech(
             "Content-Disposition": "attachment; filename=tts_parallel_stream.mp3",
             "Transfer-Encoding": "chunked",
             "X-Stream-Id": stream_id,
-            "X-Streaming-Type": "parallel-odd-even",
             "Cache-Control": "no-cache",
         },
     )
@@ -777,8 +1051,13 @@ async def internal_voice_register(http_request: Request):
     stream_id = (http_request.query_params.get("stream_id") or "").strip()
     if stream_id:
         with _internal_cancel_lock:
-            keys = _internal_stream_voice_keys.setdefault(stream_id, set())
             keys.add(voice_key)
     return {"status": "registered", "voice_key": voice_key}
@@ -795,8 +1074,10 @@ async def internal_chunk_synthesize(
             raise HTTPException(403, "Forbidden")
     with _internal_cancel_lock:
         if request.stream_id in _internal_cancelled_streams:
             raise HTTPException(409, "Stream already cancelled")
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
     if not wrapper:
@@ -805,6 +1086,9 @@ async def internal_chunk_synthesize(
     voice_profile = wrapper.default_voice
     if request.voice_key:
         cached_voice = wrapper._voice_cache.get(request.voice_key)
         if cached_voice is None:
             raise HTTPException(409, "Voice key expired or not found")
         voice_profile = cached_voice
@@ -845,26 +1129,48 @@ async def internal_chunk_cancel(stream_id: str, http_request: Request):
             raise HTTPException(403, "Forbidden")
     with _internal_cancel_lock:
-        _internal_cancelled_streams.add(stream_id)
         _internal_stream_voice_keys.pop(stream_id, None)
     return {"status": "cancelled", "stream_id": stream_id}
 @app.post("/v1/audio/speech")
 async def openai_compatible_tts(request: TTSJsonRequest):
     """OpenAI-compatible streaming endpoint (JSON body, no file upload).
-    Uses the default voice. For voice cloning, use /tts/stream with FormData.
     """
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
     if not wrapper:
         raise HTTPException(503, "Model not loaded")
     stream_id = uuid.uuid4().hex[:12]
     return StreamingResponse(
         _pipeline_stream_generator(
-            wrapper, request.text, wrapper.default_voice,
             request.max_new_tokens, request.repetition_penalty, stream_id,
         ),
         media_type="audio/mpeg",
@@ -889,6 +1195,10 @@ async def stop_stream(stream_id: str):
     event = _active_streams.get(stream_id)
     if event:
         event.set()
         logger.info(f"Stream {stream_id} cancelled by client")
         return {"status": "stopped", "stream_id": stream_id}
     return {"status": "not_found", "stream_id": stream_id}
@@ -897,9 +1207,16 @@ async def stop_stream(stream_id: str):
 @app.post("/tts/stop")
 async def stop_all_streams():
     """Emergency stop: cancel ALL active TTS streams."""
-    count = len(_active_streams)
-    for sid, event in list(_active_streams.items()):
         event.set()
     _active_streams.clear()
     logger.info(f"Stopped all streams ({count} active)")
     return {"status": "stopped_all", "count": count}

 import asyncio
+import http.client
 import io
 import json
 import logging
 import queue as stdlib_queue
 import threading
 import time
 import urllib.parse
 import uuid
 from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass
+from typing import Any, Generator, Optional
 import numpy as np
 import soundfile as sf
 async def _resolve_voice(
     voice_ref: Optional[UploadFile],
+    voice_name: Optional[str],
     wrapper: ChatterboxWrapper,
 ) -> VoiceProfile:
+    """Return a VoiceProfile from uploaded audio or built-in voice selection."""
     if voice_ref is None or voice_ref.filename == "":
+        try:
+            return wrapper.get_builtin_voice(voice_name)
+        except ValueError as e:
+            raise HTTPException(status_code=400, detail=str(e))
     audio_bytes = await voice_ref.read()
     if len(audio_bytes) > Config.MAX_VOICE_UPLOAD_BYTES:
     return data
+@dataclass(frozen=True)
+class _ChunkPacket:
+    index: int
+    data: bytes
+    lane: str
+    produced_at: float
def _internal_headers(
    *,
    content_type: Optional[str] = "application/json",
    accept: str = "audio/mpeg",
) -> dict[str, str]:
    """Build the request headers used for calls to the helper server.

    ``Accept`` and ``Connection: keep-alive`` are always present.
    ``Content-Type`` is included only when *content_type* is truthy, and
    ``X-Internal-Secret`` only when ``Config.INTERNAL_SHARED_SECRET`` is set.
    """
    conditional_fields = (
        ("Content-Type", content_type),
        ("X-Internal-Secret", Config.INTERNAL_SHARED_SECRET),
    )
    result: dict[str, str] = {"Accept": accept, "Connection": "keep-alive"}
    # Falsy values (None / empty string) mean "leave this header out".
    result.update({name: value for name, value in conditional_fields if value})
    return result
+class _HelperHttpClient:
+    """Small persistent HTTP client for helper server keep-alive calls."""
+    def __init__(self, base_url: str, default_timeout: float):
+        parsed = urllib.parse.urlparse((base_url or "").strip())
+        if parsed.scheme not in {"http", "https"} or not parsed.hostname:
+            raise ValueError(f"Invalid helper URL: {base_url!r}")
+        self._scheme = parsed.scheme
+        self._host = parsed.hostname
+        self._port = parsed.port
+        self._base_path = (parsed.path or "").rstrip("/")
+        self._default_timeout = max(1.0, float(default_timeout))
+        self._conn: Optional[http.client.HTTPConnection] = None
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc, tb):
+        self.close()
+    def close(self):
+        if self._conn is not None:
+            try:
+                self._conn.close()
+            except Exception:
+                pass
+            self._conn = None
+    def _target(self, path: str, query: Optional[str] = None) -> str:
+        normalized = path if path.startswith("/") else f"/{path}"
+        target = f"{self._base_path}{normalized}"
+        if query:
+            target = f"{target}?{query}"
+        return target
+    def _make_connection(self, timeout_sec: float) -> http.client.HTTPConnection:
+        if self._scheme == "https":
+            return http.client.HTTPSConnection(self._host, self._port, timeout=timeout_sec)
+        return http.client.HTTPConnection(self._host, self._port, timeout=timeout_sec)
+    def _ensure_connection(self, timeout_sec: float) -> http.client.HTTPConnection:
+        if self._conn is None:
+            self._conn = self._make_connection(timeout_sec)
+        else:
+            self._conn.timeout = timeout_sec
+        return self._conn
+    def _request(
+        self,
+        method: str,
+        path: str,
+        *,
+        body: Optional[bytes] = None,
+        headers: Optional[dict[str, str]] = None,
+        timeout_sec: Optional[float] = None,
+        query: Optional[str] = None,
+    ) -> tuple[int, bytes, dict[str, str]]:
+        timeout = max(1.0, float(timeout_sec or self._default_timeout))
+        target = self._target(path, query=query)
+        req_headers = headers or {}
+        conn = self._ensure_connection(timeout)
+        try:
+            conn.request(method=method, url=target, body=body, headers=req_headers)
+            resp = conn.getresponse()
+            payload = resp.read()
+            resp_headers = {k.lower(): v for k, v in resp.getheaders()}
+        except Exception:
+            # Force reconnect on next attempt if socket is stale/reset.
+            self.close()
+            raise
+        if resp.status >= 400:
+            snippet = payload[:256].decode("utf-8", errors="replace")
+            raise RuntimeError(
+                f"helper {method} {target} returned {resp.status}: {snippet}"
+            )
+        return resp.status, payload, resp_headers
+    def request_chunk(self, payload: dict[str, Any], timeout_sec: float) -> bytes:
+        _, data, _ = self._request(
+            "POST",
+            "/internal/chunk/synthesize",
+            body=json.dumps(payload).encode("utf-8"),
+            headers=_internal_headers(content_type="application/json", accept="audio/mpeg"),
+            timeout_sec=timeout_sec,
+        )
+        return data
+    def register_voice(self, stream_id: str, audio_bytes: bytes, timeout_sec: float) -> str:
+        query = urllib.parse.urlencode({"stream_id": stream_id})
+        _, data, _ = self._request(
+            "POST",
+            "/internal/voice/register",
+            query=query,
+            body=audio_bytes,
+            headers=_internal_headers(
+                content_type="application/octet-stream",
+                accept="application/json",
+            ),
+            timeout_sec=timeout_sec,
+        )
+        payload = json.loads(data.decode("utf-8"))
+        voice_key = (payload.get("voice_key") or "").strip()
+        if not voice_key:
+            raise RuntimeError("helper voice registration returned no voice_key")
+        return voice_key
+    def cancel_stream(self, stream_id: str, timeout_sec: float = 3.0):
+        self._request(
+            "POST",
+            f"/internal/chunk/cancel/{stream_id}",
+            body=b"",
+            headers=_internal_headers(),
+            timeout_sec=timeout_sec,
+        )
+    def complete_stream(self, stream_id: str, timeout_sec: float = 3.0):
+        self._request(
+            "POST",
+            f"/internal/chunk/complete/{stream_id}",
+            body=b"",
+            headers=_internal_headers(),
+            timeout_sec=timeout_sec,
+        )
 def _helper_request_chunk(
     helper_base_url: str,
     payload: dict,
     timeout_sec: float,
     helper_client: Optional[_HelperHttpClient] = None,
 ) -> bytes:
     """Request one synthesized chunk from the helper and return its bytes.

     When *helper_client* is supplied it is used as-is; otherwise a one-shot
     client for *helper_base_url* is opened for this call and closed afterwards.
     """
     if helper_client is None:
         with _HelperHttpClient(helper_base_url, default_timeout=timeout_sec) as one_shot:
             return one_shot.request_chunk(payload, timeout_sec=timeout_sec)
     return helper_client.request_chunk(payload, timeout_sec=timeout_sec)
 def _helper_register_voice(
     stream_id: str,
     audio_bytes: bytes,
     timeout_sec: float,
+    helper_client: Optional[_HelperHttpClient] = None,
 ) -> str:
     """Register reference voice on helper once, return voice_key for chunk calls."""
+    if helper_client is not None:
+        return helper_client.register_voice(
+            stream_id=stream_id,
+            audio_bytes=audio_bytes,
+            timeout_sec=timeout_sec,
+        )
+    with _HelperHttpClient(helper_base_url, default_timeout=timeout_sec) as helper_client_single:
+        return helper_client_single.register_voice(
+            stream_id=stream_id,
+            audio_bytes=audio_bytes,
+            timeout_sec=timeout_sec,
+        )
 def _helper_cancel_stream(helper_base_url: str, stream_id: str):
     """Best-effort cancellation signal to helper.

     Opens a short-lived client (3 second timeout) and posts the cancel request
     for *stream_id*. Failures never propagate to the caller; they are recorded
     at debug level so an unreachable helper can still be diagnosed.
     """
     try:
         with _HelperHttpClient(helper_base_url, default_timeout=3.0) as helper_client:
             helper_client.cancel_stream(stream_id=stream_id, timeout_sec=3.0)
     except Exception as err:
         # Deliberately swallowed: cancellation is advisory only.
         logger.debug("[%s] Helper cancel failed (ignored): %s", stream_id, err)
def _helper_complete_stream(helper_base_url: str, stream_id: str):
    """Best-effort stream completion cleanup on helper.

    Falls back to cancel for backwards compatibility if helper does not expose
    the completion endpoint yet. Any failure of the completion call triggers
    that fallback, and the reason is recorded at debug level.
    """
    try:
        with _HelperHttpClient(helper_base_url, default_timeout=3.0) as helper_client:
            helper_client.complete_stream(stream_id=stream_id, timeout_sec=3.0)
    except Exception as err:
        logger.debug(
            "[%s] Helper complete failed, falling back to cancel: %s",
            stream_id,
            err,
        )
        _helper_cancel_stream(helper_base_url, stream_id)
 # ═══════════════════════════════════════════════════════════════════
 # Endpoints
 # ═══════════════════════════════════════════════════════════════════
 @app.get("/health")
 async def health(warm_up: bool = False):
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
+    with _internal_cancel_lock:
+        _purge_internal_stream_state_locked()
+        cancelled_count = len(_internal_cancelled_streams)
+        voice_state_count = len(_internal_stream_voice_keys)
     status = {
         "status": "healthy" if wrapper else "loading",
         "model_loaded": wrapper is not None,
         "model_dtype": Config.MODEL_DTYPE,
         "streaming_supported": True,
         "voice_cache_entries": wrapper._voice_cache.size if wrapper else 0,
+        "internal_cancelled_streams": cancelled_count,
+        "internal_stream_voice_states": voice_state_count,
     }
     if warm_up and wrapper:
         try:
     return status
@app.get("/voices")
async def list_voices():
    """List the built-in voices exposed by the loaded wrapper.

    Responds with 503 while no wrapper is attached to ``app.state``. Otherwise
    returns the voice list, its length, the wrapper's default voice name, and a
    short usage hint naming the request fields that select a voice.
    """
    engine = getattr(app.state, "wrapper", None)
    if not engine:
        raise HTTPException(503, "Model not loaded")

    builtin_voices = engine.list_builtin_voices()
    response = {
        "count": len(builtin_voices),
        "default_voice": engine.default_voice_name,
        "voices": builtin_voices,
        "usage": {
            "form_field": "voice_name",
            "json_field": "voice",
            "note": "If voice_ref is uploaded, it overrides voice_name.",
        },
    }
    return response
 async def text_to_speech(
     text: str = Form(...),
     voice_ref: Optional[UploadFile] = File(None),
+    voice_name: str = Form("default"),
     output_format: str = Form("wav"),
     max_new_tokens: int = Form(Config.MAX_NEW_TOKENS),
     repetition_penalty: float = Form(Config.REPETITION_PENALTY),
     if not text or not text.strip():
         raise HTTPException(400, "Text is required")
+    voice = await _resolve_voice(voice_ref, voice_name, wrapper)
     loop = asyncio.get_running_loop()
     try:
 # ═══════════════════════════════════════════════════════════════════
 _active_streams: dict[str, threading.Event] = {}
+# stream_id -> expires_at epoch seconds
+_internal_cancelled_streams: dict[str, float] = {}
 _internal_cancel_lock = threading.Lock()
+# stream_id -> (voice_keys, expires_at)
+_internal_stream_voice_keys: dict[str, tuple[set[str], float]] = {}
+# stream_id -> helper base URLs (used to cancel helpers quickly on /tts/stop)
+_stream_helper_routes: dict[str, set[str]] = {}
+_stream_routes_lock = threading.Lock()
def _purge_internal_stream_state_locked(now: Optional[float] = None):
    """Drop expired entries from both internal stream-state registries.

    An entry is expired when its ``expires_at`` is at or before *now*
    (``time.time()`` when *now* is omitted). The ``_locked`` suffix suggests the
    caller holds ``_internal_cancel_lock``; the ``/health`` handler calls it
    that way.
    """
    cutoff = time.time() if now is None else now

    # Collect ids first so the dicts are not mutated while being iterated.
    stale_cancelled = [
        stream_id
        for stream_id, deadline in _internal_cancelled_streams.items()
        if deadline <= cutoff
    ]
    for stream_id in stale_cancelled:
        _internal_cancelled_streams.pop(stream_id, None)

    stale_voice_states = [
        stream_id
        for stream_id, state in _internal_stream_voice_keys.items()
        if state[1] <= cutoff
    ]
    for stream_id in stale_voice_states:
        _internal_stream_voice_keys.pop(stream_id, None)
def _touch_internal_stream_voice_keys_locked(stream_id: str):
    """Extend the expiry of the voice-key state stored for *stream_id*.

    Does nothing for an empty id or an id with no stored state. The new expiry
    is now plus ``Config.INTERNAL_STREAM_STATE_TTL_SEC`` (at least one second);
    the stored key set itself is kept as-is.
    """
    current = _internal_stream_voice_keys.get(stream_id) if stream_id else None
    if current is not None:
        voice_keys, _previous_expiry = current
        refreshed_expiry = time.time() + max(1, Config.INTERNAL_STREAM_STATE_TTL_SEC)
        _internal_stream_voice_keys[stream_id] = (voice_keys, refreshed_expiry)
def _clear_internal_stream_state_locked(stream_id: str):
    """Forget *stream_id* in both the cancelled-stream and voice-key registries."""
    # Missing ids are fine: pop with a default never raises.
    for registry in (_internal_cancelled_streams, _internal_stream_voice_keys):
        registry.pop(stream_id, None)
 # ═══════════════════════════════════════════════════════════════════
         _active_streams.pop(stream_id, None)
+def _parallel_two_way_stream_generator(
     wrapper: ChatterboxWrapper,
     text: str,
     local_voice: VoiceProfile,
     stream_id: str,
     helper_base_url: str,
 ) -> Generator[bytes, None, None]:
+    """Additive 2-way split streamer (primary + helper).
+    Routing pattern:
+      - chunk 0,2,4...  -> primary (local)
+      - chunk 1,3,5...  -> helper
+    """
     cancel_event = threading.Event()
     _active_streams[stream_id] = cancel_event
+    helper_base_url = (helper_base_url or "").strip()
+    helper_route_set = {helper_base_url} if helper_base_url else set()
+    if helper_route_set:
+        with _stream_routes_lock:
+            _stream_helper_routes[stream_id] = set(helper_route_set)
     clean_text = text_processor.sanitize(text.strip()[: Config.MAX_TEXT_LENGTH])
     chunks = text_processor.split_for_streaming(clean_text)
     total_chunks = len(chunks)
     if total_chunks == 0:
+        with _stream_routes_lock:
+            _stream_helper_routes.pop(stream_id, None)
         _active_streams.pop(stream_id, None)
         return
     lock = threading.Lock()
     cond = threading.Condition(lock)
+    ready: dict[int, _ChunkPacket] = {}
     first_error: Optional[Exception] = None
     workers_done = 0
+    expected_workers = 2
+    stream_completed = False
+    def _publish(packet: _ChunkPacket):
         with cond:
+            # First write wins for an index to avoid duplicate retry races.
+            if packet.index not in ready:
+                ready[packet.index] = packet
             cond.notify_all()
     def _set_error(err: Exception):
         )
         return _encode_mp3_chunk(audio)
+    def _local_worker():
         try:
             for idx in range(0, total_chunks, 2):
                 if cancel_event.is_set():
                     break
                 data = _synth_local(chunks[idx])
+                _publish(
+                    _ChunkPacket(
+                        index=idx,
+                        data=data,
+                        lane="primary",
+                        produced_at=time.perf_counter(),
+                    )
+                )
         except Exception as e:
             _set_error(e)
         finally:
             _worker_done()
+    def _helper_worker():
+        helper_available = bool(helper_base_url)
         helper_voice_key: Optional[str] = None
+        helper_timeout = max(1.0, Config.HELPER_TIMEOUT_SEC)
+        helper_client: Optional[_HelperHttpClient] = None
         try:
+            if helper_available:
+                try:
+                    helper_client = _HelperHttpClient(
+                        helper_base_url,
+                        default_timeout=helper_timeout,
+                    )
+                except Exception as conn_err:
+                    helper_available = False
+                    logger.warning(
+                        f"[{stream_id}] helper keep-alive init failed ({conn_err}); "
+                        "using local fallback for helper lane"
+                    )
+            if helper_available and helper_voice_bytes:
                 attempts = 2 if Config.HELPER_RETRY_ONCE else 1
                 last_err: Optional[Exception] = None
                 for _ in range(attempts):
                             helper_base_url=helper_base_url,
                             stream_id=stream_id,
                             audio_bytes=helper_voice_bytes,
+                            timeout_sec=helper_timeout,
+                            helper_client=helper_client,
                         )
                         last_err = None
                         break
                     except Exception as reg_err:
                         last_err = reg_err
                         continue
                 if last_err is not None:
                     helper_available = False
                     logger.warning(
+                        f"[{stream_id}] helper voice registration failed; "
+                        "falling back to local synthesis for helper lane"
                     )
+            elif not helper_available:
+                logger.info(
+                    f"[{stream_id}] helper URL not configured; using local fallback"
+                )
             for idx in range(1, total_chunks, 2):
                 if cancel_event.is_set():
                             helper_data = _helper_request_chunk(
                                 helper_base_url=helper_base_url,
                                 payload=payload,
+                                timeout_sec=helper_timeout,
+                                helper_client=helper_client,
+                            )
+                            _publish(
+                                _ChunkPacket(
+                                    index=idx,
+                                    data=helper_data,
+                                    lane="helper",
+                                    produced_at=time.perf_counter(),
+                                )
                             )
                             last_err = None
                             break
                         except Exception as helper_err:
                     helper_available = False
                     logger.warning(
+                        f"[{stream_id}] helper failed at chunk {idx}; "
+                        "falling back to local synthesis for remaining helper chunks"
                     )
+                # Local fallback for helper lane
                 data = _synth_local(chunks[idx])
+                _publish(
+                    _ChunkPacket(
+                        index=idx,
+                        data=data,
+                        lane="helper-local-fallback",
+                        produced_at=time.perf_counter(),
+                    )
+                )
         except Exception as e:
             _set_error(e)
         finally:
+            if helper_client is not None:
+                helper_client.close()
             _worker_done()
+    local_thread = threading.Thread(target=_local_worker, daemon=True)
+    helper_thread = threading.Thread(target=_helper_worker, daemon=True)
+    local_thread.start()
+    helper_thread.start()
     next_idx = 0
     try:
                     next_idx not in ready
                     and first_error is None
                     and not cancel_event.is_set()
+                    and workers_done < expected_workers
                 ):
                     cond.wait(timeout=0.1)
                     break
                 if next_idx in ready:
+                    packet = ready.pop(next_idx)
+                    buffered_chunks = len(ready)
                 elif first_error is not None:
                     logger.error(f"[{stream_id}] Parallel stream error: {first_error}")
                     break
+                elif workers_done >= expected_workers:
                     logger.error(
                         f"[{stream_id}] Parallel stream ended with missing chunk index {next_idx}"
                     )
                 else:
                     continue
+            logger.debug(
+                "[%s] stitch emit chunk %s/%s from %s (buffered=%s)",
+                stream_id,
+                next_idx + 1,
+                total_chunks,
+                packet.lane,
+                buffered_chunks,
+            )
+            yield packet.data
             next_idx += 1
+        stream_completed = (
+            next_idx >= total_chunks
+            and first_error is None
+            and not cancel_event.is_set()
+        )
     finally:
         cancel_event.set()
+        # For fast stop/cancel, signal helpers first; for normal completion, wait for
+        # workers to flush and then ask helpers to clear stream state.
+        if not stream_completed:
+            for base_url in helper_route_set:
+                _helper_cancel_stream(base_url, stream_id)
+        local_thread.join(timeout=1.0)
+        helper_thread.join(timeout=1.0)
+        if stream_completed:
+            for base_url in helper_route_set:
+                _helper_complete_stream(base_url, stream_id)
+        with _stream_routes_lock:
+            _stream_helper_routes.pop(stream_id, None)
         _active_streams.pop(stream_id, None)
 async def stream_text_to_speech(
     text: str = Form(...),
     voice_ref: Optional[UploadFile] = File(None),
+    voice_name: str = Form("default"),
     max_new_tokens: int = Form(Config.MAX_NEW_TOKENS),
     repetition_penalty: float = Form(Config.REPETITION_PENALTY),
 ):
     if not text or not text.strip():
         raise HTTPException(400, "Text is required")
+    voice = await _resolve_voice(voice_ref, voice_name, wrapper)
     stream_id = uuid.uuid4().hex[:12]
     return StreamingResponse(
 async def parallel_stream_text_to_speech(
     text: str = Form(...),
     voice_ref: Optional[UploadFile] = File(None),
+    voice_name: str = Form("default"),
     max_new_tokens: int = Form(Config.MAX_NEW_TOKENS),
     repetition_penalty: float = Form(Config.REPETITION_PENALTY),
     helper_url: Optional[str] = Form(None),
 ):
+    """Additive 2-way split stream mode (primary + helper)."""
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
     if not wrapper:
         raise HTTPException(503, "Model not loaded")
         except Exception as e:
             logger.error(f"Parallel voice encoding failed: {e}")
             raise HTTPException(400, "Could not process voice file for parallel mode")
+    else:
+        try:
+            selected_voice_id = wrapper.resolve_voice_id(voice_name)
+            local_voice = wrapper.get_builtin_voice(selected_voice_id)
+        except ValueError as e:
+            raise HTTPException(status_code=400, detail=str(e))
+        # Ensure helper uses the same selected built-in voice.
+        if selected_voice_id != wrapper.default_voice_name:
+            helper_voice_bytes = wrapper.get_builtin_voice_bytes(selected_voice_id)
+            if not helper_voice_bytes:
+                raise HTTPException(
+                    status_code=400,
+                    detail=f"Selected voice '{voice_name}' is unavailable for helper registration",
+                )
     resolved_helper = (helper_url or Config.HELPER_BASE_URL).strip()
     if not resolved_helper:
         raise HTTPException(
             400,
+            "No helper configured. Set CB_HELPER_BASE_URL or pass helper_url.",
         )
     stream_id = uuid.uuid4().hex[:12]
     return StreamingResponse(
+        _parallel_two_way_stream_generator(
             wrapper=wrapper,
             text=text,
             local_voice=local_voice,
             "Content-Disposition": "attachment; filename=tts_parallel_stream.mp3",
             "Transfer-Encoding": "chunked",
             "X-Stream-Id": stream_id,
+            "X-Streaming-Type": "parallel-2way",
             "Cache-Control": "no-cache",
         },
     )
     stream_id = (http_request.query_params.get("stream_id") or "").strip()
     if stream_id:
         with _internal_cancel_lock:
+            _purge_internal_stream_state_locked()
+            keys, _ = _internal_stream_voice_keys.get(stream_id, (set(), 0.0))
             keys.add(voice_key)
+            _internal_stream_voice_keys[stream_id] = (
+                keys,
+                time.time() + max(1, Config.INTERNAL_STREAM_STATE_TTL_SEC),
+            )
     return {"status": "registered", "voice_key": voice_key}
             raise HTTPException(403, "Forbidden")
     with _internal_cancel_lock:
+        _purge_internal_stream_state_locked()
         if request.stream_id in _internal_cancelled_streams:
             raise HTTPException(409, "Stream already cancelled")
+        _touch_internal_stream_voice_keys_locked(request.stream_id)
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
     if not wrapper:
     voice_profile = wrapper.default_voice
     if request.voice_key:
         cached_voice = wrapper._voice_cache.get(request.voice_key)
+        if cached_voice is None:
+            # Built-in voices are permanent in wrapper registry even if TTL cache entry expired.
+            cached_voice = wrapper.get_builtin_voice_by_hash(request.voice_key)
         if cached_voice is None:
             raise HTTPException(409, "Voice key expired or not found")
         voice_profile = cached_voice
             raise HTTPException(403, "Forbidden")
     with _internal_cancel_lock:
+        _purge_internal_stream_state_locked()
+        _internal_cancelled_streams[stream_id] = (
+            time.time() + max(1, Config.INTERNAL_CANCEL_TTL_SEC)
+        )
         _internal_stream_voice_keys.pop(stream_id, None)
     return {"status": "cancelled", "stream_id": stream_id}
+@app.post("/internal/chunk/complete/{stream_id}")
+async def internal_chunk_complete(stream_id: str, http_request: Request):
+    """Best-effort immediate cleanup after stream completes normally.
+
+    Purges expired internal state, then removes any cancel marker and
+    voice-key entry recorded for ``stream_id``.
+
+    Raises:
+        HTTPException: 403 when ``Config.INTERNAL_SHARED_SECRET`` is set and
+            the ``X-Internal-Secret`` request header does not equal it.
+    """
+    # The secret check is skipped entirely when no shared secret is configured.
+    if Config.INTERNAL_SHARED_SECRET:
+        provided = http_request.headers.get("X-Internal-Secret", "")
+        if provided != Config.INTERNAL_SHARED_SECRET:
+            raise HTTPException(403, "Forbidden")
+    with _internal_cancel_lock:
+        _purge_internal_stream_state_locked()
+        _clear_internal_stream_state_locked(stream_id)
+    return {"status": "completed", "stream_id": stream_id}
 @app.post("/v1/audio/speech")
 async def openai_compatible_tts(request: TTSJsonRequest):
     """OpenAI-compatible streaming endpoint (JSON body, no file upload).
+    Uses built-in voice selection via `voice`. For voice cloning, use /tts/stream with FormData.
     """
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
     if not wrapper:
         raise HTTPException(503, "Model not loaded")
+    try:
+        selected_voice = wrapper.get_builtin_voice(request.voice)
+    except ValueError as e:
+        raise HTTPException(400, str(e))
     stream_id = uuid.uuid4().hex[:12]
     return StreamingResponse(
         _pipeline_stream_generator(
+            wrapper, request.text, selected_voice,
             request.max_new_tokens, request.repetition_penalty, stream_id,
         ),
         media_type="audio/mpeg",
     event = _active_streams.get(stream_id)
     if event:
         event.set()
+        with _stream_routes_lock:
+            helper_routes = set(_stream_helper_routes.pop(stream_id, set()))
+        for helper_url in helper_routes:
+            _helper_cancel_stream(helper_url, stream_id)
         logger.info(f"Stream {stream_id} cancelled by client")
         return {"status": "stopped", "stream_id": stream_id}
     return {"status": "not_found", "stream_id": stream_id}
 @app.post("/tts/stop")
 async def stop_all_streams():
     """Emergency stop: cancel ALL active TTS streams."""
+    # Snapshot the registry so the count and the cancel loop use a stable list.
+    active_items = list(_active_streams.items())
+    count = len(active_items)
+    # Copy and clear the helper routes under the lock; the helper cancel calls
+    # below then run after the lock has been released.
+    with _stream_routes_lock:
+        stream_routes = {sid: set(urls) for sid, urls in _stream_helper_routes.items()}
+        _stream_helper_routes.clear()
+    for sid, event in active_items:
         event.set()
+        for helper_url in stream_routes.get(sid, set()):
+            _helper_cancel_stream(helper_url, sid)
+    # NOTE(review): clear() also drops any stream registered after the snapshot
+    # above without setting its cancel event — confirm this is acceptable.
     _active_streams.clear()
     logger.info(f"Stopped all streams ({count} active)")
     return {"status": "stopped_all", "count": count}

chatterbox_wrapper.py CHANGED Viewed

@@ -27,6 +27,7 @@ import tempfile
 import time
 from collections import OrderedDict
 from dataclasses import dataclass
 from typing import Callable, Generator, Optional
 import librosa
@@ -48,6 +49,21 @@ _SUPPORTED_AUDIO_EXTENSIONS = {
 }
 # ═══════════════════════════════════════════════════════════════════
 # Data Structures
 # ═══════════════════════════════════════════════════════════════════
@@ -203,8 +219,15 @@ class ChatterboxWrapper:
             ttl_seconds=self.cfg.VOICE_CACHE_TTL_SEC,
         )
-        logger.info("Encoding default reference voice …")
-        self.default_voice = self._load_default_voice()
         logger.info("✅ ChatterboxWrapper ready")
@@ -260,16 +283,190 @@ class ChatterboxWrapper:
         opts.enable_mem_reuse = True
         return opts
-    # ─── Default voice ────────────────────────────────────────────
-    def _load_default_voice(self) -> VoiceProfile:
         path = hf_hub_download(
             self.cfg.DEFAULT_VOICE_REPO,
             filename=self.cfg.DEFAULT_VOICE_FILE,
             cache_dir=self.cfg.MODELS_DIR,
         )
-        audio, _ = librosa.load(path, sr=self.cfg.SAMPLE_RATE)
-        return self._encode_audio_array(audio, audio_hash="__default__")
     # ─── Voice encoding ──────────────────────────────────────────

 import time
 from collections import OrderedDict
 from dataclasses import dataclass
+from pathlib import Path
 from typing import Callable, Generator, Optional
 import librosa
 }
+def _slugify(text: str) -> str:
+    """Turn arbitrary text into a lowercase, underscore-separated identifier.
+
+    The input is stripped and lowercased. Characters for which
+    ``str.isalnum()`` is true are kept; every run of other characters becomes
+    a single underscore. Leading and trailing underscores are removed, and
+    ``"voice"`` is returned when nothing is left.
+    """
+    buf = []
+    # Tracks whether the last emitted character was an underscore, so that a
+    # run of separators collapses to one.
+    prev_underscore = False
+    for ch in text.strip().lower():
+        if ch.isalnum():
+            buf.append(ch)
+            prev_underscore = False
+        else:
+            if not prev_underscore:
+                buf.append("_")
+                prev_underscore = True
+    slug = "".join(buf).strip("_")
+    # Never return an empty slug; callers always get a usable identifier.
+    return slug or "voice"
 # ═══════════════════════════════════════════════════════════════════
 # Data Structures
 # ═══════════════════════════════════════════════════════════════════
             ttl_seconds=self.cfg.VOICE_CACHE_TTL_SEC,
         )
+        self._builtin_voice_profiles: dict[str, VoiceProfile] = {}
+        self._builtin_voice_bytes: dict[str, bytes] = {}
+        self._builtin_voice_by_hash: dict[str, VoiceProfile] = {}
+        self._voice_alias_to_id: dict[str, str] = {}
+        self._builtin_voice_catalog: list[dict] = []
+        self._default_voice_id: str = "default"
+        logger.info("Loading built-in voices (HF default + local samples) …")
+        self.default_voice = self._load_builtin_voices()
         logger.info("✅ ChatterboxWrapper ready")
         opts.enable_mem_reuse = True
         return opts
+    # ─── Built-in voices (HF default + local samples) ────────────
+    def _download_hf_default_voice_bytes(self) -> bytes:
+        """Fetch the configured default voice file via ``hf_hub_download`` and return its raw bytes.
+
+        The repo, filename and cache directory come from ``self.cfg``.
+        """
         path = hf_hub_download(
             self.cfg.DEFAULT_VOICE_REPO,
             filename=self.cfg.DEFAULT_VOICE_FILE,
             cache_dir=self.cfg.MODELS_DIR,
         )
+        return Path(path).read_bytes()
+    def _list_local_voice_paths(self) -> list[Path]:
+        """Find audio files that can be registered as local built-in voices.
+
+        Scans, in order, this module's directory, the current working
+        directory and the module directory's parent. The scan is not
+        recursive: only regular files directly inside each directory whose
+        suffix is in ``_SUPPORTED_AUDIO_EXTENSIONS`` are returned. Within a
+        directory, files are ordered by case-insensitive name. A file reached
+        through more than one directory is returned once.
+        """
+        wrapper_dir = Path(__file__).resolve().parent
+        # Support both module-level and repo-root deployment layouts.
+        candidates = []
+        for d in (wrapper_dir, Path.cwd().resolve(), wrapper_dir.parent):
+            try:
+                resolved = d.resolve()
+            except Exception:
+                continue
+            # Skip duplicates, e.g. when the cwd is the module directory.
+            if resolved.is_dir() and resolved not in candidates:
+                candidates.append(resolved)
+        voices: list[Path] = []
+        seen_real_paths: set[str] = set()
+        for root in candidates:
+            try:
+                entries = sorted(root.iterdir(), key=lambda x: x.name.lower())
+            except Exception:
+                # An unreadable directory is skipped rather than failing the scan.
+                continue
+            for p in entries:
+                if not p.is_file():
+                    continue
+                if p.suffix.lower() not in _SUPPORTED_AUDIO_EXTENSIONS:
+                    continue
+                # De-duplicate on the resolved path.
+                real_path = str(p.resolve())
+                if real_path in seen_real_paths:
+                    continue
+                seen_real_paths.add(real_path)
+                voices.append(p)
+        logger.info(
+            "Local voice scan complete: %s files across %s",
+            len(voices),
+            [str(x) for x in candidates],
+        )
+        return voices
+    def _make_unique_voice_id(self, preferred: str) -> str:
+        """Return a slugified id that is not yet used by a registered built-in voice.
+
+        The slug of ``preferred`` is used as-is when free; otherwise ``_2``,
+        ``_3``, ... is appended until the id is unused.
+        """
+        base = _slugify(preferred)
+        candidate = base
+        # Suffixes start at 2: the unsuffixed slug counts as the first.
+        idx = 2
+        while candidate in self._builtin_voice_profiles:
+            candidate = f"{base}_{idx}"
+            idx += 1
+        return candidate
+    def _register_builtin_voice(
+        self,
+        *,
+        preferred_id: str,
+        display_name: str,
+        source: str,
+        source_ref: str,
+        audio_bytes: bytes,
+        is_default: bool = False,
+    ) -> str:
+        """Encode one voice sample and record it in the built-in voice registry.
+
+        Args:
+            preferred_id: Desired id; slugified and made unique before use.
+            display_name: Name stored in the catalog; the slug of its stem is
+                also registered as an alias.
+            source: Stored verbatim in the catalog entry.
+            source_ref: Stored verbatim in the catalog entry.
+            audio_bytes: Raw audio file contents.
+            is_default: When true, this voice becomes the default and takes
+                over the ``"default"`` alias.
+
+        Returns:
+            The unique voice id under which the voice was registered.
+
+        Raises:
+            ValueError: if ``audio_bytes`` is empty.
+        """
+        if not audio_bytes:
+            raise ValueError("Voice file is empty")
+        voice_id = self._make_unique_voice_id(preferred_id)
+        # The MD5 hex digest of the file serves as cache key and as the
+        # catalog "voice_key" (presumably a content fingerprint, not a
+        # security measure).
+        audio_hash = hashlib.md5(audio_bytes).hexdigest()
+        profile = self._voice_cache.get(audio_hash)
+        if profile is None:
+            audio = _load_audio_bytes(audio_bytes, sr=self.cfg.SAMPLE_RATE)
+            profile = self._encode_audio_array(audio, audio_hash=audio_hash)
+            self._voice_cache.put(audio_hash, profile)
+        else:
+            # Keep hash attached to cached profile for metadata/voice-key usage.
+            profile.audio_hash = audio_hash
+        self._builtin_voice_profiles[voice_id] = profile
+        self._builtin_voice_bytes[voice_id] = audio_bytes
+        # hexdigest() is never empty, so this condition always holds.
+        if audio_hash:
+            self._builtin_voice_by_hash[audio_hash] = profile
+        aliases: list[str] = []
+        # First registrant wins: an alias already owned by another voice is
+        # neither reassigned nor listed for this one.
+        for alias in (voice_id, _slugify(Path(display_name).stem)):
+            if alias not in self._voice_alias_to_id:
+                self._voice_alias_to_id[alias] = voice_id
+                aliases.append(alias)
+        if is_default:
+            # Unlike the aliases above, "default" is overwritten unconditionally.
+            self._default_voice_id = voice_id
+            self._voice_alias_to_id["default"] = voice_id
+            if "default" not in aliases:
+                aliases.append("default")
+        self._builtin_voice_catalog.append(
+            {
+                "id": voice_id,
+                "display_name": display_name,
+                "source": source,
+                "source_ref": source_ref,
+                "aliases": aliases,
+                "voice_key": audio_hash,
+            }
+        )
+        return voice_id
+    def _load_builtin_voices(self) -> VoiceProfile:
+        """Register the Hugging Face default voice and all local samples.
+
+        The downloaded default is registered first with ``is_default=True``.
+        Each local audio file is then registered under its file stem; a local
+        file that fails to register is logged and skipped. A failure while
+        downloading or registering the default voice is not caught here.
+
+        Returns:
+            The profile of the default voice.
+
+        Raises:
+            RuntimeError: if no profile exists for the default voice id after
+                loading.
+        """
+        # 1) HF default voice (kept as true default fallback)
+        hf_bytes = self._download_hf_default_voice_bytes()
+        self._register_builtin_voice(
+            preferred_id="default_hf_voice",
+            display_name=self.cfg.DEFAULT_VOICE_FILE,
+            source="huggingface",
+            source_ref=f"{self.cfg.DEFAULT_VOICE_REPO}:{self.cfg.DEFAULT_VOICE_FILE}",
+            audio_bytes=hf_bytes,
+            is_default=True,
+        )
+        # 2) Local voice samples placed next to app files
+        for path in self._list_local_voice_paths():
+            # Avoid duplicate entry if someone also copied default_voice.wav locally.
+            # The match is on file name only, not on file contents.
+            if path.name == self.cfg.DEFAULT_VOICE_FILE:
+                continue
+            try:
+                self._register_builtin_voice(
+                    preferred_id=path.stem,
+                    display_name=path.name,
+                    source="local",
+                    source_ref=str(path.name),
+                    audio_bytes=path.read_bytes(),
+                    is_default=False,
+                )
+            except Exception as e:
+                logger.warning(f"Skipping local voice {path.name}: {e}")
+        default_profile = self._builtin_voice_profiles.get(self._default_voice_id)
+        if default_profile is None:
+            raise RuntimeError("Default built-in voice could not be initialized")
+        logger.info(
+            f"Built-in voices loaded: {len(self._builtin_voice_catalog)} "
+            f"(default={self._default_voice_id})"
+        )
+        return default_profile
+    def list_builtin_voices(self) -> list[dict]:
+        """Return metadata for startup-preloaded voices.
+
+        Each catalog entry is copied with ``dict()``, which is a shallow copy:
+        nested values such as the ``"aliases"`` list are shared with the
+        internal catalog.
+        """
+        return [dict(v) for v in self._builtin_voice_catalog]
+    @property
+    def default_voice_name(self) -> str:
+        """Id of the built-in voice currently registered as the default."""
+        return self._default_voice_id
+    def resolve_voice_id(self, voice_name: Optional[str]) -> str:
+        if voice_name is None:
+            return self._default_voice_id
+        key = _slugify(str(voice_name))
+        if not key:
+            return self._default_voice_id
+        voice_id = self._voice_alias_to_id.get(key)
+        if voice_id is None:
+            available = ", ".join(sorted(self._voice_alias_to_id.keys()))
+            raise ValueError(f"Unknown voice '{voice_name}'. Available: {available}")
+        return voice_id
+    def get_builtin_voice(self, voice_name: Optional[str]) -> VoiceProfile:
+        """Return the profile of the built-in voice selected by ``voice_name``.
+
+        Raises:
+            ValueError: propagated from ``resolve_voice_id`` for unknown names.
+        """
+        voice_id = self.resolve_voice_id(voice_name)
+        profile = self._builtin_voice_profiles[voice_id]
+        # Re-insert into the voice cache on every access (presumably to keep
+        # the entry alive for lookups by hash — confirm against the cache).
+        if profile.audio_hash:
+            self._voice_cache.put(profile.audio_hash, profile)
+        return profile
+    def get_builtin_voice_bytes(self, voice_name: Optional[str]) -> Optional[bytes]:
+        """Return the raw audio bytes stored for the selected built-in voice.
+
+        Returns ``None`` when no bytes are stored under the resolved id.
+
+        Raises:
+            ValueError: propagated from ``resolve_voice_id`` for unknown names.
+        """
+        voice_id = self.resolve_voice_id(voice_name)
+        return self._builtin_voice_bytes.get(voice_id)
+    def get_builtin_voice_by_hash(self, audio_hash: str) -> Optional[VoiceProfile]:
+        """Look up a built-in voice profile by its audio hash.
+
+        The hash is stripped of surrounding whitespace; a falsy value is
+        treated as an empty string. Returns ``None`` when nothing matches.
+        """
+        return self._builtin_voice_by_hash.get((audio_hash or "").strip())
     # ─── Voice encoding ──────────────────────────────────────────

config.py CHANGED Viewed

@@ -77,11 +77,14 @@ class Config:
     # Smaller chunks = faster TTFB (first audio arrives sooner)
     # ~200 chars ≈ 1–2 sentences ≈ fastest first-chunk on 2 vCPU
     MAX_CHUNK_CHARS: int = int(os.getenv("CB_MAX_CHUNK_CHARS", "100"))
-    # Additive parallel mode (odd/even split across primary/helper).
     ENABLE_PARALLEL_MODE: bool = _get_bool("CB_ENABLE_PARALLEL_MODE", True)
-    HELPER_BASE_URL: str = os.getenv("CB_HELPER_BASE_URL", "https://shadowhunter222-hello2.hf.space").strip()
     HELPER_TIMEOUT_SEC: float = float(os.getenv("CB_HELPER_TIMEOUT_SEC", "45"))
     HELPER_RETRY_ONCE: bool = _get_bool("CB_HELPER_RETRY_ONCE", True)
     # Optional shared secret for internal chunk endpoints.
     INTERNAL_SHARED_SECRET: str = os.getenv("CB_INTERNAL_SHARED_SECRET", "").strip()
@@ -91,10 +94,13 @@ class Config:
     ALLOWED_ORIGINS: list = [
         "https://toolboxesai.com",
         "http://localhost:8788",  "http://127.0.0.1:8788",
         "http://localhost:5502",  "http://127.0.0.1:5502",
         "http://localhost:5501",  "http://127.0.0.1:5501",
         "http://localhost:5500",  "http://127.0.0.1:5500",
         "http://localhost:5173",  "http://127.0.0.1:5173",
         "http://localhost:7860",  "http://127.0.0.1:7860",
-    ]

     # Smaller chunks = faster TTFB (first audio arrives sooner)
     # ~200 chars ≈ 1–2 sentences ≈ fastest first-chunk on 2 vCPU
     MAX_CHUNK_CHARS: int = int(os.getenv("CB_MAX_CHUNK_CHARS", "100"))
+    # Additive parallel mode (2-way split: primary + helper).
     ENABLE_PARALLEL_MODE: bool = _get_bool("CB_ENABLE_PARALLEL_MODE", True)
+    HELPER_BASE_URL: str = os.getenv("CB_HELPER_BASE_URL", "https://shadowhunter222-chab2.hf.space").strip()
     HELPER_TIMEOUT_SEC: float = float(os.getenv("CB_HELPER_TIMEOUT_SEC", "45"))
     HELPER_RETRY_ONCE: bool = _get_bool("CB_HELPER_RETRY_ONCE", True)
+    # Internal housekeeping TTLs to avoid retaining stream metadata indefinitely.
+    INTERNAL_CANCEL_TTL_SEC: int = int(os.getenv("CB_INTERNAL_CANCEL_TTL_SEC", "120"))
+    INTERNAL_STREAM_STATE_TTL_SEC: int = int(os.getenv("CB_INTERNAL_STREAM_STATE_TTL_SEC", "600"))
     # Optional shared secret for internal chunk endpoints.
     INTERNAL_SHARED_SECRET: str = os.getenv("CB_INTERNAL_SHARED_SECRET", "").strip()
     ALLOWED_ORIGINS: list = [
         "https://toolboxesai.com",
+        "https://www.toolboxesai.com",
+        # NOTE(review): the next two entries carry no scheme. If this list is
+        # used as the CORS allow-list, they cannot match a browser Origin
+        # header, which always includes the scheme — confirm and drop if unused.
+        "www.toolboxesai.com",
+        "toolboxesai.com",
         "http://localhost:8788",  "http://127.0.0.1:8788",
         "http://localhost:5502",  "http://127.0.0.1:5502",
         "http://localhost:5501",  "http://127.0.0.1:5501",
         "http://localhost:5500",  "http://127.0.0.1:5500",
         "http://localhost:5173",  "http://127.0.0.1:5173",
         "http://localhost:7860",  "http://127.0.0.1:7860",
+    ]

her_prompt.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8eaabbeafe26ad6f78b56dcc32608763eeb69485db074c7136c6818f04a93ced
+size 725328

ivr_female_prompt.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:64953bf94657c4334532319fd4f20e9859c31af4445940916b04f129ef1f89e6
+size 2779278

text_processor.py CHANGED Viewed

@@ -4,6 +4,26 @@ Chatterbox Turbo TTS — Text Processor
 Sanitizes raw input text and splits it into sentence-level chunks
 for streaming TTS.  Paralinguistic tags ([laugh], [cough], …) are
 explicitly preserved so the model can render them.
 """
 import re
 from typing import List
@@ -47,20 +67,64 @@ _RE_EMOJI        = re.compile(
     r"]+", re.UNICODE,
 )
 _RE_HTML_ENTITY  = re.compile(r"&(?:#x?[\da-fA-F]+|\w+);")
 _HTML_ENTITIES   = {
     "&amp;": " and ", "&lt;": " less than ", "&gt;": " greater than ",
     "&nbsp;": " ", "&quot;": '"', "&apos;": "'",
-    "&mdash;": ", ", "&ndash;": ", ", "&hellip;": ".",
 }
 # — Punctuation normalization
-_RE_REPEATED_DOT    = re.compile(r"\.{2,}")
-_RE_REPEATED_EXCLAM = re.compile(r"!{2,}")
-_RE_REPEATED_QUEST  = re.compile(r"\?{2,}")
-_RE_REPEATED_SEMI   = re.compile(r";{2,}")
-_RE_REPEATED_COLON  = re.compile(r":{2,}")
-_RE_REPEATED_COMMA  = re.compile(r",{2,}")
-_RE_REPEATED_DASH   = re.compile(r"-{3,}")
 # — Whitespace
 _RE_MULTI_SPACE      = re.compile(r"[ \t]+")
@@ -68,7 +132,14 @@ _RE_MULTI_NEWLINE    = re.compile(r"\n{3,}")
 _RE_SPACE_BEFORE_PUN = re.compile(r"\s+([.!?,;:])")
 # — Sentence boundary (split point)
-_RE_SENTENCE_SPLIT = re.compile(r"(?<=[.!?;:])\s+")
 _MIN_MERGE_WORDS = 5
@@ -78,11 +149,22 @@ _MIN_MERGE_WORDS = 5
 # ═══════════════════════════════════════════════════════════════════
 def sanitize(text: str) -> str:
-    """Clean raw input for TTS while preserving paralinguistic tags."""
     if not text:
         return text
-    # 1. Protect paralinguistic tags by replacing with placeholders
     tags_found: list[tuple[int, str]] = []
     def _protect_tag(m):
         idx = len(tags_found)
@@ -90,7 +172,16 @@ def sanitize(text: str) -> str:
         return f"§TAG{idx}§"
     text = _RE_PARA_TAG.sub(_protect_tag, text)
-    # 2. Strip non-speakable structures
     text = _RE_URL.sub("", text)
     text = _RE_CODE_BLOCK.sub("", text)
     text = _RE_IMAGE.sub(lambda m: m.group(1) if m.group(1) else "", text)
@@ -107,29 +198,36 @@ def sanitize(text: str) -> str:
     text = _RE_BULLET.sub("", text)
     text = _RE_ORDERED.sub("", text)
-    # 3. Emojis, hashtags
     text = _RE_EMOJI.sub("", text)
     text = re.sub(r"#(\w+)", r"\1", text)
-    # 4. HTML entities
     text = _RE_HTML_ENTITY.sub(lambda m: _HTML_ENTITIES.get(m.group(0), ""), text)
-    # 5. Collapse repeated punctuation
-    text = _RE_REPEATED_DOT.sub(".", text)
-    text = _RE_REPEATED_EXCLAM.sub("!", text)
-    text = _RE_REPEATED_QUEST.sub("?", text)
-    text = _RE_REPEATED_SEMI.sub(";", text)
-    text = _RE_REPEATED_COLON.sub(":", text)
-    text = _RE_REPEATED_COMMA.sub(",", text)
-    text = _RE_REPEATED_DASH.sub("—", text)
-    # 6. Whitespace
     text = _RE_SPACE_BEFORE_PUN.sub(r"\1", text)
     text = _RE_MULTI_SPACE.sub(" ", text)
     text = _RE_MULTI_NEWLINE.sub("\n\n", text)
     text = text.strip()
-    # 7. Restore paralinguistic tags
     for idx, original in tags_found:
         text = text.replace(f"§TAG{idx}§", original)
@@ -140,13 +238,25 @@ def split_for_streaming(text: str, max_chars: int = Config.MAX_CHUNK_CHARS) -> L
     """Split sanitized text into sentence-level chunks for streaming.
     Strategy:
-      1. Split on sentence-ending punctuation boundaries
-      2. Enforce max_chars per chunk (split long sentences on commas / spaces)
-      3. Merge short chunks (≤5 words) with the next to avoid tiny segments
     """
     if not text:
         return []
     # Step 1: sentence split
     raw_chunks = _RE_SENTENCE_SPLIT.split(text)
     raw_chunks = [c.strip() for c in raw_chunks if c.strip()]
@@ -161,23 +271,30 @@ def split_for_streaming(text: str, max_chars: int = Config.MAX_CHUNK_CHARS) -> L
     # Step 3: merge short chunks
     if len(sized) <= 1:
-        return sized
-    merged: List[str] = []
-    carry = ""
-    for i, chunk in enumerate(sized):
         if carry:
-            chunk = carry + " " + chunk
-            carry = ""
-        if len(chunk.split()) <= _MIN_MERGE_WORDS and i < len(sized) - 1:
-            carry = chunk
-        else:
-            merged.append(chunk)
-    if carry:
-        if merged:
-            merged[-1] += " " + carry
-        else:
-            merged.append(carry)
     return merged
@@ -191,16 +308,41 @@ def _break_long_chunk(text: str, max_chars: int) -> List[str]:
     parts: List[str] = []
     remaining = text
     while len(remaining) > max_chars:
-        # Try comma first
-        pos = remaining.rfind(",", 0, max_chars)
-        if pos == -1:
-            pos = remaining.rfind(" ", 0, max_chars)
-        if pos == -1:
-            pos = max_chars  # hard break
-        segment = remaining[:pos].strip()
         if segment:
             parts.append(segment)
-        remaining = remaining[pos:].lstrip(", ")
     if remaining.strip():
         parts.append(remaining.strip())
     return parts

 Sanitizes raw input text and splits it into sentence-level chunks
 for streaming TTS.  Paralinguistic tags ([laugh], [cough], …) are
 explicitly preserved so the model can render them.
+Punctuation Philosophy (based on Resemble AI recommendations):
+  ✅ PRESERVE (benefits prosody):
+     • Ellipsis ...    → dramatic pause, trailing thought, hesitation
+     • Em dash —       → abrupt transition, dramatic break
+     • Comma ,         → short natural pause / breathing point
+     • Period .        → full stop, pitch drop, sentence boundary
+     • ! and ?         → exclamatory / interrogative inflection
+     • Semicolon ;     → medium pause, clause bridge (NOT a split point)
+     • Colon :         → medium pause, introduces explanation (NOT a split point)
+     • Parentheses ()  → quieter/explanatory tone shift
+     • Quotes ""       → dialogue cue
+     • Apostrophe '    → contractions (don't, it's)
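+     • CAPS words      → emphasis / volume increase (short words only:
+                         sanitize() capitalizes whole-word runs of 4+ capital
+                         letters so the engine does not spell them out)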
+  ❌ FILTER (harms output):
+     • Excessive repeated punctuation (!!!! → !, ???? → ?, ,,, → ,)
+     • 4+ dots (.... → ...)
+     • Emojis, URLs, markdown, HTML tags
+     • Non-standard Unicode punctuation (guillemets, etc.)
 """
 import re
 from typing import List
     r"]+", re.UNICODE,
 )
 _RE_HTML_ENTITY  = re.compile(r"&(?:#x?[\da-fA-F]+|\w+);")
+# HTML entities → speakable replacements
+# NOTE: &hellip; → "..." (preserves dramatic pause), &mdash;/&ndash; → "—" (preserves dramatic break)
 _HTML_ENTITIES   = {
     "&amp;": " and ", "&lt;": " less than ", "&gt;": " greater than ",
     "&nbsp;": " ", "&quot;": '"', "&apos;": "'",
+    "&mdash;": "—", "&ndash;": "—", "&hellip;": "...",
 }
+# — Smart/curly quote normalization → ASCII equivalents
+# These Unicode variants may confuse the tokenizer; normalizing ensures clean input.
+_SMART_QUOTE_MAP = str.maketrans({
+    "\u201c": '"',   # " left double quotation mark
+    "\u201d": '"',   # " right double quotation mark
+    "\u2018": "'",   # ' left single quotation mark
+    "\u2019": "'",   # ' right single quotation mark
+    "\u00ab": '"',   # « left guillemet
+    "\u00bb": '"',   # » right guillemet
+    "\u201e": '"',   # „ double low quotation mark
+    "\u201f": '"',   # ‟ double high reversed quotation mark
+    "\u2032": "'",   # ′ prime
+    "\u2033": '"',   # ″ double prime
+    "\u2013": "—",   # – en dash → em dash (dramatic pause)
+    "\u2014": "—",   # — em dash (keep as-is after mapping)
+    "\u2026": "...", # … horizontal ellipsis → three dots
+})
+# — ALL CAPS normalization
+# Words entirely in caps (length >= 4) often get spelled out by the TTS engine (e.g. NOTHING).
+# By converting them to Title Case, they'll be processed naturally as words.
+_RE_ALL_CAPS = re.compile(r"\b[A-Z]{4,}\b")
 # — Punctuation normalization
+#   Ellipsis (... / ..) is PRESERVED — it creates dramatic pauses in Chatterbox.
+#   Only 4+ dots are excessive and get capped to standard ellipsis.
+_RE_EXCESSIVE_DOTS   = re.compile(r"\.{4,}")       # ....+ → ... (cap excessive)
+_RE_NORMALIZE_DOTS   = re.compile(r"\.{2,3}")       # .. or ... → ... (standardize)
+_RE_REPEATED_EXCLAM  = re.compile(r"!{2,}")          # !! → !
+_RE_REPEATED_QUEST   = re.compile(r"\?{2,}")         # ?? → ?
+_RE_REPEATED_SEMI    = re.compile(r";{2,}")           # ;; → ;
+_RE_REPEATED_COLON   = re.compile(r":{2,}")           # :: → :
+_RE_REPEATED_COMMA   = re.compile(r",{2,}")           # ,, → ,
+_RE_REPEATED_DASH    = re.compile(r"-{3,}")           # --- → — (em dash)
+# — Abbreviation protection
+# Common abbreviations ending in "." that should NOT trigger sentence splitting.
+# These get a placeholder before splitting, then get restored.
+_ABBREVIATIONS = (
+    "Mr", "Mrs", "Ms", "Dr", "Prof", "Sr", "Jr", "St", "Ave", "Blvd",
+    "vs", "etc", "approx", "dept", "est", "govt", "inc", "corp", "ltd",
+    "Jan", "Feb", "Mar", "Apr", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
+    "Gen", "Col", "Sgt", "Capt", "Lt", "Cmdr", "Adm",
+    "Fig", "Vol", "No", "Ref", "Rev", "Ph",
+)
+_RE_ABBREV = re.compile(
+    r"\b(" + "|".join(re.escape(a) for a in _ABBREVIATIONS) + r")\.",
+    re.IGNORECASE,
+)
 # — Whitespace
 _RE_MULTI_SPACE      = re.compile(r"[ \t]+")
 _RE_SPACE_BEFORE_PUN = re.compile(r"\s+([.!?,;:])")
 # — Sentence boundary (split point)
+# Split ONLY on true sentence-ending punctuation: . ! ?
+# Semicolons and colons are clause connectors — they bridge related thoughts
+# and should NOT be used as split points (creates choppy, unnatural fragments).
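+# Ellipsis (...) is also intentionally excluded from splitting: letting it split the stream
+# creates a compound lag between chunks, making the pause artificially excessive.
+# NOTE(review): in the second branch the character right before the whitespace is a
+# closing quote/bracket, so the 4-dot lookbehind (?<!\.\.\.\.) can never match and an
+# ellipsis followed by a closing quote (e.g. `..."`) still splits. Confirm whether
+# (?<!\.\.\.[)\]"']) was intended.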
+_RE_SENTENCE_SPLIT = re.compile(
+    r"""(?:(?<=[.!?])(?<!\.\.\.)|(?<=[.!?][)\]"'])(?<!\.\.\.\.))\s+"""
+)
 _MIN_MERGE_WORDS = 5
 # ═══════════════════════════════════════════════════════════════════
 def sanitize(text: str) -> str:
+    """Clean raw input for TTS while preserving prosody-beneficial punctuation.
+    Preserves: ellipsis (...), em dashes (—), commas, periods, !, ?, ;, :, quotes.
+    Removes: emojis, URLs, markdown, HTML, excessive repeated punctuation.
+    """
     if not text:
         return text
+    # 0. Normalize smart/curly quotes and Unicode punctuation FIRST
+    #    This ensures downstream regex works on clean ASCII-like punctuation.
+    text = text.translate(_SMART_QUOTE_MAP)
+    # 1. Normalize ALL CAPS words to Title Case to prevent spelling out
+    text = _RE_ALL_CAPS.sub(lambda m: m.group(0).capitalize(), text)
+    # 2. Protect paralinguistic tags by replacing with placeholders
     tags_found: list[tuple[int, str]] = []
     def _protect_tag(m):
         idx = len(tags_found)
         return f"§TAG{idx}§"
     text = _RE_PARA_TAG.sub(_protect_tag, text)
+    # 3. Protect abbreviations from sentence-boundary splitting
+    #    "Dr. Smith" → "Dr§ABR0§ Smith" (the period is restored later)
+    abbrevs_found: list[tuple[int, str]] = []
+    def _protect_abbrev(m):
+        idx = len(abbrevs_found)
+        abbrevs_found.append((idx, m.group(0)))
+        return f"{m.group(1)}§ABR{idx}§"
+    text = _RE_ABBREV.sub(_protect_abbrev, text)
+    # 4. Strip non-speakable structures
     text = _RE_URL.sub("", text)
     text = _RE_CODE_BLOCK.sub("", text)
     text = _RE_IMAGE.sub(lambda m: m.group(1) if m.group(1) else "", text)
     text = _RE_BULLET.sub("", text)
     text = _RE_ORDERED.sub("", text)
+    # 5. Emojis, hashtags
     text = _RE_EMOJI.sub("", text)
     text = re.sub(r"#(\w+)", r"\1", text)
+    # 6. HTML entities → speakable text
     text = _RE_HTML_ENTITY.sub(lambda m: _HTML_ENTITIES.get(m.group(0), ""), text)
+    # 7. Normalize punctuation (PRESERVE prosody-beneficial marks)
+    #    Order matters: handle excessive dots first, then standardize ellipsis.
+    text = _RE_EXCESSIVE_DOTS.sub("...", text)       # ....+ → ... (cap)
+    text = _RE_NORMALIZE_DOTS.sub("...", text)        # .. or ... → ... (standardize)
+    text = _RE_REPEATED_EXCLAM.sub("!", text)         # !! → !
+    text = _RE_REPEATED_QUEST.sub("?", text)          # ?? → ?
+    text = _RE_REPEATED_SEMI.sub(";", text)           # ;; → ;
+    text = _RE_REPEATED_COLON.sub(":", text)          # :: → :
+    text = _RE_REPEATED_COMMA.sub(",", text)          # ,, → ,
+    text = _RE_REPEATED_DASH.sub("—", text)           # --- → em dash
+    # 8. Whitespace cleanup
     text = _RE_SPACE_BEFORE_PUN.sub(r"\1", text)
     text = _RE_MULTI_SPACE.sub(" ", text)
     text = _RE_MULTI_NEWLINE.sub("\n\n", text)
     text = text.strip()
+    # 9. Restore abbreviations
+    for idx, original in abbrevs_found:
+        # The abbreviation text itself was left in place; only its period is re-inserted
+        text = text.replace(f"§ABR{idx}§", ".")
+    # 10. Restore paralinguistic tags
     for idx, original in tags_found:
         text = text.replace(f"§TAG{idx}§", original)
     """Split sanitized text into sentence-level chunks for streaming.
     Strategy:
+      1. Protect abbreviation dots (Mr., Dr., etc.) from triggering splits
+      2. Split on sentence-ending punctuation boundaries (. ! ?)
+         — NOT on semicolons, colons, or ellipsis (those are non-breaking boundaries)
+      3. Enforce max_chars per chunk (split long sentences on commas / spaces)
+      4. Merge short chunks (≤5 words) with the next to avoid tiny segments
+      5. Restore abbreviation dots
     """
     if not text:
         return []
+    # Step 0: protect abbreviation dots from sentence-boundary splitting
+    #   "Mr. Smith" → "Mr§ABRS0§ Smith" (prevents split on that period)
+    abbrev_placeholders: list[tuple[int, str]] = []
+    def _protect_abbrev_split(m):
+        idx = len(abbrev_placeholders)
+        abbrev_placeholders.append((idx, m.group(0)))
+        return f"{m.group(1)}§ABRS{idx}§"
+    text = _RE_ABBREV.sub(_protect_abbrev_split, text)
     # Step 1: sentence split
     raw_chunks = _RE_SENTENCE_SPLIT.split(text)
     raw_chunks = [c.strip() for c in raw_chunks if c.strip()]
     # Step 3: merge short chunks
     if len(sized) <= 1:
+        merged = sized
+    else:
+        merged = []
+        carry = ""
+        for i, chunk in enumerate(sized):
+            if carry:
+                chunk = carry + " " + chunk
+                carry = ""
+            if len(chunk.split()) <= _MIN_MERGE_WORDS and i < len(sized) - 1:
+                carry = chunk
+            else:
+                merged.append(chunk)
         if carry:
+            if merged:
+                merged[-1] += " " + carry
+            else:
+                merged.append(carry)
+    # Step 4: restore abbreviation dots
+    if abbrev_placeholders:
+        for i, chunk in enumerate(merged):
+            for idx, original in abbrev_placeholders:
+                chunk = chunk.replace(f"§ABRS{idx}§", ".")
+            merged[i] = chunk
     return merged
     parts: List[str] = []
     remaining = text
     while len(remaining) > max_chars:
+        break_pos = -1
+        include_break_char = False
+        # Find the latest punctuation/pause marker before the limit (keeps prosody natural).
+        for marker in (",", ";", ":", "—", "-", "!", "?"):
+            pos = remaining.rfind(marker, 0, max_chars)
+            if pos > break_pos:
+                break_pos = pos
+                include_break_char = True
+        # Use the last space before the limit instead when it lies after the latest marker.
+        space_pos = remaining.rfind(" ", 0, max_chars)
+        if space_pos > break_pos:
+            break_pos = space_pos
+            include_break_char = False
+        # If nothing before limit, look slightly ahead to avoid mid-word cuts.
+        if break_pos == -1:
+            forward_limit = min(len(remaining), max_chars + 24)
+            m = re.search(r"[\s,;:!?]", remaining[max_chars:forward_limit])
+            if m:
+                break_pos = max_chars + m.start()
+                include_break_char = remaining[break_pos] in ",;:!?"
+            else:
+                break_pos = max_chars
+                include_break_char = False
+        cut_at = break_pos + (1 if include_break_char else 0)
+        if cut_at <= 0:
+            cut_at = min(max_chars, len(remaining))
+        segment = remaining[:cut_at].strip()
         if segment:
             parts.append(segment)
+        remaining = remaining[cut_at:].lstrip()
     if remaining.strip():
         parts.append(remaining.strip())
     return parts