Commit ·
2ce7b16
1
Parent(s): e7597f3
feat: gTTS as primary TTS — Coqui XTTS-v2 was never installed on Space
Browse files
The HF Space had been emitting a silent-stub WAV for every Speak click
because the local Coqui TTS package isn't in requirements.txt (2GB+ install,
slow CPU inference, and the XTTS-via-AMD path was promised in a comment
but never wired up). Switch to gTTS — Google's free TTS, a tiny dependency,
real audio in <1s.
Three-tier fallback (on the Space, where Coqui is not installed, gTTS is effectively the primary engine):
1. Coqui XTTS-v2 if installed locally (best quality, slow).
2. gTTS — fast cloud TTS, free, MP3 output.
3. Silent stub — last resort so gradio Audio doesn't error.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- requirements.txt +4 -1
- signbridge/voice/tts.py +36 -15
requirements.txt
CHANGED
|
@@ -25,7 +25,10 @@ opencv-python-headless>=4.10
|
|
| 25 |
# Inference clients
|
| 26 |
openai>=1.54
|
| 27 |
|
| 28 |
-
# Audio (
|
|
|
|
|
|
|
|
|
|
| 29 |
soundfile>=0.12
|
| 30 |
|
| 31 |
# Vision pipeline (MediaPipe Holistic for the pose-debug overlay)
|
|
|
|
| 25 |
# Inference clients
|
| 26 |
openai>=1.54
|
| 27 |
|
| 28 |
+
# Audio: gTTS = primary (free, fast, ~tiny dep, real audio).
|
| 29 |
+
# Coqui XTTS-v2 would be higher quality but is 2GB and slow on basic CPU.
|
| 30 |
+
gtts>=2.5
|
| 31 |
+
# soundfile is the silent-stub fallback when even gTTS fails.
|
| 32 |
soundfile>=0.12
|
| 33 |
|
| 34 |
# Vision pipeline (MediaPipe Holistic for the pose-debug overlay)
|
signbridge/voice/tts.py
CHANGED
|
@@ -72,27 +72,48 @@ class _TTSEngine:
|
|
| 72 |
def synthesize(self, text: str) -> str | None:
|
| 73 |
if not text:
|
| 74 |
return None
|
| 75 |
-
self._ensure_loaded()
|
| 76 |
-
if self._tts is None:
|
| 77 |
-
return self._silent_stub(text)
|
| 78 |
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
try:
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
| 90 |
except Exception as exc: # noqa: BLE001
|
| 91 |
logger.warning(
|
| 92 |
-
"
|
|
|
|
| 93 |
)
|
| 94 |
-
|
| 95 |
-
|
|
|
|
| 96 |
|
| 97 |
def _silent_stub(self, text: str) -> str | None:
|
| 98 |
"""Emit a 0.5 s silent WAV so the Gradio audio component has something to play.
|
|
|
|
| 72 |
def synthesize(self, text: str) -> str | None:
|
| 73 |
if not text:
|
| 74 |
return None
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
+
# Disk cache hit — same text already synthesised this session.
|
| 77 |
+
cached_wav = self._cache_dir / f"{_cache_key(text)}.wav"
|
| 78 |
+
if cached_wav.exists():
|
| 79 |
+
return str(cached_wav)
|
| 80 |
+
cached_mp3 = self._cache_dir / f"{_cache_key(text)}.mp3"
|
| 81 |
+
if cached_mp3.exists():
|
| 82 |
+
return str(cached_mp3)
|
| 83 |
|
| 84 |
+
# Tier 1: Coqui XTTS-v2 if installed locally (full quality, slow).
|
| 85 |
+
self._ensure_loaded()
|
| 86 |
+
if self._tts is not None:
|
| 87 |
+
try:
|
| 88 |
+
self._tts.tts_to_file(
|
| 89 |
+
text=text,
|
| 90 |
+
file_path=str(cached_wav),
|
| 91 |
+
language="en",
|
| 92 |
+
)
|
| 93 |
+
return str(cached_wav)
|
| 94 |
+
except Exception as exc: # noqa: BLE001
|
| 95 |
+
logger.warning(
|
| 96 |
+
"XTTS synthesis failed (%s); falling through to gTTS.",
|
| 97 |
+
type(exc).__name__,
|
| 98 |
+
)
|
| 99 |
+
|
| 100 |
+
# Tier 2: gTTS — tiny dep, free, fast (Google's TTS API).
|
| 101 |
try:
|
| 102 |
+
from gtts import gTTS # type: ignore[import-not-found]
|
| 103 |
+
tts = gTTS(text=text, lang="en", tld="com")
|
| 104 |
+
tts.save(str(cached_mp3))
|
| 105 |
+
print(f"[tts] gTTS synthesised: {cached_mp3}", flush=True)
|
| 106 |
+
return str(cached_mp3)
|
| 107 |
+
except ImportError:
|
| 108 |
+
logger.warning("gTTS not installed; falling through to silent stub.")
|
| 109 |
except Exception as exc: # noqa: BLE001
|
| 110 |
logger.warning(
|
| 111 |
+
"gTTS synthesis failed (%s); falling through to silent stub.",
|
| 112 |
+
type(exc).__name__,
|
| 113 |
)
|
| 114 |
+
|
| 115 |
+
# Tier 3: silent placeholder — better than crashing the audio component.
|
| 116 |
+
return self._silent_stub(text)
|
| 117 |
|
| 118 |
def _silent_stub(self, text: str) -> str | None:
|
| 119 |
"""Emit a 0.5 s silent WAV so the Gradio audio component has something to play.
|