Spaces:
Running
Running
added driver
Browse files
- Dockerfile +5 -1
- backend/TextToVoice.py +21 -24
Dockerfile
CHANGED
|
@@ -4,7 +4,8 @@ FROM python:3.11-slim
|
|
| 4 |
# Prevent Python from writing .pyc files and from buffering stdout/stderr
|
| 5 |
ENV PYTHONDONTWRITEBYTECODE=1 \
|
| 6 |
PYTHONUNBUFFERED=1 \
|
| 7 |
-
PIP_NO_CACHE_DIR=1
|
|
|
|
| 8 |
|
| 9 |
# System deps for OpenCV, audio (PyAudio/portaudio), and general build
|
| 10 |
# - libgl1, libglib2.0-0: needed by opencv-python
|
|
@@ -22,8 +23,11 @@ RUN apt-get update \
|
|
| 22 |
libxrender1 \
|
| 23 |
libportaudio2 \
|
| 24 |
libportaudiocpp0 \
|
|
|
|
| 25 |
portaudio19-dev \
|
| 26 |
libasound2-dev \
|
|
|
|
|
|
|
| 27 |
&& rm -rf /var/lib/apt/lists/*
|
| 28 |
|
| 29 |
# Create working directory
|
|
|
|
| 4 |
# Prevent Python from writing .pyc files and from buffering stdout/stderr
|
| 5 |
ENV PYTHONDONTWRITEBYTECODE=1 \
|
| 6 |
PYTHONUNBUFFERED=1 \
|
| 7 |
+
PIP_NO_CACHE_DIR=1 \
|
| 8 |
+
SDL_AUDIODRIVER=dummy
|
| 9 |
|
| 10 |
# System deps for OpenCV, audio (PyAudio/portaudio), and general build
|
| 11 |
# - libgl1, libglib2.0-0: needed by opencv-python
|
|
|
|
| 23 |
libxrender1 \
|
| 24 |
libportaudio2 \
|
| 25 |
libportaudiocpp0 \
|
| 26 |
+
libasound2 \
|
| 27 |
portaudio19-dev \
|
| 28 |
libasound2-dev \
|
| 29 |
+
espeak-ng \
|
| 30 |
+
libespeak1 \
|
| 31 |
&& rm -rf /var/lib/apt/lists/*
|
| 32 |
|
| 33 |
# Create working directory
|
backend/TextToVoice.py
CHANGED
|
@@ -14,12 +14,16 @@ logger = logging.getLogger("EdgeTTS")
|
|
| 14 |
class EdgeTextToSpeech:
|
| 15 |
def __init__(self):
|
| 16 |
"""Initialize Microsoft Edge TTS."""
|
|
|
|
|
|
|
|
|
|
| 17 |
try:
|
| 18 |
pygame.mixer.init(frequency=22050, size=-16, channels=2, buffer=512)
|
| 19 |
-
self.
|
| 20 |
-
logger.info("Edge TTS engine initialized successfully")
|
| 21 |
except Exception as e:
|
| 22 |
-
|
|
|
|
| 23 |
|
| 24 |
async def speak_async(self, text: str, voice: str = None):
|
| 25 |
"""Convert text to speech using Edge TTS (async)."""
|
|
@@ -34,13 +38,14 @@ class EdgeTextToSpeech:
|
|
| 34 |
temp_filename = f"temp_edge_audio_{hash(text) % 10000}.mp3"
|
| 35 |
await communicate.save(temp_filename)
|
| 36 |
|
| 37 |
-
# Play the audio file
|
| 38 |
-
pygame.mixer.
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
| 44 |
|
| 45 |
# Clean up temporary file
|
| 46 |
try:
|
|
@@ -52,13 +57,8 @@ class EdgeTextToSpeech:
|
|
| 52 |
|
| 53 |
except Exception as e:
|
| 54 |
logger.error(f"Error in Edge TTS: {e}")
|
| 55 |
-
#
|
| 56 |
-
|
| 57 |
-
await self._fallback_play_async(text)
|
| 58 |
-
return True
|
| 59 |
-
except Exception as fe:
|
| 60 |
-
logger.error(f"Fallback TTS playback failed: {fe}")
|
| 61 |
-
return False
|
| 62 |
|
| 63 |
def speak(self, text: str, voice: str = None):
|
| 64 |
"""Synchronous wrapper for speak_async."""
|
|
@@ -171,6 +171,8 @@ class EdgeTextToSpeech:
|
|
| 171 |
def _save():
|
| 172 |
import pyttsx3
|
| 173 |
engine = pyttsx3.init()
|
|
|
|
|
|
|
| 174 |
# Optional: adjust rate/voice here if needed
|
| 175 |
engine.save_to_file(text, wav_path)
|
| 176 |
engine.runAndWait()
|
|
@@ -179,13 +181,8 @@ class EdgeTextToSpeech:
|
|
| 179 |
|
| 180 |
async def _fallback_play_async(self, text: str):
|
| 181 |
"""Play speech locally using pyttsx3 (blocking in thread)."""
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
engine = pyttsx3.init()
|
| 185 |
-
engine.say(text)
|
| 186 |
-
engine.runAndWait()
|
| 187 |
-
|
| 188 |
-
await asyncio.to_thread(_play)
|
| 189 |
|
| 190 |
def _has_ffmpeg(self) -> bool:
|
| 191 |
return shutil.which("ffmpeg") is not None
|
|
|
|
| 14 |
class EdgeTextToSpeech:
|
| 15 |
def __init__(self):
|
| 16 |
"""Initialize Microsoft Edge TTS."""
|
| 17 |
+
# Always set a default voice regardless of audio device availability
|
| 18 |
+
self.current_voice = "en-US-AriaNeural"
|
| 19 |
+
self.mixer_available = False
|
| 20 |
try:
|
| 21 |
pygame.mixer.init(frequency=22050, size=-16, channels=2, buffer=512)
|
| 22 |
+
self.mixer_available = True
|
| 23 |
+
logger.info("Edge TTS engine initialized successfully (pygame mixer ready)")
|
| 24 |
except Exception as e:
|
| 25 |
+
# In containers there is no audio device; this is expected. We can still save audio files.
|
| 26 |
+
logger.warning(f"Pygame mixer not available (no audio device). File generation will still work. Details: {e}")
|
| 27 |
|
| 28 |
async def speak_async(self, text: str, voice: str = None):
|
| 29 |
"""Convert text to speech using Edge TTS (async)."""
|
|
|
|
| 38 |
temp_filename = f"temp_edge_audio_{hash(text) % 10000}.mp3"
|
| 39 |
await communicate.save(temp_filename)
|
| 40 |
|
| 41 |
+
# Play the audio file if a mixer is available (local/dev only)
|
| 42 |
+
if self.mixer_available and pygame.mixer.get_init():
|
| 43 |
+
pygame.mixer.music.load(temp_filename)
|
| 44 |
+
pygame.mixer.music.play()
|
| 45 |
+
|
| 46 |
+
# Wait for playback to finish
|
| 47 |
+
while pygame.mixer.music.get_busy():
|
| 48 |
+
await asyncio.sleep(0.1)
|
| 49 |
|
| 50 |
# Clean up temporary file
|
| 51 |
try:
|
|
|
|
| 57 |
|
| 58 |
except Exception as e:
|
| 59 |
logger.error(f"Error in Edge TTS: {e}")
|
| 60 |
+
# In server/container contexts, skip local playback fallback.
|
| 61 |
+
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
def speak(self, text: str, voice: str = None):
|
| 64 |
"""Synchronous wrapper for speak_async."""
|
|
|
|
| 171 |
def _save():
|
| 172 |
import pyttsx3
|
| 173 |
engine = pyttsx3.init()
|
| 174 |
+
# Prefer eSpeak NG in Linux containers
|
| 175 |
+
# engine.setProperty('voice', 'english') # optional
|
| 176 |
# Optional: adjust rate/voice here if needed
|
| 177 |
engine.save_to_file(text, wav_path)
|
| 178 |
engine.runAndWait()
|
|
|
|
| 181 |
|
| 182 |
async def _fallback_play_async(self, text: str):
|
| 183 |
"""Play speech locally using pyttsx3 (blocking in thread)."""
|
| 184 |
+
# In container/server context, do not attempt local playback
|
| 185 |
+
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
def _has_ffmpeg(self) -> bool:
|
| 188 |
return shutil.which("ffmpeg") is not None
|