0xarchit committed on
Commit
45e9602
·
1 Parent(s): 449a71e

added driver

Browse files
Files changed (2) hide show
  1. Dockerfile +5 -1
  2. backend/TextToVoice.py +21 -24
Dockerfile CHANGED
@@ -4,7 +4,8 @@ FROM python:3.11-slim
4
  # Prevent Python from writing .pyc files and buffer stdout/stderr
5
  ENV PYTHONDONTWRITEBYTECODE=1 \
6
  PYTHONUNBUFFERED=1 \
7
- PIP_NO_CACHE_DIR=1
 
8
 
9
  # System deps for OpenCV, audio (PyAudio/portaudio), and general build
10
  # - libgl1, libglib2.0-0: needed by opencv-python
@@ -22,8 +23,11 @@ RUN apt-get update \
22
  libxrender1 \
23
  libportaudio2 \
24
  libportaudiocpp0 \
 
25
  portaudio19-dev \
26
  libasound2-dev \
 
 
27
  && rm -rf /var/lib/apt/lists/*
28
 
29
  # Create working directory
 
4
  # Prevent Python from writing .pyc files and buffer stdout/stderr
5
  ENV PYTHONDONTWRITEBYTECODE=1 \
6
  PYTHONUNBUFFERED=1 \
7
+ PIP_NO_CACHE_DIR=1 \
8
+ SDL_AUDIODRIVER=dummy
9
 
10
  # System deps for OpenCV, audio (PyAudio/portaudio), and general build
11
  # - libgl1, libglib2.0-0: needed by opencv-python
 
23
  libxrender1 \
24
  libportaudio2 \
25
  libportaudiocpp0 \
26
+ libasound2 \
27
  portaudio19-dev \
28
  libasound2-dev \
29
+ espeak-ng \
30
+ libespeak1 \
31
  && rm -rf /var/lib/apt/lists/*
32
 
33
  # Create working directory
backend/TextToVoice.py CHANGED
@@ -14,12 +14,16 @@ logger = logging.getLogger("EdgeTTS")
14
  class EdgeTextToSpeech:
15
  def __init__(self):
16
  """Initialize Microsoft Edge TTS."""
 
 
 
17
  try:
18
  pygame.mixer.init(frequency=22050, size=-16, channels=2, buffer=512)
19
- self.current_voice = "en-US-AriaNeural"
20
- logger.info("Edge TTS engine initialized successfully")
21
  except Exception as e:
22
- logger.error(f"Failed to initialize pygame mixer: {e}")
 
23
 
24
  async def speak_async(self, text: str, voice: str = None):
25
  """Convert text to speech using Edge TTS (async)."""
@@ -34,13 +38,14 @@ class EdgeTextToSpeech:
34
  temp_filename = f"temp_edge_audio_{hash(text) % 10000}.mp3"
35
  await communicate.save(temp_filename)
36
 
37
- # Play the audio file
38
- pygame.mixer.music.load(temp_filename)
39
- pygame.mixer.music.play()
40
-
41
- # Wait for playback to finish
42
- while pygame.mixer.music.get_busy():
43
- await asyncio.sleep(0.1)
 
44
 
45
  # Clean up temporary file
46
  try:
@@ -52,13 +57,8 @@ class EdgeTextToSpeech:
52
 
53
  except Exception as e:
54
  logger.error(f"Error in Edge TTS: {e}")
55
- # Fallback to local pyttsx3 playback if Edge TTS fails
56
- try:
57
- await self._fallback_play_async(text)
58
- return True
59
- except Exception as fe:
60
- logger.error(f"Fallback TTS playback failed: {fe}")
61
- return False
62
 
63
  def speak(self, text: str, voice: str = None):
64
  """Synchronous wrapper for speak_async."""
@@ -171,6 +171,8 @@ class EdgeTextToSpeech:
171
  def _save():
172
  import pyttsx3
173
  engine = pyttsx3.init()
 
 
174
  # Optional: adjust rate/voice here if needed
175
  engine.save_to_file(text, wav_path)
176
  engine.runAndWait()
@@ -179,13 +181,8 @@ class EdgeTextToSpeech:
179
 
180
  async def _fallback_play_async(self, text: str):
181
  """Play speech locally using pyttsx3 (blocking in thread)."""
182
- def _play():
183
- import pyttsx3
184
- engine = pyttsx3.init()
185
- engine.say(text)
186
- engine.runAndWait()
187
-
188
- await asyncio.to_thread(_play)
189
 
190
  def _has_ffmpeg(self) -> bool:
191
  return shutil.which("ffmpeg") is not None
 
14
  class EdgeTextToSpeech:
15
  def __init__(self):
16
  """Initialize Microsoft Edge TTS."""
17
+ # Always set a default voice regardless of audio device availability
18
+ self.current_voice = "en-US-AriaNeural"
19
+ self.mixer_available = False
20
  try:
21
  pygame.mixer.init(frequency=22050, size=-16, channels=2, buffer=512)
22
+ self.mixer_available = True
23
+ logger.info("Edge TTS engine initialized successfully (pygame mixer ready)")
24
  except Exception as e:
25
+ # In containers there is no audio device; this is expected. We can still save audio files.
26
+ logger.warning(f"Pygame mixer not available (no audio device). File generation will still work. Details: {e}")
27
 
28
  async def speak_async(self, text: str, voice: str = None):
29
  """Convert text to speech using Edge TTS (async)."""
 
38
  temp_filename = f"temp_edge_audio_{hash(text) % 10000}.mp3"
39
  await communicate.save(temp_filename)
40
 
41
+ # Play the audio file if a mixer is available (local/dev only)
42
+ if self.mixer_available and pygame.mixer.get_init():
43
+ pygame.mixer.music.load(temp_filename)
44
+ pygame.mixer.music.play()
45
+
46
+ # Wait for playback to finish
47
+ while pygame.mixer.music.get_busy():
48
+ await asyncio.sleep(0.1)
49
 
50
  # Clean up temporary file
51
  try:
 
57
 
58
  except Exception as e:
59
  logger.error(f"Error in Edge TTS: {e}")
60
+ # In server/container contexts, skip local playback fallback.
61
+ return False
 
 
 
 
 
62
 
63
  def speak(self, text: str, voice: str = None):
64
  """Synchronous wrapper for speak_async."""
 
171
  def _save():
172
  import pyttsx3
173
  engine = pyttsx3.init()
174
+ # Prefer eSpeak NG in Linux containers
175
+ # engine.setProperty('voice', 'english') # optional
176
  # Optional: adjust rate/voice here if needed
177
  engine.save_to_file(text, wav_path)
178
  engine.runAndWait()
 
181
 
182
  async def _fallback_play_async(self, text: str):
183
  """Play speech locally using pyttsx3 (blocking in thread)."""
184
+ # In container/server context, do not attempt local playback
185
+ return
 
 
 
 
 
186
 
187
  def _has_ffmpeg(self) -> bool:
188
  return shutil.which("ffmpeg") is not None