Spaces:

0xarchit
/

Classroom-Ai-Assistant

Running

App Files Files Community

0xarchit committed on Oct 1, 2025

Commit

45e9602

1 Parent(s): 449a71e

added driver

Browse files

Files changed (2) hide show

Dockerfile +5 -1
backend/TextToVoice.py +21 -24

Dockerfile CHANGED Viewed

@@ -4,7 +4,8 @@ FROM python:3.11-slim
 # Prevent Python from writing .pyc files and buffer stdout/stderr
 ENV PYTHONDONTWRITEBYTECODE=1 \
   PYTHONUNBUFFERED=1 \
-  PIP_NO_CACHE_DIR=1
 # System deps for OpenCV, audio (PyAudio/portaudio), and general build
 # - libgl1, libglib2.0-0: needed by opencv-python
@@ -22,8 +23,11 @@ RUN apt-get update \
   libxrender1 \
   libportaudio2 \
   libportaudiocpp0 \
   portaudio19-dev \
   libasound2-dev \
   && rm -rf /var/lib/apt/lists/*
 # Create working directory

 # Prevent Python from writing .pyc files and buffer stdout/stderr
 ENV PYTHONDONTWRITEBYTECODE=1 \
   PYTHONUNBUFFERED=1 \
+  PIP_NO_CACHE_DIR=1 \
+  SDL_AUDIODRIVER=dummy
 # System deps for OpenCV, audio (PyAudio/portaudio), and general build
 # - libgl1, libglib2.0-0: needed by opencv-python
   libxrender1 \
   libportaudio2 \
   libportaudiocpp0 \
+  libasound2 \
   portaudio19-dev \
   libasound2-dev \
+  espeak-ng \
+  libespeak1 \
   && rm -rf /var/lib/apt/lists/*
 # Create working directory

backend/TextToVoice.py CHANGED Viewed

@@ -14,12 +14,16 @@ logger = logging.getLogger("EdgeTTS")
 class EdgeTextToSpeech:
     def __init__(self):
         """Initialize Microsoft Edge TTS."""
         try:
             pygame.mixer.init(frequency=22050, size=-16, channels=2, buffer=512)
-            self.current_voice = "en-US-AriaNeural"
-            logger.info("Edge TTS engine initialized successfully")
         except Exception as e:
-            logger.error(f"Failed to initialize pygame mixer: {e}")
     async def speak_async(self, text: str, voice: str = None):
         """Convert text to speech using Edge TTS (async)."""
@@ -34,13 +38,14 @@ class EdgeTextToSpeech:
             temp_filename = f"temp_edge_audio_{hash(text) % 10000}.mp3"
             await communicate.save(temp_filename)
-            # Play the audio file
-            pygame.mixer.music.load(temp_filename)
-            pygame.mixer.music.play()
-            # Wait for playback to finish
-            while pygame.mixer.music.get_busy():
-                await asyncio.sleep(0.1)
             # Clean up temporary file
             try:
@@ -52,13 +57,8 @@ class EdgeTextToSpeech:
         except Exception as e:
             logger.error(f"Error in Edge TTS: {e}")
-            # Fallback to local pyttsx3 playback if Edge TTS fails
-            try:
-                await self._fallback_play_async(text)
-                return True
-            except Exception as fe:
-                logger.error(f"Fallback TTS playback failed: {fe}")
-                return False
     def speak(self, text: str, voice: str = None):
         """Synchronous wrapper for speak_async."""
@@ -171,6 +171,8 @@ class EdgeTextToSpeech:
         def _save():
             import pyttsx3
             engine = pyttsx3.init()
             # Optional: adjust rate/voice here if needed
             engine.save_to_file(text, wav_path)
             engine.runAndWait()
@@ -179,13 +181,8 @@ class EdgeTextToSpeech:
     async def _fallback_play_async(self, text: str):
         """Play speech locally using pyttsx3 (blocking in thread)."""
-        def _play():
-            import pyttsx3
-            engine = pyttsx3.init()
-            engine.say(text)
-            engine.runAndWait()
-        await asyncio.to_thread(_play)
     def _has_ffmpeg(self) -> bool:
         return shutil.which("ffmpeg") is not None

 class EdgeTextToSpeech:
     def __init__(self):
         """Initialize Microsoft Edge TTS."""
+        # Always set a default voice regardless of audio device availability
+        self.current_voice = "en-US-AriaNeural"
+        self.mixer_available = False
         try:
             pygame.mixer.init(frequency=22050, size=-16, channels=2, buffer=512)
+            self.mixer_available = True
+            logger.info("Edge TTS engine initialized successfully (pygame mixer ready)")
         except Exception as e:
+            # In containers there is no audio device; this is expected. We can still save audio files.
+            logger.warning(f"Pygame mixer not available (no audio device). File generation will still work. Details: {e}")
     async def speak_async(self, text: str, voice: str = None):
         """Convert text to speech using Edge TTS (async)."""
             temp_filename = f"temp_edge_audio_{hash(text) % 10000}.mp3"
             await communicate.save(temp_filename)
+            # Play the audio file if a mixer is available (local/dev only)
+            if self.mixer_available and pygame.mixer.get_init():
+                pygame.mixer.music.load(temp_filename)
+                pygame.mixer.music.play()
+                # Wait for playback to finish
+                while pygame.mixer.music.get_busy():
+                    await asyncio.sleep(0.1)
             # Clean up temporary file
             try:
         except Exception as e:
             logger.error(f"Error in Edge TTS: {e}")
+            # In server/container contexts, skip local playback fallback.
+            return False
     def speak(self, text: str, voice: str = None):
         """Synchronous wrapper for speak_async."""
         def _save():
             import pyttsx3
             engine = pyttsx3.init()
+            # Prefer eSpeak NG in Linux containers
+            # engine.setProperty('voice', 'english')  # optional
             # Optional: adjust rate/voice here if needed
             engine.save_to_file(text, wav_path)
             engine.runAndWait()
     async def _fallback_play_async(self, text: str):
         """Play speech locally using pyttsx3 (blocking in thread)."""
+        # In container/server context, do not attempt local playback
+        return
     def _has_ffmpeg(self) -> bool:
         return shutil.which("ffmpeg") is not None