carbonx
/

buddy-desktop

Model card · Files and versions

xet

Community

carbonx committed 11 days ago

Commit

96b68f8

verified ·

1 Parent(s): ea9caa4

Fix: Graceful audio imports for Windows

Browse files

Files changed (1) hide show

audio_io.py +50 -16

audio_io.py CHANGED Viewed

@@ -3,15 +3,37 @@ import io
 import wave
 import threading
 import numpy as np
-import sounddevice as sd
-import soundfile as sf
-import edge_tts
-import asyncio
 import tempfile
 import os
 from dataclasses import dataclass
-from typing import Callable, Optional
 @dataclass
@@ -19,18 +41,23 @@ class AudioConfig:
     sample_rate: int = 16000
     channels: int = 1
     dtype: str = "int16"
-    chunk_duration: float = 0.1  # seconds
 class PushToTalkRecorder:
     """Records audio while a key is held down."""
     def __init__(self, config: AudioConfig = AudioConfig()):
         self.config = config
         self._recording = False
         self._frames = []
         self._stream = None
-        self._thread = None
     def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status):
         if status:
@@ -52,7 +79,7 @@ class PushToTalkRecorder:
             callback=self._audio_callback,
         )
         self._stream.start()
-        print("[recorder] Started recording")
     def stop(self):
         """Stop recording and return WAV bytes."""
@@ -63,20 +90,19 @@ class PushToTalkRecorder:
         self._stream.close()
         if not self._frames:
-            print("[recorder] No audio captured")
             return None
         audio = np.concatenate(self._frames, axis=0)
-        # Convert to WAV bytes
         buf = io.BytesIO()
         with wave.open(buf, "wb") as wf:
             wf.setnchannels(self.config.channels)
-            wf.setsampwidth(2)  # int16 = 2 bytes
             wf.setframerate(self.config.sample_rate)
             wf.writeframes(audio.tobytes())
         buf.seek(0)
-        print("[recorder] Stopped. Captured %.1fs" % (len(audio)/self.config.sample_rate))
         return buf.read()
     def is_recording(self) -> bool:
@@ -89,28 +115,36 @@ class TTSEngine:
     def __init__(self, voice: str = "nb-NO-FinnNeural"):
         self.voice = voice
         self._playing = False
     async def _synthesize(self, text: str, output_path: str):
         communicate = edge_tts.Communicate(text, voice=self.voice)
         await communicate.save(output_path)
     def speak(self, text: str, blocking: bool = False):
-        """Speak the given text. If blocking=True, wait until done."""
         if not text.strip():
             return
         def _play():
             try:
                 with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                     tmp_path = f.name
                 asyncio.run(self._synthesize(text, tmp_path))
                 data, sr = sf.read(tmp_path)
-                sd.play(data, sr)
-                sd.wait()
                 os.unlink(tmp_path)
             except Exception as e:
-                print("[tts] Error: %s" % e)
         if blocking:
             _play()

 import wave
 import threading
 import numpy as np
 import tempfile
 import os
+# Graceful imports for Windows compatibility
+try:
+    import sounddevice as sd
+    SD_AVAILABLE = True
+except ImportError as e:
+    print("[audio_io] Advarsel: sounddevice ikke tilgjengelig (%s)" % e)
+    print("[audio_io]  -> Windows: last ned wheel fra https://www.lfd.uci.edu/~gohlke/pythonlibs/#sounddevice")
+    print("[audio_io]    eller: pip install sounddevice")
+    print("[audio_io]  -> Linux: sudo apt-get install libportaudio2")
+    SD_AVAILABLE = False
+    sd = None
+try:
+    import soundfile as sf
+    SF_AVAILABLE = True
+except ImportError:
+    SF_AVAILABLE = False
+    sf = None
+try:
+    import edge_tts
+    EDGE_AVAILABLE = True
+except ImportError:
+    EDGE_AVAILABLE = False
+    edge_tts = None
 from dataclasses import dataclass
+from typing import Optional
 @dataclass
     sample_rate: int = 16000
     channels: int = 1
     dtype: str = "int16"
+    chunk_duration: float = 0.1
 class PushToTalkRecorder:
     """Records audio while a key is held down."""
     def __init__(self, config: AudioConfig = AudioConfig()):
+        if not SD_AVAILABLE:
+            raise ImportError(
+                "sounddevice er ikke installert. "
+                "Windows: pip install sounddevice "
+                "(krever PortAudio; se README for wheel-link)"
+            )
         self.config = config
         self._recording = False
         self._frames = []
         self._stream = None
     def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status):
         if status:
             callback=self._audio_callback,
         )
         self._stream.start()
+        print("[recorder] Starter opptak")
     def stop(self):
         """Stop recording and return WAV bytes."""
         self._stream.close()
         if not self._frames:
+            print("[recorder] Ingen lyd fanget opp")
             return None
         audio = np.concatenate(self._frames, axis=0)
         buf = io.BytesIO()
         with wave.open(buf, "wb") as wf:
             wf.setnchannels(self.config.channels)
+            wf.setsampwidth(2)
             wf.setframerate(self.config.sample_rate)
             wf.writeframes(audio.tobytes())
         buf.seek(0)
+        print("[recorder] Stoppet. Fanget %.1fs" % (len(audio)/self.config.sample_rate))
         return buf.read()
     def is_recording(self) -> bool:
     def __init__(self, voice: str = "nb-NO-FinnNeural"):
         self.voice = voice
         self._playing = False
+        if not EDGE_AVAILABLE:
+            print("[tts] Advarsel: edge-tts ikke tilgjengelig. TTS deaktivert.")
     async def _synthesize(self, text: str, output_path: str):
         communicate = edge_tts.Communicate(text, voice=self.voice)
         await communicate.save(output_path)
     def speak(self, text: str, blocking: bool = False):
+        """Speak the given text."""
         if not text.strip():
             return
+        if not EDGE_AVAILABLE or not SF_AVAILABLE:
+            print("[tts] (TTS deaktivert: edge-tts=%s soundfile=%s) %s" % (
+                EDGE_AVAILABLE, SF_AVAILABLE, text[:60]))
+            return
         def _play():
             try:
                 with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                     tmp_path = f.name
+                import asyncio
                 asyncio.run(self._synthesize(text, tmp_path))
                 data, sr = sf.read(tmp_path)
+                if SD_AVAILABLE:
+                    sd.play(data, sr)
+                    sd.wait()
                 os.unlink(tmp_path)
             except Exception as e:
+                print("[tts] Feil: %s" % e)
         if blocking:
             _play()