buddy-desktop / audio_io.py
carbonx's picture
Fix: Graceful audio imports for Windows
96b68f8 verified
"""Audio I/O: Push-to-talk recording + playback"""
import io
import wave
import threading
import numpy as np
import tempfile
import os
# Graceful imports for Windows compatibility
try:
import sounddevice as sd
SD_AVAILABLE = True
except ImportError as e:
print("[audio_io] Advarsel: sounddevice ikke tilgjengelig (%s)" % e)
print("[audio_io] -> Windows: last ned wheel fra https://www.lfd.uci.edu/~gohlke/pythonlibs/#sounddevice")
print("[audio_io] eller: pip install sounddevice")
print("[audio_io] -> Linux: sudo apt-get install libportaudio2")
SD_AVAILABLE = False
sd = None
try:
import soundfile as sf
SF_AVAILABLE = True
except ImportError:
SF_AVAILABLE = False
sf = None
try:
import edge_tts
EDGE_AVAILABLE = True
except ImportError:
EDGE_AVAILABLE = False
edge_tts = None
from dataclasses import dataclass
from typing import Optional
@dataclass
class AudioConfig:
    """Capture settings consumed by PushToTalkRecorder for its input stream and WAV output."""
    # Samples per second; passed to the input stream and written as the WAV frame rate.
    sample_rate: int = 16000
    # Number of audio channels; passed to the input stream and written to the WAV header.
    channels: int = 1
    # Sample format name handed to the sounddevice input stream.
    dtype: str = "int16"
    # Multiplied by sample_rate to get the stream blocksize (so effectively seconds per block).
    chunk_duration: float = 0.1
class PushToTalkRecorder:
    """Record microphone audio between ``start()`` and ``stop()`` calls.

    Blocks delivered by the sounddevice input stream are buffered in memory
    while recording is active; ``stop()`` returns them as one WAV file.
    """

    def __init__(self, config: Optional["AudioConfig"] = None):
        """Create an idle recorder.

        Args:
            config: Capture settings. When omitted, a fresh ``AudioConfig()``
                is created per recorder so instances never share one mutable
                default object.

        Raises:
            ImportError: If sounddevice could not be imported.
        """
        if not SD_AVAILABLE:
            raise ImportError(
                "sounddevice er ikke installert. "
                "Windows: pip install sounddevice "
                "(krever PortAudio; se README for wheel-link)"
            )
        self.config = config if config is not None else AudioConfig()
        self._recording = False
        self._frames = []
        self._stream = None

    def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status):
        """Stream callback: buffer a copy of each incoming block while recording."""
        if status:
            print("[audio] status: %s" % status)
        if self._recording:
            # Copy because the buffer handed to the callback is not ours to keep.
            self._frames.append(indata.copy())

    def start(self):
        """Begin recording audio from the microphone.

        Does nothing if a recording is already in progress. If the input
        stream cannot be opened or started, the exception propagates and the
        recorder is left idle so that ``start()`` can be retried.
        """
        if self._recording:
            return
        self._frames = []
        stream = sd.InputStream(
            samplerate=self.config.sample_rate,
            channels=self.config.channels,
            dtype=self.config.dtype,
            blocksize=int(self.config.sample_rate * self.config.chunk_duration),
            callback=self._audio_callback,
        )
        # Raise the flag before starting so the first callback is not dropped,
        # but only after the stream object exists.
        self._recording = True
        try:
            stream.start()
        except Exception:
            self._recording = False
            stream.close()
            raise
        self._stream = stream
        print("[recorder] Starter opptak")

    def stop(self) -> Optional[bytes]:
        """Stop recording and return the captured audio as WAV bytes.

        Returns:
            The complete WAV file contents, or ``None`` when the recorder was
            not recording or no audio blocks were captured.
        """
        if not self._recording:
            return None
        self._recording = False
        stream, self._stream = self._stream, None
        if stream is not None:
            try:
                stream.stop()
            finally:
                # Release the device even if stopping failed.
                stream.close()
        if not self._frames:
            print("[recorder] Ingen lyd fanget opp")
            return None
        audio = np.concatenate(self._frames, axis=0)
        wav_bytes = self._encode_wav(audio)
        print("[recorder] Stoppet. Fanget %.1fs" % (len(audio) / self.config.sample_rate))
        return wav_bytes

    def _encode_wav(self, audio: np.ndarray) -> bytes:
        """Serialize captured samples as an integer-PCM WAV file."""
        if np.issubdtype(audio.dtype, np.floating):
            # The wave module only writes integer PCM: scale [-1, 1] floats to int16.
            audio = (np.clip(audio, -1.0, 1.0) * 32767.0).astype(np.int16)
        elif audio.dtype == np.int8:
            # 8-bit WAV samples are unsigned with 128 as the zero level.
            audio = (audio.astype(np.int16) + 128).astype(np.uint8)
        buf = io.BytesIO()
        with wave.open(buf, "wb") as wf:
            wf.setnchannels(self.config.channels)
            # Derive the width from the data instead of assuming 16-bit.
            wf.setsampwidth(audio.dtype.itemsize)
            wf.setframerate(self.config.sample_rate)
            wf.writeframes(audio.tobytes())
        return buf.getvalue()

    def is_recording(self) -> bool:
        """Return True while a recording is in progress."""
        return self._recording
class TTSEngine:
    """Text-to-speech using Edge-TTS, played back through sounddevice.

    Speech is synthesized to a temporary MP3 file, decoded with soundfile and
    played with sounddevice. When edge-tts or soundfile is missing, ``speak``
    only prints a notice.
    """

    def __init__(self, voice: str = "nb-NO-FinnNeural"):
        """Store the voice name and warn if edge-tts is unavailable.

        Args:
            voice: Edge-TTS voice identifier passed to ``edge_tts.Communicate``.
        """
        self.voice = voice
        # Initialized here; nothing in this class updates it afterwards.
        self._playing = False
        if not EDGE_AVAILABLE:
            print("[tts] Advarsel: edge-tts ikke tilgjengelig. TTS deaktivert.")

    async def _synthesize(self, text: str, output_path: str):
        """Synthesize ``text`` with the configured voice and save it to ``output_path``."""
        communicate = edge_tts.Communicate(text, voice=self.voice)
        await communicate.save(output_path)

    def _play(self, text: str) -> None:
        """Synthesize ``text`` to a temp file, play it, and always remove the file."""
        tmp_path = None
        try:
            # delete=False: only the path is needed; the synthesizer writes the
            # file after this handle has been closed.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                tmp_path = f.name
            import asyncio
            asyncio.run(self._synthesize(text, tmp_path))
            data, sr = sf.read(tmp_path)
            if SD_AVAILABLE:
                sd.play(data, sr)
                sd.wait()
        except Exception as e:
            # Best effort: a TTS failure is reported but never propagated.
            print("[tts] Feil: %s" % e)
        finally:
            # Remove the temp file on failure paths too, so errors do not
            # leave stray .mp3 files behind.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass

    def speak(self, text: str, blocking: bool = False):
        """Speak the given text.

        Args:
            text: Text to speak. ``None``, empty and whitespace-only input is
                ignored.
            blocking: When True, synthesize and play in the calling thread and
                return after playback; otherwise run in a daemon thread and
                return immediately.
        """
        if not text or not text.strip():
            return
        if not EDGE_AVAILABLE or not SF_AVAILABLE:
            print("[tts] (TTS deaktivert: edge-tts=%s soundfile=%s) %s" % (
                EDGE_AVAILABLE, SF_AVAILABLE, text[:60]))
            return
        if blocking:
            self._play(text)
        else:
            threading.Thread(target=self._play, args=(text,), daemon=True).start()