carbonx
/

buddy-desktop

Model card Files Files and versions

xet

Community

carbonx committed 11 days ago

Commit

2e7e0eb

verified ·

1 Parent(s): 93a741b

Add audio_io.py

Browse files

Files changed (1) hide show

audio_io.py +118 -0

audio_io.py ADDED Viewed

	@@ -0,0 +1,118 @@

+"""Audio I/O: Push-to-talk recording + playback"""
+import io
+import wave
+import threading
+import numpy as np
+import sounddevice as sd
+import soundfile as sf
+import edge_tts
+import asyncio
+import tempfile
+import os
+from dataclasses import dataclass
+from typing import Callable, Optional
@dataclass
class AudioConfig:
    """Capture settings used when opening the microphone input stream."""

    # Sampling frequency in Hz.
    sample_rate: int = 16000
    # Number of input channels (1 = mono).
    channels: int = 1
    # Sample format name handed to the input stream.
    dtype: str = "int16"
    # Length of one capture block in seconds; multiplied by sample_rate to
    # obtain the stream block size.
    chunk_duration: float = 0.1
class PushToTalkRecorder:
    """Records microphone audio between ``start()`` and ``stop()``.

    Audio blocks delivered to the stream callback are buffered in memory
    while recording is active; ``stop()`` packs them into a WAV container.
    """

    def __init__(self, config: Optional["AudioConfig"] = None):
        """Create an idle recorder.

        Args:
            config: Capture settings. When omitted, a fresh ``AudioConfig()``
                is built for this instance so recorders never share (and
                accidentally mutate) one default object.
        """
        self.config = config if config is not None else AudioConfig()
        self._recording = False
        self._frames = []
        self._stream = None
        self._thread = None

    def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status):
        """Stream callback: report status flags and buffer the incoming block."""
        if status:
            print(f"[audio] status: {status}")
        if self._recording:
            # The stream reuses its input buffer, so keep a private copy.
            self._frames.append(indata.copy())

    def start(self):
        """Begin recording audio from microphone.

        Does nothing when a recording is already in progress. If the input
        stream cannot be opened or started, the exception propagates and the
        recorder stays idle so that ``start()`` can be retried.
        """
        if self._recording:
            return
        # Build the stream first: if this raises, no state has been touched.
        stream = sd.InputStream(
            samplerate=self.config.sample_rate,
            channels=self.config.channels,
            dtype=self.config.dtype,
            blocksize=int(self.config.sample_rate * self.config.chunk_duration),
            callback=self._audio_callback,
        )
        self._frames = []
        self._stream = stream
        # The flag goes up before start() so the very first block is kept.
        self._recording = True
        try:
            stream.start()
        except Exception:
            self._recording = False
            self._stream = None
            stream.close()
            raise
        print("[recorder] Started recording")

    def stop(self):
        """Stop recording and return WAV bytes.

        Returns:
            The captured audio as a complete WAV file (``bytes``), or ``None``
            when the recorder was not recording or no audio was captured.
        """
        if not self._recording:
            return None
        self._recording = False
        stream, self._stream = self._stream, None
        if stream is not None:
            try:
                stream.stop()
            finally:
                # Release the device even if stopping failed.
                stream.close()
        if not self._frames:
            print("[recorder] No audio captured")
            return None
        audio = np.concatenate(self._frames, axis=0)
        pcm = self._to_pcm(audio)
        buf = io.BytesIO()
        with wave.open(buf, "wb") as wf:
            wf.setnchannels(self.config.channels)
            # Derive the width from the data instead of assuming int16.
            wf.setsampwidth(pcm.dtype.itemsize)
            wf.setframerate(self.config.sample_rate)
            wf.writeframes(pcm.tobytes())
        duration = len(audio) / self.config.sample_rate
        print(f"[recorder] Stopped. Captured {duration:.1f}s")
        return buf.getvalue()

    @staticmethod
    def _to_pcm(audio: np.ndarray) -> np.ndarray:
        """Return *audio* in a sample layout that WAV PCM can represent."""
        if np.issubdtype(audio.dtype, np.floating):
            # Float samples are nominally in [-1.0, 1.0]; store as 16-bit PCM.
            return (np.clip(audio, -1.0, 1.0) * 32767.0).astype(np.int16)
        if audio.dtype == np.int8:
            # 8-bit WAV samples are unsigned, centred on 128.
            return (audio.astype(np.int16) + 128).astype(np.uint8)
        return audio

    def is_recording(self) -> bool:
        """Return True while a recording is in progress."""
        return self._recording
class TTSEngine:
    """Text-to-Speech using Edge-TTS (Norwegian/others)."""

    def __init__(self, voice: str = "nb-NO-FinnNeural"):
        """Create an engine that speaks with the given Edge-TTS voice name."""
        self.voice = voice
        # True only while a clip started by speak() is being played.
        self._playing = False

    async def _synthesize(self, text: str, output_path: str):
        """Synthesize *text* with the configured voice and save it to *output_path*."""
        communicate = edge_tts.Communicate(text, voice=self.voice)
        await communicate.save(output_path)

    def speak(self, text: str, blocking: bool = False):
        """Speak the given text. If blocking=True, wait until done.

        Empty, whitespace-only or ``None`` text is ignored. Failures during
        synthesis or playback are reported on stdout rather than raised, and
        the temporary audio file is removed in every case.

        Args:
            text: The text to speak.
            blocking: Run in the calling thread when True; otherwise playback
                happens on a daemon thread and this method returns at once.
                NOTE: synthesis uses ``asyncio.run``, so blocking mode should
                not be called from inside a running event loop.
        """
        if not text or not text.strip():
            return

        def _play():
            tmp_path = None
            try:
                # Only reserve a file name here; the synthesizer writes to it.
                with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                    tmp_path = f.name
                asyncio.run(self._synthesize(text, tmp_path))
                data, sr = sf.read(tmp_path)
                self._playing = True
                sd.play(data, sr)
                sd.wait()
            except Exception as e:
                # Best-effort: a TTS failure must not take down the caller.
                print(f"[tts] Error: {e}")
            finally:
                self._playing = False
                if tmp_path is not None:
                    try:
                        os.unlink(tmp_path)
                    except OSError:
                        pass

        if blocking:
            _play()
        else:
            threading.Thread(target=_play, daemon=True).start()