"""Audio I/O: Push-to-talk recording + playback""" import io import wave import threading import numpy as np import tempfile import os # Graceful imports for Windows compatibility try: import sounddevice as sd SD_AVAILABLE = True except ImportError as e: print("[audio_io] Advarsel: sounddevice ikke tilgjengelig (%s)" % e) print("[audio_io] -> Windows: last ned wheel fra https://www.lfd.uci.edu/~gohlke/pythonlibs/#sounddevice") print("[audio_io] eller: pip install sounddevice") print("[audio_io] -> Linux: sudo apt-get install libportaudio2") SD_AVAILABLE = False sd = None try: import soundfile as sf SF_AVAILABLE = True except ImportError: SF_AVAILABLE = False sf = None try: import edge_tts EDGE_AVAILABLE = True except ImportError: EDGE_AVAILABLE = False edge_tts = None from dataclasses import dataclass from typing import Optional @dataclass class AudioConfig: sample_rate: int = 16000 channels: int = 1 dtype: str = "int16" chunk_duration: float = 0.1 class PushToTalkRecorder: """Records audio while a key is held down.""" def __init__(self, config: AudioConfig = AudioConfig()): if not SD_AVAILABLE: raise ImportError( "sounddevice er ikke installert. " "Windows: pip install sounddevice " "(krever PortAudio; se README for wheel-link)" ) self.config = config self._recording = False self._frames = [] self._stream = None def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status): if status: print("[audio] status: %s" % status) if self._recording: self._frames.append(indata.copy()) def start(self): """Begin recording audio from microphone.""" if self._recording: return self._recording = True self._frames = [] self._stream = sd.InputStream( samplerate=self.config.sample_rate, channels=self.config.channels, dtype=self.config.dtype, blocksize=int(self.config.sample_rate * self.config.chunk_duration), callback=self._audio_callback, ) self._stream.start() print("[recorder] Starter opptak") def stop(self): """Stop recording and return WAV bytes.""" if not self._recording: return None self._recording = False self._stream.stop() self._stream.close() if not self._frames: print("[recorder] Ingen lyd fanget opp") return None audio = np.concatenate(self._frames, axis=0) buf = io.BytesIO() with wave.open(buf, "wb") as wf: wf.setnchannels(self.config.channels) wf.setsampwidth(2) wf.setframerate(self.config.sample_rate) wf.writeframes(audio.tobytes()) buf.seek(0) print("[recorder] Stoppet. Fanget %.1fs" % (len(audio)/self.config.sample_rate)) return buf.read() def is_recording(self) -> bool: return self._recording class TTSEngine: """Text-to-Speech using Edge-TTS (Norwegian/others).""" def __init__(self, voice: str = "nb-NO-FinnNeural"): self.voice = voice self._playing = False if not EDGE_AVAILABLE: print("[tts] Advarsel: edge-tts ikke tilgjengelig. TTS deaktivert.") async def _synthesize(self, text: str, output_path: str): communicate = edge_tts.Communicate(text, voice=self.voice) await communicate.save(output_path) def speak(self, text: str, blocking: bool = False): """Speak the given text.""" if not text.strip(): return if not EDGE_AVAILABLE or not SF_AVAILABLE: print("[tts] (TTS deaktivert: edge-tts=%s soundfile=%s) %s" % ( EDGE_AVAILABLE, SF_AVAILABLE, text[:60])) return def _play(): try: with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f: tmp_path = f.name import asyncio asyncio.run(self._synthesize(text, tmp_path)) data, sr = sf.read(tmp_path) if SD_AVAILABLE: sd.play(data, sr) sd.wait() os.unlink(tmp_path) except Exception as e: print("[tts] Feil: %s" % e) if blocking: _play() else: threading.Thread(target=_play, daemon=True).start()