# File size: 4,617 Bytes

"""Audio I/O: Push-to-talk recording + playback"""
import io
import wave
import threading
import numpy as np
import tempfile
import os

# Graceful imports for Windows compatibility
try:
    import sounddevice as sd
    SD_AVAILABLE = True
except ImportError as e:
    print("[audio_io] Advarsel: sounddevice ikke tilgjengelig (%s)" % e)
    print("[audio_io]  -> Windows: last ned wheel fra https://www.lfd.uci.edu/~gohlke/pythonlibs/#sounddevice")
    print("[audio_io]    eller: pip install sounddevice")
    print("[audio_io]  -> Linux: sudo apt-get install libportaudio2")
    SD_AVAILABLE = False
    sd = None

try:
    import soundfile as sf
    SF_AVAILABLE = True
except ImportError:
    SF_AVAILABLE = False
    sf = None

try:
    import edge_tts
    EDGE_AVAILABLE = True
except ImportError:
    EDGE_AVAILABLE = False
    edge_tts = None

from dataclasses import dataclass
from typing import Optional


@dataclass
class AudioConfig:
    """Capture settings consumed by ``PushToTalkRecorder``."""

    # Passed as ``samplerate`` to the input stream and written as the WAV
    # frame rate (frames per second).
    sample_rate: int = 16000
    # Number of input channels requested from the stream and declared in the
    # WAV header.
    channels: int = 1
    # Sample format name handed to the input stream as ``dtype``.
    dtype: str = "int16"
    # Length of one callback block in seconds; the recorder computes
    # ``blocksize`` as ``int(sample_rate * chunk_duration)`` frames.
    chunk_duration: float = 0.1


class PushToTalkRecorder:
    """Records audio while a key is held down.

    Call :meth:`start` when the key goes down and :meth:`stop` when it is
    released; ``stop`` returns the captured audio as an in-memory WAV file.
    """

    def __init__(self, config: Optional["AudioConfig"] = None):
        """Create an idle recorder.

        Args:
            config: Capture settings. When omitted, a fresh ``AudioConfig()``
                is created per recorder so instances never share one mutable
                default object.

        Raises:
            ImportError: If ``sounddevice`` could not be imported.
        """
        if not SD_AVAILABLE:
            raise ImportError(
                "sounddevice er ikke installert. "
                "Windows: pip install sounddevice "
                "(krever PortAudio; se README for wheel-link)"
            )
        self.config = config if config is not None else AudioConfig()
        self._recording = False
        self._frames = []
        self._stream = None

    def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status):
        """Stream callback: store one block of input while recording.

        ``indata`` is copied before being stored.
        NOTE(review): presumably the audio backend reuses the ``indata``
        buffer between calls, which is why a copy is kept - confirm against
        the sounddevice documentation.
        """
        if status:
            print("[audio] status: %s" % status)
        if self._recording:
            self._frames.append(indata.copy())

    def start(self):
        """Begin recording audio from microphone.

        Does nothing if a recording is already in progress. If the input
        stream cannot be created or started, the recorder is left idle (so a
        later ``start()`` can retry) and the original exception propagates.
        """
        if self._recording:
            return
        self._frames = []
        # Creating the stream may raise (e.g. no input device); nothing has
        # been marked as "recording" yet, so the recorder stays usable.
        stream = sd.InputStream(
            samplerate=self.config.sample_rate,
            channels=self.config.channels,
            dtype=self.config.dtype,
            blocksize=int(self.config.sample_rate * self.config.chunk_duration),
            callback=self._audio_callback,
        )
        self._stream = stream
        # Set the flag before starting so the very first callback block is
        # kept rather than dropped.
        self._recording = True
        try:
            stream.start()
        except Exception:
            # Roll back to the idle state and release the stream.
            self._recording = False
            self._stream = None
            stream.close()
            raise
        print("[recorder] Starter opptak")

    def stop(self) -> Optional[bytes]:
        """Stop recording and return WAV bytes.

        Returns:
            The captured audio as a complete WAV file, or ``None`` when no
            recording was in progress or no audio blocks were captured.
        """
        if not self._recording:
            return None
        self._recording = False

        # Detach the stream first so it is released even if stop() raises.
        stream, self._stream = self._stream, None
        if stream is not None:
            try:
                stream.stop()
            finally:
                stream.close()

        if not self._frames:
            print("[recorder] Ingen lyd fanget opp")
            return None

        audio = np.concatenate(self._frames, axis=0)

        # The wave module writes integer PCM only: convert floating-point
        # captures to 16-bit, assuming samples are normalised to [-1.0, 1.0]
        # (TODO confirm for the configured backend).
        if np.issubdtype(audio.dtype, np.floating):
            audio = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)

        buf = io.BytesIO()
        with wave.open(buf, "wb") as wf:
            wf.setnchannels(self.config.channels)
            # Derive the width from the actual samples instead of assuming
            # 16-bit, so the header matches the data for other integer dtypes.
            wf.setsampwidth(audio.dtype.itemsize)
            wf.setframerate(self.config.sample_rate)
            wf.writeframes(audio.tobytes())
        buf.seek(0)
        print("[recorder] Stoppet. Fanget %.1fs" % (len(audio)/self.config.sample_rate))
        return buf.read()

    def is_recording(self) -> bool:
        """Return ``True`` between a successful ``start()`` and ``stop()``."""
        return self._recording


class TTSEngine:
    """Text-to-Speech using Edge-TTS (Norwegian/others)."""

    def __init__(self, voice: str = "nb-NO-FinnNeural"):
        """Store the voice name and warn if edge-tts is not importable.

        Args:
            voice: Voice identifier passed to ``edge_tts.Communicate``.
        """
        self.voice = voice
        self._playing = False
        if not EDGE_AVAILABLE:
            print("[tts] Advarsel: edge-tts ikke tilgjengelig. TTS deaktivert.")

    async def _synthesize(self, text: str, output_path: str):
        """Synthesize ``text`` with the configured voice into ``output_path``."""
        communicate = edge_tts.Communicate(text, voice=self.voice)
        await communicate.save(output_path)

    def speak(self, text: str, blocking: bool = False):
        """Speak the given text.

        Empty, whitespace-only or ``None`` text is ignored. When edge-tts or
        soundfile is unavailable, a notice with the first 60 characters of
        the text is printed instead of speaking.

        Args:
            text: The text to speak.
            blocking: If true, synthesize and play in the calling thread;
                otherwise do the work in a daemon thread and return at once.

        Errors during synthesis or playback are caught and printed, never
        raised to the caller.
        """
        if not text or not text.strip():
            return
        if not EDGE_AVAILABLE or not SF_AVAILABLE:
            print("[tts] (TTS deaktivert: edge-tts=%s soundfile=%s) %s" % (
                EDGE_AVAILABLE, SF_AVAILABLE, text[:60]))
            return

        def _play():
            tmp_path = None
            try:
                # Reserve a file name only; the handle is closed right away so
                # the synthesizer can write to the path itself.
                with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                    tmp_path = f.name

                import asyncio
                # NOTE(review): asyncio.run() fails if this thread already has
                # a running event loop; in that case the error is caught and
                # printed below.
                asyncio.run(self._synthesize(text, tmp_path))
                data, sr = sf.read(tmp_path)
                if SD_AVAILABLE:
                    sd.play(data, sr)
                    sd.wait()
            except Exception as e:
                print("[tts] Feil: %s" % e)
            finally:
                # Always remove the temp file, including when synthesis,
                # decoding or playback failed.
                if tmp_path is not None:
                    try:
                        os.unlink(tmp_path)
                    except OSError as e:
                        print("[tts] Feil: %s" % e)

        if blocking:
            _play()
        else:
            threading.Thread(target=_play, daemon=True).start()