File size: 4,617 Bytes
2e7e0eb
 
 
 
 
 
 
 
96b68f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2e7e0eb
96b68f8
2e7e0eb
 
 
 
 
 
 
96b68f8
2e7e0eb
 
 
 
 
 
96b68f8
 
 
 
 
 
2e7e0eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96b68f8
2e7e0eb
 
 
 
 
 
 
 
 
 
96b68f8
2e7e0eb
 
 
 
 
 
 
96b68f8
2e7e0eb
 
 
96b68f8
2e7e0eb
 
 
 
 
 
 
 
 
 
 
 
96b68f8
 
2e7e0eb
 
 
 
 
 
96b68f8
2e7e0eb
 
96b68f8
 
 
 
2e7e0eb
 
 
 
 
 
96b68f8
2e7e0eb
 
96b68f8
 
 
2e7e0eb
 
96b68f8
2e7e0eb
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
"""Audio I/O: Push-to-talk recording + playback"""
import io
import wave
import threading
import numpy as np
import tempfile
import os

# Optional third-party imports.
#
# Each dependency is imported defensively so this module can still be loaded
# when one is missing. For every dependency a boolean *_AVAILABLE flag is set
# and the module name is bound to None on failure, so later code can test the
# flag instead of catching NameError.
#
# sounddevice and soundfile wrap native libraries (PortAudio / libsndfile) and
# raise OSError -- not ImportError -- when the Python package is installed but
# the shared library cannot be found, so both exception types are handled.
try:
    import sounddevice as sd
    SD_AVAILABLE = True
except (ImportError, OSError) as e:
    print("[audio_io] Advarsel: sounddevice ikke tilgjengelig (%s)" % e)
    print("[audio_io]  -> Windows: last ned wheel fra https://www.lfd.uci.edu/~gohlke/pythonlibs/#sounddevice")
    print("[audio_io]    eller: pip install sounddevice")
    print("[audio_io]  -> Linux: sudo apt-get install libportaudio2")
    SD_AVAILABLE = False
    sd = None

try:
    import soundfile as sf
    SF_AVAILABLE = True
except (ImportError, OSError):
    # Missing package or missing libsndfile: decoding is unavailable.
    SF_AVAILABLE = False
    sf = None

try:
    import edge_tts
    EDGE_AVAILABLE = True
except ImportError:
    EDGE_AVAILABLE = False
    edge_tts = None

from dataclasses import dataclass
from typing import Optional


@dataclass
class AudioConfig:
    """Capture settings consumed by ``PushToTalkRecorder``.

    Field order is part of the interface: instances may be built
    positionally as ``AudioConfig(sample_rate, channels, dtype,
    chunk_duration)``.
    """

    # Samples per second; used for the input stream and the WAV header.
    sample_rate: int = 16_000
    # Number of input channels requested from the stream.
    channels: int = 1
    # Sample format name handed to the input stream.
    dtype: str = "int16"
    # Seconds of audio per callback block (block size is
    # ``sample_rate * chunk_duration`` frames).
    chunk_duration: float = 0.1


class PushToTalkRecorder:
    """Record microphone audio between ``start()`` and ``stop()`` calls.

    Audio is captured through a ``sounddevice.InputStream``. Its callback
    buffers every delivered block in memory while recording is active, and
    ``stop()`` returns the buffered capture as an in-memory WAV file.
    """

    def __init__(self, config: "Optional[AudioConfig]" = None):
        """Create an idle recorder.

        Args:
            config: Capture settings. When omitted, a fresh ``AudioConfig()``
                is created for this recorder, so instances never share one
                mutable default object.

        Raises:
            ImportError: If the module-level ``SD_AVAILABLE`` flag is false.
        """
        if not SD_AVAILABLE:
            raise ImportError(
                "sounddevice er ikke installert. "
                "Windows: pip install sounddevice "
                "(krever PortAudio; se README for wheel-link)"
            )
        self.config = config if config is not None else AudioConfig()
        self._recording = False
        self._frames = []
        self._stream = None

    def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status):
        """Stream callback: report status flags and buffer the block."""
        if status:
            print("[audio] status: %s" % status)
        if self._recording:
            # Copy the block: the buffer belongs to the caller and is only
            # assumed valid for the duration of this call.
            self._frames.append(indata.copy())

    def start(self):
        """Begin recording audio from microphone.

        Does nothing if a recording is already in progress. If the input
        stream cannot be created or started, the exception propagates and
        the recorder is left idle so ``start()`` can be retried.
        """
        if self._recording:
            return
        self._frames = []
        # Build the stream before flipping any state: a failure here (for
        # example no input device) must not leave the recorder "recording".
        stream = sd.InputStream(
            samplerate=self.config.sample_rate,
            channels=self.config.channels,
            dtype=self.config.dtype,
            blocksize=int(self.config.sample_rate * self.config.chunk_duration),
            callback=self._audio_callback,
        )
        self._stream = stream
        # Set the flag before starting so the very first block is kept.
        self._recording = True
        try:
            stream.start()
        except Exception:
            self._recording = False
            self._stream = None
            stream.close()
            raise
        print("[recorder] Starter opptak")

    def stop(self) -> Optional[bytes]:
        """Stop recording and return WAV bytes.

        Returns:
            The capture as a complete WAV file, or ``None`` when the recorder
            was not recording or no audio block was received.
        """
        if not self._recording:
            return None
        self._recording = False

        # Detach the stream and the buffered blocks from the instance so
        # neither is kept alive after this call.
        stream, self._stream = self._stream, None
        frames, self._frames = self._frames, []

        if stream is not None:
            try:
                stream.stop()
            finally:
                # Always release the device, even if stop() failed.
                stream.close()

        if not frames:
            print("[recorder] Ingen lyd fanget opp")
            return None

        audio = self._to_pcm(np.concatenate(frames, axis=0))

        buf = io.BytesIO()
        with wave.open(buf, "wb") as wf:
            wf.setnchannels(self.config.channels)
            # Derive the width from the samples actually written instead of
            # assuming 16-bit, so the header matches any configured dtype.
            wf.setsampwidth(audio.dtype.itemsize)
            wf.setframerate(self.config.sample_rate)
            wf.writeframes(audio.tobytes())
        print("[recorder] Stoppet. Fanget %.1fs" % (len(audio)/self.config.sample_rate))
        return buf.getvalue()

    @staticmethod
    def _to_pcm(audio: np.ndarray) -> np.ndarray:
        """Return *audio* in a sample layout integer-PCM WAV can represent."""
        if audio.dtype.kind == "f":
            # Assumes float samples are normalised to [-1.0, 1.0] (the usual
            # convention for float audio); scale them to signed 16-bit.
            return (np.clip(audio, -1.0, 1.0) * 32767.0).astype(np.int16)
        if audio.dtype == np.int8:
            # 8-bit WAV samples are unsigned with the midpoint at 128.
            return (audio.astype(np.int16) + 128).astype(np.uint8)
        return audio

    def is_recording(self) -> bool:
        """Return True while a recording is in progress."""
        return self._recording


class TTSEngine:
    """Text-to-Speech using Edge-TTS (Norwegian/others).

    Speech is synthesised to a temporary MP3 file with ``edge_tts``, decoded
    with ``soundfile`` and played with ``sounddevice`` when it is available.
    """

    def __init__(self, voice: str = "nb-NO-FinnNeural"):
        """Store the voice name and warn if ``edge_tts`` is unavailable.

        Args:
            voice: Voice identifier passed to ``edge_tts.Communicate``.
        """
        self.voice = voice
        # Initialised here; nothing in this class updates it afterwards.
        self._playing = False
        if not EDGE_AVAILABLE:
            print("[tts] Advarsel: edge-tts ikke tilgjengelig. TTS deaktivert.")

    async def _synthesize(self, text: str, output_path: str):
        """Synthesise *text* with the configured voice into *output_path*."""
        communicate = edge_tts.Communicate(text, voice=self.voice)
        await communicate.save(output_path)

    def _speak_now(self, text: str) -> None:
        """Synthesise and play *text* synchronously.

        Errors are reported on stdout rather than raised, and the temporary
        audio file is removed whether or not synthesis/playback succeeded.
        """
        tmp_path = None
        try:
            # Only the path is needed: close the handle right away so the
            # file can be reopened by name for writing and reading.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                tmp_path = f.name

            import asyncio
            asyncio.run(self._synthesize(text, tmp_path))
            data, sr = sf.read(tmp_path)
            if SD_AVAILABLE:
                sd.play(data, sr)
                sd.wait()
        except Exception as e:
            # Deliberately broad: a TTS failure must never crash the caller
            # (this often runs in a background thread).
            print("[tts] Feil: %s" % e)
        finally:
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    # Best-effort cleanup; the file may already be gone.
                    pass

    def speak(self, text: str, blocking: bool = False):
        """Speak the given text.

        Args:
            text: Text to speak. ``None``, empty and whitespace-only values
                are ignored.
            blocking: When true, return only after playback has finished;
                otherwise the work runs in a daemon thread.
        """
        if not text or not text.strip():
            return
        if not EDGE_AVAILABLE or not SF_AVAILABLE:
            print("[tts] (TTS deaktivert: edge-tts=%s soundfile=%s) %s" % (
                EDGE_AVAILABLE, SF_AVAILABLE, text[:60]))
            return

        if blocking:
            self._speak_now(text)
        else:
            threading.Thread(
                target=self._speak_now, args=(text,), daemon=True
            ).start()