File size: 4,617 Bytes
"""Audio I/O: Push-to-talk recording + playback"""
import io
import wave
import threading
import numpy as np
import tempfile
import os
# Graceful imports for Windows compatibility
try:
import sounddevice as sd
SD_AVAILABLE = True
except ImportError as e:
print("[audio_io] Advarsel: sounddevice ikke tilgjengelig (%s)" % e)
print("[audio_io] -> Windows: last ned wheel fra https://www.lfd.uci.edu/~gohlke/pythonlibs/#sounddevice")
print("[audio_io] eller: pip install sounddevice")
print("[audio_io] -> Linux: sudo apt-get install libportaudio2")
SD_AVAILABLE = False
sd = None
try:
import soundfile as sf
SF_AVAILABLE = True
except ImportError:
SF_AVAILABLE = False
sf = None
try:
import edge_tts
EDGE_AVAILABLE = True
except ImportError:
EDGE_AVAILABLE = False
edge_tts = None
from dataclasses import dataclass
from typing import Optional
@dataclass
class AudioConfig:
    """Capture settings consumed by ``PushToTalkRecorder``.

    The recorder forwards ``sample_rate``, ``channels`` and ``dtype`` to the
    input stream it opens and reuses ``sample_rate`` and ``channels`` for the
    WAV header it writes.
    """

    # Sampling frequency handed to the input stream and written to the WAV.
    sample_rate: int = 16000
    # Number of input channels to capture.
    channels: int = 1
    # Sample format name passed through to the input stream.
    dtype: str = "int16"
    # Length of one capture block in seconds; the recorder multiplies it by
    # ``sample_rate`` to obtain the stream block size in frames.
    chunk_duration: float = 0.1
class PushToTalkRecorder:
    """Record microphone audio between ``start()`` and ``stop()`` calls.

    While recording is active, every block delivered by the sounddevice input
    stream is copied into an in-memory list. ``stop()`` joins those blocks and
    returns them as the bytes of a WAV file.
    """

    def __init__(self, config: "Optional[AudioConfig]" = None):
        """Create an idle recorder.

        Args:
            config: Capture settings. When omitted, a fresh ``AudioConfig()``
                is created per recorder so instances never share one mutable
                default object.

        Raises:
            ImportError: If the sounddevice package could not be imported.
        """
        if not SD_AVAILABLE:
            raise ImportError(
                "sounddevice er ikke installert. "
                "Windows: pip install sounddevice "
                "(krever PortAudio; se README for wheel-link)"
            )
        self.config = config if config is not None else AudioConfig()
        self._recording = False
        self._frames = []
        self._stream = None

    def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status):
        """Stream callback: keep a copy of each incoming block while recording."""
        if status:
            print("[audio] status: %s" % status)
        if self._recording:
            # Copy so the stored block does not alias the buffer handed to
            # the callback.
            self._frames.append(indata.copy())

    def start(self):
        """Begin recording audio from microphone.

        Does nothing if a recording is already in progress. If the input
        stream cannot be opened or started, the recorder is left idle and the
        underlying exception propagates.
        """
        if self._recording:
            return
        self._frames = []
        # If the constructor raises, the recorder is still idle.
        stream = sd.InputStream(
            samplerate=self.config.sample_rate,
            channels=self.config.channels,
            dtype=self.config.dtype,
            blocksize=int(self.config.sample_rate * self.config.chunk_duration),
            callback=self._audio_callback,
        )
        # Raise the flag before starting so the very first block is kept.
        self._stream = stream
        self._recording = True
        try:
            stream.start()
        except Exception:
            # Roll back so a later start() can retry and stop() is a no-op.
            self._recording = False
            self._stream = None
            stream.close()
            raise
        print("[recorder] Starter opptak")

    def stop(self):
        """Stop recording and return WAV bytes.

        Returns:
            The recording as the contents of a WAV file, or ``None`` when no
            recording was active or no audio block was captured.
        """
        if not self._recording:
            return None
        self._recording = False
        stream, self._stream = self._stream, None
        if stream is not None:
            try:
                stream.stop()
            finally:
                # Always release the device, even if stop() failed.
                stream.close()
        if not self._frames:
            print("[recorder] Ingen lyd fanget opp")
            return None
        audio = np.concatenate(self._frames, axis=0)
        samples, sample_width = self._to_pcm(audio)
        buf = io.BytesIO()
        with wave.open(buf, "wb") as wf:
            wf.setnchannels(self.config.channels)
            wf.setsampwidth(sample_width)
            wf.setframerate(self.config.sample_rate)
            wf.writeframes(samples.tobytes())
        print("[recorder] Stoppet. Fanget %.1fs" % (len(audio) / self.config.sample_rate))
        return buf.getvalue()

    @staticmethod
    def _to_pcm(audio):
        """Return ``(samples, sample_width)`` suitable for ``wave.writeframes``.

        The sample width is derived from the captured dtype instead of being
        fixed at two bytes, so the WAV header always matches the data.
        """
        if audio.dtype.kind == "f":
            # WAV written by the ``wave`` module is integer PCM: map floats
            # (nominally in [-1.0, 1.0]) onto signed 16-bit samples.
            scaled = np.clip(audio, -1.0, 1.0) * 32767
            return scaled.astype(np.int16), 2
        if audio.dtype == np.int8:
            # 8-bit WAV samples are unsigned, centred on 128.
            return (audio.astype(np.int16) + 128).astype(np.uint8), 1
        return audio, audio.dtype.itemsize

    def is_recording(self) -> bool:
        """Return ``True`` while a recording is in progress."""
        return self._recording
class TTSEngine:
    """Text-to-Speech using Edge-TTS (Norwegian/others).

    Speech is synthesized to a temporary ``.mp3`` file, decoded with
    soundfile and played through sounddevice when that package is available.
    """

    def __init__(self, voice: str = "nb-NO-FinnNeural"):
        """Store the voice name and warn if edge-tts could not be imported.

        Args:
            voice: Voice identifier passed to ``edge_tts.Communicate``.
        """
        self.voice = voice
        # Initialised here; nothing in this class updates it afterwards.
        self._playing = False
        if not EDGE_AVAILABLE:
            print("[tts] Advarsel: edge-tts ikke tilgjengelig. TTS deaktivert.")

    async def _synthesize(self, text: str, output_path: str):
        """Synthesize ``text`` with the configured voice into ``output_path``."""
        communicate = edge_tts.Communicate(text, voice=self.voice)
        await communicate.save(output_path)

    def speak(self, text: str, blocking: bool = False):
        """Speak the given text.

        Args:
            text: Text to speak. ``None``, empty and whitespace-only input is
                ignored.
            blocking: When true, synthesize and play on the calling thread
                and return afterwards; otherwise do the work on a daemon
                thread and return immediately.
        """
        if not text or not text.strip():
            return
        if not EDGE_AVAILABLE or not SF_AVAILABLE:
            print("[tts] (TTS deaktivert: edge-tts=%s soundfile=%s) %s" % (
                EDGE_AVAILABLE, SF_AVAILABLE, text[:60]))
            return
        if blocking:
            self._synthesize_and_play(text)
        else:
            threading.Thread(
                target=self._synthesize_and_play, args=(text,), daemon=True
            ).start()

    def _synthesize_and_play(self, text: str):
        """Synthesize ``text`` to a temp file, play it, and always clean up.

        Failures are reported on stdout rather than raised, so a background
        speech thread never dies with an unhandled exception.
        """
        import asyncio

        tmp_path = None
        try:
            # Only the unique name is needed; the handle is closed at once so
            # edge-tts can write to the path itself.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                tmp_path = f.name
            asyncio.run(self._synthesize(text, tmp_path))
            data, sr = sf.read(tmp_path)
            if SD_AVAILABLE:
                sd.play(data, sr)
                sd.wait()
        except Exception as e:
            print("[tts] Feil: %s" % e)
        finally:
            # Remove the temp file on every path, not only on success.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    # Best-effort cleanup; the file may already be gone.
                    pass
|