Fix: Graceful audio imports for Windows

96b68f8 verified 10 days ago

4.62 kB

	"""Audio I/O: Push-to-talk recording + playback"""
	import io
	import wave
	import threading
	import numpy as np
	import tempfile
	import os

	# Optional third-party backends. Each import is guarded so this module still
	# loads when a backend is missing; the *_AVAILABLE flags let the classes
	# below degrade gracefully instead of failing at import time.
	#
	# OSError is caught alongside ImportError for sounddevice and soundfile:
	# both load a native library (PortAudio / libsndfile) while being imported
	# and raise OSError when it cannot be found -- which is the "install
	# libportaudio2" situation the Linux hint below is written for.
	try:
	    import sounddevice as sd
	    SD_AVAILABLE = True
	except (ImportError, OSError) as e:
	    print("[audio_io] Advarsel: sounddevice ikke tilgjengelig (%s)" % e)
	    print("[audio_io] -> Windows: last ned wheel fra https://www.lfd.uci.edu/~gohlke/pythonlibs/#sounddevice")
	    print("[audio_io] eller: pip install sounddevice")
	    print("[audio_io] -> Linux: sudo apt-get install libportaudio2")
	    SD_AVAILABLE = False
	    sd = None

	try:
	    import soundfile as sf
	    SF_AVAILABLE = True
	except (ImportError, OSError):
	    SF_AVAILABLE = False
	    sf = None

	try:
	    import edge_tts
	    EDGE_AVAILABLE = True
	except ImportError:
	    EDGE_AVAILABLE = False
	    edge_tts = None

	from dataclasses import dataclass
	from typing import Optional


	@dataclass
	class AudioConfig:
	    """Capture settings consumed by the microphone recorder.

	    Attributes:
	        sample_rate: Samples per second requested from the input stream and
	            written into the WAV header.
	        channels: Number of input channels.
	        dtype: Sample format name passed to the input stream.
	        chunk_duration: Length, in seconds, of each block delivered to the
	            recording callback (block size = sample_rate * chunk_duration).
	    """

	    sample_rate: int = 16000
	    channels: int = 1
	    dtype: str = "int16"
	    chunk_duration: float = 0.1


	class PushToTalkRecorder:
	    """Record microphone audio between ``start()`` and ``stop()``.

	    Blocks delivered by the sounddevice input stream are buffered in memory
	    while recording is active; ``stop()`` returns them as one WAV file.
	    """

	    def __init__(self, config: Optional["AudioConfig"] = None):
	        """Create an idle recorder.

	        Args:
	            config: Capture settings. When omitted a fresh ``AudioConfig()``
	                is created per recorder, so instances never share (and
	                cannot accidentally mutate) one default object.

	        Raises:
	            ImportError: If sounddevice could not be imported.
	        """
	        if not SD_AVAILABLE:
	            raise ImportError(
	                "sounddevice er ikke installert. "
	                "Windows: pip install sounddevice "
	                "(krever PortAudio; se README for wheel-link)"
	            )
	        self.config = config if config is not None else AudioConfig()
	        self._recording = False
	        self._frames = []
	        self._stream = None

	    def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status):
	        """Stream callback: report status flags and buffer the incoming block."""
	        if status:
	            print("[audio] status: %s" % status)
	        if self._recording:
	            # Copy: the callback's buffer is only borrowed for this call.
	            self._frames.append(indata.copy())

	    def start(self):
	        """Begin recording audio from microphone.

	        Does nothing when a recording is already in progress. If the input
	        stream cannot be opened or started, the recorder is returned to the
	        idle state and the original exception propagates.
	        """
	        if self._recording:
	            return
	        self._frames = []
	        # Set before the stream starts so the very first block is kept.
	        self._recording = True
	        stream = None
	        try:
	            stream = sd.InputStream(
	                samplerate=self.config.sample_rate,
	                channels=self.config.channels,
	                dtype=self.config.dtype,
	                blocksize=int(self.config.sample_rate * self.config.chunk_duration),
	                callback=self._audio_callback,
	            )
	            stream.start()
	        except Exception:
	            # Roll back, otherwise every later start() would be a no-op and
	            # stop() would operate on a stream that never opened.
	            self._recording = False
	            if stream is not None:
	                stream.close()
	            raise
	        self._stream = stream
	        print("[recorder] Starter opptak")

	    @staticmethod
	    def _to_pcm(audio: np.ndarray) -> np.ndarray:
	        """Return ``audio`` in a sample layout the ``wave`` module can store.

	        WAV PCM is unsigned for 8-bit samples and signed for wider ones, and
	        ``wave`` has no floating-point format, so floats become int16.
	        """
	        if audio.dtype.kind == "f":
	            # sounddevice float samples are scaled to [-1.0, 1.0].
	            return (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
	        if audio.dtype == np.int8:
	            # Shift signed 8-bit samples into the unsigned range WAV expects.
	            return (audio.astype(np.int16) + 128).astype(np.uint8)
	        return audio

	    def stop(self):
	        """Stop recording and return WAV bytes.

	        Returns:
	            The complete WAV file as ``bytes``, or ``None`` when the recorder
	            was not recording or no audio block was captured.
	        """
	        if not self._recording:
	            return None
	        self._recording = False
	        # Detach first so a failure below cannot leave a stale stream handle.
	        stream, self._stream = self._stream, None
	        if stream is not None:
	            try:
	                stream.stop()
	            finally:
	                stream.close()

	        if not self._frames:
	            print("[recorder] Ingen lyd fanget opp")
	            return None

	        audio = self._to_pcm(np.concatenate(self._frames, axis=0))

	        buf = io.BytesIO()
	        with wave.open(buf, "wb") as wf:
	            wf.setnchannels(self.config.channels)
	            # Derive the width from the data instead of assuming 16-bit.
	            wf.setsampwidth(audio.dtype.itemsize)
	            wf.setframerate(self.config.sample_rate)
	            wf.writeframes(audio.tobytes())
	        print("[recorder] Stoppet. Fanget %.1fs" % (len(audio)/self.config.sample_rate))
	        return buf.getvalue()

	    def is_recording(self) -> bool:
	        """Return ``True`` while a recording is in progress."""
	        return self._recording


	class TTSEngine:
	    """Text-to-Speech using Edge-TTS (Norwegian/others).

	    Text is synthesized to a temporary MP3 with edge-tts, decoded with
	    soundfile and played through sounddevice. When edge-tts or soundfile is
	    unavailable, ``speak()`` only prints a notice.
	    """

	    def __init__(self, voice: str = "nb-NO-FinnNeural"):
	        """Store the voice name and warn if edge-tts is not importable.

	        Args:
	            voice: Edge-TTS voice identifier used for synthesis.
	        """
	        self.voice = voice
	        # NOTE(review): never updated anywhere in the visible code.
	        self._playing = False
	        if not EDGE_AVAILABLE:
	            print("[tts] Advarsel: edge-tts ikke tilgjengelig. TTS deaktivert.")

	    async def _synthesize(self, text: str, output_path: str):
	        """Synthesize ``text`` with the configured voice into ``output_path``."""
	        communicate = edge_tts.Communicate(text, voice=self.voice)
	        await communicate.save(output_path)

	    def speak(self, text: str, blocking: bool = False):
	        """Speak the given text.

	        Args:
	            text: Text to speak. ``None``, empty and whitespace-only input
	                is ignored.
	            blocking: When true, synthesize and play on the calling thread;
	                otherwise run on a daemon thread and return immediately.

	        Synthesis and playback errors are printed rather than raised. The
	        temporary audio file is removed whether or not playback succeeded.
	        """
	        if not text or not text.strip():
	            return
	        if not (EDGE_AVAILABLE and SF_AVAILABLE):
	            print("[tts] (TTS deaktivert: edge-tts=%s soundfile=%s) %s" % (
	                EDGE_AVAILABLE, SF_AVAILABLE, text[:60]))
	            return

	        def _play():
	            import asyncio  # kept local, as in the original implementation

	            tmp_path = None
	            try:
	                # Only reserve a name here; the handle is closed before
	                # edge-tts writes to the path (required on Windows).
	                with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
	                    tmp_path = f.name

	                asyncio.run(self._synthesize(text, tmp_path))
	                data, sr = sf.read(tmp_path)
	                if SD_AVAILABLE:
	                    sd.play(data, sr)
	                    sd.wait()
	            except Exception as e:
	                print("[tts] Feil: %s" % e)
	            finally:
	                # Clean up on failure too, so errors do not leak temp files.
	                if tmp_path is not None:
	                    try:
	                        os.unlink(tmp_path)
	                    except OSError as e:
	                        print("[tts] Feil: %s" % e)

	        if blocking:
	            _play()
	        else:
	            threading.Thread(target=_play, daemon=True).start()