carbonx committed on
Commit
2e7e0eb
·
verified ·
1 Parent(s): 93a741b

Add audio_io.py

Browse files
Files changed (1) hide show
  1. audio_io.py +118 -0
audio_io.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Audio I/O: Push-to-talk recording + playback"""
2
+ import io
3
+ import wave
4
+ import threading
5
+ import numpy as np
6
+ import sounddevice as sd
7
+ import soundfile as sf
8
+ import edge_tts
9
+ import asyncio
10
+ import tempfile
11
+ import os
12
+
13
+ from dataclasses import dataclass
14
+ from typing import Callable, Optional
15
+
16
+
17
@dataclass
class AudioConfig:
    """Capture settings consumed by the recorder in this module."""

    # Sampling frequency in Hz.
    sample_rate: int = 16000
    # Number of input channels to open (1 = mono).
    channels: int = 1
    # Sample format name handed to the input stream.
    dtype: str = "int16"
    # Length of one capture block, in seconds.
    chunk_duration: float = 0.1
23
+
24
+
25
class PushToTalkRecorder:
    """Capture microphone audio between ``start()`` and ``stop()`` calls.

    Audio arrives through a ``sounddevice.InputStream`` callback, is buffered
    in memory, and is returned by ``stop()`` as a complete WAV file in bytes.
    The caller decides when to start and stop (e.g. on key press/release).
    """

    def __init__(self, config: Optional["AudioConfig"] = None):
        """Create an idle recorder.

        Args:
            config: Capture settings. When omitted, a fresh ``AudioConfig``
                is created for this instance, so recorders never share one
                mutable default object.
        """
        self.config = AudioConfig() if config is None else config
        self._recording = False
        self._frames = []
        self._stream = None
        self._thread = None  # not used by any method in this class

    def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status):
        """Stream callback: report status flags and buffer the incoming block."""
        if status:
            print(f"[audio] status: {status}")
        if self._recording:
            # Copy the block: the buffer passed to the callback is only
            # borrowed for the duration of the call.
            self._frames.append(indata.copy())

    def start(self):
        """Begin recording audio from the microphone.

        Does nothing if a recording is already in progress.

        Raises:
            Exception: Whatever opening or starting the input stream raises.
                The recorder is left idle in that case, so ``start()`` can be
                retried.
        """
        if self._recording:
            return

        self._frames = []
        # Raise the flag before the stream starts so the very first callback
        # block is kept; it is rolled back below if the stream cannot start.
        self._recording = True
        stream = None
        try:
            stream = sd.InputStream(
                samplerate=self.config.sample_rate,
                channels=self.config.channels,
                dtype=self.config.dtype,
                blocksize=int(self.config.sample_rate * self.config.chunk_duration),
                callback=self._audio_callback,
            )
            stream.start()
        except Exception:
            self._recording = False
            if stream is not None:
                stream.close()
            raise

        self._stream = stream
        print("[recorder] Started recording")

    def stop(self) -> Optional[bytes]:
        """Stop recording and return the captured audio as WAV bytes.

        Returns:
            The WAV file contents, or ``None`` when the recorder was not
            recording or no audio block was captured.
        """
        if not self._recording:
            return None
        self._recording = False

        # Detach the stream first so a failure below cannot leave a stale
        # reference behind, and always close it even if stop() raises.
        stream, self._stream = self._stream, None
        if stream is not None:
            try:
                stream.stop()
            finally:
                stream.close()

        if not self._frames:
            print("[recorder] No audio captured")
            return None

        audio = np.concatenate(self._frames, axis=0)

        buf = io.BytesIO()
        with wave.open(buf, "wb") as wf:
            wf.setnchannels(self.config.channels)
            # Sample width follows the captured dtype (2 bytes for the
            # default int16) instead of being hard-coded.
            # NOTE(review): `wave` writes integer PCM only; float dtypes
            # would need converting before this point - confirm intended use.
            wf.setsampwidth(audio.dtype.itemsize)
            wf.setframerate(self.config.sample_rate)
            wf.writeframes(audio.tobytes())

        seconds = len(audio) / self.config.sample_rate
        print(f"[recorder] Stopped. Captured {seconds:.1f}s")
        return buf.getvalue()

    def is_recording(self) -> bool:
        """Return ``True`` while a recording is in progress."""
        return self._recording
84
+
85
+
86
class TTSEngine:
    """Text-to-speech playback backed by Edge-TTS.

    The default voice is Norwegian (``nb-NO-FinnNeural``); pass another
    Edge-TTS voice name to speak a different language.
    """

    def __init__(self, voice: str = "nb-NO-FinnNeural"):
        """Store the Edge-TTS voice name used for synthesis."""
        self.voice = voice
        self._playing = False  # not updated by any method in this class

    async def _synthesize(self, text: str, output_path: str):
        """Synthesize *text* with the configured voice into *output_path*."""
        communicate = edge_tts.Communicate(text, voice=self.voice)
        await communicate.save(output_path)

    def speak(self, text: str, blocking: bool = False):
        """Synthesize *text* and play it on the default output device.

        Args:
            text: Text to speak. Empty, whitespace-only or ``None`` input is
                ignored.
            blocking: When true, return only after playback has finished.
                Otherwise the work runs on a daemon thread and this call
                returns immediately.

        Errors during synthesis or playback are printed, not raised.
        """
        if not text or not text.strip():
            return

        def _play():
            tmp_path = None
            try:
                # Only reserve a file name here; Edge-TTS writes the audio.
                with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                    tmp_path = f.name

                asyncio.run(self._synthesize(text, tmp_path))
                data, sr = sf.read(tmp_path)
                sd.play(data, sr)
                sd.wait()
            except Exception as e:
                print(f"[tts] Error: {e}")
            finally:
                # Remove the temp file on every path, including failures, so
                # failed utterances do not pile up in the temp directory.
                if tmp_path is not None:
                    try:
                        os.unlink(tmp_path)
                    except OSError as e:
                        print(f"[tts] Error: {e}")

        if blocking:
            _play()
        else:
            threading.Thread(target=_play, daemon=True).start()