carbonx committed on
Commit
96b68f8
·
verified ·
1 Parent(s): ea9caa4

Fix: Graceful audio imports for Windows

Browse files
Files changed (1) hide show
  1. audio_io.py +50 -16
audio_io.py CHANGED
@@ -3,15 +3,37 @@ import io
3
  import wave
4
  import threading
5
  import numpy as np
6
- import sounddevice as sd
7
- import soundfile as sf
8
- import edge_tts
9
- import asyncio
10
  import tempfile
11
  import os
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  from dataclasses import dataclass
14
- from typing import Callable, Optional
15
 
16
 
17
  @dataclass
@@ -19,18 +41,23 @@ class AudioConfig:
19
  sample_rate: int = 16000
20
  channels: int = 1
21
  dtype: str = "int16"
22
- chunk_duration: float = 0.1 # seconds
23
 
24
 
25
  class PushToTalkRecorder:
26
  """Records audio while a key is held down."""
27
 
28
  def __init__(self, config: AudioConfig = AudioConfig()):
 
 
 
 
 
 
29
  self.config = config
30
  self._recording = False
31
  self._frames = []
32
  self._stream = None
33
- self._thread = None
34
 
35
  def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status):
36
  if status:
@@ -52,7 +79,7 @@ class PushToTalkRecorder:
52
  callback=self._audio_callback,
53
  )
54
  self._stream.start()
55
- print("[recorder] Started recording")
56
 
57
  def stop(self):
58
  """Stop recording and return WAV bytes."""
@@ -63,20 +90,19 @@ class PushToTalkRecorder:
63
  self._stream.close()
64
 
65
  if not self._frames:
66
- print("[recorder] No audio captured")
67
  return None
68
 
69
  audio = np.concatenate(self._frames, axis=0)
70
 
71
- # Convert to WAV bytes
72
  buf = io.BytesIO()
73
  with wave.open(buf, "wb") as wf:
74
  wf.setnchannels(self.config.channels)
75
- wf.setsampwidth(2) # int16 = 2 bytes
76
  wf.setframerate(self.config.sample_rate)
77
  wf.writeframes(audio.tobytes())
78
  buf.seek(0)
79
- print("[recorder] Stopped. Captured %.1fs" % (len(audio)/self.config.sample_rate))
80
  return buf.read()
81
 
82
  def is_recording(self) -> bool:
@@ -89,28 +115,36 @@ class TTSEngine:
89
  def __init__(self, voice: str = "nb-NO-FinnNeural"):
90
  self.voice = voice
91
  self._playing = False
 
 
92
 
93
  async def _synthesize(self, text: str, output_path: str):
94
  communicate = edge_tts.Communicate(text, voice=self.voice)
95
  await communicate.save(output_path)
96
 
97
  def speak(self, text: str, blocking: bool = False):
98
- """Speak the given text. If blocking=True, wait until done."""
99
  if not text.strip():
100
  return
 
 
 
 
101
 
102
  def _play():
103
  try:
104
  with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
105
  tmp_path = f.name
106
 
 
107
  asyncio.run(self._synthesize(text, tmp_path))
108
  data, sr = sf.read(tmp_path)
109
- sd.play(data, sr)
110
- sd.wait()
 
111
  os.unlink(tmp_path)
112
  except Exception as e:
113
- print("[tts] Error: %s" % e)
114
 
115
  if blocking:
116
  _play()
 
3
  import wave
4
  import threading
5
  import numpy as np
 
 
 
 
6
  import tempfile
7
  import os
8
 
9
+ # Graceful imports for Windows compatibility
10
+ try:
11
+ import sounddevice as sd
12
+ SD_AVAILABLE = True
13
+ except ImportError as e:
14
+ print("[audio_io] Advarsel: sounddevice ikke tilgjengelig (%s)" % e)
15
+ print("[audio_io] -> Windows: last ned wheel fra https://www.lfd.uci.edu/~gohlke/pythonlibs/#sounddevice")
16
+ print("[audio_io] eller: pip install sounddevice")
17
+ print("[audio_io] -> Linux: sudo apt-get install libportaudio2")
18
+ SD_AVAILABLE = False
19
+ sd = None
20
+
21
+ try:
22
+ import soundfile as sf
23
+ SF_AVAILABLE = True
24
+ except ImportError:
25
+ SF_AVAILABLE = False
26
+ sf = None
27
+
28
+ try:
29
+ import edge_tts
30
+ EDGE_AVAILABLE = True
31
+ except ImportError:
32
+ EDGE_AVAILABLE = False
33
+ edge_tts = None
34
+
35
  from dataclasses import dataclass
36
+ from typing import Optional
37
 
38
 
39
  @dataclass
 
41
  sample_rate: int = 16000
42
  channels: int = 1
43
  dtype: str = "int16"
44
+ chunk_duration: float = 0.1
45
 
46
 
47
  class PushToTalkRecorder:
48
  """Records audio while a key is held down."""
49
 
50
  def __init__(self, config: AudioConfig = AudioConfig()):
51
+ if not SD_AVAILABLE:
52
+ raise ImportError(
53
+ "sounddevice er ikke installert. "
54
+ "Windows: pip install sounddevice "
55
+ "(krever PortAudio; se README for wheel-link)"
56
+ )
57
  self.config = config
58
  self._recording = False
59
  self._frames = []
60
  self._stream = None
 
61
 
62
  def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status):
63
  if status:
 
79
  callback=self._audio_callback,
80
  )
81
  self._stream.start()
82
+ print("[recorder] Starter opptak")
83
 
84
  def stop(self):
85
  """Stop recording and return WAV bytes."""
 
90
  self._stream.close()
91
 
92
  if not self._frames:
93
+ print("[recorder] Ingen lyd fanget opp")
94
  return None
95
 
96
  audio = np.concatenate(self._frames, axis=0)
97
 
 
98
  buf = io.BytesIO()
99
  with wave.open(buf, "wb") as wf:
100
  wf.setnchannels(self.config.channels)
101
+ wf.setsampwidth(2)
102
  wf.setframerate(self.config.sample_rate)
103
  wf.writeframes(audio.tobytes())
104
  buf.seek(0)
105
+ print("[recorder] Stoppet. Fanget %.1fs" % (len(audio)/self.config.sample_rate))
106
  return buf.read()
107
 
108
  def is_recording(self) -> bool:
 
115
  def __init__(self, voice: str = "nb-NO-FinnNeural"):
116
  self.voice = voice
117
  self._playing = False
118
+ if not EDGE_AVAILABLE:
119
+ print("[tts] Advarsel: edge-tts ikke tilgjengelig. TTS deaktivert.")
120
 
121
  async def _synthesize(self, text: str, output_path: str):
122
  communicate = edge_tts.Communicate(text, voice=self.voice)
123
  await communicate.save(output_path)
124
 
125
  def speak(self, text: str, blocking: bool = False):
126
+ """Speak the given text."""
127
  if not text.strip():
128
  return
129
+ if not EDGE_AVAILABLE or not SF_AVAILABLE:
130
+ print("[tts] (TTS deaktivert: edge-tts=%s soundfile=%s) %s" % (
131
+ EDGE_AVAILABLE, SF_AVAILABLE, text[:60]))
132
+ return
133
 
134
  def _play():
135
  try:
136
  with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
137
  tmp_path = f.name
138
 
139
+ import asyncio
140
  asyncio.run(self._synthesize(text, tmp_path))
141
  data, sr = sf.read(tmp_path)
142
+ if SD_AVAILABLE:
143
+ sd.play(data, sr)
144
+ sd.wait()
145
  os.unlink(tmp_path)
146
  except Exception as e:
147
+ print("[tts] Feil: %s" % e)
148
 
149
  if blocking:
150
  _play()