rakib72642's picture
updated voice module
f84481c
"""
services/vad.py — WebRTC VAD wrapper (unchanged — already correct)
Now also used by webrtc_pipeline.py's _VADSegmenter for PCM frame processing.
"""
import webrtcvad
class VADDetector:
    """Per-frame speech detector built on ``webrtcvad.Vad``.

    The detector only classifies frames whose byte length equals
    ``frame_size``; every other frame is reported as non-speech.
    """

    def __init__(self, sample_rate=16000, frame_ms=30, aggressiveness=2):
        """Create the underlying VAD and derive the expected frame size.

        Args:
            sample_rate: Sample rate in Hz, forwarded to ``Vad.is_speech``.
            frame_ms: Duration of one frame in milliseconds.
            aggressiveness: Mode passed straight to ``webrtcvad.Vad``.
        """
        self.vad = webrtcvad.Vad(aggressiveness)
        self.sample_rate = sample_rate
        self.frame_ms = frame_ms
        samples_per_frame = int(sample_rate * frame_ms / 1000)
        # NOTE(review): the factor 2 is presumably bytes per sample
        # (16-bit mono PCM) — confirm against the audio source.
        self.frame_size = samples_per_frame * 2

    def is_valid(self, frame: bytes) -> bool:
        """Return True when *frame* has exactly ``frame_size`` bytes."""
        expected_length = self.frame_size
        return expected_length == len(frame)

    def is_speech(self, frame: bytes) -> bool:
        """Return the VAD verdict for *frame*.

        Frames of the wrong size, and frames on which the underlying VAD
        raises, are both reported as ``False`` instead of propagating an
        error.
        """
        if not self.is_valid(frame):
            return False
        try:
            verdict = self.vad.is_speech(frame, self.sample_rate)
        except Exception:
            # Best-effort: any failure in the VAD is treated as "no speech".
            verdict = False
        return verdict
class VADSegmenter:
    """Collect speech frames into utterances.

    Frames are fed one at a time through ``add_frame``. Frames the detector
    classifies as speech are appended to an internal buffer. Once more than
    ``silence_limit`` consecutive non-speech frames follow, the buffered
    audio is returned and the segmenter resets. Non-speech frames are never
    stored, so returned audio consists of speech frames only.

    Call ``flush`` when the stream ends to retrieve speech that was buffered
    but not yet terminated by enough silence.
    """

    # The annotation is a string (forward reference) so this class does not
    # depend on VADDetector being defined earlier at import time.
    def __init__(self, vad: "VADDetector", silence_limit=8):
        """Store the detector and initialise an empty, inactive segment.

        Args:
            vad: Object exposing ``is_speech(frame) -> bool``.
            silence_limit: Number of consecutive non-speech frames that must
                be exceeded before the current segment is emitted.
        """
        self.vad = vad
        self.silence_limit = silence_limit
        self.buffer = bytearray()  # speech bytes of the segment in progress
        self.silence = 0  # consecutive non-speech frames since last speech
        self.active = False  # True once a speech frame has been buffered

    def add_frame(self, frame: bytes):
        """Feed one frame; return a finished segment or ``None``.

        Returns:
            The buffered speech as ``bytes`` when the silence run following
            speech exceeds ``silence_limit``; otherwise ``None``.
        """
        if self.vad.is_speech(frame):
            self.buffer.extend(frame)
            self.active = True
            self.silence = 0
        elif self.active:
            # Silence only counts once a segment has started.
            self.silence += 1

        if self.active and self.silence > self.silence_limit:
            return self._take_segment()
        return None

    def flush(self):
        """Emit whatever speech is buffered right now and reset the state.

        Intended for end-of-stream handling, where the final utterance may
        not be followed by enough silence for ``add_frame`` to emit it.

        Returns:
            The buffered speech as ``bytes``, or ``None`` if nothing is
            buffered.
        """
        audio = self._take_segment()
        return audio or None

    def _take_segment(self):
        """Return the buffered bytes and reset buffer, counter and flag."""
        audio = bytes(self.buffer)
        self.buffer.clear()
        self.silence = 0
        self.active = False
        return audio