"""
services/vad.py — WebRTC VAD wrapper (unchanged — already correct).

Now also used by webrtc_pipeline.py's _VADSegmenter for PCM frame processing.
"""
|
|
import logging

import webrtcvad
|
|
|
|
class VADDetector:
    """Frame-level speech detector backed by ``webrtcvad.Vad``.

    A frame is only classified when its byte length equals ``frame_size``;
    any other frame, and any frame the backend fails on, is reported as
    non-speech.

    Args:
        sample_rate: Sample rate in Hz passed to the backend on every call.
        frame_ms: Frame duration in milliseconds, used to derive
            ``frame_size``.
        aggressiveness: Value handed to ``webrtcvad.Vad``.
    """

    # Configurations the WebRTC VAD accepts according to the py-webrtcvad
    # documentation. Anything else makes the backend raise on every frame.
    _SUPPORTED_SAMPLE_RATES = (8000, 16000, 32000, 48000)
    _SUPPORTED_FRAME_MS = (10, 20, 30)

    _log = logging.getLogger(__name__)

    def __init__(self, sample_rate=16000, frame_ms=30, aggressiveness=2):
        self.vad = webrtcvad.Vad(aggressiveness)
        self.sample_rate = sample_rate
        self.frame_ms = frame_ms
        # Expected frame length in bytes: samples per frame times 2 bytes
        # per sample (webrtcvad works on 16-bit mono PCM).
        self.frame_size = int(sample_rate * frame_ms / 1000) * 2

        # is_speech() swallows backend errors, so a bad configuration would
        # otherwise look like permanent silence. Warn once instead of
        # raising so existing callers keep their best-effort behaviour.
        if (
            sample_rate not in self._SUPPORTED_SAMPLE_RATES
            or frame_ms not in self._SUPPORTED_FRAME_MS
        ):
            self._log.warning(
                "Unsupported VAD configuration (sample_rate=%s, frame_ms=%s); "
                "is_speech() will likely always return False",
                sample_rate,
                frame_ms,
            )

    def is_valid(self, frame: bytes) -> bool:
        """Return True when *frame* is exactly ``frame_size`` bytes long."""
        return len(frame) == self.frame_size

    def is_speech(self, frame: bytes) -> bool:
        """Return the backend's speech verdict for *frame*.

        Returns False, without raising, when the frame has the wrong length
        or when the backend raises.
        """
        if not self.is_valid(frame):
            return False
        try:
            return self.vad.is_speech(frame, self.sample_rate)
        except Exception:
            # Deliberate best-effort: one bad frame must not break the audio
            # pipeline. Keep the failure discoverable at debug level.
            self._log.debug("VAD backend failed on frame", exc_info=True)
            return False
|
|
|
|
class VADSegmenter:
    """Group speech frames into utterances using a per-frame detector.

    Frames are fed one at a time through :meth:`add_frame`. Frames the
    detector classifies as speech are appended to an internal buffer. Once
    more than ``silence_limit`` consecutive non-speech frames follow speech,
    the buffered audio is returned and the segmenter resets.

    Only speech frames are buffered. Non-speech frames, including short
    pauses inside an utterance, are counted but never stored.

    Args:
        vad: Object exposing ``is_speech(frame) -> bool``.
        silence_limit: Number of consecutive non-speech frames tolerated
            after speech; the segment is emitted on the next one.
    """

    def __init__(self, vad: "VADDetector", silence_limit=8):
        self.vad = vad
        self.silence_limit = silence_limit
        self.buffer = bytearray()  # speech bytes of the current utterance
        self.silence = 0  # consecutive non-speech frames since last speech
        self.active = False  # True once speech has been seen

    def add_frame(self, frame: bytes) -> "bytes | None":
        """Feed one frame; return a finished segment or None.

        Returns:
            The concatenated speech frames of the utterance once the silence
            threshold is exceeded, otherwise None.
        """
        if self.vad.is_speech(frame):
            self.buffer.extend(frame)
            self.active = True
            self.silence = 0
        elif self.active:
            # Silence before any speech is ignored; only count it once an
            # utterance is in progress.
            self.silence += 1

        if self.active and self.silence > self.silence_limit:
            return self._take_segment()
        return None

    def flush(self) -> "bytes | None":
        """Return the utterance in progress, if any, and reset.

        Call this at end-of-stream so a final utterance is not lost when no
        trailing silence frames arrive. Returns None when no speech has been
        buffered.
        """
        if not self.active:
            return None
        return self._take_segment()

    def reset(self) -> None:
        """Discard any buffered audio and return to the idle state."""
        self.buffer.clear()
        self.silence = 0
        self.active = False

    def _take_segment(self) -> bytes:
        """Copy out the buffered audio, then reset the segmenter."""
        audio = bytes(self.buffer)
        self.reset()
        return audio
|
|