Spaces:
Running
Running
| """ | |
| pipeline/audio.py | |
| Extracts a 16 kHz mono WAV from the source video for AssemblyAI. | |
| """ | |
| from pathlib import Path | |
| from utils import log, run_cmd | |
def extract_audio(video_path: Path, output_dir: Path) -> Path:
    """
    Extract audio from a video into a 16 kHz mono WAV.

    Why 16 kHz mono?
      - AssemblyAI is optimised for this format
      - Smaller file = faster upload
      - Speech intelligibility is fully preserved

    Parameters
    ----------
    video_path
        Source video file passed to ffmpeg as its input.
    output_dir
        Directory that receives the WAV. It is created (including any
        missing parents) when it does not exist yet.

    Returns
    -------
    Path to the .wav file, named ``<video stem>_audio.wav`` inside
    ``output_dir``.

    Raises
    ------
    FileNotFoundError
        If the expected WAV file is absent after the ffmpeg command ran.
        Anything raised by ``run_cmd`` itself propagates unchanged.
    """
    # ffmpeg does not create missing parent directories for its output file,
    # so make sure the destination exists before invoking it.
    output_dir.mkdir(parents=True, exist_ok=True)
    audio_path = output_dir / f"{video_path.stem}_audio.wav"

    cmd = [
        "ffmpeg", "-y",              # overwrite an existing output file
        "-i", str(video_path),
        "-vn",                       # drop video stream
        "-ar", "16000",              # 16 kHz
        "-ac", "1",                  # mono
        "-acodec", "pcm_s16le",      # 16-bit PCM
        str(audio_path),
    ]
    run_cmd(cmd, f"Extracting audio → {audio_path.name}")

    # Keep the exception type that a bare stat() would raise, but tell the
    # caller what actually went wrong instead of surfacing a raw errno.
    try:
        size_bytes = audio_path.stat().st_size
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"ffmpeg did not produce {audio_path} "
            f"(does {video_path.name} contain an audio stream?)"
        ) from err

    size_mb = size_bytes / 1_048_576
    log("✅", f"Audio extracted ({size_mb:.1f} MB)")
    return audio_path