""" Step 1-2: Extract audio track from input video. Outputs a 16 kHz mono WAV suitable for Whisper + Chatterbox. """ import subprocess from pathlib import Path def extract_audio(video_path: str, output_path: str = "tmp/audio/source/extracted_audio.wav") -> str: """ Extract audio from video using ffmpeg. Args: video_path: Path to the input video file. output_path: Where to save the extracted audio (WAV). Returns: Absolute path to the extracted audio file. """ Path(output_path).parent.mkdir(parents=True, exist_ok=True) cmd = [ "ffmpeg", "-y", "-i", video_path, "-vn", # no video "-acodec", "pcm_s16le", # PCM 16-bit "-ar", "16000", # 16 kHz (Whisper standard) "-ac", "1", # mono output_path, ] result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode != 0: raise RuntimeError(f"FFmpeg audio extraction failed:\n{result.stderr}") print(f"[s1] Audio extracted → {output_path}") return output_path def extract_audio_hq(video_path: str, output_path: str = "tmp/audio/source/extracted_audio_hq.wav") -> str: """ Extract high-quality 44.1 kHz stereo audio for source separation (Demucs). Args: video_path: Path to the input video file. output_path: Where to save the HQ audio (WAV). Returns: Absolute path to the extracted HQ audio file. """ Path(output_path).parent.mkdir(parents=True, exist_ok=True) cmd = [ "ffmpeg", "-y", "-i", video_path, "-vn", "-acodec", "pcm_s16le", "-ar", "44100", # 44.1 kHz for Demucs "-ac", "2", # stereo output_path, ] result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode != 0: raise RuntimeError(f"FFmpeg HQ audio extraction failed:\n{result.stderr}") print(f"[s1] HQ audio extracted → {output_path}") return output_path