Spaces:

Rafii
/

videovoice

Running on Zero

App Files Files Community

videovoice / steps /s1_extract_audio.py

Rafii

deploy: switch to chatterbox requirements @ 787c1dc

02ad302 about 1 month ago

raw

history blame contribute delete

2.05 kB

	"""
	Step 1-2: Extract audio track from input video.
	Outputs a 16 kHz mono WAV suitable for Whisper + Chatterbox, plus an optional
	44.1 kHz stereo WAV for source separation (Demucs).
	"""
	import subprocess
	from pathlib import Path


	def extract_audio(video_path: str, output_path: str = "tmp/audio/source/extracted_audio.wav") -> str:
	    """
	    Extract the audio track of a video as a 16 kHz mono 16-bit PCM WAV using ffmpeg.

	    Args:
	        video_path: Path to the input video file.
	        output_path: Where to save the extracted audio (WAV). Missing parent
	            directories are created and an existing file is overwritten.

	    Returns:
	        ``output_path`` exactly as it was passed in (it is not resolved to an
	        absolute path).

	    Raises:
	        RuntimeError: If ffmpeg exits with a non-zero status; the message
	            carries ffmpeg's stderr output.
	        OSError: If the ffmpeg executable cannot be launched (for example,
	            it is not on PATH).
	    """
	    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

	    cmd = [
	        "ffmpeg", "-y",                # overwrite an existing output file
	        "-i", video_path,
	        "-vn",                         # no video
	        "-acodec", "pcm_s16le",        # PCM 16-bit
	        "-ar", "16000",                # 16 kHz (Whisper standard)
	        "-ac", "1",                    # mono
	        output_path,
	    ]

	    result = subprocess.run(
	        cmd,
	        # ffmpeg listens on stdin for interactive commands by default; do not
	        # let it consume (or block on) the parent process's stdin.
	        stdin=subprocess.DEVNULL,
	        capture_output=True,
	        text=True,
	        # ffmpeg echoes container metadata on stderr, which may hold bytes that
	        # are invalid in the locale encoding; never fail on decoding diagnostics.
	        errors="replace",
	    )
	    if result.returncode != 0:
	        raise RuntimeError(f"FFmpeg audio extraction failed:\n{result.stderr}")

	    print(f"[s1] Audio extracted → {output_path}")
	    return output_path


	def extract_audio_hq(video_path: str, output_path: str = "tmp/audio/source/extracted_audio_hq.wav") -> str:
	    """
	    Extract high-quality 44.1 kHz stereo 16-bit PCM audio for source separation (Demucs).

	    Args:
	        video_path: Path to the input video file.
	        output_path: Where to save the HQ audio (WAV). Missing parent
	            directories are created and an existing file is overwritten.

	    Returns:
	        ``output_path`` exactly as it was passed in (it is not resolved to an
	        absolute path).

	    Raises:
	        RuntimeError: If ffmpeg exits with a non-zero status; the message
	            carries ffmpeg's stderr output.
	        OSError: If the ffmpeg executable cannot be launched (for example,
	            it is not on PATH).
	    """
	    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

	    cmd = [
	        "ffmpeg", "-y",                # overwrite an existing output file
	        "-i", video_path,
	        "-vn",                         # no video
	        "-acodec", "pcm_s16le",        # PCM 16-bit
	        "-ar", "44100",                # 44.1 kHz for Demucs
	        "-ac", "2",                    # stereo
	        output_path,
	    ]

	    result = subprocess.run(
	        cmd,
	        # ffmpeg listens on stdin for interactive commands by default; do not
	        # let it consume (or block on) the parent process's stdin.
	        stdin=subprocess.DEVNULL,
	        capture_output=True,
	        text=True,
	        # ffmpeg echoes container metadata on stderr, which may hold bytes that
	        # are invalid in the locale encoding; never fail on decoding diagnostics.
	        errors="replace",
	    )
	    if result.returncode != 0:
	        raise RuntimeError(f"FFmpeg HQ audio extraction failed:\n{result.stderr}")

	    print(f"[s1] HQ audio extracted → {output_path}")
	    return output_path