Spaces:

techfreakworm
/

ACE-Music-Studio

Running on Zero

App Files Files Community

ACE-Music-Studio / post_process.py

techfreakworm

feat(post): add demucs stems, lufs normalisation, mp3 export

b260242 unverified 3 days ago

raw

history blame

2.77 kB

	"""Post-generation: stem separation (Demucs), loudness normalisation
	(pyloudnorm), and MP3 export (ffmpeg)."""

	from __future__ import annotations

	import subprocess
	from pathlib import Path
	from typing import Any

	# Module-level cache for the Demucs separator. Stays None until
	# _get_demucs() builds the instance on first use, then is reused.
	_DEMUCS = None


	def _get_demucs() -> Any:
	    """Return the shared Demucs separator, creating it on first call.

	    The ``demucs`` import is deferred to the first call, so importing this
	    module does not require demucs to be importable. The instance is stored
	    in the module-level ``_DEMUCS`` and handed back on every later call.
	    """
	    global _DEMUCS
	    if _DEMUCS is not None:
	        # Already built by an earlier call; reuse it.
	        return _DEMUCS

	    from demucs.api import Separator

	    _DEMUCS = Separator(model="htdemucs_ft")
	    return _DEMUCS


	def separate_stems(audio_path: Path | str) -> dict[str, Path]:
	    """Split a track into stems using the shared Demucs separator.

	    The separator is the one returned by ``_get_demucs`` (``htdemucs_ft``
	    model), which is expected to produce vocals/drums/bass/other.

	    Args:
	        audio_path: Audio file to separate.

	    Returns:
	        A dict mapping stem name to the path of that stem's audio file.
	        When Demucs returns tensors, each stem is written next to the input
	        as ``<input name without extension>.<stem>.wav``.
	    """
	    sep = _get_demucs()
	    result = sep.separate_audio_file(str(audio_path))

	    # `result` may be either {name: path} OR an (origin, separated) tuple
	    # depending on demucs version. A dict only needs its values coerced.
	    if isinstance(result, dict):
	        return {name: Path(p) for name, p in result.items()}

	    # Tuple form: (origin_tensor, {stem_name: tensor}). Persist each tensor
	    # beside the input file with the stem name as an extra suffix.
	    import soundfile as sf

	    _origin, sep_tensors = result
	    source = Path(audio_path)
	    stems: dict[str, Path] = {}
	    for name, tensor in sep_tensors.items():
	        out = source.with_name(f"{source.stem}.{name}.wav")
	        data = tensor.detach().cpu().numpy()
	        # soundfile wants (frames, channels); a 2-D array whose first axis
	        # is 1 or 2 is treated as channel-first mono/stereo and flipped.
	        if data.ndim == 2 and data.shape[0] in (1, 2):
	            data = data.T
	        sf.write(str(out), data, sep.samplerate)
	        stems[name] = out
	    return stems


	def _pyloudnorm_normalise(in_path: str, out_path: str, target_lufs: float) -> None:
	    """Measure a file's integrated loudness and write a gain-adjusted copy.

	    Real pyloudnorm path; isolated in its own function for easy mocking in
	    tests.

	    Args:
	        in_path: Audio file to read.
	        out_path: Destination file; written at the input's sample rate.
	        target_lufs: Desired integrated loudness in LUFS.
	    """
	    import math

	    import pyloudnorm as pyln
	    import soundfile as sf

	    data, rate = sf.read(in_path)
	    current = pyln.Meter(rate).integrated_loudness(data)
	    # Silent (fully gated) audio measures -inf LUFS. Reaching the target
	    # would need infinite gain, which turns every sample into NaN/inf, so
	    # only apply gain when the measurement is finite and otherwise write
	    # the audio through unchanged.
	    if math.isfinite(current):
	        data = pyln.normalize.loudness(data, current, target_lufs)
	    sf.write(out_path, data, rate)


	def normalise_lufs(audio_path: Path | str, target_lufs: float = -14.0) -> Path:
	    """Normalise a file to a target LUFS, writing a new file beside the input.

	    Args:
	        audio_path: Audio file to normalise; it is not modified.
	        target_lufs: Desired integrated loudness in LUFS. Defaults to -14.0.

	    Returns:
	        Path of the written file, ``<input stem>.lufs<int(target_lufs)>.wav``
	        in the input's directory (e.g. ``song.lufs-14.wav``). The target is
	        truncated to an int for the file name only; the full float value is
	        passed on for the actual normalisation.
	    """
	    audio_path = Path(audio_path)
	    out_name = f"{audio_path.stem}.lufs{int(target_lufs)}.wav"
	    out_path = audio_path.with_name(out_name)
	    _pyloudnorm_normalise(str(audio_path), str(out_path), target_lufs)
	    return out_path


	def to_mp3(wav_path: Path | str, bitrate_kbps: int = 320) -> Path:
	    """Encode a WAV file to MP3 via the system ``ffmpeg`` executable.

	    Args:
	        wav_path: Input audio file.
	        bitrate_kbps: Audio bitrate in kbit/s, passed to ffmpeg as
	            ``-b:a <n>k``. Defaults to 320.

	    Returns:
	        Path of the MP3: the input path with its suffix replaced by ``.mp3``.
	        An existing file at that path is overwritten (``-y``).

	    Raises:
	        FileNotFoundError: If ``ffmpeg`` cannot be found on PATH.
	        subprocess.CalledProcessError: If ffmpeg exits non-zero; its captured
	            output is available on the exception (``.stderr``).
	    """
	    wav_path = Path(wav_path)
	    out_path = wav_path.with_suffix(".mp3")
	    cmd = [
	        "ffmpeg",
	        "-y",  # overwrite the output without prompting
	        "-i",
	        str(wav_path),
	        "-b:a",
	        f"{bitrate_kbps}k",
	        "-ar",  # output is always resampled to 44.1 kHz
	        "44100",
	        str(out_path),
	    ]
	    # Argument list (no shell) so paths with spaces or metacharacters are safe.
	    subprocess.run(cmd, check=True, capture_output=True)
	    return out_path