Spaces:
Running on Zero
Running on Zero
File size: 2,765 Bytes
b260242 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | """Post-generation: stem separation (Demucs), loudness normalisation
(pyloudnorm), and MP3 export (ffmpeg)."""
from __future__ import annotations
import subprocess
from pathlib import Path
from typing import Any
# Module-wide cache for the Demucs separator; stays ``None`` until first use.
_DEMUCS = None


def _get_demucs() -> Any:
    """Return the shared Demucs ``Separator``, building it on the first call.

    The separator is created with the ``htdemucs_ft`` model and stored in the
    module-level ``_DEMUCS`` variable, so later calls reuse the same object.
    """
    global _DEMUCS
    if _DEMUCS is not None:
        return _DEMUCS

    # Imported lazily: demucs is only loaded when a separator is really needed.
    from demucs.api import Separator

    _DEMUCS = Separator(model="htdemucs_ft")
    return _DEMUCS
def separate_stems(audio_path: Path | str) -> dict[str, Path]:
    """Separate an audio file into stems (vocals/drums/bass/other).

    Uses the shared separator from ``_get_demucs()`` (htdemucs_ft) and
    accepts two shapes of ``separate_audio_file`` result:

    * a ``dict`` of stem name to path-like value: the values are wrapped in
      ``Path`` and returned without writing anything;
    * otherwise a two-item ``(origin, stems)`` result where ``stems`` maps
      stem name to a tensor: each tensor is written to
      ``<input without suffix>.<stem>.wav`` beside the input file.

    Returns:
        A dict mapping each stem name to the path of its audio file.
    """
    separator = _get_demucs()
    outcome = separator.separate_audio_file(str(audio_path))

    if isinstance(outcome, dict):
        # Path-returning variant: only normalise the values to Path objects.
        as_paths: dict[str, Path] = {}
        for stem_name, stem_path in outcome.items():
            as_paths[stem_name] = Path(stem_path)
        return as_paths

    # Tensor-returning variant: persist every stem ourselves.
    import soundfile as sf

    _, stem_tensors = outcome
    prefix = Path(audio_path).with_suffix("")
    written: dict[str, Path] = {}
    for stem_name, stem_tensor in stem_tensors.items():
        target = prefix.with_name(f"{prefix.name}.{stem_name}.wav")
        samples = stem_tensor.detach().cpu().numpy()
        # A 2-D array whose first axis has length 1 or 2 is treated as
        # channels-first and flipped to the frames-first layout handed to
        # soundfile.
        looks_channels_first = samples.ndim == 2 and samples.shape[0] in (1, 2)
        if looks_channels_first:
            samples = samples.transpose()
        sf.write(str(target), samples, separator.samplerate)
        written[stem_name] = target
    return written
def _pyloudnorm_normalise(in_path: str, out_path: str, target_lufs: float) -> None:
    """Real pyloudnorm path; isolated for easy mocking in tests.

    Reads ``in_path`` with soundfile, measures its integrated loudness with
    a ``pyloudnorm.Meter`` at the file's sample rate, applies the gain needed
    to reach ``target_lufs`` and writes the result to ``out_path`` at the
    same sample rate.

    If the measured loudness is not finite (pyloudnorm reports ``-inf`` for
    silent or fully gated input), the audio is written through unchanged:
    the gain would otherwise be infinite and fill the output with NaN/inf
    samples.

    Args:
        in_path: Path of the audio file to read.
        out_path: Path of the audio file to write.
        target_lufs: Desired integrated loudness in LUFS.
    """
    import math

    import pyloudnorm as pyln
    import soundfile as sf

    data, rate = sf.read(in_path)
    meter = pyln.Meter(rate)
    current = meter.integrated_loudness(data)
    if math.isfinite(current):
        normalised = pyln.normalize.loudness(data, current, target_lufs)
    else:
        # No measurable loudness: there is nothing meaningful to scale, so
        # keep the samples as they are instead of multiplying by inf.
        normalised = data
    sf.write(out_path, normalised, rate)
def normalise_lufs(audio_path: Path | str, target_lufs: float = -14.0) -> Path:
    """Write a loudness-normalised copy of ``audio_path`` and return its path.

    The copy is placed in the same directory as the input and named
    ``<stem>.lufs<N>.wav``, where ``<N>`` is ``target_lufs`` converted with
    ``int()`` (so the fractional part is dropped in the file name only; the
    full value is still passed to the normaliser).

    Args:
        audio_path: Input audio file.
        target_lufs: Target integrated loudness in LUFS; defaults to -14.0.

    Returns:
        Path of the newly written file.
    """
    source = Path(audio_path)
    lufs_tag = int(target_lufs)
    destination = source.with_name(f"{source.stem}.lufs{lufs_tag}.wav")
    _pyloudnorm_normalise(str(source), str(destination), target_lufs)
    return destination
def to_mp3(wav_path: Path | str, bitrate_kbps: int = 320) -> Path:
    """Encode an audio file to MP3 by invoking the system ``ffmpeg`` binary.

    The output path is the input path with its suffix replaced by ``.mp3``.
    ffmpeg is run with ``-y``, the requested audio bitrate and a 44100 Hz
    output sample rate; its stdout/stderr are captured rather than printed.

    Args:
        wav_path: Source audio file.
        bitrate_kbps: Audio bitrate in kbit/s, passed as ``-b:a <n>k``.

    Returns:
        Path of the MP3 file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    source = Path(wav_path)
    target = source.with_suffix(".mp3")

    input_args = ["-i", str(source)]
    encode_args = ["-b:a", f"{bitrate_kbps}k", "-ar", "44100"]
    subprocess.run(
        ["ffmpeg", "-y", *input_args, *encode_args, str(target)],
        check=True,
        capture_output=True,
    )
    return target
|