techfreakworm committed on
Commit
b260242
·
unverified ·
1 Parent(s): fdfc10d

feat(post): add demucs stems, lufs normalisation, mp3 export

Browse files
Files changed (2) hide show
  1. post_process.py +86 -0
  2. tests/test_post_process.py +61 -0
post_process.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Post-generation: stem separation (Demucs), loudness normalisation
2
+ (pyloudnorm), and MP3 export (ffmpeg)."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import subprocess
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
# Module-wide slot for the Demucs separator; stays None until first requested.
_DEMUCS = None


def _get_demucs() -> Any:
    """Return the shared Demucs ``Separator``, creating it on first call.

    The instance is built with ``model="htdemucs_ft"`` and stored in the
    module-level ``_DEMUCS`` so later calls reuse the same object.
    """
    global _DEMUCS
    if _DEMUCS is not None:
        return _DEMUCS

    # Imported here rather than at module top, so importing this module does
    # not itself import demucs.
    from demucs.api import Separator

    _DEMUCS = Separator(model="htdemucs_ft")
    return _DEMUCS
20
+
21
+
22
def separate_stems(audio_path: Path | str) -> dict[str, Path]:
    """Separate an audio file into stems using the shared Demucs separator.

    The value returned by the separator's ``separate_audio_file`` is accepted
    in two shapes:

    * a dict of stem name to file path, which is returned with every value
      converted to ``Path``;
    * a two-item ``(origin, separated)`` result, where ``separated`` maps
      stem name to a tensor. Each tensor is written next to the input as
      ``<input name without suffix>.<stem name>.wav`` at the separator's
      ``samplerate``.

    Stem names are whatever the separator reports (expected to be
    vocals/drums/bass/other for htdemucs_ft).

    Returns:
        A dict mapping stem name to the path of that stem's file.
    """
    separator = _get_demucs()
    outcome = separator.separate_audio_file(str(audio_path))

    # Path-returning variant: nothing to write, only normalise the value type.
    if isinstance(outcome, dict):
        return {stem: Path(location) for stem, location in outcome.items()}

    # Tensor-returning variant: soundfile is only needed on this branch.
    import soundfile as sf

    _, tensors_by_stem = outcome
    stripped = Path(audio_path).with_suffix("")
    written: dict[str, Path] = {}
    for stem, tensor in tensors_by_stem.items():
        target = stripped.with_name(f"{stripped.name}.{stem}.wav")
        samples = tensor.detach().cpu().numpy()
        # A 2-D array whose first axis has length 1 or 2 is treated as
        # channels-first and flipped before being handed to soundfile.
        channels_first = samples.ndim == 2 and samples.shape[0] in (1, 2)
        if channels_first:
            samples = samples.T
        sf.write(str(target), samples, separator.samplerate)
        written[stem] = target
    return written
48
+
49
+
50
def _pyloudnorm_normalise(in_path: str, out_path: str, target_lufs: float) -> None:
    """Read ``in_path``, loudness-normalise it, and write it to ``out_path``.

    Kept as a separate function so tests can replace it without needing
    pyloudnorm or soundfile.

    Args:
        in_path: Audio file read with ``soundfile.read``.
        out_path: Destination written with ``soundfile.write`` at the input's
            sample rate.
        target_lufs: Integrated loudness to normalise to, in LUFS.

    If the measured loudness is not finite (digital silence measures as
    ``-inf``), no gain is applied and the samples are written unchanged, so
    ``out_path`` is still produced.
    """
    import math

    import pyloudnorm as pyln
    import soundfile as sf

    samples, rate = sf.read(in_path)
    measured = pyln.Meter(rate).integrated_loudness(samples)
    # Normalising from a non-finite loudness would apply an infinite gain and
    # turn every zero sample into NaN, so only apply gain to a finite reading.
    if math.isfinite(measured):
        samples = pyln.normalize.loudness(samples, measured, target_lufs)
    sf.write(out_path, samples, rate)
60
+
61
+
62
def normalise_lufs(audio_path: Path | str, target_lufs: float = -14.0) -> Path:
    """Loudness-normalise an audio file into a new WAV next to the input.

    Args:
        audio_path: File to normalise.
        target_lufs: Target integrated loudness in LUFS (default -14.0).

    Returns:
        Path of the written file, named ``<input stem>.lufs<target>.wav`` in
        the input's directory.
    """
    audio_path = Path(audio_path)
    # ":g" keeps whole-number targets short ("-14") while preserving any
    # fractional part ("-14.5"), so distinct targets never share a file name.
    label = f"{float(target_lufs):g}"
    out_path = audio_path.with_name(f"{audio_path.stem}.lufs{label}.wav")
    _pyloudnorm_normalise(str(audio_path), str(out_path), target_lufs)
    return out_path
68
+
69
+
70
def to_mp3(
    wav_path: Path | str,
    bitrate_kbps: int = 320,
    sample_rate: int = 44100,
) -> Path:
    """Encode an audio file to MP3 by running the system ``ffmpeg``.

    Args:
        wav_path: Input file; the output is the same path with its suffix
            replaced by ``.mp3``.
        bitrate_kbps: Audio bitrate passed to ffmpeg as ``-b:a <n>k``.
        sample_rate: Output sample rate in Hz passed to ffmpeg as ``-ar``.

    Returns:
        Path of the MP3 file.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status;
            its captured stdout/stderr are attached to the exception.
    """
    wav_path = Path(wav_path)
    out_path = wav_path.with_suffix(".mp3")
    cmd = [
        "ffmpeg",
        "-y",  # overwrite an existing output without prompting
        "-i",
        str(wav_path),
        "-b:a",
        f"{bitrate_kbps}k",
        "-ar",
        str(sample_rate),
        str(out_path),
    ]
    subprocess.run(cmd, check=True, capture_output=True)
    return out_path
tests/test_post_process.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """L2 tests for post-processing — Demucs and ffmpeg are mocked."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from unittest.mock import MagicMock
7
+
8
+ import post_process as pp
9
+
10
+
11
def test_separate_stems_returns_four_paths(tmp_path, monkeypatch):
    """A {name: path} separator result comes back as four existing paths."""
    stem_names = ("vocals", "drums", "bass", "other")
    wav_stub = b"RIFF" + b"\0" * 100

    source = tmp_path / "song.wav"
    source.write_bytes(wav_stub)

    # Stand-in separator that reports one file per stem.
    separator = MagicMock()
    separator.separate_audio_file.return_value = {
        stem: tmp_path / f"{stem}.wav" for stem in stem_names
    }
    for stem in stem_names:
        (tmp_path / f"{stem}.wav").write_bytes(wav_stub)
    monkeypatch.setattr(pp, "_get_demucs", lambda: separator)

    result = pp.separate_stems(source)

    assert set(result.keys()) == {"vocals", "drums", "bass", "other"}
    assert all(Path(location).exists() for location in result.values())
30
+
31
+
32
def test_normalise_lufs_invokes_pyloudnorm(monkeypatch, tmp_path):
    """normalise_lufs forwards the input, output and target to the backend."""
    src = tmp_path / "in.wav"
    src.write_bytes(b"RIFF" + b"\0" * 100)
    captured = {}

    def fake_norm(in_path, out_path, target_lufs):
        # Record the call and create the output so existence can be checked.
        captured.update({"in": in_path, "out": out_path, "target": target_lufs})
        Path(out_path).write_bytes(b"RIFF")

    monkeypatch.setattr(pp, "_pyloudnorm_normalise", fake_norm)

    out = pp.normalise_lufs(src, target_lufs=-14.0)

    assert captured["target"] == -14.0
    # The backend must read the requested file and write the returned one.
    assert captured["in"] == str(src)
    assert captured["out"] == str(out)
    # The result is a new file alongside the input, not the input itself.
    assert Path(out).parent == tmp_path
    assert Path(out) != src
    assert Path(out).exists()
46
+
47
+
48
def test_to_mp3_invokes_ffmpeg(monkeypatch, tmp_path):
    """to_mp3 runs a command carrying the bitrate and returns its output."""
    wav = tmp_path / "in.wav"
    wav.write_bytes(b"RIFF")
    seen_commands = []

    def fake_run(cmd, **_options):
        # Pretend to be ffmpeg: remember argv and create the last argument.
        seen_commands.append(cmd)
        Path(cmd[-1]).write_bytes(b"ID3")
        return MagicMock(returncode=0)

    monkeypatch.setattr(pp.subprocess, "run", fake_run)

    mp3 = pp.to_mp3(wav, bitrate_kbps=320)

    assert Path(mp3).exists()
    assert "320k" in " ".join(seen_commands[-1])