Rahma89
/

voice-separation-model

@@ -10,6 +10,7 @@ import os
 import argparse
 import torch
 import torchaudio
 from src.model import build_model, load_checkpoint
 import yaml
@@ -33,11 +34,34 @@ def parse_args():
     return p.parse_args()
 def separate(mix_path, model, sample_rate, device, out_dir):
     """Charge un mixture.wav, sépare les sources, sauvegarde les .wav."""
     # ── Charger le fichier audio ─────────────
-    mixture, sr = torchaudio.load(mix_path)
     if sr != sample_rate:
         print(f"  Resample {sr} Hz → {sample_rate} Hz")
@@ -70,7 +94,7 @@ def separate(mix_path, model, sample_rate, device, out_dir):
             src_cpu = src_cpu / max_val * 0.9
         out_path = os.path.join(out_dir, f"{mix_name}_source_{i+1}.wav")
-        torchaudio.save(out_path, src_cpu, sample_rate)
         print(f"  ✓ Source {i+1} sauvegardée : {out_path}")
     return est_sources

 import argparse
 import torch
 import torchaudio
+import soundfile as sf
 from src.model import build_model, load_checkpoint
 import yaml
     return p.parse_args()
+def load_audio(path):  # path: audio file to read; returns a (waveform, sample rate) pair
+    """Load audio as a mono/stereo tensor, with a fallback for TorchCodec-free envs."""
+    try:
+        return torchaudio.load(path)  # preferred path: torchaudio's result is returned unchanged
+    except ImportError as exc:
+        if "TorchCodec" not in str(exc) and "torchcodec" not in str(exc):  # fallback applies only to TorchCodec-related import errors
+            raise  # any other ImportError propagates to the caller
+        audio, sr = sf.read(path, dtype="float32", always_2d=True)  # float32 samples; always_2d keeps a channel axis even for mono
+        waveform = torch.from_numpy(audio).transpose(0, 1)  # soundfile yields (frames, channels); swap to (channels, frames)
+        return waveform, sr  # tensor plus the sample rate reported by soundfile
+def save_audio(path, waveform, sample_rate):  # writes waveform to path; returns None
+    """Save audio with torchaudio when available, otherwise fall back to soundfile."""
+    try:
+        torchaudio.save(path, waveform.cpu(), sample_rate)  # NOTE(review): no detach() here, unlike the fallback below
+    except ImportError as exc:
+        if "TorchCodec" not in str(exc) and "torchcodec" not in str(exc):  # fallback applies only to TorchCodec-related import errors
+            raise  # any other ImportError propagates to the caller
+        audio = waveform.detach().cpu().transpose(0, 1).numpy()  # presumably (channels, frames) -> (frames, channels) for soundfile; confirm against caller
+        sf.write(path, audio, sample_rate)  # output format is chosen by soundfile, not set explicitly here
 def separate(mix_path, model, sample_rate, device, out_dir):
     """Charge un mixture.wav, sépare les sources, sauvegarde les .wav."""
     # ── Charger le fichier audio ─────────────
+    mixture, sr = load_audio(mix_path)
     if sr != sample_rate:
         print(f"  Resample {sr} Hz → {sample_rate} Hz")
             src_cpu = src_cpu / max_val * 0.9
         out_path = os.path.join(out_dir, f"{mix_name}_source_{i+1}.wav")
+        save_audio(out_path, src_cpu, sample_rate)
         print(f"  ✓ Source {i+1} sauvegardée : {out_path}")
     return est_sources