rvc

Sleeping

ibcplateformes Claude Opus 4.6 committed on Mar 30

Commit

a89afd6

1 Parent(s): dcf6e3c

Replace Applio preprocess subprocess with custom implementation

Applio's preprocess.py exited successfully but produced no output files
(likely an argument-format mismatch). Replaced it with direct librosa-based
preprocessing: load, peak-normalize, slice into 3.5 s segments (0.5 s overlap),
and save each slice at both the target sample rate and 16 kHz. Simpler, more
reliable, no subprocess dependency.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show

pipeline/training.py +65 -79

pipeline/training.py CHANGED Viewed

@@ -43,97 +43,83 @@ def _setup_applio_env():
 def preprocess(model_name: str, audio_path: str, sample_rate: int = 40000):
     """
-    Preprocess audio: slice, normalize, create 16kHz versions.
-    Runs on CPU (subprocess is fine here, no GPU needed).
     """
-    _setup_applio_env()
     exp_dir = os.path.join(LOGS_DIR, model_name)
-    os.makedirs(exp_dir, exist_ok=True)
-    dataset_dir = os.path.join(exp_dir, "dataset")
-    os.makedirs(dataset_dir, exist_ok=True)
-    shutil.copy2(audio_path, os.path.join(dataset_dir, os.path.basename(audio_path)))
-    preprocess_script = os.path.join(APPLIO_DIR, "rvc", "train", "preprocess", "preprocess.py")
-    command = [
-        sys.executable, preprocess_script,
-        exp_dir, dataset_dir, str(sample_rate),
-        "2", "Cut", "False", "True", "0.5", "3.5", "0.3", "none",
-    ]
-    logger.info(f"Running preprocessing for {model_name}...")
-    logger.info(f"Command: {' '.join(command)}")
-    logger.info(f"Dataset dir contents: {os.listdir(dataset_dir)}")
-    result = subprocess.run(command, capture_output=True, text=True, cwd=APPLIO_DIR)
-    logger.info(f"Preprocess returncode: {result.returncode}")
-    if result.stdout:
-        logger.info(f"Preprocess stdout: {result.stdout[-1000:]}")
-    if result.stderr:
-        logger.info(f"Preprocess stderr: {result.stderr[-1000:]}")
-    if result.returncode != 0:
-        raise RuntimeError(f"Preprocessing failed: {result.stderr[-500:]}")
-    # Log what was created in exp_dir
-    logger.info(f"Contents of exp_dir ({exp_dir}):")
-    for item in os.listdir(exp_dir):
-        full = os.path.join(exp_dir, item)
-        if os.path.isdir(full):
-            contents = os.listdir(full)
-            logger.info(f"  {item}/ ({len(contents)} files): {contents[:5]}")
-        else:
-            logger.info(f"  {item} ({os.path.getsize(full)} bytes)")
-    # Count sliced audio files (Applio may nest them in subdirectories)
-    def count_wav_files(directory):
-        """Count .wav files recursively."""
-        count = 0
-        if os.path.exists(directory):
-            for root, dirs, files in os.walk(directory):
-                for f in files:
-                    if f.endswith(".wav"):
-                        count += 1
-        return count
-    sliced_dir = os.path.join(exp_dir, "sliced_audios")
-    sliced_16k_dir = os.path.join(exp_dir, "sliced_audios_16k")
-    n_slices = count_wav_files(sliced_dir)
-    n_slices_16k = count_wav_files(sliced_16k_dir)
-    # Debug: show exact directory structure
-    def dir_tree(path, depth=2):
-        """Show directory tree for debugging."""
-        items = []
-        if os.path.exists(path):
-            for item in os.listdir(path):
-                full = os.path.join(path, item)
-                if os.path.isdir(full) and depth > 0:
-                    sub_items = os.listdir(full)
-                    items.append(f"{item}/({len(sub_items)} items: {sub_items[:3]})")
-                else:
-                    items.append(item)
-        return items
-    logger.info(f"sliced_audios tree: {dir_tree(sliced_dir)}")
-    logger.info(f"sliced_audios_16k tree: {dir_tree(sliced_16k_dir)}")
-    logger.info(f"WAV counts: sliced={n_slices}, 16k={n_slices_16k}")
-    if n_slices > 0 or n_slices_16k > 0:
-        total = max(n_slices, n_slices_16k)
-        logger.info(f"Preprocessing complete: {total} slices created.")
-        return total
-    else:
-        raise RuntimeError(
-            f"Preprocessing produced no audio slices. "
-            f"sliced_audios: {dir_tree(sliced_dir)}. "
-            f"sliced_audios_16k: {dir_tree(sliced_16k_dir)}. "
-            f"stdout: {result.stdout[-200:] if result.stdout else 'empty'}"
-        )
 @spaces.GPU(duration=120)

 def preprocess(model_name: str, audio_path: str, sample_rate: int = 40000):
     """
+    Preprocess audio: load, normalize, slice into segments, save at target SR and 16kHz.
+    Custom implementation (no Applio subprocess dependency).
     """
+    import numpy as np
+    import librosa
+    import soundfile as sf
     exp_dir = os.path.join(LOGS_DIR, model_name)
+    sliced_dir = os.path.join(exp_dir, "sliced_audios")
+    sliced_16k_dir = os.path.join(exp_dir, "sliced_audios_16k")
+    os.makedirs(sliced_dir, exist_ok=True)
+    os.makedirs(sliced_16k_dir, exist_ok=True)
+    logger.info(f"Preprocessing {audio_path} for model {model_name}...")
+    # Load audio at target sample rate
+    audio, sr = librosa.load(audio_path, sr=sample_rate, mono=True)
+    logger.info(f"Loaded audio: {len(audio)} samples, {len(audio)/sr:.1f}s at {sr}Hz")
+    if len(audio) < sr * 1:
+        raise RuntimeError("Audio trop court (< 1 seconde).")
+    # Normalize
+    peak = np.abs(audio).max()
+    if peak > 0:
+        audio = audio / peak * 0.95
+    # Also load at 16kHz
+    audio_16k, _ = librosa.load(audio_path, sr=16000, mono=True)
+    peak_16k = np.abs(audio_16k).max()
+    if peak_16k > 0:
+        audio_16k = audio_16k / peak_16k * 0.95
+    # Slice into segments of ~3.5 seconds with 0.3s overlap
+    segment_len = int(3.5 * sr)
+    hop = int(3.0 * sr)  # 3.5 - 0.5 overlap
+    segment_len_16k = int(3.5 * 16000)
+    hop_16k = int(3.0 * 16000)
+    n_slices = 0
+    idx = 0
+    while idx < len(audio):
+        # Slice at target sample rate
+        end = min(idx + segment_len, len(audio))
+        segment = audio[idx:end]
+        # Skip very short segments (< 0.5s)
+        if len(segment) < int(0.5 * sr):
+            idx += hop
+            continue
+        # Skip silent segments
+        if np.abs(segment).max() < 0.01:
+            idx += hop
+            continue
+        # Compute corresponding 16k positions
+        ratio = 16000 / sr
+        idx_16k = int(idx * ratio)
+        end_16k = int(end * ratio)
+        segment_16k = audio_16k[idx_16k:min(end_16k, len(audio_16k))]
+        # Save slices
+        fname = f"{model_name}_{n_slices:04d}.wav"
+        sf.write(os.path.join(sliced_dir, fname), segment, sr)
+        sf.write(os.path.join(sliced_16k_dir, fname), segment_16k, 16000)
+        n_slices += 1
+        idx += hop
+    logger.info(f"Preprocessing complete: {n_slices} slices created.")
+    if n_slices == 0:
+        raise RuntimeError("Preprocessing produced no audio slices. L'audio est peut-être silencieux.")
+    return n_slices
 @spaces.GPU(duration=120)