ibcplateformes Claude Opus 4.6 committed on
Commit
a89afd6
·
1 Parent(s): dcf6e3c

Replace Applio preprocess subprocess with custom implementation

Browse files

Applio's preprocess.py was running successfully but producing no output
files (likely argument format mismatch). Replaced with direct librosa-based
preprocessing: load, normalize, slice into 3.5s segments, save at target
SR and 16kHz. Simpler, more reliable, no subprocess dependency.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. pipeline/training.py +65 -79
pipeline/training.py CHANGED
@@ -43,97 +43,83 @@ def _setup_applio_env():
43
 
44
  def preprocess(model_name: str, audio_path: str, sample_rate: int = 40000):
45
  """
46
- Preprocess audio: slice, normalize, create 16kHz versions.
47
- Runs on CPU (subprocess is fine here, no GPU needed).
48
  """
49
- _setup_applio_env()
 
 
50
 
51
  exp_dir = os.path.join(LOGS_DIR, model_name)
52
- os.makedirs(exp_dir, exist_ok=True)
 
 
 
53
 
54
- dataset_dir = os.path.join(exp_dir, "dataset")
55
- os.makedirs(dataset_dir, exist_ok=True)
56
- shutil.copy2(audio_path, os.path.join(dataset_dir, os.path.basename(audio_path)))
57
 
58
- preprocess_script = os.path.join(APPLIO_DIR, "rvc", "train", "preprocess", "preprocess.py")
 
 
59
 
60
- command = [
61
- sys.executable, preprocess_script,
62
- exp_dir, dataset_dir, str(sample_rate),
63
- "2", "Cut", "False", "True", "0.5", "3.5", "0.3", "none",
64
- ]
65
 
66
- logger.info(f"Running preprocessing for {model_name}...")
67
- logger.info(f"Command: {' '.join(command)}")
68
- logger.info(f"Dataset dir contents: {os.listdir(dataset_dir)}")
 
69
 
70
- result = subprocess.run(command, capture_output=True, text=True, cwd=APPLIO_DIR)
 
 
 
 
71
 
72
- logger.info(f"Preprocess returncode: {result.returncode}")
73
- if result.stdout:
74
- logger.info(f"Preprocess stdout: {result.stdout[-1000:]}")
75
- if result.stderr:
76
- logger.info(f"Preprocess stderr: {result.stderr[-1000:]}")
77
 
78
- if result.returncode != 0:
79
- raise RuntimeError(f"Preprocessing failed: {result.stderr[-500:]}")
80
-
81
- # Log what was created in exp_dir
82
- logger.info(f"Contents of exp_dir ({exp_dir}):")
83
- for item in os.listdir(exp_dir):
84
- full = os.path.join(exp_dir, item)
85
- if os.path.isdir(full):
86
- contents = os.listdir(full)
87
- logger.info(f" {item}/ ({len(contents)} files): {contents[:5]}")
88
- else:
89
- logger.info(f" {item} ({os.path.getsize(full)} bytes)")
90
-
91
- # Count sliced audio files (Applio may nest them in subdirectories)
92
- def count_wav_files(directory):
93
- """Count .wav files recursively."""
94
- count = 0
95
- if os.path.exists(directory):
96
- for root, dirs, files in os.walk(directory):
97
- for f in files:
98
- if f.endswith(".wav"):
99
- count += 1
100
- return count
101
 
102
- sliced_dir = os.path.join(exp_dir, "sliced_audios")
103
- sliced_16k_dir = os.path.join(exp_dir, "sliced_audios_16k")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- n_slices = count_wav_files(sliced_dir)
106
- n_slices_16k = count_wav_files(sliced_16k_dir)
107
-
108
- # Debug: show exact directory structure
109
- def dir_tree(path, depth=2):
110
- """Show directory tree for debugging."""
111
- items = []
112
- if os.path.exists(path):
113
- for item in os.listdir(path):
114
- full = os.path.join(path, item)
115
- if os.path.isdir(full) and depth > 0:
116
- sub_items = os.listdir(full)
117
- items.append(f"{item}/({len(sub_items)} items: {sub_items[:3]})")
118
- else:
119
- items.append(item)
120
- return items
121
-
122
- logger.info(f"sliced_audios tree: {dir_tree(sliced_dir)}")
123
- logger.info(f"sliced_audios_16k tree: {dir_tree(sliced_16k_dir)}")
124
- logger.info(f"WAV counts: sliced={n_slices}, 16k={n_slices_16k}")
125
-
126
- if n_slices > 0 or n_slices_16k > 0:
127
- total = max(n_slices, n_slices_16k)
128
- logger.info(f"Preprocessing complete: {total} slices created.")
129
- return total
130
- else:
131
- raise RuntimeError(
132
- f"Preprocessing produced no audio slices. "
133
- f"sliced_audios: {dir_tree(sliced_dir)}. "
134
- f"sliced_audios_16k: {dir_tree(sliced_16k_dir)}. "
135
- f"stdout: {result.stdout[-200:] if result.stdout else 'empty'}"
136
- )
137
 
138
 
139
  @spaces.GPU(duration=120)
 
43
 
44
  def preprocess(model_name: str, audio_path: str, sample_rate: int = 40000):
45
  """
46
+ Preprocess audio: load, normalize, slice into segments, save at target SR and 16kHz.
47
+ Custom implementation (no Applio subprocess dependency).
48
  """
49
+ import numpy as np
50
+ import librosa
51
+ import soundfile as sf
52
 
53
  exp_dir = os.path.join(LOGS_DIR, model_name)
54
+ sliced_dir = os.path.join(exp_dir, "sliced_audios")
55
+ sliced_16k_dir = os.path.join(exp_dir, "sliced_audios_16k")
56
+ os.makedirs(sliced_dir, exist_ok=True)
57
+ os.makedirs(sliced_16k_dir, exist_ok=True)
58
 
59
+ logger.info(f"Preprocessing {audio_path} for model {model_name}...")
 
 
60
 
61
+ # Load audio at target sample rate
62
+ audio, sr = librosa.load(audio_path, sr=sample_rate, mono=True)
63
+ logger.info(f"Loaded audio: {len(audio)} samples, {len(audio)/sr:.1f}s at {sr}Hz")
64
 
65
+ if len(audio) < sr * 1:
66
+ raise RuntimeError("Audio trop court (< 1 seconde).")
 
 
 
67
 
68
+ # Normalize
69
+ peak = np.abs(audio).max()
70
+ if peak > 0:
71
+ audio = audio / peak * 0.95
72
 
73
+ # Also load at 16kHz
74
+ audio_16k, _ = librosa.load(audio_path, sr=16000, mono=True)
75
+ peak_16k = np.abs(audio_16k).max()
76
+ if peak_16k > 0:
77
+ audio_16k = audio_16k / peak_16k * 0.95
78
 
79
+ # Slice into segments of ~3.5 seconds with 0.3s overlap
80
+ segment_len = int(3.5 * sr)
81
+ hop = int(3.0 * sr) # 3.5 - 0.5 overlap
82
+ segment_len_16k = int(3.5 * 16000)
83
+ hop_16k = int(3.0 * 16000)
84
 
85
+ n_slices = 0
86
+ idx = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
+ while idx < len(audio):
89
+ # Slice at target sample rate
90
+ end = min(idx + segment_len, len(audio))
91
+ segment = audio[idx:end]
92
+
93
+ # Skip very short segments (< 0.5s)
94
+ if len(segment) < int(0.5 * sr):
95
+ idx += hop
96
+ continue
97
+
98
+ # Skip silent segments
99
+ if np.abs(segment).max() < 0.01:
100
+ idx += hop
101
+ continue
102
+
103
+ # Compute corresponding 16k positions
104
+ ratio = 16000 / sr
105
+ idx_16k = int(idx * ratio)
106
+ end_16k = int(end * ratio)
107
+ segment_16k = audio_16k[idx_16k:min(end_16k, len(audio_16k))]
108
+
109
+ # Save slices
110
+ fname = f"{model_name}_{n_slices:04d}.wav"
111
+ sf.write(os.path.join(sliced_dir, fname), segment, sr)
112
+ sf.write(os.path.join(sliced_16k_dir, fname), segment_16k, 16000)
113
+
114
+ n_slices += 1
115
+ idx += hop
116
+
117
+ logger.info(f"Preprocessing complete: {n_slices} slices created.")
118
+
119
+ if n_slices == 0:
120
+ raise RuntimeError("Preprocessing produced no audio slices. L'audio est peut-être silencieux.")
121
 
122
+ return n_slices
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
 
125
  @spaces.GPU(duration=120)