rvc

Sleeping

ibcplateformes Claude Opus 4.6 committed on Mar 31

Commit

266f7ad

1 Parent(s): db251d2

Optimize training for CPU: limit slices, reduce batch size and epochs

- Limit audio slices to 30 max (previously unlimited: ~85 slices for a 4.5 min audio file)
- Reduce default epochs from 20 to 10
- Reduce batch_size from 8 to 4
- Update the UI epochs slider: max 30 (was 50), default 10 (was 20), and a label showing CPU time estimates
- Together, these changes make training feasible on CPU Basic (~20-30 min at the default 10 epochs)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show

app.py +4 -4
pipeline/training.py +5 -3

app.py CHANGED Viewed

@@ -110,7 +110,7 @@ def train_voice_model(audio_file, model_name, epochs, progress=gr.Progress()):
             model_name=model_name,
             epochs=int(epochs),
             sample_rate=40000,
-            batch_size=8,
             progress_callback=progress_callback,
         )
@@ -293,10 +293,10 @@ with gr.Blocks(
                     )
                     train_epochs = gr.Slider(
                         minimum=5,
-                        maximum=50,
-                        value=20,
                         step=5,
-                        label="Nombre d'époques (plus = meilleure qualité, plus long)",
                     )
                     train_btn = gr.Button(
                         "Lancer l'entraînement",

             model_name=model_name,
             epochs=int(epochs),
             sample_rate=40000,
+            batch_size=4,
             progress_callback=progress_callback,
         )
                     )
                     train_epochs = gr.Slider(
                         minimum=5,
+                        maximum=30,
+                        value=10,
                         step=5,
+                        label="Nombre d'époques (CPU: 10 ≈ 20-30 min, 20 ≈ 45-60 min)",
                     )
                     train_btn = gr.Button(
                         "Lancer l'entraînement",

pipeline/training.py CHANGED Viewed

@@ -95,10 +95,12 @@ def preprocess(model_name: str, audio_path: str, sample_rate: int = 40000):
     segment_len_16k = int(3.5 * 16000)
     hop_16k = int(3.0 * 16000)
     n_slices = 0
     idx = 0
-    while idx < len(audio):
         # Slice at target sample rate
         end = min(idx + segment_len, len(audio))
         segment = audio[idx:end]
@@ -227,8 +229,8 @@ def extract_features(model_name: str, sample_rate: int = 40000, f0_method: str =
 def train_model(
     model_name: str,
     sample_rate: int = 40000,
-    total_epochs: int = 20,
-    batch_size: int = 8,
 ):
     """
     Train RVC v2 model. Runs IN-PROCESS with mp.Process patched to avoid

     segment_len_16k = int(3.5 * 16000)
     hop_16k = int(3.0 * 16000)
+    MAX_SLICES = 30  # Limit for CPU-feasible training
     n_slices = 0
     idx = 0
+    while idx < len(audio) and n_slices < MAX_SLICES:
         # Slice at target sample rate
         end = min(idx + segment_len, len(audio))
         segment = audio[idx:end]
 def train_model(
     model_name: str,
     sample_rate: int = 40000,
+    total_epochs: int = 10,
+    batch_size: int = 4,
 ):
     """
     Train RVC v2 model. Runs IN-PROCESS with mp.Process patched to avoid