Spaces:
Sleeping
Sleeping
ibcplateformes Claude Opus 4.6 committed on
Commit ·
266f7ad
1
Parent(s): db251d2
Optimize training for CPU: limit slices, reduce batch size and epochs
Browse files
- Limit audio slices to 30 max (was unlimited ~85 for 4.5min audio)
- Reduce default epochs from 20 to 10
- Reduce batch_size from 8 to 4
- Update UI slider: max 30, default 10, with time estimates
- These changes make training feasible on CPU Basic (~20-30 min)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- app.py +4 -4
- pipeline/training.py +5 -3
app.py
CHANGED
|
@@ -110,7 +110,7 @@ def train_voice_model(audio_file, model_name, epochs, progress=gr.Progress()):
|
|
| 110 |
model_name=model_name,
|
| 111 |
epochs=int(epochs),
|
| 112 |
sample_rate=40000,
|
| 113 |
-
batch_size=
|
| 114 |
progress_callback=progress_callback,
|
| 115 |
)
|
| 116 |
|
|
@@ -293,10 +293,10 @@ with gr.Blocks(
|
|
| 293 |
)
|
| 294 |
train_epochs = gr.Slider(
|
| 295 |
minimum=5,
|
| 296 |
-
maximum=
|
| 297 |
-
value=
|
| 298 |
step=5,
|
| 299 |
-
label="Nombre d'époques (
|
| 300 |
)
|
| 301 |
train_btn = gr.Button(
|
| 302 |
"Lancer l'entraînement",
|
|
|
|
| 110 |
model_name=model_name,
|
| 111 |
epochs=int(epochs),
|
| 112 |
sample_rate=40000,
|
| 113 |
+
batch_size=4,
|
| 114 |
progress_callback=progress_callback,
|
| 115 |
)
|
| 116 |
|
|
|
|
| 293 |
)
|
| 294 |
train_epochs = gr.Slider(
|
| 295 |
minimum=5,
|
| 296 |
+
maximum=30,
|
| 297 |
+
value=10,
|
| 298 |
step=5,
|
| 299 |
+
label="Nombre d'époques (CPU: 10 ≈ 20-30 min, 20 ≈ 45-60 min)",
|
| 300 |
)
|
| 301 |
train_btn = gr.Button(
|
| 302 |
"Lancer l'entraînement",
|
pipeline/training.py
CHANGED
|
@@ -95,10 +95,12 @@ def preprocess(model_name: str, audio_path: str, sample_rate: int = 40000):
|
|
| 95 |
segment_len_16k = int(3.5 * 16000)
|
| 96 |
hop_16k = int(3.0 * 16000)
|
| 97 |
|
|
|
|
|
|
|
| 98 |
n_slices = 0
|
| 99 |
idx = 0
|
| 100 |
|
| 101 |
-
while idx < len(audio):
|
| 102 |
# Slice at target sample rate
|
| 103 |
end = min(idx + segment_len, len(audio))
|
| 104 |
segment = audio[idx:end]
|
|
@@ -227,8 +229,8 @@ def extract_features(model_name: str, sample_rate: int = 40000, f0_method: str =
|
|
| 227 |
def train_model(
|
| 228 |
model_name: str,
|
| 229 |
sample_rate: int = 40000,
|
| 230 |
-
total_epochs: int = 20,
|
| 231 |
-
batch_size: int = 8,
|
| 232 |
):
|
| 233 |
"""
|
| 234 |
Train RVC v2 model. Runs IN-PROCESS with mp.Process patched to avoid
|
|
|
|
| 95 |
segment_len_16k = int(3.5 * 16000)
|
| 96 |
hop_16k = int(3.0 * 16000)
|
| 97 |
|
| 98 |
+
MAX_SLICES = 30 # Limit for CPU-feasible training
|
| 99 |
+
|
| 100 |
n_slices = 0
|
| 101 |
idx = 0
|
| 102 |
|
| 103 |
+
while idx < len(audio) and n_slices < MAX_SLICES:
|
| 104 |
# Slice at target sample rate
|
| 105 |
end = min(idx + segment_len, len(audio))
|
| 106 |
segment = audio[idx:end]
|
|
|
|
| 229 |
def train_model(
|
| 230 |
model_name: str,
|
| 231 |
sample_rate: int = 40000,
|
| 232 |
+
total_epochs: int = 10,
|
| 233 |
+
batch_size: int = 4,
|
| 234 |
):
|
| 235 |
"""
|
| 236 |
Train RVC v2 model. Runs IN-PROCESS with mp.Process patched to avoid
|