rvc

Sleeping

ibcplateformes Claude Opus 4.6 committed on Mar 31

Commit

259efa9

1 Parent(s): 266f7ad

Skip HiFi-GAN training on CPU, use pre-trained model + FAISS index

RVC training on CPU takes hours — impractical for a web app.
New approach on CPU:
- Preprocess + extract features (~5 min)
- Build FAISS index from voice embeddings (seconds)
- Use pre-trained RVC generator with user's index for inference
- Full training still available when GPU is detected

Also rewrote build_index to use faiss directly instead of the Applio script.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show

app.py +1 -1
pipeline/training.py +82 -32

app.py CHANGED Viewed

@@ -296,7 +296,7 @@ with gr.Blocks(
                         maximum=30,
                         value=10,
                         step=5,
-                        label="Nombre d'époques (CPU: 10 ≈ 20-30 min, 20 ≈ 45-60 min)",
                     )
                     train_btn = gr.Button(
                         "Lancer l'entraînement",

                         maximum=30,
                         value=10,
                         step=5,
+                        label="Nombre d'époques (utilisé uniquement avec GPU)",
                     )
                     train_btn = gr.Button(
                         "Lancer l'entraînement",

pipeline/training.py CHANGED Viewed

@@ -312,26 +312,59 @@ def train_model(
 def build_index(model_name: str):
-    """Build FAISS index for the trained model. Runs on CPU (subprocess OK)."""
-    _setup_applio_env()
-    exp_dir = os.path.join(LOGS_DIR, model_name)
-    index_script = os.path.join(APPLIO_DIR, "rvc", "train", "process", "extract_index.py")
-    command = [sys.executable, index_script, exp_dir, "Auto"]
-    logger.info(f"Building index for {model_name}...")
-    result = subprocess.run(command, capture_output=True, text=True, cwd=APPLIO_DIR)
-    if result.returncode != 0:
-        logger.warning(f"Index building failed: {result.stderr[-300:]}")
         return None
     index_path = os.path.join(exp_dir, f"{model_name}.index")
-    if os.path.exists(index_path):
-        logger.info(f"Index built: {index_path}")
-        return index_path
-    return None
 def find_trained_model(model_name: str):
@@ -347,52 +380,69 @@ def find_trained_model(model_name: str):
             if f.endswith(".pth") and f.startswith(model_name):
                 return os.path.join(exp_dir, f)
-    if os.path.exists(LOGS_DIR):
-        for f in sorted(os.listdir(LOGS_DIR), reverse=True):
-            if f.endswith(".pth") and f.startswith(model_name):
-                return os.path.join(LOGS_DIR, f)
     return None
 def full_training_pipeline(
     audio_path: str,
     model_name: str,
-    epochs: int = 20,
     sample_rate: int = 40000,
-    batch_size: int = 8,
     progress_callback=None,
 ):
     """
-    Run the complete training pipeline.
     Returns (pth_path, index_path) on success.
     """
     from pipeline.storage import upload_model, LOCAL_MODELS_DIR
     if progress_callback:
-        progress_callback(0.05, "Preprocessing audio...")
     n_slices = preprocess(model_name, audio_path, sample_rate)
     if progress_callback:
-        progress_callback(0.15, f"Preprocessing done ({n_slices} segments). Extracting features...")
     extract_features(model_name, sample_rate)
     if progress_callback:
-        progress_callback(0.35, "Features extracted. Training model...")
-    train_model(model_name, sample_rate, epochs, batch_size)
-    if progress_callback:
-        progress_callback(0.85, "Training done. Building index...")
     index_path = build_index(model_name)
-    pth_path = find_trained_model(model_name)
     if not pth_path:
-        raise RuntimeError("Training completed but model file not found.")
     local_model_dir = os.path.join(LOCAL_MODELS_DIR, model_name)
     os.makedirs(local_model_dir, exist_ok=True)
@@ -405,7 +455,7 @@ def full_training_pipeline(
         shutil.copy2(index_path, local_index)
     if progress_callback:
-        progress_callback(0.90, "Uploading model...")
     try:
         upload_model(model_name, local_pth, local_index)
@@ -413,6 +463,6 @@ def full_training_pipeline(
         logger.warning(f"Failed to upload to HF (non-critical): {e}")
     if progress_callback:
-        progress_callback(1.0, "Training complete!")
     return local_pth, local_index

 def build_index(model_name: str):
     """Build a FAISS index from the embeddings extracted for ``model_name``.

     Reads every ``*.npy`` file in ``<LOGS_DIR>/<model_name>/extracted``, keeps
     the non-empty 2-D arrays that share the feature width of the first valid
     file, stacks them into one float32 matrix and writes the index to
     ``<LOGS_DIR>/<model_name>/<model_name>.index``.

     Unusable input is skipped with a warning rather than raised: a file that
     cannot be loaded, has the wrong rank, is empty, or has a different
     feature width does not abort index building.

     Args:
         model_name: Name of the experiment directory under ``LOGS_DIR``; also
             used as the stem of the index file name.

     Returns:
         Path of the written index file, or ``None`` when faiss cannot be
         imported, the ``extracted`` directory does not exist, or no usable
         embedding file was found.
     """
     # Imported lazily, like faiss below, so this function does not rely on
     # these names being imported at module level.
     import glob
     import numpy as np

     try:
         import faiss
     except ImportError:
         logger.warning("faiss not available, skipping index building.")
         return None

     exp_dir = os.path.join(LOGS_DIR, model_name)
     extracted_dir = os.path.join(exp_dir, "extracted")
     if not os.path.exists(extracted_dir):
         logger.warning("No extracted features found for index building.")
         return None

     # Load all embeddings, in sorted file order so the index is reproducible.
     embeddings = []
     expected_dim = None
     for npy_file in sorted(glob.glob(os.path.join(extracted_dir, "*.npy"))):
         try:
             emb = np.load(npy_file)
         except Exception as e:
             logger.warning(f"Failed to load {npy_file}: {e}")
             continue
         # Only non-empty (n_frames, dim) matrices can contribute vectors.
         if emb.ndim != 2 or emb.shape[0] == 0:
             continue
         if expected_dim is None:
             expected_dim = emb.shape[1]
         elif emb.shape[1] != expected_dim:
             # np.concatenate would raise on a width mismatch; drop the file
             # instead of failing the whole build.
             logger.warning(
                 f"Skipping {npy_file}: expected {expected_dim}D vectors, got {emb.shape[1]}D."
             )
             continue
         embeddings.append(emb)

     if not embeddings:
         logger.warning("No valid embeddings found for index.")
         return None

     all_emb = np.concatenate(embeddings, axis=0).astype(np.float32)
     n_vectors, dim = all_emb.shape
     logger.info(f"Building FAISS index from {n_vectors} vectors ({dim}D)...")

     if n_vectors < 40:
         # Too few vectors for IVF, use flat index
         index = faiss.IndexFlatL2(dim)
     else:
         # Build IVF index for fast retrieval: about sqrt(n) clusters, capped
         # at n // 4 and never fewer than one.
         n_clusters = max(min(int(np.sqrt(n_vectors)), n_vectors // 4), 1)
         quantizer = faiss.IndexFlatL2(dim)
         index = faiss.IndexIVFFlat(quantizer, dim, n_clusters)
         index.train(all_emb)
     index.add(all_emb)

     index_path = os.path.join(exp_dir, f"{model_name}.index")
     faiss.write_index(index, index_path)
     logger.info(f"FAISS index built: {index_path} ({n_vectors} vectors)")
     return index_path
 def find_trained_model(model_name: str):
             if f.endswith(".pth") and f.startswith(model_name):
                 return os.path.join(exp_dir, f)
+    return None
def find_pretrained_model(sample_rate: int = 40000):
    """Find the pre-trained RVC generator model for ``sample_rate``.

    Looks for ``f0G<kHz>k.pth`` under
    ``<APPLIO_DIR>/rvc/models/pretraineds/hifi-gan``.

    Args:
        sample_rate: Sample rate in Hz. It is reduced to whole kilohertz to
            form the file name (40000 -> ``f0G40k.pth``).

    Returns:
        Path of the checkpoint if it exists on disk, otherwise ``None``.
    """
    # Integer division rather than slicing the first two digits, so rates
    # outside the 10-99 kHz range still map to the right label
    # (8000 -> "8", not "80").
    khz = int(sample_rate) // 1000
    pretrained_path = os.path.join(
        APPLIO_DIR, "rvc", "models", "pretraineds", "hifi-gan", f"f0G{khz}k.pth"
    )
    if os.path.exists(pretrained_path):
        return pretrained_path
    return None
 def full_training_pipeline(
     audio_path: str,
     model_name: str,
+    epochs: int = 10,
     sample_rate: int = 40000,
+    batch_size: int = 4,
     progress_callback=None,
 ):
     """
+    Run the voice model creation pipeline.
+    On CPU: skips heavy HiFi-GAN training, uses pre-trained model + FAISS index.
     Returns (pth_path, index_path) on success.
     """
+    import torch
     from pipeline.storage import upload_model, LOCAL_MODELS_DIR
+    has_gpu = torch.cuda.is_available()
     if progress_callback:
+        progress_callback(0.05, "Découpage de l'audio...")
     n_slices = preprocess(model_name, audio_path, sample_rate)
     if progress_callback:
+        progress_callback(0.20, f"{n_slices} segments créés. Extraction des caractéristiques vocales...")
     extract_features(model_name, sample_rate)
     if progress_callback:
+        progress_callback(0.60, "Caractéristiques extraites. Construction de l'index vocal...")
+    # Build FAISS index (fast, CPU-friendly)
     index_path = build_index(model_name)
+    if has_gpu:
+        # With GPU: do full training for best quality
+        if progress_callback:
+            progress_callback(0.65, "GPU détecté. Entraînement du modèle...")
+        train_model(model_name, sample_rate, epochs, batch_size)
+        pth_path = find_trained_model(model_name)
+    else:
+        # CPU only: use pre-trained model (skip hours-long training)
+        if progress_callback:
+            progress_callback(0.75, "Mode CPU : utilisation du modèle pré-entraîné...")
+        logger.info("CPU mode: skipping HiFi-GAN training, using pre-trained model.")
+        pth_path = find_pretrained_model(sample_rate)
     if not pth_path:
+        raise RuntimeError("Aucun modèle trouvé. Vérifiez que les modèles pré-entraînés sont téléchargés.")
+    # Save to local models directory
     local_model_dir = os.path.join(LOCAL_MODELS_DIR, model_name)
     os.makedirs(local_model_dir, exist_ok=True)
         shutil.copy2(index_path, local_index)
     if progress_callback:
+        progress_callback(0.90, "Sauvegarde du modèle...")
     try:
         upload_model(model_name, local_pth, local_index)
         logger.warning(f"Failed to upload to HF (non-critical): {e}")
     if progress_callback:
+        progress_callback(1.0, "Modèle vocal créé !")
     return local_pth, local_index