# clone-vocal-rvc / app.py
# ibcplateformes
# Add voice similarity control + improve reference audio processing
# 969158e
"""
Clone Vocal - Outil web de clonage vocal base sur Seed-VC (zero-shot).
Interface Gradio en francais, deploye sur HuggingFace Spaces avec ZeroGPU.
"""
import os
import sys
import logging
import tempfile
import shutil
import gradio as gr
# Monkey-patch gradio_client to fix "argument of type 'bool' is not iterable":
# its schema helpers are wrapped so that any schema that is not a dict is
# reported as the generic type "Any" instead of being inspected.
def _tolerate_non_dict_schema(func):
    """Wrap a schema helper so that non-dict schemas short-circuit to "Any".

    Args:
        func: The original helper; it is called unchanged (same positional and
            keyword arguments) whenever the schema is a dict.

    Returns:
        A function with the same call signature as ``func``.
    """

    def _guarded(schema, *args, **kwargs):
        if not isinstance(schema, dict):
            return "Any"
        return func(schema, *args, **kwargs)

    return _guarded


try:
    import gradio_client.utils as _gc_utils

    _orig_get_type = _gc_utils.get_type
    _patched_get_type = _tolerate_non_dict_schema(_orig_get_type)
    _gc_utils.get_type = _patched_get_type

    _orig_json_schema = _gc_utils._json_schema_to_python_type
    _patched_json_schema = _tolerate_non_dict_schema(_orig_json_schema)
    _gc_utils._json_schema_to_python_type = _patched_json_schema
    # The public entry point is redirected to the guarded private helper.
    _gc_utils.json_schema_to_python_type = lambda schema, defs=None: _patched_json_schema(
        schema, defs
    )
except Exception as _patch_error:
    # The patch stays best-effort (the app must still start if gradio_client's
    # internals differ), but the failure is reported instead of being hidden.
    # The module-level `logger` is not defined yet at this point of the file,
    # so the logger is looked up directly.
    logging.getLogger(__name__).warning(
        "Patch gradio_client non applique: %s", _patch_error
    )
# Setup logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# Startup: clone Seed-VC
logger.info("Initialisation de l'application...")
from pipeline.setup import setup_seed_vc
from pipeline.storage import init_storage, list_models, download_model, delete_model, get_reference_path

try:
    setup_seed_vc()
except Exception as e:
    # A setup failure does not abort startup, so the UI is still built; the
    # full traceback is logged because this is the only trace of the failure.
    logger.exception("Erreur lors du setup: %s", e)

# Initialize model storage (only when a repository is configured through the
# HF_MODELS_REPO environment variable).
HF_MODELS_REPO = os.environ.get("HF_MODELS_REPO", "")
if HF_MODELS_REPO:
    init_storage(HF_MODELS_REPO)
    logger.info("Stockage HuggingFace configure: %s", HF_MODELS_REPO)

# Import GPU-decorated functions for ZeroGPU detection.
# NOTE(review): `_gpu_warmup` is not referenced in the visible part of this
# file; it is presumably imported only so ZeroGPU sees it -- confirm before removing.
from pipeline.training import save_voice_reference, _gpu_warmup
from pipeline.separation import separate_audio
from pipeline.inference import convert_voice
# -- Training Tab --
def train_voice_model(audio_file, model_name, progress=gr.Progress()):
    """Gradio handler: save an uploaded voice sample as a named voice reference.

    Args:
        audio_file: Path of the uploaded audio file, or None when nothing was
            uploaded.
        model_name: Profile name typed by the user.
        progress: Gradio progress tracker.

    Returns:
        A ``(status_message, reference_path)`` tuple. ``reference_path`` is
        None whenever validation fails or saving raises.
    """
    import re

    if audio_file is None:
        return "Erreur : Veuillez uploader un fichier audio.", None
    if not model_name or not model_name.strip():
        return "Erreur : Veuillez entrer un nom pour le modele.", None

    # The name comes straight from a web form and is passed on as the model
    # identifier (presumably used to build storage paths -- confirm in
    # pipeline.storage). Every character that is not a word character, dot or
    # hyphen becomes "_" (spaces included, as before), and leading dots are
    # dropped so that "." / ".." can never be produced.
    model_name = re.sub(r"[^\w.\-]", "_", model_name.strip()).lstrip(".")
    if not model_name:
        return "Erreur : Veuillez entrer un nom pour le modele.", None

    def progress_callback(value, desc):
        # Adapter: the pipeline calls back positionally, Gradio wants `desc=`.
        progress(value, desc=desc)

    try:
        progress(0.0, desc="Demarrage...")
        # Only the reference path is surfaced to the UI; the first element of
        # the returned pair is not needed here.
        _, ref_path = save_voice_reference(
            audio_path=audio_file,
            model_name=model_name,
            progress_callback=progress_callback,
        )
        return "Reference vocale '{}' sauvegardee avec succes !".format(model_name), ref_path
    except Exception as e:
        import traceback

        tb = traceback.format_exc()
        logger.error("Erreur training: %s", tb)
        # The tail of the traceback is shown in the status box for diagnosis.
        return "Erreur : {}: {}\n\nDetails:\n{}".format(
            type(e).__name__, str(e), tb[-500:]
        ), None
# -- Conversion Tab --
def get_model_choices():
    """Return the entries for the model dropdowns.

    Returns:
        Whatever ``list_models()`` returns when it is non-empty, otherwise a
        one-element list holding the "(aucun modele)" placeholder.
    """
    # An empty (falsy) listing is swapped for the placeholder entry.
    return list_models() or ["(aucun modele)"]
def convert_song(
    model_choice,
    song_file,
    pitch,
    similarity,
    diffusion_steps,
    vocal_volume,
    instrumental_volume,
    progress=gr.Progress(),
):
    """Gradio handler running the full pipeline: separate + convert + mix.

    Args:
        model_choice: Name of the voice profile selected in the dropdown.
        song_file: Path of the uploaded song, or None when nothing was uploaded.
        pitch: Transposition value; cast to int before conversion.
        similarity: Voice similarity value; cast to float before conversion.
        diffusion_steps: Number of diffusion steps; cast to int.
        vocal_volume: Vocal gain for the final mix; cast to float.
        instrumental_volume: Instrumental gain for the final mix; cast to float.
        progress: Gradio progress tracker.

    Returns:
        A 4-tuple ``(status_message, vocals_path, converted_path, final_path)``.
        On any failure the three paths are None and the status holds the error.
    """

    def failure(message):
        # Every error path returns the message plus three empty audio slots.
        return message, None, None, None

    if song_file is None:
        return failure("Erreur : Veuillez uploader un fichier audio.")
    if model_choice == "(aucun modele)" or not model_choice:
        return failure("Erreur : Veuillez d'abord enregistrer une reference vocale.")

    try:
        # Imported lazily, and inside the try so that an import failure is
        # reported in the status box like any other pipeline error.
        from pipeline.mixing import mix_audio

        # Step 1: Download model / find reference audio
        progress(0.05, desc="Chargement du modele...")
        pth_path, _ = download_model(model_choice)
        if not pth_path:
            return failure("Erreur : Modele '{}' introuvable.".format(model_choice))

        # Find the reference audio path
        reference_path = get_reference_path(model_choice)
        if not reference_path:
            return failure(
                "Erreur : Audio de reference introuvable pour '{}'.".format(model_choice)
            )

        # Step 2: Separate vocals from instruments
        progress(0.10, desc="Separation des pistes (Demucs)...")
        vocals_path, instruments_path = separate_audio(song_file)

        # Step 3: Convert vocals with Seed-VC
        progress(0.40, desc="Conversion vocale (Seed-VC)...")
        converted_path = convert_voice(
            audio_path=vocals_path,
            reference_path=reference_path,
            pitch=int(pitch),
            diffusion_steps=int(diffusion_steps),
            similarity=float(similarity),
        )

        # Step 4: Mix converted vocals with instruments
        progress(0.85, desc="Mixage final...")
        final_path = mix_audio(
            vocals_path=converted_path,
            instruments_path=instruments_path,
            vocal_volume=float(vocal_volume),
            instrumental_volume=float(instrumental_volume),
        )

        progress(1.0, desc="Termine !")
        return (
            "Conversion terminee avec succes !",
            vocals_path,
            converted_path,
            final_path,
        )
    except Exception as e:
        import traceback

        tb = traceback.format_exc()
        logger.error("Erreur conversion: %s", tb)
        # The tail of the traceback is shown in the status box for diagnosis.
        return failure(
            "Erreur : {}: {}\n\nDetails:\n{}".format(type(e).__name__, str(e), tb[-500:])
        )
# -- Models Tab --
def refresh_models(models=None):
    """Render the stored models as an HTML table.

    Args:
        models: Optional sequence of model names to render. When None (the
            default, used by the UI handlers) the names come from
            ``list_models()``.

    Returns:
        An HTML string: a two-column table (name, status) with one row per
        model, or a grey "no model" paragraph when there is nothing to show.
    """
    import html

    if models is None:
        models = list_models()
    if not models:
        return "<p style='color:gray;'>Aucun modele enregistre</p>"

    # Model names originate from user input, so they are escaped before being
    # embedded in markup.
    rows = "".join(
        "<tr><td>{}</td><td>Disponible</td></tr>".format(html.escape(str(m)))
        for m in models
    )
    return (
        "<table style='width:100%;border-collapse:collapse;'>"
        "<tr><th style='text-align:left;border-bottom:1px solid #555;padding:8px;'>Nom</th>"
        "<th style='text-align:left;border-bottom:1px solid #555;padding:8px;'>Statut</th></tr>"
        "{}</table>".format(rows)
    )
def delete_selected_model(model_name_to_delete):
    """Gradio handler: delete a stored model and re-render the models table.

    Args:
        model_name_to_delete: Name selected in the dropdown; an empty value or
            the "(aucun modele)" placeholder is rejected.

    Returns:
        A ``(status_message, models_table_html)`` tuple. The table is
        re-rendered in every case.
    """
    if not model_name_to_delete or model_name_to_delete == "(aucun modele)":
        return "Veuillez selectionner un modele a supprimer.", refresh_models()
    try:
        delete_model(model_name_to_delete)
        return "Modele '{}' supprime.".format(model_name_to_delete), refresh_models()
    except Exception as e:
        # Logged with traceback, like the other handlers; the UI only shows
        # the short message.
        logger.exception("Erreur suppression du modele '%s'", model_name_to_delete)
        return "Erreur : {}".format(e), refresh_models()
# -- Build Gradio UI --
# Markdown shown at the top of the page (user-facing text, kept in French).
DESCRIPTION = """
# Clone Vocal
Outil de clonage vocal **zero-shot** base sur **Seed-VC** (Diffusion Transformer).
**Comment utiliser :**
1. **Onglet "Ma voix"** : Uploadez un court extrait de votre voix (3-30 sec) pour creer votre profil vocal
2. **Onglet "Convertir"** : Uploadez un morceau de musique, l'outil remplace la voix par la votre
3. **Onglet "Modeles"** : Gerez vos profils vocaux
> **Zero-shot** : pas d'entrainement necessaire ! Juste 3-30 secondes de votre voix suffisent.
"""

with gr.Blocks(
    title="Clone Vocal",
    theme=gr.themes.Soft(),
) as app:
    gr.Markdown(DESCRIPTION)

    with gr.Tabs():
        # Tab 1: Voice Reference
        with gr.TabItem("Ma voix"):
            gr.Markdown("### Enregistrer votre reference vocale")
            with gr.Row():
                with gr.Column(scale=2):
                    train_audio = gr.Audio(
                        label="Extrait de votre voix (WAV ou MP3, 3-30 secondes)",
                        type="filepath",
                        sources=["upload"],
                    )
                    train_model_name = gr.Textbox(
                        label="Nom du profil",
                        placeholder="ex: ma_voix",
                        max_lines=1,
                    )
                    train_btn = gr.Button(
                        "Sauvegarder",
                        variant="primary",
                        size="lg",
                    )
                with gr.Column(scale=1):
                    train_status = gr.Textbox(
                        label="Statut",
                        interactive=False,
                        lines=3,
                    )
                    train_download = gr.File(
                        label="Fichier de reference",
                        interactive=False,
                    )
            gr.Markdown(
                "**Conseils :**\n"
                "- Utilisez un enregistrement propre (pas de bruit de fond, pas de musique)\n"
                "- Parlez ou chantez naturellement pendant 3 a 30 secondes\n"
                "- Plus l'extrait est long et varie, meilleur sera le resultat\n"
                "- Format WAV ou MP3 accepte"
            )
            train_btn.click(
                fn=train_voice_model,
                inputs=[train_audio, train_model_name],
                outputs=[train_status, train_download],
            )

        # Tab 2: Conversion
        with gr.TabItem("Convertir un morceau"):
            gr.Markdown("### Remplacer la voix d'un morceau par la votre")
            with gr.Row():
                with gr.Column(scale=2):
                    # Choices are computed once at build time; the button
                    # below reloads them on demand.
                    convert_model = gr.Dropdown(
                        choices=get_model_choices(),
                        label="Profil vocal",
                        interactive=True,
                    )
                    refresh_btn = gr.Button("Rafraichir la liste", size="sm")
                    convert_audio = gr.Audio(
                        label="Morceau a convertir (WAV ou MP3)",
                        type="filepath",
                        sources=["upload"],
                    )
                    with gr.Accordion("Parametres avances", open=False):
                        convert_pitch = gr.Slider(
                            minimum=-24,
                            maximum=24,
                            value=0,
                            step=1,
                            label="Transposition (demi-tons)",
                        )
                        convert_similarity = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.7,
                            step=0.05,
                            label="Similarite vocale (0.5=naturel, 0.7=equilibre, 0.9=plus fidele)",
                        )
                        convert_diffusion = gr.Slider(
                            minimum=5,
                            maximum=100,
                            value=25,
                            step=5,
                            label="Qualite (10=rapide, 25=equilibre, 50=haute qualite)",
                        )
                        convert_vocal_vol = gr.Slider(
                            minimum=0.0,
                            maximum=2.0,
                            value=1.0,
                            step=0.1,
                            label="Volume de la voix",
                        )
                        convert_inst_vol = gr.Slider(
                            minimum=0.0,
                            maximum=2.0,
                            value=1.0,
                            step=0.1,
                            label="Volume des instruments",
                        )
                    convert_btn = gr.Button(
                        "Convertir et mixer",
                        variant="primary",
                        size="lg",
                    )
                with gr.Column(scale=1):
                    convert_status = gr.Textbox(
                        label="Statut",
                        interactive=False,
                        lines=3,
                    )
                    gr.Markdown("**Apercu des pistes :**")
                    preview_vocals = gr.Audio(
                        label="Voix originale (separee)",
                        interactive=False,
                    )
                    preview_converted = gr.Audio(
                        label="Voix convertie",
                        interactive=False,
                    )
                    gr.Markdown("**Resultat final :**")
                    final_output = gr.Audio(
                        label="Morceau final (voix + instruments)",
                        interactive=False,
                    )
            refresh_btn.click(
                fn=lambda: gr.Dropdown(choices=get_model_choices()),
                outputs=[convert_model],
            )
            # Input order must match the positional parameters of convert_song.
            convert_btn.click(
                fn=convert_song,
                inputs=[
                    convert_model,
                    convert_audio,
                    convert_pitch,
                    convert_similarity,
                    convert_diffusion,
                    convert_vocal_vol,
                    convert_inst_vol,
                ],
                outputs=[convert_status, preview_vocals, preview_converted, final_output],
            )

        # Tab 3: Models
        with gr.TabItem("Mes modeles"):
            gr.Markdown("### Gerer vos profils vocaux")
            models_table = gr.HTML(
                value=refresh_models(),
                label="Modeles enregistres",
            )
            with gr.Row():
                models_refresh_btn = gr.Button("Rafraichir", size="sm")
                models_delete_name = gr.Dropdown(
                    choices=get_model_choices(),
                    label="Modele a supprimer",
                    interactive=True,
                )
                models_delete_btn = gr.Button("Supprimer", variant="stop", size="sm")
            models_delete_status = gr.Textbox(label="Statut", interactive=False)

            def model_dropdown_updates():
                """Return refreshed choices for the delete and conversion dropdowns."""
                choices = get_model_choices()
                return gr.Dropdown(choices=choices), gr.Dropdown(choices=choices)

            models_refresh_btn.click(
                fn=refresh_models,
                outputs=[models_table],
            )
            models_refresh_btn.click(
                fn=lambda: gr.Dropdown(choices=get_model_choices()),
                outputs=[models_delete_name],
            )
            # The delete handler only outputs the status and the table, so the
            # dropdowns would keep listing the removed profile until a manual
            # refresh; a follow-up step reloads their choices.
            models_delete_btn.click(
                fn=delete_selected_model,
                inputs=[models_delete_name],
                outputs=[models_delete_status, models_table],
            ).then(
                fn=model_dropdown_updates,
                outputs=[models_delete_name, convert_model],
            )

        # Tab 4: Debug (temporary)
        with gr.TabItem("Debug GPU"):
            gr.Markdown("### Logs GPU Worker (pour diagnostic)")
            debug_output = gr.Textbox(
                label="Derniers logs GPU",
                interactive=False,
                lines=20,
            )
            debug_btn = gr.Button("Lire les logs", size="sm")

            def read_debug_log():
                """Return the content of the GPU debug log, or a hint when it is absent."""
                log_path = "/home/user/app/debug_gpu.log"
                try:
                    # Opened directly rather than after an existence check, so
                    # the file cannot vanish in between. errors="replace"
                    # keeps an undecodable byte from breaking this view.
                    with open(log_path, "r", encoding="utf-8", errors="replace") as f:
                        return f.read()
                except FileNotFoundError:
                    return "Aucun log disponible. Lancez d'abord une conversion."

            debug_btn.click(fn=read_debug_log, outputs=[debug_output])
# Script entry point: the server is started only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    # server_name="0.0.0.0" makes Gradio listen on all network interfaces.
    app.launch(server_name="0.0.0.0")