""" Clone Vocal - Outil web de clonage vocal base sur Seed-VC (zero-shot). Interface Gradio en francais, deploye sur HuggingFace Spaces avec ZeroGPU. """ import os import sys import logging import tempfile import shutil import gradio as gr # Monkey-patch gradio_client to fix "argument of type 'bool' is not iterable" try: import gradio_client.utils as _gc_utils _orig_get_type = _gc_utils.get_type def _patched_get_type(schema, *args, **kwargs): if not isinstance(schema, dict): return "Any" return _orig_get_type(schema, *args, **kwargs) _gc_utils.get_type = _patched_get_type _orig_json_schema = _gc_utils._json_schema_to_python_type def _patched_json_schema(schema, *args, **kwargs): if not isinstance(schema, dict): return "Any" return _orig_json_schema(schema, *args, **kwargs) _gc_utils._json_schema_to_python_type = _patched_json_schema _gc_utils.json_schema_to_python_type = lambda schema, defs=None: _patched_json_schema( schema, defs ) except Exception: pass # Setup logging logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s") logger = logging.getLogger(__name__) # Startup: clone Seed-VC logger.info("Initialisation de l'application...") from pipeline.setup import setup_seed_vc from pipeline.storage import init_storage, list_models, download_model, delete_model, get_reference_path try: setup_seed_vc() except Exception as e: logger.error("Erreur lors du setup: {}".format(e)) # Initialize model storage HF_MODELS_REPO = os.environ.get("HF_MODELS_REPO", "") if HF_MODELS_REPO: init_storage(HF_MODELS_REPO) logger.info("Stockage HuggingFace configure: {}".format(HF_MODELS_REPO)) # Import GPU-decorated functions for ZeroGPU detection from pipeline.training import save_voice_reference, _gpu_warmup from pipeline.separation import separate_audio from pipeline.inference import convert_voice # -- Training Tab -- def train_voice_model(audio_file, model_name, progress=gr.Progress()): """Handler: save voice reference.""" if audio_file is None: return "Erreur : Veuillez uploader un fichier audio.", None if not model_name or not model_name.strip(): return "Erreur : Veuillez entrer un nom pour le modele.", None model_name = model_name.strip().replace(" ", "_") def progress_callback(value, desc): progress(value, desc=desc) try: progress(0.0, desc="Demarrage...") pth_path, ref_path = save_voice_reference( audio_path=audio_file, model_name=model_name, progress_callback=progress_callback, ) return "Reference vocale '{}' sauvegardee avec succes !".format(model_name), ref_path except Exception as e: import traceback tb = traceback.format_exc() logger.error("Erreur training: {}".format(tb)) return "Erreur : {}: {}\n\nDetails:\n{}".format( type(e).__name__, str(e), tb[-500:] ), None # -- Conversion Tab -- def get_model_choices(): """Get list of trained model names for dropdown.""" models = list_models() if not models: return ["(aucun modele)"] return models def convert_song( model_choice, song_file, pitch, similarity, diffusion_steps, vocal_volume, instrumental_volume, progress=gr.Progress(), ): """Full pipeline: separate + convert + mix.""" if song_file is None: return "Erreur : Veuillez uploader un fichier audio.", None, None, None if model_choice == "(aucun modele)" or not model_choice: return "Erreur : Veuillez d'abord enregistrer une reference vocale.", None, None, None from pipeline.mixing import mix_audio try: # Step 1: Download model / find reference audio progress(0.05, desc="Chargement du modele...") pth_path, ref_or_index = download_model(model_choice) if not pth_path: return "Erreur : Modele '{}' introuvable.".format(model_choice), None, None, None # Find the reference audio path reference_path = get_reference_path(model_choice) if not reference_path: return "Erreur : Audio de reference introuvable pour '{}'.".format(model_choice), None, None, None # Step 2: Separate vocals from instruments progress(0.10, desc="Separation des pistes (Demucs)...") vocals_path, instruments_path = separate_audio(song_file) progress(0.40, desc="Conversion vocale (Seed-VC)...") # Step 3: Convert vocals with Seed-VC converted_path = convert_voice( audio_path=vocals_path, reference_path=reference_path, pitch=int(pitch), diffusion_steps=int(diffusion_steps), similarity=float(similarity), ) progress(0.85, desc="Mixage final...") # Step 4: Mix converted vocals with instruments final_path = mix_audio( vocals_path=converted_path, instruments_path=instruments_path, vocal_volume=float(vocal_volume), instrumental_volume=float(instrumental_volume), ) progress(1.0, desc="Termine !") return ( "Conversion terminee avec succes !", vocals_path, converted_path, final_path, ) except Exception as e: import traceback tb = traceback.format_exc() logger.error("Erreur conversion: {}".format(tb)) return "Erreur : {}: {}\n\nDetails:\n{}".format( type(e).__name__, str(e), tb[-500:] ), None, None, None # -- Models Tab -- def refresh_models(): """Refresh the model list as HTML.""" models = list_models() if not models: return "

Aucun modele enregistre

" rows = "".join( "{}Disponible".format(m) for m in models ) return ( "" "" "" "{}
NomStatut
".format(rows) ) def delete_selected_model(model_name_to_delete): """Delete a model.""" if not model_name_to_delete or model_name_to_delete == "(aucun modele)": return "Veuillez selectionner un modele a supprimer.", refresh_models() try: delete_model(model_name_to_delete) return "Modele '{}' supprime.".format(model_name_to_delete), refresh_models() except Exception as e: return "Erreur : {}".format(e), refresh_models() # -- Build Gradio UI -- DESCRIPTION = """ # Clone Vocal Outil de clonage vocal **zero-shot** base sur **Seed-VC** (Diffusion Transformer). **Comment utiliser :** 1. **Onglet "Ma voix"** : Uploadez un court extrait de votre voix (3-30 sec) pour creer votre profil vocal 2. **Onglet "Convertir"** : Uploadez un morceau de musique, l'outil remplace la voix par la votre 3. **Onglet "Modeles"** : Gerez vos profils vocaux > **Zero-shot** : pas d'entrainement necessaire ! Juste 3-30 secondes de votre voix suffisent. """ with gr.Blocks( title="Clone Vocal", theme=gr.themes.Soft(), ) as app: gr.Markdown(DESCRIPTION) with gr.Tabs(): # Tab 1: Voice Reference with gr.TabItem("Ma voix"): gr.Markdown("### Enregistrer votre reference vocale") with gr.Row(): with gr.Column(scale=2): train_audio = gr.Audio( label="Extrait de votre voix (WAV ou MP3, 3-30 secondes)", type="filepath", sources=["upload"], ) train_model_name = gr.Textbox( label="Nom du profil", placeholder="ex: ma_voix", max_lines=1, ) train_btn = gr.Button( "Sauvegarder", variant="primary", size="lg", ) with gr.Column(scale=1): train_status = gr.Textbox( label="Statut", interactive=False, lines=3, ) train_download = gr.File( label="Fichier de reference", interactive=False, ) gr.Markdown( "**Conseils :**\n" "- Utilisez un enregistrement propre (pas de bruit de fond, pas de musique)\n" "- Parlez ou chantez naturellement pendant 3 a 30 secondes\n" "- Plus l'extrait est long et varie, meilleur sera le resultat\n" "- Format WAV ou MP3 accepte" ) train_btn.click( fn=train_voice_model, inputs=[train_audio, train_model_name], outputs=[train_status, train_download], ) # Tab 2: Conversion with gr.TabItem("Convertir un morceau"): gr.Markdown("### Remplacer la voix d'un morceau par la votre") with gr.Row(): with gr.Column(scale=2): convert_model = gr.Dropdown( choices=get_model_choices(), label="Profil vocal", interactive=True, ) refresh_btn = gr.Button("Rafraichir la liste", size="sm") convert_audio = gr.Audio( label="Morceau a convertir (WAV ou MP3)", type="filepath", sources=["upload"], ) with gr.Accordion("Parametres avances", open=False): convert_pitch = gr.Slider( minimum=-24, maximum=24, value=0, step=1, label="Transposition (demi-tons)", ) convert_similarity = gr.Slider( minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Similarite vocale (0.5=naturel, 0.7=equilibre, 0.9=plus fidele)", ) convert_diffusion = gr.Slider( minimum=5, maximum=100, value=25, step=5, label="Qualite (10=rapide, 25=equilibre, 50=haute qualite)", ) convert_vocal_vol = gr.Slider( minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="Volume de la voix", ) convert_inst_vol = gr.Slider( minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="Volume des instruments", ) convert_btn = gr.Button( "Convertir et mixer", variant="primary", size="lg", ) with gr.Column(scale=1): convert_status = gr.Textbox( label="Statut", interactive=False, lines=3, ) gr.Markdown("**Apercu des pistes :**") preview_vocals = gr.Audio( label="Voix originale (separee)", interactive=False, ) preview_converted = gr.Audio( label="Voix convertie", interactive=False, ) gr.Markdown("**Resultat final :**") final_output = gr.Audio( label="Morceau final (voix + instruments)", interactive=False, ) refresh_btn.click( fn=lambda: gr.Dropdown(choices=get_model_choices()), outputs=[convert_model], ) convert_btn.click( fn=convert_song, inputs=[ convert_model, convert_audio, convert_pitch, convert_similarity, convert_diffusion, convert_vocal_vol, convert_inst_vol, ], outputs=[convert_status, preview_vocals, preview_converted, final_output], ) # Tab 3: Models with gr.TabItem("Mes modeles"): gr.Markdown("### Gerer vos profils vocaux") models_table = gr.HTML( value=refresh_models(), label="Modeles enregistres", ) with gr.Row(): models_refresh_btn = gr.Button("Rafraichir", size="sm") models_delete_name = gr.Dropdown( choices=get_model_choices(), label="Modele a supprimer", interactive=True, ) models_delete_btn = gr.Button("Supprimer", variant="stop", size="sm") models_delete_status = gr.Textbox(label="Statut", interactive=False) models_refresh_btn.click( fn=refresh_models, outputs=[models_table], ) models_refresh_btn.click( fn=lambda: gr.Dropdown(choices=get_model_choices()), outputs=[models_delete_name], ) models_delete_btn.click( fn=delete_selected_model, inputs=[models_delete_name], outputs=[models_delete_status, models_table], ) # Tab 4: Debug (temporary) with gr.TabItem("Debug GPU"): gr.Markdown("### Logs GPU Worker (pour diagnostic)") debug_output = gr.Textbox( label="Derniers logs GPU", interactive=False, lines=20, ) debug_btn = gr.Button("Lire les logs", size="sm") def read_debug_log(): log_path = "/home/user/app/debug_gpu.log" if os.path.exists(log_path): with open(log_path, "r") as f: return f.read() return "Aucun log disponible. Lancez d'abord une conversion." debug_btn.click(fn=read_debug_log, outputs=[debug_output]) if __name__ == "__main__": app.launch(server_name="0.0.0.0")