Spaces:
Paused
Paused
| """ | |
| Clone Vocal - Outil web de clonage vocal base sur Seed-VC (zero-shot). | |
| Interface Gradio en francais, deploye sur HuggingFace Spaces avec ZeroGPU. | |
| """ | |
| import os | |
| import sys | |
| import logging | |
| import tempfile | |
| import shutil | |
| import gradio as gr | |
# Monkey-patch gradio_client to fix "argument of type 'bool' is not iterable".
# Both patched helpers short-circuit to "Any" when the schema they receive is
# not a dict (presumably a bare boolean schema node -- confirm against the
# installed gradio_client version) and otherwise defer to the original.
try:
    import gradio_client.utils as _gc_utils

    _orig_get_type = _gc_utils.get_type

    def _patched_get_type(schema, *args, **kwargs):
        """Return "Any" for a non-dict schema, else call the original get_type."""
        if not isinstance(schema, dict):
            return "Any"
        return _orig_get_type(schema, *args, **kwargs)

    _gc_utils.get_type = _patched_get_type

    _orig_json_schema = _gc_utils._json_schema_to_python_type

    def _patched_json_schema(schema, *args, **kwargs):
        """Return "Any" for a non-dict schema, else call the original converter."""
        if not isinstance(schema, dict):
            return "Any"
        return _orig_json_schema(schema, *args, **kwargs)

    _gc_utils._json_schema_to_python_type = _patched_json_schema
    # The public entry point is rerouted through the guarded private converter.
    _gc_utils.json_schema_to_python_type = lambda schema, defs=None: _patched_json_schema(
        schema, defs
    )
except Exception as _patch_error:
    # Best effort: the application must still start when the patch cannot be
    # applied (module missing, attribute renamed, ...), but the failure is
    # reported instead of being dropped silently. The module-level ``logger``
    # does not exist yet at this point, hence the direct getLogger call.
    logging.getLogger(__name__).warning(
        "Patch gradio_client non applique: %s", _patch_error
    )
# Setup logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# Startup: clone Seed-VC
logger.info("Initialisation de l'application...")

from pipeline.setup import setup_seed_vc
from pipeline.storage import init_storage, list_models, download_model, delete_model, get_reference_path

try:
    setup_seed_vc()
except Exception as e:
    # Startup continues so the UI can still come up; logger.exception keeps the
    # full traceback, which the previous message-only log discarded.
    logger.exception("Erreur lors du setup: %s", e)

# Initialize model storage (only when a HuggingFace repo is configured through
# the HF_MODELS_REPO environment variable).
HF_MODELS_REPO = os.environ.get("HF_MODELS_REPO", "")
if HF_MODELS_REPO:
    init_storage(HF_MODELS_REPO)
    logger.info("Stockage HuggingFace configure: %s", HF_MODELS_REPO)
| # Import GPU-decorated functions for ZeroGPU detection | |
| from pipeline.training import save_voice_reference, _gpu_warmup | |
| from pipeline.separation import separate_audio | |
| from pipeline.inference import convert_voice | |
| # -- Training Tab -- | |
def train_voice_model(audio_file, model_name, progress=gr.Progress()):
    """Gradio handler: save an uploaded clip as a named voice reference.

    Args:
        audio_file: Path of the uploaded audio, or ``None`` when nothing was
            uploaded.
        model_name: Profile name typed by the user.
        progress: Gradio progress tracker used to report advancement.

    Returns:
        A ``(status_message, reference_path)`` tuple. ``reference_path`` is
        ``None`` whenever validation or saving failed.
    """
    if audio_file is None:
        return "Erreur : Veuillez uploader un fichier audio.", None
    if not model_name or not model_name.strip():
        return "Erreur : Veuillez entrer un nom pour le modele.", None

    model_name = model_name.strip().replace(" ", "_")

    # The name comes straight from a web form and is handed to the storage
    # layer, so refuse anything that could be read as a path component.
    if model_name in (".", "..") or any(ch in model_name for ch in ("/", "\\", "\0")):
        return "Erreur : Le nom du profil ne doit pas contenir de separateur de chemin.", None

    def progress_callback(value, desc):
        # Adapter: the pipeline passes the description positionally.
        progress(value, desc=desc)

    try:
        progress(0.0, desc="Demarrage...")
        # Only the reference path is returned to the UI; the first element of
        # the result is not used by this handler.
        _, ref_path = save_voice_reference(
            audio_path=audio_file,
            model_name=model_name,
            progress_callback=progress_callback,
        )
        return "Reference vocale '{}' sauvegardee avec succes !".format(model_name), ref_path
    except Exception as e:
        import traceback

        tb = traceback.format_exc()
        logger.error("Erreur training: %s", tb)
        # The tail of the traceback is shown in the status box for diagnosis.
        return "Erreur : {}: {}\n\nDetails:\n{}".format(
            type(e).__name__, str(e), tb[-500:]
        ), None
| # -- Conversion Tab -- | |
def get_model_choices():
    """Return the dropdown choices: the stored model names, or a single
    placeholder entry when ``list_models()`` yields nothing."""
    return list_models() or ["(aucun modele)"]
def convert_song(
    model_choice,
    song_file,
    pitch,
    similarity,
    diffusion_steps,
    vocal_volume,
    instrumental_volume,
    progress=gr.Progress(),
):
    """Run the whole conversion: load the profile, split the song, convert the
    vocals and remix them with the instruments.

    Returns:
        A 4-tuple ``(status, separated_vocals, converted_vocals, final_mix)``.
        The three paths are ``None`` whenever the status is an error message.
    """

    def _failure(message):
        # Every error path yields the message plus three empty audio outputs.
        return message, None, None, None

    if song_file is None:
        return _failure("Erreur : Veuillez uploader un fichier audio.")
    if not model_choice or model_choice == "(aucun modele)":
        return _failure("Erreur : Veuillez d'abord enregistrer une reference vocale.")

    from pipeline.mixing import mix_audio

    try:
        # Step 1: fetch the model, then locate its reference audio.
        progress(0.05, desc="Chargement du modele...")
        model_file, _ = download_model(model_choice)
        if not model_file:
            return _failure("Erreur : Modele '{}' introuvable.".format(model_choice))

        voice_reference = get_reference_path(model_choice)
        if not voice_reference:
            return _failure(
                "Erreur : Audio de reference introuvable pour '{}'.".format(model_choice)
            )

        # Step 2: split the song into vocals and instruments.
        progress(0.10, desc="Separation des pistes (Demucs)...")
        original_vocals, backing_track = separate_audio(song_file)

        # Step 3: convert the separated vocals towards the reference voice.
        progress(0.40, desc="Conversion vocale (Seed-VC)...")
        new_vocals = convert_voice(
            audio_path=original_vocals,
            reference_path=voice_reference,
            pitch=int(pitch),
            diffusion_steps=int(diffusion_steps),
            similarity=float(similarity),
        )

        # Step 4: mix the converted vocals back with the instruments.
        progress(0.85, desc="Mixage final...")
        mixed_track = mix_audio(
            vocals_path=new_vocals,
            instruments_path=backing_track,
            vocal_volume=float(vocal_volume),
            instrumental_volume=float(instrumental_volume),
        )

        progress(1.0, desc="Termine !")
        return "Conversion terminee avec succes !", original_vocals, new_vocals, mixed_track
    except Exception as e:
        import traceback

        trace_text = traceback.format_exc()
        logger.error("Erreur conversion: {}".format(trace_text))
        # Surface the tail of the traceback in the status box.
        return _failure(
            "Erreur : {}: {}\n\nDetails:\n{}".format(
                type(e).__name__, str(e), trace_text[-500:]
            )
        )
| # -- Models Tab -- | |
def refresh_models():
    """Render the stored model names as an HTML snippet.

    Returns:
        A grey "no model" paragraph when ``list_models()`` is empty, otherwise
        a two-column table (name, status) with one row per model.
    """
    import html  # local import so this block does not rely on a file-level one

    models = list_models()
    if not models:
        return "<p style='color:gray;'>Aucun modele enregistre</p>"

    # Model names are chosen by users, so they are escaped before being
    # embedded in markup; otherwise a name such as "<script>" would be
    # injected verbatim into the page.
    rows = "".join(
        "<tr><td>{}</td><td>Disponible</td></tr>".format(html.escape(str(m)))
        for m in models
    )
    return (
        "<table style='width:100%;border-collapse:collapse;'>"
        "<tr><th style='text-align:left;border-bottom:1px solid #555;padding:8px;'>Nom</th>"
        "<th style='text-align:left;border-bottom:1px solid #555;padding:8px;'>Statut</th></tr>"
        "{}</table>".format(rows)
    )
def delete_selected_model(model_name_to_delete):
    """Gradio handler: delete a stored model.

    Args:
        model_name_to_delete: Name selected in the dropdown; empty values and
            the "(aucun modele)" placeholder are rejected.

    Returns:
        A ``(status_message, models_table_html)`` tuple; the table is
        re-rendered on every path.
    """
    if not model_name_to_delete or model_name_to_delete == "(aucun modele)":
        return "Veuillez selectionner un modele a supprimer.", refresh_models()
    try:
        delete_model(model_name_to_delete)
        return "Modele '{}' supprime.".format(model_name_to_delete), refresh_models()
    except Exception as e:
        # Keep a server-side trace like the other handlers do; previously the
        # failure was only shown in the UI and left nothing in the logs.
        logger.exception("Erreur suppression du modele '%s'", model_name_to_delete)
        return "Erreur : {}".format(e), refresh_models()
| # -- Build Gradio UI -- | |
# Markdown introduction (title plus usage steps for the three tabs); it is
# passed to gr.Markdown at the top of the Blocks layout.
DESCRIPTION = """
# Clone Vocal
Outil de clonage vocal **zero-shot** base sur **Seed-VC** (Diffusion Transformer).
**Comment utiliser :**
1. **Onglet "Ma voix"** : Uploadez un court extrait de votre voix (3-30 sec) pour creer votre profil vocal
2. **Onglet "Convertir"** : Uploadez un morceau de musique, l'outil remplace la voix par la votre
3. **Onglet "Modeles"** : Gerez vos profils vocaux
> **Zero-shot** : pas d'entrainement necessaire ! Juste 3-30 secondes de votre voix suffisent.
"""
# Whole Gradio layout: four tabs (voice reference, conversion, model
# management, GPU debug log) plus their event wiring.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# that had lost its indentation -- confirm the placement of the "Conseils"
# markdown and of the sliders inside the accordion against the deployed UI.
with gr.Blocks(
    title="Clone Vocal",
    theme=gr.themes.Soft(),
) as app:
    gr.Markdown(DESCRIPTION)
    with gr.Tabs():
        # Tab 1: Voice Reference
        with gr.TabItem("Ma voix"):
            gr.Markdown("### Enregistrer votre reference vocale")
            with gr.Row():
                with gr.Column(scale=2):
                    train_audio = gr.Audio(
                        label="Extrait de votre voix (WAV ou MP3, 3-30 secondes)",
                        type="filepath",
                        sources=["upload"],
                    )
                    train_model_name = gr.Textbox(
                        label="Nom du profil",
                        placeholder="ex: ma_voix",
                        max_lines=1,
                    )
                    train_btn = gr.Button(
                        "Sauvegarder",
                        variant="primary",
                        size="lg",
                    )
                with gr.Column(scale=1):
                    train_status = gr.Textbox(
                        label="Statut",
                        interactive=False,
                        lines=3,
                    )
                    train_download = gr.File(
                        label="Fichier de reference",
                        interactive=False,
                    )
            gr.Markdown(
                "**Conseils :**\n"
                "- Utilisez un enregistrement propre (pas de bruit de fond, pas de musique)\n"
                "- Parlez ou chantez naturellement pendant 3 a 30 secondes\n"
                "- Plus l'extrait est long et varie, meilleur sera le resultat\n"
                "- Format WAV ou MP3 accepte"
            )
            train_btn.click(
                fn=train_voice_model,
                inputs=[train_audio, train_model_name],
                outputs=[train_status, train_download],
            )
        # Tab 2: Conversion
        with gr.TabItem("Convertir un morceau"):
            gr.Markdown("### Remplacer la voix d'un morceau par la votre")
            with gr.Row():
                with gr.Column(scale=2):
                    convert_model = gr.Dropdown(
                        choices=get_model_choices(),
                        label="Profil vocal",
                        interactive=True,
                    )
                    refresh_btn = gr.Button("Rafraichir la liste", size="sm")
                    convert_audio = gr.Audio(
                        label="Morceau a convertir (WAV ou MP3)",
                        type="filepath",
                        sources=["upload"],
                    )
                    with gr.Accordion("Parametres avances", open=False):
                        convert_pitch = gr.Slider(
                            minimum=-24,
                            maximum=24,
                            value=0,
                            step=1,
                            label="Transposition (demi-tons)",
                        )
                        convert_similarity = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.7,
                            step=0.05,
                            label="Similarite vocale (0.5=naturel, 0.7=equilibre, 0.9=plus fidele)",
                        )
                        convert_diffusion = gr.Slider(
                            minimum=5,
                            maximum=100,
                            value=25,
                            step=5,
                            label="Qualite (10=rapide, 25=equilibre, 50=haute qualite)",
                        )
                        convert_vocal_vol = gr.Slider(
                            minimum=0.0,
                            maximum=2.0,
                            value=1.0,
                            step=0.1,
                            label="Volume de la voix",
                        )
                        convert_inst_vol = gr.Slider(
                            minimum=0.0,
                            maximum=2.0,
                            value=1.0,
                            step=0.1,
                            label="Volume des instruments",
                        )
                    convert_btn = gr.Button(
                        "Convertir et mixer",
                        variant="primary",
                        size="lg",
                    )
                with gr.Column(scale=1):
                    convert_status = gr.Textbox(
                        label="Statut",
                        interactive=False,
                        lines=3,
                    )
                    gr.Markdown("**Apercu des pistes :**")
                    preview_vocals = gr.Audio(
                        label="Voix originale (separee)",
                        interactive=False,
                    )
                    preview_converted = gr.Audio(
                        label="Voix convertie",
                        interactive=False,
                    )
                    gr.Markdown("**Resultat final :**")
                    final_output = gr.Audio(
                        label="Morceau final (voix + instruments)",
                        interactive=False,
                    )
            # Re-query the stored models so newly saved profiles become selectable.
            refresh_btn.click(
                fn=lambda: gr.Dropdown(choices=get_model_choices()),
                outputs=[convert_model],
            )
            convert_btn.click(
                fn=convert_song,
                inputs=[
                    convert_model,
                    convert_audio,
                    convert_pitch,
                    convert_similarity,
                    convert_diffusion,
                    convert_vocal_vol,
                    convert_inst_vol,
                ],
                outputs=[convert_status, preview_vocals, preview_converted, final_output],
            )
        # Tab 3: Models
        with gr.TabItem("Mes modeles"):
            gr.Markdown("### Gerer vos profils vocaux")
            models_table = gr.HTML(
                value=refresh_models(),
                label="Modeles enregistres",
            )
            with gr.Row():
                models_refresh_btn = gr.Button("Rafraichir", size="sm")
                models_delete_name = gr.Dropdown(
                    choices=get_model_choices(),
                    label="Modele a supprimer",
                    interactive=True,
                )
                models_delete_btn = gr.Button("Supprimer", variant="stop", size="sm")
            models_delete_status = gr.Textbox(label="Statut", interactive=False)
            # The refresh button drives two independent events: table and dropdown.
            models_refresh_btn.click(
                fn=refresh_models,
                outputs=[models_table],
            )
            models_refresh_btn.click(
                fn=lambda: gr.Dropdown(choices=get_model_choices()),
                outputs=[models_delete_name],
            )
            models_delete_btn.click(
                fn=delete_selected_model,
                inputs=[models_delete_name],
                outputs=[models_delete_status, models_table],
            ).then(
                # Once the deletion has run, drop the stale entry from the
                # dropdown (and clear the selection) so the deleted model
                # cannot be picked a second time.
                fn=lambda: gr.Dropdown(choices=get_model_choices(), value=None),
                outputs=[models_delete_name],
            )
        # Tab 4: Debug (temporary)
        with gr.TabItem("Debug GPU"):
            gr.Markdown("### Logs GPU Worker (pour diagnostic)")
            debug_output = gr.Textbox(
                label="Derniers logs GPU",
                interactive=False,
                lines=20,
            )
            debug_btn = gr.Button("Lire les logs", size="sm")

            def read_debug_log():
                """Return the tail of the GPU debug log, or a hint when the
                log file does not exist yet."""
                log_path = "/home/user/app/debug_gpu.log"
                # Bound what is sent to the browser; the textbox is meant for
                # the latest entries, not the full history.
                max_bytes = 200 * 1024
                try:
                    # Open directly instead of exists()-then-open so a file
                    # removed in between cannot raise.
                    with open(log_path, "rb") as f:
                        f.seek(0, os.SEEK_END)
                        f.seek(max(0, f.tell() - max_bytes))
                        tail = f.read()
                except FileNotFoundError:
                    return "Aucun log disponible. Lancez d'abord une conversion."
                # Lenient decoding: the cut may land inside a multi-byte
                # character and the log may contain non-UTF-8 bytes.
                return tail.decode("utf-8", errors="replace")

            debug_btn.click(fn=read_debug_log, outputs=[debug_output])
# Launch the Gradio server only when this file is executed as a script;
# server_name="0.0.0.0" makes it listen on all network interfaces.
if __name__ == "__main__":
    app.launch(server_name="0.0.0.0")