Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
| import logging | |
| import tempfile | |
| import shutil | |
| import gradio as gr | |
| import gc | |
| import time | |
| import numpy as np | |
| import torch | |
# Gradio patches: make gradio_client's JSON-schema helpers tolerate schemas
# that are not dicts (for example a bare boolean) by reporting them as "Any"
# instead of letting the original helpers fail on them.
try:
    import gradio_client.utils as _gc_utils

    _orig_get_type = _gc_utils.get_type

    def _patched_get_type(schema, *args, **kwargs):
        """Return "Any" for non-dict schemas; otherwise defer to the original."""
        if not isinstance(schema, dict):
            return "Any"
        return _orig_get_type(schema, *args, **kwargs)

    _gc_utils.get_type = _patched_get_type

    _orig_json_schema = _gc_utils._json_schema_to_python_type

    def _patched_json_schema(schema, *args, **kwargs):
        """Return "Any" for non-dict schemas; otherwise defer to the original."""
        if not isinstance(schema, dict):
            return "Any"
        return _orig_json_schema(schema, *args, **kwargs)

    _gc_utils._json_schema_to_python_type = _patched_json_schema

    def _patched_public_json_schema(schema, defs=None):
        """Public entry point routed through the guarded private helper.

        When the caller supplies no ``defs``, fall back to the schema's own
        "$defs" section so that references inside the schema can still be
        resolved instead of being looked up in ``None``.
        """
        if defs is None and isinstance(schema, dict):
            defs = schema.get("$defs")
        return _patched_json_schema(schema, defs)

    _gc_utils.json_schema_to_python_type = _patched_public_json_schema
except Exception as _patch_error:
    # Best effort: the app can still start without the patch, but leave a
    # trace so that a silently missing patch is diagnosable.
    logging.getLogger(__name__).warning(
        "No se pudieron aplicar los parches de gradio_client: %s", _patch_error
    )
# Logging configuration: INFO level, with timestamp and level name on each line.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
# Module-level logger for this file.
logger = logging.getLogger(__name__)
| from pipeline.setup import setup_seed_vc | |
| from pipeline.storage import init_storage, list_models, download_model, delete_model, get_reference_path | |
| from pipeline.training import save_voice_reference | |
| from pipeline.separation import _separate_audio_impl | |
| from pipeline.inference import _convert_voice_impl | |
| from pipeline.mixing import mix_audio | |
| from pipeline.rvc_training import train_rvc_model | |
try:
    import spaces
except ImportError:
    class spaces:
        """No-op stand-in used when the ``spaces`` package cannot be imported."""

        @staticmethod
        def GPU(duration=60, **kwargs):
            """Decorator stub that returns the decorated function unchanged.

            Supports both the parameterized form (``@spaces.GPU(duration=120)``)
            and the bare form (``@spaces.GPU``). ``duration`` and any extra
            keyword arguments are accepted and ignored.
            """
            if callable(duration):
                # Bare usage: the decorated function arrived in the first slot.
                return duration

            def decorator(fn):
                return fn

            return decorator
def check_file(path, label, logs):
    """Check that an output file exists and is larger than 44 bytes.

    Appends exactly one human-readable line to ``logs`` describing the outcome.

    Args:
        path: Path of the file to check. ``None`` or an empty value is treated
            as a missing file instead of raising.
        label: Display name of the artifact, used in the log line.
        logs: List of log lines; mutated in place.

    Returns:
        True when the file exists and its size exceeds 44 bytes, else False.
    """
    # Files at or below this size are rejected (presumably the size of a
    # WAV header with no audio samples -- TODO confirm).
    min_size = 44

    if not path or not os.path.exists(path):
        logs.append(f"❌ ERROR: {label} NO se encontró en {path}")
        return False

    size = os.path.getsize(path)
    if size <= min_size:
        # Report the failure explicitly instead of logging a success line
        # for a file that is then rejected.
        logs.append(f"❌ ERROR: {label} está vacío: {os.path.basename(path)} ({size} bytes)")
        return False

    logs.append(f"✅ {label} generado: {os.path.basename(path)} ({size} bytes)")
    return True
| def _full_pipeline_gpu(song_file, reference_path, pitch, diffusion_steps, similarity, | |
| vocal_volume, instrumental_volume): | |
| import torch | |
| import librosa | |
| import soundfile as sf | |
| logs = [] | |
| logs.append(f"🚀 Iniciando pipeline en GPU...") | |
| # Asegurar directorio de trabajo | |
| app_dir = os.path.dirname(os.path.abspath(__file__)) | |
| os.chdir(app_dir) | |
| try: | |
| # 1. Separación | |
| logs.append("⏳ Paso 1/3: Separando voces (Demucs)...") | |
| vocals_path, instruments_path = _separate_audio_impl(song_file) | |
| if not check_file(vocals_path, "Vocales", logs): return None, None, None, "\n".join(logs) | |
| torch.cuda.empty_cache() | |
| gc.collect() | |
| # 2. Conversión | |
| logs.append("⏳ Paso 2/3: Convirtiendo voz (Seed-VC)...") | |
| converted_path = _convert_voice_impl(vocals_path, reference_path, int(pitch), int(diffusion_steps), float(similarity)) | |
| if not check_file(converted_path, "Voz convertida", logs): return None, None, None, "\n".join(logs) | |
| torch.cuda.empty_cache() | |
| gc.collect() | |
| # 3. Mezcla | |
| logs.append("⏳ Paso 3/3: Mezclando pistas...") | |
| final_path = mix_audio(converted_path, instruments_path, float(vocal_volume), float(instrumental_volume)) | |
| if not check_file(final_path, "Resultado final", logs): return None, None, None, "\n".join(logs) | |
| # 4. Retornar DATOS (para evitar problemas de sincronización de archivos en ZeroGPU) | |
| logs.append("📦 Preparando audios para el reproductor...") | |
| def load_audio_to_numpy(p): | |
| data, sr = librosa.load(p, sr=None) | |
| data = np.nan_to_num(data) | |
| return (sr, data.astype(np.float32)) | |
| v_out = load_audio_to_numpy(vocals_path) | |
| c_out = load_audio_to_numpy(converted_path) | |
| f_out = load_audio_to_numpy(final_path) | |
| logs.append("✨ Proceso completado. Enviando al navegador...") | |
| return v_out, c_out, f_out, "\n".join(logs) | |
| except Exception as e: | |
| import traceback | |
| logs.append(f"💥 ERROR: {str(e)}\n{traceback.format_exc()}") | |
| return None, None, None, "\n".join(logs) | |
def train_voice_model(audio_file, model_name, progress=gr.Progress()):
    """Save a voice reference profile under a normalized name.

    Args:
        audio_file: Path of the uploaded reference audio.
        model_name: Profile name typed by the user. Surrounding whitespace is
            removed and inner spaces are replaced with underscores.
        progress: Gradio progress tracker (not used directly here).

    Returns:
        A ``(status_message, reference_path)`` tuple; ``reference_path`` is
        None when the inputs are incomplete or saving fails.
    """
    # Normalize before validating so that a whitespace-only name is rejected
    # instead of being turned into an empty profile name.
    model_name = (model_name or "").strip().replace(" ", "_")
    if not audio_file or not model_name:
        return "Error: Datos incompletos.", None
    try:
        _, ref_path = save_voice_reference(audio_path=audio_file, model_name=model_name)
    except Exception as e:
        return f"Error: {str(e)}", None
    return f"¡Perfil '{model_name}' guardado!", ref_path
def train_rvc_model_ui(audio_path, model_name, epochs, batch_size, f0_method, save_every, progress=gr.Progress()):
    """Gradio click handler that forwards the form values to ``train_rvc_model``.

    All arguments are passed through unchanged and in the same order, with the
    progress tracker supplied as the ``progress`` keyword. The result of
    ``train_rvc_model`` is returned as-is.
    """
    training_args = (audio_path, model_name, epochs, batch_size, f0_method, save_every)
    return train_rvc_model(*training_args, progress=progress)
def get_model_choices():
    """Return the dropdown choices: the stored models, or a single placeholder.

    When ``list_models()`` yields nothing, a one-item list with the
    "(ningún modelo)" placeholder is returned.
    """
    available = list_models()
    return available if available else ["(ningún modelo)"]
def refresh_models():
    """Render the stored models as an HTML table.

    Returns:
        An HTML string: a gray "no models" paragraph when ``list_models()``
        is empty, otherwise a two-column table (name, status) with one row
        per model. Model names are HTML-escaped because they originate from
        user-entered profile names.
    """
    import html  # Local import keeps this function self-contained.

    models = list_models()
    if not models:
        return "<p style='color:gray;'>Ningún modelo guardado</p>"
    rows = "".join(
        f"<tr><td>{html.escape(str(m))}</td><td>Disponible</td></tr>" for m in models
    )
    return (
        "<table style='width:100%;border-collapse:collapse;'>"
        "<tr><th style='text-align:left;border-bottom:1px solid #555;padding:8px;'>Nombre</th>"
        "<th style='text-align:left;border-bottom:1px solid #555;padding:8px;'>Estado</th></tr>"
        f"{rows}</table>"
    )
def delete_selected_model(model_name_to_delete):
    """Delete the selected profile and return ``(message, models_table_html)``.

    An empty selection or the "(ningún modelo)" placeholder produces a prompt
    to pick a model. Any exception raised while deleting or re-rendering the
    table is reported in the message instead of propagating.
    """
    nothing_selected = (not model_name_to_delete) or model_name_to_delete == "(ningún modelo)"
    if nothing_selected:
        return "Por favor, selecciona un modelo para eliminar.", refresh_models()

    try:
        delete_model(model_name_to_delete)
        outcome = f"Modelo '{model_name_to_delete}' eliminado."
        table = refresh_models()
    except Exception as err:
        outcome = f"Error : {err}"
        table = refresh_models()
    return outcome, table
def convert_song(model_choice, song_file, pitch, similarity, diffusion_steps, vocal_volume, instrumental_volume, progress=gr.Progress()):
    """Gradio click handler for the conversion tab.

    Returns a 5-tuple ``(status, vocals, converted, final, logs)``. When the
    song or the profile is missing (or the profile is the "(ningún modelo)"
    placeholder) an error status is returned without running the pipeline.
    Exceptions are reported as an error status plus the traceback text.
    """
    import traceback

    has_model = bool(model_choice) and model_choice != "(ningún modelo)"
    if not (song_file and has_model):
        return "Error: Faltan datos.", None, None, None, "Esperando..."

    try:
        progress(0.1, desc="Iniciando...")
        reference_path = get_reference_path(model_choice)
        vocals, converted, final, logs = _full_pipeline_gpu(
            song_file=song_file,
            reference_path=reference_path,
            pitch=pitch,
            diffusion_steps=diffusion_steps,
            similarity=similarity,
            vocal_volume=vocal_volume,
            instrumental_volume=instrumental_volume,
        )
        if final is not None:
            status = "✅ Completado"
        else:
            status = "❌ Error (revisa logs)"
        return status, vocals, converted, final, logs
    except Exception as e:
        return f"Error: {str(e)}", None, None, None, traceback.format_exc()
| # --- UI Layout --- | |
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a source whose indentation had been lost -- confirm against the
# intended layout (in particular which widgets sit inside each Row/Column).
with gr.Blocks(title="Voice Clone RVC", theme=gr.themes.Soft()) as app:

    def _model_choices_update():
        """Return a Dropdown update carrying the current list of profiles."""
        return gr.Dropdown(choices=get_model_choices())

    gr.Markdown("# 🎤 Aplicación de Clonación de Voz (Seed-VC)\n> Powered by Seed-VC + Demucs · ZeroGPU")
    with gr.Tabs():
        # Tab 1: voice profile
        with gr.TabItem("1. Perfil"):
            gr.Markdown("### Guardar tu referencia de voz")
            with gr.Row():
                with gr.Column():
                    train_audio = gr.Audio(label="Sube tu voz (3-30 seg)", type="filepath")
                    train_name = gr.Textbox(label="Nombre del perfil", placeholder="ej: mi_voz")
                    train_btn = gr.Button("Guardar Perfil", variant="primary")
                with gr.Column():
                    train_status = gr.Textbox(label="Estado")
                    train_file = gr.File(label="Archivo de Referencia")

        # Tab 2: conversion
        with gr.TabItem("2. Conversión"):
            gr.Markdown("### Reemplazar la voz de una canción")
            with gr.Row():
                with gr.Column(scale=2):
                    model_sel = gr.Dropdown(choices=get_model_choices(), label="Selecciona Perfil")
                    refresh_btn_conv = gr.Button("🔄 Actualizar lista", size="sm")
                    song_input = gr.Audio(label="Canción a convertir", type="filepath")
                    with gr.Accordion("Ajustes Avanzados", open=False):
                        pitch_shift = gr.Slider(-12, 12, 0, step=1, label="Tono (Pitch)")
                        sim_slider = gr.Slider(0, 1, 0.7, step=0.1, label="Fidelidad/Similitud")
                        diff_steps = gr.Slider(5, 50, 25, step=5, label="Calidad (Pasos de difusión)")
                        v_vol = gr.Slider(0, 2, 1, step=0.1, label="Volumen Voz")
                        i_vol = gr.Slider(0, 2, 1, step=0.1, label="Volumen Música")
                    convert_btn = gr.Button("🚀 Iniciar Conversión", variant="primary", size="lg")
                with gr.Column(scale=3):
                    conv_status = gr.Textbox(label="Estado")
                    out_vocals = gr.Audio(label="Voz Original (Separada)")
                    out_conv = gr.Audio(label="Voz Clonada")
                    out_final = gr.Audio(label="Resultado Final (Mezclado)")
                    debug_logs = gr.Textbox(label="🔍 Logs de Procesamiento", lines=10)
            convert_btn.click(convert_song,
                              [model_sel, song_input, pitch_shift, sim_slider, diff_steps, v_vol, i_vol],
                              [conv_status, out_vocals, out_conv, out_final, debug_logs])

        # Tab 3: model management
        with gr.TabItem("3. Mis Modelos"):
            gr.Markdown("### Gestionar perfiles guardados")
            models_table_mg = gr.HTML(value=refresh_models())
            with gr.Row():
                models_refresh_btn = gr.Button("Actualizar", size="sm")
                models_delete_name = gr.Dropdown(choices=get_model_choices(), label="Eliminar perfil")
                models_delete_btn = gr.Button("Eliminar", variant="stop", size="sm")
            models_delete_status = gr.Textbox(label="Resultado")
            # After a deletion, refresh both dropdowns as well, so the removed
            # profile can no longer be picked for deletion or conversion.
            models_delete_btn.click(
                fn=delete_selected_model,
                inputs=[models_delete_name],
                outputs=[models_delete_status, models_table_mg],
            ).then(
                fn=_model_choices_update, outputs=[models_delete_name]
            ).then(
                fn=_model_choices_update, outputs=[model_sel]
            )

        # RVC tab: training
        with gr.TabItem("Entrenamiento RVC"):
            gr.Markdown("### Entrenar un modelo RVC (Máximo 100 epochs)")
            with gr.Row():
                with gr.Column(scale=2):
                    rvc_audio = gr.Audio(
                        label="Dataset de voz (WAV/MP3 de 1 a 10 minutos)",
                        type="filepath",
                        sources=["upload"],
                    )
                    rvc_model_name = gr.Textbox(
                        label="Nombre del modelo (.pth)",
                        placeholder="ej: mi_voz_rvc",
                        max_lines=1,
                    )
                    rvc_epochs = gr.Slider(
                        minimum=1,
                        maximum=100,
                        value=100,
                        step=1,
                        label="Epochs (Iteraciones de entrenamiento)",
                    )
                    with gr.Accordion("Opciones Avanzadas", open=False):
                        rvc_f0_method = gr.Dropdown(
                            choices=["rmvpe", "crepe", "fcpe"],
                            value="rmvpe",
                            label="Método de Extracción de Pitch (f0)"
                        )
                        rvc_batch_size = gr.Slider(
                            minimum=1,
                            maximum=24,
                            value=4,
                            step=1,
                            label="Batch Size (Tamaño de lote)"
                        )
                        rvc_save_every = gr.Slider(
                            minimum=1,
                            maximum=50,
                            value=10,
                            step=1,
                            label="Guardar Checkpoint cada (Epochs)"
                        )
                    rvc_train_btn = gr.Button(
                        "Iniciar Entrenamiento RVC",
                        variant="primary",
                        size="lg",
                    )
                with gr.Column(scale=1):
                    rvc_status = gr.Textbox(
                        label="Estado y Logs",
                        interactive=False,
                        lines=10,
                    )
                    rvc_download = gr.File(
                        label="Archivo .pth generado",
                        interactive=False,
                    )
                    gr.Markdown(
                        "**🚀 Entrenamiento Resumible:**\n"
                        "- Si ZeroGPU corta el entrenamiento por tiempo (10 min), puedes volver a dar clic en el botón y el proceso continuará desde el último punto guardado.\n"
                        "- Los checkpoints se guardan cada **10 epochs** por defecto."
                    )
            rvc_train_btn.click(
                fn=train_rvc_model_ui,
                inputs=[rvc_audio, rvc_model_name, rvc_epochs, rvc_batch_size, rvc_f0_method, rvc_save_every],
                outputs=[rvc_status, rvc_download],
            )

        # Debug tab
        with gr.TabItem("Depuración"):
            gr.Markdown("### Diagnóstico del sistema")
            debug_view = gr.Textbox(label="Logs de sistema", lines=20, interactive=False)
            debug_btn = gr.Button("Ver Logs")

            def read_logs():
                """Return the contents of debug_gpu.log, or a placeholder if it is absent."""
                log_path = "debug_gpu.log"  # Resolved against the current working directory.
                if os.path.exists(log_path):
                    # Explicit encoding with replacement so an undecodable
                    # byte cannot break the debug view.
                    with open(log_path, "r", encoding="utf-8", errors="replace") as f:
                        return f.read()
                return "No hay logs disponibles."

    # --- Events (wired at the end so every referenced component already exists) ---
    train_btn.click(
        fn=train_voice_model,
        inputs=[train_audio, train_name],
        outputs=[train_status, train_file]
    ).then(
        fn=refresh_models, outputs=[models_table_mg]
    ).then(
        fn=_model_choices_update, outputs=[model_sel]
    ).then(
        fn=_model_choices_update, outputs=[models_delete_name]
    )
    refresh_btn_conv.click(fn=_model_choices_update, outputs=[model_sel])
    models_refresh_btn.click(fn=refresh_models, outputs=[models_table_mg])
    models_refresh_btn.click(fn=_model_choices_update, outputs=[models_delete_name])
    debug_btn.click(read_logs, outputs=[debug_view])
if __name__ == "__main__":
    # Prepare Seed-VC, make sure the results directory exists, then start the
    # app with both results directories allowed for file serving.
    setup_seed_vc()
    results_dir = "./results"
    os.makedirs(results_dir, exist_ok=True)
    served_dirs = [os.path.abspath(p) for p in (results_dir, "./pipeline/results")]
    app.launch(allowed_paths=served_dirs)