rvc

Sleeping

File size: 9,840 Bytes

import os
import sys
import logging
import tempfile
import shutil
import gradio as gr

# Compatibility shim for gradio_client's JSON-schema-to-type helpers: any
# schema that is not a dict is reported as "Any" instead of being passed to
# the stock implementation.
# NOTE(review): presumably this works around gradio_client failing on
# non-dict schemas (e.g. a bare boolean) while building API info — confirm
# against the installed gradio/gradio_client versions.
try:
    import gradio_client.utils as _gc_utils
    _orig_get_type = _gc_utils.get_type

    def _patched_get_type(schema, *args, **kwargs):
        """Return "Any" for non-dict schemas, otherwise defer to the original."""
        if not isinstance(schema, dict):
            return "Any"
        return _orig_get_type(schema, *args, **kwargs)

    _gc_utils.get_type = _patched_get_type
    _orig_json_schema = _gc_utils._json_schema_to_python_type

    def _patched_json_schema(schema, *args, **kwargs):
        """Return "Any" for non-dict schemas, otherwise defer to the original."""
        if not isinstance(schema, dict):
            return "Any"
        return _orig_json_schema(schema, *args, **kwargs)

    _gc_utils._json_schema_to_python_type = _patched_json_schema
    # The public wrapper is replaced outright and calls the patched private
    # helper directly, so whatever else the stock wrapper did is bypassed.
    _gc_utils.json_schema_to_python_type = lambda schema, defs=None: _patched_json_schema(
        schema, defs
    )
except Exception:
    # The patch stays best-effort (the app must still start), but a failure
    # is no longer silent: it may have left the module only partly patched.
    # A named logger is used on purpose: the module-level logging.warning()
    # would implicitly call basicConfig() and turn the explicit
    # configuration that follows into a no-op.
    logging.getLogger(__name__).warning(
        "No se pudo aplicar el parche de compatibilidad de gradio_client",
        exc_info=True,
    )

# Configure root logging (INFO level, timestamped format) and create the
# module-level logger used by the rest of this file.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)  # Fixed: the logger is conventionally named after __name__
logger.info("Inicializando la aplicación...")

from pipeline.setup import setup_seed_vc
from pipeline.storage import init_storage, list_models, download_model, delete_model, get_reference_path

# Run the Seed-VC setup step at import time. A failure is logged instead of
# raised, so the rest of the module (and the UI) can still load.
try:
    setup_seed_vc()
except Exception as e:
    # logger.exception logs at ERROR level like before, but also attaches the
    # traceback; the previous message carried only str(e), which made startup
    # failures hard to diagnose.
    logger.exception("Error durante la configuración: {}".format(e))

# Optional model storage: initialised only when the HF_MODELS_REPO
# environment variable holds a non-empty value.
HF_MODELS_REPO = os.environ.get("HF_MODELS_REPO") or ""
if HF_MODELS_REPO:
    init_storage(HF_MODELS_REPO)
    logger.info(
        "Almacenamiento HuggingFace configurado: {}".format(HF_MODELS_REPO)
    )

from pipeline.training import save_voice_reference, _gpu_warmup
from pipeline.separation import separate_audio
from pipeline.inference import convert_voice

def train_voice_model(audio_file, model_name, progress=gr.Progress()):
    """Save an uploaded clip as a named voice reference.

    Args:
        audio_file: Value of the audio input; ``None`` means nothing was
            uploaded.
        model_name: Profile name typed by the user. Surrounding whitespace is
            stripped and remaining spaces are replaced with underscores.
        progress: Gradio progress tracker used to report status.

    Returns:
        A ``(status_message, reference_path)`` tuple. ``reference_path`` is
        ``None`` when validation fails or saving raises; in the latter case
        the message carries the exception type, its text and the last 500
        characters of the traceback.
    """
    if audio_file is None:
        return "Error: Por favor suba un archivo de audio.", None

    trimmed_name = model_name.strip() if model_name else ""
    if not trimmed_name:
        return "Error: Por favor ingrese un nombre para el modelo.", None
    model_name = trimmed_name.replace(" ", "_")

    def report(value, desc):
        # Adapter handed to the pipeline so it can drive the progress tracker.
        progress(value, desc=desc)

    try:
        progress(0.0, desc="Iniciando...")
        # Only the second element of the returned pair is surfaced to the UI.
        _, reference_file = save_voice_reference(
            audio_path=audio_file,
            model_name=model_name,
            progress_callback=report,
        )
    except Exception as exc:
        import traceback

        trace_text = traceback.format_exc()
        logger.error("Error en entrenamiento: {}".format(trace_text))
        failure_message = "Error: {}: {}\n\nDetalles:\n{}".format(
            type(exc).__name__, str(exc), trace_text[-500:]
        )
        return failure_message, None
    else:
        return (
            "Referencia de voz '{}' guardada con éxito".format(model_name),
            reference_file,
        )

def get_model_choices():
    """Return the entries to offer in the model dropdowns.

    Returns whatever ``list_models()`` yields, or a one-item list holding the
    "(ningún modelo)" placeholder when that result is empty.
    """
    return list_models() or ["(ningún modelo)"]

def convert_song(
    model_choice,
    song_file,
    pitch,
    similarity,
    diffusion_steps,
    vocal_volume,
    instrumental_volume,
    progress=gr.Progress(),
):
    """Convert the vocals of a song with a registered voice reference.

    Steps, in order: load the model, look up its reference audio, separate
    the song into vocals and instruments, convert the vocals, then mix the
    converted vocals back with the instruments.

    Args:
        model_choice: Selected profile name; empty or the "(ningún modelo)"
            placeholder is rejected.
        song_file: Value of the song audio input; ``None`` is rejected.
        pitch: Pitch setting, cast to ``int`` before use.
        similarity: Similarity setting, cast to ``float`` before use.
        diffusion_steps: Diffusion step count, cast to ``int`` before use.
        vocal_volume: Vocal gain for the mix, cast to ``float``.
        instrumental_volume: Instrumental gain for the mix, cast to ``float``.
        progress: Gradio progress tracker used to report status.

    Returns:
        A 4-tuple ``(status_message, vocals_path, converted_path,
        final_path)``. The three paths are ``None`` on any failure.
    """

    def failure(message):
        # Every error path returns the message plus three empty outputs.
        return message, None, None, None

    if song_file is None:
        return failure("Error: Por favor suba un archivo de audio.")

    if not model_choice or model_choice == "(ningún modelo)":
        return failure("Error: Por favor, registre una referencia de voz primero.")

    # Imported lazily, after validation and outside the try block, exactly
    # where the original import sat.
    from pipeline.mixing import mix_audio

    try:
        progress(0.05, desc="Cargando el modelo...")
        # The second element of the returned pair is not needed here.
        model_file, _ = download_model(model_choice)
        if not model_file:
            return failure("Error: Modelo '{}' no encontrado.".format(model_choice))

        reference_path = get_reference_path(model_choice)
        if not reference_path:
            return failure(
                "Error: Audio de referencia no encontrado para '{}'.".format(model_choice)
            )

        progress(0.10, desc="Separación de pistas (Demucs)...")
        vocals_path, instruments_path = separate_audio(song_file)

        progress(0.40, desc="Conversión de voz (Seed-VC)...")
        conversion_args = {
            "audio_path": vocals_path,
            "reference_path": reference_path,
            "pitch": int(pitch),
            "diffusion_steps": int(diffusion_steps),
            "similarity": float(similarity),
        }
        converted_path = convert_voice(**conversion_args)

        progress(0.85, desc="Mezcla final...")
        mix_args = {
            "vocals_path": converted_path,
            "instruments_path": instruments_path,
            "vocal_volume": float(vocal_volume),
            "instrumental_volume": float(instrumental_volume),
        }
        final_path = mix_audio(**mix_args)

        progress(1.0, desc="Terminado")
        return "Conversión terminada con éxito", vocals_path, converted_path, final_path

    except Exception as exc:
        import traceback

        trace_text = traceback.format_exc()
        logger.error("Error en conversión: {}".format(trace_text))
        return failure(
            "Error: {}: {}\n\nDetalles:\n{}".format(
                type(exc).__name__, str(exc), trace_text[-500:]
            )
        )

def refresh_models():
    """Render the registered models as an HTML table.

    The result feeds a ``gr.HTML`` component, so it is built as markup: the
    previous plain-text form joined rows with no separator, and its "\\n"
    is not a line break in HTML, so all entries ran together.

    Returns:
        "Ningún modelo registrado" when ``list_models()`` yields nothing,
        otherwise a ``<table>`` with one "Nombre" / "Estado" row per model.
    """
    import html

    models = list_models()
    if not models:
        return "Ningún modelo registrado"

    # Model names come from user input, so they are escaped before being
    # embedded in markup.
    rows = "".join(
        "<tr><td>{}</td><td>Disponible</td></tr>".format(html.escape(str(m)))
        for m in models
    )
    return (
        "<table><thead><tr><th>Nombre</th><th>Estado</th></tr></thead>"
        "<tbody>{}</tbody></table>".format(rows)
    )

def delete_selected_model(model_name_to_delete):
    """Delete the selected model and return a refreshed model listing.

    Args:
        model_name_to_delete: Name chosen in the dropdown; empty or the
            "(ningún modelo)" placeholder is rejected.

    Returns:
        A ``(status_message, models_listing)`` tuple, where
        ``models_listing`` is the current output of ``refresh_models()``.
    """
    if not model_name_to_delete or model_name_to_delete == "(ningún modelo)":
        return "Por favor seleccione un modelo para eliminar.", refresh_models()

    # Only the deletion itself is guarded, so a successful delete is never
    # reported as an error because of a later listing failure.
    try:
        delete_model(model_name_to_delete)
    except Exception as e:
        # Log with traceback, like the other handlers in this file; the
        # failure used to be shown to the user only.
        logger.exception(
            "Error al eliminar el modelo '{}'".format(model_name_to_delete)
        )
        return "Error: {}".format(e), refresh_models()

    return "Modelo '{}' eliminado.".format(model_name_to_delete), refresh_models()

# Markdown header text for the page; rendered through gr.Markdown(DESCRIPTION)
# when the UI is built.
DESCRIPTION = """
# Clon Vocal
Herramienta de clonación de voz zero-shot basada en Seed-VC.
"""

# Build the Gradio UI: a header plus four tabs (voice registration, song
# conversion, model management, GPU debug log). Components are created in
# layout order, so statement order here defines what the page looks like.
with gr.Blocks(title="Clon Vocal", theme=gr.themes.Soft()) as app:
    gr.Markdown(DESCRIPTION)

    with gr.Tabs():
        # Tab 1: register a voice reference from an uploaded clip.
        with gr.TabItem("Mi voz"):
            gr.Markdown("Registrar su referencia de voz")
            with gr.Row():
                with gr.Column(scale=2):
                    # type="filepath" hands the handler a path; uploads only.
                    train_audio = gr.Audio(label="Extracto de su voz", type="filepath", sources=["upload"])
                    train_model_name = gr.Textbox(label="Nombre del perfil", placeholder="ej: mi_voz")
                    train_btn = gr.Button("Guardar", variant="primary", size="lg")
                with gr.Column(scale=1):
                    train_status = gr.Textbox(label="Estado", interactive=False, lines=3)
                    train_download = gr.File(label="Archivo de referencia", interactive=False)

            # Outputs line up with train_voice_model's (status, reference path) tuple.
            train_btn.click(
                fn=train_voice_model,
                inputs=[train_audio, train_model_name],
                outputs=[train_status, train_download],
            )

        # Tab 2: convert a song using a registered voice profile.
        with gr.TabItem("Convertir una canción"):
            with gr.Row():
                with gr.Column(scale=2):
                    # Choices are computed once, when the UI is built; the
                    # button below re-queries them on demand.
                    convert_model = gr.Dropdown(choices=get_model_choices(), label="Perfil vocal")
                    refresh_btn = gr.Button("Actualizar la lista", size="sm")
                    convert_audio = gr.Audio(label="Canción a convertir", type="filepath")
                    
                    # Tuning sliders; their values are passed to convert_song
                    # in the order listed in convert_btn.click below.
                    with gr.Accordion("Parámetros avanzados", open=False):
                        convert_pitch = gr.Slider(minimum=-24, maximum=24, value=0, step=1, label="Pitch")
                        convert_similarity = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Similitud")
                        convert_diffusion = gr.Slider(minimum=5, maximum=100, value=25, step=5, label="Calidad")
                        convert_vocal_vol = gr.Slider(minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="Vol. Voz")
                        convert_inst_vol = gr.Slider(minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="Vol. Inst")
                    
                    convert_btn = gr.Button("Convertir", variant="primary", size="lg")

                with gr.Column(scale=1):
                    convert_status = gr.Textbox(label="Estado", interactive=False)
                    preview_vocals = gr.Audio(label="Original", interactive=False)
                    preview_converted = gr.Audio(label="Convertida", interactive=False)
                    final_output = gr.Audio(label="Final", interactive=False)

            # Returns a Dropdown carrying freshly listed choices as the update
            # for convert_model.
            refresh_btn.click(fn=lambda: gr.Dropdown(choices=get_model_choices()), outputs=[convert_model])
            # Inputs follow convert_song's parameter order; outputs follow its
            # (status, vocals, converted vocals, final mix) return tuple.
            convert_btn.click(
                fn=convert_song,
                inputs=[convert_model, convert_audio, convert_pitch, convert_similarity, convert_diffusion, convert_vocal_vol, convert_inst_vol],
                outputs=[convert_status, preview_vocals, preview_converted, final_output],
            )

        # Tab 3: list and delete registered models.
        with gr.TabItem("Mis modelos"):
            models_table = gr.HTML(value=refresh_models())
            with gr.Row():
                models_refresh_btn = gr.Button("Actualizar")
                # NOTE(review): these choices are computed at build time and no
                # event in this section updates them afterwards — confirm
                # whether the dropdown should refresh after a delete.
                models_delete_name = gr.Dropdown(choices=get_model_choices(), label="Modelo a eliminar")
                models_delete_btn = gr.Button("Eliminar", variant="stop")
            models_delete_status = gr.Textbox(label="Estado")

            models_refresh_btn.click(fn=refresh_models, outputs=[models_table])
            # delete_selected_model returns (status message, refreshed listing).
            models_delete_btn.click(fn=delete_selected_model, inputs=[models_delete_name], outputs=[models_delete_status, models_table])

        # Tab 4: show the contents of the GPU debug log on demand.
        with gr.TabItem("Depuración GPU"):
            debug_output = gr.Textbox(label="Registros", interactive=False, lines=20)
            debug_btn = gr.Button("Leer")

            def read_debug_log():
                """Return the GPU debug log text, or "Sin registros." if the file is absent."""
                log_path = "/home/user/app/debug_gpu.log"
                if not os.path.exists(log_path):
                    return "Sin registros."
                # The context manager closes the handle; the previous bare
                # open(...).read() left it open on every button press.
                with open(log_path, "r") as log_file:
                    return log_file.read()

            # Clicking the button runs read_debug_log and puts its return
            # value into the debug_output textbox.
            debug_btn.click(fn=read_debug_log, outputs=[debug_output])

# Launch the web server only when this file is run as a script.
# server_name="0.0.0.0" makes the server listen on all network interfaces.
if __name__ == "__main__":
    app.launch(server_name="0.0.0.0")