File size: 9,840 Bytes
2376414
 
 
 
 
 
 
dbae9aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2376414
e940250
 
2376414
fea49f2
 
2376414
 
fea49f2
2376414
e940250
2376414
 
 
 
e940250
2376414
fea49f2
0ead6d9
 
4b6c221
fea49f2
2376414
e940250
2376414
 
e940250
2376414
 
 
 
 
 
 
e940250
fea49f2
2376414
 
 
 
e940250
2376414
 
32e0546
 
e940250
 
fea49f2
 
2376414
 
 
 
e940250
2376414
 
 
 
 
 
969158e
fea49f2
2376414
 
 
 
 
e940250
2376414
e940250
 
2376414
 
 
 
e940250
fea49f2
2376414
e940250
fea49f2
 
 
e940250
2376414
e940250
2376414
 
e940250
2376414
 
 
fea49f2
2376414
fea49f2
969158e
2376414
 
e940250
2376414
 
 
 
 
 
 
 
e940250
2376414
 
e940250
fea49f2
 
 
2376414
 
 
c5ea689
 
e940250
 
fea49f2
 
2376414
 
 
 
e940250
fea49f2
e940250
fea49f2
e940250
2376414
 
e940250
 
2376414
 
e940250
2376414
e940250
2376414
 
e940250
 
2376414
 
e940250
2376414
 
 
e940250
 
2376414
 
e940250
 
 
2376414
e940250
 
2376414
 
 
fea49f2
2376414
 
 
e940250
2376414
 
e940250
 
 
 
 
 
 
 
 
 
 
 
2376414
 
e940250
 
 
 
2376414
e940250
2376414
 
e940250
2376414
 
 
e940250
 
2376414
e940250
 
 
 
2376414
e940250
 
2376414
e940250
 
 
8a7554b
 
 
e940250
8a7554b
 
 
2376414
e940250
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
import os
import sys
import logging
import tempfile
import shutil
import gradio as gr

# Best-effort monkeypatch of gradio_client's JSON-schema helpers so that a
# schema value that is not a dict is reported as "Any" instead of being handed
# to the library functions.
# NOTE(review): presumably a workaround for boolean schemas (e.g.
# `additionalProperties: true`) breaking API-info generation — confirm against
# the installed gradio_client version.
try:
    import gradio_client.utils as _gc_utils
    # Keep a handle on the library implementation before replacing it.
    _orig_get_type = _gc_utils.get_type

    def _patched_get_type(schema, *args, **kwargs):
        """Return "Any" for non-dict schemas; otherwise defer to the original."""
        if not isinstance(schema, dict):
            return "Any"
        return _orig_get_type(schema, *args, **kwargs)

    _gc_utils.get_type = _patched_get_type
    _orig_json_schema = _gc_utils._json_schema_to_python_type

    def _patched_json_schema(schema, *args, **kwargs):
        """Return "Any" for non-dict schemas; otherwise defer to the original."""
        if not isinstance(schema, dict):
            return "Any"
        return _orig_json_schema(schema, *args, **kwargs)

    _gc_utils._json_schema_to_python_type = _patched_json_schema
    # The public entry point is replaced as well and now delegates straight to
    # the patched private helper.
    # NOTE(review): this bypasses anything else the library's own public
    # function did besides delegating — confirm that is acceptable.
    _gc_utils.json_schema_to_python_type = lambda schema, defs=None: _patched_json_schema(
        schema, defs
    )
except Exception:
    # Deliberately silent: if gradio_client is missing or its internals have
    # changed, the app starts without the patch.
    pass

# Configure the root logger first so the startup steps that follow can log.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
logger.info("Inicializando la aplicación...")

from pipeline.setup import setup_seed_vc
from pipeline.storage import init_storage, list_models, download_model, delete_model, get_reference_path

# Run the Seed-VC setup step at import time. A failure is intentionally
# non-fatal (the UI still starts), but it is logged with the full traceback so
# the root cause is not reduced to a one-line message.
try:
    setup_seed_vc()
except Exception as e:
    logger.exception("Error durante la configuración: %s", e)

# Storage repository identifier taken from the environment; an empty string
# (the default) leaves storage uninitialised.
HF_MODELS_REPO = os.getenv("HF_MODELS_REPO", "")
if HF_MODELS_REPO:
    init_storage(HF_MODELS_REPO)
    logger.info(
        "Almacenamiento HuggingFace configurado: {}".format(HF_MODELS_REPO)
    )

from pipeline.training import save_voice_reference, _gpu_warmup
from pipeline.separation import separate_audio
from pipeline.inference import convert_voice

def train_voice_model(audio_file, model_name, progress=gr.Progress()):
    """Store an uploaded voice clip as a named voice reference.

    Args:
        audio_file: Path of the uploaded audio, or None when nothing was uploaded.
        model_name: Profile name typed by the user; surrounding whitespace is
            stripped and inner spaces become underscores.
        progress: Gradio progress tracker used to report advancement.

    Returns:
        A ``(status_message, reference_path)`` tuple. ``reference_path`` is
        None whenever validation fails or saving raises.
    """
    if audio_file is None:
        return "Error: Por favor suba un archivo de audio.", None

    trimmed_name = model_name.strip() if model_name else ""
    if not trimmed_name:
        return "Error: Por favor ingrese un nombre para el modelo.", None

    profile_name = trimmed_name.replace(" ", "_")

    try:
        progress(0.0, desc="Iniciando...")
        # Only the second element of the returned pair is used here.
        _, reference_file = save_voice_reference(
            audio_path=audio_file,
            model_name=profile_name,
            # Adapt the positional (value, desc) callback to gr.Progress.
            progress_callback=lambda value, desc: progress(value, desc=desc),
        )
    except Exception as exc:
        import traceback
        trace_text = traceback.format_exc()
        logger.error("Error en entrenamiento: {}".format(trace_text))
        # Only the tail of the traceback is shown in the status box.
        failure_message = "Error: {}: {}\n\nDetalles:\n{}".format(
            type(exc).__name__, str(exc), trace_text[-500:]
        )
        return failure_message, None

    return "Referencia de voz '{}' guardada con éxito".format(profile_name), reference_file

def get_model_choices():
    """Return the registered model names, or a one-item placeholder list when there are none."""
    return list_models() or ["(ningún modelo)"]

def convert_song(
    model_choice,
    song_file,
    pitch,
    similarity,
    diffusion_steps,
    vocal_volume,
    instrumental_volume,
    progress=gr.Progress(),
):
    """Convert the vocals of a song with a stored voice reference and remix it.

    The steps are: fetch the model, separate the song into vocals and
    instruments, convert the vocals, then mix the converted vocals with the
    instrumental track.

    Args:
        model_choice: Name of the voice profile selected in the UI.
        song_file: Path of the song to convert, or None when nothing was uploaded.
        pitch: Pitch value; cast to int before use.
        similarity: Similarity value; cast to float before use.
        diffusion_steps: Number of diffusion steps; cast to int before use.
        vocal_volume: Gain for the converted vocals; cast to float before use.
        instrumental_volume: Gain for the instrumental track; cast to float.
        progress: Gradio progress tracker used to report advancement.

    Returns:
        A ``(status_message, vocals_path, converted_path, final_path)`` tuple.
        The three paths are None whenever validation fails or a step raises.
    """
    if song_file is None:
        return "Error: Por favor suba un archivo de audio.", None, None, None

    if model_choice == "(ningún modelo)" or not model_choice:
        return "Error: Por favor, registre una referencia de voz primero.", None, None, None

    try:
        # Imported lazily, and inside the try block so that an import failure
        # is reported through the status message like any other error.
        from pipeline.mixing import mix_audio

        progress(0.05, desc="Cargando el modelo...")
        # Only the first element is needed, to confirm that the model exists.
        pth_path, _ = download_model(model_choice)
        if not pth_path:
            return "Error: Modelo '{}' no encontrado.".format(model_choice), None, None, None

        reference_path = get_reference_path(model_choice)
        if not reference_path:
            return "Error: Audio de referencia no encontrado para '{}'.".format(model_choice), None, None, None

        progress(0.10, desc="Separación de pistas (Demucs)...")
        vocals_path, instruments_path = separate_audio(song_file)

        progress(0.40, desc="Conversión de voz (Seed-VC)...")

        converted_path = convert_voice(
            audio_path=vocals_path,
            reference_path=reference_path,
            pitch=int(pitch),
            diffusion_steps=int(diffusion_steps),
            similarity=float(similarity),
        )

        progress(0.85, desc="Mezcla final...")

        final_path = mix_audio(
            vocals_path=converted_path,
            instruments_path=instruments_path,
            vocal_volume=float(vocal_volume),
            instrumental_volume=float(instrumental_volume),
        )

        progress(1.0, desc="Terminado")

        return (
            "Conversión terminada con éxito",
            vocals_path,
            converted_path,
            final_path,
        )

    except Exception as e:
        import traceback
        tb = traceback.format_exc()
        logger.error("Error en conversión: {}".format(tb))
        # Only the tail of the traceback is shown in the status box.
        return "Error: {}: {}\n\nDetalles:\n{}".format(
            type(e).__name__, str(e), tb[-500:]
        ), None, None, None

def refresh_models():
    """Render the registered models as an HTML table.

    Returns:
        The plain message "Ningún modelo registrado" when no model exists,
        otherwise an HTML ``<table>`` with one row per model. Model names are
        HTML-escaped because the result is displayed in an HTML component.
    """
    import html  # local import keeps this block self-contained

    models = list_models()
    if not models:
        return "Ningún modelo registrado"
    # One table row per model, so entries no longer run together.
    rows = "".join(
        "<tr><td>{}</td><td>Disponible</td></tr>".format(html.escape(str(m)))
        for m in models
    )
    return (
        "<table><thead><tr><th>Nombre</th><th>Estado</th></tr></thead>"
        "<tbody>{}</tbody></table>".format(rows)
    )

def delete_selected_model(model_name_to_delete):
    """Delete the chosen model and return ``(status_message, refreshed_models_view)``.

    An empty selection or the "no model" placeholder is rejected with a prompt.
    Any exception raised while deleting is reported in the status message.
    """
    nothing_selected = (
        not model_name_to_delete or model_name_to_delete == "(ningún modelo)"
    )
    if nothing_selected:
        return "Por favor seleccione un modelo para eliminar.", refresh_models()

    try:
        delete_model(model_name_to_delete)
        status = "Modelo '{}' eliminado.".format(model_name_to_delete)
        models_view = refresh_models()
        return status, models_view
    except Exception as exc:
        return "Error: {}".format(exc), refresh_models()

# Markdown header rendered at the top of the UI (passed to gr.Markdown below).
DESCRIPTION = """
# Clon Vocal
Herramienta de clonación de voz zero-shot basada en Seed-VC.
"""

# Gradio UI with four tabs: voice registration, song conversion, model
# management, and a viewer for the GPU debug log.
with gr.Blocks(title="Clon Vocal", theme=gr.themes.Soft()) as app:
    gr.Markdown(DESCRIPTION)

    with gr.Tabs():
        # --- Tab 1: register a voice reference -------------------------------
        with gr.TabItem("Mi voz"):
            gr.Markdown("Registrar su referencia de voz")
            with gr.Row():
                with gr.Column(scale=2):
                    train_audio = gr.Audio(label="Extracto de su voz", type="filepath", sources=["upload"])
                    train_model_name = gr.Textbox(label="Nombre del perfil", placeholder="ej: mi_voz")
                    train_btn = gr.Button("Guardar", variant="primary", size="lg")
                with gr.Column(scale=1):
                    train_status = gr.Textbox(label="Estado", interactive=False, lines=3)
                    train_download = gr.File(label="Archivo de referencia", interactive=False)

            train_btn.click(
                fn=train_voice_model,
                inputs=[train_audio, train_model_name],
                outputs=[train_status, train_download],
            )

        # --- Tab 2: convert a song with a stored voice profile ----------------
        with gr.TabItem("Convertir una canción"):
            with gr.Row():
                with gr.Column(scale=2):
                    convert_model = gr.Dropdown(choices=get_model_choices(), label="Perfil vocal")
                    refresh_btn = gr.Button("Actualizar la lista", size="sm")
                    convert_audio = gr.Audio(label="Canción a convertir", type="filepath")

                    with gr.Accordion("Parámetros avanzados", open=False):
                        convert_pitch = gr.Slider(minimum=-24, maximum=24, value=0, step=1, label="Pitch")
                        convert_similarity = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Similitud")
                        convert_diffusion = gr.Slider(minimum=5, maximum=100, value=25, step=5, label="Calidad")
                        convert_vocal_vol = gr.Slider(minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="Vol. Voz")
                        convert_inst_vol = gr.Slider(minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="Vol. Inst")

                    convert_btn = gr.Button("Convertir", variant="primary", size="lg")

                with gr.Column(scale=1):
                    convert_status = gr.Textbox(label="Estado", interactive=False)
                    preview_vocals = gr.Audio(label="Original", interactive=False)
                    preview_converted = gr.Audio(label="Convertida", interactive=False)
                    final_output = gr.Audio(label="Final", interactive=False)

            # Re-query the model list on demand and push it into the dropdown.
            refresh_btn.click(fn=lambda: gr.Dropdown(choices=get_model_choices()), outputs=[convert_model])
            convert_btn.click(
                fn=convert_song,
                inputs=[convert_model, convert_audio, convert_pitch, convert_similarity, convert_diffusion, convert_vocal_vol, convert_inst_vol],
                outputs=[convert_status, preview_vocals, preview_converted, final_output],
            )

        # --- Tab 3: list and delete stored models -----------------------------
        with gr.TabItem("Mis modelos"):
            models_table = gr.HTML(value=refresh_models())
            with gr.Row():
                models_refresh_btn = gr.Button("Actualizar")
                models_delete_name = gr.Dropdown(choices=get_model_choices(), label="Modelo a eliminar")
                models_delete_btn = gr.Button("Eliminar", variant="stop")
            models_delete_status = gr.Textbox(label="Estado")

            models_refresh_btn.click(fn=refresh_models, outputs=[models_table])
            models_delete_btn.click(fn=delete_selected_model, inputs=[models_delete_name], outputs=[models_delete_status, models_table])

        # --- Tab 4: show the GPU debug log ------------------------------------
        with gr.TabItem("Depuración GPU"):
            debug_output = gr.Textbox(label="Registros", interactive=False, lines=20)
            debug_btn = gr.Button("Leer")

            def read_debug_log():
                """Return the GPU debug log contents, or a placeholder when the file is absent."""
                log_path = "/home/user/app/debug_gpu.log"
                try:
                    # The context manager closes the handle; undecodable bytes
                    # are replaced so a corrupt log can still be displayed.
                    with open(log_path, "r", encoding="utf-8", errors="replace") as log_file:
                        return log_file.read()
                except FileNotFoundError:
                    return "Sin registros."

            debug_btn.click(fn=read_debug_log, outputs=[debug_output])

if __name__ == "__main__":
    # Listen on every IPv4 interface rather than only on localhost.
    launch_options = {"server_name": "0.0.0.0"}
    app.launch(**launch_options)