dimensionalpulsar committed on
Commit
b4b21bd
·
1 Parent(s): f0f02ad

HF adaptation: fix ZeroGPU requirements, launch(), README metadata + hardware:zero-gpu

Browse files
Files changed (3) hide show
  1. README.md +3 -2
  2. app.py +406 -406
  3. requirements.txt +2 -4
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: Clone Vocal
3
- emoji: "\U0001F3A4"
4
  colorFrom: purple
5
  colorTo: blue
6
  sdk: gradio
@@ -9,6 +9,7 @@ python_version: "3.10"
9
  app_file: app.py
10
  pinned: false
11
  license: mit
 
12
  tags:
13
  - seed-vc
14
  - voice-cloning
 
1
  ---
2
+ title: Voice Clone RVC
3
+ emoji: 🎤
4
  colorFrom: purple
5
  colorTo: blue
6
  sdk: gradio
 
9
  app_file: app.py
10
  pinned: false
11
  license: mit
12
+ hardware: zero-gpu
13
  tags:
14
  - seed-vc
15
  - voice-cloning
app.py CHANGED
@@ -1,406 +1,406 @@
1
- import os
2
- import sys
3
- import logging
4
- import tempfile
5
- import shutil
6
- import gradio as gr
7
-
8
- try:
9
- import gradio_client.utils as _gc_utils
10
-
11
- _orig_get_type = _gc_utils.get_type
12
-
13
- def _patched_get_type(schema, *args, **kwargs):
14
- if not isinstance(schema, dict):
15
- return "Any"
16
- return _orig_get_type(schema, *args, **kwargs)
17
-
18
- _gc_utils.get_type = _patched_get_type
19
-
20
- _orig_json_schema = _gc_utils._json_schema_to_python_type
21
-
22
- def _patched_json_schema(schema, *args, **kwargs):
23
- if not isinstance(schema, dict):
24
- return "Any"
25
- return _orig_json_schema(schema, *args, **kwargs)
26
-
27
- _gc_utils._json_schema_to_python_type = _patched_json_schema
28
- _gc_utils.json_schema_to_python_type = lambda schema, defs=None: _patched_json_schema(
29
- schema, defs
30
- )
31
- except Exception:
32
- pass
33
-
34
- # Configuración de logs
35
- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
36
- logger = logging.getLogger(__name__)
37
-
38
- # Inicio: clonar Seed-VC
39
- logger.info("Inicializando la aplicación...")
40
-
41
- from pipeline.setup import setup_seed_vc
42
- from pipeline.storage import init_storage, list_models, download_model, delete_model, get_reference_path
43
-
44
- try:
45
- setup_seed_vc()
46
- except Exception as e:
47
- logger.error("Error durante la configuración: {}".format(e))
48
-
49
- HF_MODELS_REPO = os.environ.get("HF_MODELS_REPO", "")
50
- if HF_MODELS_REPO:
51
- init_storage(HF_MODELS_REPO)
52
- logger.info("Almacenamiento de HuggingFace configurado: {}".format(HF_MODELS_REPO))
53
-
54
- from pipeline.training import save_voice_reference, _gpu_warmup
55
- from pipeline.separation import separate_audio
56
- from pipeline.inference import convert_voice
57
-
58
- def train_voice_model(audio_file, model_name, progress=gr.Progress()):
59
- """Controlador: guardar referencia de voz."""
60
- if audio_file is None:
61
- return "Error: Por favor, sube un archivo de audio.", None
62
-
63
- if not model_name or not model_name.strip():
64
- return "Error: Por favor, ingresa un nombre para el modelo.", None
65
-
66
- model_name = model_name.strip().replace(" ", "_")
67
-
68
- def progress_callback(value, desc):
69
- progress(value, desc=desc)
70
-
71
- try:
72
- progress(0.0, desc="Iniciando...")
73
- pth_path, ref_path = save_voice_reference(
74
- audio_path=audio_file,
75
- model_name=model_name,
76
- progress_callback=progress_callback,
77
- )
78
-
79
- return "¡Referencia de voz '{}' guardada con éxito!".format(model_name), ref_path
80
-
81
- except Exception as e:
82
- import traceback
83
- tb = traceback.format_exc()
84
- logger.error("Error en el entrenamiento: {}".format(tb))
85
- return "Error : {}: {}\n\nDetalles:\n{}".format(
86
- type(e).__name__, str(e), tb[-500:]
87
- ), None
88
-
89
- def get_model_choices():
90
- """Obtener lista de nombres de modelos entrenados para el menú desplegable."""
91
- models = list_models()
92
- if not models:
93
- return ["(ningún modelo)"]
94
- return models
95
-
96
-
97
- def convert_song(
98
- model_choice,
99
- song_file,
100
- pitch,
101
- similarity,
102
- diffusion_steps,
103
- vocal_volume,
104
- instrumental_volume,
105
- progress=gr.Progress(),
106
- ):
107
- """Pipeline completo: separar + convertir + mezclar."""
108
- if song_file is None:
109
- return "Error: Por favor, sube un archivo de audio.", None, None, None
110
-
111
- if model_choice == "(ningún modelo)" or not model_choice:
112
- return "Error: Por favor, guarda una referencia de voz primero.", None, None, None
113
-
114
- from pipeline.mixing import mix_audio
115
-
116
- try:
117
- progress(0.05, desc="Cargando el modelo...")
118
- pth_path, ref_or_index = download_model(model_choice)
119
- if not pth_path:
120
- return "Error: Modelo '{}' no encontrado.".format(model_choice), None, None, None
121
-
122
- reference_path = get_reference_path(model_choice)
123
- if not reference_path:
124
- return "Error: Audio de referencia no encontrado para '{}'.".format(model_choice), None, None, None
125
-
126
- progress(0.10, desc="Separación de pistas (Demucs)...")
127
- vocals_path, instruments_path = separate_audio(song_file)
128
-
129
- progress(0.40, desc="Conversión de voz (Seed-VC)...")
130
-
131
- converted_path = convert_voice(
132
- audio_path=vocals_path,
133
- reference_path=reference_path,
134
- pitch=int(pitch),
135
- diffusion_steps=int(diffusion_steps),
136
- similarity=float(similarity),
137
- )
138
-
139
- progress(0.85, desc="Mezcla final...")
140
-
141
- final_path = mix_audio(
142
- vocals_path=converted_path,
143
- instruments_path=instruments_path,
144
- vocal_volume=float(vocal_volume),
145
- instrumental_volume=float(instrumental_volume),
146
- )
147
-
148
- progress(1.0, desc="¡Terminado!")
149
-
150
- return (
151
- "¡Conversión completada con éxito!",
152
- vocals_path,
153
- converted_path,
154
- final_path,
155
- )
156
-
157
- except Exception as e:
158
- import traceback
159
- tb = traceback.format_exc()
160
- logger.error("Error en la conversión: {}".format(tb))
161
- return "Error : {}: {}\n\nDetalles:\n{}".format(
162
- type(e).__name__, str(e), tb[-500:]
163
- ), None, None, None
164
-
165
- def refresh_models():
166
- """Actualizar la lista de modelos como HTML."""
167
- models = list_models()
168
- if not models:
169
- return "<p style='color:gray;'>Ningún modelo guardado</p>"
170
- rows = "".join(
171
- "<tr><td>{}</td><td>Disponible</td></tr>".format(m) for m in models
172
- )
173
- return (
174
- "<table style='width:100%;border-collapse:collapse;'>"
175
- "<tr><th style='text-align:left;border-bottom:1px solid #555;padding:8px;'>Nombre</th>"
176
- "<th style='text-align:left;border-bottom:1px solid #555;padding:8px;'>Estado</th></tr>"
177
- "{}</table>".format(rows)
178
- )
179
-
180
-
181
- def delete_selected_model(model_name_to_delete):
182
- """Eliminar un modelo."""
183
- if not model_name_to_delete or model_name_to_delete == "(ningún modelo)":
184
- return "Por favor, selecciona un modelo para eliminar.", refresh_models()
185
- try:
186
- delete_model(model_name_to_delete)
187
- return "Modelo '{}' eliminado.".format(model_name_to_delete), refresh_models()
188
- except Exception as e:
189
- return "Error : {}".format(e), refresh_models()
190
-
191
- with gr.Blocks(
192
- title="Clon de Voz",
193
- theme=gr.themes.Soft(),
194
- ) as app:
195
-
196
- gr.Markdown(
197
- "# 🎤 Aplicación de Clonación de Voz (Seed-VC)\n"
198
- "> Powered by [Seed-VC](https://github.com/Plachta/seed-vc) + [Demucs](https://github.com/facebookresearch/demucs) · ZeroGPU · Zero-shot"
199
- )
200
-
201
- with gr.Tabs():
202
- # Pestaña 1: Referencia de voz
203
- with gr.TabItem("Mi voz"):
204
- gr.Markdown("### Guardar tu referencia de voz")
205
-
206
- with gr.Row():
207
- with gr.Column(scale=2):
208
- train_audio = gr.Audio(
209
- label="Extracto de tu voz (WAV o MP3, 3-30 segundos)",
210
- type="filepath",
211
- sources=["upload"],
212
- )
213
- train_model_name = gr.Textbox(
214
- label="Nombre del perfil",
215
- placeholder="ej: mi_voz",
216
- max_lines=1,
217
- )
218
- train_btn = gr.Button(
219
- "Guardar",
220
- variant="primary",
221
- size="lg",
222
- )
223
-
224
- with gr.Column(scale=1):
225
- train_status = gr.Textbox(
226
- label="Estado",
227
- interactive=False,
228
- lines=3,
229
- )
230
- train_download = gr.File(
231
- label="Archivo de referencia",
232
- interactive=False,
233
- )
234
-
235
- gr.Markdown(
236
- "**Consejos:**\n"
237
- "- Usa una grabación limpia (sin ruido de fondo, sin música)\n"
238
- "- Habla o canta naturalmente durante 3 a 30 segundos\n"
239
- "- Mientras más largo y variado sea el extracto, mejor será el resultado\n"
240
- "- Se aceptan formatos WAV o MP3"
241
- )
242
-
243
- train_btn.click(
244
- fn=train_voice_model,
245
- inputs=[train_audio, train_model_name],
246
- outputs=[train_status, train_download],
247
- )
248
-
249
- # Pestaña 2: Conversión
250
- with gr.TabItem("Convertir una canción"):
251
- gr.Markdown("### Reemplazar la voz de una canción por la tuya")
252
-
253
- with gr.Row():
254
- with gr.Column(scale=2):
255
- convert_model = gr.Dropdown(
256
- choices=get_model_choices(),
257
- label="Perfil de voz",
258
- interactive=True,
259
- )
260
- refresh_btn = gr.Button("Actualizar lista", size="sm")
261
- convert_audio = gr.Audio(
262
- label="Canción a convertir (WAV o MP3)",
263
- type="filepath",
264
- sources=["upload"],
265
- )
266
-
267
- with gr.Accordion("Parámetros avanzados", open=False):
268
- convert_pitch = gr.Slider(
269
- minimum=-24,
270
- maximum=24,
271
- value=0,
272
- step=1,
273
- label="Transposición (semitonos)",
274
- )
275
- convert_similarity = gr.Slider(
276
- minimum=0.0,
277
- maximum=1.0,
278
- value=0.7,
279
- step=0.05,
280
- label="Similitud de voz (0.5=natural, 0.7=equilibrado, 0.9=más fiel)",
281
- )
282
- convert_diffusion = gr.Slider(
283
- minimum=5,
284
- maximum=100,
285
- value=25,
286
- step=5,
287
- label="Calidad (10=rápido, 25=equilibrado, 50=alta calidad)",
288
- )
289
- convert_vocal_vol = gr.Slider(
290
- minimum=0.0,
291
- maximum=2.0,
292
- value=1.0,
293
- step=0.1,
294
- label="Volumen de la voz",
295
- )
296
- convert_inst_vol = gr.Slider(
297
- minimum=0.0,
298
- maximum=2.0,
299
- value=1.0,
300
- step=0.1,
301
- label="Volumen de los instrumentos",
302
- )
303
-
304
- convert_btn = gr.Button(
305
- "Convertir y mezclar",
306
- variant="primary",
307
- size="lg",
308
- )
309
-
310
- with gr.Column(scale=1):
311
- convert_status = gr.Textbox(
312
- label="Estado",
313
- interactive=False,
314
- lines=3,
315
- )
316
- gr.Markdown("**Vista previa de las pistas:**")
317
- preview_vocals = gr.Audio(
318
- label="Voz original (separada)",
319
- interactive=False,
320
- )
321
- preview_converted = gr.Audio(
322
- label="Voz convertida",
323
- interactive=False,
324
- )
325
- gr.Markdown("**Resultado final:**")
326
- final_output = gr.Audio(
327
- label="Canción final (voz + instrumentos)",
328
- interactive=False,
329
- )
330
-
331
- refresh_btn.click(
332
- fn=lambda: gr.Dropdown(choices=get_model_choices()),
333
- outputs=[convert_model],
334
- )
335
-
336
- convert_btn.click(
337
- fn=convert_song,
338
- inputs=[
339
- convert_model,
340
- convert_audio,
341
- convert_pitch,
342
- convert_similarity,
343
- convert_diffusion,
344
- convert_vocal_vol,
345
- convert_inst_vol,
346
- ],
347
- outputs=[convert_status, preview_vocals, preview_converted, final_output],
348
- )
349
-
350
- # Pestaña 3: Modelos
351
- with gr.TabItem("Mis modelos"):
352
- gr.Markdown("### Gestionar tus perfiles de voz")
353
-
354
- models_table = gr.HTML(
355
- value=refresh_models(),
356
- label="Modelos guardados",
357
- )
358
-
359
- with gr.Row():
360
- models_refresh_btn = gr.Button("Actualizar", size="sm")
361
- models_delete_name = gr.Dropdown(
362
- choices=get_model_choices(),
363
- label="Modelo a eliminar",
364
- interactive=True,
365
- )
366
- models_delete_btn = gr.Button("Eliminar", variant="stop", size="sm")
367
-
368
- models_delete_status = gr.Textbox(label="Estado", interactive=False)
369
-
370
- models_refresh_btn.click(
371
- fn=refresh_models,
372
- outputs=[models_table],
373
- )
374
- models_refresh_btn.click(
375
- fn=lambda: gr.Dropdown(choices=get_model_choices()),
376
- outputs=[models_delete_name],
377
- )
378
-
379
- models_delete_btn.click(
380
- fn=delete_selected_model,
381
- inputs=[models_delete_name],
382
- outputs=[models_delete_status, models_table],
383
- )
384
-
385
- # Pestaña 4: Debug (temporal)
386
- with gr.TabItem("Depuración GPU"):
387
- gr.Markdown("### Logs del Trabajador GPU (para diagnóstico)")
388
- debug_output = gr.Textbox(
389
- label="Últimos logs de GPU",
390
- interactive=False,
391
- lines=20,
392
- )
393
- debug_btn = gr.Button("Leer los logs", size="sm")
394
-
395
- def read_debug_log():
396
- log_path = "/home/user/app/debug_gpu.log"
397
- if os.path.exists(log_path):
398
- with open(log_path, "r") as f:
399
- return f.read()
400
- return "Ningún log disponible. Ejecuta una conversión primero."
401
-
402
- debug_btn.click(fn=read_debug_log, outputs=[debug_output])
403
-
404
-
405
- if __name__ == "__main__":
406
- app.launch(server_name="0.0.0.0", show_error=True)
 
1
+ import os
2
+ import sys
3
+ import logging
4
+ import tempfile
5
+ import shutil
6
+ import gradio as gr
7
+
8
+ try:
9
+ import gradio_client.utils as _gc_utils
10
+
11
+ _orig_get_type = _gc_utils.get_type
12
+
13
+ def _patched_get_type(schema, *args, **kwargs):
14
+ if not isinstance(schema, dict):
15
+ return "Any"
16
+ return _orig_get_type(schema, *args, **kwargs)
17
+
18
+ _gc_utils.get_type = _patched_get_type
19
+
20
+ _orig_json_schema = _gc_utils._json_schema_to_python_type
21
+
22
+ def _patched_json_schema(schema, *args, **kwargs):
23
+ if not isinstance(schema, dict):
24
+ return "Any"
25
+ return _orig_json_schema(schema, *args, **kwargs)
26
+
27
+ _gc_utils._json_schema_to_python_type = _patched_json_schema
28
+ _gc_utils.json_schema_to_python_type = lambda schema, defs=None: _patched_json_schema(
29
+ schema, defs
30
+ )
31
+ except Exception:
32
+ pass
33
+
34
+ # Configuración de logs
35
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
36
+ logger = logging.getLogger(__name__)
37
+
38
+ # Inicio: clonar Seed-VC
39
+ logger.info("Inicializando la aplicación...")
40
+
41
+ from pipeline.setup import setup_seed_vc
42
+ from pipeline.storage import init_storage, list_models, download_model, delete_model, get_reference_path
43
+
44
+ try:
45
+ setup_seed_vc()
46
+ except Exception as e:
47
+ logger.error("Error durante la configuración: {}".format(e))
48
+
49
+ HF_MODELS_REPO = os.environ.get("HF_MODELS_REPO", "")
50
+ if HF_MODELS_REPO:
51
+ init_storage(HF_MODELS_REPO)
52
+ logger.info("Almacenamiento de HuggingFace configurado: {}".format(HF_MODELS_REPO))
53
+
54
+ from pipeline.training import save_voice_reference, _gpu_warmup
55
+ from pipeline.separation import separate_audio
56
+ from pipeline.inference import convert_voice
57
+
58
+ def train_voice_model(audio_file, model_name, progress=gr.Progress()):
59
+ """Controlador: guardar referencia de voz."""
60
+ if audio_file is None:
61
+ return "Error: Por favor, sube un archivo de audio.", None
62
+
63
+ if not model_name or not model_name.strip():
64
+ return "Error: Por favor, ingresa un nombre para el modelo.", None
65
+
66
+ model_name = model_name.strip().replace(" ", "_")
67
+
68
+ def progress_callback(value, desc):
69
+ progress(value, desc=desc)
70
+
71
+ try:
72
+ progress(0.0, desc="Iniciando...")
73
+ pth_path, ref_path = save_voice_reference(
74
+ audio_path=audio_file,
75
+ model_name=model_name,
76
+ progress_callback=progress_callback,
77
+ )
78
+
79
+ return "¡Referencia de voz '{}' guardada con éxito!".format(model_name), ref_path
80
+
81
+ except Exception as e:
82
+ import traceback
83
+ tb = traceback.format_exc()
84
+ logger.error("Error en el entrenamiento: {}".format(tb))
85
+ return "Error : {}: {}\n\nDetalles:\n{}".format(
86
+ type(e).__name__, str(e), tb[-500:]
87
+ ), None
88
+
89
+ def get_model_choices():
90
+ """Obtener lista de nombres de modelos entrenados para el menú desplegable."""
91
+ models = list_models()
92
+ if not models:
93
+ return ["(ningún modelo)"]
94
+ return models
95
+
96
+
97
+ def convert_song(
98
+ model_choice,
99
+ song_file,
100
+ pitch,
101
+ similarity,
102
+ diffusion_steps,
103
+ vocal_volume,
104
+ instrumental_volume,
105
+ progress=gr.Progress(),
106
+ ):
107
+ """Pipeline completo: separar + convertir + mezclar."""
108
+ if song_file is None:
109
+ return "Error: Por favor, sube un archivo de audio.", None, None, None
110
+
111
+ if model_choice == "(ningún modelo)" or not model_choice:
112
+ return "Error: Por favor, guarda una referencia de voz primero.", None, None, None
113
+
114
+ from pipeline.mixing import mix_audio
115
+
116
+ try:
117
+ progress(0.05, desc="Cargando el modelo...")
118
+ pth_path, ref_or_index = download_model(model_choice)
119
+ if not pth_path:
120
+ return "Error: Modelo '{}' no encontrado.".format(model_choice), None, None, None
121
+
122
+ reference_path = get_reference_path(model_choice)
123
+ if not reference_path:
124
+ return "Error: Audio de referencia no encontrado para '{}'.".format(model_choice), None, None, None
125
+
126
+ progress(0.10, desc="Separación de pistas (Demucs)...")
127
+ vocals_path, instruments_path = separate_audio(song_file)
128
+
129
+ progress(0.40, desc="Conversión de voz (Seed-VC)...")
130
+
131
+ converted_path = convert_voice(
132
+ audio_path=vocals_path,
133
+ reference_path=reference_path,
134
+ pitch=int(pitch),
135
+ diffusion_steps=int(diffusion_steps),
136
+ similarity=float(similarity),
137
+ )
138
+
139
+ progress(0.85, desc="Mezcla final...")
140
+
141
+ final_path = mix_audio(
142
+ vocals_path=converted_path,
143
+ instruments_path=instruments_path,
144
+ vocal_volume=float(vocal_volume),
145
+ instrumental_volume=float(instrumental_volume),
146
+ )
147
+
148
+ progress(1.0, desc="¡Terminado!")
149
+
150
+ return (
151
+ "¡Conversión completada con éxito!",
152
+ vocals_path,
153
+ converted_path,
154
+ final_path,
155
+ )
156
+
157
+ except Exception as e:
158
+ import traceback
159
+ tb = traceback.format_exc()
160
+ logger.error("Error en la conversión: {}".format(tb))
161
+ return "Error : {}: {}\n\nDetalles:\n{}".format(
162
+ type(e).__name__, str(e), tb[-500:]
163
+ ), None, None, None
164
+
165
+ def refresh_models():
166
+ """Actualizar la lista de modelos como HTML."""
167
+ models = list_models()
168
+ if not models:
169
+ return "<p style='color:gray;'>Ningún modelo guardado</p>"
170
+ rows = "".join(
171
+ "<tr><td>{}</td><td>Disponible</td></tr>".format(m) for m in models
172
+ )
173
+ return (
174
+ "<table style='width:100%;border-collapse:collapse;'>"
175
+ "<tr><th style='text-align:left;border-bottom:1px solid #555;padding:8px;'>Nombre</th>"
176
+ "<th style='text-align:left;border-bottom:1px solid #555;padding:8px;'>Estado</th></tr>"
177
+ "{}</table>".format(rows)
178
+ )
179
+
180
+
181
+ def delete_selected_model(model_name_to_delete):
182
+ """Eliminar un modelo."""
183
+ if not model_name_to_delete or model_name_to_delete == "(ningún modelo)":
184
+ return "Por favor, selecciona un modelo para eliminar.", refresh_models()
185
+ try:
186
+ delete_model(model_name_to_delete)
187
+ return "Modelo '{}' eliminado.".format(model_name_to_delete), refresh_models()
188
+ except Exception as e:
189
+ return "Error : {}".format(e), refresh_models()
190
+
191
+ with gr.Blocks(
192
+ title="Clon de Voz",
193
+ theme=gr.themes.Soft(),
194
+ ) as app:
195
+
196
+ gr.Markdown(
197
+ "# 🎤 Aplicación de Clonación de Voz (Seed-VC)\n"
198
+ "> Powered by [Seed-VC](https://github.com/Plachta/seed-vc) + [Demucs](https://github.com/facebookresearch/demucs) · ZeroGPU · Zero-shot"
199
+ )
200
+
201
+ with gr.Tabs():
202
+ # Pestaña 1: Referencia de voz
203
+ with gr.TabItem("Mi voz"):
204
+ gr.Markdown("### Guardar tu referencia de voz")
205
+
206
+ with gr.Row():
207
+ with gr.Column(scale=2):
208
+ train_audio = gr.Audio(
209
+ label="Extracto de tu voz (WAV o MP3, 3-30 segundos)",
210
+ type="filepath",
211
+ sources=["upload"],
212
+ )
213
+ train_model_name = gr.Textbox(
214
+ label="Nombre del perfil",
215
+ placeholder="ej: mi_voz",
216
+ max_lines=1,
217
+ )
218
+ train_btn = gr.Button(
219
+ "Guardar",
220
+ variant="primary",
221
+ size="lg",
222
+ )
223
+
224
+ with gr.Column(scale=1):
225
+ train_status = gr.Textbox(
226
+ label="Estado",
227
+ interactive=False,
228
+ lines=3,
229
+ )
230
+ train_download = gr.File(
231
+ label="Archivo de referencia",
232
+ interactive=False,
233
+ )
234
+
235
+ gr.Markdown(
236
+ "**Consejos:**\n"
237
+ "- Usa una grabación limpia (sin ruido de fondo, sin música)\n"
238
+ "- Habla o canta naturalmente durante 3 a 30 segundos\n"
239
+ "- Mientras más largo y variado sea el extracto, mejor será el resultado\n"
240
+ "- Se aceptan formatos WAV o MP3"
241
+ )
242
+
243
+ train_btn.click(
244
+ fn=train_voice_model,
245
+ inputs=[train_audio, train_model_name],
246
+ outputs=[train_status, train_download],
247
+ )
248
+
249
+ # Pestaña 2: Conversión
250
+ with gr.TabItem("Convertir una canción"):
251
+ gr.Markdown("### Reemplazar la voz de una canción por la tuya")
252
+
253
+ with gr.Row():
254
+ with gr.Column(scale=2):
255
+ convert_model = gr.Dropdown(
256
+ choices=get_model_choices(),
257
+ label="Perfil de voz",
258
+ interactive=True,
259
+ )
260
+ refresh_btn = gr.Button("Actualizar lista", size="sm")
261
+ convert_audio = gr.Audio(
262
+ label="Canción a convertir (WAV o MP3)",
263
+ type="filepath",
264
+ sources=["upload"],
265
+ )
266
+
267
+ with gr.Accordion("Parámetros avanzados", open=False):
268
+ convert_pitch = gr.Slider(
269
+ minimum=-24,
270
+ maximum=24,
271
+ value=0,
272
+ step=1,
273
+ label="Transposición (semitonos)",
274
+ )
275
+ convert_similarity = gr.Slider(
276
+ minimum=0.0,
277
+ maximum=1.0,
278
+ value=0.7,
279
+ step=0.05,
280
+ label="Similitud de voz (0.5=natural, 0.7=equilibrado, 0.9=más fiel)",
281
+ )
282
+ convert_diffusion = gr.Slider(
283
+ minimum=5,
284
+ maximum=100,
285
+ value=25,
286
+ step=5,
287
+ label="Calidad (10=rápido, 25=equilibrado, 50=alta calidad)",
288
+ )
289
+ convert_vocal_vol = gr.Slider(
290
+ minimum=0.0,
291
+ maximum=2.0,
292
+ value=1.0,
293
+ step=0.1,
294
+ label="Volumen de la voz",
295
+ )
296
+ convert_inst_vol = gr.Slider(
297
+ minimum=0.0,
298
+ maximum=2.0,
299
+ value=1.0,
300
+ step=0.1,
301
+ label="Volumen de los instrumentos",
302
+ )
303
+
304
+ convert_btn = gr.Button(
305
+ "Convertir y mezclar",
306
+ variant="primary",
307
+ size="lg",
308
+ )
309
+
310
+ with gr.Column(scale=1):
311
+ convert_status = gr.Textbox(
312
+ label="Estado",
313
+ interactive=False,
314
+ lines=3,
315
+ )
316
+ gr.Markdown("**Vista previa de las pistas:**")
317
+ preview_vocals = gr.Audio(
318
+ label="Voz original (separada)",
319
+ interactive=False,
320
+ )
321
+ preview_converted = gr.Audio(
322
+ label="Voz convertida",
323
+ interactive=False,
324
+ )
325
+ gr.Markdown("**Resultado final:**")
326
+ final_output = gr.Audio(
327
+ label="Canción final (voz + instrumentos)",
328
+ interactive=False,
329
+ )
330
+
331
+ refresh_btn.click(
332
+ fn=lambda: gr.Dropdown(choices=get_model_choices()),
333
+ outputs=[convert_model],
334
+ )
335
+
336
+ convert_btn.click(
337
+ fn=convert_song,
338
+ inputs=[
339
+ convert_model,
340
+ convert_audio,
341
+ convert_pitch,
342
+ convert_similarity,
343
+ convert_diffusion,
344
+ convert_vocal_vol,
345
+ convert_inst_vol,
346
+ ],
347
+ outputs=[convert_status, preview_vocals, preview_converted, final_output],
348
+ )
349
+
350
+ # Pestaña 3: Modelos
351
+ with gr.TabItem("Mis modelos"):
352
+ gr.Markdown("### Gestionar tus perfiles de voz")
353
+
354
+ models_table = gr.HTML(
355
+ value=refresh_models(),
356
+ label="Modelos guardados",
357
+ )
358
+
359
+ with gr.Row():
360
+ models_refresh_btn = gr.Button("Actualizar", size="sm")
361
+ models_delete_name = gr.Dropdown(
362
+ choices=get_model_choices(),
363
+ label="Modelo a eliminar",
364
+ interactive=True,
365
+ )
366
+ models_delete_btn = gr.Button("Eliminar", variant="stop", size="sm")
367
+
368
+ models_delete_status = gr.Textbox(label="Estado", interactive=False)
369
+
370
+ models_refresh_btn.click(
371
+ fn=refresh_models,
372
+ outputs=[models_table],
373
+ )
374
+ models_refresh_btn.click(
375
+ fn=lambda: gr.Dropdown(choices=get_model_choices()),
376
+ outputs=[models_delete_name],
377
+ )
378
+
379
+ models_delete_btn.click(
380
+ fn=delete_selected_model,
381
+ inputs=[models_delete_name],
382
+ outputs=[models_delete_status, models_table],
383
+ )
384
+
385
+ # Pestaña 4: Debug (temporal)
386
+ with gr.TabItem("Depuración GPU"):
387
+ gr.Markdown("### Logs del Trabajador GPU (para diagnóstico)")
388
+ debug_output = gr.Textbox(
389
+ label="Últimos logs de GPU",
390
+ interactive=False,
391
+ lines=20,
392
+ )
393
+ debug_btn = gr.Button("Leer los logs", size="sm")
394
+
395
+ def read_debug_log():
396
+ log_path = "/home/user/app/debug_gpu.log"
397
+ if os.path.exists(log_path):
398
+ with open(log_path, "r") as f:
399
+ return f.read()
400
+ return "Ningún log disponible. Ejecuta una conversión primero."
401
+
402
+ debug_btn.click(fn=read_debug_log, outputs=[debug_output])
403
+
404
+
405
+ if __name__ == "__main__":
406
+ app.launch()
requirements.txt CHANGED
@@ -4,10 +4,8 @@ gradio-client==1.5.4
4
  spaces>=0.30.0
5
  huggingface_hub>=0.23.0
6
 
7
- # PyTorch (ZeroGPU compatible)
8
- torch==2.5.1
9
- torchaudio==2.5.1
10
- torchvision==0.20.1
11
 
12
  # Audio processing
13
  librosa==0.10.2.post1
 
4
  spaces>=0.30.0
5
  huggingface_hub>=0.23.0
6
 
7
+ # PyTorch — managed by ZeroGPU, do NOT pin versions here
8
+ # torch, torchaudio, torchvision are pre-installed by the ZeroGPU runtime
 
 
9
 
10
  # Audio processing
11
  librosa==0.10.2.post1