刘鑫 committed on
Commit
44d9bcd
·
1 Parent(s): 364d86e

fix: harden async nanovllm demo stability

Browse files

Move the Gradio demo to an async nano-vLLM bridge, limit denoise concurrency, and improve user-facing error handling so bad requests do not disrupt other active sessions.

Files changed (2) hide show
  1. README.md +2 -1
  2. app.py +281 -73
README.md CHANGED
@@ -61,5 +61,6 @@ Recommended environment variables:
61
  - `NANOVLLM_TEMPERATURE`: defaults to `1.0`
62
  - `REQUEST_LOG_DIR`: optional persistent request log directory. Defaults to `/data/logs` when `/data` exists
63
  - `GRADIO_QUEUE_MAX_SIZE`: defaults to `10`
64
- - `GRADIO_DEFAULT_CONCURRENCY_LIMIT`: defaults to `1` (nanovllm-voxcpm event loop is not thread-safe; do NOT increase)
 
65
  - `GRADIO_SSR_MODE`: defaults to `false`
 
61
  - `NANOVLLM_TEMPERATURE`: defaults to `1.0`
62
  - `REQUEST_LOG_DIR`: optional persistent request log directory. Defaults to `/data/logs` when `/data` exists
63
  - `GRADIO_QUEUE_MAX_SIZE`: defaults to `10`
64
+ - `GRADIO_DEFAULT_CONCURRENCY_LIMIT`: defaults to `4` (uses async server pool bridge for thread-safe concurrency)
65
+ - `DENOISE_MAX_CONCURRENT`: defaults to `1` (limits concurrent ZipEnhancer denoise requests to avoid GPU OOM)
66
  - `GRADIO_SSR_MODE`: defaults to `false`
app.py CHANGED
@@ -1,12 +1,14 @@
 
1
  import atexit
2
  import json
3
  import logging
4
  import os
 
5
  import sys
6
  import tempfile
7
  from datetime import datetime, timezone
8
  from pathlib import Path
9
- from threading import Lock, Thread
10
  from typing import Optional, Tuple
11
 
12
  import gradio as gr
@@ -61,11 +63,15 @@ _asr_model = None
61
  _voxcpm_server = None
62
  _model_info = None
63
  _denoiser = None
 
64
  _server_lock = Lock()
65
  _prewarm_lock = Lock()
66
  _denoiser_lock = Lock()
 
67
  _prewarm_started = False
68
  _runtime_diag_logged = False
 
 
69
 
70
 
71
  def _get_int_env(name: str, default: int) -> int:
@@ -216,6 +222,23 @@ def _get_audio_duration_seconds(audio_path: str) -> float:
216
  return float(info.frames) / float(info.samplerate)
217
 
218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  def _validate_reference_audio_duration(
220
  audio_path: str, request: Optional[gr.Request] = None
221
  ) -> None:
@@ -225,7 +248,10 @@ def _validate_reference_audio_duration(
225
 
226
 
227
  def _prepare_audio_for_encoding(
228
- audio_path: Optional[str], *, denoise: bool, request: Optional[gr.Request] = None
 
 
 
229
  ) -> tuple[bytes | None, str | None, Optional[str]]:
230
  if audio_path is None or not audio_path.strip():
231
  return None, None, None
@@ -236,22 +262,30 @@ def _prepare_audio_for_encoding(
236
  temp_path = None
237
  if denoise:
238
  logger.info("Applying ZipEnhancer denoising to reference audio ...")
 
 
 
239
  try:
240
  temp_path = get_denoiser().enhance(audio_path)
241
  source_path = temp_path
242
  except Exception as exc:
243
- raise RuntimeError(f"ZipEnhancer denoising failed: {exc}") from exc
 
 
 
244
 
245
  audio_bytes, audio_format = _read_audio_bytes(source_path)
246
  return audio_bytes, audio_format, temp_path
247
 
248
 
249
- def _safe_prompt_wav_recognition(use_prompt_text: bool, prompt_wav: Optional[str]) -> str:
 
 
250
  try:
251
  return prompt_wav_recognition(use_prompt_text, prompt_wav)
252
  except Exception as exc:
253
  logger.warning(f"ASR recognition failed: {exc}")
254
- return ""
255
 
256
 
257
 
@@ -261,12 +295,15 @@ def _stop_server_if_needed() -> None:
261
  if _voxcpm_server is None:
262
  return
263
 
264
- stop = getattr(_voxcpm_server, "stop", None)
265
- if callable(stop):
266
- try:
267
- stop()
268
- except Exception as exc:
269
- logger.warning(f"Failed to stop nano-vLLM server cleanly: {exc}")
 
 
 
270
 
271
  _voxcpm_server = None
272
  _model_info = None
@@ -370,6 +407,10 @@ _I18N_TRANSLATIONS = {
370
  "cfg_label": "CFG (guidance scale)",
371
  "cfg_info": "Higher → closer to the prompt / reference; lower → more creative variation",
372
  "reference_audio_too_long_error": "Reference audio is too long. Please upload audio no longer than 50 seconds.",
 
 
 
 
373
  "usage_instructions": _USAGE_INSTRUCTIONS_EN,
374
  "examples_footer": _EXAMPLES_FOOTER_EN,
375
  },
@@ -392,6 +433,10 @@ _I18N_TRANSLATIONS = {
392
  "cfg_label": "CFG(引导强度)",
393
  "cfg_info": "数值越高 → 越贴合提示/参考音色;数值越低 → 生成风格更自由",
394
  "reference_audio_too_long_error": "参考音频太长了,请上传不超过 50 秒的音频。",
 
 
 
 
395
  "usage_instructions": _USAGE_INSTRUCTIONS_ZH,
396
  "examples_footer": _EXAMPLES_FOOTER_ZH,
397
  },
@@ -499,7 +544,11 @@ _APP_THEME = gr.themes.Soft(
499
 
500
  def get_asr_model():
501
  global _asr_model
502
- if _asr_model is None:
 
 
 
 
503
  from funasr import AutoModel
504
  from huggingface_hub import snapshot_download
505
 
@@ -518,7 +567,142 @@ def get_asr_model():
518
  return _asr_model
519
 
520
 
521
- def get_voxcpm_server():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
522
  global _voxcpm_server, _model_info
523
  if _voxcpm_server is not None:
524
  return _voxcpm_server
@@ -527,22 +711,10 @@ def get_voxcpm_server():
527
  if _voxcpm_server is not None:
528
  return _voxcpm_server
529
 
530
- _log_runtime_diagnostics_once()
531
- from nanovllm_voxcpm import VoxCPM
532
-
533
- model_ref = _resolve_model_ref()
534
- logger.info(f"Loading nano-vLLM VoxCPM server from {model_ref} ...")
535
- _voxcpm_server = VoxCPM.from_pretrained(
536
- model=model_ref,
537
- max_num_batched_tokens=_get_int_env("NANOVLLM_SERVERPOOL_MAX_NUM_BATCHED_TOKENS", 8192),
538
- max_num_seqs=_get_int_env("NANOVLLM_SERVERPOOL_MAX_NUM_SEQS", 16),
539
- max_model_len=_get_int_env("NANOVLLM_SERVERPOOL_MAX_MODEL_LEN", 4096),
540
- gpu_memory_utilization=_get_float_env("NANOVLLM_SERVERPOOL_GPU_MEMORY_UTILIZATION", 0.95),
541
- enforce_eager=_get_bool_env("NANOVLLM_SERVERPOOL_ENFORCE_EAGER", False),
542
- devices=_get_devices_env(),
543
- )
544
- _model_info = _voxcpm_server.get_model_info()
545
- logger.info(f"nano-vLLM VoxCPM server loaded: {_model_info}")
546
  return _voxcpm_server
547
 
548
 
@@ -690,6 +862,7 @@ def generate_tts_audio(
690
  denoise: bool = True,
691
  request: Optional[gr.Request] = None,
692
  ) -> Tuple[int, np.ndarray]:
 
693
  request_payload = {
694
  "event": "tts_request",
695
  "ui_language": _resolve_ui_language(request),
@@ -711,48 +884,80 @@ def generate_tts_audio(
711
  request_payload["reference_audio_duration_error"] = str(exc)
712
 
713
  try:
714
- result = _generate_tts_audio_once(
715
- text_input=text_input,
716
- control_instruction=control_instruction,
717
- reference_wav_path_input=reference_wav_path_input,
718
- use_prompt_text=use_prompt_text,
719
- prompt_text_input=prompt_text_input,
720
- cfg_value_input=cfg_value_input,
721
- do_normalize=do_normalize,
722
- denoise=denoise,
723
- request=request,
724
- )
725
  try:
726
- _append_request_log({**request_payload, "status": "success"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
727
  except Exception as exc:
728
- logger.warning(f"Failed to append request log: {exc}")
729
- return result
730
- except ValueError:
731
- raise
732
- except Exception as exc:
733
- try:
734
- _append_request_log({**request_payload, "status": "error", "error": str(exc)})
735
- except Exception as log_exc:
736
- logger.warning(f"Failed to append request log: {log_exc}")
737
- logger.warning(f"Generation failed, restarting backend and retrying once: {exc}")
738
- with _server_lock:
739
- _stop_server_if_needed()
740
- result = _generate_tts_audio_once(
741
- text_input=text_input,
742
- control_instruction=control_instruction,
743
- reference_wav_path_input=reference_wav_path_input,
744
- use_prompt_text=use_prompt_text,
745
- prompt_text_input=prompt_text_input,
746
- cfg_value_input=cfg_value_input,
747
- do_normalize=do_normalize,
748
- denoise=denoise,
749
- request=request,
750
- )
751
- try:
752
- _append_request_log({**request_payload, "status": "success_after_retry"})
753
- except Exception as log_exc:
754
- logger.warning(f"Failed to append request log: {log_exc}")
755
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
756
 
757
 
758
  # ---------- UI ----------
@@ -774,13 +979,16 @@ def create_demo_interface():
774
  gr.update(visible=True, interactive=True),
775
  )
776
 
777
- def _run_asr_if_needed(checked, audio_path):
778
  if not checked or not audio_path:
779
  return gr.update()
780
  logger.info("Running ASR on reference audio...")
781
- asr_text = _safe_prompt_wav_recognition(True, audio_path)
782
  logger.info(f"ASR result: {asr_text[:60]}...")
783
- return gr.update(value=asr_text)
 
 
 
784
 
785
  with gr.Blocks() as interface:
786
  if (assets_dir / "voxcpm_logo.png").exists():
@@ -888,7 +1096,7 @@ def run_demo(
888
  _start_background_prewarm()
889
  interface.queue(
890
  max_size=_get_int_env("GRADIO_QUEUE_MAX_SIZE", 10),
891
- default_concurrency_limit=_get_int_env("GRADIO_DEFAULT_CONCURRENCY_LIMIT", 1),
892
  ).launch(
893
  server_name=server_name,
894
  server_port=int(os.environ.get("PORT", server_port)),
 
1
+ import asyncio
2
  import atexit
3
  import json
4
  import logging
5
  import os
6
+ import queue
7
  import sys
8
  import tempfile
9
  from datetime import datetime, timezone
10
  from pathlib import Path
11
+ from threading import Lock, Semaphore, Thread
12
  from typing import Optional, Tuple
13
 
14
  import gradio as gr
 
63
  _voxcpm_server = None
64
  _model_info = None
65
  _denoiser = None
66
+ _asr_lock = Lock()
67
  _server_lock = Lock()
68
  _prewarm_lock = Lock()
69
  _denoiser_lock = Lock()
70
+ _denoise_semaphore = Semaphore(int(os.environ.get("DENOISE_MAX_CONCURRENT", "1")))
71
  _prewarm_started = False
72
  _runtime_diag_logged = False
73
+ _active_generation_requests = 0
74
+ _active_generation_lock = Lock()
75
 
76
 
77
  def _get_int_env(name: str, default: int) -> int:
 
222
  return float(info.frames) / float(info.samplerate)
223
 
224
 
225
def _begin_generation_request() -> None:
    """Record that one more TTS generation request is in flight."""
    global _active_generation_requests
    with _active_generation_lock:
        _active_generation_requests = _active_generation_requests + 1
229
+
230
+
231
def _end_generation_request() -> None:
    """Record completion of a generation request; the counter never drops below zero."""
    global _active_generation_requests
    with _active_generation_lock:
        decremented = _active_generation_requests - 1
        _active_generation_requests = decremented if decremented > 0 else 0
235
+
236
+
237
def _get_active_generation_requests() -> int:
    """Return a point-in-time snapshot of the in-flight generation count."""
    with _active_generation_lock:
        snapshot = _active_generation_requests
    return snapshot
240
+
241
+
242
  def _validate_reference_audio_duration(
243
  audio_path: str, request: Optional[gr.Request] = None
244
  ) -> None:
 
248
 
249
 
250
  def _prepare_audio_for_encoding(
251
+ audio_path: Optional[str],
252
+ *,
253
+ denoise: bool,
254
+ request: Optional[gr.Request] = None,
255
  ) -> tuple[bytes | None, str | None, Optional[str]]:
256
  if audio_path is None or not audio_path.strip():
257
  return None, None, None
 
262
  temp_path = None
263
  if denoise:
264
  logger.info("Applying ZipEnhancer denoising to reference audio ...")
265
+ acquired = _denoise_semaphore.acquire(timeout=30)
266
+ if not acquired:
267
+ raise gr.Error(_get_i18n_text("denoise_busy_error", request))
268
  try:
269
  temp_path = get_denoiser().enhance(audio_path)
270
  source_path = temp_path
271
  except Exception as exc:
272
+ logger.exception("ZipEnhancer denoising failed")
273
+ raise gr.Error(_get_i18n_text("denoise_failed_error", request)) from exc
274
+ finally:
275
+ _denoise_semaphore.release()
276
 
277
  audio_bytes, audio_format = _read_audio_bytes(source_path)
278
  return audio_bytes, audio_format, temp_path
279
 
280
 
281
def _safe_prompt_wav_recognition(
    use_prompt_text: bool, prompt_wav: Optional[str], request: Optional[gr.Request] = None
) -> str:
    """Run prompt-audio ASR, converting any backend failure into a user-facing error.

    Args:
        use_prompt_text: Whether the UI requested an auto-filled transcript.
        prompt_wav: Path to the uploaded reference audio, or ``None``.
        request: Current Gradio request, used to localize the error message.

    Returns:
        The transcript produced by ``prompt_wav_recognition``.

    Raises:
        gr.Error: If ASR fails for any reason; the original exception is chained
            so logs retain the root cause while the user sees a localized message.
    """
    try:
        return prompt_wav_recognition(use_prompt_text, prompt_wav)
    except Exception as exc:
        # Lazy %-style args avoid formatting the message when the level is filtered.
        logger.warning("ASR recognition failed: %s", exc)
        raise gr.Error(_get_i18n_text("asr_failed_error", request)) from exc
289
 
290
 
291
 
 
295
  if _voxcpm_server is None:
296
  return
297
 
298
+ if isinstance(_voxcpm_server, _AsyncServerBridge):
299
+ _voxcpm_server.stop()
300
+ else:
301
+ stop = getattr(_voxcpm_server, "stop", None)
302
+ if callable(stop):
303
+ try:
304
+ stop()
305
+ except Exception as exc:
306
+ logger.warning(f"Failed to stop nano-vLLM server cleanly: {exc}")
307
 
308
  _voxcpm_server = None
309
  _model_info = None
 
407
  "cfg_label": "CFG (guidance scale)",
408
  "cfg_info": "Higher → closer to the prompt / reference; lower → more creative variation",
409
  "reference_audio_too_long_error": "Reference audio is too long. Please upload audio no longer than 50 seconds.",
410
+ "denoise_busy_error": "Too many reference-audio enhancement requests are running. Please try again in a moment.",
411
+ "denoise_failed_error": "Reference audio enhancement failed. Please try disabling denoise or use a cleaner clip.",
412
+ "backend_retry_error": "The backend is temporarily unstable. Please try again in a moment.",
413
+ "asr_failed_error": "ASR failed. Please fill the transcript manually or try another reference audio.",
414
  "usage_instructions": _USAGE_INSTRUCTIONS_EN,
415
  "examples_footer": _EXAMPLES_FOOTER_EN,
416
  },
 
433
  "cfg_label": "CFG(引导强度)",
434
  "cfg_info": "数值越高 → 越贴合提示/参考音色;数值越低 → 生成风格更自由",
435
  "reference_audio_too_long_error": "参考音频太长了,请上传不超过 50 秒的音频。",
436
+ "denoise_busy_error": "当前参考音频降噪请求过多,请稍后再试。",
437
+ "denoise_failed_error": "参考音频降噪失败,请尝试关闭降噪或更换更干净的音频。",
438
+ "backend_retry_error": "后端暂时不稳定,请稍后再试。",
439
+ "asr_failed_error": "ASR 识别失败,请手动填写参考音频文本,或更换一段参考音频后重试。",
440
  "usage_instructions": _USAGE_INSTRUCTIONS_ZH,
441
  "examples_footer": _EXAMPLES_FOOTER_ZH,
442
  },
 
544
 
545
  def get_asr_model():
546
  global _asr_model
547
+ if _asr_model is not None:
548
+ return _asr_model
549
+ with _asr_lock:
550
+ if _asr_model is not None:
551
+ return _asr_model
552
  from funasr import AutoModel
553
  from huggingface_hub import snapshot_download
554
 
 
567
  return _asr_model
568
 
569
 
570
class _AsyncServerBridge:
    """Thread-safe bridge to AsyncVoxCPM2ServerPool running in a dedicated event loop.

    A private event loop runs forever on a daemon thread; synchronous callers
    (Gradio worker threads) submit coroutines onto it via
    ``asyncio.run_coroutine_threadsafe`` and wait on the returned futures, so
    the pool's coroutines never execute outside their own loop.
    """

    def __init__(self):
        # Loop/thread are created lazily in start(); None means "not started".
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._thread: Optional[Thread] = None
        self._server_pool = None
        self._model_info: Optional[dict] = None
        # Once True, encode_latents()/generate() refuse new work and stop() is a no-op.
        self._closed = False

    def _run_loop(self) -> None:
        # Thread target: adopt the pre-created loop and block in run_forever()
        # until stop() schedules loop.stop() from another thread.
        assert self._loop is not None
        asyncio.set_event_loop(self._loop)
        self._loop.run_forever()

    def start(self) -> None:
        """Spin up the event-loop thread and load the server pool, blocking until ready.

        Raises whatever model loading raises; on failure the bridge tears itself
        down via stop() before re-raising.
        """
        _log_runtime_diagnostics_once()
        model_ref = _resolve_model_ref()
        logger.info(f"Loading nano-vLLM VoxCPM async server from {model_ref} ...")

        self._loop = asyncio.new_event_loop()
        # Daemon thread so a hung loop cannot keep the process alive at exit.
        self._thread = Thread(target=self._run_loop, name="nanovllm-event-loop", daemon=True)
        self._thread.start()

        try:
            async def _init():
                # Imported lazily so the heavy dependency loads on the loop thread,
                # only when a server is actually requested.
                from nanovllm_voxcpm import VoxCPM

                pool = VoxCPM.from_pretrained(
                    model=model_ref,
                    max_num_batched_tokens=_get_int_env("NANOVLLM_SERVERPOOL_MAX_NUM_BATCHED_TOKENS", 8192),
                    max_num_seqs=_get_int_env("NANOVLLM_SERVERPOOL_MAX_NUM_SEQS", 16),
                    max_model_len=_get_int_env("NANOVLLM_SERVERPOOL_MAX_MODEL_LEN", 4096),
                    gpu_memory_utilization=_get_float_env("NANOVLLM_SERVERPOOL_GPU_MEMORY_UTILIZATION", 0.95),
                    enforce_eager=_get_bool_env("NANOVLLM_SERVERPOOL_ENFORCE_EAGER", False),
                    devices=_get_devices_env(),
                )
                # Presumably blocks until worker processes are serving — TODO confirm
                # against nanovllm_voxcpm's wait_for_ready() contract.
                await pool.wait_for_ready()
                return pool

            # future.result() with no timeout: model loading is allowed to take
            # as long as it needs; the caller blocks here.
            future = asyncio.run_coroutine_threadsafe(_init(), self._loop)
            self._server_pool = future.result()

            info_future = asyncio.run_coroutine_threadsafe(
                self._server_pool.get_model_info(), self._loop
            )
            self._model_info = info_future.result()
            logger.info(f"nano-vLLM async server loaded: {self._model_info}")
        except Exception:
            # Leave no half-started loop thread behind before propagating.
            self.stop()
            raise

    def get_model_info(self) -> dict:
        # Only valid after a successful start(); start() caches the pool's info.
        assert self._model_info is not None
        return self._model_info

    def encode_latents(self, wav: bytes, wav_format: str, timeout: float = 120) -> bytes:
        """Encode reference audio to latents on the loop thread, with a hard timeout."""
        if self._closed:
            raise RuntimeError("nano-vLLM bridge is closed")
        assert self._loop is not None and self._server_pool is not None
        future = asyncio.run_coroutine_threadsafe(
            self._server_pool.encode_latents(wav, wav_format), self._loop
        )
        try:
            return future.result(timeout=timeout)
        finally:
            # On timeout (or any other exit before completion) cancel the
            # coroutine so it does not keep running unattended on the loop.
            if not future.done():
                future.cancel()

    def generate(self, timeout: float = 300, **kwargs):
        """Yield generation chunks produced by the pool's async generator.

        A drain coroutine runs on the loop thread and forwards each chunk into a
        thread-safe queue; this (synchronous) generator consumes the queue.
        Sentinels: ``None`` marks normal completion, an ``Exception`` instance
        carries a failure across the thread boundary.

        Raises:
            TimeoutError: If the whole stream takes longer than ``timeout`` seconds.
            RuntimeError: If the bridge has been closed.
        """
        if self._closed:
            raise RuntimeError("nano-vLLM bridge is closed")
        assert self._loop is not None and self._server_pool is not None
        result_queue: queue.Queue = queue.Queue()
        import time as _time

        async def _drain():
            try:
                async for chunk in self._server_pool.generate(**kwargs):
                    result_queue.put(chunk)
                result_queue.put(None)
            except Exception as exc:
                result_queue.put(exc)

        # Single deadline for the entire stream, not per chunk.
        deadline = _time.monotonic() + timeout
        future = asyncio.run_coroutine_threadsafe(_drain(), self._loop)
        try:
            while True:
                remaining = deadline - _time.monotonic()
                if remaining <= 0:
                    raise TimeoutError(f"Generation exceeded {timeout}s timeout")
                try:
                    # Short poll so we can also notice the drain task dying
                    # without ever enqueuing a sentinel (e.g. cancellation).
                    item = result_queue.get(timeout=min(0.5, remaining))
                except queue.Empty:
                    if future.done():
                        exc = future.exception()
                        if exc is not None:
                            raise exc
                    continue
                if item is None:
                    break
                if isinstance(item, Exception):
                    raise item
                yield item
        finally:
            # Consumer may abandon the generator early; stop the producer too.
            if not future.done():
                future.cancel()

    def stop(self) -> None:
        """Shut down the pool, stop the loop, join the thread, and reset all state.

        Idempotent: subsequent calls return immediately once ``_closed`` is set.
        """
        if self._closed:
            return
        self._closed = True
        try:
            if self._loop is not None and self._server_pool is not None:
                # Give the pool up to 10s to stop cleanly; log (not raise) on failure.
                future = asyncio.run_coroutine_threadsafe(self._server_pool.stop(), self._loop)
                future.result(timeout=10)
        except Exception as exc:
            logger.warning(f"Failed to stop async server pool cleanly: {exc}")
        finally:
            if self._loop is not None:
                self._loop.call_soon_threadsafe(self._loop.stop)
            if self._thread is not None:
                self._thread.join(timeout=5)
            # Close the loop only when its thread has actually exited; closing a
            # loop that is still running would raise.
            if (
                self._loop is not None
                and not self._loop.is_closed()
                and (self._thread is None or not self._thread.is_alive())
            ):
                self._loop.close()
            self._server_pool = None
            self._model_info = None
            self._thread = None
            self._loop = None
+
704
+
705
+ def get_voxcpm_server() -> _AsyncServerBridge:
706
  global _voxcpm_server, _model_info
707
  if _voxcpm_server is not None:
708
  return _voxcpm_server
 
711
  if _voxcpm_server is not None:
712
  return _voxcpm_server
713
 
714
+ bridge = _AsyncServerBridge()
715
+ bridge.start()
716
+ _voxcpm_server = bridge
717
+ _model_info = bridge.get_model_info()
 
 
 
 
 
 
 
 
 
 
 
 
718
  return _voxcpm_server
719
 
720
 
 
862
  denoise: bool = True,
863
  request: Optional[gr.Request] = None,
864
  ) -> Tuple[int, np.ndarray]:
865
+ _begin_generation_request()
866
  request_payload = {
867
  "event": "tts_request",
868
  "ui_language": _resolve_ui_language(request),
 
884
  request_payload["reference_audio_duration_error"] = str(exc)
885
 
886
  try:
 
 
 
 
 
 
 
 
 
 
 
887
  try:
888
+ result = _generate_tts_audio_once(
889
+ text_input=text_input,
890
+ control_instruction=control_instruction,
891
+ reference_wav_path_input=reference_wav_path_input,
892
+ use_prompt_text=use_prompt_text,
893
+ prompt_text_input=prompt_text_input,
894
+ cfg_value_input=cfg_value_input,
895
+ do_normalize=do_normalize,
896
+ denoise=denoise,
897
+ request=request,
898
+ )
899
+ try:
900
+ _append_request_log({**request_payload, "status": "success"})
901
+ except Exception as exc:
902
+ logger.warning(f"Failed to append request log: {exc}")
903
+ return result
904
+ except (ValueError, gr.Error) as exc:
905
+ try:
906
+ _append_request_log(
907
+ {**request_payload, "status": "rejected", "error": str(exc)}
908
+ )
909
+ except Exception as log_exc:
910
+ logger.warning(f"Failed to append request log: {log_exc}")
911
+ if isinstance(exc, gr.Error):
912
+ raise
913
+ raise gr.Error(str(exc)) from exc
914
  except Exception as exc:
915
+ logger.exception("Generation failed")
916
+ try:
917
+ _append_request_log({**request_payload, "status": "error", "error": str(exc)})
918
+ except Exception as log_exc:
919
+ logger.warning(f"Failed to append request log: {log_exc}")
920
+
921
+ active_requests = _get_active_generation_requests()
922
+ if active_requests > 1:
923
+ logger.warning(
924
+ "Generation failed with %s active requests; skipping shared backend restart: %s",
925
+ active_requests,
926
+ exc,
927
+ )
928
+ raise gr.Error(_get_i18n_text("backend_retry_error", request)) from exc
929
+
930
+ logger.warning(f"Generation failed, restarting backend and retrying once: {exc}")
931
+ with _server_lock:
932
+ _stop_server_if_needed()
933
+ try:
934
+ result = _generate_tts_audio_once(
935
+ text_input=text_input,
936
+ control_instruction=control_instruction,
937
+ reference_wav_path_input=reference_wav_path_input,
938
+ use_prompt_text=use_prompt_text,
939
+ prompt_text_input=prompt_text_input,
940
+ cfg_value_input=cfg_value_input,
941
+ do_normalize=do_normalize,
942
+ denoise=denoise,
943
+ request=request,
944
+ )
945
+ try:
946
+ _append_request_log({**request_payload, "status": "success_after_retry"})
947
+ except Exception as log_exc:
948
+ logger.warning(f"Failed to append request log: {log_exc}")
949
+ return result
950
+ except Exception as retry_exc:
951
+ logger.exception("Retry failed")
952
+ try:
953
+ _append_request_log(
954
+ {**request_payload, "status": "retry_failed", "error": str(retry_exc)}
955
+ )
956
+ except Exception as log_exc:
957
+ logger.warning(f"Failed to append request log: {log_exc}")
958
+ raise gr.Error(_get_i18n_text("backend_retry_error", request)) from retry_exc
959
+ finally:
960
+ _end_generation_request()
961
 
962
 
963
  # ---------- UI ----------
 
979
  gr.update(visible=True, interactive=True),
980
  )
981
 
982
def _run_asr_if_needed(checked, audio_path, request: gr.Request = None):
    """Auto-fill the prompt-text box via ASR when the checkbox is on and audio is set."""
    if not checked or not audio_path:
        return gr.update()
    logger.info("Running ASR on reference audio...")
    recognized = _safe_prompt_wav_recognition(True, audio_path, request=request)
    logger.info(f"ASR result: {recognized[:60]}...")
    localized_placeholder = _get_i18n_text("prompt_text_placeholder", request)
    return gr.update(value=recognized, placeholder=localized_placeholder)
992
 
993
  with gr.Blocks() as interface:
994
  if (assets_dir / "voxcpm_logo.png").exists():
 
1096
  _start_background_prewarm()
1097
  interface.queue(
1098
  max_size=_get_int_env("GRADIO_QUEUE_MAX_SIZE", 10),
1099
+ default_concurrency_limit=_get_int_env("GRADIO_DEFAULT_CONCURRENCY_LIMIT", 4),
1100
  ).launch(
1101
  server_name=server_name,
1102
  server_port=int(os.environ.get("PORT", server_port)),