dimensionalpulsar committed on
Commit
6471984
·
1 Parent(s): 10addd5

fix: lazy-import torch/torchaudio in inference.py to avoid libcudart.so.13 crash; pin cu124 wheels in requirements to prevent demucs pulling CUDA-13 torchaudio

Browse files
Files changed (2) hide show
  1. pipeline/inference.py +18 -5
  2. requirements.txt +7 -0
pipeline/inference.py CHANGED
@@ -7,9 +7,9 @@ import os
7
  import sys
8
  import logging
9
  import numpy as np
10
- import torch
11
- import torchaudio
12
- import librosa
13
 
14
  logger = logging.getLogger(__name__)
15
 
@@ -34,6 +34,8 @@ def _load_seed_vc_models(device):
34
  if "model" in _model_cache:
35
  return _model_cache
36
 
 
 
37
  import yaml
38
  from modules.commons import recursive_munch, build_model, load_checkpoint
39
  from hf_utils import load_custom_model_from_hf
@@ -250,12 +252,23 @@ def convert_voice(
250
  raise
251
 
252
 
253
- @torch.no_grad()
254
- @torch.inference_mode()
255
  def _convert_voice_impl(audio_path, reference_path, pitch, diffusion_steps, similarity=0.7):
256
  """Actual conversion implementation (called from GPU-decorated wrapper)."""
 
 
 
257
  import soundfile as sf
258
 
 
 
 
 
 
 
 
 
 
 
259
  os.makedirs(OUTPUT_DIR, exist_ok=True)
260
  base_name = os.path.splitext(os.path.basename(audio_path))[0]
261
  output_path = os.path.join(OUTPUT_DIR, "{}_converted.wav".format(base_name))
 
7
  import sys
8
  import logging
9
  import numpy as np
10
+ # NOTE: torch, torchaudio, librosa are imported lazily inside functions.
11
+ # Importing them at module level crashes ZeroGPU startup because the CUDA
12
+ # extension tries to load libcudart before any GPU context is available.
13
 
14
  logger = logging.getLogger(__name__)
15
 
 
34
  if "model" in _model_cache:
35
  return _model_cache
36
 
37
+ import torch
38
+ import torchaudio # noqa: lazy import — only safe after GPU context is active
39
  import yaml
40
  from modules.commons import recursive_munch, build_model, load_checkpoint
41
  from hf_utils import load_custom_model_from_hf
 
252
  raise
253
 
254
 
 
 
255
  def _convert_voice_impl(audio_path, reference_path, pitch, diffusion_steps, similarity=0.7):
256
  """Actual conversion implementation (called from GPU-decorated wrapper)."""
257
+ import torch
258
+ import torchaudio
259
+ import librosa
260
  import soundfile as sf
261
 
262
+ with torch.no_grad():
263
+ return _convert_voice_core(
264
+ audio_path, reference_path, pitch, diffusion_steps, similarity,
265
+ torch, torchaudio, librosa, sf,
266
+ )
267
+
268
+
269
+ def _convert_voice_core(audio_path, reference_path, pitch, diffusion_steps, similarity,
270
+ torch, torchaudio, librosa, sf):
271
+ """Inner implementation with no_grad already active."""
272
  os.makedirs(OUTPUT_DIR, exist_ok=True)
273
  base_name = os.path.splitext(os.path.basename(audio_path))[0]
274
  output_path = os.path.join(OUTPUT_DIR, "{}_converted.wav".format(base_name))
requirements.txt CHANGED
@@ -1,3 +1,10 @@
 
 
 
 
 
 
 
1
  # Gradio + HuggingFace
2
  gradio==5.12.0
3
  gradio-client==1.5.4
 
1
+ # ── PyTorch CUDA 12.4 wheels (ZeroGPU uses CUDA 12.x) ──────────────────────────
2
+ # Must be pinned here so demucs' transitive deps don't pull torchaudio built
3
+ # for CUDA 13 (which causes libcudart.so.13 crash at startup).
4
+ --extra-index-url https://download.pytorch.org/whl/cu124
5
+ torch==2.5.1
6
+ torchaudio==2.5.1
7
+
8
  # Gradio + HuggingFace
9
  gradio==5.12.0
10
  gradio-client==1.5.4