Spaces:

dimensionalpulsar
/

voice-clone-rvc

Sleeping

dimensionalpulsar committed 24 days ago

Commit

6471984

1 Parent(s): 10addd5

fix: lazy-import torch/torchaudio in inference.py to avoid libcudart.so.13 crash; pin cu124 wheels in requirements to prevent demucs pulling CUDA-13 torchaudio

Files changed (2) hide show

pipeline/inference.py CHANGED Viewed

@@ -7,9 +7,9 @@ import os
 import sys
 import logging
 import numpy as np
-import torch
-import torchaudio
-import librosa
 logger = logging.getLogger(__name__)
@@ -34,6 +34,8 @@ def _load_seed_vc_models(device):
     if "model" in _model_cache:
         return _model_cache
     import yaml
     from modules.commons import recursive_munch, build_model, load_checkpoint
     from hf_utils import load_custom_model_from_hf
@@ -250,12 +252,23 @@ def convert_voice(
         raise
-@torch.no_grad()
-@torch.inference_mode()
 def _convert_voice_impl(audio_path, reference_path, pitch, diffusion_steps, similarity=0.7):
     """Actual conversion implementation (called from GPU-decorated wrapper)."""
     import soundfile as sf
     os.makedirs(OUTPUT_DIR, exist_ok=True)
     base_name = os.path.splitext(os.path.basename(audio_path))[0]
     output_path = os.path.join(OUTPUT_DIR, "{}_converted.wav".format(base_name))

 import sys
 import logging
 import numpy as np
+# NOTE: torch, torchaudio, librosa are imported lazily inside functions.
+# Importing them at module level crashes ZeroGPU startup because the CUDA
+# extension tries to load libcudart before any GPU context is available.
 logger = logging.getLogger(__name__)
     if "model" in _model_cache:
         return _model_cache
+    import torch
+    import torchaudio  # noqa: lazy import — only safe after GPU context is active
     import yaml
     from modules.commons import recursive_munch, build_model, load_checkpoint
     from hf_utils import load_custom_model_from_hf
         raise
 def _convert_voice_impl(audio_path, reference_path, pitch, diffusion_steps, similarity=0.7):
     """Actual conversion implementation (called from GPU-decorated wrapper)."""
+    import torch
+    import torchaudio
+    import librosa
     import soundfile as sf
+    with torch.no_grad():
+        return _convert_voice_core(
+            audio_path, reference_path, pitch, diffusion_steps, similarity,
+            torch, torchaudio, librosa, sf,
+        )
+def _convert_voice_core(audio_path, reference_path, pitch, diffusion_steps, similarity,
+                        torch, torchaudio, librosa, sf):
+    """Inner implementation with no_grad already active."""
     os.makedirs(OUTPUT_DIR, exist_ok=True)
     base_name = os.path.splitext(os.path.basename(audio_path))[0]
     output_path = os.path.join(OUTPUT_DIR, "{}_converted.wav".format(base_name))

requirements.txt CHANGED Viewed

@@ -1,3 +1,10 @@
 # Gradio + HuggingFace
 gradio==5.12.0
 gradio-client==1.5.4

+# ── PyTorch CUDA 12.4 wheels (ZeroGPU uses CUDA 12.x) ──────────────────────────
+# Must be pinned here so that demucs' transitive deps don't pull in a torchaudio
+# build for CUDA 13 (which causes a libcudart.so.13 crash at startup).
+--extra-index-url https://download.pytorch.org/whl/cu124
+torch==2.5.1
+torchaudio==2.5.1
 # Gradio + HuggingFace
 gradio==5.12.0
 gradio-client==1.5.4