Spaces:
Sleeping
Sleeping
Commit ·
6471984
1
Parent(s): 10addd5
fix: lazy-import torch/torchaudio in inference.py to avoid libcudart.so.13 crash; pin cu124 wheels in requirements to prevent demucs pulling CUDA-13 torchaudio
Browse files
- pipeline/inference.py +18 -5
- requirements.txt +7 -0
pipeline/inference.py
CHANGED
|
@@ -7,9 +7,9 @@ import os
|
|
| 7 |
import sys
|
| 8 |
import logging
|
| 9 |
import numpy as np
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
|
@@ -34,6 +34,8 @@ def _load_seed_vc_models(device):
|
|
| 34 |
if "model" in _model_cache:
|
| 35 |
return _model_cache
|
| 36 |
|
|
|
|
|
|
|
| 37 |
import yaml
|
| 38 |
from modules.commons import recursive_munch, build_model, load_checkpoint
|
| 39 |
from hf_utils import load_custom_model_from_hf
|
|
@@ -250,12 +252,23 @@ def convert_voice(
|
|
| 250 |
raise
|
| 251 |
|
| 252 |
|
| 253 |
-
@torch.no_grad()
|
| 254 |
-
@torch.inference_mode()
|
| 255 |
def _convert_voice_impl(audio_path, reference_path, pitch, diffusion_steps, similarity=0.7):
|
| 256 |
"""Actual conversion implementation (called from GPU-decorated wrapper)."""
|
|
|
|
|
|
|
|
|
|
| 257 |
import soundfile as sf
|
| 258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 260 |
base_name = os.path.splitext(os.path.basename(audio_path))[0]
|
| 261 |
output_path = os.path.join(OUTPUT_DIR, "{}_converted.wav".format(base_name))
|
|
|
|
| 7 |
import sys
|
| 8 |
import logging
|
| 9 |
import numpy as np
|
| 10 |
+
# NOTE: torch, torchaudio, librosa are imported lazily inside functions.
|
| 11 |
+
# Importing them at module level crashes ZeroGPU startup because the CUDA
|
| 12 |
+
# extension tries to load libcudart before any GPU context is available.
|
| 13 |
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
|
|
|
| 34 |
if "model" in _model_cache:
|
| 35 |
return _model_cache
|
| 36 |
|
| 37 |
+
import torch
|
| 38 |
+
import torchaudio # noqa: lazy import — only safe after GPU context is active
|
| 39 |
import yaml
|
| 40 |
from modules.commons import recursive_munch, build_model, load_checkpoint
|
| 41 |
from hf_utils import load_custom_model_from_hf
|
|
|
|
| 252 |
raise
|
| 253 |
|
| 254 |
|
|
|
|
|
|
|
| 255 |
def _convert_voice_impl(audio_path, reference_path, pitch, diffusion_steps, similarity=0.7):
|
| 256 |
"""Actual conversion implementation (called from GPU-decorated wrapper)."""
|
| 257 |
+
import torch
|
| 258 |
+
import torchaudio
|
| 259 |
+
import librosa
|
| 260 |
import soundfile as sf
|
| 261 |
|
| 262 |
+
with torch.no_grad():
|
| 263 |
+
return _convert_voice_core(
|
| 264 |
+
audio_path, reference_path, pitch, diffusion_steps, similarity,
|
| 265 |
+
torch, torchaudio, librosa, sf,
|
| 266 |
+
)
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def _convert_voice_core(audio_path, reference_path, pitch, diffusion_steps, similarity,
|
| 270 |
+
torch, torchaudio, librosa, sf):
|
| 271 |
+
"""Inner implementation with no_grad already active."""
|
| 272 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 273 |
base_name = os.path.splitext(os.path.basename(audio_path))[0]
|
| 274 |
output_path = os.path.join(OUTPUT_DIR, "{}_converted.wav".format(base_name))
|
requirements.txt
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# Gradio + HuggingFace
|
| 2 |
gradio==5.12.0
|
| 3 |
gradio-client==1.5.4
|
|
|
|
| 1 |
+
# ── PyTorch CUDA 12.4 wheels (ZeroGPU uses CUDA 12.x) ──────────────────────────
|
| 2 |
+
# Must be pinned here so demucs' transitive deps don't pull torchaudio built
|
| 3 |
+
# for CUDA 13 (which causes a libcudart.so.13 crash at startup).
|
| 4 |
+
--extra-index-url https://download.pytorch.org/whl/cu124
|
| 5 |
+
torch==2.5.1
|
| 6 |
+
torchaudio==2.5.1
|
| 7 |
+
|
| 8 |
# Gradio + HuggingFace
|
| 9 |
gradio==5.12.0
|
| 10 |
gradio-client==1.5.4
|