Spaces:

Rafii
/

videovoice

Running on Zero

Rafii committed on Apr 22

Commit

3474e83

1 Parent(s): abc7c46

deploy: switch to chatterbox requirements @ 035108d

Files changed (1) hide show

steps/s1b_separate.py CHANGED Viewed

@@ -16,7 +16,6 @@ import spaces
 _MODEL = None
-_MODEL_DEVICE = None
 def _select_device() -> str:
@@ -30,19 +29,15 @@ def _select_device() -> str:
 def _get_model():
     """Lazy-load htdemucs once per process. Module-level semantics; we load
     on first call so the import itself stays cheap on non-GPU envs."""
-    global _MODEL, _MODEL_DEVICE
     if _MODEL is None:
         from demucs.pretrained import get_model
-        device = _select_device()
-        print(f"[s1b] Loading htdemucs on {device}...")
         model = get_model("htdemucs")
         model.eval()
-        # On ZeroGPU, CUDA emulation at import time lets this succeed even
-        # outside a @spaces.GPU scope. On Mac, this is MPS or CPU.
-        model.to(device)
         _MODEL = model
-        _MODEL_DEVICE = device
-    return _MODEL, _MODEL_DEVICE
 @spaces.GPU(duration=120)
@@ -50,7 +45,10 @@ def _apply_demucs(mix: torch.Tensor, device: str) -> torch.Tensor:
     """GPU-bound inference call. `mix` shape: [1, channels, time]."""
     from demucs.apply import apply_model
-    model, _ = _get_model()
     with torch.no_grad():
         # apply_model returns [batch, sources, channels, time]
         sources = apply_model(
@@ -99,7 +97,8 @@ def separate_audio(
     out = Path(output_dir)
     out.mkdir(parents=True, exist_ok=True)
-    model, device = _get_model()
     target_sr = model.samplerate
     target_ch = model.audio_channels
     source_names = list(model.sources)

 _MODEL = None
 def _select_device() -> str:
 def _get_model():
     """Lazy-load htdemucs once per process. Module-level semantics; we load
     on first call so the import itself stays cheap on non-GPU envs."""
+    global _MODEL
     if _MODEL is None:
         from demucs.pretrained import get_model
+        print("[s1b] Loading htdemucs on cpu...")
         model = get_model("htdemucs")
         model.eval()
+        model.to("cpu")
         _MODEL = model
+    return _MODEL
 @spaces.GPU(duration=120)
     """GPU-bound inference call. `mix` shape: [1, channels, time]."""
     from demucs.apply import apply_model
+    model = _get_model()
+    if next(model.parameters()).device.type != device:
+        print(f"[s1b] Moving htdemucs to {device} inside GPU scope...")
+        model = model.to(device)
     with torch.no_grad():
         # apply_model returns [batch, sources, channels, time]
         sources = apply_model(
     out = Path(output_dir)
     out.mkdir(parents=True, exist_ok=True)
+    model = _get_model()
+    device = _select_device()
     target_sr = model.samplerate
     target_ch = model.audio_channels
     source_names = list(model.sources)