Spaces:

ResembleAI
/

Dramabox

Running on Zero

Manmay Nakhashi committed 27 days ago

Commit

b8b67ad

1 Parent(s): 8cd4942

Refactor for ZeroGPU: lazy TTSServer load + @spaces.GPU decorator

- Add 'spaces' to requirements
- Move TTSServer instantiation into _ensure_tts() so the GPU isn't touched at import time
- Wrap on_generate with @spaces.GPU(duration=120)
- Disable torch.compile (incompatible with ZeroGPU's brief GPU windows)
- Drop hardware:l40s from README frontmatter (Space hardware is now set to zero via API)

Files changed (3)

README.md +1 -1
app.py +24 -12
requirements.txt +1 -0

README.md CHANGED Viewed

@@ -10,7 +10,7 @@ pinned: true
 license: other
 license_name: ltx-2-community
 license_link: https://huggingface.co/ResembleAI/Dramabox/blob/main/LICENSE
-hardware: l40s
 short_description: Expressive TTS with voice cloning — DramaBox demo
 ---

 license: other
 license_name: ltx-2-community
 license_link: https://huggingface.co/ResembleAI/Dramabox/blob/main/LICENSE
+hf_oauth: false
 short_description: Expressive TTS with voice cloning — DramaBox demo
 ---

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ import tempfile
 import time
 import gradio as gr
 # Local src import.
 sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "src"))
@@ -21,18 +22,27 @@ from model_downloader import get_all_paths  # noqa: E402
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 logging.info("Fetching DramaBox checkpoints from HuggingFace (cached after first run)...")
-paths = get_all_paths()
-logging.info("Loading DramaBox warm server (Gemma + DiT + VAE + Decoder)...")
-tts = TTSServer(
-    checkpoint=paths["transformer"],
-    full_checkpoint=paths["audio_components"],
-    gemma_root=paths["gemma_root"],
-    device="cuda",
-    dtype=os.environ.get("LTX_DTYPE", "bf16"),
-    compile_model=os.environ.get("LTX_COMPILE", "0") == "1",
-    bnb_4bit=True,                                       # default Gemma is unsloth pre-quantized
-)
-logging.info("Server ready.")
 # ── Example prompts (shown as click-to-fill chips in the UI) ─────────────────
@@ -88,10 +98,12 @@ EXAMPLES: list[tuple[str, str]] = [
 ]
 def on_generate(prompt: str, audio_ref, cfg: float, stg: float, dur_mult: float, seed: int):
     if not prompt or not prompt.strip():
         raise gr.Error("Prompt is empty.")
     t0 = time.time()
     ref_path = audio_ref if audio_ref and os.path.exists(str(audio_ref)) else None
     output = tempfile.mktemp(suffix=".wav", prefix="dramabox_")
     tts.generate_to_file(

 import time
 import gradio as gr
+import spaces
 # Local src import.
 sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "src"))
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 logging.info("Fetching DramaBox checkpoints from HuggingFace (cached after first run)...")
+PATHS = get_all_paths()  # CPU-side download is fine outside the GPU window
+# Lazy-loaded inside the @spaces.GPU function (no GPU available at import time on ZeroGPU).
+_TTS: TTSServer | None = None
+def _ensure_tts() -> TTSServer:
+    global _TTS
+    if _TTS is None:
+        logging.info("Loading DramaBox warm server (Gemma + DiT + VAE + Decoder)...")
+        _TTS = TTSServer(
+            checkpoint=PATHS["transformer"],
+            full_checkpoint=PATHS["audio_components"],
+            gemma_root=PATHS["gemma_root"],
+            device="cuda",
+            dtype=os.environ.get("LTX_DTYPE", "bf16"),
+            compile_model=False,                  # torch.compile breaks under ZeroGPU's brief GPU windows
+            bnb_4bit=True,                        # unsloth Gemma is pre-quantized
+        )
+        logging.info("TTSServer ready.")
+    return _TTS
 # ── Example prompts (shown as click-to-fill chips in the UI) ─────────────────
 ]
+@spaces.GPU(duration=120)
 def on_generate(prompt: str, audio_ref, cfg: float, stg: float, dur_mult: float, seed: int):
     if not prompt or not prompt.strip():
         raise gr.Error("Prompt is empty.")
     t0 = time.time()
+    tts = _ensure_tts()
     ref_path = audio_ref if audio_ref and os.path.exists(str(audio_ref)) else None
     output = tempfile.mktemp(suffix=".wav", prefix="dramabox_")
     tts.generate_to_file(

requirements.txt CHANGED Viewed

@@ -12,5 +12,6 @@ transformers>=4.45.0
 huggingface_hub>=0.20.0,<1.0
 bitsandbytes>=0.43.0
 gradio>=4.0.0
 soundfile>=0.12.0
 resemble-perth @ git+https://github.com/resemble-ai/Perth.git@master

 huggingface_hub>=0.20.0,<1.0
 bitsandbytes>=0.43.0
 gradio>=4.0.0
+spaces>=0.30.0
 soundfile>=0.12.0
 resemble-perth @ git+https://github.com/resemble-ai/Perth.git@master