Spaces:

luh0502
/

NeAR

Running on Zero

luh1124 committed 26 days ago

Commit

da36f0f

1 Parent(s): b5fb94e

feat(app): block Gradio bind until CPU weights load (no background preload)

- Run run_model_cpu_preload_blocking() inside demo.launch before server start
- Default NEAR_MODEL_CPU_PRELOAD_AT_START=1; set 0 for lazy load on first GPU click
- Remove daemon preload thread; test launch wrapper calls blocking preload

Made-with: Cursor

Files changed (2) hide show

app.py +19 -26
tests/test_app_architecture.py +8 -0

app.py CHANGED Viewed

@@ -253,18 +253,18 @@ def _truthy_env(name: str, default: str) -> bool:
     return v in ("1", "true", "yes", "on")
-# Background CPU preload runs in the Gradio host process. HF Stateless / ZeroGPU forbids
-# CUDA init there; torchvision + torch.load (weights_only) interactions can still fail
-# during NeAR build. Default to off when `spaces` is present (typical Space deploy); use
-# NEAR_MODEL_CPU_PRELOAD_AT_START=1 to force preload locally or on dedicated GPU VMs.
-_CPU_PRELOAD_DEFAULT = "0" if spaces is not None else "1"
 _CPU_PRELOAD_AT_START = _truthy_env(
     "NEAR_MODEL_CPU_PRELOAD_AT_START",
     _CPU_PRELOAD_DEFAULT,
 )
 print(
     f"[NeAR] NEAR_MODEL_CPU_PRELOAD_AT_START={'1' if _CPU_PRELOAD_AT_START else '0'} "
-    f"(default {_CPU_PRELOAD_DEFAULT!r} when spaces={'set' if spaces is not None else 'absent'}).",
     flush=True,
 )
@@ -402,24 +402,18 @@ def ensure_near_on_cuda() -> NeARImageToRelightable3DPipeline:
         return PIPELINE
-def _preload_models_cpu_worker() -> None:
-    try:
-        t0 = time.time()
-        print("[NeAR] background CPU preload start", flush=True)
-        with _model_lock:
-            _ensure_geometry_cpu_locked()
-            _ensure_near_cpu_locked()
-        print(f"[NeAR] background CPU preload done {time.time() - t0:.1f}s", flush=True)
-    except Exception as exc:
-        print(f"[NeAR] background CPU preload failed: {exc}", flush=True)
-def start_model_cpu_preload_thread() -> None:
-    threading.Thread(
-        target=_preload_models_cpu_worker,
-        daemon=True,
-        name="near-model-cpu-preload",
-    ).start()
 def set_tone_mapper(view_name: str):
@@ -1291,14 +1285,13 @@ _orig_blocks_launch = demo.launch
 def _near_launch(*args: Any, **kwargs: Any):
     kwargs.setdefault("theme", NEAR_GRADIO_THEME)
     kwargs.setdefault("css", CUSTOM_CSS)
     return _orig_blocks_launch(*args, **kwargs)
 demo.launch = _near_launch  # type: ignore[method-assign]
-if _CPU_PRELOAD_AT_START:
-    start_model_cpu_preload_thread()
 start_tmp_gradio_pruner()
 if __name__ == "__main__":

     return v in ("1", "true", "yes", "on")
+# When enabled, Hunyuan + NeAR weights are loaded on CPU inside demo.launch() *before* the
+# HTTP server binds, so the main UI only becomes reachable after CPU load finishes (avoids
+# clicks while models are missing → ZeroGPU timeout). Set NEAR_MODEL_CPU_PRELOAD_AT_START=0
+# to bind immediately and load on first @spaces.GPU click instead (faster "page up", riskier UX).
+_CPU_PRELOAD_DEFAULT = "1"
 _CPU_PRELOAD_AT_START = _truthy_env(
     "NEAR_MODEL_CPU_PRELOAD_AT_START",
     _CPU_PRELOAD_DEFAULT,
 )
 print(
     f"[NeAR] NEAR_MODEL_CPU_PRELOAD_AT_START={'1' if _CPU_PRELOAD_AT_START else '0'} "
+    "(1 = block server start until CPU weights ready; 0 = lazy load on first GPU action).",
     flush=True,
 )
         return PIPELINE
+def run_model_cpu_preload_blocking() -> None:
+    """Load Hunyuan + NeAR on CPU before Gradio binds (main UI appears only after this)."""
+    t0 = time.time()
+    print("[NeAR] blocking CPU preload before server bind ...", flush=True)
+    with _model_lock:
+        _ensure_geometry_cpu_locked()
+        _ensure_near_cpu_locked()
+    print(
+        f"[NeAR] CPU preload done {time.time() - t0:.1f}s — Gradio will accept traffic now.",
+        flush=True,
+    )
 def set_tone_mapper(view_name: str):
 def _near_launch(*args: Any, **kwargs: Any):
     kwargs.setdefault("theme", NEAR_GRADIO_THEME)
     kwargs.setdefault("css", CUSTOM_CSS)
+    if _CPU_PRELOAD_AT_START:
+        run_model_cpu_preload_blocking()
     return _orig_blocks_launch(*args, **kwargs)
 demo.launch = _near_launch  # type: ignore[method-assign]
 start_tmp_gradio_pruner()
 if __name__ == "__main__":

tests/test_app_architecture.py CHANGED Viewed

@@ -63,6 +63,14 @@ class AppArchitectureTests(unittest.TestCase):
         self.assertIn("ensure_near_on_cuda", called)
 if __name__ == "__main__":
     unittest.main()

         self.assertIn("ensure_near_on_cuda", called)
+    def test_cpu_preload_is_blocking_before_launch_not_background_thread(self) -> None:
+        source = APP_PATH.read_text(encoding="utf-8")
+        near_launch = _get_function(_load_tree(), "_near_launch")
+        called = _called_names(near_launch)
+        self.assertIn("run_model_cpu_preload_blocking", called)
+        self.assertNotIn("start_model_cpu_preload_thread", source)
 if __name__ == "__main__":
     unittest.main()