feat(app): block Gradio bind until CPU weights load (no background preload)
Browse files
- Run run_model_cpu_preload_blocking() inside demo.launch before server start
- Default NEAR_MODEL_CPU_PRELOAD_AT_START=1; set 0 for lazy load on first GPU click
- Remove daemon preload thread; test launch wrapper calls blocking preload
Made-with: Cursor
- app.py +19 -26
- tests/test_app_architecture.py +8 -0
app.py
CHANGED
|
@@ -253,18 +253,18 @@ def _truthy_env(name: str, default: str) -> bool:
|
|
| 253 |
return v in ("1", "true", "yes", "on")
|
| 254 |
|
| 255 |
|
| 256 |
-
#
|
| 257 |
-
#
|
| 258 |
-
#
|
| 259 |
-
#
|
| 260 |
-
_CPU_PRELOAD_DEFAULT = "
|
| 261 |
_CPU_PRELOAD_AT_START = _truthy_env(
|
| 262 |
"NEAR_MODEL_CPU_PRELOAD_AT_START",
|
| 263 |
_CPU_PRELOAD_DEFAULT,
|
| 264 |
)
|
| 265 |
print(
|
| 266 |
f"[NeAR] NEAR_MODEL_CPU_PRELOAD_AT_START={'1' if _CPU_PRELOAD_AT_START else '0'} "
|
| 267 |
-
|
| 268 |
flush=True,
|
| 269 |
)
|
| 270 |
|
|
@@ -402,24 +402,18 @@ def ensure_near_on_cuda() -> NeARImageToRelightable3DPipeline:
|
|
| 402 |
return PIPELINE
|
| 403 |
|
| 404 |
|
| 405 |
-
def
|
| 406 |
-
|
| 407 |
-
t0 = time.time()
|
| 408 |
-
print("[NeAR] background CPU preload start", flush=True)
|
| 409 |
-
with _model_lock:
|
| 410 |
-
_ensure_geometry_cpu_locked()
|
| 411 |
-
_ensure_near_cpu_locked()
|
| 412 |
-
print(f"[NeAR] background CPU preload done {time.time() - t0:.1f}s", flush=True)
|
| 413 |
-
except Exception as exc:
|
| 414 |
-
print(f"[NeAR] background CPU preload failed: {exc}", flush=True)
|
| 415 |
-
|
| 416 |
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
|
|
|
|
|
|
|
|
|
| 423 |
|
| 424 |
|
| 425 |
def set_tone_mapper(view_name: str):
|
|
@@ -1291,14 +1285,13 @@ _orig_blocks_launch = demo.launch
|
|
| 1291 |
def _near_launch(*args: Any, **kwargs: Any):
|
| 1292 |
kwargs.setdefault("theme", NEAR_GRADIO_THEME)
|
| 1293 |
kwargs.setdefault("css", CUSTOM_CSS)
|
|
|
|
|
|
|
| 1294 |
return _orig_blocks_launch(*args, **kwargs)
|
| 1295 |
|
| 1296 |
|
| 1297 |
demo.launch = _near_launch # type: ignore[method-assign]
|
| 1298 |
|
| 1299 |
-
if _CPU_PRELOAD_AT_START:
|
| 1300 |
-
start_model_cpu_preload_thread()
|
| 1301 |
-
|
| 1302 |
start_tmp_gradio_pruner()
|
| 1303 |
|
| 1304 |
if __name__ == "__main__":
|
|
|
|
| 253 |
return v in ("1", "true", "yes", "on")
|
| 254 |
|
| 255 |
|
| 256 |
+
# When enabled, Hunyuan + NeAR weights are loaded on CPU inside demo.launch() *before* the
|
| 257 |
+
# HTTP server binds, so the main UI only becomes reachable after CPU load finishes (avoids
|
| 258 |
+
# clicks while models are missing → ZeroGPU timeout). Set NEAR_MODEL_CPU_PRELOAD_AT_START=0
|
| 259 |
+
# to bind immediately and load on first @spaces.GPU click instead (faster "page up", riskier UX).
|
| 260 |
+
_CPU_PRELOAD_DEFAULT = "1"
|
| 261 |
_CPU_PRELOAD_AT_START = _truthy_env(
|
| 262 |
"NEAR_MODEL_CPU_PRELOAD_AT_START",
|
| 263 |
_CPU_PRELOAD_DEFAULT,
|
| 264 |
)
|
| 265 |
print(
|
| 266 |
f"[NeAR] NEAR_MODEL_CPU_PRELOAD_AT_START={'1' if _CPU_PRELOAD_AT_START else '0'} "
|
| 267 |
+
"(1 = block server start until CPU weights ready; 0 = lazy load on first GPU action).",
|
| 268 |
flush=True,
|
| 269 |
)
|
| 270 |
|
|
|
|
| 402 |
return PIPELINE
|
| 403 |
|
| 404 |
|
| 405 |
+
def run_model_cpu_preload_blocking() -> None:
|
| 406 |
+
"""Load Hunyuan + NeAR on CPU before Gradio binds (main UI appears only after this)."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
|
| 408 |
+
t0 = time.time()
|
| 409 |
+
print("[NeAR] blocking CPU preload before server bind ...", flush=True)
|
| 410 |
+
with _model_lock:
|
| 411 |
+
_ensure_geometry_cpu_locked()
|
| 412 |
+
_ensure_near_cpu_locked()
|
| 413 |
+
print(
|
| 414 |
+
f"[NeAR] CPU preload done {time.time() - t0:.1f}s — Gradio will accept traffic now.",
|
| 415 |
+
flush=True,
|
| 416 |
+
)
|
| 417 |
|
| 418 |
|
| 419 |
def set_tone_mapper(view_name: str):
|
|
|
|
| 1285 |
def _near_launch(*args: Any, **kwargs: Any):
|
| 1286 |
kwargs.setdefault("theme", NEAR_GRADIO_THEME)
|
| 1287 |
kwargs.setdefault("css", CUSTOM_CSS)
|
| 1288 |
+
if _CPU_PRELOAD_AT_START:
|
| 1289 |
+
run_model_cpu_preload_blocking()
|
| 1290 |
return _orig_blocks_launch(*args, **kwargs)
|
| 1291 |
|
| 1292 |
|
| 1293 |
demo.launch = _near_launch # type: ignore[method-assign]
|
| 1294 |
|
|
|
|
|
|
|
|
|
|
| 1295 |
start_tmp_gradio_pruner()
|
| 1296 |
|
| 1297 |
if __name__ == "__main__":
|
tests/test_app_architecture.py
CHANGED
|
@@ -63,6 +63,14 @@ class AppArchitectureTests(unittest.TestCase):
|
|
| 63 |
|
| 64 |
self.assertIn("ensure_near_on_cuda", called)
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
if __name__ == "__main__":
|
| 68 |
unittest.main()
|
|
|
|
| 63 |
|
| 64 |
self.assertIn("ensure_near_on_cuda", called)
|
| 65 |
|
| 66 |
+
def test_cpu_preload_is_blocking_before_launch_not_background_thread(self) -> None:
|
| 67 |
+
source = APP_PATH.read_text(encoding="utf-8")
|
| 68 |
+
near_launch = _get_function(_load_tree(), "_near_launch")
|
| 69 |
+
called = _called_names(near_launch)
|
| 70 |
+
|
| 71 |
+
self.assertIn("run_model_cpu_preload_blocking", called)
|
| 72 |
+
self.assertNotIn("start_model_cpu_preload_thread", source)
|
| 73 |
+
|
| 74 |
|
| 75 |
if __name__ == "__main__":
|
| 76 |
unittest.main()
|