luh1124 committed on
Commit
da36f0f
·
1 Parent(s): b5fb94e

feat(app): block Gradio bind until CPU weights load (no background preload)

Browse files

- Run run_model_cpu_preload_blocking() inside demo.launch before server start
- Default NEAR_MODEL_CPU_PRELOAD_AT_START=1; set 0 for lazy load on first GPU click
- Remove daemon preload thread; test launch wrapper calls blocking preload

Made-with: Cursor

Files changed (2) hide show
  1. app.py +19 -26
  2. tests/test_app_architecture.py +8 -0
app.py CHANGED
@@ -253,18 +253,18 @@ def _truthy_env(name: str, default: str) -> bool:
253
  return v in ("1", "true", "yes", "on")
254
 
255
 
256
- # Background CPU preload runs in the Gradio host process. HF Stateless / ZeroGPU forbids
257
- # CUDA init there; torchvision + torch.load (weights_only) interactions can still fail
258
- # during NeAR build. Default to off when `spaces` is present (typical Space deploy); use
259
- # NEAR_MODEL_CPU_PRELOAD_AT_START=1 to force preload locally or on dedicated GPU VMs.
260
- _CPU_PRELOAD_DEFAULT = "0" if spaces is not None else "1"
261
  _CPU_PRELOAD_AT_START = _truthy_env(
262
  "NEAR_MODEL_CPU_PRELOAD_AT_START",
263
  _CPU_PRELOAD_DEFAULT,
264
  )
265
  print(
266
  f"[NeAR] NEAR_MODEL_CPU_PRELOAD_AT_START={'1' if _CPU_PRELOAD_AT_START else '0'} "
267
- f"(default {_CPU_PRELOAD_DEFAULT!r} when spaces={'set' if spaces is not None else 'absent'}).",
268
  flush=True,
269
  )
270
 
@@ -402,24 +402,18 @@ def ensure_near_on_cuda() -> NeARImageToRelightable3DPipeline:
402
  return PIPELINE
403
 
404
 
405
- def _preload_models_cpu_worker() -> None:
406
- try:
407
- t0 = time.time()
408
- print("[NeAR] background CPU preload start", flush=True)
409
- with _model_lock:
410
- _ensure_geometry_cpu_locked()
411
- _ensure_near_cpu_locked()
412
- print(f"[NeAR] background CPU preload done {time.time() - t0:.1f}s", flush=True)
413
- except Exception as exc:
414
- print(f"[NeAR] background CPU preload failed: {exc}", flush=True)
415
-
416
 
417
- def start_model_cpu_preload_thread() -> None:
418
- threading.Thread(
419
- target=_preload_models_cpu_worker,
420
- daemon=True,
421
- name="near-model-cpu-preload",
422
- ).start()
 
 
 
423
 
424
 
425
  def set_tone_mapper(view_name: str):
@@ -1291,14 +1285,13 @@ _orig_blocks_launch = demo.launch
1291
  def _near_launch(*args: Any, **kwargs: Any):
1292
  kwargs.setdefault("theme", NEAR_GRADIO_THEME)
1293
  kwargs.setdefault("css", CUSTOM_CSS)
 
 
1294
  return _orig_blocks_launch(*args, **kwargs)
1295
 
1296
 
1297
  demo.launch = _near_launch # type: ignore[method-assign]
1298
 
1299
- if _CPU_PRELOAD_AT_START:
1300
- start_model_cpu_preload_thread()
1301
-
1302
  start_tmp_gradio_pruner()
1303
 
1304
  if __name__ == "__main__":
 
253
  return v in ("1", "true", "yes", "on")
254
 
255
 
256
+ # When enabled, Hunyuan + NeAR weights are loaded on CPU inside demo.launch() *before* the
257
+ # HTTP server binds, so the main UI only becomes reachable after CPU load finishes (avoids
258
+ # clicks while models are not yet loaded, which risks a ZeroGPU timeout). Set NEAR_MODEL_CPU_PRELOAD_AT_START=0
259
+ # to bind immediately and load on first @spaces.GPU click instead (faster "page up", riskier UX).
260
+ _CPU_PRELOAD_DEFAULT = "1"
261
  _CPU_PRELOAD_AT_START = _truthy_env(
262
  "NEAR_MODEL_CPU_PRELOAD_AT_START",
263
  _CPU_PRELOAD_DEFAULT,
264
  )
265
  print(
266
  f"[NeAR] NEAR_MODEL_CPU_PRELOAD_AT_START={'1' if _CPU_PRELOAD_AT_START else '0'} "
267
+ "(1 = block server start until CPU weights ready; 0 = lazy load on first GPU action).",
268
  flush=True,
269
  )
270
 
 
402
  return PIPELINE
403
 
404
 
405
+ def run_model_cpu_preload_blocking() -> None:
406
+ """Load Hunyuan + NeAR on CPU before Gradio binds (main UI appears only after this)."""
 
 
 
 
 
 
 
 
 
407
 
408
+ t0 = time.time()
409
+ print("[NeAR] blocking CPU preload before server bind ...", flush=True)
410
+ with _model_lock:
411
+ _ensure_geometry_cpu_locked()
412
+ _ensure_near_cpu_locked()
413
+ print(
414
+ f"[NeAR] CPU preload done {time.time() - t0:.1f}s — Gradio will accept traffic now.",
415
+ flush=True,
416
+ )
417
 
418
 
419
  def set_tone_mapper(view_name: str):
 
1285
  def _near_launch(*args: Any, **kwargs: Any):
1286
  kwargs.setdefault("theme", NEAR_GRADIO_THEME)
1287
  kwargs.setdefault("css", CUSTOM_CSS)
1288
+ if _CPU_PRELOAD_AT_START:
1289
+ run_model_cpu_preload_blocking()
1290
  return _orig_blocks_launch(*args, **kwargs)
1291
 
1292
 
1293
  demo.launch = _near_launch # type: ignore[method-assign]
1294
 
 
 
 
1295
  start_tmp_gradio_pruner()
1296
 
1297
  if __name__ == "__main__":
tests/test_app_architecture.py CHANGED
@@ -63,6 +63,14 @@ class AppArchitectureTests(unittest.TestCase):
63
 
64
  self.assertIn("ensure_near_on_cuda", called)
65
 
 
 
 
 
 
 
 
 
66
 
67
  if __name__ == "__main__":
68
  unittest.main()
 
63
 
64
  self.assertIn("ensure_near_on_cuda", called)
65
 
66
+ def test_cpu_preload_is_blocking_before_launch_not_background_thread(self) -> None:
67
+ source = APP_PATH.read_text(encoding="utf-8")
68
+ near_launch = _get_function(_load_tree(), "_near_launch")
69
+ called = _called_names(near_launch)
70
+
71
+ self.assertIn("run_model_cpu_preload_blocking", called)
72
+ self.assertNotIn("start_model_cpu_preload_thread", source)
73
+
74
 
75
  if __name__ == "__main__":
76
  unittest.main()