luh1124 committed on
Commit
4258d6f
·
1 Parent(s): c5d64e1

refactor(hyshape): CPU preload at startup, GPU move on Generate Mesh

Browse files

- Remove demo.load GPU warmup; start daemon thread for from_pretrained(cpu)
- ensure_geometry_on_cuda() in @GPU: .to(cuda) + inference
- Replace NEAR_HYSHAPE_GEOMETRY_WARMUP_ON_LOAD with NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START

Made-with: Cursor

DEPLOY_HF_SPACE.md CHANGED
@@ -78,7 +78,7 @@ If you maintain a separate template tree (e.g. `NeAR_space`), copy changes **int
78
  | `NEAR_GSPLAT_WARMUP` | `0` | `1` |
79
  | `NEAR_GSPLAT_SOURCE_SPEC` | unset unless you have a proven build path | optional if you want build-time source compile |
80
  | `NEAR_ZEROGPU_HF_CEILING_S` | `90` | tune to your tier |
81
- | `NEAR_HYSHAPE_GEOMETRY_WARMUP_ON_LOAD` | `1` when Space entry is **`app_hyshape.py`** (default in code: load Hunyuan on first page view via `demo.load` + `@spaces.GPU`) | `0` to load geometry only when the user clicks **Generate Mesh** (saves one GPU allocation per visit, but repeats cold start) |
82
 
83
  ### 2b2. Mirroring DINOv2 and other auxiliary assets
84
 
 
78
  | `NEAR_GSPLAT_WARMUP` | `0` | `1` |
79
  | `NEAR_GSPLAT_SOURCE_SPEC` | unset unless you have a proven build path | optional if you want build-time source compile |
80
  | `NEAR_ZEROGPU_HF_CEILING_S` | `90` | tune to your tier |
81
+ | `NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START` | `1` when Space entry is **`app_hyshape.py`** (default: background thread runs `from_pretrained(..., device="cpu")` at startup, with **no** `@spaces.GPU`) | `0` to defer CPU load until the first **Generate Mesh** click (inside the GPU callback; longer first click) |
82
 
83
  ### 2b2. Mirroring DINOv2 and other auxiliary assets
84
 
README.md CHANGED
@@ -49,7 +49,7 @@ This repository combines:
49
  ## ZeroGPU Runtime Notes
50
 
51
  - The Space is temporarily pointed at **`app_hyshape.py`** (Hunyuan geometry only) for isolating ZeroGPU init issues. Restore **`app_file: app.py`** in the YAML header above when you want the full NeAR UI again.
52
- - **`app_hyshape.py`** defaults to **`NEAR_HYSHAPE_GEOMETRY_WARMUP_ON_LOAD=1`**: opening the Space triggers one GPU callback that loads Hunyuan so **Generate Mesh** does not pay the full ~40s cold start again in the same session. Set to **`0`** to disable (saves GPU seconds per visitor, slower first mesh).
53
  - The full `app.py` Space keeps **page-load image defaults** and **HDRI preview** on lightweight CPU paths so the first page visit does not spend the first ZeroGPU allocation on model initialization.
54
  - Runtime loading is split by responsibility: **Hunyuan3D geometry** is loaded only for mesh generation, **NeAR relighting** is loaded only for SLaT/render/export, and **gsplat warmup** is delayed until the first real render.
55
  - Binary wheels and mirrored auxiliary assets are stored separately:
 
49
  ## ZeroGPU Runtime Notes
50
 
51
  - The Space is temporarily pointed at **`app_hyshape.py`** (Hunyuan geometry only) for isolating ZeroGPU init issues. Restore **`app_file: app.py`** in the YAML header above when you want the full NeAR UI again.
52
+ - **`app_hyshape.py`** defaults to **`NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START=1`**: a **background thread** loads Hunyuan on **CPU** at container start (no ZeroGPU lease). **Generate Mesh** then only pays **GPU move + inference** inside `@spaces.GPU`. Set to **`0`** to skip background preload (first click loads on CPU inside the GPU callback, longer first click).
53
  - The full `app.py` Space keeps **page-load image defaults** and **HDRI preview** on lightweight CPU paths so the first page visit does not spend the first ZeroGPU allocation on model initialization.
54
  - Runtime loading is split by responsibility: **Hunyuan3D geometry** is loaded only for mesh generation, **NeAR relighting** is loaded only for SLaT/render/export, and **gsplat warmup** is delayed until the first real render.
55
  - Binary wheels and mirrored auxiliary assets are stored separately:
app_hyshape.py CHANGED
@@ -58,12 +58,12 @@ def _truthy_env(name: str, default: str) -> bool:
58
  return value in ("1", "true", "yes", "on")
59
 
60
 
61
- # Default on for this probe Space: first "Generate Mesh" stays under ZeroGPU budget after weights are on GPU.
62
- _HYSHAPE_WARMUP_ON_LOAD = _truthy_env("NEAR_HYSHAPE_GEOMETRY_WARMUP_ON_LOAD", "1")
63
  print(
64
- f"[HyShape] geometry warmup on page load: "
65
- f"{'enabled' if _HYSHAPE_WARMUP_ON_LOAD else 'disabled'} "
66
- f"(NEAR_HYSHAPE_GEOMETRY_WARMUP_ON_LOAD, default 1).",
67
  flush=True,
68
  )
69
 
@@ -79,6 +79,7 @@ _MODEL_LOCK = threading.Lock()
79
  _LIGHT_PREPROCESS_LOCK = threading.Lock()
80
  _LIGHT_PREPROCESSOR: Any | None = None
81
  GEOMETRY_PIPELINE: Any | None = None
 
82
 
83
 
84
  def _path_is_git_lfs_pointer(path: Path) -> bool:
@@ -126,10 +127,6 @@ def end_session(req: gr.Request) -> None:
126
  _session_forget(str(session_id))
127
 
128
 
129
- def _runtime_device() -> str:
130
- return "cuda" if torch.cuda.is_available() else "cpu"
131
-
132
-
133
  def _ensure_rgba(image: Image.Image) -> Image.Image:
134
  if image.mode == "RGBA":
135
  return image
@@ -216,47 +213,59 @@ def preprocess_image_only(image_input: Optional[Image.Image]):
216
  return rgba, rgba, f"Image preprocessed in {elapsed:.1f}s."
217
 
218
 
219
- def ensure_geometry_pipeline() -> Any:
 
220
  global GEOMETRY_PIPELINE
221
  if GEOMETRY_PIPELINE is not None:
222
  return GEOMETRY_PIPELINE
223
 
224
- with _MODEL_LOCK:
225
- if GEOMETRY_PIPELINE is not None:
226
- return GEOMETRY_PIPELINE
 
 
 
 
 
227
 
228
- from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline # pyright: ignore[reportMissingImports]
229
 
230
- device = _runtime_device()
231
- hy_id = os.environ.get("NEAR_HUNYUAN_PRETRAINED", "tencent/Hunyuan3D-2.1")
 
232
  started_at = time.time()
233
- print(f"[HyShape] Loading geometry pipeline from {hy_id!r}...", flush=True)
234
- geometry_pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(hy_id, device="cpu")
235
- print(f"[HyShape] from_pretrained done in {time.time() - started_at:.1f}s", flush=True)
236
- move_started_at = time.time()
237
- geometry_pipeline.to(device)
238
- print(f"[HyShape] moved geometry pipeline to {device} in {time.time() - move_started_at:.1f}s", flush=True)
239
- GEOMETRY_PIPELINE = geometry_pipeline
240
- print(f"[HyShape] geometry pipeline ready in {time.time() - started_at:.1f}s total", flush=True)
241
- return GEOMETRY_PIPELINE
242
 
243
 
244
- @GPU
245
- @torch.inference_mode()
246
- def warmup_hunyuan_geometry_on_load():
247
- """Pay Hunyuan load + GPU move on first page view so Generate Mesh does not repeat it."""
248
- started_at = time.time()
249
- print(
250
- "[HyShape] warmup_on_load: entered GPU callback "
251
- f"(cuda_available={torch.cuda.is_available()})",
252
- flush=True,
253
- )
254
- ensure_geometry_pipeline()
255
- elapsed = time.time() - started_at
256
- print(f"[HyShape] warmup_on_load: finished in {elapsed:.1f}s", flush=True)
257
- return (
258
- f"Geometry ready ({elapsed:.1f}s). Click **Generate Mesh** — model should already be on GPU."
259
- )
 
 
 
 
 
 
 
 
 
260
 
261
 
262
  @GPU
@@ -287,10 +296,10 @@ def generate_mesh(
287
  mesh_rgb = _flatten_rgba_on_matte(rgba, (1.0, 1.0, 1.0))
288
  mesh_rgb.save(session_dir / "input_processed.png")
289
 
290
- progress(0.2, desc="Loading Hunyuan geometry")
291
- geometry_pipeline = ensure_geometry_pipeline()
292
 
293
- progress(0.6, desc="Generating geometry")
294
  mesh_started_at = time.time()
295
  mesh = geometry_pipeline(image=mesh_rgb)[0]
296
  print(f"[HyShape] geometry generation done in {time.time() - mesh_started_at:.1f}s", flush=True)
@@ -317,8 +326,8 @@ This diagnostic app isolates the Hunyuan geometry path.
317
 
318
  - Upload an image or click an example.
319
  - The upload path only performs lightweight preprocessing.
320
- - `Generate Mesh` is the main GPU callback and does not touch NeAR or gsplat.
321
- - With default settings, the first page load runs a **geometry warmup** on GPU so mesh generation does not pay the full cold start again.
322
  """
323
  )
324
 
@@ -356,12 +365,6 @@ This diagnostic app isolates the Hunyuan geometry path.
356
 
357
  demo.unload(end_session)
358
 
359
- if _HYSHAPE_WARMUP_ON_LOAD:
360
- demo.load(
361
- warmup_hunyuan_geometry_on_load,
362
- outputs=[status_md],
363
- )
364
-
365
  image_input.upload(
366
  preprocess_image_only,
367
  inputs=[image_input],
@@ -388,6 +391,9 @@ This diagnostic app isolates the Hunyuan geometry path.
388
  demo = build_app()
389
  demo.queue(max_size=2)
390
 
 
 
 
391
 
392
  if __name__ == "__main__":
393
  import argparse
 
58
  return value in ("1", "true", "yes", "on")
59
 
60
 
61
+ # Background CPU preload at process start: no ZeroGPU lease; first click only pays H2D + inference.
62
+ _CPU_PRELOAD_AT_START = _truthy_env("NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START", "1")
63
  print(
64
+ f"[HyShape] background CPU geometry preload at start: "
65
+ f"{'enabled' if _CPU_PRELOAD_AT_START else 'disabled'} "
66
+ f"(NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START, default 1).",
67
  flush=True,
68
  )
69
 
 
79
  _LIGHT_PREPROCESS_LOCK = threading.Lock()
80
  _LIGHT_PREPROCESSOR: Any | None = None
81
  GEOMETRY_PIPELINE: Any | None = None
82
+ _GEOMETRY_ON_CUDA = False
83
 
84
 
85
  def _path_is_git_lfs_pointer(path: Path) -> bool:
 
127
  _session_forget(str(session_id))
128
 
129
 
 
 
 
 
130
  def _ensure_rgba(image: Image.Image) -> Image.Image:
131
  if image.mode == "RGBA":
132
  return image
 
213
  return rgba, rgba, f"Image preprocessed in {elapsed:.1f}s."
214
 
215
 
216
+ def _ensure_geometry_loaded_on_cpu_locked() -> Any:
217
+ """Caller must hold ``_MODEL_LOCK``. Loads weights on CPU only (no ``.to(cuda)``)."""
218
  global GEOMETRY_PIPELINE
219
  if GEOMETRY_PIPELINE is not None:
220
  return GEOMETRY_PIPELINE
221
 
222
+ from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline # pyright: ignore[reportMissingImports]
223
+
224
+ hy_id = os.environ.get("NEAR_HUNYUAN_PRETRAINED", "tencent/Hunyuan3D-2.1")
225
+ started_at = time.time()
226
+ print(f"[HyShape] Loading geometry on CPU from {hy_id!r}...", flush=True)
227
+ GEOMETRY_PIPELINE = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(hy_id, device="cpu")
228
+ print(f"[HyShape] from_pretrained (CPU only) done in {time.time() - started_at:.1f}s", flush=True)
229
+ return GEOMETRY_PIPELINE
230
 
 
231
 
232
+ def preload_geometry_cpu_worker() -> None:
233
+ """Runs in a daemon thread at Space startup; does not use ``@spaces.GPU``."""
234
+ try:
235
  started_at = time.time()
236
+ print("[HyShape] background: CPU geometry preload started", flush=True)
237
+ with _MODEL_LOCK:
238
+ _ensure_geometry_loaded_on_cpu_locked()
239
+ print(f"[HyShape] background: CPU geometry preload finished in {time.time() - started_at:.1f}s", flush=True)
240
+ except Exception as exc:
241
+ print(f"[HyShape] background: CPU geometry preload failed: {exc}", flush=True)
 
 
 
242
 
243
 
244
+ def start_geometry_cpu_preload_thread() -> None:
245
+ threading.Thread(
246
+ target=preload_geometry_cpu_worker,
247
+ daemon=True,
248
+ name="hyshape-geometry-cpu-preload",
249
+ ).start()
250
+
251
+
252
+ def ensure_geometry_on_cuda() -> Any:
253
+ """Load on CPU if needed, then move to CUDA inside a ``@spaces.GPU`` callback."""
254
+ global _GEOMETRY_ON_CUDA
255
+ with _MODEL_LOCK:
256
+ pipeline = _ensure_geometry_loaded_on_cpu_locked()
257
+ if torch.cuda.is_available():
258
+ if not _GEOMETRY_ON_CUDA:
259
+ move_started_at = time.time()
260
+ pipeline.to("cuda")
261
+ _GEOMETRY_ON_CUDA = True
262
+ print(
263
+ f"[HyShape] geometry moved to GPU in {time.time() - move_started_at:.1f}s",
264
+ flush=True,
265
+ )
266
+ else:
267
+ print("[HyShape] CUDA unavailable in this callback; geometry stays on CPU.", flush=True)
268
+ return pipeline
269
 
270
 
271
  @GPU
 
296
  mesh_rgb = _flatten_rgba_on_matte(rgba, (1.0, 1.0, 1.0))
297
  mesh_rgb.save(session_dir / "input_processed.png")
298
 
299
+ progress(0.2, desc="Moving geometry to GPU")
300
+ geometry_pipeline = ensure_geometry_on_cuda()
301
 
302
+ progress(0.5, desc="Generating geometry")
303
  mesh_started_at = time.time()
304
  mesh = geometry_pipeline(image=mesh_rgb)[0]
305
  print(f"[HyShape] geometry generation done in {time.time() - mesh_started_at:.1f}s", flush=True)
 
326
 
327
  - Upload an image or click an example.
328
  - The upload path only performs lightweight preprocessing.
329
+ - `Generate Mesh` is the only place that requests ZeroGPU: it moves the CPU-loaded weights to GPU and runs inference.
330
+ - By default a **background thread** loads Hunyuan on **CPU at container start** (no GPU lease). Disable with `NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START=0`.
331
  """
332
  )
333
 
 
365
 
366
  demo.unload(end_session)
367
 
 
 
 
 
 
 
368
  image_input.upload(
369
  preprocess_image_only,
370
  inputs=[image_input],
 
391
  demo = build_app()
392
  demo.queue(max_size=2)
393
 
394
+ if _CPU_PRELOAD_AT_START:
395
+ start_geometry_cpu_preload_thread()
396
+
397
 
398
  if __name__ == "__main__":
399
  import argparse
tests/test_app_hyshape_architecture.py CHANGED
@@ -42,7 +42,7 @@ class AppHyShapeArchitectureTests(unittest.TestCase):
42
  generate_mesh = _get_function(_load_tree(), "generate_mesh")
43
  called = _called_names(generate_mesh)
44
 
45
- self.assertIn("ensure_geometry_pipeline", called)
46
  self.assertNotIn("ensure_near_pipeline", called)
47
  self.assertNotIn("ensure_gsplat_ready", called)
48
 
@@ -51,18 +51,25 @@ class AppHyShapeArchitectureTests(unittest.TestCase):
51
 
52
  self.assertIn("[HyShape] generate_mesh callback entered", source)
53
 
54
- def test_page_load_warmup_calls_geometry_loader_only(self) -> None:
55
  tree = _load_tree()
56
- warmup = _get_function(tree, "warmup_hunyuan_geometry_on_load")
57
- called = _called_names(warmup)
58
 
59
- self.assertIn("ensure_geometry_pipeline", called)
60
  self.assertNotIn("ensure_near_pipeline", called)
61
  self.assertNotIn("ensure_gsplat_ready", called)
62
 
63
  source = APP_PATH.read_text(encoding="utf-8")
64
- self.assertIn("demo.load(", source)
65
- self.assertIn("warmup_hunyuan_geometry_on_load", source)
 
 
 
 
 
 
 
66
 
67
 
68
  if __name__ == "__main__":
 
42
  generate_mesh = _get_function(_load_tree(), "generate_mesh")
43
  called = _called_names(generate_mesh)
44
 
45
+ self.assertIn("ensure_geometry_on_cuda", called)
46
  self.assertNotIn("ensure_near_pipeline", called)
47
  self.assertNotIn("ensure_gsplat_ready", called)
48
 
 
51
 
52
  self.assertIn("[HyShape] generate_mesh callback entered", source)
53
 
54
+ def test_cpu_preload_worker_only_loads_cpu_locked_path(self) -> None:
55
  tree = _load_tree()
56
+ worker = _get_function(tree, "preload_geometry_cpu_worker")
57
+ called = _called_names(worker)
58
 
59
+ self.assertIn("_ensure_geometry_loaded_on_cpu_locked", called)
60
  self.assertNotIn("ensure_near_pipeline", called)
61
  self.assertNotIn("ensure_gsplat_ready", called)
62
 
63
  source = APP_PATH.read_text(encoding="utf-8")
64
+ self.assertIn("start_geometry_cpu_preload_thread", source)
65
+ self.assertNotIn("warmup_hunyuan_geometry_on_load", source)
66
+
67
+ def test_ensure_geometry_on_cuda_moves_to_gpu_not_near(self) -> None:
68
+ ensure = _get_function(_load_tree(), "ensure_geometry_on_cuda")
69
+ called = _called_names(ensure)
70
+
71
+ self.assertIn("_ensure_geometry_loaded_on_cpu_locked", called)
72
+ self.assertNotIn("ensure_near_pipeline", called)
73
 
74
 
75
  if __name__ == "__main__":