Spaces:

luh0502
/

NeAR

Running on Zero

luh1124 committed 27 days ago

Commit

4258d6f

1 Parent(s): c5d64e1

refactor(hyshape): CPU preload at startup, GPU move on Generate Mesh

- Remove demo.load GPU warmup; start daemon thread for from_pretrained(cpu)
- ensure_geometry_on_cuda() in @GPU: .to(cuda) + inference
- Replace NEAR_HYSHAPE_GEOMETRY_WARMUP_ON_LOAD with NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START

Made-with: Cursor

Files changed (4) hide show

DEPLOY_HF_SPACE.md +1 -1
README.md +1 -1
app_hyshape.py +58 -52
tests/test_app_hyshape_architecture.py +14 -7

DEPLOY_HF_SPACE.md CHANGED Viewed

@@ -78,7 +78,7 @@ If you maintain a separate template tree (e.g. `NeAR_space`), copy changes **int
 | `NEAR_GSPLAT_WARMUP` | `0` | `1` |
 | `NEAR_GSPLAT_SOURCE_SPEC` | unset unless you have a proven build path | optional if you want build-time source compile |
 | `NEAR_ZEROGPU_HF_CEILING_S` | `90` | tune to your tier |
-| `NEAR_HYSHAPE_GEOMETRY_WARMUP_ON_LOAD` | `1` when Space entry is **`app_hyshape.py`** (default in code: load Hunyuan on first page view via `demo.load` + `@spaces.GPU`) | `0` to load geometry only when the user clicks **Generate Mesh** (saves one GPU allocation per visit, but repeats cold start) |
 ### 2b2. Mirroring DINOv2 and other auxiliary assets

 | `NEAR_GSPLAT_WARMUP` | `0` | `1` |
 | `NEAR_GSPLAT_SOURCE_SPEC` | unset unless you have a proven build path | optional if you want build-time source compile |
 | `NEAR_ZEROGPU_HF_CEILING_S` | `90` | tune to your tier |
+| `NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START` | `1` when Space entry is **`app_hyshape.py`** (default: background thread runs `from_pretrained(..., device="cpu")` at startup — **no** `@spaces.GPU`) | `0` to defer CPU load until the first **Generate Mesh** click (inside the GPU callback; longer first click) |
 ### 2b2. Mirroring DINOv2 and other auxiliary assets

README.md CHANGED Viewed

@@ -49,7 +49,7 @@ This repository combines:
 ## ZeroGPU Runtime Notes
 - The Space is temporarily pointed at **`app_hyshape.py`** (Hunyuan geometry only) for isolating ZeroGPU init issues. Restore **`app_file: app.py`** in the YAML header above when you want the full NeAR UI again.
-- **`app_hyshape.py`** defaults to **`NEAR_HYSHAPE_GEOMETRY_WARMUP_ON_LOAD=1`**: opening the Space triggers one GPU callback that loads Hunyuan so **Generate Mesh** does not pay the full ~40s cold start again in the same session. Set to **`0`** to disable (saves GPU seconds per visitor, slower first mesh).
 - The full `app.py` Space keeps **page-load image defaults** and **HDRI preview** on lightweight CPU paths so the first page visit does not spend the first ZeroGPU allocation on model initialization.
 - Runtime loading is split by responsibility: **Hunyuan3D geometry** is loaded only for mesh generation, **NeAR relighting** is loaded only for SLaT/render/export, and **gsplat warmup** is delayed until the first real render.
 - Binary wheels and mirrored auxiliary assets are stored separately:

 ## ZeroGPU Runtime Notes
 - The Space is temporarily pointed at **`app_hyshape.py`** (Hunyuan geometry only) for isolating ZeroGPU init issues. Restore **`app_file: app.py`** in the YAML header above when you want the full NeAR UI again.
+- **`app_hyshape.py`** defaults to **`NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START=1`**: a **background thread** loads Hunyuan on **CPU** at container start (no ZeroGPU lease). **Generate Mesh** then only pays **GPU move + inference** inside `@spaces.GPU`. Set to **`0`** to skip background preload (first click loads on CPU inside the GPU callback, longer first click).
 - The full `app.py` Space keeps **page-load image defaults** and **HDRI preview** on lightweight CPU paths so the first page visit does not spend the first ZeroGPU allocation on model initialization.
 - Runtime loading is split by responsibility: **Hunyuan3D geometry** is loaded only for mesh generation, **NeAR relighting** is loaded only for SLaT/render/export, and **gsplat warmup** is delayed until the first real render.
 - Binary wheels and mirrored auxiliary assets are stored separately:

app_hyshape.py CHANGED Viewed

@@ -58,12 +58,12 @@ def _truthy_env(name: str, default: str) -> bool:
     return value in ("1", "true", "yes", "on")
-# Default on for this probe Space: first "Generate Mesh" stays under ZeroGPU budget after weights are on GPU.
-_HYSHAPE_WARMUP_ON_LOAD = _truthy_env("NEAR_HYSHAPE_GEOMETRY_WARMUP_ON_LOAD", "1")
 print(
-    f"[HyShape] geometry warmup on page load: "
-    f"{'enabled' if _HYSHAPE_WARMUP_ON_LOAD else 'disabled'} "
-    f"(NEAR_HYSHAPE_GEOMETRY_WARMUP_ON_LOAD, default 1).",
     flush=True,
 )
@@ -79,6 +79,7 @@ _MODEL_LOCK = threading.Lock()
 _LIGHT_PREPROCESS_LOCK = threading.Lock()
 _LIGHT_PREPROCESSOR: Any | None = None
 GEOMETRY_PIPELINE: Any | None = None
 def _path_is_git_lfs_pointer(path: Path) -> bool:
@@ -126,10 +127,6 @@ def end_session(req: gr.Request) -> None:
     _session_forget(str(session_id))
-def _runtime_device() -> str:
-    return "cuda" if torch.cuda.is_available() else "cpu"
 def _ensure_rgba(image: Image.Image) -> Image.Image:
     if image.mode == "RGBA":
         return image
@@ -216,47 +213,59 @@ def preprocess_image_only(image_input: Optional[Image.Image]):
     return rgba, rgba, f"Image preprocessed in {elapsed:.1f}s."
-def ensure_geometry_pipeline() -> Any:
     global GEOMETRY_PIPELINE
     if GEOMETRY_PIPELINE is not None:
         return GEOMETRY_PIPELINE
-    with _MODEL_LOCK:
-        if GEOMETRY_PIPELINE is not None:
-            return GEOMETRY_PIPELINE
-        from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline  # pyright: ignore[reportMissingImports]
-        device = _runtime_device()
-        hy_id = os.environ.get("NEAR_HUNYUAN_PRETRAINED", "tencent/Hunyuan3D-2.1")
         started_at = time.time()
-        print(f"[HyShape] Loading geometry pipeline from {hy_id!r}...", flush=True)
-        geometry_pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(hy_id, device="cpu")
-        print(f"[HyShape] from_pretrained done in {time.time() - started_at:.1f}s", flush=True)
-        move_started_at = time.time()
-        geometry_pipeline.to(device)
-        print(f"[HyShape] moved geometry pipeline to {device} in {time.time() - move_started_at:.1f}s", flush=True)
-        GEOMETRY_PIPELINE = geometry_pipeline
-        print(f"[HyShape] geometry pipeline ready in {time.time() - started_at:.1f}s total", flush=True)
-        return GEOMETRY_PIPELINE
-@GPU
-@torch.inference_mode()
-def warmup_hunyuan_geometry_on_load():
-    """Pay Hunyuan load + GPU move on first page view so Generate Mesh does not repeat it."""
-    started_at = time.time()
-    print(
-        "[HyShape] warmup_on_load: entered GPU callback "
-        f"(cuda_available={torch.cuda.is_available()})",
-        flush=True,
-    )
-    ensure_geometry_pipeline()
-    elapsed = time.time() - started_at
-    print(f"[HyShape] warmup_on_load: finished in {elapsed:.1f}s", flush=True)
-    return (
-        f"Geometry ready ({elapsed:.1f}s). Click **Generate Mesh** — model should already be on GPU."
-    )
 @GPU
@@ -287,10 +296,10 @@ def generate_mesh(
     mesh_rgb = _flatten_rgba_on_matte(rgba, (1.0, 1.0, 1.0))
     mesh_rgb.save(session_dir / "input_processed.png")
-    progress(0.2, desc="Loading Hunyuan geometry")
-    geometry_pipeline = ensure_geometry_pipeline()
-    progress(0.6, desc="Generating geometry")
     mesh_started_at = time.time()
     mesh = geometry_pipeline(image=mesh_rgb)[0]
     print(f"[HyShape] geometry generation done in {time.time() - mesh_started_at:.1f}s", flush=True)
@@ -317,8 +326,8 @@ This diagnostic app isolates the Hunyuan geometry path.
 - Upload an image or click an example.
 - The upload path only performs lightweight preprocessing.
-- `Generate Mesh` is the main GPU callback and does not touch NeAR or gsplat.
-- With default settings, the first page load runs a **geometry warmup** on GPU so mesh generation does not pay the full cold start again.
             """
         )
@@ -356,12 +365,6 @@ This diagnostic app isolates the Hunyuan geometry path.
         demo.unload(end_session)
-        if _HYSHAPE_WARMUP_ON_LOAD:
-            demo.load(
-                warmup_hunyuan_geometry_on_load,
-                outputs=[status_md],
-            )
         image_input.upload(
             preprocess_image_only,
             inputs=[image_input],
@@ -388,6 +391,9 @@ This diagnostic app isolates the Hunyuan geometry path.
 demo = build_app()
 demo.queue(max_size=2)
 if __name__ == "__main__":
     import argparse

     return value in ("1", "true", "yes", "on")
+# Background CPU preload at process start: no ZeroGPU lease; first click only pays H2D + inference.
+_CPU_PRELOAD_AT_START = _truthy_env("NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START", "1")
 print(
+    f"[HyShape] background CPU geometry preload at start: "
+    f"{'enabled' if _CPU_PRELOAD_AT_START else 'disabled'} "
+    f"(NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START, default 1).",
     flush=True,
 )
 _LIGHT_PREPROCESS_LOCK = threading.Lock()
 _LIGHT_PREPROCESSOR: Any | None = None
 GEOMETRY_PIPELINE: Any | None = None
+_GEOMETRY_ON_CUDA = False
 def _path_is_git_lfs_pointer(path: Path) -> bool:
     _session_forget(str(session_id))
 def _ensure_rgba(image: Image.Image) -> Image.Image:
     if image.mode == "RGBA":
         return image
     return rgba, rgba, f"Image preprocessed in {elapsed:.1f}s."
+def _ensure_geometry_loaded_on_cpu_locked() -> Any:
+    """Caller must hold ``_MODEL_LOCK``. Loads weights on CPU only (no ``.to(cuda)``)."""
     global GEOMETRY_PIPELINE
     if GEOMETRY_PIPELINE is not None:
         return GEOMETRY_PIPELINE
+    from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline  # pyright: ignore[reportMissingImports]
+    hy_id = os.environ.get("NEAR_HUNYUAN_PRETRAINED", "tencent/Hunyuan3D-2.1")
+    started_at = time.time()
+    print(f"[HyShape] Loading geometry on CPU from {hy_id!r}...", flush=True)
+    GEOMETRY_PIPELINE = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(hy_id, device="cpu")
+    print(f"[HyShape] from_pretrained (CPU only) done in {time.time() - started_at:.1f}s", flush=True)
+    return GEOMETRY_PIPELINE
+def preload_geometry_cpu_worker() -> None:
+    """Runs in a daemon thread at Space startup; does not use ``@spaces.GPU``."""
+    try:
         started_at = time.time()
+        print("[HyShape] background: CPU geometry preload started", flush=True)
+        with _MODEL_LOCK:
+            _ensure_geometry_loaded_on_cpu_locked()
+        print(f"[HyShape] background: CPU geometry preload finished in {time.time() - started_at:.1f}s", flush=True)
+    except Exception as exc:
+        print(f"[HyShape] background: CPU geometry preload failed: {exc}", flush=True)
+def start_geometry_cpu_preload_thread() -> None:
+    threading.Thread(
+        target=preload_geometry_cpu_worker,
+        daemon=True,
+        name="hyshape-geometry-cpu-preload",
+    ).start()
+def ensure_geometry_on_cuda() -> Any:
+    """Load on CPU if needed, then move to CUDA inside a ``@spaces.GPU`` callback."""
+    global _GEOMETRY_ON_CUDA
+    with _MODEL_LOCK:
+        pipeline = _ensure_geometry_loaded_on_cpu_locked()
+        if torch.cuda.is_available():
+            if not _GEOMETRY_ON_CUDA:
+                move_started_at = time.time()
+                pipeline.to("cuda")
+                _GEOMETRY_ON_CUDA = True
+                print(
+                    f"[HyShape] geometry moved to GPU in {time.time() - move_started_at:.1f}s",
+                    flush=True,
+                )
+        else:
+            print("[HyShape] CUDA unavailable in this callback; geometry stays on CPU.", flush=True)
+        return pipeline
 @GPU
     mesh_rgb = _flatten_rgba_on_matte(rgba, (1.0, 1.0, 1.0))
     mesh_rgb.save(session_dir / "input_processed.png")
+    progress(0.2, desc="Moving geometry to GPU")
+    geometry_pipeline = ensure_geometry_on_cuda()
+    progress(0.5, desc="Generating geometry")
     mesh_started_at = time.time()
     mesh = geometry_pipeline(image=mesh_rgb)[0]
     print(f"[HyShape] geometry generation done in {time.time() - mesh_started_at:.1f}s", flush=True)
 - Upload an image or click an example.
 - The upload path only performs lightweight preprocessing.
+- `Generate Mesh` is the only place that requests ZeroGPU: it moves the CPU-loaded weights to GPU and runs inference.
+- By default a **background thread** loads Hunyuan on **CPU at container start** (no GPU lease). Disable with `NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START=0`.
             """
         )
         demo.unload(end_session)
         image_input.upload(
             preprocess_image_only,
             inputs=[image_input],
 demo = build_app()
 demo.queue(max_size=2)
+if _CPU_PRELOAD_AT_START:
+    start_geometry_cpu_preload_thread()
 if __name__ == "__main__":
     import argparse

tests/test_app_hyshape_architecture.py CHANGED Viewed

@@ -42,7 +42,7 @@ class AppHyShapeArchitectureTests(unittest.TestCase):
         generate_mesh = _get_function(_load_tree(), "generate_mesh")
         called = _called_names(generate_mesh)
-        self.assertIn("ensure_geometry_pipeline", called)
         self.assertNotIn("ensure_near_pipeline", called)
         self.assertNotIn("ensure_gsplat_ready", called)
@@ -51,18 +51,25 @@ class AppHyShapeArchitectureTests(unittest.TestCase):
         self.assertIn("[HyShape] generate_mesh callback entered", source)
-    def test_page_load_warmup_calls_geometry_loader_only(self) -> None:
         tree = _load_tree()
-        warmup = _get_function(tree, "warmup_hunyuan_geometry_on_load")
-        called = _called_names(warmup)
-        self.assertIn("ensure_geometry_pipeline", called)
         self.assertNotIn("ensure_near_pipeline", called)
         self.assertNotIn("ensure_gsplat_ready", called)
         source = APP_PATH.read_text(encoding="utf-8")
-        self.assertIn("demo.load(", source)
-        self.assertIn("warmup_hunyuan_geometry_on_load", source)
 if __name__ == "__main__":

         generate_mesh = _get_function(_load_tree(), "generate_mesh")
         called = _called_names(generate_mesh)
+        self.assertIn("ensure_geometry_on_cuda", called)
         self.assertNotIn("ensure_near_pipeline", called)
         self.assertNotIn("ensure_gsplat_ready", called)
         self.assertIn("[HyShape] generate_mesh callback entered", source)
+    def test_cpu_preload_worker_only_loads_cpu_locked_path(self) -> None:
         tree = _load_tree()
+        worker = _get_function(tree, "preload_geometry_cpu_worker")
+        called = _called_names(worker)
+        self.assertIn("_ensure_geometry_loaded_on_cpu_locked", called)
         self.assertNotIn("ensure_near_pipeline", called)
         self.assertNotIn("ensure_gsplat_ready", called)
         source = APP_PATH.read_text(encoding="utf-8")
+        self.assertIn("start_geometry_cpu_preload_thread", source)
+        self.assertNotIn("warmup_hunyuan_geometry_on_load", source)
+    def test_ensure_geometry_on_cuda_moves_to_gpu_not_near(self) -> None:
+        ensure = _get_function(_load_tree(), "ensure_geometry_on_cuda")
+        called = _called_names(ensure)
+        self.assertIn("_ensure_geometry_loaded_on_cpu_locked", called)
+        self.assertNotIn("ensure_near_pipeline", called)
 if __name__ == "__main__":