refactor(hyshape): CPU preload at startup, GPU move on Generate Mesh
Browse files
- Remove demo.load GPU warmup; start daemon thread for from_pretrained(cpu)
- ensure_geometry_on_cuda() in @GPU: .to(cuda) + inference
- Replace NEAR_HYSHAPE_GEOMETRY_WARMUP_ON_LOAD with NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START
Made-with: Cursor
- DEPLOY_HF_SPACE.md +1 -1
- README.md +1 -1
- app_hyshape.py +58 -52
- tests/test_app_hyshape_architecture.py +14 -7
DEPLOY_HF_SPACE.md
CHANGED
|
@@ -78,7 +78,7 @@ If you maintain a separate template tree (e.g. `NeAR_space`), copy changes **int
|
|
| 78 |
| `NEAR_GSPLAT_WARMUP` | `0` | `1` |
|
| 79 |
| `NEAR_GSPLAT_SOURCE_SPEC` | unset unless you have a proven build path | optional if you want build-time source compile |
|
| 80 |
| `NEAR_ZEROGPU_HF_CEILING_S` | `90` | tune to your tier |
|
| 81 |
-
| `
|
| 82 |
|
| 83 |
### 2b2. Mirroring DINOv2 and other auxiliary assets
|
| 84 |
|
|
|
|
| 78 |
| `NEAR_GSPLAT_WARMUP` | `0` | `1` |
|
| 79 |
| `NEAR_GSPLAT_SOURCE_SPEC` | unset unless you have a proven build path | optional if you want build-time source compile |
|
| 80 |
| `NEAR_ZEROGPU_HF_CEILING_S` | `90` | tune to your tier |
|
| 81 |
+
| `NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START` | `1` when Space entry is **`app_hyshape.py`** (default: background thread runs `from_pretrained(..., device="cpu")` at startup — **no** `@spaces.GPU`) | `0` to defer CPU load until the first **Generate Mesh** click (inside the GPU callback; longer first click) |
|
| 82 |
|
| 83 |
### 2b2. Mirroring DINOv2 and other auxiliary assets
|
| 84 |
|
README.md
CHANGED
|
@@ -49,7 +49,7 @@ This repository combines:
|
|
| 49 |
## ZeroGPU Runtime Notes
|
| 50 |
|
| 51 |
- The Space is temporarily pointed at **`app_hyshape.py`** (Hunyuan geometry only) for isolating ZeroGPU init issues. Restore **`app_file: app.py`** in the YAML header above when you want the full NeAR UI again.
|
| 52 |
-
- **`app_hyshape.py`** defaults to **`
|
| 53 |
- The full `app.py` Space keeps **page-load image defaults** and **HDRI preview** on lightweight CPU paths so the first page visit does not spend the first ZeroGPU allocation on model initialization.
|
| 54 |
- Runtime loading is split by responsibility: **Hunyuan3D geometry** is loaded only for mesh generation, **NeAR relighting** is loaded only for SLaT/render/export, and **gsplat warmup** is delayed until the first real render.
|
| 55 |
- Binary wheels and mirrored auxiliary assets are stored separately:
|
|
|
|
| 49 |
## ZeroGPU Runtime Notes
|
| 50 |
|
| 51 |
- The Space is temporarily pointed at **`app_hyshape.py`** (Hunyuan geometry only) for isolating ZeroGPU init issues. Restore **`app_file: app.py`** in the YAML header above when you want the full NeAR UI again.
|
| 52 |
+
- **`app_hyshape.py`** defaults to **`NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START=1`**: a **background thread** loads Hunyuan on **CPU** at container start (no ZeroGPU lease). **Generate Mesh** then only pays **GPU move + inference** inside `@spaces.GPU`. Set to **`0`** to skip background preload (first click loads on CPU inside the GPU callback, longer first click).
|
| 53 |
- The full `app.py` Space keeps **page-load image defaults** and **HDRI preview** on lightweight CPU paths so the first page visit does not spend the first ZeroGPU allocation on model initialization.
|
| 54 |
- Runtime loading is split by responsibility: **Hunyuan3D geometry** is loaded only for mesh generation, **NeAR relighting** is loaded only for SLaT/render/export, and **gsplat warmup** is delayed until the first real render.
|
| 55 |
- Binary wheels and mirrored auxiliary assets are stored separately:
|
app_hyshape.py
CHANGED
|
@@ -58,12 +58,12 @@ def _truthy_env(name: str, default: str) -> bool:
|
|
| 58 |
return value in ("1", "true", "yes", "on")
|
| 59 |
|
| 60 |
|
| 61 |
-
#
|
| 62 |
-
|
| 63 |
print(
|
| 64 |
-
f"[HyShape]
|
| 65 |
-
f"{'enabled' if
|
| 66 |
-
f"(
|
| 67 |
flush=True,
|
| 68 |
)
|
| 69 |
|
|
@@ -79,6 +79,7 @@ _MODEL_LOCK = threading.Lock()
|
|
| 79 |
_LIGHT_PREPROCESS_LOCK = threading.Lock()
|
| 80 |
_LIGHT_PREPROCESSOR: Any | None = None
|
| 81 |
GEOMETRY_PIPELINE: Any | None = None
|
|
|
|
| 82 |
|
| 83 |
|
| 84 |
def _path_is_git_lfs_pointer(path: Path) -> bool:
|
|
@@ -126,10 +127,6 @@ def end_session(req: gr.Request) -> None:
|
|
| 126 |
_session_forget(str(session_id))
|
| 127 |
|
| 128 |
|
| 129 |
-
def _runtime_device() -> str:
|
| 130 |
-
return "cuda" if torch.cuda.is_available() else "cpu"
|
| 131 |
-
|
| 132 |
-
|
| 133 |
def _ensure_rgba(image: Image.Image) -> Image.Image:
|
| 134 |
if image.mode == "RGBA":
|
| 135 |
return image
|
|
@@ -216,47 +213,59 @@ def preprocess_image_only(image_input: Optional[Image.Image]):
|
|
| 216 |
return rgba, rgba, f"Image preprocessed in {elapsed:.1f}s."
|
| 217 |
|
| 218 |
|
| 219 |
-
def
|
|
|
|
| 220 |
global GEOMETRY_PIPELINE
|
| 221 |
if GEOMETRY_PIPELINE is not None:
|
| 222 |
return GEOMETRY_PIPELINE
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
-
from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline # pyright: ignore[reportMissingImports]
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
|
|
|
| 232 |
started_at = time.time()
|
| 233 |
-
print(
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
print(f"[HyShape]
|
| 239 |
-
GEOMETRY_PIPELINE = geometry_pipeline
|
| 240 |
-
print(f"[HyShape] geometry pipeline ready in {time.time() - started_at:.1f}s total", flush=True)
|
| 241 |
-
return GEOMETRY_PIPELINE
|
| 242 |
|
| 243 |
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
|
| 262 |
@GPU
|
|
@@ -287,10 +296,10 @@ def generate_mesh(
|
|
| 287 |
mesh_rgb = _flatten_rgba_on_matte(rgba, (1.0, 1.0, 1.0))
|
| 288 |
mesh_rgb.save(session_dir / "input_processed.png")
|
| 289 |
|
| 290 |
-
progress(0.2, desc="
|
| 291 |
-
geometry_pipeline =
|
| 292 |
|
| 293 |
-
progress(0.
|
| 294 |
mesh_started_at = time.time()
|
| 295 |
mesh = geometry_pipeline(image=mesh_rgb)[0]
|
| 296 |
print(f"[HyShape] geometry generation done in {time.time() - mesh_started_at:.1f}s", flush=True)
|
|
@@ -317,8 +326,8 @@ This diagnostic app isolates the Hunyuan geometry path.
|
|
| 317 |
|
| 318 |
- Upload an image or click an example.
|
| 319 |
- The upload path only performs lightweight preprocessing.
|
| 320 |
-
- `Generate Mesh` is the
|
| 321 |
-
-
|
| 322 |
"""
|
| 323 |
)
|
| 324 |
|
|
@@ -356,12 +365,6 @@ This diagnostic app isolates the Hunyuan geometry path.
|
|
| 356 |
|
| 357 |
demo.unload(end_session)
|
| 358 |
|
| 359 |
-
if _HYSHAPE_WARMUP_ON_LOAD:
|
| 360 |
-
demo.load(
|
| 361 |
-
warmup_hunyuan_geometry_on_load,
|
| 362 |
-
outputs=[status_md],
|
| 363 |
-
)
|
| 364 |
-
|
| 365 |
image_input.upload(
|
| 366 |
preprocess_image_only,
|
| 367 |
inputs=[image_input],
|
|
@@ -388,6 +391,9 @@ This diagnostic app isolates the Hunyuan geometry path.
|
|
| 388 |
demo = build_app()
|
| 389 |
demo.queue(max_size=2)
|
| 390 |
|
|
|
|
|
|
|
|
|
|
| 391 |
|
| 392 |
if __name__ == "__main__":
|
| 393 |
import argparse
|
|
|
|
| 58 |
return value in ("1", "true", "yes", "on")
|
| 59 |
|
| 60 |
|
| 61 |
+
# Background CPU preload at process start: no ZeroGPU lease; first click only pays H2D + inference.
|
| 62 |
+
_CPU_PRELOAD_AT_START = _truthy_env("NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START", "1")
|
| 63 |
print(
|
| 64 |
+
f"[HyShape] background CPU geometry preload at start: "
|
| 65 |
+
f"{'enabled' if _CPU_PRELOAD_AT_START else 'disabled'} "
|
| 66 |
+
f"(NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START, default 1).",
|
| 67 |
flush=True,
|
| 68 |
)
|
| 69 |
|
|
|
|
| 79 |
_LIGHT_PREPROCESS_LOCK = threading.Lock()
|
| 80 |
_LIGHT_PREPROCESSOR: Any | None = None
|
| 81 |
GEOMETRY_PIPELINE: Any | None = None
|
| 82 |
+
_GEOMETRY_ON_CUDA = False
|
| 83 |
|
| 84 |
|
| 85 |
def _path_is_git_lfs_pointer(path: Path) -> bool:
|
|
|
|
| 127 |
_session_forget(str(session_id))
|
| 128 |
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
def _ensure_rgba(image: Image.Image) -> Image.Image:
|
| 131 |
if image.mode == "RGBA":
|
| 132 |
return image
|
|
|
|
| 213 |
return rgba, rgba, f"Image preprocessed in {elapsed:.1f}s."
|
| 214 |
|
| 215 |
|
| 216 |
+
def _ensure_geometry_loaded_on_cpu_locked() -> Any:
|
| 217 |
+
"""Caller must hold ``_MODEL_LOCK``. Loads weights on CPU only (no ``.to(cuda)``)."""
|
| 218 |
global GEOMETRY_PIPELINE
|
| 219 |
if GEOMETRY_PIPELINE is not None:
|
| 220 |
return GEOMETRY_PIPELINE
|
| 221 |
|
| 222 |
+
from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline # pyright: ignore[reportMissingImports]
|
| 223 |
+
|
| 224 |
+
hy_id = os.environ.get("NEAR_HUNYUAN_PRETRAINED", "tencent/Hunyuan3D-2.1")
|
| 225 |
+
started_at = time.time()
|
| 226 |
+
print(f"[HyShape] Loading geometry on CPU from {hy_id!r}...", flush=True)
|
| 227 |
+
GEOMETRY_PIPELINE = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(hy_id, device="cpu")
|
| 228 |
+
print(f"[HyShape] from_pretrained (CPU only) done in {time.time() - started_at:.1f}s", flush=True)
|
| 229 |
+
return GEOMETRY_PIPELINE
|
| 230 |
|
|
|
|
| 231 |
|
| 232 |
+
def preload_geometry_cpu_worker() -> None:
|
| 233 |
+
"""Runs in a daemon thread at Space startup; does not use ``@spaces.GPU``."""
|
| 234 |
+
try:
|
| 235 |
started_at = time.time()
|
| 236 |
+
print("[HyShape] background: CPU geometry preload started", flush=True)
|
| 237 |
+
with _MODEL_LOCK:
|
| 238 |
+
_ensure_geometry_loaded_on_cpu_locked()
|
| 239 |
+
print(f"[HyShape] background: CPU geometry preload finished in {time.time() - started_at:.1f}s", flush=True)
|
| 240 |
+
except Exception as exc:
|
| 241 |
+
print(f"[HyShape] background: CPU geometry preload failed: {exc}", flush=True)
|
|
|
|
|
|
|
|
|
|
| 242 |
|
| 243 |
|
| 244 |
+
def start_geometry_cpu_preload_thread() -> None:
|
| 245 |
+
threading.Thread(
|
| 246 |
+
target=preload_geometry_cpu_worker,
|
| 247 |
+
daemon=True,
|
| 248 |
+
name="hyshape-geometry-cpu-preload",
|
| 249 |
+
).start()
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def ensure_geometry_on_cuda() -> Any:
|
| 253 |
+
"""Load on CPU if needed, then move to CUDA inside a ``@spaces.GPU`` callback."""
|
| 254 |
+
global _GEOMETRY_ON_CUDA
|
| 255 |
+
with _MODEL_LOCK:
|
| 256 |
+
pipeline = _ensure_geometry_loaded_on_cpu_locked()
|
| 257 |
+
if torch.cuda.is_available():
|
| 258 |
+
if not _GEOMETRY_ON_CUDA:
|
| 259 |
+
move_started_at = time.time()
|
| 260 |
+
pipeline.to("cuda")
|
| 261 |
+
_GEOMETRY_ON_CUDA = True
|
| 262 |
+
print(
|
| 263 |
+
f"[HyShape] geometry moved to GPU in {time.time() - move_started_at:.1f}s",
|
| 264 |
+
flush=True,
|
| 265 |
+
)
|
| 266 |
+
else:
|
| 267 |
+
print("[HyShape] CUDA unavailable in this callback; geometry stays on CPU.", flush=True)
|
| 268 |
+
return pipeline
|
| 269 |
|
| 270 |
|
| 271 |
@GPU
|
|
|
|
| 296 |
mesh_rgb = _flatten_rgba_on_matte(rgba, (1.0, 1.0, 1.0))
|
| 297 |
mesh_rgb.save(session_dir / "input_processed.png")
|
| 298 |
|
| 299 |
+
progress(0.2, desc="Moving geometry to GPU")
|
| 300 |
+
geometry_pipeline = ensure_geometry_on_cuda()
|
| 301 |
|
| 302 |
+
progress(0.5, desc="Generating geometry")
|
| 303 |
mesh_started_at = time.time()
|
| 304 |
mesh = geometry_pipeline(image=mesh_rgb)[0]
|
| 305 |
print(f"[HyShape] geometry generation done in {time.time() - mesh_started_at:.1f}s", flush=True)
|
|
|
|
| 326 |
|
| 327 |
- Upload an image or click an example.
|
| 328 |
- The upload path only performs lightweight preprocessing.
|
| 329 |
+
- `Generate Mesh` is the only place that requests ZeroGPU: it moves the CPU-loaded weights to GPU and runs inference.
|
| 330 |
+
- By default a **background thread** loads Hunyuan on **CPU at container start** (no GPU lease). Disable with `NEAR_HYSHAPE_GEOMETRY_CPU_PRELOAD_AT_START=0`.
|
| 331 |
"""
|
| 332 |
)
|
| 333 |
|
|
|
|
| 365 |
|
| 366 |
demo.unload(end_session)
|
| 367 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
image_input.upload(
|
| 369 |
preprocess_image_only,
|
| 370 |
inputs=[image_input],
|
|
|
|
| 391 |
demo = build_app()
|
| 392 |
demo.queue(max_size=2)
|
| 393 |
|
| 394 |
+
if _CPU_PRELOAD_AT_START:
|
| 395 |
+
start_geometry_cpu_preload_thread()
|
| 396 |
+
|
| 397 |
|
| 398 |
if __name__ == "__main__":
|
| 399 |
import argparse
|
tests/test_app_hyshape_architecture.py
CHANGED
|
@@ -42,7 +42,7 @@ class AppHyShapeArchitectureTests(unittest.TestCase):
|
|
| 42 |
generate_mesh = _get_function(_load_tree(), "generate_mesh")
|
| 43 |
called = _called_names(generate_mesh)
|
| 44 |
|
| 45 |
-
self.assertIn("
|
| 46 |
self.assertNotIn("ensure_near_pipeline", called)
|
| 47 |
self.assertNotIn("ensure_gsplat_ready", called)
|
| 48 |
|
|
@@ -51,18 +51,25 @@ class AppHyShapeArchitectureTests(unittest.TestCase):
|
|
| 51 |
|
| 52 |
self.assertIn("[HyShape] generate_mesh callback entered", source)
|
| 53 |
|
| 54 |
-
def
|
| 55 |
tree = _load_tree()
|
| 56 |
-
|
| 57 |
-
called = _called_names(
|
| 58 |
|
| 59 |
-
self.assertIn("
|
| 60 |
self.assertNotIn("ensure_near_pipeline", called)
|
| 61 |
self.assertNotIn("ensure_gsplat_ready", called)
|
| 62 |
|
| 63 |
source = APP_PATH.read_text(encoding="utf-8")
|
| 64 |
-
self.assertIn("
|
| 65 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
|
| 68 |
if __name__ == "__main__":
|
|
|
|
| 42 |
generate_mesh = _get_function(_load_tree(), "generate_mesh")
|
| 43 |
called = _called_names(generate_mesh)
|
| 44 |
|
| 45 |
+
self.assertIn("ensure_geometry_on_cuda", called)
|
| 46 |
self.assertNotIn("ensure_near_pipeline", called)
|
| 47 |
self.assertNotIn("ensure_gsplat_ready", called)
|
| 48 |
|
|
|
|
| 51 |
|
| 52 |
self.assertIn("[HyShape] generate_mesh callback entered", source)
|
| 53 |
|
| 54 |
+
def test_cpu_preload_worker_only_loads_cpu_locked_path(self) -> None:
|
| 55 |
tree = _load_tree()
|
| 56 |
+
worker = _get_function(tree, "preload_geometry_cpu_worker")
|
| 57 |
+
called = _called_names(worker)
|
| 58 |
|
| 59 |
+
self.assertIn("_ensure_geometry_loaded_on_cpu_locked", called)
|
| 60 |
self.assertNotIn("ensure_near_pipeline", called)
|
| 61 |
self.assertNotIn("ensure_gsplat_ready", called)
|
| 62 |
|
| 63 |
source = APP_PATH.read_text(encoding="utf-8")
|
| 64 |
+
self.assertIn("start_geometry_cpu_preload_thread", source)
|
| 65 |
+
self.assertNotIn("warmup_hunyuan_geometry_on_load", source)
|
| 66 |
+
|
| 67 |
+
def test_ensure_geometry_on_cuda_moves_to_gpu_not_near(self) -> None:
|
| 68 |
+
ensure = _get_function(_load_tree(), "ensure_geometry_on_cuda")
|
| 69 |
+
called = _called_names(ensure)
|
| 70 |
+
|
| 71 |
+
self.assertIn("_ensure_geometry_loaded_on_cpu_locked", called)
|
| 72 |
+
self.assertNotIn("ensure_near_pipeline", called)
|
| 73 |
|
| 74 |
|
| 75 |
if __name__ == "__main__":
|