sapiens2-pointmap

Runtime error

App Files Files Community

Rawal Khirodkar committed on 13 days ago

Commit

2b7cc62

1 Parent(s): 6b72fc2

Pointmap: smooth gradient-based vertex normals + process=True → kills flat-shaded triangle facets

Browse files

Files changed (1) hide show

app.py +25 -11

app.py CHANGED Viewed

@@ -191,20 +191,30 @@ def _triangulate_grid(pointmap_hwc: np.ndarray, mask_hw: np.ndarray,
                       max_edge: float = 0.04):
     """Build a triangulated mesh from the (H, W) pointmap grid.
-    Returns (verts, uvs, faces). Each valid pixel becomes a vertex; adjacent
-    valid pixels form quads (2 tris). Triangles with any edge longer than
-    `max_edge` (meters) are dropped to kill stretched skin at depth jumps.
-    UVs are pixel-aligned for direct texturing with the input image.
     """
     H, W = pointmap_hwc.shape[:2]
     z = pointmap_hwc[:, :, 2]
     valid = mask_hw & np.isfinite(pointmap_hwc).all(axis=2) & (z > 0.05) & (z < 25.0)
     idx_map = np.full((H, W), -1, dtype=np.int64)
     yy, xx = np.where(valid)
     idx_map[yy, xx] = np.arange(len(yy))
     verts = pointmap_hwc[yy, xx].astype(np.float32)              # (N, 3)
     uvs = np.stack([xx / max(W - 1, 1), yy / max(H - 1, 1)],
                    axis=1).astype(np.float32)                    # (N, 2)
@@ -222,7 +232,7 @@ def _triangulate_grid(pointmap_hwc: np.ndarray, mask_hw: np.ndarray,
     e20 = np.linalg.norm(p0 - p2, axis=1)
     keep = (e01 < max_edge) & (e12 < max_edge) & (e20 < max_edge)
     faces = faces[keep].astype(np.int64)
-    return verts, uvs, faces
 def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
@@ -230,16 +240,15 @@ def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
     """`image_pil_texture` should be the HIGHEST-resolution input available
     (not the inference-capped one) — it's used as the GLB albedo so the mesh
     surface stays smooth even when zoomed in past the mesh-vertex density."""
-    verts, uvs, faces = _triangulate_grid(pointmap_hwc, mask_hw, max_edge=max_edge)
     # Y-up flip so the viewer's default orientation matches photographic intuition.
     flip = np.array([1.0, -1.0, -1.0], dtype=np.float32)
     verts = verts * flip
     # Recenter the scene on the human's centroid so Three.js's auto-fit
-    # focuses tightly on the figure — without this, the camera marker at the
-    # original world origin (~1-2m behind the human) inflates the bounding
-    # box and the human ends up small in the default view.
     centroid = verts.mean(axis=0).astype(np.float32) if len(verts) else np.zeros(3, np.float32)
     verts = verts - centroid
@@ -253,8 +262,13 @@ def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
     )
     visual = trimesh.visual.texture.TextureVisuals(uv=uvs, material=material)
-    mesh = trimesh.Trimesh(vertices=verts, faces=faces, visual=visual, process=False)
-    _ = mesh.vertex_normals  # lazy compute → exported in glb
     # # Optional camera marker — disabled for now to keep the viewer focused on
     # # the human only. Re-enable by uncommenting:

                       max_edge: float = 0.04):
     """Build a triangulated mesh from the (H, W) pointmap grid.
+    Returns (verts, uvs, faces, vertex_normals). Each valid pixel → vertex;
+    adjacent valid pixels form quads (2 tris). Long-edge triangles are dropped
+    to kill stretched skin at depth jumps. Vertex normals are computed from the
+    pointmap's spatial gradient (smooth, per-pixel) instead of inferred from
+    triangle face normals (which would give flat-shaded facets).
     """
     H, W = pointmap_hwc.shape[:2]
     z = pointmap_hwc[:, :, 2]
     valid = mask_hw & np.isfinite(pointmap_hwc).all(axis=2) & (z > 0.05) & (z < 25.0)
+    # Smooth per-pixel normals from cross product of x- and y- spatial gradients.
+    px = np.zeros_like(pointmap_hwc, dtype=np.float32)
+    py = np.zeros_like(pointmap_hwc, dtype=np.float32)
+    px[:, 1:-1] = (pointmap_hwc[:, 2:] - pointmap_hwc[:, :-2]) * 0.5
+    py[1:-1, :] = (pointmap_hwc[2:, :] - pointmap_hwc[:-2, :]) * 0.5
+    n_grid = np.cross(px, py)
+    n_grid /= np.linalg.norm(n_grid, axis=2, keepdims=True).clip(min=1e-8)
     idx_map = np.full((H, W), -1, dtype=np.int64)
     yy, xx = np.where(valid)
     idx_map[yy, xx] = np.arange(len(yy))
     verts = pointmap_hwc[yy, xx].astype(np.float32)              # (N, 3)
+    normals = n_grid[yy, xx].astype(np.float32)                   # (N, 3)
     uvs = np.stack([xx / max(W - 1, 1), yy / max(H - 1, 1)],
                    axis=1).astype(np.float32)                    # (N, 2)
     e20 = np.linalg.norm(p0 - p2, axis=1)
     keep = (e01 < max_edge) & (e12 < max_edge) & (e20 < max_edge)
     faces = faces[keep].astype(np.int64)
+    return verts, uvs, faces, normals
 def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
     """`image_pil_texture` should be the HIGHEST-resolution input available
     (not the inference-capped one) — it's used as the GLB albedo so the mesh
     surface stays smooth even when zoomed in past the mesh-vertex density."""
+    verts, uvs, faces, normals = _triangulate_grid(pointmap_hwc, mask_hw, max_edge=max_edge)
     # Y-up flip so the viewer's default orientation matches photographic intuition.
     flip = np.array([1.0, -1.0, -1.0], dtype=np.float32)
     verts = verts * flip
+    normals = normals * flip   # normals transform with the same axis flip
     # Recenter the scene on the human's centroid so Three.js's auto-fit
+    # focuses tightly on the figure.
     centroid = verts.mean(axis=0).astype(np.float32) if len(verts) else np.zeros(3, np.float32)
     verts = verts - centroid
     )
     visual = trimesh.visual.texture.TextureVisuals(uv=uvs, material=material)
+    # Pass smooth, gradient-derived per-vertex normals explicitly. process=True
+    # lets trimesh fix winding + cache properly so the GLB exporter actually
+    # writes the normals (no flat-shading fallback in Three.js).
+    mesh = trimesh.Trimesh(
+        vertices=verts, faces=faces, visual=visual,
+        vertex_normals=normals, process=True,
+    )
     # # Optional camera marker — disabled for now to keep the viewer focused on
     # # the human only. Re-enable by uncommenting: