Spaces:
Runtime error
Runtime error
Rawal Khirodkar committed on
Commit ·
2b7cc62
1
Parent(s): 6b72fc2
Pointmap: smooth gradient-based vertex normals + process=True → kills flat-shaded triangle facets
Browse files
app.py
CHANGED
|
@@ -191,20 +191,30 @@ def _triangulate_grid(pointmap_hwc: np.ndarray, mask_hw: np.ndarray,
|
|
| 191 |
max_edge: float = 0.04):
|
| 192 |
"""Build a triangulated mesh from the (H, W) pointmap grid.
|
| 193 |
|
| 194 |
-
Returns (verts, uvs, faces). Each valid pixel
|
| 195 |
-
valid pixels form quads (2 tris).
|
| 196 |
-
|
| 197 |
-
|
|
|
|
| 198 |
"""
|
| 199 |
H, W = pointmap_hwc.shape[:2]
|
| 200 |
z = pointmap_hwc[:, :, 2]
|
| 201 |
valid = mask_hw & np.isfinite(pointmap_hwc).all(axis=2) & (z > 0.05) & (z < 25.0)
|
| 202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
idx_map = np.full((H, W), -1, dtype=np.int64)
|
| 204 |
yy, xx = np.where(valid)
|
| 205 |
idx_map[yy, xx] = np.arange(len(yy))
|
| 206 |
|
| 207 |
verts = pointmap_hwc[yy, xx].astype(np.float32) # (N, 3)
|
|
|
|
| 208 |
uvs = np.stack([xx / max(W - 1, 1), yy / max(H - 1, 1)],
|
| 209 |
axis=1).astype(np.float32) # (N, 2)
|
| 210 |
|
|
@@ -222,7 +232,7 @@ def _triangulate_grid(pointmap_hwc: np.ndarray, mask_hw: np.ndarray,
|
|
| 222 |
e20 = np.linalg.norm(p0 - p2, axis=1)
|
| 223 |
keep = (e01 < max_edge) & (e12 < max_edge) & (e20 < max_edge)
|
| 224 |
faces = faces[keep].astype(np.int64)
|
| 225 |
-
return verts, uvs, faces
|
| 226 |
|
| 227 |
|
| 228 |
def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
|
|
@@ -230,16 +240,15 @@ def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
|
|
| 230 |
"""`image_pil_texture` should be the HIGHEST-resolution input available
|
| 231 |
(not the inference-capped one) — it's used as the GLB albedo so the mesh
|
| 232 |
surface stays smooth even when zoomed in past the mesh-vertex density."""
|
| 233 |
-
verts, uvs, faces = _triangulate_grid(pointmap_hwc, mask_hw, max_edge=max_edge)
|
| 234 |
|
| 235 |
# Y-up flip so the viewer's default orientation matches photographic intuition.
|
| 236 |
flip = np.array([1.0, -1.0, -1.0], dtype=np.float32)
|
| 237 |
verts = verts * flip
|
|
|
|
| 238 |
|
| 239 |
# Recenter the scene on the human's centroid so Three.js's auto-fit
|
| 240 |
-
# focuses tightly on the figure
|
| 241 |
-
# original world origin (~1-2m behind the human) inflates the bounding
|
| 242 |
-
# box and the human ends up small in the default view.
|
| 243 |
centroid = verts.mean(axis=0).astype(np.float32) if len(verts) else np.zeros(3, np.float32)
|
| 244 |
verts = verts - centroid
|
| 245 |
|
|
@@ -253,8 +262,13 @@ def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
|
|
| 253 |
)
|
| 254 |
visual = trimesh.visual.texture.TextureVisuals(uv=uvs, material=material)
|
| 255 |
|
| 256 |
-
|
| 257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
|
| 259 |
# # Optional camera marker — disabled for now to keep the viewer focused on
|
| 260 |
# # the human only. Re-enable by uncommenting:
|
|
|
|
| 191 |
max_edge: float = 0.04):
|
| 192 |
"""Build a triangulated mesh from the (H, W) pointmap grid.
|
| 193 |
|
| 194 |
+
Returns (verts, uvs, faces, vertex_normals). Each valid pixel → vertex;
|
| 195 |
+
adjacent valid pixels form quads (2 tris). Long-edge triangles are dropped
|
| 196 |
+
to kill stretched skin at depth jumps. Vertex normals are computed from the
|
| 197 |
+
pointmap's spatial gradient (smooth, per-pixel) instead of inferred from
|
| 198 |
+
triangle face normals (which would give flat-shaded facets).
|
| 199 |
"""
|
| 200 |
H, W = pointmap_hwc.shape[:2]
|
| 201 |
z = pointmap_hwc[:, :, 2]
|
| 202 |
valid = mask_hw & np.isfinite(pointmap_hwc).all(axis=2) & (z > 0.05) & (z < 25.0)
|
| 203 |
|
| 204 |
+
# Smooth per-pixel normals from cross product of x- and y- spatial gradients.
|
| 205 |
+
px = np.zeros_like(pointmap_hwc, dtype=np.float32)
|
| 206 |
+
py = np.zeros_like(pointmap_hwc, dtype=np.float32)
|
| 207 |
+
px[:, 1:-1] = (pointmap_hwc[:, 2:] - pointmap_hwc[:, :-2]) * 0.5
|
| 208 |
+
py[1:-1, :] = (pointmap_hwc[2:, :] - pointmap_hwc[:-2, :]) * 0.5
|
| 209 |
+
n_grid = np.cross(px, py)
|
| 210 |
+
n_grid /= np.linalg.norm(n_grid, axis=2, keepdims=True).clip(min=1e-8)
|
| 211 |
+
|
| 212 |
idx_map = np.full((H, W), -1, dtype=np.int64)
|
| 213 |
yy, xx = np.where(valid)
|
| 214 |
idx_map[yy, xx] = np.arange(len(yy))
|
| 215 |
|
| 216 |
verts = pointmap_hwc[yy, xx].astype(np.float32) # (N, 3)
|
| 217 |
+
normals = n_grid[yy, xx].astype(np.float32) # (N, 3)
|
| 218 |
uvs = np.stack([xx / max(W - 1, 1), yy / max(H - 1, 1)],
|
| 219 |
axis=1).astype(np.float32) # (N, 2)
|
| 220 |
|
|
|
|
| 232 |
e20 = np.linalg.norm(p0 - p2, axis=1)
|
| 233 |
keep = (e01 < max_edge) & (e12 < max_edge) & (e20 < max_edge)
|
| 234 |
faces = faces[keep].astype(np.int64)
|
| 235 |
+
return verts, uvs, faces, normals
|
| 236 |
|
| 237 |
|
| 238 |
def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
|
|
|
|
| 240 |
"""`image_pil_texture` should be the HIGHEST-resolution input available
|
| 241 |
(not the inference-capped one) — it's used as the GLB albedo so the mesh
|
| 242 |
surface stays smooth even when zoomed in past the mesh-vertex density."""
|
| 243 |
+
verts, uvs, faces, normals = _triangulate_grid(pointmap_hwc, mask_hw, max_edge=max_edge)
|
| 244 |
|
| 245 |
# Y-up flip so the viewer's default orientation matches photographic intuition.
|
| 246 |
flip = np.array([1.0, -1.0, -1.0], dtype=np.float32)
|
| 247 |
verts = verts * flip
|
| 248 |
+
normals = normals * flip # normals transform with the same axis flip
|
| 249 |
|
| 250 |
# Recenter the scene on the human's centroid so Three.js's auto-fit
|
| 251 |
+
# focuses tightly on the figure.
|
|
|
|
|
|
|
| 252 |
centroid = verts.mean(axis=0).astype(np.float32) if len(verts) else np.zeros(3, np.float32)
|
| 253 |
verts = verts - centroid
|
| 254 |
|
|
|
|
| 262 |
)
|
| 263 |
visual = trimesh.visual.texture.TextureVisuals(uv=uvs, material=material)
|
| 264 |
|
| 265 |
+
# Pass smooth, gradient-derived per-vertex normals explicitly. process=True
|
| 266 |
+
# lets trimesh fix winding + cache properly so the GLB exporter actually
|
| 267 |
+
# writes the normals (no flat-shading fallback in Three.js).
|
| 268 |
+
mesh = trimesh.Trimesh(
|
| 269 |
+
vertices=verts, faces=faces, visual=visual,
|
| 270 |
+
vertex_normals=normals, process=True,
|
| 271 |
+
)
|
| 272 |
|
| 273 |
# # Optional camera marker — disabled for now to keep the viewer focused on
|
| 274 |
# # the human only. Re-enable by uncommenting:
|