Rawal Khirodkar committed on
Commit
2b7cc62
·
1 Parent(s): 6b72fc2

Pointmap: smooth gradient-based vertex normals + process=True → kills flat-shaded triangle facets

Browse files
Files changed (1) hide show
  1. app.py +25 -11
app.py CHANGED
@@ -191,20 +191,30 @@ def _triangulate_grid(pointmap_hwc: np.ndarray, mask_hw: np.ndarray,
191
  max_edge: float = 0.04):
192
  """Build a triangulated mesh from the (H, W) pointmap grid.
193
 
194
- Returns (verts, uvs, faces). Each valid pixel becomes a vertex; adjacent
195
- valid pixels form quads (2 tris). Triangles with any edge longer than
196
- `max_edge` (meters) are dropped to kill stretched skin at depth jumps.
197
- UVs are pixel-aligned for direct texturing with the input image.
 
198
  """
199
  H, W = pointmap_hwc.shape[:2]
200
  z = pointmap_hwc[:, :, 2]
201
  valid = mask_hw & np.isfinite(pointmap_hwc).all(axis=2) & (z > 0.05) & (z < 25.0)
202
 
 
 
 
 
 
 
 
 
203
  idx_map = np.full((H, W), -1, dtype=np.int64)
204
  yy, xx = np.where(valid)
205
  idx_map[yy, xx] = np.arange(len(yy))
206
 
207
  verts = pointmap_hwc[yy, xx].astype(np.float32) # (N, 3)
 
208
  uvs = np.stack([xx / max(W - 1, 1), yy / max(H - 1, 1)],
209
  axis=1).astype(np.float32) # (N, 2)
210
 
@@ -222,7 +232,7 @@ def _triangulate_grid(pointmap_hwc: np.ndarray, mask_hw: np.ndarray,
222
  e20 = np.linalg.norm(p0 - p2, axis=1)
223
  keep = (e01 < max_edge) & (e12 < max_edge) & (e20 < max_edge)
224
  faces = faces[keep].astype(np.int64)
225
- return verts, uvs, faces
226
 
227
 
228
  def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
@@ -230,16 +240,15 @@ def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
230
  """`image_pil_texture` should be the HIGHEST-resolution input available
231
  (not the inference-capped one) — it's used as the GLB albedo so the mesh
232
  surface stays smooth even when zoomed in past the mesh-vertex density."""
233
- verts, uvs, faces = _triangulate_grid(pointmap_hwc, mask_hw, max_edge=max_edge)
234
 
235
  # Y-up flip so the viewer's default orientation matches photographic intuition.
236
  flip = np.array([1.0, -1.0, -1.0], dtype=np.float32)
237
  verts = verts * flip
 
238
 
239
  # Recenter the scene on the human's centroid so Three.js's auto-fit
240
- # focuses tightly on the figure — without this, the camera marker at the
241
- # original world origin (~1-2m behind the human) inflates the bounding
242
- # box and the human ends up small in the default view.
243
  centroid = verts.mean(axis=0).astype(np.float32) if len(verts) else np.zeros(3, np.float32)
244
  verts = verts - centroid
245
 
@@ -253,8 +262,13 @@ def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
253
  )
254
  visual = trimesh.visual.texture.TextureVisuals(uv=uvs, material=material)
255
 
256
- mesh = trimesh.Trimesh(vertices=verts, faces=faces, visual=visual, process=False)
257
- _ = mesh.vertex_normals # lazy compute exported in glb
 
 
 
 
 
258
 
259
  # # Optional camera marker — disabled for now to keep the viewer focused on
260
  # # the human only. Re-enable by uncommenting:
 
191
  max_edge: float = 0.04):
192
  """Build a triangulated mesh from the (H, W) pointmap grid.
193
 
194
+ Returns (verts, uvs, faces, vertex_normals). Each valid pixel becomes a vertex;
195
+ adjacent valid pixels form quads (2 tris). Long-edge triangles are dropped
196
+ to kill stretched skin at depth jumps. Vertex normals are computed from the
197
+ pointmap's spatial gradient (smooth, per-pixel) instead of inferred from
198
+ triangle face normals (which would give flat-shaded facets).
199
  """
200
  H, W = pointmap_hwc.shape[:2]
201
  z = pointmap_hwc[:, :, 2]
202
  valid = mask_hw & np.isfinite(pointmap_hwc).all(axis=2) & (z > 0.05) & (z < 25.0)
203
 
204
+ # Smooth per-pixel normals from cross product of x- and y- spatial gradients.
205
+ px = np.zeros_like(pointmap_hwc, dtype=np.float32)
206
+ py = np.zeros_like(pointmap_hwc, dtype=np.float32)
207
+ px[:, 1:-1] = (pointmap_hwc[:, 2:] - pointmap_hwc[:, :-2]) * 0.5
208
+ py[1:-1, :] = (pointmap_hwc[2:, :] - pointmap_hwc[:-2, :]) * 0.5
209
+ n_grid = np.cross(px, py)
210
+ n_grid /= np.linalg.norm(n_grid, axis=2, keepdims=True).clip(min=1e-8)
211
+
212
  idx_map = np.full((H, W), -1, dtype=np.int64)
213
  yy, xx = np.where(valid)
214
  idx_map[yy, xx] = np.arange(len(yy))
215
 
216
  verts = pointmap_hwc[yy, xx].astype(np.float32) # (N, 3)
217
+ normals = n_grid[yy, xx].astype(np.float32) # (N, 3)
218
  uvs = np.stack([xx / max(W - 1, 1), yy / max(H - 1, 1)],
219
  axis=1).astype(np.float32) # (N, 2)
220
 
 
232
  e20 = np.linalg.norm(p0 - p2, axis=1)
233
  keep = (e01 < max_edge) & (e12 < max_edge) & (e20 < max_edge)
234
  faces = faces[keep].astype(np.int64)
235
+ return verts, uvs, faces, normals
236
 
237
 
238
  def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
 
240
  """`image_pil_texture` should be the HIGHEST-resolution input available
241
  (not the inference-capped one) — it's used as the GLB albedo so the mesh
242
  surface stays smooth even when zoomed in past the mesh-vertex density."""
243
+ verts, uvs, faces, normals = _triangulate_grid(pointmap_hwc, mask_hw, max_edge=max_edge)
244
 
245
  # Y-up flip so the viewer's default orientation matches photographic intuition.
246
  flip = np.array([1.0, -1.0, -1.0], dtype=np.float32)
247
  verts = verts * flip
248
+ normals = normals * flip # normals transform with the same axis flip
249
 
250
  # Recenter the scene on the human's centroid so Three.js's auto-fit
251
+ # focuses tightly on the figure.
 
 
252
  centroid = verts.mean(axis=0).astype(np.float32) if len(verts) else np.zeros(3, np.float32)
253
  verts = verts - centroid
254
 
 
262
  )
263
  visual = trimesh.visual.texture.TextureVisuals(uv=uvs, material=material)
264
 
265
+ # Pass smooth, gradient-derived per-vertex normals explicitly. process=True
266
+ # only drops NaN/Inf vertices and merges duplicates (winding is untouched);
267
+ # the explicit normals are what keep Three.js from flat-shading the GLB.
268
+ mesh = trimesh.Trimesh(
269
+ vertices=verts, faces=faces, visual=visual,
270
+ vertex_normals=normals, process=True,
271
+ )
272
 
273
  # # Optional camera marker — disabled for now to keep the viewer focused on
274
  # # the human only. Re-enable by uncommenting: