Rawal Khirodkar committed on
Commit
0b515f7
·
1 Parent(s): b371b2d

Pointmap: triangulate the depth grid into a mesh (no more sparse-on-zoom); drop point_cloud display_mode

Browse files
Files changed (1) hide show
  1. app.py +65 -27
app.py CHANGED
@@ -211,40 +211,79 @@ def _floor_ring(radius: float = 1.5, n_points: int = 360, y: float = 0.0,
211
  return verts, cols
212
 
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
215
- mask_hw: np.ndarray, max_points: int = 200_000) -> str:
216
  h, w = pointmap_hwc.shape[:2]
217
  image_rgb = np.asarray(image_pil_native.resize((w, h), Image.LANCZOS))
218
 
219
- pts = pointmap_hwc.reshape(-1, 3).astype(np.float32)
220
- cols_rgb = image_rgb.reshape(-1, 3).astype(np.uint8)
221
-
222
- z = pts[:, 2]
223
- finite = np.isfinite(pts).all(axis=1) & (z > 0.05) & (z < 25.0) & mask_hw.reshape(-1)
224
- pts, cols_rgb = pts[finite], cols_rgb[finite]
225
 
226
- if len(pts) > max_points:
227
- idx = np.random.default_rng(0).choice(len(pts), size=max_points, replace=False)
228
- pts, cols_rgb = pts[idx], cols_rgb[idx]
229
 
230
- # Add scene aids: camera marker + XYZ axes + floor ring.
231
- cam_verts, cam_cols = _camera_marker()
232
- axis_verts, axis_cols = _xyz_axes()
233
- ring_verts, ring_cols = _floor_ring()
234
- verts = np.concatenate([pts, cam_verts, axis_verts, ring_verts], axis=0)
235
  cols_rgba = np.concatenate(
236
- [np.concatenate([cols_rgb, np.full((len(cols_rgb), 1), 255, dtype=np.uint8)], axis=1),
237
- cam_cols, axis_cols, ring_cols], axis=0,
238
  )
239
-
240
- # Three.js viewers (and gr.Model3D) typically use Y-up. Sapiens2 pointmaps
241
- # come in camera frame with Y down, Z forward — flip Y so the viewer's
242
- # default orientation matches photographic intuition.
243
- verts = verts * np.array([1.0, -1.0, -1.0], dtype=np.float32)
244
-
245
- pc = trimesh.PointCloud(vertices=verts, colors=cols_rgba)
 
 
 
 
 
246
  out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".glb").name
247
- pc.export(out_path)
248
  return out_path
249
 
250
 
@@ -354,10 +393,9 @@ with gr.Blocks(title="Sapiens2 Pointmap", theme=gr.themes.Soft(), css=CUSTOM_CSS
354
  inp = gr.Image(label="Input", type="pil", height=640, scale=2)
355
  out_depth = gr.Image(label="Depth (Z)", type="pil", height=640, scale=2)
356
  out_glb = gr.Model3D(
357
- label="Point cloud · drag to orbit · scroll to zoom · shift+drag to pan",
358
  height=640,
359
  clear_color=[0.10, 0.11, 0.14, 1.0],
360
- display_mode="point_cloud",
361
  camera_position=(35, 70, 4.0), # azimuth, elevation, radius — flattering 3/4 view
362
  zoom_speed=0.7,
363
  pan_speed=0.5,
 
211
  return verts, cols
212
 
213
 
214
+ def _triangulate_grid(pointmap_hwc: np.ndarray, image_rgb: np.ndarray,
215
+ mask_hw: np.ndarray, max_edge: float = 0.05):
216
+ """Build a triangulated mesh from the (H, W) pointmap grid.
217
+
218
+ Each valid pixel becomes a vertex; adjacent valid pixels form quads
219
+ (two triangles). Triangles whose edges exceed `max_edge` (meters) are
220
+ dropped so we don't stretch skin between the subject and the background.
221
+ """
222
+ H, W = pointmap_hwc.shape[:2]
223
+ z = pointmap_hwc[:, :, 2]
224
+ valid = mask_hw & np.isfinite(pointmap_hwc).all(axis=2) & (z > 0.05) & (z < 25.0)
225
+
226
+ # Vertex index per pixel; -1 if invalid.
227
+ idx_map = np.full((H, W), -1, dtype=np.int64)
228
+ yy, xx = np.where(valid)
229
+ idx_map[yy, xx] = np.arange(len(yy))
230
+
231
+ verts = pointmap_hwc[yy, xx] # (N, 3) float32
232
+ cols = image_rgb[yy, xx] # (N, 3) uint8
233
+
234
+ # Quad corners
235
+ a = idx_map[:-1, :-1] # top-left
236
+ b = idx_map[:-1, 1:] # top-right
237
+ c = idx_map[1:, :-1] # bottom-left
238
+ d = idx_map[1:, 1:] # bottom-right
239
+ quad_valid = (a != -1) & (b != -1) & (c != -1) & (d != -1)
240
+
241
+ a_v, b_v, c_v, d_v = a[quad_valid], b[quad_valid], c[quad_valid], d[quad_valid]
242
+ tri1 = np.stack([a_v, c_v, b_v], axis=1) # (M, 3)
243
+ tri2 = np.stack([b_v, c_v, d_v], axis=1) # (M, 3)
244
+ faces = np.concatenate([tri1, tri2], axis=0) # (2M, 3)
245
+
246
+ # Drop triangles with any edge longer than max_edge — kills stretched skins.
247
+ p0 = verts[faces[:, 0]]
248
+ p1 = verts[faces[:, 1]]
249
+ p2 = verts[faces[:, 2]]
250
+ e01 = np.linalg.norm(p1 - p0, axis=1)
251
+ e12 = np.linalg.norm(p2 - p1, axis=1)
252
+ e20 = np.linalg.norm(p0 - p2, axis=1)
253
+ keep = (e01 < max_edge) & (e12 < max_edge) & (e20 < max_edge)
254
+ faces = faces[keep]
255
+
256
+ return verts.astype(np.float32), cols.astype(np.uint8), faces.astype(np.int64)
257
+
258
+
259
  def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
260
+ mask_hw: np.ndarray) -> str:
261
  h, w = pointmap_hwc.shape[:2]
262
  image_rgb = np.asarray(image_pil_native.resize((w, h), Image.LANCZOS))
263
 
264
+ verts, cols_rgb, faces = _triangulate_grid(pointmap_hwc, image_rgb, mask_hw)
 
 
 
 
 
265
 
266
+ # Y-up flip so the viewer's default orientation matches photographic intuition.
267
+ flip = np.array([1.0, -1.0, -1.0], dtype=np.float32)
268
+ verts = verts * flip
269
 
 
 
 
 
 
270
  cols_rgba = np.concatenate(
271
+ [cols_rgb, np.full((len(cols_rgb), 1), 255, dtype=np.uint8)], axis=1
 
272
  )
273
+ mesh = trimesh.Trimesh(vertices=verts, faces=faces, vertex_colors=cols_rgba, process=False)
274
+
275
+ # Scene aids as separate point clouds.
276
+ aids_v, aids_c = [], []
277
+ for fn in (_camera_marker, _xyz_axes, _floor_ring):
278
+ v, c = fn()
279
+ aids_v.append(v * flip)
280
+ aids_c.append(c)
281
+ aids = trimesh.PointCloud(vertices=np.concatenate(aids_v, axis=0),
282
+ colors=np.concatenate(aids_c, axis=0))
283
+
284
+ scene = trimesh.Scene([mesh, aids])
285
  out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".glb").name
286
+ scene.export(out_path)
287
  return out_path
288
 
289
 
 
393
  inp = gr.Image(label="Input", type="pil", height=640, scale=2)
394
  out_depth = gr.Image(label="Depth (Z)", type="pil", height=640, scale=2)
395
  out_glb = gr.Model3D(
396
+ label="3D mesh · drag to orbit · scroll to zoom · shift+drag to pan",
397
  height=640,
398
  clear_color=[0.10, 0.11, 0.14, 1.0],
 
399
  camera_position=(35, 70, 4.0), # azimuth, elevation, radius — flattering 3/4 view
400
  zoom_speed=0.7,
401
  pan_speed=0.5,