Rawal Khirodkar committed on
Commit
c7182c9
·
1 Parent(s): 4751e1d

Pointmap: recenter mesh on human centroid; drop floor ring + axes; closer initial camera

Browse files
Files changed (1) hide show
  1. app.py +16 -47
app.py CHANGED
@@ -167,9 +167,9 @@ def _depth_to_rgb(depth: np.ndarray, mask: np.ndarray) -> np.ndarray:
167
  # -----------------------------------------------------------------------------
168
  # Point cloud export — trimesh → .glb (much faster than Open3D .ply for Three.js)
169
 
170
- def _camera_marker(radius: float = 0.04, n_points: int = 800,
171
  color=(51, 140, 245)):
172
- """Tiny slate-blue Fibonacci sphere at the world origin. Returns (verts, cols)."""
173
  i = np.arange(n_points)
174
  phi = np.arccos(1 - 2 * (i + 0.5) / n_points)
175
  theta = np.pi * (1 + 5 ** 0.5) * (i + 0.5)
@@ -182,35 +182,6 @@ def _camera_marker(radius: float = 0.04, n_points: int = 800,
182
  return verts, cols
183
 
184
 
185
- def _xyz_axes(length: float = 0.4, n_per_axis: int = 200):
186
- """RGB axes from the origin — X red, Y green, Z blue."""
187
- t = np.linspace(0.06, length, n_per_axis, dtype=np.float32) # start beyond camera marker
188
- zeros = np.zeros_like(t)
189
- x_pts = np.stack([t, zeros, zeros], axis=1)
190
- y_pts = np.stack([zeros, t, zeros], axis=1)
191
- z_pts = np.stack([zeros, zeros, t], axis=1)
192
- verts = np.concatenate([x_pts, y_pts, z_pts])
193
- cols = np.concatenate([
194
- np.tile([235, 70, 70, 255], (n_per_axis, 1)), # red — X
195
- np.tile([70, 200, 100, 255], (n_per_axis, 1)), # green — Y
196
- np.tile([90, 145, 245, 255], (n_per_axis, 1)), # blue — Z (forward = depth)
197
- ]).astype(np.uint8)
198
- return verts.astype(np.float32), cols
199
-
200
-
201
- def _floor_ring(radius: float = 1.5, n_points: int = 360, y: float = 0.0,
202
- color=(140, 145, 160)):
203
- """Soft grey horizon ring on the world Y=0 plane — a subtle scale reference."""
204
- theta = np.linspace(0, 2 * np.pi, n_points, endpoint=False, dtype=np.float32)
205
- verts = np.stack([
206
- radius * np.cos(theta),
207
- np.full_like(theta, y),
208
- radius * np.sin(theta),
209
- ], axis=1)
210
- cols = np.tile(np.array(color + (180,), dtype=np.uint8), (n_points, 1))
211
- return verts, cols
212
-
213
-
214
  def _triangulate_grid(pointmap_hwc: np.ndarray, mask_hw: np.ndarray,
215
  max_edge: float = 0.02):
216
  """Build a triangulated mesh from the (H, W) pointmap grid.
@@ -260,12 +231,15 @@ def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
260
  flip = np.array([1.0, -1.0, -1.0], dtype=np.float32)
261
  verts = verts * flip
262
 
263
- # MoGe-style: V flipped (image origin is top-left, GL UVs origin bottom-left).
 
 
 
 
 
 
264
  uvs = uvs * np.array([1.0, -1.0], dtype=np.float32) + np.array([0.0, 1.0], dtype=np.float32)
265
 
266
- # PBR material with the input image as the albedo texture — much sharper than
267
- # vertex colors because each triangle interior is sampled from the image at
268
- # the correct pixel, not bilinearly between 3 corner colors.
269
  material = trimesh.visual.material.PBRMaterial(
270
  baseColorTexture=image_native,
271
  metallicFactor=0.0,
@@ -275,17 +249,12 @@ def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
275
  visual = trimesh.visual.texture.TextureVisuals(uv=uvs, material=material)
276
 
277
  mesh = trimesh.Trimesh(vertices=verts, faces=faces, visual=visual, process=False)
278
- # Compute and attach per-vertex normals → enables shading in Three.js viewer.
279
- _ = mesh.vertex_normals # triggers lazy compute; trimesh exports them in glb
280
-
281
- # Scene aids (camera marker, XYZ axes, floor ring) as a single point primitive.
282
- aids_v, aids_c = [], []
283
- for fn in (_camera_marker, _xyz_axes, _floor_ring):
284
- v, c = fn()
285
- aids_v.append(v * flip)
286
- aids_c.append(c)
287
- aids = trimesh.PointCloud(vertices=np.concatenate(aids_v, axis=0),
288
- colors=np.concatenate(aids_c, axis=0))
289
 
290
  scene = trimesh.Scene([mesh, aids])
291
  out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".glb").name
@@ -402,7 +371,7 @@ with gr.Blocks(title="Sapiens2 Pointmap", theme=gr.themes.Soft(), css=CUSTOM_CSS
402
  label="Pointmap",
403
  height=640,
404
  clear_color=[0.10, 0.11, 0.14, 1.0],
405
- camera_position=(35, 70, 4.0),
406
  zoom_speed=0.7,
407
  pan_speed=0.5,
408
  scale=3,
 
167
  # -----------------------------------------------------------------------------
168
  # Point cloud export — trimesh → .glb (much faster than Open3D .ply for Three.js)
169
 
170
+ def _camera_marker(radius: float = 0.025, n_points: int = 600,
171
  color=(51, 140, 245)):
172
+ """Tiny slate-blue Fibonacci sphere marking the camera. Returns (verts, cols)."""
173
  i = np.arange(n_points)
174
  phi = np.arccos(1 - 2 * (i + 0.5) / n_points)
175
  theta = np.pi * (1 + 5 ** 0.5) * (i + 0.5)
 
182
  return verts, cols
183
 
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  def _triangulate_grid(pointmap_hwc: np.ndarray, mask_hw: np.ndarray,
186
  max_edge: float = 0.02):
187
  """Build a triangulated mesh from the (H, W) pointmap grid.
 
231
  flip = np.array([1.0, -1.0, -1.0], dtype=np.float32)
232
  verts = verts * flip
233
 
234
+ # Recenter the scene on the human's centroid so Three.js's auto-fit
235
+ # focuses tightly on the figure — without this, the camera marker at the
236
+ # original world origin (~1-2m behind the human) inflates the bounding
237
+ # box and the human ends up small in the default view.
238
+ centroid = verts.mean(axis=0).astype(np.float32) if len(verts) else np.zeros(3, np.float32)
239
+ verts = verts - centroid
240
+
241
  uvs = uvs * np.array([1.0, -1.0], dtype=np.float32) + np.array([0.0, 1.0], dtype=np.float32)
242
 
 
 
 
243
  material = trimesh.visual.material.PBRMaterial(
244
  baseColorTexture=image_native,
245
  metallicFactor=0.0,
 
249
  visual = trimesh.visual.texture.TextureVisuals(uv=uvs, material=material)
250
 
251
  mesh = trimesh.Trimesh(vertices=verts, faces=faces, visual=visual, process=False)
252
+ _ = mesh.vertex_normals # lazy compute → exported in glb
253
+
254
+ # Tiny camera marker only — at the (now-shifted) origin of the camera frame.
255
+ cam_v, cam_c = _camera_marker()
256
+ cam_v = cam_v * flip - centroid
257
+ aids = trimesh.PointCloud(vertices=cam_v, colors=cam_c)
 
 
 
 
 
258
 
259
  scene = trimesh.Scene([mesh, aids])
260
  out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".glb").name
 
371
  label="Pointmap",
372
  height=640,
373
  clear_color=[0.10, 0.11, 0.14, 1.0],
374
+ camera_position=(35, 70, 1.6), # closer, since scene is centered on the human
375
  zoom_speed=0.7,
376
  pan_speed=0.5,
377
  scale=3,