Rawal Khirodkar committed on
Commit
b371b2d
·
1 Parent(s): e7e18d0

Pointmap: better 3D experience — XYZ axes + floor ring + initial 3/4 camera view + lighter slate bg

Browse files
Files changed (1) hide show
  1. app.py +37 -4
app.py CHANGED
@@ -182,6 +182,35 @@ def _camera_marker(radius: float = 0.04, n_points: int = 800,
182
  return verts, cols
183
 
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
186
  mask_hw: np.ndarray, max_points: int = 200_000) -> str:
187
  h, w = pointmap_hwc.shape[:2]
@@ -198,11 +227,14 @@ def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
198
  idx = np.random.default_rng(0).choice(len(pts), size=max_points, replace=False)
199
  pts, cols_rgb = pts[idx], cols_rgb[idx]
200
 
 
201
  cam_verts, cam_cols = _camera_marker()
202
- verts = np.concatenate([pts, cam_verts], axis=0)
 
 
203
  cols_rgba = np.concatenate(
204
  [np.concatenate([cols_rgb, np.full((len(cols_rgb), 1), 255, dtype=np.uint8)], axis=1),
205
- cam_cols], axis=0,
206
  )
207
 
208
  # Three.js viewers (and gr.Model3D) typically use Y-up. Sapiens2 pointmaps
@@ -322,10 +354,11 @@ with gr.Blocks(title="Sapiens2 Pointmap", theme=gr.themes.Soft(), css=CUSTOM_CSS
322
  inp = gr.Image(label="Input", type="pil", height=640, scale=2)
323
  out_depth = gr.Image(label="Depth (Z)", type="pil", height=640, scale=2)
324
  out_glb = gr.Model3D(
325
- label="Point cloud",
326
  height=640,
327
- clear_color=[0.07, 0.09, 0.13, 1.0],
328
  display_mode="point_cloud",
 
329
  zoom_speed=0.7,
330
  pan_speed=0.5,
331
  scale=3,
 
182
  return verts, cols
183
 
184
 
185
def _xyz_axes(length: float = 0.4, n_per_axis: int = 200, start: float = 0.06):
    """Build point-sampled RGB axis markers along +X, +Y and +Z.

    Each axis is a run of ``n_per_axis`` evenly spaced points going from
    ``start`` to ``length`` (both ends included) along its direction.
    X is red, Y is green and Z is blue.

    Args:
        length: Distance from the origin at which each axis ends.
        n_per_axis: Number of points sampled on each axis.
        start: Distance from the origin at which each axis begins. The
            default of 0.06 makes the axes start beyond the camera marker
            (whose default radius is 0.04).

    Returns:
        A ``(verts, cols)`` tuple. ``verts`` is a float32 array of shape
        ``(3 * n_per_axis, 3)`` and ``cols`` is a uint8 RGBA array of shape
        ``(3 * n_per_axis, 4)``; both are ordered X points, then Y, then Z.
    """
    t = np.linspace(start, length, n_per_axis, dtype=np.float32)

    # One (n_per_axis, 3) slab per axis; only the matching coordinate is set.
    verts = np.zeros((3, n_per_axis, 3), dtype=np.float32)
    for axis in range(3):
        verts[axis, :, axis] = t

    palette = np.array(
        [
            [235, 70, 70, 255],    # red — X
            [70, 200, 100, 255],   # green — Y
            [90, 145, 245, 255],   # blue — Z (forward = depth)
        ],
        dtype=np.uint8,
    )
    # Repeat each colour row n_per_axis times so colours line up with verts.
    cols = np.repeat(palette, n_per_axis, axis=0)
    return verts.reshape(-1, 3), cols
199
+
200
+
201
def _floor_ring(radius: float = 1.5, n_points: int = 360, y: float = 0.0,
                color=(140, 145, 160)):
    """Build a ring of points in a horizontal (constant-Y) plane.

    The ring is centred on the Y axis and serves as a subtle scale
    reference: a soft grey horizon circle with the default colour.

    Args:
        radius: Ring radius in the XZ plane.
        n_points: Number of evenly spaced points around the ring.
        y: Height (Y coordinate) shared by every point of the ring.
        color: RGB triple with 0-255 channels. Any sequence of three
            integers is accepted (tuple, list or NumPy array).

    Returns:
        A ``(verts, cols)`` tuple. ``verts`` has shape ``(n_points, 3)``
        and ``cols`` is a uint8 RGBA array of shape ``(n_points, 4)`` whose
        alpha channel is fixed at 180.
    """
    # endpoint=False keeps the last point from duplicating the first one.
    theta = np.linspace(0, 2 * np.pi, n_points, endpoint=False, dtype=np.float32)
    verts = np.stack(
        [
            radius * np.cos(theta),
            np.full_like(theta, y),
            radius * np.sin(theta),
        ],
        axis=1,
    )
    # Unpack instead of `color + (180,)`: tuple concatenation raises for
    # lists and silently broadcast-adds 180 to every channel for arrays.
    rgba = np.array([*color, 180], dtype=np.uint8)
    cols = np.tile(rgba, (n_points, 1))
    return verts, cols
212
+
213
+
214
  def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
215
  mask_hw: np.ndarray, max_points: int = 200_000) -> str:
216
  h, w = pointmap_hwc.shape[:2]
 
227
  idx = np.random.default_rng(0).choice(len(pts), size=max_points, replace=False)
228
  pts, cols_rgb = pts[idx], cols_rgb[idx]
229
 
230
+ # Add scene aids: camera marker + XYZ axes + floor ring.
231
  cam_verts, cam_cols = _camera_marker()
232
+ axis_verts, axis_cols = _xyz_axes()
233
+ ring_verts, ring_cols = _floor_ring()
234
+ verts = np.concatenate([pts, cam_verts, axis_verts, ring_verts], axis=0)
235
  cols_rgba = np.concatenate(
236
  [np.concatenate([cols_rgb, np.full((len(cols_rgb), 1), 255, dtype=np.uint8)], axis=1),
237
+ cam_cols, axis_cols, ring_cols], axis=0,
238
  )
239
 
240
  # Three.js viewers (and gr.Model3D) typically use Y-up. Sapiens2 pointmaps
 
354
  inp = gr.Image(label="Input", type="pil", height=640, scale=2)
355
  out_depth = gr.Image(label="Depth (Z)", type="pil", height=640, scale=2)
356
  out_glb = gr.Model3D(
357
+ label="Point cloud · drag to orbit · scroll to zoom · shift+drag to pan",
358
  height=640,
359
+ clear_color=[0.10, 0.11, 0.14, 1.0],
360
  display_mode="point_cloud",
361
+ camera_position=(35, 70, 4.0), # azimuth, elevation, radius — flattering 3/4 view
362
  zoom_speed=0.7,
363
  pan_speed=0.5,
364
  scale=3,