Spaces:

facebook
/

sapiens2-pointmap

Running on Zero

App Files Files Community

Rawal Khirodkar committed 14 days ago

Commit

b371b2d

1 Parent(s): e7e18d0

Pointmap: better 3D experience — XYZ axes + floor ring + initial 3/4 camera view + lighter slate bg

Browse files

Files changed (1) hide show

app.py +37 -4

app.py CHANGED Viewed

@@ -182,6 +182,35 @@ def _camera_marker(radius: float = 0.04, n_points: int = 800,
     return verts, cols
 def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
               mask_hw: np.ndarray, max_points: int = 200_000) -> str:
     h, w = pointmap_hwc.shape[:2]
@@ -198,11 +227,14 @@ def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
         idx = np.random.default_rng(0).choice(len(pts), size=max_points, replace=False)
         pts, cols_rgb = pts[idx], cols_rgb[idx]
     cam_verts, cam_cols = _camera_marker()
-    verts = np.concatenate([pts, cam_verts], axis=0)
     cols_rgba = np.concatenate(
         [np.concatenate([cols_rgb, np.full((len(cols_rgb), 1), 255, dtype=np.uint8)], axis=1),
-         cam_cols], axis=0,
     )
     # Three.js viewers (and gr.Model3D) typically use Y-up. Sapiens2 pointmaps
@@ -322,10 +354,11 @@ with gr.Blocks(title="Sapiens2 Pointmap", theme=gr.themes.Soft(), css=CUSTOM_CSS
         inp = gr.Image(label="Input", type="pil", height=640, scale=2)
         out_depth = gr.Image(label="Depth (Z)", type="pil", height=640, scale=2)
         out_glb = gr.Model3D(
-            label="Point cloud",
             height=640,
-            clear_color=[0.07, 0.09, 0.13, 1.0],
             display_mode="point_cloud",
             zoom_speed=0.7,
             pan_speed=0.5,
             scale=3,

     return verts, cols
def _xyz_axes(length: float = 0.4, n_per_axis: int = 200, start: float = 0.06):
    """Build point-sampled RGB axes along +X, +Y and +Z.

    Each axis is a run of ``n_per_axis`` points evenly spaced from ``start``
    to ``length`` (both ends included) along its own coordinate, with the
    other two coordinates held at zero.

    Args:
        length: Coordinate of the last point on each axis.
        n_per_axis: Number of points sampled per axis.
        start: Coordinate of the first point on each axis. The default leaves
            a gap at the origin so the axes begin beyond the camera marker.

    Returns:
        A ``(verts, cols)`` pair: ``verts`` is a float32 array of shape
        ``(3 * n_per_axis, 3)`` ordered X-axis, Y-axis, Z-axis; ``cols`` is
        the matching uint8 RGBA array of shape ``(3 * n_per_axis, 4)``
        (X red, Y green, Z blue).
    """
    t = np.linspace(start, length, n_per_axis, dtype=np.float32)

    # One (n_per_axis, 3) slab per axis; only the axis' own coordinate is non-zero.
    verts = np.zeros((3, n_per_axis, 3), dtype=np.float32)
    for axis in range(3):
        verts[axis, :, axis] = t
    verts = verts.reshape(-1, 3)

    palette = np.array(
        [
            [235, 70, 70, 255],    # red   - X
            [70, 200, 100, 255],   # green - Y
            [90, 145, 245, 255],   # blue  - Z (forward = depth)
        ],
        dtype=np.uint8,
    )
    # Repeat each palette row n_per_axis times so colours line up with verts.
    cols = np.repeat(palette, n_per_axis, axis=0)
    return verts, cols
def _floor_ring(radius: float = 1.5, n_points: int = 360, y: float = 0.0,
                color=(140, 145, 160), alpha: int = 180):
    """Build a soft horizon ring of points as a subtle scale reference.

    The ring is a circle of ``n_points`` evenly spaced points centred on the
    Y axis, lying in the horizontal plane at height ``y`` (the world Y=0
    plane by default).

    Args:
        radius: Ring radius.
        n_points: Number of points around the ring.
        y: Height of the ring plane.
        color: RGB triple (0-255). Any 3-element sequence is accepted
            (tuple, list or array).
        alpha: Alpha value (0-255) stored in the fourth colour channel.

    Returns:
        A ``(verts, cols)`` pair: ``verts`` is a float32 array of shape
        ``(n_points, 3)``; ``cols`` is a uint8 RGBA array of shape
        ``(n_points, 4)`` with every row equal to ``(*color, alpha)``.
    """
    theta = np.linspace(0, 2 * np.pi, n_points, endpoint=False, dtype=np.float32)

    # Fill a float32 buffer so the output dtype does not depend on the
    # Python/NumPy scalar types of ``radius`` and ``y``.
    verts = np.empty((n_points, 3), dtype=np.float32)
    verts[:, 0] = radius * np.cos(theta)
    verts[:, 1] = y
    verts[:, 2] = radius * np.sin(theta)

    # Unpack rather than ``color + (alpha,)``: tuple concatenation raises for
    # lists and silently turns into element-wise addition for arrays.
    rgba = np.array([*color, alpha], dtype=np.uint8)
    cols = np.tile(rgba, (n_points, 1))
    return verts, cols
 def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
               mask_hw: np.ndarray, max_points: int = 200_000) -> str:
     h, w = pointmap_hwc.shape[:2]
         idx = np.random.default_rng(0).choice(len(pts), size=max_points, replace=False)
         pts, cols_rgb = pts[idx], cols_rgb[idx]
+    # Add scene aids: camera marker + XYZ axes + floor ring.
     cam_verts, cam_cols = _camera_marker()
+    axis_verts, axis_cols = _xyz_axes()
+    ring_verts, ring_cols = _floor_ring()
+    verts = np.concatenate([pts, cam_verts, axis_verts, ring_verts], axis=0)
     cols_rgba = np.concatenate(
         [np.concatenate([cols_rgb, np.full((len(cols_rgb), 1), 255, dtype=np.uint8)], axis=1),
+         cam_cols, axis_cols, ring_cols], axis=0,
     )
     # Three.js viewers (and gr.Model3D) typically use Y-up. Sapiens2 pointmaps
         inp = gr.Image(label="Input", type="pil", height=640, scale=2)
         out_depth = gr.Image(label="Depth (Z)", type="pil", height=640, scale=2)
         out_glb = gr.Model3D(
+            label="Point cloud  ·  drag to orbit  ·  scroll to zoom  ·  shift+drag to pan",
             height=640,
+            clear_color=[0.10, 0.11, 0.14, 1.0],
             display_mode="point_cloud",
+            camera_position=(35, 70, 4.0),  # azimuth, elevation, radius — flattering 3/4 view
             zoom_speed=0.7,
             pan_speed=0.5,
             scale=3,