Spaces:
Running on Zero
Running on Zero
Rawal Khirodkar committed on
Commit ·
b371b2d
1
Parent(s): e7e18d0
Pointmap: better 3D experience — XYZ axes + floor ring + initial 3/4 camera view + lighter slate bg
Browse files
app.py
CHANGED
|
@@ -182,6 +182,35 @@ def _camera_marker(radius: float = 0.04, n_points: int = 800,
|
|
| 182 |
return verts, cols
|
| 183 |
|
| 184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
|
| 186 |
mask_hw: np.ndarray, max_points: int = 200_000) -> str:
|
| 187 |
h, w = pointmap_hwc.shape[:2]
|
|
@@ -198,11 +227,14 @@ def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
|
|
| 198 |
idx = np.random.default_rng(0).choice(len(pts), size=max_points, replace=False)
|
| 199 |
pts, cols_rgb = pts[idx], cols_rgb[idx]
|
| 200 |
|
|
|
|
| 201 |
cam_verts, cam_cols = _camera_marker()
|
| 202 |
-
|
|
|
|
|
|
|
| 203 |
cols_rgba = np.concatenate(
|
| 204 |
[np.concatenate([cols_rgb, np.full((len(cols_rgb), 1), 255, dtype=np.uint8)], axis=1),
|
| 205 |
-
cam_cols], axis=0,
|
| 206 |
)
|
| 207 |
|
| 208 |
# Three.js viewers (and gr.Model3D) typically use Y-up. Sapiens2 pointmaps
|
|
@@ -322,10 +354,11 @@ with gr.Blocks(title="Sapiens2 Pointmap", theme=gr.themes.Soft(), css=CUSTOM_CSS
|
|
| 322 |
inp = gr.Image(label="Input", type="pil", height=640, scale=2)
|
| 323 |
out_depth = gr.Image(label="Depth (Z)", type="pil", height=640, scale=2)
|
| 324 |
out_glb = gr.Model3D(
|
| 325 |
-
label="Point cloud",
|
| 326 |
height=640,
|
| 327 |
-
clear_color=[0.
|
| 328 |
display_mode="point_cloud",
|
|
|
|
| 329 |
zoom_speed=0.7,
|
| 330 |
pan_speed=0.5,
|
| 331 |
scale=3,
|
|
|
|
| 182 |
return verts, cols
|
| 183 |
|
| 184 |
|
| 185 |
+
def _xyz_axes(length: float = 0.4, n_per_axis: int = 200, start: float = 0.06):
    """Build a point-sampled RGB axis triad: X red, Y green, Z blue.

    Args:
        length: Distance from the origin at which each axis ends.
        n_per_axis: Number of points sampled along each axis.
        start: Distance from the origin at which each axis begins. The
            default leaves a gap so the axes start beyond the camera marker
            drawn around the origin.

    Returns:
        A ``(verts, cols)`` pair: ``verts`` is a float32 array of shape
        ``(3 * n_per_axis, 3)`` and ``cols`` is a uint8 RGBA array of shape
        ``(3 * n_per_axis, 4)``. Rows are ordered X axis, then Y, then Z.
    """
    t = np.linspace(start, length, n_per_axis, dtype=np.float32)

    # One (n_per_axis, 3) slab per axis; only the matching coordinate is
    # non-zero, so slab i runs along axis i.
    verts = np.zeros((3, n_per_axis, 3), dtype=np.float32)
    for axis in range(3):
        verts[axis, :, axis] = t
    verts = verts.reshape(-1, 3)

    palette = np.array(
        [
            [235, 70, 70, 255],    # red — X
            [70, 200, 100, 255],   # green — Y
            [90, 145, 245, 255],   # blue — Z (forward = depth)
        ],
        dtype=np.uint8,
    )
    # Repeat each colour row n_per_axis times, matching the vertex ordering.
    cols = np.repeat(palette, n_per_axis, axis=0)
    return verts, cols
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def _floor_ring(radius: float = 1.5, n_points: int = 360, y: float = 0.0,
                color=(140, 145, 160), alpha: int = 180):
    """Build a point-sampled horizontal ring as a subtle scale reference.

    The ring is centred on the Y axis and lies in the plane at height ``y``
    (the world Y=0 plane by default).

    Args:
        radius: Ring radius.
        n_points: Number of evenly spaced points around the ring.
        y: Height of the ring's plane along the Y axis.
        color: RGB triple (any sequence of three 0-255 ints); the default is
            a soft grey.
        alpha: Alpha channel (0-255) appended to ``color``.

    Returns:
        A ``(verts, cols)`` pair: ``verts`` is a float32 array of shape
        ``(n_points, 3)`` and ``cols`` is a uint8 RGBA array of shape
        ``(n_points, 4)``.
    """
    # endpoint=False avoids duplicating the first point at 2*pi.
    theta = np.linspace(0, 2 * np.pi, n_points, endpoint=False, dtype=np.float32)

    verts = np.empty((n_points, 3), dtype=np.float32)
    verts[:, 0] = radius * np.cos(theta)
    verts[:, 1] = y
    verts[:, 2] = radius * np.sin(theta)

    # tuple() lets callers pass a list or array; a bare `color + (alpha,)`
    # would raise for lists and add element-wise for arrays.
    rgba = np.array(tuple(color) + (alpha,), dtype=np.uint8)
    cols = np.tile(rgba, (n_points, 1))
    return verts, cols
|
| 212 |
+
|
| 213 |
+
|
| 214 |
def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
|
| 215 |
mask_hw: np.ndarray, max_points: int = 200_000) -> str:
|
| 216 |
h, w = pointmap_hwc.shape[:2]
|
|
|
|
| 227 |
idx = np.random.default_rng(0).choice(len(pts), size=max_points, replace=False)
|
| 228 |
pts, cols_rgb = pts[idx], cols_rgb[idx]
|
| 229 |
|
| 230 |
+
# Add scene aids: camera marker + XYZ axes + floor ring.
|
| 231 |
cam_verts, cam_cols = _camera_marker()
|
| 232 |
+
axis_verts, axis_cols = _xyz_axes()
|
| 233 |
+
ring_verts, ring_cols = _floor_ring()
|
| 234 |
+
verts = np.concatenate([pts, cam_verts, axis_verts, ring_verts], axis=0)
|
| 235 |
cols_rgba = np.concatenate(
|
| 236 |
[np.concatenate([cols_rgb, np.full((len(cols_rgb), 1), 255, dtype=np.uint8)], axis=1),
|
| 237 |
+
cam_cols, axis_cols, ring_cols], axis=0,
|
| 238 |
)
|
| 239 |
|
| 240 |
# Three.js viewers (and gr.Model3D) typically use Y-up. Sapiens2 pointmaps
|
|
|
|
| 354 |
inp = gr.Image(label="Input", type="pil", height=640, scale=2)
|
| 355 |
out_depth = gr.Image(label="Depth (Z)", type="pil", height=640, scale=2)
|
| 356 |
out_glb = gr.Model3D(
|
| 357 |
+
label="Point cloud · drag to orbit · scroll to zoom · shift+drag to pan",
|
| 358 |
height=640,
|
| 359 |
+
clear_color=[0.10, 0.11, 0.14, 1.0],
|
| 360 |
display_mode="point_cloud",
|
| 361 |
+
camera_position=(35, 70, 4.0), # azimuth, elevation, radius — flattering 3/4 view
|
| 362 |
zoom_speed=0.7,
|
| 363 |
pan_speed=0.5,
|
| 364 |
scale=3,
|