Spaces:
Running on Zero
Running on Zero
Rawal Khirodkar committed on
Commit ·
0b515f7
1
Parent(s): b371b2d
Pointmap: triangulate the depth grid into a mesh (no more sparse-on-zoom); drop point_cloud display_mode
Browse files
app.py
CHANGED
|
@@ -211,40 +211,79 @@ def _floor_ring(radius: float = 1.5, n_points: int = 360, y: float = 0.0,
|
|
| 211 |
return verts, cols
|
| 212 |
|
| 213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
|
| 215 |
-
mask_hw: np.ndarray
|
| 216 |
h, w = pointmap_hwc.shape[:2]
|
| 217 |
image_rgb = np.asarray(image_pil_native.resize((w, h), Image.LANCZOS))
|
| 218 |
|
| 219 |
-
|
| 220 |
-
cols_rgb = image_rgb.reshape(-1, 3).astype(np.uint8)
|
| 221 |
-
|
| 222 |
-
z = pts[:, 2]
|
| 223 |
-
finite = np.isfinite(pts).all(axis=1) & (z > 0.05) & (z < 25.0) & mask_hw.reshape(-1)
|
| 224 |
-
pts, cols_rgb = pts[finite], cols_rgb[finite]
|
| 225 |
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
|
| 230 |
-
# Add scene aids: camera marker + XYZ axes + floor ring.
|
| 231 |
-
cam_verts, cam_cols = _camera_marker()
|
| 232 |
-
axis_verts, axis_cols = _xyz_axes()
|
| 233 |
-
ring_verts, ring_cols = _floor_ring()
|
| 234 |
-
verts = np.concatenate([pts, cam_verts, axis_verts, ring_verts], axis=0)
|
| 235 |
cols_rgba = np.concatenate(
|
| 236 |
-
[
|
| 237 |
-
cam_cols, axis_cols, ring_cols], axis=0,
|
| 238 |
)
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
#
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".glb").name
|
| 247 |
-
|
| 248 |
return out_path
|
| 249 |
|
| 250 |
|
|
@@ -354,10 +393,9 @@ with gr.Blocks(title="Sapiens2 Pointmap", theme=gr.themes.Soft(), css=CUSTOM_CSS
|
|
| 354 |
inp = gr.Image(label="Input", type="pil", height=640, scale=2)
|
| 355 |
out_depth = gr.Image(label="Depth (Z)", type="pil", height=640, scale=2)
|
| 356 |
out_glb = gr.Model3D(
|
| 357 |
-
label="
|
| 358 |
height=640,
|
| 359 |
clear_color=[0.10, 0.11, 0.14, 1.0],
|
| 360 |
-
display_mode="point_cloud",
|
| 361 |
camera_position=(35, 70, 4.0), # azimuth, elevation, radius — flattering 3/4 view
|
| 362 |
zoom_speed=0.7,
|
| 363 |
pan_speed=0.5,
|
|
|
|
| 211 |
return verts, cols
|
| 212 |
|
| 213 |
|
| 214 |
+
def _triangulate_grid(pointmap_hwc: np.ndarray, image_rgb: np.ndarray,
|
| 215 |
+
mask_hw: np.ndarray, max_edge: float = 0.05):
|
| 216 |
+
"""Build a triangulated mesh from the (H, W) pointmap grid.
|
| 217 |
+
|
| 218 |
+
Each valid pixel becomes a vertex; adjacent valid pixels form quads
|
| 219 |
+
(two triangles). Triangles whose edges exceed `max_edge` (meters) are
|
| 220 |
+
dropped so we don't stretch skin between the subject and the background.
|
| 221 |
+
"""
|
| 222 |
+
H, W = pointmap_hwc.shape[:2]
|
| 223 |
+
z = pointmap_hwc[:, :, 2]
|
| 224 |
+
valid = mask_hw & np.isfinite(pointmap_hwc).all(axis=2) & (z > 0.05) & (z < 25.0)
|
| 225 |
+
|
| 226 |
+
# Vertex index per pixel; -1 if invalid.
|
| 227 |
+
idx_map = np.full((H, W), -1, dtype=np.int64)
|
| 228 |
+
yy, xx = np.where(valid)
|
| 229 |
+
idx_map[yy, xx] = np.arange(len(yy))
|
| 230 |
+
|
| 231 |
+
verts = pointmap_hwc[yy, xx] # (N, 3) float32
|
| 232 |
+
cols = image_rgb[yy, xx] # (N, 3) uint8
|
| 233 |
+
|
| 234 |
+
# Quad corners
|
| 235 |
+
a = idx_map[:-1, :-1] # top-left
|
| 236 |
+
b = idx_map[:-1, 1:] # top-right
|
| 237 |
+
c = idx_map[1:, :-1] # bottom-left
|
| 238 |
+
d = idx_map[1:, 1:] # bottom-right
|
| 239 |
+
quad_valid = (a != -1) & (b != -1) & (c != -1) & (d != -1)
|
| 240 |
+
|
| 241 |
+
a_v, b_v, c_v, d_v = a[quad_valid], b[quad_valid], c[quad_valid], d[quad_valid]
|
| 242 |
+
tri1 = np.stack([a_v, c_v, b_v], axis=1) # (M, 3)
|
| 243 |
+
tri2 = np.stack([b_v, c_v, d_v], axis=1) # (M, 3)
|
| 244 |
+
faces = np.concatenate([tri1, tri2], axis=0) # (2M, 3)
|
| 245 |
+
|
| 246 |
+
# Drop triangles with any edge longer than max_edge — kills stretched skins.
|
| 247 |
+
p0 = verts[faces[:, 0]]
|
| 248 |
+
p1 = verts[faces[:, 1]]
|
| 249 |
+
p2 = verts[faces[:, 2]]
|
| 250 |
+
e01 = np.linalg.norm(p1 - p0, axis=1)
|
| 251 |
+
e12 = np.linalg.norm(p2 - p1, axis=1)
|
| 252 |
+
e20 = np.linalg.norm(p0 - p2, axis=1)
|
| 253 |
+
keep = (e01 < max_edge) & (e12 < max_edge) & (e20 < max_edge)
|
| 254 |
+
faces = faces[keep]
|
| 255 |
+
|
| 256 |
+
return verts.astype(np.float32), cols.astype(np.uint8), faces.astype(np.int64)
|
| 257 |
+
|
| 258 |
+
|
| 259 |
def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
              mask_hw: np.ndarray) -> str:
    """Export the pointmap as a vertex-coloured mesh in a temporary .glb file.

    The input image is resized to the pointmap's (H, W) grid to supply
    per-vertex colours, the grid is triangulated with `_triangulate_grid`,
    and the scene aids (camera marker, XYZ axes, floor ring) are added as a
    separate point cloud before exporting.

    Args:
        image_pil_native: Source image; converted to RGB and resized to the
            pointmap resolution.
        pointmap_hwc: Pointmap whose first two dimensions give (H, W).
        mask_hw: Per-pixel validity mask forwarded to `_triangulate_grid`.

    Returns:
        Path of the written .glb file. The file is created with
        ``delete=False``, so it is not removed automatically.
    """
    h, w = pointmap_hwc.shape[:2]
    # Force 3-channel RGB before sampling colours: an RGBA / L / P image would
    # otherwise give a colour array that is not (N, 3), and appending the
    # alpha column below would produce 5 channels or fail outright.
    image_rgb = np.asarray(
        image_pil_native.convert("RGB").resize((w, h), Image.LANCZOS)
    )

    verts, cols_rgb, faces = _triangulate_grid(pointmap_hwc, image_rgb, mask_hw)

    # Y-up flip so the viewer's default orientation matches photographic
    # intuition (negates the Y and Z components).
    flip = np.array([1.0, -1.0, -1.0], dtype=np.float32)
    verts = verts * flip

    # Append a fully opaque alpha channel to the per-vertex colours.
    opaque = np.full((len(cols_rgb), 1), 255, dtype=np.uint8)
    cols_rgba = np.concatenate([cols_rgb, opaque], axis=1)
    mesh = trimesh.Trimesh(vertices=verts, faces=faces, vertex_colors=cols_rgba, process=False)

    # Scene aids as separate point clouds, flipped the same way as the mesh.
    aids_v, aids_c = [], []
    for fn in (_camera_marker, _xyz_axes, _floor_ring):
        v, c = fn()
        aids_v.append(v * flip)
        aids_c.append(c)
    aids = trimesh.PointCloud(vertices=np.concatenate(aids_v, axis=0),
                              colors=np.concatenate(aids_c, axis=0))

    scene = trimesh.Scene([mesh, aids])
    # Reserve a unique path and close the handle straight away: this avoids
    # leaking the open file object and lets the exporter reopen the path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".glb") as tmp:
        out_path = tmp.name
    scene.export(out_path)
    return out_path
|
| 288 |
|
| 289 |
|
|
|
|
| 393 |
inp = gr.Image(label="Input", type="pil", height=640, scale=2)
|
| 394 |
out_depth = gr.Image(label="Depth (Z)", type="pil", height=640, scale=2)
|
| 395 |
out_glb = gr.Model3D(
|
| 396 |
+
label="3D mesh · drag to orbit · scroll to zoom · shift+drag to pan",
|
| 397 |
height=640,
|
| 398 |
clear_color=[0.10, 0.11, 0.14, 1.0],
|
|
|
|
| 399 |
camera_position=(35, 70, 4.0), # azimuth, elevation, radius — flattering 3/4 view
|
| 400 |
zoom_speed=0.7,
|
| 401 |
pan_speed=0.5,
|