Rawal Khirodkar committed on
Commit
bf3b357
·
1 Parent(s): a90c10a

Pointmap: byte-accurate MoGe-2 mesh recipe

Browse files

Match upstream utils3d.numpy exactly:
- _depth_edge: NaN-pad + nanmax/nanmin, so out-of-bounds padding is ignored
in windows at the image border; edge test is (max - min) / depth > rtol
(raw depth in the denominator).
- Quad winding [TL, BL, BR, TR] split as fan from TL into triangles
[TL, BL, BR] and [TL, BR, TR].

Drop dead helpers (_camera_marker, _triangulate_grid, _glb_inject_unlit).

Files changed (1) hide show
  1. app.py +46 -142
app.py CHANGED
@@ -170,87 +170,46 @@ def _depth_to_rgb(depth: np.ndarray, mask: np.ndarray) -> np.ndarray:
170
 
171
 
172
  # -----------------------------------------------------------------------------
173
- # Point cloud export — trimesh .glb (much faster than Open3D .ply for Three.js)
174
-
175
- def _camera_marker(radius: float = 0.025, n_points: int = 600,
176
- color=(51, 140, 245)):
177
- """Tiny slate-blue Fibonacci sphere marking the camera. Returns (verts, cols)."""
178
- i = np.arange(n_points)
179
- phi = np.arccos(1 - 2 * (i + 0.5) / n_points)
180
- theta = np.pi * (1 + 5 ** 0.5) * (i + 0.5)
181
- verts = np.stack([
182
- radius * np.sin(phi) * np.cos(theta),
183
- radius * np.sin(phi) * np.sin(theta),
184
- radius * np.cos(phi),
185
- ], axis=1).astype(np.float32)
186
- cols = np.tile(np.array(color + (255,), dtype=np.uint8), (n_points, 1))
187
- return verts, cols
188
-
189
-
190
- def _triangulate_grid(pointmap_hwc: np.ndarray, mask_hw: np.ndarray,
191
- max_edge: float = 0.04):
192
- """Build a triangulated mesh from the (H, W) pointmap grid.
193
-
194
- Returns (verts, uvs, faces, vertex_normals). Each valid pixel → vertex;
195
- adjacent valid pixels form quads (2 tris). Long-edge triangles are dropped
196
- to kill stretched skin at depth jumps. Vertex normals are computed from the
197
- pointmap's spatial gradient (smooth, per-pixel) instead of inferred from
198
- triangle face normals (which would give flat-shaded facets).
199
  """
200
- H, W = pointmap_hwc.shape[:2]
201
- z = pointmap_hwc[:, :, 2]
202
- valid = mask_hw & np.isfinite(pointmap_hwc).all(axis=2) & (z > 0.05) & (z < 25.0)
203
-
204
- # Smooth per-pixel normals from cross product of x- and y- spatial gradients.
205
- px = np.zeros_like(pointmap_hwc, dtype=np.float32)
206
- py = np.zeros_like(pointmap_hwc, dtype=np.float32)
207
- px[:, 1:-1] = (pointmap_hwc[:, 2:] - pointmap_hwc[:, :-2]) * 0.5
208
- py[1:-1, :] = (pointmap_hwc[2:, :] - pointmap_hwc[:-2, :]) * 0.5
209
- n_grid = np.cross(px, py)
210
- n_grid /= np.linalg.norm(n_grid, axis=2, keepdims=True).clip(min=1e-8)
211
-
212
- idx_map = np.full((H, W), -1, dtype=np.int64)
213
- yy, xx = np.where(valid)
214
- idx_map[yy, xx] = np.arange(len(yy))
215
-
216
- verts = pointmap_hwc[yy, xx].astype(np.float32) # (N, 3)
217
- normals = n_grid[yy, xx].astype(np.float32) # (N, 3)
218
- uvs = np.stack([xx / max(W - 1, 1), yy / max(H - 1, 1)],
219
- axis=1).astype(np.float32) # (N, 2)
220
-
221
- a = idx_map[:-1, :-1]; b = idx_map[:-1, 1:]
222
- c = idx_map[1:, :-1]; d = idx_map[1:, 1:]
223
- quad_valid = (a != -1) & (b != -1) & (c != -1) & (d != -1)
224
- a_v, b_v, c_v, d_v = a[quad_valid], b[quad_valid], c[quad_valid], d[quad_valid]
225
- tri1 = np.stack([a_v, c_v, b_v], axis=1)
226
- tri2 = np.stack([b_v, c_v, d_v], axis=1)
227
- faces = np.concatenate([tri1, tri2], axis=0)
228
-
229
- p0 = verts[faces[:, 0]]; p1 = verts[faces[:, 1]]; p2 = verts[faces[:, 2]]
230
- e01 = np.linalg.norm(p1 - p0, axis=1)
231
- e12 = np.linalg.norm(p2 - p1, axis=1)
232
- e20 = np.linalg.norm(p0 - p2, axis=1)
233
- keep = (e01 < max_edge) & (e12 < max_edge) & (e20 < max_edge)
234
- faces = faces[keep].astype(np.int64)
235
- return verts, uvs, faces, normals
236
 
237
 
238
  def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
239
- mask_hw: np.ndarray, max_edge: float = 0.04) -> str:
240
- """Triangulated mesh, UV-textured with the input image MoGe-2's recipe.
241
-
242
- Each valid pixel = vertex; adjacent valid pixels form quads → 2 triangles.
243
- Triangles whose edges exceed `max_edge` (meters) are dropped to kill
244
- stretched skin at depth jumps. The input image is used as the GLB's albedo
245
- texture (per-triangle PBR sampling), and trimesh's lazy vertex_normals get
246
- exported so Three.js applies smooth shading instead of flat facets.
247
- """
248
  H, W = pointmap_hwc.shape[:2]
249
  image_native = image_pil_texture.resize((W, H), Image.LANCZOS)
250
 
251
- # Triangulate the (H, W) grid over the foreground mask.
252
  z = pointmap_hwc[:, :, 2]
253
  valid = mask_hw & np.isfinite(pointmap_hwc).all(axis=2) & (z > 0.05) & (z < 25.0)
 
 
254
  idx_map = np.full((H, W), -1, dtype=np.int64)
255
  yy, xx = np.where(valid)
256
  idx_map[yy, xx] = np.arange(len(yy))
@@ -258,30 +217,22 @@ def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
258
  verts = pointmap_hwc[yy, xx].astype(np.float32)
259
  uvs = np.stack([xx / max(W - 1, 1), yy / max(H - 1, 1)], axis=1).astype(np.float32)
260
 
261
- a = idx_map[:-1, :-1]; b = idx_map[:-1, 1:]
262
- c = idx_map[1:, :-1]; d = idx_map[1:, 1:]
263
- quad_valid = (a != -1) & (b != -1) & (c != -1) & (d != -1)
264
- a_v, b_v, c_v, d_v = a[quad_valid], b[quad_valid], c[quad_valid], d[quad_valid]
265
- tri1 = np.stack([a_v, c_v, b_v], axis=1)
266
- tri2 = np.stack([b_v, c_v, d_v], axis=1)
267
- faces = np.concatenate([tri1, tri2], axis=0)
268
-
269
- p0 = verts[faces[:, 0]]; p1 = verts[faces[:, 1]]; p2 = verts[faces[:, 2]]
270
- e01 = np.linalg.norm(p1 - p0, axis=1)
271
- e12 = np.linalg.norm(p2 - p1, axis=1)
272
- e20 = np.linalg.norm(p0 - p2, axis=1)
273
- keep = (e01 < max_edge) & (e12 < max_edge) & (e20 < max_edge)
274
- faces = faces[keep].astype(np.int64)
275
 
276
  # MoGe-2: y/z flip on positions, v-flip on UVs.
277
- flip = np.array([1.0, -1.0, -1.0], dtype=np.float32)
278
- verts = verts * flip
279
  centroid = verts.mean(axis=0).astype(np.float32) if len(verts) else np.zeros(3, np.float32)
280
  verts = verts - centroid
281
-
282
  uvs = uvs * np.array([1.0, -1.0], dtype=np.float32) + np.array([0.0, 1.0], dtype=np.float32)
283
 
284
- # PBR with image as albedo texture — MoGe's exact material settings.
285
  material = trimesh.visual.material.PBRMaterial(
286
  baseColorTexture=image_native,
287
  metallicFactor=0.5,
@@ -291,65 +242,18 @@ def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
291
  visual = trimesh.visual.texture.TextureVisuals(uv=uvs, material=material)
292
 
293
  mesh = trimesh.Trimesh(
294
- vertices=verts, faces=faces, visual=visual, process=False,
 
 
 
 
295
  )
296
- # Touch vertex_normals so trimesh caches them; the GLB exporter reads
297
- # the cached normals and writes them into the file → smooth shading
298
- # in Three.js (instead of the per-face fallback that looked like facets).
299
- _ = mesh.vertex_normals
300
 
301
  out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".glb").name
302
  mesh.export(out_path)
303
  return out_path
304
 
305
 
306
- # -----------------------------------------------------------------------------
307
- # GLB post-processing: inject KHR_materials_unlit extension so Three.js skips
308
- # all lighting calculations and renders surfels as their raw vertex colour.
309
-
310
- import struct
311
- import json
312
-
313
-
314
- def _glb_inject_unlit(glb_path: str) -> None:
315
- """Patch a binary GLB to mark every material as KHR_materials_unlit.
316
- glTF spec for the extension: surfaces render with `baseColor` only,
317
- no shading from light sources or normals."""
318
- with open(glb_path, "rb") as f:
319
- data = f.read()
320
-
321
- # GLB header: magic | version | total_length (12 bytes)
322
- if data[:4] != b"glTF":
323
- return
324
- # First chunk: length(4) | type(4) | payload — JSON chunk.
325
- json_len, json_type = struct.unpack_from("<II", data, 12)
326
- if json_type != 0x4E4F534A: # "JSON"
327
- return
328
- json_bytes = data[20 : 20 + json_len]
329
- bin_tail = data[20 + json_len :] # everything after = BIN chunk(s)
330
-
331
- gltf = json.loads(json_bytes.rstrip(b" \x00").decode("utf-8"))
332
- used = gltf.setdefault("extensionsUsed", [])
333
- if "KHR_materials_unlit" not in used:
334
- used.append("KHR_materials_unlit")
335
- for mat in gltf.get("materials", []):
336
- mat.setdefault("extensions", {})["KHR_materials_unlit"] = {}
337
-
338
- new_json = json.dumps(gltf, separators=(",", ":")).encode("utf-8")
339
- pad = (4 - len(new_json) % 4) % 4
340
- new_json += b" " * pad
341
-
342
- new_total = 12 + 8 + len(new_json) + len(bin_tail)
343
- out = (
344
- b"glTF" + struct.pack("<II", 2, new_total)
345
- + struct.pack("<II", len(new_json), 0x4E4F534A)
346
- + new_json
347
- + bin_tail
348
- )
349
- with open(glb_path, "wb") as f:
350
- f.write(out)
351
-
352
-
353
  # -----------------------------------------------------------------------------
354
  # Gradio handler
355
 
 
170
 
171
 
172
  # -----------------------------------------------------------------------------
173
+ # Mesh export — MoGe-2's recipe (trimesh .glb)
174
+ #
175
+ # We build a regular grid mesh from the (H, W) pointmap: each valid pixel is a
176
+ # vertex, adjacent valid pixels form quads → 2 triangles each. The trick that
177
+ # makes MoGe-2's meshes look clean (no stretched-skin facets at depth jumps,
178
+ # no ragged silhouette) is what they call `mask_cleaned`:
179
+ #
180
+ # mask_cleaned = mask & ~depth_edge(depth, rtol=0.04)
181
+ #
182
+ # i.e. drop pixels sitting on a depth discontinuity *before* triangulation, so
183
+ # no triangle ever spans one. We don't post-filter triangles by edge length.
184
+
185
+ def _depth_edge(depth: np.ndarray, rtol: float = 0.04, kernel_size: int = 3) -> np.ndarray:
186
+ """NumPy port of `utils3d.numpy.depth_edge` (rtol-only).
187
+
188
+ For each pixel, look at the kernel×kernel window around it; if
189
+ (max − min)/depth > rtol, mark it as a depth-edge pixel.
 
 
 
 
 
 
 
 
 
190
  """
191
+ pad = kernel_size // 2
192
+ # NaN-pad + nanmax/nanmin = ignore out-of-bounds pixels at image borders
193
+ # (matches upstream `utils3d.numpy.max_pool_1d`).
194
+ padded = np.pad(depth.astype(np.float32), pad, mode="constant", constant_values=np.nan)
195
+ windows = np.lib.stride_tricks.sliding_window_view(padded, (kernel_size, kernel_size))
196
+ d_max = np.nanmax(windows, axis=(-2, -1))
197
+ d_min = np.nanmin(windows, axis=(-2, -1))
198
+ with np.errstate(divide="ignore", invalid="ignore"):
199
+ rel = (d_max - d_min) / depth
200
+ return np.nan_to_num(rel, nan=0.0, posinf=0.0, neginf=0.0) > rtol
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
 
203
  def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
204
+ mask_hw: np.ndarray, rtol: float = 0.04) -> str:
205
+ """Build a UV-textured triangulated mesh and export to .glb (MoGe-2 recipe)."""
 
 
 
 
 
 
 
206
  H, W = pointmap_hwc.shape[:2]
207
  image_native = image_pil_texture.resize((W, H), Image.LANCZOS)
208
 
 
209
  z = pointmap_hwc[:, :, 2]
210
  valid = mask_hw & np.isfinite(pointmap_hwc).all(axis=2) & (z > 0.05) & (z < 25.0)
211
+ valid &= ~_depth_edge(z, rtol=rtol)
212
+
213
  idx_map = np.full((H, W), -1, dtype=np.int64)
214
  yy, xx = np.where(valid)
215
  idx_map[yy, xx] = np.arange(len(yy))
 
217
  verts = pointmap_hwc[yy, xx].astype(np.float32)
218
  uvs = np.stack([xx / max(W - 1, 1), yy / max(H - 1, 1)], axis=1).astype(np.float32)
219
 
220
+ # Quad order matches upstream `utils3d.numpy.image_mesh`: [TL, BL, BR, TR],
221
+ # split into triangles as fan from TL → [TL, BL, BR] and [TL, BR, TR].
222
+ tl = idx_map[:-1, :-1]; tr = idx_map[:-1, 1:]
223
+ bl = idx_map[1:, :-1]; br = idx_map[1:, 1:]
224
+ quad_valid = (tl != -1) & (tr != -1) & (bl != -1) & (br != -1)
225
+ tl_v, tr_v, bl_v, br_v = tl[quad_valid], tr[quad_valid], bl[quad_valid], br[quad_valid]
226
+ tri1 = np.stack([tl_v, bl_v, br_v], axis=1)
227
+ tri2 = np.stack([tl_v, br_v, tr_v], axis=1)
228
+ faces = np.concatenate([tri1, tri2], axis=0).astype(np.int64)
 
 
 
 
 
229
 
230
  # MoGe-2: y/z flip on positions, v-flip on UVs.
231
+ verts = verts * np.array([1.0, -1.0, -1.0], dtype=np.float32)
 
232
  centroid = verts.mean(axis=0).astype(np.float32) if len(verts) else np.zeros(3, np.float32)
233
  verts = verts - centroid
 
234
  uvs = uvs * np.array([1.0, -1.0], dtype=np.float32) + np.array([0.0, 1.0], dtype=np.float32)
235
 
 
236
  material = trimesh.visual.material.PBRMaterial(
237
  baseColorTexture=image_native,
238
  metallicFactor=0.5,
 
242
  visual = trimesh.visual.texture.TextureVisuals(uv=uvs, material=material)
243
 
244
  mesh = trimesh.Trimesh(
245
+ vertices=verts,
246
+ faces=faces,
247
+ vertex_normals=None, # MoGe-2 leaves this to the GLB consumer
248
+ visual=visual,
249
+ process=False,
250
  )
 
 
 
 
251
 
252
  out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".glb").name
253
  mesh.export(out_path)
254
  return out_path
255
 
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  # -----------------------------------------------------------------------------
258
  # Gradio handler
259