Spaces:
Running on Zero
Running on Zero
Rawal Khirodkar committed on
Commit ·
bf3b357
1
Parent(s): a90c10a
Pointmap: byte-accurate MoGe-2 mesh recipe
Browse files
Match upstream utils3d.numpy exactly:
- _depth_edge: NaN-pad + nanmax/nanmin (out-of-bounds padding ignored at image
borders), formula (max - min) / depth > rtol (raw depth in denominator).
- Quad winding [TL, BL, BR, TR] split as fan from TL into triangles
[TL, BL, BR] and [TL, BR, TR].
Drop dead helpers (_camera_marker, _triangulate_grid, _glb_inject_unlit).
app.py
CHANGED
|
@@ -170,87 +170,46 @@ def _depth_to_rgb(depth: np.ndarray, mask: np.ndarray) -> np.ndarray:
|
|
| 170 |
|
| 171 |
|
| 172 |
# -----------------------------------------------------------------------------
|
| 173 |
-
#
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
def _triangulate_grid(pointmap_hwc: np.ndarray, mask_hw: np.ndarray,
|
| 191 |
-
max_edge: float = 0.04):
|
| 192 |
-
"""Build a triangulated mesh from the (H, W) pointmap grid.
|
| 193 |
-
|
| 194 |
-
Returns (verts, uvs, faces, vertex_normals). Each valid pixel → vertex;
|
| 195 |
-
adjacent valid pixels form quads (2 tris). Long-edge triangles are dropped
|
| 196 |
-
to kill stretched skin at depth jumps. Vertex normals are computed from the
|
| 197 |
-
pointmap's spatial gradient (smooth, per-pixel) instead of inferred from
|
| 198 |
-
triangle face normals (which would give flat-shaded facets).
|
| 199 |
"""
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
n_grid /= np.linalg.norm(n_grid, axis=2, keepdims=True).clip(min=1e-8)
|
| 211 |
-
|
| 212 |
-
idx_map = np.full((H, W), -1, dtype=np.int64)
|
| 213 |
-
yy, xx = np.where(valid)
|
| 214 |
-
idx_map[yy, xx] = np.arange(len(yy))
|
| 215 |
-
|
| 216 |
-
verts = pointmap_hwc[yy, xx].astype(np.float32) # (N, 3)
|
| 217 |
-
normals = n_grid[yy, xx].astype(np.float32) # (N, 3)
|
| 218 |
-
uvs = np.stack([xx / max(W - 1, 1), yy / max(H - 1, 1)],
|
| 219 |
-
axis=1).astype(np.float32) # (N, 2)
|
| 220 |
-
|
| 221 |
-
a = idx_map[:-1, :-1]; b = idx_map[:-1, 1:]
|
| 222 |
-
c = idx_map[1:, :-1]; d = idx_map[1:, 1:]
|
| 223 |
-
quad_valid = (a != -1) & (b != -1) & (c != -1) & (d != -1)
|
| 224 |
-
a_v, b_v, c_v, d_v = a[quad_valid], b[quad_valid], c[quad_valid], d[quad_valid]
|
| 225 |
-
tri1 = np.stack([a_v, c_v, b_v], axis=1)
|
| 226 |
-
tri2 = np.stack([b_v, c_v, d_v], axis=1)
|
| 227 |
-
faces = np.concatenate([tri1, tri2], axis=0)
|
| 228 |
-
|
| 229 |
-
p0 = verts[faces[:, 0]]; p1 = verts[faces[:, 1]]; p2 = verts[faces[:, 2]]
|
| 230 |
-
e01 = np.linalg.norm(p1 - p0, axis=1)
|
| 231 |
-
e12 = np.linalg.norm(p2 - p1, axis=1)
|
| 232 |
-
e20 = np.linalg.norm(p0 - p2, axis=1)
|
| 233 |
-
keep = (e01 < max_edge) & (e12 < max_edge) & (e20 < max_edge)
|
| 234 |
-
faces = faces[keep].astype(np.int64)
|
| 235 |
-
return verts, uvs, faces, normals
|
| 236 |
|
| 237 |
|
| 238 |
def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
|
| 239 |
-
mask_hw: np.ndarray,
|
| 240 |
-
"""
|
| 241 |
-
|
| 242 |
-
Each valid pixel = vertex; adjacent valid pixels form quads → 2 triangles.
|
| 243 |
-
Triangles whose edges exceed `max_edge` (meters) are dropped to kill
|
| 244 |
-
stretched skin at depth jumps. The input image is used as the GLB's albedo
|
| 245 |
-
texture (per-triangle PBR sampling), and trimesh's lazy vertex_normals get
|
| 246 |
-
exported so Three.js applies smooth shading instead of flat facets.
|
| 247 |
-
"""
|
| 248 |
H, W = pointmap_hwc.shape[:2]
|
| 249 |
image_native = image_pil_texture.resize((W, H), Image.LANCZOS)
|
| 250 |
|
| 251 |
-
# Triangulate the (H, W) grid over the foreground mask.
|
| 252 |
z = pointmap_hwc[:, :, 2]
|
| 253 |
valid = mask_hw & np.isfinite(pointmap_hwc).all(axis=2) & (z > 0.05) & (z < 25.0)
|
|
|
|
|
|
|
| 254 |
idx_map = np.full((H, W), -1, dtype=np.int64)
|
| 255 |
yy, xx = np.where(valid)
|
| 256 |
idx_map[yy, xx] = np.arange(len(yy))
|
|
@@ -258,30 +217,22 @@ def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
|
|
| 258 |
verts = pointmap_hwc[yy, xx].astype(np.float32)
|
| 259 |
uvs = np.stack([xx / max(W - 1, 1), yy / max(H - 1, 1)], axis=1).astype(np.float32)
|
| 260 |
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
e01 = np.linalg.norm(p1 - p0, axis=1)
|
| 271 |
-
e12 = np.linalg.norm(p2 - p1, axis=1)
|
| 272 |
-
e20 = np.linalg.norm(p0 - p2, axis=1)
|
| 273 |
-
keep = (e01 < max_edge) & (e12 < max_edge) & (e20 < max_edge)
|
| 274 |
-
faces = faces[keep].astype(np.int64)
|
| 275 |
|
| 276 |
# MoGe-2: y/z flip on positions, v-flip on UVs.
|
| 277 |
-
|
| 278 |
-
verts = verts * flip
|
| 279 |
centroid = verts.mean(axis=0).astype(np.float32) if len(verts) else np.zeros(3, np.float32)
|
| 280 |
verts = verts - centroid
|
| 281 |
-
|
| 282 |
uvs = uvs * np.array([1.0, -1.0], dtype=np.float32) + np.array([0.0, 1.0], dtype=np.float32)
|
| 283 |
|
| 284 |
-
# PBR with image as albedo texture — MoGe's exact material settings.
|
| 285 |
material = trimesh.visual.material.PBRMaterial(
|
| 286 |
baseColorTexture=image_native,
|
| 287 |
metallicFactor=0.5,
|
|
@@ -291,65 +242,18 @@ def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
|
|
| 291 |
visual = trimesh.visual.texture.TextureVisuals(uv=uvs, material=material)
|
| 292 |
|
| 293 |
mesh = trimesh.Trimesh(
|
| 294 |
-
vertices=verts,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
)
|
| 296 |
-
# Touch vertex_normals so trimesh caches them; the GLB exporter reads
|
| 297 |
-
# the cached normals and writes them into the file → smooth shading
|
| 298 |
-
# in Three.js (instead of the per-face fallback that looked like facets).
|
| 299 |
-
_ = mesh.vertex_normals
|
| 300 |
|
| 301 |
out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".glb").name
|
| 302 |
mesh.export(out_path)
|
| 303 |
return out_path
|
| 304 |
|
| 305 |
|
| 306 |
-
# -----------------------------------------------------------------------------
|
| 307 |
-
# GLB post-processing: inject KHR_materials_unlit extension so Three.js skips
|
| 308 |
-
# all lighting calculations and renders surfels as their raw vertex colour.
|
| 309 |
-
|
| 310 |
-
import struct
|
| 311 |
-
import json
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
def _glb_inject_unlit(glb_path: str) -> None:
|
| 315 |
-
"""Patch a binary GLB to mark every material as KHR_materials_unlit.
|
| 316 |
-
glTF spec for the extension: surfaces render with `baseColor` only,
|
| 317 |
-
no shading from light sources or normals."""
|
| 318 |
-
with open(glb_path, "rb") as f:
|
| 319 |
-
data = f.read()
|
| 320 |
-
|
| 321 |
-
# GLB header: magic | version | total_length (12 bytes)
|
| 322 |
-
if data[:4] != b"glTF":
|
| 323 |
-
return
|
| 324 |
-
# First chunk: length(4) | type(4) | payload — JSON chunk.
|
| 325 |
-
json_len, json_type = struct.unpack_from("<II", data, 12)
|
| 326 |
-
if json_type != 0x4E4F534A: # "JSON"
|
| 327 |
-
return
|
| 328 |
-
json_bytes = data[20 : 20 + json_len]
|
| 329 |
-
bin_tail = data[20 + json_len :] # everything after = BIN chunk(s)
|
| 330 |
-
|
| 331 |
-
gltf = json.loads(json_bytes.rstrip(b" \x00").decode("utf-8"))
|
| 332 |
-
used = gltf.setdefault("extensionsUsed", [])
|
| 333 |
-
if "KHR_materials_unlit" not in used:
|
| 334 |
-
used.append("KHR_materials_unlit")
|
| 335 |
-
for mat in gltf.get("materials", []):
|
| 336 |
-
mat.setdefault("extensions", {})["KHR_materials_unlit"] = {}
|
| 337 |
-
|
| 338 |
-
new_json = json.dumps(gltf, separators=(",", ":")).encode("utf-8")
|
| 339 |
-
pad = (4 - len(new_json) % 4) % 4
|
| 340 |
-
new_json += b" " * pad
|
| 341 |
-
|
| 342 |
-
new_total = 12 + 8 + len(new_json) + len(bin_tail)
|
| 343 |
-
out = (
|
| 344 |
-
b"glTF" + struct.pack("<II", 2, new_total)
|
| 345 |
-
+ struct.pack("<II", len(new_json), 0x4E4F534A)
|
| 346 |
-
+ new_json
|
| 347 |
-
+ bin_tail
|
| 348 |
-
)
|
| 349 |
-
with open(glb_path, "wb") as f:
|
| 350 |
-
f.write(out)
|
| 351 |
-
|
| 352 |
-
|
| 353 |
# -----------------------------------------------------------------------------
|
| 354 |
# Gradio handler
|
| 355 |
|
|
|
|
| 170 |
|
| 171 |
|
| 172 |
# -----------------------------------------------------------------------------
|
| 173 |
+
# Mesh export — MoGe-2's recipe (trimesh → .glb)
|
| 174 |
+
#
|
| 175 |
+
# We build a regular grid mesh from the (H, W) pointmap: each valid pixel is a
|
| 176 |
+
# vertex, adjacent valid pixels form quads → 2 triangles each. The trick that
|
| 177 |
+
# makes MoGe-2's meshes look clean (no stretched-skin facets at depth jumps,
|
| 178 |
+
# no ragged silhouette) is what they call `mask_cleaned`:
|
| 179 |
+
#
|
| 180 |
+
# mask_cleaned = mask & ~depth_edge(depth, rtol=0.04)
|
| 181 |
+
#
|
| 182 |
+
# i.e. drop pixels sitting on a depth discontinuity *before* triangulation, so
|
| 183 |
+
# no triangle ever spans one. We don't post-filter triangles by edge length.
|
| 184 |
+
|
| 185 |
+
def _depth_edge(depth: np.ndarray, rtol: float = 0.04, kernel_size: int = 3) -> np.ndarray:
|
| 186 |
+
"""NumPy port of `utils3d.numpy.depth_edge` (rtol-only).
|
| 187 |
+
|
| 188 |
+
For each pixel, look at the kernel×kernel window around it; if
|
| 189 |
+
(max − min)/depth > rtol, mark it as a depth-edge pixel.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
"""
|
| 191 |
+
pad = kernel_size // 2
|
| 192 |
+
# NaN-pad + nanmax/nanmin = ignore out-of-bounds pixels at image borders
|
| 193 |
+
# (matches upstream `utils3d.numpy.max_pool_1d`).
|
| 194 |
+
padded = np.pad(depth.astype(np.float32), pad, mode="constant", constant_values=np.nan)
|
| 195 |
+
windows = np.lib.stride_tricks.sliding_window_view(padded, (kernel_size, kernel_size))
|
| 196 |
+
d_max = np.nanmax(windows, axis=(-2, -1))
|
| 197 |
+
d_min = np.nanmin(windows, axis=(-2, -1))
|
| 198 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 199 |
+
rel = (d_max - d_min) / depth
|
| 200 |
+
return np.nan_to_num(rel, nan=0.0, posinf=0.0, neginf=0.0) > rtol
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
|
| 203 |
def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
|
| 204 |
+
mask_hw: np.ndarray, rtol: float = 0.04) -> str:
|
| 205 |
+
"""Build a UV-textured triangulated mesh and export to .glb (MoGe-2 recipe)."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
H, W = pointmap_hwc.shape[:2]
|
| 207 |
image_native = image_pil_texture.resize((W, H), Image.LANCZOS)
|
| 208 |
|
|
|
|
| 209 |
z = pointmap_hwc[:, :, 2]
|
| 210 |
valid = mask_hw & np.isfinite(pointmap_hwc).all(axis=2) & (z > 0.05) & (z < 25.0)
|
| 211 |
+
valid &= ~_depth_edge(z, rtol=rtol)
|
| 212 |
+
|
| 213 |
idx_map = np.full((H, W), -1, dtype=np.int64)
|
| 214 |
yy, xx = np.where(valid)
|
| 215 |
idx_map[yy, xx] = np.arange(len(yy))
|
|
|
|
| 217 |
verts = pointmap_hwc[yy, xx].astype(np.float32)
|
| 218 |
uvs = np.stack([xx / max(W - 1, 1), yy / max(H - 1, 1)], axis=1).astype(np.float32)
|
| 219 |
|
| 220 |
+
# Quad order matches upstream `utils3d.numpy.image_mesh`: [TL, BL, BR, TR],
|
| 221 |
+
# split into triangles as fan from TL → [TL, BL, BR] and [TL, BR, TR].
|
| 222 |
+
tl = idx_map[:-1, :-1]; tr = idx_map[:-1, 1:]
|
| 223 |
+
bl = idx_map[1:, :-1]; br = idx_map[1:, 1:]
|
| 224 |
+
quad_valid = (tl != -1) & (tr != -1) & (bl != -1) & (br != -1)
|
| 225 |
+
tl_v, tr_v, bl_v, br_v = tl[quad_valid], tr[quad_valid], bl[quad_valid], br[quad_valid]
|
| 226 |
+
tri1 = np.stack([tl_v, bl_v, br_v], axis=1)
|
| 227 |
+
tri2 = np.stack([tl_v, br_v, tr_v], axis=1)
|
| 228 |
+
faces = np.concatenate([tri1, tri2], axis=0).astype(np.int64)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
# MoGe-2: y/z flip on positions, v-flip on UVs.
|
| 231 |
+
verts = verts * np.array([1.0, -1.0, -1.0], dtype=np.float32)
|
|
|
|
| 232 |
centroid = verts.mean(axis=0).astype(np.float32) if len(verts) else np.zeros(3, np.float32)
|
| 233 |
verts = verts - centroid
|
|
|
|
| 234 |
uvs = uvs * np.array([1.0, -1.0], dtype=np.float32) + np.array([0.0, 1.0], dtype=np.float32)
|
| 235 |
|
|
|
|
| 236 |
material = trimesh.visual.material.PBRMaterial(
|
| 237 |
baseColorTexture=image_native,
|
| 238 |
metallicFactor=0.5,
|
|
|
|
| 242 |
visual = trimesh.visual.texture.TextureVisuals(uv=uvs, material=material)
|
| 243 |
|
| 244 |
mesh = trimesh.Trimesh(
|
| 245 |
+
vertices=verts,
|
| 246 |
+
faces=faces,
|
| 247 |
+
vertex_normals=None, # MoGe-2 leaves this to the GLB consumer
|
| 248 |
+
visual=visual,
|
| 249 |
+
process=False,
|
| 250 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".glb").name
|
| 253 |
mesh.export(out_path)
|
| 254 |
return out_path
|
| 255 |
|
| 256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
# -----------------------------------------------------------------------------
|
| 258 |
# Gradio handler
|
| 259 |
|