Spaces:

facebook
/

sapiens2-pointmap

Running on Zero

App Files Community

Rawal Khirodkar committed on 14 days ago

Commit

6b72fc2

1 Parent(s): 9a37519

Pointmap: use original-res image as GLB texture (kills mesh-zoom pixelation); drop camera marker

Browse files

Files changed (1) hide show

app.py +16 -13

app.py CHANGED Viewed

@@ -225,11 +225,11 @@ def _triangulate_grid(pointmap_hwc: np.ndarray, mask_hw: np.ndarray,
     return verts, uvs, faces
-def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
               mask_hw: np.ndarray, max_edge: float = 0.04) -> str:
-    h, w = pointmap_hwc.shape[:2]
-    image_native = image_pil_native.resize((w, h), Image.LANCZOS)
     verts, uvs, faces = _triangulate_grid(pointmap_hwc, mask_hw, max_edge=max_edge)
     # Y-up flip so the viewer's default orientation matches photographic intuition.
@@ -246,7 +246,7 @@ def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
     uvs = uvs * np.array([1.0, -1.0], dtype=np.float32) + np.array([0.0, 1.0], dtype=np.float32)
     material = trimesh.visual.material.PBRMaterial(
-        baseColorTexture=image_native,
         metallicFactor=0.0,
         roughnessFactor=1.0,
         doubleSided=True,
@@ -256,12 +256,14 @@ def _make_glb(image_pil_native: Image.Image, pointmap_hwc: np.ndarray,
     mesh = trimesh.Trimesh(vertices=verts, faces=faces, visual=visual, process=False)
     _ = mesh.vertex_normals  # lazy compute → exported in glb
-    # Tiny camera marker only — at the (now-shifted) origin of the camera frame.
-    cam_v, cam_c = _camera_marker()
-    cam_v = cam_v * flip - centroid
-    aids = trimesh.PointCloud(vertices=cam_v, colors=cam_c)
-    scene = trimesh.Scene([mesh, aids])
     out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".glb").name
     scene.export(out_path)
     return out_path
@@ -276,9 +278,10 @@ def predict(image: Image.Image, size: str):
         return None, None
     t0 = _t.perf_counter()
-    image_pil = _cap_height(image.convert("RGB"))                # cap to 1024px height
     image_bgr = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
-    print(f"[time] convert+cap     {(_t.perf_counter()-t0)*1000:.0f} ms  (input {image_pil.size})")
     t = _t.perf_counter()
     model = _get_pointmap_model(size)
@@ -299,7 +302,7 @@ def predict(image: Image.Image, size: str):
     print(f"[time] depth+resize    {(_t.perf_counter()-t)*1000:.0f} ms")
     t = _t.perf_counter()
-    glb_path = _make_glb(image_pil, pointmap, mask)
     print(f"[time] glb export      {(_t.perf_counter()-t)*1000:.0f} ms")
     print(f"[time] TOTAL           {(_t.perf_counter()-t0)*1000:.0f} ms")

     return verts, uvs, faces
+def _make_glb(image_pil_texture: Image.Image, pointmap_hwc: np.ndarray,
               mask_hw: np.ndarray, max_edge: float = 0.04) -> str:
+    """`image_pil_texture` should be the HIGHEST-resolution input available
+    (not the inference-capped one) — it's used as the GLB albedo so the mesh
+    surface stays smooth even when zoomed in past the mesh-vertex density."""
     verts, uvs, faces = _triangulate_grid(pointmap_hwc, mask_hw, max_edge=max_edge)
     # Y-up flip so the viewer's default orientation matches photographic intuition.
     uvs = uvs * np.array([1.0, -1.0], dtype=np.float32) + np.array([0.0, 1.0], dtype=np.float32)
     material = trimesh.visual.material.PBRMaterial(
+        baseColorTexture=image_pil_texture,
         metallicFactor=0.0,
         roughnessFactor=1.0,
         doubleSided=True,
     mesh = trimesh.Trimesh(vertices=verts, faces=faces, visual=visual, process=False)
     _ = mesh.vertex_normals  # lazy compute → exported in glb
+    # # Optional camera marker — disabled for now to keep the viewer focused on
+    # # the human only. Re-enable by uncommenting:
+    # cam_v, cam_c = _camera_marker()
+    # cam_v = cam_v * flip - centroid
+    # aids = trimesh.PointCloud(vertices=cam_v, colors=cam_c)
+    # scene = trimesh.Scene([mesh, aids])
+    scene = trimesh.Scene([mesh])
     out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".glb").name
     scene.export(out_path)
     return out_path
         return None, None
     t0 = _t.perf_counter()
+    image_pil_full = image.convert("RGB")                        # original-res (used as texture)
+    image_pil = _cap_height(image_pil_full)                      # capped (used for inference)
     image_bgr = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
+    print(f"[time] convert+cap     {(_t.perf_counter()-t0)*1000:.0f} ms  (input {image_pil.size}, texture {image_pil_full.size})")
     t = _t.perf_counter()
     model = _get_pointmap_model(size)
     print(f"[time] depth+resize    {(_t.perf_counter()-t)*1000:.0f} ms")
     t = _t.perf_counter()
+    glb_path = _make_glb(image_pil_full, pointmap, mask)
     print(f"[time] glb export      {(_t.perf_counter()-t)*1000:.0f} ms")
     print(f"[time] TOTAL           {(_t.perf_counter()-t0)*1000:.0f} ms")