Spaces:
Running
Running
Rawal Khirodkar committed on
Commit ·
ff46806
1
Parent(s): 0bf7027
Normal: encode RGB at native res, then PIL Lanczos upsample to input size (sharper than bilinear-on-floats)
Browse files
app.py
CHANGED
|
@@ -110,7 +110,9 @@ print("[startup] ready.")
|
|
| 110 |
# Inference (mirrors sapiens/dense/tools/vis/vis_normal.py)
|
| 111 |
|
| 112 |
def _estimate_normal(image_bgr: np.ndarray, model) -> np.ndarray:
|
| 113 |
-
|
|
|
|
|
|
|
| 114 |
data = model.pipeline(dict(img=image_bgr)) # resize + pad
|
| 115 |
data = model.data_preprocessor(data) # normalize + batch
|
| 116 |
inputs, data_samples = data["inputs"], data["data_samples"]
|
|
@@ -125,8 +127,7 @@ def _estimate_normal(image_bgr: np.ndarray, model) -> np.ndarray:
|
|
| 125 |
pad_top : inputs.shape[2] - pad_bottom,
|
| 126 |
pad_left : inputs.shape[3] - pad_right,
|
| 127 |
]
|
| 128 |
-
|
| 129 |
-
return normal.squeeze(0).cpu().float().numpy().transpose(1, 2, 0) # (H, W, 3) in [-1, 1]
|
| 130 |
|
| 131 |
|
| 132 |
def _foreground_mask(image_pil: Image.Image, target_h: int, target_w: int) -> np.ndarray:
|
|
@@ -158,16 +159,22 @@ def predict(image: Image.Image, size: str, bg_mode: str):
|
|
| 158 |
h0, w0 = image_rgb.shape[:2]
|
| 159 |
|
| 160 |
model = _get_normal_model(size)
|
| 161 |
-
|
|
|
|
| 162 |
|
| 163 |
-
raw = normal.copy()
|
| 164 |
if bg_mode == "fg-bg":
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
rgb_pil = Image.fromarray(rgb)
|
| 170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
npy_path = tempfile.NamedTemporaryFile(delete=False, suffix=".npy").name
|
| 172 |
np.save(npy_path, raw.astype(np.float32))
|
| 173 |
|
|
|
|
| 110 |
# Inference (mirrors sapiens/dense/tools/vis/vis_normal.py)
|
| 111 |
|
| 112 |
def _estimate_normal(image_bgr: np.ndarray, model) -> np.ndarray:
|
| 113 |
+
"""Returns unit-length normals at the model's NATIVE (post-unpad) resolution
|
| 114 |
+
— no upsampling here. The caller upsamples the encoded RGB image instead,
|
| 115 |
+
which gives sharper edges than bilinear-upsampling the raw float vectors."""
|
| 116 |
data = model.pipeline(dict(img=image_bgr)) # resize + pad
|
| 117 |
data = model.data_preprocessor(data) # normalize + batch
|
| 118 |
inputs, data_samples = data["inputs"], data["data_samples"]
|
|
|
|
| 127 |
pad_top : inputs.shape[2] - pad_bottom,
|
| 128 |
pad_left : inputs.shape[3] - pad_right,
|
| 129 |
]
|
| 130 |
+
return normal.squeeze(0).cpu().float().numpy().transpose(1, 2, 0) # (H_native, W_native, 3)
|
|
|
|
| 131 |
|
| 132 |
|
| 133 |
def _foreground_mask(image_pil: Image.Image, target_h: int, target_w: int) -> np.ndarray:
|
|
|
|
| 159 |
h0, w0 = image_rgb.shape[:2]
|
| 160 |
|
| 161 |
model = _get_normal_model(size)
|
| 162 |
+
normal_native = _estimate_normal(image_bgr, model) # (H_native, W_native, 3) in [-1, 1]
|
| 163 |
+
h_n, w_n = normal_native.shape[:2]
|
| 164 |
|
|
|
|
| 165 |
if bg_mode == "fg-bg":
|
| 166 |
+
# Mask is computed at native resolution to keep things fast and
|
| 167 |
+
# consistent with the normal map's actual pixel grid.
|
| 168 |
+
mask_native = _foreground_mask(image_pil, h_n, w_n)
|
| 169 |
+
normal_native[~mask_native] = -1.0 # → RGB(0,0,0) after vis
|
|
|
|
| 170 |
|
| 171 |
+
rgb_native = _normal_to_rgb(normal_native) # (H_native, W_native, 3) uint8
|
| 172 |
+
rgb_pil = Image.fromarray(rgb_native).resize((w0, h0), Image.LANCZOS) # upsample IMAGE, lanczos
|
| 173 |
+
|
| 174 |
+
# Save raw normals at native resolution (NaN where bg removed for completeness).
|
| 175 |
+
raw = normal_native.copy()
|
| 176 |
+
if bg_mode == "fg-bg":
|
| 177 |
+
raw[~mask_native] = np.nan
|
| 178 |
npy_path = tempfile.NamedTemporaryFile(delete=False, suffix=".npy").name
|
| 179 |
np.save(npy_path, raw.astype(np.float32))
|
| 180 |
|