Spaces:
Running
Running
Rawal Khirodkar committed on
Commit ·
ff46806
1
Parent(s): 0bf7027
Normal: encode RGB at native res, then PIL Lanczos upsample to input size (sharper than bilinear-on-floats)
Browse files
app.py
CHANGED
|
@@ -110,7 +110,9 @@ print("[startup] ready.")
|
|
| 110 |
# Inference (mirrors sapiens/dense/tools/vis/vis_normal.py)
|
| 111 |
|
| 112 |
def _estimate_normal(image_bgr: np.ndarray, model) -> np.ndarray:
|
| 113 |
-
|
|
|
|
|
|
|
| 114 |
data = model.pipeline(dict(img=image_bgr)) # resize + pad
|
| 115 |
data = model.data_preprocessor(data) # normalize + batch
|
| 116 |
inputs, data_samples = data["inputs"], data["data_samples"]
|
|
@@ -125,8 +127,7 @@ def _estimate_normal(image_bgr: np.ndarray, model) -> np.ndarray:
|
|
| 125 |
pad_top : inputs.shape[2] - pad_bottom,
|
| 126 |
pad_left : inputs.shape[3] - pad_right,
|
| 127 |
]
|
| 128 |
-
|
| 129 |
-
return normal.squeeze(0).cpu().float().numpy().transpose(1, 2, 0) # (H, W, 3) in [-1, 1]
|
| 130 |
|
| 131 |
|
| 132 |
def _foreground_mask(image_pil: Image.Image, target_h: int, target_w: int) -> np.ndarray:
|
|
@@ -158,16 +159,22 @@ def predict(image: Image.Image, size: str, bg_mode: str):
|
|
| 158 |
h0, w0 = image_rgb.shape[:2]
|
| 159 |
|
| 160 |
model = _get_normal_model(size)
|
| 161 |
-
|
|
|
|
| 162 |
|
| 163 |
-
raw = normal.copy()
|
| 164 |
if bg_mode == "fg-bg":
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
rgb_pil = Image.fromarray(rgb)
|
| 170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
npy_path = tempfile.NamedTemporaryFile(delete=False, suffix=".npy").name
|
| 172 |
np.save(npy_path, raw.astype(np.float32))
|
| 173 |
|
|
|
|
| 110 |
# Inference (mirrors sapiens/dense/tools/vis/vis_normal.py)
|
| 111 |
|
| 112 |
def _estimate_normal(image_bgr: np.ndarray, model) -> np.ndarray:
|
| 113 |
+
"""Returns unit-length normals at the model's NATIVE (post-unpad) resolution
|
| 114 |
+
— no upsampling here. The caller upsamples the encoded RGB image instead,
|
| 115 |
+
which gives sharper edges than bilinear-upsampling the raw float vectors."""
|
| 116 |
data = model.pipeline(dict(img=image_bgr)) # resize + pad
|
| 117 |
data = model.data_preprocessor(data) # normalize + batch
|
| 118 |
inputs, data_samples = data["inputs"], data["data_samples"]
|
|
|
|
| 127 |
pad_top : inputs.shape[2] - pad_bottom,
|
| 128 |
pad_left : inputs.shape[3] - pad_right,
|
| 129 |
]
|
| 130 |
+
return normal.squeeze(0).cpu().float().numpy().transpose(1, 2, 0) # (H_native, W_native, 3)
|
|
|
|
| 131 |
|
| 132 |
|
| 133 |
def _foreground_mask(image_pil: Image.Image, target_h: int, target_w: int) -> np.ndarray:
|
|
|
|
| 159 |
h0, w0 = image_rgb.shape[:2]
|
| 160 |
|
| 161 |
model = _get_normal_model(size)
|
| 162 |
+
normal_native = _estimate_normal(image_bgr, model) # (H_native, W_native, 3) in [-1, 1]
|
| 163 |
+
h_n, w_n = normal_native.shape[:2]
|
| 164 |
|
|
|
|
| 165 |
if bg_mode == "fg-bg":
|
| 166 |
+
# Mask is computed at native resolution to keep things fast and
|
| 167 |
+
# consistent with the normal map's actual pixel grid.
|
| 168 |
+
mask_native = _foreground_mask(image_pil, h_n, w_n)
|
| 169 |
+
normal_native[~mask_native] = -1.0 # → RGB(0,0,0) after vis
|
|
|
|
| 170 |
|
| 171 |
+
rgb_native = _normal_to_rgb(normal_native) # (H_native, W_native, 3) uint8
|
| 172 |
+
rgb_pil = Image.fromarray(rgb_native).resize((w0, h0), Image.LANCZOS) # upsample IMAGE, lanczos
|
| 173 |
+
|
| 174 |
+
# Save raw normals at native resolution (NaN where bg removed for completeness).
|
| 175 |
+
raw = normal_native.copy()
|
| 176 |
+
if bg_mode == "fg-bg":
|
| 177 |
+
raw[~mask_native] = np.nan
|
| 178 |
npy_path = tempfile.NamedTemporaryFile(delete=False, suffix=".npy").name
|
| 179 |
np.save(npy_path, raw.astype(np.float32))
|
| 180 |
|