Rawal Khirodkar committed on
Commit
ff46806
·
1 Parent(s): 0bf7027

Normal: encode RGB at native res, then PIL Lanczos upsample to input size (sharper than bilinear-on-floats)

Browse files
Files changed (1) hide show
  1. app.py +17 -10
app.py CHANGED
@@ -110,7 +110,9 @@ print("[startup] ready.")
110
  # Inference (mirrors sapiens/dense/tools/vis/vis_normal.py)
111
 
112
  def _estimate_normal(image_bgr: np.ndarray, model) -> np.ndarray:
113
- h0, w0 = image_bgr.shape[:2]
 
 
114
  data = model.pipeline(dict(img=image_bgr)) # resize + pad
115
  data = model.data_preprocessor(data) # normalize + batch
116
  inputs, data_samples = data["inputs"], data["data_samples"]
@@ -125,8 +127,7 @@ def _estimate_normal(image_bgr: np.ndarray, model) -> np.ndarray:
125
  pad_top : inputs.shape[2] - pad_bottom,
126
  pad_left : inputs.shape[3] - pad_right,
127
  ]
128
- normal = F.interpolate(normal, size=(h0, w0), mode="bilinear", align_corners=False)
129
- return normal.squeeze(0).cpu().float().numpy().transpose(1, 2, 0) # (H, W, 3) in [-1, 1]
130
 
131
 
132
  def _foreground_mask(image_pil: Image.Image, target_h: int, target_w: int) -> np.ndarray:
@@ -158,16 +159,22 @@ def predict(image: Image.Image, size: str, bg_mode: str):
158
  h0, w0 = image_rgb.shape[:2]
159
 
160
  model = _get_normal_model(size)
161
- normal = _estimate_normal(image_bgr, model) # (H, W, 3) in [-1, 1]
 
162
 
163
- raw = normal.copy()
164
  if bg_mode == "fg-bg":
165
- mask = _foreground_mask(image_pil, h0, w0)
166
- raw[~mask] = np.nan
167
- normal[~mask] = -1.0 # → RGB(0,0,0) after vis
168
- rgb = _normal_to_rgb(normal)
169
- rgb_pil = Image.fromarray(rgb)
170
 
 
 
 
 
 
 
 
171
  npy_path = tempfile.NamedTemporaryFile(delete=False, suffix=".npy").name
172
  np.save(npy_path, raw.astype(np.float32))
173
 
 
110
  # Inference (mirrors sapiens/dense/tools/vis/vis_normal.py)
111
 
112
  def _estimate_normal(image_bgr: np.ndarray, model) -> np.ndarray:
113
+ """Returns unit-length normals at the model's NATIVE (post-unpad) resolution
114
+ — no upsampling here. The caller upsamples the encoded RGB image instead,
115
+ which gives sharper edges than bilinear-upsampling the raw float vectors."""
116
  data = model.pipeline(dict(img=image_bgr)) # resize + pad
117
  data = model.data_preprocessor(data) # normalize + batch
118
  inputs, data_samples = data["inputs"], data["data_samples"]
 
127
  pad_top : inputs.shape[2] - pad_bottom,
128
  pad_left : inputs.shape[3] - pad_right,
129
  ]
130
+ return normal.squeeze(0).cpu().float().numpy().transpose(1, 2, 0) # (H_native, W_native, 3)
 
131
 
132
 
133
  def _foreground_mask(image_pil: Image.Image, target_h: int, target_w: int) -> np.ndarray:
 
159
  h0, w0 = image_rgb.shape[:2]
160
 
161
  model = _get_normal_model(size)
162
+ normal_native = _estimate_normal(image_bgr, model) # (H_native, W_native, 3) in [-1, 1]
163
+ h_n, w_n = normal_native.shape[:2]
164
 
 
165
  if bg_mode == "fg-bg":
166
+ # Mask is computed at native resolution to keep things fast and
167
+ # consistent with the normal map's actual pixel grid.
168
+ mask_native = _foreground_mask(image_pil, h_n, w_n)
169
+ normal_native[~mask_native] = -1.0 # → RGB(0,0,0) after vis
 
170
 
171
+ rgb_native = _normal_to_rgb(normal_native) # (H_native, W_native, 3) uint8
172
+ rgb_pil = Image.fromarray(rgb_native).resize((w0, h0), Image.LANCZOS) # upsample IMAGE, lanczos
173
+
174
+ # Save raw normals at native resolution (NaN where bg removed for completeness).
175
+ raw = normal_native.copy()
176
+ if bg_mode == "fg-bg":
177
+ raw[~mask_native] = np.nan
178
  npy_path = tempfile.NamedTemporaryFile(delete=False, suffix=".npy").name
179
  np.save(npy_path, raw.astype(np.float32))
180