Rawal Khirodkar
Initial sapiens2-pointmap Space (HF download at startup, all 4 sizes, 3D viewer)
bff20b3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from abc import ABCMeta
from typing import Optional, Tuple
import cv2
import numpy as np
from .utils import (
generate_offset_heatmap,
generate_udp_gaussian_heatmaps,
get_heatmap_maximum,
refine_keypoints_dark_udp,
)
class UDPHeatmap(metaclass=ABCMeta):
    """Codec converting single-instance keypoints to heatmaps and back.

    Two heatmap layouts are supported, selected by ``heatmap_type``:

    * ``"gaussian"``: heatmaps produced by ``generate_udp_gaussian_heatmaps``
      and decoded with ``get_heatmap_maximum`` followed by
      ``refine_keypoints_dark_udp``.
    * ``"combined"``: heatmaps produced by ``generate_offset_heatmap``. When
      decoding, channels are read in interleaved triplets: every third
      channel starting at 0 is a classification map, starting at 1 an
      x-offset map and starting at 2 a y-offset map.

    Keypoints are mapped from input-image coordinates to heatmap coordinates
    by dividing by ``scale_factor = (input_size - 1) / (heatmap_size - 1)``.

    Args:
        input_size: Input image size. Presumably ``(w, h)`` to match
            ``heatmap_size`` -- TODO confirm against callers.
        heatmap_size: Heatmap size, unpacked as ``(W, H)`` in :meth:`decode`.
        heatmap_type: ``"gaussian"`` or ``"combined"``.
        sigma: Passed to ``generate_udp_gaussian_heatmaps`` when encoding
            ``"gaussian"`` heatmaps.
        radius_factor: Passed to ``generate_offset_heatmap`` when encoding
            ``"combined"`` heatmaps; when decoding, the offset maps are
            multiplied by ``radius_factor * max(W, H)``.
        blur_kernel_size: Blur kernel parameter. Forwarded as-is to
            ``refine_keypoints_dark_udp`` for ``"gaussian"``; for
            ``"combined"`` the classification maps are blurred with a square
            kernel of side ``2 * blur_kernel_size + 1``.

    Raises:
        ValueError: If ``heatmap_type`` is not one of the supported values.
    """

    auxiliary_encode_keys = set()

    def __init__(
        self,
        input_size: Tuple[int, int],
        heatmap_size: Tuple[int, int],
        heatmap_type: str = "gaussian",
        sigma: float = 2.0,
        radius_factor: float = 0.0546875,
        blur_kernel_size: int = 11,
    ) -> None:
        super().__init__()
        self.input_size = input_size
        self.heatmap_size = heatmap_size
        self.sigma = sigma
        self.radius_factor = radius_factor
        self.heatmap_type = heatmap_type
        self.blur_kernel_size = blur_kernel_size
        # Ratio between the last valid coordinate of the input image and the
        # last valid coordinate of the heatmap, per axis.
        self.scale_factor = (
            (np.array(input_size) - 1) / (np.array(heatmap_size) - 1)
        ).astype(np.float32)

        self._validate_heatmap_type()

    def _validate_heatmap_type(self) -> None:
        """Raise ``ValueError`` if ``self.heatmap_type`` is not supported."""
        if self.heatmap_type not in {"gaussian", "combined"}:
            raise ValueError(
                f"{self.__class__.__name__} got invalid `heatmap_type` value "
                f"{self.heatmap_type}. Should be one of "
                '{"gaussian", "combined"}'
            )

    def encode(
        self, keypoints: np.ndarray, keypoints_visible: Optional[np.ndarray] = None
    ) -> dict:
        """Encode keypoints (in input-image coordinates) into heatmaps.

        Args:
            keypoints: Keypoint coordinates whose first axis is the instance
                axis and must have length 1. Presumably shaped ``(1, K, D)``
                -- TODO confirm against callers.
            keypoints_visible: Visibility values matching
                ``keypoints.shape[:2]``. When ``None``, every keypoint is
                treated as visible (an array of ones is used).

        Returns:
            A dict with keys ``"heatmaps"`` and ``"keypoint_weights"`` holding
            the two values returned by the heatmap generator for the
            configured ``heatmap_type``.

        Raises:
            AssertionError: If more than one instance is passed.
            ValueError: If ``self.heatmap_type`` is not supported.
        """
        assert keypoints.shape[0] == 1, (
            f"{self.__class__.__name__} only support single-instance keypoint encoding"
        )

        if keypoints_visible is None:
            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

        # `heatmap_type` is a plain attribute and may have been changed after
        # construction, so it is re-checked here.
        self._validate_heatmap_type()

        # Map input-image coordinates onto the heatmap grid.
        heatmap_keypoints = keypoints / self.scale_factor

        if self.heatmap_type == "gaussian":
            heatmaps, keypoint_weights = generate_udp_gaussian_heatmaps(
                heatmap_size=self.heatmap_size,
                keypoints=heatmap_keypoints,
                keypoints_visible=keypoints_visible,
                sigma=self.sigma,
            )
        else:  # "combined" -- the only other value accepted by the check above
            heatmaps, keypoint_weights = generate_offset_heatmap(
                heatmap_size=self.heatmap_size,
                keypoints=heatmap_keypoints,
                keypoints_visible=keypoints_visible,
                radius_factor=self.radius_factor,
            )

        return dict(heatmaps=heatmaps, keypoint_weights=keypoint_weights)

    def decode(self, encoded: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Decode heatmaps into keypoint coordinates and scores.

        The input array is copied first, so ``encoded`` is not modified.

        Args:
            encoded: Heatmaps for a single instance. For ``"combined"`` this
                is a 3-D array ``(3 * K, H, W)`` of interleaved
                classification / x-offset / y-offset maps. For ``"gaussian"``
                presumably ``(K, H, W)`` -- TODO confirm against
                ``get_heatmap_maximum``.

        Returns:
            A tuple ``(keypoints, scores)``, each with a leading instance
            axis of length 1. ``keypoints`` are rescaled from heatmap
            coordinates to the input-image scale.

        Raises:
            ValueError: If ``self.heatmap_type`` is not supported.
        """
        # Fail with the same error as `encode` instead of hitting an unbound
        # local further down when `heatmap_type` was changed after construction.
        self._validate_heatmap_type()

        heatmaps = encoded.copy()

        if self.heatmap_type == "gaussian":
            keypoints, scores = get_heatmap_maximum(heatmaps)
            # unsqueeze the instance dimension for single-instance results
            keypoints = keypoints[None]
            scores = scores[None]
            keypoints = refine_keypoints_dark_udp(
                keypoints, heatmaps, blur_kernel_size=self.blur_kernel_size
            )
        else:  # "combined" -- the only other value accepted by the check above
            _K, H, W = heatmaps.shape
            K = _K // 3

            # Apply Gaussian blur on classification maps, writing the result
            # back into each channel of the local copy.
            ks = 2 * self.blur_kernel_size + 1
            for cls_heatmap in heatmaps[::3]:
                cv2.GaussianBlur(cls_heatmap, (ks, ks), 0, cls_heatmap)

            # valid radius: offset maps are scaled by this value
            radius = self.radius_factor * max(W, H)
            x_offset = heatmaps[1::3].flatten() * radius
            y_offset = heatmaps[2::3].flatten() * radius

            keypoints, scores = get_heatmap_maximum(heatmaps=heatmaps[::3])

            # Flat index of each keypoint's peak inside the flattened
            # (K, H, W) offset arrays: x + y * W within a map, plus W * H per
            # preceding map.
            index = (keypoints[..., 0] + keypoints[..., 1] * W).flatten()
            index += W * H * np.arange(0, K)
            index = index.astype(int)
            keypoints += np.stack((x_offset[index], y_offset[index]), axis=-1)

            # unsqueeze the instance dimension for single-instance results
            keypoints = keypoints[None].astype(np.float32)
            scores = scores[None]

        # Rescale from heatmap coordinates to the input-image scale.
        # NOTE(review): this multiplies by `input_size / (heatmap_size - 1)`
        # whereas `encode` divides by `(input_size - 1) / (heatmap_size - 1)`,
        # so the two are not exact inverses. Left unchanged to preserve
        # existing numeric outputs -- confirm whether this is intentional.
        W, H = self.heatmap_size
        keypoints = keypoints / [W - 1, H - 1] * self.input_size

        return keypoints, scores