omar-ah
/

vil-tracker

Model card Files Files and versions

xet

Community

omar-ah committed on 9 days ago

Commit

bb51611

verified ·

1 Parent(s): 3e094bb

Fix: mLSTM SiLU gate+activation, GroupNorm 192, stochastic depth 0.05, Hanning window

Browse files

Files changed (1) hide show

vil_tracker/models/heads.py +31 -2

vil_tracker/models/heads.py CHANGED Viewed

@@ -120,6 +120,7 @@ def decode_predictions(
     offset: torch.Tensor,
     search_size: int = 256,
     feat_size: int = 16,
 ) -> tuple:
     """Decode head outputs to bounding boxes.
@@ -129,6 +130,7 @@ def decode_predictions(
         offset: (B, 2, H, W) sub-pixel offset
         search_size: pixel size of search region
         feat_size: spatial size of feature map
     Returns:
         boxes: (B, 4) predicted boxes in [cx, cy, w, h] format, in pixels
@@ -137,8 +139,17 @@ def decode_predictions(
     B = heatmap.shape[0]
     stride = search_size / feat_size  # 256/16 = 16
-    # Find peak in heatmap
-    heatmap_flat = heatmap.view(B, -1)  # (B, H*W)
     scores, indices = heatmap_flat.max(dim=-1)  # (B,)
     scores = scores.sigmoid()
@@ -213,3 +224,21 @@ def generate_size_target(
         size_norm: (B, 2) normalized to [0, 1] relative to search region
     """
     return size.clamp(min=1) / search_size

     offset: torch.Tensor,
     search_size: int = 256,
     feat_size: int = 16,
+    hanning_window: torch.Tensor = None,
 ) -> tuple:
     """Decode head outputs to bounding boxes.
         offset: (B, 2, H, W) sub-pixel offset
         search_size: pixel size of search region
         feat_size: spatial size of feature map
+        hanning_window: optional (H, W) Hanning window for positional prior penalty
     Returns:
         boxes: (B, 4) predicted boxes in [cx, cy, w, h] format, in pixels
     B = heatmap.shape[0]
     stride = search_size / feat_size  # 256/16 = 16
+    # Apply Hanning window penalty to suppress false positives at search edges
+    heatmap_penalized = heatmap
+    if hanning_window is not None:
+        # hanning_window: (H, W) → broadcast to (1, 1, H, W)
+        hw = hanning_window.to(heatmap.device)
+        if hw.ndim == 2:
+            hw = hw.unsqueeze(0).unsqueeze(0)
+        heatmap_penalized = heatmap * hw
+    # Find peak in (penalized) heatmap
+    heatmap_flat = heatmap_penalized.view(B, -1)  # (B, H*W)
     scores, indices = heatmap_flat.max(dim=-1)  # (B,)
     scores = scores.sigmoid()
         size_norm: (B, 2) normalized to [0, 1] relative to search region
     """
     return size.clamp(min=1) / search_size
+def create_hanning_window(feat_size: int = 16) -> torch.Tensor:
+    """Create a 2D Hanning window for positional prior penalty.
+    Built as the outer product of a symmetric 1D Hann window with itself.
+    Applied to the classification/heatmap score map before peak detection
+    during inference. Suppresses false positives near the edges of the
+    search region, where the target is unlikely to be (it should be near center).
+    A similar window penalty is described for trackers such as OSTrack, SUTrack,
+    SGLATrack, UETrack and DTPTrack (per the original author; not verified here).
+    NOTE(review): the window is a multiplicative weight in [0, 1], so it only
+    penalizes non-negative scores. Multiplying raw (possibly negative) logits by
+    it would pull edge logits toward 0 and raise their post-sigmoid score --
+    confirm callers apply it to probabilities.
+    Args:
+        feat_size: spatial size of feature map (16 for 256/16 stride)
+    Returns:
+        (feat_size, feat_size) Hanning window with values in [0, 1], created
+        with torch's default dtype/device (callers move it as needed). For
+        feat_size > 1 the outermost rows and columns are exactly 0. The maximum
+        is exactly 1 only for odd feat_size; for even feat_size it is shared by
+        the four central cells and is slightly below 1 (about 0.978 for 16).
+    """
+    # periodic=False selects the symmetric window, whose two endpoints are 0.
+    hann_1d = torch.hann_window(feat_size, periodic=False)
+    # Column vector (N, 1) times row vector (1, N) broadcasts to (N, N).
+    hann_2d = hann_1d.unsqueeze(1) * hann_1d.unsqueeze(0)  # outer product
+    return hann_2d