omar-ah
/

vil-tracker

Model card Files Files and versions

xet

Community

omar-ah committed 9 days ago

Commit

92a81c6

verified ·

1 Parent(s): bb51611

Fix: mLSTM SiLU gate+activation, GroupNorm 192, stochastic depth 0.05, Hanning window

Browse files

Files changed (1) hide show

vil_tracker/inference/online_tracker.py +27 -3

vil_tracker/inference/online_tracker.py CHANGED Viewed

@@ -22,6 +22,12 @@ from .kalman import KalmanFilter
 class OnlineTracker:
     """Online single-object tracker using ViL backbone.
     Usage:
         tracker = OnlineTracker(model, device='cuda')
         tracker.initialize(first_frame, init_bbox)  # [x, y, w, h]
@@ -38,6 +44,7 @@ class OnlineTracker:
         search_scale: float = 4.0,
         confidence_threshold: float = 0.3,
         template_update_threshold: float = 0.8,
     ):
         self.model = model
         self.device = device
@@ -49,6 +56,14 @@ class OnlineTracker:
         self.model.eval()
         # State
         self.template = None
         self.kalman = KalmanFilter()
@@ -114,9 +129,18 @@ class OnlineTracker:
                 use_temporal=(self.frame_count > 1),
             )
-        # Extract predictions
-        boxes = output['boxes'].cpu().numpy()[0]  # [cx, cy, w, h] in search region
-        score = output['scores'].cpu().item()
         # Map back to original frame coordinates
         scale_factor = self.search_scale * max(pred_sz) / self.search_size

 class OnlineTracker:
     """Online single-object tracker using ViL backbone.
+    Combines:
+    - Kalman filter for dynamic motion-model-based search centering (handles UAV ego-motion)
+    - Hanning window for positional prior penalty on heatmap (suppresses edge false positives)
+    - Uncertainty-adaptive Kalman measurement noise
+    - Confidence-gated template update
     Usage:
         tracker = OnlineTracker(model, device='cuda')
         tracker.initialize(first_frame, init_bbox)  # [x, y, w, h]
         search_scale: float = 4.0,
         confidence_threshold: float = 0.3,
         template_update_threshold: float = 0.8,
+        use_hanning: bool = True,
     ):
         self.model = model
         self.device = device
         self.model.eval()
+        # Hanning window for positional prior (generated once, reused every frame)
+        feat_size = search_size // 16  # 256/16 = 16
+        if use_hanning:
+            from ..models.heads import create_hanning_window
+            self.hanning_window = create_hanning_window(feat_size).to(device)
+        else:
+            self.hanning_window = None
         # State
         self.template = None
         self.kalman = KalmanFilter()
                 use_temporal=(self.frame_count > 1),
             )
+        # Extract predictions — re-decode with Hanning window for inference
+        from ..models.heads import decode_predictions
+        boxes_tensor, scores_tensor = decode_predictions(
+            output['heatmap'],
+            output['size'],
+            output['offset'],
+            search_size=self.search_size,
+            feat_size=self.search_size // 16,
+            hanning_window=self.hanning_window,
+        )
+        boxes = boxes_tensor.cpu().numpy()[0]  # [cx, cy, w, h] in search region
+        score = scores_tensor.cpu().item()
         # Map back to original frame coordinates
         scale_factor = self.search_scale * max(pred_sz) / self.search_size