Fix: mLSTM SiLU gate+activation, GroupNorm 192, stochastic depth 0.05, Hanning window
Browse files
vil_tracker/inference/online_tracker.py
CHANGED
|
@@ -22,6 +22,12 @@ from .kalman import KalmanFilter
|
|
| 22 |
class OnlineTracker:
|
| 23 |
"""Online single-object tracker using ViL backbone.
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
Usage:
|
| 26 |
tracker = OnlineTracker(model, device='cuda')
|
| 27 |
tracker.initialize(first_frame, init_bbox) # [x, y, w, h]
|
|
@@ -38,6 +44,7 @@ class OnlineTracker:
|
|
| 38 |
search_scale: float = 4.0,
|
| 39 |
confidence_threshold: float = 0.3,
|
| 40 |
template_update_threshold: float = 0.8,
|
|
|
|
| 41 |
):
|
| 42 |
self.model = model
|
| 43 |
self.device = device
|
|
@@ -49,6 +56,14 @@ class OnlineTracker:
|
|
| 49 |
|
| 50 |
self.model.eval()
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
# State
|
| 53 |
self.template = None
|
| 54 |
self.kalman = KalmanFilter()
|
|
@@ -114,9 +129,18 @@ class OnlineTracker:
|
|
| 114 |
use_temporal=(self.frame_count > 1),
|
| 115 |
)
|
| 116 |
|
| 117 |
-
# Extract predictions
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
# Map back to original frame coordinates
|
| 122 |
scale_factor = self.search_scale * max(pred_sz) / self.search_size
|
|
|
|
| 22 |
class OnlineTracker:
|
| 23 |
"""Online single-object tracker using ViL backbone.
|
| 24 |
|
| 25 |
+
Combines:
|
| 26 |
+
- Kalman filter for dynamic motion-model-based search centering (handles UAV ego-motion)
|
| 27 |
+
- Hanning window for positional prior penalty on heatmap (suppresses edge false positives)
|
| 28 |
+
- Uncertainty-adaptive Kalman measurement noise
|
| 29 |
+
- Confidence-gated template update
|
| 30 |
+
|
| 31 |
Usage:
|
| 32 |
tracker = OnlineTracker(model, device='cuda')
|
| 33 |
tracker.initialize(first_frame, init_bbox) # [x, y, w, h]
|
|
|
|
| 44 |
search_scale: float = 4.0,
|
| 45 |
confidence_threshold: float = 0.3,
|
| 46 |
template_update_threshold: float = 0.8,
|
| 47 |
+
use_hanning: bool = True,
|
| 48 |
):
|
| 49 |
self.model = model
|
| 50 |
self.device = device
|
|
|
|
| 56 |
|
| 57 |
self.model.eval()
|
| 58 |
|
| 59 |
+
# Hanning window for positional prior (generated once, reused every frame)
|
| 60 |
+
feat_size = search_size // 16  # e.g. search_size=256 -> 256 // 16 = 16
|
| 61 |
+
if use_hanning:
|
| 62 |
+
from ..models.heads import create_hanning_window
|
| 63 |
+
self.hanning_window = create_hanning_window(feat_size).to(device)
|
| 64 |
+
else:
|
| 65 |
+
self.hanning_window = None
|
| 66 |
+
|
| 67 |
# State
|
| 68 |
self.template = None
|
| 69 |
self.kalman = KalmanFilter()
|
|
|
|
| 129 |
use_temporal=(self.frame_count > 1),
|
| 130 |
)
|
| 131 |
|
| 132 |
+
# Extract predictions — re-decode with Hanning window for inference
|
| 133 |
+
from ..models.heads import decode_predictions
|
| 134 |
+
boxes_tensor, scores_tensor = decode_predictions(
|
| 135 |
+
output['heatmap'],
|
| 136 |
+
output['size'],
|
| 137 |
+
output['offset'],
|
| 138 |
+
search_size=self.search_size,
|
| 139 |
+
feat_size=self.search_size // 16,
|
| 140 |
+
hanning_window=self.hanning_window,
|
| 141 |
+
)
|
| 142 |
+
boxes = boxes_tensor.cpu().numpy()[0] # [cx, cy, w, h] in search region
|
| 143 |
+
score = scores_tensor.cpu().item()
|
| 144 |
|
| 145 |
# Map back to original frame coordinates
|
| 146 |
scale_factor = self.search_scale * max(pred_sz) / self.search_size
|