omar-ah committed on
Commit
92a81c6
·
verified ·
1 Parent(s): bb51611

Fix: mLSTM SiLU gate+activation, GroupNorm 192, stochastic depth 0.05, Hanning window

Browse files
vil_tracker/inference/online_tracker.py CHANGED
@@ -22,6 +22,12 @@ from .kalman import KalmanFilter
22
  class OnlineTracker:
23
  """Online single-object tracker using ViL backbone.
24
 
 
 
 
 
 
 
25
  Usage:
26
  tracker = OnlineTracker(model, device='cuda')
27
  tracker.initialize(first_frame, init_bbox) # [x, y, w, h]
@@ -38,6 +44,7 @@ class OnlineTracker:
38
  search_scale: float = 4.0,
39
  confidence_threshold: float = 0.3,
40
  template_update_threshold: float = 0.8,
 
41
  ):
42
  self.model = model
43
  self.device = device
@@ -49,6 +56,14 @@ class OnlineTracker:
49
 
50
  self.model.eval()
51
 
 
 
 
 
 
 
 
 
52
  # State
53
  self.template = None
54
  self.kalman = KalmanFilter()
@@ -114,9 +129,18 @@ class OnlineTracker:
114
  use_temporal=(self.frame_count > 1),
115
  )
116
 
117
- # Extract predictions
118
- boxes = output['boxes'].cpu().numpy()[0] # [cx, cy, w, h] in search region
119
- score = output['scores'].cpu().item()
 
 
 
 
 
 
 
 
 
120
 
121
  # Map back to original frame coordinates
122
  scale_factor = self.search_scale * max(pred_sz) / self.search_size
 
22
  class OnlineTracker:
23
  """Online single-object tracker using ViL backbone.
24
 
25
+ Combines:
26
+ - Kalman filter for dynamic motion-model-based search centering (handles UAV ego-motion)
27
+ - Hanning window for positional prior penalty on heatmap (suppresses edge false positives)
28
+ - Uncertainty-adaptive Kalman measurement noise
29
+ - Confidence-gated template update
30
+
31
  Usage:
32
  tracker = OnlineTracker(model, device='cuda')
33
  tracker.initialize(first_frame, init_bbox) # [x, y, w, h]
 
44
  search_scale: float = 4.0,
45
  confidence_threshold: float = 0.3,
46
  template_update_threshold: float = 0.8,
47
+ use_hanning: bool = True,
48
  ):
49
  self.model = model
50
  self.device = device
 
56
 
57
  self.model.eval()
58
 
59
+ # Hanning window for positional prior (generated once, reused every frame)
60
+ feat_size = search_size // 16 # 256/16 = 16
61
+ if use_hanning:
62
+ from ..models.heads import create_hanning_window
63
+ self.hanning_window = create_hanning_window(feat_size).to(device)
64
+ else:
65
+ self.hanning_window = None
66
+
67
  # State
68
  self.template = None
69
  self.kalman = KalmanFilter()
 
129
  use_temporal=(self.frame_count > 1),
130
  )
131
 
132
+ # Extract predictions — re-decode with Hanning window for inference
133
+ from ..models.heads import decode_predictions
134
+ boxes_tensor, scores_tensor = decode_predictions(
135
+ output['heatmap'],
136
+ output['size'],
137
+ output['offset'],
138
+ search_size=self.search_size,
139
+ feat_size=self.search_size // 16,
140
+ hanning_window=self.hanning_window,
141
+ )
142
+ boxes = boxes_tensor.cpu().numpy()[0] # [cx, cy, w, h] in search region
143
+ score = scores_tensor.cpu().item()
144
 
145
  # Map back to original frame coordinates
146
  scale_factor = self.search_scale * max(pred_sz) / self.search_size