omar-ah
/

vil-tracker

Model card Files Files and versions

xet

Community

omar-ah committed 9 days ago

Commit

8237685

verified ·

1 Parent(s): c08a3b0

Fix vil_tracker/models/tracker.py: audit corrections

Browse files

Files changed (1) hide show

vil_tracker/models/tracker.py +7 -13

vil_tracker/models/tracker.py CHANGED Viewed

@@ -4,7 +4,7 @@ ViL Tracker: Full model combining backbone, FiLM modulation, and prediction head
 Pipeline:
 1. Template (128x128) + Search (256x256) → PatchEmbed → tokens
 2. Concatenated tokens → ViL backbone (24 mLSTM blocks, bidirectional)
-3. FiLM temporal modulation at intervals (conditioned on prev frame)
 4. Search features → CenterHead → heatmap + size + offset
 5. Optional: UncertaintyHead → log variance for adaptive weighting
 """
@@ -68,7 +68,7 @@ class ViLTracker(nn.Module):
         dim = config['dim']
         depth = config['depth']
-        # Backbone
         self.backbone = ViLBackbone(
             dim=dim,
             depth=depth,
@@ -81,9 +81,10 @@ class ViLTracker(nn.Module):
             drop_path_rate=config['drop_path_rate'],
             tmoe_blocks=config['tmoe_blocks'],
             num_experts=config['num_experts'],
         )
-        # FiLM temporal modulation
         self.temporal_mod = TemporalModulationManager(
             dim=dim,
             num_blocks=depth,
@@ -113,16 +114,9 @@ class ViLTracker(nn.Module):
             dict with predictions: heatmap, size, offset, boxes, scores,
                                    and optionally uncertainty
         """
-        # Backbone forward
-        template_feat, search_feat = self.backbone(template, search)
-        # Optional FiLM temporal modulation on search features
-        if use_temporal:
-            for i in range(self.backbone.depth):
-                if self.temporal_mod.should_modulate(i):
-                    search_feat = self.temporal_mod.modulate(search_feat, i)
-            # Update temporal context for next frame
-            self.temporal_mod.update_temporal_context(search_feat)
         # Prediction heads
         preds = self.center_head(search_feat)

 Pipeline:
 1. Template (128x128) + Search (256x256) → PatchEmbed → tokens
 2. Concatenated tokens → ViL backbone (24 mLSTM blocks, bidirectional)
+3. FiLM temporal modulation integrated BETWEEN backbone blocks
 4. Search features → CenterHead → heatmap + size + offset
 5. Optional: UncertaintyHead → log variance for adaptive weighting
 """
         dim = config['dim']
         depth = config['depth']
+        # Backbone (now accepts temporal_mod_manager as forward arg)
         self.backbone = ViLBackbone(
             dim=dim,
             depth=depth,
             drop_path_rate=config['drop_path_rate'],
             tmoe_blocks=config['tmoe_blocks'],
             num_experts=config['num_experts'],
+            film_interval=config.get('film_interval', 6),
         )
+        # FiLM temporal modulation (applied BETWEEN backbone blocks)
         self.temporal_mod = TemporalModulationManager(
             dim=dim,
             num_blocks=depth,
             dict with predictions: heatmap, size, offset, boxes, scores,
                                    and optionally uncertainty
         """
+        # Backbone forward with optional integrated FiLM modulation
+        temporal_mgr = self.temporal_mod if use_temporal else None
+        template_feat, search_feat = self.backbone(template, search, temporal_mod_manager=temporal_mgr)
         # Prediction heads
         preds = self.center_head(search_feat)