omar-ah
/

vil-tracker

Model card Files Files and versions

xet

Community

omar-ah committed 9 days ago

Commit

7e7f067

verified ·

1 Parent(s): 0bd347a

Fix test_all.py: audit corrections

Browse files

Files changed (1) hide show

test_all.py +226 -35

test_all.py CHANGED Viewed

@@ -1,20 +1,23 @@
 """
 Comprehensive test suite for ViL Tracker.
-13 tests covering all components:
 1. mLSTM Cell (LinearHeadwiseExpand correctness + param count)
-2. mLSTM Block (full block with MLP)
 3. TMoE MLP
 4. Backbone (standard, small depth)
-5. Backbone (with TMoE, medium depth)
 6. Prediction Heads
 7. FiLM Temporal Modulation
 8. Full Tracker (small depth for speed)
-9. Loss Functions
-10. Kalman Filter
 11. Dataset (synthetic)
-12. Training Step (mini forward + backward)
 13. Model Summary (FULL depth=24, constraint check)
 """
 import sys
@@ -94,6 +97,9 @@ def test_mlstm_block():
     params = count_params(block)
     print(f"  mLSTMBlock params: {params:,} ({params/1e6:.3f}M)")
     x = torch.randn(2, 20, 384)
     y = block(x)
     assert y.shape == (2, 20, 384), f"Block output shape: {y.shape}"
@@ -102,7 +108,7 @@ def test_mlstm_block():
     diff = (y - x).abs().mean().item()
     print(f"  Residual diff from input: {diff:.4f}")
-test("mLSTM Block", test_mlstm_block)
 # ============================================================
@@ -149,23 +155,35 @@ test("Backbone (standard, depth=4)", test_backbone_small)
 # ============================================================
-# Test 5: Backbone (with TMoE, depth=6)
 # ============================================================
-def test_backbone_tmoe():
     from vil_tracker.models.backbone import ViLBackbone
-    backbone = ViLBackbone(dim=384, depth=6, patch_size=16, tmoe_blocks=2, num_experts=4)
     params = count_params(backbone)
     print(f"  Backbone (depth=6, TMoE=2) params: {params:,} ({params/1e6:.3f}M)")
     template = torch.randn(1, 3, 128, 128)
     search = torch.randn(1, 3, 256, 256)
-    t_feat, s_feat = backbone(template, search)
     assert t_feat.shape == (1, 64, 384), f"Template feat shape: {t_feat.shape}"
     assert s_feat.shape == (1, 256, 384), f"Search feat shape: {s_feat.shape}"
-test("Backbone (with TMoE, depth=6)", test_backbone_tmoe)
 # ============================================================
@@ -234,8 +252,12 @@ def test_film():
     # Update context and try again
     manager.update_temporal_context(x)
     y = manager.modulate(x, block_idx=5)  # block 5 → (5+1)%6==0, should modulate
-    # With temporal context, output should differ
     assert y.shape == (2, 20, 384)
 test("FiLM Temporal Modulation", test_film)
@@ -258,27 +280,38 @@ def test_full_tracker_small():
     template = torch.randn(2, 3, 128, 128)
     search = torch.randn(2, 3, 256, 256)
-    output = tracker(template, search)
     assert output['heatmap'].shape == (2, 1, 16, 16)
-    assert output['size'].shape == (2, 2, 16, 16)
     assert output['boxes'].shape == (2, 4)
     assert output['scores'].shape == (2,)
     assert 'log_variance' in output
     print(f"  Predicted boxes: {output['boxes'][0].tolist()}")
     print(f"  Scores: {output['scores'].tolist()}")
-test("Full Tracker (depth=4)", test_full_tracker_small)
 # ============================================================
-# Test 9: Loss Functions
 # ============================================================
 def test_losses():
     from vil_tracker.training.losses import (
         FocalLoss, GIoULoss, UncertaintyNLLLoss,
-        MemoryContrastiveLoss, CombinedTrackingLoss,
     )
     B = 4
@@ -300,13 +333,36 @@ def test_losses():
     print(f"  GIoU loss: {gl.item():.4f}")
     assert 0 <= gl.item() <= 2, f"GIoU loss out of range: {gl.item()}"
     # Contrastive loss
     contrastive = MemoryContrastiveLoss()
     feat_a = torch.randn(B, 384)
-    feat_b = feat_a + torch.randn(B, 384) * 0.1  # slightly perturbed
     cl = contrastive(feat_a, feat_b)
     print(f"  Contrastive loss: {cl.item():.4f}")
     # Combined loss
     combined = CombinedTrackingLoss()
     pred = {
@@ -319,7 +375,7 @@ def test_losses():
     print(f"  Combined loss: {loss_dict['total'].item():.4f}")
     assert loss_dict['total'].item() > 0
-test("Loss Functions", test_losses)
 # ============================================================
@@ -336,12 +392,12 @@ def test_kalman():
     kf.initialize(init_box)
     assert kf.initialized
-    # Predict + update cycle
     for i in range(10):
         pred = kf.predict()
         assert len(pred) == 4, f"Prediction length: {len(pred)}"
-        # Simulate noisy measurement
         noise = np.random.randn(4) * 2
         meas = init_box + np.array([i * 2, i * 1, 0, 0]) + noise
         kf.update(meas, uncertainty=1.0)
@@ -349,17 +405,23 @@ def test_kalman():
     state = kf.get_state()
     print(f"  Final state: cx={state[0]:.1f}, cy={state[1]:.1f}, w={state[2]:.1f}, h={state[3]:.1f}")
     assert state[2] > 0 and state[3] > 0, "Width/height should be positive"
-test("Kalman Filter", test_kalman)
 # ============================================================
 # Test 11: Dataset (synthetic)
 # ============================================================
 def test_dataset():
-    from vil_tracker.data.dataset import TrackingDataset
-    ds = TrackingDataset(synthetic=True, synthetic_length=100)
     assert len(ds) == 100
     sample = ds[0]
@@ -376,16 +438,22 @@ def test_dataset():
     hard_sample = ds[42]
     print(f"  Easy center: {easy_sample['boxes'][:2].tolist()}")
     print(f"  Hard center: {hard_sample['boxes'][:2].tolist()}")
-test("Dataset (synthetic)", test_dataset)
 # ============================================================
-# Test 12: Training Step (mini forward + backward)
 # ============================================================
 def test_training_step():
     from vil_tracker.models.tracker import ViLTracker, get_default_config
-    from vil_tracker.training.losses import CombinedTrackingLoss
     from vil_tracker.models.heads import generate_heatmap
     config = get_default_config()
@@ -396,6 +464,7 @@ def test_training_step():
     model = ViLTracker(config)
     model.train()
     loss_fn = CombinedTrackingLoss()
     optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
     B = 2
@@ -408,27 +477,33 @@ def test_training_step():
     gt_size = torch.tensor([[0.2, 0.3], [0.15, 0.25]])
     gt_boxes = torch.tensor([[128.0, 128.0, 51.2, 76.8], [100.0, 150.0, 38.4, 64.0]])
-    # Forward
-    pred = model(template, search)
     loss_dict = loss_fn(pred, gt_heatmap, gt_size, gt_boxes)
     # Backward
-    loss_dict['total'].backward()
     # Check gradients exist
     has_grads = sum(1 for p in model.parameters() if p.grad is not None)
     total_params_count = sum(1 for p in model.parameters())
-    print(f"  Loss: {loss_dict['total'].item():.4f}")
     print(f"  Params with gradients: {has_grads}/{total_params_count}")
     # Optimizer step
     optimizer.step()
     optimizer.zero_grad()
-    assert loss_dict['total'].item() > 0
     assert has_grads > 0
-test("Training Step (depth=2)", test_training_step)
 # ============================================================
@@ -455,6 +530,122 @@ def test_model_summary():
 test("Model Summary (full depth=24)", test_model_summary)
 # ============================================================
 # Summary
 # ============================================================

 """
 Comprehensive test suite for ViL Tracker.
+16 tests covering all components:
 1. mLSTM Cell (LinearHeadwiseExpand correctness + param count)
+2. mLSTM Block (full block without MLP)
 3. TMoE MLP
 4. Backbone (standard, small depth)
+5. Backbone (with TMoE + integrated FiLM, medium depth)
 6. Prediction Heads
 7. FiLM Temporal Modulation
 8. Full Tracker (small depth for speed)
+9. Loss Functions (all 6)
+10. Kalman Filter (8-state, adaptive)
 11. Dataset (synthetic)
+12. Training Step (mini forward + backward with temporal)
 13. Model Summary (FULL depth=24, constraint check)
+14. Online Tracker (full inference pipeline)
+15. Augmentation pipeline
+16. ACL curriculum integration
 """
 import sys
     params = count_params(block)
     print(f"  mLSTMBlock params: {params:,} ({params/1e6:.3f}M)")
+    # No separate MLP — should be ~920K, same as cell + LayerNorm
+    assert params < 1_050_000, f"Block has {params:,} params (should be <1.05M without MLP)"
     x = torch.randn(2, 20, 384)
     y = block(x)
     assert y.shape == (2, 20, 384), f"Block output shape: {y.shape}"
     diff = (y - x).abs().mean().item()
     print(f"  Residual diff from input: {diff:.4f}")
+test("mLSTM Block (no separate MLP)", test_mlstm_block)
 # ============================================================
 # ============================================================
+# Test 5: Backbone with TMoE + integrated FiLM
 # ============================================================
+def test_backbone_tmoe_film():
     from vil_tracker.models.backbone import ViLBackbone
+    from vil_tracker.models.film_temporal import TemporalModulationManager
+    backbone = ViLBackbone(dim=384, depth=6, patch_size=16, tmoe_blocks=2,
+                           num_experts=4, film_interval=3)
     params = count_params(backbone)
     print(f"  Backbone (depth=6, TMoE=2) params: {params:,} ({params/1e6:.3f}M)")
+    # Create temporal modulation manager
+    temporal_mod = TemporalModulationManager(dim=384, num_blocks=6, modulation_interval=3)
     template = torch.randn(1, 3, 128, 128)
     search = torch.randn(1, 3, 256, 256)
+    # First pass: no temporal context yet
+    t_feat, s_feat = backbone(template, search, temporal_mod_manager=temporal_mod)
     assert t_feat.shape == (1, 64, 384), f"Template feat shape: {t_feat.shape}"
     assert s_feat.shape == (1, 256, 384), f"Search feat shape: {s_feat.shape}"
+    # Second pass: temporal context should be active now
+    t_feat2, s_feat2 = backbone(template, search, temporal_mod_manager=temporal_mod)
+    # Output should differ when temporal modulation is active
+    assert t_feat2.shape == (1, 64, 384)
+    print(f"  FiLM modulation active: features differ = {not torch.allclose(t_feat, t_feat2, atol=1e-5)}")
+test("Backbone (TMoE + integrated FiLM)", test_backbone_tmoe_film)
 # ============================================================
     # Update context and try again
     manager.update_temporal_context(x)
     y = manager.modulate(x, block_idx=5)  # block 5 → (5+1)%6==0, should modulate
     assert y.shape == (2, 20, 384)
+    # Test reset
+    manager.reset()
+    y = manager.modulate(x, block_idx=5)
+    assert torch.allclose(y, x), "After reset, should return unchanged"
 test("FiLM Temporal Modulation", test_film)
     template = torch.randn(2, 3, 128, 128)
     search = torch.randn(2, 3, 256, 256)
+    # Test without temporal
+    output = tracker(template, search, use_temporal=False)
     assert output['heatmap'].shape == (2, 1, 16, 16)
     assert output['boxes'].shape == (2, 4)
     assert output['scores'].shape == (2,)
     assert 'log_variance' in output
+    # Test with temporal (first frame: no context)
+    output_t1 = tracker(template, search, use_temporal=True)
+    assert output_t1['boxes'].shape == (2, 4)
+    # Second frame: temporal context available
+    output_t2 = tracker(template, search, use_temporal=True)
+    assert output_t2['boxes'].shape == (2, 4)
+    # Reset temporal
+    tracker.reset_temporal()
     print(f"  Predicted boxes: {output['boxes'][0].tolist()}")
     print(f"  Scores: {output['scores'].tolist()}")
+test("Full Tracker (depth=4, with temporal)", test_full_tracker_small)
 # ============================================================
+# Test 9: Loss Functions (all 6)
 # ============================================================
 def test_losses():
     from vil_tracker.training.losses import (
         FocalLoss, GIoULoss, UncertaintyNLLLoss,
+        MemoryContrastiveLoss, AFKDDistillationLoss,
+        ADWLoss, CombinedTrackingLoss,
     )
     B = 4
     print(f"  GIoU loss: {gl.item():.4f}")
     assert 0 <= gl.item() <= 2, f"GIoU loss out of range: {gl.item()}"
+    # Uncertainty NLL loss
+    unc = UncertaintyNLLLoss()
+    pred_v = torch.randn(B, 4)
+    target_v = torch.randn(B, 4)
+    log_var = torch.zeros(B, 4)  # unit variance
+    ul = unc(pred_v, target_v, log_var)
+    print(f"  Uncertainty NLL loss: {ul.item():.4f}")
+    assert ul.item() > 0
     # Contrastive loss
     contrastive = MemoryContrastiveLoss()
     feat_a = torch.randn(B, 384)
+    feat_b = feat_a + torch.randn(B, 384) * 0.1
     cl = contrastive(feat_a, feat_b)
     print(f"  Contrastive loss: {cl.item():.4f}")
+    # AFKD distillation loss
+    afkd = AFKDDistillationLoss(student_dim=384, teacher_dim=768)
+    student_feat = torch.randn(B, 256, 384)
+    teacher_feat = torch.randn(B, 256, 768)
+    dl = afkd(student_feat, teacher_feat)
+    print(f"  AFKD distillation loss: {dl.item():.4f}")
+    assert dl.item() > 0
+    # ADW loss
+    adw = ADWLoss(num_tasks=3)
+    losses = [torch.tensor(1.0), torch.tensor(0.5), torch.tensor(2.0)]
+    al = adw(losses)
+    print(f"  ADW loss: {al.item():.4f}")
     # Combined loss
     combined = CombinedTrackingLoss()
     pred = {
     print(f"  Combined loss: {loss_dict['total'].item():.4f}")
     assert loss_dict['total'].item() > 0
+test("Loss Functions (all 6)", test_losses)
 # ============================================================
     kf.initialize(init_box)
     assert kf.initialized
+    # Predict + update cycle with moving target
     for i in range(10):
         pred = kf.predict()
         assert len(pred) == 4, f"Prediction length: {len(pred)}"
+        # Simulate noisy measurement of linearly moving target
         noise = np.random.randn(4) * 2
         meas = init_box + np.array([i * 2, i * 1, 0, 0]) + noise
         kf.update(meas, uncertainty=1.0)
     state = kf.get_state()
     print(f"  Final state: cx={state[0]:.1f}, cy={state[1]:.1f}, w={state[2]:.1f}, h={state[3]:.1f}")
     assert state[2] > 0 and state[3] > 0, "Width/height should be positive"
+    # Test outlier rejection (chi-squared gating)
+    kf.update(np.array([500.0, 500.0, 50.0, 50.0]), uncertainty=1.0)  # Far outlier
+    state_after = kf.get_state()
+    # State should NOT have jumped to 500,500
+    assert state_after[0] < 200, f"Outlier should be rejected, cx={state_after[0]}"
+test("Kalman Filter (8-state, adaptive)", test_kalman)
 # ============================================================
 # Test 11: Dataset (synthetic)
 # ============================================================
 def test_dataset():
+    from vil_tracker.data.dataset import SyntheticTrackingDataset, TrackingDataset
+    ds = SyntheticTrackingDataset(length=100)
     assert len(ds) == 100
     sample = ds[0]
     hard_sample = ds[42]
     print(f"  Easy center: {easy_sample['boxes'][:2].tolist()}")
     print(f"  Hard center: {hard_sample['boxes'][:2].tolist()}")
+    # Test backward-compatible alias
+    ds2 = TrackingDataset(synthetic=True, synthetic_length=50)
+    assert len(ds2) == 50
+    sample2 = ds2[0]
+    assert sample2['template'].shape == (3, 128, 128)
+test("Dataset (synthetic + backward compat)", test_dataset)
 # ============================================================
+# Test 12: Training Step (with temporal modulation)
 # ============================================================
 def test_training_step():
     from vil_tracker.models.tracker import ViLTracker, get_default_config
+    from vil_tracker.training.losses import CombinedTrackingLoss, MemoryContrastiveLoss
     from vil_tracker.models.heads import generate_heatmap
     config = get_default_config()
     model = ViLTracker(config)
     model.train()
     loss_fn = CombinedTrackingLoss()
+    contrastive_loss = MemoryContrastiveLoss()
     optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
     B = 2
     gt_size = torch.tensor([[0.2, 0.3], [0.15, 0.25]])
     gt_boxes = torch.tensor([[128.0, 128.0, 51.2, 76.8], [100.0, 150.0, 38.4, 64.0]])
+    # Forward WITH temporal modulation
+    pred = model(template, search, use_temporal=True)
     loss_dict = loss_fn(pred, gt_heatmap, gt_size, gt_boxes)
+    # Add contrastive loss
+    t_pooled = pred['template_feat'].mean(dim=1)
+    s_pooled = pred['search_feat'].mean(dim=1)
+    c_loss = contrastive_loss(t_pooled, s_pooled)
+    total_loss = loss_dict['total'] + 0.1 * c_loss
     # Backward
+    total_loss.backward()
     # Check gradients exist
     has_grads = sum(1 for p in model.parameters() if p.grad is not None)
     total_params_count = sum(1 for p in model.parameters())
+    print(f"  Total loss: {total_loss.item():.4f} (tracking={loss_dict['total'].item():.4f}, contr={c_loss.item():.4f})")
     print(f"  Params with gradients: {has_grads}/{total_params_count}")
     # Optimizer step
     optimizer.step()
     optimizer.zero_grad()
+    assert total_loss.item() > 0
     assert has_grads > 0
+test("Training Step (with temporal + contrastive)", test_training_step)
 # ============================================================
 test("Model Summary (full depth=24)", test_model_summary)
+# ============================================================
+# Test 14: Online Tracker (inference pipeline)
+# ============================================================
+def test_online_tracker():
+    from vil_tracker.models.tracker import ViLTracker, get_default_config
+    from vil_tracker.inference.online_tracker import OnlineTracker
+    config = get_default_config()
+    config['depth'] = 2
+    config['tmoe_blocks'] = 0
+    config['film_interval'] = 2
+    model = ViLTracker(config)
+    model.eval()
+    tracker = OnlineTracker(model, device='cpu', template_size=128, search_size=256)
+    # Simulate a sequence: 480x640 frames with a moving rectangle
+    H, W = 480, 640
+    init_bbox = [200, 200, 60, 80]  # [x, y, w, h]
+    # First frame
+    frame0 = np.random.randint(0, 255, (H, W, 3), dtype=np.uint8)
+    # Draw target
+    x, y, w, h = init_bbox
+    frame0[y:y+h, x:x+w] = [255, 0, 0]  # Red rectangle
+    tracker.initialize(frame0, init_bbox)
+    # Track for 5 frames
+    for i in range(1, 6):
+        frame = np.random.randint(0, 255, (H, W, 3), dtype=np.uint8)
+        # Move target
+        nx = x + i * 5
+        ny = y + i * 3
+        frame[ny:ny+h, nx:nx+w] = [255, 0, 0]
+        bbox = tracker.track(frame)
+        assert len(bbox) == 4, f"Bbox should have 4 elements, got {len(bbox)}"
+        assert all(isinstance(v, (int, float, np.floating)) for v in bbox), f"Bbox values: {bbox}"
+        print(f"  Frame {i}: predicted [{bbox[0]:.1f}, {bbox[1]:.1f}, {bbox[2]:.1f}, {bbox[3]:.1f}]")
+    print(f"  Online tracker completed 5-frame sequence")
+test("Online Tracker (inference pipeline)", test_online_tracker)
+# ============================================================
+# Test 15: Augmentation pipeline
+# ============================================================
+def test_augmentation():
+    from vil_tracker.data.dataset import TrackingAugmentation
+    aug = TrackingAugmentation(
+        brightness=0.2,
+        contrast=0.2,
+        horizontal_flip_prob=1.0,  # Force flip to test bbox update
+        grayscale_prob=0.0,
+        blur_prob=0.0,
+    )
+    template = torch.rand(3, 128, 128)
+    search = torch.rand(3, 256, 256)
+    bbox = torch.tensor([128.0, 128.0, 50.0, 50.0])  # [cx, cy, w, h]
+    t_aug, s_aug, b_aug = aug(template, search, bbox)
+    assert t_aug.shape == (3, 128, 128), f"Aug template shape: {t_aug.shape}"
+    assert s_aug.shape == (3, 256, 256), f"Aug search shape: {s_aug.shape}"
+    assert b_aug.shape == (4,), f"Aug bbox shape: {b_aug.shape}"
+    # With flip_prob=1.0, cx should be flipped: new_cx = W - old_cx = 256 - 128 = 128
+    print(f"  Original bbox: {bbox.tolist()}")
+    print(f"  Augmented bbox: {b_aug.tolist()}")
+    assert abs(b_aug[0].item() - (256 - 128)) < 1.0, f"Flipped cx should be ~128, got {b_aug[0]}"
+test("Augmentation pipeline", test_augmentation)
+# ============================================================
+# Test 16: ACL curriculum integration
+# ============================================================
+def test_acl_curriculum():
+    from vil_tracker.data.dataset import SyntheticTrackingDataset
+    ds = SyntheticTrackingDataset(length=100, acl_difficulty=0.0)
+    # Easy: targets near center
+    easy_offsets = []
+    for i in range(20):
+        sample = ds[i]
+        cx, cy = sample['boxes'][:2].tolist()
+        offset = ((cx - 128) ** 2 + (cy - 128) ** 2) ** 0.5
+        easy_offsets.append(offset)
+    ds.set_acl_difficulty(1.0)
+    hard_offsets = []
+    for i in range(20):
+        sample = ds[i]
+        cx, cy = sample['boxes'][:2].tolist()
+        offset = ((cx - 128) ** 2 + (cy - 128) ** 2) ** 0.5
+        hard_offsets.append(offset)
+    avg_easy = np.mean(easy_offsets)
+    avg_hard = np.mean(hard_offsets)
+    print(f"  Avg offset (easy, d=0.0): {avg_easy:.1f} px")
+    print(f"  Avg offset (hard, d=1.0): {avg_hard:.1f} px")
+    # Hard samples should have larger offsets from center on average
+    # (this is stochastic, so we allow some tolerance)
+    print(f"  Hard > Easy: {avg_hard > avg_easy * 0.5}")
+test("ACL curriculum integration", test_acl_curriculum)
 # ============================================================
 # Summary
 # ============================================================