| """ |
| Comprehensive test suite for ViL Tracker. |
| |
| 16 tests covering all components: |
| 1. mLSTM Cell (LinearHeadwiseExpand correctness + param count) |
| 2. mLSTM Block (full block without MLP) |
| 3. TMoE MLP |
| 4. Backbone (standard, small depth) |
| 5. Backbone (with TMoE + integrated FiLM, medium depth) |
| 6. Prediction Heads |
| 7. FiLM Temporal Modulation |
| 8. Full Tracker (small depth for speed) |
| 9. Loss Functions (all 6) |
| 10. Kalman Filter (8-state, adaptive) |
| 11. Dataset (synthetic) |
| 12. Training Step (mini forward + backward with temporal) |
| 13. Model Summary (FULL depth=24, constraint check) |
| 14. Online Tracker (full inference pipeline) |
| 15. Augmentation pipeline |
| 16. ACL curriculum integration |
| """ |
|
|
| import sys |
| import time |
| import torch |
| import numpy as np |
|
|
# Deterministic seeds so model init and synthetic data are reproducible
# across runs of the suite.
torch.manual_seed(42)
np.random.seed(42)


# Global pass/fail counters, updated only by test() below.
PASS = 0
FAIL = 0


def test(name, fn):
    """Run a single test callable, record the result, and print a status line.

    Args:
        name: Human-readable test name shown in the status output.
        fn: Zero-argument callable containing the test's assertions.

    Any exception raised by ``fn`` is caught and counted as a failure (with a
    traceback), so one failing test never aborts the rest of the suite.
    """
    global PASS, FAIL
    print(f"\nTest {PASS + FAIL + 1}: {name}...", flush=True)
    try:
        fn()
        PASS += 1
        print(" ✅ PASSED")  # plain string: no placeholders, so no f-prefix needed
    except Exception as e:
        FAIL += 1
        print(f" ❌ FAILED: {e}")
        import traceback
        traceback.print_exc()


# Prevent pytest from mis-collecting the harness itself as a test function.
test.__test__ = False
|
|
|
|
def count_params(model):
    """Return the total number of parameters in *model* (trainable and frozen)."""
    total = 0
    for param in model.parameters():
        total += param.numel()
    return total
|
|
|
|
| |
| |
| |
def test_mlstm_cell():
    """mLSTM cell: headwise projection params/shapes, cell budget, GroupNorm groups, bidirectional scan."""
    from vil_tracker.models.mlstm import mLSTMCell, LinearHeadwiseExpand

    # Headwise projection: 768 dims over 192 heads -> 4 dims/head, so each head
    # owns a 4x4 weight and the total is 192 * 4 * 4 (bias disabled).
    lhe = LinearHeadwiseExpand(768, num_heads=192, bias=False)
    lhe_params = count_params(lhe)
    assert lhe_params == 192 * 4 * 4, f"LHE params: {lhe_params} != {192*4*4}"

    # Projection preserves the (batch, seq, dim) shape.
    x = torch.randn(2, 10, 768)
    y = lhe(x)
    assert y.shape == (2, 10, 768), f"LHE output shape: {y.shape}"

    # Full cell at the tracker's working dimension.
    cell = mLSTMCell(dim=384, proj_factor=2.0, qkv_proj_blocksize=4, num_heads=4)
    cell_params = count_params(cell)
    print(f" mLSTMCell params: {cell_params:,} ({cell_params/1e6:.3f}M)")

    # Budget window: the cell must land between 0.8M and 1.0M parameters.
    assert cell_params < 1_000_000, f"Cell has {cell_params:,} params (should be <1M)"
    assert cell_params > 800_000, f"Cell has {cell_params:,} params (should be >800K)"

    # Output GroupNorm must use 192 groups (= 384 * proj_factor 2.0 / blocksize 4),
    # i.e. one group per projection sub-head rather than per attention head.
    assert cell.outnorm.num_groups == 192, f"GroupNorm should have 192 groups, got {cell.outnorm.num_groups}"
    print(f" GroupNorm groups: {cell.outnorm.num_groups} (correct: per-projection-head)")

    # Forward scan preserves shape.
    x = torch.randn(2, 20, 384)
    y = cell(x)
    assert y.shape == (2, 20, 384), f"Cell output shape: {y.shape}"

    # Reverse-direction scan: same shape...
    y_rev = cell(x, reverse=True)
    assert y_rev.shape == (2, 20, 384), f"Reverse output shape: {y_rev.shape}"

    # ...but must actually produce different values than the forward scan.
    assert not torch.allclose(y, y_rev, atol=1e-3), "Forward and reverse should differ"


test("mLSTM Cell (LinearHeadwiseExpand)", test_mlstm_cell)
|
|
|
|
| |
| |
| |
def test_mlstm_block():
    """mLSTM block: parameter budget (no separate MLP) and residual behavior."""
    from vil_tracker.models.mlstm import mLSTMBlock

    blk = mLSTMBlock(
        dim=384,
        proj_factor=2.0,
        qkv_proj_blocksize=4,
        num_heads=4,
        mlp_ratio=4.0,
    )
    params = count_params(blk)
    print(f" mLSTMBlock params: {params:,} ({params/1e6:.3f}M)")

    # Per the design, the block carries no separate MLP, so it must stay
    # under 1.05M parameters.
    assert params < 1_050_000, f"Block has {params:,} params (should be <1.05M without MLP)"

    # Shape must be preserved through the block.
    x = torch.randn(2, 20, 384)
    y = blk(x)
    assert y.shape == (2, 20, 384), f"Block output shape: {y.shape}"

    # Report how far the (residual) output drifts from the input.
    diff = (y - x).abs().mean().item()
    print(f" Residual diff from input: {diff:.4f}")


test("mLSTM Block (no separate MLP)", test_mlstm_block)
|
|
|
|
| |
| |
| |
def test_tmoe():
    """TMoE MLP: shape preservation and shared-expert freezing."""
    from vil_tracker.models.backbone import TMoEMLP

    moe = TMoEMLP(dim=384, mlp_ratio=4.0, num_experts=4)
    params = count_params(moe)
    print(f" TMoEMLP params: {params:,} ({params/1e6:.3f}M)")

    # Mixture forward must keep the (batch, seq, dim) shape.
    x = torch.randn(2, 20, 384)
    y = moe(x)
    assert y.shape == (2, 20, 384), f"TMoE output shape: {y.shape}"

    # After freezing, no shared-expert parameter may still require grad.
    moe.freeze_shared_expert()
    shared = list(moe.shared_expert.parameters())
    frozen = [p for p in shared if not p.requires_grad]
    assert len(frozen) == len(shared), "Shared expert should be fully frozen"


test("TMoE MLP", test_tmoe)
|
|
|
|
| |
| |
| |
def test_backbone_small():
    """Plain ViL backbone at reduced depth: parameter report plus feature shapes."""
    from vil_tracker.models.backbone import ViLBackbone

    net = ViLBackbone(dim=384, depth=4, patch_size=16, tmoe_blocks=0)
    params = count_params(net)
    print(f" Backbone (depth=4, no TMoE) params: {params:,} ({params/1e6:.3f}M)")

    # patch_size=16: 128px template -> 8*8 = 64 tokens; 256px search -> 16*16 = 256.
    template = torch.randn(2, 3, 128, 128)
    search = torch.randn(2, 3, 256, 256)
    t_feat, s_feat = net(template, search)

    assert t_feat.shape == (2, 64, 384), f"Template feat shape: {t_feat.shape}"
    assert s_feat.shape == (2, 256, 384), f"Search feat shape: {s_feat.shape}"


test("Backbone (standard, depth=4)", test_backbone_small)
|
|
|
|
| |
| |
| |
def test_backbone_tmoe_film():
    """Backbone with TMoE MLPs enabled and a FiLM temporal-modulation manager attached."""
    from vil_tracker.models.backbone import ViLBackbone
    from vil_tracker.models.film_temporal import TemporalModulationManager

    # depth=6 with 2 TMoE blocks; film_interval must match the manager below.
    backbone = ViLBackbone(dim=384, depth=6, patch_size=16, tmoe_blocks=2,
                           num_experts=4, film_interval=3)
    params = count_params(backbone)
    print(f" Backbone (depth=6, TMoE=2) params: {params:,} ({params/1e6:.3f}M)")

    # Manager configured for the same block count and modulation interval.
    temporal_mod = TemporalModulationManager(dim=384, num_blocks=6, modulation_interval=3)

    template = torch.randn(1, 3, 128, 128)
    search = torch.randn(1, 3, 256, 256)

    # First pass with the manager attached: shapes must match the plain backbone.
    t_feat, s_feat = backbone(template, search, temporal_mod_manager=temporal_mod)
    assert t_feat.shape == (1, 64, 384), f"Template feat shape: {t_feat.shape}"
    assert s_feat.shape == (1, 256, 384), f"Search feat shape: {s_feat.shape}"

    # Second pass on identical inputs: if the backbone fed temporal context into
    # the manager, FiLM should make the features differ (reported, not asserted
    # — NOTE(review): presumably context is updated inside the backbone; confirm).
    t_feat2, s_feat2 = backbone(template, search, temporal_mod_manager=temporal_mod)

    assert t_feat2.shape == (1, 64, 384)
    print(f" FiLM modulation active: features differ = {not torch.allclose(t_feat, t_feat2, atol=1e-5)}")


test("Backbone (TMoE + integrated FiLM)", test_backbone_tmoe_film)
|
|
|
|
| |
| |
| |
def test_heads():
    """Prediction heads: output map shapes, box decoding, and Hanning-window scoring."""
    from vil_tracker.models.heads import CenterHead, UncertaintyHead, decode_predictions, create_hanning_window

    # 256 search tokens correspond to a 16x16 feature map.
    center_head = CenterHead(dim=384, feat_size=16)
    unc_head = UncertaintyHead(dim=384, feat_size=16)

    print(f" CenterHead params: {count_params(center_head):,}")
    print(f" UncertaintyHead params: {count_params(unc_head):,}")

    # Center head emits heatmap (1ch), size (2ch) and offset (2ch) maps.
    search_feat = torch.randn(2, 256, 384)
    preds = center_head(search_feat)

    assert preds['heatmap'].shape == (2, 1, 16, 16), f"Heatmap shape: {preds['heatmap'].shape}"
    assert preds['size'].shape == (2, 2, 16, 16), f"Size shape: {preds['size'].shape}"
    assert preds['offset'].shape == (2, 2, 16, 16), f"Offset shape: {preds['offset'].shape}"

    # Decoding yields one 4-element box and one confidence score per batch item.
    boxes, scores = decode_predictions(preds['heatmap'], preds['size'], preds['offset'])
    assert boxes.shape == (2, 4), f"Boxes shape: {boxes.shape}"
    assert scores.shape == (2,), f"Scores shape: {scores.shape}"

    # Hanning window: ~1.0 at the center, ~0 in the corners.
    hann = create_hanning_window(16)
    assert hann.shape == (16, 16), f"Hanning shape: {hann.shape}"
    assert abs(hann[8, 8].item() - 1.0) < 0.05, f"Hanning center should be ~1.0, got {hann[8, 8]}"
    assert hann[0, 0].item() < 0.01, f"Hanning corner should be ~0, got {hann[0, 0]}"

    # Decoding with the window applied must still produce valid boxes/scores.
    boxes_h, scores_h = decode_predictions(preds['heatmap'], preds['size'], preds['offset'],
                                           hanning_window=hann)
    assert boxes_h.shape == (2, 4), f"Hanning boxes shape: {boxes_h.shape}"
    print(f" Hanning window: center={hann[8,8]:.3f}, corner={hann[0,0]:.6f}")
    print(f" Without Hanning: box={boxes[0].tolist()}, score={scores[0].item():.4f}")
    print(f" With Hanning: box={boxes_h[0].tolist()}, score={scores_h[0].item():.4f}")

    # Uncertainty head emits a single log-variance map.
    log_var = unc_head(search_feat)
    assert log_var.shape == (2, 1, 16, 16), f"Log variance shape: {log_var.shape}"


test("Prediction Heads", test_heads)
|
|
|
|
| |
| |
| |
def test_film():
    """FiLM temporal stack: reliability calibration, one modulation step, and the manager lifecycle."""
    from vil_tracker.models.film_temporal import (
        TemporalReliabilityCalibrator,
        FiLMTemporalModulation,
        TemporalModulationManager,
    )

    calib = TemporalReliabilityCalibrator(384)
    film = FiLMTemporalModulation(384)

    x = torch.randn(2, 20, 384)   # current features
    tc = torch.randn(2, 20, 384)  # temporal context

    # Reliability is a per-token scalar gate constrained to [0, 1].
    rel = calib(tc)
    assert rel.shape == (2, 20, 1), f"Reliability shape: {rel.shape}"
    assert (rel >= 0).all() and (rel <= 1).all(), "Reliability not in [0,1]"

    # Modulation keeps the feature shape.
    modulated = film(x, tc, rel)
    assert modulated.shape == (2, 20, 384), f"Modulated shape: {modulated.shape}"

    # Manager variant used by the full-depth backbone (24 blocks, every 6th).
    manager = TemporalModulationManager(dim=384, num_blocks=24, modulation_interval=6)
    print(f" TemporalModulationManager params: {count_params(manager):,}")

    # Without any stored temporal context, modulate() must be the identity.
    y = manager.modulate(x, block_idx=5)
    assert torch.allclose(y, x), "Should return unchanged without temporal context"

    # After storing context, modulate() must still preserve the shape.
    manager.update_temporal_context(x)
    y = manager.modulate(x, block_idx=5)
    assert y.shape == (2, 20, 384)

    # reset() drops the stored context, restoring identity behavior.
    manager.reset()
    y = manager.modulate(x, block_idx=5)
    assert torch.allclose(y, x), "After reset, should return unchanged"


test("FiLM Temporal Modulation", test_film)
|
|
|
|
| |
| |
| |
def test_full_tracker_small():
    """End-to-end tracker forward in single-frame (4D) and multi-frame (5D, K=3) modes."""
    from vil_tracker.models.tracker import ViLTracker, get_default_config

    # Shrink depth for speed but keep TMoE and FiLM enabled so both paths run.
    config = get_default_config()
    config['depth'] = 4
    config['tmoe_blocks'] = 1
    config['film_interval'] = 2

    tracker = ViLTracker(config)
    params = count_params(tracker)
    print(f" Tracker (depth=4) params: {params:,} ({params/1e6:.3f}M)")

    B, K = 2, 3
    template = torch.randn(B, 3, 128, 128)

    # Single-frame path: 4D search tensor, temporal modulation disabled.
    search_single = torch.randn(B, 3, 256, 256)
    output_s = tracker(template, search_single, use_temporal=False)
    assert output_s['heatmap'].shape == (B, 1, 16, 16), f"Single heatmap: {output_s['heatmap'].shape}"
    assert output_s['boxes'].shape == (B, 4), f"Single boxes: {output_s['boxes'].shape}"
    assert output_s['scores'].shape == (B,), f"Single scores: {output_s['scores'].shape}"
    print(f" Single-frame: boxes={output_s['boxes'][0].tolist()}")

    # Multi-frame path: 5D (B, K, C, H, W) input with temporal modulation on;
    # every output gains a K dimension and per-frame search features are returned.
    searches = torch.randn(B, K, 3, 256, 256)
    output_m = tracker(template, searches, use_temporal=True)
    assert output_m['heatmap'].shape == (B, K, 1, 16, 16), f"Multi heatmap: {output_m['heatmap'].shape}"
    assert output_m['boxes'].shape == (B, K, 4), f"Multi boxes: {output_m['boxes'].shape}"
    assert output_m['scores'].shape == (B, K), f"Multi scores: {output_m['scores'].shape}"
    assert output_m['search_feats'].shape == (B, K, 256, 384), f"Multi feats: {output_m['search_feats'].shape}"
    print(f" Multi-frame (K={K}): frame 0 box={output_m['boxes'][0,0].tolist()}")
    print(f" frame 2 box={output_m['boxes'][0,2].tolist()}")

    # Clear accumulated temporal state so later tests start fresh.
    tracker.reset_temporal()


test("Full Tracker (single + multi-frame)", test_full_tracker_small)
|
|
|
|
| |
| |
| |
def test_losses():
    """Smoke-test every loss: focal, GIoU, uncertainty NLL, contrastive, AFKD, ADW, and the combined wrapper."""
    from vil_tracker.training.losses import (
        FocalLoss, GIoULoss, UncertaintyNLLLoss,
        MemoryContrastiveLoss, AFKDDistillationLoss,
        ADWLoss, CombinedTrackingLoss,
    )

    B = 4

    # Focal loss on a heatmap with a single positive cell at (8, 8).
    focal = FocalLoss()
    pred_hm = torch.randn(B, 1, 16, 16)
    gt_hm = torch.zeros(B, 1, 16, 16)
    gt_hm[:, :, 8, 8] = 1.0
    fl = focal(pred_hm, gt_hm)
    print(f" Focal loss: {fl.item():.4f}")
    assert fl.item() > 0, "Focal loss should be positive"

    # GIoU on nearly-overlapping boxes; GIoU loss must stay within [0, 2].
    giou = GIoULoss()
    pred_box = torch.tensor([[128.0, 128.0, 50.0, 50.0]] * B)
    gt_box = torch.tensor([[130.0, 130.0, 48.0, 48.0]] * B)
    gl = giou(pred_box, gt_box)
    print(f" GIoU loss: {gl.item():.4f}")
    assert 0 <= gl.item() <= 2, f"GIoU loss out of range: {gl.item()}"

    # Uncertainty NLL with log-variance fixed at 0 (unit variance).
    unc = UncertaintyNLLLoss()
    pred_v = torch.randn(B, 4)
    target_v = torch.randn(B, 4)
    log_var = torch.zeros(B, 4)
    ul = unc(pred_v, target_v, log_var)
    print(f" Uncertainty NLL loss: {ul.item():.4f}")
    assert ul.item() > 0

    # Contrastive loss on a pair of near-duplicate feature batches.
    contrastive = MemoryContrastiveLoss()
    feat_a = torch.randn(B, 384)
    feat_b = feat_a + torch.randn(B, 384) * 0.1
    cl = contrastive(feat_a, feat_b)
    print(f" Contrastive loss: {cl.item():.4f}")

    # AFKD distillation between 384-dim student and 768-dim teacher features.
    afkd = AFKDDistillationLoss(student_dim=384, teacher_dim=768)
    student_feat = torch.randn(B, 256, 384)
    teacher_feat = torch.randn(B, 256, 768)
    dl = afkd(student_feat, teacher_feat)
    print(f" AFKD distillation loss: {dl.item():.4f}")
    assert dl.item() > 0

    # Adaptive dynamic weighting over three scalar task losses.
    adw = ADWLoss(num_tasks=3)
    losses = [torch.tensor(1.0), torch.tensor(0.5), torch.tensor(2.0)]
    al = adw(losses)
    print(f" ADW loss: {al.item():.4f}")

    # Combined wrapper consumes the prediction dict plus heatmap/size/box targets.
    combined = CombinedTrackingLoss()
    pred = {
        'heatmap': pred_hm,
        'size': torch.rand(B, 2, 16, 16),
        'boxes': pred_box,
        'log_variance': torch.randn(B, 1, 16, 16),
    }
    loss_dict = combined(pred, gt_hm, torch.tensor([[0.2, 0.2]] * B), gt_box)
    print(f" Combined loss: {loss_dict['total'].item():.4f}")
    assert loss_dict['total'].item() > 0


test("Loss Functions (all 6)", test_losses)
|
|
|
|
| |
| |
| |
def test_kalman():
    """Kalman filter: init flag, predict/update cycle on a drifting box, outlier rejection."""
    from vil_tracker.inference.kalman import KalmanFilter

    kf = KalmanFilter()
    assert not kf.initialized

    # Initialize from a 4-element box (cx, cy, w, h per the f-string below).
    init_box = np.array([100.0, 100.0, 50.0, 50.0])
    kf.initialize(init_box)
    assert kf.initialized

    # Simulate a target drifting +2px/frame in x and +1px/frame in y with
    # Gaussian measurement noise (sigma ~2px).
    for i in range(10):
        pred = kf.predict()
        assert len(pred) == 4, f"Prediction length: {len(pred)}"

        noise = np.random.randn(4) * 2
        meas = init_box + np.array([i * 2, i * 1, 0, 0]) + noise
        kf.update(meas, uncertainty=1.0)

    state = kf.get_state()
    print(f" Final state: cx={state[0]:.1f}, cy={state[1]:.1f}, w={state[2]:.1f}, h={state[3]:.1f}")
    assert state[2] > 0 and state[3] > 0, "Width/height should be positive"

    # A wildly inconsistent measurement (jump to ~500,500) must not drag the
    # filtered state along with it.
    kf.update(np.array([500.0, 500.0, 50.0, 50.0]), uncertainty=1.0)
    state_after = kf.get_state()

    assert state_after[0] < 200, f"Outlier should be rejected, cx={state_after[0]}"


test("Kalman Filter (8-state, adaptive)", test_kalman)
|
|
|
|
| |
| |
| |
def test_dataset():
    """Synthetic dataset: sample shapes, target motion, the ACL knob, and the compat wrapper."""
    from vil_tracker.data.dataset import SyntheticTrackingDataset, TrackingDataset

    ds = SyntheticTrackingDataset(length=100, clip_length=3)
    assert len(ds) == 100

    # Every clip sample must carry these exact tensor shapes.
    sample = ds[0]
    expected_shapes = {
        'template': (3, 128, 128),
        'searches': (3, 3, 256, 256),
        'heatmaps': (3, 1, 16, 16),
        'sizes': (3, 2),
        'boxes': (3, 4),
    }
    for key, shape in expected_shapes.items():
        assert sample[key].shape == shape, f"{key.capitalize()} shape: {sample[key].shape}"

    # The target center should move across the clip's frames.
    cx_f0 = sample['boxes'][0, 0].item()
    cx_f2 = sample['boxes'][2, 0].item()
    print(f" Frame 0 cx: {cx_f0:.1f}, Frame 2 cx: {cx_f2:.1f} (moving target)")

    # Resample the same index at both ends of the ACL difficulty range.
    ds.set_acl_difficulty(0.0)
    easy_sample = ds[42]
    ds.set_acl_difficulty(1.0)
    hard_sample = ds[42]
    print(f" Easy frame spread: {(easy_sample['boxes'][:, 0].max() - easy_sample['boxes'][:, 0].min()).item():.1f} px")
    print(f" Hard frame spread: {(hard_sample['boxes'][:, 0].max() - hard_sample['boxes'][:, 0].min()).item():.1f} px")

    # Backward-compatible wrapper around the synthetic dataset.
    ds2 = TrackingDataset(synthetic=True, synthetic_length=50, clip_length=3)
    assert len(ds2) == 50
    sample2 = ds2[0]
    assert sample2['searches'].shape[0] == 3, "Clip length should be 3"


test("Dataset (synthetic + backward compat)", test_dataset)
|
|
|
|
| |
| |
| |
def test_training_step():
    """One optimization step over a K=3 clip: forward, per-frame combined loss,
    template/search contrastive term, backward, and an optimizer step.

    Fix: dropped the unused ``generate_heatmap`` import (ground truth is built
    manually below).
    """
    from vil_tracker.models.tracker import ViLTracker, get_default_config
    from vil_tracker.training.losses import CombinedTrackingLoss, MemoryContrastiveLoss

    # Tiny configuration (depth=2, no TMoE) keeps the step fast on CPU.
    config = get_default_config()
    config['depth'] = 2
    config['tmoe_blocks'] = 0
    config['film_interval'] = 2

    model = ViLTracker(config)
    model.train()
    loss_fn = CombinedTrackingLoss()
    contrastive_loss = MemoryContrastiveLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

    B, K = 2, 3
    template = torch.randn(B, 3, 128, 128)
    searches = torch.randn(B, K, 3, 256, 256)

    # Ground truth: target centered at heatmap cell (8, 8) in every frame.
    gt_heatmaps = torch.zeros(B, K, 1, 16, 16)
    gt_heatmaps[:, :, :, 8, 8] = 1.0
    gt_sizes = torch.tensor([[[0.2, 0.3]] * K] * B)
    gt_boxes = torch.tensor([[[128.0, 128.0, 51.2, 76.8]] * K] * B)

    # Forward over the whole clip with temporal modulation enabled.
    pred = model(template, searches, use_temporal=True)

    assert pred['heatmap'].shape == (B, K, 1, 16, 16), f"Heatmap shape: {pred['heatmap'].shape}"
    assert pred['boxes'].shape == (B, K, 4), f"Boxes shape: {pred['boxes'].shape}"
    assert pred['scores'].shape == (B, K), f"Scores shape: {pred['scores'].shape}"
    assert pred['search_feats'].shape == (B, K, 256, 384), f"Search feats: {pred['search_feats'].shape}"

    # Average the combined loss over the K frames.
    total_loss = torch.tensor(0.0)
    for k in range(K):
        pred_k = {
            'heatmap': pred['heatmap'][:, k],
            'size': pred['size'][:, k],
            'boxes': pred['boxes'][:, k],
        }
        if 'log_variance' in pred:
            pred_k['log_variance'] = pred['log_variance'][:, k]
        loss_dict = loss_fn(pred_k, gt_heatmaps[:, k], gt_sizes[:, k], gt_boxes[:, k])
        total_loss = total_loss + loss_dict['total']
    total_loss = total_loss / K

    # Contrastive term between pooled template features and the last frame's
    # pooled search features.
    t_pooled = pred['template_feat'].mean(dim=1)
    s_pooled = pred['search_feats'][:, -1].mean(dim=1)
    c_loss = contrastive_loss(t_pooled, s_pooled)
    total_loss = total_loss + 0.1 * c_loss

    # Backward: at least some parameters must receive gradients (asserted below).
    total_loss.backward()

    has_grads = sum(1 for p in model.parameters() if p.grad is not None)
    total_params_count = sum(1 for p in model.parameters())
    print(f" Total loss: {total_loss.item():.4f} (K={K} frames, contr={c_loss.item():.4f})")
    print(f" Params with gradients: {has_grads}/{total_params_count}")

    optimizer.step()
    optimizer.zero_grad()

    assert total_loss.item() > 0
    assert has_grads > 0


test("Training Step (K=3 sequence + contrastive)", test_training_step)
|
|
|
|
| |
| |
| |
def test_model_summary():
    """Full-depth model summary: parameter and fp16-size constraints must hold."""
    from vil_tracker.models.tracker import ViLTracker, get_default_config
    from vil_tracker.utils.helpers import print_model_summary

    config = get_default_config()
    net = ViLTracker(config)

    summary = print_model_summary(net, config)
    total_m = summary['total_params'] / 1e6

    # Hard constraints for the full (depth=24) model.
    assert summary['param_ok'], f"FAIL: {total_m:.2f}M params exceeds 50M limit"
    assert summary['size_ok'], f"FAIL: {summary['size_fp16_mb']:.1f}MB exceeds 500MB limit"

    # GFLOPs is only an estimate, so a miss is reported rather than fatal.
    if not summary['flop_ok']:
        print(f" ⚠️ GFLOPs estimate ({summary['gflops']:.2f}) exceeds 20, but this is approximate")


test("Model Summary (full depth=24)", test_model_summary)
|
|
|
|
| |
| |
| |
def test_online_tracker():
    """Online inference wrapper: initialize on frame 0, then track a moving target for 5 frames."""
    from vil_tracker.models.tracker import ViLTracker, get_default_config
    from vil_tracker.inference.online_tracker import OnlineTracker

    # Tiny model: this exercises the pipeline plumbing, not tracking quality.
    config = get_default_config()
    config['depth'] = 2
    config['tmoe_blocks'] = 0
    config['film_interval'] = 2

    model = ViLTracker(config)
    model.eval()

    tracker = OnlineTracker(model, device='cpu', template_size=128, search_size=256)

    # 640x480 frame with an [x, y, w, h] init box (see the slicing below).
    H, W = 480, 640
    init_bbox = [200, 200, 60, 80]

    # Frame 0: random noise plus a solid red patch at the init box.
    frame0 = np.random.randint(0, 255, (H, W, 3), dtype=np.uint8)

    x, y, w, h = init_bbox
    frame0[y:y+h, x:x+w] = [255, 0, 0]

    tracker.initialize(frame0, init_bbox)

    # Subsequent frames: move the patch +5px/frame in x and +3px/frame in y.
    for i in range(1, 6):
        frame = np.random.randint(0, 255, (H, W, 3), dtype=np.uint8)

        nx = x + i * 5
        ny = y + i * 3
        frame[ny:ny+h, nx:nx+w] = [255, 0, 0]

        # Each track() call must return a numeric 4-element bbox.
        bbox = tracker.track(frame)
        assert len(bbox) == 4, f"Bbox should have 4 elements, got {len(bbox)}"
        assert all(isinstance(v, (int, float, np.floating)) for v in bbox), f"Bbox values: {bbox}"
        print(f" Frame {i}: predicted [{bbox[0]:.1f}, {bbox[1]:.1f}, {bbox[2]:.1f}, {bbox[3]:.1f}]")

    print(f" Online tracker completed 5-frame sequence")


test("Online Tracker (inference pipeline)", test_online_tracker)
|
|
|
|
| |
| |
| |
def test_augmentation():
    """Augmentation: shapes preserved, and a forced horizontal flip mirrors the bbox cx."""
    from vil_tracker.data.dataset import TrackingAugmentation

    # Flip probability forced to 1.0 so the geometric effect is deterministic;
    # grayscale and blur are switched off.
    pipeline = TrackingAugmentation(
        brightness=0.2,
        contrast=0.2,
        horizontal_flip_prob=1.0,
        grayscale_prob=0.0,
        blur_prob=0.0,
    )

    template = torch.rand(3, 128, 128)
    search = torch.rand(3, 256, 256)
    bbox = torch.tensor([128.0, 128.0, 50.0, 50.0])

    t_aug, s_aug, b_aug = pipeline(template, search, bbox)

    assert t_aug.shape == (3, 128, 128), f"Aug template shape: {t_aug.shape}"
    assert s_aug.shape == (3, 256, 256), f"Aug search shape: {s_aug.shape}"
    assert b_aug.shape == (4,), f"Aug bbox shape: {b_aug.shape}"

    print(f" Original bbox: {bbox.tolist()}")
    print(f" Augmented bbox: {b_aug.tolist()}")
    # cx=128 in a 256-wide search maps to 256 - 128 = 128 under a mirror flip.
    assert abs(b_aug[0].item() - (256 - 128)) < 1.0, f"Flipped cx should be ~128, got {b_aug[0]}"


test("Augmentation pipeline", test_augmentation)
|
|
|
|
| |
| |
| |
def test_acl_curriculum():
    """ACL curriculum: raising the difficulty must widen the per-clip motion spread.

    Fix: the original only printed the hard-vs-easy comparison; now it is
    asserted so a broken curriculum actually fails the suite.
    """
    from vil_tracker.data.dataset import SyntheticTrackingDataset

    ds = SyntheticTrackingDataset(length=100, acl_difficulty=0.0, clip_length=3)

    def clip_spread(sample):
        # Horizontal travel of the target across the clip (max cx - min cx).
        return (sample['boxes'][:, 0].max() - sample['boxes'][:, 0].min()).item()

    # Sample 20 clips at the easiest setting...
    easy_spreads = [clip_spread(ds[i]) for i in range(20)]

    # ...then the same indices at the hardest setting.
    ds.set_acl_difficulty(1.0)
    hard_spreads = [clip_spread(ds[i]) for i in range(20)]

    avg_easy = np.mean(easy_spreads)
    avg_hard = np.mean(hard_spreads)

    print(f" Avg cx spread (easy, d=0.0): {avg_easy:.1f} px")
    print(f" Avg cx spread (hard, d=1.0): {avg_hard:.1f} px")
    print(f" Hard > Easy: {avg_hard > avg_easy}")
    assert avg_hard > avg_easy, (
        f"ACL difficulty had no effect: hard {avg_hard:.1f} <= easy {avg_easy:.1f}"
    )


test("ACL curriculum integration", test_acl_curriculum)
|
|
|
|
| |
| |
| |
| print("\n" + "=" * 60) |
| print(f"Results: {PASS}/{PASS + FAIL} tests passed") |
| if FAIL > 0: |
| print(f" ❌ {FAIL} test(s) FAILED") |
| sys.exit(1) |
| else: |
| print(" ✅ All tests passed!") |
| sys.exit(0) |
|
|