""" Benchmark evaluator for tracking datasets. Supports: - LaSOT: Large-scale Single Object Tracking (280 test sequences) - UAV123: UAV tracking at 123 fps (123 sequences) - DTB70: Drone Tracking Benchmark (70 sequences) - VisDrone-SOT: Vision meets Drone SOT Metrics: AUC (Success), Precision, Normalized Precision Dataset structure: LaSOT (test): root/{category}/{seq_name}/img/XXXXXXXX.jpg root/{category}/{seq_name}/groundtruth.txt UAV123: root/data_seq/UAV123/{seq_name}/*.jpg root/anno/UAV123/{seq_name}.txt DTB70: root/{seq_name}/img/*.jpg root/{seq_name}/groundtruth_rect.txt VisDrone-SOT (test-dev): root/sequences/{seq_name}/*.jpg root/annotations/{seq_name}.txt """ import os import glob import json import numpy as np from pathlib import Path from collections import defaultdict def compute_iou(box_a, box_b): """Compute IoU between two boxes in [x, y, w, h] format.""" xa1, ya1 = box_a[0], box_a[1] xa2, ya2 = xa1 + box_a[2], ya1 + box_a[3] xb1, yb1 = box_b[0], box_b[1] xb2, yb2 = xb1 + box_b[2], yb1 + box_b[3] inter_x1 = max(xa1, xb1) inter_y1 = max(ya1, yb1) inter_x2 = min(xa2, xb2) inter_y2 = min(ya2, yb2) inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1) area_a = box_a[2] * box_a[3] area_b = box_b[2] * box_b[3] union_area = area_a + area_b - inter_area return inter_area / max(union_area, 1e-6) def compute_center_distance(box_a, box_b): """Compute center distance between two boxes in [x, y, w, h] format.""" ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2]) cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2]) return np.linalg.norm(ca - cb) def compute_normalized_center_distance(box_a, box_b): """Compute center distance normalized by GT size (for normalized precision).""" ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2]) cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2]) dist = np.linalg.norm(ca - cb) # Normalize by GT diagonal gt_diag = np.sqrt(box_b[2] ** 2 + box_b[3] ** 2) + 1e-6 return dist / gt_diag def compute_success_curve(ious, thresholds=None): """Compute success curve (fraction of frames with IoU > threshold).""" if thresholds is None: thresholds = np.arange(0, 1.05, 0.05) ious = np.array(ious) success = np.array([np.mean(ious >= t) for t in thresholds]) return thresholds, success def compute_auc(ious): """Compute AUC from IoU values (Area Under Success Curve).""" thresholds, success = compute_success_curve(ious) return np.trapz(success, thresholds) / (thresholds[-1] - thresholds[0]) def compute_precision(center_dists, threshold=20): """Compute precision at given pixel threshold.""" dists = np.array(center_dists) return np.mean(dists <= threshold) def compute_normalized_precision(norm_dists, threshold=0.5): """Compute normalized precision.""" dists = np.array(norm_dists) return np.mean(dists <= threshold) # ============================================================ # Dataset loaders # ============================================================ def load_annotations_txt(filepath): """Load annotations from a text file with x,y,w,h per line.""" boxes = [] with open(filepath, 'r') as f: for line in f: line = line.strip() if not line: boxes.append(None) continue parts = line.replace(',', ' ').replace('\t', ' ').split() try: vals = [float(x) for x in parts[:4]] # Skip zero-area boxes if vals[2] <= 0 or vals[3] <= 0: boxes.append(None) else: boxes.append(vals) except (ValueError, IndexError): boxes.append(None) return boxes def load_lasot_test(root): """Load LaSOT test sequences. 


# ============================================================
# Dataset loaders
# ============================================================

def load_annotations_txt(filepath):
    """Load annotations from a text file with one x,y,w,h box per line.

    Returns a list with one entry per line: [x, y, w, h] as floats, or None
    for empty, malformed, or zero-area lines (treated as target-absent).
    """
    boxes = []
    with open(filepath, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                boxes.append(None)
                continue
            parts = line.replace(',', ' ').replace('\t', ' ').split()
            try:
                vals = [float(x) for x in parts[:4]]
                # Skip zero-area boxes
                if vals[2] <= 0 or vals[3] <= 0:
                    boxes.append(None)
                else:
                    boxes.append(vals)
            except (ValueError, IndexError):
                boxes.append(None)
    return boxes


def load_lasot_test(root):
    """Load LaSOT test sequences.

    Structure: root/{category}/{seq_name}/img/*.jpg + groundtruth.txt
    Test split: approximated as the last 20% of sequences per category.
    (The official 280-sequence split is defined by a fixed list in the
    LaSOT release.)
    """
    root = Path(root)
    sequences = {}
    categories = sorted([d for d in root.iterdir() if d.is_dir()])

    for cat_dir in categories:
        seq_dirs = sorted([d for d in cat_dir.iterdir() if d.is_dir()])
        # Test split: last 20% of each category
        test_seqs = seq_dirs[int(len(seq_dirs) * 0.8):]
        for seq_dir in test_seqs:
            gt_file = seq_dir / 'groundtruth.txt'
            img_dir = seq_dir / 'img'
            if not gt_file.exists() or not img_dir.exists():
                continue
            gt_boxes = load_annotations_txt(str(gt_file))
            frames = sorted(glob.glob(str(img_dir / '*.jpg')))
            if len(frames) >= 2 and len(gt_boxes) >= 2:
                min_len = min(len(frames), len(gt_boxes))
                seq_name = f"{cat_dir.name}/{seq_dir.name}"
                sequences[seq_name] = {
                    'frames': frames[:min_len],
                    'gt': gt_boxes[:min_len],
                }
    return sequences


def load_uav123(root):
    """Load UAV123 sequences.

    Structure:
        root/data_seq/UAV123/{seq_name}/*.jpg
        root/anno/UAV123/{seq_name}.txt

    Note: annotation files without an identically named frame directory
    (UAV123 defines some sub-sequences over shared frame folders) are skipped.
    """
    root = Path(root)
    sequences = {}
    anno_dir = root / 'anno' / 'UAV123'
    frame_dir = root / 'data_seq' / 'UAV123'
    if not anno_dir.exists():
        # Alternative flat structure
        anno_dir = root / 'anno'
        frame_dir = root / 'data_seq'
    if not anno_dir.exists():
        print(f"Warning: UAV123 annotations not found at {anno_dir}")
        return sequences

    for anno_file in sorted(anno_dir.glob('*.txt')):
        seq_name = anno_file.stem
        seq_frame_dir = frame_dir / seq_name
        if not seq_frame_dir.exists():
            continue
        gt_boxes = load_annotations_txt(str(anno_file))
        frames = sorted(glob.glob(str(seq_frame_dir / '*.jpg')))
        if not frames:
            frames = sorted(glob.glob(str(seq_frame_dir / '*.png')))
        if len(frames) >= 2 and len(gt_boxes) >= 2:
            min_len = min(len(frames), len(gt_boxes))
            sequences[seq_name] = {
                'frames': frames[:min_len],
                'gt': gt_boxes[:min_len],
            }
    return sequences


def load_dtb70(root):
    """Load DTB70 sequences.

    Structure: root/{seq_name}/img/*.jpg + groundtruth_rect.txt
    """
    root = Path(root)
    sequences = {}
    for seq_dir in sorted(root.iterdir()):
        if not seq_dir.is_dir():
            continue
        gt_file = seq_dir / 'groundtruth_rect.txt'
        if not gt_file.exists():
            gt_file = seq_dir / 'groundtruth.txt'
        if not gt_file.exists():
            continue
        img_dir = seq_dir / 'img'
        if not img_dir.exists():
            img_dir = seq_dir  # frames directly in the sequence dir
        gt_boxes = load_annotations_txt(str(gt_file))
        frames = sorted(glob.glob(str(img_dir / '*.jpg')))
        if not frames:
            frames = sorted(glob.glob(str(img_dir / '*.png')))
        if len(frames) >= 2 and len(gt_boxes) >= 2:
            min_len = min(len(frames), len(gt_boxes))
            sequences[seq_dir.name] = {
                'frames': frames[:min_len],
                'gt': gt_boxes[:min_len],
            }
    return sequences


def load_visdrone_sot(root):
    """Load VisDrone-SOT sequences.

    Structure:
        root/sequences/{seq_name}/*.jpg
        root/annotations/{seq_name}.txt
    """
    root = Path(root)
    sequences = {}
    anno_dir = root / 'annotations'
    seq_dir = root / 'sequences'
    if not anno_dir.exists() or not seq_dir.exists():
        print(f"Warning: VisDrone-SOT not found at {root}")
        return sequences

    for anno_file in sorted(anno_dir.glob('*.txt')):
        seq_name = anno_file.stem
        frames_dir = seq_dir / seq_name
        if not frames_dir.exists():
            continue
        gt_boxes = load_annotations_txt(str(anno_file))
        frames = sorted(glob.glob(str(frames_dir / '*.jpg')))
        if len(frames) >= 2 and len(gt_boxes) >= 2:
            min_len = min(len(frames), len(gt_boxes))
            sequences[seq_name] = {
                'frames': frames[:min_len],
                'gt': gt_boxes[:min_len],
            }
    return sequences
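
# Convenience sketch for eyeballing what a loader returned before committing
# to a full evaluation run. The helper name is our own addition, not part of
# any public API; it only touches the {'frames', 'gt'} dict shape the loaders
# above produce.
def _print_dataset_summary(sequences, max_rows=5):
    """Print name, frame count, and first GT box for up to max_rows sequences."""
    for i, (name, seq) in enumerate(sequences.items()):
        if i >= max_rows:
            print(f"  ... and {len(sequences) - max_rows} more sequences")
            break
        print(f"  {name}: {len(seq['frames'])} frames, first GT={seq['gt'][0]}")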

DATASET_LOADERS = {
    'lasot': load_lasot_test,
    'uav123': load_uav123,
    'dtb70': load_dtb70,
    'visdrone': load_visdrone_sot,
}


# ============================================================
# Evaluator
# ============================================================
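
# The evaluator below only assumes the tracker exposes initialize(frame, box)
# and track(frame) -> [x, y, w, h]. This stub satisfies that interface and is
# our own sketch for smoke-testing the pipeline without a real model.
class _ConstantBoxTracker:
    """Dummy tracker that always reports the initial box."""

    def initialize(self, frame, box):
        self.box = list(box)

    def track(self, frame):
        return self.box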

class BenchmarkEvaluator:
    """Evaluate a tracker on standard benchmarks.

    Usage:
        from vil_tracker.inference.online_tracker import OnlineTracker
        from vil_tracker.evaluation.evaluate import BenchmarkEvaluator

        online_tracker = OnlineTracker(model, device='cuda')
        evaluator = BenchmarkEvaluator(online_tracker)
        results = evaluator.evaluate_dataset('/path/to/LaSOT', 'lasot')
        print(f"LaSOT AUC: {results['mean_seq_auc']:.3f}")
    """

    def __init__(self, tracker, device='cuda'):
        self.tracker = tracker
        self.device = device

    def _load_image(self, path):
        """Load an image as an RGB numpy array; falls back to OpenCV if PIL is missing."""
        try:
            from PIL import Image
            img = Image.open(path).convert('RGB')
            return np.array(img)
        except ImportError:
            import cv2
            img = cv2.imread(path)
            if img is not None:
                return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            return np.zeros((480, 640, 3), dtype=np.uint8)

    def evaluate_sequence(self, frames_paths, gt_boxes):
        """Evaluate on a single sequence.

        Args:
            frames_paths: list of image file paths
            gt_boxes: list of [x, y, w, h] ground truth boxes (None = absent)

        Returns:
            dict with per-frame IoUs, distances, and metrics
        """
        # Load the first frame and initialize the tracker with the GT box
        first_frame = self._load_image(frames_paths[0])
        self.tracker.initialize(first_frame, gt_boxes[0])

        pred_boxes = [gt_boxes[0]]  # First frame is given
        ious = [1.0]
        center_dists = [0.0]
        norm_dists = [0.0]

        for i in range(1, len(frames_paths)):
            frame = self._load_image(frames_paths[i])
            pred_box = self.tracker.track(frame)
            pred_boxes.append(pred_box)

            if gt_boxes[i] is not None:
                iou = compute_iou(pred_box, gt_boxes[i])
                cdist = compute_center_distance(pred_box, gt_boxes[i])
                ndist = compute_normalized_center_distance(pred_box, gt_boxes[i])
                ious.append(iou)
                center_dists.append(cdist)
                norm_dists.append(ndist)
            else:
                # Target absent in GT: count the frame as a miss
                # (IoU 0, infinite center distance)
                ious.append(0.0)
                center_dists.append(float('inf'))
                norm_dists.append(float('inf'))

        auc = compute_auc(ious)
        precision = compute_precision(center_dists)
        norm_precision = compute_normalized_precision(norm_dists)

        return {
            'pred_boxes': pred_boxes,
            'ious': ious,
            'center_dists': center_dists,
            'norm_dists': norm_dists,  # consumed by evaluate_dataset
            'auc': auc,
            'precision': precision,
            'norm_precision': norm_precision,
            'mean_iou': np.mean(ious),
        }

    def evaluate_dataset(self, dataset_path, dataset_type='lasot', save_results=None):
        """Evaluate on a full dataset.

        Args:
            dataset_path: path to dataset root
            dataset_type: 'lasot', 'uav123', 'dtb70', or 'visdrone'
            save_results: optional path to save JSON results

        Returns:
            dict with overall metrics and per-sequence results
        """
        loader = DATASET_LOADERS.get(dataset_type)
        if loader is None:
            raise ValueError(f"Unknown dataset type: {dataset_type}. "
                             f"Supported: {list(DATASET_LOADERS.keys())}")

        sequences = loader(dataset_path)
        if not sequences:
            print(f"Warning: No sequences loaded from {dataset_path}")
            return {'overall_auc': 0, 'mean_seq_auc': 0, 'num_sequences': 0}

        print(f"Evaluating on {dataset_type}: {len(sequences)} sequences")
        results = {}
        all_ious = []
        all_center_dists = []
        all_norm_dists = []

        for seq_idx, (seq_name, seq_data) in enumerate(sequences.items()):
            print(f"  [{seq_idx+1}/{len(sequences)}] {seq_name} "
                  f"({len(seq_data['frames'])} frames)...", end='', flush=True)

            seq_result = self.evaluate_sequence(seq_data['frames'], seq_data['gt'])
            # Cast numpy scalars to plain floats so the summary is JSON-serializable
            results[seq_name] = {
                'auc': float(seq_result['auc']),
                'precision': float(seq_result['precision']),
                'norm_precision': float(seq_result['norm_precision']),
                'mean_iou': float(seq_result['mean_iou']),
                'num_frames': len(seq_data['frames']),
            }
            all_ious.extend(seq_result['ious'])
            all_center_dists.extend(seq_result['center_dists'])
            all_norm_dists.extend(seq_result['norm_dists'])
            print(f" AUC={seq_result['auc']:.3f}")

        overall_auc = compute_auc(all_ious)
        per_seq_auc = {name: r['auc'] for name, r in results.items()}
        mean_seq_auc = np.mean(list(per_seq_auc.values())) if per_seq_auc else 0.0
        overall_precision = compute_precision(all_center_dists)
        overall_norm_prec = compute_normalized_precision(all_norm_dists)

        summary = {
            'dataset': dataset_type,
            'overall_auc': float(overall_auc),
            'mean_seq_auc': float(mean_seq_auc),
            'precision_20px': float(overall_precision),
            'normalized_precision': float(overall_norm_prec),
            'num_sequences': len(sequences),
            'num_frames': len(all_ious),
            'per_sequence': results,
        }

        print(f"\n{'='*50}")
        print(f"{dataset_type.upper()} Results:")
        print(f"  AUC (overall): {overall_auc:.3f}")
        print(f"  AUC (mean seq): {mean_seq_auc:.3f}")
        print(f"  Precision (20px): {overall_precision:.3f}")
        print(f"  Norm. Precision: {overall_norm_prec:.3f}")
        print(f"  Sequences: {len(sequences)}")
        print(f"  Total frames: {len(all_ious)}")
        print(f"{'='*50}")

        # Save results to JSON
        if save_results:
            os.makedirs(os.path.dirname(save_results) or '.', exist_ok=True)
            with open(save_results, 'w') as f:
                json.dump(summary, f, indent=2)
            print(f"Results saved to {save_results}")

        return summary

    def evaluate_multiple(self, dataset_configs):
        """Evaluate on multiple benchmarks.

        Args:
            dataset_configs: list of (dataset_path, dataset_type) tuples

        Returns:
            dict of {dataset_type: results}
        """
        all_results = {}
        for dataset_path, dataset_type in dataset_configs:
            results = self.evaluate_dataset(dataset_path, dataset_type)
            all_results[dataset_type] = results

        # Print comparison table
        print(f"\n{'='*60}")
        print(f"{'Dataset':<15} {'AUC':>8} {'Prec@20':>8} {'NormPrec':>8} {'Seqs':>6}")
        print(f"{'-'*60}")
        for dt, r in all_results.items():
            print(f"{dt:<15} {r['mean_seq_auc']:>8.3f} "
                  f"{r.get('precision_20px', 0):>8.3f} "
                  f"{r.get('normalized_precision', 0):>8.3f} "
                  f"{r['num_sequences']:>6}")
        print(f"{'='*60}")
        return all_results
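
# Optional command-line entry point — a sketch for smoke-testing the pipeline
# with the dummy _ConstantBoxTracker defined above. For real numbers, construct
# your tracker (e.g. the OnlineTracker shown in the class docstring) and pass
# it to BenchmarkEvaluator instead.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Evaluate a tracker on a benchmark.')
    parser.add_argument('dataset_path', help='Path to the dataset root')
    parser.add_argument('--type', default='lasot', choices=sorted(DATASET_LOADERS))
    parser.add_argument('--save', default=None, help='Optional JSON output path')
    args = parser.parse_args()

    evaluator = BenchmarkEvaluator(_ConstantBoxTracker())
    evaluator.evaluate_dataset(args.dataset_path, args.type, save_results=args.save)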