""" Benchmark evaluator for tracking datasets. Supports: - LaSOT: Large-scale Single Object Tracking - UAV123: UAV tracking at 123 fps - DTB70: Drone Tracking Benchmark - VisDrone-SOT: Vision meets Drone SOT Metrics: AUC (Success), Precision, Normalized Precision """ import os import json import numpy as np from collections import defaultdict def compute_iou(box_a, box_b): """Compute IoU between two boxes in [x, y, w, h] format.""" xa1, ya1 = box_a[0], box_a[1] xa2, ya2 = xa1 + box_a[2], ya1 + box_a[3] xb1, yb1 = box_b[0], box_b[1] xb2, yb2 = xb1 + box_b[2], yb1 + box_b[3] inter_x1 = max(xa1, xb1) inter_y1 = max(ya1, yb1) inter_x2 = min(xa2, xb2) inter_y2 = min(ya2, yb2) inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1) area_a = box_a[2] * box_a[3] area_b = box_b[2] * box_b[3] union_area = area_a + area_b - inter_area return inter_area / max(union_area, 1e-6) def compute_center_distance(box_a, box_b): """Compute center distance between two boxes in [x, y, w, h] format.""" ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2]) cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2]) return np.linalg.norm(ca - cb) def compute_success_curve(ious, thresholds=None): """Compute success curve (fraction of frames with IoU > threshold).""" if thresholds is None: thresholds = np.arange(0, 1.05, 0.05) ious = np.array(ious) success = np.array([np.mean(ious >= t) for t in thresholds]) return thresholds, success def compute_auc(ious): """Compute AUC from IoU values.""" thresholds, success = compute_success_curve(ious) return np.trapz(success, thresholds) / (thresholds[-1] - thresholds[0]) class BenchmarkEvaluator: """Evaluate tracker on standard benchmarks.""" def __init__(self, tracker, device='cuda'): self.tracker = tracker self.device = device def evaluate_sequence(self, frames, gt_boxes): """Evaluate on a single sequence. Args: frames: list of (H, W, 3) numpy arrays gt_boxes: list of [x, y, w, h] ground truth boxes Returns: dict with per-frame IoUs and metrics """ # Initialize with first frame self.tracker.initialize(frames[0], gt_boxes[0]) pred_boxes = [gt_boxes[0]] # First frame is given ious = [1.0] for i in range(1, len(frames)): pred_box = self.tracker.track(frames[i]) pred_boxes.append(pred_box) if gt_boxes[i] is not None and gt_boxes[i][2] > 0 and gt_boxes[i][3] > 0: iou = compute_iou(pred_box, gt_boxes[i]) ious.append(iou) else: ious.append(0.0) auc = compute_auc(ious) return { 'pred_boxes': pred_boxes, 'ious': ious, 'auc': auc, 'mean_iou': np.mean(ious), } def evaluate_dataset(self, dataset_path, dataset_type='lasot'): """Evaluate on a full dataset. Args: dataset_path: path to dataset root dataset_type: 'lasot', 'uav123', 'dtb70', or 'visdrone' Returns: dict with overall metrics and per-sequence results """ sequences = self._load_dataset(dataset_path, dataset_type) results = {} all_ious = [] for seq_name, (frames, gt_boxes) in sequences.items(): print(f"Evaluating {seq_name}...") seq_result = self.evaluate_sequence(frames, gt_boxes) results[seq_name] = seq_result all_ious.extend(seq_result['ious']) overall_auc = compute_auc(all_ious) per_seq_auc = {name: r['auc'] for name, r in results.items()} mean_seq_auc = np.mean(list(per_seq_auc.values())) if per_seq_auc else 0.0 return { 'overall_auc': overall_auc, 'mean_seq_auc': mean_seq_auc, 'per_sequence': per_seq_auc, 'num_sequences': len(sequences), 'num_frames': len(all_ious), } def _load_dataset(self, dataset_path, dataset_type): """Load dataset sequences. 

class BenchmarkEvaluator:
    """Evaluate a tracker on standard benchmarks."""

    def __init__(self, tracker, device='cuda'):
        self.tracker = tracker
        self.device = device

    def evaluate_sequence(self, frames, gt_boxes):
        """Evaluate on a single sequence.

        Args:
            frames: list of (H, W, 3) numpy arrays
            gt_boxes: list of [x, y, w, h] ground truth boxes; entries may be
                None or zero-sized where the target is absent or occluded

        Returns:
            dict with per-frame IoUs and summary metrics
        """
        # Initialize the tracker with the ground-truth box of the first frame.
        self.tracker.initialize(frames[0], gt_boxes[0])
        pred_boxes = [gt_boxes[0]]  # First frame is given, so its IoU is 1.
        ious = [1.0]

        for i in range(1, len(frames)):
            pred_box = self.tracker.track(frames[i])
            pred_boxes.append(pred_box)
            # Score only frames with a valid (positive-area) ground-truth box.
            if gt_boxes[i] is not None and gt_boxes[i][2] > 0 and gt_boxes[i][3] > 0:
                ious.append(compute_iou(pred_box, gt_boxes[i]))
            else:
                ious.append(0.0)

        return {
            'pred_boxes': pred_boxes,
            'ious': ious,
            'auc': compute_auc(ious),
            'mean_iou': float(np.mean(ious)),
        }

    def evaluate_dataset(self, dataset_path, dataset_type='lasot'):
        """Evaluate on a full dataset.

        Args:
            dataset_path: path to dataset root
            dataset_type: 'lasot', 'uav123', 'dtb70', or 'visdrone'

        Returns:
            dict with overall metrics and per-sequence results
        """
        sequences = self._load_dataset(dataset_path, dataset_type)
        results = {}
        all_ious = []

        for seq_name, (frames, gt_boxes) in sequences.items():
            print(f"Evaluating {seq_name}...")
            seq_result = self.evaluate_sequence(frames, gt_boxes)
            results[seq_name] = seq_result
            all_ious.extend(seq_result['ious'])

        # Report both the frame-weighted AUC over all frames and the
        # sequence-averaged AUC; without the latter, long sequences dominate.
        overall_auc = compute_auc(all_ious) if all_ious else 0.0
        per_seq_auc = {name: r['auc'] for name, r in results.items()}
        mean_seq_auc = np.mean(list(per_seq_auc.values())) if per_seq_auc else 0.0

        return {
            'overall_auc': overall_auc,
            'mean_seq_auc': mean_seq_auc,
            'per_sequence': per_seq_auc,
            'num_sequences': len(sequences),
            'num_frames': len(all_ious),
        }

    def _load_dataset(self, dataset_path, dataset_type):
        """Load dataset sequences.

        Returns dict of {name: (frames, gt_boxes)}.
        """
        # Placeholder - real implementation would load actual dataset files.
        print(f"Loading {dataset_type} from {dataset_path}")
        return {}
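
# Minimal usage sketch. `MyTracker` is a hypothetical stand-in for any object
# exposing initialize(frame, box) and track(frame) -> [x, y, w, h], which is
# the interface evaluate_sequence assumes above; it is not part of this module.
if __name__ == '__main__':
    class MyTracker:
        """Dummy tracker that always reports its initial box."""

        def initialize(self, frame, box):
            self.box = list(box)

        def track(self, frame):
            return self.box

    frames = [np.zeros((240, 320, 3), dtype=np.uint8) for _ in range(5)]
    gt = [[10 + 2 * i, 20, 40, 30] for i in range(5)]  # Target drifts right.
    result = BenchmarkEvaluator(MyTracker()).evaluate_sequence(frames, gt)
    print(f"AUC: {result['auc']:.3f}, mean IoU: {result['mean_iou']:.3f}")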