| """ |
| Benchmark evaluator for tracking datasets. |
| |
| Supports: |
| - LaSOT: Large-scale Single Object Tracking (280 test sequences) |
| - UAV123: UAV tracking at 123 fps (123 sequences) |
| - DTB70: Drone Tracking Benchmark (70 sequences) |
| - VisDrone-SOT: Vision meets Drone SOT |
| |
| Metrics: AUC (Success), Precision, Normalized Precision |
| |
| Dataset structure: |
| LaSOT (test): |
| root/{category}/{seq_name}/img/XXXXXXXX.jpg |
| root/{category}/{seq_name}/groundtruth.txt |
| |
| UAV123: |
| root/data_seq/UAV123/{seq_name}/*.jpg |
| root/anno/UAV123/{seq_name}.txt |
| |
| DTB70: |
| root/{seq_name}/img/*.jpg |
| root/{seq_name}/groundtruth_rect.txt |
| |
| VisDrone-SOT (test-dev): |
| root/sequences/{seq_name}/*.jpg |
| root/annotations/{seq_name}.txt |
| """ |

import os
import glob
import json
import numpy as np
from pathlib import Path


def compute_iou(box_a, box_b):
    """Compute IoU between two boxes in [x, y, w, h] format."""
    xa1, ya1 = box_a[0], box_a[1]
    xa2, ya2 = xa1 + box_a[2], ya1 + box_a[3]
    xb1, yb1 = box_b[0], box_b[1]
    xb2, yb2 = xb1 + box_b[2], yb1 + box_b[3]

    inter_x1 = max(xa1, xb1)
    inter_y1 = max(ya1, yb1)
    inter_x2 = min(xa2, xb2)
    inter_y2 = min(ya2, yb2)

    inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)
    area_a = box_a[2] * box_a[3]
    area_b = box_b[2] * box_b[3]
    union_area = area_a + area_b - inter_area

    return inter_area / max(union_area, 1e-6)
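
# Worked example: boxes [0, 0, 10, 10] and [5, 5, 10, 10] overlap in a 5x5
# region, so IoU = 25 / (100 + 100 - 25) = 25 / 175 ≈ 0.143.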


def compute_center_distance(box_a, box_b):
    """Compute the distance between box centers ([x, y, w, h] format), in pixels."""
    ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
    cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
    return np.linalg.norm(ca - cb)


def compute_normalized_center_distance(box_a, box_b):
    """Compute center distance normalized by the GT box diagonal.

    box_b is the ground-truth box; dividing by its diagonal makes the error
    relative to target size, as used for the normalized-precision metric.
    """
    ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
    cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
    dist = np.linalg.norm(ca - cb)

    gt_diag = np.sqrt(box_b[2] ** 2 + box_b[3] ** 2) + 1e-6
    return dist / gt_diag


def compute_success_curve(ious, thresholds=None):
    """Compute the success curve: fraction of frames with IoU >= threshold."""
    if thresholds is None:
        thresholds = np.arange(0, 1.05, 0.05)

    ious = np.array(ious)
    success = np.array([np.mean(ious >= t) for t in thresholds])
    return thresholds, success


def compute_auc(ious):
    """Compute AUC (area under the success curve) from per-frame IoU values."""
    thresholds, success = compute_success_curve(ious)
    # Normalize by the threshold range so the AUC lies in [0, 1]; with the
    # default evenly spaced grid this is close to the mean success rate.
    return np.trapz(success, thresholds) / (thresholds[-1] - thresholds[0])


def compute_precision(center_dists, threshold=20):
    """Compute precision: fraction of frames with center error <= threshold (pixels)."""
    dists = np.array(center_dists)
    return np.mean(dists <= threshold)


def compute_normalized_precision(norm_dists, threshold=0.5):
    """Compute normalized precision: fraction of frames with normalized center error <= threshold."""
    dists = np.array(norm_dists)
    return np.mean(dists <= threshold)
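

# Small self-check of the metric helpers against hand-computed values.
# Illustrative only; the expected numbers follow from the definitions above,
# not from any benchmark toolkit.
def _sanity_check_metrics():
    """Assert the metric helpers on toy inputs."""
    # Identical boxes: IoU 1, zero center error.
    assert abs(compute_iou([0, 0, 10, 10], [0, 0, 10, 10]) - 1.0) < 1e-5
    assert compute_center_distance([0, 0, 10, 10], [0, 0, 10, 10]) == 0.0
    # Disjoint boxes: IoU 0.
    assert compute_iou([0, 0, 10, 10], [20, 20, 10, 10]) == 0.0
    # IoUs [1.0, 0.0]: success is 1.0 at t=0 and 0.5 above, so the
    # normalized AUC lands just above 0.5.
    assert 0.5 < compute_auc([1.0, 0.0]) < 0.6
    # Center errors [5, 30] px against the 20 px threshold: precision 0.5.
    assert compute_precision([5.0, 30.0]) == 0.5

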
def load_annotations_txt(filepath):
    """Load annotations from a text file with one x,y,w,h box per line.

    Values may be comma-, tab-, or space-separated. Empty, unparseable, NaN,
    or degenerate (non-positive size) rows are stored as None, meaning the
    target is absent in that frame.
    """
    boxes = []
    with open(filepath, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                boxes.append(None)
                continue
            parts = line.replace(',', ' ').replace('\t', ' ').split()
            try:
                vals = [float(x) for x in parts[:4]]
                # NaN rows (e.g. out-of-view frames in UAV123) and degenerate
                # boxes mark the target as absent.
                if any(np.isnan(v) for v in vals) or vals[2] <= 0 or vals[3] <= 0:
                    boxes.append(None)
                else:
                    boxes.append(vals)
            except (ValueError, IndexError):
                boxes.append(None)
    return boxes


def load_lasot_test(root):
    """Load LaSOT test sequences.

    Structure: root/{category}/{seq_name}/img/*.jpg + groundtruth.txt
    Test split: approximated here as the last 20% of each category's
    sequences; the official protocol defines the 280-sequence test set
    (4 per category) via a split list -- see the sketch after this function
    for loading such a list directly.
    """
    root = Path(root)
    sequences = {}

    categories = sorted([d for d in root.iterdir() if d.is_dir()])
    for cat_dir in categories:
        seq_dirs = sorted([d for d in cat_dir.iterdir() if d.is_dir()])
        # Heuristic test split: last 20% of sequences per category.
        test_seqs = seq_dirs[int(len(seq_dirs) * 0.8):]

        for seq_dir in test_seqs:
            gt_file = seq_dir / 'groundtruth.txt'
            img_dir = seq_dir / 'img'

            if not gt_file.exists() or not img_dir.exists():
                continue

            gt_boxes = load_annotations_txt(str(gt_file))
            frames = sorted(glob.glob(str(img_dir / '*.jpg')))

            if len(frames) >= 2 and len(gt_boxes) >= 2:
                min_len = min(len(frames), len(gt_boxes))
                seq_name = f"{cat_dir.name}/{seq_dir.name}"
                sequences[seq_name] = {
                    'frames': frames[:min_len],
                    'gt': gt_boxes[:min_len],
                }

    return sequences
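

# If an official split list is available, it is safer than the 20% heuristic
# above. A minimal sketch, assuming a plain-text file with one sequence name
# (e.g. "airplane-1") per line; the helper name and file format are
# assumptions, not part of the original module.
def load_lasot_split_file(root, split_file):
    """Load only the LaSOT sequences named in a split list file."""
    root = Path(root)
    sequences = {}
    with open(split_file, 'r') as f:
        names = [ln.strip() for ln in f if ln.strip()]
    for name in names:
        category = name.rsplit('-', 1)[0]  # "airplane-1" -> "airplane"
        seq_dir = root / category / name
        gt_file = seq_dir / 'groundtruth.txt'
        img_dir = seq_dir / 'img'
        if not gt_file.exists() or not img_dir.exists():
            continue
        gt_boxes = load_annotations_txt(str(gt_file))
        frames = sorted(glob.glob(str(img_dir / '*.jpg')))
        if len(frames) >= 2 and len(gt_boxes) >= 2:
            min_len = min(len(frames), len(gt_boxes))
            sequences[f"{category}/{name}"] = {
                'frames': frames[:min_len],
                'gt': gt_boxes[:min_len],
            }
    return sequences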


def load_uav123(root):
    """Load UAV123 sequences.

    Structure:
        root/data_seq/UAV123/{seq_name}/*.jpg
        root/anno/UAV123/{seq_name}.txt
    """
    root = Path(root)
    sequences = {}

    anno_dir = root / 'anno' / 'UAV123'
    frame_dir = root / 'data_seq' / 'UAV123'

    if not anno_dir.exists():
        # Fall back to a flat layout without the UAV123 subdirectory.
        anno_dir = root / 'anno'
        frame_dir = root / 'data_seq'

    if not anno_dir.exists():
        print(f"Warning: UAV123 annotations not found at {anno_dir}")
        return sequences

    for anno_file in sorted(anno_dir.glob('*.txt')):
        seq_name = anno_file.stem
        seq_frame_dir = frame_dir / seq_name

        if not seq_frame_dir.exists():
            continue

        gt_boxes = load_annotations_txt(str(anno_file))
        frames = sorted(glob.glob(str(seq_frame_dir / '*.jpg')))
        if not frames:
            frames = sorted(glob.glob(str(seq_frame_dir / '*.png')))

        if len(frames) >= 2 and len(gt_boxes) >= 2:
            min_len = min(len(frames), len(gt_boxes))
            sequences[seq_name] = {
                'frames': frames[:min_len],
                'gt': gt_boxes[:min_len],
            }

    return sequences


def load_dtb70(root):
    """Load DTB70 sequences.

    Structure: root/{seq_name}/img/*.jpg + groundtruth_rect.txt
    """
    root = Path(root)
    sequences = {}

    for seq_dir in sorted(root.iterdir()):
        if not seq_dir.is_dir():
            continue

        gt_file = seq_dir / 'groundtruth_rect.txt'
        if not gt_file.exists():
            # Some copies ship the annotations as groundtruth.txt instead.
            gt_file = seq_dir / 'groundtruth.txt'
        if not gt_file.exists():
            continue

        img_dir = seq_dir / 'img'
        if not img_dir.exists():
            # Fall back to images stored directly in the sequence folder.
            img_dir = seq_dir

        gt_boxes = load_annotations_txt(str(gt_file))
        frames = sorted(glob.glob(str(img_dir / '*.jpg')))
        if not frames:
            frames = sorted(glob.glob(str(img_dir / '*.png')))

        if len(frames) >= 2 and len(gt_boxes) >= 2:
            min_len = min(len(frames), len(gt_boxes))
            sequences[seq_dir.name] = {
                'frames': frames[:min_len],
                'gt': gt_boxes[:min_len],
            }

    return sequences


def load_visdrone_sot(root):
    """Load VisDrone-SOT sequences.

    Structure:
        root/sequences/{seq_name}/*.jpg
        root/annotations/{seq_name}.txt
    """
    root = Path(root)
    sequences = {}

    anno_dir = root / 'annotations'
    seq_dir = root / 'sequences'

    if not anno_dir.exists() or not seq_dir.exists():
        print(f"Warning: VisDrone-SOT not found at {root}")
        return sequences

    for anno_file in sorted(anno_dir.glob('*.txt')):
        seq_name = anno_file.stem
        frames_dir = seq_dir / seq_name

        if not frames_dir.exists():
            continue

        gt_boxes = load_annotations_txt(str(anno_file))
        frames = sorted(glob.glob(str(frames_dir / '*.jpg')))

        if len(frames) >= 2 and len(gt_boxes) >= 2:
            min_len = min(len(frames), len(gt_boxes))
            sequences[seq_name] = {
                'frames': frames[:min_len],
                'gt': gt_boxes[:min_len],
            }

    return sequences


DATASET_LOADERS = {
    'lasot': load_lasot_test,
    'uav123': load_uav123,
    'dtb70': load_dtb70,
    'visdrone': load_visdrone_sot,
}
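
# Other OTB-style benchmarks (a per-sequence image folder plus an x,y,w,h
# annotation file) can be plugged in by registering a loader here, e.g. a
# hypothetical entry reusing load_dtb70 for a dataset with the same layout:
#   DATASET_LOADERS['my_dataset'] = load_dtb70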


class BenchmarkEvaluator:
    """Evaluate a tracker on standard benchmarks.

    Usage:
        from vil_tracker.inference.online_tracker import OnlineTracker
        from vil_tracker.evaluation.evaluate import BenchmarkEvaluator

        online_tracker = OnlineTracker(model, device='cuda')
        evaluator = BenchmarkEvaluator(online_tracker)
        results = evaluator.evaluate_dataset('/path/to/LaSOT', 'lasot')
        print(f"LaSOT AUC: {results['mean_seq_auc']:.3f}")
    """

    def __init__(self, tracker, device='cuda'):
        self.tracker = tracker
        self.device = device

    def _load_image(self, path):
        """Load an image as an RGB array, preferring PIL with an OpenCV fallback."""
        try:
            from PIL import Image
        except ImportError:
            import cv2
            img = cv2.imread(path)
            if img is not None:
                return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Unreadable frame: return a black placeholder so the run continues.
            return np.zeros((480, 640, 3), dtype=np.uint8)
        img = Image.open(path).convert('RGB')
        return np.array(img)

    def evaluate_sequence(self, frames_paths, gt_boxes):
        """Evaluate the tracker on a single sequence.

        Args:
            frames_paths: list of image file paths
            gt_boxes: list of [x, y, w, h] ground truth boxes (None = absent;
                the first box must be valid)
        Returns:
            dict with per-frame IoUs, distances, and metrics
        """
        # One-pass evaluation (OPE): initialize on the first frame's GT box.
        first_frame = self._load_image(frames_paths[0])
        self.tracker.initialize(first_frame, gt_boxes[0])

        pred_boxes = [gt_boxes[0]]
        ious = [1.0]
        center_dists = [0.0]
        norm_dists = [0.0]

        for i in range(1, len(frames_paths)):
            frame = self._load_image(frames_paths[i])
            pred_box = self.tracker.track(frame)
            pred_boxes.append(pred_box)

            if gt_boxes[i] is not None:
                iou = compute_iou(pred_box, gt_boxes[i])
                cdist = compute_center_distance(pred_box, gt_boxes[i])
                ndist = compute_normalized_center_distance(pred_box, gt_boxes[i])
                ious.append(iou)
                center_dists.append(cdist)
                norm_dists.append(ndist)
            else:
                # Target absent in the GT: score the frame conservatively as a miss.
                ious.append(0.0)
                center_dists.append(float('inf'))
                norm_dists.append(float('inf'))

        auc = compute_auc(ious)
        precision = compute_precision(center_dists)
        norm_precision = compute_normalized_precision(norm_dists)

        return {
            'pred_boxes': pred_boxes,
            'ious': ious,
            'center_dists': center_dists,
            'norm_dists': norm_dists,
            'auc': auc,
            'precision': precision,
            'norm_precision': norm_precision,
            'mean_iou': np.mean(ious),
        }

    def evaluate_dataset(self, dataset_path, dataset_type='lasot', save_results=None):
        """Evaluate on a full dataset.

        Args:
            dataset_path: path to dataset root
            dataset_type: 'lasot', 'uav123', 'dtb70', or 'visdrone'
            save_results: optional path to save JSON results
        Returns:
            dict with overall metrics and per-sequence results
        """
        loader = DATASET_LOADERS.get(dataset_type)
        if loader is None:
            raise ValueError(f"Unknown dataset type: {dataset_type}. "
                             f"Supported: {list(DATASET_LOADERS.keys())}")

        sequences = loader(dataset_path)

        if not sequences:
            print(f"Warning: No sequences loaded from {dataset_path}")
            return {'overall_auc': 0, 'mean_seq_auc': 0, 'num_sequences': 0}

        print(f"Evaluating on {dataset_type}: {len(sequences)} sequences")

        results = {}
        all_ious = []
        all_center_dists = []
        all_norm_dists = []

        for seq_idx, (seq_name, seq_data) in enumerate(sequences.items()):
            print(f"  [{seq_idx+1}/{len(sequences)}] {seq_name} "
                  f"({len(seq_data['frames'])} frames)...", end='', flush=True)

            seq_result = self.evaluate_sequence(seq_data['frames'], seq_data['gt'])
            # Cast numpy scalars to plain floats so the summary stays
            # JSON-serializable.
            results[seq_name] = {
                'auc': float(seq_result['auc']),
                'precision': float(seq_result['precision']),
                'norm_precision': float(seq_result['norm_precision']),
                'mean_iou': float(seq_result['mean_iou']),
                'num_frames': len(seq_data['frames']),
            }
            all_ious.extend(seq_result['ious'])
            all_center_dists.extend(seq_result['center_dists'])
            all_norm_dists.extend(seq_result['norm_dists'])

            print(f" AUC={seq_result['auc']:.3f}")

        overall_auc = compute_auc(all_ious)
        per_seq_auc = {name: r['auc'] for name, r in results.items()}
        mean_seq_auc = np.mean(list(per_seq_auc.values())) if per_seq_auc else 0.0

        overall_precision = compute_precision(all_center_dists)
        overall_norm_prec = compute_normalized_precision(all_norm_dists)

        summary = {
            'dataset': dataset_type,
            'overall_auc': float(overall_auc),
            'mean_seq_auc': float(mean_seq_auc),
            'precision_20px': float(overall_precision),
            'normalized_precision': float(overall_norm_prec),
            'num_sequences': len(sequences),
            'num_frames': len(all_ious),
            'per_sequence': results,
        }

        print(f"\n{'='*50}")
        print(f"{dataset_type.upper()} Results:")
        print(f"  AUC (overall): {overall_auc:.3f}")
        print(f"  AUC (mean seq): {mean_seq_auc:.3f}")
        print(f"  Precision (20px): {overall_precision:.3f}")
        print(f"  Norm. Precision: {overall_norm_prec:.3f}")
        print(f"  Sequences: {len(sequences)}")
        print(f"  Total frames: {len(all_ious)}")
        print(f"{'='*50}")

        if save_results:
            os.makedirs(os.path.dirname(save_results) or '.', exist_ok=True)
            with open(save_results, 'w') as f:
                json.dump(summary, f, indent=2)
            print(f"Results saved to {save_results}")

        return summary

    def evaluate_multiple(self, dataset_configs):
        """Evaluate on multiple benchmarks and print a cross-dataset summary.

        Args:
            dataset_configs: list of (dataset_path, dataset_type) tuples
        Returns:
            dict of {dataset_type: results}
        """
        all_results = {}
        for dataset_path, dataset_type in dataset_configs:
            results = self.evaluate_dataset(dataset_path, dataset_type)
            all_results[dataset_type] = results

        # Cross-dataset summary table.
        print(f"\n{'='*60}")
        print(f"{'Dataset':<15} {'AUC':>8} {'Prec@20':>8} {'NormPrec':>8} {'Seqs':>6}")
        print(f"{'-'*60}")
        for dt, r in all_results.items():
            print(f"{dt:<15} {r['mean_seq_auc']:>8.3f} "
                  f"{r.get('precision_20px', 0):>8.3f} "
                  f"{r.get('normalized_precision', 0):>8.3f} "
                  f"{r['num_sequences']:>6}")
        print(f"{'='*60}")

        return all_results
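

# Hypothetical helper (not part of the original API): a do-nothing tracker
# that satisfies the initialize()/track() interface BenchmarkEvaluator
# expects; handy for smoke-testing dataset loaders and the evaluation loop
# without a real model, e.g.
#   BenchmarkEvaluator(_ConstantBoxTracker()).evaluate_dataset(path, 'dtb70')
class _ConstantBoxTracker:
    """Trivial tracker that always reports the initial box."""

    def initialize(self, frame, box):
        self._box = list(box)

    def track(self, frame):
        return list(self._box)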
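

if __name__ == '__main__':
    # When run directly, just exercise the metric self-check; real benchmark
    # runs go through BenchmarkEvaluator with an actual tracker.
    _sanity_check_metrics()
    print('Metric self-check passed.')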