# vil_tracker/evaluation/evaluate.py
"""
Benchmark evaluator for tracking datasets.
Supports:
- LaSOT: Large-scale Single Object Tracking
- UAV123: 123 video sequences captured from low-altitude UAVs
- DTB70: Drone Tracking Benchmark (70 sequences)
- VisDrone-SOT: Vision meets Drone SOT
Metrics: AUC (Success), Precision, Normalized Precision
"""
import os
import json
import numpy as np
from collections import defaultdict
def compute_iou(box_a, box_b):
"""Compute IoU between two boxes in [x, y, w, h] format."""
xa1, ya1 = box_a[0], box_a[1]
xa2, ya2 = xa1 + box_a[2], ya1 + box_a[3]
xb1, yb1 = box_b[0], box_b[1]
xb2, yb2 = xb1 + box_b[2], yb1 + box_b[3]
inter_x1 = max(xa1, xb1)
inter_y1 = max(ya1, yb1)
inter_x2 = min(xa2, xb2)
inter_y2 = min(ya2, yb2)
inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)
area_a = box_a[2] * box_a[3]
area_b = box_b[2] * box_b[3]
union_area = area_a + area_b - inter_area
return inter_area / max(union_area, 1e-6)
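# Illustrative sanity checks (not in the original file):
#   compute_iou([0, 0, 10, 10], [0, 0, 10, 10])    -> 1.0 (identical boxes)
#   compute_iou([0, 0, 10, 10], [20, 20, 10, 10])  -> 0.0 (disjoint boxes)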
def compute_center_distance(box_a, box_b):
"""Compute center distance between two boxes in [x, y, w, h] format."""
ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
return np.linalg.norm(ca - cb)
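# The module docstring advertises Precision and Normalized Precision, but the
# original file only provides compute_center_distance. Below is a minimal
# sketch of both, assuming the usual conventions (OTB-style 0-50 px
# center-error thresholds; TrackingNet-style normalization of the center error
# by the ground-truth box size). Names and thresholds are illustrative, not
# part of the original upload.
def compute_precision_curve(center_dists, thresholds=None):
    """Fraction of frames whose center error falls within each threshold."""
    if thresholds is None:
        thresholds = np.arange(0, 51, 1)  # pixels; 20 px is the common report point
    center_dists = np.array(center_dists)
    precision = np.array([np.mean(center_dists <= t) for t in thresholds])
    return thresholds, precision
def compute_normalized_center_distance(box_a, box_b):
    """Center distance normalized elementwise by the ground-truth (box_b) size."""
    ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
    cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
    size = np.maximum(np.array([box_b[2], box_b[3]], dtype=float), 1e-6)
    return np.linalg.norm((ca - cb) / size)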
def compute_success_curve(ious, thresholds=None):
"""Compute success curve (fraction of frames with IoU > threshold)."""
if thresholds is None:
thresholds = np.arange(0, 1.05, 0.05)
ious = np.array(ious)
success = np.array([np.mean(ious >= t) for t in thresholds])
return thresholds, success
def compute_auc(ious):
    """Compute AUC of the success curve via the trapezoidal rule."""
    thresholds, success = compute_success_curve(ious)
    # np.trapz was renamed to np.trapezoid in NumPy 2.0; support both.
    trapezoid = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz
    return trapezoid(success, thresholds) / (thresholds[-1] - thresholds[0])
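# Illustrative sanity check (not in the original file): perfect tracking
# saturates the metric, since success is 1 at every threshold.
#   compute_auc([1.0, 1.0, 1.0])  -> 1.0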
class BenchmarkEvaluator:
"""Evaluate tracker on standard benchmarks."""
def __init__(self, tracker, device='cuda'):
self.tracker = tracker
self.device = device
def evaluate_sequence(self, frames, gt_boxes):
"""Evaluate on a single sequence.
Args:
frames: list of (H, W, 3) numpy arrays
gt_boxes: list of [x, y, w, h] ground truth boxes
Returns:
dict with per-frame IoUs and metrics
"""
# Initialize with first frame
self.tracker.initialize(frames[0], gt_boxes[0])
pred_boxes = [gt_boxes[0]] # First frame is given
ious = [1.0]
for i in range(1, len(frames)):
pred_box = self.tracker.track(frames[i])
pred_boxes.append(pred_box)
if gt_boxes[i] is not None and gt_boxes[i][2] > 0 and gt_boxes[i][3] > 0:
iou = compute_iou(pred_box, gt_boxes[i])
ious.append(iou)
else:
ious.append(0.0)
auc = compute_auc(ious)
return {
'pred_boxes': pred_boxes,
'ious': ious,
'auc': auc,
'mean_iou': np.mean(ious),
}
def evaluate_dataset(self, dataset_path, dataset_type='lasot'):
"""Evaluate on a full dataset.
Args:
dataset_path: path to dataset root
dataset_type: 'lasot', 'uav123', 'dtb70', or 'visdrone'
Returns:
dict with overall metrics and per-sequence results
"""
sequences = self._load_dataset(dataset_path, dataset_type)
results = {}
all_ious = []
for seq_name, (frames, gt_boxes) in sequences.items():
print(f"Evaluating {seq_name}...")
seq_result = self.evaluate_sequence(frames, gt_boxes)
results[seq_name] = seq_result
all_ious.extend(seq_result['ious'])
overall_auc = compute_auc(all_ious)
per_seq_auc = {name: r['auc'] for name, r in results.items()}
mean_seq_auc = np.mean(list(per_seq_auc.values())) if per_seq_auc else 0.0
return {
'overall_auc': overall_auc,
'mean_seq_auc': mean_seq_auc,
'per_sequence': per_seq_auc,
'num_sequences': len(sequences),
'num_frames': len(all_ious),
}
def _load_dataset(self, dataset_path, dataset_type):
"""Load dataset sequences. Returns dict of {name: (frames, gt_boxes)}."""
# Placeholder - real implementation would load actual dataset files
print(f"Loading {dataset_type} from {dataset_path}")
return {}
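
# Illustrative smoke test (not in the original upload): a dummy tracker that
# always reports its initial box, run on a synthetic drifting sequence. Any
# tracker exposing initialize(frame, box) and track(frame) -> [x, y, w, h]
# plugs into BenchmarkEvaluator the same way.
if __name__ == '__main__':
    class _StaticTracker:
        """Baseline that never moves: always predicts the initial box."""
        def initialize(self, frame, box):
            self.box = list(box)
        def track(self, frame):
            return self.box
    frames = [np.zeros((240, 320, 3), dtype=np.uint8) for _ in range(5)]
    # Ground truth drifts 10 px right per frame, so the static baseline decays.
    gt_boxes = [[50 + 10 * i, 60, 40, 40] for i in range(5)]
    evaluator = BenchmarkEvaluator(_StaticTracker(), device='cpu')
    result = evaluator.evaluate_sequence(frames, gt_boxes)
    print(f"mean IoU: {result['mean_iou']:.3f}, AUC: {result['auc']:.3f}")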