"""
Benchmark evaluator for tracking datasets.
Supports:
- LaSOT: Large-scale Single Object Tracking (280 test sequences)
- UAV123: UAV tracking benchmark (123 aerial sequences)
- DTB70: Drone Tracking Benchmark (70 sequences)
- VisDrone-SOT: Vision meets Drone SOT
Metrics: AUC (Success), Precision, Normalized Precision
Dataset structure:
LaSOT (test):
root/{category}/{seq_name}/img/XXXXXXXX.jpg
root/{category}/{seq_name}/groundtruth.txt
UAV123:
root/data_seq/UAV123/{seq_name}/*.jpg
root/anno/UAV123/{seq_name}.txt
DTB70:
root/{seq_name}/img/*.jpg
root/{seq_name}/groundtruth_rect.txt
VisDrone-SOT (test-dev):
root/sequences/{seq_name}/*.jpg
root/annotations/{seq_name}.txt
"""
import os
import glob
import json
import numpy as np
from pathlib import Path
from collections import defaultdict
def compute_iou(box_a, box_b):
"""Compute IoU between two boxes in [x, y, w, h] format."""
xa1, ya1 = box_a[0], box_a[1]
xa2, ya2 = xa1 + box_a[2], ya1 + box_a[3]
xb1, yb1 = box_b[0], box_b[1]
xb2, yb2 = xb1 + box_b[2], yb1 + box_b[3]
inter_x1 = max(xa1, xb1)
inter_y1 = max(ya1, yb1)
inter_x2 = min(xa2, xb2)
inter_y2 = min(ya2, yb2)
inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)
area_a = box_a[2] * box_a[3]
area_b = box_b[2] * box_b[3]
union_area = area_a + area_b - inter_area
return inter_area / max(union_area, 1e-6)
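# Worked example (illustrative values): [0, 0, 2, 2] vs [1, 0, 2, 2] overlap
# in a 1x2 region, so compute_iou returns 2 / (4 + 4 - 2) = 1/3.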
def compute_center_distance(box_a, box_b):
"""Compute center distance between two boxes in [x, y, w, h] format."""
ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
return np.linalg.norm(ca - cb)
def compute_normalized_center_distance(box_a, box_b):
    """Compute center distance normalized by the GT box diagonal.

    Note: this diagonal normalization is a simplification; the
    TrackingNet/LaSOT definition normalizes each axis by the GT width
    and height instead (see the reference comment below).
    """
    ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
    cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
    dist = np.linalg.norm(ca - cb)
    # Normalize by the GT diagonal
    gt_diag = np.sqrt(box_b[2] ** 2 + box_b[3] ** 2) + 1e-6
    return dist / gt_diag
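# Reference: the TrackingNet/LaSOT per-axis variant would be (sketch, not
# used here):
#   size = np.array([box_b[2], box_b[3]]) + 1e-6
#   return np.linalg.norm((ca - cb) / size)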
def compute_success_curve(ious, thresholds=None):
"""Compute success curve (fraction of frames with IoU > threshold)."""
if thresholds is None:
thresholds = np.arange(0, 1.05, 0.05)
ious = np.array(ious)
success = np.array([np.mean(ious >= t) for t in thresholds])
return thresholds, success
def compute_auc(ious):
    """Compute AUC (area under the success curve).

    Following the OTB/LaSOT toolkits, this is the mean of the success
    rates over the uniformly sampled thresholds; on a uniform grid this
    matches the normalized trapezoidal integral up to endpoint weighting,
    and it avoids np.trapz, which was removed in NumPy 2.0.
    """
    _, success = compute_success_curve(ious)
    return float(np.mean(success))
def compute_precision(center_dists, threshold=20):
"""Compute precision at given pixel threshold."""
dists = np.array(center_dists)
return np.mean(dists <= threshold)
def compute_normalized_precision(norm_dists, threshold=0.5):
"""Compute normalized precision."""
dists = np.array(norm_dists)
return np.mean(dists <= threshold)
# ============================================================
# Dataset loaders
# ============================================================
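# All loaders return the same structure, so the evaluator can stay
# dataset-agnostic:
#   {seq_name: {'frames': [image paths...], 'gt': [[x, y, w, h] or None, ...]}}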
def load_annotations_txt(filepath):
"""Load annotations from a text file with x,y,w,h per line."""
boxes = []
with open(filepath, 'r') as f:
for line in f:
line = line.strip()
if not line:
boxes.append(None)
continue
            parts = line.replace(',', ' ').replace('\t', ' ').split()
            try:
                vals = [float(x) for x in parts[:4]]
                # Treat NaN entries (used by UAV123 for occluded/absent
                # frames) and zero-area boxes as "target absent".
                if any(np.isnan(v) for v in vals) or vals[2] <= 0 or vals[3] <= 0:
                    boxes.append(None)
                else:
                    boxes.append(vals)
            except (ValueError, IndexError):
                boxes.append(None)
return boxes
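# Accepted per-line formats include comma-, tab-, and space-separated boxes,
# e.g. "336,165,26,61" (illustrative values); lines such as "NaN,NaN,NaN,NaN"
# map to None.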
def load_lasot_test(root):
    """Load LaSOT test sequences.

    Structure: root/{category}/{seq_name}/img/*.jpg + groundtruth.txt
    Test split heuristic: the last 20% of sequences per category. On the
    full 1400-sequence release this yields 280 sequences, matching the
    official count, but the official protocol uses the fixed list in
    testing_set.txt, which is not simply the last sequences in sorted order.
    """
root = Path(root)
sequences = {}
categories = sorted([d for d in root.iterdir() if d.is_dir()])
for cat_dir in categories:
seq_dirs = sorted([d for d in cat_dir.iterdir() if d.is_dir()])
# Test split: last 20%
test_seqs = seq_dirs[int(len(seq_dirs) * 0.8):]
for seq_dir in test_seqs:
gt_file = seq_dir / 'groundtruth.txt'
img_dir = seq_dir / 'img'
if not gt_file.exists() or not img_dir.exists():
continue
gt_boxes = load_annotations_txt(str(gt_file))
frames = sorted(glob.glob(str(img_dir / '*.jpg')))
if len(frames) >= 2 and len(gt_boxes) >= 2:
min_len = min(len(frames), len(gt_boxes))
seq_name = f"{cat_dir.name}/{seq_dir.name}"
sequences[seq_name] = {
'frames': frames[:min_len],
'gt': gt_boxes[:min_len],
}
return sequences
def load_uav123(root):
"""Load UAV123 sequences.
Structure:
root/data_seq/UAV123/{seq_name}/*.jpg
root/anno/UAV123/{seq_name}.txt
"""
root = Path(root)
sequences = {}
anno_dir = root / 'anno' / 'UAV123'
frame_dir = root / 'data_seq' / 'UAV123'
if not anno_dir.exists():
# Alternative structure
anno_dir = root / 'anno'
frame_dir = root / 'data_seq'
if not anno_dir.exists():
print(f"Warning: UAV123 annotations not found at {anno_dir}")
return sequences
for anno_file in sorted(anno_dir.glob('*.txt')):
seq_name = anno_file.stem
seq_frame_dir = frame_dir / seq_name
if not seq_frame_dir.exists():
continue
gt_boxes = load_annotations_txt(str(anno_file))
frames = sorted(glob.glob(str(seq_frame_dir / '*.jpg')))
if not frames:
frames = sorted(glob.glob(str(seq_frame_dir / '*.png')))
if len(frames) >= 2 and len(gt_boxes) >= 2:
min_len = min(len(frames), len(gt_boxes))
sequences[seq_name] = {
'frames': frames[:min_len],
'gt': gt_boxes[:min_len],
}
return sequences
def load_dtb70(root):
"""Load DTB70 sequences.
Structure: root/{seq_name}/img/*.jpg + groundtruth_rect.txt
"""
root = Path(root)
sequences = {}
for seq_dir in sorted(root.iterdir()):
if not seq_dir.is_dir():
continue
gt_file = seq_dir / 'groundtruth_rect.txt'
if not gt_file.exists():
gt_file = seq_dir / 'groundtruth.txt'
if not gt_file.exists():
continue
img_dir = seq_dir / 'img'
if not img_dir.exists():
img_dir = seq_dir # frames directly in seq dir
gt_boxes = load_annotations_txt(str(gt_file))
frames = sorted(glob.glob(str(img_dir / '*.jpg')))
if not frames:
frames = sorted(glob.glob(str(img_dir / '*.png')))
if len(frames) >= 2 and len(gt_boxes) >= 2:
min_len = min(len(frames), len(gt_boxes))
sequences[seq_dir.name] = {
'frames': frames[:min_len],
'gt': gt_boxes[:min_len],
}
return sequences
def load_visdrone_sot(root):
"""Load VisDrone-SOT sequences.
Structure:
root/sequences/{seq_name}/*.jpg
root/annotations/{seq_name}.txt
"""
root = Path(root)
sequences = {}
anno_dir = root / 'annotations'
seq_dir = root / 'sequences'
if not anno_dir.exists() or not seq_dir.exists():
print(f"Warning: VisDrone-SOT not found at {root}")
return sequences
for anno_file in sorted(anno_dir.glob('*.txt')):
seq_name = anno_file.stem
frames_dir = seq_dir / seq_name
if not frames_dir.exists():
continue
gt_boxes = load_annotations_txt(str(anno_file))
frames = sorted(glob.glob(str(frames_dir / '*.jpg')))
if len(frames) >= 2 and len(gt_boxes) >= 2:
min_len = min(len(frames), len(gt_boxes))
sequences[seq_name] = {
'frames': frames[:min_len],
'gt': gt_boxes[:min_len],
}
return sequences
# ============================================================
# Evaluator
# ============================================================
DATASET_LOADERS = {
'lasot': load_lasot_test,
'uav123': load_uav123,
'dtb70': load_dtb70,
'visdrone': load_visdrone_sot,
}
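# A new benchmark can be plugged in by registering a loader with the same
# return contract, e.g. (hypothetical name and function):
#   DATASET_LOADERS['got10k'] = load_got10k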
class BenchmarkEvaluator:
"""Evaluate tracker on standard benchmarks.
Usage:
from vil_tracker.inference.online_tracker import OnlineTracker
from vil_tracker.evaluation.evaluate import BenchmarkEvaluator
online_tracker = OnlineTracker(model, device='cuda')
evaluator = BenchmarkEvaluator(online_tracker)
results = evaluator.evaluate_dataset('/path/to/LaSOT', 'lasot')
print(f"LaSOT AUC: {results['mean_seq_auc']:.3f}")
"""
def __init__(self, tracker, device='cuda'):
self.tracker = tracker
self.device = device
    def _load_image(self, path):
        """Load an RGB image, preferring PIL with an OpenCV fallback."""
        try:
            from PIL import Image
            return np.array(Image.open(path).convert('RGB'))
        except ImportError:
            import cv2
            img = cv2.imread(path)
            if img is not None:
                return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Unreadable frame: warn and return a black placeholder
            # rather than crash mid-sequence.
            print(f"Warning: could not read {path}; using a black frame")
            return np.zeros((480, 640, 3), dtype=np.uint8)
def evaluate_sequence(self, frames_paths, gt_boxes):
"""Evaluate on a single sequence.
Args:
frames_paths: list of image file paths
gt_boxes: list of [x, y, w, h] ground truth boxes (None = absent)
Returns:
dict with per-frame IoUs, distances, and metrics
"""
        # Load the first frame and initialize with the GT box (OPE protocol).
        if gt_boxes[0] is None:
            raise ValueError("First-frame ground truth is required for initialization")
        first_frame = self._load_image(frames_paths[0])
        self.tracker.initialize(first_frame, gt_boxes[0])
        pred_boxes = [gt_boxes[0]]  # the first frame is given, so it scores perfectly
        ious = [1.0]
        center_dists = [0.0]
        norm_dists = [0.0]
for i in range(1, len(frames_paths)):
frame = self._load_image(frames_paths[i])
pred_box = self.tracker.track(frame)
pred_boxes.append(pred_box)
if gt_boxes[i] is not None:
iou = compute_iou(pred_box, gt_boxes[i])
cdist = compute_center_distance(pred_box, gt_boxes[i])
ndist = compute_normalized_center_distance(pred_box, gt_boxes[i])
ious.append(iou)
center_dists.append(cdist)
norm_dists.append(ndist)
            else:
                # Target absent in GT: count the frame as a miss (IoU 0,
                # infinite center distance). Some toolkits skip such frames
                # instead; this choice penalizes any prediction equally.
                ious.append(0.0)
                center_dists.append(float('inf'))
                norm_dists.append(float('inf'))
auc = compute_auc(ious)
precision = compute_precision(center_dists)
norm_precision = compute_normalized_precision(norm_dists)
        return {
            'pred_boxes': pred_boxes,
            'ious': ious,
            'center_dists': center_dists,
            'norm_dists': norm_dists,  # consumed by evaluate_dataset
            'auc': auc,
            'precision': precision,
            'norm_precision': norm_precision,
            'mean_iou': float(np.mean(ious)),
        }
def evaluate_dataset(self, dataset_path, dataset_type='lasot', save_results=None):
"""Evaluate on a full dataset.
Args:
dataset_path: path to dataset root
dataset_type: 'lasot', 'uav123', 'dtb70', or 'visdrone'
save_results: optional path to save JSON results
Returns:
dict with overall metrics and per-sequence results
"""
loader = DATASET_LOADERS.get(dataset_type)
if loader is None:
raise ValueError(f"Unknown dataset type: {dataset_type}. "
f"Supported: {list(DATASET_LOADERS.keys())}")
sequences = loader(dataset_path)
if not sequences:
print(f"Warning: No sequences loaded from {dataset_path}")
return {'overall_auc': 0, 'mean_seq_auc': 0, 'num_sequences': 0}
print(f"Evaluating on {dataset_type}: {len(sequences)} sequences")
results = {}
all_ious = []
all_center_dists = []
all_norm_dists = []
for seq_idx, (seq_name, seq_data) in enumerate(sequences.items()):
print(f" [{seq_idx+1}/{len(sequences)}] {seq_name} "
f"({len(seq_data['frames'])} frames)...", end='', flush=True)
seq_result = self.evaluate_sequence(seq_data['frames'], seq_data['gt'])
results[seq_name] = {
'auc': seq_result['auc'],
'precision': seq_result['precision'],
'norm_precision': seq_result['norm_precision'],
'mean_iou': seq_result['mean_iou'],
'num_frames': len(seq_data['frames']),
}
all_ious.extend(seq_result['ious'])
all_center_dists.extend(seq_result['center_dists'])
all_norm_dists.extend(seq_result['norm_dists'])
print(f" AUC={seq_result['auc']:.3f}")
overall_auc = compute_auc(all_ious)
per_seq_auc = {name: r['auc'] for name, r in results.items()}
mean_seq_auc = np.mean(list(per_seq_auc.values())) if per_seq_auc else 0.0
overall_precision = compute_precision(all_center_dists)
overall_norm_prec = compute_normalized_precision(all_norm_dists)
summary = {
'dataset': dataset_type,
'overall_auc': float(overall_auc),
'mean_seq_auc': float(mean_seq_auc),
'precision_20px': float(overall_precision),
'normalized_precision': float(overall_norm_prec),
'num_sequences': len(sequences),
'num_frames': len(all_ious),
'per_sequence': results,
}
print(f"\n{'='*50}")
print(f"{dataset_type.upper()} Results:")
print(f" AUC (overall): {overall_auc:.3f}")
print(f" AUC (mean seq): {mean_seq_auc:.3f}")
print(f" Precision (20px): {overall_precision:.3f}")
print(f" Norm. Precision: {overall_norm_prec:.3f}")
print(f" Sequences: {len(sequences)}")
print(f" Total frames: {len(all_ious)}")
print(f"{'='*50}")
# Save results to JSON
if save_results:
os.makedirs(os.path.dirname(save_results) or '.', exist_ok=True)
with open(save_results, 'w') as f:
json.dump(summary, f, indent=2)
print(f"Results saved to {save_results}")
return summary
def evaluate_multiple(self, dataset_configs):
"""Evaluate on multiple benchmarks.
Args:
dataset_configs: list of (dataset_path, dataset_type) tuples
Returns:
dict of {dataset_type: results}
"""
all_results = {}
for dataset_path, dataset_type in dataset_configs:
results = self.evaluate_dataset(dataset_path, dataset_type)
all_results[dataset_type] = results
# Print comparison table
print(f"\n{'='*60}")
print(f"{'Dataset':<15} {'AUC':>8} {'Prec@20':>8} {'NormPrec':>8} {'Seqs':>6}")
print(f"{'-'*60}")
for dt, r in all_results.items():
print(f"{dt:<15} {r['mean_seq_auc']:>8.3f} "
f"{r.get('precision_20px', 0):>8.3f} "
f"{r.get('normalized_precision', 0):>8.3f} "
f"{r['num_sequences']:>6}")
print(f"{'='*60}")
return all_results
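# Minimal command-line entry point: a sketch that exercises the evaluation
# pipeline end to end with a trivial static-box baseline (it always reports
# the initialization box). The baseline class is illustrative only; swap in
# a real tracker such as OnlineTracker from the usage docstring above for
# meaningful numbers.
if __name__ == '__main__':
    import argparse

    class _StaticBoxTracker:
        """Baseline: always predicts the first-frame box."""

        def initialize(self, frame, box):
            self.box = list(box)

        def track(self, frame):
            return self.box

    parser = argparse.ArgumentParser(description='Benchmark evaluation')
    parser.add_argument('dataset_path', help='Path to the dataset root')
    parser.add_argument('--type', default='lasot', choices=sorted(DATASET_LOADERS))
    parser.add_argument('--save', default=None, help='Optional JSON output path')
    args = parser.parse_args()

    evaluator = BenchmarkEvaluator(_StaticBoxTracker())
    evaluator.evaluate_dataset(args.dataset_path, args.type, save_results=args.save)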