"""
Benchmark evaluator for tracking datasets.
Supports:
- LaSOT: Large-scale Single Object Tracking (280 test sequences)
- UAV123: UAV tracking benchmark (123 aerial sequences)
- DTB70: Drone Tracking Benchmark (70 sequences)
- VisDrone-SOT: Vision meets Drone SOT
Metrics: AUC (Success), Precision, Normalized Precision
Dataset structure:
LaSOT (test):
root/{category}/{seq_name}/img/XXXXXXXX.jpg
root/{category}/{seq_name}/groundtruth.txt
UAV123:
root/data_seq/UAV123/{seq_name}/*.jpg
root/anno/UAV123/{seq_name}.txt
DTB70:
root/{seq_name}/img/*.jpg
root/{seq_name}/groundtruth_rect.txt
VisDrone-SOT (test-dev):
root/sequences/{seq_name}/*.jpg
root/annotations/{seq_name}.txt
"""
import os
import glob
import json
import numpy as np
from pathlib import Path
from collections import defaultdict
def compute_iou(box_a, box_b):
"""Compute IoU between two boxes in [x, y, w, h] format."""
xa1, ya1 = box_a[0], box_a[1]
xa2, ya2 = xa1 + box_a[2], ya1 + box_a[3]
xb1, yb1 = box_b[0], box_b[1]
xb2, yb2 = xb1 + box_b[2], yb1 + box_b[3]
inter_x1 = max(xa1, xb1)
inter_y1 = max(ya1, yb1)
inter_x2 = min(xa2, xb2)
inter_y2 = min(ya2, yb2)
inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)
area_a = box_a[2] * box_a[3]
area_b = box_b[2] * box_b[3]
union_area = area_a + area_b - inter_area
return inter_area / max(union_area, 1e-6)
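# Worked example (illustrative values): [0, 0, 2, 2] vs [1, 0, 2, 2] overlap
# in a 1x2 region, so compute_iou returns 2 / (4 + 4 - 2) = 1/3.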
def compute_center_distance(box_a, box_b):
"""Compute center distance between two boxes in [x, y, w, h] format."""
ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
return np.linalg.norm(ca - cb)
def compute_normalized_center_distance(box_a, box_b):
    """Compute center distance normalized by the GT box diagonal.

    Note: this diagonal normalization is a simplification; the
    TrackingNet/LaSOT definition normalizes each axis by the GT width
    and height instead (see the reference comment below).
    """
    ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
    cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
    dist = np.linalg.norm(ca - cb)
    # Normalize by the GT diagonal
    gt_diag = np.sqrt(box_b[2] ** 2 + box_b[3] ** 2) + 1e-6
    return dist / gt_diag
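# Reference: the TrackingNet/LaSOT per-axis variant would be (sketch, not
# used here):
#   size = np.array([box_b[2], box_b[3]]) + 1e-6
#   return np.linalg.norm((ca - cb) / size)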
def compute_success_curve(ious, thresholds=None):
"""Compute success curve (fraction of frames with IoU > threshold)."""
if thresholds is None:
thresholds = np.arange(0, 1.05, 0.05)
ious = np.array(ious)
success = np.array([np.mean(ious >= t) for t in thresholds])
return thresholds, success
def compute_auc(ious):
    """Compute AUC (area under the success curve).

    Following the OTB/LaSOT toolkits, this is the mean of the success
    rates over the uniformly sampled thresholds; on a uniform grid this
    matches the normalized trapezoidal integral up to endpoint weighting,
    and it avoids np.trapz, which was removed in NumPy 2.0.
    """
    _, success = compute_success_curve(ious)
    return float(np.mean(success))
def compute_precision(center_dists, threshold=20):
"""Compute precision at given pixel threshold."""
dists = np.array(center_dists)
return np.mean(dists <= threshold)
def compute_normalized_precision(norm_dists, threshold=0.5):
"""Compute normalized precision."""
dists = np.array(norm_dists)
return np.mean(dists <= threshold)
# ============================================================
# Dataset loaders
# ============================================================
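# All loaders return the same structure, so the evaluator can stay
# dataset-agnostic:
#   {seq_name: {'frames': [image paths...], 'gt': [[x, y, w, h] or None, ...]}}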
def load_annotations_txt(filepath):
"""Load annotations from a text file with x,y,w,h per line."""
boxes = []
with open(filepath, 'r') as f:
for line in f:
line = line.strip()
if not line:
boxes.append(None)
continue
            parts = line.replace(',', ' ').replace('\t', ' ').split()
            try:
                vals = [float(x) for x in parts[:4]]
                # Treat NaN entries (used by UAV123 for occluded/absent
                # frames) and zero-area boxes as "target absent".
                if any(np.isnan(v) for v in vals) or vals[2] <= 0 or vals[3] <= 0:
                    boxes.append(None)
                else:
                    boxes.append(vals)
            except (ValueError, IndexError):
                boxes.append(None)
return boxes
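# Accepted per-line formats include comma-, tab-, and space-separated boxes,
# e.g. "336,165,26,61" (illustrative values); lines such as "NaN,NaN,NaN,NaN"
# map to None.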
def load_lasot_test(root):
    """Load LaSOT test sequences.

    Structure: root/{category}/{seq_name}/img/*.jpg + groundtruth.txt
    Test split heuristic: the last 20% of sequences per category. On the
    full 1400-sequence release this yields 280 sequences, matching the
    official count, but the official protocol uses the fixed list in
    testing_set.txt, which is not simply the last sequences in sorted order.
    """
root = Path(root)
sequences = {}
categories = sorted([d for d in root.iterdir() if d.is_dir()])
for cat_dir in categories:
seq_dirs = sorted([d for d in cat_dir.iterdir() if d.is_dir()])
# Test split: last 20%
test_seqs = seq_dirs[int(len(seq_dirs) * 0.8):]
for seq_dir in test_seqs:
gt_file = seq_dir / 'groundtruth.txt'
img_dir = seq_dir / 'img'
if not gt_file.exists() or not img_dir.exists():
continue
gt_boxes = load_annotations_txt(str(gt_file))
frames = sorted(glob.glob(str(img_dir / '*.jpg')))
if len(frames) >= 2 and len(gt_boxes) >= 2:
min_len = min(len(frames), len(gt_boxes))
seq_name = f"{cat_dir.name}/{seq_dir.name}"
sequences[seq_name] = {
'frames': frames[:min_len],
'gt': gt_boxes[:min_len],
}
return sequences
def load_uav123(root):
"""Load UAV123 sequences.
Structure:
root/data_seq/UAV123/{seq_name}/*.jpg
root/anno/UAV123/{seq_name}.txt
"""
root = Path(root)
sequences = {}
anno_dir = root / 'anno' / 'UAV123'
frame_dir = root / 'data_seq' / 'UAV123'
if not anno_dir.exists():
# Alternative structure
anno_dir = root / 'anno'
frame_dir = root / 'data_seq'
if not anno_dir.exists():
print(f"Warning: UAV123 annotations not found at {anno_dir}")
return sequences
for anno_file in sorted(anno_dir.glob('*.txt')):
seq_name = anno_file.stem
seq_frame_dir = frame_dir / seq_name
if not seq_frame_dir.exists():
continue
gt_boxes = load_annotations_txt(str(anno_file))
frames = sorted(glob.glob(str(seq_frame_dir / '*.jpg')))
if not frames:
frames = sorted(glob.glob(str(seq_frame_dir / '*.png')))
if len(frames) >= 2 and len(gt_boxes) >= 2:
min_len = min(len(frames), len(gt_boxes))
sequences[seq_name] = {
'frames': frames[:min_len],
'gt': gt_boxes[:min_len],
}
return sequences
def load_dtb70(root):
"""Load DTB70 sequences.
Structure: root/{seq_name}/img/*.jpg + groundtruth_rect.txt
"""
root = Path(root)
sequences = {}
for seq_dir in sorted(root.iterdir()):
if not seq_dir.is_dir():
continue
gt_file = seq_dir / 'groundtruth_rect.txt'
if not gt_file.exists():
gt_file = seq_dir / 'groundtruth.txt'
if not gt_file.exists():
continue
img_dir = seq_dir / 'img'
if not img_dir.exists():
img_dir = seq_dir # frames directly in seq dir
gt_boxes = load_annotations_txt(str(gt_file))
frames = sorted(glob.glob(str(img_dir / '*.jpg')))
if not frames:
frames = sorted(glob.glob(str(img_dir / '*.png')))
if len(frames) >= 2 and len(gt_boxes) >= 2:
min_len = min(len(frames), len(gt_boxes))
sequences[seq_dir.name] = {
'frames': frames[:min_len],
'gt': gt_boxes[:min_len],
}
return sequences
def load_visdrone_sot(root):
"""Load VisDrone-SOT sequences.
Structure:
root/sequences/{seq_name}/*.jpg
root/annotations/{seq_name}.txt
"""
root = Path(root)
sequences = {}
anno_dir = root / 'annotations'
seq_dir = root / 'sequences'
if not anno_dir.exists() or not seq_dir.exists():
print(f"Warning: VisDrone-SOT not found at {root}")
return sequences
for anno_file in sorted(anno_dir.glob('*.txt')):
seq_name = anno_file.stem
frames_dir = seq_dir / seq_name
if not frames_dir.exists():
continue
gt_boxes = load_annotations_txt(str(anno_file))
frames = sorted(glob.glob(str(frames_dir / '*.jpg')))
if len(frames) >= 2 and len(gt_boxes) >= 2:
min_len = min(len(frames), len(gt_boxes))
sequences[seq_name] = {
'frames': frames[:min_len],
'gt': gt_boxes[:min_len],
}
return sequences
# ============================================================
# Evaluator
# ============================================================
DATASET_LOADERS = {
'lasot': load_lasot_test,
'uav123': load_uav123,
'dtb70': load_dtb70,
'visdrone': load_visdrone_sot,
}
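# A new benchmark can be plugged in by registering a loader with the same
# return contract, e.g. (hypothetical name and function):
#   DATASET_LOADERS['got10k'] = load_got10k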
class BenchmarkEvaluator:
"""Evaluate tracker on standard benchmarks.
Usage:
from vil_tracker.inference.online_tracker import OnlineTracker
from vil_tracker.evaluation.evaluate import BenchmarkEvaluator
online_tracker = OnlineTracker(model, device='cuda')
evaluator = BenchmarkEvaluator(online_tracker)
results = evaluator.evaluate_dataset('/path/to/LaSOT', 'lasot')
print(f"LaSOT AUC: {results['mean_seq_auc']:.3f}")
"""
def __init__(self, tracker, device='cuda'):
self.tracker = tracker
self.device = device
    def _load_image(self, path):
        """Load an RGB image, preferring PIL with an OpenCV fallback."""
        try:
            from PIL import Image
            return np.array(Image.open(path).convert('RGB'))
        except ImportError:
            import cv2
            img = cv2.imread(path)
            if img is not None:
                return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Unreadable frame: warn and return a black placeholder
            # rather than crash mid-sequence.
            print(f"Warning: could not read {path}; using a black frame")
            return np.zeros((480, 640, 3), dtype=np.uint8)
def evaluate_sequence(self, frames_paths, gt_boxes):
"""Evaluate on a single sequence.
Args:
frames_paths: list of image file paths
gt_boxes: list of [x, y, w, h] ground truth boxes (None = absent)
Returns:
dict with per-frame IoUs, distances, and metrics
"""
        # Load the first frame and initialize with the GT box (OPE protocol).
        if gt_boxes[0] is None:
            raise ValueError("First-frame ground truth is required for initialization")
        first_frame = self._load_image(frames_paths[0])
        self.tracker.initialize(first_frame, gt_boxes[0])
        pred_boxes = [gt_boxes[0]]  # the first frame is given, so it scores perfectly
        ious = [1.0]
        center_dists = [0.0]
        norm_dists = [0.0]
for i in range(1, len(frames_paths)):
frame = self._load_image(frames_paths[i])
pred_box = self.tracker.track(frame)
pred_boxes.append(pred_box)
if gt_boxes[i] is not None:
iou = compute_iou(pred_box, gt_boxes[i])
cdist = compute_center_distance(pred_box, gt_boxes[i])
ndist = compute_normalized_center_distance(pred_box, gt_boxes[i])
ious.append(iou)
center_dists.append(cdist)
norm_dists.append(ndist)
            else:
                # Target absent in GT: count the frame as a miss (IoU 0,
                # infinite center distance). Some toolkits skip such frames
                # instead; this choice penalizes any prediction equally.
                ious.append(0.0)
                center_dists.append(float('inf'))
                norm_dists.append(float('inf'))
auc = compute_auc(ious)
precision = compute_precision(center_dists)
norm_precision = compute_normalized_precision(norm_dists)
        return {
            'pred_boxes': pred_boxes,
            'ious': ious,
            'center_dists': center_dists,
            'norm_dists': norm_dists,  # consumed by evaluate_dataset
            'auc': auc,
            'precision': precision,
            'norm_precision': norm_precision,
            'mean_iou': float(np.mean(ious)),
        }
def evaluate_dataset(self, dataset_path, dataset_type='lasot', save_results=None):
"""Evaluate on a full dataset.
Args:
dataset_path: path to dataset root
dataset_type: 'lasot', 'uav123', 'dtb70', or 'visdrone'
save_results: optional path to save JSON results
Returns:
dict with overall metrics and per-sequence results
"""
loader = DATASET_LOADERS.get(dataset_type)
if loader is None:
raise ValueError(f"Unknown dataset type: {dataset_type}. "
f"Supported: {list(DATASET_LOADERS.keys())}")
sequences = loader(dataset_path)
if not sequences:
print(f"Warning: No sequences loaded from {dataset_path}")
return {'overall_auc': 0, 'mean_seq_auc': 0, 'num_sequences': 0}
print(f"Evaluating on {dataset_type}: {len(sequences)} sequences")
results = {}
all_ious = []
all_center_dists = []
all_norm_dists = []
for seq_idx, (seq_name, seq_data) in enumerate(sequences.items()):
print(f" [{seq_idx+1}/{len(sequences)}] {seq_name} "
f"({len(seq_data['frames'])} frames)...", end='', flush=True)
seq_result = self.evaluate_sequence(seq_data['frames'], seq_data['gt'])
results[seq_name] = {
'auc': seq_result['auc'],
'precision': seq_result['precision'],
'norm_precision': seq_result['norm_precision'],
'mean_iou': seq_result['mean_iou'],
'num_frames': len(seq_data['frames']),
}
all_ious.extend(seq_result['ious'])
all_center_dists.extend(seq_result['center_dists'])
all_norm_dists.extend(seq_result['norm_dists'])
print(f" AUC={seq_result['auc']:.3f}")
overall_auc = compute_auc(all_ious)
per_seq_auc = {name: r['auc'] for name, r in results.items()}
mean_seq_auc = np.mean(list(per_seq_auc.values())) if per_seq_auc else 0.0
overall_precision = compute_precision(all_center_dists)
overall_norm_prec = compute_normalized_precision(all_norm_dists)
summary = {
'dataset': dataset_type,
'overall_auc': float(overall_auc),
'mean_seq_auc': float(mean_seq_auc),
'precision_20px': float(overall_precision),
'normalized_precision': float(overall_norm_prec),
'num_sequences': len(sequences),
'num_frames': len(all_ious),
'per_sequence': results,
}
print(f"\n{'='*50}")
print(f"{dataset_type.upper()} Results:")
print(f" AUC (overall): {overall_auc:.3f}")
print(f" AUC (mean seq): {mean_seq_auc:.3f}")
print(f" Precision (20px): {overall_precision:.3f}")
print(f" Norm. Precision: {overall_norm_prec:.3f}")
print(f" Sequences: {len(sequences)}")
print(f" Total frames: {len(all_ious)}")
print(f"{'='*50}")
# Save results to JSON
if save_results:
os.makedirs(os.path.dirname(save_results) or '.', exist_ok=True)
with open(save_results, 'w') as f:
json.dump(summary, f, indent=2)
print(f"Results saved to {save_results}")
return summary
def evaluate_multiple(self, dataset_configs):
"""Evaluate on multiple benchmarks.
Args:
dataset_configs: list of (dataset_path, dataset_type) tuples
Returns:
dict of {dataset_type: results}
"""
all_results = {}
for dataset_path, dataset_type in dataset_configs:
results = self.evaluate_dataset(dataset_path, dataset_type)
all_results[dataset_type] = results
# Print comparison table
print(f"\n{'='*60}")
print(f"{'Dataset':<15} {'AUC':>8} {'Prec@20':>8} {'NormPrec':>8} {'Seqs':>6}")
print(f"{'-'*60}")
for dt, r in all_results.items():
print(f"{dt:<15} {r['mean_seq_auc']:>8.3f} "
f"{r.get('precision_20px', 0):>8.3f} "
f"{r.get('normalized_precision', 0):>8.3f} "
f"{r['num_sequences']:>6}")
print(f"{'='*60}")
return all_results
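# Minimal command-line entry point: a sketch that exercises the evaluation
# pipeline end to end with a trivial static-box baseline (it always reports
# the initialization box). The baseline class is illustrative only; swap in
# a real tracker such as OnlineTracker from the usage docstring above for
# meaningful numbers.
if __name__ == '__main__':
    import argparse

    class _StaticBoxTracker:
        """Baseline: always predicts the first-frame box."""

        def initialize(self, frame, box):
            self.box = list(box)

        def track(self, frame):
            return self.box

    parser = argparse.ArgumentParser(description='Benchmark evaluation')
    parser.add_argument('dataset_path', help='Path to the dataset root')
    parser.add_argument('--type', default='lasot', choices=sorted(DATASET_LOADERS))
    parser.add_argument('--save', default=None, help='Optional JSON output path')
    args = parser.parse_args()

    evaluator = BenchmarkEvaluator(_StaticBoxTracker())
    evaluator.evaluate_dataset(args.dataset_path, args.type, save_results=args.save)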