# vil_tracker/evaluation/evaluate.py
"""
Benchmark evaluator for tracking datasets.
Supports:
- LaSOT: Large-scale Single Object Tracking
- UAV123: 123 video sequences captured from low-altitude UAVs
- DTB70: Drone Tracking Benchmark (70 sequences)
- VisDrone-SOT: Vision meets Drone SOT
Metrics: AUC (Success), Precision, Normalized Precision
"""
import os
import json
import numpy as np
from collections import defaultdict
def compute_iou(box_a, box_b):
"""Compute IoU between two boxes in [x, y, w, h] format."""
xa1, ya1 = box_a[0], box_a[1]
xa2, ya2 = xa1 + box_a[2], ya1 + box_a[3]
xb1, yb1 = box_b[0], box_b[1]
xb2, yb2 = xb1 + box_b[2], yb1 + box_b[3]
inter_x1 = max(xa1, xb1)
inter_y1 = max(ya1, yb1)
inter_x2 = min(xa2, xb2)
inter_y2 = min(ya2, yb2)
inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)
area_a = box_a[2] * box_a[3]
area_b = box_b[2] * box_b[3]
union_area = area_a + area_b - inter_area
return inter_area / max(union_area, 1e-6)
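# Illustrative sanity checks (not in the original file):
#   compute_iou([0, 0, 10, 10], [0, 0, 10, 10])    -> 1.0 (identical boxes)
#   compute_iou([0, 0, 10, 10], [20, 20, 10, 10])  -> 0.0 (disjoint boxes)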
def compute_center_distance(box_a, box_b):
"""Compute center distance between two boxes in [x, y, w, h] format."""
ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
return np.linalg.norm(ca - cb)
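# The module docstring advertises Precision and Normalized Precision, but the
# original file only provides compute_center_distance. Below is a minimal
# sketch of both, assuming the usual conventions (OTB-style 0-50 px
# center-error thresholds; TrackingNet-style normalization of the center error
# by the ground-truth box size). Names and thresholds are illustrative, not
# part of the original upload.
def compute_precision_curve(center_dists, thresholds=None):
    """Fraction of frames whose center error falls within each threshold."""
    if thresholds is None:
        thresholds = np.arange(0, 51, 1)  # pixels; 20 px is the common report point
    center_dists = np.array(center_dists)
    precision = np.array([np.mean(center_dists <= t) for t in thresholds])
    return thresholds, precision
def compute_normalized_center_distance(box_a, box_b):
    """Center distance normalized elementwise by the ground-truth (box_b) size."""
    ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
    cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
    size = np.maximum(np.array([box_b[2], box_b[3]], dtype=float), 1e-6)
    return np.linalg.norm((ca - cb) / size)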
def compute_success_curve(ious, thresholds=None):
"""Compute success curve (fraction of frames with IoU > threshold)."""
if thresholds is None:
thresholds = np.arange(0, 1.05, 0.05)
ious = np.array(ious)
success = np.array([np.mean(ious >= t) for t in thresholds])
return thresholds, success
def compute_auc(ious):
    """Compute AUC of the success curve via the trapezoidal rule."""
    thresholds, success = compute_success_curve(ious)
    # np.trapz was renamed to np.trapezoid in NumPy 2.0; support both.
    trapezoid = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz
    return trapezoid(success, thresholds) / (thresholds[-1] - thresholds[0])
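# Illustrative sanity check (not in the original file): perfect tracking
# saturates the metric, since success is 1 at every threshold.
#   compute_auc([1.0, 1.0, 1.0])  -> 1.0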
class BenchmarkEvaluator:
"""Evaluate tracker on standard benchmarks."""
def __init__(self, tracker, device='cuda'):
self.tracker = tracker
self.device = device
def evaluate_sequence(self, frames, gt_boxes):
"""Evaluate on a single sequence.
Args:
frames: list of (H, W, 3) numpy arrays
gt_boxes: list of [x, y, w, h] ground truth boxes
Returns:
dict with per-frame IoUs and metrics
"""
# Initialize with first frame
self.tracker.initialize(frames[0], gt_boxes[0])
pred_boxes = [gt_boxes[0]] # First frame is given
ious = [1.0]
for i in range(1, len(frames)):
pred_box = self.tracker.track(frames[i])
pred_boxes.append(pred_box)
if gt_boxes[i] is not None and gt_boxes[i][2] > 0 and gt_boxes[i][3] > 0:
iou = compute_iou(pred_box, gt_boxes[i])
ious.append(iou)
else:
ious.append(0.0)
auc = compute_auc(ious)
return {
'pred_boxes': pred_boxes,
'ious': ious,
'auc': auc,
'mean_iou': np.mean(ious),
}
def evaluate_dataset(self, dataset_path, dataset_type='lasot'):
"""Evaluate on a full dataset.
Args:
dataset_path: path to dataset root
dataset_type: 'lasot', 'uav123', 'dtb70', or 'visdrone'
Returns:
dict with overall metrics and per-sequence results
"""
sequences = self._load_dataset(dataset_path, dataset_type)
results = {}
all_ious = []
for seq_name, (frames, gt_boxes) in sequences.items():
print(f"Evaluating {seq_name}...")
seq_result = self.evaluate_sequence(frames, gt_boxes)
results[seq_name] = seq_result
all_ious.extend(seq_result['ious'])
overall_auc = compute_auc(all_ious)
per_seq_auc = {name: r['auc'] for name, r in results.items()}
mean_seq_auc = np.mean(list(per_seq_auc.values())) if per_seq_auc else 0.0
return {
'overall_auc': overall_auc,
'mean_seq_auc': mean_seq_auc,
'per_sequence': per_seq_auc,
'num_sequences': len(sequences),
'num_frames': len(all_ious),
}
def _load_dataset(self, dataset_path, dataset_type):
"""Load dataset sequences. Returns dict of {name: (frames, gt_boxes)}."""
# Placeholder - real implementation would load actual dataset files
print(f"Loading {dataset_type} from {dataset_path}")
return {}
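
# Illustrative smoke test (not in the original upload): a dummy tracker that
# always reports its initial box, run on a synthetic drifting sequence. Any
# tracker exposing initialize(frame, box) and track(frame) -> [x, y, w, h]
# plugs into BenchmarkEvaluator the same way.
if __name__ == '__main__':
    class _StaticTracker:
        """Baseline that never moves: always predicts the initial box."""
        def initialize(self, frame, box):
            self.box = list(box)
        def track(self, frame):
            return self.box
    frames = [np.zeros((240, 320, 3), dtype=np.uint8) for _ in range(5)]
    # Ground truth drifts 10 px right per frame, so the static baseline decays.
    gt_boxes = [[50 + 10 * i, 60, 40, 40] for i in range(5)]
    evaluator = BenchmarkEvaluator(_StaticTracker(), device='cpu')
    result = evaluator.evaluate_sequence(frames, gt_boxes)
    print(f"mean IoU: {result['mean_iou']:.3f}, AUC: {result['auc']:.3f}")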