# File size: 16,368 Bytes

"""
Benchmark evaluator for tracking datasets.

Supports:
- LaSOT: Large-scale Single Object Tracking (280 test sequences)
- UAV123: low-altitude UAV tracking benchmark (123 sequences)
- DTB70: Drone Tracking Benchmark (70 sequences)
- VisDrone-SOT: Vision meets Drone SOT

Metrics: AUC (Success), Precision, Normalized Precision

Dataset structure:
    LaSOT (test):
        root/{category}/{seq_name}/img/XXXXXXXX.jpg
        root/{category}/{seq_name}/groundtruth.txt

    UAV123:
        root/data_seq/UAV123/{seq_name}/*.jpg
        root/anno/UAV123/{seq_name}.txt

    DTB70:
        root/{seq_name}/img/*.jpg
        root/{seq_name}/groundtruth_rect.txt

    VisDrone-SOT (test-dev):
        root/sequences/{seq_name}/*.jpg
        root/annotations/{seq_name}.txt
"""

import os
import glob
import json
import numpy as np
from pathlib import Path
from collections import defaultdict


def compute_iou(box_a, box_b):
    """Return the intersection-over-union of two [x, y, w, h] boxes.

    The union is clamped to a minimum of 1e-6 so that two degenerate
    (zero-area) boxes yield 0 instead of dividing by zero.
    """
    ax, ay, aw, ah = box_a[0], box_a[1], box_a[2], box_a[3]
    bx, by, bw, bh = box_b[0], box_b[1], box_b[2], box_b[3]

    # Extent of the overlap along each axis; clamped at 0 when disjoint.
    overlap_w = max(0, min(ax + aw, bx + bw) - max(ax, bx))
    overlap_h = max(0, min(ay + ah, by + bh) - max(ay, by))

    intersection = overlap_w * overlap_h
    union = aw * ah + bw * bh - intersection

    return intersection / max(union, 1e-6)

def compute_center_distance(box_a, box_b):
    """Return the Euclidean distance between the centers of two [x, y, w, h] boxes."""
    center_a = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
    center_b = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
    offset = center_a - center_b
    return np.linalg.norm(offset)


def compute_normalized_center_distance(box_a, box_b):
    """Return the center distance of two [x, y, w, h] boxes scaled by box_b's diagonal.

    box_b is treated as the ground-truth box: the Euclidean distance between
    the two centers is divided by the length of box_b's diagonal (plus 1e-6
    to avoid division by zero).
    """
    center_a = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
    center_b = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
    pixel_dist = np.linalg.norm(center_a - center_b)

    # Diagonal of the ground-truth box, padded so a degenerate box is safe.
    diagonal = np.sqrt(box_b[2] ** 2 + box_b[3] ** 2) + 1e-6
    return pixel_dist / diagonal


def compute_success_curve(ious, thresholds=None):
    """Compute the success curve for a set of IoU values.

    For every threshold the success rate is the fraction of frames whose
    IoU is greater than or equal to that threshold.

    Args:
        ious: sequence of per-frame IoU values.
        thresholds: iterable of overlap thresholds; defaults to
            np.arange(0, 1.05, 0.05), i.e. 21 steps from 0 to 1.
    Returns:
        (thresholds, success) where success is an array holding one success
        rate per threshold.
    """
    if thresholds is None:
        thresholds = np.arange(0, 1.05, 0.05)

    overlaps = np.array(ious)
    success = np.fromiter(
        ((overlaps >= cutoff).mean() for cutoff in thresholds),
        dtype=float,
    )
    return thresholds, success


def compute_auc(ious):
    """Compute AUC from IoU values (Area Under Success Curve).

    The success curve from compute_success_curve() is integrated with the
    trapezoidal rule and divided by the threshold span, so the result is the
    average success rate over the threshold range.

    Args:
        ious: sequence of per-frame IoU values.
    Returns:
        The normalized area under the success curve.
    """
    thresholds, success = compute_success_curve(ious)
    # NumPy 2.0 deprecated np.trapz in favor of np.trapezoid. Prefer the new
    # name when it exists and only fall back to the legacy one on older NumPy.
    integrate = getattr(np, 'trapezoid', None) or np.trapz
    return integrate(success, thresholds) / (thresholds[-1] - thresholds[0])


def compute_precision(center_dists, threshold=20):
    """Return the fraction of frames whose center error is within `threshold` pixels."""
    within_threshold = np.array(center_dists) <= threshold
    return within_threshold.mean()


def compute_normalized_precision(norm_dists, threshold=0.5):
    """Return the fraction of frames whose normalized center error is within `threshold`."""
    within_threshold = np.array(norm_dists) <= threshold
    return within_threshold.mean()


# ============================================================
# Dataset loaders
# ============================================================

def load_annotations_txt(filepath):
    """Load annotations from a text file with x,y,w,h per line.

    Values may be separated by commas, tabs, or spaces. Exactly one entry is
    produced per line of the file so that entry i stays aligned with frame i.

    A line yields None (target absent / annotation unusable) when it:
      - is blank,
      - has fewer than four fields or a field that is not a number,
      - contains a non-finite value (NaN or inf), or
      - describes a box with non-positive width or height.

    Args:
        filepath: path to the annotation text file.
    Returns:
        list with one entry per line: [x, y, w, h] as floats, or None.
    """
    boxes = []
    with open(filepath, 'r') as f:
        for line in f:
            # str.split() with no argument splits on any whitespace run, so
            # only commas need to be normalized first.
            fields = line.replace(',', ' ').split()
            if len(fields) < 4:
                boxes.append(None)
                continue

            try:
                vals = [float(v) for v in fields[:4]]
            except ValueError:
                boxes.append(None)
                continue

            # float() happily parses "NaN"/"inf", and NaN slips through a
            # plain `<= 0` test; reject non-finite rows explicitly so they
            # are treated as "target absent" instead of poisoning the metrics.
            if not np.isfinite(vals).all() or vals[2] <= 0 or vals[3] <= 0:
                boxes.append(None)
            else:
                boxes.append(vals)
    return boxes


def load_lasot_test(root):
    """Collect the LaSOT test sequences found under `root`.

    Expected layout: root/{category}/{seq_name}/img/*.jpg together with
    root/{category}/{seq_name}/groundtruth.txt.

    Within each category the sequence directories are sorted by name and the
    last 20% are used as the test split. Sequences lacking the annotation
    file or the image folder, or with fewer than two frames/annotations, are
    skipped. Frames and annotations are truncated to their common length.

    Returns:
        dict mapping "{category}/{seq_name}" to {'frames': [...], 'gt': [...]}.
    """
    root = Path(root)
    sequences = {}

    for category in sorted(p for p in root.iterdir() if p.is_dir()):
        candidates = sorted(p for p in category.iterdir() if p.is_dir())
        # Everything from the 80% mark onwards belongs to the test split.
        first_test_idx = int(len(candidates) * 0.8)

        for seq_path in candidates[first_test_idx:]:
            annotation_path = seq_path / 'groundtruth.txt'
            image_folder = seq_path / 'img'
            if not (annotation_path.exists() and image_folder.exists()):
                continue

            gt_boxes = load_annotations_txt(str(annotation_path))
            frames = sorted(glob.glob(str(image_folder / '*.jpg')))
            if len(frames) < 2 or len(gt_boxes) < 2:
                continue

            usable = min(len(frames), len(gt_boxes))
            sequences[f"{category.name}/{seq_path.name}"] = {
                'frames': frames[:usable],
                'gt': gt_boxes[:usable],
            }

    return sequences


def load_uav123(root):
    """Collect the UAV123 sequences found under `root`.

    Expected layout:
        root/data_seq/UAV123/{seq_name}/*.jpg
        root/anno/UAV123/{seq_name}.txt

    If root/anno/UAV123 is missing, the flatter root/anno + root/data_seq
    layout is tried instead. Frames fall back to *.png when no *.jpg files
    exist. Annotation files without a matching frame directory, and sequences
    with fewer than two frames/annotations, are skipped. Frames and
    annotations are truncated to their common length.

    Returns:
        dict mapping seq_name to {'frames': [...], 'gt': [...]}; empty (after
        printing a warning) when no annotation directory is found.
    """
    root = Path(root)
    sequences = {}

    anno_dir = root / 'anno' / 'UAV123'
    frame_dir = root / 'data_seq' / 'UAV123'
    if not anno_dir.exists():
        # Fall back to the layout without the extra "UAV123" level.
        anno_dir, frame_dir = root / 'anno', root / 'data_seq'

    if not anno_dir.exists():
        print(f"Warning: UAV123 annotations not found at {anno_dir}")
        return sequences

    for anno_file in sorted(anno_dir.glob('*.txt')):
        seq_name = anno_file.stem
        image_folder = frame_dir / seq_name
        if not image_folder.exists():
            continue

        gt_boxes = load_annotations_txt(str(anno_file))

        # Prefer JPEG frames; only look for PNG when there are none.
        frames = []
        for pattern in ('*.jpg', '*.png'):
            frames = sorted(glob.glob(str(image_folder / pattern)))
            if frames:
                break

        if len(frames) < 2 or len(gt_boxes) < 2:
            continue

        usable = min(len(frames), len(gt_boxes))
        sequences[seq_name] = {
            'frames': frames[:usable],
            'gt': gt_boxes[:usable],
        }

    return sequences


def load_dtb70(root):
    """Collect the DTB70 sequences found under `root`.

    Expected layout: root/{seq_name}/img/*.jpg together with
    root/{seq_name}/groundtruth_rect.txt.

    Fallbacks: groundtruth.txt is used when groundtruth_rect.txt is missing,
    frames are read from the sequence directory itself when there is no img/
    sub-folder, and *.png is tried when no *.jpg files exist. Sequences
    without annotations or with fewer than two frames/annotations are
    skipped. Frames and annotations are truncated to their common length.

    Returns:
        dict mapping seq_name to {'frames': [...], 'gt': [...]}.
    """
    root = Path(root)
    sequences = {}

    for seq_path in sorted(root.iterdir()):
        if not seq_path.is_dir():
            continue

        # First existing annotation file wins; preferred name comes first.
        annotation_path = next(
            (candidate
             for candidate in (seq_path / 'groundtruth_rect.txt',
                               seq_path / 'groundtruth.txt')
             if candidate.exists()),
            None,
        )
        if annotation_path is None:
            continue

        nested_images = seq_path / 'img'
        # Some copies of the dataset keep the frames directly in the sequence dir.
        image_folder = nested_images if nested_images.exists() else seq_path

        gt_boxes = load_annotations_txt(str(annotation_path))

        frames = []
        for pattern in ('*.jpg', '*.png'):
            frames = sorted(glob.glob(str(image_folder / pattern)))
            if frames:
                break

        if len(frames) < 2 or len(gt_boxes) < 2:
            continue

        usable = min(len(frames), len(gt_boxes))
        sequences[seq_path.name] = {
            'frames': frames[:usable],
            'gt': gt_boxes[:usable],
        }

    return sequences


def load_visdrone_sot(root):
    """Collect the VisDrone-SOT sequences found under `root`.

    Expected layout:
        root/sequences/{seq_name}/*.jpg
        root/annotations/{seq_name}.txt

    Annotation files without a matching frame directory, and sequences with
    fewer than two frames/annotations, are skipped. Frames and annotations
    are truncated to their common length.

    Returns:
        dict mapping seq_name to {'frames': [...], 'gt': [...]}; empty (after
        printing a warning) when either top-level directory is missing.
    """
    root = Path(root)
    sequences = {}

    anno_dir = root / 'annotations'
    seq_dir = root / 'sequences'
    if not (anno_dir.exists() and seq_dir.exists()):
        print(f"Warning: VisDrone-SOT not found at {root}")
        return sequences

    for anno_file in sorted(anno_dir.glob('*.txt')):
        seq_name = anno_file.stem
        image_folder = seq_dir / seq_name
        if not image_folder.exists():
            continue

        gt_boxes = load_annotations_txt(str(anno_file))
        frames = sorted(glob.glob(str(image_folder / '*.jpg')))
        if len(frames) < 2 or len(gt_boxes) < 2:
            continue

        usable = min(len(frames), len(gt_boxes))
        sequences[seq_name] = {
            'frames': frames[:usable],
            'gt': gt_boxes[:usable],
        }

    return sequences


# ============================================================
# Evaluator
# ============================================================

# Registry mapping a dataset type name to the function that loads it.
# Each loader takes the dataset root path and returns
# {seq_name: {'frames': [...], 'gt': [...]}}.
DATASET_LOADERS = dict(
    lasot=load_lasot_test,
    uav123=load_uav123,
    dtb70=load_dtb70,
    visdrone=load_visdrone_sot,
)


class BenchmarkEvaluator:
    """Evaluate tracker on standard benchmarks.

    The tracker object must provide:
        initialize(first_frame, box) -- called once per sequence with the
            first frame (RGB array) and its ground-truth [x, y, w, h] box.
        track(frame) -- called for every following frame; returns the
            predicted [x, y, w, h] box, or None to report the target absent.

    Usage:
        from vil_tracker.inference.online_tracker import OnlineTracker
        from vil_tracker.evaluation.evaluate import BenchmarkEvaluator
        
        online_tracker = OnlineTracker(model, device='cuda')
        evaluator = BenchmarkEvaluator(online_tracker)
        results = evaluator.evaluate_dataset('/path/to/LaSOT', 'lasot')
        print(f"LaSOT AUC: {results['mean_seq_auc']:.3f}")
    """

    def __init__(self, tracker, device='cuda'):
        """Store the tracker under evaluation.

        Args:
            tracker: object exposing initialize(frame, box) and track(frame).
            device: device name kept on the instance; the evaluator itself
                does not use it.
        """
        self.tracker = tracker
        self.device = device

    def _load_image(self, path):
        """Load the image at `path` as an RGB numpy array.

        PIL is used when available. If PIL cannot be imported, OpenCV is used
        instead; in that fallback an unreadable file yields a black 480x640
        RGB frame rather than raising.
        """
        try:
            from PIL import Image
            img = Image.open(path).convert('RGB')
            return np.array(img)
        except ImportError:
            import cv2
            img = cv2.imread(path)
            if img is not None:
                # OpenCV decodes to BGR; convert to match the PIL code path.
                return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            return np.zeros((480, 640, 3), dtype=np.uint8)

    def evaluate_sequence(self, frames_paths, gt_boxes):
        """Evaluate on a single sequence.

        The tracker is initialized on the first frame with its ground-truth
        box; that frame is recorded as a perfect prediction. Every following
        frame is scored as:
            - target present, box predicted: real IoU / center distances;
            - target absent, tracker returns None: IoU 1, distances 0;
            - any other combination: IoU 0, distances infinite.

        Args:
            frames_paths: list of image file paths
            gt_boxes: list of [x, y, w, h] ground truth boxes (None = absent)
        Returns:
            dict with keys 'pred_boxes', 'ious', 'center_dists', 'norm_dists'
            (per-frame lists) and 'auc', 'precision', 'norm_precision',
            'mean_iou' (sequence-level metrics).
        """
        # Load first frame and initialize
        first_frame = self._load_image(frames_paths[0])
        self.tracker.initialize(first_frame, gt_boxes[0])

        pred_boxes = [gt_boxes[0]]  # First frame is given
        ious = [1.0]
        center_dists = [0.0]
        norm_dists = [0.0]

        for i in range(1, len(frames_paths)):
            frame = self._load_image(frames_paths[i])
            pred_box = self.tracker.track(frame)
            pred_boxes.append(pred_box)

            gt_box = gt_boxes[i]
            if gt_box is not None and pred_box is not None:
                ious.append(compute_iou(pred_box, gt_box))
                center_dists.append(compute_center_distance(pred_box, gt_box))
                norm_dists.append(
                    compute_normalized_center_distance(pred_box, gt_box))
            elif gt_box is None and pred_box is None:
                # Target absent and the tracker correctly reported no box.
                ious.append(1.0)
                center_dists.append(0.0)
                norm_dists.append(0.0)
            else:
                # Either a box was predicted for an absent target, or no box
                # was predicted for a visible one: count the frame as a miss.
                ious.append(0.0)
                center_dists.append(float('inf'))
                norm_dists.append(float('inf'))

        auc = compute_auc(ious)
        precision = compute_precision(center_dists)
        norm_precision = compute_normalized_precision(norm_dists)

        return {
            'pred_boxes': pred_boxes,
            'ious': ious,
            'center_dists': center_dists,
            # evaluate_dataset() pools these across sequences, so they must be
            # part of the result.
            'norm_dists': norm_dists,
            'auc': auc,
            'precision': precision,
            'norm_precision': norm_precision,
            'mean_iou': np.mean(ious),
        }

    def evaluate_dataset(self, dataset_path, dataset_type='lasot', save_results=None):
        """Evaluate on a full dataset.

        Progress and a final summary table are printed to stdout.

        Args:
            dataset_path: path to dataset root
            dataset_type: 'lasot', 'uav123', 'dtb70', or 'visdrone'
            save_results: optional path to save JSON results
        Returns:
            dict with overall metrics ('overall_auc' pooled over all frames,
            'mean_seq_auc' averaged over sequences, 'precision_20px',
            'normalized_precision', 'num_sequences', 'num_frames') and
            per-sequence results under 'per_sequence'. When no sequence could
            be loaded, all metrics are 0 and 'per_sequence' is empty.
        Raises:
            ValueError: if `dataset_type` is not a supported dataset name.
        """
        loader = DATASET_LOADERS.get(dataset_type)
        if loader is None:
            raise ValueError(f"Unknown dataset type: {dataset_type}. "
                             f"Supported: {list(DATASET_LOADERS.keys())}")

        sequences = loader(dataset_path)

        if not sequences:
            print(f"Warning: No sequences loaded from {dataset_path}")
            # Same keys as the regular summary so callers can index uniformly.
            return {
                'dataset': dataset_type,
                'overall_auc': 0,
                'mean_seq_auc': 0,
                'precision_20px': 0,
                'normalized_precision': 0,
                'num_sequences': 0,
                'num_frames': 0,
                'per_sequence': {},
            }

        print(f"Evaluating on {dataset_type}: {len(sequences)} sequences")

        results = {}
        all_ious = []
        all_center_dists = []
        all_norm_dists = []

        for seq_idx, (seq_name, seq_data) in enumerate(sequences.items()):
            print(f"  [{seq_idx+1}/{len(sequences)}] {seq_name} "
                  f"({len(seq_data['frames'])} frames)...", end='', flush=True)

            seq_result = self.evaluate_sequence(seq_data['frames'], seq_data['gt'])
            # Plain floats keep the per-sequence entries JSON-friendly.
            results[seq_name] = {
                'auc': float(seq_result['auc']),
                'precision': float(seq_result['precision']),
                'norm_precision': float(seq_result['norm_precision']),
                'mean_iou': float(seq_result['mean_iou']),
                'num_frames': len(seq_data['frames']),
            }
            all_ious.extend(seq_result['ious'])
            all_center_dists.extend(seq_result['center_dists'])
            all_norm_dists.extend(seq_result['norm_dists'])

            print(f" AUC={seq_result['auc']:.3f}")

        # "Overall" metrics pool every frame of every sequence, whereas
        # mean_seq_auc gives each sequence equal weight regardless of length.
        overall_auc = compute_auc(all_ious)
        per_seq_auc = {name: r['auc'] for name, r in results.items()}
        mean_seq_auc = np.mean(list(per_seq_auc.values())) if per_seq_auc else 0.0

        overall_precision = compute_precision(all_center_dists)
        overall_norm_prec = compute_normalized_precision(all_norm_dists)

        summary = {
            'dataset': dataset_type,
            'overall_auc': float(overall_auc),
            'mean_seq_auc': float(mean_seq_auc),
            'precision_20px': float(overall_precision),
            'normalized_precision': float(overall_norm_prec),
            'num_sequences': len(sequences),
            'num_frames': len(all_ious),
            'per_sequence': results,
        }

        print(f"\n{'='*50}")
        print(f"{dataset_type.upper()} Results:")
        print(f"  AUC (overall):       {overall_auc:.3f}")
        print(f"  AUC (mean seq):      {mean_seq_auc:.3f}")
        print(f"  Precision (20px):    {overall_precision:.3f}")
        print(f"  Norm. Precision:     {overall_norm_prec:.3f}")
        print(f"  Sequences:           {len(sequences)}")
        print(f"  Total frames:        {len(all_ious)}")
        print(f"{'='*50}")

        # Save results to JSON
        if save_results:
            # dirname is '' for a bare file name; fall back to the current dir.
            os.makedirs(os.path.dirname(save_results) or '.', exist_ok=True)
            with open(save_results, 'w') as f:
                json.dump(summary, f, indent=2)
            print(f"Results saved to {save_results}")

        return summary

    def evaluate_multiple(self, dataset_configs):
        """Evaluate on multiple benchmarks.

        Each dataset is evaluated with evaluate_dataset() and a comparison
        table is printed at the end. Results are keyed by dataset type, so a
        later entry with the same type replaces an earlier one.

        Args:
            dataset_configs: list of (dataset_path, dataset_type) tuples
        Returns:
            dict of {dataset_type: results}
        """
        all_results = {}
        for dataset_path, dataset_type in dataset_configs:
            results = self.evaluate_dataset(dataset_path, dataset_type)
            all_results[dataset_type] = results

        # Print comparison table
        print(f"\n{'='*60}")
        print(f"{'Dataset':<15} {'AUC':>8} {'Prec@20':>8} {'NormPrec':>8} {'Seqs':>6}")
        print(f"{'-'*60}")
        for dt, r in all_results.items():
            print(f"{dt:<15} {r['mean_seq_auc']:>8.3f} "
                  f"{r.get('precision_20px', 0):>8.3f} "
                  f"{r.get('normalized_precision', 0):>8.3f} "
                  f"{r['num_sequences']:>6}")
        print(f"{'='*60}")

        return all_results