File size: 4,548 Bytes
4e8b763
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
"""
Benchmark evaluator for tracking datasets.

Supports:
- LaSOT: Large-scale Single Object Tracking
- UAV123: UAV tracking benchmark with 123 sequences
- DTB70: Drone Tracking Benchmark
- VisDrone-SOT: Vision meets Drone SOT

Metrics: AUC (Success), Precision, Normalized Precision
"""

import os
import json
import numpy as np
from collections import defaultdict


def compute_iou(box_a, box_b):
    """Return the intersection-over-union of two [x, y, w, h] boxes.

    Args:
        box_a: first box as [x, y, w, h] (top-left corner plus size).
        box_b: second box in the same format.
    Returns:
        Intersection area divided by union area. The union is floored at
        1e-6 so two zero-area boxes yield 0.0 instead of dividing by zero.
    """
    ax, ay, aw, ah = box_a[0], box_a[1], box_a[2], box_a[3]
    bx, by, bw, bh = box_b[0], box_b[1], box_b[2], box_b[3]

    # Extent of the overlap along each axis; clamped at 0 when the boxes
    # do not overlap on that axis.
    overlap_w = max(0, min(ax + aw, bx + bw) - max(ax, bx))
    overlap_h = max(0, min(ay + ah, by + bh) - max(ay, by))
    intersection = overlap_w * overlap_h

    union = aw * ah + bw * bh - intersection
    return intersection / max(union, 1e-6)


def compute_center_distance(box_a, box_b):
    """Return the Euclidean distance between the centers of two boxes.

    Args:
        box_a: first box as [x, y, w, h].
        box_b: second box as [x, y, w, h].
    Returns:
        Distance between the two box centers, as computed by
        ``np.linalg.norm``.
    """
    # Center = top-left corner shifted by half the box size on each axis.
    centers = [
        np.array([box[0] + box[2] / 2, box[1] + box[3] / 2])
        for box in (box_a, box_b)
    ]
    return np.linalg.norm(centers[0] - centers[1])


def compute_success_curve(ious, thresholds=None):
    """Compute the success curve (fraction of frames with IoU >= threshold).

    Args:
        ious: sequence of per-frame IoU values.
        thresholds: optional sequence of overlap thresholds. Defaults to
            ``np.arange(0, 1.05, 0.05)``, i.e. 0 to 1 in steps of 0.05.
    Returns:
        Tuple ``(thresholds, success)`` where ``success[k]`` is the fraction
        of frames whose IoU is at least ``thresholds[k]``. ``thresholds`` is
        returned as passed in (or the default array). With no frames the
        success rate is reported as 0.0 at every threshold.
    """
    if thresholds is None:
        thresholds = np.arange(0, 1.05, 0.05)

    ious = np.array(ious)
    if ious.size == 0:
        # np.mean of an empty array is NaN (and emits a RuntimeWarning);
        # report a flat zero curve instead so downstream metrics stay finite.
        return thresholds, np.zeros(len(thresholds))

    success = np.array([np.mean(ious >= t) for t in thresholds])
    return thresholds, success


def compute_auc(ious):
    """Compute the area under the success curve for a set of IoU values.

    The success curve from ``compute_success_curve`` (default thresholds) is
    integrated with the trapezoidal rule and divided by the threshold span.

    Args:
        ious: sequence of per-frame IoU values.
    Returns:
        The normalized AUC, or 0.0 when ``ious`` is empty.
    """
    # No frames means no success curve to integrate; return 0.0 rather than
    # letting NaN propagate into aggregated results.
    if np.size(ious) == 0:
        return 0.0

    thresholds, success = compute_success_curve(ious)
    # NumPy 2.0 introduced np.trapezoid and deprecated np.trapz; use whichever
    # this NumPy provides so the function works on both old and new releases.
    integrate = np.trapezoid if hasattr(np, "trapezoid") else np.trapz
    return integrate(success, thresholds) / (thresholds[-1] - thresholds[0])


class BenchmarkEvaluator:
    """Evaluate tracker on standard benchmarks.

    The tracker is driven through two calls: ``initialize(frame, box)`` on
    the first frame and ``track(frame)`` on every later frame.
    """

    def __init__(self, tracker, device='cuda'):
        """Store the tracker under test.

        Args:
            tracker: object exposing ``initialize(frame, box)`` and
                ``track(frame)``.
            device: device identifier; stored on the instance but not read
                by any method of this class.
        """
        self.tracker = tracker
        self.device = device

    def evaluate_sequence(self, frames, gt_boxes):
        """Evaluate on a single sequence.

        Args:
            frames: list of (H, W, 3) numpy arrays
            gt_boxes: list of [x, y, w, h] ground truth boxes
        Returns:
            dict with keys 'pred_boxes', 'ious', 'auc' and 'mean_iou'
        Raises:
            IndexError: if ``frames`` or ``gt_boxes`` is an empty list, or
                ``gt_boxes`` is shorter than ``frames``.
        """
        # Initialize with first frame
        self.tracker.initialize(frames[0], gt_boxes[0])

        # The first frame is given to the tracker, so it is recorded as a
        # perfect prediction.
        pred_boxes = [gt_boxes[0]]
        ious = [1.0]

        # Index-based on purpose: a gt_boxes list shorter than frames raises
        # IndexError instead of being silently truncated.
        for i in range(1, len(frames)):
            pred_box = self.tracker.track(frames[i])
            pred_boxes.append(pred_box)

            gt_box = gt_boxes[i]
            has_valid_gt = (
                gt_box is not None and gt_box[2] > 0 and gt_box[3] > 0
            )
            # Frames without a usable ground-truth box (None or non-positive
            # width/height) are scored as IoU 0.0.
            ious.append(compute_iou(pred_box, gt_box) if has_valid_gt else 0.0)

        auc = compute_auc(ious)

        return {
            'pred_boxes': pred_boxes,
            'ious': ious,
            'auc': auc,
            'mean_iou': np.mean(ious),
        }

    def evaluate_dataset(self, dataset_path, dataset_type='lasot'):
        """Evaluate on a full dataset.

        Args:
            dataset_path: path to dataset root
            dataset_type: 'lasot', 'uav123', 'dtb70', or 'visdrone'
        Returns:
            dict with:
                'overall_auc': AUC over the IoUs of all frames pooled
                    together (0.0 when there are no frames),
                'mean_seq_auc': mean of the per-sequence AUCs (0.0 when
                    there are no sequences),
                'per_sequence': {sequence name: AUC},
                'num_sequences': number of sequences loaded,
                'num_frames': number of frames scored.
        """
        sequences = self._load_dataset(dataset_path, dataset_type)

        # Only the per-sequence AUC is reported, so predicted boxes are not
        # kept once a sequence has been scored.
        per_seq_auc = {}
        all_ious = []

        for seq_name, (frames, gt_boxes) in sequences.items():
            print(f"Evaluating {seq_name}...")
            seq_result = self.evaluate_sequence(frames, gt_boxes)
            per_seq_auc[seq_name] = seq_result['auc']
            all_ious.extend(seq_result['ious'])

        # Guard the empty case the same way as mean_seq_auc: with no frames
        # the pooled AUC would otherwise be NaN.
        overall_auc = compute_auc(all_ious) if all_ious else 0.0
        mean_seq_auc = np.mean(list(per_seq_auc.values())) if per_seq_auc else 0.0

        return {
            'overall_auc': overall_auc,
            'mean_seq_auc': mean_seq_auc,
            'per_sequence': per_seq_auc,
            'num_sequences': len(sequences),
            'num_frames': len(all_ious),
        }

    def _load_dataset(self, dataset_path, dataset_type):
        """Load dataset sequences. Returns dict of {name: (frames, gt_boxes)}.

        Currently a placeholder: it prints a message and returns an empty
        dict, so no sequences are evaluated.
        """
        # Placeholder - real implementation would load actual dataset files
        print(f"Loading {dataset_type} from {dataset_path}")
        return {}