omar-ah committed (verified)
Commit 0bd347a · 1 Parent(s): 64c9f85

Fix vil_tracker/evaluation/evaluate.py: audit corrections

Files changed (1):
  1. vil_tracker/evaluation/evaluate.py +373 -29

vil_tracker/evaluation/evaluate.py CHANGED
@@ -2,17 +2,36 @@
  Benchmark evaluator for tracking datasets.

  Supports:
- - LaSOT: Large-scale Single Object Tracking
- - UAV123: UAV tracking at 123 fps
- - DTB70: Drone Tracking Benchmark
+ - LaSOT: Large-scale Single Object Tracking (280 test sequences)
+ - UAV123: UAV aerial tracking benchmark (123 sequences)
+ - DTB70: Drone Tracking Benchmark (70 sequences)
  - VisDrone-SOT: Vision meets Drone SOT

  Metrics: AUC (Success), Precision, Normalized Precision
+
+ Dataset structure:
+     LaSOT (test):
+         root/{category}/{seq_name}/img/XXXXXXXX.jpg
+         root/{category}/{seq_name}/groundtruth.txt
+
+     UAV123:
+         root/data_seq/UAV123/{seq_name}/*.jpg
+         root/anno/UAV123/{seq_name}.txt
+
+     DTB70:
+         root/{seq_name}/img/*.jpg
+         root/{seq_name}/groundtruth_rect.txt
+
+     VisDrone-SOT (test-dev):
+         root/sequences/{seq_name}/*.jpg
+         root/annotations/{seq_name}.txt
  """

  import os
+ import glob
  import json
  import numpy as np
+ from pathlib import Path
  from collections import defaultdict

@@ -43,6 +62,16 @@ def compute_center_distance(box_a, box_b):
      return np.linalg.norm(ca - cb)


+ def compute_normalized_center_distance(box_a, box_b):
+     """Compute center distance normalized by GT size (for normalized precision)."""
+     ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
+     cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
+     dist = np.linalg.norm(ca - cb)
+     # Normalize by the GT box diagonal
+     gt_diag = np.sqrt(box_b[2] ** 2 + box_b[3] ** 2) + 1e-6
+     return dist / gt_diag
+
+
  def compute_success_curve(ious, thresholds=None):
      """Compute success curve (fraction of frames with IoU > threshold)."""
      if thresholds is None:
@@ -54,86 +83,401 @@ def compute_success_curve(ious, thresholds=None):


  def compute_auc(ious):
-     """Compute AUC from IoU values."""
+     """Compute AUC from IoU values (Area Under the Success Curve)."""
      thresholds, success = compute_success_curve(ious)
      return np.trapz(success, thresholds) / (thresholds[-1] - thresholds[0])


+ def compute_precision(center_dists, threshold=20):
+     """Compute precision at the given pixel threshold."""
+     dists = np.array(center_dists)
+     return np.mean(dists <= threshold)
+
+
+ def compute_normalized_precision(norm_dists, threshold=0.5):
+     """Compute normalized precision."""
+     dists = np.array(norm_dists)
+     return np.mean(dists <= threshold)
+
+
+ # ============================================================
+ # Dataset loaders
+ # ============================================================
+
+ def load_annotations_txt(filepath):
+     """Load annotations from a text file with x,y,w,h per line."""
+     boxes = []
+     with open(filepath, 'r') as f:
+         for line in f:
+             line = line.strip()
+             if not line:
+                 boxes.append(None)
+                 continue
+             parts = line.replace(',', ' ').replace('\t', ' ').split()
+             try:
+                 vals = [float(x) for x in parts[:4]]
+                 # Skip zero-area boxes
+                 if vals[2] <= 0 or vals[3] <= 0:
+                     boxes.append(None)
+                 else:
+                     boxes.append(vals)
+             except (ValueError, IndexError):
+                 boxes.append(None)
+     return boxes
+
+
+ def load_lasot_test(root):
+     """Load LaSOT test sequences.
+
+     Structure: root/{category}/{seq_name}/img/*.jpg + groundtruth.txt
+     Test split: last 20% of sequences per category.
+     """
+     root = Path(root)
+     sequences = {}
+
+     categories = sorted([d for d in root.iterdir() if d.is_dir()])
+     for cat_dir in categories:
+         seq_dirs = sorted([d for d in cat_dir.iterdir() if d.is_dir()])
+         # Test split: last 20%
+         test_seqs = seq_dirs[int(len(seq_dirs) * 0.8):]
+
+         for seq_dir in test_seqs:
+             gt_file = seq_dir / 'groundtruth.txt'
+             img_dir = seq_dir / 'img'
+
+             if not gt_file.exists() or not img_dir.exists():
+                 continue
+
+             gt_boxes = load_annotations_txt(str(gt_file))
+             frames = sorted(glob.glob(str(img_dir / '*.jpg')))
+
+             if len(frames) >= 2 and len(gt_boxes) >= 2:
+                 min_len = min(len(frames), len(gt_boxes))
+                 seq_name = f"{cat_dir.name}/{seq_dir.name}"
+                 sequences[seq_name] = {
+                     'frames': frames[:min_len],
+                     'gt': gt_boxes[:min_len],
+                 }
+
+     return sequences
+
+
+ def load_uav123(root):
+     """Load UAV123 sequences.
+
+     Structure:
+         root/data_seq/UAV123/{seq_name}/*.jpg
+         root/anno/UAV123/{seq_name}.txt
+     """
+     root = Path(root)
+     sequences = {}
+
+     anno_dir = root / 'anno' / 'UAV123'
+     frame_dir = root / 'data_seq' / 'UAV123'
+
+     if not anno_dir.exists():
+         # Alternative structure
+         anno_dir = root / 'anno'
+         frame_dir = root / 'data_seq'
+
+     if not anno_dir.exists():
+         print(f"Warning: UAV123 annotations not found at {anno_dir}")
+         return sequences
+
+     for anno_file in sorted(anno_dir.glob('*.txt')):
+         seq_name = anno_file.stem
+         seq_frame_dir = frame_dir / seq_name
+
+         if not seq_frame_dir.exists():
+             continue
+
+         gt_boxes = load_annotations_txt(str(anno_file))
+         frames = sorted(glob.glob(str(seq_frame_dir / '*.jpg')))
+         if not frames:
+             frames = sorted(glob.glob(str(seq_frame_dir / '*.png')))
+
+         if len(frames) >= 2 and len(gt_boxes) >= 2:
+             min_len = min(len(frames), len(gt_boxes))
+             sequences[seq_name] = {
+                 'frames': frames[:min_len],
+                 'gt': gt_boxes[:min_len],
+             }
+
+     return sequences
+
+
+ def load_dtb70(root):
+     """Load DTB70 sequences.
+
+     Structure: root/{seq_name}/img/*.jpg + groundtruth_rect.txt
+     """
+     root = Path(root)
+     sequences = {}
+
+     for seq_dir in sorted(root.iterdir()):
+         if not seq_dir.is_dir():
+             continue
+
+         gt_file = seq_dir / 'groundtruth_rect.txt'
+         if not gt_file.exists():
+             gt_file = seq_dir / 'groundtruth.txt'
+         if not gt_file.exists():
+             continue
+
+         img_dir = seq_dir / 'img'
+         if not img_dir.exists():
+             img_dir = seq_dir  # frames directly in seq dir
+
+         gt_boxes = load_annotations_txt(str(gt_file))
+         frames = sorted(glob.glob(str(img_dir / '*.jpg')))
+         if not frames:
+             frames = sorted(glob.glob(str(img_dir / '*.png')))
+
+         if len(frames) >= 2 and len(gt_boxes) >= 2:
+             min_len = min(len(frames), len(gt_boxes))
+             sequences[seq_dir.name] = {
+                 'frames': frames[:min_len],
+                 'gt': gt_boxes[:min_len],
+             }
+
+     return sequences
+
+
+ def load_visdrone_sot(root):
+     """Load VisDrone-SOT sequences.
+
+     Structure:
+         root/sequences/{seq_name}/*.jpg
+         root/annotations/{seq_name}.txt
+     """
+     root = Path(root)
+     sequences = {}
+
+     anno_dir = root / 'annotations'
+     seq_dir = root / 'sequences'
+
+     if not anno_dir.exists() or not seq_dir.exists():
+         print(f"Warning: VisDrone-SOT not found at {root}")
+         return sequences
+
+     for anno_file in sorted(anno_dir.glob('*.txt')):
+         seq_name = anno_file.stem
+         frames_dir = seq_dir / seq_name
+
+         if not frames_dir.exists():
+             continue
+
+         gt_boxes = load_annotations_txt(str(anno_file))
+         frames = sorted(glob.glob(str(frames_dir / '*.jpg')))
+
+         if len(frames) >= 2 and len(gt_boxes) >= 2:
+             min_len = min(len(frames), len(gt_boxes))
+             sequences[seq_name] = {
+                 'frames': frames[:min_len],
+                 'gt': gt_boxes[:min_len],
+             }
+
+     return sequences
+
+
+ # ============================================================
+ # Evaluator
+ # ============================================================
+
+ DATASET_LOADERS = {
+     'lasot': load_lasot_test,
+     'uav123': load_uav123,
+     'dtb70': load_dtb70,
+     'visdrone': load_visdrone_sot,
+ }
+
+
  class BenchmarkEvaluator:
-     """Evaluate tracker on standard benchmarks."""
+     """Evaluate tracker on standard benchmarks.
+
+     Usage:
+         from vil_tracker.inference.online_tracker import OnlineTracker
+         from vil_tracker.evaluation.evaluate import BenchmarkEvaluator
+
+         online_tracker = OnlineTracker(model, device='cuda')
+         evaluator = BenchmarkEvaluator(online_tracker)
+         results = evaluator.evaluate_dataset('/path/to/LaSOT', 'lasot')
+         print(f"LaSOT AUC: {results['mean_seq_auc']:.3f}")
+     """

      def __init__(self, tracker, device='cuda'):
          self.tracker = tracker
          self.device = device

-     def evaluate_sequence(self, frames, gt_boxes):
+     def _load_image(self, path):
+         """Load image from path as an RGB numpy array."""
+         try:
+             from PIL import Image
+             img = Image.open(path).convert('RGB')
+             return np.array(img)
+         except ImportError:
+             import cv2
+             img = cv2.imread(path)
+             if img is not None:
+                 return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+             return np.zeros((480, 640, 3), dtype=np.uint8)
+
+     def evaluate_sequence(self, frames_paths, gt_boxes):
          """Evaluate on a single sequence.

          Args:
-             frames: list of (H, W, 3) numpy arrays
-             gt_boxes: list of [x, y, w, h] ground truth boxes
+             frames_paths: list of image file paths
+             gt_boxes: list of [x, y, w, h] ground truth boxes (None = absent)
          Returns:
-             dict with per-frame IoUs and metrics
+             dict with per-frame IoUs, distances, and metrics
          """
-         # Initialize with first frame
-         self.tracker.initialize(frames[0], gt_boxes[0])
+         # Load first frame and initialize
+         first_frame = self._load_image(frames_paths[0])
+         self.tracker.initialize(first_frame, gt_boxes[0])

          pred_boxes = [gt_boxes[0]]  # First frame is given
          ious = [1.0]
+         center_dists = [0.0]
+         norm_dists = [0.0]

-         for i in range(1, len(frames)):
-             pred_box = self.tracker.track(frames[i])
+         for i in range(1, len(frames_paths)):
+             frame = self._load_image(frames_paths[i])
+             pred_box = self.tracker.track(frame)
              pred_boxes.append(pred_box)

-             if gt_boxes[i] is not None and gt_boxes[i][2] > 0 and gt_boxes[i][3] > 0:
+             if gt_boxes[i] is not None:
                  iou = compute_iou(pred_box, gt_boxes[i])
+                 cdist = compute_center_distance(pred_box, gt_boxes[i])
+                 ndist = compute_normalized_center_distance(pred_box, gt_boxes[i])
                  ious.append(iou)
+                 center_dists.append(cdist)
+                 norm_dists.append(ndist)
              else:
+                 # Target absent: count the frame as a miss (IoU 0, infinite distance)
                  ious.append(0.0)
+                 center_dists.append(float('inf'))
+                 norm_dists.append(float('inf'))

          auc = compute_auc(ious)
+         precision = compute_precision(center_dists)
+         norm_precision = compute_normalized_precision(norm_dists)

          return {
              'pred_boxes': pred_boxes,
              'ious': ious,
+             'center_dists': center_dists,
+             'norm_dists': norm_dists,
              'auc': auc,
+             'precision': precision,
+             'norm_precision': norm_precision,
              'mean_iou': np.mean(ious),
          }

-     def evaluate_dataset(self, dataset_path, dataset_type='lasot'):
+     def evaluate_dataset(self, dataset_path, dataset_type='lasot', save_results=None):
          """Evaluate on a full dataset.

          Args:
              dataset_path: path to dataset root
              dataset_type: 'lasot', 'uav123', 'dtb70', or 'visdrone'
+             save_results: optional path to save JSON results
          Returns:
              dict with overall metrics and per-sequence results
          """
-         sequences = self._load_dataset(dataset_path, dataset_type)
+         loader = DATASET_LOADERS.get(dataset_type)
+         if loader is None:
+             raise ValueError(f"Unknown dataset type: {dataset_type}. "
+                              f"Supported: {list(DATASET_LOADERS.keys())}")
+
+         sequences = loader(dataset_path)
+
+         if not sequences:
+             print(f"Warning: No sequences loaded from {dataset_path}")
+             return {'overall_auc': 0, 'mean_seq_auc': 0, 'num_sequences': 0}
+
+         print(f"Evaluating on {dataset_type}: {len(sequences)} sequences")

          results = {}
          all_ious = []
+         all_center_dists = []
+         all_norm_dists = []

-         for seq_name, (frames, gt_boxes) in sequences.items():
-             print(f"Evaluating {seq_name}...")
-             seq_result = self.evaluate_sequence(frames, gt_boxes)
-             results[seq_name] = seq_result
+         for seq_idx, (seq_name, seq_data) in enumerate(sequences.items()):
+             print(f"  [{seq_idx+1}/{len(sequences)}] {seq_name} "
+                   f"({len(seq_data['frames'])} frames)...", end='', flush=True)
+
+             seq_result = self.evaluate_sequence(seq_data['frames'], seq_data['gt'])
+             results[seq_name] = {
+                 'auc': seq_result['auc'],
+                 'precision': seq_result['precision'],
+                 'norm_precision': seq_result['norm_precision'],
+                 'mean_iou': seq_result['mean_iou'],
+                 'num_frames': len(seq_data['frames']),
+             }
              all_ious.extend(seq_result['ious'])
+             all_center_dists.extend(seq_result['center_dists'])
+             all_norm_dists.extend(seq_result['norm_dists'])
+
+             print(f" AUC={seq_result['auc']:.3f}")

          overall_auc = compute_auc(all_ious)
          per_seq_auc = {name: r['auc'] for name, r in results.items()}
          mean_seq_auc = np.mean(list(per_seq_auc.values())) if per_seq_auc else 0.0

-         return {
-             'overall_auc': overall_auc,
-             'mean_seq_auc': mean_seq_auc,
-             'per_sequence': per_seq_auc,
+         overall_precision = compute_precision(all_center_dists)
+         overall_norm_prec = compute_normalized_precision(all_norm_dists)
+
+         summary = {
+             'dataset': dataset_type,
+             'overall_auc': float(overall_auc),
+             'mean_seq_auc': float(mean_seq_auc),
+             'precision_20px': float(overall_precision),
+             'normalized_precision': float(overall_norm_prec),
              'num_sequences': len(sequences),
              'num_frames': len(all_ious),
+             'per_sequence': results,
          }
+
+         print(f"\n{'='*50}")
+         print(f"{dataset_type.upper()} Results:")
+         print(f"  AUC (overall):    {overall_auc:.3f}")
+         print(f"  AUC (mean seq):   {mean_seq_auc:.3f}")
+         print(f"  Precision (20px): {overall_precision:.3f}")
+         print(f"  Norm. Precision:  {overall_norm_prec:.3f}")
+         print(f"  Sequences:        {len(sequences)}")
+         print(f"  Total frames:     {len(all_ious)}")
+         print(f"{'='*50}")
+
+         # Save results to JSON
+         if save_results:
+             os.makedirs(os.path.dirname(save_results) or '.', exist_ok=True)
+             with open(save_results, 'w') as f:
+                 json.dump(summary, f, indent=2)
+             print(f"Results saved to {save_results}")
+
+         return summary

-     def _load_dataset(self, dataset_path, dataset_type):
-         """Load dataset sequences. Returns dict of {name: (frames, gt_boxes)}."""
-         # Placeholder - real implementation would load actual dataset files
-         print(f"Loading {dataset_type} from {dataset_path}")
-         return {}
+     def evaluate_multiple(self, dataset_configs):
+         """Evaluate on multiple benchmarks.
+
+         Args:
+             dataset_configs: list of (dataset_path, dataset_type) tuples
+         Returns:
+             dict of {dataset_type: results}
+         """
+         all_results = {}
+         for dataset_path, dataset_type in dataset_configs:
+             results = self.evaluate_dataset(dataset_path, dataset_type)
+             all_results[dataset_type] = results
+
+         # Print comparison table
+         print(f"\n{'='*60}")
+         print(f"{'Dataset':<15} {'AUC':>8} {'Prec@20':>8} {'NormPrec':>8} {'Seqs':>6}")
+         print(f"{'-'*60}")
+         for dt, r in all_results.items():
+             print(f"{dt:<15} {r['mean_seq_auc']:>8.3f} "
+                   f"{r.get('precision_20px', 0):>8.3f} "
+                   f"{r.get('normalized_precision', 0):>8.3f} "
+                   f"{r['num_sequences']:>6}")
+         print(f"{'='*60}")
+
+         return all_results
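The rewritten evaluate_sequence consumes file paths rather than preloaded arrays, so it can be smoke-tested without any benchmark on disk. Below is a minimal sketch for review purposes only, not part of the commit: DummyTracker and the temporary JPEGs are hypothetical, and it assumes compute_iou and compute_center_distance are defined earlier in this file, as the hunk context indicates.

import os
import tempfile
import numpy as np
from PIL import Image
from vil_tracker.evaluation.evaluate import BenchmarkEvaluator

class DummyTracker:
    """Hypothetical tracker that always predicts its initial box."""
    def initialize(self, frame, box):
        self.box = list(box)
    def track(self, frame):
        return self.box

tmp = tempfile.mkdtemp()
paths = []
for i in range(3):
    path = os.path.join(tmp, f"{i:08d}.jpg")
    Image.fromarray(np.zeros((120, 160, 3), dtype=np.uint8)).save(path)
    paths.append(path)

gt = [[10, 10, 40, 40], [12, 10, 40, 40], None]  # last frame: target absent
result = BenchmarkEvaluator(DummyTracker(), device='cpu').evaluate_sequence(paths, gt)
print(result['auc'], result['precision'], result['mean_iou'])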
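The metric helpers are pure functions and easy to check on toy values. The exact AUC depends on the threshold grid built by compute_success_curve (its default lies outside these hunks), so only the precision values below are fixed:

import numpy as np  # hedged toy check, not part of the commit
from vil_tracker.evaluation.evaluate import (
    compute_auc, compute_precision, compute_normalized_precision)

ious = [1.0, 0.8, 0.6, 0.4, 0.0]
print(f"AUC: {compute_auc(ious):.3f}")  # area under the success curve, in [0, 1]

center_dists = [0.0, 5.0, 18.0, 35.0, float('inf')]
print(compute_precision(center_dists))  # 0.6: three of five frames within 20 px

norm_dists = [0.0, 0.1, 0.4, 0.7, float('inf')]
print(compute_normalized_precision(norm_dists))  # 0.6: three within the 0.5 cutoff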
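Finally, a sketch of driving the evaluator end to end, following the usage block in the class docstring. The dataset paths are placeholders, and the tracker argument is any object exposing initialize/track (for example the OnlineTracker named in that docstring):

from vil_tracker.evaluation.evaluate import BenchmarkEvaluator

def run_benchmarks(tracker):
    evaluator = BenchmarkEvaluator(tracker, device='cuda')
    # Single benchmark, persisting the JSON summary via save_results
    evaluator.evaluate_dataset('/data/DTB70', 'dtb70',
                               save_results='results/dtb70.json')
    # Several benchmarks, ending with the printed comparison table
    return evaluator.evaluate_multiple([
        ('/data/LaSOT', 'lasot'),
        ('/data/UAV123', 'uav123'),
        ('/data/VisDrone-SOT', 'visdrone'),
    ])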