omar-ah committed (verified)
Commit 0bd347a · 1 Parent(s): 64c9f85

Fix vil_tracker/evaluation/evaluate.py: audit corrections

Files changed (1):
  1. vil_tracker/evaluation/evaluate.py +373 -29

vil_tracker/evaluation/evaluate.py CHANGED
@@ -2,17 +2,36 @@
  Benchmark evaluator for tracking datasets.

  Supports:
- - LaSOT: Large-scale Single Object Tracking
- - UAV123: UAV tracking at 123 fps
- - DTB70: Drone Tracking Benchmark
+ - LaSOT: Large-scale Single Object Tracking (280 test sequences)
+ - UAV123: UAV aerial tracking benchmark (123 sequences)
+ - DTB70: Drone Tracking Benchmark (70 sequences)
  - VisDrone-SOT: Vision meets Drone SOT

  Metrics: AUC (Success), Precision, Normalized Precision
+
+ Dataset structure:
+     LaSOT (test):
+         root/{category}/{seq_name}/img/XXXXXXXX.jpg
+         root/{category}/{seq_name}/groundtruth.txt
+
+     UAV123:
+         root/data_seq/UAV123/{seq_name}/*.jpg
+         root/anno/UAV123/{seq_name}.txt
+
+     DTB70:
+         root/{seq_name}/img/*.jpg
+         root/{seq_name}/groundtruth_rect.txt
+
+     VisDrone-SOT (test-dev):
+         root/sequences/{seq_name}/*.jpg
+         root/annotations/{seq_name}.txt
  """

  import os
+ import glob
  import json
  import numpy as np
+ from pathlib import Path
  from collections import defaultdict

@@ -43,6 +62,16 @@ def compute_center_distance(box_a, box_b):
      return np.linalg.norm(ca - cb)


+ def compute_normalized_center_distance(box_a, box_b):
+     """Compute center distance normalized by GT size (for normalized precision)."""
+     ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
+     cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
+     dist = np.linalg.norm(ca - cb)
+     # Normalize by the GT box diagonal
+     gt_diag = np.sqrt(box_b[2] ** 2 + box_b[3] ** 2) + 1e-6
+     return dist / gt_diag
+
+
  def compute_success_curve(ious, thresholds=None):
      """Compute success curve (fraction of frames with IoU > threshold)."""
      if thresholds is None:
@@ -54,86 +83,401 @@ def compute_success_curve(ious, thresholds=None):


  def compute_auc(ious):
-     """Compute AUC from IoU values."""
+     """Compute AUC from IoU values (Area Under the Success Curve)."""
      thresholds, success = compute_success_curve(ious)
      return np.trapz(success, thresholds) / (thresholds[-1] - thresholds[0])


+ def compute_precision(center_dists, threshold=20):
+     """Compute precision at the given pixel threshold."""
+     dists = np.array(center_dists)
+     return np.mean(dists <= threshold)
+
+
+ def compute_normalized_precision(norm_dists, threshold=0.5):
+     """Compute normalized precision."""
+     dists = np.array(norm_dists)
+     return np.mean(dists <= threshold)
+
+
+ # ============================================================
+ # Dataset loaders
+ # ============================================================
+
+ def load_annotations_txt(filepath):
+     """Load annotations from a text file with x,y,w,h per line."""
+     boxes = []
+     with open(filepath, 'r') as f:
+         for line in f:
+             line = line.strip()
+             if not line:
+                 boxes.append(None)
+                 continue
+             parts = line.replace(',', ' ').replace('\t', ' ').split()
+             try:
+                 vals = [float(x) for x in parts[:4]]
+                 # Skip zero-area boxes
+                 if vals[2] <= 0 or vals[3] <= 0:
+                     boxes.append(None)
+                 else:
+                     boxes.append(vals)
+             except (ValueError, IndexError):
+                 boxes.append(None)
+     return boxes
+
+
+ def load_lasot_test(root):
+     """Load LaSOT test sequences.
+
+     Structure: root/{category}/{seq_name}/img/*.jpg + groundtruth.txt
+     Test split: last 20% of sequences per category.
+     """
+     root = Path(root)
+     sequences = {}
+
+     categories = sorted([d for d in root.iterdir() if d.is_dir()])
+     for cat_dir in categories:
+         seq_dirs = sorted([d for d in cat_dir.iterdir() if d.is_dir()])
+         # Test split: last 20%
+         test_seqs = seq_dirs[int(len(seq_dirs) * 0.8):]
+
+         for seq_dir in test_seqs:
+             gt_file = seq_dir / 'groundtruth.txt'
+             img_dir = seq_dir / 'img'
+
+             if not gt_file.exists() or not img_dir.exists():
+                 continue
+
+             gt_boxes = load_annotations_txt(str(gt_file))
+             frames = sorted(glob.glob(str(img_dir / '*.jpg')))
+
+             if len(frames) >= 2 and len(gt_boxes) >= 2:
+                 min_len = min(len(frames), len(gt_boxes))
+                 seq_name = f"{cat_dir.name}/{seq_dir.name}"
+                 sequences[seq_name] = {
+                     'frames': frames[:min_len],
+                     'gt': gt_boxes[:min_len],
+                 }
+
+     return sequences
+
+
+ def load_uav123(root):
+     """Load UAV123 sequences.
+
+     Structure:
+         root/data_seq/UAV123/{seq_name}/*.jpg
+         root/anno/UAV123/{seq_name}.txt
+     """
+     root = Path(root)
+     sequences = {}
+
+     anno_dir = root / 'anno' / 'UAV123'
+     frame_dir = root / 'data_seq' / 'UAV123'
+
+     if not anno_dir.exists():
+         # Alternative structure
+         anno_dir = root / 'anno'
+         frame_dir = root / 'data_seq'
+
+     if not anno_dir.exists():
+         print(f"Warning: UAV123 annotations not found at {anno_dir}")
+         return sequences
+
+     for anno_file in sorted(anno_dir.glob('*.txt')):
+         seq_name = anno_file.stem
+         seq_frame_dir = frame_dir / seq_name
+
+         if not seq_frame_dir.exists():
+             continue
+
+         gt_boxes = load_annotations_txt(str(anno_file))
+         frames = sorted(glob.glob(str(seq_frame_dir / '*.jpg')))
+         if not frames:
+             frames = sorted(glob.glob(str(seq_frame_dir / '*.png')))
+
+         if len(frames) >= 2 and len(gt_boxes) >= 2:
+             min_len = min(len(frames), len(gt_boxes))
+             sequences[seq_name] = {
+                 'frames': frames[:min_len],
+                 'gt': gt_boxes[:min_len],
+             }
+
+     return sequences
+
+
+ def load_dtb70(root):
+     """Load DTB70 sequences.
+
+     Structure: root/{seq_name}/img/*.jpg + groundtruth_rect.txt
+     """
+     root = Path(root)
+     sequences = {}
+
+     for seq_dir in sorted(root.iterdir()):
+         if not seq_dir.is_dir():
+             continue
+
+         gt_file = seq_dir / 'groundtruth_rect.txt'
+         if not gt_file.exists():
+             gt_file = seq_dir / 'groundtruth.txt'
+         if not gt_file.exists():
+             continue
+
+         img_dir = seq_dir / 'img'
+         if not img_dir.exists():
+             img_dir = seq_dir  # frames directly in seq dir
+
+         gt_boxes = load_annotations_txt(str(gt_file))
+         frames = sorted(glob.glob(str(img_dir / '*.jpg')))
+         if not frames:
+             frames = sorted(glob.glob(str(img_dir / '*.png')))
+
+         if len(frames) >= 2 and len(gt_boxes) >= 2:
+             min_len = min(len(frames), len(gt_boxes))
+             sequences[seq_dir.name] = {
+                 'frames': frames[:min_len],
+                 'gt': gt_boxes[:min_len],
+             }
+
+     return sequences
+
+
+ def load_visdrone_sot(root):
+     """Load VisDrone-SOT sequences.
+
+     Structure:
+         root/sequences/{seq_name}/*.jpg
+         root/annotations/{seq_name}.txt
+     """
+     root = Path(root)
+     sequences = {}
+
+     anno_dir = root / 'annotations'
+     seq_dir = root / 'sequences'
+
+     if not anno_dir.exists() or not seq_dir.exists():
+         print(f"Warning: VisDrone-SOT not found at {root}")
+         return sequences
+
+     for anno_file in sorted(anno_dir.glob('*.txt')):
+         seq_name = anno_file.stem
+         frames_dir = seq_dir / seq_name
+
+         if not frames_dir.exists():
+             continue
+
+         gt_boxes = load_annotations_txt(str(anno_file))
+         frames = sorted(glob.glob(str(frames_dir / '*.jpg')))
+
+         if len(frames) >= 2 and len(gt_boxes) >= 2:
+             min_len = min(len(frames), len(gt_boxes))
+             sequences[seq_name] = {
+                 'frames': frames[:min_len],
+                 'gt': gt_boxes[:min_len],
+             }
+
+     return sequences
+
+
+ # ============================================================
+ # Evaluator
+ # ============================================================
+
+ DATASET_LOADERS = {
+     'lasot': load_lasot_test,
+     'uav123': load_uav123,
+     'dtb70': load_dtb70,
+     'visdrone': load_visdrone_sot,
+ }
+
+
  class BenchmarkEvaluator:
-     """Evaluate tracker on standard benchmarks."""
+     """Evaluate tracker on standard benchmarks.
+
+     Usage:
+         from vil_tracker.inference.online_tracker import OnlineTracker
+         from vil_tracker.evaluation.evaluate import BenchmarkEvaluator
+
+         online_tracker = OnlineTracker(model, device='cuda')
+         evaluator = BenchmarkEvaluator(online_tracker)
+         results = evaluator.evaluate_dataset('/path/to/LaSOT', 'lasot')
+         print(f"LaSOT AUC: {results['mean_seq_auc']:.3f}")
+     """

      def __init__(self, tracker, device='cuda'):
          self.tracker = tracker
          self.device = device

-     def evaluate_sequence(self, frames, gt_boxes):
+     def _load_image(self, path):
+         """Load image from path as an RGB numpy array."""
+         try:
+             from PIL import Image
+             img = Image.open(path).convert('RGB')
+             return np.array(img)
+         except ImportError:
+             import cv2
+             img = cv2.imread(path)
+             if img is not None:
+                 return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+             return np.zeros((480, 640, 3), dtype=np.uint8)
+
+     def evaluate_sequence(self, frames_paths, gt_boxes):
          """Evaluate on a single sequence.

          Args:
-             frames: list of (H, W, 3) numpy arrays
-             gt_boxes: list of [x, y, w, h] ground truth boxes
+             frames_paths: list of image file paths
+             gt_boxes: list of [x, y, w, h] ground truth boxes (None = absent)
          Returns:
-             dict with per-frame IoUs and metrics
+             dict with per-frame IoUs, distances, and metrics
          """
-         # Initialize with first frame
-         self.tracker.initialize(frames[0], gt_boxes[0])
+         # Load first frame and initialize
+         first_frame = self._load_image(frames_paths[0])
+         self.tracker.initialize(first_frame, gt_boxes[0])

          pred_boxes = [gt_boxes[0]]  # First frame is given
          ious = [1.0]
+         center_dists = [0.0]
+         norm_dists = [0.0]

-         for i in range(1, len(frames)):
-             pred_box = self.tracker.track(frames[i])
+         for i in range(1, len(frames_paths)):
+             frame = self._load_image(frames_paths[i])
+             pred_box = self.tracker.track(frame)
              pred_boxes.append(pred_box)

-             if gt_boxes[i] is not None and gt_boxes[i][2] > 0 and gt_boxes[i][3] > 0:
+             if gt_boxes[i] is not None:
                  iou = compute_iou(pred_box, gt_boxes[i])
+                 cdist = compute_center_distance(pred_box, gt_boxes[i])
+                 ndist = compute_normalized_center_distance(pred_box, gt_boxes[i])
                  ious.append(iou)
+                 center_dists.append(cdist)
+                 norm_dists.append(ndist)
              else:
+                 # Target absent: count the frame as a miss (IoU 0, infinite distance)
                  ious.append(0.0)
+                 center_dists.append(float('inf'))
+                 norm_dists.append(float('inf'))

          auc = compute_auc(ious)
+         precision = compute_precision(center_dists)
+         norm_precision = compute_normalized_precision(norm_dists)

          return {
              'pred_boxes': pred_boxes,
              'ious': ious,
+             'center_dists': center_dists,
+             'norm_dists': norm_dists,
              'auc': auc,
+             'precision': precision,
+             'norm_precision': norm_precision,
              'mean_iou': np.mean(ious),
          }

-     def evaluate_dataset(self, dataset_path, dataset_type='lasot'):
+     def evaluate_dataset(self, dataset_path, dataset_type='lasot', save_results=None):
          """Evaluate on a full dataset.

          Args:
              dataset_path: path to dataset root
              dataset_type: 'lasot', 'uav123', 'dtb70', or 'visdrone'
+             save_results: optional path to save JSON results
          Returns:
              dict with overall metrics and per-sequence results
          """
-         sequences = self._load_dataset(dataset_path, dataset_type)
+         loader = DATASET_LOADERS.get(dataset_type)
+         if loader is None:
+             raise ValueError(f"Unknown dataset type: {dataset_type}. "
+                              f"Supported: {list(DATASET_LOADERS.keys())}")
+
+         sequences = loader(dataset_path)
+
+         if not sequences:
+             print(f"Warning: No sequences loaded from {dataset_path}")
+             return {'overall_auc': 0, 'mean_seq_auc': 0, 'num_sequences': 0}
+
+         print(f"Evaluating on {dataset_type}: {len(sequences)} sequences")

          results = {}
          all_ious = []
+         all_center_dists = []
+         all_norm_dists = []

-         for seq_name, (frames, gt_boxes) in sequences.items():
-             print(f"Evaluating {seq_name}...")
-             seq_result = self.evaluate_sequence(frames, gt_boxes)
-             results[seq_name] = seq_result
+         for seq_idx, (seq_name, seq_data) in enumerate(sequences.items()):
+             print(f"  [{seq_idx+1}/{len(sequences)}] {seq_name} "
+                   f"({len(seq_data['frames'])} frames)...", end='', flush=True)
+
+             seq_result = self.evaluate_sequence(seq_data['frames'], seq_data['gt'])
+             results[seq_name] = {
+                 'auc': seq_result['auc'],
+                 'precision': seq_result['precision'],
+                 'norm_precision': seq_result['norm_precision'],
+                 'mean_iou': seq_result['mean_iou'],
+                 'num_frames': len(seq_data['frames']),
+             }
              all_ious.extend(seq_result['ious'])
+             all_center_dists.extend(seq_result['center_dists'])
+             all_norm_dists.extend(seq_result['norm_dists'])
+
+             print(f" AUC={seq_result['auc']:.3f}")

          overall_auc = compute_auc(all_ious)
          per_seq_auc = {name: r['auc'] for name, r in results.items()}
          mean_seq_auc = np.mean(list(per_seq_auc.values())) if per_seq_auc else 0.0

-         return {
-             'overall_auc': overall_auc,
-             'mean_seq_auc': mean_seq_auc,
-             'per_sequence': per_seq_auc,
+         overall_precision = compute_precision(all_center_dists)
+         overall_norm_prec = compute_normalized_precision(all_norm_dists)
+
+         summary = {
+             'dataset': dataset_type,
+             'overall_auc': float(overall_auc),
+             'mean_seq_auc': float(mean_seq_auc),
+             'precision_20px': float(overall_precision),
+             'normalized_precision': float(overall_norm_prec),
              'num_sequences': len(sequences),
              'num_frames': len(all_ious),
+             'per_sequence': results,
          }
+
+         print(f"\n{'='*50}")
+         print(f"{dataset_type.upper()} Results:")
+         print(f"  AUC (overall):    {overall_auc:.3f}")
+         print(f"  AUC (mean seq):   {mean_seq_auc:.3f}")
+         print(f"  Precision (20px): {overall_precision:.3f}")
+         print(f"  Norm. Precision:  {overall_norm_prec:.3f}")
+         print(f"  Sequences:        {len(sequences)}")
+         print(f"  Total frames:     {len(all_ious)}")
+         print(f"{'='*50}")
+
+         # Save results to JSON
+         if save_results:
+             os.makedirs(os.path.dirname(save_results) or '.', exist_ok=True)
+             with open(save_results, 'w') as f:
+                 json.dump(summary, f, indent=2)
+             print(f"Results saved to {save_results}")
+
+         return summary

-     def _load_dataset(self, dataset_path, dataset_type):
-         """Load dataset sequences. Returns dict of {name: (frames, gt_boxes)}."""
-         # Placeholder - real implementation would load actual dataset files
-         print(f"Loading {dataset_type} from {dataset_path}")
-         return {}
+     def evaluate_multiple(self, dataset_configs):
+         """Evaluate on multiple benchmarks.
+
+         Args:
+             dataset_configs: list of (dataset_path, dataset_type) tuples
+         Returns:
+             dict of {dataset_type: results}
+         """
+         all_results = {}
+         for dataset_path, dataset_type in dataset_configs:
+             results = self.evaluate_dataset(dataset_path, dataset_type)
+             all_results[dataset_type] = results
+
+         # Print comparison table
+         print(f"\n{'='*60}")
+         print(f"{'Dataset':<15} {'AUC':>8} {'Prec@20':>8} {'NormPrec':>8} {'Seqs':>6}")
+         print(f"{'-'*60}")
+         for dt, r in all_results.items():
+             print(f"{dt:<15} {r['mean_seq_auc']:>8.3f} "
+                   f"{r.get('precision_20px', 0):>8.3f} "
+                   f"{r.get('normalized_precision', 0):>8.3f} "
+                   f"{r['num_sequences']:>6}")
+         print(f"{'='*60}")
+
+         return all_results
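The rewritten evaluate_sequence consumes file paths rather than preloaded arrays, so it can be smoke-tested without any benchmark on disk. Below is a minimal sketch for review purposes only, not part of the commit: DummyTracker and the temporary JPEGs are hypothetical, and it assumes compute_iou and compute_center_distance are defined earlier in this file, as the hunk context indicates.

import os
import tempfile
import numpy as np
from PIL import Image
from vil_tracker.evaluation.evaluate import BenchmarkEvaluator

class DummyTracker:
    """Hypothetical tracker that always predicts its initial box."""
    def initialize(self, frame, box):
        self.box = list(box)
    def track(self, frame):
        return self.box

tmp = tempfile.mkdtemp()
paths = []
for i in range(3):
    path = os.path.join(tmp, f"{i:08d}.jpg")
    Image.fromarray(np.zeros((120, 160, 3), dtype=np.uint8)).save(path)
    paths.append(path)

gt = [[10, 10, 40, 40], [12, 10, 40, 40], None]  # last frame: target absent
result = BenchmarkEvaluator(DummyTracker(), device='cpu').evaluate_sequence(paths, gt)
print(result['auc'], result['precision'], result['mean_iou'])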
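The metric helpers are pure functions and easy to check on toy values. The exact AUC depends on the threshold grid built by compute_success_curve (its default lies outside these hunks), so only the precision values below are fixed:

import numpy as np  # hedged toy check, not part of the commit
from vil_tracker.evaluation.evaluate import (
    compute_auc, compute_precision, compute_normalized_precision)

ious = [1.0, 0.8, 0.6, 0.4, 0.0]
print(f"AUC: {compute_auc(ious):.3f}")  # area under the success curve, in [0, 1]

center_dists = [0.0, 5.0, 18.0, 35.0, float('inf')]
print(compute_precision(center_dists))  # 0.6: three of five frames within 20 px

norm_dists = [0.0, 0.1, 0.4, 0.7, float('inf')]
print(compute_normalized_precision(norm_dists))  # 0.6: three within the 0.5 cutoff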
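Finally, a sketch of driving the evaluator end to end, following the usage block in the class docstring. The dataset paths are placeholders, and the tracker argument is any object exposing initialize/track (for example the OnlineTracker named in that docstring):

from vil_tracker.evaluation.evaluate import BenchmarkEvaluator

def run_benchmarks(tracker):
    evaluator = BenchmarkEvaluator(tracker, device='cuda')
    # Single benchmark, persisting the JSON summary via save_results
    evaluator.evaluate_dataset('/data/DTB70', 'dtb70',
                               save_results='results/dtb70.json')
    # Several benchmarks, ending with the printed comparison table
    return evaluator.evaluate_multiple([
        ('/data/LaSOT', 'lasot'),
        ('/data/UAV123', 'uav123'),
        ('/data/VisDrone-SOT', 'visdrone'),
    ])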