"""
Benchmark evaluator for tracking datasets.

Supports:
- LaSOT: Large-scale Single Object Tracking
- UAV123: UAV tracking at 123 fps  
- DTB70: Drone Tracking Benchmark
- VisDrone-SOT: Vision meets Drone SOT

Metrics: AUC (Success), Precision, Normalized Precision
"""

import os
import json
import numpy as np
from collections import defaultdict


def compute_iou(box_a, box_b):
    """Compute IoU between two boxes in [x, y, w, h] format."""
    xa1, ya1 = box_a[0], box_a[1]
    xa2, ya2 = xa1 + box_a[2], ya1 + box_a[3]
    xb1, yb1 = box_b[0], box_b[1]
    xb2, yb2 = xb1 + box_b[2], yb1 + box_b[3]
    
    inter_x1 = max(xa1, xb1)
    inter_y1 = max(ya1, yb1)
    inter_x2 = min(xa2, xb2)
    inter_y2 = min(ya2, yb2)
    
    inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)
    area_a = box_a[2] * box_a[3]
    area_b = box_b[2] * box_b[3]
    union_area = area_a + area_b - inter_area
    
    return inter_area / max(union_area, 1e-6)


def compute_center_distance(box_a, box_b):
    """Compute center distance between two boxes in [x, y, w, h] format."""
    ca = np.array([box_a[0] + box_a[2] / 2, box_a[1] + box_a[3] / 2])
    cb = np.array([box_b[0] + box_b[2] / 2, box_b[1] + box_b[3] / 2])
    return np.linalg.norm(ca - cb)


def compute_success_curve(ious, thresholds=None):
    """Compute success curve (fraction of frames with IoU > threshold)."""
    if thresholds is None:
        thresholds = np.arange(0, 1.05, 0.05)
    
    ious = np.array(ious)
    success = np.array([np.mean(ious >= t) for t in thresholds])
    return thresholds, success


def compute_auc(ious):
    """Compute AUC from IoU values."""
    thresholds, success = compute_success_curve(ious)
    return np.trapz(success, thresholds) / (thresholds[-1] - thresholds[0])


class BenchmarkEvaluator:
    """Evaluate tracker on standard benchmarks."""
    
    def __init__(self, tracker, device='cuda'):
        self.tracker = tracker
        self.device = device
    
    def evaluate_sequence(self, frames, gt_boxes):
        """Evaluate on a single sequence.
        
        Args:
            frames: list of (H, W, 3) numpy arrays
            gt_boxes: list of [x, y, w, h] ground truth boxes
        Returns:
            dict with per-frame IoUs and metrics
        """
        # Initialize with first frame
        self.tracker.initialize(frames[0], gt_boxes[0])
        
        pred_boxes = [gt_boxes[0]]  # First frame is given
        ious = [1.0]
        
        for i in range(1, len(frames)):
            pred_box = self.tracker.track(frames[i])
            pred_boxes.append(pred_box)
            
            if gt_boxes[i] is not None and gt_boxes[i][2] > 0 and gt_boxes[i][3] > 0:
                iou = compute_iou(pred_box, gt_boxes[i])
                ious.append(iou)
            else:
                ious.append(0.0)
        
        auc = compute_auc(ious)
        
        return {
            'pred_boxes': pred_boxes,
            'ious': ious,
            'auc': auc,
            'mean_iou': np.mean(ious),
        }
    
    def evaluate_dataset(self, dataset_path, dataset_type='lasot'):
        """Evaluate on a full dataset.
        
        Args:
            dataset_path: path to dataset root
            dataset_type: 'lasot', 'uav123', 'dtb70', or 'visdrone'
        Returns:
            dict with overall metrics and per-sequence results
        """
        sequences = self._load_dataset(dataset_path, dataset_type)
        
        results = {}
        all_ious = []
        
        for seq_name, (frames, gt_boxes) in sequences.items():
            print(f"Evaluating {seq_name}...")
            seq_result = self.evaluate_sequence(frames, gt_boxes)
            results[seq_name] = seq_result
            all_ious.extend(seq_result['ious'])
        
        overall_auc = compute_auc(all_ious)
        per_seq_auc = {name: r['auc'] for name, r in results.items()}
        mean_seq_auc = np.mean(list(per_seq_auc.values())) if per_seq_auc else 0.0
        
        return {
            'overall_auc': overall_auc,
            'mean_seq_auc': mean_seq_auc,
            'per_sequence': per_seq_auc,
            'num_sequences': len(sequences),
            'num_frames': len(all_ious),
        }
    
    def _load_dataset(self, dataset_path, dataset_type):
        """Load dataset sequences. Returns dict of {name: (frames, gt_boxes)}."""
        # Placeholder - real implementation would load actual dataset files
        print(f"Loading {dataset_type} from {dataset_path}")
        return {}