"""
Core evaluation metrics for face detection.

Implements:
- IoU computation (pairwise matrix)
- Average Precision (AP) with VOC-style 11-point or all-point interpolation
- Precision/recall curves at a given IoU threshold
- Detection-to-ground-truth matching helpers for the WiderFace protocol
"""

from typing import List, Tuple, Optional

import numpy as np


def _as_box_array(boxes) -> np.ndarray:
    """Return ``boxes`` as a 2-D array so column slicing is always valid.

    Empty input (e.g. ``np.array([])``) becomes shape ``(0, 4)`` and a single
    1-D box becomes shape ``(1, K)``. Anything already 2-D is returned as is,
    so arrays that carry extra trailing columns keep working.
    """
    arr = np.asarray(boxes)
    if arr.size == 0:
        return arr.reshape(0, 4)
    if arr.ndim == 1:
        return arr[None, :]
    return arr


def _greedy_match(iou_matrix: np.ndarray,
                  iou_threshold: float) -> Tuple[np.ndarray, np.ndarray]:
    """Greedily assign each prediction (in row order) to its best-IoU GT box.

    A prediction is a true positive only when its highest-IoU ground truth
    reaches ``iou_threshold`` and has not been claimed by an earlier row;
    it is never re-assigned to a second-best ground truth.

    Args:
        iou_matrix: [N, M] IoU between N predictions and M ground truths.
        iou_threshold: Minimum IoU for a match.

    Returns:
        (tp_mask, gt_matched): boolean arrays of length N and M.
    """
    num_pred, num_gt = iou_matrix.shape
    tp_mask = np.zeros(num_pred, dtype=bool)
    gt_matched = np.zeros(num_gt, dtype=bool)
    if num_pred == 0 or num_gt == 0:
        return tp_mask, gt_matched

    # The best candidate per prediction does not depend on the matching
    # state, so it is computed once outside the sequential loop.
    best_gt = iou_matrix.argmax(axis=1)
    best_iou = iou_matrix[np.arange(num_pred), best_gt]

    # Only rows that clear the threshold can become true positives;
    # flatnonzero keeps them in ascending (priority) order.
    for i in np.flatnonzero(best_iou >= iou_threshold):
        g = best_gt[i]
        if not gt_matched[g]:
            tp_mask[i] = True
            gt_matched[g] = True
    return tp_mask, gt_matched


def compute_iou_matrix(boxes1: np.ndarray, boxes2: np.ndarray) -> np.ndarray:
    """
    Compute pairwise IoU between two sets of boxes.

    Args:
        boxes1: [N, 4] (x1, y1, x2, y2)
        boxes2: [M, 4] (x1, y1, x2, y2)

    Returns:
        [N, M] IoU matrix. Empty inputs yield a matrix with a zero-length
        axis instead of raising.
    """
    boxes1 = _as_box_array(boxes1)
    boxes2 = _as_box_array(boxes2)

    # Intersection rectangle, broadcast to [N, M].
    x1 = np.maximum(boxes1[:, 0:1], boxes2[:, 0:1].T)
    y1 = np.maximum(boxes1[:, 1:2], boxes2[:, 1:2].T)
    x2 = np.minimum(boxes1[:, 2:3], boxes2[:, 2:3].T)
    y2 = np.minimum(boxes1[:, 3:4], boxes2[:, 3:4].T)

    # Clamp at zero so disjoint boxes contribute no intersection area.
    inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)

    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    union = area1[:, None] + area2[None, :] - inter

    # The small epsilon avoids division by zero for degenerate boxes.
    return inter / (union + 1e-6)


def compute_ap(recall: np.ndarray, precision: np.ndarray,
               use_11_point: bool = True) -> float:
    """
    Compute Average Precision from a recall-precision curve.

    Defaults to 11-point interpolation (VOC2007 style), which this module
    uses for WiderFace evaluation.

    Args:
        recall: [N] sorted recall values
        precision: [N] corresponding precision values
        use_11_point: Use 11-point interpolation (default: True);
            otherwise use all-point interpolation (VOC2010+ style).

    Returns:
        AP value (0.0 for an empty curve).
    """
    recall = np.asarray(recall, dtype=float)
    precision = np.asarray(precision, dtype=float)

    if use_11_point:
        # Build thresholds as i / 10 rather than np.arange(0, 1.1, 0.1):
        # accumulating a 0.1 step yields 0.30000000000000004 etc., which
        # would wrongly exclude a recall of exactly 3/10, 6/10 or 7/10.
        total = 0.0
        for t in np.arange(11) / 10.0:
            reached = recall >= t
            if reached.any():
                total += precision[reached].max()
        return float(total / 11.0)

    # All-point interpolation (VOC2010+ style): pad the curve with sentinels.
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))

    # Precision envelope: each entry becomes the max of itself and
    # everything to its right (monotonically non-increasing).
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # Sum rectangle areas wherever recall changes.
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return float(np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1]))


def compute_recall_at_iou(pred_boxes: np.ndarray,
                          pred_scores: np.ndarray,
                          gt_boxes: np.ndarray,
                          iou_threshold: float = 0.5
                          ) -> Tuple[float, np.ndarray, np.ndarray]:
    """
    Compute AP plus the recall and precision curves at an IoU threshold.

    Predictions are ranked by score here, so the caller does not need to
    pre-sort them.

    Args:
        pred_boxes: [N, 4] predicted boxes
        pred_scores: [N] prediction scores
        gt_boxes: [M, 4] ground truth boxes
        iou_threshold: IoU threshold for matching

    Returns:
        (ap, recall_curve, precision_curve). With no ground truth the
        result is ``(0.0, [], [])``; with no predictions AP is 0.0 and
        both curves are empty.
    """
    gt_boxes = _as_box_array(gt_boxes)
    num_gt = gt_boxes.shape[0]
    if num_gt == 0:
        return 0.0, np.array([]), np.array([])

    pred_boxes = _as_box_array(pred_boxes)
    pred_scores = np.asarray(pred_scores)

    # Descending score order; a stable sort keeps tied scores in input
    # order so results are deterministic.
    order = np.argsort(-pred_scores, kind="stable")
    pred_boxes = pred_boxes[order]

    iou_matrix = compute_iou_matrix(pred_boxes, gt_boxes)
    tp_mask, _ = _greedy_match(iou_matrix, iou_threshold)

    # Every prediction is either a TP or an FP, so tp + fp >= 1 at each
    # rank and the precision denominator is never zero.
    tp_cumsum = np.cumsum(tp_mask).astype(float)
    fp_cumsum = np.cumsum(~tp_mask).astype(float)

    recall = tp_cumsum / num_gt
    precision = tp_cumsum / (tp_cumsum + fp_cumsum)

    ap = compute_ap(recall, precision)
    return ap, recall, precision


def match_detections_to_gt(pred_boxes: np.ndarray,
                           gt_boxes: np.ndarray,
                           iou_threshold: float = 0.5
                           ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Match predictions to ground truth for detailed analysis.

    Predictions are processed in the order given (no scores are taken),
    so earlier rows get first claim on a ground-truth box.

    Args:
        pred_boxes: [N, 4] predicted boxes, in matching-priority order
        gt_boxes: [M, 4] ground truth boxes
        iou_threshold: IoU threshold for matching

    Returns:
        (tp_mask, fp_mask, fn_indices)
        tp_mask: [N] boolean, True for true positives
        fp_mask: [N] boolean, True for false positives
        fn_indices: indices of unmatched GT boxes (false negatives)
    """
    pred_boxes = _as_box_array(pred_boxes)
    gt_boxes = _as_box_array(gt_boxes)
    num_pred = pred_boxes.shape[0]
    num_gt = gt_boxes.shape[0]

    if num_pred == 0:
        # Nothing detected: every ground-truth box is a false negative.
        return (np.array([], dtype=bool),
                np.array([], dtype=bool),
                np.arange(num_gt))

    if num_gt == 0:
        # Nothing to detect: every prediction is a false positive.
        return (np.zeros(num_pred, dtype=bool),
                np.ones(num_pred, dtype=bool),
                np.array([], dtype=int))

    iou_matrix = compute_iou_matrix(pred_boxes, gt_boxes)
    tp_mask, gt_matched = _greedy_match(iou_matrix, iou_threshold)

    fp_mask = ~tp_mask
    fn_indices = np.where(~gt_matched)[0]
    return tp_mask, fp_mask, fn_indices