# facedet/evaluation/metrics.py
# Uploaded by cledouxluma with huggingface_hub (commit 11246bf, verified).
"""
Core evaluation metrics for face detection.
Implements:
- IoU computation (pairwise and matrix)
- Average Precision (AP) with VOC-style 11-point or all-point interpolation
- Recall at various IoU thresholds
- WiderFace evaluation protocol helpers
"""
import numpy as np
from typing import List, Tuple, Optional
def _as_box_array(boxes) -> np.ndarray:
    """Coerce *boxes* to a 2-D array so that column slicing is always valid."""
    arr = np.asarray(boxes)
    if arr.size == 0:
        # e.g. np.array([]) has shape (0,); give it an explicit (0, 4) shape.
        return arr.reshape(0, 4)
    if arr.ndim == 1:
        # A single box passed as a flat vector becomes one row.
        return arr[np.newaxis, :]
    return arr


def compute_iou_matrix(boxes1: np.ndarray, boxes2: np.ndarray) -> np.ndarray:
    """
    Compute pairwise IoU between two sets of boxes.

    Empty inputs (including shape-(0,) arrays such as ``np.array([])``) and
    single boxes given as a flat vector are accepted; the result then has a
    zero-length or length-1 axis respectively.

    Args:
        boxes1: [N, 4] (x1, y1, x2, y2)
        boxes2: [M, 4] (x1, y1, x2, y2)
    Returns:
        [N, M] IoU matrix
    """
    boxes1 = _as_box_array(boxes1)
    boxes2 = _as_box_array(boxes2)

    # Corners of the intersection rectangle, broadcast to [N, M].
    x1 = np.maximum(boxes1[:, 0:1], boxes2[:, 0:1].T)
    y1 = np.maximum(boxes1[:, 1:2], boxes2[:, 1:2].T)
    x2 = np.minimum(boxes1[:, 2:3], boxes2[:, 2:3].T)
    y2 = np.minimum(boxes1[:, 3:4], boxes2[:, 3:4].T)

    # Clamp at zero so disjoint boxes contribute no intersection area.
    inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)

    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    union = area1[:, None] + area2[None, :] - inter

    # The small epsilon avoids division by zero for zero-area boxes.
    return inter / (union + 1e-6)
def compute_ap(recall: np.ndarray, precision: np.ndarray,
               use_11_point: bool = True) -> float:
    """
    Compute Average Precision from a recall-precision curve.

    NOTE(review): the original docstring states that WiderFace uses
    11-point interpolation (VOC2007 style) - confirm against the official
    evaluation tool before relying on the default.

    Args:
        recall: [N] sorted recall values
        precision: [N] corresponding precision values
        use_11_point: Use 11-point interpolation (default: True);
            otherwise use all-point interpolation.
    Returns:
        AP value (0.0 for an empty curve)
    """
    recall = np.asarray(recall, dtype=float)
    precision = np.asarray(precision, dtype=float)

    if use_11_point:
        # 11-point interpolation: mean of the best precision reachable at
        # recall >= t for t in {0.0, 0.1, ..., 1.0}.
        ap = 0.0
        for step in range(11):
            # Build thresholds as step / 10 rather than np.arange(0, 1.1, 0.1):
            # arange yields 0.30000000000000004, 0.6000000000000001 and
            # 0.7000000000000001, so a recall of exactly 0.3 / 0.6 / 0.7
            # would wrongly fail the `>=` comparison.
            threshold = step / 10.0
            reached = recall >= threshold
            if reached.any():
                ap += float(precision[reached].max()) / 11
        return ap

    # All-point interpolation (VOC2010+ style).
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))

    # Make precision monotonically decreasing (running max from the right).
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # Sum rectangle areas wherever recall changes.
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return float(np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1]))
def compute_recall_at_iou(pred_boxes: np.ndarray, pred_scores: np.ndarray,
                          gt_boxes: np.ndarray, iou_threshold: float = 0.5
                          ) -> Tuple[float, np.ndarray, np.ndarray]:
    """
    Compute AP and the recall/precision curves at a given IoU threshold.

    Predictions are sorted by descending score inside this function, so the
    caller does not need to pre-sort them. Each prediction is compared with
    its highest-IoU ground-truth box; it is a true positive only if that IoU
    reaches the threshold and the box has not already been claimed by a
    higher-scoring prediction. Otherwise it is a false positive.

    Args:
        pred_boxes: [N, 4] predicted boxes
        pred_scores: [N] prediction scores
        gt_boxes: [M, 4] ground truth boxes
        iou_threshold: IoU threshold for matching
    Returns:
        (ap, recall_curve, precision_curve); ``(0.0, [], [])`` when there
        are no ground-truth boxes or no predictions.
    """
    pred_boxes = np.asarray(pred_boxes)
    pred_scores = np.asarray(pred_scores)
    gt_boxes = np.asarray(gt_boxes)

    num_gt = gt_boxes.shape[0]
    num_pred = len(pred_boxes)
    # Without ground truth or predictions there is no curve. Returning early
    # also keeps shape-(0,) inputs such as np.array([]) away from the IoU
    # computation, which needs 2-D arrays.
    if num_gt == 0 or num_pred == 0:
        return 0.0, np.array([]), np.array([])

    # Process predictions from most to least confident.
    order = np.argsort(-pred_scores)
    pred_boxes = pred_boxes[order]
    iou_matrix = compute_iou_matrix(pred_boxes, gt_boxes)

    # Greedy matching: each GT box can be claimed at most once.
    gt_matched = np.zeros(num_gt, dtype=bool)
    tp = np.zeros(num_pred)
    for i in range(num_pred):
        best_gt = iou_matrix[i].argmax()
        if iou_matrix[i, best_gt] >= iou_threshold and not gt_matched[best_gt]:
            tp[i] = 1
            gt_matched[best_gt] = True
    # Every prediction that is not a true positive is a false positive.
    fp = 1.0 - tp

    tp_cumsum = np.cumsum(tp)
    fp_cumsum = np.cumsum(fp)
    recall = tp_cumsum / num_gt
    # The denominator equals the 1-based rank of each prediction, so it is
    # never zero.
    precision = tp_cumsum / (tp_cumsum + fp_cumsum)
    ap = compute_ap(recall, precision)
    return ap, recall, precision
def match_detections_to_gt(pred_boxes: np.ndarray, gt_boxes: np.ndarray,
                           iou_threshold: float = 0.5
                           ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Greedily assign predictions to ground-truth boxes for error analysis.

    Predictions are visited in the order given (no score sorting happens
    here). Each one is compared with its highest-IoU ground-truth box and
    counts as a true positive only if that IoU reaches ``iou_threshold``
    and the box has not been claimed by an earlier prediction.

    Args:
        pred_boxes: [N, 4] predicted boxes
        gt_boxes: [M, 4] ground truth boxes
        iou_threshold: IoU threshold for matching
    Returns:
        (tp_mask, fp_mask, fn_indices)
        tp_mask: [N] boolean, True for true positives
        fp_mask: [N] boolean, True for false positives
        fn_indices: indices of unmatched GT boxes (false negatives)
    """
    num_pred = len(pred_boxes)
    num_gt = len(gt_boxes)

    # No predictions: every ground-truth box is a false negative.
    if num_pred == 0:
        no_preds = np.array([], dtype=bool)
        return no_preds, np.array([], dtype=bool), np.arange(num_gt)

    # No ground truth: every prediction is a false positive.
    if num_gt == 0:
        all_false = np.zeros(num_pred, dtype=bool)
        all_true = np.ones(num_pred, dtype=bool)
        return all_false, all_true, np.array([], dtype=int)

    overlaps = compute_iou_matrix(pred_boxes, gt_boxes)
    claimed = np.zeros(num_gt, dtype=bool)
    is_tp = np.zeros(num_pred, dtype=bool)

    for pred_idx in range(num_pred):
        row = overlaps[pred_idx]
        candidate = row.argmax()
        good_overlap = row[candidate] >= iou_threshold
        if not good_overlap or claimed[candidate]:
            continue
        is_tp[pred_idx] = True
        claimed[candidate] = True

    unmatched_gt = np.where(~claimed)[0]
    return is_tp, ~is_tp, unmatched_gt