""" WiderFace Evaluation Protocol. Implements the official WiderFace evaluation methodology: 1. Run detection on all validation images 2. Save predictions in WiderFace submission format 3. Compute AP on Easy/Medium/Hard subsets 4. Generate precision-recall curves WiderFace difficulty levels: - Easy: Large, unoccluded, frontal faces - Medium: Medium-sized, partially occluded or non-frontal - Hard: Tiny (<16px), heavily occluded, extreme blur/pose The official evaluation uses: - IoU threshold = 0.5 - Prediction format: one file per event, sorted by confidence - AP computed via interpolated precision-recall curve """ import os import json import numpy as np from typing import Dict, List, Optional, Tuple from pathlib import Path from .metrics import compute_iou_matrix, compute_ap class WiderFaceEvaluator: """ WiderFace evaluation with Easy/Medium/Hard AP computation. Usage: evaluator = WiderFaceEvaluator(gt_dir='wider_face/wider_face_split') evaluator.add_prediction(filename, boxes, scores) results = evaluator.evaluate() print(f"Easy={results['easy_ap']:.4f}, Med={results['medium_ap']:.4f}, Hard={results['hard_ap']:.4f}") """ # WiderFace event names (61 event categories) EVENTS = [ '0--Parade', '1--Handshaking', '2--Demonstration', '3--Riot', '4--Dancing', '5--Car_Accident', '6--Funeral', '7--Cheering', '8--Election_Campain', '9--Press_Conference', '10--People_Marching', '11--Meeting', '12--Group', '13--Interview', '14--Traffic', '15--Stock_Market', '16--Award_Ceremony', '17--Ceremony', '18--Concerts', '19--Couple', '20--Family_Group', '21--Festival', '22--Picnic', '23--Shoppers', '24--Soldier_Firing', '25--Soldier_Patrol', '26--Soldier_Drilling', '27--Spa', '28--Sports_Fan', '29--Students_Schoolkids', '30--Surgeons', '31--Waiter_Waitress', '32--Workers_Laborers', '33--Running', '34--Baseball', '35--Basketball', '36--Football', '37--Soccer', '38--Tennis', '39--Ice_Skating', '40--Gymnastics', '41--Swimming', '42--Car_Racing', '43--Row_Boat', '44--Aerobics', '45--Balloonist', '46--Jockey', '47--Matador_Bullfighter', '48--Parachutist_Paraglider', '49--Greeting', '50--Celebration_Or_Party', '51--Dresses', '52--Photographers', '53--Raid', '54--Rescue', '55--Sports_Coach_Trainer', '56--Voter', '57--Angler', '58--Hockey', '59--people--driving--car', '60--Tableau', '61--Street_Battle', ] def __init__(self, gt_dir: Optional[str] = None, iou_threshold: float = 0.5): """ Args: gt_dir: Directory containing WiderFace ground truth annotation files iou_threshold: IoU threshold for matching (default: 0.5, WiderFace standard) """ self.gt_dir = gt_dir self.iou_threshold = iou_threshold self.predictions = {} # filename → (boxes, scores) self.ground_truth = {} # filename → boxes if gt_dir: self._load_ground_truth() def _load_ground_truth(self): """Load WiderFace validation ground truth.""" ann_file = os.path.join(self.gt_dir, 'wider_face_val_bbx_gt.txt') if not os.path.exists(ann_file): print(f"Warning: GT file not found: {ann_file}") return with open(ann_file, 'r') as f: while True: filename = f.readline().strip() if not filename: break num_faces = int(f.readline().strip()) boxes = [] for _ in range(max(num_faces, 1)): line = f.readline().strip() if num_faces == 0: continue parts = list(map(float, line.split())) x, y, w, h = parts[0], parts[1], parts[2], parts[3] if w > 0 and h > 0: boxes.append([x, y, x+w, y+h]) self.ground_truth[filename] = np.array(boxes, dtype=np.float32) \ if boxes else np.empty((0, 4), dtype=np.float32) def add_prediction(self, filename: str, boxes: np.ndarray, scores: np.ndarray): """Add prediction for a single image.""" self.predictions[filename] = (boxes.copy(), scores.copy()) def evaluate(self, difficulty: str = 'all') -> Dict: """ Run WiderFace evaluation. Args: difficulty: 'easy', 'medium', 'hard', or 'all' Returns: dict with AP values per difficulty level """ results = {} for diff in (['easy', 'medium', 'hard'] if difficulty == 'all' else [difficulty]): ap = self._evaluate_difficulty(diff) results[f'{diff}_ap'] = ap return results def _evaluate_difficulty(self, difficulty: str) -> float: """Evaluate AP for a single difficulty level.""" # For full evaluation, we'd need the official difficulty masks # Here we implement a simplified version based on face size size_thresholds = { 'easy': 50, # faces > 50px 'medium': 20, # faces > 20px 'hard': 0, # all faces } min_size = size_thresholds.get(difficulty, 0) all_tp = [] all_fp = [] all_scores = [] total_gt = 0 for filename in self.ground_truth: gt_boxes = self.ground_truth[filename] # Filter GT by size for difficulty level if min_size > 0 and len(gt_boxes) > 0: sizes = np.sqrt((gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] - gt_boxes[:, 1])) gt_mask = sizes >= min_size gt_boxes = gt_boxes[gt_mask] total_gt += len(gt_boxes) if filename not in self.predictions: continue pred_boxes, pred_scores = self.predictions[filename] if len(pred_boxes) == 0 or len(gt_boxes) == 0: all_fp.extend([1] * len(pred_boxes)) all_tp.extend([0] * len(pred_boxes)) all_scores.extend(pred_scores.tolist()) continue # Match predictions to GT iou_matrix = compute_iou_matrix(pred_boxes, gt_boxes) gt_matched = np.zeros(len(gt_boxes), dtype=bool) # Sort predictions by score (descending) order = np.argsort(-pred_scores) for i in order: if iou_matrix.shape[1] > 0: best_gt = iou_matrix[i].argmax() if iou_matrix[i, best_gt] >= self.iou_threshold and not gt_matched[best_gt]: all_tp.append(1) all_fp.append(0) gt_matched[best_gt] = True else: all_tp.append(0) all_fp.append(1) else: all_tp.append(0) all_fp.append(1) all_scores.append(pred_scores[i]) if total_gt == 0: return 0.0 # Sort by score order = np.argsort(-np.array(all_scores)) tp = np.array(all_tp)[order] fp = np.array(all_fp)[order] tp_cumsum = np.cumsum(tp) fp_cumsum = np.cumsum(fp) recall = tp_cumsum / total_gt precision = tp_cumsum / (tp_cumsum + fp_cumsum) return compute_ap(recall, precision, use_11_point=True) def save_predictions(self, output_dir: str): """Save predictions in WiderFace submission format.""" os.makedirs(output_dir, exist_ok=True) for filename, (boxes, scores) in self.predictions.items(): event = os.path.dirname(filename) event_dir = os.path.join(output_dir, event) os.makedirs(event_dir, exist_ok=True) base = os.path.splitext(os.path.basename(filename))[0] pred_file = os.path.join(event_dir, f'{base}.txt') with open(pred_file, 'w') as f: f.write(f'{base}\n') f.write(f'{len(boxes)}\n') for i in range(len(boxes)): x1, y1, x2, y2 = boxes[i] w, h = x2 - x1, y2 - y1 f.write(f'{x1:.1f} {y1:.1f} {w:.1f} {h:.1f} {scores[i]:.4f}\n') def generate_report(self) -> str: """Generate a text report of evaluation results.""" results = self.evaluate() report = [ "=" * 60, "WiderFace Evaluation Results", "=" * 60, f" Easy AP: {results.get('easy_ap', 0):.4f}", f" Medium AP: {results.get('medium_ap', 0):.4f}", f" Hard AP: {results.get('hard_ap', 0):.4f}", f"", f" Total images with GT: {len(self.ground_truth)}", f" Total images with predictions: {len(self.predictions)}", f" IoU threshold: {self.iou_threshold}", "=" * 60, ] return '\n'.join(report)