File size: 9,144 Bytes
c0f802b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
"""
WiderFace Evaluation Protocol.

Implements the official WiderFace evaluation methodology:
1. Run detection on all validation images
2. Save predictions in WiderFace submission format
3. Compute AP on Easy/Medium/Hard subsets
4. Generate precision-recall curves

WiderFace difficulty levels:
- Easy: Large, unoccluded, frontal faces
- Medium: Medium-sized, partially occluded or non-frontal
- Hard: Tiny (<16px), heavily occluded, extreme blur/pose

The official evaluation uses:
- IoU threshold = 0.5
- Prediction format: one text file per image, grouped into per-event directories
- AP computed via interpolated precision-recall curve
"""

import os
import json
import numpy as np
from typing import Dict, List, Optional, Tuple
from pathlib import Path

from .metrics import compute_iou_matrix, compute_ap


class WiderFaceEvaluator:
    """
    WiderFace evaluation with Easy/Medium/Hard AP computation.

    Predictions are collected per image with ``add_prediction`` and scored
    against the boxes parsed from ``wider_face_val_bbx_gt.txt``.

    The difficulty subsets are an approximation: instead of the official
    per-face difficulty masks, a ground-truth face belongs to a subset when
    ``sqrt(width * height)`` reaches the subset's size threshold (see
    ``_SIZE_THRESHOLDS``). Faces below the threshold are *ignored* for that
    subset: they do not count towards recall, and detections that land on
    them are neither true nor false positives.

    Usage:
        evaluator = WiderFaceEvaluator(gt_dir='wider_face/wider_face_split')
        evaluator.add_prediction(filename, boxes, scores)
        results = evaluator.evaluate()
        print(f"Easy={results['easy_ap']:.4f}, Med={results['medium_ap']:.4f}, Hard={results['hard_ap']:.4f}")
    """

    # WiderFace event directory names (62 entries, prefixed 0 through 61)
    EVENTS = [
        '0--Parade', '1--Handshaking', '2--Demonstration',
        '3--Riot', '4--Dancing', '5--Car_Accident',
        '6--Funeral', '7--Cheering', '8--Election_Campain',
        '9--Press_Conference', '10--People_Marching',
        '11--Meeting', '12--Group', '13--Interview',
        '14--Traffic', '15--Stock_Market', '16--Award_Ceremony',
        '17--Ceremony', '18--Concerts', '19--Couple',
        '20--Family_Group', '21--Festival', '22--Picnic',
        '23--Shoppers', '24--Soldier_Firing', '25--Soldier_Patrol',
        '26--Soldier_Drilling', '27--Spa', '28--Sports_Fan',
        '29--Students_Schoolkids', '30--Surgeons',
        '31--Waiter_Waitress', '32--Workers_Laborers',
        '33--Running', '34--Baseball', '35--Basketball',
        '36--Football', '37--Soccer', '38--Tennis',
        '39--Ice_Skating', '40--Gymnastics', '41--Swimming',
        '42--Car_Racing', '43--Row_Boat', '44--Aerobics',
        '45--Balloonist', '46--Jockey', '47--Matador_Bullfighter',
        '48--Parachutist_Paraglider', '49--Greeting',
        '50--Celebration_Or_Party', '51--Dresses',
        '52--Photographers', '53--Raid', '54--Rescue',
        '55--Sports_Coach_Trainer', '56--Voter',
        '57--Angler', '58--Hockey', '59--people--driving--car',
        '60--Tableau', '61--Street_Battle',
    ]

    # Minimum face size, sqrt(w * h) in pixels, for a ground-truth face to be
    # part of each difficulty subset. 0 keeps every face.
    _SIZE_THRESHOLDS = {
        'easy': 50,    # faces >= 50px
        'medium': 20,  # faces >= 20px
        'hard': 0,     # all faces
    }

    def __init__(self, gt_dir: Optional[str] = None, iou_threshold: float = 0.5):
        """
        Args:
            gt_dir: Directory containing WiderFace ground truth annotation files.
                When falsy, no ground truth is loaded.
            iou_threshold: IoU threshold for matching (default: 0.5, WiderFace standard)
        """
        self.gt_dir = gt_dir
        self.iou_threshold = iou_threshold
        self.predictions = {}  # filename → (boxes, scores)
        self.ground_truth = {}  # filename → boxes as (N, 4) float32 [x1, y1, x2, y2]

        if gt_dir:
            self._load_ground_truth()

    def _load_ground_truth(self):
        """
        Load WiderFace validation ground truth into ``self.ground_truth``.

        Reads ``wider_face_val_bbx_gt.txt`` from ``self.gt_dir``. Each record is
        a filename line, a face-count line, then one row per face whose first
        four fields are ``x y w h``. Boxes are stored as ``[x1, y1, x2, y2]``;
        rows with non-positive width or height are dropped. If the file is
        missing, a warning is printed and nothing is loaded.

        Raises:
            ValueError: If a face-count line or a box field is not numeric.
        """
        ann_file = os.path.join(self.gt_dir, 'wider_face_val_bbx_gt.txt')
        if not os.path.exists(ann_file):
            print(f"Warning: GT file not found: {ann_file}")
            return

        with open(ann_file, 'r', encoding='utf-8') as f:
            while True:
                raw = f.readline()
                if not raw:
                    break  # readline() returns '' only at end of file
                filename = raw.strip()
                if not filename:
                    # A blank line is not end-of-data; skip it instead of
                    # silently discarding every record that follows.
                    continue

                num_faces = int(f.readline().strip())
                boxes = []
                # A zero-face record is still followed by one placeholder row,
                # so at least one line is always consumed.
                for _ in range(max(num_faces, 1)):
                    line = f.readline().strip()
                    if num_faces == 0:
                        continue
                    x, y, w, h = map(float, line.split()[:4])
                    if w > 0 and h > 0:
                        boxes.append([x, y, x + w, y + h])

                if boxes:
                    self.ground_truth[filename] = np.array(boxes, dtype=np.float32)
                else:
                    self.ground_truth[filename] = np.empty((0, 4), dtype=np.float32)

    def add_prediction(self, filename: str, boxes: np.ndarray, scores: np.ndarray):
        """
        Add (or replace) the prediction for a single image.

        Args:
            filename: Image key; must equal the ground-truth filename to be scored.
            boxes: Detected boxes, one ``[x1, y1, x2, y2]`` row per detection.
                Any array-like is accepted and copied.
            scores: One confidence per box. Any array-like is accepted, copied
                and flattened to 1-D.

        Raises:
            ValueError: If the number of boxes and scores differ.
        """
        # np.array copies, so later mutation by the caller cannot leak in, and
        # list/tuple inputs become arrays that evaluate() can negate and index.
        boxes = np.array(boxes)
        scores = np.array(scores).reshape(-1)
        if len(boxes) != len(scores):
            raise ValueError(
                f"Got {len(boxes)} boxes but {len(scores)} scores for '{filename}'"
            )
        self.predictions[filename] = (boxes, scores)

    def evaluate(self, difficulty: str = 'all') -> Dict:
        """
        Run WiderFace evaluation.

        Args:
            difficulty: 'easy', 'medium', 'hard', or 'all'

        Returns:
            dict mapping ``'<difficulty>_ap'`` to the AP for each requested level

        Raises:
            ValueError: If ``difficulty`` is not one of the accepted values.
        """
        if difficulty == 'all':
            levels = ['easy', 'medium', 'hard']
        elif difficulty in self._SIZE_THRESHOLDS:
            levels = [difficulty]
        else:
            # Previously an unknown name was silently scored as "hard" and
            # reported under a bogus key.
            raise ValueError(
                f"Unknown difficulty '{difficulty}'; expected 'easy', 'medium', 'hard' or 'all'"
            )

        return {f'{level}_ap': self._evaluate_difficulty(level) for level in levels}

    def _evaluate_difficulty(self, difficulty: str) -> float:
        """
        Evaluate AP for a single difficulty level.

        Only images present in ``self.ground_truth`` are scored. Within an
        image, detections are visited in descending score order and each is
        compared with its highest-IoU ground-truth box:

        - IoU below the threshold, or box already claimed → false positive
        - box excluded from this subset by size → detection is ignored
        - otherwise → true positive, and the box is marked as claimed

        Args:
            difficulty: Key into ``_SIZE_THRESHOLDS``; unknown keys keep all faces.

        Returns:
            AP from ``compute_ap`` (11-point mode), or 0.0 when the subset has
            no ground-truth faces or no detection was scored.
        """
        # For full evaluation, we'd need the official difficulty masks
        # Here we implement a simplified version based on face size
        min_size = self._SIZE_THRESHOLDS.get(difficulty, 0)

        all_tp = []      # 1 for a true positive, 0 for a false positive
        all_scores = []  # confidence of each counted detection, same order
        total_gt = 0

        for filename, gt_boxes in self.ground_truth.items():
            # Mark which GT faces belong to this subset; the rest are kept
            # around so detections on them can be ignored rather than punished.
            if min_size > 0 and len(gt_boxes) > 0:
                sizes = np.sqrt((gt_boxes[:, 2] - gt_boxes[:, 0]) *
                                (gt_boxes[:, 3] - gt_boxes[:, 1]))
                gt_keep = sizes >= min_size
            else:
                gt_keep = np.ones(len(gt_boxes), dtype=bool)

            total_gt += int(gt_keep.sum())

            if filename not in self.predictions:
                continue

            pred_boxes, pred_scores = self.predictions[filename]
            pred_scores = np.asarray(pred_scores, dtype=np.float64).reshape(-1)

            if len(pred_scores) == 0:
                continue

            if len(gt_boxes) == 0:
                # Nothing to match against: every detection is a false positive.
                all_tp.extend([0] * len(pred_scores))
                all_scores.extend(pred_scores.tolist())
                continue

            # Match predictions to GT
            iou_matrix = compute_iou_matrix(pred_boxes, gt_boxes)
            gt_matched = np.zeros(len(gt_boxes), dtype=bool)

            # Highest-confidence detections claim GT boxes first; a stable
            # sort keeps tied scores in input order so results are repeatable.
            for i in np.argsort(-pred_scores, kind='stable'):
                best_gt = int(iou_matrix[i].argmax())
                is_tp = False
                if iou_matrix[i, best_gt] >= self.iou_threshold:
                    if not gt_keep[best_gt]:
                        # Correct detection of a face outside this subset.
                        continue
                    if not gt_matched[best_gt]:
                        gt_matched[best_gt] = True
                        is_tp = True
                all_tp.append(1 if is_tp else 0)
                all_scores.append(float(pred_scores[i]))

        if total_gt == 0 or not all_scores:
            return 0.0

        # Global ranking across all images
        order = np.argsort(-np.asarray(all_scores), kind='stable')
        tp = np.asarray(all_tp)[order]
        fp = 1 - tp

        tp_cumsum = np.cumsum(tp)
        fp_cumsum = np.cumsum(fp)

        recall = tp_cumsum / total_gt
        # Every counted detection is either TP or FP, so the denominator is >= 1.
        precision = tp_cumsum / (tp_cumsum + fp_cumsum)

        return compute_ap(recall, precision, use_11_point=True)

    def save_predictions(self, output_dir: str):
        """
        Save predictions in WiderFace submission format.

        For each image ``<event>/<name>.<ext>`` a file
        ``<output_dir>/<event>/<name>.txt`` is written containing the image
        name, the number of boxes, then one ``x y w h score`` row per box.

        Args:
            output_dir: Root directory for the prediction files (created if missing).
        """
        os.makedirs(output_dir, exist_ok=True)

        for filename, (boxes, scores) in self.predictions.items():
            event_dir = os.path.join(output_dir, os.path.dirname(filename))
            os.makedirs(event_dir, exist_ok=True)

            base = os.path.splitext(os.path.basename(filename))[0]
            lines = [f'{base}\n', f'{len(boxes)}\n']
            for (x1, y1, x2, y2), score in zip(boxes, scores):
                # Stored boxes are corner format; the submission wants x y w h.
                w, h = x2 - x1, y2 - y1
                lines.append(f'{x1:.1f} {y1:.1f} {w:.1f} {h:.1f} {score:.4f}\n')

            with open(os.path.join(event_dir, f'{base}.txt'), 'w') as f:
                f.writelines(lines)

    def generate_report(self) -> str:
        """
        Generate a text report of evaluation results.

        Runs ``evaluate()`` for all difficulty levels and returns a
        multi-line summary with the three AP values, the number of images
        with ground truth / predictions, and the IoU threshold.
        """
        results = self.evaluate()
        rule = "=" * 60
        report = [
            rule,
            "WiderFace Evaluation Results",
            rule,
            f"  Easy AP:   {results.get('easy_ap', 0):.4f}",
            f"  Medium AP: {results.get('medium_ap', 0):.4f}",
            f"  Hard AP:   {results.get('hard_ap', 0):.4f}",
            "",
            f"  Total images with GT: {len(self.ground_truth)}",
            f"  Total images with predictions: {len(self.predictions)}",
            f"  IoU threshold: {self.iou_threshold}",
            rule,
        ]
        return '\n'.join(report)