#!/usr/bin/env python3
"""
Evaluate SCRFD model on WiderFace validation set.

Usage:
    python scripts/evaluate.py \\
        --model scrfd_34g \\
        --checkpoint checkpoints/scrfd_34g_best.pth \\
        --data-root data/wider_face \\
        --output-dir results/scrfd_34g

Output:
    - WiderFace Easy/Medium/Hard AP
    - Prediction files in WiderFace submission format
    - Speed benchmark results
"""
import os
import sys
import argparse
import time
import json
from pathlib import Path

import numpy as np
import cv2
import torch

# Make the repository root importable when running from scripts/.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from models.detector import build_detector
from evaluation.widerface_eval import WiderFaceEvaluator
from evaluation.speed_benchmark import SpeedBenchmark


def parse_args():
    parser = argparse.ArgumentParser(description='Evaluate SCRFD')
    parser.add_argument('--model', type=str, default='scrfd_34g')
    parser.add_argument('--checkpoint', type=str, required=True)
    parser.add_argument('--data-root', type=str, default='data/wider_face')
    parser.add_argument('--output-dir', type=str, default='results')
    parser.add_argument('--input-size', type=int, default=640)
    parser.add_argument('--score-thresh', type=float, default=0.02)
    parser.add_argument('--nms-thresh', type=float, default=0.4)
    parser.add_argument('--device', type=str, default='cuda')
    # The previous store_true/default=True combination made this flag a no-op.
    # BooleanOptionalAction (Python 3.9+) keeps the benchmark on by default
    # while allowing it to be disabled with --no-benchmark.
    parser.add_argument('--benchmark', action=argparse.BooleanOptionalAction,
                        default=True,
                        help='Run the speed benchmark (disable with --no-benchmark)')
    parser.add_argument('--multi-scale', action='store_true',
                        help='Multi-scale testing (slower, higher AP)')
    parser.add_argument('--scales', nargs='+', type=int,
                        default=[500, 800, 1100, 1400, 1700],
                        help='Scales for multi-scale testing')
    return parser.parse_args()


@torch.no_grad()
def evaluate_single_scale(model, evaluator, data_root, input_size, device, score_thresh):
    """Run single-scale evaluation over WIDER_val and feed predictions to the evaluator."""
    img_dir = os.path.join(data_root, 'WIDER_val', 'images')
    mean = np.array([104.0, 117.0, 123.0], dtype=np.float32)

    total_time = 0.0
    num_images = 0

    for event in sorted(os.listdir(img_dir)):
        event_dir = os.path.join(img_dir, event)
        if not os.path.isdir(event_dir):
            continue
        for img_name in sorted(os.listdir(event_dir)):
            if not img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue
            img_path = os.path.join(event_dir, img_name)
            img = cv2.imread(img_path)
            if img is None:
                continue
            h, w = img.shape[:2]
            filename = f'{event}/{img_name}'

            # Preprocess: resize the long side to input_size, pad to a square,
            # subtract the BGR mean, and convert HWC -> CHW.
            scale = input_size / max(h, w)
            new_h, new_w = int(h * scale), int(w * scale)
            resized = cv2.resize(img, (new_w, new_h))
            padded = np.zeros((input_size, input_size, 3), dtype=np.float32)
            padded[:new_h, :new_w] = resized
            padded = (padded - mean).transpose(2, 0, 1)
            tensor = torch.from_numpy(padded).unsqueeze(0).float().to(device)

            # Inference
            t0 = time.time()
            results = model(tensor)
            total_time += time.time() - t0
            num_images += 1

            # Post-process
            r = results[0]
            boxes = r['boxes'].cpu().numpy()
            scores = r['scores'].cpu().numpy()

            # Rescale boxes back to the original image resolution
            if len(boxes) > 0:
                boxes /= scale

            mask = scores >= score_thresh
            boxes = boxes[mask]
            scores = scores[mask]

            evaluator.add_prediction(filename, boxes, scores)

            if num_images % 200 == 0:
                fps = num_images / max(total_time, 1e-6)
                print(f"  Processed {num_images} images ({fps:.1f} FPS)")

    return total_time, num_images
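

# --- Hedged sketch: multi-scale testing --------------------------------------
# parse_args() exposes --multi-scale and --scales, but only single-scale
# evaluation is implemented above. The helper below is a minimal sketch of how
# those flags could be wired up: run the detector once per scale, map boxes
# back to original-image coordinates, and merge the per-scale detections with
# NMS. Assumptions not confirmed by this repo: torchvision is installed for the
# final NMS, and the detector accepts inputs padded to a multiple of 32. It is
# not called from main(); adapt it to the actual model interface before use.
@torch.no_grad()
def detect_multi_scale(model, img, scales, device, score_thresh, nms_thresh):
    """Detect faces in one BGR image at several scales and merge the results."""
    from torchvision.ops import nms  # assumption: torchvision is available

    mean = np.array([104.0, 117.0, 123.0], dtype=np.float32)
    h, w = img.shape[:2]
    all_boxes, all_scores = [], []

    for target in scales:
        scale = target / max(h, w)
        new_h, new_w = int(h * scale), int(w * scale)
        resized = cv2.resize(img, (new_w, new_h))

        # Pad each side up to a multiple of 32 so the FPN strides divide evenly.
        pad_h = int(np.ceil(new_h / 32) * 32)
        pad_w = int(np.ceil(new_w / 32) * 32)
        padded = np.zeros((pad_h, pad_w, 3), dtype=np.float32)
        padded[:new_h, :new_w] = resized
        padded = (padded - mean).transpose(2, 0, 1)
        tensor = torch.from_numpy(padded).unsqueeze(0).float().to(device)

        r = model(tensor)[0]
        boxes = r['boxes'].cpu().numpy() / scale  # back to original coordinates
        scores = r['scores'].cpu().numpy()
        keep = scores >= score_thresh
        all_boxes.append(boxes[keep])
        all_scores.append(scores[keep])

    boxes = np.concatenate(all_boxes, axis=0)
    scores = np.concatenate(all_scores, axis=0)
    if len(boxes) == 0:
        return boxes, scores

    # Merge detections collected across scales with a final NMS pass.
    keep = nms(torch.from_numpy(boxes).float(),
               torch.from_numpy(scores).float(),
               nms_thresh).numpy()
    return boxes[keep], scores[keep]
# ------------------------------------------------------------------------------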


def main():
    args = parse_args()
    os.makedirs(args.output_dir, exist_ok=True)

    # Load model
    print(f"Loading {args.model} from {args.checkpoint}")
    model = build_detector(
        args.model,
        score_threshold=args.score_thresh,
        nms_threshold=args.nms_thresh,
    ).to(args.device)

    checkpoint = torch.load(args.checkpoint, map_location='cpu')
    state_dict = checkpoint.get('model_state_dict', checkpoint)
    model.load_state_dict(state_dict, strict=False)
    model.eval()

    num_params = sum(p.numel() for p in model.parameters()) / 1e6
    print(f"  Parameters: {num_params:.2f}M")

    # WiderFace evaluation
    print("Running WiderFace evaluation...")
    evaluator = WiderFaceEvaluator(
        gt_dir=os.path.join(args.data_root, 'wider_face_split')
    )
    total_time, num_images = evaluate_single_scale(
        model, evaluator, args.data_root, args.input_size,
        args.device, args.score_thresh
    )

    # Results
    results = evaluator.evaluate()
    report = evaluator.generate_report()
    print(report)

    # Save predictions
    evaluator.save_predictions(os.path.join(args.output_dir, 'predictions'))

    # Speed benchmark
    if args.benchmark:
        print("\nRunning speed benchmark...")
        bench = SpeedBenchmark(device=args.device)
        for size in [320, 480, 640, 960]:
            bench.benchmark_model(model, args.model, input_size=size)
        bench.print_results()

        # Save markdown table
        with open(os.path.join(args.output_dir, 'speed_benchmark.md'), 'w') as f:
            f.write(bench.to_markdown())

    # Save results
    results['num_images'] = num_images
    results['total_time'] = total_time
    results['avg_fps'] = num_images / max(total_time, 1e-6)
    results['model'] = args.model
    results['input_size'] = args.input_size

    with open(os.path.join(args.output_dir, 'results.json'), 'w') as f:
        json.dump(results, f, indent=2)

    print(f"\nResults saved to {args.output_dir}/")


if __name__ == '__main__':
    main()