#!/usr/bin/env python3
"""
Evaluate SCRFD model on WiderFace validation set.
Usage:
python scripts/evaluate.py \\
--model scrfd_34g \\
--checkpoint checkpoints/scrfd_34g_best.pth \\
--data-root data/wider_face \\
--output-dir results/scrfd_34g
Output:
- WiderFace Easy/Medium/Hard AP
- Prediction files in WiderFace submission format
- Speed benchmark results
"""
import os
import sys
import argparse
import time
import json
from pathlib import Path
import numpy as np
import cv2
import torch
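# Make the repository root importable so the local 'models' and 'evaluation'
# packages resolve regardless of the working directory.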
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from models.detector import build_detector
from evaluation.widerface_eval import WiderFaceEvaluator
from evaluation.speed_benchmark import SpeedBenchmark
def parse_args():
    parser = argparse.ArgumentParser(description='Evaluate SCRFD')
    parser.add_argument('--model', type=str, default='scrfd_34g')
    parser.add_argument('--checkpoint', type=str, required=True)
    parser.add_argument('--data-root', type=str, default='data/wider_face')
    parser.add_argument('--output-dir', type=str, default='results')
    parser.add_argument('--input-size', type=int, default=640)
    parser.add_argument('--score-thresh', type=float, default=0.02)
    parser.add_argument('--nms-thresh', type=float, default=0.4)
    parser.add_argument('--device', type=str, default='cuda')
    # 'store_true' with default=True made --benchmark impossible to disable;
    # expose an explicit opt-out instead, keeping the benchmark on by default.
    parser.add_argument('--no-benchmark', dest='benchmark', action='store_false',
                        help='Skip the speed benchmark (runs by default)')
    parser.add_argument('--multi-scale', action='store_true',
                        help='Multi-scale testing (slower, higher AP)')
    parser.add_argument('--scales', nargs='+', type=int,
                        default=[500, 800, 1100, 1400, 1700],
                        help='Scales for multi-scale testing')
    # NOTE: --multi-scale/--scales are parsed but main() only runs the single-scale
    # path; a sketch of multi-scale testing follows evaluate_single_scale() below.
    return parser.parse_args()
@torch.no_grad()
def evaluate_single_scale(model, evaluator, data_root, input_size, device,
                          score_thresh):
    """Run single-scale evaluation."""
    img_dir = os.path.join(data_root, 'WIDER_val', 'images')
    # Caffe-style per-channel mean in BGR order (cv2.imread returns BGR).
    mean = np.array([104.0, 117.0, 123.0], dtype=np.float32)
    total_time = 0
    num_images = 0
    for event in sorted(os.listdir(img_dir)):
        event_dir = os.path.join(img_dir, event)
        if not os.path.isdir(event_dir):
            continue
        for img_name in sorted(os.listdir(event_dir)):
            if not img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue
            img_path = os.path.join(event_dir, img_name)
            img = cv2.imread(img_path)
            if img is None:
                continue
            h, w = img.shape[:2]
            filename = f'{event}/{img_name}'

            # Preprocess: scale the longest side to input_size, pad bottom/right
            # with zeros, subtract the mean, and convert HWC -> CHW.
            scale = input_size / max(h, w)
            new_h, new_w = int(h * scale), int(w * scale)
            resized = cv2.resize(img, (new_w, new_h))
            padded = np.zeros((input_size, input_size, 3), dtype=np.float32)
            padded[:new_h, :new_w] = resized
            padded = (padded - mean).transpose(2, 0, 1)
            tensor = torch.from_numpy(padded).unsqueeze(0).float().to(device)

            # Inference
            t0 = time.time()
            results = model(tensor)
            total_time += time.time() - t0
            num_images += 1

            # Post-process
            r = results[0]
            boxes = r['boxes'].cpu().numpy()
            scores = r['scores'].cpu().numpy()

            # Rescale boxes to original image coordinates and filter by score
            if len(boxes) > 0:
                boxes /= scale
            mask = scores >= score_thresh
            boxes = boxes[mask]
            scores = scores[mask]

            evaluator.add_prediction(filename, boxes, scores)

            if num_images % 200 == 0:
                fps = num_images / max(total_time, 1e-6)
                print(f" Processed {num_images} images ({fps:.1f} FPS)")
    return total_time, num_images
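

# The CLI exposes --multi-scale/--scales, but only the single-scale path above is
# wired into main(). The function below is a minimal sketch of how multi-scale
# testing could be added, not the original implementation: it assumes the detector
# returns the same results[0]['boxes'] / ['scores'] format used above and that
# torchvision is available for cross-scale NMS. The name evaluate_multi_scale_image
# and its signature are hypothetical. One possible wiring: when args.multi_scale is
# set, call this per image instead of the single forward pass and feed the merged
# boxes/scores to evaluator.add_prediction().
@torch.no_grad()
def evaluate_multi_scale_image(model, img, scales, device, score_thresh,
                               nms_thresh=0.4):
    """Sketch: run one image at several scales and merge detections with NMS."""
    from torchvision.ops import nms  # assumed dependency, only needed here

    mean = np.array([104.0, 117.0, 123.0], dtype=np.float32)
    h, w = img.shape[:2]
    all_boxes, all_scores = [], []
    for size in scales:
        # Same letterbox preprocessing as evaluate_single_scale, at each test size.
        scale = size / max(h, w)
        new_h, new_w = int(h * scale), int(w * scale)
        resized = cv2.resize(img, (new_w, new_h))
        padded = np.zeros((size, size, 3), dtype=np.float32)
        padded[:new_h, :new_w] = resized
        padded = (padded - mean).transpose(2, 0, 1)
        tensor = torch.from_numpy(padded).unsqueeze(0).float().to(device)

        r = model(tensor)[0]
        boxes = r['boxes'].cpu().numpy()
        scores = r['scores'].cpu().numpy()
        if len(boxes) > 0:
            boxes /= scale  # back to original image coordinates
        keep = scores >= score_thresh
        all_boxes.append(boxes[keep])
        all_scores.append(scores[keep])

    boxes = np.concatenate(all_boxes, axis=0)
    scores = np.concatenate(all_scores, axis=0)
    if len(boxes) == 0:
        return boxes, scores
    # Merge overlapping detections produced at different scales.
    keep = nms(torch.from_numpy(boxes).float(),
               torch.from_numpy(scores).float(), nms_thresh).numpy()
    return boxes[keep], scores[keep]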
def main():
    args = parse_args()
    os.makedirs(args.output_dir, exist_ok=True)

    # Load model
    print(f"Loading {args.model} from {args.checkpoint}")
    model = build_detector(
        args.model,
        score_threshold=args.score_thresh,
        nms_threshold=args.nms_thresh,
    ).to(args.device)
    checkpoint = torch.load(args.checkpoint, map_location='cpu')
    state_dict = checkpoint.get('model_state_dict', checkpoint)
    model.load_state_dict(state_dict, strict=False)
    model.eval()
    num_params = sum(p.numel() for p in model.parameters()) / 1e6
    print(f" Parameters: {num_params:.2f}M")

    # WiderFace evaluation
    print("Running WiderFace evaluation...")
    evaluator = WiderFaceEvaluator(
        gt_dir=os.path.join(args.data_root, 'wider_face_split')
    )
    total_time, num_images = evaluate_single_scale(
        model, evaluator, args.data_root, args.input_size,
        args.device, args.score_thresh
    )

    # Results
    results = evaluator.evaluate()
    report = evaluator.generate_report()
    print(report)

    # Save predictions
    evaluator.save_predictions(os.path.join(args.output_dir, 'predictions'))

    # Speed benchmark
    if args.benchmark:
        print("\nRunning speed benchmark...")
        bench = SpeedBenchmark(device=args.device)
        for size in [320, 480, 640, 960]:
            bench.benchmark_model(model, args.model, input_size=size)
        bench.print_results()
        # Save markdown table
        with open(os.path.join(args.output_dir, 'speed_benchmark.md'), 'w') as f:
            f.write(bench.to_markdown())

    # Save results
    results['num_images'] = num_images
    results['total_time'] = total_time
    results['avg_fps'] = num_images / max(total_time, 1e-6)
    results['model'] = args.model
    results['input_size'] = args.input_size
    with open(os.path.join(args.output_dir, 'results.json'), 'w') as f:
        json.dump(results, f, indent=2)

    print(f"\nResults saved to {args.output_dir}/")


if __name__ == '__main__':
    main()