#!/usr/bin/env python3
"""
Evaluate SCRFD model on WiderFace validation set.
Usage:
python scripts/evaluate.py \\
--model scrfd_34g \\
--checkpoint checkpoints/scrfd_34g_best.pth \\
--data-root data/wider_face \\
--output-dir results/scrfd_34g
Output:
- WiderFace Easy/Medium/Hard AP
- Prediction files in WiderFace submission format
- Speed benchmark results
"""
import os
import sys
import argparse
import time
import json
from pathlib import Path
import numpy as np
import cv2
import torch
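# Make the repository root importable so the local 'models' and 'evaluation'
# packages resolve regardless of the working directory.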
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from models.detector import build_detector
from evaluation.widerface_eval import WiderFaceEvaluator
from evaluation.speed_benchmark import SpeedBenchmark
def parse_args():
    parser = argparse.ArgumentParser(description='Evaluate SCRFD')
    parser.add_argument('--model', type=str, default='scrfd_34g')
    parser.add_argument('--checkpoint', type=str, required=True)
    parser.add_argument('--data-root', type=str, default='data/wider_face')
    parser.add_argument('--output-dir', type=str, default='results')
    parser.add_argument('--input-size', type=int, default=640)
    parser.add_argument('--score-thresh', type=float, default=0.02)
    parser.add_argument('--nms-thresh', type=float, default=0.4)
    parser.add_argument('--device', type=str, default='cuda')
    # 'store_true' with default=True made --benchmark impossible to disable;
    # expose an explicit opt-out instead, keeping the benchmark on by default.
    parser.add_argument('--no-benchmark', dest='benchmark', action='store_false',
                        help='Skip the speed benchmark (runs by default)')
    parser.add_argument('--multi-scale', action='store_true',
                        help='Multi-scale testing (slower, higher AP)')
    parser.add_argument('--scales', nargs='+', type=int,
                        default=[500, 800, 1100, 1400, 1700],
                        help='Scales for multi-scale testing')
    # NOTE: --multi-scale/--scales are parsed but main() only runs the single-scale
    # path; a sketch of multi-scale testing follows evaluate_single_scale() below.
    return parser.parse_args()
@torch.no_grad()
def evaluate_single_scale(model, evaluator, data_root, input_size, device,
                          score_thresh):
    """Run single-scale evaluation."""
    img_dir = os.path.join(data_root, 'WIDER_val', 'images')
    # Caffe-style per-channel mean in BGR order (cv2.imread returns BGR).
    mean = np.array([104.0, 117.0, 123.0], dtype=np.float32)
    total_time = 0
    num_images = 0
    for event in sorted(os.listdir(img_dir)):
        event_dir = os.path.join(img_dir, event)
        if not os.path.isdir(event_dir):
            continue
        for img_name in sorted(os.listdir(event_dir)):
            if not img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue
            img_path = os.path.join(event_dir, img_name)
            img = cv2.imread(img_path)
            if img is None:
                continue
            h, w = img.shape[:2]
            filename = f'{event}/{img_name}'

            # Preprocess: scale the longest side to input_size, pad bottom/right
            # with zeros, subtract the mean, and convert HWC -> CHW.
            scale = input_size / max(h, w)
            new_h, new_w = int(h * scale), int(w * scale)
            resized = cv2.resize(img, (new_w, new_h))
            padded = np.zeros((input_size, input_size, 3), dtype=np.float32)
            padded[:new_h, :new_w] = resized
            padded = (padded - mean).transpose(2, 0, 1)
            tensor = torch.from_numpy(padded).unsqueeze(0).float().to(device)

            # Inference
            t0 = time.time()
            results = model(tensor)
            total_time += time.time() - t0
            num_images += 1

            # Post-process
            r = results[0]
            boxes = r['boxes'].cpu().numpy()
            scores = r['scores'].cpu().numpy()

            # Rescale boxes to original image coordinates and filter by score
            if len(boxes) > 0:
                boxes /= scale
            mask = scores >= score_thresh
            boxes = boxes[mask]
            scores = scores[mask]

            evaluator.add_prediction(filename, boxes, scores)

            if num_images % 200 == 0:
                fps = num_images / max(total_time, 1e-6)
                print(f" Processed {num_images} images ({fps:.1f} FPS)")
    return total_time, num_images
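

# The CLI exposes --multi-scale/--scales, but only the single-scale path above is
# wired into main(). The function below is a minimal sketch of how multi-scale
# testing could be added, not the original implementation: it assumes the detector
# returns the same results[0]['boxes'] / ['scores'] format used above and that
# torchvision is available for cross-scale NMS. The name evaluate_multi_scale_image
# and its signature are hypothetical. One possible wiring: when args.multi_scale is
# set, call this per image instead of the single forward pass and feed the merged
# boxes/scores to evaluator.add_prediction().
@torch.no_grad()
def evaluate_multi_scale_image(model, img, scales, device, score_thresh,
                               nms_thresh=0.4):
    """Sketch: run one image at several scales and merge detections with NMS."""
    from torchvision.ops import nms  # assumed dependency, only needed here

    mean = np.array([104.0, 117.0, 123.0], dtype=np.float32)
    h, w = img.shape[:2]
    all_boxes, all_scores = [], []
    for size in scales:
        # Same letterbox preprocessing as evaluate_single_scale, at each test size.
        scale = size / max(h, w)
        new_h, new_w = int(h * scale), int(w * scale)
        resized = cv2.resize(img, (new_w, new_h))
        padded = np.zeros((size, size, 3), dtype=np.float32)
        padded[:new_h, :new_w] = resized
        padded = (padded - mean).transpose(2, 0, 1)
        tensor = torch.from_numpy(padded).unsqueeze(0).float().to(device)

        r = model(tensor)[0]
        boxes = r['boxes'].cpu().numpy()
        scores = r['scores'].cpu().numpy()
        if len(boxes) > 0:
            boxes /= scale  # back to original image coordinates
        keep = scores >= score_thresh
        all_boxes.append(boxes[keep])
        all_scores.append(scores[keep])

    boxes = np.concatenate(all_boxes, axis=0)
    scores = np.concatenate(all_scores, axis=0)
    if len(boxes) == 0:
        return boxes, scores
    # Merge overlapping detections produced at different scales.
    keep = nms(torch.from_numpy(boxes).float(),
               torch.from_numpy(scores).float(), nms_thresh).numpy()
    return boxes[keep], scores[keep]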
def main():
    args = parse_args()
    os.makedirs(args.output_dir, exist_ok=True)

    # Load model
    print(f"Loading {args.model} from {args.checkpoint}")
    model = build_detector(
        args.model,
        score_threshold=args.score_thresh,
        nms_threshold=args.nms_thresh,
    ).to(args.device)
    checkpoint = torch.load(args.checkpoint, map_location='cpu')
    state_dict = checkpoint.get('model_state_dict', checkpoint)
    model.load_state_dict(state_dict, strict=False)
    model.eval()
    num_params = sum(p.numel() for p in model.parameters()) / 1e6
    print(f" Parameters: {num_params:.2f}M")

    # WiderFace evaluation
    print("Running WiderFace evaluation...")
    evaluator = WiderFaceEvaluator(
        gt_dir=os.path.join(args.data_root, 'wider_face_split')
    )
    total_time, num_images = evaluate_single_scale(
        model, evaluator, args.data_root, args.input_size,
        args.device, args.score_thresh
    )

    # Results
    results = evaluator.evaluate()
    report = evaluator.generate_report()
    print(report)

    # Save predictions
    evaluator.save_predictions(os.path.join(args.output_dir, 'predictions'))

    # Speed benchmark
    if args.benchmark:
        print("\nRunning speed benchmark...")
        bench = SpeedBenchmark(device=args.device)
        for size in [320, 480, 640, 960]:
            bench.benchmark_model(model, args.model, input_size=size)
        bench.print_results()
        # Save markdown table
        with open(os.path.join(args.output_dir, 'speed_benchmark.md'), 'w') as f:
            f.write(bench.to_markdown())

    # Save results
    results['num_images'] = num_images
    results['total_time'] = total_time
    results['avg_fps'] = num_images / max(total_time, 1e-6)
    results['model'] = args.model
    results['input_size'] = args.input_size
    with open(os.path.join(args.output_dir, 'results.json'), 'w') as f:
        json.dump(results, f, indent=2)

    print(f"\nResults saved to {args.output_dir}/")


if __name__ == '__main__':
    main()