facedet / scripts /detect_video.py
cledouxluma's picture
Upload scripts/detect_video.py with huggingface_hub
307c3fe verified
#!/usr/bin/env python3
"""
Video face detection with tracking and temporal smoothing.

Usage:
    # Process video file
    python scripts/detect_video.py \\
        --model scrfd_34g \\
        --checkpoint checkpoints/scrfd_34g_best.pth \\
        --input video.mp4 \\
        --output output.mp4

    # Webcam (real-time)
    python scripts/detect_video.py \\
        --model scrfd_2.5g \\
        --checkpoint checkpoints/scrfd_2.5g_best.pth \\
        --input 0 \\
        --show

    # RTSP stream
    python scripts/detect_video.py \\
        --model scrfd_2.5g \\
        --checkpoint checkpoints/scrfd_2.5g_best.pth \\
        --input rtsp://192.168.1.100/stream \\
        --output output.mp4
"""
import os
import sys
import argparse
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from models.detector import build_detector
from engine.video_detector import VideoFaceDetector
def parse_args(argv=None):
    """Parse the command-line options of the video face-detection script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what the script entry point relies on.

    Returns:
        argparse.Namespace holding the parsed options.

    Raises:
        SystemExit: Raised by argparse on invalid arguments, including the
            case where neither ``--checkpoint`` nor ``--onnx`` is supplied.
    """
    parser = argparse.ArgumentParser(description='Video Face Detection')
    parser.add_argument('--model', type=str, default='scrfd_34g')
    # Not declared ``required``: an ONNX run takes its weights from --onnx,
    # so demanding a checkpoint there would force a meaningless dummy value.
    # The "one of the two must be present" rule is enforced after parsing.
    parser.add_argument('--checkpoint', type=str, default=None,
                        help='PyTorch checkpoint path (required unless --onnx is given)')
    parser.add_argument('--input', type=str, required=True,
                        help='Video file, webcam index (0), or RTSP URL')
    parser.add_argument('--output', type=str, default=None,
                        help='Output video path')
    parser.add_argument('--input-size', type=int, default=640)
    parser.add_argument('--score-thresh', type=float, default=0.4)
    parser.add_argument('--nms-thresh', type=float, default=0.4)
    parser.add_argument('--device', type=str, default='cuda')
    parser.add_argument('--no-tracking', action='store_true')
    parser.add_argument('--no-smoothing', action='store_true')
    parser.add_argument('--keyframe-interval', type=int, default=0,
                        help='Run detector every N frames (0=every frame)')
    parser.add_argument('--max-frames', type=int, default=-1)
    parser.add_argument('--show', action='store_true',
                        help='Display video in window')
    parser.add_argument('--onnx', type=str, default=None,
                        help='Use ONNX model instead of PyTorch')

    args = parser.parse_args(argv)

    # Mirror the truthiness test the caller uses to pick the ONNX branch:
    # a checkpoint is only mandatory when that branch will not be taken.
    if not args.onnx and args.checkpoint is None:
        parser.error('--checkpoint is required unless --onnx is given')
    return args
def main():
    """Run face detection on the video source named on the command line.

    Parses the CLI options, prepares the model (a PyTorch detector from
    ``build_detector`` unless ``--onnx`` is set), wraps it in a
    ``VideoFaceDetector``, processes the source and prints the statistics
    returned by ``process_video``.
    """
    opts = parse_args()

    # With --onnx no PyTorch model is built; the ONNX file path is handed
    # over as the model path in place of the checkpoint.
    if opts.onnx:
        model, weights_path = None, opts.onnx
    else:
        model = build_detector(
            opts.model,
            score_threshold=opts.score_thresh,
            nms_threshold=opts.nms_thresh,
        )
        weights_path = opts.checkpoint

    # The CLI exposes tracking/smoothing as opt-out flags, while the detector
    # takes them as positive switches, hence the negation.
    detector_options = {
        'model': model,
        'model_path': weights_path,
        'model_name': opts.model,
        'device': opts.device,
        'score_threshold': opts.score_thresh,
        'nms_threshold': opts.nms_thresh,
        'input_size': opts.input_size,
        'use_tracking': not opts.no_tracking,
        'use_smoothing': not opts.no_smoothing,
        'keyframe_interval': opts.keyframe_interval,
    }
    detector = VideoFaceDetector(**detector_options)

    # An all-digit input is passed on as an integer (the webcam index form
    # described in the --input help); anything else stays a string.
    source = opts.input
    if source.isdigit():
        source = int(source)

    stats = detector.process_video(
        source=source,
        max_frames=opts.max_frames,
        output_path=opts.output,
        show=opts.show,
    )

    # Summary report; ``stats`` is indexed by the three keys used below.
    print("\nProcessing complete:")
    print(f" Frames: {stats['total_frames']}")
    print(f" Average FPS: {stats['avg_fps']:.1f}")
    print(f" Average faces/frame: {stats['avg_faces_per_frame']:.1f}")
    if args_output := opts.output:
        print(f" Output saved to: {args_output}")
# Only run the pipeline when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()