import sys
from typing import List, NamedTuple, Sequence, Tuple

import cv2
import numpy as np
import supervision as sv
from sahi.models.yolov8 import Yolov8DetectionModel
from sahi.predict import get_sliced_prediction


# Sliced YOLOv8 inference over a video file.
#
# Reads the input video frame by frame, runs get_sliced_prediction on each
# frame, draws corner boxes and "<class> <score>" labels, and writes the
# annotated frames to the output video.

USAGE = (
    "Usage: python yolov8_video_inference.py <model_path> <input_video_path> "
    "<output_video_path> <slice_height> <slice_width> "
    "<overlap_height_ratio> <overlap_width_ratio>"
)
EXPECTED_ARGC = 8  # script name + 7 positional arguments
CONFIDENCE_THRESHOLD = 0.1
DEVICE = "cuda"
FALLBACK_FPS = 30.0  # used only when the input container reports no frame rate


class InferenceConfig(NamedTuple):
    """Validated command-line settings for one inference run."""

    model_path: str
    input_video_path: str
    output_video_path: str
    slice_height: int
    slice_width: int
    overlap_height_ratio: float
    overlap_width_ratio: float


def parse_cli_args(argv: Sequence[str]) -> InferenceConfig:
    """Convert ``sys.argv``-style arguments into an ``InferenceConfig``.

    Args:
        argv: Script name followed by the seven positional arguments, in the
            order shown in ``USAGE``.

    Returns:
        The parsed and validated configuration.

    Raises:
        ValueError: If the argument count is wrong, a numeric argument cannot
            be converted, a slice size is not positive, or an overlap ratio
            is outside ``[0, 1)``.
    """
    if len(argv) != EXPECTED_ARGC:
        raise ValueError(
            f"expected {EXPECTED_ARGC - 1} arguments, got {max(len(argv) - 1, 0)}"
        )

    model_path, input_video_path, output_video_path = argv[1:4]
    slice_height = int(argv[4])
    slice_width = int(argv[5])
    overlap_height_ratio = float(argv[6])
    overlap_width_ratio = float(argv[7])

    if slice_height <= 0 or slice_width <= 0:
        raise ValueError("slice_height and slice_width must be positive integers")

    # An overlap of 1.0 or more leaves no stride between consecutive slices,
    # so reject it here rather than handing it to the slicer. The chained
    # comparison also rejects NaN.
    for name, ratio in (
        ("overlap_height_ratio", overlap_height_ratio),
        ("overlap_width_ratio", overlap_width_ratio),
    ):
        if not 0.0 <= ratio < 1.0:
            raise ValueError(f"{name} must be in the range [0, 1), got {ratio}")

    return InferenceConfig(
        model_path=model_path,
        input_video_path=input_video_path,
        output_video_path=output_video_path,
        slice_height=slice_height,
        slice_width=slice_width,
        overlap_height_ratio=overlap_height_ratio,
        overlap_width_ratio=overlap_width_ratio,
    )


def predictions_to_arrays(
    object_predictions,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, List[str]]:
    """Flatten prediction objects into the arrays needed for annotation.

    Each prediction must expose ``bbox.to_xyxy()``, ``score.value``,
    ``category.id`` and ``category.name``.

    Returns:
        ``(xyxy, confidences, class_ids, labels)`` where ``xyxy`` is a float32
        array of shape ``(n, 4)``, ``confidences`` is float32 of shape
        ``(n,)``, ``class_ids`` is an int array of shape ``(n,)`` and
        ``labels`` holds one ``"<name> <score>"`` string per prediction.
    """
    xyxy = np.array(
        [pred.bbox.to_xyxy() for pred in object_predictions], dtype=np.float32
    ).reshape(-1, 4)  # keeps the (0, 4) shape when there are no predictions
    confidences = np.array(
        [pred.score.value for pred in object_predictions], dtype=np.float32
    )
    class_ids = np.array([pred.category.id for pred in object_predictions], dtype=int)
    labels = [
        f"{pred.category.name} {confidence:.2f}"
        for pred, confidence in zip(object_predictions, confidences)
    ]
    return xyxy, confidences, class_ids, labels


def annotate_frame(frame, object_predictions, box_annotator, label_annotator):
    """Return ``frame`` with boxes and labels drawn for ``object_predictions``.

    When there are no predictions the input frame is returned as-is; it is
    only written out afterwards, so no copy is needed.
    """
    if not object_predictions:
        return frame

    xyxy, confidences, class_ids, labels = predictions_to_arrays(object_predictions)
    detections = sv.Detections(
        xyxy=xyxy,
        confidence=confidences,
        class_id=class_ids,
    )
    annotated_frame = box_annotator.annotate(scene=frame.copy(), detections=detections)
    return label_annotator.annotate(
        scene=annotated_frame, detections=detections, labels=labels
    )


def run_inference(config: InferenceConfig) -> int:
    """Run sliced inference over the whole input video and write the result.

    Args:
        config: Paths and slicing parameters for this run.

    Returns:
        The number of frames processed.

    Raises:
        OSError: If the input video cannot be opened or the output video
            writer cannot be created.
    """
    cap = cv2.VideoCapture(config.input_video_path)
    out = None
    try:
        # Fail before loading the model if there is nothing to read.
        if not cap.isOpened():
            raise OSError(f"could not open input video: {config.input_video_path}")

        detection_model = Yolov8DetectionModel(
            model_path=config.model_path,
            confidence_threshold=CONFIDENCE_THRESHOLD,
            device=DEVICE,
        )

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Some sources report no frame rate; a writer needs a positive one.
            print(
                f"Warning: input reports no frame rate, using {FALLBACK_FPS} fps",
                file=sys.stderr,
            )
            fps = FALLBACK_FPS

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(config.output_video_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise OSError(f"could not open output video: {config.output_video_path}")

        box_annotator = sv.BoxCornerAnnotator(thickness=2)
        label_annotator = sv.LabelAnnotator(text_scale=0.5, text_thickness=2)

        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            result = get_sliced_prediction(
                image=frame,
                detection_model=detection_model,
                slice_height=config.slice_height,
                slice_width=config.slice_width,
                overlap_height_ratio=config.overlap_height_ratio,
                overlap_width_ratio=config.overlap_width_ratio,
            )
            out.write(
                annotate_frame(
                    frame,
                    result.object_prediction_list,
                    box_annotator,
                    label_annotator,
                )
            )

            frame_count += 1
            print(f"Processed frame {frame_count}", end='\r')

        return frame_count
    finally:
        # Release both handles even when inference raises part-way through.
        cap.release()
        if out is not None:
            out.release()


def main(argv: Sequence[str]) -> int:
    """Command-line entry point; returns the process exit status."""
    try:
        config = parse_cli_args(argv)
    except ValueError as err:
        # A wrong argument count prints only the usage line, as before.
        if len(argv) == EXPECTED_ARGC:
            print(f"Error: {err}", file=sys.stderr)
        print(USAGE)
        return 1

    try:
        run_inference(config)
    except OSError as err:
        print(f"Error: {err}", file=sys.stderr)
        return 1

    print("\nInference complete. Video saved at", config.output_video_path)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))