SWClassifier / detector /inference.py
tcooper-xx's picture
Initial Commit
34ecf0d
import time
from typing import List, Tuple, Union
import numpy as np
import torch
from ultralytics import YOLO
# Module-level logger for the detector, created as a child of the project's
# `app_logger` under the name "models.detector.ultralytics".
# NOTE(review): `app_logger` is not imported in the visible part of this file —
# confirm it is brought into scope elsewhere.
logger = app_logger.getChild("models.detector.ultralytics")
class YOLOInference(BaseInference):
    """Detector wrapper around the Ultralytics ``YOLO`` SDK.

    The model is loaded once at construction time; ``predict`` then runs the
    SDK's end-to-end pipeline and wraps every detected box in a ``YOLOResult``.

    NOTE(review): ``BaseInference`` and ``YOLOResult`` are not imported in the
    visible part of this file, and ``self.device`` is presumably set by
    ``BaseInference.__init__`` from the ``config`` dict — confirm both.
    """

    def __init__(self, model_path: str, imsz: int = 640,
                 conf_threshold: float = 0.25, nms_threshold: float = 0.45,
                 device: str = "cpu"):
        """
        Store the inference settings and load the model.

        Args:
            model_path: Path to the model file (.pt, .onnx, or .torchscript).
            imsz: Input image size for the model.
            conf_threshold: Confidence threshold to filter out low-confidence boxes.
            nms_threshold: IoU threshold for Non-Maximum Suppression.
            device: Computing device ('cpu' or 'cuda').
        """
        super().__init__(config={"device": device})
        self.model_path = model_path
        self.imsz = imsz
        self.conf_threshold = conf_threshold
        self.nms_threshold = nms_threshold
        self.load_model(model_path)

    def load_model(self, model_path: str):
        """
        Load the model into memory and store it on ``self.model``.

        Args:
            model_path: Path to the model file handed to ``YOLO``.
        """
        logger.info(f"[load] Loading Ultralytics model from {model_path} on {self.device}")
        # The YOLO class automatically handles weights and architecture configuration
        self.model = YOLO(model_path)

        # Ultralytics only allows `.to()` on native PyTorch models; for exported
        # formats (.onnx, .torchscript, ...) the wrapper holds no torch module
        # and `.to()` raises TypeError. Those formats get their device from the
        # `device=` argument passed on every `predict` call instead.
        if isinstance(getattr(self.model, "model", None), torch.nn.Module):
            self.model.to(self.device)
        else:
            logger.debug(
                "[load] Non-PyTorch model format; device is applied at predict time"
            )

    def predict(self, im_bgr: Union[np.ndarray, List[np.ndarray]]) -> List[List[YOLOResult]]:
        """
        Run end-to-end detection (preprocessing, forward pass, NMS) on the input.

        Args:
            im_bgr: A single image or a list of images in BGR format (numpy arrays).

        Returns:
            One list of YOLOResult objects per input image, in input order.
            If inference raises, the error is logged with its traceback and an
            empty list is returned for every input image.
        """
        if isinstance(im_bgr, np.ndarray):
            im_bgr = [im_bgr]

        frame_count = len(im_bgr)
        if frame_count == 0:
            # Nothing to detect; skip the SDK call for an empty batch.
            return []

        # perf_counter is monotonic, so the measured latency is not affected
        # by system clock adjustments.
        started = time.perf_counter()
        logger.debug(f"[infer] Starting detector inference on {frame_count} frame(s)")

        try:
            # Ultralytics .predict() handles letterboxing, normalization, and NMS internally.
            # It also automatically scales coordinates back to the original image size.
            results = self.model.predict(
                source=im_bgr,
                imgsz=self.imsz,
                conf=self.conf_threshold,
                iou=self.nms_threshold,
                device=self.device,
                verbose=False,
                save=False,
            )

            final_results = []
            for res, frame in zip(results, im_bgr):
                # res.boxes.data contains [x1, y1, x2, y2, confidence, class_id]
                boxes_data = res.boxes.data.cpu().numpy()
                # box[:5] keeps [x1, y1, x2, y2, confidence]; the class id is
                # dropped before the box and its source frame are wrapped.
                final_results.append(
                    [YOLOResult(box[:5], frame) for box in boxes_data]
                )
            return final_results
        except Exception as e:
            # Best-effort boundary: keep the pipeline alive, but record the
            # full traceback so the failure can be diagnosed.
            logger.exception(f"Inference error occurred: {e}")
            return [[] for _ in range(frame_count)]
        finally:
            # Logged on both the success and the failure path.
            logger.info(
                f"[infer] Detector inference completed in {(time.perf_counter() - started) * 1000:.2f} ms "
                f"for {frame_count} frame(s)"
            )