Spaces:

tcooper-xx
/

SWClassifier

Sleeping

App Files Files Community

SWClassifier / detector /inference.py

tcooper-xx

Initial Commit

34ecf0d about 1 month ago

raw

history blame contribute delete

3.7 kB

	import time
	from typing import List, Tuple, Union

	import numpy as np
	import torch
	from ultralytics import YOLO

	# Module-level logger for this detector, created as a child of `app_logger`.
	# NOTE(review): `app_logger` is not imported in the visible part of this file —
	# confirm where it is defined (likewise `BaseInference` and `YOLOResult` used below).
	logger = app_logger.getChild("models.detector.ultralytics")

	class YOLOInference(BaseInference):
	    """Object detector built on the official Ultralytics ``YOLO`` class.

	    Loads a model once at construction time and exposes ``predict`` to run
	    batched inference, wrapping every detected box in a ``YOLOResult``.
	    """

	    def __init__(self, model_path: str, imsz: int = 640,
	                 conf_threshold: float = 0.25, nms_threshold: float = 0.45,
	                 device: str = "cpu"):
	        """
	        Initialize the detector and load the model via the Ultralytics SDK.

	        Args:
	            model_path: Path to the model file (.pt, .onnx, or .torchscript).
	            imsz: Input image size for the model.
	            conf_threshold: Confidence threshold to filter out low-confidence boxes.
	            nms_threshold: IoU threshold for Non-Maximum Suppression.
	            device: Computing device ('cpu' or 'cuda').
	        """
	        # NOTE(review): `self.device` is read by load_model/predict but never
	        # assigned in this class — presumably BaseInference derives it from
	        # this config dict; confirm against the base class.
	        super().__init__(config={"device": device})

	        self.model_path = model_path
	        self.imsz = imsz
	        self.conf_threshold = conf_threshold
	        self.nms_threshold = nms_threshold

	        self.load_model(model_path)

	    def load_model(self, model_path: str):
	        """
	        Load the model into memory. Ultralytics handles the various formats automatically.

	        Args:
	            model_path: Path to the model file passed straight to ``YOLO``.
	        """
	        logger.info(f"[load] Loading Ultralytics model from {model_path} on {self.device}")
	        # The YOLO class automatically handles weights and architecture configuration
	        self.model = YOLO(model_path)

	        # Only an in-memory PyTorch module can be moved with .to(); for exported
	        # formats (e.g. ONNX / TorchScript) Ultralytics keeps a path in `.model`
	        # and .to() raises TypeError. The device is passed again at predict
	        # time, so skipping the move here is safe for those formats.
	        if isinstance(getattr(self.model, "model", None), torch.nn.Module):
	            self.model.to(self.device)

	    def predict(self, im_bgr: Union[np.ndarray, List[np.ndarray]]) -> List[List[YOLOResult]]:
	        """
	        Run end-to-end inference including preprocessing, model forward pass, and NMS.

	        Args:
	            im_bgr: A single image or a list of images in BGR format (numpy arrays).

	        Returns:
	            A list with one entry per input image; each entry is a list of
	            YOLOResult objects. If inference fails, the error is logged with
	            its traceback and an empty list is returned for every input image.
	        """
	        if isinstance(im_bgr, np.ndarray):
	            im_bgr = [im_bgr]

	        # Nothing to do for an empty batch; avoid handing it to the model,
	        # which would only surface as a logged error and the same empty result.
	        if len(im_bgr) == 0:
	            logger.debug("[infer] No frames supplied; skipping detector inference")
	            return []

	        # perf_counter is monotonic, so the measured duration cannot be skewed
	        # by system clock adjustments.
	        start_time = time.perf_counter()
	        logger.debug(f"[infer] Starting detector inference on {len(im_bgr)} frame(s)")

	        try:
	            # Ultralytics .predict() handles letterboxing, normalization, and NMS internally.
	            # It also automatically scales coordinates back to the original image size.
	            results = self.model.predict(
	                source=im_bgr,
	                imgsz=self.imsz,
	                conf=self.conf_threshold,
	                iou=self.nms_threshold,
	                device=self.device,
	                verbose=False,
	                save=False,
	            )

	            final_results = []
	            for frame, res in zip(im_bgr, results):
	                boxes = res.boxes
	                if boxes is None:
	                    # A result without box output yields no detections for
	                    # this frame instead of failing the whole batch.
	                    final_results.append([])
	                    continue

	                # boxes.data contains [x1, y1, x2, y2, confidence, class_id]
	                boxes_data = boxes.data.cpu().numpy()

	                # box[:5] extracts [x1, y1, x2, y2, confidence]; the scaled
	                # coordinates and the original image go to the YOLOResult wrapper.
	                final_results.append(
	                    [YOLOResult(box[:5], frame) for box in boxes_data]
	                )

	            return final_results

	        except Exception as e:
	            # Deliberate best-effort: keep the pipeline alive, but record the
	            # full traceback so the failure can be diagnosed.
	            logger.exception("Inference error occurred: %s", e)
	            # Return empty lists to prevent the pipeline from breaking
	            return [[] for _ in im_bgr]

	        finally:
	            logger.info(
	                f"[infer] Detector inference completed in {(time.perf_counter() - start_time) * 1000:.2f} ms "
	                f"for {len(im_bgr)} frame(s)"
	            )