yolox-s / infer_onnx.py

update infer code for NCHW->NHWC

5c2432b over 2 years ago

5.35 kB

	#!/usr/bin/env python3
	# -- coding: utf-8 --

	import os
	import sys
	import cv2
	import pathlib
	import argparse
	import numpy as np
	import onnxruntime as ort

	CURRENT_DIR = pathlib.Path(__file__).parent
	sys.path.append(str(CURRENT_DIR))

	from coco import COCO_CLASSES
	from demo_utils import mkdir, multiclass_nms, demo_postprocess, vis


	def make_parser():
	parser = argparse.ArgumentParser("onnxruntime inference sample")
	parser.add_argument(
	"-m",
	"--model",
	type=str,
	default="yolox-s-int8.onnx",
	help="Input your onnx model.",
	)
	parser.add_argument(
	"-i",
	"--image_path",
	type=str,
	default='test_image.png',
	help="Path to your input image.",
	)
	parser.add_argument(
	"-o",
	"--output_dir",
	type=str,
	default='demo_output',
	help="Path to your output directory.",
	)
	parser.add_argument(
	"-s",
	"--score_thr",
	type=float,
	default=0.3,
	help="Score threshold to filter the result.",
	)
	parser.add_argument(
	"--input_shape",
	type=str,
	default="640,640",
	help="Specify an input shape for inference.",
	)
	parser.add_argument(
	"--ipu",
	action="store_true",
	help="Use IPU for inference.",
	)
	parser.add_argument(
	"--provider_config",
	type=str,
	default="vaip_config.json",
	help="Path of the config file for setting provider_options.",
	)
	return parser


	def preprocess(img, input_shape, swap=(2, 0, 1)):
	"""
	Preprocessing part of YOLOX for scaling and padding image as input to the network.

	Args:
	img (numpy.ndarray): H x W x C, image read with OpenCV
	input_shape (tuple(int)): input shape of the network for inference
	swap (tuple(int)): new order of axes to transpose the input image

	Returns:
	padded_img (numpy.ndarray): preprocessed image to be fed to the network
	ratio (float): ratio for scaling the image to the input shape
	"""
	if len(img.shape) == 3:
	padded_img = np.ones((input_shape[0], input_shape[1], 3), dtype=np.uint8) * 114
	else:
	padded_img = np.ones(input_shape, dtype=np.uint8) * 114
	ratio = min(input_shape[0] / img.shape[0], input_shape[1] / img.shape[1])
	resized_img = cv2.resize(
	img,
	(int(img.shape[1] * ratio), int(img.shape[0] * ratio)),
	interpolation=cv2.INTER_LINEAR,
	).astype(np.uint8)
	padded_img[: int(img.shape[0] * ratio), : int(img.shape[1] * ratio)] = resized_img
	padded_img = padded_img.transpose(swap)
	padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
	return padded_img, ratio


	def postprocess(outputs, input_shape, ratio):
	"""
	Post-processing part of YOLOX for generating final results from outputs of the network.

	Args:
	outputs (tuple(numpy.ndarray)): outputs of the detection heads with onnxruntime session
	input_shape (tuple(int)): input shape of the network for inference
	ratio (float): ratio for scaling the image to the input shape

	Returns:
	dets (numpy.ndarray): n x 6, dets[:,:4] -> boxes, dets[:,4] -> scores, dets[:,5] -> class indices
	"""
	outputs = [out.reshape(*out.shape[:2], -1).transpose(0,2,1) for out in outputs]
	outputs = np.concatenate(outputs, axis=1)
	outputs[..., 4:] = sigmoid(outputs[..., 4:])
	predictions = demo_postprocess(outputs, input_shape, p6=False)[0]
	boxes = predictions[:, :4]
	scores = predictions[:, 4:5] * predictions[:, 5:]
	boxes_xyxy = np.ones_like(boxes)
	boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2]/2.
	boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3]/2.
	boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2]/2.
	boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3]/2.
	boxes_xyxy /= ratio
	dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1)
	return dets


	def sigmoid(x):
	return 1.0 / (1.0 + np.exp(-x))


	if __name__ == '__main__':
	args = make_parser().parse_args()
	input_shape = tuple(map(int, args.input_shape.split(',')))
	origin_img = cv2.imread(args.image_path)
	img, ratio = preprocess(origin_img, input_shape)
	if args.ipu:
	providers = ["VitisAIExecutionProvider"]
	provider_options = [{"config_file": args.provider_config}]
	else:
	providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
	provider_options = None
	session = ort.InferenceSession(args.model, providers=providers, provider_options=provider_options)
	# ort_inputs = {session.get_inputs()[0].name: img[None, :, :, :]}
	ort_inputs = {session.get_inputs()[0].name: np.transpose(img[None, :, :, :], (0, 2 ,3, 1))}
	outputs = session.run(None, ort_inputs)
	outputs = [np.transpose(out, (0, 3, 1, 2)) for out in outputs]
	dets = postprocess(outputs, input_shape, ratio)
	if dets is not None:
	final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
	origin_img = vis(origin_img, final_boxes, final_scores, final_cls_inds,
	conf=args.score_thr, class_names=COCO_CLASSES)
	mkdir(args.output_dir)
	output_path = os.path.join(args.output_dir, os.path.basename(args.image_path))
	cv2.imwrite(output_path, origin_img)