Spaces:

sajith-0701
/

IntentDrive

Running

IntentDrive/backend/scripts/evaluation/benchmark_perf.py

sajith-0701

Deploy FastAPI backend to HF Spaces (Docker SDK)

98075af 1 day ago

3.5 kB

	import time

	import torch
	from PIL import Image
	from torchvision.models.detection import (
	fasterrcnn_resnet50_fpn,
	FasterRCNN_ResNet50_FPN_Weights,
	keypointrcnn_resnet50_fpn,
	KeypointRCNN_ResNet50_FPN_Weights,
	)

	from backend.app.ml.sensor_fusion import load_fusion_for_cam_frame
	from backend.app.ml.inference import predict, USING_FUSION_MODEL


	def _cuda_sync(device=None):
	    """Wait for queued CUDA work to finish; do nothing for other devices.

	    ``device`` may be ``None``, in which case no synchronisation happens.
	    """
	    if device is not None and device.type == "cuda":
	        torch.cuda.synchronize()


	def _load_model_ms(builder, weights, device):
	    """Build a model with ``builder``, move it to ``device`` and time that.

	    Args:
	        builder: Model factory called as ``builder(weights=..., progress=False)``.
	        weights: Weights object forwarded to ``builder``.
	        device: Target ``torch.device``.

	    Returns:
	        ``(model, elapsed_ms)`` where the model is in eval mode on ``device``.
	    """
	    # NOTE(review): on a cold cache this presumably also includes the time
	    # to fetch the weights -- confirm before comparing runs.
	    start = time.perf_counter()
	    model = builder(weights=weights, progress=False).to(device).eval()
	    _cuda_sync(device)
	    return model, (time.perf_counter() - start) * 1000


	def _avg_ms(fn, iterations, device=None):
	    """Return the mean wall-clock milliseconds of ``iterations`` calls to ``fn``.

	    When ``device`` is a CUDA device, one synchronisation is performed after
	    the loop and before the clock is read, so asynchronously queued kernels
	    are included in the measurement.
	    """
	    start = time.perf_counter()
	    for _ in range(iterations):
	        fn()
	    _cuda_sync(device)
	    return (time.perf_counter() - start) * 1000 / iterations


	def main(
	    sample_name="n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151603512404.jpg",
	    data_root="DataSet",
	):
	    """Benchmark the perception stack on one camera frame and print timings.

	    Measures, in order: model load time for the Faster R-CNN detector and the
	    Keypoint R-CNN pose model, average per-frame inference time of each,
	    average time of ``load_fusion_for_cam_frame``, and average time of
	    ``predict`` on a small fixed input. Finally prints a combined latency
	    estimate and the matching frames-per-second figure.

	    Args:
	        sample_name: File name of the CAM_FRONT image. It is used both to
	            build the image path and as the fusion lookup key, so the two
	            can no longer drift apart.
	        data_root: Dataset root directory, resolved relative to the current
	            working directory.

	    Raises:
	        FileNotFoundError: If the sample image does not exist.
	    """
	    img_path = f"{data_root}/samples/CAM_FRONT/{sample_name}"
	    # convert() returns a new image, so the file can be closed right away.
	    with Image.open(img_path) as src:
	        img = src.convert("RGB")

	    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	    print("device", device)
	    print("using_fusion_model", USING_FUSION_MODEL)

	    w_det = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
	    m_det, load_det = _load_model_ms(fasterrcnn_resnet50_fpn, w_det, device)

	    w_pose = KeypointRCNN_ResNet50_FPN_Weights.DEFAULT
	    m_pose, load_pose = _load_model_ms(keypointrcnn_resnet50_fpn, w_pose, device)

	    print("load_ms_fasterrcnn", round(load_det, 2))
	    print("load_ms_keypointrcnn", round(load_pose, 2))

	    in_det = w_det.transforms()(img).unsqueeze(0).to(device)
	    in_pose = w_pose.transforms()(img).unsqueeze(0).to(device)

	    vision_iters = 5
	    with torch.no_grad():
	        # One untimed pass per model so first-call overhead is not measured.
	        m_det(in_det)
	        m_pose(in_pose)
	        _cuda_sync(device)

	        det_ms = _avg_ms(lambda: m_det(in_det), vision_iters, device)
	        pose_ms = _avg_ms(lambda: m_pose(in_pose), vision_iters, device)

	    print("avg_ms_det_per_frame", round(det_ms, 2))
	    print("avg_ms_pose_per_frame", round(pose_ms, 2))

	    fusion_iters = 30
	    # No device is passed: the original measurement did not synchronise here.
	    fusion_ms = _avg_ms(
	        lambda: load_fusion_for_cam_frame(sample_name, data_root=data_root),
	        fusion_iters,
	    )
	    print("avg_ms_fusion_lookup", round(fusion_ms, 2))

	    # Small fixed inputs for the predictor: one point track, two neighbour
	    # tracks and one feature row per point of the main track.
	    pts = [(0, 10), (2, 10), (4, 10), (6, 10)]
	    neigh = [
	        [(8, 12), (8.5, 12), (9, 12), (9.5, 12)],
	        [(15, 7), (15.5, 7.2), (16, 7.5), (16.4, 7.7)],
	    ]
	    fusion_feats = [
	        [0.2, 0.1, 0.25],
	        [0.25, 0.1, 0.3],
	        [0.3, 0.12, 0.35],
	        [0.35, 0.15, 0.4],
	    ]

	    def run_predict():
	        return predict(pts, neigh, fusion_feats=fusion_feats)

	    warmup_iters = 10
	    for _ in range(warmup_iters):
	        run_predict()
	    _cuda_sync(device)

	    predict_iters = 300
	    pred_ms = _avg_ms(run_predict, predict_iters, device)
	    print("avg_ms_transformer_predict", round(pred_ms, 4))

	    # NOTE(review): the weights presumably model one live update as two
	    # detector passes, one pose pass, one fusion lookup and six predictor
	    # calls -- confirm against the live pipeline.
	    approx = 2 * det_ms + pose_ms + fusion_ms + 6 * pred_ms
	    fps = 1000.0 / approx if approx > 0 else 0.0
	    print("approx_live_2frame_ms", round(approx, 2))
	    print("approx_live_equiv_fps", round(fps, 2))


	# Run the benchmark only when this file is executed as a script, not when
	# it is imported.
	if __name__ == "__main__":
	    main()