import time

import torch
from PIL import Image
from torchvision.models.detection import (
    fasterrcnn_resnet50_fpn,
    FasterRCNN_ResNet50_FPN_Weights,
    keypointrcnn_resnet50_fpn,
    KeypointRCNN_ResNet50_FPN_Weights,
)

from backend.app.ml.sensor_fusion import load_fusion_for_cam_frame
from backend.app.ml.inference import predict, USING_FUSION_MODEL


def _sync_if_cuda(device):
    """Call ``torch.cuda.synchronize()`` when *device* is a CUDA device.

    Used right before every clock reading so the elapsed time is taken only
    after synchronization; does nothing for any other device type.
    """
    if device.type == "cuda":
        torch.cuda.synchronize()


def _ms_since(start):
    """Return the milliseconds elapsed since the ``time.perf_counter()`` value *start*."""
    return (time.perf_counter() - start) * 1000


def main():
    """Time each stage of the pipeline on one sample frame and print the results.

    Measures, in order: load time of torchvision's Faster R-CNN and
    Keypoint R-CNN (default weights), average per-frame inference time of
    both models, average time of ``load_fusion_for_cam_frame`` for the
    sample frame, and average time of ``predict`` on hard-coded sample
    inputs. Finally prints a combined per-update estimate and the
    equivalent frames-per-second figure.
    """
    sample_path = r"DataSet/samples/CAM_FRONT/n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151603512404.jpg"
    frame = Image.open(sample_path).convert("RGB")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device", device)
    print("using_fusion_model", USING_FUSION_MODEL)

    # --- Model loading (construction + move to device + eval mode) ---
    started = time.perf_counter()
    det_weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
    detector = fasterrcnn_resnet50_fpn(weights=det_weights, progress=False)
    detector = detector.to(device).eval()
    _sync_if_cuda(device)
    det_load_ms = _ms_since(started)

    started = time.perf_counter()
    pose_weights = KeypointRCNN_ResNet50_FPN_Weights.DEFAULT
    pose_model = keypointrcnn_resnet50_fpn(weights=pose_weights, progress=False)
    pose_model = pose_model.to(device).eval()
    _sync_if_cuda(device)
    pose_load_ms = _ms_since(started)

    print("load_ms_fasterrcnn", round(det_load_ms, 2))
    print("load_ms_keypointrcnn", round(pose_load_ms, 2))

    # Each model gets a batch of one, preprocessed by its own weights' transforms.
    det_preprocess = det_weights.transforms()
    det_input = det_preprocess(frame).unsqueeze(0).to(device)
    pose_preprocess = pose_weights.transforms()
    pose_input = pose_preprocess(frame).unsqueeze(0).to(device)

    # One untimed forward pass per model before measuring.
    with torch.no_grad():
        detector(det_input)
        pose_model(pose_input)
    _sync_if_cuda(device)

    # --- Per-frame inference ---
    model_reps = 5

    started = time.perf_counter()
    with torch.no_grad():
        for _ in range(model_reps):
            detector(det_input)
    _sync_if_cuda(device)
    det_ms = _ms_since(started) / model_reps

    started = time.perf_counter()
    with torch.no_grad():
        for _ in range(model_reps):
            pose_model(pose_input)
    _sync_if_cuda(device)
    pose_ms = _ms_since(started) / model_reps

    print("avg_ms_det_per_frame", round(det_ms, 2))
    print("avg_ms_pose_per_frame", round(pose_ms, 2))

    # --- Fusion lookup (no warm-up call: the first lookup is part of the average) ---
    fusion_reps = 30
    started = time.perf_counter()
    for _ in range(fusion_reps):
        load_fusion_for_cam_frame(
            "n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151603512404.jpg",
            data_root="DataSet",
        )
    fusion_ms = _ms_since(started) / fusion_reps
    print("avg_ms_fusion_lookup", round(fusion_ms, 2))

    # --- predict() on hard-coded sample inputs ---
    pts = [(0, 10), (2, 10), (4, 10), (6, 10)]
    neigh = [
        [(8, 12), (8.5, 12), (9, 12), (9.5, 12)],
        [(15, 7), (15.5, 7.2), (16, 7.5), (16.4, 7.7)],
    ]
    fusion_feats = [
        [0.2, 0.1, 0.25],
        [0.25, 0.1, 0.3],
        [0.3, 0.12, 0.35],
        [0.35, 0.15, 0.4],
    ]

    # Ten untimed calls before measuring.
    warmup_calls = 10
    for _ in range(warmup_calls):
        predict(pts, neigh, fusion_feats=fusion_feats)
    _sync_if_cuda(device)

    predict_reps = 300
    started = time.perf_counter()
    for _ in range(predict_reps):
        predict(pts, neigh, fusion_feats=fusion_feats)
    _sync_if_cuda(device)
    pred_ms = _ms_since(started) / predict_reps
    print("avg_ms_transformer_predict", round(pred_ms, 4))

    # Combined estimate from the measured stages.
    # NOTE(review): the 2x / 6x multipliers presumably model two detector
    # passes and six predict calls per live update -- confirm with the caller.
    approx = 2 * det_ms + pose_ms + fusion_ms + 6 * pred_ms
    if approx > 0:
        fps = 1000.0 / approx
    else:
        fps = 0.0
    print("approx_live_2frame_ms", round(approx, 2))
    print("approx_live_equiv_fps", round(fps, 2))


if __name__ == "__main__":
    main()