| """ |
| DEIMv2: Real-Time Object Detection Meets DINOv3 |
| Copyright (c) 2025 The DEIMv2 Authors. All Rights Reserved. |
| --------------------------------------------------------------------------------- |
| Modified from D-FINE (https://github.com/Peterande/D-FINE) |
| Copyright (c) 2024 The D-FINE Authors. All Rights Reserved. |
| """ |
| import os |
| import sys |
| import cv2 |
| import time |
| import numpy as np |
| import axengine as axe |
| import torch |
| import torch.nn as nn |
| import torch.nn.functional as F |
| import torchvision.transforms as T |
| from PIL import Image, ImageDraw |
| import pyzbar.pyzbar as pyzbar |
| import torchvision |
| import glob |
|
|
def mod(a, b):
    """Return the floor-division remainder of ``a`` by ``b``.

    The result is built only from ``//``, ``*`` and ``-``, so it works for
    any operands that support those three operators.

    Args:
        a: Dividend.
        b: Divisor.

    Returns:
        ``a - (a // b) * b``.
    """
    quotient = a // b
    return a - quotient * b
|
|
# Category id -> class name. The ids are not contiguous (for example 12, 26,
# 29 and 30 are absent); insertion order defines the contiguous label indices
# derived below.
mscoco_category2name = {
    1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane',
    6: 'bus', 7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light',
    11: 'fire hydrant', 13: 'stop sign', 14: 'parking meter', 15: 'bench',
    16: 'bird', 17: 'cat', 18: 'dog', 19: 'horse', 20: 'sheep',
    21: 'cow', 22: 'elephant', 23: 'bear', 24: 'zebra', 25: 'giraffe',
    27: 'backpack', 28: 'umbrella', 31: 'handbag', 32: 'tie', 33: 'suitcase',
    34: 'frisbee', 35: 'skis', 36: 'snowboard', 37: 'sports ball', 38: 'kite',
    39: 'baseball bat', 40: 'baseball glove', 41: 'skateboard',
    42: 'surfboard', 43: 'tennis racket', 44: 'bottle',
    46: 'wine glass', 47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon',
    51: 'bowl', 52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange',
    56: 'broccoli', 57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut',
    61: 'cake', 62: 'chair', 63: 'couch', 64: 'potted plant', 65: 'bed',
    67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse',
    75: 'remote', 76: 'keyboard', 77: 'cell phone', 78: 'microwave',
    79: 'oven', 80: 'toaster', 81: 'sink', 82: 'refrigerator',
    84: 'book', 85: 'clock', 86: 'vase', 87: 'scissors', 88: 'teddy bear',
    89: 'hair drier', 90: 'toothbrush',
}

# Contiguous label index (0..79) -> category id, in dictionary order.
mscoco_label2category = dict(enumerate(mscoco_category2name))

# Category id -> contiguous label index (the inverse of the map above).
mscoco_category2label = {
    category: label for label, category in mscoco_label2category.items()
}
|
|
class PostProcessor(nn.Module):
    """Convert raw detector outputs into labels, pixel-space boxes and scores.

    ``forward`` reads ``outputs['pred_logits']`` and ``outputs['pred_boxes']``.
    Boxes are converted from ``cxcywh`` to ``xyxy`` and multiplied by the
    per-image sizes before the best-scoring candidates are selected.
    """

    # Attribute names listed for sharing; nothing in this file reads the list
    # (presumably consumed by an external config system - TODO confirm).
    __share__ = [
        'num_classes',
        'use_focal_loss',
        'num_top_queries',
        'remap_mscoco_category',
    ]

    def __init__(
        self,
        num_classes=80,
        use_focal_loss=True,
        num_top_queries=300,
        remap_mscoco_category=False
    ) -> None:
        """Store the post-processing configuration.

        Args:
            num_classes: Number of classes per query; coerced with ``int``.
            use_focal_loss: If true, score with a sigmoid and pick the top
                (query, class) pairs; otherwise score with a softmax and drop
                the last class column.
            num_top_queries: Maximum number of detections kept per image.
            remap_mscoco_category: If true (and not in deploy mode), map
                label indices through ``mscoco_label2category``.
        """
        super().__init__()
        self.use_focal_loss = use_focal_loss
        self.num_top_queries = num_top_queries
        self.num_classes = int(num_classes)
        self.remap_mscoco_category = remap_mscoco_category
        self.deploy_mode = False

    def extra_repr(self) -> str:
        """Return the configuration summary shown in the module repr."""
        return (
            f'use_focal_loss={self.use_focal_loss}, '
            f'num_classes={self.num_classes}, '
            f'num_top_queries={self.num_top_queries}'
        )

    def forward(self, outputs, orig_target_sizes: torch.Tensor):
        """Select the top detections for every image in the batch.

        Args:
            outputs: Mapping with ``'pred_logits'`` and ``'pred_boxes'``.
                Logits are indexed as a 3-D tensor whose last axis is the
                class axis; boxes are ``cxcywh`` with a last axis of 4.
            orig_target_sizes: 2-D tensor with two values per image. The
                first value scales the x coordinates and the second scales
                the y coordinates.

        Returns:
            In deploy mode, the tuple ``(labels, boxes, scores)``. Otherwise a
            list with one ``dict(labels=..., boxes=..., scores=...)`` per
            image.
        """
        logits, boxes = outputs['pred_logits'], outputs['pred_boxes']

        bbox_pred = torchvision.ops.box_convert(boxes, in_fmt='cxcywh', out_fmt='xyxy')
        # (B, 2) -> (B, 4) -> (B, 1, 4) so it broadcasts over the query axis.
        bbox_pred *= orig_target_sizes.repeat(1, 2).unsqueeze(1)

        if self.use_focal_loss:
            flat_scores = F.sigmoid(logits).flatten(1)
            # Never request more candidates than exist; topk would raise.
            k = min(self.num_top_queries, flat_scores.shape[1])
            scores, index = torch.topk(flat_scores, k, dim=-1)

            # A flat index encodes (query, class) as query * num_classes + class.
            labels = mod(index, self.num_classes)
            index = index // self.num_classes
            boxes = bbox_pred.gather(
                dim=1,
                index=index.unsqueeze(-1).repeat(1, 1, bbox_pred.shape[-1]),
            )
        else:
            # Normalise over the class axis explicitly: without ``dim`` the
            # legacy implicit choice for a 3-D input is dim 0 (the batch).
            scores = F.softmax(logits, dim=-1)[:, :, :-1]
            scores, labels = scores.max(dim=-1)
            # Return the scaled xyxy boxes, not the raw cxcywh input, so both
            # branches produce boxes in the same format.
            boxes = bbox_pred
            if scores.shape[1] > self.num_top_queries:
                scores, index = torch.topk(scores, self.num_top_queries, dim=-1)
                labels = torch.gather(labels, dim=1, index=index)
                boxes = torch.gather(
                    bbox_pred,
                    dim=1,
                    index=index.unsqueeze(-1).tile(1, 1, bbox_pred.shape[-1]),
                )

        if self.deploy_mode:
            return labels, boxes, scores

        if self.remap_mscoco_category:
            labels = torch.tensor(
                [mscoco_label2category[int(x.item())] for x in labels.flatten()]
            ).to(boxes.device).reshape(labels.shape)

        return [
            dict(labels=lab, boxes=box, scores=sco)
            for lab, box, sco in zip(labels, boxes, scores)
        ]

    def deploy(self, ):
        """Switch to eval mode, enable tuple outputs and return ``self``."""
        self.eval()
        self.deploy_mode = True
        return self
def resize_with_aspect_ratio(image, size, interpolation=Image.BILINEAR):
    """Scale an image to fit a ``size`` x ``size`` square and centre it.

    Args:
        image: PIL image to resize.
        size: Side length of the square output canvas.
        interpolation: Resampling filter passed to ``image.resize``.

    Returns:
        Tuple ``(canvas, ratio, pad_x, pad_y)``: the new square RGB image,
        the scale factor applied to both dimensions, and the left/top
        offsets at which the scaled image was pasted.
    """
    src_w, src_h = image.size
    # The smaller of the two factors makes the longer side fit exactly.
    ratio = min(size / src_w, size / src_h)
    scaled_w, scaled_h = int(src_w * ratio), int(src_h * ratio)
    scaled = image.resize((scaled_w, scaled_h), interpolation)

    # Centre the scaled image; the rest of the canvas keeps Image.new's
    # default fill.
    pad_x = (size - scaled_w) // 2
    pad_y = (size - scaled_h) // 2
    canvas = Image.new("RGB", (size, size))
    canvas.paste(scaled, (pad_x, pad_y))
    return canvas, ratio, pad_x, pad_y
|
|
|
|
def draw(images, labels, boxes, scores, ratios, paddings, thrh=0.25):
    """Draw detections above a score threshold onto the given images.

    Boxes are mapped back from the padded/resized frame to each image's own
    coordinates by subtracting the padding and dividing by the resize ratio.

    Args:
        images: PIL images; they are drawn on in place.
        labels: Per-image labels, indexable by a boolean mask.
        boxes: Per-image boxes, four coordinates each.
        scores: Per-image scores, compared element-wise with ``thrh``.
        ratios: Per-image resize ratio.
        paddings: Per-image ``(pad_w, pad_h)`` offsets.
        thrh: Detections with a score not greater than this are skipped.

    Returns:
        Tuple ``(result_images, detections)``: the same image objects, and
        one flat list of ``[x1, y1, x2, y2]`` integer boxes covering all
        images.
    """
    annotated = []
    detections = []
    for img_idx, im in enumerate(images):
        canvas = ImageDraw.Draw(im)
        keep = scores[img_idx] > thrh
        kept_labels = labels[img_idx][keep]
        kept_boxes = boxes[img_idx][keep]

        scale = ratios[img_idx]
        pad_w, pad_h = paddings[img_idx]

        for lbl, raw_box in zip(kept_labels, kept_boxes):
            # Undo the padding offset and then the resize.
            left = (raw_box[0] - pad_w) / scale
            top = (raw_box[1] - pad_h) / scale
            right = (raw_box[2] - pad_w) / scale
            bottom = (raw_box[3] - pad_h) / scale
            rect = [left, top, right, bottom]

            canvas.rectangle(rect, outline='red')
            canvas.text((left, top), text=str(lbl), fill='blue')
            detections.append([int(coord) for coord in rect])
        annotated.append(im)
    return annotated, detections
|
|
|
|
def process_image(sess, im_pil, post_processor, size=640, model_size='s'):
    """Run detection on one PIL image and draw the results on it.

    Args:
        sess: Inference session; called as
            ``sess.run(output_names=None, input_feed={'images': ...})``.
            Its first two outputs are used as logits and boxes.
        im_pil: Source PIL image; detections are drawn on it in place.
        post_processor: Callable taking ``(outputs, sizes)`` whose result is
            unpacked as ``(labels, boxes, scores)``.
        size: Side length of the square network input.
        model_size: Accepted but not used by this function.

    Returns:
        Tuple ``(detections, result_images)`` as produced by ``draw``.
    """
    letterboxed, ratio, pad_w, pad_h = resize_with_aspect_ratio(im_pil, size)
    canvas_w, canvas_h = letterboxed.size
    orig_size = torch.tensor([[canvas_h, canvas_w]])

    # Add a leading batch axis; the image is fed as uint8 without
    # normalisation.
    batch = np.array(letterboxed)[np.newaxis, ...].astype(np.uint8)
    raw = sess.run(output_names=None, input_feed={'images': batch})

    predictions = {
        "pred_logits": torch.from_numpy(raw[0]),
        "pred_boxes": torch.from_numpy(raw[1]),
    }
    labels, boxes, scores = post_processor(predictions, orig_size)

    result_images, detections = draw(
        [im_pil], labels, boxes, scores, [ratio], [(pad_w, pad_h)]
    )
    return detections, result_images
|
|
|
|
class QRCodeDecoder:
    """Crop detected QR-code regions from an image and decode them."""

    def crop_qr_regions(self, image, regions, margin=15):
        """Crop each detected bounding box, enlarged by a margin.

        The enlarged box is clamped to the image bounds. Without clamping, a
        box near the top/left edge produces negative slice indices, which
        NumPy interprets as counting from the end and so yields a wrong or
        empty crop.

        Args:
            image: Array indexed as ``image[y, x]`` (optionally with further
                axes).
            regions: Iterable of ``(x1, y1, x2, y2)`` boxes.
            margin: Pixels added on every side of each box before cropping.

        Returns:
            List of ``{'image': crop, 'bbox': region}`` dicts, one per
            non-empty crop. ``'bbox'`` is the original, un-enlarged region.
        """
        img_h, img_w = image.shape[:2]

        def clamp(value, upper):
            # Keep a slice bound inside [0, upper].
            return min(max(int(value), 0), upper)

        cropped_images = []
        for region in regions:
            x1, y1, x2, y2 = region
            left = clamp(x1 - margin, img_w)
            top = clamp(y1 - margin, img_h)
            right = clamp(x2 + margin, img_w)
            bottom = clamp(y2 + margin, img_h)

            cropped = image[top:bottom, left:right]
            if cropped.size > 0:
                cropped_images.append({
                    'image': cropped,
                    'bbox': region,
                })

        return cropped_images

    def decode_qrcode_pyzbar(self, cropped_image):
        """Decode the codes found in a cropped image using pyzbar.

        A 3-D input is converted to grayscale with ``cv2.COLOR_BGR2GRAY``;
        any other input is passed to pyzbar unchanged.

        Args:
            cropped_image: Image array to decode.

        Returns:
            List of ``{'data', 'type', 'points'}`` dicts, one per decoded
            symbol whose payload is valid UTF-8. Returns an empty list if
            conversion or decoding raises; the error is printed.
        """
        try:
            if len(cropped_image.shape) == 3:
                gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
            else:
                gray = cropped_image

            results = []
            for obj in pyzbar.decode(gray):
                try:
                    data = obj.data.decode('utf-8')
                except UnicodeDecodeError:
                    # Skip symbols whose payload is not UTF-8 text.
                    continue
                results.append({
                    'data': data,
                    'type': obj.type,
                    'points': obj.polygon,
                })

            return results
        except Exception as e:
            # Best effort: a failed crop must not abort the whole batch.
            print(f"decode error: {e}")
            return []
| |
if __name__ == '__main__':
    # Batch test: detect QR codes in every .jpg under img_path, try to decode
    # each detected region, and report the success rate and timings.
    sess = axe.InferenceSession('deimv2_femto_650_npu1_u16.axmodel')
    # Network input size is read from axis 2 of the first input's shape
    # (presumably the width of an NHWC input - TODO confirm).
    size = sess.get_inputs()[0].shape[2]

    decoder = QRCodeDecoder()
    img_path = './qrcode_test'
    det_path = './DEIMv2_det_res'
    crop_path = './DEIMv2_crop_res'

    # The output directories are created, but nothing below writes to them.
    os.makedirs(det_path, exist_ok=True)
    os.makedirs(crop_path, exist_ok=True)

    post_processor = PostProcessor(use_focal_loss=True, num_classes=1, num_top_queries=100)
    post_processor.deploy()

    imgs = glob.glob(f"{img_path}/*.jpg")
    total = len(imgs)
    success = 0
    fail = 0
    start_time = time.time()
    for img in imgs:
        loop_start_time = time.time()

        # Close the file handle as soon as the pixels have been loaded.
        with Image.open(img) as raw_image:
            im_pil = raw_image.convert('RGB')
        # Take the BGR copy before process_image, which draws the detection
        # boxes onto im_pil in place.
        img_cv2 = cv2.cvtColor(np.array(im_pil), cv2.COLOR_RGB2BGR)
        det_result, _ = process_image(sess, im_pil, post_processor, size, 'femto')

        cropped_images = decoder.crop_qr_regions(img_cv2, det_result)

        all_decoded_results = []
        for cropped_data in cropped_images:
            all_decoded_results.extend(
                decoder.decode_qrcode_pyzbar(cropped_data['image'])
            )

        # An image counts as a success if any region decoded.
        if all_decoded_results:
            success += 1
            print("识别成功!")
        else:
            fail += 1
            print("识别失败!")
        loop_end_time = time.time()
        print(f"图片 {img} 处理耗时: {loop_end_time - loop_start_time:.4f} 秒")

    end_time = time.time()
    total_time = end_time - start_time

    # Guard the ratios so an empty input directory does not raise
    # ZeroDivisionError.
    success_rate = success / total * 100 if total else 0.0
    average_time = total_time / total if total else 0.0

    print(f"总共测试图片数量: {total}")
    print(f"识别成功数量: {success}")
    print(f"识别失败数量: {fail}")
    print(f"识别成功率: {success_rate:.2f}%")
    print(f"整体处理耗时: {total_time:.4f} 秒")
    print(f"平均每张图片处理耗时: {average_time:.4f} 秒")
|
|