| |
| import argparse |
| import time |
| import torch |
| from mmcv import Config |
| from mmcv.parallel import MMDataParallel |
| from mmcv.runner import load_checkpoint, wrap_fp16_model |
| import sys |
| sys.path.append('.') |
| from projects.mmdet3d_plugin.datasets.builder import build_dataloader |
| from projects.mmdet3d_plugin.datasets import custom_build_dataset |
| |
| from mmdet3d.models import build_detector |
| |
|
|
|
|
def parse_args():
    """Parse the command-line arguments of the benchmark script.

    Returns:
        argparse.Namespace: with attributes ``config`` (str),
        ``checkpoint`` (str or None), ``samples`` (int),
        ``log_interval`` (int) and ``fuse_conv_bn`` (bool).
    """
    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('--checkpoint', default=None, help='checkpoint file')
    # type=int is required: without it a value given on the command line is
    # kept as a string, which breaks the integer comparison / modulo that the
    # benchmark loop performs on these two options.
    parser.add_argument(
        '--samples', type=int, default=2000, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', type=int, default=10, help='interval of logging')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
    return parser.parse_args()
|
|
|
|
def main():
    """Benchmark the inference throughput of the configured detector.

    Builds the test dataset and a non-distributed dataloader with one sample
    per GPU, builds the detector (optionally wrapped for fp16 and loaded from
    a checkpoint), wraps it in ``MMDataParallel`` on device 0 and times the
    forward pass of each sample. The first ``num_warmup`` iterations are
    excluded from the timing. A running FPS is printed every
    ``--log-interval`` samples and an overall FPS once ``--samples`` samples
    have been processed or the dataloader is exhausted, whichever is first.
    """
    args = parse_args()

    # Coerce explicitly so the arithmetic below is valid even if the parser
    # hands these options over as strings.
    num_samples = int(args.samples)
    log_interval = int(args.log_interval)

    cfg = Config.fromfile(args.config)

    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    print(cfg.data.test)
    dataset = custom_build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    if args.checkpoint is not None:
        load_checkpoint(model, args.checkpoint, map_location='cpu')
    # NOTE(review): args.fuse_conv_bn is parsed but not applied anywhere in
    # this function -- confirm whether conv/bn fusion should be wired in.

    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    num_warmup = 5  # leading iterations that are run but not timed
    pure_inf_time = 0.0
    num_done = 0

    for i, data in enumerate(data_loader):
        # Synchronize before and after the forward pass so that the measured
        # interval is bracketed by completed CUDA work.
        torch.cuda.synchronize()
        start_time = time.perf_counter()
        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)
        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        num_done = i + 1

        # Each timed sample contributes its elapsed time exactly once.
        if i >= num_warmup:
            pure_inf_time += elapsed
            if log_interval > 0 and num_done % log_interval == 0:
                fps = (num_done - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {num_samples}], '
                      f'fps: {fps:.1f} img / s')

        if num_done == num_samples:
            break

    # Report after the loop so a summary is also produced when the dataloader
    # runs out before the requested number of samples is reached.
    num_timed = num_done - num_warmup
    if num_timed > 0 and pure_inf_time > 0:
        fps = num_timed / pure_inf_time
        print(f'Overall fps: {fps:.1f} img / s')
    else:
        print(f'Overall fps: n/a (processed {num_done} samples, '
              f'the first {num_warmup} are warm-up and not timed)')
|
|
|
|
# Script entry point: run the benchmark only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    main()
|
|