Alfred Liu committed
Commit: 3403979
Parent(s): 90720f7

Support visualization (#25)

Files changed:
- README.md (+17, -0)
- models/sparsebev_sampling.py (+2, -2)
- models/sparsebev_transformer.py (+4, -4)
- models/utils.py (+2, -1)
- viz_bbox_predictions.py (+244, -0)
- viz_sample_points.py (+151, -0)
README.md CHANGED

@@ -13,6 +13,7 @@ This is the official PyTorch implementation for our ICCV 2023 paper:
 
 ## News
 
+* 2023-10-20: We provide code for visualizing the predictions and the sampling points, as requested in [#25](https://github.com/MCG-NJU/SparseBEV/issues/25).
 * 2023-09-23: We release [the native PyTorch implementation of sparse sampling](https://github.com/MCG-NJU/SparseBEV/blob/97c8c798284555accedd0625395dd397fa4511d2/models/csrc/wrapper.py#L14). You can use this version if you encounter problems when compiling CUDA operators. It’s only about 15% slower.
 * 2023-08-21: We release the paper, code and pretrained weights.
 * 2023-07-14: SparseBEV is accepted to ICCV 2023.

@@ -90,7 +91,9 @@ data/nuscenes
 ├── maps
 ├── nuscenes_infos_test_sweep.pkl
 ├── nuscenes_infos_train_sweep.pkl
+├── nuscenes_infos_train_mini_sweep.pkl
 ├── nuscenes_infos_val_sweep.pkl
+├── nuscenes_infos_val_mini_sweep.pkl
 ├── samples
 ├── sweeps
 ├── v1.0-test

@@ -149,6 +152,20 @@ export CUDA_VISIBLE_DEVICES=0
 python timing.py --config configs/r50_nuimg_704x256.py --weights checkpoints/r50_nuimg_704x256.pth
 ```
 
+## Visualization
+
+Visualize the predicted bbox:
+
+```
+python viz_bbox_predictions.py --config configs/r50_nuimg_704x256.py --weights checkpoints/r50_nuimg_704x256.pth
+```
+
+Visualize the sampling points (like Fig. 6 in the paper):
+
+```
+python viz_sample_points.py --config configs/r50_nuimg_704x256.py --weights checkpoints/r50_nuimg_704x256.pth
+```
+
 ## Acknowledgements
 
 Many thanks to these excellent open-source projects:
models/sparsebev_sampling.py CHANGED

@@ -80,9 +80,9 @@ def sampling_4d(sample_points, mlvl_feats, scale_weights, lidar2img, image_h, image_w):
 
     # for visualization only
     if DUMP.enabled:
-        torch.save(torch.cat([sample_points_cam, homo_nonzero], dim=-1),
+        torch.save(torch.cat([sample_points_cam, homo_nonzero], dim=-1).cpu(),
                    '{}/sample_points_cam_stage{}.pth'.format(DUMP.out_dir, DUMP.stage_count))
-        torch.save(valid_mask,
+        torch.save(valid_mask.cpu(),
                    '{}/sample_points_cam_valid_mask_stage{}.pth'.format(DUMP.out_dir, DUMP.stage_count))
 
     valid_mask = valid_mask.permute(0, 1, 3, 4, 2)  # [B, T, Q, GP, N]
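The change here only moves the dumped tensors to the CPU before `torch.save`, so the files written under `DUMP.out_dir` can be reloaded later without a CUDA device. A minimal sketch of inspecting such a dump; the directory, stage index, and shape comment are illustrative assumptions, not part of this commit:

```python
import torch

out_dir = '/tmp/sparsebev_dump'   # assumption: wherever DUMP.out_dir points for your run
stage_id = 5                      # assumption: last decoder stage

# Saved above as torch.cat([sample_points_cam, homo_nonzero], dim=-1).cpu()
sample_points_cam = torch.load(f'{out_dir}/sample_points_cam_stage{stage_id}.pth')
valid_mask = torch.load(f'{out_dir}/sample_points_cam_valid_mask_stage{stage_id}.pth')

# Exact shapes depend on batch / frames / views / queries / points per query
print(sample_points_cam.shape, valid_mask.shape)
```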
models/sparsebev_transformer.py CHANGED

@@ -186,9 +186,9 @@ class SparseBEVTransformerDecoderLayer(BaseModule):
             query_bbox_dec = decode_bbox(query_bbox, self.pc_range)
             bbox_pred_dec = decode_bbox(bbox_pred, self.pc_range)
             cls_score_sig = torch.sigmoid(cls_score)
-            torch.save(query_bbox_dec, '{}/query_bbox_stage{}.pth'.format(DUMP.out_dir, DUMP.stage_count))
-            torch.save(bbox_pred_dec, '{}/bbox_pred_stage{}.pth'.format(DUMP.out_dir, DUMP.stage_count))
-            torch.save(cls_score_sig, '{}/cls_score_stage{}.pth'.format(DUMP.out_dir, DUMP.stage_count))
+            torch.save(query_bbox_dec.cpu(), '{}/query_bbox_stage{}.pth'.format(DUMP.out_dir, DUMP.stage_count))
+            torch.save(bbox_pred_dec.cpu(), '{}/bbox_pred_stage{}.pth'.format(DUMP.out_dir, DUMP.stage_count))
+            torch.save(cls_score_sig.cpu(), '{}/cls_score_stage{}.pth'.format(DUMP.out_dir, DUMP.stage_count))
 
         return query_feat, cls_score, bbox_pred
 

@@ -216,7 +216,7 @@ class SparseBEVSelfAttention(BaseModule):
         tau = self.gen_tau(query_feat)  # [B, Q, 8]
 
         if DUMP.enabled:
-            torch.save(tau, '{}/sasa_tau_stage{}.pth'.format(DUMP.out_dir, DUMP.stage_count))
+            torch.save(tau.cpu(), '{}/sasa_tau_stage{}.pth'.format(DUMP.out_dir, DUMP.stage_count))
 
         tau = tau.permute(0, 2, 1)  # [B, 8, Q]
         attn_mask = dist[:, None, :, :] * tau[..., None]  # [B, 8, Q, Q]
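Same idea as in the sampling module: the per-stage query boxes, predicted boxes, sigmoid class scores, and the self-attention temperatures `tau` are now saved on the CPU. A rough sketch of reading one stage back and keeping only confident queries, in the spirit of `viz_sample_points.py`; the path, stage index, and assumed leading batch dimension are illustrative:

```python
import torch

out_dir = '/tmp/sparsebev_dump'   # assumption: the directory DUMP.out_dir points to
stage_id = 5                      # assumption: last decoder stage

# cls_score was saved after sigmoid, so it can be thresholded directly
cls_score = torch.load(f'{out_dir}/cls_score_stage{stage_id}.pth')[0]   # assumed [Q, num_classes]
bbox_pred = torch.load(f'{out_dir}/bbox_pred_stage{stage_id}.pth')[0]   # assumed [Q, box_dims]

scores, labels = cls_score.max(dim=-1)
keep = scores > 0.3
print(bbox_pred[keep].shape, labels[keep])
```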
models/utils.py CHANGED

@@ -1,3 +1,4 @@
+import tempfile
 import torch
 import torch.nn as nn
 import torch.nn.functional as F

@@ -308,7 +309,7 @@ class GpuPhotoMetricDistortion:
 class DumpConfig:
     def __init__(self):
         self.enabled = False
-        self.out_dir =
+        self.out_dir = tempfile.mkdtemp()
         self.stage_count = 0
         self.frame_count = 0
 
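`DumpConfig.out_dir` now defaults to a fresh temporary directory from `tempfile.mkdtemp()`, so dumps from different runs do not collide. A minimal sketch of the intended workflow, assuming a built `model` and one batch `data` from the val loader, as in `viz_sample_points.py`:

```python
import torch
from models.utils import DUMP

DUMP.enabled = True          # decoder stages will write their tensors under DUMP.out_dir
model.eval()
with torch.no_grad():
    model(return_loss=False, rescale=True, **data)

print('dumps written to', DUMP.out_dir)   # a fresh tempfile.mkdtemp() directory per run
tau = torch.load('{}/sasa_tau_stage{}.pth'.format(DUMP.out_dir, 5))   # stage index is an example
```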
viz_bbox_predictions.py ADDED

@@ -0,0 +1,244 @@
+import utils
+import logging
+import argparse
+import importlib
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.gridspec import GridSpec
+from PIL import Image
+from mmcv import Config, DictAction
+from mmcv.parallel import MMDataParallel
+from mmcv.runner import load_checkpoint
+from mmdet.apis import set_random_seed
+from mmdet3d.datasets import build_dataset, build_dataloader
+from mmdet3d.models import build_model
+from nuscenes.utils.data_classes import Box
+from pyquaternion import Quaternion
+from nuscenes.nuscenes import NuScenes
+from nuscenes.utils.geometry_utils import box_in_image
+from configs.r50_nuimg_704x256 import class_names
+from models.utils import VERSION
+
+
+classname_to_color = {  # RGB
+    'car': (255, 158, 0),  # Orange
+    'pedestrian': (0, 0, 230),  # Blue
+    'trailer': (255, 140, 0),  # Darkorange
+    'truck': (255, 99, 71),  # Tomato
+    'bus': (255, 127, 80),  # Coral
+    'motorcycle': (255, 61, 99),  # Red
+    'construction_vehicle': (233, 150, 70),  # Darksalmon
+    'bicycle': (220, 20, 60),  # Crimson
+    'barrier': (112, 128, 144),  # Slategrey
+    'traffic_cone': (47, 79, 79),  # Darkslategrey
+}
+
+
+def convert_to_nusc_box(bboxes, scores=None, labels=None, names=None, score_threshold=0.3, lift_center=False):
+    results = []
+    for q in range(bboxes.shape[0]):
+        if scores is not None:
+            score = scores[q]
+        else:
+            score = 1.0
+
+        if score < score_threshold:
+            continue
+
+        if labels is not None:
+            label = labels[q]
+        else:
+            label = 0
+
+        if names is not None:
+            name = names[q]
+        else:
+            name = class_names[label]
+
+        if name not in class_names:
+            name = class_names[-1]
+
+        bbox = bboxes[q].copy()
+        if lift_center:
+            bbox[2] += bbox[5] * 0.5
+
+        orientation = Quaternion(axis=[0, 0, 1], radians=bbox[6])
+
+        box = Box(
+            center=[bbox[0], bbox[1], bbox[2]],
+            size=[bbox[4], bbox[3], bbox[5]],
+            orientation=orientation,
+            score=score,
+            label=label,
+            velocity=(bbox[7], bbox[8], 0),
+            name=name
+        )
+
+        results.append(box)
+
+    return results
+
+
+def viz_bbox(nusc, bboxes, data_info, fig, gs):
+    cam_types = [
+        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+        'CAM_BACK_RIGHT', 'CAM_BACK', 'CAM_BACK_LEFT',
+    ]
+
+    for cam_id, cam_type in enumerate(cam_types):
+        sample_data_token = nusc.get('sample', data_info['token'])['data'][cam_type]
+
+        sd_record = nusc.get('sample_data', sample_data_token)
+        cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
+        intrinsic = np.array(cs_record['camera_intrinsic'])
+
+        img_path = nusc.get_sample_data_path(sample_data_token)
+        img_size = (sd_record['width'], sd_record['height'])
+
+        ax = fig.add_subplot(gs[cam_id // 3, cam_id % 3])
+        ax.imshow(Image.open(img_path))
+
+        for bbox in bboxes:
+            bbox = bbox.copy()
+
+            # Move box to ego vehicle coord system
+            bbox.rotate(Quaternion(data_info['lidar2ego_rotation']))
+            bbox.translate(np.array(data_info['lidar2ego_translation']))
+
+            # Move box to sensor coord system
+            bbox.translate(-np.array(cs_record['translation']))
+            bbox.rotate(Quaternion(cs_record['rotation']).inverse)
+
+            if box_in_image(bbox, intrinsic, img_size):
+                c = np.array(classname_to_color[bbox.name]) / 255.0
+                bbox.render(ax, view=intrinsic, normalize=True, colors=(c, c, c), linewidth=1)
+
+        ax.axis('off')
+        ax.set_title(cam_type)
+        ax.set_xlim(0, img_size[0])
+        ax.set_ylim(img_size[1], 0)
+
+    sample = nusc.get('sample', data_info['token'])
+    lidar_data_token = sample['data']['LIDAR_TOP']
+
+    ax = fig.add_subplot(gs[0:2, 3])
+    nusc.explorer.render_sample_data(lidar_data_token, with_anns=False, ax=ax, verbose=False)
+    ax.axis('off')
+    ax.set_title('LIDAR_TOP')
+    ax.set_xlim(-40, 40)
+    ax.set_ylim(-40, 40)
+
+    sd_record = nusc.get('sample_data', lidar_data_token)
+    pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
+    cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
+
+    for bbox in bboxes:
+        bbox = bbox.copy()
+
+        bbox.rotate(Quaternion(cs_record['rotation']))
+        bbox.translate(np.array(cs_record['translation']))
+        bbox.rotate(Quaternion(pose_record['rotation']))
+
+        yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
+        bbox.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
+
+        c = np.array(classname_to_color[bbox.name]) / 255.0
+        bbox.render(ax, view=np.eye(4), colors=(c, c, c))
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Validate a detector')
+    parser.add_argument('--config', required=True)
+    parser.add_argument('--weights', required=True)
+    parser.add_argument('--override', nargs='+', action=DictAction)
+    parser.add_argument('--score_threshold', default=0.3)
+    args = parser.parse_args()
+
+    # parse configs
+    cfgs = Config.fromfile(args.config)
+    if args.override is not None:
+        cfgs.merge_from_dict(args.override)
+
+    # use val-mini for visualization
+    cfgs.data.val.ann_file = cfgs.data.val.ann_file.replace('val', 'val_mini')
+
+    # register custom module
+    importlib.import_module('models')
+    importlib.import_module('loaders')
+
+    # MMCV, please shut up
+    from mmcv.utils.logging import logger_initialized
+    logger_initialized['root'] = logging.Logger(__name__, logging.WARNING)
+    logger_initialized['mmcv'] = logging.Logger(__name__, logging.WARNING)
+
+    # you need one GPU
+    assert torch.cuda.is_available()
+    assert torch.cuda.device_count() == 1
+
+    utils.init_logging(None, cfgs.debug)
+    logging.info('Using GPU: %s' % torch.cuda.get_device_name(0))
+    logging.info('Setting random seed: 0')
+    set_random_seed(0, deterministic=True)
+
+    logging.info('Loading validation set from %s' % cfgs.data.val.data_root)
+    val_dataset = build_dataset(cfgs.data.val)
+    val_loader = build_dataloader(
+        val_dataset,
+        samples_per_gpu=1,
+        workers_per_gpu=cfgs.data.workers_per_gpu,
+        num_gpus=1,
+        dist=False,
+        shuffle=False,
+        seed=0,
+    )
+
+    logging.info('Creating model: %s' % cfgs.model.type)
+    model = build_model(cfgs.model)
+    model.cuda()
+    model = MMDataParallel(model, [0])
+
+    logging.info('Loading checkpoint from %s' % args.weights)
+    checkpoint = load_checkpoint(
+        model, args.weights, map_location='cuda', strict=True,
+        logger=logging.Logger(__name__, logging.ERROR)
+    )
+
+    if 'version' in checkpoint:
+        VERSION.name = checkpoint['version']
+
+    logging.info('Initialize nuscenes toolkit...')
+    if 'mini' in cfgs.data.val.ann_file:
+        nusc = NuScenes(version='v1.0-mini', dataroot=cfgs.data.val.data_root, verbose=False)
+    else:
+        nusc = NuScenes(version='v1.0-trainval', dataroot=cfgs.data.val.data_root, verbose=False)
+
+    for i, data in enumerate(val_loader):
+        model.eval()
+
+        with torch.no_grad():
+            results = model(return_loss=False, rescale=True, **data)
+            results = results[0]['pts_bbox']
+
+        bboxes_pred = convert_to_nusc_box(
+            bboxes=results['boxes_3d'].tensor.numpy(),
+            scores=results['scores_3d'].numpy(),
+            labels=results['labels_3d'].numpy(),
+            score_threshold=args.score_threshold,
+            lift_center=True,
+        )
+
+        fig = plt.figure(figsize=(15.5, 5))
+        gs = GridSpec(2, 4, figure=fig)
+
+        viz_bbox(nusc, bboxes_pred, val_dataset.data_infos[i], fig, gs)
+
+        plt.tight_layout()
+        plt.savefig('outputs/bbox_%04d.jpg' % i, dpi=200)
+        plt.close()
+
+        logging.info('Visualized result is dumped to outputs/bbox_%04d.jpg' % i)
+
+
+if __name__ == '__main__':
+    main()
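For reference, a small example of calling `convert_to_nusc_box` on a made-up prediction, assuming the function and `class_names` from viz_bbox_predictions.py are in scope. The 9-value box layout (center, dimensions, yaw, velocity) is inferred from the indexing above, and the label index is only an example:

```python
import numpy as np

# Hypothetical single prediction: [x, y, z, dim0, dim1, height, yaw, vx, vy]
dummy_boxes = np.array([[10.0, 2.0, -1.0, 4.5, 1.9, 1.6, 0.1, 3.0, 0.0]])
dummy_scores = np.array([0.8])
dummy_labels = np.array([0])   # index into class_names; 0 is assumed to be 'car'

boxes = convert_to_nusc_box(dummy_boxes, dummy_scores, dummy_labels,
                            score_threshold=0.3, lift_center=True)
print(boxes[0])   # a nuscenes Box carrying score, label, and velocity
```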
viz_sample_points.py ADDED

@@ -0,0 +1,151 @@
+import utils
+import logging
+import argparse
+import importlib
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+from PIL import Image
+from mmcv import Config, DictAction
+from mmcv.parallel import MMDataParallel
+from mmcv.runner import load_checkpoint
+from mmdet.apis import set_random_seed
+from mmdet3d.datasets import build_dataset, build_dataloader
+from mmdet3d.models import build_model
+from models.utils import DUMP, VERSION
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Validate a detector')
+    parser.add_argument('--config', required=True)
+    parser.add_argument('--weights', required=True)
+    parser.add_argument('--override', nargs='+', action=DictAction)
+    parser.add_argument('--score_threshold', default=0.3)
+    parser.add_argument('--stage_id', default=5)
+    parser.add_argument('--num_frames', default=3)
+    parser.add_argument('--num_views', default=6)
+    args = parser.parse_args()
+
+    # parse configs
+    cfgs = Config.fromfile(args.config)
+    if args.override is not None:
+        cfgs.merge_from_dict(args.override)
+
+    # use val-mini for visualization
+    cfgs.data.val.ann_file = cfgs.data.val.ann_file.replace('val', 'val_mini')
+
+    # register custom module
+    importlib.import_module('models')
+    importlib.import_module('loaders')
+
+    # MMCV, please shut up
+    from mmcv.utils.logging import logger_initialized
+    logger_initialized['root'] = logging.Logger(__name__, logging.WARNING)
+    logger_initialized['mmcv'] = logging.Logger(__name__, logging.WARNING)
+
+    # you need one GPU
+    assert torch.cuda.is_available()
+    assert torch.cuda.device_count() == 1
+
+    utils.init_logging(None, cfgs.debug)
+
+    logging.info('Using GPU: %s' % torch.cuda.get_device_name(0))
+    logging.info('Setting random seed: 0')
+    set_random_seed(0, deterministic=True)
+
+    logging.info('Loading validation set from %s' % cfgs.data.val.data_root)
+    val_dataset = build_dataset(cfgs.data.val)
+    val_loader = build_dataloader(
+        val_dataset,
+        samples_per_gpu=1,
+        workers_per_gpu=2,
+        num_gpus=1,
+        dist=False,
+        shuffle=False,
+        seed=0,
+    )
+
+    logging.info('Creating model: %s' % cfgs.model.type)
+    model = build_model(cfgs.model)
+    model.cuda()
+    model = MMDataParallel(model, [0])
+
+    logging.info('Loading checkpoint from %s' % args.weights)
+    checkpoint = load_checkpoint(
+        model, args.weights, map_location='cuda', strict=True,
+        logger=logging.Logger(__name__, logging.ERROR)
+    )
+
+    if 'version' in checkpoint:
+        VERSION.name = checkpoint['version']
+
+    for idx, data in enumerate(val_loader):
+        DUMP.enabled = True
+        model.eval()
+
+        with torch.no_grad():
+            model(return_loss=False, rescale=True, **data)
+
+        cls_scores = torch.load('{}/cls_score_stage{}.pth'.format(DUMP.out_dir, args.stage_id))[0]
+        cls_scores, cls_ids = torch.max(cls_scores, dim=-1)
+
+        # only select queries with high confidence
+        query_ids = torch.where(cls_scores > args.score_threshold)[0]
+        cls_scores, cls_ids = cls_scores[query_ids], cls_ids[query_ids]
+
+        plt.figure(figsize=(240, 49))
+        view_mapping = [1, 2, 0, 4, 5, 3]
+
+        for frame_id in range(args.num_frames):
+            sample_points_cam = torch.load(
+                '{}/sample_points_cam_stage{}.pth'.format(DUMP.out_dir, args.stage_id)
+            )  # [1, 8f, 6view, 900, 32, 2]
+            valid_mask = torch.load(
+                '{}/sample_points_cam_valid_mask_stage{}.pth'.format(DUMP.out_dir, args.stage_id)
+            )  # [1, 8f, 6view, 900, 32]
+
+            for view_id in range(args.num_views):
+                filenames = data['img_metas'][0].data[0][0]['filename']
+                filename = filenames[frame_id * 6 + view_id]
+
+                # crop 1600x640 area
+                img = Image.open(filename)
+                img = img.crop((0, 260, 1600, 900))
+
+                # plot image
+                plot_id = frame_id * args.num_views + view_mapping[view_id] + 1
+                ax = plt.subplot(args.num_frames, args.num_views, plot_id)
+                ax.imshow(img)
+                ax.axis('off')
+                ax.set_xlim(0, 1600)
+                ax.set_ylim(640, 0)
+
+                # plot the sampling points for each query
+                for query_id in query_ids:
+                    xyz = sample_points_cam[0, frame_id, view_id, query_id].numpy()  # [32, 3]
+                    mask = valid_mask[0, frame_id, view_id, query_id].numpy()  # [32]
+                    mask = np.round(mask).astype(bool)
+
+                    cx = xyz[:, 0] * 1600
+                    cy = xyz[:, 1] * 640
+                    cz = xyz[:, 2]
+
+                    cz[np.where(cz <= 0)] = 1e8
+                    cz = np.log(60 / cz ** 0.8) * 2.4
+                    cx, cy, cz = cx[mask], cy[mask], cz[mask]
+
+                    if len(cz) == 0:
+                        continue
+
+                    ax.scatter(cx, cy, s=4**(cz + 1), alpha=0.7, color='C%d' % (query_id % 5))
+
+        plt.tight_layout()
+        plt.subplots_adjust(hspace=0.01, wspace=0.01)
+        plt.savefig('outputs/sp_%04d.jpg' % idx, dpi=20)
+        plt.close()
+
+        logging.info('Visualized result is dumped to outputs/sp_%04d.jpg' % idx)
+
+
+if __name__ == '__main__':
+    main()
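A standalone illustration of the depth-to-marker-size mapping used in the scatter call above: nearer sampling points get larger markers, and points behind the camera are pushed to an effectively zero size by setting their depth to 1e8. The constants match the script; the depth values themselves are made up:

```python
import numpy as np

depth = np.array([2.0, 10.0, 30.0, 60.0, -1.0])   # metres along the camera axis (example values)
cz = depth.copy()
cz[cz <= 0] = 1e8                     # invalid (behind-camera) points
cz = np.log(60 / cz ** 0.8) * 2.4     # monotonically decreasing in depth
size = 4 ** (cz + 1)                  # passed to matplotlib scatter as `s`
print(np.round(size, 2))
```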