# Dataset type name and data root. The `data` section passes these as
# `type` / `data_root` for every split and prefixes each annotation file
# name with `dataset_root`.
dataset_type = 'CustomNuScenesDataset'
dataset_root = 'data/nuscenes/'

# Input switches handed to every dataset split through `modality`.
# Only the camera and "external" flags are enabled in this config.
input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=True,
)

# Object categories. The same list is given to ObjectNameFilter,
# DefaultFormatBundle3D and the `classes` argument of each dataset split.
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone',
]

# Spatial range and voxel size shared by the transformer (`pc_range`), the
# box coder, the training config and the ObjectRangeFilter transform.
# NOTE(review): presumably [x_min, y_min, z_min, x_max, y_max, z_max] and
# [dx, dy, dz] -- confirm against the consuming modules.
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
voxel_size = [0.2, 0.2, 8]

# Model hyper-parameters reused throughout the rest of the config.
embed_dims = 256  # FPN out_channels, head in_channels, transformer width
num_layers = 6    # passed to the transformer
num_query = 900   # passed to the detection head
num_frames = 8    # transformer frames; the sweep loaders use num_frames - 1
num_levels = 4    # FPN num_outs and transformer num_levels
num_points = 4    # passed to the transformer

# Image backbone settings: ResNet with depth 50, outputs taken from all
# four stage indices, `frozen_stages=1`, and `with_cp=True`.
img_backbone = dict(
    type='ResNet',
    depth=50,
    num_stages=4,
    out_indices=(0, 1, 2, 3),
    frozen_stages=1,
    norm_cfg=dict(type='BN2d', requires_grad=True),
    norm_eval=True,
    style='pytorch',
    with_cp=True,
)
# Feature-pyramid neck. Output width and number of output levels follow the
# file-level `embed_dims` and `num_levels`.
# NOTE(review): in_channels are presumably the channel widths of the four
# backbone stages selected by `out_indices` -- confirm.
img_neck = dict(
    type='FPN',
    in_channels=[256, 512, 1024, 2048],
    out_channels=embed_dims,
    num_outs=num_levels,
)
# Per-channel image normalisation constants; forwarded to the model through
# `data_aug.img_norm_cfg`.
img_norm_cfg = dict(
    mean=[123.675, 116.280, 103.530],
    std=[58.395, 57.120, 57.375],
    to_rgb=True,
)

# Full detector definition: image augmentation options, backbone, neck,
# detection head (transformer, box coder, losses) and the training-time
# assigner.
model = dict(
    type='SparseBEV',
    data_aug=dict(
        img_color_aug=True,
        img_norm_cfg=img_norm_cfg,
        img_pad_cfg=dict(size_divisor=32),
    ),
    stop_prev_grad=0,
    img_backbone=img_backbone,
    img_neck=img_neck,
    pts_bbox_head=dict(
        type='SparseBEVHead',
        # Derived from class_names so the class count cannot drift from the
        # category list (evaluates to 10 with the list in this file).
        num_classes=len(class_names),
        in_channels=embed_dims,
        num_query=num_query,
        query_denoising=True,
        query_denoising_groups=10,
        code_size=10,
        # One weight per code entry (10 values for code_size=10).
        code_weights=[2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        sync_cls_avg_factor=True,
        transformer=dict(
            type='SparseBEVTransformer',
            embed_dims=embed_dims,
            num_frames=num_frames,
            num_points=num_points,
            num_layers=num_layers,
            num_levels=num_levels,
            num_classes=len(class_names),
            code_size=10,
            pc_range=point_cloud_range,
        ),
        bbox_coder=dict(
            type='NMSFreeCoder',
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            pc_range=point_cloud_range,
            max_num=300,
            voxel_size=voxel_size,
            score_threshold=0.05,
            num_classes=len(class_names),
        ),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=embed_dims // 2,
            normalize=True,
            offset=-0.5,
        ),
        # Loss weights below (2.0 / 0.25 / 0.0) match the assigner cost
        # weights in train_cfg.
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=2.0,
        ),
        loss_bbox=dict(type='L1Loss', loss_weight=0.25),
        loss_iou=dict(type='GIoULoss', loss_weight=0.0),
    ),
    train_cfg=dict(
        pts=dict(
            # With the range and voxel size defined in this file:
            # 102.4 / 0.2 = 512 cells in x and y, 8 / 8 = 1 cell in z.
            grid_size=[512, 512, 1],
            voxel_size=voxel_size,
            point_cloud_range=point_cloud_range,
            out_size_factor=4,
            assigner=dict(
                type='HungarianAssigner3D',
                cls_cost=dict(type='FocalLossCost', weight=2.0),
                reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
                iou_cost=dict(type='IoUCost', weight=0.0),
            ),
        ),
    ),
)

# Image-augmentation settings consumed by the RandomTransformImage step of
# both pipelines (with training=True / training=False respectively).
# NOTE(review): 'H'/'W' are presumably the source image height/width and
# 'final_dim' the (height, width) after augmentation -- confirm.
ida_aug_conf = {
    'resize_lim': (0.38, 0.55),
    'final_dim': (256, 704),
    'bot_pct_lim': (0.0, 0.0),
    'rot_lim': (0.0, 0.0),
    'H': 900,
    'W': 1600,
    'rand_flip': True,
}

# Training pipeline: load the multi-view images plus num_frames - 1 sweeps,
# load and filter the 3D annotations, apply image-space and global
# augmentations, then format and collect the training inputs.
train_pipeline = [
    dict(
        type='LoadMultiViewImageFromFiles',
        to_float32=False,
        color_type='color',
    ),
    dict(
        type='LoadMultiViewImageFromMultiSweeps',
        sweeps_num=num_frames - 1,
    ),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        with_attr_label=False,
    ),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectNameFilter', classes=class_names),
    dict(
        type='RandomTransformImage',
        ida_aug_conf=ida_aug_conf,
        training=True,
    ),
    dict(
        type='GlobalRotScaleTransImage',
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
    ),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D',
        keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'],
        meta_keys=(
            'filename', 'ori_shape', 'img_shape', 'pad_shape',
            'lidar2img', 'img_timestamp',
        ),
    ),
]

# Test/validation pipeline: same image and sweep loading as training (with
# test_mode=True on the sweep loader), RandomTransformImage with
# training=False, and no annotation loading. Formatting and collection run
# inside a MultiScaleFlipAug3D wrapper with flipping disabled.
test_pipeline = [
    dict(
        type='LoadMultiViewImageFromFiles',
        to_float32=False,
        color_type='color',
    ),
    dict(
        type='LoadMultiViewImageFromMultiSweeps',
        sweeps_num=num_frames - 1,
        test_mode=True,
    ),
    dict(
        type='RandomTransformImage',
        ida_aug_conf=ida_aug_conf,
        training=False,
    ),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1600, 900),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False,
            ),
            dict(
                type='Collect3D',
                keys=['img'],
                meta_keys=(
                    'filename', 'box_type_3d', 'ori_shape', 'img_shape',
                    'pad_shape', 'lidar2img', 'img_timestamp',
                ),
            ),
        ],
    ),
]

# Dataset splits. All three share the dataset type, data root, class list
# and modality; train uses `train_pipeline`, val and test use
# `test_pipeline` with test_mode=True. Only the train split sets
# `use_valid_flag`.
data = dict(
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        data_root=dataset_root,
        ann_file=dataset_root + 'nuscenes_infos_train_sweep.pkl',
        pipeline=train_pipeline,
        classes=class_names,
        modality=input_modality,
        test_mode=False,
        use_valid_flag=True,
        box_type_3d='LiDAR',
    ),
    val=dict(
        type=dataset_type,
        data_root=dataset_root,
        ann_file=dataset_root + 'nuscenes_infos_val_sweep.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        modality=input_modality,
        test_mode=True,
        box_type_3d='LiDAR',
    ),
    test=dict(
        type=dataset_type,
        data_root=dataset_root,
        ann_file=dataset_root + 'nuscenes_infos_test_sweep.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        modality=input_modality,
        test_mode=True,
        box_type_3d='LiDAR',
    ),
)

# AdamW optimizer. `custom_keys` assigns a 0.1 learning-rate multiplier to
# the 'img_backbone' and 'sampling_offset' entries.
# NOTE(review): keys are presumably matched against parameter names by the
# optimizer constructor -- confirm.
optimizer = dict(
    type='AdamW',
    lr=2e-4,
    paramwise_cfg=dict(
        custom_keys={
            'img_backbone': dict(lr_mult=0.1),
            'sampling_offset': dict(lr_mult=0.1),
        },
    ),
    weight_decay=0.01,
)

# Optimizer hook settings: Fp16OptimizerHook with a loss scale of 512 and
# gradient clipping at max_norm=35 using norm_type=2.
optimizer_config = dict(
    type='Fp16OptimizerHook',
    loss_scale=512.0,
    grad_clip=dict(max_norm=35, norm_type=2),
)

# Learning-rate schedule: 'CosineAnnealing' policy with a linear warm-up of
# 500 iterations starting from a ratio of 1/3, and min_lr_ratio=1e-3.
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    min_lr_ratio=1e-3,
)
# Run length and batch size; `eval_config` also reads `total_epochs`.
total_epochs = 24
batch_size = 8

# Pretrained checkpoint to load, plus key-renaming pairs for it.
# NOTE(review): revise_keys presumably maps the 'backbone' key prefix in the
# checkpoint to 'img_backbone' when loading -- confirm against the loader.
load_from = 'pretrain/cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim_20201009_124951-40963960.pth'
revise_keys = [('backbone', 'img_backbone')]

# No checkpoint to resume from is configured.
resume_from = None

# Checkpoint saving: interval of 1 and at most one checkpoint kept.
checkpoint_config = dict(interval=1, max_keep_ckpts=1)

# Logging hooks: the text logger uses interval=1, the tensorboard logger
# uses interval=500; both set reset_flag=True.
log_config = dict(
    interval=1,
    hooks=[
        dict(type='MyTextLoggerHook', interval=1, reset_flag=True),
        dict(type='MyTensorboardLoggerHook', interval=500, reset_flag=True),
    ],
)

# Evaluation interval is tied to `total_epochs`, so it tracks any change to
# the run length.
eval_config = dict(interval=total_epochs)

# Debug switch; disabled in this config.
debug = False