# Normalization-layer settings: 'SyncBN' with requires_grad enabled.
norm_cfg = dict(type='SyncBN', requires_grad=True)
# Model definition: an 'EncoderDecoder' made of a 'SwinTransformer' backbone,
# a 'UPerHead' decode head and an 'FCNHead' auxiliary head.
model = dict(
    type='EncoderDecoder',
    pretrained='pretrained/swin_small_patch4_window7_224.pth',
    backbone=dict(
        type='SwinTransformer',
        embed_dim=96,
        depths=[2, 2, 18, 2],
        num_heads=[3, 6, 12, 24],
        window_size=7,
        mlp_ratio=4.0,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.3,
        ape=False,
        patch_norm=True,
        out_indices=(0, 1, 2, 3),
        use_checkpoint=False,
    ),
    decode_head=dict(
        type='UPerHead',
        # One entry per selected backbone output; the values equal
        # embed_dim * (1, 2, 4, 8).
        in_channels=[96, 192, 384, 768],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        # NOTE(review): presumably 103 food classes plus background for
        # FoodSeg103 -- confirm against the annotation files.
        num_classes=104,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0,
        ),
    ),
    auxiliary_head=dict(
        type='FCNHead',
        # Reads the single feature map at index 2, whose width (384) is the
        # third entry of the decode head's in_channels.
        in_channels=384,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=104,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        # The auxiliary loss is weighted 0.4 versus 1.0 for the decode head.
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=0.4,
        ),
    ),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'),
)
# Dataset-level constants.
dataset_type = 'CustomDataset'
data_root = './data/FoodSeg103/Images/'
# Per-channel mean/std used by the 'Normalize' pipeline steps.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True,
)
# Same size that the training 'RandomCrop' and 'Pad' steps use.
crop_size = (512, 1024)
# Training-time preprocessing steps, in the order they are listed.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=(512, 1024), cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True,
    ),
    # The pad size equals the RandomCrop crop_size above.
    dict(type='Pad', size=(512, 1024), pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# Evaluation-time preprocessing: load the image, then run the nested
# transforms through a 'MultiScaleFlipAug' wrapper configured with one scale
# and flip=False.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True,
            ),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ],
    ),
]
# Data-loading settings. Each split spells out its dataset and pipeline in
# full, so this block does not depend on any other module-level name.
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='CustomDataset',
        data_root='./data/FoodSeg103/Images/',
        img_dir='img_dir/train',
        ann_dir='ann_dir/train',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(
                type='Resize',
                img_scale=(2048, 1024),
                ratio_range=(0.5, 2.0),
            ),
            dict(type='RandomCrop', crop_size=(512, 1024), cat_max_ratio=0.75),
            dict(type='RandomFlip', prob=0.5),
            dict(type='PhotoMetricDistortion'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True,
            ),
            dict(type='Pad', size=(512, 1024), pad_val=0, seg_pad_val=255),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg']),
        ],
    ),
    # 'val' and 'test' are configured identically: both read the 'test'
    # image/annotation directories with the same pipeline.
    val=dict(
        type='CustomDataset',
        data_root='./data/FoodSeg103/Images/',
        img_dir='img_dir/test',
        ann_dir='ann_dir/test',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(2048, 1024),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True,
                    ),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img']),
                ],
            ),
        ],
    ),
    test=dict(
        type='CustomDataset',
        data_root='./data/FoodSeg103/Images/',
        img_dir='img_dir/test',
        ann_dir='ann_dir/test',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(2048, 1024),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True,
                    ),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img']),
                ],
            ),
        ],
    ),
)
# Runtime settings.
# Logging uses a single 'TextLoggerHook' with interval=50 and by_epoch=False.
log_config = dict(
    interval=50,
    hooks=[dict(type='TextLoggerHook', by_epoch=False)],
)
dist_params = dict(backend='nccl')
log_level = 'INFO'
# No checkpoint is configured for loading or resuming.
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
# Optimizer settings: 'AdamW' with a base learning rate of 6e-05.
optimizer = dict(
    type='AdamW',
    lr=6e-05,
    betas=(0.9, 0.999),
    weight_decay=0.01,
    paramwise_cfg=dict(
        # NOTE(review): decay_mult=0.0 presumably switches weight decay off
        # for parameters whose names match these keys -- confirm against the
        # optimizer constructor in use.
        custom_keys=dict(
            absolute_pos_embed=dict(decay_mult=0.0),
            relative_position_bias_table=dict(decay_mult=0.0),
            norm=dict(decay_mult=0.0),
        ),
    ),
)
# No extra optimizer-hook options are set.
optimizer_config = dict()
# Learning-rate schedule: 'poly' policy with a 'linear' warmup over the first
# 1500 iterations; the schedule is not epoch-based (by_epoch=False).
lr_config = dict(
    policy='poly',
    warmup='linear',
    warmup_iters=1500,
    warmup_ratio=1e-06,
    power=1.0,
    min_lr=0.0,
    by_epoch=False,
)
# Training length and periodic actions. max_iters is 80000; checkpointing and
# evaluation both use interval=8000, i.e. ten times over the full run.
runner = dict(type='IterBasedRunner', max_iters=80000)
checkpoint_config = dict(by_epoch=False, interval=8000)
evaluation = dict(interval=8000, metric='mIoU')
work_dir = './work_dirs/upernet_swin_small_patch4_window7_512x1024_80k'
# A range holding only index 0.
gpu_ids = range(0, 1)