| 2025-07-07 11:09:02,802 - PropVG - INFO - dataset = 'RefCOCOPlusUNC' |
| data_root = './data/seqtr_type/' |
| img_norm_cfg = dict( |
| mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]) |
| train_pipeline = [ |
| dict( |
| type='LoadImageAnnotationsFromFile_TO', |
| max_token=20, |
| with_mask=True, |
| with_bbox=True, |
| dataset='RefCOCOPlusUNC', |
| use_token_type='beit3', |
| refer_file='data/seqtr_type/annotations/mixed-seg/coco_all.json', |
| object_area_filter=100, |
| object_area_rate_filter=[0.05, 0.8]), |
| dict(type='Resize', img_scale=(384, 384), keep_ratio=False), |
| dict( |
| type='Normalize', |
| mean=[123.675, 116.28, 103.53], |
| std=[58.395, 57.12, 57.375]), |
| dict(type='DefaultFormatBundle'), |
| dict( |
| type='CollectData', |
| keys=[ |
| 'img', 'ref_expr_inds', 'text_attention_mask', 'gt_mask_rle', |
| 'gt_bbox' |
| ], |
| meta_keys=[ |
| 'filename', 'expression', 'ori_shape', 'img_shape', 'pad_shape', |
| 'scale_factor', 'gt_ori_mask', 'target', 'empty', |
| 'refer_target_index' |
| ]) |
| ] |
| val_pipeline = [ |
| dict( |
| type='LoadImageAnnotationsFromFile_TO', |
| max_token=20, |
| with_mask=True, |
| with_bbox=True, |
| dataset='RefCOCOPlusUNC', |
| use_token_type='beit3', |
| refer_file='data/seqtr_type/annotations/mixed-seg/coco_all.json', |
| object_area_filter=100, |
| object_area_rate_filter=[0.05, 0.8]), |
| dict(type='Resize', img_scale=(384, 384), keep_ratio=False), |
| dict( |
| type='Normalize', |
| mean=[123.675, 116.28, 103.53], |
| std=[58.395, 57.12, 57.375]), |
| dict(type='DefaultFormatBundle'), |
| dict( |
| type='CollectData', |
| keys=[ |
| 'img', 'ref_expr_inds', 'text_attention_mask', 'gt_mask_rle', |
| 'gt_bbox' |
| ], |
| meta_keys=[ |
| 'filename', 'expression', 'ori_shape', 'img_shape', 'pad_shape', |
| 'scale_factor', 'gt_ori_mask', 'target', 'empty', |
| 'refer_target_index' |
| ]) |
| ] |
| test_pipeline = [ |
| dict( |
| type='LoadImageAnnotationsFromFile_TO', |
| max_token=20, |
| with_mask=True, |
| with_bbox=True, |
| dataset='RefCOCOPlusUNC', |
| use_token_type='beit3', |
| refer_file='data/seqtr_type/annotations/mixed-seg/coco_all.json', |
| object_area_filter=100, |
| object_area_rate_filter=[0.05, 0.8]), |
| dict(type='Resize', img_scale=(384, 384), keep_ratio=False), |
| dict( |
| type='Normalize', |
| mean=[123.675, 116.28, 103.53], |
| std=[58.395, 57.12, 57.375]), |
| dict(type='DefaultFormatBundle'), |
| dict( |
| type='CollectData', |
| keys=[ |
| 'img', 'ref_expr_inds', 'text_attention_mask', 'gt_mask_rle', |
| 'gt_bbox' |
| ], |
| meta_keys=[ |
| 'filename', 'expression', 'ori_shape', 'img_shape', 'pad_shape', |
| 'scale_factor', 'gt_ori_mask', 'target', 'empty', |
| 'refer_target_index' |
| ]) |
| ] |
| word_emb_cfg = dict(type='GloVe') |
| data = dict( |
| samples_per_gpu=8, |
| workers_per_gpu=4, |
| train=dict( |
| type='RefCOCOPlusUNC', |
| which_set='train', |
| img_source=['coco'], |
| annsfile= |
| './data/seqtr_type/annotations/refcocoplus-unc/instances_withid.json', |
| imgsfile='./data/seqtr_type/images/mscoco/train2014', |
| pipeline=[ |
| dict( |
| type='LoadImageAnnotationsFromFile_TO', |
| max_token=20, |
| with_mask=True, |
| with_bbox=True, |
| dataset='RefCOCOPlusUNC', |
| use_token_type='beit3', |
| refer_file= |
| 'data/seqtr_type/annotations/mixed-seg/coco_all.json', |
| object_area_filter=100, |
| object_area_rate_filter=[0.05, 0.8]), |
| dict(type='Resize', img_scale=(384, 384), keep_ratio=False), |
| dict( |
| type='Normalize', |
| mean=[123.675, 116.28, 103.53], |
| std=[58.395, 57.12, 57.375]), |
| dict(type='DefaultFormatBundle'), |
| dict( |
| type='CollectData', |
| keys=[ |
| 'img', 'ref_expr_inds', 'text_attention_mask', |
| 'gt_mask_rle', 'gt_bbox' |
| ], |
| meta_keys=[ |
| 'filename', 'expression', 'ori_shape', 'img_shape', |
| 'pad_shape', 'scale_factor', 'gt_ori_mask', 'target', |
| 'empty', 'refer_target_index' |
| ]) |
| ], |
| word_emb_cfg=dict(type='GloVe')), |
| val=dict( |
| type='RefCOCOPlusUNC', |
| which_set='val', |
| img_source=['coco'], |
| annsfile= |
| './data/seqtr_type/annotations/refcocoplus-unc/instances_withid.json', |
| imgsfile='./data/seqtr_type/images/mscoco/train2014', |
| pipeline=[ |
| dict( |
| type='LoadImageAnnotationsFromFile_TO', |
| max_token=20, |
| with_mask=True, |
| with_bbox=True, |
| dataset='RefCOCOPlusUNC', |
| use_token_type='beit3', |
| refer_file= |
| 'data/seqtr_type/annotations/mixed-seg/coco_all.json', |
| object_area_filter=100, |
| object_area_rate_filter=[0.05, 0.8]), |
| dict(type='Resize', img_scale=(384, 384), keep_ratio=False), |
| dict( |
| type='Normalize', |
| mean=[123.675, 116.28, 103.53], |
| std=[58.395, 57.12, 57.375]), |
| dict(type='DefaultFormatBundle'), |
| dict( |
| type='CollectData', |
| keys=[ |
| 'img', 'ref_expr_inds', 'text_attention_mask', |
| 'gt_mask_rle', 'gt_bbox' |
| ], |
| meta_keys=[ |
| 'filename', 'expression', 'ori_shape', 'img_shape', |
| 'pad_shape', 'scale_factor', 'gt_ori_mask', 'target', |
| 'empty', 'refer_target_index' |
| ]) |
| ], |
| word_emb_cfg=dict(type='GloVe')), |
| testA=dict( |
| type='RefCOCOPlusUNC', |
| which_set='testA', |
| img_source=['coco'], |
| annsfile= |
| './data/seqtr_type/annotations/refcocoplus-unc/instances_withid.json', |
| imgsfile='./data/seqtr_type/images/mscoco/train2014', |
| pipeline=[ |
| dict( |
| type='LoadImageAnnotationsFromFile_TO', |
| max_token=20, |
| with_mask=True, |
| with_bbox=True, |
| dataset='RefCOCOPlusUNC', |
| use_token_type='beit3', |
| refer_file= |
| 'data/seqtr_type/annotations/mixed-seg/coco_all.json', |
| object_area_filter=100, |
| object_area_rate_filter=[0.05, 0.8]), |
| dict(type='Resize', img_scale=(384, 384), keep_ratio=False), |
| dict( |
| type='Normalize', |
| mean=[123.675, 116.28, 103.53], |
| std=[58.395, 57.12, 57.375]), |
| dict(type='DefaultFormatBundle'), |
| dict( |
| type='CollectData', |
| keys=[ |
| 'img', 'ref_expr_inds', 'text_attention_mask', |
| 'gt_mask_rle', 'gt_bbox' |
| ], |
| meta_keys=[ |
| 'filename', 'expression', 'ori_shape', 'img_shape', |
| 'pad_shape', 'scale_factor', 'gt_ori_mask', 'target', |
| 'empty', 'refer_target_index' |
| ]) |
| ], |
| word_emb_cfg=dict(type='GloVe')), |
| testB=dict( |
| type='RefCOCOPlusUNC', |
| which_set='testB', |
| img_source=['coco'], |
| annsfile= |
| './data/seqtr_type/annotations/refcocoplus-unc/instances_withid.json', |
| imgsfile='./data/seqtr_type/images/mscoco/train2014', |
| pipeline=[ |
| dict( |
| type='LoadImageAnnotationsFromFile_TO', |
| max_token=20, |
| with_mask=True, |
| with_bbox=True, |
| dataset='RefCOCOPlusUNC', |
| use_token_type='beit3', |
| refer_file= |
| 'data/seqtr_type/annotations/mixed-seg/coco_all.json', |
| object_area_filter=100, |
| object_area_rate_filter=[0.05, 0.8]), |
| dict(type='Resize', img_scale=(384, 384), keep_ratio=False), |
| dict( |
| type='Normalize', |
| mean=[123.675, 116.28, 103.53], |
| std=[58.395, 57.12, 57.375]), |
| dict(type='DefaultFormatBundle'), |
| dict( |
| type='CollectData', |
| keys=[ |
| 'img', 'ref_expr_inds', 'text_attention_mask', |
| 'gt_mask_rle', 'gt_bbox' |
| ], |
| meta_keys=[ |
| 'filename', 'expression', 'ori_shape', 'img_shape', |
| 'pad_shape', 'scale_factor', 'gt_ori_mask', 'target', |
| 'empty', 'refer_target_index' |
| ]) |
| ], |
| word_emb_cfg=dict(type='GloVe'))) |
| ema = False |
| ema_factor = 0.999 |
| use_fp16 = False |
| seed = 6666 |
| deterministic = True |
| log_level = 'INFO' |
| log_interval = 50 |
| save_interval = -1 |
| resume_from = None |
| load_from = 'work_dir/refcoco+/PropVG-refcoco+.pth' |
| finetune_from = None |
| evaluate_interval = 1 |
| start_evaluate_epoch = 0 |
| start_save_checkpoint = 20 |
| max_token = 20 |
| img_size = 384 |
| patch_size = 16 |
| model = dict( |
| type='MIXRefUniModel_OMG', |
| vis_enc=dict( |
| type='BEIT3', |
| img_size=384, |
| patch_size=16, |
| vit_type='base', |
| drop_path_rate=0.1, |
| vocab_size=64010, |
| freeze_layer=-1, |
| vision_embed_proj_interpolate=False, |
| pretrain='pretrain_weights/beit3_base_patch16_224.zip'), |
| lan_enc=None, |
| fusion=None, |
| head=dict( |
| type='REFHead', |
| input_channels=768, |
| hidden_channels=256, |
| num_queries=20, |
| detr_loss=dict( |
| criterion=dict(loss_class=1.0, loss_bbox=5.0, loss_giou=2.0), |
| matcher=dict(cost_class=1.0, cost_bbox=5.0, cost_giou=2.0)), |
| loss_weight=dict( |
| mask=dict(dice=1.0, bce=1.0, nt=0.2, neg=0), |
| bbox=0.1, |
| allbbox=0.1, |
| refer=1.0), |
| MTD=dict(K=100)), |
| post_params=dict( |
| score_weighted=False, |
| mask_threshold=0.5, |
| score_threshold=0.7, |
| with_nms=False, |
| with_mask=True), |
| process_visual=True, |
| visualize_params=dict(row_columns=(4, 5)), |
| visual_mode='test') |
| grad_norm_clip = 0.15 |
| lr = 0.0005 |
| optimizer_config = dict( |
| type='Adam', |
| lr=0.0005, |
| lr_vis_enc=5e-05, |
| lr_lan_enc=0.0005, |
| betas=(0.9, 0.98), |
| eps=1e-09, |
| weight_decay=0, |
| amsgrad=True) |
| scheduler_config = dict( |
| type='MultiStepLRWarmUp', |
| warmup_epochs=1, |
| decay_steps=[21, 27], |
| decay_ratio=0.1, |
| max_epoch=30) |
| launcher = 'pytorch' |
| distributed = True |
| rank = 0 |
| world_size = 4 |
|
|
| 2025-07-07 11:09:07,978 - PropVG - INFO - RefCOCOPlusUNC-val size: 10758 |
| 2025-07-07 11:09:13,867 - PropVG - INFO - RefCOCOPlusUNC-testA size: 5726 |
| 2025-07-07 11:09:19,990 - PropVG - INFO - RefCOCOPlusUNC-testB size: 4889 |
| 2025-07-07 11:09:24,879 - PropVG - INFO - loaded checkpoint from work_dir/refcoco+/PropVG-refcoco+.pth |
|
|
| 2025-07-07 11:09:24,886 - PropVG - INFO - PropVG - evaluating set val |
| 2025-07-07 11:11:17,140 - PropVG - INFO - ------------ validate ------------ time: 112.25, DetACC: 83.73, mIoU: 72.94, oIoU: 70.24, MaskACC@0.5-0.9: [83.12, 80.60, 76.04, 65.37, 33.26], DetACC@0.5-0.9: [83.73, 81.30, 77.10, 68.58, 42.65] |
| 2025-07-07 11:11:18,910 - PropVG - INFO - PropVG - evaluating set testA |
| 2025-07-07 11:12:32,835 - PropVG - INFO - ------------ validate ------------ time: 73.92, DetACC: 88.01, mIoU: 76.49, oIoU: 74.32, MaskACC@0.5-0.9: [88.04, 86.00, 81.37, 70.53, 33.52], DetACC@0.5-0.9: [88.01, 85.91, 82.12, 73.80, 47.14] |
| 2025-07-07 11:12:34,541 - PropVG - INFO - PropVG - evaluating set testB |
| 2025-07-07 11:13:39,576 - PropVG - INFO - ------------ validate ------------ time: 65.03, DetACC: 76.59, mIoU: 67.21, oIoU: 63.41, MaskACC@0.5-0.9: [75.57, 71.83, 66.95, 57.38, 33.87], DetACC@0.5-0.9: [76.59, 73.26, 68.11, 59.24, 36.12] |
| 2025-07-07 11:13:41,507 - PropVG - INFO - successfully saved the results to work_dir/refcoco+/refer_output_thr0.7_no-nms_no-sw_0.5_100.xlsx !!! |
|
|