ApacheOne committed on
Commit
1a0140f
·
verified ·
1 Parent(s): 71040e1

Upload 19 files

Browse files
.gitattributes CHANGED
@@ -87,3 +87,4 @@ output_fp16_fp32_joined/008229_collage.png filter=lfs diff=lfs merge=lfs -text
87
  output_fp16_fp32_joined/008468_collage.png filter=lfs diff=lfs merge=lfs -text
88
  output_fp16_fp32_joined/008597_collage.png filter=lfs diff=lfs merge=lfs -text
89
  output_fp16_fp32_joined/008768_collage.png filter=lfs diff=lfs merge=lfs -text
 
 
87
  output_fp16_fp32_joined/008468_collage.png filter=lfs diff=lfs merge=lfs -text
88
  output_fp16_fp32_joined/008597_collage.png filter=lfs diff=lfs merge=lfs -text
89
  output_fp16_fp32_joined/008768_collage.png filter=lfs diff=lfs merge=lfs -text
90
+ pretrained_ckpts/shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
pretrained_ckpts/auxiliray/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7914f1dccb37ab2f307703f2593efd74cc327d9db974424dff8d7510b46d85b5
3
+ size 7813380
pretrained_ckpts/auxiliray/model_ir_se50.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a035c768259b98ab1ce0e646312f48b9e1e218197a0f80ac6765e88f8b6ddf28
3
+ size 175367323
pretrained_ckpts/e4s/iteration_300000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38a5b496f876442bc9051c6b7435d9268ee392a0d707edaa73cacd2846572771
3
+ size 1452893535
pretrained_ckpts/face_parsing/79999_iter.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:468e13ca13a9b43cc0881a9f99083a430e9c0a38abd935431d1c28ee94b26567
3
+ size 53289463
pretrained_ckpts/face_parsing/segnext.base.512x512.celebamaskhq.160k.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
2
+ ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
3
+ model = dict(
4
+ type='EncoderDecoder',
5
+ pretrained=None,
6
+ backbone=dict(
7
+ type='MSCAN',
8
+ embed_dims=[64, 128, 320, 512],
9
+ mlp_ratios=[8, 8, 4, 4],
10
+ drop_rate=0.0,
11
+ drop_path_rate=0.1,
12
+ depths=[3, 3, 12, 3],
13
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
14
+ init_cfg=dict(type='Pretrained', checkpoint='pretrained/mscan_b.pth')),
15
+ decode_head=dict(
16
+ type='LightHamHead',
17
+ in_channels=[128, 320, 512],
18
+ in_index=[1, 2, 3],
19
+ channels=512,
20
+ ham_channels=512,
21
+ dropout_ratio=0.1,
22
+ num_classes=150,
23
+ norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
24
+ align_corners=False,
25
+ loss_decode=dict(
26
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
27
+ train_cfg=dict(),
28
+ test_cfg=dict(mode='whole'))
29
+ dataset_type = 'CelebAMaskHQDataset'
30
+ data_root = './data/CelebAMaskHQ'
31
+ img_norm_cfg = dict(
32
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
33
+ crop_size = (512, 512)
34
+ train_pipeline = [
35
+ dict(type='LoadImageFromFile'),
36
+ dict(type='LoadAnnotations', reduce_zero_label=False),
37
+ dict(type='Resize', img_scale=(512, 512), ratio_range=(1.0, 1.0)),
38
+ dict(type='RandomFlip', prob=0),
39
+ dict(type='PhotoMetricDistortion'),
40
+ dict(
41
+ type='Normalize',
42
+ mean=[123.675, 116.28, 103.53],
43
+ std=[58.395, 57.12, 57.375],
44
+ to_rgb=True),
45
+ dict(type='DefaultFormatBundle'),
46
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
47
+ ]
48
+ test_pipeline = [
49
+ dict(type='LoadImageFromFile'),
50
+ dict(
51
+ type='MultiScaleFlipAug',
52
+ img_scale=(1024, 1024),
53
+ img_ratios=[0.5],
54
+ flip=False,
55
+ transforms=[
56
+ dict(type='Resize', keep_ratio=True),
57
+ dict(type='RandomFlip', prob=0),
58
+ dict(
59
+ type='Normalize',
60
+ mean=[123.675, 116.28, 103.53],
61
+ std=[58.395, 57.12, 57.375],
62
+ to_rgb=True),
63
+ dict(type='ImageToTensor', keys=['img']),
64
+ dict(type='Collect', keys=['img'])
65
+ ])
66
+ ]
67
+ data = dict(
68
+ samples_per_gpu=6,
69
+ workers_per_gpu=8,
70
+ train=dict(
71
+ type='RepeatDataset',
72
+ times=50,
73
+ dataset=dict(
74
+ type='CelebAMaskHQDataset',
75
+ data_root='./data/CelebAMaskHQ',
76
+ img_dir='CelebA-HQ-img/',
77
+ ann_dir='CelebA-HQ-mask/',
78
+ pipeline=[
79
+ dict(type='LoadImageFromFile'),
80
+ dict(type='LoadAnnotations', reduce_zero_label=False),
81
+ dict(
82
+ type='Resize',
83
+ img_scale=(512, 512),
84
+ ratio_range=(1.0, 1.0)),
85
+ dict(type='RandomFlip', prob=0),
86
+ dict(type='PhotoMetricDistortion'),
87
+ dict(
88
+ type='Normalize',
89
+ mean=[123.675, 116.28, 103.53],
90
+ std=[58.395, 57.12, 57.375],
91
+ to_rgb=True),
92
+ dict(type='DefaultFormatBundle'),
93
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
94
+ ],
95
+ split='train_split.txt')),
96
+ val=dict(
97
+ type='CelebAMaskHQDataset',
98
+ data_root='./data/CelebAMaskHQ',
99
+ img_dir='CelebA-HQ-img/',
100
+ ann_dir='CelebA-HQ-mask/',
101
+ pipeline=[
102
+ dict(type='LoadImageFromFile'),
103
+ dict(
104
+ type='MultiScaleFlipAug',
105
+ img_scale=(1024, 1024),
106
+ img_ratios=[0.5],
107
+ flip=False,
108
+ transforms=[
109
+ dict(type='Resize', keep_ratio=True),
110
+ dict(type='RandomFlip', prob=0),
111
+ dict(
112
+ type='Normalize',
113
+ mean=[123.675, 116.28, 103.53],
114
+ std=[58.395, 57.12, 57.375],
115
+ to_rgb=True),
116
+ dict(type='ImageToTensor', keys=['img']),
117
+ dict(type='Collect', keys=['img'])
118
+ ])
119
+ ],
120
+ split='val_split.txt'),
121
+ test=dict(
122
+ type='CelebAMaskHQDataset',
123
+ data_root='./data/CelebAMaskHQ',
124
+ img_dir='CelebA-HQ-img/',
125
+ ann_dir='CelebA-HQ-mask/',
126
+ pipeline=[
127
+ dict(type='LoadImageFromFile'),
128
+ dict(
129
+ type='MultiScaleFlipAug',
130
+ img_scale=(1024, 1024),
131
+ img_ratios=[0.5],
132
+ flip=False,
133
+ transforms=[
134
+ dict(type='Resize', keep_ratio=True),
135
+ dict(type='RandomFlip', prob=0),
136
+ dict(
137
+ type='Normalize',
138
+ mean=[123.675, 116.28, 103.53],
139
+ std=[58.395, 57.12, 57.375],
140
+ to_rgb=True),
141
+ dict(type='ImageToTensor', keys=['img']),
142
+ dict(type='Collect', keys=['img'])
143
+ ])
144
+ ],
145
+ split='val_split.txt'))
146
+ log_config = dict(
147
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
148
+ dist_params = dict(backend='nccl')
149
+ log_level = 'INFO'
150
+ load_from = None
151
+ resume_from = None
152
+ workflow = [('train', 1)]
153
+ cudnn_benchmark = True
154
+ optimizer = dict(
155
+ type='AdamW',
156
+ lr=6e-05,
157
+ betas=(0.9, 0.999),
158
+ weight_decay=0.01,
159
+ paramwise_cfg=dict(
160
+ custom_keys=dict(
161
+ pos_block=dict(decay_mult=0.0),
162
+ norm=dict(decay_mult=0.0),
163
+ head=dict(lr_mult=10.0))))
164
+ optimizer_config = dict()
165
+ lr_config = dict(
166
+ policy='poly',
167
+ warmup='linear',
168
+ warmup_iters=1500,
169
+ warmup_ratio=1e-06,
170
+ power=1.0,
171
+ min_lr=0.0,
172
+ by_epoch=False)
173
+ runner = dict(type='IterBasedRunner', max_iters=160000)
174
+ checkpoint_config = dict(by_epoch=False, interval=10000, max_keep_ckpts=5)
175
+ evaluation = dict(interval=10000, metric='mIoU', save_best='mIoU')
176
+ find_unused_parameters = True
177
+ work_dir = './work_dirs/segnext.base.512x512.celebamaskhq.160k'
178
+ gpu_ids = [0]
179
+ auto_resume = False
pretrained_ckpts/face_parsing/segnext.large.512x512.celebamaskhq.160k.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
2
+ ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
3
+ model = dict(
4
+ type='EncoderDecoder',
5
+ pretrained=None,
6
+ backbone=dict(
7
+ type='MSCAN',
8
+ embed_dims=[64, 128, 320, 512],
9
+ mlp_ratios=[8, 8, 4, 4],
10
+ drop_rate=0.0,
11
+ drop_path_rate=0.3,
12
+ depths=[3, 5, 27, 3],
13
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
14
+ init_cfg=dict(type='Pretrained', checkpoint='pretrained/mscan_l.pth')),
15
+ decode_head=dict(
16
+ type='LightHamHead',
17
+ in_channels=[128, 320, 512],
18
+ in_index=[1, 2, 3],
19
+ channels=1024,
20
+ ham_channels=1024,
21
+ dropout_ratio=0.1,
22
+ num_classes=150,
23
+ norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
24
+ align_corners=False,
25
+ loss_decode=dict(
26
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
27
+ train_cfg=dict(),
28
+ test_cfg=dict(mode='whole'))
29
+ dataset_type = 'CelebAMaskHQDataset'
30
+ data_root = './data/CelebAMaskHQ'
31
+ img_norm_cfg = dict(
32
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
33
+ crop_size = (512, 512)
34
+ train_pipeline = [
35
+ dict(type='LoadImageFromFile'),
36
+ dict(type='LoadAnnotations', reduce_zero_label=False),
37
+ dict(type='Resize', img_scale=(512, 512), ratio_range=(1.0, 1.0)),
38
+ dict(type='RandomFlip', prob=0),
39
+ dict(type='PhotoMetricDistortion'),
40
+ dict(
41
+ type='Normalize',
42
+ mean=[123.675, 116.28, 103.53],
43
+ std=[58.395, 57.12, 57.375],
44
+ to_rgb=True),
45
+ dict(type='DefaultFormatBundle'),
46
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
47
+ ]
48
+ test_pipeline = [
49
+ dict(type='LoadImageFromFile'),
50
+ dict(
51
+ type='MultiScaleFlipAug',
52
+ img_scale=(1024, 1024),
53
+ img_ratios=[0.5],
54
+ flip=False,
55
+ transforms=[
56
+ dict(type='Resize', keep_ratio=True),
57
+ dict(type='RandomFlip', prob=0),
58
+ dict(
59
+ type='Normalize',
60
+ mean=[123.675, 116.28, 103.53],
61
+ std=[58.395, 57.12, 57.375],
62
+ to_rgb=True),
63
+ dict(type='ImageToTensor', keys=['img']),
64
+ dict(type='Collect', keys=['img'])
65
+ ])
66
+ ]
67
+ data = dict(
68
+ samples_per_gpu=8,
69
+ workers_per_gpu=8,
70
+ train=dict(
71
+ type='RepeatDataset',
72
+ times=50,
73
+ dataset=dict(
74
+ type='CelebAMaskHQDataset',
75
+ data_root='./data/CelebAMaskHQ',
76
+ img_dir='CelebA-HQ-img/',
77
+ ann_dir='CelebA-HQ-mask/',
78
+ pipeline=[
79
+ dict(type='LoadImageFromFile'),
80
+ dict(type='LoadAnnotations', reduce_zero_label=False),
81
+ dict(
82
+ type='Resize',
83
+ img_scale=(512, 512),
84
+ ratio_range=(1.0, 1.0)),
85
+ dict(type='RandomFlip', prob=0),
86
+ dict(type='PhotoMetricDistortion'),
87
+ dict(
88
+ type='Normalize',
89
+ mean=[123.675, 116.28, 103.53],
90
+ std=[58.395, 57.12, 57.375],
91
+ to_rgb=True),
92
+ dict(type='DefaultFormatBundle'),
93
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
94
+ ],
95
+ split='train_split.txt')),
96
+ val=dict(
97
+ type='CelebAMaskHQDataset',
98
+ data_root='./data/CelebAMaskHQ',
99
+ img_dir='CelebA-HQ-img/',
100
+ ann_dir='CelebA-HQ-mask/',
101
+ pipeline=[
102
+ dict(type='LoadImageFromFile'),
103
+ dict(
104
+ type='MultiScaleFlipAug',
105
+ img_scale=(1024, 1024),
106
+ img_ratios=[0.5],
107
+ flip=False,
108
+ transforms=[
109
+ dict(type='Resize', keep_ratio=True),
110
+ dict(type='RandomFlip', prob=0),
111
+ dict(
112
+ type='Normalize',
113
+ mean=[123.675, 116.28, 103.53],
114
+ std=[58.395, 57.12, 57.375],
115
+ to_rgb=True),
116
+ dict(type='ImageToTensor', keys=['img']),
117
+ dict(type='Collect', keys=['img'])
118
+ ])
119
+ ],
120
+ split='val_split.txt'),
121
+ test=dict(
122
+ type='CelebAMaskHQDataset',
123
+ data_root='./data/CelebAMaskHQ',
124
+ img_dir='CelebA-HQ-img/',
125
+ ann_dir='CelebA-HQ-mask/',
126
+ pipeline=[
127
+ dict(type='LoadImageFromFile'),
128
+ dict(
129
+ type='MultiScaleFlipAug',
130
+ img_scale=(1024, 1024),
131
+ img_ratios=[0.5],
132
+ flip=False,
133
+ transforms=[
134
+ dict(type='Resize', keep_ratio=True),
135
+ dict(type='RandomFlip', prob=0),
136
+ dict(
137
+ type='Normalize',
138
+ mean=[123.675, 116.28, 103.53],
139
+ std=[58.395, 57.12, 57.375],
140
+ to_rgb=True),
141
+ dict(type='ImageToTensor', keys=['img']),
142
+ dict(type='Collect', keys=['img'])
143
+ ])
144
+ ],
145
+ split='val_split.txt'))
146
+ log_config = dict(
147
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
148
+ dist_params = dict(backend='nccl')
149
+ log_level = 'INFO'
150
+ load_from = None
151
+ resume_from = None
152
+ workflow = [('train', 1)]
153
+ cudnn_benchmark = True
154
+ optimizer = dict(
155
+ type='AdamW',
156
+ lr=6e-05,
157
+ betas=(0.9, 0.999),
158
+ weight_decay=0.01,
159
+ paramwise_cfg=dict(
160
+ custom_keys=dict(
161
+ pos_block=dict(decay_mult=0.0),
162
+ norm=dict(decay_mult=0.0),
163
+ head=dict(lr_mult=10.0))))
164
+ optimizer_config = dict()
165
+ lr_config = dict(
166
+ policy='poly',
167
+ warmup='linear',
168
+ warmup_iters=1500,
169
+ warmup_ratio=1e-06,
170
+ power=1.0,
171
+ min_lr=0.0,
172
+ by_epoch=False)
173
+ runner = dict(type='IterBasedRunner', max_iters=160000)
174
+ checkpoint_config = dict(by_epoch=False, interval=10000, max_keep_ckpts=5)
175
+ evaluation = dict(interval=10000, metric='mIoU', save_best='mIoU')
176
+ find_unused_parameters = True
177
+ work_dir = './work_dirs/segnext.large.512x512.celebamaskhq.160k'
178
+ gpu_ids = [0]
179
+ auto_resume = False
pretrained_ckpts/face_parsing/segnext.small.512x512.celebamaskhq.160k.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
2
+ ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
3
+ model = dict(
4
+ type='EncoderDecoder',
5
+ pretrained=None,
6
+ backbone=dict(
7
+ type='MSCAN',
8
+ embed_dims=[64, 128, 320, 512],
9
+ mlp_ratios=[8, 8, 4, 4],
10
+ drop_rate=0.0,
11
+ drop_path_rate=0.1,
12
+ depths=[2, 2, 4, 2],
13
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
14
+ init_cfg=dict(type='Pretrained', checkpoint='pretrained/mscan_s.pth')),
15
+ decode_head=dict(
16
+ type='LightHamHead',
17
+ in_channels=[128, 320, 512],
18
+ in_index=[1, 2, 3],
19
+ channels=256,
20
+ ham_channels=256,
21
+ dropout_ratio=0.1,
22
+ num_classes=19,
23
+ norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
24
+ align_corners=False,
25
+ loss_decode=dict(
26
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
27
+ ham_kwargs=dict(MD_R=16)),
28
+ train_cfg=dict(),
29
+ test_cfg=dict(mode='whole'))
30
+ dataset_type = 'CelebAMaskHQDataset'
31
+ data_root = './data/CelebAMaskHQ'
32
+ img_norm_cfg = dict(
33
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
34
+ crop_size = (512, 512)
35
+ train_pipeline = [
36
+ dict(type='LoadImageFromFile'),
37
+ dict(type='LoadAnnotations', reduce_zero_label=False),
38
+ dict(type='Resize', img_scale=(512, 512), ratio_range=(1.0, 1.0)),
39
+ dict(type='RandomFlip', prob=0),
40
+ dict(type='PhotoMetricDistortion'),
41
+ dict(
42
+ type='Normalize',
43
+ mean=[123.675, 116.28, 103.53],
44
+ std=[58.395, 57.12, 57.375],
45
+ to_rgb=True),
46
+ dict(type='DefaultFormatBundle'),
47
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
48
+ ]
49
+ test_pipeline = [
50
+ dict(type='LoadImageFromFile'),
51
+ dict(
52
+ type='MultiScaleFlipAug',
53
+ img_scale=(1024, 1024),
54
+ img_ratios=[0.5],
55
+ flip=False,
56
+ transforms=[
57
+ dict(type='Resize', keep_ratio=True),
58
+ dict(type='RandomFlip', prob=0),
59
+ dict(
60
+ type='Normalize',
61
+ mean=[123.675, 116.28, 103.53],
62
+ std=[58.395, 57.12, 57.375],
63
+ to_rgb=True),
64
+ dict(type='ImageToTensor', keys=['img']),
65
+ dict(type='Collect', keys=['img'])
66
+ ])
67
+ ]
68
+ data = dict(
69
+ samples_per_gpu=16,
70
+ workers_per_gpu=8,
71
+ train=dict(
72
+ type='RepeatDataset',
73
+ times=50,
74
+ dataset=dict(
75
+ type='CelebAMaskHQDataset',
76
+ data_root='./data/CelebAMaskHQ',
77
+ img_dir='CelebA-HQ-img/',
78
+ ann_dir='CelebA-HQ-mask/',
79
+ pipeline=[
80
+ dict(type='LoadImageFromFile'),
81
+ dict(type='LoadAnnotations', reduce_zero_label=False),
82
+ dict(
83
+ type='Resize',
84
+ img_scale=(512, 512),
85
+ ratio_range=(1.0, 1.0)),
86
+ dict(type='RandomFlip', prob=0),
87
+ dict(type='PhotoMetricDistortion'),
88
+ dict(
89
+ type='Normalize',
90
+ mean=[123.675, 116.28, 103.53],
91
+ std=[58.395, 57.12, 57.375],
92
+ to_rgb=True),
93
+ dict(type='DefaultFormatBundle'),
94
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
95
+ ],
96
+ split='train_split.txt')),
97
+ val=dict(
98
+ type='CelebAMaskHQDataset',
99
+ data_root='./data/CelebAMaskHQ',
100
+ img_dir='CelebA-HQ-img/',
101
+ ann_dir='CelebA-HQ-mask/',
102
+ pipeline=[
103
+ dict(type='LoadImageFromFile'),
104
+ dict(
105
+ type='MultiScaleFlipAug',
106
+ img_scale=(1024, 1024),
107
+ img_ratios=[0.5],
108
+ flip=False,
109
+ transforms=[
110
+ dict(type='Resize', keep_ratio=True),
111
+ dict(type='RandomFlip', prob=0),
112
+ dict(
113
+ type='Normalize',
114
+ mean=[123.675, 116.28, 103.53],
115
+ std=[58.395, 57.12, 57.375],
116
+ to_rgb=True),
117
+ dict(type='ImageToTensor', keys=['img']),
118
+ dict(type='Collect', keys=['img'])
119
+ ])
120
+ ],
121
+ split='val_split.txt'),
122
+ test=dict(
123
+ type='CelebAMaskHQDataset',
124
+ data_root='./data/CelebAMaskHQ',
125
+ img_dir='CelebA-HQ-img/',
126
+ ann_dir='CelebA-HQ-mask/',
127
+ pipeline=[
128
+ dict(type='LoadImageFromFile'),
129
+ dict(
130
+ type='MultiScaleFlipAug',
131
+ img_scale=(1024, 1024),
132
+ img_ratios=[0.5],
133
+ flip=False,
134
+ transforms=[
135
+ dict(type='Resize', keep_ratio=True),
136
+ dict(type='RandomFlip', prob=0),
137
+ dict(
138
+ type='Normalize',
139
+ mean=[123.675, 116.28, 103.53],
140
+ std=[58.395, 57.12, 57.375],
141
+ to_rgb=True),
142
+ dict(type='ImageToTensor', keys=['img']),
143
+ dict(type='Collect', keys=['img'])
144
+ ])
145
+ ],
146
+ split='val_split.txt'))
147
+ log_config = dict(
148
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
149
+ dist_params = dict(backend='nccl')
150
+ log_level = 'INFO'
151
+ load_from = None
152
+ resume_from = None
153
+ workflow = [('train', 1)]
154
+ cudnn_benchmark = True
155
+ optimizer = dict(
156
+ type='AdamW',
157
+ lr=6e-05,
158
+ betas=(0.9, 0.999),
159
+ weight_decay=0.01,
160
+ paramwise_cfg=dict(
161
+ custom_keys=dict(
162
+ pos_block=dict(decay_mult=0.0),
163
+ norm=dict(decay_mult=0.0),
164
+ head=dict(lr_mult=10.0))))
165
+ optimizer_config = dict()
166
+ lr_config = dict(
167
+ policy='poly',
168
+ warmup='linear',
169
+ warmup_iters=1500,
170
+ warmup_ratio=1e-06,
171
+ power=1.0,
172
+ min_lr=0.0,
173
+ by_epoch=False)
174
+ runner = dict(type='IterBasedRunner', max_iters=160000)
175
+ checkpoint_config = dict(by_epoch=False, interval=10000, max_keep_ckpts=5)
176
+ evaluation = dict(interval=10000, metric='mIoU', save_best='mIoU')
177
+ find_unused_parameters = True
178
+ work_dir = './work_dirs/segnext.small.512x512.celebamaskhq.160k'
179
+ gpu_ids = [0]
180
+ auto_resume = False
pretrained_ckpts/face_parsing/segnext.small.best_mIoU_iter_140000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f87738b6b6f5dca82cc63298d3d625f81915a9c1ed3d0a359b8866b2b76b321
3
+ size 167259923
pretrained_ckpts/face_parsing/segnext.tiny.512x512.celebamaskhq.160k.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
2
+ ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
3
+ model = dict(
4
+ type='EncoderDecoder',
5
+ pretrained=None,
6
+ backbone=dict(
7
+ type='MSCAN',
8
+ embed_dims=[32, 64, 160, 256],
9
+ mlp_ratios=[8, 8, 4, 4],
10
+ drop_rate=0.0,
11
+ drop_path_rate=0.1,
12
+ depths=[3, 3, 5, 2],
13
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
14
+ init_cfg=dict(type='Pretrained', checkpoint='pretrained/mscan_t.pth')),
15
+ decode_head=dict(
16
+ type='LightHamHead',
17
+ in_channels=[64, 160, 256],
18
+ in_index=[1, 2, 3],
19
+ channels=256,
20
+ ham_channels=256,
21
+ dropout_ratio=0.1,
22
+ num_classes=150,
23
+ norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
24
+ align_corners=False,
25
+ loss_decode=dict(
26
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
27
+ ham_kwargs=dict(MD_R=16)),
28
+ train_cfg=dict(),
29
+ test_cfg=dict(mode='whole'))
30
+ dataset_type = 'CelebAMaskHQDataset'
31
+ data_root = './data/CelebAMaskHQ'
32
+ img_norm_cfg = dict(
33
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
34
+ crop_size = (512, 512)
35
+ train_pipeline = [
36
+ dict(type='LoadImageFromFile'),
37
+ dict(type='LoadAnnotations', reduce_zero_label=False),
38
+ dict(type='Resize', img_scale=(512, 512), ratio_range=(1.0, 1.0)),
39
+ dict(type='RandomFlip', prob=0),
40
+ dict(type='PhotoMetricDistortion'),
41
+ dict(
42
+ type='Normalize',
43
+ mean=[123.675, 116.28, 103.53],
44
+ std=[58.395, 57.12, 57.375],
45
+ to_rgb=True),
46
+ dict(type='DefaultFormatBundle'),
47
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
48
+ ]
49
+ test_pipeline = [
50
+ dict(type='LoadImageFromFile'),
51
+ dict(
52
+ type='MultiScaleFlipAug',
53
+ img_scale=(1024, 1024),
54
+ img_ratios=[0.5],
55
+ flip=False,
56
+ transforms=[
57
+ dict(type='Resize', keep_ratio=True),
58
+ dict(type='RandomFlip', prob=0),
59
+ dict(
60
+ type='Normalize',
61
+ mean=[123.675, 116.28, 103.53],
62
+ std=[58.395, 57.12, 57.375],
63
+ to_rgb=True),
64
+ dict(type='ImageToTensor', keys=['img']),
65
+ dict(type='Collect', keys=['img'])
66
+ ])
67
+ ]
68
+ data = dict(
69
+ samples_per_gpu=16,
70
+ workers_per_gpu=8,
71
+ train=dict(
72
+ type='RepeatDataset',
73
+ times=50,
74
+ dataset=dict(
75
+ type='CelebAMaskHQDataset',
76
+ data_root='./data/CelebAMaskHQ',
77
+ img_dir='CelebA-HQ-img/',
78
+ ann_dir='CelebA-HQ-mask/',
79
+ pipeline=[
80
+ dict(type='LoadImageFromFile'),
81
+ dict(type='LoadAnnotations', reduce_zero_label=False),
82
+ dict(
83
+ type='Resize',
84
+ img_scale=(512, 512),
85
+ ratio_range=(1.0, 1.0)),
86
+ dict(type='RandomFlip', prob=0),
87
+ dict(type='PhotoMetricDistortion'),
88
+ dict(
89
+ type='Normalize',
90
+ mean=[123.675, 116.28, 103.53],
91
+ std=[58.395, 57.12, 57.375],
92
+ to_rgb=True),
93
+ dict(type='DefaultFormatBundle'),
94
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
95
+ ],
96
+ split='train_split.txt')),
97
+ val=dict(
98
+ type='CelebAMaskHQDataset',
99
+ data_root='./data/CelebAMaskHQ',
100
+ img_dir='CelebA-HQ-img/',
101
+ ann_dir='CelebA-HQ-mask/',
102
+ pipeline=[
103
+ dict(type='LoadImageFromFile'),
104
+ dict(
105
+ type='MultiScaleFlipAug',
106
+ img_scale=(1024, 1024),
107
+ img_ratios=[0.5],
108
+ flip=False,
109
+ transforms=[
110
+ dict(type='Resize', keep_ratio=True),
111
+ dict(type='RandomFlip', prob=0),
112
+ dict(
113
+ type='Normalize',
114
+ mean=[123.675, 116.28, 103.53],
115
+ std=[58.395, 57.12, 57.375],
116
+ to_rgb=True),
117
+ dict(type='ImageToTensor', keys=['img']),
118
+ dict(type='Collect', keys=['img'])
119
+ ])
120
+ ],
121
+ split='val_split.txt'),
122
+ test=dict(
123
+ type='CelebAMaskHQDataset',
124
+ data_root='./data/CelebAMaskHQ',
125
+ img_dir='CelebA-HQ-img/',
126
+ ann_dir='CelebA-HQ-mask/',
127
+ pipeline=[
128
+ dict(type='LoadImageFromFile'),
129
+ dict(
130
+ type='MultiScaleFlipAug',
131
+ img_scale=(1024, 1024),
132
+ img_ratios=[0.5],
133
+ flip=False,
134
+ transforms=[
135
+ dict(type='Resize', keep_ratio=True),
136
+ dict(type='RandomFlip', prob=0),
137
+ dict(
138
+ type='Normalize',
139
+ mean=[123.675, 116.28, 103.53],
140
+ std=[58.395, 57.12, 57.375],
141
+ to_rgb=True),
142
+ dict(type='ImageToTensor', keys=['img']),
143
+ dict(type='Collect', keys=['img'])
144
+ ])
145
+ ],
146
+ split='val_split.txt'))
147
+ log_config = dict(
148
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
149
+ dist_params = dict(backend='nccl')
150
+ log_level = 'INFO'
151
+ load_from = None
152
+ resume_from = None
153
+ workflow = [('train', 1)]
154
+ cudnn_benchmark = True
155
+ optimizer = dict(
156
+ type='AdamW',
157
+ lr=6e-05,
158
+ betas=(0.9, 0.999),
159
+ weight_decay=0.01,
160
+ paramwise_cfg=dict(
161
+ custom_keys=dict(
162
+ pos_block=dict(decay_mult=0.0),
163
+ norm=dict(decay_mult=0.0),
164
+ head=dict(lr_mult=10.0))))
165
+ optimizer_config = dict()
166
+ lr_config = dict(
167
+ policy='poly',
168
+ warmup='linear',
169
+ warmup_iters=1500,
170
+ warmup_ratio=1e-06,
171
+ power=1.0,
172
+ min_lr=0.0,
173
+ by_epoch=False)
174
+ runner = dict(type='IterBasedRunner', max_iters=160000)
175
+ checkpoint_config = dict(by_epoch=False, interval=10000, max_keep_ckpts=5)
176
+ evaluation = dict(interval=10000, metric='mIoU', save_best='mIoU')
177
+ find_unused_parameters = True
178
+ work_dir = './work_dirs/segnext.tiny.512x512.celebamaskhq.160k'
179
+ gpu_ids = [0]
180
+ auto_resume = False
pretrained_ckpts/facevid2vid/00000189-checkpoint.pth.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fec00af84967380eb696cd836b948de98a0eddbc8de683101ab886de182789cd
3
+ size 2112477593
pretrained_ckpts/facevid2vid/vox-256.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset_params:
2
+ root_dir: /zlh/VoxCeleb/first-order-256
3
+ frame_shape: [256, 256, 3]
4
+ id_sampling: True
5
+ pairs_list: None
6
+ augmentation_params:
7
+ flip_param:
8
+ horizontal_flip: True
9
+ time_flip: True
10
+ jitter_param:
11
+ brightness: 0.1
12
+ contrast: 0.1
13
+ saturation: 0.1
14
+ hue: 0.1
15
+
16
+
17
+ model_params:
18
+ common_params:
19
+ num_kp: 15
20
+ image_channel: 3
21
+ feature_channel: 32
22
+ estimate_jacobian: False # True
23
+ kp_detector_params:
24
+ temperature: 0.1
25
+ block_expansion: 32
26
+ max_features: 1024
27
+ scale_factor: 0.25 # 0.25
28
+ num_blocks: 5
29
+ reshape_channel: 16384 # 16384 = 1024 * 16
30
+ reshape_depth: 16
31
+ he_estimator_params:
32
+ block_expansion: 64
33
+ max_features: 2048
34
+ num_bins: 66
35
+ generator_params:
36
+ block_expansion: 64
37
+ max_features: 512
38
+ num_down_blocks: 2
39
+ reshape_channel: 32
40
+ reshape_depth: 16 # 512 = 32 * 16
41
+ num_resblocks: 6
42
+ estimate_occlusion_map: True
43
+ dense_motion_params:
44
+ block_expansion: 32
45
+ max_features: 1024
46
+ num_blocks: 5
47
+ # reshape_channel: 32
48
+ reshape_depth: 16
49
+ compress: 4
50
+ discriminator_params:
51
+ scales: [1]
52
+ block_expansion: 32
53
+ max_features: 512
54
+ num_blocks: 4
55
+ sn: True
56
+
57
+ train_params:
58
+ num_epochs: 300
59
+ num_repeats: 75
60
+ epoch_milestones: [180,]
61
+ lr_generator: 2.0e-4
62
+ lr_discriminator: 2.0e-4
63
+ lr_kp_detector: 2.0e-4
64
+ lr_he_estimator: 2.0e-4
65
+ gan_mode: 'hinge' # hinge or ls
66
+ batch_size: 32
67
+ scales: [1, 0.5, 0.25, 0.125]
68
+ checkpoint_freq: 10
69
+ hopenet_snapshot: '/zlh/Project/deep-head-pose/checkpoints/hopenet_robust_alpha1.pkl'
70
+ transform_params:
71
+ sigma_affine: 0.05
72
+ sigma_tps: 0.005
73
+ points_tps: 5
74
+ loss_weights:
75
+ generator_gan: 1
76
+ discriminator_gan: 1
77
+ feature_matching: [10, 10, 10, 10]
78
+ perceptual: [10, 10, 10, 10, 10]
79
+ equivariance_value: 10
80
+ equivariance_jacobian: 0 # 10
81
+ keypoint: 10
82
+ headpose: 20
83
+ expression: 5
84
+
85
+ visualizer_params:
86
+ kp_size: 5
87
+ draw_border: True
88
+ colormap: 'gist_rainbow'
pretrained_ckpts/gpen/fetch_gepn_models.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ mkdir weights
2
+
3
+ wget https://public-vigen-video.oss-cn-shanghai.aliyuncs.com/robin/models/RetinaFace-R50.pth
4
+ wget https://public-vigen-video.oss-cn-shanghai.aliyuncs.com/robin/models/realesrnet_x4.pth
5
+ wget https://public-vigen-video.oss-cn-shanghai.aliyuncs.com/robin/models/GPEN-BFR-512.pth
6
+ wget https://public-vigen-video.oss-cn-shanghai.aliyuncs.com/robin/models/ParseNet-latest.pth
7
+
8
+ mv *.pth ./weights
pretrained_ckpts/gpen/weights/GPEN-BFR-512.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1002c41add95b0decad69604d80455576f7187dd99ca16bd611bcfd44c10b51
3
+ size 284085738
pretrained_ckpts/gpen/weights/ParseNet-latest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d558d8d0e42c20224f13cf5a29c79eba2d59913419f945545d8cf7b72920de2
3
+ size 85331193
pretrained_ckpts/gpen/weights/RetinaFace-R50.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d1de9c2944f2ccddca5f5e010ea5ae64a39845a86311af6fdf30841b0a5a16d
3
+ size 109497761
pretrained_ckpts/gpen/weights/realesrnet_x4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa00f09ad753d88576b21ed977e97d634976377031b178acc3b5b238df463400
3
+ size 67040989
pretrained_ckpts/put_ckpts_accordingly.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Please download the pre-trained models as described in the documentation and place each one in its corresponding folder.
pretrained_ckpts/shape_predictor_68_face_landmarks.dat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
3
+ size 99693937
pretrained_ckpts/stylegan2/stylegan2-ffhq-config-f.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bae494ef77e32a9cd1792a81a3c167692a0e64f6bcd8b06592ff42917e2ed46e
3
+ size 381462551