Upload 19 files
Browse files- .gitattributes +1 -0
- pretrained_ckpts/auxiliray/model.pth +3 -0
- pretrained_ckpts/auxiliray/model_ir_se50.pth +3 -0
- pretrained_ckpts/e4s/iteration_300000.pt +3 -0
- pretrained_ckpts/face_parsing/79999_iter.pth +3 -0
- pretrained_ckpts/face_parsing/segnext.base.512x512.celebamaskhq.160k.py +179 -0
- pretrained_ckpts/face_parsing/segnext.large.512x512.celebamaskhq.160k.py +179 -0
- pretrained_ckpts/face_parsing/segnext.small.512x512.celebamaskhq.160k.py +180 -0
- pretrained_ckpts/face_parsing/segnext.small.best_mIoU_iter_140000.pth +3 -0
- pretrained_ckpts/face_parsing/segnext.tiny.512x512.celebamaskhq.160k.py +180 -0
- pretrained_ckpts/facevid2vid/00000189-checkpoint.pth.tar +3 -0
- pretrained_ckpts/facevid2vid/vox-256.yaml +88 -0
- pretrained_ckpts/gpen/fetch_gepn_models.sh +8 -0
- pretrained_ckpts/gpen/weights/GPEN-BFR-512.pth +3 -0
- pretrained_ckpts/gpen/weights/ParseNet-latest.pth +3 -0
- pretrained_ckpts/gpen/weights/RetinaFace-R50.pth +3 -0
- pretrained_ckpts/gpen/weights/realesrnet_x4.pth +3 -0
- pretrained_ckpts/put_ckpts_accordingly.txt +1 -0
- pretrained_ckpts/shape_predictor_68_face_landmarks.dat +3 -0
- pretrained_ckpts/stylegan2/stylegan2-ffhq-config-f.pt +3 -0
.gitattributes
CHANGED
|
@@ -87,3 +87,4 @@ output_fp16_fp32_joined/008229_collage.png filter=lfs diff=lfs merge=lfs -text
|
|
| 87 |
output_fp16_fp32_joined/008468_collage.png filter=lfs diff=lfs merge=lfs -text
|
| 88 |
output_fp16_fp32_joined/008597_collage.png filter=lfs diff=lfs merge=lfs -text
|
| 89 |
output_fp16_fp32_joined/008768_collage.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 87 |
output_fp16_fp32_joined/008468_collage.png filter=lfs diff=lfs merge=lfs -text
|
| 88 |
output_fp16_fp32_joined/008597_collage.png filter=lfs diff=lfs merge=lfs -text
|
| 89 |
output_fp16_fp32_joined/008768_collage.png filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
pretrained_ckpts/shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
|
pretrained_ckpts/auxiliray/model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7914f1dccb37ab2f307703f2593efd74cc327d9db974424dff8d7510b46d85b5
|
| 3 |
+
size 7813380
|
pretrained_ckpts/auxiliray/model_ir_se50.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a035c768259b98ab1ce0e646312f48b9e1e218197a0f80ac6765e88f8b6ddf28
|
| 3 |
+
size 175367323
|
pretrained_ckpts/e4s/iteration_300000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38a5b496f876442bc9051c6b7435d9268ee392a0d707edaa73cacd2846572771
|
| 3 |
+
size 1452893535
|
pretrained_ckpts/face_parsing/79999_iter.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:468e13ca13a9b43cc0881a9f99083a430e9c0a38abd935431d1c28ee94b26567
|
| 3 |
+
size 53289463
|
pretrained_ckpts/face_parsing/segnext.base.512x512.celebamaskhq.160k.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
| 2 |
+
ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
|
| 3 |
+
model = dict(
|
| 4 |
+
type='EncoderDecoder',
|
| 5 |
+
pretrained=None,
|
| 6 |
+
backbone=dict(
|
| 7 |
+
type='MSCAN',
|
| 8 |
+
embed_dims=[64, 128, 320, 512],
|
| 9 |
+
mlp_ratios=[8, 8, 4, 4],
|
| 10 |
+
drop_rate=0.0,
|
| 11 |
+
drop_path_rate=0.1,
|
| 12 |
+
depths=[3, 3, 12, 3],
|
| 13 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 14 |
+
init_cfg=dict(type='Pretrained', checkpoint='pretrained/mscan_b.pth')),
|
| 15 |
+
decode_head=dict(
|
| 16 |
+
type='LightHamHead',
|
| 17 |
+
in_channels=[128, 320, 512],
|
| 18 |
+
in_index=[1, 2, 3],
|
| 19 |
+
channels=512,
|
| 20 |
+
ham_channels=512,
|
| 21 |
+
dropout_ratio=0.1,
|
| 22 |
+
num_classes=150,
|
| 23 |
+
norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
|
| 24 |
+
align_corners=False,
|
| 25 |
+
loss_decode=dict(
|
| 26 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
| 27 |
+
train_cfg=dict(),
|
| 28 |
+
test_cfg=dict(mode='whole'))
|
| 29 |
+
dataset_type = 'CelebAMaskHQDataset'
|
| 30 |
+
data_root = './data/CelebAMaskHQ'
|
| 31 |
+
img_norm_cfg = dict(
|
| 32 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 33 |
+
crop_size = (512, 512)
|
| 34 |
+
train_pipeline = [
|
| 35 |
+
dict(type='LoadImageFromFile'),
|
| 36 |
+
dict(type='LoadAnnotations', reduce_zero_label=False),
|
| 37 |
+
dict(type='Resize', img_scale=(512, 512), ratio_range=(1.0, 1.0)),
|
| 38 |
+
dict(type='RandomFlip', prob=0),
|
| 39 |
+
dict(type='PhotoMetricDistortion'),
|
| 40 |
+
dict(
|
| 41 |
+
type='Normalize',
|
| 42 |
+
mean=[123.675, 116.28, 103.53],
|
| 43 |
+
std=[58.395, 57.12, 57.375],
|
| 44 |
+
to_rgb=True),
|
| 45 |
+
dict(type='DefaultFormatBundle'),
|
| 46 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 47 |
+
]
|
| 48 |
+
test_pipeline = [
|
| 49 |
+
dict(type='LoadImageFromFile'),
|
| 50 |
+
dict(
|
| 51 |
+
type='MultiScaleFlipAug',
|
| 52 |
+
img_scale=(1024, 1024),
|
| 53 |
+
img_ratios=[0.5],
|
| 54 |
+
flip=False,
|
| 55 |
+
transforms=[
|
| 56 |
+
dict(type='Resize', keep_ratio=True),
|
| 57 |
+
dict(type='RandomFlip', prob=0),
|
| 58 |
+
dict(
|
| 59 |
+
type='Normalize',
|
| 60 |
+
mean=[123.675, 116.28, 103.53],
|
| 61 |
+
std=[58.395, 57.12, 57.375],
|
| 62 |
+
to_rgb=True),
|
| 63 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 64 |
+
dict(type='Collect', keys=['img'])
|
| 65 |
+
])
|
| 66 |
+
]
|
| 67 |
+
data = dict(
|
| 68 |
+
samples_per_gpu=6,
|
| 69 |
+
workers_per_gpu=8,
|
| 70 |
+
train=dict(
|
| 71 |
+
type='RepeatDataset',
|
| 72 |
+
times=50,
|
| 73 |
+
dataset=dict(
|
| 74 |
+
type='CelebAMaskHQDataset',
|
| 75 |
+
data_root='./data/CelebAMaskHQ',
|
| 76 |
+
img_dir='CelebA-HQ-img/',
|
| 77 |
+
ann_dir='CelebA-HQ-mask/',
|
| 78 |
+
pipeline=[
|
| 79 |
+
dict(type='LoadImageFromFile'),
|
| 80 |
+
dict(type='LoadAnnotations', reduce_zero_label=False),
|
| 81 |
+
dict(
|
| 82 |
+
type='Resize',
|
| 83 |
+
img_scale=(512, 512),
|
| 84 |
+
ratio_range=(1.0, 1.0)),
|
| 85 |
+
dict(type='RandomFlip', prob=0),
|
| 86 |
+
dict(type='PhotoMetricDistortion'),
|
| 87 |
+
dict(
|
| 88 |
+
type='Normalize',
|
| 89 |
+
mean=[123.675, 116.28, 103.53],
|
| 90 |
+
std=[58.395, 57.12, 57.375],
|
| 91 |
+
to_rgb=True),
|
| 92 |
+
dict(type='DefaultFormatBundle'),
|
| 93 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 94 |
+
],
|
| 95 |
+
split='train_split.txt')),
|
| 96 |
+
val=dict(
|
| 97 |
+
type='CelebAMaskHQDataset',
|
| 98 |
+
data_root='./data/CelebAMaskHQ',
|
| 99 |
+
img_dir='CelebA-HQ-img/',
|
| 100 |
+
ann_dir='CelebA-HQ-mask/',
|
| 101 |
+
pipeline=[
|
| 102 |
+
dict(type='LoadImageFromFile'),
|
| 103 |
+
dict(
|
| 104 |
+
type='MultiScaleFlipAug',
|
| 105 |
+
img_scale=(1024, 1024),
|
| 106 |
+
img_ratios=[0.5],
|
| 107 |
+
flip=False,
|
| 108 |
+
transforms=[
|
| 109 |
+
dict(type='Resize', keep_ratio=True),
|
| 110 |
+
dict(type='RandomFlip', prob=0),
|
| 111 |
+
dict(
|
| 112 |
+
type='Normalize',
|
| 113 |
+
mean=[123.675, 116.28, 103.53],
|
| 114 |
+
std=[58.395, 57.12, 57.375],
|
| 115 |
+
to_rgb=True),
|
| 116 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 117 |
+
dict(type='Collect', keys=['img'])
|
| 118 |
+
])
|
| 119 |
+
],
|
| 120 |
+
split='val_split.txt'),
|
| 121 |
+
test=dict(
|
| 122 |
+
type='CelebAMaskHQDataset',
|
| 123 |
+
data_root='./data/CelebAMaskHQ',
|
| 124 |
+
img_dir='CelebA-HQ-img/',
|
| 125 |
+
ann_dir='CelebA-HQ-mask/',
|
| 126 |
+
pipeline=[
|
| 127 |
+
dict(type='LoadImageFromFile'),
|
| 128 |
+
dict(
|
| 129 |
+
type='MultiScaleFlipAug',
|
| 130 |
+
img_scale=(1024, 1024),
|
| 131 |
+
img_ratios=[0.5],
|
| 132 |
+
flip=False,
|
| 133 |
+
transforms=[
|
| 134 |
+
dict(type='Resize', keep_ratio=True),
|
| 135 |
+
dict(type='RandomFlip', prob=0),
|
| 136 |
+
dict(
|
| 137 |
+
type='Normalize',
|
| 138 |
+
mean=[123.675, 116.28, 103.53],
|
| 139 |
+
std=[58.395, 57.12, 57.375],
|
| 140 |
+
to_rgb=True),
|
| 141 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 142 |
+
dict(type='Collect', keys=['img'])
|
| 143 |
+
])
|
| 144 |
+
],
|
| 145 |
+
split='val_split.txt'))
|
| 146 |
+
log_config = dict(
|
| 147 |
+
interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
|
| 148 |
+
dist_params = dict(backend='nccl')
|
| 149 |
+
log_level = 'INFO'
|
| 150 |
+
load_from = None
|
| 151 |
+
resume_from = None
|
| 152 |
+
workflow = [('train', 1)]
|
| 153 |
+
cudnn_benchmark = True
|
| 154 |
+
optimizer = dict(
|
| 155 |
+
type='AdamW',
|
| 156 |
+
lr=6e-05,
|
| 157 |
+
betas=(0.9, 0.999),
|
| 158 |
+
weight_decay=0.01,
|
| 159 |
+
paramwise_cfg=dict(
|
| 160 |
+
custom_keys=dict(
|
| 161 |
+
pos_block=dict(decay_mult=0.0),
|
| 162 |
+
norm=dict(decay_mult=0.0),
|
| 163 |
+
head=dict(lr_mult=10.0))))
|
| 164 |
+
optimizer_config = dict()
|
| 165 |
+
lr_config = dict(
|
| 166 |
+
policy='poly',
|
| 167 |
+
warmup='linear',
|
| 168 |
+
warmup_iters=1500,
|
| 169 |
+
warmup_ratio=1e-06,
|
| 170 |
+
power=1.0,
|
| 171 |
+
min_lr=0.0,
|
| 172 |
+
by_epoch=False)
|
| 173 |
+
runner = dict(type='IterBasedRunner', max_iters=160000)
|
| 174 |
+
checkpoint_config = dict(by_epoch=False, interval=10000, max_keep_ckpts=5)
|
| 175 |
+
evaluation = dict(interval=10000, metric='mIoU', save_best='mIoU')
|
| 176 |
+
find_unused_parameters = True
|
| 177 |
+
work_dir = './work_dirs/segnext.base.512x512.celebamaskhq.160k'
|
| 178 |
+
gpu_ids = [0]
|
| 179 |
+
auto_resume = False
|
pretrained_ckpts/face_parsing/segnext.large.512x512.celebamaskhq.160k.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
| 2 |
+
ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
|
| 3 |
+
model = dict(
|
| 4 |
+
type='EncoderDecoder',
|
| 5 |
+
pretrained=None,
|
| 6 |
+
backbone=dict(
|
| 7 |
+
type='MSCAN',
|
| 8 |
+
embed_dims=[64, 128, 320, 512],
|
| 9 |
+
mlp_ratios=[8, 8, 4, 4],
|
| 10 |
+
drop_rate=0.0,
|
| 11 |
+
drop_path_rate=0.3,
|
| 12 |
+
depths=[3, 5, 27, 3],
|
| 13 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 14 |
+
init_cfg=dict(type='Pretrained', checkpoint='pretrained/mscan_l.pth')),
|
| 15 |
+
decode_head=dict(
|
| 16 |
+
type='LightHamHead',
|
| 17 |
+
in_channels=[128, 320, 512],
|
| 18 |
+
in_index=[1, 2, 3],
|
| 19 |
+
channels=1024,
|
| 20 |
+
ham_channels=1024,
|
| 21 |
+
dropout_ratio=0.1,
|
| 22 |
+
num_classes=150,
|
| 23 |
+
norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
|
| 24 |
+
align_corners=False,
|
| 25 |
+
loss_decode=dict(
|
| 26 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
| 27 |
+
train_cfg=dict(),
|
| 28 |
+
test_cfg=dict(mode='whole'))
|
| 29 |
+
dataset_type = 'CelebAMaskHQDataset'
|
| 30 |
+
data_root = './data/CelebAMaskHQ'
|
| 31 |
+
img_norm_cfg = dict(
|
| 32 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 33 |
+
crop_size = (512, 512)
|
| 34 |
+
train_pipeline = [
|
| 35 |
+
dict(type='LoadImageFromFile'),
|
| 36 |
+
dict(type='LoadAnnotations', reduce_zero_label=False),
|
| 37 |
+
dict(type='Resize', img_scale=(512, 512), ratio_range=(1.0, 1.0)),
|
| 38 |
+
dict(type='RandomFlip', prob=0),
|
| 39 |
+
dict(type='PhotoMetricDistortion'),
|
| 40 |
+
dict(
|
| 41 |
+
type='Normalize',
|
| 42 |
+
mean=[123.675, 116.28, 103.53],
|
| 43 |
+
std=[58.395, 57.12, 57.375],
|
| 44 |
+
to_rgb=True),
|
| 45 |
+
dict(type='DefaultFormatBundle'),
|
| 46 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 47 |
+
]
|
| 48 |
+
test_pipeline = [
|
| 49 |
+
dict(type='LoadImageFromFile'),
|
| 50 |
+
dict(
|
| 51 |
+
type='MultiScaleFlipAug',
|
| 52 |
+
img_scale=(1024, 1024),
|
| 53 |
+
img_ratios=[0.5],
|
| 54 |
+
flip=False,
|
| 55 |
+
transforms=[
|
| 56 |
+
dict(type='Resize', keep_ratio=True),
|
| 57 |
+
dict(type='RandomFlip', prob=0),
|
| 58 |
+
dict(
|
| 59 |
+
type='Normalize',
|
| 60 |
+
mean=[123.675, 116.28, 103.53],
|
| 61 |
+
std=[58.395, 57.12, 57.375],
|
| 62 |
+
to_rgb=True),
|
| 63 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 64 |
+
dict(type='Collect', keys=['img'])
|
| 65 |
+
])
|
| 66 |
+
]
|
| 67 |
+
data = dict(
|
| 68 |
+
samples_per_gpu=8,
|
| 69 |
+
workers_per_gpu=8,
|
| 70 |
+
train=dict(
|
| 71 |
+
type='RepeatDataset',
|
| 72 |
+
times=50,
|
| 73 |
+
dataset=dict(
|
| 74 |
+
type='CelebAMaskHQDataset',
|
| 75 |
+
data_root='./data/CelebAMaskHQ',
|
| 76 |
+
img_dir='CelebA-HQ-img/',
|
| 77 |
+
ann_dir='CelebA-HQ-mask/',
|
| 78 |
+
pipeline=[
|
| 79 |
+
dict(type='LoadImageFromFile'),
|
| 80 |
+
dict(type='LoadAnnotations', reduce_zero_label=False),
|
| 81 |
+
dict(
|
| 82 |
+
type='Resize',
|
| 83 |
+
img_scale=(512, 512),
|
| 84 |
+
ratio_range=(1.0, 1.0)),
|
| 85 |
+
dict(type='RandomFlip', prob=0),
|
| 86 |
+
dict(type='PhotoMetricDistortion'),
|
| 87 |
+
dict(
|
| 88 |
+
type='Normalize',
|
| 89 |
+
mean=[123.675, 116.28, 103.53],
|
| 90 |
+
std=[58.395, 57.12, 57.375],
|
| 91 |
+
to_rgb=True),
|
| 92 |
+
dict(type='DefaultFormatBundle'),
|
| 93 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 94 |
+
],
|
| 95 |
+
split='train_split.txt')),
|
| 96 |
+
val=dict(
|
| 97 |
+
type='CelebAMaskHQDataset',
|
| 98 |
+
data_root='./data/CelebAMaskHQ',
|
| 99 |
+
img_dir='CelebA-HQ-img/',
|
| 100 |
+
ann_dir='CelebA-HQ-mask/',
|
| 101 |
+
pipeline=[
|
| 102 |
+
dict(type='LoadImageFromFile'),
|
| 103 |
+
dict(
|
| 104 |
+
type='MultiScaleFlipAug',
|
| 105 |
+
img_scale=(1024, 1024),
|
| 106 |
+
img_ratios=[0.5],
|
| 107 |
+
flip=False,
|
| 108 |
+
transforms=[
|
| 109 |
+
dict(type='Resize', keep_ratio=True),
|
| 110 |
+
dict(type='RandomFlip', prob=0),
|
| 111 |
+
dict(
|
| 112 |
+
type='Normalize',
|
| 113 |
+
mean=[123.675, 116.28, 103.53],
|
| 114 |
+
std=[58.395, 57.12, 57.375],
|
| 115 |
+
to_rgb=True),
|
| 116 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 117 |
+
dict(type='Collect', keys=['img'])
|
| 118 |
+
])
|
| 119 |
+
],
|
| 120 |
+
split='val_split.txt'),
|
| 121 |
+
test=dict(
|
| 122 |
+
type='CelebAMaskHQDataset',
|
| 123 |
+
data_root='./data/CelebAMaskHQ',
|
| 124 |
+
img_dir='CelebA-HQ-img/',
|
| 125 |
+
ann_dir='CelebA-HQ-mask/',
|
| 126 |
+
pipeline=[
|
| 127 |
+
dict(type='LoadImageFromFile'),
|
| 128 |
+
dict(
|
| 129 |
+
type='MultiScaleFlipAug',
|
| 130 |
+
img_scale=(1024, 1024),
|
| 131 |
+
img_ratios=[0.5],
|
| 132 |
+
flip=False,
|
| 133 |
+
transforms=[
|
| 134 |
+
dict(type='Resize', keep_ratio=True),
|
| 135 |
+
dict(type='RandomFlip', prob=0),
|
| 136 |
+
dict(
|
| 137 |
+
type='Normalize',
|
| 138 |
+
mean=[123.675, 116.28, 103.53],
|
| 139 |
+
std=[58.395, 57.12, 57.375],
|
| 140 |
+
to_rgb=True),
|
| 141 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 142 |
+
dict(type='Collect', keys=['img'])
|
| 143 |
+
])
|
| 144 |
+
],
|
| 145 |
+
split='val_split.txt'))
|
| 146 |
+
log_config = dict(
|
| 147 |
+
interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
|
| 148 |
+
dist_params = dict(backend='nccl')
|
| 149 |
+
log_level = 'INFO'
|
| 150 |
+
load_from = None
|
| 151 |
+
resume_from = None
|
| 152 |
+
workflow = [('train', 1)]
|
| 153 |
+
cudnn_benchmark = True
|
| 154 |
+
optimizer = dict(
|
| 155 |
+
type='AdamW',
|
| 156 |
+
lr=6e-05,
|
| 157 |
+
betas=(0.9, 0.999),
|
| 158 |
+
weight_decay=0.01,
|
| 159 |
+
paramwise_cfg=dict(
|
| 160 |
+
custom_keys=dict(
|
| 161 |
+
pos_block=dict(decay_mult=0.0),
|
| 162 |
+
norm=dict(decay_mult=0.0),
|
| 163 |
+
head=dict(lr_mult=10.0))))
|
| 164 |
+
optimizer_config = dict()
|
| 165 |
+
lr_config = dict(
|
| 166 |
+
policy='poly',
|
| 167 |
+
warmup='linear',
|
| 168 |
+
warmup_iters=1500,
|
| 169 |
+
warmup_ratio=1e-06,
|
| 170 |
+
power=1.0,
|
| 171 |
+
min_lr=0.0,
|
| 172 |
+
by_epoch=False)
|
| 173 |
+
runner = dict(type='IterBasedRunner', max_iters=160000)
|
| 174 |
+
checkpoint_config = dict(by_epoch=False, interval=10000, max_keep_ckpts=5)
|
| 175 |
+
evaluation = dict(interval=10000, metric='mIoU', save_best='mIoU')
|
| 176 |
+
find_unused_parameters = True
|
| 177 |
+
work_dir = './work_dirs/segnext.large.512x512.celebamaskhq.160k'
|
| 178 |
+
gpu_ids = [0]
|
| 179 |
+
auto_resume = False
|
pretrained_ckpts/face_parsing/segnext.small.512x512.celebamaskhq.160k.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
| 2 |
+
ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
|
| 3 |
+
model = dict(
|
| 4 |
+
type='EncoderDecoder',
|
| 5 |
+
pretrained=None,
|
| 6 |
+
backbone=dict(
|
| 7 |
+
type='MSCAN',
|
| 8 |
+
embed_dims=[64, 128, 320, 512],
|
| 9 |
+
mlp_ratios=[8, 8, 4, 4],
|
| 10 |
+
drop_rate=0.0,
|
| 11 |
+
drop_path_rate=0.1,
|
| 12 |
+
depths=[2, 2, 4, 2],
|
| 13 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 14 |
+
init_cfg=dict(type='Pretrained', checkpoint='pretrained/mscan_s.pth')),
|
| 15 |
+
decode_head=dict(
|
| 16 |
+
type='LightHamHead',
|
| 17 |
+
in_channels=[128, 320, 512],
|
| 18 |
+
in_index=[1, 2, 3],
|
| 19 |
+
channels=256,
|
| 20 |
+
ham_channels=256,
|
| 21 |
+
dropout_ratio=0.1,
|
| 22 |
+
num_classes=19,
|
| 23 |
+
norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
|
| 24 |
+
align_corners=False,
|
| 25 |
+
loss_decode=dict(
|
| 26 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
| 27 |
+
ham_kwargs=dict(MD_R=16)),
|
| 28 |
+
train_cfg=dict(),
|
| 29 |
+
test_cfg=dict(mode='whole'))
|
| 30 |
+
dataset_type = 'CelebAMaskHQDataset'
|
| 31 |
+
data_root = './data/CelebAMaskHQ'
|
| 32 |
+
img_norm_cfg = dict(
|
| 33 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 34 |
+
crop_size = (512, 512)
|
| 35 |
+
train_pipeline = [
|
| 36 |
+
dict(type='LoadImageFromFile'),
|
| 37 |
+
dict(type='LoadAnnotations', reduce_zero_label=False),
|
| 38 |
+
dict(type='Resize', img_scale=(512, 512), ratio_range=(1.0, 1.0)),
|
| 39 |
+
dict(type='RandomFlip', prob=0),
|
| 40 |
+
dict(type='PhotoMetricDistortion'),
|
| 41 |
+
dict(
|
| 42 |
+
type='Normalize',
|
| 43 |
+
mean=[123.675, 116.28, 103.53],
|
| 44 |
+
std=[58.395, 57.12, 57.375],
|
| 45 |
+
to_rgb=True),
|
| 46 |
+
dict(type='DefaultFormatBundle'),
|
| 47 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 48 |
+
]
|
| 49 |
+
test_pipeline = [
|
| 50 |
+
dict(type='LoadImageFromFile'),
|
| 51 |
+
dict(
|
| 52 |
+
type='MultiScaleFlipAug',
|
| 53 |
+
img_scale=(1024, 1024),
|
| 54 |
+
img_ratios=[0.5],
|
| 55 |
+
flip=False,
|
| 56 |
+
transforms=[
|
| 57 |
+
dict(type='Resize', keep_ratio=True),
|
| 58 |
+
dict(type='RandomFlip', prob=0),
|
| 59 |
+
dict(
|
| 60 |
+
type='Normalize',
|
| 61 |
+
mean=[123.675, 116.28, 103.53],
|
| 62 |
+
std=[58.395, 57.12, 57.375],
|
| 63 |
+
to_rgb=True),
|
| 64 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 65 |
+
dict(type='Collect', keys=['img'])
|
| 66 |
+
])
|
| 67 |
+
]
|
| 68 |
+
data = dict(
|
| 69 |
+
samples_per_gpu=16,
|
| 70 |
+
workers_per_gpu=8,
|
| 71 |
+
train=dict(
|
| 72 |
+
type='RepeatDataset',
|
| 73 |
+
times=50,
|
| 74 |
+
dataset=dict(
|
| 75 |
+
type='CelebAMaskHQDataset',
|
| 76 |
+
data_root='./data/CelebAMaskHQ',
|
| 77 |
+
img_dir='CelebA-HQ-img/',
|
| 78 |
+
ann_dir='CelebA-HQ-mask/',
|
| 79 |
+
pipeline=[
|
| 80 |
+
dict(type='LoadImageFromFile'),
|
| 81 |
+
dict(type='LoadAnnotations', reduce_zero_label=False),
|
| 82 |
+
dict(
|
| 83 |
+
type='Resize',
|
| 84 |
+
img_scale=(512, 512),
|
| 85 |
+
ratio_range=(1.0, 1.0)),
|
| 86 |
+
dict(type='RandomFlip', prob=0),
|
| 87 |
+
dict(type='PhotoMetricDistortion'),
|
| 88 |
+
dict(
|
| 89 |
+
type='Normalize',
|
| 90 |
+
mean=[123.675, 116.28, 103.53],
|
| 91 |
+
std=[58.395, 57.12, 57.375],
|
| 92 |
+
to_rgb=True),
|
| 93 |
+
dict(type='DefaultFormatBundle'),
|
| 94 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 95 |
+
],
|
| 96 |
+
split='train_split.txt')),
|
| 97 |
+
val=dict(
|
| 98 |
+
type='CelebAMaskHQDataset',
|
| 99 |
+
data_root='./data/CelebAMaskHQ',
|
| 100 |
+
img_dir='CelebA-HQ-img/',
|
| 101 |
+
ann_dir='CelebA-HQ-mask/',
|
| 102 |
+
pipeline=[
|
| 103 |
+
dict(type='LoadImageFromFile'),
|
| 104 |
+
dict(
|
| 105 |
+
type='MultiScaleFlipAug',
|
| 106 |
+
img_scale=(1024, 1024),
|
| 107 |
+
img_ratios=[0.5],
|
| 108 |
+
flip=False,
|
| 109 |
+
transforms=[
|
| 110 |
+
dict(type='Resize', keep_ratio=True),
|
| 111 |
+
dict(type='RandomFlip', prob=0),
|
| 112 |
+
dict(
|
| 113 |
+
type='Normalize',
|
| 114 |
+
mean=[123.675, 116.28, 103.53],
|
| 115 |
+
std=[58.395, 57.12, 57.375],
|
| 116 |
+
to_rgb=True),
|
| 117 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 118 |
+
dict(type='Collect', keys=['img'])
|
| 119 |
+
])
|
| 120 |
+
],
|
| 121 |
+
split='val_split.txt'),
|
| 122 |
+
test=dict(
|
| 123 |
+
type='CelebAMaskHQDataset',
|
| 124 |
+
data_root='./data/CelebAMaskHQ',
|
| 125 |
+
img_dir='CelebA-HQ-img/',
|
| 126 |
+
ann_dir='CelebA-HQ-mask/',
|
| 127 |
+
pipeline=[
|
| 128 |
+
dict(type='LoadImageFromFile'),
|
| 129 |
+
dict(
|
| 130 |
+
type='MultiScaleFlipAug',
|
| 131 |
+
img_scale=(1024, 1024),
|
| 132 |
+
img_ratios=[0.5],
|
| 133 |
+
flip=False,
|
| 134 |
+
transforms=[
|
| 135 |
+
dict(type='Resize', keep_ratio=True),
|
| 136 |
+
dict(type='RandomFlip', prob=0),
|
| 137 |
+
dict(
|
| 138 |
+
type='Normalize',
|
| 139 |
+
mean=[123.675, 116.28, 103.53],
|
| 140 |
+
std=[58.395, 57.12, 57.375],
|
| 141 |
+
to_rgb=True),
|
| 142 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 143 |
+
dict(type='Collect', keys=['img'])
|
| 144 |
+
])
|
| 145 |
+
],
|
| 146 |
+
split='val_split.txt'))
|
| 147 |
+
log_config = dict(
|
| 148 |
+
interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
|
| 149 |
+
dist_params = dict(backend='nccl')
|
| 150 |
+
log_level = 'INFO'
|
| 151 |
+
load_from = None
|
| 152 |
+
resume_from = None
|
| 153 |
+
workflow = [('train', 1)]
|
| 154 |
+
cudnn_benchmark = True
|
| 155 |
+
optimizer = dict(
|
| 156 |
+
type='AdamW',
|
| 157 |
+
lr=6e-05,
|
| 158 |
+
betas=(0.9, 0.999),
|
| 159 |
+
weight_decay=0.01,
|
| 160 |
+
paramwise_cfg=dict(
|
| 161 |
+
custom_keys=dict(
|
| 162 |
+
pos_block=dict(decay_mult=0.0),
|
| 163 |
+
norm=dict(decay_mult=0.0),
|
| 164 |
+
head=dict(lr_mult=10.0))))
|
| 165 |
+
optimizer_config = dict()
|
| 166 |
+
lr_config = dict(
|
| 167 |
+
policy='poly',
|
| 168 |
+
warmup='linear',
|
| 169 |
+
warmup_iters=1500,
|
| 170 |
+
warmup_ratio=1e-06,
|
| 171 |
+
power=1.0,
|
| 172 |
+
min_lr=0.0,
|
| 173 |
+
by_epoch=False)
|
| 174 |
+
runner = dict(type='IterBasedRunner', max_iters=160000)
|
| 175 |
+
checkpoint_config = dict(by_epoch=False, interval=10000, max_keep_ckpts=5)
|
| 176 |
+
evaluation = dict(interval=10000, metric='mIoU', save_best='mIoU')
|
| 177 |
+
find_unused_parameters = True
|
| 178 |
+
work_dir = './work_dirs/segnext.small.512x512.celebamaskhq.160k'
|
| 179 |
+
gpu_ids = [0]
|
| 180 |
+
auto_resume = False
|
pretrained_ckpts/face_parsing/segnext.small.best_mIoU_iter_140000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f87738b6b6f5dca82cc63298d3d625f81915a9c1ed3d0a359b8866b2b76b321
|
| 3 |
+
size 167259923
|
pretrained_ckpts/face_parsing/segnext.tiny.512x512.celebamaskhq.160k.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
| 2 |
+
ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
|
| 3 |
+
model = dict(
|
| 4 |
+
type='EncoderDecoder',
|
| 5 |
+
pretrained=None,
|
| 6 |
+
backbone=dict(
|
| 7 |
+
type='MSCAN',
|
| 8 |
+
embed_dims=[32, 64, 160, 256],
|
| 9 |
+
mlp_ratios=[8, 8, 4, 4],
|
| 10 |
+
drop_rate=0.0,
|
| 11 |
+
drop_path_rate=0.1,
|
| 12 |
+
depths=[3, 3, 5, 2],
|
| 13 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 14 |
+
init_cfg=dict(type='Pretrained', checkpoint='pretrained/mscan_t.pth')),
|
| 15 |
+
decode_head=dict(
|
| 16 |
+
type='LightHamHead',
|
| 17 |
+
in_channels=[64, 160, 256],
|
| 18 |
+
in_index=[1, 2, 3],
|
| 19 |
+
channels=256,
|
| 20 |
+
ham_channels=256,
|
| 21 |
+
dropout_ratio=0.1,
|
| 22 |
+
num_classes=150,
|
| 23 |
+
norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
|
| 24 |
+
align_corners=False,
|
| 25 |
+
loss_decode=dict(
|
| 26 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
| 27 |
+
ham_kwargs=dict(MD_R=16)),
|
| 28 |
+
train_cfg=dict(),
|
| 29 |
+
test_cfg=dict(mode='whole'))
|
| 30 |
+
dataset_type = 'CelebAMaskHQDataset'
|
| 31 |
+
data_root = './data/CelebAMaskHQ'
|
| 32 |
+
img_norm_cfg = dict(
|
| 33 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 34 |
+
crop_size = (512, 512)
|
| 35 |
+
train_pipeline = [
|
| 36 |
+
dict(type='LoadImageFromFile'),
|
| 37 |
+
dict(type='LoadAnnotations', reduce_zero_label=False),
|
| 38 |
+
dict(type='Resize', img_scale=(512, 512), ratio_range=(1.0, 1.0)),
|
| 39 |
+
dict(type='RandomFlip', prob=0),
|
| 40 |
+
dict(type='PhotoMetricDistortion'),
|
| 41 |
+
dict(
|
| 42 |
+
type='Normalize',
|
| 43 |
+
mean=[123.675, 116.28, 103.53],
|
| 44 |
+
std=[58.395, 57.12, 57.375],
|
| 45 |
+
to_rgb=True),
|
| 46 |
+
dict(type='DefaultFormatBundle'),
|
| 47 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 48 |
+
]
|
| 49 |
+
test_pipeline = [
|
| 50 |
+
dict(type='LoadImageFromFile'),
|
| 51 |
+
dict(
|
| 52 |
+
type='MultiScaleFlipAug',
|
| 53 |
+
img_scale=(1024, 1024),
|
| 54 |
+
img_ratios=[0.5],
|
| 55 |
+
flip=False,
|
| 56 |
+
transforms=[
|
| 57 |
+
dict(type='Resize', keep_ratio=True),
|
| 58 |
+
dict(type='RandomFlip', prob=0),
|
| 59 |
+
dict(
|
| 60 |
+
type='Normalize',
|
| 61 |
+
mean=[123.675, 116.28, 103.53],
|
| 62 |
+
std=[58.395, 57.12, 57.375],
|
| 63 |
+
to_rgb=True),
|
| 64 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 65 |
+
dict(type='Collect', keys=['img'])
|
| 66 |
+
])
|
| 67 |
+
]
|
| 68 |
+
data = dict(
|
| 69 |
+
samples_per_gpu=16,
|
| 70 |
+
workers_per_gpu=8,
|
| 71 |
+
train=dict(
|
| 72 |
+
type='RepeatDataset',
|
| 73 |
+
times=50,
|
| 74 |
+
dataset=dict(
|
| 75 |
+
type='CelebAMaskHQDataset',
|
| 76 |
+
data_root='./data/CelebAMaskHQ',
|
| 77 |
+
img_dir='CelebA-HQ-img/',
|
| 78 |
+
ann_dir='CelebA-HQ-mask/',
|
| 79 |
+
pipeline=[
|
| 80 |
+
dict(type='LoadImageFromFile'),
|
| 81 |
+
dict(type='LoadAnnotations', reduce_zero_label=False),
|
| 82 |
+
dict(
|
| 83 |
+
type='Resize',
|
| 84 |
+
img_scale=(512, 512),
|
| 85 |
+
ratio_range=(1.0, 1.0)),
|
| 86 |
+
dict(type='RandomFlip', prob=0),
|
| 87 |
+
dict(type='PhotoMetricDistortion'),
|
| 88 |
+
dict(
|
| 89 |
+
type='Normalize',
|
| 90 |
+
mean=[123.675, 116.28, 103.53],
|
| 91 |
+
std=[58.395, 57.12, 57.375],
|
| 92 |
+
to_rgb=True),
|
| 93 |
+
dict(type='DefaultFormatBundle'),
|
| 94 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 95 |
+
],
|
| 96 |
+
split='train_split.txt')),
|
| 97 |
+
val=dict(
|
| 98 |
+
type='CelebAMaskHQDataset',
|
| 99 |
+
data_root='./data/CelebAMaskHQ',
|
| 100 |
+
img_dir='CelebA-HQ-img/',
|
| 101 |
+
ann_dir='CelebA-HQ-mask/',
|
| 102 |
+
pipeline=[
|
| 103 |
+
dict(type='LoadImageFromFile'),
|
| 104 |
+
dict(
|
| 105 |
+
type='MultiScaleFlipAug',
|
| 106 |
+
img_scale=(1024, 1024),
|
| 107 |
+
img_ratios=[0.5],
|
| 108 |
+
flip=False,
|
| 109 |
+
transforms=[
|
| 110 |
+
dict(type='Resize', keep_ratio=True),
|
| 111 |
+
dict(type='RandomFlip', prob=0),
|
| 112 |
+
dict(
|
| 113 |
+
type='Normalize',
|
| 114 |
+
mean=[123.675, 116.28, 103.53],
|
| 115 |
+
std=[58.395, 57.12, 57.375],
|
| 116 |
+
to_rgb=True),
|
| 117 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 118 |
+
dict(type='Collect', keys=['img'])
|
| 119 |
+
])
|
| 120 |
+
],
|
| 121 |
+
split='val_split.txt'),
|
| 122 |
+
test=dict(
|
| 123 |
+
type='CelebAMaskHQDataset',
|
| 124 |
+
data_root='./data/CelebAMaskHQ',
|
| 125 |
+
img_dir='CelebA-HQ-img/',
|
| 126 |
+
ann_dir='CelebA-HQ-mask/',
|
| 127 |
+
pipeline=[
|
| 128 |
+
dict(type='LoadImageFromFile'),
|
| 129 |
+
dict(
|
| 130 |
+
type='MultiScaleFlipAug',
|
| 131 |
+
img_scale=(1024, 1024),
|
| 132 |
+
img_ratios=[0.5],
|
| 133 |
+
flip=False,
|
| 134 |
+
transforms=[
|
| 135 |
+
dict(type='Resize', keep_ratio=True),
|
| 136 |
+
dict(type='RandomFlip', prob=0),
|
| 137 |
+
dict(
|
| 138 |
+
type='Normalize',
|
| 139 |
+
mean=[123.675, 116.28, 103.53],
|
| 140 |
+
std=[58.395, 57.12, 57.375],
|
| 141 |
+
to_rgb=True),
|
| 142 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 143 |
+
dict(type='Collect', keys=['img'])
|
| 144 |
+
])
|
| 145 |
+
],
|
| 146 |
+
split='val_split.txt'))
|
| 147 |
+
log_config = dict(
|
| 148 |
+
interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
|
| 149 |
+
dist_params = dict(backend='nccl')
|
| 150 |
+
log_level = 'INFO'
|
| 151 |
+
load_from = None
|
| 152 |
+
resume_from = None
|
| 153 |
+
workflow = [('train', 1)]
|
| 154 |
+
cudnn_benchmark = True
|
| 155 |
+
optimizer = dict(
|
| 156 |
+
type='AdamW',
|
| 157 |
+
lr=6e-05,
|
| 158 |
+
betas=(0.9, 0.999),
|
| 159 |
+
weight_decay=0.01,
|
| 160 |
+
paramwise_cfg=dict(
|
| 161 |
+
custom_keys=dict(
|
| 162 |
+
pos_block=dict(decay_mult=0.0),
|
| 163 |
+
norm=dict(decay_mult=0.0),
|
| 164 |
+
head=dict(lr_mult=10.0))))
|
| 165 |
+
optimizer_config = dict()
|
| 166 |
+
lr_config = dict(
|
| 167 |
+
policy='poly',
|
| 168 |
+
warmup='linear',
|
| 169 |
+
warmup_iters=1500,
|
| 170 |
+
warmup_ratio=1e-06,
|
| 171 |
+
power=1.0,
|
| 172 |
+
min_lr=0.0,
|
| 173 |
+
by_epoch=False)
|
| 174 |
+
runner = dict(type='IterBasedRunner', max_iters=160000)
|
| 175 |
+
checkpoint_config = dict(by_epoch=False, interval=10000, max_keep_ckpts=5)
|
| 176 |
+
evaluation = dict(interval=10000, metric='mIoU', save_best='mIoU')
|
| 177 |
+
find_unused_parameters = True
|
| 178 |
+
work_dir = './work_dirs/segnext.tiny.512x512.celebamaskhq.160k'
|
| 179 |
+
gpu_ids = [0]
|
| 180 |
+
auto_resume = False
|
pretrained_ckpts/facevid2vid/00000189-checkpoint.pth.tar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fec00af84967380eb696cd836b948de98a0eddbc8de683101ab886de182789cd
|
| 3 |
+
size 2112477593
|
pretrained_ckpts/facevid2vid/vox-256.yaml
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_params:
|
| 2 |
+
root_dir: /zlh/VoxCeleb/first-order-256
|
| 3 |
+
frame_shape: [256, 256, 3]
|
| 4 |
+
id_sampling: True
|
| 5 |
+
pairs_list: None
|
| 6 |
+
augmentation_params:
|
| 7 |
+
flip_param:
|
| 8 |
+
horizontal_flip: True
|
| 9 |
+
time_flip: True
|
| 10 |
+
jitter_param:
|
| 11 |
+
brightness: 0.1
|
| 12 |
+
contrast: 0.1
|
| 13 |
+
saturation: 0.1
|
| 14 |
+
hue: 0.1
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
model_params:
|
| 18 |
+
common_params:
|
| 19 |
+
num_kp: 15
|
| 20 |
+
image_channel: 3
|
| 21 |
+
feature_channel: 32
|
| 22 |
+
estimate_jacobian: False # True
|
| 23 |
+
kp_detector_params:
|
| 24 |
+
temperature: 0.1
|
| 25 |
+
block_expansion: 32
|
| 26 |
+
max_features: 1024
|
| 27 |
+
scale_factor: 0.25 # 0.25
|
| 28 |
+
num_blocks: 5
|
| 29 |
+
reshape_channel: 16384 # 16384 = 1024 * 16
|
| 30 |
+
reshape_depth: 16
|
| 31 |
+
he_estimator_params:
|
| 32 |
+
block_expansion: 64
|
| 33 |
+
max_features: 2048
|
| 34 |
+
num_bins: 66
|
| 35 |
+
generator_params:
|
| 36 |
+
block_expansion: 64
|
| 37 |
+
max_features: 512
|
| 38 |
+
num_down_blocks: 2
|
| 39 |
+
reshape_channel: 32
|
| 40 |
+
reshape_depth: 16 # 512 = 32 * 16
|
| 41 |
+
num_resblocks: 6
|
| 42 |
+
estimate_occlusion_map: True
|
| 43 |
+
dense_motion_params:
|
| 44 |
+
block_expansion: 32
|
| 45 |
+
max_features: 1024
|
| 46 |
+
num_blocks: 5
|
| 47 |
+
# reshape_channel: 32
|
| 48 |
+
reshape_depth: 16
|
| 49 |
+
compress: 4
|
| 50 |
+
discriminator_params:
|
| 51 |
+
scales: [1]
|
| 52 |
+
block_expansion: 32
|
| 53 |
+
max_features: 512
|
| 54 |
+
num_blocks: 4
|
| 55 |
+
sn: True
|
| 56 |
+
|
| 57 |
+
train_params:
|
| 58 |
+
num_epochs: 300
|
| 59 |
+
num_repeats: 75
|
| 60 |
+
epoch_milestones: [180,]
|
| 61 |
+
lr_generator: 2.0e-4
|
| 62 |
+
lr_discriminator: 2.0e-4
|
| 63 |
+
lr_kp_detector: 2.0e-4
|
| 64 |
+
lr_he_estimator: 2.0e-4
|
| 65 |
+
gan_mode: 'hinge' # hinge or ls
|
| 66 |
+
batch_size: 32
|
| 67 |
+
scales: [1, 0.5, 0.25, 0.125]
|
| 68 |
+
checkpoint_freq: 10
|
| 69 |
+
hopenet_snapshot: '/zlh/Project/deep-head-pose/checkpoints/hopenet_robust_alpha1.pkl'
|
| 70 |
+
transform_params:
|
| 71 |
+
sigma_affine: 0.05
|
| 72 |
+
sigma_tps: 0.005
|
| 73 |
+
points_tps: 5
|
| 74 |
+
loss_weights:
|
| 75 |
+
generator_gan: 1
|
| 76 |
+
discriminator_gan: 1
|
| 77 |
+
feature_matching: [10, 10, 10, 10]
|
| 78 |
+
perceptual: [10, 10, 10, 10, 10]
|
| 79 |
+
equivariance_value: 10
|
| 80 |
+
equivariance_jacobian: 0 # 10
|
| 81 |
+
keypoint: 10
|
| 82 |
+
headpose: 20
|
| 83 |
+
expression: 5
|
| 84 |
+
|
| 85 |
+
visualizer_params:
|
| 86 |
+
kp_size: 5
|
| 87 |
+
draw_border: True
|
| 88 |
+
colormap: 'gist_rainbow'
|
pretrained_ckpts/gpen/fetch_gepn_models.sh
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
mkdir weights
|
| 2 |
+
|
| 3 |
+
wget https://public-vigen-video.oss-cn-shanghai.aliyuncs.com/robin/models/RetinaFace-R50.pth
|
| 4 |
+
wget https://public-vigen-video.oss-cn-shanghai.aliyuncs.com/robin/models/realesrnet_x4.pth
|
| 5 |
+
wget https://public-vigen-video.oss-cn-shanghai.aliyuncs.com/robin/models/GPEN-BFR-512.pth
|
| 6 |
+
wget https://public-vigen-video.oss-cn-shanghai.aliyuncs.com/robin/models/ParseNet-latest.pth
|
| 7 |
+
|
| 8 |
+
mv *.pth ./weights
|
pretrained_ckpts/gpen/weights/GPEN-BFR-512.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1002c41add95b0decad69604d80455576f7187dd99ca16bd611bcfd44c10b51
|
| 3 |
+
size 284085738
|
pretrained_ckpts/gpen/weights/ParseNet-latest.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d558d8d0e42c20224f13cf5a29c79eba2d59913419f945545d8cf7b72920de2
|
| 3 |
+
size 85331193
|
pretrained_ckpts/gpen/weights/RetinaFace-R50.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d1de9c2944f2ccddca5f5e010ea5ae64a39845a86311af6fdf30841b0a5a16d
|
| 3 |
+
size 109497761
|
pretrained_ckpts/gpen/weights/realesrnet_x4.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa00f09ad753d88576b21ed977e97d634976377031b178acc3b5b238df463400
|
| 3 |
+
size 67040989
|
pretrained_ckpts/put_ckpts_accordingly.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Please download the pre-trained models as described in the project documentation and place each one in its corresponding folder.
|
pretrained_ckpts/shape_predictor_68_face_landmarks.dat
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
|
| 3 |
+
size 99693937
|
pretrained_ckpts/stylegan2/stylegan2-ffhq-config-f.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bae494ef77e32a9cd1792a81a3c167692a0e64f6bcd8b06592ff42917e2ed46e
|
| 3 |
+
size 381462551
|