Add files using upload-large-folder tool
Browse files- preprocess/output/video00002_img00002/example/negative/0001.jpg +0 -0
- preprocess/output/video00002_img00002/example/negative/0002.jpg +0 -0
- preprocess/output/video00002_img00002/example/negative/0003.jpg +0 -0
- preprocess/output/video00002_img00002/example/negative/0004.jpg +0 -0
- preprocess/output/video00002_img00002/example/negative/0005.jpg +0 -0
- preprocess/output/video00002_img00002/example/negative/0006.jpg +0 -0
- preprocess/output/video00002_img00002/example/negative/0007.jpg +0 -0
- preprocess/output/video00002_img00002/example/negative/0008.jpg +0 -0
- preprocess/output/video00002_img00002/example/negative/0009.jpg +0 -0
- preprocess/output/video00002_img00002/example/negative/0010.jpg +0 -0
- preprocess/output/video00002_img00002/example/negative/0011.jpg +0 -0
- preprocess/output/video00002_img00002/example/negative/0012.jpg +0 -0
- preprocess/output/video00002_img00002/example/negative/0013.jpg +0 -0
- preprocess/output/video00002_img00002/example/negative/0014.jpg +0 -0
- preprocess/output/video00002_img00002/example/prompt.txt +1 -0
- preprocess/pose/config/yolox_l_8xb8-300e_coco.py +245 -0
- preprocess/pose/script/dwpose.py +143 -0
- preprocess/pose/script/tool.py +130 -0
- preprocess/pose/script/util.py +153 -0
- preprocess/pose/script/wholebody.py +121 -0
preprocess/output/video00002_img00002/example/negative/0001.jpg
ADDED
|
preprocess/output/video00002_img00002/example/negative/0002.jpg
ADDED
|
preprocess/output/video00002_img00002/example/negative/0003.jpg
ADDED
|
preprocess/output/video00002_img00002/example/negative/0004.jpg
ADDED
|
preprocess/output/video00002_img00002/example/negative/0005.jpg
ADDED
|
preprocess/output/video00002_img00002/example/negative/0006.jpg
ADDED
|
preprocess/output/video00002_img00002/example/negative/0007.jpg
ADDED
|
preprocess/output/video00002_img00002/example/negative/0008.jpg
ADDED
|
preprocess/output/video00002_img00002/example/negative/0009.jpg
ADDED
|
preprocess/output/video00002_img00002/example/negative/0010.jpg
ADDED
|
preprocess/output/video00002_img00002/example/negative/0011.jpg
ADDED
|
preprocess/output/video00002_img00002/example/negative/0012.jpg
ADDED
|
preprocess/output/video00002_img00002/example/negative/0013.jpg
ADDED
|
preprocess/output/video00002_img00002/example/negative/0014.jpg
ADDED
|
preprocess/output/video00002_img00002/example/prompt.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
A person performs a lively, rhythmic dance routine on a rooftop under a clear blue sky. Moving with energy and confidence, weight shifts from side to side while arms extend in expressive, flowing gestures—sometimes raised above the head, sometimes swept outward. Movements remain fluid and coordinated, emphasizing the upper body and legs as light steps keep a steady rhythm. Transitions between poses are smooth, with occasional pauses to gesture directly toward the camera with a thumbs-up, adding a playful, engaging element to the performance.
|
preprocess/pose/config/yolox_l_8xb8-300e_coco.py
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MMDetection 3.x config: YOLOX-L trained on COCO, 8 GPUs x 8 imgs/GPU, 300 epochs.

img_scale = (640, 640)  # width, height

# model settings
model = dict(
    type='YOLOX',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        pad_size_divisor=32,
        batch_augments=[
            dict(
                type='BatchSyncRandomResize',
                random_size_range=(480, 800),
                size_divisor=32,
                interval=10)
        ]),
    backbone=dict(
        type='CSPDarknet',
        deepen_factor=1.0,
        widen_factor=1.0,
        out_indices=(2, 3, 4),
        use_depthwise=False,
        spp_kernal_sizes=(5, 9, 13),
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
    ),
    neck=dict(
        type='YOLOXPAFPN',
        in_channels=[256, 512, 1024],
        out_channels=256,
        num_csp_blocks=3,
        use_depthwise=False,
        upsample_cfg=dict(scale_factor=2, mode='nearest'),
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish')),
    bbox_head=dict(
        type='YOLOXHead',
        num_classes=80,
        in_channels=256,
        feat_channels=256,
        stacked_convs=2,
        strides=(8, 16, 32),
        use_depthwise=False,
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0),
        loss_bbox=dict(
            type='IoULoss',
            mode='square',
            eps=1e-16,
            reduction='sum',
            loss_weight=5.0),
        loss_obj=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0),
        loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0)),
    train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
    # In order to align the source code, the threshold of the val phase is
    # 0.01, and the threshold of the test phase is 0.001.
    test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))

# dataset settings
data_root = 'data/coco/'
dataset_type = 'CocoDataset'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
    dict(
        type='RandomAffine',
        scaling_ratio_range=(0.1, 2),
        # img_scale is (width, height)
        border=(-img_scale[0] // 2, -img_scale[1] // 2)),
    dict(
        type='MixUp',
        img_scale=img_scale,
        ratio_range=(0.8, 1.6),
        pad_val=114.0),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', prob=0.5),
    # According to the official implementation, multi-scale
    # training is not considered here but in the
    # 'mmdet/models/detectors/yolox.py'.
    # Resize and Pad are for the last 15 epochs when Mosaic,
    # RandomAffine, and MixUp are closed by YOLOXModeSwitchHook.
    dict(type='Resize', scale=img_scale, keep_ratio=True),
    dict(
        type='Pad',
        pad_to_square=True,
        # If the image is three-channel, the pad value needs
        # to be set separately for each channel.
        pad_val=dict(img=(114.0, 114.0, 114.0))),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
    dict(type='PackDetInputs')
]

train_dataset = dict(
    # use MultiImageMixDataset wrapper to support mosaic and mixup
    type='MultiImageMixDataset',
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_train2017.json',
        data_prefix=dict(img='train2017/'),
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=backend_args),
            dict(type='LoadAnnotations', with_bbox=True)
        ],
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        backend_args=backend_args),
    pipeline=train_pipeline)

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=img_scale, keep_ratio=True),
    dict(
        type='Pad',
        pad_to_square=True,
        pad_val=dict(img=(114.0, 114.0, 114.0))),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]

train_dataloader = dict(
    batch_size=8,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=train_dataset)
val_dataloader = dict(
    batch_size=8,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/instances_val2017.json',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator

# training settings
max_epochs = 300
num_last_epochs = 15
interval = 10

train_cfg = dict(max_epochs=max_epochs, val_interval=interval)

# optimizer
# default 8 gpu
base_lr = 0.01
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(
        type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
        nesterov=True),
    paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))

# learning rate
param_scheduler = [
    dict(
        # use quadratic formula to warm up 5 epochs
        # and lr is updated by iteration
        # TODO: fix default scope in get function
        type='mmdet.QuadraticWarmupLR',
        by_epoch=True,
        begin=0,
        end=5,
        convert_to_iter_based=True),
    dict(
        # use cosine lr from 5 to 285 epoch
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=5,
        T_max=max_epochs - num_last_epochs,
        end=max_epochs - num_last_epochs,
        by_epoch=True,
        convert_to_iter_based=True),
    dict(
        # use fixed lr during last 15 epochs
        type='ConstantLR',
        by_epoch=True,
        factor=1,
        begin=max_epochs - num_last_epochs,
        end=max_epochs,
    )
]

default_hooks = dict(
    checkpoint=dict(
        interval=interval,
        max_keep_ckpts=3  # only keep latest 3 checkpoints
    ))

custom_hooks = [
    dict(
        type='YOLOXModeSwitchHook',
        num_last_epochs=num_last_epochs,
        priority=48),
    dict(type='SyncNormHook', priority=48),
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        priority=49)
]

# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (8 samples per GPU)
auto_scale_lr = dict(base_batch_size=64)
|
preprocess/pose/script/dwpose.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Openpose
|
| 2 |
+
# Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose
|
| 3 |
+
# 2nd Edited by https://github.com/Hzzone/pytorch-openpose
|
| 4 |
+
# 3rd Edited by ControlNet
|
| 5 |
+
# 4th Edited by ControlNet (added face and correct hands)
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
|
| 9 |
+
|
| 10 |
+
import cv2
|
| 11 |
+
import torch
|
| 12 |
+
import numpy as np
|
| 13 |
+
from PIL import Image
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
import pose.script.util as util
|
| 17 |
+
|
| 18 |
+
def resize_image(input_image, resolution):
    """Resize *input_image* so its short edge is *resolution*, snapped to x64.

    Upscaling uses cubic interpolation, downscaling uses area interpolation.
    Output height/width are each rounded to the nearest multiple of 64.
    """
    h, w, _ = input_image.shape
    scale = float(resolution) / min(float(h), float(w))
    target_h = int(np.round(float(h) * scale / 64.0)) * 64
    target_w = int(np.round(float(w) * scale / 64.0)) * 64
    interp = cv2.INTER_CUBIC if scale > 1 else cv2.INTER_AREA
    return cv2.resize(input_image, (target_w, target_h), interpolation=interp)
| 30 |
+
def HWC3(x):
    """Normalize a uint8 image to 3-channel HWC layout.

    2-D input is promoted to a channel axis; 1-channel is replicated to 3;
    3-channel is returned unchanged; 4-channel RGBA is alpha-composited
    over a white background.
    """
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    channels = x.shape[2]
    assert channels == 1 or channels == 3 or channels == 4
    if channels == 3:
        return x
    if channels == 1:
        return np.concatenate([x, x, x], axis=2)
    # channels == 4: blend RGB over white using the alpha channel
    rgb = x[:, :, 0:3].astype(np.float32)
    alpha = x[:, :, 3:4].astype(np.float32) / 255.0
    blended = rgb * alpha + 255.0 * (1.0 - alpha)
    return blended.clip(0, 255).astype(np.uint8)
| 48 |
+
def draw_pose(pose, H, W, draw_face):
    """Render the detected keypoints onto a black H x W canvas.

    Only the most significant person is drawn: the first body/face entry
    and the first two hand entries. Face rendering is gated by *draw_face*.
    """
    bodies = pose['bodies']
    # keep only the most significant person
    faces = pose['faces'][:1]
    hands = pose['hands'][:2]
    candidate = bodies['candidate'][:18]
    subset = bodies['subset'][:1]

    canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8)
    canvas = util.draw_bodypose(canvas, candidate, subset)
    canvas = util.draw_handpose(canvas, hands)
    if draw_face == True:
        canvas = util.draw_facepose(canvas, faces)
    return canvas
| 70 |
+
class DWposeDetector:
    """Whole-body pose detector wrapping a YOLOX person detector + DWPose estimator.

    Calling an instance on an RGB image returns either the raw keypoint dict
    (when ``keypoints_only`` is True) or a rendered pose map plus that dict.
    """
    def __init__(self, det_config=None, det_ckpt=None, pose_config=None, pose_ckpt=None, device="cpu", keypoints_only=False):
        # Imported lazily so importing this module does not require mmdet/mmpose.
        from pose.script.wholebody import Wholebody

        self.pose_estimation = Wholebody(det_config, det_ckpt, pose_config, pose_ckpt, device)
        self.keypoints_only = keypoints_only
    def to(self, device):
        # Move the underlying models to `device`; returns self for chaining.
        self.pose_estimation.to(device)
        return self
    '''
    detect_resolution: short edge is resized to this; the raw rendering resolution when drawing the pose. Recommended: 1024.
    image_resolution: short edge is resized to this; the file resolution when saving the pose. Recommended: 768.

    Actual detection resolutions:
        yolox: (640, 640)
        dwpose: (288, 384)
    '''

    def __call__(self, input_image, detect_resolution=1024, image_resolution=768, output_type="pil", **kwargs):

        # RGB (PIL/ndarray) -> OpenCV BGR
        input_image = cv2.cvtColor(np.array(input_image, dtype=np.uint8), cv2.COLOR_RGB2BGR)
        # cv2.imshow('', input_image)
        # cv2.waitKey(0)

        input_image = HWC3(input_image)
        input_image = resize_image(input_image, detect_resolution)
        H, W, C = input_image.shape

        with torch.no_grad():
            candidate, subset = self.pose_estimation(input_image)
            nums, keys, locs = candidate.shape
            # normalize keypoint coordinates to [0, 1]
            candidate[..., 0] /= float(W)
            candidate[..., 1] /= float(H)
            body = candidate[:,:18].copy()
            body = body.reshape(nums*18, locs)
            score = subset[:,:18]

            # Confident joints (> 0.35) are replaced by a flat index into `body`;
            # low-confidence joints are marked -1 (missing).
            for i in range(len(score)):
                for j in range(len(score[i])):
                    if score[i][j] > 0.35: # if score[i][j] > 0.3:
                        score[i][j] = int(18*i+j)
                    else:
                        score[i][j] = -1

            un_visible = subset<0.35 # un_visible = subset<0.3
            candidate[un_visible] = -1

            # COCO-WholeBody layout: 18 body, 6 foot, 68 face, 2x21 hand points.
            foot = candidate[:,18:24]

            faces = candidate[:,24:92]

            # stack left/right hands as separate 21-point entries
            hands = candidate[:,92:113]
            hands = np.vstack([hands, candidate[:,113:]])

            bodies = dict(candidate=body, subset=score)
            pose = dict(bodies=bodies, hands=hands, faces=faces)

            if self.keypoints_only==True:
                return pose
            else:
                detected_map = draw_pose(pose, H, W, draw_face=False)
                detected_map = HWC3(detected_map)
                # downscale the rendered map to the save resolution
                img = resize_image(input_image, image_resolution)
                H, W, C = img.shape
                detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
                # cv2.imshow('detected_map',detected_map)
                # cv2.waitKey(0)

                if output_type == "pil":
                    detected_map = cv2.cvtColor(detected_map, cv2.COLOR_BGR2RGB)
                    detected_map = Image.fromarray(detected_map)

                return detected_map, pose
|
preprocess/pose/script/tool.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import importlib
|
| 2 |
+
import os
|
| 3 |
+
import os.path as osp
|
| 4 |
+
import shutil
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import av
|
| 9 |
+
import numpy as np
|
| 10 |
+
import torch
|
| 11 |
+
import torchvision
|
| 12 |
+
from einops import rearrange
|
| 13 |
+
from PIL import Image
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def seed_everything(seed):
    """Seed every RNG source (Python, NumPy, torch CPU + CUDA) with *seed*."""
    import random

    import numpy as np

    random.seed(seed)
    np.random.seed(seed % (2**32))  # NumPy only accepts seeds below 2**32
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
|
| 27 |
+
def import_filename(filename):
    """Load the Python source file at *filename* and return it as a module.

    The module is registered in ``sys.modules`` under the name "mymodule".
    """
    spec = importlib.util.spec_from_file_location("mymodule", filename)
    mod = importlib.util.module_from_spec(spec)
    sys.modules[spec.name] = mod
    spec.loader.exec_module(mod)
    return mod
|
| 35 |
+
def delete_additional_ckpt(base_path, num_keep):
    """Keep only the *num_keep* newest 'checkpoint-<step>' dirs under *base_path*.

    Directories are ranked by the integer after the last '-'; the earliest
    ones are deleted. Non-checkpoint entries are never touched.
    """
    ckpt_dirs = [d for d in os.listdir(base_path) if d.startswith("checkpoint-")]
    surplus = len(ckpt_dirs) - num_keep
    if surplus <= 0:
        return
    # sort by step number so the earliest checkpoints are removed first
    for name in sorted(ckpt_dirs, key=lambda d: int(d.split("-")[-1]))[:surplus]:
        target = osp.join(base_path, name)
        if osp.exists(target):
            shutil.rmtree(target)
|
| 51 |
+
def save_videos_from_pil(pil_images, path, fps):
    """Write a list of PIL frames to *path* as H.264 .mp4 or animated .gif."""
    suffix = Path(path).suffix
    os.makedirs(os.path.dirname(path), exist_ok=True)
    width, height = pil_images[0].size

    if suffix == ".mp4":
        container = av.open(path, "w")
        stream = container.add_stream("libx264", rate=fps)
        stream.width = width
        stream.height = height
        stream.pix_fmt = 'yuv420p'
        stream.bit_rate = 10000000
        stream.options["crf"] = "18"

        for frame_img in pil_images:
            container.mux(stream.encode(av.VideoFrame.from_image(frame_img)))
        container.mux(stream.encode())  # flush buffered frames out of the encoder
        container.close()

    elif suffix == ".gif":
        pil_images[0].save(
            fp=path,
            format="GIF",
            append_images=pil_images[1:],
            save_all=True,
            duration=(1 / fps * 1000),
            loop=0,
        )
    else:
        raise ValueError("Unsupported file type. Use .mp4 or .gif.")
+
|
| 88 |
+
def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=6, fps=8):
    """Tile a (b, c, t, h, w) video batch into one grid image per frame and save.

    When *rescale* is True, values are mapped from [-1, 1] to [0, 1] first.
    """
    per_frame = rearrange(videos, "b c t h w -> t b c h w")
    grid_frames = []

    for frame in per_frame:
        grid = torchvision.utils.make_grid(frame, nrow=n_rows)  # (c, h, w)
        grid = grid.transpose(0, 1).transpose(1, 2).squeeze(-1)  # (h, w, c)
        if rescale:
            grid = (grid + 1.0) / 2.0  # -1,1 -> 0,1
        arr = (grid * 255).numpy().astype(np.uint8)
        grid_frames.append(Image.fromarray(arr))

    os.makedirs(os.path.dirname(path), exist_ok=True)

    save_videos_from_pil(grid_frames, path, fps)
+
|
| 108 |
+
def read_frames(video_path):
    """Decode every frame of the video at *video_path* into RGB PIL images.

    Returns the frames as a list of ``PIL.Image``. The container is always
    closed, even if decoding raises.
    """
    container = av.open(video_path)
    try:
        video_stream = next(s for s in container.streams if s.type == "video")
        frames = []
        for packet in container.demux(video_stream):
            for frame in packet.decode():
                image = Image.frombytes(
                    "RGB",
                    (frame.width, frame.height),
                    frame.to_rgb().to_ndarray(),
                )
                frames.append(image)
    finally:
        # Bug fix: the container was previously never closed, leaking the
        # underlying file handle (sibling get_fps() does close it).
        container.close()

    return frames
+
|
| 125 |
+
def get_fps(video_path):
    """Return the average frame rate of the video at *video_path*."""
    container = av.open(video_path)
    stream = next(s for s in container.streams if s.type == "video")
    rate = stream.average_rate
    container.close()
    return rate
|
preprocess/pose/script/util.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
import numpy as np
|
| 3 |
+
import cv2
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
eps = 0.01
|
| 7 |
+
|
| 8 |
+
def smart_width(d):
    """Map a pixel distance *d* to a stroke width from 1 to 8.

    Thresholds double at each step: <5, <10, <20, <40, <80, <160, <320.
    """
    for width, limit in enumerate((5, 10, 20, 40, 80, 160, 320), start=1):
        if d < limit:
            return width
    return 8
+
|
| 27 |
+
|
| 28 |
+
def draw_bodypose(canvas, candidate, subset):
    """Draw the 18-keypoint OpenPose body skeleton onto *canvas*.

    canvas: (H, W, 3) uint8 image, modified in place and returned darkened.
    candidate: flat array of normalized (x, y, ...) keypoints in [0, 1].
    subset: per-person rows of indices into *candidate*; -1 marks a
        missing joint.
    """
    H, W, C = canvas.shape
    candidate = np.array(candidate)
    subset = np.array(subset)

    # 1-based joint index pairs defining the limbs (OpenPose ordering).
    limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
               [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
               [1, 16], [16, 18], [3, 17], [6, 18]]

    # One fixed color per limb / joint (BGR).
    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]

    # Limbs: a filled ellipse between each joint pair. Note Y holds x-pixels
    # and X holds y-pixels (original OpenPose convention kept as-is).
    for i in range(17):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i]) - 1]
            if -1 in index:
                continue
            Y = candidate[index.astype(int), 0] * float(W)
            X = candidate[index.astype(int), 1] * float(H)
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))

            width = 4 # width = smart_width(length)
            polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), width), int(angle), 0, 360, 1)
            cv2.fillConvexPoly(canvas, polygon, colors[i])

    # Darken the limb layer so the joint dots drawn next stand out.
    canvas = (canvas * 0.6).astype(np.uint8)

    # Joints: one filled circle per visible keypoint.
    for i in range(18):
        for n in range(len(subset)):
            index = int(subset[n][i])
            if index == -1:
                continue
            x, y = candidate[index][0:2]
            x = int(x * W)
            y = int(y * H)
            radius = 4
            cv2.circle(canvas, (int(x), int(y)), radius, colors[i], thickness=-1)

    return canvas
|
| 72 |
+
|
| 73 |
+
def draw_handpose(canvas, all_hand_peaks):
    """Draw 21-keypoint hand skeletons onto *canvas* (one entry per hand)."""
    import matplotlib

    H, W, C = canvas.shape

    # finger bone connections over the 21 hand keypoints
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]

    # all_hand_peaks: (person_number*2, 21, 2) normalized coordinates
    for hand in all_hand_peaks:
        pts = np.array(hand)

        for edge_idx, (a, b) in enumerate(edges):
            x1 = int(pts[a][0] * W)
            y1 = int(pts[a][1] * H)
            x2 = int(pts[b][0] * W)
            y2 = int(pts[b][1] * H)
            # skip bones with an endpoint at/near the origin (missing point)
            if x1 > eps and y1 > eps and x2 > eps and y2 > eps:
                hue = edge_idx / float(len(edges))
                color = matplotlib.colors.hsv_to_rgb([hue, 1.0, 1.0]) * 255
                cv2.line(canvas, (x1, y1), (x2, y2), color, thickness=2)

        for px, py in pts:
            cx = int(px * W)
            cy = int(py * H)
            if cx > eps and cy > eps:
                cv2.circle(canvas, (cx, cy), 4, (0, 0, 255), thickness=-1)
    return canvas
+
|
| 110 |
+
|
| 111 |
+
def draw_facepose(canvas, all_lmks):
    """Draw face landmarks as small white dots onto *canvas*."""
    H, W, C = canvas.shape
    for face in all_lmks:
        for px, py in np.array(face):
            cx = int(px * W)
            cy = int(py * H)
            # skip landmarks at/near the origin (missing point)
            if cx > eps and cy > eps:
                cv2.circle(canvas, (cx, cy), 3, (255, 255, 255), thickness=-1)
    return canvas
+
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
# Calculate the output resolution
def size_calculate(h, w, resolution):
    """Scale (h, w) so the short edge is *resolution*, snapped to multiples of 64."""
    scale = float(resolution) / min(float(h), float(w))  # short-edge scale factor
    new_h = int(np.round(float(h) * scale / 64.0)) * 64
    new_w = int(np.round(float(w) * scale / 64.0)) * 64
    return new_h, new_w
+
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def warpAffine_kps(kps, M):
    """Apply a 2x3 affine matrix *M* to an (N, 2) array of keypoints."""
    linear = M[:, :2]       # rotation/scale/shear part
    translation = M[:, 2]   # offset part
    return np.dot(kps, linear.T) + translation
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
|
preprocess/pose/script/wholebody.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import os
|
| 3 |
+
import numpy as np
|
| 4 |
+
import warnings
|
| 5 |
+
|
| 6 |
+
try:
|
| 7 |
+
import mmcv
|
| 8 |
+
except ImportError:
|
| 9 |
+
warnings.warn(
|
| 10 |
+
"The module 'mmcv' is not installed. The package will have limited functionality. Please install it using the command: mim install 'mmcv>=2.0.1'"
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
try:
|
| 14 |
+
from mmpose.apis import inference_topdown
|
| 15 |
+
from mmpose.apis import init_model as init_pose_estimator
|
| 16 |
+
from mmpose.evaluation.functional import nms
|
| 17 |
+
from mmpose.utils import adapt_mmdet_pipeline
|
| 18 |
+
from mmpose.structures import merge_data_samples
|
| 19 |
+
except ImportError:
|
| 20 |
+
warnings.warn(
|
| 21 |
+
"The module 'mmpose' is not installed. The package will have limited functionality. Please install it using the command: mim install 'mmpose>=1.1.0'"
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
try:
|
| 25 |
+
from mmdet.apis import inference_detector, init_detector
|
| 26 |
+
except ImportError:
|
| 27 |
+
warnings.warn(
|
| 28 |
+
"The module 'mmdet' is not installed. The package will have limited functionality. Please install it using the command: mim install 'mmdet>=3.1.0'"
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class Wholebody:
    """Whole-body pose estimation pipeline.

    Combines an mmdet person detector (YOLOX-L by default) with an mmpose
    top-down keypoint estimator (DWPose by default). Calling an instance
    on an image returns per-person keypoints with the body joints
    reordered into the OpenPose-style layout.
    """

    def __init__(self,
                 det_config=None, det_ckpt=None,
                 pose_config=None, pose_ckpt=None,
                 device="cpu"):

        # Default configs are resolved relative to this script.
        # NOTE(review): the repo listing places the YOLOX config under
        # `preprocess/pose/config/`, not `yolox_config/` — confirm these
        # relative paths actually resolve at runtime.
        if det_config is None:
            det_config = os.path.join(os.path.dirname(__file__), "yolox_config/yolox_l_8xb8-300e_coco.py")

        if pose_config is None:
            pose_config = os.path.join(os.path.dirname(__file__), "dwpose_config/dwpose-l_384x288.py")

        # Default checkpoints are remote URLs; the mm* init helpers
        # download them on first use.
        if det_ckpt is None:
            det_ckpt = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth'

        if pose_ckpt is None:
            pose_ckpt = "https://huggingface.co/wanghaofan/dw-ll_ucoco_384/resolve/main/dw-ll_ucoco_384.pth"

        # build detector
        self.detector = init_detector(det_config, det_ckpt, device=device)
        # Adapt the mmdet test pipeline so inference accepts arrays.
        self.detector.cfg = adapt_mmdet_pipeline(self.detector.cfg)

        # build pose estimator
        self.pose_estimator = init_pose_estimator(
            pose_config,
            pose_ckpt,
            device=device)

    def to(self, device):
        # Move both sub-models to `device`; returns self for chaining.
        self.detector.to(device)
        self.pose_estimator.to(device)
        return self

    def __call__(self, oriImg):
        """Detect people in ``oriImg`` and estimate their keypoints.

        Returns
        -------
        keypoints : ndarray
            Per-person keypoint (x, y) coordinates, with a synthesized
            neck joint inserted and body joints remapped to the
            OpenPose ordering.
        scores : ndarray
            Per-keypoint confidence scores (same leading shape).
        """
        # predict bbox
        det_result = inference_detector(self.detector, oriImg)
        pred_instance = det_result.pred_instances.cpu().numpy()
        # Append the detection score as a 5th column: (x1, y1, x2, y2, s).
        bboxes = np.concatenate(
            (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1)
        # Keep only confident person detections (COCO label 0).
        bboxes = bboxes[np.logical_and(pred_instance.labels == 0,
                                       pred_instance.scores > 0.5)]

        # set NMS threshold
        bboxes = bboxes[nms(bboxes, 0.7), :4]

        # predict keypoints
        if len(bboxes) == 0:
            # No detections: run the pose model on the whole image.
            pose_results = inference_topdown(self.pose_estimator, oriImg)
        else:
            pose_results = inference_topdown(self.pose_estimator, oriImg, bboxes)
        preds = merge_data_samples(pose_results)
        preds = preds.pred_instances

        # preds = pose_results[0].pred_instances
        # Prefer keypoints mapped back into the original image space,
        # falling back to raw model-space keypoints when absent.
        keypoints = preds.get('transformed_keypoints',
                              preds.keypoints)
        if 'keypoint_scores' in preds:
            scores = preds.keypoint_scores
        else:
            # No scores provided: treat every keypoint as fully confident.
            scores = np.ones(keypoints.shape[:-1])

        if 'keypoints_visible' in preds:
            visible = preds.keypoints_visible
        else:
            visible = np.ones(keypoints.shape[:-1])
        # Stack (x, y, score, visible) per keypoint.
        keypoints_info = np.concatenate(
            (keypoints, scores[..., None], visible[..., None]),
            axis=-1)
        # compute neck joint
        # Neck = midpoint of the two shoulders (COCO indices 5 and 6).
        neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
        # neck score when visualizing pred
        # Neck is valid (score/visible = 1) only when BOTH shoulders
        # clear the 0.3 confidence/visibility threshold.
        neck[:, 2:4] = np.logical_and(
            keypoints_info[:, 5, 2:4] > 0.3,
            keypoints_info[:, 6, 2:4] > 0.3).astype(int)
        # Insert neck at index 17, then remap from the mmpose (COCO)
        # joint order to the OpenPose joint order.
        new_keypoints_info = np.insert(
            keypoints_info, 17, neck, axis=1)
        mmpose_idx = [
            17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3
        ]
        openpose_idx = [
            1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17
        ]
        new_keypoints_info[:, openpose_idx] = \
            new_keypoints_info[:, mmpose_idx]
        keypoints_info = new_keypoints_info

        keypoints, scores, visible = keypoints_info[
            ..., :2], keypoints_info[..., 2], keypoints_info[..., 3]

        # `visible` is unpacked but intentionally not returned.
        return keypoints, scores
|