ak3385 committed on
Commit
5ec2d80
·
verified ·
1 Parent(s): 5a4e17c

Add files using upload-large-folder tool

Browse files
preprocess/output/video00002_img00002/example/negative/0001.jpg ADDED
preprocess/output/video00002_img00002/example/negative/0002.jpg ADDED
preprocess/output/video00002_img00002/example/negative/0003.jpg ADDED
preprocess/output/video00002_img00002/example/negative/0004.jpg ADDED
preprocess/output/video00002_img00002/example/negative/0005.jpg ADDED
preprocess/output/video00002_img00002/example/negative/0006.jpg ADDED
preprocess/output/video00002_img00002/example/negative/0007.jpg ADDED
preprocess/output/video00002_img00002/example/negative/0008.jpg ADDED
preprocess/output/video00002_img00002/example/negative/0009.jpg ADDED
preprocess/output/video00002_img00002/example/negative/0010.jpg ADDED
preprocess/output/video00002_img00002/example/negative/0011.jpg ADDED
preprocess/output/video00002_img00002/example/negative/0012.jpg ADDED
preprocess/output/video00002_img00002/example/negative/0013.jpg ADDED
preprocess/output/video00002_img00002/example/negative/0014.jpg ADDED
preprocess/output/video00002_img00002/example/prompt.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ A person performs a lively, rhythmic dance routine on a rooftop under a clear blue sky. Moving with energy and confidence, weight shifts from side to side while arms extend in expressive, flowing gestures—sometimes raised above the head, sometimes swept outward. Movements remain fluid and coordinated, emphasizing the upper body and legs as light steps keep a steady rhythm. Transitions between poses are smooth, with occasional pauses to gesture directly toward the camera with a thumbs-up, adding a playful, engaging element to the performance.
preprocess/pose/config/yolox_l_8xb8-300e_coco.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ img_scale = (640, 640) # width, height
2
+
3
+ # model settings
4
+ model = dict(
5
+ type='YOLOX',
6
+ data_preprocessor=dict(
7
+ type='DetDataPreprocessor',
8
+ pad_size_divisor=32,
9
+ batch_augments=[
10
+ dict(
11
+ type='BatchSyncRandomResize',
12
+ random_size_range=(480, 800),
13
+ size_divisor=32,
14
+ interval=10)
15
+ ]),
16
+ backbone=dict(
17
+ type='CSPDarknet',
18
+ deepen_factor=1.0,
19
+ widen_factor=1.0,
20
+ out_indices=(2, 3, 4),
21
+ use_depthwise=False,
22
+ spp_kernal_sizes=(5, 9, 13),
23
+ norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
24
+ act_cfg=dict(type='Swish'),
25
+ ),
26
+ neck=dict(
27
+ type='YOLOXPAFPN',
28
+ in_channels=[256, 512, 1024],
29
+ out_channels=256,
30
+ num_csp_blocks=3,
31
+ use_depthwise=False,
32
+ upsample_cfg=dict(scale_factor=2, mode='nearest'),
33
+ norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
34
+ act_cfg=dict(type='Swish')),
35
+ bbox_head=dict(
36
+ type='YOLOXHead',
37
+ num_classes=80,
38
+ in_channels=256,
39
+ feat_channels=256,
40
+ stacked_convs=2,
41
+ strides=(8, 16, 32),
42
+ use_depthwise=False,
43
+ norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
44
+ act_cfg=dict(type='Swish'),
45
+ loss_cls=dict(
46
+ type='CrossEntropyLoss',
47
+ use_sigmoid=True,
48
+ reduction='sum',
49
+ loss_weight=1.0),
50
+ loss_bbox=dict(
51
+ type='IoULoss',
52
+ mode='square',
53
+ eps=1e-16,
54
+ reduction='sum',
55
+ loss_weight=5.0),
56
+ loss_obj=dict(
57
+ type='CrossEntropyLoss',
58
+ use_sigmoid=True,
59
+ reduction='sum',
60
+ loss_weight=1.0),
61
+ loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0)),
62
+ train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
63
+ # In order to align the source code, the threshold of the val phase is
64
+ # 0.01, and the threshold of the test phase is 0.001.
65
+ test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))
66
+
67
+ # dataset settings
68
+ data_root = 'data/coco/'
69
+ dataset_type = 'CocoDataset'
70
+
71
+ # Example to use different file client
72
+ # Method 1: simply set the data root and let the file I/O module
73
+ # automatically infer from prefix (not support LMDB and Memcache yet)
74
+
75
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
76
+
77
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
78
+ # backend_args = dict(
79
+ # backend='petrel',
80
+ # path_mapping=dict({
81
+ # './data/': 's3://openmmlab/datasets/detection/',
82
+ # 'data/': 's3://openmmlab/datasets/detection/'
83
+ # }))
84
+ backend_args = None
85
+
86
+ train_pipeline = [
87
+ dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
88
+ dict(
89
+ type='RandomAffine',
90
+ scaling_ratio_range=(0.1, 2),
91
+ # img_scale is (width, height)
92
+ border=(-img_scale[0] // 2, -img_scale[1] // 2)),
93
+ dict(
94
+ type='MixUp',
95
+ img_scale=img_scale,
96
+ ratio_range=(0.8, 1.6),
97
+ pad_val=114.0),
98
+ dict(type='YOLOXHSVRandomAug'),
99
+ dict(type='RandomFlip', prob=0.5),
100
+ # According to the official implementation, multi-scale
101
+ # training is not considered here but in the
102
+ # 'mmdet/models/detectors/yolox.py'.
103
+ # Resize and Pad are for the last 15 epochs when Mosaic,
104
+ # RandomAffine, and MixUp are closed by YOLOXModeSwitchHook.
105
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
106
+ dict(
107
+ type='Pad',
108
+ pad_to_square=True,
109
+ # If the image is three-channel, the pad value needs
110
+ # to be set separately for each channel.
111
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
112
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
113
+ dict(type='PackDetInputs')
114
+ ]
115
+
116
+ train_dataset = dict(
117
+ # use MultiImageMixDataset wrapper to support mosaic and mixup
118
+ type='MultiImageMixDataset',
119
+ dataset=dict(
120
+ type=dataset_type,
121
+ data_root=data_root,
122
+ ann_file='annotations/instances_train2017.json',
123
+ data_prefix=dict(img='train2017/'),
124
+ pipeline=[
125
+ dict(type='LoadImageFromFile', backend_args=backend_args),
126
+ dict(type='LoadAnnotations', with_bbox=True)
127
+ ],
128
+ filter_cfg=dict(filter_empty_gt=False, min_size=32),
129
+ backend_args=backend_args),
130
+ pipeline=train_pipeline)
131
+
132
+ test_pipeline = [
133
+ dict(type='LoadImageFromFile', backend_args=backend_args),
134
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
135
+ dict(
136
+ type='Pad',
137
+ pad_to_square=True,
138
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
139
+ dict(type='LoadAnnotations', with_bbox=True),
140
+ dict(
141
+ type='PackDetInputs',
142
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
143
+ 'scale_factor'))
144
+ ]
145
+
146
+ train_dataloader = dict(
147
+ batch_size=8,
148
+ num_workers=4,
149
+ persistent_workers=True,
150
+ sampler=dict(type='DefaultSampler', shuffle=True),
151
+ dataset=train_dataset)
152
+ val_dataloader = dict(
153
+ batch_size=8,
154
+ num_workers=4,
155
+ persistent_workers=True,
156
+ drop_last=False,
157
+ sampler=dict(type='DefaultSampler', shuffle=False),
158
+ dataset=dict(
159
+ type=dataset_type,
160
+ data_root=data_root,
161
+ ann_file='annotations/instances_val2017.json',
162
+ data_prefix=dict(img='val2017/'),
163
+ test_mode=True,
164
+ pipeline=test_pipeline,
165
+ backend_args=backend_args))
166
+ test_dataloader = val_dataloader
167
+
168
+ val_evaluator = dict(
169
+ type='CocoMetric',
170
+ ann_file=data_root + 'annotations/instances_val2017.json',
171
+ metric='bbox',
172
+ backend_args=backend_args)
173
+ test_evaluator = val_evaluator
174
+
175
+ # training settings
176
+ max_epochs = 300
177
+ num_last_epochs = 15
178
+ interval = 10
179
+
180
+ train_cfg = dict(max_epochs=max_epochs, val_interval=interval)
181
+
182
+ # optimizer
183
+ # default 8 gpu
184
+ base_lr = 0.01
185
+ optim_wrapper = dict(
186
+ type='OptimWrapper',
187
+ optimizer=dict(
188
+ type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
189
+ nesterov=True),
190
+ paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
191
+
192
+ # learning rate
193
+ param_scheduler = [
194
+ dict(
195
+ # use quadratic formula to warm up 5 epochs
196
+ # and lr is updated by iteration
197
+ # TODO: fix default scope in get function
198
+ type='mmdet.QuadraticWarmupLR',
199
+ by_epoch=True,
200
+ begin=0,
201
+ end=5,
202
+ convert_to_iter_based=True),
203
+ dict(
204
+ # use cosine lr from 5 to 285 epoch
205
+ type='CosineAnnealingLR',
206
+ eta_min=base_lr * 0.05,
207
+ begin=5,
208
+ T_max=max_epochs - num_last_epochs,
209
+ end=max_epochs - num_last_epochs,
210
+ by_epoch=True,
211
+ convert_to_iter_based=True),
212
+ dict(
213
+ # use fixed lr during last 15 epochs
214
+ type='ConstantLR',
215
+ by_epoch=True,
216
+ factor=1,
217
+ begin=max_epochs - num_last_epochs,
218
+ end=max_epochs,
219
+ )
220
+ ]
221
+
222
+ default_hooks = dict(
223
+ checkpoint=dict(
224
+ interval=interval,
225
+ max_keep_ckpts=3 # only keep latest 3 checkpoints
226
+ ))
227
+
228
+ custom_hooks = [
229
+ dict(
230
+ type='YOLOXModeSwitchHook',
231
+ num_last_epochs=num_last_epochs,
232
+ priority=48),
233
+ dict(type='SyncNormHook', priority=48),
234
+ dict(
235
+ type='EMAHook',
236
+ ema_type='ExpMomentumEMA',
237
+ momentum=0.0001,
238
+ update_buffers=True,
239
+ priority=49)
240
+ ]
241
+
242
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
243
+ # USER SHOULD NOT CHANGE ITS VALUES.
244
+ # base_batch_size = (8 GPUs) x (8 samples per GPU)
245
+ auto_scale_lr = dict(base_batch_size=64)
preprocess/pose/script/dwpose.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Openpose
2
+ # Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose
3
+ # 2nd Edited by https://github.com/Hzzone/pytorch-openpose
4
+ # 3rd Edited by ControlNet
5
+ # 4th Edited by ControlNet (added face and correct hands)
6
+
7
+ import os
8
+ os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
9
+
10
+ import cv2
11
+ import torch
12
+ import numpy as np
13
+ from PIL import Image
14
+
15
+
16
+ import pose.script.util as util
17
+
18
def resize_image(input_image, resolution):
    """Resize so the short side equals `resolution`, snapping both sides to multiples of 64."""
    h, w, _ = input_image.shape
    scale = float(resolution) / min(float(h), float(w))
    new_h = int(np.round(float(h) * scale / 64.0)) * 64
    new_w = int(np.round(float(w) * scale / 64.0)) * 64
    # cubic when upscaling, area when downscaling (LANCZOS4 was the old upscale choice)
    interp = cv2.INTER_CUBIC if scale > 1 else cv2.INTER_AREA
    return cv2.resize(input_image, (new_w, new_h), interpolation=interp)
29
+
30
def HWC3(x):
    """Normalize a uint8 image to 3-channel HWC.

    Grayscale is replicated across channels; RGBA is alpha-composited
    over a white background; 3-channel input is returned unchanged.
    """
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    channels = x.shape[2]
    assert channels in (1, 3, 4)
    if channels == 3:
        return x
    if channels == 1:
        return np.concatenate([x, x, x], axis=2)
    # channels == 4: blend RGB over white using the alpha channel
    rgb = x[:, :, 0:3].astype(np.float32)
    alpha = x[:, :, 3:4].astype(np.float32) / 255.0
    blended = rgb * alpha + 255.0 * (1.0 - alpha)
    return blended.clip(0, 255).astype(np.uint8)
47
+
48
def draw_pose(pose, H, W, draw_face):
    """Render the most significant person's keypoints onto a black H x W canvas.

    Keeps only the top-ranked detection: 18 body joints, 2 hands and 1 face,
    then delegates the actual drawing to the `util` helpers.
    """
    bodies = pose['bodies']
    candidate = bodies['candidate'][:18]
    subset = bodies['subset'][:1]
    hands = pose['hands'][:2]
    faces = pose['faces'][:1]

    canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8)
    canvas = util.draw_bodypose(canvas, candidate, subset)
    canvas = util.draw_handpose(canvas, hands)
    if draw_face:
        canvas = util.draw_facepose(canvas, faces)
    return canvas
69
+
70
class DWposeDetector:
    """Full-image whole-body pose detector wrapping a `Wholebody` estimator.

    Converts the estimator's raw candidate/subset output into an
    OpenPose-style `pose` dict and (optionally) a rendered pose image.
    """

    def __init__(self, det_config=None, det_ckpt=None, pose_config=None, pose_ckpt=None, device="cpu", keypoints_only=False):
        # imported lazily so merely importing this module does not pull in mm* deps
        from pose.script.wholebody import Wholebody

        self.pose_estimation = Wholebody(det_config, det_ckpt, pose_config, pose_ckpt, device)
        # when True, __call__ returns only the pose dict (no rendering)
        self.keypoints_only = keypoints_only

    def to(self, device):
        # move the underlying detector + pose model; returns self for chaining
        self.pose_estimation.to(device)
        return self

    # NOTE(review): the original (Chinese) note below says: detect_resolution is
    # the short-side size used as the render resolution when drawing the pose
    # (1024 recommended); image_resolution is the short-side size of the saved
    # pose file (768 recommended). The actual internal detection resolutions
    # are yolox (640, 640) and dwpose (288, 384).
    '''
    detect_resolution: 短边resize到多少 这是 draw pose 时的原始渲染分辨率。建议1024
    image_resolution: 短边resize到多少 这是 save pose 时的文件分辨率。建议768

    实际检测分辨率:
    yolox: (640, 640)
    dwpose:(288, 384)
    '''

    def __call__(self, input_image, detect_resolution=1024, image_resolution=768, output_type="pil", **kwargs):
        # assumes input_image is RGB (PIL-style); converted to BGR for OpenCV
        input_image = cv2.cvtColor(np.array(input_image, dtype=np.uint8), cv2.COLOR_RGB2BGR)
        # cv2.imshow('', input_image)
        # cv2.waitKey(0)

        input_image = HWC3(input_image)
        input_image = resize_image(input_image, detect_resolution)
        H, W, C = input_image.shape

        with torch.no_grad():
            candidate, subset = self.pose_estimation(input_image)
            nums, keys, locs = candidate.shape
            # normalize keypoint coordinates to [0, 1]
            candidate[..., 0] /= float(W)
            candidate[..., 1] /= float(H)
            body = candidate[:, :18].copy()
            body = body.reshape(nums * 18, locs)
            score = subset[:, :18]

            # replace confident scores by the flat index into `body`,
            # and mark low-confidence joints as -1 (missing)
            for i in range(len(score)):
                for j in range(len(score[i])):
                    if score[i][j] > 0.35:  # if score[i][j] > 0.3:
                        score[i][j] = int(18 * i + j)
                    else:
                        score[i][j] = -1

            # invalidate all low-confidence keypoints
            un_visible = subset < 0.35  # un_visible = subset < 0.3
            candidate[un_visible] = -1

            # slice the 133/134-point layout into parts; indices follow the
            # estimator's output order (body, feet, face, hands)
            foot = candidate[:, 18:24]

            faces = candidate[:, 24:92]

            hands = candidate[:, 92:113]
            hands = np.vstack([hands, candidate[:, 113:]])  # stack left + right hands

            bodies = dict(candidate=body, subset=score)
            pose = dict(bodies=bodies, hands=hands, faces=faces)

            if self.keypoints_only == True:
                return pose
            else:
                detected_map = draw_pose(pose, H, W, draw_face=False)
                detected_map = HWC3(detected_map)
                # downscale the rendered map to the save resolution
                img = resize_image(input_image, image_resolution)
                H, W, C = img.shape
                detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
                # cv2.imshow('detected_map',detected_map)
                # cv2.waitKey(0)

                if output_type == "pil":
                    detected_map = cv2.cvtColor(detected_map, cv2.COLOR_BGR2RGB)
                    detected_map = Image.fromarray(detected_map)

                return detected_map, pose
143
+
preprocess/pose/script/tool.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib
2
+ import os
3
+ import os.path as osp
4
+ import shutil
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import av
9
+ import numpy as np
10
+ import torch
11
+ import torchvision
12
+ from einops import rearrange
13
+ from PIL import Image
14
+
15
+
16
def seed_everything(seed):
    """Seed every RNG in play (stdlib, numpy, torch CPU and CUDA) for reproducibility."""
    import random

    import numpy as np

    random.seed(seed)
    np.random.seed(seed % (2**32))  # numpy requires a seed in [0, 2**32)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
25
+
26
+
27
def import_filename(filename, module_name="mymodule"):
    """Dynamically import a Python source file and return the module object.

    Args:
        filename: path to a ``.py`` file.
        module_name: name to register the module under in ``sys.modules``.
            Defaults to ``"mymodule"`` to preserve the historical behavior;
            pass a unique name to avoid clobbering a previous import.

    Returns:
        The executed module.
    """
    spec = importlib.util.spec_from_file_location(module_name, filename)
    module = importlib.util.module_from_spec(spec)
    # register before exec so code inside the file can resolve its own module
    sys.modules[spec.name] = module
    spec.loader.exec_module(module)
    return module
33
+
34
+
35
def delete_additional_ckpt(base_path, num_keep):
    """Keep only the `num_keep` newest ``checkpoint-*`` dirs under base_path.

    Checkpoints are ordered by the integer step suffix; the earliest ones
    beyond the keep budget are removed recursively.
    """
    ckpt_dirs = [d for d in os.listdir(base_path) if d.startswith("checkpoint-")]
    if len(ckpt_dirs) <= num_keep:
        return
    # ascending by step number, so the oldest checkpoints come first
    ckpt_dirs.sort(key=lambda name: int(name.split("-")[-1]))
    for name in ckpt_dirs[: len(ckpt_dirs) - num_keep]:
        target = osp.join(base_path, name)
        if osp.exists(target):
            shutil.rmtree(target)
49
+
50
+
51
def save_videos_from_pil(pil_images, path, fps):
    """Write a list of PIL images to `path` as .mp4 (H.264 via PyAV) or animated .gif.

    Args:
        pil_images: non-empty list of equally-sized PIL.Image frames.
        path: output file path; the extension selects the container.
        fps: frames per second.

    Raises:
        ValueError: if the extension is neither .mp4 nor .gif.
    """
    save_fmt = Path(path).suffix
    parent = os.path.dirname(path)
    # BUGFIX: os.makedirs("") raises FileNotFoundError when `path` is a bare
    # filename with no directory component — only create dirs when one exists.
    if parent:
        os.makedirs(parent, exist_ok=True)
    width, height = pil_images[0].size

    if save_fmt == ".mp4":
        codec = "libx264"
        container = av.open(path, "w")
        stream = container.add_stream(codec, rate=fps)

        stream.width = width
        stream.height = height
        stream.pix_fmt = 'yuv420p'
        stream.bit_rate = 10000000
        stream.options["crf"] = "18"  # near-lossless quality

        for pil_image in pil_images:
            # pil_image = Image.fromarray(image_arr).convert("RGB")
            av_frame = av.VideoFrame.from_image(pil_image)
            container.mux(stream.encode(av_frame))
        container.mux(stream.encode())  # flush any buffered frames
        container.close()

    elif save_fmt == ".gif":
        pil_images[0].save(
            fp=path,
            format="GIF",
            append_images=pil_images[1:],
            save_all=True,
            duration=(1 / fps * 1000),  # per-frame duration in milliseconds
            loop=0,
        )
    else:
        raise ValueError("Unsupported file type. Use .mp4 or .gif.")
86
+
87
+
88
def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=6, fps=8):
    """Tile a batched video tensor (b c t h w) into a per-frame grid and save it.

    Each time step is arranged into an image grid with `n_rows` columns per
    row; the resulting frame sequence is written via `save_videos_from_pil`.
    """
    frames = rearrange(videos, "b c t h w -> t b c h w")
    grid_images = []

    for frame in frames:
        grid = torchvision.utils.make_grid(frame, nrow=n_rows)  # (c h w)
        grid = grid.transpose(0, 1).transpose(1, 2).squeeze(-1)  # (h w c)
        if rescale:
            grid = (grid + 1.0) / 2.0  # map [-1, 1] -> [0, 1]
        grid = (grid * 255).numpy().astype(np.uint8)
        grid_images.append(Image.fromarray(grid))

    os.makedirs(os.path.dirname(path), exist_ok=True)

    save_videos_from_pil(grid_images, path, fps)
106
+
107
+
108
def read_frames(video_path):
    """Decode every frame of the first video stream into RGB PIL images.

    Args:
        video_path: path (or URL) accepted by ``av.open``.

    Returns:
        list[PIL.Image]: decoded frames in presentation order.
    """
    container = av.open(video_path)
    try:
        video_stream = next(s for s in container.streams if s.type == "video")
        frames = []
        for packet in container.demux(video_stream):
            for frame in packet.decode():
                image = Image.frombytes(
                    "RGB",
                    (frame.width, frame.height),
                    frame.to_rgb().to_ndarray(),
                )
                frames.append(image)
        return frames
    finally:
        # BUGFIX: the original leaked the demuxer handle; always release it,
        # matching get_fps() which does close its container.
        container.close()
123
+
124
+
125
def get_fps(video_path):
    """Return the average frame rate of the file's first video stream."""
    container = av.open(video_path)
    stream = next(s for s in container.streams if s.type == "video")
    frame_rate = stream.average_rate
    container.close()
    return frame_rate
preprocess/pose/script/util.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import numpy as np
3
+ import cv2
4
+
5
+
6
+ eps = 0.01
7
+
8
def smart_width(d):
    """Map a distance `d` to a stroke width 1..8.

    Width k is used while d stays below the k-th doubling threshold
    (5, 10, 20, 40, 80, 160, 320); anything larger gets width 8.
    """
    thresholds = (5, 10, 20, 40, 80, 160, 320)
    for width, limit in enumerate(thresholds, start=1):
        if d < limit:
            return width
    return 8
25
+
26
+
27
+
28
def draw_bodypose(canvas, candidate, subset):
    """Draw OpenPose-style 18-joint skeletons: filled limb ellipses, then joint dots.

    `candidate` holds normalized (x, y) keypoints; `subset` holds, per person,
    the candidate index of each joint (-1 when the joint is missing).
    """
    H, W, C = canvas.shape
    candidate = np.array(candidate)
    subset = np.array(subset)

    # 1-based joint index pairs (OpenPose convention); only the first 17 are drawn
    limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
               [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
               [1, 16], [16, 18], [3, 17], [6, 18]]

    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0],
              [170, 255, 0], [85, 255, 0], [0, 255, 0], [0, 255, 85],
              [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255],
              [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255],
              [255, 0, 170], [255, 0, 85]]

    # pass 1 — limbs: a filled ellipse between each visible joint pair
    for limb_idx in range(17):
        for person in subset:
            joint_ids = person[np.array(limbSeq[limb_idx]) - 1]
            if -1 in joint_ids:
                continue  # at least one endpoint is missing
            ys = candidate[joint_ids.astype(int), 0] * float(W)
            xs = candidate[joint_ids.astype(int), 1] * float(H)
            center = (int(np.mean(ys)), int(np.mean(xs)))
            length = ((xs[0] - xs[1]) ** 2 + (ys[0] - ys[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(xs[0] - xs[1], ys[0] - ys[1]))
            width = 4  # fixed stroke; smart_width(length) would scale with limb size
            polygon = cv2.ellipse2Poly(center, (int(length / 2), width),
                                       int(angle), 0, 360, 1)
            cv2.fillConvexPoly(canvas, polygon, colors[limb_idx])

    # dim the limb layer so the joint dots stand out
    canvas = (canvas * 0.6).astype(np.uint8)

    # pass 2 — joints: one dot per visible keypoint
    for joint_idx in range(18):
        for person in subset:
            cand_idx = int(person[joint_idx])
            if cand_idx == -1:
                continue
            x, y = candidate[cand_idx][0:2]
            cv2.circle(canvas, (int(x * W), int(y * H)), 4,
                       colors[joint_idx], thickness=-1)

    return canvas
71
+
72
+
73
def draw_handpose(canvas, all_hand_peaks):
    """Draw 21-keypoint hands: HSV-rainbow bone lines plus red joint dots."""
    import matplotlib

    H, W, C = canvas.shape

    # bone connectivity of a 21-point hand (wrist fanning out to each finger)
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8],
             [0, 9], [9, 10], [10, 11], [11, 12], [0, 13], [13, 14], [14, 15],
             [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]

    # all_hand_peaks: (person_number*2, 21, 2) normalized coordinates
    for peaks in all_hand_peaks:
        peaks = np.array(peaks)

        for edge_idx, (a, b) in enumerate(edges):
            x1, y1 = peaks[a]
            x2, y2 = peaks[b]
            x1, x2 = int(x1 * W), int(x2 * W)
            y1, y2 = int(y1 * H), int(y2 * H)
            # skip bones whose endpoints sit at/near the origin (missing detection)
            if x1 > eps and y1 > eps and x2 > eps and y2 > eps:
                color = matplotlib.colors.hsv_to_rgb(
                    [edge_idx / float(len(edges)), 1.0, 1.0]) * 255
                cv2.line(canvas, (x1, y1), (x2, y2), color, thickness=2)

        for keypoint in peaks:
            x, y = keypoint
            x, y = int(x * W), int(y * H)
            if x > eps and y > eps:
                cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
    return canvas
109
+
110
+
111
def draw_facepose(canvas, all_lmks):
    """Draw face landmarks as small white dots; coordinates are normalized."""
    H, W, C = canvas.shape
    for lmks in all_lmks:
        for x_norm, y_norm in np.array(lmks):
            px, py = int(x_norm * W), int(y_norm * H)
            if px > eps and py > eps:  # points at/near the origin mean "missing"
                cv2.circle(canvas, (px, py), 3, (255, 255, 255), thickness=-1)
    return canvas
123
+
124
+
125
+
126
+
127
+ # Calculate the resolution
128
def size_calculate(h, w, resolution):
    """Scale (h, w) so the short side equals `resolution`, then snap to multiples of 64.

    Returns the new (H, W) pair as ints.
    """
    scale = float(resolution) / min(float(h), float(w))  # short-edge scale factor
    new_h = int(np.round(float(h) * scale / 64.0)) * 64
    new_w = int(np.round(float(w) * scale / 64.0)) * 64
    return new_h, new_w
142
+
143
+
144
+
145
def warpAffine_kps(kps, M):
    """Apply a 2x3 affine matrix `M` to an array of (x, y) keypoints."""
    linear = M[:, :2]       # rotation / scale / shear part
    translation = M[:, 2]   # translation part
    return np.dot(kps, linear.T) + translation
150
+
151
+
152
+
153
+
preprocess/pose/script/wholebody.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import os
3
+ import numpy as np
4
+ import warnings
5
+
6
+ try:
7
+ import mmcv
8
+ except ImportError:
9
+ warnings.warn(
10
+ "The module 'mmcv' is not installed. The package will have limited functionality. Please install it using the command: mim install 'mmcv>=2.0.1'"
11
+ )
12
+
13
+ try:
14
+ from mmpose.apis import inference_topdown
15
+ from mmpose.apis import init_model as init_pose_estimator
16
+ from mmpose.evaluation.functional import nms
17
+ from mmpose.utils import adapt_mmdet_pipeline
18
+ from mmpose.structures import merge_data_samples
19
+ except ImportError:
20
+ warnings.warn(
21
+ "The module 'mmpose' is not installed. The package will have limited functionality. Please install it using the command: mim install 'mmpose>=1.1.0'"
22
+ )
23
+
24
+ try:
25
+ from mmdet.apis import inference_detector, init_detector
26
+ except ImportError:
27
+ warnings.warn(
28
+ "The module 'mmdet' is not installed. The package will have limited functionality. Please install it using the command: mim install 'mmdet>=3.1.0'"
29
+ )
30
+
31
+
32
class Wholebody:
    """Whole-body pose estimator: a YOLOX person detector feeding a DWPose
    top-down keypoint model, with output remapped to OpenPose joint order."""

    def __init__(self,
                 det_config=None, det_ckpt=None,
                 pose_config=None, pose_ckpt=None,
                 device="cpu"):
        # default to the configs shipped next to this file and public checkpoints
        if det_config is None:
            det_config = os.path.join(os.path.dirname(__file__), "yolox_config/yolox_l_8xb8-300e_coco.py")

        if pose_config is None:
            pose_config = os.path.join(os.path.dirname(__file__), "dwpose_config/dwpose-l_384x288.py")

        if det_ckpt is None:
            det_ckpt = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth'

        if pose_ckpt is None:
            pose_ckpt = "https://huggingface.co/wanghaofan/dw-ll_ucoco_384/resolve/main/dw-ll_ucoco_384.pth"

        # build detector
        self.detector = init_detector(det_config, det_ckpt, device=device)
        self.detector.cfg = adapt_mmdet_pipeline(self.detector.cfg)

        # build pose estimator
        self.pose_estimator = init_pose_estimator(
            pose_config,
            pose_ckpt,
            device=device)

    def to(self, device):
        # move both sub-models; returns self so calls can be chained
        self.detector.to(device)
        self.pose_estimator.to(device)
        return self

    def __call__(self, oriImg):
        """Run detection + top-down pose estimation on a single image.

        Returns:
            keypoints: per-person (x, y) keypoints in OpenPose joint order
                (a neck joint is inserted at index 17).
            scores: matching per-keypoint confidence scores.
        """
        # predict bbox
        det_result = inference_detector(self.detector, oriImg)
        pred_instance = det_result.pred_instances.cpu().numpy()
        bboxes = np.concatenate(
            (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1)
        # keep only confident detections of class 0 (person in COCO)
        bboxes = bboxes[np.logical_and(pred_instance.labels == 0,
                                       pred_instance.scores > 0.5)]

        # set NMS threshold
        bboxes = bboxes[nms(bboxes, 0.7), :4]

        # predict keypoints
        if len(bboxes) == 0:
            # no person box found: run the pose model on the whole image
            pose_results = inference_topdown(self.pose_estimator, oriImg)
        else:
            pose_results = inference_topdown(self.pose_estimator, oriImg, bboxes)
        preds = merge_data_samples(pose_results)
        preds = preds.pred_instances

        # preds = pose_results[0].pred_instances
        # prefer keypoints transformed back to the original image frame
        keypoints = preds.get('transformed_keypoints',
                              preds.keypoints)
        if 'keypoint_scores' in preds:
            scores = preds.keypoint_scores
        else:
            scores = np.ones(keypoints.shape[:-1])

        if 'keypoints_visible' in preds:
            visible = preds.keypoints_visible
        else:
            visible = np.ones(keypoints.shape[:-1])
        # pack (x, y, score, visible) per keypoint
        keypoints_info = np.concatenate(
            (keypoints, scores[..., None], visible[..., None]),
            axis=-1)
        # compute neck joint as the midpoint of keypoints 5 and 6
        # (presumably the shoulders in COCO order — TODO confirm)
        neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
        # neck score when visualizing pred: visible only when both source
        # keypoints clear the 0.3 confidence threshold
        neck[:, 2:4] = np.logical_and(
            keypoints_info[:, 5, 2:4] > 0.3,
            keypoints_info[:, 6, 2:4] > 0.3).astype(int)
        new_keypoints_info = np.insert(
            keypoints_info, 17, neck, axis=1)
        # remap body joints from MMPose order to OpenPose order
        mmpose_idx = [
            17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3
        ]
        openpose_idx = [
            1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17
        ]
        new_keypoints_info[:, openpose_idx] = \
            new_keypoints_info[:, mmpose_idx]
        keypoints_info = new_keypoints_info

        # split back into coordinates / scores / visibility
        keypoints, scores, visible = keypoints_info[
            ..., :2], keypoints_info[..., 2], keypoints_info[..., 3]

        return keypoints, scores