wangyh6 committed on
Commit
3c53ec2
·
verified ·
1 Parent(s): 1c33d49

Upload BlazeFace

Browse files
Files changed (5) hide show
  1. DCU_CONFIG.py +10 -0
  2. DCU_MODEL.py +483 -0
  3. README.md +199 -0
  4. config.json +11 -0
  5. model.safetensors +3 -0
DCU_CONFIG.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import PretrainedConfig
2
+ from typing import List
3
+
4
+
5
class DcuConfig(PretrainedConfig):
    """Configuration object for the BlazeFace checkpoint in this repository.

    The class declares no options of its own: every keyword argument it
    receives is handed to ``PretrainedConfig`` unchanged.
    """

    def __init__(self, **kwargs):
        # Nothing model-specific to record; defer entirely to the base class.
        super().__init__(**kwargs)
DCU_MODEL.py ADDED
@@ -0,0 +1,483 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ from transformers import PreTrainedModel
6
+
7
class BlazeBlock(nn.Module):
    """Residual block made of a depthwise conv followed by a 1x1 conv.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the block.
        kernel_size: side length of the depthwise kernel.
        stride: 1 keeps the spatial size; 2 selects the down-sampling path,
            where the skip branch is max-pooled and the conv branch is
            padded by hand in ``forward``.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1):
        super().__init__()

        self.stride = stride
        # Extra channels the skip branch needs before it can be added.
        self.channel_pad = out_channels - in_channels

        # TFLite pads the stride-2 depthwise conv differently from PyTorch,
        # so that case uses no built-in padding (forward pads manually).
        downsample = stride == 2
        if downsample:
            self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride)
        conv_padding = 0 if downsample else (kernel_size - 1) // 2

        depthwise = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size,
            stride=stride,
            padding=conv_padding,
            groups=in_channels,
            bias=True,
        )
        pointwise = nn.Conv2d(
            in_channels,
            out_channels,
            1,
            stride=1,
            padding=0,
            bias=True,
        )
        self.convs = nn.Sequential(depthwise, pointwise)

        self.act = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(convs(branch) + skip)`` for the input ``x``."""
        if self.stride == 2:
            # Conv branch: zero-pad two pixels on the right and bottom.
            branch = F.pad(x, (0, 2, 0, 2), "constant", 0)
            # Skip branch: halve the resolution to match the conv output.
            skip = self.max_pool(x)
        else:
            branch, skip = x, x

        if self.channel_pad > 0:
            # Append zero-filled channels so both branches have equal depth.
            skip = F.pad(skip, (0, 0, 0, 0, 0, self.channel_pad), "constant", 0)

        return self.act(self.convs(branch) + skip)
43
+
44
+
45
class FinalBlazeBlock(nn.Module):
    """Stride-2 depthwise + 1x1 conv block with no skip connection.

    Args:
        channels: channel count of both the input and the output.
        kernel_size: side length of the depthwise kernel.
    """

    def __init__(self, channels, kernel_size=3):
        super().__init__()

        # TFLite pads the stride-2 depthwise conv differently from PyTorch,
        # so the conv itself uses no padding (forward pads manually).
        depthwise = nn.Conv2d(
            channels,
            channels,
            kernel_size,
            stride=2,
            padding=0,
            groups=channels,
            bias=True,
        )
        pointwise = nn.Conv2d(
            channels,
            channels,
            1,
            stride=1,
            padding=0,
            bias=True,
        )
        self.convs = nn.Sequential(depthwise, pointwise)

        self.act = nn.ReLU(inplace=True)

    def forward(self, x):
        """Zero-pad ``x`` on the right/bottom, convolve, and apply ReLU."""
        padded = F.pad(x, (0, 2, 0, 2), "constant", 0)
        features = self.convs(padded)
        return self.act(features)
64
+
65
+
66
class BlazeFace(PreTrainedModel):
    """The BlazeFace face detection model from MediaPipe.

    The version from MediaPipe is simpler than the one in the paper;
    it does not use the "double" BlazeBlocks.

    Because we won't be training this model, it doesn't need to have
    batchnorm layers. These have already been "folded" into the conv
    weights by TFLite.

    The conversion to PyTorch is fairly straightforward, but there are
    some small differences between TFLite and PyTorch in how they handle
    padding on conv layers with stride 2.

    This version works on batches, while the MediaPipe version can only
    handle a single image at a time.

    Based on code from https://github.com/tkat0/PyTorch_BlazeFace/ and
    https://github.com/google/mediapipe/

    Args:
        config: configuration object forwarded to ``PreTrainedModel``.
        back_model: when true, build the "back" variant, which expects
            256x256 inputs; otherwise build the 128x128 variant.
    """

    def __init__(self, config, back_model=False):
        super().__init__(config)
        # These are the settings from the MediaPipe example graphs
        # mediapipe/graphs/face_detection/face_detection_mobile_gpu.pbtxt
        # and mediapipe/graphs/face_detection/face_detection_back_mobile_gpu.pbtxt
        self.num_classes = 1
        self.num_anchors = 896
        self.num_coords = 16
        self.score_clipping_thresh = 100.0
        self.back_model = back_model
        if back_model:
            self.x_scale = 256.0
            self.y_scale = 256.0
            self.h_scale = 256.0
            self.w_scale = 256.0
            self.min_score_thresh = 0.65
        else:
            self.x_scale = 128.0
            self.y_scale = 128.0
            self.h_scale = 128.0
            self.w_scale = 128.0
            self.min_score_thresh = 0.75
        self.min_suppression_threshold = 0.3

        self._define_layers()

    def _define_layers(self):
        """Create the backbone and the classifier/regressor heads.

        The position of every layer inside each ``nn.Sequential`` is part of
        the checkpoint's parameter names, so the order must not change.
        """
        if self.back_model:
            self.backbone = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=24, kernel_size=5, stride=2, padding=0, bias=True),
                nn.ReLU(inplace=True),

                *(BlazeBlock(24, 24) for _ in range(7)),
                BlazeBlock(24, 24, stride=2),
                *(BlazeBlock(24, 24) for _ in range(7)),
                BlazeBlock(24, 48, stride=2),
                *(BlazeBlock(48, 48) for _ in range(7)),
                BlazeBlock(48, 96, stride=2),
                *(BlazeBlock(96, 96) for _ in range(7)),
            )
            self.final = FinalBlazeBlock(96)
            self.classifier_8 = nn.Conv2d(96, 2, 1, bias=True)
            self.classifier_16 = nn.Conv2d(96, 6, 1, bias=True)

            self.regressor_8 = nn.Conv2d(96, 32, 1, bias=True)
            self.regressor_16 = nn.Conv2d(96, 96, 1, bias=True)
        else:
            self.backbone1 = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=24, kernel_size=5, stride=2, padding=0, bias=True),
                nn.ReLU(inplace=True),

                BlazeBlock(24, 24),
                BlazeBlock(24, 28),
                BlazeBlock(28, 32, stride=2),
                BlazeBlock(32, 36),
                BlazeBlock(36, 42),
                BlazeBlock(42, 48, stride=2),
                BlazeBlock(48, 56),
                BlazeBlock(56, 64),
                BlazeBlock(64, 72),
                BlazeBlock(72, 80),
                BlazeBlock(80, 88),
            )

            self.backbone2 = nn.Sequential(
                BlazeBlock(88, 96, stride=2),
                BlazeBlock(96, 96),
                BlazeBlock(96, 96),
                BlazeBlock(96, 96),
                BlazeBlock(96, 96),
            )
            self.classifier_8 = nn.Conv2d(88, 2, 1, bias=True)
            self.classifier_16 = nn.Conv2d(96, 6, 1, bias=True)

            self.regressor_8 = nn.Conv2d(88, 32, 1, bias=True)
            self.regressor_16 = nn.Conv2d(96, 96, 1, bias=True)

    def forward(self, x):
        """Run the network on a preprocessed batch.

        Arguments:
            x: a float tensor of shape (b, 3, H, W), already scaled by
                ``_preprocess``. H and W are 128 (or 256 for the back model).

        Returns:
            A list ``[r, c]`` with the raw box regressions of shape
            (b, 896, 16) and the raw classification scores of shape
            (b, 896, 1).
        """
        # TFLite uses slightly different padding on the first conv layer
        # than PyTorch, so do it manually.
        x = F.pad(x, (1, 2, 1, 2), "constant", 0)

        b = x.shape[0]      # batch size, needed for reshaping later

        if self.back_model:
            x = self.backbone(x)        # (b, 96, 16, 16)
            h = self.final(x)           # (b, 96, 8, 8)
        else:
            x = self.backbone1(x)       # (b, 88, 16, 16)
            h = self.backbone2(x)       # (b, 96, 8, 8)

        # Note: Because PyTorch is NCHW but TFLite is NHWC, we need to
        # permute the output from the conv layers before reshaping it.

        c1 = self.classifier_8(x)       # (b, 2, 16, 16)
        c1 = c1.permute(0, 2, 3, 1)     # (b, 16, 16, 2)
        c1 = c1.reshape(b, -1, 1)       # (b, 512, 1)

        c2 = self.classifier_16(h)      # (b, 6, 8, 8)
        c2 = c2.permute(0, 2, 3, 1)     # (b, 8, 8, 6)
        c2 = c2.reshape(b, -1, 1)       # (b, 384, 1)

        c = torch.cat((c1, c2), dim=1)  # (b, 896, 1)

        r1 = self.regressor_8(x)        # (b, 32, 16, 16)
        r1 = r1.permute(0, 2, 3, 1)     # (b, 16, 16, 32)
        r1 = r1.reshape(b, -1, 16)      # (b, 512, 16)

        r2 = self.regressor_16(h)       # (b, 96, 8, 8)
        r2 = r2.permute(0, 2, 3, 1)     # (b, 8, 8, 96)
        r2 = r2.reshape(b, -1, 16)      # (b, 384, 16)

        r = torch.cat((r1, r2), dim=1)  # (b, 896, 16)
        return [r, c]

    def _device(self):
        """Which device (CPU or GPU) is being used by this model?"""
        return self.classifier_8.weight.device

    def load_weights(self, path):
        """Load a ``state_dict`` checkpoint from ``path`` and switch to eval mode.

        NOTE(review): ``torch.load`` unpickles the file, so only load
        checkpoints that come from a trusted source.
        """
        # Deserialize onto the CPU so a checkpoint saved from a GPU also loads
        # on a machine without one; load_state_dict then copies the values
        # into the parameters on whatever device they already live on.
        state_dict = torch.load(path, map_location="cpu")
        self.load_state_dict(state_dict)
        self.eval()

    def load_anchors(self, path):
        """Load the (num_anchors, 4) anchor array stored as a NumPy file."""
        self.anchors = torch.tensor(np.load(path), dtype=torch.float32, device=self._device())
        assert (self.anchors.ndimension() == 2)
        assert (self.anchors.shape[0] == self.num_anchors)
        assert (self.anchors.shape[1] == 4)

    def _preprocess(self, x):
        """Converts the image pixels to the range [-1, 1]."""
        return x.float() / 127.5 - 1.0

    def predict_on_image(self, img):
        """Makes a prediction on a single image.

        Arguments:
            img: a NumPy array of shape (H, W, 3) or a PyTorch tensor of
                 shape (3, H, W). The image's height and width should be
                 128 pixels (256 pixels for the back model).

        Returns:
            A tensor with face detections.
        """
        if isinstance(img, np.ndarray):
            img = torch.from_numpy(img).permute((2, 0, 1))

        return self.predict_on_batch(img.unsqueeze(0))[0]

    def predict_on_batch(self, x):
        """Makes a prediction on a batch of images.

        Arguments:
            x: a NumPy array of shape (b, H, W, 3) or a PyTorch tensor of
               shape (b, 3, H, W). The height and width should be 128 pixels
               (256 pixels for the back model).

        Returns:
            A list containing a tensor of face detections for each image in
            the batch. If no faces are found for an image, returns a tensor
            of shape (0, 17).

        Raises:
            AttributeError: if ``load_anchors`` has not been called yet.

        Each face detection is a PyTorch tensor consisting of 17 numbers:
            - ymin, xmin, ymax, xmax
            - x,y-coordinates for the 6 keypoints
            - confidence score
        """
        if isinstance(x, np.ndarray):
            x = torch.from_numpy(x).permute((0, 3, 1, 2))

        expected_size = 256 if self.back_model else 128
        assert x.shape[1] == 3
        assert x.shape[2] == expected_size
        assert x.shape[3] == expected_size

        # Fail early, before running the network, with a message that says
        # what is missing.
        anchors = getattr(self, "anchors", None)
        if anchors is None:
            raise AttributeError(
                "BlazeFace anchors are not loaded; call load_anchors() first")

        # 1. Preprocess the images into tensors:
        device = self._device()
        x = x.to(device)
        x = self._preprocess(x)

        # 2. Run the neural network:
        with torch.no_grad():
            out = self(x)

        # 3. Postprocess the raw predictions. The anchors are a plain
        #    attribute (not a buffer), so they do not follow the model when
        #    it is moved; bring them to the model's device here.
        detections = self._tensors_to_detections(out[0], out[1], anchors.to(device))

        # 4. Non-maximum suppression to remove overlapping detections:
        filtered_detections = []
        for image_detections in detections:
            faces = self._weighted_non_max_suppression(image_detections)
            if len(faces) > 0:
                faces = torch.stack(faces)
            else:
                # Keep the empty result on the same device as real detections.
                faces = torch.zeros((0, self.num_coords + 1), device=device)
            filtered_detections.append(faces)

        return filtered_detections

    def _tensors_to_detections(self, raw_box_tensor, raw_score_tensor, anchors):
        """The output of the neural network is a tensor of shape (b, 896, 16)
        containing the bounding box regressor predictions, as well as a tensor
        of shape (b, 896, 1) with the classification confidences.

        This function converts these two "raw" tensors into proper detections.
        Returns a list of (num_detections, 17) tensors, one for each image in
        the batch.

        This is based on the source code from:
        mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc
        mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.proto
        """
        assert raw_box_tensor.ndimension() == 3
        assert raw_box_tensor.shape[1] == self.num_anchors
        assert raw_box_tensor.shape[2] == self.num_coords

        assert raw_score_tensor.ndimension() == 3
        assert raw_score_tensor.shape[1] == self.num_anchors
        assert raw_score_tensor.shape[2] == self.num_classes

        assert raw_box_tensor.shape[0] == raw_score_tensor.shape[0]

        detection_boxes = self._decode_boxes(raw_box_tensor, anchors)

        thresh = self.score_clipping_thresh
        raw_score_tensor = raw_score_tensor.clamp(-thresh, thresh)
        detection_scores = raw_score_tensor.sigmoid().squeeze(dim=-1)

        # Note: we stripped off the last dimension from the scores tensor
        # because there is only one class. Now we can simply use a mask
        # to filter out the boxes with too low confidence.
        mask = detection_scores >= self.min_score_thresh

        # Because each image from the batch can have a different number of
        # detections, process them one at a time using a loop.
        output_detections = []
        for i in range(raw_box_tensor.shape[0]):
            boxes = detection_boxes[i, mask[i]]
            scores = detection_scores[i, mask[i]].unsqueeze(dim=-1)
            output_detections.append(torch.cat((boxes, scores), dim=-1))

        return output_detections

    def _decode_boxes(self, raw_boxes, anchors):
        """Converts the predictions into actual coordinates using
        the anchor boxes. Processes the entire batch at once.

        The first four output values per anchor are ymin, xmin, ymax, xmax;
        the remaining values are (x, y) pairs, one per keypoint.
        """
        boxes = torch.zeros_like(raw_boxes)

        x_center = raw_boxes[..., 0] / self.x_scale * anchors[:, 2] + anchors[:, 0]
        y_center = raw_boxes[..., 1] / self.y_scale * anchors[:, 3] + anchors[:, 1]

        w = raw_boxes[..., 2] / self.w_scale * anchors[:, 2]
        h = raw_boxes[..., 3] / self.h_scale * anchors[:, 3]

        boxes[..., 0] = y_center - h / 2.  # ymin
        boxes[..., 1] = x_center - w / 2.  # xmin
        boxes[..., 2] = y_center + h / 2.  # ymax
        boxes[..., 3] = x_center + w / 2.  # xmax

        # Everything after the 4 box values is keypoint data: 6 pairs when
        # num_coords is 16.
        num_keypoints = (self.num_coords - 4) // 2
        for k in range(num_keypoints):
            offset = 4 + k * 2
            keypoint_x = raw_boxes[..., offset] / self.x_scale * anchors[:, 2] + anchors[:, 0]
            keypoint_y = raw_boxes[..., offset + 1] / self.y_scale * anchors[:, 3] + anchors[:, 1]
            boxes[..., offset] = keypoint_x
            boxes[..., offset + 1] = keypoint_y

        return boxes

    def _weighted_non_max_suppression(self, detections):
        """The alternative NMS method as mentioned in the BlazeFace paper:

        "We replace the suppression algorithm with a blending strategy that
        estimates the regression parameters of a bounding box as a weighted
        mean between the overlapping predictions."

        The original MediaPipe code assigns the score of the most confident
        detection to the weighted detection, but we take the average score
        of the overlapping detections.

        The input detections should be a Tensor of shape (count, 17).

        Returns a list of PyTorch tensors, one for each detected face.

        This is based on the source code from:
        mediapipe/calculators/util/non_max_suppression_calculator.cc
        mediapipe/calculators/util/non_max_suppression_calculator.proto
        """
        if len(detections) == 0:
            return []

        output_detections = []
        num_coords = self.num_coords  # the score sits right after the coordinates

        # Sort the detections from highest to lowest score.
        remaining = torch.argsort(detections[:, num_coords], descending=True)

        while len(remaining) > 0:
            detection = detections[remaining[0]]

            # Compute the overlap between the first box and the other
            # remaining boxes. (Note that the other_boxes also include
            # the first_box.)
            first_box = detection[:4]
            other_boxes = detections[remaining, :4]
            ious = overlap_similarity(first_box, other_boxes)

            # If two detections don't overlap enough, they are considered
            # to be from different faces.
            mask = ious > self.min_suppression_threshold
            # Always consume the detection being processed. For a degenerate
            # box (zero area or inverted corners) the IoU with itself is NaN
            # or 0, which would leave `remaining` unchanged and make this
            # loop spin forever.
            mask[0] = True
            overlapping = remaining[mask]
            remaining = remaining[~mask]

            # Take an average of the coordinates from the overlapping
            # detections, weighted by their confidence scores.
            weighted_detection = detection.clone()
            if len(overlapping) > 1:
                coordinates = detections[overlapping, :num_coords]
                scores = detections[overlapping, num_coords:num_coords + 1]
                total_score = scores.sum()
                weighted = (coordinates * scores).sum(dim=0) / total_score
                weighted_detection[:num_coords] = weighted
                weighted_detection[num_coords] = total_score / len(overlapping)

            output_detections.append(weighted_detection)

        return output_detections
435
+
436
+ # IOU code from https://github.com/amdegroot/ssd.pytorch/blob/master/layers/box_utils.py
437
+
438
+
439
def intersect(box_a, box_b):
    """Compute the intersection area of every box in ``box_a`` with every
    box in ``box_b``.

    Both inputs are viewed as [A,B,2] tensors without allocating new memory:
        [A,2] -> [A,1,2] -> [A,B,2]
        [B,2] -> [1,B,2] -> [A,B,2]
    Args:
      box_a: (tensor) bounding boxes, Shape: [A,4].
      box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
      (tensor) intersection area, Shape: [A,B].
    """
    pair_shape = (box_a.size(0), box_b.size(0), 2)

    # Corners stored in columns 2: (upper) and :2 (lower) of each box.
    upper_a = box_a[:, 2:].unsqueeze(1).expand(*pair_shape)
    upper_b = box_b[:, 2:].unsqueeze(0).expand(*pair_shape)
    lower_a = box_a[:, :2].unsqueeze(1).expand(*pair_shape)
    lower_b = box_b[:, :2].unsqueeze(0).expand(*pair_shape)

    # The overlap spans from the larger lower corner to the smaller upper
    # corner; a negative extent means the pair does not overlap.
    extent = torch.min(upper_a, upper_b) - torch.max(lower_a, lower_b)
    extent = torch.clamp(extent, min=0)
    return extent[:, :, 0] * extent[:, :, 1]
458
+
459
+
460
def jaccard(box_a, box_b):
    """Compute the pairwise jaccard overlap (intersection over union) of two
    sets of boxes.

    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4]
        box_b: (tensor) bounding boxes, Shape: [B,4]
    Return:
        jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
    """
    overlap = intersect(box_a, box_b)

    # Per-box areas, computed from the two corner points of each box.
    sizes_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
    sizes_b = (box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1])

    # Spread the areas over the [A,B] grid of pairs.
    area_a = sizes_a.unsqueeze(1).expand_as(overlap)
    area_b = sizes_b.unsqueeze(0).expand_as(overlap)

    union = area_a + area_b - overlap
    return overlap / union  # [A,B]
479
+
480
+
481
def overlap_similarity(box, other_boxes):
    """Return the IOU of a single box against each box in ``other_boxes``."""
    # jaccard works on sets of boxes, so wrap the single box as a 1-row set
    # and drop that leading dimension from the result.
    pairwise = jaccard(box.unsqueeze(0), other_boxes)
    return pairwise.squeeze(0)
README.md ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ tags: []
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+ This is the model card of a 🤗 transformers model that has been pushed to the Hub. This model card has been automatically generated.
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BlazeFace"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "DCU_CONFIG.DcuConfig",
7
+ "AutoModel": "DCU_MODEL.BlazeFace"
8
+ },
9
+ "torch_dtype": "float32",
10
+ "transformers_version": "4.42.4"
11
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b1b873fa868df292abd44b5fc1a08849d9503e99ad3cae2582710000ac22354
3
+ size 412216