cledouxluma commited on
Commit
4931970
·
verified ·
1 Parent(s): d967739

Upload data/augmentations.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. data/augmentations.py +309 -0
data/augmentations.py ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Augmentation Pipeline for Face Detection.
3
+
4
+ Implements SCRFD's "Sample Redistribution" strategy plus production-grade
5
+ robustness augmentations for:
6
+ - Tiny faces (large-scale crops generate small face positives)
7
+ - Blur (Gaussian, motion blur)
8
+ - Compression artifacts (JPEG quality degradation)
9
+ - Low-light / poor illumination (brightness/gamma jitter)
10
+ - Occlusion (random erasing simulating partial occlusion)
11
+
12
+ Training augmentation pipeline (from SCRFD + TinaFace papers):
13
+ 1. Random crop with scale [0.3, 2.0] (Sample Redistribution)
14
+ 2. Resize to target size (640×640)
15
+ 3. Photometric distortion (brightness, contrast, hue, saturation)
16
+ 4. Horizontal flip (p=0.5)
17
+ 5. Random blur / compression / lighting degradation
18
+ 6. Normalize (per-channel mean subtraction; default mean (104, 117, 123))
19
+ """
20
+
21
+ import numpy as np
22
+ import cv2
23
+ from typing import Dict, Tuple, Optional
24
+
25
+
26
class TrainAugmentation:
    """
    Full training augmentation with SCRFD Sample Redistribution.

    The key insight: using crop scales up to 2.0× generates more
    small-face positive anchors at stride 8 (72K → 118K per paper).

    Steps applied by ``__call__``, in order: random square crop, resize to
    ``target_size`` × ``target_size``, photometric distortion, horizontal
    flip, optional robustness augmentations, per-channel mean subtraction.

    Args:
        target_size: Side length of the square output image.
        crop_scales: Candidate crop sizes, expressed as a fraction of the
            shorter image side. ``None`` or an empty sequence selects the
            default range 0.3–2.0.
        mean: Per-channel values subtracted from the image as the last step.
        flip_prob: Probability of applying the horizontal flip.
        enable_robustness: When true, a ``RobustnessAugmentation`` instance
            is created and applied after the flip.
    """

    def __init__(self,
                 target_size: int = 640,
                 crop_scales: list = None,
                 mean: tuple = (104.0, 117.0, 123.0),
                 flip_prob: float = 0.5,
                 enable_robustness: bool = True):
        self.target_size = target_size
        # Explicit emptiness test: ``crop_scales or [...]`` raises for
        # multi-element NumPy arrays because their truth value is ambiguous.
        if crop_scales is None or len(crop_scales) == 0:
            crop_scales = [0.3, 0.45, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0]
        self.crop_scales = list(crop_scales)
        self.mean = np.array(mean, dtype=np.float32)
        self.flip_prob = flip_prob
        self.enable_robustness = enable_robustness
        self.robustness_aug = RobustnessAugmentation() if enable_robustness else None

    def __call__(self, image: np.ndarray, boxes: np.ndarray,
                 landmarks: np.ndarray) -> Dict:
        """Augment one sample.

        Args:
            image: Array whose first two axes are height and width.
            boxes: Array with one row per face; columns 0-3 are read as
                x1, y1, x2, y2.
            landmarks: Array with one row per face; columns 0-9 are read as
                five interleaved (x, y) points.

        Returns:
            Dict with keys ``'image'`` (float32, mean-subtracted),
            ``'boxes'`` and ``'landmarks'``. The input annotation arrays are
            never modified.
        """
        # Float copies: keeps the caller's annotations intact across epochs
        # and lets integer annotations be scaled by fractional factors.
        boxes = self._as_float_copy(boxes)
        landmarks = self._as_float_copy(landmarks)

        # 1. Random crop with Sample Redistribution
        image, boxes, landmarks = self._random_crop(image, boxes, landmarks)

        # 2. Resize to target
        image, boxes, landmarks = self._resize(image, boxes, landmarks)

        # 3. Photometric distortion
        image = self._photometric_distort(image)

        # 4. Horizontal flip
        if np.random.random() < self.flip_prob:
            image, boxes, landmarks = self._hflip(image, boxes, landmarks)

        # 5. Robustness augmentations (blur, compression, lighting)
        if self.enable_robustness and self.robustness_aug is not None:
            image = self.robustness_aug(image)

        # 6. Mean subtraction (SCRFD-style normalization)
        image = image.astype(np.float32) - self.mean

        return {'image': image, 'boxes': boxes, 'landmarks': landmarks}

    @staticmethod
    def _as_float_copy(array) -> np.ndarray:
        """Return a floating-point copy of *array* (float dtypes are kept)."""
        array = np.asarray(array)
        return array.astype(np.result_type(array.dtype, np.float32))

    @staticmethod
    def _pad_bottom_right(image: np.ndarray, pad_h: int, pad_w: int) -> np.ndarray:
        """Zero-pad *image* by ``pad_h`` rows at the bottom and ``pad_w`` columns on the right."""
        pad_h = max(int(pad_h), 0)
        pad_w = max(int(pad_w), 0)
        if pad_h == 0 and pad_w == 0:
            return image
        # Only the two spatial axes are padded; any channel axis is left alone.
        pad_spec = ((0, pad_h), (0, pad_w)) + ((0, 0),) * (image.ndim - 2)
        return np.pad(image, pad_spec, mode='constant', constant_values=0)

    def _random_crop(self, image: np.ndarray, boxes: np.ndarray,
                     landmarks: np.ndarray) -> Tuple:
        """Random square crop with sample redistribution scales.

        The crop side is ``min(h, w) * scale`` (at least 32 px). The image is
        zero-padded at the bottom/right whenever the crop exceeds either
        dimension, so the returned crop is always square. Boxes are shifted
        and clipped to the crop; a box is kept when it is wider and taller
        than 2 px and more than 20% of its original area remains visible.

        If no box survives (including when there are no boxes at all), the
        whole image padded to a square is returned with unchanged
        annotations, so the returned boxes always lie inside the returned
        image.

        Returns:
            Tuple ``(image, boxes, landmarks)``; the annotation arrays are
            new arrays, never the inputs.
        """
        h, w = image.shape[:2]
        scale = np.random.choice(self.crop_scales)
        crop_size = max(int(min(h, w) * scale), 32)

        # Pad each axis independently. Padding only when the crop exceeds the
        # *longer* side would leave the shorter axis too small and produce a
        # non-square crop.
        padded = self._pad_bottom_right(image, crop_size - h, crop_size - w)
        padded_h, padded_w = padded.shape[:2]

        # randint's upper bound is exclusive; +1 makes the last offset reachable.
        x1 = np.random.randint(0, padded_w - crop_size + 1)
        y1 = np.random.randint(0, padded_h - crop_size + 1)
        cropped = padded[y1:y1 + crop_size, x1:x1 + crop_size]

        # Move boxes into crop coordinates and clip them to the crop.
        new_boxes = boxes.copy()
        new_boxes[:, 0:4:2] -= x1
        new_boxes[:, 1:4:2] -= y1
        new_boxes[:, :4] = np.clip(new_boxes[:, :4], 0, crop_size)

        # Filter valid boxes (more than 20% of original area visible)
        orig_areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        new_widths = new_boxes[:, 2] - new_boxes[:, 0]
        new_heights = new_boxes[:, 3] - new_boxes[:, 1]
        valid = ((new_widths > 2) & (new_heights > 2)
                 & (new_widths * new_heights > 0.2 * orig_areas))

        if not valid.any():
            # Fallback: keep the full image. Padding (not cropping) to a
            # square keeps every original box inside the returned image.
            side = max(h, w)
            full = self._pad_bottom_right(image, side - h, side - w)
            return full, boxes.copy(), landmarks.copy()

        # NOTE(review): every landmark coordinate is shifted, including any
        # "missing landmark" sentinel values the dataset may use — confirm.
        new_lmk = landmarks[valid].copy()
        new_lmk[:, 0:10:2] -= x1
        new_lmk[:, 1:10:2] -= y1

        return cropped, new_boxes[valid], new_lmk

    def _resize(self, image: np.ndarray, boxes: np.ndarray,
                landmarks: np.ndarray) -> Tuple:
        """Resize the image to ``target_size`` × ``target_size`` and scale annotations.

        x and y are scaled independently, so a non-square input is stretched.
        Scaled copies of ``boxes`` and ``landmarks`` are returned; the inputs
        are left untouched.
        """
        h, w = image.shape[:2]
        scale_x = self.target_size / w
        scale_y = self.target_size / h

        image = cv2.resize(image, (self.target_size, self.target_size))

        boxes = self._as_float_copy(boxes)
        boxes[:, 0:4:2] *= scale_x
        boxes[:, 1:4:2] *= scale_y

        landmarks = self._as_float_copy(landmarks)
        landmarks[:, 0:10:2] *= scale_x
        landmarks[:, 1:10:2] *= scale_y

        return image, boxes, landmarks

    def _photometric_distort(self, image: np.ndarray) -> np.ndarray:
        """Random photometric distortion (brightness, contrast, hue, saturation).

        Each of the three stages is applied independently with probability
        0.5. Returns a new float32 array clipped to [0, 255].

        NOTE(review): the HSV round trip uses ``COLOR_RGB2HSV`` — confirm
        that callers pass RGB-ordered images.
        """
        image = image.astype(np.float32)

        # Brightness
        if np.random.random() < 0.5:
            image += np.random.uniform(-32, 32)

        # Contrast
        if np.random.random() < 0.5:
            image *= np.random.uniform(0.5, 1.5)

        # Color jitter in HSV
        if np.random.random() < 0.5:
            image_uint8 = np.clip(image, 0, 255).astype(np.uint8)
            hsv = cv2.cvtColor(image_uint8, cv2.COLOR_RGB2HSV).astype(np.float32)

            # Hue is circular (8-bit OpenCV range is [0, 180)), so wrap the
            # shifted value instead of clipping it; clipping would collapse
            # every hue near the red boundary onto a single value.
            hsv[:, :, 0] = (hsv[:, :, 0] + np.random.uniform(-18, 18)) % 180

            # Saturation
            hsv[:, :, 1] *= np.random.uniform(0.5, 1.5)
            hsv[:, :, 1] = np.clip(hsv[:, :, 1], 0, 255)

            image = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB).astype(np.float32)

        return np.clip(image, 0, 255)

    def _hflip(self, image: np.ndarray, boxes: np.ndarray,
               landmarks: np.ndarray) -> Tuple:
        """Horizontal flip with landmark reordering.

        Mirrors the image, box x-coordinates and landmark x-coordinates
        around the image width, then swaps the left/right landmark pairs so
        "left" still refers to the same side of the flipped face. Returns
        new arrays; inputs are not modified.
        """
        w = image.shape[1]
        flipped = image[:, ::-1].copy()

        new_boxes = boxes.copy()
        new_boxes[:, 0] = w - boxes[:, 2]
        new_boxes[:, 2] = w - boxes[:, 0]

        new_lmk = landmarks.copy()
        new_lmk[:, 0:10:2] = w - landmarks[:, 0:10:2]

        # Reorder landmarks for face symmetry:
        # Standard 5-point: left_eye, right_eye, nose, left_mouth, right_mouth
        # After flip: swap left↔right
        # NOTE(review): this guard is evaluated on the already-flipped values
        # and over the whole array, not per face — confirm how missing
        # landmarks are encoded before relying on it.
        if new_lmk.shape[0] > 0 and np.any(new_lmk > 0):
            # Swap left_eye ↔ right_eye
            new_lmk[:, [0, 1, 2, 3]] = new_lmk[:, [2, 3, 0, 1]]
            # Swap left_mouth ↔ right_mouth
            new_lmk[:, [6, 7, 8, 9]] = new_lmk[:, [8, 9, 6, 7]]

        return flipped, new_boxes, new_lmk
209
+
210
+
211
class ValAugmentation:
    """Validation: aspect-preserving resize, zero-pad to a square, mean subtraction.

    Args:
        target_size: Side length of the square output image.
        mean: Per-channel values subtracted from the padded image.
    """

    def __init__(self, target_size: int = 640,
                 mean: tuple = (104.0, 117.0, 123.0)):
        self.target_size = target_size
        self.mean = np.array(mean, dtype=np.float32)

    def __call__(self, image: np.ndarray, boxes: np.ndarray,
                 landmarks: np.ndarray) -> Dict:
        """Prepare one validation sample.

        The longer image side is scaled to ``target_size``; the remainder is
        zero-padded at the bottom/right, so annotation coordinates only need
        to be multiplied by the scale factor.

        Args:
            image: Array whose first two axes are height and width.
            boxes: Array with one row per face; columns 0-3 are scaled.
            landmarks: Array with one row per face; columns 0-9 are scaled.

        Returns:
            Dict with keys ``'image'`` (float32, mean-subtracted),
            ``'boxes'`` and ``'landmarks'``. The annotation arrays are new
            floating-point arrays; the inputs are not modified.
        """
        h, w = image.shape[:2]

        # Resize keeping aspect ratio. Round (not truncate) so float error
        # cannot shrink the long side to target_size - 1, and clamp so an
        # extreme aspect ratio never produces a zero-sized dimension.
        scale = self.target_size / max(h, w)
        new_h = min(self.target_size, max(1, int(round(h * scale))))
        new_w = min(self.target_size, max(1, int(round(w * scale))))
        if (new_h, new_w) != (h, w):
            image = cv2.resize(image, (new_w, new_h))

        # Pad to target size (bottom/right only, so coordinates are unchanged).
        pad_h = self.target_size - new_h
        pad_w = self.target_size - new_w
        if pad_h > 0 or pad_w > 0:
            pad_spec = ((0, pad_h), (0, pad_w)) + ((0, 0),) * (image.ndim - 2)
            image = np.pad(image, pad_spec, mode='constant', constant_values=0)

        # Scale annotations on float copies: in-place scaling would modify
        # the caller's arrays and raises for integer dtypes.
        boxes = np.asarray(boxes)
        boxes = boxes.astype(np.result_type(boxes.dtype, np.float32))
        boxes[:, :4] *= scale

        landmarks = np.asarray(landmarks)
        landmarks = landmarks.astype(np.result_type(landmarks.dtype, np.float32))
        landmarks[:, :10] *= scale

        image = image.astype(np.float32) - self.mean

        return {'image': image, 'boxes': boxes, 'landmarks': landmarks}
247
+
248
+
249
class RobustnessAugmentation:
    """
    Production-grade robustness augmentations targeting known failure modes.

    Each step is applied independently with its own probability, in this
    order:
    1. Gaussian blur (σ = 0.5–3.0)
    2. JPEG compression (quality drawn from 20–79)
    3. Low-light gamma darkening (γ = 1.5–3.0)
    4. Random occlusion (Cutout-style rectangle with a random fill value)
    5. Additive Gaussian noise (σ = 5–25)

    Args:
        blur_prob: Probability of the Gaussian blur step.
        jpeg_prob: Probability of the JPEG round-trip step.
        lowlight_prob: Probability of the gamma darkening step.
        occlusion_prob: Probability of the occlusion step.
        noise_prob: Probability of the Gaussian noise step.
    """

    def __init__(self,
                 blur_prob: float = 0.2,
                 jpeg_prob: float = 0.2,
                 lowlight_prob: float = 0.15,
                 occlusion_prob: float = 0.1,
                 noise_prob: float = 0.15):
        self.blur_prob = blur_prob
        self.jpeg_prob = jpeg_prob
        self.lowlight_prob = lowlight_prob
        self.occlusion_prob = occlusion_prob
        self.noise_prob = noise_prob

    def __call__(self, image: np.ndarray) -> np.ndarray:
        """Apply the randomly selected degradations.

        Args:
            image: Array whose first two axes are height and width; values
                are treated as lying in [0, 255].

        Returns:
            A new float32 array. The input array is never modified.
        """
        # Gaussian blur
        if np.random.random() < self.blur_prob:
            sigma = np.random.uniform(0.5, 3.0)
            ksize = int(sigma * 6) | 1  # kernel size must be odd
            image = cv2.GaussianBlur(image, (ksize, ksize), sigma)

        # JPEG compression artifacts
        if np.random.random() < self.jpeg_prob:
            # Plain Python int: the encoder parameter list is a list of ints.
            quality = int(np.random.randint(20, 80))
            encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
            # Clip before the uint8 cast so out-of-range values saturate
            # instead of wrapping around.
            as_uint8 = np.clip(image, 0, 255).astype(np.uint8)
            ok, buf = cv2.imencode('.jpg', as_uint8, encode_param)
            if ok:  # keep the undegraded image if encoding failed
                image = cv2.imdecode(buf, cv2.IMREAD_COLOR).astype(np.float32)

        # Low-light simulation (gamma darkening)
        if np.random.random() < self.lowlight_prob:
            gamma = np.random.uniform(1.5, 3.0)
            image = np.clip(image, 0, 255)
            image = (image / 255.0) ** gamma * 255.0

        # Random occlusion (Cutout)
        if np.random.random() < self.occlusion_prob:
            h, w = image.shape[:2]
            if h > 0 and w > 0:
                # Rectangle of roughly 10%–25% of each side. The upper bound
                # is forced above the lower one so tiny images (where
                # h // 10 == h // 4) do not make randint raise.
                rh = np.random.randint(h // 10, max(h // 4, h // 10 + 1))
                rw = np.random.randint(w // 10, max(w // 4, w // 10 + 1))
                ry = np.random.randint(0, h - rh)
                rx = np.random.randint(0, w - rw)
                # One random fill value per channel (a single value for 2-D
                # images), matching whatever channel layout the image has.
                fill = np.random.randint(0, 255, image.shape[2:])
                # Copy first: when no earlier step ran, `image` is still the
                # caller's array and must not be written to.
                image = image.copy()
                image[ry:ry + rh, rx:rx + rw] = fill

        # Gaussian noise
        if np.random.random() < self.noise_prob:
            sigma = np.random.uniform(5, 25)
            noise = np.random.randn(*image.shape) * sigma
            image = np.clip(image + noise, 0, 255)

        return image.astype(np.float32)