cledouxluma
/

facedet

Model card Files Files and versions

xet

Community

cledouxluma committed on 15 days ago

Commit

4931970

verified ·

1 Parent(s): d967739

Upload data/augmentations.py with huggingface_hub

Browse files

Files changed (1) hide show

data/augmentations.py +309 -0

data/augmentations.py ADDED Viewed

	@@ -0,0 +1,309 @@

+"""
+Augmentation Pipeline for Face Detection.
+Implements SCRFD's "Sample Redistribution" strategy plus production-grade
+robustness augmentations for:
+- Tiny faces (large-scale crops generate small face positives)
+- Blur (Gaussian, motion blur)
+- Compression artifacts (JPEG quality degradation)
+- Low-light / poor illumination (brightness/gamma jitter)
+- Occlusion (random erasing simulating partial occlusion)
+Training augmentation pipeline (from SCRFD + TinaFace papers):
+1. Random crop with scale [0.3, 2.0] (Sample Redistribution)
+2. Resize to target size (640×640)
+3. Photometric distortion (brightness, contrast, hue, saturation)
+4. Horizontal flip (p=0.5)
+5. Random blur / compression / lighting degradation
+6. Normalize (per-channel mean subtraction, SCRFD-style; no std scaling)
+"""
+import numpy as np
+import cv2
+from typing import Dict, Tuple, Optional
class TrainAugmentation:
    """
    Full training augmentation with SCRFD Sample Redistribution.

    The key insight: using crop scales up to 2.0× generates more
    small-face positive anchors at stride 8 (72K → 118K per paper).

    Pipeline applied by ``__call__``: random square crop → resize to
    ``target_size`` → photometric distortion → horizontal flip →
    optional robustness augmentations → per-channel mean subtraction.

    Annotations are expected as ``boxes`` with columns ``x1, y1, x2, y2`` and
    ``landmarks`` with ten columns ``x0, y0, ..., x4, y4``. The caller's
    arrays are never modified; every step works on copies.
    """

    _DEFAULT_CROP_SCALES = (0.3, 0.45, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0)
    _MIN_CROP_SIZE = 32
    _LANDMARK_COLS = 10  # five (x, y) points

    def __init__(self,
                 target_size: int = 640,
                 crop_scales: list = None,
                 mean: tuple = (104.0, 117.0, 123.0),
                 flip_prob: float = 0.5,
                 enable_robustness: bool = True):
        """
        Args:
            target_size: Side length of the square output image.
            crop_scales: Candidate crop sizes as a fraction of the shorter
                image side; ``None`` or an empty sequence selects the defaults.
            mean: Per-channel mean subtracted in the last step.
            flip_prob: Probability of a horizontal flip.
            enable_robustness: Whether to apply ``RobustnessAugmentation``.
        """
        self.target_size = target_size
        # Explicit None/empty test: `crop_scales or [...]` raises on ndarrays.
        if crop_scales is None or len(crop_scales) == 0:
            crop_scales = self._DEFAULT_CROP_SCALES
        self.crop_scales = list(crop_scales)
        self.mean = np.array(mean, dtype=np.float32)
        self.flip_prob = flip_prob
        self.enable_robustness = enable_robustness
        self.robustness_aug = RobustnessAugmentation() if enable_robustness else None

    @staticmethod
    def _as_float_copy(arr: np.ndarray, cols: int) -> np.ndarray:
        """Return a 2-D floating-point copy of *arr*.

        Floating inputs keep their dtype, anything else becomes float32 so
        that fractional scaling works. Non-2-D input (including an empty 1-D
        array) is reshaped to ``(-1, cols)``.
        """
        arr = np.asarray(arr)
        dtype = arr.dtype if np.issubdtype(arr.dtype, np.floating) else np.float32
        if arr.ndim != 2:
            arr = arr.reshape(-1, cols)
        return arr.astype(dtype, copy=True)

    def __call__(self, image: np.ndarray, boxes: np.ndarray,
                 landmarks: np.ndarray) -> Dict:
        """Augment one sample.

        Returns:
            Dict with ``'image'`` (float32, mean-subtracted,
            ``target_size × target_size``), ``'boxes'`` and ``'landmarks'``
            expressed in the output image's pixel coordinates.
        """
        # 1. Random crop with Sample Redistribution
        image, boxes, landmarks = self._random_crop(image, boxes, landmarks)
        # 2. Resize to target
        image, boxes, landmarks = self._resize(image, boxes, landmarks)
        # 3. Photometric distortion
        image = self._photometric_distort(image)
        # 4. Horizontal flip
        if np.random.random() < self.flip_prob:
            image, boxes, landmarks = self._hflip(image, boxes, landmarks)
        # 5. Robustness augmentations (blur, compression, lighting)
        if self.enable_robustness and self.robustness_aug is not None:
            image = self.robustness_aug(image)
        # 6. Mean subtraction (SCRFD-style normalization)
        image = image.astype(np.float32) - self.mean
        return {'image': image, 'boxes': boxes, 'landmarks': landmarks}

    def _random_crop(self, image: np.ndarray, boxes: np.ndarray,
                     landmarks: np.ndarray) -> Tuple:
        """Random square crop with sample redistribution scales.

        The crop side is ``scale * min(h, w)`` (at least 32 px). The image is
        zero-padded on the bottom/right whenever the crop exceeds either
        dimension, so the crop is always square. Boxes are shifted, clipped
        to the crop and kept only if more than 2 px wide and high and more
        than 20% of their original area remains visible.

        Returns:
            ``(image, boxes, landmarks)``. If the input has boxes but none
            survives the crop, the original, uncropped image is returned with
            unchanged copies of the annotations so they stay consistent.
        """
        boxes = self._as_float_copy(boxes, 4)
        landmarks = self._as_float_copy(landmarks, self._LANDMARK_COLS)
        source_image = image
        h, w = image.shape[:2]

        scale = np.random.choice(self.crop_scales)
        crop_size = max(int(min(h, w) * scale), self._MIN_CROP_SIZE)

        # Pad per dimension; testing only against max(h, w) would leave the
        # shorter side too small for scales between 1.0 and the aspect ratio.
        pad_h = max(crop_size - h, 0)
        pad_w = max(crop_size - w, 0)
        if pad_h or pad_w:
            image = cv2.copyMakeBorder(image, 0, pad_h, 0, pad_w,
                                       cv2.BORDER_CONSTANT, value=(0, 0, 0))
            h, w = image.shape[:2]

        # randint's upper bound is exclusive, hence the +1 to reach the edge.
        x1 = int(np.random.randint(0, w - crop_size + 1))
        y1 = int(np.random.randint(0, h - crop_size + 1))
        cropped = image[y1:y1 + crop_size, x1:x1 + crop_size]

        if boxes.shape[0] == 0:
            # Nothing to validate: any crop of a face-free image is usable.
            return cropped, boxes, landmarks

        offset = np.array([x1, y1, x1, y1], dtype=boxes.dtype)
        new_boxes = boxes.copy()
        new_boxes[:, :4] = np.clip(boxes[:, :4] - offset, 0, crop_size)

        # Filter valid boxes (more than 20% of original area visible)
        orig_areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        new_widths = new_boxes[:, 2] - new_boxes[:, 0]
        new_heights = new_boxes[:, 3] - new_boxes[:, 1]
        valid = ((new_widths > 2) & (new_heights > 2)
                 & (new_widths * new_heights > 0.2 * orig_areas))
        if not valid.any():
            # Fallback: keep the whole original image so that the untouched
            # annotations still refer to pixels that are actually present.
            return source_image, boxes, landmarks

        new_lmk = landmarks[valid]
        new_lmk[:, 0:self._LANDMARK_COLS:2] -= x1
        new_lmk[:, 1:self._LANDMARK_COLS:2] -= y1
        return cropped, new_boxes[valid], new_lmk

    def _resize(self, image: np.ndarray, boxes: np.ndarray,
                landmarks: np.ndarray) -> Tuple:
        """Resize to ``target_size × target_size`` and rescale annotations.

        The x and y axes are scaled independently, so a non-square input is
        stretched. Scaled copies of the annotations are returned.
        """
        h, w = image.shape[:2]
        scale_x = self.target_size / w
        scale_y = self.target_size / h
        image = cv2.resize(image, (self.target_size, self.target_size))

        boxes = self._as_float_copy(boxes, 4)
        boxes[:, 0:4:2] *= scale_x
        boxes[:, 1:4:2] *= scale_y

        landmarks = self._as_float_copy(landmarks, self._LANDMARK_COLS)
        landmarks[:, 0:self._LANDMARK_COLS:2] *= scale_x
        landmarks[:, 1:self._LANDMARK_COLS:2] *= scale_y
        return image, boxes, landmarks

    def _photometric_distort(self, image: np.ndarray) -> np.ndarray:
        """Random photometric distortion (brightness, contrast, hue, saturation).

        Each of the three stages fires independently with probability 0.5.
        Returns a float32 image clipped to [0, 255].

        NOTE(review): the default mean (104, 117, 123) looks like a BGR
        convention while the HSV jitter uses the RGB conversion codes. The
        round trip is self-consistent and the jitter is symmetric, so this
        should only mirror the hue-shift direction — confirm channel order.
        """
        image = image.astype(np.float32)
        # Brightness
        if np.random.random() < 0.5:
            image += np.random.uniform(-32, 32)
        # Contrast
        if np.random.random() < 0.5:
            image *= np.random.uniform(0.5, 1.5)
        # Color jitter in HSV
        if np.random.random() < 0.5:
            image_uint8 = np.clip(image, 0, 255).astype(np.uint8)
            hsv = cv2.cvtColor(image_uint8, cv2.COLOR_RGB2HSV).astype(np.float32)
            # Hue is circular and 8-bit OpenCV stores it in [0, 179]: wrap
            # instead of clipping, which would pile shifted hues up at the
            # ends and could emit the out-of-range value 180.
            hue_shift = np.random.uniform(-18, 18)
            hsv[:, :, 0] = np.mod(np.rint(hsv[:, :, 0] + hue_shift), 180)
            # Saturation
            hsv[:, :, 1] *= np.random.uniform(0.5, 1.5)
            hsv[:, :, 1] = np.clip(hsv[:, :, 1], 0, 255)
            image = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB).astype(np.float32)
        return np.clip(image, 0, 255)

    def _hflip(self, image: np.ndarray, boxes: np.ndarray,
               landmarks: np.ndarray) -> Tuple:
        """Horizontal flip with landmark reordering.

        Box x-coordinates are mirrored (and swapped so x1 <= x2 still holds);
        landmark x-coordinates are mirrored and the left/right eye and
        left/right mouth-corner points exchange slots.
        """
        w = image.shape[1]
        image = image[:, ::-1].copy()

        boxes = self._as_float_copy(boxes, 4)
        new_boxes = boxes.copy()
        new_boxes[:, 0] = w - boxes[:, 2]
        new_boxes[:, 2] = w - boxes[:, 0]

        new_lmk = self._as_float_copy(landmarks, self._LANDMARK_COLS)
        new_lmk[:, 0:self._LANDMARK_COLS:2] = w - new_lmk[:, 0:self._LANDMARK_COLS:2]
        # Standard 5-point order: left_eye, right_eye, nose, left_mouth,
        # right_mouth. After mirroring, left and right trade places.
        new_lmk[:, [0, 1, 2, 3]] = new_lmk[:, [2, 3, 0, 1]]
        new_lmk[:, [6, 7, 8, 9]] = new_lmk[:, [8, 9, 6, 7]]
        return image, new_boxes, new_lmk
class ValAugmentation:
    """Validation: aspect-preserving resize, bottom/right zero-pad, mean subtraction.

    The caller's ``boxes`` and ``landmarks`` arrays are left untouched; scaled
    copies are returned.
    """

    _LANDMARK_COLS = 10  # five (x, y) points

    def __init__(self, target_size: int = 640,
                 mean: tuple = (104.0, 117.0, 123.0)):
        """
        Args:
            target_size: Side length of the square output image.
            mean: Per-channel mean subtracted from the padded image.
        """
        self.target_size = target_size
        self.mean = np.array(mean, dtype=np.float32)

    def _resized_shape(self, h: int, w: int) -> Tuple:
        """Return ``(new_h, new_w)`` for an aspect-preserving fit.

        The longer side maps to exactly ``target_size`` (avoiding a spurious
        one-pixel pad from float rounding); the shorter side is truncated as
        before but never drops below one pixel.
        """
        scale = self.target_size / max(h, w)
        new_h = self.target_size if h >= w else int(h * scale)
        new_w = self.target_size if w >= h else int(w * scale)
        new_h = min(max(new_h, 1), self.target_size)
        new_w = min(max(new_w, 1), self.target_size)
        return new_h, new_w

    @classmethod
    def _scale_annotations(cls, boxes: np.ndarray, landmarks: np.ndarray,
                           scale: float) -> Tuple:
        """Return copies of the annotations multiplied by *scale*.

        Only the first four box columns and the first ten landmark columns
        are scaled. Floating inputs keep their dtype; other dtypes are
        converted to float32 so the fractional scale is not lost.
        """
        scaled = []
        for arr, cols in ((boxes, 4), (landmarks, cls._LANDMARK_COLS)):
            arr = np.asarray(arr)
            dtype = arr.dtype if np.issubdtype(arr.dtype, np.floating) else np.float32
            if arr.ndim != 2:
                arr = arr.reshape(-1, cols)
            out = arr.astype(dtype, copy=True)
            out[:, :cols] *= scale
            scaled.append(out)
        return scaled[0], scaled[1]

    def __call__(self, image: np.ndarray, boxes: np.ndarray,
                 landmarks: np.ndarray) -> Dict:
        """Prepare one validation sample.

        Returns:
            Dict with ``'image'`` (float32, mean-subtracted,
            ``target_size × target_size``; content anchored at the top-left),
            ``'boxes'`` and ``'landmarks'`` scaled to the resized image.
        """
        h, w = image.shape[:2]
        # Resize keeping aspect ratio
        scale = self.target_size / max(h, w)
        new_h, new_w = self._resized_shape(h, w)
        image = cv2.resize(image, (new_w, new_h))
        # Pad to target size
        pad_h = self.target_size - new_h
        pad_w = self.target_size - new_w
        image = cv2.copyMakeBorder(image, 0, pad_h, 0, pad_w,
                                   cv2.BORDER_CONSTANT, value=(0, 0, 0))
        boxes, landmarks = self._scale_annotations(boxes, landmarks, scale)
        image = image.astype(np.float32) - self.mean
        return {'image': image, 'boxes': boxes, 'landmarks': landmarks}
class RobustnessAugmentation:
    """
    Production-grade robustness augmentations targeting known failure modes.

    Each stage fires independently with its own probability:
    1. Gaussian blur (σ = 0.5–3.0) — camera defocus
    2. JPEG compression (Q = 20–80) — streaming/compression artifacts
    3. Low-light gamma (γ = 1.5–3.0) — dark environments
    4. Random occlusion (Cutout) — partial face occlusion
    5. Gaussian noise (σ = 5–25) — sensor noise, low-light grain

    The input image is never modified; a new float32 array is returned.
    Pixel values are assumed to be on a 0–255 scale.
    """

    def __init__(self,
                 blur_prob: float = 0.2,
                 jpeg_prob: float = 0.2,
                 lowlight_prob: float = 0.15,
                 occlusion_prob: float = 0.1,
                 noise_prob: float = 0.15):
        self.blur_prob = blur_prob
        self.jpeg_prob = jpeg_prob
        self.lowlight_prob = lowlight_prob
        self.occlusion_prob = occlusion_prob
        self.noise_prob = noise_prob

    @staticmethod
    def _jpeg_roundtrip(image: np.ndarray) -> np.ndarray:
        """Encode *image* as JPEG at a random quality in [20, 80] and decode it."""
        # randint's upper bound is exclusive; 81 makes Q=80 reachable.
        quality = int(np.random.randint(20, 81))
        # Clip before the uint8 cast so out-of-range floats do not wrap around.
        as_uint8 = np.clip(image, 0, 255).astype(np.uint8)
        ok, buf = cv2.imencode('.jpg', as_uint8,
                               [int(cv2.IMWRITE_JPEG_QUALITY), quality])
        if not ok:
            return image
        flag = cv2.IMREAD_GRAYSCALE if image.ndim == 2 else cv2.IMREAD_COLOR
        return cv2.imdecode(buf, flag).astype(np.float32)

    @staticmethod
    def _random_extent(length: int) -> int:
        """Pick a patch extent of roughly 10–25% of *length*, at least 1 px."""
        low = max(length // 10, 1)
        high = max(length // 4, low + 1)  # keep low < high for tiny images
        return min(int(np.random.randint(low, high)), length)

    @classmethod
    def _cutout(cls, image: np.ndarray) -> np.ndarray:
        """Overwrite one random rectangle of *image* (in place) with a flat colour."""
        h, w = image.shape[:2]
        if h == 0 or w == 0:
            return image
        rh = cls._random_extent(h)
        rw = cls._random_extent(w)
        # +1: the patch may touch the bottom/right border.
        ry = int(np.random.randint(0, h - rh + 1))
        rx = int(np.random.randint(0, w - rw + 1))
        # One value per channel; a scalar for single-channel 2-D images.
        channels = image.shape[2] if image.ndim == 3 else None
        image[ry:ry + rh, rx:rx + rw] = np.random.randint(0, 256, size=channels)
        return image

    def __call__(self, image: np.ndarray) -> np.ndarray:
        """Apply the randomly selected degradations and return a float32 image."""
        # Private float32 copy: later stages (Cutout) write in place and must
        # not leak into the caller's array.
        image = np.array(image, dtype=np.float32)
        # Gaussian blur
        if np.random.random() < self.blur_prob:
            sigma = np.random.uniform(0.5, 3.0)
            ksize = int(sigma * 6) | 1  # Ensure odd
            image = cv2.GaussianBlur(image, (ksize, ksize), sigma)
        # JPEG compression artifacts
        if np.random.random() < self.jpeg_prob:
            image = self._jpeg_roundtrip(image)
        # Low-light simulation (gamma darkening)
        if np.random.random() < self.lowlight_prob:
            gamma = np.random.uniform(1.5, 3.0)
            image = (np.clip(image, 0, 255) / 255.0) ** gamma * 255.0
        # Random occlusion (Cutout)
        if np.random.random() < self.occlusion_prob:
            image = self._cutout(image)
        # Gaussian noise
        if np.random.random() < self.noise_prob:
            sigma = np.random.uniform(5, 25)
            noise = np.random.randn(*image.shape) * sigma
            image = np.clip(image + noise, 0, 255)
        return image.astype(np.float32)