| """ |
| WiderFace Dataset Loader. |
| |
| WIDER FACE (Yang et al., 2016): |
| - 32,203 images, 393,703 annotated face bounding boxes |
| - Split: 40% train (12,880), 10% val (3,226), 50% test (labels not public) |
| - 3 difficulty levels: Easy, Medium, Hard |
| - Annotations include: bbox, blur, expression, illumination, occlusion, pose, invalid |
| |
| Directory structure expected: |
wider_face/
├── WIDER_train/
│   └── images/
│       ├── 0--Parade/
│       ├── 1--Handshaking/
│       └── ...
├── WIDER_val/
│   └── images/
│       └── ...
├── wider_face_split/
│   ├── wider_face_train_bbx_gt.txt
│   ├── wider_face_val_bbx_gt.txt
│   └── ...
└── retinaface_gt/          (optional, for landmarks)
    ├── train/
    │   └── label.txt
    └── val/
        └── label.txt
| """ |
|
|
| import os |
| import numpy as np |
| import cv2 |
| from typing import List, Dict, Optional, Tuple, Callable |
| import torch |
| from torch.utils.data import Dataset |
|
|
|
|
class WiderFaceDataset(Dataset):
    """
    WIDER FACE dataset with support for:
    - Standard WiderFace bbox annotations
    - RetinaFace-format 5-point landmark annotations
    - Filtering invalid/tiny faces
    - On-the-fly augmentation
    """

    def __init__(self,
                 root_dir: str,
                 split: str = 'train',
                 transform: Optional[Callable] = None,
                 min_face_size: int = 2,
                 use_landmarks: bool = False,
                 annotation_format: str = 'widerface'):
        """
        Args:
            root_dir: Path to wider_face/ directory
            split: 'train' or 'val'
            transform: Augmentation callable; called as
                transform(image, boxes, landmarks) and expected to return a
                dict with keys 'image', 'boxes', 'landmarks'
            min_face_size: Minimum face width/height to keep (pixels).
                Also discards the degenerate w==0 / h==0 boxes present in
                the official annotations.
            use_landmarks: Load 5-point landmarks (requires retinaface_gt/)
            annotation_format: 'widerface' (standard) or 'retinaface' (with landmarks)
        """
        self.root_dir = root_dir
        self.split = split
        self.transform = transform
        self.min_face_size = min_face_size
        self.use_landmarks = use_landmarks

        # Landmarks are only available in the RetinaFace-format labels, so
        # both switches must be on to use that parser.
        if annotation_format == 'retinaface' and use_landmarks:
            self.samples = self._load_retinaface_annotations()
        else:
            self.samples = self._load_widerface_annotations()

        print(f"[WiderFace {split}] Loaded {len(self.samples)} images")

    def _load_widerface_annotations(self) -> List[Dict]:
        """Load standard WiderFace bbox annotations.

        The gt file repeats: <relative filename>, <num_faces>, then one line
        per face ("x y w h" followed by attribute flags). Entries with
        num_faces == 0 still carry exactly one dummy all-zero line that must
        be consumed to stay aligned with the next filename.

        Returns:
            List of dicts with 'image_path', 'boxes' (N x 4 float32, xyxy)
            and 'filename'. Images left with no boxes after filtering are
            skipped entirely.
        """
        ann_file = os.path.join(
            self.root_dir, 'wider_face_split',
            f'wider_face_{self.split}_bbx_gt.txt'
        )
        img_dir = os.path.join(self.root_dir, f'WIDER_{self.split}', 'images')

        samples = []
        with open(ann_file, 'r') as f:
            while True:
                raw = f.readline()
                if not raw:
                    # True EOF: readline() returns '' only at end of file.
                    break
                filename = raw.strip()
                if not filename:
                    # Tolerate stray blank lines instead of truncating the
                    # dataset (a bare '\n' also strips to '').
                    continue

                num_faces = int(f.readline().strip())
                boxes = []
                # max(num_faces, 1): zero-face entries still have one dummy
                # line to consume (see docstring).
                for _ in range(max(num_faces, 1)):
                    line = f.readline().strip()
                    parts = list(map(float, line.split()))
                    if num_faces == 0:
                        continue
                    x, y, w, h = parts[0], parts[1], parts[2], parts[3]
                    # Drop tiny and degenerate boxes.
                    if w < self.min_face_size or h < self.min_face_size:
                        continue
                    # Convert xywh -> xyxy.
                    boxes.append([x, y, x + w, y + h])

                if boxes:
                    samples.append({
                        'image_path': os.path.join(img_dir, filename),
                        'boxes': np.array(boxes, dtype=np.float32),
                        'filename': filename,
                    })

        return samples

    @staticmethod
    def _parse_landmark_coords(parts: List[float]) -> List[float]:
        """Extract the 10 landmark coordinates (5 x/y pairs) from a face line.

        Two layouts are supported:
        - >= 19 values: RetinaFace label.txt format
          "x y w h  lx ly flag  (x5)  [conf]" — coordinates are stored as
          (x, y, flag) triplets, so the visibility flag must be skipped.
        - >= 14 values: legacy contiguous layout with the 10 coordinates
          directly after the bbox.
        Faces without landmark annotations are represented as -1 fillers
        (the RetinaFace labels themselves use -1 placeholders, which pass
        through unchanged).
        """
        if len(parts) >= 19:
            # Take x, y from each (x, y, flag) triplet starting at index 4.
            return [parts[4 + 3 * i + j] for i in range(5) for j in range(2)]
        if len(parts) >= 14:
            return parts[4:14]
        return [-1.0] * 10

    def _load_retinaface_annotations(self) -> List[Dict]:
        """Load RetinaFace-format annotations with 5-point landmarks.

        Each image begins with a '# <relative path>' line followed by one
        line per face. See _parse_landmark_coords for the face-line layout.

        Returns:
            List of dicts with 'image_path', 'boxes' (N x 4 float32, xyxy),
            'landmarks' (N x 10 float32, -1 where unannotated) and
            'filename'. Images with no surviving boxes are skipped.
        """
        ann_file = os.path.join(
            self.root_dir, 'retinaface_gt', self.split, 'label.txt'
        )
        img_dir = os.path.join(self.root_dir, f'WIDER_{self.split}', 'images')

        samples = []
        current_file = None
        current_boxes = []
        current_lmks = []

        def _flush():
            # Emit the accumulated image entry, if it has any boxes.
            if current_file and current_boxes:
                samples.append({
                    'image_path': os.path.join(img_dir, current_file),
                    'boxes': np.array(current_boxes, dtype=np.float32),
                    'landmarks': np.array(current_lmks, dtype=np.float32),
                    'filename': current_file,
                })

        with open(ann_file, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                if line.startswith('#'):
                    _flush()
                    # Tolerate both '# path' and '#path'.
                    current_file = line[1:].strip()
                    current_boxes = []
                    current_lmks = []
                else:
                    parts = list(map(float, line.split()))
                    if len(parts) < 4:
                        continue
                    x, y, w, h = parts[0], parts[1], parts[2], parts[3]
                    if w < self.min_face_size or h < self.min_face_size:
                        continue
                    current_boxes.append([x, y, x + w, y + h])
                    current_lmks.append(self._parse_landmark_coords(parts))

        # Flush the final image (no trailing '#' line follows it).
        _flush()

        return samples

    def __len__(self) -> int:
        return len(self.samples)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, Dict]:
        """Return (image_tensor, target) for the idx-th image.

        image_tensor: float32 CHW, RGB.
        target: {'boxes': (N, 4) float32, 'labels': (N,) long of ones,
                 optionally 'landmarks': (N, 10) float32}.
        """
        sample = self.samples[idx]

        img = cv2.imread(sample['image_path'])
        if img is None:
            raise IOError(f"Failed to load image: {sample['image_path']}")
        # OpenCV decodes BGR; the rest of the pipeline expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Copy so augmentation cannot mutate the cached annotations.
        boxes = sample['boxes'].copy()
        landmarks = sample.get('landmarks', np.zeros((boxes.shape[0], 10), dtype=np.float32)).copy()

        if self.transform:
            result = self.transform(img, boxes, landmarks)
            img, boxes, landmarks = result['image'], result['boxes'], result['landmarks']

        # HWC -> CHW for PyTorch.
        img_tensor = torch.from_numpy(img.transpose(2, 0, 1)).float()
        boxes_tensor = torch.from_numpy(boxes).float()

        target = {
            'boxes': boxes_tensor,
            # Single-class detection: every face is label 1.
            'labels': torch.ones(boxes_tensor.shape[0], dtype=torch.long),
        }
        if self.use_landmarks:
            target['landmarks'] = torch.from_numpy(landmarks).float()

        return img_tensor, target

    @staticmethod
    def collate_fn(batch):
        """Custom collate for variable-length targets: stacks images,
        keeps per-image target dicts in a list."""
        images = torch.stack([item[0] for item in batch])
        targets = [item[1] for item in batch]
        return images, targets
|
|