""" WiderFace Dataset Loader. WIDER FACE (Yang et al., 2016): - 32,203 images, 393,703 annotated face bounding boxes - Split: 40% train (12,880), 10% val (3,226), 50% test (labels not public) - 3 difficulty levels: Easy, Medium, Hard - Annotations include: bbox, blur, expression, illumination, occlusion, pose, invalid Directory structure expected: wider_face/ ├── WIDER_train/ │ └── images/ │ ├── 0--Parade/ │ ├── 1--Handshaking/ │ └── ... ├── WIDER_val/ │ └── images/ │ └── ... ├── wider_face_split/ │ ├── wider_face_train_bbx_gt.txt │ ├── wider_face_val_bbx_gt.txt │ └── ... └── retinaface_gt/ (optional, for landmarks) ├── train/ │ └── label.txt └── val/ └── label.txt """ import os import numpy as np import cv2 from typing import List, Dict, Optional, Tuple, Callable import torch from torch.utils.data import Dataset class WiderFaceDataset(Dataset): """ WIDER FACE dataset with support for: - Standard WiderFace bbox annotations - RetinaFace-format 5-point landmark annotations - Filtering invalid/tiny faces - On-the-fly augmentation """ def __init__(self, root_dir: str, split: str = 'train', transform: Optional[Callable] = None, min_face_size: int = 2, use_landmarks: bool = False, annotation_format: str = 'widerface'): """ Args: root_dir: Path to wider_face/ directory split: 'train' or 'val' transform: Augmentation callable min_face_size: Minimum face size to keep (pixels) use_landmarks: Load 5-point landmarks (requires retinaface_gt/) annotation_format: 'widerface' (standard) or 'retinaface' (with landmarks) """ self.root_dir = root_dir self.split = split self.transform = transform self.min_face_size = min_face_size self.use_landmarks = use_landmarks if annotation_format == 'retinaface' and use_landmarks: self.samples = self._load_retinaface_annotations() else: self.samples = self._load_widerface_annotations() print(f"[WiderFace {split}] Loaded {len(self.samples)} images") def _load_widerface_annotations(self) -> List[Dict]: """Load standard WiderFace bbox annotations.""" ann_file = os.path.join( self.root_dir, 'wider_face_split', f'wider_face_{self.split}_bbx_gt.txt' ) img_dir = os.path.join(self.root_dir, f'WIDER_{self.split}', 'images') samples = [] with open(ann_file, 'r') as f: while True: filename = f.readline().strip() if not filename: break num_faces = int(f.readline().strip()) boxes = [] for _ in range(max(num_faces, 1)): line = f.readline().strip() parts = list(map(float, line.split())) if num_faces == 0: continue # Skip placeholder line for 0-face images x, y, w, h = parts[0], parts[1], parts[2], parts[3] # Filter tiny/invalid faces if w < self.min_face_size or h < self.min_face_size: continue # Convert to x1, y1, x2, y2 boxes.append([x, y, x + w, y + h]) if boxes: samples.append({ 'image_path': os.path.join(img_dir, filename), 'boxes': np.array(boxes, dtype=np.float32), 'filename': filename, }) return samples def _load_retinaface_annotations(self) -> List[Dict]: """Load RetinaFace-format annotations with 5-point landmarks.""" ann_file = os.path.join( self.root_dir, 'retinaface_gt', self.split, 'label.txt' ) img_dir = os.path.join(self.root_dir, f'WIDER_{self.split}', 'images') samples = [] current_file = None current_boxes = [] current_lmks = [] with open(ann_file, 'r') as f: for line in f: line = line.strip() if line.startswith('#'): # Save previous image if current_file and current_boxes: samples.append({ 'image_path': os.path.join(img_dir, current_file), 'boxes': np.array(current_boxes, dtype=np.float32), 'landmarks': np.array(current_lmks, dtype=np.float32), 'filename': 
                    current_file = line[2:].strip()
                    current_boxes = []
                    current_lmks = []
                else:
                    parts = list(map(float, line.split()))
                    if len(parts) >= 4:
                        x, y, w, h = parts[0], parts[1], parts[2], parts[3]
                        if w < self.min_face_size or h < self.min_face_size:
                            continue
                        current_boxes.append([x, y, x + w, y + h])

                        if len(parts) >= 19:
                            # RetinaFace label.txt stores 5 landmarks as
                            # (x, y, flag) triplets after the bbox; drop every
                            # third value to keep (x1,y1, ..., x5,y5).
                            lmk = [v for i, v in enumerate(parts[4:19]) if i % 3 != 2]
                            current_lmks.append(lmk)
                        elif len(parts) >= 14:
                            # Plain format: 5 landmarks as (x1,y1, ..., x5,y5)
                            lmk = parts[4:14]
                            current_lmks.append(lmk)
                        else:
                            current_lmks.append([-1.0] * 10)  # Invalid landmarks

        # Save last image
        if current_file and current_boxes:
            samples.append({
                'image_path': os.path.join(img_dir, current_file),
                'boxes': np.array(current_boxes, dtype=np.float32),
                'landmarks': np.array(current_lmks, dtype=np.float32),
                'filename': current_file,
            })

        return samples

    def __len__(self) -> int:
        return len(self.samples)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, Dict]:
        sample = self.samples[idx]

        # Load image
        img = cv2.imread(sample['image_path'])
        if img is None:
            raise IOError(f"Failed to load image: {sample['image_path']}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        boxes = sample['boxes'].copy()
        landmarks = sample.get(
            'landmarks', np.zeros((boxes.shape[0], 10), dtype=np.float32)
        ).copy()

        # Apply augmentation
        if self.transform:
            result = self.transform(img, boxes, landmarks)
            img, boxes, landmarks = result['image'], result['boxes'], result['landmarks']

        # Convert to tensors (HWC -> CHW)
        img_tensor = torch.from_numpy(img.transpose(2, 0, 1)).float()
        boxes_tensor = torch.from_numpy(boxes).float()

        target = {
            'boxes': boxes_tensor,
            'labels': torch.ones(boxes_tensor.shape[0], dtype=torch.long),
        }
        if self.use_landmarks:
            target['landmarks'] = torch.from_numpy(landmarks).float()

        return img_tensor, target

    @staticmethod
    def collate_fn(batch):
        """Custom collate for variable-length targets.

        Images must share a common size after the transform so torch.stack
        succeeds; targets stay a list of per-image dicts because the number
        of boxes varies between images.
        """
        images = torch.stack([item[0] for item in batch])
        targets = [item[1] for item in batch]
        return images, targets
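

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only). The root path 'data/wider_face', the
# helper name `_resize_640`, and the chosen hyperparameters are placeholders,
# not part of this module. A real pipeline would use a proper augmentation
# callable; the toy transform below merely resizes every image to a fixed
# size (required so collate_fn can stack the batch) and rescales targets.
# It does not special-case invalid (-1) landmarks.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from torch.utils.data import DataLoader

    def _resize_640(img, boxes, landmarks):
        """Toy transform: resize to 640x640 and scale boxes/landmarks to match."""
        h, w = img.shape[:2]
        img = cv2.resize(img, (640, 640))
        sx, sy = 640.0 / w, 640.0 / h
        boxes = boxes * np.array([sx, sy, sx, sy], dtype=np.float32)
        landmarks = landmarks * np.tile([sx, sy], 5).astype(np.float32)
        return {'image': img, 'boxes': boxes, 'landmarks': landmarks}

    dataset = WiderFaceDataset(
        root_dir='data/wider_face',   # placeholder path
        split='train',
        transform=_resize_640,
        min_face_size=8,
    )
    loader = DataLoader(
        dataset,
        batch_size=4,
        shuffle=True,
        num_workers=0,
        collate_fn=WiderFaceDataset.collate_fn,
    )
    images, targets = next(iter(loader))
    print(images.shape, [t['boxes'].shape for t in targets])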