Upload data/widerface.py with huggingface_hub
data/widerface.py  +205  -0
ADDED
@@ -0,0 +1,205 @@
"""
WiderFace Dataset Loader.

WIDER FACE (Yang et al., 2016):
- 32,203 images, 393,703 annotated face bounding boxes
- Split: 40% train (12,880), 10% val (3,226), 50% test (labels not public)
- 3 difficulty levels: Easy, Medium, Hard
- Annotations include: bbox, blur, expression, illumination, occlusion, pose, invalid

Directory structure expected:
wider_face/
├── WIDER_train/
│   └── images/
│       ├── 0--Parade/
│       ├── 1--Handshaking/
│       └── ...
├── WIDER_val/
│   └── images/
│       └── ...
├── wider_face_split/
│   ├── wider_face_train_bbx_gt.txt
│   ├── wider_face_val_bbx_gt.txt
│   └── ...
└── retinaface_gt/   (optional, for landmarks)
    ├── train/
    │   └── label.txt
    └── val/
        └── label.txt
"""

import os
import numpy as np
import cv2
from typing import List, Dict, Optional, Tuple, Callable
import torch
from torch.utils.data import Dataset


class WiderFaceDataset(Dataset):
    """
    WIDER FACE dataset with support for:
    - Standard WiderFace bbox annotations
    - RetinaFace-format 5-point landmark annotations
    - Filtering invalid/tiny faces
    - On-the-fly augmentation
    """

    def __init__(self,
                 root_dir: str,
                 split: str = 'train',
                 transform: Optional[Callable] = None,
                 min_face_size: int = 2,
                 use_landmarks: bool = False,
                 annotation_format: str = 'widerface'):
        """
        Args:
            root_dir: Path to the wider_face/ directory
            split: 'train' or 'val'
            transform: Augmentation callable
            min_face_size: Minimum face width/height to keep (pixels)
            use_landmarks: Load 5-point landmarks (requires retinaface_gt/)
            annotation_format: 'widerface' (standard) or 'retinaface' (with landmarks)
        """
        self.root_dir = root_dir
        self.split = split
        self.transform = transform
        self.min_face_size = min_face_size
        self.use_landmarks = use_landmarks

        if annotation_format == 'retinaface' and use_landmarks:
            self.samples = self._load_retinaface_annotations()
        else:
            self.samples = self._load_widerface_annotations()

        print(f"[WiderFace {split}] Loaded {len(self.samples)} images")

    def _load_widerface_annotations(self) -> List[Dict]:
        """Load standard WiderFace bbox annotations."""
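        # bbx_gt.txt layout (per the wider_face_split readme):
        #     0--Parade/0_Parade_marchingband_1_849.jpg
        #     1
        #     449 330 122 149 0 0 0 0 0 0
        # i.e. a relative image path, a face count, then one line per face:
        #     x y w h blur expression illumination invalid occlusion pose
        # Images with zero faces are followed by a single all-zero placeholder line.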
        ann_file = os.path.join(
            self.root_dir, 'wider_face_split',
            f'wider_face_{self.split}_bbx_gt.txt'
        )
        img_dir = os.path.join(self.root_dir, f'WIDER_{self.split}', 'images')

        samples = []
        with open(ann_file, 'r') as f:
            while True:
                filename = f.readline().strip()
                if not filename:
                    break

                num_faces = int(f.readline().strip())
                boxes = []
                for _ in range(max(num_faces, 1)):
                    line = f.readline().strip()
                    if num_faces == 0:
                        continue  # consume the all-zero placeholder line of 0-face images
                    parts = list(map(float, line.split()))
                    x, y, w, h = parts[0], parts[1], parts[2], parts[3]
                    # Filter tiny/invalid faces
                    if w < self.min_face_size or h < self.min_face_size:
                        continue
                    # Convert x, y, w, h to x1, y1, x2, y2
                    boxes.append([x, y, x + w, y + h])

                if boxes:
                    samples.append({
                        'image_path': os.path.join(img_dir, filename),
                        'boxes': np.array(boxes, dtype=np.float32),
                        'filename': filename,
                    })

        return samples

    def _load_retinaface_annotations(self) -> List[Dict]:
        """Load RetinaFace-format annotations with 5-point landmarks."""
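        # label.txt layout (assuming the widely used biubug6/Pytorch_Retinaface
        # export; values below are illustrative):
        #     # 0--Parade/0_Parade_marchingband_1_849.jpg
        #     449 330 122 149 488.9 373.6 0.0 542.1 376.5 0.0 ... 0.82
        # i.e. an image path prefixed by '#', then one line per face: x y w h,
        # five landmarks as (x, y, flag) triplets, and a confidence score.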
        ann_file = os.path.join(
            self.root_dir, 'retinaface_gt', self.split, 'label.txt'
        )
        img_dir = os.path.join(self.root_dir, f'WIDER_{self.split}', 'images')

        samples = []
        current_file = None
        current_boxes = []
        current_lmks = []

        with open(ann_file, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                if line.startswith('#'):
                    # Save the previous image before starting a new one
                    if current_file and current_boxes:
                        samples.append({
                            'image_path': os.path.join(img_dir, current_file),
                            'boxes': np.array(current_boxes, dtype=np.float32),
                            'landmarks': np.array(current_lmks, dtype=np.float32),
                            'filename': current_file,
                        })
                    current_file = line[1:].strip()
                    current_boxes = []
                    current_lmks = []
                else:
                    parts = list(map(float, line.split()))
                    if len(parts) >= 4:
                        x, y, w, h = parts[0], parts[1], parts[2], parts[3]
                        if w < self.min_face_size or h < self.min_face_size:
                            continue
                        current_boxes.append([x, y, x + w, y + h])
                        if len(parts) >= 19:
                            # Landmarks stored as (x, y, flag) triplets per point;
                            # keep only the five (x, y) pairs
                            lmk = [parts[4 + 3 * i + j] for i in range(5) for j in range(2)]
                            current_lmks.append(lmk)
                        elif len(parts) >= 14:
                            # Plain (x1,y1, ..., x5,y5) pairs without flags
                            current_lmks.append(parts[4:14])
                        else:
                            current_lmks.append([-1.0] * 10)  # landmarks unavailable

        # Save the last image
        if current_file and current_boxes:
            samples.append({
                'image_path': os.path.join(img_dir, current_file),
                'boxes': np.array(current_boxes, dtype=np.float32),
                'landmarks': np.array(current_lmks, dtype=np.float32),
                'filename': current_file,
            })

        return samples

    def __len__(self) -> int:
        return len(self.samples)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, Dict]:
        sample = self.samples[idx]

        # Load image
        img = cv2.imread(sample['image_path'])
        if img is None:
            raise IOError(f"Failed to load image: {sample['image_path']}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        boxes = sample['boxes'].copy()
        landmarks = sample.get(
            'landmarks', np.zeros((boxes.shape[0], 10), dtype=np.float32)
        ).copy()

        # Apply augmentation
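        # Assumed transform contract (implied by the unpacking below, not
        # enforced): transform(image, boxes, landmarks) returns a dict with
        # keys 'image', 'boxes', and 'landmarks', remapped to the augmented
        # image's coordinate frame.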
        if self.transform:
            result = self.transform(img, boxes, landmarks)
            img, boxes, landmarks = result['image'], result['boxes'], result['landmarks']

        # Convert to tensors
        img_tensor = torch.from_numpy(img.transpose(2, 0, 1)).float()
        boxes_tensor = torch.from_numpy(boxes).float()

        target = {
            'boxes': boxes_tensor,
            'labels': torch.ones(boxes_tensor.shape[0], dtype=torch.long),
        }
        if self.use_landmarks:
            target['landmarks'] = torch.from_numpy(landmarks).float()

        return img_tensor, target

    @staticmethod
    def collate_fn(batch):
        """Custom collate for variable-length targets."""
        images = torch.stack([item[0] for item in batch])
        targets = [item[1] for item in batch]
        return images, targets
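

# ---------------------------------------------------------------------------
# Minimal usage sketch (added for illustration; the root path below is an
# assumption, and collate_fn stacks image tensors, so the transform must
# resize every image to a common shape before batching):
#
#     from torch.utils.data import DataLoader
#
#     dataset = WiderFaceDataset('data/wider_face', split='train',
#                                use_landmarks=True,
#                                annotation_format='retinaface')
#     loader = DataLoader(dataset, batch_size=8, shuffle=True,
#                         collate_fn=WiderFaceDataset.collate_fn)
#     images, targets = next(iter(loader))  # images: (8, 3, H, W); targets: list of dicts
# ---------------------------------------------------------------------------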