"""
WiderFace Dataset Loader.
WIDER FACE (Yang et al., 2016):
- 32,203 images, 393,703 annotated face bounding boxes
- Split: 40% train (12,880), 10% val (3,226), 50% test (16,097; labels not public)
- 3 difficulty levels: Easy, Medium, Hard
- Annotations include: bbox, blur, expression, illumination, occlusion, pose, invalid
Directory structure expected:
    wider_face/
    β”œβ”€β”€ WIDER_train/
    β”‚   └── images/
    β”‚       β”œβ”€β”€ 0--Parade/
    β”‚       β”œβ”€β”€ 1--Handshaking/
    β”‚       └── ...
    β”œβ”€β”€ WIDER_val/
    β”‚   └── images/
    β”‚       └── ...
    β”œβ”€β”€ wider_face_split/
    β”‚   β”œβ”€β”€ wider_face_train_bbx_gt.txt
    β”‚   β”œβ”€β”€ wider_face_val_bbx_gt.txt
    β”‚   └── ...
    └── retinaface_gt/   (optional, for landmarks)
        β”œβ”€β”€ train/
        β”‚   └── label.txt
        └── val/
            └── label.txt
"""
import os
import numpy as np
import cv2
from typing import List, Dict, Optional, Callable
import torch
from torch.utils.data import Dataset
class WiderFaceDataset(Dataset):
"""
WIDER FACE dataset with support for:
- Standard WiderFace bbox annotations
- RetinaFace-format 5-point landmark annotations
- Filtering invalid/tiny faces
- On-the-fly augmentation
"""
def __init__(self,
root_dir: str,
split: str = 'train',
transform: Optional[Callable] = None,
min_face_size: int = 2,
use_landmarks: bool = False,
annotation_format: str = 'widerface'):
"""
Args:
root_dir: Path to wider_face/ directory
split: 'train' or 'val'
transform: Augmentation callable
min_face_size: Minimum face size to keep (pixels)
use_landmarks: Load 5-point landmarks (requires retinaface_gt/)
annotation_format: 'widerface' (standard) or 'retinaface' (with landmarks)
"""
self.root_dir = root_dir
self.split = split
self.transform = transform
self.min_face_size = min_face_size
self.use_landmarks = use_landmarks
if annotation_format == 'retinaface' and use_landmarks:
self.samples = self._load_retinaface_annotations()
else:
self.samples = self._load_widerface_annotations()
print(f"[WiderFace {split}] Loaded {len(self.samples)} images")
def _load_widerface_annotations(self) -> List[Dict]:
"""Load standard WiderFace bbox annotations."""
ann_file = os.path.join(
self.root_dir, 'wider_face_split',
f'wider_face_{self.split}_bbx_gt.txt'
)
img_dir = os.path.join(self.root_dir, f'WIDER_{self.split}', 'images')
samples = []
with open(ann_file, 'r') as f:
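            # File layout, one record per image:
            #   <relative image path>
            #   <num_faces>
            #   x y w h blur expression illumination invalid occlusion pose  (one line per face)
            # Images with 0 faces still carry a single all-zero placeholder line.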
while True:
filename = f.readline().strip()
if not filename:
break
num_faces = int(f.readline().strip())
boxes = []
                for _ in range(max(num_faces, 1)):
                    line = f.readline().strip()
                    if num_faces == 0:
                        continue  # Skip placeholder line for 0-face images
                    parts = list(map(float, line.split()))
                    x, y, w, h = parts[0], parts[1], parts[2], parts[3]
# Filter tiny/invalid faces
if w < self.min_face_size or h < self.min_face_size:
continue
# Convert to x1, y1, x2, y2
boxes.append([x, y, x + w, y + h])
if boxes:
samples.append({
'image_path': os.path.join(img_dir, filename),
'boxes': np.array(boxes, dtype=np.float32),
'filename': filename,
})
return samples
def _load_retinaface_annotations(self) -> List[Dict]:
"""Load RetinaFace-format annotations with 5-point landmarks."""
ann_file = os.path.join(
self.root_dir, 'retinaface_gt', self.split, 'label.txt'
)
img_dir = os.path.join(self.root_dir, f'WIDER_{self.split}', 'images')
samples = []
current_file = None
current_boxes = []
current_lmks = []
with open(ann_file, 'r') as f:
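            # label.txt layout (as in the released retinaface_gt annotations):
            #   # <relative image path>
            #   x y w h  lx1 ly1 f1 ... lx5 ly5 f5  conf   (one line per face)
            # Each landmark is an (x, y, flag) triplet; all landmark fields are
            # -1 for faces without annotation, and the val split's label.txt
            # usually carries bbox-only lines.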
for line in f:
line = line.strip()
if line.startswith('#'):
# Save previous image
if current_file and current_boxes:
samples.append({
'image_path': os.path.join(img_dir, current_file),
'boxes': np.array(current_boxes, dtype=np.float32),
'landmarks': np.array(current_lmks, dtype=np.float32),
'filename': current_file,
})
                    current_file = line[1:].strip()
current_boxes = []
current_lmks = []
                else:
                    parts = list(map(float, line.split()))
                    if len(parts) >= 4:
                        x, y, w, h = parts[0], parts[1], parts[2], parts[3]
                        if w < self.min_face_size or h < self.min_face_size:
                            continue
                        current_boxes.append([x, y, x + w, y + h])
                        if len(parts) >= 19:
                            # (x, y, flag) triplets: keep only the coordinates
                            # of the 5 points, dropping each per-point flag
                            lmk = [parts[4 + 3 * i + j]
                                   for i in range(5) for j in range(2)]
                            current_lmks.append(lmk)
                        elif len(parts) >= 14:
                            # Flat (x, y) * 5 layout without per-point flags
                            current_lmks.append(parts[4:14])
                        else:
                            current_lmks.append([-1.0] * 10)  # Invalid landmarks
# Save last image
if current_file and current_boxes:
samples.append({
'image_path': os.path.join(img_dir, current_file),
'boxes': np.array(current_boxes, dtype=np.float32),
'landmarks': np.array(current_lmks, dtype=np.float32),
'filename': current_file,
})
return samples
def __len__(self) -> int:
return len(self.samples)
def __getitem__(self, idx: int) -> Dict:
sample = self.samples[idx]
# Load image
img = cv2.imread(sample['image_path'])
if img is None:
raise IOError(f"Failed to load image: {sample['image_path']}")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
boxes = sample['boxes'].copy()
        # Default to -1-filled landmarks (the loader's "invalid" marker) when
        # the annotation format provides none
        landmarks = sample.get(
            'landmarks',
            np.full((boxes.shape[0], 10), -1, dtype=np.float32)
        ).copy()
        # Apply augmentation; the transform is expected to return a dict with
        # 'image', 'boxes', and 'landmarks' keys
if self.transform:
result = self.transform(img, boxes, landmarks)
img, boxes, landmarks = result['image'], result['boxes'], result['landmarks']
        # Convert HWC image to CHW float tensor; pixel values stay in [0, 255]
        # unless the transform already normalized them
        img_tensor = torch.from_numpy(img.transpose(2, 0, 1)).float()
boxes_tensor = torch.from_numpy(boxes).float()
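        # Detection-style target: 'boxes' are (x1, y1, x2, y2) in pixels and
        # 'labels' are all 1, face being the only foreground class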
target = {
'boxes': boxes_tensor,
'labels': torch.ones(boxes_tensor.shape[0], dtype=torch.long),
}
if self.use_landmarks:
target['landmarks'] = torch.from_numpy(landmarks).float()
return img_tensor, target
@staticmethod
def collate_fn(batch):
"""Custom collate for variable-length targets."""
images = torch.stack([item[0] for item in batch])
targets = [item[1] for item in batch]
return images, targets
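

if __name__ == '__main__':
    # Minimal smoke test (a sketch, not part of the loader's API):
    # 'wider_face' below is a placeholder path; point it at your local copy.
    # batch_size=1 because, without a resizing transform, the raw images have
    # different shapes and torch.stack in collate_fn would fail.
    from torch.utils.data import DataLoader

    dataset = WiderFaceDataset(root_dir='wider_face', split='val')
    loader = DataLoader(dataset, batch_size=1,
                        collate_fn=WiderFaceDataset.collate_fn)
    images, targets = next(iter(loader))
    print(images.shape, targets[0]['boxes'].shape)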