| """ |
| PriviGaze Dataset - Synthetic Gaze Dataset Generator and MPIIGaze Loader |
| |
| Since gaze datasets are not readily available on HF Hub, this module provides: |
| 1. A synthetic gaze dataset generator using UnityEyes-style rendering |
| 2. MPIIGaze dataset loader skeleton (needs a local copy of the dataset; sample loading is not implemented yet) |
| |
| The synthetic generator creates realistic face/eye crops with known gaze vectors, |
| enabling the teacher-student distillation pipeline to be tested end-to-end. |
| """ |
|
|
| import os |
| import numpy as np |
| import torch |
| from torch.utils.data import Dataset, DataLoader |
| from PIL import Image, ImageFilter, ImageOps, ImageEnhance |
| import json |
| from pathlib import Path |
| from typing import Optional, Tuple, Dict, List |
|
|
|
|
class SyntheticGazeDataset(Dataset):
    """Procedurally generated eye and face crops with known gaze angles.

    The gaze direction is encoded in the position of the iris/pupil relative
    to the eye centre, so the estimation pipeline can be trained and tested
    end-to-end without a real gaze dataset.

    Every sample is a pure function of ``(seed, idx)``: the noise and the
    brightness/contrast jitter are drawn from a generator seeded per sample
    instead of the global NumPy state. Repeated accesses therefore return the
    same tensors, and the result does not depend on how many DataLoader
    workers are used.

    Each sample is a dict with:
        - left_eye: [3, img_size_eye, img_size_eye] RGB eye crop in [-1, 1]
        - right_eye: [3, img_size_eye, img_size_eye] RGB eye crop in [-1, 1]
        - face_blurred_gray: [1, img_size_face, img_size_face] blurred
          grayscale face in [-1, 1]
        - face_gray: [1, img_size_face, img_size_face] grayscale face with
          brightness/contrast jitter in [-1, 1]
        - pitch: scalar tensor, degrees, sampled uniformly from [-60, 60]
        - yaw: scalar tensor, degrees, sampled uniformly from [-60, 60]
    """

    def __init__(
        self,
        num_samples: int = 50000,
        img_size_eye: int = 112,
        img_size_face: int = 224,
        seed: int = 42,
        noise_level: float = 0.1,
    ):
        """Pre-sample gaze angles and colours for ``num_samples`` items.

        Args:
            num_samples: number of items in the dataset.
            img_size_eye: side length (pixels) of the square eye crops.
            img_size_face: side length (pixels) of the square face image.
            seed: base seed; ``None`` picks a fresh random base seed.
            noise_level: standard deviation of the additive pixel noise
                (image values are in [0, 1] when the noise is added).
        """
        self.num_samples = num_samples
        self.img_size_eye = img_size_eye
        self.img_size_face = img_size_face
        self.noise_level = noise_level

        # A concrete integer seed is needed to derive per-sample generators.
        if seed is None:
            seed = int(np.random.SeedSequence().generate_state(1)[0])
        self.seed = int(seed)

        # Same draw order as before, so angles/colours for a given seed
        # are unchanged.
        rng = np.random.RandomState(self.seed)
        self.pitch_angles = rng.uniform(-60, 60, num_samples).astype(np.float32)
        self.yaw_angles = rng.uniform(-60, 60, num_samples).astype(np.float32)

        self.iris_colors = rng.uniform(0.3, 0.9, (num_samples, 3)).astype(np.float32)
        self.skin_colors = rng.uniform(0.4, 0.9, (num_samples, 3)).astype(np.float32)

    def __len__(self):
        return self.num_samples

    # ------------------------------------------------------------------
    # Small helpers
    # ------------------------------------------------------------------

    def _normalize_index(self, idx) -> int:
        """Return ``idx`` as a non-negative int, raising IndexError if out of range."""
        idx = int(idx)
        if idx < 0:
            idx += self.num_samples
        if not 0 <= idx < self.num_samples:
            raise IndexError(
                f"index {idx} is out of range for dataset of size {self.num_samples}"
            )
        return idx

    def _sample_rng(self, idx: int) -> np.random.Generator:
        """Return the random generator that belongs to sample ``idx``."""
        return np.random.default_rng([self.seed, idx])

    @staticmethod
    def _pixel_offset(angle_deg: float, max_displacement: float) -> int:
        """Map an angle (degrees, 90 == full displacement) to a pixel offset.

        Rounds to the nearest pixel. Truncating toward zero would make the
        zero-offset bin twice as wide as every other bin and collapse small
        positive and negative angles onto the same image.
        """
        return int(np.rint(angle_deg / 90.0 * max_displacement))

    @staticmethod
    def _to_image(img: np.ndarray) -> "Image.Image":
        """Convert a float image with values in [0, 1] to an 8-bit PIL image."""
        return Image.fromarray((img * 255).astype(np.uint8))

    @staticmethod
    def _to_tensor(image) -> torch.Tensor:
        """Convert an 8-bit PIL image to a channel-first float tensor in [-1, 1]."""
        tensor = torch.from_numpy(np.array(image))
        if tensor.ndim == 3:
            tensor = tensor.permute(2, 0, 1)  # HWC -> CHW
        else:
            tensor = tensor.unsqueeze(0)  # HW -> 1HW
        return tensor.float() / 255.0 * 2 - 1

    # ------------------------------------------------------------------
    # Rendering (NumPy only)
    # ------------------------------------------------------------------

    def _render_eye(self, pitch: float, yaw: float, iris_color: np.ndarray,
                    rng: np.random.Generator) -> np.ndarray:
        """Render one eye crop as a float array of shape [size, size, 3] in [0, 1].

        Args:
            pitch: gaze pitch in degrees (positive moves the pupil up).
            yaw: gaze yaw in degrees (positive moves the pupil right).
            iris_color: [3] RGB iris colour in [0, 1].
            rng: generator used for the skin texture and pixel noise.
        """
        size = self.img_size_eye
        img = np.full((size, size, 3), 0.95, dtype=np.float32)  # sclera

        center_y, center_x = size // 2, size // 2
        y_grid, x_grid = np.ogrid[:size, :size]

        # Elliptical eye opening; everything outside it is skin.
        eye_mask = ((x_grid - center_x) ** 2 / (size * 0.35) ** 2 +
                    (y_grid - center_y) ** 2 / (size * 0.25) ** 2) <= 1.0

        skin_mask = ~eye_mask
        skin_color = np.array([0.85, 0.7, 0.6])
        skin_noise = rng.standard_normal((size, size)) * 0.02
        img[skin_mask] = skin_color * 0.9 + skin_noise[skin_mask][:, None]

        # Iris/pupil centre is shifted proportionally to the gaze angles.
        iris_radius = size * 0.18
        max_displacement = size * 0.12
        iris_cx = center_x + self._pixel_offset(yaw, max_displacement)
        iris_cy = center_y + self._pixel_offset(-pitch, max_displacement)

        dist_sq = (x_grid - iris_cx) ** 2 + (y_grid - iris_cy) ** 2
        img[(dist_sq <= iris_radius ** 2) & eye_mask] = iris_color

        pupil_radius = iris_radius * 0.4
        img[dist_sq <= pupil_radius ** 2] = np.array([0.05, 0.05, 0.05])

        # Specular highlight, offset up-left from the pupil centre.
        highlight_radius = iris_radius * 0.15
        highlight_cy = iris_cy - int(iris_radius * 0.3)
        highlight_cx = iris_cx - int(iris_radius * 0.2)
        highlight_mask = ((x_grid - highlight_cx) ** 2 +
                          (y_grid - highlight_cy) ** 2) <= highlight_radius ** 2
        img[highlight_mask] = np.clip(img[highlight_mask] + 0.3, 0, 1.0)

        # Eyelids cover the top and bottom bands of the eye opening.
        eyelid_thickness = 0.15
        rel_y = (y_grid - center_y) / (size * 0.25)
        eyelid_color = skin_color * 0.85
        img[(rel_y < -0.7 + eyelid_thickness) & eye_mask] = eyelid_color
        img[(rel_y > 0.7 - eyelid_thickness) & eye_mask] = eyelid_color

        noise = rng.standard_normal((size, size, 3)) * self.noise_level
        return np.clip(img + noise, 0, 1.0)

    def _render_face(self, pitch: float, yaw: float, skin_color: np.ndarray,
                     rng: np.random.Generator) -> np.ndarray:
        """Render the face as a float array of shape [size, size, 3] in [0, 1].

        Args:
            pitch: gaze pitch in degrees (positive moves the pupils up).
            yaw: gaze yaw in degrees (positive moves the pupils right).
            skin_color: [3] RGB skin colour in [0, 1].
            rng: generator used for the pixel noise.
        """
        size = self.img_size_face
        img = np.ones((size, size, 3), dtype=np.float32) * skin_color

        center_y, center_x = size // 2, size // 2
        y_grid, x_grid = np.ogrid[:size, :size]

        # Elliptical face on a dark background.
        face_mask = ((x_grid - center_x) ** 2 / (size * 0.38) ** 2 +
                     (y_grid - center_y) ** 2 / (size * 0.45) ** 2) <= 1.0
        img[~face_mask] = np.array([0.3, 0.3, 0.35])

        left_eye_cx = center_x - int(size * 0.12)
        right_eye_cx = center_x + int(size * 0.12)
        eye_cy = center_y - int(size * 0.08)

        # Both pupils share the same gaze-dependent offset.
        displacement = size * 0.02
        offset_x = self._pixel_offset(yaw, displacement)
        offset_y = self._pixel_offset(-pitch, displacement)

        eye_size = size * 0.06
        iris_radius = eye_size * 0.5
        pupil_radius = iris_radius * 0.4
        for eye_cx in (left_eye_cx, right_eye_cx):
            eye_white = (x_grid - eye_cx) ** 2 + (y_grid - eye_cy) ** 2 <= eye_size ** 2
            img[eye_white] = np.array([0.95, 0.95, 0.95])

            iris_cx = eye_cx + offset_x
            iris_cy = eye_cy + offset_y
            dist_sq = (x_grid - iris_cx) ** 2 + (y_grid - iris_cy) ** 2
            img[dist_sq <= iris_radius ** 2] = np.array([0.3, 0.5, 0.7])
            img[dist_sq <= pupil_radius ** 2] = np.array([0.05, 0.05, 0.05])

        nose_cx, nose_cy = center_x, center_y + int(size * 0.1)
        nose = (x_grid - nose_cx) ** 2 + (y_grid - nose_cy) ** 2 <= (size * 0.03) ** 2
        img[nose] = skin_color * 0.85

        noise = rng.standard_normal((size, size, 3)) * self.noise_level
        return np.clip(img + noise, 0, 1.0)

    # ------------------------------------------------------------------
    # PIL wrappers (kept for callers that expect images)
    # ------------------------------------------------------------------

    def _generate_eye(self, pitch: float, yaw: float, iris_color: np.ndarray,
                      eye_idx: int = 0,
                      rng: Optional[np.random.Generator] = None) -> "Image.Image":
        """Generate a synthetic eye image with pupil position encoding gaze.

        Args:
            pitch: gaze pitch angle in degrees
            yaw: gaze yaw angle in degrees
            iris_color: [3] RGB iris color
            eye_idx: 0 for left eye, 1 for right eye (currently does not
                influence the rendering)
            rng: generator for the noise; a fresh, unseeded generator is
                used when omitted

        Returns:
            PIL Image of size (img_size_eye, img_size_eye)
        """
        if rng is None:
            rng = np.random.default_rng()
        return self._to_image(self._render_eye(pitch, yaw, iris_color, rng))

    def _generate_face(self, pitch: float, yaw: float, skin_color: np.ndarray,
                       rng: Optional[np.random.Generator] = None) -> "Image.Image":
        """Generate a simple face-like pattern containing both eyes.

        The pupils inside the face are displaced according to the gaze
        angles, so the face image carries the same geometric cue as the
        eye crops at a lower resolution.

        Returns:
            PIL Image of size (img_size_face, img_size_face)
        """
        if rng is None:
            rng = np.random.default_rng()
        return self._to_image(self._render_face(pitch, yaw, skin_color, rng))

    def __getitem__(self, idx):
        """Build sample ``idx``; the result depends only on ``(seed, idx)``.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        idx = self._normalize_index(idx)
        rng = self._sample_rng(idx)

        pitch = float(self.pitch_angles[idx])
        yaw = float(self.yaw_angles[idx])
        iris_color = self.iris_colors[idx]
        skin_color = self.skin_colors[idx]

        # The right eye uses a slightly darker iris than the left eye.
        left_eye = self._generate_eye(pitch, yaw, iris_color, eye_idx=0, rng=rng)
        right_eye = self._generate_eye(pitch, yaw, iris_color * 0.95, eye_idx=1, rng=rng)

        face_rgb = self._generate_face(pitch, yaw, skin_color, rng=rng)

        face_gray = ImageOps.grayscale(face_rgb)
        face_blurred = face_gray.filter(ImageFilter.GaussianBlur(radius=8.0))

        # Brightness in [0.8, 1.2) and contrast in [0.9, 1.1) jitter.
        brightness = 0.8 + 0.4 * rng.random()
        contrast = 0.9 + 0.2 * rng.random()
        face_light_corrected = ImageEnhance.Brightness(face_gray).enhance(brightness)
        face_light_corrected = ImageEnhance.Contrast(face_light_corrected).enhance(contrast)

        return {
            'left_eye': self._to_tensor(left_eye),
            'right_eye': self._to_tensor(right_eye),
            'face_blurred_gray': self._to_tensor(face_blurred),
            'face_gray': self._to_tensor(face_light_corrected),
            'pitch': torch.tensor(pitch),
            'yaw': torch.tensor(yaw),
        }
|
|
|
|
class MPIIGazeDataset(Dataset):
    """Skeleton loader for the MPIIGaze / MPIIFaceGaze dataset.

    MPIIFaceGaze contains:
        - Face images normalized to 224x224
        - Left and right eye patches extracted from face images
        - 3D gaze direction vectors

    Expected dataset format: HDF5 files with keys:
        - 'image': face image [224, 224, 3]
        - 'left_eye': left eye patch [varies, varies, 3]
        - 'right_eye': right eye patch [varies, varies, 3]
        - 'gaze': gaze vector [3] (unit vector in camera coordinate system)
        - 'head_pose': head rotation vector [3]

    Only the directory check and the vector-to-angle conversion are
    implemented here: the sample list is always empty and item access
    raises ``NotImplementedError``.
    """

    def __init__(
        self,
        data_dir: str,
        split: str = 'train',
        img_size_eye: int = 112,
        img_size_face: int = 224,
        transform=None,
    ):
        """Store the configuration and index the requested split.

        Args:
            data_dir: root directory of the dataset.
            split: name of the sub-directory of ``data_dir`` to use.
            img_size_eye: stored on the instance; not used by this skeleton.
            img_size_face: stored on the instance; not used by this skeleton.
            transform: stored on the instance; not used by this skeleton.

        Raises:
            FileNotFoundError: if ``data_dir / split`` does not exist.
        """
        self.data_dir = Path(data_dir)
        self.split = split
        self.img_size_eye = img_size_eye
        self.img_size_face = img_size_face
        self.transform = transform

        # Indexing happens eagerly, so a missing split fails at construction.
        self.samples = self._load_samples()

    def _load_samples(self) -> List[Dict]:
        """Return the sample metadata for the configured split.

        Currently this only verifies that the split directory exists and
        then returns an empty list.
        """
        split_dir = self.data_dir / self.split
        if split_dir.exists():
            return []
        raise FileNotFoundError(f"Data directory not found: {split_dir}")

    def _gaze_to_angles(self, gaze_vector: np.ndarray) -> Tuple[float, float]:
        """Convert a 3-component gaze vector to ``(pitch, yaw)`` in degrees.

        yaw is ``atan2(x, z)`` and pitch is ``atan2(-y, sqrt(x^2 + z^2))``.
        NOTE(review): which axis/sign convention the dataset's gaze vectors
        follow is not visible here; confirm it matches before relying on
        these angles.
        """
        gx, gy, gz = gaze_vector

        horizontal = np.sqrt(gx**2 + gz**2)
        yaw_deg = np.arctan2(gx, gz) * 180.0 / np.pi
        pitch_deg = np.arctan2(-gy, horizontal) * 180.0 / np.pi

        return float(pitch_deg), float(yaw_deg)

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Item loading is not implemented; always raises NotImplementedError."""
        message = (
            "MPIIGaze dataset loader requires the actual dataset files. "
            "Use SyntheticGazeDataset for development and testing."
        )
        raise NotImplementedError(message)
|
|
|
|
def _seed_worker(worker_id: int) -> None:
    """Seed NumPy's global RNG inside a DataLoader worker process.

    PyTorch gives every worker its own torch seed but leaves NumPy alone,
    so workers can otherwise start from identical NumPy states and emit
    the same "random" noise. Defined at module level so it stays picklable
    for spawn-based worker start methods.
    """
    np.random.seed(torch.initial_seed() % 2**32)


def create_dataloaders(
    num_train: int = 40000,
    num_val: int = 5000,
    num_test: int = 5000,
    batch_size: int = 64,
    num_workers: int = 4,
    seed: int = 42,
    pin_memory: Optional[bool] = None,
) -> Tuple[DataLoader, DataLoader, DataLoader]:
    """Create train/val/test dataloaders with synthetic data.

    Args:
        num_train: number of training samples.
        num_val: number of validation samples.
        num_test: number of test samples.
        batch_size: batch size used by all three loaders.
        num_workers: worker processes per loader.
        seed: base seed; the splits use ``seed``, ``seed + 1`` and
            ``seed + 2``, and the training shuffle order is derived from
            ``seed`` as well.
        pin_memory: whether loaders pin host memory. ``None`` (default)
            enables it only when CUDA is available, which avoids the
            pointless pinning (and warning) on CPU-only machines.

    Returns:
        ``(train_loader, val_loader, test_loader)``. Only the training
        loader shuffles and drops the last incomplete batch.
    """
    if pin_memory is None:
        pin_memory = torch.cuda.is_available()

    # (sample count, seed offset, noise level) per split; training data is
    # generated with more noise than the evaluation splits.
    train_dataset = SyntheticGazeDataset(
        num_samples=num_train,
        seed=seed,
        noise_level=0.08,
    )
    val_dataset = SyntheticGazeDataset(
        num_samples=num_val,
        seed=seed + 1,
        noise_level=0.05,
    )
    test_dataset = SyntheticGazeDataset(
        num_samples=num_test,
        seed=seed + 2,
        noise_level=0.05,
    )

    shared_kwargs = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=pin_memory,
        worker_init_fn=_seed_worker,
    )

    train_loader = DataLoader(
        train_dataset,
        shuffle=True,
        drop_last=True,
        # Dedicated generator: shuffle order follows `seed` instead of
        # whatever state the global torch RNG happens to be in.
        generator=torch.Generator().manual_seed(seed),
        **shared_kwargs,
    )
    val_loader = DataLoader(val_dataset, shuffle=False, **shared_kwargs)
    test_loader = DataLoader(test_dataset, shuffle=False, **shared_kwargs)

    return train_loader, val_loader, test_loader
|
|