"""
PriviGaze Dataset - Synthetic Gaze Dataset Generator and MPIIGaze Loader

Since gaze datasets are not readily available on HF Hub, this module provides:
1. A synthetic gaze dataset generator using UnityEyes-style rendering
2. MPIIGaze dataset loader (if dataset is available locally)

The synthetic generator creates realistic face/eye crops with known gaze
vectors, enabling the teacher-student distillation pipeline to be tested
end-to-end.
"""

import json
import os
from pathlib import Path
from typing import Optional, Tuple, Dict, List

import numpy as np
import torch
from PIL import Image, ImageFilter, ImageOps, ImageEnhance
from torch.utils.data import Dataset, DataLoader


class SyntheticGazeDataset(Dataset):
    """Generates synthetic eye/face crops with known gaze vectors.

    Creates simple eye and face patterns where the gaze direction is encoded
    in the position of the iris/pupil relative to the eye. This allows
    end-to-end testing and training of the gaze estimation pipeline when real
    gaze datasets are not available.

    Samples are rendered lazily in ``__getitem__``. All randomness for a
    sample (pixel noise, lighting jitter) is drawn from a generator derived
    from ``(seed, idx)``, so a given index always yields the same tensors,
    independent of access order and of DataLoader worker processes.

    Each sample is a dict with (shapes shown for the default sizes):
        - left_eye: [3, 112, 112] RGB eye crop, values in [-1, 1]
        - right_eye: [3, 112, 112] RGB eye crop, values in [-1, 1]
        - face_blurred_gray: [1, 224, 224] Gaussian-blurred grayscale face
        - face_gray: [1, 224, 224] grayscale face with brightness/contrast
          jitter
        - pitch: scalar tensor, degrees, sampled uniformly from [-60, 60]
        - yaw: scalar tensor, degrees, sampled uniformly from [-60, 60]
    """

    def __init__(
        self,
        num_samples: int = 50000,
        img_size_eye: int = 112,
        img_size_face: int = 224,
        seed: int = 42,
        noise_level: float = 0.1,
    ):
        """Pre-sample gaze angles and colors for every index.

        Args:
            num_samples: number of samples in the dataset
            img_size_eye: side length (pixels) of the square eye crops
            img_size_face: side length (pixels) of the square face images
            seed: seed for the label/color draws and for per-sample noise
            noise_level: standard deviation of the additive Gaussian pixel
                noise (image values are in [0, 1] when it is applied)
        """
        self.num_samples = num_samples
        self.img_size_eye = img_size_eye
        self.img_size_face = img_size_face
        self.seed = seed
        self.noise_level = noise_level

        # Generate all gaze angles and colors upfront. The draw order is
        # fixed so that labels for a given seed never change.
        rng = np.random.RandomState(seed)
        self.pitch_angles = rng.uniform(-60, 60, num_samples).astype(np.float32)
        self.yaw_angles = rng.uniform(-60, 60, num_samples).astype(np.float32)
        self.iris_colors = rng.uniform(0.3, 0.9, (num_samples, 3)).astype(np.float32)
        self.skin_colors = rng.uniform(0.4, 0.9, (num_samples, 3)).astype(np.float32)

    def __len__(self):
        return self.num_samples

    def _sample_rng(self, idx: int) -> np.random.Generator:
        """Return the random generator that drives all noise for sample `idx`."""
        if self.seed is None:
            # No seed requested: fall back to OS entropy (non-reproducible).
            return np.random.default_rng()
        return np.random.default_rng([self.seed, idx])

    def _add_noise_and_convert(
        self, img: np.ndarray, rng: np.random.Generator
    ) -> Image.Image:
        """Add Gaussian pixel noise to a [0, 1] float image and return it as PIL."""
        noise = rng.standard_normal(img.shape) * self.noise_level
        noisy = np.clip(img + noise, 0, 1.0)
        return Image.fromarray((noisy * 255).astype(np.uint8))

    @staticmethod
    def _to_tensor(image: Image.Image) -> torch.Tensor:
        """Convert a PIL image to a channel-first float tensor in [-1, 1]."""
        tensor = torch.from_numpy(np.array(image)).float() / 255.0
        if tensor.ndim == 3:
            tensor = tensor.permute(2, 0, 1).contiguous()  # HWC -> CHW
        else:
            tensor = tensor.unsqueeze(0)  # HW -> 1HW
        return tensor * 2 - 1

    def _generate_eye(
        self,
        pitch: float,
        yaw: float,
        iris_color: np.ndarray,
        eye_idx: int = 0,
        rng: Optional[np.random.Generator] = None,
    ) -> Image.Image:
        """Generate a synthetic eye image with pupil position encoding gaze.

        Args:
            pitch: gaze pitch angle in degrees
            yaw: gaze yaw angle in degrees
            iris_color: [3] RGB iris color
            eye_idx: 0 for left eye, 1 for right eye (currently unused: both
                eyes are rendered with the same geometry)
            rng: generator used for the noise; a fresh unseeded generator is
                created when omitted

        Returns:
            PIL Image of size (img_size_eye, img_size_eye)
        """
        if rng is None:
            rng = np.random.default_rng()

        size = self.img_size_eye
        # White background (sclera)
        img = np.full((size, size, 3), 0.95, dtype=np.float32)

        center_y, center_x = size // 2, size // 2
        y_grid, x_grid = np.ogrid[:size, :size]

        # Eye shape: oval (sclera boundary)
        eye_mask = (
            (x_grid - center_x) ** 2 / (size * 0.35) ** 2
            + (y_grid - center_y) ** 2 / (size * 0.25) ** 2
        ) <= 1.0

        # Slightly noisy skin around the eye (fixed default skin tone)
        skin_mask = ~eye_mask
        skin_color = np.array([0.85, 0.7, 0.6])
        skin_noise = rng.standard_normal((size, size))
        img[skin_mask] = skin_color * 0.9 + skin_noise[skin_mask][:, None] * 0.02

        # Iris position: yaw moves left/right, pitch moves up/down.
        # The centre is kept as a float: truncating it to whole pixels would
        # collapse several degrees of gaze onto the same image.
        iris_radius = size * 0.18
        max_displacement = size * 0.12
        iris_cx = center_x + yaw / 90.0 * max_displacement  # positive yaw = pupil right
        iris_cy = center_y - pitch / 90.0 * max_displacement  # positive pitch = pupil up
        dist_sq = (x_grid - iris_cx) ** 2 + (y_grid - iris_cy) ** 2

        # Iris disc, clipped to the eye boundary
        img[(dist_sq <= iris_radius ** 2) & eye_mask] = iris_color

        # Pupil (dark disc in the centre of the iris)
        pupil_radius = iris_radius * 0.4
        img[dist_sq <= pupil_radius ** 2] = np.array([0.05, 0.05, 0.05])

        # Specular highlight (reflection), offset up-left of the iris centre
        highlight_radius = iris_radius * 0.15
        highlight_cy = iris_cy - int(iris_radius * 0.3)
        highlight_cx = iris_cx - int(iris_radius * 0.2)
        highlight_mask = (
            (x_grid - highlight_cx) ** 2 + (y_grid - highlight_cy) ** 2
        ) <= highlight_radius ** 2
        img[highlight_mask] = np.clip(img[highlight_mask] + 0.3, 0, 1.0)

        # Eyelids: skin-coloured bands over the top and bottom of the oval
        eyelid_thickness = 0.15
        rel_y = (y_grid - center_y) / (size * 0.25)
        top_lid_mask = rel_y < -0.7 + eyelid_thickness
        bottom_lid_mask = rel_y > 0.7 - eyelid_thickness
        eyelid_color = skin_color * 0.85
        img[top_lid_mask & eye_mask] = eyelid_color
        img[bottom_lid_mask & eye_mask] = eyelid_color

        return self._add_noise_and_convert(img, rng)

    def _generate_face(
        self,
        pitch: float,
        yaw: float,
        skin_color: np.ndarray,
        rng: Optional[np.random.Generator] = None,
    ) -> Image.Image:
        """Generate a simple face-like pattern.

        The face is an oval on a dark background containing two eyes whose
        iris/pupil are displaced according to the gaze direction, plus a
        small nose mark.

        Args:
            pitch: gaze pitch angle in degrees
            yaw: gaze yaw angle in degrees
            skin_color: [3] RGB skin color
            rng: generator used for the noise; a fresh unseeded generator is
                created when omitted

        Returns:
            PIL Image of size (img_size_face, img_size_face)
        """
        if rng is None:
            rng = np.random.default_rng()

        size = self.img_size_face
        img = np.ones((size, size, 3), dtype=np.float32) * skin_color

        center_y, center_x = size // 2, size // 2
        y_grid, x_grid = np.ogrid[:size, :size]

        # Simple oval face shape; everything outside it is background
        face_mask = (
            (x_grid - center_x) ** 2 / (size * 0.38) ** 2
            + (y_grid - center_y) ** 2 / (size * 0.45) ** 2
        ) <= 1.0
        img[~face_mask] = np.array([0.3, 0.3, 0.35])

        # Eye positions on the face (apart horizontally, above centre)
        eye_offset_x = int(size * 0.12)
        eye_cy = center_y - int(size * 0.08)

        # Gaze-dependent iris displacement. Kept as floats: the full range is
        # only a few pixels, so integer truncation would erase most of the
        # gaze signal.
        displacement = size * 0.02
        pupil_dx = yaw / 90.0 * displacement
        pupil_dy = -pitch / 90.0 * displacement

        eye_size = size * 0.06
        iris_radius = eye_size * 0.5
        pupil_r = iris_radius * 0.4
        for eye_cx in (center_x - eye_offset_x, center_x + eye_offset_x):
            # Eye white
            eye_white = (x_grid - eye_cx) ** 2 + (y_grid - eye_cy) ** 2 <= eye_size ** 2
            img[eye_white] = np.array([0.95, 0.95, 0.95])

            # Iris and pupil share the same displaced centre
            dist_sq = (
                (x_grid - (eye_cx + pupil_dx)) ** 2
                + (y_grid - (eye_cy + pupil_dy)) ** 2
            )
            img[dist_sq <= iris_radius ** 2] = np.array([0.3, 0.5, 0.7])
            img[dist_sq <= pupil_r ** 2] = np.array([0.05, 0.05, 0.05])

        # Nose hint
        nose_cx, nose_cy = center_x, center_y + int(size * 0.1)
        nose = (x_grid - nose_cx) ** 2 + (y_grid - nose_cy) ** 2 <= (size * 0.03) ** 2
        img[nose] = skin_color * 0.85

        return self._add_noise_and_convert(img, rng)

    def __getitem__(self, idx):
        """Render sample `idx` and return it as a dict of tensors.

        Raises:
            IndexError: if `idx` is outside ``[-num_samples, num_samples)``.
        """
        idx = int(idx)
        if idx < 0:
            idx += self.num_samples
        if not 0 <= idx < self.num_samples:
            raise IndexError(
                f"index out of range for dataset of size {self.num_samples}"
            )

        pitch = float(self.pitch_angles[idx])
        yaw = float(self.yaw_angles[idx])
        iris_color = self.iris_colors[idx]
        skin_color = self.skin_colors[idx]

        # One generator per sample keeps the output reproducible and avoids
        # relying on the global NumPy RNG, whose state is duplicated across
        # forked DataLoader workers.
        rng = self._sample_rng(idx)

        # Right eye gets a slightly darker iris than the left one
        left_eye = self._generate_eye(pitch, yaw, iris_color, eye_idx=0, rng=rng)
        right_eye = self._generate_eye(pitch, yaw, iris_color * 0.95, eye_idx=1, rng=rng)

        face_rgb = self._generate_face(pitch, yaw, skin_color, rng=rng)

        # Blurred grayscale face (teacher input - only coarse geometry left)
        face_gray = ImageOps.grayscale(face_rgb)
        face_blurred = face_gray.filter(ImageFilter.GaussianBlur(radius=8.0))

        # Grayscale face with simulated lighting variation (student input):
        # brightness factor in [0.8, 1.2), contrast factor in [0.9, 1.1)
        face_light_corrected = ImageEnhance.Brightness(face_gray).enhance(
            0.8 + 0.4 * rng.random()
        )
        face_light_corrected = ImageEnhance.Contrast(face_light_corrected).enhance(
            0.9 + 0.2 * rng.random()
        )

        return {
            'left_eye': self._to_tensor(left_eye),  # [3, 112, 112]
            'right_eye': self._to_tensor(right_eye),  # [3, 112, 112]
            'face_blurred_gray': self._to_tensor(face_blurred),  # [1, 224, 224]
            'face_gray': self._to_tensor(face_light_corrected),  # [1, 224, 224]
            'pitch': torch.tensor(pitch),
            'yaw': torch.tensor(yaw),
        }


class MPIIGazeDataset(Dataset):
    """Loader for MPIIGaze/MPIIFaceGaze dataset.

    NOTE: this class is a placeholder. `_load_samples` currently returns an
    empty list and `__getitem__` always raises NotImplementedError.

    MPIIFaceGaze contains:
        - Face images normalized to 224x224
        - Left and right eye patches extracted from face images
        - 3D gaze direction vectors

    Dataset format:
        HDF5 files with keys:
        - 'image': face image [224, 224, 3]
        - 'left_eye': left eye patch [varies, varies, 3]
        - 'right_eye': right eye patch [varies, varies, 3]
        - 'gaze': gaze vector [3] (unit vector in camera coordinate system)
        - 'head_pose': head rotation vector [3]
    """

    def __init__(
        self,
        data_dir: str,
        split: str = 'train',
        img_size_eye: int = 112,
        img_size_face: int = 224,
        transform=None,
    ):
        """Store the configuration and index the samples under data_dir/split.

        Raises:
            FileNotFoundError: if ``data_dir / split`` does not exist.
        """
        self.data_dir = Path(data_dir)
        self.split = split
        self.img_size_eye = img_size_eye
        self.img_size_face = img_size_face
        self.transform = transform

        # Load data indices
        self.samples = self._load_samples()

    def _load_samples(self) -> List[Dict]:
        """Load sample metadata from the dataset.

        Currently only checks that the split directory exists and returns an
        empty list.
        """
        samples = []

        # Implementation depends on actual dataset format
        # For MPIIGaze: scans .mat or .h5 files
        # This is a placeholder - fill in based on actual data
        data_path = self.data_dir / self.split
        if not data_path.exists():
            raise FileNotFoundError(f"Data directory not found: {data_path}")

        # TODO: Implement actual MPIIGaze loading
        # See: https://github.com/hysts/pytorch_mpiigaze for reference
        return samples

    def _gaze_to_angles(self, gaze_vector: np.ndarray) -> Tuple[float, float]:
        """Convert 3D gaze direction vector to (pitch, yaw) angles in degrees.

        The vector is interpreted as [x, y, z] in camera coordinates with
        Z pointing forward, X right and Y down.
        NOTE(review): confirm this matches the sign convention of the gaze
        vectors stored in the dataset files before relying on it.
        """
        x, y, z = gaze_vector

        # Yaw: rotation around Y axis (left-right)
        yaw = np.arctan2(x, z) * 180.0 / np.pi
        # Pitch: rotation around X axis (up-down); -y because Y points down
        pitch = np.arctan2(-y, np.sqrt(x**2 + z**2)) * 180.0 / np.pi

        return float(pitch), float(yaw)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        # Placeholder - implement based on actual data format
        raise NotImplementedError(
            "MPIIGaze dataset loader requires the actual dataset files. "
            "Use SyntheticGazeDataset for development and testing."
        )


def create_dataloaders(
    num_train: int = 40000,
    num_val: int = 5000,
    num_test: int = 5000,
    batch_size: int = 64,
    num_workers: int = 4,
    seed: int = 42,
):
    """Create train/val/test dataloaders with synthetic data.

    The three splits use seeds ``seed``, ``seed + 1`` and ``seed + 2``. The
    training split uses a higher noise level (0.08) than validation and test
    (0.05), is shuffled, and drops the last incomplete batch.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.
    """
    train_dataset = SyntheticGazeDataset(
        num_samples=num_train,
        seed=seed,
        noise_level=0.08,
    )
    val_dataset = SyntheticGazeDataset(
        num_samples=num_val,
        seed=seed + 1,
        noise_level=0.05,
    )
    test_dataset = SyntheticGazeDataset(
        num_samples=num_test,
        seed=seed + 2,
        noise_level=0.05,
    )

    # Settings shared by all three loaders
    loader_kwargs = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=True,
    )

    train_loader = DataLoader(
        train_dataset,
        shuffle=True,
        drop_last=True,
        **loader_kwargs,
    )
    val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
    test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

    return train_loader, val_loader, test_loader