BcantCode committed
Commit b39769f · verified · 1 Parent(s): 94cb2c0

Upload models/dataset.py

Files changed (1)
  1. models/dataset.py +374 -0
models/dataset.py ADDED
@@ -0,0 +1,374 @@
"""
PriviGaze Dataset - Synthetic Gaze Dataset Generator and MPIIGaze Loader

Since gaze datasets are not readily available on the HF Hub, this module provides:
1. A synthetic gaze dataset generator using simple UnityEyes-style rendering
2. An MPIIGaze dataset loader (if the dataset is available locally)

The synthetic generator creates realistic face/eye crops with known gaze vectors,
enabling the teacher-student distillation pipeline to be tested end-to-end.
"""

import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image, ImageFilter, ImageOps, ImageEnhance
import json
from pathlib import Path
from typing import Optional, Tuple, Dict, List


class SyntheticGazeDataset(Dataset):
    """Generates synthetic eye/face crops with known gaze vectors.

    Creates simple but realistic eye and face patterns where the gaze direction
    is encoded in the relative positions of pupil and iris within the eye crop.

    This allows end-to-end testing and training of the gaze estimation pipeline
    when real gaze datasets are not available.

    Each sample includes:
    - left_eye: [3, 112, 112] simulated eye with pupil position encoding gaze
    - right_eye: [3, 112, 112]
    - face_blurred_gray: [1, 224, 224] blurred grayscale face (teacher input)
    - face_gray: [1, 224, 224] light-corrected grayscale face (for student)
    - pitch: float (degrees, sampled uniformly in [-60, 60])
    - yaw: float (degrees, sampled uniformly in [-60, 60])
    """

    def __init__(
        self,
        num_samples: int = 50000,
        img_size_eye: int = 112,
        img_size_face: int = 224,
        seed: int = 42,
        noise_level: float = 0.1,
    ):
        self.num_samples = num_samples
        self.img_size_eye = img_size_eye
        self.img_size_face = img_size_face
        self.noise_level = noise_level

        # Generate all gaze angles upfront
        rng = np.random.RandomState(seed)
        self.pitch_angles = rng.uniform(-60, 60, num_samples).astype(np.float32)
        self.yaw_angles = rng.uniform(-60, 60, num_samples).astype(np.float32)

        # Generate random iris and skin colors
        self.iris_colors = rng.uniform(0.3, 0.9, (num_samples, 3)).astype(np.float32)
        self.skin_colors = rng.uniform(0.4, 0.9, (num_samples, 3)).astype(np.float32)

    def __len__(self):
        return self.num_samples

    def _generate_eye(self, pitch: float, yaw: float, iris_color: np.ndarray,
                      eye_idx: int = 0) -> Image.Image:
        """Generate a synthetic eye image with pupil position encoding gaze.

        Args:
            pitch: gaze pitch angle in degrees
            yaw: gaze yaw angle in degrees
            iris_color: [3] RGB iris color
            eye_idx: 0 for left eye, 1 for right eye (currently unused)

        Returns:
            PIL Image of size (img_size_eye, img_size_eye)
        """
        size = self.img_size_eye
        img = np.ones((size, size, 3), dtype=np.float32) * 0.95  # White background (sclera)

        # Eye oval (sclera boundary)
        center_y, center_x = size // 2, size // 2
        y_grid, x_grid = np.ogrid[:size, :size]

        # Eye shape: oval
        eye_mask = ((x_grid - center_x) ** 2 / (size * 0.35) ** 2 +
                    (y_grid - center_y) ** 2 / (size * 0.25) ** 2) <= 1.0

        # Add slight skin around eye
        skin_mask = ~eye_mask
        skin_color = np.array([0.85, 0.7, 0.6])  # Default skin tone
        img[skin_mask] = skin_color * 0.9 + np.random.randn(size, size)[..., None][skin_mask] * 0.02

        # Iris circle
        iris_radius = size * 0.18

        # Pupil position: yaw moves left/right, pitch moves up/down
        # Scale: max displacement keeps the iris within the eye oval
        max_displacement = size * 0.12
        pupil_dx = yaw / 90.0 * max_displacement     # Positive yaw = looking right = pupil right
        pupil_dy = -pitch / 90.0 * max_displacement  # Positive pitch = looking up = pupil up

        iris_cy = center_y + int(pupil_dy)
        iris_cx = center_x + int(pupil_dx)

        # Create iris mask
        iris_mask = (x_grid - iris_cx) ** 2 + (y_grid - iris_cy) ** 2 <= iris_radius ** 2
        iris_mask = iris_mask & eye_mask  # Clip to eye boundary

        # Fill iris with color
        img[iris_mask] = iris_color

        # Pupil (black circle in center of iris)
        pupil_radius = iris_radius * 0.4
        pupil_mask = (x_grid - iris_cx) ** 2 + (y_grid - iris_cy) ** 2 <= pupil_radius ** 2
        img[pupil_mask] = np.array([0.05, 0.05, 0.05])

        # Specular highlight (reflection)
        highlight_radius = iris_radius * 0.15
        highlight_cy = iris_cy - int(iris_radius * 0.3)
        highlight_cx = iris_cx - int(iris_radius * 0.2)
        highlight_mask = (x_grid - highlight_cx) ** 2 + (y_grid - highlight_cy) ** 2 <= highlight_radius ** 2
        img[highlight_mask] = np.clip(img[highlight_mask] + 0.3, 0, 1.0)

        # Eyelids (top and bottom)
        eyelid_thickness = 0.15
        top_lid_mask = (y_grid - center_y) / (size * 0.25) < -0.7 + eyelid_thickness
        bottom_lid_mask = (y_grid - center_y) / (size * 0.25) > 0.7 - eyelid_thickness
        eyelid_color = skin_color * 0.85
        img[top_lid_mask & eye_mask] = eyelid_color
        img[bottom_lid_mask & eye_mask] = eyelid_color

        # Add noise
        noise = np.random.randn(size, size, 3) * self.noise_level
        img = np.clip(img + noise, 0, 1.0)

        # Convert to PIL
        img_uint8 = (img * 255).astype(np.uint8)
        return Image.fromarray(img_uint8)

    def _generate_face(self, pitch: float, yaw: float, skin_color: np.ndarray) -> Image.Image:
        """Generate a simple face-like pattern.

        The face contains both eyes positioned according to gaze direction,
        providing the geometric information that the teacher model uses
        (via blurred version) and the student must learn from directly.
        """
        size = self.img_size_face
        img = np.ones((size, size, 3), dtype=np.float32) * skin_color

        center_y, center_x = size // 2, size // 2

        # Simple oval face shape
        y_grid, x_grid = np.ogrid[:size, :size]
        face_mask = ((x_grid - center_x) ** 2 / (size * 0.38) ** 2 +
                     (y_grid - center_y) ** 2 / (size * 0.45) ** 2) <= 1.0

        # Background
        img[~face_mask] = np.array([0.3, 0.3, 0.35])

        # Eye positions on face (further apart, higher up)
        left_eye_cx = center_x - int(size * 0.12)
        right_eye_cx = center_x + int(size * 0.12)
        eye_cy = center_y - int(size * 0.08)

        # Gaze-displaced pupil positions on each eye
        displacement = size * 0.02
        pupil_dx = yaw / 90.0 * displacement
        pupil_dy = -pitch / 90.0 * displacement

        # Draw eyes on face
        eye_size = size * 0.06
        for eye_cx in [left_eye_cx, right_eye_cx]:
            # Eye white
            eye_white = (x_grid - eye_cx) ** 2 + (y_grid - eye_cy) ** 2 <= eye_size ** 2
            img[eye_white] = np.array([0.95, 0.95, 0.95])

            # Iris
            iris_radius = eye_size * 0.5
            iris_cy = eye_cy + int(pupil_dy)
            iris_cx = eye_cx + int(pupil_dx)
            iris = (x_grid - iris_cx) ** 2 + (y_grid - iris_cy) ** 2 <= iris_radius ** 2
            img[iris] = np.array([0.3, 0.5, 0.7])

            # Pupil
            pupil_r = iris_radius * 0.4
            pupil = (x_grid - iris_cx) ** 2 + (y_grid - iris_cy) ** 2 <= pupil_r ** 2
            img[pupil] = np.array([0.05, 0.05, 0.05])

        # Nose hint
        nose_cx, nose_cy = center_x, center_y + int(size * 0.1)
        nose = (x_grid - nose_cx) ** 2 + (y_grid - nose_cy) ** 2 <= (size * 0.03) ** 2
        img[nose] = skin_color * 0.85

        # Add noise
        noise = np.random.randn(size, size, 3) * self.noise_level
        img = np.clip(img + noise, 0, 1.0)

        img_uint8 = (img * 255).astype(np.uint8)
        return Image.fromarray(img_uint8)

    def __getitem__(self, idx):
        pitch = float(self.pitch_angles[idx])
        yaw = float(self.yaw_angles[idx])
        iris_color = self.iris_colors[idx]
        skin_color = self.skin_colors[idx]

        # Generate left and right eyes
        # (the right eye gets a slightly darker iris color for realism)
        left_eye = self._generate_eye(pitch, yaw, iris_color, eye_idx=0)
        right_eye = self._generate_eye(pitch, yaw, iris_color * 0.95, eye_idx=1)

        # Generate face
        face_rgb = self._generate_face(pitch, yaw, skin_color)

        # Create blurred grayscale face (teacher input - only geometric info)
        face_gray = ImageOps.grayscale(face_rgb)
        face_blurred = face_gray.filter(ImageFilter.GaussianBlur(radius=8.0))

        # Create light-corrected grayscale face (student input)
        # Simulate varied lighting by adjusting brightness/contrast
        enhancer = ImageEnhance.Brightness(face_gray)
        face_light_corrected = enhancer.enhance(0.8 + 0.4 * np.random.random())
        enhancer = ImageEnhance.Contrast(face_light_corrected)
        face_light_corrected = enhancer.enhance(0.9 + 0.2 * np.random.random())

        # Convert to tensors
        left_eye_tensor = torch.from_numpy(np.array(left_eye)).permute(2, 0, 1).float() / 255.0
        right_eye_tensor = torch.from_numpy(np.array(right_eye)).permute(2, 0, 1).float() / 255.0
        face_blurred_tensor = torch.from_numpy(np.array(face_blurred)).unsqueeze(0).float() / 255.0
        face_light_tensor = torch.from_numpy(np.array(face_light_corrected)).unsqueeze(0).float() / 255.0

        # Normalize to [-1, 1]
        left_eye_tensor = left_eye_tensor * 2 - 1
        right_eye_tensor = right_eye_tensor * 2 - 1
        face_blurred_tensor = face_blurred_tensor * 2 - 1
        face_light_tensor = face_light_tensor * 2 - 1

        return {
            'left_eye': left_eye_tensor,               # [3, 112, 112]
            'right_eye': right_eye_tensor,             # [3, 112, 112]
            'face_blurred_gray': face_blurred_tensor,  # [1, 224, 224]
            'face_gray': face_light_tensor,            # [1, 224, 224]
            'pitch': torch.tensor(pitch),
            'yaw': torch.tensor(yaw),
        }


class MPIIGazeDataset(Dataset):
    """Loader for the MPIIGaze/MPIIFaceGaze dataset.

    MPIIFaceGaze contains:
    - Face images normalized to 224x224
    - Left and right eye patches extracted from face images
    - 3D gaze direction vectors

    Dataset format: HDF5 files with keys:
    - 'image': face image [224, 224, 3]
    - 'left_eye': left eye patch [varies, varies, 3]
    - 'right_eye': right eye patch [varies, varies, 3]
    - 'gaze': gaze vector [3] (unit vector in camera coordinate system)
    - 'head_pose': head rotation vector [3]
    """

    def __init__(
        self,
        data_dir: str,
        split: str = 'train',
        img_size_eye: int = 112,
        img_size_face: int = 224,
        transform=None,
    ):
        self.data_dir = Path(data_dir)
        self.split = split
        self.img_size_eye = img_size_eye
        self.img_size_face = img_size_face
        self.transform = transform

        # Load data indices
        self.samples = self._load_samples()

    def _load_samples(self) -> List[Dict]:
        """Load sample metadata from the dataset."""
        samples = []
        # Implementation depends on actual dataset format
        # For MPIIGaze: scans .mat or .h5 files
        # This is a placeholder - fill in based on actual data
        data_path = self.data_dir / self.split
        if not data_path.exists():
            raise FileNotFoundError(f"Data directory not found: {data_path}")

        # TODO: Implement actual MPIIGaze loading
        # See: https://github.com/hysts/pytorch_mpiigaze for reference
        return samples

    def _gaze_to_angles(self, gaze_vector: np.ndarray) -> Tuple[float, float]:
        """Convert 3D gaze direction vector to pitch/yaw angles."""
        # Gaze vector is [x, y, z] in camera coordinates
        # Z points forward, X right, Y down
        x, y, z = gaze_vector

        # Yaw: rotation around Y axis (left-right)
        yaw = np.arctan2(x, z) * 180.0 / np.pi

        # Pitch: rotation around X axis (up-down)
        pitch = np.arctan2(-y, np.sqrt(x**2 + z**2)) * 180.0 / np.pi

        return float(pitch), float(yaw)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        # Placeholder - implement based on actual data format
        raise NotImplementedError(
            "MPIIGaze dataset loader requires the actual dataset files. "
            "Use SyntheticGazeDataset for development and testing."
        )


def create_dataloaders(
    num_train: int = 40000,
    num_val: int = 5000,
    num_test: int = 5000,
    batch_size: int = 64,
    num_workers: int = 4,
    seed: int = 42,
):
    """Create train/val/test dataloaders with synthetic data."""

    train_dataset = SyntheticGazeDataset(
        num_samples=num_train,
        seed=seed,
        noise_level=0.08,
    )

    val_dataset = SyntheticGazeDataset(
        num_samples=num_val,
        seed=seed + 1,
        noise_level=0.05,
    )

    test_dataset = SyntheticGazeDataset(
        num_samples=num_test,
        seed=seed + 2,
        noise_level=0.05,
    )

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
    )

    return train_loader, val_loader, test_loader
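

if __name__ == "__main__":
    # Minimal usage sketch: build small synthetic loaders and inspect one batch.
    # The tiny sample counts and num_workers=0 are arbitrary choices for a quick
    # CPU smoke test, not the defaults used by the training pipeline.
    train_loader, val_loader, test_loader = create_dataloaders(
        num_train=64,
        num_val=16,
        num_test=16,
        batch_size=8,
        num_workers=0,
    )
    batch = next(iter(train_loader))
    for key, value in batch.items():
        # Expected per the dataset docstring: left_eye/right_eye -> (8, 3, 112, 112),
        # face tensors -> (8, 1, 224, 224), pitch/yaw -> (8,)
        print(f"{key}: {tuple(value.shape)}")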