Abdelrahman Almatrooshi committed on
Commit
7b53d75
·
0 Parent(s):

FocusGuard with L2CS-Net gaze estimation

Browse files

Full integration: MediaPipe face mesh, MLP, XGBoost, Hybrid, and L2CS
pipelines. Includes 9-point gaze calibration, boost mode, and gaze-eye
fusion for real-time focus detection.

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .coverage +0 -0
  2. .coveragerc +23 -0
  3. .dockerignore +35 -0
  4. .gitattributes +3 -0
  5. .gitignore +44 -0
  6. Dockerfile +40 -0
  7. README.md +135 -0
  8. api/history +0 -0
  9. api/import +0 -0
  10. api/sessions +0 -0
  11. app.py +1 -0
  12. checkpoints/L2CSNet_gaze360.pkl +3 -0
  13. checkpoints/hybrid_combiner.joblib +3 -0
  14. checkpoints/hybrid_focus_config.json +14 -0
  15. checkpoints/meta_best.npz +3 -0
  16. checkpoints/meta_mlp.npz +3 -0
  17. checkpoints/mlp_best.pt +0 -0
  18. checkpoints/model_best.joblib +3 -0
  19. checkpoints/scaler_best.joblib +3 -0
  20. checkpoints/scaler_mlp.joblib +3 -0
  21. checkpoints/xgboost_face_orientation_best.json +0 -0
  22. data/README.md +11 -0
  23. data/collected_Abdelrahman/abdelrahman_20260306_023035.npz +3 -0
  24. data/collected_Jarek/Jarek_20260225_012931.npz +3 -0
  25. data/collected_Junhao/Junhao_20260303_113554.npz +3 -0
  26. data/collected_Kexin/kexin2_20260305_180229.npz +3 -0
  27. data/collected_Kexin/kexin_20260224_151043.npz +3 -0
  28. data/collected_Langyuan/Langyuan_20260303_153145.npz +3 -0
  29. data/collected_Mohamed/session_20260224_010131.npz +3 -0
  30. data/collected_Yingtao/Yingtao_20260306_023937.npz +3 -0
  31. data/collected_ayten/ayten_session_1.npz +3 -0
  32. data/collected_saba/saba_20260306_230710.npz +3 -0
  33. data_preparation/README.md +9 -0
  34. data_preparation/__init__.py +0 -0
  35. data_preparation/data_exploration.ipynb +0 -0
  36. data_preparation/prepare_dataset.py +241 -0
  37. docker-compose.yml +5 -0
  38. download_l2cs_weights.py +37 -0
  39. eslint.config.js +29 -0
  40. evaluation/README.md +19 -0
  41. evaluation/THRESHOLD_JUSTIFICATION.md +206 -0
  42. evaluation/feature_importance.py +230 -0
  43. evaluation/feature_selection_justification.md +54 -0
  44. evaluation/justify_thresholds.py +555 -0
  45. evaluation/logs/.gitkeep +0 -0
  46. evaluation/plots/confusion_matrix_mlp.png +0 -0
  47. evaluation/plots/confusion_matrix_xgb.png +0 -0
  48. evaluation/plots/ear_distribution.png +0 -0
  49. evaluation/plots/geo_weight_search.png +0 -0
  50. evaluation/plots/hybrid_weight_search.png +0 -0
.coverage ADDED
Binary file (86 kB). View file
 
.coveragerc ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [run]
2
+ branch = True
3
+ source =
4
+ .
5
+ omit =
6
+ .venv/*
7
+ venv/*
8
+ */site-packages/*
9
+ tests/*
10
+ notebooks/*
11
+ evaluation/*
12
+ models/mlp/train.py
13
+ models/mlp/sweep.py
14
+ models/mlp/eval_accuracy.py
15
+ models/cnn/eye_attention/train.py
16
+ models/collect_features.py
17
+ [report]
18
+ show_missing = True
19
+ skip_covered = False
20
+ precision = 1
21
+ exclude_lines =
22
+ pragma: no cover
23
+ if __name__ == .__main__.:
.dockerignore ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .git
2
+ .gitattributes
3
+ .github
4
+ node_modules
5
+ dist
6
+ venv
7
+ .venv
8
+ __pycache__
9
+ *.pyc
10
+ .pytest_cache
11
+ .mypy_cache
12
+ .ruff_cache
13
+
14
+ # Dev/eval files not needed at runtime
15
+ notebooks/
16
+ evaluation/
17
+ tests/
18
+ others/
19
+ *.ipynb
20
+ requirements-dev.txt
21
+ pytest.ini
22
+ eslint.config.js
23
+ docker-compose.yml
24
+
25
+ # L2CS backup is a full duplicate
26
+ models/L2CS-Net/L2CS-Net-backup/
27
+
28
+ # DB files (created at runtime)
29
+ *.db
30
+
31
+ # Editor / OS junk
32
+ .DS_Store
33
+ .cursor
34
+ .vscode
35
+ *.swp
.gitattributes ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ *.pkl filter=lfs diff=lfs merge=lfs -text
2
+ *.npz filter=lfs diff=lfs merge=lfs -text
3
+ *.joblib filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Logs
2
+ logs
3
+ *.log
4
+ npm-debug.log*
5
+ yarn-debug.log*
6
+ yarn-error.log*
7
+ pnpm-debug.log*
8
+ lerna-debug.log*
9
+
10
+ node_modules/
11
+ dist/
12
+ dist-ssr/
13
+ *.local
14
+
15
+ # Editor directories and files
16
+ .vscode/
17
+ .idea/
18
+ .DS_Store
19
+ *.suo
20
+ *.ntvs*
21
+ *.njsproj
22
+ *.sln
23
+ *.sw?
24
+ *.py[cod]
25
+ *$py.class
26
+ *.so
27
+ .Python
28
+ venv/
29
+ .venv/
30
+ env/
31
+ .env
32
+ *.egg-info/
33
+ .eggs/
34
+ build/
35
+ Thumbs.db
36
+ ignore/
37
+
38
+ # Project specific
39
+ focus_guard.db
40
+ test_focus_guard.db
41
+ static/
42
+ __pycache__/
43
+ docs/
44
+ docs
Dockerfile ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ RUN useradd -m -u 1000 user
4
+ ENV HOME=/home/user PATH=/home/user/.local/bin:$PATH
5
+ ENV PYTHONUNBUFFERED=1
6
+
7
+ WORKDIR /app
8
+
9
+ RUN apt-get update && apt-get install -y --no-install-recommends \
10
+ libglib2.0-0 libsm6 libxrender1 libxext6 libxcb1 libgl1 libgomp1 \
11
+ ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev \
12
+ libavdevice-dev libopus-dev libvpx-dev libsrtp2-dev \
13
+ build-essential nodejs npm git \
14
+ && rm -rf /var/lib/apt/lists/*
15
+
16
+ RUN pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu
17
+
18
+ # Python deps (separate layer for caching)
19
+ COPY requirements.txt ./
20
+ RUN pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu \
21
+ && pip install --no-cache-dir -r requirements.txt
22
+
23
+ # Copy source (respects .dockerignore)
24
+ COPY . .
25
+
26
+ # Build frontend
27
+ RUN npm install && npm run build && mkdir -p /app/static && cp -R dist/* /app/static/ \
28
+ && rm -rf node_modules dist
29
+
30
+ # Download models at build time
31
+ ENV FOCUSGUARD_CACHE_DIR=/app/.cache/focusguard
32
+ RUN python -c "from models.face_mesh import _ensure_model; _ensure_model()"
33
+ RUN python download_l2cs_weights.py || echo "[WARN] L2CS weights not downloaded — will run without gaze model"
34
+
35
+ RUN mkdir -p /app/data && chown -R user:user /app
36
+
37
+ USER user
38
+ EXPOSE 7860
39
+
40
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--log-level", "info"]
README.md ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: FocusGuard
3
+ colorFrom: indigo
4
+ colorTo: purple
5
+ sdk: docker
6
+ pinned: false
7
+ ---
8
+
9
+ # FocusGuard
10
+
11
+ Webcam-based focus detection: MediaPipe face mesh -> 17 features (EAR, gaze, head pose, PERCLOS, etc.) -> MLP or XGBoost for focused/unfocused. React + FastAPI app with WebSocket video.
12
+
13
+ ## Project layout
14
+
15
+ ```
16
+ ├── data/ collected_<name>/*.npz
17
+ ├── data_preparation/ loaders, split, scale
18
+ ├── notebooks/ MLP/XGB training + LOPO
19
+ ├── models/ face_mesh, head_pose, eye_scorer, train scripts
20
+ │ ├── gaze_calibration.py 9-point polynomial gaze calibration
21
+ │ ├── gaze_eye_fusion.py Fuses calibrated gaze with eye openness
22
+ │ └── L2CS-Net/ In-tree L2CS-Net repo with Gaze360 weights
23
+ ├── checkpoints/ mlp_best.pt, xgboost_*_best.json, scalers
24
+ ├── evaluation/ logs, plots, justify_thresholds
25
+ ├── ui/ pipeline.py, live_demo.py
26
+ ├── src/ React frontend
27
+ │ ├── components/
28
+ │ │ ├── FocusPageLocal.jsx Main focus page (camera, controls, model selector)
29
+ │ │ └── CalibrationOverlay.jsx Fullscreen calibration UI
30
+ │ └── utils/
31
+ │ └── VideoManagerLocal.js WebSocket client, frame capture, canvas rendering
32
+ ├── static/ built frontend (after npm run build)
33
+ ├── main.py, app.py FastAPI backend
34
+ ├── requirements.txt
35
+ └── package.json
36
+ ```
37
+
38
+ ## Setup
39
+
40
+ ```bash
41
+ python -m venv venv
42
+ source venv/bin/activate
43
+ pip install -r requirements.txt
44
+ ```
45
+
46
+ To rebuild the frontend after changes:
47
+
48
+ ```bash
49
+ npm install
50
+ npm run build
51
+ mkdir -p static && cp -r dist/* static/
52
+ ```
53
+
54
+ ## Run
55
+
56
+ **Web app:** Use the venv and run uvicorn via Python so it picks up your deps (otherwise you get `ModuleNotFoundError: aiosqlite`):
57
+
58
+ ```bash
59
+ source venv/bin/activate
60
+ python -m uvicorn main:app --host 0.0.0.0 --port 7860
61
+ ```
62
+
63
+ Then open http://localhost:7860.
64
+
65
+ **Frontend dev server (optional, for React development):**
66
+
67
+ ```bash
68
+ npm run dev
69
+ ```
70
+
71
+ **OpenCV demo:**
72
+
73
+ ```bash
74
+ python ui/live_demo.py
75
+ python ui/live_demo.py --xgb
76
+ ```
77
+
78
+ **Train:**
79
+
80
+ ```bash
81
+ python -m models.mlp.train
82
+ python -m models.xgboost.train
83
+ ```
84
+
85
+ ## Data
86
+
87
+ 9 participants, 144,793 samples, 10 features, binary labels. Collect with `python -m models.collect_features --name <name>`. Data lives in `data/collected_<name>/`.
88
+
89
+ ## Models
90
+
91
+ | Model | What it uses | Best for |
92
+ |-------|-------------|----------|
93
+ | **Geometric** | Head pose angles + eye aspect ratio (EAR) | Fast, no ML needed |
94
+ | **XGBoost** | Trained classifier on head/eye features (600 trees, depth 8) | Balanced accuracy/speed |
95
+ | **MLP** | Neural network on same features (64->32) | Higher accuracy |
96
+ | **Hybrid** | Weighted MLP + Geometric ensemble | Best head-pose accuracy |
97
+ | **L2CS** | Deep gaze estimation (ResNet50, Gaze360 weights) | Detects eye-only gaze shifts |
98
+
99
+ ## Model numbers (15% test split)
100
+
101
+ | Model | Accuracy | F1 | ROC-AUC |
102
+ |-------|----------|-----|---------|
103
+ | XGBoost (600 trees, depth 8) | 95.87% | 0.959 | 0.991 |
104
+ | MLP (64->32) | 92.92% | 0.929 | 0.971 |
105
+
106
+ ## L2CS Gaze Tracking
107
+
108
+ L2CS-Net predicts where your eyes are looking, not just where your head is pointed. This catches the scenario where your head faces the screen but your eyes wander.
109
+
110
+ ### Standalone mode
111
+ Select **L2CS** as the model - it handles everything.
112
+
113
+ ### Boost mode
114
+ Select any other model, then click the **GAZE** toggle. L2CS runs alongside the base model:
115
+ - Base model handles head pose and eye openness (35% weight)
116
+ - L2CS handles gaze direction (65% weight)
117
+ - If L2CS detects gaze is clearly off-screen, it **vetoes** the base model regardless of score
118
+
119
+ ### Calibration
120
+ After enabling L2CS or Gaze Boost, click **Calibrate** while a session is running:
121
+ 1. A fullscreen overlay shows 9 target dots (3x3 grid)
122
+ 2. Look at each dot as the progress ring fills
123
+ 3. The first dot (centre) sets your baseline gaze offset
124
+ 4. After all 9 points, a polynomial model maps your gaze angles to screen coordinates
125
+ 5. A cyan tracking dot appears on the video showing where you're looking
126
+
127
+ ## Pipeline
128
+
129
+ 1. Face mesh (MediaPipe 478 pts)
130
+ 2. Head pose -> yaw, pitch, roll, scores, gaze offset
131
+ 3. Eye scorer -> EAR, gaze ratio, MAR
132
+ 4. Temporal -> PERCLOS, blink rate, yawn
133
+ 5. 10-d vector -> MLP or XGBoost -> focused / unfocused
134
+
135
+ **Stack:** FastAPI, aiosqlite, React/Vite, PyTorch, XGBoost, MediaPipe, OpenCV, L2CS-Net.
api/history ADDED
File without changes
api/import ADDED
File without changes
api/sessions ADDED
File without changes
app.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from main import app
checkpoints/L2CSNet_gaze360.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a7f3480d868dd48261e1d59f915b0ef0bb33ea12ea00938fb2168f212080665
3
+ size 95849977
checkpoints/hybrid_combiner.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e460c6ca8d2cadf37727456401a0d63028ba23cb6401f0835d869abfa2e053c
3
+ size 965
checkpoints/hybrid_focus_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "use_xgb": true,
3
+ "w_mlp": 0.3,
4
+ "w_xgb": 0.3,
5
+ "w_geo": 0.7,
6
+ "threshold": 0.46117913373775393,
7
+ "use_yawn_veto": true,
8
+ "geo_face_weight": 0.7,
9
+ "geo_eye_weight": 0.3,
10
+ "mar_yawn_threshold": 0.55,
11
+ "metric": "f1",
12
+ "combiner": "logistic",
13
+ "combiner_path": "checkpoints/hybrid_combiner.joblib"
14
+ }
checkpoints/meta_best.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d78d1df5e25536a2c82c4b8f5fd0c26dd35f44b28fd59761634cbf78c7546f8
3
+ size 4196
checkpoints/meta_mlp.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4771c61cdf0711aa640b4d600a0851d344414cd16c1c2f75afc90e3c6135d14b
3
+ size 840
checkpoints/mlp_best.pt ADDED
Binary file (14.5 kB). View file
 
checkpoints/model_best.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:183f2d4419e0eb1e58704e5a7312eb61e331523566d4dc551054a07b3aac7557
3
+ size 5775881
checkpoints/scaler_best.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02ed6b4c0d99e0254c6a740a949da2384db58ec7d3e6df6432b9bfcd3a296c71
3
+ size 783
checkpoints/scaler_mlp.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2038d5b051d4de303c5688b1b861a0b53b1307a52b9447bfa48e8c7ace749329
3
+ size 823
checkpoints/xgboost_face_orientation_best.json ADDED
The diff for this file is too large to render. See raw diff
 
data/README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # data/
2
+
3
+ One folder per participant: `collected_<name>/` with .npz files. 9 participants, 144,793 samples total. Each .npz has `features` (N×17), `labels` (0/1), `feature_names`. Training uses 10 of the 17 (see data_preparation).
4
+
5
+ **Collect more:**
6
+
7
+ ```bash
8
+ python -m models.collect_features --name yourname
9
+ ```
10
+
11
+ Webcam + overlay; press 1 = focused, 0 = unfocused, p = pause, q = save and quit.
data/collected_Abdelrahman/abdelrahman_20260306_023035.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c48532150182c8933d4595e0a0711365645b699647e99976575b7c2adffaf8
3
+ size 1207980
data/collected_Jarek/Jarek_20260225_012931.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa68f4d587eee8d645b23b463a9f1c848b9bacc2adb68603d5fa9cd8cb744c7
3
+ size 1128864
data/collected_Junhao/Junhao_20260303_113554.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec321ee79800c04fdc0f999690d07970445aeca61f977bf6537880bbc996b5e5
3
+ size 678336
data/collected_Kexin/kexin2_20260305_180229.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e96fe17571fa1fcccc1b4bd0c8838270498883e4db6a608c4d4d4c3a8ac1d0d
3
+ size 1129700
data/collected_Kexin/kexin_20260224_151043.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d402ca4e66910a2e174c4f4beec5d7b3db6a04213d29673b227ce6ef04b39c4
3
+ size 1329732
data/collected_Langyuan/Langyuan_20260303_153145.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c679cdba334b2f3f0953b7e44f7209056277c826e2b7b5cfcf2b8b750898400
3
+ size 1198784
data/collected_Mohamed/session_20260224_010131.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a784f703c13b83911f47ec507d32c25942a07572314b8a77cbf40ca8cdff16f
3
+ size 1006428
data/collected_Yingtao/Yingtao_20260306_023937.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a75af17e25dca5f06ea9e7443ea5fee9db638f68a5910e014ee7cb8b7ae80fd
3
+ size 1338776
data/collected_ayten/ayten_session_1.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbecdbffa1c1b03b3b0fb5f715dcb4ff885ecc67da4aff78e6952b8847a96014
3
+ size 1341056
data/collected_saba/saba_20260306_230710.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db1cab5ddcf9988856c5bdca1183c8eba4647365e675a1d8a200d12f6b5d2097
3
+ size 663212
data_preparation/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # data_preparation/
2
+
3
+ Load and split the .npz data. Used by all training code and notebooks.
4
+
5
+ **prepare_dataset.py:** `load_all_pooled()`, `load_per_person()` for LOPO, `get_numpy_splits()` (XGBoost), `get_dataloaders()` (MLP). Cleans yaw/pitch/roll and EAR to fixed ranges. Face_orientation uses 10 features: head_deviation, s_face, s_eye, h_gaze, pitch, ear_left, ear_avg, ear_right, gaze_offset, perclos.
6
+
7
+ **data_exploration.ipynb:** EDA — stats, class balance, histograms, correlations.
8
+
9
+ You don’t run prepare_dataset directly; import it from `models.mlp.train`, `models.xgboost.train`, or the notebooks.
data_preparation/__init__.py ADDED
File without changes
data_preparation/data_exploration.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
data_preparation/prepare_dataset.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+
4
+ import numpy as np
5
+ from sklearn.preprocessing import StandardScaler
6
+ from sklearn.model_selection import train_test_split
7
+
8
+ torch = None
9
+ Dataset = object # type: ignore
10
+ DataLoader = None
11
+
12
+
13
+ def _require_torch():
14
+ global torch, Dataset, DataLoader
15
+ if torch is None:
16
+ try:
17
+ import torch as _torch
18
+ from torch.utils.data import Dataset as _Dataset, DataLoader as _DataLoader
19
+ except ImportError as exc: # pragma: no cover
20
+ raise ImportError("PyTorch not installed") from exc
21
+
22
+ torch = _torch
23
+ Dataset = _Dataset # type: ignore
24
+ DataLoader = _DataLoader # type: ignore
25
+
26
+ return torch, Dataset, DataLoader
27
+
28
# Root data directory: <repo>/data, resolved relative to this file.
DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data")

# Feature subsets per model family. Values are the ordered column names
# selected out of each .npz file's `feature_names`; loaders keep only the
# columns actually present in a given file, in this order.
SELECTED_FEATURES = {
    "face_orientation": [
        'head_deviation', 's_face', 's_eye', 'h_gaze', 'pitch',
        'ear_left', 'ear_avg', 'ear_right', 'gaze_offset', 'perclos'
    ],
    "eye_behaviour": [
        'ear_left', 'ear_right', 'ear_avg', 'mar',
        'blink_rate', 'closure_duration', 'perclos', 'yawn_duration'
    ]
}
40
+
41
+
42
class FeatureVectorDataset(Dataset):
    """Torch Dataset over a (features, labels) pair of numpy arrays."""

    def __init__(self, features: np.ndarray, labels: np.ndarray):
        torch_mod, _, _ = _require_torch()
        # float32 inputs; int64 class indices (what CrossEntropyLoss expects)
        self.features = torch_mod.tensor(features, dtype=torch_mod.float32)
        self.labels = torch_mod.tensor(labels, dtype=torch_mod.long)

    def __len__(self):
        return self.labels.shape[0]

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]
53
+
54
+
55
+ # ── Low-level helpers ────────────────────────────────────────────────────
56
+
57
+ def _clean_npz(raw, names):
58
+ """Apply clipping rules in-place. Shared by all loaders."""
59
+ for col, lo, hi in [('yaw', -45, 45), ('pitch', -30, 30), ('roll', -30, 30)]:
60
+ if col in names:
61
+ raw[:, names.index(col)] = np.clip(raw[:, names.index(col)], lo, hi)
62
+ for feat in ['ear_left', 'ear_right', 'ear_avg']:
63
+ if feat in names:
64
+ raw[:, names.index(feat)] = np.clip(raw[:, names.index(feat)], 0, 0.85)
65
+ return raw
66
+
67
+
68
def _load_one_npz(npz_path, target_features):
    """Load one .npz file, clean it, and pick the requested feature columns.

    Returns (X, y, selected_feature_names) where the selection is the
    subset of `target_features` actually present in the file, kept in
    target order.
    """
    data = np.load(npz_path, allow_pickle=True)
    names = list(data['feature_names'])
    cleaned = _clean_npz(data['features'].astype(np.float32), names)
    labels = data['labels'].astype(np.int64)
    keep = [f for f in target_features if f in names]
    cols = [names.index(f) for f in keep]
    return cleaned[:, cols], labels, keep
78
+
79
+
80
+ # ── Public data loaders ──────────────────────────────────────────────────
81
+
82
def load_all_pooled(model_name: str = "face_orientation", data_dir: str = None):
    """Load every collected_*/*.npz under `data_dir`, clean, select, pool.

    Returns (X_all, y_all, feature_names). When no .npz files exist,
    falls back to deterministic synthetic data so training code still runs.
    """
    target_features = SELECTED_FEATURES.get(model_name, SELECTED_FEATURES["face_orientation"])
    pattern = os.path.join(data_dir or DATA_DIR, "collected_*", "*.npz")
    npz_files = sorted(glob.glob(pattern))

    if not npz_files:
        print("[DATA] Warning: No .npz files found. Falling back to synthetic.")
        X, y = _generate_synthetic_data(model_name)
        return X, y, target_features

    xs, ys = [], []
    feature_names = None
    for path in npz_files:
        X, y, selected = _load_one_npz(path, target_features)
        if feature_names is None:
            # first file defines the column order reported to the caller
            feature_names = selected
        xs.append(X)
        ys.append(y)
        print(f"[DATA] + {os.path.basename(path)}: {X.shape[0]} samples")

    X_all = np.concatenate(xs, axis=0)
    y_all = np.concatenate(ys, axis=0)
    print(f"[DATA] Loaded {len(npz_files)} file(s) for '{model_name}': "
          f"{X_all.shape[0]} total samples, {X_all.shape[1]} features")
    return X_all, y_all, feature_names
112
+
113
+
114
def load_per_person(model_name: str = "face_orientation", data_dir: str = None):
    """Load collected_*/*.npz grouped by participant (folder name).

    Returns (by_person, X_all, y_all): `by_person` maps person name to a
    (X, y) pair of per-person arrays, and X_all/y_all is the pooled data.
    Raises FileNotFoundError if no .npz files match.
    """
    target_features = SELECTED_FEATURES.get(model_name, SELECTED_FEATURES["face_orientation"])
    pattern = os.path.join(data_dir or DATA_DIR, "collected_*", "*.npz")
    npz_files = sorted(glob.glob(pattern))
    if not npz_files:
        raise FileNotFoundError(f"No .npz files matching {pattern}")

    chunks_by_person = {}
    pooled_X, pooled_y = [], []
    for path in npz_files:
        # person name comes from the folder: collected_<name>/
        folder = os.path.basename(os.path.dirname(path))
        person = folder.replace("collected_", "", 1)
        X, y, _ = _load_one_npz(path, target_features)
        pooled_X.append(X)
        pooled_y.append(y)
        chunks_by_person.setdefault(person, []).append((X, y))
        print(f"[DATA] + {person}/{os.path.basename(path)}: {X.shape[0]} samples")

    # Collapse each person's file chunks into single arrays.
    by_person = {
        person: (
            np.concatenate([c[0] for c in chunks], axis=0),
            np.concatenate([c[1] for c in chunks], axis=0),
        )
        for person, chunks in chunks_by_person.items()
    }

    X_all = np.concatenate(pooled_X, axis=0)
    y_all = np.concatenate(pooled_y, axis=0)
    print(f"[DATA] {len(by_person)} persons, {X_all.shape[0]} total samples, {X_all.shape[1]} features")
    return by_person, X_all, y_all
151
+
152
+
153
def load_raw_npz(npz_path):
    """Load one .npz verbatim — no cleaning, no feature selection.

    Intended for exploration notebooks. Returns (features, labels, names).
    """
    data = np.load(npz_path, allow_pickle=True)
    return (
        data['features'].astype(np.float32),
        data['labels'].astype(np.int64),
        list(data['feature_names']),
    )
160
+
161
+
162
+ # ── Legacy helpers (used by models/mlp/train.py and models/xgboost/train.py) ─
163
+
164
def _load_real_data(model_name: str):
    """Pooled (X, y) without feature names — legacy helper for train scripts."""
    features, labels, _names = load_all_pooled(model_name)
    return features, labels
167
+
168
+
169
def _generate_synthetic_data(model_name: str):
    """Deterministic random (X, y) stand-in used when no real data exists."""
    target_features = SELECTED_FEATURES.get(model_name, SELECTED_FEATURES["face_orientation"])
    n, c = 500, 2
    d = len(target_features)
    rng = np.random.RandomState(42)  # fixed seed: reproducible fallback
    features = rng.randn(n, d).astype(np.float32)
    labels = rng.randint(0, c, size=n).astype(np.int64)
    print(f"[DATA] Using synthetic data for '{model_name}': {n} samples, {d} features, {c} classes")
    return features, labels
179
+
180
+
181
def _split_and_scale(features, labels, split_ratios, seed, scale):
    """Stratified train/val/test split with optional standardisation.

    `split_ratios` is (train, val, test) summing to 1. When `scale` is
    true, a StandardScaler is fitted on the training split only (no
    leakage into val/test). Returns (splits_dict, scaler_or_None).
    """
    # Carve off the test set first, then divide the remainder so the
    # requested overall ratios hold.
    test_ratio = split_ratios[2]
    val_ratio = split_ratios[1] / (split_ratios[0] + split_ratios[1])

    X_rest, X_test, y_rest, y_test = train_test_split(
        features, labels, test_size=test_ratio, random_state=seed, stratify=labels,
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=val_ratio, random_state=seed, stratify=y_rest,
    )

    scaler = None
    if scale:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_val = scaler.transform(X_val)
        X_test = scaler.transform(X_test)
        print("[DATA] Applied StandardScaler (fitted on training split)")

    splits = {
        "X_train": X_train, "y_train": y_train,
        "X_val": X_val, "y_val": y_val,
        "X_test": X_test, "y_test": y_test,
    }
    print(f"[DATA] Split (stratified): train={len(y_train)}, val={len(y_val)}, test={len(y_test)}")
    return splits, scaler
209
+
210
+
211
def get_numpy_splits(model_name: str, split_ratios=(0.7, 0.15, 0.15), seed: int = 42, scale: bool = True):
    """Return raw numpy splits for non-PyTorch models (e.g. XGBoost).

    Returns (splits, num_features, num_classes, scaler). Raises
    ValueError when the data contains fewer than two classes.
    """
    features, labels = _load_real_data(model_name)
    num_features = features.shape[1]
    num_classes = int(labels.max()) + 1
    if num_classes < 2:
        raise ValueError("Dataset has only one class; need at least 2 for classification.")
    splits, scaler = _split_and_scale(features, labels, split_ratios, seed, scale)
    return splits, num_features, num_classes, scaler
220
+
221
+
222
def get_dataloaders(model_name: str, batch_size: int = 32, split_ratios=(0.7, 0.15, 0.15), seed: int = 42, scale: bool = True):
    """Return PyTorch DataLoaders for neural-network models.

    Returns (train_loader, val_loader, test_loader, num_features,
    num_classes, scaler). Only the training loader shuffles.
    """
    _, _, loader_cls = _require_torch()
    features, labels = _load_real_data(model_name)
    num_features = features.shape[1]
    num_classes = int(labels.max()) + 1
    if num_classes < 2:
        raise ValueError("Dataset has only one class; need at least 2 for classification.")
    splits, scaler = _split_and_scale(features, labels, split_ratios, seed, scale)

    loaders = {}
    for part, shuffle in (("train", True), ("val", False), ("test", False)):
        ds = FeatureVectorDataset(splits[f"X_{part}"], splits[f"y_{part}"])
        loaders[part] = loader_cls(ds, batch_size=batch_size, shuffle=shuffle)

    return loaders["train"], loaders["val"], loaders["test"], num_features, num_classes, scaler
241
+
docker-compose.yml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ services:
2
+ focus-guard:
3
+ build: .
4
+ ports:
5
+ - "7860:7860"
download_l2cs_weights.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # Downloads L2CS-Net Gaze360 weights into checkpoints/
3
+
4
+ import os
5
+ import sys
6
+
7
# Weights land at <repo>/checkpoints/L2CSNet_gaze360.pkl.
CHECKPOINTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "checkpoints")
DEST = os.path.join(CHECKPOINTS_DIR, "L2CSNet_gaze360.pkl")
# Google Drive file id of the published L2CS-Net Gaze360 checkpoint.
GDRIVE_ID = "1dL2Jokb19_SBSHAhKHOxJsmYs5-GoyLo"
10
+
11
+
12
def main():
    """Fetch the L2CS-Net Gaze360 weights via gdown, unless already present.

    Exits with status 1 when gdown is missing or the download fails.
    """
    if os.path.isfile(DEST):
        print(f"[OK] Weights already at {DEST}")
        return

    try:
        import gdown
    except ImportError:
        print("gdown not installed. Run: pip install gdown")
        sys.exit(1)

    os.makedirs(CHECKPOINTS_DIR, exist_ok=True)
    print(f"Downloading L2CS-Net weights to {DEST} ...")
    gdown.download(f"https://drive.google.com/uc?id={GDRIVE_ID}", DEST, quiet=False)

    # Verify the file actually landed; gdown can fail without raising.
    if not os.path.isfile(DEST):
        print("[ERR] Download failed. Manual download:")
        print(" https://drive.google.com/drive/folders/17p6ORr-JQJcw-eYtG2WGNiuS_qVKwdWd")
        print(f" Place L2CSNet_gaze360.pkl in {CHECKPOINTS_DIR}/")
        sys.exit(1)

    print(f"[OK] Downloaded ({os.path.getsize(DEST) / 1024 / 1024:.1f} MB)")


if __name__ == "__main__":
    main()
eslint.config.js ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import js from '@eslint/js'
import globals from 'globals'
import reactHooks from 'eslint-plugin-react-hooks'
import reactRefresh from 'eslint-plugin-react-refresh'
import { defineConfig, globalIgnores } from 'eslint/config'

// Flat ESLint config: lint all JS/JSX sources, skip the build output.
export default defineConfig([
  globalIgnores(['dist']),
  {
    files: ['**/*.{js,jsx}'],
    extends: [
      js.configs.recommended,
      reactHooks.configs.flat.recommended,
      reactRefresh.configs.vite,
    ],
    languageOptions: {
      ecmaVersion: 2020,
      globals: globals.browser,
      parserOptions: {
        ecmaVersion: 'latest',
        ecmaFeatures: { jsx: true },
        sourceType: 'module',
      },
    },
    rules: {
      // Allow intentionally-unused identifiers starting with a capital or underscore
      'no-unused-vars': ['error', { varsIgnorePattern: '^[A-Z_]' }],
    },
  },
])
evaluation/README.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # evaluation/
2
+
3
+ Training logs, threshold/weight analysis, and metrics.
4
+
5
+ **Contents:** `logs/` (JSON from training runs), `plots/` (ROC, weight search, EAR/MAR), `justify_thresholds.py`, `feature_importance.py`, and the generated markdown reports.
6
+
7
+ **Logs:** MLP writes `face_orientation_training_log.json`, XGBoost writes `xgboost_face_orientation_training_log.json`. Paths: `evaluation/logs/`.
8
+
9
+ **Threshold report:** Generate `THRESHOLD_JUSTIFICATION.md` and plots with:
10
+
11
+ ```bash
12
+ python -m evaluation.justify_thresholds
13
+ ```
14
+
15
+ (LOPO over 9 participants, Youden’s J, weight grid search; ~10–15 min.) Outputs go to `plots/` and the markdown file.
16
+
17
+ **Feature importance:** Run `python -m evaluation.feature_importance` for XGBoost gain and leave-one-feature-out LOPO; writes `feature_selection_justification.md`.
18
+
19
+ **Who writes here:** `models.mlp.train`, `models.xgboost.train`, `evaluation.justify_thresholds`, `evaluation.feature_importance`, and the notebooks.
evaluation/THRESHOLD_JUSTIFICATION.md ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Threshold Justification Report
2
+
3
+ Auto-generated by `evaluation/justify_thresholds.py` using LOPO cross-validation over 9 participants (~145k samples).
4
+
5
+ ## 1. ML Model Decision Thresholds
6
+
7
+ Thresholds selected via **Youden's J statistic** (J = sensitivity + specificity - 1) on pooled LOPO held-out predictions.
8
+
9
+ | Model | LOPO AUC | Optimal Threshold (Youden's J) | F1 @ Optimal | F1 @ 0.50 |
10
+ |-------|----------|-------------------------------|--------------|-----------|
11
+ | MLP | 0.8624 | **0.228** | 0.8578 | 0.8149 |
12
+ | XGBoost | 0.8804 | **0.377** | 0.8585 | 0.8424 |
13
+
14
+ ![MLP ROC](plots/roc_mlp.png)
15
+
16
+ ![XGBoost ROC](plots/roc_xgboost.png)
17
+
18
+ ## 2. Precision, Recall and Tradeoff
19
+
20
+ At the optimal threshold (Youden's J), pooled over all LOPO held-out predictions:
21
+
22
+ | Model | Threshold | Precision | Recall | F1 | Accuracy |
23
+ |-------|----------:|----------:|-------:|---:|---------:|
24
+ | MLP | 0.228 | 0.8187 | 0.9008 | 0.8578 | 0.8164 |
25
+ | XGBoost | 0.377 | 0.8426 | 0.8750 | 0.8585 | 0.8228 |
26
+
27
+ Higher threshold → fewer positive predictions → higher precision, lower recall. Youden's J picks the threshold that balances sensitivity and specificity (recall for the positive class and true negative rate).
28
+
29
+ ## 3. Confusion Matrix (Pooled LOPO)
30
+
31
+ At optimal threshold. Rows = true label, columns = predicted label (0 = unfocused, 1 = focused).
32
+
33
+ ### MLP
34
+
35
+ | | Pred 0 | Pred 1 |
36
+ |--|-------:|-------:|
37
+ | **True 0** | 38065 (TN) | 17750 (FP) |
38
+ | **True 1** | 8831 (FN) | 80147 (TP) |
39
+
40
+ TN=38065, FP=17750, FN=8831, TP=80147.
41
+
42
+ ### XGBoost
43
+
44
+ | | Pred 0 | Pred 1 |
45
+ |--|-------:|-------:|
46
+ | **True 0** | 41271 (TN) | 14544 (FP) |
47
+ | **True 1** | 11118 (FN) | 77860 (TP) |
48
+
49
+ TN=41271, FP=14544, FN=11118, TP=77860.
50
+
51
+ ![Confusion MLP](plots/confusion_matrix_mlp.png)
52
+
53
+ ![Confusion XGBoost](plots/confusion_matrix_xgb.png)
54
+
55
+ ## 4. Per-Person Performance Variance (LOPO)
56
+
57
+ One fold per left-out person; metrics at optimal threshold.
58
+
59
+ ### MLP — per held-out person
60
+
61
+ | Person | Accuracy | F1 | Precision | Recall |
62
+ |--------|---------:|---:|----------:|-------:|
63
+ | Abdelrahman | 0.8628 | 0.9029 | 0.8760 | 0.9314 |
64
+ | Jarek | 0.8400 | 0.8770 | 0.8909 | 0.8635 |
65
+ | Junhao | 0.8872 | 0.8986 | 0.8354 | 0.9723 |
66
+ | Kexin | 0.7941 | 0.8123 | 0.7965 | 0.8288 |
67
+ | Langyuan | 0.5877 | 0.6169 | 0.4972 | 0.8126 |
68
+ | Mohamed | 0.8432 | 0.8653 | 0.7931 | 0.9519 |
69
+ | Yingtao | 0.8794 | 0.9263 | 0.9217 | 0.9309 |
70
+ | ayten | 0.8307 | 0.8986 | 0.8558 | 0.9459 |
71
+ | saba | 0.9192 | 0.9243 | 0.9260 | 0.9226 |
72
+
73
+ ### XGBoost — per held-out person
74
+
75
+ | Person | Accuracy | F1 | Precision | Recall |
76
+ |--------|---------:|---:|----------:|-------:|
77
+ | Abdelrahman | 0.8601 | 0.8959 | 0.9129 | 0.8795 |
78
+ | Jarek | 0.8680 | 0.8993 | 0.9070 | 0.8917 |
79
+ | Junhao | 0.9099 | 0.9180 | 0.8627 | 0.9810 |
80
+ | Kexin | 0.7363 | 0.7385 | 0.7906 | 0.6928 |
81
+ | Langyuan | 0.6738 | 0.6945 | 0.5625 | 0.9074 |
82
+ | Mohamed | 0.8868 | 0.8988 | 0.8529 | 0.9498 |
83
+ | Yingtao | 0.8711 | 0.9195 | 0.9347 | 0.9048 |
84
+ | ayten | 0.8451 | 0.9070 | 0.8654 | 0.9528 |
85
+ | saba | 0.9393 | 0.9421 | 0.9615 | 0.9235 |
86
+
87
+ ### Summary across persons
88
+
89
+ | Model | Accuracy mean ± std | F1 mean ± std | Precision mean ± std | Recall mean ± std |
90
+ |-------|---------------------|---------------|----------------------|-------------------|
91
+ | MLP | 0.8271 ± 0.0968 | 0.8580 ± 0.0968 | 0.8214 ± 0.1307 | 0.9067 ± 0.0572 |
92
+ | XGBoost | 0.8434 ± 0.0847 | 0.8682 ± 0.0879 | 0.8500 ± 0.1191 | 0.8981 ± 0.0836 |
93
+
94
+ ## 5. Confidence Intervals (95%, LOPO over 9 persons)
95
+
96
+ Mean ± half-width of 95% t-interval (df=8) for each metric across the 9 left-out persons.
97
+
98
+ | Model | F1 | Accuracy | Precision | Recall |
99
+ |-------|---:|--------:|----------:|-------:|
100
+ | MLP | 0.8580 [0.7835, 0.9326] | 0.8271 [0.7526, 0.9017] | 0.8214 [0.7207, 0.9221] | 0.9067 [0.8626, 0.9507] |
101
+ | XGBoost | 0.8682 [0.8005, 0.9358] | 0.8434 [0.7781, 0.9086] | 0.8500 [0.7583, 0.9417] | 0.8981 [0.8338, 0.9625] |
102
+
103
+ ## 6. Geometric Pipeline Weights (s_face vs s_eye)
104
+
105
+ Grid search over face weight alpha in {0.2 ... 0.8}. Eye weight = 1 - alpha. Threshold per fold via Youden's J.
106
+
107
+ | Face Weight (alpha) | Mean LOPO F1 |
108
+ |--------------------:|-------------:|
109
+ | 0.2 | 0.7926 |
110
+ | 0.3 | 0.8002 |
111
+ | 0.4 | 0.7719 |
112
+ | 0.5 | 0.7868 |
113
+ | 0.6 | 0.8184 |
114
+ | 0.7 | 0.8195 **<-- selected** |
115
+ | 0.8 | 0.8126 |
116
+
117
+ **Best:** alpha = 0.7 (face 70%, eye 30%)
118
+
119
+ ![Geometric weight search](plots/geo_weight_search.png)
120
+
121
+ ## 7. Hybrid Pipeline: MLP vs Geometric
122
+
123
+ Grid search over w_mlp in {0.3 ... 0.8}. w_geo = 1 - w_mlp. Geometric sub-score uses same weights as geometric pipeline (face=0.7, eye=0.3).
124
+
125
+ | MLP Weight (w_mlp) | Mean LOPO F1 |
126
+ |-------------------:|-------------:|
127
+ | 0.3 | 0.8409 **<-- selected** |
128
+ | 0.4 | 0.8246 |
129
+ | 0.5 | 0.8164 |
130
+ | 0.6 | 0.8106 |
131
+ | 0.7 | 0.8039 |
132
+ | 0.8 | 0.8016 |
133
+
134
+ **Best:** w_mlp = 0.3 (MLP 30%, geometric 70%) → mean LOPO F1 = 0.8409
135
+
136
+ ![Hybrid MLP weight search](plots/hybrid_weight_search.png)
137
+
138
+ ## 8. Hybrid Pipeline: XGBoost vs Geometric
139
+
140
+ Same grid over w_xgb in {0.3 ... 0.8}. w_geo = 1 - w_xgb.
141
+
142
+ | XGBoost Weight (w_xgb) | Mean LOPO F1 |
143
+ |-----------------------:|-------------:|
144
+ | 0.3 | 0.8639 **<-- selected** |
145
+ | 0.4 | 0.8552 |
146
+ | 0.5 | 0.8451 |
147
+ | 0.6 | 0.8419 |
148
+ | 0.7 | 0.8382 |
149
+ | 0.8 | 0.8353 |
150
+
151
+ **Best:** w_xgb = 0.3 → mean LOPO F1 = 0.8639
152
+
153
+ ![Hybrid XGBoost weight search](plots/hybrid_xgb_weight_search.png)
154
+
155
+ ### Which hybrid is used in the app?
156
+
157
+ **XGBoost hybrid is better** (F1 = 0.8639 vs MLP hybrid F1 = 0.8409).
158
+
159
+ ### Logistic regression combiner (replaces heuristic weights)
160
+
161
+ Instead of a fixed linear blend (e.g. 0.3·ML + 0.7·geo), a **logistic regression** combines model probability and geometric score: meta-features = [model_prob, geo_score], trained on the same LOPO splits. Threshold from Youden's J on combiner output.
162
+
163
+ | Method | Mean LOPO F1 |
164
+ |--------|-------------:|
165
+ | Heuristic weight grid (best w) | 0.8639 |
166
+ | **LR combiner** | **0.8241** |
167
+
168
+ Note that in this run the fixed heuristic blend outperformed the LR combiner (F1 0.8639 vs 0.8241); the combiner is retained for its calibrated probability output. The app uses the saved LR combiner when `combiner_path` is set in `hybrid_focus_config.json`.
169
+
170
+ ## 9. Eye and Mouth Aspect Ratio Thresholds
171
+
172
+ ### EAR (Eye Aspect Ratio)
173
+
174
+ Reference: Soukupova & Cech, "Real-Time Eye Blink Detection Using Facial Landmarks" (2016) established EAR ~ 0.2 as a blink threshold.
175
+
176
+ Our thresholds define a linear interpolation zone around this established value:
177
+
178
+ | Constant | Value | Justification |
179
+ |----------|------:|---------------|
180
+ | `ear_closed` | 0.16 | Below this, eyes are fully shut. 16.3% of samples fall here. |
181
+ | `EAR_BLINK_THRESH` | 0.21 | Blink detection point; close to the 0.2 reference. 21.2% of samples below. |
182
+ | `ear_open` | 0.30 | Above this, eyes are fully open. 70.4% of samples here. |
183
+
184
+ Between 0.16 and 0.30 the `_ear_score` function linearly interpolates from 0 to 1, providing a smooth transition rather than a hard binary cutoff.
185
+
186
+ ![EAR distribution](plots/ear_distribution.png)
187
+
188
+ ### MAR (Mouth Aspect Ratio)
189
+
190
+ | Constant | Value | Justification |
191
+ |----------|------:|---------------|
192
+ | `MAR_YAWN_THRESHOLD` | 0.55 | Only 1.7% of samples exceed this, confirming it captures genuine yawns without false positives. |
193
+
194
+ ![MAR distribution](plots/mar_distribution.png)
195
+
196
+ ## 10. Other Constants
197
+
198
+ | Constant | Value | Rationale |
199
+ |----------|------:|-----------|
200
+ | `gaze_max_offset` | 0.28 | Max iris displacement (normalised) before gaze score drops to zero. Corresponds to ~56% of the eye width; beyond this the iris is at the extreme edge. |
201
+ | `max_angle` | 22.0 deg | Head deviation beyond which face score = 0. Based on typical monitor-viewing cone: at 60 cm distance and a 24" monitor, the viewing angle is ~20-25 degrees. |
202
+ | `roll_weight` | 0.5 | Roll is less indicative of inattention than yaw/pitch (tilting head doesn't mean looking away), so it's down-weighted by 50%. |
203
+ | `EMA alpha` | 0.3 | Smoothing factor for focus score. Gives ~3-4 frame effective window; balances responsiveness vs flicker. |
204
+ | `grace_frames` | 15 | ~0.5 s at 30 fps before penalising no-face. Allows brief occlusions (e.g. hand gesture) without dropping score. |
205
+ | `PERCLOS_WINDOW` | 60 frames | 2 s at 30 fps; standard PERCLOS measurement window (Dinges & Grace, 1998). |
206
+ | `BLINK_WINDOW_SEC` | 30 s | Blink rate measured over 30 s; typical spontaneous blink rate is 15-20/min (Bentivoglio et al., 1997). |
evaluation/feature_importance.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Feature importance and leave-one-feature-out ablation for the 10 face_orientation features.
3
+ Run: python -m evaluation.feature_importance
4
+
5
+ Outputs:
6
+ - XGBoost gain-based importance (from trained checkpoint)
7
+ - Leave-one-feature-out LOPO F1 (ablation): drop each feature in turn, report mean LOPO F1.
8
+ - Writes evaluation/feature_selection_justification.md
9
+ """
10
+
11
+ import os
12
+ import sys
13
+
14
+ import numpy as np
15
+ from sklearn.preprocessing import StandardScaler
16
+ from sklearn.metrics import f1_score
17
+ from xgboost import XGBClassifier
18
+
19
+ _PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
20
+ if _PROJECT_ROOT not in sys.path:
21
+ sys.path.insert(0, _PROJECT_ROOT)
22
+
23
+ from data_preparation.prepare_dataset import load_per_person, SELECTED_FEATURES
24
+
25
+ SEED = 42
26
+ FEATURES = SELECTED_FEATURES["face_orientation"]
27
+
28
+
29
+ def _resolve_xgb_path():
30
+ return os.path.join(_PROJECT_ROOT, "checkpoints", "xgboost_face_orientation_best.json")
31
+
32
+
33
+ def xgb_feature_importance():
34
+ """Load trained XGBoost and return gain-based importance for the 10 features."""
35
+ path = _resolve_xgb_path()
36
+ if not os.path.isfile(path):
37
+ print(f"[WARN] No XGBoost checkpoint at {path}; skip importance.")
38
+ return None
39
+ model = XGBClassifier()
40
+ model.load_model(path)
41
+ imp = model.get_booster().get_score(importance_type="gain")
42
+ # Booster uses f0, f1, ...; we use same order as FEATURES (training order)
43
+ by_idx = {int(k.replace("f", "")): v for k, v in imp.items() if k.startswith("f")}
44
+ order = [by_idx.get(i, 0.0) for i in range(len(FEATURES))]
45
+ return dict(zip(FEATURES, order))
46
+
47
+
48
+ def run_ablation_lopo():
49
+ """Leave-one-feature-out: for each feature, train XGBoost on the other 9 with LOPO, report mean F1."""
50
+ by_person, _, _ = load_per_person("face_orientation")
51
+ persons = sorted(by_person.keys())
52
+ n_folds = len(persons)
53
+
54
+ results = {}
55
+ for drop_feat in FEATURES:
56
+ idx_keep = [i for i, f in enumerate(FEATURES) if f != drop_feat]
57
+ f1s = []
58
+ for held_out in persons:
59
+ train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
60
+ train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
61
+ X_test, y_test = by_person[held_out]
62
+
63
+ X_tr = train_X[:, idx_keep]
64
+ X_te = X_test[:, idx_keep]
65
+ scaler = StandardScaler().fit(X_tr)
66
+ X_tr_sc = scaler.transform(X_tr)
67
+ X_te_sc = scaler.transform(X_te)
68
+
69
+ xgb = XGBClassifier(
70
+ n_estimators=600, max_depth=8, learning_rate=0.05,
71
+ subsample=0.8, colsample_bytree=0.8,
72
+ reg_alpha=0.1, reg_lambda=1.0,
73
+ eval_metric="logloss",
74
+ random_state=SEED, verbosity=0,
75
+ )
76
+ xgb.fit(X_tr_sc, train_y)
77
+ pred = xgb.predict(X_te_sc)
78
+ f1s.append(f1_score(y_test, pred, average="weighted"))
79
+ results[drop_feat] = np.mean(f1s)
80
+ return results
81
+
82
+
83
+ def run_baseline_lopo_f1():
84
+ """Full 10-feature LOPO mean F1 for reference."""
85
+ by_person, _, _ = load_per_person("face_orientation")
86
+ persons = sorted(by_person.keys())
87
+ f1s = []
88
+ for held_out in persons:
89
+ train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
90
+ train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
91
+ X_test, y_test = by_person[held_out]
92
+ scaler = StandardScaler().fit(train_X)
93
+ X_tr_sc = scaler.transform(train_X)
94
+ X_te_sc = scaler.transform(X_test)
95
+ xgb = XGBClassifier(
96
+ n_estimators=600, max_depth=8, learning_rate=0.05,
97
+ subsample=0.8, colsample_bytree=0.8,
98
+ reg_alpha=0.1, reg_lambda=1.0,
99
+ eval_metric="logloss",
100
+ random_state=SEED, verbosity=0,
101
+ )
102
+ xgb.fit(X_tr_sc, train_y)
103
+ pred = xgb.predict(X_te_sc)
104
+ f1s.append(f1_score(y_test, pred, average="weighted"))
105
+ return np.mean(f1s)
106
+
107
+
108
+ # Channel subsets for ablation (subset name -> list of feature names)
109
+ CHANNEL_SUBSETS = {
110
+ "head_pose": ["head_deviation", "s_face", "pitch"],
111
+ "eye_state": ["ear_left", "ear_avg", "ear_right", "perclos"],
112
+ "gaze": ["h_gaze", "gaze_offset", "s_eye"],
113
+ }
114
+
115
+
116
+ def run_channel_ablation():
117
+ """LOPO XGBoost with head-only, eye-only, gaze-only, and all 10. Returns dict subset_name -> mean F1."""
118
+ by_person, _, _ = load_per_person("face_orientation")
119
+ persons = sorted(by_person.keys())
120
+ results = {}
121
+ for subset_name, feat_list in CHANNEL_SUBSETS.items():
122
+ idx_keep = [FEATURES.index(f) for f in feat_list]
123
+ f1s = []
124
+ for held_out in persons:
125
+ train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
126
+ train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
127
+ X_test, y_test = by_person[held_out]
128
+ X_tr = train_X[:, idx_keep]
129
+ X_te = X_test[:, idx_keep]
130
+ scaler = StandardScaler().fit(X_tr)
131
+ X_tr_sc = scaler.transform(X_tr)
132
+ X_te_sc = scaler.transform(X_te)
133
+ xgb = XGBClassifier(
134
+ n_estimators=600, max_depth=8, learning_rate=0.05,
135
+ subsample=0.8, colsample_bytree=0.8,
136
+ reg_alpha=0.1, reg_lambda=1.0,
137
+ eval_metric="logloss",
138
+ random_state=SEED, verbosity=0,
139
+ )
140
+ xgb.fit(X_tr_sc, train_y)
141
+ pred = xgb.predict(X_te_sc)
142
+ f1s.append(f1_score(y_test, pred, average="weighted"))
143
+ results[subset_name] = np.mean(f1s)
144
+ baseline = run_baseline_lopo_f1()
145
+ results["all_10"] = baseline
146
+ return results
147
+
148
+
149
+ def main():
150
+ print("=== Feature importance (XGBoost gain) ===")
151
+ imp = xgb_feature_importance()
152
+ if imp:
153
+ for name in FEATURES:
154
+ print(f" {name}: {imp.get(name, 0):.2f}")
155
+ order = sorted(imp.items(), key=lambda x: -x[1])
156
+ print(" Top-5 by gain:", [x[0] for x in order[:5]])
157
+
158
+ print("\n=== Leave-one-feature-out ablation (LOPO mean F1) ===")
159
+ baseline = run_baseline_lopo_f1()
160
+ print(f" Baseline (all 10 features) mean LOPO F1: {baseline:.4f}")
161
+ ablation = run_ablation_lopo()
162
+ for feat in FEATURES:
163
+ delta = baseline - ablation[feat]
164
+ print(f" drop {feat}: F1={ablation[feat]:.4f} (Δ={delta:+.4f})")
165
+ worst_drop = min(ablation.items(), key=lambda x: x[1])
166
+ print(f" Largest F1 drop when dropping: {worst_drop[0]} (F1={worst_drop[1]:.4f})")
167
+
168
+ print("\n=== Channel ablation (LOPO mean F1) ===")
169
+ channel_f1 = run_channel_ablation()
170
+ for name, f1 in channel_f1.items():
171
+ print(f" {name}: {f1:.4f}")
172
+
173
+ out_dir = os.path.join(_PROJECT_ROOT, "evaluation")
174
+ out_path = os.path.join(out_dir, "feature_selection_justification.md")
175
+ lines = [
176
+ "# Feature selection justification",
177
+ "",
178
+ "The face_orientation model uses 10 of 17 extracted features. This document summarises empirical support.",
179
+ "",
180
+ "## 1. Domain rationale",
181
+ "",
182
+ "The 10 features were chosen to cover three channels:",
183
+ "- **Head pose:** head_deviation, s_face, pitch",
184
+ "- **Eye state:** ear_left, ear_right, ear_avg, perclos",
185
+ "- **Gaze:** h_gaze, gaze_offset, s_eye",
186
+ "",
187
+ "Excluded: v_gaze (noisy), mar (rare events), yaw/roll (redundant with head_deviation/s_face), blink_rate/closure_duration/yawn_duration (temporal overlap with perclos).",
188
+ "",
189
+ "## 2. XGBoost feature importance (gain)",
190
+ "",
191
+ "From the trained XGBoost checkpoint (gain on the 10 features):",
192
+ "",
193
+ "| Feature | Gain |",
194
+ "|---------|------|",
195
+ ]
196
+ if imp:
197
+ for name in FEATURES:
198
+ lines.append(f"| {name} | {imp.get(name, 0):.2f} |")
199
+ order = sorted(imp.items(), key=lambda x: -x[1])
200
+ lines.append("")
201
+ lines.append(f"**Top 5 by gain:** {', '.join(x[0] for x in order[:5])}.")
202
+ else:
203
+ lines.append("(Run with XGBoost checkpoint to populate.)")
204
+ lines.extend([
205
+ "",
206
+ "## 3. Leave-one-feature-out ablation (LOPO)",
207
+ "",
208
+ f"Baseline (all 10 features) mean LOPO F1: **{baseline:.4f}**.",
209
+ "",
210
+ "| Feature dropped | Mean LOPO F1 | Δ vs baseline |",
211
+ "|------------------|--------------|---------------|",
212
+ ])
213
+ for feat in FEATURES:
214
+ delta = baseline - ablation[feat]
215
+ lines.append(f"| {feat} | {ablation[feat]:.4f} | {delta:+.4f} |")
216
+ worst_drop = min(ablation.items(), key=lambda x: x[1])
217
+ lines.append("")
218
+ lines.append(f"Dropping **{worst_drop[0]}** hurts most (F1={worst_drop[1]:.4f}), consistent with it being important.")
219
+ lines.append("")
220
+ lines.append("## 4. Conclusion")
221
+ lines.append("")
222
+ lines.append("Selection is supported by (1) domain rationale (three attention channels), (2) XGBoost gain importance, and (3) leave-one-out ablation. SHAP or correlation-based pruning can be added in future work.")
223
+ lines.append("")
224
+ with open(out_path, "w", encoding="utf-8") as f:
225
+ f.write("\n".join(lines))
226
+ print(f"\nReport written to {out_path}")
227
+
228
+
229
+ if __name__ == "__main__":
230
+ main()
evaluation/feature_selection_justification.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Feature selection justification
2
+
3
+ The face_orientation model uses 10 of 17 extracted features. This document summarises empirical support.
4
+
5
+ ## 1. Domain rationale
6
+
7
+ The 10 features were chosen to cover three channels:
8
+ - **Head pose:** head_deviation, s_face, pitch
9
+ - **Eye state:** ear_left, ear_right, ear_avg, perclos
10
+ - **Gaze:** h_gaze, gaze_offset, s_eye
11
+
12
+ Excluded: v_gaze (noisy), mar (rare events), yaw/roll (redundant with head_deviation/s_face), blink_rate/closure_duration/yawn_duration (temporal overlap with perclos).
13
+
14
+ ## 2. XGBoost feature importance (gain)
15
+
16
+ From the trained XGBoost checkpoint (gain on the 10 features):
17
+
18
+ | Feature | Gain |
19
+ |---------|------|
20
+ | head_deviation | 8.83 |
21
+ | s_face | 10.27 |
22
+ | s_eye | 2.18 |
23
+ | h_gaze | 4.99 |
24
+ | pitch | 4.64 |
25
+ | ear_left | 3.57 |
26
+ | ear_avg | 6.96 |
27
+ | ear_right | 9.54 |
28
+ | gaze_offset | 1.80 |
29
+ | perclos | 5.68 |
30
+
31
+ **Top 5 by gain:** s_face, ear_right, head_deviation, ear_avg, perclos.
32
+
33
+ ## 3. Leave-one-feature-out ablation (LOPO)
34
+
35
+ Baseline (all 10 features) mean LOPO F1: **0.8327**.
36
+
37
+ | Feature dropped | Mean LOPO F1 | Δ vs baseline |
38
+ |------------------|--------------|---------------|
39
+ | head_deviation | 0.8395 | -0.0068 |
40
+ | s_face | 0.8390 | -0.0063 |
41
+ | s_eye | 0.8342 | -0.0015 |
42
+ | h_gaze | 0.8244 | +0.0083 |
43
+ | pitch | 0.8250 | +0.0077 |
44
+ | ear_left | 0.8326 | +0.0001 |
45
+ | ear_avg | 0.8350 | -0.0023 |
46
+ | ear_right | 0.8344 | -0.0017 |
47
+ | gaze_offset | 0.8351 | -0.0024 |
48
+ | perclos | 0.8258 | +0.0069 |
49
+
50
+ Dropping **h_gaze** hurts most (F1=0.8244), consistent with it being important.
51
+
52
+ ## 4. Conclusion
53
+
54
+ Selection is supported by (1) domain rationale (three attention channels), (2) XGBoost gain importance, and (3) leave-one-out ablation. SHAP or correlation-based pruning can be added in future work.
evaluation/justify_thresholds.py ADDED
@@ -0,0 +1,555 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LOPO threshold/weight analysis. Run: python -m evaluation.justify_thresholds
2
+ # ClearML logging: set USE_CLEARML=1 env var or pass --clearml flag
3
+
4
+ import glob
5
+ import os
6
+ import sys
7
+
8
+ import numpy as np
9
+ import matplotlib
10
+ matplotlib.use("Agg")
11
+ import matplotlib.pyplot as plt
12
+ from sklearn.neural_network import MLPClassifier
13
+ from sklearn.preprocessing import StandardScaler
14
+ from sklearn.metrics import roc_curve, roc_auc_score, f1_score
15
+ from xgboost import XGBClassifier
16
+
17
+ _PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
18
+ sys.path.insert(0, _PROJECT_ROOT)
19
+
20
+ from data_preparation.prepare_dataset import load_per_person, SELECTED_FEATURES
21
+
22
+ PLOTS_DIR = os.path.join(os.path.dirname(__file__), "plots")
23
+ REPORT_PATH = os.path.join(os.path.dirname(__file__), "THRESHOLD_JUSTIFICATION.md")
24
+ SEED = 42
25
+
26
+ # ClearML
27
+ # start logging with: USE_CLEARML=1 python -m evaluation.justify_thresholds or: python -m evaluation.justify_thresholds --clearml
28
+ _USE_CLEARML = os.environ.get("USE_CLEARML", "0") == "1" or "--clearml" in sys.argv
29
+
30
+ _task = None
31
+ _logger = None
32
+
33
+ if _USE_CLEARML:
34
+ try:
35
+ from clearml import Task
36
+ _task = Task.init(
37
+ project_name="Focus Guard",
38
+ task_name="Threshold Justification",
39
+ tags=["evaluation", "thresholds"],
40
+ )
41
+ _task.connect({"SEED": SEED, "n_participants": 9})
42
+ _logger = _task.get_logger()
43
+ print("ClearML enabled — logging to project 'Focus Guard'")
44
+ except ImportError:
45
+ print("WARNING: ClearML not installed. Continuing without logging.")
46
+ _USE_CLEARML = False
47
+
48
+ def _youdens_j(y_true, y_prob):
49
+ fpr, tpr, thresholds = roc_curve(y_true, y_prob)
50
+ j = tpr - fpr
51
+ idx = j.argmax()
52
+ auc = roc_auc_score(y_true, y_prob)
53
+ return float(thresholds[idx]), fpr, tpr, thresholds, float(auc)
54
+
55
+
56
+ def _f1_at_threshold(y_true, y_prob, threshold):
57
+ return f1_score(y_true, (y_prob >= threshold).astype(int), zero_division=0)
58
+
59
+
60
+ def _plot_roc(fpr, tpr, auc, opt_thresh, opt_idx, title, path, clearml_title=None):
61
+ fig, ax = plt.subplots(figsize=(6, 5))
62
+ ax.plot(fpr, tpr, lw=2, label=f"ROC (AUC = {auc:.4f})")
63
+ ax.plot(fpr[opt_idx], tpr[opt_idx], "ro", markersize=10,
64
+ label=f"Youden's J optimum (t = {opt_thresh:.3f})")
65
+ ax.plot([0, 1], [0, 1], "k--", lw=1, alpha=0.5)
66
+ ax.set_xlabel("False Positive Rate")
67
+ ax.set_ylabel("True Positive Rate")
68
+ ax.set_title(title)
69
+ ax.legend(loc="lower right")
70
+ fig.tight_layout()
71
+
72
+ # Log to ClearML before closing the figure
73
+ if _logger and clearml_title:
74
+ _logger.report_matplotlib_figure(
75
+ title=clearml_title, series="ROC", figure=fig, iteration=0
76
+ )
77
+
78
+ fig.savefig(path, dpi=150)
79
+ plt.close(fig)
80
+ print(f" saved {path}")
81
+
82
+
83
+ def run_lopo_models():
84
+ print("\n=== LOPO: MLP and XGBoost ===")
85
+ by_person, _, _ = load_per_person("face_orientation")
86
+ persons = sorted(by_person.keys())
87
+
88
+ results = {"mlp": {"y": [], "p": []}, "xgb": {"y": [], "p": []}}
89
+
90
+ for i, held_out in enumerate(persons):
91
+ X_test, y_test = by_person[held_out]
92
+
93
+ train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
94
+ train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
95
+
96
+ scaler = StandardScaler().fit(train_X)
97
+ X_tr_sc = scaler.transform(train_X)
98
+ X_te_sc = scaler.transform(X_test)
99
+
100
+ mlp = MLPClassifier(
101
+ hidden_layer_sizes=(64, 32), activation="relu",
102
+ max_iter=200, early_stopping=True, validation_fraction=0.15,
103
+ random_state=SEED, verbose=False,
104
+ )
105
+ mlp.fit(X_tr_sc, train_y)
106
+ mlp_prob = mlp.predict_proba(X_te_sc)[:, 1]
107
+ results["mlp"]["y"].append(y_test)
108
+ results["mlp"]["p"].append(mlp_prob)
109
+
110
+ xgb = XGBClassifier(
111
+ n_estimators=600, max_depth=8, learning_rate=0.05,
112
+ subsample=0.8, colsample_bytree=0.8,
113
+ reg_alpha=0.1, reg_lambda=1.0,
114
+ use_label_encoder=False, eval_metric="logloss",
115
+ random_state=SEED, verbosity=0,
116
+ )
117
+ xgb.fit(X_tr_sc, train_y)
118
+ xgb_prob = xgb.predict_proba(X_te_sc)[:, 1]
119
+ results["xgb"]["y"].append(y_test)
120
+ results["xgb"]["p"].append(xgb_prob)
121
+
122
+ print(f" fold {i+1}/{len(persons)}: held out {held_out} "
123
+ f"({X_test.shape[0]} samples)")
124
+
125
+ for key in results:
126
+ results[key]["y"] = np.concatenate(results[key]["y"])
127
+ results[key]["p"] = np.concatenate(results[key]["p"])
128
+
129
+ return results
130
+
131
+
132
+ def analyse_model_thresholds(results):
133
+ print("\n=== Model threshold analysis ===")
134
+ model_stats = {}
135
+
136
+ for name, label in [("mlp", "MLP"), ("xgb", "XGBoost")]:
137
+ y, p = results[name]["y"], results[name]["p"]
138
+ opt_t, fpr, tpr, thresholds, auc = _youdens_j(y, p)
139
+ j = tpr - fpr
140
+ opt_idx = j.argmax()
141
+ f1_opt = _f1_at_threshold(y, p, opt_t)
142
+ f1_50 = _f1_at_threshold(y, p, 0.50)
143
+
144
+ path = os.path.join(PLOTS_DIR, f"roc_{name}.png")
145
+ _plot_roc(fpr, tpr, auc, opt_t, opt_idx,
146
+ f"LOPO ROC — {label} (9 folds, 144k samples)", path,
147
+ clearml_title=f"ROC_{label}")
148
+
149
+ model_stats[name] = {
150
+ "label": label, "auc": auc,
151
+ "opt_threshold": opt_t, "f1_opt": f1_opt, "f1_50": f1_50,
152
+ }
153
+ print(f" {label}: AUC={auc:.4f}, optimal threshold={opt_t:.3f} "
154
+ f"(F1={f1_opt:.4f}), F1@0.50={f1_50:.4f}")
155
+
156
+ # Log scalars to ClearML
157
+ if _logger:
158
+ _logger.report_single_value(f"{label} Optimal Threshold", opt_t)
159
+ _logger.report_single_value(f"{label} AUC", auc)
160
+ _logger.report_single_value(f"{label} F1 @ Optimal", f1_opt)
161
+ _logger.report_single_value(f"{label} F1 @ 0.5", f1_50)
162
+
163
+ return model_stats
164
+
165
+ def run_geo_weight_search():
166
+ print("\n=== Geometric weight grid search ===")
167
+
168
+ by_person, _, _ = load_per_person("face_orientation")
169
+ persons = sorted(by_person.keys())
170
+ features = SELECTED_FEATURES["face_orientation"]
171
+ sf_idx = features.index("s_face")
172
+ se_idx = features.index("s_eye")
173
+
174
+ alphas = np.arange(0.2, 0.85, 0.1).round(1)
175
+ alpha_f1 = {a: [] for a in alphas}
176
+
177
+ for held_out in persons:
178
+ X_test, y_test = by_person[held_out]
179
+ sf = X_test[:, sf_idx]
180
+ se = X_test[:, se_idx]
181
+
182
+ train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
183
+ train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
184
+ sf_tr = train_X[:, sf_idx]
185
+ se_tr = train_X[:, se_idx]
186
+
187
+ for a in alphas:
188
+ score_tr = a * sf_tr + (1.0 - a) * se_tr
189
+ opt_t, *_ = _youdens_j(train_y, score_tr)
190
+
191
+ score_te = a * sf + (1.0 - a) * se
192
+ f1 = _f1_at_threshold(y_test, score_te, opt_t)
193
+ alpha_f1[a].append(f1)
194
+
195
+ mean_f1 = {a: np.mean(f1s) for a, f1s in alpha_f1.items()}
196
+ best_alpha = max(mean_f1, key=mean_f1.get)
197
+
198
+ fig, ax = plt.subplots(figsize=(7, 4))
199
+ ax.bar([f"{a:.1f}" for a in alphas],
200
+ [mean_f1[a] for a in alphas], color="steelblue")
201
+ ax.set_xlabel("Face weight (alpha); eye weight = 1 - alpha")
202
+ ax.set_ylabel("Mean LOPO F1")
203
+ ax.set_title("Geometric Pipeline: Face vs Eye Weight Search")
204
+ ax.set_ylim(bottom=max(0, min(mean_f1.values()) - 0.05))
205
+ for i, a in enumerate(alphas):
206
+ ax.text(i, mean_f1[a] + 0.003, f"{mean_f1[a]:.3f}",
207
+ ha="center", va="bottom", fontsize=8)
208
+ fig.tight_layout()
209
+
210
+ # Log to ClearML before closing
211
+ if _logger:
212
+ _logger.report_matplotlib_figure(
213
+ title="Geo Weight Search", series="F1 vs Alpha", figure=fig, iteration=0
214
+ )
215
+
216
+ path = os.path.join(PLOTS_DIR, "geo_weight_search.png")
217
+ fig.savefig(path, dpi=150)
218
+ plt.close(fig)
219
+ print(f" saved {path}")
220
+
221
+ print(f" Best alpha (face weight) = {best_alpha:.1f}, "
222
+ f"mean LOPO F1 = {mean_f1[best_alpha]:.4f}")
223
+
224
+ # Log scalars to ClearML
225
+ if _logger:
226
+ _logger.report_single_value("Geo Best Alpha", best_alpha)
227
+ for i, a in enumerate(sorted(alphas)):
228
+ _logger.report_scalar(
229
+ "Geo Weight Search", "Mean LOPO F1",
230
+ iteration=i, value=mean_f1[a]
231
+ )
232
+
233
+ return dict(mean_f1), best_alpha
234
+
235
+
236
def run_hybrid_weight_search(lopo_results):
    """Grid-search the MLP-vs-geometric mixing weight with LOPO cross-validation.

    For every held-out participant a fresh MLP is fitted on the remaining
    participants; its probabilities are blended with the fixed-weight
    geometric score at each candidate w_mlp, the decision threshold is
    picked on the training folds via Youden's J, and F1 is measured on the
    held-out fold.

    Args:
        lopo_results: output of run_lopo_models(); ``lopo_results["mlp"]["p"]``
            holds the pooled held-out MLP probabilities.
            NOTE(review): the offset-based slicing below assumes those
            probabilities were pooled in sorted-participant order — confirm
            against run_lopo_models.

    Returns:
        (mean_f1, best_w): mapping candidate w_mlp -> mean LOPO F1, plus the
        weight achieving the highest mean F1.
    """
    print("\n=== Hybrid weight grid search ===")

    by_person, _, _ = load_per_person("face_orientation")
    persons = sorted(by_person.keys())
    feature_names = SELECTED_FEATURES["face_orientation"]
    face_col = feature_names.index("s_face")
    eye_col = feature_names.index("s_eye")

    # Geometric sub-score reuses the geometric pipeline's fixed weights.
    GEO_FACE_W = 0.7
    GEO_EYE_W = 0.3

    candidate_weights = np.arange(0.3, 0.85, 0.1).round(1)
    fold_f1 = {w: [] for w in candidate_weights}
    pooled_mlp_p = lopo_results["mlp"]["p"]
    cursor = 0
    for holdout in persons:
        X_te, y_te = by_person[holdout]
        n_te = X_te.shape[0]
        mlp_p_te = pooled_mlp_p[cursor:cursor + n_te]
        cursor += n_te

        geo_te = np.clip(
            GEO_FACE_W * X_te[:, face_col] + GEO_EYE_W * X_te[:, eye_col], 0, 1
        )

        train_folds = [by_person[p] for p in persons if p != holdout]
        X_tr = np.concatenate([pair[0] for pair in train_folds])
        y_tr = np.concatenate([pair[1] for pair in train_folds])
        geo_tr = np.clip(
            GEO_FACE_W * X_tr[:, face_col] + GEO_EYE_W * X_tr[:, eye_col], 0, 1
        )

        scaler = StandardScaler().fit(X_tr)
        fold_mlp = MLPClassifier(
            hidden_layer_sizes=(64, 32), activation="relu",
            max_iter=200, early_stopping=True, validation_fraction=0.15,
            random_state=SEED, verbose=False,
        )
        fold_mlp.fit(scaler.transform(X_tr), y_tr)
        mlp_p_tr = fold_mlp.predict_proba(scaler.transform(X_tr))[:, 1]

        for w in candidate_weights:
            # Threshold is chosen on training data only, then applied to the fold.
            opt_t, *_ = _youdens_j(y_tr, w * mlp_p_tr + (1.0 - w) * geo_tr)
            fold_f1[w].append(
                _f1_at_threshold(y_te, w * mlp_p_te + (1.0 - w) * geo_te, opt_t)
            )

    mean_f1 = {w: np.mean(scores) for w, scores in fold_f1.items()}
    best_w = max(mean_f1, key=mean_f1.get)

    fig, ax = plt.subplots(figsize=(7, 4))
    ax.bar([f"{w:.1f}" for w in candidate_weights],
           [mean_f1[w] for w in candidate_weights], color="darkorange")
    ax.set_xlabel("MLP weight (w_mlp); geo weight = 1 - w_mlp")
    ax.set_ylabel("Mean LOPO F1")
    ax.set_title("Hybrid Pipeline: MLP vs Geometric Weight Search")
    ax.set_ylim(bottom=max(0, min(mean_f1.values()) - 0.05))
    for i, w in enumerate(candidate_weights):
        ax.text(i, mean_f1[w] + 0.003, f"{mean_f1[w]:.3f}",
                ha="center", va="bottom", fontsize=8)
    fig.tight_layout()

    # Send the figure to ClearML before plt.close() discards it.
    if _logger:
        _logger.report_matplotlib_figure(
            title="Hybrid Weight Search", series="F1 vs w_mlp", figure=fig, iteration=0
        )

    out_path = os.path.join(PLOTS_DIR, "hybrid_weight_search.png")
    fig.savefig(out_path, dpi=150)
    plt.close(fig)
    print(f" saved {out_path}")

    print(f" Best w_mlp = {best_w:.1f}, mean LOPO F1 = {mean_f1[best_w]:.4f}")

    # Scalar series lets ClearML plot F1 as a function of w_mlp.
    if _logger:
        _logger.report_single_value("Hybrid Best w_mlp", best_w)
        for i, w in enumerate(sorted(candidate_weights)):
            _logger.report_scalar(
                "Hybrid Weight Search", "Mean LOPO F1",
                iteration=i, value=mean_f1[w]
            )

    return dict(mean_f1), best_w
323
+
324
+
325
def plot_distributions():
    """Plot class-conditional EAR / MAR distributions with threshold overlays.

    Pools per-frame features from every collected ``*.npz`` session under
    ``data/collected_*``, renders two density histograms
    (``ear_distribution.png``, ``mar_distribution.png`` in PLOTS_DIR) with
    the hand-tuned thresholds drawn as vertical lines, and logs both figures
    to ClearML when a logger is configured.

    Returns:
        dict with the percentage of samples on each side of the EAR/MAR
        thresholds and the pooled sample count (keys: ``ear_below_016``,
        ``ear_below_021``, ``ear_above_030``, ``mar_above_055``,
        ``n_samples``).

    Raises:
        FileNotFoundError: if no collected session files are present.
    """
    print("\n=== EAR / MAR distributions ===")
    npz_files = sorted(glob.glob(os.path.join(_PROJECT_ROOT, "data", "collected_*", "*.npz")))
    # Fail early with a clear message instead of an opaque np.concatenate error.
    if not npz_files:
        raise FileNotFoundError("no collected .npz session files found under data/collected_*")

    all_ear_l, all_ear_r, all_mar, all_labels = [], [], [], []
    for f in npz_files:
        # np.load on a .npz keeps the zip archive open; the context manager
        # closes the file handle once the arrays have been extracted.
        with np.load(f, allow_pickle=True) as d:
            names = list(d["feature_names"])
            feat = d["features"].astype(np.float32)
            lab = d["labels"].astype(np.int64)
        all_ear_l.append(feat[:, names.index("ear_left")])
        all_ear_r.append(feat[:, names.index("ear_right")])
        all_mar.append(feat[:, names.index("mar")])
        all_labels.append(lab)

    ear_l = np.concatenate(all_ear_l)
    ear_r = np.concatenate(all_ear_r)
    mar = np.concatenate(all_mar)
    labels = np.concatenate(all_labels)
    ear_min = np.minimum(ear_l, ear_r)
    # Clip extreme outliers so the histograms stay readable.
    ear_plot = np.clip(ear_min, 0, 0.85)
    mar_plot = np.clip(mar, 0, 1.5)

    # Derive the sample count for plot titles instead of hard-coding "144k".
    n_k = len(ear_min) // 1000

    # EAR distribution plot
    fig_ear, ax = plt.subplots(figsize=(7, 4))
    ax.hist(ear_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
    ax.hist(ear_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
    for val, lbl, c in [
        (0.16, "ear_closed = 0.16", "red"),
        (0.21, "EAR_BLINK = 0.21", "orange"),
        (0.30, "ear_open = 0.30", "green"),
    ]:
        ax.axvline(val, color=c, ls="--", lw=1.5, label=lbl)
    ax.set_xlabel("min(left_EAR, right_EAR)")
    ax.set_ylabel("Density")
    ax.set_title(f"EAR Distribution by Class ({n_k}k samples)")
    ax.legend(fontsize=8)
    fig_ear.tight_layout()

    # Log to ClearML before closing
    if _logger:
        _logger.report_matplotlib_figure(
            title="EAR Distribution", series="by class", figure=fig_ear, iteration=0
        )

    path = os.path.join(PLOTS_DIR, "ear_distribution.png")
    fig_ear.savefig(path, dpi=150)
    plt.close(fig_ear)
    print(f" saved {path}")

    # MAR distribution plot
    fig_mar, ax = plt.subplots(figsize=(7, 4))
    ax.hist(mar_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
    ax.hist(mar_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
    ax.axvline(0.55, color="red", ls="--", lw=1.5, label="MAR_YAWN = 0.55")
    ax.set_xlabel("Mouth Aspect Ratio (MAR)")
    ax.set_ylabel("Density")
    ax.set_title(f"MAR Distribution by Class ({n_k}k samples)")
    ax.legend(fontsize=8)
    fig_mar.tight_layout()

    # Log to ClearML before closing
    if _logger:
        _logger.report_matplotlib_figure(
            title="MAR Distribution", series="by class", figure=fig_mar, iteration=0
        )

    path = os.path.join(PLOTS_DIR, "mar_distribution.png")
    fig_mar.savefig(path, dpi=150)
    plt.close(fig_mar)
    print(f" saved {path}")

    # Coverage of each threshold over the pooled samples.
    closed_pct = np.mean(ear_min < 0.16) * 100
    blink_pct = np.mean(ear_min < 0.21) * 100
    open_pct = np.mean(ear_min >= 0.30) * 100
    yawn_pct = np.mean(mar > 0.55) * 100

    stats = {
        "ear_below_016": closed_pct,
        "ear_below_021": blink_pct,
        "ear_above_030": open_pct,
        "mar_above_055": yawn_pct,
        "n_samples": len(ear_min),
    }
    print(f" EAR<0.16 (closed): {closed_pct:.1f}% | EAR<0.21 (blink): {blink_pct:.1f}% | "
          f"EAR>=0.30 (open): {open_pct:.1f}%")
    print(f" MAR>0.55 (yawn): {yawn_pct:.1f}%")
    return stats
413
+
414
+
415
def write_report(model_stats, geo_f1, best_alpha, hybrid_f1, best_w, dist_stats):
    """Assemble and write the markdown threshold-justification report.

    The report is written to ``REPORT_PATH`` with one section per analysis
    step; each section is built by a dedicated private helper below.

    Args:
        model_stats: per-model dicts keyed "mlp"/"xgb" with ``label``,
            ``auc``, ``opt_threshold``, ``f1_opt``, ``f1_50``.
        geo_f1: mapping face-weight alpha -> mean LOPO F1.
        best_alpha: selected face weight.
        hybrid_f1: mapping w_mlp -> mean LOPO F1.
        best_w: selected MLP weight.
        dist_stats: EAR/MAR threshold coverage from plot_distributions().
    """
    lines = []
    lines.append("# Threshold Justification Report")
    lines.append("")
    lines.append("Auto-generated by `evaluation/justify_thresholds.py` using LOPO cross-validation "
                 "over 9 participants (~145k samples).")
    lines.append("")
    lines += _report_model_section(model_stats)
    lines += _report_geo_section(geo_f1, best_alpha)
    lines += _report_hybrid_section(hybrid_f1, best_w)
    lines += _report_ear_mar_section(dist_stats)
    lines += _report_constants_section()

    with open(REPORT_PATH, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print(f"\nReport written to {REPORT_PATH}")


def _report_model_section(model_stats):
    """Section 1: ML model decision-threshold table plus ROC plot links."""
    out = []
    out.append("## 1. ML Model Decision Thresholds")
    out.append("")
    out.append("Thresholds selected via **Youden's J statistic** (J = sensitivity + specificity - 1) "
               "on pooled LOPO held-out predictions.")
    out.append("")
    out.append("| Model | LOPO AUC | Optimal Threshold (Youden's J) | F1 @ Optimal | F1 @ 0.50 |")
    out.append("|-------|----------|-------------------------------|--------------|-----------|")
    for key in ("mlp", "xgb"):
        s = model_stats[key]
        out.append(f"| {s['label']} | {s['auc']:.4f} | **{s['opt_threshold']:.3f}** | "
                   f"{s['f1_opt']:.4f} | {s['f1_50']:.4f} |")
    out.append("")
    out.append("![MLP ROC](plots/roc_mlp.png)")
    out.append("")
    out.append("![XGBoost ROC](plots/roc_xgboost.png)")
    out.append("")
    return out


def _report_geo_section(geo_f1, best_alpha):
    """Section 2: geometric face-vs-eye weight grid-search results."""
    out = []
    out.append("## 2. Geometric Pipeline Weights (s_face vs s_eye)")
    out.append("")
    out.append("Grid search over face weight alpha in {0.2 ... 0.8}. "
               "Eye weight = 1 - alpha. Threshold per fold via Youden's J.")
    out.append("")
    out.append("| Face Weight (alpha) | Mean LOPO F1 |")
    out.append("|--------------------:|-------------:|")
    for a in sorted(geo_f1.keys()):
        marker = " **<-- selected**" if a == best_alpha else ""
        out.append(f"| {a:.1f} | {geo_f1[a]:.4f}{marker} |")
    out.append("")
    out.append(f"**Best:** alpha = {best_alpha:.1f} (face {best_alpha*100:.0f}%, "
               f"eye {(1-best_alpha)*100:.0f}%)")
    out.append("")
    out.append("![Geometric weight search](plots/geo_weight_search.png)")
    out.append("")
    return out


def _report_hybrid_section(hybrid_f1, best_w):
    """Section 3: hybrid MLP-vs-geometric weight grid-search results."""
    out = []
    out.append("## 3. Hybrid Pipeline Weights (MLP vs Geometric)")
    out.append("")
    out.append("Grid search over w_mlp in {0.3 ... 0.8}. w_geo = 1 - w_mlp. "
               "Geometric sub-score uses same weights as geometric pipeline (face=0.7, eye=0.3). "
               "If you change geometric weights, re-run this script — optimal w_mlp can shift.")
    out.append("")
    out.append("| MLP Weight (w_mlp) | Mean LOPO F1 |")
    out.append("|-------------------:|-------------:|")
    for w in sorted(hybrid_f1.keys()):
        marker = " **<-- selected**" if w == best_w else ""
        out.append(f"| {w:.1f} | {hybrid_f1[w]:.4f}{marker} |")
    out.append("")
    out.append(f"**Best:** w_mlp = {best_w:.1f} (MLP {best_w*100:.0f}%, "
               f"geometric {(1-best_w)*100:.0f}%)")
    out.append("")
    out.append("![Hybrid weight search](plots/hybrid_weight_search.png)")
    out.append("")
    return out


def _report_ear_mar_section(dist_stats):
    """Section 4: EAR/MAR threshold tables with observed coverage percentages."""
    out = []
    out.append("## 4. Eye and Mouth Aspect Ratio Thresholds")
    out.append("")
    out.append("### EAR (Eye Aspect Ratio)")
    out.append("")
    out.append("Reference: Soukupova & Cech, \"Real-Time Eye Blink Detection Using Facial "
               "Landmarks\" (2016) established EAR ~ 0.2 as a blink threshold.")
    out.append("")
    out.append("Our thresholds define a linear interpolation zone around this established value:")
    out.append("")
    out.append("| Constant | Value | Justification |")
    out.append("|----------|------:|---------------|")
    out.append(f"| `ear_closed` | 0.16 | Below this, eyes are fully shut. "
               f"{dist_stats['ear_below_016']:.1f}% of samples fall here. |")
    out.append(f"| `EAR_BLINK_THRESH` | 0.21 | Blink detection point; close to the 0.2 reference. "
               f"{dist_stats['ear_below_021']:.1f}% of samples below. |")
    out.append(f"| `ear_open` | 0.30 | Above this, eyes are fully open. "
               f"{dist_stats['ear_above_030']:.1f}% of samples here. |")
    out.append("")
    out.append("Between 0.16 and 0.30 the `_ear_score` function linearly interpolates from 0 to 1, "
               "providing a smooth transition rather than a hard binary cutoff.")
    out.append("")
    out.append("![EAR distribution](plots/ear_distribution.png)")
    out.append("")
    out.append("### MAR (Mouth Aspect Ratio)")
    out.append("")
    # Plain string (the original had a pointless f-prefix here).
    out.append("| Constant | Value | Justification |")
    out.append("|----------|------:|---------------|")
    out.append(f"| `MAR_YAWN_THRESHOLD` | 0.55 | Only {dist_stats['mar_above_055']:.1f}% of "
               f"samples exceed this, confirming it captures genuine yawns without false positives. |")
    out.append("")
    out.append("![MAR distribution](plots/mar_distribution.png)")
    out.append("")
    return out


def _report_constants_section():
    """Section 5: static rationale table for the remaining tuned constants."""
    return [
        "## 5. Other Constants",
        "",
        "| Constant | Value | Rationale |",
        "|----------|------:|-----------|",
        "| `gaze_max_offset` | 0.28 | Max iris displacement (normalised) before gaze score "
        "drops to zero. Corresponds to ~56% of the eye width; beyond this the iris is at "
        "the extreme edge. |",
        "| `max_angle` | 22.0 deg | Head deviation beyond which face score = 0. Based on "
        "typical monitor-viewing cone: at 60 cm distance and a 24\" monitor, the viewing "
        "angle is ~20-25 degrees. |",
        "| `roll_weight` | 0.5 | Roll is less indicative of inattention than yaw/pitch "
        "(tilting head doesn't mean looking away), so it's down-weighted by 50%. |",
        "| `EMA alpha` | 0.3 | Smoothing factor for focus score. "
        "Gives ~3-4 frame effective window; balances responsiveness vs flicker. |",
        "| `grace_frames` | 15 | ~0.5 s at 30 fps before penalising no-face. Allows brief "
        "occlusions (e.g. hand gesture) without dropping score. |",
        "| `PERCLOS_WINDOW` | 60 frames | 2 s at 30 fps; standard PERCLOS measurement "
        "window (Dinges & Grace, 1998). |",
        "| `BLINK_WINDOW_SEC` | 30 s | Blink rate measured over 30 s; typical spontaneous "
        "blink rate is 15-20/min (Bentivoglio et al., 1997). |",
        "",
    ]
533
+
534
+
535
def main():
    """Run the full threshold-justification workflow and write the report."""
    os.makedirs(PLOTS_DIR, exist_ok=True)

    # Each step feeds the next; LOPO predictions are reused by the hybrid search.
    lopo = run_lopo_models()
    stats = analyse_model_thresholds(lopo)
    geo_scores, alpha = run_geo_weight_search()
    hybrid_scores, w_mlp = run_hybrid_weight_search(lopo)
    distribution_stats = plot_distributions()

    write_report(stats, geo_scores, alpha, hybrid_scores, w_mlp, distribution_stats)

    # Finalise the ClearML task, if one was started at import time.
    if _task:
        _task.close()
        print("ClearML task closed.")

    print("\nDone.")
552
+
553
+
554
# Script entry point: only run the full analysis when executed directly.
if __name__ == "__main__":
    main()
evaluation/logs/.gitkeep ADDED
File without changes
evaluation/plots/confusion_matrix_mlp.png ADDED
evaluation/plots/confusion_matrix_xgb.png ADDED
evaluation/plots/ear_distribution.png ADDED
evaluation/plots/geo_weight_search.png ADDED
evaluation/plots/hybrid_weight_search.png ADDED