Abdelrahman Almatrooshi committed on
Commit
22a6915
·
0 Parent(s):

Deploy snapshot from main b7a59b11809483dfc959f196f1930240f2662c49

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .coveragerc +23 -0
  2. .dockerignore +31 -0
  3. .gitattributes +5 -0
  4. .gitignore +56 -0
  5. Dockerfile +35 -0
  6. LICENSE +21 -0
  7. README.md +366 -0
  8. api/__init__.py +1 -0
  9. api/db.py +201 -0
  10. api/drawing.py +124 -0
  11. app.py +1 -0
  12. assets/focusguard-demo.gif +3 -0
  13. checkpoints/L2CSNet_gaze360.pkl +3 -0
  14. checkpoints/README.md +47 -0
  15. checkpoints/hybrid_combiner.joblib +3 -0
  16. checkpoints/hybrid_focus_config.json +10 -0
  17. checkpoints/meta_best.npz +3 -0
  18. checkpoints/meta_mlp.npz +3 -0
  19. checkpoints/mlp_best.pt +3 -0
  20. checkpoints/scaler_mlp.joblib +3 -0
  21. checkpoints/xgboost_face_orientation_best.json +0 -0
  22. config/README.md +45 -0
  23. config/__init__.py +60 -0
  24. config/clearml_enrich.py +87 -0
  25. config/default.yaml +80 -0
  26. data_preparation/README.md +90 -0
  27. data_preparation/__init__.py +0 -0
  28. data_preparation/data_exploration.ipynb +0 -0
  29. data_preparation/prepare_dataset.py +279 -0
  30. docker-compose.yml +5 -0
  31. download_l2cs_weights.py +37 -0
  32. eslint.config.js +42 -0
  33. evaluation/GROUPED_SPLIT_BENCHMARK.md +13 -0
  34. evaluation/README.md +84 -0
  35. evaluation/THRESHOLD_JUSTIFICATION.md +100 -0
  36. evaluation/feature_importance.py +279 -0
  37. evaluation/feature_selection_justification.md +53 -0
  38. evaluation/grouped_split_benchmark.py +107 -0
  39. evaluation/justify_thresholds.py +573 -0
  40. evaluation/logs/.gitkeep +0 -0
  41. evaluation/plots/confusion_matrix_mlp.png +0 -0
  42. evaluation/plots/confusion_matrix_xgb.png +0 -0
  43. evaluation/plots/ear_distribution.png +0 -0
  44. evaluation/plots/geo_weight_search.png +0 -0
  45. evaluation/plots/hybrid_weight_search.png +0 -0
  46. evaluation/plots/hybrid_xgb_weight_search.png +0 -0
  47. evaluation/plots/mar_distribution.png +0 -0
  48. evaluation/plots/roc_mlp.png +0 -0
  49. evaluation/plots/roc_xgb.png +0 -0
  50. index.html +17 -0
.coveragerc ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [run]
2
+ branch = True
3
+ source =
4
+ .
5
+ omit =
6
+ .venv/*
7
+ venv/*
8
+ */site-packages/*
9
+ tests/*
10
+ notebooks/*
11
+ evaluation/*
12
+ models/mlp/train.py
13
+ models/mlp/sweep.py
14
+ models/mlp/eval_accuracy.py
15
+ models/cnn/eye_attention/train.py
16
+ models/collect_features.py
17
+ [report]
18
+ show_missing = True
19
+ skip_covered = False
20
+ precision = 1
21
+ exclude_lines =
22
+ pragma: no cover
23
+ if __name__ == .__main__.:
.dockerignore ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .git
2
+ .gitattributes
3
+ .github
4
+ node_modules
5
+ dist
6
+ venv
7
+ .venv
8
+ __pycache__
9
+ *.pyc
10
+ .pytest_cache
11
+ .mypy_cache
12
+ .ruff_cache
13
+
14
+ notebooks/
15
+ evaluation/
16
+ tests/
17
+ others/
18
+ *.ipynb
19
+ requirements-dev.txt
20
+ pytest.ini
21
+ eslint.config.js
22
+ docker-compose.yml
23
+
24
+ models/L2CS-Net/L2CS-Net-backup/
25
+
26
+ *.db
27
+
28
+ .DS_Store
29
+ .cursor
30
+ .vscode
31
+ *.swp
.gitattributes ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ *.pkl filter=lfs diff=lfs merge=lfs -text
2
+ *.npz filter=lfs diff=lfs merge=lfs -text
3
+ *.joblib filter=lfs diff=lfs merge=lfs -text
4
+ *.pt filter=lfs diff=lfs merge=lfs -text
5
+ *.gif filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Logs
2
+ logs
3
+ *.log
4
+ npm-debug.log*
5
+ yarn-debug.log*
6
+ yarn-error.log*
7
+ pnpm-debug.log*
8
+ lerna-debug.log*
9
+
10
+ node_modules/
11
+ dist/
12
+ dist-ssr/
13
+ *.local
14
+
15
+ # Editor directories and files
16
+ .vscode/
17
+ .idea/
18
+ .DS_Store
19
+ *.suo
20
+ *.ntvs*
21
+ *.njsproj
22
+ *.sln
23
+ *.sw?
24
+ *.py[cod]
25
+ *$py.class
26
+ *.so
27
+ .Python
28
+ venv/
29
+ .venv/
30
+ env/
31
+ .env
32
+ *.egg-info/
33
+ .eggs/
34
+ build/
35
+ Thumbs.db
36
+ ignore/
37
+
38
+ # Coverage / caches
39
+ .coverage
40
+ htmlcov/
41
+
42
+ # Project specific
43
+ focus_guard.db
44
+ test_focus_guard.db
45
+ # Large weights: fetch at build/runtime (see download_l2cs_weights.py)
46
+ checkpoints/L2CSNet_gaze360.pkl
47
+ models/L2CS-Net/models/L2CSNet_gaze360.pkl
48
+ # Training artefacts (too large for HF Hub; keep local only)
49
+ data/
50
+ data_preparation/collected*/
51
+ best_eye_cnn.pth
52
+ checkpoints/model_best.joblib
53
+ __pycache__/
54
+ docs/
55
+ docs
56
+ LOCAL_TESTING.md
Dockerfile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ RUN useradd -m -u 1000 user
4
+ ENV HOME=/home/user PATH=/home/user/.local/bin:$PATH
5
+ ENV PYTHONUNBUFFERED=1
6
+
7
+ WORKDIR /app
8
+
9
+ RUN apt-get update && apt-get install -y --no-install-recommends \
10
+ libglib2.0-0 libsm6 libxrender1 libxext6 libxcb1 libgl1 libgles2 libegl1 libgomp1 \
11
+ ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev \
12
+ libavdevice-dev libopus-dev libvpx-dev libsrtp2-dev \
13
+ build-essential nodejs npm git \
14
+ && rm -rf /var/lib/apt/lists/*
15
+
16
+ RUN pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu
17
+
18
+ COPY requirements.txt ./
19
+ RUN pip install --no-cache-dir -r requirements.txt
20
+
21
+ COPY . .
22
+
23
+ RUN npm install && npm run build && mkdir -p /app/static && cp -R dist/* /app/static/ \
24
+ && rm -rf node_modules dist
25
+
26
+ ENV FOCUSGUARD_CACHE_DIR=/app/.cache/focusguard
27
+ RUN python -c "from models.face_mesh import _ensure_model; _ensure_model()"
28
+ RUN python download_l2cs_weights.py || echo "[WARN] L2CS weights not downloaded — will run without gaze model"
29
+
30
+ RUN mkdir -p /app/data && chown -R user:user /app
31
+
32
+ USER user
33
+ EXPOSE 7860
34
+
35
+ CMD ["bash", "start.sh"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2026 k23172173
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: FocusGuard
3
+ emoji: 👁️
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ short_description: Real-time webcam focus detection via MediaPipe + MLP/XGBoost
10
+ ---
11
+
12
+ # FocusGuard
13
+
14
+ Real-time webcam-based visual attention estimation. MediaPipe Face Mesh extracts 17 features (EAR, gaze ratios, head pose, PERCLOS) per frame, selects 10, and routes them through MLP or XGBoost for binary focused/unfocused classification. Includes a local OpenCV demo and a full React + FastAPI web app with WebSocket/WebRTC video streaming.
15
+
16
+ ![Real-time focus detection with face mesh and XGBoost classification](assets/focusguard-demo.gif)
17
+
18
+ ---
19
+
20
+ ## Team
21
+
22
+ **Team name:** FocusGuards (5CCSAGAP Large Group Project)
23
+
24
+ **Members:** Yingao Zheng, Mohamed Alketbi, Abdelrahman Almatrooshi, Junhao Zhou, Kexin Wang, Langyuan Huang, Saba Al-Gafri, Ayten Arab, Jaroslav Rakoto-Miklas
25
+
26
+ ---
27
+
28
+ ## Links
29
+
30
+ ### Project access
31
+
32
+ - Git repository: [GAP_Large_project](https://github.kcl.ac.uk/k23172173/GAP_Large_project)
33
+ - Deployed app (Hugging Face): [FocusGuard/final_v2](https://huggingface.co/spaces/FocusGuard/final_v2)
34
+ - ClearML experiments: [FocusGuards Large Group Project](https://app.5ccsagap.er.kcl.ac.uk/projects/ce218b2f751641c68042f8fa216f8746/experiments)
35
+
36
+ ### Data and checkpoints
37
+
38
+ - Checkpoints (Google Drive): [Download folder](https://drive.google.com/drive/folders/15yYHKgCHg5AFIBb04XnVaeqHRukwBLAd?usp=drive_link)
39
+ - Dataset (Google Drive): [Dataset folder](https://drive.google.com/drive/folders/1fwACM6i6uVGFkTlJKSlqVhizzgrHl_gY?usp=sharing)
40
+ - Data consent form (PDF): [Consent document](https://drive.google.com/file/d/1g1Hc764ffljoKrjApD6nmWDCXJGYTR0j/view?usp=drive_link)
41
+
42
+ The deployed app contains the full feature set (session history, L2CS calibration, model selector, achievements).
43
+
44
+ ---
45
+
46
+ ## Trained models
47
+
48
+ Model checkpoints are **not included** in the submission archive. Download them before running inference.
49
+
50
+ ### Option 1: Hugging Face Space
51
+
52
+ Pre-trained checkpoints are available in the Hugging Face Space files:
53
+
54
+ ```
55
+ https://huggingface.co/spaces/FocusGuard/final_v2/tree/main/checkpoints
56
+ ```
57
+
58
+ Download and place into `checkpoints/`:
59
+
60
+ | File | Description |
61
+ |------|-------------|
62
+ | `mlp_best.pt` | PyTorch MLP (10-64-32-2, ~2,850 params) |
63
+ | `xgboost_face_orientation_best.json` | XGBoost (600 trees, depth 8, lr 0.1489) |
64
+ | `scaler_mlp.joblib` | StandardScaler fit on training data |
65
+ | `hybrid_focus_config.json` | Hybrid pipeline fusion weights |
66
+ | `hybrid_combiner.joblib` | Hybrid combiner |
67
+ | `L2CSNet_gaze360.pkl` | L2CS-Net ResNet50 gaze weights (96 MB) |
68
+
69
+ ### Option 2: ClearML
70
+
71
+ Models are registered as ClearML OutputModels under project "FocusGuards Large Group Project".
72
+
73
+ | Model | Task ID | Model ID |
74
+ |-------|---------|----------|
75
+ | MLP | `3899b5aa0c3348b28213a3194322cdf7` | `56f94b799f624bdc845fa50c4d0606fe` |
76
+ | XGBoost | `c0ceb8e7e8194a51a7a31078cc47775c` | `6727b8de334f4ca0961c46b436f6fb7c` |
77
+
78
+ **UI:** Open a task on the [experiments page](https://app.5ccsagap.er.kcl.ac.uk/projects/ce218b2f751641c68042f8fa216f8746/experiments), go to Artifacts > Output Models, and download.
79
+
80
+ **Python:**
81
+
82
+ ```python
83
+ from clearml import Model
84
+
85
+ mlp = Model(model_id="56f94b799f624bdc845fa50c4d0606fe")
86
+ mlp_path = mlp.get_local_copy() # downloads .pt
87
+
88
+ xgb = Model(model_id="6727b8de334f4ca0961c46b436f6fb7c")
89
+ xgb_path = xgb.get_local_copy() # downloads .json
90
+ ```
91
+
92
+ Copy the downloaded files into `checkpoints/`.
93
+
94
+ ### Option 3: Google Drive (submission fallback)
95
+
96
+ If ClearML access is restricted, download checkpoints from:
97
+ https://drive.google.com/drive/folders/15yYHKgCHg5AFIBb04XnVaeqHRukwBLAd?usp=drive_link
98
+
99
+ Place all files under `checkpoints/`.
100
+
101
+ ### Option 4: Retrain from scratch
102
+
103
+ ```bash
104
+ python -m models.mlp.train
105
+ python -m models.xgboost.train
106
+ ```
107
+
108
+ This regenerates `checkpoints/mlp_best.pt`, `checkpoints/xgboost_face_orientation_best.json`, and scalers. Requires training data under `data/collected_*/`.
109
+
110
+ ---
111
+
112
+ ## Project layout
113
+
114
+ ```
115
+ config/
116
+ default.yaml hyperparameters, thresholds, app settings
117
+ __init__.py config loader + ClearML flattener
118
+ clearml_enrich.py ClearML task enrichment + artifact upload
119
+ data_preparation/
120
+ prepare_dataset.py load/split/scale .npz files (pooled + LOPO)
121
+ data_exploration.ipynb EDA: distributions, class balance, correlations
122
+ models/
123
+ face_mesh.py MediaPipe 478-point face landmarks
124
+ head_pose.py yaw/pitch/roll via solvePnP, face-orientation score
125
+ eye_scorer.py EAR, MAR, gaze ratios, PERCLOS
126
+ collect_features.py real-time feature extraction + webcam labelling CLI
127
+ gaze_calibration.py 9-point polynomial gaze calibration
128
+ gaze_eye_fusion.py fuses calibrated gaze with eye openness
129
+ mlp/ MLP training, eval, Optuna sweep
130
+ xgboost/ XGBoost training, eval, ClearML + Optuna sweeps
131
+ L2CS-Net/ vendored L2CS-Net (ResNet50, Gaze360)
132
+ checkpoints/ (excluded from archive; see download instructions above)
133
+ notebooks/
134
+ mlp.ipynb MLP training + LOPO in Jupyter
135
+ xgboost.ipynb XGBoost training + LOPO in Jupyter
136
+ evaluation/
137
+ justify_thresholds.py LOPO threshold + weight grid search
138
+ feature_importance.py XGBoost gain + leave-one-feature-out ablation
139
+ grouped_split_benchmark.py pooled vs LOPO comparison
140
+ plots/ ROC curves, confusion matrices, weight searches
141
+ logs/ JSON training logs
142
+ tests/
143
+ test_*.py unit + integration tests (pytest)
144
+ .coveragerc coverage config
145
+ ui/
146
+ pipeline.py all 5 pipeline classes + output smoothing
147
+ live_demo.py OpenCV webcam demo
148
+ src/ React (Vite) frontend source
149
+ static/ built frontend assets (after npm build)
150
+ main.py FastAPI application entry point
151
+ package.json frontend package manifest
152
+ requirements.txt
153
+ pytest.ini
154
+ ```
155
+
156
+ ---
157
+
158
+ ## Setup
159
+
160
+ Recommended versions:
161
+
162
+ - Python 3.10-3.11
163
+ - Node.js 18+ (needed only for frontend rebuild/dev)
164
+
165
+ ```bash
166
+ python -m venv venv
167
+ source venv/bin/activate # Windows: venv\Scripts\activate
168
+ pip install -r requirements.txt
169
+ ```
170
+
171
+ Then download checkpoints (see above).
172
+
173
+ If you need to rebuild frontend assets locally:
174
+
175
+ ```bash
176
+ npm install
177
+ npm run build
178
+ mkdir -p static && cp -r dist/* static/
179
+ ```
180
+
181
+ ---
182
+
183
+ ## Run
184
+
185
+ ### Local OpenCV demo
186
+
187
+ ```bash
188
+ python ui/live_demo.py
189
+ python ui/live_demo.py --xgb # XGBoost
190
+ ```
191
+
192
+ Controls: `m` cycle mesh overlay, `1-5` switch pipeline mode, `q` quit.
193
+
194
+ ### Web app (without Docker)
195
+
196
+ ```bash
197
+ source venv/bin/activate
198
+ python -m uvicorn main:app --host 0.0.0.0 --port 7860
199
+ ```
200
+
201
+ Open http://localhost:7860
202
+
203
+
204
+ ### Web app (Docker)
205
+
206
+ ```bash
207
+ docker-compose up # serves on port 7860
208
+ ```
209
+
210
+ ---
211
+
212
+ ## Data collection
213
+
214
+ ```bash
215
+ python -m models.collect_features --name <participant>
216
+ ```
217
+
218
+ Records webcam sessions with real-time binary labelling (spacebar toggles focused/unfocused). Saves per-frame feature vectors to `data/collected_<participant>/` as `.npz` files. Raw video is never stored.
219
+
220
+ 9 participants recorded 5-10 min sessions across varied environments (144,793 frames total, 61.5% focused / 38.5% unfocused). All participants provided informed consent. Dataset files are not included in this repository.
221
+
222
+ Consent document: https://drive.google.com/file/d/1g1Hc764ffljoKrjApD6nmWDCXJGYTR0j/view?usp=drive_link
223
+ Raw participant dataset is excluded from this submission (coursework policy and privacy constraints). It can be shared with module staff on request: https://drive.google.com/drive/folders/1fwACM6i6uVGFkTlJKSlqVhizzgrHl_gY?usp=sharing
224
+
225
+ ---
226
+
227
+ ## Pipeline
228
+
229
+ ```
230
+ Webcam frame
231
+ --> MediaPipe Face Mesh (478 landmarks)
232
+ --> Head pose (solvePnP): yaw, pitch, roll, s_face, head_deviation
233
+ --> Eye scorer: EAR_left, EAR_right, EAR_avg, s_eye, MAR
234
+ --> Gaze ratios: h_gaze, v_gaze, gaze_offset
235
+ --> Temporal tracker: PERCLOS, blink_rate, closure_dur, yawn_dur
236
+ --> 17 features --> select 10 --> clip to physiological bounds
237
+ --> ML model (MLP / XGBoost) or geometric scorer
238
+ --> Asymmetric EMA smoothing (alpha_up=0.55, alpha_down=0.45)
239
+ --> FOCUSED / UNFOCUSED
240
+ ```
241
+
242
+ Five runtime modes share the same feature extraction backbone:
243
+
244
+ | Mode | Description |
245
+ |------|-------------|
246
+ | **Geometric** | Deterministic scoring: 0.7 * s_face + 0.3 * s_eye, cosine-decay with max_angle=22 deg |
247
+ | **XGBoost** | 600-tree gradient-boosted ensemble, threshold 0.28 (LOPO-optimal) |
248
+ | **MLP** | PyTorch 10-64-32-2 perceptron, threshold 0.23 (LOPO-optimal) |
249
+ | **Hybrid** | 30% MLP + 70% geometric ensemble (LOPO F1 = 0.841) |
250
+ | **L2CS** | Deep gaze estimation via L2CS-Net (ResNet50, Gaze360 pretrained) |
251
+
252
+ Any mode can be combined with L2CS Boost mode (35% base + 65% L2CS, fused threshold 0.52). Off-screen gaze produces near-zero L2CS score via cosine decay, acting as a soft veto.
253
+
254
+ ---
255
+
256
+ ## Training
257
+
258
+ Both scripts read all hyperparameters from `config/default.yaml`.
259
+
260
+ ```bash
261
+ python -m models.mlp.train
262
+ python -m models.xgboost.train
263
+ ```
264
+
265
+ Outputs: `checkpoints/` (model + scaler) and `evaluation/logs/` (CSVs, JSON summaries).
266
+
267
+ ### ClearML experiment tracking
268
+
269
+ ```bash
270
+ USE_CLEARML=1 python -m models.mlp.train
271
+ USE_CLEARML=1 CLEARML_QUEUE=gpu python -m models.xgboost.train
272
+ USE_CLEARML=1 python -m evaluation.justify_thresholds --clearml
273
+ ```
274
+
275
+ Logs hyperparameters, per-epoch scalars, confusion matrices, ROC curves, model registration, dataset stats, and reproducibility artifacts (config YAML, requirements.txt, git SHA).
276
+
277
+ Reference experiment IDs:
278
+
279
+ | Model | ClearML experiment ID |
280
+ |-------|------------------------|
281
+ | MLP (`models.mlp.train`) | `3899b5aa0c3348b28213a3194322cdf7` |
282
+ | XGBoost (`models.xgboost.train`) | `c0ceb8e7e8194a51a7a31078cc47775c` |
283
+
284
+ ---
285
+
286
+ ## Evaluation
287
+
288
+ ```bash
289
+ python -m evaluation.justify_thresholds # LOPO threshold + weight search
290
+ python -m evaluation.grouped_split_benchmark # pooled vs LOPO comparison
291
+ python -m evaluation.feature_importance # XGBoost gain + LOFO ablation
292
+ ```
293
+
294
+ ### Results (pooled random split, 15% test)
295
+
296
+ | Model | Accuracy | F1 | ROC-AUC |
297
+ |-------|----------|----|---------|
298
+ | XGBoost (600 trees, depth 8) | 95.87% | 0.959 | 0.991 |
299
+ | MLP (64-32) | 92.92% | 0.929 | 0.971 |
300
+
301
+ ### Results (LOPO, 9 participants)
302
+
303
+ | Model | LOPO AUC | Best threshold (Youden's J) | F1 at best threshold |
304
+ |-------|----------|-----------------------------|----------------------|
305
+ | MLP | 0.862 | 0.228 | 0.858 |
306
+ | XGBoost | 0.870 | 0.280 | 0.855 |
307
+
308
+ Best geometric face weight (alpha) = 0.7 (mean LOPO F1 = 0.820).
309
+ Best hybrid MLP weight (w_mlp) = 0.3 (mean LOPO F1 = 0.841).
310
+
311
+ The ~12 pp drop from pooled to LOPO reflects temporal data leakage and confirms LOPO as the primary generalisation metric.
312
+
313
+ ### Feature ablation
314
+
315
+ | Channel subset | Mean LOPO F1 |
316
+ |----------------|-------------|
317
+ | All 10 features | 0.829 |
318
+ | Eye state only | 0.807 |
319
+ | Head pose only | 0.748 |
320
+ | Gaze only | 0.726 |
321
+
322
+ Top-5 XGBoost gain: `s_face` (10.27), `ear_right` (9.54), `head_deviation` (8.83), `ear_avg` (6.96), `perclos` (5.68).
323
+
324
+ ---
325
+
326
+ ## L2CS Gaze Tracking
327
+
328
+ L2CS-Net predicts where your eyes are looking, not just where your head is pointed, catching the scenario where the head faces the screen but eyes wander.
329
+
330
+ **Standalone mode:** Select L2CS as the model.
331
+
332
+ **Boost mode:** Select any other model, then enable the GAZE toggle. L2CS runs alongside the base model with score-level fusion (35% base / 65% L2CS). Off-screen gaze triggers a soft veto.
333
+
334
+ **Calibration:** Click Calibrate during a session. A fullscreen overlay shows 9 target dots (3x3 grid). After all 9 points, a degree-2 polynomial maps gaze angles to screen coordinates with IQR outlier filtering and centre-point bias correction.
335
+
336
+ L2CS weight lookup order in runtime:
337
+
338
+ 1. `checkpoints/L2CSNet_gaze360.pkl`
339
+ 2. `models/L2CS-Net/models/L2CSNet_gaze360.pkl`
340
+ 3. `models/L2CSNet_gaze360.pkl`
341
+
342
+ ---
343
+
344
+ ## Config
345
+
346
+ All hyperparameters and app settings are in `config/default.yaml`. Override with `FOCUSGUARD_CONFIG=/path/to/custom.yaml`.
347
+
348
+ ---
349
+
350
+ ## Tests
351
+
352
+ Included checks:
353
+
354
+ - data prep helpers and real split consistency (`test_data_preparation.py`; split test **skips** if `data/collected_*/*.npz` is absent)
355
+ - feature clipping (`test_models_clip_features.py`)
356
+ - pipeline integration (`test_pipeline_integration.py`)
357
+ - gaze calibration / fusion diagnostics (`test_gaze_pipeline.py`)
358
+ - FastAPI health, settings, sessions (`test_health_endpoint.py`, `test_api_settings.py`, `test_api_sessions.py`)
359
+
360
+ ```bash
361
+ pytest
362
+ ```
363
+
364
+ Coverage is enabled by default via `pytest.ini` (`--cov` / term report). For HTML coverage: `pytest --cov-report=html`.
365
+
366
+ **Stack:** Python, PyTorch, XGBoost, MediaPipe, OpenCV, L2CS-Net, FastAPI, React/Vite, SQLite, Docker, ClearML, pytest.
api/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # API package: db, drawing, routes, websocket.
api/db.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SQLite DB for focus sessions and user settings."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import json
7
+ from datetime import datetime
8
+
9
+ import aiosqlite
10
+
11
+
12
def get_db_path() -> str:
    """Return the SQLite database file path.

    Reads ``app.db_path`` from the project config when available; falls
    back to ``"focus_guard.db"`` if the config package cannot be imported
    or the key is unset/empty.
    """
    try:
        from config import get
        configured = get("app.db_path")
    except Exception:
        # Config package missing or misconfigured — use the default file.
        return "focus_guard.db"
    return configured or "focus_guard.db"
19
+
20
+
21
async def init_database(db_path: str | None = None) -> None:
    """Create focus_sessions, focus_events, user_settings tables if missing.

    Also seeds the singleton ``user_settings`` row (id=1, model 'mlp')
    with INSERT OR IGNORE so re-running is a no-op.
    """
    target = db_path or get_db_path()
    # DDL + seed statements, executed in order on a single connection.
    statements = (
        """
        CREATE TABLE IF NOT EXISTS focus_sessions (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            start_time TIMESTAMP NOT NULL,
            end_time TIMESTAMP,
            duration_seconds INTEGER DEFAULT 0,
            focus_score REAL DEFAULT 0.0,
            total_frames INTEGER DEFAULT 0,
            focused_frames INTEGER DEFAULT 0,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS focus_events (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id INTEGER NOT NULL,
            timestamp TIMESTAMP NOT NULL,
            is_focused BOOLEAN NOT NULL,
            confidence REAL NOT NULL,
            detection_data TEXT,
            FOREIGN KEY (session_id) REFERENCES focus_sessions (id)
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS user_settings (
            id INTEGER PRIMARY KEY CHECK (id = 1),
            model_name TEXT DEFAULT 'mlp'
        )
        """,
        """
        INSERT OR IGNORE INTO user_settings (id, model_name)
        VALUES (1, 'mlp')
        """,
    )
    async with aiosqlite.connect(target) as db:
        for stmt in statements:
            await db.execute(stmt)
        await db.commit()
59
+
60
+
61
async def create_session(db_path: str | None = None) -> int:
    """Insert a new focus session row stamped with the current time.

    Returns:
        The autoincremented id of the new session.
    """
    target = db_path or get_db_path()
    started_at = datetime.now().isoformat()
    async with aiosqlite.connect(target) as db:
        cur = await db.execute(
            "INSERT INTO focus_sessions (start_time) VALUES (?)",
            (started_at,),
        )
        await db.commit()
        return cur.lastrowid
71
+
72
+
73
async def end_session(session_id: int, db_path: str | None = None) -> dict | None:
    """Close a session and return its summary dict.

    Computes duration from the stored start_time and the focus score as
    focused_frames / total_frames (0.0 when no frames were recorded).

    Fix: the original opened two separate aiosqlite connections — one to
    read the session row and a second to write the update. A single
    connection now performs both the read and the update, avoiding the
    redundant connect and keeping the read-then-write on one handle.

    Args:
        session_id: id of the session to close.
        db_path: optional DB file override; defaults to get_db_path().

    Returns:
        Summary dict (session_id, start/end times, duration_seconds,
        focus_score, frame counts), or None if the session id is unknown.
    """
    path = db_path or get_db_path()
    async with aiosqlite.connect(path) as db:
        cursor = await db.execute(
            "SELECT start_time, total_frames, focused_frames FROM focus_sessions WHERE id = ?",
            (session_id,),
        )
        row = await cursor.fetchone()
        if not row:
            return None
        start_time_str, total_frames, focused_frames = row
        start_time = datetime.fromisoformat(start_time_str)
        end_time = datetime.now()
        duration = (end_time - start_time).total_seconds()
        # Guard against division by zero for sessions with no frames.
        focus_score = focused_frames / total_frames if total_frames > 0 else 0.0
        await db.execute(
            """
            UPDATE focus_sessions
            SET end_time = ?, duration_seconds = ?, focus_score = ?
            WHERE id = ?
            """,
            (end_time.isoformat(), int(duration), focus_score, session_id),
        )
        await db.commit()
    return {
        "session_id": session_id,
        "start_time": start_time_str,
        "end_time": end_time.isoformat(),
        "duration_seconds": int(duration),
        "focus_score": round(focus_score, 3),
        "total_frames": total_frames,
        "focused_frames": focused_frames,
    }
105
+
106
+
107
async def store_focus_event(
    session_id: int,
    is_focused: bool,
    confidence: float,
    metadata: dict,
    db_path: str | None = None,
) -> None:
    """Append a single focus event and bump the parent session's counters.

    The event row stores the metadata dict as a JSON string in
    detection_data; the session's total_frames always increments by 1 and
    focused_frames by 1 only when the frame was focused.
    """
    target = db_path or get_db_path()
    stamped = datetime.now().isoformat()
    focused_delta = 1 if is_focused else 0
    async with aiosqlite.connect(target) as db:
        await db.execute(
            """
            INSERT INTO focus_events (session_id, timestamp, is_focused, confidence, detection_data)
            VALUES (?, ?, ?, ?, ?)
            """,
            (session_id, stamped, is_focused, confidence, json.dumps(metadata)),
        )
        await db.execute(
            """
            UPDATE focus_sessions
            SET total_frames = total_frames + 1,
                focused_frames = focused_frames + ?
            WHERE id = ?
            """,
            (focused_delta, session_id),
        )
        await db.commit()
128
+
129
+
130
+ class EventBuffer:
131
+ """Buffer focus events and flush to DB in batches to avoid per-frame writes."""
132
+
133
+ def __init__(self, db_path: str | None = None, flush_interval: float = 2.0):
134
+ self._db_path = db_path or get_db_path()
135
+ self._flush_interval = flush_interval
136
+ self._buf: list = []
137
+ self._lock = asyncio.Lock()
138
+ self._task: asyncio.Task | None = None
139
+ self._total_frames = 0
140
+ self._focused_frames = 0
141
+
142
+ def start(self) -> None:
143
+ if self._task is None:
144
+ self._task = asyncio.create_task(self._flush_loop())
145
+
146
+ async def stop(self) -> None:
147
+ if self._task:
148
+ self._task.cancel()
149
+ try:
150
+ await self._task
151
+ except asyncio.CancelledError:
152
+ pass
153
+ self._task = None
154
+ await self._flush()
155
+
156
+ def add(self, session_id: int, is_focused: bool, confidence: float, metadata: dict) -> None:
157
+ self._buf.append((
158
+ session_id,
159
+ datetime.now().isoformat(),
160
+ is_focused,
161
+ confidence,
162
+ json.dumps(metadata),
163
+ ))
164
+ self._total_frames += 1
165
+ if is_focused:
166
+ self._focused_frames += 1
167
+
168
+ async def _flush_loop(self) -> None:
169
+ while True:
170
+ await asyncio.sleep(self._flush_interval)
171
+ await self._flush()
172
+
173
+ async def _flush(self) -> None:
174
+ async with self._lock:
175
+ if not self._buf:
176
+ return
177
+ batch = self._buf[:]
178
+ total = self._total_frames
179
+ focused = self._focused_frames
180
+ self._buf.clear()
181
+ self._total_frames = 0
182
+ self._focused_frames = 0
183
+ if not batch:
184
+ return
185
+ session_id = batch[0][0]
186
+ try:
187
+ async with aiosqlite.connect(self._db_path) as db:
188
+ await db.executemany("""
189
+ INSERT INTO focus_events (session_id, timestamp, is_focused, confidence, detection_data)
190
+ VALUES (?, ?, ?, ?, ?)
191
+ """, batch)
192
+ await db.execute("""
193
+ UPDATE focus_sessions
194
+ SET total_frames = total_frames + ?,
195
+ focused_frames = focused_frames + ?
196
+ WHERE id = ?
197
+ """, (total, focused, session_id))
198
+ await db.commit()
199
+ except Exception as e:
200
+ import logging
201
+ logging.getLogger(__name__).warning("DB flush error: %s", e)
api/drawing.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Server-side face mesh and HUD drawing for WebRTC/WS video frames."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import cv2
6
+ import numpy as np
7
+
8
+ from mediapipe.tasks.python.vision import FaceLandmarksConnections
9
+ from models.face_mesh import FaceMeshDetector
10
+
11
+ _FONT = cv2.FONT_HERSHEY_SIMPLEX
12
+ _CYAN = (255, 255, 0)
13
+ _GREEN = (0, 255, 0)
14
+ _MAGENTA = (255, 0, 255)
15
+ _ORANGE = (0, 165, 255)
16
+ _RED = (0, 0, 255)
17
+ _WHITE = (255, 255, 255)
18
+ _LIGHT_GREEN = (144, 238, 144)
19
+
20
+ _TESSELATION_CONNS = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION]
21
+ _CONTOUR_CONNS = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS]
22
+ _LEFT_EYEBROW = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]
23
+ _RIGHT_EYEBROW = [300, 293, 334, 296, 336, 285, 295, 282, 283, 276]
24
+ _NOSE_BRIDGE = [6, 197, 195, 5, 4, 1, 19, 94, 2]
25
+ _LIPS_OUTER = [61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 409, 270, 269, 267, 0, 37, 39, 40, 185, 61]
26
+ _LIPS_INNER = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78]
27
+ _LEFT_EAR_POINTS = [33, 160, 158, 133, 153, 145]
28
+ _RIGHT_EAR_POINTS = [362, 385, 387, 263, 373, 380]
29
+
30
+
31
+ def _lm_px(lm: np.ndarray, idx: int, w: int, h: int) -> tuple[int, int]:
32
+ return (int(lm[idx, 0] * w), int(lm[idx, 1] * h))
33
+
34
+
35
def _draw_polyline(
    frame: np.ndarray, lm: np.ndarray, indices: list[int], w: int, h: int, color: tuple, thickness: int
) -> None:
    """Draw anti-aliased line segments connecting consecutive landmark indices."""
    # Walk the index list pairwise: each (a, b) is one segment.
    for a, b in zip(indices, indices[1:]):
        start_pt = _lm_px(lm, a, w, h)
        end_pt = _lm_px(lm, b, w, h)
        cv2.line(frame, start_pt, end_pt, color, thickness, cv2.LINE_AA)
47
+
48
+
49
def draw_face_mesh(frame: np.ndarray, lm: np.ndarray, w: int, h: int) -> None:
    """Draw tessellation, contours, eyebrows, nose, lips, eyes, irises, gaze lines on frame.

    Args:
        frame: BGR image, modified in place.
        lm: landmark array indexed as lm[i, 0]=x, lm[i, 1]=y, normalized to [0, 1].
            # assumes MediaPipe's 478-landmark layout — TODO confirm against FaceMeshDetector
        w: frame width in pixels.
        h: frame height in pixels.
    """
    # Tessellation is drawn on a copy and alpha-blended at 30% so the dense
    # grey wireframe stays subtle beneath the stronger outlines drawn after it.
    overlay = frame.copy()
    for s, e in _TESSELATION_CONNS:
        cv2.line(overlay, _lm_px(lm, s, w, h), _lm_px(lm, e, w, h), (200, 200, 200), 1, cv2.LINE_AA)
    cv2.addWeighted(overlay, 0.3, frame, 0.7, 0, frame)
    # Face contours at full opacity, then the named feature polylines.
    for s, e in _CONTOUR_CONNS:
        cv2.line(frame, _lm_px(lm, s, w, h), _lm_px(lm, e, w, h), _CYAN, 1, cv2.LINE_AA)
    _draw_polyline(frame, lm, _LEFT_EYEBROW, w, h, _LIGHT_GREEN, 2)
    _draw_polyline(frame, lm, _RIGHT_EYEBROW, w, h, _LIGHT_GREEN, 2)
    _draw_polyline(frame, lm, _NOSE_BRIDGE, w, h, _ORANGE, 1)
    _draw_polyline(frame, lm, _LIPS_OUTER, w, h, _MAGENTA, 1)
    _draw_polyline(frame, lm, _LIPS_INNER, w, h, (200, 0, 200), 1)
    # Closed eye outlines (polylines with closed=True).
    left_pts = np.array([_lm_px(lm, i, w, h) for i in FaceMeshDetector.LEFT_EYE_INDICES], dtype=np.int32)
    cv2.polylines(frame, [left_pts], True, _GREEN, 2, cv2.LINE_AA)
    right_pts = np.array([_lm_px(lm, i, w, h) for i in FaceMeshDetector.RIGHT_EYE_INDICES], dtype=np.int32)
    cv2.polylines(frame, [right_pts], True, _GREEN, 2, cv2.LINE_AA)
    # Yellow dots on the six EAR measurement points of each eye.
    for indices in [_LEFT_EAR_POINTS, _RIGHT_EAR_POINTS]:
        for idx in indices:
            cv2.circle(frame, _lm_px(lm, idx, w, h), 3, (0, 255, 255), -1, cv2.LINE_AA)
    # Per-eye iris circle plus a short gaze-direction ray.
    # 133/33 and 362/263 are the inner/outer eye-corner landmark ids used
    # to locate each eye's centre.
    for iris_idx, eye_inner, eye_outer in [
        (FaceMeshDetector.LEFT_IRIS_INDICES, 133, 33),
        (FaceMeshDetector.RIGHT_IRIS_INDICES, 362, 263),
    ]:
        iris_pts = np.array([_lm_px(lm, i, w, h) for i in iris_idx], dtype=np.int32)
        # First iris landmark is treated as the iris centre; the remaining
        # four give the radius estimate below.
        center = iris_pts[0]
        if len(iris_pts) >= 5:
            radii = [np.linalg.norm(iris_pts[j] - center) for j in range(1, 5)]
            radius = max(int(np.mean(radii)), 2)  # clamp so tiny faces still show a circle
            cv2.circle(frame, tuple(center), radius, _MAGENTA, 2, cv2.LINE_AA)
            cv2.circle(frame, tuple(center), 2, _WHITE, -1, cv2.LINE_AA)
        # Eye centre = midpoint of the two corner landmarks, in pixels.
        eye_cx = int((lm[eye_inner, 0] + lm[eye_outer, 0]) / 2.0 * w)
        eye_cy = int((lm[eye_inner, 1] + lm[eye_outer, 1]) / 2.0 * h)
        # Gaze ray: iris offset from the eye centre, extended 3x for visibility.
        dx, dy = center[0] - eye_cx, center[1] - eye_cy
        cv2.line(
            frame,
            tuple(center),
            (int(center[0] + dx * 3), int(center[1] + dy * 3)),
            _RED,
            1,
            cv2.LINE_AA,
        )
91
+
92
+
93
def draw_hud(frame: np.ndarray, result: dict, model_name: str) -> None:
    """Render the status bar and detail overlay onto the frame in place.

    Shows FOCUSED/NOT FOCUSED, the active model name, a confidence/metrics
    line, optional head-pose angles, and a YAWN banner when yawning.
    """
    h, w = frame.shape[:2]
    is_focused = result["is_focused"]
    if is_focused:
        status, color = "FOCUSED", _GREEN
    else:
        status, color = "NOT FOCUSED", _RED
    # Black banner across the top, then the status and model labels on it.
    cv2.rectangle(frame, (0, 0), (w, 55), (0, 0, 0), -1)
    cv2.putText(frame, status, (10, 28), _FONT, 0.8, color, 2, cv2.LINE_AA)
    cv2.putText(frame, model_name.upper(), (w - 150, 28), _FONT, 0.45, _WHITE, 1, cv2.LINE_AA)
    # Confidence prefers the MLP probability, falling back to the raw score.
    conf = result.get("mlp_prob", result.get("raw_score", 0.0))
    if result.get("mar") is not None:
        mar_s = f" MAR:{result['mar']:.2f}"
    else:
        mar_s = ""
    sf = result.get("s_face", 0)
    se = result.get("s_eye", 0)
    detail = f"conf:{conf:.2f} S_face:{sf:.2f} S_eye:{se:.2f}{mar_s}"
    cv2.putText(frame, detail, (10, 48), _FONT, 0.4, _WHITE, 1, cv2.LINE_AA)
    # Head-pose readout only when the pose solver produced angles.
    if result.get("yaw") is not None:
        pose_text = f"yaw:{result['yaw']:+.0f} pitch:{result['pitch']:+.0f} roll:{result['roll']:+.0f}"
        cv2.putText(
            frame,
            pose_text,
            (w - 280, 48),
            _FONT,
            0.4,
            (180, 180, 180),
            1,
            cv2.LINE_AA,
        )
    if result.get("is_yawning"):
        cv2.putText(frame, "YAWN", (10, 75), _FONT, 0.7, _ORANGE, 2, cv2.LINE_AA)
120
+
121
+
122
def get_tesselation_connections() -> list[tuple[int, int]]:
    """Return a fresh copy of the tessellation edge pairs for client-side face mesh."""
    # Copy so callers cannot mutate the module-level connection list.
    return _TESSELATION_CONNS.copy()
app.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from main import app
assets/focusguard-demo.gif ADDED

Git LFS Details

  • SHA256: 33dc81af4b303af6d62438a1aae71bb86b5c321317a3bd25241afef670edbb93
  • Pointer size: 132 Bytes
  • Size of remote file: 2.83 MB
checkpoints/L2CSNet_gaze360.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a7f3480d868dd48261e1d59f915b0ef0bb33ea12ea00938fb2168f212080665
3
+ size 95849977
checkpoints/README.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # checkpoints
2
+
3
+ Checkpoint files are not included in the submission archive.
4
+
5
+ ## Required files
6
+
7
+ Place the following files in this folder:
8
+
9
+ - `mlp_best.pt`
10
+ - `xgboost_face_orientation_best.json`
11
+ - `scaler_mlp.joblib`
12
+ - `hybrid_focus_config.json`
13
+ - `hybrid_combiner.joblib`
14
+ - `L2CSNet_gaze360.pkl`
15
+
16
+ ## Optional generated files
17
+
18
+ - `meta_best.npz`
19
+ - `meta_mlp.npz`
20
+
21
+ They are metadata artifacts and are not required for standard inference.
22
+
23
+ ## Download sources
24
+
25
+ Use any one source:
26
+
27
+ - Hugging Face Space files: [checkpoints folder](https://huggingface.co/spaces/FocusGuard/final_v2/tree/main/checkpoints)
28
+ - ClearML project: [FocusGuards Large Group Project](https://app.5ccsagap.er.kcl.ac.uk/projects/ce218b2f751641c68042f8fa216f8746/experiments)
29
+ - Google Drive fallback: [checkpoint folder](https://drive.google.com/drive/folders/15yYHKgCHg5AFIBb04XnVaeqHRukwBLAd?usp=drive_link)
30
+
31
+ ## Verify files
32
+
33
+ Run from repo root:
34
+
35
+ ```bash
36
+ ls -lh checkpoints
37
+ ```
38
+
39
+ You should see all required filenames above.
40
+
41
+ ## L2CS lookup order in runtime
42
+
43
+ The app checks for L2CS weights in this order:
44
+
45
+ 1. `checkpoints/L2CSNet_gaze360.pkl`
46
+ 2. `models/L2CS-Net/models/L2CSNet_gaze360.pkl`
47
+ 3. `models/L2CSNet_gaze360.pkl`
checkpoints/hybrid_combiner.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e460c6ca8d2cadf37727456401a0d63028ba23cb6401f0835d869abfa2e053c
3
+ size 965
checkpoints/hybrid_focus_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "use_xgb": false,
3
+ "w_mlp": 0.3,
4
+ "w_geo": 0.7,
5
+ "threshold": 0.35,
6
+ "use_yawn_veto": true,
7
+ "geo_face_weight": 0.7,
8
+ "geo_eye_weight": 0.3,
9
+ "mar_yawn_threshold": 0.55
10
+ }
checkpoints/meta_best.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d78d1df5e25536a2c82c4b8f5fd0c26dd35f44b28fd59761634cbf78c7546f8
3
+ size 4196
checkpoints/meta_mlp.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4771c61cdf0711aa640b4d600a0851d344414cd16c1c2f75afc90e3c6135d14b
3
+ size 840
checkpoints/mlp_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2f55129785b6882c304483aa5399f5bf6c9ed6e73dfec7ca6f36cd0436156c8
3
+ size 14497
checkpoints/scaler_mlp.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2038d5b051d4de303c5688b1b861a0b53b1307a52b9447bfa48e8c7ace749329
3
+ size 823
checkpoints/xgboost_face_orientation_best.json ADDED
The diff for this file is too large to render. See raw diff
 
config/README.md ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # config
2
+
3
+ Centralised configuration for FocusGuard. Every training script, pipeline, and evaluation tool reads from this package rather than hardcoding values. This ensures reproducibility across experiments and consistent behaviour between training and deployment.
4
+
5
+ ## Files
6
+
7
+ | File | Purpose |
8
+ |------|---------|
9
+ | `default.yaml` | Single source of truth for all hyperparameters, thresholds, clipping bounds, and app settings |
10
+ | `__init__.py` | YAML loader with dotted-key access (`get("pipeline.mlp_threshold")`), ClearML flattener, and project-wide constants |
11
+ | `clearml_enrich.py` | ClearML experiment tracking helpers: environment tags, config/requirements upload, model metadata |
12
+
13
+ ## Usage
14
+
15
+ ```python
16
+ from config import get
17
+
18
+ lr = get("mlp.lr") # 0.001
19
+ threshold = get("pipeline.mlp_threshold") # 0.23
20
+ clip_yaw = get("data.clip.yaw") # [-45, 45]
21
+ ```
22
+
23
+ Override the default path by setting `FOCUSGUARD_CONFIG` to point at a different YAML file.
24
+
25
+ ## Key sections in default.yaml
26
+
27
+ | Section | What it controls |
28
+ |---------|-----------------|
29
+ | `app` | DB path, inference size (640x480), 4 workers, default model |
30
+ | `l2cs_boost` | Boost-mode fusion weights (35% base / 65% L2CS), fused threshold 0.52 |
31
+ | `mlp` | 30 epochs, batch 32, lr 0.001, hidden sizes [64, 32] |
32
+ | `xgboost` | 600 trees, depth 8, lr 0.1489, regularisation from 40-trial Optuna sweep |
33
+ | `data.clip` | Physiological clipping: yaw +/-45, pitch +/-30, EAR [0, 0.85], MAR [0, 1.0] |
34
+ | `pipeline.geometric` | max_angle 22 deg, face/eye weights 0.7/0.3, asymmetric EMA (alpha_up=0.55, alpha_down=0.45) |
35
+ | `pipeline` | Production thresholds: MLP 0.23, XGBoost 0.28, hybrid 0.35 (all derived from LOPO Youden's J) |
36
+ | `evaluation` | Seed 42, weight search ranges for geometric alpha and hybrid w_mlp |
37
+
38
+ ## ClearML enrichment
39
+
40
+ `clearml_enrich.py` provides reusable helpers called by all training and evaluation scripts:
41
+
42
+ - `enrich_task(task, role)` adds tags (Python version, OS, torch/CUDA, git SHA)
43
+ - `upload_repro_artifacts(task)` pins the exact YAML config and requirements.txt
44
+ - `attach_output_metrics(model, metrics)` surfaces headline metrics on registered model cards
45
+ - `task_done_summary(task, summary)` sets a human-readable task comment
config/__init__.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Load app and model config from YAML. Single source for hyperparameters and tunables."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+
7
+ # ClearML UI project name (must match the project in your ClearML workspace).
8
+ CLEARML_PROJECT_NAME = "FocusGuards Large Group Project"
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ _CONFIG: dict[str, Any] | None = None
13
+
14
+
15
def _default_path() -> Path:
    """Path to the bundled config/default.yaml that sits next to this module."""
    here = Path(__file__).resolve()
    return here.parent / "default.yaml"
17
+
18
+
19
def load_config(path: str | Path | None = None) -> dict[str, Any]:
    """Load the YAML config once and memoise it module-wide.

    Resolution order: explicit *path* argument, then the FOCUSGUARD_CONFIG
    environment variable, then config/default.yaml. A missing file yields an
    empty dict (and caches it).

    NOTE(review): because the result is cached in ``_CONFIG``, the *path*
    argument only takes effect on the first call — confirm this is intended.
    """
    global _CONFIG
    if _CONFIG is not None:
        return _CONFIG
    # Imported lazily so the package stays importable without PyYAML.
    import yaml
    candidate = Path(path or os.environ.get("FOCUSGUARD_CONFIG") or _default_path())
    if not candidate.is_file():
        _CONFIG = {}
        return _CONFIG
    with open(candidate, "r", encoding="utf-8") as fh:
        _CONFIG = yaml.safe_load(fh) or {}
    return _CONFIG
33
+
34
+
35
def get(key_path: str, default: Any = None) -> Any:
    """Look up a dotted key in the loaded config, e.g. get('app.db_path').

    Returns *default* when any segment of the path is missing or the current
    node is not a dict.
    """
    node: Any = load_config()
    for segment in key_path.split("."):
        if isinstance(node, dict) and segment in node:
            node = node[segment]
        else:
            return default
    return node
43
+
44
+
45
+ def flatten_for_clearml(cfg: dict[str, Any] | None = None, prefix: str = "") -> dict[str, Any]:
46
+ """Flatten nested config so every value appears as a ClearML task parameter (no nested dicts)."""
47
+ cfg = cfg if cfg is not None else load_config()
48
+ out = {}
49
+ for k, v in cfg.items():
50
+ key = f"{prefix}/{k}" if prefix else k
51
+ if isinstance(v, dict) and v and not any(isinstance(x, (dict, list)) for x in v.values()):
52
+ for k2, v2 in v.items():
53
+ out[f"{key}/{k2}"] = v2
54
+ elif isinstance(v, dict) and v:
55
+ out.update(flatten_for_clearml(v, key))
56
+ elif isinstance(v, list):
57
+ out[key] = str(v)
58
+ else:
59
+ out[key] = v
60
+ return out
config/clearml_enrich.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Extra ClearML polish: env tags, config snapshot, output model metadata."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import subprocess
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
+ def project_root() -> Path:
12
+ return Path(__file__).resolve().parent.parent
13
+
14
+
15
+ def active_config_path() -> Path:
16
+ env = os.environ.get("FOCUSGUARD_CONFIG")
17
+ if env:
18
+ return Path(env).expanduser()
19
+ return Path(__file__).resolve().parent / "default.yaml"
20
+
21
+
22
+ def enrich_task(task, *, role: str) -> None:
23
+ """Tags for filtering in the UI (Python, OS, torch device, git revision)."""
24
+ tags = [
25
+ role,
26
+ f"py{sys.version_info.major}{sys.version_info.minor}",
27
+ sys.platform.replace(" ", "_"),
28
+ ]
29
+ try:
30
+ import torch
31
+
32
+ ver = torch.__version__.split("+")[0].replace(".", "_")
33
+ tags.append(f"torch_{ver}")
34
+ tags.append("cuda" if torch.cuda.is_available() else "cpu")
35
+ except ImportError:
36
+ tags.append("no_torch")
37
+ rev = _git_short_rev()
38
+ if rev:
39
+ tags.append(f"git_{rev}")
40
+ task.add_tags(tags)
41
+
42
+
43
+ def _git_short_rev() -> str | None:
44
+ root = project_root()
45
+ try:
46
+ p = subprocess.run(
47
+ ["git", "rev-parse", "--short", "HEAD"],
48
+ cwd=str(root),
49
+ capture_output=True,
50
+ text=True,
51
+ timeout=6,
52
+ check=False,
53
+ )
54
+ if p.returncode == 0 and p.stdout:
55
+ return p.stdout.strip()
56
+ except (OSError, subprocess.TimeoutExpired):
57
+ pass
58
+ return None
59
+
60
+
61
def upload_repro_artifacts(task) -> None:
    """Pin the exact YAML config and requirements.txt used for this run.

    Each file is uploaded only when present; a missing file is skipped
    silently so partial checkouts still run.
    """
    for artifact_name, candidate in (
        ("config_yaml", active_config_path()),
        ("requirements_txt", project_root() / "requirements.txt"),
    ):
        if candidate.is_file():
            task.upload_artifact(name=artifact_name, artifact_object=str(candidate))
69
+
70
+
71
+ def attach_output_metrics(output_model, metrics: dict[str, float | str]) -> None:
72
+ """Surface headline metrics on the registered model card."""
73
+ for k, v in metrics.items():
74
+ key = str(k).replace("/", "_")
75
+ try:
76
+ output_model.set_metadata(key, str(v))
77
+ except Exception:
78
+ pass
79
+
80
+
81
def task_done_summary(task, summary: str) -> None:
    """Set a human-readable comment on the task; silently no-op when unsupported.

    Uses getattr so older ClearML SDKs without ``set_comment`` are tolerated,
    and swallows backend errors because the comment is purely cosmetic.
    """
    set_comment = getattr(task, "set_comment", None)
    if not callable(set_comment):
        return
    try:
        set_comment(summary)
    except Exception:
        # Never let a cosmetic comment fail the caller.
        pass
config/default.yaml ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FocusGuard app and model config. Override with FOCUSGUARD_CONFIG env path if needed.
2
+
3
+ app:
4
+ db_path: "focus_guard.db"
5
+ inference_size: [640, 480]
6
+ inference_workers: 4
7
+ default_model: "mlp"
8
+ calibration_verify_target: [0.5, 0.5]
9
+ no_face_confidence_cap: 0.1
10
+
11
+ l2cs_boost:
12
+ base_weight: 0.35
13
+ l2cs_weight: 0.65
14
+ veto_threshold: 0.38
15
+ fused_threshold: 0.52
16
+
17
+ mlp:
18
+ model_name: "face_orientation"
19
+ epochs: 30
20
+ batch_size: 32
21
+ lr: 0.001
22
+ seed: 42
23
+ split_ratios: [0.7, 0.15, 0.15]
24
+ hidden_sizes: [64, 32]
25
+
26
+ xgboost:
27
+ n_estimators: 600
28
+ max_depth: 8
29
+ learning_rate: 0.1489
30
+ subsample: 0.9625
31
+ colsample_bytree: 0.9013
32
+ reg_alpha: 1.1407
33
+ reg_lambda: 2.4181
34
+ eval_metric: "logloss"
35
+
36
+ data:
37
+ split_ratios: [0.7, 0.15, 0.15]
38
+ clip:
39
+ yaw: [-45, 45]
40
+ pitch: [-30, 30]
41
+ roll: [-30, 30]
42
+ ear: [0, 0.85]
43
+ mar: [0, 1.0]
44
+ gaze_offset: [0, 0.50]
45
+ perclos: [0, 0.80]
46
+ blink_rate: [0, 30.0]
47
+ closure_duration: [0, 10.0]
48
+ yawn_duration: [0, 10.0]
49
+
50
+ pipeline:
51
+ geometric:
52
+ max_angle: 22.0
53
+ alpha: 0.7
54
+ beta: 0.3
55
+ threshold: 0.55
56
+ smoother:
57
+ alpha_up: 0.55
58
+ alpha_down: 0.45
59
+ grace_frames: 10
60
+ hybrid_defaults:
61
+ w_mlp: 0.3
62
+ w_geo: 0.7
63
+ threshold: 0.35
64
+ geo_face_weight: 0.7
65
+ geo_eye_weight: 0.3
66
+ mlp_threshold: 0.23
67
+ xgboost_threshold: 0.28
68
+
69
+ evaluation:
70
+ seed: 42
71
+ mlp_sklearn:
72
+ hidden_layer_sizes: [64, 32]
73
+ max_iter: 200
74
+ validation_fraction: 0.15
75
+ geo_weights:
76
+ face: 0.7
77
+ eye: 0.3
78
+ threshold_search:
79
+ alphas: [0.2, 0.85]
80
+ w_mlps: [0.3, 0.85]
data_preparation/README.md ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # data_preparation
2
+
3
+ Handles loading, splitting, scaling, and serving the collected dataset for training and evaluation.
4
+
5
+ ## Links
6
+
7
+ - Participant consent form: [Consent document](https://drive.google.com/file/d/1g1Hc764ffljoKrjApD6nmWDCXJGYTR0j/view?usp=drive_link)
8
+ - Dataset (staff access): [Dataset folder](https://drive.google.com/drive/folders/1fwACM6i6uVGFkTlJKSlqVhizzgrHl_gY?usp=sharing)
9
+
10
+ ## Data collection protocol
11
+
12
+ 9 team members each recorded 5-10 minute webcam sessions using a purpose-built tool (`models/collect_features.py`). During recording:
13
+
14
+ - Participants simulated **focused** behaviour (reading, typing) and **unfocused** behaviour (looking at phone, turning away)
15
+ - Binary labels were annotated in real-time via key presses
16
+ - Sessions were recorded across different rooms, workspaces, and home offices using consumer webcams under varying lighting
17
+ - Real-time quality guidance warned if class balance fell outside 30-70% or if fewer than 10 state transitions occurred
18
+ - An automated post-collection quality report validated minimum duration (120s), sample count (3,000+ frames), balance, and transition frequency
19
+
20
+ All participants provided informed consent for their facial landmark data to be used within this coursework project. Raw video frames are never stored; only the 17-dimensional feature vector and binary labels are saved.
21
+
22
+ Raw participant dataset is excluded from this repository (coursework policy and privacy constraints). It is shared separately via the dataset link above.
23
+
24
+ ## Dataset summary
25
+
26
+ | Metric | Value |
27
+ |--------|-------|
28
+ | Participants | 9 |
29
+ | Total frames | 144,793 |
30
+ | Class balance | 61.5% focused / 38.5% unfocused |
31
+ | Features extracted | 17 per frame |
32
+ | Features selected | 10 (used by ML models) |
33
+
34
+ ## Data format
35
+
36
+ Training data lives under `data/collected_<participant>/` as `.npz` files. Each file contains:
37
+
38
+ | Key | Shape | Description |
39
+ |-----|-------|-------------|
40
+ | `features` | (N, 17) | Float array of extracted features |
41
+ | `labels` | (N,) | Binary: 0 = unfocused, 1 = focused |
42
+ | `feature_names` | (17,) | String names matching `FEATURE_NAMES` in `collect_features.py` |
43
+
44
+ Data files are not included in this repository due to privacy considerations.
45
+
46
+ ## Files
47
+
48
+ | File | Purpose |
49
+ |------|---------|
50
+ | `prepare_dataset.py` | Core data pipeline: loads `.npz`, applies feature selection, stratified splits, StandardScaler on train only |
51
+ | `data_exploration.ipynb` | Exploratory analysis: feature distributions, class balance, per-person statistics, correlation heatmaps |
52
+
53
+ ## Feature selection
54
+
55
+ `SELECTED_FEATURES["face_orientation"]` defines the 10 features used by all ML models:
56
+
57
+ **Head pose (3):** `head_deviation`, `s_face`, `pitch`
58
+ **Eye state (4):** `ear_left`, `ear_right`, `ear_avg`, `perclos`
59
+ **Gaze (3):** `h_gaze`, `gaze_offset`, `s_eye`
60
+
61
+ Excluded: `v_gaze` (noisy), `mar` (1.7% trigger rate), `yaw`/`roll` (redundant with `head_deviation`/`s_face`), `blink_rate`/`closure_duration`/`yawn_duration` (temporal overlap with `perclos`).
62
+
63
+ Selection was validated by XGBoost gain importance and LOPO channel ablation:
64
+
65
+ | Channel subset | Mean LOPO F1 |
66
+ |---------------|-------------|
67
+ | All 10 features | 0.829 |
68
+ | Eye state only | 0.807 |
69
+ | Head pose only | 0.748 |
70
+ | Gaze only | 0.726 |
71
+
72
+ ## Key functions
73
+
74
+ | Function | What it does |
75
+ |----------|-------------|
76
+ | `load_all_pooled(model_name)` | Concatenates all participant data into one array |
77
+ | `load_per_person(model_name)` | Returns `{person: (X, y)}` dict for LOPO cross-validation |
78
+ | `get_numpy_splits(model_name)` | Returns scaled train/val/test numpy arrays (70/15/15 split) |
79
+ | `get_dataloaders(model_name)` | Returns PyTorch DataLoaders for MLP training |
80
+ | `get_default_split_config()` | Returns split ratios and seed from `config/default.yaml` |
81
+
82
+ ## Data cleaning
83
+
84
+ Applied before splitting (in `ui/pipeline.py` at inference, in `prepare_dataset.py` for training):
85
+
86
+ 1. Angles clipped to physiological ranges (yaw +/-45, pitch/roll +/-30)
87
+ 2. `head_deviation` recomputed from clipped angles (not clipped after computation)
88
+ 3. EAR clipped to [0, 0.85], MAR to [0, 1.0]
89
+ 4. Physiological bounds on gaze_offset, PERCLOS, blink_rate, closure/yawn duration
90
+ 5. StandardScaler fit on training split only, applied to val/test
data_preparation/__init__.py ADDED
File without changes
data_preparation/data_exploration.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
data_preparation/prepare_dataset.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Single source for pooled train/val/test data and splits.
3
+
4
+ - Data: load_all_pooled() / load_per_person() from data/collected_*/*.npz (same pattern everywhere).
5
+ - Splits: get_numpy_splits() / get_dataloaders() use stratified train/val/test with a fixed seed from config.
6
+ - Test is held out before any preprocessing; StandardScaler is fit on train only, then applied to val and test.
7
+ """
8
+
9
+ import os
10
+ import glob
11
+
12
+ import numpy as np
13
+ from sklearn.preprocessing import StandardScaler
14
+ from sklearn.model_selection import train_test_split
15
+
16
+ torch = None
17
+ Dataset = object # type: ignore
18
+ DataLoader = None
19
+
20
+ # Defaults for stratified split (overridden by config when available)
21
+ _DEFAULT_SPLIT_RATIOS = (0.7, 0.15, 0.15)
22
+ _DEFAULT_SPLIT_SEED = 42
23
+
24
+
25
def _require_torch():
    """Import torch lazily and cache the handles in module globals.

    Keeps this module importable (e.g. for XGBoost-only scripts) on machines
    without PyTorch; raises a clear ImportError only when torch is actually
    needed. Returns (torch, Dataset, DataLoader).
    """
    global torch, Dataset, DataLoader
    if torch is None:
        try:
            import torch as torch_mod
            from torch.utils.data import DataLoader as loader_cls
            from torch.utils.data import Dataset as dataset_cls
        except ImportError as exc:  # pragma: no cover
            raise ImportError("PyTorch not installed") from exc

        torch = torch_mod
        Dataset = dataset_cls  # type: ignore
        DataLoader = loader_cls  # type: ignore

    return torch, Dataset, DataLoader
39
+
40
+ DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data")
41
+
42
+ SELECTED_FEATURES = {
43
+ "face_orientation": [
44
+ 'head_deviation', 's_face', 's_eye', 'h_gaze', 'pitch',
45
+ 'ear_left', 'ear_avg', 'ear_right', 'gaze_offset', 'perclos'
46
+ ],
47
+ "eye_behaviour": [
48
+ 'ear_left', 'ear_right', 'ear_avg', 'mar',
49
+ 'blink_rate', 'closure_duration', 'perclos', 'yawn_duration'
50
+ ]
51
+ }
52
+
53
+
54
class FeatureVectorDataset(Dataset):
    """Thin torch Dataset over pre-extracted feature vectors and binary labels."""

    def __init__(self, features: np.ndarray, labels: np.ndarray):
        torch_mod, _, _ = _require_torch()
        # float32 features for the MLP; long labels for CrossEntropyLoss.
        self.features = torch_mod.tensor(features, dtype=torch_mod.float32)
        self.labels = torch_mod.tensor(labels, dtype=torch_mod.long)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]
65
+
66
+
67
+ # ── Low-level helpers ────────────────────────────────────────────────────
68
+
69
def _clean_npz(raw, names):
    """Clip physiologically impossible values in-place; shared by all loaders.

    Angles: yaw to [-45, 45], pitch/roll to [-30, 30]. EAR columns to
    [0, 0.85]. Columns absent from *names* are skipped.
    NOTE(review): config/default.yaml lists additional bounds (mar,
    gaze_offset, perclos, ...) that are not applied here — confirm intended.
    """
    angle_bounds = {'yaw': (-45, 45), 'pitch': (-30, 30), 'roll': (-30, 30)}
    for col, (lo, hi) in angle_bounds.items():
        if col in names:
            j = names.index(col)
            raw[:, j] = np.clip(raw[:, j], lo, hi)
    for col in ('ear_left', 'ear_right', 'ear_avg'):
        if col in names:
            j = names.index(col)
            raw[:, j] = np.clip(raw[:, j], 0, 0.85)
    return raw
78
+
79
+
80
def _load_one_npz(npz_path, target_features):
    """Load one .npz capture, clean it, and select *target_features*.

    Returns (X, y, selected_feature_names); requested features missing from
    the file are silently dropped from the selection.
    """
    archive = np.load(npz_path, allow_pickle=True)
    matrix = archive['features'].astype(np.float32)
    labels = archive['labels'].astype(np.int64)
    available = list(archive['feature_names'])
    matrix = _clean_npz(matrix, available)
    keep = [name for name in target_features if name in available]
    columns = [available.index(name) for name in keep]
    return matrix[:, columns], labels, keep
90
+
91
+
92
+ # ── Public data loaders ──────────────────────────────────────────────────
93
+
94
def load_all_pooled(model_name: str = "face_orientation", data_dir: str = None):
    """Load every collected_*/*.npz, clean, select features, and concatenate.

    Returns (X_all, y_all, all_feature_names). Raises FileNotFoundError with
    a pointer to the collection tool when no data has been gathered yet.
    """
    data_dir = data_dir or DATA_DIR
    target_features = SELECTED_FEATURES.get(model_name, SELECTED_FEATURES["face_orientation"])
    pattern = os.path.join(data_dir, "collected_*", "*.npz")
    npz_files = sorted(glob.glob(pattern))

    if not npz_files:
        raise FileNotFoundError(
            f"No .npz files matching {pattern}. "
            "Collect data first with `python -m models.collect_features --name <name>`."
        )

    feature_names = None
    chunks_X, chunks_y = [], []
    for path in npz_files:
        X, y, names = _load_one_npz(path, target_features)
        if feature_names is None:
            # All files share a schema; record the selection once.
            feature_names = names
        chunks_X.append(X)
        chunks_y.append(y)
        print(f"[DATA] + {os.path.basename(path)}: {X.shape[0]} samples")

    X_all = np.concatenate(chunks_X, axis=0)
    y_all = np.concatenate(chunks_y, axis=0)
    print(f"[DATA] Loaded {len(npz_files)} file(s) for '{model_name}': "
          f"{X_all.shape[0]} total samples, {X_all.shape[1]} features")
    return X_all, y_all, feature_names
125
+
126
+
127
def load_per_person(model_name: str = "face_orientation", data_dir: str = None):
    """Load collected_*/*.npz grouped by participant (folder name).

    Returns (by_person, X_all, y_all): by_person maps person -> (X, y)
    arrays concatenated across that person's capture files, and X_all/y_all
    are the pooled data. Used for LOPO cross-validation.
    """
    data_dir = data_dir or DATA_DIR
    target_features = SELECTED_FEATURES.get(model_name, SELECTED_FEATURES["face_orientation"])
    pattern = os.path.join(data_dir, "collected_*", "*.npz")
    npz_files = sorted(glob.glob(pattern))

    if not npz_files:
        raise FileNotFoundError(f"No .npz files matching {pattern}")

    per_person_chunks = {}
    pooled_X, pooled_y = [], []
    for path in npz_files:
        folder = os.path.basename(os.path.dirname(path))
        person = folder.replace("collected_", "", 1)
        X, y, _ = _load_one_npz(path, target_features)
        pooled_X.append(X)
        pooled_y.append(y)
        per_person_chunks.setdefault(person, []).append((X, y))
        print(f"[DATA] + {person}/{os.path.basename(path)}: {X.shape[0]} samples")

    by_person = {
        person: (
            np.concatenate([X for X, _ in chunks], axis=0),
            np.concatenate([y for _, y in chunks], axis=0),
        )
        for person, chunks in per_person_chunks.items()
    }

    X_all = np.concatenate(pooled_X, axis=0)
    y_all = np.concatenate(pooled_y, axis=0)
    print(f"[DATA] {len(by_person)} persons, {X_all.shape[0]} total samples, {X_all.shape[1]} features")
    return by_person, X_all, y_all
164
+
165
+
166
def load_raw_npz(npz_path):
    """Load a single .npz with no cleaning or feature selection.

    Intended for exploration notebooks that want the raw 17-feature matrix.
    Returns (features, labels, feature_names).
    """
    archive = np.load(npz_path, allow_pickle=True)
    return (
        archive['features'].astype(np.float32),
        archive['labels'].astype(np.int64),
        list(archive['feature_names']),
    )
173
+
174
+
175
+ # ── Legacy helpers (used by models/mlp/train.py and models/xgboost/train.py) ─
176
+
177
def _load_real_data(model_name: str):
    """Pooled (X, y) for *model_name*; thin wrapper kept for legacy trainers."""
    features, labels, _names = load_all_pooled(model_name)
    return features, labels
180
+
181
+
182
def _generate_synthetic_data(model_name: str):
    """Random stand-in data matching the selected feature count (smoke tests only)."""
    target_features = SELECTED_FEATURES.get(model_name, SELECTED_FEATURES["face_orientation"])
    n, d, c = 500, len(target_features), 2
    rng = np.random.RandomState(42)  # fixed seed: deterministic smoke data
    features = rng.randn(n, d).astype(np.float32)
    labels = rng.randint(0, c, size=n).astype(np.int64)
    print(f"[DATA] Using synthetic data for '{model_name}': {n} samples, {d} features, {c} classes")
    return features, labels
192
+
193
+
194
def get_default_split_config():
    """Return (split_ratios, seed) from config so every script shares one split.

    Falls back to module defaults when the config package (or its keys) is
    unavailable, keeping standalone runs reproducible.
    """
    try:
        from config import get
        data_cfg = get("data") or {}
        ratios = data_cfg.get("split_ratios", list(_DEFAULT_SPLIT_RATIOS))
        seed = get("mlp.seed") or _DEFAULT_SPLIT_SEED
        return (tuple(ratios), int(seed))
    except Exception:
        return (_DEFAULT_SPLIT_RATIOS, _DEFAULT_SPLIT_SEED)
204
+
205
+
206
def _split_and_scale(features, labels, split_ratios, seed, scale):
    """Stratified train/val/test split with optional train-fit scaling.

    Test is carved out first; val is then split from the remainder, so no
    training sample leaks into evaluation. When *scale* is set, a
    StandardScaler is fit on train only and applied to val/test.
    Returns (splits_dict, scaler_or_None).
    """
    test_ratio = split_ratios[2]
    # val fraction relative to the remaining (train + val) pool.
    val_ratio = split_ratios[1] / (split_ratios[0] + split_ratios[1])

    X_rest, X_test, y_rest, y_test = train_test_split(
        features, labels, test_size=test_ratio, random_state=seed, stratify=labels,
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=val_ratio, random_state=seed, stratify=y_rest,
    )

    scaler = StandardScaler() if scale else None
    if scaler is not None:
        X_train = scaler.fit_transform(X_train)
        X_val = scaler.transform(X_val)
        X_test = scaler.transform(X_test)
        print("[DATA] Applied StandardScaler (fitted on training split only)")

    splits = {
        "X_train": X_train, "y_train": y_train,
        "X_val": X_val, "y_val": y_val,
        "X_test": X_test, "y_test": y_test,
    }
    print(f"[DATA] Split (stratified): train={len(y_train)}, val={len(y_val)}, test={len(y_test)}")
    return splits, scaler
237
+
238
+
239
def get_numpy_splits(model_name: str, split_ratios=None, seed=None, scale: bool = True):
    """Return scaled train/val/test numpy splits plus feature/class counts.

    Uses config defaults for split_ratios/seed when None, so every script
    evaluates on the same partition as get_dataloaders.
    Returns (splits_dict, num_features, num_classes, scaler).
    """
    if split_ratios is None or seed is None:
        default_ratios, default_seed = get_default_split_config()
        if split_ratios is None:
            split_ratios = default_ratios
        if seed is None:
            seed = default_seed
    features, labels = _load_real_data(model_name)
    num_features = features.shape[1]
    num_classes = int(labels.max()) + 1
    if num_classes < 2:
        raise ValueError("Dataset has only one class; need at least 2 for classification.")
    splits, scaler = _split_and_scale(features, labels, split_ratios, seed, scale)
    return splits, num_features, num_classes, scaler
253
+
254
+
255
def get_dataloaders(model_name: str, batch_size: int = 32, split_ratios=None, seed=None, scale: bool = True):
    """Return PyTorch train/val/test DataLoaders plus feature/class counts.

    Shares the exact split logic with get_numpy_splits: the test set is held
    out before preprocessing and the scaler is fit on train only.
    Returns (train_loader, val_loader, test_loader, num_features, num_classes, scaler).
    """
    if split_ratios is None or seed is None:
        default_ratios, default_seed = get_default_split_config()
        if split_ratios is None:
            split_ratios = default_ratios
        if seed is None:
            seed = default_seed
    _, _, dataloader_cls = _require_torch()
    features, labels = _load_real_data(model_name)
    num_features = features.shape[1]
    num_classes = int(labels.max()) + 1
    if num_classes < 2:
        raise ValueError("Dataset has only one class; need at least 2 for classification.")
    splits, scaler = _split_and_scale(features, labels, split_ratios, seed, scale)

    # Only the training loader shuffles; val/test stay in a fixed order.
    loaders = []
    for part, shuffle in (("train", True), ("val", False), ("test", False)):
        dataset = FeatureVectorDataset(splits[f"X_{part}"], splits[f"y_{part}"])
        loaders.append(dataloader_cls(dataset, batch_size=batch_size, shuffle=shuffle))
    train_loader, val_loader, test_loader = loaders

    return train_loader, val_loader, test_loader, num_features, num_classes, scaler
279
+
docker-compose.yml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ services:
2
+ focus-guard:
3
+ build: .
4
+ ports:
5
+ - "7860:7860"
download_l2cs_weights.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # Downloads L2CS-Net Gaze360 weights into checkpoints/
3
+
4
+ import os
5
+ import sys
6
+
7
+ CHECKPOINTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "checkpoints")
8
+ DEST = os.path.join(CHECKPOINTS_DIR, "L2CSNet_gaze360.pkl")
9
+ GDRIVE_ID = "1dL2Jokb19_SBSHAhKHOxJsmYs5-GoyLo"
10
+
11
+
12
def main():
    """Fetch the L2CS-Net Gaze360 weights via gdown unless already present.

    Exits with status 1 when gdown is missing or the download fails, printing
    manual-download instructions in the failure case.
    """
    if os.path.isfile(DEST):
        print(f"[OK] Weights already at {DEST}")
        return

    try:
        import gdown
    except ImportError:
        print("gdown not installed. Run: pip install gdown")
        sys.exit(1)

    os.makedirs(CHECKPOINTS_DIR, exist_ok=True)
    print(f"Downloading L2CS-Net weights to {DEST} ...")
    gdown.download(f"https://drive.google.com/uc?id={GDRIVE_ID}", DEST, quiet=False)

    if not os.path.isfile(DEST):
        print("[ERR] Download failed. Manual download:")
        print(" https://drive.google.com/drive/folders/17p6ORr-JQJcw-eYtG2WGNiuS_qVKwdWd")
        print(f" Place L2CSNet_gaze360.pkl in {CHECKPOINTS_DIR}/")
        sys.exit(1)

    size_mb = os.path.getsize(DEST) / 1024 / 1024
    print(f"[OK] Downloaded ({size_mb:.1f} MB)")


if __name__ == "__main__":
    main()
eslint.config.js ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import js from '@eslint/js'
2
+ import globals from 'globals'
3
+ import reactHooks from 'eslint-plugin-react-hooks'
4
+ import reactRefresh from 'eslint-plugin-react-refresh'
5
+ import { defineConfig, globalIgnores } from 'eslint/config'
6
+
7
+ export default defineConfig([
8
+ globalIgnores([
9
+ 'dist',
10
+ 'node_modules',
11
+ '.venv',
12
+ 'venv',
13
+ 'static',
14
+ 'coverage',
15
+ 'htmlcov',
16
+ ]),
17
+ {
18
+ files: ['vite.config.js'],
19
+ languageOptions: { globals: globals.node },
20
+ },
21
+ {
22
+ files: ['**/*.{js,jsx}'],
23
+ ignores: ['vite.config.js'],
24
+ extends: [
25
+ js.configs.recommended,
26
+ reactHooks.configs.flat.recommended,
27
+ reactRefresh.configs.vite,
28
+ ],
29
+ languageOptions: {
30
+ ecmaVersion: 2020,
31
+ globals: globals.browser,
32
+ parserOptions: {
33
+ ecmaVersion: 'latest',
34
+ ecmaFeatures: { jsx: true },
35
+ sourceType: 'module',
36
+ },
37
+ },
38
+ rules: {
39
+ 'no-unused-vars': ['error', { varsIgnorePattern: '^[A-Z_]' }],
40
+ },
41
+ },
42
+ ])
evaluation/GROUPED_SPLIT_BENCHMARK.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Grouped vs pooled split benchmark
2
+
3
+ This compares the same XGBoost config under two evaluation protocols.
4
+
5
+ Config: `{'n_estimators': 600, 'max_depth': 8, 'learning_rate': 0.1489, 'subsample': 0.9625, 'colsample_bytree': 0.9013, 'reg_alpha': 1.1407, 'reg_lambda': 2.4181, 'eval_metric': 'logloss'}`
6
+ Quick mode: yes (n_estimators=200)
7
+
8
+ | Protocol | Accuracy | F1 (weighted) | ROC-AUC |
9
+ |----------|---------:|--------------:|--------:|
10
+ | Pooled random split (70/15/15) | 0.9510 | 0.9507 | 0.9869 |
11
+ | Grouped LOPO (9 folds) | 0.8303 | 0.8304 | 0.8801 |
12
+
13
+ Use grouped LOPO as the primary generalisation metric when reporting model quality.
evaluation/README.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # evaluation
2
+
3
+ Systematic evaluation scripts and generated reports. All evaluation uses Leave-One-Person-Out (LOPO) cross-validation over 9 participants (~145k samples) as the primary generalisation metric.
4
+
5
+ ## Scripts
6
+
7
+ | Script | What it does | Runtime |
8
+ |--------|-------------|---------|
9
+ | `justify_thresholds.py` | LOPO threshold search (Youden's J) for MLP and XGBoost; geometric alpha grid search; hybrid w_mlp grid search | ~10-15 min |
10
+ | `feature_importance.py` | XGBoost gain importance + leave-one-feature-out LOPO ablation | ~20 min (full) |
11
+ | `grouped_split_benchmark.py` | Compares pooled random split vs LOPO on the same XGBoost config | ~5 min |
12
+
13
+ ### Quick mode
14
+
15
+ Add `--quick` to reduce tree count for faster iteration:
16
+
17
+ ```bash
18
+ python -m evaluation.grouped_split_benchmark --quick
19
+ python -m evaluation.feature_importance --quick --skip-lofo
20
+ ```
21
+
22
+ ### ClearML support
23
+
24
+ ```bash
25
+ USE_CLEARML=1 python -m evaluation.justify_thresholds --clearml
26
+ ```
27
+
28
+ Logs threshold search results, weight grid searches, and generated reports as ClearML artifacts.
29
+
30
+ ## Generated reports
31
+
32
+ | Report | Contents |
33
+ |--------|----------|
34
+ | `THRESHOLD_JUSTIFICATION.md` | ML thresholds (MLP t*=0.228, XGBoost t*=0.280), geometric weights (alpha=0.7), hybrid weights (w_mlp=0.3), EAR/MAR physiological constants |
35
+ | `GROUPED_SPLIT_BENCHMARK.md` | Pooled (95.1% acc) vs LOPO (83.0% acc) comparison |
36
+ | `feature_selection_justification.md` | Domain rationale, XGBoost gain ranking, channel ablation results |
37
+
38
+ ## Generated plots
39
+
40
+ All plots are in `plots/` and referenced by the generated reports.
41
+
42
+ ### ROC curves (LOPO, 9 folds, 144k samples)
43
+
44
+ | Plot | Model | AUC | Optimal threshold |
45
+ |------|-------|-----|-------------------|
46
+ | ![MLP ROC](plots/roc_mlp.png) | MLP | 0.862 | 0.228 |
47
+ | ![XGBoost ROC](plots/roc_xgb.png) | XGBoost | 0.870 | 0.280 |
48
+
49
+ Red dots mark the Youden's J optimal operating points. Both thresholds fall well below 0.50 due to cross-person probability compression under LOPO.
50
+
51
+ ### Confusion matrices
52
+
53
+ | MLP | XGBoost |
54
+ |-----|---------|
55
+ | ![MLP CM](plots/confusion_matrix_mlp.png) | ![XGBoost CM](plots/confusion_matrix_xgb.png) |
56
+
57
+ ### Weight grid searches
58
+
59
+ | Geometric alpha search | Hybrid w_mlp search |
60
+ |----------------------|-------------------|
61
+ | ![Geo weights](plots/geo_weight_search.png) | ![Hybrid weights](plots/hybrid_weight_search.png) |
62
+
63
+ Geometric pipeline: face-dominant weighting (alpha=0.7) generalises best across participants.
64
+ Hybrid pipeline: low MLP weight (0.3) with strong geometric anchor gives the best LOPO F1 (0.841).
65
+
66
+ ### Physiological distributions
67
+
68
+ | EAR distribution | MAR distribution |
69
+ |-----------------|-----------------|
70
+ | ![EAR](plots/ear_distribution.png) | ![MAR](plots/mar_distribution.png) |
71
+
72
+ EAR thresholds (closed=0.16, blink=0.21, open=0.30) and MAR yawn threshold (0.55) are validated against these distributions.
73
+
74
+ ## Key findings
75
+
76
+ 1. LOPO drops ~12 pp vs pooled split, confirming the importance of person-independent evaluation
77
+ 2. Threshold optimisation alone yields +2-4 pp F1 without retraining
78
+ 3. All three feature channels contribute (removing any one drops F1 by 2-10 pp)
79
+ 4. `s_face` and `ear_right` are the highest-gain features, confirming that head pose and eye state are the strongest focus indicators
80
+ 5. The geometric anchor (70% weight) stabilises the hybrid model against per-person variance
81
+
82
+ ## Evaluation logs
83
+
84
+ Training logs (per-epoch CSVs and JSON summaries) are written to `logs/` by the MLP and XGBoost training scripts.
evaluation/THRESHOLD_JUSTIFICATION.md ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Threshold Justification Report
2
+
3
+ Auto-generated by `evaluation/justify_thresholds.py` using LOPO cross-validation over 9 participants (~145k samples).
4
+
5
+ ## 0. Latest random split checkpoints (15% test split)
6
+
7
+ From the latest training runs:
8
+
9
+ | Model | Accuracy | F1 | ROC-AUC |
10
+ |-------|----------|-----|---------|
11
+ | XGBoost | 95.87% | 0.9585 | 0.9908 |
12
+ | MLP | 92.92% | 0.9287 | 0.9714 |
13
+
14
+ ## 1. ML Model Decision Thresholds
15
+
16
+ XGBoost config used for this report: `{'n_estimators': 600, 'max_depth': 8, 'learning_rate': 0.1489, 'subsample': 0.9625, 'colsample_bytree': 0.9013, 'reg_alpha': 1.1407, 'reg_lambda': 2.4181, 'eval_metric': 'logloss'}`.
17
+
18
+ Thresholds selected via **Youden's J statistic** (J = sensitivity + specificity - 1) on pooled LOPO held-out predictions.
19
+
20
+ | Model | LOPO AUC | Optimal Threshold (Youden's J) | F1 @ Optimal | F1 @ 0.50 |
21
+ |-------|----------|-------------------------------|--------------|-----------|
22
+ | MLP | 0.8624 | **0.228** | 0.8578 | 0.8149 |
23
+ | XGBoost | 0.8695 | **0.280** | 0.8549 | 0.8324 |
24
+
25
+ ![MLP ROC](plots/roc_mlp.png)
26
+
27
+ ![XGBoost ROC](plots/roc_xgboost.png)
28
+
29
+ ## 2. Geometric Pipeline Weights (s_face vs s_eye)
30
+
31
+ Grid search over face weight alpha in {0.2 ... 0.8}. Eye weight = 1 - alpha. Threshold per fold via Youden's J.
32
+
33
+ | Face Weight (alpha) | Mean LOPO F1 |
34
+ |--------------------:|-------------:|
35
+ | 0.2 | 0.7926 |
36
+ | 0.3 | 0.8002 |
37
+ | 0.4 | 0.7719 |
38
+ | 0.5 | 0.7868 |
39
+ | 0.6 | 0.8184 |
40
+ | 0.7 | 0.8195 **<-- selected** |
41
+ | 0.8 | 0.8126 |
42
+
43
+ **Best:** alpha = 0.7 (face 70%, eye 30%)
44
+
45
+ ![Geometric weight search](plots/geo_weight_search.png)
46
+
47
+ ## 3. Hybrid Pipeline Weights (MLP vs Geometric)
48
+
49
+ Grid search over w_mlp in {0.3 ... 0.8}. w_geo = 1 - w_mlp. Geometric sub-score uses same weights as geometric pipeline (face=0.7, eye=0.3). If you change geometric weights, re-run this script — optimal w_mlp can shift.
50
+
51
+ | MLP Weight (w_mlp) | Mean LOPO F1 |
52
+ |-------------------:|-------------:|
53
+ | 0.3 | 0.8409 **<-- selected** |
54
+ | 0.4 | 0.8246 |
55
+ | 0.5 | 0.8164 |
56
+ | 0.6 | 0.8106 |
57
+ | 0.7 | 0.8039 |
58
+ | 0.8 | 0.8016 |
59
+
60
+ **Best:** w_mlp = 0.3 (MLP 30%, geometric 70%)
61
+
62
+ ![Hybrid weight search](plots/hybrid_weight_search.png)
63
+
64
+ ## 4. Eye and Mouth Aspect Ratio Thresholds
65
+
66
+ ### EAR (Eye Aspect Ratio)
67
+
68
+ Reference: Soukupova & Cech, "Real-Time Eye Blink Detection Using Facial Landmarks" (2016) established EAR ~ 0.2 as a blink threshold.
69
+
70
+ Our thresholds define a linear interpolation zone around this established value:
71
+
72
+ | Constant | Value | Justification |
73
+ |----------|------:|---------------|
74
+ | `ear_closed` | 0.16 | Below this, eyes are fully shut. 16.3% of samples fall here. |
75
+ | `EAR_BLINK_THRESH` | 0.21 | Blink detection point; close to the 0.2 reference. 21.2% of samples below. |
76
+ | `ear_open` | 0.30 | Above this, eyes are fully open. 70.4% of samples here. |
77
+
78
+ Between 0.16 and 0.30 the `_ear_score` function linearly interpolates from 0 to 1, providing a smooth transition rather than a hard binary cutoff.
79
+
80
+ ![EAR distribution](plots/ear_distribution.png)
81
+
82
+ ### MAR (Mouth Aspect Ratio)
83
+
84
+ | Constant | Value | Justification |
85
+ |----------|------:|---------------|
86
+ | `MAR_YAWN_THRESHOLD` | 0.55 | Only 1.7% of samples exceed this, confirming it captures genuine yawns without false positives. |
87
+
88
+ ![MAR distribution](plots/mar_distribution.png)
89
+
90
+ ## 5. Other Constants
91
+
92
+ | Constant | Value | Rationale |
93
+ |----------|------:|-----------|
94
+ | `gaze_max_offset` | 0.28 | Max iris displacement (normalised) before gaze score drops to zero. Corresponds to ~56% of the eye width; beyond this the iris is at the extreme edge. |
95
+ | `max_angle` | 22.0 deg | Head deviation beyond which face score = 0. Based on typical monitor-viewing cone: at 60 cm distance and a 24" monitor, the viewing angle is ~20-25 degrees. |
96
+ | `roll_weight` | 0.5 | Roll is less indicative of inattention than yaw/pitch (tilting head doesn't mean looking away), so it's down-weighted by 50%. |
97
+ | `EMA alpha` | 0.3 | Smoothing factor for focus score. Gives ~3-4 frame effective window; balances responsiveness vs flicker. |
98
+ | `grace_frames` | 15 | ~0.5 s at 30 fps before penalising no-face. Allows brief occlusions (e.g. hand gesture) without dropping score. |
99
+ | `PERCLOS_WINDOW` | 60 frames | 2 s at 30 fps; standard PERCLOS measurement window (Dinges & Grace, 1998). |
100
+ | `BLINK_WINDOW_SEC` | 30 s | Blink rate measured over 30 s; typical spontaneous blink rate is 15-20/min (Bentivoglio et al., 1997). |
evaluation/feature_importance.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Feature importance and leave-one-feature-out ablation for the 10 face_orientation features.
3
+ Run: python -m evaluation.feature_importance
4
+
5
+ Outputs:
6
+ - XGBoost gain-based importance (from trained checkpoint)
7
+ - Leave-one-feature-out LOPO F1 (ablation): drop each feature in turn, report mean LOPO F1.
8
+ - Writes evaluation/feature_selection_justification.md
9
+ """
10
+
11
+ import os
12
+ import sys
13
+ import argparse
14
+
15
+ import numpy as np
16
+ from sklearn.preprocessing import StandardScaler
17
+ from sklearn.metrics import f1_score
18
+ from xgboost import XGBClassifier
19
+
20
+ _PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
21
+ if _PROJECT_ROOT not in sys.path:
22
+ sys.path.insert(0, _PROJECT_ROOT)
23
+
24
+ from data_preparation.prepare_dataset import get_default_split_config, load_per_person, SELECTED_FEATURES
25
+ from models.xgboost.config import XGB_BASE_PARAMS, build_xgb_classifier, get_xgb_params
26
+
27
+ _, SEED = get_default_split_config()
28
+ FEATURES = SELECTED_FEATURES["face_orientation"]
29
+
30
+
31
def _resolve_xgb_path():
    """Absolute path of the trained XGBoost checkpoint file."""
    checkpoint_name = "xgboost_face_orientation_best.json"
    return os.path.join(_PROJECT_ROOT, "checkpoints", checkpoint_name)
33
+
34
+
35
def xgb_feature_importance():
    """Return {feature_name: gain} from the trained XGBoost checkpoint.

    Returns None (with a warning) when no checkpoint file is present.
    """
    ckpt = _resolve_xgb_path()
    if not os.path.isfile(ckpt):
        print(f"[WARN] No XGBoost checkpoint at {ckpt}; skip importance.")
        return None

    clf = XGBClassifier()
    clf.load_model(ckpt)
    raw_gain = clf.get_booster().get_score(importance_type="gain")

    # The booster reports keys as f0, f1, ...; map them back onto the
    # training feature order, defaulting to 0.0 for features the trees
    # never split on (get_score omits those entirely).
    gain_by_index = {}
    for key, gain in raw_gain.items():
        if key.startswith("f"):
            gain_by_index[int(key[1:])] = gain
    return {name: gain_by_index.get(i, 0.0) for i, name in enumerate(FEATURES)}
48
+
49
+
50
def _make_eval_model(seed: int, quick: bool):
    """Build the XGBoost classifier used for ablation runs.

    quick=True swaps in a lighter 200-tree model for faster iteration;
    otherwise the full project configuration is used unchanged.
    """
    if quick:
        params = get_xgb_params()
        params.update(n_estimators=200, random_state=seed, verbosity=0)
        return XGBClassifier(**params)
    return build_xgb_classifier(seed, verbosity=0)
59
+
60
+
61
def run_ablation_lopo(by_person, persons, quick: bool):
    """Leave-one-feature-out ablation under LOPO cross-validation.

    For each feature, train XGBoost on the remaining features with
    leave-one-person-out folds and record the mean weighted F1.

    Args:
        by_person: dict person_id -> (X, y) numpy arrays (full-feature X).
        persons: ordered list of person ids (keys of ``by_person``).
        quick: use the lighter 200-tree model (see ``_make_eval_model``).

    Returns:
        dict feature_name -> mean LOPO weighted F1 with that feature dropped.
    """
    # The concatenated training folds depend only on the held-out person,
    # not on which feature is dropped, so build them once up front instead
    # of re-concatenating ~145k rows on every (feature, person) pair.
    train_cache = {
        held_out: (
            np.concatenate([by_person[p][0] for p in persons if p != held_out]),
            np.concatenate([by_person[p][1] for p in persons if p != held_out]),
        )
        for held_out in persons
    }

    results = {}
    for drop_feat in FEATURES:
        print(f" -> dropping {drop_feat} ({len(results)+1}/{len(FEATURES)})")
        idx_keep = [i for i, f in enumerate(FEATURES) if f != drop_feat]
        f1s = []
        for held_out in persons:
            train_X, train_y = train_cache[held_out]
            X_test, y_test = by_person[held_out]

            # Column-subset, then scale; scaler fit on training folds only.
            X_tr = train_X[:, idx_keep]
            X_te = X_test[:, idx_keep]
            scaler = StandardScaler().fit(X_tr)
            X_tr_sc = scaler.transform(X_tr)
            X_te_sc = scaler.transform(X_te)

            xgb = _make_eval_model(SEED, quick)
            xgb.fit(X_tr_sc, train_y)
            pred = xgb.predict(X_te_sc)
            f1s.append(f1_score(y_test, pred, average="weighted"))
        results[drop_feat] = np.mean(f1s)
    return results
85
+
86
+
87
def run_baseline_lopo_f1(by_person, persons, quick: bool):
    """Reference mean LOPO weighted F1 using the full feature set."""
    fold_scores = []
    for holdout in persons:
        others = [p for p in persons if p != holdout]
        X_train = np.concatenate([by_person[p][0] for p in others])
        y_train = np.concatenate([by_person[p][1] for p in others])
        X_eval, y_eval = by_person[holdout]

        # Scaler is fit on the training folds only to avoid leakage.
        scaler = StandardScaler().fit(X_train)
        model = _make_eval_model(SEED, quick)
        model.fit(scaler.transform(X_train), y_train)
        predictions = model.predict(scaler.transform(X_eval))
        fold_scores.append(f1_score(y_eval, predictions, average="weighted"))
    return np.mean(fold_scores)
102
+
103
+
104
# Channel subsets for ablation (subset name -> list of feature names)
# Each subset isolates one attention "channel" so run_channel_ablation can
# measure how well that channel alone predicts focus under LOPO.
CHANNEL_SUBSETS = {
    "head_pose": ["head_deviation", "s_face", "pitch"],
    "eye_state": ["ear_left", "ear_avg", "ear_right", "perclos"],
    "gaze": ["h_gaze", "gaze_offset", "s_eye"],
}
110
+
111
+
112
def run_channel_ablation(by_person, persons, quick: bool, baseline: float):
    """Mean LOPO F1 for each single-channel feature subset plus the full set.

    Returns a dict mapping subset name ("head_pose", "eye_state", "gaze",
    "all_10") to mean LOPO weighted F1; "all_10" is the supplied baseline.
    """

    def _lopo_f1(column_ids):
        # One full LOPO pass restricted to the given feature columns.
        scores = []
        for holdout in persons:
            others = [p for p in persons if p != holdout]
            X_train = np.concatenate([by_person[p][0] for p in others])[:, column_ids]
            y_train = np.concatenate([by_person[p][1] for p in others])
            X_eval, y_eval = by_person[holdout]
            X_eval = X_eval[:, column_ids]

            scaler = StandardScaler().fit(X_train)
            model = _make_eval_model(SEED, quick)
            model.fit(scaler.transform(X_train), y_train)
            preds = model.predict(scaler.transform(X_eval))
            scores.append(f1_score(y_eval, preds, average="weighted"))
        return np.mean(scores)

    results = {}
    for subset_name, feat_list in CHANNEL_SUBSETS.items():
        print(f" -> channel {subset_name}")
        results[subset_name] = _lopo_f1([FEATURES.index(f) for f in feat_list])
    results["all_10"] = baseline
    return results
135
+
136
+
137
def _parse_args():
    """Parse the CLI flags for the feature-importance evaluation script."""
    parser = argparse.ArgumentParser(description="Feature importance + LOPO ablation")
    boolean_flags = [
        ("--quick", "Use fewer trees (200) for faster iteration."),
        ("--skip-lofo", "Skip leave-one-feature-out ablation."),
        ("--skip-channel", "Skip channel ablation."),
    ]
    for flag, help_text in boolean_flags:
        parser.add_argument(flag, action="store_true", help=help_text)
    return parser.parse_args()
155
+
156
+
157
def main():
    """Run gain importance, LOPO ablations, and write the markdown report.

    Pipeline: parse CLI flags -> gain importance from the trained XGBoost
    checkpoint -> load per-person splits once -> baseline LOPO F1 ->
    optional leave-one-feature-out ablation -> optional channel ablation ->
    write evaluation/feature_selection_justification.md.
    """
    args = _parse_args()
    print("=== Feature importance (XGBoost gain) ===")
    if args.quick:
        print("Running in quick mode (n_estimators=200).")
    # None when no checkpoint file exists; report degrades gracefully.
    imp = xgb_feature_importance()
    if imp:
        for name in FEATURES:
            print(f" {name}: {imp.get(name, 0):.2f}")
        order = sorted(imp.items(), key=lambda x: -x[1])
        print(" Top-5 by gain:", [x[0] for x in order[:5]])

    # Per-person splits are loaded once and shared by every LOPO evaluation.
    print("\n[DATA] Loading per-person splits once...")
    by_person, _, _ = load_per_person("face_orientation")
    persons = sorted(by_person.keys())

    print("\n=== Baseline LOPO (all 10 features) ===")
    baseline = run_baseline_lopo_f1(by_person, persons, quick=args.quick)
    print(f" Baseline (all 10 features) mean LOPO F1: {baseline:.4f}")

    # Leave-one-feature-out ablation (slowest step; skippable via --skip-lofo).
    ablation = None
    worst_drop = None
    if args.skip_lofo:
        print("\n=== Leave-one-feature-out ablation (LOPO mean F1) ===")
        print(" skipped (--skip-lofo)")
    else:
        print("\n=== Leave-one-feature-out ablation (LOPO mean F1) ===")
        ablation = run_ablation_lopo(by_person, persons, quick=args.quick)
        for feat in FEATURES:
            delta = baseline - ablation[feat]
            print(f" drop {feat}: F1={ablation[feat]:.4f} (Δ={delta:+.4f})")
        # Lowest ablated F1 marks the feature whose removal hurts most.
        worst_drop = min(ablation.items(), key=lambda x: x[1])
        print(f" Largest F1 drop when dropping: {worst_drop[0]} (F1={worst_drop[1]:.4f})")

    # Channel ablation (head/eye/gaze subsets; skippable via --skip-channel).
    channel_f1 = None
    if args.skip_channel:
        print("\n=== Channel ablation (LOPO mean F1) ===")
        print(" skipped (--skip-channel)")
    else:
        print("\n=== Channel ablation (LOPO mean F1) ===")
        channel_f1 = run_channel_ablation(by_person, persons, quick=args.quick, baseline=baseline)
        for name, f1 in channel_f1.items():
            print(f" {name}: {f1:.4f}")

    # Assemble the markdown report. CLI-skipped sections are recorded as
    # skipped rather than silently omitted, so the report is self-describing.
    out_dir = os.path.join(_PROJECT_ROOT, "evaluation")
    out_path = os.path.join(out_dir, "feature_selection_justification.md")
    lines = [
        "# Feature selection justification",
        "",
        "The face_orientation model uses 10 of 17 extracted features. This document summarises empirical support.",
        "",
        "## 1. Domain rationale",
        "",
        "The 10 features were chosen to cover three channels:",
        "- **Head pose:** head_deviation, s_face, pitch",
        "- **Eye state:** ear_left, ear_right, ear_avg, perclos",
        "- **Gaze:** h_gaze, gaze_offset, s_eye",
        "",
        "Excluded: v_gaze (noisy), mar (rare events), yaw/roll (redundant with head_deviation/s_face), blink_rate/closure_duration/yawn_duration (temporal overlap with perclos).",
        "",
        "## 2. XGBoost feature importance (gain)",
        "",
        f"Config used: `{XGB_BASE_PARAMS}`.",
        "Quick mode: " + ("yes (200 trees)" if args.quick else "no (full config)"),
        "",
        "From the trained XGBoost checkpoint (gain on the 10 features):",
        "",
        "| Feature | Gain |",
        "|---------|------|",
    ]
    if imp:
        for name in FEATURES:
            lines.append(f"| {name} | {imp.get(name, 0):.2f} |")
        order = sorted(imp.items(), key=lambda x: -x[1])
        lines.append("")
        lines.append(f"**Top 5 by gain:** {', '.join(x[0] for x in order[:5])}.")
    else:
        lines.append("(Run with XGBoost checkpoint to populate.)")
    lines.extend([
        "",
        "## 3. Leave-one-feature-out ablation (LOPO)",
        "",
        f"Baseline (all 10 features) mean LOPO F1: **{baseline:.4f}**.",
        "",
    ])
    if ablation is None:
        lines.append("Skipped in this run (`--skip-lofo`).")
    else:
        lines.extend([
            "| Feature dropped | Mean LOPO F1 | Δ vs baseline |",
            "|------------------|--------------|---------------|",
        ])
        for feat in FEATURES:
            delta = baseline - ablation[feat]
            lines.append(f"| {feat} | {ablation[feat]:.4f} | {delta:+.4f} |")
        lines.append("")
        lines.append(f"Dropping **{worst_drop[0]}** hurts most (F1={worst_drop[1]:.4f}), consistent with it being important.")

    lines.append("")
    lines.append("## 4. Channel ablation (LOPO)")
    lines.append("")
    if channel_f1 is None:
        lines.append("Skipped in this run (`--skip-channel`).")
    else:
        lines.append("| Subset | Mean LOPO F1 |")
        lines.append("|--------|--------------|")
        for name in ["head_pose", "eye_state", "gaze", "all_10"]:
            lines.append(f"| {name} | {channel_f1[name]:.4f} |")
        lines.append("")
    lines.append("## 5. Conclusion")
    lines.append("")
    if ablation is None:
        lines.append("Selection is supported by (1) domain rationale (three attention channels), (2) XGBoost gain importance, and (3) channel ablation. Run without `--skip-lofo` for full leave-one-out ablation.")
    else:
        lines.append("Selection is supported by (1) domain rationale (three attention channels), (2) XGBoost gain importance, and (3) leave-one-out ablation. SHAP or correlation-based pruning can be added in future work.")
    lines.append("")
    with open(out_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print(f"\nReport written to {out_path}")
276
+
277
+
278
+ if __name__ == "__main__":
279
+ main()
evaluation/feature_selection_justification.md ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Feature selection justification
2
+
3
+ The face_orientation model uses 10 of 17 extracted features. This document summarises empirical support.
4
+
5
+ ## 1. Domain rationale
6
+
7
+ The 10 features were chosen to cover three channels:
8
+ - **Head pose:** head_deviation, s_face, pitch
9
+ - **Eye state:** ear_left, ear_right, ear_avg, perclos
10
+ - **Gaze:** h_gaze, gaze_offset, s_eye
11
+
12
+ Excluded: v_gaze (noisy), mar (rare events), yaw/roll (redundant with head_deviation/s_face), blink_rate/closure_duration/yawn_duration (temporal overlap with perclos).
13
+
14
+ ## 2. XGBoost feature importance (gain)
15
+
16
+ Config used: `{'n_estimators': 600, 'max_depth': 8, 'learning_rate': 0.1489, 'subsample': 0.9625, 'colsample_bytree': 0.9013, 'reg_alpha': 1.1407, 'reg_lambda': 2.4181, 'eval_metric': 'logloss'}`.
17
+ Quick mode: yes (200 trees)
18
+
19
+ From the trained XGBoost checkpoint (gain on the 10 features):
20
+
21
+ | Feature | Gain |
22
+ |---------|------|
23
+ | head_deviation | 8.83 |
24
+ | s_face | 10.27 |
25
+ | s_eye | 2.18 |
26
+ | h_gaze | 4.99 |
27
+ | pitch | 4.64 |
28
+ | ear_left | 3.57 |
29
+ | ear_avg | 6.96 |
30
+ | ear_right | 9.54 |
31
+ | gaze_offset | 1.80 |
32
+ | perclos | 5.68 |
33
+
34
+ **Top 5 by gain:** s_face, ear_right, head_deviation, ear_avg, perclos.
35
+
36
+ ## 3. Leave-one-feature-out ablation (LOPO)
37
+
38
+ Baseline (all 10 features) mean LOPO F1: **0.8286**.
39
+
40
+ Skipped in this run (`--skip-lofo`).
41
+
42
+ ## 4. Channel ablation (LOPO)
43
+
44
+ | Subset | Mean LOPO F1 |
45
+ |--------|--------------|
46
+ | head_pose | 0.7480 |
47
+ | eye_state | 0.8071 |
48
+ | gaze | 0.7260 |
49
+ | all_10 | 0.8286 |
50
+
51
+ ## 5. Conclusion
52
+
53
+ Selection is supported by (1) domain rationale (three attention channels), (2) XGBoost gain importance, and (3) channel ablation. Run without `--skip-lofo` for full leave-one-out ablation.
evaluation/grouped_split_benchmark.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Compare pooled random split vs grouped LOPO for XGBoost."""
2
+
3
+ import os
4
+ import sys
5
+
6
+ import numpy as np
7
+ from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
8
+
9
+ _PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
10
+ if _PROJECT_ROOT not in sys.path:
11
+ sys.path.insert(0, _PROJECT_ROOT)
12
+
13
+ from data_preparation.prepare_dataset import get_default_split_config, get_numpy_splits, load_per_person
14
+ from models.xgboost.config import build_xgb_classifier, XGB_BASE_PARAMS
15
+
16
+ MODEL_NAME = "face_orientation"
17
+ OUT_PATH = os.path.join(_PROJECT_ROOT, "evaluation", "GROUPED_SPLIT_BENCHMARK.md")
18
+
19
+
20
def run_pooled_split():
    """Train/evaluate one XGBoost on the pooled 70/15/15 random split.

    Returns a dict with accuracy, weighted F1 and ROC-AUC on the test split.
    """
    ratios, seed = get_default_split_config()
    splits, _, _, _ = get_numpy_splits(
        model_name=MODEL_NAME,
        split_ratios=ratios,
        seed=seed,
        scale=False,
    )

    # Early stopping monitors the held-out validation split.
    clf = build_xgb_classifier(seed, verbosity=0, early_stopping_rounds=30)
    clf.fit(
        splits["X_train"],
        splits["y_train"],
        eval_set=[(splits["X_val"], splits["y_val"])],
        verbose=False,
    )

    y_true = splits["y_test"]
    positive_prob = clf.predict_proba(splits["X_test"])[:, 1]
    hard_pred = (positive_prob >= 0.5).astype(int)
    metrics = {
        "accuracy": accuracy_score(y_true, hard_pred),
        "f1": f1_score(y_true, hard_pred, average="weighted"),
        "auc": roc_auc_score(y_true, positive_prob),
    }
    return {name: float(value) for name, value in metrics.items()}
43
+
44
+
45
def run_grouped_lopo():
    """LOPO evaluation: one fold per participant, mean metrics across folds."""
    by_person, _, _ = load_per_person(MODEL_NAME)
    persons = sorted(by_person.keys())
    _, seed = get_default_split_config()

    fold_metrics = {"accuracy": [], "f1": [], "auc": []}
    for holdout in persons:
        others = [p for p in persons if p != holdout]
        X_train = np.concatenate([by_person[p][0] for p in others], axis=0)
        y_train = np.concatenate([by_person[p][1] for p in others], axis=0)
        X_eval, y_eval = by_person[holdout]

        clf = build_xgb_classifier(seed, verbosity=0)
        clf.fit(X_train, y_train, verbose=False)
        prob = clf.predict_proba(X_eval)[:, 1]
        pred = (prob >= 0.5).astype(int)

        fold_metrics["accuracy"].append(float(accuracy_score(y_eval, pred)))
        fold_metrics["f1"].append(float(f1_score(y_eval, pred, average="weighted")))
        fold_metrics["auc"].append(float(roc_auc_score(y_eval, prob)))

    # Average every metric across the per-person folds.
    summary = {name: float(np.mean(vals)) for name, vals in fold_metrics.items()}
    summary["folds"] = len(persons)
    return summary
71
+
72
+
73
def write_report(pooled, grouped):
    """Write GROUPED_SPLIT_BENCHMARK.md comparing the two protocols."""
    header = [
        "# Grouped vs pooled split benchmark",
        "",
        "This compares the same XGBoost config under two evaluation protocols.",
        "",
        f"Config: `{XGB_BASE_PARAMS}`",
        "",
    ]
    table = [
        "| Protocol | Accuracy | F1 (weighted) | ROC-AUC |",
        "|----------|---------:|--------------:|--------:|",
        f"| Pooled random split (70/15/15) | {pooled['accuracy']:.4f} | {pooled['f1']:.4f} | {pooled['auc']:.4f} |",
        f"| Grouped LOPO ({grouped['folds']} folds) | {grouped['accuracy']:.4f} | {grouped['f1']:.4f} | {grouped['auc']:.4f} |",
    ]
    footer = [
        "",
        "Use grouped LOPO as the primary generalisation metric when reporting model quality.",
        "",
    ]

    with open(OUT_PATH, "w", encoding="utf-8") as report:
        report.write("\n".join(header + table + footer))
    print(f"[LOG] Wrote {OUT_PATH}")
93
+
94
+
95
def main():
    """Run both protocols and write the comparison report."""
    pooled_metrics = run_pooled_split()
    lopo_metrics = run_grouped_lopo()
    write_report(pooled_metrics, lopo_metrics)
    summary = "[DONE] pooled_f1={:.4f} grouped_f1={:.4f}".format(
        pooled_metrics["f1"], lopo_metrics["f1"]
    )
    print(summary)
104
+
105
+
106
+ if __name__ == "__main__":
107
+ main()
evaluation/justify_thresholds.py ADDED
@@ -0,0 +1,573 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LOPO threshold/weight analysis. Run: python -m evaluation.justify_thresholds
2
+ # ClearML logging: set USE_CLEARML=1 env var or pass --clearml flag
3
+
4
+ import glob
5
+ import os
6
+ import sys
7
+
8
+ import numpy as np
9
+ import matplotlib
10
+ matplotlib.use("Agg")
11
+ import matplotlib.pyplot as plt
12
+ from sklearn.neural_network import MLPClassifier
13
+ from sklearn.preprocessing import StandardScaler
14
+ from sklearn.metrics import roc_curve, roc_auc_score, f1_score
15
+
16
+ _PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
17
+ sys.path.insert(0, _PROJECT_ROOT)
18
+
19
+ from data_preparation.prepare_dataset import get_default_split_config, load_per_person, SELECTED_FEATURES
20
+ from models.xgboost.config import XGB_BASE_PARAMS, build_xgb_classifier
21
+
22
# Output locations for generated plots and the markdown report.
PLOTS_DIR = os.path.join(os.path.dirname(__file__), "plots")
REPORT_PATH = os.path.join(os.path.dirname(__file__), "THRESHOLD_JUSTIFICATION.md")
# Shared RNG seed from the project-wide split configuration.
_, SEED = get_default_split_config()

# ClearML is opt-in: env var USE_CLEARML=1, a --clearml CLI flag, or running
# inside an existing ClearML task (CLEARML_TASK_ID set) all enable it.
_USE_CLEARML = os.environ.get("USE_CLEARML", "0") == "1" or "--clearml" in sys.argv or bool(os.environ.get("CLEARML_TASK_ID"))
_CLEARML_QUEUE = os.environ.get("CLEARML_QUEUE", "")

# Populated below when ClearML is active; stay None for local-only runs.
_task = None
_logger = None

if _USE_CLEARML:
    try:
        from clearml import Task
        from config import CLEARML_PROJECT_NAME, flatten_for_clearml
        _task = Task.init(
            project_name=CLEARML_PROJECT_NAME,
            task_name="Threshold Justification",
            tags=["evaluation", "thresholds"],
        )
        from config.clearml_enrich import enrich_task, upload_repro_artifacts

        # Attach repo metadata, the flattened config (plus evaluation
        # hyperparameters), and reproducibility artifacts to the task.
        enrich_task(_task, role="eval_thresholds")
        flat = flatten_for_clearml()
        flat["evaluation/SEED"] = SEED
        flat["evaluation/n_participants"] = 9
        _task.connect(flat)
        upload_repro_artifacts(_task)
        _logger = _task.get_logger()
        if _CLEARML_QUEUE:
            # Remote execution: hand the task to the queue and stop the local
            # process — a ClearML worker re-runs this script remotely.
            print(f"[ClearML] Enqueuing to queue '{_CLEARML_QUEUE}'.")
            _task.execute_remotely(queue_name=_CLEARML_QUEUE)
            sys.exit(0)
        print(f"ClearML enabled — logging to project '{CLEARML_PROJECT_NAME}'")
    except ImportError:
        # ClearML is an optional dependency; degrade gracefully to local-only.
        print("WARNING: ClearML not installed. Continuing without logging.")
        _USE_CLEARML = False
58
+
59
def _youdens_j(y_true, y_prob):
    """ROC sweep plus the Youden's J optimal operating point.

    Returns (optimal_threshold, fpr, tpr, thresholds, auc) where the optimal
    threshold maximises J = TPR - FPR over all ROC operating points.
    NOTE(review): sklearn's roc_curve places an inf sentinel at
    thresholds[0]; if the argmax ever lands there the returned threshold is
    inf — confirm callers tolerate that edge case.
    """
    fpr, tpr, thresholds = roc_curve(y_true, y_prob)
    best = int(np.argmax(tpr - fpr))
    return (
        float(thresholds[best]),
        fpr,
        tpr,
        thresholds,
        float(roc_auc_score(y_true, y_prob)),
    )
65
+
66
+
67
def _f1_at_threshold(y_true, y_prob, threshold):
    """Binary F1 after thresholding probabilities at ``threshold``."""
    hard_labels = (y_prob >= threshold).astype(int)
    return f1_score(y_true, hard_labels, zero_division=0)
69
+
70
+
71
def _plot_roc(fpr, tpr, auc, opt_thresh, opt_idx, title, path, clearml_title=None):
    """Render one ROC curve with its Youden's J operating point marked.

    Saves the figure to ``path`` and, when a ClearML logger is active and
    ``clearml_title`` is given, reports it to ClearML as well.
    """
    figure, axis = plt.subplots(figsize=(6, 5))
    axis.plot(fpr, tpr, lw=2, label=f"ROC (AUC = {auc:.4f})")
    axis.plot(
        fpr[opt_idx], tpr[opt_idx], "ro", markersize=10,
        label=f"Youden's J optimum (t = {opt_thresh:.3f})",
    )
    # Chance diagonal for visual reference.
    axis.plot([0, 1], [0, 1], "k--", lw=1, alpha=0.5)
    axis.set_title(title)
    axis.set_xlabel("False Positive Rate")
    axis.set_ylabel("True Positive Rate")
    axis.legend(loc="lower right")
    figure.tight_layout()

    # ClearML must receive the figure before it is closed.
    if _logger and clearml_title:
        _logger.report_matplotlib_figure(
            title=clearml_title, series="ROC", figure=figure, iteration=0
        )

    figure.savefig(path, dpi=150)
    plt.close(figure)
    print(f" saved {path}")
92
+
93
+
94
def run_lopo_models():
    """Leave-one-person-out cross-validation for the MLP and XGBoost models.

    For each held-out participant, both classifiers are trained on the
    remaining participants (features standardised on the training folds only)
    and scored on the held-out data. Returns a dict
    ``{"mlp"|"xgb": {"y": labels, "p": probabilities}}`` with all folds
    concatenated in sorted-participant order.
    """
    print("\n=== LOPO: MLP and XGBoost ===")
    by_person, _, _ = load_per_person("face_orientation")
    persons = sorted(by_person.keys())

    pooled = {"mlp": {"y": [], "p": []}, "xgb": {"y": [], "p": []}}

    for fold, held_out in enumerate(persons, start=1):
        X_test, y_test = by_person[held_out]

        rest = [p for p in persons if p != held_out]
        X_train = np.concatenate([by_person[p][0] for p in rest])
        y_train = np.concatenate([by_person[p][1] for p in rest])

        # Scaler fitted on training folds only (no leakage from the held-out person).
        scaler = StandardScaler().fit(X_train)
        X_train_s = scaler.transform(X_train)
        X_test_s = scaler.transform(X_test)

        mlp = MLPClassifier(
            hidden_layer_sizes=(64, 32), activation="relu",
            max_iter=200, early_stopping=True, validation_fraction=0.15,
            random_state=SEED, verbose=False,
        )
        mlp.fit(X_train_s, y_train)
        pooled["mlp"]["y"].append(y_test)
        pooled["mlp"]["p"].append(mlp.predict_proba(X_test_s)[:, 1])

        xgb = build_xgb_classifier(SEED, verbosity=0)
        xgb.fit(X_train_s, y_train)
        pooled["xgb"]["y"].append(y_test)
        pooled["xgb"]["p"].append(xgb.predict_proba(X_test_s)[:, 1])

        print(f" fold {fold}/{len(persons)}: held out {held_out} "
              f"({X_test.shape[0]} samples)")

    # Flatten the per-fold lists into single arrays.
    for stats in pooled.values():
        stats["y"] = np.concatenate(stats["y"])
        stats["p"] = np.concatenate(stats["p"])

    return pooled
135
+
136
+
137
def analyse_model_thresholds(results):
    """Report AUC and Youden-optimal thresholds on pooled LOPO predictions.

    Plots a ROC curve per model (saved under PLOTS_DIR and mirrored to
    ClearML) and returns per-model stats used by the report writer.
    """
    print("\n=== Model threshold analysis ===")
    model_stats = {}

    for key, label in (("mlp", "MLP"), ("xgb", "XGBoost")):
        y = results[key]["y"]
        p = results[key]["p"]
        opt_t, fpr, tpr, thresholds, auc = _youdens_j(y, p)
        # Index of the optimal point, needed to mark it on the ROC plot.
        opt_idx = int(np.argmax(tpr - fpr))
        f1_opt = _f1_at_threshold(y, p, opt_t)
        f1_half = _f1_at_threshold(y, p, 0.50)

        _plot_roc(
            fpr, tpr, auc, opt_t, opt_idx,
            f"LOPO ROC — {label} (9 folds, 144k samples)",
            os.path.join(PLOTS_DIR, f"roc_{key}.png"),
            clearml_title=f"ROC_{label}",
        )

        model_stats[key] = {
            "label": label,
            "auc": auc,
            "opt_threshold": opt_t,
            "f1_opt": f1_opt,
            "f1_50": f1_half,
        }
        print(f" {label}: AUC={auc:.4f}, optimal threshold={opt_t:.3f} "
              f"(F1={f1_opt:.4f}), F1@0.50={f1_half:.4f}")

        # Mirror the headline numbers as ClearML single-value scalars.
        if _logger:
            _logger.report_single_value(f"{label} Optimal Threshold", opt_t)
            _logger.report_single_value(f"{label} AUC", auc)
            _logger.report_single_value(f"{label} F1 @ Optimal", f1_opt)
            _logger.report_single_value(f"{label} F1 @ 0.5", f1_half)

    return model_stats
169
+
170
def run_geo_weight_search():
    """Grid-search the face-vs-eye blend weight for the geometric pipeline.

    For each LOPO fold and each candidate alpha, the decision threshold is
    calibrated on the training folds via Youden's J, then F1 is measured on
    the held-out participant. Returns ``({alpha: mean_F1}, best_alpha)``.
    """
    print("\n=== Geometric weight grid search ===")

    by_person, _, _ = load_per_person("face_orientation")
    persons = sorted(by_person.keys())
    feat_names = SELECTED_FEATURES["face_orientation"]
    face_col = feat_names.index("s_face")
    eye_col = feat_names.index("s_eye")

    alphas = np.arange(0.2, 0.85, 0.1).round(1)
    fold_f1s = {a: [] for a in alphas}

    for held_out in persons:
        X_test, y_test = by_person[held_out]
        face_te = X_test[:, face_col]
        eye_te = X_test[:, eye_col]

        others = [p for p in persons if p != held_out]
        X_train = np.concatenate([by_person[p][0] for p in others])
        y_train = np.concatenate([by_person[p][1] for p in others])
        face_tr = X_train[:, face_col]
        eye_tr = X_train[:, eye_col]

        for a in alphas:
            # Threshold chosen on training folds only, then applied held-out.
            opt_t, *_ = _youdens_j(y_train, a * face_tr + (1.0 - a) * eye_tr)
            fold_f1s[a].append(
                _f1_at_threshold(y_test, a * face_te + (1.0 - a) * eye_te, opt_t)
            )

    mean_f1 = {a: np.mean(scores) for a, scores in fold_f1s.items()}
    best_alpha = max(mean_f1, key=mean_f1.get)

    fig, ax = plt.subplots(figsize=(7, 4))
    ax.bar([f"{a:.1f}" for a in alphas],
           [mean_f1[a] for a in alphas], color="steelblue")
    ax.set_xlabel("Face weight (alpha); eye weight = 1 - alpha")
    ax.set_ylabel("Mean LOPO F1")
    ax.set_title("Geometric Pipeline: Face vs Eye Weight Search")
    ax.set_ylim(bottom=max(0, min(mean_f1.values()) - 0.05))
    # Annotate each bar with its exact F1 value.
    for pos, a in enumerate(alphas):
        ax.text(pos, mean_f1[a] + 0.003, f"{mean_f1[a]:.3f}",
                ha="center", va="bottom", fontsize=8)
    fig.tight_layout()

    # Report to ClearML before the figure is closed.
    if _logger:
        _logger.report_matplotlib_figure(
            title="Geo Weight Search", series="F1 vs Alpha", figure=fig, iteration=0
        )

    path = os.path.join(PLOTS_DIR, "geo_weight_search.png")
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print(f" saved {path}")

    print(f" Best alpha (face weight) = {best_alpha:.1f}, "
          f"mean LOPO F1 = {mean_f1[best_alpha]:.4f}")

    if _logger:
        _logger.report_single_value("Geo Best Alpha", best_alpha)
        for i, a in enumerate(sorted(alphas)):
            _logger.report_scalar(
                "Geo Weight Search", "Mean LOPO F1",
                iteration=i, value=mean_f1[a]
            )

    return dict(mean_f1), best_alpha
239
+
240
+
241
def run_hybrid_weight_search(lopo_results):
    """Grid-search the MLP-vs-geometric blend weight for the hybrid pipeline.

    Reuses the held-out MLP probabilities from ``run_lopo_models`` (passed in
    as *lopo_results*). A fold-local MLP is re-trained only to produce
    in-sample training probabilities, from which the decision threshold for
    each candidate blend weight is chosen via Youden's J.

    Returns ``({w_mlp: mean_F1}, best_w_mlp)``.
    """
    print("\n=== Hybrid weight grid search ===")

    by_person, _, _ = load_per_person("face_orientation")
    persons = sorted(by_person.keys())
    features = SELECTED_FEATURES["face_orientation"]
    sf_idx = features.index("s_face")
    se_idx = features.index("s_eye")

    # Fixed geometric sub-score weights; must match the geometric pipeline's
    # selected weights (see run_geo_weight_search / the generated report).
    GEO_FACE_W = 0.7
    GEO_EYE_W = 0.3

    w_mlps = np.arange(0.3, 0.85, 0.1).round(1)
    wmf1 = {w: [] for w in w_mlps}
    mlp_p = lopo_results["mlp"]["p"]
    # NOTE(review): slicing mlp_p by running offset assumes run_lopo_models
    # concatenated its folds in the same sorted(persons) order used here —
    # keep the two functions in sync.
    offset = 0
    for held_out in persons:
        X_test, y_test = by_person[held_out]
        n = X_test.shape[0]
        mlp_prob_fold = mlp_p[offset:offset + n]
        offset += n

        sf = X_test[:, sf_idx]
        se = X_test[:, se_idx]
        geo_score = np.clip(GEO_FACE_W * sf + GEO_EYE_W * se, 0, 1)

        train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
        train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
        sf_tr = train_X[:, sf_idx]
        se_tr = train_X[:, se_idx]
        geo_tr = np.clip(GEO_FACE_W * sf_tr + GEO_EYE_W * se_tr, 0, 1)

        # Fold-local MLP (same hyper-parameters as run_lopo_models); its
        # in-sample probabilities are used only for threshold calibration.
        scaler = StandardScaler().fit(train_X)
        mlp_tr = MLPClassifier(
            hidden_layer_sizes=(64, 32), activation="relu",
            max_iter=200, early_stopping=True, validation_fraction=0.15,
            random_state=SEED, verbose=False,
        )
        mlp_tr.fit(scaler.transform(train_X), train_y)
        mlp_prob_tr = mlp_tr.predict_proba(scaler.transform(train_X))[:, 1]

        for w in w_mlps:
            # Calibrate threshold on the blended training score ...
            combo_tr = w * mlp_prob_tr + (1.0 - w) * geo_tr
            opt_t, *_ = _youdens_j(train_y, combo_tr)

            # ... then evaluate on the blended held-out score.
            combo_te = w * mlp_prob_fold + (1.0 - w) * geo_score
            f1 = _f1_at_threshold(y_test, combo_te, opt_t)
            wmf1[w].append(f1)

    mean_f1 = {w: np.mean(f1s) for w, f1s in wmf1.items()}
    best_w = max(mean_f1, key=mean_f1.get)

    fig, ax = plt.subplots(figsize=(7, 4))
    ax.bar([f"{w:.1f}" for w in w_mlps],
           [mean_f1[w] for w in w_mlps], color="darkorange")
    ax.set_xlabel("MLP weight (w_mlp); geo weight = 1 - w_mlp")
    ax.set_ylabel("Mean LOPO F1")
    ax.set_title("Hybrid Pipeline: MLP vs Geometric Weight Search")
    ax.set_ylim(bottom=max(0, min(mean_f1.values()) - 0.05))
    # Annotate each bar with its exact F1 value.
    for i, w in enumerate(w_mlps):
        ax.text(i, mean_f1[w] + 0.003, f"{mean_f1[w]:.3f}",
                ha="center", va="bottom", fontsize=8)
    fig.tight_layout()

    # Log to ClearML before closing
    if _logger:
        _logger.report_matplotlib_figure(
            title="Hybrid Weight Search", series="F1 vs w_mlp", figure=fig, iteration=0
        )

    path = os.path.join(PLOTS_DIR, "hybrid_weight_search.png")
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print(f" saved {path}")

    print(f" Best w_mlp = {best_w:.1f}, mean LOPO F1 = {mean_f1[best_w]:.4f}")

    # Log scalars to ClearML
    if _logger:
        _logger.report_single_value("Hybrid Best w_mlp", best_w)
        for i, w in enumerate(sorted(w_mlps)):
            _logger.report_scalar(
                "Hybrid Weight Search", "Mean LOPO F1",
                iteration=i, value=mean_f1[w]
            )

    return dict(mean_f1), best_w
328
+
329
+
330
def plot_distributions():
    """Plot EAR / MAR class-conditional distributions and threshold coverage.

    Pools ``min(ear_left, ear_right)`` and ``mar`` across every collected
    ``.npz`` feature file, renders both histograms (saved to PLOTS_DIR and
    mirrored to ClearML when enabled), and returns the percentage of samples
    on each side of the hand-picked EAR/MAR thresholds for the report.

    Raises:
        FileNotFoundError: if no collected ``.npz`` files are found (instead
            of the cryptic ``np.concatenate`` error on an empty list).
    """
    print("\n=== EAR / MAR distributions ===")
    npz_files = sorted(glob.glob(os.path.join(_PROJECT_ROOT, "data", "collected_*", "*.npz")))
    if not npz_files:
        # Fail fast with an actionable message.
        raise FileNotFoundError(
            f"No collected .npz files found under {os.path.join(_PROJECT_ROOT, 'data')}"
        )

    all_ear_l, all_ear_r, all_mar, all_labels = [], [], [], []
    for f in npz_files:
        # Context manager closes the underlying file handle; a bare np.load
        # on an .npz keeps it open for lazy array access.
        with np.load(f, allow_pickle=True) as d:
            names = list(d["feature_names"])
            feat = d["features"].astype(np.float32)
            lab = d["labels"].astype(np.int64)
        all_ear_l.append(feat[:, names.index("ear_left")])
        all_ear_r.append(feat[:, names.index("ear_right")])
        all_mar.append(feat[:, names.index("mar")])
        all_labels.append(lab)

    ear_l = np.concatenate(all_ear_l)
    ear_r = np.concatenate(all_ear_r)
    mar = np.concatenate(all_mar)
    labels = np.concatenate(all_labels)
    # Per-frame eye openness = the more closed of the two eyes.
    ear_min = np.minimum(ear_l, ear_r)
    # Clip extreme outliers purely for plotting; stats below use raw values.
    ear_plot = np.clip(ear_min, 0, 0.85)
    mar_plot = np.clip(mar, 0, 1.5)

    # EAR distribution plot
    fig_ear, ax = plt.subplots(figsize=(7, 4))
    ax.hist(ear_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
    ax.hist(ear_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
    # Vertical markers for the runtime EAR thresholds being justified.
    for val, lbl, c in [
        (0.16, "ear_closed = 0.16", "red"),
        (0.21, "EAR_BLINK = 0.21", "orange"),
        (0.30, "ear_open = 0.30", "green"),
    ]:
        ax.axvline(val, color=c, ls="--", lw=1.5, label=lbl)
    ax.set_xlabel("min(left_EAR, right_EAR)")
    ax.set_ylabel("Density")
    ax.set_title("EAR Distribution by Class (144k samples)")
    ax.legend(fontsize=8)
    fig_ear.tight_layout()

    # Log to ClearML before closing
    if _logger:
        _logger.report_matplotlib_figure(
            title="EAR Distribution", series="by class", figure=fig_ear, iteration=0
        )

    path = os.path.join(PLOTS_DIR, "ear_distribution.png")
    fig_ear.savefig(path, dpi=150)
    plt.close(fig_ear)
    print(f" saved {path}")

    # MAR distribution plot
    fig_mar, ax = plt.subplots(figsize=(7, 4))
    ax.hist(mar_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
    ax.hist(mar_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
    ax.axvline(0.55, color="red", ls="--", lw=1.5, label="MAR_YAWN = 0.55")
    ax.set_xlabel("Mouth Aspect Ratio (MAR)")
    ax.set_ylabel("Density")
    ax.set_title("MAR Distribution by Class (144k samples)")
    ax.legend(fontsize=8)
    fig_mar.tight_layout()

    # Log to ClearML before closing
    if _logger:
        _logger.report_matplotlib_figure(
            title="MAR Distribution", series="by class", figure=fig_mar, iteration=0
        )

    path = os.path.join(PLOTS_DIR, "mar_distribution.png")
    fig_mar.savefig(path, dpi=150)
    plt.close(fig_mar)
    print(f" saved {path}")

    # Coverage of each threshold over the pooled (unclipped) data.
    closed_pct = np.mean(ear_min < 0.16) * 100
    blink_pct = np.mean(ear_min < 0.21) * 100
    open_pct = np.mean(ear_min >= 0.30) * 100
    yawn_pct = np.mean(mar > 0.55) * 100

    stats = {
        "ear_below_016": closed_pct,
        "ear_below_021": blink_pct,
        "ear_above_030": open_pct,
        "mar_above_055": yawn_pct,
        "n_samples": len(ear_min),
    }
    print(f" EAR<0.16 (closed): {closed_pct:.1f}% | EAR<0.21 (blink): {blink_pct:.1f}% | "
          f"EAR>=0.30 (open): {open_pct:.1f}%")
    print(f" MAR>0.55 (yawn): {yawn_pct:.1f}%")
    return stats
418
+
419
+
420
def write_report(model_stats, geo_f1, best_alpha, hybrid_f1, best_w, dist_stats):
    """Assemble the markdown threshold-justification report at REPORT_PATH.

    Args:
        model_stats: per-model dict from analyse_model_thresholds.
        geo_f1 / best_alpha: results of run_geo_weight_search.
        hybrid_f1 / best_w: results of run_hybrid_weight_search.
        dist_stats: threshold-coverage percentages from plot_distributions.
    """
    lines = []
    lines.append("# Threshold Justification Report")
    lines.append("")
    lines.append("Auto-generated by `evaluation/justify_thresholds.py` using LOPO cross-validation "
                 "over 9 participants (~145k samples).")
    lines.append("")

    lines.append("## 1. ML Model Decision Thresholds")
    lines.append("")
    lines.append(f"XGBoost config used for this report: `{XGB_BASE_PARAMS}`.")
    lines.append("")
    lines.append("Thresholds selected via **Youden's J statistic** (J = sensitivity + specificity - 1) "
                 "on pooled LOPO held-out predictions.")
    lines.append("")
    lines.append("| Model | LOPO AUC | Optimal Threshold (Youden's J) | F1 @ Optimal | F1 @ 0.50 |")
    lines.append("|-------|----------|-------------------------------|--------------|-----------|")
    for key in ("mlp", "xgb"):
        s = model_stats[key]
        lines.append(f"| {s['label']} | {s['auc']:.4f} | **{s['opt_threshold']:.3f}** | "
                     f"{s['f1_opt']:.4f} | {s['f1_50']:.4f} |")
    lines.append("")
    lines.append("![MLP ROC](plots/roc_mlp.png)")
    lines.append("")
    # Fixed link: the plot is saved as roc_{key}.png with key "xgb"
    # (analyse_model_thresholds), not "xgboost".
    lines.append("![XGBoost ROC](plots/roc_xgb.png)")
    lines.append("")

    lines.append("## 2. Geometric Pipeline Weights (s_face vs s_eye)")
    lines.append("")
    lines.append("Grid search over face weight alpha in {0.2 ... 0.8}. "
                 "Eye weight = 1 - alpha. Threshold per fold via Youden's J.")
    lines.append("")
    lines.append("| Face Weight (alpha) | Mean LOPO F1 |")
    lines.append("|--------------------:|-------------:|")
    for a in sorted(geo_f1.keys()):
        marker = " **<-- selected**" if a == best_alpha else ""
        lines.append(f"| {a:.1f} | {geo_f1[a]:.4f}{marker} |")
    lines.append("")
    lines.append(f"**Best:** alpha = {best_alpha:.1f} (face {best_alpha*100:.0f}%, "
                 f"eye {(1-best_alpha)*100:.0f}%)")
    lines.append("")
    lines.append("![Geometric weight search](plots/geo_weight_search.png)")
    lines.append("")

    lines.append("## 3. Hybrid Pipeline Weights (MLP vs Geometric)")
    lines.append("")
    lines.append("Grid search over w_mlp in {0.3 ... 0.8}. w_geo = 1 - w_mlp. "
                 "Geometric sub-score uses same weights as geometric pipeline (face=0.7, eye=0.3). "
                 "If you change geometric weights, re-run this script — optimal w_mlp can shift.")
    lines.append("")
    lines.append("| MLP Weight (w_mlp) | Mean LOPO F1 |")
    lines.append("|-------------------:|-------------:|")
    for w in sorted(hybrid_f1.keys()):
        marker = " **<-- selected**" if w == best_w else ""
        lines.append(f"| {w:.1f} | {hybrid_f1[w]:.4f}{marker} |")
    lines.append("")
    lines.append(f"**Best:** w_mlp = {best_w:.1f} (MLP {best_w*100:.0f}%, "
                 f"geometric {(1-best_w)*100:.0f}%)")
    lines.append("")
    lines.append("![Hybrid weight search](plots/hybrid_weight_search.png)")
    lines.append("")

    lines.append("## 4. Eye and Mouth Aspect Ratio Thresholds")
    lines.append("")
    lines.append("### EAR (Eye Aspect Ratio)")
    lines.append("")
    lines.append("Reference: Soukupova & Cech, \"Real-Time Eye Blink Detection Using Facial "
                 "Landmarks\" (2016) established EAR ~ 0.2 as a blink threshold.")
    lines.append("")
    lines.append("Our thresholds define a linear interpolation zone around this established value:")
    lines.append("")
    lines.append("| Constant | Value | Justification |")
    lines.append("|----------|------:|---------------|")
    lines.append(f"| `ear_closed` | 0.16 | Below this, eyes are fully shut. "
                 f"{dist_stats['ear_below_016']:.1f}% of samples fall here. |")
    lines.append(f"| `EAR_BLINK_THRESH` | 0.21 | Blink detection point; close to the 0.2 reference. "
                 f"{dist_stats['ear_below_021']:.1f}% of samples below. |")
    lines.append(f"| `ear_open` | 0.30 | Above this, eyes are fully open. "
                 f"{dist_stats['ear_above_030']:.1f}% of samples here. |")
    lines.append("")
    lines.append("Between 0.16 and 0.30 the `_ear_score` function linearly interpolates from 0 to 1, "
                 "providing a smooth transition rather than a hard binary cutoff.")
    lines.append("")
    lines.append("![EAR distribution](plots/ear_distribution.png)")
    lines.append("")
    lines.append("### MAR (Mouth Aspect Ratio)")
    lines.append("")
    # (was an f-string with no placeholders — plain literal is equivalent)
    lines.append("| Constant | Value | Justification |")
    lines.append("|----------|------:|---------------|")
    lines.append(f"| `MAR_YAWN_THRESHOLD` | 0.55 | Only {dist_stats['mar_above_055']:.1f}% of "
                 f"samples exceed this, confirming it captures genuine yawns without false positives. |")
    lines.append("")
    lines.append("![MAR distribution](plots/mar_distribution.png)")
    lines.append("")

    lines.append("## 5. Other Constants")
    lines.append("")
    lines.append("| Constant | Value | Rationale |")
    lines.append("|----------|------:|-----------|")
    lines.append("| `gaze_max_offset` | 0.28 | Max iris displacement (normalised) before gaze score "
                 "drops to zero. Corresponds to ~56% of the eye width; beyond this the iris is at "
                 "the extreme edge. |")
    lines.append("| `max_angle` | 22.0 deg | Head deviation beyond which face score = 0. Based on "
                 "typical monitor-viewing cone: at 60 cm distance and a 24\" monitor, the viewing "
                 "angle is ~20-25 degrees. |")
    lines.append("| `roll_weight` | 0.5 | Roll is less indicative of inattention than yaw/pitch "
                 "(tilting head doesn't mean looking away), so it's down-weighted by 50%. |")
    lines.append("| `EMA alpha` | 0.3 | Smoothing factor for focus score. "
                 "Gives ~3-4 frame effective window; balances responsiveness vs flicker. |")
    lines.append("| `grace_frames` | 15 | ~0.5 s at 30 fps before penalising no-face. Allows brief "
                 "occlusions (e.g. hand gesture) without dropping score. |")
    lines.append("| `PERCLOS_WINDOW` | 60 frames | 2 s at 30 fps; standard PERCLOS measurement "
                 "window (Dinges & Grace, 1998). |")
    lines.append("| `BLINK_WINDOW_SEC` | 30 s | Blink rate measured over 30 s; typical spontaneous "
                 "blink rate is 15-20/min (Bentivoglio et al., 1997). |")
    lines.append("")

    with open(REPORT_PATH, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print(f"\nReport written to {REPORT_PATH}")
540
+
541
+
542
def main():
    """Run every analysis stage in order and assemble the markdown report."""
    os.makedirs(PLOTS_DIR, exist_ok=True)

    # LOPO predictions feed both the threshold analysis and the hybrid search.
    lopo_results = run_lopo_models()
    model_stats = analyse_model_thresholds(lopo_results)
    geo_f1, best_alpha = run_geo_weight_search()
    hybrid_f1, best_w = run_hybrid_weight_search(lopo_results)
    dist_stats = plot_distributions()

    write_report(model_stats, geo_f1, best_alpha, hybrid_f1, best_w, dist_stats)

    # Close ClearML task
    if _task:
        from config.clearml_enrich import task_done_summary

        if os.path.isfile(REPORT_PATH):
            _task.upload_artifact(
                name="threshold_justification_report",
                artifact_object=REPORT_PATH,
            )
        task_done_summary(
            _task,
            "LOPO threshold / weight analysis; see artifact threshold_justification_report and plots in Debug samples.",
        )
        _task.close()
        print("ClearML task closed.")

    print("\nDone.")
570
+
571
+
572
# Script entry point: run the full threshold-justification analysis.
if __name__ == "__main__":
    main()
evaluation/logs/.gitkeep ADDED
File without changes
evaluation/plots/confusion_matrix_mlp.png ADDED
evaluation/plots/confusion_matrix_xgb.png ADDED
evaluation/plots/ear_distribution.png ADDED
evaluation/plots/geo_weight_search.png ADDED
evaluation/plots/hybrid_weight_search.png ADDED
evaluation/plots/hybrid_xgb_weight_search.png ADDED
evaluation/plots/mar_distribution.png ADDED
evaluation/plots/roc_mlp.png ADDED
evaluation/plots/roc_xgb.png ADDED
index.html ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!doctype html>
<html lang="en">

<head>
  <meta charset="UTF-8" />
  <link rel="icon" type="image/svg+xml" href="/vite.svg" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Focus Guard</title>
  <link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;700&display=swap" rel="stylesheet">
</head>

<body>
  <!-- Application entry: /src/main.jsx mounts the UI into #root
       (presumably a React/Vite app — confirm against src/main.jsx) -->
  <div id="root"></div>
  <script type="module" src="/src/main.jsx"></script>
</body>

</html>