Abdelrahman Almatrooshi committed on
Commit
7b53d75
·
0 Parent(s):

FocusGuard with L2CS-Net gaze estimation

Browse files

Full integration: MediaPipe face mesh, MLP, XGBoost, Hybrid, and L2CS
pipelines. Includes 9-point gaze calibration, boost mode, and gaze-eye
fusion for real-time focus detection.

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .coverage +0 -0
  2. .coveragerc +23 -0
  3. .dockerignore +35 -0
  4. .gitattributes +3 -0
  5. .gitignore +44 -0
  6. Dockerfile +40 -0
  7. README.md +135 -0
  8. api/history +0 -0
  9. api/import +0 -0
  10. api/sessions +0 -0
  11. app.py +1 -0
  12. checkpoints/L2CSNet_gaze360.pkl +3 -0
  13. checkpoints/hybrid_combiner.joblib +3 -0
  14. checkpoints/hybrid_focus_config.json +14 -0
  15. checkpoints/meta_best.npz +3 -0
  16. checkpoints/meta_mlp.npz +3 -0
  17. checkpoints/mlp_best.pt +0 -0
  18. checkpoints/model_best.joblib +3 -0
  19. checkpoints/scaler_best.joblib +3 -0
  20. checkpoints/scaler_mlp.joblib +3 -0
  21. checkpoints/xgboost_face_orientation_best.json +0 -0
  22. data/README.md +11 -0
  23. data/collected_Abdelrahman/abdelrahman_20260306_023035.npz +3 -0
  24. data/collected_Jarek/Jarek_20260225_012931.npz +3 -0
  25. data/collected_Junhao/Junhao_20260303_113554.npz +3 -0
  26. data/collected_Kexin/kexin2_20260305_180229.npz +3 -0
  27. data/collected_Kexin/kexin_20260224_151043.npz +3 -0
  28. data/collected_Langyuan/Langyuan_20260303_153145.npz +3 -0
  29. data/collected_Mohamed/session_20260224_010131.npz +3 -0
  30. data/collected_Yingtao/Yingtao_20260306_023937.npz +3 -0
  31. data/collected_ayten/ayten_session_1.npz +3 -0
  32. data/collected_saba/saba_20260306_230710.npz +3 -0
  33. data_preparation/README.md +9 -0
  34. data_preparation/__init__.py +0 -0
  35. data_preparation/data_exploration.ipynb +0 -0
  36. data_preparation/prepare_dataset.py +241 -0
  37. docker-compose.yml +5 -0
  38. download_l2cs_weights.py +37 -0
  39. eslint.config.js +29 -0
  40. evaluation/README.md +19 -0
  41. evaluation/THRESHOLD_JUSTIFICATION.md +206 -0
  42. evaluation/feature_importance.py +230 -0
  43. evaluation/feature_selection_justification.md +54 -0
  44. evaluation/justify_thresholds.py +555 -0
  45. evaluation/logs/.gitkeep +0 -0
  46. evaluation/plots/confusion_matrix_mlp.png +0 -0
  47. evaluation/plots/confusion_matrix_xgb.png +0 -0
  48. evaluation/plots/ear_distribution.png +0 -0
  49. evaluation/plots/geo_weight_search.png +0 -0
  50. evaluation/plots/hybrid_weight_search.png +0 -0
.coverage ADDED
Binary file (86 kB). View file
 
.coveragerc ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [run]
2
+ branch = True
3
+ source =
4
+ .
5
+ omit =
6
+ .venv/*
7
+ venv/*
8
+ */site-packages/*
9
+ tests/*
10
+ notebooks/*
11
+ evaluation/*
12
+ models/mlp/train.py
13
+ models/mlp/sweep.py
14
+ models/mlp/eval_accuracy.py
15
+ models/cnn/eye_attention/train.py
16
+ models/collect_features.py
17
+ [report]
18
+ show_missing = True
19
+ skip_covered = False
20
+ precision = 1
21
+ exclude_lines =
22
+ pragma: no cover
23
+ if __name__ == .__main__.:
.dockerignore ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .git
2
+ .gitattributes
3
+ .github
4
+ node_modules
5
+ dist
6
+ venv
7
+ .venv
8
+ __pycache__
9
+ *.pyc
10
+ .pytest_cache
11
+ .mypy_cache
12
+ .ruff_cache
13
+
14
+ # Dev/eval files not needed at runtime
15
+ notebooks/
16
+ evaluation/
17
+ tests/
18
+ others/
19
+ *.ipynb
20
+ requirements-dev.txt
21
+ pytest.ini
22
+ eslint.config.js
23
+ docker-compose.yml
24
+
25
+ # L2CS backup is a full duplicate
26
+ models/L2CS-Net/L2CS-Net-backup/
27
+
28
+ # DB files (created at runtime)
29
+ *.db
30
+
31
+ # Editor / OS junk
32
+ .DS_Store
33
+ .cursor
34
+ .vscode
35
+ *.swp
.gitattributes ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ *.pkl filter=lfs diff=lfs merge=lfs -text
2
+ *.npz filter=lfs diff=lfs merge=lfs -text
3
+ *.joblib filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Logs
2
+ logs
3
+ *.log
4
+ npm-debug.log*
5
+ yarn-debug.log*
6
+ yarn-error.log*
7
+ pnpm-debug.log*
8
+ lerna-debug.log*
9
+
10
+ node_modules/
11
+ dist/
12
+ dist-ssr/
13
+ *.local
14
+
15
+ # Editor directories and files
16
+ .vscode/
17
+ .idea/
18
+ .DS_Store
19
+ *.suo
20
+ *.ntvs*
21
+ *.njsproj
22
+ *.sln
23
+ *.sw?
24
+ *.py[cod]
25
+ *$py.class
26
+ *.so
27
+ .Python
28
+ venv/
29
+ .venv/
30
+ env/
31
+ .env
32
+ *.egg-info/
33
+ .eggs/
34
+ build/
35
+ Thumbs.db
36
+ ignore/
37
+
38
+ # Project specific
39
+ focus_guard.db
40
+ test_focus_guard.db
41
+ static/
42
+ __pycache__/
43
+ docs/
44
+ docs
Dockerfile ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ RUN useradd -m -u 1000 user
4
+ ENV HOME=/home/user PATH=/home/user/.local/bin:$PATH
5
+ ENV PYTHONUNBUFFERED=1
6
+
7
+ WORKDIR /app
8
+
9
+ RUN apt-get update && apt-get install -y --no-install-recommends \
10
+ libglib2.0-0 libsm6 libxrender1 libxext6 libxcb1 libgl1 libgomp1 \
11
+ ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev \
12
+ libavdevice-dev libopus-dev libvpx-dev libsrtp2-dev \
13
+ build-essential nodejs npm git \
14
+ && rm -rf /var/lib/apt/lists/*
15
+
16
+ RUN pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu
17
+
18
+ # Python deps (separate layer for caching)
19
+ COPY requirements.txt ./
20
+ RUN pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu \
21
+ && pip install --no-cache-dir -r requirements.txt
22
+
23
+ # Copy source (respects .dockerignore)
24
+ COPY . .
25
+
26
+ # Build frontend
27
+ RUN npm install && npm run build && mkdir -p /app/static && cp -R dist/* /app/static/ \
28
+ && rm -rf node_modules dist
29
+
30
+ # Download models at build time
31
+ ENV FOCUSGUARD_CACHE_DIR=/app/.cache/focusguard
32
+ RUN python -c "from models.face_mesh import _ensure_model; _ensure_model()"
33
+ RUN python download_l2cs_weights.py || echo "[WARN] L2CS weights not downloaded — will run without gaze model"
34
+
35
+ RUN mkdir -p /app/data && chown -R user:user /app
36
+
37
+ USER user
38
+ EXPOSE 7860
39
+
40
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--log-level", "info"]
README.md ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: FocusGuard
3
+ colorFrom: indigo
4
+ colorTo: purple
5
+ sdk: docker
6
+ pinned: false
7
+ ---
8
+
9
+ # FocusGuard
10
+
11
+ Webcam-based focus detection: MediaPipe face mesh -> 17 features (EAR, gaze, head pose, PERCLOS, etc.) -> MLP or XGBoost for focused/unfocused. React + FastAPI app with WebSocket video.
12
+
13
+ ## Project layout
14
+
15
+ ```
16
+ ├── data/ collected_<name>/*.npz
17
+ ├── data_preparation/ loaders, split, scale
18
+ ├── notebooks/ MLP/XGB training + LOPO
19
+ ├── models/ face_mesh, head_pose, eye_scorer, train scripts
20
+ │ ├── gaze_calibration.py 9-point polynomial gaze calibration
21
+ │ ├── gaze_eye_fusion.py Fuses calibrated gaze with eye openness
22
+ │ └── L2CS-Net/ In-tree L2CS-Net repo with Gaze360 weights
23
+ ├── checkpoints/ mlp_best.pt, xgboost_*_best.json, scalers
24
+ ├── evaluation/ logs, plots, justify_thresholds
25
+ ├── ui/ pipeline.py, live_demo.py
26
+ ├── src/ React frontend
27
+ │ ├── components/
28
+ │ │ ├── FocusPageLocal.jsx Main focus page (camera, controls, model selector)
29
+ │ │ └── CalibrationOverlay.jsx Fullscreen calibration UI
30
+ │ └── utils/
31
+ │ └── VideoManagerLocal.js WebSocket client, frame capture, canvas rendering
32
+ ├── static/ built frontend (after npm run build)
33
+ ├── main.py, app.py FastAPI backend
34
+ ├── requirements.txt
35
+ └── package.json
36
+ ```
37
+
38
+ ## Setup
39
+
40
+ ```bash
41
+ python -m venv venv
42
+ source venv/bin/activate
43
+ pip install -r requirements.txt
44
+ ```
45
+
46
+ To rebuild the frontend after changes:
47
+
48
+ ```bash
49
+ npm install
50
+ npm run build
51
+ mkdir -p static && cp -r dist/* static/
52
+ ```
53
+
54
+ ## Run
55
+
56
+ **Web app:** Use the venv and run uvicorn via Python so it picks up your deps (otherwise you get `ModuleNotFoundError: aiosqlite`):
57
+
58
+ ```bash
59
+ source venv/bin/activate
60
+ python -m uvicorn main:app --host 0.0.0.0 --port 7860
61
+ ```
62
+
63
+ Then open http://localhost:7860.
64
+
65
+ **Frontend dev server (optional, for React development):**
66
+
67
+ ```bash
68
+ npm run dev
69
+ ```
70
+
71
+ **OpenCV demo:**
72
+
73
+ ```bash
74
+ python ui/live_demo.py
75
+ python ui/live_demo.py --xgb
76
+ ```
77
+
78
+ **Train:**
79
+
80
+ ```bash
81
+ python -m models.mlp.train
82
+ python -m models.xgboost.train
83
+ ```
84
+
85
+ ## Data
86
+
87
+ 9 participants, 144,793 samples, 10 features, binary labels. Collect with `python -m models.collect_features --name <name>`. Data lives in `data/collected_<name>/`.
88
+
89
+ ## Models
90
+
91
+ | Model | What it uses | Best for |
92
+ |-------|-------------|----------|
93
+ | **Geometric** | Head pose angles + eye aspect ratio (EAR) | Fast, no ML needed |
94
+ | **XGBoost** | Trained classifier on head/eye features (600 trees, depth 8) | Balanced accuracy/speed |
95
+ | **MLP** | Neural network on same features (64->32) | Higher accuracy |
96
+ | **Hybrid** | Weighted MLP + Geometric ensemble | Best head-pose accuracy |
97
+ | **L2CS** | Deep gaze estimation (ResNet50, Gaze360 weights) | Detects eye-only gaze shifts |
98
+
99
+ ## Model numbers (15% test split)
100
+
101
+ | Model | Accuracy | F1 | ROC-AUC |
102
+ |-------|----------|-----|---------|
103
+ | XGBoost (600 trees, depth 8) | 95.87% | 0.959 | 0.991 |
104
+ | MLP (64->32) | 92.92% | 0.929 | 0.971 |
105
+
106
+ ## L2CS Gaze Tracking
107
+
108
+ L2CS-Net predicts where your eyes are looking, not just where your head is pointed. This catches the scenario where your head faces the screen but your eyes wander.
109
+
110
+ ### Standalone mode
111
+ Select **L2CS** as the model - it handles everything.
112
+
113
+ ### Boost mode
114
+ Select any other model, then click the **GAZE** toggle. L2CS runs alongside the base model:
115
+ - Base model handles head pose and eye openness (35% weight)
116
+ - L2CS handles gaze direction (65% weight)
117
+ - If L2CS detects gaze is clearly off-screen, it **vetoes** the base model regardless of score
118
+
119
+ ### Calibration
120
+ After enabling L2CS or Gaze Boost, click **Calibrate** while a session is running:
121
+ 1. A fullscreen overlay shows 9 target dots (3x3 grid)
122
+ 2. Look at each dot as the progress ring fills
123
+ 3. The first dot (centre) sets your baseline gaze offset
124
+ 4. After all 9 points, a polynomial model maps your gaze angles to screen coordinates
125
+ 5. A cyan tracking dot appears on the video showing where you're looking
126
+
127
+ ## Pipeline
128
+
129
+ 1. Face mesh (MediaPipe 478 pts)
130
+ 2. Head pose -> yaw, pitch, roll, scores, gaze offset
131
+ 3. Eye scorer -> EAR, gaze ratio, MAR
132
+ 4. Temporal -> PERCLOS, blink rate, yawn
133
+ 5. 10-d vector -> MLP or XGBoost -> focused / unfocused
134
+
135
+ **Stack:** FastAPI, aiosqlite, React/Vite, PyTorch, XGBoost, MediaPipe, OpenCV, L2CS-Net.
api/history ADDED
File without changes
api/import ADDED
File without changes
api/sessions ADDED
File without changes
app.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from main import app
checkpoints/L2CSNet_gaze360.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a7f3480d868dd48261e1d59f915b0ef0bb33ea12ea00938fb2168f212080665
3
+ size 95849977
checkpoints/hybrid_combiner.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e460c6ca8d2cadf37727456401a0d63028ba23cb6401f0835d869abfa2e053c
3
+ size 965
checkpoints/hybrid_focus_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "use_xgb": true,
3
+ "w_mlp": 0.3,
4
+ "w_xgb": 0.3,
5
+ "w_geo": 0.7,
6
+ "threshold": 0.46117913373775393,
7
+ "use_yawn_veto": true,
8
+ "geo_face_weight": 0.7,
9
+ "geo_eye_weight": 0.3,
10
+ "mar_yawn_threshold": 0.55,
11
+ "metric": "f1",
12
+ "combiner": "logistic",
13
+ "combiner_path": "checkpoints/hybrid_combiner.joblib"
14
+ }
checkpoints/meta_best.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d78d1df5e25536a2c82c4b8f5fd0c26dd35f44b28fd59761634cbf78c7546f8
3
+ size 4196
checkpoints/meta_mlp.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4771c61cdf0711aa640b4d600a0851d344414cd16c1c2f75afc90e3c6135d14b
3
+ size 840
checkpoints/mlp_best.pt ADDED
Binary file (14.5 kB). View file
 
checkpoints/model_best.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:183f2d4419e0eb1e58704e5a7312eb61e331523566d4dc551054a07b3aac7557
3
+ size 5775881
checkpoints/scaler_best.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02ed6b4c0d99e0254c6a740a949da2384db58ec7d3e6df6432b9bfcd3a296c71
3
+ size 783
checkpoints/scaler_mlp.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2038d5b051d4de303c5688b1b861a0b53b1307a52b9447bfa48e8c7ace749329
3
+ size 823
checkpoints/xgboost_face_orientation_best.json ADDED
The diff for this file is too large to render. See raw diff
 
data/README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # data/
2
+
3
+ One folder per participant: `collected_<name>/` with .npz files. 9 participants, 144,793 samples total. Each .npz has `features` (N×17), `labels` (0/1), `feature_names`. Training uses 10 of the 17 (see data_preparation).
4
+
5
+ **Collect more:**
6
+
7
+ ```bash
8
+ python -m models.collect_features --name yourname
9
+ ```
10
+
11
+ Webcam + overlay; press 1 = focused, 0 = unfocused, p = pause, q = save and quit.
data/collected_Abdelrahman/abdelrahman_20260306_023035.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c48532150182c8933d4595e0a0711365645b699647e99976575b7c2adffaf8
3
+ size 1207980
data/collected_Jarek/Jarek_20260225_012931.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa68f4d587eee8d645b23b463a9f1c848b9bacc2adb68603d5fa9cd8cb744c7
3
+ size 1128864
data/collected_Junhao/Junhao_20260303_113554.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec321ee79800c04fdc0f999690d07970445aeca61f977bf6537880bbc996b5e5
3
+ size 678336
data/collected_Kexin/kexin2_20260305_180229.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e96fe17571fa1fcccc1b4bd0c8838270498883e4db6a608c4d4d4c3a8ac1d0d
3
+ size 1129700
data/collected_Kexin/kexin_20260224_151043.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d402ca4e66910a2e174c4f4beec5d7b3db6a04213d29673b227ce6ef04b39c4
3
+ size 1329732
data/collected_Langyuan/Langyuan_20260303_153145.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c679cdba334b2f3f0953b7e44f7209056277c826e2b7b5cfcf2b8b750898400
3
+ size 1198784
data/collected_Mohamed/session_20260224_010131.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a784f703c13b83911f47ec507d32c25942a07572314b8a77cbf40ca8cdff16f
3
+ size 1006428
data/collected_Yingtao/Yingtao_20260306_023937.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a75af17e25dca5f06ea9e7443ea5fee9db638f68a5910e014ee7cb8b7ae80fd
3
+ size 1338776
data/collected_ayten/ayten_session_1.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbecdbffa1c1b03b3b0fb5f715dcb4ff885ecc67da4aff78e6952b8847a96014
3
+ size 1341056
data/collected_saba/saba_20260306_230710.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db1cab5ddcf9988856c5bdca1183c8eba4647365e675a1d8a200d12f6b5d2097
3
+ size 663212
data_preparation/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # data_preparation/
2
+
3
+ Load and split the .npz data. Used by all training code and notebooks.
4
+
5
+ **prepare_dataset.py:** `load_all_pooled()`, `load_per_person()` for LOPO, `get_numpy_splits()` (XGBoost), `get_dataloaders()` (MLP). Cleans yaw/pitch/roll and EAR to fixed ranges. Face_orientation uses 10 features: head_deviation, s_face, s_eye, h_gaze, pitch, ear_left, ear_avg, ear_right, gaze_offset, perclos.
6
+
7
+ **data_exploration.ipynb:** EDA — stats, class balance, histograms, correlations.
8
+
9
+ You don’t run prepare_dataset directly; import it from `models.mlp.train`, `models.xgboost.train`, or the notebooks.
data_preparation/__init__.py ADDED
File without changes
data_preparation/data_exploration.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
data_preparation/prepare_dataset.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+
4
+ import numpy as np
5
+ from sklearn.preprocessing import StandardScaler
6
+ from sklearn.model_selection import train_test_split
7
+
8
+ torch = None
9
+ Dataset = object # type: ignore
10
+ DataLoader = None
11
+
12
+
13
+ def _require_torch():
14
+ global torch, Dataset, DataLoader
15
+ if torch is None:
16
+ try:
17
+ import torch as _torch
18
+ from torch.utils.data import Dataset as _Dataset, DataLoader as _DataLoader
19
+ except ImportError as exc: # pragma: no cover
20
+ raise ImportError("PyTorch not installed") from exc
21
+
22
+ torch = _torch
23
+ Dataset = _Dataset # type: ignore
24
+ DataLoader = _DataLoader # type: ignore
25
+
26
+ return torch, Dataset, DataLoader
27
+
28
# Root data directory: <repo>/data, resolved relative to this file.
DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data")

# Feature subsets per model family. Values are the ordered column names
# selected out of each .npz file's `feature_names`; loaders keep only the
# columns actually present in a given file, in this order.
SELECTED_FEATURES = {
    "face_orientation": [
        'head_deviation', 's_face', 's_eye', 'h_gaze', 'pitch',
        'ear_left', 'ear_avg', 'ear_right', 'gaze_offset', 'perclos'
    ],
    "eye_behaviour": [
        'ear_left', 'ear_right', 'ear_avg', 'mar',
        'blink_rate', 'closure_duration', 'perclos', 'yawn_duration'
    ]
}
40
+
41
+
42
class FeatureVectorDataset(Dataset):
    """Torch Dataset over a (features, labels) pair of numpy arrays."""

    def __init__(self, features: np.ndarray, labels: np.ndarray):
        torch_mod, _, _ = _require_torch()
        # float32 inputs; int64 class indices (what CrossEntropyLoss expects)
        self.features = torch_mod.tensor(features, dtype=torch_mod.float32)
        self.labels = torch_mod.tensor(labels, dtype=torch_mod.long)

    def __len__(self):
        return self.labels.shape[0]

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]
53
+
54
+
55
+ # ── Low-level helpers ────────────────────────────────────────────────────
56
+
57
+ def _clean_npz(raw, names):
58
+ """Apply clipping rules in-place. Shared by all loaders."""
59
+ for col, lo, hi in [('yaw', -45, 45), ('pitch', -30, 30), ('roll', -30, 30)]:
60
+ if col in names:
61
+ raw[:, names.index(col)] = np.clip(raw[:, names.index(col)], lo, hi)
62
+ for feat in ['ear_left', 'ear_right', 'ear_avg']:
63
+ if feat in names:
64
+ raw[:, names.index(feat)] = np.clip(raw[:, names.index(feat)], 0, 0.85)
65
+ return raw
66
+
67
+
68
def _load_one_npz(npz_path, target_features):
    """Load one .npz file, clean it, and pick the requested feature columns.

    Returns (X, y, selected_feature_names) where the selection is the
    subset of `target_features` actually present in the file, kept in
    target order.
    """
    data = np.load(npz_path, allow_pickle=True)
    names = list(data['feature_names'])
    cleaned = _clean_npz(data['features'].astype(np.float32), names)
    labels = data['labels'].astype(np.int64)
    keep = [f for f in target_features if f in names]
    cols = [names.index(f) for f in keep]
    return cleaned[:, cols], labels, keep
78
+
79
+
80
+ # ── Public data loaders ──────────────────────────────────────────────────
81
+
82
def load_all_pooled(model_name: str = "face_orientation", data_dir: str = None):
    """Load every collected_*/*.npz under `data_dir`, clean, select, pool.

    Returns (X_all, y_all, feature_names). When no .npz files exist,
    falls back to deterministic synthetic data so training code still runs.
    """
    target_features = SELECTED_FEATURES.get(model_name, SELECTED_FEATURES["face_orientation"])
    pattern = os.path.join(data_dir or DATA_DIR, "collected_*", "*.npz")
    npz_files = sorted(glob.glob(pattern))

    if not npz_files:
        print("[DATA] Warning: No .npz files found. Falling back to synthetic.")
        X, y = _generate_synthetic_data(model_name)
        return X, y, target_features

    xs, ys = [], []
    feature_names = None
    for path in npz_files:
        X, y, selected = _load_one_npz(path, target_features)
        if feature_names is None:
            # first file defines the column order reported to the caller
            feature_names = selected
        xs.append(X)
        ys.append(y)
        print(f"[DATA] + {os.path.basename(path)}: {X.shape[0]} samples")

    X_all = np.concatenate(xs, axis=0)
    y_all = np.concatenate(ys, axis=0)
    print(f"[DATA] Loaded {len(npz_files)} file(s) for '{model_name}': "
          f"{X_all.shape[0]} total samples, {X_all.shape[1]} features")
    return X_all, y_all, feature_names
112
+
113
+
114
def load_per_person(model_name: str = "face_orientation", data_dir: str = None):
    """Load collected_*/*.npz grouped by participant (folder name).

    Returns (by_person, X_all, y_all): `by_person` maps person name to a
    (X, y) pair of per-person arrays, and X_all/y_all is the pooled data.
    Raises FileNotFoundError if no .npz files match.
    """
    target_features = SELECTED_FEATURES.get(model_name, SELECTED_FEATURES["face_orientation"])
    pattern = os.path.join(data_dir or DATA_DIR, "collected_*", "*.npz")
    npz_files = sorted(glob.glob(pattern))
    if not npz_files:
        raise FileNotFoundError(f"No .npz files matching {pattern}")

    chunks_by_person = {}
    pooled_X, pooled_y = [], []
    for path in npz_files:
        # person name comes from the folder: collected_<name>/
        folder = os.path.basename(os.path.dirname(path))
        person = folder.replace("collected_", "", 1)
        X, y, _ = _load_one_npz(path, target_features)
        pooled_X.append(X)
        pooled_y.append(y)
        chunks_by_person.setdefault(person, []).append((X, y))
        print(f"[DATA] + {person}/{os.path.basename(path)}: {X.shape[0]} samples")

    # Collapse each person's file chunks into single arrays.
    by_person = {
        person: (
            np.concatenate([c[0] for c in chunks], axis=0),
            np.concatenate([c[1] for c in chunks], axis=0),
        )
        for person, chunks in chunks_by_person.items()
    }

    X_all = np.concatenate(pooled_X, axis=0)
    y_all = np.concatenate(pooled_y, axis=0)
    print(f"[DATA] {len(by_person)} persons, {X_all.shape[0]} total samples, {X_all.shape[1]} features")
    return by_person, X_all, y_all
151
+
152
+
153
def load_raw_npz(npz_path):
    """Load one .npz verbatim — no cleaning, no feature selection.

    Intended for exploration notebooks. Returns (features, labels, names).
    """
    data = np.load(npz_path, allow_pickle=True)
    return (
        data['features'].astype(np.float32),
        data['labels'].astype(np.int64),
        list(data['feature_names']),
    )
160
+
161
+
162
+ # ── Legacy helpers (used by models/mlp/train.py and models/xgboost/train.py) ─
163
+
164
def _load_real_data(model_name: str):
    """Pooled (X, y) without feature names — legacy helper for train scripts."""
    features, labels, _names = load_all_pooled(model_name)
    return features, labels
167
+
168
+
169
def _generate_synthetic_data(model_name: str):
    """Deterministic random (X, y) stand-in used when no real data exists."""
    target_features = SELECTED_FEATURES.get(model_name, SELECTED_FEATURES["face_orientation"])
    n, c = 500, 2
    d = len(target_features)
    rng = np.random.RandomState(42)  # fixed seed: reproducible fallback
    features = rng.randn(n, d).astype(np.float32)
    labels = rng.randint(0, c, size=n).astype(np.int64)
    print(f"[DATA] Using synthetic data for '{model_name}': {n} samples, {d} features, {c} classes")
    return features, labels
179
+
180
+
181
def _split_and_scale(features, labels, split_ratios, seed, scale):
    """Stratified train/val/test split with optional standardisation.

    `split_ratios` is (train, val, test) summing to 1. When `scale` is
    true, a StandardScaler is fitted on the training split only (no
    leakage into val/test). Returns (splits_dict, scaler_or_None).
    """
    # Carve off the test set first, then divide the remainder so the
    # requested overall ratios hold.
    test_ratio = split_ratios[2]
    val_ratio = split_ratios[1] / (split_ratios[0] + split_ratios[1])

    X_rest, X_test, y_rest, y_test = train_test_split(
        features, labels, test_size=test_ratio, random_state=seed, stratify=labels,
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=val_ratio, random_state=seed, stratify=y_rest,
    )

    scaler = None
    if scale:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_val = scaler.transform(X_val)
        X_test = scaler.transform(X_test)
        print("[DATA] Applied StandardScaler (fitted on training split)")

    splits = {
        "X_train": X_train, "y_train": y_train,
        "X_val": X_val, "y_val": y_val,
        "X_test": X_test, "y_test": y_test,
    }
    print(f"[DATA] Split (stratified): train={len(y_train)}, val={len(y_val)}, test={len(y_test)}")
    return splits, scaler
209
+
210
+
211
def get_numpy_splits(model_name: str, split_ratios=(0.7, 0.15, 0.15), seed: int = 42, scale: bool = True):
    """Return raw numpy splits for non-PyTorch models (e.g. XGBoost).

    Returns (splits, num_features, num_classes, scaler). Raises
    ValueError when the data contains fewer than two classes.
    """
    features, labels = _load_real_data(model_name)
    num_features = features.shape[1]
    num_classes = int(labels.max()) + 1
    if num_classes < 2:
        raise ValueError("Dataset has only one class; need at least 2 for classification.")
    splits, scaler = _split_and_scale(features, labels, split_ratios, seed, scale)
    return splits, num_features, num_classes, scaler
220
+
221
+
222
def get_dataloaders(model_name: str, batch_size: int = 32, split_ratios=(0.7, 0.15, 0.15), seed: int = 42, scale: bool = True):
    """Return PyTorch DataLoaders for neural-network models.

    Returns (train_loader, val_loader, test_loader, num_features,
    num_classes, scaler). Only the training loader shuffles.
    """
    _, _, loader_cls = _require_torch()
    features, labels = _load_real_data(model_name)
    num_features = features.shape[1]
    num_classes = int(labels.max()) + 1
    if num_classes < 2:
        raise ValueError("Dataset has only one class; need at least 2 for classification.")
    splits, scaler = _split_and_scale(features, labels, split_ratios, seed, scale)

    loaders = {}
    for part, shuffle in (("train", True), ("val", False), ("test", False)):
        ds = FeatureVectorDataset(splits[f"X_{part}"], splits[f"y_{part}"])
        loaders[part] = loader_cls(ds, batch_size=batch_size, shuffle=shuffle)

    return loaders["train"], loaders["val"], loaders["test"], num_features, num_classes, scaler
241
+
docker-compose.yml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ services:
2
+ focus-guard:
3
+ build: .
4
+ ports:
5
+ - "7860:7860"
download_l2cs_weights.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # Downloads L2CS-Net Gaze360 weights into checkpoints/
3
+
4
+ import os
5
+ import sys
6
+
7
# Weights land at <repo>/checkpoints/L2CSNet_gaze360.pkl.
CHECKPOINTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "checkpoints")
DEST = os.path.join(CHECKPOINTS_DIR, "L2CSNet_gaze360.pkl")
# Google Drive file id of the published L2CS-Net Gaze360 checkpoint.
GDRIVE_ID = "1dL2Jokb19_SBSHAhKHOxJsmYs5-GoyLo"
10
+
11
+
12
def main():
    """Fetch the L2CS-Net Gaze360 weights via gdown, unless already present.

    Exits with status 1 when gdown is missing or the download fails.
    """
    if os.path.isfile(DEST):
        print(f"[OK] Weights already at {DEST}")
        return

    try:
        import gdown
    except ImportError:
        print("gdown not installed. Run: pip install gdown")
        sys.exit(1)

    os.makedirs(CHECKPOINTS_DIR, exist_ok=True)
    print(f"Downloading L2CS-Net weights to {DEST} ...")
    gdown.download(f"https://drive.google.com/uc?id={GDRIVE_ID}", DEST, quiet=False)

    # Verify the file actually landed; gdown can fail without raising.
    if not os.path.isfile(DEST):
        print("[ERR] Download failed. Manual download:")
        print(" https://drive.google.com/drive/folders/17p6ORr-JQJcw-eYtG2WGNiuS_qVKwdWd")
        print(f" Place L2CSNet_gaze360.pkl in {CHECKPOINTS_DIR}/")
        sys.exit(1)

    print(f"[OK] Downloaded ({os.path.getsize(DEST) / 1024 / 1024:.1f} MB)")


if __name__ == "__main__":
    main()
eslint.config.js ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import js from '@eslint/js'
import globals from 'globals'
import reactHooks from 'eslint-plugin-react-hooks'
import reactRefresh from 'eslint-plugin-react-refresh'
import { defineConfig, globalIgnores } from 'eslint/config'

// Flat ESLint config: lint all JS/JSX sources, skip the build output.
export default defineConfig([
  globalIgnores(['dist']),
  {
    files: ['**/*.{js,jsx}'],
    extends: [
      js.configs.recommended,
      reactHooks.configs.flat.recommended,
      reactRefresh.configs.vite,
    ],
    languageOptions: {
      ecmaVersion: 2020,
      globals: globals.browser,
      parserOptions: {
        ecmaVersion: 'latest',
        ecmaFeatures: { jsx: true },
        sourceType: 'module',
      },
    },
    rules: {
      // Allow intentionally-unused identifiers starting with a capital or underscore
      'no-unused-vars': ['error', { varsIgnorePattern: '^[A-Z_]' }],
    },
  },
])
evaluation/README.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # evaluation/
2
+
3
+ Training logs, threshold/weight analysis, and metrics.
4
+
5
+ **Contents:** `logs/` (JSON from training runs), `plots/` (ROC, weight search, EAR/MAR), `justify_thresholds.py`, `feature_importance.py`, and the generated markdown reports.
6
+
7
+ **Logs:** MLP writes `face_orientation_training_log.json`, XGBoost writes `xgboost_face_orientation_training_log.json`. Paths: `evaluation/logs/`.
8
+
9
+ **Threshold report:** Generate `THRESHOLD_JUSTIFICATION.md` and plots with:
10
+
11
+ ```bash
12
+ python -m evaluation.justify_thresholds
13
+ ```
14
+
15
+ (LOPO over 9 participants, Youden’s J, weight grid search; ~10–15 min.) Outputs go to `plots/` and the markdown file.
16
+
17
+ **Feature importance:** Run `python -m evaluation.feature_importance` for XGBoost gain and leave-one-feature-out LOPO; writes `feature_selection_justification.md`.
18
+
19
+ **Who writes here:** `models.mlp.train`, `models.xgboost.train`, `evaluation.justify_thresholds`, `evaluation.feature_importance`, and the notebooks.
evaluation/THRESHOLD_JUSTIFICATION.md ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Threshold Justification Report
2
+
3
+ Auto-generated by `evaluation/justify_thresholds.py` using LOPO cross-validation over 9 participants (~145k samples).
4
+
5
+ ## 1. ML Model Decision Thresholds
6
+
7
+ Thresholds selected via **Youden's J statistic** (J = sensitivity + specificity - 1) on pooled LOPO held-out predictions.
8
+
9
+ | Model | LOPO AUC | Optimal Threshold (Youden's J) | F1 @ Optimal | F1 @ 0.50 |
10
+ |-------|----------|-------------------------------|--------------|-----------|
11
+ | MLP | 0.8624 | **0.228** | 0.8578 | 0.8149 |
12
+ | XGBoost | 0.8804 | **0.377** | 0.8585 | 0.8424 |
13
+
14
+ ![MLP ROC](plots/roc_mlp.png)
15
+
16
+ ![XGBoost ROC](plots/roc_xgboost.png)
17
+
18
+ ## 2. Precision, Recall and Tradeoff
19
+
20
+ At the optimal threshold (Youden's J), pooled over all LOPO held-out predictions:
21
+
22
+ | Model | Threshold | Precision | Recall | F1 | Accuracy |
23
+ |-------|----------:|----------:|-------:|---:|---------:|
24
+ | MLP | 0.228 | 0.8187 | 0.9008 | 0.8578 | 0.8164 |
25
+ | XGBoost | 0.377 | 0.8426 | 0.8750 | 0.8585 | 0.8228 |
26
+
27
+ Higher threshold → fewer positive predictions → higher precision, lower recall. Youden's J picks the threshold that balances sensitivity and specificity (recall for the positive class and true negative rate).
28
+
29
+ ## 3. Confusion Matrix (Pooled LOPO)
30
+
31
+ At optimal threshold. Rows = true label, columns = predicted label (0 = unfocused, 1 = focused).
32
+
33
+ ### MLP
34
+
35
+ | | Pred 0 | Pred 1 |
36
+ |--|-------:|-------:|
37
+ | **True 0** | 38065 (TN) | 17750 (FP) |
38
+ | **True 1** | 8831 (FN) | 80147 (TP) |
39
+
40
+ TN=38065, FP=17750, FN=8831, TP=80147.
41
+
42
+ ### XGBoost
43
+
44
+ | | Pred 0 | Pred 1 |
45
+ |--|-------:|-------:|
46
+ | **True 0** | 41271 (TN) | 14544 (FP) |
47
+ | **True 1** | 11118 (FN) | 77860 (TP) |
48
+
49
+ TN=41271, FP=14544, FN=11118, TP=77860.
50
+
51
+ ![Confusion MLP](plots/confusion_matrix_mlp.png)
52
+
53
+ ![Confusion XGBoost](plots/confusion_matrix_xgb.png)
54
+
55
+ ## 4. Per-Person Performance Variance (LOPO)
56
+
57
+ One fold per left-out person; metrics at optimal threshold.
58
+
59
+ ### MLP — per held-out person
60
+
61
+ | Person | Accuracy | F1 | Precision | Recall |
62
+ |--------|---------:|---:|----------:|-------:|
63
+ | Abdelrahman | 0.8628 | 0.9029 | 0.8760 | 0.9314 |
64
+ | Jarek | 0.8400 | 0.8770 | 0.8909 | 0.8635 |
65
+ | Junhao | 0.8872 | 0.8986 | 0.8354 | 0.9723 |
66
+ | Kexin | 0.7941 | 0.8123 | 0.7965 | 0.8288 |
67
+ | Langyuan | 0.5877 | 0.6169 | 0.4972 | 0.8126 |
68
+ | Mohamed | 0.8432 | 0.8653 | 0.7931 | 0.9519 |
69
+ | Yingtao | 0.8794 | 0.9263 | 0.9217 | 0.9309 |
70
+ | ayten | 0.8307 | 0.8986 | 0.8558 | 0.9459 |
71
+ | saba | 0.9192 | 0.9243 | 0.9260 | 0.9226 |
72
+
73
+ ### XGBoost — per held-out person
74
+
75
+ | Person | Accuracy | F1 | Precision | Recall |
76
+ |--------|---------:|---:|----------:|-------:|
77
+ | Abdelrahman | 0.8601 | 0.8959 | 0.9129 | 0.8795 |
78
+ | Jarek | 0.8680 | 0.8993 | 0.9070 | 0.8917 |
79
+ | Junhao | 0.9099 | 0.9180 | 0.8627 | 0.9810 |
80
+ | Kexin | 0.7363 | 0.7385 | 0.7906 | 0.6928 |
81
+ | Langyuan | 0.6738 | 0.6945 | 0.5625 | 0.9074 |
82
+ | Mohamed | 0.8868 | 0.8988 | 0.8529 | 0.9498 |
83
+ | Yingtao | 0.8711 | 0.9195 | 0.9347 | 0.9048 |
84
+ | ayten | 0.8451 | 0.9070 | 0.8654 | 0.9528 |
85
+ | saba | 0.9393 | 0.9421 | 0.9615 | 0.9235 |
86
+
87
+ ### Summary across persons
88
+
89
+ | Model | Accuracy mean ± std | F1 mean ± std | Precision mean ± std | Recall mean ± std |
90
+ |-------|---------------------|---------------|----------------------|-------------------|
91
+ | MLP | 0.8271 ± 0.0968 | 0.8580 ± 0.0968 | 0.8214 ± 0.1307 | 0.9067 ± 0.0572 |
92
+ | XGBoost | 0.8434 ± 0.0847 | 0.8682 ± 0.0879 | 0.8500 ± 0.1191 | 0.8981 ± 0.0836 |
93
+
94
+ ## 5. Confidence Intervals (95%, LOPO over 9 persons)
95
+
96
+ Mean ± half-width of 95% t-interval (df=8) for each metric across the 9 left-out persons.
97
+
98
+ | Model | F1 | Accuracy | Precision | Recall |
99
+ |-------|---:|--------:|----------:|-------:|
100
+ | MLP | 0.8580 [0.7835, 0.9326] | 0.8271 [0.7526, 0.9017] | 0.8214 [0.7207, 0.9221] | 0.9067 [0.8626, 0.9507] |
101
+ | XGBoost | 0.8682 [0.8005, 0.9358] | 0.8434 [0.7781, 0.9086] | 0.8500 [0.7583, 0.9417] | 0.8981 [0.8338, 0.9625] |
102
+
103
+ ## 6. Geometric Pipeline Weights (s_face vs s_eye)
104
+
105
+ Grid search over face weight alpha in {0.2 ... 0.8}. Eye weight = 1 - alpha. Threshold per fold via Youden's J.
106
+
107
+ | Face Weight (alpha) | Mean LOPO F1 |
108
+ |--------------------:|-------------:|
109
+ | 0.2 | 0.7926 |
110
+ | 0.3 | 0.8002 |
111
+ | 0.4 | 0.7719 |
112
+ | 0.5 | 0.7868 |
113
+ | 0.6 | 0.8184 |
114
+ | 0.7 | 0.8195 **<-- selected** |
115
+ | 0.8 | 0.8126 |
116
+
117
+ **Best:** alpha = 0.7 (face 70%, eye 30%)
118
+
119
+ ![Geometric weight search](plots/geo_weight_search.png)
120
+
121
+ ## 7. Hybrid Pipeline: MLP vs Geometric
122
+
123
+ Grid search over w_mlp in {0.3 ... 0.8}. w_geo = 1 - w_mlp. Geometric sub-score uses same weights as geometric pipeline (face=0.7, eye=0.3).
124
+
125
+ | MLP Weight (w_mlp) | Mean LOPO F1 |
126
+ |-------------------:|-------------:|
127
+ | 0.3 | 0.8409 **<-- selected** |
128
+ | 0.4 | 0.8246 |
129
+ | 0.5 | 0.8164 |
130
+ | 0.6 | 0.8106 |
131
+ | 0.7 | 0.8039 |
132
+ | 0.8 | 0.8016 |
133
+
134
+ **Best:** w_mlp = 0.3 (MLP 30%, geometric 70%) → mean LOPO F1 = 0.8409
135
+
136
+ ![Hybrid MLP weight search](plots/hybrid_weight_search.png)
137
+
138
+ ## 8. Hybrid Pipeline: XGBoost vs Geometric
139
+
140
+ Same grid over w_xgb in {0.3 ... 0.8}. w_geo = 1 - w_xgb.
141
+
142
+ | XGBoost Weight (w_xgb) | Mean LOPO F1 |
143
+ |-----------------------:|-------------:|
144
+ | 0.3 | 0.8639 **<-- selected** |
145
+ | 0.4 | 0.8552 |
146
+ | 0.5 | 0.8451 |
147
+ | 0.6 | 0.8419 |
148
+ | 0.7 | 0.8382 |
149
+ | 0.8 | 0.8353 |
150
+
151
+ **Best:** w_xgb = 0.3 → mean LOPO F1 = 0.8639
152
+
153
+ ![Hybrid XGBoost weight search](plots/hybrid_xgb_weight_search.png)
154
+
155
+ ### Which hybrid is used in the app?
156
+
157
+ **XGBoost hybrid is better** (F1 = 0.8639 vs MLP hybrid F1 = 0.8409).
158
+
159
+ ### Logistic regression combiner (replaces heuristic weights)
160
+
161
+ Instead of a fixed linear blend (e.g. 0.3·ML + 0.7·geo), a **logistic regression** combines model probability and geometric score: meta-features = [model_prob, geo_score], trained on the same LOPO splits. Threshold from Youden's J on combiner output.
162
+
163
+ | Method | Mean LOPO F1 |
164
+ |--------|-------------:|
165
+ | Heuristic weight grid (best w) | 0.8639 |
166
+ | **LR combiner** | **0.8241** |
167
+
168
+ Note that in this run the fixed heuristic blend outperformed the LR combiner (F1 0.8639 vs 0.8241); the combiner is retained for its calibrated probability output. The app uses the saved LR combiner when `combiner_path` is set in `hybrid_focus_config.json`.
169
+
170
+ ## 9. Eye and Mouth Aspect Ratio Thresholds
171
+
172
+ ### EAR (Eye Aspect Ratio)
173
+
174
+ Reference: Soukupova & Cech, "Real-Time Eye Blink Detection Using Facial Landmarks" (2016) established EAR ~ 0.2 as a blink threshold.
175
+
176
+ Our thresholds define a linear interpolation zone around this established value:
177
+
178
+ | Constant | Value | Justification |
179
+ |----------|------:|---------------|
180
+ | `ear_closed` | 0.16 | Below this, eyes are fully shut. 16.3% of samples fall here. |
181
+ | `EAR_BLINK_THRESH` | 0.21 | Blink detection point; close to the 0.2 reference. 21.2% of samples below. |
182
+ | `ear_open` | 0.30 | Above this, eyes are fully open. 70.4% of samples here. |
183
+
184
+ Between 0.16 and 0.30 the `_ear_score` function linearly interpolates from 0 to 1, providing a smooth transition rather than a hard binary cutoff.
185
+
186
+ ![EAR distribution](plots/ear_distribution.png)
187
+
188
+ ### MAR (Mouth Aspect Ratio)
189
+
190
+ | Constant | Value | Justification |
191
+ |----------|------:|---------------|
192
+ | `MAR_YAWN_THRESHOLD` | 0.55 | Only 1.7% of samples exceed this, confirming it captures genuine yawns without false positives. |
193
+
194
+ ![MAR distribution](plots/mar_distribution.png)
195
+
196
+ ## 10. Other Constants
197
+
198
+ | Constant | Value | Rationale |
199
+ |----------|------:|-----------|
200
+ | `gaze_max_offset` | 0.28 | Max iris displacement (normalised) before gaze score drops to zero. Corresponds to ~56% of the eye width; beyond this the iris is at the extreme edge. |
201
+ | `max_angle` | 22.0 deg | Head deviation beyond which face score = 0. Based on typical monitor-viewing cone: at 60 cm distance and a 24" monitor, the viewing angle is ~20-25 degrees. |
202
+ | `roll_weight` | 0.5 | Roll is less indicative of inattention than yaw/pitch (tilting head doesn't mean looking away), so it's down-weighted by 50%. |
203
+ | `EMA alpha` | 0.3 | Smoothing factor for focus score. Gives ~3-4 frame effective window; balances responsiveness vs flicker. |
204
+ | `grace_frames` | 15 | ~0.5 s at 30 fps before penalising no-face. Allows brief occlusions (e.g. hand gesture) without dropping score. |
205
+ | `PERCLOS_WINDOW` | 60 frames | 2 s at 30 fps; standard PERCLOS measurement window (Dinges & Grace, 1998). |
206
+ | `BLINK_WINDOW_SEC` | 30 s | Blink rate measured over 30 s; typical spontaneous blink rate is 15-20/min (Bentivoglio et al., 1997). |
evaluation/feature_importance.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Feature importance and leave-one-feature-out ablation for the 10 face_orientation features.
3
+ Run: python -m evaluation.feature_importance
4
+
5
+ Outputs:
6
+ - XGBoost gain-based importance (from trained checkpoint)
7
+ - Leave-one-feature-out LOPO F1 (ablation): drop each feature in turn, report mean LOPO F1.
8
+ - Writes evaluation/feature_selection_justification.md
9
+ """
10
+
11
+ import os
12
+ import sys
13
+
14
+ import numpy as np
15
+ from sklearn.preprocessing import StandardScaler
16
+ from sklearn.metrics import f1_score
17
+ from xgboost import XGBClassifier
18
+
19
+ _PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
20
+ if _PROJECT_ROOT not in sys.path:
21
+ sys.path.insert(0, _PROJECT_ROOT)
22
+
23
+ from data_preparation.prepare_dataset import load_per_person, SELECTED_FEATURES
24
+
25
+ SEED = 42
26
+ FEATURES = SELECTED_FEATURES["face_orientation"]
27
+
28
+
29
+ def _resolve_xgb_path():
30
+ return os.path.join(_PROJECT_ROOT, "checkpoints", "xgboost_face_orientation_best.json")
31
+
32
+
33
+ def xgb_feature_importance():
34
+ """Load trained XGBoost and return gain-based importance for the 10 features."""
35
+ path = _resolve_xgb_path()
36
+ if not os.path.isfile(path):
37
+ print(f"[WARN] No XGBoost checkpoint at {path}; skip importance.")
38
+ return None
39
+ model = XGBClassifier()
40
+ model.load_model(path)
41
+ imp = model.get_booster().get_score(importance_type="gain")
42
+ # Booster uses f0, f1, ...; we use same order as FEATURES (training order)
43
+ by_idx = {int(k.replace("f", "")): v for k, v in imp.items() if k.startswith("f")}
44
+ order = [by_idx.get(i, 0.0) for i in range(len(FEATURES))]
45
+ return dict(zip(FEATURES, order))
46
+
47
+
48
+ def run_ablation_lopo():
49
+ """Leave-one-feature-out: for each feature, train XGBoost on the other 9 with LOPO, report mean F1."""
50
+ by_person, _, _ = load_per_person("face_orientation")
51
+ persons = sorted(by_person.keys())
52
+ n_folds = len(persons)
53
+
54
+ results = {}
55
+ for drop_feat in FEATURES:
56
+ idx_keep = [i for i, f in enumerate(FEATURES) if f != drop_feat]
57
+ f1s = []
58
+ for held_out in persons:
59
+ train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
60
+ train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
61
+ X_test, y_test = by_person[held_out]
62
+
63
+ X_tr = train_X[:, idx_keep]
64
+ X_te = X_test[:, idx_keep]
65
+ scaler = StandardScaler().fit(X_tr)
66
+ X_tr_sc = scaler.transform(X_tr)
67
+ X_te_sc = scaler.transform(X_te)
68
+
69
+ xgb = XGBClassifier(
70
+ n_estimators=600, max_depth=8, learning_rate=0.05,
71
+ subsample=0.8, colsample_bytree=0.8,
72
+ reg_alpha=0.1, reg_lambda=1.0,
73
+ eval_metric="logloss",
74
+ random_state=SEED, verbosity=0,
75
+ )
76
+ xgb.fit(X_tr_sc, train_y)
77
+ pred = xgb.predict(X_te_sc)
78
+ f1s.append(f1_score(y_test, pred, average="weighted"))
79
+ results[drop_feat] = np.mean(f1s)
80
+ return results
81
+
82
+
83
+ def run_baseline_lopo_f1():
84
+ """Full 10-feature LOPO mean F1 for reference."""
85
+ by_person, _, _ = load_per_person("face_orientation")
86
+ persons = sorted(by_person.keys())
87
+ f1s = []
88
+ for held_out in persons:
89
+ train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
90
+ train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
91
+ X_test, y_test = by_person[held_out]
92
+ scaler = StandardScaler().fit(train_X)
93
+ X_tr_sc = scaler.transform(train_X)
94
+ X_te_sc = scaler.transform(X_test)
95
+ xgb = XGBClassifier(
96
+ n_estimators=600, max_depth=8, learning_rate=0.05,
97
+ subsample=0.8, colsample_bytree=0.8,
98
+ reg_alpha=0.1, reg_lambda=1.0,
99
+ eval_metric="logloss",
100
+ random_state=SEED, verbosity=0,
101
+ )
102
+ xgb.fit(X_tr_sc, train_y)
103
+ pred = xgb.predict(X_te_sc)
104
+ f1s.append(f1_score(y_test, pred, average="weighted"))
105
+ return np.mean(f1s)
106
+
107
+
108
+ # Channel subsets for ablation (subset name -> list of feature names)
109
+ CHANNEL_SUBSETS = {
110
+ "head_pose": ["head_deviation", "s_face", "pitch"],
111
+ "eye_state": ["ear_left", "ear_avg", "ear_right", "perclos"],
112
+ "gaze": ["h_gaze", "gaze_offset", "s_eye"],
113
+ }
114
+
115
+
116
+ def run_channel_ablation():
117
+ """LOPO XGBoost with head-only, eye-only, gaze-only, and all 10. Returns dict subset_name -> mean F1."""
118
+ by_person, _, _ = load_per_person("face_orientation")
119
+ persons = sorted(by_person.keys())
120
+ results = {}
121
+ for subset_name, feat_list in CHANNEL_SUBSETS.items():
122
+ idx_keep = [FEATURES.index(f) for f in feat_list]
123
+ f1s = []
124
+ for held_out in persons:
125
+ train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
126
+ train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
127
+ X_test, y_test = by_person[held_out]
128
+ X_tr = train_X[:, idx_keep]
129
+ X_te = X_test[:, idx_keep]
130
+ scaler = StandardScaler().fit(X_tr)
131
+ X_tr_sc = scaler.transform(X_tr)
132
+ X_te_sc = scaler.transform(X_te)
133
+ xgb = XGBClassifier(
134
+ n_estimators=600, max_depth=8, learning_rate=0.05,
135
+ subsample=0.8, colsample_bytree=0.8,
136
+ reg_alpha=0.1, reg_lambda=1.0,
137
+ eval_metric="logloss",
138
+ random_state=SEED, verbosity=0,
139
+ )
140
+ xgb.fit(X_tr_sc, train_y)
141
+ pred = xgb.predict(X_te_sc)
142
+ f1s.append(f1_score(y_test, pred, average="weighted"))
143
+ results[subset_name] = np.mean(f1s)
144
+ baseline = run_baseline_lopo_f1()
145
+ results["all_10"] = baseline
146
+ return results
147
+
148
+
149
+ def main():
150
+ print("=== Feature importance (XGBoost gain) ===")
151
+ imp = xgb_feature_importance()
152
+ if imp:
153
+ for name in FEATURES:
154
+ print(f" {name}: {imp.get(name, 0):.2f}")
155
+ order = sorted(imp.items(), key=lambda x: -x[1])
156
+ print(" Top-5 by gain:", [x[0] for x in order[:5]])
157
+
158
+ print("\n=== Leave-one-feature-out ablation (LOPO mean F1) ===")
159
+ baseline = run_baseline_lopo_f1()
160
+ print(f" Baseline (all 10 features) mean LOPO F1: {baseline:.4f}")
161
+ ablation = run_ablation_lopo()
162
+ for feat in FEATURES:
163
+ delta = baseline - ablation[feat]
164
+ print(f" drop {feat}: F1={ablation[feat]:.4f} (Δ={delta:+.4f})")
165
+ worst_drop = min(ablation.items(), key=lambda x: x[1])
166
+ print(f" Largest F1 drop when dropping: {worst_drop[0]} (F1={worst_drop[1]:.4f})")
167
+
168
+ print("\n=== Channel ablation (LOPO mean F1) ===")
169
+ channel_f1 = run_channel_ablation()
170
+ for name, f1 in channel_f1.items():
171
+ print(f" {name}: {f1:.4f}")
172
+
173
+ out_dir = os.path.join(_PROJECT_ROOT, "evaluation")
174
+ out_path = os.path.join(out_dir, "feature_selection_justification.md")
175
+ lines = [
176
+ "# Feature selection justification",
177
+ "",
178
+ "The face_orientation model uses 10 of 17 extracted features. This document summarises empirical support.",
179
+ "",
180
+ "## 1. Domain rationale",
181
+ "",
182
+ "The 10 features were chosen to cover three channels:",
183
+ "- **Head pose:** head_deviation, s_face, pitch",
184
+ "- **Eye state:** ear_left, ear_right, ear_avg, perclos",
185
+ "- **Gaze:** h_gaze, gaze_offset, s_eye",
186
+ "",
187
+ "Excluded: v_gaze (noisy), mar (rare events), yaw/roll (redundant with head_deviation/s_face), blink_rate/closure_duration/yawn_duration (temporal overlap with perclos).",
188
+ "",
189
+ "## 2. XGBoost feature importance (gain)",
190
+ "",
191
+ "From the trained XGBoost checkpoint (gain on the 10 features):",
192
+ "",
193
+ "| Feature | Gain |",
194
+ "|---------|------|",
195
+ ]
196
+ if imp:
197
+ for name in FEATURES:
198
+ lines.append(f"| {name} | {imp.get(name, 0):.2f} |")
199
+ order = sorted(imp.items(), key=lambda x: -x[1])
200
+ lines.append("")
201
+ lines.append(f"**Top 5 by gain:** {', '.join(x[0] for x in order[:5])}.")
202
+ else:
203
+ lines.append("(Run with XGBoost checkpoint to populate.)")
204
+ lines.extend([
205
+ "",
206
+ "## 3. Leave-one-feature-out ablation (LOPO)",
207
+ "",
208
+ f"Baseline (all 10 features) mean LOPO F1: **{baseline:.4f}**.",
209
+ "",
210
+ "| Feature dropped | Mean LOPO F1 | Δ vs baseline |",
211
+ "|------------------|--------------|---------------|",
212
+ ])
213
+ for feat in FEATURES:
214
+ delta = baseline - ablation[feat]
215
+ lines.append(f"| {feat} | {ablation[feat]:.4f} | {delta:+.4f} |")
216
+ worst_drop = min(ablation.items(), key=lambda x: x[1])
217
+ lines.append("")
218
+ lines.append(f"Dropping **{worst_drop[0]}** hurts most (F1={worst_drop[1]:.4f}), consistent with it being important.")
219
+ lines.append("")
220
+ lines.append("## 4. Conclusion")
221
+ lines.append("")
222
+ lines.append("Selection is supported by (1) domain rationale (three attention channels), (2) XGBoost gain importance, and (3) leave-one-out ablation. SHAP or correlation-based pruning can be added in future work.")
223
+ lines.append("")
224
+ with open(out_path, "w", encoding="utf-8") as f:
225
+ f.write("\n".join(lines))
226
+ print(f"\nReport written to {out_path}")
227
+
228
+
229
+ if __name__ == "__main__":
230
+ main()
evaluation/feature_selection_justification.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Feature selection justification
2
+
3
+ The face_orientation model uses 10 of 17 extracted features. This document summarises empirical support.
4
+
5
+ ## 1. Domain rationale
6
+
7
+ The 10 features were chosen to cover three channels:
8
+ - **Head pose:** head_deviation, s_face, pitch
9
+ - **Eye state:** ear_left, ear_right, ear_avg, perclos
10
+ - **Gaze:** h_gaze, gaze_offset, s_eye
11
+
12
+ Excluded: v_gaze (noisy), mar (rare events), yaw/roll (redundant with head_deviation/s_face), blink_rate/closure_duration/yawn_duration (temporal overlap with perclos).
13
+
14
+ ## 2. XGBoost feature importance (gain)
15
+
16
+ From the trained XGBoost checkpoint (gain on the 10 features):
17
+
18
+ | Feature | Gain |
19
+ |---------|------|
20
+ | head_deviation | 8.83 |
21
+ | s_face | 10.27 |
22
+ | s_eye | 2.18 |
23
+ | h_gaze | 4.99 |
24
+ | pitch | 4.64 |
25
+ | ear_left | 3.57 |
26
+ | ear_avg | 6.96 |
27
+ | ear_right | 9.54 |
28
+ | gaze_offset | 1.80 |
29
+ | perclos | 5.68 |
30
+
31
+ **Top 5 by gain:** s_face, ear_right, head_deviation, ear_avg, perclos.
32
+
33
+ ## 3. Leave-one-feature-out ablation (LOPO)
34
+
35
+ Baseline (all 10 features) mean LOPO F1: **0.8327**.
36
+
37
+ | Feature dropped | Mean LOPO F1 | Δ vs baseline |
38
+ |------------------|--------------|---------------|
39
+ | head_deviation | 0.8395 | -0.0068 |
40
+ | s_face | 0.8390 | -0.0063 |
41
+ | s_eye | 0.8342 | -0.0015 |
42
+ | h_gaze | 0.8244 | +0.0083 |
43
+ | pitch | 0.8250 | +0.0077 |
44
+ | ear_left | 0.8326 | +0.0001 |
45
+ | ear_avg | 0.8350 | -0.0023 |
46
+ | ear_right | 0.8344 | -0.0017 |
47
+ | gaze_offset | 0.8351 | -0.0024 |
48
+ | perclos | 0.8258 | +0.0069 |
49
+
50
+ Dropping **h_gaze** hurts most (F1=0.8244), consistent with it being important.
51
+
52
+ ## 4. Conclusion
53
+
54
+ Selection is supported by (1) domain rationale (three attention channels), (2) XGBoost gain importance, and (3) leave-one-out ablation. SHAP or correlation-based pruning can be added in future work.
evaluation/justify_thresholds.py ADDED
@@ -0,0 +1,555 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LOPO threshold/weight analysis. Run: python -m evaluation.justify_thresholds
2
+ # ClearML logging: set USE_CLEARML=1 env var or pass --clearml flag
3
+
4
+ import glob
5
+ import os
6
+ import sys
7
+
8
+ import numpy as np
9
+ import matplotlib
10
+ matplotlib.use("Agg")
11
+ import matplotlib.pyplot as plt
12
+ from sklearn.neural_network import MLPClassifier
13
+ from sklearn.preprocessing import StandardScaler
14
+ from sklearn.metrics import roc_curve, roc_auc_score, f1_score
15
+ from xgboost import XGBClassifier
16
+
17
+ _PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
18
+ sys.path.insert(0, _PROJECT_ROOT)
19
+
20
+ from data_preparation.prepare_dataset import load_per_person, SELECTED_FEATURES
21
+
22
+ PLOTS_DIR = os.path.join(os.path.dirname(__file__), "plots")
23
+ REPORT_PATH = os.path.join(os.path.dirname(__file__), "THRESHOLD_JUSTIFICATION.md")
24
+ SEED = 42
25
+
26
+ # ClearML
27
+ # start logging with: USE_CLEARML=1 python -m evaluation.justify_thresholds or: python -m evaluation.justify_thresholds --clearml
28
+ _USE_CLEARML = os.environ.get("USE_CLEARML", "0") == "1" or "--clearml" in sys.argv
29
+
30
+ _task = None
31
+ _logger = None
32
+
33
+ if _USE_CLEARML:
34
+ try:
35
+ from clearml import Task
36
+ _task = Task.init(
37
+ project_name="Focus Guard",
38
+ task_name="Threshold Justification",
39
+ tags=["evaluation", "thresholds"],
40
+ )
41
+ _task.connect({"SEED": SEED, "n_participants": 9})
42
+ _logger = _task.get_logger()
43
+ print("ClearML enabled — logging to project 'Focus Guard'")
44
+ except ImportError:
45
+ print("WARNING: ClearML not installed. Continuing without logging.")
46
+ _USE_CLEARML = False
47
+
48
+ def _youdens_j(y_true, y_prob):
49
+ fpr, tpr, thresholds = roc_curve(y_true, y_prob)
50
+ j = tpr - fpr
51
+ idx = j.argmax()
52
+ auc = roc_auc_score(y_true, y_prob)
53
+ return float(thresholds[idx]), fpr, tpr, thresholds, float(auc)
54
+
55
+
56
+ def _f1_at_threshold(y_true, y_prob, threshold):
57
+ return f1_score(y_true, (y_prob >= threshold).astype(int), zero_division=0)
58
+
59
+
60
+ def _plot_roc(fpr, tpr, auc, opt_thresh, opt_idx, title, path, clearml_title=None):
61
+ fig, ax = plt.subplots(figsize=(6, 5))
62
+ ax.plot(fpr, tpr, lw=2, label=f"ROC (AUC = {auc:.4f})")
63
+ ax.plot(fpr[opt_idx], tpr[opt_idx], "ro", markersize=10,
64
+ label=f"Youden's J optimum (t = {opt_thresh:.3f})")
65
+ ax.plot([0, 1], [0, 1], "k--", lw=1, alpha=0.5)
66
+ ax.set_xlabel("False Positive Rate")
67
+ ax.set_ylabel("True Positive Rate")
68
+ ax.set_title(title)
69
+ ax.legend(loc="lower right")
70
+ fig.tight_layout()
71
+
72
+ # Log to ClearML before closing the figure
73
+ if _logger and clearml_title:
74
+ _logger.report_matplotlib_figure(
75
+ title=clearml_title, series="ROC", figure=fig, iteration=0
76
+ )
77
+
78
+ fig.savefig(path, dpi=150)
79
+ plt.close(fig)
80
+ print(f" saved {path}")
81
+
82
+
83
+ def run_lopo_models():
84
+ print("\n=== LOPO: MLP and XGBoost ===")
85
+ by_person, _, _ = load_per_person("face_orientation")
86
+ persons = sorted(by_person.keys())
87
+
88
+ results = {"mlp": {"y": [], "p": []}, "xgb": {"y": [], "p": []}}
89
+
90
+ for i, held_out in enumerate(persons):
91
+ X_test, y_test = by_person[held_out]
92
+
93
+ train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
94
+ train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
95
+
96
+ scaler = StandardScaler().fit(train_X)
97
+ X_tr_sc = scaler.transform(train_X)
98
+ X_te_sc = scaler.transform(X_test)
99
+
100
+ mlp = MLPClassifier(
101
+ hidden_layer_sizes=(64, 32), activation="relu",
102
+ max_iter=200, early_stopping=True, validation_fraction=0.15,
103
+ random_state=SEED, verbose=False,
104
+ )
105
+ mlp.fit(X_tr_sc, train_y)
106
+ mlp_prob = mlp.predict_proba(X_te_sc)[:, 1]
107
+ results["mlp"]["y"].append(y_test)
108
+ results["mlp"]["p"].append(mlp_prob)
109
+
110
+ xgb = XGBClassifier(
111
+ n_estimators=600, max_depth=8, learning_rate=0.05,
112
+ subsample=0.8, colsample_bytree=0.8,
113
+ reg_alpha=0.1, reg_lambda=1.0,
114
+ use_label_encoder=False, eval_metric="logloss",
115
+ random_state=SEED, verbosity=0,
116
+ )
117
+ xgb.fit(X_tr_sc, train_y)
118
+ xgb_prob = xgb.predict_proba(X_te_sc)[:, 1]
119
+ results["xgb"]["y"].append(y_test)
120
+ results["xgb"]["p"].append(xgb_prob)
121
+
122
+ print(f" fold {i+1}/{len(persons)}: held out {held_out} "
123
+ f"({X_test.shape[0]} samples)")
124
+
125
+ for key in results:
126
+ results[key]["y"] = np.concatenate(results[key]["y"])
127
+ results[key]["p"] = np.concatenate(results[key]["p"])
128
+
129
+ return results
130
+
131
+
132
+ def analyse_model_thresholds(results):
133
+ print("\n=== Model threshold analysis ===")
134
+ model_stats = {}
135
+
136
+ for name, label in [("mlp", "MLP"), ("xgb", "XGBoost")]:
137
+ y, p = results[name]["y"], results[name]["p"]
138
+ opt_t, fpr, tpr, thresholds, auc = _youdens_j(y, p)
139
+ j = tpr - fpr
140
+ opt_idx = j.argmax()
141
+ f1_opt = _f1_at_threshold(y, p, opt_t)
142
+ f1_50 = _f1_at_threshold(y, p, 0.50)
143
+
144
+ path = os.path.join(PLOTS_DIR, f"roc_{name}.png")
145
+ _plot_roc(fpr, tpr, auc, opt_t, opt_idx,
146
+ f"LOPO ROC — {label} (9 folds, 144k samples)", path,
147
+ clearml_title=f"ROC_{label}")
148
+
149
+ model_stats[name] = {
150
+ "label": label, "auc": auc,
151
+ "opt_threshold": opt_t, "f1_opt": f1_opt, "f1_50": f1_50,
152
+ }
153
+ print(f" {label}: AUC={auc:.4f}, optimal threshold={opt_t:.3f} "
154
+ f"(F1={f1_opt:.4f}), F1@0.50={f1_50:.4f}")
155
+
156
+ # Log scalars to ClearML
157
+ if _logger:
158
+ _logger.report_single_value(f"{label} Optimal Threshold", opt_t)
159
+ _logger.report_single_value(f"{label} AUC", auc)
160
+ _logger.report_single_value(f"{label} F1 @ Optimal", f1_opt)
161
+ _logger.report_single_value(f"{label} F1 @ 0.5", f1_50)
162
+
163
+ return model_stats
164
+
165
+ def run_geo_weight_search():
166
+ print("\n=== Geometric weight grid search ===")
167
+
168
+ by_person, _, _ = load_per_person("face_orientation")
169
+ persons = sorted(by_person.keys())
170
+ features = SELECTED_FEATURES["face_orientation"]
171
+ sf_idx = features.index("s_face")
172
+ se_idx = features.index("s_eye")
173
+
174
+ alphas = np.arange(0.2, 0.85, 0.1).round(1)
175
+ alpha_f1 = {a: [] for a in alphas}
176
+
177
+ for held_out in persons:
178
+ X_test, y_test = by_person[held_out]
179
+ sf = X_test[:, sf_idx]
180
+ se = X_test[:, se_idx]
181
+
182
+ train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
183
+ train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
184
+ sf_tr = train_X[:, sf_idx]
185
+ se_tr = train_X[:, se_idx]
186
+
187
+ for a in alphas:
188
+ score_tr = a * sf_tr + (1.0 - a) * se_tr
189
+ opt_t, *_ = _youdens_j(train_y, score_tr)
190
+
191
+ score_te = a * sf + (1.0 - a) * se
192
+ f1 = _f1_at_threshold(y_test, score_te, opt_t)
193
+ alpha_f1[a].append(f1)
194
+
195
+ mean_f1 = {a: np.mean(f1s) for a, f1s in alpha_f1.items()}
196
+ best_alpha = max(mean_f1, key=mean_f1.get)
197
+
198
+ fig, ax = plt.subplots(figsize=(7, 4))
199
+ ax.bar([f"{a:.1f}" for a in alphas],
200
+ [mean_f1[a] for a in alphas], color="steelblue")
201
+ ax.set_xlabel("Face weight (alpha); eye weight = 1 - alpha")
202
+ ax.set_ylabel("Mean LOPO F1")
203
+ ax.set_title("Geometric Pipeline: Face vs Eye Weight Search")
204
+ ax.set_ylim(bottom=max(0, min(mean_f1.values()) - 0.05))
205
+ for i, a in enumerate(alphas):
206
+ ax.text(i, mean_f1[a] + 0.003, f"{mean_f1[a]:.3f}",
207
+ ha="center", va="bottom", fontsize=8)
208
+ fig.tight_layout()
209
+
210
+ # Log to ClearML before closing
211
+ if _logger:
212
+ _logger.report_matplotlib_figure(
213
+ title="Geo Weight Search", series="F1 vs Alpha", figure=fig, iteration=0
214
+ )
215
+
216
+ path = os.path.join(PLOTS_DIR, "geo_weight_search.png")
217
+ fig.savefig(path, dpi=150)
218
+ plt.close(fig)
219
+ print(f" saved {path}")
220
+
221
+ print(f" Best alpha (face weight) = {best_alpha:.1f}, "
222
+ f"mean LOPO F1 = {mean_f1[best_alpha]:.4f}")
223
+
224
+ # Log scalars to ClearML
225
+ if _logger:
226
+ _logger.report_single_value("Geo Best Alpha", best_alpha)
227
+ for i, a in enumerate(sorted(alphas)):
228
+ _logger.report_scalar(
229
+ "Geo Weight Search", "Mean LOPO F1",
230
+ iteration=i, value=mean_f1[a]
231
+ )
232
+
233
+ return dict(mean_f1), best_alpha
234
+
235
+
236
def run_hybrid_weight_search(lopo_results):
    """Grid-search the MLP-vs-geometric mixing weight with LOPO cross-validation.

    For every held-out participant a fresh MLP is fitted on the remaining
    participants; its probabilities are blended with the fixed-weight
    geometric score at each candidate w_mlp, the decision threshold is
    picked on the training folds via Youden's J, and F1 is measured on the
    held-out fold.

    Args:
        lopo_results: output of run_lopo_models(); ``lopo_results["mlp"]["p"]``
            holds the pooled held-out MLP probabilities.
            NOTE(review): the offset-based slicing below assumes those
            probabilities were pooled in sorted-participant order — confirm
            against run_lopo_models.

    Returns:
        (mean_f1, best_w): mapping candidate w_mlp -> mean LOPO F1, plus the
        weight achieving the highest mean F1.
    """
    print("\n=== Hybrid weight grid search ===")

    by_person, _, _ = load_per_person("face_orientation")
    persons = sorted(by_person.keys())
    feature_names = SELECTED_FEATURES["face_orientation"]
    face_col = feature_names.index("s_face")
    eye_col = feature_names.index("s_eye")

    # Geometric sub-score reuses the geometric pipeline's fixed weights.
    GEO_FACE_W = 0.7
    GEO_EYE_W = 0.3

    candidate_weights = np.arange(0.3, 0.85, 0.1).round(1)
    fold_f1 = {w: [] for w in candidate_weights}
    pooled_mlp_p = lopo_results["mlp"]["p"]
    cursor = 0
    for holdout in persons:
        X_te, y_te = by_person[holdout]
        n_te = X_te.shape[0]
        mlp_p_te = pooled_mlp_p[cursor:cursor + n_te]
        cursor += n_te

        geo_te = np.clip(
            GEO_FACE_W * X_te[:, face_col] + GEO_EYE_W * X_te[:, eye_col], 0, 1
        )

        train_folds = [by_person[p] for p in persons if p != holdout]
        X_tr = np.concatenate([pair[0] for pair in train_folds])
        y_tr = np.concatenate([pair[1] for pair in train_folds])
        geo_tr = np.clip(
            GEO_FACE_W * X_tr[:, face_col] + GEO_EYE_W * X_tr[:, eye_col], 0, 1
        )

        scaler = StandardScaler().fit(X_tr)
        fold_mlp = MLPClassifier(
            hidden_layer_sizes=(64, 32), activation="relu",
            max_iter=200, early_stopping=True, validation_fraction=0.15,
            random_state=SEED, verbose=False,
        )
        fold_mlp.fit(scaler.transform(X_tr), y_tr)
        mlp_p_tr = fold_mlp.predict_proba(scaler.transform(X_tr))[:, 1]

        for w in candidate_weights:
            # Threshold is chosen on training data only, then applied to the fold.
            opt_t, *_ = _youdens_j(y_tr, w * mlp_p_tr + (1.0 - w) * geo_tr)
            fold_f1[w].append(
                _f1_at_threshold(y_te, w * mlp_p_te + (1.0 - w) * geo_te, opt_t)
            )

    mean_f1 = {w: np.mean(scores) for w, scores in fold_f1.items()}
    best_w = max(mean_f1, key=mean_f1.get)

    fig, ax = plt.subplots(figsize=(7, 4))
    ax.bar([f"{w:.1f}" for w in candidate_weights],
           [mean_f1[w] for w in candidate_weights], color="darkorange")
    ax.set_xlabel("MLP weight (w_mlp); geo weight = 1 - w_mlp")
    ax.set_ylabel("Mean LOPO F1")
    ax.set_title("Hybrid Pipeline: MLP vs Geometric Weight Search")
    ax.set_ylim(bottom=max(0, min(mean_f1.values()) - 0.05))
    for i, w in enumerate(candidate_weights):
        ax.text(i, mean_f1[w] + 0.003, f"{mean_f1[w]:.3f}",
                ha="center", va="bottom", fontsize=8)
    fig.tight_layout()

    # Send the figure to ClearML before plt.close() discards it.
    if _logger:
        _logger.report_matplotlib_figure(
            title="Hybrid Weight Search", series="F1 vs w_mlp", figure=fig, iteration=0
        )

    out_path = os.path.join(PLOTS_DIR, "hybrid_weight_search.png")
    fig.savefig(out_path, dpi=150)
    plt.close(fig)
    print(f" saved {out_path}")

    print(f" Best w_mlp = {best_w:.1f}, mean LOPO F1 = {mean_f1[best_w]:.4f}")

    # Scalar series lets ClearML plot F1 as a function of w_mlp.
    if _logger:
        _logger.report_single_value("Hybrid Best w_mlp", best_w)
        for i, w in enumerate(sorted(candidate_weights)):
            _logger.report_scalar(
                "Hybrid Weight Search", "Mean LOPO F1",
                iteration=i, value=mean_f1[w]
            )

    return dict(mean_f1), best_w
323
+
324
+
325
def plot_distributions():
    """Plot class-conditional EAR / MAR distributions with threshold overlays.

    Pools per-frame features from every collected ``*.npz`` session under
    ``data/collected_*``, renders two density histograms
    (``ear_distribution.png``, ``mar_distribution.png`` in PLOTS_DIR) with
    the hand-tuned thresholds drawn as vertical lines, and logs both figures
    to ClearML when a logger is configured.

    Returns:
        dict with the percentage of samples on each side of the EAR/MAR
        thresholds and the pooled sample count (keys: ``ear_below_016``,
        ``ear_below_021``, ``ear_above_030``, ``mar_above_055``,
        ``n_samples``).

    Raises:
        FileNotFoundError: if no collected session files are present.
    """
    print("\n=== EAR / MAR distributions ===")
    npz_files = sorted(glob.glob(os.path.join(_PROJECT_ROOT, "data", "collected_*", "*.npz")))
    # Fail early with a clear message instead of an opaque np.concatenate error.
    if not npz_files:
        raise FileNotFoundError("no collected .npz session files found under data/collected_*")

    all_ear_l, all_ear_r, all_mar, all_labels = [], [], [], []
    for f in npz_files:
        # np.load on a .npz keeps the zip archive open; the context manager
        # closes the file handle once the arrays have been extracted.
        with np.load(f, allow_pickle=True) as d:
            names = list(d["feature_names"])
            feat = d["features"].astype(np.float32)
            lab = d["labels"].astype(np.int64)
        all_ear_l.append(feat[:, names.index("ear_left")])
        all_ear_r.append(feat[:, names.index("ear_right")])
        all_mar.append(feat[:, names.index("mar")])
        all_labels.append(lab)

    ear_l = np.concatenate(all_ear_l)
    ear_r = np.concatenate(all_ear_r)
    mar = np.concatenate(all_mar)
    labels = np.concatenate(all_labels)
    ear_min = np.minimum(ear_l, ear_r)
    # Clip extreme outliers so the histograms stay readable.
    ear_plot = np.clip(ear_min, 0, 0.85)
    mar_plot = np.clip(mar, 0, 1.5)

    # Derive the sample count for plot titles instead of hard-coding "144k".
    n_k = len(ear_min) // 1000

    # EAR distribution plot
    fig_ear, ax = plt.subplots(figsize=(7, 4))
    ax.hist(ear_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
    ax.hist(ear_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
    for val, lbl, c in [
        (0.16, "ear_closed = 0.16", "red"),
        (0.21, "EAR_BLINK = 0.21", "orange"),
        (0.30, "ear_open = 0.30", "green"),
    ]:
        ax.axvline(val, color=c, ls="--", lw=1.5, label=lbl)
    ax.set_xlabel("min(left_EAR, right_EAR)")
    ax.set_ylabel("Density")
    ax.set_title(f"EAR Distribution by Class ({n_k}k samples)")
    ax.legend(fontsize=8)
    fig_ear.tight_layout()

    # Log to ClearML before closing
    if _logger:
        _logger.report_matplotlib_figure(
            title="EAR Distribution", series="by class", figure=fig_ear, iteration=0
        )

    path = os.path.join(PLOTS_DIR, "ear_distribution.png")
    fig_ear.savefig(path, dpi=150)
    plt.close(fig_ear)
    print(f" saved {path}")

    # MAR distribution plot
    fig_mar, ax = plt.subplots(figsize=(7, 4))
    ax.hist(mar_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
    ax.hist(mar_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
    ax.axvline(0.55, color="red", ls="--", lw=1.5, label="MAR_YAWN = 0.55")
    ax.set_xlabel("Mouth Aspect Ratio (MAR)")
    ax.set_ylabel("Density")
    ax.set_title(f"MAR Distribution by Class ({n_k}k samples)")
    ax.legend(fontsize=8)
    fig_mar.tight_layout()

    # Log to ClearML before closing
    if _logger:
        _logger.report_matplotlib_figure(
            title="MAR Distribution", series="by class", figure=fig_mar, iteration=0
        )

    path = os.path.join(PLOTS_DIR, "mar_distribution.png")
    fig_mar.savefig(path, dpi=150)
    plt.close(fig_mar)
    print(f" saved {path}")

    # Coverage of each threshold over the pooled samples.
    closed_pct = np.mean(ear_min < 0.16) * 100
    blink_pct = np.mean(ear_min < 0.21) * 100
    open_pct = np.mean(ear_min >= 0.30) * 100
    yawn_pct = np.mean(mar > 0.55) * 100

    stats = {
        "ear_below_016": closed_pct,
        "ear_below_021": blink_pct,
        "ear_above_030": open_pct,
        "mar_above_055": yawn_pct,
        "n_samples": len(ear_min),
    }
    print(f" EAR<0.16 (closed): {closed_pct:.1f}% | EAR<0.21 (blink): {blink_pct:.1f}% | "
          f"EAR>=0.30 (open): {open_pct:.1f}%")
    print(f" MAR>0.55 (yawn): {yawn_pct:.1f}%")
    return stats
413
+
414
+
415
def write_report(model_stats, geo_f1, best_alpha, hybrid_f1, best_w, dist_stats):
    """Assemble and write the markdown threshold-justification report.

    The report is written to ``REPORT_PATH`` with one section per analysis
    step; each section is built by a dedicated private helper below.

    Args:
        model_stats: per-model dicts keyed "mlp"/"xgb" with ``label``,
            ``auc``, ``opt_threshold``, ``f1_opt``, ``f1_50``.
        geo_f1: mapping face-weight alpha -> mean LOPO F1.
        best_alpha: selected face weight.
        hybrid_f1: mapping w_mlp -> mean LOPO F1.
        best_w: selected MLP weight.
        dist_stats: EAR/MAR threshold coverage from plot_distributions().
    """
    lines = []
    lines.append("# Threshold Justification Report")
    lines.append("")
    lines.append("Auto-generated by `evaluation/justify_thresholds.py` using LOPO cross-validation "
                 "over 9 participants (~145k samples).")
    lines.append("")
    lines += _report_model_section(model_stats)
    lines += _report_geo_section(geo_f1, best_alpha)
    lines += _report_hybrid_section(hybrid_f1, best_w)
    lines += _report_ear_mar_section(dist_stats)
    lines += _report_constants_section()

    with open(REPORT_PATH, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print(f"\nReport written to {REPORT_PATH}")


def _report_model_section(model_stats):
    """Section 1: ML model decision-threshold table plus ROC plot links."""
    out = []
    out.append("## 1. ML Model Decision Thresholds")
    out.append("")
    out.append("Thresholds selected via **Youden's J statistic** (J = sensitivity + specificity - 1) "
               "on pooled LOPO held-out predictions.")
    out.append("")
    out.append("| Model | LOPO AUC | Optimal Threshold (Youden's J) | F1 @ Optimal | F1 @ 0.50 |")
    out.append("|-------|----------|-------------------------------|--------------|-----------|")
    for key in ("mlp", "xgb"):
        s = model_stats[key]
        out.append(f"| {s['label']} | {s['auc']:.4f} | **{s['opt_threshold']:.3f}** | "
                   f"{s['f1_opt']:.4f} | {s['f1_50']:.4f} |")
    out.append("")
    out.append("![MLP ROC](plots/roc_mlp.png)")
    out.append("")
    out.append("![XGBoost ROC](plots/roc_xgboost.png)")
    out.append("")
    return out


def _report_geo_section(geo_f1, best_alpha):
    """Section 2: geometric face-vs-eye weight grid-search results."""
    out = []
    out.append("## 2. Geometric Pipeline Weights (s_face vs s_eye)")
    out.append("")
    out.append("Grid search over face weight alpha in {0.2 ... 0.8}. "
               "Eye weight = 1 - alpha. Threshold per fold via Youden's J.")
    out.append("")
    out.append("| Face Weight (alpha) | Mean LOPO F1 |")
    out.append("|--------------------:|-------------:|")
    for a in sorted(geo_f1.keys()):
        marker = " **<-- selected**" if a == best_alpha else ""
        out.append(f"| {a:.1f} | {geo_f1[a]:.4f}{marker} |")
    out.append("")
    out.append(f"**Best:** alpha = {best_alpha:.1f} (face {best_alpha*100:.0f}%, "
               f"eye {(1-best_alpha)*100:.0f}%)")
    out.append("")
    out.append("![Geometric weight search](plots/geo_weight_search.png)")
    out.append("")
    return out


def _report_hybrid_section(hybrid_f1, best_w):
    """Section 3: hybrid MLP-vs-geometric weight grid-search results."""
    out = []
    out.append("## 3. Hybrid Pipeline Weights (MLP vs Geometric)")
    out.append("")
    out.append("Grid search over w_mlp in {0.3 ... 0.8}. w_geo = 1 - w_mlp. "
               "Geometric sub-score uses same weights as geometric pipeline (face=0.7, eye=0.3). "
               "If you change geometric weights, re-run this script — optimal w_mlp can shift.")
    out.append("")
    out.append("| MLP Weight (w_mlp) | Mean LOPO F1 |")
    out.append("|-------------------:|-------------:|")
    for w in sorted(hybrid_f1.keys()):
        marker = " **<-- selected**" if w == best_w else ""
        out.append(f"| {w:.1f} | {hybrid_f1[w]:.4f}{marker} |")
    out.append("")
    out.append(f"**Best:** w_mlp = {best_w:.1f} (MLP {best_w*100:.0f}%, "
               f"geometric {(1-best_w)*100:.0f}%)")
    out.append("")
    out.append("![Hybrid weight search](plots/hybrid_weight_search.png)")
    out.append("")
    return out


def _report_ear_mar_section(dist_stats):
    """Section 4: EAR/MAR threshold tables with observed coverage percentages."""
    out = []
    out.append("## 4. Eye and Mouth Aspect Ratio Thresholds")
    out.append("")
    out.append("### EAR (Eye Aspect Ratio)")
    out.append("")
    out.append("Reference: Soukupova & Cech, \"Real-Time Eye Blink Detection Using Facial "
               "Landmarks\" (2016) established EAR ~ 0.2 as a blink threshold.")
    out.append("")
    out.append("Our thresholds define a linear interpolation zone around this established value:")
    out.append("")
    out.append("| Constant | Value | Justification |")
    out.append("|----------|------:|---------------|")
    out.append(f"| `ear_closed` | 0.16 | Below this, eyes are fully shut. "
               f"{dist_stats['ear_below_016']:.1f}% of samples fall here. |")
    out.append(f"| `EAR_BLINK_THRESH` | 0.21 | Blink detection point; close to the 0.2 reference. "
               f"{dist_stats['ear_below_021']:.1f}% of samples below. |")
    out.append(f"| `ear_open` | 0.30 | Above this, eyes are fully open. "
               f"{dist_stats['ear_above_030']:.1f}% of samples here. |")
    out.append("")
    out.append("Between 0.16 and 0.30 the `_ear_score` function linearly interpolates from 0 to 1, "
               "providing a smooth transition rather than a hard binary cutoff.")
    out.append("")
    out.append("![EAR distribution](plots/ear_distribution.png)")
    out.append("")
    out.append("### MAR (Mouth Aspect Ratio)")
    out.append("")
    # Plain string (the original had a pointless f-prefix here).
    out.append("| Constant | Value | Justification |")
    out.append("|----------|------:|---------------|")
    out.append(f"| `MAR_YAWN_THRESHOLD` | 0.55 | Only {dist_stats['mar_above_055']:.1f}% of "
               f"samples exceed this, confirming it captures genuine yawns without false positives. |")
    out.append("")
    out.append("![MAR distribution](plots/mar_distribution.png)")
    out.append("")
    return out


def _report_constants_section():
    """Section 5: static rationale table for the remaining tuned constants."""
    return [
        "## 5. Other Constants",
        "",
        "| Constant | Value | Rationale |",
        "|----------|------:|-----------|",
        "| `gaze_max_offset` | 0.28 | Max iris displacement (normalised) before gaze score "
        "drops to zero. Corresponds to ~56% of the eye width; beyond this the iris is at "
        "the extreme edge. |",
        "| `max_angle` | 22.0 deg | Head deviation beyond which face score = 0. Based on "
        "typical monitor-viewing cone: at 60 cm distance and a 24\" monitor, the viewing "
        "angle is ~20-25 degrees. |",
        "| `roll_weight` | 0.5 | Roll is less indicative of inattention than yaw/pitch "
        "(tilting head doesn't mean looking away), so it's down-weighted by 50%. |",
        "| `EMA alpha` | 0.3 | Smoothing factor for focus score. "
        "Gives ~3-4 frame effective window; balances responsiveness vs flicker. |",
        "| `grace_frames` | 15 | ~0.5 s at 30 fps before penalising no-face. Allows brief "
        "occlusions (e.g. hand gesture) without dropping score. |",
        "| `PERCLOS_WINDOW` | 60 frames | 2 s at 30 fps; standard PERCLOS measurement "
        "window (Dinges & Grace, 1998). |",
        "| `BLINK_WINDOW_SEC` | 30 s | Blink rate measured over 30 s; typical spontaneous "
        "blink rate is 15-20/min (Bentivoglio et al., 1997). |",
        "",
    ]
533
+
534
+
535
def main():
    """Run the full threshold-justification workflow and write the report."""
    os.makedirs(PLOTS_DIR, exist_ok=True)

    # Each step feeds the next; LOPO predictions are reused by the hybrid search.
    lopo = run_lopo_models()
    stats = analyse_model_thresholds(lopo)
    geo_scores, alpha = run_geo_weight_search()
    hybrid_scores, w_mlp = run_hybrid_weight_search(lopo)
    distribution_stats = plot_distributions()

    write_report(stats, geo_scores, alpha, hybrid_scores, w_mlp, distribution_stats)

    # Finalise the ClearML task, if one was started at import time.
    if _task:
        _task.close()
        print("ClearML task closed.")

    print("\nDone.")
552
+
553
+
554
# Script entry point: only run the full analysis when executed directly.
if __name__ == "__main__":
    main()
evaluation/logs/.gitkeep ADDED
File without changes
evaluation/plots/confusion_matrix_mlp.png ADDED
evaluation/plots/confusion_matrix_xgb.png ADDED
evaluation/plots/ear_distribution.png ADDED
evaluation/plots/geo_weight_search.png ADDED
evaluation/plots/hybrid_weight_search.png ADDED