k22056537 committed on
Commit
666df9c
·
1 Parent(s): 773cbaa

chore: add template layout and stage 1 face mesh demo

Browse files
{models/eye_behaviour_model → data_preparation/eye_crops/test/closed}/.gitkeep RENAMED
File without changes
{models/face_landmarks_pretrained → data_preparation/eye_crops/test/open}/.gitkeep RENAMED
File without changes
{models/face_orientation_model → data_preparation/eye_crops/train/closed}/.gitkeep RENAMED
File without changes
data_preparation/eye_crops/train/open/.gitkeep ADDED
File without changes
data_preparation/eye_crops/val/closed/.gitkeep ADDED
File without changes
data_preparation/eye_crops/val/open/.gitkeep ADDED
File without changes
evaluation/evaluate.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # stub
evaluation/logs/.gitkeep ADDED
File without changes
evaluation/metrics.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # stub
models/attention_model/__init__.py ADDED
File without changes
models/attention_model/attention_classifier.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # stub
models/attention_model/collect_features.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # stub
models/attention_model/train_attention.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # stub
models/attention_score_fusion/__init__.py ADDED
File without changes
models/attention_score_fusion/fusion.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # stub
models/eye_behaviour/__init__.py ADDED
File without changes
models/eye_behaviour/eye_attention_model.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # stub
models/eye_behaviour/eye_crop.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # stub
models/eye_behaviour/eye_scorer.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # stub
models/face_mesh/.gitkeep ADDED
File without changes
models/face_mesh/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # face mesh (stage 1)
models/face_mesh/face_mesh.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""MediaPipe FaceLandmarker — 478 landmarks (incl. iris)."""

import os
from pathlib import Path
from urllib.request import urlretrieve

import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks.python.vision import FaceLandmarkerOptions, FaceLandmarker, RunningMode
from mediapipe.tasks import python as mp_tasks

# Google-hosted FaceLandmarker bundle (float16 variant); downloaded once and
# cached locally by _ensure_model().
_MODEL_URL = (
    "https://storage.googleapis.com/mediapipe-models/face_landmarker/"
    "face_landmarker/float16/latest/face_landmarker.task"
)
17
+
18
+
def _ensure_model() -> str:
    """Return the local path of the FaceLandmarker model, downloading it if needed.

    The model is cached at ``$FOCUSGUARD_CACHE_DIR/face_landmarker.task``
    (default: ``~/.cache/focusguard``).

    The download is written to a temporary ``.part`` file and atomically
    renamed into place.  Previously an interrupted ``urlretrieve`` could
    leave a truncated ``face_landmarker.task`` behind, which every later
    run would treat as a valid cache hit and hand to MediaPipe.

    Returns:
        str: absolute path to the ``.task`` model file.

    Raises:
        urllib.error.URLError / OSError: if the download fails; no partial
        model file is left behind in that case.
    """
    cache_dir = Path(os.environ.get(
        "FOCUSGUARD_CACHE_DIR",
        Path.home() / ".cache" / "focusguard",
    ))
    model_path = cache_dir / "face_landmarker.task"
    if model_path.exists():
        return str(model_path)

    cache_dir.mkdir(parents=True, exist_ok=True)
    print(f"[FACE_MESH] Downloading model to {model_path}...")
    part_path = model_path.with_name(model_path.name + ".part")
    try:
        urlretrieve(_MODEL_URL, part_path)
        # os.replace is atomic on the same filesystem, so readers see either
        # no model file or a complete one — never a truncated download.
        os.replace(part_path, model_path)
    except BaseException:
        # Clean up the stale partial download before propagating.
        part_path.unlink(missing_ok=True)
        raise
    print("[FACE_MESH] Download complete.")
    return str(model_path)
32
+
33
+
34
+ class FaceMeshDetector:
35
+
36
+ # indices for eyes/iris (for downstream)
37
+ LEFT_EYE_INDICES = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246]
38
+ RIGHT_EYE_INDICES = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398]
39
+ LEFT_IRIS_INDICES = [468, 469, 470, 471, 472]
40
+ RIGHT_IRIS_INDICES = [473, 474, 475, 476, 477]
41
+
42
+ def __init__(
43
+ self,
44
+ max_num_faces: int = 1,
45
+ min_detection_confidence: float = 0.5,
46
+ min_tracking_confidence: float = 0.5,
47
+ ):
48
+ model_path = _ensure_model()
49
+ options = FaceLandmarkerOptions(
50
+ base_options=mp_tasks.BaseOptions(model_asset_path=model_path),
51
+ num_faces=max_num_faces,
52
+ min_face_detection_confidence=min_detection_confidence,
53
+ min_face_presence_confidence=min_detection_confidence,
54
+ min_tracking_confidence=min_tracking_confidence,
55
+ running_mode=RunningMode.VIDEO,
56
+ )
57
+ self._landmarker = FaceLandmarker.create_from_options(options)
58
+ self._frame_ts = 0 # ms, for video API
59
+
60
+ def process(self, bgr_frame: np.ndarray) -> np.ndarray | None:
61
+ # BGR in -> (478,3) norm x,y,z or None
62
+ rgb = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
63
+ mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
64
+ self._frame_ts += 33 # ~30fps
65
+ result = self._landmarker.detect_for_video(mp_image, self._frame_ts)
66
+
67
+ if not result.face_landmarks:
68
+ return None
69
+
70
+ face = result.face_landmarks[0]
71
+ return np.array([(lm.x, lm.y, lm.z) for lm in face], dtype=np.float32)
72
+
73
+ def get_pixel_landmarks(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> np.ndarray:
74
+ # norm -> pixel (x,y)
75
+ pixel = np.zeros((landmarks.shape[0], 2), dtype=np.int32)
76
+ pixel[:, 0] = (landmarks[:, 0] * frame_w).astype(np.int32)
77
+ pixel[:, 1] = (landmarks[:, 1] * frame_h).astype(np.int32)
78
+ return pixel
79
+
80
+ def get_3d_landmarks(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> np.ndarray:
81
+ # norm -> pixel-scale x,y,z (z scaled by width)
82
+ pts = np.zeros_like(landmarks)
83
+ pts[:, 0] = landmarks[:, 0] * frame_w
84
+ pts[:, 1] = landmarks[:, 1] * frame_h
85
+ pts[:, 2] = landmarks[:, 2] * frame_w
86
+ return pts
87
+
88
+ def close(self):
89
+ self._landmarker.close()
90
+
91
+ def __enter__(self):
92
+ return self
93
+
94
+ def __exit__(self, *args):
95
+ self.close()
models/face_orientation/__init__.py ADDED
File without changes
models/face_orientation/head_pose.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # stub
models/face_orientation_model/best_model.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:18c1f2750c7274e72538b94afcc9f0243287a5b2eb8fcce6be6e4ae18ec59cb0
3
- size 15033
 
 
 
 
models/train_eye_cnn.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # stub
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Stage 1: face mesh + test UI (no torch)
2
+ mediapipe>=0.10.14
3
+ opencv-python>=4.8.0
4
+ numpy>=1.24.0
ui/README.md CHANGED
@@ -1,3 +1,15 @@
1
  # ui
2
 
3
- Live demo and session view — structure up to the team.
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # ui
2
 
3
+ Live demo and session view.
4
+
5
+ ## Stage 1 (face mesh only)
6
+
7
+ - **pipeline.py** — frame → 478 landmarks (no head pose / CNN).
8
+ - **live_demo.py** — webcam + mesh overlay (tessellation, contours, eyes, irises).
9
+
10
+ From repo root:
11
+ ```bash
12
+ pip install -r requirements.txt
13
+ python ui/live_demo.py
14
+ ```
15
+ `q` = quit, `m` = cycle mesh mode (full / contours / off).
ui/live_demo.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Stage 1 demo — webcam + face mesh overlay

import argparse
import os
import sys
import time

import cv2
import numpy as np
from mediapipe.tasks.python.vision import FaceLandmarksConnections

# Make the repo root importable so this file runs as `python ui/live_demo.py`.
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)

from ui.pipeline import FaceMeshPipeline
from models.face_mesh.face_mesh import FaceMeshDetector
18
+
19
+ # Drawing
20
+ FONT = cv2.FONT_HERSHEY_SIMPLEX
21
+ CYAN = (255, 255, 0)
22
+ GREEN = (0, 255, 0)
23
+ MAGENTA = (255, 0, 255)
24
+ ORANGE = (0, 165, 255)
25
+ RED = (0, 0, 255)
26
+ WHITE = (255, 255, 255)
27
+ YELLOW = (0, 255, 255)
28
+ LIGHT_GREEN = (144, 238, 144)
29
+
30
+ _TESSELATION = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION]
31
+ _CONTOURS = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS]
32
+ _LEFT_EYEBROW = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]
33
+ _RIGHT_EYEBROW = [300, 293, 334, 296, 336, 285, 295, 282, 283, 276]
34
+ _NOSE_BRIDGE = [6, 197, 195, 5, 4, 1, 19, 94, 2]
35
+ _LIPS_OUTER = [61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 409, 270, 269, 267, 0, 37, 39, 40, 185, 61]
36
+ _LIPS_INNER = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78]
37
+ _LEFT_EAR_POINTS = [33, 160, 158, 133, 153, 145]
38
+ _RIGHT_EAR_POINTS = [362, 385, 387, 263, 373, 380]
39
+
40
+ MESH_FULL = 0
41
+ MESH_CONTOURS = 1
42
+ MESH_OFF = 2
43
+ _MESH_NAMES = ["FULL MESH", "CONTOURS", "MESH OFF"]
44
+
45
+
def _lm_to_px(landmarks, idx, w, h):
    """Map normalized landmark `idx` to an integer pixel (x, y) tuple."""
    x, y = landmarks[idx, 0], landmarks[idx, 1]
    return int(x * w), int(y * h)
48
+
49
+
def draw_tessellation(frame, landmarks, w, h):
    """Blend the full face-mesh tessellation onto `frame` at 30% opacity."""
    layer = frame.copy()
    for start_idx, end_idx in _TESSELATION:
        cv2.line(
            layer,
            _lm_to_px(landmarks, start_idx, w, h),
            _lm_to_px(landmarks, end_idx, w, h),
            (200, 200, 200),
            1,
            cv2.LINE_AA,
        )
    # Write the blended result back into `frame` in place.
    cv2.addWeighted(layer, 0.3, frame, 0.7, 0, frame)
57
+
58
+
def _draw_path(frame, landmarks, indices, color, w, h, thickness=1):
    """Draw an open polyline through consecutive landmark `indices`."""
    for start_idx, end_idx in zip(indices, indices[1:]):
        pt1 = _lm_to_px(landmarks, start_idx, w, h)
        pt2 = _lm_to_px(landmarks, end_idx, w, h)
        cv2.line(frame, pt1, pt2, color, thickness, cv2.LINE_AA)


def draw_contours(frame, landmarks, w, h):
    """Draw face contours plus eyebrow, nose-bridge and lip feature lines.

    Modifies `frame` in place. Colours/thicknesses match the original
    per-feature styling; the repeated pairwise-line loops are factored
    into `_draw_path`.
    """
    # MediaPipe's own contour connection set (face oval, eyes, lips, brows).
    for conn in _CONTOURS:
        pt1 = _lm_to_px(landmarks, conn[0], w, h)
        pt2 = _lm_to_px(landmarks, conn[1], w, h)
        cv2.line(frame, pt1, pt2, CYAN, 1, cv2.LINE_AA)
    # Hand-picked feature paths on top, each in its own colour.
    for eyebrow in (_LEFT_EYEBROW, _RIGHT_EYEBROW):
        _draw_path(frame, landmarks, eyebrow, LIGHT_GREEN, w, h, thickness=2)
    _draw_path(frame, landmarks, _NOSE_BRIDGE, ORANGE, w, h)
    _draw_path(frame, landmarks, _LIPS_OUTER, MAGENTA, w, h)
    _draw_path(frame, landmarks, _LIPS_INNER, (200, 0, 200), w, h)
81
+
82
+
def draw_eyes_and_irises(frame, landmarks, w, h):
    """Draw eye outlines, EAR keypoints, iris circles and a crude gaze ray.

    Modifies `frame` in place. `landmarks` is the (478, 3) normalized array
    from FaceMeshDetector; w/h are the frame dimensions in pixels.
    """
    # Eye contours as closed green polylines.
    left_pts = np.array(
        [_lm_to_px(landmarks, i, w, h) for i in FaceMeshDetector.LEFT_EYE_INDICES],
        dtype=np.int32,
    )
    cv2.polylines(frame, [left_pts], True, GREEN, 2, cv2.LINE_AA)
    right_pts = np.array(
        [_lm_to_px(landmarks, i, w, h) for i in FaceMeshDetector.RIGHT_EYE_INDICES],
        dtype=np.int32,
    )
    cv2.polylines(frame, [right_pts], True, GREEN, 2, cv2.LINE_AA)
    # Highlight the 6 per-eye points (presumably the EAR landmark set — TODO confirm).
    for indices in [_LEFT_EAR_POINTS, _RIGHT_EAR_POINTS]:
        for idx in indices:
            pt = _lm_to_px(landmarks, idx, w, h)
            cv2.circle(frame, pt, 3, YELLOW, -1, cv2.LINE_AA)
    # Iris: first index is the iris centre, the next four ring it.
    for iris_indices, eye_inner, eye_outer in [
        (FaceMeshDetector.LEFT_IRIS_INDICES, 133, 33),
        (FaceMeshDetector.RIGHT_IRIS_INDICES, 362, 263),
    ]:
        iris_pts = np.array(
            [_lm_to_px(landmarks, i, w, h) for i in iris_indices],
            dtype=np.int32,
        )
        center = iris_pts[0]
        # Guard is always true for the 5-element iris index lists above;
        # NOTE(review): circle fit assumed to sit inside this guard — confirm.
        if len(iris_pts) >= 5:
            # Mean distance from centre to the 4 ring points ≈ iris radius.
            radii = [np.linalg.norm(iris_pts[j] - center) for j in range(1, 5)]
            radius = max(int(np.mean(radii)), 2)
            cv2.circle(frame, tuple(center), radius, MAGENTA, 2, cv2.LINE_AA)
            cv2.circle(frame, tuple(center), 2, WHITE, -1, cv2.LINE_AA)
        # Crude gaze ray: offset of iris centre from the inner/outer-corner
        # midpoint, extended 3x past the iris centre.
        eye_center_x = (landmarks[eye_inner, 0] + landmarks[eye_outer, 0]) / 2.0
        eye_center_y = (landmarks[eye_inner, 1] + landmarks[eye_outer, 1]) / 2.0
        eye_center = (int(eye_center_x * w), int(eye_center_y * h))
        dx = center[0] - eye_center[0]
        dy = center[1] - eye_center[1]
        gaze_end = (int(center[0] + dx * 3), int(center[1] + dy * 3))
        cv2.line(frame, tuple(center), gaze_end, RED, 1, cv2.LINE_AA)
119
+
120
+
def main():
    """Run the Stage-1 webcam demo: face mesh overlay with an FPS HUD.

    Keys: `q` quits, `m` cycles mesh mode (full / contours / off).
    """
    parser = argparse.ArgumentParser(description="FocusGuard — Face mesh (Stage 1)")
    parser.add_argument("--camera", type=int, default=0, help="Camera index")
    args = parser.parse_args()

    print("[DEMO] Face mesh only (Stage 1)")
    pipeline = FaceMeshPipeline()

    cap = cv2.VideoCapture(args.camera)
    if not cap.isOpened():
        print("[DEMO] ERROR: Cannot open camera")
        return

    print("[DEMO] q = quit, m = cycle mesh mode (full / contours / off)")
    prev_time = time.time()
    fps = 0.0
    mesh_mode = MESH_FULL

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            result = pipeline.process_frame(frame)
            # Exponentially smoothed FPS estimate (alpha = 0.1);
            # max(..., 1e-6) avoids division by zero on a fast loop.
            now = time.time()
            fps = 0.9 * fps + 0.1 * (1.0 / max(now - prev_time, 1e-6))
            prev_time = now

            h, w = frame.shape[:2]
            if result["landmarks"] is not None:
                lm = result["landmarks"]
                if mesh_mode == MESH_FULL:
                    draw_tessellation(frame, lm, w, h)
                    draw_contours(frame, lm, w, h)
                elif mesh_mode == MESH_CONTOURS:
                    draw_contours(frame, lm, w, h)
                # Eyes/irises are drawn in every mode, including MESH_OFF.
                draw_eyes_and_irises(frame, lm, w, h)

            # HUD: black bar with mode + FPS on the left, key hints on the right.
            cv2.rectangle(frame, (0, 0), (w, 28), (0, 0, 0), -1)
            cv2.putText(frame, f"{_MESH_NAMES[mesh_mode]} FPS: {fps:.0f}", (10, 20), FONT, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
            cv2.putText(frame, "q:quit m:mesh", (w - 140, 20), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)

            cv2.imshow("FocusGuard", frame)

            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break
            elif key == ord("m"):
                mesh_mode = (mesh_mode + 1) % 3
                print(f"[DEMO] Mesh: {_MESH_NAMES[mesh_mode]}")

    finally:
        # Release camera/windows and the MediaPipe detector even on error.
        cap.release()
        cv2.destroyAllWindows()
        pipeline.close()
        print("[DEMO] Done")


if __name__ == "__main__":
    main()
ui/pipeline.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Stage 1: face mesh only (no head pose / eye model / fusion)

import os
import sys

import numpy as np

# Make the repo root importable so this module works when run from ui/.
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)

from models.face_mesh.face_mesh import FaceMeshDetector
13
+
14
+
15
class FaceMeshPipeline:
    """Stage-1 pipeline: one BGR frame in, 478 normalized landmarks out."""

    def __init__(self):
        # One stateful detector reused across frames (VIDEO-mode tracking).
        self.detector = FaceMeshDetector()

    def process_frame(self, bgr_frame: np.ndarray) -> dict:
        """Run the face mesh on a frame.

        Returns:
            dict with key "landmarks": (478, 3) array, or None if no face.
        """
        return {"landmarks": self.detector.process(bgr_frame)}

    def close(self):
        """Release the underlying detector's resources."""
        self.detector.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()