k22056537 committed on
Commit
77ec8df
·
1 Parent(s): ec03d7b

feat: switch eye gaze to score-based fusion

Browse files

Update gaze fusion logic and related API/UI/test paths to use score-based eye gaze behavior instead of the previous mode.

main.py CHANGED
@@ -317,13 +317,8 @@ def _process_frame_with_l2cs_boost(base_pipeline, frame, base_model_name):
317
  base_score = base_out.get("mlp_prob", base_out.get("raw_score", 0.0))
318
  l2cs_score = l2cs_out.get("raw_score", 0.0)
319
 
320
- # veto: gaze clearly off-screen overrides base model
321
- if l2cs_score < _BOOST_VETO:
322
- fused_score = l2cs_score * 0.8
323
- is_focused = False
324
- else:
325
- fused_score = _BOOST_BASE_W * base_score + _BOOST_L2CS_W * l2cs_score
326
- is_focused = fused_score >= _fused_threshold
327
 
328
  base_out["raw_score"] = fused_score
329
  base_out["is_focused"] = is_focused
@@ -650,10 +645,11 @@ async def websocket_endpoint(websocket: WebSocket):
650
  # L2CS standalone: fusion fully controls focus decision
651
  is_focused = fuse["focused"]
652
  confidence = fuse["focus_score"]
653
- elif use_boost and not fuse["on_screen"]:
654
- # Boost mode: if gaze is clearly off-screen, override to unfocused
655
- is_focused = False
656
- confidence = min(confidence, _no_face_cap)
 
657
 
658
  if session_id:
659
  metadata = {
@@ -694,9 +690,6 @@ async def websocket_endpoint(websocket: WebSocket):
694
  if model_name == "l2cs":
695
  resp["focused"] = fuse["focused"]
696
  resp["confidence"] = round(fuse["focus_score"], 3)
697
- elif use_boost and not fuse["on_screen"]:
698
- resp["focused"] = False
699
- resp["confidence"] = min(resp["confidence"], _no_face_cap)
700
  if has_gaze:
701
  resp["gaze_yaw"] = round(out["gaze_yaw"], 4)
702
  resp["gaze_pitch"] = round(out["gaze_pitch"], 4)
 
317
  base_score = base_out.get("mlp_prob", base_out.get("raw_score", 0.0))
318
  l2cs_score = l2cs_out.get("raw_score", 0.0)
319
 
320
+ fused_score = _BOOST_BASE_W * base_score + _BOOST_L2CS_W * l2cs_score
321
+ is_focused = fused_score >= _fused_threshold
 
 
 
 
 
322
 
323
  base_out["raw_score"] = fused_score
324
  base_out["is_focused"] = is_focused
 
645
  # L2CS standalone: fusion fully controls focus decision
646
  is_focused = fuse["focused"]
647
  confidence = fuse["focus_score"]
648
+ elif use_boost and fuse is not None:
649
+ # Boost mode: blend base confidence with continuous gaze score
650
+ gaze_focus = fuse["focus_score"]
651
+ confidence = 0.6 * confidence + 0.4 * gaze_focus
652
+ is_focused = confidence >= _fused_threshold
653
 
654
  if session_id:
655
  metadata = {
 
690
  if model_name == "l2cs":
691
  resp["focused"] = fuse["focused"]
692
  resp["confidence"] = round(fuse["focus_score"], 3)
 
 
 
693
  if has_gaze:
694
  resp["gaze_yaw"] = round(out["gaze_yaw"], 4)
695
  resp["gaze_pitch"] = round(out["gaze_pitch"], 4)
models/gaze_eye_fusion.py CHANGED
@@ -9,6 +9,7 @@ from .eye_scorer import compute_avg_ear
9
 
10
  _EAR_BLINK = 0.18
11
  _ON_SCREEN_MARGIN = 0.15
 
12
 
13
 
14
  _SUSTAINED_CLOSE_FRAMES = 4 # ~250ms at 15fps — ignore brief blinks
@@ -52,15 +53,13 @@ class GazeEyeFusion:
52
  ear_score = min(ear / 0.30, 1.0)
53
  self._closed_streak = 0
54
 
55
- # Gaze score: 1.0 anywhere on screen, gentle falloff near edges,
56
- # 0.0 when clearly off screen.
57
- if not on_screen:
58
- gaze_score = 0.0
59
- else:
60
- dx = max(0.0, abs(gx - 0.5) - 0.4)
61
- dy = max(0.0, abs(gy - 0.5) - 0.4)
62
- dist = math.sqrt(dx ** 2 + dy ** 2)
63
- gaze_score = max(0.0, 1.0 - dist * 2.5)
64
 
65
  # Sustained eye closure veto — ignore brief blinks (< 4 frames)
66
  if self._closed_streak >= _SUSTAINED_CLOSE_FRAMES:
 
9
 
10
  _EAR_BLINK = 0.18
11
  _ON_SCREEN_MARGIN = 0.15
12
+ _GAZE_MAX_DIST = 0.6
13
 
14
 
15
  _SUSTAINED_CLOSE_FRAMES = 4 # ~250ms at 15fps — ignore brief blinks
 
53
  ear_score = min(ear / 0.30, 1.0)
54
  self._closed_streak = 0
55
 
56
+ # Continuous gaze score: 1.0 at screen center, cosine falloff toward edges
57
+ # and beyond — no hard cliff at the screen boundary.
58
+ dx = max(0.0, abs(gx - 0.5) - 0.5)
59
+ dy = max(0.0, abs(gy - 0.5) - 0.5)
60
+ dist = math.sqrt(dx ** 2 + dy ** 2)
61
+ t = min(dist / _GAZE_MAX_DIST, 1.0)
62
+ gaze_score = 0.5 * (1.0 + math.cos(math.pi * t))
 
 
63
 
64
  # Sustained eye closure veto — ignore brief blinks (< 4 frames)
65
  if self._closed_streak >= _SUSTAINED_CLOSE_FRAMES:
src/components/FocusPageLocal.jsx CHANGED
@@ -43,53 +43,53 @@ const MODEL_INFO = {
43
  hybrid: {
44
  label: 'Hybrid',
45
  tagline: 'Best overall — combines ML with geometric scoring',
46
- how: 'Fuses XGBoost predictions (30%) with geometric face/eye scores (70%). A logistic regression meta-classifier combines both signals for the final decision.',
47
- accuracy: '84.3%',
48
- f1: '0.864',
49
- auc: '0.880',
50
  threshold: '0.46',
51
- evaluation: 'Leave-One-Person-Out (9 participants, 144K frames)',
52
  features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
53
- strengths: 'Most robust across different people. Geometric scoring generalises well; ML catches subtle patterns.',
54
  badge: 'Recommended',
55
  },
56
  xgboost: {
57
  label: 'XGBoost',
58
  tagline: 'Highest raw accuracy — gradient-boosted decision trees',
59
  how: 'Ensemble of 600 decision trees (max depth 8). Each tree learns to correct errors from previous trees. Outputs probability of focused state.',
60
- accuracy: '84.3%',
61
- f1: '0.859',
62
- auc: '0.880',
63
  threshold: '0.38',
64
- evaluation: 'Leave-One-Person-Out (9 participants, 144K frames)',
65
  features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
66
- strengths: 'Strong pattern recognition. Handles non-linear feature interactions. 95.9% accuracy on random split (but LOPO is the fairer test).',
67
  badge: null,
68
  },
69
  mlp: {
70
  label: 'MLP',
71
  tagline: 'Lightweight neural network — fast and efficient',
72
  how: 'Two-layer neural network (64→32 neurons). Takes 10 face features, applies learned weights, outputs focused/unfocused probability via softmax.',
73
- accuracy: '82.7%',
74
- f1: '0.858',
75
- auc: '0.862',
76
  threshold: '0.23',
77
- evaluation: 'Leave-One-Person-Out (9 participants, 144K frames)',
78
  features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
79
- strengths: 'Fastest inference. Smallest model size. Good baseline. 92.9% accuracy on random split.',
80
  badge: null,
81
  },
82
  geometric: {
83
  label: 'Geometric',
84
  tagline: 'Baseline only — hardcoded thresholds, no learning',
85
  how: 'Uses fixed thresholds on head orientation (70%) and eye openness (30%). No training — just hand-tuned rules on 478 face landmarks. Cannot adapt to new faces or environments.',
86
- accuracy: '~77%',
87
- f1: '0.772',
88
  auc: 'N/A',
89
  threshold: '0.55',
90
- evaluation: 'Leave-One-Person-Out geometric sweep',
91
  features: 'Head yaw/pitch/roll angles, eye aspect ratio (EAR), iris gaze offset, mouth aspect ratio (MAR)',
92
- strengths: 'No model files needed. Useful as a fallback. This is the baseline that motivated building the ML models — its fixed thresholds struggle with different face shapes, lighting, and camera angles.',
93
  badge: 'Baseline',
94
  },
95
  };
 
43
  hybrid: {
44
  label: 'Hybrid',
45
  tagline: 'Best overall — combines ML with geometric scoring',
46
+ how: 'Fuses XGBoost predictions (30%) with geometric face/eye scores (70%). Uses a weighted blend tuned with LOPO evaluation.',
47
+ accuracy: 'N/A',
48
+ f1: '0.8409',
49
+ auc: 'N/A',
50
  threshold: '0.46',
51
+ evaluation: 'LOPO tuning (9 participants, 144K frames)',
52
  features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
53
+ strengths: 'Most robust across different people. Latest LOPO mean F1 is 0.8409 at w_mlp=0.3.',
54
  badge: 'Recommended',
55
  },
56
  xgboost: {
57
  label: 'XGBoost',
58
  tagline: 'Highest raw accuracy — gradient-boosted decision trees',
59
  how: 'Ensemble of 600 decision trees (max depth 8). Each tree learns to correct errors from previous trees. Outputs probability of focused state.',
60
+ accuracy: '95.87%',
61
+ f1: '0.9585',
62
+ auc: '0.9908',
63
  threshold: '0.38',
64
+ evaluation: 'Random split test (15%) + LOPO thresholds',
65
  features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
66
+ strengths: 'Strong pattern recognition and fast inference. LOPO: AUC 0.8695, optimal threshold 0.280, F1 0.8549.',
67
  badge: null,
68
  },
69
  mlp: {
70
  label: 'MLP',
71
  tagline: 'Lightweight neural network — fast and efficient',
72
  how: 'Two-layer neural network (64→32 neurons). Takes 10 face features, applies learned weights, outputs focused/unfocused probability via softmax.',
73
+ accuracy: '92.92%',
74
+ f1: '0.9287',
75
+ auc: '0.9714',
76
  threshold: '0.23',
77
+ evaluation: 'Random split test (15%) + LOPO thresholds',
78
  features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
79
+ strengths: 'Fastest inference and smallest model size. LOPO: AUC 0.8624, optimal threshold 0.228, F1 0.8578.',
80
  badge: null,
81
  },
82
  geometric: {
83
  label: 'Geometric',
84
  tagline: 'Baseline only — hardcoded thresholds, no learning',
85
  how: 'Uses fixed thresholds on head orientation (70%) and eye openness (30%). No training — just hand-tuned rules on 478 face landmarks. Cannot adapt to new faces or environments.',
86
+ accuracy: 'N/A',
87
+ f1: '0.8195',
88
  auc: 'N/A',
89
  threshold: '0.55',
90
+ evaluation: 'LOPO geometric sweep',
91
  features: 'Head yaw/pitch/roll angles, eye aspect ratio (EAR), iris gaze offset, mouth aspect ratio (MAR)',
92
+ strengths: 'No model files needed. Useful fallback when model checkpoints are unavailable.',
93
  badge: 'Baseline',
94
  },
95
  };
tests/test_gaze_pipeline.py CHANGED
@@ -195,7 +195,7 @@ def test_fusion_unfocused_off_screen():
195
  fusion = GazeEyeFusion(cal)
196
  yaw = (tx - 0.5) * 0.7
197
  pitch = (ty - 0.5) * 0.5
198
- for _ in range(10):
199
  result = fusion.update(yaw, pitch, lm)
200
 
201
  status = "PASS" if not result["focused"] else "FAIL"
 
195
  fusion = GazeEyeFusion(cal)
196
  yaw = (tx - 0.5) * 0.7
197
  pitch = (ty - 0.5) * 0.5
198
+ for _ in range(20):
199
  result = fusion.update(yaw, pitch, lm)
200
 
201
  status = "PASS" if not result["focused"] else "FAIL"