k22056537 committed on
Commit
77ec8df
·
1 Parent(s): ec03d7b

feat: switch eye gaze to score-based fusion

Browse files

Update gaze fusion logic and related API/UI/test paths to use score-based eye gaze behavior instead of the previous mode.

main.py CHANGED
@@ -317,13 +317,8 @@ def _process_frame_with_l2cs_boost(base_pipeline, frame, base_model_name):
317
  base_score = base_out.get("mlp_prob", base_out.get("raw_score", 0.0))
318
  l2cs_score = l2cs_out.get("raw_score", 0.0)
319
 
320
- # veto: gaze clearly off-screen overrides base model
321
- if l2cs_score < _BOOST_VETO:
322
- fused_score = l2cs_score * 0.8
323
- is_focused = False
324
- else:
325
- fused_score = _BOOST_BASE_W * base_score + _BOOST_L2CS_W * l2cs_score
326
- is_focused = fused_score >= _fused_threshold
327
 
328
  base_out["raw_score"] = fused_score
329
  base_out["is_focused"] = is_focused
@@ -650,10 +645,11 @@ async def websocket_endpoint(websocket: WebSocket):
650
  # L2CS standalone: fusion fully controls focus decision
651
  is_focused = fuse["focused"]
652
  confidence = fuse["focus_score"]
653
- elif use_boost and not fuse["on_screen"]:
654
- # Boost mode: if gaze is clearly off-screen, override to unfocused
655
- is_focused = False
656
- confidence = min(confidence, _no_face_cap)
 
657
 
658
  if session_id:
659
  metadata = {
@@ -694,9 +690,6 @@ async def websocket_endpoint(websocket: WebSocket):
694
  if model_name == "l2cs":
695
  resp["focused"] = fuse["focused"]
696
  resp["confidence"] = round(fuse["focus_score"], 3)
697
- elif use_boost and not fuse["on_screen"]:
698
- resp["focused"] = False
699
- resp["confidence"] = min(resp["confidence"], _no_face_cap)
700
  if has_gaze:
701
  resp["gaze_yaw"] = round(out["gaze_yaw"], 4)
702
  resp["gaze_pitch"] = round(out["gaze_pitch"], 4)
 
317
  base_score = base_out.get("mlp_prob", base_out.get("raw_score", 0.0))
318
  l2cs_score = l2cs_out.get("raw_score", 0.0)
319
 
320
+ fused_score = _BOOST_BASE_W * base_score + _BOOST_L2CS_W * l2cs_score
321
+ is_focused = fused_score >= _fused_threshold
 
 
 
 
 
322
 
323
  base_out["raw_score"] = fused_score
324
  base_out["is_focused"] = is_focused
 
645
  # L2CS standalone: fusion fully controls focus decision
646
  is_focused = fuse["focused"]
647
  confidence = fuse["focus_score"]
648
+ elif use_boost and fuse is not None:
649
+ # Boost mode: blend base confidence with continuous gaze score
650
+ gaze_focus = fuse["focus_score"]
651
+ confidence = 0.6 * confidence + 0.4 * gaze_focus
652
+ is_focused = confidence >= _fused_threshold
653
 
654
  if session_id:
655
  metadata = {
 
690
  if model_name == "l2cs":
691
  resp["focused"] = fuse["focused"]
692
  resp["confidence"] = round(fuse["focus_score"], 3)
 
 
 
693
  if has_gaze:
694
  resp["gaze_yaw"] = round(out["gaze_yaw"], 4)
695
  resp["gaze_pitch"] = round(out["gaze_pitch"], 4)
models/gaze_eye_fusion.py CHANGED
@@ -9,6 +9,7 @@ from .eye_scorer import compute_avg_ear
9
 
10
  _EAR_BLINK = 0.18
11
  _ON_SCREEN_MARGIN = 0.15
 
12
 
13
 
14
  _SUSTAINED_CLOSE_FRAMES = 4 # ~250ms at 15fps — ignore brief blinks
@@ -52,15 +53,13 @@ class GazeEyeFusion:
52
  ear_score = min(ear / 0.30, 1.0)
53
  self._closed_streak = 0
54
 
55
- # Gaze score: 1.0 anywhere on screen, gentle falloff near edges,
56
- # 0.0 when clearly off screen.
57
- if not on_screen:
58
- gaze_score = 0.0
59
- else:
60
- dx = max(0.0, abs(gx - 0.5) - 0.4)
61
- dy = max(0.0, abs(gy - 0.5) - 0.4)
62
- dist = math.sqrt(dx ** 2 + dy ** 2)
63
- gaze_score = max(0.0, 1.0 - dist * 2.5)
64
 
65
  # Sustained eye closure veto — ignore brief blinks (< 4 frames)
66
  if self._closed_streak >= _SUSTAINED_CLOSE_FRAMES:
 
9
 
10
  _EAR_BLINK = 0.18
11
  _ON_SCREEN_MARGIN = 0.15
12
+ _GAZE_MAX_DIST = 0.6
13
 
14
 
15
  _SUSTAINED_CLOSE_FRAMES = 4 # ~250ms at 15fps — ignore brief blinks
 
53
  ear_score = min(ear / 0.30, 1.0)
54
  self._closed_streak = 0
55
 
56
+ # Continuous gaze score: 1.0 at screen center, cosine falloff toward edges
57
+ # and beyond — no hard cliff at the screen boundary.
58
+ dx = max(0.0, abs(gx - 0.5) - 0.5)
59
+ dy = max(0.0, abs(gy - 0.5) - 0.5)
60
+ dist = math.sqrt(dx ** 2 + dy ** 2)
61
+ t = min(dist / _GAZE_MAX_DIST, 1.0)
62
+ gaze_score = 0.5 * (1.0 + math.cos(math.pi * t))
 
 
63
 
64
  # Sustained eye closure veto — ignore brief blinks (< 4 frames)
65
  if self._closed_streak >= _SUSTAINED_CLOSE_FRAMES:
src/components/FocusPageLocal.jsx CHANGED
@@ -43,53 +43,53 @@ const MODEL_INFO = {
43
  hybrid: {
44
  label: 'Hybrid',
45
  tagline: 'Best overall — combines ML with geometric scoring',
46
- how: 'Fuses XGBoost predictions (30%) with geometric face/eye scores (70%). A logistic regression meta-classifier combines both signals for the final decision.',
47
- accuracy: '84.3%',
48
- f1: '0.864',
49
- auc: '0.880',
50
  threshold: '0.46',
51
- evaluation: 'Leave-One-Person-Out (9 participants, 144K frames)',
52
  features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
53
- strengths: 'Most robust across different people. Geometric scoring generalises well; ML catches subtle patterns.',
54
  badge: 'Recommended',
55
  },
56
  xgboost: {
57
  label: 'XGBoost',
58
  tagline: 'Highest raw accuracy — gradient-boosted decision trees',
59
  how: 'Ensemble of 600 decision trees (max depth 8). Each tree learns to correct errors from previous trees. Outputs probability of focused state.',
60
- accuracy: '84.3%',
61
- f1: '0.859',
62
- auc: '0.880',
63
  threshold: '0.38',
64
- evaluation: 'Leave-One-Person-Out (9 participants, 144K frames)',
65
  features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
66
- strengths: 'Strong pattern recognition. Handles non-linear feature interactions. 95.9% accuracy on random split (but LOPO is the fairer test).',
67
  badge: null,
68
  },
69
  mlp: {
70
  label: 'MLP',
71
  tagline: 'Lightweight neural network — fast and efficient',
72
  how: 'Two-layer neural network (64→32 neurons). Takes 10 face features, applies learned weights, outputs focused/unfocused probability via softmax.',
73
- accuracy: '82.7%',
74
- f1: '0.858',
75
- auc: '0.862',
76
  threshold: '0.23',
77
- evaluation: 'Leave-One-Person-Out (9 participants, 144K frames)',
78
  features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
79
- strengths: 'Fastest inference. Smallest model size. Good baseline. 92.9% accuracy on random split.',
80
  badge: null,
81
  },
82
  geometric: {
83
  label: 'Geometric',
84
  tagline: 'Baseline only — hardcoded thresholds, no learning',
85
  how: 'Uses fixed thresholds on head orientation (70%) and eye openness (30%). No training — just hand-tuned rules on 478 face landmarks. Cannot adapt to new faces or environments.',
86
- accuracy: '~77%',
87
- f1: '0.772',
88
  auc: 'N/A',
89
  threshold: '0.55',
90
- evaluation: 'Leave-One-Person-Out geometric sweep',
91
  features: 'Head yaw/pitch/roll angles, eye aspect ratio (EAR), iris gaze offset, mouth aspect ratio (MAR)',
92
- strengths: 'No model files needed. Useful as a fallback. This is the baseline that motivated building the ML models — its fixed thresholds struggle with different face shapes, lighting, and camera angles.',
93
  badge: 'Baseline',
94
  },
95
  };
 
43
  hybrid: {
44
  label: 'Hybrid',
45
  tagline: 'Best overall — combines ML with geometric scoring',
46
+ how: 'Fuses XGBoost predictions (30%) with geometric face/eye scores (70%). Uses a weighted blend tuned with LOPO evaluation.',
47
+ accuracy: 'N/A',
48
+ f1: '0.8409',
49
+ auc: 'N/A',
50
  threshold: '0.46',
51
+ evaluation: 'LOPO tuning (9 participants, 144K frames)',
52
  features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
53
+ strengths: 'Most robust across different people. Latest LOPO mean F1 is 0.8409 at w_mlp=0.3.',
54
  badge: 'Recommended',
55
  },
56
  xgboost: {
57
  label: 'XGBoost',
58
  tagline: 'Highest raw accuracy — gradient-boosted decision trees',
59
  how: 'Ensemble of 600 decision trees (max depth 8). Each tree learns to correct errors from previous trees. Outputs probability of focused state.',
60
+ accuracy: '95.87%',
61
+ f1: '0.9585',
62
+ auc: '0.9908',
63
  threshold: '0.38',
64
+ evaluation: 'Random split test (15%) + LOPO thresholds',
65
  features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
66
+ strengths: 'Strong pattern recognition and fast inference. LOPO: AUC 0.8695, optimal threshold 0.280, F1 0.8549.',
67
  badge: null,
68
  },
69
  mlp: {
70
  label: 'MLP',
71
  tagline: 'Lightweight neural network — fast and efficient',
72
  how: 'Two-layer neural network (64→32 neurons). Takes 10 face features, applies learned weights, outputs focused/unfocused probability via softmax.',
73
+ accuracy: '92.92%',
74
+ f1: '0.9287',
75
+ auc: '0.9714',
76
  threshold: '0.23',
77
+ evaluation: 'Random split test (15%) + LOPO thresholds',
78
  features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
79
+ strengths: 'Fastest inference and smallest model size. LOPO: AUC 0.8624, optimal threshold 0.228, F1 0.8578.',
80
  badge: null,
81
  },
82
  geometric: {
83
  label: 'Geometric',
84
  tagline: 'Baseline only — hardcoded thresholds, no learning',
85
  how: 'Uses fixed thresholds on head orientation (70%) and eye openness (30%). No training — just hand-tuned rules on 478 face landmarks. Cannot adapt to new faces or environments.',
86
+ accuracy: 'N/A',
87
+ f1: '0.8195',
88
  auc: 'N/A',
89
  threshold: '0.55',
90
+ evaluation: 'LOPO geometric sweep',
91
  features: 'Head yaw/pitch/roll angles, eye aspect ratio (EAR), iris gaze offset, mouth aspect ratio (MAR)',
92
+ strengths: 'No model files needed. Useful fallback when model checkpoints are unavailable.',
93
  badge: 'Baseline',
94
  },
95
  };
tests/test_gaze_pipeline.py CHANGED
@@ -195,7 +195,7 @@ def test_fusion_unfocused_off_screen():
195
  fusion = GazeEyeFusion(cal)
196
  yaw = (tx - 0.5) * 0.7
197
  pitch = (ty - 0.5) * 0.5
198
- for _ in range(10):
199
  result = fusion.update(yaw, pitch, lm)
200
 
201
  status = "PASS" if not result["focused"] else "FAIL"
 
195
  fusion = GazeEyeFusion(cal)
196
  yaw = (tx - 0.5) * 0.7
197
  pitch = (ty - 0.5) * 0.5
198
+ for _ in range(20):
199
  result = fusion.update(yaw, pitch, lm)
200
 
201
  status = "PASS" if not result["focused"] else "FAIL"