Spaces:
Sleeping
Sleeping
k22056537 committed on
Commit ·
77ec8df
1
Parent(s): ec03d7b
feat: switch eye gaze to score-based fusion
Browse files
Update gaze fusion logic and related API/UI/test paths to use score-based eye gaze behavior instead of the previous mode.
- main.py +7 -14
- models/gaze_eye_fusion.py +8 -9
- src/components/FocusPageLocal.jsx +20 -20
- tests/test_gaze_pipeline.py +1 -1
main.py
CHANGED
|
@@ -317,13 +317,8 @@ def _process_frame_with_l2cs_boost(base_pipeline, frame, base_model_name):
|
|
| 317 |
base_score = base_out.get("mlp_prob", base_out.get("raw_score", 0.0))
|
| 318 |
l2cs_score = l2cs_out.get("raw_score", 0.0)
|
| 319 |
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
fused_score = l2cs_score * 0.8
|
| 323 |
-
is_focused = False
|
| 324 |
-
else:
|
| 325 |
-
fused_score = _BOOST_BASE_W * base_score + _BOOST_L2CS_W * l2cs_score
|
| 326 |
-
is_focused = fused_score >= _fused_threshold
|
| 327 |
|
| 328 |
base_out["raw_score"] = fused_score
|
| 329 |
base_out["is_focused"] = is_focused
|
|
@@ -650,10 +645,11 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
| 650 |
# L2CS standalone: fusion fully controls focus decision
|
| 651 |
is_focused = fuse["focused"]
|
| 652 |
confidence = fuse["focus_score"]
|
| 653 |
-
elif use_boost and not
|
| 654 |
-
# Boost mode:
|
| 655 |
-
|
| 656 |
-
confidence =
|
|
|
|
| 657 |
|
| 658 |
if session_id:
|
| 659 |
metadata = {
|
|
@@ -694,9 +690,6 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
| 694 |
if model_name == "l2cs":
|
| 695 |
resp["focused"] = fuse["focused"]
|
| 696 |
resp["confidence"] = round(fuse["focus_score"], 3)
|
| 697 |
-
elif use_boost and not fuse["on_screen"]:
|
| 698 |
-
resp["focused"] = False
|
| 699 |
-
resp["confidence"] = min(resp["confidence"], _no_face_cap)
|
| 700 |
if has_gaze:
|
| 701 |
resp["gaze_yaw"] = round(out["gaze_yaw"], 4)
|
| 702 |
resp["gaze_pitch"] = round(out["gaze_pitch"], 4)
|
|
|
|
| 317 |
base_score = base_out.get("mlp_prob", base_out.get("raw_score", 0.0))
|
| 318 |
l2cs_score = l2cs_out.get("raw_score", 0.0)
|
| 319 |
|
| 320 |
+
fused_score = _BOOST_BASE_W * base_score + _BOOST_L2CS_W * l2cs_score
|
| 321 |
+
is_focused = fused_score >= _fused_threshold
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
|
| 323 |
base_out["raw_score"] = fused_score
|
| 324 |
base_out["is_focused"] = is_focused
|
|
|
|
| 645 |
# L2CS standalone: fusion fully controls focus decision
|
| 646 |
is_focused = fuse["focused"]
|
| 647 |
confidence = fuse["focus_score"]
|
| 648 |
+
elif use_boost and fuse is not None:
|
| 649 |
+
# Boost mode: blend base confidence with continuous gaze score
|
| 650 |
+
gaze_focus = fuse["focus_score"]
|
| 651 |
+
confidence = 0.6 * confidence + 0.4 * gaze_focus
|
| 652 |
+
is_focused = confidence >= _fused_threshold
|
| 653 |
|
| 654 |
if session_id:
|
| 655 |
metadata = {
|
|
|
|
| 690 |
if model_name == "l2cs":
|
| 691 |
resp["focused"] = fuse["focused"]
|
| 692 |
resp["confidence"] = round(fuse["focus_score"], 3)
|
|
|
|
|
|
|
|
|
|
| 693 |
if has_gaze:
|
| 694 |
resp["gaze_yaw"] = round(out["gaze_yaw"], 4)
|
| 695 |
resp["gaze_pitch"] = round(out["gaze_pitch"], 4)
|
models/gaze_eye_fusion.py
CHANGED
|
@@ -9,6 +9,7 @@ from .eye_scorer import compute_avg_ear
|
|
| 9 |
|
| 10 |
_EAR_BLINK = 0.18
|
| 11 |
_ON_SCREEN_MARGIN = 0.15
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
_SUSTAINED_CLOSE_FRAMES = 4 # ~250ms at 15fps — ignore brief blinks
|
|
@@ -52,15 +53,13 @@ class GazeEyeFusion:
|
|
| 52 |
ear_score = min(ear / 0.30, 1.0)
|
| 53 |
self._closed_streak = 0
|
| 54 |
|
| 55 |
-
#
|
| 56 |
-
#
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
dist = math.sqrt(dx ** 2 + dy ** 2)
|
| 63 |
-
gaze_score = max(0.0, 1.0 - dist * 2.5)
|
| 64 |
|
| 65 |
# Sustained eye closure veto — ignore brief blinks (< 4 frames)
|
| 66 |
if self._closed_streak >= _SUSTAINED_CLOSE_FRAMES:
|
|
|
|
| 9 |
|
| 10 |
_EAR_BLINK = 0.18
|
| 11 |
_ON_SCREEN_MARGIN = 0.15
|
| 12 |
+
_GAZE_MAX_DIST = 0.6
|
| 13 |
|
| 14 |
|
| 15 |
_SUSTAINED_CLOSE_FRAMES = 4 # ~250ms at 15fps — ignore brief blinks
|
|
|
|
| 53 |
ear_score = min(ear / 0.30, 1.0)
|
| 54 |
self._closed_streak = 0
|
| 55 |
|
| 56 |
+
# Continuous gaze score: 1.0 at screen center, cosine falloff toward edges
|
| 57 |
+
# and beyond β no hard cliff at the screen boundary.
|
| 58 |
+
dx = max(0.0, abs(gx - 0.5) - 0.5)
|
| 59 |
+
dy = max(0.0, abs(gy - 0.5) - 0.5)
|
| 60 |
+
dist = math.sqrt(dx ** 2 + dy ** 2)
|
| 61 |
+
t = min(dist / _GAZE_MAX_DIST, 1.0)
|
| 62 |
+
gaze_score = 0.5 * (1.0 + math.cos(math.pi * t))
|
|
|
|
|
|
|
| 63 |
|
| 64 |
# Sustained eye closure veto — ignore brief blinks (< 4 frames)
|
| 65 |
if self._closed_streak >= _SUSTAINED_CLOSE_FRAMES:
|
src/components/FocusPageLocal.jsx
CHANGED
|
@@ -43,53 +43,53 @@ const MODEL_INFO = {
|
|
| 43 |
hybrid: {
|
| 44 |
label: 'Hybrid',
|
| 45 |
tagline: 'Best overall — combines ML with geometric scoring',
|
| 46 |
-
how: 'Fuses XGBoost predictions (30%) with geometric face/eye scores (70%).
|
| 47 |
-
accuracy: '
|
| 48 |
-
f1: '0.
|
| 49 |
-
auc: '
|
| 50 |
threshold: '0.46',
|
| 51 |
-
evaluation: '
|
| 52 |
features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
|
| 53 |
-
strengths: 'Most robust across different people.
|
| 54 |
badge: 'Recommended',
|
| 55 |
},
|
| 56 |
xgboost: {
|
| 57 |
label: 'XGBoost',
|
| 58 |
tagline: 'Highest raw accuracy — gradient-boosted decision trees',
|
| 59 |
how: 'Ensemble of 600 decision trees (max depth 8). Each tree learns to correct errors from previous trees. Outputs probability of focused state.',
|
| 60 |
-
accuracy: '
|
| 61 |
-
f1: '0.
|
| 62 |
-
auc: '0.
|
| 63 |
threshold: '0.38',
|
| 64 |
-
evaluation: '
|
| 65 |
features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
|
| 66 |
-
strengths: 'Strong pattern recognition
|
| 67 |
badge: null,
|
| 68 |
},
|
| 69 |
mlp: {
|
| 70 |
label: 'MLP',
|
| 71 |
tagline: 'Lightweight neural network — fast and efficient',
|
| 72 |
how: 'Two-layer neural network (64→32 neurons). Takes 10 face features, applies learned weights, outputs focused/unfocused probability via softmax.',
|
| 73 |
-
accuracy: '
|
| 74 |
-
f1: '0.
|
| 75 |
-
auc: '0.
|
| 76 |
threshold: '0.23',
|
| 77 |
-
evaluation: '
|
| 78 |
features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
|
| 79 |
-
strengths: 'Fastest inference
|
| 80 |
badge: null,
|
| 81 |
},
|
| 82 |
geometric: {
|
| 83 |
label: 'Geometric',
|
| 84 |
tagline: 'Baseline only — hardcoded thresholds, no learning',
|
| 85 |
how: 'Uses fixed thresholds on head orientation (70%) and eye openness (30%). No training — just hand-tuned rules on 478 face landmarks. Cannot adapt to new faces or environments.',
|
| 86 |
-
accuracy: '
|
| 87 |
-
f1: '0.
|
| 88 |
auc: 'N/A',
|
| 89 |
threshold: '0.55',
|
| 90 |
-
evaluation: '
|
| 91 |
features: 'Head yaw/pitch/roll angles, eye aspect ratio (EAR), iris gaze offset, mouth aspect ratio (MAR)',
|
| 92 |
-
strengths: 'No model files needed. Useful
|
| 93 |
badge: 'Baseline',
|
| 94 |
},
|
| 95 |
};
|
|
|
|
| 43 |
hybrid: {
|
| 44 |
label: 'Hybrid',
|
| 45 |
tagline: 'Best overall — combines ML with geometric scoring',
|
| 46 |
+
how: 'Fuses XGBoost predictions (30%) with geometric face/eye scores (70%). Uses a weighted blend tuned with LOPO evaluation.',
|
| 47 |
+
accuracy: 'N/A',
|
| 48 |
+
f1: '0.8409',
|
| 49 |
+
auc: 'N/A',
|
| 50 |
threshold: '0.46',
|
| 51 |
+
evaluation: 'LOPO tuning (9 participants, 144K frames)',
|
| 52 |
features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
|
| 53 |
+
strengths: 'Most robust across different people. Latest LOPO mean F1 is 0.8409 at w_mlp=0.3.',
|
| 54 |
badge: 'Recommended',
|
| 55 |
},
|
| 56 |
xgboost: {
|
| 57 |
label: 'XGBoost',
|
| 58 |
tagline: 'Highest raw accuracy — gradient-boosted decision trees',
|
| 59 |
how: 'Ensemble of 600 decision trees (max depth 8). Each tree learns to correct errors from previous trees. Outputs probability of focused state.',
|
| 60 |
+
accuracy: '95.87%',
|
| 61 |
+
f1: '0.9585',
|
| 62 |
+
auc: '0.9908',
|
| 63 |
threshold: '0.38',
|
| 64 |
+
evaluation: 'Random split test (15%) + LOPO thresholds',
|
| 65 |
features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
|
| 66 |
+
strengths: 'Strong pattern recognition and fast inference. LOPO: AUC 0.8695, optimal threshold 0.280, F1 0.8549.',
|
| 67 |
badge: null,
|
| 68 |
},
|
| 69 |
mlp: {
|
| 70 |
label: 'MLP',
|
| 71 |
tagline: 'Lightweight neural network — fast and efficient',
|
| 72 |
how: 'Two-layer neural network (64→32 neurons). Takes 10 face features, applies learned weights, outputs focused/unfocused probability via softmax.',
|
| 73 |
+
accuracy: '92.92%',
|
| 74 |
+
f1: '0.9287',
|
| 75 |
+
auc: '0.9714',
|
| 76 |
threshold: '0.23',
|
| 77 |
+
evaluation: 'Random split test (15%) + LOPO thresholds',
|
| 78 |
features: '10 features: head deviation, face score, eye scores (EAR), gaze offset, pitch, horizontal gaze, PERCLOS',
|
| 79 |
+
strengths: 'Fastest inference and smallest model size. LOPO: AUC 0.8624, optimal threshold 0.228, F1 0.8578.',
|
| 80 |
badge: null,
|
| 81 |
},
|
| 82 |
geometric: {
|
| 83 |
label: 'Geometric',
|
| 84 |
tagline: 'Baseline only — hardcoded thresholds, no learning',
|
| 85 |
how: 'Uses fixed thresholds on head orientation (70%) and eye openness (30%). No training — just hand-tuned rules on 478 face landmarks. Cannot adapt to new faces or environments.',
|
| 86 |
+
accuracy: 'N/A',
|
| 87 |
+
f1: '0.8195',
|
| 88 |
auc: 'N/A',
|
| 89 |
threshold: '0.55',
|
| 90 |
+
evaluation: 'LOPO geometric sweep',
|
| 91 |
features: 'Head yaw/pitch/roll angles, eye aspect ratio (EAR), iris gaze offset, mouth aspect ratio (MAR)',
|
| 92 |
+
strengths: 'No model files needed. Useful fallback when model checkpoints are unavailable.',
|
| 93 |
badge: 'Baseline',
|
| 94 |
},
|
| 95 |
};
|
tests/test_gaze_pipeline.py
CHANGED
|
@@ -195,7 +195,7 @@ def test_fusion_unfocused_off_screen():
|
|
| 195 |
fusion = GazeEyeFusion(cal)
|
| 196 |
yaw = (tx - 0.5) * 0.7
|
| 197 |
pitch = (ty - 0.5) * 0.5
|
| 198 |
-
for _ in range(
|
| 199 |
result = fusion.update(yaw, pitch, lm)
|
| 200 |
|
| 201 |
status = "PASS" if not result["focused"] else "FAIL"
|
|
|
|
| 195 |
fusion = GazeEyeFusion(cal)
|
| 196 |
yaw = (tx - 0.5) * 0.7
|
| 197 |
pitch = (ty - 0.5) * 0.5
|
| 198 |
+
for _ in range(20):
|
| 199 |
result = fusion.update(yaw, pitch, lm)
|
| 200 |
|
| 201 |
status = "PASS" if not result["focused"] else "FAIL"
|