akashkolte committed on
Commit
56a15bc
·
1 Parent(s): 55a5eba

added affect and gesture fix

Browse files
backend/evals/multimodal_alignment.py CHANGED
@@ -40,7 +40,9 @@ _AFFECT_TARGET = {
40
  _GESTURE_OPENER_PATTERNS = {
41
  "THUMBS_UP": re.compile(r"^\s*(yes|yeah|totally|for sure|absolutely|sure)\b", re.I),
42
  "THUMBS_DOWN": re.compile(r"^\s*(no|nah|not really|i'd rather not)\b", re.I),
43
- "WAVING": re.compile(r"^\s*(hi|hey|hello)\b", re.I),
 
 
44
  }
45
 
46
 
 
40
  _GESTURE_OPENER_PATTERNS = {
41
  "THUMBS_UP": re.compile(r"^\s*(yes|yeah|totally|for sure|absolutely|sure)\b", re.I),
42
  "THUMBS_DOWN": re.compile(r"^\s*(no|nah|not really|i'd rather not)\b", re.I),
43
+ "OPEN_PALM": re.compile(r"^\s*(hi|hey|hello)\b", re.I),
44
+ "VICTORY": re.compile(r"^\s*(yes|awesome|great|fantastic|amazing|woo)\b", re.I),
45
+ "I_LOVE_YOU": re.compile(r"^\s*(love|i love|adore|care)\b", re.I),
46
  }
47
 
48
 
backend/pipeline/nodes/planner.py CHANGED
@@ -659,7 +659,7 @@ def _format_multimodal_intent(
659
  if source == "conflict_air" and air_t:
660
  return (
661
  f'\nThe user spoke "{voice_t}" but also air-wrote "{air_t}". '
662
- "The air-written token is a canonical AAC signal "
663
  "(help/stop/water/done/more) β€” prioritise it over the spoken "
664
  "words, which may have been misheard."
665
  )
 
659
  if source == "conflict_air" and air_t:
660
  return (
661
  f'\nThe user spoke "{voice_t}" but also air-wrote "{air_t}". '
662
+ "The air-writing token is a canonical AAC signal "
663
  "(help/stop/water/done/more) β€” prioritise it over the spoken "
664
  "words, which may have been misheard."
665
  )
backend/pipeline/state.py CHANGED
@@ -91,7 +91,7 @@ class PipelineState(TypedDict):
91
  affect: AffectState | None
92
  gesture_tag: str | None # e.g. "THUMBS_UP"
93
  gaze_bucket: str | None # bucket hinted by gaze fixation
94
- air_written_text: str | None # concatenated air-written chars
95
  head_signal: str | None # "HEAD_SHAKE" | "HEAD_NOD_DISSATISFIED"
96
  voice_text: str | None # raw Web Speech transcript, pre-resolution
97
  # Resolved voice⇄air-writing intent. Keys: text, source, voice_text, air_text.
 
91
  affect: AffectState | None
92
  gesture_tag: str | None # e.g. "THUMBS_UP"
93
  gaze_bucket: str | None # bucket hinted by gaze fixation
94
+ air_written_text: str | None # concatenated air-writing chars
95
  head_signal: str | None # "HEAD_SHAKE" | "HEAD_NOD_DISSATISFIED"
96
  voice_text: str | None # raw Web Speech transcript, pre-resolution
97
  # Resolved voice⇄air-writing intent. Keys: text, source, voice_text, air_text.
backend/sensing/labels.py CHANGED
@@ -7,12 +7,24 @@ GESTURE_DIRECTIVES: dict[str, dict[str, str]] = {
7
  "tone": "[GESTURE:THUMBS_DOWN][TONE:NEGATIVE]",
8
  "opener_hint": "Open by declining or disagreeing briefly.",
9
  },
10
- "POINTING": {
11
- "tone": "[GESTURE:POINTING][INTENT:REFERENTIAL]",
12
  "opener_hint": "Treat the query as referring to a specific named thing.",
13
  },
14
- "WAVING": {
15
- "tone": "[GESTURE:WAVING][INTENT:GREETING]",
16
- "opener_hint": "Open with a greeting.",
 
 
 
 
 
 
 
 
 
 
 
 
17
  },
18
  }
 
7
  "tone": "[GESTURE:THUMBS_DOWN][TONE:NEGATIVE]",
8
  "opener_hint": "Open by declining or disagreeing briefly.",
9
  },
10
+ "POINTING_UP": {
11
+ "tone": "[GESTURE:POINTING_UP][INTENT:REFERENTIAL]",
12
  "opener_hint": "Treat the query as referring to a specific named thing.",
13
  },
14
+ "CLOSED_FIST": {
15
+ "tone": "[GESTURE:CLOSED_FIST][TONE:EMPHATIC]",
16
+ "opener_hint": "Respond with emphasis or urgency β€” something important needs saying.",
17
+ },
18
+ "OPEN_PALM": {
19
+ "tone": "[GESTURE:OPEN_PALM][INTENT:GREETING]",
20
+ "opener_hint": "Open with a warm greeting.",
21
+ },
22
+ "VICTORY": {
23
+ "tone": "[GESTURE:VICTORY][TONE:CELEBRATORY]",
24
+ "opener_hint": "Open with celebration or excitement.",
25
+ },
26
+ "I_LOVE_YOU": {
27
+ "tone": "[GESTURE:I_LOVE_YOU][TONE:AFFECTIONATE]",
28
+ "opener_hint": "Open with warmth and affection.",
29
  },
30
  }
frontend/src/components/SensingStatus.tsx CHANGED
@@ -60,7 +60,7 @@ export function SensingStatus({ sensing, webcamActive }: Props) {
60
  )}
61
  {sensing.airWrittenText && (
62
  <div className="sensing-row">
63
- <span className="sensing-label">Air-written</span>
64
  <span className="sensing-value">{sensing.airWrittenText}</span>
65
  </div>
66
  )}
 
60
  )}
61
  {sensing.airWrittenText && (
62
  <div className="sensing-row">
63
+ <span className="sensing-label">Air-writing</span>
64
  <span className="sensing-value">{sensing.airWrittenText}</span>
65
  </div>
66
  )}
frontend/src/hooks/useSensing.ts CHANGED
@@ -1,35 +1,30 @@
1
  import { useRef, useCallback, useState, useEffect } from "react";
2
  import {
3
  FaceLandmarker,
4
- HandLandmarker,
5
  FilesetResolver,
6
  } from "@mediapipe/tasks-vision";
7
  import type { SensingState } from "../types";
8
  import {
9
- computeAffectVector,
10
  classifyAffect,
11
- classifyGesture,
12
  GazeTracker,
13
  AirWriter,
14
  HeadPoseTracker,
15
  } from "../lib/sensing";
16
  import { DEFAULT_AIR_TEMPLATES } from "../lib/airTemplates";
17
 
18
- const EMA_ALPHA = 0.2;
19
  const GESTURE_DEBOUNCE_FRAMES = 3;
20
  const AFFECT_DEBOUNCE_FRAMES = 8;
21
 
22
  export function useSensing() {
23
  const faceLandmarkerRef = useRef<FaceLandmarker | null>(null);
24
- const handLandmarkerRef = useRef<HandLandmarker | null>(null);
25
  const gazeTrackerRef = useRef(new GazeTracker());
26
  const airWriterRef = useRef(new AirWriter(DEFAULT_AIR_TEMPLATES));
27
  const headTrackerRef = useRef(new HeadPoseTracker());
28
  const calibratePendingRef = useRef(false);
29
  const headDebugRef = useRef({ dx: 0, dy: 0, maxAbsDx: 0, maxAbsDy: 0, crossings: 0 });
30
- const neutralLCPRef = useRef<number | null>(null);
31
- const calibBufferRef = useRef<number[]>([]);
32
- const smoothedRef = useRef({ MAR: 0, EAR: 0.3, BRI: -0.3, LCP: 0 });
33
  const gestureCountRef = useRef<{ tag: SensingState["gestureTag"]; count: number }>({ tag: null, count: 0 });
34
  const affectCountRef = useRef<{ affect: SensingState["affect"]; count: number }>({ affect: null, count: 0 });
35
  const initingRef = useRef(false);
@@ -49,9 +44,9 @@ export function useSensing() {
49
  useEffect(() => {
50
  return () => {
51
  faceLandmarkerRef.current?.close();
52
- handLandmarkerRef.current?.close();
53
  faceLandmarkerRef.current = null;
54
- handLandmarkerRef.current = null;
55
  };
56
  }, []);
57
 
@@ -72,16 +67,16 @@ export function useSensing() {
72
  },
73
  runningMode: "VIDEO",
74
  numFaces: 1,
75
- outputFaceBlendshapes: false,
76
  outputFacialTransformationMatrixes: false,
77
  }
78
  );
79
- handLandmarkerRef.current = await HandLandmarker.createFromOptions(
80
  vision,
81
  {
82
  baseOptions: {
83
  modelAssetPath:
84
- "https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task",
85
  delegate: "GPU",
86
  },
87
  runningMode: "VIDEO",
@@ -103,8 +98,8 @@ export function useSensing() {
103
  const processFrame = useCallback(
104
  (video: HTMLVideoElement, timestamp: number) => {
105
  const faceLandmarker = faceLandmarkerRef.current;
106
- const handLandmarker = handLandmarkerRef.current;
107
- if (!faceLandmarker || !handLandmarker) return;
108
 
109
  let affect: SensingState["affect"] = null;
110
  let gazeBucket: SensingState["gazeBucket"] = null;
@@ -114,38 +109,17 @@ export function useSensing() {
114
  if (faceResult.faceLandmarks && faceResult.faceLandmarks.length > 0) {
115
  const landmarks = faceResult.faceLandmarks[0];
116
 
117
- // Average the raw LCP (vertical corner pull, pre-offset) over ~30 frames
118
- // of the user's face before locking neutral. Single-frame calibration is
119
- // too noisy and tended to bake in a momentary smile as "neutral".
120
- // During calibration, affect stays null but gaze/head/gesture still flow.
121
- if (neutralLCPRef.current === null) {
122
- const raw0 = computeAffectVector(landmarks, 0);
123
- calibBufferRef.current.push(raw0.LCP);
124
- if (calibBufferRef.current.length >= 30) {
125
- const sum = calibBufferRef.current.reduce((a, b) => a + b, 0);
126
- neutralLCPRef.current = sum / calibBufferRef.current.length;
127
- calibBufferRef.current = [];
128
- }
129
- }
130
-
131
  if (calibratePendingRef.current) {
132
  headTrackerRef.current.calibrate(landmarks);
133
  calibratePendingRef.current = false;
134
  }
135
 
136
- if (neutralLCPRef.current !== null) {
137
- const raw = computeAffectVector(landmarks, neutralLCPRef.current);
138
-
139
- const prev = smoothedRef.current;
140
- const smoothed = {
141
- MAR: EMA_ALPHA * raw.MAR + (1 - EMA_ALPHA) * prev.MAR,
142
- EAR: EMA_ALPHA * raw.EAR + (1 - EMA_ALPHA) * prev.EAR,
143
- BRI: EMA_ALPHA * raw.BRI + (1 - EMA_ALPHA) * prev.BRI,
144
- LCP: EMA_ALPHA * raw.LCP + (1 - EMA_ALPHA) * prev.LCP,
145
- };
146
- smoothedRef.current = smoothed;
147
-
148
- affect = classifyAffect(smoothed);
149
  }
150
 
151
  gazeBucket = gazeTrackerRef.current.process(landmarks);
@@ -155,10 +129,11 @@ export function useSensing() {
155
 
156
  let gestureTag: SensingState["gestureTag"] = null;
157
 
158
- const handResult = handLandmarker.detectForVideo(video, timestamp);
159
- if (handResult.landmarks && handResult.landmarks.length > 0) {
160
- const handLandmarks = handResult.landmarks[0];
161
- gestureTag = classifyGesture(handLandmarks);
 
162
  airWriterRef.current.processHandLandmarks(
163
  handLandmarks,
164
  video.videoWidth,
@@ -217,9 +192,6 @@ export function useSensing() {
217
  }, []);
218
 
219
  const resetCalibration = useCallback(() => {
220
- neutralLCPRef.current = null;
221
- calibBufferRef.current = [];
222
- smoothedRef.current = { MAR: 0, EAR: 0.3, BRI: -0.3, LCP: 0 };
223
  gestureCountRef.current = { tag: null, count: 0 };
224
  affectCountRef.current = { affect: null, count: 0 };
225
  gazeTrackerRef.current.reset();
 
1
  import { useRef, useCallback, useState, useEffect } from "react";
2
  import {
3
  FaceLandmarker,
4
+ GestureRecognizer,
5
  FilesetResolver,
6
  } from "@mediapipe/tasks-vision";
7
  import type { SensingState } from "../types";
8
  import {
 
9
  classifyAffect,
10
+ mapGestureLabel,
11
  GazeTracker,
12
  AirWriter,
13
  HeadPoseTracker,
14
  } from "../lib/sensing";
15
  import { DEFAULT_AIR_TEMPLATES } from "../lib/airTemplates";
16
 
 
17
  const GESTURE_DEBOUNCE_FRAMES = 3;
18
  const AFFECT_DEBOUNCE_FRAMES = 8;
19
 
20
  export function useSensing() {
21
  const faceLandmarkerRef = useRef<FaceLandmarker | null>(null);
22
+ const gestureRecognizerRef = useRef<GestureRecognizer | null>(null);
23
  const gazeTrackerRef = useRef(new GazeTracker());
24
  const airWriterRef = useRef(new AirWriter(DEFAULT_AIR_TEMPLATES));
25
  const headTrackerRef = useRef(new HeadPoseTracker());
26
  const calibratePendingRef = useRef(false);
27
  const headDebugRef = useRef({ dx: 0, dy: 0, maxAbsDx: 0, maxAbsDy: 0, crossings: 0 });
 
 
 
28
  const gestureCountRef = useRef<{ tag: SensingState["gestureTag"]; count: number }>({ tag: null, count: 0 });
29
  const affectCountRef = useRef<{ affect: SensingState["affect"]; count: number }>({ affect: null, count: 0 });
30
  const initingRef = useRef(false);
 
44
  useEffect(() => {
45
  return () => {
46
  faceLandmarkerRef.current?.close();
47
+ gestureRecognizerRef.current?.close();
48
  faceLandmarkerRef.current = null;
49
+ gestureRecognizerRef.current = null;
50
  };
51
  }, []);
52
 
 
67
  },
68
  runningMode: "VIDEO",
69
  numFaces: 1,
70
+ outputFaceBlendshapes: true,
71
  outputFacialTransformationMatrixes: false,
72
  }
73
  );
74
+ gestureRecognizerRef.current = await GestureRecognizer.createFromOptions(
75
  vision,
76
  {
77
  baseOptions: {
78
  modelAssetPath:
79
+ "https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task",
80
  delegate: "GPU",
81
  },
82
  runningMode: "VIDEO",
 
98
  const processFrame = useCallback(
99
  (video: HTMLVideoElement, timestamp: number) => {
100
  const faceLandmarker = faceLandmarkerRef.current;
101
+ const gestureRecognizer = gestureRecognizerRef.current;
102
+ if (!faceLandmarker || !gestureRecognizer) return;
103
 
104
  let affect: SensingState["affect"] = null;
105
  let gazeBucket: SensingState["gazeBucket"] = null;
 
109
  if (faceResult.faceLandmarks && faceResult.faceLandmarks.length > 0) {
110
  const landmarks = faceResult.faceLandmarks[0];
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  if (calibratePendingRef.current) {
113
  headTrackerRef.current.calibrate(landmarks);
114
  calibratePendingRef.current = false;
115
  }
116
 
117
+ if (faceResult.faceBlendshapes && faceResult.faceBlendshapes.length > 0) {
118
+ const bs: Record<string, number> = {};
119
+ for (const cat of faceResult.faceBlendshapes[0].categories) {
120
+ bs[cat.categoryName] = cat.score;
121
+ }
122
+ affect = classifyAffect(bs);
 
 
 
 
 
 
 
123
  }
124
 
125
  gazeBucket = gazeTrackerRef.current.process(landmarks);
 
129
 
130
  let gestureTag: SensingState["gestureTag"] = null;
131
 
132
+ const gestureResult = gestureRecognizer.recognizeForVideo(video, timestamp);
133
+ if (gestureResult.gestures && gestureResult.gestures.length > 0) {
134
+ const topGesture = gestureResult.gestures[0][0];
135
+ gestureTag = mapGestureLabel(topGesture.categoryName);
136
+ const handLandmarks = gestureResult.landmarks[0];
137
  airWriterRef.current.processHandLandmarks(
138
  handLandmarks,
139
  video.videoWidth,
 
192
  }, []);
193
 
194
  const resetCalibration = useCallback(() => {
 
 
 
195
  gestureCountRef.current = { tag: null, count: 0 };
196
  affectCountRef.current = { affect: null, count: 0 };
197
  gazeTrackerRef.current.reset();
frontend/src/lib/resolveIntent.ts CHANGED
@@ -2,7 +2,7 @@ import { DEFAULT_AIR_TEMPLATES } from "./airTemplates";
2
 
3
  // Canonical AAC tokens that carry high signal when someone air-writes them —
4
  // short, action-oriented, and hard to confuse for casual chat. When the
5
- // voice transcript and the air-written text disagree, these tokens win.
6
  const AAC_PRIORITY_TOKENS: ReadonlySet<string> = new Set(
7
  ["help", "stop", "water", "done", "more"].filter((t) =>
8
  DEFAULT_AIR_TEMPLATES.has(t)
 
2
 
3
  // Canonical AAC tokens that carry high signal when someone air-writes them —
4
  // short, action-oriented, and hard to confuse for casual chat. When the
5
+ // voice transcript and the air-writing text disagree, these tokens win.
6
  const AAC_PRIORITY_TOKENS: ReadonlySet<string> = new Set(
7
  ["help", "stop", "water", "done", "more"].filter((t) =>
8
  DEFAULT_AIR_TEMPLATES.has(t)
frontend/src/lib/sensing.ts CHANGED
@@ -1,132 +1,37 @@
1
  import type { Affect, GestureName, MemoryBucket } from "../types";
2
 
3
- // ── Affect classification (ported from backend/sensing/face_mesh.py) ────────
4
-
5
- interface AffectVector {
6
- MAR: number;
7
- EAR: number;
8
- BRI: number;
9
- LCP: number;
10
- }
11
-
12
- export function classifyAffect(v: AffectVector): Affect {
13
- // BRI is relative (browMid.y - eyeCenter.y) / interOcular β€” more negative = brows raised higher
14
- // LCP is vertical offset of lip corners from mouth center, normalised by inter-ocular,
15
- // relative to calibrated neutral β€” positive = corners pulled UP (smile), negative = DOWN (frown)
16
- // MAR is absolute ratio β€” higher = mouth more open
17
- // EAR is absolute ratio β€” lower = eyes more closed / squinting
18
- if (v.BRI < -0.35 && v.MAR > 0.4) return "SURPRISED";
19
- // FRUSTRATED: a clear frown, OR brows lowered + squinting β€” either signals displeasure
20
- if (v.LCP < -0.018) return "FRUSTRATED";
21
- if (v.BRI > -0.2 && v.EAR < 0.18) return "FRUSTRATED";
22
- if (v.LCP > 0.012) return "HAPPY";
23
  return "NEUTRAL";
24
  }
25
 
26
- // Face landmark indices (MediaPipe 478-point mesh)
27
- const MOUTH_TOP = 13, MOUTH_BOTTOM = 14, MOUTH_LEFT = 61, MOUTH_RIGHT = 291;
28
- const EYE_TOP = 159, EYE_BOTTOM = 145, EYE_LEFT = 33, EYE_RIGHT = 133;
29
- const BROW_LEFT = 70, BROW_RIGHT = 300;
30
- const CORNER_LEFT = 61, CORNER_RIGHT = 291;
31
-
32
- function dist(a: { x: number; y: number }, b: { x: number; y: number }): number {
33
- return Math.sqrt((a.x - b.x) ** 2 + (a.y - b.y) ** 2);
34
- }
35
-
36
- export function computeAffectVector(
37
- landmarks: { x: number; y: number }[],
38
- neutralLCP: number
39
- ): AffectVector {
40
- const MAR =
41
- dist(landmarks[MOUTH_TOP], landmarks[MOUTH_BOTTOM]) /
42
- (dist(landmarks[MOUTH_LEFT], landmarks[MOUTH_RIGHT]) + 1e-6);
43
-
44
- const EAR =
45
- dist(landmarks[EYE_TOP], landmarks[EYE_BOTTOM]) /
46
- (dist(landmarks[EYE_LEFT], landmarks[EYE_RIGHT]) + 1e-6);
47
-
48
- const eyeCenter = {
49
- x: (landmarks[EYE_LEFT].x + landmarks[EYE_RIGHT].x) / 2,
50
- y: (landmarks[EYE_LEFT].y + landmarks[EYE_RIGHT].y) / 2,
51
- };
52
- const interOcular = dist(landmarks[EYE_LEFT], landmarks[EYE_RIGHT]);
53
- const browMid = {
54
- x: (landmarks[BROW_LEFT].x + landmarks[BROW_RIGHT].x) / 2,
55
- y: (landmarks[BROW_LEFT].y + landmarks[BROW_RIGHT].y) / 2,
56
- };
57
- // MediaPipe y increases downward, so browMid.y < eyeCenter.y when brows are above eyes.
58
- // Raising brows moves them toward y=0, making this value more negative.
59
- const BRI = (browMid.y - eyeCenter.y) / (interOcular + 1e-6);
60
-
61
- // Lip-corner pull: average y of the two corners vs. mouth vertical centre,
62
- // normalised by inter-ocular distance, relative to calibrated neutral.
63
- // MediaPipe y increases downward, so corners rising above the mouth centre β†’ negative raw,
64
- // which we flip so smile = positive. Subtracting the calibrated neutral removes per-face bias.
65
- const mouthCentreY = (landmarks[MOUTH_TOP].y + landmarks[MOUTH_BOTTOM].y) / 2;
66
- const cornerAvgY = (landmarks[CORNER_LEFT].y + landmarks[CORNER_RIGHT].y) / 2;
67
- const rawLCP = (mouthCentreY - cornerAvgY) / (interOcular + 1e-6);
68
- const LCP = rawLCP - neutralLCP;
69
-
70
- return { MAR, EAR, BRI, LCP };
71
- }
72
-
73
- // ── Gesture classification (ported from backend/sensing/gesture.py) ─────────
74
-
75
- interface Point3D {
76
- x: number;
77
- y: number;
78
- z: number;
79
- }
80
-
81
- function norm3(a: Point3D): number {
82
- return Math.sqrt(a.x ** 2 + a.y ** 2 + a.z ** 2);
83
- }
84
-
85
- function sub3(a: Point3D, b: Point3D): Point3D {
86
- return { x: a.x - b.x, y: a.y - b.y, z: a.z - b.z };
87
- }
88
-
89
- function scale3(a: Point3D, s: number): Point3D {
90
- return { x: a.x * s, y: a.y * s, z: a.z * s };
91
- }
92
-
93
- export function classifyGesture(landmarks: Point3D[]): GestureName | null {
94
- const wrist = landmarks[0];
95
- const palmWidth =
96
- norm3(sub3(landmarks[5], landmarks[17])) + 1e-6;
97
-
98
- const p = landmarks.map((lm) => scale3(sub3(lm, wrist), 1 / palmWidth));
99
-
100
- const thumbTip = p[4];
101
- const indexTip = p[8];
102
- const middleTip = p[12];
103
- const ringTip = p[16];
104
- const pinkyTip = p[20];
105
- const indexMcp = p[5];
106
-
107
- const fingersCurled = [
108
- [indexTip, p[5]],
109
- [middleTip, p[9]],
110
- [ringTip, p[13]],
111
- ].every(([tip, mcp]) => norm3(tip) < norm3(mcp));
112
-
113
- // Check POINTING before THUMBS_UP β€” pointing with a raised thumb would otherwise
114
- // satisfy fingersCurled on a noisy frame and fire the wrong label first.
115
- const indexExtended = norm3(indexTip) > norm3(indexMcp) * 1.3;
116
- const othersCurled = [middleTip, ringTip, pinkyTip].every(
117
- (tip) => norm3(tip) < 0.7
118
- );
119
- if (indexExtended && othersCurled) return "POINTING";
120
-
121
- if (thumbTip.y < -0.3 && fingersCurled) return "THUMBS_UP";
122
- if (thumbTip.y > 0.3 && fingersCurled) return "THUMBS_DOWN";
123
-
124
- const allExtended = [indexTip, middleTip, ringTip, pinkyTip, thumbTip].every(
125
- (tip) => norm3(tip) > 0.7
126
- );
127
- if (allExtended) return "WAVING";
128
-
129
- return null;
130
  }
131
 
132
  // ── Gaze region mapping (ported from backend/sensing/gaze.py) ────────────────
 
1
  import type { Affect, GestureName, MemoryBucket } from "../types";
2
 
3
+ // ── Affect classification via MediaPipe blendshapes ──────────────────────────
4
+
5
+ export function classifyAffect(bs: Record<string, number>): Affect {
6
+ const smileLeft = bs["mouthSmileLeft"] ?? 0;
7
+ const smileRight = bs["mouthSmileRight"] ?? 0;
8
+ const browDownL = bs["browDownLeft"] ?? 0;
9
+ const browDownR = bs["browDownRight"] ?? 0;
10
+ const squintL = bs["eyeSquintLeft"] ?? 0;
11
+ const squintR = bs["eyeSquintRight"] ?? 0;
12
+ const jawOpen = bs["jawOpen"] ?? 0;
13
+ const browInnerUp = bs["browInnerUp"] ?? 0;
14
+
15
+ if (jawOpen > 0.4 && browInnerUp > 0.5) return "SURPRISED";
16
+ if (browDownL > 0.4 || browDownR > 0.4) return "FRUSTRATED";
17
+ if (squintL > 0.5 && squintR > 0.5) return "FRUSTRATED";
18
+ if (smileLeft > 0.5 && smileRight > 0.5) return "HAPPY";
 
 
 
 
19
  return "NEUTRAL";
20
  }
21
 
22
+ // ── Gesture label mapping from MediaPipe GestureRecognizer ───────────────────
23
+
24
+ export function mapGestureLabel(label: string): GestureName | null {
25
+ switch (label) {
26
+ case "Thumb_Up": return "THUMBS_UP";
27
+ case "Thumb_Down": return "THUMBS_DOWN";
28
+ case "Pointing_Up": return "POINTING_UP";
29
+ case "Closed_Fist": return "CLOSED_FIST";
30
+ case "Open_Palm": return "OPEN_PALM";
31
+ case "Victory": return "VICTORY";
32
+ case "ILoveYou": return "I_LOVE_YOU";
33
+ default: return null;
34
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  }
36
 
37
  // ── Gaze region mapping (ported from backend/sensing/gaze.py) ────────────────
frontend/src/types.ts CHANGED
@@ -1,5 +1,5 @@
1
  export type Affect = "HAPPY" | "FRUSTRATED" | "NEUTRAL" | "SURPRISED";
2
- export type GestureName = "THUMBS_UP" | "THUMBS_DOWN" | "POINTING" | "WAVING";
3
  export type MemoryBucket = "family" | "medical" | "hobbies" | "daily_routine" | "social";
4
  export type HeadSignal = "HEAD_SHAKE" | "HEAD_NOD_DISSATISFIED";
5
 
 
1
  export type Affect = "HAPPY" | "FRUSTRATED" | "NEUTRAL" | "SURPRISED";
2
+ export type GestureName = "THUMBS_UP" | "THUMBS_DOWN" | "POINTING_UP" | "CLOSED_FIST" | "OPEN_PALM" | "VICTORY" | "I_LOVE_YOU";
3
  export type MemoryBucket = "family" | "medical" | "hobbies" | "daily_routine" | "social";
4
  export type HeadSignal = "HEAD_SHAKE" | "HEAD_NOD_DISSATISFIED";
5