Spaces:

ub-aac-chatbot
/

aac-chatbot

Sleeping

App Files Files Community

akashkolte committed 16 days ago

Commit

cadc44f

unverified ·

2 Parent(s): 34b30e1 948a968

Merge pull request #6 from akashkolte/akash/m3-changes

Browse files

Files changed (5) hide show

frontend/src/App.tsx +1 -27
frontend/src/hooks/useSensing.ts +79 -175
frontend/src/hooks/useWebcam.ts +1 -1
frontend/src/lib/sensing.ts +100 -266
frontend/src/types.ts +1 -37

frontend/src/App.tsx CHANGED Viewed

@@ -8,7 +8,6 @@ import { ChatPanel } from "./components/ChatPanel";
 import { WebcamSensing } from "./components/WebcamSensing";
 import { SensingStatus } from "./components/SensingStatus";
 import { LatencyMetrics } from "./components/LatencyMetrics";
-import { CalibrationOverlay } from "./components/CalibrationOverlay";
 import "./App.css";
 function App() {
@@ -37,13 +36,8 @@ function App() {
     sensing,
     ready,
     initError,
-    isCalibrating,
-    isCalibrated,
-    calibrationProgress,
     init,
     processFrame,
-    startCalibration,
-    cancelCalibration,
     clearAirWrittenText,
     clearHeadSignal,
     resetCalibration,
@@ -61,22 +55,12 @@ function App() {
     onFrame,
   });
-  const autoCalibratedRef = useRef(false);
-  useEffect(() => {
-    if (active && ready && !autoCalibratedRef.current) {
-      autoCalibratedRef.current = true;
-      startCalibration();
-    }
-  }, [active, ready, startCalibration]);
   async function handleWebcamToggle() {
     if (!webcamEnabled) {
       const ok = await init();
       if (ok) setWebcamEnabled(true);
     } else {
       setWebcamEnabled(false);
-      autoCalibratedRef.current = false;
       resetCalibration();
     }
   }
@@ -115,12 +99,7 @@ function App() {
             Enable webcam
           </label>
           <WebcamSensing videoRef={videoRef} active={active} error={error || initError} />
-          <SensingStatus
-            sensing={sensing}
-            webcamActive={active}
-            calibrated={isCalibrated}
-            onRecalibrate={active ? startCalibration : undefined}
-          />
         </div>
         <div className="sidebar-section">
@@ -160,11 +139,6 @@ function App() {
         />
       </main>
-      <CalibrationOverlay
-        active={isCalibrating}
-        progress={calibrationProgress}
-        onCancel={cancelCalibration}
-      />
     </div>
   );
 }

 import { WebcamSensing } from "./components/WebcamSensing";
 import { SensingStatus } from "./components/SensingStatus";
 import { LatencyMetrics } from "./components/LatencyMetrics";
 import "./App.css";
 function App() {
     sensing,
     ready,
     initError,
     init,
     processFrame,
     clearAirWrittenText,
     clearHeadSignal,
     resetCalibration,
     onFrame,
   });
   async function handleWebcamToggle() {
     if (!webcamEnabled) {
       const ok = await init();
       if (ok) setWebcamEnabled(true);
     } else {
       setWebcamEnabled(false);
       resetCalibration();
     }
   }
             Enable webcam
           </label>
           <WebcamSensing videoRef={videoRef} active={active} error={error || initError} />
+          <SensingStatus sensing={sensing} webcamActive={active} />
         </div>
         <div className="sidebar-section">
         />
       </main>
     </div>
   );
 }

frontend/src/hooks/useSensing.ts CHANGED Viewed

@@ -11,39 +11,30 @@ import {
   GazeTracker,
   AirWriter,
   HeadPoseTracker,
-  Calibrator,
-  worldGazeXY,
-  extractAngles,
-  faceBboxSize,
 } from "../lib/sensing";
 import { recognizeInkStroke } from "../lib/inkRecognizer";
-const GESTURE_DEBOUNCE_MS = 100;
-const AFFECT_DEBOUNCE_MS  = 270;
-const AIRWRITING_ENABLED  = import.meta.env.VITE_AIRWRITING_ENABLED !== "false";
-const GAZE_ENABLED        = import.meta.env.VITE_GAZE_ENABLED !== "false";
-const CALIBRATION_ENABLED = import.meta.env.VITE_CALIBRATION_ENABLED !== "false";
 export function useSensing() {
-  const faceLandmarkerRef = useRef<FaceLandmarker | null>(null);
   const gestureRecognizerRef = useRef<GestureRecognizer | null>(null);
-  const calibratorRef = useRef(new Calibrator());
-  const gazeTrackerRef = useRef(new GazeTracker());
-  const airWriterRef = useRef(new AirWriter());
-  const inkBusyRef = useRef(false);
-  const headTrackerRef = useRef(new HeadPoseTracker());
-  const headDebugRef = useRef({ pitch: 0, yaw: 0, roll: 0, crossings: 0 });
-  const gestureCountRef = useRef<{ tag: SensingState["gestureTag"]; since: number }>({ tag: null, since: 0 });
-  const affectCountRef = useRef<{ affect: SensingState["affect"]; since: number }>({ affect: null, since: 0 });
-  const initingRef = useRef(false);
-  const [ready, setReady] = useState(false);
   const [initError, setInitError] = useState<string | null>(null);
-  const [isCalibrating, setIsCalibrating] = useState(false);
-  const [isCalibrated, setIsCalibrated] = useState(false);
-  const [calibrationProgress, setCalibrationProgress] = useState(0);
-  const [sensing, setSensing] = useState<SensingState>({
     affect: null,
     gestureTag: null,
     gazeZone: null,
@@ -51,6 +42,7 @@ export function useSensing() {
     airWrittenText: "",
     airWritingActive: false,
     headSignal: null,
     headDebug: { pitch: 0, yaw: 0, roll: 0, crossings: 0 },
   });
@@ -58,7 +50,7 @@ export function useSensing() {
     return () => {
       faceLandmarkerRef.current?.close();
       gestureRecognizerRef.current?.close();
-      faceLandmarkerRef.current = null;
       gestureRecognizerRef.current = null;
     };
   }, []);
@@ -70,128 +62,74 @@ export function useSensing() {
       const vision = await FilesetResolver.forVisionTasks(
         "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
       );
-      faceLandmarkerRef.current = await FaceLandmarker.createFromOptions(
-        vision,
-        {
-          baseOptions: {
-            modelAssetPath:
-              "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task",
-            delegate: "GPU",
-          },
-          runningMode: "VIDEO",
-          numFaces: 1,
-          outputFaceBlendshapes: true,
-          outputFacialTransformationMatrixes: true,
-        }
-      );
-      gestureRecognizerRef.current = await GestureRecognizer.createFromOptions(
-        vision,
-        {
-          baseOptions: {
-            modelAssetPath:
-              "https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task",
-            delegate: "GPU",
-          },
-          runningMode: "VIDEO",
-          numHands: 1,
-        }
-      );
       setReady(true);
       return true;
     } catch (e) {
-      setInitError(
-        e instanceof Error ? e.message : "Failed to load MediaPipe models"
-      );
       return false;
     } finally {
       initingRef.current = false;
     }
   }, []);
-  const startCalibration = useCallback(() => {
-    if (!CALIBRATION_ENABLED) {
-      setIsCalibrated(true);
-      return;
-    }
-    calibratorRef.current.start();
-    setIsCalibrating(true);
-    setIsCalibrated(false);
-    setCalibrationProgress(0);
-    // Reset the per-detector state so post-calibration baselines aren't
-    // mixed with stale pre-calibration history.
-    gazeTrackerRef.current.reset();
-    headTrackerRef.current.reset();
-    gestureCountRef.current = { tag: null, since: 0 };
-    affectCountRef.current = { affect: null, since: 0 };
-  }, []);
-  const cancelCalibration = useCallback(() => {
-    calibratorRef.current.cancel();
-    setIsCalibrating(false);
-    setIsCalibrated(false);
-    setCalibrationProgress(0);
-  }, []);
   const processFrame = useCallback(
     (video: HTMLVideoElement, timestamp: number) => {
-      const faceLandmarker = faceLandmarkerRef.current;
       const gestureRecognizer = gestureRecognizerRef.current;
       if (!faceLandmarker || !gestureRecognizer) return;
-      const calibrator = calibratorRef.current;
-      const calibrating = calibrator.isActive;
-      const baseline = calibrator.getBaseline();
-      let affect: SensingState["affect"] = null;
       let gazeBucket: SensingState["gazeBucket"] = null;
       let headSignal: SensingState["headSignal"] = null;
       const faceResult = faceLandmarker.detectForVideo(video, timestamp);
       if (faceResult.faceLandmarks && faceResult.faceLandmarks.length > 0) {
         const matrix = faceResult.facialTransformationMatrixes?.[0] ?? null;
-        const landmarks = faceResult.faceLandmarks[0];
         const bs: Record<string, number> = {};
         if (faceResult.faceBlendshapes && faceResult.faceBlendshapes.length > 0) {
           for (const cat of faceResult.faceBlendshapes[0].categories) {
             bs[cat.categoryName] = cat.score;
           }
         }
-        if (calibrating) {
-          calibrator.addSample({
-            blendshapes: bs,
-            gaze: matrix ? worldGazeXY(matrix, bs) : null,
-            head: matrix ? extractAngles(matrix.data) : null,
-            faceBboxSize: faceBboxSize(landmarks),
-          });
-          setCalibrationProgress(Math.round(calibrator.progress * 100) / 100);
-          if (calibrator.isReady) {
-            setIsCalibrating(false);
-            setIsCalibrated(true);
-            setCalibrationProgress(1);
-          }
-          return;
-        }
-        affect = classifyAffect(bs, baseline);
         if (GAZE_ENABLED) {
-          gazeBucket = gazeTrackerRef.current.process(matrix, bs, baseline);
         }
         if (matrix) {
-          headSignal = headTrackerRef.current.process(matrix, baseline);
           headDebugRef.current = headTrackerRef.current.debug;
         }
-      } else if (calibrating) {
-        setCalibrationProgress(Math.round(calibrator.progress * 100) / 100);
-        return;
       }
       let gestureTag: SensingState["gestureTag"] = null;
-      const gestureResult = gestureRecognizer.recognizeForVideo(video, timestamp);
       if (gestureResult.gestures && gestureResult.gestures.length > 0) {
         const topGesture = gestureResult.gestures[0][0];
         gestureTag = mapGestureLabel(topGesture.categoryName);
@@ -214,67 +152,41 @@ export function useSensing() {
           recognizeInkStroke(completedStroke).then((text) => {
             inkBusyRef.current = false;
             if (text) {
-              setSensing((prev) => ({
-                ...prev,
-                airWrittenText: prev.airWrittenText + text,
-              }));
             }
           });
         }
       }
-      const now = performance.now();
-      if (gestureTag !== gestureCountRef.current.tag) {
-        gestureCountRef.current = { tag: gestureTag, since: now };
       }
-      const stableGesture =
-        now - gestureCountRef.current.since >= GESTURE_DEBOUNCE_MS
-          ? gestureTag
-          : null;
-      if (affect !== affectCountRef.current.affect) {
-        affectCountRef.current = { affect, since: now };
       }
-      const stableAffect =
-        now - affectCountRef.current.since >= AFFECT_DEBOUNCE_MS
-          ? affect
-          : null;
-      const activeZone = GAZE_ENABLED ? gazeTrackerRef.current.activeZone : null;
-      const airWritingActive = airWriterRef.current.strokeActive;
-      const headDebug = headDebugRef.current;
-      setSensing((prev) => {
-        const nextAffect = stableAffect ?? prev.affect;
-        const nextGazeBucket = gazeBucket ?? prev.gazeBucket;
-        const nextHeadSignal = headSignal ?? prev.headSignal;
-        const debugChanged =
-          headDebug.pitch !== prev.headDebug.pitch ||
-          headDebug.yaw !== prev.headDebug.yaw ||
-          headDebug.roll !== prev.headDebug.roll ||
-          headDebug.crossings !== prev.headDebug.crossings;
-        if (
-          !debugChanged &&
-          activeZone === prev.gazeZone &&
-          nextAffect === prev.affect &&
-          stableGesture === prev.gestureTag &&
-          nextGazeBucket === prev.gazeBucket &&
-          nextHeadSignal === prev.headSignal &&
-          airWritingActive === prev.airWritingActive
-        ) {
-          return prev;
-        }
-        return {
-          ...prev,
-          affect: nextAffect,
-          gestureTag: stableGesture,
-          gazeZone: activeZone,
-          gazeBucket: nextGazeBucket,
-          airWritingActive,
-          headSignal: nextHeadSignal,
-          headDebug: debugChanged ? headDebug : prev.headDebug,
-        };
-      });
     },
     []
   );
@@ -288,14 +200,10 @@ export function useSensing() {
   }, []);
   const resetCalibration = useCallback(() => {
-    gestureCountRef.current = { tag: null, since: 0 };
-    affectCountRef.current = { affect: null, since: 0 };
     gazeTrackerRef.current.reset();
     headTrackerRef.current.reset();
-    calibratorRef.current.cancel();
-    setIsCalibrating(false);
-    setIsCalibrated(false);
-    setCalibrationProgress(0);
     setSensing({
       affect: null,
       gestureTag: null,
@@ -304,6 +212,7 @@ export function useSensing() {
       airWrittenText: "",
       airWritingActive: false,
       headSignal: null,
       headDebug: { pitch: 0, yaw: 0, roll: 0, crossings: 0 },
     });
   }, []);
@@ -312,13 +221,8 @@ export function useSensing() {
     sensing,
     ready,
     initError,
-    isCalibrating,
-    isCalibrated,
-    calibrationProgress,
     init,
     processFrame,
-    startCalibration,
-    cancelCalibration,
     clearAirWrittenText,
     clearHeadSignal,
     resetCalibration,

   GazeTracker,
   AirWriter,
   HeadPoseTracker,
 } from "../lib/sensing";
 import { recognizeInkStroke } from "../lib/inkRecognizer";
+const GESTURE_DEBOUNCE_FRAMES = 3;
+const AFFECT_DEBOUNCE_FRAMES  = 8;
+const AIRWRITING_ENABLED = import.meta.env.VITE_AIRWRITING_ENABLED !== "false";
+const GAZE_ENABLED       = import.meta.env.VITE_GAZE_ENABLED !== "false";
 export function useSensing() {
+  const faceLandmarkerRef    = useRef<FaceLandmarker | null>(null);
   const gestureRecognizerRef = useRef<GestureRecognizer | null>(null);
+  const gazeTrackerRef  = useRef(new GazeTracker());
+  const airWriterRef    = useRef(new AirWriter());
+  const inkBusyRef      = useRef(false);
+  const headTrackerRef  = useRef(new HeadPoseTracker());
+  const headDebugRef    = useRef({ pitch: 0, yaw: 0, roll: 0, crossings: 0 });
+  const gestureCountRef = useRef<{ tag: SensingState["gestureTag"]; count: number }>({ tag: null, count: 0 });
+  const affectCountRef  = useRef<{ affect: SensingState["affect"]; count: number }>({ affect: null, count: 0 });
+  const initingRef      = useRef(false);
+  const [ready, setReady]       = useState(false);
   const [initError, setInitError] = useState<string | null>(null);
+  const [sensing, setSensing]   = useState<SensingState>({
     affect: null,
     gestureTag: null,
     gazeZone: null,
     airWrittenText: "",
     airWritingActive: false,
     headSignal: null,
+    headCalibrated: false,
     headDebug: { pitch: 0, yaw: 0, roll: 0, crossings: 0 },
   });
     return () => {
       faceLandmarkerRef.current?.close();
       gestureRecognizerRef.current?.close();
+      faceLandmarkerRef.current    = null;
       gestureRecognizerRef.current = null;
     };
   }, []);
       const vision = await FilesetResolver.forVisionTasks(
         "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
       );
+      faceLandmarkerRef.current = await FaceLandmarker.createFromOptions(vision, {
+        baseOptions: {
+          modelAssetPath:
+            "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task",
+          delegate: "GPU",
+        },
+        runningMode: "VIDEO",
+        numFaces: 1,
+        outputFaceBlendshapes: true,
+        outputFacialTransformationMatrixes: true,
+      });
+      gestureRecognizerRef.current = await GestureRecognizer.createFromOptions(vision, {
+        baseOptions: {
+          modelAssetPath:
+            "https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task",
+          delegate: "GPU",
+        },
+        runningMode: "VIDEO",
+        numHands: 1,
+      });
       setReady(true);
       return true;
     } catch (e) {
+      setInitError(e instanceof Error ? e.message : "Failed to load MediaPipe models");
       return false;
     } finally {
       initingRef.current = false;
     }
   }, []);
   const processFrame = useCallback(
     (video: HTMLVideoElement, timestamp: number) => {
+      const faceLandmarker    = faceLandmarkerRef.current;
       const gestureRecognizer = gestureRecognizerRef.current;
       if (!faceLandmarker || !gestureRecognizer) return;
+      let affect: SensingState["affect"]       = null;
       let gazeBucket: SensingState["gazeBucket"] = null;
       let headSignal: SensingState["headSignal"] = null;
       const faceResult = faceLandmarker.detectForVideo(video, timestamp);
       if (faceResult.faceLandmarks && faceResult.faceLandmarks.length > 0) {
         const matrix = faceResult.facialTransformationMatrixes?.[0] ?? null;
         const bs: Record<string, number> = {};
         if (faceResult.faceBlendshapes && faceResult.faceBlendshapes.length > 0) {
           for (const cat of faceResult.faceBlendshapes[0].categories) {
             bs[cat.categoryName] = cat.score;
           }
+          affect = classifyAffect(bs);
         }
         if (GAZE_ENABLED) {
+          gazeBucket = gazeTrackerRef.current.process(matrix, bs);
         }
         if (matrix) {
+          headSignal = headTrackerRef.current.process(matrix);
           headDebugRef.current = headTrackerRef.current.debug;
         }
       }
+      // Always call recognizeForVideo every frame — VIDEO-mode models maintain
+      // internal temporal state and produce stale results if frames are skipped.
+      const gestureResult = gestureRecognizer.recognizeForVideo(video, timestamp);
       let gestureTag: SensingState["gestureTag"] = null;
       if (gestureResult.gestures && gestureResult.gestures.length > 0) {
         const topGesture = gestureResult.gestures[0][0];
         gestureTag = mapGestureLabel(topGesture.categoryName);
           recognizeInkStroke(completedStroke).then((text) => {
             inkBusyRef.current = false;
             if (text) {
+              setSensing((prev) => ({ ...prev, airWrittenText: prev.airWrittenText + text }));
             }
           });
         }
       }
+      if (gestureTag === gestureCountRef.current.tag) {
+        gestureCountRef.current.count++;
+      } else {
+        gestureCountRef.current = { tag: gestureTag, count: 1 };
       }
+      const stableGesture = gestureCountRef.current.count >= GESTURE_DEBOUNCE_FRAMES
+        ? gestureTag
+        : null;
+      if (affect === affectCountRef.current.affect) {
+        affectCountRef.current.count++;
+      } else {
+        affectCountRef.current = { affect, count: 1 };
       }
+      const stableAffect = affectCountRef.current.count >= AFFECT_DEBOUNCE_FRAMES
+        ? affect
+        : null;
+      setSensing((prev) => ({
+        affect: stableAffect ?? prev.affect,
+        gestureTag: stableGesture,
+        gazeZone: GAZE_ENABLED ? gazeTrackerRef.current.activeZone : null,
+        gazeBucket: gazeBucket ?? prev.gazeBucket,
+        airWrittenText: prev.airWrittenText,
+        airWritingActive: airWriterRef.current.strokeActive,
+        headSignal: headSignal ?? prev.headSignal,
+        headCalibrated: headTrackerRef.current.calibrated,
+        headDebug: headDebugRef.current,
+      }));
     },
     []
   );
   }, []);
   const resetCalibration = useCallback(() => {
+    gestureCountRef.current = { tag: null, count: 0 };
+    affectCountRef.current  = { affect: null, count: 0 };
     gazeTrackerRef.current.reset();
     headTrackerRef.current.reset();
     setSensing({
       affect: null,
       gestureTag: null,
       airWrittenText: "",
       airWritingActive: false,
       headSignal: null,
+      headCalibrated: false,
       headDebug: { pitch: 0, yaw: 0, roll: 0, crossings: 0 },
     });
   }, []);
     sensing,
     ready,
     initError,
     init,
     processFrame,
     clearAirWrittenText,
     clearHeadSignal,
     resetCalibration,

frontend/src/hooks/useWebcam.ts CHANGED Viewed

@@ -9,7 +9,7 @@ interface UseWebcamOptions {
 export function useWebcam({
   enabled,
   onFrame,
-  processEveryN = 3,
 }: UseWebcamOptions) {
   const videoRef = useRef<HTMLVideoElement | null>(null);
   const streamRef = useRef<MediaStream | null>(null);

 export function useWebcam({
   enabled,
   onFrame,
+  processEveryN = 2,
 }: UseWebcamOptions) {
   const videoRef = useRef<HTMLVideoElement | null>(null);
   const streamRef = useRef<MediaStream | null>(null);

frontend/src/lib/sensing.ts CHANGED Viewed

@@ -1,178 +1,27 @@
 import type { Matrix } from "@mediapipe/tasks-vision";
-import type { Affect, GestureName, HeadDebug, HeadSignal, MemoryBucket } from "../types";
-const SIGMA_K = 2.8;
-const CALIBRATION_DURATION_MS = 5000;
-const CALIBRATION_WARMUP_MS   = 1000;
-const OUTLIER_TRIM_FRACTION   = 0.10;
-const AFFECT_BLENDSHAPES = [
-  "mouthSmileLeft", "mouthSmileRight",
-  "browDownLeft", "browDownRight",
-  "eyeSquintLeft", "eyeSquintRight",
-  "jawOpen", "browInnerUp",
-] as const;
-type AffectBlendshape = typeof AFFECT_BLENDSHAPES[number];
-interface Stats { mean: number; std: number }
-interface Baseline {
-  affect: Record<string, Stats>;
-  gaze: { x: number; y: number };
-  head: { pitch: number; yaw: number; roll: number };
-  faceBboxSize: number;  // normalised face size — proxy for distance
-}
-function trimmedStats(values: number[]): Stats {
-  if (values.length === 0) return { mean: 0, std: 0 };
-  const sorted = [...values].sort((a, b) => a - b);
-  const trim = Math.floor(sorted.length * OUTLIER_TRIM_FRACTION);
-  const kept = sorted.slice(trim, sorted.length - trim);
-  if (kept.length === 0) return { mean: 0, std: 0 };
-  const mean = kept.reduce((s, v) => s + v, 0) / kept.length;
-  const variance = kept.reduce((s, v) => s + (v - mean) ** 2, 0) / kept.length;
-  const std = Math.max(Math.sqrt(variance), 0.01);
-  return { mean, std };
-}
-function trimmedMean(values: number[]): number {
-  return trimmedStats(values).mean;
-}
-export class Calibrator {
-  private startTs = 0;
-  private active = false;
-  private done = false;
-  private affectSamples: Record<string, number[]> = {};
-  private gazeSamples: { x: number; y: number }[] = [];
-  private headSamples: { pitch: number; yaw: number; roll: number }[] = [];
-  private bboxSamples: number[] = [];
-  private baseline: Baseline | null = null;
-  start(): void {
-    this.startTs = performance.now();
-    this.active = true;
-    this.done = false;
-    this.baseline = null;
-    this.affectSamples = {};
-    for (const name of AFFECT_BLENDSHAPES) this.affectSamples[name] = [];
-    this.gazeSamples = [];
-    this.headSamples = [];
-    this.bboxSamples = [];
-  }
-  cancel(): void {
-    this.active = false;
-    this.done = false;
-    this.baseline = null;
-  }
-  get isActive(): boolean { return this.active; }
-  get isReady(): boolean  { return this.done && this.baseline !== null; }
-  // 0 → 1 over the calibration window (excluding warm-up).
-  get progress(): number {
-    if (!this.active) return this.done ? 1 : 0;
-    const elapsed = performance.now() - this.startTs - CALIBRATION_WARMUP_MS;
-    if (elapsed <= 0) return 0;
-    return Math.min(1, elapsed / (CALIBRATION_DURATION_MS - CALIBRATION_WARMUP_MS));
-  }
-  // Feed a frame's signals during calibration. After the window elapses,
-  // the baseline is computed and `isReady` becomes true.
-  addSample(args: {
-    blendshapes: Record<string, number>;
-    gaze: { x: number; y: number } | null;
-    head: { pitch: number; yaw: number; roll: number } | null;
-    faceBboxSize: number | null;
-  }): void {
-    if (!this.active) return;
-    const elapsed = performance.now() - this.startTs;
-    if (elapsed < CALIBRATION_WARMUP_MS) return;
-    if (elapsed >= CALIBRATION_DURATION_MS) {
-      this.finalise();
-      return;
-    }
-    for (const name of AFFECT_BLENDSHAPES) {
-      const v = args.blendshapes[name];
-      if (typeof v === "number") this.affectSamples[name].push(v);
-    }
-    if (args.gaze) this.gazeSamples.push(args.gaze);
-    if (args.head) this.headSamples.push(args.head);
-    if (typeof args.faceBboxSize === "number") this.bboxSamples.push(args.faceBboxSize);
-  }
-  private finalise(): void {
-    const affect: Record<string, Stats> = {};
-    for (const name of AFFECT_BLENDSHAPES) {
-      affect[name] = trimmedStats(this.affectSamples[name] ?? []);
-    }
-    const gaze = {
-      x: trimmedMean(this.gazeSamples.map((g) => g.x)),
-      y: trimmedMean(this.gazeSamples.map((g) => g.y)),
-    };
-    const head = {
-      pitch: trimmedMean(this.headSamples.map((h) => h.pitch)),
-      yaw:   trimmedMean(this.headSamples.map((h) => h.yaw)),
-      roll:  trimmedMean(this.headSamples.map((h) => h.roll)),
-    };
-    // Floor at a small positive value so we never divide by zero when scaling.
-    const faceBboxSize = Math.max(trimmedMean(this.bboxSamples), 0.01);
-    this.baseline = { affect, gaze, head, faceBboxSize };
-    this.active = false;
-    this.done = true;
-  }
-  getBaseline(): Baseline | null { return this.baseline; }
-}
-const AFFECT_FALLBACK_THRESHOLD = 0.4;
-function isAbove(
-  bs: Record<string, number>,
-  name: AffectBlendshape,
-  baseline: Baseline | null,
-): boolean {
-  const v = bs[name] ?? 0;
-  if (baseline) {
-    const stats = baseline.affect[name];
-    if (!stats) return false;
-    return v - stats.mean > SIGMA_K * stats.std;
-  }
-  return v > AFFECT_FALLBACK_THRESHOLD;
-}
-export function classifyAffect(
-  bs: Record<string, number>,
-  baseline: Baseline | null = null,
-): Affect {
-  const smileL  = isAbove(bs, "mouthSmileLeft",  baseline);
-  const smileR  = isAbove(bs, "mouthSmileRight", baseline);
-  const browDL  = isAbove(bs, "browDownLeft",    baseline);
-  const browDR  = isAbove(bs, "browDownRight",   baseline);
-  const squintL = isAbove(bs, "eyeSquintLeft",   baseline);
-  const squintR = isAbove(bs, "eyeSquintRight",  baseline);
-  const jawOpen = isAbove(bs, "jawOpen",         baseline);
-  const browIn  = isAbove(bs, "browInnerUp",     baseline);
-  // Order matters here — first match wins. HAPPY is checked before FRUSTRATED
-  // because smile+concentration (smile + slight brow furrow) is a common
-  // pose while reading a reply, and we'd rather miss frustration than
-  // mis-read a smiling user as frustrated. Both sides required for every
-  // affect to suppress one-sided twitches.
-  if (jawOpen && browIn)         return "SURPRISED";
-  if (smileL && smileR)          return "HAPPY";
-  if (browDL && browDR)          return "FRUSTRATED";
-  if (squintL && squintR)        return "FRUSTRATED";
   return "NEUTRAL";
 }
 export function mapGestureLabel(label: string): GestureName | null {
   switch (label) {
     case "Thumb_Up":    return "THUMBS_UP";
@@ -186,30 +35,41 @@ export function mapGestureLabel(label: string): GestureName | null {
   }
 }
-// Bucket layout matches the 5 regions on the AAC interface:
-//   family / medical (top), social (centre), hobbies / daily_routine (bottom).
-const GAZE_INVERT_Y = import.meta.env.VITE_GAZE_INVERT_Y === "true";
-const GAZE_LATERAL_DELTA  = 0.12;
-const GAZE_VERTICAL_DELTA = 0.12;
-type GazeBlendshape =
-  | "eyeLookInLeft" | "eyeLookInRight"
-  | "eyeLookOutLeft" | "eyeLookOutRight"
-  | "eyeLookUpLeft" | "eyeLookUpRight"
-  | "eyeLookDownLeft" | "eyeLookDownRight";
-function gazeBs(bs: Record<string, number>, name: GazeBlendshape): number {
-  return bs[name] ?? 0;
-}
-export function worldGazeXY(
   matrix: Matrix,
   bs: Record<string, number>,
 ): { x: number; y: number } {
-  const eyeR = (gazeBs(bs, "eyeLookInLeft")   + gazeBs(bs, "eyeLookOutRight")) / 2;
-  const eyeL = (gazeBs(bs, "eyeLookOutLeft")  + gazeBs(bs, "eyeLookInRight"))  / 2;
-  const eyeU = (gazeBs(bs, "eyeLookUpLeft")   + gazeBs(bs, "eyeLookUpRight"))  / 2;
-  const eyeD = (gazeBs(bs, "eyeLookDownLeft") + gazeBs(bs, "eyeLookDownRight")) / 2;
   const lx = eyeR - eyeL;
   const ly = eyeU - eyeD;
@@ -225,13 +85,14 @@ export function worldGazeXY(
   return { x: cx / fwd, y };
 }
-function deflectionToRegion(dx: number, dy: number): MemoryBucket | null {
-  const ax = Math.abs(dx), ay = Math.abs(dy);
-  if (ax < GAZE_LATERAL_DELTA && ay < GAZE_VERTICAL_DELTA) return "social";
-  if (dx < -GAZE_LATERAL_DELTA && dy >  GAZE_VERTICAL_DELTA) return "family";
-  if (dx >  GAZE_LATERAL_DELTA && dy >  GAZE_VERTICAL_DELTA) return "medical";
-  if (dx < -GAZE_LATERAL_DELTA && dy < -GAZE_VERTICAL_DELTA) return "hobbies";
-  if (dx >  GAZE_LATERAL_DELTA && dy < -GAZE_VERTICAL_DELTA) return "daily_routine";
   return null;
 }
@@ -240,36 +101,22 @@ export class GazeTracker {
   private dwellStart = 0;
   private dwellThresholdMs: number;
   private _activeZone: MemoryBucket | null = null;
-  private _lastSeenAt = 0;
-  private static ACTIVE_ZONE_TIMEOUT_MS = 500;
   constructor(dwellThresholdMs = 1500) {
     this.dwellThresholdMs = dwellThresholdMs;
   }
   get activeZone(): MemoryBucket | null {
-    if (performance.now() - this._lastSeenAt > GazeTracker.ACTIVE_ZONE_TIMEOUT_MS) {
-      this._activeZone = null;
-    }
     return this._activeZone;
   }
   process(
     matrix: Matrix | null,
     bs: Record<string, number>,
-    baseline: Baseline | null,
   ): MemoryBucket | null {
-    if (!matrix) return null;
-    const { x, y } = worldGazeXY(matrix, bs);
-    const dx = baseline ? x - baseline.gaze.x : x;
-    const dy = baseline ? y - baseline.gaze.y : y;
-    const bucket = deflectionToRegion(dx, dy);
-    if (bucket !== null) {
-      this._activeZone = bucket;
-      this._lastSeenAt = performance.now();
-    }
     if (bucket !== this.currentBucket) {
       this.currentBucket = bucket;
@@ -291,17 +138,25 @@ export class GazeTracker {
     this.currentBucket = null;
     this._activeZone = null;
     this.dwellStart = 0;
-    this._lastSeenAt = 0;
   }
 }
 interface AnglePoint { pitch: number; yaw: number; t: number }
 const RAD2DEG = 180 / Math.PI;
-export function extractAngles(
-  data: number[],
-): { pitch: number; yaw: number; roll: number } {
   const r20 = data[2], r21 = data[6], r22 = data[10];
   const r10 = data[1], r00 = data[0];
   return {
@@ -311,41 +166,31 @@ export function extractAngles(
   };
 }
-const WINDOW_MS       = 1200;
-const REFRACTORY_MS   = 2000;
-const NOD_WINDOW_MS   = 1000;
-// Hard cap covers backgrounded-tab catch-up where many frames arrive at once.
-const HISTORY_MAX     = 100;
-const SHAKE_RANGE_RAD     = 0.30;
-const SHAKE_DEADBAND_RAD  = 0.05;
-const SHAKE_MIN_REVERSALS = 3;
-const NOD_AMPLITUDE_RAD = 0.12;
-const NOD_SHARP_RAD     = 0.25;
-const NOD_RECOVERY_RAD  = 0.12;
-const NOD_MAX_YAW_RAD   = 0.25;
 export class HeadPoseTracker {
   private history: AnglePoint[] = [];
   private lastEmitTs = 0;
   private lastDebug: HeadDebug = { pitch: 0, yaw: 0, roll: 0, crossings: 0 };
-  process(matrix: Matrix, baseline: Baseline | null): HeadSignal | null {
-    const raw = extractAngles(matrix.data);
-    const pitch = baseline ? raw.pitch - baseline.head.pitch : raw.pitch;
-    const yaw   = baseline ? raw.yaw   - baseline.head.yaw   : raw.yaw;
-    const roll  = baseline ? raw.roll  - baseline.head.roll  : raw.roll;
     const now = performance.now();
     this.history.push({ pitch, yaw, t: now });
-    const cutoff = now - WINDOW_MS;
-    let drop = 0;
-    while (drop < this.history.length && this.history[drop].t < cutoff) drop++;
-    if (this.history.length - drop > HISTORY_MAX) {
-      drop = this.history.length - HISTORY_MAX;
-    }
-    if (drop > 0) this.history.splice(0, drop);
     this.updateDebug(pitch, yaw, roll);
@@ -419,27 +264,16 @@ export class HeadPoseTracker {
     this.history = [];
     this.lastEmitTs = 0;
   }
-}
-export function faceBboxSize(landmarks: { x: number; y: number }[]): number | null { // reduces the landmarks' bounding box to one size figure; null when it cannot be computed
-  if (!landmarks || landmarks.length < 3) return null; // missing array or fewer than 3 points: no usable box
-  let minX = 1, maxX = 0, minY = 1, maxY = 0; // NOTE(review): these seeds assume x and y lie in [0, 1] — confirm landmarks are normalised
-  for (const p of landmarks) {
-    if (p.x < minX) minX = p.x;
-    if (p.x > maxX) maxX = p.x;
-    if (p.y < minY) minY = p.y;
-    if (p.y > maxY) maxY = p.y;
-  }
-  const w = maxX - minX;
-  const h = maxY - minY;
-  if (w <= 0 || h <= 0) return null; // zero-width or zero-height box is rejected
-  return Math.sqrt(w * h); // geometric mean of box width and height
 }
-const INDEX_TIP = 8;
 const VELOCITY_START = 15;
-const VELOCITY_END = 5;
-const STROKE_GAP_MS = 200;
 export class AirWriter {
   private trajectory: [number, number][] = [];
@@ -493,9 +327,7 @@ export class AirWriter {
     }
   }
-  get strokeActive(): boolean {
-    return this.inStroke;
-  }
   getCompletedStroke(): [number, number][] | null {
     const s = this.pendingStroke;
@@ -503,6 +335,8 @@ export class AirWriter {
     return s;
   }
   noHand(): void {
     if (this.inStroke && this.strokeEndTime === 0) {
       this.strokeEndTime = performance.now();

 import type { Matrix } from "@mediapipe/tasks-vision";
+import type { Affect, GestureName, MemoryBucket } from "../types";
+// ── Affect classification via MediaPipe blendshapes ──────────────────────────
+export function classifyAffect(bs: Record<string, number>): Affect { // maps blendshape scores to one Affect label; rules are tried top to bottom and the first match wins
+  const smileLeft   = bs["mouthSmileLeft"]  ?? 0; // every score below falls back to 0 when its key is absent
+  const smileRight  = bs["mouthSmileRight"] ?? 0;
+  const browDownL   = bs["browDownLeft"]    ?? 0;
+  const browDownR   = bs["browDownRight"]   ?? 0;
+  const squintL     = bs["eyeSquintLeft"]   ?? 0;
+  const squintR     = bs["eyeSquintRight"]  ?? 0;
+  const jawOpen     = bs["jawOpen"]         ?? 0;
+  const browInnerUp = bs["browInnerUp"]     ?? 0;
+  if (jawOpen > 0.4 && browInnerUp > 0.5)   return "SURPRISED"; // needs both signals; tested first, so it outranks every rule below
+  if (browDownL > 0.4 || browDownR > 0.4)   return "FRUSTRATED"; // a single lowered brow is enough
+  if (squintL > 0.5 && squintR > 0.5)       return "FRUSTRATED"; // squint must show on both eyes
+  if (smileLeft > 0.5 && smileRight > 0.5)  return "HAPPY"; // smile must show on both sides; only reached when no rule above fired
   return "NEUTRAL"; // fallback when no rule matches
 }
+// ── Gesture label mapping from MediaPipe GestureRecognizer ───────────────────
 export function mapGestureLabel(label: string): GestureName | null {
   switch (label) {
     case "Thumb_Up":    return "THUMBS_UP";
   }
 }
+// ── Gaze tracker — world-space gaze via head rotation × eye blendshapes ──────
+//
+// Old approach: absolute iris X/Y position in frame → grid region.
+//   Problem: head shifting in frame changes the bucket even if eyes didn't move.
+//
+// New approach:
+//   1. Eye direction in face-local space from blendshapes (head-relative).
+//   2. Rotate into camera space using the facial transformation matrix.
+//   3. Perspective-project to a 2-D screen gaze point.
+//   4. Map that point to the 5 memory buckets with a dwell timer.
+//
+// Bucket layout (matches the 5 regions on the AAC interface):
+//
+//   family     │  medical
+//   (top-left) │  (top-right)
+//   ───────────┼───────────
+//   hobbies    │  daily_routine
+//   (bot-left) │  (bot-right)
+//            social
+//           (centre)
+//
+// If top/bottom buckets appear swapped on your device, set VITE_GAZE_INVERT_Y=true.
+const GAZE_INVERT_Y  = import.meta.env.VITE_GAZE_INVERT_Y === "true"; // true only for the exact string "true"; unset or any other value leaves it false
+const GAZE_CENTER    = 0.10; // |x| and |y| limit of the inner "social" test in gazeToRegion
+const GAZE_LATERAL   = 0.12; // |x| threshold between the central box and the left/right buckets in gazeToRegion
+const GAZE_VERTICAL  = 0.12; // |y| threshold between the central box and the top/bottom buckets in gazeToRegion
+function worldGazeXY(
   matrix: Matrix,
   bs: Record<string, number>,
 ): { x: number; y: number } {
+  const eyeR = ((bs.eyeLookInLeft  ?? 0) + (bs.eyeLookOutRight ?? 0)) / 2;
+  const eyeL = ((bs.eyeLookOutLeft ?? 0) + (bs.eyeLookInRight  ?? 0)) / 2;
+  const eyeU = ((bs.eyeLookUpLeft  ?? 0) + (bs.eyeLookUpRight  ?? 0)) / 2;
+  const eyeD = ((bs.eyeLookDownLeft ?? 0) + (bs.eyeLookDownRight ?? 0)) / 2;
   const lx = eyeR - eyeL;
   const ly = eyeU - eyeD;
   return { x: cx / fwd, y };
 }
+function gazeToRegion(x: number, y: number): MemoryBucket | null { // maps a 2-D gaze point to a memory bucket, or null when no region matches
+  const ax = Math.abs(x), ay = Math.abs(y);
+  if (ax < GAZE_CENTER && ay < GAZE_CENTER)  return "social"; // NOTE(review): with GAZE_CENTER (0.10) below GAZE_LATERAL/GAZE_VERTICAL (0.12) this case is already covered by the next line
+  if (ax < GAZE_LATERAL && ay < GAZE_VERTICAL) return "social"; // central box
+  if (x < -GAZE_LATERAL && y >  GAZE_VERTICAL) return "family"; // x negative, y positive: "top-left" in the layout comment above
+  if (x >  GAZE_LATERAL && y >  GAZE_VERTICAL) return "medical"; // x positive, y positive: "top-right"
+  if (x < -GAZE_LATERAL && y < -GAZE_VERTICAL) return "hobbies"; // x negative, y negative: "bot-left"
+  if (x >  GAZE_LATERAL && y < -GAZE_VERTICAL) return "daily_routine"; // x positive, y negative: "bot-right"
   return null; // outside the central box but not strictly past both thresholds (e.g. far to one side at mid height): no bucket
 }
   private dwellStart = 0;
   private dwellThresholdMs: number;
   private _activeZone: MemoryBucket | null = null;
   constructor(dwellThresholdMs = 1500) {
     this.dwellThresholdMs = dwellThresholdMs;
   }
   get activeZone(): MemoryBucket | null {
     return this._activeZone;
   }
   process(
     matrix: Matrix | null,
     bs: Record<string, number>,
   ): MemoryBucket | null {
+    const { x, y } = matrix ? worldGazeXY(matrix, bs) : { x: 0, y: 0 };
+    const bucket = matrix ? gazeToRegion(x, y) : null;
+    this._activeZone = bucket;
     if (bucket !== this.currentBucket) {
       this.currentBucket = bucket;
     this.currentBucket = null;
     this._activeZone = null;
     this.dwellStart = 0;
   }
 }
+// ── Head-pose tracker using facial transformation matrix ────────────────────
+export type HeadSignal = "HEAD_SHAKE" | "HEAD_NOD" | "HEAD_NOD_DISSATISFIED"; // the non-null values HeadPoseTracker.process can return
+export interface HeadDebug { // shape of the snapshot HeadPoseTracker keeps in lastDebug
+  pitch: number; // NOTE(review): unit not visible in this view (a RAD2DEG constant exists nearby) — confirm radians vs degrees for pitch/yaw/roll
+  yaw: number;
+  roll: number;
+  crossings: number; // NOTE(review): presumably a direction-reversal count used for shake detection — confirm against updateDebug
+}
 interface AnglePoint { pitch: number; yaw: number; t: number }
 const RAD2DEG = 180 / Math.PI;
+function extractAngles(data: Float32Array | number[]): { pitch: number; yaw: number; roll: number } {
   const r20 = data[2], r21 = data[6], r22 = data[10];
   const r10 = data[1], r00 = data[0];
   return {
   };
 }
+const WINDOW_MS          = 1200; // HeadPoseTracker.process drops history samples older than this many ms
+const REFRACTORY_MS      = 2000; // NOTE(review): presumably the minimum gap between emitted signals — usage not visible here, confirm
+const NOD_WINDOW_MS      = 1000; // NOTE(review): usage not visible in this view
+const SHAKE_RANGE_RAD    = 0.10; // NOTE(review): was 0.30 before this change
+const SHAKE_DEADBAND_RAD = 0.03; // NOTE(review): was 0.05 before this change
+const SHAKE_MIN_REVERSALS = 2; // NOTE(review): was 3 before this change
+const NOD_AMPLITUDE_RAD  = 0.15; // NOTE(review): was 0.12 before this change
+const NOD_SHARP_RAD      = 0.28; // NOTE(review): was 0.25 before this change
+const NOD_RECOVERY_RAD   = 0.15; // NOTE(review): was 0.12 before this change
+const NOD_MAX_YAW_RAD    = 0.25; // same value as before this change
 export class HeadPoseTracker {
   private history: AnglePoint[] = [];
   private lastEmitTs = 0;
   private lastDebug: HeadDebug = { pitch: 0, yaw: 0, roll: 0, crossings: 0 };
+  // No-op — angles are self-calibrating relative to the canonical face model.
+  calibrate(_landmarks: unknown): void {}
+  process(matrix: Matrix): HeadSignal | null {
+    const { pitch, yaw, roll } = extractAngles(matrix.data);
     const now = performance.now();
     this.history.push({ pitch, yaw, t: now });
+    this.history = this.history.filter((p) => p.t >= now - WINDOW_MS);
     this.updateDebug(pitch, yaw, roll);
     this.history = [];
     this.lastEmitTs = 0;
   }
+  get calibrated(): boolean { return true; } // always reports true; calibrate() on this class is a no-op
 }
+// ── Air-writing stroke collector ─────────────────────────────────────────────
+const INDEX_TIP      = 8; // NOTE(review): presumably the hand-landmark index of the index fingertip — usage not visible here, confirm
 const VELOCITY_START = 15; // NOTE(review): units and usage not visible in this view; note it is higher than VELOCITY_END
+const VELOCITY_END   = 5;
+const STROKE_GAP_MS  = 200; // NOTE(review): presumably a pause length in ms that ends a stroke — confirm in AirWriter
 export class AirWriter {
   private trajectory: [number, number][] = [];
     }
   }
+  get strokeActive(): boolean { return this.inStroke; }
   getCompletedStroke(): [number, number][] | null {
     const s = this.pendingStroke;
     return s;
   }
+  getText(): string { return ""; } // always returns the empty string — NOTE(review): looks like a compatibility stub; confirm no caller expects recognised text
   noHand(): void {
     if (this.inStroke && this.strokeEndTime === 0) {
       this.strokeEndTime = performance.now();

frontend/src/types.ts CHANGED Viewed

@@ -18,6 +18,7 @@ export interface SensingState {
   airWrittenText: string;
   airWritingActive: boolean;
   headSignal: HeadSignal | null;
   headDebug: HeadDebug;
 }
@@ -69,43 +70,10 @@ export interface LatencyLog {
   t_total: number;
 }
-export interface CandidateEval {
-  idx: number;
-  strategy: string;
-  selected: boolean;
-  groundedness: number;
-  hallucination_rate: number;
-  no_evidence: boolean;
-  relevance: number;
-}
-export interface EvalExplain {
-  affect?: {
-    target: string;
-    pos_words: number;
-    neg_words: number;
-    sentiment: number;
-  };
-  gesture?: {
-    tag: string;
-    has_pattern: boolean;
-    matched: boolean | null;
-  };
-  gaze?: {
-    bucket: string;
-    matched_chunks: number;
-    total_chunks: number;
-  };
-}
 export interface EvalScores {
   groundedness: number;
   hallucination_rate: number;
   no_evidence: boolean;
-  sentences_total?: number;
-  sentences_grounded?: number;
-  nli_threshold?: number;
-  relevance?: number;
   t_total_s: number;
   slo_target_s: number;
   slo_passed: boolean;
@@ -114,10 +82,6 @@ export interface EvalScores {
   affect_alignment: number;
   gesture_alignment: number;
   gaze_alignment: number;
-  candidate_diversity?: number;
-  n_candidates?: number;
-  candidates_eval?: CandidateEval[];
-  explain?: EvalExplain;
 }
 export type CandidateStrategy =

   airWrittenText: string;
   airWritingActive: boolean;
   headSignal: HeadSignal | null;
+  headCalibrated: boolean;
   headDebug: HeadDebug;
 }
   t_total: number;
 }
 export interface EvalScores {
   groundedness: number;
   hallucination_rate: number;
   no_evidence: boolean;
   t_total_s: number;
   slo_target_s: number;
   slo_passed: boolean;
   affect_alignment: number;
   gesture_alignment: number;
   gaze_alignment: number;
 }
 export type CandidateStrategy =