Spaces:
Sleeping
Sleeping
Merge pull request #6 from akashkolte/akash/m3-changes
Browse files- frontend/src/App.tsx +1 -27
- frontend/src/hooks/useSensing.ts +79 -175
- frontend/src/hooks/useWebcam.ts +1 -1
- frontend/src/lib/sensing.ts +100 -266
- frontend/src/types.ts +1 -37
frontend/src/App.tsx
CHANGED
|
@@ -8,7 +8,6 @@ import { ChatPanel } from "./components/ChatPanel";
|
|
| 8 |
import { WebcamSensing } from "./components/WebcamSensing";
|
| 9 |
import { SensingStatus } from "./components/SensingStatus";
|
| 10 |
import { LatencyMetrics } from "./components/LatencyMetrics";
|
| 11 |
-
import { CalibrationOverlay } from "./components/CalibrationOverlay";
|
| 12 |
import "./App.css";
|
| 13 |
|
| 14 |
function App() {
|
|
@@ -37,13 +36,8 @@ function App() {
|
|
| 37 |
sensing,
|
| 38 |
ready,
|
| 39 |
initError,
|
| 40 |
-
isCalibrating,
|
| 41 |
-
isCalibrated,
|
| 42 |
-
calibrationProgress,
|
| 43 |
init,
|
| 44 |
processFrame,
|
| 45 |
-
startCalibration,
|
| 46 |
-
cancelCalibration,
|
| 47 |
clearAirWrittenText,
|
| 48 |
clearHeadSignal,
|
| 49 |
resetCalibration,
|
|
@@ -61,22 +55,12 @@ function App() {
|
|
| 61 |
onFrame,
|
| 62 |
});
|
| 63 |
|
| 64 |
-
const autoCalibratedRef = useRef(false);
|
| 65 |
-
|
| 66 |
-
useEffect(() => {
|
| 67 |
-
if (active && ready && !autoCalibratedRef.current) {
|
| 68 |
-
autoCalibratedRef.current = true;
|
| 69 |
-
startCalibration();
|
| 70 |
-
}
|
| 71 |
-
}, [active, ready, startCalibration]);
|
| 72 |
-
|
| 73 |
async function handleWebcamToggle() {
|
| 74 |
if (!webcamEnabled) {
|
| 75 |
const ok = await init();
|
| 76 |
if (ok) setWebcamEnabled(true);
|
| 77 |
} else {
|
| 78 |
setWebcamEnabled(false);
|
| 79 |
-
autoCalibratedRef.current = false;
|
| 80 |
resetCalibration();
|
| 81 |
}
|
| 82 |
}
|
|
@@ -115,12 +99,7 @@ function App() {
|
|
| 115 |
Enable webcam
|
| 116 |
</label>
|
| 117 |
<WebcamSensing videoRef={videoRef} active={active} error={error || initError} />
|
| 118 |
-
<SensingStatus
|
| 119 |
-
sensing={sensing}
|
| 120 |
-
webcamActive={active}
|
| 121 |
-
calibrated={isCalibrated}
|
| 122 |
-
onRecalibrate={active ? startCalibration : undefined}
|
| 123 |
-
/>
|
| 124 |
</div>
|
| 125 |
|
| 126 |
<div className="sidebar-section">
|
|
@@ -160,11 +139,6 @@ function App() {
|
|
| 160 |
/>
|
| 161 |
</main>
|
| 162 |
|
| 163 |
-
<CalibrationOverlay
|
| 164 |
-
active={isCalibrating}
|
| 165 |
-
progress={calibrationProgress}
|
| 166 |
-
onCancel={cancelCalibration}
|
| 167 |
-
/>
|
| 168 |
</div>
|
| 169 |
);
|
| 170 |
}
|
|
|
|
| 8 |
import { WebcamSensing } from "./components/WebcamSensing";
|
| 9 |
import { SensingStatus } from "./components/SensingStatus";
|
| 10 |
import { LatencyMetrics } from "./components/LatencyMetrics";
|
|
|
|
| 11 |
import "./App.css";
|
| 12 |
|
| 13 |
function App() {
|
|
|
|
| 36 |
sensing,
|
| 37 |
ready,
|
| 38 |
initError,
|
|
|
|
|
|
|
|
|
|
| 39 |
init,
|
| 40 |
processFrame,
|
|
|
|
|
|
|
| 41 |
clearAirWrittenText,
|
| 42 |
clearHeadSignal,
|
| 43 |
resetCalibration,
|
|
|
|
| 55 |
onFrame,
|
| 56 |
});
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
async function handleWebcamToggle() {
|
| 59 |
if (!webcamEnabled) {
|
| 60 |
const ok = await init();
|
| 61 |
if (ok) setWebcamEnabled(true);
|
| 62 |
} else {
|
| 63 |
setWebcamEnabled(false);
|
|
|
|
| 64 |
resetCalibration();
|
| 65 |
}
|
| 66 |
}
|
|
|
|
| 99 |
Enable webcam
|
| 100 |
</label>
|
| 101 |
<WebcamSensing videoRef={videoRef} active={active} error={error || initError} />
|
| 102 |
+
<SensingStatus sensing={sensing} webcamActive={active} />
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
</div>
|
| 104 |
|
| 105 |
<div className="sidebar-section">
|
|
|
|
| 139 |
/>
|
| 140 |
</main>
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
</div>
|
| 143 |
);
|
| 144 |
}
|
frontend/src/hooks/useSensing.ts
CHANGED
|
@@ -11,39 +11,30 @@ import {
|
|
| 11 |
GazeTracker,
|
| 12 |
AirWriter,
|
| 13 |
HeadPoseTracker,
|
| 14 |
-
Calibrator,
|
| 15 |
-
worldGazeXY,
|
| 16 |
-
extractAngles,
|
| 17 |
-
faceBboxSize,
|
| 18 |
} from "../lib/sensing";
|
| 19 |
import { recognizeInkStroke } from "../lib/inkRecognizer";
|
| 20 |
|
| 21 |
-
const
|
| 22 |
-
const
|
| 23 |
|
| 24 |
-
const AIRWRITING_ENABLED
|
| 25 |
-
const GAZE_ENABLED
|
| 26 |
-
const CALIBRATION_ENABLED = import.meta.env.VITE_CALIBRATION_ENABLED !== "false";
|
| 27 |
|
| 28 |
export function useSensing() {
|
| 29 |
-
const faceLandmarkerRef
|
| 30 |
const gestureRecognizerRef = useRef<GestureRecognizer | null>(null);
|
| 31 |
-
const
|
| 32 |
-
const
|
| 33 |
-
const
|
| 34 |
-
const
|
| 35 |
-
const
|
| 36 |
-
const
|
| 37 |
-
const
|
| 38 |
-
const
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
const [ready, setReady] = useState(false);
|
| 42 |
const [initError, setInitError] = useState<string | null>(null);
|
| 43 |
-
const [
|
| 44 |
-
const [isCalibrated, setIsCalibrated] = useState(false);
|
| 45 |
-
const [calibrationProgress, setCalibrationProgress] = useState(0);
|
| 46 |
-
const [sensing, setSensing] = useState<SensingState>({
|
| 47 |
affect: null,
|
| 48 |
gestureTag: null,
|
| 49 |
gazeZone: null,
|
|
@@ -51,6 +42,7 @@ export function useSensing() {
|
|
| 51 |
airWrittenText: "",
|
| 52 |
airWritingActive: false,
|
| 53 |
headSignal: null,
|
|
|
|
| 54 |
headDebug: { pitch: 0, yaw: 0, roll: 0, crossings: 0 },
|
| 55 |
});
|
| 56 |
|
|
@@ -58,7 +50,7 @@ export function useSensing() {
|
|
| 58 |
return () => {
|
| 59 |
faceLandmarkerRef.current?.close();
|
| 60 |
gestureRecognizerRef.current?.close();
|
| 61 |
-
faceLandmarkerRef.current
|
| 62 |
gestureRecognizerRef.current = null;
|
| 63 |
};
|
| 64 |
}, []);
|
|
@@ -70,128 +62,74 @@ export function useSensing() {
|
|
| 70 |
const vision = await FilesetResolver.forVisionTasks(
|
| 71 |
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
| 72 |
);
|
| 73 |
-
faceLandmarkerRef.current = await FaceLandmarker.createFromOptions(
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
delegate: "GPU",
|
| 94 |
-
},
|
| 95 |
-
runningMode: "VIDEO",
|
| 96 |
-
numHands: 1,
|
| 97 |
-
}
|
| 98 |
-
);
|
| 99 |
setReady(true);
|
| 100 |
return true;
|
| 101 |
} catch (e) {
|
| 102 |
-
setInitError(
|
| 103 |
-
e instanceof Error ? e.message : "Failed to load MediaPipe models"
|
| 104 |
-
);
|
| 105 |
return false;
|
| 106 |
} finally {
|
| 107 |
initingRef.current = false;
|
| 108 |
}
|
| 109 |
}, []);
|
| 110 |
|
| 111 |
-
const startCalibration = useCallback(() => {
|
| 112 |
-
if (!CALIBRATION_ENABLED) {
|
| 113 |
-
setIsCalibrated(true);
|
| 114 |
-
return;
|
| 115 |
-
}
|
| 116 |
-
calibratorRef.current.start();
|
| 117 |
-
setIsCalibrating(true);
|
| 118 |
-
setIsCalibrated(false);
|
| 119 |
-
setCalibrationProgress(0);
|
| 120 |
-
// Reset the per-detector state so post-calibration baselines aren't
|
| 121 |
-
// mixed with stale pre-calibration history.
|
| 122 |
-
gazeTrackerRef.current.reset();
|
| 123 |
-
headTrackerRef.current.reset();
|
| 124 |
-
gestureCountRef.current = { tag: null, since: 0 };
|
| 125 |
-
affectCountRef.current = { affect: null, since: 0 };
|
| 126 |
-
}, []);
|
| 127 |
-
|
| 128 |
-
const cancelCalibration = useCallback(() => {
|
| 129 |
-
calibratorRef.current.cancel();
|
| 130 |
-
setIsCalibrating(false);
|
| 131 |
-
setIsCalibrated(false);
|
| 132 |
-
setCalibrationProgress(0);
|
| 133 |
-
}, []);
|
| 134 |
-
|
| 135 |
const processFrame = useCallback(
|
| 136 |
(video: HTMLVideoElement, timestamp: number) => {
|
| 137 |
-
const faceLandmarker
|
| 138 |
const gestureRecognizer = gestureRecognizerRef.current;
|
| 139 |
if (!faceLandmarker || !gestureRecognizer) return;
|
| 140 |
|
| 141 |
-
|
| 142 |
-
const calibrating = calibrator.isActive;
|
| 143 |
-
const baseline = calibrator.getBaseline();
|
| 144 |
-
|
| 145 |
-
let affect: SensingState["affect"] = null;
|
| 146 |
let gazeBucket: SensingState["gazeBucket"] = null;
|
| 147 |
let headSignal: SensingState["headSignal"] = null;
|
| 148 |
|
| 149 |
const faceResult = faceLandmarker.detectForVideo(video, timestamp);
|
| 150 |
if (faceResult.faceLandmarks && faceResult.faceLandmarks.length > 0) {
|
| 151 |
const matrix = faceResult.facialTransformationMatrixes?.[0] ?? null;
|
| 152 |
-
const landmarks = faceResult.faceLandmarks[0];
|
| 153 |
|
| 154 |
const bs: Record<string, number> = {};
|
| 155 |
if (faceResult.faceBlendshapes && faceResult.faceBlendshapes.length > 0) {
|
| 156 |
for (const cat of faceResult.faceBlendshapes[0].categories) {
|
| 157 |
bs[cat.categoryName] = cat.score;
|
| 158 |
}
|
|
|
|
| 159 |
}
|
| 160 |
|
| 161 |
-
if (calibrating) {
|
| 162 |
-
calibrator.addSample({
|
| 163 |
-
blendshapes: bs,
|
| 164 |
-
gaze: matrix ? worldGazeXY(matrix, bs) : null,
|
| 165 |
-
head: matrix ? extractAngles(matrix.data) : null,
|
| 166 |
-
faceBboxSize: faceBboxSize(landmarks),
|
| 167 |
-
});
|
| 168 |
-
setCalibrationProgress(Math.round(calibrator.progress * 100) / 100);
|
| 169 |
-
if (calibrator.isReady) {
|
| 170 |
-
setIsCalibrating(false);
|
| 171 |
-
setIsCalibrated(true);
|
| 172 |
-
setCalibrationProgress(1);
|
| 173 |
-
}
|
| 174 |
-
return;
|
| 175 |
-
}
|
| 176 |
-
|
| 177 |
-
affect = classifyAffect(bs, baseline);
|
| 178 |
-
|
| 179 |
if (GAZE_ENABLED) {
|
| 180 |
-
gazeBucket = gazeTrackerRef.current.process(matrix, bs
|
| 181 |
}
|
| 182 |
|
| 183 |
if (matrix) {
|
| 184 |
-
headSignal = headTrackerRef.current.process(matrix
|
| 185 |
headDebugRef.current = headTrackerRef.current.debug;
|
| 186 |
}
|
| 187 |
-
} else if (calibrating) {
|
| 188 |
-
setCalibrationProgress(Math.round(calibrator.progress * 100) / 100);
|
| 189 |
-
return;
|
| 190 |
}
|
| 191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
let gestureTag: SensingState["gestureTag"] = null;
|
| 193 |
|
| 194 |
-
const gestureResult = gestureRecognizer.recognizeForVideo(video, timestamp);
|
| 195 |
if (gestureResult.gestures && gestureResult.gestures.length > 0) {
|
| 196 |
const topGesture = gestureResult.gestures[0][0];
|
| 197 |
gestureTag = mapGestureLabel(topGesture.categoryName);
|
|
@@ -214,67 +152,41 @@ export function useSensing() {
|
|
| 214 |
recognizeInkStroke(completedStroke).then((text) => {
|
| 215 |
inkBusyRef.current = false;
|
| 216 |
if (text) {
|
| 217 |
-
setSensing((prev) => ({
|
| 218 |
-
...prev,
|
| 219 |
-
airWrittenText: prev.airWrittenText + text,
|
| 220 |
-
}));
|
| 221 |
}
|
| 222 |
});
|
| 223 |
}
|
| 224 |
}
|
| 225 |
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
|
|
|
| 229 |
}
|
| 230 |
-
const stableGesture =
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
|
|
|
| 237 |
}
|
| 238 |
-
const stableAffect =
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
headDebug.yaw !== prev.headDebug.yaw ||
|
| 254 |
-
headDebug.roll !== prev.headDebug.roll ||
|
| 255 |
-
headDebug.crossings !== prev.headDebug.crossings;
|
| 256 |
-
if (
|
| 257 |
-
!debugChanged &&
|
| 258 |
-
activeZone === prev.gazeZone &&
|
| 259 |
-
nextAffect === prev.affect &&
|
| 260 |
-
stableGesture === prev.gestureTag &&
|
| 261 |
-
nextGazeBucket === prev.gazeBucket &&
|
| 262 |
-
nextHeadSignal === prev.headSignal &&
|
| 263 |
-
airWritingActive === prev.airWritingActive
|
| 264 |
-
) {
|
| 265 |
-
return prev;
|
| 266 |
-
}
|
| 267 |
-
return {
|
| 268 |
-
...prev,
|
| 269 |
-
affect: nextAffect,
|
| 270 |
-
gestureTag: stableGesture,
|
| 271 |
-
gazeZone: activeZone,
|
| 272 |
-
gazeBucket: nextGazeBucket,
|
| 273 |
-
airWritingActive,
|
| 274 |
-
headSignal: nextHeadSignal,
|
| 275 |
-
headDebug: debugChanged ? headDebug : prev.headDebug,
|
| 276 |
-
};
|
| 277 |
-
});
|
| 278 |
},
|
| 279 |
[]
|
| 280 |
);
|
|
@@ -288,14 +200,10 @@ export function useSensing() {
|
|
| 288 |
}, []);
|
| 289 |
|
| 290 |
const resetCalibration = useCallback(() => {
|
| 291 |
-
gestureCountRef.current = { tag: null,
|
| 292 |
-
affectCountRef.current
|
| 293 |
gazeTrackerRef.current.reset();
|
| 294 |
headTrackerRef.current.reset();
|
| 295 |
-
calibratorRef.current.cancel();
|
| 296 |
-
setIsCalibrating(false);
|
| 297 |
-
setIsCalibrated(false);
|
| 298 |
-
setCalibrationProgress(0);
|
| 299 |
setSensing({
|
| 300 |
affect: null,
|
| 301 |
gestureTag: null,
|
|
@@ -304,6 +212,7 @@ export function useSensing() {
|
|
| 304 |
airWrittenText: "",
|
| 305 |
airWritingActive: false,
|
| 306 |
headSignal: null,
|
|
|
|
| 307 |
headDebug: { pitch: 0, yaw: 0, roll: 0, crossings: 0 },
|
| 308 |
});
|
| 309 |
}, []);
|
|
@@ -312,13 +221,8 @@ export function useSensing() {
|
|
| 312 |
sensing,
|
| 313 |
ready,
|
| 314 |
initError,
|
| 315 |
-
isCalibrating,
|
| 316 |
-
isCalibrated,
|
| 317 |
-
calibrationProgress,
|
| 318 |
init,
|
| 319 |
processFrame,
|
| 320 |
-
startCalibration,
|
| 321 |
-
cancelCalibration,
|
| 322 |
clearAirWrittenText,
|
| 323 |
clearHeadSignal,
|
| 324 |
resetCalibration,
|
|
|
|
| 11 |
GazeTracker,
|
| 12 |
AirWriter,
|
| 13 |
HeadPoseTracker,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
} from "../lib/sensing";
|
| 15 |
import { recognizeInkStroke } from "../lib/inkRecognizer";
|
| 16 |
|
| 17 |
+
const GESTURE_DEBOUNCE_FRAMES = 3;
|
| 18 |
+
const AFFECT_DEBOUNCE_FRAMES = 8;
|
| 19 |
|
| 20 |
+
const AIRWRITING_ENABLED = import.meta.env.VITE_AIRWRITING_ENABLED !== "false";
|
| 21 |
+
const GAZE_ENABLED = import.meta.env.VITE_GAZE_ENABLED !== "false";
|
|
|
|
| 22 |
|
| 23 |
export function useSensing() {
|
| 24 |
+
const faceLandmarkerRef = useRef<FaceLandmarker | null>(null);
|
| 25 |
const gestureRecognizerRef = useRef<GestureRecognizer | null>(null);
|
| 26 |
+
const gazeTrackerRef = useRef(new GazeTracker());
|
| 27 |
+
const airWriterRef = useRef(new AirWriter());
|
| 28 |
+
const inkBusyRef = useRef(false);
|
| 29 |
+
const headTrackerRef = useRef(new HeadPoseTracker());
|
| 30 |
+
const headDebugRef = useRef({ pitch: 0, yaw: 0, roll: 0, crossings: 0 });
|
| 31 |
+
const gestureCountRef = useRef<{ tag: SensingState["gestureTag"]; count: number }>({ tag: null, count: 0 });
|
| 32 |
+
const affectCountRef = useRef<{ affect: SensingState["affect"]; count: number }>({ affect: null, count: 0 });
|
| 33 |
+
const initingRef = useRef(false);
|
| 34 |
+
|
| 35 |
+
const [ready, setReady] = useState(false);
|
|
|
|
| 36 |
const [initError, setInitError] = useState<string | null>(null);
|
| 37 |
+
const [sensing, setSensing] = useState<SensingState>({
|
|
|
|
|
|
|
|
|
|
| 38 |
affect: null,
|
| 39 |
gestureTag: null,
|
| 40 |
gazeZone: null,
|
|
|
|
| 42 |
airWrittenText: "",
|
| 43 |
airWritingActive: false,
|
| 44 |
headSignal: null,
|
| 45 |
+
headCalibrated: false,
|
| 46 |
headDebug: { pitch: 0, yaw: 0, roll: 0, crossings: 0 },
|
| 47 |
});
|
| 48 |
|
|
|
|
| 50 |
return () => {
|
| 51 |
faceLandmarkerRef.current?.close();
|
| 52 |
gestureRecognizerRef.current?.close();
|
| 53 |
+
faceLandmarkerRef.current = null;
|
| 54 |
gestureRecognizerRef.current = null;
|
| 55 |
};
|
| 56 |
}, []);
|
|
|
|
| 62 |
const vision = await FilesetResolver.forVisionTasks(
|
| 63 |
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
| 64 |
);
|
| 65 |
+
faceLandmarkerRef.current = await FaceLandmarker.createFromOptions(vision, {
|
| 66 |
+
baseOptions: {
|
| 67 |
+
modelAssetPath:
|
| 68 |
+
"https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task",
|
| 69 |
+
delegate: "GPU",
|
| 70 |
+
},
|
| 71 |
+
runningMode: "VIDEO",
|
| 72 |
+
numFaces: 1,
|
| 73 |
+
outputFaceBlendshapes: true,
|
| 74 |
+
outputFacialTransformationMatrixes: true,
|
| 75 |
+
});
|
| 76 |
+
gestureRecognizerRef.current = await GestureRecognizer.createFromOptions(vision, {
|
| 77 |
+
baseOptions: {
|
| 78 |
+
modelAssetPath:
|
| 79 |
+
"https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task",
|
| 80 |
+
delegate: "GPU",
|
| 81 |
+
},
|
| 82 |
+
runningMode: "VIDEO",
|
| 83 |
+
numHands: 1,
|
| 84 |
+
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
setReady(true);
|
| 86 |
return true;
|
| 87 |
} catch (e) {
|
| 88 |
+
setInitError(e instanceof Error ? e.message : "Failed to load MediaPipe models");
|
|
|
|
|
|
|
| 89 |
return false;
|
| 90 |
} finally {
|
| 91 |
initingRef.current = false;
|
| 92 |
}
|
| 93 |
}, []);
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
const processFrame = useCallback(
|
| 96 |
(video: HTMLVideoElement, timestamp: number) => {
|
| 97 |
+
const faceLandmarker = faceLandmarkerRef.current;
|
| 98 |
const gestureRecognizer = gestureRecognizerRef.current;
|
| 99 |
if (!faceLandmarker || !gestureRecognizer) return;
|
| 100 |
|
| 101 |
+
let affect: SensingState["affect"] = null;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
let gazeBucket: SensingState["gazeBucket"] = null;
|
| 103 |
let headSignal: SensingState["headSignal"] = null;
|
| 104 |
|
| 105 |
const faceResult = faceLandmarker.detectForVideo(video, timestamp);
|
| 106 |
if (faceResult.faceLandmarks && faceResult.faceLandmarks.length > 0) {
|
| 107 |
const matrix = faceResult.facialTransformationMatrixes?.[0] ?? null;
|
|
|
|
| 108 |
|
| 109 |
const bs: Record<string, number> = {};
|
| 110 |
if (faceResult.faceBlendshapes && faceResult.faceBlendshapes.length > 0) {
|
| 111 |
for (const cat of faceResult.faceBlendshapes[0].categories) {
|
| 112 |
bs[cat.categoryName] = cat.score;
|
| 113 |
}
|
| 114 |
+
affect = classifyAffect(bs);
|
| 115 |
}
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
if (GAZE_ENABLED) {
|
| 118 |
+
gazeBucket = gazeTrackerRef.current.process(matrix, bs);
|
| 119 |
}
|
| 120 |
|
| 121 |
if (matrix) {
|
| 122 |
+
headSignal = headTrackerRef.current.process(matrix);
|
| 123 |
headDebugRef.current = headTrackerRef.current.debug;
|
| 124 |
}
|
|
|
|
|
|
|
|
|
|
| 125 |
}
|
| 126 |
|
| 127 |
+
// Always call recognizeForVideo every frame — VIDEO-mode models maintain
|
| 128 |
+
// internal temporal state and produce stale results if frames are skipped.
|
| 129 |
+
const gestureResult = gestureRecognizer.recognizeForVideo(video, timestamp);
|
| 130 |
+
|
| 131 |
let gestureTag: SensingState["gestureTag"] = null;
|
| 132 |
|
|
|
|
| 133 |
if (gestureResult.gestures && gestureResult.gestures.length > 0) {
|
| 134 |
const topGesture = gestureResult.gestures[0][0];
|
| 135 |
gestureTag = mapGestureLabel(topGesture.categoryName);
|
|
|
|
| 152 |
recognizeInkStroke(completedStroke).then((text) => {
|
| 153 |
inkBusyRef.current = false;
|
| 154 |
if (text) {
|
| 155 |
+
setSensing((prev) => ({ ...prev, airWrittenText: prev.airWrittenText + text }));
|
|
|
|
|
|
|
|
|
|
| 156 |
}
|
| 157 |
});
|
| 158 |
}
|
| 159 |
}
|
| 160 |
|
| 161 |
+
if (gestureTag === gestureCountRef.current.tag) {
|
| 162 |
+
gestureCountRef.current.count++;
|
| 163 |
+
} else {
|
| 164 |
+
gestureCountRef.current = { tag: gestureTag, count: 1 };
|
| 165 |
}
|
| 166 |
+
const stableGesture = gestureCountRef.current.count >= GESTURE_DEBOUNCE_FRAMES
|
| 167 |
+
? gestureTag
|
| 168 |
+
: null;
|
| 169 |
+
|
| 170 |
+
if (affect === affectCountRef.current.affect) {
|
| 171 |
+
affectCountRef.current.count++;
|
| 172 |
+
} else {
|
| 173 |
+
affectCountRef.current = { affect, count: 1 };
|
| 174 |
}
|
| 175 |
+
const stableAffect = affectCountRef.current.count >= AFFECT_DEBOUNCE_FRAMES
|
| 176 |
+
? affect
|
| 177 |
+
: null;
|
| 178 |
+
|
| 179 |
+
setSensing((prev) => ({
|
| 180 |
+
affect: stableAffect ?? prev.affect,
|
| 181 |
+
gestureTag: stableGesture,
|
| 182 |
+
gazeZone: GAZE_ENABLED ? gazeTrackerRef.current.activeZone : null,
|
| 183 |
+
gazeBucket: gazeBucket ?? prev.gazeBucket,
|
| 184 |
+
airWrittenText: prev.airWrittenText,
|
| 185 |
+
airWritingActive: airWriterRef.current.strokeActive,
|
| 186 |
+
headSignal: headSignal ?? prev.headSignal,
|
| 187 |
+
headCalibrated: headTrackerRef.current.calibrated,
|
| 188 |
+
headDebug: headDebugRef.current,
|
| 189 |
+
}));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
},
|
| 191 |
[]
|
| 192 |
);
|
|
|
|
| 200 |
}, []);
|
| 201 |
|
| 202 |
const resetCalibration = useCallback(() => {
|
| 203 |
+
gestureCountRef.current = { tag: null, count: 0 };
|
| 204 |
+
affectCountRef.current = { affect: null, count: 0 };
|
| 205 |
gazeTrackerRef.current.reset();
|
| 206 |
headTrackerRef.current.reset();
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
setSensing({
|
| 208 |
affect: null,
|
| 209 |
gestureTag: null,
|
|
|
|
| 212 |
airWrittenText: "",
|
| 213 |
airWritingActive: false,
|
| 214 |
headSignal: null,
|
| 215 |
+
headCalibrated: false,
|
| 216 |
headDebug: { pitch: 0, yaw: 0, roll: 0, crossings: 0 },
|
| 217 |
});
|
| 218 |
}, []);
|
|
|
|
| 221 |
sensing,
|
| 222 |
ready,
|
| 223 |
initError,
|
|
|
|
|
|
|
|
|
|
| 224 |
init,
|
| 225 |
processFrame,
|
|
|
|
|
|
|
| 226 |
clearAirWrittenText,
|
| 227 |
clearHeadSignal,
|
| 228 |
resetCalibration,
|
frontend/src/hooks/useWebcam.ts
CHANGED
|
@@ -9,7 +9,7 @@ interface UseWebcamOptions {
|
|
| 9 |
export function useWebcam({
|
| 10 |
enabled,
|
| 11 |
onFrame,
|
| 12 |
-
processEveryN =
|
| 13 |
}: UseWebcamOptions) {
|
| 14 |
const videoRef = useRef<HTMLVideoElement | null>(null);
|
| 15 |
const streamRef = useRef<MediaStream | null>(null);
|
|
|
|
| 9 |
export function useWebcam({
|
| 10 |
enabled,
|
| 11 |
onFrame,
|
| 12 |
+
processEveryN = 2,
|
| 13 |
}: UseWebcamOptions) {
|
| 14 |
const videoRef = useRef<HTMLVideoElement | null>(null);
|
| 15 |
const streamRef = useRef<MediaStream | null>(null);
|
frontend/src/lib/sensing.ts
CHANGED
|
@@ -1,178 +1,27 @@
|
|
| 1 |
import type { Matrix } from "@mediapipe/tasks-vision";
|
| 2 |
-
import type { Affect, GestureName,
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
const
|
| 8 |
-
|
| 9 |
-
const
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
affect: Record<string, Stats>;
|
| 21 |
-
gaze: { x: number; y: number };
|
| 22 |
-
head: { pitch: number; yaw: number; roll: number };
|
| 23 |
-
faceBboxSize: number; // normalised face size — proxy for distance
|
| 24 |
-
}
|
| 25 |
-
|
| 26 |
-
function trimmedStats(values: number[]): Stats {
|
| 27 |
-
if (values.length === 0) return { mean: 0, std: 0 };
|
| 28 |
-
const sorted = [...values].sort((a, b) => a - b);
|
| 29 |
-
const trim = Math.floor(sorted.length * OUTLIER_TRIM_FRACTION);
|
| 30 |
-
const kept = sorted.slice(trim, sorted.length - trim);
|
| 31 |
-
if (kept.length === 0) return { mean: 0, std: 0 };
|
| 32 |
-
const mean = kept.reduce((s, v) => s + v, 0) / kept.length;
|
| 33 |
-
const variance = kept.reduce((s, v) => s + (v - mean) ** 2, 0) / kept.length;
|
| 34 |
-
const std = Math.max(Math.sqrt(variance), 0.01);
|
| 35 |
-
return { mean, std };
|
| 36 |
-
}
|
| 37 |
-
|
| 38 |
-
function trimmedMean(values: number[]): number {
|
| 39 |
-
return trimmedStats(values).mean;
|
| 40 |
-
}
|
| 41 |
-
|
| 42 |
-
export class Calibrator {
|
| 43 |
-
private startTs = 0;
|
| 44 |
-
private active = false;
|
| 45 |
-
private done = false;
|
| 46 |
-
|
| 47 |
-
private affectSamples: Record<string, number[]> = {};
|
| 48 |
-
private gazeSamples: { x: number; y: number }[] = [];
|
| 49 |
-
private headSamples: { pitch: number; yaw: number; roll: number }[] = [];
|
| 50 |
-
private bboxSamples: number[] = [];
|
| 51 |
-
|
| 52 |
-
private baseline: Baseline | null = null;
|
| 53 |
-
|
| 54 |
-
start(): void {
|
| 55 |
-
this.startTs = performance.now();
|
| 56 |
-
this.active = true;
|
| 57 |
-
this.done = false;
|
| 58 |
-
this.baseline = null;
|
| 59 |
-
this.affectSamples = {};
|
| 60 |
-
for (const name of AFFECT_BLENDSHAPES) this.affectSamples[name] = [];
|
| 61 |
-
this.gazeSamples = [];
|
| 62 |
-
this.headSamples = [];
|
| 63 |
-
this.bboxSamples = [];
|
| 64 |
-
}
|
| 65 |
-
|
| 66 |
-
cancel(): void {
|
| 67 |
-
this.active = false;
|
| 68 |
-
this.done = false;
|
| 69 |
-
this.baseline = null;
|
| 70 |
-
}
|
| 71 |
-
|
| 72 |
-
get isActive(): boolean { return this.active; }
|
| 73 |
-
get isReady(): boolean { return this.done && this.baseline !== null; }
|
| 74 |
-
|
| 75 |
-
// 0 → 1 over the calibration window (excluding warm-up).
|
| 76 |
-
get progress(): number {
|
| 77 |
-
if (!this.active) return this.done ? 1 : 0;
|
| 78 |
-
const elapsed = performance.now() - this.startTs - CALIBRATION_WARMUP_MS;
|
| 79 |
-
if (elapsed <= 0) return 0;
|
| 80 |
-
return Math.min(1, elapsed / (CALIBRATION_DURATION_MS - CALIBRATION_WARMUP_MS));
|
| 81 |
-
}
|
| 82 |
-
|
| 83 |
-
// Feed a frame's signals during calibration. After the window elapses,
|
| 84 |
-
// the baseline is computed and `isReady` becomes true.
|
| 85 |
-
addSample(args: {
|
| 86 |
-
blendshapes: Record<string, number>;
|
| 87 |
-
gaze: { x: number; y: number } | null;
|
| 88 |
-
head: { pitch: number; yaw: number; roll: number } | null;
|
| 89 |
-
faceBboxSize: number | null;
|
| 90 |
-
}): void {
|
| 91 |
-
if (!this.active) return;
|
| 92 |
-
const elapsed = performance.now() - this.startTs;
|
| 93 |
-
|
| 94 |
-
if (elapsed < CALIBRATION_WARMUP_MS) return;
|
| 95 |
-
|
| 96 |
-
if (elapsed >= CALIBRATION_DURATION_MS) {
|
| 97 |
-
this.finalise();
|
| 98 |
-
return;
|
| 99 |
-
}
|
| 100 |
-
|
| 101 |
-
for (const name of AFFECT_BLENDSHAPES) {
|
| 102 |
-
const v = args.blendshapes[name];
|
| 103 |
-
if (typeof v === "number") this.affectSamples[name].push(v);
|
| 104 |
-
}
|
| 105 |
-
if (args.gaze) this.gazeSamples.push(args.gaze);
|
| 106 |
-
if (args.head) this.headSamples.push(args.head);
|
| 107 |
-
if (typeof args.faceBboxSize === "number") this.bboxSamples.push(args.faceBboxSize);
|
| 108 |
-
}
|
| 109 |
-
|
| 110 |
-
private finalise(): void {
|
| 111 |
-
const affect: Record<string, Stats> = {};
|
| 112 |
-
for (const name of AFFECT_BLENDSHAPES) {
|
| 113 |
-
affect[name] = trimmedStats(this.affectSamples[name] ?? []);
|
| 114 |
-
}
|
| 115 |
-
const gaze = {
|
| 116 |
-
x: trimmedMean(this.gazeSamples.map((g) => g.x)),
|
| 117 |
-
y: trimmedMean(this.gazeSamples.map((g) => g.y)),
|
| 118 |
-
};
|
| 119 |
-
const head = {
|
| 120 |
-
pitch: trimmedMean(this.headSamples.map((h) => h.pitch)),
|
| 121 |
-
yaw: trimmedMean(this.headSamples.map((h) => h.yaw)),
|
| 122 |
-
roll: trimmedMean(this.headSamples.map((h) => h.roll)),
|
| 123 |
-
};
|
| 124 |
-
// Floor at a small positive value so we never divide by zero when scaling.
|
| 125 |
-
const faceBboxSize = Math.max(trimmedMean(this.bboxSamples), 0.01);
|
| 126 |
-
|
| 127 |
-
this.baseline = { affect, gaze, head, faceBboxSize };
|
| 128 |
-
this.active = false;
|
| 129 |
-
this.done = true;
|
| 130 |
-
}
|
| 131 |
-
|
| 132 |
-
getBaseline(): Baseline | null { return this.baseline; }
|
| 133 |
-
}
|
| 134 |
-
|
| 135 |
-
const AFFECT_FALLBACK_THRESHOLD = 0.4;
|
| 136 |
-
|
| 137 |
-
function isAbove(
|
| 138 |
-
bs: Record<string, number>,
|
| 139 |
-
name: AffectBlendshape,
|
| 140 |
-
baseline: Baseline | null,
|
| 141 |
-
): boolean {
|
| 142 |
-
const v = bs[name] ?? 0;
|
| 143 |
-
if (baseline) {
|
| 144 |
-
const stats = baseline.affect[name];
|
| 145 |
-
if (!stats) return false;
|
| 146 |
-
return v - stats.mean > SIGMA_K * stats.std;
|
| 147 |
-
}
|
| 148 |
-
return v > AFFECT_FALLBACK_THRESHOLD;
|
| 149 |
-
}
|
| 150 |
-
|
| 151 |
-
export function classifyAffect(
|
| 152 |
-
bs: Record<string, number>,
|
| 153 |
-
baseline: Baseline | null = null,
|
| 154 |
-
): Affect {
|
| 155 |
-
const smileL = isAbove(bs, "mouthSmileLeft", baseline);
|
| 156 |
-
const smileR = isAbove(bs, "mouthSmileRight", baseline);
|
| 157 |
-
const browDL = isAbove(bs, "browDownLeft", baseline);
|
| 158 |
-
const browDR = isAbove(bs, "browDownRight", baseline);
|
| 159 |
-
const squintL = isAbove(bs, "eyeSquintLeft", baseline);
|
| 160 |
-
const squintR = isAbove(bs, "eyeSquintRight", baseline);
|
| 161 |
-
const jawOpen = isAbove(bs, "jawOpen", baseline);
|
| 162 |
-
const browIn = isAbove(bs, "browInnerUp", baseline);
|
| 163 |
-
|
| 164 |
-
// Order matters here — first match wins. HAPPY is checked before FRUSTRATED
|
| 165 |
-
// because smile+concentration (smile + slight brow furrow) is a common
|
| 166 |
-
// pose while reading a reply, and we'd rather miss frustration than
|
| 167 |
-
// mis-read a smiling user as frustrated. Both sides required for every
|
| 168 |
-
// affect to suppress one-sided twitches.
|
| 169 |
-
if (jawOpen && browIn) return "SURPRISED";
|
| 170 |
-
if (smileL && smileR) return "HAPPY";
|
| 171 |
-
if (browDL && browDR) return "FRUSTRATED";
|
| 172 |
-
if (squintL && squintR) return "FRUSTRATED";
|
| 173 |
return "NEUTRAL";
|
| 174 |
}
|
| 175 |
|
|
|
|
|
|
|
| 176 |
export function mapGestureLabel(label: string): GestureName | null {
|
| 177 |
switch (label) {
|
| 178 |
case "Thumb_Up": return "THUMBS_UP";
|
|
@@ -186,30 +35,41 @@ export function mapGestureLabel(label: string): GestureName | null {
|
|
| 186 |
}
|
| 187 |
}
|
| 188 |
|
| 189 |
-
//
|
| 190 |
-
//
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
matrix: Matrix,
|
| 207 |
bs: Record<string, number>,
|
| 208 |
): { x: number; y: number } {
|
| 209 |
-
const eyeR = (
|
| 210 |
-
const eyeL = (
|
| 211 |
-
const eyeU = (
|
| 212 |
-
const eyeD = (
|
| 213 |
|
| 214 |
const lx = eyeR - eyeL;
|
| 215 |
const ly = eyeU - eyeD;
|
|
@@ -225,13 +85,14 @@ export function worldGazeXY(
|
|
| 225 |
return { x: cx / fwd, y };
|
| 226 |
}
|
| 227 |
|
| 228 |
-
function
|
| 229 |
-
const ax = Math.abs(
|
| 230 |
-
if (ax <
|
| 231 |
-
if (
|
| 232 |
-
if (
|
| 233 |
-
if (
|
| 234 |
-
if (
|
|
|
|
| 235 |
return null;
|
| 236 |
}
|
| 237 |
|
|
@@ -240,36 +101,22 @@ export class GazeTracker {
|
|
| 240 |
private dwellStart = 0;
|
| 241 |
private dwellThresholdMs: number;
|
| 242 |
private _activeZone: MemoryBucket | null = null;
|
| 243 |
-
private _lastSeenAt = 0;
|
| 244 |
-
private static ACTIVE_ZONE_TIMEOUT_MS = 500;
|
| 245 |
|
| 246 |
constructor(dwellThresholdMs = 1500) {
|
| 247 |
this.dwellThresholdMs = dwellThresholdMs;
|
| 248 |
}
|
| 249 |
|
| 250 |
get activeZone(): MemoryBucket | null {
|
| 251 |
-
if (performance.now() - this._lastSeenAt > GazeTracker.ACTIVE_ZONE_TIMEOUT_MS) {
|
| 252 |
-
this._activeZone = null;
|
| 253 |
-
}
|
| 254 |
return this._activeZone;
|
| 255 |
}
|
| 256 |
|
| 257 |
process(
|
| 258 |
matrix: Matrix | null,
|
| 259 |
bs: Record<string, number>,
|
| 260 |
-
baseline: Baseline | null,
|
| 261 |
): MemoryBucket | null {
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
const dx = baseline ? x - baseline.gaze.x : x;
|
| 266 |
-
const dy = baseline ? y - baseline.gaze.y : y;
|
| 267 |
-
|
| 268 |
-
const bucket = deflectionToRegion(dx, dy);
|
| 269 |
-
if (bucket !== null) {
|
| 270 |
-
this._activeZone = bucket;
|
| 271 |
-
this._lastSeenAt = performance.now();
|
| 272 |
-
}
|
| 273 |
|
| 274 |
if (bucket !== this.currentBucket) {
|
| 275 |
this.currentBucket = bucket;
|
|
@@ -291,17 +138,25 @@ export class GazeTracker {
|
|
| 291 |
this.currentBucket = null;
|
| 292 |
this._activeZone = null;
|
| 293 |
this.dwellStart = 0;
|
| 294 |
-
this._lastSeenAt = 0;
|
| 295 |
}
|
| 296 |
}
|
| 297 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
interface AnglePoint { pitch: number; yaw: number; t: number }
|
| 299 |
|
| 300 |
const RAD2DEG = 180 / Math.PI;
|
| 301 |
|
| 302 |
-
|
| 303 |
-
data: number[],
|
| 304 |
-
): { pitch: number; yaw: number; roll: number } {
|
| 305 |
const r20 = data[2], r21 = data[6], r22 = data[10];
|
| 306 |
const r10 = data[1], r00 = data[0];
|
| 307 |
return {
|
|
@@ -311,41 +166,31 @@ export function extractAngles(
|
|
| 311 |
};
|
| 312 |
}
|
| 313 |
|
| 314 |
-
const WINDOW_MS
|
| 315 |
-
const REFRACTORY_MS
|
| 316 |
-
const NOD_WINDOW_MS
|
| 317 |
-
|
| 318 |
-
const
|
| 319 |
-
|
| 320 |
-
const
|
| 321 |
-
const
|
| 322 |
-
const
|
| 323 |
-
|
| 324 |
-
const NOD_AMPLITUDE_RAD = 0.12;
|
| 325 |
-
const NOD_SHARP_RAD = 0.25;
|
| 326 |
-
const NOD_RECOVERY_RAD = 0.12;
|
| 327 |
-
const NOD_MAX_YAW_RAD = 0.25;
|
| 328 |
|
| 329 |
export class HeadPoseTracker {
|
| 330 |
private history: AnglePoint[] = [];
|
| 331 |
private lastEmitTs = 0;
|
| 332 |
private lastDebug: HeadDebug = { pitch: 0, yaw: 0, roll: 0, crossings: 0 };
|
| 333 |
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
const
|
| 339 |
const now = performance.now();
|
| 340 |
|
| 341 |
this.history.push({ pitch, yaw, t: now });
|
| 342 |
-
|
| 343 |
-
let drop = 0;
|
| 344 |
-
while (drop < this.history.length && this.history[drop].t < cutoff) drop++;
|
| 345 |
-
if (this.history.length - drop > HISTORY_MAX) {
|
| 346 |
-
drop = this.history.length - HISTORY_MAX;
|
| 347 |
-
}
|
| 348 |
-
if (drop > 0) this.history.splice(0, drop);
|
| 349 |
|
| 350 |
this.updateDebug(pitch, yaw, roll);
|
| 351 |
|
|
@@ -419,27 +264,16 @@ export class HeadPoseTracker {
|
|
| 419 |
this.history = [];
|
| 420 |
this.lastEmitTs = 0;
|
| 421 |
}
|
| 422 |
-
}
|
| 423 |
|
| 424 |
-
|
| 425 |
-
if (!landmarks || landmarks.length < 3) return null;
|
| 426 |
-
let minX = 1, maxX = 0, minY = 1, maxY = 0;
|
| 427 |
-
for (const p of landmarks) {
|
| 428 |
-
if (p.x < minX) minX = p.x;
|
| 429 |
-
if (p.x > maxX) maxX = p.x;
|
| 430 |
-
if (p.y < minY) minY = p.y;
|
| 431 |
-
if (p.y > maxY) maxY = p.y;
|
| 432 |
-
}
|
| 433 |
-
const w = maxX - minX;
|
| 434 |
-
const h = maxY - minY;
|
| 435 |
-
if (w <= 0 || h <= 0) return null;
|
| 436 |
-
return Math.sqrt(w * h);
|
| 437 |
}
|
| 438 |
|
| 439 |
-
|
|
|
|
|
|
|
| 440 |
const VELOCITY_START = 15;
|
| 441 |
-
const VELOCITY_END
|
| 442 |
-
const STROKE_GAP_MS
|
| 443 |
|
| 444 |
export class AirWriter {
|
| 445 |
private trajectory: [number, number][] = [];
|
|
@@ -493,9 +327,7 @@ export class AirWriter {
|
|
| 493 |
}
|
| 494 |
}
|
| 495 |
|
| 496 |
-
get strokeActive(): boolean {
|
| 497 |
-
return this.inStroke;
|
| 498 |
-
}
|
| 499 |
|
| 500 |
getCompletedStroke(): [number, number][] | null {
|
| 501 |
const s = this.pendingStroke;
|
|
@@ -503,6 +335,8 @@ export class AirWriter {
|
|
| 503 |
return s;
|
| 504 |
}
|
| 505 |
|
|
|
|
|
|
|
| 506 |
noHand(): void {
|
| 507 |
if (this.inStroke && this.strokeEndTime === 0) {
|
| 508 |
this.strokeEndTime = performance.now();
|
|
|
|
| 1 |
import type { Matrix } from "@mediapipe/tasks-vision";
|
| 2 |
+
import type { Affect, GestureName, MemoryBucket } from "../types";
|
| 3 |
+
|
| 4 |
+
// ── Affect classification via MediaPipe blendshapes ──────────────────────────
|
| 5 |
+
|
| 6 |
+
export function classifyAffect(bs: Record<string, number>): Affect {
|
| 7 |
+
const smileLeft = bs["mouthSmileLeft"] ?? 0;
|
| 8 |
+
const smileRight = bs["mouthSmileRight"] ?? 0;
|
| 9 |
+
const browDownL = bs["browDownLeft"] ?? 0;
|
| 10 |
+
const browDownR = bs["browDownRight"] ?? 0;
|
| 11 |
+
const squintL = bs["eyeSquintLeft"] ?? 0;
|
| 12 |
+
const squintR = bs["eyeSquintRight"] ?? 0;
|
| 13 |
+
const jawOpen = bs["jawOpen"] ?? 0;
|
| 14 |
+
const browInnerUp = bs["browInnerUp"] ?? 0;
|
| 15 |
+
|
| 16 |
+
if (jawOpen > 0.4 && browInnerUp > 0.5) return "SURPRISED";
|
| 17 |
+
if (browDownL > 0.4 || browDownR > 0.4) return "FRUSTRATED";
|
| 18 |
+
if (squintL > 0.5 && squintR > 0.5) return "FRUSTRATED";
|
| 19 |
+
if (smileLeft > 0.5 && smileRight > 0.5) return "HAPPY";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
return "NEUTRAL";
|
| 21 |
}
|
| 22 |
|
| 23 |
+
// ── Gesture label mapping from MediaPipe GestureRecognizer ───────────────────
|
| 24 |
+
|
| 25 |
export function mapGestureLabel(label: string): GestureName | null {
|
| 26 |
switch (label) {
|
| 27 |
case "Thumb_Up": return "THUMBS_UP";
|
|
|
|
| 35 |
}
|
| 36 |
}
|
| 37 |
|
| 38 |
+
// ── Gaze tracker — world-space gaze via head rotation × eye blendshapes ──────
|
| 39 |
+
//
|
| 40 |
+
// Old approach: absolute iris X/Y position in frame → grid region.
|
| 41 |
+
// Problem: head shifting in frame changes the bucket even if eyes didn't move.
|
| 42 |
+
//
|
| 43 |
+
// New approach:
|
| 44 |
+
// 1. Eye direction in face-local space from blendshapes (head-relative).
|
| 45 |
+
// 2. Rotate into camera space using the facial transformation matrix.
|
| 46 |
+
// 3. Perspective-project to a 2-D screen gaze point.
|
| 47 |
+
// 4. Map that point to the 5 memory buckets with a dwell timer.
|
| 48 |
+
//
|
| 49 |
+
// Bucket layout (matches the 5 regions on the AAC interface):
|
| 50 |
+
//
|
| 51 |
+
// family │ medical
|
| 52 |
+
// (top-left) │ (top-right)
|
| 53 |
+
// ──────────┼──────────
|
| 54 |
+
// hobbies │ daily_routine
|
| 55 |
+
// (bot-left) │ (bot-right)
|
| 56 |
+
// social
|
| 57 |
+
// (centre)
|
| 58 |
+
//
|
| 59 |
+
// If top/bottom buckets appear swapped on your device, set VITE_GAZE_INVERT_Y=true.
|
| 60 |
+
const GAZE_INVERT_Y = import.meta.env.VITE_GAZE_INVERT_Y === "true";
|
| 61 |
+
const GAZE_CENTER = 0.10;
|
| 62 |
+
const GAZE_LATERAL = 0.12;
|
| 63 |
+
const GAZE_VERTICAL = 0.12;
|
| 64 |
+
|
| 65 |
+
function worldGazeXY(
|
| 66 |
matrix: Matrix,
|
| 67 |
bs: Record<string, number>,
|
| 68 |
): { x: number; y: number } {
|
| 69 |
+
const eyeR = ((bs.eyeLookInLeft ?? 0) + (bs.eyeLookOutRight ?? 0)) / 2;
|
| 70 |
+
const eyeL = ((bs.eyeLookOutLeft ?? 0) + (bs.eyeLookInRight ?? 0)) / 2;
|
| 71 |
+
const eyeU = ((bs.eyeLookUpLeft ?? 0) + (bs.eyeLookUpRight ?? 0)) / 2;
|
| 72 |
+
const eyeD = ((bs.eyeLookDownLeft ?? 0) + (bs.eyeLookDownRight ?? 0)) / 2;
|
| 73 |
|
| 74 |
const lx = eyeR - eyeL;
|
| 75 |
const ly = eyeU - eyeD;
|
|
|
|
| 85 |
return { x: cx / fwd, y };
|
| 86 |
}
|
| 87 |
|
| 88 |
+
function gazeToRegion(x: number, y: number): MemoryBucket | null {
|
| 89 |
+
const ax = Math.abs(x), ay = Math.abs(y);
|
| 90 |
+
if (ax < GAZE_CENTER && ay < GAZE_CENTER) return "social";
|
| 91 |
+
if (ax < GAZE_LATERAL && ay < GAZE_VERTICAL) return "social";
|
| 92 |
+
if (x < -GAZE_LATERAL && y > GAZE_VERTICAL) return "family";
|
| 93 |
+
if (x > GAZE_LATERAL && y > GAZE_VERTICAL) return "medical";
|
| 94 |
+
if (x < -GAZE_LATERAL && y < -GAZE_VERTICAL) return "hobbies";
|
| 95 |
+
if (x > GAZE_LATERAL && y < -GAZE_VERTICAL) return "daily_routine";
|
| 96 |
return null;
|
| 97 |
}
|
| 98 |
|
|
|
|
| 101 |
private dwellStart = 0;
|
| 102 |
private dwellThresholdMs: number;
|
| 103 |
private _activeZone: MemoryBucket | null = null;
|
|
|
|
|
|
|
| 104 |
|
| 105 |
constructor(dwellThresholdMs = 1500) {
|
| 106 |
this.dwellThresholdMs = dwellThresholdMs;
|
| 107 |
}
|
| 108 |
|
| 109 |
get activeZone(): MemoryBucket | null {
|
|
|
|
|
|
|
|
|
|
| 110 |
return this._activeZone;
|
| 111 |
}
|
| 112 |
|
| 113 |
process(
|
| 114 |
matrix: Matrix | null,
|
| 115 |
bs: Record<string, number>,
|
|
|
|
| 116 |
): MemoryBucket | null {
|
| 117 |
+
const { x, y } = matrix ? worldGazeXY(matrix, bs) : { x: 0, y: 0 };
|
| 118 |
+
const bucket = matrix ? gazeToRegion(x, y) : null;
|
| 119 |
+
this._activeZone = bucket;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
if (bucket !== this.currentBucket) {
|
| 122 |
this.currentBucket = bucket;
|
|
|
|
| 138 |
this.currentBucket = null;
|
| 139 |
this._activeZone = null;
|
| 140 |
this.dwellStart = 0;
|
|
|
|
| 141 |
}
|
| 142 |
}
|
| 143 |
|
| 144 |
+
// ── Head-pose tracker using facial transformation matrix ────────────────────
|
| 145 |
+
|
| 146 |
+
export type HeadSignal = "HEAD_SHAKE" | "HEAD_NOD" | "HEAD_NOD_DISSATISFIED";
|
| 147 |
+
|
| 148 |
+
export interface HeadDebug {
|
| 149 |
+
pitch: number;
|
| 150 |
+
yaw: number;
|
| 151 |
+
roll: number;
|
| 152 |
+
crossings: number;
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
interface AnglePoint { pitch: number; yaw: number; t: number }
|
| 156 |
|
| 157 |
const RAD2DEG = 180 / Math.PI;
|
| 158 |
|
| 159 |
+
function extractAngles(data: Float32Array | number[]): { pitch: number; yaw: number; roll: number } {
|
|
|
|
|
|
|
| 160 |
const r20 = data[2], r21 = data[6], r22 = data[10];
|
| 161 |
const r10 = data[1], r00 = data[0];
|
| 162 |
return {
|
|
|
|
| 166 |
};
|
| 167 |
}
|
| 168 |
|
| 169 |
+
const WINDOW_MS = 1200;
|
| 170 |
+
const REFRACTORY_MS = 2000;
|
| 171 |
+
const NOD_WINDOW_MS = 1000;
|
| 172 |
+
const SHAKE_RANGE_RAD = 0.10;
|
| 173 |
+
const SHAKE_DEADBAND_RAD = 0.03;
|
| 174 |
+
const SHAKE_MIN_REVERSALS = 2;
|
| 175 |
+
const NOD_AMPLITUDE_RAD = 0.15;
|
| 176 |
+
const NOD_SHARP_RAD = 0.28;
|
| 177 |
+
const NOD_RECOVERY_RAD = 0.15;
|
| 178 |
+
const NOD_MAX_YAW_RAD = 0.25;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
export class HeadPoseTracker {
|
| 181 |
private history: AnglePoint[] = [];
|
| 182 |
private lastEmitTs = 0;
|
| 183 |
private lastDebug: HeadDebug = { pitch: 0, yaw: 0, roll: 0, crossings: 0 };
|
| 184 |
|
| 185 |
+
// No-op — angles are self-calibrating relative to the canonical face model.
|
| 186 |
+
calibrate(_landmarks: unknown): void {}
|
| 187 |
+
|
| 188 |
+
process(matrix: Matrix): HeadSignal | null {
|
| 189 |
+
const { pitch, yaw, roll } = extractAngles(matrix.data);
|
| 190 |
const now = performance.now();
|
| 191 |
|
| 192 |
this.history.push({ pitch, yaw, t: now });
|
| 193 |
+
this.history = this.history.filter((p) => p.t >= now - WINDOW_MS);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
this.updateDebug(pitch, yaw, roll);
|
| 196 |
|
|
|
|
| 264 |
this.history = [];
|
| 265 |
this.lastEmitTs = 0;
|
| 266 |
}
|
|
|
|
| 267 |
|
| 268 |
+
get calibrated(): boolean { return true; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
}
|
| 270 |
|
| 271 |
+
// ── Air-writing stroke collector ─────────────────────────────────────────────
|
| 272 |
+
|
| 273 |
+
const INDEX_TIP = 8;
|
| 274 |
const VELOCITY_START = 15;
|
| 275 |
+
const VELOCITY_END = 5;
|
| 276 |
+
const STROKE_GAP_MS = 200;
|
| 277 |
|
| 278 |
export class AirWriter {
|
| 279 |
private trajectory: [number, number][] = [];
|
|
|
|
| 327 |
}
|
| 328 |
}
|
| 329 |
|
| 330 |
+
get strokeActive(): boolean { return this.inStroke; }
|
|
|
|
|
|
|
| 331 |
|
| 332 |
getCompletedStroke(): [number, number][] | null {
|
| 333 |
const s = this.pendingStroke;
|
|
|
|
| 335 |
return s;
|
| 336 |
}
|
| 337 |
|
| 338 |
+
getText(): string { return ""; }
|
| 339 |
+
|
| 340 |
noHand(): void {
|
| 341 |
if (this.inStroke && this.strokeEndTime === 0) {
|
| 342 |
this.strokeEndTime = performance.now();
|
frontend/src/types.ts
CHANGED
|
@@ -18,6 +18,7 @@ export interface SensingState {
|
|
| 18 |
airWrittenText: string;
|
| 19 |
airWritingActive: boolean;
|
| 20 |
headSignal: HeadSignal | null;
|
|
|
|
| 21 |
headDebug: HeadDebug;
|
| 22 |
}
|
| 23 |
|
|
@@ -69,43 +70,10 @@ export interface LatencyLog {
|
|
| 69 |
t_total: number;
|
| 70 |
}
|
| 71 |
|
| 72 |
-
export interface CandidateEval {
|
| 73 |
-
idx: number;
|
| 74 |
-
strategy: string;
|
| 75 |
-
selected: boolean;
|
| 76 |
-
groundedness: number;
|
| 77 |
-
hallucination_rate: number;
|
| 78 |
-
no_evidence: boolean;
|
| 79 |
-
relevance: number;
|
| 80 |
-
}
|
| 81 |
-
|
| 82 |
-
export interface EvalExplain {
|
| 83 |
-
affect?: {
|
| 84 |
-
target: string;
|
| 85 |
-
pos_words: number;
|
| 86 |
-
neg_words: number;
|
| 87 |
-
sentiment: number;
|
| 88 |
-
};
|
| 89 |
-
gesture?: {
|
| 90 |
-
tag: string;
|
| 91 |
-
has_pattern: boolean;
|
| 92 |
-
matched: boolean | null;
|
| 93 |
-
};
|
| 94 |
-
gaze?: {
|
| 95 |
-
bucket: string;
|
| 96 |
-
matched_chunks: number;
|
| 97 |
-
total_chunks: number;
|
| 98 |
-
};
|
| 99 |
-
}
|
| 100 |
-
|
| 101 |
export interface EvalScores {
|
| 102 |
groundedness: number;
|
| 103 |
hallucination_rate: number;
|
| 104 |
no_evidence: boolean;
|
| 105 |
-
sentences_total?: number;
|
| 106 |
-
sentences_grounded?: number;
|
| 107 |
-
nli_threshold?: number;
|
| 108 |
-
relevance?: number;
|
| 109 |
t_total_s: number;
|
| 110 |
slo_target_s: number;
|
| 111 |
slo_passed: boolean;
|
|
@@ -114,10 +82,6 @@ export interface EvalScores {
|
|
| 114 |
affect_alignment: number;
|
| 115 |
gesture_alignment: number;
|
| 116 |
gaze_alignment: number;
|
| 117 |
-
candidate_diversity?: number;
|
| 118 |
-
n_candidates?: number;
|
| 119 |
-
candidates_eval?: CandidateEval[];
|
| 120 |
-
explain?: EvalExplain;
|
| 121 |
}
|
| 122 |
|
| 123 |
export type CandidateStrategy =
|
|
|
|
| 18 |
airWrittenText: string;
|
| 19 |
airWritingActive: boolean;
|
| 20 |
headSignal: HeadSignal | null;
|
| 21 |
+
headCalibrated: boolean;
|
| 22 |
headDebug: HeadDebug;
|
| 23 |
}
|
| 24 |
|
|
|
|
| 70 |
t_total: number;
|
| 71 |
}
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
export interface EvalScores {
|
| 74 |
groundedness: number;
|
| 75 |
hallucination_rate: number;
|
| 76 |
no_evidence: boolean;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
t_total_s: number;
|
| 78 |
slo_target_s: number;
|
| 79 |
slo_passed: boolean;
|
|
|
|
| 82 |
affect_alignment: number;
|
| 83 |
gesture_alignment: number;
|
| 84 |
gaze_alignment: number;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
}
|
| 86 |
|
| 87 |
export type CandidateStrategy =
|