Spaces:
Sleeping
Sleeping
Commit ·
9a82bce
1
Parent(s): 8539a00
head gestures
Browse files- frontend/src/App.tsx +0 -11
- frontend/src/components/SensingStatus.tsx +6 -15
- frontend/src/hooks/useSensing.ts +9 -17
- frontend/src/lib/sensing.ts +103 -175
- frontend/src/types.ts +5 -6
frontend/src/App.tsx
CHANGED
|
@@ -40,7 +40,6 @@ function App() {
|
|
| 40 |
processFrame,
|
| 41 |
clearAirWrittenText,
|
| 42 |
clearHeadSignal,
|
| 43 |
-
calibrateHeadPose,
|
| 44 |
resetCalibration,
|
| 45 |
} = useSensing();
|
| 46 |
|
|
@@ -101,16 +100,6 @@ function App() {
|
|
| 101 |
</label>
|
| 102 |
<WebcamSensing videoRef={videoRef} active={active} error={error || initError} />
|
| 103 |
<SensingStatus sensing={sensing} webcamActive={active} />
|
| 104 |
-
<button
|
| 105 |
-
type="button"
|
| 106 |
-
className="calibrate-btn"
|
| 107 |
-
disabled={!active}
|
| 108 |
-
onClick={() => calibrateHeadPose()}
|
| 109 |
-
>
|
| 110 |
-
{sensing.headCalibrated
|
| 111 |
-
? "Re-calibrate head pose"
|
| 112 |
-
: "Calibrate head pose"}
|
| 113 |
-
</button>
|
| 114 |
</div>
|
| 115 |
|
| 116 |
<div className="sidebar-section">
|
|
|
|
| 40 |
processFrame,
|
| 41 |
clearAirWrittenText,
|
| 42 |
clearHeadSignal,
|
|
|
|
| 43 |
resetCalibration,
|
| 44 |
} = useSensing();
|
| 45 |
|
|
|
|
| 100 |
</label>
|
| 101 |
<WebcamSensing videoRef={videoRef} active={active} error={error || initError} />
|
| 102 |
<SensingStatus sensing={sensing} webcamActive={active} />
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
</div>
|
| 104 |
|
| 105 |
<div className="sidebar-section">
|
frontend/src/components/SensingStatus.tsx
CHANGED
|
@@ -40,24 +40,15 @@ export function SensingStatus({ sensing, webcamActive }: Props) {
|
|
| 40 |
</div>
|
| 41 |
<div className="sensing-row">
|
| 42 |
<span className="sensing-label">Head</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
<span className="sensing-value">
|
| 44 |
-
{sensing.
|
| 45 |
-
|
| 46 |
-
: "not calibrated"}
|
| 47 |
</span>
|
| 48 |
</div>
|
| 49 |
-
{sensing.headCalibrated && (
|
| 50 |
-
<div className="sensing-row sensing-debug">
|
| 51 |
-
<span className="sensing-label"> ↳ Δx/Δy</span>
|
| 52 |
-
<span className="sensing-value">
|
| 53 |
-
{sensing.headDebug.dx.toFixed(3)} / {sensing.headDebug.dy.toFixed(3)}
|
| 54 |
-
{" "}
|
| 55 |
-
(peak {sensing.headDebug.maxAbsDx.toFixed(3)}/
|
| 56 |
-
{sensing.headDebug.maxAbsDy.toFixed(3)},{" "}
|
| 57 |
-
x{sensing.headDebug.crossings})
|
| 58 |
-
</span>
|
| 59 |
-
</div>
|
| 60 |
-
)}
|
| 61 |
<div className="sensing-row">
|
| 62 |
<span className="sensing-label">Air-writing</span>
|
| 63 |
<span className="sensing-value">
|
|
|
|
| 40 |
</div>
|
| 41 |
<div className="sensing-row">
|
| 42 |
<span className="sensing-label">Head</span>
|
| 43 |
+
<span className="sensing-value">{sensing.headSignal ?? "steady"}</span>
|
| 44 |
+
</div>
|
| 45 |
+
<div className="sensing-row sensing-debug">
|
| 46 |
+
<span className="sensing-label"> ↳ p/y/r</span>
|
| 47 |
<span className="sensing-value">
|
| 48 |
+
{sensing.headDebug.pitch}° / {sensing.headDebug.yaw}° / {sensing.headDebug.roll}°
|
| 49 |
+
{" "}(x{sensing.headDebug.crossings})
|
|
|
|
| 50 |
</span>
|
| 51 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
<div className="sensing-row">
|
| 53 |
<span className="sensing-label">Air-writing</span>
|
| 54 |
<span className="sensing-value">
|
frontend/src/hooks/useSensing.ts
CHANGED
|
@@ -27,7 +27,6 @@ export function useSensing() {
|
|
| 27 |
const airWriterRef = useRef(new AirWriter());
|
| 28 |
const inkBusyRef = useRef(false);
|
| 29 |
const headTrackerRef = useRef(new HeadPoseTracker());
|
| 30 |
-
const calibratePendingRef = useRef(false);
|
| 31 |
const headDebugRef = useRef({ dx: 0, dy: 0, maxAbsDx: 0, maxAbsDy: 0, crossings: 0 });
|
| 32 |
const gestureCountRef = useRef<{ tag: SensingState["gestureTag"]; count: number }>({ tag: null, count: 0 });
|
| 33 |
const affectCountRef = useRef<{ affect: SensingState["affect"]; count: number }>({ affect: null, count: 0 });
|
|
@@ -42,7 +41,7 @@ export function useSensing() {
|
|
| 42 |
airWritingActive: false,
|
| 43 |
headSignal: null,
|
| 44 |
headCalibrated: false,
|
| 45 |
-
headDebug: {
|
| 46 |
});
|
| 47 |
|
| 48 |
// Cleanup MediaPipe resources on unmount
|
|
@@ -73,7 +72,7 @@ export function useSensing() {
|
|
| 73 |
runningMode: "VIDEO",
|
| 74 |
numFaces: 1,
|
| 75 |
outputFaceBlendshapes: true,
|
| 76 |
-
outputFacialTransformationMatrixes:
|
| 77 |
}
|
| 78 |
);
|
| 79 |
gestureRecognizerRef.current = await GestureRecognizer.createFromOptions(
|
|
@@ -114,11 +113,6 @@ export function useSensing() {
|
|
| 114 |
if (faceResult.faceLandmarks && faceResult.faceLandmarks.length > 0) {
|
| 115 |
const landmarks = faceResult.faceLandmarks[0];
|
| 116 |
|
| 117 |
-
if (calibratePendingRef.current) {
|
| 118 |
-
headTrackerRef.current.calibrate(landmarks);
|
| 119 |
-
calibratePendingRef.current = false;
|
| 120 |
-
}
|
| 121 |
-
|
| 122 |
if (faceResult.faceBlendshapes && faceResult.faceBlendshapes.length > 0) {
|
| 123 |
const bs: Record<string, number> = {};
|
| 124 |
for (const cat of faceResult.faceBlendshapes[0].categories) {
|
|
@@ -128,8 +122,12 @@ export function useSensing() {
|
|
| 128 |
}
|
| 129 |
|
| 130 |
gazeBucket = gazeTrackerRef.current.process(landmarks);
|
| 131 |
-
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
}
|
| 134 |
|
| 135 |
let gestureTag: SensingState["gestureTag"] = null;
|
|
@@ -210,11 +208,6 @@ export function useSensing() {
|
|
| 210 |
setSensing((prev) => ({ ...prev, headSignal: null }));
|
| 211 |
}, []);
|
| 212 |
|
| 213 |
-
const calibrateHeadPose = useCallback(() => {
|
| 214 |
-
calibratePendingRef.current = true;
|
| 215 |
-
setSensing((prev) => ({ ...prev, headSignal: null }));
|
| 216 |
-
}, []);
|
| 217 |
-
|
| 218 |
const resetCalibration = useCallback(() => {
|
| 219 |
gestureCountRef.current = { tag: null, count: 0 };
|
| 220 |
affectCountRef.current = { affect: null, count: 0 };
|
|
@@ -228,7 +221,7 @@ export function useSensing() {
|
|
| 228 |
airWritingActive: false,
|
| 229 |
headSignal: null,
|
| 230 |
headCalibrated: false,
|
| 231 |
-
headDebug: {
|
| 232 |
});
|
| 233 |
}, []);
|
| 234 |
|
|
@@ -240,7 +233,6 @@ export function useSensing() {
|
|
| 240 |
processFrame,
|
| 241 |
clearAirWrittenText,
|
| 242 |
clearHeadSignal,
|
| 243 |
-
calibrateHeadPose,
|
| 244 |
resetCalibration,
|
| 245 |
};
|
| 246 |
}
|
|
|
|
| 27 |
const airWriterRef = useRef(new AirWriter());
|
| 28 |
const inkBusyRef = useRef(false);
|
| 29 |
const headTrackerRef = useRef(new HeadPoseTracker());
|
|
|
|
| 30 |
const headDebugRef = useRef({ dx: 0, dy: 0, maxAbsDx: 0, maxAbsDy: 0, crossings: 0 });
|
| 31 |
const gestureCountRef = useRef<{ tag: SensingState["gestureTag"]; count: number }>({ tag: null, count: 0 });
|
| 32 |
const affectCountRef = useRef<{ affect: SensingState["affect"]; count: number }>({ affect: null, count: 0 });
|
|
|
|
| 41 |
airWritingActive: false,
|
| 42 |
headSignal: null,
|
| 43 |
headCalibrated: false,
|
| 44 |
+
headDebug: { pitch: 0, yaw: 0, roll: 0, crossings: 0 },
|
| 45 |
});
|
| 46 |
|
| 47 |
// Cleanup MediaPipe resources on unmount
|
|
|
|
| 72 |
runningMode: "VIDEO",
|
| 73 |
numFaces: 1,
|
| 74 |
outputFaceBlendshapes: true,
|
| 75 |
+
outputFacialTransformationMatrixes: true,
|
| 76 |
}
|
| 77 |
);
|
| 78 |
gestureRecognizerRef.current = await GestureRecognizer.createFromOptions(
|
|
|
|
| 113 |
if (faceResult.faceLandmarks && faceResult.faceLandmarks.length > 0) {
|
| 114 |
const landmarks = faceResult.faceLandmarks[0];
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
if (faceResult.faceBlendshapes && faceResult.faceBlendshapes.length > 0) {
|
| 117 |
const bs: Record<string, number> = {};
|
| 118 |
for (const cat of faceResult.faceBlendshapes[0].categories) {
|
|
|
|
| 122 |
}
|
| 123 |
|
| 124 |
gazeBucket = gazeTrackerRef.current.process(landmarks);
|
| 125 |
+
|
| 126 |
+
const matrix = faceResult.facialTransformationMatrixes?.[0];
|
| 127 |
+
if (matrix) {
|
| 128 |
+
headSignal = headTrackerRef.current.process(matrix);
|
| 129 |
+
headDebugRef.current = headTrackerRef.current.debug;
|
| 130 |
+
}
|
| 131 |
}
|
| 132 |
|
| 133 |
let gestureTag: SensingState["gestureTag"] = null;
|
|
|
|
| 208 |
setSensing((prev) => ({ ...prev, headSignal: null }));
|
| 209 |
}, []);
|
| 210 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
const resetCalibration = useCallback(() => {
|
| 212 |
gestureCountRef.current = { tag: null, count: 0 };
|
| 213 |
affectCountRef.current = { affect: null, count: 0 };
|
|
|
|
| 221 |
airWritingActive: false,
|
| 222 |
headSignal: null,
|
| 223 |
headCalibrated: false,
|
| 224 |
+
headDebug: { pitch: 0, yaw: 0, roll: 0, crossings: 0 },
|
| 225 |
});
|
| 226 |
}, []);
|
| 227 |
|
|
|
|
| 233 |
processFrame,
|
| 234 |
clearAirWrittenText,
|
| 235 |
clearHeadSignal,
|
|
|
|
| 236 |
resetCalibration,
|
| 237 |
};
|
| 238 |
}
|
frontend/src/lib/sensing.ts
CHANGED
|
@@ -105,225 +105,153 @@ export class GazeTracker {
|
|
| 105 |
}
|
| 106 |
}
|
| 107 |
|
| 108 |
-
// ── Head-pose tracker
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
-
export
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
x: number;
|
| 116 |
-
y: number;
|
| 117 |
-
t: number;
|
| 118 |
}
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
}
|
| 127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
export class HeadPoseTracker {
|
| 129 |
-
private
|
| 130 |
-
private neutralY: number | null = null;
|
| 131 |
-
private history: NosePoint[] = [];
|
| 132 |
private lastEmitTs = 0;
|
| 133 |
-
private lastDebug: HeadDebug = {
|
| 134 |
-
dx: 0,
|
| 135 |
-
dy: 0,
|
| 136 |
-
maxAbsDx: 0,
|
| 137 |
-
maxAbsDy: 0,
|
| 138 |
-
crossings: 0,
|
| 139 |
-
};
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
private static SHAKE_MIN_CROSSINGS = 3;
|
| 145 |
-
// Per-frame jitter below this magnitude is ignored when counting side
|
| 146 |
-
// crossings, so micro-fidgets near neutral can't rack up false crossings.
|
| 147 |
-
private static SHAKE_DEADBAND = 0.005;
|
| 148 |
-
private static NOD_DROP = 0.06;
|
| 149 |
-
private static NOD_WINDOW_MS = 600;
|
| 150 |
-
// Reject "nod" when horizontal motion exceeds this — it's a shake/sway.
|
| 151 |
-
private static NOD_MAX_HORIZONTAL = 0.015;
|
| 152 |
-
// Recovery: head must come back to within this of neutral.
|
| 153 |
-
private static NOD_RECOVERY = 0.015;
|
| 154 |
-
// The drop must start from near-neutral (not from a tilted resting pose).
|
| 155 |
-
private static NOD_START_THRESHOLD = 0.015;
|
| 156 |
-
// Minimum frames between drop start and peak — guards against single-frame
|
| 157 |
-
// landmark glitches that look like an instantaneous jerk.
|
| 158 |
-
private static NOD_MIN_DROP_FRAMES = 3;
|
| 159 |
-
// Minimum frames between peak and recovery — same reason, going up.
|
| 160 |
-
private static NOD_MIN_RECOVERY_FRAMES = 2;
|
| 161 |
-
|
| 162 |
-
calibrate(landmarks: { x: number; y: number }[]): void {
|
| 163 |
-
if (!landmarks[NOSE_TIP]) return;
|
| 164 |
-
this.neutralX = landmarks[NOSE_TIP].x;
|
| 165 |
-
this.neutralY = landmarks[NOSE_TIP].y;
|
| 166 |
-
this.history = [];
|
| 167 |
-
this.lastEmitTs = 0;
|
| 168 |
-
}
|
| 169 |
-
|
| 170 |
-
process(landmarks: { x: number; y: number }[]): HeadSignal | null {
|
| 171 |
-
if (!landmarks[NOSE_TIP]) return null;
|
| 172 |
-
if (this.neutralX === null || this.neutralY === null) return null;
|
| 173 |
|
|
|
|
|
|
|
| 174 |
const now = performance.now();
|
| 175 |
-
const nose = landmarks[NOSE_TIP];
|
| 176 |
-
this.history.push({ x: nose.x, y: nose.y, t: now });
|
| 177 |
-
const cutoff = now - HeadPoseTracker.WINDOW_MS;
|
| 178 |
-
this.history = this.history.filter((p) => p.t >= cutoff);
|
| 179 |
|
| 180 |
-
this.
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
-
if (now - this.lastEmitTs <
|
| 183 |
if (this.history.length < 6) return null;
|
| 184 |
|
| 185 |
const shake = this.detectShake();
|
| 186 |
-
if (shake) {
|
| 187 |
-
this.lastEmitTs = now;
|
| 188 |
-
return shake;
|
| 189 |
-
}
|
| 190 |
|
| 191 |
const nod = this.detectNod(now);
|
| 192 |
-
if (nod) {
|
| 193 |
-
this.lastEmitTs = now;
|
| 194 |
-
return nod;
|
| 195 |
-
}
|
| 196 |
|
| 197 |
return null;
|
| 198 |
}
|
| 199 |
|
| 200 |
-
private updateDebug(
|
| 201 |
-
if (this.neutralX === null || this.neutralY === null) return;
|
| 202 |
-
let maxAbsDx = 0;
|
| 203 |
-
let maxAbsDy = 0;
|
| 204 |
let crossings = 0;
|
| 205 |
-
let
|
| 206 |
-
for (
|
| 207 |
-
const
|
| 208 |
-
|
| 209 |
-
const
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
if (absDx < HeadPoseTracker.SHAKE_DEADBAND) continue;
|
| 213 |
-
const side = dx > 0 ? 1 : -1;
|
| 214 |
-
if (prevSide !== 0 && side !== prevSide) crossings += 1;
|
| 215 |
-
prevSide = side;
|
| 216 |
}
|
| 217 |
this.lastDebug = {
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
maxAbsDy,
|
| 222 |
crossings,
|
| 223 |
};
|
| 224 |
}
|
| 225 |
|
| 226 |
-
get debug(): HeadDebug {
|
| 227 |
-
return this.lastDebug;
|
| 228 |
-
}
|
| 229 |
-
|
| 230 |
private detectShake(): HeadSignal | null {
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
const
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
const side = dx > 0 ? 1 : -1;
|
| 243 |
-
if (prevSide !== 0 && side !== prevSide) crossings += 1;
|
| 244 |
-
prevSide = side;
|
| 245 |
}
|
| 246 |
-
|
| 247 |
-
crossings >= HeadPoseTracker.SHAKE_MIN_CROSSINGS &&
|
| 248 |
-
maxAbs >= HeadPoseTracker.SHAKE_AMPLITUDE
|
| 249 |
-
) {
|
| 250 |
-
return "HEAD_SHAKE";
|
| 251 |
-
}
|
| 252 |
-
return null;
|
| 253 |
}
|
| 254 |
|
| 255 |
private detectNod(now: number): HeadSignal | null {
|
| 256 |
-
|
| 257 |
-
const windowStart = now - HeadPoseTracker.NOD_WINDOW_MS;
|
| 258 |
-
const recent = this.history.filter((p) => p.t >= windowStart);
|
| 259 |
if (recent.length < 6) return null;
|
| 260 |
|
| 261 |
-
// Reject if there's significant
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
maxAbsDx = Math.max(maxAbsDx, Math.abs(p.x - this.neutralX));
|
| 265 |
-
}
|
| 266 |
-
if (maxAbsDx > HeadPoseTracker.NOD_MAX_HORIZONTAL) return null;
|
| 267 |
-
|
| 268 |
-
// Find the peak (lowest head position) within the window.
|
| 269 |
-
let maxDrop = 0;
|
| 270 |
-
let peakIdx = -1;
|
| 271 |
-
for (let i = 0; i < recent.length; i++) {
|
| 272 |
-
const drop = recent[i].y - this.neutralY;
|
| 273 |
-
if (drop > maxDrop) {
|
| 274 |
-
maxDrop = drop;
|
| 275 |
-
peakIdx = i;
|
| 276 |
-
}
|
| 277 |
-
}
|
| 278 |
-
if (maxDrop < HeadPoseTracker.NOD_DROP || peakIdx < 0) return null;
|
| 279 |
-
|
| 280 |
-
// Find a near-neutral start before the peak — a nod is a deliberate
|
| 281 |
-
// motion *from* neutral, not a recovery from an already-tilted pose.
|
| 282 |
-
let startIdx = -1;
|
| 283 |
-
for (let i = peakIdx - 1; i >= 0; i--) {
|
| 284 |
-
if (
|
| 285 |
-
recent[i].y - this.neutralY <=
|
| 286 |
-
HeadPoseTracker.NOD_START_THRESHOLD
|
| 287 |
-
) {
|
| 288 |
-
startIdx = i;
|
| 289 |
-
break;
|
| 290 |
-
}
|
| 291 |
-
}
|
| 292 |
-
if (
|
| 293 |
-
startIdx < 0 ||
|
| 294 |
-
peakIdx - startIdx < HeadPoseTracker.NOD_MIN_DROP_FRAMES
|
| 295 |
-
) {
|
| 296 |
-
return null;
|
| 297 |
-
}
|
| 298 |
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
if (
|
| 308 |
-
recoveryIdx < 0 ||
|
| 309 |
-
recoveryIdx - peakIdx < HeadPoseTracker.NOD_MIN_RECOVERY_FRAMES
|
| 310 |
-
) {
|
| 311 |
-
return null;
|
| 312 |
-
}
|
| 313 |
|
| 314 |
-
return "HEAD_NOD_DISSATISFIED";
|
| 315 |
}
|
| 316 |
|
|
|
|
|
|
|
| 317 |
reset(): void {
|
| 318 |
-
this.neutralX = null;
|
| 319 |
-
this.neutralY = null;
|
| 320 |
this.history = [];
|
| 321 |
this.lastEmitTs = 0;
|
| 322 |
}
|
| 323 |
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
}
|
| 327 |
}
|
| 328 |
|
| 329 |
// ── Air-writing stroke collector (recognition via Gemini Vision) ─────────────
|
|
|
|
| 105 |
}
|
| 106 |
}
|
| 107 |
|
| 108 |
+
// ── Head-pose tracker using facial transformation matrix ────────────────────
|
| 109 |
+
//
|
| 110 |
+
// MediaPipe FaceLandmarker produces a 4×4 column-major transformation matrix
|
| 111 |
+
// that encodes the 3-D rotation of the canonical face model in camera space.
|
| 112 |
+
// We decompose it to Euler angles (ZYX convention) — no calibration step needed
|
| 113 |
+
// because the angles are always relative to the canonical neutral pose.
|
| 114 |
+
//
|
| 115 |
+
// Signals emitted:
|
| 116 |
+
//   HEAD_SHAKE           — yaw oscillates ±N° (left/right), "no"
|
| 117 |
+
//   HEAD_NOD             — gentle pitch dip + recovery, "yes"
|
| 118 |
+
//   HEAD_NOD_DISSATISFIED — sharp/large pitch dip + recovery, discomfort
|
| 119 |
+
|
| 120 |
+
export type HeadSignal = "HEAD_SHAKE" | "HEAD_NOD" | "HEAD_NOD_DISSATISFIED";
|
| 121 |
|
| 122 |
+
export interface HeadDebug {
|
| 123 |
+
pitch: number;     // degrees — nod angle
|
| 124 |
+
yaw: number;       // degrees — shake angle
|
| 125 |
+
roll: number;      // degrees — tilt angle
|
| 126 |
+
crossings: number; // yaw direction reversals in current window
|
|
|
|
|
|
|
|
|
|
| 127 |
}
|
| 128 |
|
| 129 |
+
interface AnglePoint { pitch: number; yaw: number; t: number }
|
| 130 |
+
|
| 131 |
+
const RAD2DEG = 180 / Math.PI;
|
| 132 |
+
|
| 133 |
+
function extractAngles(data: Float32Array): { pitch: number; yaw: number; roll: number } {
|
| 134 |
+
// Column-major 4×4: R[row][col] = data[col*4 + row]
|
| 135 |
+
// ZYX Euler (R = Rz·Ry·Rx):
|
| 136 |
+
// pitch (X, nod) = atan2(R[2][1], R[2][2]) = atan2(data[6], data[10])
|
| 137 |
+
//   yaw   (Y, shake) = atan2(−R[2][0], √(R[2][1]²+R[2][2]²))
|
| 138 |
+
// roll (Z, tilt) = atan2(R[1][0], R[0][0]) = atan2(data[1], data[0])
|
| 139 |
+
const r20 = data[2], r21 = data[6], r22 = data[10];
|
| 140 |
+
const r10 = data[1], r00 = data[0];
|
| 141 |
+
return {
|
| 142 |
+
pitch: Math.atan2(r21, r22),
|
| 143 |
+
yaw: Math.atan2(-r20, Math.sqrt(r21 * r21 + r22 * r22)),
|
| 144 |
+
roll: Math.atan2(r10, r00),
|
| 145 |
+
};
|
| 146 |
}
|
| 147 |
|
| 148 |
+
// Thresholds (radians unless noted)
|
| 149 |
+
const WINDOW_MS = 1200;
|
| 150 |
+
const REFRACTORY_MS = 2000;
|
| 151 |
+
const NOD_WINDOW_MS = 1000;
|
| 152 |
+
|
| 153 |
+
const SHAKE_RANGE_RAD     = 0.30;  // total yaw swing needed (~17°)
|
| 154 |
+
const SHAKE_DEADBAND_RAD  = 0.05;  // ignore jitter below ~3°
|
| 155 |
+
const SHAKE_MIN_REVERSALS = 3;
|
| 156 |
+
|
| 157 |
+
const NOD_AMPLITUDE_RAD   = 0.15;  // ~8.6° — min pitch deviation for any nod
|
| 158 |
+
const NOD_SHARP_RAD       = 0.28;  // ~16° — above this = DISSATISFIED
|
| 159 |
+
const NOD_RECOVERY_RAD    = 0.15;  // must return within ~8.6° of start pitch
|
| 160 |
+
const NOD_MAX_YAW_RAD     = 0.25;  // reject if too much lateral (~14°)
|
| 161 |
+
|
| 162 |
export class HeadPoseTracker {
|
| 163 |
+
private history: AnglePoint[] = [];
|
|
|
|
|
|
|
| 164 |
private lastEmitTs = 0;
|
| 165 |
+
private lastDebug: HeadDebug = { pitch: 0, yaw: 0, roll: 0, crossings: 0 };
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
|
| 167 |
+
// No-op — angles are self-calibrating relative to the canonical face model.
|
| 168 |
+
// Kept so existing callers (calibrateHeadPose button) don't break.
|
| 169 |
+
calibrate(_landmarks: unknown): void {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
+
process(matrix: { data: Float32Array }): HeadSignal | null {
|
| 172 |
+
const { pitch, yaw, roll } = extractAngles(matrix.data);
|
| 173 |
const now = performance.now();
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
+
this.history.push({ pitch, yaw, t: now });
|
| 176 |
+
this.history = this.history.filter((p) => p.t >= now - WINDOW_MS);
|
| 177 |
+
|
| 178 |
+
this.updateDebug(pitch, yaw, roll);
|
| 179 |
|
| 180 |
+
if (now - this.lastEmitTs < REFRACTORY_MS) return null;
|
| 181 |
if (this.history.length < 6) return null;
|
| 182 |
|
| 183 |
const shake = this.detectShake();
|
| 184 |
+
if (shake) { this.lastEmitTs = now; return shake; }
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
const nod = this.detectNod(now);
|
| 187 |
+
if (nod) { this.lastEmitTs = now; return nod; }
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
return null;
|
| 190 |
}
|
| 191 |
|
| 192 |
+
private updateDebug(pitch: number, yaw: number, roll: number): void {
|
|
|
|
|
|
|
|
|
|
| 193 |
let crossings = 0;
|
| 194 |
+
let prevDir = 0;
|
| 195 |
+
for (let i = 1; i < this.history.length; i++) {
|
| 196 |
+
const diff = this.history[i].yaw - this.history[i - 1].yaw;
|
| 197 |
+
if (Math.abs(diff) < SHAKE_DEADBAND_RAD) continue;
|
| 198 |
+
const dir = diff > 0 ? 1 : -1;
|
| 199 |
+
if (prevDir !== 0 && dir !== prevDir) crossings++;
|
| 200 |
+
prevDir = dir;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
}
|
| 202 |
this.lastDebug = {
|
| 203 |
+
pitch: +(pitch * RAD2DEG).toFixed(1),
|
| 204 |
+
yaw: +(yaw * RAD2DEG).toFixed(1),
|
| 205 |
+
roll: +(roll * RAD2DEG).toFixed(1),
|
|
|
|
| 206 |
crossings,
|
| 207 |
};
|
| 208 |
}
|
| 209 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
private detectShake(): HeadSignal | null {
|
| 211 |
+
const yaws = this.history.map((p) => p.yaw);
|
| 212 |
+
const range = Math.max(...yaws) - Math.min(...yaws);
|
| 213 |
+
if (range < SHAKE_RANGE_RAD) return null;
|
| 214 |
+
|
| 215 |
+
let reversals = 0, prevDir = 0;
|
| 216 |
+
for (let i = 1; i < yaws.length; i++) {
|
| 217 |
+
const diff = yaws[i] - yaws[i - 1];
|
| 218 |
+
if (Math.abs(diff) < SHAKE_DEADBAND_RAD) continue;
|
| 219 |
+
const dir = diff > 0 ? 1 : -1;
|
| 220 |
+
if (prevDir !== 0 && dir !== prevDir) reversals++;
|
| 221 |
+
prevDir = dir;
|
|
|
|
|
|
|
|
|
|
| 222 |
}
|
| 223 |
+
return reversals >= SHAKE_MIN_REVERSALS ? "HEAD_SHAKE" : null;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
}
|
| 225 |
|
| 226 |
private detectNod(now: number): HeadSignal | null {
|
| 227 |
+
const recent = this.history.filter((p) => p.t >= now - NOD_WINDOW_MS);
|
|
|
|
|
|
|
| 228 |
if (recent.length < 6) return null;
|
| 229 |
|
| 230 |
+
// Reject if there's significant lateral motion — it's a shake, not a nod.
|
| 231 |
+
const yawRange = Math.max(...recent.map((p) => Math.abs(p.yaw)));
|
| 232 |
+
if (yawRange > NOD_MAX_YAW_RAD) return null;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
|
| 234 |
+
const pitches = recent.map((p) => p.pitch);
|
| 235 |
+
const startPitch = pitches[0];
|
| 236 |
+
const maxDev = Math.max(...pitches.map((p) => Math.abs(p - startPitch)));
|
| 237 |
+
if (maxDev < NOD_AMPLITUDE_RAD) return null;
|
| 238 |
+
|
| 239 |
+
// Must recover back near the start pitch.
|
| 240 |
+
const lastPitch = pitches[pitches.length - 1];
|
| 241 |
+
if (Math.abs(lastPitch - startPitch) >= NOD_RECOVERY_RAD) return null;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
|
| 243 |
+
return maxDev >= NOD_SHARP_RAD ? "HEAD_NOD_DISSATISFIED" : "HEAD_NOD";
|
| 244 |
}
|
| 245 |
|
| 246 |
+
get debug(): HeadDebug { return this.lastDebug; }
|
| 247 |
+
|
| 248 |
reset(): void {
|
|
|
|
|
|
|
| 249 |
this.history = [];
|
| 250 |
this.lastEmitTs = 0;
|
| 251 |
}
|
| 252 |
|
| 253 |
+
// Always true — no manual calibration step required with the matrix approach.
|
| 254 |
+
get calibrated(): boolean { return true; }
|
|
|
|
| 255 |
}
|
| 256 |
|
| 257 |
// ── Air-writing stroke collector (recognition via Gemini Vision) ─────────────
|
frontend/src/types.ts
CHANGED
|
@@ -1,14 +1,13 @@
|
|
| 1 |
export type Affect = "HAPPY" | "FRUSTRATED" | "NEUTRAL" | "SURPRISED";
|
| 2 |
export type GestureName = "THUMBS_UP" | "THUMBS_DOWN" | "POINTING_UP" | "CLOSED_FIST" | "OPEN_PALM" | "VICTORY" | "I_LOVE_YOU";
|
| 3 |
export type MemoryBucket = "family" | "medical" | "hobbies" | "daily_routine" | "social";
|
| 4 |
-
export type HeadSignal = "HEAD_SHAKE" | "HEAD_NOD_DISSATISFIED";
|
| 5 |
|
| 6 |
export interface HeadDebug {
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
crossings: number;
|
| 12 |
}
|
| 13 |
|
| 14 |
export interface SensingState {
|
|
|
|
| 1 |
export type Affect = "HAPPY" | "FRUSTRATED" | "NEUTRAL" | "SURPRISED";
|
| 2 |
export type GestureName = "THUMBS_UP" | "THUMBS_DOWN" | "POINTING_UP" | "CLOSED_FIST" | "OPEN_PALM" | "VICTORY" | "I_LOVE_YOU";
|
| 3 |
export type MemoryBucket = "family" | "medical" | "hobbies" | "daily_routine" | "social";
|
| 4 |
+
export type HeadSignal = "HEAD_SHAKE" | "HEAD_NOD" | "HEAD_NOD_DISSATISFIED";
|
| 5 |
|
| 6 |
export interface HeadDebug {
|
| 7 |
+
pitch: number;     // degrees — nod angle
|
| 8 |
+
yaw: number;       // degrees — shake angle
|
| 9 |
+
roll: number;      // degrees — tilt angle
|
| 10 |
+
crossings: number; // yaw direction reversals in current window
|
|
|
|
| 11 |
}
|
| 12 |
|
| 13 |
export interface SensingState {
|