Spaces:
Sleeping
Sleeping
Commit ·
8539a00
1
Parent(s): 56a15bc
air-writing
Browse files- .env.example +9 -0
- backend/api/main.py +65 -0
- backend/config/settings.py +6 -0
- frontend/src/components/SensingStatus.tsx +10 -6
- frontend/src/hooks/useSensing.ts +34 -9
- frontend/src/lib/inkRecognizer.ts +68 -0
- frontend/src/lib/sensing.ts +26 -97
- frontend/src/types.ts +1 -0
- frontend/vite.config.ts +2 -0
.env.example
CHANGED
|
@@ -23,3 +23,12 @@ THINKING_MODE=off
|
|
| 23 |
THINKING_TOKEN_BUDGET=4096
|
| 24 |
|
| 25 |
FALLBACK_LATENCY_THRESHOLD=3.5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
THINKING_TOKEN_BUDGET=4096
|
| 24 |
|
| 25 |
FALLBACK_LATENCY_THRESHOLD=3.5
|
| 26 |
+
|
| 27 |
+
# Vision model used by /ink/recognize (needs image_url support).
|
| 28 |
+
INK_VISION_MODEL=gemini-2.0-flash
|
| 29 |
+
INK_VISION_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai/
|
| 30 |
+
INK_VISION_API_KEY=
|
| 31 |
+
|
| 32 |
+
# Frontend flags (VITE_ prefix required for Vite to expose them to the browser).
|
| 33 |
+
# Set to "false" to disable air-writing stroke capture and Gemini ink recognition; any other value (or leaving it unset) keeps it enabled.
|
| 34 |
+
VITE_AIRWRITING_ENABLED=false
|
backend/api/main.py
CHANGED
|
@@ -5,6 +5,7 @@ import re
|
|
| 5 |
import threading
|
| 6 |
import time
|
| 7 |
from collections import OrderedDict
|
|
|
|
| 8 |
from pathlib import Path
|
| 9 |
|
| 10 |
from fastapi import BackgroundTasks, FastAPI, HTTPException
|
|
@@ -979,6 +980,70 @@ def submit_rating(req: RatingRequest):
|
|
| 979 |
return {"status": "ok"}
|
| 980 |
|
| 981 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 982 |
# Serve React frontend — must be last so API routes take priority
|
| 983 |
_frontend_dist = Path(__file__).parent.parent.parent / "frontend" / "dist"
|
| 984 |
if _frontend_dist.exists():
|
|
|
|
| 5 |
import threading
|
| 6 |
import time
|
| 7 |
from collections import OrderedDict
|
| 8 |
+
from functools import lru_cache
|
| 9 |
from pathlib import Path
|
| 10 |
|
| 11 |
from fastapi import BackgroundTasks, FastAPI, HTTPException
|
|
|
|
| 980 |
return {"status": "ok"}
|
| 981 |
|
| 982 |
|
| 983 |
+
# Request body accepted by POST /ink/recognize.
class InkRecognizeRequest(BaseModel):
    # Base64 image payload (no "data:" prefix); the handler embeds it in a
    # data:image/png;base64 URL. An empty string makes the handler return
    # empty text without calling the vision model.
    image_base64: str
|
| 985 |
+
|
| 986 |
+
|
| 987 |
+
@lru_cache(maxsize=1)
def _get_vision_client():
    """Build the OpenAI-compatible client used by /ink/recognize.

    The ``openai`` package is imported inside the function, so it is only
    loaded on first use. ``lru_cache(maxsize=1)`` means the client is
    constructed once and the same instance is returned on later calls.

    Returns:
        A client pointed at ``settings.ink_vision_base_url``. When
        ``settings.ink_vision_api_key`` is empty, the literal ``"unused"``
        is passed as the API key instead.
    """
    from openai import OpenAI

    endpoint = settings.ink_vision_base_url
    key = settings.ink_vision_api_key or "unused"
    return OpenAI(base_url=endpoint, api_key=key)
|
| 994 |
+
|
| 995 |
+
|
| 996 |
+
@app.post("/ink/recognize")
def ink_recognize(req: InkRecognizeRequest):
    """Recognise an air-written stroke image with the configured vision model.

    Sends ``req.image_base64`` as a PNG data URL, together with a short
    instruction prompt, to ``settings.ink_vision_model`` through the client
    returned by ``_get_vision_client()``.

    Returns:
        ``{"text": <recognised text>}``. The text is empty when the request
        carries no image, or when the model's reply contains nothing outside
        ``<think>`` blocks.

    Raises:
        HTTPException: 503 when ``settings.ink_vision_api_key`` is empty;
            500 when the model call or the response handling raises.
    """
    if not req.image_base64:
        return {"text": ""}
    if not settings.ink_vision_api_key:
        _log.warning("/ink/recognize called but INK_VISION_API_KEY is not set")
        raise HTTPException(status_code=503, detail="INK_VISION_API_KEY not configured")
    try:
        client = _get_vision_client()
        response = client.chat.completions.create(
            model=settings.ink_vision_model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{req.image_base64}"
                            },
                        },
                        {
                            "type": "text",
                            # /no_think suppresses Qwen3 chain-of-thought so the
                            # answer isn't buried inside <think> tags.
                            "text": (
                                "/no_think\n"
                                "This is a single handwritten character or short word "
                                "drawn in the air. Reply with ONLY the character or "
                                "word, nothing else."
                            ),
                        },
                    ],
                }
            ],
            # 512 gives thinking models room to emit <think>…</think> + the answer
            # before being cut off; the <think> block is stripped out below.
            max_tokens=512,
            temperature=0.0,
        )
        raw = response.choices[0].message.content or ""
        _log.info("/ink/recognize raw → %r", raw[:200])
        # Strip <think>…</think> blocks emitted by reasoning models (Qwen3 etc.).
        # The closing tag is optional: a reply truncated by max_tokens inside an
        # unterminated <think> block is removed up to the end of the string, so
        # partial reasoning is never returned as the recognised text.
        text = re.sub(
            r"<think>.*?(?:</think>|\Z)", "", raw, flags=re.DOTALL
        ).strip()
        _log.info("/ink/recognize → %r", text)
        return {"text": text}
    except Exception as exc:
        _log.exception("/ink/recognize failed: %r", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
| 1045 |
+
|
| 1046 |
+
|
| 1047 |
# Serve React frontend — must be last so API routes take priority
|
| 1048 |
_frontend_dist = Path(__file__).parent.parent.parent / "frontend" / "dist"
|
| 1049 |
if _frontend_dist.exists():
|
backend/config/settings.py
CHANGED
|
@@ -46,6 +46,12 @@ class Settings(BaseSettings):
|
|
| 46 |
# Active tier: "primary" | "fallback"
|
| 47 |
active_llm_tier: str = "primary"
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# off | strip | full | suppress
|
| 50 |
thinking_mode: str = "off"
|
| 51 |
thinking_token_budget: int = 4096
|
|
|
|
| 46 |
# Active tier: "primary" | "fallback"
|
| 47 |
active_llm_tier: str = "primary"
|
| 48 |
|
| 49 |
+
# Vision model used only by /ink/recognize (needs image_url support).
|
| 50 |
+
# Defaults to Gemini flash via the OpenAI-compatible endpoint.
|
| 51 |
+
ink_vision_model: str = "gemini-2.0-flash"
|
| 52 |
+
ink_vision_base_url: str = "https://generativelanguage.googleapis.com/v1beta/openai/"
|
| 53 |
+
ink_vision_api_key: str = ""
|
| 54 |
+
|
| 55 |
# off | strip | full | suppress
|
| 56 |
thinking_mode: str = "off"
|
| 57 |
thinking_token_budget: int = 4096
|
frontend/src/components/SensingStatus.tsx
CHANGED
|
@@ -58,12 +58,16 @@ export function SensingStatus({ sensing, webcamActive }: Props) {
|
|
| 58 |
</span>
|
| 59 |
</div>
|
| 60 |
)}
|
| 61 |
-
|
| 62 |
-
<
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
</div>
|
| 68 |
);
|
| 69 |
}
|
|
|
|
| 58 |
</span>
|
| 59 |
</div>
|
| 60 |
)}
|
| 61 |
+
<div className="sensing-row">
|
| 62 |
+
<span className="sensing-label">Air-writing</span>
|
| 63 |
+
<span className="sensing-value">
|
| 64 |
+
{sensing.airWritingActive
|
| 65 |
+
? "✏️ drawing…"
|
| 66 |
+
: sensing.airWrittenText
|
| 67 |
+
? sensing.airWrittenText
|
| 68 |
+
: "none"}
|
| 69 |
+
</span>
|
| 70 |
+
</div>
|
| 71 |
</div>
|
| 72 |
);
|
| 73 |
}
|
frontend/src/hooks/useSensing.ts
CHANGED
|
@@ -12,16 +12,20 @@ import {
|
|
| 12 |
AirWriter,
|
| 13 |
HeadPoseTracker,
|
| 14 |
} from "../lib/sensing";
|
| 15 |
-
import {
|
| 16 |
|
| 17 |
const GESTURE_DEBOUNCE_FRAMES = 3;
|
| 18 |
const AFFECT_DEBOUNCE_FRAMES = 8;
|
| 19 |
|
|
|
|
|
|
|
|
|
|
| 20 |
export function useSensing() {
|
| 21 |
const faceLandmarkerRef = useRef<FaceLandmarker | null>(null);
|
| 22 |
const gestureRecognizerRef = useRef<GestureRecognizer | null>(null);
|
| 23 |
const gazeTrackerRef = useRef(new GazeTracker());
|
| 24 |
-
const airWriterRef = useRef(new AirWriter(
|
|
|
|
| 25 |
const headTrackerRef = useRef(new HeadPoseTracker());
|
| 26 |
const calibratePendingRef = useRef(false);
|
| 27 |
const headDebugRef = useRef({ dx: 0, dy: 0, maxAbsDx: 0, maxAbsDy: 0, crossings: 0 });
|
|
@@ -35,6 +39,7 @@ export function useSensing() {
|
|
| 35 |
gestureTag: null,
|
| 36 |
gazeBucket: null,
|
| 37 |
airWrittenText: "",
|
|
|
|
| 38 |
headSignal: null,
|
| 39 |
headCalibrated: false,
|
| 40 |
headDebug: { dx: 0, dy: 0, maxAbsDx: 0, maxAbsDy: 0, crossings: 0 },
|
|
@@ -133,18 +138,36 @@ export function useSensing() {
|
|
| 133 |
if (gestureResult.gestures && gestureResult.gestures.length > 0) {
|
| 134 |
const topGesture = gestureResult.gestures[0][0];
|
| 135 |
gestureTag = mapGestureLabel(topGesture.categoryName);
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
|
|
|
|
|
|
| 143 |
airWriterRef.current.noHand();
|
| 144 |
}
|
| 145 |
|
| 146 |
const newAirText = airWriterRef.current.getText();
|
| 147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
if (gestureTag === gestureCountRef.current.tag) {
|
| 149 |
gestureCountRef.current.count++;
|
| 150 |
} else {
|
|
@@ -170,6 +193,7 @@ export function useSensing() {
|
|
| 170 |
airWrittenText: newAirText
|
| 171 |
? prev.airWrittenText + newAirText
|
| 172 |
: prev.airWrittenText,
|
|
|
|
| 173 |
headSignal: headSignal ?? prev.headSignal,
|
| 174 |
headCalibrated: headTrackerRef.current.calibrated,
|
| 175 |
headDebug: headDebugRef.current,
|
|
@@ -201,6 +225,7 @@ export function useSensing() {
|
|
| 201 |
gestureTag: null,
|
| 202 |
gazeBucket: null,
|
| 203 |
airWrittenText: "",
|
|
|
|
| 204 |
headSignal: null,
|
| 205 |
headCalibrated: false,
|
| 206 |
headDebug: { dx: 0, dy: 0, maxAbsDx: 0, maxAbsDy: 0, crossings: 0 },
|
|
|
|
| 12 |
AirWriter,
|
| 13 |
HeadPoseTracker,
|
| 14 |
} from "../lib/sensing";
|
| 15 |
+
import { recognizeInkStroke } from "../lib/inkRecognizer";
|
| 16 |
|
| 17 |
const GESTURE_DEBOUNCE_FRAMES = 3;
|
| 18 |
const AFFECT_DEBOUNCE_FRAMES = 8;
|
| 19 |
|
| 20 |
+
// Set VITE_AIRWRITING_ENABLED=false in .env to disable air-writing.
|
| 21 |
+
const AIRWRITING_ENABLED = import.meta.env.VITE_AIRWRITING_ENABLED !== "false";
|
| 22 |
+
|
| 23 |
export function useSensing() {
|
| 24 |
const faceLandmarkerRef = useRef<FaceLandmarker | null>(null);
|
| 25 |
const gestureRecognizerRef = useRef<GestureRecognizer | null>(null);
|
| 26 |
const gazeTrackerRef = useRef(new GazeTracker());
|
| 27 |
+
const airWriterRef = useRef(new AirWriter());
|
| 28 |
+
const inkBusyRef = useRef(false);
|
| 29 |
const headTrackerRef = useRef(new HeadPoseTracker());
|
| 30 |
const calibratePendingRef = useRef(false);
|
| 31 |
const headDebugRef = useRef({ dx: 0, dy: 0, maxAbsDx: 0, maxAbsDy: 0, crossings: 0 });
|
|
|
|
| 39 |
gestureTag: null,
|
| 40 |
gazeBucket: null,
|
| 41 |
airWrittenText: "",
|
| 42 |
+
airWritingActive: false,
|
| 43 |
headSignal: null,
|
| 44 |
headCalibrated: false,
|
| 45 |
headDebug: { dx: 0, dy: 0, maxAbsDx: 0, maxAbsDy: 0, crossings: 0 },
|
|
|
|
| 138 |
if (gestureResult.gestures && gestureResult.gestures.length > 0) {
|
| 139 |
const topGesture = gestureResult.gestures[0][0];
|
| 140 |
gestureTag = mapGestureLabel(topGesture.categoryName);
|
| 141 |
+
if (AIRWRITING_ENABLED) {
|
| 142 |
+
const handLandmarks = gestureResult.landmarks[0];
|
| 143 |
+
airWriterRef.current.processHandLandmarks(
|
| 144 |
+
handLandmarks,
|
| 145 |
+
video.videoWidth,
|
| 146 |
+
video.videoHeight
|
| 147 |
+
);
|
| 148 |
+
}
|
| 149 |
+
} else if (AIRWRITING_ENABLED) {
|
| 150 |
airWriterRef.current.noHand();
|
| 151 |
}
|
| 152 |
|
| 153 |
const newAirText = airWriterRef.current.getText();
|
| 154 |
|
| 155 |
+
if (AIRWRITING_ENABLED) {
|
| 156 |
+
const completedStroke = airWriterRef.current.getCompletedStroke();
|
| 157 |
+
if (completedStroke && !inkBusyRef.current) {
|
| 158 |
+
inkBusyRef.current = true;
|
| 159 |
+
recognizeInkStroke(completedStroke).then((text) => {
|
| 160 |
+
inkBusyRef.current = false;
|
| 161 |
+
if (text) {
|
| 162 |
+
setSensing((prev) => ({
|
| 163 |
+
...prev,
|
| 164 |
+
airWrittenText: prev.airWrittenText + text,
|
| 165 |
+
}));
|
| 166 |
+
}
|
| 167 |
+
});
|
| 168 |
+
}
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
if (gestureTag === gestureCountRef.current.tag) {
|
| 172 |
gestureCountRef.current.count++;
|
| 173 |
} else {
|
|
|
|
| 193 |
airWrittenText: newAirText
|
| 194 |
? prev.airWrittenText + newAirText
|
| 195 |
: prev.airWrittenText,
|
| 196 |
+
airWritingActive: airWriterRef.current.strokeActive,
|
| 197 |
headSignal: headSignal ?? prev.headSignal,
|
| 198 |
headCalibrated: headTrackerRef.current.calibrated,
|
| 199 |
headDebug: headDebugRef.current,
|
|
|
|
| 225 |
gestureTag: null,
|
| 226 |
gazeBucket: null,
|
| 227 |
airWrittenText: "",
|
| 228 |
+
airWritingActive: false,
|
| 229 |
headSignal: null,
|
| 230 |
headCalibrated: false,
|
| 231 |
headDebug: { dx: 0, dy: 0, maxAbsDx: 0, maxAbsDy: 0, crossings: 0 },
|
frontend/src/lib/inkRecognizer.ts
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
const CANVAS_SIZE = 200;
|
| 2 |
+
const PAD = 20;
|
| 3 |
+
|
| 4 |
+
/**
 * Draw a stroke on an off-screen CANVAS_SIZE × CANVAS_SIZE canvas and return
 * the PNG as base64 (the part of the data URL after the comma).
 *
 * The stroke's bounding box is scaled uniformly to fit inside the canvas with
 * PAD pixels of margin on every side, centred, and drawn as a single black
 * polyline on a white background.
 */
function renderStroke(trajectory: [number, number][]): string {
  const canvas = document.createElement("canvas");
  canvas.width = CANVAS_SIZE;
  canvas.height = CANVAS_SIZE;
  const ctx = canvas.getContext("2d")!;

  // White background.
  ctx.fillStyle = "#ffffff";
  ctx.fillRect(0, 0, CANVAS_SIZE, CANVAS_SIZE);

  // Bounding box of the raw points.
  let left = Infinity;
  let top = Infinity;
  let right = -Infinity;
  let bottom = -Infinity;
  for (const [px, py] of trajectory) {
    left = px < left ? px : left;
    top = py < top ? py : top;
    right = px > right ? px : right;
    bottom = py > bottom ? py : bottom;
  }

  // Uniform scale (aspect ratio preserved); spans are clamped to at least 1
  // so a perfectly straight horizontal/vertical stroke does not divide by 0.
  const usable = CANVAS_SIZE - 2 * PAD;
  const spanX = Math.max(right - left, 1);
  const spanY = Math.max(bottom - top, 1);
  const factor = Math.min(usable / spanX, usable / spanY);
  const shiftX = PAD + (usable - spanX * factor) / 2;
  const shiftY = PAD + (usable - spanY * factor) / 2;

  // Map a raw point into canvas coordinates.
  const toCanvas = ([x, y]: [number, number]): [number, number] => [
    (x - left) * factor + shiftX,
    (y - top) * factor + shiftY,
  ];

  ctx.strokeStyle = "#000000";
  ctx.lineWidth = 5;
  ctx.lineCap = "round";
  ctx.lineJoin = "round";
  ctx.beginPath();
  const [startX, startY] = toCanvas(trajectory[0]);
  ctx.moveTo(startX, startY);
  for (const point of trajectory.slice(1)) {
    const [cx, cy] = toCanvas(point);
    ctx.lineTo(cx, cy);
  }
  ctx.stroke();

  const dataUrl = canvas.toDataURL("image/png");
  return dataUrl.split(",")[1];
}
|
| 43 |
+
|
| 44 |
+
/**
 * Render a completed stroke to a PNG and ask the backend to recognise it.
 *
 * Strokes with fewer than 5 points are not sent. Resolves to the `text` field
 * of the POST /ink/recognize response, or to "" when that field is empty, the
 * response status is not OK, or the request throws.
 */
export async function recognizeInkStroke(
  trajectory: [number, number][]
): Promise<string> {
  if (trajectory.length < 5) {
    return "";
  }

  const base64 = renderStroke(trajectory);
  const payload = JSON.stringify({ image_base64: base64 });

  try {
    const response = await fetch("/ink/recognize", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: payload,
    });

    if (response.ok) {
      const data = await response.json();
      const recognized = (data.text as string) || "";
      console.debug("[inkRecognizer] recognized:", JSON.stringify(recognized));
      return recognized;
    }

    // Non-2xx: log status plus whatever body could be read, then give up.
    const errorBody = await response.text().catch(() => "");
    console.error("[inkRecognizer] HTTP", response.status, errorBody);
    return "";
  } catch (err) {
    console.error("[inkRecognizer] fetch failed:", err);
    return "";
  }
}
|
frontend/src/lib/sensing.ts
CHANGED
|
@@ -326,76 +326,25 @@ export class HeadPoseTracker {
|
|
| 326 |
}
|
| 327 |
}
|
| 328 |
|
| 329 |
-
// ── Air-writing
|
| 330 |
|
| 331 |
const INDEX_TIP = 8;
|
| 332 |
const VELOCITY_START = 15;
|
| 333 |
const VELOCITY_END = 5;
|
| 334 |
const STROKE_GAP_MS = 200;
|
| 335 |
-
const RESAMPLE_N = 32;
|
| 336 |
-
|
| 337 |
-
function normaliseTrajectory(pts: [number, number][]): [number, number][] {
|
| 338 |
-
if (pts.length < 2) return pts;
|
| 339 |
-
let minX = Infinity, minY = Infinity, maxX = -Infinity, maxY = -Infinity;
|
| 340 |
-
for (const [x, y] of pts) {
|
| 341 |
-
minX = Math.min(minX, x);
|
| 342 |
-
minY = Math.min(minY, y);
|
| 343 |
-
maxX = Math.max(maxX, x);
|
| 344 |
-
maxY = Math.max(maxY, y);
|
| 345 |
-
}
|
| 346 |
-
const scaleX = maxX - minX + 1e-6;
|
| 347 |
-
const scaleY = maxY - minY + 1e-6;
|
| 348 |
-
const norm = pts.map(([x, y]) => [(x - minX) / scaleX, (y - minY) / scaleY] as [number, number]);
|
| 349 |
-
|
| 350 |
-
// Resample to RESAMPLE_N points via linear interpolation
|
| 351 |
-
const resampled: [number, number][] = [];
|
| 352 |
-
for (let i = 0; i < RESAMPLE_N; i++) {
|
| 353 |
-
const t = (i / (RESAMPLE_N - 1)) * (norm.length - 1);
|
| 354 |
-
const lo = Math.floor(t);
|
| 355 |
-
const hi = Math.min(lo + 1, norm.length - 1);
|
| 356 |
-
const frac = t - lo;
|
| 357 |
-
resampled.push([
|
| 358 |
-
norm[lo][0] + frac * (norm[hi][0] - norm[lo][0]),
|
| 359 |
-
norm[lo][1] + frac * (norm[hi][1] - norm[lo][1]),
|
| 360 |
-
]);
|
| 361 |
-
}
|
| 362 |
-
return resampled;
|
| 363 |
-
}
|
| 364 |
-
|
| 365 |
-
function dtwDistance(a: [number, number][], b: [number, number][]): number {
|
| 366 |
-
const n = a.length, m = b.length;
|
| 367 |
-
const dtw: number[][] = Array.from({ length: n + 1 }, () =>
|
| 368 |
-
Array(m + 1).fill(Infinity)
|
| 369 |
-
);
|
| 370 |
-
dtw[0][0] = 0;
|
| 371 |
-
for (let i = 1; i <= n; i++) {
|
| 372 |
-
for (let j = 1; j <= m; j++) {
|
| 373 |
-
const cost = Math.sqrt(
|
| 374 |
-
(a[i - 1][0] - b[j - 1][0]) ** 2 + (a[i - 1][1] - b[j - 1][1]) ** 2
|
| 375 |
-
);
|
| 376 |
-
dtw[i][j] = cost + Math.min(dtw[i - 1][j], dtw[i][j - 1], dtw[i - 1][j - 1]);
|
| 377 |
-
}
|
| 378 |
-
}
|
| 379 |
-
return dtw[n][m];
|
| 380 |
-
}
|
| 381 |
|
| 382 |
export class AirWriter {
|
| 383 |
private trajectory: [number, number][] = [];
|
| 384 |
private inStroke = false;
|
| 385 |
private strokeEndTime = 0;
|
| 386 |
private prevPt: [number, number] | null = null;
|
| 387 |
-
private
|
| 388 |
-
private templates: Map<string, [number, number][]>;
|
| 389 |
-
|
| 390 |
-
constructor(templates: Map<string, [number, number][]> = new Map()) {
|
| 391 |
-
this.templates = templates;
|
| 392 |
-
}
|
| 393 |
|
| 394 |
processHandLandmarks(
|
| 395 |
landmarks: { x: number; y: number }[],
|
| 396 |
frameWidth: number,
|
| 397 |
frameHeight: number
|
| 398 |
-
):
|
| 399 |
const tip: [number, number] = [
|
| 400 |
landmarks[INDEX_TIP].x * frameWidth,
|
| 401 |
landmarks[INDEX_TIP].y * frameHeight,
|
|
@@ -413,70 +362,50 @@ export class AirWriter {
|
|
| 413 |
this.inStroke = true;
|
| 414 |
this.trajectory.push(tip);
|
| 415 |
this.strokeEndTime = 0;
|
| 416 |
-
return
|
| 417 |
}
|
| 418 |
|
| 419 |
if (this.inStroke && velocity < VELOCITY_END) {
|
| 420 |
if (this.strokeEndTime === 0) {
|
| 421 |
this.strokeEndTime = performance.now();
|
| 422 |
}
|
| 423 |
-
|
| 424 |
}
|
| 425 |
-
|
| 426 |
-
return null;
|
| 427 |
}
|
| 428 |
|
| 429 |
-
private checkStrokeEnd():
|
| 430 |
-
if (!this.inStroke || this.strokeEndTime === 0) return
|
| 431 |
if (performance.now() - this.strokeEndTime >= STROKE_GAP_MS) {
|
| 432 |
-
|
|
|
|
|
|
|
| 433 |
this.trajectory = [];
|
| 434 |
this.inStroke = false;
|
| 435 |
this.strokeEndTime = 0;
|
| 436 |
-
if (char) this.buffer.push(char);
|
| 437 |
-
return char;
|
| 438 |
}
|
| 439 |
-
return null;
|
| 440 |
}
|
| 441 |
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
return null;
|
| 445 |
-
}
|
| 446 |
-
if (this.templates.size === 0) {
|
| 447 |
-
console.debug("[AirWriter] stroke completed but template bank is empty");
|
| 448 |
-
return null;
|
| 449 |
-
}
|
| 450 |
-
const query = normaliseTrajectory(trajectory);
|
| 451 |
-
let bestChar: string | null = null;
|
| 452 |
-
let bestDist = Infinity;
|
| 453 |
-
for (const [char, template] of this.templates) {
|
| 454 |
-
const d = dtwDistance(query, template);
|
| 455 |
-
if (d < bestDist) {
|
| 456 |
-
bestDist = d;
|
| 457 |
-
bestChar = char;
|
| 458 |
-
}
|
| 459 |
-
}
|
| 460 |
-
// Reject poor matches so we don't pass garbage to the LLM.
|
| 461 |
-
// Threshold is empirical — tune once real users test this.
|
| 462 |
-
const MATCH_THRESHOLD = 8.0;
|
| 463 |
-
if (bestDist > MATCH_THRESHOLD) {
|
| 464 |
-
console.debug(
|
| 465 |
-
`[AirWriter] no template matched (best='${bestChar}', dist=${bestDist.toFixed(2)})`
|
| 466 |
-
);
|
| 467 |
-
return null;
|
| 468 |
-
}
|
| 469 |
-
return bestChar;
|
| 470 |
}
|
| 471 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
getText(): string {
|
| 473 |
-
|
| 474 |
-
this.buffer = [];
|
| 475 |
-
return text;
|
| 476 |
}
|
| 477 |
|
| 478 |
-
noHand():
|
|
|
|
|
|
|
|
|
|
| 479 |
this.prevPt = null;
|
| 480 |
-
|
| 481 |
}
|
| 482 |
}
|
|
|
|
| 326 |
}
|
| 327 |
}
|
| 328 |
|
| 329 |
+
// ── Air-writing stroke collector (recognition via Gemini Vision) ─────────────
|
| 330 |
|
| 331 |
const INDEX_TIP = 8;
|
| 332 |
const VELOCITY_START = 15;
|
| 333 |
const VELOCITY_END = 5;
|
| 334 |
const STROKE_GAP_MS = 200;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
|
| 336 |
export class AirWriter {
|
| 337 |
private trajectory: [number, number][] = [];
|
| 338 |
private inStroke = false;
|
| 339 |
private strokeEndTime = 0;
|
| 340 |
private prevPt: [number, number] | null = null;
|
| 341 |
+
private pendingStroke: [number, number][] | null = null;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
processHandLandmarks(
|
| 344 |
landmarks: { x: number; y: number }[],
|
| 345 |
frameWidth: number,
|
| 346 |
frameHeight: number
|
| 347 |
+
): void {
|
| 348 |
const tip: [number, number] = [
|
| 349 |
landmarks[INDEX_TIP].x * frameWidth,
|
| 350 |
landmarks[INDEX_TIP].y * frameHeight,
|
|
|
|
| 362 |
this.inStroke = true;
|
| 363 |
this.trajectory.push(tip);
|
| 364 |
this.strokeEndTime = 0;
|
| 365 |
+
return;
|
| 366 |
}
|
| 367 |
|
| 368 |
if (this.inStroke && velocity < VELOCITY_END) {
|
| 369 |
if (this.strokeEndTime === 0) {
|
| 370 |
this.strokeEndTime = performance.now();
|
| 371 |
}
|
| 372 |
+
this.checkStrokeEnd();
|
| 373 |
}
|
|
|
|
|
|
|
| 374 |
}
|
| 375 |
|
| 376 |
+
private checkStrokeEnd(): void {
|
| 377 |
+
if (!this.inStroke || this.strokeEndTime === 0) return;
|
| 378 |
if (performance.now() - this.strokeEndTime >= STROKE_GAP_MS) {
|
| 379 |
+
if (this.trajectory.length >= 5) {
|
| 380 |
+
this.pendingStroke = [...this.trajectory];
|
| 381 |
+
}
|
| 382 |
this.trajectory = [];
|
| 383 |
this.inStroke = false;
|
| 384 |
this.strokeEndTime = 0;
|
|
|
|
|
|
|
| 385 |
}
|
|
|
|
| 386 |
}
|
| 387 |
|
| 388 |
+
get strokeActive(): boolean {
|
| 389 |
+
return this.inStroke;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
}
|
| 391 |
|
| 392 |
+
// Returns the completed stroke trajectory and clears it (call once per frame).
|
| 393 |
+
getCompletedStroke(): [number, number][] | null {
|
| 394 |
+
const s = this.pendingStroke;
|
| 395 |
+
this.pendingStroke = null;
|
| 396 |
+
return s;
|
| 397 |
+
}
|
| 398 |
+
|
| 399 |
+
// Kept for API compatibility — always returns "".
|
| 400 |
getText(): string {
|
| 401 |
+
return "";
|
|
|
|
|
|
|
| 402 |
}
|
| 403 |
|
| 404 |
+
noHand(): void {
|
| 405 |
+
if (this.inStroke && this.strokeEndTime === 0) {
|
| 406 |
+
this.strokeEndTime = performance.now();
|
| 407 |
+
}
|
| 408 |
this.prevPt = null;
|
| 409 |
+
this.checkStrokeEnd();
|
| 410 |
}
|
| 411 |
}
|
frontend/src/types.ts
CHANGED
|
@@ -16,6 +16,7 @@ export interface SensingState {
|
|
| 16 |
gestureTag: GestureName | null;
|
| 17 |
gazeBucket: MemoryBucket | null;
|
| 18 |
airWrittenText: string;
|
|
|
|
| 19 |
headSignal: HeadSignal | null;
|
| 20 |
headCalibrated: boolean;
|
| 21 |
headDebug: HeadDebug;
|
|
|
|
| 16 |
gestureTag: GestureName | null;
|
| 17 |
gazeBucket: MemoryBucket | null;
|
| 18 |
airWrittenText: string;
|
| 19 |
+
airWritingActive: boolean;
|
| 20 |
headSignal: HeadSignal | null;
|
| 21 |
headCalibrated: boolean;
|
| 22 |
headDebug: HeadDebug;
|
frontend/vite.config.ts
CHANGED
|
@@ -4,6 +4,7 @@ import react from '@vitejs/plugin-react'
|
|
| 4 |
// https://vite.dev/config/
|
| 5 |
export default defineConfig({
|
| 6 |
plugins: [react()],
|
|
|
|
| 7 |
server: {
|
| 8 |
port: 7550,
|
| 9 |
proxy: {
|
|
@@ -14,6 +15,7 @@ export default defineConfig({
|
|
| 14 |
"/evals": "http://localhost:8000",
|
| 15 |
"/feedback": "http://localhost:8000",
|
| 16 |
"/debug": "http://localhost:8000",
|
|
|
|
| 17 |
},
|
| 18 |
},
|
| 19 |
})
|
|
|
|
| 4 |
// https://vite.dev/config/
|
| 5 |
export default defineConfig({
|
| 6 |
plugins: [react()],
|
| 7 |
+
envDir: "..", // read .env from repo root (shared with backend)
|
| 8 |
server: {
|
| 9 |
port: 7550,
|
| 10 |
proxy: {
|
|
|
|
| 15 |
"/evals": "http://localhost:8000",
|
| 16 |
"/feedback": "http://localhost:8000",
|
| 17 |
"/debug": "http://localhost:8000",
|
| 18 |
+
"/ink": "http://localhost:8000",
|
| 19 |
},
|
| 20 |
},
|
| 21 |
})
|