Spaces:
Sleeping
Sleeping
File size: 2,839 Bytes
535a98d 56a15bc 535a98d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | import { DEFAULT_AIR_TEMPLATES } from "./airTemplates";
// Short, action-oriented AAC words that are unlikely to be casual chatter
// when a user air-writes them. If the air-written text is exactly one of
// these and it disagrees with the voice transcript, the air text wins.
// Only candidates that DEFAULT_AIR_TEMPLATES reports via `.has()` are kept,
// so the set never contains a word that is absent from that collection.
const AAC_PRIORITY_TOKENS: ReadonlySet<string> = (() => {
  const kept = new Set<string>();
  for (const candidate of ["help", "stop", "water", "done", "more"]) {
    if (DEFAULT_AIR_TEMPLATES.has(candidate)) {
      kept.add(candidate);
    }
  }
  return kept;
})();
/**
 * Label describing how `resolveIntent` arrived at its final text.
 *
 * - `"voice_only"`     — only the voice input was non-empty after normalisation.
 * - `"air_only"`       — only the air-writing input was non-empty after normalisation.
 * - `"agree"`          — both were present and one contains the other, or their
 *                        token overlap (Jaccard) is at least 0.5.
 * - `"conflict_air"`   — both were present, they did not agree, and the air text
 *                        is exactly one of the AAC priority tokens; air text wins.
 * - `"conflict_voice"` — both were present, they did not agree, and the air text
 *                        is not a priority token; voice text wins.
 * - `"none"`           — both inputs were empty after normalisation.
 */
export type ResolvedSource =
| "voice_only"
| "air_only"
| "agree"
| "conflict_air"
| "conflict_voice"
| "none";
/**
 * Outcome of reconciling a voice transcript with air-written text.
 * Field descriptions below reflect the values `resolveIntent` produces.
 */
export interface ResolvedIntent {
/** The winning text (trimmed and lower-cased); `""` when `source` is `"none"`. */
text: string;
/** Which channel supplied `text` and whether the two channels agreed. */
source: ResolvedSource;
/** Normalised voice input, or `null` when it was missing or blank. */
voice_text: string | null;
/** Normalised air-writing input, or `null` when it was missing or blank. */
air_text: string | null;
}
/**
 * Canonicalises raw input for comparison.
 *
 * @param s - Raw text; `null` and `undefined` are treated as empty.
 * @returns The text with surrounding whitespace removed and lower-cased,
 *   or `""` when no text was supplied.
 */
function normalise(s: string | null | undefined): string {
  if (s == null) {
    return "";
  }
  const trimmed = s.trim();
  return trimmed.toLowerCase();
}
/**
 * Splits text into a set of distinct word tokens.
 *
 * A token is a maximal run of ASCII letters/digits in the lower-cased input;
 * every other character acts as a separator. Single-character runs are
 * discarded, so words such as "i" or "a" never appear in the result.
 *
 * @param s - Text to tokenise.
 * @returns The unique tokens, in order of first appearance.
 */
function tokens(s: string): Set<string> {
  // `{2,}` keeps only runs of two or more characters; the greedy match
  // always consumes a whole run, so one-character words are skipped.
  const words = s.toLowerCase().match(/[a-z0-9]{2,}/g);
  return new Set<string>(words ?? []);
}
/**
 * Jaccard similarity of two token sets: |a ∩ b| / |a ∪ b|.
 *
 * @param a - First token set.
 * @param b - Second token set.
 * @returns A value in [0, 1]; `0` when either set is empty.
 */
function jaccard(a: Set<string>, b: Set<string>): number {
  if (!a.size || !b.size) {
    return 0;
  }
  const shared = [...a].filter((item) => b.has(item)).length;
  // |a ∪ b| by inclusion–exclusion.
  const combined = a.size + b.size - shared;
  return combined > 0 ? shared / combined : 0;
}
/**
 * Reconciles a voice transcript with air-written text into a single intent.
 *
 * Both inputs are normalised (trimmed, lower-cased) first. Resolution order:
 *  1. Neither present → `"none"` with empty text.
 *  2. Exactly one present → that one wins (`"voice_only"` / `"air_only"`).
 *  3. Both present and one contains the other as a substring, or their token
 *     sets have Jaccard overlap ≥ 0.5 → `"agree"`, using the longer string
 *     (voice on a tie).
 *  4. Both present and disagreeing → air wins if it is exactly an AAC
 *     priority token (`"conflict_air"`), otherwise voice wins
 *     (`"conflict_voice"`).
 *
 * @param voiceRaw - Raw voice transcript, or `null` if unavailable.
 * @param airRaw - Raw air-writing text, or `null` if unavailable.
 * @returns The chosen text, how it was chosen, and both normalised inputs
 *   (`null` for an input that was missing or blank).
 */
export function resolveIntent(
  voiceRaw: string | null,
  airRaw: string | null
): ResolvedIntent {
  const spoken = normalise(voiceRaw);
  const written = normalise(airRaw);

  // Every result reports both normalised inputs, with blanks shown as null.
  const outcome = (
    text: string,
    source: ResolvedIntent["source"]
  ): ResolvedIntent => ({
    text,
    source,
    voice_text: spoken === "" ? null : spoken,
    air_text: written === "" ? null : written,
  });

  // At most one channel has content: nothing to reconcile.
  if (spoken === "" || written === "") {
    if (spoken !== "") {
      return outcome(spoken, "voice_only");
    }
    if (written !== "") {
      return outcome(written, "air_only");
    }
    return outcome("", "none");
  }

  // Agreement: the user likely spoke the word while writing it, so one
  // string contains the other, or the two share at least half their tokens.
  const contained = spoken.includes(written) || written.includes(spoken);
  const agrees =
    contained || jaccard(tokens(spoken), tokens(written)) >= 0.5;
  if (agrees) {
    // Keep the richer (longer) form; voice is kept when lengths are equal.
    const richer = written.length > spoken.length ? written : spoken;
    return outcome(richer, "agree");
  }

  // Real disagreement: a priority token written in the air overrides speech;
  // otherwise the voice transcript is kept as the more informative signal.
  return AAC_PRIORITY_TOKENS.has(written)
    ? outcome(written, "conflict_air")
    : outcome(spoken, "conflict_voice");
}
|