Spaces:
Sleeping
Sleeping
| // 6-layer anti-hallucination validation pipeline. | |
| // Port of app.py validate_form_output (lines 772-847) and the danger-sign | |
| // validation inside extract_danger_signs (lines 877-947). | |
| // | |
| // Layers: | |
| // Form validation | |
| // 1. Name hallucination (दीदी/बहन/patient) | |
| // 2. Default-age hallucination (age=30 when not in transcript) | |
| // 3. Lab results hallucination (blood_group, hiv_status invented) | |
| // 4. Numeric range checks (BP, weight, Hb, gestation, temp) | |
| // Danger-sign validation | |
| // 5. Evidence length (<10 chars dropped) | |
| // 6. Generic ASHA phrase blocklist | |
| // 7. Normal value filter (BP 110/70, "ठीक है") | |
| // 8. Transcript grounding (verbatim or 30-char chunk) | |
| // 9. Duplicate-evidence dedup (all signs cite same evidence → drop all) | |
| const FAKE_NAMES = new Set(['दीदी', 'बहन', 'बहनजी', 'patient', 'दी दी', 'didi', 'bahen']) | |
| const BLOOD_GROUPS = new Set(['a+', 'a-', 'b+', 'b-', 'ab+', 'ab-', 'o+', 'o-']) | |
| const HIV_VALUES = new Set(['negative', 'positive', 'नेगेटिव', 'पॉजिटिव']) | |
| const BG_KEYWORDS = ['blood group', 'ब्लड ग्रुप', 'खून का ग्रुप', 'रक्त समूह'] | |
| const HIV_KEYWORDS = ['hiv', 'एचआईवी', 'एड्स'] | |
| const RANGES = { | |
| bp_systolic: [60, 250], | |
| bp_diastolic: [30, 150], | |
| weight_kg: [1, 200], | |
| hemoglobin_gm_percent: [3, 20], | |
| gestational_weeks: [1, 45], | |
| temperature_f: [90, 110], | |
| } | |
| const GENERIC_PHRASES = [ | |
| 'कोई तकलीफ़ हो तो फ़ोन कर दीजिए', | |
| 'कोई तकलीफ हो तो फोन कर दीजिए', | |
| 'कोई समस्या हो तो तुरंत बताइए', | |
| 'कोई समस्या हो तो फोन करें', | |
| 'कोई दिक्कत हो तो', | |
| 'अगली बार आऊँगी', | |
| 'अगली विज़िट', | |
| 'ठीक है दीदी, धन्यवाद', | |
| 'ठीक है दीदी', | |
| ] | |
| const NORMAL_INDICATORS = [ | |
| '110/70', '120/80', '110/80', '118/76', '108/72', | |
| 'बिल्कुल ठीक', 'सामान्य', 'नॉर्मल', 'अच्छा है', 'ठीक है', | |
| 'बिल्कुल सामान्य', | |
| ] | |
| function isPlainObject(v) { | |
| return v !== null && typeof v === 'object' && !Array.isArray(v) | |
| } | |
| /** | |
| * Strip hallucinated fields + apply range checks on form output. | |
| * Mutates and returns `parsed`. | |
| */ | |
| export function validateFormOutput(parsed, transcript) { | |
| if (!isPlainObject(parsed)) return parsed | |
| const tLower = (transcript || '').toLowerCase() | |
| // Layer 1 — fake names | |
| const patient = isPlainObject(parsed.patient) ? parsed.patient : {} | |
| const name = patient.name ?? patient.patient_name | |
| if (name && FAKE_NAMES.has(String(name).trim().toLowerCase())) { | |
| if (isPlainObject(parsed.patient)) { | |
| for (const key of ['name', 'patient_name']) { | |
| if (key in parsed.patient) parsed.patient[key] = null | |
| } | |
| } | |
| } | |
| // Layer 2 — default-age hallucination | |
| const age = patient.age ?? patient.patient_age | |
| if (age === 30) { | |
| const t = transcript || '' | |
| if (!t.includes('30') && !t.includes('तीस')) { | |
| if (isPlainObject(parsed.patient)) { | |
| for (const key of ['age', 'patient_age']) { | |
| if (key in parsed.patient) parsed.patient[key] = null | |
| } | |
| } | |
| } | |
| } | |
| // Layer 3a — blood group invented | |
| const lab = isPlainObject(parsed.lab_results) ? parsed.lab_results : {} | |
| const bg = lab.blood_group | |
| if (bg && BLOOD_GROUPS.has(String(bg).trim().toLowerCase())) { | |
| const mentioned = BG_KEYWORDS.some((kw) => tLower.includes(kw)) | |
| if (!mentioned) { | |
| if (!isPlainObject(parsed.lab_results)) parsed.lab_results = {} | |
| parsed.lab_results.blood_group = null | |
| } | |
| } | |
| // Layer 3b — HIV invented | |
| const hiv = lab.hiv_status ?? lab.hiv | |
| if (hiv && HIV_VALUES.has(String(hiv).trim().toLowerCase())) { | |
| const mentioned = HIV_KEYWORDS.some((kw) => tLower.includes(kw)) | |
| if (!mentioned) { | |
| if (isPlainObject(parsed.lab_results)) { | |
| for (const key of ['hiv_status', 'hiv']) { | |
| if (key in parsed.lab_results) parsed.lab_results[key] = null | |
| } | |
| } | |
| } | |
| } | |
| // Layer 4 — numeric range checks | |
| const sections = [ | |
| parsed, | |
| isPlainObject(parsed.vitals) ? parsed.vitals : null, | |
| isPlainObject(parsed.pregnancy) ? parsed.pregnancy : null, | |
| isPlainObject(parsed.anc_details) ? parsed.anc_details : null, | |
| isPlainObject(parsed.newborn) ? parsed.newborn : null, | |
| ].filter(Boolean) | |
| for (const section of sections) { | |
| for (const [field, [lo, hi]] of Object.entries(RANGES)) { | |
| const val = section[field] | |
| if (val == null) continue | |
| const num = Number(val) | |
| if (Number.isFinite(num) && (num < lo || num > hi)) { | |
| section[field] = null | |
| } | |
| } | |
| } | |
| return parsed | |
| } | |
| /** | |
| * Validate danger_signs array against transcript. | |
| * Input: { danger_signs: [...], ... }, transcript string. | |
| * Returns a new object with the danger_signs array filtered. | |
| */ | |
| export function validateDangerSigns(parsed, transcript) { | |
| if (!isPlainObject(parsed) || !Array.isArray(parsed.danger_signs)) return parsed | |
| const normTranscript = (transcript || '').replace(/\s+/g, ' ').trim() | |
| const validated = [] | |
| for (const sign of parsed.danger_signs) { | |
| const evidence = sign.utterance_evidence || '' | |
| // Layer 5 — evidence length | |
| if (!evidence || evidence.length < 10) continue | |
| const normEvidence = evidence.replace(/\s+/g, ' ').trim() | |
| // Layer 6 — generic ASHA phrases | |
| if (GENERIC_PHRASES.some((p) => normEvidence.includes(p))) continue | |
| // Layer 7 — normal vital indicators | |
| if (NORMAL_INDICATORS.some((i) => normEvidence.includes(i))) continue | |
| // Layer 8 — transcript grounding | |
| let found = false | |
| if (normTranscript.includes(normEvidence)) { | |
| found = true | |
| } else if (normEvidence.length >= 20) { | |
| const minChunk = Math.min(30, normEvidence.length) | |
| for (let i = 0; i <= normEvidence.length - minChunk; i++) { | |
| if (normTranscript.includes(normEvidence.slice(i, i + minChunk))) { | |
| found = true | |
| break | |
| } | |
| } | |
| } | |
| if (!found) continue | |
| validated.push(sign) | |
| } | |
| // Layer 9 — all cite same evidence → drop all | |
| if (validated.length > 1) { | |
| const evidences = new Set(validated.map((s) => (s.utterance_evidence || '').trim())) | |
| if (evidences.size === 1) { | |
| return { ...parsed, danger_signs: [] } | |
| } | |
| } | |
| return { ...parsed, danger_signs: validated } | |
| } | |