Spaces:
Sleeping
Sleeping
File size: 6,662 Bytes
745f62a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 | // 6-layer anti-hallucination validation pipeline.
// Port of app.py validate_form_output (lines 772-847) and the danger-sign
// validation inside extract_danger_signs (lines 877-947).
//
// Layers:
// Form validation
// 1. Name hallucination (दीदी/बहन/patient)
// 2. Default-age hallucination (age=30 when not in transcript)
// 3. Lab results hallucination (blood_group, hiv_status invented)
// 4. Numeric range checks (BP, weight, Hb, gestation, temp)
// Danger-sign validation
// 5. Evidence length (<10 chars dropped)
// 6. Generic ASHA phrase blocklist
// 7. Normal value filter (BP 110/70, "ठीक है")
// 8. Transcript grounding (verbatim or 30-char chunk)
// 9. Duplicate-evidence dedup (all signs cite same evidence → drop all)
// Forms of address (Devanagari and romanised) that are not real patient
// names. Layer 1 compares the trimmed, lowercased name against this set.
const FAKE_NAMES = new Set([
  'दीदी',
  'बहन',
  'बहनजी',
  'patient',
  'दी दी',
  'didi',
  'bahen',
])

// Lowercase blood-group values that Layer 3a checks against the transcript.
const BLOOD_GROUPS = new Set([
  'a+', 'a-',
  'b+', 'b-',
  'ab+', 'ab-',
  'o+', 'o-',
])

// Lowercase HIV status values that Layer 3b checks against the transcript.
const HIV_VALUES = new Set([
  'negative',
  'positive',
  'नेगेटिव',
  'पॉजिटिव',
])

// Substrings searched for in the lowercased transcript: a lab value is only
// kept when at least one keyword for its topic is present.
const BG_KEYWORDS = [
  'blood group',
  'ब्लड ग्रुप',
  'खून का ग्रुप',
  'रक्त समूह',
]
const HIV_KEYWORDS = [
  'hiv',
  'एचआईवी',
  'एड्स',
]
// Inclusive [min, max] bounds per numeric form field. The Layer 4 range check
// nulls any finite value that falls below min or above max.
const RANGES = {
  // Blood pressure
  bp_systolic: [60, 250],
  bp_diastolic: [30, 150],
  // Body measurements and labs
  weight_kg: [1, 200],
  hemoglobin_gm_percent: [3, 20],
  // Pregnancy
  gestational_weeks: [1, 45],
  // Temperature
  temperature_f: [90, 110],
}
// Stock phrases that must not count as danger-sign evidence. Layer 6 drops a
// sign when its whitespace-normalised evidence contains any entry as a
// substring.
const GENERIC_PHRASES = [
  // "call / tell me if there is a problem"
  'कोई तकलीफ़ हो तो फ़ोन कर दीजिए',
  'कोई तकलीफ हो तो फोन कर दीजिए',
  'कोई समस्या हो तो तुरंत बताइए',
  'कोई समस्या हो तो फोन करें',
  'कोई दिक्कत हो तो',
  // next-visit talk
  'अगली बार आऊँगी',
  'अगली विज़िट',
  // sign-offs
  'ठीक है दीदी, धन्यवाद',
  'ठीक है दीदी',
]
// Substrings that mark evidence as describing a normal finding. Layer 7 drops
// a sign when its whitespace-normalised evidence contains any entry.
const NORMAL_INDICATORS = [
  // Blood-pressure readings treated as normal
  '110/70',
  '120/80',
  '110/80',
  '118/76',
  '108/72',
  // "fine" / "normal" wording
  'बिल्कुल ठीक',
  'सामान्य',
  'नॉर्मल',
  'अच्छा है',
  'ठीक है',
  'बिल्कुल सामान्य',
]
/**
 * Report whether `v` is a non-null, non-array value of type 'object'.
 * The prototype is not inspected, so class instances (e.g. Date) also pass.
 */
function isPlainObject(v) {
  if (v === null || Array.isArray(v)) return false
  return typeof v === 'object'
}
/**
 * Strip hallucinated fields from model-produced form output and null out
 * numeric values that fall outside plausible ranges.
 *
 * Layers, applied in order:
 *   1. `patient.name` / `patient.patient_name` that is only a form of
 *      address (see FAKE_NAMES) is set to null.
 *   2. `patient.age` / `patient.patient_age` equal to 30 (number or numeric
 *      string) is set to null unless the transcript mentions thirty.
 *   3. `lab_results.blood_group` and `lab_results.hiv_status` / `.hiv` holding
 *      a recognised value are set to null unless the transcript contains a
 *      keyword for that topic.
 *   4. Every RANGES field found on `parsed`, `vitals`, `pregnancy`,
 *      `anc_details` or `newborn` is set to null when it converts to a finite
 *      number outside its inclusive bounds.
 *
 * Only keys that already exist are nulled; no fields are added.
 *
 * @param {*} parsed - Form object to validate. Mutated in place. Anything
 *   that is not a plain object is returned untouched.
 * @param {string} [transcript] - Source transcript; a falsy value is treated
 *   as the empty string.
 * @returns {*} The same `parsed` reference that was passed in.
 */
export function validateFormOutput(parsed, transcript) {
  if (!isPlainObject(parsed)) return parsed

  const rawTranscript = transcript || ''
  const tLower = rawTranscript.toLowerCase()

  // Null a group of alias keys, touching only the ones already present.
  const clearKeys = (section, keys) => {
    for (const key of keys) {
      if (key in section) section[key] = null
    }
  }

  // Layer 1 — fake names
  // When parsed.patient is not an object, `patient` is a throwaway {} and all
  // patient checks below are no-ops.
  const patient = isPlainObject(parsed.patient) ? parsed.patient : {}
  const name = patient.name ?? patient.patient_name
  if (name && FAKE_NAMES.has(String(name).trim().toLowerCase())) {
    clearKeys(patient, ['name', 'patient_name'])
  }

  // Layer 2 — default-age hallucination
  // Accept numeric strings too ("30"), matching the Number() coercion that
  // the range check in Layer 4 already applies.
  const age = patient.age ?? patient.patient_age
  const isThirty =
    (typeof age === 'number' || typeof age === 'string') && Number(age) === 30
  if (isThirty) {
    // Require 30 / ३० as a standalone number: a bare substring test would be
    // satisfied by readings such as "130/80" and let the default age through.
    const standaloneThirty = /(?<![0-9०-९])(?:30|३०)(?![0-9०-९])/
    // NOTE(review): 'तीस' is also the tail of other Hindi numerals (e.g.
    // इकतीस); the substring match is kept as in the original.
    const mentioned =
      standaloneThirty.test(rawTranscript) || rawTranscript.includes('तीस')
    if (!mentioned) clearKeys(patient, ['age', 'patient_age'])
  }

  // Layer 3a — blood group invented
  const lab = isPlainObject(parsed.lab_results) ? parsed.lab_results : {}
  const bg = lab.blood_group
  if (bg && BLOOD_GROUPS.has(String(bg).trim().toLowerCase())) {
    // A truthy `bg` implies `lab` is parsed.lab_results itself.
    if (!BG_KEYWORDS.some((kw) => tLower.includes(kw))) lab.blood_group = null
  }

  // Layer 3b — HIV invented
  const hiv = lab.hiv_status ?? lab.hiv
  if (hiv && HIV_VALUES.has(String(hiv).trim().toLowerCase())) {
    if (!HIV_KEYWORDS.some((kw) => tLower.includes(kw))) {
      clearKeys(lab, ['hiv_status', 'hiv'])
    }
  }

  // Layer 4 — numeric range checks
  const sections = [
    parsed,
    parsed.vitals,
    parsed.pregnancy,
    parsed.anc_details,
    parsed.newborn,
  ].filter(isPlainObject)
  for (const section of sections) {
    for (const [field, [lo, hi]] of Object.entries(RANGES)) {
      const val = section[field]
      if (val == null) continue
      const num = Number(val)
      // Values that do not convert to a finite number (e.g. "n/a") are kept.
      if (Number.isFinite(num) && (num < lo || num > hi)) {
        section[field] = null
      }
    }
  }

  return parsed
}
/**
 * Filter `parsed.danger_signs` down to the signs whose quoted evidence is
 * actually supported by the transcript.
 *
 * A sign is dropped when any of the following holds:
 *   - it is not an object, or `utterance_evidence` is not a string;
 *   - Layer 5: the whitespace-normalised evidence is shorter than 10 chars;
 *   - Layer 6: the evidence contains a GENERIC_PHRASES entry;
 *   - Layer 7: the evidence contains a NORMAL_INDICATORS entry;
 *   - Layer 8: the evidence is not found in the whitespace-normalised
 *     transcript, either verbatim or via any 30-character window of it.
 * Layer 9: if more than one sign survives and they all cite the same
 * (trimmed) evidence, every sign is dropped.
 *
 * @param {*} parsed - Expected shape `{ danger_signs: [...], ... }`.
 * @param {string} [transcript] - Source transcript; a falsy value is treated
 *   as the empty string.
 * @returns {*} A shallow copy of `parsed` with `danger_signs` replaced by the
 *   filtered array (surviving sign objects are reused, not cloned). When
 *   `parsed` is not a plain object or `danger_signs` is not an array, `parsed`
 *   itself is returned unchanged.
 */
export function validateDangerSigns(parsed, transcript) {
  if (!isPlainObject(parsed) || !Array.isArray(parsed.danger_signs)) return parsed

  const MIN_EVIDENCE_CHARS = 10
  const CHUNK_CHARS = 30
  const collapseWhitespace = (text) => text.replace(/\s+/g, ' ').trim()

  const normTranscript = collapseWhitespace(transcript || '')
  const validated = []

  for (const sign of parsed.danger_signs) {
    // Model output may contain null or non-object entries and non-string
    // evidence; drop those instead of throwing.
    if (!isPlainObject(sign)) continue
    const rawEvidence = sign.utterance_evidence
    if (typeof rawEvidence !== 'string') continue

    const normEvidence = collapseWhitespace(rawEvidence)

    // Layer 5 — evidence length. Measured after normalisation: whitespace-only
    // evidence would otherwise collapse to '' and match any transcript.
    if (normEvidence.length < MIN_EVIDENCE_CHARS) continue

    // Layer 6 — generic ASHA phrases
    if (GENERIC_PHRASES.some((phrase) => normEvidence.includes(phrase))) continue

    // Layer 7 — normal vital indicators
    if (NORMAL_INDICATORS.some((indicator) => normEvidence.includes(indicator))) continue

    // Layer 8 — transcript grounding: verbatim first, then any 30-char window.
    // Evidence shorter than one window has no partial match to try.
    let grounded = normTranscript.includes(normEvidence)
    for (let i = 0; !grounded && i + CHUNK_CHARS <= normEvidence.length; i++) {
      grounded = normTranscript.includes(normEvidence.slice(i, i + CHUNK_CHARS))
    }
    if (!grounded) continue

    validated.push(sign)
  }

  // Layer 9 — all cite same evidence → drop all
  if (validated.length > 1) {
    const distinctEvidence = new Set(validated.map((s) => s.utterance_evidence.trim()))
    if (distinctEvidence.size === 1) {
      return { ...parsed, danger_signs: [] }
    }
  }

  return { ...parsed, danger_signs: validated }
}
|