File size: 6,662 Bytes
745f62a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
// 9-layer anti-hallucination validation pipeline.
// Port of app.py validate_form_output (lines 772-847) and the danger-sign
// validation inside extract_danger_signs (lines 877-947).
//
// Layers:
//   Form validation
//     1. Name hallucination (दीदी/बहन/patient)
//     2. Default-age hallucination (age=30 when not in transcript)
//     3. Lab results hallucination (blood_group, hiv_status invented)
//     4. Numeric range checks (BP, weight, Hb, gestation, temp)
//   Danger-sign validation
//     5. Evidence length (<10 chars dropped)
//     6. Generic ASHA phrase blocklist
//     7. Normal value filter (BP 110/70, "ठीक है")
//     8. Transcript grounding (verbatim or 30-char chunk)
//     9. Duplicate-evidence dedup (all signs cite same evidence → drop all)

// Layer 1: values rejected as a patient name. validateFormOutput compares the
// trimmed, lowercased name against this set, so entries must be lowercase.
const FAKE_NAMES = new Set(['दीदी', 'बहन', 'बहनजी', 'patient', 'दी दी', 'didi', 'bahen'])
// Layer 3a: blood-group values (lowercase). A matching lab_results.blood_group
// is nulled unless the transcript contains one of BG_KEYWORDS.
const BLOOD_GROUPS = new Set(['a+', 'a-', 'b+', 'b-', 'ab+', 'ab-', 'o+', 'o-'])
// Layer 3b: HIV status values (lowercase). A matching hiv_status/hiv field is
// nulled unless the transcript contains one of HIV_KEYWORDS.
const HIV_VALUES = new Set(['negative', 'positive', 'नेगेटिव', 'पॉजिटिव'])
// Substrings searched for in the lowercased transcript; keep entries lowercase.
const BG_KEYWORDS = ['blood group', 'ब्लड ग्रुप', 'खून का ग्रुप', 'रक्त समूह']
const HIV_KEYWORDS = ['hiv', 'एचआईवी', 'एड्स']

// Layer 4: inclusive [min, max] bounds per field. validateFormOutput nulls a
// field whose numeric value is strictly below min or strictly above max.
// NOTE(review): units are implied only by the field names — confirm with callers.
const RANGES = {
  bp_systolic: [60, 250],
  bp_diastolic: [30, 150],
  weight_kg: [1, 200],
  hemoglobin_gm_percent: [3, 20],
  gestational_weeks: [1, 45],
  temperature_f: [90, 110],
}

// Layer 6: a danger sign is dropped when its whitespace-normalised evidence
// contains any of these phrases (substring match, case-sensitive).
const GENERIC_PHRASES = [
  'कोई तकलीफ़ हो तो फ़ोन कर दीजिए',
  'कोई तकलीफ हो तो फोन कर दीजिए',
  'कोई समस्या हो तो तुरंत बताइए',
  'कोई समस्या हो तो फोन करें',
  'कोई दिक्कत हो तो',
  'अगली बार आऊँगी',
  'अगली विज़िट',
  'ठीक है दीदी, धन्यवाद',
  'ठीक है दीदी',
]

// Layer 7: a danger sign is dropped when its whitespace-normalised evidence
// contains any of these readings or phrases (substring match, case-sensitive).
const NORMAL_INDICATORS = [
  '110/70', '120/80', '110/80', '118/76', '108/72',
  'बिल्कुल ठीक', 'सामान्य', 'नॉर्मल', 'अच्छा है', 'ठीक है',
  'बिल्कुल सामान्य',
]

/**
 * Tell whether `v` is a non-null, non-array value whose `typeof` is 'object'.
 * Class instances (Date, Map, …) and null-prototype objects also qualify.
 */
function isPlainObject(v) {
  if (v === null || Array.isArray(v)) {
    return false
  }
  return typeof v === 'object'
}

/**
 * Strip hallucinated fields and out-of-range numbers from form output
 * (layers 1–4).
 *
 * Mutates `parsed` in place and returns it. Rejected fields are set to `null`
 * rather than deleted, so existing keys stay present.
 *
 * @param {object} parsed - Parsed form output. Anything that is not a
 *   non-array object is returned untouched.
 * @param {string} transcript - Transcript the form was extracted from. A
 *   missing or non-string value is treated as an empty transcript.
 * @returns {object} The same `parsed` reference that was passed in.
 */
export function validateFormOutput(parsed, transcript) {
  if (!isPlainObject(parsed)) return parsed

  // Treat a missing or non-string transcript as empty so the string methods
  // below never run on a non-string value.
  const text = typeof transcript === 'string' ? transcript : ''
  const tLower = text.toLowerCase()

  // Set each of `keys` that exists on `target` to null (keys are kept).
  const nullify = (target, keys) => {
    for (const key of keys) {
      if (key in target) target[key] = null
    }
  }

  // A missing or malformed section falls back to a throwaway object: every
  // lookup on it is undefined, so no layer fires and nothing is written back.
  const patient = isPlainObject(parsed.patient) ? parsed.patient : {}
  const lab = isPlainObject(parsed.lab_results) ? parsed.lab_results : {}

  // Layer 1 — fake names
  const name = patient.name ?? patient.patient_name
  if (name && FAKE_NAMES.has(String(name).trim().toLowerCase())) {
    nullify(patient, ['name', 'patient_name'])
  }

  // Layer 2 — default-age hallucination: an age of exactly 30 is kept only
  // when the transcript contains '30' or 'तीस'.
  const age = patient.age ?? patient.patient_age
  if (age === 30 && !text.includes('30') && !text.includes('तीस')) {
    nullify(patient, ['age', 'patient_age'])
  }

  // Layer 3a — blood group invented
  const bg = lab.blood_group
  if (
    bg &&
    BLOOD_GROUPS.has(String(bg).trim().toLowerCase()) &&
    !BG_KEYWORDS.some((kw) => tLower.includes(kw))
  ) {
    // `bg` is truthy only when `lab` is the real parsed.lab_results object.
    lab.blood_group = null
  }

  // Layer 3b — HIV invented
  const hiv = lab.hiv_status ?? lab.hiv
  if (
    hiv &&
    HIV_VALUES.has(String(hiv).trim().toLowerCase()) &&
    !HIV_KEYWORDS.some((kw) => tLower.includes(kw))
  ) {
    nullify(lab, ['hiv_status', 'hiv'])
  }

  // Layer 4 — numeric range checks, applied to the top level and to each
  // known sub-section that is present as an object.
  const sections = [
    parsed,
    parsed.vitals,
    parsed.pregnancy,
    parsed.anc_details,
    parsed.newborn,
  ].filter(isPlainObject)

  for (const section of sections) {
    for (const [field, [lo, hi]] of Object.entries(RANGES)) {
      const val = section[field]
      if (val == null) continue
      const num = Number(val)
      // Values that do not coerce to a finite number are left as they are.
      if (Number.isFinite(num) && (num < lo || num > hi)) {
        section[field] = null
      }
    }
  }

  return parsed
}

/**
 * Validate the `danger_signs` array against the transcript (layers 5–9).
 *
 * Does not mutate `parsed`. Surviving sign objects are reused by reference.
 *
 * @param {object} parsed - Expected shape `{ danger_signs: [...], ... }`.
 * @param {string} transcript - Transcript the evidence must be found in. A
 *   missing or non-string value is treated as an empty transcript.
 * @returns {object} `parsed` itself when it is not an object or has no
 *   `danger_signs` array; otherwise a shallow copy with the filtered array.
 */
export function validateDangerSigns(parsed, transcript) {
  if (!isPlainObject(parsed) || !Array.isArray(parsed.danger_signs)) return parsed

  // Collapse every whitespace run to one space so line breaks and spacing
  // differences do not defeat substring matching.
  const collapseWhitespace = (text) => text.replace(/\s+/g, ' ').trim()
  const normTranscript = collapseWhitespace(typeof transcript === 'string' ? transcript : '')

  // Layer 8 — transcript grounding: verbatim match, or for evidence of at
  // least 20 chars, any window of min(30, length) chars found in the transcript.
  const isGrounded = (normEvidence) => {
    if (normTranscript.includes(normEvidence)) return true
    if (normEvidence.length < 20) return false
    const chunkLen = Math.min(30, normEvidence.length)
    for (let start = 0; start + chunkLen <= normEvidence.length; start++) {
      if (normTranscript.includes(normEvidence.slice(start, start + chunkLen))) return true
    }
    return false
  }

  const validated = parsed.danger_signs.filter((sign) => {
    // Null entries and non-string evidence count as "no evidence" and are
    // dropped instead of throwing.
    const evidence = sign?.utterance_evidence
    if (typeof evidence !== 'string') return false

    const normEvidence = collapseWhitespace(evidence)

    // Layer 5 — evidence length, measured after normalisation: whitespace-only
    // evidence would otherwise normalise to '' and match any transcript.
    if (normEvidence.length < 10) return false

    // Layer 6 — generic ASHA phrases
    if (GENERIC_PHRASES.some((phrase) => normEvidence.includes(phrase))) return false

    // Layer 7 — normal vital indicators
    if (NORMAL_INDICATORS.some((marker) => normEvidence.includes(marker))) return false

    return isGrounded(normEvidence)
  })

  // Layer 9 — all cite same evidence → drop all
  if (validated.length > 1) {
    const distinctEvidence = new Set(validated.map((s) => s.utterance_evidence.trim()))
    if (distinctEvidence.size === 1) {
      return { ...parsed, danger_signs: [] }
    }
  }

  return { ...parsed, danger_signs: validated }
}