File size: 4,080 Bytes
745f62a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import { test } from 'node:test'
import assert from 'node:assert/strict'
import {
  WORD_TO_NUM,
  parseHindiNumber,
  convertNumbers,
  normalizeTranscript,
} from '../hindiNormalize.js'

// Sanity check of the lookup table: minimum size plus a handful of anchor values.
test('WORD_TO_NUM has 160+ entries covering 0-99 + 100', () => {
  const entryCount = Object.keys(WORD_TO_NUM).length
  assert.ok(entryCount >= 160)

  // [word, expected numeric value]
  const anchors = [
    ['शून्य', 0],
    ['एक', 1],
    ['दस', 10],
    ['सौ', 100],
  ]
  for (const [word, value] of anchors) {
    assert.equal(WORD_TO_NUM[word], value)
  }
})

// Each single number word must parse to its numeric value.
test('parseHindiNumber single-word lookups', () => {
  // [input word, expected number]
  const lookups = [
    ['शून्य', 0],
    ['एक', 1],
    ['दस', 10],
    ['सत्तर', 70],
    ['अट्ठावन', 58],
    ['सौ', 100],
  ]
  for (const [word, expected] of lookups) {
    assert.equal(parseHindiNumber(word), expected)
  }
})

// Multi-word phrases built around "सौ" (hundred) must parse to a single number.
test('parseHindiNumber compound phrases', () => {
  // [input phrase, expected number]
  const phrases = [
    ['एक सौ दस', 110],
    ['एक सौ पचपन', 155],
    ['दो सौ', 200],
    ['पाँच सौ', 500],
  ]
  for (const [phrase, expected] of phrases) {
    assert.equal(parseHindiNumber(phrase), expected)
  }
})

// Empty, whitespace-only and non-numeric input must all yield null.
test('parseHindiNumber returns null on empty / non-number', () => {
  const nonNumbers = ['', '   ', 'नमस्ते']
  for (const input of nonNumbers) {
    assert.equal(parseHindiNumber(input), null)
  }
})

test('parseHindiNumber stops at first non-number word (mirrors Python bug)', () => {
  // Per the reference Python implementation: the loop breaks on the first
  // unknown word and returns `total + current`. `total` is never incremented,
  // so the result is just the `current` accumulated up to that point.
  const phrase = 'दस नमस्ते बीस'
  const parsed = parseHindiNumber(phrase)
  assert.equal(parsed, 10)
})

test('convertNumbers replaces number words with digits', () => {
  // Note: 'बटा' is a medical abbreviation that the normalizer turns into '/';
  // convertNumbers on its own leaves it untouched, hence '110 बटा 70' below.
  // [input text, expected output]
  const conversions = [
    ['एक सौ दस', '110'],
    ['एक सौ दस बटा सत्तर', '110 बटा 70'],
    ['अट्ठावन kg', '58 kg'],
  ]
  for (const [text, expected] of conversions) {
    assert.equal(convertNumbers(text), expected)
  }
})

test('convertNumbers handles compound splits (Whisper artifacts)', () => {
  // A merged token such as "एकसो" is expected to be split ("एक सो") and read as 100.
  // [input with merged token, expected digits]
  const mergedCases = [
    ['एकसो दस', '110'],
    ['दोसो पचास', '250'],
  ]
  for (const [merged, digits] of mergedCases) {
    assert.equal(convertNumbers(merged), digits)
  }
})

test('normalizeTranscript full pipeline - BP reading', () => {
  const normalized = normalizeTranscript('आपका BP एक सौ दस बटा सत्तर है, वजन अट्ठावन kg')
  // Expected once medical-term replacement, number conversion and the
  // space-around-slash cleanup have all run.
  const expectedFragments = ['110/70', '58 kg']
  for (const fragment of expectedFragments) {
    assert.ok(normalized.includes(fragment))
  }
})

// The Devanagari spelling of "BP" must become the Latin abbreviation, and the
// number words that follow must still be converted to digits.
test('normalizeTranscript converts बीपी → BP', () => {
  const normalized = normalizeTranscript('बीपी एक सौ दस')
  const hasLatinPrefix = normalized.startsWith('BP ')
  const hasDigits = normalized.includes('110')
  assert.ok(hasLatinPrefix)
  assert.ok(hasDigits)
})

test('normalizeTranscript fixes repetition artifacts', () => {
  // Intent: 4+ consecutive repeats collapse to 1. The check itself only
  // verifies that no run of four or more copies survives.
  const fourOrMoreRepeats = /(ठीक){4,}/
  const normalized = normalizeTranscript('ठीकठीकठीकठीक है')
  assert.equal(fourOrMoreRepeats.test(normalized), false)
})

test('normalizeTranscript handles decimal via दशमलव', () => {
  const normalized = normalizeTranscript('ग्यारह दशमलव पाँच')
  // Pipeline per the original note: दशमलव → '.', number words → digits, then
  // digit-dot-digit whitespace cleanup. Only the presence of both digit
  // groups is asserted here.
  for (const digits of ['11', '5']) {
    assert.ok(normalized.includes(digits))
  }
})

// A danda (।) in the output must be followed by a newline at least once.
test('normalizeTranscript adds line break after ।', () => {
  const transcript = 'वजन बढ़ रहा है। BP ठीक है।'
  const normalized = normalizeTranscript(transcript)
  const hasBreakAfterDanda = normalized.includes('।\n')
  assert.ok(hasBreakAfterDanda)
})

// Surrounding whitespace and the trailing '.' must be stripped from the result.
test('normalizeTranscript trims trailing punctuation/whitespace', () => {
  const padded = '  ठीक है.  '
  const expected = 'ठीक है'
  assert.equal(normalizeTranscript(padded), expected)
})

// Latin-script medical abbreviations already in the input must survive normalization.
test('normalizeTranscript preserves English medical terms', () => {
  const normalized = normalizeTranscript('BP ठीक, IFA दे दी')
  for (const term of ['BP', 'IFA']) {
    assert.ok(normalized.includes(term))
  }
})