Sarjinkhan2003 committed on
Commit
db3128a
·
verified ·
1 Parent(s): dd3fddc

Bengali model CER=0.0046

Browse files
bengali/shobdo_bengali.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48c81b5727ec94e8ffc80a5b66433cf2ab4f0b25b74438f4017ae65dcafaad5c
3
+ size 18697093
bengali/shobdo_bengali.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
class ConvBnRelu(nn.Module):
    """Conv2d -> BatchNorm2d -> ReLU building block.

    Args:
        i: Number of input channels for the convolution.
        o: Number of output channels (also the BatchNorm2d feature count).
        k: Convolution kernel size. Defaults to 3.
        s: Convolution stride. Defaults to 1.
        p: Convolution padding. Defaults to 1.
    """

    def __init__(self, i, o, k=3, s=1, p=1):
        super().__init__()
        # NOTE(review): the attribute name ``b`` and the layer order fix the
        # state_dict keys (``b.0.weight``, ``b.1.*``); presumably they must
        # match the shipped checkpoint -- do not rename without confirming.
        self.b = nn.Sequential(
            # The convolution has no bias term; BatchNorm2d follows directly.
            nn.Conv2d(i, o, k, s, p, bias=False),
            nn.BatchNorm2d(o),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Apply convolution, batch normalisation and ReLU to ``x``."""
        return self.b(x)
10
+
11
class LightCNN(nn.Module):
    """Five-stage convolutional feature extractor producing a 1-D sequence.

    The first stage takes a single-channel input and the last stage emits
    256 channels. Stages 1-2 pool both height and width by 2; stages 3-4
    pool height only (kernel ``(2, 1)``); stage 5 does not pool. A final
    adaptive average pool collapses the remaining height to 1 while leaving
    the width untouched.
    """

    def __init__(self):
        super().__init__()
        # NOTE(review): attribute names b1..b5 / pool fix the state_dict keys;
        # presumably they must match the shipped checkpoint -- confirm before
        # renaming.
        self.b1 = nn.Sequential(
            ConvBnRelu(1, 32),
            ConvBnRelu(32, 32),
            nn.MaxPool2d(2, 2),
        )
        self.b2 = nn.Sequential(
            ConvBnRelu(32, 64),
            ConvBnRelu(64, 64),
            nn.MaxPool2d(2, 2),
        )
        # From here on only the height is pooled, so the width (sequence
        # length) is preserved.
        self.b3 = nn.Sequential(
            ConvBnRelu(64, 128),
            ConvBnRelu(128, 128),
            nn.MaxPool2d((2, 1)),
        )
        self.b4 = nn.Sequential(
            ConvBnRelu(128, 256),
            ConvBnRelu(256, 256),
            nn.MaxPool2d((2, 1)),
        )
        self.b5 = nn.Sequential(
            ConvBnRelu(256, 256),
            ConvBnRelu(256, 256),
        )
        # Output height 1; ``None`` keeps the input width as-is.
        self.pool = nn.AdaptiveAvgPool2d((1, None))

    def forward(self, x):
        """Run the five stages in order and drop the collapsed height axis.

        Args:
            x: 4-D input whose channel dimension is 1 (required by the first
                convolution) -- assumed layout (batch, 1, height, width).

        Returns:
            Tensor with dimension 2 (height, size 1 after pooling) squeezed
            out, i.e. (batch, 256, width') under the layout assumed above.
        """
        stages = (self.b1, self.b2, self.b3, self.b4, self.b5)
        for stage in stages:
            x = stage(x)
        pooled = self.pool(x)
        return pooled.squeeze(2)
23
+
24
class BiLSTM(nn.Module):
    """Bidirectional LSTM followed by a per-step linear projection.

    Args:
        i: Feature size of each input step (LSTM ``input_size``).
        h: Hidden size of the LSTM, per direction.
        o: Output feature size of the linear projection.
    """

    def __init__(self, i, h, o):
        super().__init__()
        # NOTE(review): attribute names ``rnn`` / ``fc`` fix the state_dict
        # keys; presumably they must match the shipped checkpoint.
        self.rnn = nn.LSTM(i, h, bidirectional=True, batch_first=True)
        # Forward and backward hidden states are concatenated, hence h * 2.
        self.fc = nn.Linear(h * 2, o)

    def forward(self, x):
        """Project the LSTM output of every step.

        Args:
            x: Batch-first input sequence, (batch, seq, i).

        Returns:
            Tensor of shape (batch, seq, o).
        """
        # The final (hidden, cell) state is not needed, only per-step outputs.
        outputs, _ = self.rnn(x)
        return self.fc(outputs)
30
+
31
class Model(nn.Module):
    """CNN + stacked BiLSTM recogniser emitting per-step log-probabilities.

    Args:
        input_channel: Accepted but not used; the CNN's input channel count
            is fixed inside ``LightCNN``.
        output_channel: Accepted but not used; the first BiLSTM is wired to
            a fixed feature size of 256.
        hidden_size: Hidden size of both BiLSTM layers, and the output size
            of the first one.
        num_class: Number of output classes of the final projection.

    NOTE(review): the two unused parameters are presumably kept so an
    external loader can construct the model with a fixed keyword set --
    confirm against the caller before removing them.
    """

    def __init__(self, input_channel, output_channel, hidden_size, num_class):
        super().__init__()
        # NOTE(review): attribute names ``cnn`` / ``rnn`` fix the state_dict
        # keys; presumably they must match the shipped checkpoint.
        self.cnn = LightCNN()
        self.rnn = nn.Sequential(
            BiLSTM(256, hidden_size, hidden_size),
            BiLSTM(hidden_size, hidden_size, num_class),
        )

    def forward(self, x):
        """Compute log-softmax class scores for every sequence step.

        Args:
            x: Input batch passed straight to ``LightCNN``.

        Returns:
            Log-probabilities over the last dimension, with the first two
            dimensions of the BiLSTM output swapped, i.e. sequence-first:
            (seq, batch, num_class).
        """
        # CNN output is channels-first; the batch-first LSTMs expect the
        # feature axis last.
        features = self.cnn(x).permute(0, 2, 1)
        # Swap batch and sequence axes (time-major; presumably for a CTC
        # loss/decoder -- confirm against the training code).
        scores = self.rnn(features).permute(1, 0, 2)
        return F.log_softmax(scores, dim=2)
bengali/vocab.json ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "charset": "অআইঈউঊঋঌএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ৎািীুূৃেৈোৌ্ংঃঁ০১২৩৪৫৬৭৮৯ ।,.?!()-–:'%/\\ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
3
+ "num_classes": 148,
4
+ "char2idx": {
5
+ "অ": 1,
6
+ "আ": 2,
7
+ "ই": 3,
8
+ "ঈ": 4,
9
+ "উ": 5,
10
+ "ঊ": 6,
11
+ "ঋ": 7,
12
+ "ঌ": 8,
13
+ "এ": 9,
14
+ "ঐ": 10,
15
+ "ও": 11,
16
+ "ঔ": 12,
17
+ "ক": 13,
18
+ "খ": 14,
19
+ "গ": 15,
20
+ "ঘ": 16,
21
+ "ঙ": 17,
22
+ "চ": 18,
23
+ "ছ": 19,
24
+ "জ": 20,
25
+ "ঝ": 21,
26
+ "ঞ": 22,
27
+ "ট": 23,
28
+ "ঠ": 24,
29
+ "ড": 25,
30
+ "ঢ": 26,
31
+ "ণ": 27,
32
+ "ত": 28,
33
+ "থ": 29,
34
+ "দ": 30,
35
+ "ধ": 31,
36
+ "ন": 32,
37
+ "প": 33,
38
+ "ফ": 34,
39
+ "ব": 35,
40
+ "ভ": 36,
41
+ "ম": 37,
42
+ "য": 38,
43
+ "র": 39,
44
+ "ল": 40,
45
+ "শ": 41,
46
+ "ষ": 42,
47
+ "স": 43,
48
+ "হ": 44,
49
+ "়": 45,
50
+ "ৎ": 46,
51
+ "া": 47,
52
+ "ি": 48,
53
+ "ী": 49,
54
+ "ু": 50,
55
+ "ূ": 51,
56
+ "ৃ": 52,
57
+ "ে": 53,
58
+ "ৈ": 54,
59
+ "ো": 55,
60
+ "ৌ": 56,
61
+ "্": 57,
62
+ "ং": 58,
63
+ "ঃ": 59,
64
+ "ঁ": 60,
65
+ "০": 61,
66
+ "১": 62,
67
+ "২": 63,
68
+ "৩": 64,
69
+ "৪": 65,
70
+ "৫": 66,
71
+ "৬": 67,
72
+ "৭": 68,
73
+ "৮": 69,
74
+ "৯": 70,
75
+ " ": 71,
76
+ "।": 72,
77
+ ",": 73,
78
+ ".": 74,
79
+ "?": 75,
80
+ "!": 76,
81
+ "(": 77,
82
+ ")": 78,
83
+ "-": 79,
84
+ "–": 80,
85
+ ":": 81,
86
+ "'": 82,
87
+ "%": 83,
88
+ "/": 84,
89
+ "\\": 85,
90
+ "A": 86,
91
+ "B": 87,
92
+ "C": 88,
93
+ "D": 89,
94
+ "E": 90,
95
+ "F": 91,
96
+ "G": 92,
97
+ "H": 93,
98
+ "I": 94,
99
+ "J": 95,
100
+ "K": 96,
101
+ "L": 97,
102
+ "M": 98,
103
+ "N": 99,
104
+ "O": 100,
105
+ "P": 101,
106
+ "Q": 102,
107
+ "R": 103,
108
+ "S": 104,
109
+ "T": 105,
110
+ "U": 106,
111
+ "V": 107,
112
+ "W": 108,
113
+ "X": 109,
114
+ "Y": 110,
115
+ "Z": 111,
116
+ "a": 112,
117
+ "b": 113,
118
+ "c": 114,
119
+ "d": 115,
120
+ "e": 116,
121
+ "f": 117,
122
+ "g": 118,
123
+ "h": 119,
124
+ "i": 120,
125
+ "j": 121,
126
+ "k": 122,
127
+ "l": 123,
128
+ "m": 124,
129
+ "n": 125,
130
+ "o": 126,
131
+ "p": 127,
132
+ "q": 128,
133
+ "r": 129,
134
+ "s": 130,
135
+ "t": 131,
136
+ "u": 132,
137
+ "v": 133,
138
+ "w": 134,
139
+ "x": 135,
140
+ "y": 136,
141
+ "z": 137,
142
+ "0": 138,
143
+ "1": 139,
144
+ "2": 140,
145
+ "3": 141,
146
+ "4": 142,
147
+ "5": 143,
148
+ "6": 144,
149
+ "7": 145,
150
+ "8": 146,
151
+ "9": 147
152
+ },
153
+ "idx2char": {
154
+ "1": "অ",
155
+ "2": "আ",
156
+ "3": "ই",
157
+ "4": "ঈ",
158
+ "5": "উ",
159
+ "6": "ঊ",
160
+ "7": "ঋ",
161
+ "8": "ঌ",
162
+ "9": "এ",
163
+ "10": "ঐ",
164
+ "11": "ও",
165
+ "12": "ঔ",
166
+ "13": "ক",
167
+ "14": "খ",
168
+ "15": "গ",
169
+ "16": "ঘ",
170
+ "17": "ঙ",
171
+ "18": "চ",
172
+ "19": "ছ",
173
+ "20": "জ",
174
+ "21": "ঝ",
175
+ "22": "ঞ",
176
+ "23": "ট",
177
+ "24": "ঠ",
178
+ "25": "ড",
179
+ "26": "ঢ",
180
+ "27": "ণ",
181
+ "28": "ত",
182
+ "29": "থ",
183
+ "30": "দ",
184
+ "31": "ধ",
185
+ "32": "ন",
186
+ "33": "প",
187
+ "34": "ফ",
188
+ "35": "ব",
189
+ "36": "ভ",
190
+ "37": "ম",
191
+ "38": "য",
192
+ "39": "র",
193
+ "40": "ল",
194
+ "41": "শ",
195
+ "42": "ষ",
196
+ "43": "স",
197
+ "44": "হ",
198
+ "45": "়",
199
+ "46": "ৎ",
200
+ "47": "া",
201
+ "48": "ি",
202
+ "49": "ী",
203
+ "50": "ু",
204
+ "51": "ূ",
205
+ "52": "ৃ",
206
+ "53": "ে",
207
+ "54": "ৈ",
208
+ "55": "ো",
209
+ "56": "ৌ",
210
+ "57": "্",
211
+ "58": "ং",
212
+ "59": "ঃ",
213
+ "60": "ঁ",
214
+ "61": "০",
215
+ "62": "১",
216
+ "63": "২",
217
+ "64": "৩",
218
+ "65": "৪",
219
+ "66": "৫",
220
+ "67": "৬",
221
+ "68": "৭",
222
+ "69": "৮",
223
+ "70": "৯",
224
+ "71": " ",
225
+ "72": "।",
226
+ "73": ",",
227
+ "74": ".",
228
+ "75": "?",
229
+ "76": "!",
230
+ "77": "(",
231
+ "78": ")",
232
+ "79": "-",
233
+ "80": "–",
234
+ "81": ":",
235
+ "82": "'",
236
+ "83": "%",
237
+ "84": "/",
238
+ "85": "\\",
239
+ "86": "A",
240
+ "87": "B",
241
+ "88": "C",
242
+ "89": "D",
243
+ "90": "E",
244
+ "91": "F",
245
+ "92": "G",
246
+ "93": "H",
247
+ "94": "I",
248
+ "95": "J",
249
+ "96": "K",
250
+ "97": "L",
251
+ "98": "M",
252
+ "99": "N",
253
+ "100": "O",
254
+ "101": "P",
255
+ "102": "Q",
256
+ "103": "R",
257
+ "104": "S",
258
+ "105": "T",
259
+ "106": "U",
260
+ "107": "V",
261
+ "108": "W",
262
+ "109": "X",
263
+ "110": "Y",
264
+ "111": "Z",
265
+ "112": "a",
266
+ "113": "b",
267
+ "114": "c",
268
+ "115": "d",
269
+ "116": "e",
270
+ "117": "f",
271
+ "118": "g",
272
+ "119": "h",
273
+ "120": "i",
274
+ "121": "j",
275
+ "122": "k",
276
+ "123": "l",
277
+ "124": "m",
278
+ "125": "n",
279
+ "126": "o",
280
+ "127": "p",
281
+ "128": "q",
282
+ "129": "r",
283
+ "130": "s",
284
+ "131": "t",
285
+ "132": "u",
286
+ "133": "v",
287
+ "134": "w",
288
+ "135": "x",
289
+ "136": "y",
290
+ "137": "z",
291
+ "138": "0",
292
+ "139": "1",
293
+ "140": "2",
294
+ "141": "3",
295
+ "142": "4",
296
+ "143": "5",
297
+ "144": "6",
298
+ "145": "7",
299
+ "146": "8",
300
+ "147": "9",
301
+ "0": ""
302
+ }
303
+ }