Sarjinkhan2003 committed on
Commit
9c73ebf
·
verified ·
1 Parent(s): f964f83

Bengali vocab

Browse files
Files changed (1) hide show
  1. bengali/vocab.json +136 -134
bengali/vocab.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "charset": "অআইঈউঊঋঌএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ৎািীুূৃেৈোৌ্ংঃঁ০১২৩৪৫৬৭৮৯ ।,.?!()-–:'%/\\ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
3
- "num_classes": 148,
4
  "char2idx": {
5
  "অ": 1,
6
  "আ": 2,
@@ -83,72 +83,73 @@
83
  "-": 79,
84
  "–": 80,
85
  ":": 81,
86
- "'": 82,
87
- "%": 83,
88
- "/": 84,
89
- "\\": 85,
90
- "A": 86,
91
- "B": 87,
92
- "C": 88,
93
- "D": 89,
94
- "E": 90,
95
- "F": 91,
96
- "G": 92,
97
- "H": 93,
98
- "I": 94,
99
- "J": 95,
100
- "K": 96,
101
- "L": 97,
102
- "M": 98,
103
- "N": 99,
104
- "O": 100,
105
- "P": 101,
106
- "Q": 102,
107
- "R": 103,
108
- "S": 104,
109
- "T": 105,
110
- "U": 106,
111
- "V": 107,
112
- "W": 108,
113
- "X": 109,
114
- "Y": 110,
115
- "Z": 111,
116
- "a": 112,
117
- "b": 113,
118
- "c": 114,
119
- "d": 115,
120
- "e": 116,
121
- "f": 117,
122
- "g": 118,
123
- "h": 119,
124
- "i": 120,
125
- "j": 121,
126
- "k": 122,
127
- "l": 123,
128
- "m": 124,
129
- "n": 125,
130
- "o": 126,
131
- "p": 127,
132
- "q": 128,
133
- "r": 129,
134
- "s": 130,
135
- "t": 131,
136
- "u": 132,
137
- "v": 133,
138
- "w": 134,
139
- "x": 135,
140
- "y": 136,
141
- "z": 137,
142
- "0": 138,
143
- "1": 139,
144
- "2": 140,
145
- "3": 141,
146
- "4": 142,
147
- "5": 143,
148
- "6": 144,
149
- "7": 145,
150
- "8": 146,
151
- "9": 147
 
152
  },
153
  "idx2char": {
154
  "1": "অ",
@@ -232,72 +233,73 @@
232
  "79": "-",
233
  "80": "–",
234
  "81": ":",
235
- "82": "'",
236
- "83": "%",
237
- "84": "/",
238
- "85": "\\",
239
- "86": "A",
240
- "87": "B",
241
- "88": "C",
242
- "89": "D",
243
- "90": "E",
244
- "91": "F",
245
- "92": "G",
246
- "93": "H",
247
- "94": "I",
248
- "95": "J",
249
- "96": "K",
250
- "97": "L",
251
- "98": "M",
252
- "99": "N",
253
- "100": "O",
254
- "101": "P",
255
- "102": "Q",
256
- "103": "R",
257
- "104": "S",
258
- "105": "T",
259
- "106": "U",
260
- "107": "V",
261
- "108": "W",
262
- "109": "X",
263
- "110": "Y",
264
- "111": "Z",
265
- "112": "a",
266
- "113": "b",
267
- "114": "c",
268
- "115": "d",
269
- "116": "e",
270
- "117": "f",
271
- "118": "g",
272
- "119": "h",
273
- "120": "i",
274
- "121": "j",
275
- "122": "k",
276
- "123": "l",
277
- "124": "m",
278
- "125": "n",
279
- "126": "o",
280
- "127": "p",
281
- "128": "q",
282
- "129": "r",
283
- "130": "s",
284
- "131": "t",
285
- "132": "u",
286
- "133": "v",
287
- "134": "w",
288
- "135": "x",
289
- "136": "y",
290
- "137": "z",
291
- "138": "0",
292
- "139": "1",
293
- "140": "2",
294
- "141": "3",
295
- "142": "4",
296
- "143": "5",
297
- "144": "6",
298
- "145": "7",
299
- "146": "8",
300
- "147": "9",
 
301
  "0": ""
302
  }
303
  }
 
1
  {
2
+ "charset": "অআইঈউঊঋঌএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ৎািীুূৃেৈোৌ্ংঃঁ০১২৩৪৫৬৭৮৯ ।,.?!()-–:;'%/\\ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
3
+ "num_classes": 149,
4
  "char2idx": {
5
  "অ": 1,
6
  "আ": 2,
 
83
  "-": 79,
84
  "–": 80,
85
  ":": 81,
86
+ ";": 82,
87
+ "'": 83,
88
+ "%": 84,
89
+ "/": 85,
90
+ "\\": 86,
91
+ "A": 87,
92
+ "B": 88,
93
+ "C": 89,
94
+ "D": 90,
95
+ "E": 91,
96
+ "F": 92,
97
+ "G": 93,
98
+ "H": 94,
99
+ "I": 95,
100
+ "J": 96,
101
+ "K": 97,
102
+ "L": 98,
103
+ "M": 99,
104
+ "N": 100,
105
+ "O": 101,
106
+ "P": 102,
107
+ "Q": 103,
108
+ "R": 104,
109
+ "S": 105,
110
+ "T": 106,
111
+ "U": 107,
112
+ "V": 108,
113
+ "W": 109,
114
+ "X": 110,
115
+ "Y": 111,
116
+ "Z": 112,
117
+ "a": 113,
118
+ "b": 114,
119
+ "c": 115,
120
+ "d": 116,
121
+ "e": 117,
122
+ "f": 118,
123
+ "g": 119,
124
+ "h": 120,
125
+ "i": 121,
126
+ "j": 122,
127
+ "k": 123,
128
+ "l": 124,
129
+ "m": 125,
130
+ "n": 126,
131
+ "o": 127,
132
+ "p": 128,
133
+ "q": 129,
134
+ "r": 130,
135
+ "s": 131,
136
+ "t": 132,
137
+ "u": 133,
138
+ "v": 134,
139
+ "w": 135,
140
+ "x": 136,
141
+ "y": 137,
142
+ "z": 138,
143
+ "0": 139,
144
+ "1": 140,
145
+ "2": 141,
146
+ "3": 142,
147
+ "4": 143,
148
+ "5": 144,
149
+ "6": 145,
150
+ "7": 146,
151
+ "8": 147,
152
+ "9": 148
153
  },
154
  "idx2char": {
155
  "1": "অ",
 
233
  "79": "-",
234
  "80": "–",
235
  "81": ":",
236
+ "82": ";",
237
+ "83": "'",
238
+ "84": "%",
239
+ "85": "/",
240
+ "86": "\\",
241
+ "87": "A",
242
+ "88": "B",
243
+ "89": "C",
244
+ "90": "D",
245
+ "91": "E",
246
+ "92": "F",
247
+ "93": "G",
248
+ "94": "H",
249
+ "95": "I",
250
+ "96": "J",
251
+ "97": "K",
252
+ "98": "L",
253
+ "99": "M",
254
+ "100": "N",
255
+ "101": "O",
256
+ "102": "P",
257
+ "103": "Q",
258
+ "104": "R",
259
+ "105": "S",
260
+ "106": "T",
261
+ "107": "U",
262
+ "108": "V",
263
+ "109": "W",
264
+ "110": "X",
265
+ "111": "Y",
266
+ "112": "Z",
267
+ "113": "a",
268
+ "114": "b",
269
+ "115": "c",
270
+ "116": "d",
271
+ "117": "e",
272
+ "118": "f",
273
+ "119": "g",
274
+ "120": "h",
275
+ "121": "i",
276
+ "122": "j",
277
+ "123": "k",
278
+ "124": "l",
279
+ "125": "m",
280
+ "126": "n",
281
+ "127": "o",
282
+ "128": "p",
283
+ "129": "q",
284
+ "130": "r",
285
+ "131": "s",
286
+ "132": "t",
287
+ "133": "u",
288
+ "134": "v",
289
+ "135": "w",
290
+ "136": "x",
291
+ "137": "y",
292
+ "138": "z",
293
+ "139": "0",
294
+ "140": "1",
295
+ "141": "2",
296
+ "142": "3",
297
+ "143": "4",
298
+ "144": "5",
299
+ "145": "6",
300
+ "146": "7",
301
+ "147": "8",
302
+ "148": "9",
303
  "0": ""
304
  }
305
  }