Biu3010 committed on
Commit
23c8d2f
·
verified ·
1 Parent(s): ed866d4

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -53,5 +53,5 @@
53
  "tokenizer_class": "MBart50Tokenizer",
54
  "transformers_version": "4.57.1",
55
  "use_cache": true,
56
- "vocab_size": 250057
57
  }
 
53
  "tokenizer_class": "MBart50Tokenizer",
54
  "transformers_version": "4.57.1",
55
  "use_cache": true,
56
+ "vocab_size": 250054
57
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce6f5e772d40e23373efb6a3cd4218cf190e687a5100cdede10cf2b5f86285f5
3
- size 2444590988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d018c7fbed55974ff0805fbd0e76c9fe837550788678a217dbb081b964b1d942
3
+ size 2444578688
special_tokens_map.json CHANGED
@@ -1,26 +1,57 @@
1
  {
2
  "additional_special_tokens": [
3
- {
4
- "content": "<northern>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "<central>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "<southern>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  ],
25
  "bos_token": "<s>",
26
  "cls_token": "<s>",
 
1
  {
2
  "additional_special_tokens": [
3
+ "ar_AR",
4
+ "cs_CZ",
5
+ "de_DE",
6
+ "en_XX",
7
+ "es_XX",
8
+ "et_EE",
9
+ "fi_FI",
10
+ "fr_XX",
11
+ "gu_IN",
12
+ "hi_IN",
13
+ "it_IT",
14
+ "ja_XX",
15
+ "kk_KZ",
16
+ "ko_KR",
17
+ "lt_LT",
18
+ "lv_LV",
19
+ "my_MM",
20
+ "ne_NP",
21
+ "nl_XX",
22
+ "ro_RO",
23
+ "ru_RU",
24
+ "si_LK",
25
+ "tr_TR",
26
+ "vi_VN",
27
+ "zh_CN",
28
+ "af_ZA",
29
+ "az_AZ",
30
+ "bn_IN",
31
+ "fa_IR",
32
+ "he_IL",
33
+ "hr_HR",
34
+ "id_ID",
35
+ "ka_GE",
36
+ "km_KH",
37
+ "mk_MK",
38
+ "ml_IN",
39
+ "mn_MN",
40
+ "mr_IN",
41
+ "pl_PL",
42
+ "ps_AF",
43
+ "pt_XX",
44
+ "sv_SE",
45
+ "sw_KE",
46
+ "ta_IN",
47
+ "te_IN",
48
+ "th_TH",
49
+ "tl_XX",
50
+ "uk_UA",
51
+ "ur_PK",
52
+ "xh_ZA",
53
+ "gl_ES",
54
+ "sl_SI"
55
  ],
56
  "bos_token": "<s>",
57
  "cls_token": "<s>",
tokenizer_config.json CHANGED
@@ -455,36 +455,61 @@
455
  "rstrip": false,
456
  "single_word": false,
457
  "special": true
458
- },
459
- "250054": {
460
- "content": "<northern>",
461
- "lstrip": false,
462
- "normalized": false,
463
- "rstrip": false,
464
- "single_word": false,
465
- "special": true
466
- },
467
- "250055": {
468
- "content": "<central>",
469
- "lstrip": false,
470
- "normalized": false,
471
- "rstrip": false,
472
- "single_word": false,
473
- "special": true
474
- },
475
- "250056": {
476
- "content": "<southern>",
477
- "lstrip": false,
478
- "normalized": false,
479
- "rstrip": false,
480
- "single_word": false,
481
- "special": true
482
  }
483
  },
484
  "additional_special_tokens": [
485
- "<northern>",
486
- "<central>",
487
- "<southern>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
  ],
489
  "bos_token": "<s>",
490
  "clean_up_tokenization_spaces": false,
 
455
  "rstrip": false,
456
  "single_word": false,
457
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  }
459
  },
460
  "additional_special_tokens": [
461
+ "ar_AR",
462
+ "cs_CZ",
463
+ "de_DE",
464
+ "en_XX",
465
+ "es_XX",
466
+ "et_EE",
467
+ "fi_FI",
468
+ "fr_XX",
469
+ "gu_IN",
470
+ "hi_IN",
471
+ "it_IT",
472
+ "ja_XX",
473
+ "kk_KZ",
474
+ "ko_KR",
475
+ "lt_LT",
476
+ "lv_LV",
477
+ "my_MM",
478
+ "ne_NP",
479
+ "nl_XX",
480
+ "ro_RO",
481
+ "ru_RU",
482
+ "si_LK",
483
+ "tr_TR",
484
+ "vi_VN",
485
+ "zh_CN",
486
+ "af_ZA",
487
+ "az_AZ",
488
+ "bn_IN",
489
+ "fa_IR",
490
+ "he_IL",
491
+ "hr_HR",
492
+ "id_ID",
493
+ "ka_GE",
494
+ "km_KH",
495
+ "mk_MK",
496
+ "ml_IN",
497
+ "mn_MN",
498
+ "mr_IN",
499
+ "pl_PL",
500
+ "ps_AF",
501
+ "pt_XX",
502
+ "sv_SE",
503
+ "sw_KE",
504
+ "ta_IN",
505
+ "te_IN",
506
+ "th_TH",
507
+ "tl_XX",
508
+ "uk_UA",
509
+ "ur_PK",
510
+ "xh_ZA",
511
+ "gl_ES",
512
+ "sl_SI"
513
  ],
514
  "bos_token": "<s>",
515
  "clean_up_tokenization_spaces": false,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fcf0880a068fe9e26ebe93aa8abde4a6c35820f6d6656ce30defd741c516c76
3
  size 5969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ebaa665ea27db5f8e7e0633eb18198348507328968472f22b70586e737dc5b6
3
  size 5969