FiveC committed on
Commit
0834433
·
verified ·
1 Parent(s): 9d0a24b

End of training

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. README.md +63 -0
  3. model.safetensors +1 -1
  4. tokenizer.json +3 -0
  5. tokenizer_config.json +73 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ base_model: facebook/mbart-large-50-many-to-many-mmt
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - sacrebleu
8
+ model-index:
9
+ - name: za-zh-theme
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # za-zh-theme
17
+
18
+ This model is a fine-tuned version of [facebook/mbart-large-50-many-to-many-mmt](https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt) on an unknown dataset.
19
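+ It achieves the following results on the evaluation set (these values match the epoch 1 row of the training results table below, which is the epoch with the lowest validation loss):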
20
+ - Loss: 2.7864
21
+ - Sacrebleu: 6.6442
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 2e-05
41
+ - train_batch_size: 16
42
+ - eval_batch_size: 16
43
+ - seed: 42
44
+ - optimizer: AdamW, fused PyTorch implementation (OptimizerNames.ADAMW_TORCH_FUSED), with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
45
+ - lr_scheduler_type: linear
46
+ - num_epochs: 3
47
+ - mixed_precision_training: Native AMP
48
+
49
+ ### Training results
50
+
51
+ | Training Loss | Epoch | Step | Validation Loss | Sacrebleu |
52
+ |:-------------:|:-----:|:----:|:---------------:|:---------:|
53
+ | 0.4156 | 1.0 | 1678 | 2.7864 | 6.6442 |
54
+ | 0.1474 | 2.0 | 3356 | 2.8686 | 5.0403 |
55
+ | 0.0730 | 3.0 | 5034 | 2.9130 | 6.2466 |
56
+
57
+
58
+ ### Framework versions
59
+
60
+ - Transformers 5.0.0
61
+ - PyTorch 2.10.0+cu128
62
+ - Datasets 4.0.0
63
+ - Tokenizers 0.22.2
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4b069ebc872e08468e1c32471fd9f3afb12049b7301db5708b7ac843d2f9d90
3
  size 2444578688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ace579aefd498d0ea9ab41882b3d8faae2eaafcf5a1f4b294898bff25c25a5a
3
  size 2444578688
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e673ccd228f195203d0ca7520a3e75b660426c6d0cda003d28f43d49069a48d8
3
+ size 16793291
tokenizer_config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<s>",
4
+ "cls_token": "<s>",
5
+ "eos_token": "</s>",
6
+ "extra_special_tokens": [
7
+ "<s>",
8
+ "</s>",
9
+ "ar_AR",
10
+ "cs_CZ",
11
+ "de_DE",
12
+ "en_XX",
13
+ "es_XX",
14
+ "et_EE",
15
+ "fi_FI",
16
+ "fr_XX",
17
+ "gu_IN",
18
+ "hi_IN",
19
+ "it_IT",
20
+ "ja_XX",
21
+ "kk_KZ",
22
+ "ko_KR",
23
+ "lt_LT",
24
+ "lv_LV",
25
+ "my_MM",
26
+ "ne_NP",
27
+ "nl_XX",
28
+ "ro_RO",
29
+ "ru_RU",
30
+ "si_LK",
31
+ "tr_TR",
32
+ "vi_VN",
33
+ "zh_CN",
34
+ "af_ZA",
35
+ "az_AZ",
36
+ "bn_IN",
37
+ "fa_IR",
38
+ "he_IL",
39
+ "hr_HR",
40
+ "id_ID",
41
+ "ka_GE",
42
+ "km_KH",
43
+ "mk_MK",
44
+ "ml_IN",
45
+ "mn_MN",
46
+ "mr_IN",
47
+ "pl_PL",
48
+ "ps_AF",
49
+ "pt_XX",
50
+ "sv_SE",
51
+ "sw_KE",
52
+ "ta_IN",
53
+ "te_IN",
54
+ "th_TH",
55
+ "tl_XX",
56
+ "uk_UA",
57
+ "ur_PK",
58
+ "xh_ZA",
59
+ "gl_ES",
60
+ "sl_SI"
61
+ ],
62
+ "is_local": false,
63
+ "language_codes": "ML50",
64
+ "mask_token": "<mask>",
65
+ "model_max_length": 1000000000000000019884624838656,
66
+ "pad_token": "<pad>",
67
+ "sep_token": "</s>",
68
+ "src_lang": "zh_CN",
69
+ "tgt_lang": null,
70
+ "tokenizer_class": "MBart50Tokenizer",
71
+ "unk_id": 0,
72
+ "unk_token": "<unk>"
73
+ }