sneakyfree committed on
Commit
c1cbef9
·
verified ·
1 Parent(s): 2e8dfd2

byte-perfect re-sync from Helsinki-NLP/opus-mt-ca-it (Phase D upgrade)

Browse files
.gitattributes CHANGED
@@ -6,6 +6,5 @@
6
  *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
  *.ot filter=lfs diff=lfs merge=lfs -text
8
  *.onnx filter=lfs diff=lfs merge=lfs -text
9
- model.safetensors filter=lfs diff=lfs merge=lfs -text
10
  source.spm filter=lfs diff=lfs merge=lfs -text
11
  target.spm filter=lfs diff=lfs merge=lfs -text
 
6
  *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
  *.ot filter=lfs diff=lfs merge=lfs -text
8
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
9
  source.spm filter=lfs diff=lfs merge=lfs -text
10
  target.spm filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "Helsinki-NLP/opus-mt-ca-it",
3
  "activation_dropout": 0.0,
4
  "activation_function": "swish",
5
  "add_bias_logits": false,
@@ -8,6 +8,11 @@
8
  "MarianMTModel"
9
  ],
10
  "attention_dropout": 0.0,
 
 
 
 
 
11
  "bos_token_id": 0,
12
  "classif_dropout": 0.0,
13
  "classifier_dropout": 0.0,
@@ -38,19 +43,18 @@
38
  "LABEL_1": 1,
39
  "LABEL_2": 2
40
  },
41
- "max_length": null,
42
  "max_position_embeddings": 512,
43
  "model_type": "marian",
44
  "normalize_before": false,
45
  "normalize_embedding": false,
46
- "num_beams": null,
47
  "num_hidden_layers": 6,
48
  "pad_token_id": 21527,
49
  "scale_embedding": true,
50
  "share_encoder_decoder_embeddings": true,
51
  "static_position_embeddings": true,
52
- "torch_dtype": "float32",
53
- "transformers_version": "4.45.1",
54
  "use_cache": true,
55
  "vocab_size": 21528
56
  }
 
1
  {
2
+ "_name_or_path": "/tmp/Helsinki-NLP/opus-mt-ca-it",
3
  "activation_dropout": 0.0,
4
  "activation_function": "swish",
5
  "add_bias_logits": false,
 
8
  "MarianMTModel"
9
  ],
10
  "attention_dropout": 0.0,
11
+ "bad_words_ids": [
12
+ [
13
+ 21527
14
+ ]
15
+ ],
16
  "bos_token_id": 0,
17
  "classif_dropout": 0.0,
18
  "classifier_dropout": 0.0,
 
43
  "LABEL_1": 1,
44
  "LABEL_2": 2
45
  },
46
+ "max_length": 512,
47
  "max_position_embeddings": 512,
48
  "model_type": "marian",
49
  "normalize_before": false,
50
  "normalize_embedding": false,
51
+ "num_beams": 4,
52
  "num_hidden_layers": 6,
53
  "pad_token_id": 21527,
54
  "scale_embedding": true,
55
  "share_encoder_decoder_embeddings": true,
56
  "static_position_embeddings": true,
57
+ "transformers_version": "4.22.0.dev0",
 
58
  "use_cache": true,
59
  "vocab_size": 21528
60
  }
generation_config.json CHANGED
@@ -12,5 +12,5 @@
12
  "num_beams": 4,
13
  "pad_token_id": 21527,
14
  "renormalize_logits": true,
15
- "transformers_version": "4.45.1"
16
  }
 
12
  "num_beams": 4,
13
  "pad_token_id": 21527,
14
  "renormalize_logits": true,
15
+ "transformers_version": "4.32.0.dev0"
16
  }
metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"hf_name":"cat-ita","source_languages":"cat","target_languages":"ita","opus_readme_url":"https:\/\/github.com\/Helsinki-NLP\/Tatoeba-Challenge\/tree\/master\/models\/cat-ita\/README.md","original_repo":"Tatoeba-Challenge","tags":["translation"],"languages":["ca","it"],"src_constituents":["cat"],"tgt_constituents":["ita"],"src_multilingual":false,"tgt_multilingual":false,"prepro":" normalization + SentencePiece (spm12k,spm12k)","url_model":"https:\/\/object.pouta.csc.fi\/Tatoeba-MT-models\/cat-ita\/opus-2020-06-16.zip","url_test_set":"https:\/\/object.pouta.csc.fi\/Tatoeba-MT-models\/cat-ita\/opus-2020-06-16.test.txt","src_alpha3":"cat","tgt_alpha3":"ita","short_pair":"ca-it","chrF2_score":0.69,"bleu":48.6,"brevity_penalty":0.985,"ref_len":1995.0,"src_name":"Catalan","tgt_name":"Italian","train_date":"2020-06-16","src_alpha2":"ca","tgt_alpha2":"it","prefer_old":false,"long_pair":"cat-ita","helsinki_git_sha":"480fcbe0ee1bf4774bcbe6226ad9f58e63f6c535","transformers_git_sha":"2207e5d8cb224e954a7cba69fa4ac2309e9ff30b","port_machine":"brutasse","port_time":"2020-08-21-14:41"}
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32714512b0cd946f789e45eabda385809ac8747bc572ceec2e697e1fe4295fe6
3
+ size 222880413
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b18cc2606470884f2631d23e2a41d9372980c1b6e50cd4cb87892fe35b2342f8
3
+ size 223200112
tokenizer_config.json CHANGED
@@ -1,38 +1 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "</s>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "1": {
12
- "content": "<unk>",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "21527": {
20
- "content": "<pad>",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- }
27
- },
28
- "clean_up_tokenization_spaces": false,
29
- "eos_token": "</s>",
30
- "model_max_length": 512,
31
- "pad_token": "<pad>",
32
- "separate_vocabs": false,
33
- "source_lang": "cat",
34
- "sp_model_kwargs": {},
35
- "target_lang": "ita",
36
- "tokenizer_class": "MarianTokenizer",
37
- "unk_token": "<unk>"
38
- }
 
1
+ {"target_lang": "ita", "source_lang": "cat"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vocab.json CHANGED
The diff for this file is too large to render. See raw diff