sneakyfree committed on
Commit
3266e12
·
verified ·
1 Parent(s): 0a403b6

upstream-archive byte-perfect snapshot of Helsinki-NLP/opus-mt-itc-itc (ADR-039 Phase D)

Browse files
Files changed (11) hide show
  1. .gitattributes +7 -32
  2. README.md +269 -0
  3. config.json +60 -0
  4. generation_config.json +16 -0
  5. metadata.json +1 -0
  6. pytorch_model.bin +3 -0
  7. source.spm +3 -0
  8. target.spm +3 -0
  9. tf_model.h5 +3 -0
  10. tokenizer_config.json +1 -0
  11. vocab.json +0 -0
.gitattributes CHANGED
@@ -1,35 +1,10 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
2
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
 
 
 
4
  *.h5 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  *.tflite filter=lfs diff=lfs merge=lfs -text
6
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.ot filter=lfs diff=lfs merge=lfs -text
8
+ *.onnx filter=lfs diff=lfs merge=lfs -text
9
+ source.spm filter=lfs diff=lfs merge=lfs -text
10
+ target.spm filter=lfs diff=lfs merge=lfs -text
 
README.md ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - it
4
+ - ca
5
+ - rm
6
+ - es
7
+ - ro
8
+ - gl
9
+ - sc
10
+ - co
11
+ - wa
12
+ - pt
13
+ - oc
14
+ - an
15
+ - id
16
+ - fr
17
+ - ht
18
+ - itc
19
+
20
+ tags:
21
+ - translation
22
+
23
+ license: apache-2.0
24
+ ---
25
+
26
+ ### itc-itc
27
+
28
+ * source group: Italic languages
29
+ * target group: Italic languages
30
+ * OPUS readme: [itc-itc](https://github.com/Helsinki-NLP/Tatoeba-Challenge/tree/master/models/itc-itc/README.md)
31
+
32
+ * model: transformer
33
+ * source language(s): arg ast bjn cat cos egl fra frm_Latn gcf_Latn glg hat ind ita lad lad_Latn lat_Grek lat_Latn lij lld_Latn lmo mwl oci pap pcd pms por roh ron scn spa srd vec wln zsm_Latn
34
+ * target language(s): arg ast bjn cat cos egl fra frm_Latn gcf_Latn glg hat ind ita lad lad_Latn lat_Grek lat_Latn lij lld_Latn lmo mwl oci pap pcd pms por roh ron scn spa srd vec wln zsm_Latn
35
+ * model: transformer
36
+ * pre-processing: normalization + SentencePiece (spm32k,spm32k)
37
+ * a sentence-initial language token is required in the form `>>id<<` (where `id` is a valid target language ID)
38
+ * download original weights: [opus-2020-07-07.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/itc-itc/opus-2020-07-07.zip)
39
+ * test set translations: [opus-2020-07-07.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/itc-itc/opus-2020-07-07.test.txt)
40
+ * test set scores: [opus-2020-07-07.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/itc-itc/opus-2020-07-07.eval.txt)
41
+
42
+ ## Benchmarks
43
+
44
+ | testset | BLEU | chr-F |
45
+ |-----------------------|-------|-------|
46
+ | Tatoeba-test.arg-fra.arg.fra | 40.8 | 0.501 |
47
+ | Tatoeba-test.arg-spa.arg.spa | 59.9 | 0.739 |
48
+ | Tatoeba-test.ast-fra.ast.fra | 45.4 | 0.628 |
49
+ | Tatoeba-test.ast-por.ast.por | 100.0 | 1.000 |
50
+ | Tatoeba-test.ast-spa.ast.spa | 46.8 | 0.636 |
51
+ | Tatoeba-test.cat-fra.cat.fra | 51.6 | 0.689 |
52
+ | Tatoeba-test.cat-ita.cat.ita | 49.2 | 0.699 |
53
+ | Tatoeba-test.cat-por.cat.por | 48.0 | 0.688 |
54
+ | Tatoeba-test.cat-ron.cat.ron | 35.4 | 0.719 |
55
+ | Tatoeba-test.cat-spa.cat.spa | 69.0 | 0.826 |
56
+ | Tatoeba-test.cos-fra.cos.fra | 22.3 | 0.383 |
57
+ | Tatoeba-test.cos-pms.cos.pms | 3.4 | 0.199 |
58
+ | Tatoeba-test.egl-fra.egl.fra | 9.5 | 0.283 |
59
+ | Tatoeba-test.egl-ita.egl.ita | 3.0 | 0.206 |
60
+ | Tatoeba-test.egl-spa.egl.spa | 3.7 | 0.194 |
61
+ | Tatoeba-test.fra-arg.fra.arg | 3.8 | 0.090 |
62
+ | Tatoeba-test.fra-ast.fra.ast | 25.9 | 0.457 |
63
+ | Tatoeba-test.fra-cat.fra.cat | 42.2 | 0.637 |
64
+ | Tatoeba-test.fra-cos.fra.cos | 3.3 | 0.185 |
65
+ | Tatoeba-test.fra-egl.fra.egl | 2.2 | 0.120 |
66
+ | Tatoeba-test.fra-frm.fra.frm | 1.0 | 0.191 |
67
+ | Tatoeba-test.fra-gcf.fra.gcf | 0.2 | 0.099 |
68
+ | Tatoeba-test.fra-glg.fra.glg | 40.5 | 0.625 |
69
+ | Tatoeba-test.fra-hat.fra.hat | 22.6 | 0.472 |
70
+ | Tatoeba-test.fra-ita.fra.ita | 46.7 | 0.679 |
71
+ | Tatoeba-test.fra-lad.fra.lad | 15.9 | 0.345 |
72
+ | Tatoeba-test.fra-lat.fra.lat | 2.9 | 0.247 |
73
+ | Tatoeba-test.fra-lij.fra.lij | 1.0 | 0.201 |
74
+ | Tatoeba-test.fra-lld.fra.lld | 1.1 | 0.257 |
75
+ | Tatoeba-test.fra-lmo.fra.lmo | 1.2 | 0.241 |
76
+ | Tatoeba-test.fra-msa.fra.msa | 0.4 | 0.111 |
77
+ | Tatoeba-test.fra-oci.fra.oci | 7.3 | 0.322 |
78
+ | Tatoeba-test.fra-pap.fra.pap | 69.8 | 0.912 |
79
+ | Tatoeba-test.fra-pcd.fra.pcd | 0.6 | 0.144 |
80
+ | Tatoeba-test.fra-pms.fra.pms | 1.0 | 0.181 |
81
+ | Tatoeba-test.fra-por.fra.por | 39.7 | 0.619 |
82
+ | Tatoeba-test.fra-roh.fra.roh | 5.7 | 0.286 |
83
+ | Tatoeba-test.fra-ron.fra.ron | 36.4 | 0.591 |
84
+ | Tatoeba-test.fra-scn.fra.scn | 2.1 | 0.101 |
85
+ | Tatoeba-test.fra-spa.fra.spa | 47.5 | 0.670 |
86
+ | Tatoeba-test.fra-srd.fra.srd | 2.8 | 0.306 |
87
+ | Tatoeba-test.fra-vec.fra.vec | 3.0 | 0.345 |
88
+ | Tatoeba-test.fra-wln.fra.wln | 3.5 | 0.212 |
89
+ | Tatoeba-test.frm-fra.frm.fra | 11.4 | 0.472 |
90
+ | Tatoeba-test.gcf-fra.gcf.fra | 7.1 | 0.267 |
91
+ | Tatoeba-test.gcf-lad.gcf.lad | 0.0 | 0.170 |
92
+ | Tatoeba-test.gcf-por.gcf.por | 0.0 | 0.230 |
93
+ | Tatoeba-test.gcf-spa.gcf.spa | 13.4 | 0.314 |
94
+ | Tatoeba-test.glg-fra.glg.fra | 54.7 | 0.702 |
95
+ | Tatoeba-test.glg-ita.glg.ita | 40.1 | 0.661 |
96
+ | Tatoeba-test.glg-por.glg.por | 57.6 | 0.748 |
97
+ | Tatoeba-test.glg-spa.glg.spa | 70.0 | 0.817 |
98
+ | Tatoeba-test.hat-fra.hat.fra | 14.2 | 0.419 |
99
+ | Tatoeba-test.hat-spa.hat.spa | 17.9 | 0.449 |
100
+ | Tatoeba-test.ita-cat.ita.cat | 51.0 | 0.693 |
101
+ | Tatoeba-test.ita-egl.ita.egl | 1.1 | 0.114 |
102
+ | Tatoeba-test.ita-fra.ita.fra | 58.2 | 0.727 |
103
+ | Tatoeba-test.ita-glg.ita.glg | 41.7 | 0.652 |
104
+ | Tatoeba-test.ita-lad.ita.lad | 17.5 | 0.419 |
105
+ | Tatoeba-test.ita-lat.ita.lat | 7.1 | 0.294 |
106
+ | Tatoeba-test.ita-lij.ita.lij | 1.0 | 0.208 |
107
+ | Tatoeba-test.ita-msa.ita.msa | 0.9 | 0.115 |
108
+ | Tatoeba-test.ita-oci.ita.oci | 12.3 | 0.378 |
109
+ | Tatoeba-test.ita-pms.ita.pms | 1.6 | 0.182 |
110
+ | Tatoeba-test.ita-por.ita.por | 44.8 | 0.665 |
111
+ | Tatoeba-test.ita-ron.ita.ron | 43.3 | 0.653 |
112
+ | Tatoeba-test.ita-spa.ita.spa | 56.6 | 0.733 |
113
+ | Tatoeba-test.ita-vec.ita.vec | 2.0 | 0.187 |
114
+ | Tatoeba-test.lad-fra.lad.fra | 30.4 | 0.458 |
115
+ | Tatoeba-test.lad-gcf.lad.gcf | 0.0 | 0.163 |
116
+ | Tatoeba-test.lad-ita.lad.ita | 12.3 | 0.426 |
117
+ | Tatoeba-test.lad-lat.lad.lat | 1.6 | 0.178 |
118
+ | Tatoeba-test.lad-por.lad.por | 8.8 | 0.394 |
119
+ | Tatoeba-test.lad-ron.lad.ron | 78.3 | 0.717 |
120
+ | Tatoeba-test.lad-spa.lad.spa | 28.3 | 0.531 |
121
+ | Tatoeba-test.lat-fra.lat.fra | 9.4 | 0.300 |
122
+ | Tatoeba-test.lat-ita.lat.ita | 20.0 | 0.421 |
123
+ | Tatoeba-test.lat-lad.lat.lad | 3.8 | 0.173 |
124
+ | Tatoeba-test.lat-por.lat.por | 13.0 | 0.354 |
125
+ | Tatoeba-test.lat-ron.lat.ron | 14.0 | 0.358 |
126
+ | Tatoeba-test.lat-spa.lat.spa | 21.8 | 0.436 |
127
+ | Tatoeba-test.lij-fra.lij.fra | 13.8 | 0.346 |
128
+ | Tatoeba-test.lij-ita.lij.ita | 14.7 | 0.442 |
129
+ | Tatoeba-test.lld-fra.lld.fra | 18.8 | 0.428 |
130
+ | Tatoeba-test.lld-spa.lld.spa | 11.1 | 0.377 |
131
+ | Tatoeba-test.lmo-fra.lmo.fra | 11.0 | 0.329 |
132
+ | Tatoeba-test.msa-fra.msa.fra | 0.8 | 0.129 |
133
+ | Tatoeba-test.msa-ita.msa.ita | 1.1 | 0.138 |
134
+ | Tatoeba-test.msa-msa.msa.msa | 19.1 | 0.453 |
135
+ | Tatoeba-test.msa-pap.msa.pap | 0.0 | 0.037 |
136
+ | Tatoeba-test.msa-por.msa.por | 2.4 | 0.155 |
137
+ | Tatoeba-test.msa-ron.msa.ron | 1.2 | 0.129 |
138
+ | Tatoeba-test.msa-spa.msa.spa | 1.0 | 0.139 |
139
+ | Tatoeba-test.multi.multi | 40.8 | 0.599 |
140
+ | Tatoeba-test.mwl-por.mwl.por | 35.4 | 0.561 |
141
+ | Tatoeba-test.oci-fra.oci.fra | 24.5 | 0.467 |
142
+ | Tatoeba-test.oci-ita.oci.ita | 23.3 | 0.493 |
143
+ | Tatoeba-test.oci-spa.oci.spa | 26.1 | 0.505 |
144
+ | Tatoeba-test.pap-fra.pap.fra | 31.0 | 0.629 |
145
+ | Tatoeba-test.pap-msa.pap.msa | 0.0 | 0.051 |
146
+ | Tatoeba-test.pcd-fra.pcd.fra | 13.8 | 0.381 |
147
+ | Tatoeba-test.pcd-spa.pcd.spa | 2.6 | 0.227 |
148
+ | Tatoeba-test.pms-cos.pms.cos | 3.4 | 0.217 |
149
+ | Tatoeba-test.pms-fra.pms.fra | 13.4 | 0.347 |
150
+ | Tatoeba-test.pms-ita.pms.ita | 13.0 | 0.373 |
151
+ | Tatoeba-test.pms-spa.pms.spa | 13.1 | 0.374 |
152
+ | Tatoeba-test.por-ast.por.ast | 100.0 | 1.000 |
153
+ | Tatoeba-test.por-cat.por.cat | 45.1 | 0.673 |
154
+ | Tatoeba-test.por-fra.por.fra | 52.5 | 0.698 |
155
+ | Tatoeba-test.por-gcf.por.gcf | 16.0 | 0.128 |
156
+ | Tatoeba-test.por-glg.por.glg | 57.5 | 0.750 |
157
+ | Tatoeba-test.por-ita.por.ita | 50.1 | 0.710 |
158
+ | Tatoeba-test.por-lad.por.lad | 15.7 | 0.341 |
159
+ | Tatoeba-test.por-lat.por.lat | 11.1 | 0.362 |
160
+ | Tatoeba-test.por-msa.por.msa | 2.4 | 0.136 |
161
+ | Tatoeba-test.por-mwl.por.mwl | 30.5 | 0.559 |
162
+ | Tatoeba-test.por-roh.por.roh | 0.0 | 0.132 |
163
+ | Tatoeba-test.por-ron.por.ron | 40.0 | 0.632 |
164
+ | Tatoeba-test.por-spa.por.spa | 58.6 | 0.756 |
165
+ | Tatoeba-test.roh-fra.roh.fra | 23.1 | 0.564 |
166
+ | Tatoeba-test.roh-por.roh.por | 21.4 | 0.347 |
167
+ | Tatoeba-test.roh-spa.roh.spa | 19.8 | 0.489 |
168
+ | Tatoeba-test.ron-cat.ron.cat | 59.5 | 0.854 |
169
+ | Tatoeba-test.ron-fra.ron.fra | 47.4 | 0.647 |
170
+ | Tatoeba-test.ron-ita.ron.ita | 45.7 | 0.683 |
171
+ | Tatoeba-test.ron-lad.ron.lad | 44.2 | 0.712 |
172
+ | Tatoeba-test.ron-lat.ron.lat | 14.8 | 0.449 |
173
+ | Tatoeba-test.ron-msa.ron.msa | 1.2 | 0.098 |
174
+ | Tatoeba-test.ron-por.ron.por | 42.7 | 0.650 |
175
+ | Tatoeba-test.ron-spa.ron.spa | 50.4 | 0.686 |
176
+ | Tatoeba-test.scn-fra.scn.fra | 2.4 | 0.180 |
177
+ | Tatoeba-test.scn-spa.scn.spa | 5.1 | 0.212 |
178
+ | Tatoeba-test.spa-arg.spa.arg | 10.8 | 0.267 |
179
+ | Tatoeba-test.spa-ast.spa.ast | 24.6 | 0.514 |
180
+ | Tatoeba-test.spa-cat.spa.cat | 61.6 | 0.783 |
181
+ | Tatoeba-test.spa-egl.spa.egl | 2.2 | 0.106 |
182
+ | Tatoeba-test.spa-fra.spa.fra | 51.1 | 0.683 |
183
+ | Tatoeba-test.spa-gcf.spa.gcf | 7.8 | 0.067 |
184
+ | Tatoeba-test.spa-glg.spa.glg | 62.8 | 0.776 |
185
+ | Tatoeba-test.spa-hat.spa.hat | 16.6 | 0.398 |
186
+ | Tatoeba-test.spa-ita.spa.ita | 51.8 | 0.718 |
187
+ | Tatoeba-test.spa-lad.spa.lad | 14.6 | 0.393 |
188
+ | Tatoeba-test.spa-lat.spa.lat | 21.5 | 0.486 |
189
+ | Tatoeba-test.spa-lld.spa.lld | 2.0 | 0.222 |
190
+ | Tatoeba-test.spa-msa.spa.msa | 0.8 | 0.113 |
191
+ | Tatoeba-test.spa-oci.spa.oci | 10.3 | 0.377 |
192
+ | Tatoeba-test.spa-pcd.spa.pcd | 0.9 | 0.115 |
193
+ | Tatoeba-test.spa-pms.spa.pms | 1.5 | 0.194 |
194
+ | Tatoeba-test.spa-por.spa.por | 49.4 | 0.698 |
195
+ | Tatoeba-test.spa-roh.spa.roh | 4.6 | 0.261 |
196
+ | Tatoeba-test.spa-ron.spa.ron | 39.1 | 0.618 |
197
+ | Tatoeba-test.spa-scn.spa.scn | 2.0 | 0.113 |
198
+ | Tatoeba-test.spa-wln.spa.wln | 8.7 | 0.295 |
199
+ | Tatoeba-test.srd-fra.srd.fra | 6.7 | 0.369 |
200
+ | Tatoeba-test.vec-fra.vec.fra | 59.9 | 0.608 |
201
+ | Tatoeba-test.vec-ita.vec.ita | 14.2 | 0.405 |
202
+ | Tatoeba-test.wln-fra.wln.fra | 8.9 | 0.344 |
203
+ | Tatoeba-test.wln-spa.wln.spa | 9.6 | 0.298 |
204
+
205
+
206
+ ### System Info:
207
+ - hf_name: itc-itc
208
+
209
+ - source_languages: itc
210
+
211
+ - target_languages: itc
212
+
213
+ - opus_readme_url: https://github.com/Helsinki-NLP/Tatoeba-Challenge/tree/master/models/itc-itc/README.md
214
+
215
+ - original_repo: Tatoeba-Challenge
216
+
217
+ - tags: ['translation']
218
+
219
+ - languages: ['it', 'ca', 'rm', 'es', 'ro', 'gl', 'sc', 'co', 'wa', 'pt', 'oc', 'an', 'id', 'fr', 'ht', 'itc']
220
+
221
+ - src_constituents: {'ita', 'cat', 'roh', 'spa', 'pap', 'bjn', 'lmo', 'mwl', 'lij', 'lat_Latn', 'lad_Latn', 'pcd', 'lat_Grek', 'ext', 'ron', 'ast', 'glg', 'pms', 'zsm_Latn', 'srd', 'gcf_Latn', 'lld_Latn', 'min', 'tmw_Latn', 'cos', 'wln', 'zlm_Latn', 'por', 'egl', 'oci', 'vec', 'arg', 'ind', 'fra', 'hat', 'lad', 'max_Latn', 'frm_Latn', 'scn', 'mfe'}
222
+
223
+ - tgt_constituents: {'ita', 'cat', 'roh', 'spa', 'pap', 'bjn', 'lmo', 'mwl', 'lij', 'lat_Latn', 'lad_Latn', 'pcd', 'lat_Grek', 'ext', 'ron', 'ast', 'glg', 'pms', 'zsm_Latn', 'srd', 'gcf_Latn', 'lld_Latn', 'min', 'tmw_Latn', 'cos', 'wln', 'zlm_Latn', 'por', 'egl', 'oci', 'vec', 'arg', 'ind', 'fra', 'hat', 'lad', 'max_Latn', 'frm_Latn', 'scn', 'mfe'}
224
+
225
+ - src_multilingual: True
226
+
227
+ - tgt_multilingual: True
228
+
229
+ - prepro: normalization + SentencePiece (spm32k,spm32k)
230
+
231
+ - url_model: https://object.pouta.csc.fi/Tatoeba-MT-models/itc-itc/opus-2020-07-07.zip
232
+
233
+ - url_test_set: https://object.pouta.csc.fi/Tatoeba-MT-models/itc-itc/opus-2020-07-07.test.txt
234
+
235
+ - src_alpha3: itc
236
+
237
+ - tgt_alpha3: itc
238
+
239
+ - short_pair: itc-itc
240
+
241
+ - chrF2_score: 0.599
242
+
243
+ - bleu: 40.8
244
+
245
+ - brevity_penalty: 0.968
246
+
247
+ - ref_len: 77448.0
248
+
249
+ - src_name: Italic languages
250
+
251
+ - tgt_name: Italic languages
252
+
253
+ - train_date: 2020-07-07
254
+
255
+ - src_alpha2: itc
256
+
257
+ - tgt_alpha2: itc
258
+
259
+ - prefer_old: False
260
+
261
+ - long_pair: itc-itc
262
+
263
+ - helsinki_git_sha: 480fcbe0ee1bf4774bcbe6226ad9f58e63f6c535
264
+
265
+ - transformers_git_sha: 2207e5d8cb224e954a7cba69fa4ac2309e9ff30b
266
+
267
+ - port_machine: brutasse
268
+
269
+ - port_time: 2020-08-21-14:41
config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/tmp/Helsinki-NLP/opus-mt-itc-itc",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "swish",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": false,
7
+ "architectures": [
8
+ "MarianMTModel"
9
+ ],
10
+ "attention_dropout": 0.0,
11
+ "bad_words_ids": [
12
+ [
13
+ 36546
14
+ ]
15
+ ],
16
+ "bos_token_id": 0,
17
+ "classif_dropout": 0.0,
18
+ "classifier_dropout": 0.0,
19
+ "d_model": 512,
20
+ "decoder_attention_heads": 8,
21
+ "decoder_ffn_dim": 2048,
22
+ "decoder_layerdrop": 0.0,
23
+ "decoder_layers": 6,
24
+ "decoder_start_token_id": 36546,
25
+ "decoder_vocab_size": 36547,
26
+ "dropout": 0.1,
27
+ "encoder_attention_heads": 8,
28
+ "encoder_ffn_dim": 2048,
29
+ "encoder_layerdrop": 0.0,
30
+ "encoder_layers": 6,
31
+ "eos_token_id": 0,
32
+ "extra_pos_embeddings": 36547,
33
+ "forced_eos_token_id": 0,
34
+ "id2label": {
35
+ "0": "LABEL_0",
36
+ "1": "LABEL_1",
37
+ "2": "LABEL_2"
38
+ },
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "label2id": {
42
+ "LABEL_0": 0,
43
+ "LABEL_1": 1,
44
+ "LABEL_2": 2
45
+ },
46
+ "max_length": 512,
47
+ "max_position_embeddings": 512,
48
+ "model_type": "marian",
49
+ "normalize_before": false,
50
+ "normalize_embedding": false,
51
+ "num_beams": 6,
52
+ "num_hidden_layers": 6,
53
+ "pad_token_id": 36546,
54
+ "scale_embedding": true,
55
+ "share_encoder_decoder_embeddings": true,
56
+ "static_position_embeddings": true,
57
+ "transformers_version": "4.22.0.dev0",
58
+ "use_cache": true,
59
+ "vocab_size": 36547
60
+ }
generation_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bad_words_ids": [
3
+ [
4
+ 36546
5
+ ]
6
+ ],
7
+ "bos_token_id": 0,
8
+ "decoder_start_token_id": 36546,
9
+ "eos_token_id": 0,
10
+ "forced_eos_token_id": 0,
11
+ "max_length": 512,
12
+ "num_beams": 6,
13
+ "pad_token_id": 36546,
14
+ "renormalize_logits": true,
15
+ "transformers_version": "4.32.0.dev0"
16
+ }
metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"hf_name":"itc-itc","source_languages":"itc","target_languages":"itc","opus_readme_url":"https:\/\/github.com\/Helsinki-NLP\/Tatoeba-Challenge\/tree\/master\/models\/itc-itc\/README.md","original_repo":"Tatoeba-Challenge","tags":["translation"],"languages":["it","ca","rm","es","ro","gl","sc","co","wa","pt","oc","an","id","fr","ht","itc"],"src_constituents":["ita","cat","roh","spa","pap","bjn","lmo","mwl","lij","lat_Latn","lad_Latn","pcd","lat_Grek","ext","ron","ast","glg","pms","zsm_Latn","srd","gcf_Latn","lld_Latn","min","tmw_Latn","cos","wln","zlm_Latn","por","egl","oci","vec","arg","ind","fra","hat","lad","max_Latn","frm_Latn","scn","mfe"],"tgt_constituents":["ita","cat","roh","spa","pap","bjn","lmo","mwl","lij","lat_Latn","lad_Latn","pcd","lat_Grek","ext","ron","ast","glg","pms","zsm_Latn","srd","gcf_Latn","lld_Latn","min","tmw_Latn","cos","wln","zlm_Latn","por","egl","oci","vec","arg","ind","fra","hat","lad","max_Latn","frm_Latn","scn","mfe"],"src_multilingual":true,"tgt_multilingual":true,"prepro":" normalization + SentencePiece (spm32k,spm32k)","url_model":"https:\/\/object.pouta.csc.fi\/Tatoeba-MT-models\/itc-itc\/opus-2020-07-07.zip","url_test_set":"https:\/\/object.pouta.csc.fi\/Tatoeba-MT-models\/itc-itc\/opus-2020-07-07.test.txt","src_alpha3":"itc","tgt_alpha3":"itc","short_pair":"itc-itc","chrF2_score":0.599,"bleu":40.8,"brevity_penalty":0.968,"ref_len":77448.0,"src_name":"Italic languages","tgt_name":"Italic languages","train_date":"2020-07-07","src_alpha2":"itc","tgt_alpha2":"itc","prefer_old":false,"long_pair":"itc-itc","helsinki_git_sha":"480fcbe0ee1bf4774bcbe6226ad9f58e63f6c535","transformers_git_sha":"2207e5d8cb224e954a7cba69fa4ac2309e9ff30b","port_machine":"brutasse","port_time":"2020-08-21-14:41"}
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb9ce234f8fe2a30bb1631d0cc4567114ba361f71d68c9d6a46621273260f4dd
3
+ size 253699401
source.spm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfc020c38ea7076c5471493ac37d64d86f74893aae7a9643eb560d78c7c77029
3
+ size 798069
target.spm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b139b2079d7566484c91fe4b796803c8a8249ce1d1562b025ce3700ce1c6e50
3
+ size 797934
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aa8a24304cd23149d3e09da2a0068d32fe5c118c063d76d82d8489d0d238340
3
+ size 254079176
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"target_lang": "itc", "source_lang": "itc"}
vocab.json ADDED
The diff for this file is too large to render. See raw diff