Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .gitattributes +2 -1
- ressources/dev.csv +0 -0
- ressources/kenLM_model/kab_5k_6-gram.bin +3 -0
- ressources/kenLM_model/kab_5k_6-gram_v2.bin +2 -2
- ressources/kenLM_model/kab_5k_trigram.bin +3 -0
- ressources/lexicon_v7.txt +3 -0
- ressources/test.csv +0 -0
- ressources/tokenizer/128_v7.model +2 -2
- ressources/tokenizer/128_v7.vocab +128 -0
- ressources/tokenizer/5K.model +2 -2
- ressources/train.csv +3 -0
.gitattributes
CHANGED
|
@@ -36,4 +36,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 36 |
ressources/e2e_model/squeezeformer filter=lfs diff=lfs merge=lfs -text
|
| 37 |
ressources/tokenizer/128_v7.model filter=lfs diff=lfs merge=lfs -text
|
| 38 |
ressources/kenLM_model/kab_5k_6-gram_v2.bin filter=lfs diff=lfs merge=lfs -text
|
| 39 |
-
ressources/tokenizer/5K.model filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 36 |
ressources/e2e_model/squeezeformer filter=lfs diff=lfs merge=lfs -text
|
| 37 |
ressources/tokenizer/128_v7.model filter=lfs diff=lfs merge=lfs -text
|
| 38 |
ressources/kenLM_model/kab_5k_6-gram_v2.bin filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
ressources/tokenizer/5K.model filter=lfs diff=lfs merge=lfs -text
ressources/lexicon_v7.txt filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
ressources/train.csv filter=lfs diff=lfs merge=lfs -text
|
ressources/dev.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ressources/kenLM_model/kab_5k_6-gram.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:880abff36dd3c64ad9df109773ef3d49c7909e5de80cd63350b188e13e1df42c
|
| 3 |
+
size 6568786
|
ressources/kenLM_model/kab_5k_6-gram_v2.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88604fe3c4c653f822748386ad143ba4e3d9086577ed6d3ef205100a28d3d115
|
| 3 |
+
size 87786060
|
ressources/kenLM_model/kab_5k_trigram.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b89ecec10ddd295bca9abfc10a91a5488ad47217efd9ef8d672f3ba0f61765a4
|
| 3 |
+
size 1102536
|
ressources/lexicon_v7.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:907da74884c9375b7a365569ca85e8d4b59ceb048cc3d77d1b85065e6e44b95b
|
| 3 |
+
size 10602533
|
ressources/test.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ressources/tokenizer/128_v7.model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cfd6e89442fcff8a9850d69a1b23fb6b71cc8dcc98952d5c3c144038a013bc2
|
| 3 |
+
size 239191
|
ressources/tokenizer/128_v7.vocab
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<unk> 0
|
| 2 |
+
_ 0
|
| 3 |
+
a -3.11192
|
| 4 |
+
▁ -3.15595
|
| 5 |
+
t -3.25944
|
| 6 |
+
▁t -3.52915
|
| 7 |
+
s -3.54419
|
| 8 |
+
u -3.54656
|
| 9 |
+
m -3.5736
|
| 10 |
+
i -3.59306
|
| 11 |
+
k -3.62106
|
| 12 |
+
en -3.69013
|
| 13 |
+
d -3.79267
|
| 14 |
+
▁i -3.86006
|
| 15 |
+
n -3.97854
|
| 16 |
+
- -4.16532
|
| 17 |
+
▁d -4.17459
|
| 18 |
+
ɣ -4.18648
|
| 19 |
+
l -4.21454
|
| 20 |
+
r -4.27429
|
| 21 |
+
g -4.35671
|
| 22 |
+
eɣ -4.35988
|
| 23 |
+
em -4.36879
|
| 24 |
+
▁ad -4.3696
|
| 25 |
+
f -4.38996
|
| 26 |
+
e -4.41498
|
| 27 |
+
b -4.42066
|
| 28 |
+
▁a -4.43429
|
| 29 |
+
▁n -4.44957
|
| 30 |
+
an -4.49517
|
| 31 |
+
c -4.5262
|
| 32 |
+
z -4.62464
|
| 33 |
+
-t -4.70214
|
| 34 |
+
er -4.70406
|
| 35 |
+
el -4.71813
|
| 36 |
+
w -4.7289
|
| 37 |
+
y -4.7304
|
| 38 |
+
-d -4.74953
|
| 39 |
+
▁u -4.75923
|
| 40 |
+
▁ur -4.7942
|
| 41 |
+
▁s -4.79872
|
| 42 |
+
q -4.80626
|
| 43 |
+
ḥ -4.81343
|
| 44 |
+
▁l -4.91088
|
| 45 |
+
-i -4.92641
|
| 46 |
+
ɛ -5.00366
|
| 47 |
+
la -5.01626
|
| 48 |
+
in -5.03372
|
| 49 |
+
▁ay -5.03937
|
| 50 |
+
-a -5.07755
|
| 51 |
+
ed -5.10726
|
| 52 |
+
▁ara -5.11616
|
| 53 |
+
li -5.12989
|
| 54 |
+
▁y -5.17803
|
| 55 |
+
x -5.18338
|
| 56 |
+
ḍ -5.19544
|
| 57 |
+
▁ye -5.24084
|
| 58 |
+
ra -5.28018
|
| 59 |
+
▁yi -5.28171
|
| 60 |
+
eḍ -5.29483
|
| 61 |
+
al -5.29686
|
| 62 |
+
▁ta -5.3133
|
| 63 |
+
h -5.31978
|
| 64 |
+
ar -5.35051
|
| 65 |
+
-nni -5.3551
|
| 66 |
+
ent -5.42007
|
| 67 |
+
▁deg -5.42538
|
| 68 |
+
et -5.44656
|
| 69 |
+
ma -5.46095
|
| 70 |
+
eb -5.48129
|
| 71 |
+
ṛ -5.50409
|
| 72 |
+
ek -5.53251
|
| 73 |
+
es -5.53324
|
| 74 |
+
▁w -5.55271
|
| 75 |
+
ẓ -5.58031
|
| 76 |
+
▁as -5.60799
|
| 77 |
+
j -5.61945
|
| 78 |
+
ef -5.63673
|
| 79 |
+
mi -5.70638
|
| 80 |
+
-ine -5.71319
|
| 81 |
+
ri -5.71521
|
| 82 |
+
▁ti -5.71893
|
| 83 |
+
wen -5.74511
|
| 84 |
+
▁am -5.74738
|
| 85 |
+
wi -5.74836
|
| 86 |
+
wa -5.75519
|
| 87 |
+
ay -5.78674
|
| 88 |
+
na -5.79551
|
| 89 |
+
iɣ -5.8004
|
| 90 |
+
-s -5.8074
|
| 91 |
+
▁akk -5.82383
|
| 92 |
+
ur -5.82542
|
| 93 |
+
▁wa -5.83448
|
| 94 |
+
-is -5.8603
|
| 95 |
+
▁m -5.87086
|
| 96 |
+
-as -5.87551
|
| 97 |
+
ṭ -5.90041
|
| 98 |
+
-iyi -5.90805
|
| 99 |
+
ess -5.91906
|
| 100 |
+
iḍ -6.04588
|
| 101 |
+
▁ɣer -6.12172
|
| 102 |
+
▁ma -6.12576
|
| 103 |
+
t-id -6.12728
|
| 104 |
+
ez -6.15352
|
| 105 |
+
▁ɣef -6.17657
|
| 106 |
+
eɛ -6.19351
|
| 107 |
+
ṣ -6.19506
|
| 108 |
+
ir -6.20809
|
| 109 |
+
ǧ -6.23683
|
| 110 |
+
▁d-t -6.28702
|
| 111 |
+
-iw -6.29982
|
| 112 |
+
ew -6.30459
|
| 113 |
+
▁fell- -6.30805
|
| 114 |
+
eḥ -6.32849
|
| 115 |
+
▁acu -6.36947
|
| 116 |
+
▁seg -6.37521
|
| 117 |
+
eq -6.38939
|
| 118 |
+
wal -6.39378
|
| 119 |
+
-nwen -6.40979
|
| 120 |
+
▁d-y -6.42137
|
| 121 |
+
▁yid- -6.43992
|
| 122 |
+
eṛ -6.44694
|
| 123 |
+
▁tett -6.45687
|
| 124 |
+
-nsen -6.49355
|
| 125 |
+
eqq -6.51539
|
| 126 |
+
o -7.27575
|
| 127 |
+
p -8.37641
|
| 128 |
+
č -8.37641
|
ressources/tokenizer/5K.model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eae00d2ddc1baf700a3247817d6371959c71f967d9fb91529a86efea1492dfed
|
| 3 |
+
size 315116
|
ressources/train.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61709eed30b35a4a6403fb54968326d4d20731263ed1b76190146d11347eab27
|
| 3 |
+
size 18971108
|