init
Browse files
- pytorch_model.bin +3 -0
- v3_e2e_rnnt.yaml +46 -0
- v3_e2e_rnnt_decoder.onnx +3 -0
- v3_e2e_rnnt_encoder.onnx +3 -0
- v3_e2e_rnnt_joint.onnx +3 -0
- v3_e2e_rnnt_tokenizer.model +3 -0
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da85c29829d4002daedd676e012936488234d9255e65e86dfab9bec6b1729298
|
| 3 |
+
size 5905440
|
v3_e2e_rnnt.yaml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_class: rnnt
|
| 2 |
+
sample_rate: 16000
|
| 3 |
+
preprocessor:
|
| 4 |
+
_target_: gigaam.preprocess.FeatureExtractor
|
| 5 |
+
sample_rate: 16000
|
| 6 |
+
features: 64
|
| 7 |
+
win_length: 320
|
| 8 |
+
hop_length: 160
|
| 9 |
+
mel_scale: htk
|
| 10 |
+
n_fft: 320
|
| 11 |
+
mel_norm: null
|
| 12 |
+
center: false
|
| 13 |
+
encoder:
|
| 14 |
+
_target_: gigaam.encoder.ConformerEncoder
|
| 15 |
+
feat_in: 64
|
| 16 |
+
n_layers: 16
|
| 17 |
+
d_model: 768
|
| 18 |
+
subsampling_factor: 4
|
| 19 |
+
ff_expansion_factor: 4
|
| 20 |
+
self_attention_model: rotary
|
| 21 |
+
pos_emb_max_len: 5000
|
| 22 |
+
n_heads: 16
|
| 23 |
+
conv_kernel_size: 5
|
| 24 |
+
flash_attn: false
|
| 25 |
+
subs_kernel_size: 5
|
| 26 |
+
subsampling: conv1d
|
| 27 |
+
conv_norm_type: layer_norm
|
| 28 |
+
head:
|
| 29 |
+
_target_: gigaam.decoder.RNNTHead
|
| 30 |
+
decoder:
|
| 31 |
+
pred_hidden: 320
|
| 32 |
+
pred_rnn_layers: 1
|
| 33 |
+
num_classes: 1025
|
| 34 |
+
joint:
|
| 35 |
+
enc_hidden: 768
|
| 36 |
+
pred_hidden: 320
|
| 37 |
+
joint_hidden: 320
|
| 38 |
+
num_classes: 1025
|
| 39 |
+
decoding:
|
| 40 |
+
_target_: gigaam.decoding.RNNTGreedyDecoding
|
| 41 |
+
vocabulary: null
|
| 42 |
+
model_path: /models/v3_e2e_rnnt_tokenizer.model
|
| 43 |
+
model_name: v3_e2e_rnnt
|
| 44 |
+
hashes:
|
| 45 |
+
model: 72e2a9b5c7caad963b2bbfd2f298c252
|
| 46 |
+
tokenizer: 3b3bf8370e882885d79731592fc99f98
|
v3_e2e_rnnt_decoder.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b0a16d67fd2cb37061decc93c69e364a9ab27afee3c57495d55b1c974cf7231
|
| 3 |
+
size 4599910
|
v3_e2e_rnnt_encoder.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:297c67d9c56bd4e9f34934a5f35ad5d2b49557247e5a69de6bcc4eebfea29988
|
| 3 |
+
size 885084534
|
v3_e2e_rnnt_joint.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:602ff7017a93311aad34df1437c8d7f49911353c13d6eae7a6ee7b041339465c
|
| 3 |
+
size 2712896
|
v3_e2e_rnnt_tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:828c12c991019eef952a960661f25a92d6ad279591e2ea466b4aeddf1d20a18a
|
| 3 |
+
size 255336
|