{
  "model_type": "wav2vec2-conformer",
  "auto_map": {
    "AutoModel": "modeling_conformer.Wav2Vec2ConformerRNNT"
  },
  "vocab_size": 5632,
  "hidden_size": 512,
  "num_hidden_layers": 17,
  "num_attention_heads": 8,
  "intermediate_size": 2048,
  "hidden_act": "swish",
  "conv_depthwise_kernel_size": 31,
  "mask_time_prob": 0,
  "lstm_layer": 1,
  "pred_hidden": 640,
  "joint_hidden": 640,
  "sampling_rate": 16000,
  "max_symbols_per_step": 10,
  "apply_spec_augment": false,
  "feat_extract_activation": "relu",
  "feat_extract_norm": "layer",
  "conv_bias": true,
  "conv_stride": [
    2,
    2
  ],
  "conv_kernel": [
    3,
    3
  ],
  "conv_dim": [
    512,
    512
  ],
  "blank_id": 256,
  "preemph": 0.97,
  "pad_to": 16,
  "pad_id": 257,
  "languages": {
    "Assamese": "as",
    "Bengali": "bn",
    "Bodo": "brx",
    "Dogri": "doi",
    "Gujarati": "gu",
    "Hindi": "hi",
    "Kannada": "kn",
    "Konkani": "kok",
    "Kashmiri": "ks",
    "Maithili": "mai",
    "Malayalam": "ml",
    "Manipuri": "mni",
    "Marathi": "mr",
    "Nepali": "ne",
    "Oriya": "or",
    "Panjabi": "pa",
    "Sanskrit": "sa",
    "Santali": "sat",
    "Sindhi": "sd",
    "Tamil": "ta",
    "Telugu": "te",
    "Urdu": "ur"
  }
}