Automatic Speech Recognition
MLX
Safetensors
granite_speech_nar
mlx-audio
speech-to-text
non-autoregressive
granite
custom_code
Instructions to use mouddane/granite-speech-4.1-2b-nar-mlx with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- MLX
How to use mouddane/granite-speech-4.1-2b-nar-mlx with MLX:
# Download the model from the Hub
pip install huggingface_hub[hf_xet]
huggingface-cli download --local-dir granite-speech-4.1-2b-nar-mlx mouddane/granite-speech-4.1-2b-nar-mlx
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- LM Studio
{
  "architectures": [
    "GraniteSpeechNarForASR"
  ],
  "auto_map": {
    "AutoConfig": "configuration_granite_speech_nar.GraniteSpeechNarConfig",
    "AutoFeatureExtractor": "feature_extraction_granite_speech_nar.GraniteSpeechNarFeatureExtractor",
    "AutoModel": "modeling_granite_speech_nar.GraniteSpeechNarForASR",
    "AutoProcessor": "processing_granite_speech_nar.GraniteSpeechNarProcessor"
  },
  "blank_token_id": 100257,
  "ce_loss_lambda": 0.02,
  "dtype": "bfloat16",
  "encoder_config": {
    "blank_token_id": 100257,
    "bpe_output_dim": 100352,
    "bpe_pooling_window": 4,
    "context_size": 200,
    "conv_expansion_factor": 2,
    "conv_kernel_size": 15,
    "dim_head": 128,
    "dropout": 0.1,
    "feedforward_mult": 4,
    "hidden_dim": 1024,
    "initializer_range": 0.02,
    "input_dim": 160,
    "max_pos_emb": 512,
    "model_type": "granite_speech_nar_encoder",
    "num_heads": 8,
    "num_layers": 16,
    "output_dim": 348,
    "pred_dropout": 0.25,
    "self_conditioning_layer": 8
  },
  "encoder_ctc_loss_lambda": 0.8,
  "encoder_layer_indices": [
    4,
    8,
    12,
    -1
  ],
  "min_edit_sequence_length": 8,
  "model_type": "granite_speech_nar",
  "projector_config": {
    "attn_bias": true,
    "block_size": 15,
    "downsample_rate": 5,
    "dropout_prob": 0.1,
    "encoder_dim": 1024,
    "hidden_size": 2048,
    "layernorm_eps": 1e-06,
    "llm_dim": 2048,
    "mlp_bias": true,
    "mlp_ratio": 2,
    "model_type": "granite_speech_nar_projector",
    "num_encoder_layers": 4,
    "num_heads": 32,
    "num_layers": 2
  },
  "scale_projected_embeddings": true,
  "text_config": {
    "_name_or_path": "ibm-granite/granite-4.0-1b-base",
    "attention_bias": false,
    "attention_dropout": 0.0,
    "attention_multiplier": 0.0078125,
    "bos_token_id": 100257,
    "dtype": "bfloat16",
    "embedding_multiplier": 12,
    "eos_token_id": 100257,
    "hidden_act": "silu",
    "hidden_size": 2048,
    "initializer_range": 0.1,
    "intermediate_size": 4096,
    "logits_scaling": 8,
    "max_position_embeddings": 4096,
    "mlp_bias": false,
    "model_type": "granite",
    "num_attention_heads": 16,
    "num_hidden_layers": 40,
    "num_key_value_heads": 4,
    "pad_token_id": 100256,
    "residual_multiplier": 0.22,
    "rms_norm_eps": 1e-05,
    "rope_parameters": {
      "rope_theta": 10000,
      "rope_type": "default"
    },
    "tie_word_embeddings": true,
    "use_cache": true,
    "vocab_size": 100352
  },
  "tie_word_embeddings": true,
  "transformers_version": "5.8.0.dev0"
}