{
  "model_id": "nvidia/parakeet-tdt_ctc-110m",
  "model_type": "hybrid_tdt_ctc",
  "sample_rate": 16000,
  "max_audio_seconds": 15.0,
  "max_audio_samples": 240000,
  "max_symbol_steps": 1,
  "vocab_size": 1024,
  "blank_id": 1024,
  "joint_extra_outputs": 5,
  "duration_bins": [0, 1, 2, 3, 4],
  "encoder_dim": 512,
  "decoder_dim": 640,
  "decoder_hidden": 640,
  "decoder_layers": 1,
  "checkpoint": {
    "type": "pretrained",
    "model_id": "nvidia/parakeet-tdt_ctc-110m"
  },
  "coreml": {
    "compute_units": "CPU_ONLY",
    "compute_precision": "FLOAT32"
  },
  "components": {
    "preprocessor": {
      "inputs": {
        "audio_signal": [1, 240000],
        "audio_length": [1]
      },
      "outputs": {
        "encoder": [1, 512, 188],
        "encoder_length": [1]
      },
      "path": "Preprocessor.mlpackage"
    },
    "decoder": {
      "inputs": {
        "targets": [1, 1],
        "target_length": [1],
        "h_in": [1, 1, 640],
        "c_in": [1, 1, 640]
      },
      "outputs": {
        "decoder": [1, 640, 1],
        "h_out": [1, 1, 640],
        "c_out": [1, 1, 640]
      },
      "path": "Decoder.mlpackage"
    },
    "joint_decision": {
      "inputs": {
        "encoder": [1, 512, 188],
        "decoder": [1, 640, 1]
      },
      "outputs": {
        "token_id": "int32",
        "token_prob": "float32",
        "duration": "int32"
      },
      "path": "JointDecision.mlpackage"
    },
    "joint_decision_single_step": {
      "inputs": {
        "encoder_step": [1, 512, 1],
        "decoder_step": [1, 640, 1]
      },
      "outputs": {
        "token_id": [1, 1, 1],
        "token_prob": [1, 1, 1],
        "duration": [1, 1, 1],
        "top_k_ids": "int32",
        "top_k_logits": "float32"
      },
      "path": "JointDecisionSingleStep.mlpackage"
    }
  }
}