Automatic Speech Recognition
NeMo
Core ML
PyTorch
speech
audio
Transducer
TDT
FastConformer
Conformer
NeMo
hf-asr-leaderboard
Instructions to use VoiceScribe/parakeet-tdt-0.6b-v3-coreml with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- NeMo
How to use VoiceScribe/parakeet-tdt-0.6b-v3-coreml with NeMo:
```python
import nemo.collections.asr as nemo_asr

asr_model = nemo_asr.models.ASRModel.from_pretrained("VoiceScribe/parakeet-tdt-0.6b-v3-coreml")
transcriptions = asr_model.transcribe(["file.wav"])
```
- Notebooks
- Google Colab
- Kaggle
```json
[
  {
    "metadataOutputVersion" : "3.0",
    "shortDescription" : "Parakeet decoder (RNNT prediction network)",
    "outputSchema" : [
      {
        "hasShapeFlexibility" : "0",
        "isOptional" : "0",
        "dataType" : "Float32",
        "formattedType" : "MultiArray (Float32 1 × 640 × 1)",
        "shortDescription" : "",
        "shape" : "[1, 640, 1]",
        "name" : "decoder",
        "type" : "MultiArray"
      },
      {
        "hasShapeFlexibility" : "0",
        "isOptional" : "0",
        "dataType" : "Float32",
        "formattedType" : "MultiArray (Float32 2 × 1 × 640)",
        "shortDescription" : "",
        "shape" : "[2, 1, 640]",
        "name" : "h_out",
        "type" : "MultiArray"
      },
      {
        "hasShapeFlexibility" : "0",
        "isOptional" : "0",
        "dataType" : "Float32",
        "formattedType" : "MultiArray (Float32 2 × 1 × 640)",
        "shortDescription" : "",
        "shape" : "[2, 1, 640]",
        "name" : "c_out",
        "type" : "MultiArray"
      }
    ],
    "storagePrecision" : "Float16",
    "modelParameters" : [
    ],
    "author" : "Fluid Inference",
    "specificationVersion" : 8,
    "mlProgramOperationTypeHistogram" : {
      "Select" : 1,
      "Ios17.squeeze" : 4,
      "Ios17.gather" : 1,
      "Ios17.cast" : 8,
      "Ios17.lstm" : 2,
      "Split" : 2,
      "Ios17.add" : 1,
      "Ios17.transpose" : 2,
      "Ios17.greaterEqual" : 1,
      "Identity" : 1,
      "Stack" : 2
    },
    "computePrecision" : "Mixed (Float16, Float32, Int16, Int32)",
    "isUpdatable" : "0",
    "stateSchema" : [
    ],
    "availability" : {
      "macOS" : "14.0",
      "tvOS" : "17.0",
      "visionOS" : "1.0",
      "watchOS" : "10.0",
      "iOS" : "17.0",
      "macCatalyst" : "17.0"
    },
    "modelType" : {
      "name" : "MLModelType_mlProgram"
    },
    "inputSchema" : [
      {
        "hasShapeFlexibility" : "0",
        "isOptional" : "0",
        "dataType" : "Int32",
        "formattedType" : "MultiArray (Int32 1 × 1)",
        "shortDescription" : "",
        "shape" : "[1, 1]",
        "name" : "targets",
        "type" : "MultiArray"
      },
      {
        "hasShapeFlexibility" : "0",
        "isOptional" : "0",
        "dataType" : "Int32",
        "formattedType" : "MultiArray (Int32 1)",
        "shortDescription" : "",
        "shape" : "[1]",
        "name" : "target_length",
        "type" : "MultiArray"
      },
      {
        "hasShapeFlexibility" : "0",
        "isOptional" : "0",
        "dataType" : "Float32",
        "formattedType" : "MultiArray (Float32 2 × 1 × 640)",
        "shortDescription" : "",
        "shape" : "[2, 1, 640]",
        "name" : "h_in",
        "type" : "MultiArray"
      },
      {
        "hasShapeFlexibility" : "0",
        "isOptional" : "0",
        "dataType" : "Float32",
        "formattedType" : "MultiArray (Float32 2 × 1 × 640)",
        "shortDescription" : "",
        "shape" : "[2, 1, 640]",
        "name" : "c_in",
        "type" : "MultiArray"
      }
    ],
    "userDefinedMetadata" : {
      "com.github.apple.coremltools.conversion_date" : "2025-09-19",
      "com.github.apple.coremltools.source" : "torch==2.7.0",
      "com.github.apple.coremltools.version" : "9.0b1",
      "com.github.apple.coremltools.source_dialect" : "TorchScript"
    },
    "generatedClassName" : "parakeet_decoder",
    "method" : "predict"
  }
]
```