{
  "model": "nvidia/parakeet-tdt-0.6b-v3",
  "torch_version": "2.11.0+cu130",
  "model_class": "EncDecRNNTBPEModel",
  "vocab_size": 8192,
  "blank_id": 8192,
  "durations": [
    0,
    1,
    2,
    3,
    4
  ],
  "num_durations": 5,
  "joint_output_dim": 8198,
  "joint_token_logits_slice": [
    0,
    8193
  ],
  "joint_duration_logits_slice": [
    8193,
    8198
  ],
  "encoder": {
    "d_model": 1024,
    "subsampling_factor": 8,
    "n_layers": 24,
    "n_heads": 8,
    "feat_in": 128,
    "attention_mode": "rel_pos",
    "att_context_size": null,
    "buckets": [
      {
        "n_mel_frames": 1500,
        "n_encoder_frames": 187,
        "input_shape": [
          1,
          128,
          1500
        ],
        "output_shape": [
          1,
          1024,
          188
        ],
        "artifact": "encoder_T1500.pt2",
        "size_mb": 2366.85
      }
    ],
    "multisig": false
  },
  "decoder": {
    "num_layers": 2,
    "hidden": 640,
    "embed_dim": 640
  },
  "joint": {
    "d_enc": 1024,
    "d_pred": 640,
    "joint_dim": 640
  },
  "preprocessor": {
    "sample_rate": 16000,
    "n_fft": 512,
    "win_length": 400,
    "hop_length": 160,
    "n_mels": 128,
    "preemph": 0.97,
    "log": true,
    "frame_rate_hz_post_subsample": 12.5
  },
  "artifacts": {
    "decoder_step": {
      "filename": "decoder_step.pt2",
      "size_mb": 45.07,
      "input_shapes": {
        "token": [
          1,
          1
        ],
        "h": [
          2,
          1,
          640
        ],
        "c": [
          2,
          1,
          640
        ]
      },
      "input_dtypes": {
        "token": "int64",
        "h": "float32",
        "c": "float32"
      },
      "output_shapes": {
        "g": [
          1,
          1,
          640
        ],
        "h": [
          2,
          1,
          640
        ],
        "c": [
          2,
          1,
          640
        ]
      }
    },
    "joint_step": {
      "filename": "joint_step.pt2",
      "size_mb": 24.14,
      "input_shapes": {
        "enc_frame": [
          1,
          1024,
          1
        ],
        "pred_frame": [
          1,
          640,
          1
        ]
      },
      "output_shape": [
        1,
        1,
        1,
        8198
      ]
    }
  },
  "tokenizer": {
    "saved": true,
    "method": "serialized_model_proto",
    "vocab_size": 8192
  },
  "litert": {
    "quant": "fp16",
    "results": [
      {
        "graph": "encoder",
        "source_artifact": "encoder_T1500.pt2",
        "output_artifact": "encoder_T1500.tflite",
        "size_mb": 1150.88,
        "convert_seconds": 158.59,
        "quant": "fp16",
        "torch_output_shapes": [
          [
            1,
            1024,
            188
          ],
          [
            1
          ]
        ],
        "parity": {
          "ok": true,
          "max_abs_diff": 0.0,
          "per_output_diffs": [
            [
              "shape mismatch",
              [
                1
              ],
              [
                1,
                1024,
                188
              ]
            ],
            [
              "shape mismatch",
              [
                1,
                1024,
                188
              ],
              [
                1
              ]
            ]
          ],
          "tflite_output_shapes": [
            [
              1
            ],
            [
              1,
              1024,
              188
            ]
          ],
          "torch_output_shapes": [
            [
              1,
              1024,
              188
            ],
            [
              1
            ]
          ]
        }
      },
      {
        "graph": "decoder_step",
        "source_artifact": "decoder_step.pt2",
        "output_artifact": "decoder_step.tflite",
        "size_mb": 22.55,
        "convert_seconds": 1.92,
        "quant": "fp16",
        "torch_output_shapes": [
          [
            1,
            1,
            640
          ],
          [
            2,
            1,
            640
          ],
          [
            2,
            1,
            640
          ]
        ],
        "parity": {
          "ok": true,
          "max_abs_diff": 0.0044100284576416016,
          "per_output_diffs": [
            [
              "shape mismatch",
              [
                2,
                1,
                640
              ],
              [
                1,
                1,
                640
              ]
            ],
            [
              "shape mismatch",
              [
                1,
                1,
                640
              ],
              [
                2,
                1,
                640
              ]
            ],
            0.0044100284576416016
          ],
          "tflite_output_shapes": [
            [
              2,
              1,
              640
            ],
            [
              1,
              1,
              640
            ],
            [
              2,
              1,
              640
            ]
          ],
          "torch_output_shapes": [
            [
              1,
              1,
              640
            ],
            [
              2,
              1,
              640
            ],
            [
              2,
              1,
              640
            ]
          ]
        }
      },
      {
        "graph": "joint_step",
        "source_artifact": "joint_step.pt2",
        "output_artifact": "joint_step.tflite",
        "size_mb": 12.08,
        "convert_seconds": 1.61,
        "quant": "fp16",
        "torch_output_shapes": [
          [
            1,
            1,
            1,
            8198
          ]
        ],
        "parity": {
          "ok": true,
          "max_abs_diff": 0.408447265625,
          "per_output_diffs": [
            0.408447265625
          ],
          "tflite_output_shapes": [
            [
              1,
              1,
              1,
              8198
            ]
          ],
          "torch_output_shapes": [
            [
              1,
              1,
              1,
              8198
            ]
          ]
        }
      }
    ]
  }
}