{
  "model_type": "depthformer_base_decoder_step",
  "inputs": [
    {
      "name": "target_token",
      "shape": [1, 1],
      "dtype": "int32"
    },
    {
      "name": "encoder_hidden_states",
      "shape": [1, 1006, 768],
      "dtype": "float32"
    },
    {
      "name": "kv_cache_keys",
      "shape": "dynamic",
      "dtype": "float32"
    },
    {
      "name": "kv_cache_values",
      "shape": "dynamic",
      "dtype": "float32"
    }
  ],
  "outputs": [
    {
      "name": "logits",
      "shape": [1, 16384],
      "dtype": "float32"
    },
    {
      "name": "new_kv_cache_keys",
      "shape": "dynamic",
      "dtype": "float32"
    },
    {
      "name": "new_kv_cache_values",
      "shape": "dynamic",
      "dtype": "float32"
    }
  ],
  "model_config": {
    "embed_dim": 768,
    "num_heads": 12,
    "num_decoder_layers": 12,
    "mlp_dim": 2048
  },
  "kv_cache": {
    "max_length": 1806,
    "num_heads": 12,
    "head_dim": 64
  },
  "opset_version": 18,
  "ir_version": 8,
  "precision": "fp16"
}