| { |
| "export_timestamp": "[0.]", |
| "format": "iree_vmfb", |
| "models": [ |
| { |
| "model_type": "spectrostream_encoder_iree", |
| "format": "vmfb", |
| "input": { |
| "name": "audio", |
| "shape": [ |
| 1, |
| 96000, |
| 2 |
| ], |
| "dtype": "float32" |
| }, |
| "output": { |
| "name": "embeddings", |
| "shape": [ |
| 1, |
| 50, |
| 256 |
| ], |
| "dtype": "float32" |
| }, |
| "target_backend": "rocm", |
| "target_chip": "gfx1030", |
| "opt_level": 3, |
| "file_size_bytes": 0 |
| }, |
| { |
| "model_type": "spectrostream_decoder_iree", |
| "format": "vmfb", |
| "input": { |
| "name": "embeddings", |
| "shape": [ |
| 1, |
| 50, |
| 256 |
| ], |
| "dtype": "float32" |
| }, |
| "output": { |
| "name": "audio", |
| "shape": [ |
| 1, |
| 96000, |
| 2 |
| ], |
| "dtype": "float32" |
| }, |
| "target_backend": "rocm", |
| "target_chip": "gfx1030", |
| "opt_level": 3, |
| "file_size_bytes": 0 |
| }, |
| { |
| "model_type": "musiccoca_text_encoder_iree", |
| "format": "vmfb", |
| "inputs": [ |
| { |
| "name": "text_ids", |
| "shape": [ |
| 1, |
| 128 |
| ], |
| "dtype": "int32" |
| }, |
| { |
| "name": "padding", |
| "shape": [ |
| 1, |
| 128 |
| ], |
| "dtype": "float32" |
| } |
| ], |
| "output": { |
| "name": "embedding", |
| "shape": [ |
| 1, |
| 768 |
| ], |
| "dtype": "float32" |
| }, |
| "target_backend": "rocm", |
| "target_chip": "gfx1030", |
| "file_size_bytes": 0 |
| }, |
| { |
| "model_type": "musiccoca_audio_encoder_iree", |
| "format": "vmfb", |
| "input": { |
| "name": "audio", |
| "shape": [ |
| 1, |
| 160000 |
| ], |
| "dtype": "float32" |
| }, |
| "output": { |
| "name": "embedding", |
| "shape": [ |
| 1, |
| 768 |
| ], |
| "dtype": "float32" |
| }, |
| "target_backend": "rocm", |
| "target_chip": "gfx1030", |
| "file_size_bytes": 0 |
| }, |
| { |
| "model_type": "depthformer_base_encoder_iree", |
| "format": "vmfb", |
| "inputs": [ |
| { |
| "name": "context_tokens", |
| "shape": [ |
| 1, |
| 1006 |
| ], |
| "dtype": "int32" |
| }, |
| { |
| "name": "style_tokens", |
| "shape": [ |
| 1, |
| 6 |
| ], |
| "dtype": "int32" |
| } |
| ], |
| "output": { |
| "name": "encoder_hidden_states", |
| "shape": [ |
| 1, |
| 1006, |
| 768 |
| ], |
| "dtype": "float32" |
| }, |
| "target_backend": "rocm", |
| "target_chip": "gfx1030", |
| "file_size_bytes": 0 |
| }, |
| { |
| "model_type": "depthformer_base_decoder_step_iree", |
| "format": "vmfb", |
| "inputs": [ |
| { |
| "name": "target_token", |
| "shape": [ |
| 1, |
| 1 |
| ], |
| "dtype": "int32" |
| }, |
| { |
| "name": "encoder_hidden_states", |
| "shape": [ |
| 1, |
| 1006, |
| 768 |
| ], |
| "dtype": "float32" |
| } |
| ], |
| "output": { |
| "name": "logits", |
| "shape": [ |
| 1, |
| 16384 |
| ], |
| "dtype": "float32" |
| }, |
| "target_backend": "rocm", |
| "target_chip": "gfx1030", |
| "file_size_bytes": 0 |
| } |
| ], |
| "config": { |
| "target_backend": "rocm", |
| "target_chip": "gfx1030", |
| "opt_level": 3 |
| }, |
| "ue_path": "Content/MLModels/" |
| } |