Audio Classification
LiteRT
LiteRT
multilingual
speaker-embedding
speaker-recognition
diarization
on-device
soniqo
speech-cloud
speech-core
Instructions to use soniqo/WeSpeaker-ResNet34-LM-LiteRT with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- LiteRT
How to use soniqo/WeSpeaker-ResNet34-LM-LiteRT with LiteRT:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| { | |
| "model": "wespeaker-resnet34-LM", | |
| "format": "tflite", | |
| "takes_raw_audio": false, | |
| "fbank": { | |
| "num_mel_bins": 80, | |
| "sample_rate": 16000, | |
| "frame_length_ms": 25, | |
| "frame_shift_ms": 10, | |
| "window_type": "hamming", | |
| "dither": 0.0, | |
| "use_energy": false, | |
| "centering": "running_mean_per_window", | |
| "reference_implementation": "torchaudio.compliance.kaldi.fbank + pyannote wespeaker centering" | |
| }, | |
| "inputs": { | |
| "fbank": { | |
| "shape": [ | |
| 1, | |
| 298, | |
| 80 | |
| ], | |
| "dtype": "float32", | |
| "note": "Precomputed Kaldi-compatible mel fbank features. T=298 frames corresponds to 3 s of audio at 16000 Hz (25 ms window, 10 ms shift)." | |
| } | |
| }, | |
| "outputs": { | |
| "embedding": { | |
| "shape": [ | |
| 1, | |
| 256 | |
| ], | |
| "dtype": "float32" | |
| } | |
| } | |
| } |