{
  "model_type": "vui",
  "library_name": "vui",
  "pipeline_tag": "text-to-speech",
  "license": "mit",
  "language": ["en"],
  "architectures": ["VuiForConditionalGeneration"],
  "model_files": {
    "base": "vui-100m-base.pt",
    "abraham": "vui-abraham-100m.pt",
    "cohost": "vui-cohost-100m.pt",
    "cohost_alt": "ckpts-vui-cohost-100m.pt",
    "tokenizer": "fluac-22hz-22khz.pt"
  },
  "model_variants": {
    "vui-100m-base": {
      "description": "Base checkpoint trained on 40k hours of audio conversations",
      "file": "vui-100m-base.pt",
      "size_mb": 198
    },
    "vui-abraham-100m": {
      "description": "Single speaker model with context awareness",
      "file": "vui-abraham-100m.pt",
      "size_mb": 198
    },
    "vui-cohost-100m": {
      "description": "Two speakers that can interact with each other",
      "file": "vui-cohost-100m.pt",
      "size_mb": 198
    }
  },
  "tokenizer_config": {
    "audio_tokenizer": "fluac",
    "sample_rate": "22khz",
    "file": "fluac-22hz-22khz.pt",
    "size_mb": 307
  },
  "training_data": {
    "hours": 40000,
    "type": "audio_conversations"
  },
  "capabilities": [
    "text-to-speech",
    "conversational-speech",
    "voice-cloning",
    "on-device-inference"
  ],
  "torch_dtype": "float32",
  "framework": "pytorch"
}