{
  "_name_or_path": "TuKoResearch/WavTokenizer",
  "architectures": ["WavTokenizer"],
  "auto_map": {
    "AutoConfig": "configuration_wavtokenizer.WavTokenizerConfig",
    "AutoModel": "modeling_wavtokenizer.WavTokenizer"
  },
  "model_type": "wavtokenizer",

  "sample_rate": 24000,
  "n_fft": 1280,
  "hop_length": 320,
  "n_mels": 128,
  "padding": "center",

  "feature_dim": 512,
  "encoder_dim": 32,
  "encoder_rates": [2, 4, 5, 8],
  "latent_dim": 512,

  "codebook_size": 4096,
  "codebook_dim": 512,
  "num_quantizers": 1,

  "backbone_type": "vocos",
  "backbone_dim": 768,
  "backbone_num_blocks": 12,
  "backbone_intermediate_dim": 2304,
  "backbone_kernel_size": 7,
  "backbone_layer_scale_init_value": 1e-6,

  "head_type": "istft",
  "head_dim": 641,

  "use_attention": false,
  "attention_dim": 768,
  "attention_heads": 8,
  "attention_layers": 0,

  "torch_dtype": "float32",
  "transformers_version": "4.40.0"
}
|