| backbone: |
| class_path: vocos.models.VocosBackbone |
| init_args: |
| adanorm_num_embeddings: null |
| dim: 384 |
| input_channels: 1024 |
| intermediate_dim: 1152 |
| layer_scale_init_value: null |
| num_layers: 8 |
| decay_mel_coeff: false |
| evaluate_periodicty: true |
| evaluate_pesq: true |
| evaluate_utmos: true |
| feature_extractor: |
| class_path: vocos.feature_extractors.DACFeatures |
| head: |
| class_path: vocos.heads.ISTFTHead |
| init_args: |
| dim: 384 |
| hop_length: 320 |
| n_fft: 1280 |
| padding: same |
| initial_learning_rate: 0.0005 |
| mel_loss_coeff: 45.0 |
| mrd_loss_coeff: 1.0 |
| num_warmup_steps: 0 |
| pretrain_mel_steps: 0 |
| sample_rate: 16000 |
|
|