---
# Vocoder configuration: feature extractor -> backbone -> head, followed by
# training / evaluation settings.
#
# NOTE(review): this file was reconstructed from a single flattened line.
# Nesting follows the `class_path` / `init_args` pattern; the scalar keys from
# the second `sample_rate` onward were placed at the top level (siblings of
# `head`) rather than inside `head.init_args` -- confirm against the consumer.

# Input features: class to instantiate and its constructor arguments.
feature_extractor:
  class_path: model.vocos.feature_extractors.MelSpectrogramFeatures
  init_args:
    sample_rate: 16000
    n_fft: 512
    n_win: 512
    # Same value as head.init_args.hop_length below.
    n_hop: 128
    # Same value as backbone.init_args.input_channels below.
    n_mels: 80
    f_min: 0
    # Half of sample_rate.
    f_max: 8000
    power: 2
    center: true
    normalize: false
    onesided: true
    mel_norm: slaney
    mel_scale: slaney
    librosa_mel: true
    clip_val: 0.00001

# Backbone network: class to instantiate and its constructor arguments.
backbone:
  class_path: model.vocos.models.VocosBackbone
  init_args:
    input_channels: 80
    # Same value as head.init_args.dim below.
    dim: 512
    intermediate_dim: 1536
    num_layers: 8
    layer_scale_init_value: null
    adanorm_num_embeddings: null

# Output head: class to instantiate and its constructor arguments.
head:
  class_path: model.vocos.heads.ISTFTHead
  init_args:
    dim: 512
    # n_fft / hop_length carry the same values as the feature extractor's
    # n_fft / n_hop -- presumably they must stay in sync; verify.
    n_fft: 512
    hop_length: 128
    padding: center

# Training / evaluation settings.
# Same value as feature_extractor.init_args.sample_rate.
sample_rate: 16000
initial_learning_rate: 0.0005
num_warmup_steps: 0
mel_loss_coeff: 45.0
mrd_loss_coeff: 0.1
pretrain_mel_steps: 0
decay_mel_coeff: false
evaluate_utmos: true
evaluate_pesq: true
# NOTE(review): key spelling ("periodicty") kept exactly as in the original;
# it presumably has to match the consumer's parameter name -- do not "fix" it
# here without checking the code that reads this file.
evaluate_periodicty: true