# Feature-extractor component.
# NOTE(review): presumably the loader instantiates the class named by
# class_path with init_args as constructor keyword arguments — confirm
# against the code that reads this file.
feature_extractor:
  class_path: model.vocos.feature_extractors.MelSpectrogramFeatures
  init_args:
    # Same value as the top-level sample_rate key (16000).
    sample_rate: 16000
    # n_fft and n_hop carry the same values as the head's n_fft (512) and
    # hop_length (128); keep them in step when editing either stanza.
    n_fft: 512
    n_win: 512
    n_hop: 128
    # Same value as backbone.init_args.input_channels (80).
    n_mels: 80
    f_min: 0
    # Exactly half of sample_rate (16000 / 2).
    f_max: 8000
    power: 2
    # center: true here is paired with padding: center in the head stanza.
    # NOTE(review): presumably these two must agree — confirm.
    center: true
    normalize: false
    onesided: true
    mel_norm: slaney
    mel_scale: slaney
    # NOTE(review): presumably selects a librosa-style mel filter bank inside
    # MelSpectrogramFeatures — verify in the class implementation.
    librosa_mel: true
    # NOTE(review): presumably a lower clamp applied before a log is taken —
    # TODO confirm in MelSpectrogramFeatures.
    clip_val: 0.00001
# Backbone component, given in the same class_path / init_args form as the
# other component stanzas in this file.
backbone:
  class_path: model.vocos.models.VocosBackbone
  init_args:
    # Same value as feature_extractor.init_args.n_mels (80).
    input_channels: 80
    # Same value as head.init_args.dim (512).
    dim: 512
    # Equals 3 * dim (3 * 512 = 1536).
    intermediate_dim: 1536
    num_layers: 8
    # Both options below are explicitly null.
    # NOTE(review): presumably null means "use the class default" /
    # "feature disabled" — confirm in VocosBackbone.
    layer_scale_init_value: null
    adanorm_num_embeddings: null
# Output head component, given in the same class_path / init_args form as the
# other component stanzas in this file.
head:
  class_path: model.vocos.heads.ISTFTHead
  init_args:
    # Same value as backbone.init_args.dim (512).
    dim: 512
    # n_fft and hop_length carry the same values as the feature extractor's
    # n_fft (512) and n_hop (128); keep them in step when editing either.
    n_fft: 512
    hop_length: 128
    # Paired with center: true in the feature_extractor stanza.
    # NOTE(review): the set of accepted padding values is not visible here —
    # check ISTFTHead before changing.
    padding: center
# Top-level scalar options (not nested under any component stanza).
# Same value as feature_extractor.init_args.sample_rate (16000).
sample_rate: 16000
initial_learning_rate: 0.0005
# Zero warm-up steps configured.
num_warmup_steps: 0
# Loss weighting coefficients.
# NOTE(review): which loss terms these scale is defined by the consuming
# training code, not by this file — confirm there before tuning.
mel_loss_coeff: 45.0
mrd_loss_coeff: 0.1
# Zero mel-only pretraining steps configured; mel coefficient decay is off.
pretrain_mel_steps: 0
decay_mel_coeff: false
# Evaluation metric toggles; all three are enabled.
evaluate_utmos: true
evaluate_pesq: true
# NOTE(review): the key below is spelled "periodicty" (sic). It presumably
# matches the parameter name expected by the consuming code, so do not
# rename it here without changing/confirming the consumer as well.
evaluate_periodicty: true