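# Page header text captured alongside the config; kept as comments so the
# file parses as YAML.
# Audio-to-Audio
# English
# cn
# CleanMel / configs / vocos_offline.yaml
# SaoYear's picture
# Upload folder using huggingface_hub
# cf82a4e verified
# raw
# history blame contribute delete
# 936 Bytes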
# Feature-extractor component.
# Layout: `class_path` names the class and `init_args` holds its constructor
# keyword arguments (presumably jsonargparse-style instantiation — confirm
# against the loader).
# NOTE(review): nesting restored here; with every key at column 0,
# `class_path`, `init_args`, `sample_rate`, `n_fft` and `dim` were repeated
# at a single mapping level.
feature_extractor:
  class_path: model.vocos.feature_extractors.MelSpectrogramFeatures
  init_args:
    sample_rate: 16000
    # Presumably FFT size, window length and hop size in samples — confirm
    # against MelSpectrogramFeatures.
    n_fft: 512
    n_win: 512
    n_hop: 128
    # 80 matches `input_channels` of the backbone block in this file.
    n_mels: 80
    # f_max equals sample_rate / 2 (16000 / 2 = 8000).
    f_min: 0
    f_max: 8000
    # Presumably the spectrogram magnitude exponent (2 = power) — confirm.
    power: 2
    center: true
    normalize: false
    onesided: true
    mel_norm: slaney
    mel_scale: slaney
    librosa_mel: true
    # Presumably the lower clamp applied before taking the log — confirm.
    clip_val: 0.00001
# Backbone component.
# Layout: `class_path` names the class and `init_args` holds its constructor
# keyword arguments (presumably jsonargparse-style instantiation — confirm
# against the loader).
# NOTE(review): nesting restored here; with every key at column 0 this
# block's `class_path`, `init_args` and `dim` collided with the same keys of
# the other components.
backbone:
  class_path: model.vocos.models.VocosBackbone
  init_args:
    # 80 matches `n_mels` of the feature_extractor block in this file.
    input_channels: 80
    # 512 matches `dim` of the head block in this file.
    dim: 512
    # 1536 is 3 x dim.
    intermediate_dim: 1536
    num_layers: 8
    # Explicit nulls: no value is supplied for these two arguments; what the
    # class does with null is not visible here — confirm against
    # VocosBackbone.
    layer_scale_init_value: null
    adanorm_num_embeddings: null
# Head component.
# Layout: `class_path` names the class and `init_args` holds its constructor
# keyword arguments (presumably jsonargparse-style instantiation — confirm
# against the loader).
# NOTE(review): nesting restored here. `init_args` is taken to end at
# `padding`; the keys that follow (`sample_rate`, `initial_learning_rate`,
# ...) are treated as top-level settings — confirm against ISTFTHead's
# signature.
head:
  class_path: model.vocos.heads.ISTFTHead
  init_args:
    # 512 matches `dim` of the backbone block in this file.
    dim: 512
    # 512 / 128 match `n_fft` / `n_hop` of the feature_extractor block.
    n_fft: 512
    hop_length: 128
    # String option; `center: true` is also set on the feature extractor.
    # NOTE(review): presumably these two must agree — confirm.
    padding: center
# Top-level experiment settings (not nested under any component).

# Same value as `sample_rate` in the feature_extractor block.
sample_rate: 16000

# Optimisation. NOTE(review): names suggest the starting learning rate and
# the number of scheduler warm-up steps — confirm against the consumer.
initial_learning_rate: 0.0005
num_warmup_steps: 0

# Loss weighting. NOTE(review): presumably weights for the mel and "mrd"
# loss terms, plus mel-only pretraining / coefficient decay switches —
# confirm against the consumer.
mel_loss_coeff: 45.0
mrd_loss_coeff: 0.1
pretrain_mel_steps: 0
decay_mel_coeff: false

# Evaluation switches.
evaluate_utmos: true
evaluate_pesq: true
# NOTE(review): the key is spelled `evaluate_periodicty` (sic). Keys must
# match the consumer's parameter names, so keep this spelling unless the
# consuming code is confirmed to use a different one.
evaluate_periodicty: true