# WavCube / WavCube-pro / config.yaml
# Provenance: uploaded by yhaha via upload-large-folder tool (commit a75b2a4, verified)
# pytorch_lightning==1.8.6
seed_everything: 4444
trainer:
  # TensorBoard logging under the stage-2 experiment directory.
  logger:
    class_path: pytorch_lightning.loggers.TensorBoardLogger
    init_args:
      save_dir: logs/wavlmvae-mimo-librispeech-stage2_kl1e-4_ae_300mdeco_6k
      name: first
      version: null
      log_graph: false
      default_hp_metric: true
      prefix: ''
      sub_dir: null
      logdir: null
      comment: ''
      purge_step: null
      max_queue: 10
      flush_secs: 120
      filename_suffix: ''
      write_to_disk: true
      comet_config:
        disabled: true
  enable_checkpointing: true
  callbacks:
    - class_path: pytorch_lightning.callbacks.LearningRateMonitor
      init_args:
        logging_interval: null
        log_momentum: false
    - class_path: pytorch_lightning.callbacks.ModelSummary
      init_args:
        max_depth: 2
    - class_path: pytorch_lightning.callbacks.ModelCheckpoint
      init_args:
        dirpath: null
        # Quoted: the value contains `{...}` metric templates — keep YAML from
        # ever misreading it; the string itself is unchanged.
        filename: 'vocos_checkpoint_{epoch}_{step}_{val_loss:.4f}'
        monitor: val_loss
        verbose: false
        save_last: true
        # save_top_k: -1 keeps every checkpoint (one per 1000 train steps below).
        save_top_k: -1
        save_weights_only: false
        mode: min
        auto_insert_metric_name: true
        every_n_train_steps: 1000
        train_time_interval: null
        every_n_epochs: null
        save_on_train_epoch_end: null
    - class_path: vocos.helpers.GradNormCallback
  default_root_dir: null
  gradient_clip_val: null
  gradient_clip_algorithm: null
  # Multi-node DDP run: 8 nodes x 8 GPUs.
  num_nodes: 8
  num_processes: null
  devices: '8'
  gpus: null
  auto_select_gpus: false
  tpu_cores: null
  ipus: null
  enable_progress_bar: true
  overfit_batches: 0.0
  track_grad_norm: -1
  check_val_every_n_epoch: 1
  fast_dev_run: false
  accumulate_grad_batches: null
  max_epochs: null
  min_epochs: null
  # Step-based budget; epochs are unbounded.
  max_steps: 1000000
  min_steps: null
  max_time: null
  limit_train_batches: null
  limit_val_batches: 100
  limit_test_batches: null
  limit_predict_batches: null
  val_check_interval: null
  log_every_n_steps: 50
  accelerator: gpu
  strategy: ddp
  sync_batchnorm: false
  precision: 32
  enable_model_summary: true
  num_sanity_val_steps: 2
  # Resumes stage-2 training from a mid-run checkpoint (epoch 11, step 70k).
  resume_from_checkpoint: logs/wavlmvae-mimo-librispeech-stage2_kl1e-4_ae_300mdeco_6k/first/version_2/checkpoints/vocos_checkpoint_epoch=11_step=70000_val_loss=3.5162.ckpt
  profiler: null
  benchmark: null
  deterministic: null
  reload_dataloaders_every_n_epochs: 0
  auto_lr_find: false
  replace_sampler_ddp: true
  detect_anomaly: false
  auto_scale_batch_size: false
  plugins: null
  amp_backend: native
  amp_level: null
  move_metrics_to_cpu: false
  multiple_trainloader_mode: max_size_cycle
  inference_mode: true
model:
  class_path: vocos.experiment.MiMoWavLMVAEExp
  init_args:
    # Frozen WavLM-large features fed through a 128-dim VAE bottleneck
    # (stage 2: initialised from the stage-1 checkpoint below).
    feature_extractor:
      class_path: vocos.feature_extractors.WavLMVAEFeatures
      init_args:
        model_id: ckpts/wavlm-large
        layer_idx: -1
        freeze_model: true
        latent_dim: 128
        stage: 2
        stage1_ckpt_path: logs/wavlmvae-mimo-librispeech-stage1_kl1e-4_ae_300mdeco_6k/first/version_2/checkpoints/vocos_checkpoint_epoch=41_step=138000_val_loss=6.2627.ckpt
        use_vae: false
        use_sigma_vae: false
        use_temporal_downsampling: false
        apply_mask: false
        mask_time_prob: 0.15
        mask_time_length: 10
    # ~300M-parameter transformer decoder backbone; STFT params match 16 kHz audio
    # (hop 160 = 10 ms, window/nfft 640 = 40 ms).
    backbone:
      class_path: vocos.models.MiMoBackbone
      init_args:
        d_model: 1024
        decoder_attention_heads: 16
        decoder_ffn_dim: 4096
        sampling_rate: 16000
        hop_length: 160
        window_size: 640
        nfft: 640
        upsample: true
        latent_dim: 128
        decoder_layers: 24
    head: null
    sample_rate: 16000
    initial_learning_rate: 0.0001
    num_warmup_steps: 5000
    # Loss weighting for the GAN-style vocoder objective.
    mel_loss_coeff: 4.5
    mrd_loss_coeff: 1.0
    kl_loss_coeff: 0.0001
    sr_loss_coeff: 1.0
    gan_loss_coeff: 0.1
    pretrain_mel_steps: 0
    decay_mel_coeff: false
    evaluate_utmos: true
    evaluate_pesq: true
    # NOTE(review): "periodicty" (sic) — keep the misspelling; it must match the
    # parameter name declared in the experiment code.
    evaluate_periodicty: false
    evaluate_stoi: false
    evaluate_pesq_wb: false
    evaluate_sim: true
data:
  class_path: vocos.dataset.VocosEmiliaDataModule
  init_args:
    train_params:
      # NOTE(review): "meidum" (sic) — path kept byte-identical; it must match
      # the filelist name on disk.
      filelist_path: data/librispeech_train_librilight_small_meidum_6k
      sampling_rate: 16000
      # 160000 samples @ 16 kHz = 10 s clips.
      num_samples: 160000
      batch_size: 8
      num_workers: 8
    val_params:
      filelist_path: data/librispeech_test_clean
      sampling_rate: 16000
      num_samples: 160000
      batch_size: 8
      num_workers: 8
# Optimizer/scheduler are configured inside the LightningModule, not via the CLI.
optimizer: null
lr_scheduler: null