| |
| seed_everything: 4444 |
| trainer: |
| logger: |
| class_path: pytorch_lightning.loggers.TensorBoardLogger |
| init_args: |
| save_dir: logs/wavlmvae-mimo-librispeech-stage2_kl1e-4_ae_300mdeco_6k |
| name: first |
| version: null |
| log_graph: false |
| default_hp_metric: true |
| prefix: '' |
| sub_dir: null |
| logdir: null |
| comment: '' |
| purge_step: null |
| max_queue: 10 |
| flush_secs: 120 |
| filename_suffix: '' |
| write_to_disk: true |
| comet_config: |
| disabled: true |
| enable_checkpointing: true |
| callbacks: |
| - class_path: pytorch_lightning.callbacks.LearningRateMonitor |
| init_args: |
| logging_interval: null |
| log_momentum: false |
| - class_path: pytorch_lightning.callbacks.ModelSummary |
| init_args: |
| max_depth: 2 |
| - class_path: pytorch_lightning.callbacks.ModelCheckpoint |
| init_args: |
| dirpath: null |
| filename: vocos_checkpoint_{epoch}_{step}_{val_loss:.4f} |
| monitor: val_loss |
| verbose: false |
| save_last: true |
| save_top_k: -1 |
| save_weights_only: false |
| mode: min |
| auto_insert_metric_name: true |
| every_n_train_steps: 1000 |
| train_time_interval: null |
| every_n_epochs: null |
| save_on_train_epoch_end: null |
| - class_path: vocos.helpers.GradNormCallback |
| default_root_dir: null |
| gradient_clip_val: null |
| gradient_clip_algorithm: null |
| num_nodes: 8 |
| num_processes: null |
| devices: '8' |
| gpus: null |
| auto_select_gpus: false |
| tpu_cores: null |
| ipus: null |
| enable_progress_bar: true |
| overfit_batches: 0.0 |
| track_grad_norm: -1 |
| check_val_every_n_epoch: 1 |
| fast_dev_run: false |
| accumulate_grad_batches: null |
| max_epochs: null |
| min_epochs: null |
| max_steps: 1000000 |
| min_steps: null |
| max_time: null |
| limit_train_batches: null |
| limit_val_batches: 100 |
| limit_test_batches: null |
| limit_predict_batches: null |
| val_check_interval: null |
| log_every_n_steps: 50 |
| accelerator: gpu |
| strategy: ddp |
| sync_batchnorm: false |
| precision: 32 |
| enable_model_summary: true |
| num_sanity_val_steps: 2 |
| resume_from_checkpoint: logs/wavlmvae-mimo-librispeech-stage2_kl1e-4_ae_300mdeco_6k/first/version_2/checkpoints/vocos_checkpoint_epoch=11_step=70000_val_loss=3.5162.ckpt |
| profiler: null |
| benchmark: null |
| deterministic: null |
| reload_dataloaders_every_n_epochs: 0 |
| auto_lr_find: false |
| replace_sampler_ddp: true |
| detect_anomaly: false |
| auto_scale_batch_size: false |
| plugins: null |
| amp_backend: native |
| amp_level: null |
| move_metrics_to_cpu: false |
| multiple_trainloader_mode: max_size_cycle |
| inference_mode: true |
| model: |
| class_path: vocos.experiment.MiMoWavLMVAEExp |
| init_args: |
| feature_extractor: |
| class_path: vocos.feature_extractors.WavLMVAEFeatures |
| init_args: |
| model_id: ckpts/wavlm-large |
| layer_idx: -1 |
| freeze_model: true |
| latent_dim: 128 |
| stage: 2 |
| stage1_ckpt_path: logs/wavlmvae-mimo-librispeech-stage1_kl1e-4_ae_300mdeco_6k/first/version_2/checkpoints/vocos_checkpoint_epoch=41_step=138000_val_loss=6.2627.ckpt |
| use_vae: false |
| use_sigma_vae: false |
| use_temporal_downsampling: false |
| apply_mask: false |
| mask_time_prob: 0.15 |
| mask_time_length: 10 |
| backbone: |
| class_path: vocos.models.MiMoBackbone |
| init_args: |
| d_model: 1024 |
| decoder_attention_heads: 16 |
| decoder_ffn_dim: 4096 |
| sampling_rate: 16000 |
| hop_length: 160 |
| window_size: 640 |
| nfft: 640 |
| upsample: true |
| latent_dim: 128 |
| decoder_layers: 24 |
| head: null |
| sample_rate: 16000 |
| initial_learning_rate: 0.0001 |
| num_warmup_steps: 5000 |
| mel_loss_coeff: 4.5 |
| mrd_loss_coeff: 1.0 |
| kl_loss_coeff: 0.0001 |
| sr_loss_coeff: 1.0 |
| gan_loss_coeff: 0.1 |
| pretrain_mel_steps: 0 |
| decay_mel_coeff: false |
| evaluate_utmos: true |
| evaluate_pesq: true |
| evaluate_periodicty: false |
| evaluate_stoi: false |
| evaluate_pesq_wb: false |
| evaluate_sim: true |
| data: |
| class_path: vocos.dataset.VocosEmiliaDataModule |
| init_args: |
| train_params: |
| filelist_path: data/librispeech_train_librilight_small_meidum_6k |
| sampling_rate: 16000 |
| num_samples: 160000 |
| batch_size: 8 |
| num_workers: 8 |
| val_params: |
| filelist_path: data/librispeech_test_clean |
| sampling_rate: 16000 |
| num_samples: 160000 |
| batch_size: 8 |
| num_workers: 8 |
| optimizer: null |
| lr_scheduler: null |
|
|