| |
| seed_everything: 3407 |
| trainer: |
| logger: |
| class_path: pytorch_lightning.loggers.TensorBoardLogger |
| init_args: |
| save_dir: ./result/ |
| name: lightning_logs |
| version: null |
| log_graph: false |
| default_hp_metric: true |
| prefix: '' |
| sub_dir: null |
| logdir: null |
| comment: '' |
| purge_step: null |
| max_queue: 10 |
| flush_secs: 120 |
| filename_suffix: '' |
| write_to_disk: true |
| comet_config: |
| disabled: true |
| enable_checkpointing: true |
| callbacks: |
| - class_path: pytorch_lightning.callbacks.LearningRateMonitor |
| init_args: |
| logging_interval: null |
| log_momentum: false |
| - class_path: pytorch_lightning.callbacks.ModelSummary |
| init_args: |
| max_depth: 2 |
| - class_path: pytorch_lightning.callbacks.ModelCheckpoint |
| init_args: |
| dirpath: null |
| filename: wavtokenizer_checkpoint_{epoch}_{step}_{val_loss:.4f} |
| monitor: val_loss |
| verbose: false |
| save_last: true |
| save_top_k: 10 |
| save_weights_only: false |
| mode: min |
| auto_insert_metric_name: true |
| every_n_train_steps: 1000 |
| train_time_interval: null |
| every_n_epochs: null |
| save_on_train_epoch_end: null |
| - class_path: inspiremusic.wavtokenizer.decoder.helpers.GradNormCallback |
| default_root_dir: null |
| gradient_clip_val: null |
| gradient_clip_algorithm: null |
| num_nodes: 1 |
| num_processes: null |
| devices: -1 |
| gpus: null |
| auto_select_gpus: false |
| tpu_cores: null |
| ipus: null |
| enable_progress_bar: true |
| overfit_batches: 0.0 |
| track_grad_norm: -1 |
| check_val_every_n_epoch: 1 |
| fast_dev_run: false |
| accumulate_grad_batches: null |
| max_epochs: null |
| min_epochs: null |
| max_steps: 20000000 |
| min_steps: null |
| max_time: null |
| limit_train_batches: null |
| limit_val_batches: 100 |
| limit_test_batches: null |
| limit_predict_batches: null |
| val_check_interval: null |
| log_every_n_steps: 1000 |
| accelerator: gpu |
| strategy: ddp |
| sync_batchnorm: false |
| precision: 32 |
| enable_model_summary: true |
| num_sanity_val_steps: 2 |
| resume_from_checkpoint: null |
| profiler: null |
| benchmark: null |
| deterministic: null |
| reload_dataloaders_every_n_epochs: 0 |
| auto_lr_find: false |
| replace_sampler_ddp: true |
| detect_anomaly: false |
| auto_scale_batch_size: false |
| plugins: null |
| amp_backend: native |
| amp_level: null |
| move_metrics_to_cpu: false |
| multiple_trainloader_mode: max_size_cycle |
| inference_mode: true |
| ckpt_path: null |
| data: |
| class_path: inspiremusic.wavtokenizer.decoder.dataset.VocosDataModule |
| init_args: |
| train_params: |
| filelist_path: train.scp |
| sampling_rate: 24000 |
| num_samples: 72000 |
| batch_size: 38 |
| num_workers: 8 |
| val_params: |
| filelist_path: test.scp |
| sampling_rate: 24000 |
| num_samples: 72000 |
| batch_size: 10 |
| num_workers: 8 |
| model: |
| class_path: inspiremusic.wavtokenizer.decoder.experiment.WavTokenizer |
| init_args: |
| feature_extractor: |
| class_path: inspiremusic.wavtokenizer.decoder.feature_extractors.EncodecFeatures |
| init_args: |
| encodec_model: encodec_24khz |
| bandwidths: |
| - 6.6 |
| - 6.6 |
| - 6.6 |
| - 6.6 |
| train_codebooks: true |
| num_quantizers: 1 |
| dowmsamples: |
| - 8 |
| - 5 |
| - 4 |
| - 2 |
| vq_bins: 4096 |
| vq_kmeans: 200 |
| backbone: |
| class_path: inspiremusic.wavtokenizer.decoder.models.VocosBackbone |
| init_args: |
| input_channels: 512 |
| dim: 768 |
| intermediate_dim: 2304 |
| num_layers: 12 |
| layer_scale_init_value: null |
| adanorm_num_embeddings: 4 |
| head: |
| class_path: inspiremusic.wavtokenizer.decoder.heads.ISTFTHead |
| init_args: |
| dim: 768 |
| n_fft: 1280 |
| hop_length: 320 |
| padding: same |
| resume_config: config.yaml |
| resume_model: last.ckpt |
| sample_rate: 24000 |
| initial_learning_rate: 0.0001 |
| num_warmup_steps: 0 |
| mel_loss_coeff: 45.0 |
| mrd_loss_coeff: 1.0 |
| pretrain_mel_steps: 0 |
| decay_mel_coeff: false |
| evaluate_utmos: false |
| evaluate_pesq: true |
| evaluate_periodicty: true |
| resume: true |
|
|