yhaha committed on
Commit
a75b2a4
·
verified ·
1 Parent(s): 033a22d

Add files using upload-large-folder tool

Browse files
WavCube-pro/checkpoints/vocos_checkpoint_epoch=34_step=200000_val_loss=3.2140.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3be60dc837b1c90f4717d4b87a0616295785087b683212b27d8883ef1e94f684
3
+ size 11575027210
WavCube-pro/config.yaml ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pytorch_lightning==1.8.6
2
+ seed_everything: 4444
3
+ trainer:
4
+ logger:
5
+ class_path: pytorch_lightning.loggers.TensorBoardLogger
6
+ init_args:
7
+ save_dir: logs/wavlmvae-mimo-librispeech-stage2_kl1e-4_ae_300mdeco_6k
8
+ name: first
9
+ version: null
10
+ log_graph: false
11
+ default_hp_metric: true
12
+ prefix: ''
13
+ sub_dir: null
14
+ logdir: null
15
+ comment: ''
16
+ purge_step: null
17
+ max_queue: 10
18
+ flush_secs: 120
19
+ filename_suffix: ''
20
+ write_to_disk: true
21
+ comet_config:
22
+ disabled: true
23
+ enable_checkpointing: true
24
+ callbacks:
25
+ - class_path: pytorch_lightning.callbacks.LearningRateMonitor
26
+ init_args:
27
+ logging_interval: null
28
+ log_momentum: false
29
+ - class_path: pytorch_lightning.callbacks.ModelSummary
30
+ init_args:
31
+ max_depth: 2
32
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
33
+ init_args:
34
+ dirpath: null
35
+ filename: vocos_checkpoint_{epoch}_{step}_{val_loss:.4f}
36
+ monitor: val_loss
37
+ verbose: false
38
+ save_last: true
39
+ save_top_k: -1
40
+ save_weights_only: false
41
+ mode: min
42
+ auto_insert_metric_name: true
43
+ every_n_train_steps: 1000
44
+ train_time_interval: null
45
+ every_n_epochs: null
46
+ save_on_train_epoch_end: null
47
+ - class_path: vocos.helpers.GradNormCallback
48
+ default_root_dir: null
49
+ gradient_clip_val: null
50
+ gradient_clip_algorithm: null
51
+ num_nodes: 8
52
+ num_processes: null
53
+ devices: '8'
54
+ gpus: null
55
+ auto_select_gpus: false
56
+ tpu_cores: null
57
+ ipus: null
58
+ enable_progress_bar: true
59
+ overfit_batches: 0.0
60
+ track_grad_norm: -1
61
+ check_val_every_n_epoch: 1
62
+ fast_dev_run: false
63
+ accumulate_grad_batches: null
64
+ max_epochs: null
65
+ min_epochs: null
66
+ max_steps: 1000000
67
+ min_steps: null
68
+ max_time: null
69
+ limit_train_batches: null
70
+ limit_val_batches: 100
71
+ limit_test_batches: null
72
+ limit_predict_batches: null
73
+ val_check_interval: null
74
+ log_every_n_steps: 50
75
+ accelerator: gpu
76
+ strategy: ddp
77
+ sync_batchnorm: false
78
+ precision: 32
79
+ enable_model_summary: true
80
+ num_sanity_val_steps: 2
81
+ resume_from_checkpoint: logs/wavlmvae-mimo-librispeech-stage2_kl1e-4_ae_300mdeco_6k/first/version_2/checkpoints/vocos_checkpoint_epoch=11_step=70000_val_loss=3.5162.ckpt
82
+ profiler: null
83
+ benchmark: null
84
+ deterministic: null
85
+ reload_dataloaders_every_n_epochs: 0
86
+ auto_lr_find: false
87
+ replace_sampler_ddp: true
88
+ detect_anomaly: false
89
+ auto_scale_batch_size: false
90
+ plugins: null
91
+ amp_backend: native
92
+ amp_level: null
93
+ move_metrics_to_cpu: false
94
+ multiple_trainloader_mode: max_size_cycle
95
+ inference_mode: true
96
+ model:
97
+ class_path: vocos.experiment.MiMoWavLMVAEExp
98
+ init_args:
99
+ feature_extractor:
100
+ class_path: vocos.feature_extractors.WavLMVAEFeatures
101
+ init_args:
102
+ model_id: ckpts/wavlm-large
103
+ layer_idx: -1
104
+ freeze_model: true
105
+ latent_dim: 128
106
+ stage: 2
107
+ stage1_ckpt_path: logs/wavlmvae-mimo-librispeech-stage1_kl1e-4_ae_300mdeco_6k/first/version_2/checkpoints/vocos_checkpoint_epoch=41_step=138000_val_loss=6.2627.ckpt
108
+ use_vae: false
109
+ use_sigma_vae: false
110
+ use_temporal_downsampling: false
111
+ apply_mask: false
112
+ mask_time_prob: 0.15
113
+ mask_time_length: 10
114
+ backbone:
115
+ class_path: vocos.models.MiMoBackbone
116
+ init_args:
117
+ d_model: 1024
118
+ decoder_attention_heads: 16
119
+ decoder_ffn_dim: 4096
120
+ sampling_rate: 16000
121
+ hop_length: 160
122
+ window_size: 640
123
+ nfft: 640
124
+ upsample: true
125
+ latent_dim: 128
126
+ decoder_layers: 24
127
+ head: null
128
+ sample_rate: 16000
129
+ initial_learning_rate: 0.0001
130
+ num_warmup_steps: 5000
131
+ mel_loss_coeff: 4.5
132
+ mrd_loss_coeff: 1.0
133
+ kl_loss_coeff: 0.0001
134
+ sr_loss_coeff: 1.0
135
+ gan_loss_coeff: 0.1
136
+ pretrain_mel_steps: 0
137
+ decay_mel_coeff: false
138
+ evaluate_utmos: true
139
+ evaluate_pesq: true
140
+ evaluate_periodicty: false
141
+ evaluate_stoi: false
142
+ evaluate_pesq_wb: false
143
+ evaluate_sim: true
144
+ data:
145
+ class_path: vocos.dataset.VocosEmiliaDataModule
146
+ init_args:
147
+ train_params:
148
+ filelist_path: data/librispeech_train_librilight_small_meidum_6k
149
+ sampling_rate: 16000
150
+ num_samples: 160000
151
+ batch_size: 8
152
+ num_workers: 8
153
+ val_params:
154
+ filelist_path: data/librispeech_test_clean
155
+ sampling_rate: 16000
156
+ num_samples: 160000
157
+ batch_size: 8
158
+ num_workers: 8
159
+ optimizer: null
160
+ lr_scheduler: null
WavCube-pro/hparams.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ sample_rate: 16000
2
+ initial_learning_rate: 0.0001
3
+ num_warmup_steps: 5000
4
+ mel_loss_coeff: 4.5
5
+ mrd_loss_coeff: 1.0
6
+ pretrain_mel_steps: 0
7
+ decay_mel_coeff: false
8
+ evaluate_utmos: true
9
+ evaluate_pesq: true
10
+ evaluate_periodicty: false
11
+ evaluate_stoi: false
12
+ evaluate_pesq_wb: false
13
+ evaluate_sim: true
14
+ kl_loss_coeff: 0.0001
15
+ sr_loss_coeff: 1.0
16
+ gan_loss_coeff: 0.1
WavCube/checkpoints/vocos_checkpoint_epoch=177_step=195000_val_loss=3.3080.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba70c7d19271036b1a2cc5efaba3470fe81eced7b9cff3803515032831f5d609
3
+ size 11574970721
WavCube/config.yaml ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pytorch_lightning==1.8.6
2
+ seed_everything: 4444
3
+ trainer:
4
+ logger:
5
+ class_path: pytorch_lightning.loggers.TensorBoardLogger
6
+ init_args:
7
+ save_dir: logs/wavlmvae-mimo-librispeech-stage2_kl1e-4_ae_300mdeco
8
+ name: first
9
+ version: null
10
+ log_graph: false
11
+ default_hp_metric: true
12
+ prefix: ''
13
+ sub_dir: null
14
+ logdir: null
15
+ comment: ''
16
+ purge_step: null
17
+ max_queue: 10
18
+ flush_secs: 120
19
+ filename_suffix: ''
20
+ write_to_disk: true
21
+ comet_config:
22
+ disabled: true
23
+ enable_checkpointing: true
24
+ callbacks:
25
+ - class_path: pytorch_lightning.callbacks.LearningRateMonitor
26
+ init_args:
27
+ logging_interval: null
28
+ log_momentum: false
29
+ - class_path: pytorch_lightning.callbacks.ModelSummary
30
+ init_args:
31
+ max_depth: 2
32
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
33
+ init_args:
34
+ dirpath: null
35
+ filename: vocos_checkpoint_{epoch}_{step}_{val_loss:.4f}
36
+ monitor: val_loss
37
+ verbose: false
38
+ save_last: true
39
+ save_top_k: -1
40
+ save_weights_only: false
41
+ mode: min
42
+ auto_insert_metric_name: true
43
+ every_n_train_steps: 5000
44
+ train_time_interval: null
45
+ every_n_epochs: null
46
+ save_on_train_epoch_end: null
47
+ - class_path: vocos.helpers.GradNormCallback
48
+ default_root_dir: null
49
+ gradient_clip_val: null
50
+ gradient_clip_algorithm: null
51
+ num_nodes: 8
52
+ num_processes: null
53
+ devices: '8'
54
+ gpus: null
55
+ auto_select_gpus: false
56
+ tpu_cores: null
57
+ ipus: null
58
+ enable_progress_bar: true
59
+ overfit_batches: 0.0
60
+ track_grad_norm: -1
61
+ check_val_every_n_epoch: 1
62
+ fast_dev_run: false
63
+ accumulate_grad_batches: null
64
+ max_epochs: null
65
+ min_epochs: null
66
+ max_steps: 1000000
67
+ min_steps: null
68
+ max_time: null
69
+ limit_train_batches: null
70
+ limit_val_batches: 100
71
+ limit_test_batches: null
72
+ limit_predict_batches: null
73
+ val_check_interval: null
74
+ log_every_n_steps: 50
75
+ accelerator: gpu
76
+ strategy: ddp
77
+ sync_batchnorm: false
78
+ precision: 32
79
+ enable_model_summary: true
80
+ num_sanity_val_steps: 2
81
+ resume_from_checkpoint: null
82
+ profiler: null
83
+ benchmark: null
84
+ deterministic: null
85
+ reload_dataloaders_every_n_epochs: 0
86
+ auto_lr_find: false
87
+ replace_sampler_ddp: true
88
+ detect_anomaly: false
89
+ auto_scale_batch_size: false
90
+ plugins: null
91
+ amp_backend: native
92
+ amp_level: null
93
+ move_metrics_to_cpu: false
94
+ multiple_trainloader_mode: max_size_cycle
95
+ inference_mode: true
96
+ model:
97
+ class_path: vocos.experiment.MiMoWavLMVAEExp
98
+ init_args:
99
+ feature_extractor:
100
+ class_path: vocos.feature_extractors.WavLMVAEFeatures
101
+ init_args:
102
+ model_id: ckpts/wavlm-large
103
+ layer_idx: -1
104
+ freeze_model: true
105
+ latent_dim: 128
106
+ stage: 2
107
+ stage1_ckpt_path: logs/wavlmvae-mimo-librispeech-stage1_kl1e-4_ae_300mdeco/first/version_2/checkpoints/vocos_checkpoint_epoch=219_step=138000_val_loss=6.6572.ckpt
108
+ use_vae: false
109
+ use_sigma_vae: false
110
+ use_temporal_downsampling: false
111
+ backbone:
112
+ class_path: vocos.models.MiMoBackbone
113
+ init_args:
114
+ d_model: 1024
115
+ decoder_attention_heads: 16
116
+ decoder_ffn_dim: 4096
117
+ sampling_rate: 16000
118
+ hop_length: 160
119
+ window_size: 640
120
+ nfft: 640
121
+ upsample: true
122
+ latent_dim: 128
123
+ decoder_layers: 24
124
+ head: null
125
+ sample_rate: 16000
126
+ initial_learning_rate: 0.0001
127
+ num_warmup_steps: 5000
128
+ mel_loss_coeff: 4.5
129
+ mrd_loss_coeff: 1.0
130
+ kl_loss_coeff: 0.0001
131
+ sr_loss_coeff: 1.0
132
+ gan_loss_coeff: 0.1
133
+ pretrain_mel_steps: 0
134
+ decay_mel_coeff: false
135
+ evaluate_utmos: true
136
+ evaluate_pesq: true
137
+ evaluate_periodicty: false
138
+ evaluate_stoi: false
139
+ evaluate_pesq_wb: false
140
+ evaluate_sim: true
141
+ data:
142
+ class_path: vocos.dataset.VocosDataModule
143
+ init_args:
144
+ train_params:
145
+ filelist_path: data/librispeech_train
146
+ sampling_rate: 16000
147
+ num_samples: 160000
148
+ batch_size: 8
149
+ num_workers: 8
150
+ val_params:
151
+ filelist_path: data/librispeech_test_clean
152
+ sampling_rate: 16000
153
+ num_samples: 160000
154
+ batch_size: 8
155
+ num_workers: 8
156
+ optimizer: null
157
+ lr_scheduler: null
WavCube/hparams.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ sample_rate: 16000
2
+ initial_learning_rate: 0.0001
3
+ num_warmup_steps: 5000
4
+ mel_loss_coeff: 4.5
5
+ mrd_loss_coeff: 1.0
6
+ pretrain_mel_steps: 0
7
+ decay_mel_coeff: false
8
+ evaluate_utmos: true
9
+ evaluate_pesq: true
10
+ evaluate_periodicty: false
11
+ evaluate_stoi: false
12
+ evaluate_pesq_wb: false
13
+ evaluate_sim: true
14
+ kl_loss_coeff: 0.0001
15
+ sr_loss_coeff: 1.0
16
+ gan_loss_coeff: 0.1