Gaphy committed on
Commit
13158d1
·
verified ·
1 Parent(s): 545b7dc

Upload 2 files

Browse files
checkpoints/trainset_hifigan/config.yaml ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accumulate_grad_batches: 1
2
+ adam_b1: 0.8
3
+ adam_b2: 0.99
4
+ amp: false
5
+ audio_num_mel_bins: 80
6
+ audio_sample_rate: 16000
7
+ aux_context_window: 0
8
+ base_config:
9
+ - egs/egs_bases/tts/vocoder/hifigan.yaml
10
+ - egs/datasets/audio/emotion/base_text2mel.yaml
11
+ binarization_args:
12
+ reset_phone_dict: true
13
+ reset_word_dict: true
14
+ shuffle: true
15
+ trim_eos_bos: false
16
+ trim_sil: false
17
+ with_align: false
18
+ with_f0: true
19
+ with_f0cwt: false
20
+ with_linear: false
21
+ with_spk_embed: false
22
+ with_spk_id: true
23
+ with_txt: false
24
+ with_wav: true
25
+ with_word: false
26
+ binarizer_cls: data_gen.tts.base_binarizer_emotion.EmotionBinarizer
27
+ binary_data_dir: data/binary/training_set
28
+ check_val_every_n_epoch: 10
29
+ clip_grad_norm: 1
30
+ clip_grad_value: 0
31
+ debug: false
32
+ dec_ffn_kernel_size: 9
33
+ dec_layers: 4
34
+ dict_dir: ''
35
+ disc_start_steps: 40000
36
+ discriminator_grad_norm: 1
37
+ discriminator_optimizer_params:
38
+ lr: 0.0002
39
+ discriminator_scheduler_params:
40
+ gamma: 0.999
41
+ step_size: 600
42
+ dropout: 0.1
43
+ ds_workers: 1
44
+ enc_ffn_kernel_size: 9
45
+ enc_layers: 4
46
+ endless_ds: true
47
+ ffn_act: gelu
48
+ ffn_padding: SAME
49
+ fft_size: 1024
50
+ fmax: 7600
51
+ fmin: 80
52
+ frames_multiple: 1
53
+ gen_dir_name: ''
54
+ generator_grad_norm: 10
55
+ generator_optimizer_params:
56
+ lr: 0.0002
57
+ generator_scheduler_params:
58
+ gamma: 0.999
59
+ step_size: 600
60
+ griffin_lim_iters: 60
61
+ hidden_size: 256
62
+ hop_size: 256
63
+ infer: false
64
+ lambda_adv: 1.0
65
+ lambda_cdisc: 4.0
66
+ lambda_mel: 5.0
67
+ lambda_mel_adv: 1.0
68
+ load_ckpt: ''
69
+ loud_norm: false
70
+ lr: 2.0
71
+ max_epochs: 1000
72
+ max_frames: 1548
73
+ max_input_tokens: 1550
74
+ max_samples: 8192
75
+ max_sentences: 24
76
+ max_tokens: 30000
77
+ max_updates: 1000000
78
+ max_valid_sentences: 1
79
+ max_valid_tokens: 60000
80
+ mel_loss: ssim:0.5|l1:0.5
81
+ mel_vmax: 1.5
82
+ mel_vmin: -6
83
+ min_frames: 128
84
+ min_level_db: -100
85
+ num_ckpt_keep: 3
86
+ num_heads: 2
87
+ num_mels: 80
88
+ num_sanity_val_steps: -1
89
+ num_spk: 10
90
+ num_test_samples: 30
91
+ num_valid_plots: 10
92
+ optimizer_adam_beta1: 0.9
93
+ optimizer_adam_beta2: 0.98
94
+ out_wav_norm: false
95
+ pitch_extractor: parselmouth
96
+ pitch_type: frame
97
+ pre_align_args:
98
+ allow_no_txt: false
99
+ denoise: false
100
+ sox_resample: false
101
+ sox_to_wav: false
102
+ trim_sil: false
103
+ txt_processor: en
104
+ use_tone: true
105
+ pre_align_cls: egs.datasets.audio.emotion.pre_align.EmoPreAlign
106
+ print_nan_grads: false
107
+ processed_data_dir: data/processed/emotion,data/processed/LibriTTS
108
+ profile_infer: false
109
+ raw_data_dir: data/raw/ESD
110
+ ref_level_db: 20
111
+ rename_tmux: true
112
+ resblock: '1'
113
+ resblock_dilation_sizes:
114
+ - - 1
115
+ - 3
116
+ - 5
117
+ - - 1
118
+ - 3
119
+ - 5
120
+ - - 1
121
+ - 3
122
+ - 5
123
+ resblock_kernel_sizes:
124
+ - 3
125
+ - 7
126
+ - 11
127
+ resume_from_checkpoint: 0
128
+ save_best: true
129
+ save_codes: []
130
+ save_f0: false
131
+ save_gt: true
132
+ scheduler: rsqrt
133
+ seed: 1234
134
+ sort_by_len: true
135
+ task_cls: tasks.vocoder.hifigan.HifiGanTask
136
+ tb_log_interval: 100
137
+ test_ids: []
138
+ test_input_dir: ''
139
+ test_num: 200
140
+ test_set_name: test
141
+ train_set_name: train
142
+ train_sets: ''
143
+ upsample_initial_channel: 512
144
+ upsample_kernel_sizes:
145
+ - 16
146
+ - 16
147
+ - 4
148
+ - 4
149
+ upsample_rates:
150
+ - 8
151
+ - 8
152
+ - 2
153
+ - 2
154
+ use_cdisc: false
155
+ use_cond_disc: false
156
+ use_emotion: true
157
+ use_fm_loss: false
158
+ use_ms_stft: false
159
+ use_pitch_embed: false
160
+ use_spec_disc: false
161
+ use_spk_embed: false
162
+ use_spk_id: true
163
+ use_split_spk_id: false
164
+ val_check_interval: 2000
165
+ valid_infer_interval: 2000
166
+ valid_monitor_key: val_loss
167
+ valid_monitor_mode: min
168
+ valid_set_name: valid
169
+ vocoder: pwg
170
+ vocoder_ckpt: ''
171
+ vocoder_denoise_c: 0.0
172
+ warmup_updates: 8000
173
+ weight_decay: 0
174
+ win_length: null
175
+ win_size: 1024
176
+ window: hann
177
+ word_size: 30000
178
+ work_dir: checkpoints/trainset_hifigan
checkpoints/trainset_hifigan/model_ckpt_steps_1000000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a2577919899400a111ef42a2aba65797d282c259d083d2c276539dda9d17870
3
+ size 1016199247