LH-Tech-AI committed on
Commit
ea3793d
·
verified ·
1 Parent(s): 3203de4

Create train_vocoder.py

Browse files
Files changed (1) hide show
  1. train_vocoder.py +102 -0
train_vocoder.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # train_vocoder.py
2
+ import os
3
+ from trainer import Trainer, TrainerArgs
4
+ from TTS.utils.audio import AudioProcessor
5
+ from TTS.config.shared_configs import BaseAudioConfig
6
+ from TTS.vocoder.configs import HifiganConfig
7
+ from TTS.vocoder.datasets.preprocess import load_wav_data
8
+ from TTS.vocoder.models.gan import GAN
9
+
10
+
11
def main():
    """Train a HiFi-GAN v1 vocoder on LJSpeech from scratch.

    Builds an audio configuration intended to be compatible with the
    acoustic model's mel features (per the run description, GlowTTS),
    assembles a :class:`HifiganConfig`, loads the wav file lists, and
    launches the Coqui TTS ``Trainer``. Checkpoints and logs are written
    next to this script.

    Raises:
        FileNotFoundError: if the LJSpeech wav directory is missing.
    """
    output_path = os.path.dirname(os.path.abspath(__file__))
    data_path = os.path.join(output_path, "LJSpeech-1.1/wavs/")

    # Fail fast with an actionable message instead of an opaque error
    # deep inside load_wav_data when the dataset was never downloaded.
    if not os.path.isdir(data_path):
        raise FileNotFoundError(
            f"LJSpeech wav directory not found: {data_path}. "
            "Download and extract LJSpeech-1.1 next to this script first."
        )

    # STFT/mel settings must match the acoustic model exactly, otherwise
    # the vocoder trains on spectrograms it will never see at inference.
    audio_config = BaseAudioConfig(
        sample_rate=22050,
        resample=False,
        do_trim_silence=True,
        trim_db=45,
        # STFT parameters (hop/win in samples; ms variants disabled).
        fft_size=1024,
        win_length=1024,
        hop_length=256,
        frame_shift_ms=None,
        frame_length_ms=None,
        # Mel filterbank.
        num_mels=80,
        mel_fmin=0.0,
        mel_fmax=None,  # None -> up to Nyquist
        # Spectrogram normalization: symmetric clipping to [-4, 4].
        signal_norm=True,
        symmetric_norm=True,
        max_norm=4.0,
        clip_norm=True,
        ref_level_db=20,
        min_level_db=-100,
        spec_gain=20.0,
        log_func="np.log10",
        preemphasis=0.0,
        stats_path=None,  # no precomputed normalization stats
    )

    config = HifiganConfig(
        run_name="hifigan_ljspeech",
        run_description="HiFi-GAN v1 from scratch, GlowTTS-compatible mels",
        data_path=data_path,
        output_path=output_path,
        eval_split_size=10,  # number of wavs held out for evaluation
        audio=audio_config,
        # Training schedule.
        epochs=2000,
        batch_size=64,
        eval_batch_size=16,
        num_loader_workers=4,
        num_eval_loader_workers=2,
        run_eval=True,
        test_delay_epochs=5,
        mixed_precision=True,
        # Train on random fixed-length waveform windows; clips shorter
        # than pad_short samples are padded.
        seq_len=8192,
        pad_short=2000,
        use_noise_augment=True,
        # Separate learning rates for generator and discriminator.
        lr_gen=2e-4,
        lr_disc=2e-4,
        # Logging / checkpointing cadence (in steps).
        print_step=50,
        print_eval=False,
        save_step=5000,
        save_n_checkpoints=5,
        save_checkpoints=True,
        log_model_step=10000,
        plot_step=500,
    )

    ap = AudioProcessor(config=config.audio)

    # NOTE: load_wav_data returns (eval_samples, train_samples) in that
    # order — keep the unpacking order as-is.
    eval_samples, train_samples = load_wav_data(
        config.data_path,
        config.eval_split_size,
    )

    model = GAN(config)

    trainer = Trainer(
        TrainerArgs(),
        config,
        output_path,
        model=model,
        train_samples=train_samples,
        eval_samples=eval_samples,
        training_assets={"audio_processor": ap},
    )
    trainer.fit()
99
+
100
+
101
+ if __name__ == "__main__":
102
+ main()