Text-to-Speech
F5-TTS
Ewe
tts
open-bible
ewe
luel committed on
Commit
b8bd5ae
·
verified ·
1 Parent(s): d416a46

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. F5TTS_v1_Base_Open_Bible_Ewe.yaml +55 -0
  2. model_last.pt +3 -0
  3. vocab.txt +125 -0
F5TTS_v1_Base_Open_Bible_Ewe.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
4
+
5
+ datasets:
6
+ name: open-bible-ewe # dataset name
7
+ batch_size_per_gpu: 28000 # frames per GPU; effective batch = num_gpus * 28000 (e.g. 8 GPUs: 8 * 28000 = 224000)
8
+ batch_size_type: frame # frame | sample
9
+ max_samples: 32 # max sequences per batch when using frame-wise batch_size. Upstream guidance: 32 for small models, 64 for base models (note: this Base config uses 32)
10
+ num_workers: 4
11
+
12
+ optim:
13
+ epochs: 627
14
+ learning_rate: 7.5e-5
15
+ num_warmup_updates: 20000 # warmup updates
16
+ grad_accumulation_steps: 1 # note: updates = steps / grad_accumulation_steps
17
+ max_grad_norm: 1.0 # gradient clipping
18
+ bnb_optimizer: False # use bnb 8bit AdamW optimizer or not
19
+
20
+ model:
21
+ name: F5TTS_v1_Base # model name
22
+ tokenizer: custom # tokenizer type
23
+ tokenizer_path: data/open-bible-ewe_custom/vocab.txt # when tokenizer is 'custom', the path to the vocab file to use (should be a vocab.txt)
24
+ backbone: DiT
25
+ arch:
26
+ dim: 1024
27
+ depth: 22
28
+ heads: 16
29
+ ff_mult: 2
30
+ text_dim: 512
31
+ text_mask_padding: True
32
+ qk_norm: null # null | rms_norm
33
+ conv_layers: 4
34
+ pe_attn_head: null
35
+ attn_backend: torch # torch | flash_attn
36
+ attn_mask_enabled: False
37
+ checkpoint_activations: False # recompute activations to save memory at the cost of extra compute
38
+ mel_spec:
39
+ target_sample_rate: 24000
40
+ n_mel_channels: 100
41
+ hop_length: 256
42
+ win_length: 1024
43
+ n_fft: 1024
44
+ mel_spec_type: vocos # vocos | bigvgan
45
+ vocoder:
46
+ is_local: False # use local offline ckpt or not
47
+ local_path: null # local vocoder path
48
+
49
+ ckpts:
50
+ logger: wandb # wandb | tensorboard | null
51
+ log_samples: True # infer a random sample each time a checkpoint is saved. WIP: it is normal for this to fail on extra-long samples
52
+ save_per_updates: 10000 # save checkpoint per updates
53
+ keep_last_n_checkpoints: 5 # -1 to keep all, 0 to not save intermediate, > 0 to keep last N checkpoints
54
+ last_per_updates: 5000 # save last checkpoint per updates
55
+ save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}
model_last.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9551dee25e23db90561f8513632b57a82915412fb519e4d1adc680f0dae4b5c
3
+ size 5374368419
vocab.txt ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ !
3
+ "
4
+ '
5
+ (
6
+ )
7
+ ,
8
+ -
9
+ .
10
+ 0
11
+ 1
12
+ 2
13
+ 3
14
+ 4
15
+ 5
16
+ 6
17
+ 7
18
+ 8
19
+ 9
20
+ :
21
+ ?
22
+ A
23
+ B
24
+ D
25
+ E
26
+ F
27
+ G
28
+ H
29
+ I
30
+ K
31
+ L
32
+ M
33
+ N
34
+ O
35
+ P
36
+ Q
37
+ R
38
+ S
39
+ T
40
+ U
41
+ V
42
+ W
43
+ X
44
+ Y
45
+ Z
46
+ [
47
+ ]
48
+ a
49
+ b
50
+ d
51
+ e
52
+ f
53
+ g
54
+ h
55
+ i
56
+ k
57
+ l
58
+ m
59
+ n
60
+ o
61
+ p
62
+ r
63
+ s
64
+ t
65
+ u
66
+ v
67
+ w
68
+ x
69
+ y
70
+ z
71
+ À
72
+ È
73
+ à
74
+ á
75
+ ã
76
+ è
77
+ é
78
+ ì
79
+ í
80
+ ò
81
+ ó
82
+ õ
83
+ ĩ
84
+ Ŋ
85
+ ŋ
86
+ ũ
87
+ Ɔ
88
+ Ɖ
89
+ Ɛ
90
+ Ƒ
91
+ ƒ
92
+ Ɣ
93
+ Ʋ
94
+ ɔ
95
+ ɖ
96
+ ɛ
97
+ ɣ
98
+ ʋ
99
+ ́
100
+ ̃
101
+ ב
102
+ ג
103
+ ד
104
+ ה
105
+ ו
106
+ ז
107
+ ח
108
+ ט
109
+ י
110
+ כ
111
+ ל
112
+ מ
113
+ נ
114
+ ס
115
+ ע
116
+ פ
117
+ צ
118
+ ק
119
+ ש
120
+ ת
121
+
122
+
123
+
124
+
125
+