Text-to-Speech
F5-TTS
Luo (Kenya and Tanzania)
tts
open-bible
dholuo
luel committed on
Commit
8ce8649
·
verified ·
1 Parent(s): 92d8888

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. F5TTS_v1_Base_Open_Bible_Dholuo.yaml +55 -0
  2. model_last.pt +3 -0
  3. vocab.txt +80 -0
F5TTS_v1_Base_Open_Bible_Dholuo.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
4
+
5
+ datasets:
6
+ name: open-bible-dholuo # dataset name
7
+ batch_size_per_gpu: 28000 # per-GPU batch size in frames; total = num_GPUs * 28000 (e.g. 8 GPUs, 8 * 28000 = 224000)
8
+ batch_size_type: frame # frame | sample
9
+ max_samples: 32 # max sequences per batch when using frame-wise batch_size; we set 32 for small models, 64 for base models
10
+ num_workers: 4
11
+
12
+ optim:
13
+ epochs: 549
14
+ learning_rate: 7.5e-5
15
+ num_warmup_updates: 20000 # warmup updates
16
+ grad_accumulation_steps: 1 # note: updates = steps / grad_accumulation_steps
17
+ max_grad_norm: 1.0 # gradient clipping
18
+ bnb_optimizer: False # use bnb 8bit AdamW optimizer or not
19
+
20
+ model:
21
+ name: F5TTS_v1_Base # model name
22
+ tokenizer: custom # tokenizer type
23
+ tokenizer_path: data/open-bible-dholuo_custom/vocab.txt # if using the 'custom' tokenizer, set the path you want to use (should be a vocab.txt)
24
+ backbone: DiT
25
+ arch:
26
+ dim: 1024
27
+ depth: 22
28
+ heads: 16
29
+ ff_mult: 2
30
+ text_dim: 512
31
+ text_mask_padding: True
32
+ qk_norm: null # null | rms_norm
33
+ conv_layers: 4
34
+ pe_attn_head: null
35
+ attn_backend: torch # torch | flash_attn
36
+ attn_mask_enabled: False
37
+ checkpoint_activations: False # recompute activations to save memory at the cost of extra compute
38
+ mel_spec:
39
+ target_sample_rate: 24000
40
+ n_mel_channels: 100
41
+ hop_length: 256
42
+ win_length: 1024
43
+ n_fft: 1024
44
+ mel_spec_type: vocos # vocos | bigvgan
45
+ vocoder:
46
+ is_local: False # use local offline ckpt or not
47
+ local_path: null # local vocoder path
48
+
49
+ ckpts:
50
+ logger: wandb # wandb | tensorboard | null
51
+ log_samples: True # run inference on a random sample at each checkpoint save. WIP; normal for it to fail on extra-long samples
52
+ save_per_updates: 10000 # save checkpoint per updates
53
+ keep_last_n_checkpoints: 5 # -1 to keep all, 0 to not save intermediate, > 0 to keep last N checkpoints
54
+ last_per_updates: 5000 # save last checkpoint per updates
55
+ save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}
model_last.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c53e18a68c0ad3138513e25c74717a3752e0803bd05df5452c9259be5cf6fa2
3
+ size 5373999779
vocab.txt ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ !
3
+ "
4
+ '
5
+ (
6
+ )
7
+ *
8
+ +
9
+ ,
10
+ -
11
+ .
12
+ 0
13
+ 1
14
+ 2
15
+ 3
16
+ 4
17
+ 5
18
+ 6
19
+ 7
20
+ 8
21
+ 9
22
+ :
23
+ ?
24
+ A
25
+ B
26
+ C
27
+ D
28
+ E
29
+ F
30
+ G
31
+ H
32
+ I
33
+ J
34
+ K
35
+ L
36
+ M
37
+ N
38
+ O
39
+ P
40
+ R
41
+ S
42
+ T
43
+ U
44
+ V
45
+ W
46
+ Y
47
+ Z
48
+ [
49
+ \
50
+ ]
51
+ a
52
+ b
53
+ c
54
+ d
55
+ e
56
+ f
57
+ g
58
+ h
59
+ i
60
+ j
61
+ k
62
+ l
63
+ m
64
+ n
65
+ o
66
+ p
67
+ q
68
+ r
69
+ s
70
+ t
71
+ u
72
+ v
73
+ w
74
+ x
75
+ y
76
+ z
77
+ ʼ
78
+ ט
79
+ י
80
+ ר