shethjenil
/

StyleTTS

Model card Files Files and versions

shethjenil committed on about 24 hours ago

Commit

b0cf56e

·

verified ·

1 Parent(s): 71d986e

Create mini_config.yaml

Files changed (1) hide show

mini_config.yaml +105 -0

mini_config.yaml ADDED Viewed

	@@ -0,0 +1,105 @@

+log_dir: "Models/Output"
+save_freq: 5
+log_interval: 10
+device: "cuda"
+epochs: 50  # loss_params.diff_epoch (10) and joint_epoch (30) are both below this
+batch_size: 8
+max_len: 400  # same value as slmadv_params.min_len; unit not shown here - TODO confirm
+pretrained_model: ""  # empty string, not null; presumably "no checkpoint" - verify in loader
+second_stage_load_pretrained: true
+load_only_params: true
+external_models:
+  asr:
+    input_dim: 80  # equals model_params.n_mels
+    hidden_dim: 256
+    n_token: 178  # equals plbert.vocab_size and model_params.n_token
+  plbert:
+    vocab_size: 178
+    hidden_size: 768
+    num_attention_heads: 12
+    intermediate_size: 2048
+    dropout: 0.1
+data_params:
+  train_data: "shethjenil/audiodata"  # NOTE(review): looks like an "owner/name" id, not a path - confirm
+  root_path: ""  # empty string, not null
+  min_length: 50
+preprocess_params:
+  sr: 24000
+  n_fft: 2048
+  win_length: 1200
+  hop_length: 300  # equals 10 * 6 * 5 (decoder upsample_rates x gen_istft_hop_size)
+model_params:
+  multispeaker: true
+  dim_in: 64
+  hidden_dim: 128
+  max_conv_dim: 512
+  n_layer: 2
+  n_mels: 80
+  n_token: 178
+  max_dur: 50
+  style_dim: 128
+  dropout: 0.2
+  decoder:
+    type: "istftnet"
+    hidden_dim: 256
+    decoder_out_dim: 256
+    asr_res_in: 128
+    resblock_kernel_sizes: [3, 3]  # NOTE(review): 2 entries vs 4 dilation lists below - confirm pairing
+    upsample_rates: [10, 6]  # product is 60; same length as upsample_kernel_sizes
+    upsample_initial_channel: 256
+    resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5], [1, 3, 5]]
+    upsample_kernel_sizes: [20, 12]  # each is 2x the upsample rate at the same index
+    gen_istft_n_fft: 20
+    gen_istft_hop_size: 5  # 60 * 5 = 300, matching preprocess_params.hop_length
+    disable_complex: true
+  slm:
+    model: "microsoft/wavlm-base-plus"
+    sr: 16000  # differs from preprocess_params.sr (24000)
+    hidden: 768
+    nlayers: 13
+    initial_channel: 64
+  diffusion:
+    embedding_mask_proba: 0.1
+    transformer:
+      num_layers: 3
+      num_heads: 8
+      head_features: 64
+      multiplier: 2
+    dist:
+      sigma_data: 0.2
+      estimate_sigma_data: true  # NOTE(review): may make sigma_data above only a starting value - confirm
+      mean: -3.0
+      std: 1.0
+loss_params:
+  lambda_mel: 5.0
+  lambda_gen: 1.0
+  lambda_slm: 1.0
+  lambda_mono: 1.0
+  lambda_s2s: 1.0
+  lambda_f0: 1.0
+  lambda_norm: 1.0
+  lambda_dur: 1.0
+  lambda_ce: 20.0  # largest weight in this section; all others are 1.0 except lambda_mel (5.0)
+  lambda_sty: 1.0
+  lambda_diff: 1.0
+  diff_epoch: 10  # integer, unlike the float lambda_* weights above
+  joint_epoch: 30  # greater than diff_epoch, less than top-level epochs (50)
+optimizer_params:
+  lr: 0.0001
+  bert_lr: 0.00001  # 10x smaller than lr and ft_lr
+  ft_lr: 0.0001
+slmadv_params:
+  min_len: 400  # same value as top-level max_len
+  max_len: 500
+  batch_percentage: 0.5
+  iter: 10
+  thresh: 5.0
+  scale: 0.01
+  sig: 1.5