---
# Training / inference configuration for a DiT-based speech model
# (PyTorch Lightning-style trainer keys + `model.dit` acoustic-model stanza).
# Top-level keys are kept in alphabetical order — preserve that when editing.
#
# NOTE(review): this file was recovered from a whitespace-collapsed copy;
# nesting was reconstructed from the alphabetical key-ordering convention.
# The placement of `position_id_start_from` / `random_position_start` /
# `restart_position_ids` / `upsample_args` under `model:` (as siblings of
# `dit:`) is inferred — confirm against the config-loading code.

accumulate_grad_batches: 1
base_config: ''
batch_max_tokens: 4000
batch_size: 5
cfg_init: 1.0
cfg_scale: 4.0
cfg_schedule: linear
check_val_every_n_epoch: 10
clip_grad_norm: 0.5
data_dir: ''
datamodule_target: ''
debug: false
deep_speed_strategy_stage: 2
drop_last: true
endless_ds: false
exp_name: ''

# Dataset filtering criteria.
filter_args:
  lang:
    - zh
    - en
  max_spk_num: 6
  speech_ratio: 0.6

gradient_clip_val: 1.0
indexed_ds: true
infer: false
infer_exp_name: ''
infer_json_path: ''
inference_ckpt: ''
inference_mode: nonstreaming
initialize_from: ''
kimia_data_state_path: datastates/zeqian_ft.datastate
# NOTE(review): plain `1e-4` (no decimal point) is resolved as a *string*,
# not a float, by YAML 1.1 loaders such as PyYAML — confirm the consumer
# casts it (or change to `1.0e-4` if a native float is required).
learning_rate: 1e-4
limit_val_batches: 100
load_opt: false
log_interval: 10
logger_type: tensorboard

loss:
  mel_loss: l1

max_epochs: 1000
max_eval_sentences: -1
max_eval_tokens: -1
max_prompt_ratio: 0.5
max_segment_cnt: 20000
max_sentences: -1
max_speech_duration: 20
max_tokens: 31250
max_training_steps: 200000
max_updates: 160000
mel_mean: -4.479605
mel_std: 3.4584913
meta_dir: null
min_prompt_duration: 0.1
min_speech_duration: -1

model:
  dit:
    chunk_params:
      hz: 50
      max_chunk: 3.0
      max_chunk_history: 500000
      min_chunk: 0.5
      need_block_shift: true
    depth: 10
    # NOTE(review): `gleu_tanh` looks like a typo for `gelu_tanh`; kept
    # verbatim in case the consuming activation registry uses this exact
    # spelling — verify before "fixing".
    ffn_act_layer: gleu_tanh
    ffn_conv_kernel_size: 5
    ffn_gated_glu: false
    ffn_type: vanilla_mlp
    hidden_size: 2048
    input_size: 80
    max_seq_len: 4096
    mlp_ratio: 4.0
    num_heads: 16
    position_embedding_type: skip
    prompt_cfg_dropout: 0.2
    rope_params:
      max_position_embeddings: 4096
      rope_base: 10000.0
      rope_interpolation_factor: 1.0
    semantic_cfg_dropout: 0.15
    semantic_vocab_size: 8192
    use_chunk_setting: true
    use_rope: true
  position_id_start_from: 0
  random_position_start: true
  restart_position_ids: false
  upsample_args:
    rate: 1.0

need_merge_same_speaker: true
# NOTE(review): `no_verlap` is presumably a misspelling of `no_overlap`;
# kept verbatim — the loader likely looks up this exact key.
no_verlap: true
normalize_mel: true
num_nodes: 4
num_sanity_val_steps: 0
num_workers: 3
ode_steps: 150
optimizer_adam_beta1: 0.9
optimizer_adam_beta2: 0.98
optimizer_class: adamw
pin_memory: true
precision: bf16-mixed
save_topk: 10
seed: 1234
shuffle: true
sort_by_len: true
src_sample_rate: 16000
strategy: ddp
tensorboard_dir: ''
test_num: 100
tgt_sample_rate: 24000
timescale: 240000
use_cfg: false
use_cfg_rescale: false
use_chunk_setting: true
use_distributed_sampler: false
val_check_interval: 2000
vocoder_ckpt: ''
vocoder_config_path: ''
wandb_name: ''
warmup_updates: 2000
weight_decay: 0.0001
work_dir: ''