---
# Training / inference configuration for a DiT-based speech model
# (PyTorch Lightning-style trainer keys + `model.dit` acoustic-model stanza).
# Top-level keys are kept in alphabetical order — preserve that when editing.
#
# NOTE(review): this file was recovered from a whitespace-collapsed copy;
# nesting was reconstructed from the alphabetical key-ordering convention.
# The placement of `position_id_start_from` / `random_position_start` /
# `restart_position_ids` / `upsample_args` under `model:` (as siblings of
# `dit:`) is inferred — confirm against the config-loading code.

accumulate_grad_batches: 1
base_config: ''
batch_max_tokens: 4000
batch_size: 5
cfg_init: 1.0
cfg_scale: 4.0
cfg_schedule: linear
check_val_every_n_epoch: 10
clip_grad_norm: 0.5
data_dir: ''
datamodule_target: ''
debug: false
deep_speed_strategy_stage: 2
drop_last: true
endless_ds: false
exp_name: ''

# Dataset filtering criteria.
filter_args:
  lang:
    - zh
    - en
  max_spk_num: 6
  speech_ratio: 0.6

gradient_clip_val: 1.0
indexed_ds: true
infer: false
infer_exp_name: ''
infer_json_path: ''
inference_ckpt: ''
inference_mode: nonstreaming
initialize_from: ''
kimia_data_state_path: datastates/zeqian_ft.datastate
# NOTE(review): plain `1e-4` (no decimal point) is resolved as a *string*,
# not a float, by YAML 1.1 loaders such as PyYAML — confirm the consumer
# casts it (or change to `1.0e-4` if a native float is required).
learning_rate: 1e-4
limit_val_batches: 100
load_opt: false
log_interval: 10
logger_type: tensorboard

loss:
  mel_loss: l1

max_epochs: 1000
max_eval_sentences: -1
max_eval_tokens: -1
max_prompt_ratio: 0.5
max_segment_cnt: 20000
max_sentences: -1
max_speech_duration: 20
max_tokens: 31250
max_training_steps: 200000
max_updates: 160000
mel_mean: -4.479605
mel_std: 3.4584913
meta_dir: null
min_prompt_duration: 0.1
min_speech_duration: -1

model:
  dit:
    chunk_params:
      hz: 50
      max_chunk: 3.0
      max_chunk_history: 500000
      min_chunk: 0.5
      need_block_shift: true
    depth: 10
    # NOTE(review): `gleu_tanh` looks like a typo for `gelu_tanh`; kept
    # verbatim in case the consuming activation registry uses this exact
    # spelling — verify before "fixing".
    ffn_act_layer: gleu_tanh
    ffn_conv_kernel_size: 5
    ffn_gated_glu: false
    ffn_type: vanilla_mlp
    hidden_size: 2048
    input_size: 80
    max_seq_len: 4096
    mlp_ratio: 4.0
    num_heads: 16
    position_embedding_type: skip
    prompt_cfg_dropout: 0.2
    rope_params:
      max_position_embeddings: 4096
      rope_base: 10000.0
      rope_interpolation_factor: 1.0
    semantic_cfg_dropout: 0.15
    semantic_vocab_size: 8192
    use_chunk_setting: true
    use_rope: true
  position_id_start_from: 0
  random_position_start: true
  restart_position_ids: false
  upsample_args:
    rate: 1.0

need_merge_same_speaker: true
# NOTE(review): `no_verlap` is presumably a misspelling of `no_overlap`;
# kept verbatim — the loader likely looks up this exact key.
no_verlap: true
normalize_mel: true
num_nodes: 4
num_sanity_val_steps: 0
num_workers: 3
ode_steps: 150
optimizer_adam_beta1: 0.9
optimizer_adam_beta2: 0.98
optimizer_class: adamw
pin_memory: true
precision: bf16-mixed
save_topk: 10
seed: 1234
shuffle: true
sort_by_len: true
src_sample_rate: 16000
strategy: ddp
tensorboard_dir: ''
test_num: 100
tgt_sample_rate: 24000
timescale: 240000
use_cfg: false
use_cfg_rescale: false
use_chunk_setting: true
use_distributed_sampler: false
val_check_interval: 2000
vocoder_ckpt: ''
vocoder_config_path: ''
wandb_name: ''
warmup_updates: 2000
weight_decay: 0.0001
work_dir: ''