mooncast
/

audio_detokenizer

Model card Files Files and versions

audio_detokenizer / config.yaml

mrfakename's picture

Upload folder using huggingface_hub

c943555 verified about 1 year ago

history blame contribute delete

2.45 kB

	accumulate_grad_batches: 1
	base_config: ''
	batch_max_tokens: 4000
	batch_size: 5
	cfg_init: 1.0
	cfg_scale: 4.0
	cfg_schedule: linear
	check_val_every_n_epoch: 10
	clip_grad_norm: 0.5
	data_dir: ''
	datamodule_target: ''
	debug: false
	deep_speed_strategy_stage: 2
	drop_last: true
	endless_ds: false
	exp_name: ''
	filter_args:
	lang:
	- zh
	- en
	max_spk_num: 6
	speech_ratio: 0.6
	gradient_clip_val: 1.0
	indexed_ds: true
	infer: false
	infer_exp_name: ''
	infer_json_path: ''
	inference_ckpt: ''
	inference_mode: nonstreaming
	initialize_from: ''
	kimia_data_state_path: datastates/zeqian_ft.datastate
	learning_rate: 1e-4
	limit_val_batches: 100
	load_opt: false
	log_interval: 10
	logger_type: tensorboard
	loss:
	mel_loss: l1
	max_epochs: 1000
	max_eval_sentences: -1
	max_eval_tokens: -1
	max_prompt_ratio: 0.5
	max_segment_cnt: 20000
	max_sentences: -1
	max_speech_duration: 20
	max_tokens: 31250
	max_training_steps: 200000
	max_updates: 160000
	mel_mean: -4.479605
	mel_std: 3.4584913
	meta_dir: null
	min_prompt_duration: 0.1
	min_speech_duration: -1
	model:
	dit:
	chunk_params:
	hz: 50
	max_chunk: 3.0
	max_chunk_history: 500000
	min_chunk: 0.5
	need_block_shift: true
	depth: 10
	ffn_act_layer: gleu_tanh
	ffn_conv_kernel_size: 5
	ffn_gated_glu: false
	ffn_type: vanilla_mlp
	hidden_size: 2048
	input_size: 80
	max_seq_len: 4096
	mlp_ratio: 4.0
	num_heads: 16
	position_embedding_type: skip
	prompt_cfg_dropout: 0.2
	rope_params:
	max_position_embeddings: 4096
	rope_base: 10000.0
	rope_interpolation_factor: 1.0
	semantic_cfg_dropout: 0.15
	semantic_vocab_size: 8192
	use_chunk_setting: true
	use_rope: true
	position_id_start_from: 0
	random_position_start: true
	restart_position_ids: false
	upsample_args:
	rate: 1.0
	need_merge_same_speaker: true
	no_verlap: true
	normalize_mel: true
	num_nodes: 4
	num_sanity_val_steps: 0
	num_workers: 3
	ode_steps: 150
	optimizer_adam_beta1: 0.9
	optimizer_adam_beta2: 0.98
	optimizer_class: adamw
	pin_memory: true
	precision: bf16-mixed
	save_topk: 10
	seed: 1234
	shuffle: true
	sort_by_len: true
	src_sample_rate: 16000
	strategy: ddp
	tensorboard_dir: ''
	test_num: 100
	tgt_sample_rate: 24000
	timescale: 240000
	use_cfg: false
	use_cfg_rescale: false
	use_chunk_setting: true
	use_distributed_sampler: false
	val_check_interval: 2000
	vocoder_ckpt: ''
	vocoder_config_path: ''
	wandb_name: ''
	warmup_updates: 2000
	weight_decay: 0.0001
	work_dir: ''