| data_root: N/A |
| input_channels: 1 |
| input_feat_per_channel: 80 |
| multitask: |
| source_unit: |
| data: N/A |
| decoder_type: transformer |
| dict: N/A |
| encoder_layer: 6 |
| loss_weight: 8.0 |
| target_type: text |
| output_channels: 1 |
| output_feat_per_channel: 1 |
| output_feat_reduction_rate: 0 |
| output_sample_rate: 16000 |
| specaugment: |
| freq_mask_F: 27 |
| freq_mask_N: 1 |
| time_mask_N: 1 |
| time_mask_T: 100 |
| time_mask_p: 1.0 |
| time_wrap_W: 0 |
| transforms: |
| _eval: |
| - utterance_cmvn |
| _train: |
| - utterance_cmvn |
| - specaugment |
| vocoder: |
| dur_prediction: true |
| model_path: N/A |
| speaker: false |
| type: code_hifigan |
| hub: |
| input_type: fbank80_w_utt_cmvn |
| tts_model_id: facebookresearch/fairseq:ust:unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_es_css10 |
| unit_vocoder: true |
| generation_args: |
| beam: 10 |
| max_len_a: 1 |