from transformers import PretrainedConfig


class SpeechEncoderConfig(PretrainedConfig):
    """Configuration holder for the ``gslm-speech-encoder`` model type.

    The constructor stores every option as an attribute of the same name.
    Integer and boolean options are passed through ``int()`` / ``bool()`` so
    the stored values are plain Python scalars. Any additional keyword
    arguments are forwarded unchanged to ``PretrainedConfig.__init__``.

    NOTE(review): this class only stores values; the descriptions below of
    what each option controls are inferred from the option names and should
    be confirmed against the code that consumes this config.

    Args:
        hubert_backend: Name of the HuBERT implementation to use
            (default ``"fairseq"``).
        hubert_ckpt: Checkpoint file name, presumably used by the fairseq
            backend (default ``"hubert_base_ls960.pt"``).
        hubert_hf_name: Model identifier, presumably used by a Hugging Face
            backend (default ``"facebook/hubert-base-ls960"``).
        hubert_layer: Layer index, stored as ``int`` (default ``9``).
        expected_sample_rate: Sample rate, stored as ``int``
            (default ``16000``).
        code_hop_size: Hop size, stored as ``int`` (default ``320``).
        quantizer_file: Quantizer file name (default ``"kmeans_100.pt"``).
        quantizer_key: Key associated with the quantizer file; empty string
            by default.
        deduplicate: Stored as ``bool`` (default ``True``).
        add_bos_eos: Stored as ``bool`` (default ``False``).
        need_f0: Stored as ``bool`` (default ``False``).
        bos_id: Optional integer id; ``None`` is preserved.
        eos_id: Optional integer id; ``None`` is preserved.
        feature_norm: Optional string option; ``None`` by default.
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "gslm-speech-encoder"

    def __init__(
        self,
        hubert_backend: str = "fairseq",
        hubert_ckpt: str = "hubert_base_ls960.pt",
        hubert_hf_name: str = "facebook/hubert-base-ls960",
        hubert_layer: int = 9,
        expected_sample_rate: int = 16000,
        code_hop_size: int = 320,
        quantizer_file: str = "kmeans_100.pt",
        quantizer_key: str = "",
        deduplicate: bool = True,
        add_bos_eos: bool = False,
        need_f0: bool = False,
        bos_id: int | None = None,
        eos_id: int | None = None,
        feature_norm: str | None = None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Feature-extractor (HuBERT) options.
        self.hubert_backend = hubert_backend
        self.hubert_ckpt = hubert_ckpt
        self.hubert_hf_name = hubert_hf_name
        self.hubert_layer = int(hubert_layer)
        self.expected_sample_rate = int(expected_sample_rate)
        self.code_hop_size = int(code_hop_size)

        # Quantizer options.
        self.quantizer_file = quantizer_file
        self.quantizer_key = quantizer_key

        # Output-shaping flags.
        self.deduplicate = bool(deduplicate)
        self.add_bos_eos = bool(add_bos_eos)
        self.need_f0 = bool(need_f0)

        # Coerce the optional ids the same way as the other integer options,
        # while keeping None as the "not set" value.
        self.bos_id = None if bos_id is None else int(bos_id)
        self.eos_id = None if eos_id is None else int(eos_id)

        self.feature_norm = feature_norm