# testSpeech / text /symbols.py
# CongBang's picture
# Upload folder using huggingface_hub
# e3c2b9c verified
""" from https://github.com/keithito/tacotron """
'''
Defines the set of symbols used in text input to the model.
'''
# Original
# _pad = '_'
# _punctuation = '[]\'* '
# _letters = 'aảáăặấbcdđefghijklmnoóôớờprstuúữvwxyz'
# _letters_ipa = "ɑæɔʤəɛɪɯŋɲʂʃʈʧʊɣɤʐˈˌʰʷ˞'̩'ᵻ̆'͡123456"
# new_en
# _pad = '_'
# _punctuation = '<>[]\'* '
# _letters = 'aáàảãạăắằẳẵặâấầẩẫậbcdđeéèẻẽẹêếềểễệfghiíìỉĩịjklmnoóòỏõọôốồổỗộơớờởỡợqprstuúùủũụưứừửữựvwxyýỳỷỹỵzM'
# _letters_ipa = "θɑæɔʤəɛɪɯŋɲʂʃʈʧʊɣɤʐˈˌʰʷ˞'̩'ᵻ̆'͡123456"
# PL_BERT
# Building blocks of the symbol inventory. Each string is split into its
# individual characters (code points) when `symbols` is assembled below.
_pad = '_'
_punctuation = '<>[]\'* '
_letters = 'aáàảãạăắằẳẵặâấầẩẫậbcdđeéèẻẽẹêếềểễệfghiíìỉĩịjklmnoóòỏõọôốồổỗộơớờởỡợqprstuúùủũụưứừửữựvwxyýỳỷỹỵzM'
_letters_ipa = "ɑæɔʤəɛɪɯŋɲʂʃʈʧʊɣɤʐˈˌʰʷ˞'̩'ᵻ̆'͡123456"

# Alternative `_letters` definitions, currently commented out:
#_letters = 'aáăặấbcdđefghijklmnoóôớờprstuữvwxyz'
#_letters = 'aảúáăặấbcdđefghijklmnoóôớờprstuữvwxyz'

# Full symbol list, in order: the pad symbol, then every character of the
# punctuation, letter and IPA strings.
# NOTE: the apostrophe occurs in `_punctuation` and again (three times) in
# `_letters_ipa`, so this list contains repeated entries. They are kept
# as-is because removing them would shift the position of every later
# symbol.
# NOTE(review): presumably positions in this list are used as symbol ids
# elsewhere (as SPACE_ID is here) - confirm before reordering or
# deduplicating.
symbols = [_pad, *_punctuation, *_letters, *_letters_ipa]

# Index of the first space character in `symbols`.
SPACE_ID = symbols.index(" ")