""" from https://github.com/keithito/tacotron """ ''' Defines the set of symbols used in text input to the model. ''' # Original # _pad = '_' # _punctuation = '[]\'* ' # _letters = 'aảáăặấbcdđefghijklmnoóôớờprstuúữvwxyz' # _letters_ipa = "ɑæɔʤəɛɪɯŋɲʂʃʈʧʊɣɤʐˈˌʰʷ˞'̩'ᵻ̆'͡123456" # new_en # _pad = '_' # _punctuation = '<>[]\'* ' # _letters = 'aáàảãạăắằẳẵặâấầẩẫậbcdđeéèẻẽẹêếềểễệfghiíìỉĩịjklmnoóòỏõọôốồổỗộơớờởỡợqprstuúùủũụưứừửữựvwxyýỳỷỹỵzM' # _letters_ipa = "θɑæɔʤəɛɪɯŋɲʂʃʈʧʊɣɤʐˈˌʰʷ˞'̩'ᵻ̆'͡123456" # PL_BERT _pad = '_' _punctuation = '<>[]\'* ' _letters = 'aáàảãạăắằẳẵặâấầẩẫậbcdđeéèẻẽẹêếềểễệfghiíìỉĩịjklmnoóòỏõọôốồổỗộơớờởỡợqprstuúùủũụưứừửữựvwxyýỳỷỹỵzM' _letters_ipa = "ɑæɔʤəɛɪɯŋɲʂʃʈʧʊɣɤʐˈˌʰʷ˞'̩'ᵻ̆'͡123456" #_letters = 'aáăặấbcdđefghijklmnoóôớờprstuữvwxyz' #_letters = 'aảúáăặấbcdđefghijklmnoóôớờprstuữvwxyz' # Export all symbols: symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa) # Special symbol ids SPACE_ID = symbols.index(" ")