| |
| |
| |
|
|
| from Unified_parser.globals import * |
| |
|
|
| |
def rec_replace(input : str, sub : str, tar : str):
    """Replace ``sub`` by ``tar`` in ``input`` repeatedly until none is left.

    A single ``str.replace`` pass can leave new occurrences behind (for
    example collapsing ``"&&&"`` with ``"&&" -> "&"``), so the replacement
    is re-applied until ``sub`` no longer occurs.

    Args:
        input: Text to rewrite.
        sub: Substring to eliminate.  An empty ``sub`` leaves ``input``
            unchanged (replacing the empty string would grow the text
            forever).
        tar: Replacement text.

    Returns:
        The rewritten string.

    Raises:
        ValueError: If ``tar`` contains ``sub`` (and differs from it) while
            ``sub`` occurs in ``input``; every pass would re-create the
            pattern, so no fixed point exists.
    """
    # Nothing to do: an empty pattern, or a replacement identical to it.
    if not sub or sub == tar:
        return input
    if sub in tar:
        if sub in input:
            raise ValueError(
                f"rec_replace cannot terminate: replacement {tar!r} "
                f"contains the pattern {sub!r}"
            )
        return input
    # With sub != tar a pass is a no-op exactly when sub is absent.
    while sub in input:
        input = input.replace(sub, tar)
    return input
|
|
| |
def RemoveUnwanted(input : str) -> str:
    """Drop unwanted punctuation and apply the character substitutions.

    Every character listed in ``unwanted`` is removed, then each key of
    ``substitutions`` is replaced by its value (via ``rec_replace``) in
    dictionary order.

    Args:
        input: Raw word text.

    Returns:
        The cleaned text.
    """
    unwanted = ("!",";",":","@","#","$","%","^","&","*",",",".","/","'","’","”","“","।", "]", "[", "×", "ñ", "∙","•")

    # NOTE(review): several key/value pairs below render identically;
    # presumably the keys are decomposed sequences and the values their
    # precomposed forms - confirm the literals survive editing unchanged.
    substitutions = {"ऩ":"ऩ", "ऱ":"ऱ", "क़":"क़", "ख़":"ख़", "ग़":"ग़", "ज़":"ज़", "ड़":"ड़", "ढ़":"ढ़", "ढ़":"ढ़", "फ़":"फ़", "य़":"य़", "ऴ":"ऴ",
    "ொ":"ொ", "ோ":"ோ",
    "ൊ":"ൊ", "ോ":"ോ", "ല്":"ൽ", "ള്":"ൾ", "ര്":"ർ", "ന്":"ൻ", "ണ്":"ൺ"}

    cleaned = "".join(ch for ch in input if ch not in unwanted)
    for source, target in substitutions.items():
        cleaned = rec_replace(cleaned, source, target)
    return cleaned
|
|
| |
| |
def GetFile(g : GLOBALS, LangId : int, type : int) -> str:
    """Build the path of a parser resource file below ``g.rootPath``.

    Args:
        g: Parser state; ``rootPath`` and ``commonFile`` are read.
        LangId: Numeric language id used to pick the file name.  Ids that
            are not in the table contribute no name.
        type: ``0`` for the common file, ``1`` for a dictionary
            (``dict/<lang>.dict``), ``2`` for a rule file
            (``rules/<lang>.rules``).  Any other value adds neither a
            directory nor an extension.

    Returns:
        The assembled path string.
    """
    if type == 0:
        return g.rootPath + g.commonFile

    # (sub-directory, extension) per resource type.
    directory, extension = {1: ("dict/", ".dict"), 2: ("rules/", ".rules")}.get(type, ("", ""))

    language_names = {
        1 : "malayalam", 2 : "tamil", 3 : "telugu",
        4 : "kannada", 5 : "hindi", 6 : "bengali",
        7 : "gujarathi", 8 : "odiya", 9 : "punjabi", 10 : "english",
    }
    return g.rootPath + directory + language_names.get(LangId, "") + extension
|
|
| |
def SetlangId(g : GLOBALS, fl : str):
    """Select the current language from the code point of one character.

    The code point of ``fl`` is matched against fixed ranges; on a match
    ``g.currLang`` is set to the corresponding constant of ``g``.  When no
    range matches, ``g.currLang`` keeps whatever value it already had.
    ``g.langId`` is then copied from ``g.currLang`` and ``g.isSouth`` is
    set to 1 for ids below 5.

    Args:
        g: Parser state, updated in place.
        fl: A single character.

    Returns:
        1.  If the resulting language id is 0 a message is printed and the
        process exits with status 0.
    """
    code_point = ord(fl)
    # (first code point, last code point, name of the id constant on g)
    script_ranges = (
        (3328, 3455, "MALAYALAM"),
        (2944, 3055, "TAMIL"),
        (3202, 3311, "KANNADA"),
        (3072, 3198, "TELUGU"),
        (2304, 2431, "HINDI"),
        (2432, 2559, "BENGALI"),
        (2688, 2815, "GUJARATHI"),
        (2816, 2943, "ODIYA"),
        (2560, 2687, "PUNJABI"),
        (64, 123, "ENGLISH"),
    )
    for first, last, constant in script_ranges:
        if first <= code_point <= last:
            g.currLang = getattr(g, constant)
            break

    g.langId = g.currLang

    if g.langId < 5:
        g.isSouth = 1
    if g.langId == 0:
        print(f"UNKNOWN LANGUAGE - id = {fl}")
        exit(0)
    return 1
|
|
| |
def SetlanguageFeat(g : GLOBALS, input : str) -> int:
    """Detect the language of ``input`` and load the symbol table for it.

    The common mapping file (``GetFile(g, 0, 0)``) is read, the language
    is set from the first character of ``input`` whose code point is below
    8204 (U+200C), and for every line ``i`` of the file
    ``g.symbolTable[i][1]`` receives the second tab-separated field and
    ``g.symbolTable[i][0]`` the field at index ``1 + g.langId``.

    Args:
        g: Parser state, updated in place.
        input: Word to inspect.

    Returns:
        1 on success; 0 when the common file cannot be read or when
        ``input`` contains no usable character (including empty input).
    """
    try:
        # The table holds native-script text, so decode it explicitly
        # instead of relying on the platform default encoding.
        with open(GetFile(g, 0, 0), 'r', encoding='utf-8') as infile:
            lines = infile.readlines()
    except (OSError, UnicodeError):
        print("Couldn't open common file for reading")
        return 0

    # First character that is not in the range starting at U+200C.
    firstLet = next((ch for ch in input if ord(ch) < 8204), None)
    if firstLet is None:
        # Previously an IndexError / UnboundLocalError; report failure.
        return 0

    SetlangId(g, firstLet)
    for i, line in enumerate(lines):
        fields = line.strip().split('\t')
        g.symbolTable[i][1] = fields[1]
        g.symbolTable[i][0] = fields[1 + g.langId]

    return 1
|
|
| |
def CheckSymbol(g : GLOBALS, input : str) -> int:
    """Return 1 if ``input`` equals column 1 of any of the first ``g.ROW``
    rows of ``g.symbolTable``, else 0."""
    known = any(g.symbolTable[row][1] == input for row in range(g.ROW))
    return 1 if known else 0
|
|
| |
def ConvertToSymbols(g : GLOBALS, input : str) -> str:
    """Translate each character of ``input`` into its table symbol.

    Characters with a code point of 8204 (U+200C) or above are skipped.
    Every other character contributes ``"&" + g.symbolTable[cp % 128][1]``.
    The result is stored in ``g.words.syllabifiedWord`` and returned.
    """
    symbols = [
        g.symbolTable[ord(ch) % 128][1]
        for ch in input
        if ord(ch) < 8204
    ]
    g.words.syllabifiedWord = "".join("&" + symbol for symbol in symbols)
    return g.words.syllabifiedWord
|
|
| |
def WriteFile(g : GLOBALS, text : str):
    """Store ``text``, wrapped in the ``(set! wordstruct ...)`` form, in
    ``g.answer``."""
    g.answer = "(set! wordstruct '( {}))".format(text)
|
|
| |
def CheckVowel(input : str, q : int, rq : int) -> int:
    """Return 1 if ``input`` contains a vowel letter, else 0.

    Args:
        input: Text to scan.
        q: When truthy, the letter ``q`` also counts.
        rq: When truthy, the sequence ``rq`` also counts.
    """
    if any(letter in input for letter in "aeiou"):
        return 1
    if q and "q" in input:
        return 1
    if rq and "rq" in input:
        return 1
    return 0
|
|
| |
def Checkeuv(input : str) -> int:
    """Return 1 if ``input`` contains the sequence ``euv``, else 0."""
    return 1 if "euv" in input else 0
|
|
| |
def CheckSingleVowel(input : str, q : int) -> int:
    """Return 1 if ``input`` is exactly one of ``a e i o u``, else 0.

    When ``q`` is non-zero, ``input == 'q'`` also yields 1.
    """
    is_plain_vowel = input in ('a', 'e', 'i', 'o', 'u')
    is_allowed_q = q != 0 and input == 'q'
    return 1 if (is_plain_vowel or is_allowed_q) else 0
|
|
| |
def GetPhoneType(g : GLOBALS, input : str, pos : int) -> int:
    """Classify the phone at 1-based position ``pos`` of an ``&`` string.

    ``input`` is split on ``&`` and empty pieces are discarded.  ``pos`` is
    capped at the number of phones and the phone at ``pos - 1`` is passed
    to ``GetType``.

    Returns:
        The value returned by ``GetType`` for the selected phone.
    """
    phones = [piece for piece in input.split('&') if piece != '']
    position = min(pos, len(phones))
    pch = phones[position - 1]

    if g.flags.DEBUG:
        print(f'input : {input}')
        print(f"str : {pch} {GetType(g, pch)}")

    return GetType(g, pch)
|
|
| |
def GetType(g : GLOBALS, input : str):
    """Return the class code of a phone symbol.

    Returns:
        1 if ``input`` is among the first ``g.VOWELSSIZE`` entries of
        ``g.VOWELS``, 2 for ``g.CONSONANTS``, 3 for ``g.SEMIVOWELS``
        (checked in that order), otherwise 0.
    """
    # (list attribute, size attribute, class code), checked in order.
    classes = (
        ("VOWELS", "VOWELSSIZE", 1),
        ("CONSONANTS", "CONSONANTSSIZE", 2),
        ("SEMIVOWELS", "SEMIVOWELSSIZE", 3),
    )
    for list_name, size_name, code in classes:
        count = getattr(g, size_name)
        members = getattr(g, list_name) if count > 0 else ()
        for index in range(count):
            if members[index] == input:
                return code
    return 0
|
|
| |
def CheckChillu(input : str) -> int:
    """Return 1 if ``input`` contains any of ``nwv``, ``nnv``, ``rwv``,
    ``lwv`` or ``lnv``, else 0."""
    markers = ("nwv", "nnv", "rwv", "lwv", "lnv")
    return 1 if any(marker in input for marker in markers) else 0
|
|
| |
def GetUTF(g : GLOBALS, input : str) -> str :
    """Look up column 0 of the symbol-table row whose column 1 is ``input``.

    Only the first ``g.ROW`` rows are searched and the first match wins.

    Returns:
        The matching row's column 0, or the integer 0 when nothing matches.
    """
    matches = (
        g.symbolTable[row][0]
        for row in range(g.ROW)
        if input == g.symbolTable[row][1]
    )
    return next(matches, 0)
|
|
| |
def isEngLetter(p : str) -> int:
    """Return 1 if the code point of ``p`` lies in 97..122 (``a``-``z``),
    else 0."""
    return 1 if 97 <= ord(p) <= 122 else 0
|
|
| |
def CleanseWord(phone : str) -> str:
    """Mask every character that is neither ``&`` nor ``a``-``z`` with ``#``.

    After masking, ``$`` is stripped and runs of ``&&`` are collapsed to a
    single ``&`` using ``rec_replace``.
    """
    masked = "".join(
        ch if (ch == '&' or isEngLetter(ch) != 0) else '#'
        for ch in phone
    )
    masked = rec_replace(masked, '$', '')
    return rec_replace(masked, '&&', '&')
|
|
| |
def MiddleVowel(g : GLOBALS, phone : str) -> str:
    """Insert ``av`` between a consonant or semivowel and a following vowel.

    For every pair drawn from the first ``g.CONSONANTSSIZE`` consonants
    (then the first ``g.SEMIVOWELSSIZE`` semivowels) and the first
    ``g.VOWELSSIZE`` entries of ``g.VOWELS``, ``&C&V&`` is rewritten to
    ``&C&av&V&``.

    Returns:
        The rewritten phone string.
    """
    result = phone
    onset_groups = (
        (g.CONSONANTS, g.CONSONANTSSIZE),
        (g.SEMIVOWELS, g.SEMIVOWELSSIZE),
    )
    for onsets, onset_count in onset_groups:
        for onset_index in range(onset_count):
            for vowel_index in range(g.VOWELSSIZE):
                onset = onsets[onset_index]
                vowel = g.VOWELS[vowel_index]
                result = result.replace(f'&{onset}&{vowel}&', f'&{onset}&av&{vowel}&')
    return result
|
|
| |
| |
def DoubleModifierCorrection(phone : str) -> str:
    """Move a ``#`` that sits between two modifier symbols behind them.

    For every ordered pair ``(A, B)`` from the modifier list, ``A#B`` is
    rewritten to ``AB#&``.  Afterwards ``&#&hq&`` becomes ``&hq&#&`` and
    runs of ``&&`` are collapsed.
    """
    modifiers = ["&nwv&","&nnv&","&rwv&","&lwv&","&lnv&","&aav&","&iiv&","&uuv&","&rqv&","&eev&",
                 "&eiv&","&ouv&","&axv&","&oov&","&aiv&","&auv&","&aev&",
                 "&iv&","&ov&","&ev&","&uv&"]

    result = phone
    for first in modifiers:
        for second in modifiers:
            result = result.replace(f'{first}#{second}', f'{first}{second}#&')

    result = rec_replace(result, "&#&hq&","&hq&#&")
    return rec_replace(result, "&&","&")
|
|
| |
def SchwaDoubleConsonent(phone : str) -> str:
    """Insert ``av`` between two consonants that directly follow ``&euv&``.

    Every ``&euv&C1&C2&V`` (``C1``/``C2`` from the consonant list, ``V``
    from the vowel list) becomes ``&euv&C1&av&C2&V``; finally every ``$``
    is removed.

    The full scan is 39 * 39 * 42 patterns, so each loop level is skipped
    when the prefix its patterns share is absent from the text.  A skipped
    level could not have matched anything, so the result is unchanged.

    Args:
        phone: ``&``-delimited phone string.

    Returns:
        The corrected phone string.
    """
    consonentList = ["k","kh","lx","rx","g","gh","ng","c","ch","j","jh","nj","tx","txh","dx","dxh","nx","t","th","d","dh","n","p","ph","b","bh","m","y","r","l","w","sh","sx","zh","y","s","h","f","dxq"]
    vowelList = ["av&","nwv&","nnv&","rwv&","lwv&","lnv&","aav&","iiv&","uuv&","rqv&","eev&","eiv&","ouv&",
                 "axv&","oov&","aiv&","nnx&","nxx&","rrx&","llx&","lxx&",
                 "aa&","iv&","ov&","mq&","aa&","ii&","uu&","rq&",
                 "ee&","ei&","ou&","oo&","ax&","ai&","ev&","uv&",
                 "a&","e&","i&","o&","u&"]

    phonecopy = phone
    if "&euv&" in phonecopy:
        for first in consonentList:
            first_prefix = f'&euv&{first}&'
            if first_prefix not in phonecopy:
                continue
            for second in consonentList:
                pair_prefix = f'{first_prefix}{second}&'
                if pair_prefix not in phonecopy:
                    continue
                for vowel in vowelList:
                    phonecopy = phonecopy.replace(
                        f'{pair_prefix}{vowel}',
                        f'{first_prefix}av&{second}&{vowel}',
                    )
    # Deleting "$" cannot create new "$", so a single pass is enough.
    return phonecopy.replace("$", "")
|
|
| |
def SchwaSpecificCorrection(g : GLOBALS, phone : str) -> str:
    """Rewrite selected ``av`` symbols of a phone string to ``euv``.

    Four passes run in order:

    1. With ``!`` appended as an end marker, a final ``&av&C&V`` becomes
       ``&euv&C&V`` (all vowel-list entries except the first), then the
       marker is removed.
    2. ``&av&C&av&`` becomes ``&euv$&C&av$&``.
    3. ``&av&C&V`` becomes ``&euv&&C&$V`` (all vowel-list entries except
       the last); afterwards every ``$`` is removed.
    4. ``&q&C&euv&`` becomes ``&q&C&av&``.

    Args:
        g: Parser state; only ``g.flags.DEBUG`` is read.
        phone: ``&``-delimited phone string.

    Returns:
        The corrected phone string.
    """
    consonants = ["k","kh","g","gh","ng","c","ch","j","jh","nj","tx","txh","dx","dxh",
                  "nx","t","th","d","dh","n","p","ph","b","bh","m","y",
                  "r","l","s","w","sh","sx","zh","h","lx","rx","f","dxq"]

    vowels = ["av&","nwv&","nnv&","rwv&","lwv&","lnv&","aav&","iiv&","uuv&","rqv&","eev&","eiv&","ouv&",
              "axv&","oov&","aiv&","nnx&","nxx&","rrx&","llx&","lxx&",
              "aa&","iv&","ov&","mq&","aa&","ii&","uu&","rq&",
              "ee&","ei&","ou&","oo&","ax&","ai&","ev&","uv&",
              "a&","e&","i&","o&","u&"]

    debug = g.flags.DEBUG
    if debug:
        print(f'{len(phone)}')

    # "!" marks the end of the word for the first pass.
    text = phone + '!'

    if debug:
        print(f'phone cur - {text}')

    # Pass 1: word-final pattern; the first vowel entry ("av&") is skipped.
    for consonant in consonants:
        for vowel in vowels[1:42]:
            text = text.replace(f'&av&{consonant}&{vowel}!', f'&euv&{consonant}&{vowel}!')
    text = text.replace('!', '')

    # Pass 2: "$" protects the rewritten spans from pass 3.
    for consonant in consonants:
        text = text.replace(f'&av&{consonant}&av&', f'&euv$&{consonant}&av$&')

    if debug:
        print(f"inside schwa {text}")

    # Pass 3: the last vowel entry ("u&") is not visited.
    for consonant in consonants:
        with_schwa = f'&av&{consonant}&'
        bare = f'&{consonant}&'
        for vowel in vowels[0:41]:
            text = text.replace(f'{with_schwa}{vowel}', f'&euv&{bare}${vowel}')
    text = text.replace('$', '')

    # Pass 4: undo the rewrite directly after "&q&C".
    for consonant in consonants:
        text = text.replace(f'&q&{consonant}&euv&', f'&q&{consonant}&av&')

    return text
|
|
| |
def GeminateCorrection(phone : str, isReverse : int) -> str:
    """Convert between ``&C&eu&C&`` and ``&C&C&`` for doubled consonants.

    Args:
        phone: ``&``-delimited phone string.
        isReverse: ``0`` rewrites ``&C&eu&C&`` to ``&C&C&``; any other
            value rewrites ``&C&C&`` to ``&C&eu&C&``.

    Returns:
        The corrected phone string.
    """
    consonants = ["k","kh","lx","rx","g","gh","ng","c","ch","j","jh","nj","tx","txh","dx","dxh","nx","t","th","d","dh","n","p","ph","b","bh","m","y",
                  "r","l","w","sh","sx","zh","y","s","h","f","dxq"]

    result = phone
    for consonant in consonants:
        separated = f'&{consonant}&eu&{consonant}&'
        doubled = f'&{consonant}&{consonant}&'
        if isReverse != 0:
            result = rec_replace(result, doubled, separated)
        else:
            result = rec_replace(result, separated, doubled)
    return result
|
|
| |
def Syllabilfy(phone : str) -> str:
    """Insert ``#`` syllable-boundary markers into a phone string.

    Boundaries are placed after ``eu``, ``euv``, ``av``, ``q`` and after
    every symbol in the vowel list; a boundary is also placed in front of
    the plain vowels ``a e i o u``.  ``hq`` is kept in front of a
    following boundary, and ``&eu&#&C&euv&#&`` / ``&euv&#&C&euv&#&`` are
    merged into ``&eu&C&av&#&`` / ``&euv&C&av&#&``.

    Args:
        phone: ``&``-delimited phone string.

    Returns:
        The phone string with ``#`` markers inserted.
    """
    text = rec_replace(phone, "&&","&")
    text = text.replace("&eu&","&eu&#&")
    text = text.replace("&euv&","&euv&#&")
    text = rec_replace(text, "&avq","&q&av")
    text = text.replace("&av&","&av&#&")
    text = text.replace("&q","&q&#")

    boundary_after = ["&nwv&","&nnv&","&rwv&","&lwv&","&lnv&","&aav&","&iiv&","&uuv&","&rqv&","&eev&",
                      "&eiv&","&ouv&","&axv&","&oov&","&aiv&","&auv&","&aev&",
                      "&nnx&","&nxx&","&rrx&","&llx&","&lxx&",
                      "&aa&","&iv&","&ov&","&mq&","&aa&","&ii&","&uu&","&rq&","&au&","&ee&",
                      "&ei&","&ou&","&oo&","&ax&","&ai&","&ev&","&uv&","&ae&",
                      "&a&","&e&","&i&","&o&","&u&"]

    for symbol in boundary_after:
        text = text.replace(symbol, symbol + '#&')
    text = rec_replace(text, "&#&hq&","&hq&#&")

    for vowel in ("&a&","&e&","&i&","&o&","&u&"):
        text = text.replace(vowel, f'&#{vowel}')

    consonants = ["k","kh","g","gh","ng","c","ch","j","jh","nj","tx","txh","dx","dxh",
                  "nx","t","th","d","dh","n","p","ph","b","bh","m","y",
                  "r","l","w","sh","sx","zh","y","s","h","lx","rx","f","dxq"]

    # First all "&eu&" merges, then all "&euv&" merges.
    for lead in ("&eu&", "&euv&"):
        for consonant in consonants:
            text = text.replace(f'{lead}#&{consonant}&euv&#&', f'{lead}{consonant}&av&#&')

    return text.replace("&eu&","&eu&#&")
|
|
| |
| |
def CheckDictionary(g : GLOBALS, input : str) -> int:
    """Look ``input`` up in the dictionary file of the current language.

    The dictionary (``GetFile(g, g.langId, 1)``) holds one entry per line
    with three tab-separated fields: word, syllables, monophones.  On a
    match the syllables (``g.flags.writeFormat == 1``) or the monophones
    (``writeFormat == 0``) are handed to ``WriteFile``.

    Args:
        g: Parser state.
        input: Word to look up.  For English every character with a code
            point below 97 is lower-cased first.

    Returns:
        1 if the word was found, otherwise 0 (also when the dictionary
        cannot be read for a non-English language).

    Raises:
        SystemExit: With status 1 when the English dictionary cannot be
            read.
        AssertionError: If a line does not have exactly three fields.
    """
    fileName = GetFile(g, g.langId, 1)
    if g.flags.DEBUG:
        print(f'dict : {fileName}')
    try:
        # Dictionary words are native-script text; decode explicitly.
        with open(fileName, 'r', encoding='utf-8') as dict_file:
            entries = dict_file.readlines()
    except (OSError, UnicodeError):
        if g.flags.DEBUG:
            print(f'Dict not found')
        if g.langId == g.ENGLISH:
            # Same effect as the site-provided exit(1), without depending
            # on the interactive helper being installed.
            raise SystemExit(1)
        return 0

    if g.langId == g.ENGLISH:
        input = ''.join(ch.lower() if ord(ch) < 97 else ch for ch in input)

    for entry in entries:
        fields = entry.strip().split('\t')
        assert len(fields) == 3, f'malformed dictionary line: {entry!r}'
        if g.flags.DEBUG:
            print(f"word : {fields[0]}")
        if input == fields[0]:
            if g.flags.DEBUG:
                print(f"match found")
                print(f'Syllables : {fields[1]}')
                print(f'monophones : {fields[2]}')
            if g.flags.writeFormat == 1:
                WriteFile(g, fields[1])
            if g.flags.writeFormat == 0:
                WriteFile(g, fields[2])
            return 1

    return 0
|
|
| |
def PositionCorrection(phone : str, left : str, right :str, isReverse:int) -> str:
    """Apply a rule whose ``@`` placeholder stands for any listed consonant.

    For each consonant, ``@`` is substituted in both ``left`` and
    ``right``; the resulting pair is applied with ``rec_replace``.

    Args:
        phone: Phone string to rewrite.
        left: Pattern containing ``@``.
        right: Replacement containing ``@``.
        isReverse: ``0`` rewrites left to right; any other value rewrites
            right to left.

    Returns:
        The rewritten phone string.
    """
    consonants = ["k","kh","lx","rx","g","gh","ng","c","ch","j","jh","nj","tx","txh","dx","dxh","nx","t","th","d","dh",
                  "n","p","ph","b","bh","m","y","r","l","w","sh","sx","zh","y","s","h","f","dxq"]
    result = phone
    for consonant in consonants:
        left_form = left.replace('@', consonant)
        right_form = right.replace('@', consonant)
        if isReverse != 0:
            result = rec_replace(result, right_form, left_form)
        else:
            result = rec_replace(result, left_form, right_form)
    return result
|
|
| |
def CountChars(s : str, c : str) -> int:
    """Count the elements of ``s`` that compare equal to ``c``."""
    return sum(1 for element in s if element == c)
|
|
| |
def GenerateAllCombinations(g : GLOBALS, j : int, s : str, c : list, isRight : int):
    """Expand a combination matrix into all ``&``-joined strings.

    Row ``j`` of ``c`` lists alternatives and ends with a cell starting
    with ``#``; a row whose first cell starts with ``#`` ends the matrix.
    Each complete path (with a trailing ``&``) is stored at index
    ``g.combvars.bi`` of ``g.combvars.rightStr`` (``isRight == 1``) or
    ``g.combvars.leftStr`` (otherwise), and ``bi`` is incremented.

    Args:
        g: Parser state holding ``combvars``.
        j: Row to expand next.
        s: Prefix built so far.
        c: Combination matrix.
        isRight: Selects the output list.
    """
    alternatives = c[j]
    if alternatives[0][0] == '#':
        # End of the matrix: emit the finished string.
        if isRight == 1:
            g.combvars.rightStr[g.combvars.bi] = s + '&'
        else:
            g.combvars.leftStr[g.combvars.bi] = s + '&'
        g.combvars.bi += 1
        return

    column = 0
    while alternatives[column][0] != '#':
        GenerateAllCombinations(g, j + 1, s + '&' + alternatives[column], c, isRight)
        column += 1
|
|
| |
def GenerateMatrix(g : GLOBALS, combMatrix : list, regex : str):
    """Fill ``combMatrix`` from a rule pattern.

    ``&`` closes the current row with ``#`` and starts the next row, ``|``
    starts the next cell of the current row, and any other character is
    written into the current cell.  After the last character the current
    row is closed with ``#`` and the following row starts with ``#``.

    Args:
        g: Parser state; only ``g.flags.DEBUG`` is read.
        combMatrix: Pre-allocated 2-D list of strings, modified in place.
        regex: Pattern text.
    """
    row = col = item = 0
    for ch in regex:
        if ch == '&':
            combMatrix[row][col+1] = '#'
            row, col, item = row + 1, 0, 0
            combMatrix[row][col] = ''
        elif ch == '|':
            col, item = col + 1, 0
            combMatrix[row][col] = ''
        else:
            cell = combMatrix[row][col]
            # Overwrite position `item`, keeping text on either side.
            combMatrix[row][col] = cell[:item] + ch + cell[(item+1):]
            item += 1
        if g.flags.DEBUG:
            print(f'{row} {col} {combMatrix[row][col]}')

    combMatrix[row][col+1] = '#'
    combMatrix[row+1][0] = '#'
|
|
| |
def CombinationCorrection(g : GLOBALS, phone : str, left : str, right : str, isReverse : int) -> str:
    """Apply a rule whose sides contain ``|`` alternatives.

    Both sides are expanded with ``GenerateMatrix`` and
    ``GenerateAllCombinations`` into ``g.combvars.leftStr`` and
    ``g.combvars.rightStr``.  The expansions are then applied pairwise, in
    order, until the first empty entry of ``leftStr``.

    Args:
        g: Parser state; ``combvars`` is used as scratch space and reset
            with ``refresh()`` before returning.
        phone: Phone string to rewrite.
        left: Left-hand pattern.
        right: Right-hand pattern.
        isReverse: ``0`` rewrites left to right; any other value rewrites
            right to left.

    Returns:
        The rewritten phone string.
    """
    left_matrix = [[''] * 256 for _ in range(256)]
    right_matrix = [[''] * 256 for _ in range(256)]
    GenerateMatrix(g, left_matrix, left)
    GenerateMatrix(g, right_matrix, right)

    g.combvars.bi = 0
    GenerateAllCombinations(g, 0, '', left_matrix, 0)
    g.combvars.bi = 0
    GenerateAllCombinations(g, 0, '', right_matrix, 1)

    result = phone
    index = 0
    while g.combvars.leftStr[index] != '':
        left_form = g.combvars.leftStr[index]
        right_form = g.combvars.rightStr[index]
        if isReverse != 0:
            result = result.replace(right_form, left_form)
        else:
            result = result.replace(left_form, right_form)

        if g.flags.DEBUG:
            print(f'{g.combvars.leftStr[index]} {g.combvars.rightStr[index]}')

        index += 1

    g.combvars.refresh()
    return result
|
|
| |
def LangSpecificCorrection(g : GLOBALS, phone : str, langSpecFlag : int) -> str:
    """Normalise a phone string and optionally apply the language rules.

    First a fixed list of rewrites is applied (mainly removing an ``av``
    that follows another vowel symbol).  When ``langSpecFlag`` is non-zero
    the rule file ``GetFile(g, g.langId, 2)`` is read: each line without a
    ``#`` holds ``left<TAB>right`` and is applied left-to-right on the
    text wrapped in ``^`` ... ``$``.  Rules containing ``|`` go through
    ``CombinationCorrection`` and rules containing ``@`` through
    ``PositionCorrection``.

    Args:
        g: Parser state.
        phone: ``&``-delimited phone string.
        langSpecFlag: ``0`` skips the rule file.

    Returns:
        The corrected string; after rule processing it is cut off just
        after its last ``&``.
    """
    if g.isSouth:
        regional = (("&ei&","&ai&"), ("&eiv&","&aiv&"))
    else:
        regional = (("&oo&","&o&"), ("&oov&","&ov&"))

    text = phone
    for old, new in regional:
        text = rec_replace(text, old, new)

    text = text.replace("&q&","&av&q&")

    # Applied strictly in this order.
    cleanup = (
        ("&a&av&","&a&"), ("&e&av&","&e&"), ("&i&av&","&i&"),
        ("&o&av&","&o&"), ("&u&av&","&u&"),
        ("&a&rqv&","&rq&"),
        ("&aa&av&","&aa&"), ("&ae&av&","&ae&"), ("&ax&av&","&ax&"),
        ("&ee&av&","&ee&"), ("&ii&av&","&ii&"), ("&ai&av&","&ai&"),
        ("&au&av&","&au&"), ("&oo&av&","&oo&"), ("&uu&av&","&uu&"),
        ("&rq&av&","&rq&"), ("&av&av&","&av&"), ("&ev&av&","&ev&"),
        ("&iv&av&","&iv&"), ("&ov&av&","&ov&"), ("&uv&av&","&uv&"),
        ("&av&rqv&","&rqv&"),
        ("&aav&av&","&aav&"), ("&aev&av&","&aev&"), ("&auv&av&","&auv&"),
        ("&axv&av&","&axv&"), ("&aiv&av&","&aiv&"), ("&eev&av&","&eev&"),
        ("&eiv&av&","&eiv&"), ("&iiv&av&","&iiv&"), ("&oov&av&","&oov&"),
        ("&ouv&av&","&ouv&"), ("&uuv&av&","&uuv&"), ("&rqv&av&","&rqv&"),
    )
    for old, new in cleanup:
        text = rec_replace(text, old, new)

    if langSpecFlag == 0:
        return text

    with open(GetFile(g, g.langId, 2), 'r') as rules_file:
        rules = rules_file.readlines()

    # "^" and "$" let rules refer to the start and end of the word.
    text = '^' + text + '$'

    if g.flags.DEBUG:
        print(f'phone : {text}')

    for rule in rules:
        rule = rule.strip()
        if '#' in rule:
            continue

        fields = rule.split('\t')
        assert(len(fields) == 2)
        left, right = fields

        if '|' in left:
            a1, a2 = left[1:-1], right[1:-1]
            text = CombinationCorrection(g, text, a1, a2, 0)
            if g.flags.DEBUG:
                print(f'{a1}\t{a2}')
        elif '@' in left:
            text = PositionCorrection(text, left, right, 0)
        else:
            text = text.replace(left, right)

    text = text.replace('^', '').replace('$', '')

    # Keep everything up to the last "&"; without any "&" only the first
    # character is kept.
    last_amp = max(text.rfind('&'), 0)
    return text[:(last_amp+1)]
|
|
| |
def SyllableReverseCorrection(g : GLOBALS, phone : str, langSpecFlag : int) -> str:
    """Undo the language-specific rewrites on a syllable.

    The regional vowel rewrite is reversed first.  When ``langSpecFlag``
    is non-zero, every rule of ``GetFile(g, g.langId, 2)`` (lines without
    ``#``, ``left<TAB>right``) is applied right-to-left on the text
    wrapped in ``^`` ... ``$``.

    Args:
        g: Parser state.
        phone: ``&``-delimited phone string.
        langSpecFlag: ``0`` skips the rule file.

    Returns:
        The rewritten string with ``^`` and ``$`` removed.
    """
    if g.isSouth:
        regional = (("&ai&","&ei&"), ("&aiv&","&eiv&"))
    else:
        regional = (("&o&","&oo&"), ("&ov&","&oov&"))

    text = phone
    for old, new in regional:
        text = rec_replace(text, old, new)

    if langSpecFlag == 0:
        return text

    with open(GetFile(g, g.langId, 2), 'r') as rules_file:
        rules = rules_file.readlines()

    text = '^' + text + '$'

    if g.flags.DEBUG:
        print(f'before phone : {text}')

    for rule in rules:
        rule = rule.strip()
        if '#' in rule:
            continue

        fields = rule.split('\t')
        assert(len(fields) == 2)
        left, right = fields

        if '|' in left:
            a1, a2 = left[1:-1], right[1:-1]
            text = CombinationCorrection(g, text, a1, a2, 1)
            if g.flags.DEBUG:
                print(f'{a1}\t{a2}')
        elif '@' in left:
            text = PositionCorrection(text, left, right, 1)
        else:
            text = text.replace(right, left)

    text = text.replace('^', '').replace('$', '')

    if g.flags.DEBUG:
        print(f'after phone : {text}')
    return text
|
|
| |
def LangSyllableCorrection(input : str) -> int:
    """Return 1 if ``input`` is exactly ``&av&q&``, else 0."""
    return 1 if input == "&av&q&" else 0
|
|
| |
def SplitSyllables(g : GLOBALS, input : str) -> int:
    """Split a ``#``-marked phone string into ``g.syllableList``.

    Steps:

    1. For ``g.flags.writeFormat == 2`` every consonant pair ``&C1&C2&``
       is first split as ``&C1&euv&#&C2&``.
    2. The text is split on ``#``; pieces equal to ``&`` are dropped.
    3. A last syllable without vowel or chillu gets ``&av&``
       (``g.isSouth``) or ``&euv&`` appended.  For write format 2 the
       function stops here and switches the format to 1.
    4. Walking backwards, ``&av&q&`` syllables and syllables without a
       vowel are merged into the previous syllable (the first syllable
       absorbs the second instead).
    5. Each syllable is cleaned up, completed with ``&av`` if it still
       has no vowel (unless the format is 3), and passed through
       ``GeminateCorrection`` in reverse mode.

    Args:
        g: Parser state; ``syllableList``, ``syllableCount`` and
            ``flags.writeFormat`` are updated.
        input: Phone string with ``#`` syllable markers.

    Returns:
        Always 1.
    """
    incopy = input

    if g.flags.writeFormat == 2:
        fullList = ["k","kh","lx","rx","g","gh","ng","c","ch","j","jh","nj","tx","txh","dx","dxh","nx","t","th","d","dh","n","p","ph","b","bh","m","y","r","l","w","sh","sx","zh","y","s","h","f","dxq"]

        for first in fullList:
            for second in fullList:
                incopy = incopy.replace(f'&{first}&{second}&', f'&{first}&euv&#&{second}&')

        incopy = rec_replace(incopy, "&#&mq&","&mq&")
        incopy = rec_replace(incopy, "&#&q&","&q&")

    syllables = [piece for piece in incopy.split('#') if piece != '&']
    g.syllableList = syllables

    ln = len(syllables)
    if ln == 0:
        return 1

    if g.flags.DEBUG:
        for i in range(ln):
            print(f"initStack : {syllables[i]}")

    if CheckVowel(syllables[ln-1],1,0) == 0 and CheckChillu(syllables[ln-1]) == 0:
        if g.isSouth:
            syllables[ln-1] += '&av&'
        else:
            syllables[ln-1] += '&euv&'

    if g.flags.writeFormat == 2:
        g.syllableCount = ln
        g.flags.writeFormat = 1
        return 1

    # euFlag remembers which symbol a "!" placeholder stands for:
    # 1 -> "&eu&", 2 -> "&euv&".
    euFlag = 1
    if ln > 1:
        for i in range(ln-1,-1,-1):
            # Only merge backwards when a previous syllable exists; at
            # i == 0 index -1 would wrap around to the LAST syllable.
            if i > 0 and LangSyllableCorrection(syllables[i]) == 1:
                syllables[i-1] += syllables[i]
                syllables[i] = ''

            if syllables[i].find("&eu&") != -1:
                syllables[i] = syllables[i].replace("&eu&", "!")
                euFlag = 1

            if syllables[i].find("&euv&") != -1:
                syllables[i] = syllables[i].replace("&euv&", "!")
                euFlag = 2

            if CheckVowel(syllables[i],0,1) == 0:
                if i-1 >= 0:
                    syllables[i-1] += syllables[i]
                    syllables[i] = ''
                else:
                    syllables[i] += syllables[i+1]
                    syllables[i+1] = ''

            if i-1 > 0:
                if euFlag == 1:
                    syllables[i-1] = syllables[i-1].replace("!","&eu&")
                elif euFlag == 2:
                    syllables[i-1] = syllables[i-1].replace("!","&euv&")
                syllables[i-1] = rec_replace(syllables[i-1], "&&","&")

            if euFlag == 1:
                syllables[i] = syllables[i].replace("!","&eu&")
            elif euFlag == 2:
                syllables[i] = syllables[i].replace("!","&euv&")
    else:
        if (CheckVowel(syllables[0],1,0) == 0 and g.flags.writeFormat != 3) or Checkeuv(syllables[0]) != 0:
            syllables[0] += '&av'

    if g.flags.DEBUG:
        for i in range(ln):
            print(f'syllablifiedStack : {syllables[i]}')

    for i in range(ln):
        syllables[i] = syllables[i].replace('1','')
        if g.flags.DEBUG:
            print(f'LenStack : {len(syllables[i])}')

        if len(syllables[i]) > 0:
            # Hide eu/euv behind "!" so their letters are not counted as
            # vowels by CheckVowel below.
            if syllables[i].find("&eu&") != -1:
                syllables[i] = syllables[i].replace("&eu&", "!")
                euFlag = 1

            if syllables[i].find("&euv&") != -1:
                syllables[i] = syllables[i].replace("&euv&", "!")
                euFlag = 2

            if CheckVowel(syllables[i],0,1) == 0 and g.flags.writeFormat != 3:
                if g.flags.DEBUG:
                    print(f'Stack : {syllables[i]}')
                syllables[i] += '&av'

            if syllables[i].find('!') != -1:
                if euFlag == 1:
                    syllables[i] = syllables[i].replace("!","&eu&")
                elif euFlag == 2:
                    syllables[i] = syllables[i].replace("!","&euv&")
                syllables[i] = syllables[i].replace('!', 'eu')

        syllables[i] = rec_replace(syllables[i], '&&', '&')
        syllables[i] = GeminateCorrection(syllables[i],1)

    if g.flags.DEBUG:
        for i in range(ln):
            print(f'syllablifiedStack1 : {syllables[i]}')
        print(f'No of syllables : {ln}')

    g.syllableCount = ln
    if g.flags.writeFormat == 3:
        g.flags.writeFormat = 0
    return 1
|
|
| |
def WritetoFiles(g : GLOBALS) -> int:
    """Render ``g.syllableList`` into ``g.words.outputText`` and publish it.

    * ``g.flags.writeFormat == 0``: each syllable becomes
      ``(( "p1" "p2" ...) 0) `` with one quoted entry per phone; afterwards
      every ``v``, every `` "eu"`` entry and every ``!`` is removed.
    * ``g.flags.writeFormat == 1``: each syllable is converted back with
      ``SyllableReverseCorrection`` and written as ``(( "<text>" ) 0) ``
      where the text is the ``GetUTF`` value of every phone known to
      ``CheckSymbol``; afterwards every ``#`` is removed.
    * Syllables without any phone produce no output.
    * With ``g.flags.syllTagFlag`` set, ``_beg`` / ``_mid`` / ``_end`` is
      appended per printed syllable.

    The result is passed to ``WriteFile``.

    Returns:
        Always 1.
    """
    debug = g.flags.DEBUG
    if debug:
        for i in range(0, g.syllableCount):
            print(f'syllablifiedStackfinal : {g.syllableList[i]}')

    validSyllable = sum(
        1 for i in range(0, g.syllableCount) if g.syllableList[i] != ''
    )

    if debug:
        print(f'a correction {g.syllableList[0]}')

    g.words.outputText = ''

    def position_tag(printed):
        # Tag by position among the syllables printed so far.
        if printed == 0:
            return '_beg'
        if printed == validSyllable - 1:
            return '_end'
        return '_mid'

    if g.flags.writeFormat == 0:
        printed = 0
        chunks = []
        for i in range(g.syllableCount):
            phones = []
            for pch in g.syllableList[i].split('&'):
                if pch == '':
                    continue
                if debug:
                    print(f'syl {pch}')
                phones.append(f'"{pch}" ')
            if not phones:
                continue
            chunk = '(( ' + ''.join(phones)
            if g.flags.syllTagFlag != 0:
                chunk += position_tag(printed)
                printed += 1
            chunks.append(chunk + ') 0) ')

        text = ''.join(chunks)
        text = text.replace('v', '')
        text = text.replace(" \"eu\"","")
        text = text.replace('!', '')
        g.words.outputText = text

    elif g.flags.writeFormat == 1:
        printed = 0
        chunks = []
        for i in range(g.syllableCount):
            g.syllableList[i] = rec_replace(g.syllableList[i], 'euv', 'eu')
            g.syllableList[i] = SyllableReverseCorrection(g, g.syllableList[i], g.flags.LangSpecificCorrectionFlag)
            if debug:
                print(f'{g.syllableList[i]}')

            glyphs = []
            has_phone = False
            for pch in g.syllableList[i].split('&'):
                if pch == '':
                    continue
                if debug:
                    print(f'syl {pch}')
                has_phone = True
                if CheckSymbol(g, pch) != 0:
                    glyphs.append(GetUTF(g, pch))
                    if pch == 'av' and debug:
                        print('av found')
            if not has_phone:
                continue
            chunk = '(( "' + ''.join(glyphs)
            if g.flags.syllTagFlag != 0:
                chunk += position_tag(printed)
                printed += 1
            chunks.append(chunk + '" ) 0) ')

        text = ''.join(chunks)
        text = text.replace('#', '')
        # NOTE(review): as written this replaces a space with a space,
        # i.e. a no-op; presumably it was meant to collapse a double
        # space - confirm against the upstream source.
        text = text.replace(' ', ' ')
        g.words.outputText = text

    if debug:
        print(f'Print text : {g.words.outputText}')

    WriteFile(g, g.words.outputText)
    return 1
|
|
|
|
def load_mapping_file(g: GLOBALS, path: str = "/speech/utkarsh/tts_api/Unified_parser/common_hindi.map"):
    """Read the tab-separated symbol mapping file into a list of rows.

    Args:
        g: Parser state (not used by this function).
        path: Location of the mapping file.  Defaults to the path that
            used to be hard-coded, so existing callers are unaffected.

    Returns:
        A list with one list of fields per line (each line stripped and
        split on tabs), or 0 if the file cannot be read.
    """
    try:
        # The mapping holds native-script text; decode it explicitly.
        with open(path, 'r', encoding='utf-8') as infile:
            return [line.strip().split('\t') for line in infile]
    except (OSError, UnicodeError):
        print("Couldn't open common file for reading")
        return 0
|
|
def set_lang_id(language):
    """Return the numeric language id for a lower-case language name.

    Both ``"gujrathi"`` (the spelling originally accepted here) and
    ``"gujarathi"`` (the spelling used for file names in ``GetFile``) map
    to 7.

    Args:
        language: Language name, e.g. ``"tamil"``.

    Returns:
        The language id (1-9).

    Raises:
        ValueError: If ``language`` is not a supported name.
    """
    lang_ids = {
        "malayalam": 1,
        "tamil": 2,
        "telugu": 3,
        "kannada": 4,
        "hindi": 5,
        "bengali": 6,
        "gujrathi": 7,
        "gujarathi": 7,
        "odiya": 8,
        "punjabi": 9,
    }
    try:
        return lang_ids[language]
    except KeyError:
        # Previously an UnboundLocalError on the return statement.
        raise ValueError(f"unsupported language: {language!r}") from None
|
|
|
|
def convert_to_main_lang(g : GLOBALS,input_str,final_lang:str):
    """Map the symbols of a ``&``-delimited string to a target language.

    The first character of ``input_str`` is skipped and the remainder is
    split on ``&``.  The markers ``#``, ``av``, ``eu``, ``euv`` and
    ``aiv`` are copied unchanged.  Every other symbol is looked up in
    column 1 of the mapping table (first matching row); the entry in the
    target-language column is used when its first character has a code
    point below 122, otherwise the source symbol is kept.

    Changes from the previous behaviour:

    * ``final_lang`` is honoured; it used to be overwritten with
      ``"telugu"``.  An empty or ``None`` value still selects Telugu.
    * A final symbol without a trailing ``&`` is converted instead of
      raising ``IndexError``.
    * A symbol missing from the table is copied unchanged instead of
      being glued onto the following symbols.

    Args:
        g: Parser state, passed on to ``load_mapping_file``.
        input_str: Symbol string such as ``"&k&av&#&t&a&"``.
        final_lang: Target language name understood by ``set_lang_id``.

    Returns:
        The converted string, starting with ``&`` and with ``&`` after
        every symbol.

    Raises:
        RuntimeError: If the mapping file could not be loaded.
    """
    target_column = set_lang_id(final_lang or "telugu") + 1

    table = load_mapping_file(g)
    if not isinstance(table, list):
        # load_mapping_file signals failure with 0.
        raise RuntimeError("symbol mapping file could not be loaded")

    # Index by source symbol; the first row wins, as in a linear scan.
    rows_by_symbol = {}
    for row in table:
        if len(row) > 1:
            rows_by_symbol.setdefault(row[1], row)

    passthrough = ("#", "av", "eu", "euv", "aiv")

    pieces = ["&"]
    for symbol in input_str[1:].split("&"):
        if symbol == "":
            continue
        mapped = symbol
        if symbol not in passthrough:
            row = rows_by_symbol.get(symbol)
            if row is not None and len(row) > target_column:
                candidate = row[target_column]
                # NOTE(review): the "< 122" test presumably separates
                # Latin-letter symbols from native-script text - confirm.
                if candidate and ord(candidate[0]) < 122:
                    mapped = candidate
        pieces.append(mapped + "&")
    return "".join(pieces)