import re

from cotlet.utils import *
import cutlet
|
|
# Module-level cutlet romanizer shared by process_japanese_text().
# ensure_ascii=False is handed to cutlet so non-ASCII characters are not
# forced to ASCII (see cutlet's documentation for the exact behaviour);
# foreign-spelling substitution is switched off on the instance.
katsu = cutlet.Cutlet(ensure_ascii=False)
katsu.use_foreign_spelling = False
|
|
def process_japanese_text(ml: str) -> str:
    """Romanize Japanese text with cutlet and convert it to an IPA-like string.

    Steps, in order:

    1. Small hiragana vowels (ぁぃぅぇぉ) are replaced by their full-size forms.
    2. The text goes through ``alphabetreading`` and ``apply_transformations``
       and is romanized by the module-level ``katsu`` instance, then
       lower-cased.
    3. A few romaji sequences are rewritten (``j``, doubled ``t``, ``ssh``).
    4. The result is passed through the helper chain ``convert_numbers_in_string``
       -> ``Roma2IPA`` -> ``hira2ipa`` -> ``replace_chars_2`` ->
       ``replace_tashdid_2`` -> ``replace_repeated_chars`` -> ``nasal_mapper``.
    5. A handful of hard-coded phrase fixes are applied.

    The helper functions and ``re`` are not defined in this block; they are
    expected to already be in module scope.

    Args:
        ml: Input text.

    Returns:
        The converted string with leading whitespace removed.
    """
    # One pass instead of five chained replace() calls; characters that are
    # not in the table are left untouched.
    ml = ml.translate(str.maketrans("ぁぃぅぇぉ", "あいうえお"))

    romaji = katsu.romaji(
        apply_transformations(alphabetreading(ml)),
        capitalize=False,
    ).lower()

    # Order matters: "t t" is turned into "ʔt" before a leading space in
    # front of "ʔt" is removed. str.replace() is already a no-op when the
    # needle is absent, so no membership test is needed.
    for old, new in (
        ("j", "dʑ"),
        ("tt", "ʔt"),
        ("t t", "ʔt"),
        (" ʔt", "ʔt"),
        ("ssh", "ɕɕ"),
    ):
        romaji = romaji.replace(old, new)

    output = Roma2IPA(convert_numbers_in_string(romaji))
    output = hira2ipa(output)
    output = replace_chars_2(output)
    output = replace_repeated_chars(replace_tashdid_2(output))
    output = nasal_mapper(output)

    # Hard-coded phrase corrections, applied in this order.
    output = output.replace(" ɴ", "ɴ")
    output = output.replace(" neɽitai ", "naɽitai")
    output = output.replace("harɯdʑisama", "arɯdʑisama")

    # Insert a space before the phrase unless it is already preceded by
    # whitespace. At the very start of the string this adds a leading space,
    # which the final lstrip() removes again.
    output = re.sub(r"(?<!\s)ki ni ɕinai", " ki ni ɕinai", output)
    # NOTE(review): an earlier substitution that replaced "ʔt" with the
    # identical text "ʔt" had no effect and has been dropped.
    output = re.sub(r"(?<!\s)de aɽoɯ", " de aɽoɯ", output)

    return output.lstrip()
|
|
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
|
|
# Ordered (compiled pattern, replacement) rules used by replace_repeating_a().
# They are applied sequentially, so later rules see the output of earlier
# ones; do not reorder.
_REPEATED_VOWEL_RULES = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        (r'(aː)\s*\1+\s*', r'\1~'),
        (r'(aːa)\s*aː', r'\1~'),
        (r'aːa', r'aː~'),
        (r'naː\s*aː', r'naː~'),
        (r'(oː)\s*\1+\s*', r'\1~'),
        (r'(oːo)\s*oː', r'\1~'),
        (r'oːo', r'oː~'),
        (r'(eː)\s*\1+\s*', r'\1~'),
        (r'(e)\s*\1+\s*', r'\1~'),
        (r'(eːe)\s*eː', r'\1~'),
        (r'eːe', r'eː~'),
        (r'neː\s*eː', r'neː~'),
    )
)


def replace_repeating_a(output: str) -> str:
    """Collapse repeated a/o/e vowel sequences into a single vowel plus ``~``.

    Despite the name, the rules cover ``aː``, ``oː``, ``eː`` and plain ``e``.
    Each rule in ``_REPEATED_VOWEL_RULES`` is applied to the whole string in
    turn. The "repeat" rules (``(x)\\s*\\1+\\s*``) also consume whitespace
    that follows the repeated vowel, e.g. ``"aː aː b"`` becomes ``"aː~b"``.

    Args:
        output: String to rewrite.

    Returns:
        The string after all substitutions; unchanged when no rule matches.
    """
    for pattern, replacement in _REPEATED_VOWEL_RULES:
        output = pattern.sub(replacement, output)
    return output
|
|
def phonemize(text: str) -> str:
    """Convert Japanese text to its final phoneme string.

    The small tsu (っ) is replaced by ``ʔ`` first, then the text is run
    through ``process_japanese_text`` and ``post_fix``. After that a fixed,
    ordered series of literal replacements is applied, repeated vowels are
    collapsed with ``replace_repeating_a``, whitespace directly before ``~``
    is removed, a second series of literal replacements is applied, and the
    result goes through ``random_space_fix``, ``random_sym_fix`` and
    ``random_sym_fix_no_space``.

    ``post_fix``, the ``random_*`` helpers and ``re`` are not defined in this
    block; they are expected to already be in module scope.

    Args:
        text: Input text.

    Returns:
        The converted string with leading whitespace removed.
    """
    text = text.replace("っ", "ʔ")

    output = post_fix(process_japanese_text(text))

    # Order matters ("a aː" has to be handled before "a a"). str.replace()
    # leaves the string unchanged when the needle is absent, so the
    # replacements need no membership guard.
    for old, new in (
        (" ɴ", "ɴ"),
        ("y", "j"),
        ("ɯa", "wa"),
        ("a aː", "a~"),
        ("a a", "a~"),
    ):
        output = output.replace(old, new)

    output = replace_repeating_a(output)
    # Attach any "~" to the preceding token by dropping whitespace before it.
    output = re.sub(r'\s+~', '~', output)

    # Clean-ups for elongation patterns left over after the collapse above;
    # later entries rely on the output of earlier ones.
    for old, new in (
        ("oː~o oː~ o", "oː~~~~~~"),
        ("aː~aː", "aː~~~"),
        ("oɴ naː", "onnaː"),
        ("aː~~ aː", "aː~~~~"),
        ("oː~o", "oː~~"),
        ("oː~~o o", "oː~~~~"),
    ):
        output = output.replace(old, new)

    output = random_space_fix(output)
    output = random_sym_fix(output)
    output = random_sym_fix_no_space(output)

    return output.lstrip()
|
|
| |
| |
|
|