| import textwrap |
| import re |
|
|
| from src.utils import flatten_list, have_emoji, have_langid |
|
|
|
|
def setup_nltk():
    """Download the NLTK 'punkt' tokenizer models needed by sentence splitting."""
    # Imported lazily so nltk is only required when this setup step is run.
    import nltk
    nltk.download("punkt")
|
|
|
|
| |
| |
|
|
# Keys of the streaming-sentence state dict, in the positional order that
# unpack_state() returns them and pack_state() consumes them.
sentence_keys = ['sentence_list', 'index']
|
|
|
|
def init_sentence_state():
    """Return a fresh streaming state: no sentences emitted yet, cursor at 0."""
    return {'sentence_list': [], 'index': 0}
|
|
|
|
def unpack_state(sentence_state):
    """Return the state's values as a tuple, ordered like sentence_keys."""
    return tuple(sentence_state[key] for key in sentence_keys)
|
|
|
|
def pack_state(sentence_state, *args):
    """
    Write positional values back into the streaming state dict.

    *args must supply one value per entry in sentence_keys, in the same
    order that unpack_state() returned them.

    :param sentence_state: state dict produced by init_sentence_state()
    :param args: new values, ordered like sentence_keys
    :return: the same (mutated) sentence_state dict
    """
    # The original branched on isinstance(sentence_state[key], list), but
    # both branches performed the identical assignment; the dead conditional
    # is removed.
    for keyi, key in enumerate(sentence_keys):
        sentence_state[key] = args[keyi]
    return sentence_state
|
|
|
|
def split_sentences(sentence, n=250):
    """
    Split *sentence* on whitespace into chunks of at most n characters.

    Whitespace between words is preserved inside a chunk; any whitespace run
    containing a newline forces a chunk boundary (the newline run itself is
    dropped).  A single word longer than n is emitted as its own chunk,
    unsplit.

    # 250 due to [!] Warning: The text length exceeds the character limit of 250 for language 'en', this might cause truncated audio.

    :param sentence: text to split
    :param n: maximum chunk length in characters
    :return: list of non-empty chunks
    """
    # Capturing group keeps the whitespace runs in the token stream.
    # (raw string: '\s' was previously an invalid escape in a plain string)
    words = re.split(r'(\s+)', sentence)
    sentences = []
    current_sentence = []
    current_length = 0

    def _flush():
        # Emit the accumulated chunk (if any) and reset the accumulator.
        # Guarding on non-empty also fixes the old behavior of appending ''
        # when a newline arrived before any word.
        nonlocal current_sentence, current_length
        if current_sentence:
            sentences.append("".join(current_sentence))
        current_sentence = []
        current_length = 0

    for word in words:
        if word == '':
            # re.split emits empty strings at the edges; skip them.
            continue
        if word.isspace():
            if '\n' in word:
                # Hard break on any whitespace run containing a newline.
                # (The old exact `word == '\n'` test missed '\n\n' and ' \n'.)
                _flush()
            else:
                current_sentence.append(word)
                current_length += len(word)
        elif current_length + len(word) > n:
            if current_sentence:
                # Close the current chunk and start a new one with this word.
                _flush()
                current_sentence = [word]
                current_length = len(word)
            else:
                # Single word longer than n: emit it unsplit.
                sentences.append(word)
        else:
            current_sentence.append(word)
            current_length += len(word)

    _flush()
    return sentences
|
|
|
|
def _get_sentences(response, verbose=False, min_start=15, max_length=250):
    """
    Tokenize *response* into speakable sentences of at most max_length chars.

    The first min_start characters are excluded from tokenization (so a short
    partial prefix of a streaming response doesn't confuse the tokenizer) and
    re-attached to the first sentence afterwards.

    :param response: text to tokenize
    :param verbose: unused here; kept for interface compatibility
    :param min_start: number of leading characters to hold back from tokenization
    :param max_length: maximum sentence length passed to split_sentences
    :return: list of non-blank sentence strings
    """
    import nltk

    raw = nltk.sent_tokenize(response[min_start:])
    chunks = flatten_list([split_sentences(s, max_length) for s in raw])
    chunks = [c for c in chunks if c.strip()]

    if min_start > 0:
        prefix = response[:min_start]
        if chunks:
            # Re-attach the held-back prefix to the first sentence.
            chunks[0] = prefix + chunks[0]
        else:
            # Nothing tokenized: the prefix is all we have.
            chunks.append(prefix)
    return chunks
|
|
|
|
def get_sentence(response, sentence_state, is_final=False, verbose=False):
    """
    Pull the next complete sentence out of a (possibly still growing) response.

    :param response: full response text so far
    :param sentence_state: mutable state dict from init_sentence_state()
    :param is_final: True once the response has finished streaming
    :param verbose: forwarded to helpers for diagnostics
    :return: (cleaned_sentence_or_None, updated_state, done_flag)
    """
    sentence_list, index = unpack_state(sentence_state)
    remaining = response[index:]
    # Hold back the first 15 chars only on the very first call (index == 0).
    sentences = _get_sentences(remaining, min_start=15 if index == 0 else 0, verbose=verbose)

    if len(sentences) >= 2:
        # At least one sentence is definitely complete; emit the first and
        # advance the cursor past it.
        first = sentences[0]
        index += remaining.index(first) + len(first)
        sentence_list.append(first)
        cleaned = clean_sentence(first, verbose=verbose)
        return cleaned, pack_state(sentence_state, sentence_list, index), False

    if is_final:
        # Stream is done: flush whatever remains as the last sentence.
        tail = ' '.join(sentences)
        cleaned = clean_sentence(tail, verbose=verbose)
        sentence_list.append(tail)
        return cleaned, pack_state(sentence_state, sentence_list, index), True

    # Not enough text yet to be sure a sentence is complete.
    return None, pack_state(sentence_state, sentence_list, index), True
|
|
|
|
def clean_sentence(sentence, verbose=False):
    """
    Normalize a sentence for TTS playback.

    Removes code blocks, parentheticals and emoji, spells out constructs the
    TTS engine mispronounces (decimals, 'Dr.', 'w/', 'H2O.ai', bare list
    numbers), and trims leading punctuation.

    :param sentence: raw sentence text (may be None)
    :param verbose: print diagnostics
    :return: cleaned sentence; '' when nothing speakable remains
    """
    if sentence is None or len(sentence) == 0:
        if verbose:
            print("empty sentence")
        return ''

    # Drop content that should not be spoken: fenced code, inline code,
    # and parentheticals.  (Raw strings fix invalid-escape warnings.)
    sentence = re.sub(r"```.*?```", "", sentence, flags=re.DOTALL)
    sentence = re.sub(r"`.*?`", "", sentence, flags=re.DOTALL)
    sentence = re.sub(r"\(.*?\)", "", sentence, flags=re.DOTALL)

    # Remove leftover markers from unbalanced fences / parentheses.
    sentence = sentence.replace("```", "")
    sentence = sentence.replace("...", " ")
    sentence = sentence.replace("(", " ")
    sentence = sentence.replace(")", " ")

    # Expand abbreviations the TTS engine reads badly.
    sentence = sentence.replace("Dr. ", "Doctor ")
    sentence = sentence.replace(" w/ ", " with ")

    # Phonetic spelling of the brand name.  (The original repeated the
    # 'h2o.ai' replacement twice; the duplicate was dead code.)
    for brand in ('H2O.ai', 'H2O.AI', 'h2o.ai'):
        sentence = sentence.replace(brand, "aych two oh ae eye.")

    # Emoji cannot be spoken; drop them when the optional dependency exists.
    if have_emoji:
        import emoji
        sentence = ''.join([x for x in sentence if not emoji.is_emoji(x)])

    # Read decimals as "3 dot 14" rather than "3.14".
    sentence = re.sub(r'(\d+)\.(\d+)', r"\1 dot \2", sentence)

    # NOTE(review): this substitution replaces every match with itself and is
    # therefore a no-op; kept byte-for-byte for behavior parity — confirm the
    # original intent (perhaps a space or pause marker was meant in \1\2).
    sentence = re.sub(r"([^\x00-\x7F]|\w)(\.|\。|\?|\!)", r"\1\2", sentence)

    sentence = sentence.strip()

    # Trim leading punctuation left over from the removals above.
    if sentence.startswith(('. ', '? ', '! ', ', ')):
        sentence = sentence[2:]
    if sentence.startswith(('.', '?', '!', ',')):
        sentence = sentence[1:]

    # A bare list number ("1.") is spoken as a word, not "one dot".
    number_words = {
        '1.': 'One', '2.': 'Two', '3.': 'Three', '4.': 'Four', '5.': 'Five',
        '6.': 'Six', '7.': 'Seven', '8.': 'Eight', '9.': 'Nine', '10.': 'Ten',
    }
    sentence = number_words.get(sentence, sentence)

    if len(sentence) == 0:
        if verbose:
            print("EMPTY SENTENCE after processing")
        return ''

    if verbose:
        print("Sentence for speech: %s" % sentence)

    return sentence
|
|
|
|
def detect_language(prompt, supported_languages, verbose=False):
    """
    Guess the language of *prompt* for xtts, falling back to English.

    :param prompt: text to classify
    :param supported_languages: language codes the TTS engine accepts
    :param verbose: print diagnostics
    :return: language code, "en" when detection is unavailable or unreliable
    """
    if not have_langid:
        # langid is not installed: autodetection is impossible.
        return "en"

    import langid

    if len(prompt) <= 15:
        # Too little text for a reliable classification.
        if verbose:
            print(f"Language: Prompt is short or autodetect language disabled using english for xtts")
        return "en"

    predicted = langid.classify(prompt)[0].strip()
    if predicted == "zh":
        # langid reports bare "zh"; xtts expects the region-qualified code.
        predicted = "zh-cn"

    if predicted in supported_languages:
        language = predicted
    else:
        print(f"Detected a language not supported by xtts :{predicted}, switching to english for now")
        language = "en"

    if verbose:
        print(f"Language: Predicted sentence language:{predicted} , using language for xtts:{language}")
    return language
|
|