| import os |
|
|
| import pytest |
| from tests.utils import wrap_test_forked |
| from src.tts_sentence_parsing import init_sentence_state |
| from tests.test_sentence_parsing import bot_list |
|
|
|
|
@pytest.mark.audio
@wrap_test_forked
def test_sentence_to_wave():
    """Synthesize one fixed sentence and check the result opens as a wave file.

    Loads the XTT model, derives the latent for the "Female AI Assistant"
    role, asks ``sentence_to_wave`` for a non-byte, file-style result, and
    then opens that result with the standard-library ``wave`` reader.
    """
    # Set before the project TTS module is imported below.
    # NOTE(review): presumably the import relies on CUDA_HOME -- confirm.
    os.environ['CUDA_HOME'] = '/usr/local/cuda-11.7'
    from src.tts_coqui import sentence_to_wave, get_xtt, get_latent, get_role_to_wave_map

    role = "Female AI Assistant"
    text = "I am an AI assistant. I can help you with any tasks."
    speed = 1.0

    model, supported_languages = get_xtt()
    role_wave = get_role_to_wave_map()[role]
    speaker_latent = get_latent(role_wave, model=model)

    # Output-format switches, kept together so the call below reads cleanly.
    output_options = dict(
        return_as_byte=False,
        return_nonbyte_as_file=True,
        return_gradio=False,
    )
    wave_result = sentence_to_wave(
        text,
        supported_languages,
        speed,
        latent=speaker_latent,
        model=model,
        **output_options,
    )
    print(wave_result, flush=True)

    # Opening for read raises if the result is not a valid wave file;
    # nothing further is inspected.
    import wave
    with wave.open(wave_result, mode='rb'):
        pass
|
|
|
|
@pytest.mark.audio
@wrap_test_forked
def test_generate_speech():
    """Smoke-test ``generate_speech`` by calling it once per character.

    Each character of a fixed three-sentence reply is passed to
    ``generate_speech`` on its own. Return values are ignored, so the test
    only checks that the calls complete without raising.
    """
    # Set before the project TTS module is imported below.
    # NOTE(review): presumably the import relies on CUDA_HOME -- confirm.
    os.environ['CUDA_HOME'] = '/usr/local/cuda-11.7'
    from src.tts_coqui import generate_speech, get_xtt, get_latent, get_role_to_wave_map

    role = "Female AI Assistant"
    model, supported_languages = get_xtt()
    role_wave = get_role_to_wave_map()[role]
    speaker_latent = get_latent(role_wave, model=model)

    # Arguments shared by every call in the loop.
    speech_kwargs = dict(
        model=model,
        supported_languages=supported_languages,
        latent=speaker_latent,
    )

    reply = 'I am an AI assistant. What do you want from me? I am very busy.'
    for piece in reply:
        generate_speech(piece, **speech_kwargs)
|
|
|
|
@pytest.mark.audio
@wrap_test_forked
def test_full_generate_speech():
    """Feed a growing reply to ``generate_speech`` word by word.

    The reply is rebuilt one space-separated word at a time; after each
    word the accumulated text is passed to ``generate_speech`` together
    with the running sentence state. A last call with ``is_final=True``
    follows the loop. The test asserts that the sentences reported match
    the three expected sentences and that one audio result was collected
    per sentence.
    """
    # Set before the project TTS module is imported below.
    # NOTE(review): presumably the import relies on CUDA_HOME -- confirm.
    os.environ['CUDA_HOME'] = '/usr/local/cuda-11.7'
    from src.tts_coqui import generate_speech, get_xtt, get_latent, get_role_to_wave_map

    bot = 'I am an AI assistant. What do you want from me? I am very busy.'
    sentences_expected = ['I am an AI assistant.', 'What do you want from me?', 'I am very busy.']

    def response_gen():
        """Yield the reply one space-separated word at a time."""
        yield from bot.split(' ')

    role = "Female AI Assistant"
    model, supported_languages = get_xtt()
    role_wave = get_role_to_wave_map()[role]
    speaker_latent = get_latent(role_wave, model=model)

    response = ""
    sentence_state = init_sentence_state()

    collected_sentences = []
    collected_audios = []

    def speak(text, state, is_final):
        """Call generate_speech once, record any sentence/audio, return the new state."""
        audio, sentence, new_state = generate_speech(
            text,
            model=model,
            supported_languages=supported_languages,
            latent=speaker_latent,
            sentence_state=state,
            return_as_byte=False,
            return_nonbyte_as_file=True,
            return_gradio=False,
            is_final=is_final,
            verbose=True,
        )
        if sentence is not None:
            print(sentence)
            collected_sentences.append(sentence)
        if audio is not None:
            collected_audios.append(audio)
        return new_state

    # Streaming phase: the accumulated text grows by one word per call.
    for word in response_gen():
        response += word + ' '
        sentence_state = speak(response, sentence_state, is_final=False)

    # Final call on the complete text with is_final=True.
    sentence_state = speak(response, sentence_state, is_final=True)

    assert collected_sentences == sentences_expected
    assert len(collected_sentences) == len(collected_audios)
    print(collected_audios)
|
|
|
|
@pytest.mark.audio
@wrap_test_forked
@pytest.mark.parametrize("bot, sentences_expected", bot_list)
def test_predict_from_text(bot, sentences_expected):
    """Check ``predict_from_text`` yields one non-empty audio per expected sentence.

    Parametrized over ``bot_list``. Items whose second element has a zero
    first dimension are dropped, and the number of remaining items must
    equal ``len(sentences_expected)``.
    """
    from src.tts import get_tts_model, get_speakers
    processor, model, vocoder = get_tts_model()
    first_speaker = get_speakers()[0]
    speed = 1.0

    from src.tts import predict_from_text
    audio_stream = predict_from_text(
        bot, first_speaker, speed,
        processor=processor, model=model, vocoder=vocoder,
        return_as_byte=False,
        verbose=True,
    )
    # Keep only items whose second element is non-empty along its first axis.
    non_empty = [item for item in audio_stream if item[1].shape[0] > 0]
    assert len(non_empty) == len(sentences_expected)
|
|