| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import os |
|
|
| import torch |
| import torchaudio |
| import time |
| from tortoise.api import TextToSpeech |
| from tortoise.utils.audio import load_voices |
| import humanize |
| import datetime as dt |
|
|
def generate_speech(path_id, outfile, voice, text, speed="standard"):
    """Synthesize ``text`` with Tortoise TTS and write the audio under ``temp/<path_id>/``.

    Args:
        path_id: Sub-directory (relative to ``temp``) that receives the audio.
            It is created if it does not exist yet.
        outfile: File name of the clip, e.g. ``"speech.wav"``.
        voice: One or more voice names separated by ``,``. Each entry is
            synthesized separately; an entry may itself combine several
            voices with ``&`` (all of them are passed to ``load_voices``
            together).
        text: The text to speak.
        speed: Forwarded to ``tts_with_preset`` as its ``preset`` argument.

    Output naming:
        With a single voice entry and a single generated clip the file is
        exactly ``temp/<path_id>/<outfile>``. When several voice entries are
        requested, ``_<voice index>`` is appended to the file stem; when the
        model returns a list with several clips, ``_<clip index>`` is appended
        as well. Without these suffixes every clip would overwrite the
        previous one.
    """
    out_dir = os.path.join("temp", path_id)
    # Make sure the destination exists before the (expensive) synthesis runs,
    # so a missing directory cannot make the final save fail.
    os.makedirs(out_dir, exist_ok=True)
    stem, ext = os.path.splitext(outfile)

    tts = TextToSpeech(kv_cache=True, half=True)
    selected_voices = voice.split(',')
    several_voices = len(selected_voices) > 1

    for voice_idx, selected_voice in enumerate(selected_voices):
        # split() yields a one-element list when no '&' is present, so no
        # separate branch is needed for the single-voice case.
        voice_samples, conditioning_latents = load_voices(selected_voice.split('&'))

        gen, _ = tts.tts_with_preset(text, k=1, voice_samples=voice_samples,
                                     conditioning_latents=conditioning_latents,
                                     return_deterministic_state=True,
                                     preset=speed)

        # Normalize to a list so single and multiple results share one path.
        clips = gen if isinstance(gen, list) else [gen]
        several_clips = len(clips) > 1

        for clip_idx, clip in enumerate(clips):
            name = stem
            if several_voices:
                name += "_" + str(voice_idx)
            if several_clips:
                name += "_" + str(clip_idx)
            torchaudio.save(os.path.join(out_dir, name + ext),
                            clip.squeeze(0).cpu(), 24000)
| |
|
|
|
|
if __name__ == '__main__':
    # Demo run: synthesize a short news snippet and report how long it took.
    # The run id is computed exactly once. Deriving it from time.time() twice
    # (once for makedirs, once for the call) could straddle a second boundary
    # and make the script write into a directory that was never created.
    run_id = os.path.join("audio", str(int(time.time())))
    # generate_speech() writes below "temp/<run_id>", so create that path.
    os.makedirs(os.path.join("temp", run_id), exist_ok=True)

    tstart = time.time()
    message = """Apple today confirmed that it will be permanently closing its Infinite Loop retail store in
Cupertino, California on January 20. Infinite Loop served as Apple's headquarters between the mid-1990s and
2017, when its current Apple Park headquarters opened a few miles away."""
    generate_speech(run_id, "christmas.wav", "train_grace",
                    message, "ultra_fast")

    print("total time:", humanize.naturaldelta(dt.timedelta(seconds=int(time.time() - tstart))))