| from time import time |
| import soundfile as sf |
| from misaki import en, espeak, zh |
| from kokoro_onnx import Kokoro |
|
|
|
|
|
|
def run_en(
    models_dir="/Users/jeqin/work/code/TestTranslator/scripts/kokoro_models/",
    output_prefix="audio",
):
    """Synthesize the built-in English sample sentences with Kokoro.

    Each sentence is converted to phonemes with the misaki English G2P
    (using an espeak fallback), passed to ``Kokoro.create`` with the
    ``af_heart`` voice, and written to ``{output_prefix}{index}.wav`` in the
    current working directory. The time spent inside ``Kokoro.create`` is
    printed for every sentence.

    Args:
        models_dir: Directory containing ``kokoro-v1.0.onnx`` and
            ``voices-v1.0.bin``. Defaults to the path this script was
            originally written against.
        output_prefix: Prefix of the generated WAV file names. The default
            matches the original behaviour; pass a different value to keep
            the output separate from ``run_zh``, which uses the same default
            file names.
    """
    # Function-scope imports keep this block self-contained; the module only
    # imports ``time`` from the ``time`` module.
    import os
    from time import perf_counter

    fallback = espeak.EspeakFallback(british=False)
    g2p = en.G2P(trf=False, british=False, fallback=fallback)

    kokoro = Kokoro(
        os.path.join(models_dir, "kokoro-v1.0.onnx"),
        os.path.join(models_dir, "voices-v1.0.bin"),
    )

    texts = [
        "[Misaki](/misˈɑki/) is a G2P engine designed for [Kokoro](/kˈOkəɹO/) models.",
        "For example, the geology and terrain along the railway line.",
        " When choosing solid-state drives, we sometimes see reviews or videos discussing whether a particular solid-state drive has a caching scheme or an uncaching scheme in the performance testing section.",
    ]
    for index, text in enumerate(texts):
        # The second element returned by the G2P call is not needed here.
        phonemes, _ = g2p(text)

        # Only the synthesis call is timed; perf_counter() is the clock
        # intended for measuring short durations (unlike wall-clock time()).
        start = perf_counter()
        samples, sample_rate = kokoro.create(phonemes, "af_heart", is_phonemes=True)
        time_cost = perf_counter() - start
        print(f"time cost: {time_cost} for text: {text}")

        output_path = f"{output_prefix}{index}.wav"
        sf.write(output_path, samples, sample_rate)
        print(f"Created {output_path}")
|
|
def run_zh(
    models_dir="/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models/kokoro",
    output_prefix="audio",
):
    """Synthesize the built-in Chinese sample sentences with Kokoro.

    Each sentence is converted to phonemes with the misaki ``ZHG2P``,
    passed to ``Kokoro.create`` with the ``zf_xiaoyi`` voice at speed 1.0,
    and written to ``{output_prefix}{index}.wav`` in the current working
    directory. The time spent inside ``Kokoro.create`` is printed for every
    sentence.

    Args:
        models_dir: Directory containing ``kokoro-quant.onnx``,
            ``voices-v1.0.bin`` and ``zh_config.json``. Defaults to the path
            this script was originally written against.
        output_prefix: Prefix of the generated WAV file names. The default
            matches the original behaviour; pass a different value to keep
            the output separate from ``run_en``, which uses the same default
            file names.
    """
    # Function-scope imports keep this block self-contained; the module only
    # imports ``time`` from the ``time`` module.
    import os
    from time import perf_counter

    g2p = zh.ZHG2P()

    kokoro = Kokoro(
        os.path.join(models_dir, "kokoro-quant.onnx"),
        os.path.join(models_dir, "voices-v1.0.bin"),
        vocab_config=os.path.join(models_dir, "zh_config.json"),
    )

    texts = [
        "千里之行,始于足下。",
        "我想听你唱首歌",
        "窗前明月光,疑是地上霜。举头望明月,低头思故乡。",
    ]
    for index, text in enumerate(texts):
        # The second element returned by the G2P call is not needed here.
        phonemes, _ = g2p(text)

        # Only the synthesis call is timed; perf_counter() is the clock
        # intended for measuring short durations (unlike wall-clock time()).
        start = perf_counter()
        samples, sample_rate = kokoro.create(
            phonemes, "zf_xiaoyi", is_phonemes=True, speed=1.0
        )
        time_cost = perf_counter() - start
        print(f"time cost: {time_cost} for text: {text}")

        output_path = f"{output_prefix}{index}.wav"
        sf.write(output_path, samples, sample_rate)
        print(f"Created {output_path}")
|
|
# Script entry point: only the Chinese demo is run when executed directly.
if __name__ == "__main__":
    run_zh()