| from pathlib import Path |
| from time import time |
| import os |
|
|
| import soundfile as sf |
| from misaki import zh |
| import onnxruntime |
|
|
| from kokoro_onnx import Kokoro |
|
|
| |
| |
|
|
def create_session(model_path):
    """Create an ONNX Runtime inference session for the model at *model_path*.

    The second entry reported by ``onnxruntime.get_available_providers()`` is
    used as the execution provider.  When fewer than two providers are
    reported, the full list is used so the session is never given an empty
    provider list.  The intra-op thread count is set to half of the CPU count
    reported by the OS (at least 1).

    Args:
        model_path: Path to the ``.onnx`` model file; passed unchanged to
            ``onnxruntime.InferenceSession``.

    Returns:
        The configured ``onnxruntime.InferenceSession``.
    """
    available = onnxruntime.get_available_providers()
    # Keep the original selection (second listed provider only), but fall
    # back to everything available instead of passing an empty list.
    providers = available[1:2] or available
    print(f"Available onnx runtime providers: {providers}")

    sess_options = onnxruntime.SessionOptions()
    # os.cpu_count() may return None, and halving a single core gives 0;
    # guard both so the value is always a positive integer.
    cpu_count = max(1, (os.cpu_count() or 1) // 2)
    print(f"Setting threads to CPU cores count: {cpu_count}")
    # Actually apply the announced thread count to the session options.
    sess_options.intra_op_num_threads = cpu_count

    session = onnxruntime.InferenceSession(
        model_path, providers=providers, sess_options=sess_options
    )
    return session
|
|
# Directory holding the Kokoro model assets.  Set the KOKORO_MODEL_DIR
# environment variable to use another location; when it is unset the
# original hard-coded developer path is used.
model_folder = Path(
    os.environ.get(
        "KOKORO_MODEL_DIR",
        "/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models/kokoro",
    )
)
# The individual asset paths are stored as plain strings, not Path objects.
model_path = str(model_folder / "kokoro-quant.onnx")
voice_model_path = str(model_folder / "voices-v1.0.bin")
vocab_config = str(model_folder / "zh_config.json")
|
|
# perf_counter() is monotonic and high-resolution, so it is the appropriate
# clock for measuring short durations (wall-clock time() can jump).
from time import perf_counter

# Sentences synthesized on every benchmark round.
texts = [
    "千里之行,始于足下。",
    "我想听你唱首歌",
    "窗前明月光,疑是地上霜。举头望明月,低头思故乡。",
]
voice = "zf_xiaoyi"
session = create_session(model_path)
model = Kokoro.from_session(session, voice_model_path, vocab_config=vocab_config)
g2p = zh.ZHG2P()

# The phonemes for a given text do not depend on the round, so convert each
# text once up front instead of repeating the G2P call on all five rounds.
phonemes_per_text = []
for text in texts:
    phonemes, _ = g2p(text)
    phonemes_per_text.append(phonemes)

# Run the synthesis five times over all texts and report each call's duration.
# Output file names depend only on the text index, so every round overwrites
# the files written by the previous one.
for _round in range(5):
    for index, (text, phonemes) in enumerate(zip(texts, phonemes_per_text)):
        start = perf_counter()
        samples, sample_rate = model.create(
            phonemes, voice=voice, speed=1.0, is_phonemes=True
        )
        end = perf_counter()
        time_cost = end - start
        print(f"time cost: {time_cost} for text: {text}")
        sf.write(f"audio_{index}.wav", samples, sample_rate)
        print(f"Created audio_{index}.wav")