| from pathlib import Path |
| import numpy as np |
| from lib.utils import Timer |
| from s2ts import S2TS |
| from s2ts import TaskExecInfo as CTaskExecInfo |
|
|
# Default model directory used by ``WhisperOv`` when no ``model_dir`` is given.
# NOTE(review): machine-specific absolute Windows path -- callers on other
# machines need to pass their own ``model_dir``.
MODEL_DIR = Path(r"D:\yujuan\yoyo-translator-win\models\whisper-large-v3-turbo-int8")
|
|
class WhisperOv:
    """Speech-to-text wrapper around an ``S2TS`` ASR instance.

    Constructing the object starts the ASR engine from a model directory
    and then runs one throw-away transcription as a warm-up.
    """

    def __init__(self, model_dir=MODEL_DIR):
        """Start the ASR engine and warm it up.

        Args:
            model_dir: Directory of the model to load. It is converted with
                ``str()`` before being handed to ``start_asr_genai``.
        """
        # NOTE(review): the original indentation was lost; only the engine
        # start-up is assumed to sit inside the load timer -- confirm.
        with Timer("load Whisper"):
            self.instance = S2TS()
            ret = self.instance.start_asr_genai("en", str(model_dir), False, "")
        # A falsy return value is only reported here, not raised.
        print(f"model load {'success' if ret else 'failed'}")
        self._warm_up()

    def _warm_up(self):
        """Run one transcription on 16000 samples of random float32 noise."""
        fake_audio = np.random.randn(16000).astype(np.float32)
        self.transcribe(fake_audio, "en")

    def transcribe(self, audio: np.ndarray, language):
        """Transcribe ``audio`` and report the timer's duration.

        Args:
            audio: Samples as a NumPy array; sent to the engine as a plain
                Python list via ``tolist()``.
            language: Value stored in the task's ``audio_language`` field.

        Returns:
            A ``(text, duration)`` tuple: the concatenated ``text`` of every
            entry in the result's ``words``, and the ``duration`` attribute
            of the timer that wrapped the engine calls.
        """
        task_info = CTaskExecInfo()
        task_info.audio_data = audio.tolist()
        task_info.audio_language = language

        with Timer("Whisper inference") as t:
            self.instance.put_asr(task_info)
            res: CTaskExecInfo = self.instance.get_asr(0)
        # NOTE(review): the return is placed after the timer block on the
        # assumption that Timer finalises ``duration`` on exit -- confirm.
        return "".join(word.text for word in res.words), t.duration
|
|
|
|
|
|