| from pywhispercpp.model import Model |
| import soundfile |
| import numpy as np |
| from logging import getLogger |
| from pathlib import Path |
|
|
| from lib.utils import Timer, read_audio |
|
|
| logger = getLogger(__name__) |
|
|
| MODEL_DIR = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models") |
| WHISPER_PROMPT_ZH = "以下是简体中文普通话的句子。" |
| WHISPER_PROMPT_EN = "" |
|
|
| class WhisperCPP: |
| def __init__(self, model_dir=MODEL_DIR, source_lange: str = 'en') -> None: |
| whisper_model = 'large-v3-turbo-q5_0' |
| with Timer("load whisper"): |
| self.model = Model( |
| model=whisper_model, |
| models_dir=str(model_dir), |
| print_realtime=False, |
| print_progress=False, |
| print_timestamps=False, |
| translate=False, |
| |
| temperature=0., |
| no_context=True |
| ) |
| self._warmup() |
|
|
| def _warmup(self): |
| fake_audio = np.random.randn(16000).astype(np.float32) |
| self.model.transcribe(fake_audio, print_progress=False) |
|
|
| @staticmethod |
| def config_language(language): |
| if language == "zh": |
| return WHISPER_PROMPT_ZH |
| elif language == "en": |
| return WHISPER_PROMPT_EN |
| raise ValueError(f"Unsupported language : {language}") |
|
|
| def transcribe(self, audio: np.ndarray, language): |
| prompt = self.config_language(language) |
| try: |
| with Timer("whisper inference") as t: |
| segments = self.model.transcribe( |
| audio, |
| initial_prompt=prompt, |
| language=language, |
| |
| split_on_word=True, |
| |
| ) |
| text = "".join([s.text for s in segments]) |
| return text, t.duration |
| except Exception as e: |
| logger.error(e) |
| return [] |
|
|
if __name__ == '__main__':
    # Smoke test: transcribe one local recording and print the result.
    # (The redundant local re-import of `read_audio` was removed — it is
    # already imported at the top of the file.)
    whisper = WhisperCPP()
    audio = read_audio(Path("/Users/jeqin/work/code/TestTranslator/test_data/recordings/1.wav"))
    text, time_cost = whisper.transcribe(audio, "zh")
    print(text)
    print(time_cost)