# classification/app.py
import streamlit as st
from streamlit_mic_recorder import mic_recorder
#import whisper
import tempfile
import os
from pydub import AudioSegment
from faster_whisper import WhisperModel
# Load whisper model
@st.cache_resource
def load_model():
#return whisper.load_model("small")
return WhisperModel("large-v3", device="auto", compute_type="int8")
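# st.cache_resource keeps a single WhisperModel instance across Streamlit reruns;
# device="auto" picks a GPU when one is available, and compute_type="int8" trades a
# little transcription accuracy for lower memory use.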
whisper_model = load_model()
st.title("🗣 中文語音識別 (Whisper + Mic Recorder)")
mode = st.radio("選擇輸入方式", ["🎤 使用麥克風錄音", "📁 上傳本地音檔","✍️ 手動文字輸入"], horizontal=True)
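# prompt_switch returns the intent-classification prompt template for a given model name;
# the dictionary keys below must match the options offered in the model selectbox further down.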
def prompt_switch(model_name, input_text):
print("prompt_switch")
prompts = {
"Qwen/Qwen2.5-7B-Instruct-Turbo": """
You are an assistant for intent classification.
Your task is to classify a given user input into one of the following two categories:
1."Reservation": user input is related to or imply a a restaurant reservation.
2."unrelated": user input is anything else.
Your response should be in JSON format either
{{"result": "Reservation"}} or {{"result": "Others"}}.
If the user input is related to restaurant reservation, return {{"result": "Reservation"}};
If the user input is anything else, return {{"result": "Others"}}.
Here is the user input: {input}
""".strip(),
"Qwen/Qwen2.5-Coder-32B-Instruct": """
You are an assistant for intent classification.
Your task is to classify a given user input into one of the following two categories:
"Reservation": user input is related to or imply a a restaurant reservation.
"unrelated": user input is anything else.
Your response should be in JSON format either {{"result": "Reservation"}} or {{"result": "Others"}}.
Here is the user input: {input}
""".strip(),
"google/gemma-2b-it": """
You are an assistant for intent classification.
Your task is to classify a given user input into one of the following two categories:
1."Reservation": user input is related to or imply a a restaurant reservation.
2."unrelated": user input is anything else.
Your response should be in JSON format either
{{"result": "Reservation"}} or {{"result": "Others"}}.
If the user input is related to restaurant reservation, return {{"result": "Reservation"}};
If the user input is anything else, return {{"result": "Others"}}.
Here is the user input: {input}
""".strip(),
"google/gemma-2-9b-it": """
You are an assistant for intent classification.
Your task is to classify a given user input into one of the following two categories:
1."Reservation": user input is related to or imply a a restaurant reservation.
2."unrelated": user input is anything else.
Your response should be in JSON format either
{{"result": "Reservation"}} or {{"result": "Others"}}.
If the user input is related to restaurant reservation, return {{"result": "Reservation"}};
If the user input is anything else, return {{"result": "Others"}}.
Here is the user input: {input}
""".strip(),
"google/gemma-2-27b-it": """
You are an assistant for intent classification.
Your task is to classify a given user input into one of the following two categories:
1."Reservation": user input is related to or imply a a restaurant reservation.
2."unrelated": user input is anything else.
Your response should be in JSON format either
{{"result": "Reservation"}} or {{"result": "Others"}}.
If the user input is related to restaurant reservation, return {{"result": "Reservation"}};
If the user input is anything else, return {{"result": "Others"}}.
Here is the user input: {input}
""".strip()
}
    prompt = prompts[model_name].format(input=input_text)
return prompt
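# Example (hypothetical input): prompt_switch("google/gemma-2b-it", "book a table for two")
# returns that model's template with the user text substituted for {input}.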
model_option = st.selectbox(
"你要選擇哪一個模型?",
("Qwen/Qwen2.5-7B-Instruct-Turbo", "Qwen/Qwen2.5-Coder-32B-Instruct","google/gemma-2b-it", "google/gemma-2-9b-it", "google/gemma-2-27b-it")
)
st.write("你選擇的模型:", model_option)
import outlines
from outlines.models import openai
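# classification() asks the selected Together-hosted model for the intent label.
# outlines.generate.choice constrains decoding to the labels list, so the reply is one of
# "Reservation" or "unrelated" even though the prompts describe a JSON output format.
# TOGETHER_API_KEY must be set in the environment before this function is called.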
def classification(input_text):
st.write("🧠 LLM辨識意圖中..")
labels = ["Reservation", "unrelated"]
model = openai(
#"Qwen/Qwen2.5-Coder-32B-Instruct",
#"Qwen/Qwen2.5-7B-Instruct-Turbo",
#"google/gemma-2b-it",
model_option,
api_key=os.environ["TOGETHER_API_KEY"],
base_url="https://api.together.xyz/v1"
)
generator = outlines.generate.choice(model, labels)
    prompt_message = prompt_switch(model_option, input_text)
st.write(prompt_message)
answer = generator(prompt_message)
return answer
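# convert_audio_to_wav writes the raw bytes to a temp file, then uses pydub to re-encode
# to 16 kHz mono WAV (the rate Whisper models are trained on). pydub relies on ffmpeg
# being installed to decode mp3/m4a/ogg/flac input.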
def convert_audio_to_wav(audio_bytes, target_sample_rate=16000):
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_input:
temp_input.write(audio_bytes)
temp_input_path = temp_input.name
audio = AudioSegment.from_file(temp_input_path)
audio = audio.set_channels(1).set_frame_rate(target_sample_rate)
converted_path = temp_input_path.replace(".wav", "_converted.wav")
audio.export(converted_path, format="wav")
os.remove(temp_input_path)
return converted_path
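# transcribe_audio runs faster-whisper on the converted WAV, shows the joined segment
# text, removes the temp file, and hands the transcript to intent_classification.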
def transcribe_audio(wav_path):
st.write("🧠 Whisper 正在識別語音..")
#result = whisper_model.transcribe(wav_path, language="zh")
#st.text_area("📜 轉寫結果", result["text"], height=200)
segments, info = whisper_model.transcribe(wav_path, language="zh")
result_text = "".join([seg.text for seg in segments])
st.text_area("📜 轉寫結果", result_text, height=200)
os.remove(wav_path)
intent_classification(result_text)
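# intent_classification displays the label returned by classification().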
def intent_classification(input_text):
st.write("🧠 意圖識別 ")
    intent = classification(input_text)
st.write(intent)
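# The three input modes below converge on the same pipeline: audio is converted and
# transcribed first, while manual text goes straight to intent_classification.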
# --- Mode: Microphone ---
if mode == "🎤 使用麥克風錄音":
audio_data = mic_recorder(start_prompt="🎤 點擊開始錄音", stop_prompt="⏹️ 停止錄音", just_once=True, use_container_width=True)
if audio_data:
st.audio(audio_data["bytes"], format="audio/wav")
wav_path = convert_audio_to_wav(audio_data["bytes"])
transcribe_audio(wav_path)
# --- Mode: File Upload ---
elif mode == "📁 上傳本地音檔":
uploaded_file = st.file_uploader("上傳音頻文件 (支持 wav, mp3, m4a 等)", type=["wav", "mp3", "m4a", "ogg", "flac"])
if uploaded_file is not None:
st.audio(uploaded_file, format="audio/wav")
wav_path = convert_audio_to_wav(uploaded_file.read())
transcribe_audio(wav_path)
elif mode == "✍️ 手動文字輸入":
manual_text = st.text_area("請輸入文字", height=200, key="manual_input")
if st.button("確認輸入"):
st.success("✅ 已接收輸入內容!")
st.text_area("📜 輸入內容", manual_text, height=200, key="manual_output")
intent_classification(manual_text)
#==============