Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from streamlit_mic_recorder import mic_recorder | |
| #import whisper | |
| import tempfile | |
| import os | |
| from pydub import AudioSegment | |
| from faster_whisper import WhisperModel | |
| # Load whisper model | |
@st.cache_resource
def load_model():
    """Build the faster-whisper speech recognition model.

    Streamlit re-executes the whole script on every widget interaction, so the
    result is cached with ``st.cache_resource``: the "large-v3" weights are
    loaded once per server process instead of once per rerun.

    Returns:
        WhisperModel: the "large-v3" model with int8 compute and automatic
        device selection.
    """
    return WhisperModel("large-v3", device="auto", compute_type="int8")
# Shared speech-to-text model used by transcribe_audio().
whisper_model = load_model()

st.title("🗣 中文語音識別 (Whisper + Mic Recorder)")

# The three input paths offered by the UI; the labels double as the values
# that the mode dispatch further down compares against.
input_modes = ["🎤 使用麥克風錄音", "📁 上傳本地音檔", "✍️ 手動文字輸入"]
mode = st.radio("選擇輸入方式", input_modes, horizontal=True)
# NOTE(review): both templates name the second category "unrelated" but ask
# for {"result": "Others"}, while clssification() constrains the answer to
# "Reservation"/"unrelated". The wording is kept as-is; confirm which label is
# intended before changing the prompt text.

# Long-form prompt: numbered categories plus explicit return instructions.
_DETAILED_INTENT_PROMPT = """
You are an assistant for intent classification.
Your task is to classify a given user input into one of the following two categories:
1."Reservation": user input is related to or imply a a restaurant reservation.
2."unrelated": user input is anything else.
Your response should be in JSON format either
{{"result": "Reservation"}} or {{"result": "Others"}}.
If the user input is related to restaurant reservation, return {{"result": "Reservation"}};
If the user input is anything else, return {{"result": "Others"}}.
Here is the user input: {input}
""".strip()

# Short-form prompt: same task without the explicit return instructions.
_SHORT_INTENT_PROMPT = """
You are an assistant for intent classification.
Your task is to classify a given user input into one of the following two categories:
"Reservation": user input is related to or imply a a restaurant reservation.
"unrelated": user input is anything else.
Your response should be in JSON format either {{"result": "Reservation"}} or {{"result": "Others"}}.
Here is the user input: {input}
""".strip()

# Which template each selectable model uses.
_INTENT_PROMPTS = {
    "Qwen/Qwen2.5-7B-Instruct-Turbo": _DETAILED_INTENT_PROMPT,
    "Qwen/Qwen2.5-Coder-32B-Instruct": _SHORT_INTENT_PROMPT,
    "google/gemma-2b-it": _DETAILED_INTENT_PROMPT,
    "google/gemma-2-9b-it": _DETAILED_INTENT_PROMPT,
    "google/gemma-2-27b-it": _DETAILED_INTENT_PROMPT,
}


def prompt_switch(index, input_text):
    """Render the intent-classification prompt for a given model.

    Args:
        index: Model identifier used as the template key,
            e.g. "google/gemma-2-9b-it".
        input_text: The user utterance to classify. It is inserted verbatim;
            braces inside it are not interpreted as format fields.

    Returns:
        str: The prompt with ``input_text`` substituted into the template.

    Raises:
        KeyError: If ``index`` has no registered template. The message lists
            the supported model identifiers.
    """
    try:
        template = _INTENT_PROMPTS[index]
    except KeyError:
        supported = ", ".join(sorted(_INTENT_PROMPTS))
        raise KeyError(
            f"No prompt template for model {index!r}; supported models: {supported}"
        ) from None
    return template.format(input=input_text)
# Models the user can pick for intent classification; each identifier must
# also have a prompt template registered in prompt_switch().
available_models = (
    "Qwen/Qwen2.5-7B-Instruct-Turbo",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "google/gemma-2b-it",
    "google/gemma-2-9b-it",
    "google/gemma-2-27b-it",
)
model_option = st.selectbox("你要選擇哪一個模型?", available_models)
st.write("你選擇的模型:", model_option)
| # Record audio from browser | |
| # audio_data = mic_recorder(start_prompt="🎤 點擊開始錄音", stop_prompt="⏹️ 停止錄音", just_once=True, use_container_width=True) | |
| import os | |
| import outlines | |
| import os, termcolor | |
| from termcolor import cprint, colored | |
| from outlines.models import openai | |
| #from outlines.generate import choice | |
| # ''' | |
| # prompt_messages_q32b = f""" | |
| # You are an assistant for intent classification. | |
| # Your task is to classify a given user input into one of the following two categories: | |
| # "Reservation": user input is related to or imply a a restaurant reservation. | |
| # "unrelated": user input is anything else. | |
| # Your response should be in JSON format either {{"result": "Reservation"}} or {{"result": "Others"}}. | |
| # Here is the user input: {input} | |
| # """.strip() | |
| # prompt_messages_q7b = f""" | |
| # You are an assistant for intent classification. | |
| # Your task is to classify a given user input into one of the following two categories: | |
| # 1."Reservation": user input is related to or imply a a restaurant reservation. | |
| # 2."unrelated": user input is anything else. | |
| # Your response should be in JSON format either | |
| # {{"result": "Reservation"}} or {{"result": "Others"}}. | |
| # If the user input is related to restaurant reservation, return {{"result": "Reservation"}}; | |
| # If the user input is anything else, return {{"result": "Others"}}. | |
| # Here is the user input: {input} | |
| # """.strip() | |
| # ''' | |
def clssification(input):
    """Classify the intent of a user utterance with the selected LLM.

    Reads the module-level ``model_option`` chosen in the UI, builds an
    OpenAI-compatible client against the Together endpoint, and asks outlines
    to pick one of the two labels for the rendered prompt. The prompt is also
    echoed to the page.

    Args:
        input: The user text to classify.

    Returns:
        The label selected by the generator: "Reservation" or "unrelated".

    Raises:
        KeyError: If the ``TOGETHER_API_KEY`` environment variable is not set.
    """
    st.write("🧠 LLM辨識意圖中..")

    # Together serves these models behind an OpenAI-style API, hence the
    # custom base_url on the outlines OpenAI model wrapper.
    together_model = openai(
        model_option,
        api_key=os.environ["TOGETHER_API_KEY"],
        base_url="https://api.together.xyz/v1",
    )
    pick_label = outlines.generate.choice(
        together_model, ["Reservation", "unrelated"]
    )

    rendered_prompt = prompt_switch(model_option, input)
    st.write(rendered_prompt)
    return pick_label(rendered_prompt)
def convert_audio_to_wav(audio_bytes, target_sample_rate=16000):
    """Convert raw audio bytes to a mono WAV file on disk.

    The bytes are written to a temporary file, decoded with pydub,
    down-mixed to one channel, resampled, and exported as WAV next to the
    temporary input.

    Args:
        audio_bytes: Encoded audio data in any container pydub can decode.
        target_sample_rate: Frame rate of the exported WAV in Hz.

    Returns:
        str: Path of the converted WAV file. The caller owns this file and is
        responsible for deleting it.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_input:
        temp_input.write(audio_bytes)
        temp_input_path = temp_input.name

    try:
        audio = AudioSegment.from_file(temp_input_path)
        audio = audio.set_channels(1).set_frame_rate(target_sample_rate)

        # Only swap the trailing extension; str.replace would also rewrite a
        # ".wav" that happens to appear earlier in the directory path.
        root, _ext = os.path.splitext(temp_input_path)
        converted_path = f"{root}_converted.wav"

        # export() hands back the output file object still open; close it so
        # the handle is not leaked and the file can be removed later.
        audio.export(converted_path, format="wav").close()
    finally:
        # Remove the temporary input even when decoding or export fails.
        os.remove(temp_input_path)

    return converted_path
def transcribe_audio(wav_path):
    """Transcribe a WAV file as Chinese, show the text, and classify it.

    The file at ``wav_path`` is deleted once transcription has finished,
    whether it succeeded or raised.

    Args:
        wav_path: Path to the WAV file to transcribe; consumed by this call.
    """
    st.write("🧠 Whisper 正在識別語音..")
    try:
        segments, _info = whisper_model.transcribe(wav_path, language="zh")
        # The segments are consumed here, inside the try, so the audio file
        # is still present while faster-whisper iterates over it.
        result_text = "".join(seg.text for seg in segments)
    finally:
        # Always clean up the temporary WAV, even if transcription fails.
        os.remove(wav_path)

    st.text_area("📜 轉寫結果", result_text, height=200)
    intent_classification(result_text)
def intent_classification(input_text):
    """Run intent classification on ``input_text`` and display the result.

    Args:
        input_text: The text (transcribed or typed) to classify.
    """
    st.write("🧠 意圖識別 ")
    # Delegate to the LLM-backed classifier and render whatever it returns.
    st.write(clssification(input_text))
# --- Mode: Microphone ---
if mode == "🎤 使用麥克風錄音":
    audio_data = mic_recorder(
        start_prompt="🎤 點擊開始錄音",
        stop_prompt="⏹️ 停止錄音",
        just_once=True,
        use_container_width=True,
    )
    if audio_data:
        # Label playback with the format the recorder reports instead of
        # always claiming WAV; fall back to WAV when the key is absent.
        recorded_format = audio_data.get("format", "wav")
        st.audio(audio_data["bytes"], format=f"audio/{recorded_format}")
        wav_path = convert_audio_to_wav(audio_data["bytes"])
        transcribe_audio(wav_path)
# --- Mode: File Upload ---
elif mode == "📁 上傳本地音檔":
    uploaded_file = st.file_uploader(
        "上傳音頻文件 (支持 wav, mp3, m4a 等)",
        type=["wav", "mp3", "m4a", "ogg", "flac"],
    )
    if uploaded_file is not None:
        # getvalue() returns the full upload regardless of the current stream
        # position, so it is safe even after another consumer has read it.
        uploaded_bytes = uploaded_file.getvalue()
        # Use the MIME type reported for the upload (mp3, m4a, ...) rather
        # than hard-coding WAV.
        st.audio(uploaded_bytes, format=uploaded_file.type or "audio/wav")
        wav_path = convert_audio_to_wav(uploaded_bytes)
        transcribe_audio(wav_path)
# --- Mode: Manual text input ---
elif mode == "✍️ 手動文字輸入":
    manual_text = st.text_area("請輸入文字", height=200, key="manual_input")
    if st.button("確認輸入"):
        st.success("✅ 已接收輸入內容!")
        st.text_area("📜 輸入內容", manual_text, height=200, key="manual_output")
        intent_classification(manual_text)
| #============== | |