Spaces:

picard47at
/

classification

Sleeping

App Files Files Community

picard.tseng committed on Apr 15, 2025

Commit

d1c7692

1 Parent(s): 8bd31da

First commit

Browse files

Files changed (2) hide show

app.py +101 -0
requirements.txt +14 -0

app.py ADDED Viewed

	@@ -0,0 +1,101 @@

+import streamlit as st
+from streamlit_mic_recorder import mic_recorder
+#import whisper
+import tempfile
+import os
+from pydub import AudioSegment
+from faster_whisper import WhisperModel
+# Load whisper model
+@st.cache_resource
+def load_model():
+    #return whisper.load_model("small")
+    return WhisperModel("large-v3", device="auto", compute_type="int8")
+whisper_model = load_model()
+st.title("🗣 中文語音識別 (Whisper + Mic Recorder)")
+mode = st.radio("選擇輸入方式", ["🎤 使用麥克風錄音", "📁 上傳本地音檔"], horizontal=True)
+# Record audio from browser
+# audio_data = mic_recorder(start_prompt="🎤 點擊開始錄音", stop_prompt="⏹️ 停止錄音", just_once=True, use_container_width=True)
+import os
+import outlines
+import os, termcolor
+from termcolor import cprint, colored
+from outlines.models import openai
+#from outlines.generate import choice
+TOGETHER_API_KEY = ""
+model = openai(
+    #"Qwen/Qwen2.5-Coder-32B-Instruct",
+    #"Qwen/Qwen2.5-7B-Instruct-Turbo",
+    "google/gemma-2b-it",
+    api_key=os.environ["TOGETHER_API_KEY"],
+    base_url="https://api.together.xyz/v1"
+    )
+def clssification(model, input):
+    labels = ["Reservation", "unrelated"]
+    prompt_messages = f"""
+            You are an assistant for intent classification.
+            Your task is to classify a given user input into one of the following two categories:
+            "Reservation": user input is related to or imply a a restaurant reservation.
+            "unrelated": user input is anything else.
+            Your response should be in JSON format either {{"result": "Reservation"}} or {{"result": "Others"}}.
+            Here is the user input: {input}
+            """.strip()
+    generator = outlines.generate.choice(model, labels)
+    #cprint(prompt_messages,"blue")
+    answer = generator(prompt_messages)
+    return answer
+def convert_audio_to_wav(audio_bytes, target_sample_rate=16000):
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_input:
+        temp_input.write(audio_bytes)
+        temp_input_path = temp_input.name
+    audio = AudioSegment.from_file(temp_input_path)
+    audio = audio.set_channels(1).set_frame_rate(target_sample_rate)
+    converted_path = temp_input_path.replace(".wav", "_converted.wav")
+    audio.export(converted_path, format="wav")
+    os.remove(temp_input_path)
+    return converted_path
+def transcribe_audio(wav_path):
+    st.write("🧠 Whisper 正在識別語音..")
+    #result = whisper_model.transcribe(wav_path, language="zh")
+    #st.text_area("📜 轉寫結果", result["text"], height=200)
+    segments, info = whisper_model.transcribe(wav_path, language="zh")
+    result_text = "".join([seg.text for seg in segments])
+    st.text_area("📜 轉寫結果", result_text, height=200)
+    os.remove(wav_path)
+    st.write("🧠 意圖識別 ")
+    intent=clssification(model, result_text)
+    st.write(intent)
+# --- Mode: Microphone ---
+if mode == "🎤 使用麥克風錄音":
+    audio_data = mic_recorder(start_prompt="🎤 點擊開始錄音", stop_prompt="⏹️ 停止錄音", just_once=True, use_container_width=True)
+    if audio_data:
+        st.audio(audio_data["bytes"], format="audio/wav")
+        wav_path = convert_audio_to_wav(audio_data["bytes"])
+        transcribe_audio(wav_path)
+# --- Mode: File Upload ---
+elif mode == "📁 上傳本地音檔":
+    uploaded_file = st.file_uploader("上傳音頻文件 (支持 wav, mp3, m4a 等)", type=["wav", "mp3", "m4a", "ogg", "flac"])
+    if uploaded_file is not None:
+        st.audio(uploaded_file, format="audio/wav")
+        wav_path = convert_audio_to_wav(uploaded_file.read())
+        transcribe_audio(wav_path)
+#==============
+    st.write(answer)

requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+streamlit
+pandas
+openai-whisper
+numpy
+gradio
+yt_dlp
+transformers
+streamlit-mic-recorder
+pydub
+faster_whisper
+together
+outlines
+termcolor
+openai