picard.tseng committed on
Commit
d1c7692
·
1 Parent(s): 8bd31da

First commit

Browse files
Files changed (2) hide show
  1. app.py +101 -0
  2. requirements.txt +14 -0
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_mic_recorder import mic_recorder
3
+ #import whisper
4
+ import tempfile
5
+ import os
6
+ from pydub import AudioSegment
7
+ from faster_whisper import WhisperModel
8
+ # Load whisper model
9
@st.cache_resource
def load_model():
    """Create the faster-whisper model the app uses for transcription.

    The function is wrapped in ``st.cache_resource`` so that Streamlit can
    hand back the already-built model instead of constructing it again.

    Returns:
        A ``WhisperModel`` for the "large-v3" checkpoint, with automatic
        device selection and int8 computation.
    """
    # An earlier revision used openai-whisper: whisper.load_model("small").
    model_size = "large-v3"
    return WhisperModel(model_size, compute_type="int8", device="auto")
13
+
14
# Speech-to-text model shared by the rest of the script (see load_model).
whisper_model = load_model()

st.title("🗣 中文語音識別 (Whisper + Mic Recorder)")

# Input source selector; the chosen label drives the branch at the bottom
# of the script.
mode = st.radio(
    "選擇輸入方式",
    ["🎤 使用麥克風錄音", "📁 上傳本地音檔"],
    horizontal=True,
)
19
+
20
+ # Record audio from browser
21
+ # audio_data = mic_recorder(start_prompt="🎤 點擊開始錄音", stop_prompt="⏹️ 停止錄音", just_once=True, use_container_width=True)
22
+
23
+ import os
24
+ import outlines
25
+ import os, termcolor
26
+ from termcolor import cprint, colored
27
+ from outlines.models import openai
28
+ #from outlines.generate import choice
29
# The Together API key is read from the environment only; it must never be
# hard-coded here. Previously this constant was an unused empty string and a
# missing variable surfaced as a bare KeyError with no guidance.
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "")
if not TOGETHER_API_KEY:
    raise RuntimeError(
        "The TOGETHER_API_KEY environment variable is not set; "
        "it is required to call the Together API."
    )

# Outlines model wrapper pointed at Together's OpenAI-compatible endpoint.
model = openai(
    # Alternatives tried previously:
    #   "Qwen/Qwen2.5-Coder-32B-Instruct"
    #   "Qwen/Qwen2.5-7B-Instruct-Turbo"
    "google/gemma-2b-it",
    api_key=TOGETHER_API_KEY,
    base_url="https://api.together.xyz/v1",
)
38
+
39
def clssification(model, input):
    """Classify a user utterance as a restaurant-reservation intent or not.

    Args:
        model: Outlines model object passed to ``outlines.generate.choice``.
        input: The user text to classify. The parameter name shadows the
            builtin but is kept so existing callers are unaffected.

    Returns:
        The generator's answer for the prompt. Because generation is
        constrained with ``outlines.generate.choice``, this is expected to
        be one of ``"Reservation"`` or ``"unrelated"``.
    """
    labels = ["Reservation", "unrelated"]

    # Build the answer instruction from ``labels`` so the prompt cannot
    # drift from the constrained choices. The previous prompt asked for a
    # JSON object and named an "Others" label that the generator could
    # never produce.
    allowed = " or ".join(f'"{label}"' for label in labels)
    prompt_messages = "\n".join(
        (
            "You are an assistant for intent classification.",
            "Your task is to classify a given user input into one of the "
            "following two categories:",
            '"Reservation": user input is related to or implies a '
            "restaurant reservation.",
            '"unrelated": user input is anything else.',
            f"Your response must be exactly one of the category names: {allowed}.",
            f"Here is the user input: {input}",
        )
    )

    generator = outlines.generate.choice(model, labels)
    return generator(prompt_messages)
55
+
56
def convert_audio_to_wav(audio_bytes, target_sample_rate=16000):
    """Write raw audio bytes to disk and re-encode them as mono WAV.

    Args:
        audio_bytes: Encoded audio content in any container that
            ``AudioSegment.from_file`` can decode.
        target_sample_rate: Frame rate, in Hz, of the exported WAV.

    Returns:
        Path of the converted WAV file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        Whatever ``AudioSegment.from_file`` / ``export`` raise on undecodable
        input; temporary files are cleaned up before the error propagates.
    """
    # delete=False: the file must survive the ``with`` so it can be reopened
    # by path for decoding.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_input:
        temp_input.write(audio_bytes)
        temp_input_path = temp_input.name

    # Swap only the extension; str.replace(".wav", ...) would also rewrite a
    # ".wav" occurring elsewhere in the path (e.g. in a directory name).
    stem, _ext = os.path.splitext(temp_input_path)
    converted_path = f"{stem}_converted.wav"

    try:
        audio = AudioSegment.from_file(temp_input_path)
        audio = audio.set_channels(1).set_frame_rate(target_sample_rate)
        # export() hands back the open output file object; close it so the
        # handle is not leaked and the file can be removed later.
        audio.export(converted_path, format="wav").close()
    except Exception:
        # Do not leave a partial output behind when conversion fails.
        if os.path.exists(converted_path):
            os.remove(converted_path)
        raise
    finally:
        # The raw upload is no longer needed whether or not decoding worked.
        os.remove(temp_input_path)

    return converted_path
68
+
69
def transcribe_audio(wav_path):
    """Transcribe a WAV file, display the text, then display its intent.

    The file at ``wav_path`` is deleted once transcription has finished,
    including when transcription fails.

    Args:
        wav_path: Path to the WAV file to transcribe (as Chinese, "zh").

    Returns:
        None. Results are rendered through Streamlit widgets.
    """
    st.write("🧠 Whisper 正在識別語音..")
    try:
        segments, _info = whisper_model.transcribe(wav_path, language="zh")
        # NOTE(review): faster-whisper documents ``segments`` as a lazy
        # generator, so the audio file must still exist while it is consumed
        # here; only delete the file afterwards.
        result_text = "".join(seg.text for seg in segments)
    finally:
        # Always clean up the temporary WAV, even if transcription raised.
        os.remove(wav_path)

    st.text_area("📜 轉寫結果", result_text, height=200)

    st.write("🧠 意圖識別 ")
    intent = clssification(model, result_text)
    st.write(intent)
80
# --- Mode: Microphone ---
if mode == "🎤 使用麥克風錄音":
    audio_data = mic_recorder(
        start_prompt="🎤 點擊開始錄音",
        stop_prompt="⏹️ 停止錄音",
        just_once=True,
        use_container_width=True,
    )

    # mic_recorder yields a falsy value until a recording is available.
    if audio_data:
        st.audio(audio_data["bytes"], format="audio/wav")
        wav_path = convert_audio_to_wav(audio_data["bytes"])
        transcribe_audio(wav_path)

# --- Mode: File Upload ---
elif mode == "📁 上傳本地音檔":
    uploaded_file = st.file_uploader(
        "上傳音頻文件 (支持 wav, mp3, m4a 等)",
        type=["wav", "mp3", "m4a", "ogg", "flac"],
    )

    if uploaded_file is not None:
        st.audio(uploaded_file, format="audio/wav")
        # getvalue() returns the whole buffer regardless of the current
        # stream position, unlike read().
        wav_path = convert_audio_to_wav(uploaded_file.getvalue())
        transcribe_audio(wav_path)

# NOTE: a trailing ``st.write(answer)`` was removed here. ``answer`` only ever
# existed as a local inside ``clssification``, so the call raised NameError on
# every script run; the intent is already shown by ``transcribe_audio``.
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ openai-whisper
4
+ numpy
5
+ gradio
6
+ yt_dlp
7
+ transformers
8
+ streamlit-mic-recorder
9
+ pydub
10
+ faster_whisper
11
+ together
12
+ outlines
13
+ termcolor
14
+ openai