Spaces:
Sleeping
Sleeping
keshavgautam03 committed on
Commit ·
0d27fe1
1
Parent(s): 1e81b0d
Add mic input, remove How It Works section
Browse files
app.py
CHANGED
|
@@ -35,17 +35,27 @@ st.sidebar.markdown("""
|
|
| 35 |
- Pronunciation accuracy
|
| 36 |
""")
|
| 37 |
|
| 38 |
-
# ──
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
-
|
| 49 |
|
| 50 |
if st.button("Analyze Fluency", type="primary"):
|
| 51 |
# ── Step 1: VAD ──
|
|
@@ -210,13 +220,4 @@ if uploaded_file is not None:
|
|
| 210 |
st.dataframe(feature_df, use_container_width=True)
|
| 211 |
|
| 212 |
else:
|
| 213 |
-
st.info("Upload a .wav, .mp3, or .m4a audio file to begin analysis.")
|
| 214 |
-
st.markdown("""
|
| 215 |
-
### How it works
|
| 216 |
-
1. **Voice Activity Detection** — identifies speech vs silence segments
|
| 217 |
-
2. **Transcription** — WhisperX produces word-level aligned transcript
|
| 218 |
-
3. **Pause Classification** — each pause classified as boundary or mid-clause
|
| 219 |
-
4. **Word-Level Analysis** — confidence, filled pauses, articulation rate
|
| 220 |
-
5. **Syntactic Analysis** — POS-tagged pause context (content vs function words)
|
| 221 |
-
6. **Scoring** — 6 dimensions combined into fluency percentile
|
| 222 |
-
""")
|
|
|
|
| 35 |
- Pronunciation accuracy
|
| 36 |
""")
|
| 37 |
|
| 38 |
+
# ── Input ──
|
| 39 |
+
input_method = st.radio("Choose input method", ["Upload File", "Record from Mic"], horizontal=True)
|
| 40 |
+
|
| 41 |
+
audio_path = None
|
| 42 |
+
if input_method == "Upload File":
|
| 43 |
+
uploaded_file = st.file_uploader("Upload audio file", type=["wav", "mp3", "m4a", "ogg", "flac"])
|
| 44 |
+
if uploaded_file is not None:
|
| 45 |
+
suffix = Path(uploaded_file.name).suffix
|
| 46 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
|
| 47 |
+
tmp.write(uploaded_file.read())
|
| 48 |
+
audio_path = tmp.name
|
| 49 |
+
st.audio(uploaded_file, format=f"audio/{suffix.strip('.')}")
|
| 50 |
+
else:
|
| 51 |
+
mic_audio = st.audio_input("Record audio")
|
| 52 |
+
if mic_audio is not None:
|
| 53 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
|
| 54 |
+
tmp.write(mic_audio.read())
|
| 55 |
+
audio_path = tmp.name
|
| 56 |
+
st.audio(mic_audio, format="audio/wav")
|
| 57 |
|
| 58 |
+
if audio_path is not None:
|
| 59 |
|
| 60 |
if st.button("Analyze Fluency", type="primary"):
|
| 61 |
# ── Step 1: VAD ──
|
|
|
|
| 220 |
st.dataframe(feature_df, use_container_width=True)
|
| 221 |
|
| 222 |
else:
|
| 223 |
+
st.info("Upload a .wav, .mp3, or .m4a audio file, or record from your microphone to begin analysis.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|