Spaces:

dlmn
/

SakecAI

Sleeping

dlmn committed on Oct 7, 2023

Commit

489b056

1 Parent(s): 63c7847

created app.py

created
![download.jpg](https://cdn-uploads.huggingface.co/production/uploads/651cfd6f0cd82f89cc3e2647/k6YXj54-kPv6otdcP4mfW.jpeg)

Files changed (1) hide show

app.py +57 -0

app.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import streamlit as st
+from transformers import pipeline
+import torchaudio
+import tempfile
+import os
+import torch
+# Create a Streamlit app title
+st.title("ASR with Hugging Face Whisper")
+# Load the ASR model
+asr = pipeline(task = "automatic-speech-recognition", model="openai/whisper-large-v2",
+               device=0 if torch.cuda.is_available() else "cpu")
+# Create a file uploader widget
+uploaded_audio = st.file_uploader("Upload an audio file (wav/mp3)")
+# Check if an audio file is uploaded
+if uploaded_audio:
+    # Read the uploaded audio file
+    audio_data, sample_rate = torchaudio.load(uploaded_audio)
+    # Perform ASR on the uploaded audio
+    with st.spinner("Performing ASR..."):
+        transcriptions = asr(audio_data.numpy(), sample_rate=sample_rate)
+    # Display the ASR result
+    st.subheader("Transcription:")
+    for idx, transcription in enumerate(transcriptions):
+        st.write(f"Segment {idx + 1}: {transcription['text']}")
+# Provide instructions
+st.write("Instructions:")
+st.write("1. Upload an audio file in WAV or MP3 format.")
+st.write("2. Click the 'Perform ASR' button to transcribe the audio.")
+# Add a sample audio file for testing (optional)
+st.write("Sample Audio for Testing:")
+sample_audio = "Wave_files_demos_Welcome.wav"
+st.audio(sample_audio, format="audio/wav")
+# Define the path to the sample audio file
+sample_audio_path = os.path.join(os.getcwd(), sample_audio)
+# Add a button to transcribe the sample audio (optional)
+if st.button("Transcribe Sample Audio"):
+    # Read the sample audio file
+    sample_audio_data, sample_audio_rate = torchaudio.load(sample_audio_path)
+    # Perform ASR on the sample audio
+    with st.spinner("Performing ASR..."):
+        sample_transcriptions = asr(sample_audio_data.numpy(), sample_rate=sample_audio_rate)
+    # Display the ASR result for the sample audio
+    st.subheader("Transcription (Sample Audio):")
+    for idx, transcription in enumerate(sample_transcriptions):
+        st.write(f"Segment {idx + 1}: {transcription['text']}")