# audiotranscribe / app.py
# Last commit by madanyc: "Upload folder using huggingface_hub" (96b3623, verified)
import os
import re
import tempfile
import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
# Load variables from a .env file into the environment before the clients
# below are constructed.
# NOTE(review): presumably this is where the Hugging Face API token comes
# from — confirm against the deployment configuration.
load_dotenv()

# Client used for speech recognition; explicitly pinned to the "hf-inference"
# provider.
asr_client = InferenceClient(provider="hf-inference")
# Client used for the chat-completion (summary) call; built with default
# settings.
llm_client = InferenceClient()

# Chat model asked to produce the summary and action items.
LLM_MODEL = "meta-llama/Llama-3.3-70B-Instruct"
def transcribe_and_summarize(audio):
    """Transcribe an audio input and summarize the transcript.

    Args:
        audio: The value handed to the speech-recognition client, or
            ``None`` when no audio was provided.

    Returns:
        A ``(transcript, summary)`` tuple of strings. Errors are never
        raised to the caller; instead the second element carries a
        human-readable status message ("Transcription failed: ...",
        "Text is too short to summarize.", "Summarization failed: ...").
    """
    # Nothing to do without audio: clear both outputs.
    if audio is None:
        return "", ""

    # Stage 1: speech-to-text.
    try:
        asr_output = asr_client.automatic_speech_recognition(
            audio, model="openai/whisper-large-v3-turbo"
        )
        transcript = asr_output.text
    except Exception as e:
        return "", f"Transcription failed: {e}"

    # Transcripts under 30 whitespace-separated words are returned as-is
    # without calling the language model.
    word_count = len(transcript.split())
    if word_count < 30:
        return transcript, "Text is too short to summarize."

    # Stage 2: summary plus action items via chat completion.
    system_prompt = (
        "You are a helpful assistant. Provide a concise summary of the following text, "
        "followed by a section titled 'Action Items'. For each action item, identify the "
        "owner (if mentioned) and any deadline. Format action items as a numbered list. "
        "If no action items are found, write 'No action items found.' under that section."
    )
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": transcript},
    ]
    try:
        completion = llm_client.chat_completion(
            model=LLM_MODEL,
            messages=conversation,
            max_tokens=512,
        )
        summary = completion.choices[0].message.content
    except Exception as e:
        # The transcript is still useful even if summarization fails.
        return transcript, f"Summarization failed: {e}"
    return transcript, summary
def _generate_filename(summary):
if summary and summary.strip() and not summary.startswith(("No text", "Text is too short", "Summarization failed")):
slug = re.sub(r"[^a-zA-Z0-9 ]", "", summary[:60]).strip()
slug = re.sub(r"\s+", "_", slug).lower()
if slug:
return f"{slug}.txt"
return "transcript_output.txt"
def download_txt(transcript, summary):
    """Write the transcript and/or summary to a text file for download.

    Args:
        transcript: Transcription text; ignored when empty, blank or ``None``.
        summary: Summary text; ignored when empty, blank or ``None``.

    Returns:
        Path of the written UTF-8 text file, or ``None`` when neither input
        contains any non-whitespace text.
    """
    sections = []
    if transcript and transcript.strip():
        sections.append("=== TRANSCRIPTION ===\n" + transcript)
    if summary and summary.strip():
        sections.append("=== SUMMARY & ACTION ITEMS ===\n" + summary)
    if not sections:
        return None

    # Write into a fresh, uniquely named directory instead of directly into
    # the shared temp dir: filenames are predictable (often the default
    # "transcript_output.txt"), so concurrent requests would otherwise
    # overwrite each other's files. mkdtemp() creates the directory under the
    # system temp dir by default, and the readable filename is preserved.
    out_dir = tempfile.mkdtemp(prefix="transcript_")
    path = os.path.join(out_dir, _generate_filename(summary))
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n\n".join(sections))
    return path
# Build the UI: an audio input, a button that runs transcription and
# summarization, two text boxes for the results, and a file component plus
# button for downloading the results as a text file.
# NOTE(review): the original indentation was lost; the extent of the gr.Row()
# block below (audio input only) is a reconstruction — confirm the layout.
with gr.Blocks(title="Audio Transcription & Summary") as demo:
    gr.Markdown("# 🎙️ Audio Transcription & Summary")
    gr.Markdown("Upload an audio file or record from your microphone, then transcribe and get a summary with action items.")
    with gr.Row():
        # type="filepath": the callback receives the audio as a file path.
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio")
    transcribe_summarize_btn = gr.Button("Transcribe & Summarize", variant="primary")
    transcription_output = gr.Textbox(label="Transcription", lines=10)
    summary_output = gr.Textbox(label="Summary & Action Items", lines=10)
    download_file = gr.File(label="Download", interactive=False)
    download_btn = gr.Button("Download as .txt")
    # The two return values of transcribe_and_summarize fill the two text
    # boxes, in order.
    transcribe_summarize_btn.click(
        fn=transcribe_and_summarize,
        inputs=audio_input,
        outputs=[transcription_output, summary_output],
    )
    # The download reads the current contents of both text boxes and hands
    # the resulting file path to the file component.
    download_btn.click(
        fn=download_txt,
        inputs=[transcription_output, summary_output],
        outputs=download_file,
    )
# Start the app when this file is executed.
demo.launch()