"""Gradio app: transcribe audio with Whisper, then summarize with an LLM.

Upload or record audio, transcribe it through the Hugging Face Inference
API, and ask a chat model for a concise summary plus extracted action
items. The combined result can be downloaded as a .txt file.
"""

import os
import re
import tempfile

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load HF_TOKEN and friends from a local .env file, if one exists.
load_dotenv()

# Two clients: ASR is pinned to the hf-inference provider; chat completion
# uses the account's default provider routing.
asr_client = InferenceClient(provider="hf-inference")
llm_client = InferenceClient()

LLM_MODEL = "meta-llama/Llama-3.3-70B-Instruct"
ASR_MODEL = "openai/whisper-large-v3-turbo"

# Transcripts shorter than this (in words) skip the LLM round trip.
MIN_WORDS_FOR_SUMMARY = 30

# Summary strings starting with these prefixes are error/notice messages,
# not real summaries, and must not drive the download filename.
# NOTE: the original code omitted "Transcription failed", so failed
# transcriptions leaked their error text into the filename slug.
_NON_SUMMARY_PREFIXES = (
    "No text",
    "Text is too short",
    "Transcription failed",
    "Summarization failed",
)


def transcribe_and_summarize(audio):
    """Transcribe an audio file and summarize the transcript.

    Parameters:
        audio: Filepath of the uploaded/recorded audio, or None when the
            user has not provided any input.

    Returns:
        A ``(transcript, summary)`` tuple of strings. On failure the
        relevant slot carries a human-readable error message instead of
        raising, so the UI always gets something to display.
    """
    if audio is None:
        return "", ""

    try:
        result = asr_client.automatic_speech_recognition(audio, model=ASR_MODEL)
        transcript = result.text
    except Exception as e:  # surface API errors in the UI rather than crashing
        return "", f"Transcription failed: {e}"

    # Very short transcripts are not worth summarizing.
    if len(transcript.split()) < MIN_WORDS_FOR_SUMMARY:
        return transcript, "Text is too short to summarize."

    try:
        completion = llm_client.chat_completion(
            model=LLM_MODEL,
            messages=[
                {"role": "system", "content": (
                    "You are a helpful assistant. Provide a concise summary of the following text, "
                    "followed by a section titled 'Action Items'. For each action item, identify the "
                    "owner (if mentioned) and any deadline. Format action items as a numbered list. "
                    "If no action items are found, write 'No action items found.' under that section."
                )},
                {"role": "user", "content": transcript},
            ],
            max_tokens=512,
        )
        return transcript, completion.choices[0].message.content
    except Exception as e:  # keep the transcript even when summarization fails
        return transcript, f"Summarization failed: {e}"


def _generate_filename(summary):
    """Derive a filesystem-safe .txt filename from the summary's first words.

    Error/notice messages (see ``_NON_SUMMARY_PREFIXES``) and empty
    summaries fall back to a generic name.
    """
    if summary and summary.strip() and not summary.startswith(_NON_SUMMARY_PREFIXES):
        slug = re.sub(r"[^a-zA-Z0-9 ]", "", summary[:60]).strip()
        slug = re.sub(r"\s+", "_", slug).lower()
        if slug:
            return f"{slug}.txt"
    return "transcript_output.txt"


def download_txt(transcript, summary):
    """Write transcript + summary to a temp .txt file and return its path.

    Returns None when there is nothing to save, which leaves the Gradio
    File component empty.
    """
    parts = []
    if transcript and transcript.strip():
        parts.append("=== TRANSCRIPTION ===\n" + transcript)
    if summary and summary.strip():
        parts.append("=== SUMMARY & ACTION ITEMS ===\n" + summary)
    if not parts:
        return None

    content = "\n\n".join(parts)
    path = os.path.join(tempfile.gettempdir(), _generate_filename(summary))
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)
    return path


with gr.Blocks(title="Audio Transcription & Summary") as demo:
    gr.Markdown("# 🎙️ Audio Transcription & Summary")
    gr.Markdown("Upload an audio file or record from your microphone, then transcribe and get a summary with action items.")
    with gr.Row():
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio")
    transcribe_summarize_btn = gr.Button("Transcribe & Summarize", variant="primary")
    transcription_output = gr.Textbox(label="Transcription", lines=10)
    summary_output = gr.Textbox(label="Summary & Action Items", lines=10)
    download_file = gr.File(label="Download", interactive=False)
    download_btn = gr.Button("Download as .txt")

    transcribe_summarize_btn.click(
        fn=transcribe_and_summarize,
        inputs=audio_input,
        outputs=[transcription_output, summary_output],
    )
    download_btn.click(
        fn=download_txt,
        inputs=[transcription_output, summary_output],
        outputs=download_file,
    )

# Guarded launch: running the script starts the server as before, while
# importing the module (e.g. `gradio app.py` reload mode, HF Spaces) does
# not trigger a second launch.
if __name__ == "__main__":
    demo.launch()