File size: 3,543 Bytes
493729a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96b3623
493729a
 
 
 
96b3623
493729a
96b3623
493729a
 
 
 
 
96b3623
 
 
 
493729a
96b3623
493729a
 
 
96b3623
493729a
96b3623
493729a
 
 
 
 
 
 
 
 
 
 
96b3623
493729a
 
 
 
96b3623
493729a
 
 
 
 
 
 
 
 
 
 
 
96b3623
493729a
 
 
 
 
 
 
96b3623
493729a
 
 
 
 
 
 
96b3623
493729a
 
 
96b3623
493729a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
import re
import tempfile

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load variables from a .env file (python-dotenv) before the inference clients
# below are constructed.
# NOTE(review): presumably this is how the Hugging Face token reaches the
# clients — confirm against the deployment environment.
load_dotenv()

# Two separate clients: the speech-recognition client pins the "hf-inference"
# provider, while the chat client is created with the library defaults.
asr_client = InferenceClient(provider="hf-inference")
llm_client = InferenceClient()

# Model id passed to llm_client.chat_completion() when summarizing.
LLM_MODEL = "meta-llama/Llama-3.3-70B-Instruct"


def transcribe_and_summarize(audio):
    """Transcribe *audio* and summarize the transcript with action items.

    Args:
        audio: Value forwarded unchanged to
            ``asr_client.automatic_speech_recognition``; ``None`` means no
            audio was supplied.

    Returns:
        A ``(transcript, summary)`` pair of strings:

        * ``("", "")`` when *audio* is ``None``;
        * ``("", "Transcription failed: ...")`` when speech recognition raises;
        * ``(transcript, "Text is too short to summarize.")`` when the
          transcript has fewer than 30 whitespace-separated words;
        * ``(transcript, "Summarization failed: ...")`` when the chat call
          raises;
        * ``(transcript, <model reply>)`` otherwise.

    Errors are reported through the summary slot instead of being raised.
    """
    if audio is None:
        return "", ""

    # Step 1: speech recognition.
    try:
        asr_output = asr_client.automatic_speech_recognition(
            audio,
            model="openai/whisper-large-v3-turbo",
        )
        transcript = asr_output.text
    except Exception as asr_error:
        return "", f"Transcription failed: {asr_error}"

    # Step 2: skip the LLM call for very short transcripts.
    word_count = len(transcript.split())
    if word_count < 30:
        return transcript, "Text is too short to summarize."

    # Step 3: ask the chat model for a summary plus action items.
    system_prompt = (
        "You are a helpful assistant. Provide a concise summary of the following text, "
        "followed by a section titled 'Action Items'. For each action item, identify the "
        "owner (if mentioned) and any deadline. Format action items as a numbered list. "
        "If no action items are found, write 'No action items found.' under that section."
    )
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": transcript},
    ]
    try:
        completion = llm_client.chat_completion(
            model=LLM_MODEL,
            messages=chat_messages,
            max_tokens=512,
        )
        summary = completion.choices[0].message.content
    except Exception as llm_error:
        return transcript, f"Summarization failed: {llm_error}"
    return transcript, summary


def _generate_filename(summary):
    if summary and summary.strip() and not summary.startswith(("No text", "Text is too short", "Summarization failed")):
        slug = re.sub(r"[^a-zA-Z0-9 ]", "", summary[:60]).strip()
        slug = re.sub(r"\s+", "_", slug).lower()
        if slug:
            return f"{slug}.txt"
    return "transcript_output.txt"


def download_txt(transcript, summary):
    """Write the transcript and/or summary to a text file and return its path.

    Args:
        transcript: Transcription text; skipped when ``None`` or blank.
        summary: Summary text; skipped when ``None`` or blank.

    Returns:
        Path of the UTF-8 text file that was written, or ``None`` when both
        inputs are empty or blank, in which case no file is created.
    """
    sections = []
    if transcript and transcript.strip():
        sections.append("=== TRANSCRIPTION ===\n" + transcript)
    if summary and summary.strip():
        sections.append("=== SUMMARY & ACTION ITEMS ===\n" + summary)
    if not sections:
        return None

    # Each export gets its own freshly created directory. The filename is
    # derived from the summary and is often the same across requests, so
    # writing straight into the shared temp directory would let concurrent
    # exports overwrite one another.
    export_dir = tempfile.mkdtemp(prefix="transcript_")
    path = os.path.join(export_dir, _generate_filename(summary))
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n\n".join(sections))
    return path


# UI definition: components are created in the order they appear below, and
# the two buttons are wired to the handler functions defined above.
with gr.Blocks(title="Audio Transcription & Summary") as demo:
    gr.Markdown("# 🎙️ Audio Transcription & Summary")
    gr.Markdown("Upload an audio file or record from your microphone, then transcribe and get a summary with action items.")

    with gr.Row():
        # Audio input accepting an upload or a microphone recording; configured
        # with type="filepath", and its value is what transcribe_and_summarize
        # receives as `audio`.
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio")

    transcribe_summarize_btn = gr.Button("Transcribe & Summarize", variant="primary")

    # Text boxes that receive the two values returned by transcribe_and_summarize.
    transcription_output = gr.Textbox(label="Transcription", lines=10)
    summary_output = gr.Textbox(label="Summary & Action Items", lines=10)

    # Output-only file slot (interactive=False) filled by download_txt.
    download_file = gr.File(label="Download", interactive=False)
    download_btn = gr.Button("Download as .txt")

    # Main action: audio -> (transcription, summary).
    transcribe_summarize_btn.click(
        fn=transcribe_and_summarize,
        inputs=audio_input,
        outputs=[transcription_output, summary_output],
    )
    # Export: current contents of both text boxes -> file path (or None).
    download_btn.click(
        fn=download_txt,
        inputs=[transcription_output, summary_output],
        outputs=download_file,
    )

# Launched unconditionally at module level, so importing this module also
# starts the app.
demo.launch()