| |
| import os, tempfile, subprocess, gradio as gr |
| from dotenv import load_dotenv |
| import whisper |
| import pvfalcon |
|
|
| |
| |
| |
# ── Configuration ────────────────────────────────────────────────────────────
# load_dotenv() runs before the lookup so a key kept in a .env file is visible
# through the process environment.
load_dotenv()
FALCON_ACCESS_KEY = os.environ.get("FALCON_ACCESS_KEY")
if FALCON_ACCESS_KEY is None or FALCON_ACCESS_KEY == "":
    # Fail at import time: nothing below can work without the key.
    raise RuntimeError(
        "Set FALCON_ACCESS_KEY in your environment or .env file "
        "(get one free at https://console.picovoice.ai)."
    )


# ── Engines ──────────────────────────────────────────────────────────────────
# Both engines are created once at module import and shared by every call to
# process_video().
whisper_model = whisper.load_model(name="base")
falcon = pvfalcon.create(FALCON_ACCESS_KEY)
|
|
| |
| |
| |
| def process_video(file, language="Auto"): |
| |
| lang_code = None if language == "Auto" else language.lower() |
|
|
| |
| with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav: |
| wav_path = wav.name |
| subprocess.run( |
| ["ffmpeg", "-y", "-i", file.name, |
| "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", wav_path], |
| stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL |
| ) |
| if not os.path.getsize(wav_path): |
| return "Audio extraction failed.", "" |
|
|
| |
| segments = falcon.process_file(wav_path) |
| diarized_map, label_map, counter = [], {}, 1 |
| for seg in segments: |
| tag = seg.speaker_tag |
| if tag not in label_map: |
| label_map[tag] = f"Speaker {counter}" |
| counter += 1 |
| diarized_map.append( |
| dict(start=seg.start_sec, end=seg.end_sec, speaker=label_map[tag]) |
| ) |
|
|
| |
| res = whisper_model.transcribe(wav_path, language=lang_code) |
| paragraph_transcript = res["text"] |
|
|
| |
| speaker_lines = [] |
| for s in res.get("segments", []): |
| speaker = next( |
| (m["speaker"] for m in diarized_map if m["start"] <= s["start"] <= m["end"]), |
| "Unknown" |
| ) |
| speaker_lines.append(f"{speaker}: {s['text']}") |
| speaker_transcript = "\n".join(speaker_lines) |
|
|
| |
| return speaker_transcript, paragraph_transcript |
|
|
| |
| |
| |
# Gradio front end: one file upload plus a language selector in, two text boxes
# (speaker-labelled transcript and plain transcript) out.
demo = gr.Interface(
    fn=process_video,
    inputs=[
        gr.File(label="Upload Video", type="filepath"),
        # Explicit default so the callback receives "Auto" instead of an empty
        # selection when the user never touches the dropdown.
        gr.Dropdown(
            ["Auto", "English", "Hindi", "Urdu"],
            value="Auto",
            label="Language",
        ),
    ],
    outputs=[
        gr.Textbox(label="Speaker-wise Transcript", show_copy_button=True),
        gr.Textbox(label=" Transcription", show_copy_button=True),
    ],
    title="Transcription + Speaker Segmentation",
    description="Whisper + Picovoice Falcon running fully on CPU.",
)


# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|
|