Spaces:
Sleeping
Sleeping
| """ | |
| VoiceVerse AI — Main Application. | |
| Input sources (three tabs): | |
| Tab 1 — Upload PDF or TXT file | |
| Tab 2 — URL / YouTube link | |
| Tab 3 — Paste raw text | |
| Delivery Modes: | |
| Summary / Podcast / Song / Rap / Debate / Story | |
| No status card shown. RAG pipeline unchanged. | |
| """ | |
| import os | |
| import gradio as gr | |
| from utils import logger, validate_file, format_error | |
| from rag import extract_text, RAGStore | |
| from script_gen import generate_script | |
| from tts import generate_audio, generate_audio_podcast, generate_audio_debate, generate_audio_rap, generate_audio_story | |
| from ingestion import ingest_from_url_or_text, extract_pasted_text | |
# -- Global RAG Store ---------------------------------------------------------
# Single module-level RAGStore instance; _run_pipeline indexes into it and
# queries it for every request coming from any of the three input tabs.
# NOTE(review): add_document() is called on this shared instance once per
# request. Whether documents from earlier requests are cleared depends on
# RAGStore, which is not visible here — confirm it resets its index.
rag_store = RAGStore()
| # ────────────────────────────────────────────────────────────────────────────── | |
| # Shared RAG + Script + TTS pipeline | |
| # ────────────────────────────────────────────────────────────────────────────── | |
def _run_pipeline(
    text: str,
    delivery_mode: str,
    song_rap_sub: str,
    progress,
) -> tuple[str, str]:
    """
    Run the shared index -> retrieve -> script -> audio pipeline.

    Called by all three tab handlers once they have obtained plain text.

    Args:
        text: Source text to index in the module-level ``rag_store``.
        delivery_mode: Delivery mode label as chosen in the UI
            (for example "Podcast" or "Song / Rap").
        song_rap_sub: "Song" or "Rap"; only consulted for routing when the
            delivery mode is "Song / Rap".
        progress: Callable invoked as ``progress(fraction, desc=...)`` to
            report progress.

    Returns:
        ``(script, audio_path)``.

    NOTE(review): the progress labels below contain what looks like
    mis-encoded emoji/ellipsis characters; they are reproduced unchanged.
    """
    # --- Index: chunk & embed the text into the shared store ----------------
    progress(0.30, desc="π§ Building knowledge indexβ¦")
    rag_store.add_document(text)
    total_chunks = len(rag_store.chunks)
    logger.info("RAG index: %d chunks", total_chunks)

    # --- Retrieve: small indexes are used whole, larger ones are queried ----
    progress(0.45, desc="π Retrieving relevant contentβ¦")
    if total_chunks > 8:
        context_chunks = rag_store.query(
            "What are the main topics, key insights, and important details?",
            top_k=6,
        )
    else:
        context_chunks = rag_store.get_all_chunks()

    # --- Script generation ----------------------------------------------------
    progress(0.60, desc=f"βοΈ Writing {_mode_label(delivery_mode, song_rap_sub)} scriptβ¦")
    script = generate_script(
        context_chunks=context_chunks,
        mode=delivery_mode,
        sub_mode=song_rap_sub,
    )
    logger.info("Script: %d chars", len(script))

    # --- TTS: pick the synthesiser that matches the delivery mode ------------
    progress(0.80, desc="ποΈ Synthesising audioβ¦")
    normalized_mode = delivery_mode.strip().lower()
    if normalized_mode == "song / rap":
        # Only the "Rap" style has a dedicated engine; "Song" uses the default.
        if song_rap_sub.lower() == "rap":
            synthesise = generate_audio_rap
        else:
            synthesise = generate_audio
    else:
        synthesise = {
            "podcast": generate_audio_podcast,
            "debate": generate_audio_debate,
            "story": generate_audio_story,
        }.get(normalized_mode, generate_audio)
    audio_path, engine = synthesise(script)
    logger.info("Audio via %s: %s", engine, audio_path)

    progress(1.00, desc="β Done!")
    return script, audio_path
| def _mode_label(mode: str, sub_mode: str) -> str: | |
| m = mode.lower() | |
| if "podcast" in m: | |
| return "podcast" | |
| if "debate" in m: | |
| return "debate" | |
| if "story" in m: | |
| return "story" | |
| if "song" in m or "rap" in m: | |
| return sub_mode.lower() | |
| return "summary" | |
| # ────────────────────────────────────────────────────────────────────────────── | |
| # Per-tab handlers | |
| # ────────────────────────────────────────────────────────────────────────────── | |
def process_file(file, delivery_mode, song_rap_sub, progress=gr.Progress()):
    """
    Handle the "File Upload" tab: validate, extract text, run the pipeline.

    Args:
        file: Uploaded file; either a path string or an object exposing
            ``.name`` as its path.
        delivery_mode: Delivery mode label selected in the UI.
        song_rap_sub: "Song" or "Rap" style selection.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            how Gradio injects the tracker, so it must stay a default value.

    Returns:
        ``(script, audio_path)`` as produced by ``_run_pipeline``.

    Raises:
        gr.Error: For a missing or invalid file, too little extracted text,
            or any failure in the pipeline. The original exception is
            attached as ``__cause__``.
    """
    if file is None:
        raise gr.Error("Please upload a PDF or TXT file first.")

    file_path = file.name if hasattr(file, "name") else str(file)
    is_valid, msg = validate_file(file_path)
    if not is_valid:
        raise gr.Error(msg)

    try:
        progress(0.10, desc="π Extracting text from documentβ¦")
        text = extract_text(file_path)
        if not text or len(text.strip()) < 50:
            raise gr.Error("Document has too little text. Please upload a richer file.")
        progress(0.20, desc="β Text extracted")
        return _run_pipeline(text, delivery_mode, song_rap_sub, progress)
    except gr.Error:
        # Already user-facing; let it through untouched.
        raise
    except OSError as e:  # EnvironmentError is an alias of OSError in Python 3
        raise gr.Error(str(e)) from e
    except Exception as e:
        # Keep the traceback in the server log; the user only sees the
        # formatted message.
        logger.exception("File pipeline failed for %s", file_path)
        raise gr.Error(format_error("pipeline", e)) from e
def process_url(url_input, delivery_mode, song_rap_sub, progress=gr.Progress()):
    """
    Handle the "URL" tab: fetch content for the link and run the pipeline.

    Args:
        url_input: Text typed into the URL box; surrounding whitespace is
            stripped before ingestion.
        delivery_mode: Delivery mode label selected in the UI.
        song_rap_sub: "Song" or "Rap" style selection.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            how Gradio injects the tracker, so it must stay a default value.

    Returns:
        ``(script, audio_path)`` as produced by ``_run_pipeline``.

    Raises:
        gr.Error: For empty input or any failure during ingestion or the
            pipeline. The original exception is attached as ``__cause__``.
    """
    if not url_input or not url_input.strip():
        raise gr.Error("Please enter a URL or YouTube link.")

    try:
        progress(0.05, desc="π Fetching contentβ¦")
        text, source_label = ingest_from_url_or_text(url_input.strip())
        logger.info("Ingested from %s: %d chars", source_label, len(text))
        progress(0.20, desc=f"β Content fetched from {source_label}")
        return _run_pipeline(text, delivery_mode, song_rap_sub, progress)
    except gr.Error:
        # Already user-facing; let it through untouched.
        raise
    except (ValueError, OSError) as e:
        # These are surfaced verbatim to the user. OSError is what
        # EnvironmentError aliases in Python 3.
        raise gr.Error(str(e)) from e
    except Exception as e:
        # Keep the traceback in the server log; the user only sees the
        # formatted message.
        logger.exception("URL pipeline failed")
        raise gr.Error(format_error("pipeline", e)) from e
def process_paste(pasted_text, delivery_mode, song_rap_sub, progress=gr.Progress()):
    """
    Handle the "Paste Text" tab: prepare the pasted text and run the pipeline.

    Args:
        pasted_text: Raw text from the paste box.
        delivery_mode: Delivery mode label selected in the UI.
        song_rap_sub: "Song" or "Rap" style selection.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            how Gradio injects the tracker, so it must stay a default value.

    Returns:
        ``(script, audio_path)`` as produced by ``_run_pipeline``.

    Raises:
        gr.Error: For empty input or any failure during text preparation or
            the pipeline. The original exception is attached as ``__cause__``.
    """
    if not pasted_text or not pasted_text.strip():
        raise gr.Error("Please paste some text first.")

    try:
        progress(0.10, desc="π Processing pasted textβ¦")
        text = extract_pasted_text(pasted_text)
        progress(0.20, desc="β Text ready")
        return _run_pipeline(text, delivery_mode, song_rap_sub, progress)
    except gr.Error:
        # Already user-facing; let it through untouched.
        raise
    except (ValueError, OSError) as e:
        # These are surfaced verbatim to the user. OSError is what
        # EnvironmentError aliases in Python 3.
        raise gr.Error(str(e)) from e
    except Exception as e:
        # Keep the traceback in the server log; the user only sees the
        # formatted message.
        logger.exception("Paste pipeline failed")
        raise gr.Error(format_error("pipeline", e)) from e
| # ────────────────────────────────────────────────────────────────────────────── | |
| # UI helpers | |
| # ────────────────────────────────────────────────────────────────────────────── | |
| def _mode_description(mode: str) -> str: | |
| return { | |
| "Summary": ( | |
| "*π **Summary** β Structured narration: intro, key points, conclusion. " | |
| "Single voice, neutral tone.*" | |
| ), | |
| "Podcast": ( | |
| "*ποΈ **Podcast** β Two-host conversation. Female host guides; " | |
| "Male host explains. Dual voices.*" | |
| ), | |
| "Song / Rap": ( | |
| "*π΅ **Song / Rap** β Key ideas as a rhythmic track. " | |
| "Song = smooth flow Β· Rap = fast, punchy, bass-boosted.*" | |
| ), | |
| "Debate": ( | |
| "*βοΈ **Debate** β Two debaters argue opposing sides. " | |
| "Female voice (pro, assertive) vs Male voice (con, deliberate).*" | |
| ), | |
| "Story": ( | |
| "*π **Story** β Content retold as an immersive narrative. " | |
| "Slow, warm delivery with expressive pauses.*" | |
| ), | |
| }.get(mode, "") | |
def _on_mode_change(mode: str):
    """
    React to a delivery-mode change.

    Returns a visibility update for the Song/Rap style row (shown only when
    the mode label contains "song" or "rap", case-insensitively) together
    with the Markdown description for the selected mode.
    """
    lowered = mode.lower()
    wants_style_row = any(token in lowered for token in ("song", "rap"))
    row_update = gr.update(visible=wants_style_row)
    return row_update, _mode_description(mode)
| # ────────────────────────────────────────────────────────────────────────────── | |
| # Gradio UI | |
| # ────────────────────────────────────────────────────────────────────────────── | |
def build_ui() -> gr.Blocks:
    """
    Assemble the VoiceVerse AI Gradio interface and wire its events.

    Layout:
        * Header banner.
        * Left column: three source tabs (file upload, URL, pasted text),
          each with its own "Generate Audio" button, followed by the
          delivery-mode radio, the Song/Rap style row (hidden initially) and
          the mode description.
        * Right column: generated audio player and generated script box.
        * Footer credit line.

    Event wiring:
        * Changing the delivery mode calls ``_on_mode_change`` to toggle the
          Song/Rap row and refresh the description.
        * Each tab's button calls its handler (``process_file``,
          ``process_url``, ``process_paste``) with the tab's input plus the
          delivery mode and Song/Rap style, writing to the script box and
          the audio player.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).

    NOTE(review): several labels below contain what looks like mis-encoded
    emoji/punctuation; they are runtime strings and are left unchanged.
    """
    # Custom CSS injected into the Blocks app.
    # NOTE(review): no component created in this function references the
    # .mode-card class — confirm whether it is still needed.
    css = """
    .main-header { text-align: center; margin-bottom: 1rem; }
    .main-header h1 {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5rem;
    font-weight: 800;
    margin-bottom: 0.25rem;
    }
    .main-header p { color: #6b7280; font-size: 1.1rem; }
    .mode-card {
    background: linear-gradient(135deg, #f8f7ff 0%, #f0edff 100%);
    border: 1px solid #e0d9ff;
    border-radius: 12px;
    padding: 1rem 1.25rem;
    margin-top: 0.75rem;
    margin-bottom: 0.75rem;
    }
    /* Hide the "Radio" label Gradio adds automatically */
    #delivery-mode-radio .label-wrap { display: none !important; }
    .url-hint { color: #6b7280; font-size: 0.82rem; margin-top: 0.3rem; }
    """
    with gr.Blocks(
        title="VoiceVerse AI",
        theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
        css=css,
    ) as app:
        # -- Header ------------------------------------------------------------
        gr.HTML("""
        <div class="main-header">
        <h1>ποΈ VoiceVerse AI</h1>
        <p>Transform any content into an engaging audio experience</p>
        </div>
        """)
        with gr.Row(equal_height=False):
            # -- LEFT COLUMN: content source + delivery mode ------------------
            with gr.Column(scale=1):
                gr.Markdown("### π₯ Choose Your Content Source")
                with gr.Tabs():
                    # -- Tab 1: File upload ------------------------------------
                    with gr.Tab("π File Upload"):
                        # type="filepath" is requested so the handler receives
                        # a path rather than file contents.
                        file_input = gr.File(
                            label="Upload a PDF or TXT file",
                            file_types=[".pdf", ".txt"],
                            type="filepath",
                        )
                        file_btn = gr.Button(
                            "ποΈ Generate Audio",
                            variant="primary",
                            size="lg",
                        )
                    # -- Tab 2: URL / YouTube ----------------------------------
                    with gr.Tab("π URL"):
                        url_input = gr.Textbox(
                            label=None,
                            placeholder=(
                                "Paste any link hereβ¦\n\n"
                                "βΆ YouTube: https://youtube.com/watch?v=...\n"
                                "π° Article: https://example.com/article\n"
                                "π Website: https://en.wikipedia.org/wiki/..."
                            ),
                            lines=5,
                            max_lines=6,
                            show_label=False,
                        )
                        # Static hint about which links are expected to work.
                        gr.HTML(
                            "<p class='url-hint'>"
                            "β Works with: YouTube (with captions), news articles, "
                            "blogs, Wikipedia, most public pages.<br>"
                            "β Won't work: paywalled or login-required pages."
                            "</p>"
                        )
                        url_btn = gr.Button(
                            "ποΈ Generate Audio",
                            variant="primary",
                            size="lg",
                        )
                    # -- Tab 3: Paste text -------------------------------------
                    with gr.Tab("π Paste Text"):
                        paste_input = gr.Textbox(
                            label=None,
                            placeholder=(
                                "Paste any text here β article content, notes, "
                                "transcripts, research, anythingβ¦"
                            ),
                            lines=10,
                            max_lines=40,
                            show_label=False,
                        )
                        paste_btn = gr.Button(
                            "ποΈ Generate Audio",
                            variant="primary",
                            size="lg",
                        )
                # -- Delivery Mode card ----------------------------------------
                gr.Markdown("### π¨ Choose Audio Experience")
                # The choices must match the keys used by _mode_description
                # and the mode strings routed on in _run_pipeline.
                delivery_mode = gr.Radio(
                    choices=["Summary", "Podcast", "Song / Rap", "Debate", "Story"],
                    value="Summary",
                    show_label=False,  # removes the "Radio" label
                    elem_id="delivery-mode-radio",
                )
                # Song/Rap sub-option: hidden unless Song/Rap is selected
                with gr.Row(visible=False) as song_rap_row:
                    song_rap_sub = gr.Radio(
                        choices=["Song", "Rap"],
                        value="Rap",
                        label="Style",
                    )
                # Starts with the description of the default "Summary" mode.
                mode_description = gr.Markdown(value=_mode_description("Summary"))
            # -- RIGHT COLUMN: generated outputs ------------------------------
            with gr.Column(scale=1):
                gr.Markdown("### π§ Generated Audio")
                audio_output = gr.Audio(
                    label="Audio",
                    type="filepath",
                    interactive=False,
                    show_download_button=True,
                )
                gr.Markdown("### βοΈ Generated Script")
                script_output = gr.Textbox(
                    label="Script",
                    lines=14,
                    max_lines=22,
                    interactive=False,
                    placeholder="Your generated script will appear hereβ¦",
                    show_copy_button=True,
                )
        # -- Footer ------------------------------------------------------------
        gr.Markdown(
            "<center style='color:#9ca3af;margin-top:1rem;'>"
            "Built with β€οΈ using SmolLM3-3B Β· Qwen3-TTS Β· Edge-TTS Β· Gradio"
            "</center>"
        )
        # -- Event wiring ------------------------------------------------------
        # Mode change: toggle the Song/Rap row and refresh the description.
        delivery_mode.change(
            fn=_on_mode_change,
            inputs=[delivery_mode],
            outputs=[song_rap_row, mode_description],
        )
        # Every handler returns (script, audio_path), hence the shared
        # outputs order [script_output, audio_output].
        file_btn.click(
            fn=process_file,
            inputs=[file_input, delivery_mode, song_rap_sub],
            outputs=[script_output, audio_output],
        )
        url_btn.click(
            fn=process_url,
            inputs=[url_input, delivery_mode, song_rap_sub],
            outputs=[script_output, audio_output],
        )
        paste_btn.click(
            fn=process_paste,
            inputs=[paste_input, delivery_mode, song_rap_sub],
            outputs=[script_output, audio_output],
        )
    return app
# -- Entry point ---------------------------------------------------------------
if __name__ == "__main__":
    logger.info("Starting VoiceVerse AIβ¦")
    # Server settings gathered in one place; "0.0.0.0" listens on all
    # network interfaces.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    app = build_ui()
    app.launch(**launch_options)