"""
VoiceVerse AI — Main Application.

Input sources (three tabs):
  Tab 1 — Upload PDF or TXT file
  Tab 2 — URL / YouTube link
  Tab 3 — Paste raw text

Delivery Modes: Summary / Podcast / Rap / Debate / Story

No status card shown. RAG pipeline unchanged.
"""

import os

import gradio as gr

from utils import logger, validate_file, format_error
from rag import extract_text, RAGStore
from script_gen import generate_script
from tts import (
    generate_audio,
    generate_audio_podcast,
    generate_audio_debate,
    generate_audio_rap,
    generate_audio_story,
    apply_pitch_shift,
)
from ingestion import ingest_from_url_or_text, extract_pasted_text

# ── Global RAG Store ──────────────────────────────────────────────────────────
# NOTE(review): this store is shared by every request handled by the process.
# Whether add_document() replaces or appends to previously indexed content is
# decided inside RAGStore — confirm before relying on per-request isolation.
rag_store = RAGStore()


# ══════════════════════════════════════════════════════════════════════════════
# Shared RAG + Script + TTS pipeline
# ══════════════════════════════════════════════════════════════════════════════

def _run_pipeline(
    text: str,
    delivery_mode: str,
    song_rap_sub: str,
    pitch_shift: float,
    progress,
) -> tuple[str, str]:
    """
    RAG → script → audio. Shared by all three input tabs.

    Args:
        text: Source text to index and narrate.
        delivery_mode: Selected mode; matched case-insensitively after
            stripping surrounding whitespace.
        song_rap_sub: Sub-mode forwarded to the script generator.
        pitch_shift: Requested shift; magnitudes below 0.1 (or a falsy value)
            skip the pitch-shift step.
        progress: Callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        (script, audio_path).
    """
    # RAG: chunk & embed
    progress(0.30, desc="🧠 Building knowledge index…")
    rag_store.add_document(text)
    chunk_count = len(rag_store.chunks)
    logger.info("RAG index: %d chunks", chunk_count)

    # RAG: retrieve — small indexes are passed through whole, larger ones are
    # narrowed to the top matches for a generic "what matters" query.
    progress(0.45, desc="🔍 Retrieving relevant content…")
    if chunk_count <= 8:
        context_chunks = rag_store.get_all_chunks()
    else:
        context_chunks = rag_store.query(
            "What are the main topics, key insights, and important details?",
            top_k=6,
        )

    # Script generation
    progress(
        0.60,
        desc=f"✍️ Writing {_mode_label(delivery_mode, song_rap_sub)} script…",
    )
    script = generate_script(
        context_chunks=context_chunks,
        mode=delivery_mode,
        sub_mode=song_rap_sub,
    )
    logger.info("Script: %d chars", len(script))

    # TTS — route by mode; anything unrecognised uses the single-voice engine.
    progress(0.80, desc="🎙️ Synthesising audio…")
    m = delivery_mode.strip().lower()
    if m == "podcast":
        audio_path, engine = generate_audio_podcast(script)
    elif m == "debate":
        audio_path, engine = generate_audio_debate(script)
    elif "rap" in m:
        audio_path, engine = generate_audio_rap(script)
    elif m == "story":
        audio_path, engine = generate_audio_story(script)
    else:
        audio_path, engine = generate_audio(script)
    logger.info("Audio via %s: %s", engine, audio_path)

    # Apply pitch shift if requested
    if pitch_shift and abs(pitch_shift) >= 0.1:
        progress(0.90, desc="🎵 Adjusting pitch…")
        audio_path = apply_pitch_shift(audio_path, pitch_shift)

    progress(1.00, desc="✅ Done!")
    return script, audio_path


def _mode_label(mode: str, sub_mode: str) -> str:
    """
    Return the lower-case word used for *mode* in the progress message.

    Song/rap modes are labelled with *sub_mode*; if that is empty or None the
    lower-cased mode name is used instead, so the progress text never ends up
    blank and the call never fails on a missing sub-mode. Anything that is not
    podcast, debate, story, song or rap is labelled "summary".
    """
    m = mode.lower()
    for keyword in ("podcast", "debate", "story"):
        if keyword in m:
            return keyword
    if "song" in m or "rap" in m:
        return sub_mode.lower() if sub_mode else m
    return "summary"


# ══════════════════════════════════════════════════════════════════════════════
# Per-tab handlers
# ══════════════════════════════════════════════════════════════════════════════

def process_file(file, delivery_mode, song_rap_sub, pitch_shift, progress=gr.Progress()):
    """
    Handle the "File Upload" tab: validate, extract text, run the pipeline.

    Args:
        file: Uploaded file; either an object with a ``name`` attribute or a
            value whose ``str()`` is the path.
        delivery_mode, song_rap_sub, pitch_shift: Forwarded to the pipeline.
        progress: Gradio progress tracker.

    Returns:
        (script, audio_path) from the shared pipeline.

    Raises:
        gr.Error: No file given, validation failed, fewer than 50
            non-whitespace-trimmed characters were extracted, or any
            downstream failure.
    """
    if file is None:
        raise gr.Error("Please upload a PDF or TXT file first.")

    file_path = file.name if hasattr(file, "name") else str(file)
    is_valid, msg = validate_file(file_path)
    if not is_valid:
        raise gr.Error(msg)

    try:
        progress(0.10, desc="📄 Extracting text from document…")
        text = extract_text(file_path)
        if not text or len(text.strip()) < 50:
            raise gr.Error("Document has too little text. Please upload a richer file.")

        progress(0.20, desc="✅ Text extracted")
        return _run_pipeline(text, delivery_mode, song_rap_sub, pitch_shift, progress)
    except gr.Error:
        raise
    # NOTE(review): unlike the URL and paste handlers, ValueError is not shown
    # verbatim here; it goes through format_error() below. Confirm intended.
    except OSError as e:  # EnvironmentError is an alias of OSError
        raise gr.Error(str(e)) from e
    except Exception as e:
        raise gr.Error(format_error("pipeline", e)) from e


def process_url(url_input, delivery_mode, song_rap_sub, pitch_shift, progress=gr.Progress()):
    """
    Handle the "URL" tab: fetch the content behind the link, run the pipeline.

    Returns:
        (script, audio_path) from the shared pipeline.

    Raises:
        gr.Error: Input is empty/blank, or fetching or the pipeline failed.
            ValueError and OSError messages are shown verbatim; anything else
            is passed through format_error().
    """
    if not url_input or not url_input.strip():
        raise gr.Error("Please enter a URL or YouTube link.")

    try:
        progress(0.05, desc="🌐 Fetching content…")
        text, source_label = ingest_from_url_or_text(url_input.strip())
        logger.info("Ingested from %s: %d chars", source_label, len(text))

        progress(0.20, desc=f"✅ Content fetched from {source_label}")
        return _run_pipeline(text, delivery_mode, song_rap_sub, pitch_shift, progress)
    except gr.Error:
        raise
    except (ValueError, OSError) as e:  # EnvironmentError is an alias of OSError
        raise gr.Error(str(e)) from e
    except Exception as e:
        raise gr.Error(format_error("pipeline", e)) from e


def process_paste(pasted_text, delivery_mode, song_rap_sub, pitch_shift, progress=gr.Progress()):
    """
    Handle the "Paste Text" tab: clean the pasted text, run the pipeline.

    Returns:
        (script, audio_path) from the shared pipeline.

    Raises:
        gr.Error: Input is empty/blank, or extraction or the pipeline failed.
            ValueError and OSError messages are shown verbatim; anything else
            is passed through format_error().
    """
    if not pasted_text or not pasted_text.strip():
        raise gr.Error("Please paste some text first.")

    try:
        progress(0.10, desc="📋 Processing pasted text…")
        text = extract_pasted_text(pasted_text)

        progress(0.20, desc="✅ Text ready")
        return _run_pipeline(text, delivery_mode, song_rap_sub, pitch_shift, progress)
    except gr.Error:
        raise
    except (ValueError, OSError) as e:  # EnvironmentError is an alias of OSError
        raise gr.Error(str(e)) from e
    except Exception as e:
        raise gr.Error(format_error("pipeline", e)) from e


# ══════════════════════════════════════════════════════════════════════════════
# UI helpers
# ══════════════════════════════════════════════════════════════════════════════

# Markdown blurb shown under the mode selector, keyed by the radio choice.
# Insertion order doubles as the order of the radio buttons in build_ui().
_MODE_DESCRIPTIONS = {
    "Summary": (
        "*📋 **Summary** — Structured narration: intro, key points, conclusion. "
        "Single voice, neutral tone.*"
    ),
    "Podcast": (
        "*🎙️ **Podcast** — Two-host conversation between Alex and Sam. "
        "Alex guides; Sam explains. Dual voices.*"
    ),
    "Rap": (
        "*🎵 **Rap** — Key ideas as a punchy rhythmic track. "
        "Fast delivery, bass-boosted, line-by-line flow.*"
    ),
    "Debate": (
        "*⚔️ **Debate** — Maya (pro) vs Ryan (con) argue opposing sides. "
        "Female voice (assertive) vs Male voice (deliberate).*"
    ),
    "Story": (
        "*📖 **Story** — Content retold as an immersive narrative. "
        "Slow, warm delivery with expressive pauses.*"
    ),
}


def _mode_description(mode: str) -> str:
    """Return the Markdown blurb for *mode*, or "" when the mode is unknown."""
    return _MODE_DESCRIPTIONS.get(mode, "")


def _on_mode_change(mode: str):
    """
    Return (visibility update, description) for a mode change.

    The visibility update is ``visible=True`` only for modes whose name
    contains "song" or "rap". Not wired to any component in build_ui().
    """
    show_sub = "song" in mode.lower() or "rap" in mode.lower()
    return gr.update(visible=show_sub), _mode_description(mode)


# ══════════════════════════════════════════════════════════════════════════════
# Gradio UI
# ══════════════════════════════════════════════════════════════════════════════

def build_ui() -> gr.Blocks:
    """
    Build the Gradio Blocks app.

    Layout: a left column with three input tabs (file, URL, pasted text), the
    delivery-mode radio and a pitch slider; a right column with the audio
    player and the generated script. Each tab's button calls its own handler
    with (input, mode, sub-mode, pitch) and fills (script, audio).

    Returns:
        The assembled, not yet launched, ``gr.Blocks`` instance.
    """
    css = """
    .main-header { text-align: center; margin-bottom: 1rem; }
    .main-header h1 {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        font-size: 2.5rem;
        font-weight: 800;
        margin-bottom: 0.25rem;
    }
    .main-header p { color: #6b7280; font-size: 1.1rem; }
    .mode-card {
        background: linear-gradient(135deg, #f8f7ff 0%, #f0edff 100%);
        border: 1px solid #e0d9ff;
        border-radius: 12px;
        padding: 1rem 1.25rem;
        margin-top: 0.75rem;
        margin-bottom: 0.75rem;
    }
    /* Hide the "Radio" label Gradio adds automatically */
    #delivery-mode-radio .label-wrap { display: none !important; }
    .url-hint { color: #6b7280; font-size: 0.82rem; margin-top: 0.3rem; }
    """

    with gr.Blocks(
        title="VoiceVerse AI",
        theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
        css=css,
    ) as app:

        # ── Header ───────────────────────────────────────────────────────────
        # Wrapper/heading tags match the .main-header, .main-header h1 and
        # .main-header p rules in the CSS above.
        gr.HTML("""
        <div class="main-header">
            <h1>🎙️ VoiceVerse AI</h1>
            <p>Transform any content into an engaging audio experience</p>
        </div>
        """)

        with gr.Row(equal_height=False):

            # ── LEFT COLUMN ───────────────────────────────────────────────────
            with gr.Column(scale=1):
                gr.Markdown("### 📥 Choose Your Content Source")

                with gr.Tabs():

                    # ── Tab 1: File upload ────────────────────────────────────
                    with gr.Tab("📄 File Upload"):
                        file_input = gr.File(
                            label="Upload a PDF or TXT file",
                            file_types=[".pdf", ".txt"],
                            type="filepath",
                        )
                        file_btn = gr.Button(
                            "🎙️ Generate Audio",
                            variant="primary",
                            size="lg",
                        )

                    # ── Tab 2: URL ────────────────────────────────────────────
                    with gr.Tab("🔗 URL"):
                        url_input = gr.Textbox(
                            label=None,
                            placeholder=(
                                "Paste any link here…\n\n"
                                "📰 Article: https://example.com/article\n"
                                "🌐 Website: https://en.wikipedia.org/wiki/...\n"
                                "📝 Blog: https://blog.example.com/post"
                            ),
                            lines=5,
                            max_lines=6,
                            show_label=False,
                        )
                        # The wrapper carries the .url-hint class styled in
                        # the CSS above.
                        gr.HTML(
                            "<div class='url-hint'>"
                            "✅ Works with: news articles, "
                            "blogs, Wikipedia, most public pages.<br>"
                            "❌ Won't work: paywalled or login-required pages."
                            "</div>"
                        )
                        url_btn = gr.Button(
                            "🎙️ Generate Audio",
                            variant="primary",
                            size="lg",
                        )

                    # ── Tab 3: Paste text ─────────────────────────────────────
                    with gr.Tab("📋 Paste Text"):
                        paste_input = gr.Textbox(
                            label=None,
                            placeholder=(
                                "Paste any text here — article content, notes, "
                                "transcripts, research, anything…"
                            ),
                            lines=10,
                            max_lines=40,
                            show_label=False,
                        )
                        paste_btn = gr.Button(
                            "🎙️ Generate Audio",
                            variant="primary",
                            size="lg",
                        )

                # ── Delivery Mode card ────────────────────────────────────────
                gr.Markdown("### 🎨 Choose Audio Experience")
                delivery_mode = gr.Radio(
                    # Derived from the description table so every choice has
                    # a blurb: Summary, Podcast, Rap, Debate, Story.
                    choices=list(_MODE_DESCRIPTIONS),
                    value="Summary",
                    show_label=False,
                    elem_id="delivery-mode-radio",
                )
                # Hidden state for backward compat
                song_rap_sub = gr.State("Rap")
                mode_description = gr.Markdown(value=_mode_description("Summary"))

                # ── Pitch adjustment ──────────────────────────────────────────
                gr.Markdown("### 🎵 Audio Adjustments")
                pitch_slider = gr.Slider(
                    minimum=-6,
                    maximum=6,
                    step=0.5,
                    value=0,
                    label="Pitch Shift (semitones)",
                    info="Negative = deeper voice, Positive = higher voice",
                )

            # ── RIGHT COLUMN ──────────────────────────────────────────────────
            with gr.Column(scale=1):
                gr.Markdown("### 🎧 Generated Audio")
                audio_output = gr.Audio(
                    label="Audio",
                    type="filepath",
                    interactive=False,
                    show_download_button=True,
                )
                gr.Markdown("### ✍️ Generated Script")
                script_output = gr.Textbox(
                    label="Script",
                    lines=14,
                    max_lines=22,
                    interactive=False,
                    placeholder="Your generated script will appear here…",
                    show_copy_button=True,
                )

        # ── Footer ───────────────────────────────────────────────────────────
        # NOTE(review): the footer's wrapper markup is not recoverable from
        # the reviewed copy; a plain centred <div> is used — confirm styling.
        gr.Markdown(
            "<div style='text-align: center;'>"
            "Built with ❤️ using SmolLM3-3B · Qwen3-TTS · Edge-TTS · Gradio"
            "</div>"
        )

        # ── Event wiring ─────────────────────────────────────────────────────
        delivery_mode.change(
            fn=_mode_description,
            inputs=[delivery_mode],
            outputs=[mode_description],
        )
        file_btn.click(
            fn=process_file,
            inputs=[file_input, delivery_mode, song_rap_sub, pitch_slider],
            outputs=[script_output, audio_output],
        )
        url_btn.click(
            fn=process_url,
            inputs=[url_input, delivery_mode, song_rap_sub, pitch_slider],
            outputs=[script_output, audio_output],
        )
        paste_btn.click(
            fn=process_paste,
            inputs=[paste_input, delivery_mode, song_rap_sub, pitch_slider],
            outputs=[script_output, audio_output],
        )

    return app


# ── Entry point ───────────────────────────────────────────────────────────────
if __name__ == "__main__":
    logger.info("Starting VoiceVerse AI…")
    app = build_ui()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
    )