🎙️ VoiceVerse AI
Transform your documents into engaging podcast-style audio
""" VoiceVerse AI — Main Application. Gradio-based UI that orchestrates the full document-to-audio pipeline: 1. Upload PDF/TXT → extract text 2. RAG: chunk, embed, retrieve relevant context 3. Generate a spoken-style script via Mistral-7B-Instruct 4. Convert script to expressive audio via Qwen TTS / Edge-TTS 5. Play audio in the browser Entry point for Hugging Face Spaces deployment. """ import os import gradio as gr from utils import logger, validate_file, format_error from rag import extract_text, RAGStore from script_gen import generate_script from tts import generate_audio # ── Global RAG Store (single-user demo) ────────────────────────────────────── rag_store = RAGStore() # ── Pipeline Orchestration ─────────────────────────────────────────────────── def process_document(file, progress=gr.Progress()): """ Full pipeline: upload → extract → RAG → script → audio. Args: file: Gradio uploaded file object (has .name attribute) Returns: Tuple of (script_text, audio_file_path, status_message) """ # ── Step 0: Validate ───────────────────────────────────────────────── if file is None: raise gr.Error("Please upload a PDF or TXT file first.") file_path = file.name if hasattr(file, "name") else str(file) is_valid, msg = validate_file(file_path) if not is_valid: raise gr.Error(msg) try: # ── Step 1: Extract Text ───────────────────────────────────────── progress(0.1, desc="📄 Extracting text from document...") logger.info("Processing file: %s", file_path) text = extract_text(file_path) if not text or len(text.strip()) < 50: raise gr.Error( "The document contains too little text to generate audio. " "Please upload a document with more content." ) progress(0.2, desc="✅ Text extracted successfully") # ── Step 2: RAG — Chunk & Embed ────────────────────────────────── progress(0.3, desc="🧠 Processing document with AI...") rag_store.add_document(text) chunk_count = len(rag_store.chunks) logger.info("Document processed: %d chunks created", chunk_count) # ── Step 3: Retrieve Context ───────────────────────────────────── progress(0.4, desc="🔍 Retrieving key content...") # For short documents, use all chunks; for longer ones, retrieve smartly if chunk_count <= 8: context_chunks = rag_store.get_all_chunks() else: context_chunks = rag_store.query( "What are the main topics, key insights, and important details?", top_k=6, ) progress(0.5, desc="✅ Context retrieved") # ── Step 4: Generate Script ────────────────────────────────────── progress(0.6, desc="✍️ Writing spoken script...") script = generate_script(context_chunks) logger.info("Script generated: %d characters", len(script)) progress(0.75, desc="✅ Script ready") # ── Step 5: Generate Audio ─────────────────────────────────────── progress(0.8, desc="🎙️ Generating expressive audio...") audio_path, engine = generate_audio(script) logger.info("Audio generated via %s: %s", engine, audio_path) progress(1.0, desc="✅ Audio ready!") # ── Build status message ───────────────────────────────────────── status = ( f"✅ **Generation complete!**\n\n" f"- 📄 Document: {os.path.basename(file_path)}\n" f"- 📝 Text extracted: {len(text):,} characters\n" f"- 🧩 Chunks created: {chunk_count}\n" f"- ✍️ Script length: {len(script):,} characters\n" f"- 🎙️ Voice engine: {engine}\n" ) return script, audio_path, status except gr.Error: raise # Re-raise Gradio errors as-is except EnvironmentError as e: raise gr.Error(str(e)) except Exception as e: error_msg = format_error("pipeline", e) raise gr.Error(error_msg) # ── Gradio UI ──────────────────────────────────────────────────────────────── def build_ui() -> gr.Blocks: """Build and return the Gradio Blocks interface.""" # Custom CSS for a clean, polished look css = """ .main-header { text-align: center; margin-bottom: 1rem; } .main-header h1 { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-size: 2.5rem; font-weight: 800; margin-bottom: 0.25rem; } .main-header p { color: #6b7280; font-size: 1.1rem; } .status-box { border-left: 3px solid #667eea; padding-left: 1rem; margin: 0.5rem 0; } """ with gr.Blocks( title="VoiceVerse AI — Document to Audio", theme=gr.themes.Soft( primary_hue="indigo", secondary_hue="purple", ), css=css, ) as app: # ── Header ─────────────────────────────────────────────────────── gr.HTML("""
Transform your documents into engaging podcast-style audio