Spaces:
Sleeping
Sleeping
"""
VoiceVerse AI — Main Application.

Gradio-based UI that orchestrates the full document-to-audio pipeline:
    1. Upload PDF/TXT → extract text
    2. RAG: chunk, embed, retrieve relevant context
    3. Generate a spoken-style script via Mistral-7B-Instruct
    4. Convert script to expressive audio via Qwen TTS / Edge-TTS
    5. Play audio in the browser

Entry point for Hugging Face Spaces deployment.
"""
| import os | |
| import gradio as gr | |
| from utils import logger, validate_file, format_error | |
| from rag import extract_text, RAGStore | |
| from script_gen import generate_script | |
| from tts import generate_audio | |
# ── Global RAG Store ─────────────────────────────────────────────────────────
# A single module-level instance, created at import time (single-user demo).
rag_store: RAGStore = RAGStore()
# ── Pipeline Orchestration ───────────────────────────────────────────────────
def process_document(file, progress=gr.Progress()):
    """
    Run the full pipeline: upload → extract → RAG → script → audio.

    Args:
        file: Uploaded file as handed over by Gradio. Either an object with a
            ``.name`` attribute holding the path, or a value whose ``str()``
            is the path.
        progress: Gradio progress tracker; called with a completion fraction
            and a description at each stage.

    Returns:
        Tuple of (script_text, audio_file_path, status_message).

    Raises:
        gr.Error: If no file was supplied, ``validate_file`` rejects it, the
            extracted text is shorter than 50 characters after stripping,
            the generated script is empty, or any pipeline stage raises
            (the original exception is logged and chained as the cause).
    """
    # ── Step 0: Validate ─────────────────────────────────────────────────
    if file is None:
        raise gr.Error("Please upload a PDF or TXT file first.")

    file_path = file.name if hasattr(file, "name") else str(file)
    is_valid, msg = validate_file(file_path)
    if not is_valid:
        raise gr.Error(msg)

    try:
        # ── Step 1: Extract Text ─────────────────────────────────────────
        progress(0.1, desc="📄 Extracting text from document...")
        logger.info("Processing file: %s", file_path)
        text = extract_text(file_path)
        if not text or len(text.strip()) < 50:
            raise gr.Error(
                "The document contains too little text to generate audio. "
                "Please upload a document with more content."
            )
        progress(0.2, desc="✅ Text extracted successfully")

        # ── Step 2: RAG — Chunk & Embed ──────────────────────────────────
        progress(0.3, desc="🧠 Processing document with AI...")
        # NOTE(review): rag_store is module-global. Whether add_document()
        # replaces or appends to earlier uploads is not visible from here —
        # confirm, since chunk_count below reads the store's full chunk list.
        rag_store.add_document(text)
        chunk_count = len(rag_store.chunks)
        logger.info("Document processed: %d chunks created", chunk_count)

        # ── Step 3: Retrieve Context ─────────────────────────────────────
        progress(0.4, desc="🔍 Retrieving key content...")
        # With 8 or fewer chunks use all of them; otherwise query the store
        # for the 6 chunks most relevant to a generic "main topics" prompt.
        if chunk_count <= 8:
            context_chunks = rag_store.get_all_chunks()
        else:
            context_chunks = rag_store.query(
                "What are the main topics, key insights, and important details?",
                top_k=6,
            )
        progress(0.5, desc="✅ Context retrieved")

        # ── Step 4: Generate Script ──────────────────────────────────────
        progress(0.6, desc="✍️ Writing spoken script...")
        script = generate_script(context_chunks)
        if not script:
            # Fail with a clear message instead of sending nothing to TTS.
            raise gr.Error(
                "Script generation returned no text. "
                "Please try again or upload a different document."
            )
        logger.info("Script generated: %d characters", len(script))
        progress(0.75, desc="✅ Script ready")

        # ── Step 5: Generate Audio ───────────────────────────────────────
        progress(0.8, desc="🎙️ Generating expressive audio...")
        audio_path, engine = generate_audio(script)
        logger.info("Audio generated via %s: %s", engine, audio_path)
        progress(1.0, desc="✅ Audio ready!")

        # ── Build status message ─────────────────────────────────────────
        status = (
            f"✅ **Generation complete!**\n\n"
            f"- 📄 Document: {os.path.basename(file_path)}\n"
            f"- 📝 Text extracted: {len(text):,} characters\n"
            f"- 🧩 Chunks created: {chunk_count}\n"
            f"- ✍️ Script length: {len(script):,} characters\n"
            f"- 🎙️ Voice engine: {engine}\n"
        )
        return script, audio_path, status

    except gr.Error:
        raise  # Already user-facing; re-raise as-is.
    except EnvironmentError as e:
        # Keep the traceback in the logs and chain the cause so the original
        # failure is not lost when it is converted to a UI error.
        logger.exception("Environment error while processing %s", file_path)
        raise gr.Error(str(e)) from e
    except Exception as e:
        logger.exception("Pipeline failed for %s", file_path)
        raise gr.Error(format_error("pipeline", e)) from e
# ── Gradio UI ────────────────────────────────────────────────────────────────
def build_ui() -> gr.Blocks:
    """
    Build and return the Gradio Blocks interface.

    The layout is a centered header, a two-column row (file upload, generate
    button and status on the left; audio player and script text on the
    right), and a footer. Clicking the generate button calls
    ``process_document`` with the uploaded file and routes its three return
    values to the script box, the audio player and the status area.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    # Custom CSS for the header gradient and the status side-bar.
    css = """
    .main-header {
        text-align: center;
        margin-bottom: 1rem;
    }
    .main-header h1 {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        font-size: 2.5rem;
        font-weight: 800;
        margin-bottom: 0.25rem;
    }
    .main-header p {
        color: #6b7280;
        font-size: 1.1rem;
    }
    .status-box {
        border-left: 3px solid #667eea;
        padding-left: 1rem;
        margin: 0.5rem 0;
    }
    """

    with gr.Blocks(
        title="VoiceVerse AI — Document to Audio",
        theme=gr.themes.Soft(
            primary_hue="indigo",
            secondary_hue="purple",
        ),
        css=css,
    ) as app:
        # ── Header ───────────────────────────────────────────────────────
        gr.HTML("""
        <div class="main-header">
            <h1>🎙️ VoiceVerse AI</h1>
            <p>Transform your documents into engaging podcast-style audio</p>
        </div>
        """)

        with gr.Row():
            # ── Left Column: Input ───────────────────────────────────────
            with gr.Column(scale=1):
                gr.Markdown("### 📤 Upload Document")
                file_input = gr.File(
                    label="Upload a PDF or TXT file",
                    file_types=[".pdf", ".txt"],
                    type="filepath",
                    elem_id="file-upload",
                )
                generate_btn = gr.Button(
                    "🎙️ Generate Audio",
                    variant="primary",
                    size="lg",
                    elem_id="generate-btn",
                )
                status_output = gr.Markdown(
                    value="*Upload a document and click Generate to start.*",
                    elem_classes=["status-box"],
                )

            # ── Right Column: Output ─────────────────────────────────────
            with gr.Column(scale=1):
                gr.Markdown("### 🎧 Generated Audio")
                audio_output = gr.Audio(
                    label="Audio Narration",
                    type="filepath",
                    elem_id="audio-player",
                    interactive=False,
                )
                gr.Markdown("### ✍️ Generated Script")
                script_output = gr.Textbox(
                    label="Spoken Script",
                    lines=12,
                    max_lines=20,
                    interactive=False,
                    placeholder="The generated script will appear here...",
                    elem_id="script-display",
                )

        # ── Wire up the generate button ──────────────────────────────────
        # Output order must match process_document's return tuple:
        # (script, audio path, status).
        generate_btn.click(
            fn=process_document,
            inputs=[file_input],
            outputs=[script_output, audio_output, status_output],
        )

        # ── Footer ───────────────────────────────────────────────────────
        gr.Markdown(
            "<center style='color: #9ca3af; margin-top: 1rem;'>"
            "Built with ❤️ using Mistral-7B-Instruct · Qwen3-TTS · Edge-TTS · Gradio"
            "</center>"
        )

    return app
# ── Entry Point ──────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Runs only when executed as a script: log startup, build the UI, and
    # launch it with the fixed host/port settings below.
    logger.info("Starting VoiceVerse AI...")
    app = build_ui()
    app.launch(
        **{
            "server_name": "0.0.0.0",
            "server_port": 7860,
            "share": False,
            "show_error": True,
        }
    )