Spaces:

Isshi14
/

Voiceover-ai-2

Sleeping

File size: 16,652 Bytes

3828c7d

"""
VoiceVerse AI — Main Application.

Input sources (three tabs):
  Tab 1 — Upload PDF or TXT file
  Tab 2 — URL / YouTube link
  Tab 3 — Paste raw text

Delivery Modes:
  Summary / Podcast / Song / Rap / Debate / Story

No status card shown. RAG pipeline unchanged.
"""

import os
import gradio as gr
from utils import logger, validate_file, format_error
from rag import extract_text, RAGStore
from script_gen import generate_script
from tts import generate_audio, generate_audio_podcast, generate_audio_debate, generate_audio_rap, generate_audio_story
from ingestion import ingest_from_url_or_text, extract_pasted_text

# ── Global RAG Store ──────────────────────────────────────────────────────────
# Single module-level RAGStore instance. _run_pipeline() calls add_document() on
# it for every request, whichever input tab the request came from.
# NOTE(review): whether add_document() replaces or appends to previously indexed
# content is not visible in this file — confirm, since the instance is shared.
rag_store = RAGStore()


# ══════════════════════════════════════════════════════════════════════════════
# Shared RAG + Script + TTS pipeline
# ══════════════════════════════════════════════════════════════════════════════

def _run_pipeline(
    text: str,
    delivery_mode: str,
    song_rap_sub: str,
    progress,
) -> tuple[str, str]:
    """
    Index the text, write a script from it, and synthesise the audio.

    Every input tab funnels into this function once it has plain text.

    Args:
        text: Source text to index and narrate.
        delivery_mode: Delivery mode label picked in the UI (e.g. "Podcast").
        song_rap_sub: Style picked for the "Song / Rap" mode ("Song" or "Rap").
        progress: Callable invoked as ``progress(fraction, desc=...)`` to
            report how far the run has got.

    Returns:
        ``(script, audio_path)``.
    """
    # Step 1 — feed the text into the shared module-level store.
    progress(0.30, desc="🧠 Building knowledge index…")
    rag_store.add_document(text)
    total_chunks = len(rag_store.chunks)
    logger.info("RAG index: %d chunks", total_chunks)

    # Step 2 — above 8 chunks, ask the store for the 6 best matches to a
    # generic "what matters here" question; otherwise use every chunk.
    progress(0.45, desc="🔍 Retrieving relevant content…")
    if total_chunks > 8:
        context_chunks = rag_store.query(
            "What are the main topics, key insights, and important details?",
            top_k=6,
        )
    else:
        context_chunks = rag_store.get_all_chunks()

    # Step 3 — turn the selected chunks into a script for the chosen mode.
    label = _mode_label(delivery_mode, song_rap_sub)
    progress(0.60, desc=f"✍️ Writing {label} script…")
    script = generate_script(
        context_chunks=context_chunks,
        mode=delivery_mode,
        sub_mode=song_rap_sub,
    )
    logger.info("Script: %d chars", len(script))

    # Step 4 — pick the synthesiser that matches the mode. The rap check comes
    # first so the sub-style is only inspected when the mode is "Song / Rap".
    progress(0.80, desc="🎙️ Synthesising audio…")
    mode_key = delivery_mode.strip().lower()
    if mode_key == "song / rap" and song_rap_sub.lower() == "rap":
        synthesise = generate_audio_rap
    else:
        synthesise = {
            "podcast": generate_audio_podcast,
            "debate": generate_audio_debate,
            "story": generate_audio_story,
        }.get(mode_key, generate_audio)
    audio_path, engine = synthesise(script)
    logger.info("Audio via %s: %s", engine, audio_path)

    progress(1.00, desc="✅ Done!")
    return script, audio_path


def _mode_label(mode: str, sub_mode: str) -> str:
    m = mode.lower()
    if "podcast" in m:
        return "podcast"
    if "debate" in m:
        return "debate"
    if "story" in m:
        return "story"
    if "song" in m or "rap" in m:
        return sub_mode.lower()
    return "summary"


# ══════════════════════════════════════════════════════════════════════════════
# Per-tab handlers
# ══════════════════════════════════════════════════════════════════════════════

def process_file(file, delivery_mode, song_rap_sub, progress=gr.Progress()):
    """
    Handle the "File Upload" tab: validate the upload, extract its text and
    run the shared pipeline.

    Args:
        file: Uploaded file value; either something exposing ``.name`` or a
            value whose ``str()`` is the path.
        delivery_mode: Delivery mode label picked in the UI.
        song_rap_sub: Style picked for the "Song / Rap" mode.
        progress: Gradio progress tracker.

    Returns:
        ``(script, audio_path)`` from ``_run_pipeline``.

    Raises:
        gr.Error: For a missing/invalid upload, too little text, or any
            failure inside the pipeline.
    """
    if file is None:
        raise gr.Error("Please upload a PDF or TXT file first.")

    # Accept both an object carrying the path in `.name` and a plain value.
    if hasattr(file, "name"):
        source_path = file.name
    else:
        source_path = str(file)

    ok, reason = validate_file(source_path)
    if not ok:
        raise gr.Error(reason)

    try:
        progress(0.10, desc="📄 Extracting text from document…")
        document_text = extract_text(source_path)
        too_short = not document_text or len(document_text.strip()) < 50
        if too_short:
            raise gr.Error("Document has too little text. Please upload a richer file.")
        progress(0.20, desc="✅ Text extracted")
        return _run_pipeline(document_text, delivery_mode, song_rap_sub, progress)
    except gr.Error:
        # Already a user-facing error — let it through untouched.
        raise
    except EnvironmentError as env_err:
        raise gr.Error(str(env_err))
    except Exception as unexpected:
        raise gr.Error(format_error("pipeline", unexpected))


def process_url(url_input, delivery_mode, song_rap_sub, progress=gr.Progress()):
    """
    Handle the "URL" tab: fetch content for the given link and run the shared
    pipeline.

    Args:
        url_input: Text entered in the URL box.
        delivery_mode: Delivery mode label picked in the UI.
        song_rap_sub: Style picked for the "Song / Rap" mode.
        progress: Gradio progress tracker.

    Returns:
        ``(script, audio_path)`` from ``_run_pipeline``.

    Raises:
        gr.Error: For blank input or any failure while fetching or processing.
    """
    if not (url_input and url_input.strip()):
        raise gr.Error("Please enter a URL or YouTube link.")

    try:
        progress(0.05, desc="🌐 Fetching content…")
        fetched_text, source_label = ingest_from_url_or_text(url_input.strip())
        logger.info("Ingested from %s: %d chars", source_label, len(fetched_text))
        progress(0.20, desc=f"✅ Content fetched from {source_label}")
        return _run_pipeline(fetched_text, delivery_mode, song_rap_sub, progress)
    except gr.Error:
        # Already a user-facing error — let it through untouched.
        raise
    except (ValueError, EnvironmentError) as known_err:
        # These carry a message that is shown to the user verbatim.
        raise gr.Error(str(known_err))
    except Exception as unexpected:
        raise gr.Error(format_error("pipeline", unexpected))

def process_paste(pasted_text, delivery_mode, song_rap_sub, progress=gr.Progress()):
    """
    Handle the "Paste Text" tab: prepare the pasted text and run the shared
    pipeline.

    Args:
        pasted_text: Text entered in the paste box.
        delivery_mode: Delivery mode label picked in the UI.
        song_rap_sub: Style picked for the "Song / Rap" mode.
        progress: Gradio progress tracker.

    Returns:
        ``(script, audio_path)`` from ``_run_pipeline``.

    Raises:
        gr.Error: For blank input or any failure while processing.
    """
    if not (pasted_text and pasted_text.strip()):
        raise gr.Error("Please paste some text first.")

    try:
        progress(0.10, desc="📋 Processing pasted text…")
        prepared_text = extract_pasted_text(pasted_text)
        progress(0.20, desc="✅ Text ready")
        return _run_pipeline(prepared_text, delivery_mode, song_rap_sub, progress)
    except gr.Error:
        # Already a user-facing error — let it through untouched.
        raise
    except (ValueError, EnvironmentError) as known_err:
        # These carry a message that is shown to the user verbatim.
        raise gr.Error(str(known_err))
    except Exception as unexpected:
        raise gr.Error(format_error("pipeline", unexpected))


# ══════════════════════════════════════════════════════════════════════════════
# UI helpers
# ══════════════════════════════════════════════════════════════════════════════

def _mode_description(mode: str) -> str:
    return {
        "Summary": (
            "*📋 **Summary** — Structured narration: intro, key points, conclusion. "
            "Single voice, neutral tone.*"
        ),
        "Podcast": (
            "*🎙️ **Podcast** — Two-host conversation. Female host guides; "
            "Male host explains. Dual voices.*"
        ),
        "Song / Rap": (
            "*🎵 **Song / Rap** — Key ideas as a rhythmic track. "
            "Song = smooth flow · Rap = fast, punchy, bass-boosted.*"
        ),
        "Debate": (
            "*⚔️ **Debate** — Two debaters argue opposing sides. "
            "Female voice (pro, assertive) vs Male voice (con, deliberate).*"
        ),
        "Story": (
            "*📖 **Story** — Content retold as an immersive narrative. "
            "Slow, warm delivery with expressive pauses.*"
        ),
    }.get(mode, "")


def _on_mode_change(mode: str):
    """
    React to a change of delivery mode.

    Returns a pair: a ``gr.update`` that shows the sub-style row only when the
    mode mentions "song" or "rap" (case-insensitive), and the Markdown blurb
    for the selected mode.
    """
    lowered = mode.lower()
    show_sub = any(word in lowered for word in ("song", "rap"))
    return gr.update(visible=show_sub), _mode_description(mode)


# ══════════════════════════════════════════════════════════════════════════════
# Gradio UI
# ══════════════════════════════════════════════════════════════════════════════

def build_ui() -> gr.Blocks:
    """
    Assemble the Gradio Blocks interface and wire its events.

    Layout:
      * Header banner.
      * Left column — three source tabs (file upload, URL, pasted text), each
        with its own "Generate Audio" button, followed by the delivery-mode
        radio, a Song/Rap style row that starts hidden, and a Markdown
        description of the selected mode.
      * Right column — the generated audio player and the generated script.
      * Footer credits.

    Returns:
        The constructed ``gr.Blocks`` app. It is not launched here.
    """

    # Custom styling for the header, the mode card and the URL hint text.
    css = """
    .main-header { text-align: center; margin-bottom: 1rem; }
    .main-header h1 {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        font-size: 2.5rem;
        font-weight: 800;
        margin-bottom: 0.25rem;
    }
    .main-header p { color: #6b7280; font-size: 1.1rem; }

    .mode-card {
        background: linear-gradient(135deg, #f8f7ff 0%, #f0edff 100%);
        border: 1px solid #e0d9ff;
        border-radius: 12px;
        padding: 1rem 1.25rem;
        margin-top: 0.75rem;
        margin-bottom: 0.75rem;
    }

    /* Hide the "Radio" label Gradio adds automatically */
    #delivery-mode-radio .label-wrap { display: none !important; }

    .url-hint { color: #6b7280; font-size: 0.82rem; margin-top: 0.3rem; }
    """

    with gr.Blocks(
        title="VoiceVerse AI",
        theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
        css=css,
    ) as app:

        # ── Header ───────────────────────────────────────────────────────────
        gr.HTML("""
        <div class="main-header">
            <h1>🎙️ VoiceVerse AI</h1>
            <p>Transform any content into an engaging audio experience</p>
        </div>
        """)

        with gr.Row(equal_height=False):

            # ── LEFT COLUMN ───────────────────────────────────────────────────
            with gr.Column(scale=1):

                gr.Markdown("### 📥 Choose Your Content Source")

                with gr.Tabs():

                    # ── Tab 1: File upload ────────────────────────────────────
                    with gr.Tab("📄 File Upload"):
                        file_input = gr.File(
                            label="Upload a PDF or TXT file",
                            file_types=[".pdf", ".txt"],
                            type="filepath",
                        )
                        file_btn = gr.Button(
                            "🎙️ Generate Audio",
                            variant="primary",
                            size="lg",
                        )

                    # ── Tab 2: URL / YouTube ──────────────────────────────────
                    with gr.Tab("🔗 URL"):
                        url_input = gr.Textbox(
                            label=None,
                            placeholder=(
                                "Paste any link here…\n\n"
                                "▶  YouTube:  https://youtube.com/watch?v=...\n"
                                "📰 Article:  https://example.com/article\n"
                                "🌐 Website:  https://en.wikipedia.org/wiki/..."
                            ),
                            lines=5,
                            max_lines=6,
                            show_label=False,
                        )
                        # Static hint listing which kinds of link are expected to work.
                        gr.HTML(
                            "<p class='url-hint'>"
                            "✅ Works with: YouTube (with captions), news articles, "
                            "blogs, Wikipedia, most public pages.<br>"
                            "❌ Won't work: paywalled or login-required pages."
                            "</p>"
                        )
                        url_btn = gr.Button(
                            "🎙️ Generate Audio",
                            variant="primary",
                            size="lg",
                        )

                    # ── Tab 3: Paste text ─────────────────────────────────────
                    with gr.Tab("📋 Paste Text"):
                        paste_input = gr.Textbox(
                            label=None,
                            placeholder=(
                                "Paste any text here — article content, notes, "
                                "transcripts, research, anything…"
                            ),
                            lines=10,
                            max_lines=40,
                            show_label=False,
                        )
                        paste_btn = gr.Button(
                            "🎙️ Generate Audio",
                            variant="primary",
                            size="lg",
                        )

                # ── Delivery Mode card ────────────────────────────────────────
                gr.Markdown("### 🎨 Choose Audio Experience")

                # The choice labels below are the strings the handlers and
                # _mode_description() receive, so they must stay in sync.
                delivery_mode = gr.Radio(
                    choices=["Summary", "Podcast", "Song / Rap", "Debate", "Story"],
                    value="Summary",
                    show_label=False,          # removes the "Radio" label
                    elem_id="delivery-mode-radio",
                )

                # Song/Rap sub-option — hidden unless Song/Rap is selected
                with gr.Row(visible=False) as song_rap_row:
                    song_rap_sub = gr.Radio(
                        choices=["Song", "Rap"],
                        value="Rap",
                        label="Style",
                    )

                # Starts with the blurb for the default mode ("Summary").
                mode_description = gr.Markdown(value=_mode_description("Summary"))

            # ── RIGHT COLUMN ──────────────────────────────────────────────────
            with gr.Column(scale=1):

                gr.Markdown("### 🎧 Generated Audio")
                audio_output = gr.Audio(
                    label="Audio",
                    type="filepath",
                    interactive=False,
                    show_download_button=True,
                )

                gr.Markdown("### ✍️ Generated Script")
                script_output = gr.Textbox(
                    label="Script",
                    lines=14,
                    max_lines=22,
                    interactive=False,
                    placeholder="Your generated script will appear here…",
                    show_copy_button=True,
                )

        # ── Footer ───────────────────────────────────────────────────────────
        gr.Markdown(
            "<center style='color:#9ca3af;margin-top:1rem;'>"
            "Built with ❤️ using SmolLM3-3B · Qwen3-TTS · Edge-TTS · Gradio"
            "</center>"
        )

        # ── Event wiring ─────────────────────────────────────────────────────

        # Changing the mode toggles the Song/Rap style row and refreshes the blurb.
        delivery_mode.change(
            fn=_on_mode_change,
            inputs=[delivery_mode],
            outputs=[song_rap_row, mode_description],
        )
        # Each tab's button calls its own handler; all three share the same
        # mode inputs and write to the same (script, audio) outputs.
        file_btn.click(
            fn=process_file,
            inputs=[file_input, delivery_mode, song_rap_sub],
            outputs=[script_output, audio_output],
        )
        url_btn.click(
            fn=process_url,
            inputs=[url_input, delivery_mode, song_rap_sub],
            outputs=[script_output, audio_output],
        )
        paste_btn.click(
            fn=process_paste,
            inputs=[paste_input, delivery_mode, song_rap_sub],
            outputs=[script_output, audio_output],
        )

    return app


# ── Entry point ───────────────────────────────────────────────────────────────

if __name__ == "__main__":
    logger.info("Starting VoiceVerse AI…")
    app = build_ui()
    # Serve on all interfaces at port 7860, with no public share link, and
    # surface handler errors in the browser.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    app.launch(**launch_options)