# CHECK / app (1).py
# Isshi14's picture
# Upload 12 files
# ebd182e verified
"""
VoiceVerse AI β€” Main Application.
Input sources (three tabs):
Tab 1 β€” Upload PDF or TXT file
Tab 2 β€” URL / YouTube link
Tab 3 β€” Paste raw text
Delivery Modes:
Summary / Podcast / Song / Rap / Debate
No status card shown. RAG pipeline unchanged.
"""
import os
import gradio as gr
from utils import logger, validate_file, format_error
from rag import extract_text, RAGStore
from script_gen import generate_script
from tts import generate_audio, generate_audio_podcast, generate_audio_debate, generate_audio_rap, generate_audio_story, apply_pitch_shift
from ingestion import ingest_from_url_or_text, extract_pasted_text
# ── Global RAG Store ──────────────────────────────────────────────────────────
# Created once at import time and used by _run_pipeline, so every tab handler
# (and every request served by this process) works on the same instance.
# NOTE(review): whether RAGStore.add_document() replaces or appends to earlier
# documents is not visible here — confirm, since appending would let content
# from previous requests leak into later retrievals.
rag_store = RAGStore()
# ══════════════════════════════════════════════════════════════════════════════
# Shared RAG + Script + TTS pipeline
# ══════════════════════════════════════════════════════════════════════════════
def _run_pipeline(
    text: str,
    delivery_mode: str,
    song_rap_sub: str,
    pitch_shift: float,
    progress,
) -> tuple[str, str]:
    """
    Turn plain text into a script and an audio file.

    Shared by all three input tabs. Steps, in order: index the text in the
    module-level ``rag_store``, pick context chunks, generate the script,
    synthesise audio with the engine matching the delivery mode, and
    optionally pitch-shift the result.

    Args:
        text: Source text to index.
        delivery_mode: Mode name; compared after ``strip().lower()``.
        song_rap_sub: Sub-mode forwarded to ``generate_script`` and used in
            the progress label.
        pitch_shift: Pitch offset handed to ``apply_pitch_shift``; skipped
            when falsy or when its magnitude is below 0.1.
        progress: Callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        ``(script, audio_path)``.
    """
    # ── Index ────────────────────────────────────────────────────────────────
    progress(0.30, desc="🧠 Building knowledge index…")
    rag_store.add_document(text)
    n_chunks = len(rag_store.chunks)
    logger.info("RAG index: %d chunks", n_chunks)

    # ── Retrieve ─────────────────────────────────────────────────────────────
    # Small indexes are passed through whole; larger ones are narrowed to the
    # six best matches for a generic "what is this about" query.
    progress(0.45, desc="πŸ” Retrieving relevant content…")
    if n_chunks > 8:
        context_chunks = rag_store.query(
            "What are the main topics, key insights, and important details?",
            top_k=6,
        )
    else:
        context_chunks = rag_store.get_all_chunks()

    # ── Script ───────────────────────────────────────────────────────────────
    label = _mode_label(delivery_mode, song_rap_sub)
    progress(0.60, desc=f"✍️ Writing {label} script…")
    script = generate_script(
        context_chunks=context_chunks,
        mode=delivery_mode,
        sub_mode=song_rap_sub,
    )
    logger.info("Script: %d chars", len(script))

    # ── Speech ───────────────────────────────────────────────────────────────
    progress(0.80, desc="πŸŽ™οΈ Synthesising audio…")
    normalised = delivery_mode.strip().lower()
    exact_engines = {
        "podcast": generate_audio_podcast,
        "debate": generate_audio_debate,
        "story": generate_audio_story,
    }
    synthesise = exact_engines.get(normalised)
    if synthesise is None:
        # "rap" is matched as a substring; none of the exact keys contain it,
        # so checking it after the table keeps the original precedence.
        synthesise = generate_audio_rap if "rap" in normalised else generate_audio
    audio_path, engine = synthesise(script)
    logger.info("Audio via %s: %s", engine, audio_path)

    # ── Optional pitch shift ─────────────────────────────────────────────────
    wants_shift = bool(pitch_shift) and abs(pitch_shift) >= 0.1
    if wants_shift:
        progress(0.90, desc="🎡 Adjusting pitch…")
        audio_path = apply_pitch_shift(audio_path, pitch_shift)

    progress(1.00, desc="βœ… Done!")
    return script, audio_path
def _mode_label(mode: str, sub_mode: str) -> str:
    """
    Return the lowercase label shown in the "Writing … script" progress text.

    Matching is by case-insensitive substring, checked in this order:
    podcast, debate, story, then song/rap. Anything else is a summary.

    Args:
        mode: Delivery mode name. ``None`` is treated as an empty string.
        sub_mode: Song/rap sub-mode. Used (lowercased) only when ``mode``
            mentions "song" or "rap".

    Returns:
        "podcast", "debate", "story", the lowercased ``sub_mode``, or
        "summary". When ``sub_mode`` is ``None`` or blank, the song/rap
        branch falls back to "rap" or "song" taken from ``mode`` itself
        instead of raising or producing an empty label.
    """
    m = (mode or "").lower()
    for keyword in ("podcast", "debate", "story"):
        if keyword in m:
            return keyword
    if "song" in m or "rap" in m:
        label = (sub_mode or "").lower()
        if label.strip():
            return label
        # No usable sub-mode: derive the label from the mode name.
        return "rap" if "rap" in m else "song"
    return "summary"
# ══════════════════════════════════════════════════════════════════════════════
# Per-tab handlers
# ══════════════════════════════════════════════════════════════════════════════
def process_file(file, delivery_mode, song_rap_sub, pitch_shift, progress=gr.Progress()):
    """
    Handle the file-upload tab: validate the file, extract text, run the pipeline.

    Args:
        file: Uploaded file; either an object exposing its path as ``.name``
            or something whose ``str()`` is the path.
        delivery_mode: Delivery mode forwarded to ``_run_pipeline``.
        song_rap_sub: Sub-mode forwarded to ``_run_pipeline``.
        pitch_shift: Pitch offset forwarded to ``_run_pipeline``.
        progress: Progress callback; the ``gr.Progress()`` default is the
            Gradio idiom for having the tracker injected.

    Returns:
        ``(script, audio_path)`` as returned by ``_run_pipeline``.

    Raises:
        gr.Error: For a missing or invalid file, a document with fewer than
            50 non-blank characters, or any failure during extraction or the
            pipeline. The triggering exception is attached as ``__cause__``.
    """
    if file is None:
        raise gr.Error("Please upload a PDF or TXT file first.")

    file_path = file.name if hasattr(file, "name") else str(file)
    is_valid, msg = validate_file(file_path)
    if not is_valid:
        raise gr.Error(msg)

    try:
        progress(0.10, desc="πŸ“„ Extracting text from document…")
        text = extract_text(file_path)
        if not text or len(text.strip()) < 50:
            raise gr.Error("Document has too little text. Please upload a richer file.")
        progress(0.20, desc="βœ… Text extracted")
        return _run_pipeline(text, delivery_mode, song_rap_sub, pitch_shift, progress)
    except gr.Error:
        # Already user-facing; pass through untouched.
        raise
    except EnvironmentError as e:
        # Chain explicitly so the traceback shows this as the direct cause.
        raise gr.Error(str(e)) from e
    except Exception as e:
        raise gr.Error(format_error("pipeline", e)) from e
def process_url(url_input, delivery_mode, song_rap_sub, pitch_shift, progress=gr.Progress()):
    """
    Handle the URL tab: fetch content for the given link, then run the pipeline.

    Args:
        url_input: Text typed into the URL box; surrounding whitespace is
            stripped before ingestion.
        delivery_mode: Delivery mode forwarded to ``_run_pipeline``.
        song_rap_sub: Sub-mode forwarded to ``_run_pipeline``.
        pitch_shift: Pitch offset forwarded to ``_run_pipeline``.
        progress: Progress callback; the ``gr.Progress()`` default is the
            Gradio idiom for having the tracker injected.

    Returns:
        ``(script, audio_path)`` as returned by ``_run_pipeline``.

    Raises:
        gr.Error: For empty input or any failure during ingestion or the
            pipeline. ``ValueError`` and ``EnvironmentError`` messages are
            shown verbatim; other exceptions go through ``format_error``.
            The triggering exception is attached as ``__cause__``.
    """
    if not url_input or not url_input.strip():
        raise gr.Error("Please enter a URL or YouTube link.")

    try:
        progress(0.05, desc="🌐 Fetching content…")
        text, source_label = ingest_from_url_or_text(url_input.strip())
        logger.info("Ingested from %s: %d chars", source_label, len(text))
        progress(0.20, desc=f"βœ… Content fetched from {source_label}")
        return _run_pipeline(text, delivery_mode, song_rap_sub, pitch_shift, progress)
    except gr.Error:
        # Already user-facing; pass through untouched.
        raise
    except (ValueError, EnvironmentError) as e:
        # Chain explicitly so the traceback shows this as the direct cause.
        raise gr.Error(str(e)) from e
    except Exception as e:
        raise gr.Error(format_error("pipeline", e)) from e
def process_paste(pasted_text, delivery_mode, song_rap_sub, pitch_shift, progress=gr.Progress()):
    """
    Handle the paste tab: clean the pasted text, then run the pipeline.

    Args:
        pasted_text: Raw text from the paste box.
        delivery_mode: Delivery mode forwarded to ``_run_pipeline``.
        song_rap_sub: Sub-mode forwarded to ``_run_pipeline``.
        pitch_shift: Pitch offset forwarded to ``_run_pipeline``.
        progress: Progress callback; the ``gr.Progress()`` default is the
            Gradio idiom for having the tracker injected.

    Returns:
        ``(script, audio_path)`` as returned by ``_run_pipeline``.

    Raises:
        gr.Error: For empty input or any failure during text preparation or
            the pipeline. ``ValueError`` and ``EnvironmentError`` messages
            are shown verbatim; other exceptions go through ``format_error``.
            The triggering exception is attached as ``__cause__``.
    """
    if not pasted_text or not pasted_text.strip():
        raise gr.Error("Please paste some text first.")

    try:
        progress(0.10, desc="πŸ“‹ Processing pasted text…")
        text = extract_pasted_text(pasted_text)
        progress(0.20, desc="βœ… Text ready")
        return _run_pipeline(text, delivery_mode, song_rap_sub, pitch_shift, progress)
    except gr.Error:
        # Already user-facing; pass through untouched.
        raise
    except (ValueError, EnvironmentError) as e:
        # Chain explicitly so the traceback shows this as the direct cause.
        raise gr.Error(str(e)) from e
    except Exception as e:
        raise gr.Error(format_error("pipeline", e)) from e
# ══════════════════════════════════════════════════════════════════════════════
# UI helpers
# ══════════════════════════════════════════════════════════════════════════════
def _mode_description(mode: str) -> str:
    """
    Return the Markdown blurb shown under the delivery-mode selector.

    Lookup is by exact, case-sensitive mode name; any name without an entry
    yields an empty string.
    """
    blurbs = {
        "Summary": (
            "*πŸ“‹ **Summary** β€” Structured narration: intro, key points, conclusion. "
            "Single voice, neutral tone.*"
        ),
        "Podcast": (
            "*πŸŽ™οΈ **Podcast** β€” Two-host conversation between Alex and Sam. "
            "Alex guides; Sam explains. Dual voices.*"
        ),
        "Rap": (
            "*🎡 **Rap** β€” Key ideas as a punchy rhythmic track. "
            "Fast delivery, bass-boosted, line-by-line flow.*"
        ),
        "Debate": (
            "*βš”οΈ **Debate** β€” Maya (pro) vs Ryan (con) argue opposing sides. "
            "Female voice (assertive) vs Male voice (deliberate).*"
        ),
        "Story": (
            "*πŸ“– **Story** β€” Content retold as an immersive narrative. "
            "Slow, warm delivery with expressive pauses.*"
        ),
    }
    if mode in blurbs:
        return blurbs[mode]
    return ""
def _on_mode_change(mode: str):
    """
    Build the UI updates for a delivery-mode change.

    Returns a pair: a ``gr.update`` whose ``visible`` flag is true only when
    the mode name mentions "song" or "rap" (case-insensitive), and the
    description text for the mode.
    """
    lowered = mode.lower()
    sub_visible = any(tag in lowered for tag in ("song", "rap"))
    return gr.update(visible=sub_visible), _mode_description(mode)
# ══════════════════════════════════════════════════════════════════════════════
# Gradio UI
# ══════════════════════════════════════════════════════════════════════════════
def build_ui() -> gr.Blocks:
    """
    Assemble the Gradio interface and wire its events.

    Layout: a header, then one row with two columns. The left column holds
    the three input tabs (file, URL, pasted text), the delivery-mode radio
    with its description, and the pitch slider. The right column holds the
    audio player and the script box. A footer follows the row.

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).
    """
    # Multi-line literals below are kept flush-left so the emitted CSS/HTML
    # text carries no extra leading whitespace.
    custom_css = """
.main-header { text-align: center; margin-bottom: 1rem; }
.main-header h1 {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 2.5rem;
font-weight: 800;
margin-bottom: 0.25rem;
}
.main-header p { color: #6b7280; font-size: 1.1rem; }
.mode-card {
background: linear-gradient(135deg, #f8f7ff 0%, #f0edff 100%);
border: 1px solid #e0d9ff;
border-radius: 12px;
padding: 1rem 1.25rem;
margin-top: 0.75rem;
margin-bottom: 0.75rem;
}
/* Hide the "Radio" label Gradio adds automatically */
#delivery-mode-radio .label-wrap { display: none !important; }
.url-hint { color: #6b7280; font-size: 0.82rem; margin-top: 0.3rem; }
"""
    soft_theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="purple")
    # Every tab uses the same call-to-action text on its button.
    generate_label = "πŸŽ™οΈ Generate Audio"

    with gr.Blocks(title="VoiceVerse AI", theme=soft_theme, css=custom_css) as app:
        # ── Header ───────────────────────────────────────────────────────────
        gr.HTML("""
<div class="main-header">
<h1>πŸŽ™οΈ VoiceVerse AI</h1>
<p>Transform any content into an engaging audio experience</p>
</div>
""")

        with gr.Row(equal_height=False):
            # ── Left column: inputs and options ──────────────────────────────
            with gr.Column(scale=1):
                gr.Markdown("### πŸ“₯ Choose Your Content Source")

                with gr.Tabs():
                    # Tab 1: file upload
                    with gr.Tab("πŸ“„ File Upload"):
                        file_input = gr.File(
                            label="Upload a PDF or TXT file",
                            file_types=[".pdf", ".txt"],
                            type="filepath",
                        )
                        file_btn = gr.Button(generate_label, variant="primary", size="lg")

                    # Tab 2: URL
                    with gr.Tab("πŸ”— URL"):
                        url_input = gr.Textbox(
                            label=None,
                            placeholder=(
                                "Paste any link here…\n\n"
                                "πŸ“° Article: https://example.com/article\n"
                                "🌐 Website: https://en.wikipedia.org/wiki/...\n"
                                "πŸ“ Blog: https://blog.example.com/post"
                            ),
                            lines=5,
                            max_lines=6,
                            show_label=False,
                        )
                        gr.HTML(
                            "<p class='url-hint'>"
                            "βœ… Works with: news articles, "
                            "blogs, Wikipedia, most public pages.<br>"
                            "❌ Won't work: paywalled or login-required pages."
                            "</p>"
                        )
                        url_btn = gr.Button(generate_label, variant="primary", size="lg")

                    # Tab 3: pasted text
                    with gr.Tab("πŸ“‹ Paste Text"):
                        paste_input = gr.Textbox(
                            label=None,
                            placeholder=(
                                "Paste any text here β€” article content, notes, "
                                "transcripts, research, anything…"
                            ),
                            lines=10,
                            max_lines=40,
                            show_label=False,
                        )
                        paste_btn = gr.Button(generate_label, variant="primary", size="lg")

                # ── Delivery mode ────────────────────────────────────────────
                gr.Markdown("### 🎨 Choose Audio Experience")
                delivery_mode = gr.Radio(
                    choices=["Summary", "Podcast", "Rap", "Debate", "Story"],
                    value="Summary",
                    show_label=False,
                    elem_id="delivery-mode-radio",
                )
                # Hidden constant kept so handlers still receive a sub-mode.
                song_rap_sub = gr.State("Rap")
                mode_description = gr.Markdown(value=_mode_description("Summary"))

                # ── Pitch adjustment ─────────────────────────────────────────
                gr.Markdown("### 🎡 Audio Adjustments")
                pitch_slider = gr.Slider(
                    minimum=-6,
                    maximum=6,
                    step=0.5,
                    value=0,
                    label="Pitch Shift (semitones)",
                    info="Negative = deeper voice, Positive = higher voice",
                )

            # ── Right column: results ────────────────────────────────────────
            with gr.Column(scale=1):
                gr.Markdown("### 🎧 Generated Audio")
                audio_output = gr.Audio(
                    label="Audio",
                    type="filepath",
                    interactive=False,
                    show_download_button=True,
                )
                gr.Markdown("### ✍️ Generated Script")
                script_output = gr.Textbox(
                    label="Script",
                    lines=14,
                    max_lines=22,
                    interactive=False,
                    placeholder="Your generated script will appear here…",
                    show_copy_button=True,
                )

        # ── Footer ───────────────────────────────────────────────────────────
        gr.Markdown(
            "<center style='color:#9ca3af;margin-top:1rem;'>"
            "Built with ❀️ using SmolLM3-3B · Qwen3-TTS · Edge-TTS · Gradio"
            "</center>"
        )

        # ── Event wiring ─────────────────────────────────────────────────────
        # Refresh the blurb whenever a different mode is picked.
        delivery_mode.change(
            fn=_mode_description,
            inputs=[delivery_mode],
            outputs=[mode_description],
        )
        # Each tab's button feeds its own source component plus the shared
        # options into its handler; all three write to the same two outputs.
        for trigger, handler, source in (
            (file_btn, process_file, file_input),
            (url_btn, process_url, url_input),
            (paste_btn, process_paste, paste_input),
        ):
            trigger.click(
                fn=handler,
                inputs=[source, delivery_mode, song_rap_sub, pitch_slider],
                outputs=[script_output, audio_output],
            )

    return app
# ── Entry point ───────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Script entry point: build the interface and serve it.
    logger.info("Starting VoiceVerse AI…")
    app = build_ui()
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    app.launch(**launch_options)