voiceverse-ai-test

Sleeping

App Files Files Community

voiceverse-ai-test / app.py

Isshi14

Upload 11 files

ff6ba78 verified 3 months ago

raw

history blame contribute delete

16.7 kB

	"""
	VoiceVerse AI — Main Application.

	Input sources (three tabs):
	Tab 1 — Upload PDF or TXT file
	Tab 2 — URL / YouTube link
	Tab 3 — Paste raw text

	Delivery Modes:
	Summary / Podcast / Song / Rap / Debate / Story

	No status card shown. RAG pipeline unchanged.
	"""

	import os
	import gradio as gr
	from utils import logger, validate_file, format_error
	from rag import extract_text, RAGStore
	from script_gen import generate_script
	from tts import generate_audio, generate_audio_podcast, generate_audio_debate, generate_audio_rap, generate_audio_story
	from ingestion import ingest_from_url_or_text, extract_pasted_text

	# ── Global RAG Store ──────────────────────────────────────────────────────────
	# One module-level store shared by every tab handler: _run_pipeline() calls
	# rag_store.add_document() on this same instance for each request.
	# NOTE(review): whether add_document() replaces or appends to the chunks left
	# by a previous request is not visible from this file — confirm, because
	# appended chunks would leak earlier documents into later scripts.
	rag_store = RAGStore()


	# ══════════════════════════════════════════════════════════════════════════════
	# Shared RAG + Script + TTS pipeline
	# ══════════════════════════════════════════════════════════════════════════════

	def _run_pipeline(
	    text: str,
	    delivery_mode: str,
	    song_rap_sub: str,
	    progress,
	) -> tuple[str, str]:
	    """
	    Turn source text into a script and an audio file (RAG → script → TTS).

	    Every input tab funnels into this function once it has plain text.

	    Args:
	        text: Source text to index.
	        delivery_mode: Delivery mode chosen in the UI (e.g. "Podcast").
	        song_rap_sub: Style chosen for the "Song / Rap" mode ("Song" or "Rap").
	        progress: Callable invoked as ``progress(fraction, desc=...)`` to
	            report each stage.

	    Returns:
	        ``(script, audio_path)``.
	    """
	    # Stage 1 — index the text in the shared store.
	    progress(0.30, desc="🧠 Building knowledge index…")
	    rag_store.add_document(text)
	    n_chunks = len(rag_store.chunks)
	    logger.info("RAG index: %d chunks", n_chunks)

	    # Stage 2 — a small index is used whole; a larger one is queried for
	    # the six most relevant chunks.
	    progress(0.45, desc="🔍 Retrieving relevant content…")
	    if n_chunks > 8:
	        retrieved = rag_store.query(
	            "What are the main topics, key insights, and important details?",
	            top_k=6,
	        )
	    else:
	        retrieved = rag_store.get_all_chunks()

	    # Stage 3 — write the script for the requested mode.
	    label = _mode_label(delivery_mode, song_rap_sub)
	    progress(0.60, desc=f"✍️ Writing {label} script…")
	    script = generate_script(
	        context_chunks=retrieved,
	        mode=delivery_mode,
	        sub_mode=song_rap_sub,
	    )
	    logger.info("Script: %d chars", len(script))

	    # Stage 4 — pick the synthesiser. Rap is the only choice that depends on
	    # the sub-mode; everything unlisted falls back to the plain generator.
	    progress(0.80, desc="🎙️ Synthesising audio…")
	    normalised = delivery_mode.strip().lower()
	    if normalised == "song / rap" and song_rap_sub.lower() == "rap":
	        synthesise = generate_audio_rap
	    else:
	        synthesise = {
	            "podcast": generate_audio_podcast,
	            "debate": generate_audio_debate,
	            "story": generate_audio_story,
	        }.get(normalised, generate_audio)
	    audio_path, engine = synthesise(script)
	    logger.info("Audio via %s: %s", engine, audio_path)

	    progress(1.00, desc="✅ Done!")
	    return script, audio_path


	def _mode_label(mode: str, sub_mode: str) -> str:
	m = mode.lower()
	if "podcast" in m:
	return "podcast"
	if "debate" in m:
	return "debate"
	if "story" in m:
	return "story"
	if "song" in m or "rap" in m:
	return sub_mode.lower()
	return "summary"


	# ══════════════════════════════════════════════════════════════════════════════
	# Per-tab handlers
	# ══════════════════════════════════════════════════════════════════════════════

	def process_file(file, delivery_mode, song_rap_sub, progress=gr.Progress()):
	    """
	    Handler for the file-upload tab: validate, extract text, run the pipeline.

	    Args:
	        file: Value of the upload component; either an object exposing
	            ``.name`` or something whose ``str()`` is the path.
	        delivery_mode: Selected delivery mode.
	        song_rap_sub: Selected Song/Rap style.
	        progress: Gradio progress tracker.

	    Returns:
	        ``(script, audio_path)`` from ``_run_pipeline``.

	    Raises:
	        gr.Error: No file, validation failure, fewer than 50 characters of
	            text, or any failure further down the pipeline.
	    """
	    if file is None:
	        raise gr.Error("Please upload a PDF or TXT file first.")

	    file_path = str(file) if not hasattr(file, "name") else file.name
	    ok, reason = validate_file(file_path)
	    if not ok:
	        raise gr.Error(reason)

	    try:
	        progress(0.10, desc="📄 Extracting text from document…")
	        text = extract_text(file_path)
	        too_short = not text or len(text.strip()) < 50
	        if too_short:
	            raise gr.Error("Document has too little text. Please upload a richer file.")
	        progress(0.20, desc="✅ Text extracted")
	        return _run_pipeline(text, delivery_mode, song_rap_sub, progress)
	    except gr.Error:
	        # Already user-facing; let it through untouched.
	        raise
	    except EnvironmentError as err:
	        raise gr.Error(str(err))
	    except Exception as err:
	        raise gr.Error(format_error("pipeline", err))


	def process_url(url_input, delivery_mode, song_rap_sub, progress=gr.Progress()):
	    """
	    Handler for the URL tab: fetch the linked content, then run the pipeline.

	    Args:
	        url_input: Text typed into the URL box.
	        delivery_mode: Selected delivery mode.
	        song_rap_sub: Selected Song/Rap style.
	        progress: Gradio progress tracker.

	    Returns:
	        ``(script, audio_path)`` from ``_run_pipeline``.

	    Raises:
	        gr.Error: Blank input, or any failure while fetching or processing.
	    """
	    if not (url_input and url_input.strip()):
	        raise gr.Error("Please enter a URL or YouTube link.")

	    try:
	        progress(0.05, desc="🌐 Fetching content…")
	        text, source_label = ingest_from_url_or_text(url_input.strip())
	        logger.info("Ingested from %s: %d chars", source_label, len(text))
	        progress(0.20, desc=f"✅ Content fetched from {source_label}")
	        return _run_pipeline(text, delivery_mode, song_rap_sub, progress)
	    except gr.Error:
	        # Already user-facing; let it through untouched.
	        raise
	    except (ValueError, EnvironmentError) as err:
	        # These carry a message that is shown to the user verbatim.
	        raise gr.Error(str(err))
	    except Exception as err:
	        raise gr.Error(format_error("pipeline", err))


	def process_paste(pasted_text, delivery_mode, song_rap_sub, progress=gr.Progress()):
	    """
	    Handler for the paste tab: clean the pasted text, then run the pipeline.

	    Args:
	        pasted_text: Text pasted into the text box.
	        delivery_mode: Selected delivery mode.
	        song_rap_sub: Selected Song/Rap style.
	        progress: Gradio progress tracker.

	    Returns:
	        ``(script, audio_path)`` from ``_run_pipeline``.

	    Raises:
	        gr.Error: Blank input, or any failure while processing.
	    """
	    if not (pasted_text and pasted_text.strip()):
	        raise gr.Error("Please paste some text first.")

	    try:
	        progress(0.10, desc="📋 Processing pasted text…")
	        text = extract_pasted_text(pasted_text)
	        progress(0.20, desc="✅ Text ready")
	        return _run_pipeline(text, delivery_mode, song_rap_sub, progress)
	    except gr.Error:
	        # Already user-facing; let it through untouched.
	        raise
	    except (ValueError, EnvironmentError) as err:
	        # These carry a message that is shown to the user verbatim.
	        raise gr.Error(str(err))
	    except Exception as err:
	        raise gr.Error(format_error("pipeline", err))


	# ══════════════════════════════════════════════════════════════════════════════
	# UI helpers
	# ══════════════════════════════════════════════════════════════════════════════

	def _mode_description(mode: str) -> str:
	return {
	"Summary": (
	"📋 Summary* — Structured narration: intro, key points, conclusion. "
	"Single voice, neutral tone.*"
	),
	"Podcast": (
	"🎙️ Podcast* — Two-host conversation. Female host guides; "
	"Male host explains. Dual voices.*"
	),
	"Song / Rap": (
	"🎵 Song / Rap* — Key ideas as a rhythmic track. "
	"Song = smooth flow · Rap = fast, punchy, bass-boosted.*"
	),
	"Debate": (
	"⚔️ Debate* — Two debaters argue opposing sides. "
	"Female voice (pro, assertive) vs Male voice (con, deliberate).*"
	),
	"Story": (
	"📖 Story* — Content retold as an immersive narrative. "
	"Slow, warm delivery with expressive pauses.*"
	),
	}.get(mode, "")


	def _on_mode_change(mode: str):
	    """
	    React to a delivery-mode change.

	    Returns a visibility update (visible only when the mode mentions "song"
	    or "rap") followed by the description text for *mode*.
	    """
	    lowered = mode.lower()
	    needs_style_picker = any(word in lowered for word in ("song", "rap"))
	    return gr.update(visible=needs_style_picker), _mode_description(mode)


	# ══════════════════════════════════════════════════════════════════════════════
	# Gradio UI
	# ══════════════════════════════════════════════════════════════════════════════

	def build_ui() -> gr.Blocks:
	    """
	    Assemble the VoiceVerse AI Gradio interface and return it un-launched.

	    Layout: a header, then one row with two columns, then a footer.
	    The left column holds the three input tabs (file upload, URL, pasted
	    text — each with its own "Generate Audio" button), the delivery-mode
	    radio, the Song/Rap style radio (hidden initially) and the mode
	    description. The right column holds the audio player and the script box.

	    Returns:
	        The configured ``gr.Blocks`` app; the caller calls ``launch()``.
	    """

	    # Custom stylesheet passed to gr.Blocks below.
	    # NOTE(review): `.mode-card` is defined here, but no component in this
	    # function is given that class — confirm whether it is still needed.
	    css = """
	.main-header { text-align: center; margin-bottom: 1rem; }
	.main-header h1 {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	font-size: 2.5rem;
	font-weight: 800;
	margin-bottom: 0.25rem;
	}
	.main-header p { color: #6b7280; font-size: 1.1rem; }

	.mode-card {
	background: linear-gradient(135deg, #f8f7ff 0%, #f0edff 100%);
	border: 1px solid #e0d9ff;
	border-radius: 12px;
	padding: 1rem 1.25rem;
	margin-top: 0.75rem;
	margin-bottom: 0.75rem;
	}

	/* Hide the "Radio" label Gradio adds automatically */
	#delivery-mode-radio .label-wrap { display: none !important; }

	.url-hint { color: #6b7280; font-size: 0.82rem; margin-top: 0.3rem; }
	"""

	    with gr.Blocks(
	        title="VoiceVerse AI",
	        theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
	        css=css,
	    ) as app:

	        # ── Header ───────────────────────────────────────────────────────────
	        gr.HTML("""
	<div class="main-header">
	<h1>🎙️ VoiceVerse AI</h1>
	<p>Transform any content into an engaging audio experience</p>
	</div>
	""")

	        with gr.Row(equal_height=False):

	            # ── LEFT COLUMN ───────────────────────────────────────────────────
	            with gr.Column(scale=1):

	                gr.Markdown("### 📥 Choose Your Content Source")

	                # Each tab owns its input widget and its own submit button,
	                # so every source is wired to a dedicated handler below.
	                with gr.Tabs():

	                    # ── Tab 1: File upload ────────────────────────────────────
	                    with gr.Tab("📄 File Upload"):
	                        file_input = gr.File(
	                            label="Upload a PDF or TXT file",
	                            file_types=[".pdf", ".txt"],
	                            type="filepath",
	                        )
	                        file_btn = gr.Button(
	                            "🎙️ Generate Audio",
	                            variant="primary",
	                            size="lg",
	                        )

	                    # ── Tab 2: URL / YouTube ──────────────────────────────────
	                    with gr.Tab("🔗 URL"):
	                        url_input = gr.Textbox(
	                            label=None,
	                            placeholder=(
	                                "Paste any link here…\n\n"
	                                "▶ YouTube: https://youtube.com/watch?v=...\n"
	                                "📰 Article: https://example.com/article\n"
	                                "🌐 Website: https://en.wikipedia.org/wiki/..."
	                            ),
	                            lines=5,
	                            max_lines=6,
	                            show_label=False,
	                        )
	                        # Static hint listing which kinds of links are supported.
	                        gr.HTML(
	                            "<p class='url-hint'>"
	                            "✅ Works with: YouTube (with captions), news articles, "
	                            "blogs, Wikipedia, most public pages.<br>"
	                            "❌ Won't work: paywalled or login-required pages."
	                            "</p>"
	                        )
	                        url_btn = gr.Button(
	                            "🎙️ Generate Audio",
	                            variant="primary",
	                            size="lg",
	                        )

	                    # ── Tab 3: Paste text ─────────────────────────────────────
	                    with gr.Tab("📋 Paste Text"):
	                        paste_input = gr.Textbox(
	                            label=None,
	                            placeholder=(
	                                "Paste any text here — article content, notes, "
	                                "transcripts, research, anything…"
	                            ),
	                            lines=10,
	                            max_lines=40,
	                            show_label=False,
	                        )
	                        paste_btn = gr.Button(
	                            "🎙️ Generate Audio",
	                            variant="primary",
	                            size="lg",
	                        )

	                # ── Delivery Mode card ────────────────────────────────────────
	                gr.Markdown("### 🎨 Choose Audio Experience")

	                # One selector shared by all three tabs' buttons.
	                delivery_mode = gr.Radio(
	                    choices=["Summary", "Podcast", "Song / Rap", "Debate", "Story"],
	                    value="Summary",
	                    show_label=False, # removes the "Radio" label
	                    elem_id="delivery-mode-radio",
	                )

	                # Song/Rap sub-option — hidden unless Song/Rap is selected
	                with gr.Row(visible=False) as song_rap_row:
	                    song_rap_sub = gr.Radio(
	                        choices=["Song", "Rap"],
	                        value="Rap",
	                        label="Style",
	                    )

	                # Starts with the text for the default mode ("Summary").
	                mode_description = gr.Markdown(value=_mode_description("Summary"))

	            # ── RIGHT COLUMN ──────────────────────────────────────────────────
	            with gr.Column(scale=1):

	                gr.Markdown("### 🎧 Generated Audio")
	                audio_output = gr.Audio(
	                    label="Audio",
	                    type="filepath",
	                    interactive=False,
	                    show_download_button=True,
	                )

	                gr.Markdown("### ✍️ Generated Script")
	                script_output = gr.Textbox(
	                    label="Script",
	                    lines=14,
	                    max_lines=22,
	                    interactive=False,
	                    placeholder="Your generated script will appear here…",
	                    show_copy_button=True,
	                )

	        # ── Footer ───────────────────────────────────────────────────────────
	        gr.Markdown(
	            "<center style='color:#9ca3af;margin-top:1rem;'>"
	            "Built with ❤️ using SmolLM3-3B · Qwen3-TTS · Edge-TTS · Gradio"
	            "</center>"
	        )

	        # ── Event wiring ─────────────────────────────────────────────────────

	        # Changing the mode toggles the Song/Rap row and swaps the description.
	        delivery_mode.change(
	            fn=_on_mode_change,
	            inputs=[delivery_mode],
	            outputs=[song_rap_row, mode_description],
	        )
	        # All three buttons write to the same two outputs, script first and
	        # audio second, matching the order the handlers return them in.
	        file_btn.click(
	            fn=process_file,
	            inputs=[file_input, delivery_mode, song_rap_sub],
	            outputs=[script_output, audio_output],
	        )
	        url_btn.click(
	            fn=process_url,
	            inputs=[url_input, delivery_mode, song_rap_sub],
	            outputs=[script_output, audio_output],
	        )
	        paste_btn.click(
	            fn=process_paste,
	            inputs=[paste_input, delivery_mode, song_rap_sub],
	            outputs=[script_output, audio_output],
	        )

	    return app


	# ── Entry point ───────────────────────────────────────────────────────────────

	if __name__ == "__main__":
	    # Script entry point: build the interface and serve it.
	    logger.info("Starting VoiceVerse AI…")
	    app = build_ui()
	    # Fixed host/port, no public share link, errors surfaced in the UI.
	    app.launch(server_name="0.0.0.0", server_port=7860, share=False, show_error=True)