Spaces:

Isshi14
/

voiceverse-ai

Sleeping

App Files Files Community

voiceverse-ai / app.py

Isshi14

Upload 8 files

7abe9d2 verified 3 months ago

raw

history blame contribute delete

9.64 kB

	"""
	VoiceVerse AI — Main Application.

	Gradio-based UI that orchestrates the full document-to-audio pipeline:
	1. Upload PDF/TXT → extract text
	2. RAG: chunk, embed, retrieve relevant context
	3. Generate a spoken-style script via Mistral-7B-Instruct
	4. Convert script to expressive audio via Qwen TTS / Edge-TTS
	5. Play audio in the browser

	Entry point for Hugging Face Spaces deployment.
	"""

	import os
	import gradio as gr
	from utils import logger, validate_file, format_error
	from rag import extract_text, RAGStore
	from script_gen import generate_script
	from tts import generate_audio

	# ── Global RAG Store (single-user demo) ──────────────────────────────────────
	# One module-level RAGStore instance, created at import time and reused by
	# every process_document() call in this process.
	# NOTE(review): whether add_document() replaces or appends to previously
	# stored chunks is not visible from this file — confirm that a second upload
	# does not mix content from an earlier document.
	rag_store = RAGStore()


	# ── Pipeline Orchestration ───────────────────────────────────────────────────

	def process_document(file, progress=gr.Progress()):
	    """
	    Run the full pipeline: upload → extract → RAG → script → audio.

	    Args:
	        file: Uploaded file as handed over by Gradio — either an object with
	            a ``.name`` attribute holding the path, or a value whose
	            ``str()`` is the path.
	        progress: Gradio progress tracker; called with a completion fraction
	            and a description as each stage starts or finishes.

	    Returns:
	        Tuple of (script_text, audio_file_path, status_message).

	    Raises:
	        gr.Error: If no file was supplied, ``validate_file`` rejects it, the
	            extracted text has fewer than 50 characters once surrounding
	            whitespace is stripped, or any pipeline stage fails. For stage
	            failures the original exception is attached as ``__cause__``.
	    """
	    # ── Step 0: Validate ─────────────────────────────────────────────────
	    if file is None:
	        raise gr.Error("Please upload a PDF or TXT file first.")

	    file_path = file.name if hasattr(file, "name") else str(file)
	    is_valid, msg = validate_file(file_path)
	    if not is_valid:
	        raise gr.Error(msg)

	    try:
	        # ── Step 1: Extract Text ─────────────────────────────────────────
	        progress(0.1, desc="📄 Extracting text from document...")
	        logger.info("Processing file: %s", file_path)

	        text = extract_text(file_path)
	        if not text or len(text.strip()) < 50:
	            raise gr.Error(
	                "The document contains too little text to generate audio. "
	                "Please upload a document with more content."
	            )

	        progress(0.2, desc="✅ Text extracted successfully")

	        # ── Step 2: RAG — Chunk & Embed ──────────────────────────────────
	        progress(0.3, desc="🧠 Processing document with AI...")
	        rag_store.add_document(text)

	        chunk_count = len(rag_store.chunks)
	        logger.info("Document processed: %d chunks created", chunk_count)

	        # ── Step 3: Retrieve Context ─────────────────────────────────────
	        progress(0.4, desc="🔍 Retrieving key content...")

	        # For short documents, use all chunks; for longer ones, retrieve smartly
	        if chunk_count <= 8:
	            context_chunks = rag_store.get_all_chunks()
	        else:
	            context_chunks = rag_store.query(
	                "What are the main topics, key insights, and important details?",
	                top_k=6,
	            )

	        progress(0.5, desc="✅ Context retrieved")

	        # ── Step 4: Generate Script ──────────────────────────────────────
	        progress(0.6, desc="✍️ Writing spoken script...")

	        script = generate_script(context_chunks)
	        logger.info("Script generated: %d characters", len(script))

	        progress(0.75, desc="✅ Script ready")

	        # ── Step 5: Generate Audio ───────────────────────────────────────
	        progress(0.8, desc="🎙️ Generating expressive audio...")

	        audio_path, engine = generate_audio(script)
	        logger.info("Audio generated via %s: %s", engine, audio_path)

	        progress(1.0, desc="✅ Audio ready!")

	        # ── Build status message ─────────────────────────────────────────
	        status = (
	            "✅ Generation complete!\n\n"
	            f"- 📄 Document: {os.path.basename(file_path)}\n"
	            f"- 📝 Text extracted: {len(text):,} characters\n"
	            f"- 🧩 Chunks created: {chunk_count}\n"
	            f"- ✍️ Script length: {len(script):,} characters\n"
	            f"- 🎙️ Voice engine: {engine}\n"
	        )

	        return script, audio_path, status

	    except gr.Error:
	        raise  # Re-raise Gradio errors as-is
	    except EnvironmentError as e:
	        # Surface the message to the UI, keeping the original as the cause.
	        raise gr.Error(str(e)) from e
	    except Exception as e:
	        # Record the traceback before it is reduced to a user-facing message.
	        logger.exception("Pipeline failed for %s", file_path)
	        raise gr.Error(format_error("pipeline", e)) from e


	# ── Gradio UI ────────────────────────────────────────────────────────────────

	def build_ui() -> gr.Blocks:
	    """Assemble the Gradio Blocks layout and return it without launching."""

	    # Stylesheet: gradient title, muted subtitle, accented status panel.
	    custom_css = """
	.main-header {
	text-align: center;
	margin-bottom: 1rem;
	}
	.main-header h1 {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	font-size: 2.5rem;
	font-weight: 800;
	margin-bottom: 0.25rem;
	}
	.main-header p {
	color: #6b7280;
	font-size: 1.1rem;
	}
	.status-box {
	border-left: 3px solid #667eea;
	padding-left: 1rem;
	margin: 0.5rem 0;
	}
	"""

	    soft_theme = gr.themes.Soft(
	        primary_hue="indigo",
	        secondary_hue="purple",
	    )

	    # Static markup rendered above and below the two working columns.
	    header_markup = """
	<div class="main-header">
	<h1>🎙️ VoiceVerse AI</h1>
	<p>Transform your documents into engaging podcast-style audio</p>
	</div>
	"""
	    footer_markup = (
	        "<center style='color: #9ca3af; margin-top: 1rem;'>"
	        "Built with ❤️ using Mistral-7B-Instruct · Qwen3-TTS · Edge-TTS · Gradio"
	        "</center>"
	    )

	    with gr.Blocks(
	        title="VoiceVerse AI — Document to Audio",
	        theme=soft_theme,
	        css=custom_css,
	    ) as demo:

	        # ── Header ───────────────────────────────────────────────────────
	        gr.HTML(header_markup)

	        with gr.Row():
	            # ── Left Column: upload, trigger, status ─────────────────────
	            with gr.Column(scale=1):
	                gr.Markdown("### 📤 Upload Document")

	                upload_box = gr.File(
	                    label="Upload a PDF or TXT file",
	                    file_types=[".pdf", ".txt"],
	                    type="filepath",
	                    elem_id="file-upload",
	                )

	                run_button = gr.Button(
	                    "🎙️ Generate Audio",
	                    variant="primary",
	                    size="lg",
	                    elem_id="generate-btn",
	                )

	                status_panel = gr.Markdown(
	                    value="Upload a document and click Generate to start.",
	                    elem_classes=["status-box"],
	                )

	            # ── Right Column: audio player and script ────────────────────
	            with gr.Column(scale=1):
	                gr.Markdown("### 🎧 Generated Audio")

	                audio_player = gr.Audio(
	                    label="Audio Narration",
	                    type="filepath",
	                    elem_id="audio-player",
	                    interactive=False,
	                )

	                gr.Markdown("### ✍️ Generated Script")

	                script_box = gr.Textbox(
	                    label="Spoken Script",
	                    lines=12,
	                    max_lines=20,
	                    interactive=False,
	                    placeholder="The generated script will appear here...",
	                    elem_id="script-display",
	                )

	        # ── Button click runs the pipeline and fills the three outputs ───
	        run_button.click(
	            fn=process_document,
	            inputs=[upload_box],
	            outputs=[script_box, audio_player, status_panel],
	        )

	        # ── Footer ───────────────────────────────────────────────────────
	        gr.Markdown(footer_markup)

	    return demo


	# ── Entry Point ──────────────────────────────────────────────────────────────

	if __name__ == "__main__":
	    # Runs only when this file is executed as a script, not when imported.
	    logger.info("Starting VoiceVerse AI...")

	    # Fixed host/port, no public share link, errors shown in the UI.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": False,
	        "show_error": True,
	    }

	    app = build_ui()
	    app.launch(**launch_options)