Spaces:

ktejeshnaidu
/

DocuMind_hf

Running

App Files Files Community

DocuMind_hf / app.py

ktejeshnaidu

Update app.py

7e788d0 verified about 2 months ago

raw

history blame contribute delete

8.25 kB


	import streamlit as st
	import json
	import os
	import time
	import requests

	# ============================================================================
	# STREAMLIT CONFIG (Must be first)
	# ============================================================================
	st.set_page_config(
	    page_title="DocuMind - RAG ChatBOT",
	    page_icon="📃",
	    layout="wide",
	    initial_sidebar_state="expanded",
	)

	# ----------------------------------------------------------------------------
	# Backend API settings
	# ----------------------------------------------------------------------------
	# The backend is addressed over the loopback interface.
	API_URL = "http://127.0.0.1:8000"
	# Default timeout (seconds) applied by safe_api_call when the caller gives none.
	API_TIMEOUT = 30

	# ============================================================================
	# UTILITY FUNCTIONS
	# ============================================================================
	def check_backend_health(retries=5):
	    """Probe the backend until it answers with HTTP 200 or attempts run out.

	    Each attempt sends a GET to ``{API_URL}/docs`` with a 5-second timeout.
	    A one-second pause separates consecutive attempts, whether the previous
	    one raised or simply returned a non-200 status.

	    Args:
	        retries: Maximum number of probe attempts. Zero or a negative value
	            performs no request at all.

	    Returns:
	        True as soon as one probe returns status 200, otherwise False.
	    """
	    for attempt in range(retries):
	        try:
	            response = requests.get(f"{API_URL}/docs", timeout=5)
	            if response.status_code == 200:
	                return True
	        except requests.exceptions.RequestException:
	            # Unreachable or slow backend: treat as "not ready" and retry.
	            # Only request failures are swallowed, so KeyboardInterrupt and
	            # programming errors still propagate.
	            pass
	        # Back off before the next attempt; no pause after the last one.
	        if attempt < retries - 1:
	            time.sleep(1)
	    return False

	def safe_api_call(method, endpoint, **kwargs):
	    """Call a backend endpoint and report failures as a message, not a raise.

	    Args:
	        method: HTTP verb, ``"GET"`` or ``"POST"`` (matched case-insensitively).
	        endpoint: Path appended to ``API_URL``, e.g. ``"/sources"``.
	        **kwargs: Passed through to ``requests.get`` / ``requests.post``.
	            ``timeout`` defaults to ``API_TIMEOUT`` when not supplied.

	    Returns:
	        A ``(response, error)`` pair. On success ``response`` is the
	        ``requests`` response object and ``error`` is None. On failure
	        ``response`` is None and ``error`` is a user-facing message string.
	        The HTTP status is NOT inspected here; callers must check it.
	    """
	    # Validate the verb first so a bad call never builds or sends a request.
	    verb = str(method).upper()
	    if verb not in ("GET", "POST"):
	        return None, "Invalid method"

	    try:
	        send = requests.get if verb == "GET" else requests.post
	        kwargs.setdefault('timeout', API_TIMEOUT)
	        return send(f"{API_URL}{endpoint}", **kwargs), None
	    except requests.exceptions.Timeout:
	        return None, "⏱️ Request timed out"
	    except requests.exceptions.ConnectionError:
	        return None, "⚠️ Backend not responding"
	    except Exception as e:
	        # Last-resort boundary: the UI shows the message instead of crashing.
	        return None, f"❌ Error: {str(e)}"

	# ============================================================================
	# STREAMLIT FRONTEND UI
	# ============================================================================
	st.title("📃 DocuMind")
	st.markdown("Enterprise Document Intelligence Chatbot")
	st.markdown("---")

	# Health check on load. Session state keeps the result across reruns so the
	# slow startup probe (2 s wait + retries) runs only once per session.
	if "backend_checked" not in st.session_state:
	    st.session_state.backend_checked = False
	    st.session_state.backend_healthy = False

	if not st.session_state.backend_checked:
	    with st.spinner("🔄 Starting backend..."):
	        time.sleep(2)  # Give backend time to start
	        st.session_state.backend_healthy = check_backend_health()
	        st.session_state.backend_checked = True
	elif not st.session_state.backend_healthy:
	    # The first probe failed. Re-probe once per rerun (single attempt, no
	    # startup delay) so the warning below clears when the backend comes up,
	    # rather than staying for the rest of the session.
	    st.session_state.backend_healthy = check_backend_health(retries=1)

	# Show status
	if not st.session_state.backend_healthy:
	    st.warning(
	        "⚠️ Backend is starting up. This may take 30-60 seconds on first load. "
	        "Please refresh the page if you see this message for more than 1 minute."
	    )

	# --- Sidebar for Document Upload ---
	with st.sidebar:
	    st.header("🏢 Document Knowledge Base")
	    st.markdown("Upload PDFs, DOCX, or TXT documents to add them to the system.")

	    uploaded_file = st.file_uploader("Upload a new document", type=["txt", "pdf", "docx"])

	    # The button is only rendered once a file has been selected.
	    if uploaded_file and st.button("Ingest Document", key="ingest_btn"):
	        with st.spinner("Ingesting document (creating chunks & embeddings)..."):
	            files = {"file": (uploaded_file.name, uploaded_file.getvalue())}
	            response, error = safe_api_call("POST", "/ingest", files=files)

	        # NOTE: a requests response is falsy for 4xx/5xx statuses, so compare
	        # against None explicitly; a plain truthiness test would hide the
	        # server's error text behind 'Unknown error'.
	        if error:
	            st.error(error)
	        elif response is not None and response.status_code == 200:
	            st.success(f"✅ {uploaded_file.name} ingested successfully!")
	        else:
	            detail = response.text if response is not None else 'Unknown error'
	            st.error(f"❌ Failed to ingest: {detail}")

	    st.divider()

	    st.subheader("📄 Indexed Documents")
	    response, error = safe_api_call("GET", "/sources")

	    if error:
	        st.warning(f"Could not fetch documents: {error}")
	    elif response is not None and response.status_code == 200:
	        try:
	            documents = response.json().get("documents", [])
	        except (ValueError, AttributeError):
	            # Body was not JSON, or not a JSON object: report it instead of
	            # letting the exception take down the whole page.
	            documents = None

	        if documents is None:
	            st.warning("Could not fetch documents: invalid response from backend")
	        elif documents:
	            for doc in documents:
	                st.markdown(f"- `{doc}`")
	        else:
	            st.info("No documents indexed yet.")
	    else:
	        st.info("Backend not ready yet...")

	# --- Main Chat Interface ---
	#st.subheader("💬 Chat with Your Documents")

	# Initialize session state
	if "messages" not in st.session_state:
	    st.session_state.messages = []


	def _render_sources(sources):
	    """Render retrieved source passages inside a collapsible expander.

	    Args:
	        sources: Iterable of dicts read via the keys ``score``, ``source``
	            and ``content``; may be None or empty, in which case nothing
	            is rendered.
	    """
	    if not sources:
	        return
	    with st.expander("📚 Show Sources"):
	        for idx, src in enumerate(sources, start=1):
	            # Treat a missing or null score/content as 0 / empty text so a
	            # sparse record cannot crash the percent format or len().
	            score = src.get('score') or 0
	            content = src.get('content') or ''
	            st.caption(f"Source {idx} [Relevance: {score:.2%}]")
	            st.markdown(f"From: `{src.get('source', 'Unknown')}`")
	            # Long passages are cut to their first 500 characters.
	            excerpt = f"{content[:500]}..." if len(content) > 500 else content
	            st.markdown(f"> {excerpt}")


	# Display chat history
	for msg in st.session_state.messages:
	    with st.chat_message(msg["role"]):
	        st.markdown(msg["content"])
	        _render_sources(msg.get("sources"))

	# Chat input
	user_input = st.chat_input("Ask a question about your documents...")

	if user_input:
	    # Add user message to history
	    st.session_state.messages.append({"role": "user", "content": user_input})

	    # Display user message
	    with st.chat_message("user"):
	        st.markdown(user_input)

	    # Get assistant response
	    with st.chat_message("assistant"):
	        placeholder = st.empty()
	        full_response = ""
	        sources = []

	        response, error = safe_api_call(
	            "POST",
	            "/query",
	            json={"question": user_input},
	            stream=True,
	        )

	        if error:
	            st.error(error)
	            full_response = error
	        elif response is not None:
	            # Compare against None: a requests response is falsy for 4xx/5xx,
	            # which would otherwise skip this branch and store an empty reply.
	            try:
	                if response.status_code != 200:
	                    full_response = (
	                        f"❌ Backend returned HTTP {response.status_code}: "
	                        f"{response.text[:500]}"
	                    )
	                    st.error(full_response)
	                else:
	                    # The body is read as newline-delimited JSON events.
	                    for line in response.iter_lines():
	                        if not line:
	                            continue
	                        try:
	                            data = json.loads(line.decode('utf-8'))
	                        except (UnicodeDecodeError, json.JSONDecodeError):
	                            # Skip malformed lines rather than abort the stream.
	                            continue
	                        if not isinstance(data, dict):
	                            continue

	                        if data.get("type") == "sources":
	                            sources = data.get("data", [])
	                        elif data.get("type") == "token":
	                            full_response += data.get("content", "")
	                            # Trailing block acts as a typing cursor.
	                            placeholder.markdown(full_response + "▌")

	                    placeholder.markdown(full_response)

	                    # Display sources if available
	                    _render_sources(sources)
	            except Exception as e:
	                error_msg = f"❌ Error processing response: {str(e)}"
	                placeholder.markdown(error_msg)
	                st.error(error_msg)
	                full_response = error_msg
	            finally:
	                # stream=True keeps the connection open until it is closed.
	                response.close()

	    # Save assistant message
	    st.session_state.messages.append({
	        "role": "assistant",
	        "content": full_response,
	        "sources": sources,
	    })

	# Footer
	st.divider()
	# Centered footer rendered as raw HTML (hence unsafe_allow_html=True).
	# The separator is a plain "|": the previous "\|" was an invalid escape
	# sequence, and the backslash would show up verbatim inside the HTML block.
	st.markdown(
	    "<div style='text-align: center; color: var(--color-text-secondary); font-size: 0.85em;'>"
	    "DocuMind - Enterprise RAG Chatbot | "
	    "<a href='https://github.com/TejeshNaiduKona/DocuMind' target='_blank'>GitHub</a>"
	    "</div>",
	    unsafe_allow_html=True,
	)