# DocuMind_hf / app.py
# ktejeshnaidu's picture
# Update app.py
# 7e788d0 verified
import streamlit as st
import json
import os
import time
import requests
# ============================================================================
# STREAMLIT CONFIG (Must be first)
# ============================================================================
# Page-level settings for the Streamlit app. Kept ahead of every other st.*
# call in this script (the file's own banner marks it as "must be first").
st.set_page_config(
    page_title="DocuMind - RAG ChatBOT",
    page_icon="📃",  # restored from a mis-encoded byte sequence
    layout="wide",
    initial_sidebar_state="expanded",
)
# ============================================================================
# API CONFIGURATION
# ============================================================================
# Use localhost for internal communication
# Base URL of the backend API. Defaults to the loopback address; set the
# DOCUMIND_API_URL environment variable to point the UI at another backend.
# A trailing slash is stripped because endpoints are appended as "/path".
API_URL = os.environ.get("DOCUMIND_API_URL", "http://127.0.0.1:8000").rstrip("/")
# Default per-request timeout, in seconds, applied by safe_api_call.
API_TIMEOUT = 30
# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================
def check_backend_health(retries=5):
    """Check whether the backend API is running.

    Probes ``GET {API_URL}/docs`` up to ``retries`` times and reports success
    as soon as one probe answers with HTTP 200.

    Args:
        retries: Maximum number of probe attempts.

    Returns:
        True if any attempt received a 200 response, otherwise False.
    """
    for attempt in range(retries):
        try:
            response = requests.get(f"{API_URL}/docs", timeout=5)
            if response.status_code == 200:
                return True
        except requests.exceptions.RequestException:
            # Backend unreachable or too slow: handled like a non-200 reply.
            # Only request failures are swallowed, so KeyboardInterrupt,
            # SystemExit and other control-flow exceptions still propagate.
            pass
        # Wait before the next attempt whatever the failure was (exception or
        # non-200 status), but do not delay after the final attempt.
        if attempt < retries - 1:
            time.sleep(1)
    return False
def safe_api_call(method, endpoint, **kwargs):
    """Call a backend endpoint and convert failures into a message.

    Args:
        method: HTTP verb, ``"GET"`` or ``"POST"`` (case-insensitive).
        endpoint: Path appended to ``API_URL``, e.g. ``"/sources"``.
        **kwargs: Forwarded to ``requests.get`` / ``requests.post``. A
            ``timeout`` of ``API_TIMEOUT`` seconds is used unless given.

    Returns:
        A ``(response, error)`` pair. On success ``error`` is None; on failure
        ``response`` is None and ``error`` is a user-facing message. The HTTP
        status is not inspected here, so callers must check it themselves.
    """
    senders = {"GET": requests.get, "POST": requests.post}
    send = senders.get(str(method).upper())
    if send is None:
        return None, "Invalid method"

    kwargs.setdefault("timeout", API_TIMEOUT)
    try:
        return send(f"{API_URL}{endpoint}", **kwargs), None
    except requests.exceptions.Timeout:
        return None, "⏱️ Request timed out"
    except requests.exceptions.ConnectionError:
        return None, "⚠️ Backend not responding"
    except Exception as e:
        # Deliberate catch-all: this wrapper is the UI's error boundary and
        # must hand back a message rather than crash the page.
        return None, f"❌ Error: {e}"
# ============================================================================
# STREAMLIT FRONTEND UI
# ============================================================================
st.title("📃 DocuMind")
st.markdown("**Enterprise Document Intelligence Chatbot**")
st.markdown("---")

# Health check on load. The probe result is kept in session state so the
# startup wait is paid only once per session.
if "backend_checked" not in st.session_state:
    st.session_state.backend_checked = False
    st.session_state.backend_healthy = False

if not st.session_state.backend_checked:
    with st.spinner("🔄 Starting backend..."):
        time.sleep(2)  # Give backend time to start
        st.session_state.backend_healthy = check_backend_health()
    st.session_state.backend_checked = True
elif not st.session_state.backend_healthy:
    # The first probe failed: try once more on later runs so the warning
    # clears when the backend comes up, instead of caching the failure for
    # the whole session. A single attempt keeps the page responsive.
    st.session_state.backend_healthy = check_backend_health(retries=1)

# Show status
if not st.session_state.backend_healthy:
    st.warning(
        "⚠️ Backend is starting up. This may take 30-60 seconds on first load. "
        "Please refresh the page if you see this message for more than 1 minute."
    )
# --- Sidebar for Document Upload ---
# Sidebar: upload a document for ingestion and list what is already indexed.
with st.sidebar:
    st.header("🏢 Document Knowledge Base")
    st.markdown("Upload PDFs, DOCX, or TXT documents to add them to the system.")
    uploaded_file = st.file_uploader("Upload a new document", type=["txt", "pdf", "docx"])

    if uploaded_file and st.button("Ingest Document", key="ingest_btn"):
        with st.spinner("Ingesting document (creating chunks & embeddings)..."):
            files = {"file": (uploaded_file.name, uploaded_file.getvalue())}
            response, error = safe_api_call("POST", "/ingest", files=files)
        if error:
            st.error(error)
        elif response is not None and response.status_code == 200:
            st.success(f"✅ {uploaded_file.name} ingested successfully!")
        else:
            # Compare against None explicitly: a requests.Response is falsy
            # for 4xx/5xx statuses, which would hide the backend's own error
            # text behind "Unknown error".
            detail = response.text if response is not None else "Unknown error"
            st.error(f"❌ Failed to ingest: {detail}")

    st.divider()
    st.subheader("📄 Indexed Documents")
    response, error = safe_api_call("GET", "/sources")
    if error:
        st.warning(f"Could not fetch documents: {error}")
    elif response is not None and response.status_code == 200:
        try:
            documents = response.json().get("documents", [])
        except (ValueError, AttributeError):
            # Body was not JSON, or not a JSON object: report it instead of
            # letting the exception take down the whole page.
            documents = None
        if documents is None:
            st.warning("Could not fetch documents: invalid response from backend")
        elif documents:
            for doc in documents:
                st.markdown(f"- `{doc}`")
        else:
            st.info("No documents indexed yet.")
    else:
        st.info("Backend not ready yet...")
# --- Main Chat Interface ---
#st.subheader("💬 Chat with Your Documents")
# Initialize session state
if "messages" not in st.session_state:
    st.session_state.messages = []


def _render_sources(sources):
    """Render retrieved source chunks inside a collapsible expander.

    Args:
        sources: List of dicts read via the keys ``score``, ``source`` and
            ``content``. Anything that is not a non-empty list renders
            nothing; entries that are not dicts are skipped.
    """
    if not isinstance(sources, list) or not sources:
        return
    with st.expander("📚 Show Sources"):
        for idx, src in enumerate(sources, start=1):
            if not isinstance(src, dict):
                continue
            # A missing, null or non-numeric score is shown as 0.00% rather
            # than raising, because saved messages are re-rendered on every
            # rerun and one bad entry would otherwise break the page for good.
            try:
                score = float(src.get("score") or 0)
            except (TypeError, ValueError):
                score = 0.0
            st.caption(f"**Source {idx}** [Relevance: {score:.2%}]")
            st.markdown(f"**From:** `{src.get('source', 'Unknown')}`")
            content = str(src.get("content") or "")
            if len(content) > 500:
                # Long chunks are cut to 500 characters.
                st.markdown(f"> {content[:500]}...")
            else:
                st.markdown(f"> {content}")


def _iter_stream_events(response):
    """Yield each JSON object found in a line-delimited response body.

    Blank lines, lines that are not valid UTF-8 or JSON, and JSON values that
    are not objects are skipped.
    """
    for line in response.iter_lines():
        if not line:
            continue
        try:
            event = json.loads(line.decode("utf-8"))
        except ValueError:
            # Covers both UnicodeDecodeError and json.JSONDecodeError.
            continue
        if isinstance(event, dict):
            yield event


# Display chat history
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])
        _render_sources(msg.get("sources"))

# Chat input
user_input = st.chat_input("Ask a question about your documents...")
if user_input:
    # Add user message to history
    st.session_state.messages.append({"role": "user", "content": user_input})

    # Display user message
    with st.chat_message("user"):
        st.markdown(user_input)

    # Get assistant response
    with st.chat_message("assistant"):
        placeholder = st.empty()
        full_response = ""
        sources = []

        response, error = safe_api_call(
            "POST",
            "/query",
            json={"question": user_input},
            stream=True,
        )

        if error:
            st.error(error)
            full_response = error
        elif response is not None:
            # ``is not None`` rather than truthiness: a requests.Response is
            # falsy for 4xx/5xx, which used to produce a silent empty answer.
            try:
                # The context manager closes the streamed response even when
                # processing fails part-way through.
                with response:
                    if response.status_code != 200:
                        raise RuntimeError(
                            f"backend returned HTTP {response.status_code}"
                        )
                    for event in _iter_stream_events(response):
                        kind = event.get("type")
                        if kind == "sources":
                            payload = event.get("data")
                            sources = payload if isinstance(payload, list) else []
                        elif kind == "token":
                            full_response += str(event.get("content") or "")
                            # Trailing block character acts as a typing cursor.
                            placeholder.markdown(full_response + "▌")
                placeholder.markdown(full_response)

                # Display sources if available
                _render_sources(sources)
            except Exception as e:
                error_msg = f"❌ Error processing response: {str(e)}"
                placeholder.markdown(error_msg)
                st.error(error_msg)
                full_response = error_msg

    # Save assistant message
    st.session_state.messages.append({
        "role": "assistant",
        "content": full_response,
        "sources": sources,
    })
# Footer
st.divider()
# Centered footer line with a link to the project repository. Built as a
# list of fragments and joined, then rendered as raw HTML.
footer_parts = [
    "<div style='text-align: center; color: var(--color-text-secondary); font-size: 0.85em;'>",
    "DocuMind - Enterprise RAG Chatbot | ",
    "<a href='https://github.com/TejeshNaiduKona/DocuMind' target='_blank'>GitHub</a>",
    "</div>",
]
st.markdown("".join(footer_parts), unsafe_allow_html=True)