# DocuMind Streamlit front end.
#
# Renders a document-upload sidebar and a chat interface, and talks to a
# backend HTTP API at API_URL using the /docs, /ingest, /sources and /query
# endpoints.
import json
import os
import time

import requests
import streamlit as st

# ============================================================================
# STREAMLIT CONFIG (Must be first)
# ============================================================================
st.set_page_config(
    page_title="DocuMind - RAG ChatBOT",
    page_icon="📃",
    layout="wide",
    initial_sidebar_state="expanded"
)

# ============================================================================
# API CONFIGURATION
# ============================================================================
# Use localhost for internal communication
API_URL = "http://127.0.0.1:8000"
API_TIMEOUT = 30  # seconds

# Maximum number of characters of a source chunk shown in the sources expander.
SOURCE_PREVIEW_CHARS = 500


# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================
def check_backend_health(retries=5):
    """Check if the backend API is running.

    Polls ``{API_URL}/docs`` up to ``retries`` times, pausing one second
    between attempts (no pause after the final attempt).

    Args:
        retries: Maximum number of attempts.

    Returns:
        True as soon as one attempt returns HTTP 200, otherwise False.
    """
    for attempt in range(retries):
        try:
            response = requests.get(f"{API_URL}/docs", timeout=5)
            if response.status_code == 200:
                return True
        except requests.exceptions.RequestException:
            # Backend not reachable yet: best-effort probe, retry below.
            # Only request errors are swallowed so Ctrl-C / SystemExit still
            # propagate.
            pass
        # Pause between attempts for both failure modes (connection error and
        # non-200 reply) so a starting backend is not polled in a tight loop.
        if attempt < retries - 1:
            time.sleep(1)
    return False


def safe_api_call(method, endpoint, **kwargs):
    """Call the backend API and convert failures into a message.

    Args:
        method: HTTP method; only "GET" and "POST" are supported.
        endpoint: Path appended to ``API_URL`` (e.g. "/query").
        **kwargs: Passed through to ``requests.get`` / ``requests.post``.
            ``timeout`` defaults to ``API_TIMEOUT`` when not supplied.

    Returns:
        A ``(response, error)`` tuple. On success ``error`` is None and
        ``response`` is the ``requests.Response`` (whatever its HTTP status).
        On failure ``response`` is None and ``error`` is a user-facing string.
    """
    try:
        url = f"{API_URL}{endpoint}"
        kwargs.setdefault('timeout', API_TIMEOUT)

        if method == "GET":
            response = requests.get(url, **kwargs)
        elif method == "POST":
            response = requests.post(url, **kwargs)
        else:
            return None, "Invalid method"

        return response, None
    except requests.exceptions.Timeout:
        return None, "⏱️ Request timed out"
    except requests.exceptions.ConnectionError:
        return None, "⚠️ Backend not responding"
    except Exception as e:
        # Boundary handler: this function promises never to raise to the UI.
        return None, f"❌ Error: {str(e)}"


def _render_sources(sources):
    """Render a collapsible list of source entries under the current message.

    Each entry is read with ``.get`` for the keys ``score``, ``source`` and
    ``content``; content longer than ``SOURCE_PREVIEW_CHARS`` is truncated.
    """
    with st.expander("📚 Show Sources"):
        for idx, src in enumerate(sources):
            score = src.get('score', 0)
            st.caption(f"**Source {idx+1}** [Relevance: {score:.2%}]")
            st.markdown(f"**From:** `{src.get('source', 'Unknown')}`")
            content = src.get('content', '')
            if len(content) > SOURCE_PREVIEW_CHARS:
                st.markdown(f"> {content[:SOURCE_PREVIEW_CHARS]}...")
            else:
                st.markdown(f"> {content}")


# ============================================================================
# STREAMLIT FRONTEND UI
# ============================================================================
st.title("📃 DocuMind")
st.markdown("**Enterprise Document Intelligence Chatbot**")
st.markdown("---")

# Health check on load (performed once per session, result cached in
# session state so reruns do not repeat the probe).
if "backend_checked" not in st.session_state:
    st.session_state.backend_checked = False
    st.session_state.backend_healthy = False

if not st.session_state.backend_checked:
    with st.spinner("🔄 Starting backend..."):
        time.sleep(2)  # Give backend time to start
        st.session_state.backend_healthy = check_backend_health()
        st.session_state.backend_checked = True

# Show status
if not st.session_state.backend_healthy:
    st.warning(
        "⚠️ Backend is starting up. This may take 30-60 seconds on first load. "
        "Please refresh the page if you see this message for more than 1 minute."
    )

# --- Sidebar for Document Upload ---
with st.sidebar:
    st.header("🏢 Document Knowledge Base")
    st.markdown("Upload PDFs, DOCX, or TXT documents to add them to the system.")

    uploaded_file = st.file_uploader("Upload a new document", type=["txt", "pdf", "docx"])

    if uploaded_file and st.button("Ingest Document", key="ingest_btn"):
        with st.spinner("Ingesting document (creating chunks & embeddings)..."):
            files = {"file": (uploaded_file.name, uploaded_file.getvalue())}
            response, error = safe_api_call("POST", "/ingest", files=files)

            # NOTE: compare against None explicitly. A requests.Response is
            # falsy for 4xx/5xx statuses, so a plain truthiness test would
            # hide the backend's error body behind 'Unknown error'.
            if error:
                st.error(error)
            elif response is not None and response.status_code == 200:
                st.success(f"✅ {uploaded_file.name} ingested successfully!")
            else:
                st.error(f"❌ Failed to ingest: {response.text if response is not None else 'Unknown error'}")

    st.divider()
    st.subheader("📄 Indexed Documents")

    response, error = safe_api_call("GET", "/sources")

    if error:
        st.warning(f"Could not fetch documents: {error}")
    elif response is not None and response.status_code == 200:
        # Guard against a non-JSON or non-object body so a misbehaving
        # backend cannot crash the whole page.
        try:
            payload = response.json()
        except ValueError:
            payload = None

        if not isinstance(payload, dict):
            st.warning("Could not fetch documents: unexpected response format")
        else:
            documents = payload.get("documents", [])
            if documents:
                for doc in documents:
                    st.markdown(f"- `{doc}`")
            else:
                st.info("No documents indexed yet.")
    else:
        st.info("Backend not ready yet...")

# --- Main Chat Interface ---

# Initialize session state
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat history
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])
        if msg.get("sources"):
            _render_sources(msg["sources"])

# Chat input
user_input = st.chat_input("Ask a question about your documents...")

if user_input:
    # Add user message to history
    st.session_state.messages.append({"role": "user", "content": user_input})

    # Display user message
    with st.chat_message("user"):
        st.markdown(user_input)

    # Get assistant response
    with st.chat_message("assistant"):
        placeholder = st.empty()
        full_response = ""
        sources = []

        response, error = safe_api_call(
            "POST",
            "/query",
            json={"question": user_input},
            stream=True
        )

        if error:
            error_msg = error
            st.error(error_msg)
            full_response = error_msg
        elif response is not None and not response.ok:
            # HTTP-level failure (4xx/5xx): report it instead of silently
            # storing an empty assistant message.
            error_msg = f"❌ Query failed (HTTP {response.status_code})"
            st.error(error_msg)
            full_response = error_msg
            response.close()
        elif response is not None:
            try:
                # The backend is read as newline-delimited JSON objects with
                # a "type" of either "sources" or "token".
                for line in response.iter_lines():
                    if not line:
                        continue
                    try:
                        data = json.loads(line.decode('utf-8'))
                    except (UnicodeDecodeError, json.JSONDecodeError):
                        continue  # skip malformed lines, keep streaming
                    if not isinstance(data, dict):
                        continue  # ignore JSON values that are not objects

                    if data.get("type") == "sources":
                        sources = data.get("data", [])
                    elif data.get("type") == "token":
                        full_response += data.get("content", "")
                        # Trailing block acts as a typing cursor while streaming.
                        placeholder.markdown(full_response + "▌")

                placeholder.markdown(full_response)

                # Display sources if available
                if sources:
                    _render_sources(sources)

            except Exception as e:
                error_msg = f"❌ Error processing response: {str(e)}"
                placeholder.markdown(error_msg)
                st.error(error_msg)
                full_response = error_msg
            finally:
                # stream=True keeps the connection open until it is released.
                response.close()

        # Save assistant message
        st.session_state.messages.append({
            "role": "assistant",
            "content": full_response,
            "sources": sources
        })

# Footer
st.divider()
# NOTE(review): the footer text as seen contains no HTML tags even though
# unsafe_allow_html=True is passed; wrapper markup may have been lost —
# confirm against the original before restoring any tags.
st.markdown(
    "\n"
    "DocuMind - Enterprise RAG Chatbot | "
    "GitHub"
    "\n",
    unsafe_allow_html=True
)