Spaces:

ktejeshnaidu
/

DocuMind_hf

Running

App Files Files Community

ktejeshnaidu committed on Apr 2

Commit

e8be885

verified ·

1 Parent(s): c20b064

Update app.py

Browse files

Files changed (1) hide show

app.py +160 -44

app.py CHANGED Viewed

@@ -1,63 +1,146 @@
 import streamlit as st
 import requests
 import json
 import os
-st.set_page_config(page_title="DocuMind - Enterprise RAG", page_icon="🧠", layout="wide")
-API_URL = os.environ.get("API_URL", "http://127.0.0.1:8000")
 st.title("🧠 DocuMind")
-st.markdown("Enterprise Document Intelligence Chatbot")
-# --- Sidebar ---
 with st.sidebar:
     st.header("🏢 Document Knowledge Base")
     st.markdown("Upload PDFs, DOCX, or TXT documents to add them to the system.")
     uploaded_file = st.file_uploader("Upload a new document", type=["txt", "pdf", "docx"])
-    if uploaded_file and st.button("Ingest Document"):
         with st.spinner("Ingesting document (creating chunks & embeddings)..."):
             files = {"file": (uploaded_file.name, uploaded_file.getvalue())}
             try:
-                res = requests.post(f"{API_URL}/ingest", files=files)
                 if res.status_code == 200:
-                    st.success(f"{uploaded_file.name} ingested successfully!")
                 else:
-                    st.error(f"Failed to ingest: {res.text}")
             except Exception as e:
-                st.error(f"Backend is not running: {e}")
     st.divider()
-    st.subheader("Indexed Documents")
     try:
-        res = requests.get(f"{API_URL}/sources")
         if res.status_code == 200:
-            for doc in res.json().get("documents", []):
-                st.markdown(f"- 📄 `{doc}`")
-    except:
-        st.warning("Could not connect to FastAPI server.")
-# --- Chat Interface ---
 if "messages" not in st.session_state:
     st.session_state.messages = []
-# Display history
 for msg in st.session_state.messages:
     with st.chat_message(msg["role"]):
         st.markdown(msg["content"])
-        if "sources" in msg and msg["sources"]:
-            with st.expander("Show Sources"):
                 for idx, src in enumerate(msg["sources"]):
-                    st.caption(f"**Source {idx+1} [Relevance: {src['score']:.2f}]**: {src['source']}")
-                    st.markdown(f"> {src['content']}")
-if user_input := st.chat_input("Ask a question about your documents..."):
-    # Add user message
     st.session_state.messages.append({"role": "user", "content": user_input})
     with st.chat_message("user"):
         st.markdown(user_input)
     # Get assistant response
     with st.chat_message("assistant"):
         placeholder = st.empty()
@@ -65,32 +148,65 @@ if user_input := st.chat_input("Ask a question about your documents..."):
         sources = []
         try:
-            with requests.post(f"{API_URL}/query", json={"question": user_input}, stream=True) as r:
                 r.raise_for_status()
                 for line in r.iter_lines():
                     if line:
-                        decoded_line = line.decode('utf-8')
-                        data = json.loads(decoded_line)
-                        if data["type"] == "sources":
-                            sources = data["data"]
-                        elif data["type"] == "token":
-                            full_response += data["content"]
-                            placeholder.markdown(full_response + "▌")
-            placeholder.markdown(full_response)
-            if sources:
-                with st.expander("Show Sources"):
-                    for idx, src in enumerate(sources):
-                        st.caption(f"**Source {idx+1} [Relevance: {src['score']:.2f}]**: {src['source']}")
-                        st.markdown(f"> {src['content']}")
         except Exception as e:
-            st.error(f"Error querying backend: {e}")
-            full_response = "Sorry, the backend encountered an error."
     # Save assistant message
     st.session_state.messages.append({
-        "role": "assistant",
         "content": full_response,
         "sources": sources
     })

+"""
+DocuMind - Unified FastAPI + Streamlit App for HuggingFace Spaces
+Combines backend API and frontend UI in a single process
+"""
 import streamlit as st
 import requests
 import json
 import os
+import subprocess
+import time
+import sys
+from threading import Thread
+from pathlib import Path
+# ============================================================================
+# STREAMLIT CONFIG (Must be first)
+# ============================================================================
+st.set_page_config(
+    page_title="DocuMind - Enterprise RAG",
+    page_icon="🧠",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# ============================================================================
+# BACKEND STARTUP
+# ============================================================================
+@st.cache_resource
+def start_backend_server():
+    """Start FastAPI backend in a separate thread"""
+    def run_server():
+        # Import FastAPI backend components
+        from main import app as fastapi_app
+        import uvicorn
+        # Run uvicorn server
+        uvicorn.run(
+            fastapi_app,
+            host="127.0.0.1",
+            port=8000,
+            log_level="warning",
+            access_log=False
+        )
+    # Start backend in daemon thread
+    backend_thread = Thread(target=run_server, daemon=True)
+    backend_thread.start()
+    # Give backend time to start
+    time.sleep(2)
+    return "Backend started"
+# Start backend
+try:
+    start_backend_server()
+    API_URL = "http://127.0.0.1:8000"
+except Exception as e:
+    st.error(f"Failed to start backend: {e}")
+    st.stop()
+# ============================================================================
+# STREAMLIT FRONTEND UI
+# ============================================================================
 st.title("🧠 DocuMind")
+st.markdown("**Enterprise Document Intelligence Chatbot**")
+st.markdown("---")
+# --- Sidebar for Document Upload ---
 with st.sidebar:
     st.header("🏢 Document Knowledge Base")
     st.markdown("Upload PDFs, DOCX, or TXT documents to add them to the system.")
     uploaded_file = st.file_uploader("Upload a new document", type=["txt", "pdf", "docx"])
+    if uploaded_file and st.button("Ingest Document", key="ingest_btn"):
         with st.spinner("Ingesting document (creating chunks & embeddings)..."):
             files = {"file": (uploaded_file.name, uploaded_file.getvalue())}
             try:
+                res = requests.post(f"{API_URL}/ingest", files=files, timeout=30)
                 if res.status_code == 200:
+                    st.success(f"✅ {uploaded_file.name} ingested successfully!")
                 else:
+                    st.error(f"❌ Failed to ingest: {res.text}")
+            except requests.exceptions.ConnectionError:
+                st.error("⚠️ Backend is not running. Please refresh the page.")
             except Exception as e:
+                st.error(f"⚠️ Error: {str(e)}")
     st.divider()
+    st.subheader("📄 Indexed Documents")
     try:
+        res = requests.get(f"{API_URL}/sources", timeout=10)
         if res.status_code == 200:
+            documents = res.json().get("documents", [])
+            if documents:
+                for doc in documents:
+                    st.markdown(f"- `{doc}`")
+            else:
+                st.info("No documents indexed yet.")
+        else:
+            st.warning("Could not fetch documents list")
+    except requests.exceptions.ConnectionError:
+        st.warning("⚠️ Could not connect to backend")
+    except Exception as e:
+        st.warning(f"⚠️ Error: {str(e)}")
+# --- Main Chat Interface ---
+st.subheader("💬 Chat with Your Documents")
+# Initialize session state
 if "messages" not in st.session_state:
     st.session_state.messages = []
+# Display chat history
 for msg in st.session_state.messages:
     with st.chat_message(msg["role"]):
         st.markdown(msg["content"])
+        if "sources" in msg and msg.get("sources"):
+            with st.expander("📚 Show Sources"):
                 for idx, src in enumerate(msg["sources"]):
+                    score = src.get('score', 0)
+                    st.caption(f"**Source {idx+1}** [Relevance: {score:.2%}]")
+                    st.markdown(f"**From:** `{src.get('source', 'Unknown')}`")
+                    st.markdown(f"> {src.get('content', '')[:500]}...")
+# Chat input
+user_input = st.chat_input("Ask a question about your documents...")
+if user_input:
+    # Add user message to history
     st.session_state.messages.append({"role": "user", "content": user_input})
+    # Display user message
     with st.chat_message("user"):
         st.markdown(user_input)
     # Get assistant response
     with st.chat_message("assistant"):
         placeholder = st.empty()
         sources = []
         try:
+            with requests.post(
+                f"{API_URL}/query",
+                json={"question": user_input},
+                stream=True,
+                timeout=60
+            ) as r:
                 r.raise_for_status()
                 for line in r.iter_lines():
                     if line:
+                        try:
+                            decoded_line = line.decode('utf-8')
+                            data = json.loads(decoded_line)
+                            if data.get("type") == "sources":
+                                sources = data.get("data", [])
+                            elif data.get("type") == "token":
+                                full_response += data.get("content", "")
+                                placeholder.markdown(full_response + "▌")
+                        except json.JSONDecodeError:
+                            continue
+                placeholder.markdown(full_response)
+                # Display sources if available
+                if sources:
+                    with st.expander("📚 Show Sources"):
+                        for idx, src in enumerate(sources):
+                            score = src.get('score', 0)
+                            st.caption(f"**Source {idx+1}** [Relevance: {score:.2%}]")
+                            st.markdown(f"**From:** `{src.get('source', 'Unknown')}`")
+                            st.markdown(f"> {src.get('content', '')[:500]}...")
+        except requests.exceptions.Timeout:
+            error_msg = "⏱️ Request timed out. Please try a shorter question."
+            st.error(error_msg)
+            full_response = error_msg
+        except requests.exceptions.ConnectionError:
+            error_msg = "⚠️ Backend connection failed. Please refresh the page."
+            st.error(error_msg)
+            full_response = error_msg
         except Exception as e:
+            error_msg = f"❌ Error: {str(e)}"
+            st.error(error_msg)
+            full_response = error_msg
     # Save assistant message
     st.session_state.messages.append({
+        "role": "assistant",
         "content": full_response,
         "sources": sources
     })
+# Footer
+st.divider()
+st.markdown(
+    "<div style='text-align: center; color: gray; font-size: 0.8em;'>"
+    "DocuMind - Enterprise RAG Chatbot | "
+    "<a href='https://github.com/TejeshNaiduKona/DocuMind' target='_blank'>GitHub</a>"
+    "</div>",
+    unsafe_allow_html=True
+)