# Hugging Face Space: ktejeshnaidu/DocuMind_hf (status: Running)
# File size: 8,250 Bytes


import streamlit as st
import json
import os
import time
import requests

# ============================================================================
# STREAMLIT CONFIG (Must be first)
# ============================================================================
# Page-level settings for the app. As the section title says, keep this call
# ahead of every other Streamlit command in the script.
st.set_page_config(
    page_title="DocuMind - RAG ChatBOT",  # text shown in the browser tab
    page_icon="📃",  # icon shown next to the tab title
    layout="wide",  # use the full browser width instead of a centered column
    initial_sidebar_state="expanded"  # sidebar (document upload) starts open
)

# ============================================================================
# API CONFIGURATION
# ============================================================================
# Base URL of the backend API. Defaults to localhost for internal
# communication; set the DOCUMIND_API_URL environment variable to point the UI
# at a backend running elsewhere. An unset or empty variable falls back to the
# default. The trailing slash is stripped because endpoints are appended as
# "/path" and would otherwise produce "//path".
API_URL = (os.environ.get("DOCUMIND_API_URL") or "http://127.0.0.1:8000").rstrip("/")
API_TIMEOUT = 30  # seconds

# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================
def check_backend_health(retries=5, delay=1.0):
    """Check if backend API is running.

    Sends a GET request to ``{API_URL}/docs`` up to ``retries`` times and
    reports success as soon as one attempt answers with HTTP 200.

    Args:
        retries: Maximum number of attempts. Values below 1 send no request
            and yield ``False``.
        delay: Seconds to wait between two attempts.

    Returns:
        ``True`` if any attempt returned status 200, otherwise ``False``.
    """
    for attempt in range(retries):
        try:
            status = requests.get(f"{API_URL}/docs", timeout=5).status_code
        except requests.exceptions.RequestException:
            # Connection refused, timeout, etc. Only request failures are
            # treated as "not up yet"; a bare ``except`` would also swallow
            # KeyboardInterrupt/SystemExit.
            status = None

        if status == 200:
            return True

        # Wait before the next attempt whether the failure was an exception
        # or a non-200 reply, but do not sleep after the final attempt.
        if attempt < retries - 1:
            time.sleep(delay)
    return False

def safe_api_call(method, endpoint, **kwargs):
    """Call a backend endpoint and report failures as text instead of raising.

    Args:
        method: HTTP verb; only ``"GET"`` and ``"POST"`` are accepted.
        endpoint: Path appended to ``API_URL``.
        **kwargs: Forwarded to ``requests.get`` / ``requests.post``. A
            ``timeout`` of ``API_TIMEOUT`` is supplied when none is given.

    Returns:
        A ``(response, error)`` pair: ``(response, None)`` when the request
        was sent, or ``(None, message)`` when it failed or the method is not
        supported.
    """
    try:
        target = f"{API_URL}{endpoint}"
        kwargs.setdefault('timeout', API_TIMEOUT)

        # Guard clause: reject anything other than the two supported verbs.
        if method not in ("GET", "POST"):
            return None, "Invalid method"

        send = requests.get if method == "GET" else requests.post
        return send(target, **kwargs), None
    except requests.exceptions.Timeout:
        return None, "⏱️ Request timed out"
    except requests.exceptions.ConnectionError:
        return None, "⚠️ Backend not responding"
    except Exception as exc:
        return None, f"❌ Error: {str(exc)}"

# ============================================================================
# STREAMLIT FRONTEND UI
# ============================================================================
st.title("📃 DocuMind")
st.markdown("**Enterprise Document Intelligence Chatbot**")
st.markdown("---")

# Health check on load
if "backend_checked" not in st.session_state:
    st.session_state.backend_checked = False
    st.session_state.backend_healthy = False

# Keep probing until the backend has been seen healthy once. Previously a
# failed first probe was cached for the whole session, so the warning below
# stayed on screen even after the backend had finished starting.
if not st.session_state.backend_healthy:
    if st.session_state.backend_checked:
        # Later reruns: one quick attempt, so user interactions are not held
        # up by the full retry loop while the backend is still down.
        st.session_state.backend_healthy = check_backend_health(retries=1)
    else:
        # First run of the session: wait briefly, then use the full retry loop.
        with st.spinner("🔄 Starting backend..."):
            time.sleep(2)  # Give backend time to start
            st.session_state.backend_healthy = check_backend_health()
        st.session_state.backend_checked = True

# Show status
if not st.session_state.backend_healthy:
    st.warning(
        "⚠️ Backend is starting up. This may take 30-60 seconds on first load. "
        "Please refresh the page if you see this message for more than 1 minute."
    )

# --- Sidebar for Document Upload ---
# Lets the user upload a document to the backend's /ingest endpoint and lists
# the documents reported by /sources.
with st.sidebar:
    st.header("🏢 Document Knowledge Base")
    st.markdown("Upload PDFs, DOCX, or TXT documents to add them to the system.")

    uploaded_file = st.file_uploader("Upload a new document", type=["txt", "pdf", "docx"])

    if uploaded_file and st.button("Ingest Document", key="ingest_btn"):
        with st.spinner("Ingesting document (creating chunks & embeddings)..."):
            files = {"file": (uploaded_file.name, uploaded_file.getvalue())}
            response, error = safe_api_call("POST", "/ingest", files=files)

            # NOTE: compare against None explicitly. A requests.Response is
            # falsy for 4xx/5xx statuses, so `if response` would discard the
            # backend's own error text and always print "Unknown error".
            if error:
                st.error(error)
            elif response is not None and response.status_code == 200:
                st.success(f"✅ {uploaded_file.name} ingested successfully!")
            else:
                if response is None:
                    detail = 'Unknown error'
                else:
                    detail = response.text or f"HTTP {response.status_code}"
                st.error(f"❌ Failed to ingest: {detail}")

    st.divider()

    st.subheader("📄 Indexed Documents")
    response, error = safe_api_call("GET", "/sources")

    if error:
        st.warning(f"Could not fetch documents: {error}")
    elif response is not None and response.status_code == 200:
        # A 200 reply with a non-JSON or non-object body must not crash the
        # whole page, so decoding is guarded.
        try:
            payload = response.json()
        except ValueError:
            payload = None

        if not isinstance(payload, dict):
            st.warning("Could not fetch documents: unexpected response from backend")
        else:
            documents = payload.get("documents") or []
            if documents:
                for doc in documents:
                    st.markdown(f"- `{doc}`")
            else:
                st.info("No documents indexed yet.")
    else:
        st.info("Backend not ready yet...")

# --- Main Chat Interface ---
#st.subheader("💬 Chat with Your Documents")


def _render_sources(sources, preview_chars=500):
    """Render retrieved sources inside a collapsible expander.

    Shared by the chat-history replay and the live answer so both paths show
    sources identically. Rendering is defensive: sources are stored in the
    session history and re-rendered on every rerun, so one malformed entry
    must not break the page.

    Args:
        sources: List of dicts read via the keys ``score``, ``source`` and
            ``content``. Anything that is not a non-empty list/tuple renders
            nothing.
        preview_chars: Maximum number of content characters shown per source
            before the text is truncated with "...".
    """
    if not sources or not isinstance(sources, (list, tuple)):
        return

    with st.expander("📚 Show Sources"):
        for idx, src in enumerate(sources, start=1):
            if not isinstance(src, dict):
                continue
            try:
                relevance = f"{float(src.get('score', 0)):.2%}"
            except (TypeError, ValueError):
                # Missing or non-numeric score: show a placeholder, don't raise.
                relevance = "n/a"
            st.caption(f"**Source {idx}** [Relevance: {relevance}]")
            st.markdown(f"**From:** `{src.get('source', 'Unknown')}`")
            content = str(src.get('content') or '')
            if len(content) > preview_chars:
                st.markdown(f"> {content[:preview_chars]}...")
            else:
                st.markdown(f"> {content}")


# Initialize session state
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat history
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])
        _render_sources(msg.get("sources"))

# Chat input
user_input = st.chat_input("Ask a question about your documents...")

if user_input:
    # Add user message to history
    st.session_state.messages.append({"role": "user", "content": user_input})

    # Display user message
    with st.chat_message("user"):
        st.markdown(user_input)

    # Get assistant response
    with st.chat_message("assistant"):
        placeholder = st.empty()
        full_response = ""
        sources = []

        response, error = safe_api_call(
            "POST",
            "/query",
            json={"question": user_input},
            stream=True
        )

        if error:
            st.error(error)
            full_response = error
        # NOTE: `is not None`, not truthiness. A requests.Response is falsy
        # for 4xx/5xx, which used to skip this branch and save an empty answer.
        elif response is not None:
            try:
                # The response was opened with stream=True; the context
                # manager releases the connection even if parsing fails.
                with response:
                    if response.status_code != 200:
                        error_msg = (
                            f"❌ Backend error (HTTP {response.status_code}): "
                            f"{response.text[:500]}"
                        )
                        st.error(error_msg)
                        full_response = error_msg
                    else:
                        # The stream is read as one JSON object per line, with
                        # "type" equal to "sources" or "token".
                        for line in response.iter_lines():
                            if not line:
                                continue
                            try:
                                data = json.loads(line.decode('utf-8'))
                            except (UnicodeDecodeError, json.JSONDecodeError):
                                # Skip lines that are not valid UTF-8 JSON.
                                continue
                            if not isinstance(data, dict):
                                continue

                            if data.get("type") == "sources":
                                sources = data.get("data", [])
                            elif data.get("type") == "token":
                                full_response += data.get("content") or ""
                                # Trailing block acts as a typing cursor.
                                placeholder.markdown(full_response + "▌")

                        placeholder.markdown(full_response)

                        # Display sources if available
                        _render_sources(sources)

            except Exception as e:
                # UI boundary: surface any streaming/parsing failure in the
                # chat instead of crashing the script run.
                error_msg = f"❌ Error processing response: {str(e)}"
                placeholder.markdown(error_msg)
                st.error(error_msg)
                full_response = error_msg

    # Save assistant message
    st.session_state.messages.append({
        "role": "assistant",
        "content": full_response,
        "sources": sources
    })

# Footer
# A divider followed by a centered line of raw HTML with the project name and
# a link to the GitHub repository.
st.divider()
footer_html = "".join([
    "<div style='text-align: center; color: var(--color-text-secondary); font-size: 0.85em;'>",
    "DocuMind - Enterprise RAG Chatbot | ",
    "<a href='https://github.com/TejeshNaiduKona/DocuMind' target='_blank'>GitHub</a>",
    "</div>",
])
st.markdown(footer_html, unsafe_allow_html=True)