File size: 8,250 Bytes
03033fc
5078486
f83e60c
f590c6d
e8be885
03033fc
950ba0f
 
5078486
950ba0f
e8be885
7e788d0
 
e8be885
5078486
e8be885
f83e60c
950ba0f
f590c6d
950ba0f
f590c6d
 
 
08220ff
 
f590c6d
08220ff
f590c6d
 
 
 
 
 
 
 
 
 
 
08220ff
5078486
f590c6d
08220ff
8cbbd4c
5078486
 
 
 
 
 
08220ff
5078486
f590c6d
5078486
f590c6d
 
 
 
5078486
 
08220ff
950ba0f
f590c6d
950ba0f
7e788d0
f590c6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32f61fa
f590c6d
 
5078486
f590c6d
5078486
f590c6d
 
950ba0f
5078486
f590c6d
 
 
 
 
950ba0f
f590c6d
5078486
f590c6d
5078486
f590c6d
5078486
f590c6d
 
 
 
5078486
f590c6d
 
 
 
 
5078486
f590c6d
08220ff
f590c6d
7e788d0
950ba0f
f590c6d
 
 
67efdc2
f590c6d
08220ff
f590c6d
5078486
f590c6d
 
 
 
 
 
 
 
 
 
 
 
 
 
e8be885
 
f590c6d
5078486
f590c6d
 
 
5078486
 
f590c6d
 
5078486
 
f590c6d
5078486
 
f590c6d
 
 
5078486
08220ff
5078486
 
f590c6d
 
 
5078486
 
 
 
 
 
 
f590c6d
 
 
 
5078486
 
 
 
 
f590c6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5078486
f590c6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225

import streamlit as st
import json
import os
import time
import requests

# ============================================================================
# STREAMLIT CONFIG (Must be first)
# ============================================================================
# Page-level settings: browser tab title/icon, wide layout, sidebar open on load.
# The icon is a real emoji character; the previous value was a mis-decoded byte
# sequence that browsers would show as garbage text.
st.set_page_config(
    page_title="DocuMind - RAG ChatBOT",
    page_icon="📃",
    layout="wide",
    initial_sidebar_state="expanded"
)

# ============================================================================
# API CONFIGURATION
# ============================================================================
# Base URL of the backend API. Defaults to localhost for internal communication;
# set DOCUMIND_API_URL to point the UI at a backend running elsewhere.
# Any trailing slash is stripped because endpoints are appended as "/path".
API_URL = os.environ.get("DOCUMIND_API_URL", "http://127.0.0.1:8000").rstrip("/")
API_TIMEOUT = 30  # seconds; default per-request timeout applied by safe_api_call

# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================
def check_backend_health(retries=5):
    """Report whether the backend API answers on its ``/docs`` endpoint.

    Sends up to ``retries`` GET requests to ``{API_URL}/docs`` (5 second
    timeout each) and waits one second between consecutive attempts.

    Args:
        retries: Maximum number of attempts. With zero or a negative value no
            request is made and the function returns False.

    Returns:
        True as soon as one attempt answers with HTTP 200, otherwise False.
    """
    for attempt in range(retries):
        try:
            healthy = requests.get(f"{API_URL}/docs", timeout=5).status_code == 200
        except requests.exceptions.RequestException:
            # Connection refused, timeout, etc.: the backend is not up yet.
            # Only request errors are caught so KeyboardInterrupt/SystemExit
            # still propagate.
            healthy = False

        if healthy:
            return True

        # Pause before the next attempt (also after a non-200 answer), but do
        # not delay the final "give up" return.
        if attempt < retries - 1:
            time.sleep(1)
    return False

def safe_api_call(method, endpoint, **kwargs):
    """Call a backend endpoint and convert failures into a message.

    Args:
        method: HTTP verb, "GET" or "POST" (matched case-insensitively).
        endpoint: Path appended to ``API_URL``, e.g. ``"/sources"``.
        **kwargs: Passed through to ``requests.get`` / ``requests.post``.
            ``timeout`` defaults to ``API_TIMEOUT`` when not supplied.

    Returns:
        A ``(response, error)`` tuple. On success ``response`` is the
        ``requests.Response`` (whatever its status code) and ``error`` is None.
        On failure ``response`` is None and ``error`` is a user-facing string.
    """
    verb = method.upper() if isinstance(method, str) else method
    if verb not in ("GET", "POST"):
        return None, "Invalid method"

    try:
        url = f"{API_URL}{endpoint}"
        kwargs.setdefault('timeout', API_TIMEOUT)
        send = requests.get if verb == "GET" else requests.post
        return send(url, **kwargs), None
    except requests.exceptions.Timeout:
        return None, "⏱️ Request timed out"
    except requests.exceptions.ConnectionError:
        return None, "⚠️ Backend not responding"
    except Exception as e:
        # UI boundary: any other failure is reported to the user, not raised.
        return None, f"❌ Error: {str(e)}"

# ============================================================================
# STREAMLIT FRONTEND UI
# ============================================================================
st.title("📃 DocuMind")
st.markdown("**Enterprise Document Intelligence Chatbot**")
st.markdown("---")

# Health check on load. Two session-state flags are used:
#   backend_checked - the initial (slow) probe has run for this session
#   backend_healthy - result of the most recent probe
if "backend_checked" not in st.session_state:
    st.session_state.backend_checked = False
    st.session_state.backend_healthy = False

if not st.session_state.backend_checked:
    with st.spinner("🔄 Starting backend..."):
        time.sleep(2)  # Give backend time to start
        st.session_state.backend_healthy = check_backend_health()
    st.session_state.backend_checked = True
elif not st.session_state.backend_healthy:
    # The first probe failed: try once more on every rerun so the warning
    # below disappears as soon as the backend comes up, instead of staying
    # for the whole session.
    st.session_state.backend_healthy = check_backend_health(retries=1)

# Show status
if not st.session_state.backend_healthy:
    st.warning(
        "⚠️ Backend is starting up. This may take 30-60 seconds on first load. "
        "Please refresh the page if you see this message for more than 1 minute."
    )

# --- Sidebar for Document Upload ---
with st.sidebar:
    st.header("🏢 Document Knowledge Base")
    st.markdown("Upload PDFs, DOCX, or TXT documents to add them to the system.")

    uploaded_file = st.file_uploader("Upload a new document", type=["txt", "pdf", "docx"])

    # The button is only rendered once a file has been selected.
    if uploaded_file and st.button("Ingest Document", key="ingest_btn"):
        with st.spinner("Ingesting document (creating chunks & embeddings)..."):
            files = {"file": (uploaded_file.name, uploaded_file.getvalue())}
            response, error = safe_api_call("POST", "/ingest", files=files)

            if error:
                st.error(error)
            elif response is not None and response.status_code == 200:
                st.success(f"✅ {uploaded_file.name} ingested successfully!")
            else:
                # A requests.Response is falsy for 4xx/5xx statuses, so compare
                # against None; otherwise the backend's error text is never shown.
                detail = response.text if response is not None else 'Unknown error'
                st.error(f"❌ Failed to ingest: {detail}")

    st.divider()

    st.subheader("📄 Indexed Documents")
    response, error = safe_api_call("GET", "/sources")

    if error:
        st.warning(f"Could not fetch documents: {error}")
    elif response is not None and response.status_code == 200:
        try:
            payload = response.json()
        except ValueError:
            # Body was not valid JSON.
            payload = None

        if not isinstance(payload, dict):
            st.warning("Could not fetch documents: invalid response from backend")
        else:
            documents = payload.get("documents") or []
            if documents:
                for doc in documents:
                    st.markdown(f"- `{doc}`")
            else:
                st.info("No documents indexed yet.")
    else:
        st.info("Backend not ready yet...")

# --- Main Chat Interface ---
# st.subheader("💬 Chat with Your Documents")

# Initialize session state
if "messages" not in st.session_state:
    st.session_state.messages = []


def _render_sources(sources):
    """Show retrieved source chunks in a collapsible expander.

    Each entry is read as a dict with optional ``score``, ``source`` and
    ``content`` keys. Content longer than 500 characters is truncated.
    Nothing is rendered when ``sources`` is empty or None.
    """
    if not sources:
        return
    with st.expander("📚 Show Sources"):
        for idx, src in enumerate(sources, start=1):
            score = src.get('score') or 0  # tolerate a missing or null score
            st.caption(f"**Source {idx}** [Relevance: {score:.2%}]")
            st.markdown(f"**From:** `{src.get('source', 'Unknown')}`")
            content = src.get('content') or ''
            excerpt = f"{content[:500]}..." if len(content) > 500 else content
            st.markdown(f"> {excerpt}")


# Display chat history
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])
        _render_sources(msg.get("sources"))

# Chat input
user_input = st.chat_input("Ask a question about your documents...")

if user_input:
    # Add user message to history
    st.session_state.messages.append({"role": "user", "content": user_input})

    # Display user message
    with st.chat_message("user"):
        st.markdown(user_input)

    # Get assistant response
    with st.chat_message("assistant"):
        placeholder = st.empty()
        full_response = ""
        sources = []

        response, error = safe_api_call(
            "POST",
            "/query",
            json={"question": user_input},
            stream=True
        )

        if error:
            st.error(error)
            full_response = error
        elif response is not None:
            # Compare against None: a requests.Response is falsy for 4xx/5xx,
            # which would otherwise skip this branch and store an empty answer.
            try:
                if not response.ok:
                    full_response = (
                        f"❌ Query failed (HTTP {response.status_code}): {response.text}"
                    )
                    st.error(full_response)
                else:
                    # The body is read as one JSON object per line; objects with
                    # type "sources" carry the source list, type "token" carries
                    # the next piece of answer text.
                    for line in response.iter_lines():
                        if not line:
                            continue
                        try:
                            data = json.loads(line.decode('utf-8'))
                        except (UnicodeDecodeError, json.JSONDecodeError):
                            continue  # skip lines that are not valid JSON
                        if not isinstance(data, dict):
                            continue

                        kind = data.get("type")
                        if kind == "sources":
                            sources = data.get("data") or []
                        elif kind == "token":
                            full_response += data.get("content") or ""
                            # Trailing block character acts as a typing cursor.
                            placeholder.markdown(full_response + "▌")

                    placeholder.markdown(full_response)

                    # Display sources if available
                    _render_sources(sources)

            except Exception as e:
                error_msg = f"❌ Error processing response: {str(e)}"
                placeholder.markdown(error_msg)
                st.error(error_msg)
                full_response = error_msg
            finally:
                # With stream=True the connection stays checked out until the
                # body is fully read or the response is closed.
                response.close()

    # Save assistant message
    st.session_state.messages.append({
        "role": "assistant",
        "content": full_response,
        "sources": sources
    })

# Footer: a divider followed by a centered attribution line linking to the repo.
st.divider()
footer_html = "".join([
    "<div style='text-align: center; color: var(--color-text-secondary); font-size: 0.85em;'>",
    "DocuMind - Enterprise RAG Chatbot | ",
    "<a href='https://github.com/TejeshNaiduKona/DocuMind' target='_blank'>GitHub</a>",
    "</div>",
])
# Raw HTML is required for the inline styling and the link target.
st.markdown(footer_html, unsafe_allow_html=True)