import gradio as gr from rag_pipeline import RAGPipeline from document_processor import DocumentProcessor import os import uuid from dotenv import load_dotenv load_dotenv() class DocumentRagApp: def __init__(self): self.processor = DocumentProcessor() self.rag_pipeline = RAGPipeline() def initialize_session(self, session_data): """ Initialize or restore a user session. Session expires after 7 days (consistent with file deletion). Args: session_data: Dict with 'id' and 'created_at' or None for new session Returns: session_data (dict), loaded documents list, and status message """ from datetime import datetime, timedelta now = datetime.now() if session_data is None: # New user - generate session with timestamp session_data = {"id": str(uuid.uuid4()), "created_at": now.isoformat()} return session_data, [], "" # Check if session has expired (older than 7 days) try: created_at = datetime.fromisoformat(session_data.get("created_at", "")) if now - created_at > timedelta(days=7): # Session expired - create new session session_data = {"id": str(uuid.uuid4()), "created_at": now.isoformat()} return session_data, [], "Session expired. Started fresh session." except (ValueError, TypeError): # Invalid timestamp - create new session session_data = {"id": str(uuid.uuid4()), "created_at": now.isoformat()} return session_data, [], "" # Valid session - load existing documents session_id = session_data.get("id") existing_docs = self.rag_pipeline.get_documents_by_session(session_id) doc_names = [doc["filename"] for doc in existing_docs] status = f"✓ Restored {len(existing_docs)} documents" if existing_docs else "" return session_data, doc_names, status def load_samples(self, vertical, session_id, current_docs): """Load sample documents with live progress updates""" samples = { "Legal": [ "data/samples/legal/service_agreement.txt", "data/samples/legal/amendment.txt", "data/samples/legal/nda.txt", ], "Research": [ "data/samples/research/llm_enterprise_survey.txt", "data/samples/research/rag_methodology.txt", "data/samples/research/vector_db_benchmark.txt", ], "FinOps": [ "data/samples/finops/cloud_cost_optimization.txt", "data/samples/finops/aws_invoice_sept2024.txt", "data/samples/finops/kubernetes_cost_allocation.txt", ], } loaded_docs = list(current_docs) if current_docs else [] try: total_chunks = 0 for idx, path in enumerate(samples[vertical], 1): if os.path.exists(path): yield ( f"Loading document {idx}/{len(samples[vertical])}...", loaded_docs, ) chunks = self.processor.process_txt(path) yield ( f"Creating smart chunks ({len(chunks)} chunks)...", loaded_docs, ) # Samples are global (is_sample=True), no session filtering self.rag_pipeline.add_documents( chunks, session_id=None, is_sample=True ) doc_name = os.path.basename(path) if doc_name not in loaded_docs: loaded_docs.append(doc_name) total_chunks += len(chunks) yield ( f"✓ Success! Loaded {len(samples[vertical])} documents ({total_chunks} searchable chunks)", loaded_docs, ) except Exception as e: yield f"❌ Error: {str(e)}", loaded_docs def process_file(self, files, session_id, current_docs): """Process uploaded file(s) with live progress updates. Supports single or multiple files.""" loaded_docs = list(current_docs) if current_docs else [] if not files: yield "⚠️ Please upload a file", loaded_docs return # Normalize to list (handles both single file and list of files) file_list = files if isinstance(files, list) else [files] total_files = len(file_list) total_chunks = 0 processed_files = [] try: for idx, file in enumerate(file_list, 1): filename = os.path.basename(file.name) yield f"📄 Processing {idx}/{total_files}: {filename}...", loaded_docs ext = os.path.splitext(file.name)[1].lower() if ext == ".pdf": chunks = self.processor.process_pdf(file.name) elif ext == ".txt": chunks = self.processor.process_txt(file.name) elif ext == ".docx": chunks = self.processor.process_docx(file.name) else: yield ( f"⚠️ Skipped {filename}: Unsupported format (use PDF, DOCX, or TXT)", loaded_docs, ) continue yield f"✂️ {filename}: Created {len(chunks)} chunks...", loaded_docs # Pass session_id for user document isolation self.rag_pipeline.add_documents( chunks, session_id=session_id, is_sample=False ) if filename not in loaded_docs: loaded_docs.append(filename) total_chunks += len(chunks) processed_files.append(filename) # Final success message if processed_files: if len(processed_files) == 1: yield ( f"✓ Success! {processed_files[0]} ready ({total_chunks} searchable chunks)", loaded_docs, ) else: yield ( f"✓ Success! {len(processed_files)} documents processed ({total_chunks} total chunks)", loaded_docs, ) else: yield "⚠️ No valid documents to process", loaded_docs except Exception as e: yield ( f"❌ Error: {str(e)}. Please try again or contact support.", loaded_docs, ) def switch_model(self, model_choice): """Handle model switching from UI radio button""" model_map = { "GPT-OSS 120B (OpenAI) - Default": "gpt-oss-120b", "Llama 3.3 70B (Meta)": "llama-3.3-70b", "Gemma 3 27B (Google)": "gemma-3-27b", } model_key = model_map.get(model_choice) if not model_key: return "❌ Invalid model selection" try: display_name = self.rag_pipeline.switch_model(model_key) return f"✓ Switched to {display_name}" except Exception as e: return f"❌ Error switching model: {str(e)}" def ask(self, question, session_id, current_docs): """Answer a question using documents from this session""" if not current_docs: return "Please load documents first" if not question.strip(): return "Please enter a question" try: result = self.rag_pipeline.query(question, session_id=session_id) return result["answer"] except Exception as e: return f"Error: {str(e)}" def ask_stream(self, question, session_id, current_docs): """Stream answer with thinking indicator for real-time display.""" if not current_docs: yield "Please load documents first" return if not question.strip(): yield "Please enter a question" return # Thinking indicator yield "🔍 Analyzing documents..." try: for answer_text in self.rag_pipeline.query_stream(question, session_id): yield answer_text except Exception as e: yield f"Error: {str(e)}" def delete_document(self, doc_to_delete, session_id, current_docs): """ Delete a document from the session. - Sample docs: only removed from session (not from storage) - User docs: removed from session AND storage/ChromaDB """ if not doc_to_delete: return current_docs, "No document selected" # Check if it's a sample document sample_names = [ "service_agreement.txt", "amendment.txt", "nda.txt", "llm_enterprise_survey.txt", "rag_methodology.txt", "vector_db_benchmark.txt", "cloud_cost_optimization.txt", "aws_invoice_sept2024.txt", "kubernetes_cost_allocation.txt", ] is_sample = doc_to_delete in sample_names if is_sample: # Sample doc: just remove from this session's list (not from storage) print(f"[DEBUG] Removing SAMPLE doc from session only: {doc_to_delete}") updated_docs = [d for d in current_docs if d != doc_to_delete] return updated_docs, f"✓ Removed {doc_to_delete}" else: # User doc: remove from session AND delete from ChromaDB print( f"[DEBUG] Deleting USER doc from session AND storage: {doc_to_delete}" ) user_docs = self.rag_pipeline.get_documents_by_session(session_id) for doc in user_docs: if doc["filename"] == doc_to_delete: print(f"[DEBUG] Found in storage, deleting: {doc['path']}") success = self.rag_pipeline.delete_document(session_id, doc["path"]) if success: print(f"[DEBUG] Successfully deleted from ChromaDB") updated_docs = [d for d in current_docs if d != doc_to_delete] return updated_docs, f"✓ Deleted {doc_to_delete}" else: print(f"[DEBUG] Failed to delete from ChromaDB") return current_docs, f"❌ Failed to delete {doc_to_delete}" # Document not found in storage, just remove from list print(f"[DEBUG] Doc not in storage, just removing from session list") updated_docs = [d for d in current_docs if d != doc_to_delete] return updated_docs, f"✓ Removed {doc_to_delete}" app = DocumentRagApp() # Premium Enterprise Design System (Restored & Cleaned) css = """ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&family=Outfit:wght@400;500;600;700&display=swap'); :root { /* Material Design Color Palette */ --primary-gradient: linear-gradient(135deg, #2196F3 0%, #1976D2 100%); --cta-discovery-gradient: linear-gradient(135deg, #00C853 0%, #00A152 100%); --surface-dark: #0B0F19; --surface-glass: rgba(17, 24, 39, 0.7); --border-glass: rgba(255, 255, 255, 0.08); --text-primary: #F9FAFB; --text-secondary: #9CA3AF; --accent: #2196F3; --accent-hover: #1976D2; --cta-discovery: #00C853; --font-heading: 'Outfit', sans-serif; --font-body: 'Inter', sans-serif; } /* --- GLOBAL RESET & CLEANING --- */ body, .gradio-container { background-color: var(--surface-dark) !important; font-family: var(--font-body) !important; color: var(--text-primary) !important; } /* ⚠️ CRITICAL: Remove Gradio's default nested boxes/backgrounds ⚠️ */ .gradio-container .block, .gradio-container .form, .gradio-container .gradio-box, .gradio-container .padded, .gradio-container .gradio-group, .gradio-container .gradio-row, .gradio-container .gradio-column { background-color: transparent !important; border: none !important; box-shadow: none !important; padding: 0 !important; } /* Force transparency on specific internal elements to fix "Grey Box" issue */ .glass-card div { background-color: transparent !important; border: none !important; } /* Re-assert styles for Inputs/Buttons since the rule above is aggressive */ .glass-card textarea, .glass-card input[type="text"], .glass-card .gradio-dropdown { background-color: rgba(0, 0, 0, 0.3) !important; border: 1px solid var(--border-glass) !important; } .glass-card button.primary-btn { background: var(--primary-gradient) !important; } .glass-card button.query-btn { background: rgba(255, 255, 255, 0.05) !important; border: 1px solid var(--border-glass) !important; } /* Typography */ h1, h2, h3, h4 { font-family: var(--font-heading); } span, p, div { font-family: var(--font-body); } /* --- HERO SECTION --- */ #header { text-align: center; margin-bottom: 4rem; padding-top: 2rem; } #header h1 { font-size: 3.5rem; font-weight: 700; margin-bottom: 0.5rem; color: #FFFFFF; /* High contrast white */ text-shadow: 0 0 20px rgba(59, 130, 246, 0.5); /* Glow effect instead of gradient text */ letter-spacing: -0.02em; } #header p { font-size: 1.2rem; color: var(--text-secondary); } /* --- GLASS CARDS --- */ .glass-card { background: var(--surface-glass) !important; backdrop-filter: blur(12px); -webkit-backdrop-filter: blur(12px); border: 1px solid var(--border-glass) !important; border-radius: 20px !important; padding: 2rem 2rem 1.5rem 2rem !important; /* Reduced bottom padding */ margin-bottom: 2rem !important; box-shadow: 0 20px 40px -10px rgba(0,0,0,0.5) !important; height: 100% !important; display: flex !important; flex-direction: column !important; } /* Prevent left column from expanding - constrain height and hide scrollbar */ .gradio-row > .gradio-column:first-child .glass-card { max-height: none; overflow: visible; } /* Hide all scrollbars in main container */ .gradio-container { overflow-x: hidden !important; } body { overflow-x: hidden !important; } .card-header { font-family: var(--font-heading); font-size: 0.9rem; text-transform: uppercase; letter-spacing: 0.1em; color: var(--text-secondary); margin-bottom: 0.5rem; /* Reduced bottom margin */ border-bottom: 1px solid var(--border-glass); padding-bottom: 0.5rem; } /* --- INPUTS & BUTTONS (Cleaned) --- */ .gradio-dropdown, .gradio-textbox textarea { background-color: rgba(0, 0, 0, 0.3) !important; border: 1px solid var(--border-glass) !important; border-radius: 10px !important; color: var(--text-primary) !important; } /* Upload Area specific */ .gradio-file { background-color: rgba(0, 0, 0, 0.15) !important; border: 2px dashed rgba(255, 255, 255, 0.3) !important; border-radius: 12px !important; padding: 1rem !important; } .gradio-file:hover { background-color: rgba(0, 0, 0, 0.2) !important; border-color: var(--accent) !important; } .gradio-dropdown:hover, .gradio-textbox textarea:hover { border-color: var(--accent) !important; } /* Primary Button */ .primary-btn { background: var(--primary-gradient) !important; border: 1px solid rgba(33, 150, 243, 0.3) !important; color: white !important; font-weight: 600 !important; padding: 1rem !important; border-radius: 10px !important; transition: all 0.3s ease; box-shadow: 0 4px 15px rgba(33, 150, 243, 0.25); margin-top: 0 !important; } .primary-btn:hover { transform: translateY(-2px); box-shadow: 0 0 0 1px rgba(33, 150, 243, 0.5), 0 8px 25px rgba(33, 150, 243, 0.4); border-color: rgba(33, 150, 243, 0.6) !important; } /* Quick Query Buttons */ .query-btn { background: rgba(255, 255, 255, 0.05) !important; border: 1px solid var(--border-glass) !important; color: var(--text-secondary) !important; border-radius: 8px !important; padding: 0.8rem !important; text-align: left !important; font-size: 0.95rem !important; } .query-btn:hover { background: rgba(255, 255, 255, 0.1) !important; color: var(--text-primary) !important; border-color: var(--accent) !important; } /* --- TABS (Seamless) --- */ .tab-nav { border: none !important; margin-top: 1rem !important; /* Spacing above tabs */ margin-bottom: 1.5rem !important; /* Spacing below tabs */ background: rgba(0,0,0,0.2) !important; border-radius: 12px; padding: 4px !important; display: flex; gap: 4px; } .tab-nav button { border: none !important; color: var(--text-secondary) !important; background: transparent !important; border-radius: 8px !important; flex-grow: 1; font-family: var(--font-heading) !important; } .tab-nav button.selected { background: rgba(255,255,255,0.1) !important; color: var(--text-primary) !important; font-weight: 600 !important; } /* --- ANSWER SECTION --- */ #answer-section { background: rgba(0,0,0,0.2) !important; border-radius: 12px; padding: 1.5rem !important; border: 1px solid var(--border-glass); } #answer-section .markdown { font-size: 1.1rem; line-height: 1.7; color: var(--text-primary); } #answer-section strong { color: #60A5FA; } /* Calendar/Discovery Badge - Professional Green CTA */ .calendar-badge { background: linear-gradient(135deg, #00C853 0%, #00A152 100%) !important; color: white; padding: 0.75rem 1.6rem; border-radius: 100px; font-weight: 600; text-decoration: none; border: none; transition: all 0.3s ease; box-shadow: 0 4px 16px rgba(0, 200, 83, 0.35); display: inline-block; } .calendar-badge:hover { background: linear-gradient(135deg, #00A152 0%, #00853E 100%) !important; transform: translateY(-2px); box-shadow: 0 6px 24px rgba(0, 200, 83, 0.5); } .calendar-badge span { font-size: 1.1rem; margin-right: 0.3rem; } /* --- MODEL SELECTOR --- */ .model-selector { background: rgba(0, 0, 0, 0.15) !important; border-radius: 8px !important; padding: 0.75rem !important; margin-bottom: 1rem !important; border: 1px solid var(--border-glass) !important; } .model-selector label { background: rgba(255, 255, 255, 0.05) !important; border: 1px solid var(--border-glass) !important; padding: 0.5rem 0.75rem !important; border-radius: 6px !important; transition: all 0.2s !important; cursor: pointer !important; margin: 0.2rem 0 !important; display: block !important; font-size: 0.875rem !important; } .model-selector label:hover { background: rgba(255, 255, 255, 0.1) !important; border-color: var(--accent) !important; transform: translateX(3px) !important; } .model-selector input:checked + label { background: var(--primary-gradient) !important; border-color: transparent !important; font-weight: 600 !important; box-shadow: 0 3px 12px rgba(33, 150, 243, 0.4) !important; } .model-status { font-size: 0.8rem; color: var(--text-secondary); padding: 0.25rem 0.5rem; margin-top: 0.1rem; } /* --- SECURITY BADGE --- */ .security-badge { display: flex; align-items: center; gap: 0.75rem; background: rgba(255, 255, 255, 0.03) !important; border: 1px solid rgba(255, 255, 255, 0.1) !important; border-radius: 12px; padding: 0.5rem 0.8rem; margin-top: 0.5rem; margin-bottom: 0 !important; transition: all 0.3s ease; } .security-badge:hover { background: rgba(255, 255, 255, 0.05) !important; border-color: rgba(255, 255, 255, 0.15) !important; box-shadow: 0 0 15px rgba(100, 100, 100, 0.2); } .badge-icon { font-size: 1.3rem; line-height: 1; opacity: 0.9; } .badge-content { flex: 1; } .badge-title { font-size: 0.85rem; font-weight: 600; color: var(--text-primary); margin-bottom: 0.15rem; } .badge-subtitle { font-size: 0.7rem; color: var(--text-secondary); opacity: 0.7; } /* --- DOCUMENT CHECKBOX GROUP --- */ .doc-checkbox-group { margin-top: 0.5rem !important; margin-bottom: 0.5rem !important; } .doc-checkbox-group .wrap { display: flex !important; flex-wrap: wrap !important; gap: 0.6rem !important; } .doc-checkbox-group label { display: flex !important; align-items: center !important; gap: 0.5rem !important; background: rgba(255, 255, 255, 0.08) !important; border: 1px solid rgba(255, 255, 255, 0.12) !important; border-radius: 100px !important; padding: 0.4rem 0.9rem 0.4rem 0.6rem !important; font-size: 0.8rem !important; color: var(--text-secondary) !important; cursor: pointer !important; transition: all 0.15s ease !important; } .doc-checkbox-group label:hover { background: rgba(255, 255, 255, 0.12) !important; border-color: rgba(255, 255, 255, 0.2) !important; } .doc-checkbox-group label.selected { background: rgba(239, 68, 68, 0.15) !important; border-color: rgba(239, 68, 68, 0.4) !important; color: #fca5a5 !important; } /* Show checkbox with custom styling */ .doc-checkbox-group input[type="checkbox"] { appearance: none !important; -webkit-appearance: none !important; width: 14px !important; height: 14px !important; border: 1.5px solid rgba(255, 255, 255, 0.4) !important; border-radius: 3px !important; background: transparent !important; cursor: pointer !important; margin: 0 !important; flex-shrink: 0 !important; } .doc-checkbox-group input[type="checkbox"]:checked { background: #ef4444 !important; border-color: #ef4444 !important; } .doc-checkbox-group input[type="checkbox"]:checked::after { content: '✓' !important; display: block !important; text-align: center !important; font-size: 10px !important; line-height: 12px !important; color: white !important; font-weight: bold !important; } """ with gr.Blocks(css=css, theme=gr.themes.Base(), title="Enterprise RAG") as demo: # Session and document state (persisted in browser localStorage) session_state = gr.BrowserState(default_value=None, storage_key="rag_session_id") docs_state = gr.State(value=[]) # List of loaded document names with gr.Column(elem_id="main-container"): # --- HERO --- gr.HTML("""
Secure, Scalable, Agentic Document Intelligence for the Modern Enterprise.
🔒 Secure Environment: Documents stored securely & auto-deleted after 7 days.
© 2024 Enterprise RAG Platform. Licensed under MIT.