BeRU Deployer committed on
Commit
dec533d
Β·
1 Parent(s): 8357835

Deploy BeRU Streamlit RAG System - Add app, models logic, configs, and optimizations for HF Spaces

Browse files
Files changed (10) hide show
  1. .hfignore +7 -0
  2. .streamlit/config.toml +21 -0
  3. Dockerfile +49 -0
  4. README.md +47 -6
  5. app.py +282 -0
  6. down.py +898 -0
  7. frontend.html +1075 -0
  8. requirements.txt +19 -0
  9. spaces_app.py +229 -0
  10. vlm2rag2.py +1354 -0
.hfignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # ignore local model and index directories when pushing to Hugging Face
2
+ models/
3
+ VLM2Vec-V2rag3/
4
+ faiss_index/
5
+ venv*
6
+ *.log
7
+ rag.log
.streamlit/config.toml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [global]
2
+ # Page customization
3
+ analyticsEnabled = false
4
+ logLevel = "info"
5
+
6
+ [client]
7
+ # Faster loading
8
+ showErrorDetails = true
9
+ toolbarMode = "minimal"
10
+
11
+ [server]
12
+ # HF Spaces optimizations
13
+ port = 7860
14
+ headless = true
15
+ runOnSave = false
16
+ maxUploadSize = 200
17
+ enableCORS = false
18
+ enableXsrfProtection = true
19
+
20
+ # Memory management
21
+ maxCachedMessageSize = 2
Dockerfile ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies (for PDF processing, OCR, etc.)
RUN apt-get update && apt-get install -y \
    build-essential \
    git \
    libpoppler-cpp-dev \
    poppler-utils \
    tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is cached between code changes
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY down.py .
COPY frontend.html .
COPY vlm2rag2.py .
# NOTE(review): check_user.py is not in this commit's file list — confirm it
# exists in the build context or this COPY will fail the build.
COPY check_user.py .
# include the Streamlit demo as an alternative entrypoint
COPY app.py .

# Create necessary directories
RUN mkdir -p /app/.cache /app/models /app/VLM2Vec-V2rag3

# Expose HF Spaces default port
EXPOSE 7860

# By default the image runs the FastAPI server; to start the Streamlit UI
# for local testing you can override the command:
#   docker run -p 7860:7860 <image> streamlit run app.py --server.port=7860
# (the continuation line above was previously missing its '#', which made
# the Dockerfile invalid)

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV HF_HUB_DISABLE_SYMLINKS_WARNING=1

# default paths used by down.py (can be overridden at runtime)
ENV MODEL_DIR=/app/models
ENV LLM_MODEL_PATH=/app/models/Mistral-7B-Instruct-v0.3
ENV EMBED_MODEL_PATH=/app/models/VLM2Vec-Qwen2VL-2B
ENV FAISS_INDEX_PATH=/app/VLM2Vec-V2rag3

# Run FastAPI app on port 7860
CMD ["python", "down.py", "--port", "7860"]
README.md CHANGED
@@ -1,13 +1,54 @@
1
  ---
2
- title: BeRu
3
- emoji: 🐨
4
  colorFrom: indigo
5
  colorTo: yellow
6
- sdk: gradio
7
- sdk_version: 6.9.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: Refinary assistance
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: BeRU Chat - RAG Assistant
3
+ emoji: πŸ€–
4
  colorFrom: indigo
5
  colorTo: yellow
6
+ sdk: streamlit
 
7
  app_file: app.py
8
  pinned: false
9
+ short_description: 100% Offline RAG System with Mistral 7B and VLM2Vec
10
  ---
11
 
12
+ # πŸ€– BeRU Chat - RAG Assistant
13
+
14
+ A powerful **100% offline Retrieval-Augmented Generation (RAG) system** combining Mistral 7B LLM with VLM2Vec embeddings for intelligent document search and conversation.
15
+
16
+ ## ✨ Features
17
+
18
+ - πŸ”’ **100% Offline Operation** - No internet required after startup
19
+ - 🧠 **Advanced RAG Architecture**
20
+ - Hybrid retrieval (Vector + BM25 keyword search)
21
+ - Ensemble retriever combining multiple strategies
22
+ - Re-ranking with FlashRank for relevance
23
+ - Multi-turn conversation with history awareness
24
+ - ⚑ **Optimized Performance**
25
+ - 4-bit quantization with BitsAndBytes
26
+ - Flash Attention 2 support
27
+ - FAISS vector indexing
28
+ - πŸ“š **Source Citations** - Every answer cites original sources
29
+
30
+ ## 🎯 Models Used
31
+
32
+ | Component | Model | Details |
33
+ |-----------|-------|---------|
34
+ | **LLM** | Mistral-7B-Instruct-v0.3 | 7B parameters |
35
+ | **Embedding** | VLM2Vec-Qwen2VL-2B | 2B parameters |
36
+ | **Vector Store** | FAISS | Meta's similarity search |
37
+
38
+ ## πŸš€ Getting Started
39
+
40
+ 1. **Wait for Models** - First load takes 5-8 minutes (models download from HF Hub)
41
+ 2. **Upload Documents** - Add PDFs or text files for RAG
42
+ 3. **Ask Questions** - Chat with context-aware answers
43
+ 4. **Get Sources** - Each answer includes citations
44
+
45
+ ## πŸ’» System Requirements
46
+
47
+ - **GPU**: A10G (24GB VRAM) recommended
48
+ - **RAM**: 16GB minimum
49
+ - **Cold Start**: ~5-8 minutes (first time)
50
+ - **Runtime**: Streamlit app on port 7860
51
+
52
+ ## πŸ“– Documentation
53
+
54
+ For more information, visit the [GitHub repository](https://github.com/AnwinJosy/BeRU)
app.py ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ import os
4
+ import pickle
5
+ import faiss
6
+ import numpy as np
7
+ from transformers import AutoModel, AutoProcessor, AutoTokenizer
8
+ from typing import List, Dict
9
+ import time
10
+
11
+ # ========================================
12
+ # 🎨 STREAMLIT PAGE CONFIG
13
+ # ========================================
14
+ st.set_page_config(
15
+ page_title="BeRU Chat - RAG Assistant",
16
+ page_icon="πŸ€–",
17
+ layout="wide",
18
+ initial_sidebar_state="expanded"
19
+ )
20
+
21
+ # ========================================
22
+ # 🎯 CACHING FOR MODEL LOADING
23
+ # ========================================
24
@st.cache_resource
def load_embedding_model():
    """Load the VLM2Vec embedding model, processor and tokenizer (cached).

    Returns:
        tuple: ``(model, processor, tokenizer, device)`` where *device* is
        ``"cuda"`` when a GPU is available, otherwise ``"cpu"``.
    """
    st.write("⏳ Loading embedding model...")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Half precision only pays off on GPU; CPU inference stays in fp32.
    dtype = torch.float16 if device == "cuda" else torch.float32

    repo_id = "TIGER-Lab/VLM2Vec-Qwen2VL-2B"
    model = AutoModel.from_pretrained(
        repo_id,
        trust_remote_code=True,
        torch_dtype=dtype,
    ).to(device)
    processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)

    model.eval()
    st.success("βœ… Embedding model loaded!")
    return model, processor, tokenizer, device
49
+
50
@st.cache_resource
def load_llm_model():
    """Load Mistral-7B-Instruct in 4-bit quantization (cached across reruns).

    Returns:
        tuple: ``(model, tokenizer, device)``.
    """
    st.write("⏳ Loading language model...")
    device = "cuda" if torch.cuda.is_available() else "cpu"

    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # NF4 with double quantization keeps the 7B model within a small VRAM budget.
    quant_cfg = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
    )

    repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        quantization_config=quant_cfg,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(repo_id)

    st.success("βœ… Language model loaded!")
    return model, tokenizer, device
78
+
79
@st.cache_resource
def load_faiss_index(index_dir: str = None):
    """Load the FAISS text index from disk, if it has been built.

    Args:
        index_dir: Directory containing ``text_index.faiss``.  Defaults to the
            ``FAISS_INDEX_PATH`` environment variable, falling back to the
            original hard-coded ``VLM2Vec-V2rag2`` directory.
            NOTE(review): the Dockerfile and down.py both use
            ``VLM2Vec-V2rag3`` — confirm which directory name is correct;
            the env-var override lets the container point at the right one.

    Returns:
        faiss.Index | None: the loaded index, or ``None`` when the index
        file does not exist (a warning is shown in the UI).
    """
    if index_dir is None:
        index_dir = os.environ.get("FAISS_INDEX_PATH", "VLM2Vec-V2rag2")
    index_path = os.path.join(index_dir, "text_index.faiss")

    if os.path.exists(index_path):
        st.write("⏳ Loading FAISS index...")
        index = faiss.read_index(index_path)
        st.success("βœ… FAISS index loaded!")
        return index

    st.warning("⚠️ FAISS index not found. Please build the index first.")
    return None
90
+
91
+ # ========================================
92
+ # πŸ’¬ EMBEDDING & RETRIEVAL FUNCTIONS
93
+ # ========================================
94
def get_embeddings(texts: List[str], model, processor, tokenizer, device) -> np.ndarray:
    """Embed each text as the mean of its last-hidden-layer token vectors.

    Args:
        texts: Strings to embed.
        model: VLM2Vec model; called with ``output_hidden_states=True``.
        processor: Kept for interface compatibility; not used here.
        tokenizer: Tokenizer producing tensors moved onto *device*.
        device: Torch device string.

    Returns:
        np.ndarray of shape ``(len(texts), hidden_dim)``.
    """
    vectors = []
    for chunk in texts:
        encoded = tokenizer(
            chunk, return_tensors="pt", max_length=512, truncation=True
        ).to(device)

        with torch.no_grad():
            result = model(**encoded, output_hidden_states=True)
            # Mean-pool over the sequence dimension of the final layer.
            pooled = result.hidden_states[-1].mean(dim=1).cpu().numpy()

        vectors.append(pooled.flatten())

    return np.array(vectors)
108
+
109
def retrieve_documents(query: str, model, processor, tokenizer, device, faiss_index, k: int = 5) -> List[Dict]:
    """Retrieve the top-*k* nearest documents for *query* via FAISS.

    Args:
        query: User question to embed and search with.
        model/processor/tokenizer/device: Embedding components forwarded
            to :func:`get_embeddings`.
        faiss_index: A loaded ``faiss.Index`` or ``None``.
        k: Number of neighbours to request.

    Returns:
        List of ``{"index": int, "distance": float}`` dicts; empty when no
        index is loaded.
    """
    if faiss_index is None:
        return []

    # Embed the query with the same model used for the corpus.
    query_embedding = get_embeddings([query], model, processor, tokenizer, device)

    # Search the FAISS index.
    distances, indices = faiss_index.search(query_embedding, k)

    # Pair each hit with its OWN distance.  The original code looked the
    # distance up via list(...).index(idx), which returns the distance of the
    # first occurrence — wrong for duplicate ids and O(k^2) per query.
    results = []
    for dist, idx in zip(distances[0], indices[0]):
        if idx >= 0:  # FAISS pads with -1 when fewer than k vectors exist
            results.append({
                "index": idx,
                "distance": float(dist)
            })

    return results
130
+
131
def generate_response(query: str, context: str, model, tokenizer, device,
                      temperature: float = 0.7, max_new_tokens: int = 512) -> str:
    """Generate an answer with Mistral given retrieved *context*.

    Args:
        query: The user question.
        context: Concatenated retrieved-document text injected into the prompt.
        model: Causal LM exposing ``generate``.
        tokenizer: Matching tokenizer (encode + decode).
        device: Torch device string the inputs are moved to.
        temperature: Sampling temperature.  Previously hard-coded to 0.7, which
            silently ignored the UI temperature slider; now overridable while
            keeping 0.7 as the backward-compatible default.
        max_new_tokens: Generation budget (default preserves old behavior).

    Returns:
        The model's answer with the prompt portion stripped.
    """
    # Mistral-Instruct chat format: single [INST] ... [/INST] turn.
    prompt = f"""[INST] You are a helpful assistant answering questions about technical documentation.

Context:
{context}

Question: {query} [/INST]"""

    inputs = tokenizer(prompt, return_tensors="pt", max_length=2048, truncation=True).to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=0.95,
            do_sample=True
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Everything after [/INST] is the generated answer; fall back to the full
    # decode if the marker was dropped during decoding.
    return response.split("[/INST]")[1].strip() if "[/INST]" in response else response
154
+
155
+ # ========================================
156
+ # 🎨 STREAMLIT UI
157
+ # ========================================
158
+ st.title("πŸ€– BeRU Chat Assistant")
159
+ st.markdown("*100% Offline RAG System with Mistral 7B & VLM2Vec*")
160
+
161
+ # Sidebar Configuration
162
+ with st.sidebar:
163
+ st.header("βš™οΈ Configuration")
164
+
165
+ device_info = "🟒 GPU" if torch.cuda.is_available() else "πŸ”΄ CPU"
166
+ st.metric("Device", device_info)
167
+
168
+ num_results = st.slider("Retrieve top K documents", 1, 10, 5)
169
+ temperature = st.slider("Response Temperature", 0.1, 1.0, 0.7)
170
+
171
+ st.divider()
172
+ st.markdown("### πŸ“Š Project Info")
173
+ st.markdown("""
174
+ - **Model**: Mistral 7B Instruct v0.3
175
+ - **Embeddings**: VLM2Vec-Qwen2VL-2B
176
+ - **Vector Store**: FAISS with 10K+ documents
177
+ - **Retrieval**: Hybrid (Dense + BM25)
178
+ """)
179
+
180
+ # Main Chat Interface
181
+ col1, col2 = st.columns([3, 1])
182
+
183
+ with col1:
184
+ st.subheader("πŸ’¬ Ask a Question")
185
+
186
+ with col2:
187
+ if st.button("πŸ”„ Clear Chat", use_container_width=True):
188
+ st.session_state.messages = []
189
+ st.rerun()
190
+
191
+ # Initialize session state
192
+ if "messages" not in st.session_state:
193
+ st.session_state.messages = []
194
+
195
+ if "models_loaded" not in st.session_state:
196
+ st.session_state.models_loaded = False
197
+
198
+ # Load models
199
+ if not st.session_state.models_loaded:
200
+ st.info("πŸ“¦ Loading models on first run... This may take 2-3 minutes.")
201
+
202
+ try:
203
+ embed_model, processor, tokenizer_embed, embed_device = load_embedding_model()
204
+ llm_model, tokenizer_llm, llm_device = load_llm_model()
205
+ faiss_idx = load_faiss_index()
206
+
207
+ st.session_state.embed_model = embed_model
208
+ st.session_state.processor = processor
209
+ st.session_state.tokenizer_embed = tokenizer_embed
210
+ st.session_state.embed_device = embed_device
211
+ st.session_state.llm_model = llm_model
212
+ st.session_state.tokenizer_llm = tokenizer_llm
213
+ st.session_state.llm_device = llm_device
214
+ st.session_state.faiss_idx = faiss_idx
215
+ st.session_state.models_loaded = True
216
+ st.success("βœ… All models loaded successfully!")
217
+
218
+ except Exception as e:
219
+ st.error(f"❌ Error loading models: {str(e)}")
220
+ st.stop()
221
+
222
+ # Chat Interface
223
+ st.markdown("---")
224
+
225
+ # Display chat history
226
+ for message in st.session_state.messages:
227
+ with st.chat_message(message["role"]):
228
+ st.markdown(message["content"])
229
+
230
+ # User input
231
+ user_input = st.chat_input("Type your question here...", key="user_input")
232
+
233
+ if user_input:
234
+ # Add user message to chat
235
+ st.session_state.messages.append({"role": "user", "content": user_input})
236
+
237
+ with st.chat_message("user"):
238
+ st.markdown(user_input)
239
+
240
+ # Generate response
241
+ with st.chat_message("assistant"):
242
+ st.write("πŸ” Retrieving relevant documents...")
243
+
244
+ # Retrieve documents
245
+ retrieved = retrieve_documents(
246
+ user_input,
247
+ st.session_state.embed_model,
248
+ st.session_state.processor,
249
+ st.session_state.tokenizer_embed,
250
+ st.session_state.embed_device,
251
+ st.session_state.faiss_idx,
252
+ k=num_results
253
+ )
254
+
255
+ context = "\n\n".join([f"Document {i+1}: Context from index {doc['index']}"
256
+ for i, doc in enumerate(retrieved)])
257
+
258
+ st.write("πŸ’­ Generating response...")
259
+
260
+ # Generate response
261
+ response = generate_response(
262
+ user_input,
263
+ context,
264
+ st.session_state.llm_model,
265
+ st.session_state.tokenizer_llm,
266
+ st.session_state.llm_device
267
+ )
268
+
269
+ st.markdown(response)
270
+
271
+ # Add to chat history
272
+ st.session_state.messages.append({"role": "assistant", "content": response})
273
+
274
+ # Footer
275
+ st.markdown("---")
276
+ st.markdown("""
277
+ <div style='text-align: center; color: gray; font-size: 12px;'>
278
+ <p>BeRU Chat Assistant | Powered by Mistral 7B + VLM2Vec | 100% Offline</p>
279
+ <p><a href='https://github.com/AnwinJosy/BeRU'>GitHub</a> |
280
+ <a href='https://huggingface.co/AnwinJosy'>Hugging Face</a></p>
281
+ </div>
282
+ """, unsafe_allow_html=True)
down.py ADDED
@@ -0,0 +1,898 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import logging
4
+ import asyncio
5
+ import re
6
+ from pathlib import Path
7
+ from typing import List, Dict, Optional, Any
8
+ from contextlib import asynccontextmanager
9
+ from logging.handlers import RotatingFileHandler
10
+
11
+ # --- LANGCHAIN IMPORTS ---
12
+ from langchain_community.vectorstores import FAISS
13
+ from langchain.chains import create_history_aware_retriever
14
+ from langchain.chains.retrieval import create_retrieval_chain
15
+ from langchain.chains.combine_documents import create_stuff_documents_chain
16
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
17
+ from langchain_community.llms import HuggingFacePipeline
18
+ from langchain_core.embeddings import Embeddings
19
+ from langchain_core.messages import HumanMessage, AIMessage
20
+ from langchain_community.retrievers import BM25Retriever
21
+ from langchain.retrievers import EnsembleRetriever
22
+ from langchain.retrievers.multi_query import MultiQueryRetriever
23
+ from langchain_core.runnables import RunnablePassthrough
24
+ from langchain_core.output_parsers import StrOutputParser
25
+ from operator import itemgetter
26
+
27
+ # --- RERANKING IMPORTS ---
28
+ # Ensure you have installed flashrank: pip install flashrank
29
+ from langchain.retrievers import ContextualCompressionRetriever
30
+ from langchain_community.document_compressors import FlashrankRerank
31
+
32
+ # --- TRANSFORMERS IMPORTS ---
33
+ from transformers import (
34
+ AutoTokenizer,
35
+ AutoModelForCausalLM,
36
+ AutoModel,
37
+ pipeline,
38
+ BitsAndBytesConfig
39
+ )
40
+
41
+ # --- FASTAPI IMPORTS ---
42
+ from fastapi import FastAPI
43
+ from fastapi.responses import HTMLResponse, JSONResponse
44
+ from fastapi.middleware.cors import CORSMiddleware
45
+ from pydantic import BaseModel, Field, field_validator
46
+ import uvicorn
47
+ import numpy as np
48
+
49
+ # -------------------------------------------------------------------------
50
+ # 1. Pydantic Patch (Crucial for offline serialization)
51
+ # -------------------------------------------------------------------------
52
def patch_pydantic_for_pickle():
    """Patch pydantic v1 ``BaseModel.__setstate__`` for pickle compatibility.

    Documents pickled under an older pydantic may lack ``__fields_set__`` /
    ``__private_attribute_values__`` in their state dict; this patch
    reconstructs them before delegating to the original implementation and,
    if that still fails, force-assigns the state attribute by attribute.

    The original code duplicated the whole patch in two near-identical
    import-fallback branches (one with a bare ``except:``); this version
    resolves the import first, then applies a single patch.  Failure to
    patch is reported and swallowed — the app can still run, it just may be
    unable to load old pickles.
    """
    try:
        try:
            from pydantic.v1.main import BaseModel as V1BaseModel
        except ImportError:
            # Older layouts expose BaseModel directly under pydantic.v1.
            from pydantic.v1 import BaseModel as V1BaseModel

        original_setstate = V1BaseModel.__setstate__

        def patched_setstate(self, state):
            # Reconstruct fields missing from pre-migration pickles.
            if '__fields_set__' not in state:
                state['__fields_set__'] = set(state.get('__dict__', {}).keys())
            if '__private_attribute_values__' not in state:
                state['__private_attribute_values__'] = {}
            try:
                original_setstate(self, state)
            except Exception:
                # Last resort: bypass pydantic entirely and set raw state.
                object.__setattr__(self, '__dict__', state.get('__dict__', {}))
                object.__setattr__(self, '__fields_set__', state.get('__fields_set__', set()))
                object.__setattr__(self, '__private_attribute_values__', state.get('__private_attribute_values__', {}))

        V1BaseModel.__setstate__ = patched_setstate
        print("βœ… Pydantic v1 patched for pickle compatibility")

    except Exception as e:
        print(f"⚠️ Could not patch Pydantic: {e}")
94
+
95
+ patch_pydantic_for_pickle()
96
+
97
+ # -------------------------------------------------------------------------
98
+ # 2. Configuration & Paths (workspace-agnostic)
99
+ # -------------------------------------------------------------------------
100
+ # environment variables allow overrides when running in containers / Spaces
101
+ os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
102
+ os.environ["TRANSFORMERS_OFFLINE"] = "1"
103
+ os.environ["HF_DATASETS_OFFLINE"] = "1"
104
+ os.environ["HF_HUB_OFFLINE"] = "1"
105
+
106
+ # base directory for application files inside a container
107
+ ROOT_DIR = Path(os.environ.get("APP_ROOT", "/app")).resolve()
108
+
109
+ # model and index locations can be provided via env; defaults point into /app
110
+ MODEL_DIR = Path(os.environ.get("MODEL_DIR", ROOT_DIR / "models"))
111
+ LLM_MODEL_PATH = Path(os.environ.get("LLM_MODEL_PATH", MODEL_DIR / "Mistral-7B-Instruct-v0.3"))
112
+ EMBED_MODEL_PATH = Path(os.environ.get("EMBED_MODEL_PATH", MODEL_DIR / "VLM2Vec-Qwen2VL-2B"))
113
+ FAISS_INDEX_PATH = Path(os.environ.get("FAISS_INDEX_PATH", ROOT_DIR / "VLM2Vec-V2rag3"))
114
+
115
+ # Increased timeout for reranking operations
116
+ GENERATION_TIMEOUT = 240
117
+ LLM_MODEL = str(LLM_MODEL_PATH)
118
+ EMBED_MODEL = str(EMBED_MODEL_PATH)
119
+
120
+ # Logging Setup
121
+ logger = logging.getLogger("rag_system")
122
+ handler = RotatingFileHandler("rag.log", maxBytes=10 * 1024 * 1024, backupCount=5)
123
+ formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
124
+ handler.setFormatter(formatter)
125
+ logger.addHandler(handler)
126
+ logger.setLevel(logging.INFO)
127
+
128
+ # Global Variables
129
+ vectorstore = None
130
+ llm_pipeline = None
131
+ qa_chain = None
132
+ answer_cache: Dict[str, Dict] = {}
133
+ conversations: Dict[str, List[Dict]] = {}
134
+
135
+ # -------------------------------------------------------------------------
136
+ # 3. VLM2Vec Embedding Class (Preserved)
137
+ # -------------------------------------------------------------------------
138
class VLM2VecEmbeddings(Embeddings):
    """LangChain-compatible embeddings backed by a local VLM2Vec model.

    Texts are embedded by mean-pooling the final hidden layer, weighted by
    the attention mask.  All files are loaded with ``local_files_only=True``
    so the class works fully offline.
    """

    def __init__(self, model_path: str, device: str = "cpu"):
        """Load tokenizer + model from *model_path* and probe the embedding dim.

        Args:
            model_path: Local directory containing the VLM2Vec checkpoint.
            device: ``"cuda"`` or ``"cpu"``; selects dtype and device_map.
        """
        print(f"πŸ”— Loading VLM2Vec model from: {model_path}")

        self.device = device
        self.model_path = model_path

        self.tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            trust_remote_code=True,
            local_files_only=True,
        )

        # Some checkpoints ship without a pad token; reuse EOS so padding works.
        if self.tokenizer.pad_token_id is None and self.tokenizer.eos_token_id is not None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        device_map = "auto" if device == "cuda" else "cpu"
        dtype = torch.float16 if device == "cuda" else torch.float32

        # NOTE(review): `dtype=` is the transformers >= 4.56 spelling; older
        # versions expect `torch_dtype=` — confirm against the pinned version.
        self.model = AutoModel.from_pretrained(
            model_path,
            trust_remote_code=True,
            dtype=dtype,
            device_map=device_map,
            local_files_only=True,
        )

        self.model.eval()

        # device_map="auto" may place the model elsewhere than requested;
        # read the real device from the parameters.  Only next() on an empty
        # iterator can fail here, so catch StopIteration (was a bare except:).
        try:
            self.model_device = next(self.model.parameters()).device
        except StopIteration:
            self.model_device = torch.device("cuda" if device == "cuda" else "cpu")

        # One probe forward pass to discover the embedding dimensionality.
        with torch.no_grad():
            test_input = self.tokenizer("test", return_tensors="pt", add_special_tokens=True)
            test_input = {k: v.to(self.model_device) for k, v in test_input.items()}
            out = self.model(**test_input, output_hidden_states=True)
            self.embedding_dim = out.hidden_states[-1].shape[-1]

        print(f"βœ… VLM2Vec loaded on {self.model_device} | dim={self.embedding_dim}\n")

    def _normalize_text(self, text: str) -> str:
        """Collapse whitespace and strip 'Page N' artifacts from extracted text."""
        text = re.sub(r'\s+', ' ', text or "")
        text = re.sub(r'Page \d+', '', text, flags=re.IGNORECASE)
        return text.strip()

    def _ensure_non_empty(self, text: str) -> str:
        """Return normalized text, or a placeholder so tokenization never sees ''."""
        t = self._normalize_text(text)
        return t if t else "[EMPTY]"

    def _embed_single(self, text: str) -> List[float]:
        """Embed one string; on any failure, log and return a zero vector."""
        try:
            with torch.no_grad():
                clean_text = self._ensure_non_empty(text)

                inputs = self.tokenizer(
                    clean_text,
                    return_tensors="pt",
                    add_special_tokens=True,
                    padding=True,
                    truncation=True,
                    max_length=512
                )
                inputs = {k: v.to(self.model_device) for k, v in inputs.items()}

                outputs = self.model(**inputs, output_hidden_states=True)

                if hasattr(outputs, "hidden_states") and outputs.hidden_states is not None:
                    # Attention-mask-weighted mean pooling over tokens.
                    hidden_states = outputs.hidden_states[-1]
                    attention_mask = inputs["attention_mask"].unsqueeze(-1).float()

                    weighted = hidden_states * attention_mask
                    sum_embeddings = weighted.sum(dim=1)
                    sum_mask = torch.clamp(attention_mask.sum(dim=1), min=1e-9)
                    embedding = (sum_embeddings / sum_mask).squeeze(0)
                else:
                    # Fallback for models that don't return hidden states.
                    embedding = outputs.logits.mean(dim=1).squeeze(0)

                return embedding.cpu().numpy().tolist()

        except Exception as e:
            logger.error(f"VLM2Vec embedding error: {e}")
            # Zero vector of the probed dimension (1024 if the probe never ran).
            return [0.0] * getattr(self, "embedding_dim", 1024)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of documents (one forward pass per text)."""
        return [self._embed_single(t) for t in texts]

    def embed_query(self, text: str) -> List[float]:
        """Embed a search query with the same pooling as documents."""
        return self._embed_single(text)
228
+
229
+ # -------------------------------------------------------------------------
230
+ # 4. Prompt Templates (CLEANER & STRICTER)
231
+ # -------------------------------------------------------------------------
232
+ PROMPT_TEMPLATES = {
233
+ "Short and Concise": """<s>[INST] Answer the question based ONLY on the following context. Keep the answer under 3 sentences.
234
+
235
+ Context:
236
+ {context}
237
+
238
+ Question:
239
+ {input} [/INST]""",
240
+
241
+ "Detailed": """<s>[INST] You are a helpful assistant. Answer the question using ONLY the following context. Provide a detailed summary (4-5 sentences).
242
+
243
+ Context:
244
+ {context}
245
+
246
+ Question:
247
+ {input} [/INST]""",
248
+
249
+ "Step-by-Step": """<s>[INST] Based on the context below, provide a step-by-step procedure to answer the question.
250
+
251
+ Context:
252
+ {context}
253
+
254
+ Question:
255
+ {input} [/INST]""",
256
+ }
257
+
258
def structure_answer(answer: str, style: str) -> str:
    """Post-process raw LLM output.

    Strips known prompt artifacts, truncates at markers that indicate the
    model hallucinated a new conversational turn, and enforces the requested
    answer style.

    Args:
        answer: Raw generated text.
        style: One of the PROMPT_TEMPLATES keys; only "Short and Concise"
            changes formatting (keeps the first two sentences).

    Returns:
        The cleaned answer (possibly empty).
    """
    # 1. Remove known prompt/format artifacts anywhere in the text.
    #    (str.replace is already a no-op when absent, so no membership check.)
    artifacts = [
        "Enough thinking",
        "Note:",
        "System:",
        "User:",
        "[/INST]",
        "Here is the answer:",
        "Answer:"
    ]
    for artifact in artifacts:
        answer = answer.replace(artifact, "")

    # 2. Truncate at markers that signal the model started a new turn.
    stop_markers = ["Human:", "Question:", "User input:", "Context:"]
    for marker in stop_markers:
        if marker in answer:
            answer = answer.split(marker)[0]

    clean_answer = answer.strip()

    # 3. Style enforcement: keep only the first two sentences.  Empty
    #    fragments are dropped and a lone "." is no longer produced for
    #    empty answers (the original appended "." unconditionally).
    if style == "Short and Concise":
        sentences = [s.strip() for s in clean_answer.split('.') if s.strip()]
        clean_answer = ". ".join(sentences[:2]) + "." if sentences else ""

    return clean_answer
290
+ # -------------------------------------------------------------------------
291
+ # 5. Load System
292
+ # -------------------------------------------------------------------------
293
def load_system():
    """Validate on-disk artifacts, then bring up the full offline RAG stack.

    Populates the module globals ``vectorstore``, ``llm_pipeline`` and
    ``qa_chain`` via the private loader helpers.

    Raises:
        FileNotFoundError: when the LLM, the embedding model, or the FAISS
            index directory is missing on disk.
    """
    global vectorstore, llm_pipeline, qa_chain

    # Fail fast with a precise message for each missing artifact.
    if not os.path.exists(LLM_MODEL_PATH):
        raise FileNotFoundError(f"LLM model not found at: {LLM_MODEL_PATH}")
    if not os.path.exists(EMBED_MODEL_PATH):
        raise FileNotFoundError(f"Embedding model not found at: {EMBED_MODEL_PATH}")
    if not os.path.exists(FAISS_INDEX_PATH):
        raise FileNotFoundError(
            f"FAISS index not found at: {FAISS_INDEX_PATH}\n"
            f"Please run the rebuild_faiss_index.py script first!"
        )

    banner = "=" * 70
    print(f"\n{banner}")
    print("πŸš€ LOADING RAG SYSTEM: Mistral 7B + VLM2Vec + Reranking (OFFLINE)")
    print(f"{banner}\n")

    _load_vectorstore()
    _load_llm()
    _build_retrieval_chain()

    print("βœ… RAG system ready (100% OFFLINE)!\n")
315
+
316
+
317
def _load_embeddings():
    """Instantiate the VLM2Vec embedding wrapper, on GPU when available."""
    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    return VLM2VecEmbeddings(
        model_path=EMBED_MODEL_PATH,
        device=target_device,
    )
324
+
325
+
326
def _load_vectorstore():
    """Reconstruct the LangChain FAISS vectorstore from disk artifacts.

    Reads the raw FAISS index plus a pickled document list from
    ``FAISS_INDEX_PATH`` and populates the module-global ``vectorstore``.
    Several unpickling strategies are tried because the documents may have
    been serialized under a different Python/pydantic version.

    Raises:
        FileNotFoundError: when the index or documents file is missing.
        RuntimeError: when no unpickling strategy succeeds.
    """
    global vectorstore

    import faiss
    import pickle
    from langchain_community.docstore.in_memory import InMemoryDocstore
    from langchain_core.documents import Document

    print(f"πŸ“₯ Loading FAISS index from: {FAISS_INDEX_PATH}")

    text_index_path = os.path.join(FAISS_INDEX_PATH, "text_index.faiss")
    text_docs_path = os.path.join(FAISS_INDEX_PATH, "text_documents.pkl")

    # Include the offending path in the error (the originals were f-strings
    # with no placeholders).
    if not os.path.exists(text_index_path):
        raise FileNotFoundError(f"text_index.faiss not found at {text_index_path}")
    if not os.path.exists(text_docs_path):
        raise FileNotFoundError(f"text_documents.pkl not found at {text_docs_path}")

    embedding_model = _load_embeddings()

    try:
        index = faiss.read_index(text_index_path)
        print(f" πŸ“Š FAISS index loaded: {index.ntotal} vectors")

        print(" πŸ“„ Loading documents...")

        # SECURITY: pickle/dill deserialization executes arbitrary code —
        # only ever load index files produced by this project's own tooling.
        documents = None

        # Strategy 1: pickle5 backport (protocol-5 pickles on older Pythons).
        try:
            import pickle5
            with open(text_docs_path, 'rb') as f:
                documents = pickle5.load(f)
            print(" βœ… Loaded with pickle5")
        except Exception:
            # (was `except (ImportError, Exception)` — Exception already
            # covers ImportError)
            pass

        # Strategy 2: stdlib pickle with latin1 (py2-era byte strings).
        if documents is None:
            try:
                with open(text_docs_path, 'rb') as f:
                    documents = pickle.load(f, encoding='latin1')
                print(" βœ… Loaded with latin1 encoding")
            except Exception:
                pass

        # Strategy 3: dill (handles objects plain pickle cannot).
        if documents is None:
            try:
                import dill
                with open(text_docs_path, 'rb') as f:
                    documents = dill.load(f)
                print(" βœ… Loaded with dill")
            except Exception as e:
                print(f" ⚠️ dill failed: {e}")
                raise RuntimeError("Could not load documents. Check pickle version.")

        if isinstance(documents, list):
            print(f" Loaded {len(documents)} documents")

            # Coerce every entry into a langchain Document so the docstore is
            # uniform even when the pickle held foreign/legacy objects.
            reconstructed_docs = []
            for doc in documents:
                if isinstance(doc, Document):
                    reconstructed_docs.append(doc)
                else:
                    try:
                        new_doc = Document(
                            page_content=doc.page_content if hasattr(doc, 'page_content') else str(doc),
                            metadata=doc.metadata if hasattr(doc, 'metadata') else {}
                        )
                        reconstructed_docs.append(new_doc)
                    except Exception as e:
                        print(f" ⚠️ Could not reconstruct document: {e}")

            documents = reconstructed_docs
            # Docstore ids mirror list positions ("0", "1", ...) to match the
            # FAISS row order.
            docstore = InMemoryDocstore({str(i): doc for i, doc in enumerate(documents)})
            index_to_docstore_id = {i: str(i) for i in range(len(documents))}

        elif isinstance(documents, dict):
            print(f" Loaded {len(documents)} documents (dict)")
            docstore = InMemoryDocstore(documents)
            index_to_docstore_id = {i: key for i, key in enumerate(documents.keys())}

        else:
            raise ValueError(f"Unexpected documents format: {type(documents)}")

        vectorstore = FAISS(
            embedding_function=embedding_model,
            index=index,
            docstore=docstore,
            index_to_docstore_id=index_to_docstore_id
        )

        print(f" πŸ“Š Total vectors: {vectorstore.index.ntotal}")
        print("βœ… FAISS vectorstore loaded\n")

    except Exception as e:
        print(f"❌ Error loading FAISS index: {e}")
        import traceback
        traceback.print_exc()
        raise
425
+
426
+
427
def _load_llm():
    """Load the local causal LLM in 4-bit NF4 quantization and publish it as
    the module-global ``llm_pipeline`` (a LangChain HuggingFacePipeline).

    Tries Flash Attention 2 first for speed and falls back to the default
    attention implementation when the installed build/hardware does not
    support it. Runs fully offline (``local_files_only=True``).
    """
    print(f"πŸ€– Loading LLM from: {LLM_MODEL_PATH} (OFFLINE - SPEED OPTIMIZED)")

    # 4-bit NF4 with double quantization keeps VRAM usage low while retaining
    # fp16 compute for the matmuls.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_PATH, local_files_only=True)
    if tokenizer.pad_token_id is None:
        # Many chat models ship without a pad token; reuse EOS for padding.
        tokenizer.pad_token = tokenizer.eos_token

    # Prefer Flash Attention 2; fall back to standard attention if unavailable.
    # FIX: narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are
    # not silently swallowed during model load.
    try:
        model = AutoModelForCausalLM.from_pretrained(
            LLM_MODEL_PATH,
            quantization_config=bnb_config,
            device_map="auto",
            local_files_only=True,
            attn_implementation="flash_attention_2"  # <--- SPEED BOOST
        )
        print(" ⚑ Flash Attention 2 Enabled!")
    except Exception:
        print(" ⚠️ Flash Attention 2 not supported. Using standard attention.")
        model = AutoModelForCausalLM.from_pretrained(
            LLM_MODEL_PATH,
            quantization_config=bnb_config,
            device_map="auto",
            local_files_only=True,
        )

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.01,  # near-greedy decoding while keeping sampling on
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
        return_full_text=False  # don't echo the prompt back (stops repetition)
    )

    global llm_pipeline
    llm_pipeline = HuggingFacePipeline(pipeline=pipe)
    print("βœ… LLM Loaded\n")
477
def format_docs_with_sources(docs):
    """Render retrieved documents as one context string, each chunk headed by
    its source file name and page number so the LLM can cite them.
    """
    def _render(doc):
        # Fall back to placeholders when metadata is missing.
        src = doc.metadata.get("source", "Unknown Document")
        # Optional: Clean the path to just show filename
        # src = src.split("\\")[-1]
        pg = doc.metadata.get("page", "?")
        return f"--- REFERENCE: {src} (Page {pg}) ---\n{doc.page_content}\n"

    return "\n\n".join(_render(doc) for doc in docs)
+
494
+
495
def _build_retrieval_chain():
    """Assemble the module-global ``qa_chain``.

    Pipeline: hybrid retrieval (FAISS dense + BM25 keyword, BM25-weighted),
    optional Flashrank reranking down to 5 chunks, history-aware question
    rewriting, then a citation-enforcing answer prompt through the local LLM.
    Optional stages degrade gracefully when their dependencies fail.
    """
    global qa_chain
    print("πŸ”— Building Production RAG Chain (Sources + Hybrid)...")

    # --- A. RETRIEVER SETUP (Speed Optimized) ---

    # 1. Vector Search
    faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

    # 2. BM25 (keyword search) blended with dense retrieval; weights favour
    # BM25 (0.7) over vectors (0.3).
    # FIX: narrowed from a bare `except:` so interrupts are not swallowed.
    try:
        all_docs = list(vectorstore.docstore._dict.values())
        bm25_retriever = BM25Retriever.from_documents(all_docs)
        bm25_retriever.k = 10

        ensemble_retriever = EnsembleRetriever(
            retrievers=[faiss_retriever, bm25_retriever],
            weights=[0.3, 0.7]
        )
    except Exception:
        # BM25 build failed (e.g. empty docstore) -> vector-only retrieval.
        ensemble_retriever = faiss_retriever

    # 3. Rerank the merged candidates down to the top 5.
    try:
        compressor = FlashrankRerank(model="ms-marco-MiniLM-L-12-v2", top_n=5)
        final_retriever = ContextualCompressionRetriever(
            base_compressor=compressor,
            base_retriever=ensemble_retriever
        )
    except Exception:
        # Reranker unavailable -> use the ensemble results directly.
        final_retriever = ensemble_retriever

    # --- B. HISTORY AWARENESS ---

    # Reformulate the question as a standalone query using recent history.
    rephrase_prompt = ChatPromptTemplate.from_template(
        """<s>[INST] Rephrase the follow-up question to be a standalone question.
Chat History: {chat_history}
Follow Up Input: {input}
Standalone question: [/INST]"""
    )

    history_node = create_history_aware_retriever(
        llm_pipeline,
        final_retriever,
        rephrase_prompt
    )

    # --- C. FINAL ANSWER GENERATION (With Sources) ---

    qa_prompt = ChatPromptTemplate.from_template(
        """[INST] You are a helpful assistant for BPCL-Kochi Refinery.
Answer the user's question based strictly on the context provided below.
If the answer is not in the context, say "I don't have that information in the manuals."
ALWAYS cite the document name for your answer.

CONTEXT WITH SOURCES:
{context}

USER QUESTION:
{input}

ANSWER: [/INST]"""
    )

    # The chain (no cache layer): retrieve -> format with citations -> answer.
    qa_chain = (
        {
            "context": history_node | format_docs_with_sources,
            "input": itemgetter("input"),
            "chat_history": itemgetter("chat_history"),
        }
        | qa_prompt
        | llm_pipeline
        | StrOutputParser()
    )

    print("βœ… Production Chain Built (with Citations)\n")
+ # -------------------------------------------------------------------------
574
+ # 6. FastAPI App & Endpoints
575
+ # -------------------------------------------------------------------------
576
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: load the full RAG stack before serving requests,
    then drop the answer cache and cached GPU memory on shutdown."""
    print("\nπŸš€ Starting application (OFFLINE)...")
    load_system()
    logger.info("RAG system initialized (OFFLINE)")

    yield  # the app serves requests between startup and shutdown

    print("\nπŸ›‘ Shutting down...")
    answer_cache.clear()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    logger.info("Shutdown complete")
+
590
+
591
# FastAPI application; `lifespan` loads the RAG stack at startup.
app = FastAPI(
    title="BeRU Chat Assistant - VLM2Vec",
    description="100% Offline RAG system with VLM2Vec embeddings",
    version="2.0-VLM2Vec",
    lifespan=lifespan,
)

# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive; fine for a local/offline deployment, revisit before exposing
# this service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
605
+
606
+
607
class ChatRequest(BaseModel):
    """Payload for POST /api/chat."""

    message: str = Field(..., min_length=1, max_length=2000)
    mode: str = "Detailed"
    session_id: Optional[str] = "default"
    include_images: bool = False

    @field_validator("message")
    @classmethod
    def sanitize_message(cls, v):
        # Trim surrounding whitespace before any further processing.
        return v.strip()

    @field_validator("mode")
    @classmethod
    def validate_mode(cls, v):
        # Unknown modes silently fall back to the default answer style.
        return v if v in PROMPT_TEMPLATES else "Detailed"
+
625
+
626
class QueryRequest(BaseModel):
    """Payload for the legacy POST /api/query endpoint."""

    message: str = Field(..., min_length=1, max_length=2000)
    answer_style: str = "Detailed"
    num_sources: int = Field(default=5, ge=1, le=10)

    @field_validator("message")
    @classmethod
    def sanitize_message(cls, v):
        # Trim surrounding whitespace before any further processing.
        return v.strip()

    @field_validator("answer_style")
    @classmethod
    def validate_style(cls, v):
        # Unknown styles silently fall back to the default answer style.
        return v if v in PROMPT_TEMPLATES else "Detailed"
+
643
+
644
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the chat UI from frontend.html, or a diagnostic page when the
    file is missing or unreadable."""
    try:
        frontend_path = Path("frontend.html")
        if not frontend_path.exists():
            # Help the operator locate the problem: show where we looked.
            return f"""
            <html>
            <body>
                <h1>Error: frontend.html not found</h1>
                <p>Please place frontend.html in the same directory as this script</p>
                <p>Current directory: {Path.cwd()}</p>
            </body>
            </html>
            """
        return frontend_path.read_text(encoding="utf-8")
    except Exception as e:
        return f"<html><body><h1>Error loading frontend</h1><p>{str(e)}</p></body></html>"
+
664
+
665
# At most 3 generations in flight so the single local LLM is not oversubscribed.
query_semaphore = asyncio.Semaphore(3)
666
+
667
+
668
@app.post("/api/chat")
async def chat_endpoint(request: ChatRequest):
    """Answer a chat message with the RAG chain.

    Flow: cache lookup -> build LangChain message objects from the last 3
    turns -> invoke ``qa_chain`` in a worker thread under a hard timeout ->
    normalize the chain output (string or dict) -> cache the answer and
    record the turn.

    Returns JSON {response, sources, mode, cached, images}; 504 on timeout,
    500 on unexpected errors.
    """
    # Bounded concurrency: see query_semaphore defined above.
    async with query_semaphore:
        try:
            message = request.message
            mode = request.mode
            session_id = request.session_id

            logger.info(f"Chat Query: {message[:100]} | Mode: {mode}")
            print(f"\n{'=' * 60}")
            print(f"πŸ’¬ Chat: {message}")
            print(f" Mode: {mode}")
            print(f" Session: {session_id}")

            # History Management
            if session_id not in conversations:
                conversations[session_id] = []

            # Check Cache
            cache_key = f"{message}_{mode}_{session_id}"
            if cache_key in answer_cache:
                print("πŸ’Ύ Cache hit!")
                cached_response = answer_cache[cache_key]
                conversations[session_id].append(
                    {
                        "user": message,
                        "bot": cached_response["response"],
                        "mode": mode,
                    }
                )
                # FIX: stored entries carry cached=False from their original
                # generation; report cached=True on an actual cache hit.
                return JSONResponse({**cached_response, "cached": True})

            print(f"⏱️ Generating response (timeout: {GENERATION_TIMEOUT}s)...")

            # Convert dict history to LangChain objects (last 3 turns only,
            # to keep the rephrase prompt short).
            chat_history_objs = []
            for turn in conversations[session_id][-3:]:
                chat_history_objs.append(HumanMessage(content=turn["user"]))
                chat_history_objs.append(AIMessage(content=turn["bot"]))

            # Execute the chain off the event loop with a hard timeout.
            try:
                result = await asyncio.wait_for(
                    asyncio.to_thread(
                        qa_chain.invoke,
                        {
                            "input": message,
                            "chat_history": chat_history_objs
                        },
                    ),
                    timeout=GENERATION_TIMEOUT,
                )
            except asyncio.TimeoutError:
                return JSONResponse(
                    {
                        "error": f"Query timeout after {GENERATION_TIMEOUT}s",
                        "response": "Sorry, the request took too long. Please try again.",
                    },
                    status_code=504,
                )

            # The production chain returns a plain string; the older chain
            # returned a dict with "answer"/"context". Handle both so neither
            # path raises AttributeError.
            context_docs = []  # empty when the string-returning chain is used

            if isinstance(result, str):
                # New "Production Chain" path: citations are embedded in the
                # text itself, so no raw docs are available for `sources`.
                answer = result
            elif isinstance(result, dict):
                # Old "Standard Chain" path
                answer = result.get("answer", "No answer generated")
                context_docs = result.get("context", [])
            else:
                answer = str(result)

            # Clean up the answer text per the requested mode.
            answer = structure_answer(answer, mode)

            # Process sources (only populated when context_docs were returned).
            sources = []
            for i, doc in enumerate(context_docs[:5], 1):
                sources.append(
                    {
                        "index": i,
                        "file_name": doc.metadata.get("source", "Unknown"),
                        "page": doc.metadata.get("page", "N/A"),
                        "snippet": doc.page_content[:200].replace("\n", " "),
                    }
                )

            print(f"βœ… Response generated: {len(answer)} chars")

            response_data = {
                "response": answer,
                "sources": sources,
                "mode": mode,
                "cached": False,
                "images": []  # placeholder for image handling
            }

            answer_cache[cache_key] = response_data

            conversations[session_id].append(
                {
                    "user": message,
                    "bot": answer,
                    "mode": mode,
                }
            )

            logger.info("Chat response completed")
            return JSONResponse(response_data)

        except Exception as e:
            logger.error(f"Chat error: {e}", exc_info=True)
            print(f"❌ ERROR: {e}")
            # Ensure traceback is printed to console for debugging
            import traceback
            traceback.print_exc()
            return JSONResponse(
                {
                    "error": str(e),
                    "response": "Sorry, an internal error occurred. Please check server logs.",
                },
                status_code=500,
            )
@app.post("/api/query")
async def query_endpoint(request: QueryRequest):
    """Legacy endpoint: delegate to /api/chat, then rename the JSON key
    'response' to 'answer' for old clients."""
    import json

    proxied = await chat_endpoint(
        ChatRequest(
            message=request.message,
            mode=request.answer_style,
            session_id="default",
        )
    )
    payload = json.loads(proxied.body.decode("utf-8"))
    if "response" in payload:
        payload["answer"] = payload.pop("response")
    return JSONResponse(payload)
+
816
+
817
@app.get("/api/health")
async def health_check():
    """Lightweight liveness probe exposing basic runtime information."""
    return dict(
        status="ok",
        mode="OFFLINE",
        llm_model=LLM_MODEL,
        embedding_model=EMBED_MODEL,
        cuda_available=torch.cuda.is_available(),
        cache_size=len(answer_cache),
        active_sessions=len(conversations),
    )
+
829
+
830
@app.get("/api/stats")
async def get_stats():
    """Report corpus, cache, and session statistics; the document count
    degrades to 'unknown' if the docstore is inaccessible."""
    try:
        doc_count = len(vectorstore.docstore._dict) if vectorstore else 0
    except Exception:
        doc_count = "unknown"

    return dict(
        mode="OFFLINE",
        documents=doc_count,
        cache_size=len(answer_cache),
        active_sessions=len(conversations),
        llm_model=LLM_MODEL,
        embedding_model=EMBED_MODEL,
        cuda_available=torch.cuda.is_available(),
        index_path=FAISS_INDEX_PATH,
    )
+
848
+
849
@app.post("/api/new-conversation")
async def new_conversation(request: dict):
    """Reset the chat history for the given session.

    FIX: previously an unknown session_id was silently ignored, so a fresh
    client could not register a new session; now the history is always
    (re)initialized to an empty list.
    """
    session_id = request.get("session_id", "default")
    conversations[session_id] = []
    return {"message": "New conversation started", "session_id": session_id}
+
856
+
857
@app.get("/api/conversation/{session_id}")
async def get_conversation(session_id: str):
    """Return the stored chat turns for a session (empty list if unknown)."""
    return {"history": conversations.get(session_id, [])}
+
863
+
864
@app.get("/api/clear_cache")
async def clear_cache():
    """Empty the answer cache and release cached GPU memory, reporting how
    many entries were removed."""
    removed = len(answer_cache)
    answer_cache.clear()

    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return {"message": f"Cache cleared. Removed {removed} entries"}
+
874
+
875
if __name__ == "__main__":
    # Script entry point: parse the port and launch the uvicorn server.
    # FIX: removed unused `import sys`.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--port", type=int, default=8001, help="Port to run the server on")
    args = parser.parse_args()

    port = args.port

    print("\n" + "=" * 70)
    print("πŸš€ BeRU Chat Assistant - VLM2Vec Mode (100% OFFLINE)")
    print("=" * 70)
    print(f"\nπŸ“ Frontend: http://localhost:{port}")
    print(f"πŸ“ API Docs: http://localhost:{port}/docs")
    print(f"πŸ“ Health: http://localhost:{port}/api/health")
    print(f"πŸ“ Stats: http://localhost:{port}/api/stats")
    print(f"\nπŸ”Œ Embedding Model (LOCAL): {EMBED_MODEL_PATH}")
    print(f"πŸ”Œ LLM Model (LOCAL): {LLM_MODEL_PATH}")
    print(f"πŸ”Œ FAISS Index: {FAISS_INDEX_PATH}")
    print("πŸ”Œ Mode: 100% OFFLINE (local files only)")
    print("=" * 70 + "\n")

    uvicorn.run(app, host="0.0.0.0", port=port, log_level="info")
frontend.html ADDED
@@ -0,0 +1,1075 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>BeRU Chat - Multimodal</title>
7
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
8
+
9
+ <style>
10
+ /* *** EXISTING STYLES (keeping all your original styles) *** */
11
+ body {
12
+ font-family: 'Roboto', sans-serif;
13
+ margin: 0;
14
+ padding: 0;
15
+ transition: background-color 0.5s ease, color 0.5s ease;
16
+ }
17
+
18
+ .light-mode {
19
+ background-color: #caf2fa;
20
+ color: #333;
21
+ }
22
+
23
+ .dark-mode {
24
+ background-color: #1e1e1e;
25
+ color: #f5f5f5;
26
+ }
27
+
28
+ .dark-mode .chat-container {
29
+ background-color: #1e1e1e;
30
+ color: #f5f5f5;
31
+ }
32
+
33
+ .sidebar {
34
+ width: 300px;
35
+ height: 100vh;
36
+ background-color: #0d131a;
37
+ position: fixed;
38
+ top: 0;
39
+ left: 0;
40
+ z-index: 1;
41
+ overflow-x: hidden;
42
+ transition: width 0.3s, background-color 0.5s ease;
43
+ }
44
+
45
+ .light-mode .sidebar {
46
+ background-color: #01414e;
47
+ }
48
+
49
+ .sidebar.collapsed {
50
+ width: 50px;
51
+ }
52
+
53
+ .tooltip {
54
+ position: absolute;
55
+ top: 0;
56
+ right: -20px;
57
+ }
58
+
59
+ .tooltip .tooltiptext {
60
+ visibility: hidden;
61
+ width: 120px;
62
+ background-color: rgb(0, 0, 0);
63
+ color: #fff;
64
+ text-align: center;
65
+ border-radius: 6px;
66
+ padding: 5px 0;
67
+ position: absolute;
68
+ z-index: 1;
69
+ bottom: 125%;
70
+ left: 50%;
71
+ margin-left: -60px;
72
+ opacity: 0;
73
+ transition: opacity 0.3s;
74
+ }
75
+
76
+ #sidebar-toggle {
77
+ background-color: transparent;
78
+ margin: -22%;
79
+ align-items: center;
80
+ margin-top: 600%;
81
+ }
82
+
83
+ .tooltip .tooltiptext::after {
84
+ content: "";
85
+ position: absolute;
86
+ top: 100%;
87
+ left: 50%;
88
+ border-width: 5px;
89
+ border-style: solid;
90
+ border-color: black transparent transparent transparent;
91
+ }
92
+
93
+ .tooltip:hover .tooltiptext {
94
+ visibility: visible;
95
+ opacity: 1;
96
+ }
97
+
98
+ .sidebar-content {
99
+ padding-top: 20px;
100
+ transition: opacity 0.3s;
101
+ }
102
+
103
+ .sidebar.collapsed .sidebar-content {
104
+ opacity: 0;
105
+ pointer-events: none;
106
+ }
107
+
108
+ .conversation-list {
109
+ padding: 0 20px;
110
+ }
111
+
112
+ .conversation {
113
+ margin-bottom: 10px;
114
+ }
115
+
116
+ .conversation-text {
117
+ font-weight: bold;
118
+ color: #fff;
119
+ transition: color 0.5s ease;
120
+ }
121
+
122
+ .light-mode .conversation-text {
123
+ color: #ccc;
124
+ }
125
+
126
+ .conversation-content {
127
+ color: #ddd;
128
+ transition: color 0.5s ease;
129
+ }
130
+
131
+ .light-mode .conversation-content {
132
+ color: #888;
133
+ }
134
+
135
+ #new-conversation-btn {
136
+ background-color: #3a3b3b;
137
+ color: #fff;
138
+ border: none;
139
+ padding: 10px 20px;
140
+ border-radius: 5px;
141
+ cursor: pointer;
142
+ transition: background-color 0.3s, color 0.5s ease;
143
+ }
144
+
145
+ #new-conversation-btn:hover {
146
+ background-color: #242020;
147
+ }
148
+
149
+ .light-mode #new-conversation-btn {
150
+ background-color: #c9c9c9;
151
+ color: #171717;
152
+ }
153
+
154
+ .light-mode #new-conversation-btn:hover {
155
+ background-color: #e0e0e0;
156
+ color: #171717;
157
+ }
158
+
159
+ .chat-container {
160
+ width: calc(100% - 300px);
161
+ margin-left: 300px;
162
+ height: 100vh;
163
+ overflow: hidden;
164
+ transition: all 0.3s ease-in-out, background-color 0.5s ease;
165
+ }
166
+
167
+ .sidebar.collapsed ~ .chat-container {
168
+ width: calc(100% - 50px);
169
+ margin-left: 50px;
170
+ }
171
+
172
+ .chat-content {
173
+ display: flex;
174
+ flex-direction: column;
175
+ height: 100%;
176
+ padding-bottom: 80px;
177
+ }
178
+
179
+ .logo-container {
180
+ display: flex;
181
+ align-items: center;
182
+ }
183
+
184
+ .logo {
185
+ width: 30px;
186
+ height: 30px;
187
+ margin-right: 10px;
188
+ }
189
+
190
+ .chat-header {
191
+ margin-left: 2%;
192
+ display: flex;
193
+ align-items: center;
194
+ justify-content: space-between;
195
+ font-size: 10px;
196
+ height: 60px;
197
+ background-color: #171717;
198
+ transition: background-color 0.5s ease, color 0.5s ease;
199
+ }
200
+
201
+ .light-mode h1 {
202
+ color: black;
203
+ }
204
+
205
+ .dark-mode h1 {
206
+ color: #f5f5f5;
207
+ }
208
+
209
+ .light-mode .chat-header {
210
+ background-color: #caf2fa;
211
+ color: #333;
212
+ }
213
+
214
+ .dark-mode .chat-header {
215
+ background-color: #1e1e1e;
216
+ color: #f5f5f5;
217
+ }
218
+
219
+ h1 {
220
+ color: #cfcfcf;
221
+ font-family: 'Trebuchet MS', sans-serif;
222
+ transition: color 0.5s ease;
223
+ }
224
+
225
+ /* Toggle Switch Styles */
226
+ .toggle-switch {
227
+ position: relative;
228
+ width: 50px;
229
+ height: 25px;
230
+ margin-right: 20px;
231
+ --light: #d8dbe0;
232
+ --dark: #28292c;
233
+ }
234
+
235
+ .switch-label {
236
+ position: absolute;
237
+ width: 100%;
238
+ height: 100%;
239
+ background-color: var(--dark);
240
+ border-radius: 12px;
241
+ cursor: pointer;
242
+ border: 1.5px solid var(--dark);
243
+ transition: background-color 0.3s;
244
+ }
245
+
246
+ .checkbox {
247
+ position: absolute;
248
+ display: none;
249
+ }
250
+
251
+ .slider {
252
+ position: absolute;
253
+ width: 100%;
254
+ height: 100%;
255
+ border-radius: 12px;
256
+ transition: 0.3s;
257
+ }
258
+
259
+ .checkbox:checked ~ .slider {
260
+ background-color: var(--light);
261
+ }
262
+
263
+ .slider::before {
264
+ content: "";
265
+ position: absolute;
266
+ top: 5.5px;
267
+ left: 5.5px;
268
+ width: 14px;
269
+ height: 14px;
270
+ border-radius: 50%;
271
+ box-shadow: inset 7px -2px 0px 0px var(--light);
272
+ background-color: var(--dark);
273
+ transition: 0.3s;
274
+ }
275
+
276
+ .checkbox:checked ~ .slider::before {
277
+ transform: translateX(26px);
278
+ background-color: var(--dark);
279
+ box-shadow: none;
280
+ }
281
+
282
+ .chat-box {
283
+ display: flex;
284
+ flex-direction: column;
285
+ flex: 1;
286
+ overflow-y: auto;
287
+ padding: 15px;
288
+ overflow-x: hidden;
289
+ }
290
+
291
+ .chat-box::-webkit-scrollbar {
292
+ width: 3px;
293
+ }
294
+
295
+ .chat-box::-webkit-scrollbar-track {
296
+ background: transparent;
297
+ }
298
+
299
+ .chat-box::-webkit-scrollbar-track-piece {
300
+ background: #b0b0b000;
301
+ border-radius: 999px;
302
+ }
303
+
304
+ .chat-box::-webkit-scrollbar-thumb {
305
+ background-color: #ffd700;
306
+ border-radius: 999px;
307
+ border: 2px solid transparent;
308
+ background-clip: padding-box;
309
+ }
310
+
311
+ .chat-box {
312
+ scrollbar-width: thin;
313
+ scrollbar-color: #ffd700 #b0b0b0;
314
+ }
315
+
316
+ .chat-box p {
317
+ margin: 10px 0;
318
+ font-size: 16px;
319
+ }
320
+
321
+ .messageBox {
322
+ position: fixed;
323
+ bottom: 20px;
324
+ left: 50%;
325
+ transform: translateX(-50%);
326
+ display: flex;
327
+ align-items: center;
328
+ background-color: #2d2d2d;
329
+ padding: 0 12px;
330
+ border-radius: 10px;
331
+ border: 1px solid rgb(63, 63, 63);
332
+ width: 60%;
333
+ max-width: 800px;
334
+ height: 50px;
335
+ transition: all 0.3s ease-in-out, background-color 0.5s ease, border-color 0.5s ease;
336
+ }
337
+
338
+ .light-mode .messageBox {
339
+ background-color: white;
340
+ border: 1px solid #1d495f;
341
+ }
342
+
343
+ .messageBox:focus-within {
344
+ border: 1px solid rgb(110, 110, 110);
345
+ }
346
+
347
+ #messageInput {
348
+ flex: 1;
349
+ height: 100%;
350
+ background-color: transparent;
351
+ outline: none;
352
+ border: none;
353
+ padding: 0 12px;
354
+ color: white;
355
+ width: auto;
356
+ font-family: 'Roboto', sans-serif;
357
+ font-size: 14px;
358
+ transition: color 0.5s ease;
359
+ }
360
+
361
+ .light-mode #messageInput {
362
+ color: #171717;
363
+ }
364
+
365
+ #sendButton {
366
+ width: 50px;
367
+ height: 100%;
368
+ background-color: transparent;
369
+ outline: none;
370
+ border: none;
371
+ display: flex;
372
+ align-items: center;
373
+ justify-content: center;
374
+ cursor: pointer;
375
+ padding: 0;
376
+ }
377
+
378
+ #sendButton svg {
379
+ height: 60%;
380
+ width: auto;
381
+ transition: all 0.3s;
382
+ }
383
+
384
+ #sendButton svg path {
385
+ transition: all 0.3s;
386
+ }
387
+
388
+ #sendButton:hover svg path,
389
+ #sendButton:active svg path {
390
+ fill: #3c3c3c;
391
+ stroke: white;
392
+ }
393
+
394
+ .light-mode #sendButton:hover svg path,
395
+ .light-mode #sendButton:active svg path {
396
+ fill: #fbf7e7;
397
+ stroke: #ffcd07 !important;
398
+ }
399
+
400
+ .message-row {
401
+ width: 100%;
402
+ margin: 8px 0;
403
+ display: flex;
404
+ }
405
+
406
+ .message-user {
407
+ justify-content: flex-end;
408
+ }
409
+
410
+ .message-bot {
411
+ justify-content: flex-start;
412
+ }
413
+
414
+ .message-bubble {
415
+ max-width: 70%;
416
+ padding: 10px 14px;
417
+ border-radius: 16px;
418
+ font-size: 14px;
419
+ transition: background-color 0.5s ease, color 0.5s ease;
420
+ }
421
+
422
+ .message-user .message-bubble {
423
+ background-color: #1b798e;
424
+ color: white;
425
+ border-bottom-right-radius: 4px;
426
+ border: 1px solid #FFD700;
427
+ }
428
+
429
+ .message-bot .message-bubble {
430
+ background-color: #2d2d2d;
431
+ color: white;
432
+ border-bottom-left-radius: 4px;
433
+ border: 1px solid #FFD700;
434
+ }
435
+
436
+ .light-mode .message-bot .message-bubble {
437
+ background-color: #fefdf6;
438
+ color: #111;
439
+ }
440
+
441
+ /* βœ… NEW: Image Gallery Styles */
442
+ .image-gallery {
443
+ display: flex;
444
+ gap: 10px;
445
+ flex-wrap: wrap;
446
+ margin-top: 12px;
447
+ padding-top: 12px;
448
+ border-top: 1px solid rgba(255, 215, 0, 0.3);
449
+ }
450
+
451
+ .image-container {
452
+ position: relative;
453
+ border-radius: 8px;
454
+ overflow: hidden;
455
+ cursor: pointer;
456
+ transition: transform 0.2s ease;
457
+ border: 2px solid #FFD700;
458
+ }
459
+
460
+ .image-container:hover {
461
+ transform: scale(1.05);
462
+ }
463
+
464
+ .image-container img {
465
+ width: 150px;
466
+ height: 150px;
467
+ object-fit: cover;
468
+ display: block;
469
+ }
470
+
471
+ .image-caption {
472
+ position: absolute;
473
+ bottom: 0;
474
+ left: 0;
475
+ right: 0;
476
+ background: rgba(0, 0, 0, 0.7);
477
+ color: #FFD700;
478
+ padding: 4px 8px;
479
+ font-size: 10px;
480
+ text-align: center;
481
+ }
482
+
483
+ /* βœ… NEW: Image Modal/Lightbox */
484
+ .image-modal {
485
+ display: none;
486
+ position: fixed;
487
+ z-index: 9999;
488
+ left: 0;
489
+ top: 0;
490
+ width: 100%;
491
+ height: 100%;
492
+ background-color: rgba(0, 0, 0, 0.9);
493
+ align-items: center;
494
+ justify-content: center;
495
+ }
496
+
497
+ .image-modal.active {
498
+ display: flex;
499
+ }
500
+
501
+ .modal-content {
502
+ max-width: 90%;
503
+ max-height: 90%;
504
+ border-radius: 8px;
505
+ box-shadow: 0 4px 20px rgba(255, 215, 0, 0.5);
506
+ }
507
+
508
+ .modal-close {
509
+ position: absolute;
510
+ top: 20px;
511
+ right: 35px;
512
+ color: #FFD700;
513
+ font-size: 40px;
514
+ font-weight: bold;
515
+ cursor: pointer;
516
+ transition: color 0.3s;
517
+ }
518
+
519
+ .modal-close:hover {
520
+ color: #fff;
521
+ }
522
+
523
+ .fileUploadWrapper {
524
+ position: relative;
525
+ display: flex;
526
+ align-items: center;
527
+ margin-right: 8px;
528
+ }
529
+
530
+ .fileUploadWrapper label {
531
+ display: flex;
532
+ align-items: center;
533
+ cursor: pointer;
534
+ padding: 0;
535
+ margin: 0;
536
+ }
537
+
538
+ .fileUploadWrapper svg {
539
+ width: 20px;
540
+ height: 20px;
541
+ fill: #6c6c6c;
542
+ transition: all 0.3s ease;
543
+ }
544
+
545
+ .fileUploadWrapper label:hover svg {
546
+ fill: #10a37f;
547
+ }
548
+
549
+ .fileUploadWrapper .tooltip {
550
+ display: none;
551
+ position: absolute;
552
+ bottom: 125%;
553
+ left: 50%;
554
+ transform: translateX(-50%);
555
+ background-color: #000;
556
+ color: #fff;
557
+ padding: 5px 10px;
558
+ border-radius: 6px;
559
+ font-size: 12px;
560
+ white-space: nowrap;
561
+ z-index: 1;
562
+ }
563
+
564
+ .fileUploadWrapper label:hover .tooltip {
565
+ display: block;
566
+ }
567
+
568
+ .fileUploadWrapper input {
569
+ display: none;
570
+ }
571
+
572
+ .fileUploadWrapper,
573
+ #sendButton {
574
+ width: 40px;
575
+ height: 40px;
576
+ display: flex;
577
+ align-items: center;
578
+ justify-content: center;
579
+ margin: 0 8px;
580
+ }
581
+
582
+ .fileUploadWrapper svg,
583
+ #sendButton svg {
584
+ width: 20px;
585
+ height: 20px;
586
+ fill: #6c6c6c;
587
+ transition: all 0.3s ease;
588
+ }
589
+
590
+ .fileUploadWrapper label:hover svg path,
591
+ .fileUploadWrapper label:hover svg circle,
592
+ #sendButton:hover svg path {
593
+ stroke: white;
594
+ transition: stroke 0.3s ease;
595
+ }
596
+
597
+ .light-mode .fileUploadWrapper label:hover svg path,
598
+ .light-mode .fileUploadWrapper label:hover svg circle,
599
+ .light-mode #sendButton:hover svg path {
600
+ stroke: #ffcd07;
601
+ transition: stroke 0.3s ease;
602
+ }
603
+
604
+ #sendButton svg,
605
+ .fileUploadWrapper svg {
606
+ transition: all 0.3s ease;
607
+ }
608
+
609
+ /* Mode Dropdown Styles */
610
+ .mode-dropdown-wrapper {
611
+ position: relative;
612
+ display: flex;
613
+ align-items: center;
614
+ margin: 0 8px;
615
+ }
616
+
617
+ .mode-dropdown-button {
618
+ display: inline-flex;
619
+ justify-content: center;
620
+ align-items: center;
621
+ padding: 6px 12px;
622
+ background-color: transparent;
623
+ border: none;
624
+ color: #e5e5e5;
625
+ font-size: 14px;
626
+ font-family: 'Roboto', sans-serif;
627
+ font-weight: 500;
628
+ cursor: pointer;
629
+ border-radius: 6px;
630
+ white-space: nowrap;
631
+ transition: background-color 0.3s, color 0.5s ease;
632
+ }
633
+
634
+ .light-mode .mode-dropdown-button {
635
+ background-color: #f9f9f9;
636
+ color: #171717;
637
+ }
638
+
639
+ .mode-dropdown-button:hover {
640
+ background-color: rgba(110, 110, 110, 0.12);
641
+ }
642
+
643
+ .light-mode .mode-dropdown-button:hover {
644
+ background-color: #f0f0f0;
645
+ }
646
+
647
+ .dropdown-arrow {
648
+ width: 16px;
649
+ height: 16px;
650
+ margin-left: 6px;
651
+ transition: transform 0.2s ease, color 0.5s ease;
652
+ }
653
+
654
+ .mode-dropdown-button[aria-expanded="true"] .dropdown-arrow {
655
+ transform: rotate(180deg);
656
+ }
657
+
658
+ .mode-dropdown-menu {
659
+ position: absolute;
660
+ bottom: 100%;
661
+ right: 0;
662
+ margin-bottom: 8px;
663
+ min-width: 180px;
664
+ background-color: #2d2d2d;
665
+ border: 1px solid rgb(63, 63, 63);
666
+ border-radius: 8px;
667
+ box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.4);
668
+ z-index: 1000;
669
+ opacity: 0;
670
+ transform: translateY(-8px);
671
+ transition: opacity 0.2s ease, transform 0.2s ease, background-color 0.5s ease, border-color 0.5s ease;
672
+ pointer-events: none;
673
+ }
674
+
675
+ .mode-dropdown-menu:not(.hidden) {
676
+ opacity: 1;
677
+ transform: translateY(0);
678
+ pointer-events: auto;
679
+ }
680
+
681
+ .light-mode .mode-dropdown-menu {
682
+ background-color: #ffffff;
683
+ border-color: #1d495f;
684
+ box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.15);
685
+ }
686
+
687
+ .dropdown-items {
688
+ padding: 4px 0;
689
+ }
690
+
691
+ .dropdown-item {
692
+ display: flex;
693
+ align-items: center;
694
+ padding: 10px 16px;
695
+ color: #ddd;
696
+ text-decoration: none;
697
+ font-size: 14px;
698
+ font-family: 'Roboto', sans-serif;
699
+ cursor: pointer;
700
+ transition: background-color 0.15s ease, color 0.5s ease;
701
+ }
702
+
703
+ .dropdown-item:first-child {
704
+ border-top-left-radius: 8px;
705
+ border-top-right-radius: 8px;
706
+ }
707
+
708
+ .dropdown-item:last-child {
709
+ border-bottom-left-radius: 8px;
710
+ border-bottom-right-radius: 8px;
711
+ }
712
+
713
+ .dropdown-item:hover {
714
+ background-color: rgba(110, 110, 110, 0.25);
715
+ color: #fff;
716
+ }
717
+
718
+ .light-mode .dropdown-item {
719
+ color: #1f2937;
720
+ }
721
+
722
+ .light-mode .dropdown-item:hover {
723
+ background-color: #f3f4f6;
724
+ color: #1e40af;
725
+ }
726
+
727
+ .dropdown-item .font-semibold {
728
+ font-weight: 600;
729
+ }
730
+
731
+ .hidden {
732
+ display: none;
733
+ }
734
+ </style>
735
+ </head>
736
+ <body>
737
+ <div class="sidebar collapsed">
738
+ <div class="tooltip">
739
+ <span class="tooltiptext">Open Sidebar</span>
740
+ <button id="sidebar-toggle">
741
+ <i class="fas fa-chevron-right"></i>
742
+ </button>
743
+ </div>
744
+ <div class="sidebar-content">
745
+ <div class="conversation-list">
746
+ <div class="conversation">
747
+ <p class="conversation-text">Last Conversation:</p>
748
+ <p class="conversation-content">No conversation yet</p>
749
+ </div>
750
+ </div>
751
+ <button id="new-conversation-btn">Start New Conversation</button>
752
+ </div>
753
+ </div>
754
+
755
+ <div class="chat-container light-mode">
756
+ <div class="chat-content">
757
+ <div class="chat-header">
758
+ <div class="logo-container">
759
+ <h1>BeRU&nbsp;</h1>
760
+ </div>
761
+ <div class="toggle-switch">
762
+ <label class="switch-label">
763
+ <input type="checkbox" id="toggle-checkbox" class="checkbox">
764
+ <span class="slider"></span>
765
+ </label>
766
+ </div>
767
+ </div>
768
+
769
+ <div id="chat-box" class="chat-box"></div>
770
+
771
+ <div class="messageBox">
772
+ <div class="fileUploadWrapper">
773
+ <label for="file">
774
+ <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 337 337">
775
+ <circle stroke-width="20" stroke="#6c6c6c" fill="none" r="158.5" cy="168.5" cx="168.5"></circle>
776
+ <path stroke-linecap="round" stroke-width="25" stroke="#6c6c6c" d="M167.759 79V259"></path>
777
+ <path stroke-linecap="round" stroke-width="25" stroke="#6c6c6c" d="M79 167.138H259"></path>
778
+ </svg>
779
+ <span class="tooltip">Add an image</span>
780
+ </label>
781
+ <input type="file" id="file" name="file" />
782
+ </div>
783
+
784
+ <input required="" placeholder="Message..." type="text" id="messageInput" />
785
+
786
+ <div class="mode-dropdown-wrapper">
787
+ <button id="modeDropdownButton" type="button" class="mode-dropdown-button" aria-expanded="false" aria-haspopup="true">
788
+ Detailed
789
+ <svg class="dropdown-arrow" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor" aria-hidden="true">
790
+ <path fill-rule="evenodd" d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z" clip-rule="evenodd" />
791
+ </svg>
792
+ </button>
793
+ <div id="modeDropdownMenu" class="mode-dropdown-menu hidden" role="menu" aria-orientation="vertical" aria-labelledby="modeDropdownButton" tabindex="-1">
794
+ <div class="dropdown-items">
795
+ <a href="#" class="dropdown-item" role="menuitem" tabindex="-1">
796
+ <span class="font-semibold">Short and Concise</span>
797
+ </a>
798
+ <a href="#" class="dropdown-item" role="menuitem" tabindex="-1">
799
+ <span class="font-semibold">Detailed</span>
800
+ </a>
801
+ <a href="#" class="dropdown-item" role="menuitem" tabindex="-1">
802
+ <span class="font-semibold">Step-by-Step</span>
803
+ </a>
804
+ </div>
805
+ </div>
806
+ </div>
807
+
808
+ <button id="sendButton">
809
+ <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 664 663">
810
+ <path fill="none" d="M646.293 331.888L17.7538 17.6187L155.245 331.888M646.293 331.888L17.753 646.157L155.245 331.888M646.293 331.888L318.735 330.228L155.245 331.888"></path>
811
+ <path stroke-linejoin="round" stroke-linecap="round" stroke-width="33.67" stroke="#6c6c6c" d="M646.293 331.888L17.7538 17.6187L155.245 331.888M646.293 331.888L17.753 646.157L155.245 331.888M646.293 331.888L318.735 330.228L155.245 331.888"></path>
812
+ </svg>
813
+ </button>
814
+ </div>
815
+ </div>
816
+ </div>
817
+
818
+ <!-- βœ… NEW: Image Modal -->
819
+ <div id="imageModal" class="image-modal">
820
+ <span class="modal-close" id="modalClose">&times;</span>
821
+ <img class="modal-content" id="modalImage">
822
+ </div>
823
+
824
+ <script>
825
+ const chatBox = document.getElementById('chat-box');
826
+ const userInput = document.getElementById('messageInput');
827
+ const sendButton = document.getElementById('sendButton');
828
+ const sidebarToggle = document.getElementById('sidebar-toggle');
829
+ const modeToggle = document.getElementById('toggle-checkbox');
830
+ const sidebar = document.querySelector('.sidebar');
831
+ const chatContainer = document.querySelector('.chat-container');
832
+ const messageBox = document.querySelector('.messageBox');
833
+ const imageModal = document.getElementById('imageModal');
834
+ const modalImage = document.getElementById('modalImage');
835
+ const modalClose = document.getElementById('modalClose');
836
+
837
+ let currentMode = 'Detailed';
838
+ let sessionId = 'session-' + Date.now();
839
+
840
// βœ… NEW: Image Modal Functions
// Show the full-size image viewer ("lightbox") for the given image source.
function openImageModal(imageSrc) {
    imageModal.classList.add('active');
    modalImage.src = imageSrc;
}

// Hide the viewer and clear the image so the browser can release it.
function closeImageModal() {
    imageModal.classList.remove('active');
    modalImage.src = '';
}
850
+
851
+ modalClose.addEventListener('click', closeImageModal);
852
+ imageModal.addEventListener('click', (e) => {
853
+ if (e.target === imageModal) {
854
+ closeImageModal();
855
+ }
856
+ });
857
+
858
+ // Utility Functions
859
// Current wall-clock time formatted according to the user's locale.
function getCurrentTime() {
    return new Date().toLocaleTimeString();
}
863
+
864
+ // βœ… MODIFIED: Chat Functions with Image Support
865
// Render one chat message into #chat-box. `sender` selects the row style
// ('user' vs. anything else = bot); `images` is an optional list of
// {data, source, page} objects retrieved by the RAG backend.
function appendMessage(sender, message, images = []) {
    const row = document.createElement('div');
    row.classList.add('message-row');
    row.classList.add(sender === 'user' ? 'message-user' : 'message-bot');

    const bubble = document.createElement('div');
    bubble.classList.add('message-bubble');

    // Text first; newlines become <br> so multi-line answers render.
    bubble.innerHTML = message.replace(/\n/g, '<br>');

    // Optional image gallery appended below the text.
    if (images && images.length > 0) {
        const gallery = document.createElement('div');
        gallery.classList.add('image-gallery');

        for (const img of images) {
            const container = document.createElement('div');
            container.classList.add('image-container');

            const picture = document.createElement('img');
            picture.src = img.data;
            picture.alt = `Image from ${img.source}`;
            picture.loading = 'lazy';
            // Click opens the full-size modal viewer.
            picture.addEventListener('click', () => openImageModal(img.data));

            const caption = document.createElement('div');
            caption.classList.add('image-caption');
            caption.textContent = `πŸ“„ ${img.source} | Page ${img.page}`;

            container.appendChild(picture);
            container.appendChild(caption);
            gallery.appendChild(container);
        }

        bubble.appendChild(gallery);
    }

    row.appendChild(bubble);
    chatBox.appendChild(row);
    // Keep the newest message in view.
    chatBox.scrollTop = chatBox.scrollHeight;
}
911
+
912
// Submit the current input to /api/chat and render the reply (with any
// retrieved images). Shows a temporary "Thinking..." bubble while waiting.
async function sendMessage() {
    const text = userInput.value.trim();
    if (text === '') return;

    appendMessage('user', text);
    userInput.value = '';

    // Placeholder bubble; removed once the server responds (or errors).
    appendMessage('ChatGPT', '⏳ Thinking...');

    try {
        const payload = {
            message: text,
            mode: currentMode,
            session_id: sessionId,
            include_images: true // ask the backend to return images too
        };

        const response = await fetch('/api/chat', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify(payload)
        });

        const data = await response.json();

        // Drop the "Thinking..." placeholder.
        chatBox.removeChild(chatBox.lastChild);

        if (data.error) {
            appendMessage('ChatGPT', '❌ Error: ' + data.error);
        } else {
            const images = data.images || [];
            appendMessage('ChatGPT', data.response, images);

            if (images.length > 0) {
                console.log(`πŸ“· Received ${images.length} images`);
            }
        }
    } catch (error) {
        // Drop the "Thinking..." placeholder before showing the failure.
        chatBox.removeChild(chatBox.lastChild);
        appendMessage('ChatGPT', '❌ Connection error. Please check if the server is running.');
        console.error('Error:', error);
    }
}
961
+
962
+ // Event Listeners
963
// Wire up all UI behavior once the DOM is ready: sidebar, theme toggle,
// new-conversation button, send handlers, and the response-mode dropdown.
document.addEventListener('DOMContentLoaded', function() {
    const newConversationBtn = document.getElementById('new-conversation-btn');
    const conversationContent = document.querySelector('.conversation-content');

    // Sidebar Toggle
    sidebarToggle.addEventListener('click', function() {
        sidebar.classList.toggle('collapsed');
        adjustMessageBoxPosition();
    });

    // Keep the input bar horizontally centered in the chat area, whose
    // width depends on the sidebar state (collapsed = 50px, open = 300px).
    function adjustMessageBoxPosition() {
        const sidebarWidth = sidebar.classList.contains('collapsed') ? 50 : 300;
        const chatAreaWidth = window.innerWidth - sidebarWidth;
        messageBox.style.left = sidebarWidth + (chatAreaWidth / 2) + 'px';
        messageBox.style.transform = 'translateX(-50%)';
    }

    adjustMessageBoxPosition();
    window.addEventListener('resize', adjustMessageBoxPosition);

    // New Conversation: clear the UI, rotate the session id, notify the server.
    newConversationBtn.addEventListener('click', async function() {
        conversationContent.textContent = 'New Conversation Started!';
        chatBox.innerHTML = '';
        sessionId = 'session-' + Date.now();

        try {
            await fetch('/api/new-conversation', {
                method: 'POST',
                headers: {'Content-Type': 'application/json'},
                body: JSON.stringify({session_id: sessionId})
            });
        } catch (error) {
            // Best-effort: the UI is already reset even if the server call fails.
            console.error('Error starting new conversation:', error);
        }

        adjustMessageBoxPosition();
    });

    // Theme Toggle: flip dark/light on both the body and the chat container.
    modeToggle.addEventListener('change', function() {
        document.body.classList.toggle('dark-mode');
        document.body.classList.toggle('light-mode');
        chatContainer.classList.toggle('light-mode');
        chatContainer.classList.toggle('dark-mode');
        adjustMessageBoxPosition();
    });

    // Set initial mode
    document.body.classList.add('light-mode');

    // Send button and Enter key both submit the message.
    sendButton.addEventListener('click', sendMessage);
    userInput.addEventListener('keydown', (event) => {
        if (event.key === 'Enter') sendMessage();
    });

    // Mode Dropdown (response style: Short / Detailed / Step-by-Step)
    const modeDropdownButton = document.getElementById('modeDropdownButton');
    const modeDropdownMenu = document.getElementById('modeDropdownMenu');

    if (modeDropdownButton && modeDropdownMenu) {
        // Open/close the menu and keep aria-expanded in sync for a11y.
        function toggleMenu() {
            const isHidden = modeDropdownMenu.classList.contains('hidden');
            if (isHidden) {
                modeDropdownMenu.classList.remove('hidden');
                modeDropdownButton.setAttribute('aria-expanded', 'true');
            } else {
                modeDropdownMenu.classList.add('hidden');
                modeDropdownButton.setAttribute('aria-expanded', 'false');
            }
        }

        modeDropdownButton.addEventListener('click', (event) => {
            // Stop propagation so the document-level close handler below
            // does not immediately re-close the menu.
            event.stopPropagation();
            toggleMenu();
        });

        // Close the menu on any click outside the button or the menu itself.
        document.addEventListener('click', (event) => {
            if (!modeDropdownButton.contains(event.target) && !modeDropdownMenu.contains(event.target)) {
                if (!modeDropdownMenu.classList.contains('hidden')) {
                    toggleMenu();
                }
            }
        });

        modeDropdownMenu.querySelectorAll('.dropdown-item').forEach(item => {
            item.addEventListener('click', (event) => {
                event.preventDefault();
                const selectedMode = event.currentTarget.querySelector('.font-semibold').textContent.trim();
                console.log('Mode selected:', selectedMode);

                // `currentMode` is sent with every /api/chat request.
                currentMode = selectedMode;

                // The button label is a bare text node sitting next to the
                // arrow SVG; update it in place, or create it if missing.
                const buttonTextNode = Array.from(modeDropdownButton.childNodes).find(node =>
                    node.nodeType === Node.TEXT_NODE && node.textContent.trim() !== ''
                );

                if (buttonTextNode) {
                    buttonTextNode.textContent = selectedMode;
                } else {
                    const textNode = document.createTextNode(selectedMode);
                    modeDropdownButton.insertBefore(textNode, modeDropdownButton.querySelector('.dropdown-arrow'));
                }

                toggleMenu();
            });
        });
    }
});
1073
+ </script>
1074
+ </body>
1075
+ </html>
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.28.0
2
+ torch==2.0.0
3
+ transformers==4.36.0
4
+ langchain==0.1.0
5
+ langchain-community==0.0.10
6
+ langchain-core==0.1.8
7
+ faiss-cpu==1.7.4
8
+ pydantic==2.5.0
9
+ numpy==1.24.3
10
+ dill==0.3.7
11
+ bitsandbytes==0.41.1
12
+ flashrank==0.2.0
13
+ PyMuPDF==1.23.8
14
+ Pillow==10.0.1
15
+ pytesseract==0.3.10
16
+ pdf2image==1.16.3
17
+ rank-bm25==0.2.2
18
+ huggingface-hub==0.18.0
19
+ peft==0.4.0
spaces_app.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ BeRU RAG Chat App - Optimized for Hugging Face Spaces
3
+ Deployment: https://huggingface.co/spaces/AnwinMJ/Beru
4
+ """
5
+
6
+ import streamlit as st
7
+ import torch
8
+ import os
9
+ import pickle
10
+ import faiss
11
+ import numpy as np
12
+ from transformers import AutoModel, AutoProcessor, AutoTokenizer
13
+ from typing import List, Dict
14
+ import time
15
+ import logging
16
+
17
+ # Setup logging
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # ========================================
22
+ # 🎨 STREAMLIT PAGE CONFIG
23
+ # ========================================
24
+ st.set_page_config(
25
+ page_title="BeRU Chat - RAG Assistant",
26
+ page_icon="πŸ€–",
27
+ layout="wide",
28
+ initial_sidebar_state="expanded",
29
+ menu_items={
30
+ "About": "BeRU - Offline RAG System with VLM2Vec and Mistral 7B"
31
+ }
32
+ )
33
+
34
+ # ========================================
35
+ # 🌍 ENVIRONMENT DETECTION
36
+ # ========================================
37
def detect_environment():
    """Detect the runtime environment (Hugging Face Spaces vs. local).

    Returns:
        dict with:
            is_spaces (bool): True when running on HF Spaces (SPACES env var
                set to 'true', or the conventional /app directory exists).
            device (str): 'cuda' if a GPU is visible to torch, else 'cpu'.
            model_cache (str): directory for HF model downloads (HF_HOME or
                './cache').
            gpu_memory (int): total VRAM of device 0 in bytes; 0 without CUDA.
    """
    # Bug fix: the original expression `'huggingface' in os.path.exists('/app')`
    # performed a membership test on a bool and raised TypeError whenever the
    # SPACES env var was unset. The /app-directory check alone is the intent.
    is_spaces = os.getenv('SPACES', 'false').lower() == 'true' or os.path.exists('/app')
    has_cuda = torch.cuda.is_available()
    return {
        'is_spaces': is_spaces,
        'device': 'cuda' if has_cuda else 'cpu',
        'model_cache': os.getenv('HF_HOME', './cache'),
        'gpu_memory': torch.cuda.get_device_properties(0).total_memory if has_cuda else 0
    }
46
+
47
+ env_info = detect_environment()
48
+
49
+ # Display environment info in sidebar
50
+ with st.sidebar:
51
+ st.write("### System Info")
52
+ st.write(f"πŸ–₯️ Device: `{env_info['device'].upper()}`")
53
+ if env_info['device'] == 'cuda':
54
+ st.write(f"πŸ’Ύ GPU VRAM: `{env_info['gpu_memory'] / 1e9:.1f} GB`")
55
+ st.write(f"πŸ“¦ Cache: `{env_info['model_cache']}`")
56
+
57
+
58
+ # ========================================
59
+ # 🎯 MODEL LOADING WITH CACHING
60
+ # ========================================
61
@st.cache_resource
def load_embedding_model():
    """Load the VLM2Vec embedding model, processor and tokenizer (cached).

    Returns:
        (model, processor, tokenizer, device) β€” model is in eval mode and
        moved to GPU (fp16) when CUDA is available, CPU (fp32) otherwise.

    Raises:
        Exception: re-raised after reporting any loading failure in the UI.
    """
    with st.spinner("⏳ Loading embedding model... (first time may take 5 min)"):
        try:
            logger.info("Loading VLM2Vec model...")
            device = "cuda" if torch.cuda.is_available() else "cpu"

            repo_id = "TIGER-Lab/VLM2Vec-Qwen2VL-2B"
            # Keyword arguments shared by all three from_pretrained() calls.
            shared = {
                "trust_remote_code": True,
                "cache_dir": env_info['model_cache'],
            }

            model = AutoModel.from_pretrained(
                repo_id,
                torch_dtype=torch.float16 if device == "cuda" else torch.float32,
                **shared,
            ).to(device)
            processor = AutoProcessor.from_pretrained(repo_id, **shared)
            tokenizer = AutoTokenizer.from_pretrained(repo_id, **shared)

            model.eval()
            logger.info("βœ… Embedding model loaded successfully")
            st.success("βœ… Embedding model loaded!")
            return model, processor, tokenizer, device

        except Exception as e:
            st.error(f"❌ Error loading embedding model: {str(e)}")
            logger.error(f"Model loading error: {e}")
            raise
98
+
99
+
100
@st.cache_resource
def load_llm_model():
    """Load Mistral-7B-Instruct with 4-bit NF4 quantization (cached).

    Returns:
        (model, tokenizer, device) β€” model is device-mapped automatically
        by transformers/accelerate.

    Raises:
        Exception: re-raised after reporting any loading failure in the UI.
    """
    with st.spinner("⏳ Loading LLM model... (first time may take 5 min)"):
        try:
            logger.info("Loading Mistral-7B model...")
            device = "cuda" if torch.cuda.is_available() else "cpu"

            from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

            repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
            cache_dir = env_info['model_cache']

            # 4-bit quantization config for memory efficiency
            quant_cfg = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
            )

            tokenizer = AutoTokenizer.from_pretrained(repo_id, cache_dir=cache_dir)
            model = AutoModelForCausalLM.from_pretrained(
                repo_id,
                quantization_config=quant_cfg,
                device_map="auto",
                cache_dir=cache_dir,
            )

            logger.info("βœ… LLM model loaded successfully")
            st.success("βœ… LLM model loaded!")
            return model, tokenizer, device

        except Exception as e:
            st.error(f"❌ Error loading LLM: {str(e)}")
            logger.error(f"LLM loading error: {e}")
            raise
139
+
140
+
141
# ========================================
# 🏠 UI LAYOUT
# ========================================
# Fix: the title contained mojibake ("οΏ½οΏ½") where an emoji was lost in an
# encoding round-trip; restored to the robot emoji used as page_icon above.
st.title("πŸ€– BeRU Chat - RAG Assistant")
st.markdown("""
A powerful offline RAG system combining Mistral 7B LLM with VLM2Vec embeddings
for intelligent document search and conversation.

**Status**: Models loading on first access (5-8 minutes)
""")
151
+
152
# Load models eagerly at script (re)run; @st.cache_resource means only the
# first run actually downloads/initializes them.
try:
    embedding_model, processor, tokenizer, device = load_embedding_model()
    llm_model, llm_tokenizer, llm_device = load_llm_model()
    models_loaded = True
except Exception as e:
    st.error(f"Failed to load models: {str(e)}")
    models_loaded = False

if models_loaded:
    # Main chat interface
    left_col, right_col = st.columns([2, 1])

    with left_col:
        st.subheader("πŸ’¬ Chat")

        # Initialize session state
        if "messages" not in st.session_state:
            st.session_state.messages = []

        # Display chat history
        for msg in st.session_state.messages:
            with st.chat_message(msg["role"]):
                st.write(msg["content"])

        # Chat input
        user_input = st.chat_input("Ask a question about your documents...")

        if user_input:
            # Add user message
            st.session_state.messages.append({"role": "user", "content": user_input})

            with st.chat_message("user"):
                st.write(user_input)

            # Generate response
            with st.chat_message("assistant"):
                with st.spinner("πŸ€” Thinking..."):
                    # Placeholder for RAG response
                    # NOTE(review): retrieval/generation is not wired up here;
                    # this always returns a stub string β€” confirm intended.
                    response = "Response generated from RAG system..."
                    st.write(response)
                    st.session_state.messages.append({"role": "assistant", "content": response})

    with right_col:
        st.subheader("πŸ“Š Info")
        st.info("""
**Model Info:**
- 🧠 Embedding: VLM2Vec-Qwen2VL-2B
- πŸ’¬ LLM: Mistral-7B-Instruct
- πŸ” Search: FAISS + BM25

**Performance:**
- Device: GPU if available
- Quantization: 4-bit
- Context: Multi-turn
""")

        st.subheader("βš™οΈ Settings")
        # NOTE(review): these sliders are read but never passed to generation
        # in this file β€” confirm they are consumed elsewhere.
        temperature = st.slider("Temperature", 0.0, 1.0, 0.7)
        max_tokens = st.slider("Max Tokens", 100, 2000, 512)

else:
    st.error("❌ Failed to initialize models. Check logs for details.")
    st.info("Try refreshing the page or restarting the Space.")

# ========================================
# πŸ“ FOOTER
# ========================================
st.markdown("---")
st.markdown("""
<div style='text-align: center'>
    <small>
        BeRU RAG System |
        <a href='https://huggingface.co/spaces/AnwinMJ/Beru'>Space</a> |
        <a href='https://github.com/AnwinMJ/BeRU'>GitHub</a>
    </small>
</div>
""", unsafe_allow_html=True)
vlm2rag2.py ADDED
@@ -0,0 +1,1354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import glob
3
+ import os
4
+ import gc
5
+ import time
6
+ import re
7
+ import hashlib
8
+ from pathlib import Path
9
+ from typing import List, Dict, Tuple, Optional
10
+ import fitz # PyMuPDF
11
+ import torch
12
+ import numpy as np
13
+ from PIL import Image
14
+ from transformers import AutoModel, AutoProcessor, AutoTokenizer # Changed from AutoModelForCausalLM
15
+ from langchain_core.documents import Document
16
+ import pickle
17
+ from numpy.linalg import norm
18
+ import camelot
19
+ import base64
20
+ import pytesseract
21
+ from pdf2image import convert_from_path
22
+ import faiss
23
+ from rank_bm25 import BM25Okapi
24
+
25
+
26
+ # ========================================
27
+ # πŸ“‚ CONFIGURATION
28
+ # ========================================
29
+ PDF_DIR = r"D:\BeRU\testing"
30
+ FAISS_INDEX_PATH = "VLM2Vec-V2rag2"
31
+ MODEL_CACHE_DIR = ".cache"
32
+ IMAGE_OUTPUT_DIR = "extracted_images2"
33
+
34
+ # Chunking configuration
35
+ CHUNK_SIZE = 450 # words
36
+ OVERLAP = 100 # words
37
+ MIN_CHUNK_SIZE = 50
38
+ MAX_CHUNK_SIZE = 800
39
+
40
+ # Instruction prefixes for better embeddings
41
+ DOCUMENT_INSTRUCTION = "Represent this technical document for semantic search: "
42
+ QUERY_INSTRUCTION = "Represent this question for finding relevant technical information: "
43
+
44
+ # Hybrid search weights
45
+ DENSE_WEIGHT = 0.4 # Weight for semantic search
46
+ SPARSE_WEIGHT = 0.6 # Weight for keyword search
47
+
48
+ # Create directories
49
+ os.makedirs(PDF_DIR, exist_ok=True)
50
+ os.makedirs(FAISS_INDEX_PATH, exist_ok=True)
51
+ os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
52
+ os.makedirs(IMAGE_OUTPUT_DIR, exist_ok=True)
53
+
54
+ # ========================================
55
+ # πŸ€– VLM2Vec-V2 WRAPPER (ENHANCED)
56
+ # ========================================
57
class VLM2VecEmbeddings:
    """VLM2Vec-V2 embedding class with instruction prefixes.

    Wraps a VLM2Vec (Qwen2-VL based) checkpoint and exposes:
      - embed_documents / embed_query for text, using attention-masked mean
        pooling over the last hidden states and different instruction
        prefixes for documents vs. queries;
      - embed_image for figures/diagrams via the multimodal processor.
    """

    def __init__(self, model_name: str = "TIGER-Lab/VLM2Vec-Qwen2VL-2B", cache_dir: str = None):
        """Load model, processor and tokenizer and probe the embedding size.

        Args:
            model_name: Hugging Face hub id of the VLM2Vec checkpoint.
            cache_dir: optional download-cache directory.

        Raises:
            Exception: re-raised if any component fails to load.
        """
        print(f"πŸ€– Loading VLM2Vec-V2 model: {model_name}")

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f" Device: {self.device}")

        try:
            # fp16 on GPU, fp32 on CPU (fp16 CPU inference is poorly supported).
            self.model = AutoModel.from_pretrained(
                model_name,
                cache_dir=cache_dir,
                trust_remote_code=True,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
            ).to(self.device)

            self.processor = AutoProcessor.from_pretrained(
                model_name,
                cache_dir=cache_dir,
                trust_remote_code=True
            )

            self.tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                cache_dir=cache_dir,
                trust_remote_code=True
            )

            self.model.eval()

            # Get actual embedding dimension by running a tiny forward pass
            # and reading the width of the last hidden state.
            test_input = self.tokenizer("test", return_tensors="pt").to(self.device)
            with torch.no_grad():
                test_output = self.model(**test_input, output_hidden_states=True)
                self.embedding_dim = test_output.hidden_states[-1].shape[-1]

            print(f" Embedding dimension: {self.embedding_dim}")
            print("βœ… VLM2Vec-V2 loaded successfully\n")

        except Exception as e:
            print(f"❌ Error loading VLM2Vec-V2: {e}")
            raise

    def normalize_text(self, text: str) -> str:
        """Normalize text for better embeddings.

        Collapses whitespace, strips "Page N" artifacts left by PDF
        extraction, and trims the result.
        """
        # Remove excessive whitespace
        text = re.sub(r'\s+', ' ', text)

        # Remove page numbers
        text = re.sub(r'Page \d+', '', text, flags=re.IGNORECASE)

        # Normalize unicode
        text = text.strip()

        return text

    def embed_documents(self, texts: List[str], add_instruction: bool = True) -> List[List[float]]:
        """Embed documents with instruction prefix and weighted mean pooling.

        Args:
            texts: raw document chunks.
            add_instruction: prepend DOCUMENT_INSTRUCTION to each text
                (disabled by embed_query, which supplies its own prefix).

        Returns:
            One embedding (list of floats) per input text.

        Raises:
            RuntimeError: if any single text fails to embed β€” deliberately
                fail-fast rather than silently dropping chunks.
        """
        embeddings = []

        with torch.no_grad():
            for text in texts:
                try:
                    # βœ… NORMALIZE TEXT
                    clean_text = self.normalize_text(text)

                    # βœ… ADD INSTRUCTION PREFIX
                    if add_instruction:
                        prefixed_text = DOCUMENT_INSTRUCTION + clean_text
                    else:
                        prefixed_text = clean_text

                    inputs = self.tokenizer(
                        prefixed_text,
                        return_tensors="pt",
                        padding=True,
                        truncation=True,
                        # Some tokenizers report a huge sentinel model_max_length;
                        # cap at 2048 (or 512 if unset).
                        max_length=min(self.tokenizer.model_max_length or 512, 2048)
                    ).to(self.device)

                    outputs = self.model(**inputs, output_hidden_states=True)

                    if hasattr(outputs, 'hidden_states') and outputs.hidden_states is not None:
                        # βœ… WEIGHTED MEAN POOLING (ignores padding)
                        hidden_states = outputs.hidden_states[-1]
                        attention_mask = inputs['attention_mask'].unsqueeze(-1).float()

                        # Apply attention mask as weights
                        weighted_hidden_states = hidden_states * attention_mask
                        sum_embeddings = weighted_hidden_states.sum(dim=1)
                        # Clamp avoids divide-by-zero on an all-padding row.
                        sum_mask = torch.clamp(attention_mask.sum(dim=1), min=1e-9)

                        # Weighted mean
                        embedding = (sum_embeddings / sum_mask).squeeze()
                    else:
                        # Fallback to logits
                        # NOTE(review): pooled logits are vocabulary scores, not a
                        # semantic embedding β€” confirm this path is ever taken.
                        attention_mask = inputs['attention_mask'].unsqueeze(-1).float()
                        weighted_logits = outputs.logits * attention_mask
                        sum_embeddings = weighted_logits.sum(dim=1)
                        sum_mask = torch.clamp(attention_mask.sum(dim=1), min=1e-9)
                        embedding = (sum_embeddings / sum_mask).squeeze()

                    embeddings.append(embedding.cpu().numpy().tolist())

                except Exception as e:
                    print(f" ❌ CRITICAL: Failed to embed text: {e}")
                    print(f" Text preview: {text[:100]}")
                    raise RuntimeError(f"Embedding failed for text: {text[:50]}...") from e

        return embeddings

    def embed_query(self, text: str) -> List[float]:
        """Embed query with query-specific instruction.

        Uses QUERY_INSTRUCTION (asymmetric to the document prefix) and
        delegates to embed_documents with add_instruction=False so the
        document prefix is not applied on top.
        """
        # βœ… DIFFERENT INSTRUCTION FOR QUERIES
        clean_text = self.normalize_text(text)
        prefixed_text = QUERY_INSTRUCTION + clean_text

        # Don't add document instruction again
        return self.embed_documents([prefixed_text], add_instruction=False)[0]

    def embed_image(self, image_path: str, prompt: str = "Technical diagram") -> Optional[List[float]]:
        """Embed image with Qwen2-VL proper format.

        Args:
            image_path: path to an image file (converted to RGB).
            prompt: short text paired with the image in the chat template.

        Returns:
            Embedding as a list of floats, or None if the model exposes
            neither hidden states nor a pooler output, or on any error
            (best-effort: image failures are logged, not raised).
        """
        try:
            with torch.no_grad():
                image = Image.open(image_path).convert('RGB')

                # βœ… QWEN2-VL CORRECT FORMAT
                messages = [
                    {
                        "role": "user",
                        "content": [
                            {"type": "image", "image": image},
                            {"type": "text", "text": prompt}
                        ]
                    }
                ]

                # Apply chat template
                text = self.processor.apply_chat_template(
                    messages,
                    tokenize=False,
                    add_generation_prompt=True
                )

                # Process with both text and images
                inputs = self.processor(
                    text=[text],
                    images=[image],
                    return_tensors="pt",
                    padding=True
                ).to(self.device)

                outputs = self.model(**inputs, output_hidden_states=True)

                if hasattr(outputs, 'hidden_states') and outputs.hidden_states is not None:
                    hidden_states = outputs.hidden_states[-1]

                    # Use weighted mean pooling
                    if 'attention_mask' in inputs:
                        attention_mask = inputs['attention_mask'].unsqueeze(-1).float()
                        weighted_hidden_states = hidden_states * attention_mask
                        sum_embeddings = weighted_hidden_states.sum(dim=1)
                        sum_mask = torch.clamp(attention_mask.sum(dim=1), min=1e-9)
                        embedding = (sum_embeddings / sum_mask).squeeze()
                    else:
                        embedding = hidden_states.mean(dim=1).squeeze()
                else:
                    # Fallback to pooler output if available
                    if hasattr(outputs, 'pooler_output') and outputs.pooler_output is not None:
                        embedding = outputs.pooler_output.squeeze()
                    else:
                        return None

                return embedding.cpu().numpy().tolist()

        except Exception as e:
            print(f" ⚠️ Failed to embed image {Path(image_path).name}: {str(e)[:100]}")
            return None
236
+
237
+
238
+ # ========================================
239
+ # πŸ” QUERY PREPROCESSING
240
+ # ========================================
241
def preprocess_query(query: str) -> str:
    """Normalize a user query: expand known abbreviations, drop trailing
    '?'/'!' runs, and collapse whitespace. Returns lower-cased text."""

    # Whole-word abbreviation expansions applied to the lower-cased query.
    expansions = (
        (r'\bh2s\b', 'hydrogen sulfide'),
        (r'\bppm\b', 'parts per million'),
        (r'\bppe\b', 'personal protective equipment'),
        (r'\bscba\b', 'self contained breathing apparatus'),
        (r'\blel\b', 'lower explosive limit'),
        (r'\bhel\b', 'higher explosive limit'),
        (r'\buel\b', 'upper explosive limit'),
    )

    text = query.lower()
    for pattern, replacement in expansions:
        text = re.sub(pattern, replacement, text)

    # Strip trailing question/exclamation marks.
    text = re.sub(r'[?!]+$', '', text)

    # Collapse runs of whitespace into single spaces.
    return re.sub(r'\s+', ' ', text).strip()
265
+
266
+
267
+ # ========================================
268
+ # πŸ“Š TABLE EXTRACTION
269
+ # ========================================
270
def is_table_of_contents_header(df, page_num):
    """Detect a table-of-contents table by scanning its first (header) row.

    Only tables on pages <= 15 are considered; a match requires at least two
    TOC keywords in the header row.
    """
    if len(df) == 0 or page_num > 15:
        return False

    # Join the first row's cells into one lower-cased string.
    header_text = ' '.join(df.iloc[0].astype(str)).lower()

    toc_keywords = ('section', 'subsection', 'description',
                    'page no', 'page number', 'contents')

    # Count keyword hits; two or more means it is a TOC header.
    hits = sum(keyword in header_text for keyword in toc_keywords)
    return hits >= 2
285
+
286
+
287
def looks_like_toc_data(df):
    """Check whether table data looks like TOC rows (section + page numbers).

    Heuristic: >70% of the last column (header row excluded) are integers
    strictly between 50 and 300, and >50% of the first column matches a
    section-number pattern such as "10" or "10.1".
    """
    if len(df) < 2 or len(df.columns) < 2:
        return False

    # Last column minus the header row: candidate page numbers.
    page_values = df.iloc[1:, -1].astype(str)
    page_like = 0
    for raw in page_values:
        stripped = raw.strip()
        if stripped.isdigit() and 50 < int(stripped) < 300:
            page_like += 1

    if len(page_values) == 0 or page_like / len(page_values) <= 0.7:
        return False

    # First column: section numbers like "10" or "10.1".
    section_values = df.iloc[1:, 0].astype(str)
    section_like = sum(1 for raw in section_values
                       if re.match(r'^\d+\.?\d*$', raw.strip()))

    return section_like / len(section_values) > 0.5
307
+
308
+
309
def extract_tables_from_pdf(pdf_path: str) -> List[Document]:
    """Extract bordered (lattice) tables from a PDF with TOC filtering.

    Uses camelot's lattice flavor only, deduplicates tables, skips the first
    5 pages, filters low-accuracy/small tables, and suppresses table-of-
    contents tables (header detection plus continuation-pattern tracking).

    Returns:
        List of Document chunks, one per accepted table, with metadata
        (source, page, heading, type="table", table_accuracy).
    """
    chunks = []

    try:
        lattice_tables = camelot.read_pdf(
            pdf_path,
            pages='all',
            flavor='lattice',  # Only bordered tables
            suppress_stdout=True
        )

        all_tables = list(lattice_tables)
        seen_tables = set()

        # Track TOC state across consecutive tables: a TOC often spans
        # several pages, so we keep skipping while rows look like TOC data.
        in_toc_section = False
        toc_start_page = None  # NOTE(review): assigned but never read

        print(f" πŸ“Š Found {len(all_tables)} bordered tables")

        for table in all_tables:
            df = table.df
            current_page = table.page

            # Deduplicate by (page, header-row) pair.
            table_id = (current_page, tuple(df.iloc[0].tolist()) if len(df) > 0 else ())
            if table_id in seen_tables:
                continue
            seen_tables.add(table_id)

            # Skip first 5 pages (title pages)
            if current_page <= 5:
                continue

            # Basic validation: require >=2 columns, >=3 rows, and camelot
            # parsing accuracy of at least 80.
            if len(df.columns) < 2 or len(df) < 3 or table.accuracy < 80:
                continue

            # Detect TOC start (page with a recognizable TOC header row).
            if not in_toc_section and is_table_of_contents_header(df, current_page):
                in_toc_section = True
                toc_start_page = current_page
                print(f" πŸ” TOC detected at page {current_page}")
                continue

            # While inside a TOC section, skip tables whose data continues
            # the TOC pattern; the first non-TOC table ends the section.
            if in_toc_section:
                if looks_like_toc_data(df):
                    print(f" ⏭️ Skipping TOC continuation on page {current_page}")
                    continue
                else:
                    # TOC ended, resume normal extraction
                    print(f" βœ… TOC ended, found real table on page {current_page}")
                    in_toc_section = False

            # Extract valid table as natural-language sentences.
            table_text = table_to_natural_language_enhanced(table)

            if table_text.strip():
                chunks.append(Document(
                    page_content=table_text,
                    metadata={
                        "source": os.path.basename(pdf_path),
                        "page": current_page,
                        "heading": "Table Data",
                        "type": "table",
                        "table_accuracy": table.accuracy
                    }
                ))

        print(f" βœ… Extracted {len(chunks)} valid tables (after TOC filtering)")

    except Exception as e:
        # Table extraction is best-effort; text extraction still proceeds.
        print(f"⚠️ Table extraction failed: {e}")

    finally:
        # Release camelot objects promptly (they can hold file handles /
        # large frames). The names may not exist if read_pdf itself failed,
        # hence the broad except.
        try:
            del lattice_tables
            del all_tables
            gc.collect()
            time.sleep(0.1)
        except:
            pass

    return chunks
395
+
396
+
397
+
398
def table_to_natural_language_enhanced(table) -> str:
    """Convert a camelot table into one natural-language sentence per row.

    The first row supplies column headers (blank headers become Column_i);
    each following row with a non-empty first cell becomes
    "<label> has <header>: <value>, ...". Rows without a row label are
    skipped. Returns "" for tables with fewer than two rows.
    """
    df = table.df

    if len(df) < 2:
        return ""

    def _is_blank(value: str) -> bool:
        # Empty strings and pandas NaN artifacts count as missing cells.
        return not value or value.lower() in ('', 'nan', 'none')

    raw_headers = [str(h).strip() for h in df.iloc[0].astype(str).tolist()]
    headers = [h if not _is_blank(h) else f"Column_{i}"
               for i, h in enumerate(raw_headers)]

    sentences = []

    for row_idx in range(1, len(df)):
        cells = [str(cell).strip() for cell in df.iloc[row_idx].astype(str).tolist()]

        # Skip rows that contain no real data at all.
        if all(_is_blank(cell) for cell in cells):
            continue

        # Only rows with a non-blank first cell (the row label) are verbalized.
        if not cells or _is_blank(cells[0]):
            continue

        parts = [
            f"{headers[i]}: {cells[i]}"
            for i in range(1, min(len(cells), len(headers)))
            if not _is_blank(cells[i])
        ]

        if parts:
            sentences.append(f"{cells[0]} has {', '.join(parts)}.")
        else:
            sentences.append(f"{cells[0]}.")

    return "\n".join(sentences)
430
+
431
+
432
def extract_tables_with_ocr(pdf_path: str, page_num: int) -> List[Dict]:
    """OCR fallback for image-based PDFs.

    Renders a single page to an image, runs Tesseract OCR, and keeps lines
    that look tabular (contain a tab or a run of 2+ spaces). A candidate
    table requires more than two such lines.

    Returns:
        A single-item list [{"text", "page", "method": "ocr"}] when a table
        candidate is found, otherwise an empty list (also on any error).
    """
    try:
        # Render only the requested page.
        images = convert_from_path(pdf_path, first_page=page_num, last_page=page_num)

        if not images:
            return []

        ocr_text = pytesseract.image_to_string(images[0])
        lines = ocr_text.split('\n')
        table_lines = []

        # Heuristic: columns in OCR output show up as wide gaps or tabs.
        for line in lines:
            if re.search(r'\s{2,}', line) or '\t' in line:
                table_lines.append(line)

        if len(table_lines) > 2:
            return [{
                "text": "\n".join(table_lines),
                "page": page_num,
                "method": "ocr"
            }]

        return []

    except Exception as e:
        # Best-effort fallback: silently report no tables on failure.
        return []
459
+
460
+
461
def get_table_regions(pdf_path: str) -> Dict[int, List[tuple]]:
    """Collect table bounding boxes per page using BOTH camelot flavors.

    Runs lattice (bordered) and stream (whitespace-aligned) detection so the
    text extractor can exclude any text that falls inside a table region.
    TOC tables are excluded via is_table_of_contents_header.

    Returns:
        Mapping of page number -> list of bbox tuples. Empty on any error.

    NOTE(review): reads camelot's private attribute ``table._bbox``; confirm
    it remains available across camelot versions.
    """
    table_regions = {}

    try:
        lattice_tables = camelot.read_pdf(pdf_path, pages='all', flavor='lattice', suppress_stdout=True)
        stream_tables = camelot.read_pdf(pdf_path, pages='all', flavor='stream', suppress_stdout=True)

        all_tables = list(lattice_tables) + list(stream_tables)

        for table in all_tables:
            page = table.page

            # TOC tables are not real data tables; their text should still
            # be extractable as plain text.
            if is_table_of_contents_header(table.df, page):
                continue

            bbox = table._bbox

            if page not in table_regions:
                table_regions[page] = []

            # Avoid duplicate boxes when both flavors find the same table.
            if bbox not in table_regions[page]:
                table_regions[page].append(bbox)

    except Exception as e:
        # Best-effort: with no regions, text extraction simply keeps all text.
        pass

    return table_regions
489
+
490
+
491
+
492
+
493
+ # ========================================
494
+ # πŸ–ΌοΈ IMAGE EXTRACTION
495
+ # ========================================
496
def extract_images_from_pdf(pdf_path: str, output_dir: str) -> List[Dict]:
    """Extract embedded images from a PDF into *output_dir*.

    Images smaller than 10 KB are skipped (filters icons/decorations).
    Each saved image is recorded with its page number and source filename.

    NOTE(review): the raw image bytes are written as-is under a ``.png``
    filename even when the embedded image is another format (e.g. JPEG) β€”
    confirm downstream readers open by content, not extension.

    Returns:
        List of dicts: {"path", "page" (1-based), "source", "type": "image"}.
    """
    doc = fitz.open(pdf_path)
    image_data = []

    for page_num in range(len(doc)):
        page = doc[page_num]
        images = page.get_images()

        for img_index, img in enumerate(images):
            try:
                # First tuple element is the image's xref in the PDF.
                xref = img[0]
                base_image = doc.extract_image(xref)
                image_bytes = base_image["image"]

                # Skip tiny images (< 10 KB), likely logos or decorations.
                if len(image_bytes) < 10000:
                    continue

                image_filename = f"{Path(pdf_path).stem}_p{page_num+1}_img{img_index+1}.png"
                image_path = os.path.join(output_dir, image_filename)

                with open(image_path, "wb") as img_file:
                    img_file.write(image_bytes)

                image_data.append({
                    "path": image_path,
                    "page": page_num + 1,
                    "source": os.path.basename(pdf_path),
                    "type": "image"
                })

            except Exception as e:
                # One corrupt image must not stop extraction of the rest.
                continue

    doc.close()
    return image_data
532
+
533
+
534
+ # ========================================
535
+ # πŸ“„ TEXT EXTRACTION WITH OVERLAPPING CHUNKS
536
+ # ========================================
537
def is_bold_text(span):
    """Truthy when a PyMuPDF text span is bold: either the font name
    contains "bold" or flag bit 4 (2**4) is set."""
    if "bold" in span['font'].lower():
        return True
    return span['flags'] & 2**4
539
+
540
+
541
def is_likely_heading(text, font_size, is_bold, avg_font_size):
    """Heuristic heading detector for PDF lines.

    A heading must be bold and 3-100 chars after stripping; it then matches
    if the font is >10% larger than average, or the text is ALL CAPS, or it
    starts with a section number followed by a capitalized word.
    """
    if not is_bold:
        return False

    stripped = text.strip()
    if not 3 <= len(stripped) <= 100:
        return False

    # Noticeably larger than the document's average font.
    if font_size > avg_font_size * 1.1:
        return True

    # ALL CAPS, or numbered like "1.2 Safety".
    return stripped.isupper() or bool(re.match(r'^\d+\.?\d*\s+[A-Z]', stripped))
552
+
553
+
554
def is_inside_table(block_bbox, table_bboxes):
    """Return True when a text block's bbox overlaps any table bbox.

    Boxes are (x1, y1, x2, y2) tuples; touching edges count as overlap.
    """
    bx1, by1, bx2, by2 = block_bbox

    for tx1, ty1, tx2, ty2 in table_bboxes:
        # Two axis-aligned rectangles overlap unless one lies entirely
        # to the left/right/above/below the other.
        separated = bx2 < tx1 or bx1 > tx2 or by2 < ty1 or by1 > ty2
        if not separated:
            return True

    return False
565
+
566
+
567
def split_text_with_overlap(text: str, heading: str, source: str, page: int,
                            chunk_size: int = CHUNK_SIZE, overlap: int = OVERLAP) -> List[Document]:
    """Split *text* into word-based chunks with overlap and heading context.

    Each chunk is prefixed with "Section: <heading>" so the section title is
    embedded with the content. The full section text is stored in metadata
    as ``parent_text`` for parent-document retrieval.

    Args:
        text: Full section text.
        heading: Section heading used for context and metadata.
        source: Source filename for metadata.
        page: 1-based page number for metadata.
        chunk_size: Maximum chunk length in words.
        overlap: Number of words shared between consecutive chunks.

    Returns:
        List of Document chunks; a single chunk when the text fits in
        ``chunk_size`` words.
    """
    words = text.split()

    if len(words) <= chunk_size:
        # Short section: emit a single chunk with heading context.
        content_with_context = f"Section: {heading}\n\n{text}"

        return [Document(
            page_content=content_with_context,
            metadata={
                "source": source,
                "page": page,
                "heading": heading,
                "type": "text",
                "parent_text": text,
                "chunk_index": 0,
                "total_chunks": 1
            }
        )]

    # BUGFIX: with overlap >= chunk_size the original stride
    # (chunk_size - overlap) was 0 (range() raises ValueError) or negative
    # (range yields nothing, silently dropping the whole section). Clamp
    # the stride to at least 1 word so every section is always chunked.
    step = max(1, chunk_size - overlap)

    chunks = []
    chunk_index = 0

    for i in range(0, len(words), step):
        chunk_words = words[i:i + chunk_size]

        # Drop a tiny trailing remainder once at least one chunk exists.
        if len(chunk_words) < MIN_CHUNK_SIZE and len(chunks) > 0:
            break

        chunk_text = " ".join(chunk_words)

        # Prefix heading context so each chunk embeds its section title.
        content_with_context = f"Section: {heading}\n\n{chunk_text}"

        chunks.append(Document(
            page_content=content_with_context,
            metadata={
                "source": source,
                "page": page,
                "heading": heading,
                "type": "text",
                "parent_text": text,
                "chunk_index": chunk_index,
                "start_word": i,
                "end_word": i + len(chunk_words)
            }
        ))

        chunk_index += 1

    # Backfill the final chunk count into every chunk's metadata.
    for chunk in chunks:
        chunk.metadata["total_chunks"] = len(chunks)

    return chunks
624
+
625
+
626
def extract_text_chunks_with_overlap(pdf_path: str, table_regions: Dict[int, List[tuple]]) -> List[Document]:
    """Extract heading-aware text sections from a PDF and chunk them.

    Two passes over the document:
      1. Compute the average font size across all spans (heading baseline).
      2. Walk text blocks line by line, skipping blocks that fall inside a
         known table region, and split the text into sections at detected
         headings.

    Each section is then split into overlapping chunks via
    split_text_with_overlap.

    Args:
        pdf_path: Path of the PDF to process.
        table_regions: Page number -> list of table bboxes to exclude
            (as produced by get_table_regions).

    Returns:
        List of Document chunks with heading context and metadata.
    """
    doc = fitz.open(pdf_path)

    # Pass 1: gather all span font sizes to establish the document average.
    all_font_sizes = []
    for page_num in range(len(doc)):
        page = doc[page_num]
        blocks = page.get_text("dict")["blocks"]
        for block in blocks:
            if "lines" in block:
                for line in block["lines"]:
                    for span in line["spans"]:
                        all_font_sizes.append(span["size"])

    # Default to 12pt when the document has no text spans.
    avg_font_size = sum(all_font_sizes) / len(all_font_sizes) if all_font_sizes else 12

    # Pass 2: accumulate text into sections delimited by detected headings.
    sections = []
    current_section = ""
    current_heading = "Introduction"  # fallback heading before the first one
    current_page = 1

    for page_num in range(len(doc)):
        page = doc[page_num]
        blocks = page.get_text("dict")["blocks"]

        # Table bboxes for this page (fitz pages are 0-based, camelot 1-based).
        page_tables = table_regions.get(page_num + 1, [])

        for block in blocks:
            if "lines" not in block:
                continue

            # Skip text that lives inside a table; it is captured by the
            # table extractor instead.
            block_bbox = block.get("bbox", (0, 0, 0, 0))
            if is_inside_table(block_bbox, page_tables):
                continue

            for line in block["lines"]:
                line_text = ""
                line_is_bold = False
                line_font_size = 0

                # Merge spans into a single line, tracking bold/max size.
                for span in line["spans"]:
                    line_text += span["text"]
                    if is_bold_text(span):
                        line_is_bold = True
                    line_font_size = max(line_font_size, span["size"])

                line_text = line_text.strip()
                if not line_text:
                    continue

                if is_likely_heading(line_text, line_font_size, line_is_bold, avg_font_size):
                    # Close the current section before starting a new one.
                    if current_section.strip():
                        sections.append({
                            "text": current_section.strip(),
                            "heading": current_heading,
                            "page": current_page,
                            "source": os.path.basename(pdf_path)
                        })

                    current_heading = line_text
                    current_section = ""
                    current_page = page_num + 1
                else:
                    current_section += line_text + " "

    # Flush the final section.
    if current_section.strip():
        sections.append({
            "text": current_section.strip(),
            "heading": current_heading,
            "page": current_page,
            "source": os.path.basename(pdf_path)
        })

    doc.close()

    # Chunk every section with overlap and heading context.
    all_chunks = []

    for section in sections:
        chunks = split_text_with_overlap(
            text=section['text'],
            heading=section['heading'],
            source=section['source'],
            page=section['page'],
            chunk_size=CHUNK_SIZE,
            overlap=OVERLAP
        )
        all_chunks.extend(chunks)

    return all_chunks
715
+
716
+
717
+ # ========================================
718
+ # πŸ”„ COMBINED EXTRACTION
719
+ # ========================================
720
def extract_all_content_from_pdf(pdf_path: str) -> Tuple[List[Document], List[Dict]]:
    """Extract text chunks, table chunks, and images from one PDF.

    Table regions are computed first so the text extractor can exclude
    text that falls inside tables (avoiding duplicated content).

    Returns:
        (all_chunks, images): text chunks followed by table chunks, and the
        list of extracted image records (saved under IMAGE_OUTPUT_DIR).
    """

    print(f" πŸ“Š Extracting tables...")
    # Regions are needed by the text pass to skip in-table text.
    table_regions = get_table_regions(pdf_path)
    table_chunks = extract_tables_from_pdf(pdf_path)
    print(f" βœ… {len(table_chunks)} table chunks")

    print(f" πŸ“„ Extracting text...")
    text_chunks = extract_text_chunks_with_overlap(pdf_path, table_regions)
    print(f" βœ… {len(text_chunks)} text chunks")

    print(f" πŸ–ΌοΈ Extracting images...")
    images = extract_images_from_pdf(pdf_path, IMAGE_OUTPUT_DIR)
    print(f" βœ… {len(images)} images")

    all_chunks = text_chunks + table_chunks

    return all_chunks, images
739
+
740
+
741
+ # ========================================
742
+ # πŸ—οΈ BUILD FAISS INDEX WITH STREAMING
743
+ # ========================================
744
+ # Replace the HybridRetriever class and related functions with this optimized version:
745
+
746
+ # ========================================
747
+ # πŸ—οΈ BUILD FAISS INDEX WITH BM25
748
+ # ========================================
749
def build_multimodal_faiss_streaming(pdf_files: List[str], embedding_model: VLM2VecEmbeddings):
    """Build the FAISS text index, BM25 index, and optional image index.

    Steps:
      1. Hash the sorted PDF list; if the saved hash matches, prompt the
         user (stdin) before rebuilding.
      2. Extract chunks and images from every PDF (errors per-file are
         logged and skipped).
      3. Embed text chunks in batches of 10 into an inner-product FAISS
         index (vectors L2-normalized => cosine similarity), and persist
         the index plus pickled Documents.
      4. Build and persist a BM25 index over lower-cased whitespace tokens.
      5. Embed images one by one into a second FAISS index (failures are
         skipped), persisting the index and image metadata.
      6. Write the PDF-set hash marker.

    Returns:
        (text_index, all_texts), or (None, []) when the user declines a
        rebuild or no content was extracted.
    """

    # Hash of the input file set, used as a "does the index match" marker.
    index_hash_file = f"{FAISS_INDEX_PATH}/index_hash.txt"
    current_hash = hashlib.md5("".join(sorted(pdf_files)).encode()).hexdigest()

    if os.path.exists(index_hash_file):
        with open(index_hash_file, 'r') as f:
            existing_hash = f.read().strip()

        if existing_hash == current_hash:
            print("⚠️ Index already exists for these PDFs!")
            # Interactive confirmation; requires a TTY.
            response = input(" Rebuild anyway? (yes/no): ").strip().lower()
            if response != 'yes':
                return None, []

    all_texts = []
    all_image_paths = []

    print("\nπŸ“„ Processing PDFs...\n")

    for pdf_file in pdf_files:
        print(f"πŸ“– Processing: {Path(pdf_file).name}")

        try:
            text_chunks, images = extract_all_content_from_pdf(pdf_file)

            all_texts.extend(text_chunks)
            all_image_paths.extend(images)

        except Exception as e:
            # One bad PDF must not abort the whole build.
            print(f" ❌ Error: {e}")
            continue

        print()

    print(f"βœ… Total chunks: {len(all_texts)}")
    print(f"βœ… Total images: {len(all_image_paths)}\n")

    if len(all_texts) == 0:
        print("❌ No content extracted!")
        return None, []

    # Build text index incrementally, batch by batch, to bound memory use.
    print("πŸ”— Generating text embeddings...\n")

    text_index = None
    batch_size = 10

    for i in range(0, len(all_texts), batch_size):
        batch = all_texts[i:i+batch_size]
        batch_contents = [doc.page_content for doc in batch]

        try:
            batch_embeddings = embedding_model.embed_documents(batch_contents, add_instruction=True)
            batch_embeddings_np = np.array(batch_embeddings).astype('float32')

            # Lazily create the index once the embedding dimension is known.
            if text_index is None:
                dimension = batch_embeddings_np.shape[1]
                text_index = faiss.IndexFlatIP(dimension)
                print(f" Text embedding dimension: {dimension}")

            # L2-normalized vectors + inner product == cosine similarity.
            faiss.normalize_L2(batch_embeddings_np)
            text_index.add(batch_embeddings_np)

            if (i // batch_size + 1) % 5 == 0:
                print(f" Progress: {i + len(batch)}/{len(all_texts)}")

        except Exception as e:
            # Embedding failures are fatal: a partial index would desync
            # FAISS row ids from the pickled document list.
            print(f" ❌ Error: {e}")
            raise

    print(f" βœ… Complete")

    # Save FAISS index
    faiss.write_index(text_index, f"{FAISS_INDEX_PATH}/text_index.faiss")

    # Save documents (row order must match FAISS insertion order).
    with open(f"{FAISS_INDEX_PATH}/text_documents.pkl", "wb") as f:
        pickle.dump(all_texts, f)

    # Build and save the BM25 keyword index over the same documents.
    print("\nπŸ” Building BM25 index for keyword search...")
    tokenized_docs = [doc.page_content.lower().split() for doc in all_texts]
    bm25_index = BM25Okapi(tokenized_docs,k1=1.3, b=0.65)

    with open(f"{FAISS_INDEX_PATH}/bm25_index.pkl", "wb") as f:
        pickle.dump(bm25_index, f)

    print(" βœ… BM25 index saved")

    # Build image index (optional: only when images were extracted).
    if len(all_image_paths) > 0:
        print(f"\nπŸ–ΌοΈ Embedding images...")

        image_index = None
        successful_images = []

        for idx, img_data in enumerate(all_image_paths):
            img_embedding = embedding_model.embed_image(img_data["path"])

            # embed_image returns None on failure; keep metadata in sync by
            # only recording images that were actually indexed.
            if img_embedding is None:
                continue

            img_embedding_np = np.array([img_embedding]).astype('float32')

            if image_index is None:
                dimension = img_embedding_np.shape[1]
                image_index = faiss.IndexFlatIP(dimension)
                print(f" Image dimension: {dimension}")

            faiss.normalize_L2(img_embedding_np)
            image_index.add(img_embedding_np)
            successful_images.append(img_data)

            if (len(successful_images)) % 10 == 0:
                print(f" Progress: {len(successful_images)}/{len(all_image_paths)}")

        print(f" βœ… {len(successful_images)} images embedded")

        if image_index is not None and len(successful_images) > 0:
            faiss.write_index(image_index, f"{FAISS_INDEX_PATH}/image_index.faiss")

            with open(f"{FAISS_INDEX_PATH}/image_documents.pkl", "wb") as f:
                pickle.dump(successful_images, f)

    # Save hash marker last, so an interrupted build is retried next run.
    with open(index_hash_file, 'w') as f:
        f.write(current_hash)

    print(f"\nβœ… Index saved: {FAISS_INDEX_PATH}\n")

    return text_index, all_texts
882
+
883
+
884
+ # ========================================
885
+ # πŸ” OPTIMIZED HYBRID SEARCH
886
+ # ========================================
887
+ # ========================================
888
+ # πŸ“Š QUERY WITH BM25 ONLY
889
+ # ========================================
890
def query_with_bm25(query: str, k_text: int = 5, k_images: int = 3):
    """Query using BM25 keyword search only (no semantic retrieval).

    Loads the pickled document list and BM25 index from FAISS_INDEX_PATH
    (rebuilding BM25 in memory when the pickle is missing), scores every
    document against the preprocessed query, and returns the top *k_text*
    documents. Images are attached by page co-location: up to *k_images*
    images whose (source, page) matches a top text hit.

    Returns:
        Dict with keys "text_results", "images", "query", "processed_query".
    """

    # Normalize/expand the query before tokenization.
    processed_query = preprocess_query(query)
    print(f" πŸ” Processed: {processed_query}")

    # Load documents
    with open(f"{FAISS_INDEX_PATH}/text_documents.pkl", "rb") as f:
        text_docs = pickle.load(f)

    # Load the persisted BM25 index; fall back to building it in memory.
    try:
        with open(f"{FAISS_INDEX_PATH}/bm25_index.pkl", "rb") as f:
            bm25_index = pickle.load(f)
    except FileNotFoundError:
        print(" ⚠️ BM25 index not found, building on-the-fly...")
        tokenized_docs = [doc.page_content.lower().split() for doc in text_docs]
        bm25_index = BM25Okapi(tokenized_docs)

    # BM25 scoring over the whole corpus.
    tokenized_query = processed_query.lower().split()
    bm25_scores = bm25_index.get_scores(tokenized_query)

    # Top-k by descending BM25 score.
    top_indices = np.argsort(bm25_scores)[::-1][:k_text]

    text_results = []
    relevant_pages = set()

    for rank, idx in enumerate(top_indices, 1):
        doc = text_docs[idx]
        score = float(bm25_scores[idx])

        text_results.append({
            "document": doc,
            "score": score,
            "rank": rank,
            "type": doc.metadata.get('type', 'text')
        })
        # Track (source, page) pairs for image co-location below.
        relevant_pages.add((doc.metadata.get('source'), doc.metadata.get('page')))

    # Attach images from the same pages as the top text results
    # (page co-location, not semantic image search).
    relevant_images = []

    try:
        image_docs_path = f"{FAISS_INDEX_PATH}/image_documents.pkl"

        if os.path.exists(image_docs_path):
            with open(image_docs_path, "rb") as f:
                image_docs = pickle.load(f)

            for img_doc in image_docs:
                img_page = (img_doc['source'], img_doc['page'])
                if img_page in relevant_pages and len(relevant_images) < k_images:
                    relevant_images.append({
                        "path": img_doc['path'],
                        "source": img_doc['source'],
                        "page": img_doc['page'],
                        "type": "image",
                        "score": 0.0,  # co-located, not scored
                        "rank": len(relevant_images) + 1,
                        "from_page": True
                    })

    except Exception as e:
        # Images are optional; any failure just yields no images.
        pass

    return {
        "text_results": text_results,
        "images": relevant_images,
        "query": query,
        "processed_query": processed_query
    }
965
+
966
+
967
+ # ========================================
968
+ # πŸ“Š DISPLAY RESULTS (BM25 ONLY)
969
+ # ========================================
970
def display_results_bm25(results: Dict):
    """Pretty-print BM25-only search results to stdout.

    Expects the dict shape produced by query_with_bm25: text hits with
    score/rank/document, then co-located images. Output only; returns None.
    """

    print("\nπŸ“š TOP RESULTS (BM25 Keyword Search):\n")

    for result in results['text_results']:
        doc = result["document"]
        print(f"[{result['rank']}] BM25 Score: {result['score']:.4f} | {doc.metadata.get('type', 'N/A')}")
        print(f" πŸ“„ {doc.metadata.get('source')} - Page {doc.metadata.get('page')}")
        print(f" πŸ“Œ {doc.metadata.get('heading', 'N/A')[:60]}")

        # Show chunk position only for multi-chunk sections.
        if 'total_chunks' in doc.metadata and doc.metadata.get('total_chunks', 1) > 1:
            print(f" πŸ”— Chunk {doc.metadata.get('chunk_index', 0)+1}/{doc.metadata.get('total_chunks')}")

        # Truncated content preview.
        print(f" πŸ“ {doc.page_content[:200]}...")
        print()

    print("\nπŸ–ΌοΈ IMAGES:\n")
    if results['images']:
        for img in results['images']:
            print(f"[{img['rank']}] {img['source']} - Page {img['page']}")
            print(f" {img['path']}\n")
    else:
        print(" No images found\n")
994
+
995
+
996
+ # ========================================
997
+ # πŸ” HYBRID SEARCH IMPLEMENTATION
998
+ # ========================================
999
+
1000
def normalize_scores(scores: np.ndarray) -> np.ndarray:
    """Min-max normalize *scores* into the [0, 1] range.

    Empty input is returned unchanged; a constant array maps to all ones
    (avoids division by zero while keeping every score "maximal").
    """
    if len(scores) == 0:
        return scores

    lo = np.min(scores)
    hi = np.max(scores)

    if hi == lo:
        return np.ones_like(scores)

    return (scores - lo) / (hi - lo)
1012
+
1013
+
1014
def query_with_hybrid(query: str, embedding_model: VLM2VecEmbeddings,
                      k_text: int = 5, k_images: int = 3,
                      dense_weight: float = DENSE_WEIGHT,
                      sparse_weight: float = SPARSE_WEIGHT):

    """
    Hybrid search combining semantic (FAISS) and keyword (BM25) retrieval.

    Both score sets are min-max normalized and fused as a weighted sum:
    combined = dense_weight * semantic + sparse_weight * bm25. Semantic
    retrieval fetches 3*k_text candidates; BM25 scores the whole corpus.
    If semantic search fails, results degrade gracefully to BM25-only.
    Images are attached by page co-location with the top text hits.

    Returns:
        Dict with "text_results" (each carrying combined/semantic/bm25
        scores), "images", "query", "processed_query", "method": "hybrid".
    """

    processed_query = preprocess_query(query)
    print(f" πŸ” Processed: {processed_query}")

    with open(f"{FAISS_INDEX_PATH}/text_documents.pkl", "rb") as f:
        text_docs = pickle.load(f)

    # --- SEMANTIC SEARCH (FAISS inner-product over normalized vectors) ---
    print(f" 🧠 Running semantic search...")

    try:
        text_index = faiss.read_index(f"{FAISS_INDEX_PATH}/text_index.faiss")

        query_embedding = embedding_model.embed_query(processed_query)
        query_np = np.array([query_embedding]).astype('float32')
        faiss.normalize_L2(query_np)

        # Over-fetch 3x candidates so fusion has room to rerank.
        k_retrieve = min(k_text * 3, len(text_docs))
        distances, indices = text_index.search(query_np, k_retrieve)

        semantic_scores = distances[0]
        semantic_indices = indices[0]

        print(f" βœ… Retrieved {len(semantic_indices)} semantic results")

    except Exception as e:
        # Degrade to BM25-only: empty arrays make the fusion loop a no-op.
        print(f" ⚠️ Semantic search failed: {e}")
        semantic_scores = np.array([])
        semantic_indices = np.array([])

    # --- BM25 SEARCH ---
    print(f" πŸ”€ Running BM25 keyword search...")

    try:
        with open(f"{FAISS_INDEX_PATH}/bm25_index.pkl", "rb") as f:
            bm25_index = pickle.load(f)
    except FileNotFoundError:
        # Rebuild in memory when the persisted index is missing.
        tokenized_docs = [doc.page_content.lower().split() for doc in text_docs]
        bm25_index = BM25Okapi(tokenized_docs, k1=1.3, b=0.65)

    tokenized_query = processed_query.lower().split()
    bm25_scores_all = bm25_index.get_scores(tokenized_query)

    print(f" βœ… Scored {len(bm25_scores_all)} documents")

    # --- SCORE FUSION (weighted sum of normalized scores) ---
    print(f" βš–οΈ Fusing scores (semantic: {dense_weight}, BM25: {sparse_weight})...")

    combined_scores = {}

    if len(semantic_scores) > 0:
        semantic_scores_norm = normalize_scores(semantic_scores)

        # NOTE(review): FAISS can pad results with index -1 when fewer than
        # k_retrieve vectors exist; "idx < len(text_docs)" does not filter
        # -1 out β€” confirm the index always holds >= k_retrieve vectors.
        for idx, score in zip(semantic_indices, semantic_scores_norm):
            if idx < len(text_docs):
                combined_scores[idx] = dense_weight * score

    bm25_scores_norm = normalize_scores(bm25_scores_all)

    for idx, score in enumerate(bm25_scores_norm):
        if idx in combined_scores:
            combined_scores[idx] += sparse_weight * score
        else:
            combined_scores[idx] = sparse_weight * score

    sorted_indices = sorted(combined_scores.keys(),
                            key=lambda x: combined_scores[x],
                            reverse=True)

    top_indices = sorted_indices[:k_text]

    print(f" βœ… Top {len(top_indices)} results selected")

    # --- PREPARE RESULTS ---
    text_results = []
    relevant_pages = set()

    for rank, idx in enumerate(top_indices, 1):
        doc = text_docs[idx]

        # semantic_scores_norm is only defined when semantic search
        # succeeded; with an empty semantic_indices the membership test is
        # always False, so the name is never read in the fallback path.
        semantic_score = semantic_scores_norm[np.where(semantic_indices == idx)[0][0]] if idx in semantic_indices else 0.0
        bm25_score = bm25_scores_norm[idx]
        combined_score = combined_scores[idx]

        text_results.append({
            "document": doc,
            "score": combined_score,
            "semantic_score": float(semantic_score),
            "bm25_score": float(bm25_score),
            "rank": rank,
            "type": doc.metadata.get('type', 'text')
        })
        relevant_pages.add((doc.metadata.get('source'), doc.metadata.get('page')))

    # --- GET IMAGES (page co-location with the top text hits) ---
    relevant_images = []

    try:
        image_docs_path = f"{FAISS_INDEX_PATH}/image_documents.pkl"

        if os.path.exists(image_docs_path):
            with open(image_docs_path, "rb") as f:
                image_docs = pickle.load(f)

            for img_doc in image_docs:
                img_page = (img_doc['source'], img_doc['page'])
                if img_page in relevant_pages and len(relevant_images) < k_images:
                    relevant_images.append({
                        "path": img_doc['path'],
                        "source": img_doc['source'],
                        "page": img_doc['page'],
                        "type": "image",
                        "score": 0.0,  # co-located, not scored
                        "rank": len(relevant_images) + 1,
                        "from_page": True
                    })
    except Exception as e:
        # Images are optional; any failure just yields no images.
        pass

    return {
        "text_results": text_results,
        "images": relevant_images,
        "query": query,
        "processed_query": processed_query,
        "method": "hybrid"
    }
1148
+
1149
+
1150
def display_results_hybrid(results: Dict):
    """Pretty-print hybrid (semantic + BM25) search results to stdout.

    Expects the dict shape produced by query_with_hybrid: text hits carrying
    combined/semantic/bm25 scores, then co-located images. Output only;
    returns None.
    """

    print("\nπŸ“š TOP RESULTS (Hybrid Search: Semantic + BM25):\n")

    for result in results['text_results']:
        doc = result["document"]
        # Show combined score plus both component scores for transparency.
        print(f"[{result['rank']}] Combined: {result['score']:.4f} "
              f"(Semantic: {result['semantic_score']:.4f}, BM25: {result['bm25_score']:.4f}) "
              f"| {doc.metadata.get('type', 'N/A')}")
        print(f" πŸ“„ {doc.metadata.get('source')} - Page {doc.metadata.get('page')}")
        print(f" πŸ“Œ {doc.metadata.get('heading', 'N/A')[:60]}")

        # Show chunk position only for multi-chunk sections.
        if 'total_chunks' in doc.metadata and doc.metadata.get('total_chunks', 1) > 1:
            print(f" πŸ”— Chunk {doc.metadata.get('chunk_index', 0)+1}/{doc.metadata.get('total_chunks')}")

        # Truncated content preview.
        print(f" πŸ“ {doc.page_content[:200]}...")
        print()

    print("\nπŸ–ΌοΈ IMAGES:\n")
    if results['images']:
        for img in results['images']:
            print(f"[{img['rank']}] {img['source']} - Page {img['page']}")
            print(f" {img['path']}\n")
    else:
        print(" No images found\n")
1176
+
1177
+
1178
+ # ========================================
1179
+ # πŸ“– GET CONTEXT WITH PARENTS
1180
+ # ========================================
1181
def get_context_with_parents(results: Dict) -> List[Dict]:
    """Expand retrieval hits to full parent-section contexts.

    Hits whose metadata carries ``parent_text`` contribute the whole parent
    section (emitted once even when several chunks of that section were
    retrieved); hits without a parent contribute their own page_content.
    """
    contexts = []
    emitted_parents = set()

    for hit in results['text_results']:
        doc = hit['document']
        meta = doc.metadata
        parent_text = meta.get('parent_text')

        if parent_text:
            # Multiple chunks of the same section share one parent; emit it once.
            if parent_text in emitted_parents:
                continue
            emitted_parents.add(parent_text)
            body, is_parent = parent_text, True
        else:
            body, is_parent = doc.page_content, False

        contexts.append({
            "text": body,
            "source": meta['source'],
            "page": meta['page'],
            "heading": meta['heading'],
            "type": meta.get('type', 'text'),
            "is_parent": is_parent
        })

    return contexts
1212
+
1213
+
1214
# ========================================
# πŸš€ MAIN EXECUTION (UPDATED FOR HYBRID)
# ========================================
if __name__ == "__main__":
    # CLI entry point: load (or build) the FAISS + BM25 indexes, then run an
    # interactive query loop supporting hybrid / bm25 / semantic modes.
    print("="*70)
    print("πŸš€ RAG with HYBRID SEARCH (Semantic + BM25)")
    print("="*70 + "\n")

    pdf_files = glob.glob(f"{PDF_DIR}/*.pdf")
    print(f"πŸ“‚ Found {len(pdf_files)} PDF files\n")

    if len(pdf_files) == 0:
        print("❌ No PDFs found!")
        exit(1)

    # Load the embedding model once; both the "load existing index" and the
    # "build new index" paths below reuse this instance.
    print("\nπŸ€– Loading VLM2Vec model...")
    embedding_model = VLM2VecEmbeddings(
        model_name="TIGER-Lab/VLM2Vec-Qwen2VL-2B",
        cache_dir=MODEL_CACHE_DIR
    )

    # Load or build index
    if os.path.exists(f"{FAISS_INDEX_PATH}/text_index.faiss"):
        print("βœ… Loading existing index\n")

        # Older indexes may predate hybrid search; backfill the BM25 side
        # from the pickled documents so hybrid mode works.
        if not os.path.exists(f"{FAISS_INDEX_PATH}/bm25_index.pkl"):
            print("⚠️ BM25 index missing, building now...")

            with open(f"{FAISS_INDEX_PATH}/text_documents.pkl", "rb") as f:
                all_texts = pickle.load(f)

            print("   Building BM25 index...")
            tokenized_docs = [doc.page_content.lower().split() for doc in all_texts]
            bm25_index = BM25Okapi(tokenized_docs, k1=1.3, b=0.65)

            with open(f"{FAISS_INDEX_PATH}/bm25_index.pkl", "wb") as f:
                pickle.dump(bm25_index, f)

            print("   βœ… BM25 index saved\n")

    else:
        print("πŸ”¨ Building new index...\n")

        # NOTE: the embedding model loaded above is reused here; re-creating
        # it would double the (multi-GB) model footprint for no benefit.
        index, documents = build_multimodal_faiss_streaming(pdf_files, embedding_model)

        if index is None:
            exit(0)

    # Interactive testing
    print("="*70)
    print("πŸ§ͺ TESTING MODE - HYBRID SEARCH")
    print(f"   Weights: Semantic {DENSE_WEIGHT} | BM25 {SPARSE_WEIGHT}")
    print("="*70 + "\n")

    test_queries = [
        "What is the higher and lower explosive limit of butane?",
        "What are the precautions taken while handling H2S?",
        "What are the Personal Protection used for Sulfolane?",
        "What is the Composition of Platforming Feed and Product?",
        "Explain Dual function platforming catalyst chemistry.",
        "Steps to be followed in Amine Regeneration Unit for normal shutdown process.",
        "Could you tell me what De-greasing of Amine System in pre startup wash",
    ]

    print("πŸ“‹ SUGGESTED QUERIES:")
    for i, q in enumerate(test_queries, 1):
        print(f"   {i}. {q}")
    print()
    print("πŸ’‘ Type 'mode' to switch between hybrid/bm25/semantic")
    print()

    current_mode = "hybrid"

    while True:
        # Prompt hint tracks the actual number of suggested queries.
        user_query = input(f"πŸ’¬ Query [{current_mode}] (or 1-{len(test_queries)}, 'mode', or 'exit'): ").strip()

        if user_query.lower() == 'exit':
            print("\nβœ… Done!")
            break

        if user_query.lower() == 'mode':
            print("\nπŸ”„ Select mode:")
            print("   1. Hybrid (Semantic + BM25)")
            print("   2. BM25 only")
            print("   3. Semantic only")
            mode_choice = input("   Choice (1-3): ").strip()

            if mode_choice == '1':
                current_mode = "hybrid"
            elif mode_choice == '2':
                current_mode = "bm25"
            elif mode_choice == '3':
                current_mode = "semantic"

            print(f"   βœ… Mode set to: {current_mode}\n")
            continue

        # A bare number selects one of the suggested queries above.
        if user_query.isdigit() and 1 <= int(user_query) <= len(test_queries):
            user_query = test_queries[int(user_query) - 1]

        if not user_query:
            continue

        print(f"\n{'='*60}")
        print(f"πŸ” Query: {user_query}")
        print(f"πŸ”§ Mode: {current_mode.upper()}")
        print(f"{'='*60}\n")

        try:
            if current_mode == "hybrid":
                results = query_with_hybrid(user_query, embedding_model, k_text=5, k_images=3)
                display_results_hybrid(results)
            elif current_mode == "bm25":
                results = query_with_bm25(user_query, k_text=5, k_images=3)
                display_results_bm25(results)
            else:  # semantic only: hybrid path with all weight on the dense side
                results = query_with_hybrid(user_query, embedding_model, k_text=5, k_images=3,
                                            dense_weight=1.0, sparse_weight=0.0)
                display_results_hybrid(results)

            print("\nπŸ“– FULL CONTEXT:\n")
            contexts = get_context_with_parents(results)

            for i, ctx in enumerate(contexts[:3], 1):
                print(f"[{i}] {ctx['heading'][:50]}")
                if ctx['is_parent']:
                    print(f"    βœ… Full section")
                print(f"    {ctx['text'][:300]}...\n")

            print("="*60 + "\n")

        except Exception as e:
            # Keep the REPL alive on per-query failures; show the traceback
            # for debugging.
            print(f"\n❌ Error: {e}\n")
            import traceback
            traceback.print_exc()