Shouvik599 commited on
Commit
7ae27cd
·
1 Parent(s): f1f031f

added feature improvements

Browse files
Files changed (8) hide show
  1. Dockerfile +16 -7
  2. app.py +9 -9
  3. features_to_add.txt +20 -0
  4. frontend/index.html +130 -5
  5. ingest.py +49 -0
  6. rag_chain.py +184 -37
  7. requirements.txt +3 -2
  8. start.sh +8 -5
Dockerfile CHANGED
@@ -1,19 +1,28 @@
1
- # Use an official Python runtime as a parent image
2
  FROM python:3.11-slim
3
 
4
- # Set the working directory in the container
5
  WORKDIR /app
6
 
7
- # Copy the requirements file and install dependencies
8
  COPY requirements.txt .
9
  RUN pip install --no-cache-dir -r requirements.txt
10
 
11
- # Copy the application code
12
- COPY . .
 
 
 
13
 
14
- # Make the start script executable
 
 
 
 
 
 
15
  RUN chmod +x start.sh
16
 
17
  # HF Spaces requires port 7860
18
- # We use the shell script as the entry point
 
19
  CMD ["./start.sh"]
 
 
1
  FROM python:3.11-slim
2
 
3
+ # Set working directory
4
  WORKDIR /app
5
 
6
+ # Install dependencies
7
  COPY requirements.txt .
8
  RUN pip install --no-cache-dir -r requirements.txt
9
 
10
+ # Create a non-root user for HF compliance
11
+ RUN useradd -m -u 1000 user
12
+ USER user
13
+ ENV HOME=/home/user \
14
+ PATH=/home/user/.local/bin:$PATH
15
 
16
+ # Set working directory to user's home
17
+ WORKDIR $HOME/app
18
+
19
+ # Copy application code and set ownership to our user
20
+ COPY --chown=user . $HOME/app
21
+
22
+ # Ensure the start script is executable
23
  RUN chmod +x start.sh
24
 
25
  # HF Spaces requires port 7860
26
+ EXPOSE 7860
27
+
28
  CMD ["./start.sh"]
app.py CHANGED
@@ -15,8 +15,9 @@ from fastapi import FastAPI, HTTPException
15
  from fastapi.middleware.cors import CORSMiddleware
16
  from pydantic import BaseModel, Field
17
  from dotenv import load_dotenv
18
- from fastapi.responses import FileResponse
19
  from rag_chain import query_sacred_texts, get_embeddings, get_vector_store # ← FIXED
 
20
 
21
  load_dotenv()
22
 
@@ -91,8 +92,8 @@ def list_books():
91
  raise HTTPException(status_code=500, detail=f"Could not read knowledge base: {e}")
92
 
93
 
94
- @app.post("/ask", response_model=AskResponse, tags=["Query"])
95
- def ask(request: AskRequest):
96
  """
97
  Ask a spiritual or philosophical question.
98
  The answer is grounded strictly in the sacred texts.
@@ -101,11 +102,10 @@ def ask(request: AskRequest):
101
  raise HTTPException(status_code=400, detail="Question cannot be empty.")
102
 
103
  try:
104
- result = query_sacred_texts(request.question)
105
- return AskResponse(
106
- question=request.question,
107
- answer=result["answer"],
108
- sources=[Source(**s) for s in result["sources"]],
109
  )
110
  except FileNotFoundError:
111
  raise HTTPException(
@@ -137,4 +137,4 @@ if __name__ == "__main__":
137
  print(f"🌐 Running at : http://{host}:{port}")
138
  print(f"{'─' * 40}\n")
139
 
140
- uvicorn.run("app:app", host=host, port=port, reload=False) # reload=False for production
 
15
  from fastapi.middleware.cors import CORSMiddleware
16
  from pydantic import BaseModel, Field
17
  from dotenv import load_dotenv
18
+ from fastapi.responses import StreamingResponse, FileResponse
19
  from rag_chain import query_sacred_texts, get_embeddings, get_vector_store # ← FIXED
20
+ from starlette.concurrency import run_in_threadpool
21
 
22
  load_dotenv()
23
 
 
92
  raise HTTPException(status_code=500, detail=f"Could not read knowledge base: {e}")
93
 
94
 
95
+ @app.post("/ask", tags=["Query"])
96
+ async def ask(request: AskRequest):
97
  """
98
  Ask a spiritual or philosophical question.
99
  The answer is grounded strictly in the sacred texts.
 
102
  raise HTTPException(status_code=400, detail="Question cannot be empty.")
103
 
104
  try:
105
+
106
+ return StreamingResponse(
107
+ query_sacred_texts(request.question),
108
+ media_type="application/json"
 
109
  )
110
  except FileNotFoundError:
111
  raise HTTPException(
 
137
  print(f"🌐 Running at : http://{host}:{port}")
138
  print(f"{'─' * 40}\n")
139
 
140
+ uvicorn.run("app:app", host=host, port=port, reload=False) # reload=False for production
features_to_add.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Contextual chunk expansion — when a chunk is retrieved, also fetch the surrounding chunks (±1) to avoid cut-off verses losing their meaning
2
+ Hypothetical Document Embedding (HyDE) β€” generate a hypothetical ideal answer first, embed that, then search β€” dramatically improves recall for abstract questions
3
+
4
+ Multi-turn conversation β€” add chat history using LangChain ConversationBufferMemory so users can ask follow-up questions like "Elaborate on the second point"
5
+ Answer faithfulness scoring β€” use an LLM-as-judge step to self-check whether the answer is actually grounded in the retrieved chunks before returning it
6
+ Query rewriting β€” if the user query is vague, have the LLM rephrase it into a better search query before retrieval (improves semantic matching)
7
+
8
+ Multi-language support β€” ingest Arabic Quran + Sanskrit Gita alongside English translations; embed both and let users query in their preferred language
9
+ Incremental ingestion β€” track which PDFs have been ingested (via a manifest file) so re-running ingest.py only processes new books, not the whole library
10
+ Book versioning β€” support multiple translations of the same book (e.g. KJV vs NIV Bible) and let users choose
11
+
12
+ Snippet preview on hover β€” show the actual retrieved passage when hovering over a source badge in the UI
13
+ Query suggestions β€” after each answer, suggest 2-3 related follow-up questions
14
+ Topic explorer β€” a sidebar with pre-grouped themes (Death & Afterlife, Compassion, Duty, Prayer) that users can browse
15
+ Compare mode β€” a dedicated side-by-side view for "How does Book A vs Book B address X"
16
+
17
+ Hallucination guardrail β€” run a separate verification pass checking every claim in the answer maps back to a retrieved chunk; flag or remove unsupported claims
18
+ Out-of-scope detection β€” classify queries before retrieval; politely decline non-spiritual questions (e.g. "Write me code") with a prompt-level or classifier-level guard
19
+ Rate limiting β€” add per-IP request throttling in FastAPI to prevent API key exhaustion
20
+ API key security β€” move to server-side key storage properly; never expose NVIDIA_API_KEY or GEMINI_API_KEY in frontend calls
frontend/index.html CHANGED
@@ -41,6 +41,57 @@
41
  /* violet β€” Sikh royal purple */
42
  }
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  html,
45
  body {
46
  height: 100%;
@@ -723,13 +774,69 @@
723
  throw new Error(err.detail || "Server error");
724
  }
725
 
726
- const data = await res.json();
727
- replaceLoadingWithAnswer(loadingEl, data);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
728
  } catch (err) {
729
  let msg = err.message;
730
- if (msg.includes("fetch") || msg.includes("NetworkError") || msg.includes("Failed")) {
731
- msg = "Connecting to the divine knowledge base... Please wait a moment or refresh the page.";
732
- }
733
  replaceLoadingWithError(loadingEl, msg);
734
  } finally {
735
  isLoading = false;
@@ -738,6 +845,24 @@
738
  }
739
  }
740
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
741
  function askSuggested(btn) {
742
  const input = document.getElementById("questionInput");
743
  input.value = btn.textContent;
 
41
  /* violet β€” Sikh royal purple */
42
  }
43
 
44
+ /* Animated Thinking state for streaming */
45
+ .thinking-dots {
46
+ display: inline-flex;
47
+ gap: 4px;
48
+ margin-left: 4px;
49
+ }
50
+
51
+ .thinking-dots span {
52
+ width: 4px;
53
+ height: 4px;
54
+ background: var(--gold);
55
+ border-radius: 50%;
56
+ animation: bounce 1.4s infinite ease-in-out;
57
+ }
58
+
59
+ @keyframes bounce {
60
+
61
+ 0%,
62
+ 80%,
63
+ 100% {
64
+ transform: scale(0);
65
+ }
66
+
67
+ 40% {
68
+ transform: scale(1);
69
+ }
70
+ }
71
+
72
+ /* Make streaming text fade in slightly for smoothness */
73
+ #currentStreamingMsg p {
74
+ animation: fadeIn 0.3s ease-in;
75
+ }
76
+
77
+ @keyframes fadeIn {
78
+ from {
79
+ opacity: 0.7;
80
+ }
81
+
82
+ to {
83
+ opacity: 1;
84
+ }
85
+ }
86
+
87
+ /* Ensure the bubble has a minimum height so it doesn't look like a "small block" */
88
+ .msg-bubble:empty::before {
89
+ content: "Writing wisdom...";
90
+ color: var(--muted);
91
+ font-style: italic;
92
+ font-size: 0.9rem;
93
+ }
94
+
95
  html,
96
  body {
97
  height: 100%;
 
774
  throw new Error(err.detail || "Server error");
775
  }
776
 
777
+ // Initialize variables to build the UI
778
+ const reader = res.body.getReader();
779
+ const decoder = new TextDecoder();
780
+ let fullAnswer = "";
781
+ let sourcesData = [];
782
+
783
+ // Prepare the assistant UI bubble immediately
784
+ loadingEl.innerHTML = `
785
+ <span class="msg-label">Sacred Texts</span>
786
+ <div class="msg-bubble" id="currentStreamingMsg">
787
+ <div class="loading-text">The scriptures are being revealed<span class="thinking-dots"><span></span><span></span><span></span></span></div>
788
+ </div>
789
+ <div id="currentStreamingSources"></div>
790
+ `;
791
+ const bubble = document.getElementById("currentStreamingMsg");
792
+ const sourcesContainer = document.getElementById("currentStreamingSources");
793
+ let firstTokenReceived = false;
794
+
795
+ while (true) {
796
+ const { done, value } = await reader.read();
797
+ if (done) break;
798
+
799
+ const chunk = decoder.decode(value, { stream: true });
800
+ const lines = chunk.split("\n");
801
+
802
+ for (const line of lines) {
803
+ if (!line.trim()) continue;
804
+ try {
805
+ const parsed = JSON.parse(line);
806
+
807
+ if (parsed.type === "token") {
808
+ //Remove the loading text as soon as the first word arrives
809
+ if (!firstTokenReceived) {
810
+ bubble.innerHTML = "";
811
+ firstTokenReceived = true;
812
+ }
813
+
814
+ fullAnswer += parsed.data;
815
+ // Dynamically update the bubble with formatted markdown/paragraphs
816
+ bubble.innerHTML = formatAnswer(fullAnswer);
817
+ scrollToBottom();
818
+ }
819
+ else if (parsed.type === "sources") {
820
+ sourcesData = parsed.data;
821
+ renderSourcesInPlace(sourcesContainer, sourcesData);
822
+ }
823
+ else if (parsed.type === "cache") {
824
+ bubble.innerHTML = formatAnswer(parsed.data.answer);
825
+ renderSourcesInPlace(sourcesContainer, parsed.data.sources);
826
+ scrollToBottom();
827
+ }
828
+ } catch (e) {
829
+ console.error("Error parsing NDJSON line:", e);
830
+ }
831
+ }
832
+ }
833
+
834
+ // Clean up IDs once done so next messages don't conflict
835
+ bubble.removeAttribute("id");
836
+ sourcesContainer.removeAttribute("id");
837
+
838
  } catch (err) {
839
  let msg = err.message;
 
 
 
840
  replaceLoadingWithError(loadingEl, msg);
841
  } finally {
842
  isLoading = false;
 
845
  }
846
  }
847
 
848
+ // Helper to render sources inside the streaming flow
849
+ function renderSourcesInPlace(container, sources) {
850
+ const sourceTags = (sources || []).map(s => {
851
+ const cls = getSourceClass(s.book);
852
+ // Use verse citations as the primary text
853
+ return `<span class="source-tag ${cls}" title="${s.snippet}">πŸ“– ${s.book}</span>`;
854
+ }).join("");
855
+
856
+ if (sourceTags) {
857
+ container.innerHTML = `
858
+ <div class="sources">
859
+ <div class="sources-label">Citations</div>
860
+ <div class="source-tags">${sourceTags}</div>
861
+ </div>
862
+ `;
863
+ }
864
+ }
865
+
866
  function askSuggested(btn) {
867
  const input = document.getElementById("questionInput");
868
  input.value = btn.textContent;
ingest.py CHANGED
@@ -20,6 +20,7 @@ from langchain_community.document_loaders import PyPDFLoader, PyMuPDFLoader
20
  from langchain_text_splitters import RecursiveCharacterTextSplitter
21
  from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
22
  from langchain_chroma import Chroma
 
23
 
24
  load_dotenv()
25
 
@@ -46,8 +47,45 @@ CHUNK_SIZE = 800 # characters per chunk
46
  CHUNK_OVERLAP = 150 # overlap to preserve verse context across boundaries
47
 
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  # ─── Helpers ──────────────────────────────────────────────────────────────────
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  def detect_book_name(filename: str) -> str:
52
  """Infer the book's display name from its filename."""
53
  name_lower = filename.lower()
@@ -83,6 +121,7 @@ def tag_documents(docs: list, book_name: str, source_file: str) -> list:
83
  """
84
  for doc in docs:
85
  doc.metadata["book"] = book_name
 
86
  doc.metadata["source_file"] = source_file
87
  # Keep the page number if already present from the loader
88
  if "page" not in doc.metadata:
@@ -135,6 +174,16 @@ def ingest():
135
  )
136
  chunks = splitter.split_documents(all_docs)
137
  print(f" β†’ {len(chunks)} chunks created")
 
 
 
 
 
 
 
 
 
 
138
 
139
  # ── Step 3: Embed & store ────────────────────────────────────────────────
140
  print(f"\nπŸ”’ Initialising NVIDIA embedding model (llama-nemotron-embed-vl-1b-v2)...")
 
20
  from langchain_text_splitters import RecursiveCharacterTextSplitter
21
  from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
22
  from langchain_chroma import Chroma
23
+ import re
24
 
25
  load_dotenv()
26
 
 
47
  CHUNK_OVERLAP = 150 # overlap to preserve verse context across boundaries
48
 
49
 
50
# Regex patterns (kept as plain strings so other modules can reuse them)
# for locating a human-readable verse citation inside a chunk of text.
VERSE_PATTERNS = {
    "Bhagavad Gita": r"(?:Verse\s+)?(\d+\.\d+)",      # matches 2.47 or Verse 2.47
    "Quran": r"(\d+:\d+)",                            # matches 2:286
    "Bible": r"(\d+\s+)?[A-Z][a-z]+\s+\d+:\d+",       # matches John 3:16 or 1 Cor 13:4
    "Guru Granth Sahib": r"(?:Ang\s+)?(\d+)",         # matches Ang 1 (NOTE: a bare (\d+) also matches any number — verify against real text)
}

# Patterns that capture the numeric structure (chapter/verse or Ang) so it
# can be stored as typed metadata on each chunk.
STRUCTURE_PATTERNS = {
    "Bhagavad Gita": r"(\d+)\.(\d+)",     # 2.47  (Chapter.Verse)
    "Quran": r"(\d+):(\d+)",              # 2:186 (Surah:Verse)
    "Bible": r"(\d+):(\d+)",              # 3:16  (Chapter:Verse)
    "Guru Granth Sahib": r"Ang\s+(\d+)",  # Ang 1
}

# ─── Helpers ──────────────────────────────────────────────────────────────────

def parse_structure(text: str, book_name: str) -> dict:
    """Extract chapter/verse (or Ang) metadata from a text chunk.

    Returns ``{"chapter": c, "verse": v}`` for chapter:verse books,
    ``{"ang": n}`` for the Guru Granth Sahib, and ``{}`` when the book has
    no registered pattern or no reference is found in *text*.
    """
    pattern = STRUCTURE_PATTERNS.get(book_name)
    if not pattern:
        return {}

    match = re.search(pattern, text)
    if not match:
        return {}

    if book_name == "Guru Granth Sahib":
        return {"ang": int(match.group(1))}
    return {"chapter": int(match.group(1)), "verse": int(match.group(2))}


def extract_verse(text: str, book_name: str) -> str:
    """Extract a display-ready verse reference from a text chunk.

    Returns the matched citation string (e.g. ``"2:286"``),
    ``"General Context"`` when the book is known but no citation appears
    in *text*, or ``"Unknown"`` for books without a registered pattern.
    """
    pattern = VERSE_PATTERNS.get(book_name)
    if not pattern:
        return "Unknown"

    match = re.search(pattern, text)
    return match.group(0) if match else "General Context"
88
+
89
  def detect_book_name(filename: str) -> str:
90
  """Infer the book's display name from its filename."""
91
  name_lower = filename.lower()
 
121
  """
122
  for doc in docs:
123
  doc.metadata["book"] = book_name
124
+ doc.metadata["verse_citation"] = extract_verse(doc.page_content, book_name)
125
  doc.metadata["source_file"] = source_file
126
  # Keep the page number if already present from the loader
127
  if "page" not in doc.metadata:
 
174
  )
175
  chunks = splitter.split_documents(all_docs)
176
  print(f" β†’ {len(chunks)} chunks created")
177
+
178
+ # Add verse citations to chunk metadata for better source attribution
179
+ print(f"🏷️ Parsing structure (chapters/verses) for {len(chunks)} chunks...")
180
+ for chunk in chunks:
181
+ # Use the parse_structure function you defined
182
+ structure = parse_structure(chunk.page_content, chunk.metadata["book"])
183
+ # Update the chunk metadata so it is saved in ChromaDB
184
+ chunk.metadata.update(structure)
185
+
186
+ print(f" β†’ {len(chunks)} chunks created and tagged")
187
 
188
  # ── Step 3: Embed & store ────────────────────────────────────────────────
189
  print(f"\nπŸ”’ Initialising NVIDIA embedding model (llama-nemotron-embed-vl-1b-v2)...")
rag_chain.py CHANGED
@@ -19,12 +19,16 @@ Returns a dict with:
19
  """
20
 
21
  import os
 
22
  from dotenv import load_dotenv
23
- from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
24
  from langchain_chroma import Chroma
25
  from langchain_core.prompts import ChatPromptTemplate
26
  from langchain_core.output_parsers import StrOutputParser
 
 
27
  load_dotenv()
 
28
 
29
  NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
30
  CHROMA_DB_PATH = os.getenv("CHROMA_DB_PATH", "./chroma_db")
@@ -41,6 +45,8 @@ KNOWN_BOOKS = [
41
  "Guru Granth Sahib",
42
  ]
43
 
 
 
44
 
45
  # ─── System Prompt ────────────────────────────────────────────────────────────
46
 
@@ -51,7 +57,7 @@ STRICT RULES you must ALWAYS follow:
51
  1. Answer ONLY using the provided context passages. Do NOT use any external knowledge.
52
  2. If a specific book's passages are provided but not relevant to the question, skip that book.
53
  3. If NONE of the context is relevant, say: "The provided texts do not directly address this question."
54
- 4. Always cite which book(s) your answer draws from.
55
  5. When the question asks to COMPARE books (e.g. "what do Quran and Gita say"), you MUST
56
  address EACH of those books separately, then synthesise the common thread.
57
  6. Be respectful and neutral toward all faiths β€” treat each text with equal reverence.
@@ -90,6 +96,26 @@ def get_vector_store(embeddings):
90
 
91
  # ─── Per-Book Retrieval ───────────────────────────────────────────────────────
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  def retrieve_per_book(question: str, vector_store: Chroma) -> list:
94
  """
95
  Retrieve CHUNKS_PER_BOOK chunks from EACH known book independently,
@@ -97,23 +123,83 @@ def retrieve_per_book(question: str, vector_store: Chroma) -> list:
97
  in the context β€” no book can be crowded out by higher-scoring chunks
98
  from another book.
99
  """
100
- all_docs = []
101
- for book in KNOWN_BOOKS:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  try:
103
- results = vector_store.similarity_search(
104
- query=question,
105
- k=CHUNKS_PER_BOOK,
106
- filter={"book": book}, # ← metadata filter: only this book
107
- )
108
- if results:
109
- print(f" πŸ“– {book}: {len(results)} chunk(s) retrieved")
110
- else:
111
- print(f" ⚠️ {book}: 0 chunks found (not ingested?)")
112
- all_docs.extend(results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  except Exception as e:
114
  print(f" ❌ {book}: retrieval error β€” {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
- return all_docs
117
 
118
 
119
  # ─── Format Retrieved Docs ────────────────────────────────────────────────────
@@ -135,7 +221,18 @@ def format_docs(docs: list) -> str:
135
  chunks = []
136
  for i, doc in enumerate(book_docs, 1):
137
  page = doc.metadata.get("page", "?")
138
- chunks.append(f" [{i}] (Page {page}): {doc.page_content.strip()}")
 
 
 
 
 
 
 
 
 
 
 
139
  sections.append(header + "\n" + "\n\n".join(chunks))
140
 
141
  return "\n\n".join(sections)
@@ -174,7 +271,7 @@ _llm_chain = None
174
  _vector_store = None
175
 
176
 
177
- def query_sacred_texts(question: str) -> dict:
178
  """
179
  Query the sacred texts knowledge base with guaranteed per-book retrieval.
180
 
@@ -192,38 +289,88 @@ def query_sacred_texts(question: str) -> dict:
192
  if _llm_chain is None:
193
  print("πŸ”§ Initialising RAG chain (first call)...")
194
  _llm_chain, _vector_store = build_chain()
 
 
 
 
 
 
 
195
 
 
 
 
 
 
 
 
 
 
196
  # Step 1: Retrieve per-book (guaranteed slots for every scripture)
197
  print(f"\nπŸ” Retrieving {CHUNKS_PER_BOOK} chunks per book for: '{question}'")
198
  source_docs = retrieve_per_book(question, _vector_store)
199
 
200
  if not source_docs:
201
- return {
202
- "answer": "No content found in the knowledge base. Please run ingest.py first.",
203
- "sources": [],
204
- }
205
-
206
- # Step 2: Format context grouped by book
207
- context = format_docs(source_docs)
208
 
209
- # Step 3: Generate answer
210
- answer = _llm_chain.invoke({"context": context, "question": question})
211
-
212
- # Step 4: Build deduplicated source list for the UI
213
- seen_books = set()
214
  sources = []
215
  for doc in source_docs:
216
  book = doc.metadata.get("book", "Unknown")
217
- page = doc.metadata.get("page", "?")
 
 
 
 
 
 
 
 
 
 
 
218
  snippet = doc.page_content[:200].strip() + "..."
219
- if book not in seen_books:
220
- seen_books.add(book)
221
- sources.append({"book": book, "page": page, "snippet": snippet})
222
-
223
- return {
224
- "answer": answer,
225
- "sources": sources,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  }
 
 
 
 
 
 
 
 
 
 
227
 
228
 
229
  # ─── Quick CLI Test ───────────────────────────────────────────────────────────
 
19
  """
20
 
21
  import os
22
+ from pydoc import doc
23
  from dotenv import load_dotenv
24
+ from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA, NVIDIARerank
25
  from langchain_chroma import Chroma
26
  from langchain_core.prompts import ChatPromptTemplate
27
  from langchain_core.output_parsers import StrOutputParser
28
+ from langchain_community.retrievers import BM25Retriever
29
+ from langchain_classic.retrievers import EnsembleRetriever, ContextualCompressionRetriever
30
  load_dotenv()
31
+ import json
32
 
33
  NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
34
  CHROMA_DB_PATH = os.getenv("CHROMA_DB_PATH", "./chroma_db")
 
45
  "Guru Granth Sahib",
46
  ]
47
 
48
+ # Create a separate collection for semantic cache
49
+ CACHE_COLLECTION = "semantic_cache"
50
 
51
  # ─── System Prompt ────────────────────────────────────────────────────────────
52
 
 
57
  1. Answer ONLY using the provided context passages. Do NOT use any external knowledge.
58
  2. If a specific book's passages are provided but not relevant to the question, skip that book.
59
  3. If NONE of the context is relevant, say: "The provided texts do not directly address this question."
60
+ 4. Always explicitly name and cite which book(s) your answer draws from in the text of your answer.
61
  5. When the question asks to COMPARE books (e.g. "what do Quran and Gita say"), you MUST
62
  address EACH of those books separately, then synthesise the common thread.
63
  6. Be respectful and neutral toward all faiths β€” treat each text with equal reverence.
 
96
 
97
  # ─── Per-Book Retrieval ───────────────────────────────────────────────────────
98
 
99
def get_reranked_retriever(base_retriever):
    """
    Wrap a base retriever with an NVIDIA reranking compression layer.

    The reranker scores every candidate the base retriever returns and
    keeps only the 5 most relevant chunks, so the LLM context stays small
    and on-topic.
    """
    # NVIDIA reranker (NIM / API Catalog) — nvidia/llama-3.2-nv-rerankqa-1b-v2
    nvidia_reranker = NVIDIARerank(
        model="nvidia/llama-3.2-nv-rerankqa-1b-v2",
        api_key=NVIDIA_API_KEY,
        top_n=5,  # only the top 5 chunks reach the LLM
    )

    # Compose: base retrieval first, then rerank-compress the candidates.
    return ContextualCompressionRetriever(
        base_compressor=nvidia_reranker,
        base_retriever=base_retriever,
    )
118
+
119
def retrieve_per_book(question: str, vector_store: Chroma) -> list:
    """
    Hybrid (BM25 + vector) retrieval with query routing and reranking.

    Routes the query to the scripture(s) it mentions by keyword (all books
    when none is named), gathers candidate chunks per book via an ensemble
    of keyword (BM25) and semantic retrieval, then reranks the pooled
    candidates with NVIDIA's reranker so only the most relevant chunks
    reach the LLM.

    Returns a list of reranked Documents (possibly empty).
    """
    # Hoisted out of the loop: importing per-iteration is wasted work.
    from langchain_core.documents import Document

    # Candidates pulled per retriever before the reranking pass.
    CANDIDATE_COUNT = 10

    # ── Query routing: detect whether the user names a specific scripture ──
    question_lower = question.lower()
    book_keywords = {
        "Bhagavad Gita": ("gita", "bhagavad", "hindu", "hinduism"),
        "Quran": ("quran", "koran", "islam", "muslim", "muhammad"),
        "Bible": ("bible", "testament", "christian", "jesus", "christ"),
        "Guru Granth Sahib": ("granth", "guru", "sikh", "sikhism", "nanak"),
    }
    target_books = [
        book for book, keywords in book_keywords.items()
        if any(kw in question_lower for kw in keywords)
    ]

    # If no specific book is detected, search all books.
    books_to_search = target_books if target_books else KNOWN_BOOKS
    print(f"🎯 Routing query to: {books_to_search}")

    all_candidates = []
    for book in books_to_search:
        try:
            # Pull this book's full document set to build an in-memory BM25
            # index. Fine for a small demo corpus; for larger libraries a
            # persistent keyword index would be more efficient.
            book_data = vector_store.get(where={"book": book})
            book_docs = [
                Document(page_content=content, metadata=meta)
                for content, meta in zip(book_data["documents"], book_data["metadatas"])
            ]
            if not book_docs:
                continue

            # Keyword retriever
            bm25_retriever = BM25Retriever.from_documents(book_docs)
            bm25_retriever.k = CANDIDATE_COUNT

            # Semantic retriever, filtered to this book only
            vector_retriever = vector_store.as_retriever(
                search_kwargs={"k": CANDIDATE_COUNT, "filter": {"book": book}}
            )

            # Blend keyword and semantic hits 50/50.
            ensemble_retriever = EnsembleRetriever(
                retrievers=[bm25_retriever, vector_retriever],
                weights=[0.5, 0.5],
            )

            # Collect candidates; reranking happens once over the full pool.
            book_candidates = ensemble_retriever.invoke(question)
            all_candidates.extend(book_candidates)
            print(f" 📦 {book}: Found {len(book_candidates)} candidates")

        except Exception as e:
            print(f" ❌ {book}: retrieval error — {e}")

    if not all_candidates:
        return []

    # ── Rerank the entire pool at once ──
    print(f"🚀 Reranking {len(all_candidates)} total candidates...")
    reranker = NVIDIARerank(
        model="nvidia/llama-3.2-nv-rerankqa-1b-v2",
        api_key=NVIDIA_API_KEY,
        top_n=5,  # final chunk count for the LLM context
    )
    final_docs = reranker.compress_documents(all_candidates, question)

    for rank, doc in enumerate(final_docs, 1):
        score = doc.metadata.get("relevance_score", "N/A")
        # .get avoids a KeyError if a candidate somehow lacks 'book' metadata
        print(f"Rank {rank} [{doc.metadata.get('book', '?')}]: Score {score}")

    return final_docs
203
 
204
 
205
  # ─── Format Retrieved Docs ────────────────────────────────────────────────────
 
221
  chunks = []
222
  for i, doc in enumerate(book_docs, 1):
223
  page = doc.metadata.get("page", "?")
224
+ ch = doc.metadata.get("chapter")
225
+ vs = doc.metadata.get("verse")
226
+ ang = doc.metadata.get("ang")
227
+
228
+ # Create a clean citation string
229
+ if ang:
230
+ citation = f"Ang {ang}"
231
+ elif ch and vs:
232
+ citation = f"{ch}:{vs}"
233
+ else:
234
+ citation = f"Page {doc.metadata.get('page', '?')}"
235
+ chunks.append(f" [{i}] ({citation}): {doc.page_content.strip()}")
236
  sections.append(header + "\n" + "\n\n".join(chunks))
237
 
238
  return "\n\n".join(sections)
 
271
  _vector_store = None
272
 
273
 
274
+ def query_sacred_texts(question: str):
275
  """
276
  Query the sacred texts knowledge base with guaranteed per-book retrieval.
277
 
 
289
  if _llm_chain is None:
290
  print("πŸ”§ Initialising RAG chain (first call)...")
291
  _llm_chain, _vector_store = build_chain()
292
+
293
+ # --- Semantic cache check ---
294
+ cache_coll = _vector_store._client.get_or_create_collection(CACHE_COLLECTION)
295
+ cache_results = cache_coll.query(
296
+ query_texts=[question],
297
+ n_results=1
298
+ )
299
 
300
+ THRESHOLD = 0.35
301
+ # FIXED: Added check for cache_results['ids'] and ensuring distances is not empty
302
+ if cache_results['ids'] and cache_results['ids'][0]:
303
+ distance = cache_results['distances'][0][0]
304
+ if distance < THRESHOLD: # Similarity threshold
305
+ print(f"⚑️ Semantic Cache Hit! (Distance: {distance:.4f})")
306
+ yield json.dumps({"type": "cache","data": json.loads(cache_results['metadatas'][0][0]['response_json'])}) + "\n"
307
+ return
308
+
309
  # Step 1: Retrieve per-book (guaranteed slots for every scripture)
310
  print(f"\nπŸ” Retrieving {CHUNKS_PER_BOOK} chunks per book for: '{question}'")
311
  source_docs = retrieve_per_book(question, _vector_store)
312
 
313
  if not source_docs:
314
+ yield json.dumps({"type": "token", "data": "No content found in the knowledge base."}) + "\n"
315
+ return
 
 
 
 
 
316
 
317
+ # 3. Step 2: Format sources for the UI immediately
318
+ seen_sources = set()
 
 
 
319
  sources = []
320
  for doc in source_docs:
321
  book = doc.metadata.get("book", "Unknown")
322
+ ch = doc.metadata.get("chapter")
323
+ vs = doc.metadata.get("verse")
324
+ ang = doc.metadata.get("ang")
325
+
326
+ if ang:
327
+ cite_val = f"Ang {ang}"
328
+ elif ch and vs:
329
+ cite_val = f"{ch}:{vs}"
330
+ else:
331
+ cite_val = f"p. {doc.metadata.get('page', '?')}"
332
+
333
+ display_name = f"{book} {cite_val}"
334
  snippet = doc.page_content[:200].strip() + "..."
335
+ if display_name not in seen_sources:
336
+ seen_sources.add(display_name)
337
+ sources.append({"book": display_name, "page": cite_val, "snippet": snippet})
338
+ # Step 2: Format context grouped by book
339
+ context = format_docs(source_docs)
340
+ full_answer =""
341
+
342
+ # Step 3: Stream from the chain:
343
+ for chunk in _llm_chain.invoke({"context": context, "question": question}):
344
+ full_answer += chunk
345
+ yield json.dumps({"type": "token", "data": chunk}) + "\n" # Stream the answer as it's generated
346
+
347
+
348
+ # Filter sources to only those the LLM actually referenced
349
+ final_sources = []
350
+ ansnwer_lower = full_answer.lower()
351
+
352
+ for s in sources:
353
+ if s["book"].lower() in ansnwer_lower:
354
+ final_sources.append(s)
355
+
356
+ # If the LLM didn't explicitly reference any sources, we can optionally include all retrieved ones or none
357
+ display_sources = final_sources if final_sources else []
358
+
359
+ # Step 4: After streaming is done, save to semantic cache for future similar queries
360
+ result = {
361
+ "answer": full_answer,
362
+ "sources": display_sources,
363
  }
364
+
365
+ cache_coll.add(
366
+ documents=[question],
367
+ metadatas=[{"response_json": json.dumps(result)}],
368
+ ids=[question]
369
+ )
370
+
371
+ # Send sources as a final message after the answer is fully streamed
372
+ yield json.dumps({"type": "sources", "data": sources}) + "\n"
373
+
374
 
375
 
376
  # ─── Quick CLI Test ───────────────────────────────────────────────────────────
requirements.txt CHANGED
@@ -3,8 +3,9 @@ langchain
3
  langchain-community
4
  langchain-chroma
5
  langchain-nvidia-ai-endpoints
6
- langchain-text-splitters
7
-
 
8
  # Vector Store
9
  chromadb
10
 
 
3
  langchain-community
4
  langchain-chroma
5
  langchain-nvidia-ai-endpoints
6
+ langchain-text-splitters
7
+ langchain-core
8
+ rank_bm25
9
  # Vector Store
10
  chromadb
11
 
start.sh CHANGED
@@ -1,13 +1,16 @@
1
  #!/bin/bash
2
 
3
- # Check if the ChromaDB directory already exists
4
- if [ ! -d "/code/chroma_db" ]; then
 
 
5
  echo "πŸ“¦ ChromaDB not found. Starting ingestion..."
6
  python ingest.py
7
  else
8
  echo "βœ… ChromaDB found. Skipping ingestion."
9
  fi
10
 
11
- # Start the FastAPI application
12
- echo "πŸš€ Starting FastAPI server..."
13
- uvicorn app:app --host 0.0.0.0 --port 7860
 
 
1
  #!/bin/bash
2
 
3
+ # Use the absolute path relative to the app directory
4
+ CHROMA_PATH="./chroma_db"
5
+
6
+ if [ ! -d "$CHROMA_PATH" ]; then
7
  echo "πŸ“¦ ChromaDB not found. Starting ingestion..."
8
  python ingest.py
9
  else
10
  echo "βœ… ChromaDB found. Skipping ingestion."
11
  fi
12
 
13
+ echo "πŸš€ Starting FastAPI server with concurrency..."
14
+ # --workers 2 allows two simultaneous processes
15
+ # --timeout-keep-alive is increased for slow LLM responses
16
+ exec uvicorn app:app --host 0.0.0.0 --port 7860 --workers 2 --timeout-keep-alive 60