Shouvik599 committed on
Commit ·
7ae27cd
1
Parent(s): f1f031f
added feature improvements
Browse files- Dockerfile +16 -7
- app.py +9 -9
- features_to_add.txt +20 -0
- frontend/index.html +130 -5
- ingest.py +49 -0
- rag_chain.py +184 -37
- requirements.txt +3 -2
- start.sh +8 -5
Dockerfile
CHANGED
|
@@ -1,19 +1,28 @@
|
|
| 1 |
-
# Use an official Python runtime as a parent image
|
| 2 |
FROM python:3.11-slim
|
| 3 |
|
| 4 |
-
# Set
|
| 5 |
WORKDIR /app
|
| 6 |
|
| 7 |
-
#
|
| 8 |
COPY requirements.txt .
|
| 9 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 10 |
|
| 11 |
-
#
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
RUN chmod +x start.sh
|
| 16 |
|
| 17 |
# HF Spaces requires port 7860
|
| 18 |
-
|
|
|
|
| 19 |
CMD ["./start.sh"]
|
|
|
|
|
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
|
| 3 |
+
# Set working directory
|
| 4 |
WORKDIR /app
|
| 5 |
|
| 6 |
+
# Install dependencies
|
| 7 |
COPY requirements.txt .
|
| 8 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 9 |
|
| 10 |
+
# Create a non-root user for HF compliance
|
| 11 |
+
RUN useradd -m -u 1000 user
|
| 12 |
+
USER user
|
| 13 |
+
ENV HOME=/home/user \
|
| 14 |
+
PATH=/home/user/.local/bin:$PATH
|
| 15 |
|
| 16 |
+
# Set working directory to user's home
|
| 17 |
+
WORKDIR $HOME/app
|
| 18 |
+
|
| 19 |
+
# Copy application code and set ownership to our user
|
| 20 |
+
COPY --chown=user . $HOME/app
|
| 21 |
+
|
| 22 |
+
# Ensure the start script is executable
|
| 23 |
RUN chmod +x start.sh
|
| 24 |
|
| 25 |
# HF Spaces requires port 7860
|
| 26 |
+
EXPOSE 7860
|
| 27 |
+
|
| 28 |
CMD ["./start.sh"]
|
app.py
CHANGED
|
@@ -15,8 +15,9 @@ from fastapi import FastAPI, HTTPException
|
|
| 15 |
from fastapi.middleware.cors import CORSMiddleware
|
| 16 |
from pydantic import BaseModel, Field
|
| 17 |
from dotenv import load_dotenv
|
| 18 |
-
from fastapi.responses import FileResponse
|
| 19 |
from rag_chain import query_sacred_texts, get_embeddings, get_vector_store # β FIXED
|
|
|
|
| 20 |
|
| 21 |
load_dotenv()
|
| 22 |
|
|
@@ -91,8 +92,8 @@ def list_books():
|
|
| 91 |
raise HTTPException(status_code=500, detail=f"Could not read knowledge base: {e}")
|
| 92 |
|
| 93 |
|
| 94 |
-
@app.post("/ask",
|
| 95 |
-
def ask(request: AskRequest):
|
| 96 |
"""
|
| 97 |
Ask a spiritual or philosophical question.
|
| 98 |
The answer is grounded strictly in the sacred texts.
|
|
@@ -101,11 +102,10 @@ def ask(request: AskRequest):
|
|
| 101 |
raise HTTPException(status_code=400, detail="Question cannot be empty.")
|
| 102 |
|
| 103 |
try:
|
| 104 |
-
|
| 105 |
-
return
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
sources=[Source(**s) for s in result["sources"]],
|
| 109 |
)
|
| 110 |
except FileNotFoundError:
|
| 111 |
raise HTTPException(
|
|
@@ -137,4 +137,4 @@ if __name__ == "__main__":
|
|
| 137 |
print(f"π Running at : http://{host}:{port}")
|
| 138 |
print(f"{'β' * 40}\n")
|
| 139 |
|
| 140 |
-
uvicorn.run("app:app", host=host, port=port, reload=False) # reload=False for production
|
|
|
|
| 15 |
from fastapi.middleware.cors import CORSMiddleware
|
| 16 |
from pydantic import BaseModel, Field
|
| 17 |
from dotenv import load_dotenv
|
| 18 |
+
from fastapi.responses import StreamingResponse, FileResponse
|
| 19 |
from rag_chain import query_sacred_texts, get_embeddings, get_vector_store # β FIXED
|
| 20 |
+
from starlette.concurrency import run_in_threadpool
|
| 21 |
|
| 22 |
load_dotenv()
|
| 23 |
|
|
|
|
| 92 |
raise HTTPException(status_code=500, detail=f"Could not read knowledge base: {e}")
|
| 93 |
|
| 94 |
|
| 95 |
+
@app.post("/ask", tags=["Query"])
|
| 96 |
+
async def ask(request: AskRequest):
|
| 97 |
"""
|
| 98 |
Ask a spiritual or philosophical question.
|
| 99 |
The answer is grounded strictly in the sacred texts.
|
|
|
|
| 102 |
raise HTTPException(status_code=400, detail="Question cannot be empty.")
|
| 103 |
|
| 104 |
try:
|
| 105 |
+
|
| 106 |
+
return StreamingResponse(
|
| 107 |
+
query_sacred_texts(request.question),
|
| 108 |
+
media_type="application/json"
|
|
|
|
| 109 |
)
|
| 110 |
except FileNotFoundError:
|
| 111 |
raise HTTPException(
|
|
|
|
| 137 |
print(f"π Running at : http://{host}:{port}")
|
| 138 |
print(f"{'β' * 40}\n")
|
| 139 |
|
| 140 |
+
uvicorn.run("app:app", host=host, port=port, reload=False) # reload=False for production
|
features_to_add.txt
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Contextual chunk expansion — when a chunk is retrieved, also fetch the surrounding chunks (±1) to avoid cut-off verses losing their meaning
|
| 2 |
+
Hypothetical Document Embedding (HyDE) — generate a hypothetical ideal answer first, embed that, then search — dramatically improves recall for abstract questions
|
| 3 |
+
|
| 4 |
+
Multi-turn conversation — add chat history using LangChain ConversationBufferMemory so users can ask follow-up questions like "Elaborate on the second point"
|
| 5 |
+
Answer faithfulness scoring — use an LLM-as-judge step to self-check whether the answer is actually grounded in the retrieved chunks before returning it
|
| 6 |
+
Query rewriting — if the user query is vague, have the LLM rephrase it into a better search query before retrieval (improves semantic matching)
|
| 7 |
+
|
| 8 |
+
Multi-language support — ingest Arabic Quran + Sanskrit Gita alongside English translations; embed both and let users query in their preferred language
|
| 9 |
+
Incremental ingestion — track which PDFs have been ingested (via a manifest file) so re-running ingest.py only processes new books, not the whole library
|
| 10 |
+
Book versioning — support multiple translations of the same book (e.g. KJV vs NIV Bible) and let users choose
|
| 11 |
+
|
| 12 |
+
Snippet preview on hover — show the actual retrieved passage when hovering over a source badge in the UI
|
| 13 |
+
Query suggestions — after each answer, suggest 2-3 related follow-up questions
|
| 14 |
+
Topic explorer — a sidebar with pre-grouped themes (Death & Afterlife, Compassion, Duty, Prayer) that users can browse
|
| 15 |
+
Compare mode — a dedicated side-by-side view for "How does Book A vs Book B address X"
|
| 16 |
+
|
| 17 |
+
Hallucination guardrail — run a separate verification pass checking that every claim in the answer maps back to a retrieved chunk; flag or remove unsupported claims
|
| 18 |
+
Out-of-scope detection — classify queries before retrieval; politely decline non-spiritual questions (e.g. "Write me code") with a prompt-level or classifier-level guard
|
| 19 |
+
Rate limiting — add per-IP request throttling in FastAPI to prevent API key exhaustion
|
| 20 |
+
API key security — move to server-side key storage properly; never expose NVIDIA_API_KEY or GEMINI_API_KEY in frontend calls
|
frontend/index.html
CHANGED
|
@@ -41,6 +41,57 @@
|
|
| 41 |
/* violet β Sikh royal purple */
|
| 42 |
}
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
html,
|
| 45 |
body {
|
| 46 |
height: 100%;
|
|
@@ -723,13 +774,69 @@
|
|
| 723 |
throw new Error(err.detail || "Server error");
|
| 724 |
}
|
| 725 |
|
| 726 |
-
|
| 727 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 728 |
} catch (err) {
|
| 729 |
let msg = err.message;
|
| 730 |
-
if (msg.includes("fetch") || msg.includes("NetworkError") || msg.includes("Failed")) {
|
| 731 |
-
msg = "Connecting to the divine knowledge base... Please wait a moment or refresh the page.";
|
| 732 |
-
}
|
| 733 |
replaceLoadingWithError(loadingEl, msg);
|
| 734 |
} finally {
|
| 735 |
isLoading = false;
|
|
@@ -738,6 +845,24 @@
|
|
| 738 |
}
|
| 739 |
}
|
| 740 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 741 |
function askSuggested(btn) {
|
| 742 |
const input = document.getElementById("questionInput");
|
| 743 |
input.value = btn.textContent;
|
|
|
|
| 41 |
/* violet β Sikh royal purple */
|
| 42 |
}
|
| 43 |
|
| 44 |
+
/* Animated Thinking state for streaming */
|
| 45 |
+
.thinking-dots {
|
| 46 |
+
display: inline-flex;
|
| 47 |
+
gap: 4px;
|
| 48 |
+
margin-left: 4px;
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
.thinking-dots span {
|
| 52 |
+
width: 4px;
|
| 53 |
+
height: 4px;
|
| 54 |
+
background: var(--gold);
|
| 55 |
+
border-radius: 50%;
|
| 56 |
+
animation: bounce 1.4s infinite ease-in-out;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
@keyframes bounce {
|
| 60 |
+
|
| 61 |
+
0%,
|
| 62 |
+
80%,
|
| 63 |
+
100% {
|
| 64 |
+
transform: scale(0);
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
40% {
|
| 68 |
+
transform: scale(1);
|
| 69 |
+
}
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
/* Make streaming text fade in slightly for smoothness */
|
| 73 |
+
#currentStreamingMsg p {
|
| 74 |
+
animation: fadeIn 0.3s ease-in;
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
@keyframes fadeIn {
|
| 78 |
+
from {
|
| 79 |
+
opacity: 0.7;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
to {
|
| 83 |
+
opacity: 1;
|
| 84 |
+
}
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
/* Ensure the bubble has a minimum height so it doesn't look like a "small block" */
|
| 88 |
+
.msg-bubble:empty::before {
|
| 89 |
+
content: "Writing wisdom...";
|
| 90 |
+
color: var(--muted);
|
| 91 |
+
font-style: italic;
|
| 92 |
+
font-size: 0.9rem;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
html,
|
| 96 |
body {
|
| 97 |
height: 100%;
|
|
|
|
| 774 |
throw new Error(err.detail || "Server error");
|
| 775 |
}
|
| 776 |
|
| 777 |
+
// Initialize variables to build the UI
|
| 778 |
+
const reader = res.body.getReader();
|
| 779 |
+
const decoder = new TextDecoder();
|
| 780 |
+
let fullAnswer = "";
|
| 781 |
+
let sourcesData = [];
|
| 782 |
+
|
| 783 |
+
// Prepare the assistant UI bubble immediately
|
| 784 |
+
loadingEl.innerHTML = `
|
| 785 |
+
<span class="msg-label">Sacred Texts</span>
|
| 786 |
+
<div class="msg-bubble" id="currentStreamingMsg">
|
| 787 |
+
<div class="loading-text">The scriptures are being revealed<span class="thinking-dots"><span></span><span></span><span></span></span></div>
|
| 788 |
+
</div>
|
| 789 |
+
<div id="currentStreamingSources"></div>
|
| 790 |
+
`;
|
| 791 |
+
const bubble = document.getElementById("currentStreamingMsg");
|
| 792 |
+
const sourcesContainer = document.getElementById("currentStreamingSources");
|
| 793 |
+
let firstTokenReceived = false;
|
| 794 |
+
|
| 795 |
+
while (true) {
|
| 796 |
+
const { done, value } = await reader.read();
|
| 797 |
+
if (done) break;
|
| 798 |
+
|
| 799 |
+
const chunk = decoder.decode(value, { stream: true });
|
| 800 |
+
const lines = chunk.split("\n");
|
| 801 |
+
|
| 802 |
+
for (const line of lines) {
|
| 803 |
+
if (!line.trim()) continue;
|
| 804 |
+
try {
|
| 805 |
+
const parsed = JSON.parse(line);
|
| 806 |
+
|
| 807 |
+
if (parsed.type === "token") {
|
| 808 |
+
//Remove the loading text as soon as the first word arrives
|
| 809 |
+
if (!firstTokenReceived) {
|
| 810 |
+
bubble.innerHTML = "";
|
| 811 |
+
firstTokenReceived = true;
|
| 812 |
+
}
|
| 813 |
+
|
| 814 |
+
fullAnswer += parsed.data;
|
| 815 |
+
// Dynamically update the bubble with formatted markdown/paragraphs
|
| 816 |
+
bubble.innerHTML = formatAnswer(fullAnswer);
|
| 817 |
+
scrollToBottom();
|
| 818 |
+
}
|
| 819 |
+
else if (parsed.type === "sources") {
|
| 820 |
+
sourcesData = parsed.data;
|
| 821 |
+
renderSourcesInPlace(sourcesContainer, sourcesData);
|
| 822 |
+
}
|
| 823 |
+
else if (parsed.type === "cache") {
|
| 824 |
+
bubble.innerHTML = formatAnswer(parsed.data.answer);
|
| 825 |
+
renderSourcesInPlace(sourcesContainer, parsed.data.sources);
|
| 826 |
+
scrollToBottom();
|
| 827 |
+
}
|
| 828 |
+
} catch (e) {
|
| 829 |
+
console.error("Error parsing NDJSON line:", e);
|
| 830 |
+
}
|
| 831 |
+
}
|
| 832 |
+
}
|
| 833 |
+
|
| 834 |
+
// Clean up IDs once done so next messages don't conflict
|
| 835 |
+
bubble.removeAttribute("id");
|
| 836 |
+
sourcesContainer.removeAttribute("id");
|
| 837 |
+
|
| 838 |
} catch (err) {
|
| 839 |
let msg = err.message;
|
|
|
|
|
|
|
|
|
|
| 840 |
replaceLoadingWithError(loadingEl, msg);
|
| 841 |
} finally {
|
| 842 |
isLoading = false;
|
|
|
|
| 845 |
}
|
| 846 |
}
|
| 847 |
|
| 848 |
+
// Helper to render sources inside the streaming flow
|
| 849 |
+
/**
 * Render the citation badges for one answer into `container`.
 *
 * @param {HTMLElement} container - Element whose contents are replaced.
 * @param {Array<{book: string, snippet: string}>} sources - Source entries
 *   sent by the backend; `null`/`undefined` is treated as an empty list.
 *
 * Nothing is written when there are no sources, so an existing container
 * is left untouched in that case.
 */
function renderSourcesInPlace(container, sources) {
  // Book names and snippets come from document text and may contain quotes
  // or angle brackets; escape them before they are placed in markup or in
  // the title="" attribute, otherwise they break the tag or inject HTML.
  const escapeHtml = (value) =>
    (value == null ? "" : String(value))
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");

  const sourceTags = (sources || []).map((s) => {
    const cls = getSourceClass(s.book);
    // The badge shows the book name; the retrieved snippet is the hover text.
    return `<span class="source-tag ${cls}" title="${escapeHtml(s.snippet)}">π ${escapeHtml(s.book)}</span>`;
  }).join("");

  if (sourceTags) {
    container.innerHTML = `
      <div class="sources">
        <div class="sources-label">Citations</div>
        <div class="source-tags">${sourceTags}</div>
      </div>
    `;
  }
}
|
| 865 |
+
|
| 866 |
function askSuggested(btn) {
|
| 867 |
const input = document.getElementById("questionInput");
|
| 868 |
input.value = btn.textContent;
|
ingest.py
CHANGED
|
@@ -20,6 +20,7 @@ from langchain_community.document_loaders import PyPDFLoader, PyMuPDFLoader
|
|
| 20 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 21 |
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
|
| 22 |
from langchain_chroma import Chroma
|
|
|
|
| 23 |
|
| 24 |
load_dotenv()
|
| 25 |
|
|
@@ -46,8 +47,45 @@ CHUNK_SIZE = 800 # characters per chunk
|
|
| 46 |
CHUNK_OVERLAP = 150 # overlap to preserve verse context across boundaries
|
| 47 |
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# βββ Helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
def detect_book_name(filename: str) -> str:
|
| 52 |
"""Infer the book's display name from its filename."""
|
| 53 |
name_lower = filename.lower()
|
|
@@ -83,6 +121,7 @@ def tag_documents(docs: list, book_name: str, source_file: str) -> list:
|
|
| 83 |
"""
|
| 84 |
for doc in docs:
|
| 85 |
doc.metadata["book"] = book_name
|
|
|
|
| 86 |
doc.metadata["source_file"] = source_file
|
| 87 |
# Keep the page number if already present from the loader
|
| 88 |
if "page" not in doc.metadata:
|
|
@@ -135,6 +174,16 @@ def ingest():
|
|
| 135 |
)
|
| 136 |
chunks = splitter.split_documents(all_docs)
|
| 137 |
print(f" β {len(chunks)} chunks created")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
# ββ Step 3: Embed & store ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 140 |
print(f"\nπ’ Initialising NVIDIA embedding model (llama-nemotron-embed-vl-1b-v2)...")
|
|
|
|
| 20 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 21 |
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
|
| 22 |
from langchain_chroma import Chroma
|
| 23 |
+
import re
|
| 24 |
|
| 25 |
load_dotenv()
|
| 26 |
|
|
|
|
| 47 |
CHUNK_OVERLAP = 150 # overlap to preserve verse context across boundaries
|
| 48 |
|
| 49 |
|
| 50 |
+
# Regex patterns used to pull a human-readable verse reference out of a chunk.
VERSE_PATTERNS = {
    "Bhagavad Gita": r"(?:Verse\s+)?(\d+\.\d+)",    # Matches 2.47 or Verse 2.47
    "Quran": r"(\d+:\d+)",                           # Matches 2:286
    "Bible": r"(\d+\s+)?[A-Z][a-z]+\s+\d+:\d+",     # Matches John 3:16 or 1 Cor 13:4
    # The "Ang" marker is required: a bare-number alternative matches the
    # first digit anywhere in the text and yields a bogus citation. This also
    # keeps the pattern consistent with STRUCTURE_PATTERNS below.
    "Guru Granth Sahib": r"Ang\s+(\d+)",             # Matches Ang 1
}

# Patterns to identify structure (chapter/verse or ang numbers) in the text.
STRUCTURE_PATTERNS = {
    "Bhagavad Gita": r"(\d+)\.(\d+)",        # Matches 2.47 (Chapter.Verse)
    "Quran": r"(\d+):(\d+)",                 # Matches 2:186 (Surah:Verse)
    "Bible": r"(\d+):(\d+)",                 # Matches 3:16 (Chapter:Verse)
    "Guru Granth Sahib": r"Ang\s+(\d+)",     # Matches Ang 1
}

# ─── Helpers ──────────────────────────────────────────────────────────────────


def parse_structure(text: str, book_name: str) -> dict[str, int]:
    """Return the structural position of the first reference found in *text*.

    Args:
        text: Chunk text to scan.
        book_name: Key into ``STRUCTURE_PATTERNS`` selecting the pattern.

    Returns:
        ``{"ang": n}`` for "Guru Granth Sahib", ``{"chapter": c, "verse": v}``
        for the other known books, or ``{}`` when the book has no pattern,
        the text is empty, or nothing matches.
    """
    pattern = STRUCTURE_PATTERNS.get(book_name)
    if not pattern or not text:
        return {}

    match = re.search(pattern, text)
    if match is None:
        return {}

    # The Guru Granth Sahib pattern has a single capture group (the ang).
    if book_name == "Guru Granth Sahib":
        return {"ang": int(match.group(1))}
    return {"chapter": int(match.group(1)), "verse": int(match.group(2))}


def extract_verse(text: str, book_name: str) -> str:
    """Extract a verse reference from a text chunk based on the book.

    Args:
        text: Chunk text to scan; ``None`` or empty is treated as no match.
        book_name: Key into ``VERSE_PATTERNS`` selecting the pattern.

    Returns:
        The full matched reference (e.g. ``"John 3:16"``), ``"Unknown"`` when
        the book has no pattern, or ``"General Context"`` when the text holds
        no reference.
    """
    pattern = VERSE_PATTERNS.get(book_name)
    if not pattern:
        return "Unknown"

    match = re.search(pattern, text or "")
    return match.group(0) if match else "General Context"
|
| 88 |
+
|
| 89 |
def detect_book_name(filename: str) -> str:
|
| 90 |
"""Infer the book's display name from its filename."""
|
| 91 |
name_lower = filename.lower()
|
|
|
|
| 121 |
"""
|
| 122 |
for doc in docs:
|
| 123 |
doc.metadata["book"] = book_name
|
| 124 |
+
doc.metadata["verse_citation"] = extract_verse(doc.page_content, book_name)
|
| 125 |
doc.metadata["source_file"] = source_file
|
| 126 |
# Keep the page number if already present from the loader
|
| 127 |
if "page" not in doc.metadata:
|
|
|
|
| 174 |
)
|
| 175 |
chunks = splitter.split_documents(all_docs)
|
| 176 |
print(f" β {len(chunks)} chunks created")
|
| 177 |
+
|
| 178 |
+
# Add verse citations to chunk metadata for better source attribution
|
| 179 |
+
print(f"π·οΈ Parsing structure (chapters/verses) for {len(chunks)} chunks...")
|
| 180 |
+
for chunk in chunks:
|
| 181 |
+
# Use the parse_structure function you defined
|
| 182 |
+
structure = parse_structure(chunk.page_content, chunk.metadata["book"])
|
| 183 |
+
# Update the chunk metadata so it is saved in ChromaDB
|
| 184 |
+
chunk.metadata.update(structure)
|
| 185 |
+
|
| 186 |
+
print(f" β {len(chunks)} chunks created and tagged")
|
| 187 |
|
| 188 |
# ββ Step 3: Embed & store ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 189 |
print(f"\nπ’ Initialising NVIDIA embedding model (llama-nemotron-embed-vl-1b-v2)...")
|
rag_chain.py
CHANGED
|
@@ -19,12 +19,16 @@ Returns a dict with:
|
|
| 19 |
"""
|
| 20 |
|
| 21 |
import os
|
|
|
|
| 22 |
from dotenv import load_dotenv
|
| 23 |
-
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
|
| 24 |
from langchain_chroma import Chroma
|
| 25 |
from langchain_core.prompts import ChatPromptTemplate
|
| 26 |
from langchain_core.output_parsers import StrOutputParser
|
|
|
|
|
|
|
| 27 |
load_dotenv()
|
|
|
|
| 28 |
|
| 29 |
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
|
| 30 |
CHROMA_DB_PATH = os.getenv("CHROMA_DB_PATH", "./chroma_db")
|
|
@@ -41,6 +45,8 @@ KNOWN_BOOKS = [
|
|
| 41 |
"Guru Granth Sahib",
|
| 42 |
]
|
| 43 |
|
|
|
|
|
|
|
| 44 |
|
| 45 |
# βββ System Prompt ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 46 |
|
|
@@ -51,7 +57,7 @@ STRICT RULES you must ALWAYS follow:
|
|
| 51 |
1. Answer ONLY using the provided context passages. Do NOT use any external knowledge.
|
| 52 |
2. If a specific book's passages are provided but not relevant to the question, skip that book.
|
| 53 |
3. If NONE of the context is relevant, say: "The provided texts do not directly address this question."
|
| 54 |
-
4. Always cite which book(s) your answer draws from.
|
| 55 |
5. When the question asks to COMPARE books (e.g. "what do Quran and Gita say"), you MUST
|
| 56 |
address EACH of those books separately, then synthesise the common thread.
|
| 57 |
6. Be respectful and neutral toward all faiths β treat each text with equal reverence.
|
|
@@ -90,6 +96,26 @@ def get_vector_store(embeddings):
|
|
| 90 |
|
| 91 |
# βββ Per-Book Retrieval βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
def retrieve_per_book(question: str, vector_store: Chroma) -> list:
|
| 94 |
"""
|
| 95 |
Retrieve CHUNKS_PER_BOOK chunks from EACH known book independently,
|
|
@@ -97,23 +123,83 @@ def retrieve_per_book(question: str, vector_store: Chroma) -> list:
|
|
| 97 |
in the context β no book can be crowded out by higher-scoring chunks
|
| 98 |
from another book.
|
| 99 |
"""
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
try:
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
except Exception as e:
|
| 114 |
print(f" β {book}: retrieval error β {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
-
return
|
| 117 |
|
| 118 |
|
| 119 |
# βββ Format Retrieved Docs ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -135,7 +221,18 @@ def format_docs(docs: list) -> str:
|
|
| 135 |
chunks = []
|
| 136 |
for i, doc in enumerate(book_docs, 1):
|
| 137 |
page = doc.metadata.get("page", "?")
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
sections.append(header + "\n" + "\n\n".join(chunks))
|
| 140 |
|
| 141 |
return "\n\n".join(sections)
|
|
@@ -174,7 +271,7 @@ _llm_chain = None
|
|
| 174 |
_vector_store = None
|
| 175 |
|
| 176 |
|
| 177 |
-
def query_sacred_texts(question: str)
|
| 178 |
"""
|
| 179 |
Query the sacred texts knowledge base with guaranteed per-book retrieval.
|
| 180 |
|
|
@@ -192,38 +289,88 @@ def query_sacred_texts(question: str) -> dict:
|
|
| 192 |
if _llm_chain is None:
|
| 193 |
print("π§ Initialising RAG chain (first call)...")
|
| 194 |
_llm_chain, _vector_store = build_chain()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
# Step 1: Retrieve per-book (guaranteed slots for every scripture)
|
| 197 |
print(f"\nπ Retrieving {CHUNKS_PER_BOOK} chunks per book for: '{question}'")
|
| 198 |
source_docs = retrieve_per_book(question, _vector_store)
|
| 199 |
|
| 200 |
if not source_docs:
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
"sources": [],
|
| 204 |
-
}
|
| 205 |
-
|
| 206 |
-
# Step 2: Format context grouped by book
|
| 207 |
-
context = format_docs(source_docs)
|
| 208 |
|
| 209 |
-
# Step
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
# Step 4: Build deduplicated source list for the UI
|
| 213 |
-
seen_books = set()
|
| 214 |
sources = []
|
| 215 |
for doc in source_docs:
|
| 216 |
book = doc.metadata.get("book", "Unknown")
|
| 217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
snippet = doc.page_content[:200].strip() + "..."
|
| 219 |
-
if
|
| 220 |
-
|
| 221 |
-
sources.append({"book":
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
|
| 229 |
# βββ Quick CLI Test βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 19 |
"""
|
| 20 |
|
| 21 |
import os
|
| 22 |
+
from pydoc import doc
|
| 23 |
from dotenv import load_dotenv
|
| 24 |
+
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA, NVIDIARerank
|
| 25 |
from langchain_chroma import Chroma
|
| 26 |
from langchain_core.prompts import ChatPromptTemplate
|
| 27 |
from langchain_core.output_parsers import StrOutputParser
|
| 28 |
+
from langchain_community.retrievers import BM25Retriever
|
| 29 |
+
from langchain_classic.retrievers import EnsembleRetriever, ContextualCompressionRetriever
|
| 30 |
load_dotenv()
|
| 31 |
+
import json
|
| 32 |
|
| 33 |
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
|
| 34 |
CHROMA_DB_PATH = os.getenv("CHROMA_DB_PATH", "./chroma_db")
|
|
|
|
| 45 |
"Guru Granth Sahib",
|
| 46 |
]
|
| 47 |
|
| 48 |
+
# Create a separate collection for semantic cache
|
| 49 |
+
CACHE_COLLECTION = "semantic_cache"
|
| 50 |
|
| 51 |
# βββ System Prompt ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 52 |
|
|
|
|
| 57 |
1. Answer ONLY using the provided context passages. Do NOT use any external knowledge.
|
| 58 |
2. If a specific book's passages are provided but not relevant to the question, skip that book.
|
| 59 |
3. If NONE of the context is relevant, say: "The provided texts do not directly address this question."
|
| 60 |
+
4. Always explicitly name and cite which book(s) your answer draws from in the text of your answer.
|
| 61 |
5. When the question asks to COMPARE books (e.g. "what do Quran and Gita say"), you MUST
|
| 62 |
address EACH of those books separately, then synthesise the common thread.
|
| 63 |
6. Be respectful and neutral toward all faiths β treat each text with equal reverence.
|
|
|
|
| 96 |
|
| 97 |
# βββ Per-Book Retrieval βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 98 |
|
| 99 |
+
def get_reranked_retriever(
    base_retriever,
    *,
    top_n: int = 5,
    model: str = "nvidia/llama-3.2-nv-rerankqa-1b-v2",
):
    """
    Wrap a retriever with an NVIDIA reranking layer.

    Args:
        base_retriever: Retriever whose results should be reranked.
        top_n: Number of reranked chunks to keep (default 5, the value
            previously hard-coded here).
        model: Reranker model name passed to ``NVIDIARerank``.

    Returns:
        A ``ContextualCompressionRetriever`` that uses the reranker as its
        compressor on top of ``base_retriever``.

    Raises:
        ValueError: If ``top_n`` is not a positive integer.
    """
    if top_n < 1:
        raise ValueError("top_n must be a positive integer")

    reranker = NVIDIARerank(
        model=model,
        api_key=NVIDIA_API_KEY,
        top_n=top_n,  # Only this many chunks are passed on after reranking
    )

    return ContextualCompressionRetriever(
        base_compressor=reranker,
        base_retriever=base_retriever,
    )
|
| 118 |
+
|
| 119 |
def retrieve_per_book(question: str, vector_store: Chroma) -> list:
    """
    Collect hybrid-search candidates per book, then rerank them as one pool.

    Steps:
      1. Route the question: if it mentions keywords for one or more books,
         only those books are searched; otherwise every book in KNOWN_BOOKS.
      2. For each routed book, combine a BM25 retriever (built from that
         book's stored chunks) and a filtered vector retriever in an
         equally weighted EnsembleRetriever.
      3. Rerank the pooled candidates with NVIDIARerank and keep the top 5.

    NOTE: because reranking runs over the pooled candidates, the final list
    is NOT guaranteed to contain a chunk from every searched book.

    Args:
        question: The user's question.
        vector_store: Chroma store holding chunks tagged with a "book"
            metadata field.

    Returns:
        The reranked documents, or an empty list when no candidates were
        found. A retrieval error for one book is printed and that book is
        skipped.
    """
    from langchain_core.documents import Document

    candidates_per_retriever = 10  # k for both the BM25 and vector retrievers
    final_chunk_count = 5          # chunks kept after reranking

    # (book, keywords) pairs; order determines the order books are searched.
    routing_table = (
        ("Bhagavad Gita", ("gita", "bhagavad", "hindu", "hinduism")),
        ("Quran", ("quran", "koran", "islam", "muslim", "muhammad")),
        ("Bible", ("bible", "testament", "christian", "jesus", "christ")),
        ("Guru Granth Sahib", ("granth", "guru", "sikh", "sikhism", "nanak")),
    )

    # Detect if the user is asking about specific books (substring match).
    question_lower = question.lower()
    target_books = [
        book
        for book, keywords in routing_table
        if any(kw in question_lower for kw in keywords)
    ]

    # If no specific book is detected, use all books
    books_to_search = target_books if target_books else KNOWN_BOOKS

    print(f"π― Routing query to: {books_to_search}")

    all_candidates = []
    for book in books_to_search:
        try:
            # Pull this book's chunks into memory to build the BM25 index.
            # Acceptable for a small corpus; a larger one needs a persistent
            # keyword index instead of rebuilding it on every query.
            book_data = vector_store.get(where={"book": book})
            book_docs = [
                Document(page_content=content, metadata=meta)
                for content, meta in zip(book_data["documents"], book_data["metadatas"])
            ]
            if not book_docs:
                continue

            bm25_retriever = BM25Retriever.from_documents(book_docs)
            bm25_retriever.k = candidates_per_retriever

            vector_retriever = vector_store.as_retriever(
                search_kwargs={"k": candidates_per_retriever, "filter": {"book": book}}
            )

            ensemble_retriever = EnsembleRetriever(
                retrievers=[bm25_retriever, vector_retriever],
                weights=[0.5, 0.5],
            )

            # Collect candidates without reranking yet
            book_candidates = ensemble_retriever.invoke(question)
            all_candidates.extend(book_candidates)
            print(f" π¦ {book}: Found {len(book_candidates)} candidates")

        except Exception as e:
            # Best effort: one failing book must not abort the whole query.
            print(f" β {book}: retrieval error β {e}")

    if not all_candidates:
        return []

    # Rerank the entire pool at once
    print(f"π Reranking {len(all_candidates)} total candidates...")
    reranker = NVIDIARerank(
        model="nvidia/llama-3.2-nv-rerankqa-1b-v2",
        api_key=NVIDIA_API_KEY,
        top_n=final_chunk_count,  # Final count for LLM context
    )

    # Use the reranker directly to compress the full list
    final_docs = reranker.compress_documents(all_candidates, question)

    for rank, doc in enumerate(final_docs, 1):
        score = doc.metadata.get("relevance_score", "N/A")
        # .get() so a chunk missing the "book" tag cannot crash the query here.
        print(f"Rank {rank} [{doc.metadata.get('book', 'Unknown')}]: Score {score}")

    return final_docs
|
| 203 |
|
| 204 |
|
| 205 |
# βββ Format Retrieved Docs ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 221 |
chunks = []
|
| 222 |
for i, doc in enumerate(book_docs, 1):
|
| 223 |
page = doc.metadata.get("page", "?")
|
| 224 |
+
ch = doc.metadata.get("chapter")
|
| 225 |
+
vs = doc.metadata.get("verse")
|
| 226 |
+
ang = doc.metadata.get("ang")
|
| 227 |
+
|
| 228 |
+
# Create a clean citation string
|
| 229 |
+
if ang:
|
| 230 |
+
citation = f"Ang {ang}"
|
| 231 |
+
elif ch and vs:
|
| 232 |
+
citation = f"{ch}:{vs}"
|
| 233 |
+
else:
|
| 234 |
+
citation = f"Page {doc.metadata.get('page', '?')}"
|
| 235 |
+
chunks.append(f" [{i}] ({citation}): {doc.page_content.strip()}")
|
| 236 |
sections.append(header + "\n" + "\n\n".join(chunks))
|
| 237 |
|
| 238 |
return "\n\n".join(sections)
|
|
|
|
| 271 |
_vector_store = None
|
| 272 |
|
| 273 |
|
| 274 |
+
def query_sacred_texts(question: str):
|
| 275 |
"""
|
| 276 |
Query the sacred texts knowledge base with guaranteed per-book retrieval.
|
| 277 |
|
|
|
|
| 289 |
if _llm_chain is None:
|
| 290 |
print("🔧 Initialising RAG chain (first call)...")
|
| 291 |
_llm_chain, _vector_store = build_chain()
|
| 292 |
+
|
| 293 |
+
# --- Semantic cache check ---
|
| 294 |
+
cache_coll = _vector_store._client.get_or_create_collection(CACHE_COLLECTION)
|
| 295 |
+
cache_results = cache_coll.query(
|
| 296 |
+
query_texts=[question],
|
| 297 |
+
n_results=1
|
| 298 |
+
)
|
| 299 |
|
| 300 |
+
THRESHOLD = 0.35
|
| 301 |
+
# FIXED: Added check for cache_results['ids'] and ensuring distances is not empty
|
| 302 |
+
if cache_results['ids'] and cache_results['ids'][0]:
|
| 303 |
+
distance = cache_results['distances'][0][0]
|
| 304 |
+
if distance < THRESHOLD: # Similarity threshold
|
| 305 |
+
print(f"⚡️ Semantic Cache Hit! (Distance: {distance:.4f})")
|
| 306 |
+
yield json.dumps({"type": "cache","data": json.loads(cache_results['metadatas'][0][0]['response_json'])}) + "\n"
|
| 307 |
+
return
|
| 308 |
+
|
| 309 |
# Step 1: Retrieve per-book (guaranteed slots for every scripture)
|
| 310 |
print(f"\nπ Retrieving {CHUNKS_PER_BOOK} chunks per book for: '{question}'")
|
| 311 |
source_docs = retrieve_per_book(question, _vector_store)
|
| 312 |
|
| 313 |
if not source_docs:
|
| 314 |
+
yield json.dumps({"type": "token", "data": "No content found in the knowledge base."}) + "\n"
|
| 315 |
+
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
| 317 |
+
# Build the de-duplicated list of sources for the UI (sent after the answer has streamed)
|
| 318 |
+
seen_sources = set()
|
|
|
|
|
|
|
|
|
|
| 319 |
sources = []
|
| 320 |
for doc in source_docs:
|
| 321 |
book = doc.metadata.get("book", "Unknown")
|
| 322 |
+
ch = doc.metadata.get("chapter")
|
| 323 |
+
vs = doc.metadata.get("verse")
|
| 324 |
+
ang = doc.metadata.get("ang")
|
| 325 |
+
|
| 326 |
+
if ang:
|
| 327 |
+
cite_val = f"Ang {ang}"
|
| 328 |
+
elif ch and vs:
|
| 329 |
+
cite_val = f"{ch}:{vs}"
|
| 330 |
+
else:
|
| 331 |
+
cite_val = f"p. {doc.metadata.get('page', '?')}"
|
| 332 |
+
|
| 333 |
+
display_name = f"{book} {cite_val}"
|
| 334 |
snippet = doc.page_content[:200].strip() + "..."
|
| 335 |
+
if display_name not in seen_sources:
|
| 336 |
+
seen_sources.add(display_name)
|
| 337 |
+
sources.append({"book": display_name, "page": cite_val, "snippet": snippet})
|
| 338 |
+
# Step 2: Format context grouped by book
|
| 339 |
+
context = format_docs(source_docs)
|
| 340 |
+
full_answer =""
|
| 341 |
+
|
| 342 |
+
# Step 3: Stream from the chain:
|
| 343 |
+
for chunk in _llm_chain.invoke({"context": context, "question": question}):
|
| 344 |
+
full_answer += chunk
|
| 345 |
+
yield json.dumps({"type": "token", "data": chunk}) + "\n" # Stream the answer as it's generated
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
# Filter sources to only those the LLM actually referenced
|
| 349 |
+
final_sources = []
|
| 350 |
+
ansnwer_lower = full_answer.lower()
|
| 351 |
+
|
| 352 |
+
for s in sources:
|
| 353 |
+
if s["book"].lower() in ansnwer_lower:
|
| 354 |
+
final_sources.append(s)
|
| 355 |
+
|
| 356 |
+
# If the LLM didn't explicitly reference any sources, we can optionally include all retrieved ones or none
|
| 357 |
+
display_sources = final_sources if final_sources else []
|
| 358 |
+
|
| 359 |
+
# Step 4: After streaming is done, save to semantic cache for future similar queries
|
| 360 |
+
result = {
|
| 361 |
+
"answer": full_answer,
|
| 362 |
+
"sources": display_sources,
|
| 363 |
}
|
| 364 |
+
|
| 365 |
+
cache_coll.add(
|
| 366 |
+
documents=[question],
|
| 367 |
+
metadatas=[{"response_json": json.dumps(result)}],
|
| 368 |
+
ids=[question]
|
| 369 |
+
)
|
| 370 |
+
|
| 371 |
+
# Send sources as a final message after the answer is fully streamed
|
| 372 |
+
yield json.dumps({"type": "sources", "data": sources}) + "\n"
|
| 373 |
+
|
| 374 |
|
| 375 |
|
| 376 |
# ─── Quick CLI Test ───────────────────────────────────────────────────────────
|
requirements.txt
CHANGED
|
@@ -3,8 +3,9 @@ langchain
|
|
| 3 |
langchain-community
|
| 4 |
langchain-chroma
|
| 5 |
langchain-nvidia-ai-endpoints
|
| 6 |
-
langchain-text-splitters
|
| 7 |
-
|
|
|
|
| 8 |
# Vector Store
|
| 9 |
chromadb
|
| 10 |
|
|
|
|
| 3 |
langchain-community
|
| 4 |
langchain-chroma
|
| 5 |
langchain-nvidia-ai-endpoints
|
| 6 |
+
langchain-text-splitters
|
| 7 |
+
langchain-core
|
| 8 |
+
rank_bm25
|
| 9 |
# Vector Store
|
| 10 |
chromadb
|
| 11 |
|
start.sh
CHANGED
|
@@ -1,13 +1,16 @@
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
-
#
|
| 4 |
-
|
|
|
|
|
|
|
| 5 |
echo "📦 ChromaDB not found. Starting ingestion..."
|
| 6 |
python ingest.py
|
| 7 |
else
|
| 8 |
echo "✅ ChromaDB found. Skipping ingestion."
|
| 9 |
fi
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
+
# Path to the ChromaDB directory, relative to the app's working directory
|
| 4 |
+
CHROMA_PATH="./chroma_db"
|
| 5 |
+
|
| 6 |
+
if [ ! -d "$CHROMA_PATH" ]; then
|
| 7 |
echo "📦 ChromaDB not found. Starting ingestion..."
|
| 8 |
python ingest.py
|
| 9 |
else
|
| 10 |
echo "✅ ChromaDB found. Skipping ingestion."
|
| 11 |
fi
|
| 12 |
|
| 13 |
+
echo "π Starting FastAPI server with concurrency..."
|
| 14 |
+
# --workers 2 allows two simultaneous processes
|
| 15 |
+
# --timeout-keep-alive is increased for slow LLM responses
|
| 16 |
+
exec uvicorn app:app --host 0.0.0.0 --port 7860 --workers 2 --timeout-keep-alive 60
|