"""Streamlit app that answers questions from a pre-built FAISS index.

The app loads a FAISS vector store (optionally shipped as a zip archive),
retrieves the chunks most similar to the user's question, and asks a Gemini
chat model to answer strictly from that retrieved context.
"""

import html
import os
import zipfile

import streamlit as st
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

# --- Configuration ---
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")

# Paths
FAISS_ZIP_PATH = "faiss_index.zip"
FAISS_INDEX_PATH = "faiss_index"

# Prompt sent to the chat model. The text is assembled from adjacent string
# literals purely for readability; the resulting string is unchanged.
_PROMPT_TEMPLATE = (
    " You are a helpful assistant that ONLY answers based on the context "
    "provided from the PDF document. STRICT RULES: "
    "1. Do NOT use any external knowledge or assumptions. "
    "2. Do NOT make up information that is not in the context. "
    "3. If the answer is not found in the context below, reply with: "
    '"I don\'t have information about that in the uploaded document." '
    "4. Always base your answers strictly on the provided context. \n"
    "Context from PDF: {context} Question: {question} "
    "Answer (based only on the context above): "
)


def extract_faiss_index() -> bool:
    """Make sure the FAISS index files are present on disk.

    If ``index.faiss`` already exists under ``FAISS_INDEX_PATH`` nothing is
    done. Otherwise, if ``FAISS_ZIP_PATH`` exists, it is extracted into the
    current working directory.

    Returns:
        True if ``index.faiss`` exists under ``FAISS_INDEX_PATH`` when the
        function returns, False otherwise (no archive available, or the
        archive did not contain the expected layout).

    Raises:
        zipfile.BadZipFile: If the archive exists but is not a valid zip.
    """
    index_file = os.path.join(FAISS_INDEX_PATH, "index.faiss")

    # Already extracted
    if os.path.exists(index_file):
        return True

    if not os.path.exists(FAISS_ZIP_PATH):
        return False

    with zipfile.ZipFile(FAISS_ZIP_PATH, "r") as archive:
        archive.extractall(".")

    # Report success only if the archive actually produced the index file,
    # rather than assuming that any successful extraction did.
    return os.path.exists(index_file)


def get_conversational_chain(api_key: str):
    """Create the QA chain with strict context-only answering.

    Args:
        api_key: Google API key passed to the chat model.

    Returns:
        A runnable composed as prompt -> chat model -> string output parser.
        It is invoked with a mapping containing ``context`` and ``question``.
    """
    model = ChatGoogleGenerativeAI(
        model="gemini-3-flash-preview",
        temperature=0,
        google_api_key=api_key,
    )
    prompt = PromptTemplate(
        template=_PROMPT_TEMPLATE,
        input_variables=["context", "question"],
    )
    return prompt | model | StrOutputParser()


def format_docs(docs) -> str:
    """Join the ``page_content`` of each document, separated by blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


def user_input(user_question: str, vector_store, api_key: str):
    """Answer a question using the chunks most similar to it.

    Args:
        user_question: The question text, used both for retrieval and as the
            ``question`` prompt variable.
        vector_store: Object exposing ``similarity_search(query)``.
        api_key: Google API key used to build the QA chain.

    Returns:
        The chain's output for the retrieved context and the question.
    """
    docs = vector_store.similarity_search(user_question)
    chain = get_conversational_chain(api_key)
    return chain.invoke(
        {"context": format_docs(docs), "question": user_question}
    )


@st.cache_resource
def load_vector_store(_api_key):
    """Load the pre-built FAISS vector store.

    The leading underscore on ``_api_key`` is Streamlit's convention for
    excluding a parameter from the cache key, so the key is not hashed.

    Args:
        _api_key: Google API key passed to the embeddings client.

    Returns:
        The vector store returned by ``FAISS.load_local``.

    Raises:
        FileNotFoundError: If the index files are missing and could not be
            obtained from the zip archive.
    """
    # Extract zip if needed; fail early with a clear message instead of
    # letting the loader fail on a missing file.
    if not extract_faiss_index():
        raise FileNotFoundError(
            f"'index.faiss' was not found in '{FAISS_INDEX_PATH}' and could "
            f"not be extracted from '{FAISS_ZIP_PATH}'."
        )

    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=_api_key,
    )
    # SECURITY NOTE: allow_dangerous_deserialization=True lets the loader
    # unpickle the stored index metadata. Only ship index files that you
    # built yourself or otherwise trust.
    return FAISS.load_local(
        FAISS_INDEX_PATH,
        embeddings,
        allow_dangerous_deserialization=True,
    )


def main() -> None:
    """Render the Streamlit page: checks, index loading, question, answer."""
    st.set_page_config(
        page_title="Papal Encyclicals RAG",
        page_icon="📄",
        layout="centered",
        initial_sidebar_state="collapsed",
    )

    # Custom CSS
    st.markdown(""" """, unsafe_allow_html=True)

    # Header
    st.markdown("\n\nPapal Encyclicals RAG\n\n", unsafe_allow_html=True)
    st.markdown(
        "\n\nAsk questions about papal encyclicals and get answers based on "
        "the source document\n\n",
        unsafe_allow_html=True,
    )

    # Check for API key
    if not GOOGLE_API_KEY:
        st.error("Google API Key not found in environment variables.")
        st.info("Please add GOOGLE_API_KEY to your Hugging Face Space secrets.")
        st.stop()

    # Check if FAISS index or zip exists
    index_file = os.path.join(FAISS_INDEX_PATH, "index.faiss")
    if not os.path.exists(index_file) and not os.path.exists(FAISS_ZIP_PATH):
        st.error("FAISS index not found!")
        st.info(
            "Please upload faiss_index.zip or the faiss_index folder to your Space."
        )
        st.stop()

    # Load vector store (cached)
    with st.spinner("Loading index..."):
        try:
            vector_store = load_vector_store(GOOGLE_API_KEY)
        except Exception as e:  # UI boundary: report the failure and halt
            st.error(f"Error loading index: {e}")
            st.stop()

    # Status badge
    st.markdown("\n\nDocument ready\n\n", unsafe_allow_html=True)

    # Question input
    user_question = st.text_input(
        "Your question",
        placeholder="e.g., What are the main themes discussed in the encyclicals?",
        help="The AI will only answer based on the content of the uploaded PDF",
        label_visibility="visible",
    )

    # Ignore whitespace-only input so no retrieval/model call is made for it.
    user_question = user_question.strip() if user_question else ""

    if user_question:
        with st.spinner("Searching for answer..."):
            try:
                answer = user_input(user_question, vector_store, GOOGLE_API_KEY)
            except Exception as e:  # UI boundary: show the failure to the user
                st.error(f"Error getting answer: {e}")
            else:
                # The answer is model output derived from document text and is
                # rendered with unsafe_allow_html=True, so escape it to keep
                # any markup in it from being interpreted as HTML. quote=False
                # is sufficient because the value is not placed in an
                # attribute.
                safe_answer = html.escape(str(answer), quote=False)
                st.markdown(
                    f"\n\nAnswer\n\n{safe_answer}\n\n",
                    unsafe_allow_html=True,
                )


if __name__ == "__main__":
    main()