Spaces:

Rahaf2001
/

RAG-Project

Sleeping

App Files Community

Rahaf2001 committed on Oct 22, 2025

Commit

3d12ae8

verified ·

1 Parent(s): 288b8b4

Update rag_core.py

Browse files

Files changed (1) hide show

rag_core.py +33 -18

rag_core.py CHANGED Viewed

@@ -27,46 +27,61 @@ def scrape_and_process_url(url: str) -> str:
         # This handles parsing and extracting main content from various web pages
         loader = WebBaseLoader(url)
         docs = loader.load()
         if not docs:
             return "Failed to load content from the URL. Please check the URL or try another one."
         # Split documents into smaller chunks
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
         chunks = text_splitter.split_documents(docs)
         # Create embeddings and vector store
         # Ensure OPENAI_API_KEY is set as an environment variable in Hugging Face Spaces
         embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
         vector_store = FAISS.from_documents(chunks, embeddings)
-        # Create RAG chain
         prompt = ChatPromptTemplate.from_messages([
-            ("system", "Use the following context to answer the user's question. "
-             "If the answer is not in the context, say 'I cannot find that information in the provided documentation.' "
-             "Do not add any information not found in the context. "
-             "Context:\n{context}"),
             ("user", "{input}")
         ])
         document_chain = create_stuff_documents_chain(llm, prompt)
-        retrieval_chain = create_retrieval_chain(vector_store.as_retriever(), document_chain)
-        return f"Successfully scraped and processed content from {url}. You can now ask questions."
     except Exception as e:
-        return f"An error occurred during scraping or processing: {str(e)}"
 def answer_question(question: str) -> str:
     global retrieval_chain
     if retrieval_chain is None:
-        return "Please scrape and process a URL first before asking questions."
     try:
         response = retrieval_chain.invoke({"input": question})
         return response["answer"]
     except Exception as e:
-        return f"An error occurred while answering the question: {str(e)}"
 # Initialize LLM when the module is imported
-initialize_rag_components()

         # This handles parsing and extracting main content from various web pages
         loader = WebBaseLoader(url)
         docs = loader.load()
         if not docs:
             return "Failed to load content from the URL. Please check the URL or try another one."
         # Split documents into smaller chunks
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000,
+            chunk_overlap=200
+        )
         chunks = text_splitter.split_documents(docs)
         # Create embeddings and vector store
         # Ensure OPENAI_API_KEY is set as an environment variable in Hugging Face Spaces
         embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
         vector_store = FAISS.from_documents(chunks, embeddings)
+        # Create RAG chain with polished prompt
         prompt = ChatPromptTemplate.from_messages([
+            ("system", """You are a knowledgeable and friendly assistant helping users understand documentation. Answer questions naturally and conversationally, as if you're explaining to a colleague.
+Your task:
+- Read the context carefully and provide clear, helpful answers based on what's there
+- Explain concepts in a simple, approachable way that anyone can understand
+- If you find the answer in the context, explain it thoroughly with examples when available
+- Be direct and confident in your responses - act like an expert who knows this documentation well
+- If the information isn't in the context, simply say "I don't see that information in this documentation"
+- Use a warm, professional tone - like a helpful coworker, not a robot
+Context from documentation:
+{context}"""),
             ("user", "{input}")
         ])
         document_chain = create_stuff_documents_chain(llm, prompt)
+        retrieval_chain = create_retrieval_chain(
+            vector_store.as_retriever(search_kwargs={"k": 4}),
+            document_chain
+        )
+        return f"✅ Successfully scraped and processed content from {url}.\n\nDocument chunks created: {len(chunks)}\n\nYou can now ask questions about the documentation!"
     except Exception as e:
+        return f"❌ An error occurred during scraping or processing: {str(e)}"
 def answer_question(question: str) -> str:
     global retrieval_chain
     if retrieval_chain is None:
+        return "⚠️ Please scrape and process a URL first before asking questions."
     try:
         response = retrieval_chain.invoke({"input": question})
         return response["answer"]
     except Exception as e:
+        return f"❌ An error occurred while answering the question: {str(e)}"
 # Initialize LLM when the module is imported
+initialize_rag_components()