Rahaf2001 committed on
Commit
3d12ae8
·
verified ·
1 Parent(s): 288b8b4

Update rag_core.py

Browse files
Files changed (1) hide show
  1. rag_core.py +33 -18
rag_core.py CHANGED
@@ -27,46 +27,61 @@ def scrape_and_process_url(url: str) -> str:
27
  # This handles parsing and extracting main content from various web pages
28
  loader = WebBaseLoader(url)
29
  docs = loader.load()
30
-
31
  if not docs:
32
  return "Failed to load content from the URL. Please check the URL or try another one."
33
-
34
  # Split documents into smaller chunks
35
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
 
 
 
36
  chunks = text_splitter.split_documents(docs)
37
-
38
  # Create embeddings and vector store
39
  # Ensure OPENAI_API_KEY is set as an environment variable in Hugging Face Spaces
40
  embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
41
  vector_store = FAISS.from_documents(chunks, embeddings)
42
 
43
- # Create RAG chain
44
  prompt = ChatPromptTemplate.from_messages([
45
- ("system", "Use the following context to answer the user's question. "
46
- "If the answer is not in the context, say 'I cannot find that information in the provided documentation.' "
47
- "Do not add any information not found in the context. "
48
- "Context:\n{context}"),
 
 
 
 
 
 
 
 
49
  ("user", "{input}")
50
  ])
 
51
  document_chain = create_stuff_documents_chain(llm, prompt)
52
- retrieval_chain = create_retrieval_chain(vector_store.as_retriever(), document_chain)
53
-
54
- return f"Successfully scraped and processed content from {url}. You can now ask questions."
55
-
 
 
 
56
  except Exception as e:
57
- return f"An error occurred during scraping or processing: {str(e)}"
58
 
59
  def answer_question(question: str) -> str:
60
  global retrieval_chain
 
61
  if retrieval_chain is None:
62
- return "Please scrape and process a URL first before asking questions."
63
 
64
  try:
65
  response = retrieval_chain.invoke({"input": question})
66
  return response["answer"]
67
  except Exception as e:
68
- return f"An error occurred while answering the question: {str(e)}"
69
 
70
  # Initialize LLM when the module is imported
71
- initialize_rag_components()
72
-
 
27
  # This handles parsing and extracting main content from various web pages
28
  loader = WebBaseLoader(url)
29
  docs = loader.load()
30
+
31
  if not docs:
32
  return "Failed to load content from the URL. Please check the URL or try another one."
33
+
34
  # Split documents into smaller chunks
35
+ text_splitter = RecursiveCharacterTextSplitter(
36
+ chunk_size=1000,
37
+ chunk_overlap=200
38
+ )
39
  chunks = text_splitter.split_documents(docs)
40
+
41
  # Create embeddings and vector store
42
  # Ensure OPENAI_API_KEY is set as an environment variable in Hugging Face Spaces
43
  embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
44
  vector_store = FAISS.from_documents(chunks, embeddings)
45
 
46
+ # Create RAG chain with polished prompt
47
  prompt = ChatPromptTemplate.from_messages([
48
+ ("system", """You are a knowledgeable and friendly assistant helping users understand documentation. Answer questions naturally and conversationally, as if you're explaining to a colleague.
49
+
50
+ Your task:
51
+ - Read the context carefully and provide clear, helpful answers based on what's there
52
+ - Explain concepts in a simple, approachable way that anyone can understand
53
+ - If you find the answer in the context, explain it thoroughly with examples when available
54
+ - Be direct and confident in your responses - act like an expert who knows this documentation well
55
+ - If the information isn't in the context, simply say "I don't see that information in this documentation"
56
+ - Use a warm, professional tone - like a helpful coworker, not a robot
57
+
58
+ Context from documentation:
59
+ {context}"""),
60
  ("user", "{input}")
61
  ])
62
+
63
  document_chain = create_stuff_documents_chain(llm, prompt)
64
+ retrieval_chain = create_retrieval_chain(
65
+ vector_store.as_retriever(search_kwargs={"k": 4}),
66
+ document_chain
67
+ )
68
+
69
+ return f"✅ Successfully scraped and processed content from {url}.\n\nDocument chunks created: {len(chunks)}\n\nYou can now ask questions about the documentation!"
70
+
71
  except Exception as e:
72
+ return f"An error occurred during scraping or processing: {str(e)}"
73
 
74
  def answer_question(question: str) -> str:
75
  global retrieval_chain
76
+
77
  if retrieval_chain is None:
78
+ return "⚠️ Please scrape and process a URL first before asking questions."
79
 
80
  try:
81
  response = retrieval_chain.invoke({"input": question})
82
  return response["answer"]
83
  except Exception as e:
84
+ return f"An error occurred while answering the question: {str(e)}"
85
 
86
  # Initialize LLM when the module is imported
87
+ initialize_rag_components()