Spaces:
Sleeping
Sleeping
| import requests | |
| from bs4 import BeautifulSoup | |
| from langchain_community.document_loaders import WebBaseLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.embeddings import OpenAIEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain.chains import create_retrieval_chain | |
| from langchain.chains.combine_documents import create_stuff_documents_chain | |
| from langchain_openai import ChatOpenAI | |
| from langchain_core.prompts import ChatPromptTemplate | |
| import os | |
# --- Global variables for RAG components ---
# Module-level state shared between scrape_and_process_url() and
# answer_question(); populated lazily, not at import time.
vector_store = None  # FAISS index over the scraped page (set by scrape_and_process_url)
llm = None  # shared ChatOpenAI instance (set by initialize_rag_components)
retrieval_chain = None  # retriever + LLM QA chain (set by scrape_and_process_url)
def initialize_rag_components(model_name: str = "gpt-3.5-turbo", temperature: float = 0.3) -> None:
    """Create the shared ChatOpenAI LLM used by the RAG chain.

    The instance is stored in the module-level ``llm`` global so that
    scrape_and_process_url() can build a retrieval chain around it.

    Args:
        model_name: OpenAI chat model identifier. The default preserves the
            previously hard-coded choice, so existing callers are unaffected.
        temperature: Sampling temperature; the low default favors grounded,
            less creative answers for documentation Q&A.

    NOTE(review): presumably requires OPENAI_API_KEY in the environment —
    confirm against the ChatOpenAI client's credential handling.
    """
    global llm
    llm = ChatOpenAI(model=model_name, temperature=temperature)
def scrape_and_process_url(url: str) -> str:
    """Scrape ``url``, chunk and embed its text, and build the RAG chain.

    Side effects: rebuilds the module-level ``vector_store`` and
    ``retrieval_chain`` globals, replacing any previously indexed page.

    Args:
        url: Web page (typically documentation) to ingest.

    Returns:
        A human-readable status string: a success summary including the
        number of chunks created, or an error message for the UI.

    Requires OPENAI_API_KEY in the environment (used by both the embedding
    model and the chat model).
    """
    global vector_store, retrieval_chain
    try:
        # Robustness: if the module-level initialization was skipped or
        # failed, llm is still None and chain construction below would fail
        # with a confusing error. Initialize lazily inside the try so any
        # failure surfaces as the user-facing error string.
        if llm is None:
            initialize_rag_components()

        # Scrape content using WebBaseLoader for simplicity and robustness.
        # This handles parsing and extracting main content from various web pages.
        loader = WebBaseLoader(url)
        docs = loader.load()
        if not docs:
            return "Failed to load content from the URL. Please check the URL or try another one."

        # Split documents into overlapping chunks so retrieval returns
        # focused passages while preserving context across boundaries.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        chunks = text_splitter.split_documents(docs)

        # Create embeddings and an in-memory FAISS vector store.
        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        vector_store = FAISS.from_documents(chunks, embeddings)

        # Build the RAG chain: retriever -> stuff-documents prompt -> LLM.
        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a knowledgeable and friendly assistant helping users understand documentation. Answer questions naturally and conversationally, as if you're explaining to a colleague.
Your task:
- Read the context carefully and provide clear, helpful answers based on what's there
- Explain concepts in a simple, approachable way that anyone can understand
- If you find the answer in the context, explain it thoroughly with examples when available
- Be direct and confident in your responses - act like an expert who knows this documentation well
- If the information isn't in the context, simply say "I don't see that information in this documentation"
- Use a warm, professional tone - like a helpful coworker, not a robot
Context from documentation:
{context}"""),
            ("user", "{input}")
        ])
        document_chain = create_stuff_documents_chain(llm, prompt)
        retrieval_chain = create_retrieval_chain(
            # k=4: feed the top-4 most similar chunks into the prompt.
            vector_store.as_retriever(search_kwargs={"k": 4}),
            document_chain
        )
        return f"✅ Successfully scraped and processed content from {url}.\n\nDocument chunks created: {len(chunks)}\n\nYou can now ask questions about the documentation!"
    except Exception as e:
        # Broad catch is deliberate: this function backs a UI and must
        # always return a displayable string rather than raise.
        return f"❌ An error occurred during scraping or processing: {str(e)}"
def answer_question(question: str) -> str:
    """Answer a user question against the currently indexed documentation.

    Returns the chain's answer text, a warning string when no URL has been
    processed yet, or an error string if the chain invocation fails.
    """
    # Read-only access to the module-level chain; no `global` needed.
    chain = retrieval_chain
    if chain is None:
        return "⚠️ Please scrape and process a URL first before asking questions."
    try:
        result = chain.invoke({"input": question})
    except Exception as exc:
        return f"❌ An error occurred while answering the question: {str(exc)}"
    return result["answer"]
# Initialize LLM when the module is imported so the app can serve requests
# immediately. NOTE(review): if ChatOpenAI construction raises here (e.g.
# credential problems), the whole module fails to import — confirm this is
# the desired startup behavior.
initialize_rag_components()