# Source: Hugging Face Space app.py by engrrifatullah (commit 1f03038, verified)
import numpy # Ensure NumPy is loaded first to avoid FAISS issues
import faiss # Load FAISS after NumPy
import os
import streamlit as st
import pandas as pd
import pdfplumber
from sentence_transformers import SentenceTransformer
from groq import Groq
import numpy as np
# --- Configuration ----------------------------------------------------------
# SECURITY FIX: the Groq API key was hard-coded (and therefore leaked) in
# source. Read it from the environment instead — set GROQ_API_KEY in the
# deployment's secrets/env before running the app.
API_KEY = os.environ.get("GROQ_API_KEY", "")
client = Groq(api_key=API_KEY)

# Sentence-embedding model, shared by both document chunks and user questions
# (the same model must encode both sides for the FAISS search to be meaningful).
embed_model = SentenceTransformer('all-MiniLM-L6-v2')
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of all pages in *pdf_file*.

    FIX: ``page.extract_text()`` returns ``None`` for pages with no
    extractable text (e.g. scanned/image-only pages), and joining ``None``
    raises ``TypeError``. Such pages are skipped instead.

    Parameters
    ----------
    pdf_file : file-like object or path accepted by ``pdfplumber.open``.

    Returns
    -------
    str : page texts joined with single spaces ('' if nothing extractable).
    """
    with pdfplumber.open(pdf_file) as pdf:
        return ' '.join(
            page_text
            for page in pdf.pages
            if (page_text := page.extract_text())
        )
# Function to create embeddings and store them in FAISS
def create_embeddings(text):
    """Split *text* into 500-character chunks, embed them, and index them.

    FIX: FAISS requires contiguous float32 input. The query path already
    casts to float32 (see ``get_relevant_chunk``); cast here as well so
    ``index.add`` never receives a mismatched dtype.

    Parameters
    ----------
    text : str — full document text to index.

    Returns
    -------
    (chunks, embeddings, index) : list[str], np.ndarray (float32), faiss.IndexFlatL2
    """
    chunks = [text[i:i + 500] for i in range(0, len(text), 500)]
    embeddings = np.asarray(embed_model.encode(chunks), dtype=np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])  # L2 (Euclidean) flat index
    index.add(embeddings)
    return chunks, embeddings, index
# Function to find the most relevant chunk for the user's question
def get_relevant_chunk(question, embeddings, index, chunks):
    """Return the single document chunk nearest to *question* in embedding space.

    The question is embedded with the same model used for the chunks, then a
    top-1 nearest-neighbour search is run against the FAISS index.
    """
    query_vec = np.asarray(embed_model.encode([question])).astype(np.float32)
    _distances, neighbour_ids = index.search(query_vec, 1)  # top-1 lookup
    best_id = neighbour_ids[0][0]
    return chunks[best_id]
# Function to get the model's response from Groq API
def get_answer_from_groq(question, context):
    """Ask the Groq-hosted LLM *question* grounded on *context*; return its reply text."""
    prompt = (
        f"Answer the following question based on the context:\n"
        f"Context: {context}\nQuestion: {question}"
    )
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-8b-8192",
    )
    return response.choices[0].message.content
# Streamlit app
def main():
    """Streamlit entry point: upload a document, index it, and answer questions.

    Flow: upload (PDF/CSV/XLSX) -> extract text -> chunk + embed into FAISS
    -> retrieve top-1 chunk for a user question -> answer via Groq LLM.

    FIX: the original had no fallback when the uploaded file's MIME type
    matched neither branch (browsers occasionally report nonstandard types,
    especially for .xlsx), leaving ``text`` unbound and crashing with a
    NameError further down. An explicit else-guard now reports the problem
    and returns early.
    """
    st.set_page_config(
        page_title="RAG Based Application",
        page_icon="📄",
        layout="centered",
    )

    # Custom CSS for styling
    st.markdown(
        """
        <style>
        body {
        background-color: #f4f7f9;
        }
        .main-header {
        font-size: 2.5rem;
        color: #1d3557;
        text-align: center;
        margin-bottom: 1rem;
        }
        .upload-box {
        border: 2px dashed #457b9d;
        border-radius: 10px;
        padding: 1rem;
        text-align: center;
        background-color: #f1faee;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )

    # App title and description
    st.markdown('<div class="main-header">RAG Based Application</div>', unsafe_allow_html=True)
    st.write("Upload your document (PDF, CSV, or Excel) to process and generate embeddings stored in a FAISS index.")

    # File upload section
    uploaded_file = st.file_uploader("Drag and drop or browse files", type=["pdf", "csv", "xlsx"])
    if uploaded_file:
        # Identify file type (MIME type reported by the browser)
        file_type = uploaded_file.type
        st.markdown('<div class="upload-box">File Uploaded Successfully!</div>', unsafe_allow_html=True)

        # Extract content
        if file_type == "application/pdf":
            text = extract_text_from_pdf(uploaded_file)
        elif file_type in ["text/csv", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"]:
            df = pd.read_csv(uploaded_file) if file_type == "text/csv" else pd.read_excel(uploaded_file)
            text = df.to_string()
        else:
            # Guard: unrecognized MIME type — without this, `text` would be
            # unbound and every use below would raise NameError.
            st.error(f"Unsupported file type: {file_type}")
            return

        # Display content
        st.subheader("Document Content:")
        st.text_area("Extracted Text", text, height=300)

        # Create embeddings (re-runs on every Streamlit rerun; acceptable for small docs)
        st.write("🔄 Creating embeddings... This may take a moment.")
        chunks, embeddings, index = create_embeddings(text)
        st.success("✅ Embeddings created and stored in FAISS index!")

        # Question Section
        question = st.text_input("Ask a question based on the uploaded document:")
        if question:
            # Retrieve the most relevant chunk for the question
            relevant_chunk = get_relevant_chunk(question, embeddings, index, chunks)
            # Get the model's answer based on the relevant chunk
            st.write("🔄 Retrieving the answer...")
            answer = get_answer_from_groq(question, relevant_chunk)
            # Display the answer
            st.subheader("Answer:")
            st.write(answer)

        # Summary Section
        st.subheader("Process Summary:")
        st.write("- Uploaded file type:", file_type)
        st.write("- Number of chunks processed:", len(text) // 500 + 1)
# Run the app only when executed as a script (not when imported as a module).
if __name__ == "__main__":
    main()