Spaces:

rahideer
/

dataset

Sleeping

App Files Files Community

dataset / app.py

rahideer

Update app.py

184a854 verified about 1 year ago

raw

history blame contribute delete

2.14 kB

	import streamlit as st
	import PyPDF2
	import os
	from sentence_transformers import SentenceTransformer
	import faiss
	import numpy as np
	from transformers import pipeline

	# Page-level configuration; kept as the first Streamlit call, as in the
	# original ordering.
	st.set_page_config(layout="wide", page_title="📘 PDF RAG QA")

	# Theme overrides (background, heading colour, input border, button colours)
	# injected as a raw <style> block; unsafe_allow_html lets the tag through.
	_PAGE_CSS = """
	<style>
	.main {background-color: #f7faff;}
	h1 {color: #4a4a8a;}
	.stTextInput>div>div>input {border: 2px solid #d0d7ff;}
	.stButton button {background-color: #4a4a8a; color: white;}
	</style>
	"""
	st.markdown(_PAGE_CSS, unsafe_allow_html=True)

	# Page heading and a one-line description of the approach.
	st.title("📘 Ask Me Anything About Machine Learning")
	st.caption("Using RAG (Retrieval-Augmented Generation) and a preloaded PDF")

	# Location of the PDF that setup_rag() hands to load_pdf(). The path is
	# relative, so open() resolves it against the process's working directory.
	PDF_FILE = "ml_large_dataset.pdf"

	def load_pdf(file_path):
	    """Extract the text of every page of a PDF file.

	    Args:
	        file_path: Path of the PDF to open (opened in binary mode).

	    Returns:
	        A list with one entry per page, in page order, holding whatever
	        ``extract_text()`` returned for that page.
	    """
	    page_texts = []
	    # Extraction happens while the file is still open, because the reader
	    # pulls page data from the underlying stream.
	    with open(file_path, "rb") as pdf_stream:
	        pdf = PyPDF2.PdfReader(pdf_stream)
	        for pdf_page in pdf.pages:
	            page_texts.append(pdf_page.extract_text())
	    return page_texts

	def chunk_text(pages, max_len=1000):
	    """Split page texts into consecutive chunks of at most ``max_len`` words.

	    All pages are joined with single spaces, split on whitespace, and the
	    resulting words are regrouped ``max_len`` at a time.

	    Args:
	        pages: Iterable of page strings. ``None`` or empty entries are
	            skipped, since they contribute no words.
	        max_len: Maximum number of whitespace-separated words per chunk;
	            must be at least 1.

	    Returns:
	        A list of chunk strings whose words are re-joined with single
	        spaces. The list is empty when the pages contain no words.

	    Raises:
	        ValueError: If ``max_len`` is less than 1.
	    """
	    # A zero step makes range() fail with an unrelated-looking message and a
	    # negative one silently yields no chunks, so reject both up front.
	    if max_len < 1:
	        raise ValueError(f"max_len must be a positive integer, got {max_len!r}")
	    # Drop None/"" pages so the join cannot fail on a page without text.
	    words = " ".join(page for page in pages if page).split()
	    return [
	        " ".join(words[start:start + max_len])
	        for start in range(0, len(words), max_len)
	    ]

	@st.cache_resource
	def setup_rag():
	    """Build the retrieval and question-answering components for the app.

	    Loads ``PDF_FILE``, chunks its text, embeds every chunk with the
	    ``all-MiniLM-L6-v2`` sentence-transformer, stores the embeddings in a
	    flat L2 FAISS index, and creates a ``deepset/roberta-base-squad2``
	    question-answering pipeline. The function is decorated with
	    ``st.cache_resource`` so Streamlit can reuse the returned objects
	    instead of rebuilding them.

	    Returns:
	        A ``(chunks, model, index, qa)`` tuple: the chunk strings, the
	        embedding model, the FAISS index whose vectors were added in the
	        same order as ``chunks``, and the QA pipeline.

	    Raises:
	        ValueError: If no text could be extracted from ``PDF_FILE``.
	    """
	    pages = load_pdf(PDF_FILE)
	    chunks = chunk_text(pages)
	    # Without chunks the embedding array has no second dimension, and the
	    # index construction below would fail with an obscure IndexError.
	    if not chunks:
	        raise ValueError(
	            f"No extractable text found in {PDF_FILE!r}; cannot build the index."
	        )
	    model = SentenceTransformer('all-MiniLM-L6-v2')
	    # FAISS works on float32 vectors; make the dtype explicit.
	    embeddings = np.array(model.encode(chunks), dtype="float32")
	    index = faiss.IndexFlatL2(embeddings.shape[1])
	    index.add(embeddings)
	    qa = pipeline("question-answering", model="deepset/roberta-base-squad2")
	    return chunks, model, index, qa

	def retrieve_answer(question, chunks, model, index, qa_pipeline, k=6):
	    """Answer ``question`` using the chunks nearest to it in the index.

	    Args:
	        question: Question text; embedded with ``model`` and also passed to
	            ``qa_pipeline``.
	        chunks: Sequence of text chunks; ids returned by ``index`` are used
	            as positions into this sequence.
	        model: Object whose ``encode(list_of_str)`` returns one embedding
	            per string.
	        index: Object whose ``search(vectors, k)`` returns a
	            ``(distances, ids)`` pair.
	        qa_pipeline: Callable accepting ``question=`` and ``context=`` and
	            returning a mapping with an ``"answer"`` key.
	        k: Maximum number of chunks to retrieve.

	    Returns:
	        The ``"answer"`` value produced by ``qa_pipeline``, or ``""`` when
	        no chunk could be retrieved.
	    """
	    # Never ask for more neighbours than there are chunks: FAISS pads missing
	    # results with -1 ids, and chunks[-1] would silently repeat the last
	    # chunk in the context.
	    k = min(k, len(chunks))
	    if k < 1:
	        return ""
	    q_embed = np.array(model.encode([question]), dtype="float32")
	    _, neighbour_ids = index.search(q_embed, k)
	    # Defensive filter in case the index holds fewer vectors than `chunks`.
	    hits = [chunks[i] for i in neighbour_ids[0] if 0 <= i < len(chunks)]
	    if not hits:
	        return ""
	    context = "\n\n".join(hits)
	    result = qa_pipeline(question=question, context=context)
	    return result['answer']

	# Build the retrieval components (the embedding model, FAISS index and QA
	# pipeline come from setup_rag(), which is wrapped in st.cache_resource).
	chunks, embed_model, faiss_index, qa_model = setup_rag()

	st.subheader("💬 Ask a Question")
	question = st.text_input("Enter your question:", placeholder="e.g., What is supervised learning?")

	# Strip first so whitespace-only input does not trigger a retrieval + QA run.
	query = question.strip() if question else ""
	if query:
	    with st.spinner("🧠 Searching for the answer..."):
	        answer = retrieve_answer(query, chunks, embed_model, faiss_index, qa_model)
	    st.markdown("#### 📖 Answer:")
	    st.write(answer)