| |
| import PyPDF2 |
| from getpass import getpass |
| from haystack.nodes import PreProcessor, PromptModel, PromptTemplate, PromptNode, AnswerParser |
| from haystack.document_stores import InMemoryDocumentStore |
| from haystack import Document, Pipeline |
| from haystack.nodes import BM25Retriever |
| from pprint import pprint |
| import streamlit as st |
| import logging |
| from dotenv import load_dotenv |
| load_dotenv() |
| import os |
| import logging |
| logging.basicConfig(level=logging.DEBUG) |
|
|
| |
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Parameters
    ----------
    pdf_path : str
        Filesystem path to the PDF file.

    Returns
    -------
    str
        All page texts joined together. Pages for which PyPDF2 cannot
        extract text (``extract_text()`` returns ``None``, e.g. scanned
        image-only pages) contribute an empty string instead of raising.
    """
    with open(pdf_path, "rb") as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        # Iterate pages directly instead of index-looping over range(len(...)),
        # and build the result with one join() rather than quadratic "+=".
        return "".join(page.extract_text() or "" for page in reader.pages)
|
|
| |
# Source corpus: the full text of the Mprofy PDF, wrapped as a single
# Haystack Document so the BM25 store can index it.
pdf_file_path = "Data/MR. MPROFY.pdf"

pdf_text = extract_text_from_pdf(pdf_file_path)
if not pdf_text:
    # Fail fast: an empty corpus would make every retrieval come back empty.
    raise ValueError("No text extracted from PDF.")

doc_meta = {"name": "MR. MPROFY"}
doc = Document(content=pdf_text, meta=doc_meta)
|
|
| |
# Index the document in an in-memory BM25 store, then build a sparse
# retriever over it that returns the two best-matching documents per query.
corpus = [doc]
document_store = InMemoryDocumentStore(use_bm25=True)
document_store.write_documents(corpus)

retriever = BM25Retriever(document_store=document_store, top_k=2)
|
|
| |
# Prompt contract for the LLM node: answer directly from the retrieved
# context, never ask follow-up questions, and admit when the context lacks
# the answer. `{join(documents)}` and `{query}` are Haystack template
# placeholders filled in at run time; AnswerParser extracts the generated
# text after "Answer:" into Answer objects.
qa_template = PromptTemplate(
    prompt="""
Hi, I'm Mprofier, your friendly AI assistant. I'm here to provide direct and concise answers to your specific questions.
I won’t ask any follow-up questions myself.
If I can't find the answer in the provided context, I'll simply state that I don't have enough information to answer.
Context: {join(documents)};
Question: {query}
Answer:
""",
    output_parser=AnswerParser()
)
|
|
| |
| HF_TOKEN = os.getenv['HF_TOKEN'] |
|
|
| |
# Generator node: Mixtral-8x7B-Instruct served via the Hugging Face
# Inference API, wired to the QA prompt template.
_prompt_node_config = dict(
    model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
    api_key=HF_TOKEN,
    default_prompt_template=qa_template,
    max_length=500,  # cap on generated tokens
    model_kwargs={"model_max_length": 5000},
)
prompt_node = PromptNode(**_prompt_node_config)
|
|
| |
# Two-stage RAG pipeline: BM25 retrieval feeds its documents into the
# prompt node, which generates the final answer.
rag_pipeline = Pipeline()
for node_name, component, node_inputs in (
    ("retriever", retriever, ["Query"]),
    ("prompt_node", prompt_node, ["retriever"]),
):
    rag_pipeline.add_node(component=component, name=node_name, inputs=node_inputs)
|
|
| |
def run_streamlit_app():
    """Render the Streamlit UI: a question box and the pipeline's top answer.

    Runs the module-level ``rag_pipeline`` on the submitted query and
    writes the first answer, or a fallback message when none is returned.
    """
    st.title("Mprofier - AI Assistant")
    query_text = st.text_input("Enter your question:")

    if st.button("Get Answer"):
        # Guard against blank submissions — running the pipeline on an
        # empty query wastes a remote API call and can return nothing useful.
        if not query_text.strip():
            st.write("Please enter a question first.")
            return
        response = rag_pipeline.run(query=query_text)
        # Use .get(): hard-indexing response["answers"] raised KeyError
        # (instead of showing the fallback) when the key was absent.
        answers = response.get("answers") or []
        answer = answers[0].answer if answers else "No answer found."
        st.write(answer)
|
|
| |
# Script entry point: launch the UI only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    run_streamlit_app()