# Page header captured together with this file (not part of the program):
#   FD900's picture | Update app.py | 8b1008c verified | raw | history blame | 2.62 kB
# Standard imports
import os
import sys
import warnings
# LangChain community imports (Updated for v0.2+)
from langchain_community.document_loaders import TextLoader, PyPDFLoader, CSVLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import OpenAI
from langchain_community.tools import DuckDuckGoSearchRun
# Other imports (you may have these depending on use)
from langchain.chains import RetrievalQA
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
# Make sure OPENAI_API_KEY exists in the process environment. A value that is
# already set is kept; otherwise the placeholder below is installed.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key-here")
def load_documents(directory: str):
    """Load every supported document found directly inside *directory*.

    Files are selected by extension, compared case-insensitively:
    ``.txt`` is read with ``TextLoader``, ``.pdf`` with ``PyPDFLoader`` and
    ``.csv`` with ``CSVLoader``. Entries with any other extension, and
    entries that are not regular files (such as sub-directories), are
    skipped. The directory is not searched recursively.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A flat list holding everything the loaders' ``load()`` calls
        returned, ordered by sorted filename.

    Raises:
        OSError: Propagated from ``os.listdir`` when *directory* cannot be
            listed (for example, because it does not exist).
    """
    docs = []
    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # ingestion order identical from one run to the next.
    for filename in sorted(os.listdir(directory)):
        filepath = os.path.join(directory, filename)
        if not os.path.isfile(filepath):
            # The loaders are given a file path; a directory that merely
            # looks like "something.txt" must not reach them.
            continue
        # Lower-case once so "REPORT.PDF" is treated like "report.pdf".
        lowered = filename.lower()
        if lowered.endswith(".txt"):
            loader = TextLoader(filepath)
        elif lowered.endswith(".pdf"):
            loader = PyPDFLoader(filepath)
        elif lowered.endswith(".csv"):
            loader = CSVLoader(filepath)
        else:
            continue
        docs.extend(loader.load())
    return docs
def build_vector_store(docs):
    """Build a FAISS index from *docs* using OpenAI embeddings.

    Args:
        docs: Documents to embed and index. Must contain at least one item.

    Returns:
        Whatever ``FAISS.from_documents`` returns for *docs* and a fresh
        ``OpenAIEmbeddings`` instance.

    Raises:
        ValueError: If *docs* is empty.
    """
    if not docs:
        # Fail early with a clear message; with nothing to embed the index
        # construction is expected to fail later with a much less helpful
        # error.
        raise ValueError("Cannot build a vector store from an empty document list.")
    embeddings = OpenAIEmbeddings()
    return FAISS.from_documents(docs, embeddings)
def build_qa_chain(vectorstore):
    """Return a RetrievalQA chain that answers from *vectorstore*.

    The chain is created with ``RetrievalQA.from_chain_type``, pairing the
    store's retriever with an ``OpenAI`` LLM configured at temperature 0.
    """
    # Keyword order keeps the retriever created before the LLM, as before.
    return RetrievalQA.from_chain_type(
        retriever=vectorstore.as_retriever(),
        llm=OpenAI(temperature=0),
    )
def main(data_path: str = "data/"):
    """Index the documents under *data_path* and answer questions interactively.

    The documents are loaded, split into 1000-character chunks with a
    100-character overlap, embedded into a vector store and wrapped in a QA
    chain. Questions are then read from standard input until the user types
    ``exit`` or ``quit`` (case-insensitive) or closes the input stream.

    Args:
        data_path: Directory holding the source documents. Defaults to
            ``"data/"``, the location this script has always used.

    Returns:
        None. Progress, answers and errors are printed to standard output.
    """
    if not os.path.isdir(data_path):
        print(f"[ERROR] Data directory not found: {data_path}")
        return

    print("[INFO] Loading documents...")
    documents = load_documents(data_path)

    print("[INFO] Splitting text...")
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    split_docs = splitter.split_documents(documents)
    if not split_docs:
        # Nothing to index: stop here instead of failing while building the
        # vector store.
        print(f"[ERROR] No indexable text found in supported documents under: {data_path}")
        return

    print("[INFO] Creating vector store...")
    vectorstore = build_vector_store(split_docs)

    print("[INFO] Building QA chain...")
    qa_chain = build_qa_chain(vectorstore)

    print("\n[READY] Ask questions (type 'exit' to quit):\n")
    while True:
        try:
            question = input("Q: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C or a closed stdin ends the session cleanly
            # rather than with a traceback.
            print("\nGoodbye!")
            break
        if not question:
            # Do not spend an LLM call on a blank line.
            continue
        if question.lower() in ("exit", "quit"):
            print("Goodbye!")
            break
        # Chain.run() is deprecated; invoke() takes the chain's input key
        # ("query") and returns a dict whose "result" entry is the answer.
        answer = qa_chain.invoke({"query": question})["result"]
        print("A:", answer)
# Script entry point: hide DeprecationWarning output, then start the session.
if __name__ == "__main__":
    warnings.simplefilter("ignore", DeprecationWarning)
    main()