Spaces:
Sleeping
Sleeping
| # Standard imports | |
| import os | |
| import sys | |
| import warnings | |
| # LangChain community imports (Updated for v0.2+) | |
| from langchain_community.document_loaders import TextLoader, PyPDFLoader, CSVLoader | |
| from langchain_community.embeddings import OpenAIEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.llms import OpenAI | |
| from langchain_community.tools import DuckDuckGoSearchRun | |
| # Core LangChain imports: retrieval QA chain, text splitter, document schema | |
| from langchain.chains import RetrievalQA | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.schema import Document | |
# Make sure OPENAI_API_KEY is defined for the OpenAI clients created below.
# A real key should be exported in the environment; when it is absent the
# placeholder keeps the variable defined, but it is not a usable credential,
# so say so up front instead of leaving it to surface as a later API error.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key-here")
if os.environ["OPENAI_API_KEY"] in ("", "your-api-key-here"):
    print(
        "[WARN] OPENAI_API_KEY is not set; using a placeholder value. "
        "Export a real key before asking questions.",
        file=sys.stderr,
    )
def load_documents(directory: str):
    """Load every supported document found directly inside ``directory``.

    Files are matched by extension, case-insensitively: ``.txt`` is read with
    ``TextLoader``, ``.pdf`` with ``PyPDFLoader`` and ``.csv`` with
    ``CSVLoader``. Anything else, including sub-directories, is skipped.
    Entries are visited in sorted name order so the result is reproducible.

    Args:
        directory: Path of the folder to scan (not searched recursively).

    Returns:
        A flat list of everything the loaders returned, in file-name order.

    Raises:
        OSError: If ``directory`` cannot be listed (from ``os.listdir``).
    """
    docs = []
    # os.listdir() returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(directory)):
        filepath = os.path.join(directory, filename)
        # A folder named e.g. "archive.txt" must not be handed to a file loader.
        if not os.path.isfile(filepath):
            continue

        lowered = filename.lower()  # accept "REPORT.PDF" as well as "report.pdf"
        if lowered.endswith(".txt"):
            loader = TextLoader(filepath)
        elif lowered.endswith(".pdf"):
            loader = PyPDFLoader(filepath)
        elif lowered.endswith(".csv"):
            loader = CSVLoader(filepath)
        else:
            continue
        docs.extend(loader.load())
    return docs
def build_vector_store(docs):
    """Build a FAISS index from documents using OpenAI embeddings.

    Args:
        docs: Documents (typically already split into chunks) to embed.

    Returns:
        The vector store produced by ``FAISS.from_documents``.

    Raises:
        ValueError: If ``docs`` is empty; there is nothing to index, and
            failing here gives a clearer message than an error raised from
            inside the vector-store construction.
    """
    if not docs:
        raise ValueError("No documents to index: the document list is empty.")
    embeddings = OpenAIEmbeddings()
    return FAISS.from_documents(docs, embeddings)
def build_qa_chain(vectorstore):
    """Wire the vector store's retriever to an OpenAI LLM as a RetrievalQA chain.

    The LLM is created with ``temperature=0``; the retriever is whatever
    ``vectorstore.as_retriever()`` returns with its default settings.
    """
    # Keyword arguments are evaluated left to right, so the retriever is
    # still created before the LLM, as in the step-by-step form.
    return RetrievalQA.from_chain_type(
        retriever=vectorstore.as_retriever(),
        llm=OpenAI(temperature=0),
    )
def main(data_path: str = "data/"):
    """Index the documents under ``data_path`` and answer questions interactively.

    The pipeline is: load documents, split them into 1000-character chunks
    with a 100-character overlap, embed them into a vector store, build a
    RetrievalQA chain, then read questions from stdin until the user types
    ``exit`` or ``quit`` (or sends EOF / Ctrl-C).

    Args:
        data_path: Directory holding the source documents. Defaults to
            ``"data/"``.
    """
    # Fail early with a readable message instead of a traceback from os.listdir.
    if not os.path.isdir(data_path):
        print(f"[ERROR] Data directory not found: {data_path}", file=sys.stderr)
        return

    print("[INFO] Loading documents...")
    documents = load_documents(data_path)
    if not documents:
        # Nothing to embed; continuing would only fail later in the pipeline.
        print(
            f"[ERROR] No supported documents (.txt, .pdf, .csv) found in {data_path}",
            file=sys.stderr,
        )
        return

    print("[INFO] Splitting text...")
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    split_docs = splitter.split_documents(documents)

    print("[INFO] Creating vector store...")
    vectorstore = build_vector_store(split_docs)

    print("[INFO] Building QA chain...")
    qa_chain = build_qa_chain(vectorstore)

    print("\n[READY] Ask questions (type 'exit' to quit):\n")
    while True:
        try:
            question = input("Q: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C ends the session like an explicit "exit".
            print("\nGoodbye!")
            break

        if not question:
            continue  # do not spend an LLM call on a blank line
        if question.lower() in ("exit", "quit"):
            print("Goodbye!")
            break

        # invoke() replaces the deprecated Chain.run(); RetrievalQA takes the
        # question under "query" and returns the answer under "result".
        answer = qa_chain.invoke({"query": question})["result"]
        print("A:", answer)
# Script entry point: runs only when this file is executed directly.
if __name__ == "__main__":
    # Suppress DeprecationWarning output before starting the interactive session.
    warnings.simplefilter("ignore", DeprecationWarning)
    main()