| |
|
|
| import os |
| import chromadb |
| from dotenv import load_dotenv |
|
|
| from langchain_together import TogetherEmbeddings, ChatTogether |
| from langchain.vectorstores import Chroma |
| from langchain_core.prompts import ChatPromptTemplate |
| from langchain_core.output_parsers import StrOutputParser |
| from langchain_core.runnables import RunnablePassthrough |
| from langchain.chains import create_retrieval_chain |
| from langchain.chains.combine_documents import create_stuff_documents_chain |
| from langchain_core.documents import Document |
|
|
| |
# Pull environment variables (TOGETHER_API_KEY, VECTOR_DB_DIR, ...) from a
# local .env file into the process environment.
load_dotenv()

# --- Storage locations and the Chroma collection used by this app ---
db_directory = "database"
db_path = os.path.join(db_directory, "processed_documents.db")
VECTOR_DB_DIR = os.getenv("VECTOR_DB_DIR", "data/vector_db_chroma")
collection_name = "my_instrument_manual_chunks"

# The Together AI key is required for both the LLM and the embeddings;
# fail fast at import time with an actionable message if it is absent.
if not (together_api_key := os.getenv("TOGETHER_API_KEY")):
    raise ValueError("TOGETHER_API_KEY environment variable not set. Please set it in your .env file.")
|
|
| |
# Instantiate the chat model. On any failure, leave `llm` as None so the
# chain-assembly step below can degrade gracefully instead of crashing.
llm = None
try:
    chat_model = ChatTogether(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        temperature=0.3,  # low temperature: favor precise troubleshooting answers
        api_key=together_api_key,
    )
except Exception as err:
    print(f"Error instantiating LLM: {err}")
    print("Please check your TOGETHER_API_KEY and model name.")
else:
    llm = chat_model
    print("LLM (Together AI) Instantiation succeeded.")
|
|
| |
# Embedding model; must match the one used by setup_knowledge_base.py when
# the collection was populated, or retrieval quality will collapse.
embeddings_model_name = "intfloat/multilingual-e5-large-instruct"

# Number of chunks retrieved per query. Defaults to the previous hard-coded
# value of 5 but can now be tuned via the RETRIEVER_K environment variable.
RETRIEVER_K = int(os.getenv("RETRIEVER_K", "5"))

# Build the embeddings client and open the persistent Chroma store. On any
# failure, `retriever` stays None so downstream assembly degrades gracefully.
try:
    retriever_embeddings = TogetherEmbeddings(
        model=embeddings_model_name,
        api_key=together_api_key,
    )

    # Persistent on-disk Chroma client; the directory is created on demand.
    client = chromadb.PersistentClient(path=VECTOR_DB_DIR)
    vectorstore = Chroma(
        client=client,
        collection_name=collection_name,
        embedding_function=retriever_embeddings,
    )

    # Count documents via the public chromadb client API rather than the
    # private `vectorstore._collection` attribute, which is an internal of
    # langchain's Chroma wrapper and can break across versions.
    if client.get_or_create_collection(collection_name).count() == 0:
        print(f"Warning: ChromaDB collection '{collection_name}' is empty. Please run 'python setup_knowledge_base.py' to populate the knowledge base.")

    retriever = vectorstore.as_retriever(search_kwargs={"k": RETRIEVER_K})
    print(f"Retriever (ChromaDB) Instantiation succeeded, retrieving {retriever.search_kwargs['k']} chunks.")

except Exception as e:
    retriever = None
    print(f"Error setting up ChromaDB retriever: {e}")
    print("Please ensure you have run 'python setup_knowledge_base.py' to create and populate the vector database.")
|
|
| |
# System-prompt preamble for the RAG answer chain: sets the assistant's
# persona (HPLC troubleshooting expert) and answering policy. The retrieved
# context is appended to this text when the ChatPromptTemplate is built.
answer_prompt = """
You are a professional HPLC instrument troubleshooting expert who specializes in helping junior researchers and students.
Your task is to answer the user's troubleshooting questions in detail and clearly based on the HPLC instrument knowledge provided below.
If there is no direct answer in the knowledge, please provide the most reasonable speculative suggestions based on your expert judgment, or ask further clarifying questions.
Please ensure that your answers are logically clear, easy to understand, and directly address the user's questions.
"""
|
|
| |
# --- Assemble the RAG pipeline: retriever -> prompt -> LLM -> plain string ---
# Only attempted when both the LLM and the retriever initialized successfully.
rag_chain = None
if llm and retriever:
    try:
        def format_docs(docs):
            """Join retrieved document bodies into one context string."""
            bodies = [d.page_content for d in docs if hasattr(d, 'page_content')]
            return "\n\n".join(bodies)

        # The retrieved context is injected into the system message; the raw
        # user question fills the user message.
        prompt = ChatPromptTemplate.from_messages([
            ("system", answer_prompt + "\n\nContext:\n{context}"),
            ("user", "{question}"),
        ])
        print("Prompt Template build success.")

        # LCEL graph: the dict branch fans the input question out to both the
        # retriever (for context) and a passthrough (for the prompt slot).
        context_branch = retriever | format_docs
        rag_chain = (
            {"context": context_branch, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )
        print("RAG LangChain assemble success.")
    except Exception as e:
        rag_chain = None
        print(f"Error assembling RAG LangChain: {e}")
        print("Please check previous setup steps for LLM and Retriever.")
else:
    print("RAG LangChain could not be assembled due to LLM or Retriever initialization failure.")
|
|
| |
if __name__ == "__main__":
    print("--- Running main.py for direct test ---")
    if not rag_chain:
        print("RAG chain is not available for testing. Please ensure 'setup_knowledge_base.py' has been run successfully and check API key/model setup in main.py.")
    else:
        def _ask(question):
            """Send one query through the RAG chain and print the answer."""
            answer = rag_chain.invoke(question)
            print("\nLLM's Answer:")
            print(answer)

        # In-domain query.
        first_question = "What are the steps for instrument calibration?"
        print(f"\n--- Executing query: {first_question} ---")
        _ask(first_question)

        # Broader troubleshooting query.
        print("\n--- Executing another query ---")
        _ask("How do I troubleshoot common equipment malfunctions?")

        # Deliberately out-of-domain query to exercise the fallback behavior.
        print("\n--- Executing a query that might not have an answer ---")
        _ask("How to make Tiramisu cake?")