File size: 3,046 Bytes
b534a53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from langchain_openai import ChatOpenAI
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
# from langchain.chains.history_aware_retriever import create_history_aware_retriever
# from langchain.chains.retrieval import create_retrieval_chain
# from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

from src.rag.vector_store import get_vector_store

def build_doc_assistant(api_key: str, model: str = "gpt-3.5-turbo", top_k: int = 2):
    """Construct the conversational (history-aware) RAG pipeline.

    Args:
        api_key: OpenAI API key, used for both the chat model and the
            vector store's embeddings.
        model: Chat model name. Defaults to "gpt-3.5-turbo" for backward
            compatibility with existing callers.
        top_k: Number of most-relevant chunks the retriever returns per query.

    Returns:
        A retrieval chain that expects {"input": ..., "chat_history": [...]}
        and produces a dict that includes the generated "answer".
    """
    # temperature=0 because we want factual answers, not creative ones.
    llm = ChatOpenAI(api_key=api_key, model=model, temperature=0)

    # Connect to the vector DB; return only the top_k most relevant chunks.
    retriever = get_vector_store(api_key).as_retriever(search_kwargs={"k": top_k})

    # ==========================================
    # STEP 1: The "Question Reformulation" Prompt
    # ==========================================
    # Rewrites a follow-up question ("what about its price?") into a
    # standalone query before it hits the retriever.
    contextualize_q_system_prompt = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, "
        "formulate a standalone question which can be understood "
        "without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    contextualize_q_prompt = ChatPromptTemplate.from_messages([
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),  # Injects our memory here
        ("human", "{input}"),
    ])

    # This chain automatically handles rewriting the query before searching.
    history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)

    # ==========================================
    # STEP 2: The "Final Answer" Prompt
    # ==========================================
    system_prompt = (
        "You are an elite AI Engineering Assistant. "
        "Use the following pieces of retrieved context to answer the question. "
        "If the answer is not contained in the context, say 'I don't know based on the documentation.' "
        "Do not make up an answer. Keep it concise.\n\n"
        "Context: {context}"
    )
    qa_prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ])

    # This chain handles injecting the retrieved chunks into the {context} variable.
    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

    # ==========================================
    # STEP 3: Tie it all together
    # ==========================================
    # Retrieval (with history-aware query rewriting) feeds the QA chain.
    rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
    return rag_chain