import os
import gradio as gr

from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters.sentence_transformers import SentenceTransformersTokenTextSplitter
from langchain_chroma import Chroma


# ==============================
# CONFIG
# ==============================
# Paths used by the indexer and the vector store.
DATASET_PATH = "dataset.pdf"
PERSIST_DIR = "pharma_db"

# Make sure HF_TOKEN is always present in the process environment.
# NOTE(review): when the variable is unset this writes an empty string;
# confirm that downstream libraries treat "" the same as "no token".
os.environ["HF_TOKEN"] = os.environ.get("HF_TOKEN", "")

# The Groq key is mandatory: abort start-up when it is missing or empty.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY not found in environment variables")

# Create the persistence directory up front; a no-op when it already exists.
os.makedirs(PERSIST_DIR, exist_ok=True)


# ==============================
# EMBEDDINGS (FASTER MODEL)
# ==============================
# Identifier of the sentence-transformers checkpoint used for embeddings.
_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Shared embedding function; handed to the vector store below.
embeddings = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)


# ==============================
# VECTOR DB
# ==============================
# Chroma vector store backed by PERSIST_DIR on disk; documents and queries
# are embedded with the shared `embeddings` object.
db = Chroma(embedding_function=embeddings, persist_directory=PERSIST_DIR)


# ==============================
# LOAD & INDEX PDF
# ==============================
if os.path.exists(DATASET_PATH):

    # Index only when the persisted collection is empty. ``limit=1`` fetches at
    # most one record instead of loading the whole collection just to check.
    if not db.get(limit=1)["ids"]:
        print("Indexing PDF...")

        documents = PyPDFLoader(DATASET_PATH).load()

        # Split with the tokenizer of the model that will embed the chunks.
        # For this splitter the token budget is controlled by
        # ``tokens_per_chunk`` (defaulting to the model's maximum sequence
        # length), not ``chunk_size``; leaving it at the default keeps each
        # chunk within what the embedding model is configured to encode.
        splitter = SentenceTransformersTokenTextSplitter(
            model_name=embeddings.model_name,
            chunk_overlap=50,
        )

        chunks = splitter.split_documents(documents)

        # A PDF without extractable text yields no chunks; skip the insert
        # rather than handing the vector store an empty batch.
        if chunks:
            db.add_documents(chunks)
            print("✅ PDF indexed.")
        else:
            print("⚠️ No extractable text found in PDF; nothing indexed.")

else:
    print("⚠️ PDF not found in repo.")
    

# ==============================
# PROMPT
# ==============================
# System instructions for the assistant; ``{context}`` is replaced with the
# retrieved passages when the chain runs.
# NOTE(review): the persona is spelled both 'Dr MomAI Assistant' and
# 'Dr Mom AI Assistant' inside this text; confirm which one is intended.
_SYSTEM_TEMPLATE = """You are 'Dr MomAI Assistant', a specialized medical AI expert focused on mom and baby.
    GUIDELINES:
    1. INTERACTIVE GREETINGS: If the user greets you (e.g., "Hi", "Hello", "Who are you?"), respond politely, introduce yourself as Dr Mom AI Assistant, and explain that you are here to help them understand information.
    2. CONTEXTUAL ACCURACY: For all medical or factual questions, prioritize the information provided in the 'Context' section below.
    3. STRICTNESS: If the question is medical in nature but the answer is NOT found in the context, explicitly state something like this: "I'm sorry, but that specific information is not available in my current medical knowledge." 
    4. TONE: Maintain a professional, empathetic, and clinical tone. Use bullet points for complex medical explanations to ensure clarity.
    Context:
    {context}"""

# The human turn is the user's question, inserted verbatim.
_HUMAN_TEMPLATE = "{question}"

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _SYSTEM_TEMPLATE),
        ("human", _HUMAN_TEMPLATE),
    ]
)

# Final chain stage: turns the model reply into a plain string.
output_parser = StrOutputParser()


def format_docs(docs):
    """Concatenate the text of retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined by a blank line, or an empty
        string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    paragraph_break = "\n\n"
    return paragraph_break.join(page_texts)


# ==============================
# RAG QUERY
# ==============================
# Cached RAG chain; built on the first non-empty query and reused afterwards
# so each request does not construct a new retriever and Groq client.
_rag_chain = None


def _get_rag_chain():
    """Return the shared RAG chain, building it on first use."""
    global _rag_chain

    if _rag_chain is None:
        # Retrieve the 5 most similar chunks for every question.
        retriever = db.as_retriever(search_kwargs={"k": 5})

        llm = ChatGroq(
            model="llama-3.1-8b-instant",
            api_key=GROQ_API_KEY,
            temperature=0,
        )

        _rag_chain = (
            {
                "context": retriever | format_docs,
                "question": RunnablePassthrough(),
            }
            | prompt
            | llm
            | output_parser
        )

    return _rag_chain


def run_query(question):
    """Answer a user question with retrieval-augmented generation.

    Args:
        question: The text typed by the user. ``None`` and blank or
            whitespace-only strings are treated as "no question".

    Returns:
        The model's answer as a string, or a short prompt asking the user
        to enter a question when the input is empty.
    """
    # Guard against a missing value as well as blank input, so an empty
    # submission never reaches the retriever or the LLM.
    if question is None or not question.strip():
        return "Please enter a question."

    return _get_rag_chain().invoke(question)


# ==============================
# GRADIO UI
# ==============================
# Single-textbox UI: the question goes to run_query and the answer is shown
# in a 10-line response box.
interface = gr.Interface(
    fn=run_query,
    inputs=gr.Textbox(
        label="Question",
        placeholder="Ask me something...",
    ),
    outputs=gr.Textbox(
        label="Response",
        lines=10,
    ),
    title="Your Assistant",
    description="Ask questions",
)

# Start the web server only when this file is executed as a script, so that
# importing the module (for tests or tooling) does not launch the app.
if __name__ == "__main__":
    interface.launch()