"""Livestock RAG chatbot: FAISS retrieval + Qwen2.5 generation behind a Gradio UI.

Pipeline: keyword intent detection -> metadata-filtered FAISS retrieval ->
prompt-grounded generation with a small instruct model (CPU only).
"""

import os
import pickle

import faiss
import gradio as gr
import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

print("Files in current directory:", os.listdir())

# -----------------------------
# Load RAG components
# -----------------------------
embed_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
index = faiss.read_index("faiss_index.bin")

# NOTE(security): pickle.load can execute arbitrary code — only load files
# produced by your own indexing step, never untrusted uploads.
# `with` blocks fix the original's leaked file handles.
with open("chunks.pkl", "rb") as f:
    chunks = pickle.load(f)
with open("metadata.pkl", "rb") as f:
    metadata = pickle.load(f)


# -----------------------------
# Intent detection
# -----------------------------
def detect_query(query):
    """Detect the animal and topic mentioned in *query*.

    Returns:
        (animal, topic): each is a string ("goat"/"cow", "feeding"/"disease")
        or None when no matching keyword is found. Matching is
        case-insensitive and includes Hindi/Urdu keywords ("khilana",
        "bimari").
    """
    query = query.lower()
    animal = None
    topic = None
    if "goat" in query:
        animal = "goat"
    elif "cow" in query:
        animal = "cow"
    if any(word in query for word in ["feed", "diet", "khilana"]):
        topic = "feeding"
    elif any(word in query for word in ["disease", "bimari"]):
        topic = "disease"
    return animal, topic


# -----------------------------
# Retrieve context (RAG)
# -----------------------------
def retrieve_context(query):
    """Return the two chunks most similar to *query* as one newline-joined string.

    Chunks are first filtered by the detected animal/topic metadata; when no
    chunk matches the filter, the whole corpus is searched instead.
    """
    animal, topic = detect_query(query)

    # Keep only chunk indices whose metadata matches the detected intent.
    filtered_indices = [
        i
        for i, meta in enumerate(metadata)
        if (not animal or meta["animal"] == animal)
        and (not topic or meta["topic"] == topic)
    ]
    if not filtered_indices:
        # No metadata match — fall back to searching every chunk.
        filtered_indices = list(range(len(chunks)))

    query_embedding = embed_model.encode([query])
    # Pull the stored vectors for the filtered subset back out of the index.
    filtered_embeddings = np.array(
        [index.reconstruct(i) for i in filtered_indices]
    )
    # Brute-force L2 distance over the (small) filtered subset.
    distances = np.linalg.norm(filtered_embeddings - query_embedding, axis=1)
    top_indices = distances.argsort()[:2]  # two nearest neighbours

    context = "\n".join(chunks[filtered_indices[i]] for i in top_indices)
    return context.strip()


# -----------------------------
# Load Qwen model (CPU only, no accelerate)
# -----------------------------
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,  # CPU only
)
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=150,
    do_sample=True,
    temperature=0.6,
    # Fix: return only the newly generated text instead of prompt + answer,
    # replacing the fragile "split on the prompt" post-processing.
    return_full_text=False,
    device=-1,  # ensures CPU is used
)
print("LLM loaded successfully!")


# -----------------------------
# Chat function
# -----------------------------
def chat(user_input):
    """Answer a livestock question grounded in retrieved context.

    Returns "I don't know." when retrieval yields nothing; otherwise the
    model's answer, generated strictly from the retrieved chunks.
    """
    context = retrieve_context(user_input)
    if not context:
        return "I don't know."

    prompt = f"""
You are a livestock expert assistant for goat and cows.
Use ONLY the information below to answer.
If answer is not present, say "I don't know".

Context:
{context}

Question: {user_input}

Answer in short and clear sentences.
"""
    # Generation parameters are configured once on the pipeline above;
    # repeating them per-call (as the original did) was redundant.
    response = generator(prompt)
    return response[0]["generated_text"].strip()


# -----------------------------
# Gradio UI
# -----------------------------
if __name__ == "__main__":
    # Guard so importing this module (e.g. for tests) does not start a server.
    gr.Interface(
        fn=chat,
        inputs="text",
        outputs="text",
        title="Livestock Chatbot (RAG + Qwen)",
        description="This chatbot answers livestock questions using RAG retrieval and Qwen model generation.",
    ).launch()