Goated121 committed on
Commit
91b4de2
·
verified ·
1 Parent(s): 81026e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -57
app.py CHANGED
@@ -5,15 +5,15 @@ import numpy as np
5
  from sentence_transformers import SentenceTransformer
6
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
7
  import torch
 
 
 
8
 
9
  # -----------------------------
10
- # Load embedding model (for RAG)
11
  # -----------------------------
12
  embed_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
13
 
14
- # -----------------------------
15
- # Load FAISS + data
16
- # -----------------------------
17
  index = faiss.read_index("faiss_index.bin")
18
  chunks = pickle.load(open("chunks.pkl", "rb"))
19
  metadata = pickle.load(open("metadata.pkl", "rb"))
@@ -56,10 +56,8 @@ def retrieve_context(query):
56
  filtered_indices = list(range(len(chunks)))
57
 
58
  query_embedding = embed_model.encode([query])
59
-
60
  filtered_embeddings = np.array([index.reconstruct(i) for i in filtered_indices])
61
  distances = np.linalg.norm(filtered_embeddings - query_embedding, axis=1)
62
-
63
  top_indices = distances.argsort()[:2]
64
 
65
  context = ""
@@ -70,52 +68,49 @@ def retrieve_context(query):
70
  return context.strip()
71
 
72
  # -----------------------------
73
- # Load Qwen model (CPU SAFE)
74
  # -----------------------------
75
- MODEL_NAME = "Qwen/Qwen3.5-0.8B"
 
 
76
 
77
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
78
 
79
  model = AutoModelForCausalLM.from_pretrained(
80
- MODEL_NAME,
81
- torch_dtype=torch.float32 # CPU safe
82
  )
83
 
84
  generator = pipeline(
85
  "text-generation",
86
  model=model,
87
  tokenizer=tokenizer,
88
- max_new_tokens=150,
89
  do_sample=True,
90
- temperature=0.6
 
91
  )
92
 
93
- print("Model loaded successfully!")
94
 
95
  # -----------------------------
96
- # Chat function (RAG + LLM)
97
  # -----------------------------
98
- def chat_fn(user_input, history):
99
- if history is None:
100
- history = []
101
-
102
  context = retrieve_context(user_input)
103
 
104
- # No context
105
- if not context:
106
- response = "I don't know."
107
 
108
- else:
109
- # Small context → direct RAG
110
- if len(context.split()) < 50:
111
- response = context.strip()
112
 
113
- else:
114
- prompt = f"""
115
- You are a livestock expert assistant for goats and cows.
116
 
117
  Use ONLY the information below to answer.
118
- If the answer is not present, say "I don't know".
119
 
120
  Context:
121
  {context}
@@ -126,36 +121,22 @@ Question:
126
  Answer in short and clear sentences.
127
  """
128
 
129
- output = generator(
130
- prompt,
131
- max_new_tokens=120, # ✅ remove max_length warning
132
- do_sample=True,
133
- temperature=0.6
134
- )
135
-
136
- text = output[0]["generated_text"]
137
-
138
- if prompt.strip() in text:
139
- text = text.split(prompt.strip())[-1].strip()
140
-
141
- response = text
142
 
143
- # FIXED FORMAT (IMPORTANT)
144
- history.append({"role": "user", "content": user_input})
145
- history.append({"role": "assistant", "content": response})
146
 
147
- return history
148
 
149
  # -----------------------------
150
  # Gradio UI
151
  # -----------------------------
152
- with gr.Blocks() as demo:
153
- gr.Markdown("## 🐐 Livestock Chatbot (RAG + Qwen)")
154
-
155
- chatbot = gr.Chatbot(type="messages") # ✅ REQUIRED
156
- msg = gr.Textbox()
157
- btn = gr.Button("Send")
158
-
159
- btn.click(chat_fn, [msg, chatbot], chatbot)
160
-
161
- demo.launch()
 
5
  from sentence_transformers import SentenceTransformer
6
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
7
  import torch
8
+ import os
9
+
10
+ print("Files in current directory:", os.listdir())
11
 
12
  # -----------------------------
13
+ # Load RAG components
14
  # -----------------------------
15
  embed_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
16
 
 
 
 
17
  index = faiss.read_index("faiss_index.bin")
18
  chunks = pickle.load(open("chunks.pkl", "rb"))
19
  metadata = pickle.load(open("metadata.pkl", "rb"))
 
56
  filtered_indices = list(range(len(chunks)))
57
 
58
  query_embedding = embed_model.encode([query])
 
59
  filtered_embeddings = np.array([index.reconstruct(i) for i in filtered_indices])
60
  distances = np.linalg.norm(filtered_embeddings - query_embedding, axis=1)
 
61
  top_indices = distances.argsort()[:2]
62
 
63
  context = ""
 
68
  return context.strip()
69
 
70
  # -----------------------------
71
+ # Load FAST model (CPU friendly)
72
  # -----------------------------
73
+ model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
74
+
75
+ print("Loading fast model...")
76
 
77
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
78
 
79
  model = AutoModelForCausalLM.from_pretrained(
80
+ model_name,
81
+ torch_dtype=torch.float32
82
  )
83
 
84
  generator = pipeline(
85
  "text-generation",
86
  model=model,
87
  tokenizer=tokenizer,
88
+ max_new_tokens=120,
89
  do_sample=True,
90
+ temperature=0.6,
91
+ device=-1 # CPU
92
  )
93
 
94
+ print("Fast LLM loaded successfully!")
95
 
96
  # -----------------------------
97
+ # Chat function
98
  # -----------------------------
99
+ def chat(user_input):
 
 
 
100
  context = retrieve_context(user_input)
101
 
102
+ # Instant response if context is already short
103
+ if context and len(context.split()) < 50:
104
+ return context.strip()
105
 
106
+ if not context:
107
+ return "I don't know."
 
 
108
 
109
+ prompt = f"""
110
+ You are a livestock expert assistant for goat and cows.
 
111
 
112
  Use ONLY the information below to answer.
113
+ If answer is not present, say "I don't know".
114
 
115
  Context:
116
  {context}
 
121
  Answer in short and clear sentences.
122
  """
123
 
124
+ response = generator(prompt)
125
+ text = response[0]["generated_text"]
 
 
 
 
 
 
 
 
 
 
 
126
 
127
+ # Remove prompt if repeated
128
+ if prompt.strip() in text:
129
+ text = text.split(prompt.strip())[-1].strip()
130
 
131
+ return text
132
 
133
  # -----------------------------
134
  # Gradio UI
135
  # -----------------------------
136
+ gr.Interface(
137
+ fn=chat,
138
+ inputs="text",
139
+ outputs="text",
140
+ title="Livestock Chatbot (RAG + Fast LLM)",
141
+ description="Fast chatbot using RAG + TinyLlama (optimized for CPU)"
142
+ ).launch()