Update app.py
app.py
CHANGED
@@ -13,33 +13,11 @@ print("Files in current directory:", os.listdir())
 # Load RAG components
 # -----------------------------
 embed_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
+
 index = faiss.read_index("faiss_index.bin")
 chunks = pickle.load(open("chunks.pkl", "rb"))
 metadata = pickle.load(open("metadata.pkl", "rb"))
 
-# -----------------------------
-# Load Qwen 2.5B Instruct model
-# -----------------------------
-model_name = "Qwen/Qwen2.5-1.5B-Instruct"
-
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    device_map="auto",
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
-)
-
-generator = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    max_new_tokens=200,
-    do_sample=True,
-    temperature=0.6
-)
-
-print("Qwen model loaded successfully!")
-
 # -----------------------------
 # Intent detection
 # -----------------------------
@@ -63,40 +41,69 @@ def detect_query(query):
 # -----------------------------
 # Retrieve context (RAG)
 # -----------------------------
-def retrieve_context(query
+def retrieve_context(query):
     animal, topic = detect_query(query)
 
-    filtered_indices = [
-
-        if
-
-
+    filtered_indices = []
+    for i, meta in enumerate(metadata):
+        if animal and meta["animal"] != animal:
+            continue
+        if topic and meta["topic"] != topic:
+            continue
+        filtered_indices.append(i)
 
     if not filtered_indices:
         filtered_indices = list(range(len(chunks)))
 
     query_embedding = embed_model.encode([query])
     filtered_embeddings = np.array([index.reconstruct(i) for i in filtered_indices])
-
     distances = np.linalg.norm(filtered_embeddings - query_embedding, axis=1)
-    top_indices = distances.argsort()[:
+    top_indices = distances.argsort()[:2]
+
+    context = ""
+    for idx in top_indices:
+        real_index = filtered_indices[idx]
+        context += chunks[real_index] + "\n"
 
-    context = "\n".join(chunks[filtered_indices[idx]] for idx in top_indices)
     return context.strip()
 
 # -----------------------------
-#
+# Load Qwen model (CPU only, no accelerate)
+# -----------------------------
+model_name = "Qwen/Qwen2.5-1.5B-Instruct"
+
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float32  # CPU only
+)
+
+generator = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    max_new_tokens=150,
+    do_sample=True,
+    temperature=0.6,
+    device=-1  # ensures CPU is used
+)
+
+print("LLM loaded successfully!")
+
+# -----------------------------
+# Chat function
 # -----------------------------
 def chat(user_input):
     context = retrieve_context(user_input)
+
     if not context:
         return "I don't know."
 
     prompt = f"""
 You are a livestock expert assistant.
 
-Use ONLY the information below to answer
-If
+Use ONLY the information below to answer.
+If answer is not present, say "I don't know".
 
 Context:
 {context}
@@ -104,13 +111,12 @@ Context:
 Question:
 {user_input}
 
-Answer in full, clear sentences.
+Answer in short and clear sentences.
 """
-
-    response = generator(prompt, max_new_tokens=200, do_sample=True, temperature=0.6)
+    response = generator(prompt, max_new_tokens=150, do_sample=True, temperature=0.6)
     text = response[0]["generated_text"]
 
-    # Remove prompt
+    # Remove prompt if repeated
     if prompt.strip() in text:
         text = text.split(prompt.strip())[-1].strip()
 
@@ -121,8 +127,8 @@ Answer in full, clear sentences.
 # -----------------------------
 gr.Interface(
     fn=chat,
-    inputs=
-    outputs=
+    inputs="text",
+    outputs="text",
     title="Livestock Chatbot (RAG + Qwen)",
-    description="This chatbot answers livestock questions using
+    description="This chatbot answers livestock questions using RAG retrieval and Qwen model generation."
 ).launch()
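Note on the startup block: app.py assumes faiss_index.bin, chunks.pkl, and metadata.pkl already exist on disk. For reference, here is a minimal sketch of an offline build step that would produce compatible files with the same embedding model; the two chunks and their "animal"/"topic" tags are hypothetical examples, not the project's data.

# Sketch: offline build step for the files app.py loads at startup.
# The two chunks and their metadata tags are hypothetical examples;
# only the file names and the embedding model match the diff.
import pickle

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

chunks = [
    "Goats should be dewormed every three months in humid climates.",
    "Dairy cows need 30 to 50 litres of clean water per day.",
]
metadata = [
    {"animal": "goat", "topic": "health"},
    {"animal": "cow", "topic": "feeding"},
]

embed_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
embeddings = np.asarray(embed_model.encode(chunks), dtype=np.float32)

# IndexFlatL2 stores vectors verbatim, which is what makes
# index.reconstruct(i) in retrieve_context possible.
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

faiss.write_index(index, "faiss_index.bin")
with open("chunks.pkl", "wb") as f:
    pickle.dump(chunks, f)
with open("metadata.pkl", "wb") as f:
    pickle.dump(metadata, f)

A flat index is the safe choice here: retrieve_context calls index.reconstruct(i), and compressed or inverted FAISS index types do not, in general, support reconstruction without extra setup.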
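detect_query(query) is called in both versions, but its body lies outside the changed hunks. Judging only from how its results are used (each value may be None, and they are compared against meta["animal"] and meta["topic"]), it is presumably a keyword matcher along these lines; the shape, category names, and keyword lists below are guesses for illustration, not the repository's code.

# Hypothetical sketch of detect_query; only its (animal, topic) return
# contract is visible in the diff. Keyword tables are illustrative.
ANIMAL_KEYWORDS = {
    "goat": ["goat", "kid"],
    "cow": ["cow", "cattle", "calf"],
    "chicken": ["chicken", "hen", "poultry"],
}
TOPIC_KEYWORDS = {
    "feeding": ["feed", "diet", "fodder"],
    "health": ["disease", "vaccine", "deworm", "sick"],
}

def detect_query(query):
    q = query.lower()
    animal = next((a for a, kws in ANIMAL_KEYWORDS.items()
                   if any(kw in q for kw in kws)), None)
    topic = next((t for t, kws in TOPIC_KEYWORDS.items()
                  if any(kw in q for kw in kws)), None)
    return animal, topic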
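The rewritten retrieve_context deliberately skips index.search: it narrows candidates by metadata first, then reconstructs just those vectors and ranks them in NumPy, since a plain FAISS search cannot be restricted to a metadata subset. When no filter applies, the ranking matches what the flat L2 index would return directly, as in this sketch, which reuses the objects defined in app.py and an arbitrary example query.

# Sketch: unfiltered retrieval via FAISS directly. IndexFlatL2 ranks by
# squared L2 distance, so the ordering matches the np.linalg.norm ranking
# in retrieve_context when filtered_indices covers every chunk.
query_embedding = np.asarray(
    embed_model.encode(["How often should goats be dewormed?"]),  # example query
    dtype=np.float32,
)
distances, ids = index.search(query_embedding, 2)  # top 2, as in the diff
context = "\n".join(chunks[i] for i in ids[0])

Reconstructing every filtered vector on each query is linear in the corpus size, which is fine for a small chunk set but worth revisiting if the corpus grows.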
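One note on the prompt-stripping block that the commit keeps: the text-generation pipeline can be asked not to echo the prompt at all, which would make the substring check and split unnecessary. A sketch with the same generator:

# Sketch: have the pipeline return only the continuation, so no manual
# prompt removal is needed. return_full_text is a standard option of
# transformers' text-generation pipeline.
response = generator(
    prompt,
    max_new_tokens=150,
    do_sample=True,
    temperature=0.6,
    return_full_text=False,
)
text = response[0]["generated_text"].strip()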
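Finally, since gr.Interface(...).launch() runs at import time, the easiest smoke test is an interactive session with the launch line temporarily commented out; the question is an arbitrary example.

# Example check of the two core functions (run with .launch() disabled).
print(retrieve_context("How often should goats be dewormed?"))
print(chat("How often should goats be dewormed?"))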