Goated121 committed on
Commit
625127c
·
verified ·
1 Parent(s): 28ffb6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -9
app.py CHANGED
@@ -68,14 +68,16 @@ def retrieve_context(query):
68
  return context.strip()
69
 
70
  # -----------------------------
71
- # Load Qwen model (CPU only, no accelerate)
72
  # -----------------------------
73
- model_name = "meta-llama/Llama-3.2-1B"
 
74
  tokenizer = AutoTokenizer.from_pretrained(model_name)
75
  model = AutoModelForCausalLM.from_pretrained(
76
  model_name,
77
  torch_dtype=torch.float32 # CPU-friendly
78
  )
 
79
  generator = pipeline(
80
  "text-generation",
81
  model=model,
@@ -83,7 +85,7 @@ generator = pipeline(
83
  max_new_tokens=150,
84
  do_sample=True,
85
  temperature=0.6,
86
- device=-1 # CPU
87
  )
88
 
89
  print("LLM loaded successfully!")
@@ -93,15 +95,14 @@ print("LLM loaded successfully!")
93
  # -----------------------------
94
  def chat(user_input):
95
  context = retrieve_context(user_input)
96
-
97
  if not context:
98
  return "I don't know."
99
 
100
  prompt = f"""
101
- You are a livestock expert assistant for goat and cows.
102
 
103
  Use ONLY the information below to answer.
104
- If answer is not present, say "I don't know".
105
 
106
  Context:
107
  {context}
@@ -111,7 +112,7 @@ Question:
111
 
112
  Answer in short and clear sentences.
113
  """
114
- response = generator(prompt, max_new_tokens=150, do_sample=True, temperature=0.6)
115
  text = response[0]["generated_text"]
116
 
117
  # Remove prompt if repeated
@@ -127,6 +128,6 @@ gr.Interface(
127
  fn=chat,
128
  inputs="text",
129
  outputs="text",
130
- title="Livestock Chatbot (RAG + Qwen)",
131
- description="This chatbot answers livestock questions using RAG retrieval and Qwen model generation."
132
  ).launch()
 
68
  return context.strip()
69
 
70
  # -----------------------------
71
+ # Load Qwen3.5-0.8B (CPU optimized)
72
  # -----------------------------
73
+ model_name = "Qwen/Qwen3.5-0.8B-Instruct"
74
+
75
  tokenizer = AutoTokenizer.from_pretrained(model_name)
76
  model = AutoModelForCausalLM.from_pretrained(
77
  model_name,
78
  torch_dtype=torch.float32 # CPU-friendly
79
  )
80
+
81
  generator = pipeline(
82
  "text-generation",
83
  model=model,
 
85
  max_new_tokens=150,
86
  do_sample=True,
87
  temperature=0.6,
88
+ device=-1 # CPU only
89
  )
90
 
91
  print("LLM loaded successfully!")
 
95
  # -----------------------------
96
  def chat(user_input):
97
  context = retrieve_context(user_input)
 
98
  if not context:
99
  return "I don't know."
100
 
101
  prompt = f"""
102
+ You are a livestock expert assistant for goats and cows.
103
 
104
  Use ONLY the information below to answer.
105
+ If the answer is not present, say "I don't know".
106
 
107
  Context:
108
  {context}
 
112
 
113
  Answer in short and clear sentences.
114
  """
115
+ response = generator(prompt)
116
  text = response[0]["generated_text"]
117
 
118
  # Remove prompt if repeated
 
128
  fn=chat,
129
  inputs="text",
130
  outputs="text",
131
+ title="Livestock Chatbot (RAG + Qwen3.5-0.8B)",
132
+ description="This chatbot answers livestock questions using RAG retrieval and Qwen3.5-0.8B model generation (CPU optimized)."
133
  ).launch()