Goated121 committed on
Commit
e540d02
·
verified ·
1 Parent(s): 91b4de2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -17
app.py CHANGED
@@ -68,30 +68,27 @@ def retrieve_context(query):
68
  return context.strip()
69
 
70
  # -----------------------------
71
- # Load FAST model (CPU friendly)
72
  # -----------------------------
73
- model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
74
-
75
- print("Loading fast model...")
76
 
77
  tokenizer = AutoTokenizer.from_pretrained(model_name)
78
-
79
  model = AutoModelForCausalLM.from_pretrained(
80
  model_name,
81
- torch_dtype=torch.float32
82
  )
83
 
84
  generator = pipeline(
85
  "text-generation",
86
  model=model,
87
  tokenizer=tokenizer,
88
- max_new_tokens=120,
89
  do_sample=True,
90
  temperature=0.6,
91
- device=-1 # CPU
92
  )
93
 
94
- print("Fast LLM loaded successfully!")
95
 
96
  # -----------------------------
97
  # Chat function
@@ -99,10 +96,6 @@ print("Fast LLM loaded successfully!")
99
  def chat(user_input):
100
  context = retrieve_context(user_input)
101
 
102
- # ⚡ Instant response if context is already short
103
- if context and len(context.split()) < 50:
104
- return context.strip()
105
-
106
  if not context:
107
  return "I don't know."
108
 
@@ -120,8 +113,7 @@ Question:
120
 
121
  Answer in short and clear sentences.
122
  """
123
-
124
- response = generator(prompt)
125
  text = response[0]["generated_text"]
126
 
127
  # Remove prompt if repeated
@@ -137,6 +129,6 @@ gr.Interface(
137
  fn=chat,
138
  inputs="text",
139
  outputs="text",
140
- title="Livestock Chatbot (RAG + Fast LLM)",
141
- description="Fast chatbot using RAG + TinyLlama (optimized for CPU)"
142
  ).launch()
 
68
  return context.strip()
69
 
70
  # -----------------------------
71
+ # Load Qwen model (CPU only, no accelerate)
72
  # -----------------------------
73
+ model_name = "Qwen/Qwen2.5-1.5B-Instruct"
 
 
74
 
75
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
76
  model = AutoModelForCausalLM.from_pretrained(
77
  model_name,
78
+ torch_dtype=torch.float32 # CPU only
79
  )
80
 
81
  generator = pipeline(
82
  "text-generation",
83
  model=model,
84
  tokenizer=tokenizer,
85
+ max_new_tokens=150,
86
  do_sample=True,
87
  temperature=0.6,
88
+ device=-1 # ensures CPU is used
89
  )
90
 
91
+ print("LLM loaded successfully!")
92
 
93
  # -----------------------------
94
  # Chat function
 
96
  def chat(user_input):
97
  context = retrieve_context(user_input)
98
 
 
 
 
 
99
  if not context:
100
  return "I don't know."
101
 
 
113
 
114
  Answer in short and clear sentences.
115
  """
116
+ response = generator(prompt, max_new_tokens=150, do_sample=True, temperature=0.6)
 
117
  text = response[0]["generated_text"]
118
 
119
  # Remove prompt if repeated
 
129
  fn=chat,
130
  inputs="text",
131
  outputs="text",
132
+ title="Livestock Chatbot (RAG + Qwen)",
133
+ description="This chatbot answers livestock questions using RAG retrieval and Qwen model generation."
134
  ).launch()