Update app.py
app.py
CHANGED
@@ -70,47 +70,24 @@ def retrieve_context(query):
 # -----------------------------
 # Load Qwen model (CPU only, no accelerate)
 # -----------------------------
-
-
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-
-# 1. Access the token from Space Secrets
-# Make sure you've added "HF_TOKEN" in your Space Settings > Variables and Secrets
-hf_token = os.getenv("HF_TOKEN")
-
-# 2. Use a confirmed model path (Qwen2.5-1.5B or Qwen2.5-0.5B are highly reliable)
-# If you are certain about 3.5, ensure the spelling matches the HF Repo exactly.
-model_name = "Qwen/Qwen2.5-0.5B-Instruct"
-
-# 3. Load Tokenizer with authentication
-tokenizer = AutoTokenizer.from_pretrained(
-    model_name,
-    token=hf_token
-)
-
-# 4. Load Model with authentication
+model_name = "Qwen/Qwen2.5-1.5B-Instruct"
+
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-
-    torch_dtype=torch.float32,  # Optimized for CPU
-    device_map="cpu"  # Explicitly force CPU
+    torch_dtype=torch.float32  # CPU only
 )
 
-# 5. Setup Pipeline
 generator = pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
     max_new_tokens=150,
     do_sample=True,
-    temperature=0.6
+    temperature=0.6,
+    device=-1  # ensures CPU is used
 )
 
-# Usage Example:
-# result = generator("How do I run a Flutter project?")
-# print(result[0]['generated_text'])
-
 print("LLM loaded successfully!")
 
 # -----------------------------
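For reference, a minimal usage sketch of the updated pipeline, mirroring the commented-out usage example that this hunk removes. It assumes `import torch` and `from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline` still appear earlier in app.py, since the hunk deletes them from this section while the new code keeps referencing `torch.float32`, `AutoTokenizer`, `AutoModelForCausalLM`, and `pipeline`; the prompt string is illustrative. In the transformers pipeline API, `device=-1` selects CPU.

# Assumes generator was built as in the new revision above.
prompt = "How do I run a Flutter project?"  # illustrative prompt from the removed usage comment
result = generator(prompt)
# A text-generation pipeline returns a list of dicts keyed by "generated_text".
print(result[0]["generated_text"])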