Goated121 committed on
Commit
d35835a
·
verified ·
1 Parent(s): ccff6ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -28
app.py CHANGED
@@ -70,47 +70,24 @@ def retrieve_context(query):
70
  # -----------------------------
71
  # Load Qwen model (CPU only, no accelerate)
72
  # -----------------------------
 
73
 
74
- import os
75
- import torch
76
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
77
-
78
- # 1. Access the token from Space Secrets
79
- # Make sure you've added "HF_TOKEN" in your Space Settings > Variables and Secrets
80
- hf_token = os.getenv("HF_TOKEN")
81
-
82
- # 2. Use a confirmed model path (Qwen2.5-1.5B or Qwen2.5-0.5B are highly reliable)
83
- # If you are certain about 3.5, ensure the spelling matches the HF Repo exactly.
84
- model_name = "Qwen/Qwen2.5-0.5B-Instruct"
85
-
86
- # 3. Load Tokenizer with authentication
87
- tokenizer = AutoTokenizer.from_pretrained(
88
- model_name,
89
- token=hf_token
90
- )
91
-
92
- # 4. Load Model with authentication
93
  model = AutoModelForCausalLM.from_pretrained(
94
  model_name,
95
- token=hf_token,
96
- torch_dtype=torch.float32, # Optimized for CPU
97
- device_map="cpu" # Explicitly force CPU
98
  )
99
 
100
- # 5. Setup Pipeline
101
  generator = pipeline(
102
  "text-generation",
103
  model=model,
104
  tokenizer=tokenizer,
105
  max_new_tokens=150,
106
  do_sample=True,
107
- temperature=0.6
 
108
  )
109
 
110
- # Usage Example:
111
- # result = generator("How do I run a Flutter project?")
112
- # print(result[0]['generated_text'])
113
-
114
  print("LLM loaded successfully!")
115
 
116
  # -----------------------------
 
70
  # -----------------------------
71
  # Load Qwen model (CPU only, no accelerate)
72
  # -----------------------------
73
+ model_name = "Qwen/Qwen2.5-1.5B-Instruct"
74
 
75
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  model = AutoModelForCausalLM.from_pretrained(
77
  model_name,
78
+ torch_dtype=torch.float32 # CPU only
 
 
79
  )
80
 
 
81
  generator = pipeline(
82
  "text-generation",
83
  model=model,
84
  tokenizer=tokenizer,
85
  max_new_tokens=150,
86
  do_sample=True,
87
+ temperature=0.6,
88
+ device=-1 # ensures CPU is used
89
  )
90
 
 
 
 
 
91
  print("LLM loaded successfully!")
92
 
93
  # -----------------------------