minzo456 committed on
Commit
6767b71
·
verified ·
1 Parent(s): d084fd6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -14,9 +14,14 @@ app.add_middleware(
14
  allow_headers=["*"],
15
  )
16
 
17
- # 🔱 Loading local engine (SmolLM2 - Very fast and smart)
18
  print("🔱 Initializing Internal Neural Core...")
19
- pipe = pipeline("text-generation", model="HuggingFaceTB/SmolLM2-1.7B-Instruct", device_map="auto")
 
 
 
 
 
20
  print("🔱 Engine Ready!")
21
 
22
  def web_search(query):
@@ -31,7 +36,6 @@ async def generate(request: Request):
31
  try:
32
  data = await request.json()
33
  user_prompt = data.get("prompt")
34
-
35
  context = web_search(user_prompt)
36
 
37
  messages = [
@@ -40,9 +44,13 @@ async def generate(request: Request):
40
  ]
41
 
42
  prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
43
- outputs = pipe(prompt, max_new_tokens=500, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
 
 
 
 
44
 
45
- return {"response": outputs[0]["generated_text"].split("<|im_start|>assistant")[-1].strip()}
46
 
47
  except Exception as e:
48
  return {"error": str(e)}
 
14
  allow_headers=["*"],
15
  )
16
 
17
+ # 🔱 Loading local engine (SmolLM2 - Stable Version)
18
  print("🔱 Initializing Internal Neural Core...")
19
+ pipe = pipeline(
20
+ "text-generation",
21
+ model="HuggingFaceTB/SmolLM2-1.7B-Instruct",
22
+ device_map="auto",
23
+ torch_dtype=torch.bfloat16 # RAM එක ඉතිරි කර ගැනීමට
24
+ )
25
  print("🔱 Engine Ready!")
26
 
27
  def web_search(query):
 
36
  try:
37
  data = await request.json()
38
  user_prompt = data.get("prompt")
 
39
  context = web_search(user_prompt)
40
 
41
  messages = [
 
44
  ]
45
 
46
  prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
47
+ outputs = pipe(prompt, max_new_tokens=500, do_sample=True, temperature=0.7)
48
+
49
+ # පිළිතුර පමණක් වෙන් කර ගැනීම
50
+ generated_text = outputs[0]["generated_text"]
51
+ response = generated_text.split("<|im_start|>assistant")[-1].strip()
52
 
53
+ return {"response": response}
54
 
55
  except Exception as e:
56
  return {"error": str(e)}