MINZO4546 committed · verified
Commit 4ce55c5 · 1 Parent(s): b8bfd06

Update app.py

Files changed (1)
app.py +22 -51
app.py CHANGED
@@ -1,71 +1,42 @@
-import torch
 import os
-import requests
 from fastapi import FastAPI
-from transformers import pipeline
-
-# 🔱 CPU Core Management: Stop 99% CPU Usage
-# An HF Free Space usually has 2 CPU cores, so we limit it to 2
-os.environ["OMP_NUM_THREADS"] = "2"
-os.environ["MKL_NUM_THREADS"] = "2"
-torch.set_num_threads(2)
-
-main = FastAPI()
+from llama_cpp import Llama
+import requests
 
-# 🔱 Inachi Identity Settings
-MODEL_ID = "google/gemma-3-1b-it"
+# 🔱 CPU Core Management
+threads = int(os.cpu_count() or 2)
 
-# 🔱 Optimized Pipeline
-# Using bfloat16 saves RAM and CPU memory
-pipe = pipeline(
-    "text-generation",
-    model=MODEL_ID,
-    device_map="cpu",
-    torch_dtype=torch.bfloat16,
-    trust_remote_code=True
+# 🔱 Load Model (GGUF version is best for CPU)
+# Google Gemma 3 1B IT - GGUF format
+llm = Llama(
+    model_path="google/gemma-3-1b-it",  # NOTE: Llama() needs a local .gguf file path, not a repo id; download the GGUF first or pass your own path
+    n_ctx=2048,
+    n_threads=threads,
+    verbose=False
 )
 
-def web_search(query):
-    try:
-        # Simple DuckDuckGo API for search context
-        url = f"https://api.duckduckgo.com/?q={query}&format=json"
-        response = requests.get(url, timeout=5).json()
-        return response.get("AbstractText", "No specific data found.")
-    except:
-        return "Search unavailable."
+main = FastAPI()
 
 @main.post("/v1/chat")
 async def chat(data: dict):
     user_query = data.get("message", "")
 
-    # 🔱 System Identity: Developed by Inachi Team
-    system_prompt = (
-        "You are Inachi AI, a highly advanced assistant developed by the Inachi Team. "
-        "You are an expert in system architecture and web development. "
-        "Always identify as Inachi AI."
-    )
+    # 🔱 Inachi Identity Prompt
+    system_instr = "You are Inachi AI, developed by the Inachi Team. Focus on tech and architecture."
 
-    # Search logic
-    search_context = ""
-    if "search" in user_query.lower():
-        search_context = f"\nWeb Context: {web_search(user_query)}"
-
-    # Prompt construction
-    full_prompt = f"{system_prompt}\n{search_context}\nUser: {user_query}\nInachi AI:"
+    prompt = f"<bos><start_of_turn>system\n{system_instr}<end_of_turn>\n<start_of_turn>user\n{user_query}<end_of_turn>\n<start_of_turn>model\n"
 
-    # 🔱 Inference with limited tokens for speed
-    results = pipe(
-        full_prompt,
-        max_new_tokens=512,
-        do_sample=True,
-        temperature=0.7,
-        top_p=0.9
+    # 🔱 Efficient Generation
+    output = llm(
+        prompt,
+        max_tokens=512,
+        stop=["<end_of_turn>"],
+        echo=False
    )
 
-    reply = results[0]['generated_text'].split("Inachi AI:")[-1].strip()
+    reply = output['choices'][0]['text'].strip()
     return {"reply": reply}
 
 if __name__ == "__main__":
     import uvicorn
-    # HF Spaces uses port 7860 by default
     uvicorn.run(main, host="0.0.0.0", port=7860)
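
The new model_path is a Hugging Face repo id, but llama_cpp.Llama() only opens a local GGUF file. A minimal sketch of one way to bridge that, assuming llama-cpp-python's Llama.from_pretrained helper (which fetches from the Hub via huggingface_hub); the repo id and filename pattern below are assumptions, not part of this commit:

import os
from llama_cpp import Llama

# Sketch only: repo id and filename pattern are assumptions.
# google/gemma-3-1b-it itself ships safetensors, not GGUF, so a
# GGUF build (e.g. the QAT release) is needed for llama.cpp.
llm = Llama.from_pretrained(
    repo_id="google/gemma-3-1b-it-qat-q4_0-gguf",  # assumed GGUF repo
    filename="*.gguf",                  # glob; matches the GGUF file in the repo
    n_ctx=2048,
    n_threads=int(os.cpu_count() or 2),
    verbose=False,
)

Note also that Gemma's reference chat format defines only user and model turns; the system turn in the new prompt may work in practice, but the official template folds system text into the first user turn.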
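
For a quick smoke test of the updated /v1/chat endpoint once the Space is running, assuming the default port 7860 from the diff (host and message here are placeholders):

import requests

resp = requests.post(
    "http://localhost:7860/v1/chat",   # placeholder host; use your Space URL
    json={"message": "Who built you?"},
    timeout=120,  # CPU-only generation of up to 512 tokens can take a while
)
print(resp.json()["reply"])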