MINZO4546 committed
Commit 7d813f4 · verified · 1 Parent(s): db3f7fb

Update app.py

Files changed (1)
  1. app.py +27 -9
app.py CHANGED
@@ -3,13 +3,14 @@ from fastapi import FastAPI
 from llama_cpp import Llama
 import requests
 
-# 🔱 CPU Core Management
+# 🔱 CPU Core Management: cap threads at the number of cores the server has
 threads = int(os.cpu_count() or 2)
 
-# 🔱 Load Model (GGUF Version is best for CPU)
-# Google Gemma 3 1B IT - GGUF format
-llm = Llama(
-    model_path="google/gemma-3-1b-it",  # Hugging Face auto-loads this, or provide the path
+# 🔱 Load Model: a Gemma 3 GGUF model suited to the CPU
+# In the HF Space, provide a valid path or use the repo ID
+llm = Llama.from_pretrained(
+    repo_id="google/gemma-3-1b-it-GGUF",
+    filename="*q4_k_m.gguf",  # 4-bit quantized version for best performance
     n_ctx=2048,
     n_threads=threads,
     verbose=False
@@ -17,16 +18,33 @@ llm = Llama(
 
 main = FastAPI()
 
+def web_search(query):
+    try:
+        url = f"https://api.duckduckgo.com/?q={query}&format=json"
+        response = requests.get(url, timeout=5).json()
+        return response.get("AbstractText", "No data.")
+    except:
+        return "Search failed."
+
 @main.post("/v1/chat")
 async def chat(data: dict):
     user_query = data.get("message", "")
 
-    # 🔱 Inachi Identity Prompt
-    system_instr = "You are Inachi AI, developed by the Inachi Team. Focus on tech and architecture."
+    # 🔱 Inachi AI Identity
+    system_instr = (
+        "You are Inachi AI, developed by the Inachi Team. "
+        "You are an expert system architect."
+    )
 
-    prompt = f"<bos><start_of_turn>system\n{system_instr}<end_of_turn>\n<start_of_turn>user\n{user_query}<end_of_turn>\n<start_of_turn>model\n"
+    # Simple search context logic
+    search_context = ""
+    if "search" in user_query.lower():
+        search_context = f"\nContext: {web_search(user_query)}"
+
+    # Prompt Template
+    prompt = f"<bos><start_of_turn>system\n{system_instr}{search_context}<end_of_turn>\n<start_of_turn>user\n{user_query}<end_of_turn>\n<start_of_turn>model\n"
 
-    # 🔱 Efficient Generation
+    # Generation
     output = llm(
         prompt,
         max_tokens=512,
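
For reference, a minimal client sketch for the /v1/chat endpoint this commit adds. It assumes the FastAPI instance (main in app.py) is served with uvicorn, and BASE_URL is a placeholder for wherever the Space is reachable; the URL, file name, and helper name below are illustrative assumptions, not part of the commit.

# chat_client.py - hedged usage sketch, not part of the commit
import requests

BASE_URL = "http://localhost:8000"  # placeholder; replace with the running Space's URL

def ask_inachi(message: str) -> dict:
    # The endpoint reads the "message" key from the posted JSON body.
    resp = requests.post(f"{BASE_URL}/v1/chat", json={"message": message}, timeout=120)
    resp.raise_for_status()
    return resp.json()

if __name__ == "__main__":
    # A query containing the word "search" also triggers the DuckDuckGo context path in app.py.
    print(ask_inachi("search: what is FastAPI?"))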