Update app.py
app.py
CHANGED
@@ -3,13 +3,14 @@ from fastapi import FastAPI
 from llama_cpp import Llama
 import requests
 
-# 🔱 CPU Core Management
+# 🔱 CPU Core Management: cap threads at the number of cores the server has
 threads = int(os.cpu_count() or 2)
 
-# 🔱 Load Model
-#
-llm = Llama(
-
+# 🔱 Load Model: the Gemma 3 GGUF model that suits the CPU
+# On the HF Space, give a correct path or use the Repo ID
+llm = Llama.from_pretrained(
+    repo_id="google/gemma-3-1b-it-GGUF",
+    filename="*q4_k_m.gguf",  # 4-bit quantized version for best performance
     n_ctx=2048,
     n_threads=threads,
     verbose=False
@@ -17,16 +18,33 @@ llm = Llama(
 
 main = FastAPI()
 
+def web_search(query):
+    try:
+        url = f"https://api.duckduckgo.com/?q={query}&format=json"
+        response = requests.get(url, timeout=5).json()
+        return response.get("AbstractText", "No data.")
+    except:
+        return "Search failed."
+
 @main.post("/v1/chat")
 async def chat(data: dict):
     user_query = data.get("message", "")
 
-    # 🔱 Inachi
-    system_instr =
+    # 🔱 Inachi AI Identity
+    system_instr = (
+        "You are Inachi AI, developed by the Inachi Team. "
+        "You are an expert system architect."
+    )
 
-
+    # Simple search context logic
+    search_context = ""
+    if "search" in user_query.lower():
+        search_context = f"\nContext: {web_search(user_query)}"
+
+    # Prompt Template
+    prompt = f"<bos><start_of_turn>system\n{system_instr}{search_context}<end_of_turn>\n<start_of_turn>user\n{user_query}<end_of_turn>\n<start_of_turn>model\n"
 
-    #
+    # Generation
     output = llm(
         prompt,
         max_tokens=512,
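
The preview truncates inside the generation call. For reference only, here is a minimal sketch of how a llama-cpp-python completion call of this shape is typically finished and returned; the stop marker and the response key are illustrative assumptions, not lines from the commit:

    # Hypothetical continuation (not shown in the diff above)
    output = llm(
        prompt,
        max_tokens=512,
        stop=["<end_of_turn>"],  # assumed: matches the Gemma turn delimiter used in the prompt
    )
    # llama-cpp-python returns an OpenAI-style completion dict
    return {"response": output["choices"][0]["text"].strip()}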
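Once the Space is up, the new /v1/chat route can be exercised with any HTTP client. A minimal sketch, assuming a hypothetical base URL (substitute the real Space endpoint); including the word "search" in the message is what routes the query through web_search():

import requests

BASE_URL = "http://localhost:7860"  # hypothetical; replace with the actual Space URL

resp = requests.post(
    f"{BASE_URL}/v1/chat",
    json={"message": "search the latest Gemma release"},
    timeout=120,
)
print(resp.json())

One caveat on the helper itself: DuckDuckGo's Instant Answer API frequently returns an empty AbstractText, and since the code uses response.get("AbstractText", "No data."), the "No data." fallback only fires when the key is missing entirely; an empty string passes through as empty context.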