Update app.py
app.py CHANGED
@@ -1,71 +1,42 @@
-import torch
 import os
-import requests
 from fastapi import FastAPI
-from
+from llama_cpp import Llama
+import requests
 
-# 🔱 CPU Core Management: Stop 99% CPU Usage
-# An HF Free Space usually has 2 CPU cores, so we limit it to 2
-os.environ["OMP_NUM_THREADS"] = "2"
-os.environ["MKL_NUM_THREADS"] = "2"
-torch.set_num_threads(2)
-
-main = FastAPI()
+# 🔱 CPU Core Management
+threads = int(os.cpu_count() or 2)
 
-# 🔱
-
-# 🔱
-#
-
-"
-
-    trust_remote_code=True
+# 🔱 Load Model (GGUF Version is best for CPU)
+# Google Gemma 3 1B IT - GGUF format
+llm = Llama(
+    model_path="google/gemma-3-1b-it",  # auto-loaded via Hugging Face, or provide the path
+    n_ctx=2048,
+    n_threads=threads,
+    verbose=False
 )
 
-def web_search(query):
-    try:
-        # Simple DuckDuckGo API for search context
-        url = f"https://api.duckduckgo.com/?q={query}&format=json"
-        response = requests.get(url, timeout=5).json()
-        return response.get("AbstractText", "No specific data found.")
-    except:
-        return "Search unavailable."
+main = FastAPI()
 
 @main.post("/v1/chat")
 async def chat(data: dict):
     user_query = data.get("message", "")
 
-    # 🔱
-    system_prompt = (
-        "You are Inachi AI, a highly advanced assistant developed by the Inachi Team. "
-        "You are an expert in system architecture and web development. "
-        "Always identify as Inachi AI."
-    )
+    # 🔱 Inachi Identity Prompt
+    system_instr = "You are Inachi AI, developed by the Inachi Team. Focus on tech and architecture."
 
-    search_context = ""
-    if "search" in user_query.lower():
-        search_context = f"\nWeb Context: {web_search(user_query)}"
-
-    # Prompt construction
-    full_prompt = f"{system_prompt}\n{search_context}\nUser: {user_query}\nInachi AI:"
+    prompt = f"<bos><start_of_turn>system\n{system_instr}<end_of_turn>\n<start_of_turn>user\n{user_query}<end_of_turn>\n<start_of_turn>model\n"
 
-    # 🔱
-
-        top_p=0.9
+    # 🔱 Efficient Generation
+    output = llm(
+        prompt,
+        max_tokens=512,
+        stop=["<end_of_turn>"],
+        echo=False
     )
 
-    reply =
+    reply = output['choices'][0]['text'].strip()
     return {"reply": reply}
 
 if __name__ == "__main__":
     import uvicorn
-    # HF Spaces uses port 7860 by default
     uvicorn.run(main, host="0.0.0.0", port=7860)
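A caveat on the new loading code: in llama-cpp-python, Llama(model_path=...) expects a local .gguf file and will not resolve a Hub repo id such as "google/gemma-3-1b-it" on its own. If the intent is to pull the weights straight from the Hub, the library's Llama.from_pretrained helper is the usual route. A minimal sketch, with repo_id and filename as placeholders (google/gemma-3-1b-it is the transformers repo; a GGUF build lives under its own repo and filename):

# Sketch only: download a GGUF build from the Hub instead of passing a repo id
# to model_path. repo_id and filename are PLACEHOLDERS; point them at a real
# GGUF repo/file for this model.
import os
from llama_cpp import Llama

threads = int(os.cpu_count() or 2)
llm = Llama.from_pretrained(
    repo_id="google/gemma-3-1b-it",  # placeholder: substitute an actual GGUF repo
    filename="*.gguf",               # glob matching the quantized file to fetch
    n_ctx=2048,
    n_threads=threads,
    verbose=False,
)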
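The hand-built prompt is also worth a second look: llama.cpp typically prepends BOS itself during tokenization, so the literal <bos> in the f-string can come through twice. One alternative is create_chat_completion, which applies the chat template stored in the GGUF metadata. A sketch under that assumption (Gemma's template may not accept a separate system role; if so, fold system_instr into the user turn):

# Sketch: let llama-cpp-python apply the model's own chat template instead of
# hand-building <start_of_turn> markers (also avoids a possible double BOS).
output = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": system_instr},  # may need merging into the user turn on Gemma
        {"role": "user", "content": user_query},
    ],
    max_tokens=512,
)
reply = output["choices"][0]["message"]["content"].strip()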
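For a quick smoke test of the endpoint, assuming the app is running locally on the port used above:

# Smoke test for POST /v1/chat; localhost:7860 matches the uvicorn.run call above.
import requests

resp = requests.post(
    "http://localhost:7860/v1/chat",
    json={"message": "Who are you?"},
    timeout=120,  # the first request may be slow while the model warms up on CPU
)
print(resp.json()["reply"])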