Update app.py
app.py
CHANGED
@@ -3,13 +3,14 @@ from fastapi import FastAPI
 from llama_cpp import Llama
 import requests
 
-# 🔱 CPU Core Management
+# 🔱 CPU Core Management: cap threads at the number of cores the server has
 threads = int(os.cpu_count() or 2)
 
-# 🔱 Load Model
-#
-llm = Llama(
-
+# 🔱 Load Model: the Gemma 3 GGUF model that suits the CPU
+# On the HF Space, give a correct path or use the Repo ID
+llm = Llama.from_pretrained(
+    repo_id="google/gemma-3-1b-it-GGUF",
+    filename="*q4_k_m.gguf",  # 4-bit quantized version for best performance
     n_ctx=2048,
     n_threads=threads,
     verbose=False
@@ -17,16 +18,33 @@ llm = Llama(
 
 main = FastAPI()
 
+def web_search(query):
+    try:
+        url = f"https://api.duckduckgo.com/?q={query}&format=json"
+        response = requests.get(url, timeout=5).json()
+        return response.get("AbstractText", "No data.")
+    except:
+        return "Search failed."
+
 @main.post("/v1/chat")
 async def chat(data: dict):
     user_query = data.get("message", "")
 
-    # 🔱 Inachi
-    system_instr =
+    # 🔱 Inachi AI Identity
+    system_instr = (
+        "You are Inachi AI, developed by the Inachi Team. "
+        "You are an expert system architect."
+    )
 
-
+    # Simple search context logic
+    search_context = ""
+    if "search" in user_query.lower():
+        search_context = f"\nContext: {web_search(user_query)}"
+
+    # Prompt Template
+    prompt = f"<bos><start_of_turn>system\n{system_instr}{search_context}<end_of_turn>\n<start_of_turn>user\n{user_query}<end_of_turn>\n<start_of_turn>model\n"
 
-    #
+    # Generation
     output = llm(
         prompt,
         max_tokens=512,
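
The preview truncates inside the generation call. For reference only, here is a minimal sketch of how a llama-cpp-python completion call of this shape is typically finished and returned; the stop marker and the response key are illustrative assumptions, not lines from the commit:

    # Hypothetical continuation (not shown in the diff above)
    output = llm(
        prompt,
        max_tokens=512,
        stop=["<end_of_turn>"],  # assumed: matches the Gemma turn delimiter used in the prompt
    )
    # llama-cpp-python returns an OpenAI-style completion dict
    return {"response": output["choices"][0]["text"].strip()}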
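Once the Space is up, the new /v1/chat route can be exercised with any HTTP client. A minimal sketch, assuming a hypothetical base URL (substitute the real Space endpoint); including the word "search" in the message is what routes the query through web_search():

import requests

BASE_URL = "http://localhost:7860"  # hypothetical; replace with the actual Space URL

resp = requests.post(
    f"{BASE_URL}/v1/chat",
    json={"message": "search the latest Gemma release"},
    timeout=120,
)
print(resp.json())

One caveat on the helper itself: DuckDuckGo's Instant Answer API frequently returns an empty AbstractText, and since the code uses response.get("AbstractText", "No data."), the "No data." fallback only fires when the key is missing entirely; an empty string passes through as empty context.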