Update app.py
app.py CHANGED
@@ -1,71 +1,42 @@
-import torch
 import os
-import requests
 from fastapi import FastAPI
-from
+from llama_cpp import Llama
+import requests
 
-# 🔱 CPU Core Management: Stop 99% CPU Usage
-# An HF Free Space usually has 2 CPU cores, so we limit it to 2
-os.environ["OMP_NUM_THREADS"] = "2"
-os.environ["MKL_NUM_THREADS"] = "2"
-torch.set_num_threads(2)
-
-main = FastAPI()
+# 🔱 CPU Core Management
+threads = int(os.cpu_count() or 2)
 
-# 🔱
-
-# 🔱
-#
-
-"
-
-    trust_remote_code=True
+# 🔱 Load Model (GGUF Version is best for CPU)
+# Google Gemma 3 1B IT - GGUF format
+llm = Llama(
+    model_path="google/gemma-3-1b-it",  # auto-loaded via Hugging Face, or provide the path
+    n_ctx=2048,
+    n_threads=threads,
+    verbose=False
 )
 
-def web_search(query):
-    try:
-        # Simple DuckDuckGo API for search context
-        url = f"https://api.duckduckgo.com/?q={query}&format=json"
-        response = requests.get(url, timeout=5).json()
-        return response.get("AbstractText", "No specific data found.")
-    except:
-        return "Search unavailable."
+main = FastAPI()
 
 @main.post("/v1/chat")
 async def chat(data: dict):
     user_query = data.get("message", "")
 
-    # 🔱
-    system_prompt = (
-        "You are Inachi AI, a highly advanced assistant developed by the Inachi Team. "
-        "You are an expert in system architecture and web development. "
-        "Always identify as Inachi AI."
-    )
+    # 🔱 Inachi Identity Prompt
+    system_instr = "You are Inachi AI, developed by the Inachi Team. Focus on tech and architecture."
 
-    search_context = ""
-    if "search" in user_query.lower():
-        search_context = f"\nWeb Context: {web_search(user_query)}"
-
-    # Prompt construction
-    full_prompt = f"{system_prompt}\n{search_context}\nUser: {user_query}\nInachi AI:"
+    prompt = f"<bos><start_of_turn>system\n{system_instr}<end_of_turn>\n<start_of_turn>user\n{user_query}<end_of_turn>\n<start_of_turn>model\n"
 
-    # 🔱
-
-        top_p=0.9
+    # 🔱 Efficient Generation
+    output = llm(
+        prompt,
+        max_tokens=512,
+        stop=["<end_of_turn>"],
+        echo=False
     )
 
-    reply =
+    reply = output['choices'][0]['text'].strip()
     return {"reply": reply}
 
 if __name__ == "__main__":
     import uvicorn
-    # HF Spaces uses port 7860 by default
     uvicorn.run(main, host="0.0.0.0", port=7860)
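A caveat on the new loading code: in llama-cpp-python, Llama(model_path=...) expects a local .gguf file and will not resolve a Hub repo id such as "google/gemma-3-1b-it" on its own. If the intent is to pull the weights straight from the Hub, the library's Llama.from_pretrained helper is the usual route. A minimal sketch, with repo_id and filename as placeholders (google/gemma-3-1b-it is the transformers repo; a GGUF build lives under its own repo and filename):

# Sketch only: download a GGUF build from the Hub instead of passing a repo id
# to model_path. repo_id and filename are PLACEHOLDERS; point them at a real
# GGUF repo/file for this model.
import os
from llama_cpp import Llama

threads = int(os.cpu_count() or 2)
llm = Llama.from_pretrained(
    repo_id="google/gemma-3-1b-it",  # placeholder: substitute an actual GGUF repo
    filename="*.gguf",               # glob matching the quantized file to fetch
    n_ctx=2048,
    n_threads=threads,
    verbose=False,
)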
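The hand-built prompt is also worth a second look: llama.cpp typically prepends BOS itself during tokenization, so the literal <bos> in the f-string can come through twice. One alternative is create_chat_completion, which applies the chat template stored in the GGUF metadata. A sketch under that assumption (Gemma's template may not accept a separate system role; if so, fold system_instr into the user turn):

# Sketch: let llama-cpp-python apply the model's own chat template instead of
# hand-building <start_of_turn> markers (also avoids a possible double BOS).
output = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": system_instr},  # may need merging into the user turn on Gemma
        {"role": "user", "content": user_query},
    ],
    max_tokens=512,
)
reply = output["choices"][0]["message"]["content"].strip()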
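For a quick smoke test of the endpoint, assuming the app is running locally on the port used above:

# Smoke test for POST /v1/chat; localhost:7860 matches the uvicorn.run call above.
import requests

resp = requests.post(
    "http://localhost:7860/v1/chat",
    json={"message": "Who are you?"},
    timeout=120,  # the first request may be slow while the model warms up on CPU
)
print(resp.json()["reply"])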