Spaces:

MINZO4546
/

minzo-api

Build error

App Files Files Community

MINZO4546 committed on 24 days ago

Commit

5cac8e2

verified ·

1 Parent(s): 2fb9a37

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -33

app.py CHANGED Viewed

@@ -3,14 +3,12 @@ from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 import torch
 import os
-import json
-import datetime
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from duckduckgo_search import DDGS
 app = FastAPI()
-# CORS Fix for Dashboard connectivity
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -23,20 +21,20 @@ API_KEYS_DB = {
     "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"}
 }
 ADMIN_SECRET = "MINZO-SECRET-2026"
-LEARNING_VAULT = "neural_learning_data.jsonl"
-# --- AI Model (Gemma-3-1B-it Upgrade) ---
-# Specialist, අපි මෙතනදී bfloat16 පාවිච්චි කරනවා ඔයාගේ 18GB RAM එකෙන් 3GB විතරක් වැය වෙන්න.
 model_id = "google/gemma-3-1b-it"
 HF_TOKEN = os.getenv("HF_TOKEN")
 print(f"🐘 Elephant Node v3.7 Loading: {model_id}...")
 tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    torch_dtype=torch.bfloat16,
-    device_map="cpu", # CPU එකේ ඉතාම වේගයෙන් මේක වැඩ කරයි
     token=HF_TOKEN
 )
@@ -49,7 +47,7 @@ class KeyRequest(BaseModel):
 # --- API Endpoints ---
 @app.get("/")
 def home():
-    return {"status": "Elephant Pro Active (Gemma 3)", "keys": len(API_KEYS_DB)}
 @app.post("/admin/add-key")
 async def add_key(data: KeyRequest):
@@ -65,33 +63,31 @@ async def get_usage(x_api_key: str = Header(None)):
     info = API_KEYS_DB[x_api_key]
     return {
         "used": info["used"],
-        "limit": info["limit"],
-        "percentage": (info["used"] / info["limit"]) * 100 if info["limit"] > 0 else 0
     }
 @app.post("/v1/chat")
 async def chat(message: dict, x_api_key: str = Header(None)):
     if not x_api_key or x_api_key not in API_KEYS_DB:
-        raise HTTPException(status_code=403, detail="Access Denied")
     key_info = API_KEYS_DB[x_api_key]
     if key_info["used"] >= key_info["limit"]:
         raise HTTPException(status_code=429, detail="Limit Reached")
     query = message.get("query", "")
     # 2026 Web Search Logic
     context = ""
-    if any(w in query.lower() for w in ["today", "now", "2026", "current"]):
-        try:
             with DDGS() as ddgs:
                 context = "\n".join([r['body'] for r in ddgs.text(query, max_results=2)])
-        except: pass
-    # --- Gemma 3 Inference ---
-    # Identity එක සහ Context එක System message එකට එකතු කළා
     msgs = [
-        {"role": "system", "content": f"You are Elephant AI by MINZO-PRIME. 2026 mode enabled. Context: {context}"},
         {"role": "user", "content": query}
     ]
@@ -99,20 +95,10 @@ async def chat(message: dict, x_api_key: str = Header(None)):
     inputs = tokenizer([text], return_tensors="pt").to("cpu")
     with torch.no_grad():
-        ids = model.generate(
-            inputs.input_ids,
-            max_new_tokens=450, # ටිකක් වැඩිපුර tokens ලබා දුන්නා හොඳ පිළිතුරක් සඳහා
-            temperature=0.7,
-            do_sample=True
-        )
-        # Gemma 3 හි Output එක පිරිසිදු කිරීම
-        full_response = tokenizer.batch_decode(ids, skip_special_tokens=True)[0]
-        ans = full_response.split("model")[-1].strip()
-    # Update Stats
     API_KEYS_DB[x_api_key]["used"] += 1
-    return {"reply": ans, "usage": API_KEYS_DB[x_api_key]["used"], "model": "Gemma-3-1B"}
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 from pydantic import BaseModel
 import torch
 import os
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from duckduckgo_search import DDGS
+# 🔱 Server එකට "app" කියන නමම අවශ්‍යයි
 app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"}
 }
 ADMIN_SECRET = "MINZO-SECRET-2026"
+# --- AI Model (Gemma-3-1B-it) ---
 model_id = "google/gemma-3-1b-it"
 HF_TOKEN = os.getenv("HF_TOKEN")
 print(f"🐘 Elephant Node v3.7 Loading: {model_id}...")
 tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
+# 🔱 [Fix] torch_dtype වෙනුවට dtype පාවිච්චි කරන ලදී
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
+    dtype=torch.bfloat16,
+    device_map="cpu",
     token=HF_TOKEN
 )
 # --- API Endpoints ---
 @app.get("/")
 def home():
+    return {"status": "Elephant Pro Active", "model": "Gemma-3-1B"}
 @app.post("/admin/add-key")
 async def add_key(data: KeyRequest):
     info = API_KEYS_DB[x_api_key]
     return {
         "used": info["used"],
+        "limit": info["limit"]
     }
 @app.post("/v1/chat")
 async def chat(message: dict, x_api_key: str = Header(None)):
     if not x_api_key or x_api_key not in API_KEYS_DB:
+        raise HTTPException(status_code=403)
     key_info = API_KEYS_DB[x_api_key]
     if key_info["used"] >= key_info["limit"]:
         raise HTTPException(status_code=429, detail="Limit Reached")
     query = message.get("query", "")
     # 2026 Web Search Logic
     context = ""
+    try:
+        if any(w in query.lower() for w in ["today", "now", "2026"]):
             with DDGS() as ddgs:
                 context = "\n".join([r['body'] for r in ddgs.text(query, max_results=2)])
+    except: pass
+    # AI Inference
     msgs = [
+        {"role": "system", "content": f"Elephant AI by MINZO-PRIME. 2026 Edition. Context: {context}"},
         {"role": "user", "content": query}
     ]
     inputs = tokenizer([text], return_tensors="pt").to("cpu")
     with torch.no_grad():
+        ids = model.generate(inputs.input_ids, max_new_tokens=300, temperature=0.7, do_sample=True)
+        full_ans = tokenizer.batch_decode(ids, skip_special_tokens=True)[0]
+        ans = full_ans.split("model")[-1].strip()
     API_KEYS_DB[x_api_key]["used"] += 1
+    return {"reply": ans, "usage": API_KEYS_DB[x_api_key]["used"]}