Spaces:

MINZO4546
/

minzo-api

Build error

App Files Files Community

MINZO4546 committed 24 days ago

Commit

4e68fa6

verified ·

1 Parent(s): 7948022

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -21

app.py CHANGED Viewed

@@ -25,11 +25,20 @@ API_KEYS_DB = {
 ADMIN_SECRET = "MINZO-SECRET-2026"
 LEARNING_VAULT = "neural_learning_data.jsonl"
-# --- AI Model (Qwen-2.5-1.5B) ---
-model_id = "Qwen/Qwen2.5-1.5B-Instruct"
-print("🐘 Elephant Node v3.7 Loading...")
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="cpu")
 # --- Data Models ---
 class KeyRequest(BaseModel):
@@ -38,10 +47,9 @@ class KeyRequest(BaseModel):
     limit: int = 100
 # --- API Endpoints ---
 @app.get("/")
 def home():
-    return {"status": "Elephant Pro Active", "keys": len(API_KEYS_DB)}
 @app.post("/admin/add-key")
 async def add_key(data: KeyRequest):
@@ -52,7 +60,6 @@ async def add_key(data: KeyRequest):
 @app.get("/v1/usage")
 async def get_usage(x_api_key: str = Header(None)):
-    """Key එකේ පාවිච්චිය පරීක්ෂා කිරීමේ Endpoint එක"""
     if not x_api_key or x_api_key not in API_KEYS_DB:
         raise HTTPException(status_code=403, detail="Invalid Key")
     info = API_KEYS_DB[x_api_key]
@@ -64,34 +71,48 @@ async def get_usage(x_api_key: str = Header(None)):
 @app.post("/v1/chat")
 async def chat(message: dict, x_api_key: str = Header(None)):
-    if x_api_key not in API_KEYS_DB:
-        raise HTTPException(status_code=403)
     key_info = API_KEYS_DB[x_api_key]
     if key_info["used"] >= key_info["limit"]:
         raise HTTPException(status_code=429, detail="Limit Reached")
-    query = message.get("query", "")
     # 2026 Web Search Logic
     context = ""
-    if any(w in query.lower() for w in ["today", "now", "2026"]):
         try:
             with DDGS() as ddgs:
                 context = "\n".join([r['body'] for r in ddgs.text(query, max_results=2)])
         except: pass
-    # AI Inference
-    msgs = [{"role": "system", "content": f"Elephant AI. 2026 mode. Context: {context}"}, {"role": "user", "content": query}]
     text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer([text], return_tensors="pt").to("cpu")
     with torch.no_grad():
-        ids = model.generate(inputs.input_ids, max_new_tokens=256)
-        ans = tokenizer.batch_decode(ids, skip_special_tokens=True)[0].split("assistant")[-1].strip()
     # Update Stats
     API_KEYS_DB[x_api_key]["used"] += 1
-    return {"reply": ans, "usage": API_KEYS_DB[x_api_key]["used"]}
-main = app

 ADMIN_SECRET = "MINZO-SECRET-2026"
 LEARNING_VAULT = "neural_learning_data.jsonl"
+# --- AI Model (Gemma-3-1B-it Upgrade) ---
+# Specialist note: bfloat16 is used here so the model takes roughly 3 GB of the host's 18 GB RAM (author's estimate, not verified).
+model_id = "google/gemma-3-1b-it"
+HF_TOKEN = os.getenv("HF_TOKEN")  # access token read from the environment; None when the variable is unset
+print(f"🐘 Elephant Node v3.7 Loading: {model_id}...")
+tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.bfloat16,
+    device_map="cpu", # CPU-only placement; the author expects this model to run very fast on CPU
+    token=HF_TOKEN
+)
 # --- Data Models ---
 class KeyRequest(BaseModel):
     limit: int = 100
 # --- API Endpoints ---
 @app.get("/")
 def home():
     """Report the service status string and the number of registered API keys."""
     registered_keys = len(API_KEYS_DB)
     return {"status": "Elephant Pro Active (Gemma 3)", "keys": registered_keys}
 @app.post("/admin/add-key")
 async def add_key(data: KeyRequest):
 @app.get("/v1/usage")
 async def get_usage(x_api_key: str = Header(None)):
     if not x_api_key or x_api_key not in API_KEYS_DB:
         raise HTTPException(status_code=403, detail="Invalid Key")
     info = API_KEYS_DB[x_api_key]
 @app.post("/v1/chat")
 async def chat(message: dict, x_api_key: str = Header(None)):
     """Answer a chat query with the loaded model, metered per API key.

     Args:
         message: Request body; the prompt is read from its ``"query"`` key.
         x_api_key: API key supplied through the request header.

     Returns:
         A dict with the generated ``"reply"``, the key's updated ``"usage"``
         count, and the ``"model"`` label.

     Raises:
         HTTPException: 403 when the key is missing or unknown, 429 when the
             key has reached its limit, 400 when ``"query"`` is not a
             non-empty string.
     """
     import logging
     if not x_api_key or x_api_key not in API_KEYS_DB:
         raise HTTPException(status_code=403, detail="Access Denied")
     key_info = API_KEYS_DB[x_api_key]
     if key_info["used"] >= key_info["limit"]:
         raise HTTPException(status_code=429, detail="Limit Reached")
     # The body is an arbitrary dict, so validate the prompt before calling
     # str methods on it; a bad request must not count against the quota.
     query = message.get("query", "")
     if not isinstance(query, str) or not query.strip():
         raise HTTPException(status_code=400, detail="'query' must be a non-empty string")
     # 2026 Web Search Logic: best-effort lookup for time-sensitive prompts.
     context = ""
     lowered_query = query.lower()
     if any(word in lowered_query for word in ("today", "now", "2026", "current")):
         try:
             with DDGS() as ddgs:
                 hits = ddgs.text(query, max_results=2)
                 context = "\n".join(hit["body"] for hit in hits)
         except Exception as exc:
             # Search is optional: log the failure and answer without context.
             logging.getLogger(__name__).warning("Web search failed: %s", exc)
     # --- Gemma 3 Inference ---
     # The identity and the search context are carried in the system message.
     msgs = [
         {"role": "system", "content": f"You are Elephant AI by MINZO-PRIME. 2026 mode enabled. Context: {context}"},
         {"role": "user", "content": query},
     ]
     text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
     # The rendered template already contains the special tokens, so do not
     # let the tokenizer add them a second time.
     inputs = tokenizer([text], return_tensors="pt", add_special_tokens=False).to("cpu")
     prompt_length = inputs.input_ids.shape[-1]
     # NOTE(review): generation is blocking CPU work inside an async handler;
     # consider moving it to a worker thread if concurrent requests matter.
     with torch.no_grad():
         ids = model.generate(
             **inputs,  # passes the attention mask along with input_ids
             max_new_tokens=450,  # a larger budget for fuller answers
             temperature=0.7,
             do_sample=True,
         )
     # Decode only the newly generated tokens. Splitting the full transcript
     # on the word "model" would truncate any reply that contains that word.
     ans = tokenizer.decode(ids[0][prompt_length:], skip_special_tokens=True).strip()
     # Update Stats (key_info is the same dict object stored in API_KEYS_DB).
     key_info["used"] += 1
     return {"reply": ans, "usage": key_info["used"], "model": "Gemma-3-1B"}
+if __name__ == "__main__":  # only when this file is executed as a script, not when imported
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)  # serve the app on all interfaces, port 7860