Spaces:

MINZO4546
/

minzo-api

Build error

App Files Files Community

MINZO4546 committed 24 days ago

Commit

249aa04

verified ·

1 Parent(s): ba18023

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -87

app.py CHANGED Viewed

@@ -1,15 +1,18 @@
-import os
-import torch
-import uuid
-import re
 from fastapi import FastAPI, Header, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from duckduckgo_search import DDGS
 app = FastAPI()
-# 🔱 CORS Setup
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -17,103 +20,102 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# --- 🔱 Specialist DB (Memory Based) ---
-# සටහන: සර්වර් එක Restart වූ විට මේවා මැකේ. ස්ථිර කිරීමට DB එකක් අවශ්‍යයි.
 API_KEYS_DB = {
-    "ELE-PRIME-ADMIN-SYS": {"limit": 100000, "used": 0, "status": "active", "owner": "MINZO-PRIME"},
 }
-# --- 🔱 Model Configuration (CPU Stable Engine) ---
-MODEL_ID = "google/gemma-3-270m"
-print(f"🔱 INACHI-CORE: Launching Gemma-3 on CPU Engine...")
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-# float16 නිසා එන 'NaN' error එක වැළැක්වීමට float32 පාවිච්චි කරමු
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    torch_dtype=torch.float32,
-    low_cpu_mem_usage=True,
-    device_map="cpu"
-)
-# --- 🔱 Web Context Retrieval ---
-def get_web_context(query: str):
-    try:
-        with DDGS() as ddgs:
-            # නව ddgs version එකට ගැලපෙන පරිදි update කර ඇත
-            results = [r['body'] for r in ddgs.text(query, max_results=3)]
-            return "\n".join(results)
-    except Exception as e:
-        print(f"Search Error: {e}")
-        return ""
-# --- 🔱 Admin Routes ---
-@app.get("/sys/generate-key")
-async def create_key(admin_key: str = Header(None)):
-    """අලුත් API Keys සාදා ගැනීමට: Header එකේ 'admin-key' ලෙස ELE-PRIME-ADMIN-SYS ලබා දෙන්න."""
-    if admin_key != "ELE-PRIME-ADMIN-SYS":
-        raise HTTPException(status_code=403, detail="Unauthorized Specialist Access")
-    new_key = f"ELE-PRIME-{uuid.uuid4().hex[:8].upper()}"
-    API_KEYS_DB[new_key] = {"limit": 5000, "used": 0, "status": "active", "owner": "Specialist"}
-    return {"status": "success", "new_key": new_key}
-# --- 🔱 Chat Endpoint ---
 @app.post("/v1/chat")
 async def chat(message: dict, x_api_key: str = Header(None)):
-    # 1. API Key Validation
     if not x_api_key or x_api_key not in API_KEYS_DB:
-        raise HTTPException(status_code=403, detail="Invalid Specialist Key")
     key_info = API_KEYS_DB[x_api_key]
     if key_info["used"] >= key_info["limit"]:
-        raise HTTPException(status_code=429, detail="API Limit Reached for this Key")
-    user_query = message.get("query", "")
-    web_data = get_web_context(user_query)
-    # 2. Inachi System Prompt
-    system_prompt = (
-        "You are Inachi-Prime, a multimodal AI developed by Specialist MINZO-PRIME. "
-        "Respond directly without internal thought process. "
-        f"\nContext: {web_data}"
     )
-    full_prompt = f"<start_of_turn>system\n{system_prompt}<end_of_turn>\n<start_of_turn>user\n{user_query}<end_of_turn>\n<start_of_turn>model\n"
-    inputs = tokenizer(full_prompt, return_tensors="pt")
-    # 3. Generation Logic (Stable Parameters)
     with torch.no_grad():
-        try:
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=512,
-                temperature=0.7,
-                do_sample=True,
-                renormalize_logits=True # Probability Error එක වැළැක්වීමට
-            )
-            full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-            # Response එක පිරිසිදු කිරීම
-            final_reply = full_response.split("model\n")[-1].strip()
-            final_reply = re.sub(r'<thought>.*?</thought>', '', final_reply, flags=re.DOTALL).strip()
-            # 4. Usage tracking
-            API_KEYS_DB[x_api_key]["used"] += 1
-            return {
-                "reply": final_reply,
-                "usage": f"{API_KEYS_DB[x_api_key]['used']}/{API_KEYS_DB[x_api_key]['limit']}"
-            }
-        except RuntimeError as e:
-            print(f"Generation Error: {e}")
-            return {"reply": "Core engine destabilized. Retrying process recommended."}
-if __name__ == "__main__":
-    import uvicorn
-    # Hugging Face Space සඳහා Port 7860 අනිවාර්යයි
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 from fastapi import FastAPI, Header, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+import torch
+import os
+import json
+import re
+import uuid
+import secrets
+import datetime
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from duckduckgo_search import DDGS
 app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
+# --- Database & Config ---
+# Initial keys. The store is a plain in-memory dict, so keys added at runtime
+# do not survive a process restart.
 API_KEYS_DB = {
+    "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"},
+    "ELE-PRIME-YG5EPZFQ": {"limit": 5000, "used": 0, "status": "active"}
 }
+# Password required by /v1/generate-key. Read from the ADMIN_SECRET environment
+# variable when it is set so the secret can be kept out of the repository; the
+# literal fallback keeps existing deployments working unchanged.
+ADMIN_SECRET = os.environ.get("ADMIN_SECRET", "MINZO-SECRET-2026")
+# --- AI Model ---
+# Checkpoint identifier handed to both the tokenizer and the model loader.
+model_id = "google/gemma-3-270m"
+print(f"🔱 INACHI-CORE: Loading {model_id}...")
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+# Weights are placed on the CPU; the dtype is left for the library to choose.
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype="auto",
+    device_map="cpu",
+)
+# --- Data Models ---
+# Request body accepted by the key-generation endpoint.
+class AdminRequest(BaseModel):
+    # Password compared against ADMIN_SECRET before a key is issued.
+    admin_pass: str
+    # Request quota stored on the new key; defaults to 1000 when omitted.
+    limit: int = 1000
+# --- API Endpoints ---
+@app.get("/")
+def home():
+    # Status probe: a fixed banner plus the number of keys currently registered.
+    key_count = len(API_KEYS_DB)
+    payload = {"status": "Elephant Pro Active", "active_keys": key_count}
+    return payload
+# 🔱 Endpoint that auto-generates a new API key
+@app.post("/v1/generate-key")
+async def generate_key(data: AdminRequest):
+    """Issue a new API key after verifying the admin password.
+
+    Args:
+        data: Request body carrying ``admin_pass`` and the ``limit`` (request
+            quota) to store on the new key.
+
+    Returns:
+        A dict with a confirmation ``message``, the new ``api_key`` and its
+        ``limit``.
+
+    Raises:
+        HTTPException: 401 when ``admin_pass`` does not match ``ADMIN_SECRET``;
+            400 when ``limit`` is not positive.
+    """
+    # Constant-time comparison so response timing does not reveal how much of
+    # the password matched. Compared as bytes because compare_digest rejects
+    # non-ASCII str arguments.
+    supplied = data.admin_pass.encode("utf-8")
+    expected = ADMIN_SECRET.encode("utf-8")
+    if not secrets.compare_digest(supplied, expected):
+        raise HTTPException(status_code=401, detail="Unauthorized Specialist Access!")
+    # A key with a zero or negative quota would be refused by /v1/chat on its
+    # very first request, so reject it here instead of issuing a dead key.
+    if data.limit <= 0:
+        raise HTTPException(status_code=400, detail="limit must be a positive integer")
+    # Create a random key (e.g. ELE-PRIME-X8A2...). Redraw on the unlikely
+    # collision so an existing key's record is never overwritten.
+    new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
+    while new_key in API_KEYS_DB:
+        new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
+    API_KEYS_DB[new_key] = {"limit": data.limit, "used": 0, "status": "active"}
+    return {
+        "message": "New Specialist Key Activated",
+        "api_key": new_key,
+        "limit": data.limit
+    }
 @app.post("/v1/chat")
 async def chat(message: dict, x_api_key: str = Header(None)):
+    """Answer a chat query with the local model, metered per API key.
+
+    Args:
+        message: JSON body; the user's text is read from its ``query`` field.
+        x_api_key: API key taken from the ``x-api-key`` request header.
+
+    Returns:
+        A dict with the model's ``reply`` and the key's updated ``usage`` count.
+
+    Raises:
+        HTTPException: 403 for a missing or unknown key, 429 when the key's
+            quota is exhausted, 400 when ``query`` is not a string.
+    """
     if not x_api_key or x_api_key not in API_KEYS_DB:
+        raise HTTPException(status_code=403, detail="Access Denied")
     key_info = API_KEYS_DB[x_api_key]
     if key_info["used"] >= key_info["limit"]:
+        raise HTTPException(status_code=429, detail="Limit Reached")
+    query = message.get("query", "")
+    # The body is an untyped dict, so "query" may be a number, list, etc.;
+    # reject it here rather than failing later on .lower().
+    if not isinstance(query, str):
+        raise HTTPException(status_code=400, detail="query must be a string")
+    # Web Search: only attempted when the query contains a time-related keyword.
+    context = ""
+    if any(w in query.lower() for w in ["today", "now", "2026", "අද"]):
+        try:
+            with DDGS() as ddgs:
+                results = list(ddgs.text(query, max_results=2))
+            context = "\n".join([r['body'] for r in results])
+        except Exception as exc:
+            # Search is best-effort: log the failure and answer without context.
+            print(f"Search Error: {exc}")
+    # 🔱 Language Adaptive System Instruction
+    system_instruction = (
+        "You are Elephant AI (Inachi-Core), an expert assistant for Specialist MINZO-PRIME. "
+        "Respond in the language used by the user (Sinhala or English). "
+        f"Real-time Context: {context}"
     )
+    msgs = [
+        {"role": "system", "content": system_instruction},
+        {"role": "user", "content": query}
+    ]
+    text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
+    inputs = tokenizer([text], return_tensors="pt").to("cpu")
+    # Number of prompt tokens; generate() returns prompt + continuation.
+    prompt_len = inputs.input_ids.shape[-1]
     with torch.no_grad():
+        outputs = model.generate(
+            **inputs,  # pass the attention mask along with the input ids
+            max_new_tokens=512,
+            temperature=0.6,
+            top_p=0.9,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id
+        )
+    # Decode only the newly generated tokens. Splitting the full decoded text
+    # on the word "assistant" is unreliable: the system prompt itself contains
+    # that word, so prompt text would leak into the reply.
+    ans = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
+    # Cleaning Logic: drop any reasoning preamble, map byte-level BPE markers
+    # back to whitespace, and collapse runs of spaces.
+    if "</think>" in ans:
+        ans = ans.split("</think>")[-1].strip()
+    ans = ans.replace("Ċ", "\n").replace("Ġ", " ")
+    ans = re.sub(r' +', ' ', ans).strip()
+    # Count the request only after a reply was produced.
+    API_KEYS_DB[x_api_key]["used"] += 1
+    return {"reply": ans, "usage": API_KEYS_DB[x_api_key]["used"]}
+# Second module-level name bound to the same FastAPI instance — presumably so
+# the app can also be served as "<module>:main"; TODO confirm against the
+# Space's launch configuration.
+main = app