Spaces:

MINZO4546
/

minzo-api

Build error

App Files Files Community

MINZO4546 committed 14 days ago

Commit

9154c39

verified ·

1 Parent(s): 1dea909

Update app.py

Browse files

Files changed (1) hide show

app.py +139 -50

app.py CHANGED Viewed

@@ -2,12 +2,8 @@ from fastapi import FastAPI, Header, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 import torch
-import os
-import json
 import re
-import uuid
 import secrets
-import datetime
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from duckduckgo_search import DDGS
@@ -20,102 +16,195 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# --- Database & Config ---
-# ආරම්භක Keys
 API_KEYS_DB = {
     "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"},
-    "ELE-PRIME-YG5EPZFQ": {"limit": 5000, "used": 0, "status": "active"}
 }
 ADMIN_SECRET = "MINZO-SECRET-2026"
-# --- AI Model ---
 model_id = "AngelSlim/Hy-MT1.5-1.8B-1.25bit"
-print(f"🔱 INACHI-CORE: Loading {model_id}...")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="cpu")
-# --- Data Models ---
 class AdminRequest(BaseModel):
     admin_pass: str
     limit: int = 1000
-# --- API Endpoints ---
 @app.get("/")
 def home():
-    return {"status": "Elephant Pro Active", "active_keys": len(API_KEYS_DB)}
-# 🔱 අලුතින් Key එකක් Auto-Generate කරන Endpoint එක
 @app.post("/v1/generate-key")
 async def generate_key(data: AdminRequest):
     if data.admin_pass != ADMIN_SECRET:
         raise HTTPException(status_code=401, detail="Unauthorized Specialist Access!")
-    # Random Key එකක් නිර්මාණය කිරීම (උදා: ELE-PRIME-X8A2...)
     new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
     API_KEYS_DB[new_key] = {"limit": data.limit, "used": 0, "status": "active"}
     return {
         "message": "New Specialist Key Activated",
         "api_key": new_key,
-        "limit": data.limit
     }
 @app.post("/v1/chat")
-async def chat(message: dict, x_api_key: str = Header(None)):
     if not x_api_key or x_api_key not in API_KEYS_DB:
         raise HTTPException(status_code=403, detail="Access Denied")
     key_info = API_KEYS_DB[x_api_key]
     if key_info["used"] >= key_info["limit"]:
         raise HTTPException(status_code=429, detail="Limit Reached")
-    query = message.get("query", "")
-    # Web Search
     context = ""
-    if any(w in query.lower() for w in ["today", "now", "2026", "අද"]):
-        try:
-            with DDGS() as ddgs:
-                results = list(ddgs.text(query, max_results=2))
-                context = "\n".join([r['body'] for r in results])
-        except: pass
-    # 🔱 Language Adaptive System Instruction
     system_instruction = (
         "You are Elephant AI (Inachi-Core), an expert assistant for Specialist MINZO-PRIME. "
-        "Respond in the language used by the user (Sinhala or English). "
-        f"Real-time Context: {context}"
     )
     msgs = [
-        {"role": "system", "content": system_instruction},
-        {"role": "user", "content": query}
     ]
-    text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer([text], return_tensors="pt").to("cpu")
     with torch.no_grad():
         outputs = model.generate(
-            inputs.input_ids,
             max_new_tokens=512,
-            temperature=0.6,
             top_p=0.9,
             do_sample=True,
-            pad_token_id=tokenizer.eos_token_id
         )
-        full_response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
-        ans = full_response.split("assistant")[-1].strip()
-        # Cleaning Logic
-        if "</think>" in ans: ans = ans.split("</think>")[-1].strip()
-        ans = ans.replace("Ċ", "\n").replace("Ġ", " ")
-        ans = re.sub(r' +', ' ', ans).strip()
     API_KEYS_DB[x_api_key]["used"] += 1
-    return {"reply": ans, "usage": API_KEYS_DB[x_api_key]["used"]}
 main = app

 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 import torch
 import re
 import secrets
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from duckduckgo_search import DDGS
     allow_headers=["*"],
 )
+# ── API Keys Database ──
+# Module-level key store. Each API key maps to its request quota ("limit"),
+# the number of requests counted so far ("used") and a "status" flag.
+# NOTE(review): this is a plain dict, so generated keys and usage counters
+# presumably vanish on restart — confirm that is intended.
 API_KEYS_DB = {
     "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"},
+    "ELE-PRIME-YG5EPZFQ":  {"limit": 5000,  "used": 0, "status": "active"},
 }
+# Password required by POST /v1/generate-key.
+# NOTE(review): the secret and the seed keys are hard-coded in source —
+# consider loading them from the environment / Space secrets instead.
 ADMIN_SECRET = "MINZO-SECRET-2026"
+# ── Load AI Model ──
+# The tokenizer and model are loaded once, at import time, and reused by
+# every request; device_map="cpu" keeps the model on the CPU.
 model_id = "AngelSlim/Hy-MT1.5-1.8B-1.25bit"
+print(f"Loading {model_id} ...")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id, torch_dtype="auto", device_map="cpu"
+)
+print("Model loaded.")
+# ── Pydantic Models ──
+# Body of POST /v1/generate-key: the admin password plus the request quota
+# ("limit", default 1000) assigned to the newly created key.
 class AdminRequest(BaseModel):
     admin_pass: str
     limit: int = 1000
+# Body of POST /v1/chat.
+class ChatRequest(BaseModel):
+    # User prompt; the chat endpoint answers HTTP 400 when it is blank
+    # after stripping whitespace.
+    query: str
+    # Client can set this to False to disable web search for one request.
+    search: bool = True
+    # How many DuckDuckGo results to inject into the system prompt.
+    # NOTE(review): no bounds are enforced on this value — consider
+    # validating its range.
+    max_results: int = 3
+# ──────────────────────────────────────
+# REAL-TIME WEB SEARCH HELPER
+# ──────────────────────────────────────
+def web_search(query: str, max_results: int = 3) -> str:
+    """
+    Search DuckDuckGo and format the hits as a context block for the prompt.
+
+    Args:
+        query: Text to search for.
+        max_results: Maximum number of hits to request. Values below 1
+            return "" without performing a search.
+
+    Returns:
+        A block delimited by "[WEB SEARCH RESULTS — Real-time]" and
+        "[END OF SEARCH RESULTS]" holding one numbered entry (title, body,
+        source URL) per hit. Returns "" when the query is blank, nothing is
+        found, or the search fails, so the caller can still answer without
+        web context.
+    """
+    # Skip the network round-trip when nothing useful can come back.
+    if max_results < 1 or not query or not query.strip():
+        return ""
+    try:
+        with DDGS() as ddgs:
+            results = list(
+                ddgs.text(
+                    query,
+                    max_results=max_results,
+                    safesearch="moderate",
+                    timelimit=None,   # do not restrict results by age
+                )
+            )
+        if not results:
+            return ""
+        lines = ["[WEB SEARCH RESULTS — Real-time]"]
+        for i, r in enumerate(results, 1):
+            # `or ""` covers fields that are present but None; calling
+            # .strip() on None would raise and, via the broad except below,
+            # throw away every result instead of just one field.
+            title = (r.get("title") or "").strip()
+            body = (r.get("body") or "").strip()
+            href = (r.get("href") or "").strip()
+            lines.append(f"\n{i}. {title}\n   {body}\n   Source: {href}")
+        lines.append("\n[END OF SEARCH RESULTS]")
+        return "\n".join(lines)
+    except Exception as e:
+        # Deliberately best-effort: any failure degrades to "no context".
+        print(f"[DDG search error] {e}")
+        return ""
+# ── Decide whether to search ──
+def should_search(query: str) -> bool:
+    """
+    Report whether a web search should accompany *query*.
+
+    Searching is the default. It is skipped only when the lower-cased,
+    stripped query starts with a request to write or explain code, or is a
+    bare "what is ..." / "define ..." question consisting of letters and
+    spaces with an optional trailing question mark.
+    """
+    skip_patterns = (
+        # requests to produce code
+        r"^\s*(write|create|generate|make|build)\s+(a\s+)?(code|function|script|program|class)",
+        # requests to explain existing code
+        r"^\s*explain\s+(this\s+)?(code|function|snippet)",
+        # simple definitions
+        r"^\s*(what is|define)\s+[a-z ]+\s*\??\s*$",
+    )
+    normalized = query.lower().strip()
+    matches_skip_rule = any(
+        re.match(pattern, normalized, re.I) for pattern in skip_patterns
+    )
+    return not matches_skip_rule
+# ──────────────────────────────────────
+# ENDPOINTS
+# ──────────────────────────────────────
 @app.get("/")
 def home():
+    # Status payload: service banner, the number of entries in API_KEYS_DB
+    # (counted regardless of their "status" value) and the search backend
+    # label.
+    return {
+        "status": "Elephant Pro Active",
+        "active_keys": len(API_KEYS_DB),
+        "search": "DuckDuckGo real-time",
+    }
 @app.post("/v1/generate-key")
 async def generate_key(data: AdminRequest):
+    # Create a new API key with the requested quota and register it in
+    # API_KEYS_DB. Raises HTTP 401 when the admin password does not match.
+    #
+    # The password is compared in constant time so response timing does not
+    # leak how much of the secret matched. Both sides are encoded to bytes
+    # because compare_digest() rejects non-ASCII str arguments.
+    if not secrets.compare_digest(
+        data.admin_pass.encode("utf-8"), ADMIN_SECRET.encode("utf-8")
+    ):
         raise HTTPException(status_code=401, detail="Unauthorized Specialist Access!")
     new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
     API_KEYS_DB[new_key] = {"limit": data.limit, "used": 0, "status": "active"}
     return {
         "message": "New Specialist Key Activated",
         "api_key": new_key,
+        "limit": data.limit,
     }
 @app.post("/v1/chat")
+async def chat(message: ChatRequest, x_api_key: str = Header(None)):
+    # Answer one chat request: authenticate the caller's API key, optionally
+    # gather web-search context, run the model, and return the cleaned reply
+    # together with usage information.
+    # Raises HTTP 403 (missing/unknown key), 429 (quota used up) or
+    # 400 (blank query).
+    # NOTE(review): model.generate() runs synchronously inside this async
+    # handler — confirm that blocking here is acceptable.
+    from datetime import datetime, timezone
+    # ── Auth ──
     if not x_api_key or x_api_key not in API_KEYS_DB:
         raise HTTPException(status_code=403, detail="Access Denied")
     key_info = API_KEYS_DB[x_api_key]
     if key_info["used"] >= key_info["limit"]:
         raise HTTPException(status_code=429, detail="Limit Reached")
+    query = message.query.strip()
+    if not query:
+        raise HTTPException(status_code=400, detail="Empty query")
+    # ── Real-time Web Search ──
     context = ""
+    search_used = False
+    if message.search and should_search(query):
+        print(f"[SEARCH] Querying DDG: {query[:80]}")
+        context = web_search(query, max_results=message.max_results)
+        if context:
+            search_used = True
+            # web_search() returns only formatted text, so the real hit
+            # count is unknown here; report what was requested instead.
+            print(f"[SEARCH] Got results (up to {message.max_results} requested).")
+        else:
+            print("[SEARCH] No results returned.")
+    # ── System Prompt ──
+    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
+    # formatted string is unchanged.
+    today = datetime.now(timezone.utc).strftime("%A, %d %B %Y, %H:%M UTC")
     system_instruction = (
         "You are Elephant AI (Inachi-Core), an expert assistant for Specialist MINZO-PRIME. "
+        "Respond in the same language the user uses (Sinhala or English). "
+        "Be concise, accurate, and helpful. "
+        f"Current UTC date/time: {today}. "
     )
+    if search_used:
+        system_instruction += (
+            "\nYou have been given real-time web search results below. "
+            "Use them to answer accurately. Always cite the source URL when referencing search results.\n"
+            + context
+        )
+    # ── Build Messages ──
     msgs = [
+        {"role": "system",  "content": system_instruction},
+        {"role": "user",    "content": query},
     ]
+    # ── Tokenize & Generate ──
+    text = tokenizer.apply_chat_template(
+        msgs, tokenize=False, add_generation_prompt=True
+    )
     inputs = tokenizer([text], return_tensors="pt").to("cpu")
     with torch.no_grad():
         outputs = model.generate(
+            inputs.input_ids,
             max_new_tokens=512,
+            temperature=0.6,
             top_p=0.9,
             do_sample=True,
+            pad_token_id=tokenizer.eos_token_id,
         )
+    # generate() returns the prompt tokens followed by the completion, so
+    # decode only what comes after the prompt. Splitting the decoded text on
+    # the word "assistant" would cut off any reply that contains that word.
+    prompt_len = inputs.input_ids.shape[1]
+    full_response = tokenizer.batch_decode(
+        outputs[:, prompt_len:], skip_special_tokens=True
+    )[0]
+    # ── Clean Output ──
+    ans = full_response.strip()
+    # Drop any reasoning section that precedes a closing </think> tag.
+    if "</think>" in ans:
+        ans = ans.split("</think>")[-1].strip()
+    # Map leftover byte-level BPE markers back to newline / space.
+    ans = ans.replace("Ċ", "\n").replace("Ġ", " ")
+    ans = re.sub(r" +", " ", ans).strip()
+    # ── Update Usage ──
     API_KEYS_DB[x_api_key]["used"] += 1
+    return {
+        "reply":        ans,
+        "search_used":  search_used,
+        "usage":        API_KEYS_DB[x_api_key]["used"],
+        "limit":        key_info["limit"],
+    }
+# HuggingFace Spaces entrypoint
 main = app