Update app.py
app.py CHANGED
@@ -7,7 +7,7 @@ import secrets
 import requests
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-# Hugging Face
+# Hugging Face server needs 'main'
 main = FastAPI()
 
 main.add_middleware(
@@ -25,29 +25,21 @@ API_KEYS_DB = {
 ADMIN_SECRET = "MINZO-SECRET-2026"
 
 # ── Google Search Config ──
-# MINZO-PRIME, add your keys here.
 GOOGLE_API_KEY = "YOUR_GOOGLE_API_KEY"
 GOOGLE_CX = "YOUR_CUSTOM_SEARCH_ENGINE_ID"
 
 # ── Load AI Model for CPU ──
 model_id = "google/gemma-2-9b-it"
-print(f"Loading {model_id} on CPU
+print(f"Loading {model_id} on CPU...")
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-# Uses the new 'dtype' parameter instead of 'torch_dtype'.
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     dtype=torch.bfloat16,
     device_map="cpu",
     trust_remote_code=True
 )
-print("Model loaded
-
-# ── Pydantic Models ──
-class AdminRequest(BaseModel):
-    admin_pass: str
-    limit: int = 1000
+print("Model loaded successfully.")
 
 class ChatRequest(BaseModel):
     query: str
@@ -55,98 +47,55 @@ class ChatRequest(BaseModel):
     max_results: int = 3
 
 # ──────────────────────────────────────
-#
+# SEARCH HELPER
 # ──────────────────────────────────────
 def google_search(query: str, max_results: int = 3) -> str:
     url = "https://www.googleapis.com/customsearch/v1"
-    params = {
-        "q": query,
-        "key": GOOGLE_API_KEY,
-        "cx": GOOGLE_CX,
-        "num": max_results
-    }
+    params = {"q": query, "key": GOOGLE_API_KEY, "cx": GOOGLE_CX, "num": max_results}
     try:
         response = requests.get(url, params=params)
        results = response.json().get("items", [])
-
-        if not results:
-            return ""
-
-        lines = ["[GOOGLE SEARCH RESULTS – Real-time]"]
+        if not results: return ""
+        lines = ["[WEB SEARCH RESULTS]"]
         for i, r in enumerate(results, 1):
-            title = r.get("title", "").strip()
-            snippet = r.get("snippet", "").strip()
-            link = r.get("link", "").strip()
-            lines.append(f"\n{i}. {title}\n {snippet}\n Source: {link}")
-        lines.append("\n[END OF SEARCH RESULTS]")
+            lines.append(f"\n{i}. {r.get('title')}\n {r.get('snippet')}\n Source: {r.get('link')}")
         return "\n".join(lines)
-    except Exception as e:
-        print(f"[Google search error] {e}")
-        return ""
-
-def should_search(query: str) -> bool:
-    no_search_patterns = [
-        r"^\s*(write|create|generate|make|build)\s+(a\s+)?(code|function|script|program|class)",
-        r"^\s*explain\s+(this\s+)?(code|function|snippet)",
-        r"^\s*(what is|define)\s+[a-z ]+\s*\??\s*$",
-    ]
-    q = query.lower().strip()
-    for pat in no_search_patterns:
-        if re.match(pat, q, re.I):
-            return False
-    return True
+    except: return ""
 
 # ──────────────────────────────────────
-#
+# CHAT ENDPOINT (FIXED)
 # ──────────────────────────────────────
-@main.get("/")
-def home():
-    return {
-        "status": "Inachi-Core Active",
-        "active_keys": len(API_KEYS_DB),
-        "search": "Google Real-time",
-    }
-
-@main.post("/v1/generate-key")
-async def generate_key(data: AdminRequest):
-    if data.admin_pass != ADMIN_SECRET:
-        raise HTTPException(status_code=401, detail="Unauthorized Access!")
-    new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
-    API_KEYS_DB[new_key] = {"limit": data.limit, "used": 0, "status": "active"}
-    return {"api_key": new_key, "limit": data.limit}
-
 @main.post("/v1/chat")
 async def chat(message: ChatRequest, x_api_key: str = Header(None)):
     if not x_api_key or x_api_key not in API_KEYS_DB:
         raise HTTPException(status_code=403, detail="Access Denied")
 
-    key_info = API_KEYS_DB[x_api_key]
-    if key_info["used"] >= key_info["limit"]:
-        raise HTTPException(status_code=429, detail="Limit Reached")
-
     query = message.query.strip()
     context = ""
     search_used = False
 
-    if message.search
+    if message.search:
         context = google_search(query, max_results=message.max_results)
-        if context:
-    today = __import__("datetime").datetime.utcnow().strftime("%A, %d %B %Y
-        f"
+        if context: search_used = True
+
+    # SYSTEM INSTRUCTION RE-FORMATTED FOR GEMMA
+    today = __import__("datetime").datetime.utcnow().strftime("%A, %d %B %Y")
+
+    # Gemma doesn't support 'system' role, so we merge it into the user prompt
+    combined_prompt = (
+        f"Instruction: You are Elephant AI (Inachi-Core), an expert assistant for MINZO-PRIME. "
+        f"Respond in the same language the user uses. Current date: {today}.\n"
     )
     if search_used:
+        combined_prompt += f"\nUse these web results to answer: {context}\n"
+
+    combined_prompt += f"\nUser Query: {query}"
 
     msgs = [
-        {"role": "
-        {"role": "user", "content": query},
+        {"role": "user", "content": combined_prompt},
     ]
 
+    # Apply template (Now only with 'user' role)
     text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer([text], return_tensors="pt").to("cpu")
 
@@ -162,4 +111,7 @@ async def chat(message: ChatRequest, x_api_key: str = Header(None)):
     ans = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True).strip()
 
     API_KEYS_DB[x_api_key]["used"] += 1
-    return {"reply": ans, "search_used": search_used
+    return {"reply": ans, "search_used": search_used}
+
+@main.get("/")
+def home(): return {"status": "Inachi-Core Online"}