# Hosting-page residue captured with the source ("Spaces: Runtime error",
# repeated) — preserved here as a comment so the file parses as Python.
# Standard library
import datetime
import json
import os
import re
import secrets
import uuid

# Third-party
import torch
from duckduckgo_search import DDGS
from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
# ASGI application with fully open CORS so browser clients on any origin can call it.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # NOTE(review): wide-open CORS — fine for a demo, tighten for production.
    allow_methods=["*"],
    allow_headers=["*"],
)
# --- Database & Config ---
# In-memory key store (seed keys): lost on restart — persist externally if that matters.
# Schema per key: {"limit": max requests, "used": requests consumed, "status": "active"}.
API_KEYS_DB = {
    "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"},
    "ELE-PRIME-YG5EPZFQ": {"limit": 5000, "used": 0, "status": "active"},
}
# Admin secret for the key-generation endpoint. Prefer the ADMIN_SECRET environment
# variable; the original hardcoded literal remains the fallback for compatibility.
# NOTE(review): rotate this default — it is committed to source control.
ADMIN_SECRET = os.environ.get("ADMIN_SECRET", "MINZO-SECRET-2026")
# --- AI Model ---
# NOTE(review): this repo id names an MLX 4-bit quantized build; loading it through
# transformers' AutoModelForCausalLM on CPU may be unsupported — confirm the
# checkpoint actually loads in this runtime (the hosting page reported a runtime error).
model_id = "mlx-community/gemma-4-e4b-it-4bit"
print(f"🔱 INACHI-CORE: Loading {model_id}...")
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="cpu")
# --- Data Models ---
class AdminRequest(BaseModel):
    """Request body for admin key generation: shared secret plus the new key's quota."""

    admin_pass: str  # must match the module-level ADMIN_SECRET
    limit: int = 1000  # request quota assigned to the newly minted key
# --- API Endpoints ---
# NOTE(review): the route decorator was missing in the extracted source, leaving the
# endpoint unregistered; the root path below is assumed — confirm against the client.
@app.get("/")
def home():
    """Liveness/status endpoint: service banner plus the number of known API keys."""
    return {"status": "Elephant Pro Active", "active_keys": len(API_KEYS_DB)}
# 🔱 Endpoint that auto-generates a fresh API key (admin only).
# NOTE(review): decorator was missing in the extracted source — the path is assumed;
# without some decorator the endpoint is never registered with the app.
@app.post("/generate-key")
async def generate_key(data: AdminRequest):
    """Mint a random API key and register it in API_KEYS_DB.

    Raises 401 unless `data.admin_pass` matches ADMIN_SECRET; the new key gets
    `data.limit` requests of quota.
    """
    if data.admin_pass != ADMIN_SECRET:
        raise HTTPException(status_code=401, detail="Unauthorized Specialist Access!")
    # Random key, e.g. ELE-PRIME-X8A2F1C3 — `secrets` is a CSPRNG, appropriate for tokens.
    new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
    API_KEYS_DB[new_key] = {"limit": data.limit, "used": 0, "status": "active"}
    return {
        "message": "New Specialist Key Activated",
        "api_key": new_key,
        "limit": data.limit,
    }
# NOTE(review): decorator was missing in the extracted source — path assumed; without
# it this handler is never registered with the app.
@app.post("/chat")
async def chat(message: dict, x_api_key: str = Header(None)):
    """Answer a chat query with the local LLM, gated by a per-key usage quota.

    Expects a JSON body like {"query": "..."} and an `X-API-Key` header.
    Raises 403 for a missing/unknown key and 429 once the key's limit is spent.
    Returns {"reply": <text>, "usage": <requests consumed so far>}.
    """
    # --- Key validation & quota check ---
    if not x_api_key or x_api_key not in API_KEYS_DB:
        raise HTTPException(status_code=403, detail="Access Denied")
    key_info = API_KEYS_DB[x_api_key]
    if key_info["used"] >= key_info["limit"]:
        raise HTTPException(status_code=429, detail="Limit Reached")

    query = message.get("query", "")

    # Best-effort web context for time-sensitive questions.
    context = _search_context(query)

    # 🔱 Language-adaptive system instruction.
    system_instruction = (
        "You are Elephant AI (Inachi-Core), an expert assistant for Specialist MINZO-PRIME. "
        "Respond in the language used by the user (Sinhala or English). "
        f"Real-time Context: {context}"
    )
    msgs = [
        {"role": "system", "content": system_instruction},
        {"role": "user", "content": query},
    ]
    ans = _generate_answer(msgs)

    # Count the request against the key only after a successful generation.
    API_KEYS_DB[x_api_key]["used"] += 1
    return {"reply": ans, "usage": API_KEYS_DB[x_api_key]["used"]}


def _search_context(query: str) -> str:
    """Return joined DuckDuckGo snippets for 'current events' queries, else ''."""
    # Crude recency heuristic: English keywords plus Sinhala "අද" ("today").
    if not any(w in query.lower() for w in ("today", "now", "2026", "අද")):
        return ""
    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=2))
        return "\n".join(r["body"] for r in results)
    except Exception:
        # Search is deliberately best-effort: a network hiccup must not break chat.
        # (Original used a bare `except: pass`; narrowed to Exception so
        # KeyboardInterrupt/SystemExit still propagate.)
        return ""


def _generate_answer(msgs: list) -> str:
    """Run the chat messages through the model and post-process the decoded text."""
    text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer([text], return_tensors="pt").to("cpu")
    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            max_new_tokens=512,
            temperature=0.6,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    full_response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    # NOTE(review): brittle — assumes the chat template marks the reply with the
    # literal word "assistant"; confirm against the model's template.
    ans = full_response.split("assistant")[-1].strip()
    # Cleaning: drop leaked reasoning blocks and BPE artifacts, collapse space runs.
    if "</think>" in ans:
        ans = ans.split("</think>")[-1].strip()
    ans = ans.replace("Ċ", "\n").replace("Ġ", " ")
    return re.sub(r' +', ' ', ans).strip()
# Alias: presumably some launcher imports this module's `main` as the ASGI app — confirm.
main = app