# Inachi-ai-3 / app.py
# Hosting-page header captured with the file: "MINZO4546's picture",
# "Update app.py", "db31437 verified".
from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import torch
import os
import json
import re
import uuid
import secrets
import datetime
from transformers import AutoModelForCausalLM, AutoTokenizer
from duckduckgo_search import DDGS
# ASGI application object; the route decorators further down register on it.
app = FastAPI()

# CORS is left fully open: any origin, any HTTP method, any request header.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
# --- Database & Config ---
# In-memory key registry, seeded with the initial keys. Each API key maps to
# its request quota ("limit"), the number of requests consumed ("used") and a
# "status" flag. Nothing here is written to disk, so a process restart
# returns the registry to these seed values.
API_KEYS_DB = {
    "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"},
    "ELE-PRIME-YG5EPZFQ": {"limit": 5000, "used": 0, "status": "active"},
}

# Password that authorizes key creation. Deployments should supply it through
# the INACHI_ADMIN_SECRET environment variable so the real value is not
# committed with the source; the historical default is kept as a fallback so
# existing setups keep working. `or` (not a .get default) is deliberate: an
# empty variable must not produce an empty, trivially guessable secret.
ADMIN_SECRET = os.environ.get("INACHI_ADMIN_SECRET") or "MINZO-SECRET-2026"
# --- AI Model ---
# Checkpoint identifier passed to both from_pretrained() calls below.
model_id = "mlx-community/gemma-4-e4b-it-4bit"
print(f"🔱 INACHI-CORE: Loading {model_id}...")
# Both objects are created once, at import time, and reused by the chat
# endpoint. The model is placed on the CPU (device_map="cpu") and its dtype is
# left for the library to choose (torch_dtype="auto").
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="cpu")
# --- Data Models ---
class AdminRequest(BaseModel):
    """Request body accepted by the key-generation endpoint.

    Attributes:
        admin_pass: Password the endpoint compares against ``ADMIN_SECRET``.
        limit: Request quota stored for the newly created key (default 1000).
    """

    admin_pass: str
    limit: int = 1000
# --- API Endpoints ---
@app.get("/")
def home():
    """Report that the service is up and how many keys are registered.

    Returns:
        dict: ``status`` banner plus ``active_keys``, the total number of
        entries in ``API_KEYS_DB`` (each entry's ``status`` is not consulted).
    """
    registered = len(API_KEYS_DB)
    payload = {"status": "Elephant Pro Active"}
    payload["active_keys"] = registered
    return payload
# 🔱 Endpoint that auto-generates a new API key.
@app.post("/v1/generate-key")
async def generate_key(data: AdminRequest):
    """Create and register a new API key after checking the admin password.

    Args:
        data: Request body carrying ``admin_pass`` and the quota (``limit``)
            to assign to the new key.

    Returns:
        dict: Confirmation ``message``, the new ``api_key`` and its ``limit``.

    Raises:
        HTTPException: 401 when ``admin_pass`` does not match ``ADMIN_SECRET``.
    """
    # Constant-time comparison so response timing does not reveal how much of
    # the password matched. Comparing bytes (rather than str) lets non-ASCII
    # input be rejected normally instead of raising TypeError.
    supplied = data.admin_pass.encode("utf-8", "surrogatepass")
    expected = ADMIN_SECRET.encode("utf-8", "surrogatepass")
    if not secrets.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Unauthorized Specialist Access!")

    # Random key (e.g. ELE-PRIME-X8A2...). The suffix is only 8 hex digits, so
    # redraw on a collision instead of overwriting an existing key's record
    # (which would silently reset its usage counter and quota).
    new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
    while new_key in API_KEYS_DB:
        new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"

    API_KEYS_DB[new_key] = {"limit": data.limit, "used": 0, "status": "active"}
    return {
        "message": "New Specialist Key Activated",
        "api_key": new_key,
        "limit": data.limit,
    }
# Substrings that mark a query as time-sensitive ("අද" is Sinhala for "today").
_LIVE_QUERY_HINTS = ("today", "now", "2026", "අද")


def _fetch_web_context(query):
    """Return search-result snippets for a time-sensitive query, else ""."""
    lowered = query.lower()
    if not any(hint in lowered for hint in _LIVE_QUERY_HINTS):
        return ""
    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=2))
        return "\n".join([r['body'] for r in results])
    except Exception as exc:
        # Search is best-effort: a failure must not fail the chat request,
        # but it is reported instead of being silently discarded.
        print(f"INACHI-CORE: web search failed: {exc!r}")
        return ""


def _clean_reply(raw):
    """Drop any reasoning preamble and tokenizer artifacts from model output."""
    text = raw.strip()
    # Keep only what follows the last closing think tag, if one is present.
    if "</think>" in text:
        text = text.split("</think>")[-1].strip()
    # Byte-level BPE markers that can leak into decoded text.
    text = text.replace("Ċ", "\n").replace("Ġ", " ")
    return re.sub(r' +', ' ', text).strip()


@app.post("/v1/chat")
async def chat(message: dict, x_api_key: str = Header(None)):
    """Answer a chat query with the local model, metered per API key.

    Args:
        message: JSON body; the user's text is read from its ``"query"`` key.
        x_api_key: API key taken from the ``X-API-Key`` request header.

    Returns:
        dict: ``reply`` (the cleaned model answer) and ``usage`` (requests
        consumed by this key, including the current one).

    Raises:
        HTTPException: 403 when the key is missing, unknown or not active;
            429 when the key's quota is exhausted; 400 when ``query`` is not
            a non-empty string.
    """
    key_info = API_KEYS_DB.get(x_api_key) if x_api_key else None
    if key_info is None:
        raise HTTPException(status_code=403, detail="Access Denied")
    # The registry stores a status flag for every key; honor it.
    if key_info.get("status") != "active":
        raise HTTPException(status_code=403, detail="Access Denied")
    if key_info["used"] >= key_info["limit"]:
        raise HTTPException(status_code=429, detail="Limit Reached")

    # The body is an untyped dict, so "query" may be absent or of any JSON
    # type; reject anything that is not usable text instead of crashing later.
    query = message.get("query", "")
    if not isinstance(query, str) or not query.strip():
        raise HTTPException(status_code=400, detail="Query must be a non-empty string")

    # Web Search
    context = _fetch_web_context(query)

    # 🔱 Language Adaptive System Instruction
    system_instruction = (
        "You are Elephant AI (Inachi-Core), an expert assistant for Specialist MINZO-PRIME. "
        "Respond in the language used by the user (Sinhala or English). "
        f"Real-time Context: {context}"
    )
    msgs = [
        {"role": "system", "content": system_instruction},
        {"role": "user", "content": query},
    ]
    text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer([text], return_tensors="pt").to("cpu")
    prompt_len = inputs.input_ids.shape[1]

    # NOTE(review): generate() is a blocking call inside an async handler, so
    # other requests wait while it runs.
    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs["attention_mask"],
            max_new_tokens=512,
            temperature=0.6,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The output sequence starts with the prompt tokens. Decode only what was
    # generated after them; splitting the decoded text on the word "assistant"
    # is unreliable because the system prompt (and any reply) can contain it.
    new_tokens = outputs[0][prompt_len:]
    ans = _clean_reply(tokenizer.decode(new_tokens, skip_special_tokens=True))

    # Charge the key only after a reply was produced.
    key_info["used"] += 1
    return {"reply": ans, "usage": key_info["used"]}
# Second module-level name bound to the same FastAPI instance.
# NOTE(review): presumably so a server can be pointed at `app:main` — confirm
# against the deployment configuration.
main = app