# NOTE(review): the three lines below are Hugging Face Spaces page metadata
# that was pasted into this source file; kept as comments so the module
# remains importable.
# Spaces: Runtime error (runtime error) — file size: 2,475 bytes
# Imports, grouped per PEP 8: stdlib first, then third-party.
# (The commit-hash / line-number gutter from the file viewer that was fused
# into the first import line has been removed — it was a SyntaxError.)
import os
import re
import secrets

import torch
from duckduckgo_search import DDGS
from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# ASGI application object with wide-open CORS: every origin, method, and
# header is allowed. No credentials are configured, so "*" origins are safe
# in the browser's CORS model.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],   # any origin may call the API
    allow_headers=["*"],   # accept any request header
    allow_methods=["*"],   # accept any HTTP verb
)
# --- Specialist DB ---
# In-memory API-key registry. Each key maps to its request quota ("limit"),
# a usage counter ("used"), and an activation flag ("status").
# NOTE(review): this is process-local state — it resets on every restart and
# is not shared across workers.
API_KEYS_DB = {
    "ELE-PRIME-ADMIN-SYS": {"limit": 100000, "used": 0, "status": "active"},
    "ELE-PRIME-VOID-X": {"limit": 50000, "used": 0, "status": "active"},
}

# Admin credential. SECURITY FIX: a secret hard-coded in source is trivially
# leaked with the repository; prefer the ADMIN_SECRET environment variable.
# The original literal remains as the fallback so existing deployments keep
# working unchanged.
ADMIN_SECRET = os.environ.get("ADMIN_SECRET", "MINZO-SECRET-2026")
# --- MiMo-Audio 7B Optimization ---
# Load the multimodal chat model once at startup.
model_id = "XiaomiMiMo/MiMo-Audio-7B-Instruct"
print(f"🔱 INACHI-CORE: Deploying Multimodal Engine {model_id}...")

# Quantize weights to 4 bits (NF4 via bitsandbytes) to keep the RAM/VRAM
# footprint low; compute still happens in fp16.
quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

tokenizer = AutoTokenizer.from_pretrained(model_id)

# device_map="auto" lets accelerate decide where each layer lives and
# manages memory automatically.
model = AutoModelForCausalLM.from_pretrained(
    model_id, quantization_config=quant_config, device_map="auto"
)
class AdminRequest(BaseModel):
    """Request body for admin operations: passphrase plus a key quota."""

    # Presumably must equal ADMIN_SECRET for the request to be honored —
    # the consuming endpoint is not visible in this chunk; TODO confirm.
    admin_pass: str
    # Request quota for a key (default 5000) — verify against the handler.
    limit: int = 5000
@app.post("/v1/chat")
async def chat(message: dict, x_api_key: str = Header(None)):
if not x_api_key or x_api_key not in API_KEYS_DB:
raise HTTPException(status_code=403, detail="Invalid Key")
query = message.get("query", "")
# MiMo පද්ධතියේ System Prompt එක
system_instruction = (
"You are Inachi-Prime, an Any-to-Any multimodal AI assistant. "
"You are designed by Specialist MINZO-PRIME. "
"Respond accurately in Sinhala or English based on user input."
)
inputs = tokenizer(f"{system_instruction}\n\nUser: {query}\nAssistant:", return_tensors="pt").to("cpu")
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=512,
temperature=0.7,
do_sample=True,
pad_token_id=tokenizer.eos_token_id
)
ans = tokenizer.decode(outputs[0], skip_special_tokens=True).split("Assistant:")[-1].strip()
API_KEYS_DB[x_api_key]["used"] += 1
return {"reply": ans, "usage": API_KEYS_DB[x_api_key]["used"]}
main = app |