# minzo-api / app.py
# Hugging Face Hub file-viewer residue: commit d04d078 ("Create app.py", verified) by MINZO4546; file size 2.03 kB.
import json
import logging

import torch
from duckduckgo_search import DDGS
from fastapi import FastAPI, Header, HTTPException
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# FastAPI application instance; the route decorators in this file register on it.
app = FastAPI()
# Storage location for the system's learning data.
# NOTE(review): this constant is not referenced anywhere else in the visible
# source; the chat handler writes to a different, relative path.
LEARNING_FILE = "/data/elephant_learning_data.jsonl" # HF Storage path
# Load Mistral-7B in 4-bit so that it fits within the 18 GB RAM budget.
# The tokenizer and model are loaded once at import time and shared by all
# requests.
model_id = "mistralai/Mistral-7B-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    # Passing `load_in_4bit=True` directly to `from_pretrained` is deprecated;
    # the supported way to request 4-bit loading is an explicit
    # BitsAndBytesConfig. All other quantization settings stay at defaults.
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)
# The 50 accepted API keys, ELE-PRIME-001 through ELE-PRIME-050. Each key maps
# to its own record carrying a starting balance of 5000 credits.
# NOTE(review): the keys are sequential and therefore guessable, and nothing in
# the visible code ever decrements "credits".
API_KEYS = {
    "ELE-PRIME-" + str(key_number).zfill(3): {"credits": 5000}
    for key_number in range(1, 51)
}
@app.get("/")
def read_root():
    """Health-check endpoint: return a fixed message saying the node is online."""
    status_payload = {"message": "Elephant API Node 2026 is Online"}
    return status_payload
def _search_context(query, max_results=2):
    """Return web-search snippets for *query*, or a fallback notice.

    The search is best-effort: any failure is logged and replaced by the
    fallback text so that a search outage never fails the chat request.
    """
    fallback = "No live data available."
    try:
        with DDGS() as ddgs:
            snippets = [
                hit.get("body", "")
                for hit in ddgs.text(query, max_results=max_results)
            ]
    except Exception:
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt
        # and SystemExit still propagate; the failure is logged, not hidden.
        logging.getLogger(__name__).warning(
            "Live web search failed", exc_info=True
        )
        return fallback
    # An empty result set gets the same fallback as a failed search.
    return "\n".join(snippet for snippet in snippets if snippet) or fallback


def _generate_reply(prompt, max_new_tokens=256):
    """Run the language model on *prompt* and return only its new text."""
    # Follow the device chosen by `device_map="auto"` instead of assuming CUDA.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # The generated sequence starts with the prompt tokens. Slicing them off
    # is more reliable than splitting the decoded text on "Assistant:", which
    # returns the wrong span whenever the query, the search context, or the
    # continuation itself contains that marker.
    prompt_length = inputs["input_ids"].shape[-1]
    completion = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )
    # Keep the first assistant turn only, in case the model goes on to invent
    # a follow-up "User:" turn.
    return completion.split("\nUser:", 1)[0].strip()


@app.post("/v1/chat")
async def chat(message: dict, x_api_key: str = Header(None)):
    """Answer a user query with web-search context and log the exchange.

    Args:
        message: JSON request body; the question is read from its "query" key.
        x_api_key: Value of the ``X-API-Key`` request header.

    Returns:
        A dict with the generated text under "reply" and "status" set to
        "learned".

    Raises:
        HTTPException: 403 when the API key is not in ``API_KEYS``; 400 when
            "query" is missing, blank, or not a string.
    """
    if x_api_key not in API_KEYS:
        raise HTTPException(status_code=403, detail="Invalid API Key")

    user_query = message.get("query", "")
    if not isinstance(user_query, str) or not user_query.strip():
        raise HTTPException(
            status_code=400, detail="Field 'query' must be a non-empty string"
        )

    # NOTE(review): the search and the generation below are blocking calls
    # inside an async handler, so they occupy the event loop while they run.

    # Web search for live 2026 data.
    context = _search_context(user_query)

    # Response generation.
    input_text = f"Context: {context}\nUser: {user_query}\nAssistant:"
    response = _generate_reply(input_text)

    # Learning loop: save the exchange for later fine-tuning.
    # NOTE(review): this writes to a relative path, not to the module-level
    # LEARNING_FILE constant, and stores the caller's API key in plain text.
    log_data = {"q": user_query, "a": response, "key": x_api_key}
    # Explicit UTF-8 and ensure_ascii=False keep non-ASCII text readable in
    # the log regardless of the platform's default encoding.
    with open("learning_log.jsonl", "a", encoding="utf-8") as f:
        f.write(json.dumps(log_data, ensure_ascii=False) + "\n")

    return {"reply": response, "status": "learned"}
# Expose the FastAPI instance under the additional name `main`
# (presumably so a server can be pointed at `app:main` — confirm against the
# deployment configuration).
main = app