# File size: 2,026 Bytes
import json

import torch
from duckduckgo_search import DDGS
from fastapi import FastAPI, Header, HTTPException
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
# FastAPI application instance; the route decorators in this file register on it.
app = FastAPI()
# System memory (for storage): file path for the collected learning data.
LEARNING_FILE = "/data/elephant_learning_data.jsonl" # HF Storage path
# Load Mistral in 4-bit so that it fits within the 18 GB RAM budget.
model_id = "mistralai/Mistral-7B-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    # Passing load_in_4bit straight to from_pretrained is deprecated;
    # quantization options are supplied through a BitsAndBytesConfig.
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)
# Fifty API keys, "ELE-PRIME-001" through "ELE-PRIME-050", each mapped to its
# own {"credits": 5000} record.
# NOTE(review): the keys are sequential and hard-coded, hence guessable --
# confirm this is acceptable outside a demo deployment.
API_KEYS = {
    f"ELE-PRIME-{key_number:03d}": {"credits": 5000}
    for key_number in range(1, 51)
}
@app.get("/")
def read_root():
    """Return a fixed status message for GET requests to the root path."""
    status_payload = {"message": "Elephant API Node 2026 is Online"}
    return status_payload
@app.post("/v1/chat")
async def chat(message: dict, x_api_key: str = Header(None)):
    """Answer a user query with the model, using web search snippets as context.

    Args:
        message: JSON request body; the question is read from its ``"query"``
            key and defaults to an empty string when the key is absent.
        x_api_key: API key taken from the request headers.

    Returns:
        A dict holding the generated ``"reply"`` and a ``"status"`` marker.

    Raises:
        HTTPException: 403 when the API key is missing or not in ``API_KEYS``.
    """
    if x_api_key not in API_KEYS:
        raise HTTPException(status_code=403, detail="Invalid API Key")

    user_query = message.get("query", "")

    # NOTE(review): the search and the generation below are synchronous calls
    # inside an async handler, so the event loop is blocked while they run.

    # Web search for live data. This is best-effort: any search failure falls
    # back to a placeholder context. ``Exception`` (rather than a bare
    # ``except``) lets KeyboardInterrupt and task cancellation propagate.
    try:
        with DDGS() as ddgs:
            snippets = [r["body"] for r in ddgs.text(user_query, max_results=2)]
        context = "\n".join(snippets)
    except Exception:
        context = "No live data available."

    # Response generation.
    input_text = f"Context: {context}\nUser: {user_query}\nAssistant:"
    # The model is placed by device_map="auto", so send the inputs to wherever
    # it actually lives instead of assuming a CUDA device.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=256)
    # Decode only the newly generated tokens. Splitting the full decoded text
    # on "Assistant:" breaks when the generation itself contains that marker.
    prompt_length = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    ).strip()

    # Learning loop: save the exchange for later fine-tuning, in the storage
    # file declared at module level.
    log_data = {"q": user_query, "a": response, "key": x_api_key}
    with open(LEARNING_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(log_data) + "\n")

    return {"reply": response, "status": "learned"}
# Alias that exposes the FastAPI instance under the name ``main``.
main = app