"""Elephant API node: a FastAPI chat service backed by a 4-bit Mistral model.

Each chat request is authenticated against a fixed table of API keys,
enriched with a short DuckDuckGo web-search context, answered by the language
model, and appended to a JSONL file so the exchanges can be used for
fine-tuning later.
"""

import json
import logging
import os
from typing import Optional

import torch
from duckduckgo_search import DDGS
from fastapi import FastAPI, Header, HTTPException
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

logger = logging.getLogger(__name__)

app = FastAPI()

# System memory (for storage).
LEARNING_FILE = "/data/elephant_learning_data.jsonl"  # HF Storage path

# Context placed in the prompt when the web search fails.
_NO_LIVE_DATA = "No live data available."

# Load Mistral in 4-bit so that it fits in 18GB of RAM.
model_id = "mistralai/Mistral-7B-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    # Passing load_in_4bit directly to from_pretrained is deprecated; the
    # quantization settings belong in a BitsAndBytesConfig.
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)

# 50 API keys (ELE-PRIME-001 to ELE-PRIME-050).
# NOTE(review): "credits" is initialised here but nothing in this module
# decrements or checks it.
API_KEYS = {f"ELE-PRIME-{i:03d}": {"credits": 5000} for i in range(1, 51)}


def _search_context(query: str) -> str:
    """Return up to two DuckDuckGo result bodies joined by newlines.

    The search is best-effort: any failure is logged and the fixed
    "No live data available." text is returned instead of raising.
    """
    try:
        with DDGS() as ddgs:
            bodies = [hit["body"] for hit in ddgs.text(query, max_results=2)]
    except Exception:
        # Deliberately broad (network errors, rate limits, malformed hits),
        # but unlike a bare ``except`` it lets KeyboardInterrupt/SystemExit
        # through and records why the search failed.
        logger.exception("Web search failed; continuing without live data")
        return _NO_LIVE_DATA
    return "\n".join(bodies)


def _generate_reply(prompt: str) -> str:
    """Run the model on *prompt* and return only the newly generated text."""
    # Follow the device the model was actually placed on (device_map="auto")
    # instead of assuming CUDA is available.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=256)
    # The generated sequence starts with the prompt tokens. Slice them off
    # rather than splitting the decoded text on "Assistant:", which picks the
    # wrong segment whenever the generated text contains that marker.
    prompt_length = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


def _log_interaction(query: str, reply: str, api_key: str) -> bool:
    """Append one question/answer record to LEARNING_FILE.

    Returns True when the record was written and False when the write failed;
    the failure is logged rather than raised so the caller can still return
    the reply it has already generated.
    """
    # NOTE(review): the raw API key is persisted next to the conversation;
    # consider storing a hash or key index instead.
    record = {"q": query, "a": reply, "key": api_key}
    try:
        os.makedirs(os.path.dirname(LEARNING_FILE), exist_ok=True)
        with open(LEARNING_FILE, "a", encoding="utf-8") as log_file:
            log_file.write(json.dumps(record) + "\n")
    except OSError:
        logger.exception("Could not append to %s", LEARNING_FILE)
        return False
    return True


@app.get("/")
def read_root():
    """Health-check endpoint reporting that the node is up."""
    return {"message": "Elephant API Node 2026 is Online"}


@app.post("/v1/chat")
async def chat(message: dict, x_api_key: Optional[str] = Header(None)):
    """Answer a chat query using web-search context and the language model.

    Args:
        message: JSON request body; the user's question is read from its
            "query" key.
        x_api_key: Value of the ``X-API-Key`` request header.

    Returns:
        A dict with the model's "reply" and a "status" that is "learned" when
        the exchange was appended to the learning log, or "log_failed" when
        the log could not be written.

    Raises:
        HTTPException: 403 if the API key is missing or unknown; 400 if the
            body has no non-empty string "query".
    """
    if x_api_key not in API_KEYS:
        raise HTTPException(status_code=403, detail="Invalid API Key")

    user_query = message.get("query", "")
    if not isinstance(user_query, str) or not user_query.strip():
        # Avoid spending a web search and a model generation on an empty prompt.
        raise HTTPException(status_code=400, detail="Missing or empty 'query'")

    # NOTE(review): the search and generation calls below are synchronous, so
    # this coroutine blocks the event loop while they run.

    # Web search for 2026 live data.
    context = _search_context(user_query)

    # Response generation.
    input_text = f"Context: {context}\nUser: {user_query}\nAssistant:"
    response = _generate_reply(input_text)

    # Learning loop: save the exchange for later fine-tuning.
    logged = _log_interaction(user_query, response, x_api_key)

    return {"reply": response, "status": "learned" if logged else "log_failed"}


main = app