Spaces:

MINZO4546
/

minzo-api

Build error

App Files Files Community

MINZO4546 committed 26 days ago

Commit

e48110c

verified ·

1 Parent(s): 3961e8b

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -33

app.py CHANGED Viewed

@@ -1,58 +1,85 @@
 from fastapi import FastAPI, Header, HTTPException
 import torch
 import json
-from transformers import AutoModelForCausalLM, AutoTokenizer
 from duckduckgo_search import DDGS
 app = FastAPI()
-# System memory (for storage)
-LEARNING_FILE = "/data/elephant_learning_data.jsonl" # HF Storage path; NOTE(review): not referenced by the visible chat handler, which appends to "learning_log.jsonl"
-# Load Mistral in 4-bit so that it fits within 18GB of RAM
 model_id = "mistralai/Mistral-7B-v0.3"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    torch_dtype=torch.bfloat16,
     device_map="auto",
-    load_in_4bit=True
 )
-# 50 API keys (ELE-PRIME-001 to ELE-PRIME-050), each mapped to {"credits": 5000}
-API_KEYS = {f"ELE-PRIME-{i:03d}": {"credits": 5000} for i in range(1, 51)}
 @app.get("/")
-def read_root():  # Root endpoint: returns a fixed status message
-    return {"message": "Elephant API Node 2026 is Online"}
 @app.post("/v1/chat")
-async def chat(message: dict, x_api_key: str = Header(None)):  # Chat endpoint: checks the API key, searches the web, generates a reply, logs the exchange
-    if x_api_key not in API_KEYS:
-        raise HTTPException(status_code=403, detail="Invalid API Key")
     user_query = message.get("query", "")
-    # Web search for 2026 live data (runs on every request)
     context = ""
-    try:
-        with DDGS() as ddgs:
-            results = [r['body'] for r in ddgs.text(user_query, max_results=2)]
-            context = "\n".join(results)
-    except:  # NOTE(review): bare except - any failure in the search is swallowed and replaced by the fallback text
-        context = "No live data available."
-    # Response generation
-    input_text = f"Context: {context}\nUser: {user_query}\nAssistant:"
-    inputs = tokenizer(input_text, return_tensors="pt").to("cuda")  # NOTE(review): hard-codes "cuda" although the model is loaded with device_map="auto"
-    outputs = model.generate(**inputs, max_new_tokens=256)
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True).split("Assistant:")[-1].strip()  # keep only the text after the last "Assistant:" marker
-    # Learning loop: save the data for later fine-tuning
-    log_data = {"q": user_query, "a": response, "key": x_api_key}  # NOTE(review): the caller's API key is written to the log as-is
-    with open("learning_log.jsonl", "a") as f:
-        f.write(json.dumps(log_data) + "\n")
-    return {"reply": response, "status": "learned"}
 main = app  # binds the same FastAPI instance to the name "main"; presumably for a server entry point - TODO confirm

 from fastapi import FastAPI, Header, HTTPException
 import torch
+import os
 import json
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from duckduckgo_search import DDGS
 app = FastAPI()
+# 1. List of 50 API keys (hardcoded for now as requested)
+# Format: ELE-PRIME-001, ELE-PRIME-002 ... ELE-PRIME-050
+API_KEYS_DB = {f"ELE-PRIME-{i:03d}": {"credits": 5000, "status": "active"} for i in range(1, 51)}
+# 2. Check whether a GPU is available and set up quantization. NOTE(review): no GPU check is actually performed in this section
 model_id = "mistralai/Mistral-7B-v0.3"
+HF_TOKEN = os.getenv("HF_TOKEN")  # None when the HF_TOKEN environment variable is not set
+quant_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True,
+)
+# 3. Load the model (tokenizer and weights are loaded at import time)
+print("Loading Elephant Engine (Mistral-7B)...")
+tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    quantization_config=quant_config,
     device_map="auto",
+    token=HF_TOKEN
 )
+# 4. Web search facility
+def get_live_data(query):  # Returns the 'body' text of the search results (max_results=3), joined with newlines
+    try:
+        with DDGS() as ddgs:
+            results = [r['body'] for r in ddgs.text(query, max_results=3)]
+            return "\n".join(results)
+    except:  # NOTE(review): bare except - any failure (including a missing 'body' key) silently yields ""
+        return ""
 @app.get("/")
+def health_check():  # Root endpoint: returns a fixed status string and the number of entries in API_KEYS_DB
+    return {"status": "Elephant API Node 2026 is Active", "keys_loaded": len(API_KEYS_DB)}
 @app.post("/v1/chat")
+async def chat_api(message: dict, x_api_key: str = Header(None)):  # Chat endpoint: checks the API key, optionally searches the web, generates a reply, logs the exchange
+    # Validate the API key (a missing header arrives as None and is rejected too)
+    if x_api_key not in API_KEYS_DB:
+        raise HTTPException(status_code=403, detail="Unauthorized: Invalid API Key")
     user_query = message.get("query", "")
+    # Web search for 2026 data - only when the query contains one of the trigger words below
     context = ""
+    if any(word in user_query.lower() for word in ["today", "now", "2026", "news", "current"]):
+        context = get_live_data(user_query)
+    # Build the prompt
+    system_instr = "Current Year: 2026. You are Elephant AI. Use the provided context to answer."
+    full_prompt = f"System: {system_instr}\nContext: {context}\nUser: {user_query}\nAssistant:"
+    inputs = tokenizer(full_prompt, return_tensors="pt").to("cuda")  # NOTE(review): hard-codes "cuda" although the model is loaded with device_map="auto"
+    # Generate the response (gradient tracking disabled)
+    with torch.no_grad():
+        output_tokens = model.generate(**inputs, max_new_tokens=300, do_sample=True, temperature=0.7)
+    response = tokenizer.decode(output_tokens[0], skip_special_tokens=True).split("Assistant:")[-1].strip()  # keep only the text after the last "Assistant:" marker
+    # Learning loop: log the data so the system can learn from it
+    with open("learning_vault.jsonl", "a") as f:
+        log_entry = {"q": user_query, "ctx": context, "ans": response, "key": x_api_key}  # NOTE(review): the caller's API key is written to the log as-is
+        f.write(json.dumps(log_entry) + "\n")
+    return {
+        "reply": response,
+        "model": "Elephant-Mistral-7B-v0.3",
+        "key_id": x_api_key,  # NOTE(review): echoes the caller's API key back in the response
+        "timestamp": "2026-04-27"  # NOTE(review): hard-coded date string, not the time of the request
+    }
 main = app  # binds the same FastAPI instance to the name "main"; presumably for a server entry point - TODO confirm