MINZO4546 committed on
Commit
e48110c
·
verified ·
1 Parent(s): 3961e8b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -33
app.py CHANGED
@@ -1,58 +1,85 @@
1
  from fastapi import FastAPI, Header, HTTPException
2
  import torch
 
3
  import json
4
- from transformers import AutoModelForCausalLM, AutoTokenizer
5
  from duckduckgo_search import DDGS
6
 
7
  app = FastAPI()
8
 
9
- # පද්ධතියේ මතකය (Storage සඳහා)
10
- LEARNING_FILE = "/data/elephant_learning_data.jsonl" # HF Storage path
 
11
 
12
- # 18GB RAM එකට ගැලෙන පරිදි Mistral 4-bit වලින් Load කිරීම
13
  model_id = "mistralai/Mistral-7B-v0.3"
14
- tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 
 
 
 
 
 
 
 
 
 
15
  model = AutoModelForCausalLM.from_pretrained(
16
- model_id,
17
- torch_dtype=torch.bfloat16,
18
  device_map="auto",
19
- load_in_4bit=True
20
  )
21
 
22
- # API Keys 50 (ELE-PRIME-001 to ELE-PRIME-050)
23
- API_KEYS = {f"ELE-PRIME-{i:03d}": {"credits": 5000} for i in range(1, 51)}
 
 
 
 
 
 
24
 
25
  @app.get("/")
26
- def read_root():
27
- return {"message": "Elephant API Node 2026 is Online"}
28
 
29
  @app.post("/v1/chat")
30
- async def chat(message: dict, x_api_key: str = Header(None)):
31
- if x_api_key not in API_KEYS:
32
- raise HTTPException(status_code=403, detail="Invalid API Key")
33
-
 
34
  user_query = message.get("query", "")
35
 
36
- # Web Search for 2026 Live Data
37
  context = ""
38
- try:
39
- with DDGS() as ddgs:
40
- results = [r['body'] for r in ddgs.text(user_query, max_results=2)]
41
- context = "\n".join(results)
42
- except:
43
- context = "No live data available."
44
 
45
- # Response Generation
46
- input_text = f"Context: {context}\nUser: {user_query}\nAssistant:"
47
- inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
48
- outputs = model.generate(**inputs, max_new_tokens=256)
49
- response = tokenizer.decode(outputs[0], skip_special_tokens=True).split("Assistant:")[-1].strip()
 
 
 
 
 
 
50
 
51
- # Learning Loop: දත්ත පසුව Fine-tuning සඳහා Save කිරීම
52
- log_data = {"q": user_query, "a": response, "key": x_api_key}
53
- with open("learning_log.jsonl", "a") as f:
54
- f.write(json.dumps(log_data) + "\n")
55
 
56
- return {"reply": response, "status": "learned"}
 
 
 
 
 
57
 
58
  main = app
 
1
  from fastapi import FastAPI, Header, HTTPException
2
  import torch
3
+ import os
4
  import json
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
6
  from duckduckgo_search import DDGS
7
 
8
  app = FastAPI()
9
 
10
+ # 1. API Keys 50 ලැයිස්තුව (Hardcoded for now as requested)
11
+ # Format: ELE-PRIME-001, ELE-PRIME-002 ... ELE-PRIME-050
12
+ API_KEYS_DB = {f"ELE-PRIME-{i:03d}": {"credits": 5000, "status": "active"} for i in range(1, 51)}
13
 
14
+ # 2. GPU තිබෙනවාදැයි පරීක්ෂා කිරීම සහ Quantization සැකසීම
15
  model_id = "mistralai/Mistral-7B-v0.3"
16
+ HF_TOKEN = os.getenv("HF_TOKEN")
17
+
18
+ quant_config = BitsAndBytesConfig(
19
+ load_in_4bit=True,
20
+ bnb_4bit_compute_dtype=torch.bfloat16,
21
+ bnb_4bit_quant_type="nf4",
22
+ bnb_4bit_use_double_quant=True,
23
+ )
24
+
25
+ # 3. මොඩලය Load කිරීම
26
+ print("Loading Elephant Engine (Mistral-7B)...")
27
+ tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
28
  model = AutoModelForCausalLM.from_pretrained(
29
+ model_id,
30
+ quantization_config=quant_config,
31
  device_map="auto",
32
+ token=HF_TOKEN
33
  )
34
 
35
+ # 4. Web Search පහසුකම
36
+ def get_live_data(query):
37
+ try:
38
+ with DDGS() as ddgs:
39
+ results = [r['body'] for r in ddgs.text(query, max_results=3)]
40
+ return "\n".join(results)
41
+ except:
42
+ return ""
43
 
44
  @app.get("/")
45
+ def health_check():
46
+ return {"status": "Elephant API Node 2026 is Active", "keys_loaded": len(API_KEYS_DB)}
47
 
48
  @app.post("/v1/chat")
49
+ async def chat_api(message: dict, x_api_key: str = Header(None)):
50
+ # API Key එක පරීක්ෂා කිරීම
51
+ if x_api_key not in API_KEYS_DB:
52
+ raise HTTPException(status_code=403, detail="Unauthorized: Invalid API Key")
53
+
54
  user_query = message.get("query", "")
55
 
56
+ # 2026 දත්ත සඳහා Web Search කිරීම
57
  context = ""
58
+ if any(word in user_query.lower() for word in ["today", "now", "2026", "news", "current"]):
59
+ context = get_live_data(user_query)
 
 
 
 
60
 
61
+ # Prompt එක සැකසීම
62
+ system_instr = "Current Year: 2026. You are Elephant AI. Use the provided context to answer."
63
+ full_prompt = f"System: {system_instr}\nContext: {context}\nUser: {user_query}\nAssistant:"
64
+
65
+ inputs = tokenizer(full_prompt, return_tensors="pt").to("cuda")
66
+
67
+ # Response එක Generate කිරීම
68
+ with torch.no_grad():
69
+ output_tokens = model.generate(**inputs, max_new_tokens=300, do_sample=True, temperature=0.7)
70
+
71
+ response = tokenizer.decode(output_tokens[0], skip_special_tokens=True).split("Assistant:")[-1].strip()
72
 
73
+ # Learning Loop: පද්ධතිය ඉගෙන ගැනීමට දත්ත ලොග් කිරීම
74
+ with open("learning_vault.jsonl", "a") as f:
75
+ log_entry = {"q": user_query, "ctx": context, "ans": response, "key": x_api_key}
76
+ f.write(json.dumps(log_entry) + "\n")
77
 
78
+ return {
79
+ "reply": response,
80
+ "model": "Elephant-Mistral-7B-v0.3",
81
+ "key_id": x_api_key,
82
+ "timestamp": "2026-04-27"
83
+ }
84
 
85
  main = app