MINZO4546 committed on
Commit
5cac8e2
·
verified ·
1 Parent(s): 2fb9a37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -33
app.py CHANGED
@@ -3,14 +3,12 @@ from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
  import torch
5
  import os
6
- import json
7
- import datetime
8
  from transformers import AutoModelForCausalLM, AutoTokenizer
9
  from duckduckgo_search import DDGS
10
 
 
11
  app = FastAPI()
12
 
13
- # CORS Fix for Dashboard connectivity
14
  app.add_middleware(
15
  CORSMiddleware,
16
  allow_origins=["*"],
@@ -23,20 +21,20 @@ API_KEYS_DB = {
23
  "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"}
24
  }
25
  ADMIN_SECRET = "MINZO-SECRET-2026"
26
- LEARNING_VAULT = "neural_learning_data.jsonl"
27
 
28
- # --- AI Model (Gemma-3-1B-it Upgrade) ---
29
- # Specialist, we use bfloat16 here so that only about 3GB of your 18GB of RAM is consumed.
30
  model_id = "google/gemma-3-1b-it"
31
  HF_TOKEN = os.getenv("HF_TOKEN")
32
 
33
  print(f"🐘 Elephant Node v3.7 Loading: {model_id}...")
34
 
35
  tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
 
 
36
  model = AutoModelForCausalLM.from_pretrained(
37
  model_id,
38
- torch_dtype=torch.bfloat16,
39
- device_map="cpu", # CPU එකේ ඉතාම වේගයෙන් මේක වැඩ කරයි
40
  token=HF_TOKEN
41
  )
42
 
@@ -49,7 +47,7 @@ class KeyRequest(BaseModel):
49
  # --- API Endpoints ---
50
  @app.get("/")
51
  def home():
52
- return {"status": "Elephant Pro Active (Gemma 3)", "keys": len(API_KEYS_DB)}
53
 
54
  @app.post("/admin/add-key")
55
  async def add_key(data: KeyRequest):
@@ -65,33 +63,31 @@ async def get_usage(x_api_key: str = Header(None)):
65
  info = API_KEYS_DB[x_api_key]
66
  return {
67
  "used": info["used"],
68
- "limit": info["limit"],
69
- "percentage": (info["used"] / info["limit"]) * 100 if info["limit"] > 0 else 0
70
  }
71
 
72
  @app.post("/v1/chat")
73
  async def chat(message: dict, x_api_key: str = Header(None)):
74
  if not x_api_key or x_api_key not in API_KEYS_DB:
75
- raise HTTPException(status_code=403, detail="Access Denied")
76
 
77
  key_info = API_KEYS_DB[x_api_key]
78
  if key_info["used"] >= key_info["limit"]:
79
  raise HTTPException(status_code=429, detail="Limit Reached")
80
 
81
  query = message.get("query", "")
82
-
83
  # 2026 Web Search Logic
84
  context = ""
85
- if any(w in query.lower() for w in ["today", "now", "2026", "current"]):
86
- try:
87
  with DDGS() as ddgs:
88
  context = "\n".join([r['body'] for r in ddgs.text(query, max_results=2)])
89
- except: pass
90
 
91
- # --- Gemma 3 Inference ---
92
- # Added the identity and the context to the system message
93
  msgs = [
94
- {"role": "system", "content": f"You are Elephant AI by MINZO-PRIME. 2026 mode enabled. Context: {context}"},
95
  {"role": "user", "content": query}
96
  ]
97
 
@@ -99,20 +95,10 @@ async def chat(message: dict, x_api_key: str = Header(None)):
99
  inputs = tokenizer([text], return_tensors="pt").to("cpu")
100
 
101
  with torch.no_grad():
102
- ids = model.generate(
103
- inputs.input_ids,
104
- max_new_tokens=450, # ටිකක් වැඩිපුර tokens ලබා දුන්නා හොඳ පිළිතුරක් සඳහා
105
- temperature=0.7,
106
- do_sample=True
107
- )
108
- # Clean up the Gemma 3 output
109
- full_response = tokenizer.batch_decode(ids, skip_special_tokens=True)[0]
110
- ans = full_response.split("model")[-1].strip()
111
 
112
- # Update Stats
113
  API_KEYS_DB[x_api_key]["used"] += 1
114
- return {"reply": ans, "usage": API_KEYS_DB[x_api_key]["used"], "model": "Gemma-3-1B"}
115
 
116
- if __name__ == "__main__":
117
- import uvicorn
118
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
3
  from pydantic import BaseModel
4
  import torch
5
  import os
 
 
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
  from duckduckgo_search import DDGS
8
 
9
+ # 🔱 The server requires this object to be named exactly "app"
10
  app = FastAPI()
11
 
 
12
  app.add_middleware(
13
  CORSMiddleware,
14
  allow_origins=["*"],
 
21
  "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"}
22
  }
23
  ADMIN_SECRET = "MINZO-SECRET-2026"
 
24
 
25
+ # --- AI Model (Gemma-3-1B-it) ---
 
26
  model_id = "google/gemma-3-1b-it"
27
  HF_TOKEN = os.getenv("HF_TOKEN")
28
 
29
  print(f"🐘 Elephant Node v3.7 Loading: {model_id}...")
30
 
31
  tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
32
+
33
+ # 🔱 [Fix] Used dtype instead of torch_dtype
34
  model = AutoModelForCausalLM.from_pretrained(
35
  model_id,
36
+ dtype=torch.bfloat16,
37
+ device_map="cpu",
38
  token=HF_TOKEN
39
  )
40
 
 
47
  # --- API Endpoints ---
48
  @app.get("/")
49
  def home():
50
+ return {"status": "Elephant Pro Active", "model": "Gemma-3-1B"}
51
 
52
  @app.post("/admin/add-key")
53
  async def add_key(data: KeyRequest):
 
63
  info = API_KEYS_DB[x_api_key]
64
  return {
65
  "used": info["used"],
66
+ "limit": info["limit"]
 
67
  }
68
 
69
  @app.post("/v1/chat")
70
  async def chat(message: dict, x_api_key: str = Header(None)):
71
  if not x_api_key or x_api_key not in API_KEYS_DB:
72
+ raise HTTPException(status_code=403)
73
 
74
  key_info = API_KEYS_DB[x_api_key]
75
  if key_info["used"] >= key_info["limit"]:
76
  raise HTTPException(status_code=429, detail="Limit Reached")
77
 
78
  query = message.get("query", "")
79
+
80
  # 2026 Web Search Logic
81
  context = ""
82
+ try:
83
+ if any(w in query.lower() for w in ["today", "now", "2026"]):
84
  with DDGS() as ddgs:
85
  context = "\n".join([r['body'] for r in ddgs.text(query, max_results=2)])
86
+ except: pass
87
 
88
+ # AI Inference
 
89
  msgs = [
90
+ {"role": "system", "content": f"Elephant AI by MINZO-PRIME. 2026 Edition. Context: {context}"},
91
  {"role": "user", "content": query}
92
  ]
93
 
 
95
  inputs = tokenizer([text], return_tensors="pt").to("cpu")
96
 
97
  with torch.no_grad():
98
+ ids = model.generate(inputs.input_ids, max_new_tokens=300, temperature=0.7, do_sample=True)
99
+ full_ans = tokenizer.batch_decode(ids, skip_special_tokens=True)[0]
100
+ ans = full_ans.split("model")[-1].strip()
 
 
 
 
 
 
101
 
 
102
  API_KEYS_DB[x_api_key]["used"] += 1
103
+ return {"reply": ans, "usage": API_KEYS_DB[x_api_key]["used"]}
104