MINZO4546 committed on
Commit
4e68fa6
·
verified ·
1 Parent(s): 7948022

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -21
app.py CHANGED
@@ -25,11 +25,20 @@ API_KEYS_DB = {
25
  ADMIN_SECRET = "MINZO-SECRET-2026"
26
  LEARNING_VAULT = "neural_learning_data.jsonl"
27
 
28
- # --- AI Model (Qwen-2.5-1.5B) ---
29
- model_id = "Qwen/Qwen2.5-1.5B-Instruct"
30
- print("🐘 Elephant Node v3.7 Loading...")
31
- tokenizer = AutoTokenizer.from_pretrained(model_id)
32
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="cpu")
 
 
 
 
 
 
 
 
 
33
 
34
  # --- Data Models ---
35
  class KeyRequest(BaseModel):
@@ -38,10 +47,9 @@ class KeyRequest(BaseModel):
38
  limit: int = 100
39
 
40
  # --- API Endpoints ---
41
-
42
  @app.get("/")
43
  def home():
44
- return {"status": "Elephant Pro Active", "keys": len(API_KEYS_DB)}
45
 
46
  @app.post("/admin/add-key")
47
  async def add_key(data: KeyRequest):
@@ -52,7 +60,6 @@ async def add_key(data: KeyRequest):
52
 
53
  @app.get("/v1/usage")
54
  async def get_usage(x_api_key: str = Header(None)):
55
- """Key එකේ පාවිච්චිය පරීක්ෂා කිරීමේ Endpoint එක"""
56
  if not x_api_key or x_api_key not in API_KEYS_DB:
57
  raise HTTPException(status_code=403, detail="Invalid Key")
58
  info = API_KEYS_DB[x_api_key]
@@ -64,34 +71,48 @@ async def get_usage(x_api_key: str = Header(None)):
64
 
65
  @app.post("/v1/chat")
66
  async def chat(message: dict, x_api_key: str = Header(None)):
67
- if x_api_key not in API_KEYS_DB:
68
- raise HTTPException(status_code=403)
69
-
70
  key_info = API_KEYS_DB[x_api_key]
71
  if key_info["used"] >= key_info["limit"]:
72
  raise HTTPException(status_code=429, detail="Limit Reached")
73
-
74
- query = message.get("query", "")
75
 
 
 
76
  # 2026 Web Search Logic
77
  context = ""
78
- if any(w in query.lower() for w in ["today", "now", "2026"]):
79
  try:
80
  with DDGS() as ddgs:
81
  context = "\n".join([r['body'] for r in ddgs.text(query, max_results=2)])
82
  except: pass
83
 
84
- # AI Inference
85
- msgs = [{"role": "system", "content": f"Elephant AI. 2026 mode. Context: {context}"}, {"role": "user", "content": query}]
 
 
 
 
 
86
  text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
87
  inputs = tokenizer([text], return_tensors="pt").to("cpu")
88
-
89
  with torch.no_grad():
90
- ids = model.generate(inputs.input_ids, max_new_tokens=256)
91
- ans = tokenizer.batch_decode(ids, skip_special_tokens=True)[0].split("assistant")[-1].strip()
 
 
 
 
 
 
 
92
 
93
  # Update Stats
94
  API_KEYS_DB[x_api_key]["used"] += 1
95
- return {"reply": ans, "usage": API_KEYS_DB[x_api_key]["used"]}
96
 
97
- main = app
 
 
 
25
  ADMIN_SECRET = "MINZO-SECRET-2026"
26
  LEARNING_VAULT = "neural_learning_data.jsonl"
27
 
28
+ # --- AI Model (Gemma-3-1B-it Upgrade) ---
29
+ # Specialist, අපි මෙතනදී bfloat16 පාවිච්චි කරනවා ඔයාගේ 18GB RAM එකෙන් 3GB විතරක් වැය වෙන්න.
30
+ model_id = "google/gemma-3-1b-it"
31
+ HF_TOKEN = os.getenv("HF_TOKEN")
32
+
33
+ print(f"🐘 Elephant Node v3.7 Loading: {model_id}...")
34
+
35
+ tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
36
+ model = AutoModelForCausalLM.from_pretrained(
37
+ model_id,
38
+ torch_dtype=torch.bfloat16,
39
+ device_map="cpu", # CPU එකේ ඉතාම වේගයෙන් මේක වැඩ කරයි
40
+ token=HF_TOKEN
41
+ )
42
 
43
  # --- Data Models ---
44
  class KeyRequest(BaseModel):
 
47
  limit: int = 100
48
 
49
  # --- API Endpoints ---
 
50
  @app.get("/")
51
  def home():
52
+ return {"status": "Elephant Pro Active (Gemma 3)", "keys": len(API_KEYS_DB)}
53
 
54
  @app.post("/admin/add-key")
55
  async def add_key(data: KeyRequest):
 
60
 
61
  @app.get("/v1/usage")
62
  async def get_usage(x_api_key: str = Header(None)):
 
63
  if not x_api_key or x_api_key not in API_KEYS_DB:
64
  raise HTTPException(status_code=403, detail="Invalid Key")
65
  info = API_KEYS_DB[x_api_key]
 
71
 
72
  @app.post("/v1/chat")
73
  async def chat(message: dict, x_api_key: str = Header(None)):
74
+ if not x_api_key or x_api_key not in API_KEYS_DB:
75
+ raise HTTPException(status_code=403, detail="Access Denied")
76
+
77
  key_info = API_KEYS_DB[x_api_key]
78
  if key_info["used"] >= key_info["limit"]:
79
  raise HTTPException(status_code=429, detail="Limit Reached")
 
 
80
 
81
+ query = message.get("query", "")
82
+
83
  # 2026 Web Search Logic
84
  context = ""
85
+ if any(w in query.lower() for w in ["today", "now", "2026", "current"]):
86
  try:
87
  with DDGS() as ddgs:
88
  context = "\n".join([r['body'] for r in ddgs.text(query, max_results=2)])
89
  except: pass
90
 
91
+ # --- Gemma 3 Inference ---
92
+ # Identity එක සහ Context එක System message එකට එකතු කළා
93
+ msgs = [
94
+ {"role": "system", "content": f"You are Elephant AI by MINZO-PRIME. 2026 mode enabled. Context: {context}"},
95
+ {"role": "user", "content": query}
96
+ ]
97
+
98
  text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
99
  inputs = tokenizer([text], return_tensors="pt").to("cpu")
100
+
101
  with torch.no_grad():
102
+ ids = model.generate(
103
+ inputs.input_ids,
104
+ max_new_tokens=450, # ටිකක් වැඩිපුර tokens ලබා දුන්නා හොඳ පිළිතුරක් සඳහා
105
+ temperature=0.7,
106
+ do_sample=True
107
+ )
108
+ # Gemma 3 හි Output එක පිරිසිදු කිරීම
109
+ full_response = tokenizer.batch_decode(ids, skip_special_tokens=True)[0]
110
+ ans = full_response.split("model")[-1].strip()
111
 
112
  # Update Stats
113
  API_KEYS_DB[x_api_key]["used"] += 1
114
+ return {"reply": ans, "usage": API_KEYS_DB[x_api_key]["used"], "model": "Gemma-3-1B"}
115
 
116
+ if __name__ == "__main__":
117
+ import uvicorn
118
+ uvicorn.run(app, host="0.0.0.0", port=7860)