MINZO4546 committed on
Commit
9154c39
·
verified ·
1 Parent(s): 1dea909

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -50
app.py CHANGED
@@ -2,12 +2,8 @@ from fastapi import FastAPI, Header, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
  import torch
5
- import os
6
- import json
7
  import re
8
- import uuid
9
  import secrets
10
- import datetime
11
  from transformers import AutoModelForCausalLM, AutoTokenizer
12
  from duckduckgo_search import DDGS
13
 
@@ -20,102 +16,195 @@ app.add_middleware(
20
  allow_headers=["*"],
21
  )
22
 
23
- # --- Database & Config ---
24
- # ආරම්භක Keys (initial keys)
25
  API_KEYS_DB = {
26
  "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"},
27
- "ELE-PRIME-YG5EPZFQ": {"limit": 5000, "used": 0, "status": "active"}
28
  }
29
  ADMIN_SECRET = "MINZO-SECRET-2026"
30
 
31
- # --- AI Model ---
32
  model_id = "AngelSlim/Hy-MT1.5-1.8B-1.25bit"
33
- print(f"πŸ”± INACHI-CORE: Loading {model_id}...")
34
-
35
  tokenizer = AutoTokenizer.from_pretrained(model_id)
36
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="cpu")
 
 
 
37
 
38
- # --- Data Models ---
39
  class AdminRequest(BaseModel):
40
  admin_pass: str
41
  limit: int = 1000
42
 
43
- # --- API Endpoints ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
 
 
 
45
  @app.get("/")
46
  def home():
47
- return {"status": "Elephant Pro Active", "active_keys": len(API_KEYS_DB)}
 
 
 
 
 
48
 
49
- # 🔱 අලුතින් Key එකක් Auto-Generate කරන Endpoint එක (endpoint that auto-generates a new key)
50
  @app.post("/v1/generate-key")
51
  async def generate_key(data: AdminRequest):
52
  if data.admin_pass != ADMIN_SECRET:
53
  raise HTTPException(status_code=401, detail="Unauthorized Specialist Access!")
54
-
55
- # Random Key එකක් නිර්මාණය කිරීම (උදා: ELE-PRIME-X8A2...) (create a random key, e.g. ELE-PRIME-X8A2...)
56
  new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
57
  API_KEYS_DB[new_key] = {"limit": data.limit, "used": 0, "status": "active"}
58
-
59
  return {
60
  "message": "New Specialist Key Activated",
61
  "api_key": new_key,
62
- "limit": data.limit
63
  }
64
 
 
65
  @app.post("/v1/chat")
66
- async def chat(message: dict, x_api_key: str = Header(None)):
 
67
  if not x_api_key or x_api_key not in API_KEYS_DB:
68
  raise HTTPException(status_code=403, detail="Access Denied")
69
-
70
  key_info = API_KEYS_DB[x_api_key]
71
  if key_info["used"] >= key_info["limit"]:
72
  raise HTTPException(status_code=429, detail="Limit Reached")
73
-
74
- query = message.get("query", "")
75
 
76
- # Web Search
 
 
 
 
77
  context = ""
78
- if any(w in query.lower() for w in ["today", "now", "2026", "ΰΆ…ΰΆ―"]):
79
- try:
80
- with DDGS() as ddgs:
81
- results = list(ddgs.text(query, max_results=2))
82
- context = "\n".join([r['body'] for r in results])
83
- except: pass
84
-
85
- # 🔱 Language Adaptive System Instruction
 
 
 
 
 
86
  system_instruction = (
87
  "You are Elephant AI (Inachi-Core), an expert assistant for Specialist MINZO-PRIME. "
88
- "Respond in the language used by the user (Sinhala or English). "
89
- f"Real-time Context: {context}"
 
90
  )
 
 
 
 
 
 
91
 
 
92
  msgs = [
93
- {"role": "system", "content": system_instruction},
94
- {"role": "user", "content": query}
95
  ]
96
-
97
- text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
 
 
 
98
  inputs = tokenizer([text], return_tensors="pt").to("cpu")
99
 
100
  with torch.no_grad():
101
  outputs = model.generate(
102
- inputs.input_ids,
103
  max_new_tokens=512,
104
- temperature=0.6,
105
  top_p=0.9,
106
  do_sample=True,
107
- pad_token_id=tokenizer.eos_token_id
108
  )
109
-
110
- full_response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
111
- ans = full_response.split("assistant")[-1].strip()
112
-
113
- # Cleaning Logic
114
- if "</think>" in ans: ans = ans.split("</think>")[-1].strip()
115
- ans = ans.replace("Ċ", "\n").replace("Ġ", " ")
116
- ans = re.sub(r' +', ' ', ans).strip()
117
 
 
 
 
 
 
 
 
 
 
 
118
  API_KEYS_DB[x_api_key]["used"] += 1
119
- return {"reply": ans, "usage": API_KEYS_DB[x_api_key]["used"]}
120
 
 
 
 
 
 
 
 
 
 
121
  main = app
 
2
import datetime
import os
import re
import secrets

import torch
from duckduckgo_search import DDGS
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
9
 
 
16
  allow_headers=["*"],
17
  )
18
 
19
# ── API Keys Database ──
# In-memory key store: api_key -> {"limit": allowed calls, "used": calls made
# so far, "status": key state}.  It is a plain module-level dict, so keys
# created at runtime and every usage counter live only as long as the process.
API_KEYS_DB = {
    "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"},
    "ELE-PRIME-YG5EPZFQ": {"limit": 5000, "used": 0, "status": "active"},
}

# Password required by POST /v1/generate-key.  Read from the environment so
# the real secret does not have to be committed; the literal is kept only as a
# backward-compatible fallback for deployments that do not set the variable.
# NOTE(review): the fallback value is public in the repository history and
# should be treated as compromised -- set ELEPHANT_ADMIN_SECRET in production.
ADMIN_SECRET = os.environ.get("ELEPHANT_ADMIN_SECRET", "MINZO-SECRET-2026")
25
 
26
# ── Load AI Model ──
# The tokenizer and the model are loaded once, at import time, and are then
# used as module-level globals by the chat endpoint.
model_id = "AngelSlim/Hy-MT1.5-1.8B-1.25bit"
print(f"Loading {model_id} ...")

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cpu",
    torch_dtype="auto",
)

print("Model loaded.")
34
 
35
+ # ── Pydantic Models ──
36
class AdminRequest(BaseModel):
    """Request body accepted by POST /v1/generate-key."""

    # Password the endpoint compares against ADMIN_SECRET.
    admin_pass: str
    # Call quota stored on the newly created key; 1000 when omitted.
    limit: int = 1000
39
 
40
class ChatRequest(BaseModel):
    """Request body accepted by POST /v1/chat."""

    # The user's question; the endpoint strips it and rejects it when empty.
    query: str
    # Per-request switch: set to False to skip the web search entirely.
    search: bool = True
    # Number of DuckDuckGo results to inject into the prompt.
    max_results: int = 3
44
+
45
+ # ──────────────────────────────────────
46
+ # REAL-TIME WEB SEARCH HELPER
47
+ # ──────────────────────────────────────
48
def web_search(query: str, max_results: int = 3) -> str:
    """Search DuckDuckGo and format the hits as a prompt-ready context block.

    Args:
        query: Text to search for.
        max_results: Maximum number of hits to request.

    Returns:
        A multi-line string with a header, one numbered entry per hit
        (title, body, source URL) and a footer. An empty string is returned
        when the query is blank, ``max_results`` is not positive, the search
        yields nothing, or any error occurs -- so the caller can always fall
        back to answering without web context.
    """
    # Nothing sensible can be searched for; skip the network round-trip.
    if not query or not query.strip() or max_results < 1:
        return ""

    try:
        with DDGS() as ddgs:
            results = list(
                ddgs.text(
                    query,
                    max_results=max_results,
                    safesearch="moderate",
                    timelimit=None,  # do not restrict results by age
                )
            )
        if not results:
            return ""

        lines = ["[WEB SEARCH RESULTS — Real-time]"]
        for i, r in enumerate(results, 1):
            # `or ""` also covers keys that are present but hold None, which
            # `.get(key, "")` alone would pass through to `.strip()`.
            title = (r.get("title") or "").strip()
            body = (r.get("body") or "").strip()
            href = (r.get("href") or "").strip()
            lines.append(f"\n{i}. {title}\n {body}\n Source: {href}")
        lines.append("\n[END OF SEARCH RESULTS]")
        return "\n".join(lines)

    except Exception as e:
        # Deliberate best-effort: a failed search must never fail the chat.
        print(f"[DDG search error] {e}")
        return ""
78
+
79
+
80
+ # ── Decide whether to search ──
81
# Queries that are pure coding tasks: never worth a web search.
_CODE_TASK_PATTERNS = tuple(
    re.compile(p)
    for p in (
        r"^\s*(write|create|generate|make|build)\s+(a\s+)?(code|function|script|program|class)",
        r"^\s*explain\s+(this\s+)?(code|function|snippet)",
    )
)
# "what is X" / "define X" made only of letters and spaces.
_SIMPLE_DEFINITION = re.compile(r"^\s*(what is|define)\s+[a-z ]+\s*\??\s*$")
# Words signalling that the answer depends on current, real-world data.
_TIME_SENSITIVE = re.compile(
    r"\b(today|tonight|tomorrow|yesterday|now|current|currently|latest|recent"
    r"|news|price|weather|score)\b"
)


def should_search(query: str) -> bool:
    """Decide whether a query should be enriched with web search results.

    Searching is the default. It is skipped for blank queries, for pure
    coding tasks, and for simple "what is X" / "define X" questions -- unless
    such a question contains a time-sensitive word (e.g. "what is the weather
    today"), in which case fresh data is needed and the search still runs.

    Args:
        query: The user's question.

    Returns:
        True when a web search should be performed, False otherwise.
    """
    # Patterns are written in lower case, so normalise once instead of
    # matching case-insensitively.
    q = query.strip().lower()
    if not q:
        return False

    if any(pat.match(q) for pat in _CODE_TASK_PATTERNS):
        return False

    if _SIMPLE_DEFINITION.match(q) and not _TIME_SENSITIVE.search(q):
        return False

    return True  # search by default for everything else
96
+
97
 
98
+ # ──────────────────────────────────────
99
+ # ENDPOINTS
100
+ # ──────────────────────────────────────
101
@app.get("/")
def home():
    """Status endpoint: report the service banner, key count and search backend."""
    payload = {"status": "Elephant Pro Active"}
    # Counts every entry in the key store, whatever its "status" field says.
    payload["active_keys"] = len(API_KEYS_DB)
    payload["search"] = "DuckDuckGo real-time"
    return payload
108
+
109
 
 
110
@app.post("/v1/generate-key")
async def generate_key(data: AdminRequest):
    """Create a new API key with the requested quota (admin only).

    Args:
        data: Admin password and the call limit for the new key.

    Returns:
        A dict with a confirmation message, the new key and its limit.

    Raises:
        HTTPException: 401 when the admin password is wrong; 400 when the
            requested limit is not a positive integer.
    """
    # Constant-time comparison so response timing does not leak how much of
    # the secret matched. Encoding to bytes keeps non-ASCII passwords from
    # raising TypeError inside compare_digest.
    supplied = data.admin_pass.encode("utf-8")
    expected = ADMIN_SECRET.encode("utf-8")
    if not secrets.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Unauthorized Specialist Access!")

    # A key whose limit is zero or negative could never be used.
    if data.limit < 1:
        raise HTTPException(status_code=400, detail="limit must be a positive integer")

    # Only 32 random bits: regenerate on the (rare) collision rather than
    # silently overwriting an existing key and resetting its usage.
    new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
    while new_key in API_KEYS_DB:
        new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"

    API_KEYS_DB[new_key] = {"limit": data.limit, "used": 0, "status": "active"}

    return {
        "message": "New Specialist Key Activated",
        "api_key": new_key,
        "limit": data.limit,
    }
123
 
124
+
125
# Upper bound on search results injected into one prompt, whatever the client asks for.
_MAX_SEARCH_RESULTS = 10


@app.post("/v1/chat")
async def chat(message: ChatRequest, x_api_key: str = Header(None)):
    """Answer a user query with the local model, optionally using web search.

    Args:
        message: The query plus per-request search options.
        x_api_key: API key taken from the ``X-API-Key`` request header.

    Returns:
        A dict with the model's reply, whether search context was used, and
        the key's updated usage count and limit.

    Raises:
        HTTPException: 403 for a missing, unknown or non-active key; 429 when
            the key's quota is exhausted; 400 when the query is empty.
    """
    # ── Auth ──
    key_info = API_KEYS_DB.get(x_api_key) if x_api_key else None
    # Every stored key carries a "status"; only "active" keys may be used.
    if key_info is None or key_info.get("status", "active") != "active":
        raise HTTPException(status_code=403, detail="Access Denied")
    if key_info["used"] >= key_info["limit"]:
        raise HTTPException(status_code=429, detail="Limit Reached")

    query = message.query.strip()
    if not query:
        raise HTTPException(status_code=400, detail="Empty query")

    # ── Real-time Web Search ──
    context = ""
    if message.search and should_search(query):
        # Clamp the client-supplied count so one request cannot ask for an
        # arbitrarily large (or non-positive) number of results.
        wanted = max(1, min(message.max_results, _MAX_SEARCH_RESULTS))
        print(f"[SEARCH] Querying DDG: {query[:80]}")
        context = web_search(query, max_results=wanted)
        if context:
            print(f"[SEARCH] Injecting {len(context)} characters of search context.")
        else:
            print("[SEARCH] No results returned.")
    search_used = bool(context)

    # ── System Prompt ──
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    today = now_utc.strftime("%A, %d %B %Y, %H:%M UTC")
    system_instruction = (
        "You are Elephant AI (Inachi-Core), an expert assistant for Specialist MINZO-PRIME. "
        "Respond in the same language the user uses (Sinhala or English). "
        "Be concise, accurate, and helpful. "
        f"Current UTC date/time: {today}. "
    )
    if search_used:
        system_instruction += (
            "\nYou have been given real-time web search results below. "
            "Use them to answer accurately. Always cite the source URL when referencing search results.\n"
            + context
        )

    # ── Build Messages ──
    msgs = [
        {"role": "system", "content": system_instruction},
        {"role": "user", "content": query},
    ]

    # ── Tokenize & Generate ──
    text = tokenizer.apply_chat_template(
        msgs, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer([text], return_tensors="pt").to("cpu")
    prompt_len = inputs.input_ids.shape[-1]

    # NOTE(review): generation is synchronous CPU work inside an async
    # handler, so the event loop is blocked while it runs.
    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            # pad_token_id is set to the EOS id below; the explicit mask lets
            # generate() tell real tokens from padding.
            attention_mask=inputs.attention_mask,
            max_new_tokens=512,
            temperature=0.6,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Splitting the full transcript
    # on the word "assistant" would cut off any reply containing that word.
    ans = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    # ── Clean Output ──
    if "</think>" in ans:
        # Keep only the text after the closing </think> tag.
        ans = ans.split("</think>")[-1].strip()
    # "Ċ" / "Ġ" are byte-level BPE markers for newline / space that can
    # survive decoding.
    ans = ans.replace("Ċ", "\n").replace("Ġ", " ")
    ans = re.sub(r" +", " ", ans).strip()

    # ── Update Usage ──
    # Counted only after a successful generation.
    key_info["used"] += 1

    return {
        "reply": ans,
        "search_used": search_used,
        "usage": key_info["used"],
        "limit": key_info["limit"],
    }
207
+
208
+
209
+ # HuggingFace Spaces entrypoint
210
  main = app