MINZO4546 committed on
Commit
f0868c6
·
verified ·
1 Parent(s): 2bac2fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -103
app.py CHANGED
@@ -4,8 +4,8 @@ from pydantic import BaseModel
4
  import torch
5
  import re
6
  import secrets
7
- from transformers import AutoModelForCausalLM, AutoTokenizer
8
- from duckduckgo_search import DDGS
9
 
10
  app = FastAPI()
11
 
@@ -16,27 +16,35 @@ app.add_middleware(
16
  allow_headers=["*"],
17
  )
18
 
19
- # ── API Keys Database ──
20
  API_KEYS_DB = {
21
  "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"},
22
  "ELE-PRIME-YG5EPZFQ": {"limit": 5000, "used": 0, "status": "active"},
23
  }
24
  ADMIN_SECRET = "MINZO-SECRET-2026"
25
 
26
- # ── Load AI Model ──
27
- model_id = "google/gemma-4-E4B-it"
28
- print(f"Loading {model_id} ...")
29
- tokenizer = AutoTokenizer.from_pretrained(
30
- model_id,
31
- trust_remote_code=True,
 
 
 
 
 
 
32
  )
 
 
33
  model = AutoModelForCausalLM.from_pretrained(
34
  model_id,
35
- torch_dtype="auto",
36
- device_map="cpu",
37
- trust_remote_code=True,
38
  )
39
- print("Model loaded.")
40
 
41
  # ── Pydantic Models ──
42
  class AdminRequest(BaseModel):
@@ -45,61 +53,55 @@ class AdminRequest(BaseModel):
45
 
46
  class ChatRequest(BaseModel):
47
  query: str
48
- search: bool = True # client can disable search per-request
49
- max_results: int = 3 # how many DDG results to inject
50
 
51
  # ──────────────────────────────────────
52
- # REAL-TIME WEB SEARCH HELPER
53
  # ──────────────────────────────────────
54
- def web_search(query: str, max_results: int = 3) -> str:
55
  """
56
- Search DuckDuckGo and return formatted context string.
57
- Returns empty string on failure so the model still responds.
58
  """
 
 
 
 
 
 
 
59
  try:
60
- with DDGS() as ddgs:
61
- results = list(
62
- ddgs.text(
63
- query,
64
- max_results=max_results,
65
- safesearch="moderate",
66
- timelimit=None, # no time limit → more results
67
- )
68
- )
69
  if not results:
70
  return ""
71
 
72
- lines = ["[WEB SEARCH RESULTS — Real-time]"]
73
  for i, r in enumerate(results, 1):
74
- title = r.get("title", "").strip()
75
- body = r.get("body", "").strip()
76
- href = r.get("href", "").strip()
77
- lines.append(f"\n{i}. {title}\n {body}\n Source: {href}")
78
  lines.append("\n[END OF SEARCH RESULTS]")
79
  return "\n".join(lines)
80
 
81
  except Exception as e:
82
- print(f"[DDG search error] {e}")
83
  return ""
84
 
85
-
86
  # ── Decide whether to search ──
87
  def should_search(query: str) -> bool:
88
- """
89
- Always search unless the query is clearly a pure code/math task
90
- with no factual component. This keeps it simple and reliable.
91
- """
92
  no_search_patterns = [
93
  r"^\s*(write|create|generate|make|build)\s+(a\s+)?(code|function|script|program|class)",
94
  r"^\s*explain\s+(this\s+)?(code|function|snippet)",
95
- r"^\s*(what is|define)\s+[a-z ]+\s*\??\s*$", # simple definitions
96
  ]
97
  q = query.lower().strip()
98
  for pat in no_search_patterns:
99
  if re.match(pat, q, re.I):
100
  return False
101
- return True # search by default for everything else
102
-
103
 
104
  # ──────────────────────────────────────
105
  # ENDPOINTS
@@ -107,30 +109,21 @@ def should_search(query: str) -> bool:
107
  @app.get("/")
108
  def home():
109
  return {
110
- "status": "Elephant Pro Active",
111
  "active_keys": len(API_KEYS_DB),
112
- "search": "DuckDuckGo real-time",
113
  }
114
 
115
-
116
  @app.post("/v1/generate-key")
117
  async def generate_key(data: AdminRequest):
118
  if data.admin_pass != ADMIN_SECRET:
119
  raise HTTPException(status_code=401, detail="Unauthorized Specialist Access!")
120
-
121
  new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
122
  API_KEYS_DB[new_key] = {"limit": data.limit, "used": 0, "status": "active"}
123
-
124
- return {
125
- "message": "New Specialist Key Activated",
126
- "api_key": new_key,
127
- "limit": data.limit,
128
- }
129
-
130
 
131
  @app.post("/v1/chat")
132
  async def chat(message: ChatRequest, x_api_key: str = Header(None)):
133
- # ── Auth ──
134
  if not x_api_key or x_api_key not in API_KEYS_DB:
135
  raise HTTPException(status_code=403, detail="Access Denied")
136
 
@@ -139,78 +132,41 @@ async def chat(message: ChatRequest, x_api_key: str = Header(None)):
139
  raise HTTPException(status_code=429, detail="Limit Reached")
140
 
141
  query = message.query.strip()
142
- if not query:
143
- raise HTTPException(status_code=400, detail="Empty query")
144
-
145
- # ── Real-time Web Search ──
146
  context = ""
147
  search_used = False
148
 
149
  if message.search and should_search(query):
150
- print(f"[SEARCH] Querying DDG: {query[:80]}")
151
- context = web_search(query, max_results=message.max_results)
152
  if context:
153
  search_used = True
154
- print(f"[SEARCH] Got {message.max_results} results.")
155
- else:
156
- print("[SEARCH] No results returned.")
157
 
158
- # ── System Prompt ──
159
  today = __import__("datetime").datetime.utcnow().strftime("%A, %d %B %Y, %H:%M UTC")
160
  system_instruction = (
161
  "You are Elephant AI (Inachi-Core), an expert assistant for Specialist MINZO-PRIME. "
162
- "Respond in the same language the user uses (Sinhala or English). "
163
- "Be concise, accurate, and helpful. "
164
- f"Current UTC date/time: {today}. "
165
  )
166
  if search_used:
167
- system_instruction += (
168
- "\nYou have been given real-time web search results below. "
169
- "Use them to answer accurately. Always cite the source URL when referencing search results.\n"
170
- + context
171
- )
172
 
173
- # ── Build Messages ──
174
  msgs = [
175
- {"role": "system", "content": system_instruction},
176
- {"role": "user", "content": query},
177
  ]
178
 
179
- # ── Tokenize & Generate ──
180
- text = tokenizer.apply_chat_template(
181
- msgs, tokenize=False, add_generation_prompt=True
182
- )
183
- inputs = tokenizer([text], return_tensors="pt").to("cpu")
184
 
185
  with torch.no_grad():
186
  outputs = model.generate(
187
- inputs.input_ids,
188
  max_new_tokens=512,
189
  temperature=0.6,
190
- top_p=0.9,
191
  do_sample=True,
192
  pad_token_id=tokenizer.eos_token_id,
193
  )
194
 
195
- full_response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
196
-
197
- # ── Clean Output ──
198
- ans = full_response.split("assistant")[-1].strip()
199
- if "</think>" in ans:
200
- ans = ans.split("</think>")[-1].strip()
201
- ans = ans.replace("Ċ", "\n").replace("Ġ", " ")
202
- ans = re.sub(r" +", " ", ans).strip()
203
-
204
- # ── Update Usage ──
205
  API_KEYS_DB[x_api_key]["used"] += 1
206
-
207
- return {
208
- "reply": ans,
209
- "search_used": search_used,
210
- "usage": API_KEYS_DB[x_api_key]["used"],
211
- "limit": key_info["limit"],
212
- }
213
-
214
-
215
- # HuggingFace Spaces entrypoint
216
- main = app
 
4
  import torch
5
  import re
6
  import secrets
7
+ import requests # Required for the Google Search API
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
9
 
10
  app = FastAPI()
11
 
 
16
  allow_headers=["*"],
17
  )
18
 
19
+ # ── API Keys & Config ──
20
  API_KEYS_DB = {
21
  "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"},
22
  "ELE-PRIME-YG5EPZFQ": {"limit": 5000, "used": 0, "status": "active"},
23
  }
24
  ADMIN_SECRET = "MINZO-SECRET-2026"
25
 
26
+ # ── Google Search Config ──
27
+ # Specialist, get these two values from your Google Cloud Console and put them here
28
+ GOOGLE_API_KEY = "YOUR_GOOGLE_API_KEY"
29
+ GOOGLE_CX = "YOUR_CUSTOM_SEARCH_ENGINE_ID"
30
+
31
+ # ── Load AI Model with 4-bit Quantization ──
32
+ model_id = "google/gemma-2-9b-it" # Let's use Gemma-2-9B
33
+ print(f"Loading {model_id} with 4-bit quantization...")
34
+
35
+ quant_config = BitsAndBytesConfig(
36
+ load_in_4bit=True,
37
+ bnb_4bit_compute_dtype=torch.bfloat16
38
  )
39
+
40
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
41
  model = AutoModelForCausalLM.from_pretrained(
42
  model_id,
43
+ quantization_config=quant_config,
44
+ device_map="auto", # Will auto-load onto the GPU of the 18GB Space
45
+ trust_remote_code=True
46
  )
47
+ print("Model loaded and optimized.")
48
 
49
  # ── Pydantic Models ──
50
  class AdminRequest(BaseModel):
 
53
 
54
  class ChatRequest(BaseModel):
55
  query: str
56
+ search: bool = True
57
+ max_results: int = 3
58
 
59
  # ──────────────────────────────────────
60
+ # GOOGLE REAL-TIME WEB SEARCH HELPER
61
  # ──────────────────────────────────────
62
+ def google_search(query: str, max_results: int = 3) -> str:
63
  """
64
+ Search Google and return formatted context string.
 
65
  """
66
+ url = "https://www.googleapis.com/customsearch/v1"
67
+ params = {
68
+ "q": query,
69
+ "key": GOOGLE_API_KEY,
70
+ "cx": GOOGLE_CX,
71
+ "num": max_results
72
+ }
73
  try:
74
+ response = requests.get(url, params=params)
75
+ results = response.json().get("items", [])
76
+
 
 
 
 
 
 
77
  if not results:
78
  return ""
79
 
80
+ lines = ["[GOOGLE SEARCH RESULTS — Real-time]"]
81
  for i, r in enumerate(results, 1):
82
+ title = r.get("title", "").strip()
83
+ snippet = r.get("snippet", "").strip()
84
+ link = r.get("link", "").strip()
85
+ lines.append(f"\n{i}. {title}\n {snippet}\n Source: {link}")
86
  lines.append("\n[END OF SEARCH RESULTS]")
87
  return "\n".join(lines)
88
 
89
  except Exception as e:
90
+ print(f"[Google search error] {e}")
91
  return ""
92
 
 
93
  # ── Decide whether to search ──
94
  def should_search(query: str) -> bool:
 
 
 
 
95
  no_search_patterns = [
96
  r"^\s*(write|create|generate|make|build)\s+(a\s+)?(code|function|script|program|class)",
97
  r"^\s*explain\s+(this\s+)?(code|function|snippet)",
98
+ r"^\s*(what is|define)\s+[a-z ]+\s*\??\s*$",
99
  ]
100
  q = query.lower().strip()
101
  for pat in no_search_patterns:
102
  if re.match(pat, q, re.I):
103
  return False
104
+ return True
 
105
 
106
  # ──────────────────────────────────────
107
  # ENDPOINTS
 
109
  @app.get("/")
110
  def home():
111
  return {
112
+ "status": "Inachi-Core Active",
113
  "active_keys": len(API_KEYS_DB),
114
+ "search": "Google Real-time",
115
  }
116
 
 
117
  @app.post("/v1/generate-key")
118
  async def generate_key(data: AdminRequest):
119
  if data.admin_pass != ADMIN_SECRET:
120
  raise HTTPException(status_code=401, detail="Unauthorized Specialist Access!")
 
121
  new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
122
  API_KEYS_DB[new_key] = {"limit": data.limit, "used": 0, "status": "active"}
123
+ return {"api_key": new_key, "limit": data.limit}
 
 
 
 
 
 
124
 
125
  @app.post("/v1/chat")
126
  async def chat(message: ChatRequest, x_api_key: str = Header(None)):
 
127
  if not x_api_key or x_api_key not in API_KEYS_DB:
128
  raise HTTPException(status_code=403, detail="Access Denied")
129
 
 
132
  raise HTTPException(status_code=429, detail="Limit Reached")
133
 
134
  query = message.query.strip()
 
 
 
 
135
  context = ""
136
  search_used = False
137
 
138
  if message.search and should_search(query):
139
+ context = google_search(query, max_results=message.max_results)
 
140
  if context:
141
  search_used = True
 
 
 
142
 
 
143
  today = __import__("datetime").datetime.utcnow().strftime("%A, %d %B %Y, %H:%M UTC")
144
  system_instruction = (
145
  "You are Elephant AI (Inachi-Core), an expert assistant for Specialist MINZO-PRIME. "
146
+ "Respond in the same language the user uses. "
147
+ f"Current date: {today}. "
 
148
  )
149
  if search_used:
150
+ system_instruction += "\nUse these web results to answer accurately:\n" + context
 
 
 
 
151
 
 
152
  msgs = [
153
+ {"role": "system", "content": system_instruction},
154
+ {"role": "user", "content": query},
155
  ]
156
 
157
+ text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
158
+ inputs = tokenizer([text], return_tensors="pt").to(model.device)
 
 
 
159
 
160
  with torch.no_grad():
161
  outputs = model.generate(
162
+ **inputs,
163
  max_new_tokens=512,
164
  temperature=0.6,
 
165
  do_sample=True,
166
  pad_token_id=tokenizer.eos_token_id,
167
  )
168
 
169
+ ans = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True).strip()
170
+
 
 
 
 
 
 
 
 
171
  API_KEYS_DB[x_api_key]["used"] += 1
172
+ return {"reply": ans, "search_used": search_used, "usage": API_KEYS_DB[x_api_key]["used"]}