MINZO4546 committed on
Commit
c8bfde7
·
verified ·
1 Parent(s): 749df63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -77
app.py CHANGED
@@ -7,7 +7,7 @@ import secrets
7
  import requests
8
  from transformers import AutoModelForCausalLM, AutoTokenizer
9
 
10
- # Hugging Face සර්වර් එක 'main' ලෝඩ් කිරීමට උත්සාහ කරන බැවින් මෙය මෙසේ නම් කරන ලදී.
11
  main = FastAPI()
12
 
13
  main.add_middleware(
@@ -25,29 +25,21 @@ API_KEYS_DB = {
25
  ADMIN_SECRET = "MINZO-SECRET-2026"
26
 
27
  # ── Google Search Config ──
28
- # MINZO-PRIME, ඔයාගේ Keys මෙතනට ඇතුළත් කරන්න.
29
  GOOGLE_API_KEY = "YOUR_GOOGLE_API_KEY"
30
  GOOGLE_CX = "YOUR_CUSTOM_SEARCH_ENGINE_ID"
31
 
32
  # ── Load AI Model for CPU ──
33
  model_id = "google/gemma-2-9b-it"
34
- print(f"Loading {model_id} on CPU (Optimized for 18GB RAM)...")
35
 
36
  tokenizer = AutoTokenizer.from_pretrained(model_id)
37
-
38
- # 'torch_dtype' වෙනුවට අලුත් 'dtype' පරාමිතිය භාවිතා කර ඇත.
39
  model = AutoModelForCausalLM.from_pretrained(
40
  model_id,
41
  dtype=torch.bfloat16,
42
  device_map="cpu",
43
  trust_remote_code=True
44
  )
45
- print("Model loaded on CPU successfully.")
46
-
47
- # ── Pydantic Models ──
48
- class AdminRequest(BaseModel):
49
- admin_pass: str
50
- limit: int = 1000
51
 
52
  class ChatRequest(BaseModel):
53
  query: str
@@ -55,98 +47,55 @@ class ChatRequest(BaseModel):
55
  max_results: int = 3
56
 
57
  # ──────────────────────────────────────
58
- # GOOGLE REAL-TIME WEB SEARCH HELPER
59
  # ──────────────────────────────────────
60
  def google_search(query: str, max_results: int = 3) -> str:
61
  url = "https://www.googleapis.com/customsearch/v1"
62
- params = {
63
- "q": query,
64
- "key": GOOGLE_API_KEY,
65
- "cx": GOOGLE_CX,
66
- "num": max_results
67
- }
68
  try:
69
  response = requests.get(url, params=params)
70
  results = response.json().get("items", [])
71
-
72
- if not results:
73
- return ""
74
-
75
- lines = ["[GOOGLE SEARCH RESULTS — Real-time]"]
76
  for i, r in enumerate(results, 1):
77
- title = r.get("title", "").strip()
78
- snippet = r.get("snippet", "").strip()
79
- link = r.get("link", "").strip()
80
- lines.append(f"\n{i}. {title}\n {snippet}\n Source: {link}")
81
- lines.append("\n[END OF SEARCH RESULTS]")
82
  return "\n".join(lines)
83
- except Exception as e:
84
- print(f"[Google search error] {e}")
85
- return ""
86
-
87
- def should_search(query: str) -> bool:
88
- no_search_patterns = [
89
- r"^\s*(write|create|generate|make|build)\s+(a\s+)?(code|function|script|program|class)",
90
- r"^\s*explain\s+(this\s+)?(code|function|snippet)",
91
- r"^\s*(what is|define)\s+[a-z ]+\s*\??\s*$",
92
- ]
93
- q = query.lower().strip()
94
- for pat in no_search_patterns:
95
- if re.match(pat, q, re.I):
96
- return False
97
- return True
98
 
99
  # ──────────────────────────────────────
100
- # ENDPOINTS
101
  # ──────────────────────────────────────
102
- @main.get("/")
103
- def home():
104
- return {
105
- "status": "Inachi-Core Active",
106
- "active_keys": len(API_KEYS_DB),
107
- "search": "Google Real-time",
108
- }
109
-
110
- @main.post("/v1/generate-key")
111
- async def generate_key(data: AdminRequest):
112
- if data.admin_pass != ADMIN_SECRET:
113
- raise HTTPException(status_code=401, detail="Unauthorized Access!")
114
- new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
115
- API_KEYS_DB[new_key] = {"limit": data.limit, "used": 0, "status": "active"}
116
- return {"api_key": new_key, "limit": data.limit}
117
-
118
  @main.post("/v1/chat")
119
  async def chat(message: ChatRequest, x_api_key: str = Header(None)):
120
  if not x_api_key or x_api_key not in API_KEYS_DB:
121
  raise HTTPException(status_code=403, detail="Access Denied")
122
 
123
- key_info = API_KEYS_DB[x_api_key]
124
- if key_info["used"] >= key_info["limit"]:
125
- raise HTTPException(status_code=429, detail="Limit Reached")
126
-
127
  query = message.query.strip()
128
  context = ""
129
  search_used = False
130
 
131
- if message.search and should_search(query):
132
  context = google_search(query, max_results=message.max_results)
133
- if context:
134
- search_used = True
135
-
136
- today = __import__("datetime").datetime.utcnow().strftime("%A, %d %B %Y, %H:%M UTC")
137
- system_instruction = (
138
- "You are Elephant AI (Inachi-Core), an expert assistant for MINZO-PRIME. "
139
- "Respond in the same language the user uses. "
140
- f"Current date: {today}. "
 
141
  )
142
  if search_used:
143
- system_instruction += "\nUse these web results to answer accurately:\n" + context
 
 
144
 
145
  msgs = [
146
- {"role": "system", "content": system_instruction},
147
- {"role": "user", "content": query},
148
  ]
149
 
 
150
  text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
151
  inputs = tokenizer([text], return_tensors="pt").to("cpu")
152
 
@@ -162,4 +111,7 @@ async def chat(message: ChatRequest, x_api_key: str = Header(None)):
162
  ans = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True).strip()
163
 
164
  API_KEYS_DB[x_api_key]["used"] += 1
165
- return {"reply": ans, "search_used": search_used, "usage": API_KEYS_DB[x_api_key]["used"]}
 
 
 
 
7
  import requests
8
  from transformers import AutoModelForCausalLM, AutoTokenizer
9
 
10
+ # Hugging Face server needs 'main'
11
  main = FastAPI()
12
 
13
  main.add_middleware(
 
25
  ADMIN_SECRET = "MINZO-SECRET-2026"
26
 
27
  # ── Google Search Config ──
 
28
  GOOGLE_API_KEY = "YOUR_GOOGLE_API_KEY"
29
  GOOGLE_CX = "YOUR_CUSTOM_SEARCH_ENGINE_ID"
30
 
31
  # ── Load AI Model for CPU ──
32
  model_id = "google/gemma-2-9b-it"
33
+ print(f"Loading {model_id} on CPU...")
34
 
35
  tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 
36
  model = AutoModelForCausalLM.from_pretrained(
37
  model_id,
38
  dtype=torch.bfloat16,
39
  device_map="cpu",
40
  trust_remote_code=True
41
  )
42
+ print("Model loaded successfully.")
 
 
 
 
 
43
 
44
  class ChatRequest(BaseModel):
45
  query: str
 
47
  max_results: int = 3
48
 
49
  # ──────────────────────────────────────
50
+ # SEARCH HELPER
51
  # ──────────────────────────────────────
52
  def google_search(query: str, max_results: int = 3) -> str:
53
  url = "https://www.googleapis.com/customsearch/v1"
54
+ params = {"q": query, "key": GOOGLE_API_KEY, "cx": GOOGLE_CX, "num": max_results}
 
 
 
 
 
55
  try:
56
  response = requests.get(url, params=params)
57
  results = response.json().get("items", [])
58
+ if not results: return ""
59
+ lines = ["[WEB SEARCH RESULTS]"]
 
 
 
60
  for i, r in enumerate(results, 1):
61
+ lines.append(f"\n{i}. {r.get('title')}\n {r.get('snippet')}\n Source: {r.get('link')}")
 
 
 
 
62
  return "\n".join(lines)
63
+ except: return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  # ──────────────────────────────────────
66
+ # CHAT ENDPOINT (FIXED)
67
  # ──────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  @main.post("/v1/chat")
69
  async def chat(message: ChatRequest, x_api_key: str = Header(None)):
70
  if not x_api_key or x_api_key not in API_KEYS_DB:
71
  raise HTTPException(status_code=403, detail="Access Denied")
72
 
 
 
 
 
73
  query = message.query.strip()
74
  context = ""
75
  search_used = False
76
 
77
+ if message.search:
78
  context = google_search(query, max_results=message.max_results)
79
+ if context: search_used = True
80
+
81
+ # SYSTEM INSTRUCTION RE-FORMATTED FOR GEMMA
82
+ today = __import__("datetime").datetime.utcnow().strftime("%A, %d %B %Y")
83
+
84
+ # Gemma doesn't support 'system' role, so we merge it into the user prompt
85
+ combined_prompt = (
86
+ f"Instruction: You are Elephant AI (Inachi-Core), an expert assistant for MINZO-PRIME. "
87
+ f"Respond in the same language the user uses. Current date: {today}.\n"
88
  )
89
  if search_used:
90
+ combined_prompt += f"\nUse these web results to answer: {context}\n"
91
+
92
+ combined_prompt += f"\nUser Query: {query}"
93
 
94
  msgs = [
95
+ {"role": "user", "content": combined_prompt},
 
96
  ]
97
 
98
+ # Apply template (Now only with 'user' role)
99
  text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
100
  inputs = tokenizer([text], return_tensors="pt").to("cpu")
101
 
 
111
  ans = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True).strip()
112
 
113
  API_KEYS_DB[x_api_key]["used"] += 1
114
+ return {"reply": ans, "search_used": search_used}
115
+
116
+ @main.get("/")
117
+ def home(): return {"status": "Inachi-Core Online"}