MINZO4546 committed on
Commit
7ae0e9a
·
verified ·
1 Parent(s): c8bfde7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -42
app.py CHANGED
@@ -2,12 +2,10 @@ from fastapi import FastAPI, Header, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
  import torch
5
- import re
6
- import secrets
7
  import requests
8
- from transformers import AutoModelForCausalLM, AutoTokenizer
9
 
10
- # Hugging Face server needs 'main'
11
  main = FastAPI()
12
 
13
  main.add_middleware(
@@ -17,43 +15,47 @@ main.add_middleware(
17
  allow_headers=["*"],
18
  )
19
 
20
- # ── API Keys & Config ──
21
  API_KEYS_DB = {
22
  "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"},
23
  "ELE-PRIME-YG5EPZFQ": {"limit": 5000, "used": 0, "status": "active"},
24
  }
25
- ADMIN_SECRET = "MINZO-SECRET-2026"
26
-
27
- # ── Google Search Config ──
28
  GOOGLE_API_KEY = "YOUR_GOOGLE_API_KEY"
29
  GOOGLE_CX = "YOUR_CUSTOM_SEARCH_ENGINE_ID"
30
 
31
- # ── Load AI Model for CPU ──
32
- model_id = "google/gemma-2-9b-it"
33
- print(f"Loading {model_id} on CPU...")
 
 
 
 
 
 
34
 
35
  tokenizer = AutoTokenizer.from_pretrained(model_id)
36
  model = AutoModelForCausalLM.from_pretrained(
37
  model_id,
38
- dtype=torch.bfloat16,
39
- device_map="cpu",
40
  trust_remote_code=True
41
  )
42
- print("Model loaded successfully.")
43
 
 
44
  class ChatRequest(BaseModel):
45
- query: str
 
 
46
  search: bool = True
47
  max_results: int = 3
48
 
49
- # ──────────────────────────────────────
50
- # SEARCH HELPER
51
- # ──────────────────────────────────────
52
  def google_search(query: str, max_results: int = 3) -> str:
53
  url = "https://www.googleapis.com/customsearch/v1"
54
  params = {"q": query, "key": GOOGLE_API_KEY, "cx": GOOGLE_CX, "num": max_results}
55
  try:
56
- response = requests.get(url, params=params)
57
  results = response.json().get("items", [])
58
  if not results: return ""
59
  lines = ["[WEB SEARCH RESULTS]"]
@@ -62,56 +64,60 @@ def google_search(query: str, max_results: int = 3) -> str:
62
  return "\n".join(lines)
63
  except: return ""
64
 
65
- # ──────────────────────────────────────
66
- # CHAT ENDPOINT (FIXED)
67
- # ──────────────────────────────────────
68
  @main.post("/v1/chat")
69
- async def chat(message: ChatRequest, x_api_key: str = Header(None)):
 
70
  if not x_api_key or x_api_key not in API_KEYS_DB:
71
  raise HTTPException(status_code=403, detail="Access Denied")
72
 
73
- query = message.query.strip()
74
  context = ""
75
  search_used = False
76
 
77
- if message.search:
78
- context = google_search(query, max_results=message.max_results)
79
  if context: search_used = True
80
 
81
- # SYSTEM INSTRUCTION RE-FORMATTED FOR GEMMA
82
  today = __import__("datetime").datetime.utcnow().strftime("%A, %d %B %Y")
83
 
84
- # Gemma doesn't support 'system' role, so we merge it into the user prompt
85
- combined_prompt = (
86
  f"Instruction: You are Elephant AI (Inachi-Core), an expert assistant for MINZO-PRIME. "
87
  f"Respond in the same language the user uses. Current date: {today}.\n"
88
  )
 
 
89
  if search_used:
90
- combined_prompt += f"\nUse these web results to answer: {context}\n"
91
 
92
- combined_prompt += f"\nUser Query: {query}"
93
-
94
- msgs = [
95
- {"role": "user", "content": combined_prompt},
96
- ]
97
 
98
- # Apply template (Now only with 'user' role)
99
- text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
100
- inputs = tokenizer([text], return_tensors="pt").to("cpu")
 
101
 
102
  with torch.no_grad():
103
  outputs = model.generate(
104
  **inputs,
105
- max_new_tokens=512,
106
  temperature=0.6,
107
  do_sample=True,
108
  pad_token_id=tokenizer.eos_token_id,
109
  )
110
 
111
- ans = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True).strip()
112
 
113
  API_KEYS_DB[x_api_key]["used"] += 1
114
- return {"reply": ans, "search_used": search_used}
 
 
 
 
 
 
115
 
116
  @main.get("/")
117
- def home(): return {"status": "Inachi-Core Online"}
 
 
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
  import torch
 
 
5
  import requests
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
7
 
8
+ # ── API INITIALIZATION ──
9
  main = FastAPI()
10
 
11
  main.add_middleware(
 
15
  allow_headers=["*"],
16
  )
17
 
18
# ── CONFIGURATION ──
# NOTE(review): API keys and Google credentials are hardcoded in source.
# Before any public deployment these should come from environment
# variables or a secrets manager, not the repository.
API_KEYS_DB = {
    "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"},
    "ELE-PRIME-YG5EPZFQ": {"limit": 5000, "used": 0, "status": "active"},
}

GOOGLE_API_KEY = "YOUR_GOOGLE_API_KEY"
GOOGLE_CX = "YOUR_CUSTOM_SEARCH_ENGINE_ID"
25
 
26
# ── MODEL LOADING (OPTIMIZED FOR 16GB RAM) ──
model_id = "google/gemma-2-9b-it"
print(f"🔱 Specialist, Loading {model_id} with 4-bit Quantization...")

# Quantization config that shrinks the model to fit ~16 GB of RAM:
# 4-bit weights with bfloat16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",  # uses the GPU when one is available, otherwise falls back to CPU
    trust_remote_code=True
)
print("🔱 Model loaded successfully. Inachi-Core is Online.")
44
 
45
# ── DATA MODELS (FIXED FOR FRONTEND MATCH) ──
class ChatRequest(BaseModel):
    """Request payload for POST /v1/chat.

    Field names must match what the frontend sends:

    - message: the user's query; the HTML client posts it as 'message',
      so the field is named 'message' here as well.
    - history: prior conversation turns; Gradio sends the history as an
      array. NOTE(review): currently unused by the endpoint.
    - think_level: client-side hint. NOTE(review): currently unused by
      the endpoint.
    - search: whether to augment the answer with Google search results.
    - max_results: how many search results to fetch.
    """
    message: str
    # Mutable default is safe here: Pydantic deep-copies field defaults
    # per instance (unlike plain Python function defaults).
    history: list = []
    think_level: str = "high"
    search: bool = True
    max_results: int = 3
52
 
53
+ # ── SEARCH HELPER ──
 
 
54
  def google_search(query: str, max_results: int = 3) -> str:
55
  url = "https://www.googleapis.com/customsearch/v1"
56
  params = {"q": query, "key": GOOGLE_API_KEY, "cx": GOOGLE_CX, "num": max_results}
57
  try:
58
+ response = requests.get(url, params=params, timeout=5)
59
  results = response.json().get("items", [])
60
  if not results: return ""
61
  lines = ["[WEB SEARCH RESULTS]"]
 
64
  return "\n".join(lines)
65
  except: return ""
66
 
67
# ── CHAT ENDPOINT ──
@main.post("/v1/chat")
async def chat(request_data: ChatRequest, x_api_key: str = Header(None)):
    """Generate a chat reply, optionally grounded in Google search results.

    The API key record's 'status' and 'limit' fields are enforced here:
    they were stored (and 'used' incremented) but never checked before.

    Raises:
        HTTPException 403: missing, unknown, or inactive API key.
        HTTPException 429: the key's usage quota is exhausted.
    """
    from datetime import datetime, timezone

    # API key validation.
    key_record = API_KEYS_DB.get(x_api_key) if x_api_key else None
    if key_record is None or key_record["status"] != "active":
        raise HTTPException(status_code=403, detail="Access Denied")
    if key_record["used"] >= key_record["limit"]:
        raise HTTPException(status_code=429, detail="Quota exceeded")

    user_query = request_data.message.strip()
    context = ""
    search_used = False

    if request_data.search:
        context = google_search(user_query, max_results=request_data.max_results)
        if context:
            search_used = True

    # Current date for the prompt. datetime.utcnow() is deprecated; an
    # aware UTC datetime produces the identical strftime output.
    today = datetime.now(timezone.utc).strftime("%A, %d %B %Y")

    system_instr = (
        f"Instruction: You are Elephant AI (Inachi-Core), an expert assistant for MINZO-PRIME. "
        f"Respond in the same language the user uses. Current date: {today}.\n"
    )

    full_prompt = system_instr
    if search_used:
        full_prompt += f"\nUse these web results to answer:\n{context}\n"

    full_prompt += f"\nUser Query: {user_query}"

    # Gemma's chat template has no 'system' role, so the instruction is
    # merged into the single user turn.
    msgs = [{"role": "user", "content": full_prompt}]
    input_text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer([input_text], return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.6,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens, skipping the echoed prompt.
    response_text = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True).strip()

    key_record["used"] += 1

    # Shape the output the way the HTML frontend expects it.
    return {
        "reply": response_text,
        "search_used": search_used,
        "status": "success"
    }
120
 
121
@main.get("/")
def home():
    """Health-check endpoint reporting service status and model identity."""
    payload = {"status": "Inachi-Core Online", "model": "Gemma-2-9b-it-4bit"}
    return payload