MINZO4546 committed on
Commit
8f635ff
·
verified ·
1 Parent(s): ebfdf66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -21
app.py CHANGED
@@ -5,7 +5,7 @@ import torch
5
  import re
6
  import secrets
7
  import requests # Google Search API එකට අවශ්‍යයි
8
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
9
 
10
  app = FastAPI()
11
 
@@ -24,27 +24,22 @@ API_KEYS_DB = {
24
  ADMIN_SECRET = "MINZO-SECRET-2026"
25
 
26
  # ── Google Search Config ──
27
- # Specialist, මේ දෙක ඔයාගේ Google Cloud Console එකෙන් අරන් මෙතනට දාන්න
28
  GOOGLE_API_KEY = "YOUR_GOOGLE_API_KEY"
29
  GOOGLE_CX = "YOUR_CUSTOM_SEARCH_ENGINE_ID"
30
 
31
- # ── Load AI Model with 4-bit Quantization ──
32
- model_id = "google/gemma-4-E4B-it" # Gemma-2-9B එක පාවිච්චි කරමු
33
- print(f"Loading {model_id} with 4-bit quantization...")
34
-
35
- quant_config = BitsAndBytesConfig(
36
- load_in_4bit=True,
37
- bnb_4bit_compute_dtype=torch.bfloat16
38
- )
39
 
40
  tokenizer = AutoTokenizer.from_pretrained(model_id)
 
41
  model = AutoModelForCausalLM.from_pretrained(
42
  model_id,
43
- quantization_config=quant_config,
44
- device_map="auto", # 18GB Space එකේ GPU එකට auto load වෙයි
45
  trust_remote_code=True
46
  )
47
- print("Model loaded and optimized.")
48
 
49
  # ── Pydantic Models ──
50
  class AdminRequest(BaseModel):
@@ -60,9 +55,6 @@ class ChatRequest(BaseModel):
60
  # GOOGLE REAL-TIME WEB SEARCH HELPER
61
  # ──────────────────────────────────────
62
  def google_search(query: str, max_results: int = 3) -> str:
63
- """
64
- Search Google and return formatted context string.
65
- """
66
  url = "https://www.googleapis.com/customsearch/v1"
67
  params = {
68
  "q": query,
@@ -85,12 +77,10 @@ def google_search(query: str, max_results: int = 3) -> str:
85
  lines.append(f"\n{i}. {title}\n {snippet}\n Source: {link}")
86
  lines.append("\n[END OF SEARCH RESULTS]")
87
  return "\n".join(lines)
88
-
89
  except Exception as e:
90
  print(f"[Google search error] {e}")
91
  return ""
92
 
93
- # ── Decide whether to search ──
94
  def should_search(query: str) -> bool:
95
  no_search_patterns = [
96
  r"^\s*(write|create|generate|make|build)\s+(a\s+)?(code|function|script|program|class)",
@@ -117,7 +107,7 @@ def home():
117
  @app.post("/v1/generate-key")
118
  async def generate_key(data: AdminRequest):
119
  if data.admin_pass != ADMIN_SECRET:
120
- raise HTTPException(status_code=401, detail="Unauthorized Specialist Access!")
121
  new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
122
  API_KEYS_DB[new_key] = {"limit": data.limit, "used": 0, "status": "active"}
123
  return {"api_key": new_key, "limit": data.limit}
@@ -142,7 +132,7 @@ async def chat(message: ChatRequest, x_api_key: str = Header(None)):
142
 
143
  today = __import__("datetime").datetime.utcnow().strftime("%A, %d %B %Y, %H:%M UTC")
144
  system_instruction = (
145
- "You are Elephant AI (Inachi-Core), an expert assistant for Specialist MINZO-PRIME. "
146
  "Respond in the same language the user uses. "
147
  f"Current date: {today}. "
148
  )
@@ -155,7 +145,7 @@ async def chat(message: ChatRequest, x_api_key: str = Header(None)):
155
  ]
156
 
157
  text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
158
- inputs = tokenizer([text], return_tensors="pt").to(model.device)
159
 
160
  with torch.no_grad():
161
  outputs = model.generate(
 
5
  import re
6
  import secrets
7
  import requests # Google Search API එකට අවශ්‍යයි
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer
9
 
10
  app = FastAPI()
11
 
 
24
  ADMIN_SECRET = "MINZO-SECRET-2026"
25
 
26
  # ── Google Search Config ──
 
27
  GOOGLE_API_KEY = "YOUR_GOOGLE_API_KEY"
28
  GOOGLE_CX = "YOUR_CUSTOM_SEARCH_ENGINE_ID"
29
 
30
+ # ── Load AI Model for CPU ──
31
+ model_id = "google/gemma-4-E4B-it"
32
+ print(f"Loading {model_id} on CPU (Optimized for 18GB RAM)...")
 
 
 
 
 
33
 
34
  tokenizer = AutoTokenizer.from_pretrained(model_id)
35
+ # CPU එකේ දුවන නිසා torch_dtype එක float16 හෝ bfloat16 දැමීමෙන් RAM එක ඉතිරි කරගත හැක
36
  model = AutoModelForCausalLM.from_pretrained(
37
  model_id,
38
+ torch_dtype=torch.bfloat16,
39
+ device_map="cpu",
40
  trust_remote_code=True
41
  )
42
+ print("Model loaded on CPU successfully.")
43
 
44
  # ── Pydantic Models ──
45
  class AdminRequest(BaseModel):
 
55
  # GOOGLE REAL-TIME WEB SEARCH HELPER
56
  # ──────────────────────────────────────
57
  def google_search(query: str, max_results: int = 3) -> str:
 
 
 
58
  url = "https://www.googleapis.com/customsearch/v1"
59
  params = {
60
  "q": query,
 
77
  lines.append(f"\n{i}. {title}\n {snippet}\n Source: {link}")
78
  lines.append("\n[END OF SEARCH RESULTS]")
79
  return "\n".join(lines)
 
80
  except Exception as e:
81
  print(f"[Google search error] {e}")
82
  return ""
83
 
 
84
  def should_search(query: str) -> bool:
85
  no_search_patterns = [
86
  r"^\s*(write|create|generate|make|build)\s+(a\s+)?(code|function|script|program|class)",
 
107
@app.post("/v1/generate-key")
async def generate_key(data: AdminRequest):
    """Issue a new API key after verifying the admin password.

    Registers the key in API_KEYS_DB with the requested usage limit and
    returns it to the caller.

    Raises:
        HTTPException: 401 when the admin password does not match.
    """
    # Use a constant-time comparison so the admin secret cannot be
    # recovered via a timing side channel (plain `!=` short-circuits).
    if not secrets.compare_digest(data.admin_pass, ADMIN_SECRET):
        raise HTTPException(status_code=401, detail="Unauthorized Access!")
    # token_hex(4) -> 8 hex chars from a CSPRNG; uppercased for the key format.
    new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
    API_KEYS_DB[new_key] = {"limit": data.limit, "used": 0, "status": "active"}
    return {"api_key": new_key, "limit": data.limit}
 
132
 
133
  today = __import__("datetime").datetime.utcnow().strftime("%A, %d %B %Y, %H:%M UTC")
134
  system_instruction = (
135
+ "You are Elephant AI (Inachi-Core), an expert assistant for MINZO-PRIME. "
136
  "Respond in the same language the user uses. "
137
  f"Current date: {today}. "
138
  )
 
145
  ]
146
 
147
  text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
148
+ inputs = tokenizer([text], return_tensors="pt").to("cpu")
149
 
150
  with torch.no_grad():
151
  outputs = model.generate(