MINZO4546 committed on
Commit
ba18023
·
verified ·
1 Parent(s): e1c2f84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -26
app.py CHANGED
@@ -1,13 +1,15 @@
1
  import os
2
  import torch
 
 
3
  from fastapi import FastAPI, Header, HTTPException
4
  from fastapi.middleware.cors import CORSMiddleware
5
  from transformers import AutoModelForCausalLM, AutoTokenizer
6
  from duckduckgo_search import DDGS
7
- import re
8
 
9
  app = FastAPI()
10
 
 
11
  app.add_middleware(
12
  CORSMiddleware,
13
  allow_origins=["*"],
@@ -15,66 +17,103 @@ app.add_middleware(
15
  allow_headers=["*"],
16
  )
17
 
18
- # --- Specialist DB ---
 
19
  API_KEYS_DB = {
20
- "ELE-PRIME-ADMIN-SYS": {"limit": 100000, "used": 0, "status": "active"},
21
- "ELE-PRIME-VOID-X": {"limit": 50000, "used": 0, "status": "active"}
22
  }
23
 
24
- # 🔱 Model Configuration (CPU Friendly Load)
25
  MODEL_ID = "google/gemma-3-270m"
26
- print(f"🔱 INACHI-CORE: Launching Gemma-4 on CPU Engine...")
27
 
28
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
29
 
30
- # No GPU is available, so use CPU memory optimization instead of 4-bit quantization
31
  model = AutoModelForCausalLM.from_pretrained(
32
  MODEL_ID,
33
- torch_dtype=torch.float16,
34
  low_cpu_mem_usage=True,
35
  device_map="cpu"
36
  )
37
 
 
def get_web_context(query: str) -> str:
    """Return up to three DuckDuckGo result snippets for *query*.

    The snippets (the ``body`` field of each text result) are joined with
    newlines. Retrieval is best-effort: any search failure is logged and an
    empty string is returned, so the caller can continue without context.

    Args:
        query: Free-text search query.

    Returns:
        Newline-joined snippets, or ``""`` when the query is blank or the
        search fails.
    """
    # Nothing to search for: skip the network round trip entirely.
    if not isinstance(query, str) or not query.strip():
        return ""

    try:
        with DDGS() as ddgs:
            hits = ddgs.text(query, max_results=3) or []
            # .get() keeps the remaining snippets when one result lacks 'body'.
            snippets = [hit.get("body", "") for hit in hits]
    except Exception as exc:
        # A bare `except:` would also trap KeyboardInterrupt/SystemExit and
        # hide the cause; catch Exception only and report it.
        print(f"Search Error: {exc}")
        return ""

    return "\n".join(text for text in snippets if text)
45
 
 
 
 
 
 
 
 
 
 
 
 
 
@app.post("/v1/chat")
async def chat(message: dict, x_api_key: str = Header(None)):
    """Answer a chat query with the local model, using web snippets as context.

    Args:
        message: JSON body; the text to answer is read from its ``"query"`` key.
        x_api_key: Value of the API-key request header; must be a key present
            in ``API_KEYS_DB``.

    Returns:
        ``{"reply": <generated text>}``.

    Raises:
        HTTPException: 403 when the key is missing, unknown or not active;
            429 when the key has used up its ``limit``.
    """
    # Validate the key and enforce the status/limit fields stored with it.
    key_info = API_KEYS_DB.get(x_api_key) if x_api_key else None
    if key_info is None or key_info.get("status", "active") != "active":
        raise HTTPException(status_code=403, detail="Invalid Specialist Key")
    if key_info["used"] >= key_info["limit"]:
        raise HTTPException(status_code=429, detail="API Limit Reached for this Key")

    user_query = message.get("query", "")
    web_data = get_web_context(user_query)

    system_prompt = (
        "You are Inachi-Prime, a multimodal AI developed by Specialist MINZO-PRIME. "
        "Respond directly without showing your internal thought process. "
        f"\nWeb Context: {web_data}"
    )

    full_prompt = f"<start_of_turn>system\n{system_prompt}<end_of_turn>\n<start_of_turn>user\n{user_query}<end_of_turn>\n<start_of_turn>model\n"

    inputs = tokenizer(full_prompt, return_tensors="pt")
    prompt_len = inputs["input_ids"].shape[-1]

    # NOTE(review): the search and generate calls are made without await, so
    # they run on the event-loop thread for their whole duration.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
        )

    # Decode only the tokens produced after the prompt. Splitting the full
    # text on "model\n" would cut off any reply that contains that string.
    final_reply = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
    # Remove the model's thinking section.
    final_reply = re.sub(r'<thought>.*?</thought>', '', final_reply, flags=re.DOTALL).strip()

    key_info["used"] += 1
    return {"reply": final_reply}
 
79
 
80
- main = app
 
 
 
 
1
  import os
2
  import torch
3
+ import uuid
4
+ import re
5
  from fastapi import FastAPI, Header, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
8
  from duckduckgo_search import DDGS
 
9
 
10
  app = FastAPI()
11
 
12
+ # 🔱 CORS Setup
13
  app.add_middleware(
14
  CORSMiddleware,
15
  allow_origins=["*"],
 
17
  allow_headers=["*"],
18
  )
19
 
20
+ # --- 🔱 Specialist DB (Memory Based) ---
21
+ # සටහන: සර්වර් එක Restart වූ විට මේවා මැකේ. ස්ථිර කිරීමට DB එකක් අවශ්‍යයි.
22
  API_KEYS_DB = {
23
+ "ELE-PRIME-ADMIN-SYS": {"limit": 100000, "used": 0, "status": "active", "owner": "MINZO-PRIME"},
 
24
  }
25
 
26
+ # --- 🔱 Model Configuration (CPU Stable Engine) ---
27
  MODEL_ID = "google/gemma-3-270m"
28
+ print(f"🔱 INACHI-CORE: Launching Gemma-3 on CPU Engine...")
29
 
30
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
31
 
32
+ # Use float32 to avoid the 'NaN' error that float16 causes
33
  model = AutoModelForCausalLM.from_pretrained(
34
  MODEL_ID,
35
+ torch_dtype=torch.float32,
36
  low_cpu_mem_usage=True,
37
  device_map="cpu"
38
  )
39
 
40
+ # --- 🔱 Web Context Retrieval ---
def get_web_context(query: str) -> str:
    """Fetch short web snippets for *query* to use as prompt context.

    Runs a DuckDuckGo text search limited to three results and joins each
    result's ``body`` field with newlines. Failures are logged and reported
    as an empty string so the caller can proceed without context.

    Args:
        query: Free-text search query.

    Returns:
        Newline-joined snippets, or ``""`` when the query is blank or the
        search raises.
    """
    # A blank or non-string query cannot yield results; avoid opening a
    # search session for it.
    if not isinstance(query, str) or not query.strip():
        return ""

    try:
        with DDGS() as ddgs:
            results = ddgs.text(query, max_results=3) or []
            # Indexing r['body'] would raise KeyError on one malformed result
            # and throw away the others; .get() tolerates it.
            bodies = [result.get("body", "") for result in results]
    except Exception as e:
        print(f"Search Error: {e}")
        return ""

    return "\n".join(body for body in bodies if body)
50
 
51
+ # --- 🔱 Admin Routes ---
@app.get("/sys/generate-key")
async def create_key(admin_key: str = Header(None)):
    """Create a new API key and register it in ``API_KEYS_DB``.

    To create a key, send ``ELE-PRIME-ADMIN-SYS`` in the ``admin-key``
    request header.

    Args:
        admin_key: Value of the ``admin-key`` header.

    Returns:
        ``{"status": "success", "new_key": <key>}``; the new key is stored
        with a limit of 5000, zero usage and status ``"active"``.

    Raises:
        HTTPException: 403 when the header is missing or does not match.
    """
    import secrets  # local import: the file's import block does not provide it

    # NOTE(review): the admin secret is hard-coded and this state-changing
    # route is a GET; consider an environment variable and POST.
    expected = "ELE-PRIME-ADMIN-SYS"
    # Compare as bytes in constant time so the check neither leaks how much of
    # the secret matched nor raises on non-ASCII header values.
    if admin_key is None or not secrets.compare_digest(
        admin_key.encode("utf-8"), expected.encode("utf-8")
    ):
        raise HTTPException(status_code=403, detail="Unauthorized Specialist Access")

    # Only 8 hex digits are kept, so draw again on the (rare) collision
    # rather than overwriting an existing key's usage record.
    new_key = f"ELE-PRIME-{uuid.uuid4().hex[:8].upper()}"
    while new_key in API_KEYS_DB:
        new_key = f"ELE-PRIME-{uuid.uuid4().hex[:8].upper()}"

    API_KEYS_DB[new_key] = {"limit": 5000, "used": 0, "status": "active", "owner": "Specialist"}
    return {"status": "success", "new_key": new_key}
61
+
62
+ # --- 🔱 Chat Endpoint ---
@app.post("/v1/chat")
async def chat(message: dict, x_api_key: str = Header(None)):
    """Answer a chat query with the local model, using web snippets as context.

    Args:
        message: JSON body; the text to answer is read from its ``"query"`` key.
        x_api_key: Value of the API-key request header; must be a key present
            in ``API_KEYS_DB``.

    Returns:
        ``{"reply": <text>, "usage": "<used>/<limit>"}`` on success, or a
        ``{"reply": ...}`` apology (usage not counted) when generation raises
        ``RuntimeError``.

    Raises:
        HTTPException: 403 when the key is missing, unknown or not active;
            429 when the key has reached its limit.
    """
    # 1. API key validation (presence, status, remaining quota)
    key_info = API_KEYS_DB.get(x_api_key) if x_api_key else None
    # The stored "status" field was never consulted; a non-active key is now
    # rejected the same way as an unknown one.
    if key_info is None or key_info.get("status", "active") != "active":
        raise HTTPException(status_code=403, detail="Invalid Specialist Key")
    if key_info["used"] >= key_info["limit"]:
        raise HTTPException(status_code=429, detail="API Limit Reached for this Key")

    user_query = message.get("query", "")
    web_data = get_web_context(user_query)

    # 2. System prompt
    system_prompt = (
        "You are Inachi-Prime, a multimodal AI developed by Specialist MINZO-PRIME. "
        "Respond directly without internal thought process. "
        f"\nContext: {web_data}"
    )

    full_prompt = f"<start_of_turn>system\n{system_prompt}<end_of_turn>\n<start_of_turn>user\n{user_query}<end_of_turn>\n<start_of_turn>model\n"

    inputs = tokenizer(full_prompt, return_tensors="pt")
    prompt_len = inputs["input_ids"].shape[-1]

    # 3. Generation
    # NOTE(review): the search and generate calls are made without await, so
    # they run on the event-loop thread for their whole duration.
    try:
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.7,
                do_sample=True,
                renormalize_logits=True,  # guards against the probability error
            )
    except RuntimeError as e:
        print(f"Generation Error: {e}")
        return {"reply": "Core engine destabilized. Retrying process recommended."}

    # Clean the response. Decode only the tokens produced after the prompt;
    # splitting the full text on "model\n" would cut off any reply that
    # contains that string.
    final_reply = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
    final_reply = re.sub(r'<thought>.*?</thought>', '', final_reply, flags=re.DOTALL).strip()

    # 4. Usage tracking
    key_info["used"] += 1

    return {
        "reply": final_reply,
        "usage": f"{key_info['used']}/{key_info['limit']}",
    }
115
 
116
+ if __name__ == "__main__":
117
+ import uvicorn
118
+ # Hugging Face Space සඳහා Port 7860 අනිවාර්යයි
119
+ uvicorn.run(app, host="0.0.0.0", port=7860)