MINZO4546 committed on
Commit
249aa04
·
verified ·
1 Parent(s): ba18023

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -87
app.py CHANGED
@@ -1,15 +1,18 @@
1
- import os
2
- import torch
3
- import uuid
4
- import re
5
  from fastapi import FastAPI, Header, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
 
 
 
 
 
 
 
 
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
8
  from duckduckgo_search import DDGS
9
 
10
  app = FastAPI()
11
 
12
- # 🔱 CORS Setup
13
  app.add_middleware(
14
  CORSMiddleware,
15
  allow_origins=["*"],
@@ -17,103 +20,102 @@ app.add_middleware(
17
  allow_headers=["*"],
18
  )
19
 
20
- # --- 🔱 Specialist DB (Memory Based) ---
21
- # සටහන: සර්වර් එක Restart වූ විට මේවා මැකේ. ස්ථිර කිරීමට DB එකක් අවශ්‍යයි.
22
  API_KEYS_DB = {
23
- "ELE-PRIME-ADMIN-SYS": {"limit": 100000, "used": 0, "status": "active", "owner": "MINZO-PRIME"},
 
24
  }
 
25
 
26
- # --- 🔱 Model Configuration (CPU Stable Engine) ---
27
- MODEL_ID = "google/gemma-3-270m"
28
- print(f"🔱 INACHI-CORE: Launching Gemma-3 on CPU Engine...")
29
 
30
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
31
 
32
- # float16 නිසා එන 'NaN' error එක වැළැක්වීමට float32 පාවිච්චි කරමු
33
- model = AutoModelForCausalLM.from_pretrained(
34
- MODEL_ID,
35
- torch_dtype=torch.float32,
36
- low_cpu_mem_usage=True,
37
- device_map="cpu"
38
- )
 
 
 
39
 
40
- # --- 🔱 Web Context Retrieval ---
41
- def get_web_context(query: str):
42
- try:
43
- with DDGS() as ddgs:
44
- # නව ddgs version එකට ගැලපෙන පරිදි update කර ඇත
45
- results = [r['body'] for r in ddgs.text(query, max_results=3)]
46
- return "\n".join(results)
47
- except Exception as e:
48
- print(f"Search Error: {e}")
49
- return ""
50
-
51
- # --- 🔱 Admin Routes ---
52
- @app.get("/sys/generate-key")
53
- async def create_key(admin_key: str = Header(None)):
54
- """අලුත් API Keys සාදා ගැනීමට: Header එකේ 'admin-key' ලෙස ELE-PRIME-ADMIN-SYS ලබා දෙන්න."""
55
- if admin_key != "ELE-PRIME-ADMIN-SYS":
56
- raise HTTPException(status_code=403, detail="Unauthorized Specialist Access")
57
 
58
- new_key = f"ELE-PRIME-{uuid.uuid4().hex[:8].upper()}"
59
- API_KEYS_DB[new_key] = {"limit": 5000, "used": 0, "status": "active", "owner": "Specialist"}
60
- return {"status": "success", "new_key": new_key}
 
 
 
 
 
 
61
 
62
- # --- 🔱 Chat Endpoint ---
63
  @app.post("/v1/chat")
64
  async def chat(message: dict, x_api_key: str = Header(None)):
65
- # 1. API Key Validation
66
  if not x_api_key or x_api_key not in API_KEYS_DB:
67
- raise HTTPException(status_code=403, detail="Invalid Specialist Key")
68
-
69
  key_info = API_KEYS_DB[x_api_key]
70
  if key_info["used"] >= key_info["limit"]:
71
- raise HTTPException(status_code=429, detail="API Limit Reached for this Key")
72
-
73
- user_query = message.get("query", "")
74
- web_data = get_web_context(user_query)
75
 
76
- # 2. Inachi System Prompt
77
- system_prompt = (
78
- "You are Inachi-Prime, a multimodal AI developed by Specialist MINZO-PRIME. "
79
- "Respond directly without internal thought process. "
80
- f"\nContext: {web_data}"
 
 
 
 
 
 
 
 
 
 
 
81
  )
82
 
83
- full_prompt = f"<start_of_turn>system\n{system_prompt}<end_of_turn>\n<start_of_turn>user\n{user_query}<end_of_turn>\n<start_of_turn>model\n"
 
 
 
84
 
85
- inputs = tokenizer(full_prompt, return_tensors="pt")
86
-
87
- # 3. Generation Logic (Stable Parameters)
88
  with torch.no_grad():
89
- try:
90
- outputs = model.generate(
91
- **inputs,
92
- max_new_tokens=512,
93
- temperature=0.7,
94
- do_sample=True,
95
- renormalize_logits=True # Probability Error එක වැළැක්වීමට
96
- )
97
-
98
- full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
99
-
100
- # Response එක පිරිසිදු කිරීම
101
- final_reply = full_response.split("model\n")[-1].strip()
102
- final_reply = re.sub(r'<thought>.*?</thought>', '', final_reply, flags=re.DOTALL).strip()
103
-
104
- # 4. Usage tracking
105
- API_KEYS_DB[x_api_key]["used"] += 1
106
-
107
- return {
108
- "reply": final_reply,
109
- "usage": f"{API_KEYS_DB[x_api_key]['used']}/{API_KEYS_DB[x_api_key]['limit']}"
110
- }
111
-
112
- except RuntimeError as e:
113
- print(f"Generation Error: {e}")
114
- return {"reply": "Core engine destabilized. Retrying process recommended."}
115
-
116
- if __name__ == "__main__":
117
- import uvicorn
118
- # Hugging Face Space සඳහා Port 7860 අනිවාර්යයි
119
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
 
1
  from fastapi import FastAPI, Header, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
+ from pydantic import BaseModel
4
+ import torch
5
+ import os
6
+ import json
7
+ import re
8
+ import uuid
9
+ import secrets
10
+ import datetime
11
  from transformers import AutoModelForCausalLM, AutoTokenizer
12
  from duckduckgo_search import DDGS
13
 
14
  app = FastAPI()
15
 
 
16
  app.add_middleware(
17
  CORSMiddleware,
18
  allow_origins=["*"],
 
20
  allow_headers=["*"],
21
  )
22
 
23
+ # --- Database & Config ---
24
+ # ආරම්භක Keys
25
  API_KEYS_DB = {
26
+ "ELE-PRIME-ADMIN-SYS": {"limit": 10000, "used": 0, "status": "active"},
27
+ "ELE-PRIME-YG5EPZFQ": {"limit": 5000, "used": 0, "status": "active"}
28
  }
29
+ ADMIN_SECRET = "MINZO-SECRET-2026"
30
 
31
+ # --- AI Model ---
32
+ model_id = "google/gemma-3-270m"
33
+ print(f"🔱 INACHI-CORE: Loading {model_id}...")
34
 
35
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
36
+ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="cpu")
37
 
38
+ # --- Data Models ---
39
+ class AdminRequest(BaseModel):
40
+ admin_pass: str
41
+ limit: int = 1000
42
+
43
+ # --- API Endpoints ---
44
+
45
+ @app.get("/")
46
+ def home():
47
+ return {"status": "Elephant Pro Active", "active_keys": len(API_KEYS_DB)}
48
 
49
+ # 🔱 අලුතින් Key එකක් Auto-Generate කරන Endpoint එක
50
+ @app.post("/v1/generate-key")
51
+ async def generate_key(data: AdminRequest):
52
+ if data.admin_pass != ADMIN_SECRET:
53
+ raise HTTPException(status_code=401, detail="Unauthorized Specialist Access!")
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ # Random Key එකක් නිර්මාණය කිරීම (උදා: ELE-PRIME-X8A2...)
56
+ new_key = f"ELE-PRIME-{secrets.token_hex(4).upper()}"
57
+ API_KEYS_DB[new_key] = {"limit": data.limit, "used": 0, "status": "active"}
58
+
59
+ return {
60
+ "message": "New Specialist Key Activated",
61
+ "api_key": new_key,
62
+ "limit": data.limit
63
+ }
64
 
 
65
  @app.post("/v1/chat")
66
  async def chat(message: dict, x_api_key: str = Header(None)):
 
67
  if not x_api_key or x_api_key not in API_KEYS_DB:
68
+ raise HTTPException(status_code=403, detail="Access Denied")
69
+
70
  key_info = API_KEYS_DB[x_api_key]
71
  if key_info["used"] >= key_info["limit"]:
72
+ raise HTTPException(status_code=429, detail="Limit Reached")
 
 
 
73
 
74
+ query = message.get("query", "")
75
+
76
+ # Web Search
77
+ context = ""
78
+ if any(w in query.lower() for w in ["today", "now", "2026", "අද"]):
79
+ try:
80
+ with DDGS() as ddgs:
81
+ results = list(ddgs.text(query, max_results=2))
82
+ context = "\n".join([r['body'] for r in results])
83
+ except: pass
84
+
85
+ # 🔱 Language Adaptive System Instruction
86
+ system_instruction = (
87
+ "You are Elephant AI (Inachi-Core), an expert assistant for Specialist MINZO-PRIME. "
88
+ "Respond in the language used by the user (Sinhala or English). "
89
+ f"Real-time Context: {context}"
90
  )
91
 
92
+ msgs = [
93
+ {"role": "system", "content": system_instruction},
94
+ {"role": "user", "content": query}
95
+ ]
96
 
97
+ text = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
98
+ inputs = tokenizer([text], return_tensors="pt").to("cpu")
99
+
100
  with torch.no_grad():
101
+ outputs = model.generate(
102
+ inputs.input_ids,
103
+ max_new_tokens=512,
104
+ temperature=0.6,
105
+ top_p=0.9,
106
+ do_sample=True,
107
+ pad_token_id=tokenizer.eos_token_id
108
+ )
109
+
110
+ full_response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
111
+ ans = full_response.split("assistant")[-1].strip()
112
+
113
+ # Cleaning Logic
114
+ if "</think>" in ans: ans = ans.split("</think>")[-1].strip()
115
+ ans = ans.replace("Ċ", "\n").replace("Ġ", " ")
116
+ ans = re.sub(r' +', ' ', ans).strip()
117
+
118
+ API_KEYS_DB[x_api_key]["used"] += 1
119
+ return {"reply": ans, "usage": API_KEYS_DB[x_api_key]["used"]}
120
+
121
+ main = app