MINZO4546 committed on
Commit
5e1de17
·
verified ·
1 Parent(s): c52d572

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -14
app.py CHANGED
@@ -6,9 +6,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
6
  import datetime
7
 
8
  # ── API INITIALIZATION ──
9
- app = FastAPI()
 
10
 
11
- app.add_middleware(
12
  CORSMiddleware,
13
  allow_origins=["*"],
14
  allow_methods=["*"],
@@ -21,18 +22,20 @@ API_KEYS_DB = {
21
  }
22
  MODEL_ID = "tencent/Hy-MT1.5-1.8B-2bit"
23
 
24
- print(f"🔱 Specialist, Loading {MODEL_ID} on HF Space...")
25
 
26
- # Load Tokenizer and Model
27
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 
 
28
  model = AutoModelForCausalLM.from_pretrained(
29
  MODEL_ID,
30
- device_map="auto",
31
- torch_dtype=torch.float16,
32
  trust_remote_code=True
33
  )
34
 
35
- print("🔱 Inachi-Lite is Online.")
36
 
37
  # ── DATA MODELS ──
38
  class ChatRequest(BaseModel):
@@ -42,24 +45,25 @@ class ChatRequest(BaseModel):
42
  max_tokens: int = 512
43
 
44
  # ── CHAT ENDPOINT ──
45
- @app.post("/v1/chat")
46
  async def chat(request_data: ChatRequest, x_api_key: str = Header(None)):
 
47
  if not x_api_key or x_api_key not in API_KEYS_DB:
48
  raise HTTPException(status_code=403, detail="Access Denied")
49
 
50
  user_query = request_data.message.strip()
51
  today = datetime.datetime.now().strftime("%Y-%m-%d")
52
 
53
- # Prompt Engineering
54
  prompt = f"System: You are Inachi AI, an expert assistant for MINZO-PRIME. Date: {today}\n"
55
 
56
- # History integration (Optional but recommended)
57
- for human, ai in request_data.history[-3:]: # අන්තිම මැසේජ් 3 විතරක් ගමු
58
  prompt += f"User: {human}\nAI: {ai}\n"
59
 
60
  prompt += f"User: {user_query}\nAI:"
61
 
62
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
63
 
64
  with torch.no_grad():
65
  outputs = model.generate(
@@ -77,6 +81,6 @@ async def chat(request_data: ChatRequest, x_api_key: str = Header(None)):
77
  "status": "success"
78
  }
79
 
80
- @app.get("/")
81
  def home():
82
- return {"message": "Inachi-Lite (Hy-MT) is Running", "model": MODEL_ID}
 
6
  import datetime
7
 
8
  # ── API INITIALIZATION ──
9
+ # Hugging Face සොයන 'main' attribute එක මෙතනට ලබා දී ඇත
10
+ main = FastAPI()
11
 
12
+ main.add_middleware(
13
  CORSMiddleware,
14
  allow_origins=["*"],
15
  allow_methods=["*"],
 
22
  }
23
  MODEL_ID = "tencent/Hy-MT1.5-1.8B-2bit"
24
 
25
+ print(f"🔱 Specialist, Loading {MODEL_ID} on CPU...")
26
 
27
+ # Load Tokenizer
28
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
29
+
30
+ # Load Model Optimized for CPU
31
  model = AutoModelForCausalLM.from_pretrained(
32
  MODEL_ID,
33
+ device_map="cpu", # GPU නොමැති නිසා කෙලින්ම CPU එකට ලබා දීම
34
+ torch_dtype=torch.float32,
35
  trust_remote_code=True
36
  )
37
 
38
+ print("🔱 Inachi-Lite is Online and Ready.")
39
 
40
  # ── DATA MODELS ──
41
  class ChatRequest(BaseModel):
 
45
  max_tokens: int = 512
46
 
47
  # ── CHAT ENDPOINT ──
48
+ @main.post("/v1/chat")
49
  async def chat(request_data: ChatRequest, x_api_key: str = Header(None)):
50
+ # API Key පරීක්ෂා කිරීම
51
  if not x_api_key or x_api_key not in API_KEYS_DB:
52
  raise HTTPException(status_code=403, detail="Access Denied")
53
 
54
  user_query = request_data.message.strip()
55
  today = datetime.datetime.now().strftime("%Y-%m-%d")
56
 
57
+ # Prompt සකස් කිරීම
58
  prompt = f"System: You are Inachi AI, an expert assistant for MINZO-PRIME. Date: {today}\n"
59
 
60
+ # History ඇතුළත් කිරීම
61
+ for human, ai in request_data.history[-2:]:
62
  prompt += f"User: {human}\nAI: {ai}\n"
63
 
64
  prompt += f"User: {user_query}\nAI:"
65
 
66
+ inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
67
 
68
  with torch.no_grad():
69
  outputs = model.generate(
 
81
  "status": "success"
82
  }
83
 
84
+ @main.get("/")
85
  def home():
86
+ return {"message": "Inachi-Lite is Running", "status": "online"}