MINZO4546 committed on
Commit
deadea5
·
verified ·
1 Parent(s): fa283f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -33
app.py CHANGED
@@ -2,11 +2,10 @@ import torch
2
  from fastapi import FastAPI
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel
5
- from transformers import AutoModelForCausalLM, AutoTokenizer
6
 
7
  main = FastAPI()
8
 
9
- # CORS සක්‍රීය කිරීම
10
  main.add_middleware(
11
  CORSMiddleware,
12
  allow_origins=["*"],
@@ -15,13 +14,13 @@ main.add_middleware(
15
  )
16
 
17
  MODEL_ID = "tencent/Hy-MT1.5-1.8B-2bit"
18
- print(f"🔱 Specialist, Loading {MODEL_ID} on CPU...")
19
 
20
- # Load Model & Tokenizer
21
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
22
- model = AutoModelForCausalLM.from_pretrained(
23
- MODEL_ID,
24
- device_map="cpu",
25
  trust_remote_code=True
26
  )
27
 
@@ -32,30 +31,34 @@ class ChatRequest(BaseModel):
32
  async def chat(request_data: ChatRequest):
33
  user_query = request_data.message.strip()
34
 
35
- # පිසිදු Prompt එකක්
36
- prompt = f"User: {user_query}\nAssistant:"
37
- inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
38
-
39
- with torch.no_grad():
40
- outputs = model.generate(
41
- **inputs,
42
- max_new_tokens=200,
43
- temperature=0.7,
44
- do_sample=True,
45
- pad_token_id=tokenizer.eos_token_id
46
- )
47
-
48
- # Output එකෙන් prompt එක අයින් කර පිළිතුර පමණක් ගැනීම
49
- full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
50
- reply = full_response.split("Assistant:")[-1].strip()
51
-
52
- # පිළිතුර හිස් නම් fallback එකක්
53
- if not reply:
54
- reply = "I am processing your request, MINZO-PRIME. Please ask again."
55
-
56
- print(f"🔱 Generated Reply: {reply}")
57
- return {"reply": str(reply)}
 
 
 
 
58
 
59
  @main.get("/")
60
- def home():
61
- return {"status": "Inachi-Lite Online", "model": MODEL_ID}
 
2
  from fastapi import FastAPI
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel
5
+ from transformers import pipeline
6
 
7
  main = FastAPI()
8
 
 
9
  main.add_middleware(
10
  CORSMiddleware,
11
  allow_origins=["*"],
 
14
  )
15
 
16
# Model selection: a small 2-bit quantized model suitable for CPU inference.
MODEL_ID = "tencent/Hy-MT1.5-1.8B-2bit"
print(f"🔱 Loading {MODEL_ID} optimized...")

# Using a text-generation pipeline is more stable than wiring up the
# model and tokenizer by hand; it bundles tokenization, generation and
# decoding into one object.
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    device_map="cpu",          # CPU-only host; no GPU assumed
    trust_remote_code=True,    # model repo ships custom code
)
26
 
 
31
async def chat(request_data: ChatRequest):
    """Generate a chat reply for the incoming message.

    Builds a lightweight System/User/Assistant prompt, runs the local
    text-generation pipeline, and returns only the text produced after
    the final "Assistant:" marker.

    Args:
        request_data: Request body; only ``message`` is read.

    Returns:
        dict with a single ``reply`` string.
    """
    user_query = request_data.message.strip()

    # 🔱 Give the model a clear instruction frame so it answers in role.
    prompt = f"System: You are Inachi AI, a helpful assistant.\nUser: {user_query}\nAssistant:"

    results = pipe(
        prompt,
        max_new_tokens=256,      # 🔱 room for a full answer
        do_sample=True,
        temperature=0.8,         # 🔱 a little creativity
        top_p=0.9,
        repetition_penalty=1.2,  # 🔱 discourage repeated phrases
        # FIX: 50256 is GPT-2's EOS id and is wrong for this model's
        # vocabulary; use the loaded tokenizer's own EOS id for padding.
        pad_token_id=pipe.tokenizer.eos_token_id,
    )

    generated_text = results[0]["generated_text"]

    # Keep only the portion after the last "Assistant:" marker; if the
    # marker is missing, strip the echoed prompt instead.
    if "Assistant:" in generated_text:
        reply = generated_text.split("Assistant:")[-1].strip()
    else:
        reply = generated_text.replace(prompt, "").strip()

    # 🔱 If the reply came back empty, surface the raw generation (debug aid).
    if not reply or len(reply) < 2:
        reply = generated_text[:100] + "..."

    print(f"🔱 Generated: {reply}")
    return {"reply": reply}
61
 
62
@main.get("/")
def health():
    """Liveness probe: confirms the service is up."""
    status_payload = {"status": "Online"}
    return status_payload