MINZO4546 committed on
Commit
fa283f3
·
verified ·
1 Parent(s): 27cf60a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -28
app.py CHANGED
@@ -1,8 +1,8 @@
 
1
  from fastapi import FastAPI
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
- from transformers import pipeline
5
- import torch
6
 
7
  main = FastAPI()
8
 
@@ -17,12 +17,12 @@ main.add_middleware(
17
  MODEL_ID = "tencent/Hy-MT1.5-1.8B-2bit"
18
  print(f"🔱 Specialist, Loading {MODEL_ID} on CPU...")
19
 
20
- # 🔱 Pipeline එක නිවැරදිව load කිරීම
21
- pipe = pipeline(
22
- "text-generation",
23
- model=MODEL_ID,
24
- device_map="cpu",
25
- trust_remote_code=True # කෙලින්ම මෙතනට පමණක් ලබා දෙන්න
26
  )
27
 
28
  class ChatRequest(BaseModel):
@@ -32,28 +32,30 @@ class ChatRequest(BaseModel):
32
  async def chat(request_data: ChatRequest):
33
  user_query = request_data.message.strip()
34
 
35
- # Simple Prompt
36
  prompt = f"User: {user_query}\nAssistant:"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- # Response Generation
39
- results = pipe(
40
- prompt,
41
- max_new_tokens=150,
42
- do_sample=True,
43
- temperature=0.7,
44
- pad_token_id=50256
45
- )
46
-
47
- # Result Cleaning
48
- generated_text = results[0]['generated_text']
49
- # Assistant: කියන කොටසෙන් පස්සේ තියෙන ටික විතරක් ගැනීම
50
- if "Assistant:" in generated_text:
51
- reply = generated_text.split("Assistant:")[-1].strip()
52
- else:
53
- reply = generated_text.replace(prompt, "").strip()
54
-
55
- return {"reply": reply}
56
 
57
  @main.get("/")
58
- def health():
59
  return {"status": "Inachi-Lite Online", "model": MODEL_ID}
 
1
+ import torch
2
  from fastapi import FastAPI
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer
 
6
 
7
  main = FastAPI()
8
 
 
17
  MODEL_ID = "tencent/Hy-MT1.5-1.8B-2bit"
18
  print(f"🔱 Specialist, Loading {MODEL_ID} on CPU...")
19
 
20
+ # Load Model & Tokenizer
21
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
22
+ model = AutoModelForCausalLM.from_pretrained(
23
+ MODEL_ID,
24
+ device_map="cpu",
25
+ trust_remote_code=True
26
  )
27
 
28
  class ChatRequest(BaseModel):
 
32
  async def chat(request_data: ChatRequest):
33
  user_query = request_data.message.strip()
34
 
35
+ # පිරිසිදු Prompt එකක්
36
  prompt = f"User: {user_query}\nAssistant:"
37
+ inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
38
+
39
+ with torch.no_grad():
40
+ outputs = model.generate(
41
+ **inputs,
42
+ max_new_tokens=200,
43
+ temperature=0.7,
44
+ do_sample=True,
45
+ pad_token_id=tokenizer.eos_token_id
46
+ )
47
+
48
+ # Output එකෙන් prompt එක අයින් කර පිළිතුර පමණක් ගැනීම
49
+ full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
50
+ reply = full_response.split("Assistant:")[-1].strip()
51
+
52
+ # පිළිතුර හිස් නම් fallback එකක්
53
+ if not reply:
54
+ reply = "I am processing your request, MINZO-PRIME. Please ask again."
55
 
56
+ print(f"🔱 Generated Reply: {reply}")
57
+ return {"reply": str(reply)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  @main.get("/")
60
+ def home():
61
  return {"status": "Inachi-Lite Online", "model": MODEL_ID}