Update app.py
app.py CHANGED
@@ -13,14 +13,15 @@ main.add_middleware(
     allow_headers=["*"],
 )
 
-
-
+# 🔱 Load the Gemma 3 1B model
+MODEL_ID = "google/gemma-3-1b-it"
+print(f"🔱 Specialist, Upgrading to {MODEL_ID}...")
 
-# Using the pipeline is more stable
 pipe = pipeline(
     "text-generation",
     model=MODEL_ID,
     device_map="cpu",
+    torch_dtype=torch.float32,
     trust_remote_code=True
 )
 
@@ -31,34 +32,26 @@ class ChatRequest(BaseModel):
 async def chat(request_data: ChatRequest):
     user_query = request_data.message.strip()
 
-    #
-
-
+    # Gemma 3 Chat Format
+    messages = [
+        {"role": "user", "content": user_query},
+    ]
+
+    # Generation
     results = pipe(
-
-        max_new_tokens=256,
+        messages,
+        max_new_tokens=256,
         do_sample=True,
-        temperature=0.
-        top_p=0.9
-        repetition_penalty=1.2, # 🔱 to stop it repeating the same thing
-        pad_token_id=50256
+        temperature=0.7,
+        top_p=0.9
     )
 
-
-
-    # Extract the part that comes after "Assistant:"
-    if "Assistant:" in generated_text:
-        reply = generated_text.split("Assistant:")[-1].strip()
-    else:
-        reply = generated_text.replace(prompt, "").strip()
-
-    # 🔱 If the reply is empty, show the raw generation (for debugging)
-    if not reply or len(reply) < 2:
-        reply = generated_text[:100] + "..."
+    # 🔱 Extract only the reply
+    reply = results[0]['generated_text'][-1]['content']
 
-    print(f"🔱
+    print(f"🔱 Inachi Response: {reply}")
     return {"reply": reply}
 
 @main.get("/")
 def health():
-    return {"status": "Online"}
+    return {"status": "Gemma-3 Powered Inachi Online"}