Spaces:

MINZO4546
/

minzo-api

Build error

MINZO4546 committed 14 days ago

Commit

e9d564d

verified ·

1 Parent(s): 550f38c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -13,15 +13,15 @@ main.add_middleware(
     allow_headers=["*"],
 )
-# 🔱 Gemma 3 1B මොඩල් එක ලෝඩ් කිරීම
-MODEL_ID = "google/gemma-3-1b-it"
-print(f"🔱 Specialist, Upgrading to {MODEL_ID}...")
 pipe = pipeline(
     "text-generation",
     model=MODEL_ID,
     device_map="cpu",
-    torch_dtype=torch.float32,
     trust_remote_code=True
 )
@@ -32,26 +32,18 @@ class ChatRequest(BaseModel):
 async def chat(request_data: ChatRequest):
     user_query = request_data.message.strip()
-    # Gemma 3 Chat Format
     messages = [
         {"role": "user", "content": user_query},
     ]
-    # Generation
     results = pipe(
         messages,
-        max_new_tokens=256,
-        do_sample=True,
-        temperature=0.7,
-        top_p=0.9
     )
-    # 🔱 පිළිතුර පමණක් වෙන් කර ගැනීම
     reply = results[0]['generated_text'][-1]['content']
-    print(f"🔱 Inachi Response: {reply}")
-    return {"reply": reply}
-@main.get("/")
-def health():
-    return {"status": "Gemma-3 Powered Inachi Online"}

     allow_headers=["*"],
 )
+MODEL_ID = "google/gemma-3-1b-it"
+# 🔱 Optimization 1: KV Cache භාවිතය (වේගය වැඩි කිරීමට)
 pipe = pipeline(
     "text-generation",
     model=MODEL_ID,
     device_map="cpu",
+    torch_dtype=torch.bfloat16, # CPU එක bfloat16 වලට කැමතියි
+    use_cache=True, # කලින් ජෙනරේට් කරපු ටෝකන් මතක තබා ගනී
     trust_remote_code=True
 )
 async def chat(request_data: ChatRequest):
     user_query = request_data.message.strip()
     messages = [
         {"role": "user", "content": user_query},
     ]
+    # 🔱 Optimization 2: Coding සඳහා ප්‍රමාණවත් ඉඩක් ලබා දීම
     results = pipe(
         messages,
+        max_new_tokens=1024, # 🔱 දැන් ඔයාට දිග කෝඩ් එකක් වුණත් ගන්න පුළුවන්
+        do_sample=False,      # Coding වලට sample ඕනේ නැහැ, Greedy search එක වේගවත්
+        temperature=0.0,     # වඩාත් නිවැරදි කෝඩ් එකක් සඳහා (Zero randomness)
+        pad_token_id=50256
     )
     reply = results[0]['generated_text'][-1]['content']
+    return {"reply": reply}