Spaces:

MINZO4546
/

minzo-api

Build error

App Files Community

MINZO4546 committed 13 days ago

Commit

5e1de17

verified ·

1 Parent(s): c52d572

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -14

app.py CHANGED Viewed

@@ -6,9 +6,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import datetime
 # ── API INITIALIZATION ──
-app = FastAPI()
-app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_methods=["*"],
@@ -21,18 +22,20 @@ API_KEYS_DB = {
 }
 MODEL_ID = "tencent/Hy-MT1.5-1.8B-2bit"
-print(f"🔱 Specialist, Loading {MODEL_ID} on HF Space...")
-# Load Tokenizer and Model
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
-    device_map="auto",
-    torch_dtype=torch.float16,
     trust_remote_code=True
 )
-print("🔱 Inachi-Lite is Online.")
 # ── DATA MODELS ──
 class ChatRequest(BaseModel):
@@ -42,24 +45,25 @@ class ChatRequest(BaseModel):
     max_tokens: int = 512
 # ── CHAT ENDPOINT ──
-@app.post("/v1/chat")
 async def chat(request_data: ChatRequest, x_api_key: str = Header(None)):
     if not x_api_key or x_api_key not in API_KEYS_DB:
         raise HTTPException(status_code=403, detail="Access Denied")
     user_query = request_data.message.strip()
     today = datetime.datetime.now().strftime("%Y-%m-%d")
-    # Prompt Engineering
     prompt = f"System: You are Inachi AI, an expert assistant for MINZO-PRIME. Date: {today}\n"
-    # History integration (Optional but recommended)
-    for human, ai in request_data.history[-3:]: # අන්තිම මැසේජ් 3 විතරක් ගමු
         prompt += f"User: {human}\nAI: {ai}\n"
     prompt += f"User: {user_query}\nAI:"
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
         outputs = model.generate(
@@ -77,6 +81,6 @@ async def chat(request_data: ChatRequest, x_api_key: str = Header(None)):
         "status": "success"
     }
-@app.get("/")
 def home():
-    return {"message": "Inachi-Lite (Hy-MT) is Running", "model": MODEL_ID}

 import datetime
 # ── API INITIALIZATION ──
+# Hugging Face සොයන 'main' attribute එක මෙතනට ලබා දී ඇත
+main = FastAPI()
+main.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_methods=["*"],
 }
 MODEL_ID = "tencent/Hy-MT1.5-1.8B-2bit"
+print(f"🔱 Specialist, Loading {MODEL_ID} on CPU...")
+# Load Tokenizer
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+# Load Model Optimized for CPU
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
+    device_map="cpu", # GPU නොමැති නිසා කෙලින්ම CPU එකට ලබා දීම
+    torch_dtype=torch.float32,
     trust_remote_code=True
 )
+print("🔱 Inachi-Lite is Online and Ready.")
 # ── DATA MODELS ──
 class ChatRequest(BaseModel):
     max_tokens: int = 512
 # ── CHAT ENDPOINT ──
+@main.post("/v1/chat")
 async def chat(request_data: ChatRequest, x_api_key: str = Header(None)):
+    # API Key පරීක්ෂා කිරීම
     if not x_api_key or x_api_key not in API_KEYS_DB:
         raise HTTPException(status_code=403, detail="Access Denied")
     user_query = request_data.message.strip()
     today = datetime.datetime.now().strftime("%Y-%m-%d")
+    # Prompt සකස් කිරීම
     prompt = f"System: You are Inachi AI, an expert assistant for MINZO-PRIME. Date: {today}\n"
+    # History ඇතුළත් කිරීම
+    for human, ai in request_data.history[-2:]:
         prompt += f"User: {human}\nAI: {ai}\n"
     prompt += f"User: {user_query}\nAI:"
+    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
     with torch.no_grad():
         outputs = model.generate(
         "status": "success"
     }
+@main.get("/")
 def home():
+    return {"message": "Inachi-Lite is Running", "status": "online"}