import asyncio
import threading

import torch
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import pipeline
|
|
# Application object; the route decorators in this module register on it.
main = FastAPI()

# CORS is left fully open: every origin, HTTP method, and request header
# is accepted.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
main.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
|
|
# Hugging Face Hub repository id of the model served by this process.
MODEL_ID = "tencent/Hy-MT1.5-1.8B-2bit"
print(f"🔱 Specialist, Loading {MODEL_ID} (Public Mode)...")

# Build the text-generation pipeline once at import time so every request
# reuses the same loaded weights.
#
# `trust_remote_code` is a top-level argument of `pipeline()`. Passing it
# inside `model_kwargs` can collide with the value `pipeline()` forwards on
# its own (duplicate keyword argument), so it is given directly here.
#
# SECURITY NOTE(review): trust_remote_code=True executes Python shipped in
# the model repository. Consider pinning `revision=` to a reviewed commit.
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    device_map="cpu",
    trust_remote_code=True,
)
|
|
# JSON body accepted by the chat endpoint.
class ChatRequest(BaseModel):
    # Text submitted by the caller; required, no default.
    message: str
|
|
# Serialises access to the shared pipeline. Generation used to run one request
# at a time (it blocked the event loop); moving it to worker threads must not
# silently turn that into concurrent use of a single model/tokenizer, which
# nothing in this module establishes as safe.
_GENERATION_LOCK = threading.Lock()


def _generate_completion(prompt):
    """Run the blocking pipeline call for *prompt* and return only the new text."""
    # Ask the loaded tokenizer for its padding id instead of hard-coding one:
    # 50256 is GPT-2's end-of-text id and is only right for GPT-2 vocabularies.
    tokenizer = pipe.tokenizer
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with _GENERATION_LOCK:
        results = pipe(
            prompt,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.7,
            pad_token_id=pad_token_id,
            # Return just the continuation so the prompt never has to be
            # stripped back out by string matching.
            return_full_text=False,
        )
    return results[0]["generated_text"]


@main.post("/v1/chat")
async def chat(request_data: ChatRequest):
    """Generate one assistant reply for the submitted chat message.

    The message is wrapped in a single-turn ``User:`` / ``Assistant:`` prompt
    and sampled from the module-level ``pipe`` (at most 150 new tokens,
    temperature 0.7).

    Args:
        request_data: Request body carrying the user's ``message``.

    Returns:
        ``{"reply": <text>}``. A fixed fallback sentence is used when the
        model yields no text.
    """
    user_query = request_data.message.strip()
    prompt = f"User: {user_query}\nAssistant:"

    # Generation is CPU-bound and blocking; run it in a worker thread so the
    # event loop keeps serving other requests (e.g. the health check).
    loop = asyncio.get_running_loop()
    completion = await loop.run_in_executor(None, _generate_completion, prompt)

    # The model may keep writing and invent the next "User:" turn; keep only
    # its first answer.
    reply = completion.split("\nUser:", 1)[0].strip()

    if not reply:
        reply = "I am here, MINZO-PRIME. Systems are nominal."

    return {"reply": reply}
|
|
@main.get("/")
def health():
    # Liveness probe: replies with a fixed status payload and does no other work.
    status_payload = {"status": "Public Inachi-Lite Online"}
    return status_payload