# adecco/app.py — FastAPI text-generation service
# Author: Samuele Samonini ("New application file", commit 18d4cf7)
import os
import threading
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import pipeline
# FastAPI application instance serving the endpoints defined below.
app = FastAPI()
# Model to load. To change it, browse the Hugging Face "Models" page and
# copy the model ID, e.g. "Nanbeige/Nanbeige4.1-3B".
# Be careful about how big the model is — HF free-tier resources are limited.
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
# Lazily-initialized text-generation pipeline (built in get_pipe);
# the lock guards its one-time creation across concurrent requests.
_pipe = None
_pipe_lock = threading.Lock()
class Request(BaseModel):
    """Request body for POST /generate."""
    prompt: str  # text prompt fed to the model
    temperature: float = 0.0  # 0 selects greedy decoding; >0 enables sampling (see generate)
    max_tokens: int = 50  # you can pass the parameter in the request
@app.get("/")
def health():
    """Liveness probe: reports service status and whether the model is loaded."""
    loaded = _pipe is not None
    return {"status": "running", "model_loaded": loaded}
def get_pipe():
    """Return the shared text-generation pipeline, creating it on first use.

    Double-checked locking ensures the model is loaded at most once even
    under concurrent requests; device=-1 runs inference on the CPU.
    """
    global _pipe
    if _pipe is not None:
        return _pipe
    with _pipe_lock:
        if _pipe is None:
            _pipe = pipeline("text-generation", model=MODEL_ID, device=-1)
    return _pipe
@app.post("/generate")  # this is the endpoint that you call in the notebook
def generate(req: Request):
    """Generate a completion for req.prompt with the shared pipeline.

    temperature == 0 selects greedy decoding; temperature > 0 enables
    sampling. Returns {"response": <generated text, stripped>}.
    Raises HTTPException(500) with the error message on inference failure.
    """
    try:
        pipe = get_pipe()
        # Guard against non-positive token counts from the client.
        max_new = max(1, int(req.max_tokens))
        do_sample = req.temperature > 0
        gen_kwargs = {
            "max_new_tokens": max_new,
            "do_sample": do_sample,
            "return_full_text": False,
        }
        # Only forward temperature when sampling: passing temperature=0.0
        # alongside do_sample=False triggers warnings (and, in newer
        # transformers versions, validation errors).
        if do_sample:
            gen_kwargs["temperature"] = float(req.temperature)
        out = pipe(req.prompt, **gen_kwargs)
        return {"response": out[0]["generated_text"].strip()}
    except HTTPException:
        # Don't wrap deliberately raised HTTP errors into a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# Eagerly build the pipeline at import time so the model is already
# loaded before the first request arrives.
get_pipe()