| import os |
| import threading |
| from fastapi import FastAPI, HTTPException |
| from pydantic import BaseModel |
| from transformers import pipeline |
|
|
# FastAPI application instance; routes are registered via decorators below.
app = FastAPI()


# Hugging Face model id loaded by the text-generation pipeline.
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"


# Lazily-created transformers pipeline; None until first use (see get_pipe()).
_pipe = None
# Guards one-time initialization of _pipe when concurrent requests race.
_pipe_lock = threading.Lock()
|
|
class Request(BaseModel):
    """Request body schema for POST /generate."""

    # Text prompt to complete.
    prompt: str
    # 0.0 selects greedy decoding; > 0 enables sampling (see generate()).
    temperature: float = 0.0
    # Upper bound on newly generated tokens.
    max_tokens: int = 50
|
|
|
|
@app.get("/")
def health():
    """Liveness probe: report server status and whether the model is loaded."""
    loaded = _pipe is not None
    return {"status": "running", "model_loaded": loaded}
|
|
|
|
def get_pipe():
    """Return the shared text-generation pipeline, loading it on first call.

    Uses double-checked locking: the fast path returns the cached pipeline
    without touching the lock; the slow path re-checks under _pipe_lock so
    concurrent first callers load the model exactly once. device=-1 pins
    inference to CPU.
    """
    global _pipe
    if _pipe is not None:
        return _pipe
    with _pipe_lock:
        if _pipe is None:
            _pipe = pipeline("text-generation", model=MODEL_ID, device=-1)
    return _pipe
|
|
|
|
@app.post("/generate")
def generate(req: Request):
    """Generate a completion for req.prompt.

    Decoding is greedy when req.temperature == 0 and sampled otherwise.
    Returns {"response": <generated text, stripped>}. Any failure (model
    load, generation) surfaces as HTTP 500 with the error message as detail.
    """
    try:
        pipe = get_pipe()

        do_sample = req.temperature > 0

        gen_kwargs = {
            "max_new_tokens": int(req.max_tokens),
            "do_sample": do_sample,
            "return_full_text": False,
        }
        # Only forward `temperature` when sampling: transformers warns (and
        # newer versions reject) a temperature setting with do_sample=False.
        if do_sample:
            gen_kwargs["temperature"] = float(req.temperature)

        out = pipe(req.prompt, **gen_kwargs)

        return {"response": out[0]["generated_text"].strip()}
    except Exception as e:
        # Boundary handler: convert any internal error into a 500 response,
        # preserving the original traceback via exception chaining.
        raise HTTPException(status_code=500, detail=str(e)) from e
| |
| get_pipe() |