Samuele Samonini committed on
Commit
41c287c
·
1 Parent(s): 26ef4af

New application file

Browse files
Files changed (3) hide show
  1. Dockerfile +9 -11
  2. app.py +57 -3
  3. requirements.txt +3 -0
Dockerfile CHANGED
@@ -1,16 +1,14 @@
1
- # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
- # you will also find guides on how best to write your Dockerfile
3
 
4
- FROM python:3.9
5
 
6
- RUN useradd -m -u 1000 user
7
- USER user
8
- ENV PATH="/home/user/.local/bin:$PATH"
9
 
10
- WORKDIR /app
 
11
 
12
- COPY --chown=user ./requirements.txt requirements.txt
13
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
 
15
- COPY --chown=user . /app
16
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
# Minimal CPU image for the FastAPI + transformers inference service.
FROM python:3.10-slim

WORKDIR /app

# Hugging Face cache/telemetry configuration.
# NOTE: TRANSFORMERS_CACHE is deprecated in recent transformers releases
# (HF_HOME already covers the cache location); kept for older versions.
# Single ENV instruction keeps the layer count down.
ENV HF_HOME=/tmp/hf \
    TRANSFORMERS_CACHE=/tmp/hf \
    HF_HUB_DISABLE_TELEMETRY=1 \
    PYTHONUNBUFFERED=1

# Install dependencies first so this layer is cached across code-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

COPY app.py .

# Shell form on purpose: ${PORT:-7860} must be expanded at container start.
CMD uvicorn app:app --host 0.0.0.0 --port ${PORT:-7860}
 
app.py CHANGED
@@ -1,7 +1,61 @@
1
- from fastapi import FastAPI
 
 
 
 
2
 
3
  app = FastAPI()
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  @app.get("/")
6
- def greet_json():
7
- return {"Hello": "World!"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import threading
3
+ from fastapi import FastAPI, HTTPException
4
+ from pydantic import BaseModel
5
+ from transformers import pipeline
6
 
7
# FastAPI application instance; served by uvicorn (see Dockerfile CMD).
app = FastAPI()

## You can change the model: on Hugging Face go to Models and copy the ID.
## For example: Nanbeige/Nanbeige4.1-3B
## Careful about how big the model is, as HF free resources are limited.
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"

# Lazily-initialized text-generation pipeline singleton; the lock ensures
# only one thread pays the model-load cost on first use.
_pipe = None
_pipe_lock = threading.Lock()
19
class Request(BaseModel):
    """Body schema for POST /generate."""
    prompt: str                 # text prompt forwarded to the model
    temperature: float = 0.0    # 0 means greedy decoding (no sampling)
    max_tokens: int = 50        ## you can pass the parameter in the request
23
+
24
+
25
@app.get("/")
def health():
    """Liveness probe: reports service status and whether the model is loaded."""
    loaded = _pipe is not None
    return {"status": "running", "model_loaded": loaded}
28
+
29
+
30
def get_pipe():
    """Return the shared text-generation pipeline, creating it on first use.

    Double-checked locking: the unlocked fast path avoids lock contention
    once the pipeline exists; the locked re-check ensures only one thread
    actually builds it.
    """
    global _pipe
    if _pipe is not None:
        return _pipe
    with _pipe_lock:
        if _pipe is None:
            # device=-1 forces CPU execution (no GPU on free Spaces).
            _pipe = pipeline("text-generation", model=MODEL_ID, device=-1)
    return _pipe
41
+
42
+
43
@app.post("/generate")  ## this is the endpoint that you call in the notebook
def generate(req: Request):
    """Generate a completion for *req.prompt*.

    Greedy decoding when temperature == 0, sampling otherwise. Any failure
    (model load, generation) is surfaced as HTTP 500 with the error text.
    """
    try:
        pipe = get_pipe()

        do_sample = req.temperature > 0

        gen_kwargs = {
            "max_new_tokens": int(req.max_tokens),
            "do_sample": do_sample,
            "return_full_text": False,
        }
        # Only forward temperature when sampling: recent transformers
        # releases reject (or warn on) temperature <= 0, so passing 0.0
        # alongside do_sample=False breaks greedy requests.
        if do_sample:
            gen_kwargs["temperature"] = float(req.temperature)

        out = pipe(req.prompt, **gen_kwargs)

        return {"response": out[0]["generated_text"].strip()}

    except Exception as e:
        # Boundary handler: convert any internal error into a clean 500.
        raise HTTPException(status_code=500, detail=str(e))
requirements.txt CHANGED
@@ -1,2 +1,5 @@
1
  fastapi
2
  uvicorn[standard]
 
 
 
 
1
  fastapi
2
  uvicorn[standard]
3
+ transformers
4
+ accelerate
5
+ --extra-index-url https://download.pytorch.org/whl/cpu
+ torch