digifreely committed on
Commit
a27072c
·
verified ·
1 Parent(s): 90b53ab

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +13 -16
Dockerfile CHANGED
@@ -28,29 +28,25 @@ WORKDIR /app
28
  # ── Pip hygiene: upgrade pip/wheel first (small, fast) ───────────────────
29
  RUN pip install --no-cache-dir --no-compile --upgrade pip wheel
30
 
31
- # ── 1 of 5 · CPU-only PyTorch (largest wheel – install alone) ────────────
32
  RUN pip install --no-cache-dir --no-compile \
33
  torch==2.3.1 \
34
  --index-url https://download.pytorch.org/whl/cpu
35
 
36
- # ── 2 of 5 · HuggingFace stack (transformers pulls in tokenizers etc.) ───
37
  RUN pip install --no-cache-dir --no-compile \
38
  transformers==4.46.3 \
39
  accelerate==1.1.1
40
 
41
- # ── 3 of 5 · Serialisation libs ──────────────────────────────────────────
42
  RUN pip install --no-cache-dir --no-compile \
43
  sentencepiece==0.2.0 \
44
  protobuf==5.28.3
45
 
46
- # ── 4 of 5 · Async HTTP client ───────────────────────────────────────────
47
- RUN pip install --no-cache-dir --no-compile \
48
- httpx==0.27.2
49
-
50
- # ── 5 of 5 · Web framework + ASGI server ─────────────────────────────────
51
  RUN pip install --no-cache-dir --no-compile \
 
52
  fastapi==0.115.0 \
53
- gunicorn==22.0.0 \
54
  uvicorn[standard]==0.30.6
55
 
56
  # ── Application code ──────────────────────────────────────────────────────
@@ -75,10 +71,11 @@ ENV PYTHONUNBUFFERED=1 \
75
  EXPOSE 7860
76
 
77
  # ── Start-up command ──────────────────────────────────────────────────────
78
- # Gunicorn + uvicorn worker serves the FastAPI app.
79
- # --preload ensures the model is loaded ONCE before workers fork.
80
- CMD ["gunicorn", "app:app", \
81
- "--worker-class", "uvicorn.workers.UvicornWorker", \
82
- "--workers", "1", \
83
- "--bind", "0.0.0.0:7860", \
84
- "--timeout", "300"]
 
 
28
  # ── Pip hygiene: upgrade pip/wheel first (small, fast) ───────────────────
29
  RUN pip install --no-cache-dir --no-compile --upgrade pip wheel
30
 
31
+ # ── 1 of 4 · CPU-only PyTorch (largest wheel – install alone) ────────────
32
  RUN pip install --no-cache-dir --no-compile \
33
  torch==2.3.1 \
34
  --index-url https://download.pytorch.org/whl/cpu
35
 
36
+ # ── 2 of 4 · HuggingFace stack (transformers pulls in tokenizers etc.) ───
37
  RUN pip install --no-cache-dir --no-compile \
38
  transformers==4.46.3 \
39
  accelerate==1.1.1
40
 
41
+ # ── 3 of 4 · Serialisation libs ──────────────────────────────────────────
42
  RUN pip install --no-cache-dir --no-compile \
43
  sentencepiece==0.2.0 \
44
  protobuf==5.28.3
45
 
46
+ # ── 4 of 4 · Async HTTP client + Web framework + ASGI server ─────────────
 
 
 
 
47
  RUN pip install --no-cache-dir --no-compile \
48
+ httpx==0.27.2 \
49
  fastapi==0.115.0 \
 
50
  uvicorn[standard]==0.30.6
51
 
52
  # ── Application code ──────────────────────────────────────────────────────
 
71
  EXPOSE 7860
72
 
73
  # ── Start-up command ──────────────────────────────────────────────────────
74
+ # Plain uvicorn, no gunicorn shim. Eliminates the gunicorn health-check
75
+ # race that was killing the worker mid-response and causing 502s.
76
+ # timeout-keep-alive (idle keep-alive connection timeout) is raised to 300 s to allow for the long CPU inference time of the 3B model.
77
+ CMD ["uvicorn", "app:app", \
78
+ "--host", "0.0.0.0", \
79
+ "--port", "7860", \
80
+ "--timeout-keep-alive", "300", \
81
+ "--log-level", "info"]