Spaces:

digifreely
/

brain

Sleeping

App Files Community

digifreely committed 13 days ago

Commit

a27072c

verified ·

1 Parent(s): 90b53ab

Update Dockerfile

Browse files

Files changed (1) hide show

Dockerfile +13 -16

Dockerfile CHANGED Viewed

@@ -28,29 +28,25 @@ WORKDIR /app
 # ── Pip hygiene: upgrade pip/wheel first (small, fast) ───────────────────
 RUN pip install --no-cache-dir --no-compile --upgrade pip wheel
-# ── 1 of 5 · CPU-only PyTorch (largest wheel – install alone) ────────────
 RUN pip install --no-cache-dir --no-compile \
     torch==2.3.1 \
     --index-url https://download.pytorch.org/whl/cpu
-# ── 2 of 5 · HuggingFace stack (transformers pulls in tokenizers etc.) ───
 RUN pip install --no-cache-dir --no-compile \
     transformers==4.46.3 \
     accelerate==1.1.1
-# ── 3 of 5 · Serialisation libs ──────────────────────────────────────────
 RUN pip install --no-cache-dir --no-compile \
     sentencepiece==0.2.0 \
     protobuf==5.28.3
-# ── 4 of 5 · Async HTTP client ───────────────────────────────────────────
-RUN pip install --no-cache-dir --no-compile \
-    httpx==0.27.2
-# ── 5 of 5 · Web framework + ASGI server ─────────────────────────────────
 RUN pip install --no-cache-dir --no-compile \
     fastapi==0.115.0 \
-    gunicorn==22.0.0 \
     uvicorn[standard]==0.30.6
 # ── Application code ──────────────────────────────────────────────────────
@@ -75,10 +71,11 @@ ENV PYTHONUNBUFFERED=1 \
 EXPOSE 7860
 # ── Start-up command ──────────────────────────────────────────────────────
-# Gunicorn + uvicorn worker serves the FastAPI app.
-# --preload ensures the model is loaded ONCE before workers fork.
-CMD ["gunicorn", "app:app", \
-     "--worker-class", "uvicorn.workers.UvicornWorker", \
-     "--workers",      "1", \
-     "--bind",         "0.0.0.0:7860", \
-     "--timeout",      "300"]

 # ── Pip hygiene: upgrade pip/wheel first (small, fast) ───────────────────
 RUN pip install --no-cache-dir --no-compile --upgrade pip wheel
+# ── 1 of 4 · CPU-only PyTorch (largest wheel – install alone) ────────────
 RUN pip install --no-cache-dir --no-compile \
     torch==2.3.1 \
     --index-url https://download.pytorch.org/whl/cpu
+# ── 2 of 4 · HuggingFace stack (transformers pulls in tokenizers etc.) ───
 RUN pip install --no-cache-dir --no-compile \
     transformers==4.46.3 \
     accelerate==1.1.1
+# ── 3 of 4 · Serialisation libs ──────────────────────────────────────────
 RUN pip install --no-cache-dir --no-compile \
     sentencepiece==0.2.0 \
     protobuf==5.28.3
+# ── 4 of 4 · Async HTTP client + Web framework + ASGI server ─────────────
 RUN pip install --no-cache-dir --no-compile \
+    httpx==0.27.2 \
     fastapi==0.115.0 \
     uvicorn[standard]==0.30.6
 # ── Application code ──────────────────────────────────────────────────────
 EXPOSE 7860
 # ── Start-up command ──────────────────────────────────────────────────────
+# Plain uvicorn — no gunicorn shim. Eliminates the gunicorn health-check
+# race that was killing the worker mid-response and causing 502s.
+# --timeout-keep-alive holds idle keep-alive connections open for 300 s; it does not cap how long a request (CPU inference for the 3B model) may run.
+CMD ["uvicorn", "app:app", \
+     "--host",               "0.0.0.0", \
+     "--port",               "7860", \
+     "--timeout-keep-alive", "300", \
+     "--log-level",          "info"]