| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
# Stage 1 ("web"): build the frontend with Node 20 (slim image).
FROM node:20-slim AS web
WORKDIR /web

# Copy only the npm manifests first, so the dependency layer below is reused
# until package.json or package-lock.json changes.
COPY ["frontend/package.json", "frontend/package-lock.json", "./"]
RUN npm ci --no-audit --no-fund

# Copy the remaining frontend sources and run the project's "build" script.
COPY ["frontend/", "./"]
RUN npm run build
|
|
|
|
| |
# Stage 2 ("app"): runtime image built on the CUDA 12.1.1 / cuDNN 8 runtime
# variant for Ubuntu 22.04.
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04 AS app

# Build-time only: keeps apt/debconf from prompting during package installs.
# Declared as ARG (not ENV) so later RUN steps in this stage still see it,
# but it is not persisted into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Python / pip behaviour inside the image.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Application defaults; all of these can be overridden at `docker run` time.
# PORT matches the port declared by EXPOSE in this stage.
# Both Hugging Face cache variables point at the same directory under /data.
# NOTE(review): LOCAL_OPENAI_API_KEY looks like a placeholder value rather
# than a real credential; confirm, and never bake a real key in via ENV.
ENV PORT=7860 \
    HF_HOME=/data/hf \
    HUGGINGFACE_HUB_CACHE=/data/hf \
    LOCAL_OPENAI_BASE_URL=http://127.0.0.1:1234 \
    LOCAL_OPENAI_API_KEY=lm-studio \
    LLM_PROVIDER=local
|
|
| |
| |
| |
| |
| |
| |
| |
| |
# OS packages: the Python 3.11 interpreter, pip, and a few command-line
# utilities and libraries. The apt package lists are removed in the same
# layer so they do not add to the image size.
# Both `python` and `python3` are pointed at the 3.11 interpreter.
# NOTE(review): this replaces the distro's /usr/bin/python3 link; confirm no
# system tool in the image depends on the default interpreter.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      bsdmainutils \
      ca-certificates \
      coreutils \
      curl \
      libgomp1 \
      python3-pip \
      python3.11 \
 && ln -sf /usr/bin/python3.11 /usr/bin/python \
 && ln -sf /usr/bin/python3.11 /usr/bin/python3 \
 && rm -rf /var/lib/apt/lists/*
|
|
WORKDIR /app

# Python dependencies. Only requirements.txt is copied before installing, so
# this layer stays cached until that file changes.
COPY requirements.txt /app/requirements.txt

# llama-cpp-python must come from the pre-built CUDA 12.1 wheel index.
# --extra-index-url only ADDS that index next to PyPI, so without a further
# constraint pip may select a newer source-only release from PyPI and try to
# compile it. --only-binary=llama-cpp-python restricts that one package to
# wheels, so the install either uses a pre-built wheel or fails immediately
# with a clear error.
RUN pip install --upgrade pip setuptools wheel \
 && pip install -r /app/requirements.txt \
 && pip install "llama-cpp-python[server]>=0.2.90" \
      --only-binary=llama-cpp-python \
      --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
|
|
# Application sources. Destinations are relative to the current WORKDIR
# (/app). Top-level files go in first, then each package/data directory.
COPY pyproject.toml README.md openenv.yaml inference.py start.sh ./
COPY server ./server
COPY swarm_openenv_env ./swarm_openenv_env
COPY backend ./backend
COPY outputs ./outputs

# Frontend bundle produced by the "web" stage.
COPY --from=web /web/dist ./frontend/dist
|
|
# Prepare the cache directories under /data (used for the GGUF model and the
# Hugging Face cache; HF Spaces mount /data as persistent storage) and make
# them world-writable, then mark the launcher script as executable.
RUN mkdir -p /data/models /data/hf \
 && chmod -R 777 /data \
 && chmod +x /app/start.sh

# Port the service is expected to listen on (documentation only; it does not
# publish the port).
EXPOSE 7860

# Exec-form command: run the launcher script with bash.
CMD ["bash", "/app/start.sh"]
|
|