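# Provenance (page header captured together with the file, kept as a comment
# so the Dockerfile parses): sakhi / Dockerfile, Tushar9802, commit ba004e6:
# "deps: split into runtime (Path 1) + training (full) requirements files"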
# ============================================================================
# Sakhi — HuggingFace Space Dockerfile (Docker SDK)
#
# Hardware target: T4 small (16 GB GPU, CUDA 12.x, cuDNN 8). Persistent
# storage at /data caches Whisper + Ollama weights across restarts.
#
# Layout:
#   Stage 1 (frontend-builder): builds frontend/dist via Vite
#   Stage 2 (runtime): CUDA + cuDNN + Python + Ollama, copies dist in,
#                      starts Ollama + uvicorn via entrypoint.sh
# ============================================================================
# ----------------------------------------------------------------------------
# Stage 1 — Build the React frontend (Vite)
# ----------------------------------------------------------------------------
FROM node:20-slim AS frontend-builder
# Work directly inside the frontend package so npm needs no --prefix flag;
# the build output still ends up in /build/frontend/dist.
WORKDIR /build/frontend
# Manifests first: the dependency layer is reused until package.json or the
# lockfile changes.
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci
# Full frontend sources, then run the package's `build` script.
COPY frontend/ ./
RUN npm run build
# ----------------------------------------------------------------------------
# Stage 2 — Runtime (CUDA + cuDNN + Python + Ollama)
# ----------------------------------------------------------------------------
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04 AS runtime
# Build-time only: avoid tzdata/debconf prompts during apt installs. Declared
# as ARG rather than ENV so it is visible to the RUN steps of this stage but
# is not baked into the running container's environment.
ARG DEBIAN_FRONTEND=noninteractive
# Python/pip settings that persist into the runtime environment:
#   PYTHONUNBUFFERED              - unbuffered stdout/stderr so logs show up immediately
#   PIP_NO_CACHE_DIR              - keep pip's download cache out of the image
#   PIP_DISABLE_PIP_VERSION_CHECK - skip pip's self-update check
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1
# OS-level dependencies, installed in a single layer (sorted alphabetically):
#   ca-certificates      - trust store for HTTPS
#   curl                 - Ollama installer download + entrypoint health probe
#   ffmpeg               - lets faster-whisper decode common audio containers
#                          (opus/m4a/mpeg)
#   python3, python3-pip - Python 3.10 (the ubuntu22.04 default) and pip
#   zstd                 - NOTE(review): purpose is not stated in this file;
#                          presumably needed by the Ollama installer. Confirm.
# `python` is symlinked to python3, and the apt package lists are removed in
# the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        ffmpeg \
        python3 \
        python3-pip \
        zstd \
    && ln -sf /usr/bin/python3 /usr/local/bin/python \
    && rm -rf /var/lib/apt/lists/*
# Install Ollama (writes /usr/local/bin/ollama). The installer's systemd setup
# is harmless in a container — we don't use it; entrypoint.sh runs `ollama serve`
# directly.
# The script is downloaded to a file and then executed instead of being piped
# into `sh`: with a pipe, the default /bin/sh (no pipefail) reports only the
# exit status of `sh`, so a failed download could leave the build green with
# no Ollama installed.
RUN curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh \
    && sh /tmp/ollama-install.sh \
    && rm -f /tmp/ollama-install.sh
# Python dependencies. Only the requirements file is copied before the install
# so this layer is reused until requirements-runtime.txt itself changes.
WORKDIR /app
COPY requirements-runtime.txt /app/requirements-runtime.txt
RUN pip install --no-cache-dir --requirement /app/requirements-runtime.txt
# Application code. The COPYs stay granular (rather than `COPY . .`) so that,
# together with .dockerignore, a change to one path only rebuilds from that
# COPY onward and never invalidates the requirements layer above.
COPY app.py api.py ./
COPY src/ ./src/
COPY configs/ ./configs/
COPY scripts/ ./scripts/
COPY demo_audio/ ./demo_audio/
COPY FAILURES.md JUDGE_BRIEF.md README.md ./
# Set the executable bit at copy time (BuildKit `--chmod`) instead of a
# follow-up `RUN chmod +x`, which would add a second layer holding another
# copy of the script.
COPY --chmod=755 entrypoint.sh ./
# Bring in the compiled frontend from the frontend-builder stage; it is placed
# at /app/frontend/dist, where api.py mounts it.
COPY --from=frontend-builder /build/frontend/dist /app/frontend/dist
# Defaults — overridable from the HF Space "Variables and secrets" panel.
# Weight and cache locations, both under /data.
ENV OLLAMA_MODELS=/data/.ollama/models \
    HF_HOME=/data/.cache/huggingface
# Model selection and Ollama keep-alive.
ENV OLLAMA_MODEL=gemma4:e4b-it-q4_K_M \
    WHISPER_MODEL=Tushar9802/whisper-large-v2-hindi-ct2 \
    OLLAMA_KEEP_ALIVE=24h
# PORT default; same number as the EXPOSE below.
ENV PORT=7860
EXPOSE 7860
# Exec-form entrypoint; the relative path resolves against the stage's WORKDIR.
# NOTE(review): no USER is set in this file, so the image defaults to root.
# Confirm this matches what the Space runtime expects.
ENTRYPOINT ["./entrypoint.sh"]