# ============================================================================
# Sakhi — HuggingFace Space Dockerfile (Docker SDK)
#
# Hardware target: T4 small (16 GB GPU, CUDA 12.x, cuDNN 8). Persistent
# storage at /data caches Whisper + Ollama weights across restarts.
#
# Layout:
#   Stage 1 (frontend-builder): builds frontend/dist via Vite
#   Stage 2 (runtime):          CUDA + cuDNN + Python + Ollama, copies dist in,
#                               starts Ollama + uvicorn via entrypoint.sh
# ============================================================================

# ----------------------------------------------------------------------------
# Stage 1 — Build the React frontend (Vite)
# ----------------------------------------------------------------------------
FROM node:20-slim AS frontend-builder

WORKDIR /build

# Copy only the manifests first so the `npm ci` layer stays cached until the
# dependency set actually changes.
COPY frontend/package.json frontend/package-lock.json ./frontend/
RUN npm --prefix frontend ci

# Now bring in the sources and produce frontend/dist.
COPY frontend/ ./frontend/
RUN npm --prefix frontend run build

# ----------------------------------------------------------------------------
# Stage 2 — Runtime (CUDA + cuDNN + Python + Ollama)
# ----------------------------------------------------------------------------
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04 AS runtime

# Avoid tzdata prompts during apt installs. Declared as ARG (not ENV) so the
# setting applies to every RUN in this stage but is not baked into the
# environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime Python/pip behaviour: unbuffered stdout/stderr, no pip download
# cache, no pip self-update nag.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# System packages: Python 3.10 (default on ubuntu22.04), pip, curl for Ollama
# installer + entrypoint health probe, ca-certificates for HTTPS, ffmpeg so
# faster-whisper can decode common audio containers (opus/m4a/mpeg).
# NOTE(review): zstd is presumably needed to unpack the Ollama release
# archive — confirm against the installer before removing it.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        ffmpeg \
        python3 \
        python3-pip \
        zstd \
    && ln -sf /usr/bin/python3 /usr/local/bin/python \
    && rm -rf /var/lib/apt/lists/*

# Run the remaining RUN steps under bash with pipefail so that a failed
# download in `curl … | sh` fails the build instead of being masked by the
# exit status of the last command in the pipe.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install Ollama (writes /usr/local/bin/ollama). The installer's systemd setup
# is harmless in a container — we don't use it; entrypoint.sh runs `ollama serve`
# directly.
RUN curl -fsSL https://ollama.com/install.sh | sh

# Python dependencies
WORKDIR /app
COPY requirements-runtime.txt ./
RUN pip install --no-cache-dir -r requirements-runtime.txt

# Application code. Keep the COPY granular so the .dockerignore + the
# requirements layer above stay cache-friendly across iterations.
COPY app.py api.py ./
COPY src/ ./src/
COPY configs/ ./configs/
COPY scripts/ ./scripts/
COPY demo_audio/ ./demo_audio/
COPY FAILURES.md JUDGE_BRIEF.md README.md ./

# Copy the entrypoint already marked executable (BuildKit `--chmod`) instead
# of spending an extra layer on a follow-up `RUN chmod +x`.
COPY --chmod=755 entrypoint.sh ./

# Frontend build output from stage 1 → frontend/dist (where api.py mounts it)
COPY --from=frontend-builder /build/frontend/dist ./frontend/dist

# Defaults — overridable from the HF Space "Variables and secrets" panel.
ENV PORT=7860 \
    OLLAMA_MODEL=gemma4:e4b-it-q4_K_M \
    OLLAMA_MODELS=/data/.ollama/models \
    HF_HOME=/data/.cache/huggingface \
    OLLAMA_KEEP_ALIVE=24h \
    WHISPER_MODEL=Tushar9802/whisper-large-v2-hindi-ct2

# Documentation only; matches the PORT default above.
EXPOSE 7860

# NOTE(review): no USER is set, so the image defaults to root. Switching to a
# dedicated non-root user would need /app and /data to be writable by it —
# confirm against entrypoint.sh and the Space's storage permissions first.
ENTRYPOINT ["./entrypoint.sh"]