# ============================================================================
# Sakhi — HuggingFace Space Dockerfile (Docker SDK)
#
# Hardware target: T4 small (16 GB GPU, CUDA 12.x, cuDNN 8). Persistent
# storage at /data caches Whisper + Ollama weights across restarts.
#
# Layout:
# Stage 1 (frontend-builder): builds frontend/dist via Vite
# Stage 2 (runtime): CUDA + cuDNN + Python + Ollama, copies dist in,
# starts Ollama + uvicorn via entrypoint.sh
# ============================================================================
# ----------------------------------------------------------------------------
# Stage 1 — Build the React frontend (Vite)
# ----------------------------------------------------------------------------
# Build stage for the Vite frontend. The runtime stage copies only
# /build/frontend/dist out of this stage.
FROM node:20-slim AS frontend-builder
WORKDIR /build/frontend

# Install from the lockfile before copying sources, so this layer is reused
# until package.json / package-lock.json change.
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci

# Bring in the remaining frontend sources and run the project's build script.
COPY frontend/ ./
RUN npm run build
# ----------------------------------------------------------------------------
# Stage 2 — Runtime (CUDA + cuDNN + Python + Ollama)
# ----------------------------------------------------------------------------
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04 AS runtime

# Build-time only: keeps apt (tzdata in particular) from prompting during
# package installs. Declared as ARG rather than ENV so it is visible to the RUN
# steps of this stage but is not baked into the running container's environment.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment:
#   PYTHONUNBUFFERED              - unbuffered stdout/stderr, so logs show up immediately
#   PIP_NO_CACHE_DIR              - pip keeps no download/wheel cache in the image
#   PIP_DISABLE_PIP_VERSION_CHECK - skip pip's "newer version available" lookup
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1
# OS-level dependencies, installed in one layer; the apt package index is
# removed in the same RUN so it never lands in the image.
#   ca-certificates      - trust store for HTTPS
#   curl                 - Ollama installer download + entrypoint health probe
#   ffmpeg               - lets faster-whisper decode common audio containers
#                          (opus/m4a/mpeg)
#   python3, python3-pip - Python 3.10 (the ubuntu22.04 default) and pip
#   zstd                 - NOTE(review): purpose is not stated in this file;
#                          presumably required by the Ollama installer - confirm
# `python` is symlinked to python3 so both names resolve to the same interpreter.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        ffmpeg \
        python3 \
        python3-pip \
        zstd \
    && ln -sf /usr/bin/python3 /usr/local/bin/python \
    && rm -rf /var/lib/apt/lists/*
# Install Ollama (writes /usr/local/bin/ollama). The installer's systemd setup
# is harmless in a container - we don't use it; entrypoint.sh runs `ollama serve`
# directly.
#
# The script is downloaded to a file and then executed, rather than piped into
# `sh`. The default RUN shell (/bin/sh) has no pipefail, so with `curl | sh` a
# failed download hands `sh` empty input, the pipeline exits 0, and the build
# "succeeds" without Ollama installed. Chaining with && makes a curl failure
# abort the build; the temporary script is removed in the same layer.
RUN curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh \
    && sh /tmp/ollama-install.sh \
    && rm -f /tmp/ollama-install.sh
# Python dependencies. Only the requirements file is copied at this point, so
# this install layer is rebuilt when requirements-runtime.txt changes rather
# than on every application-code edit.
WORKDIR /app
COPY requirements-runtime.txt ./
RUN pip install \
        --no-cache-dir \
        --requirement requirements-runtime.txt
# Application code. The COPYs are kept granular (one per top-level path) so the
# .dockerignore and the requirements layer above stay cache-friendly across
# iterations: touching one directory only invalidates the layers from that
# COPY onward.
COPY app.py api.py ./
COPY src/ ./src/
COPY configs/ ./configs/
COPY scripts/ ./scripts/
COPY demo_audio/ ./demo_audio/
COPY FAILURES.md JUDGE_BRIEF.md README.md ./

# Entrypoint script. The executable bit is set by the COPY itself (requires
# BuildKit) instead of a follow-up `RUN chmod +x`, which would add a second
# layer re-storing the file and leave the final mode dependent on the mode the
# file happened to have in the build context.
COPY --chmod=755 entrypoint.sh ./

# Frontend build output from stage 1 -> frontend/dist (where api.py mounts it)
COPY --from=frontend-builder /build/frontend/dist ./frontend/dist
# Default configuration - every value below can be overridden from the HF Space
# "Variables and secrets" panel.

# HTTP port; matches the EXPOSE below.
# NOTE(review): presumably read by entrypoint.sh / uvicorn - confirm.
ENV PORT=7860

# Ollama: model tag, on-disk model store, and keep-alive duration.
# The model store lives under /data.
ENV OLLAMA_MODEL=gemma4:e4b-it-q4_K_M \
    OLLAMA_MODELS=/data/.ollama/models \
    OLLAMA_KEEP_ALIVE=24h

# Hugging Face cache directory (also under /data) and the Whisper model id.
# NOTE(review): WHISPER_MODEL is presumably consumed by the application code - confirm.
ENV HF_HOME=/data/.cache/huggingface \
    WHISPER_MODEL=Tushar9802/whisper-large-v2-hindi-ct2

EXPOSE 7860

# Exec form; the relative path resolves against WORKDIR (/app).
ENTRYPOINT ["./entrypoint.sh"]
|