File size: 3,122 Bytes
745f62a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba004e6
 
745f62a
 
 
 
 
 
 
d3595cb
745f62a
 
 
 
 
 
 
 
e0663bd
745f62a
 
5575d97
d3595cb
 
745f62a
e0663bd
745f62a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# ============================================================================
# Sakhi — HuggingFace Space Dockerfile (Docker SDK)
#
# Hardware target: T4 small (16 GB GPU, CUDA 12.x, cuDNN 8). Persistent
# storage at /data caches Whisper + Ollama weights across restarts.
#
# Layout:
#   Stage 1 (frontend-builder): builds frontend/dist via Vite
#   Stage 2 (runtime):          CUDA + cuDNN + Python + Ollama, copies dist in,
#                               starts Ollama + uvicorn via entrypoint.sh
# ============================================================================

# ----------------------------------------------------------------------------
# Stage 1 — Build the React frontend (Vite)
# ----------------------------------------------------------------------------
FROM node:20-slim AS frontend-builder

# Work directly inside the frontend directory so npm needs no --prefix flag.
# The build output still ends up under /build/frontend/.
WORKDIR /build/frontend

# Manifests first: the dependency-install layer is only rebuilt when
# package.json or package-lock.json change.
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci

# Remaining frontend sources, then the project's `build` script.
COPY frontend/ ./
RUN npm run build


# ----------------------------------------------------------------------------
# Stage 2 — Runtime (CUDA + cuDNN + Python + Ollama)
# ----------------------------------------------------------------------------
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04 AS runtime

# Build-time only: suppress interactive prompts (e.g. tzdata) during apt
# installs. Declared as ARG so the RUN steps of this stage see it, while the
# running container's environment is not polluted with it.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment: unbuffered Python output, and pip configured to skip
# its download cache and its self-update version check.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# System packages (kept alphabetical for easy diffing):
#   ca-certificates  - trust store for HTTPS downloads
#   curl             - fetches the Ollama installer; also used by the
#                      entrypoint health probe
#   ffmpeg           - lets faster-whisper decode common audio containers
#                      (opus/m4a/mpeg)
#   python3, python3-pip - Python 3.10 (the ubuntu22.04 default) and pip
#   zstd             - NOTE(review): presumably needed by the Ollama installer
#                      to unpack its archive; confirm before removing
# `python` is symlinked to python3, and the apt package lists are removed in
# the same layer so they do not add to the image size.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        ca-certificates \
        curl \
        ffmpeg \
        python3 \
        python3-pip \
        zstd \
    && ln -sf /usr/bin/python3 /usr/local/bin/python \
    && rm -rf /var/lib/apt/lists/*

# Install Ollama. The installer is downloaded to a file first instead of being
# piped straight into `sh`: with the default /bin/sh (no pipefail) a failed
# download in `curl ... | sh` is masked, because sh reads empty input and
# exits 0, so the build would succeed without Ollama. The final `command -v`
# check fails the build if no `ollama` binary ended up on PATH.
# The installer's systemd setup is harmless in a container — we don't use it;
# entrypoint.sh runs `ollama serve` directly.
RUN curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh \
    && sh /tmp/ollama-install.sh \
    && rm -f /tmp/ollama-install.sh \
    && command -v ollama >/dev/null

# Python dependencies. Only the requirements file is copied at this point, so
# this layer is reused until that file changes.
WORKDIR /app
COPY requirements-runtime.txt /app/requirements-runtime.txt
RUN python3 -m pip install --no-cache-dir --requirement /app/requirements-runtime.txt

# Application code. Keep the COPY granular so the .dockerignore + the
# requirements layer above stay cache-friendly across iterations.
COPY app.py api.py ./
COPY src/ ./src/
COPY configs/ ./configs/
COPY scripts/ ./scripts/
COPY demo_audio/ ./demo_audio/
COPY FAILURES.md JUDGE_BRIEF.md README.md ./

# The executable bit is set as part of the COPY (BuildKit `--chmod`) rather
# than by a follow-up `RUN chmod`, which would add an extra layer holding a
# second copy of the script.
COPY --chmod=755 entrypoint.sh ./

# Frontend build output from stage 1 → frontend/dist (where api.py mounts it)
COPY --from=frontend-builder /build/frontend/dist ./frontend/dist

# Runtime defaults. Each value can be overridden from the HF Space
# "Variables and secrets" panel.

# Listening port; same number as the EXPOSE below.
ENV PORT=7860

# Ollama settings: model tag, model store location under /data, and the
# keep-alive duration.
ENV OLLAMA_MODEL=gemma4:e4b-it-q4_K_M \
    OLLAMA_MODELS=/data/.ollama/models \
    OLLAMA_KEEP_ALIVE=24h

# Hugging Face cache directory under /data, and the Whisper model identifier.
ENV HF_HOME=/data/.cache/huggingface \
    WHISPER_MODEL=Tushar9802/whisper-large-v2-hindi-ct2

# Documentation only: EXPOSE does not publish the port by itself.
EXPOSE 7860

# Exec form; the relative path resolves against the image's working directory.
ENTRYPOINT ["./entrypoint.sh"]