#!/usr/bin/env sh
# Start Ollama daemon in the background, wait for it to be ready,
# then launch uvicorn on the HF Spaces default port.
#
# HF Spaces locks down /tmp for unprivileged users — write logs to
# $HOME (which we own) instead.
set -e

# --- Earth-observation toolchain (Phase 1 + Phase 4) -------------------
# Build-time install was blocked by HF's build-disk threshold (5
# attempts; all failed at the same point). Installing at runtime works
# around the build-sandbox limit — the running container has more disk.
#
# Use `--target=$EO_DIR` instead of `--user`: an explicit path that we
# can prepend to PYTHONPATH ourselves, so the install location is
# guaranteed visible regardless of HF Spaces' Python site-config.
# The `--user` approach was failing silently because HF's Python
# environment apparently bypasses the user-site discovery path.
EO_DIR="$HOME/.eo-pkgs"
EO_MARKER="$EO_DIR/.installed"

if [ ! -f "$EO_MARKER" ]; then
    echo "[entrypoint] EO toolchain not yet installed; running pip install (~2 min)..."
    mkdir -p "$EO_DIR"
    # Bisect: the previous build (1cf59ee) added torchvision + 7 more deps
    # at once and the whole install failed ($EO_DIR empty, no marker).
    # Pip's resolver is all-or-nothing per invocation — one bad package
    # fails everything. Revert to the known-good 4 plus just torchvision
    # (the one terratorch actually needs to import). Once this proves out,
    # add the Prithvi-live deps in a second invocation.
    if pip install --no-cache-dir --no-deps --target="$EO_DIR" \
            terratorch==1.1rc6 \
            einops \
            diffusers \
            timm \
            torchvision; then
        echo "[entrypoint] pip install OK; verifying import..."
        if PYTHONPATH="$EO_DIR:$PYTHONPATH" python -c "
import terratorch
from terratorch.registry import FULL_MODEL_REGISTRY
import terratorch.models.backbones.terramind.model.terramind_register
n = len([k for k in FULL_MODEL_REGISTRY if 'terramind' in k.lower()])
assert n > 0, 'no terramind register entries'
print(f'[entrypoint] terratorch ok, terramind register: {n} entries')
"; then
            touch "$EO_MARKER"
            echo "[entrypoint] EO toolchain READY at $EO_DIR"
        else
            echo "[entrypoint] EO verify FAILED — TerraMind/Prithvi-live will skip"
        fi
    else
        echo "[entrypoint] pip install FAILED — TerraMind/Prithvi-live will skip"
    fi
else
    echo "[entrypoint] EO toolchain already installed at $EO_DIR (cached)"
fi

# Always export PYTHONPATH so uvicorn can find the install (a no-op if
# the install failed and the dir is empty — the lazy-import in the
# specialists handles that case cleanly).
export PYTHONPATH="$EO_DIR:$PYTHONPATH"
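
# The "lazy-import in the specialists" mentioned above is the consumer of
# this PYTHONPATH. A hypothetical sketch of that guard (module and flag
# names are assumptions, not the actual specialist code):
#
#     try:
#         import terratorch          # resolved via the export above
#         EO_AVAILABLE = True
#     except ImportError:
#         EO_AVAILABLE = False       # TerraMind/Prithvi-live specialists skip cleanly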

# Stream Ollama's stdout+stderr to BOTH stdout (so it shows up in HF
# Spaces runtime logs — needed to see GPU discovery output from
# OLLAMA_DEBUG=1) AND a file (for the readiness fail-fast tail below).
LOG_FILE="$HOME/ollama.log"
ollama serve 2>&1 | tee "$LOG_FILE" &
# $! is the tee end of the background pipeline; tee exits when ollama
# serve does, so kill -0 against it still works as a liveness check.
OLLAMA_PID=$!

# Wait for Ollama to be reachable (up to 60 s — first start can be slow
# on a cold container with persistent storage being mounted)
for i in $(seq 1 60); do
    if curl -sf http://127.0.0.1:11434/ > /dev/null 2>&1; then
        echo "[entrypoint] ollama up (pid $OLLAMA_PID) after ${i}s"
        break
    fi
    if ! kill -0 "$OLLAMA_PID" 2>/dev/null; then
        echo "[entrypoint] FATAL: ollama serve died. Last 40 lines of $LOG_FILE:"
        tail -40 "$LOG_FILE" || true
        exit 1
    fi
    sleep 1
done

if ! curl -sf http://127.0.0.1:11434/ > /dev/null 2>&1; then
    echo "[entrypoint] FATAL: ollama did not become ready within 60s"
    tail -40 "$LOG_FILE" || true
    exit 1
fi

# Granite 4.1:8b is pulled at runtime instead of baked into the image
# — the EO toolchain (Phase 1 Prithvi + Phase 4 TerraMind) doesn't fit
# alongside the Granite weights in HF's build sandbox. The first
# container start does the pull (~2 min over the wire). Subsequent
# runtime restarts within the same image lifetime reuse Ollama's cache,
# so this is a one-time per-image cost.
#
# 3b is also handled if present, but with RIPRAP_OLLAMA_3B_TAG=granite4.1:8b
# set, the planner alias resolves to 8b too — so 8b alone covers
# planner + reconciler.
for model in "granite4.1:8b" "granite4.1:3b"; do
    if ! ollama list | grep -q "$model"; then
        if [ "$model" = "granite4.1:8b" ]; then
            echo "[entrypoint] $model not found; pulling now (~5GB, ~2 min over the wire)..."
            ollama pull "$model" || {
                echo "[entrypoint] FATAL: pull failed for $model — reconciler will not work"
                exit 1
            }
        else
            # 3B is optional; if it's not there and the env override is set,
            # the router will route the planner alias to 8B.
            echo "[entrypoint] $model not found (optional — planner alias remapped to 8b via RIPRAP_OLLAMA_3B_TAG)"
        fi
    fi
done
ollama list

# Pre-warm Granite 4.1:8b into VRAM so the first reconcile doesn't pay
# the ~30s model-load tax. The one-token prompt with num_predict=1 keeps
# the request tiny; keep_alive=24h holds the weights resident through
# the demo.
echo "[entrypoint] pre-warming granite4.1:8b into VRAM (one-shot)..."
curl -s -X POST http://127.0.0.1:11434/api/generate \
    -d '{"model":"granite4.1:8b","prompt":"hi","stream":false,"keep_alive":"24h","options":{"num_predict":1}}' \
    -o /dev/null --max-time 120 \
    && echo "[entrypoint] granite4.1:8b warm" \
    || echo "[entrypoint] WARNING: 8b warmup failed (will load lazily)"

# Log GPU visibility + Ollama lib layout so we can confirm CUDA dispatch
# from the runtime logs (paired with OLLAMA_DEBUG=1 in the daemon).
if command -v nvidia-smi > /dev/null 2>&1; then
    echo "[entrypoint] nvidia-smi present:"
    nvidia-smi -L || true
else
    echo "[entrypoint] nvidia-smi NOT present — Ollama will run on CPU"
fi
echo "[entrypoint] ollama lib dirs:"
ls -d /usr/lib/ollama 2>/dev/null && ls /usr/lib/ollama 2>/dev/null | head -20 || echo "  /usr/lib/ollama missing"
ls -d /usr/local/lib/ollama 2>/dev/null && ls /usr/local/lib/ollama 2>/dev/null | head -20 || echo "  /usr/local/lib/ollama missing"

exec uvicorn web.main:app --host 0.0.0.0 --port 7860 --log-level info