#!/usr/bin/env sh
# Start Ollama daemon in the background, wait for it to be ready,
# then launch uvicorn on the HF Spaces default port.
#
# HF Spaces locks down /tmp for unprivileged users, so write logs to
# $HOME (which we own) instead.
set -e
# --- Earth-observation toolchain (Phase 1 + Phase 4) -------------------
# Build-time install was blocked by HF's build-disk threshold (5
# attempts; all failed at the same point). Installing at runtime works
# around the build-sandbox limit: the running container has more disk.
#
# Use `--target=$EO_DIR` instead of `--user`: explicit path that we
# can prepend to PYTHONPATH ourselves, so the install location is
# guaranteed visible regardless of HF Spaces' Python site-config.
# The `--user` approach was failing silently because HF's Python
# environment apparently bypasses the user-site discovery path.
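# (A quick diagnostic, if the --user failure ever needs re-confirming; it
# uses only stdlib `site` introspection and is not part of normal startup:
#   python -c "import site; print(site.ENABLE_USER_SITE, site.getusersitepackages())"
# If ENABLE_USER_SITE prints False/None there, user-site installs would never
# be importable, which matches the silent failure described above.)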
EO_DIR="$HOME/.eo-pkgs"
EO_MARKER="$EO_DIR/.installed"
if [ ! -f "$EO_MARKER" ]; then
  echo "[entrypoint] EO toolchain not yet installed; running pip install (~2 min)..."
  mkdir -p "$EO_DIR"
  # Bisect: previous build (1cf59ee) added torchvision + 7 more deps
  # at once and the whole install failed (eo_dir empty, no marker).
  # Pip's resolver is all-or-nothing per RUN; one bad package fails
  # everything. Revert to the known-good 4 + just torchvision (the
  # one terratorch actually needs to import). Once this proves out,
  # add Prithvi-live deps in a second RUN.
  if pip install --no-cache-dir --no-deps --target="$EO_DIR" \
      terratorch==1.1rc6 \
      einops \
      diffusers \
      timm \
      torchvision; then
    echo "[entrypoint] pip install OK; verifying import..."
    if PYTHONPATH="$EO_DIR:$PYTHONPATH" python -c "
import terratorch
from terratorch.registry import FULL_MODEL_REGISTRY
import terratorch.models.backbones.terramind.model.terramind_register
n = len([k for k in FULL_MODEL_REGISTRY if 'terramind' in k.lower()])
assert n > 0, 'no terramind register entries'
print(f'[entrypoint] terratorch ok, terramind register: {n} entries')
"; then
      touch "$EO_MARKER"
      echo "[entrypoint] EO toolchain READY at $EO_DIR"
    else
      echo "[entrypoint] EO verify FAILED -> TerraMind/Prithvi-live will skip"
    fi
  else
    echo "[entrypoint] pip install FAILED -> TerraMind/Prithvi-live will skip"
  fi
else
  echo "[entrypoint] EO toolchain already installed at $EO_DIR (cached)"
fi
# Always export PYTHONPATH so uvicorn can find the install (no-op if
# the install failed and the dir is empty; the lazy-import in the
# specialists handles that case cleanly).
export PYTHONPATH="$EO_DIR:$PYTHONPATH"
# Stream Ollama's stdout+stderr to BOTH stdout (so it shows up in HF
# Spaces runtime logs; needed to see GPU discovery output from
# OLLAMA_DEBUG=1) AND a file (for the readiness fail-fast tail below).
LOG_FILE="$HOME/ollama.log"
ollama serve 2>&1 | tee "$LOG_FILE" &
OLLAMA_PID=$!
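# ($! above is the PID of the pipeline tail (tee), not of ollama itself;
#  that still works for the liveness check below, because tee exits as soon
#  as ollama dies and closes the pipe.)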
# Wait for Ollama to be reachable (up to 60 s; first start can be slow
# on a cold container with persistent storage being mounted)
for i in $(seq 1 60); do
  if curl -sf http://127.0.0.1:11434/ > /dev/null 2>&1; then
    echo "[entrypoint] ollama up (pid $OLLAMA_PID) after ${i}s"
    break
  fi
  if ! kill -0 "$OLLAMA_PID" 2>/dev/null; then
    echo "[entrypoint] FATAL: ollama serve died. Last 40 lines of $LOG_FILE:"
    tail -40 "$LOG_FILE" || true
    exit 1
  fi
  sleep 1
done
if ! curl -sf http://127.0.0.1:11434/ > /dev/null 2>&1; then
  echo "[entrypoint] FATAL: ollama did not become ready within 60s"
  tail -40 "$LOG_FILE" || true
  exit 1
fi
# Granite 4.1:8b is pulled at runtime instead of baked into the image:
# the EO toolchain (Phase 1 Prithvi + Phase 4 TerraMind) doesn't
# fit alongside Granite weights in HF's build sandbox. First container
# start does the pull (~2 min over the wire). Subsequent runtime
# restarts within the same image lifetime reuse Ollama's cache, so
# this is a one-time per-image cost.
#
# 3b is checked too, but with RIPRAP_OLLAMA_3B_TAG=granite4.1:8b set,
# the planner alias resolves to 8b as well, so 8b alone covers
# planner + reconciler.
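#
# A minimal sketch of how that override is assumed to be consumed on the
# router side (illustrative only; PLANNER_TAG is a made-up name and the
# real lookup lives in the app code, not in this script):
#   PLANNER_TAG="${RIPRAP_OLLAMA_3B_TAG:-granite4.1:3b}"  # -> granite4.1:8b when overridden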
for model in "granite4.1:8b" "granite4.1:3b"; do
  if ! ollama list | grep -q "$model"; then
    if [ "$model" = "granite4.1:8b" ]; then
      echo "[entrypoint] $model not found; pulling now (~5GB, ~2 min over the wire)..."
      ollama pull "$model" || {
        echo "[entrypoint] FATAL: pull failed for $model -> reconciler will not work"
        exit 1
      }
    else
      # 3B is optional; if it's not there and the env override is set,
      # the router will route the planner alias to 8B.
      echo "[entrypoint] $model not found (optional; planner alias remapped to 8b via RIPRAP_OLLAMA_3B_TAG)"
    fi
  fi
done
ollama list
# Pre-warm Granite 4.1:8b into VRAM so the first reconcile doesn't pay
# the ~30s model-load tax. A one-token prompt ("hi" with num_predict=1)
# keeps the request tiny; the 24h keep_alive (matching OLLAMA_KEEP_ALIVE)
# holds the weights resident through the demo.
echo "[entrypoint] pre-warming granite4.1:8b into VRAM (one-shot)..."
curl -s -X POST http://127.0.0.1:11434/api/generate \
  -d '{"model":"granite4.1:8b","prompt":"hi","stream":false,"keep_alive":"24h","options":{"num_predict":1}}' \
  -o /dev/null --max-time 120 \
  && echo "[entrypoint] granite4.1:8b warm" \
  || echo "[entrypoint] WARNING: 8b warmup failed (will load lazily)"
# Log GPU visibility + Ollama lib layout so we can confirm CUDA dispatch
# from the runtime logs (paired with OLLAMA_DEBUG=1 in the daemon).
if command -v nvidia-smi > /dev/null 2>&1; then
  echo "[entrypoint] nvidia-smi present:"
  nvidia-smi -L || true
else
  echo "[entrypoint] nvidia-smi NOT present -> Ollama will run on CPU"
fi
echo "[entrypoint] ollama lib dirs:"
ls -d /usr/lib/ollama 2>/dev/null && ls /usr/lib/ollama 2>/dev/null | head -20 || echo " /usr/lib/ollama missing"
ls -d /usr/local/lib/ollama 2>/dev/null && ls /usr/local/lib/ollama 2>/dev/null | head -20 || echo " /usr/local/lib/ollama missing"
exec uvicorn web.main:app --host 0.0.0.0 --port 7860 --log-level info