Dockerfile + entrypoint for Hugging Face Spaces
The first deployable image: Python 3.12 base, full pip deps,
Ollama + granite4.1:3b baked in (~10 GB total), entrypoint starts
Ollama daemon in the background then uvicorn on port 7860.
helios_nyc.py is a small CLI probe for sanity-checking the
container locally — it walks the FSM end-to-end against a single
hard-coded address before any web traffic. Will get scrubbed once
agent.py covers the same role.
- Dockerfile +70 -0
- entrypoint.sh +24 -0
- helios_nyc.py +89 -0
Dockerfile
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# syntax=docker/dockerfile:1
# Riprap — Hugging Face Spaces (Docker SDK) deployment.
#
# Bakes:
#   - Python 3.12 + pip deps (~2.5 GB once torch is in)
#   - Ollama + granite4.1:3b model (~2 GB)
#   - All pre-computed fixtures in data/ + corpus/
#
# Runtime:
#   - Ollama daemon serves Granite 4.1
#   - Granite Embedding 278M auto-downloads via sentence-transformers
#     on first FastAPI startup (~280 MB) — cached to /home/user/.cache
#   - uvicorn FastAPI on port 7860 (HF default)

FROM python:3.12-slim AS base

# bash + pipefail so the `curl | sh` installer below fails loudly if the
# download itself fails (default /bin/sh would mask it).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# OS deps for geo libs + curl for the ollama installer
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gdal-bin \
        libgdal-dev \
        libgeos-dev \
        libproj-dev \
    && rm -rf /var/lib/apt/lists/*

# HF Spaces convention: run as a non-root "user" account at /home/user/app.
# Granite Embedding cache lives in /home/user/.cache/huggingface — it
# survives container restarts when persistent storage is mounted there.
RUN useradd -m -u 1000 user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:/usr/local/bin:/usr/bin:/bin \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/home/user/.cache/huggingface \
    OLLAMA_HOST=127.0.0.1:11434 \
    OLLAMA_NUM_PARALLEL=2 \
    OLLAMA_KEEP_ALIVE=24h

# Install Ollama (single-binary install).
# NOTE(review): piping an unpinned installer is not reproducible; pin a
# release tarball + checksum if this image must be deterministic.
RUN curl -fsSL https://ollama.com/install.sh | sh

WORKDIR /home/user/app

# Python deps first (layer is cached until requirements.txt changes)
COPY --chown=user:user requirements.txt ./
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Pull Granite 4.1:3b into the image. The official Ollama installer
# stores models under /usr/share/ollama/.ollama by default; we point at a
# user-writable location so the runtime container can also serve.
# The chown MUST happen in this same layer: a later `RUN chown -R` would
# duplicate the ~2 GB model blobs into a second layer and double the image.
# `kill` is a shell builtin (slim has no procps, so pkill is unavailable).
ENV OLLAMA_MODELS=/home/user/.ollama/models
RUN mkdir -p "$OLLAMA_MODELS" && \
    { ollama serve > /tmp/ollama-pull.log 2>&1 & SERVE_PID=$!; } && \
    sleep 3 && \
    ollama pull granite4.1:3b && \
    kill "$SERVE_PID" 2>/dev/null || true && \
    chown -R user:user /home/user/.ollama

# App code + fixtures — ownership set via --chown so no follow-up
# `RUN chown -R /home/user` (which would re-copy every file) is needed.
COPY --chown=user:user app/ ./app/
COPY --chown=user:user web/ ./web/
COPY --chown=user:user scripts/ ./scripts/
COPY --chown=user:user data/ ./data/
COPY --chown=user:user corpus/ ./corpus/
COPY --chown=user:user agent.py helios_nyc.py ./
COPY --chown=user:user --chmod=755 entrypoint.sh ./

# Hand off to a non-root user the way HF Spaces expects
USER user

EXPOSE 7860
# Long start-period: first boot may still be pulling the model.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD curl -fsS http://127.0.0.1:7860/ || exit 1
CMD ["./entrypoint.sh"]
entrypoint.sh
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env sh
# Start Ollama daemon in the background, wait for it to be ready,
# then launch uvicorn on the HF Spaces default port.
set -eu

ollama serve > /tmp/ollama.log 2>&1 &
OLLAMA_PID=$!

# Wait for Ollama to be reachable (up to 30 s). On timeout we warn but
# continue, so the web app can still boot and surface its own errors.
ready=0
for i in $(seq 1 30); do
    if curl -sf http://127.0.0.1:11434/ > /dev/null 2>&1; then
        ready=1
        echo "[entrypoint] ollama up (pid $OLLAMA_PID)"
        break
    fi
    sleep 1
done
if [ "$ready" -eq 0 ]; then
    echo "[entrypoint] WARNING: ollama not reachable after 30s — see /tmp/ollama.log" >&2
fi

# Sanity check: Granite 4.1 model is present (best-effort pull if not)
ollama list | grep -q "granite4.1:3b" || {
    echo "[entrypoint] WARNING: granite4.1:3b not found in ollama; pulling..."
    ollama pull granite4.1:3b || true
}

# exec so uvicorn becomes PID 1 and receives SIGTERM from the platform.
exec uvicorn web.main:app --host 0.0.0.0 --port 7860 --log-level info
helios_nyc.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""HeliOS-NYC — flood exposure register driver.
|
| 2 |
+
|
| 3 |
+
Phase 1 MVP: schools × (Sandy + DEP Stormwater scenarios) → ranked CSV.
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
import argparse
|
| 8 |
+
import sys
|
| 9 |
+
import warnings
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
warnings.filterwarnings("ignore")
|
| 13 |
+
|
| 14 |
+
import pandas as pd # noqa: E402
|
| 15 |
+
|
| 16 |
+
from app.assets import schools # noqa: E402
|
| 17 |
+
from app.flood_layers import dep_stormwater, sandy_inundation # noqa: E402
|
| 18 |
+
from app.score import WEIGHTS, score_frame # noqa: E402
|
| 19 |
+
|
| 20 |
+
# Output directory for generated CSVs, created next to this script.
OUT = Path(__file__).resolve().parent / "outputs"
OUT.mkdir(exist_ok=True)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def build_schools_register() -> pd.DataFrame:
    """Build the Phase-1 flood exposure register for NYC schools.

    Loads the schools asset layer, flags each school against the Sandy
    inundation zone and three DEP stormwater scenarios, scores the frame
    via ``score_frame``, and returns a plain DataFrame (geometry dropped,
    WGS84 lat/lon kept) ready for CSV export. Progress goes to stderr so
    stdout stays clean for tabular output.
    """
    print("loading schools...", file=sys.stderr)
    s = schools.load()
    print(f" {len(s)} schools loaded", file=sys.stderr)

    print("joining Sandy Inundation Zone...", file=sys.stderr)
    # Boolean membership flag -> 0/1 int column.
    s["sandy"] = sandy_inundation.join(s).astype(int)
    print(f" {int(s['sandy'].sum())} schools inside Sandy zone", file=sys.stderr)

    for scen in ["dep_extreme_2080", "dep_moderate_2050", "dep_moderate_current"]:
        print(f"joining {scen}...", file=sys.stderr)
        j = dep_stormwater.join(s, scen)
        # depth_class > 0 means the school sits inside this scenario's footprint.
        s[scen] = (j["depth_class"] > 0).astype(int)
        s[f"{scen}_depth_class"] = j["depth_class"].values
        s[f"{scen}_depth_label"] = j["depth_label"].values
        print(f" {int(s[scen].sum())} schools inside {scen}", file=sys.stderr)

    s = score_frame(s)

    # drop geometry for CSV; keep lat/lon for journalist usability.
    # Reproject ONCE — the original called to_crs() twice (once per axis),
    # reprojecting the whole layer redundantly.
    lonlat = s.geometry.to_crs("EPSG:4326")
    s["lat"] = lonlat.y
    s["lon"] = lonlat.x
    cols = ["loc_code", "name", "address", "borough", "bbl", "bin",
            "geo_district", "lat", "lon",
            "sandy",
            "dep_extreme_2080", "dep_extreme_2080_depth_label",
            "dep_moderate_2050", "dep_moderate_2050_depth_label",
            "dep_moderate_current", "dep_moderate_current_depth_label",
            "score", "tier"]
    return pd.DataFrame(s[cols])
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def main() -> int:
    """CLI entry point: build the register, write the CSV, print summaries.

    Returns a process exit code (0 on success, 2 for an unsupported
    asset class).
    """
    parser = argparse.ArgumentParser(description="HeliOS-NYC flood exposure register")
    parser.add_argument("--asset-class", default="schools")
    parser.add_argument("--out", default=None)
    parser.add_argument("--top", type=int, default=20, help="rows to print to stdout")
    opts = parser.parse_args()

    # Phase 1 covers schools only.
    if opts.asset_class != "schools":
        print(f"asset class '{opts.asset_class}' not yet implemented", file=sys.stderr)
        return 2

    register = build_schools_register()
    # Highest score first; name as a stable tie-breaker.
    register = register.sort_values(["score", "name"], ascending=[False, True])

    destination = Path(opts.out) if opts.out else OUT / "schools_register.csv"
    register.to_csv(destination, index=False)
    print(f"\nwrote {len(register)} rows -> {destination}", file=sys.stderr)

    print(f"\n=== top {opts.top} ===")
    print(register.head(opts.top).to_string(index=False))

    print("\n=== tier distribution ===")
    print(register["tier"].value_counts().sort_index().to_string())

    print("\n=== signal totals ===")
    for signal in WEIGHTS:
        if signal in register.columns:
            print(f" {signal:24s}: {int(register[signal].sum()):4d} schools")

    return 0


if __name__ == "__main__":
    sys.exit(main())
|