seriffic committed on
Commit
4fe675b
·
1 Parent(s): dbf7a0e

Dockerfile + entrypoint for Hugging Face Spaces

Browse files

The first deployable image: Python 3.12 base, full pip deps,
Ollama + granite4.1:3b baked in (~10 GB total), entrypoint starts
Ollama daemon in the background then uvicorn on port 7860.

helios_nyc.py is a small CLI probe for sanity-checking the
container locally — it walks the FSM end-to-end against a single
hard-coded address before any web traffic. Will get scrubbed once
agent.py covers the same role.

Files changed (3) hide show
  1. Dockerfile +70 -0
  2. entrypoint.sh +24 -0
  3. helios_nyc.py +89 -0
Dockerfile ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Riprap — Hugging Face Spaces (Docker SDK) deployment.
#
# Bakes:
#  - Python 3.12 + pip deps (~2.5 GB once torch is in)
#  - Ollama + granite4.1:3b model (~2 GB)
#  - All pre-computed fixtures in data/ + corpus/
#
# Runtime:
#  - Ollama daemon serves Granite 4.1
#  - Granite Embedding 278M auto-downloads via sentence-transformers
#    on first FastAPI startup (~280 MB) — cached to /home/user/.cache
#  - uvicorn FastAPI on port 7860 (HF default)

FROM python:3.12-slim AS base

# OS deps for geo libs + curl for the ollama installer
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl ca-certificates \
        gdal-bin libgdal-dev libgeos-dev libproj-dev \
    && rm -rf /var/lib/apt/lists/*

# HF Spaces convention: run as a non-root "user" account at /home/user/app.
# Granite Embedding cache lives in /home/user/.cache/huggingface — it
# survives container restarts when persistent storage is mounted there.
RUN useradd -m -u 1000 user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:/usr/local/bin:/usr/bin:/bin \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/home/user/.cache/huggingface \
    OLLAMA_HOST=127.0.0.1:11434 \
    OLLAMA_NUM_PARALLEL=2 \
    OLLAMA_KEEP_ALIVE=24h

# Install Ollama (single-binary install)
RUN curl -fsSL https://ollama.com/install.sh | sh

WORKDIR /home/user/app

# Python deps (separate layer so code-only changes don't re-install them)
COPY --chown=user:user requirements.txt ./
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Pull Granite 4.1:3b into the image. The official Ollama installer
# stores models under /usr/share/ollama/.ollama by default; we point at a
# user-writable location so the runtime container can also serve.
#
# Notes on this layer:
#  - poll the daemon instead of a fixed `sleep 3` (a guess that races on
#    slow builders);
#  - keep `ollama pull` on the &&-chain so a failed pull fails the build
#    instead of being masked by a trailing `|| true`;
#  - chown the model store in the SAME layer — a later `RUN chown -R`
#    would duplicate the ~2 GB of weights into an extra image layer.
ENV OLLAMA_MODELS=/home/user/.ollama/models
RUN mkdir -p "$OLLAMA_MODELS" && \
    (ollama serve > /tmp/ollama-build.log 2>&1 &) && \
    for i in $(seq 1 30); do \
        curl -sf http://127.0.0.1:11434/ > /dev/null 2>&1 && break; \
        sleep 1; \
    done && \
    ollama pull granite4.1:3b && \
    (pkill -f "ollama serve" || true) && \
    chown -R user:user /home/user/.ollama

# Pre-create the embedding cache dir so the runtime user can write to it
RUN mkdir -p "$HF_HOME" && chown -R user:user /home/user/.cache

# App code + fixtures (owned by `user` via --chown; no blanket chown needed)
COPY --chown=user:user app/ ./app/
COPY --chown=user:user web/ ./web/
COPY --chown=user:user scripts/ ./scripts/
COPY --chown=user:user data/ ./data/
COPY --chown=user:user corpus/ ./corpus/
COPY --chown=user:user agent.py helios_nyc.py ./
COPY --chown=user:user entrypoint.sh ./
RUN chmod +x ./entrypoint.sh

# Hand off to a non-root user the way HF Spaces expects
USER user

EXPOSE 7860
CMD ["./entrypoint.sh"]
entrypoint.sh ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env sh
# Start the Ollama daemon in the background, wait until its HTTP endpoint
# answers, then exec uvicorn on the HF Spaces default port (7860).
set -e

ollama serve > /tmp/ollama.log 2>&1 &
OLLAMA_PID=$!

# Wait for Ollama to be reachable (up to 30 s). The original fell through
# this loop silently on timeout; fail fast instead so the Space restarts
# rather than serving a backend with no model runtime behind it.
ready=0
for i in $(seq 1 30); do
    if curl -sf http://127.0.0.1:11434/ > /dev/null 2>&1; then
        ready=1
        echo "[entrypoint] ollama up (pid $OLLAMA_PID)"
        break
    fi
    sleep 1
done
if [ "$ready" -ne 1 ]; then
    echo "[entrypoint] ERROR: ollama did not become ready within 30 s" >&2
    cat /tmp/ollama.log >&2 || true
    exit 1
fi

# Sanity check: Granite 4.1 model is present (best-effort re-pull if not)
ollama list | grep -q "granite4.1:3b" || {
    echo "[entrypoint] WARNING: granite4.1:3b not found in ollama; pulling..."
    ollama pull granite4.1:3b || true
}

# exec replaces this shell so uvicorn receives signals (SIGTERM) directly
exec uvicorn web.main:app --host 0.0.0.0 --port 7860 --log-level info
helios_nyc.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """HeliOS-NYC — flood exposure register driver.
2
+
3
+ Phase 1 MVP: schools × (Sandy + DEP Stormwater scenarios) → ranked CSV.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import argparse
8
+ import sys
9
+ import warnings
10
+ from pathlib import Path
11
+
12
+ warnings.filterwarnings("ignore")
13
+
14
+ import pandas as pd # noqa: E402
15
+
16
+ from app.assets import schools # noqa: E402
17
+ from app.flood_layers import dep_stormwater, sandy_inundation # noqa: E402
18
+ from app.score import WEIGHTS, score_frame # noqa: E402
19
+
20
# Output directory for generated CSVs, created eagerly at import time so
# later to_csv calls never race a missing directory. parents=True keeps the
# mkdir idempotent even if OUT is ever pointed at a nested path.
OUT = Path(__file__).resolve().parent / "outputs"
OUT.mkdir(parents=True, exist_ok=True)
22
+
23
+
24
def build_schools_register() -> pd.DataFrame:
    """Build the Phase 1 schools flood-exposure register.

    Loads the schools layer, flags each school against the Sandy inundation
    zone and three DEP stormwater scenarios, scores the frame, and returns a
    plain DataFrame (geometry dropped, lat/lon retained) ready for CSV export.

    Returns:
        pd.DataFrame with identity columns, a 0/1 flag per flood layer,
        per-scenario depth labels, and the composite score/tier.
    """
    print("loading schools...", file=sys.stderr)
    s = schools.load()
    print(f" {len(s)} schools loaded", file=sys.stderr)

    print("joining Sandy Inundation Zone...", file=sys.stderr)
    s["sandy"] = sandy_inundation.join(s).astype(int)
    print(f" {int(s['sandy'].sum())} schools inside Sandy zone", file=sys.stderr)

    for scen in ["dep_extreme_2080", "dep_moderate_2050", "dep_moderate_current"]:
        print(f"joining {scen}...", file=sys.stderr)
        j = dep_stormwater.join(s, scen)
        # depth_class > 0 means the footprint intersects any flood depth band
        s[scen] = (j["depth_class"] > 0).astype(int)
        s[f"{scen}_depth_class"] = j["depth_class"].values
        s[f"{scen}_depth_label"] = j["depth_label"].values
        print(f" {int(s[scen].sum())} schools inside {scen}", file=sys.stderr)

    s = score_frame(s)

    # drop geometry for CSV; keep lat/lon for journalist usability.
    # Reproject ONCE — the original called to_crs("EPSG:4326") twice,
    # transforming the whole GeoSeries a second time for no benefit.
    wgs84 = s.geometry.to_crs("EPSG:4326")
    s["lat"] = wgs84.y
    s["lon"] = wgs84.x
    cols = ["loc_code", "name", "address", "borough", "bbl", "bin",
            "geo_district", "lat", "lon",
            "sandy",
            "dep_extreme_2080", "dep_extreme_2080_depth_label",
            "dep_moderate_2050", "dep_moderate_2050_depth_label",
            "dep_moderate_current", "dep_moderate_current_depth_label",
            "score", "tier"]
    return pd.DataFrame(s[cols])
54
+
55
+
56
def main() -> int:
    """CLI entry point: build the register, write it to CSV, print summaries.

    Returns:
        Process exit code — 0 on success, 2 for an unsupported asset class.
    """
    parser = argparse.ArgumentParser(description="HeliOS-NYC flood exposure register")
    parser.add_argument("--asset-class", default="schools")
    parser.add_argument("--out", default=None)
    parser.add_argument("--top", type=int, default=20, help="rows to print to stdout")
    args = parser.parse_args()

    # Guard clause: only the schools register exists in Phase 1.
    if args.asset_class != "schools":
        print(f"asset class '{args.asset_class}' not yet implemented", file=sys.stderr)
        return 2

    register = build_schools_register()
    # Highest score first; ties broken alphabetically by school name.
    register = register.sort_values(["score", "name"], ascending=[False, True])

    out_path = OUT / "schools_register.csv" if args.out is None else Path(args.out)
    register.to_csv(out_path, index=False)
    print(f"\nwrote {len(register)} rows -> {out_path}", file=sys.stderr)

    print(f"\n=== top {args.top} ===")
    print(register.head(args.top).to_string(index=False))

    print("\n=== tier distribution ===")
    print(register["tier"].value_counts().sort_index().to_string())

    # Per-signal hit counts, limited to weight keys actually present.
    print("\n=== signal totals ===")
    for signal in WEIGHTS:
        if signal in register.columns:
            print(f" {signal:24s}: {int(register[signal].sum()):4d} schools")

    return 0
86
+
87
+
88
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())