Offline pre-compute scripts
The fixtures Riprap ships baked in all start their lives in scripts/:
fetch_nyc_dem — pull USGS 3DEP tiles, mosaic to NYC bbox
compute_hydrology_indices — TWI + HAND from the DEM via whitebox
fetch_ida_hwms — STN API → ida_2021_hwms_ny.geojson
run_prithvi_flood — Prithvi-EO 2.0 segmentation runner
run_prithvi_ida — Ida-specific pre/post diff variant
build_{nycha,schools,mta_entrances}_register — full FSM pass over each
register's rows, JSON cached to data/registers/
audit — full evidence audit per query for the printable report
dry_run — end-to-end CLI smoke without uvicorn
These run offline, on a fat workstation; only their outputs land in
the runtime image.
- scripts/audit.py +159 -0
- scripts/build_mta_entrances_register.py +24 -0
- scripts/build_nycha_register.py +21 -0
- scripts/build_schools_register.py +23 -0
- scripts/compute_hydrology_indices.py +92 -0
- scripts/dry_run.py +127 -0
- scripts/fetch_ida_hwms.py +54 -0
- scripts/fetch_nyc_dem.py +50 -0
- scripts/run_prithvi_flood.py +172 -0
- scripts/run_prithvi_ida.py +214 -0
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Hallucination audit harness.
|
| 2 |
+
|
| 3 |
+
Runs the FSM against a curated address sweep, logs every paragraph,
|
| 4 |
+
counts dropped sentences, flags any sentence with an event name not in
|
| 5 |
+
its source documents.
|
| 6 |
+
|
| 7 |
+
Run after the schools register has finished building (otherwise it
|
| 8 |
+
contends with the batch for Ollama).
|
| 9 |
+
|
| 10 |
+
python scripts/audit.py
|
| 11 |
+
"""
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
import re
|
| 16 |
+
import sys
|
| 17 |
+
import time
|
| 18 |
+
import warnings
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
|
| 21 |
+
warnings.filterwarnings("ignore")
|
| 22 |
+
|
| 23 |
+
ROOT = Path(__file__).resolve().parent.parent
|
| 24 |
+
sys.path.insert(0, str(ROOT))
|
| 25 |
+
|
| 26 |
+
from app.fsm import run # noqa: E402
|
| 27 |
+
|
| 28 |
+
OUT = ROOT / "outputs" / "audit_log.jsonl"
|
| 29 |
+
OUT.parent.mkdir(exist_ok=True, parents=True)
|
| 30 |
+
|
| 31 |
+
# A curated cross-borough sweep covering the full range of conditions
|
| 32 |
+
ADDRESSES = [
|
| 33 |
+
# Far Rockaway / Sandy zone (everything fires)
|
| 34 |
+
"180 Beach 35 St, Queens",
|
| 35 |
+
"Beach 105 Street and Rockaway Boulevard, Queens",
|
| 36 |
+
|
| 37 |
+
# Hollis / Jamaica (Ida basement deaths)
|
| 38 |
+
"153-09 90 Avenue, Jamaica, Queens",
|
| 39 |
+
"Hollis Avenue and 200th Street, Queens",
|
| 40 |
+
|
| 41 |
+
# Brooklyn coastal β Coney Island / NYCHA
|
| 42 |
+
"2950 W 25 Street, Brooklyn",
|
| 43 |
+
"Surf Avenue and West 25 Street, Brooklyn",
|
| 44 |
+
"Sheepshead Bay Road, Brooklyn",
|
| 45 |
+
|
| 46 |
+
# Carroll Gardens / Gowanus (chronic flooding)
|
| 47 |
+
"Smith and 9 Street, Brooklyn",
|
| 48 |
+
"Carroll Street and 3 Avenue, Brooklyn",
|
| 49 |
+
|
| 50 |
+
# Lower Manhattan / Sandy zone
|
| 51 |
+
"280 Broome Street, Manhattan",
|
| 52 |
+
"South Street Seaport, Manhattan",
|
| 53 |
+
"Battery Park, Manhattan",
|
| 54 |
+
|
| 55 |
+
# Midtown / dry control
|
| 56 |
+
"350 5 Avenue, Manhattan", # Empire State
|
| 57 |
+
"1 Times Square, Manhattan",
|
| 58 |
+
"Lincoln Center, Manhattan",
|
| 59 |
+
|
| 60 |
+
# Bronx
|
| 61 |
+
"Pelham Bay Park, Bronx",
|
| 62 |
+
"Hunts Point, Bronx",
|
| 63 |
+
"Yankee Stadium, Bronx",
|
| 64 |
+
|
| 65 |
+
# Staten Island
|
| 66 |
+
"Tottenville, Staten Island",
|
| 67 |
+
"Great Kills, Staten Island",
|
| 68 |
+
"St. George Ferry Terminal, Staten Island",
|
| 69 |
+
|
| 70 |
+
# Queens dry / inland
|
| 71 |
+
"Forest Hills, Queens",
|
| 72 |
+
"JFK Airport, Queens",
|
| 73 |
+
"Astoria Park, Queens",
|
| 74 |
+
|
| 75 |
+
# Edge cases
|
| 76 |
+
"Brooklyn Bridge Park, Brooklyn",
|
| 77 |
+
"Roosevelt Island, Manhattan",
|
| 78 |
+
]
|
| 79 |
+
|
| 80 |
+
EVENT_NAMES = ["sandy", "ida", "ophelia", "henri", "irene", "isaias",
               "harvey", "katrina", "florence"]


def find_event_leaks(paragraph: str, doc_corpus: str) -> list[str]:
    """Return event names mentioned in *paragraph* but absent from *doc_corpus*.

    A "leak" is a hallucination signal: the model named a storm that none of
    the source documents sent to it actually mention.

    Uses whole-word matching — plain substring tests flagged false positives
    (e.g. "ida" inside "Florida", "holiday"; "irene" inside "sirene").
    """
    p = paragraph.lower()
    corpus = doc_corpus.lower()
    leaks = []
    for ev in EVENT_NAMES:
        pattern = rf"\b{re.escape(ev)}\b"
        if re.search(pattern, p) and not re.search(pattern, corpus):
            leaks.append(ev)
    return leaks
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def main() -> int:
    """Sweep every query in ADDRESSES through the FSM, append one JSON
    record per query to OUT (JSONL), and print a per-query line plus a
    final summary to stderr.

    Always returns 0; the summary counters are informational only.
    """
    # Start each audit run from a clean log file.
    if OUT.exists():
        OUT.unlink()
    print(f"running audit on {len(ADDRESSES)} addresses; logging to {OUT}",
          file=sys.stderr)

    summary = {
        "total": 0, "ok": 0, "dropped_total": 0,
        "with_drops": 0, "event_leaks": 0,
    }
    t0 = time.time()
    for q in ADDRESSES:
        try:
            r = run(q)
        except Exception as e:
            # A failed query is reported and skipped; it does not count
            # toward any summary bucket (including "total").
            print(f" ! {q[:50]:<50} ERR: {type(e).__name__}: {e}", file=sys.stderr)
            continue
        para = r.get("paragraph") or ""
        audit = r.get("audit") or {}
        dropped = audit.get("dropped", []) or []

        # rebuild a haystack from documents we sent to Granite
        from app.reconcile import build_documents
        # NOTE: build_documents needs the same snap shape the FSM stored
        snap = {k: r.get(k) for k in ("geocode","sandy","dep","floodnet",
                                      "nyc311","microtopo","ida_hwm","rag")}
        doc_msgs = build_documents(snap)
        haystack = "\n".join(m.get("content", "") for m in doc_msgs)

        # Event names present in the paragraph but absent from the source
        # documents are counted as hallucination leaks.
        leaks = find_event_leaks(para, haystack)

        rec = {
            "query": q,
            "address": (r.get("geocode") or {}).get("address"),
            "borough": (r.get("geocode") or {}).get("borough"),
            "paragraph": para,
            "raw": audit.get("raw"),
            "dropped": dropped,
            "event_leaks": leaks,
            "sandy": r.get("sandy"),
            "n_floodnet_events_3y": (r.get("floodnet") or {}).get("n_flood_events_3y", 0),
            "n_311": (r.get("nyc311") or {}).get("n", 0),
            "microtopo_pct_200m": (r.get("microtopo") or {}).get("rel_elev_pct_200m"),
        }
        # Append-mode JSONL so an interrupted run still leaves usable output.
        # default=str stringifies any non-JSON-native values in the snapshot.
        with OUT.open("a") as f:
            f.write(json.dumps(rec, default=str) + "\n")

        summary["total"] += 1
        summary["dropped_total"] += len(dropped)
        if dropped: summary["with_drops"] += 1
        if leaks: summary["event_leaks"] += 1
        if not leaks and not dropped: summary["ok"] += 1

        # NOTE(review): both branches of this marker expression use the same
        # (mojibake) character and the final "Β·" arm is unreachable, since
        # the ternary condition already splits on dropped/leaks — the original
        # glyphs (likely ✓ / ⚠ / ·) appear lost to an encoding mangle.
        marker = "β" if (not leaks and not dropped) else ("β " if dropped or leaks else "Β·")
        print(f" {marker} {q[:50]:<50} dropped={len(dropped)} leaks={leaks or '-'}",
              file=sys.stderr)

    elapsed = time.time() - t0
    print(f"\n=== SUMMARY (in {elapsed:.0f}s) ===", file=sys.stderr)
    for k, v in summary.items():
        print(f" {k:18s} {v}", file=sys.stderr)
    print(f"\nfull log: {OUT}", file=sys.stderr)
    return 0
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
if __name__ == "__main__":
|
| 159 |
+
sys.exit(main())
|
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pre-compute the MTA Subway Entrances flood-exposure register.
|
| 2 |
+
|
| 3 |
+
Run: python scripts/build_mta_entrances_register.py
|
| 4 |
+
|
| 5 |
+
Resume-safe: re-running picks up after a network blip.
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import sys
|
| 10 |
+
import warnings
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
|
| 13 |
+
warnings.filterwarnings("ignore")
|
| 14 |
+
|
| 15 |
+
ROOT = Path(__file__).resolve().parent.parent
|
| 16 |
+
sys.path.insert(0, str(ROOT))
|
| 17 |
+
|
| 18 |
+
from app.assets import mta_entrances # noqa: E402
|
| 19 |
+
from app.register_builder import build_register # noqa: E402
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
if __name__ == "__main__":
|
| 23 |
+
build_register("mta_entrances", mta_entrances.load,
|
| 24 |
+
meta_keys=("name", "address", "borough", "entrance_type"))
|
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pre-compute the NYCHA developments flood-exposure register.
|
| 2 |
+
Run: python scripts/build_nycha_register.py
|
| 3 |
+
"""
|
| 4 |
+
from __future__ import annotations
|
| 5 |
+
|
| 6 |
+
import sys
|
| 7 |
+
import warnings
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
warnings.filterwarnings("ignore")
|
| 11 |
+
|
| 12 |
+
ROOT = Path(__file__).resolve().parent.parent
|
| 13 |
+
sys.path.insert(0, str(ROOT))
|
| 14 |
+
|
| 15 |
+
from app.assets import nycha # noqa: E402
|
| 16 |
+
from app.register_builder import build_register # noqa: E402
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
if __name__ == "__main__":
|
| 20 |
+
build_register("nycha", nycha.load,
|
| 21 |
+
meta_keys=("name", "address", "borough", "tds_num"))
|
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pre-compute the NYC public schools flood-exposure register.
|
| 2 |
+
Run: python scripts/build_schools_register.py
|
| 3 |
+
|
| 4 |
+
Resume-safe: re-running picks up after a network blip.
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import sys
|
| 9 |
+
import warnings
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
warnings.filterwarnings("ignore")
|
| 13 |
+
|
| 14 |
+
ROOT = Path(__file__).resolve().parent.parent
|
| 15 |
+
sys.path.insert(0, str(ROOT))
|
| 16 |
+
|
| 17 |
+
from app.assets import schools # noqa: E402
|
| 18 |
+
from app.register_builder import build_register # noqa: E402
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
if __name__ == "__main__":
|
| 22 |
+
build_register("schools", schools.load,
|
| 23 |
+
meta_keys=("name", "address", "borough", "bbl", "bin"))
|
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pre-compute TWI (Topographic Wetness Index) and HAND (Height Above
|
| 2 |
+
Nearest Drainage) for the cached NYC DEM.
|
| 3 |
+
|
| 4 |
+
These are standard hydrology indices used by InfoWorks ICM, HEC-RAS,
|
| 5 |
+
and the Forest Service / USGS. They give the microtopo specialist new
|
| 6 |
+
per-address signal beyond elevation percentile + relief:
|
| 7 |
+
|
| 8 |
+
- **TWI** = ln(specific_catchment_area / tan(slope)). HIGH values mean
|
| 9 |
+
a cell is saturation-prone (large upslope drainage area + low slope =
|
| 10 |
+
water accumulates here).
|
| 11 |
+
- **HAND** = vertical distance from each cell to the nearest channel.
|
| 12 |
+
LOW values (sub-meter) mean the address sits at or near drainage
|
| 13 |
+
level β flood-vulnerable. HIGH values mean it's perched on dry ground.
|
| 14 |
+
|
| 15 |
+
Output: data/twi.tif and data/hand.tif, aligned with data/nyc_dem_30m.tif.
|
| 16 |
+
|
| 17 |
+
Run: python scripts/compute_hydrology_indices.py
|
| 18 |
+
"""
|
| 19 |
+
from __future__ import annotations
|
| 20 |
+
|
| 21 |
+
import sys
|
| 22 |
+
import warnings
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
|
| 25 |
+
warnings.filterwarnings("ignore")
|
| 26 |
+
|
| 27 |
+
ROOT = Path(__file__).resolve().parent.parent
|
| 28 |
+
DEM_PATH = ROOT / "data" / "nyc_dem_30m.tif"
|
| 29 |
+
TWI_OUT = ROOT / "data" / "twi.tif"
|
| 30 |
+
HAND_OUT = ROOT / "data" / "hand.tif"
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def main() -> int:
|
| 34 |
+
if not DEM_PATH.exists():
|
| 35 |
+
print(f"missing {DEM_PATH}; run scripts/fetch_nyc_dem.py first",
|
| 36 |
+
file=sys.stderr)
|
| 37 |
+
return 1
|
| 38 |
+
if TWI_OUT.exists() and HAND_OUT.exists():
|
| 39 |
+
print(f"already exist: {TWI_OUT.name}, {HAND_OUT.name}", file=sys.stderr)
|
| 40 |
+
return 0
|
| 41 |
+
|
| 42 |
+
import whitebox_workflows as wbw
|
| 43 |
+
wbe = wbw.WbEnvironment()
|
| 44 |
+
wbe.verbose = True
|
| 45 |
+
wbe.working_directory = str(ROOT / "data")
|
| 46 |
+
|
| 47 |
+
print("loading DEM...", file=sys.stderr)
|
| 48 |
+
dem = wbe.read_raster(str(DEM_PATH))
|
| 49 |
+
|
| 50 |
+
# 1. Hydrologic conditioning β fill depressions so flow routes terminate
|
| 51 |
+
# at the boundary, not inside spurious sinks. Wang & Liu fill is fast.
|
| 52 |
+
print("filling depressions (Wang & Liu)...", file=sys.stderr)
|
| 53 |
+
dem_filled = wbe.fill_depressions_wang_and_liu(dem)
|
| 54 |
+
|
| 55 |
+
# 2. D-infinity flow accumulation -> specific catchment area for TWI
|
| 56 |
+
print("D-infinity flow accumulation...", file=sys.stderr)
|
| 57 |
+
sca = wbe.dinf_flow_accum(dem_filled, out_type="specific contributing area",
|
| 58 |
+
log_transform=False)
|
| 59 |
+
|
| 60 |
+
# 3. Slope (degrees) for TWI
|
| 61 |
+
print("slope...", file=sys.stderr)
|
| 62 |
+
slope = wbe.slope(dem_filled, units="degrees")
|
| 63 |
+
|
| 64 |
+
# 4. TWI = ln(SCA / tan(slope))
|
| 65 |
+
print("TWI...", file=sys.stderr)
|
| 66 |
+
twi = wbe.wetness_index(sca, slope)
|
| 67 |
+
wbe.write_raster(twi, str(TWI_OUT.name), compress=True)
|
| 68 |
+
|
| 69 |
+
# 5. Streams: D8 flow accumulation + threshold to a stream raster
|
| 70 |
+
print("D8 flow accumulation for stream extraction...", file=sys.stderr)
|
| 71 |
+
d8_accum = wbe.d8_flow_accum(dem_filled, out_type="cells",
|
| 72 |
+
log_transform=False)
|
| 73 |
+
|
| 74 |
+
# Threshold the flow accumulation to identify channels β pick a value that
|
| 75 |
+
# gives a reasonable drainage network density. For 30m DEM over NYC,
|
| 76 |
+
# >1500 cells (~1.35 kmΒ²) is a reasonable channel-initiation threshold.
|
| 77 |
+
print("extracting streams...", file=sys.stderr)
|
| 78 |
+
streams = wbe.extract_streams(d8_accum, threshold=1500.0)
|
| 79 |
+
|
| 80 |
+
# 6. HAND = vertical distance to nearest stream (along flow paths)
|
| 81 |
+
print("HAND (elevation_above_stream)...", file=sys.stderr)
|
| 82 |
+
hand = wbe.elevation_above_stream(dem_filled, streams)
|
| 83 |
+
wbe.write_raster(hand, str(HAND_OUT.name), compress=True)
|
| 84 |
+
|
| 85 |
+
print(f"\nwrote:\n {TWI_OUT} ({TWI_OUT.stat().st_size // 1024} KB)\n"
|
| 86 |
+
f" {HAND_OUT} ({HAND_OUT.stat().st_size // 1024} KB)",
|
| 87 |
+
file=sys.stderr)
|
| 88 |
+
return 0
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
if __name__ == "__main__":
|
| 92 |
+
sys.exit(main())
|
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Quick end-to-end sanity check.
|
| 2 |
+
|
| 3 |
+
Exercises every public route once and prints a summary. Catches:
|
| 4 |
+
- 404/500s on routes
|
| 5 |
+
- missing static assets
|
| 6 |
+
- broken /api/stream or /api/compare SSE
|
| 7 |
+
- missing register data
|
| 8 |
+
- hallucination drops > N
|
| 9 |
+
|
| 10 |
+
Run while the server is up:
|
| 11 |
+
python scripts/dry_run.py
|
| 12 |
+
"""
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import json
|
| 16 |
+
import sys
|
| 17 |
+
import time
|
| 18 |
+
|
| 19 |
+
import httpx
|
| 20 |
+
|
| 21 |
+
BASE = "http://127.0.0.1:8765"
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def check(label: str, fn):
    """Run one probe and print a single result line.

    *fn* is a zero-argument callable returning ``(ok, detail)``. The line
    shows a pass/fail marker, the label, elapsed time, and the detail.
    Any exception from the probe is reported inline and counts as failure.
    Returns the probe's ok value (False on exception).
    """
    started = time.time()
    try:
        ok, detail = fn()
    except Exception as e:
        # Probe blew up — report it the same way as a failed check.
        elapsed = time.time() - started
        print(f" β {label:42s} ({elapsed:5.2f}s) EXCEPTION: {type(e).__name__}: {e}")
        return False
    elapsed = time.time() - started
    marker = "β" if ok else "β"
    print(f" {marker} {label:42s} ({elapsed:5.2f}s) {detail}")
    return ok
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def get_status(path: str) -> tuple[bool, str]:
    """GET *path* on the local server; ok iff the response is HTTP 200."""
    r = httpx.get(BASE + path, timeout=10)
    return r.status_code == 200, f"HTTP {r.status_code} ({len(r.content)} bytes)"
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def stream_one(query: str) -> tuple[bool, str]:
    """Consume the /api/stream SSE feed for one query.

    Ok iff a "final" event arrives; the detail string reports step count,
    dropped-sentence count from the hallucination audit, and local energy.
    """
    with httpx.stream("GET", BASE + f"/api/stream?q={query}", timeout=120) as r:
        if r.status_code != 200:
            return False, f"HTTP {r.status_code}"
        steps = 0; final = None
        for line in r.iter_lines():
            # SSE data lines carry a "data: " prefix; payload is JSON.
            if line.startswith("data: "):
                d = json.loads(line[6:])
                if d.get("kind") == "step": steps += 1
                elif d.get("kind") == "final": final = d
        if not final:
            return False, f"no final event (steps={steps})"
        dropped = len(((final.get("audit") or {}).get("dropped") or []))
        en = final.get("energy") or {}
        return True, (f"steps={steps}, dropped={dropped}, "
                      f"energy={en.get('local_mwh','?')} mWh local")
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def compare_one(a: str, b: str) -> tuple[bool, str]:
    """Consume the /api/compare SSE feed for a pair of queries.

    Ok iff BOTH sides ("a" and "b") emit a "final" event before the
    stream closes.
    """
    with httpx.stream("GET", BASE + f"/api/compare?a={a}&b={b}", timeout=120) as r:
        if r.status_code != 200:
            return False, f"HTTP {r.status_code}"
        finals = {}
        steps = 0
        for line in r.iter_lines():
            # SSE data lines carry a "data: " prefix; payload is JSON with a
            # "side" field identifying which query the event belongs to.
            if line.startswith("data: "):
                d = json.loads(line[6:])
                if d.get("kind") == "step": steps += 1
                elif d.get("kind") == "final": finals[d.get("side")] = d
        if "a" not in finals or "b" not in finals:
            return False, f"missing final (got {list(finals)})"
        return True, f"both sides done; steps={steps}"
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def register_check(asset_class: str) -> tuple[bool, str]:
    """Fetch /api/register/<asset_class> and summarize its row/tier counts.

    503 is treated as "register not built" (the pre-compute script has not
    run); any other non-200 is a plain HTTP failure.
    """
    r = httpx.get(BASE + f"/api/register/{asset_class}", timeout=10)
    if r.status_code == 503:
        return False, "register not built"
    if r.status_code != 200:
        return False, f"HTTP {r.status_code}"
    data = r.json()
    rows = data.get("rows", [])
    # Count rows per tier; rows without a "tier" key fall into bucket 0.
    tiers = {1: 0, 2: 0, 3: 0}
    for r_ in rows:
        tiers[r_.get("tier", 0)] = tiers.get(r_.get("tier", 0), 0) + 1
    return True, f"{len(rows)} rows Β· tier1={tiers.get(1,0)} t2={tiers.get(2,0)} t3={tiers.get(3,0)}"
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def main() -> int:
    """Exercise every public route once; return 0 iff all checks passed.

    Previously this function ignored every check() result and implicitly
    returned None, so ``sys.exit(main())`` always exited 0 and the smoke
    test could never fail CI. Results are now collected and the exit code
    is 1 when any check fails.
    """
    print(f"=== Riprap dry-run vs {BASE} ===\n")

    results: list[bool] = []
    record = results.append

    print("[Pages]")
    record(check("/", lambda: get_status("/")))
    record(check("/compare", lambda: get_status("/compare")))
    record(check("/register/schools", lambda: get_status("/register/schools")))
    record(check("/register/nycha", lambda: get_status("/register/nycha")))
    record(check("/static/style.css", lambda: get_status("/static/style.css")))
    record(check("/static/app.js", lambda: get_status("/static/app.js")))
    record(check("/static/compare.js", lambda: get_status("/static/compare.js")))
    record(check("/static/register.js", lambda: get_status("/static/register.js")))
    fontf = "/static/vendor/nyco/fonts/IBM-Plex-Sans/IBMPlexSans-Regular.woff2"
    record(check(fontf, lambda: get_status(fontf)))

    print("\n[API: layer endpoints]")
    record(check("/api/layers/sandy",
                 lambda: get_status("/api/layers/sandy?lat=40.59&lon=-73.77&r=1500")))
    record(check("/api/layers/dep_extreme_2080",
                 lambda: get_status("/api/layers/dep_extreme_2080?lat=40.59&lon=-73.77&r=1500")))
    record(check("/api/floodnet_near",
                 lambda: get_status("/api/floodnet_near?lat=40.59&lon=-73.77&r=1000")))

    print("\n[API: register endpoints]")
    record(check("/api/register/schools", lambda: register_check("schools")))
    record(check("/api/register/nycha", lambda: register_check("nycha")))

    print("\n[Streams]")
    record(check("stream Β· 180 Beach 35 St",
                 lambda: stream_one("180 Beach 35 St, Queens")))
    record(check("stream Β· Empire State (cleaner case)",
                 lambda: stream_one("350 5 Avenue, Manhattan")))
    record(check("compare Β· Hollis vs Empire State",
                 lambda: compare_one("153-09 90 Avenue Jamaica Queens",
                                     "350 5 Avenue Manhattan")))

    # Non-zero exit on any failure so CI / shell callers can detect breakage.
    return 0 if all(results) else 1
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
if __name__ == "__main__":
|
| 127 |
+
sys.exit(main())
|
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""One-shot fetch of NYC Hurricane Ida 2021 high-water marks from USGS STN.
|
| 2 |
+
|
| 3 |
+
Output: data/ida_2021_hwms_ny.geojson β point GeoJSON with elev_ft + site
|
| 4 |
+
metadata. Used by the Riprap agent's `step_ida_hwm` action as the
|
| 5 |
+
empirical post-event flood signal (the same role Prithvi-EO plays for
|
| 6 |
+
SAR-derived extents in the parent project).
|
| 7 |
+
"""
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import json
|
| 11 |
+
import sys
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
import httpx
|
| 15 |
+
|
| 16 |
+
OUT = Path(__file__).resolve().parent.parent / "data" / "ida_2021_hwms_ny.geojson"
|
| 17 |
+
URL = "https://stn.wim.usgs.gov/STNServices/HWMs/FilteredHWMs.json"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def main() -> int:
|
| 21 |
+
print("fetching USGS STN Ida 2021 NY HWMs...", file=sys.stderr)
|
| 22 |
+
r = httpx.get(URL, params={"Event": 312, "States": "NY"}, timeout=60)
|
| 23 |
+
r.raise_for_status()
|
| 24 |
+
data = r.json()
|
| 25 |
+
|
| 26 |
+
features = []
|
| 27 |
+
for d in data:
|
| 28 |
+
lat = d.get("latitude"); lon = d.get("longitude")
|
| 29 |
+
if lat is None or lon is None:
|
| 30 |
+
continue
|
| 31 |
+
features.append({
|
| 32 |
+
"type": "Feature",
|
| 33 |
+
"geometry": {"type": "Point", "coordinates": [lon, lat]},
|
| 34 |
+
"properties": {
|
| 35 |
+
"hwm_id": d.get("hwm_id"),
|
| 36 |
+
"site_no": d.get("site_no"),
|
| 37 |
+
"elev_ft": d.get("elev_ft"),
|
| 38 |
+
"height_above_gnd": d.get("height_above_gnd"),
|
| 39 |
+
"hwm_type": d.get("hwmTypeName"),
|
| 40 |
+
"hwm_quality": d.get("hwmQualityName"),
|
| 41 |
+
"county": d.get("countyName"),
|
| 42 |
+
"site_description": d.get("siteDescription"),
|
| 43 |
+
"waterbody": d.get("waterbody"),
|
| 44 |
+
},
|
| 45 |
+
})
|
| 46 |
+
OUT.parent.mkdir(exist_ok=True, parents=True)
|
| 47 |
+
OUT.write_text(json.dumps({"type": "FeatureCollection", "features": features}))
|
| 48 |
+
print(f"wrote {len(features)} HWMs -> {OUT} ({OUT.stat().st_size // 1024} KB)",
|
| 49 |
+
file=sys.stderr)
|
| 50 |
+
return 0
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
if __name__ == "__main__":
|
| 54 |
+
sys.exit(main())
|
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""One-shot fetch of an NYC-wide DEM for the microtopo specialist.
|
| 2 |
+
|
| 3 |
+
Run this once before launching the agent or web UI:
|
| 4 |
+
|
| 5 |
+
python scripts/fetch_nyc_dem.py
|
| 6 |
+
|
| 7 |
+
Output: data/nyc_dem_30m.tif (~few MB at 30 m, citywide).
|
| 8 |
+
We use 30 m resolution for the precomputed tile because at higher
|
| 9 |
+
resolution the file gets large and microtopo metrics (200/750 m
|
| 10 |
+
windows) don't need 10 m granularity.
|
| 11 |
+
"""
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import sys
|
| 15 |
+
import warnings
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
|
| 18 |
+
warnings.filterwarnings("ignore")
|
| 19 |
+
|
| 20 |
+
import py3dep # noqa: E402
|
| 21 |
+
|
| 22 |
+
DATA = Path(__file__).resolve().parent.parent / "data"
|
| 23 |
+
OUT = DATA / "nyc_dem_30m.tif"
|
| 24 |
+
|
| 25 |
+
# NYC bbox (lon_min, lat_min, lon_max, lat_max) plus a bit of padding
|
| 26 |
+
NYC_BBOX = (-74.30, 40.45, -73.65, 40.95)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def main() -> int:
|
| 30 |
+
if OUT.exists():
|
| 31 |
+
print(f"already exists: {OUT}", file=sys.stderr)
|
| 32 |
+
return 0
|
| 33 |
+
DATA.mkdir(exist_ok=True, parents=True)
|
| 34 |
+
print(f"fetching NYC DEM @ 30 m for bbox {NYC_BBOX}", file=sys.stderr)
|
| 35 |
+
dem = py3dep.get_dem(NYC_BBOX, resolution=30)
|
| 36 |
+
print(f" shape: {dem.shape}", file=sys.stderr)
|
| 37 |
+
# Reproject to WGS84 if needed
|
| 38 |
+
try:
|
| 39 |
+
if dem.rio.crs and dem.rio.crs.to_epsg() != 4326:
|
| 40 |
+
dem = dem.rio.reproject("EPSG:4326")
|
| 41 |
+
print(" reprojected to EPSG:4326", file=sys.stderr)
|
| 42 |
+
except Exception:
|
| 43 |
+
pass
|
| 44 |
+
dem.rio.to_raster(str(OUT), compress="DEFLATE", dtype="float32")
|
| 45 |
+
print(f"wrote {OUT} ({OUT.stat().st_size // 1024} KB)", file=sys.stderr)
|
| 46 |
+
return 0
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
if __name__ == "__main__":
|
| 50 |
+
sys.exit(main())
|
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Run Prithvi-EO-2.0-300M-TL-Sen1Floods11 once on a low-cloud HLS scene
|
| 2 |
+
over NYC. Save the resulting water mask as a vectorized GeoJSON for use
|
| 3 |
+
as a Riprap flood-layer specialist.
|
| 4 |
+
|
| 5 |
+
This script defers to IBM's official inference.py (downloaded from the
|
| 6 |
+
model repo) rather than reimplementing the inference loop β that file
|
| 7 |
+
knows about the temporal/location-coord embeddings, the per-window
|
| 8 |
+
albumentations stack, and the upernet decoder output shape, all of
|
| 9 |
+
which are easy to get wrong.
|
| 10 |
+
|
| 11 |
+
python scripts/run_prithvi_flood.py
|
| 12 |
+
"""
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import importlib.util
|
| 16 |
+
import json
|
| 17 |
+
import sys
|
| 18 |
+
import warnings
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
|
| 21 |
+
warnings.filterwarnings("ignore")
|
| 22 |
+
|
| 23 |
+
ROOT = Path(__file__).resolve().parent.parent
|
| 24 |
+
OUT_DIR = ROOT / "data"
|
| 25 |
+
OUT_DIR.mkdir(exist_ok=True, parents=True)
|
| 26 |
+
|
| 27 |
+
# NYC needs two MGRS tiles to cover everything:
|
| 28 |
+
# T18TWL covers Manhattan, Bronx, western Brooklyn, Newark Bay
|
| 29 |
+
# T18TXK covers eastern Brooklyn, Queens, Far Rockaway, Jamaica Bay, Long Island Sound
|
| 30 |
+
SCENES = [
|
| 31 |
+
("HLS.S30.T18TWL.2024247T153941.v2.0", "2024-09-04"), # 1% cloud, central NYC
|
| 32 |
+
("HLS.S30.T18TXK.2024252T153819.v2.0", "2024-09-08"), # 0% cloud, eastern NYC
|
| 33 |
+
]
|
| 34 |
+
SCENE_ID, SCENE_DATE = SCENES[0] # back-compat for legacy users
|
| 35 |
+
MODEL_REPO = "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"
|
| 36 |
+
PRITHVI_BAND_NAMES = ["B02", "B03", "B04", "B8A", "B11", "B12"]
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _stage_stack(out_path: Path, scene_id: str = SCENE_ID) -> bool:
    """Download the six Prithvi input bands for *scene_id* from Planetary
    Computer and write them as one 6-band float32 reflectance GeoTIFF at
    *out_path*. Returns True when the stack exists (fresh or cached),
    False when the scene could not be retrieved.
    """
    # Idempotent: a previously staged stack is reused as-is.
    if out_path.exists():
        return True
    # Heavy geo deps imported lazily so the cached path stays import-free.
    import pystac_client, planetary_computer, rasterio, numpy as np
    print(f"fetching scene {scene_id}...", file=sys.stderr)
    catalog = pystac_client.Client.open(
        "https://planetarycomputer.microsoft.com/api/stac/v1",
        # Signs asset hrefs so rasterio can read them directly over HTTP.
        modifier=planetary_computer.sign_inplace,
    )
    item = catalog.get_collection("hls2-s30").get_item(scene_id)
    if item is None:
        print(" scene not retrievable", file=sys.stderr)
        return False
    arrays = []; profile = None
    for band in PRITHVI_BAND_NAMES:
        with rasterio.open(item.assets[band].href) as ds:
            arrays.append(ds.read(1))
            if profile is None:
                # First band's profile is the geo-template for the stack.
                profile = ds.profile.copy()
    stack = np.stack(arrays, axis=0).astype("float32")
    # Replace nodata -9999 before scaling. inference.py only treats nodata
    # correctly when explicit mean/std are configured — for this
    # Sen1Floods11 fine-tune mean/std are None, so we do the substitution
    # upstream and write a clean float32 raster in 0..1 reflectance units
    # (constant_scale=0.0001 in config => DN/10000).
    # NOTE(review): the original comment mentioned the NO_DATA_FLOAT
    # sentinel 0.0001, but the code substitutes 0.0 (which survives the
    # /10000 scaling as 0.0 and matches the nodata=0.0 tag below) —
    # confirm which value inference.py actually expects.
    stack[stack <= -9000] = 0.0
    stack = stack / 10000.0
    stack = np.clip(stack, 0.0, 1.0).astype("float32")
    profile.update(count=6, dtype="float32",
                   compress="DEFLATE", tiled=True,
                   blockxsize=256, blockysize=256, nodata=0.0)
    with rasterio.open(out_path, "w", **profile) as ds:
        # rasterio bands are 1-indexed.
        for i in range(6):
            ds.write(stack[i], i + 1)
    print(f" wrote {out_path} ({out_path.stat().st_size // (1024*1024)} MB) "
          f"(reflectance units, nodataβ0)", file=sys.stderr)
    return True
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def _process_one(scene_id: str, scene_date: str) -> list[dict]:
    """Stage one MGRS tile, run Prithvi on it, and polygonise the water
    class.  The returned GeoJSON Features are reprojected to EPSG:4326 so
    tiles from different UTM zones can be merged downstream."""
    stack_path = OUT_DIR / f"hls_stack_{scene_date}.tif"
    if not _stage_stack(stack_path, scene_id=scene_id):
        return []

    # Pull the model's own inference runner, config, and weights from the Hub.
    from huggingface_hub import hf_hub_download
    inf_py = hf_hub_download(MODEL_REPO, "inference.py")
    cfg = hf_hub_download(MODEL_REPO, "config.yaml")
    ckpt = hf_hub_download(MODEL_REPO, "Prithvi-EO-V2-300M-TL-Sen1Floods11.pt")

    # Import the downloaded inference.py as a throwaway module.
    spec = importlib.util.spec_from_file_location("prithvi_inf", inf_py)
    runner = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(runner)

    run_dir = OUT_DIR / "prithvi_runs"
    run_dir.mkdir(exist_ok=True)

    pred_path = run_dir / f"pred_{stack_path.stem}.tiff"
    if pred_path.exists():
        print(f" reusing existing pred: {pred_path}", file=sys.stderr)
    else:
        print(f"running Prithvi on {scene_id}...", file=sys.stderr)
        runner.main(data_file=str(stack_path), config=cfg, checkpoint=ckpt,
                    output_dir=str(run_dir), rgb_outputs=False, input_indices=None)

    if not pred_path.exists():
        # The runner may pick a slightly different file name; glob for it.
        candidates = list(run_dir.glob(f"pred_{stack_path.stem}*"))
        pred_path = candidates[0] if candidates else None
    if pred_path is None or not pred_path.exists():
        print(f" no prediction tiff for {scene_id}", file=sys.stderr)
        return []

    import rasterio
    from rasterio.features import shapes
    from shapely.geometry import shape, mapping
    import geopandas as gpd

    with rasterio.open(pred_path) as ds:
        pred = ds.read(1)
        transform = ds.transform
        src_crs = ds.crs

    water_mask = pred == 255
    n_water = int(water_mask.sum())
    print(f" {scene_id}: {n_water} water px "
          f"({100*n_water/pred.size:.2f}%)", file=sys.stderr)

    feats = [
        {"type": "Feature",
         "geometry": mapping(poly),
         "properties": {"class": "water",
                        "scene_id": scene_id,
                        "scene_date": scene_date}}
        for geom, val in shapes(water_mask.astype("uint8"),
                                mask=water_mask, transform=transform)
        if val == 1
        for poly in (shape(geom),)
        if poly.area > 0
    ]
    if not feats:
        return []

    # Reproject to EPSG:4326 for cross-tile merging
    merged = gpd.GeoDataFrame.from_features(feats, crs=src_crs).to_crs("EPSG:4326")
    return json.loads(merged.to_json())["features"]
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def main() -> int:
    """Run every configured scene through Prithvi and merge the resulting
    water polygons into data/prithvi_flood_nyc.geojson.  Skips all work
    when the output already exists."""
    out_geojson = OUT_DIR / "prithvi_flood_nyc.geojson"
    if out_geojson.exists():
        print(f"already exists: {out_geojson}", file=sys.stderr)
        return 0

    all_features: list[dict] = []
    scene_ids: list[str] = []
    scene_dates: list[str] = []
    for sid, sdate in SCENES:
        scene_feats = _process_one(sid, sdate)
        all_features.extend(scene_feats)
        if scene_feats:
            # Record only the scenes that actually contributed polygons.
            scene_ids.append(sid)
            scene_dates.append(sdate)

    out = {"type": "FeatureCollection", "features": all_features,
           "scene_ids": scene_ids, "scene_dates": scene_dates,
           "model": MODEL_REPO, "crs": "EPSG:4326"}
    out_geojson.write_text(json.dumps(out))
    print(f"\nwrote {len(all_features)} water polygons across "
          f"{len(scene_ids)} scenes -> {out_geojson} "
          f"({out_geojson.stat().st_size // 1024} KB)", file=sys.stderr)
    return 0
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
if __name__ == "__main__":
    sys.exit(main())  # propagate main()'s int status to the shell
|
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Run Prithvi-EO 2.0 (Sen1Floods11) on a real Hurricane Ida pre/post pair.
|
| 2 |
+
|
| 3 |
+
Pre-event: HLS.S30.T18TWK.2021237T153809.v2.0 (2021-08-25, 3% cloud)
|
| 4 |
+
Post-event: HLS.S30.T18TWK.2021245T154911.v2.0 (2021-09-02, 1% cloud,
|
| 5 |
+
~12h after peak rainfall)
|
| 6 |
+
|
| 7 |
+
This is the genuinely-defensible Prithvi run for the demo: a real flood
|
| 8 |
+
event, two clean scenes within the model's optical comfort zone, with a
|
| 9 |
+
diff that isolates *new* surface water attributable to Ida from the
|
| 10 |
+
permanent rivers/harbor that are present in both scenes.
|
| 11 |
+
|
| 12 |
+
Honest framing baked into the metadata:
|
| 13 |
+
- The model still misses subway and basement flooding (sub-surface; the
|
| 14 |
+
dominant Ida damage mode in NYC). Optical satellite cannot see those.
|
| 15 |
+
- By 16:02 UTC Sep 2 (~12 h post-peak), pluvial street water had largely
|
| 16 |
+
drained. The diff signal is mostly: Jamaica Bay marsh ponding,
|
| 17 |
+
riverside spillover, low-lying park inundation.
|
| 18 |
+
- This is what an Apache-2.0 foundation model can defensibly contribute
|
| 19 |
+
to a flood-event assessment, and we say so in the report.
|
| 20 |
+
|
| 21 |
+
python scripts/run_prithvi_ida.py
|
| 22 |
+
"""
|
| 23 |
+
from __future__ import annotations
|
| 24 |
+
|
| 25 |
+
import importlib.util
|
| 26 |
+
import json
|
| 27 |
+
import sys
|
| 28 |
+
import warnings
|
| 29 |
+
from pathlib import Path
|
| 30 |
+
|
| 31 |
+
# Suppress all Python warnings so the script's stderr log stays readable.
warnings.filterwarnings("ignore")

ROOT = Path(__file__).resolve().parent.parent  # repo root (scripts/..)
OUT_DIR = ROOT / "data"
OUT_DIR.mkdir(exist_ok=True, parents=True)

# HLS Sentinel-2 scene pair over MGRS tile 18TWK bracketing Hurricane Ida
# (pre: 2021-08-25, post: 2021-09-02 — dates per the module docstring).
PRE_SCENE = "HLS.S30.T18TWK.2021237T153809.v2.0"
POST_SCENE = "HLS.S30.T18TWK.2021245T154911.v2.0"
PRE_DATE = "2021-08-25"
POST_DATE = "2021-09-02"
EVENT = "Hurricane Ida"

# Hugging Face repo of the Sen1Floods11 fine-tune used for inference.
MODEL_REPO = "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"
# HLS asset names staged into the input stack, in the order the model expects.
PRITHVI_BAND_NAMES = ["B02", "B03", "B04", "B8A", "B11", "B12"]
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _stage_stack(out_path: Path, scene_id: str) -> bool:
    """Download HLS scene *scene_id* from Planetary Computer and write a
    float32 band stack (PRITHVI_BAND_NAMES order, 0..1 reflectance) to
    *out_path*.  An existing file is reused.  Returns False when the scene
    cannot be retrieved.
    """
    if out_path.exists():
        print(f" reusing {out_path.name}", file=sys.stderr)
        return True
    # Lazy imports: the heavy geo stack is only needed when actually fetching.
    import pystac_client, planetary_computer, rasterio, numpy as np
    print(f"fetching {scene_id}...", file=sys.stderr)
    catalog = pystac_client.Client.open(
        "https://planetarycomputer.microsoft.com/api/stac/v1",
        modifier=planetary_computer.sign_inplace,
    )
    item = catalog.get_collection("hls2-s30").get_item(scene_id)
    if item is None:
        print(f" {scene_id} not retrievable", file=sys.stderr)
        return False
    arrays = []
    profile = None
    for band in PRITHVI_BAND_NAMES:
        with rasterio.open(item.assets[band].href) as ds:
            arrays.append(ds.read(1))
            if profile is None:
                profile = ds.profile.copy()
    stack = np.stack(arrays, axis=0).astype("float32")
    # -9999 nodata -> 0, then DN -> 0..1 reflectance (config constant_scale=0.0001).
    stack[stack <= -9000] = 0.0
    stack = np.clip(stack / 10000.0, 0.0, 1.0).astype("float32")
    # Derive the band count from the stack instead of hard-coding 6, so the
    # band list can change in one place (PRITHVI_BAND_NAMES).
    n_bands = stack.shape[0]
    profile.update(count=n_bands, dtype="float32",
                   compress="DEFLATE", tiled=True,
                   blockxsize=256, blockysize=256, nodata=0.0)
    with rasterio.open(out_path, "w", **profile) as ds:
        for i in range(n_bands):
            ds.write(stack[i], i + 1)
    print(f" wrote {out_path.name} ({out_path.stat().st_size // (1024*1024)} MB)",
          file=sys.stderr)
    return True
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def _run_prithvi(stack_path: Path, out_dir: Path) -> Path | None:
    """Run Sen1Floods11 inference on *stack_path* unless a prediction
    already exists; return the prediction GeoTIFF path, or None when no
    output can be located."""
    expected = out_dir / f"pred_{stack_path.stem}.tiff"
    if expected.exists():
        print(f" reusing existing pred: {expected.name}", file=sys.stderr)
        return expected

    # Pull the runner script, config, and checkpoint from the model repo.
    from huggingface_hub import hf_hub_download
    runner_py = hf_hub_download(MODEL_REPO, "inference.py")
    config_path = hf_hub_download(MODEL_REPO, "config.yaml")
    weights = hf_hub_download(MODEL_REPO, "Prithvi-EO-V2-300M-TL-Sen1Floods11.pt")

    # Load the downloaded inference.py as an ad-hoc module and call its main().
    spec = importlib.util.spec_from_file_location("prithvi_inf", runner_py)
    runner = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(runner)

    print(f" running Prithvi on {stack_path.name}...", file=sys.stderr)
    runner.main(data_file=str(stack_path), config=config_path, checkpoint=weights,
                output_dir=str(out_dir), rgb_outputs=False, input_indices=None)

    if expected.exists():
        return expected
    # The runner may choose a slightly different suffix; fall back to a glob.
    return next(iter(out_dir.glob(f"pred_{stack_path.stem}*")), None)
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def _vectorize(mask, transform, crs, class_name: str) -> list[dict]:
    """Polygonise a boolean raster *mask* into EPSG:4326 GeoJSON Features
    carrying {"class": class_name}.  Returns [] for an all-False mask."""
    from rasterio.features import shapes
    from shapely.geometry import shape, mapping
    import geopandas as gpd
    feats = []
    for geom, val in shapes(mask.astype("uint8"), mask=mask, transform=transform):
        if val == 1:
            poly = shape(geom)
            if poly.area > 0:
                feats.append({"type": "Feature",
                              "geometry": mapping(poly),
                              "properties": {"class": class_name}})
    g = (gpd.GeoDataFrame.from_features(feats, crs=crs).to_crs("EPSG:4326")
         if feats else gpd.GeoDataFrame(geometry=[], crs="EPSG:4326"))
    return json.loads(g.to_json())["features"]


def main() -> int:
    """Stage the Ida pre/post scene pair, run Prithvi on both, diff the
    water masks, and write data/prithvi_ida_2021.geojson.

    Exit status: 0 success (or output already present), 1 scene staging
    failed, 2 inference failed.
    """
    out_geojson = OUT_DIR / "prithvi_ida_2021.geojson"
    if out_geojson.exists():
        print(f"already exists: {out_geojson}", file=sys.stderr)
        return 0

    pre_stack = OUT_DIR / f"hls_stack_pre_ida_{PRE_DATE}.tif"
    post_stack = OUT_DIR / f"hls_stack_post_ida_{POST_DATE}.tif"
    if not (_stage_stack(pre_stack, PRE_SCENE) and
            _stage_stack(post_stack, POST_SCENE)):
        return 1

    out_dir = OUT_DIR / "prithvi_runs"
    out_dir.mkdir(exist_ok=True)
    pre_pred = _run_prithvi(pre_stack, out_dir)
    post_pred = _run_prithvi(post_stack, out_dir)
    if pre_pred is None or post_pred is None:
        print("inference failed", file=sys.stderr)
        return 2

    # ---- diff: NEW water in post that wasn't in pre = Ida-attributable ----
    import rasterio
    with rasterio.open(pre_pred) as ds:
        pre = ds.read(1)
    with rasterio.open(post_pred) as ds:
        post = ds.read(1)
        transform = ds.transform
        crs = ds.crs

    # The model emits 0 / 255.  New-water = post(255) AND pre(!=255)
    new_water = (post == 255) & (pre != 255)
    post_water = post == 255  # all post-event water, kept for context/legend
    n_new = int(new_water.sum())
    n_pre = int((pre == 255).sum())
    n_post = int(post_water.sum())
    print(f" pre water px: {n_pre:>8d} ({100*n_pre/pre.size:.2f}%)", file=sys.stderr)
    print(f" post water px: {n_post:>8d} ({100*n_post/post.size:.2f}%)", file=sys.stderr)
    print(f" NEW water px: {n_new:>8d} ({100*n_new/post.size:.2f}%)", file=sys.stderr)

    # One shared helper replaces the two previously-duplicated vectorise loops.
    new_features = _vectorize(new_water, transform, crs, "new_water_post_ida")
    post_features = _vectorize(post_water, transform, crs, "post_event_water")

    out = {
        "type": "FeatureCollection",
        "features": new_features,
        "_post_event_water_features": post_features,  # carried for reference
        "event": EVENT,
        "pre_scene_id": PRE_SCENE, "pre_scene_date": PRE_DATE,
        "post_scene_id": POST_SCENE, "post_scene_date": POST_DATE,
        "model": MODEL_REPO,
        "crs": "EPSG:4326",
        "interpretation": (
            "Polygons in `features` are pixels classified as water in the "
            "post-event scene but NOT in the pre-event scene — i.e., "
            "candidate Hurricane Ida-attributable inundation. The Sep 2 "
            "Sentinel-2 pass was ~12 h after peak rainfall; pluvial street "
            "and basement flooding (the dominant Ida damage mode in NYC) "
            "had largely drained by then, so this signal mostly captures "
            "marsh ponding, riverside spillover, and low-lying park water. "
            "Subway and basement flooding are not surface-visible to "
            "optical satellites."
        ),
    }
    out_geojson.write_text(json.dumps(out))
    print(f"\nwrote {len(new_features)} new-water polygons + "
          f"{len(post_features)} post-event water polygons "
          f"-> {out_geojson} ({out_geojson.stat().st_size // 1024} KB)",
          file=sys.stderr)
    return 0
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
if __name__ == "__main__":
    sys.exit(main())  # propagate main()'s int status to the shell
|