File size: 8,750 Bytes
dbf7a0e b9a10ad dbf7a0e b9a10ad dbf7a0e b9a10ad dbf7a0e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 | """Run Prithvi-EO 2.0 (Sen1Floods11) on a real Hurricane Ida pre/post pair.
Pre-event: HLS.S30.T18TWK.2021237T153809.v2.0 (2021-08-25, 3% cloud)
Post-event: HLS.S30.T18TWK.2021245T154911.v2.0 (2021-09-02, 1% cloud,
~12h after peak rainfall)
This is the genuinely-defensible Prithvi run for the demo: a real flood
event, two clean scenes within the model's optical comfort zone, with a
diff that isolates *new* surface water attributable to Ida from the
permanent rivers/harbor that are present in both scenes.
Honest framing baked into the metadata:
- The model still misses subway and basement flooding (sub-surface; the
dominant Ida damage mode in NYC). Optical satellite cannot see those.
- By 16:02 UTC Sep 2 (~12 h post-peak), pluvial street water had largely
drained. The diff signal is mostly: Jamaica Bay marsh ponding,
riverside spillover, low-lying park inundation.
- This is what an Apache-2.0 foundation model can defensibly contribute
to a flood-event assessment, and we say so in the report.
python scripts/run_prithvi_ida.py
"""
from __future__ import annotations
import importlib.util
import json
import sys
import warnings
from pathlib import Path
warnings.filterwarnings("ignore")
ROOT = Path(__file__).resolve().parent.parent
OUT_DIR = ROOT / "data"
OUT_DIR.mkdir(exist_ok=True, parents=True)
PRE_SCENE = "HLS.S30.T18TWK.2021237T153809.v2.0"
POST_SCENE = "HLS.S30.T18TWK.2021245T154911.v2.0"
PRE_DATE = "2021-08-25"
POST_DATE = "2021-09-02"
EVENT = "Hurricane Ida"
MODEL_REPO = "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"
PRITHVI_BAND_NAMES = ["B02", "B03", "B04", "B8A", "B11", "B12"]
def _stage_stack(out_path: Path, scene_id: str) -> bool:
if out_path.exists():
print(f" reusing {out_path.name}", file=sys.stderr)
return True
import numpy as np
import planetary_computer
import pystac_client
import rasterio
print(f"fetching {scene_id}...", file=sys.stderr)
catalog = pystac_client.Client.open(
"https://planetarycomputer.microsoft.com/api/stac/v1",
modifier=planetary_computer.sign_inplace,
)
item = catalog.get_collection("hls2-s30").get_item(scene_id)
if item is None:
print(f" {scene_id} not retrievable", file=sys.stderr)
return False
arrays = []
profile = None
for band in PRITHVI_BAND_NAMES:
with rasterio.open(item.assets[band].href) as ds:
arrays.append(ds.read(1))
if profile is None:
profile = ds.profile.copy()
stack = np.stack(arrays, axis=0).astype("float32")
stack[stack <= -9000] = 0.0
stack = stack / 10000.0
stack = np.clip(stack, 0.0, 1.0).astype("float32")
profile.update(count=6, dtype="float32",
compress="DEFLATE", tiled=True,
blockxsize=256, blockysize=256, nodata=0.0)
with rasterio.open(out_path, "w", **profile) as ds:
for i in range(6):
ds.write(stack[i], i + 1)
print(f" wrote {out_path.name} ({out_path.stat().st_size // (1024*1024)} MB)",
file=sys.stderr)
return True
def _run_prithvi(stack_path: Path, out_dir: Path) -> Path | None:
"""Run inference if needed; return path to pred .tiff."""
pred_path = out_dir / f"pred_{stack_path.stem}.tiff"
if pred_path.exists():
print(f" reusing existing pred: {pred_path.name}", file=sys.stderr)
return pred_path
from huggingface_hub import hf_hub_download
inf_py = hf_hub_download(MODEL_REPO, "inference.py")
cfg = hf_hub_download(MODEL_REPO, "config.yaml")
ckpt = hf_hub_download(MODEL_REPO, "Prithvi-EO-V2-300M-TL-Sen1Floods11.pt")
spec = importlib.util.spec_from_file_location("prithvi_inf", inf_py)
pm = importlib.util.module_from_spec(spec)
spec.loader.exec_module(pm)
print(f" running Prithvi on {stack_path.name}...", file=sys.stderr)
pm.main(data_file=str(stack_path), config=cfg, checkpoint=ckpt,
output_dir=str(out_dir), rgb_outputs=False, input_indices=None)
if pred_path.exists():
return pred_path
cands = list(out_dir.glob(f"pred_{stack_path.stem}*"))
return cands[0] if cands else None
def main() -> int:
out_geojson = OUT_DIR / "prithvi_ida_2021.geojson"
if out_geojson.exists():
print(f"already exists: {out_geojson}", file=sys.stderr)
return 0
pre_stack = OUT_DIR / f"hls_stack_pre_ida_{PRE_DATE}.tif"
post_stack = OUT_DIR / f"hls_stack_post_ida_{POST_DATE}.tif"
if not (_stage_stack(pre_stack, PRE_SCENE) and
_stage_stack(post_stack, POST_SCENE)):
return 1
out_dir = OUT_DIR / "prithvi_runs"
out_dir.mkdir(exist_ok=True)
pre_pred = _run_prithvi(pre_stack, out_dir)
post_pred = _run_prithvi(post_stack, out_dir)
if pre_pred is None or post_pred is None:
print("inference failed", file=sys.stderr)
return 2
# ---- diff: NEW water in post that wasn't in pre = Ida-attributable ----
import geopandas as gpd
import rasterio
from rasterio.features import shapes
from shapely.geometry import mapping, shape
with rasterio.open(pre_pred) as ds:
pre = ds.read(1)
with rasterio.open(post_pred) as ds:
post = ds.read(1)
transform = ds.transform
crs = ds.crs
# The model emits 0 / 255. New-water = post(255) AND pre(!=255)
new_water = (post == 255) & (pre != 255)
n_new = int(new_water.sum())
n_pre = int((pre == 255).sum())
n_post = int((post == 255).sum())
print(f" pre water px: {n_pre:>8d} ({100*n_pre/pre.size:.2f}%)", file=sys.stderr)
print(f" post water px: {n_post:>8d} ({100*n_post/post.size:.2f}%)", file=sys.stderr)
print(f" NEW water px: {n_new:>8d} ({100*n_new/post.size:.2f}%)", file=sys.stderr)
# also save the post mask for "all post-event water" if useful
post_water = post == 255
# vectorize NEW water (Ida-attributable inundation)
feats_new = []
for geom, val in shapes(new_water.astype("uint8"),
mask=new_water, transform=transform):
if val == 1:
poly = shape(geom)
if poly.area > 0:
feats_new.append({"type": "Feature",
"geometry": mapping(poly),
"properties": {"class": "new_water_post_ida"}})
# vectorize ALL post-event water (for legend / context)
feats_post = []
for geom, val in shapes(post_water.astype("uint8"),
mask=post_water, transform=transform):
if val == 1:
poly = shape(geom)
if poly.area > 0:
feats_post.append({"type": "Feature",
"geometry": mapping(poly),
"properties": {"class": "post_event_water"}})
g_new = gpd.GeoDataFrame.from_features(feats_new, crs=crs).to_crs("EPSG:4326") \
if feats_new else gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")
g_post = gpd.GeoDataFrame.from_features(feats_post, crs=crs).to_crs("EPSG:4326") \
if feats_post else gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")
new_features = json.loads(g_new.to_json())["features"]
post_features = json.loads(g_post.to_json())["features"]
out = {
"type": "FeatureCollection",
"features": new_features,
"_post_event_water_features": post_features, # carried for reference
"event": EVENT,
"pre_scene_id": PRE_SCENE, "pre_scene_date": PRE_DATE,
"post_scene_id": POST_SCENE, "post_scene_date": POST_DATE,
"model": MODEL_REPO,
"crs": "EPSG:4326",
"interpretation": (
"Polygons in `features` are pixels classified as water in the "
"post-event scene but NOT in the pre-event scene — i.e., "
"candidate Hurricane Ida-attributable inundation. The Sep 2 "
"Sentinel-2 pass was ~12 h after peak rainfall; pluvial street "
"and basement flooding (the dominant Ida damage mode in NYC) "
"had largely drained by then, so this signal mostly captures "
"marsh ponding, riverside spillover, and low-lying park water. "
"Subway and basement flooding are not surface-visible to "
"optical satellites."
),
}
out_geojson.write_text(json.dumps(out))
print(f"\nwrote {len(new_features)} new-water polygons + "
f"{len(post_features)} post-event water polygons "
f"-> {out_geojson} ({out_geojson.stat().st_size // 1024} KB)",
file=sys.stderr)
return 0
if __name__ == "__main__":
sys.exit(main())
|