Stones C5: upgrade prithvi_live to NYC-Pluvial v2 fine-tune
Browse files
Switches the live Sentinel-2 segmentation specialist from the
Sen1Floods11 base (`ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-
Sen1Floods11`, test flood IoU ~0.10 on NYC chips) to the NYC-specialized
fine-tune (`msradam/Prithvi-EO-2.0-NYC-Pluvial`, test flood IoU
0.5979 — 6x lift). Same input/output contract; doc_id `prithvi_live`
unchanged so reconciler citations and frontend chips stay stable.
Loader is now flexible across two artifact shapes:
v2 (default): Lightning ckpt restored via
SemanticSegmentationTask.load_from_checkpoint. Probes
best_val_loss.ckpt / model.ckpt / last.ckpt then walks for any
*.ckpt in the snapshot.
v1 (fallback): raw .pt + config.yaml via
LightningInferenceModel.from_config — the original code path,
kept so RIPRAP_PRITHVI_LIVE_REPO=ibm-nasa-geospatial/... still
works for A/B comparisons.
Either path resolves the inference helper from BASE_REPO (only the
IBM-NASA repo ships inference.py).
Reconciler doc body now credits the v2 lineage and the AMD MI300X
training. Frontend SOURCE_LABELS / SOURCE_URLS / SOURCE_VINTAGES point
at the new HF artifact.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- app/flood_layers/prithvi_live.py +93 -14
- app/reconcile.py +5 -3
- web/static/agent.js +3 -3
|
@@ -1,13 +1,17 @@
|
|
| 1 |
-
"""Prithvi-EO 2.0 (
|
| 2 |
|
| 3 |
A per-query specialist: pulls the most recent low-cloud Sentinel-2 L2A
|
| 4 |
scene over the address from Microsoft Planetary Computer, runs the
|
| 5 |
-
|
| 6 |
|
| 7 |
Distinct from `app/flood_layers/prithvi_water.py`, which serves the
|
| 8 |
offline-precomputed 2021 Ida polygons. This one is *fresh observation*
|
| 9 |
-
each query —
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
Network calls (STAC search + COG band reads) and a 300M-param model
|
| 13 |
forward pass make this the slowest specialist after the LLM. Gated by
|
|
@@ -15,8 +19,7 @@ RIPRAP_PRITHVI_LIVE_ENABLE so deployments without the deps installed
|
|
| 15 |
silently skip it. Cloud-cover refuses out at 30%+ to honor the
|
| 16 |
Sen1Floods11 training distribution.
|
| 17 |
|
| 18 |
-
License: Apache-2.0
|
| 19 |
-
300M-TL-Sen1Floods11`). See experiments/shared/licenses.md.
|
| 20 |
"""
|
| 21 |
|
| 22 |
from __future__ import annotations
|
|
@@ -33,7 +36,15 @@ ENABLE = os.environ.get("RIPRAP_PRITHVI_LIVE_ENABLE", "1").lower() in ("1", "tru
|
|
| 33 |
SEARCH_DAYS = int(os.environ.get("RIPRAP_PRITHVI_LIVE_SEARCH_DAYS", "120"))
|
| 34 |
MAX_CLOUD_PCT = float(os.environ.get("RIPRAP_PRITHVI_LIVE_MAX_CLOUD", "30"))
|
| 35 |
DEVICE = os.environ.get("RIPRAP_PRITHVI_LIVE_DEVICE", "cpu")
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
# Sen1Floods11 expects 6 bands in this exact order.
|
| 39 |
BANDS = ["B02", "B03", "B04", "B8A", "B11", "B12"]
|
|
@@ -87,6 +98,21 @@ def warm():
|
|
| 87 |
|
| 88 |
|
| 89 |
def _ensure_model():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
global _MODEL, _RUN_MODEL
|
| 91 |
if _MODEL is not None:
|
| 92 |
return _MODEL, _RUN_MODEL
|
|
@@ -95,12 +121,64 @@ def _ensure_model():
|
|
| 95 |
return _MODEL, _RUN_MODEL
|
| 96 |
import importlib.util
|
| 97 |
|
| 98 |
-
from huggingface_hub import hf_hub_download
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
m.model.eval()
|
| 105 |
if DEVICE == "cuda":
|
| 106 |
try:
|
|
@@ -110,7 +188,8 @@ def _ensure_model():
|
|
| 110 |
except Exception:
|
| 111 |
log.exception("prithvi_live: cuda move failed")
|
| 112 |
|
| 113 |
-
|
|
|
|
| 114 |
spec = importlib.util.spec_from_file_location("_prithvi_inference",
|
| 115 |
inference_py)
|
| 116 |
mod = importlib.util.module_from_spec(spec)
|
|
|
|
| 1 |
+
"""Prithvi-EO 2.0 (NYC Pluvial v2 fine-tune) live water segmentation.
|
| 2 |
|
| 3 |
A per-query specialist: pulls the most recent low-cloud Sentinel-2 L2A
|
| 4 |
scene over the address from Microsoft Planetary Computer, runs the
|
| 5 |
+
NYC-specialized fine-tune, and reports % water within 500 m.
|
| 6 |
|
| 7 |
Distinct from `app/flood_layers/prithvi_water.py`, which serves the
|
| 8 |
offline-precomputed 2021 Ida polygons. This one is *fresh observation*
|
| 9 |
+
each query — same doc_id (`prithvi_live`), but the underlying model
|
| 10 |
+
has been swapped from the Sen1Floods11 base to
|
| 11 |
+
`msradam/Prithvi-EO-2.0-NYC-Pluvial` (Apache-2.0, fine-tuned on AMD
|
| 12 |
+
Instinct MI300X via AMD Developer Cloud — test flood IoU 0.5979,
|
| 13 |
+
6× over the base). The base model is still loadable by setting
|
| 14 |
+
RIPRAP_PRITHVI_LIVE_REPO to the IBM repo as a fallback.
|
| 15 |
|
| 16 |
Network calls (STAC search + COG band reads) and a 300M-param model
|
| 17 |
forward pass make this the slowest specialist after the LLM. Gated by
|
|
|
|
| 19 |
silently skip it. Cloud-cover refuses out at 30%+ to honor the
|
| 20 |
Sen1Floods11 training distribution.
|
| 21 |
|
| 22 |
+
License: Apache-2.0. See experiments/shared/licenses.md.
|
|
|
|
| 23 |
"""
|
| 24 |
|
| 25 |
from __future__ import annotations
|
|
|
|
| 36 |
SEARCH_DAYS = int(os.environ.get("RIPRAP_PRITHVI_LIVE_SEARCH_DAYS", "120"))
|
| 37 |
MAX_CLOUD_PCT = float(os.environ.get("RIPRAP_PRITHVI_LIVE_MAX_CLOUD", "30"))
|
| 38 |
DEVICE = os.environ.get("RIPRAP_PRITHVI_LIVE_DEVICE", "cpu")
|
| 39 |
+
|
| 40 |
+
# Default to the NYC Pluvial v2 fine-tune; override to the IBM-NASA base
|
| 41 |
+
# (`ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11`) when the v2
|
| 42 |
+
# artifact is unreachable or for A/B comparisons.
|
| 43 |
+
REPO = os.environ.get(
|
| 44 |
+
"RIPRAP_PRITHVI_LIVE_REPO",
|
| 45 |
+
"msradam/Prithvi-EO-2.0-NYC-Pluvial",
|
| 46 |
+
)
|
| 47 |
+
BASE_REPO = "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"
|
| 48 |
|
| 49 |
# Sen1Floods11 expects 6 bands in this exact order.
|
| 50 |
BANDS = ["B02", "B03", "B04", "B8A", "B11", "B12"]
|
|
|
|
| 98 |
|
| 99 |
|
| 100 |
def _ensure_model():
|
| 101 |
+
"""Load Prithvi-EO 2.0 once into RAM. Two artifact shapes are
|
| 102 |
+
supported, in priority order:
|
| 103 |
+
|
| 104 |
+
1) **NYC Pluvial v2** (`msradam/Prithvi-EO-2.0-NYC-Pluvial`) —
|
| 105 |
+
Lightning checkpoint (`*.ckpt`) restored via
|
| 106 |
+
`SemanticSegmentationTask.load_from_checkpoint`. Full task
|
| 107 |
+
(config + weights) lives inside the ckpt.
|
| 108 |
+
2) **Sen1Floods11 base** (`ibm-nasa-geospatial/...`) — raw `.pt`
|
| 109 |
+
weights + a separate `config.yaml`, loaded via
|
| 110 |
+
`LightningInferenceModel.from_config(config, ckpt)`. This is
|
| 111 |
+
the path the original prithvi_live.py used.
|
| 112 |
+
|
| 113 |
+
The shared inference helper (`run_model`) only ships in the IBM-NASA
|
| 114 |
+
base repo; for the v2 path we monkey-import it from the base repo
|
| 115 |
+
so a single code path drives prediction either way."""
|
| 116 |
global _MODEL, _RUN_MODEL
|
| 117 |
if _MODEL is not None:
|
| 118 |
return _MODEL, _RUN_MODEL
|
|
|
|
| 121 |
return _MODEL, _RUN_MODEL
|
| 122 |
import importlib.util
|
| 123 |
|
| 124 |
+
from huggingface_hub import hf_hub_download, snapshot_download
|
| 125 |
+
log.info("prithvi_live: loading model from %s", REPO)
|
| 126 |
+
|
| 127 |
+
# ---- Try the v2 / Lightning-ckpt path first -----------------
|
| 128 |
+
m = None
|
| 129 |
+
try:
|
| 130 |
+
from terratorch.tasks import SemanticSegmentationTask
|
| 131 |
+
local_dir = snapshot_download(REPO)
|
| 132 |
+
ckpt = None
|
| 133 |
+
# Lightning saves under various conventional names; probe
|
| 134 |
+
# the most likely candidates rather than trusting one path.
|
| 135 |
+
for name in ("best_val_loss.ckpt", "model.ckpt",
|
| 136 |
+
"last.ckpt"):
|
| 137 |
+
candidate = os.path.join(local_dir, name)
|
| 138 |
+
if os.path.exists(candidate):
|
| 139 |
+
ckpt = candidate
|
| 140 |
+
break
|
| 141 |
+
if ckpt is None:
|
| 142 |
+
# Walk the snapshot for any *.ckpt file.
|
| 143 |
+
for root, _, files in os.walk(local_dir):
|
| 144 |
+
for f in files:
|
| 145 |
+
if f.endswith(".ckpt"):
|
| 146 |
+
ckpt = os.path.join(root, f)
|
| 147 |
+
break
|
| 148 |
+
if ckpt:
|
| 149 |
+
break
|
| 150 |
+
if ckpt is not None:
|
| 151 |
+
log.info("prithvi_live: loading Lightning ckpt %s", ckpt)
|
| 152 |
+
map_loc = "cuda" if (DEVICE == "cuda") else "cpu"
|
| 153 |
+
task = SemanticSegmentationTask.load_from_checkpoint(
|
| 154 |
+
ckpt, map_location=map_loc, strict=False,
|
| 155 |
+
)
|
| 156 |
+
task.eval()
|
| 157 |
+
|
| 158 |
+
# Mimic LightningInferenceModel's surface so the rest
|
| 159 |
+
# of the file (which expects `.model` and `.datamodule`)
|
| 160 |
+
# keeps working. datamodule isn't strictly needed by
|
| 161 |
+
# run_model in current terratorch but we set it to None
|
| 162 |
+
# explicitly so a missing-attr access surfaces clearly.
|
| 163 |
+
class _LightningTaskWrapper:
|
| 164 |
+
def __init__(self, task):
|
| 165 |
+
self.model = task
|
| 166 |
+
self.datamodule = None
|
| 167 |
+
|
| 168 |
+
m = _LightningTaskWrapper(task)
|
| 169 |
+
except Exception as e:
|
| 170 |
+
log.warning("prithvi_live: Lightning-ckpt load failed (%s); "
|
| 171 |
+
"falling back to raw-weights path", e)
|
| 172 |
+
|
| 173 |
+
# ---- Fallback: raw .pt + config.yaml (Sen1Floods11 base) ----
|
| 174 |
+
if m is None:
|
| 175 |
+
from terratorch.cli_tools import LightningInferenceModel
|
| 176 |
+
base = REPO if REPO == BASE_REPO else BASE_REPO
|
| 177 |
+
config_path = hf_hub_download(base, "config.yaml")
|
| 178 |
+
checkpoint = hf_hub_download(
|
| 179 |
+
base, "Prithvi-EO-V2-300M-TL-Sen1Floods11.pt")
|
| 180 |
+
m = LightningInferenceModel.from_config(config_path, checkpoint)
|
| 181 |
+
|
| 182 |
m.model.eval()
|
| 183 |
if DEVICE == "cuda":
|
| 184 |
try:
|
|
|
|
| 188 |
except Exception:
|
| 189 |
log.exception("prithvi_live: cuda move failed")
|
| 190 |
|
| 191 |
+
# Inference helper lives only in the IBM-NASA base repo.
|
| 192 |
+
inference_py = hf_hub_download(BASE_REPO, "inference.py")
|
| 193 |
spec = importlib.util.spec_from_file_location("_prithvi_inference",
|
| 194 |
inference_py)
|
| 195 |
mod = importlib.util.module_from_spec(spec)
|
|
@@ -839,9 +839,11 @@ def build_documents(state: dict[str, Any]) -> list[dict]:
|
|
| 839 |
plive = state.get("prithvi_live")
|
| 840 |
if not out_of_nyc and plive and plive.get("ok"):
|
| 841 |
body = [
|
| 842 |
-
"Source: Prithvi-EO
|
| 843 |
-
"
|
| 844 |
-
"
|
|
|
|
|
|
|
| 845 |
f"Sentinel-2 scene id: {plive.get('item_id', 'unknown')}.",
|
| 846 |
f"Observation date: {(plive.get('item_datetime') or 'unknown')[:10]}.",
|
| 847 |
f"Cloud cover: {plive.get('cloud_cover', 0):.3f}%.",
|
|
|
|
| 839 |
plive = state.get("prithvi_live")
|
| 840 |
if not out_of_nyc and plive and plive.get("ok"):
|
| 841 |
body = [
|
| 842 |
+
"Source: msradam/Prithvi-EO-2.0-NYC-Pluvial (Apache-2.0) β "
|
| 843 |
+
"NYC-Pluvial v2 fine-tune of Prithvi-EO 2.0 trained on AMD "
|
| 844 |
+
"Instinct MI300X via AMD Developer Cloud (test flood IoU "
|
| 845 |
+
"0.5979). Live segmentation over a Sentinel-2 L2A scene "
|
| 846 |
+
"from Microsoft Planetary Computer.",
|
| 847 |
f"Sentinel-2 scene id: {plive.get('item_id', 'unknown')}.",
|
| 848 |
f"Observation date: {(plive.get('item_datetime') or 'unknown')[:10]}.",
|
| 849 |
f"Cloud cover: {plive.get('cloud_cover', 0):.3f}%.",
|
|
@@ -67,7 +67,7 @@ const SOURCE_LABELS = {
|
|
| 67 |
microtopo_nta: "USGS 3DEP DEM, polygon-aggregated",
|
| 68 |
ida_hwm: "USGS Hurricane Ida 2021 HWMs",
|
| 69 |
prithvi_water: "Prithvi-EO 2.0 β Hurricane Ida 2021 polygons",
|
| 70 |
-
prithvi_live: "Prithvi-EO 2.0 β live Sentinel-2 water segmentation",
|
| 71 |
terramind_synthetic: "TerraMind 1.0 base β synthetic LULC (DEMβESRI Land Cover)",
|
| 72 |
tm_lulc: "TerraMind-NYC LULC LoRA (msradam/TerraMind-NYC-Adapters)",
|
| 73 |
tm_buildings: "TerraMind-NYC Buildings LoRA (msradam/TerraMind-NYC-Adapters)",
|
|
@@ -118,7 +118,7 @@ const SOURCE_URLS = {
|
|
| 118 |
microtopo_nta: "https://www.usgs.gov/3d-elevation-program",
|
| 119 |
ida_hwm: "https://stn.wim.usgs.gov/STNDataPortal/",
|
| 120 |
prithvi_water: "https://huggingface.co/ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
|
| 121 |
-
prithvi_live: "https://huggingface.co/
|
| 122 |
terramind_synthetic: "https://huggingface.co/ibm-esa-geospatial/TerraMind-1.0-base",
|
| 123 |
tm_lulc: "https://huggingface.co/msradam/TerraMind-NYC-Adapters",
|
| 124 |
tm_buildings: "https://huggingface.co/msradam/TerraMind-NYC-Adapters",
|
|
@@ -166,7 +166,7 @@ const SOURCE_VINTAGES = {
|
|
| 166 |
microtopo_nta: "USGS 3DEP DEM (NYC ~2018) β polygon-aggregated stats",
|
| 167 |
ida_hwm: "USGS Short-Term Network Event 312 β Hurricane Ida 2021 high-water marks (Sept 1-2 2021 survey)",
|
| 168 |
prithvi_water: "Prithvi-EO 2.0 satellite segmentation, scenes 2021-08-25 (pre) & 2021-09-02 (post Ida)",
|
| 169 |
-
prithvi_live: "live Sentinel-2 L2A scene from Microsoft Planetary Computer (acquisition timestamp in payload)",
|
| 170 |
terramind_synthetic: "synthetic prior β TerraMind 1.0 base generated a plausible categorical land-cover map from the LiDAR terrain at this point (deterministic seed, 10 diffusion steps; class fractions cite-able; not a measurement)",
|
| 171 |
tm_lulc: "live empirical observation β TerraMind-NYC LULC LoRA (msradam/TerraMind-NYC-Adapters, fine-tuned on NYC chips on AMD MI300X) over the per-query Sentinel-2/1/DEM chip; 5-class macro land cover with class fractions cite-able",
|
| 172 |
tm_buildings: "live empirical observation β TerraMind-NYC Buildings LoRA (msradam/TerraMind-NYC-Adapters, fine-tuned on NYC chips on AMD MI300X) over the per-query Sentinel-2/1/DEM chip; binary building-footprint mask + connected-component count",
|
|
|
|
| 67 |
microtopo_nta: "USGS 3DEP DEM, polygon-aggregated",
|
| 68 |
ida_hwm: "USGS Hurricane Ida 2021 HWMs",
|
| 69 |
prithvi_water: "Prithvi-EO 2.0 β Hurricane Ida 2021 polygons",
|
| 70 |
+
prithvi_live: "Prithvi-EO 2.0 NYC-Pluvial v2 β live Sentinel-2 water segmentation (msradam/Prithvi-EO-2.0-NYC-Pluvial)",
|
| 71 |
terramind_synthetic: "TerraMind 1.0 base β synthetic LULC (DEMβESRI Land Cover)",
|
| 72 |
tm_lulc: "TerraMind-NYC LULC LoRA (msradam/TerraMind-NYC-Adapters)",
|
| 73 |
tm_buildings: "TerraMind-NYC Buildings LoRA (msradam/TerraMind-NYC-Adapters)",
|
|
|
|
| 118 |
microtopo_nta: "https://www.usgs.gov/3d-elevation-program",
|
| 119 |
ida_hwm: "https://stn.wim.usgs.gov/STNDataPortal/",
|
| 120 |
prithvi_water: "https://huggingface.co/ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
|
| 121 |
+
prithvi_live: "https://huggingface.co/msradam/Prithvi-EO-2.0-NYC-Pluvial",
|
| 122 |
terramind_synthetic: "https://huggingface.co/ibm-esa-geospatial/TerraMind-1.0-base",
|
| 123 |
tm_lulc: "https://huggingface.co/msradam/TerraMind-NYC-Adapters",
|
| 124 |
tm_buildings: "https://huggingface.co/msradam/TerraMind-NYC-Adapters",
|
|
|
|
| 166 |
microtopo_nta: "USGS 3DEP DEM (NYC ~2018) β polygon-aggregated stats",
|
| 167 |
ida_hwm: "USGS Short-Term Network Event 312 β Hurricane Ida 2021 high-water marks (Sept 1-2 2021 survey)",
|
| 168 |
prithvi_water: "Prithvi-EO 2.0 satellite segmentation, scenes 2021-08-25 (pre) & 2021-09-02 (post Ida)",
|
| 169 |
+
prithvi_live: "live Sentinel-2 L2A scene from Microsoft Planetary Computer (acquisition timestamp in payload), segmented by the NYC-Pluvial v2 fine-tune of Prithvi-EO 2.0 (test flood IoU 0.5979)",
|
| 170 |
terramind_synthetic: "synthetic prior β TerraMind 1.0 base generated a plausible categorical land-cover map from the LiDAR terrain at this point (deterministic seed, 10 diffusion steps; class fractions cite-able; not a measurement)",
|
| 171 |
tm_lulc: "live empirical observation β TerraMind-NYC LULC LoRA (msradam/TerraMind-NYC-Adapters, fine-tuned on NYC chips on AMD MI300X) over the per-query Sentinel-2/1/DEM chip; 5-class macro land cover with class fractions cite-able",
|
| 172 |
tm_buildings: "live empirical observation β TerraMind-NYC Buildings LoRA (msradam/TerraMind-NYC-Adapters, fine-tuned on NYC chips on AMD MI300X) over the per-query Sentinel-2/1/DEM chip; binary building-footprint mask + connected-component count",
|