Spaces:

lablab-ai-amd-developer-hackathon
/

riprap-nyc

Running

App Files Files Community

riprap-nyc / experiments /17_riprap_integration /terramind_nyc.py

seriffic

Backend evolution: Phases 1-10 specialists + agentic FSM + Mellea + LiteLLM router

6a82282 18 days ago

raw

history blame contribute delete

10.9 kB

	"""TerraMind-NYC specialist for Riprap — replaces zero-shot terramind_synthesis.

	Loads our AMD-trained NYC checkpoints, fetches recent Sentinel-1 + Sentinel-2 +
	DEM at the queried (lat, lon) via the live STAC path from
	experiments/11_live_sentinel_fetch, runs inference, returns a structured dict
	in the same shape the reconciler already expects.

	Output contract (compatible with `app/context/terramind_synthesis.py:fetch`):

	{
	"ok": True,
	"synthetic_modality": True,
	"tim_chain": ["S2L2A", "S1RTC", "DEM", "LULC_predicted"],
	"label_schema": "ESA WorldCover 2021 v200, 5 macro-classes (confirmed)",
	"model": "msradam/TerraMind-base-Flood-NYC (AMD MI300X fine-tune)",
	"imagery": {
	"s2_acquired_iso": "2026-05-04T16:01:44Z",
	"s2_age_days": 1,
	"s2_cloud_pct": 7.0,
	"s2_source": "Element 84 Earth Search (ESA Copernicus License)",
	"s1_acquired_iso": "2026-05-01T22:51:31Z",
	"s1_age_days": 4,
	"s1_source": "Microsoft Planetary Computer (ESA Copernicus License)",
	},
	"class_fractions": {"developed": 78.3, "forest": 8.1, "water": 7.4, ...},
	"dominant_class": "developed",
	"dominant_pct": 78.3,
	"imperviousness_pct": 78.3,
	"green_space_pct": 13.7,
	"water_pct": 7.4,
	"polygons_geojson": {...},
	"elapsed_s": 5.2,
	}

	Drop-in replacement for `app/context/terramind_synthesis.py:fetch(lat, lon)`.
	"""
	from __future__ import annotations

	import json
	import logging
	import os
	import threading
	import time
	from typing import Any

	log = logging.getLogger("riprap.terramind_nyc")

	ENABLE = os.environ.get("RIPRAP_TERRAMIND_NYC_ENABLE", "1").lower() in ("1", "true", "yes")
	HF_REPO = os.environ.get("RIPRAP_TERRAMIND_NYC_REPO",
	"msradam/TerraMind-base-Flood-NYC")
	CHIP_PX = 256
	CACHE_DIR = os.environ.get("RIPRAP_TERRAMIND_NYC_CACHE",
	"/tmp/riprap_terramind_nyc_cache")

	_MODEL = None
	_MODEL_LOCK = threading.Lock()
	_CONFIG_PATH = None
	_CKPT_PATH = None

	# WorldCover 5-macro-class palette for the GeoJSON polygons
	COLORS = {
	0: ("water", "#0284c7"),
	1: ("developed", "#9ca3af"),
	2: ("forest", "#16a34a"),
	3: ("herbaceous", "#86efac"),
	4: ("other", "#d6d3d1"),
	}
	# Classes we don't paint (water best left transparent so basemap shoreline shows;
	# "other" is too small in NYC to be informative)
	HIDE_CLASSES = {"water", "other"}


	def _ensure_model():
	"""Lazy load the AMD-fine-tuned NYC checkpoint."""
	global _MODEL, _CONFIG_PATH, _CKPT_PATH
	if _MODEL is not None:
	return _MODEL
	with _MODEL_LOCK:
	if _MODEL is not None:
	return _MODEL
	from huggingface_hub import hf_hub_download
	from terratorch.tasks import SemanticSegmentationTask
	from safetensors.torch import load_file
	import torch
	import yaml as yamllib

	log.info("terramind_nyc: fetching from HF %s", HF_REPO)
	yaml_p = hf_hub_download(repo_id=HF_REPO,
	filename="terramind_v1_base_nyc_phase2.yaml",
	cache_dir=CACHE_DIR)
	ckpt_p = hf_hub_download(repo_id=HF_REPO,
	filename="TerraMind_v1_base_NYC_LULC.safetensors",
	cache_dir=CACHE_DIR)
	cfg = yamllib.safe_load(open(yaml_p))
	task = SemanticSegmentationTask(**cfg["model"]["init_args"])
	state = load_file(ckpt_p)
	state = {k: v for k, v in state.items() if not k.startswith("criterion.")}
	task.load_state_dict(state, strict=False)

	device = ("cuda" if torch.cuda.is_available()
	else "mps" if torch.backends.mps.is_available()
	else "cpu")
	_MODEL = task.to(device).eval()
	_CONFIG_PATH, _CKPT_PATH = yaml_p, ckpt_p
	log.info("terramind_nyc: model on %s", device)
	return _MODEL


	def warm():
	if ENABLE:
	try:
	_ensure_model()
	except Exception:
	log.exception("terramind_nyc: warm() failed; specialist will no-op")


	def _normalize_inputs(s2: "np.ndarray", s1: "np.ndarray", dem: "np.ndarray"):
	"""ImpactMesh-flood per-band normalization. The model was trained with these
	z-scores; inference must match."""
	import numpy as np
	S2_MEAN = np.array([1223.128, 1251.355, 1423.443, 1408.984, 1786.818, 2448.316,
	2685.642, 2745.795, 2817.936, 3194.081, 1964.659, 1399.317],
	dtype=np.float32)
	S2_STD = np.array([2358.709, 2227.598, 2082.363, 2068.519, 2086.682, 2003.085,
	2019.494, 2060.309, 2014.732, 2992.644, 1414.951, 1218.357],
	dtype=np.float32)
	S1_MEAN = np.array([-9.98, -15.968], dtype=np.float32)
	S1_STD = np.array([4.24, 4.105], dtype=np.float32)
	DEM_MEAN, DEM_STD = 141.786, 189.363
	s2n = (s2 - S2_MEAN[:, None, None]) / S2_STD[:, None, None]
	s1n = (s1 - S1_MEAN[:, None, None]) / S1_STD[:, None, None]
	demn = (dem - DEM_MEAN) / DEM_STD
	return s2n, s1n, demn


	def _polygonize_pred(class_idx, transform, crs):
	"""Vectorize per-class predictions into a GeoJSON FeatureCollection."""
	import json
	import geopandas as gpd
	from rasterio.features import shapes
	from shapely.geometry import shape
	import numpy as np

	feats = []
	for c, (label, color) in COLORS.items():
	if label in HIDE_CLASSES:
	continue
	mask = (class_idx == c).astype("uint8")
	if mask.sum() < 8:
	continue
	polys = [shape(geom) for geom, val in
	shapes(mask, mask=mask.astype(bool), transform=transform)
	if val == 1]
	if not polys:
	continue
	gdf = gpd.GeoDataFrame({"geometry": polys}, crs=crs).to_crs("EPSG:4326")
	gdf["geometry"] = gdf.geometry.simplify(1e-4, preserve_topology=True)
	for geom in gdf.geometry:
	feats.append({
	"type": "Feature",
	"geometry": json.loads(
	gpd.GeoSeries([geom], crs="EPSG:4326").to_json()
	)["features"][0]["geometry"],
	"properties": {"label": label, "class_idx": c, "fill_color": color},
	})
	return {"type": "FeatureCollection", "features": feats}


	def fetch(lat: float, lon: float, timeout_s: float = 30.0) -> dict[str, Any]:
	"""Run the AMD-trained TerraMind-NYC specialist at (lat, lon).

	Returns a dict matching the existing terramind_synthesis output schema,
	plus new fields for imagery freshness disclosure. Designed to never raise.
	"""
	if not ENABLE:
	return {"ok": False, "skipped": "RIPRAP_TERRAMIND_NYC_ENABLE=0"}
	t0 = time.time()
	try:
	import collections
	import numpy as np
	import torch

	# Late-import the live Sentinel fetch (it lives next to us in app/context)
	from app.context.sentinel_live import fetch_recent_chips

	# Get most-recent S2 + S1 + DEM at this point (Earth Search + PC fallback)
	chips = fetch_recent_chips(lat, lon, chip_px=CHIP_PX,
	max_age_days=14, max_cloud=30)
	if not chips or not chips.get("ok"):
	return {"ok": False, "skipped": "no recent imagery for this point"}

	s2 = chips["s2"] # (12, 256, 256) float32, raw L2A reflectance
	s1 = chips["s1"] # (2, 256, 256) float32, RTC sigma0 linear
	dem = chips["dem"] # (256, 256) float32

	s2n, s1n, demn = _normalize_inputs(s2, s1, dem)

	model = _ensure_model()
	device = next(model.parameters()).device
	# (B, C, T, H, W) — repeat single timestep 4× (matches Phase 2 training)
	T = 4
	s2_t = torch.from_numpy(s2n).unsqueeze(1).repeat(1, T, 1, 1).unsqueeze(0).to(device)
	s1_t = torch.from_numpy(s1n).unsqueeze(1).repeat(1, T, 1, 1).unsqueeze(0).to(device)
	dem_t = torch.from_numpy(demn).unsqueeze(0).unsqueeze(0).repeat(1, T, 1, 1).unsqueeze(0).to(device)

	if time.time() - t0 > timeout_s:
	return {"ok": False, "skipped": "exceeded timeout before inference"}

	with torch.no_grad():
	out = model({"S2L2A": s2_t, "S1RTC": s1_t, "DEM": dem_t})
	logits = out.output if hasattr(out, "output") else out
	if isinstance(logits, (list, tuple)):
	logits = logits[0]
	pred = logits.argmax(1)[0].cpu().numpy().astype(np.int8)

	hist = collections.Counter(pred.flatten().tolist())
	total = float(pred.size)
	fractions = {COLORS[c][0]: round(100.0 * v / total, 2)
	for c, v in hist.items() if 0 <= c < 5}
	ordered = dict(sorted(fractions.items(),
	key=lambda kv: kv[1], reverse=True))
	dominant = next(iter(ordered)) if ordered else "unknown"

	# Polygonize for the map layer
	polygons = None
	try:
	polygons = _polygonize_pred(pred, chips["transform"], chips["crs"])
	except Exception:
	log.exception("terramind_nyc: polygonize failed; skipping map layer")

	return {
	"ok": True,
	"synthetic_modality": True,
	"tim_chain": ["S2L2A", "S1RTC", "DEM", "LULC_predicted"],
	"label_schema": "ESA WorldCover 2021 v200, 5 macro-classes (confirmed)",
	"model": f"{HF_REPO} (AMD MI300X fine-tune)",
	"imagery": {
	"s2_acquired_iso": chips["s2_acquired_iso"],
	"s2_age_days": chips["s2_age_days"],
	"s2_cloud_pct": chips.get("s2_cloud_pct"),
	"s2_source": chips["s2_source"],
	"s1_acquired_iso": chips["s1_acquired_iso"],
	"s1_age_days": chips["s1_age_days"],
	"s1_source": chips["s1_source"],
	},
	"class_fractions": ordered,
	"dominant_class": dominant,
	"dominant_class_display": dominant,
	"dominant_pct": ordered.get(dominant, 0.0),
	"imperviousness_pct": ordered.get("developed", 0.0),
	"green_space_pct": round(ordered.get("forest", 0.0)
	+ ordered.get("herbaceous", 0.0), 2),
	"water_pct": ordered.get("water", 0.0),
	"n_classes_observed": len(ordered),
	"chip_shape": [5, CHIP_PX, CHIP_PX],
	"polygons_geojson": polygons,
	"elapsed_s": round(time.time() - t0, 2),
	}
	except Exception as e:
	log.exception("terramind_nyc: fetch failed")
	return {"ok": False, "err": f"{type(e).__name__}: {e}",
	"elapsed_s": round(time.time() - t0, 2)}