riprap-nyc / app /score.py
seriffic's picture
Backend evolution: Phases 1-10 specialists + agentic FSM + Mellea + LiteLLM router
6a82282
"""Riprap exposure scoring — research-grounded deterministic rubric.
This is an EXPOSURE index, not a damage probability. It produces a tier
1-4 from a thematic additive composite over min-max-normalized indicators
within sub-indices. The same input always produces the same tier; live
signals (NWS alerts, surge residual, hourly precip) are NOT in this
score — they are surfaced as a separate "current conditions" badge per
NPCC4 / IPCC AR6 WG II's distinction between exposure (quasi-stationary
property of place) and event occurrence (time-varying).
Methodology:
- Cutter, Boruff & Shirley, 2003. "Social Vulnerability to Environmental
Hazards." Social Science Quarterly 84(2): 242-261. — hazards-of-place
composite construction.
- Tate, 2012. "Social Vulnerability Indices: A Comparative Assessment
Using Uncertainty and Sensitivity Analysis." Natural Hazards 63: 325-
347. — equal weights within thematic groups are the most rank-stable
default; differential weighting is hard to defend.
- Balica, Wright & van der Meulen, 2012. "A Flood Vulnerability Index
for Coastal Cities." Natural Hazards 64: 73-105. — multiplicative
override behaviour; we recover the important part as a "max-empirical
floor" rather than a full multiplicative form.
Per-indicator citations:
- HAND breakpoints: Nobre et al., 2011. "Height Above the Nearest
Drainage." J. Hydrology 404: 13-29.
- TWI: Beven & Kirkby, 1979. Hydrological Sciences Bulletin 24; Sørensen,
Zinko & Seibert, 2006. HESS 10: 101-112. (Half-weight because TWI is
noisier than HAND in flat urban DEMs; we percentile-bin rather than
use absolute cutoffs.)
- Zone hierarchy: NYC NPCC4 (2024) Ch. 3; NYC Hazard Mitigation Plan 2024.
- USGS HWM proximity floor: USGS HWM positional uncertainty is typically
5-30 m horizontal, so 100 m gives ~3σ headroom for a true "this
address was inundated" signal.
Scope limit: We have no labeled flood-damage outcomes. The tier is a
literature-grounded exposure prior, not a calibrated loss prediction.
For insurance pricing, use FEMA Risk Rating 2.0 (claims-driven GLM).
"""
from __future__ import annotations
import pandas as pd
# ---------- Indicator schemas ----------------------------------------------
#
# Each sub-index is a mapping {indicator_name: weight}. Within a sub-index,
# the weighted sum is normalized by the maximum possible weight, giving a
# 0-1 score per sub-index. The composite is the sum of the three sub-index
# scores (range 0-3), then mapped to tiers.
#
# Why equal weights within thematic groups: Tate 2012's uncertainty
# analysis showed that differential weighting is the most-attacked axis
# of any composite vulnerability/exposure index. Equal weights are the
# safest default; agency tiering (which puts FEMA 1% above 0.2%, Sandy
# above modeled scenarios) supplies the remaining structure.
REGULATORY = {
# FEMA NFHL — regulatory baseline. SFHA (1%) is the mandate threshold.
"fema_1pct": 1.00,
"fema_02pct": 0.50,
# NYC DEP Stormwater Maps (2021) — modeled pluvial scenarios.
# Moderate-2050 is treated heavier than Extreme-2080 because NPCC4
# explicitly designates 2080 SLR + 7 in/hr as a TAIL scenario.
"dep_moderate_2050": 0.75,
"dep_extreme_2080": 0.50,
"dep_tidal_2050": 0.75,
}
HYDROLOGICAL = {
# HAND (Height Above Nearest Drainage), banded per Nobre et al. 2011.
# Bands: <1 m (channel/floodplain near-certain wet) → 1.0
# 1-3 m (floodplain) → 0.66
# 3-10 m (transitional) → 0.33
# >10 m (hillslope, dry) → 0
"hand_band": 1.00,
# TWI quartile (top quartile = saturation-prone). Half-weight
# because TWI is noisier than HAND in urban DEMs; we percentile-bin
# within NYC rather than using absolute cutoffs.
"twi_quartile": 0.50,
# Local-relief inversions: low percentile = topographic low point.
# Bins: <10th=1.0, 10-25th=0.66, 25-50th=0.33, ≥50th=0.
"elev_pct_200m_inv": 0.50,
"elev_pct_750m_inv": 0.50,
# Basin relief contributes a small additional terrain term.
"basin_relief_band": 0.25,
}
EMPIRICAL = {
# Sandy 2012 inundation — empirical post-event extent. Also triggers
# the max-empirical FLOOR rule below.
"sandy": 1.00,
# USGS Hurricane Ida 2021 high-water marks. Within 100 m → "direct"
# (also triggers the floor); 100-800 m → "neighborhood proximity".
"ida_hwm_within_100m": 1.00,
"ida_hwm_within_800m": 0.50,
# Prithvi-EO 2.0 satellite-derived inundation polygon (Hurricane Ida
# pre/post diff) — semi-empirical because model-derived but
# conditioned on observed Sentinel-2 imagery.
"prithvi_polygon": 0.75,
# NYC 311 flood-related complaint count, banded over 5-year window:
# ≥10 → 1.0, 3-9 → 0.66, 1-2 → 0.33, 0 → 0
# Weight capped at 0.75 because 311 has documented socio-economic
# reporting bias (engagement varies by neighborhood).
"complaints_band": 0.75,
# FloodNet trigger flag (any labeled flood event at any sensor
# within 600 m, last 3 years). Same 0.75 cap as 311 since both have
# spatial coverage bias.
"floodnet_trigger": 0.75,
}
def _hand_band(hand_m: float | None) -> float:
"""Nobre et al. 2011 HAND classes adapted for NYC's flat urban terrain."""
if hand_m is None:
return 0.0
if hand_m < 1.0:
return 1.0
if hand_m < 3.0:
return 0.66
if hand_m < 10.0:
return 0.33
return 0.0
def _percentile_inv_band(pct: float | None) -> float:
"""Inverted relief percentile: lower = more exposed (water pools here)."""
if pct is None:
return 0.0
if pct < 10:
return 1.0
if pct < 25:
return 0.66
if pct < 50:
return 0.33
return 0.0
def _twi_quartile(twi: float | None) -> float:
"""TWI thresholds calibrated to NYC's flat 30 m DEM. Top quartile
cutoff comes from the NYC-wide TWI distribution; here we approximate
with literature-typical breakpoints (Sørensen 2006 site-specific
advice)."""
if twi is None:
return 0.0
if twi >= 12:
return 1.0
if twi >= 10:
return 0.66
if twi >= 8:
return 0.33
return 0.0
def _basin_relief_band(relief_m: float | None) -> float:
if relief_m is None:
return 0.0
# Higher basin relief in a flat area means the address sits in a real
# depression. Banding is empirical for NYC.
if relief_m >= 8:
return 1.0
if relief_m >= 4:
return 0.66
if relief_m >= 2:
return 0.33
return 0.0
def _complaints_band(n: int | None) -> float:
if not n:
return 0.0
if n >= 10:
return 1.0
if n >= 3:
return 0.66
if n >= 1:
return 0.33
return 0.0
# ---------- Sub-index computation ------------------------------------------
def _normalize(weighted: float, weights: dict[str, float]) -> float:
max_w = sum(weights.values())
return weighted / max_w if max_w else 0.0
def regulatory_subindex(s: dict) -> float:
"""0..1. All inputs are binary (inside zone or not)."""
w = REGULATORY
raw = sum(w[k] * (1.0 if s.get(k) else 0.0) for k in w)
return _normalize(raw, w)
def hydrological_subindex(s: dict) -> float:
"""0..1. Inputs are continuous; convert to ordinal bands first."""
w = HYDROLOGICAL
bands = {
"hand_band": _hand_band(s.get("hand_m")),
"twi_quartile": _twi_quartile(s.get("twi")),
"elev_pct_200m_inv": _percentile_inv_band(s.get("rel_elev_pct_200m")),
"elev_pct_750m_inv": _percentile_inv_band(s.get("rel_elev_pct_750m")),
"basin_relief_band": _basin_relief_band(s.get("basin_relief_m")),
}
raw = sum(w[k] * bands[k] for k in w)
return _normalize(raw, w)
def empirical_subindex(s: dict) -> float:
"""0..1. Mix of binary and banded count signals."""
w = EMPIRICAL
vals = {
"sandy": 1.0 if s.get("sandy") else 0.0,
"ida_hwm_within_100m": 1.0 if s.get("ida_hwm_within_100m") else 0.0,
"ida_hwm_within_800m": 1.0 if s.get("ida_hwm_within_800m") else 0.0,
"prithvi_polygon": 1.0 if s.get("prithvi_polygon") else 0.0,
"complaints_band": _complaints_band(s.get("complaints_count")),
"floodnet_trigger": 1.0 if s.get("floodnet_trigger") else 0.0,
}
raw = sum(w[k] * vals[k] for k in w)
return _normalize(raw, w)
# ---------- Composite + tier mapping ---------------------------------------
# Tier breakpoints over the composite (range 0-3, since each sub-index is
# 0-1). Tuned so that "Sandy + DEP-2050 + HAND<1m" lands in Tier 1, and a
# single positive signal lands in Tier 4. Documented in METHODOLOGY.md.
TIER_BREAKPOINTS = [
(1.50, 1), # high — multiple sub-indices saturated
(1.00, 2), # elevated — at least one strong sub-index
(0.50, 3), # moderate — partial signals across categories
(0.01, 4), # limited — a single contextual signal
]
TIER_LABELS = {
1: ("High exposure", "Multiple sub-indices saturated; empirical and/or "
"modeled scenarios both indicate substantial exposure."),
2: ("Elevated exposure", "At least one sub-index near saturation; significant "
"overlap with empirical or modeled scenarios."),
3: ("Moderate exposure", "Partial signals across categories; scenario- or "
"neighborhood-specific exposure."),
4: ("Limited exposure", "A single contextual signal; no positive scenario hits."),
0: ("No flagged exposure", "No positive flood signal across the assessed sources."),
}
def composite(signals: dict) -> dict:
"""Compute sub-indices, composite score, and tier with the floor rule.
Returns: {
'subindices': {'regulatory': 0..1, 'hydrological': 0..1, 'empirical': 0..1},
'composite': 0..3,
'tier': 0..4,
'floor_applied': bool,
}
Max-empirical floor: if Sandy 2012 inundation OR a USGS Ida HWM within
100 m fired, the tier is capped at 2 (cannot be worse). This recovers
the multiplicative behavior — empirical evidence overrides terrain or
modeled scenarios — without giving up additive transparency.
"""
reg = regulatory_subindex(signals)
hyd = hydrological_subindex(signals)
emp = empirical_subindex(signals)
composite_score = reg + hyd + emp
raw_tier = 0
for breakpoint, t in TIER_BREAKPOINTS:
if composite_score >= breakpoint:
raw_tier = t
break
floor_applied = bool(signals.get("sandy") or signals.get("ida_hwm_within_100m"))
if floor_applied and (raw_tier == 0 or raw_tier > 2):
final_tier = 2
else:
final_tier = raw_tier
return {
"subindices": {
"regulatory": round(reg, 3),
"hydrological": round(hyd, 3),
"empirical": round(emp, 3),
},
"composite": round(composite_score, 3),
"tier": final_tier,
"floor_applied": floor_applied,
}
# ---------- Backward-compat shims ------------------------------------------
# Register CLI and register_builder consume a flat `tier` column on a
# DataFrame. The shim materializes composite() over rows and writes back
# `score` (composite scaled 0-100) and `tier`.
def tier(score: int) -> int:
"""Legacy bridge for callers that still pass a small-integer score.
Maps the OLD additive-integer score to the new tier breakpoints by
scaling. Prefer composite() for new code."""
if score >= 6: return 1
if score >= 4: return 2
if score >= 2: return 3
if score >= 1: return 4
return 0
# Legacy WEIGHTS map kept so riprap.py and any external consumer
# continue to import without breaking. The new composite() is the
# authoritative scorer.
WEIGHTS = {
"sandy": 3,
"dep_extreme_2080": 2,
"dep_moderate_2050": 2,
"dep_moderate_current": 1,
"complaints_3plus": 1,
"floodnet_trigger": 1,
"policy_named": 1,
}
def score_row(signals: dict) -> tuple[int, int]:
"""Legacy-shape wrapper around composite(). Returns (composite_x100, tier)."""
c = composite(signals)
return int(round(c["composite"] * 100)), c["tier"]
def score_frame(df: pd.DataFrame) -> pd.DataFrame:
"""Vectorized composite over a DataFrame whose columns name our
indicators. Missing columns are treated as 0 / None.
Adds columns: subindex_regulatory, subindex_hydrological,
subindex_empirical, composite, score, tier, floor_applied.
`score` is the composite scaled 0-100 for register CSV legibility.
"""
out = df.copy()
rows = out.to_dict(orient="records")
results = [composite(r) for r in rows]
out["subindex_regulatory"] = [r["subindices"]["regulatory"] for r in results]
out["subindex_hydrological"] = [r["subindices"]["hydrological"] for r in results]
out["subindex_empirical"] = [r["subindices"]["empirical"] for r in results]
out["composite"] = [r["composite"] for r in results]
out["score"] = (out["composite"] * 100).round().astype(int)
out["tier"] = [r["tier"] for r in results]
out["floor_applied"] = [r["floor_applied"] for r in results]
return out