"""Riprap exposure scoring — research-grounded deterministic rubric. This is an EXPOSURE index, not a damage probability. It produces a tier 1-4 from a thematic additive composite over min-max-normalized indicators within sub-indices. The same input always produces the same tier; live signals (NWS alerts, surge residual, hourly precip) are NOT in this score — they are surfaced as a separate "current conditions" badge per NPCC4 / IPCC AR6 WG II's distinction between exposure (quasi-stationary property of place) and event occurrence (time-varying). Methodology: - Cutter, Boruff & Shirley, 2003. "Social Vulnerability to Environmental Hazards." Social Science Quarterly 84(2): 242-261. — hazards-of-place composite construction. - Tate, 2012. "Social Vulnerability Indices: A Comparative Assessment Using Uncertainty and Sensitivity Analysis." Natural Hazards 63: 325- 347. — equal weights within thematic groups are the most rank-stable default; differential weighting is hard to defend. - Balica, Wright & van der Meulen, 2012. "A Flood Vulnerability Index for Coastal Cities." Natural Hazards 64: 73-105. — multiplicative override behaviour; we recover the important part as a "max-empirical floor" rather than a full multiplicative form. Per-indicator citations: - HAND breakpoints: Nobre et al., 2011. "Height Above the Nearest Drainage." J. Hydrology 404: 13-29. - TWI: Beven & Kirkby, 1979. Hydrological Sciences Bulletin 24; Sørensen, Zinko & Seibert, 2006. HESS 10: 101-112. (Half-weight because TWI is noisier than HAND in flat urban DEMs; we percentile-bin rather than use absolute cutoffs.) - Zone hierarchy: NYC NPCC4 (2024) Ch. 3; NYC Hazard Mitigation Plan 2024. - USGS HWM proximity floor: USGS HWM positional uncertainty is typically 5-30 m horizontal, so 100 m gives ~3σ headroom for a true "this address was inundated" signal. Scope limit: We have no labeled flood-damage outcomes. The tier is a literature-grounded exposure prior, not a calibrated loss prediction. For insurance pricing, use FEMA Risk Rating 2.0 (claims-driven GLM). """ from __future__ import annotations import pandas as pd # ---------- Indicator schemas ---------------------------------------------- # # Each sub-index is a mapping {indicator_name: weight}. Within a sub-index, # the weighted sum is normalized by the maximum possible weight, giving a # 0-1 score per sub-index. The composite is the sum of the three sub-index # scores (range 0-3), then mapped to tiers. # # Why equal weights within thematic groups: Tate 2012's uncertainty # analysis showed that differential weighting is the most-attacked axis # of any composite vulnerability/exposure index. Equal weights are the # safest default; agency tiering (which puts FEMA 1% above 0.2%, Sandy # above modeled scenarios) supplies the remaining structure. REGULATORY = { # FEMA NFHL — regulatory baseline. SFHA (1%) is the mandate threshold. "fema_1pct": 1.00, "fema_02pct": 0.50, # NYC DEP Stormwater Maps (2021) — modeled pluvial scenarios. # Moderate-2050 is treated heavier than Extreme-2080 because NPCC4 # explicitly designates 2080 SLR + 7 in/hr as a TAIL scenario. "dep_moderate_2050": 0.75, "dep_extreme_2080": 0.50, "dep_tidal_2050": 0.75, } HYDROLOGICAL = { # HAND (Height Above Nearest Drainage), banded per Nobre et al. 2011. # Bands: <1 m (channel/floodplain near-certain wet) → 1.0 # 1-3 m (floodplain) → 0.66 # 3-10 m (transitional) → 0.33 # >10 m (hillslope, dry) → 0 "hand_band": 1.00, # TWI quartile (top quartile = saturation-prone). Half-weight # because TWI is noisier than HAND in urban DEMs; we percentile-bin # within NYC rather than using absolute cutoffs. "twi_quartile": 0.50, # Local-relief inversions: low percentile = topographic low point. # Bins: <10th=1.0, 10-25th=0.66, 25-50th=0.33, ≥50th=0. "elev_pct_200m_inv": 0.50, "elev_pct_750m_inv": 0.50, # Basin relief contributes a small additional terrain term. "basin_relief_band": 0.25, } EMPIRICAL = { # Sandy 2012 inundation — empirical post-event extent. Also triggers # the max-empirical FLOOR rule below. "sandy": 1.00, # USGS Hurricane Ida 2021 high-water marks. Within 100 m → "direct" # (also triggers the floor); 100-800 m → "neighborhood proximity". "ida_hwm_within_100m": 1.00, "ida_hwm_within_800m": 0.50, # Prithvi-EO 2.0 satellite-derived inundation polygon (Hurricane Ida # pre/post diff) — semi-empirical because model-derived but # conditioned on observed Sentinel-2 imagery. "prithvi_polygon": 0.75, # NYC 311 flood-related complaint count, banded over 5-year window: # ≥10 → 1.0, 3-9 → 0.66, 1-2 → 0.33, 0 → 0 # Weight capped at 0.75 because 311 has documented socio-economic # reporting bias (engagement varies by neighborhood). "complaints_band": 0.75, # FloodNet trigger flag (any labeled flood event at any sensor # within 600 m, last 3 years). Same 0.75 cap as 311 since both have # spatial coverage bias. "floodnet_trigger": 0.75, } def _hand_band(hand_m: float | None) -> float: """Nobre et al. 2011 HAND classes adapted for NYC's flat urban terrain.""" if hand_m is None: return 0.0 if hand_m < 1.0: return 1.0 if hand_m < 3.0: return 0.66 if hand_m < 10.0: return 0.33 return 0.0 def _percentile_inv_band(pct: float | None) -> float: """Inverted relief percentile: lower = more exposed (water pools here).""" if pct is None: return 0.0 if pct < 10: return 1.0 if pct < 25: return 0.66 if pct < 50: return 0.33 return 0.0 def _twi_quartile(twi: float | None) -> float: """TWI thresholds calibrated to NYC's flat 30 m DEM. Top quartile cutoff comes from the NYC-wide TWI distribution; here we approximate with literature-typical breakpoints (Sørensen 2006 site-specific advice).""" if twi is None: return 0.0 if twi >= 12: return 1.0 if twi >= 10: return 0.66 if twi >= 8: return 0.33 return 0.0 def _basin_relief_band(relief_m: float | None) -> float: if relief_m is None: return 0.0 # Higher basin relief in a flat area means the address sits in a real # depression. Banding is empirical for NYC. if relief_m >= 8: return 1.0 if relief_m >= 4: return 0.66 if relief_m >= 2: return 0.33 return 0.0 def _complaints_band(n: int | None) -> float: if not n: return 0.0 if n >= 10: return 1.0 if n >= 3: return 0.66 if n >= 1: return 0.33 return 0.0 # ---------- Sub-index computation ------------------------------------------ def _normalize(weighted: float, weights: dict[str, float]) -> float: max_w = sum(weights.values()) return weighted / max_w if max_w else 0.0 def regulatory_subindex(s: dict) -> float: """0..1. All inputs are binary (inside zone or not).""" w = REGULATORY raw = sum(w[k] * (1.0 if s.get(k) else 0.0) for k in w) return _normalize(raw, w) def hydrological_subindex(s: dict) -> float: """0..1. Inputs are continuous; convert to ordinal bands first.""" w = HYDROLOGICAL bands = { "hand_band": _hand_band(s.get("hand_m")), "twi_quartile": _twi_quartile(s.get("twi")), "elev_pct_200m_inv": _percentile_inv_band(s.get("rel_elev_pct_200m")), "elev_pct_750m_inv": _percentile_inv_band(s.get("rel_elev_pct_750m")), "basin_relief_band": _basin_relief_band(s.get("basin_relief_m")), } raw = sum(w[k] * bands[k] for k in w) return _normalize(raw, w) def empirical_subindex(s: dict) -> float: """0..1. Mix of binary and banded count signals.""" w = EMPIRICAL vals = { "sandy": 1.0 if s.get("sandy") else 0.0, "ida_hwm_within_100m": 1.0 if s.get("ida_hwm_within_100m") else 0.0, "ida_hwm_within_800m": 1.0 if s.get("ida_hwm_within_800m") else 0.0, "prithvi_polygon": 1.0 if s.get("prithvi_polygon") else 0.0, "complaints_band": _complaints_band(s.get("complaints_count")), "floodnet_trigger": 1.0 if s.get("floodnet_trigger") else 0.0, } raw = sum(w[k] * vals[k] for k in w) return _normalize(raw, w) # ---------- Composite + tier mapping --------------------------------------- # Tier breakpoints over the composite (range 0-3, since each sub-index is # 0-1). Tuned so that "Sandy + DEP-2050 + HAND<1m" lands in Tier 1, and a # single positive signal lands in Tier 4. Documented in METHODOLOGY.md. TIER_BREAKPOINTS = [ (1.50, 1), # high — multiple sub-indices saturated (1.00, 2), # elevated — at least one strong sub-index (0.50, 3), # moderate — partial signals across categories (0.01, 4), # limited — a single contextual signal ] TIER_LABELS = { 1: ("High exposure", "Multiple sub-indices saturated; empirical and/or " "modeled scenarios both indicate substantial exposure."), 2: ("Elevated exposure", "At least one sub-index near saturation; significant " "overlap with empirical or modeled scenarios."), 3: ("Moderate exposure", "Partial signals across categories; scenario- or " "neighborhood-specific exposure."), 4: ("Limited exposure", "A single contextual signal; no positive scenario hits."), 0: ("No flagged exposure", "No positive flood signal across the assessed sources."), } def composite(signals: dict) -> dict: """Compute sub-indices, composite score, and tier with the floor rule. Returns: { 'subindices': {'regulatory': 0..1, 'hydrological': 0..1, 'empirical': 0..1}, 'composite': 0..3, 'tier': 0..4, 'floor_applied': bool, } Max-empirical floor: if Sandy 2012 inundation OR a USGS Ida HWM within 100 m fired, the tier is capped at 2 (cannot be worse). This recovers the multiplicative behavior — empirical evidence overrides terrain or modeled scenarios — without giving up additive transparency. """ reg = regulatory_subindex(signals) hyd = hydrological_subindex(signals) emp = empirical_subindex(signals) composite_score = reg + hyd + emp raw_tier = 0 for breakpoint, t in TIER_BREAKPOINTS: if composite_score >= breakpoint: raw_tier = t break floor_applied = bool(signals.get("sandy") or signals.get("ida_hwm_within_100m")) if floor_applied and (raw_tier == 0 or raw_tier > 2): final_tier = 2 else: final_tier = raw_tier return { "subindices": { "regulatory": round(reg, 3), "hydrological": round(hyd, 3), "empirical": round(emp, 3), }, "composite": round(composite_score, 3), "tier": final_tier, "floor_applied": floor_applied, } # ---------- Backward-compat shims ------------------------------------------ # Register CLI and register_builder consume a flat `tier` column on a # DataFrame. The shim materializes composite() over rows and writes back # `score` (composite scaled 0-100) and `tier`. def tier(score: int) -> int: """Legacy bridge for callers that still pass a small-integer score. Maps the OLD additive-integer score to the new tier breakpoints by scaling. Prefer composite() for new code.""" if score >= 6: return 1 if score >= 4: return 2 if score >= 2: return 3 if score >= 1: return 4 return 0 # Legacy WEIGHTS map kept so riprap.py and any external consumer # continue to import without breaking. The new composite() is the # authoritative scorer. WEIGHTS = { "sandy": 3, "dep_extreme_2080": 2, "dep_moderate_2050": 2, "dep_moderate_current": 1, "complaints_3plus": 1, "floodnet_trigger": 1, "policy_named": 1, } def score_row(signals: dict) -> tuple[int, int]: """Legacy-shape wrapper around composite(). Returns (composite_x100, tier).""" c = composite(signals) return int(round(c["composite"] * 100)), c["tier"] def score_frame(df: pd.DataFrame) -> pd.DataFrame: """Vectorized composite over a DataFrame whose columns name our indicators. Missing columns are treated as 0 / None. Adds columns: subindex_regulatory, subindex_hydrological, subindex_empirical, composite, score, tier, floor_applied. `score` is the composite scaled 0-100 for register CSV legibility. """ out = df.copy() rows = out.to_dict(orient="records") results = [composite(r) for r in rows] out["subindex_regulatory"] = [r["subindices"]["regulatory"] for r in results] out["subindex_hydrological"] = [r["subindices"]["hydrological"] for r in results] out["subindex_empirical"] = [r["subindices"]["empirical"] for r in results] out["composite"] = [r["composite"] for r in results] out["score"] = (out["composite"] * 100).round().astype(int) out["tier"] = [r["tier"] for r in results] out["floor_applied"] = [r["floor_applied"] for r in results] return out