"""Address geocoding — NYC primary + national fallback.

NYC primary: NYC DCP Geosearch (geosearch.planninglabs.nyc), no auth,
NYC-only. It will fuzzy-match upstate addresses to NYC streets — e.g.
'257 Washington Ave, Albany NY' silently maps to Clinton Hill, Brooklyn.
We detect this via a non-NYC region or non-NYC ZIP and fall back to
OpenStreetMap Nominatim (no key, free, rate-limited per usage policy).

Includes a borough-hint post-filter so Queens hyphenated-style addresses
(e.g. '153-09 90 Ave, Jamaica, Queens') preferentially resolve to the
borough the user named.
"""
| from __future__ import annotations |
|
|
| import logging |
| import re |
| from dataclasses import dataclass |
|
|
| import httpx |
|
|
# Module logger; the name is fixed rather than derived from ``__name__``.
log = logging.getLogger("riprap.geocode")

# Search endpoints: NYC Geosearch (primary) and OSM Nominatim (fallback).
URL = "https://geosearch.planninglabs.nyc/v2/search"
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
# User-Agent header value sent with every Nominatim request.
NOMINATIM_UA = (
    "Riprap-NYC/0.5 "
    "(civic-flood-tool; +https://huggingface.co/spaces/msradam/riprap-nyc)"
)

# Rough NYC bounding box. Judging by the values the order is
# (min_lat, min_lon, max_lat, max_lon) — confirm before reusing elsewhere.
NYC_BBOX = (40.49, -74.27, 40.92, -73.69)

# Matches a standalone five-digit number in the range 12000-14999
# (per the name, ZIP codes outside NYC / upstate New York).
_UPSTATE_ZIP_RE = re.compile(r"\b1[2-4]\d{3}\b")
# Canonical borough spellings.
_BOROUGHS = ("Manhattan", "Bronx", "Brooklyn", "Queens", "Staten Island")
|
|
# Lower-cased borough names. These always outrank neighborhood hints.
_BOROUGH_NAME_HINTS = {
    "manhattan": "Manhattan",
    "bronx": "Bronx",
    "brooklyn": "Brooklyn",
    "queens": "Queens",
    "staten island": "Staten Island",
}
# Lower-cased neighborhood names that imply a borough when none is named.
_NEIGHBORHOOD_HINTS = {
    "jamaica": "Queens",
    "rockaway": "Queens",
    # Listed explicitly because whole-word matching no longer lets
    # "rockaway" match inside "rockaways".
    "rockaways": "Queens",
    "astoria": "Queens",
    "flushing": "Queens",
    "harlem": "Manhattan",
    "soho": "Manhattan",
    "bushwick": "Brooklyn",
    "red hook": "Brooklyn",
    "fordham": "Bronx",
}


def _hint_pattern(hints: dict[str, str]) -> re.Pattern[str]:
    """Compile one whole-word alternation over the keys of *hints*."""
    # Longest needles first so a longer name is preferred over a shorter
    # one starting at the same position; a space inside a needle matches
    # any run of whitespace.
    needles = sorted(hints, key=len, reverse=True)
    body = "|".join(r"\s+".join(map(re.escape, n.split())) for n in needles)
    return re.compile(rf"\b(?:{body})\b")


# (pattern, lookup table) pairs in priority order; compiled once at import.
_HINT_TIERS = tuple(
    (_hint_pattern(table), table)
    for table in (_BOROUGH_NAME_HINTS, _NEIGHBORHOOD_HINTS)
)


def _detect_borough(text: str) -> str | None:
    """Return the borough the free-text query appears to name, if any.

    Matching is case-insensitive and on whole words, so "Bronxville" does
    not count as "Bronx". Explicit borough names are considered before
    neighborhood names. Within a tier the right-most mention wins, because
    the locality normally follows the street in an address: for
    "123 Manhattan Ave, Brooklyn" the answer is "Brooklyn".

    Args:
        text: The address or place query as typed by the user.

    Returns:
        One of "Manhattan", "Bronx", "Brooklyn", "Queens" or
        "Staten Island", or ``None`` when nothing in ``text`` matches.
    """
    if not text:
        return None
    lowered = text.lower()
    for pattern, table in _HINT_TIERS:
        found = pattern.findall(lowered)
        if found:
            # Collapse whitespace so "red   hook" maps back to its key.
            return table[" ".join(found[-1].split())]
    return None
|
|
@dataclass
class GeocodeHit:
    """A single geocoding candidate, independent of which service found it."""

    # Human-readable label for the match.
    address: str
    # Borough or locality name reported by the service, when available.
    borough: str | None
    # Latitude and longitude of the match.
    lat: float
    lon: float
    # "bbl" / "bin" values from the Geosearch "pad" addendum (presumably
    # the NYC tax-lot and building identifiers); None for Nominatim hits.
    bbl: str | None
    bin: str | None
    # Service-specific payload the hit was built from.
    raw: dict
|
|
def _hit_from_feature(feature: dict, text: str) -> GeocodeHit | None:
    """Build a hit from one Geosearch feature, or None if it is unusable."""
    try:
        props = feature.get("properties") or {}
        coords = (feature.get("geometry") or {}).get("coordinates") or ()
        # The original code read longitude from index 0 and latitude from
        # index 1; that order is kept.
        lon, lat = float(coords[0]), float(coords[1])
        # "addendum" / "pad" may be absent or null; treat both as empty.
        pad = (props.get("addendum") or {}).get("pad") or {}
        return GeocodeHit(
            address=props.get("label") or props.get("name") or text,
            borough=props.get("borough"),
            lat=lat,
            lon=lon,
            bbl=pad.get("bbl"),
            bin=pad.get("bin"),
            raw=props,
        )
    except (AttributeError, IndexError, KeyError, TypeError, ValueError) as e:
        log.debug("Skipping malformed Geosearch feature: %r", e)
        return None


def geocode(text: str, limit: int = 5) -> list[GeocodeHit]:
    """Query NYC Geosearch (the primary geocoder) for candidate matches.

    Best-effort: any failure while fetching or decoding the response is
    logged as a warning and produces an empty list rather than raising.
    Features are parsed one at a time, so a single malformed feature (for
    example one without a coordinate pair) is skipped instead of
    discarding every hit. Each returned hit therefore has float
    ``lat``/``lon``.

    Args:
        text: Free-text address or place query.
        limit: Maximum number of results to request (sent as ``size``).

    Returns:
        Hits in the order the service returned them; possibly empty.
    """
    try:
        r = httpx.get(URL, params={"text": text, "size": limit}, timeout=5)
        r.raise_for_status()
        feats = r.json().get("features") or []
    except Exception as e:  # deliberate best-effort boundary
        log.warning("Geosearch failed: %r", e)
        return []

    if not isinstance(feats, list):
        log.warning("Geosearch failed: %r", TypeError("'features' is not a list"))
        return []

    parsed = (_hit_from_feature(f, text) for f in feats)
    return [hit for hit in parsed if hit is not None]
|
|
# Exact (case-folded) Nominatim place names -> NYC borough.
_NOMINATIM_BOROUGH_NAMES = {
    "manhattan": "Manhattan",
    "new york county": "Manhattan",
    "brooklyn": "Brooklyn",
    "kings county": "Brooklyn",
    "queens": "Queens",
    "queens county": "Queens",
    "bronx": "Bronx",
    "the bronx": "Bronx",
    "bronx county": "Bronx",
    "staten island": "Staten Island",
    "richmond county": "Staten Island",
}


def _nominatim_borough(addr: dict) -> str | None:
    """Pick a borough (or best locality name) from a Nominatim address dict."""
    candidates = [
        value
        for value in (addr.get(k) for k in ("suburb", "city_district", "county"))
        if isinstance(value, str) and value.strip()
    ]
    # County names such as "Kings County" and "Richmond County" also exist
    # in other states, so only map to a borough when the result is not
    # explicitly tagged with a different state.
    state = addr.get("state")
    in_new_york = not isinstance(state, str) or state.strip().casefold() == "new york"
    if in_new_york:
        for value in candidates:
            # Exact-name lookup: substring tests turned "Kingsbridge" into
            # Brooklyn and "Richmond Hill" into Staten Island.
            borough = _NOMINATIM_BOROUGH_NAMES.get(value.strip().casefold())
            if borough:
                return borough
    # Nothing mapped to a borough: keep the first available locality name.
    return candidates[0] if candidates else None


def geocode_nominatim(text: str) -> GeocodeHit | None:
    """Geocode ``text`` with OSM Nominatim, the national (US-only) fallback.

    Requests a single best match. Best-effort: a failed request, an empty
    or unexpectedly shaped response, or a result without usable
    coordinates is logged and yields ``None`` instead of raising.

    The hit's ``borough`` is taken from the ``suburb``, ``city_district``
    and ``county`` address fields. The first field that names an NYC
    borough or its county wins (for example "Kings County" becomes
    "Brooklyn"). Otherwise the first non-empty field is used as-is.

    Args:
        text: Free-text address or place query.

    Returns:
        A hit with ``bbl`` and ``bin`` set to ``None``, or ``None`` when
        no usable result was obtained.
    """
    try:
        r = httpx.get(NOMINATIM_URL, params={
            "q": text, "format": "jsonv2", "addressdetails": "1",
            "limit": 1, "countrycodes": "us",
        }, headers={"User-Agent": NOMINATIM_UA}, timeout=10)
        r.raise_for_status()
        rows = r.json()
    except Exception as e:  # deliberate best-effort boundary
        log.warning("Nominatim fetch failed: %r", e)
        return None

    # A non-list payload (e.g. an error object) has no first row to use.
    if not isinstance(rows, list) or not rows or not isinstance(rows[0], dict):
        return None
    row = rows[0]

    try:
        lat = float(row["lat"])
        lon = float(row["lon"])
    except (KeyError, TypeError, ValueError) as e:
        log.warning("Nominatim result has unusable coordinates: %r", e)
        return None

    addr = row.get("address")
    if not isinstance(addr, dict):
        addr = {}

    return GeocodeHit(
        address=row.get("display_name") or text,
        borough=_nominatim_borough(addr),
        lat=lat,
        lon=lon,
        bbl=None,
        bin=None,
        raw={"source": "nominatim", **row},
    )
|
|
def _has_upstate_zip(text: str) -> bool:
    """Return True if ``text`` holds an upstate-range ZIP after its first token."""
    stripped = text.strip()
    # A match at the very start is far more likely a house number (e.g. an
    # unhyphenated Queens address such as "13620 38th Ave") than a ZIP.
    return any(m.start() > 0 for m in _UPSTATE_ZIP_RE.finditer(stripped))


def _within_nyc_bbox(hit: GeocodeHit) -> bool:
    """Return True if the hit has coordinates inside ``NYC_BBOX``."""
    if hit.lat is None or hit.lon is None:
        return False
    min_lat, min_lon, max_lat, max_lon = NYC_BBOX
    return min_lat <= hit.lat <= max_lat and min_lon <= hit.lon <= max_lon


def geocode_one(text: str) -> GeocodeHit | None:
    """Resolve ``text`` to a single best hit, NYC Geosearch first.

    Strategy:
      1. If the query carries an upstate-range ZIP, skip Geosearch: it is
         NYC-only and would fuzzy-match the street onto an NYC address.
      2. Otherwise take the Geosearch results, preferring the first hit in
         the borough the query names (if any) over the top hit.
      3. Accept that hit only if its coordinates fall inside ``NYC_BBOX``
         (latitude and longitude both checked).
      4. In every other case fall back to Nominatim.

    Args:
        text: Free-text address or place query.

    Returns:
        The chosen hit, or ``None`` if neither geocoder produced one.
    """
    if _has_upstate_zip(text):
        log.info("Upstate ZIP detected in %r; skipping NYC Geosearch", text)
        return geocode_nominatim(text)

    hits = geocode(text)
    if hits:
        candidate = hits[0]
        hint = _detect_borough(text)
        if hint:
            wanted = hint.lower()
            # First hit in the named borough, else keep the top hit.
            candidate = next(
                (h for h in hits if h.borough and h.borough.lower() == wanted),
                candidate,
            )
        if _within_nyc_bbox(candidate):
            return candidate

    log.info("Falling back to Nominatim for %r", text)
    return geocode_nominatim(text)
|
|