"""Address geocoding — NYC primary + national fallback.
NYC primary: NYC DCP Geosearch (geosearch.planninglabs.nyc), no auth,
NYC-only. It will fuzzy-match upstate addresses to NYC streets — e.g.
'257 Washington Ave, Albany NY' silently maps to Clinton Hill, Brooklyn.
We detect this via a non-NYC region or non-NYC ZIP and fall back to
OpenStreetMap Nominatim (no key, free, rate-limited per usage policy).
Includes a borough-hint post-filter so Queens hyphenated-style addresses
(e.g. '153-09 90 Ave, Jamaica, Queens') preferentially resolve to the
borough the user named.
"""
from __future__ import annotations
import logging
import re
from dataclasses import dataclass
import httpx
# Module-level logger; the logger name is a fixed string rather than __name__.
log = logging.getLogger("riprap.geocode")
# NYC DCP Geosearch search endpoint (NYC-only, no auth — see module docstring).
URL = "https://geosearch.planninglabs.nyc/v2/search"
# OpenStreetMap Nominatim search endpoint (national fallback).
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
# User-Agent string identifying this tool on Nominatim requests.
NOMINATIM_UA = "Riprap-NYC/0.5 (civic-flood-tool; +https://huggingface.co/spaces/msradam/riprap-nyc)"
# NYC-bbox guard: lat 40.49–40.92, lon -74.27 to -73.69.
# Tuple order is (min_lat, min_lon, max_lat, max_lon).
NYC_BBOX = (40.49, -74.27, 40.92, -73.69)
# Matches a standalone 5-digit number in the range 12000–14999
# (presumably the upstate-NY ZIP code block, per the name).
_UPSTATE_ZIP_RE = re.compile(r"\b1[2-4]\d{3}\b")
# Canonical display names of the five NYC boroughs.
_BOROUGHS = ("Manhattan", "Bronx", "Brooklyn", "Queens", "Staten Island")
def _detect_borough(text: str) -> str | None:
t = text.lower()
for b in _BOROUGHS:
if b.lower() in t:
return b
# neighborhood -> borough hints
hints = {
"queens": "Queens", "jamaica": "Queens", "rockaway": "Queens",
"astoria": "Queens", "flushing": "Queens",
"manhattan": "Manhattan", "harlem": "Manhattan", "soho": "Manhattan",
"brooklyn": "Brooklyn", "bushwick": "Brooklyn", "red hook": "Brooklyn",
"bronx": "Bronx", "fordham": "Bronx",
"staten island": "Staten Island",
}
for needle, boro in hints.items():
if needle in t:
return boro
return None
@dataclass
class GeocodeHit:
    """A single geocoding candidate returned by either provider.

    Both the Geosearch path and the Nominatim path build this record, so
    downstream code handles one shape regardless of which service answered.
    """

    # Human-readable label for the match (provider label, or the query text).
    address: str
    # Borough reported by the provider, when there is one.
    borough: str | None
    # Latitude and longitude of the match.
    lat: float
    lon: float
    # Geosearch "addendum.pad.bbl" value; None for Nominatim hits.
    # (presumably the NYC Borough-Block-Lot parcel id — confirm)
    bbl: str | None
    # Geosearch "addendum.pad.bin" value; None for Nominatim hits.
    # (presumably the NYC Building Identification Number — confirm)
    bin: str | None
    # Provider payload kept for debugging and downstream inspection.
    raw: dict
def _dict_or_empty(value: object) -> dict:
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def geocode(text: str, limit: int = 5) -> list[GeocodeHit]:
    """Look up *text* with NYC Geosearch (the primary geocoder).

    This is best-effort: any transport, HTTP-status or JSON-decoding failure
    is logged and reported as "no hits" rather than raised.

    Each feature is parsed defensively, so one odd feature (null
    ``properties``/``addendum``, missing or short ``coordinates``) cannot
    discard the other hits. A feature without usable coordinates is still
    returned with ``lat``/``lon`` set to None, so callers must check before
    using them.

    Args:
        text: Free-text address or place query.
        limit: Maximum number of candidates to request (sent as ``size``).

    Returns:
        Candidate hits in the order the service returned them; an empty list
        when the request fails or nothing matches.
    """
    # Only the network/decoding step sits inside the blanket handler, so a
    # parsing bug below is not silently reported as a Geosearch outage.
    try:
        resp = httpx.get(URL, params={"text": text, "size": limit}, timeout=5)
        resp.raise_for_status()
        payload = resp.json()
    except Exception as e:
        log.warning("Geosearch failed: %r", e)
        return []

    features = _dict_or_empty(payload).get("features") or []
    if not isinstance(features, list):
        return []

    hits = []
    for feature in features:
        feature = _dict_or_empty(feature)
        props = _dict_or_empty(feature.get("properties"))
        coords = _dict_or_empty(feature.get("geometry")).get("coordinates")
        # GeoJSON order is [lon, lat]; fall back to None placeholders when
        # the pair is missing or truncated.
        if isinstance(coords, (list, tuple)) and len(coords) >= 2:
            lon, lat = coords[0], coords[1]
        else:
            lon, lat = None, None
        pad = _dict_or_empty(_dict_or_empty(props.get("addendum")).get("pad"))
        hits.append(GeocodeHit(
            address=props.get("label") or props.get("name") or text,
            borough=props.get("borough"),
            lat=lat,
            lon=lon,
            bbl=pad.get("bbl"),
            bin=pad.get("bin"),
            raw=props,
        ))
    return hits
def _nominatim_borough(addr: dict) -> str | None:
    """Map a Nominatim ``address`` dict to an NYC borough where possible.

    Only exact borough or county names are mapped, and never when the result
    is explicitly in another state. Substring matching would label
    "Richmond Hill" (Queens) as Staten Island, "Kings Point" as Brooklyn, and
    same-named counties elsewhere in the US as NYC boroughs.

    When no NYC borough is recognised, the first non-empty value of
    ``suburb`` / ``city_district`` / ``county`` is returned unchanged.
    """
    to_borough = {
        "manhattan": "Manhattan",
        "new york county": "Manhattan",
        "brooklyn": "Brooklyn",
        "kings county": "Brooklyn",
        "queens": "Queens",
        "queens county": "Queens",
        "bronx": "Bronx",
        "the bronx": "Bronx",
        "bronx county": "Bronx",
        "staten island": "Staten Island",
        "richmond county": "Staten Island",
    }
    candidates = [addr.get(key) for key in ("suburb", "city_district", "county")]
    fallback = next((value for value in candidates if value), None)

    state = addr.get("state")
    if isinstance(state, str) and state.casefold() != "new york":
        return fallback

    for value in candidates:
        if isinstance(value, str):
            mapped = to_borough.get(value.strip().casefold())
            if mapped is not None:
                return mapped
    return fallback


def geocode_nominatim(text: str) -> GeocodeHit | None:
    """Look up *text* with OpenStreetMap Nominatim (the national fallback).

    The search is restricted to the US and asks for a single best result.
    This is best-effort: request failures, empty results and rows without
    parseable coordinates all yield None rather than raising.

    Args:
        text: Free-text address or place query.

    Returns:
        A hit built from the top Nominatim row (``bbl`` and ``bin`` are
        always None, since Nominatim has no NYC parcel ids), or None when
        nothing usable came back.
    """
    try:
        resp = httpx.get(NOMINATIM_URL, params={
            "q": text, "format": "jsonv2", "addressdetails": "1",
            "limit": 1, "countrycodes": "us",
        }, headers={"User-Agent": NOMINATIM_UA}, timeout=10)
        resp.raise_for_status()
        rows = resp.json()
    except Exception as e:
        log.warning("Nominatim fetch failed: %r", e)
        return None

    # Guard the shape: an error payload may be a dict rather than a list.
    if not isinstance(rows, list) or not rows or not isinstance(rows[0], dict):
        return None
    row = rows[0]

    try:
        lat = float(row["lat"])
        lon = float(row["lon"])
    except (KeyError, TypeError, ValueError) as e:
        log.warning("Nominatim returned unusable coordinates: %r", e)
        return None

    addr = row.get("address")
    if not isinstance(addr, dict):
        addr = {}

    return GeocodeHit(
        address=row.get("display_name") or text,
        borough=_nominatim_borough(addr),
        lat=lat,
        lon=lon,
        bbl=None,  # Nominatim doesn't have BBLs
        bin=None,
        raw={"source": "nominatim", **row},
    )
def _has_upstate_zip(text: str) -> bool:
    """Return True if *text* contains an upstate-NY ZIP code.

    A leading 5-digit number followed by more text is treated as a house
    number ("13005 Rockaway Blvd"), not a ZIP.
    """
    stripped = text.strip()
    for match in _UPSTATE_ZIP_RE.finditer(stripped):
        if match.start() == 0 and match.end() < len(stripped):
            continue
        return True
    return False


def _in_nyc_bbox(lat: float | None, lon: float | None) -> bool:
    """Return True if the coordinate lies inside ``NYC_BBOX``."""
    if lat is None or lon is None:
        return False
    min_lat, min_lon, max_lat, max_lon = NYC_BBOX
    return min_lat <= lat <= max_lat and min_lon <= lon <= max_lon


def geocode_one(text: str) -> GeocodeHit | None:
    """Resolve *text* to a single best hit, with failover.

    Strategy:

    1. If the query carries an upstate-NY ZIP, skip Geosearch entirely.
       Geosearch is NYC-only and fuzzy-matches such addresses onto NYC
       streets (e.g. an Albany address landing in Brooklyn), so its answer
       cannot be trusted there.
    2. Otherwise query Geosearch. If the text names a borough (or a known
       neighbourhood), prefer the first hit in that borough.
    3. Failing that, accept the top Geosearch hit only if its coordinates
       fall inside the NYC bounding box (latitude *and* longitude).
    4. Fall back to Nominatim.

    Args:
        text: Free-text address or place query.

    Returns:
        The chosen hit, or None when neither provider produced one.
    """
    if not _has_upstate_zip(text):
        # 1. Try Geosearch
        hits = geocode(text)
        hint = _detect_borough(text)
        if hint:
            wanted = hint.lower()
            in_boro = next(
                (h for h in hits if h.borough and h.borough.lower() == wanted),
                None,
            )
            if in_boro is not None:
                return in_boro
        if hits and _in_nyc_bbox(hits[0].lat, hits[0].lon):
            return hits[0]

    # 2. Fall back to Nominatim
    log.info("Falling back to Nominatim for %r", text)
    return geocode_nominatim(text)
|