DCP Geosearch geocoder for NYC addresses
Browse files
NYC's official address resolver. Returns (lat, lon, BBL, borough,
match-quality) for free-form input. Used as the entry point of every
single-address query — every downstream specialist takes lat/lon, so
geocoding has to land first or the FSM has nothing to work with.
- app/geocode.py +93 -0
app/geocode.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""NYC address geocoding via the city's public Geosearch service (no key).
|
| 2 |
+
|
| 3 |
+
Uses NYC Department of City Planning's Geoclient-replacement via the open
|
| 4 |
+
Geosearch API (geosearch.planninglabs.nyc) — no auth required, NYC-only,
|
| 5 |
+
runs against the public service. Stays inside the "open civic data" lane.
|
| 6 |
+
|
| 7 |
+
Includes a borough-hint post-filter so Queens hyphenated-style addresses
|
| 8 |
+
(e.g. "153-09 90 Ave, Jamaica, Queens") preferentially resolve to the
|
| 9 |
+
borough the user named.
|
| 10 |
+
"""
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import re
|
| 14 |
+
from dataclasses import dataclass
|
| 15 |
+
|
| 16 |
+
import httpx
|
| 17 |
+
|
| 18 |
+
# Search endpoint of the public NYC Geosearch API; geocode() sends its
# queries here.
URL = "https://geosearch.planninglabs.nyc/v2/search"
|
| 19 |
+
|
| 20 |
+
_BOROUGHS = ("Manhattan", "Bronx", "Brooklyn", "Queens", "Staten Island")
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _detect_borough(text: str) -> str | None:
|
| 24 |
+
t = text.lower()
|
| 25 |
+
for b in _BOROUGHS:
|
| 26 |
+
if b.lower() in t:
|
| 27 |
+
return b
|
| 28 |
+
# neighborhood -> borough hints (incomplete but covers our demo set)
|
| 29 |
+
hints = {
|
| 30 |
+
"queens": "Queens",
|
| 31 |
+
"jamaica": "Queens", "hollis": "Queens", "rockaway": "Queens",
|
| 32 |
+
"elmhurst": "Queens", "maspeth": "Queens", "ozone park": "Queens",
|
| 33 |
+
"astoria": "Queens", "flushing": "Queens", "edgemere": "Queens",
|
| 34 |
+
"manhattan": "Manhattan", "harlem": "Manhattan", "soho": "Manhattan",
|
| 35 |
+
"tribeca": "Manhattan", "midtown": "Manhattan", "les": "Manhattan",
|
| 36 |
+
"chelsea": "Manhattan", "noho": "Manhattan",
|
| 37 |
+
"brooklyn": "Brooklyn", "bushwick": "Brooklyn",
|
| 38 |
+
"carroll gardens": "Brooklyn", "gowanus": "Brooklyn",
|
| 39 |
+
"park slope": "Brooklyn", "williamsburg": "Brooklyn",
|
| 40 |
+
"coney island": "Brooklyn", "red hook": "Brooklyn",
|
| 41 |
+
"bronx": "Bronx", "fordham": "Bronx", "riverdale": "Bronx",
|
| 42 |
+
"staten island": "Staten Island", "richmond": "Staten Island",
|
| 43 |
+
}
|
| 44 |
+
for needle, boro in hints.items():
|
| 45 |
+
if needle in t:
|
| 46 |
+
return boro
|
| 47 |
+
return None
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@dataclass
class GeocodeHit:
    """One candidate location built by geocode() from a Geosearch feature."""

    # Display text: the feature's "label", else its "name", else the query.
    address: str
    # Value of the feature's "borough" property; None when absent.
    borough: str | None
    # Second element of the feature's coordinate pair.
    lat: float
    # First element of the feature's coordinate pair.
    lon: float
    # "bbl" value under addendum -> pad in the properties; None when absent.
    # Presumably the NYC borough-block-lot identifier -- confirm with the API docs.
    bbl: str | None
    # "bin" value under addendum -> pad in the properties; None when absent.
    # Presumably the NYC building identification number -- confirm with the API docs.
    bin: str | None
    # The feature's full "properties" dict, kept for callers that need more.
    raw: dict
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def geocode(text: str, limit: int = 5) -> list[GeocodeHit]:
    """Return up to `limit` candidates from Geosearch, ranked by API order.

    Args:
        text: Free-form address to search for.
        limit: Maximum number of candidates to request from the API.

    Returns:
        One GeocodeHit per usable feature, in the order the API returned
        them. Blank input yields an empty list without a network call.
        Features that carry no coordinate pair are skipped, because a hit
        without lat/lon cannot satisfy GeocodeHit's float fields.

    Raises:
        httpx.HTTPStatusError: If the service answers with an error status.
        Transport errors raised by httpx.get() propagate unchanged.
    """
    if not text or not text.strip():
        return []

    r = httpx.get(URL, params={"text": text, "size": limit}, timeout=15)
    r.raise_for_status()

    hits = []
    # `or` fallbacks (rather than .get defaults) also cover JSON nulls.
    for feature in r.json().get("features") or []:
        props = feature.get("properties") or {}
        coords = (feature.get("geometry") or {}).get("coordinates") or []
        if len(coords) < 2 or coords[0] is None or coords[1] is None:
            continue
        pad = (props.get("addendum") or {}).get("pad") or {}
        hits.append(GeocodeHit(
            address=props.get("label") or props.get("name") or text,
            borough=props.get("borough"),
            # The pair is ordered (lon, lat).
            lat=coords[1],
            lon=coords[0],
            bbl=pad.get("bbl"),
            bin=pad.get("bin"),
            raw=props,
        ))
    return hits
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def geocode_one(text: str) -> GeocodeHit | None:
    """Pick the single best candidate for `text`.

    When the query names a borough or neighborhood that _detect_borough()
    recognizes, the first candidate located in that borough is preferred
    over the API's own top result (e.g. so a Queens address does not
    resolve to a Brooklyn match that happened to be listed first). If no
    candidate is in the hinted borough, or there is no hint, the first
    candidate wins. Returns None when the search produced nothing.
    """
    wanted = _detect_borough(text)
    candidates = geocode(text, limit=8)

    if not candidates:
        return None
    if not wanted:
        return candidates[0]

    target = wanted.lower()
    same_borough = [
        c for c in candidates
        if c.borough and c.borough.lower() == target
    ]
    # Fall back to plain API order when the hint filtered everything out.
    return (same_borough or candidates)[0]
|