seriffic committed on
Commit
316533f
·
1 Parent(s): 3d53b81

DCP Geosearch geocoder for NYC addresses

Browse files

NYC's official address resolver. Returns (lat, lon, BBL, borough,
match-quality) for free-form input. Used as the entry point of every
single-address query — every downstream specialist takes lat/lon, so
geocoding has to land first or the FSM has nothing to work with.

Files changed (1) hide show
  1. app/geocode.py +93 -0
app/geocode.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """NYC address geocoding via the city's public Geosearch service (no key).
2
+
3
+ Uses NYC Department of City Planning's Geoclient-replacement via the open
4
+ Geosearch API (geosearch.planninglabs.nyc) — no auth required, NYC-only,
5
+ runs against the public service. Stays inside the "open civic data" lane.
6
+
7
+ Includes a borough-hint post-filter so Queens hyphenated-style addresses
8
+ (e.g. "153-09 90 Ave, Jamaica, Queens") preferentially resolve to the
9
+ borough the user named.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import re
14
+ from dataclasses import dataclass
15
+
16
+ import httpx
17
+
18
+ URL = "https://geosearch.planninglabs.nyc/v2/search"
19
+
20
_BOROUGHS = ("Manhattan", "Bronx", "Brooklyn", "Queens", "Staten Island")

# Neighborhood -> borough hints (incomplete but covers our demo set).
# The borough names themselves are matched via _BOROUGHS, so they are not
# repeated in this table.
_NEIGHBORHOOD_HINTS = {
    "jamaica": "Queens", "hollis": "Queens", "rockaway": "Queens",
    "elmhurst": "Queens", "maspeth": "Queens", "ozone park": "Queens",
    "astoria": "Queens", "flushing": "Queens", "edgemere": "Queens",
    "harlem": "Manhattan", "soho": "Manhattan",
    "tribeca": "Manhattan", "midtown": "Manhattan", "les": "Manhattan",
    "chelsea": "Manhattan", "noho": "Manhattan",
    "bushwick": "Brooklyn",
    "carroll gardens": "Brooklyn", "gowanus": "Brooklyn",
    "park slope": "Brooklyn", "williamsburg": "Brooklyn",
    "coney island": "Brooklyn", "red hook": "Brooklyn",
    "fordham": "Bronx", "riverdale": "Bronx",
    "richmond": "Staten Island",
}


def _rightmost_borough(text: str, candidates: dict[str, str]) -> str | None:
    """Return the borough whose needle occurs latest in `text` as a whole word.

    `text` must already be lower-cased; `candidates` maps lower-case needles
    to canonical borough names. Returns None when no needle matches.
    """
    best_pos = -1
    best = None
    for needle, boro in candidates.items():
        # Lookarounds instead of plain substring tests so that short needles
        # such as "les" do not fire inside longer words ("charles").
        pattern = rf"(?<!\w){re.escape(needle)}(?!\w)"
        for match in re.finditer(pattern, text):
            if match.start() > best_pos:
                best_pos, best = match.start(), boro
    return best


def _detect_borough(text: str) -> str | None:
    """Guess which borough a free-form address string refers to.

    Explicit borough names take priority over neighborhood hints. Within
    each tier the right-most whole-word match wins, because the locality
    normally trails the street in an address ("Manhattan Ave, Brooklyn"
    should hint Brooklyn, not Manhattan).

    Args:
        text: Free-form address or place description.

    Returns:
        One of the names in `_BOROUGHS`, or None if nothing is recognized.
    """
    # Lower-case and collapse whitespace runs so multi-word needles such as
    # "staten island" still match when the input has irregular spacing.
    normalized = " ".join(text.lower().split())
    explicit = _rightmost_borough(normalized, {b.lower(): b for b in _BOROUGHS})
    if explicit:
        return explicit
    return _rightmost_borough(normalized, _NEIGHBORHOOD_HINTS)
48
+
49
+
50
@dataclass
class GeocodeHit:
    """A single geocoding candidate, as assembled by `geocode`."""

    # Human-readable label for the match (falls back to the query text).
    address: str
    # Borough reported for the match, if any.
    borough: str | None
    # Latitude and longitude of the match.
    lat: float
    lon: float
    # `bbl` and `bin` values taken from the match's PAD addendum, when present.
    bbl: str | None
    bin: str | None
    # The unmodified properties mapping the hit was built from.
    raw: dict
59
+
60
+
61
def _hit_from_feature(feature: dict, fallback_address: str) -> GeocodeHit:
    """Build a GeocodeHit from one entry of the response's `features` list."""
    # Use `or {}` (not a `.get` default) at every level: a key that is present
    # but null would otherwise hand back None and break the chained lookup.
    props = feature.get("properties") or {}
    coords = (feature.get("geometry") or {}).get("coordinates") or ()
    # Coordinates are read as [lon, lat] (GeoJSON position order). A missing
    # or short list yields None for both rather than raising IndexError.
    lon, lat = (coords[0], coords[1]) if len(coords) >= 2 else (None, None)
    pad = (props.get("addendum") or {}).get("pad") or {}
    return GeocodeHit(
        address=props.get("label") or props.get("name") or fallback_address,
        borough=props.get("borough"),
        lat=lat,
        lon=lon,
        bbl=pad.get("bbl"),
        bin=pad.get("bin"),
        raw=props,
    )


def geocode(text: str, limit: int = 5) -> list[GeocodeHit]:
    """Return up to `limit` candidates from Geosearch, ranked by API order.

    Args:
        text: Free-form address to look up.
        limit: Maximum number of candidates to request (sent as `size`).

    Returns:
        One GeocodeHit per returned feature, in the order the API gave them.
        A hit's `lat`/`lon` are None when the feature carries no usable
        coordinates.

    Raises:
        httpx.HTTPStatusError: If the service answers with an error status.
        httpx.HTTPError: On transport failures, including the 15 s timeout.
    """
    r = httpx.get(URL, params={"text": text, "size": limit}, timeout=15)
    r.raise_for_status()
    feats = r.json().get("features") or []
    return [_hit_from_feature(f, text) for f in feats]
80
+
81
+
82
def geocode_one(text: str) -> GeocodeHit | None:
    """Pick the single best NYC candidate for `text`.

    Requests eight candidates. When the query names a borough or a
    neighborhood we recognize, the first candidate in that borough is
    preferred; otherwise (or if no candidate is in that borough) the API's
    top result is used. This keeps `183-12 Liberty Avenue, Queens` from
    resolving to a Brooklyn match the API happened to rank first.

    Returns None when the service produced no candidates at all.
    """
    wanted = _detect_borough(text)
    candidates = geocode(text, limit=8)
    if not candidates:
        return None
    if wanted:
        target = wanted.lower()
        for candidate in candidates:
            # Candidates with no borough can never satisfy the hint.
            if candidate.borough and candidate.borough.lower() == target:
                return candidate
    # No hint, or nothing in the hinted borough: fall back to API ranking.
    return candidates[0]