File size: 4,740 Bytes
6a82282
316533f
6a82282
 
 
 
 
316533f
 
6a82282
316533f
 
 
 
6a82282
316533f
 
 
 
 
6a82282
 
316533f
6a82282
820f968
6a82282
820f968
6a82282
 
 
316533f
 
 
 
 
 
 
820f968
316533f
820f968
 
316533f
820f968
 
 
316533f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
820f968
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a82282
 
820f968
6a82282
 
 
 
 
 
 
 
 
 
 
 
 
 
820f968
 
 
 
 
 
 
 
 
6a82282
820f968
 
6a82282
 
820f968
6a82282
 
 
 
316533f
820f968
 
 
316533f
820f968
316533f
 
820f968
 
6a82282
 
820f968
6a82282
 
820f968
 
6a82282
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
"""Address geocoding — NYC primary + national fallback.

NYC primary: NYC DCP Geosearch (geosearch.planninglabs.nyc), no auth,
NYC-only. It will fuzzy-match upstate addresses to NYC streets — e.g.
'257 Washington Ave, Albany NY' silently maps to Clinton Hill, Brooklyn.
We detect this via a non-NYC region or non-NYC ZIP and fall back to
OpenStreetMap Nominatim (no key, free, rate-limited per usage policy).

Includes a borough-hint post-filter so Queens hyphenated-style addresses
(e.g. '153-09 90 Ave, Jamaica, Queens') preferentially resolve to the
borough the user named.
"""
from __future__ import annotations

import logging
import re
from dataclasses import dataclass

import httpx

# Module-level logger used by every geocoding helper in this file.
log = logging.getLogger("riprap.geocode")

# NYC DCP Geosearch search endpoint (the primary geocoder).
URL = "https://geosearch.planninglabs.nyc/v2/search"
# OpenStreetMap Nominatim search endpoint (the fallback geocoder).
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
# User-Agent header value sent with Nominatim requests.
NOMINATIM_UA = "Riprap-NYC/0.5 (civic-flood-tool; +https://huggingface.co/spaces/msradam/riprap-nyc)"

# NYC-bbox guard: lat 40.49–40.92, lon -74.27 to -73.69.
# Tuple order is (min_lat, min_lon, max_lat, max_lon).
# NOTE(review): not referenced by any function visible in this file.
NYC_BBOX = (40.49, -74.27, 40.92, -73.69)

# Matches a standalone five-digit number in the range 12000–14999; going by
# the name, presumably meant to spot upstate New York ZIP codes.
# NOTE(review): not referenced by any function visible in this file.
_UPSTATE_ZIP_RE = re.compile(r"\b1[2-4]\d{3}\b")
# Canonical borough names, in the order _detect_borough tests them.
_BOROUGHS = ("Manhattan", "Bronx", "Brooklyn", "Queens", "Staten Island")

def _detect_borough(text: str) -> str | None:
    """Guess which borough a free-form query refers to.

    Matching is a case-insensitive substring search. Canonical borough
    names from ``_BOROUGHS`` are tried first, in tuple order; if none is
    present, a fixed list of neighborhood keywords is tried in order.

    Returns:
        The canonical borough name of the first match, or ``None`` when
        nothing in ``text`` matches.
    """
    haystack = text.lower()

    # Pass 1: an explicit borough name wins over any neighborhood keyword.
    named = next(
        (name for name in _BOROUGHS if name.lower() in haystack),
        None,
    )
    if named is not None:
        return named

    # Pass 2: neighborhood -> borough hints, checked in this order.
    keyword_to_borough = (
        ("queens", "Queens"),
        ("jamaica", "Queens"),
        ("rockaway", "Queens"),
        ("astoria", "Queens"),
        ("flushing", "Queens"),
        ("manhattan", "Manhattan"),
        ("harlem", "Manhattan"),
        ("soho", "Manhattan"),
        ("brooklyn", "Brooklyn"),
        ("bushwick", "Brooklyn"),
        ("red hook", "Brooklyn"),
        ("bronx", "Bronx"),
        ("fordham", "Bronx"),
        ("staten island", "Staten Island"),
    )
    return next(
        (borough for keyword, borough in keyword_to_borough if keyword in haystack),
        None,
    )

@dataclass
class GeocodeHit:
    """One geocoding candidate, built from a Geosearch or Nominatim result."""

    # Display text: Geosearch `label`/`name` or Nominatim `display_name`,
    # falling back to the original query text.
    address: str
    # Geosearch `borough` property, or a value derived from the Nominatim
    # address details; may be None.
    borough: str | None
    # Latitude of the hit.
    lat: float
    # Longitude of the hit.
    lon: float
    # `bbl` value from the Geosearch `addendum.pad` record; None for
    # Nominatim hits (presumably the NYC Borough-Block-Lot id — confirm).
    bbl: str | None
    # `bin` value from the Geosearch `addendum.pad` record; None for
    # Nominatim hits (presumably the NYC Building Identification Number —
    # confirm).
    bin: str | None
    # Source payload: the Geosearch feature `properties`, or the Nominatim
    # row with an added "source": "nominatim" entry.
    raw: dict

def geocode(text: str, limit: int = 5) -> list[GeocodeHit]:
    """Look up ``text`` with NYC Geosearch (the primary geocoder).

    Args:
        text: Free-form address or place query.
        limit: Maximum number of candidates to request (sent as ``size``).

    Returns:
        Candidate hits in the order the service returned them. The list is
        empty when the request fails or the response contains no usable
        features. This function is best-effort: service and parsing
        problems are logged, never raised.
    """
    try:
        r = httpx.get(URL, params={"text": text, "size": limit}, timeout=5)
        r.raise_for_status()
        feats = r.json().get("features") or []
    except Exception as e:
        # Deliberately broad: any transport, HTTP-status or JSON problem
        # means "no Geosearch result", and callers fall back elsewhere.
        log.warning("Geosearch failed: %r", e)
        return []

    if not isinstance(feats, list):
        log.warning("Geosearch returned unexpected features payload: %r", type(feats))
        return []

    out = []
    for f in feats:
        # Parse each feature on its own so that one malformed entry cannot
        # discard the valid candidates around it.
        try:
            p = f.get("properties") or {}
            coords = (f.get("geometry") or {}).get("coordinates") or ()
            # Coordinates are read as [lon, lat]; a feature without both
            # raises here and is skipped rather than yielding lat/lon=None.
            lon, lat = float(coords[0]), float(coords[1])
            pad = (p.get("addendum") or {}).get("pad") or {}
            hit = GeocodeHit(
                address=p.get("label") or p.get("name") or text,
                borough=p.get("borough"),
                lat=lat,
                lon=lon,
                bbl=pad.get("bbl"),
                bin=pad.get("bin"),
                raw=p,
            )
        except (AttributeError, IndexError, KeyError, TypeError, ValueError) as e:
            log.warning("Skipping malformed Geosearch feature: %r", e)
            continue
        out.append(hit)
    return out

# Exact (case-folded) Nominatim place/county names -> NYC borough name.
# Exact matching avoids substring false positives such as
# "Richmond Hill" (Queens) -> Staten Island or "Kingsbridge" (Bronx) -> Brooklyn.
_NOMINATIM_BOROUGH_NAMES = {
    "manhattan": "Manhattan",
    "new york county": "Manhattan",
    "brooklyn": "Brooklyn",
    "kings county": "Brooklyn",
    "queens": "Queens",
    "queens county": "Queens",
    "bronx": "Bronx",
    "the bronx": "Bronx",
    "bronx county": "Bronx",
    "staten island": "Staten Island",
    "richmond county": "Staten Island",
}


def _nominatim_borough(addr: dict) -> str | None:
    """Derive a borough label from a Nominatim ``address`` dict.

    Returns an NYC borough name when the address is in New York State and
    its suburb, city district or county is a known borough/county name.
    Otherwise returns the first non-empty of those fields unchanged, or
    ``None`` when all are missing.
    """
    candidates = [
        value
        for value in (addr.get(key) for key in ("suburb", "city_district", "county"))
        if isinstance(value, str) and value
    ]
    # Same-named counties exist in other states (e.g. Kings, Richmond), so
    # only normalise to a borough for New York State results.
    if str(addr.get("state") or "").strip().casefold() == "new york":
        # Check every field, not just the first: a neighborhood-level suburb
        # (e.g. "Harlem") must not mask the county that names the borough.
        for value in candidates:
            borough = _NOMINATIM_BOROUGH_NAMES.get(value.strip().casefold())
            if borough:
                return borough
    return candidates[0] if candidates else None


def geocode_nominatim(text: str) -> GeocodeHit | None:
    """Look up ``text`` with OSM Nominatim (the national fallback geocoder).

    Requests a single US result with address details.

    Args:
        text: Free-form address or place query.

    Returns:
        A hit built from the top result, with ``bbl`` and ``bin`` set to
        ``None``; or ``None`` when the request fails, nothing is found, or
        the result is malformed. Problems are logged, never raised.
    """
    try:
        r = httpx.get(NOMINATIM_URL, params={
            "q": text, "format": "jsonv2", "addressdetails": "1",
            "limit": 1, "countrycodes": "us",
        }, headers={"User-Agent": NOMINATIM_UA}, timeout=10)
        r.raise_for_status()
        rows = r.json()
    except Exception as e:
        # Deliberately broad: any transport, HTTP-status or JSON problem
        # simply means "no fallback result".
        log.warning("Nominatim fetch failed: %r", e)
        return None

    # A non-list payload (e.g. an error object) is treated as "no result".
    if not isinstance(rows, list) or not rows:
        return None
    row = rows[0]

    try:
        lat = float(row["lat"])
        lon = float(row["lon"])
        boro = _nominatim_borough(row.get("address") or {})
        address = row.get("display_name") or text
        raw = {"source": "nominatim", **row}
    except (AttributeError, KeyError, TypeError, ValueError) as e:
        log.warning("Nominatim returned a malformed row: %r", e)
        return None

    return GeocodeHit(
        address=address,
        borough=boro,
        lat=lat,
        lon=lon,
        bbl=None,  # Nominatim doesn't have BBLs
        bin=None,
        raw=raw,
    )

def _has_plausible_nyc_coords(hit: GeocodeHit) -> bool:
    """Return True when ``hit`` has both coordinates and a latitude in a broad NYC band."""
    if hit.lat is None or hit.lon is None:
        return False
    return 40.4 <= hit.lat <= 41.0  # Broad NYC check


def geocode_one(text: str) -> GeocodeHit | None:
    """Resolve ``text`` to a single best hit, with failover.

    Order of preference:
      1. The first Geosearch hit whose borough equals the borough hinted at
         in ``text`` and whose coordinates pass the plausibility check.
      2. The top Geosearch hit, if its coordinates pass the same check.
      3. The Nominatim result for ``text``.

    NOTE(review): the check only inspects the coordinates Geosearch returns,
    so it presumably cannot catch a non-NYC address that Geosearch has
    fuzzy-matched onto an NYC street; the query text itself is not examined
    for non-NYC ZIP codes here.

    Returns:
        The chosen hit, or ``None`` when neither geocoder yields one.
    """
    # 1. Try Geosearch
    hits = geocode(text)
    hint = _detect_borough(text)

    if hint:
        wanted = hint.lower()
        for candidate in hits:
            same_borough = bool(candidate.borough) and candidate.borough.lower() == wanted
            # Apply the same coordinate check as the non-hinted path so a
            # hit without usable coordinates is never returned.
            if same_borough and _has_plausible_nyc_coords(candidate):
                return candidate

    if hits and _has_plausible_nyc_coords(hits[0]):
        return hits[0]

    # 2. Fall back to Nominatim
    log.info("Falling back to Nominatim for %r", text)
    return geocode_nominatim(text)