File size: 6,700 Bytes
"""Verify the demo sample-query buttons all return rich, useful output.

These are the buttons we put on the agent landing page; if any of them
fails or produces empty output, the demo is dead. This is the
gating test before shipping.
"""
from __future__ import annotations
import sys
import time
import httpx
# Base URL of the locally running agent server that the gate queries.
BASE = "http://127.0.0.1:8000"

# One dict per sample query. "label", "q" and "intent" are always present;
# every other key switches on an optional check inside run().
SAMPLES = [
    {
        "label": "live",
        "q": "is there flooding right now in NYC",
        "intent": "live_now",
        "min_chars": 30,
        # at least nws_alerts + noaa_tides
        "min_specialists_fired": 2,
    },
    {
        "label": "address",
        "q": "2940 Brighton 3rd St, Brooklyn",
        "intent": "single_address",
        "min_chars": 100,
        "must_have_geocode": True,
        "must_be_in_sandy": True,
    },
    {
        "label": "neighborhood (coastal)",
        "q": "is Brighton Beach at risk?",
        "intent": "neighborhood",
        "min_chars": 150,
        "must_have_target": "Brighton Beach",
        "min_sandy_fraction": 0.7,
    },
    {
        "label": "neighborhood (inland)",
        "q": "Hollis",
        "intent": "neighborhood",
        "min_chars": 80,
        "must_have_target_borough": "Queens",
    },
    {
        "label": "development_check (marquee)",
        "q": "what are they building in Gowanus and is it risky",
        "intent": "development_check",
        "min_chars": 200,
        "min_n_total": 5,
        "min_n_in_sandy": 1,
        "min_flagged_top": 1,
    },
    {
        "label": "upstate (Albany convention venue)",
        "q": "257 Washington Avenue, Albany NY 12205",
        "intent": "single_address",
        "min_chars": 80,
        "must_resolve_outside_nyc": True,
        # NOAA + NWS pickers should now select Hudson Corridor stations
        "must_pick_albany_stations": True,
        # The reconciler must NOT hallucinate NYC-specific layers for an
        # upstate address; these are phrases it shouldn't claim about one.
        # Note we accept "Sandy" or "DEP" appearing in scope-guard negation
        # ("does not apply") and only fail on definitive Albany-is-in-NYC
        # claims.
        "must_not_contain_phrases": [
            "gowanus",
            "carroll gardens",
            "red hook",
            "brighton beach",
        ],
    },
]

# Accumulates a (label, reason) tuple for every failed check across all samples.
FAILS = []
def run(s):
    """Run one sample query against the agent endpoint and record failures.

    Sends ``s["q"]`` to ``{BASE}/api/agent`` and applies whichever checks the
    sample dict enables. Every failed hard check is printed and appended to
    the module-level ``FAILS`` list as ``(label, reason)``. Nothing is raised
    for a failed check and nothing is returned.

    Args:
        s: One entry of ``SAMPLES``. ``label``, ``q`` and ``intent`` are
            required; every other key switches on an optional check.
    """
    label = s["label"]
    q = s["q"]
    print(f"\n=== {label}: {q!r}")
    # Remember how many failures existed so we can tell at the end whether
    # THIS sample added any (FAILS is shared across all samples).
    failures_before = len(FAILS)

    def fail(why):
        """Record and print one hard-check failure for this sample."""
        FAILS.append((label, why))
        print(f" ✗ {why}")

    t0 = time.time()
    try:
        r = httpx.get(f"{BASE}/api/agent", params={"q": q}, timeout=240.0)
        r.raise_for_status()
        d = r.json()
    except Exception as e:
        # Deliberately broad: any transport, status or decode problem is a
        # gate failure for this sample, but the remaining samples must run.
        FAILS.append((label, f"HTTP error: {e}"))
        print(f" ✗ {e}")
        return
    dt = time.time() - t0

    # Every check below reads the payload with .get(); a non-object body
    # would otherwise abort the whole gate with AttributeError.
    if not isinstance(d, dict):
        fail(f"response is {type(d).__name__}, expected a JSON object")
        return

    intent = d.get("intent")
    para = d.get("paragraph", "") or ""
    print(f" intent={intent} wall={dt:.1f}s para={len(para)} chars")

    # A wrong intent makes every later check meaningless, so stop here.
    if intent != s["intent"]:
        fail(f"intent {intent} != {s['intent']}")
        return

    if len(para) < s.get("min_chars", 0):
        fail(f"paragraph {len(para)} < min {s['min_chars']}")

    if s.get("must_have_geocode") and not (d.get("geocode") or {}).get("lat"):
        fail("geocode missing lat")

    if s.get("must_be_in_sandy") and d.get("sandy") is not True:
        fail("expected sandy=True")

    if s.get("must_have_target"):
        # `or ""` also covers an explicit null, which .get()'s default does not.
        nta = (d.get("target") or {}).get("nta_name") or ""
        if s["must_have_target"].lower() not in nta.lower():
            fail(f"target NTA {nta!r} doesn't contain {s['must_have_target']!r}")

    if s.get("must_have_target_borough"):
        boro = (d.get("target") or {}).get("borough", "")
        if boro != s["must_have_target_borough"]:
            fail(f"target borough {boro!r} != {s['must_have_target_borough']!r}")

    if "min_sandy_fraction" in s:
        f = (d.get("sandy_nta") or {}).get("fraction") or 0
        if f < s["min_sandy_fraction"]:
            fail(f"sandy_nta.fraction {f} < {s['min_sandy_fraction']}")

    dob = d.get("dob_summary") or {}
    if "min_n_total" in s:
        n = dob.get("n_total") or 0
        if n < s["min_n_total"]:
            fail(f"dob.n_total {n} < {s['min_n_total']}")

    if "min_n_in_sandy" in s:
        n = dob.get("n_in_sandy") or 0
        if n < s["min_n_in_sandy"]:
            fail(f"dob.n_in_sandy {n} < {s['min_n_in_sandy']}")

    if "min_flagged_top" in s:
        n = len(dob.get("flagged_top") or [])
        if n < s["min_flagged_top"]:
            fail(f"flagged_top size {n} < {s['min_flagged_top']}")

    if "min_specialists_fired" in s:
        # A specialist counts as fired when its key is present and non-null.
        n = sum(1 for k in ("noaa_tides", "nws_alerts", "nws_obs", "ttm_forecast")
                if d.get(k) is not None)
        if n < s["min_specialists_fired"]:
            fail(f"only {n} live specialists fired, need {s['min_specialists_fired']}")

    if s.get("must_resolve_outside_nyc"):
        geo = d.get("geocode") or {}
        lat, lon = geo.get("lat"), geo.get("lon")
        if lat is None or lon is None:
            # Without both coordinates nothing was resolved, so the
            # "outside NYC" requirement cannot be considered satisfied.
            fail(f"geocode missing lat/lon ({lat},{lon}); cannot confirm upstate")
        elif 40.49 <= lat <= 40.92 and -74.27 <= lon <= -73.69:
            fail(f"resolved coords ({lat},{lon}) ARE inside NYC bbox; expected upstate")

    if s.get("must_pick_albany_stations"):
        tides_id = (d.get("noaa_tides") or {}).get("station_id")
        obs_id = (d.get("nws_obs") or {}).get("station_id")
        if tides_id != "8518995":
            fail(f"NOAA station {tides_id!r} != 8518995 (Albany Hudson)")
        if obs_id != "KALB":
            fail(f"NWS ASOS {obs_id!r} != KALB (Albany Intl)")

    if s.get("must_not_contain_phrases"):
        para_lower = para.lower()
        bad = [p for p in s["must_not_contain_phrases"] if p.lower() in para_lower]
        if bad:
            fail(f"paragraph leaked NYC content for upstate addr: {bad}")

    # Soft check — paragraph should have at least one section header.
    # It only warns; it never adds to FAILS.
    headers = ("**Status.**", "**Live signals.**", "**Flagged projects.**")
    if any(header in para for header in headers):
        print(" ✓ has section header")
    else:
        print(" ⚠ no recognized section header (soft)")

    # Only report the sample as passing if none of its hard checks failed.
    if len(FAILS) == failures_before:
        print(f" ✓ {label}")
def main():
    """Run every sample query through the gate and exit non-zero on failure.

    Probes ``BASE`` first and exits with status 1 if the server cannot be
    reached. Otherwise calls ``run`` for each entry of ``SAMPLES``, then
    prints a summary: the list of recorded failures followed by exit
    status 1, or a success banner if ``FAILS`` is empty.
    """
    # Cheap reachability probe so a down server yields one clear message
    # instead of one connection error per sample.
    try:
        httpx.get(f"{BASE}/", timeout=5.0)
    except Exception as e:
        print(f"server not up: {e}")
        sys.exit(1)

    banner = "=" * 60
    print(banner)
    print(f"SAMPLE-QUERY GATE — {len(SAMPLES)} buttons")
    print(banner)

    for s in SAMPLES:
        run(s)

    print("\n" + banner)
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for label, why in FAILS:
            print(f" - {label}: {why}")
        sys.exit(1)

    # Report the real number of samples rather than a hard-coded count, so
    # the banner stays correct when SAMPLES grows or shrinks.
    print(f"ALL {len(SAMPLES)} SAMPLE QUERIES PASS — safe to ship buttons")
    print(banner)


if __name__ == "__main__":
    main()
|