# riprap-nyc / tests / test_sample_queries.py
# (from commit 6a82282 — "Backend evolution: Phases 1-10 specialists
#  + agentic FSM + Mellea + LiteLLM router")
"""Verify the 5 demo sample-query buttons all return rich, useful output.
These five are the buttons we put on the agent landing page; if any
fail or produce empty output, the demo is dead. This is the
gating test before shipping.
"""
from __future__ import annotations
import sys
import time
import httpx
BASE = "http://127.0.0.1:8000"
SAMPLES = [
{
"label": "live",
"q": "is there flooding right now in NYC",
"intent": "live_now",
"min_chars": 30,
"min_specialists_fired": 2, # at least nws_alerts + noaa_tides
},
{
"label": "address",
"q": "2940 Brighton 3rd St, Brooklyn",
"intent": "single_address",
"min_chars": 100,
"must_have_geocode": True,
"must_be_in_sandy": True,
},
{
"label": "neighborhood (coastal)",
"q": "is Brighton Beach at risk?",
"intent": "neighborhood",
"min_chars": 150,
"must_have_target": "Brighton Beach",
"min_sandy_fraction": 0.7,
},
{
"label": "neighborhood (inland)",
"q": "Hollis",
"intent": "neighborhood",
"min_chars": 80,
"must_have_target_borough": "Queens",
},
{
"label": "development_check (marquee)",
"q": "what are they building in Gowanus and is it risky",
"intent": "development_check",
"min_chars": 200,
"min_n_total": 5,
"min_n_in_sandy": 1,
"min_flagged_top": 1,
},
{
"label": "upstate (Albany convention venue)",
"q": "257 Washington Avenue, Albany NY 12205",
"intent": "single_address",
"min_chars": 80,
"must_resolve_outside_nyc": True,
# NOAA + NWS pickers should now select Hudson Corridor stations
"must_pick_albany_stations": True,
# Reconciler must NOT hallucinate NYC-specific layers for an upstate addr
# Phrases the reconciler shouldn't claim about an upstate address.
# Note we accept "Sandy" or "DEP" appearing in scope-guard negation
# ("does not apply"), only fail on definitive Albany-is-in-NYC claims.
"must_not_contain_phrases": ["gowanus", "carroll gardens",
"red hook", "brighton beach"],
},
]
FAILS = []
def run(s):
    """Run one sample-query spec ``s`` against the local agent API.

    Every violated expectation is appended to the module-level FAILS list
    via the local fail() helper; checking continues after a failure so a
    single sample reports all of its problems, not just the first. The
    only early exits are an HTTP error and an intent mismatch (downstream
    fields are meaningless for the wrong intent).
    """
    label = s["label"]
    q = s["q"]
    print(f"\n=== {label}: {q!r}")
    fails_before = len(FAILS)  # snapshot so we can tell whether THIS sample passed
    t0 = time.time()
    try:
        r = httpx.get(f"{BASE}/api/agent", params={"q": q}, timeout=240.0)
        r.raise_for_status()
        d = r.json()
    except Exception as e:
        FAILS.append((label, f"HTTP error: {e}"))
        print(f" ❌ {e}")
        return
    dt = time.time() - t0
    intent = d.get("intent")
    para = d.get("paragraph", "") or ""
    print(f" intent={intent} wall={dt:.1f}s para={len(para)} chars")

    def fail(why):
        # Record and echo one violated expectation for this sample.
        FAILS.append((label, why))
        print(f" ❌ {why}")

    if intent != s["intent"]:
        fail(f"intent {intent} != {s['intent']}")
        return
    if len(para) < s.get("min_chars", 0):
        fail(f"paragraph {len(para)} < min {s['min_chars']}")
    if s.get("must_have_geocode") and not (d.get("geocode") or {}).get("lat"):
        fail("geocode missing lat")
    if s.get("must_be_in_sandy") and d.get("sandy") is not True:
        fail("expected sandy=True")
    if s.get("must_have_target"):
        nta = (d.get("target") or {}).get("nta_name", "")
        if s["must_have_target"].lower() not in nta.lower():
            fail(f"target NTA {nta!r} doesn't contain {s['must_have_target']!r}")
    if s.get("must_have_target_borough"):
        boro = (d.get("target") or {}).get("borough", "")
        if boro != s["must_have_target_borough"]:
            fail(f"target borough {boro!r} != {s['must_have_target_borough']!r}")
    if "min_sandy_fraction" in s:
        f = (d.get("sandy_nta") or {}).get("fraction", 0)
        if f < s["min_sandy_fraction"]:
            fail(f"sandy_nta.fraction {f} < {s['min_sandy_fraction']}")
    if "min_n_total" in s:
        n = (d.get("dob_summary") or {}).get("n_total", 0)
        if n < s["min_n_total"]:
            fail(f"dob.n_total {n} < {s['min_n_total']}")
    if "min_n_in_sandy" in s:
        n = (d.get("dob_summary") or {}).get("n_in_sandy", 0)
        if n < s["min_n_in_sandy"]:
            fail(f"dob.n_in_sandy {n} < {s['min_n_in_sandy']}")
    if "min_flagged_top" in s:
        n = len((d.get("dob_summary") or {}).get("flagged_top") or [])
        if n < s["min_flagged_top"]:
            fail(f"flagged_top size {n} < {s['min_flagged_top']}")
    if "min_specialists_fired" in s:
        n = sum(1 for k in ("noaa_tides", "nws_alerts", "nws_obs", "ttm_forecast")
                if d.get(k) is not None)
        if n < s["min_specialists_fired"]:
            fail(f"only {n} live specialists fired, need {s['min_specialists_fired']}")
    if s.get("must_resolve_outside_nyc"):
        geo = d.get("geocode") or {}
        lat, lon = geo.get("lat"), geo.get("lon")
        # BUGFIX: also guard lon — a lat-only geocode used to raise
        # TypeError on the lon comparison instead of failing cleanly.
        in_nyc = (lat is not None and lon is not None
                  and 40.49 <= lat <= 40.92 and -74.27 <= lon <= -73.69)
        if in_nyc:
            fail(f"resolved coords ({lat},{lon}) ARE inside NYC bbox; expected upstate")
    if s.get("must_pick_albany_stations"):
        tides_id = (d.get("noaa_tides") or {}).get("station_id")
        obs_id = (d.get("nws_obs") or {}).get("station_id")
        if tides_id != "8518995":
            fail(f"NOAA station {tides_id!r} != 8518995 (Albany Hudson)")
        if obs_id != "KALB":
            fail(f"NWS ASOS {obs_id!r} != KALB (Albany Intl)")
    if s.get("must_not_contain_phrases"):
        bad = [p for p in s["must_not_contain_phrases"]
               if p.lower() in para.lower()]
        if bad:
            fail(f"paragraph leaked NYC content for upstate addr: {bad}")
    if not any(hdr in para for hdr in ["**Status.**", "**Live signals.**", "**Flagged projects.**"]):
        # Soft check — paragraph should have at least one section header
        print(" ⚠ no recognized section header (soft)")
    else:
        print(" ✓ has section header")
    # BUGFIX: only claim the sample passed when none of its checks failed;
    # the original printed "✓ <label>" unconditionally, even after a ❌.
    if len(FAILS) == fails_before:
        print(f" ✓ {label}")
def main():
    """Gate entry point: ping the server, run every sample, exit 1 on failure."""
    try:
        # Cheap liveness probe before committing to the long agent calls.
        httpx.get(f"{BASE}/", timeout=5.0)
    except Exception as e:
        print(f"server not up: {e}")
        sys.exit(1)
    print("=" * 60)
    print(f"SAMPLE-QUERY GATE — {len(SAMPLES)} buttons")
    print("=" * 60)
    for s in SAMPLES:
        run(s)
    print("\n" + "=" * 60)
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for label, why in FAILS:
            print(f" - {label}: {why}")
        sys.exit(1)
    else:
        # BUGFIX: derive the count — the hard-coded "5" went stale when the
        # sixth (Albany) sample was added to SAMPLES.
        print(f"ALL {len(SAMPLES)} SAMPLE QUERIES PASS — safe to ship buttons")
    print("=" * 60)
if __name__ == "__main__":
main()