"""End-to-end tests for the agentic /api/agent endpoint. Run against a live local server: .venv/bin/uvicorn web.main:app --port 8000 & .venv/bin/python tests/test_agent_e2e.py Each test sends a query, asserts on the planner's intent + structure, times the round-trip, and shows what the user would see. Output is a pass/fail summary so we can iterate without clicking through the UI. """ from __future__ import annotations import sys import time import httpx BASE = "http://127.0.0.1:8000" HARD_FAIL = [] # serious issues (route returns 500, no paragraph, etc.) SOFT_WARN = [] # quality issues (citation tags missing, etc.) def case(name, q, expected_intent, asserts): """One test case. `asserts` is a list of (label, callable(d) → bool).""" print(f"\n=== {name}") print(f" query: {q!r}") t0 = time.time() try: r = httpx.get(f"{BASE}/api/agent", params={"q": q}, timeout=240.0) r.raise_for_status() d = r.json() except Exception as e: print(f" ❌ HTTP/JSON error: {e!r}") HARD_FAIL.append((name, str(e))) return None dt = time.time() - t0 intent = d.get("intent") plan = d.get("plan", {}) print(f" → intent={intent} total_s={d.get('total_s', '?')} wall_s={dt:.2f}") print(f" → plan.specialists ({len(plan.get('specialists', []))}): " f"{plan.get('specialists', [])}") print(f" → plan.rationale: {plan.get('rationale', '')[:120]}") if intent != expected_intent: print(f" ❌ expected intent={expected_intent}, got {intent}") HARD_FAIL.append((name, f"intent {intent} != {expected_intent}")) for label, fn in asserts: try: res = fn(d) except Exception as e: res = False print(f" ❌ assert raised — {label}: {e!r}") if res: print(f" ✓ {label}") else: print(f" ❌ {label}") HARD_FAIL.append((name, label)) para = d.get("paragraph", "") or "" has_section = "**Status.**" in para or "**Live signals.**" in para if not has_section: print(" ⚠ no recognizable section header in paragraph") SOFT_WARN.append((name, "no section header")) has_cite = "[" in para and "]" in para if not has_cite: SOFT_WARN.append((name, "paragraph has no [doc_id] citations")) print(" ⚠ paragraph has no [doc_id] citations") return d def has_signal(key): def _check(d): v = d.get(key) return v is not None and v != [] and v != {} return _check def has_target_field(field, expected_substring): def _check(d): t = d.get("target") or {} return expected_substring.lower() in (t.get(field, "") or "").lower() return _check def fraction_inside(lo, hi): def _check(d): s = d.get("sandy_nta") or {} f = s.get("fraction", -1) return lo <= f <= hi return _check def main(): # Sanity check the server is up try: httpx.get(f"{BASE}/", timeout=5.0) except Exception as e: print(f"server not reachable at {BASE}: {e!r}") sys.exit(1) print("=" * 60) print("PLANNER + EXECUTOR END-TO-END TESTS") print("=" * 60) # ---- single_address ---------------------------------------------------- case("single_address: full NYC address", "116-50 Sutphin Blvd, Queens", expected_intent="single_address", asserts=[ ("geocode populated", lambda d: (d.get("geocode") or {}).get("address")), ("dep populated", has_signal("dep")), ("nyc311 populated", has_signal("nyc311")), ("paragraph nonempty", lambda d: len(d.get("paragraph", "")) > 50), ]) case("single_address: coastal Brooklyn (Sandy hit)", "2940 Brighton 3rd St, Brooklyn", expected_intent="single_address", asserts=[ ("sandy is True", lambda d: d.get("sandy") is True), ("dep populated", has_signal("dep")), ("microtopo populated", has_signal("microtopo")), ]) # ---- neighborhood ------------------------------------------------------ case("neighborhood: Brighton Beach (high coastal exposure)", "Brighton Beach", expected_intent="neighborhood", asserts=[ ("target NTA name = Brighton Beach", has_target_field("nta_name", "Brighton Beach")), ("target borough = Brooklyn", has_target_field("borough", "Brooklyn")), ("sandy_nta fraction > 0.5", fraction_inside(0.5, 1.0)), ("dep_nta has 3 scenarios", lambda d: len(d.get("dep_nta") or {}) == 3), ("nyc311_nta n > 50", lambda d: (d.get("nyc311_nta") or {}).get("n", 0) > 50), ("microtopo_nta has hand_median_m", lambda d: (d.get("microtopo_nta") or {}).get("hand_median_m") is not None), ]) case("neighborhood: Carroll Gardens (inland Brooklyn, Ida-deaths archetype)", "Carroll Gardens", expected_intent="neighborhood", asserts=[ ("target borough = Brooklyn", has_target_field("borough", "Brooklyn")), ("sandy_nta fraction < 0.5 (inland)", lambda d: (d.get("sandy_nta") or {"fraction": 1}).get("fraction", 1) < 0.5), ("nyc311_nta n > 0", lambda d: (d.get("nyc311_nta") or {}).get("n", 0) > 0), ]) case("neighborhood: borough-wide (Brooklyn → many NTAs, picks one)", "Brooklyn", expected_intent="neighborhood", asserts=[ ("target borough = Brooklyn", has_target_field("borough", "Brooklyn")), ("n_matches > 50", lambda d: d.get("n_matches", 0) > 50), ]) # ---- development_check ------------------------------------------------- case("development_check: 'what are they building in Gowanus and is it risky?'", "what are they building in Gowanus and is it risky", expected_intent="development_check", asserts=[ ("dob_summary present", lambda d: d.get("dob_summary") is not None), ("n_total > 0", lambda d: (d.get("dob_summary") or {}).get("n_total", 0) > 0), ("n_in_sandy >= 1 (Gowanus is coastal)", lambda d: (d.get("dob_summary") or {}).get("n_in_sandy", 0) >= 1), ("flagged_top has at least one project", lambda d: len((d.get("dob_summary") or {}).get("flagged_top") or []) >= 1), ("paragraph mentions specific BBL or address", lambda d: "BBL " in d.get("paragraph", "") or "St" in d.get("paragraph", "")), ]) case("development_check: 'show me new construction in Red Hook'", "show me new construction in Red Hook", expected_intent="development_check", asserts=[ ("dob_summary present", lambda d: d.get("dob_summary") is not None), ("paragraph nonempty", lambda d: len(d.get("paragraph", "")) > 50), ]) # ---- live_now ---------------------------------------------------------- case("live_now: explicit 'right now'", "is there flooding right now in NYC", expected_intent="live_now", asserts=[ ("noaa_tides has observed_ft_mllw", lambda d: (d.get("noaa_tides") or {}).get("observed_ft_mllw") is not None), ("nws_alerts present", lambda d: d.get("nws_alerts") is not None), ("paragraph mentions Status", lambda d: "Status" in d.get("paragraph", "")), ]) case("live_now: borough-scoped", "what's happening in Brooklyn right now", expected_intent="live_now", asserts=[ ("place looks like a borough or NYC", lambda d: d.get("place") in ("Brooklyn", "NYC")), ]) # ---- edge cases -------------------------------------------------------- case("edge: typo'd address", "2940 Brighten 3rd St, Brkln", expected_intent="single_address", asserts=[ ("paragraph nonempty (best-effort)", lambda d: len(d.get("paragraph", "")) > 0), ]) case("edge: nonsense neighborhood — should fail gracefully", "Nonsense Heights", expected_intent="neighborhood", asserts=[ ("error or paragraph fallback", lambda d: "error" in d or "Could not" in d.get("paragraph", "")), ]) case("edge: very ambiguous query", "what about flood", expected_intent="live_now", # planner usually maps this to live asserts=[ ("paragraph nonempty", lambda d: len(d.get("paragraph", "")) > 0), ]) # ---- summary ----------------------------------------------------------- print("\n" + "=" * 60) print(f"HARD FAILS: {len(HARD_FAIL)}") for name, why in HARD_FAIL: print(f" - {name}: {why}") print(f"SOFT WARNS: {len(SOFT_WARN)}") for name, why in SOFT_WARN: print(f" - {name}: {why}") print("=" * 60) sys.exit(1 if HARD_FAIL else 0) if __name__ == "__main__": main()