fix(compare): wire compare intent into SSE handler
Browse files
The compare intent was classified by the planner but silently fell
through to single_address routing in api_agent_stream because (a)
"compare" was not in INTENTS so _validate() defaulted to
single_address, and (b) there was no routing case in the SSE handler.
Changes:
- app/planner.py: add "compare" to INTENTS, SPECIALISTS (compare is
applicable to all single_address specialists), PLAN_SCHEMA_DESC
hard-rules, _required_specialists, _default_specialists, and
_validate fallback. Planner now emits intent=compare with two
address targets for "compare X vs Y" queries.
- web/main.py: add _run_compare() helper that runs the full
single_address specialist suite sequentially for each target via
i_addr.run() and merges the two paragraphs into one Markdown
document (## PLACE A / ## PLACE B sections). Add compare routing
case to both api_agent_stream and api_agent endpoints.
Verified: "Compare 80 Pioneer Street, Brooklyn to 100 Gold Street,
Manhattan" produces a two-target briefing (2820 chars) with both
addresses cited and Mellea grounding passing. 38 step events (19
per address), no errors.
Map limitation: map re-centers to the last-geocoded address (PLACE B).
Both places appear in the briefing text. Dual-marker map requires
a RipMap prop change (deferred).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- app/planner.py +32 -14
- web/main.py +84 -1
|
@@ -65,24 +65,32 @@ INTENTS = {
|
|
| 65 |
"DOB construction permits inside it, cross-reference each project "
|
| 66 |
"with Sandy + DEP flood layers, return a flagged-projects list."
|
| 67 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
}
|
| 69 |
|
| 70 |
SPECIALISTS = {
|
| 71 |
# name: (description, which intents may invoke it)
|
| 72 |
-
"geocode": ("Resolve address text to lat/lon via NYC DCP Geosearch.", ["single_address"]),
|
| 73 |
"nta_resolve": ("Resolve a neighborhood or borough name to NTA polygon(s).", ["neighborhood"]),
|
| 74 |
-
"sandy": ("2012 Sandy inundation extent (point-in-polygon or % of NTA).", ["single_address", "neighborhood"]),
|
| 75 |
-
"dep_stormwater":("DEP Stormwater Maps — 3 modeled scenarios.", ["single_address", "neighborhood"]),
|
| 76 |
-
"floodnet": ("Live FloodNet ultrasonic sensors + trigger history.", ["single_address", "neighborhood", "live_now"]),
|
| 77 |
-
"nyc311": ("NYC 311 flood-related complaints in buffer or polygon.", ["single_address", "neighborhood"]),
|
| 78 |
-
"noaa_tides": ("Live NOAA Battery / Kings Pt / Sandy Hook water level.", ["single_address", "neighborhood", "live_now"]),
|
| 79 |
-
"nws_alerts": ("Live NWS active flood-relevant alerts at point.", ["single_address", "neighborhood", "live_now"]),
|
| 80 |
-
"nws_obs": ("Live NWS hourly precip from nearest ASOS station.", ["single_address", "neighborhood", "live_now"]),
|
| 81 |
-
"ttm_forecast": ("Granite TTM r2 surge-residual nowcast at the Battery.", ["single_address", "neighborhood", "live_now"]),
|
| 82 |
-
"microtopo": ("LiDAR-derived terrain (HAND, TWI, percentile) at point or aggregated over polygon.", ["single_address", "neighborhood"]),
|
| 83 |
-
"ida_hwm": ("USGS Hurricane Ida 2021 high-water marks proximity.", ["single_address", "neighborhood"]),
|
| 84 |
-
"prithvi": ("Prithvi-EO 2.0 Hurricane Ida 2021 satellite flood polygons.", ["single_address", "neighborhood"]),
|
| 85 |
-
"rag": ("Retrieve relevant agency-report passages over the policy corpus.", ["single_address", "neighborhood", "development_check"]),
|
| 86 |
"dob_permits": ("Active NYC DOB construction permits inside a polygon, each cross-referenced with Sandy + DEP flood scenarios. Use for 'what are they building' / 'projects in progress' queries.", ["development_check"]),
|
| 87 |
}
|
| 88 |
|
|
@@ -117,6 +125,7 @@ Hard rules:
|
|
| 117 |
- For intent=neighborhood: ALWAYS include "nta_resolve". Skip "geocode". Include polygon-capable specialists.
|
| 118 |
- For intent=live_now: ONLY live specialists. Skip historic/modeled (sandy, dep_*, ida_hwm, prithvi).
|
| 119 |
- For intent=development_check: ALWAYS include "nta_resolve" AND "dob_permits". Sandy + DEP are also useful so the model can compare project locations to flood layers.
|
|
|
|
| 120 |
- IMPORTANT — TARGETS: extract neighborhood/borough names directly from the query text. If the query says "in Gowanus", "what about Brighton Beach", "around Carroll Gardens", etc., the target MUST be {"type": "nta", "text": "<the place name>"}. Use {"type": "nyc"} ONLY when the query mentions NYC as a whole and no specific place. Failing to extract a place name will cause the executor to give up — be explicit.
|
| 121 |
- "targets" is a list because the user may name multiple places (e.g. "compare Brighton Beach and Coney Island").
|
| 122 |
- "rationale" is one short sentence — what your reasoning was.
|
|
@@ -259,6 +268,13 @@ def _validate(d: dict[str, Any], raw_query: str) -> Plan:
|
|
| 259 |
targets = [{"type": "address", "text": raw_query}]
|
| 260 |
elif intent == "neighborhood":
|
| 261 |
targets = [{"type": "nta", "text": raw_query}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
else:
|
| 263 |
targets = [{"type": "nyc", "text": "NYC"}]
|
| 264 |
|
|
@@ -295,11 +311,13 @@ def _required_specialists(intent: str) -> list[str]:
|
|
| 295 |
return ["nws_alerts", "noaa_tides"]
|
| 296 |
if intent == "development_check":
|
| 297 |
return ["nta_resolve", "dob_permits", "sandy", "dep_stormwater"]
|
|
|
|
|
|
|
| 298 |
return []
|
| 299 |
|
| 300 |
|
| 301 |
def _default_specialists(intent: str) -> list[str]:
|
| 302 |
-
if intent
|
| 303 |
return ["geocode", "sandy", "dep_stormwater", "floodnet", "nyc311",
|
| 304 |
"noaa_tides", "nws_alerts", "nws_obs", "ttm_forecast",
|
| 305 |
"microtopo", "ida_hwm", "prithvi", "rag"]
|
|
|
|
| 65 |
"DOB construction permits inside it, cross-reference each project "
|
| 66 |
"with Sandy + DEP flood layers, return a flagged-projects list."
|
| 67 |
),
|
| 68 |
+
"compare": (
|
| 69 |
+
"Use ONLY when the query explicitly compares TWO specific street "
|
| 70 |
+
"ADDRESSES (e.g. 'compare 80 Pioneer St Brooklyn to 100 Gold St "
|
| 71 |
+
"Manhattan', 'which is riskier: X or Y?', 'X vs Y flood risk'). "
|
| 72 |
+
"Extract BOTH full street addresses into targets as two separate "
|
| 73 |
+
"{type: 'address', text: ...} objects. Run the full single-address "
|
| 74 |
+
"specialist suite for each."
|
| 75 |
+
),
|
| 76 |
}
|
| 77 |
|
| 78 |
SPECIALISTS = {
|
| 79 |
# name: (description, which intents may invoke it)
|
| 80 |
+
"geocode": ("Resolve address text to lat/lon via NYC DCP Geosearch.", ["single_address", "compare"]),
|
| 81 |
"nta_resolve": ("Resolve a neighborhood or borough name to NTA polygon(s).", ["neighborhood"]),
|
| 82 |
+
"sandy": ("2012 Sandy inundation extent (point-in-polygon or % of NTA).", ["single_address", "neighborhood", "compare"]),
|
| 83 |
+
"dep_stormwater":("DEP Stormwater Maps — 3 modeled scenarios.", ["single_address", "neighborhood", "compare"]),
|
| 84 |
+
"floodnet": ("Live FloodNet ultrasonic sensors + trigger history.", ["single_address", "neighborhood", "live_now", "compare"]),
|
| 85 |
+
"nyc311": ("NYC 311 flood-related complaints in buffer or polygon.", ["single_address", "neighborhood", "compare"]),
|
| 86 |
+
"noaa_tides": ("Live NOAA Battery / Kings Pt / Sandy Hook water level.", ["single_address", "neighborhood", "live_now", "compare"]),
|
| 87 |
+
"nws_alerts": ("Live NWS active flood-relevant alerts at point.", ["single_address", "neighborhood", "live_now", "compare"]),
|
| 88 |
+
"nws_obs": ("Live NWS hourly precip from nearest ASOS station.", ["single_address", "neighborhood", "live_now", "compare"]),
|
| 89 |
+
"ttm_forecast": ("Granite TTM r2 surge-residual nowcast at the Battery.", ["single_address", "neighborhood", "live_now", "compare"]),
|
| 90 |
+
"microtopo": ("LiDAR-derived terrain (HAND, TWI, percentile) at point or aggregated over polygon.", ["single_address", "neighborhood", "compare"]),
|
| 91 |
+
"ida_hwm": ("USGS Hurricane Ida 2021 high-water marks proximity.", ["single_address", "neighborhood", "compare"]),
|
| 92 |
+
"prithvi": ("Prithvi-EO 2.0 Hurricane Ida 2021 satellite flood polygons.", ["single_address", "neighborhood", "compare"]),
|
| 93 |
+
"rag": ("Retrieve relevant agency-report passages over the policy corpus.", ["single_address", "neighborhood", "development_check", "compare"]),
|
| 94 |
"dob_permits": ("Active NYC DOB construction permits inside a polygon, each cross-referenced with Sandy + DEP flood scenarios. Use for 'what are they building' / 'projects in progress' queries.", ["development_check"]),
|
| 95 |
}
|
| 96 |
|
|
|
|
| 125 |
- For intent=neighborhood: ALWAYS include "nta_resolve". Skip "geocode". Include polygon-capable specialists.
|
| 126 |
- For intent=live_now: ONLY live specialists. Skip historic/modeled (sandy, dep_*, ida_hwm, prithvi).
|
| 127 |
- For intent=development_check: ALWAYS include "nta_resolve" AND "dob_permits". Sandy + DEP are also useful so the model can compare project locations to flood layers.
|
| 128 |
+
- For intent=compare: ALWAYS include "geocode". Extract BOTH street addresses into targets — the executor runs the full specialist suite once per address. Targets must be exactly 2 items, both type="address".
|
| 129 |
- IMPORTANT — TARGETS: extract neighborhood/borough names directly from the query text. If the query says "in Gowanus", "what about Brighton Beach", "around Carroll Gardens", etc., the target MUST be {"type": "nta", "text": "<the place name>"}. Use {"type": "nyc"} ONLY when the query mentions NYC as a whole and no specific place. Failing to extract a place name will cause the executor to give up — be explicit.
|
| 130 |
- "targets" is a list because the user may name multiple places (e.g. "compare Brighton Beach and Coney Island").
|
| 131 |
- "rationale" is one short sentence — what your reasoning was.
|
|
|
|
| 268 |
targets = [{"type": "address", "text": raw_query}]
|
| 269 |
elif intent == "neighborhood":
|
| 270 |
targets = [{"type": "nta", "text": raw_query}]
|
| 271 |
+
elif intent == "compare":
|
| 272 |
+
# Planner failed to extract two addresses — treat whole query as
|
| 273 |
+
# single address so the caller gets at least one result rather
|
| 274 |
+
# than a confusing empty response.
|
| 275 |
+
log.warning("compare intent but no valid targets extracted; "
|
| 276 |
+
"falling back to single raw query")
|
| 277 |
+
targets = [{"type": "address", "text": raw_query}]
|
| 278 |
else:
|
| 279 |
targets = [{"type": "nyc", "text": "NYC"}]
|
| 280 |
|
|
|
|
| 311 |
return ["nws_alerts", "noaa_tides"]
|
| 312 |
if intent == "development_check":
|
| 313 |
return ["nta_resolve", "dob_permits", "sandy", "dep_stormwater"]
|
| 314 |
+
if intent == "compare":
|
| 315 |
+
return ["geocode", "sandy", "dep_stormwater", "microtopo"]
|
| 316 |
return []
|
| 317 |
|
| 318 |
|
| 319 |
def _default_specialists(intent: str) -> list[str]:
|
| 320 |
+
if intent in ("single_address", "compare"):
|
| 321 |
return ["geocode", "sandy", "dep_stormwater", "floodnet", "nyc311",
|
| 322 |
"noaa_tides", "nws_alerts", "nws_obs", "ttm_forecast",
|
| 323 |
"microtopo", "ida_hwm", "prithvi", "rag"]
|
|
@@ -497,6 +497,85 @@ async def stream(q: str, request: Request):
|
|
| 497 |
"X-Accel-Buffering": "no"})
|
| 498 |
|
| 499 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
@app.get("/api/agent")
|
| 501 |
def api_agent(q: str):
|
| 502 |
"""Agentic endpoint: take a natural-language query, plan it via
|
|
@@ -523,7 +602,9 @@ def api_agent(q: str):
|
|
| 523 |
"requirements_total": 0},
|
| 524 |
"status": "not_implemented",
|
| 525 |
})
|
| 526 |
-
if p.intent == "
|
|
|
|
|
|
|
| 527 |
out = i_dev.run(p, q, strict=True)
|
| 528 |
elif p.intent == "neighborhood":
|
| 529 |
out = i_nbhd.run(p, q, strict=True)
|
|
@@ -568,6 +649,8 @@ async def api_agent_stream(q: str):
|
|
| 568 |
"requirements_total": 0},
|
| 569 |
"status": "not_implemented",
|
| 570 |
}
|
|
|
|
|
|
|
| 571 |
elif p.intent == "development_check":
|
| 572 |
final = i_dev.run(p, q, progress_q=out_q, strict=True)
|
| 573 |
elif p.intent == "neighborhood":
|
|
|
|
| 497 |
"X-Accel-Buffering": "no"})
|
| 498 |
|
| 499 |
|
| 500 |
+
def _run_compare(p, raw_query: str, out_q, i_addr) -> dict:
    """Run the compare intent and merge two single-address briefings.

    The single_address specialist suite is executed sequentially, once per
    address target, and the two resulting paragraphs are joined into one
    Markdown document clearly labelled PLACE A and PLACE B.

    Sequential execution is required because the FSM uses thread-local hooks
    (set_strict_mode, set_token_callback) — concurrent runs on the same
    thread would corrupt the hooks. See app/intents/single_address.py.

    Step events from each target are forwarded to out_q tagged with a
    `target_label` key so the trace UI can optionally group them; the
    existing trace UI is expected to ignore unknown keys.

    Args:
        p: Planner output; ``targets``, ``specialists`` and ``rationale``
            are read from it.
        raw_query: Original user query. Only used on the fallback path.
        out_q: Progress queue exposing ``put(event_dict)``, or None when the
            caller does not stream progress.
        i_addr: Single-address intent runner exposing
            ``run(plan, query, progress_q=..., strict=...)``.

    Returns:
        A dict with keys ``paragraph``, ``mellea``, ``intent``, ``targets``
        and ``tier``. When fewer than two usable address targets are
        present, the unmodified result of ``i_addr.run`` for the original
        plan is returned instead.
    """
    from app.planner import Plan

    class _TaggedQ:
        """Queue facade that stamps ``target_label`` onto step events.

        The wrapped queue and the label live on the instance rather than in
        the enclosing scope, so an event delivered after the loop below has
        moved on to the next target still carries the label of the run that
        produced it.
        """

        def __init__(self, inner, label):
            self._inner = inner
            self._label = label

        def put(self, ev):
            # Only step events are tagged; token/mellea_attempt events pass
            # through untouched so the SvelteKit briefing buffer works.
            if ev.get("kind") == "step":
                self._inner.put({**ev, "target_label": self._label})
            else:
                self._inner.put(ev)

    def _merge_mellea(a, b):
        """Combine two Mellea metadata dicts: sum attempts, union results."""

        def _union(key):
            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # the merged lists are stable from one process to the next.
            return list(dict.fromkeys((a.get(key) or []) + (b.get(key) or [])))

        # NOTE(review): a requirement that passed for one address and failed
        # for the other is reported in both lists — confirm the UI copes.
        return {
            "rerolls": (a.get("rerolls") or 0) + (b.get("rerolls") or 0),
            "n_attempts": (a.get("n_attempts") or 0) + (b.get("n_attempts") or 0),
            "requirements_passed": _union("requirements_passed"),
            "requirements_failed": _union("requirements_failed"),
            "requirements_total": max(a.get("requirements_total") or 0,
                                      b.get("requirements_total") or 0),
        }

    # A target is only usable if it is an address AND carries text to geocode.
    addr_targets = [
        t for t in p.targets
        if t.get("type") == "address" and t.get("text")
    ]
    if len(addr_targets) < 2:
        # Fallback: only one (or zero) address extracted — run as single_address
        return i_addr.run(p, raw_query, progress_q=out_q, strict=True)

    results = []
    # Any targets beyond the first two are ignored.
    for label, target in zip(("PLACE A", "PLACE B"), addr_targets):
        addr_text = target["text"]
        # Synthetic single-address plan for this target
        sub_plan = Plan(
            intent="single_address",
            targets=[{"type": "address", "text": addr_text}],
            specialists=p.specialists,
            rationale=p.rationale,
        )
        effective_q = _TaggedQ(out_q, label) if out_q is not None else None
        result = i_addr.run(sub_plan, addr_text, progress_q=effective_q, strict=True)
        results.append((label, addr_text, result))

    # Merge: produce one paragraph with both place sections.
    parts = [
        f"## {label}: {addr_text}\n\n{(res.get('paragraph') or '').strip()}"
        for label, addr_text, res in results
    ]
    merged_paragraph = "\n\n---\n\n".join(parts)

    mellea_a = results[0][2].get("mellea") or {}
    mellea_b = results[1][2].get("mellea") or {}
    return {
        "paragraph": merged_paragraph,
        "mellea": _merge_mellea(mellea_a, mellea_b),
        "intent": "compare",
        "targets": [{"label": lbl, "address": addr} for lbl, addr, _ in results],
        # Only the first target's tier is reported.
        "tier": results[0][2].get("tier"),
    }
|
| 577 |
+
|
| 578 |
+
|
| 579 |
@app.get("/api/agent")
|
| 580 |
def api_agent(q: str):
|
| 581 |
"""Agentic endpoint: take a natural-language query, plan it via
|
|
|
|
| 602 |
"requirements_total": 0},
|
| 603 |
"status": "not_implemented",
|
| 604 |
})
|
| 605 |
+
if p.intent == "compare":
|
| 606 |
+
out = _run_compare(p, q, None, i_addr)
|
| 607 |
+
elif p.intent == "development_check":
|
| 608 |
out = i_dev.run(p, q, strict=True)
|
| 609 |
elif p.intent == "neighborhood":
|
| 610 |
out = i_nbhd.run(p, q, strict=True)
|
|
|
|
| 649 |
"requirements_total": 0},
|
| 650 |
"status": "not_implemented",
|
| 651 |
}
|
| 652 |
+
elif p.intent == "compare":
|
| 653 |
+
final = _run_compare(p, q, out_q, i_addr)
|
| 654 |
elif p.intent == "development_check":
|
| 655 |
final = i_dev.run(p, q, progress_q=out_q, strict=True)
|
| 656 |
elif p.intent == "neighborhood":
|