seriffic Claude Sonnet 4.6 committed on
Commit
f7bf63f
·
1 Parent(s): b9a10ad

fix(compare): wire compare intent into SSE handler

Browse files

The compare intent was classified by the planner but silently fell
through to single_address routing in api_agent_stream because (a)
"compare" was not in INTENTS so _validate() defaulted to
single_address, and (b) there was no routing case in the SSE handler.

Changes:
- app/planner.py: add "compare" to INTENTS, SPECIALISTS (every
specialist available to single_address is now also available to
compare), PLAN_SCHEMA_DESC hard rules, _required_specialists,
_default_specialists, and the _validate fallback. The planner now
emits intent=compare with two address targets for "compare X vs Y"
queries.
- web/main.py: add _run_compare() helper that runs the full
single_address specialist suite sequentially for each target via
i_addr.run() and merges the two paragraphs into one Markdown
document (## PLACE A / ## PLACE B sections). Add compare routing
case to both api_agent_stream and api_agent endpoints.

Verified: "Compare 80 Pioneer Street, Brooklyn to 100 Gold Street,
Manhattan" produces a two-target briefing (2820 chars) with both
addresses cited and Mellea grounding passing. 38 step events (19
per address), no errors.

Map limitation: map re-centers to the last-geocoded address (PLACE B).
Both places appear in the briefing text. Dual-marker map requires
a RipMap prop change (deferred).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. app/planner.py +32 -14
  2. web/main.py +84 -1
app/planner.py CHANGED
@@ -65,24 +65,32 @@ INTENTS = {
65
  "DOB construction permits inside it, cross-reference each project "
66
  "with Sandy + DEP flood layers, return a flagged-projects list."
67
  ),
 
 
 
 
 
 
 
 
68
  }
69
 
70
  SPECIALISTS = {
71
  # name: (description, which intents may invoke it)
72
- "geocode": ("Resolve address text to lat/lon via NYC DCP Geosearch.", ["single_address"]),
73
  "nta_resolve": ("Resolve a neighborhood or borough name to NTA polygon(s).", ["neighborhood"]),
74
- "sandy": ("2012 Sandy inundation extent (point-in-polygon or % of NTA).", ["single_address", "neighborhood"]),
75
- "dep_stormwater":("DEP Stormwater Maps — 3 modeled scenarios.", ["single_address", "neighborhood"]),
76
- "floodnet": ("Live FloodNet ultrasonic sensors + trigger history.", ["single_address", "neighborhood", "live_now"]),
77
- "nyc311": ("NYC 311 flood-related complaints in buffer or polygon.", ["single_address", "neighborhood"]),
78
- "noaa_tides": ("Live NOAA Battery / Kings Pt / Sandy Hook water level.", ["single_address", "neighborhood", "live_now"]),
79
- "nws_alerts": ("Live NWS active flood-relevant alerts at point.", ["single_address", "neighborhood", "live_now"]),
80
- "nws_obs": ("Live NWS hourly precip from nearest ASOS station.", ["single_address", "neighborhood", "live_now"]),
81
- "ttm_forecast": ("Granite TTM r2 surge-residual nowcast at the Battery.", ["single_address", "neighborhood", "live_now"]),
82
- "microtopo": ("LiDAR-derived terrain (HAND, TWI, percentile) at point or aggregated over polygon.", ["single_address", "neighborhood"]),
83
- "ida_hwm": ("USGS Hurricane Ida 2021 high-water marks proximity.", ["single_address", "neighborhood"]),
84
- "prithvi": ("Prithvi-EO 2.0 Hurricane Ida 2021 satellite flood polygons.", ["single_address", "neighborhood"]),
85
- "rag": ("Retrieve relevant agency-report passages over the policy corpus.", ["single_address", "neighborhood", "development_check"]),
86
  "dob_permits": ("Active NYC DOB construction permits inside a polygon, each cross-referenced with Sandy + DEP flood scenarios. Use for 'what are they building' / 'projects in progress' queries.", ["development_check"]),
87
  }
88
 
@@ -117,6 +125,7 @@ Hard rules:
117
  - For intent=neighborhood: ALWAYS include "nta_resolve". Skip "geocode". Include polygon-capable specialists.
118
  - For intent=live_now: ONLY live specialists. Skip historic/modeled (sandy, dep_*, ida_hwm, prithvi).
119
  - For intent=development_check: ALWAYS include "nta_resolve" AND "dob_permits". Sandy + DEP are also useful so the model can compare project locations to flood layers.
 
120
  - IMPORTANT — TARGETS: extract neighborhood/borough names directly from the query text. If the query says "in Gowanus", "what about Brighton Beach", "around Carroll Gardens", etc., the target MUST be {"type": "nta", "text": "<the place name>"}. Use {"type": "nyc"} ONLY when the query mentions NYC as a whole and no specific place. Failing to extract a place name will cause the executor to give up — be explicit.
121
  - "targets" is a list because the user may name multiple places (e.g. "compare Brighton Beach and Coney Island").
122
  - "rationale" is one short sentence — what your reasoning was.
@@ -259,6 +268,13 @@ def _validate(d: dict[str, Any], raw_query: str) -> Plan:
259
  targets = [{"type": "address", "text": raw_query}]
260
  elif intent == "neighborhood":
261
  targets = [{"type": "nta", "text": raw_query}]
 
 
 
 
 
 
 
262
  else:
263
  targets = [{"type": "nyc", "text": "NYC"}]
264
 
@@ -295,11 +311,13 @@ def _required_specialists(intent: str) -> list[str]:
295
  return ["nws_alerts", "noaa_tides"]
296
  if intent == "development_check":
297
  return ["nta_resolve", "dob_permits", "sandy", "dep_stormwater"]
 
 
298
  return []
299
 
300
 
301
  def _default_specialists(intent: str) -> list[str]:
302
- if intent == "single_address":
303
  return ["geocode", "sandy", "dep_stormwater", "floodnet", "nyc311",
304
  "noaa_tides", "nws_alerts", "nws_obs", "ttm_forecast",
305
  "microtopo", "ida_hwm", "prithvi", "rag"]
 
65
  "DOB construction permits inside it, cross-reference each project "
66
  "with Sandy + DEP flood layers, return a flagged-projects list."
67
  ),
68
+ "compare": (
69
+ "Use ONLY when the query explicitly compares TWO specific street "
70
+ "ADDRESSES (e.g. 'compare 80 Pioneer St Brooklyn to 100 Gold St "
71
+ "Manhattan', 'which is riskier: X or Y?', 'X vs Y flood risk'). "
72
+ "Extract BOTH full street addresses into targets as two separate "
73
+ "{type: 'address', text: ...} objects. Run the full single-address "
74
+ "specialist suite for each."
75
+ ),
76
  }
77
 
78
  SPECIALISTS = {
79
  # name: (description, which intents may invoke it)
80
+ "geocode": ("Resolve address text to lat/lon via NYC DCP Geosearch.", ["single_address", "compare"]),
81
  "nta_resolve": ("Resolve a neighborhood or borough name to NTA polygon(s).", ["neighborhood"]),
82
+ "sandy": ("2012 Sandy inundation extent (point-in-polygon or % of NTA).", ["single_address", "neighborhood", "compare"]),
83
+ "dep_stormwater":("DEP Stormwater Maps — 3 modeled scenarios.", ["single_address", "neighborhood", "compare"]),
84
+ "floodnet": ("Live FloodNet ultrasonic sensors + trigger history.", ["single_address", "neighborhood", "live_now", "compare"]),
85
+ "nyc311": ("NYC 311 flood-related complaints in buffer or polygon.", ["single_address", "neighborhood", "compare"]),
86
+ "noaa_tides": ("Live NOAA Battery / Kings Pt / Sandy Hook water level.", ["single_address", "neighborhood", "live_now", "compare"]),
87
+ "nws_alerts": ("Live NWS active flood-relevant alerts at point.", ["single_address", "neighborhood", "live_now", "compare"]),
88
+ "nws_obs": ("Live NWS hourly precip from nearest ASOS station.", ["single_address", "neighborhood", "live_now", "compare"]),
89
+ "ttm_forecast": ("Granite TTM r2 surge-residual nowcast at the Battery.", ["single_address", "neighborhood", "live_now", "compare"]),
90
+ "microtopo": ("LiDAR-derived terrain (HAND, TWI, percentile) at point or aggregated over polygon.", ["single_address", "neighborhood", "compare"]),
91
+ "ida_hwm": ("USGS Hurricane Ida 2021 high-water marks proximity.", ["single_address", "neighborhood", "compare"]),
92
+ "prithvi": ("Prithvi-EO 2.0 Hurricane Ida 2021 satellite flood polygons.", ["single_address", "neighborhood", "compare"]),
93
+ "rag": ("Retrieve relevant agency-report passages over the policy corpus.", ["single_address", "neighborhood", "development_check", "compare"]),
94
  "dob_permits": ("Active NYC DOB construction permits inside a polygon, each cross-referenced with Sandy + DEP flood scenarios. Use for 'what are they building' / 'projects in progress' queries.", ["development_check"]),
95
  }
96
 
 
125
  - For intent=neighborhood: ALWAYS include "nta_resolve". Skip "geocode". Include polygon-capable specialists.
126
  - For intent=live_now: ONLY live specialists. Skip historic/modeled (sandy, dep_*, ida_hwm, prithvi).
127
  - For intent=development_check: ALWAYS include "nta_resolve" AND "dob_permits". Sandy + DEP are also useful so the model can compare project locations to flood layers.
128
+ - For intent=compare: ALWAYS include "geocode". Extract BOTH street addresses into targets — the executor runs the full specialist suite once per address. Targets must be exactly 2 items, both type="address".
129
  - IMPORTANT — TARGETS: extract neighborhood/borough names directly from the query text. If the query says "in Gowanus", "what about Brighton Beach", "around Carroll Gardens", etc., the target MUST be {"type": "nta", "text": "<the place name>"}. Use {"type": "nyc"} ONLY when the query mentions NYC as a whole and no specific place. Failing to extract a place name will cause the executor to give up — be explicit.
130
  - "targets" is a list because the user may name multiple places (e.g. "compare Brighton Beach and Coney Island").
131
  - "rationale" is one short sentence — what your reasoning was.
 
268
  targets = [{"type": "address", "text": raw_query}]
269
  elif intent == "neighborhood":
270
  targets = [{"type": "nta", "text": raw_query}]
271
+ elif intent == "compare":
272
+ # Planner failed to extract two addresses — treat whole query as
273
+ # single address so the caller gets at least one result rather
274
+ # than a confusing empty response.
275
+ log.warning("compare intent but no valid targets extracted; "
276
+ "falling back to single raw query")
277
+ targets = [{"type": "address", "text": raw_query}]
278
  else:
279
  targets = [{"type": "nyc", "text": "NYC"}]
280
 
 
311
  return ["nws_alerts", "noaa_tides"]
312
  if intent == "development_check":
313
  return ["nta_resolve", "dob_permits", "sandy", "dep_stormwater"]
314
+ if intent == "compare":
315
+ return ["geocode", "sandy", "dep_stormwater", "microtopo"]
316
  return []
317
 
318
 
319
  def _default_specialists(intent: str) -> list[str]:
320
+ if intent in ("single_address", "compare"):
321
  return ["geocode", "sandy", "dep_stormwater", "floodnet", "nyc311",
322
  "noaa_tides", "nws_alerts", "nws_obs", "ttm_forecast",
323
  "microtopo", "ida_hwm", "prithvi", "rag"]
web/main.py CHANGED
@@ -497,6 +497,85 @@ async def stream(q: str, request: Request):
497
  "X-Accel-Buffering": "no"})
498
 
499
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500
  @app.get("/api/agent")
501
  def api_agent(q: str):
502
  """Agentic endpoint: take a natural-language query, plan it via
@@ -523,7 +602,9 @@ def api_agent(q: str):
523
  "requirements_total": 0},
524
  "status": "not_implemented",
525
  })
526
- if p.intent == "development_check":
 
 
527
  out = i_dev.run(p, q, strict=True)
528
  elif p.intent == "neighborhood":
529
  out = i_nbhd.run(p, q, strict=True)
@@ -568,6 +649,8 @@ async def api_agent_stream(q: str):
568
  "requirements_total": 0},
569
  "status": "not_implemented",
570
  }
 
 
571
  elif p.intent == "development_check":
572
  final = i_dev.run(p, q, progress_q=out_q, strict=True)
573
  elif p.intent == "neighborhood":
 
497
  "X-Accel-Buffering": "no"})
498
 
499
 
500
class _CompareTaggedQueue:
    """Write-only proxy around the progress queue for one compare target.

    Events that are dicts with ``kind == "step"`` are copied and stamped
    with a ``target_label`` key; every other event is forwarded untouched.
    The label is stored on the instance (not captured from an enclosing
    loop variable), so a wrapper created for PLACE A keeps tagging events
    as PLACE A even after the caller has moved on to PLACE B.
    """

    def __init__(self, inner, label: str) -> None:
        self._inner = inner
        self._label = label

    def put(self, ev) -> None:
        """Forward ``ev`` to the wrapped queue, tagging step events."""
        # Only dict events can carry a tag; anything else (e.g. a sentinel)
        # is passed through as-is instead of failing on ``.get``.
        if isinstance(ev, dict) and ev.get("kind") == "step":
            ev = {**ev, "target_label": self._label}
        self._inner.put(ev)


def _compare_merge_mellea(a: dict, b: dict) -> dict:
    """Combine the ``mellea`` metadata dicts of the two per-address runs.

    ``rerolls`` and ``n_attempts`` are summed and ``requirements_total`` is
    the larger of the two values. The requirement lists are de-duplicated
    in first-seen order so the output is stable from run to run. A
    requirement that failed for either address is reported only under
    ``requirements_failed``, never under ``requirements_passed`` as well.
    """
    def _names(key: str) -> list:
        # Order-preserving union of the two lists (missing/None -> empty).
        return list(dict.fromkeys([*(a.get(key) or []), *(b.get(key) or [])]))

    failed = _names("requirements_failed")
    failed_lookup = set(failed)
    passed = [r for r in _names("requirements_passed") if r not in failed_lookup]
    return {
        "rerolls": (a.get("rerolls") or 0) + (b.get("rerolls") or 0),
        "n_attempts": (a.get("n_attempts") or 0) + (b.get("n_attempts") or 0),
        "requirements_passed": passed,
        "requirements_failed": failed,
        "requirements_total": max(a.get("requirements_total") or 0,
                                  b.get("requirements_total") or 0),
    }


def _run_compare(p, raw_query: str, out_q, i_addr) -> dict:
    """Run the compare intent for the first two address targets of a plan.

    Each target is executed as its own synthetic ``single_address`` plan via
    ``i_addr.run(..., strict=True)``, one after the other, and the two
    resulting paragraphs are merged into one Markdown document with a
    ``## PLACE A`` and a ``## PLACE B`` section.

    The runs are deliberately sequential: per the original author's note,
    the FSM relies on thread-local hooks (set_strict_mode,
    set_token_callback) that overlapping runs would corrupt. See
    app/intents/single_address.py.

    Args:
        p: The planner's plan. ``targets``, ``specialists`` and
            ``rationale`` are read from it.
        raw_query: The user's original query; only used for the fallback.
        out_q: Progress queue exposing ``put(event)``, or ``None``. Step
            events are forwarded with an extra ``target_label`` key
            ("PLACE A" / "PLACE B"); all other events pass through untagged.
        i_addr: The single-address intent module/object exposing ``run``.

    Returns:
        A dict with ``paragraph``, ``mellea``, ``intent`` ("compare"),
        ``targets`` and ``tier``. When fewer than two usable address
        targets are present, the unmodified result of a plain
        ``i_addr.run(p, raw_query, ...)`` call is returned instead.
    """
    # An address target without any text cannot be geocoded, so it does not
    # count towards the two places (and can no longer raise KeyError below).
    addr_targets = [
        t for t in (p.targets or [])
        if t.get("type") == "address" and str(t.get("text") or "").strip()
    ]
    if len(addr_targets) < 2:
        # Fallback: only one (or zero) address extracted — run as single_address
        return i_addr.run(p, raw_query, progress_q=out_q, strict=True)

    # Imported lazily: only the two-target path needs to build sub-plans.
    from app.planner import Plan

    results = []
    for label, target in zip(("PLACE A", "PLACE B"), addr_targets):
        addr_text = target["text"]
        # Synthetic single-address plan for this target
        sub_plan = Plan(
            intent="single_address",
            targets=[{"type": "address", "text": addr_text}],
            specialists=p.specialists,
            rationale=p.rationale,
        )
        effective_q = (
            _CompareTaggedQueue(out_q, label) if out_q is not None else None
        )
        result = i_addr.run(sub_plan, addr_text,
                            progress_q=effective_q, strict=True)
        results.append((label, addr_text, result or {}))

    # Merge: one document with a labelled section per place.
    merged_paragraph = "\n\n---\n\n".join(
        f"## {label}: {addr_text}\n\n{(res.get('paragraph') or '').strip()}"
        for label, addr_text, res in results
    )

    mellea_a = results[0][2].get("mellea") or {}
    mellea_b = results[1][2].get("mellea") or {}
    return {
        "paragraph": merged_paragraph,
        "mellea": _compare_merge_mellea(mellea_a, mellea_b),
        "intent": "compare",
        "targets": [{"label": lbl, "address": addr} for lbl, addr, _ in results],
        # NOTE(review): tier is taken from PLACE A only — confirm whether a
        # combined tier is wanted when the two places differ.
        "tier": results[0][2].get("tier"),
    }
577
+
578
+
579
  @app.get("/api/agent")
580
  def api_agent(q: str):
581
  """Agentic endpoint: take a natural-language query, plan it via
 
602
  "requirements_total": 0},
603
  "status": "not_implemented",
604
  })
605
+ if p.intent == "compare":
606
+ out = _run_compare(p, q, None, i_addr)
607
+ elif p.intent == "development_check":
608
  out = i_dev.run(p, q, strict=True)
609
  elif p.intent == "neighborhood":
610
  out = i_nbhd.run(p, q, strict=True)
 
649
  "requirements_total": 0},
650
  "status": "not_implemented",
651
  }
652
+ elif p.intent == "compare":
653
+ final = _run_compare(p, q, out_q, i_addr)
654
  elif p.intent == "development_check":
655
  final = i_dev.run(p, q, progress_q=out_q, strict=True)
656
  elif p.intent == "neighborhood":