"""Question-aware framing for the Capstone briefing opening.

The four-section structure (Status / Empirical / Modeled / Policy) is
load-bearing for the Mellea grounding checks and stays unchanged. What
this module does is detect the *shape* of the user's question from the
raw query string + planner intent, then return a single-sentence
directive that conditions only the opening Status sentence.
Eleven question types are recognised; they mirror the rubric in
`tests/integration/stakeholder_queries.py:FRAMING_RUBRICS`. Detection
is deterministic regex matching — no extra LLM call, no added latency.
Usage:

    from app.framing import augment_system_prompt
    system_prompt = augment_system_prompt(
        EXTRA_SYSTEM_PROMPT, query=user_query, intent=plan.intent,
    )

The returned prompt has the original text plus a trailing
`QUESTION-AWARE OPENING:` block. Granite 4.1 attends to this through
the system-prompt cache and applies it to the Status sentence.
"""
from __future__ import annotations
import re
from typing import Final
# Closed set of question shapes that `detect` can return. The order here is
# purely declarative; matching precedence is decided by `_PATTERNS`.
QUESTION_TYPES: Final[tuple[str, ...]] = (
    # Resident / real-estate facing shapes.
    "habitability_decision",
    "legal_disclosure",
    # Institutional and professional shapes.
    "capital_planning",
    "underwriting",
    "journalism",
    "development_siting",
    "grant_evidence",
    # Time-sensitive and multi-place shapes.
    "retrospective",
    "emergency_response",
    "comparison",
    # Fallback when nothing more specific is detected.
    "generic_exposure",
)
# ---- Per-type opening directives ------------------------------------------
#
# Each directive is one sentence that supplements (does not replace) the
# Status section's existing instruction. Granite 4.1 has a strong prior
# toward "this address is exposed to ..." openings; the directive
# overrides that in a question-shaped way without disturbing the four
# grounding invariants.
# Lookup table from question type to the directive text appended to the
# system prompt. `opening_instruction` reads it with a "" default, so an
# empty value (see `generic_exposure`) means "add nothing".
# The strings are prompt text consumed at runtime: edit them deliberately,
# never for formatting.
_DIRECTIVES: dict[str, str] = {
    # Verdict-first ("Yes"/"No") opening for someone deciding whether to
    # live at the place.
    "habitability_decision": (
        "The Status sentence MUST start with a direct verdict word "
        "(\"Yes\" if the documents show meaningful flood evidence, \"No\" "
        "if they don't), then name the single strongest piece of "
        "evidence with its [doc_id]. The user is deciding whether to "
        "live here — answer the question, then cite."
    ),
    # Disclosure-threshold opening keyed to the NY RPL sections named in
    # the text.
    "legal_disclosure": (
        "The Status sentence MUST state whether the documents contain "
        "facts a NY RPL §462(2) or §231-b disclosure would need to "
        "record. Begin with \"Disclosure is warranted\" or \"Disclosure "
        "is not triggered\" based on the evidence, then name the "
        "specific fact with its [doc_id]. The user is a real-estate "
        "professional checking the disclosure threshold."
    ),
    # Prioritization framing for infrastructure investment decisions.
    "capital_planning": (
        "The Status sentence MUST frame the place as a capital-planning "
        "candidate: name the dominant exposure with its [doc_id] and "
        "indicate whether the evidence supports prioritization "
        "(\"merits prioritization\", \"ranks high for hardening\") or "
        "not. The user allocates infrastructure investment."
    ),
    # Sourcing/audit-chain emphasis for underwriters.
    "underwriting": (
        "The Status sentence MUST emphasize that every figure in the "
        "briefing is independently sourced — open with the dominant "
        "exposure and the specific [doc_id], then add a half-clause "
        "noting that the audit chain follows below. The user is an "
        "underwriter who needs a defensible loss narrative."
    ),
    # Neutral, quotable prose for reporters; bans editorial verbs.
    "journalism": (
        "The Status sentence MUST be reproducible reporting prose: "
        "name the place, name the dominant exposure with [doc_id], "
        "and avoid editorial verbs like \"shocking\" or \"alarming\". "
        "The user is a data journalist who will cite this prose verbatim."
    ),
    # Opens with the count of active construction filings from
    # [dob_permits].
    "development_siting": (
        "The Status sentence MUST start with the count of active "
        "construction filings cited from [dob_permits] (e.g. \"N "
        "active construction filings sit inside ...\") and indicate "
        "which flood layer they intersect. The user is a developer or "
        "architect doing a pre-design siting check."
    ),
    # Formal "Vulnerability assessment:" opening for grant applications.
    "grant_evidence": (
        "The Status sentence MUST open with \"Vulnerability "
        "assessment:\" and name the place + dominant exposure with "
        "[doc_id]. Treat the briefing as the evidence section of a "
        "HUD CDBG-DR or FEMA BRIC application — formal, third-person, "
        "free of advocacy framing."
    ),
    # Tells the model to admit the data is present-day and not to
    # reconstruct the requested historical date.
    "retrospective": (
        "Riprap currently runs on present-day data sources. The Status "
        "sentence MUST acknowledge the question is retrospective and "
        "state explicitly that the briefing reflects the CURRENT state "
        "of these data sources, not a snapshot from the requested date. "
        "Then proceed with the present-day exposure picture so the user "
        "still gets the geography. Silence-over-confabulation: never "
        "reconstruct historical conditions you can't verify."
    ),
    # Operational, next-few-hours framing driven by live signals/alerts.
    "emergency_response": (
        "The Status sentence MUST quantify what is at risk in the "
        "next few hours, citing the live signal that triggered the "
        "query and any active alerts with [doc_id]. The user needs an "
        "operational picture, not a historical exposure summary."
    ),
    # Head-to-head framing; must name both places or say one is missing.
    "comparison": (
        "The Status sentence MUST name BOTH places the user is "
        "comparing and indicate which one shows greater exposure on "
        "the strongest cited signal. If only one place's data is "
        "available in the documents, say so explicitly. The user is "
        "doing a head-to-head decision."
    ),
    "generic_exposure": "",  # default — no override
}
# ---- Detector -------------------------------------------------------------
#
# Patterns are ordered: the FIRST type whose pattern matches wins. Order
# matters — more specific question shapes (legal_disclosure, grant_evidence,
# emergency_response) come before more general ones (habitability_decision,
# capital_planning) so the obvious specialist tags don't get swallowed.
# Ordered detector table: each entry is (question type, patterns). A type
# matches when ANY of its patterns is found in the query, and the first
# matching entry in this list wins, so entry order is behavior.
_PATTERNS: list[tuple[str, list[re.Pattern]]] = [
    ("retrospective", [
        re.compile(r"\b(would have|would Riprap|on (the )?date of|as of (the )?(date|day)|"
                   r"day before|prior to|before (Hurricane|Ida|Sandy|the storm)|"
                   r"on (August|September|October|November|December|January|February|March|"
                   r"April|May|June|July) \d{1,2},? ?\d{4}|"
                   r"time.?machine|retrospective|court (exhibit|testimony))\b", re.I),
    ]),
    ("emergency_response", [
        re.compile(r"\b(just triggered|right now|next (few |six |\d+ )?hours?|"
                   r"in the next \d+|currently flooding|flood (warning|watch) is active|"
                   r"sensor [A-Z]{2}-?\d+|live (alert|trigger))\b", re.I),
    ]),
    ("legal_disclosure", [
        re.compile(r"\b(disclos(e|ure|ed)|RPL\s*§?\s*\d+|Property Condition Disclosure|"
                   r"seller'?s? disclosure|landlord'?s? disclosure|"
                   r"required to disclose|need to disclose)\b", re.I),
        # Bare section references ("§ 462", "§231-b") get their own pattern
        # with no leading \b: "§" is not a word character, so a \b in front
        # of it only holds when the sign is glued to a preceding word
        # character, which made these alternatives unreachable after a
        # space or at the start of the query.
        re.compile(r"§\s*(462|231-?b)\b", re.I),
    ]),
    ("grant_evidence", [
        re.compile(r"\b(vulnerability assessment|CDBG-?DR|HUD|BRIC|"
                   r"grant application|funding application|community resilience grant|"
                   r"FEMA application|disaster recovery (application|funding))\b", re.I),
    ]),
    ("development_siting", [
        re.compile(r"\b(what (are|is) (they|being) build(ing)?|new construction|"
                   r"under construction|active (construction|filing|project|permit)|"
                   r"projects? (in progress|underway|planned)|architects?|"
                   r"siting check|pre.?design|"
                   r"DOB filing|developer)\b", re.I),
    ]),
    ("comparison", [
        re.compile(r"\b(compare\b|comparison|\bvs\b|\bversus\b|"
                   r"head-?to-?head|\brank\s+the\s+top)\b", re.I),
        # `prioritize X over Y` can have many words between, hence the
        # bounded span: at most 80 characters (newlines included via
        # re.S) may separate the two keywords, to avoid runaway matches.
        re.compile(r"\bprioritize\b.{1,80}\bover\b", re.I | re.S),
        re.compile(r"\bover\s+\w+(?:\s+\w+){0,3}\s+for\s+(hardening|investment)\b", re.I),
    ]),
    ("capital_planning", [
        re.compile(r"\b(prioritiz(e|ation)|capital plan(ning)?|harden(ing|s)?|"
                   r"infrastructure investment|where (should|to) (we |the )(invest|"
                   r"prioritize|harden)|MTA.+prioritize|DEP.+prioritize|"
                   r"protection envelope|outside (it|the protection)|"
                   r"resilien(ce|cy) project)\b", re.I),
    ]),
    ("habitability_decision", [
        re.compile(r"\b(should I worry|should I (be|consider)|is (it|this) safe|"
                   r"can I (rent|live|move|raise (my )?kids?)|considering (renting|leasing|moving)|"
                   r"(thinking about|planning to) (rent|lease|move|buy)|"
                   r"is (this|that|the landlord) true|landlord (says|claims|told)|"
                   r"no flood history|just got a lease|new lease|signing a lease|"
                   r"\bworry\b)", re.I),
    ]),
    ("underwriting", [
        re.compile(r"\b(underwrit(e|er|ing|able)|actuarial|loss history|"
                   r"insurabl[ey]|catastrophe (model|risk)|"
                   r"insurance (audit|memo|profile)|"
                   r"audit (chain|trail))\b", re.I),
    ]),
    ("journalism", [
        re.compile(r"\b(reporter|journalist|newsroom|story|coverage|"
                   r"published?|publish (this|the))", re.I),
    ]),
]
def detect(query: str, intent: str | None = None) -> str:
"""Classify the question shape from the raw query and planner intent.
Returns one of `QUESTION_TYPES`. Falls back to `generic_exposure`
when no pattern matches — that's the existing behavior, preserved.
`intent` is currently advisory only (the patterns don't read it),
but the parameter is part of the API so future refinements can
use it (e.g. an `intent=neighborhood` query without a verdict
keyword could default to `journalism` rather than `generic_exposure`).
"""
if not query:
return "generic_exposure"
q = query.strip()
for qt, patterns in _PATTERNS:
if any(p.search(q) for p in patterns):
return qt
# Heuristic fallback: bare neighborhood/borough names from a planner
# context default to journalism (most common stakeholder reading a
# neighborhood-only query is a reporter or planner). For
# single_address with no question keyword, fall back to generic.
if intent == "neighborhood" and len(q.split()) <= 3:
return "journalism"
return "generic_exposure"
def opening_instruction(question_type: str) -> str:
    """Look up the opening directive for *question_type*.

    Unknown types yield an empty string, as does `generic_exposure`,
    whose stored directive is itself empty (no override).
    """
    try:
        return _DIRECTIVES[question_type]
    except KeyError:
        return ""
def augment_system_prompt(base: str, *, query: str,
                          intent: str | None = None) -> str:
    """Append a question-aware opening directive to a base system prompt.

    The query (and optional planner intent) is classified with `detect`
    and the matching directive fetched with `opening_instruction`. When
    that directive is empty, as it is for `generic_exposure`, *base* is
    returned unchanged. Otherwise the result is *base*, a blank line, a
    `QUESTION-AWARE OPENING` header line, and the directive text.
    """
    extra = opening_instruction(detect(query, intent))
    if extra:
        banner = (
            "QUESTION-AWARE OPENING (this directive overrides ONLY the opening "
            "**Status.** sentence; the four-section structure and citation "
            "discipline above remain in force):"
        )
        return f"{base}\n\n{banner}\n{extra}"
    # Nothing to add: hand back the caller's prompt untouched.
    return base
|