"""Question-aware framing for the Capstone briefing opening. The four-section structure (Status / Empirical / Modeled / Policy) is load-bearing for the Mellea grounding checks and stays unchanged. What this module does is detect the *shape* of the user's question from the raw query string + planner intent, then return a single-sentence directive that conditions only the opening Status sentence. Eleven question types are recognised; they mirror the rubric in `tests/integration/stakeholder_queries.py:FRAMING_RUBRICS`. Detection is deterministic regex matching — no extra LLM call, no added latency. Usage: from app.framing import augment_system_prompt system_prompt = augment_system_prompt( EXTRA_SYSTEM_PROMPT, query=user_query, intent=plan.intent, ) The returned prompt has the original text plus a trailing `QUESTION-AWARE OPENING:` block. Granite 4.1 attends to this through the system-prompt cache and applies it to the Status sentence. """ from __future__ import annotations import re from typing import Final QUESTION_TYPES: Final[tuple[str, ...]] = ( "habitability_decision", "legal_disclosure", "capital_planning", "underwriting", "journalism", "development_siting", "grant_evidence", "retrospective", "emergency_response", "comparison", "generic_exposure", ) # ---- Per-type opening directives ------------------------------------------ # # Each directive is one sentence that supplements (does not replace) the # Status section's existing instruction. Granite 4.1 has a strong prior # toward "this address is exposed to ..." openings; the directive # overrides that in a question-shaped way without disturbing the four # grounding invariants. _DIRECTIVES: dict[str, str] = { "habitability_decision": ( "The Status sentence MUST start with a direct verdict word " "(\"Yes\" if the documents show meaningful flood evidence, \"No\" " "if they don't), then name the single strongest piece of " "evidence with its [doc_id]. The user is deciding whether to " "live here — answer the question, then cite." ), "legal_disclosure": ( "The Status sentence MUST state whether the documents contain " "facts a NY RPL §462(2) or §231-b disclosure would need to " "record. Begin with \"Disclosure is warranted\" or \"Disclosure " "is not triggered\" based on the evidence, then name the " "specific fact with its [doc_id]. The user is a real-estate " "professional checking the disclosure threshold." ), "capital_planning": ( "The Status sentence MUST frame the place as a capital-planning " "candidate: name the dominant exposure with its [doc_id] and " "indicate whether the evidence supports prioritization " "(\"merits prioritization\", \"ranks high for hardening\") or " "not. The user allocates infrastructure investment." ), "underwriting": ( "The Status sentence MUST emphasize that every figure in the " "briefing is independently sourced — open with the dominant " "exposure and the specific [doc_id], then add a half-clause " "noting that the audit chain follows below. The user is an " "underwriter who needs a defensible loss narrative." ), "journalism": ( "The Status sentence MUST be reproducible reporting prose: " "name the place, name the dominant exposure with [doc_id], " "and avoid editorial verbs like \"shocking\" or \"alarming\". " "The user is a data journalist who will cite this prose verbatim." ), "development_siting": ( "The Status sentence MUST start with the count of active " "construction filings cited from [dob_permits] (e.g. \"N " "active construction filings sit inside ...\") and indicate " "which flood layer they intersect. The user is a developer or " "architect doing a pre-design siting check." ), "grant_evidence": ( "The Status sentence MUST open with \"Vulnerability " "assessment:\" and name the place + dominant exposure with " "[doc_id]. Treat the briefing as the evidence section of a " "HUD CDBG-DR or FEMA BRIC application — formal, third-person, " "free of advocacy framing." ), "retrospective": ( "Riprap currently runs on present-day data sources. The Status " "sentence MUST acknowledge the question is retrospective and " "state explicitly that the briefing reflects the CURRENT state " "of these data sources, not a snapshot from the requested date. " "Then proceed with the present-day exposure picture so the user " "still gets the geography. Silence-over-confabulation: never " "reconstruct historical conditions you can't verify." ), "emergency_response": ( "The Status sentence MUST quantify what is at risk in the " "next few hours, citing the live signal that triggered the " "query and any active alerts with [doc_id]. The user needs an " "operational picture, not a historical exposure summary." ), "comparison": ( "The Status sentence MUST name BOTH places the user is " "comparing and indicate which one shows greater exposure on " "the strongest cited signal. If only one place's data is " "available in the documents, say so explicitly. The user is " "doing a head-to-head decision." ), "generic_exposure": "", # default — no override } # ---- Detector ------------------------------------------------------------- # # Patterns are ordered: the FIRST type whose pattern matches wins. Order # matters — more specific question shapes (legal_disclosure, grant_evidence, # emergency_response) come before more general ones (habitability_decision, # capital_planning) so the obvious specialist tags don't get swallowed. _PATTERNS: list[tuple[str, list[re.Pattern]]] = [ ("retrospective", [ re.compile(r"\b(would have|would Riprap|on (the )?date of|as of (the )?(date|day)|" r"day before|prior to|before (Hurricane|Ida|Sandy|the storm)|" r"on (August|September|October|November|December|January|February|March|" r"April|May|June|July) \d{1,2},? ?\d{4}|" r"time.?machine|retrospective|court (exhibit|testimony))\b", re.I), ]), ("emergency_response", [ re.compile(r"\b(just triggered|right now|next (few |six |\d+ )?hours?|" r"in the next \d+|currently flooding|flood (warning|watch) is active|" r"sensor [A-Z]{2}-?\d+|live (alert|trigger))\b", re.I), ]), ("legal_disclosure", [ re.compile(r"\b(disclos(e|ure|ed)|RPL\s*§?\s*\d+|Property Condition Disclosure|" r"§\s*462|§\s*231-?b|seller'?s? disclosure|landlord'?s? disclosure|" r"required to disclose|need to disclose)\b", re.I), ]), ("grant_evidence", [ re.compile(r"\b(vulnerability assessment|CDBG-?DR|HUD|BRIC|" r"grant application|funding application|community resilience grant|" r"FEMA application|disaster recovery (application|funding))\b", re.I), ]), ("development_siting", [ re.compile(r"\b(what (are|is) (they|being) build(ing)?|new construction|" r"under construction|active (construction|filing|project|permit)|" r"projects? (in progress|underway|planned)|architects?|" r"siting check|pre.?design|" r"DOB filing|developer)\b", re.I), ]), ("comparison", [ # `prioritize X over Y` can have many words between, hence the # bounded non-greedy span — capped at 80 chars to avoid runaway. re.compile(r"\b(compare\b|comparison|\bvs\b|\bversus\b|" r"head-?to-?head|\brank\s+the\s+top)\b", re.I), re.compile(r"\bprioritize\b.{1,80}\bover\b", re.I | re.S), re.compile(r"\bover\s+\w+(?:\s+\w+){0,3}\s+for\s+(hardening|investment)\b", re.I), ]), ("capital_planning", [ re.compile(r"\b(prioritiz(e|ation)|capital plan(ning)?|harden(ing|s)?|" r"infrastructure investment|where (should|to) (we |the )(invest|" r"prioritize|harden)|MTA.+prioritize|DEP.+prioritize|" r"protection envelope|outside (it|the protection)|" r"resilien(ce|cy) project)\b", re.I), ]), ("habitability_decision", [ re.compile(r"\b(should I worry|should I (be|consider)|is (it|this) safe|" r"can I (rent|live|move|raise (my )?kids?)|considering (renting|leasing|moving)|" r"(thinking about|planning to) (rent|lease|move|buy)|" r"is (this|that|the landlord) true|landlord (says|claims|told)|" r"no flood history|just got a lease|new lease|signing a lease|" r"\bworry\b)", re.I), ]), ("underwriting", [ re.compile(r"\b(underwrit(e|er|ing|able)|actuarial|loss history|" r"insurabl[ey]|catastrophe (model|risk)|" r"insurance (audit|memo|profile)|" r"audit (chain|trail))\b", re.I), ]), ("journalism", [ re.compile(r"\b(reporter|journalist|newsroom|story|coverage|" r"published?|publish (this|the))", re.I), ]), ] def detect(query: str, intent: str | None = None) -> str: """Classify the question shape from the raw query and planner intent. Returns one of `QUESTION_TYPES`. Falls back to `generic_exposure` when no pattern matches — that's the existing behavior, preserved. `intent` is currently advisory only (the patterns don't read it), but the parameter is part of the API so future refinements can use it (e.g. an `intent=neighborhood` query without a verdict keyword could default to `journalism` rather than `generic_exposure`). """ if not query: return "generic_exposure" q = query.strip() for qt, patterns in _PATTERNS: if any(p.search(q) for p in patterns): return qt # Heuristic fallback: bare neighborhood/borough names from a planner # context default to journalism (most common stakeholder reading a # neighborhood-only query is a reporter or planner). For # single_address with no question keyword, fall back to generic. if intent == "neighborhood" and len(q.split()) <= 3: return "journalism" return "generic_exposure" def opening_instruction(question_type: str) -> str: """Return the directive sentence(s) for a question type. Returns empty string for `generic_exposure` (no override).""" return _DIRECTIVES.get(question_type, "") def augment_system_prompt(base: str, *, query: str, intent: str | None = None) -> str: """Wrap a base system prompt with a question-aware opening directive. No-op when the detector returns `generic_exposure` — the original behavior is preserved. """ qt = detect(query, intent) directive = opening_instruction(qt) if not directive: return base return ( f"{base}\n\n" f"QUESTION-AWARE OPENING (this directive overrides ONLY the opening " f"**Status.** sentence; the four-section structure and citation " f"discipline above remain in force):\n{directive}" )