Spaces:

lablab-ai-amd-developer-hackathon
/

riprap-nyc

Running

App Files Files Community

riprap-nyc / app /framing.py

seriffic

deploy: sync all changes from main at 6904684

b9a10ad 2 days ago

raw

history blame contribute delete

11.5 kB

	"""Question-aware framing for the Capstone briefing opening.

	The four-section structure (Status / Empirical / Modeled / Policy) is
	load-bearing for the Mellea grounding checks and stays unchanged. What
	this module does is detect the shape of the user's question from the
	raw query string + planner intent, then return a single-sentence
	directive that conditions only the opening Status sentence.

	Eleven question types are recognised; they mirror the rubric in
	`tests/integration/stakeholder_queries.py:FRAMING_RUBRICS`. Detection
	is deterministic regex matching — no extra LLM call, no added latency.

	Usage:

	    from app.framing import augment_system_prompt
	    system_prompt = augment_system_prompt(
	        EXTRA_SYSTEM_PROMPT, query=user_query, intent=plan.intent,
	    )

	The returned prompt has the original text plus a trailing
	`QUESTION-AWARE OPENING:` block. Granite 4.1 attends to this through
	the system-prompt cache and applies it to the Status sentence.
	"""
	from __future__ import annotations

	import re
	from typing import Final

	# Closed set of question-type labels used throughout this module. Every
	# label except the last can be produced by a pattern match in `detect`;
	# "generic_exposure" is the fallback label and maps to an empty directive.
	QUESTION_TYPES: Final[tuple[str, ...]] = (
	"habitability_decision",
	"legal_disclosure",
	"capital_planning",
	"underwriting",
	"journalism",
	"development_siting",
	"grant_evidence",
	"retrospective",
	"emergency_response",
	"comparison",
	"generic_exposure",
	)


	# ---- Per-type opening directives ------------------------------------------
	#
	# Each directive is one sentence that supplements (does not replace) the
	# Status section's existing instruction. Granite 4.1 has a strong prior
	# toward "this address is exposed to ..." openings; the directive
	# overrides that in a question-shaped way without disturbing the four
	# grounding invariants.

	# Maps each question-type label to the directive text appended to the
	# system prompt. An empty string means "no override": the prompt builder
	# returns the base prompt unchanged when the looked-up directive is falsy.
	_DIRECTIVES: dict[str, str] = {
	"habitability_decision": (
	"The Status sentence MUST start with a direct verdict word "
	"(\"Yes\" if the documents show meaningful flood evidence, \"No\" "
	"if they don't), then name the single strongest piece of "
	"evidence with its [doc_id]. The user is deciding whether to "
	"live here — answer the question, then cite."
	),
	"legal_disclosure": (
	"The Status sentence MUST state whether the documents contain "
	"facts a NY RPL §462(2) or §231-b disclosure would need to "
	"record. Begin with \"Disclosure is warranted\" or \"Disclosure "
	"is not triggered\" based on the evidence, then name the "
	"specific fact with its [doc_id]. The user is a real-estate "
	"professional checking the disclosure threshold."
	),
	"capital_planning": (
	"The Status sentence MUST frame the place as a capital-planning "
	"candidate: name the dominant exposure with its [doc_id] and "
	"indicate whether the evidence supports prioritization "
	"(\"merits prioritization\", \"ranks high for hardening\") or "
	"not. The user allocates infrastructure investment."
	),
	"underwriting": (
	"The Status sentence MUST emphasize that every figure in the "
	"briefing is independently sourced — open with the dominant "
	"exposure and the specific [doc_id], then add a half-clause "
	"noting that the audit chain follows below. The user is an "
	"underwriter who needs a defensible loss narrative."
	),
	"journalism": (
	"The Status sentence MUST be reproducible reporting prose: "
	"name the place, name the dominant exposure with [doc_id], "
	"and avoid editorial verbs like \"shocking\" or \"alarming\". "
	"The user is a data journalist who will cite this prose verbatim."
	),
	"development_siting": (
	"The Status sentence MUST start with the count of active "
	"construction filings cited from [dob_permits] (e.g. \"N "
	"active construction filings sit inside ...\") and indicate "
	"which flood layer they intersect. The user is a developer or "
	"architect doing a pre-design siting check."
	),
	"grant_evidence": (
	"The Status sentence MUST open with \"Vulnerability "
	"assessment:\" and name the place + dominant exposure with "
	"[doc_id]. Treat the briefing as the evidence section of a "
	"HUD CDBG-DR or FEMA BRIC application — formal, third-person, "
	"free of advocacy framing."
	),
	"retrospective": (
	"Riprap currently runs on present-day data sources. The Status "
	"sentence MUST acknowledge the question is retrospective and "
	"state explicitly that the briefing reflects the CURRENT state "
	"of these data sources, not a snapshot from the requested date. "
	"Then proceed with the present-day exposure picture so the user "
	"still gets the geography. Silence-over-confabulation: never "
	"reconstruct historical conditions you can't verify."
	),
	"emergency_response": (
	"The Status sentence MUST quantify what is at risk in the "
	"next few hours, citing the live signal that triggered the "
	"query and any active alerts with [doc_id]. The user needs an "
	"operational picture, not a historical exposure summary."
	),
	"comparison": (
	"The Status sentence MUST name BOTH places the user is "
	"comparing and indicate which one shows greater exposure on "
	"the strongest cited signal. If only one place's data is "
	"available in the documents, say so explicitly. The user is "
	"doing a head-to-head decision."
	),
	"generic_exposure": "", # default — no override
	}


	# ---- Detector -------------------------------------------------------------
	#
	# Patterns are ordered: the FIRST type whose pattern matches wins. Order
	# matters — more specific question shapes (legal_disclosure, grant_evidence,
	# emergency_response) come before more general ones (habitability_decision,
	# capital_planning) so the obvious specialist tags don't get swallowed.

	_PATTERNS: list[tuple[str, list[re.Pattern]]] = [
	("retrospective", [
	re.compile(r"\b(would have\|would Riprap\|on (the )?date of\|as of (the )?(date\|day)\|"
	r"day before\|prior to\|before (Hurricane\|Ida\|Sandy\|the storm)\|"
	r"on (August\|September\|October\|November\|December\|January\|February\|March\|"
	r"April\|May\|June\|July) \d{1,2},? ?\d{4}\|"
	r"time.?machine\|retrospective\|court (exhibit\|testimony))\b", re.I),
	]),
	("emergency_response", [
	re.compile(r"\b(just triggered\|right now\|next (few \|six \|\d+ )?hours?\|"
	r"in the next \d+\|currently flooding\|flood (warning\|watch) is active\|"
	r"sensor [A-Z]{2}-?\d+\|live (alert\|trigger))\b", re.I),
	]),
	("legal_disclosure", [
	re.compile(r"\b(disclos(e\|ure\|ed)\|RPL\s§?\s\d+\|Property Condition Disclosure\|"
	r"§\s462\|§\s231-?b\|seller'?s? disclosure\|landlord'?s? disclosure\|"
	r"required to disclose\|need to disclose)\b", re.I),
	]),
	("grant_evidence", [
	re.compile(r"\b(vulnerability assessment\|CDBG-?DR\|HUD\|BRIC\|"
	r"grant application\|funding application\|community resilience grant\|"
	r"FEMA application\|disaster recovery (application\|funding))\b", re.I),
	]),
	("development_siting", [
	re.compile(r"\b(what (are\|is) (they\|being) build(ing)?\|new construction\|"
	r"under construction\|active (construction\|filing\|project\|permit)\|"
	r"projects? (in progress\|underway\|planned)\|architects?\|"
	r"siting check\|pre.?design\|"
	r"DOB filing\|developer)\b", re.I),
	]),
	("comparison", [
	# `prioritize X over Y` can have many words between, hence the
	# bounded non-greedy span — capped at 80 chars to avoid runaway.
	re.compile(r"\b(compare\b\|comparison\|\bvs\b\|\bversus\b\|"
	r"head-?to-?head\|\brank\s+the\s+top)\b", re.I),
	re.compile(r"\bprioritize\b.{1,80}\bover\b", re.I \| re.S),
	re.compile(r"\bover\s+\w+(?:\s+\w+){0,3}\s+for\s+(hardening\|investment)\b", re.I),
	]),
	("capital_planning", [
	re.compile(r"\b(prioritiz(e\|ation)\|capital plan(ning)?\|harden(ing\|s)?\|"
	r"infrastructure investment\|where (should\|to) (we \|the )(invest\|"
	r"prioritize\|harden)\|MTA.+prioritize\|DEP.+prioritize\|"
	r"protection envelope\|outside (it\|the protection)\|"
	r"resilien(ce\|cy) project)\b", re.I),
	]),
	("habitability_decision", [
	re.compile(r"\b(should I worry\|should I (be\|consider)\|is (it\|this) safe\|"
	r"can I (rent\|live\|move\|raise (my )?kids?)\|considering (renting\|leasing\|moving)\|"
	r"(thinking about\|planning to) (rent\|lease\|move\|buy)\|"
	r"is (this\|that\|the landlord) true\|landlord (says\|claims\|told)\|"
	r"no flood history\|just got a lease\|new lease\|signing a lease\|"
	r"\bworry\b)", re.I),
	]),
	("underwriting", [
	re.compile(r"\b(underwrit(e\|er\|ing\|able)\|actuarial\|loss history\|"
	r"insurabl[ey]\|catastrophe (model\|risk)\|"
	r"insurance (audit\|memo\|profile)\|"
	r"audit (chain\|trail))\b", re.I),
	]),
	("journalism", [
	re.compile(r"\b(reporter\|journalist\|newsroom\|story\|coverage\|"
	r"published?\|publish (this\|the))", re.I),
	]),
	]


	def detect(query: str, intent: str | None = None) -> str:
	    """Classify the question shape from the raw query and planner intent.

	    Returns one of `QUESTION_TYPES`. The query is stripped and tested
	    against `_PATTERNS` in order; the first type with a matching pattern
	    wins. An empty, missing or whitespace-only query is always
	    `generic_exposure`.

	    `intent` is not read by the patterns. It is consulted only by the
	    fallback heuristic: when nothing matched and the planner intent is
	    `neighborhood`, a query of at most three words is classified as
	    `journalism`. Every other unmatched query is `generic_exposure`.
	    """
	    # Strip BEFORE the emptiness check so a whitespace-only query cannot
	    # reach the word-count heuristic below (zero words would pass `<= 3`).
	    q = query.strip() if query else ""
	    if not q:
	        return "generic_exposure"

	    for qt, patterns in _PATTERNS:
	        if any(p.search(q) for p in patterns):
	            return qt

	    # Heuristic fallback: bare neighborhood/borough names from a planner
	    # context default to journalism (most common stakeholder reading a
	    # neighborhood-only query is a reporter or planner). For
	    # single_address with no question keyword, fall back to generic.
	    if intent == "neighborhood" and len(q.split()) <= 3:
	        return "journalism"
	    return "generic_exposure"


	def opening_instruction(question_type: str) -> str:
	    """Look up the opening directive registered for `question_type`.

	    Types without an entry in `_DIRECTIVES` yield an empty string, as
	    does `generic_exposure`, whose entry is itself empty (no override).
	    """
	    try:
	        directive = _DIRECTIVES[question_type]
	    except KeyError:
	        directive = ""
	    return directive


	def augment_system_prompt(base: str, *, query: str,
	                          intent: str | None = None) -> str:
	    """Wrap a base system prompt with a question-aware opening directive.

	    The question type is obtained from `detect(query, intent)` and its
	    directive from `opening_instruction`. When that directive is empty
	    (as it is for `generic_exposure`) `base` is returned unchanged, so
	    the original behavior is preserved. Otherwise the result is `base`,
	    a blank line, a `QUESTION-AWARE OPENING` header and the directive.
	    """
	    qt = detect(query, intent)
	    directive = opening_instruction(qt)
	    if not directive:
	        return base
	    # The header reads "Status sentence" to match the wording every
	    # directive uses; a stray period previously split the phrase.
	    return (
	        f"{base}\n\n"
	        "QUESTION-AWARE OPENING (this directive overrides ONLY the opening "
	        "Status sentence; the four-section structure and citation "
	        f"discipline above remain in force):\n{directive}"
	    )