Spaces:

lablab-ai-amd-developer-hackathon
/

riprap-nyc

Running

seriffic Claude Opus 4.7 (1M context) committed 3 days ago

Commit

f24976f

1 Parent(s): 84bb74d

Stones C1: add taxonomy modules without changing behaviour

First atomic step of the Riprap → Stones migration described in
UPDATE_STONES.md. Adds app/stones/{cornerstone,keystone,touchstone,
lodestone,capstone}.py — each exposes NAME / TAGLINE / DESCRIPTION /
SOURCES / collect(state). The FSM is unchanged; nothing imports the
new modules yet. Capstone is a thin alias around app/reconcile.py.

Forward-looking SOURCES entries (terramind_buildings, terramind_lulc,
ttm_battery_surge) are included now and gated in the unit test against
a FUTURE_STATE_KEYS allowlist so commits 4 and 6 can land without
churn here.

tests/test_stones.py is pure-import (no server required) and covers:
- required attrs on every Stone
- SOURCES keys are valid FSM state writes (modulo future keys)
- SOURCES are disjoint across data-Stones
- collect() drops silent specialists
- canonical Stone iteration order
- uniform collect() arity

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (7) hide show

app/stones/__init__.py +41 -0
app/stones/capstone.py +47 -0
app/stones/cornerstone.py +38 -0
app/stones/keystone.py +35 -0
app/stones/lodestone.py +34 -0
app/stones/touchstone.py +35 -0
tests/test_stones.py +128 -0

app/stones/__init__.py ADDED Viewed

	@@ -0,0 +1,41 @@

+"""Five Stones — conceptual grouping over the FSM specialists.
+Riprap's FSM runs ~20 atomic specialist actions; the Stones layer is a
+thin re-grouping that gives the trace UI, the briefing prompt, and the
+project's public framing five legible roles instead of 20 atomic
+function calls.
+Each Stone module exposes the same shape:
+    NAME         — display name (e.g. "Cornerstone")
+    TAGLINE      — single phrase used as a section header
+    DESCRIPTION  — one-sentence description for the README / trace UI
+    SOURCES      — list of FSM state keys this Stone aggregates from
+    collect(state)  — pull this Stone's documents out of the state dict
+Order is meaningful:
+  1. Cornerstone — the hazard reader (static record)
+  2. Keystone    — the asset register (exposure)
+  3. Touchstone  — the live observer (current sensors + EO)
+  4. Lodestone   — the projector (forecast)
+  5. Capstone    — the synthesiser (Granite 4.1 + Mellea)
+The first four are *data-Stones*; the Capstone IS the reconciler.
+"""
+from __future__ import annotations
+from app.stones import capstone, cornerstone, keystone, lodestone, touchstone
+# Canonical Stone ordering, consumed by the briefing prompt and trace UI:
+# the four data-Stones first, then the Capstone that synthesises them.
+DATA_STONES = [cornerstone, keystone, touchstone, lodestone]
+ALL_STONES = [*DATA_STONES, capstone]
+__all__ = [
+    "ALL_STONES",
+    "DATA_STONES",
+    "capstone",
+    "cornerstone",
+    "keystone",
+    "lodestone",
+    "touchstone",
+]

app/stones/capstone.py ADDED Viewed

	@@ -0,0 +1,47 @@

+"""Capstone — the Synthesiser.
+Granite 4.1 (8B) writes the cited briefing under Mellea-validated
+rejection sampling. Every numeric claim is anchored to a `[doc_id]`
+citation pointing back into one of the four data-Stones; sentences
+that fail the four grounding checks (`numerics_grounded`,
+`no_placeholder_tokens`, `citations_dense`, `citations_resolve`) are
+re-rolled with surgical feedback until the budget is exhausted.
+This module is a thin alias around `app.reconcile` — the working code
+stays in `app/reconcile.py` for git-blame continuity. The naming is in
+the user-facing trace and the README.
+"""
+from __future__ import annotations
+from typing import Any
+from app import reconcile as _reconcile
+# Display metadata: NAME is the display name, TAGLINE the section-header
+# phrase, DESCRIPTION the one-sentence summary.
+NAME = "Capstone"
+TAGLINE = "The Synthesiser"
+DESCRIPTION = (
+    "Writes the cited briefing — Granite 4.1 + Mellea rejection sampling."
+)
+# Capstone consumes everything the four data-Stones produced; we don't
+# enumerate state keys here because the reconciler reads the FSM state
+# directly and `app/reconcile.py:build_documents()` is the source of
+# truth for which keys it touches.
+# SOURCES is therefore intentionally empty, but still present so this
+# module exposes the same attribute set as the data-Stones.
+SOURCES: list[str] = []
+# Re-export the reconciler entrypoints under the Stone name so callers
+# can write `from app.stones import capstone; capstone.run(state)`.
+# These are plain aliases: each name is bound to the very same object that
+# lives in `app.reconcile`, so no behaviour is added or wrapped here.
+build_documents = _reconcile.build_documents
+trim_docs_to_plan = _reconcile.trim_docs_to_plan
+verify_paragraph = _reconcile.verify_paragraph
+# `run` is the Stone-facing name for `app.reconcile.reconcile`.
+run = _reconcile.reconcile
+EXTRA_SYSTEM_PROMPT = _reconcile.EXTRA_SYSTEM_PROMPT
+def collect(state: dict[str, Any]) -> dict[str, Any]:
+    """Pick the Capstone's trace outputs out of the state dict.
+    Only the "paragraph", "audit" and "mellea" entries are considered, in
+    that order; an entry that is missing or set to None is left out.
+    """
+    trace_keys = ("paragraph", "audit", "mellea")
+    return {key: state[key] for key in trace_keys if state.get(key) is not None}

app/stones/cornerstone.py ADDED Viewed

	@@ -0,0 +1,38 @@

+"""Cornerstone — the Hazard Reader.
+Reads what NYC's ground remembers about flooding: empirical 2012 Sandy
+extent, modelled DEP scenarios, 2021 Ida USGS high-water marks, baked
+Prithvi-EO Ida-attributable polygons, and LiDAR-derived microtopography
+(elevation / HAND / TWI).
+These are static records — they don't change between queries. They
+ground the briefing in what already happened or has already been
+modelled, and serve as the empirical anchor for everything the live
+sensors and forecasts report.
+"""
+from __future__ import annotations
+from typing import Any
+# Display metadata: NAME is the display name, TAGLINE the section-header
+# phrase, DESCRIPTION the one-sentence summary.
+NAME = "Cornerstone"
+TAGLINE = "The Hazard Reader"
+DESCRIPTION = "Reads what NYC's ground remembers about flooding."
+# FSM state keys this Stone aggregates. The order here mirrors the order
+# documents are emitted into the reconciler prompt today.
+# `collect` iterates this list, so its output follows the same order.
+SOURCES = [
+    "sandy",          # step_sandy           — 2012 Sandy inundation extent
+    "dep",            # step_dep             — NYC DEP stormwater scenarios
+    "ida_hwm",        # step_ida_hwm         — USGS Ida 2021 high-water marks
+    "prithvi_water",  # step_prithvi         — baked Prithvi-EO Ida polygons
+    "microtopo",      # step_microtopo       — USGS 3DEP DEM + HAND/TWI
+]
+def collect(state: dict[str, Any]) -> dict[str, Any]:
+    """Gather the Cornerstone sources that actually fired.
+    Walks SOURCES in declaration order and keeps each state entry whose
+    value is not None; absent or None entries are omitted (the
+    silence-over-confabulation contract — a specialist that did not fire
+    contributes nothing).
+    """
+    fired: dict[str, Any] = {}
+    for key in SOURCES:
+        if state.get(key) is None:
+            continue
+        fired[key] = state[key]
+    return fired

app/stones/keystone.py ADDED Viewed

	@@ -0,0 +1,35 @@

+"""Keystone — the Asset Register.
+Counts what the city has built on top of those hazards: subway
+entrances, NYCHA developments, DOE schools, NYS DOH hospitals, and
+(via the TerraMind-NYC-Buildings adapter, fine-tuned on NYC building
+footprints on AMD MI300X) the building stock visible in current EO.
+These are the public-asset registers — the per-address briefing
+quantifies how many of each asset class fall inside the hazard
+footprints the Cornerstone established.
+"""
+from __future__ import annotations
+from typing import Any
+# Display metadata: NAME is the display name, TAGLINE the section-header
+# phrase, DESCRIPTION the one-sentence summary.
+NAME = "Keystone"
+TAGLINE = "The Asset Register"
+DESCRIPTION = "Counts the public assets and built fabric exposed to the hazards."
+# Existing register specialists + the new TerraMind-Buildings tool
+# (added in commit 4 of the Stones migration). The Stones layer is
+# tolerant of state keys that don't exist yet — `collect` skips
+# anything absent (or set to None).
+SOURCES = [
+    "mta_entrances",        # step_mta_entrances   — MTA entrance exposure
+    "nycha_developments",   # step_nycha           — NYCHA exposure
+    "doe_schools",          # step_doe_schools     — DOE schools exposure
+    "doh_hospitals",        # step_doh_hospitals   — NYS DOH hospitals
+    "terramind_buildings",  # step_terramind_buildings (commit 4) — NYC LoRA
+]
+def collect(state: dict[str, Any]) -> dict[str, Any]:
+    """Gather the Keystone sources that actually fired.
+    Walks SOURCES in declaration order and keeps each state entry whose
+    value is not None; absent or None entries are omitted.
+    """
+    fired: dict[str, Any] = {}
+    for key in SOURCES:
+        if state.get(key) is None:
+            continue
+        fired[key] = state[key]
+    return fired

app/stones/lodestone.py ADDED Viewed

	@@ -0,0 +1,34 @@

+"""Lodestone — the Projector.
+Projects what's coming next: NWS active flood-relevant alerts (the
+National Weather Service's authoritative short-horizon watches /
+warnings), Granite TimeSeries TTM r2 zero-shot forecasts of the Battery
+surge residual, per-address NYC 311 complaint rates, and per-sensor
+FloodNet event recurrence, and (via the Granite-TTM-r2-Battery-Surge
+fine-tune on AMD MI300X) a 96-hour surge nowcast.
+The Lodestone is the forward-looking Stone — every cited number here
+is a forecast, framed as such in the briefing.
+"""
+from __future__ import annotations
+from typing import Any
+# Display metadata: NAME is the display name, TAGLINE the section-header
+# phrase, DESCRIPTION the one-sentence summary.
+NAME = "Lodestone"
+TAGLINE = "The Projector"
+DESCRIPTION = "Projects what's coming: alerts, surge, and recurrence forecasts."
+# Existing forecast specialists + the new fine-tuned Battery surge
+# nowcast (added in commit 6).
+# `collect` iterates this list, so its output follows the same order.
+SOURCES = [
+    "nws_alerts",          # step_nws_alerts         — NWS public alerts
+    "ttm_forecast",        # step_ttm_forecast       — TTM r2 Battery zero-shot
+    "ttm_311_forecast",    # step_ttm_311_forecast   — TTM r2 311 weekly
+    "floodnet_forecast",   # step_floodnet_forecast  — TTM r2 FloodNet recurrence
+    "ttm_battery_surge",   # step_ttm_battery_surge (commit 6) — fine-tuned
+]
+def collect(state: dict[str, Any]) -> dict[str, Any]:
+    """Gather the Lodestone sources that actually fired.
+    Walks SOURCES in declaration order and keeps each state entry whose
+    value is not None; absent or None entries are omitted.
+    """
+    fired: dict[str, Any] = {}
+    for key in SOURCES:
+        if state.get(key) is None:
+            continue
+        fired[key] = state[key]
+    return fired

app/stones/touchstone.py ADDED Viewed

	@@ -0,0 +1,35 @@

+"""Touchstone — the Live Observer.
+Watches what's happening right now: FloodNet ultrasonic depth sensors,
+NYC 311 flood-complaint history, NWS hourly METAR observations, NOAA
+tide-gauge water levels, and per-query EO segmentation
+(Prithvi-EO 2.0 NYC Pluvial fine-tune for water/flood; TerraMind-NYC
+LULC adapter for current land cover).
+The Touchstone is the "current state of the world" Stone. Its outputs
+change minute to minute and are explicitly framed in the briefing as
+right-now context, not historical record.
+"""
+from __future__ import annotations
+from typing import Any
+# Display metadata: NAME is the display name, TAGLINE the section-header
+# phrase, DESCRIPTION the one-sentence summary.
+NAME = "Touchstone"
+TAGLINE = "The Live Observer"
+DESCRIPTION = "Watches the current state of the city's flood signals and EO."
+# Live sensors + per-query EO. `prithvi_live` becomes the NYC Pluvial
+# v2 fine-tune in commit 5; `terramind_lulc` is added in commit 4.
+# `collect` iterates this list, so its output follows the same order.
+SOURCES = [
+    "floodnet",         # step_floodnet         — FloodNet sensor network
+    "nyc311",           # step_311              — NYC 311 flood complaints
+    "nws_obs",          # step_nws_obs          — NWS hourly METAR obs
+    "noaa_tides",       # step_noaa_tides       — NOAA tide gauge water level
+    "prithvi_live",     # step_prithvi_live     — Prithvi-EO 2.0 (v2 in commit 5)
+    "terramind_lulc",   # step_terramind_lulc (commit 4) — NYC LULC adapter
+]
+def collect(state: dict[str, Any]) -> dict[str, Any]:
+    """Gather the Touchstone sources that actually fired.
+    Walks SOURCES in declaration order and keeps each state entry whose
+    value is not None; absent or None entries are omitted.
+    """
+    fired: dict[str, Any] = {}
+    for key in SOURCES:
+        if state.get(key) is None:
+            continue
+        fired[key] = state[key]
+    return fired

tests/test_stones.py ADDED Viewed

	@@ -0,0 +1,128 @@

+"""Unit tests for the Stones taxonomy layer.
+Pure-import tests; no server / FSM required. Each data-Stone exposes
+`NAME`, `TAGLINE`, `DESCRIPTION`, `SOURCES`, `collect()`. The SOURCES
+keys must be a subset of the FSM's actual state keys so the migration
+stays honest as new specialists land.
+Some SOURCES entries are forward-looking (state keys added by later
+commits in the migration). Those are explicitly listed in
+`FUTURE_STATE_KEYS` and exempted from the validity check.
+"""
+from __future__ import annotations
+import inspect
+import re
+from app import fsm
+from app.stones import (
+    ALL_STONES,
+    DATA_STONES,
+    capstone,
+    cornerstone,
+    keystone,
+    lodestone,
+    touchstone,
+)
+# State keys added by later migration commits (C4 / C5 / C6). The Stones
+# taxonomy is allowed to declare them up-front so the SOURCES list stays
+# stable as the specialists land.
+# A key listed here is exempt from the "some @action in app/fsm.py writes
+# it" assertion in test_data_stone_sources_are_valid_state_keys; remove it
+# once the commit that introduces the specialist has landed.
+FUTURE_STATE_KEYS = {
+    "terramind_buildings",  # commit 4
+    "terramind_lulc",       # commit 4
+    "ttm_battery_surge",    # commit 6
+}
+def _fsm_state_keys() -> set[str]:
+    """Scrape every state key written by an @action in app/fsm.py.
+    We don't import every action to introspect — Burr's @action wraps
+    the function so the `writes` declaration isn't readable on the
+    decorated object without instantiating an Application. The cheapest
+    reliable read is regex over the module source.
+    Returns the set of string literals found inside any `writes=[...]`
+    list in the module source. Both double- and single-quoted literals
+    are recognised, so a quoting-style change in app/fsm.py cannot make
+    valid keys disappear from the scrape.
+    """
+    src = inspect.getsource(fsm)
+    keys: set[str] = set()
+    # @action(reads=[...], writes=["k1", "k2", ...])
+    for writes in re.finditer(r"writes\s*=\s*\[([^\]]+)\]", src):
+        # Group 1 is the opening quote; the backreference forces the
+        # closing quote to be the same character.
+        for tok in re.finditer(r"""(["'])(.+?)\1""", writes.group(1)):
+            keys.add(tok.group(2))
+    return keys
+def test_data_stones_have_required_attrs():
+    """Each data-Stone carries non-empty display strings, SOURCES and collect()."""
+    for stone in DATA_STONES:
+        # NAME / TAGLINE / DESCRIPTION must all be non-empty strings.
+        for attr in ("NAME", "TAGLINE", "DESCRIPTION"):
+            text = getattr(stone, attr)
+            assert isinstance(text, str) and text
+        # A data-Stone without sources would aggregate nothing.
+        assert isinstance(stone.SOURCES, list) and stone.SOURCES
+        assert callable(stone.collect)
+def test_capstone_has_required_attrs():
+    """Capstone exposes the shared Stone shape plus the reconciler re-exports."""
+    assert capstone.NAME == "Capstone"
+    assert capstone.TAGLINE
+    assert capstone.DESCRIPTION
+    # Same module shape as the data-Stones. SOURCES is allowed to be empty
+    # for the Capstone, so only its type is checked.
+    assert isinstance(capstone.SOURCES, list)
+    assert callable(capstone.collect)
+    # Capstone re-exports the reconciler.
+    assert callable(capstone.build_documents)
+    assert callable(capstone.run)
+    assert isinstance(capstone.EXTRA_SYSTEM_PROMPT, str)
+def test_data_stone_sources_are_valid_state_keys():
+    """Every non-future SOURCES key must be written by some FSM @action."""
+    written = _fsm_state_keys()
+    # Guard against a broken scrape: these well-known keys must be found.
+    for required in ("sandy", "dep", "floodnet", "nyc311", "ida_hwm"):
+        assert required in written, f"FSM scrape missed {required!r}"
+    for st in DATA_STONES:
+        # Forward-looking keys are declared up-front and checked later.
+        current_keys = [k for k in st.SOURCES if k not in FUTURE_STATE_KEYS]
+        for key in current_keys:
+            assert key in written, (
+                f"{st.NAME}.SOURCES references {key!r}, which no @action in "
+                f"app/fsm.py writes. Either fix the Stone or add the future "
+                f"key to FUTURE_STATE_KEYS in this test."
+            )
+def test_data_stone_sources_are_disjoint():
+    """No state key may be claimed twice across the data-Stones."""
+    # Flatten to (stone, key) pairs in canonical Stone order.
+    claims = [(st, key) for st in DATA_STONES for key in st.SOURCES]
+    # Maps each key to the NAME of the Stone that claimed it first.
+    seen: dict[str, str] = {}
+    for st, key in claims:
+        assert key not in seen, (
+            f"state key {key!r} listed in both {seen[key]} and {st.NAME}"
+        )
+        seen[key] = st.NAME
+def test_collect_drops_silent_specialists():
+    """collect() keeps fired Cornerstone keys and drops None / foreign ones."""
+    fired = {"sandy": True, "prithvi_water": {"some": "data"}}
+    # Specialists that stayed silent report None.
+    silent = dict.fromkeys(("dep", "ida_hwm", "microtopo"))
+    # "paragraph" is an unrelated key and should be ignored entirely.
+    state = {**fired, **silent, "paragraph": "irrelevant"}
+    assert cornerstone.collect(state) == fired
+def test_all_stones_iteration_order():
+    """The four data-Stones must appear in canonical order; Capstone last."""
+    canonical = ["Cornerstone", "Keystone", "Touchstone", "Lodestone"]
+    assert [s.NAME for s in DATA_STONES] == canonical
+    # Check the whole of ALL_STONES, not just its tail: looking only at the
+    # last element would let a reordered or missing data-Stone slip through.
+    assert [s.NAME for s in ALL_STONES] == canonical + ["Capstone"]
+def test_collect_signatures_are_uniform():
+    """Every Stone's collect() takes exactly one parameter (the state dict)."""
+    # Iterate the shared registry rather than a hand-written tuple so a Stone
+    # added to ALL_STONES is covered without touching this test.
+    for st in ALL_STONES:
+        params = inspect.signature(st.collect).parameters
+        assert len(params) == 1, f"{st.NAME}.collect arity"