| """Produce a before/after FRAMING-DELTA.md from two suite runs. |
| |
| Usage: |
| python tests/integration/build_delta.py \\ |
| --baseline tests/integration/results/2026-05-06 \\ |
| --framed tests/integration/results/2026-05-06-framed \\ |
| --out tests/integration/results/2026-05-06/FRAMING-DELTA.md |
| """ |
| from __future__ import annotations |
|
|
| import argparse |
| import json |
| from pathlib import Path |
|
|
|
|
| def load_run(d: Path) -> dict[str, dict]: |
| """Map qid -> per-query JSON payload.""" |
| out: dict[str, dict] = {} |
| for p in sorted(d.glob("q*-*.json")): |
| try: |
| payload = json.loads(p.read_text()) |
| except json.JSONDecodeError: |
| continue |
| qid = payload.get("qid") or p.stem.split("-", 1)[0].lstrip("q") |
| out[qid] = payload |
| return out |
|
|
|
|
| def opening_first_sentence(p: str) -> str: |
| """Best-effort first non-header sentence of the briefing.""" |
| if not p: |
| return "(no prose)" |
| |
| lines = [line.strip() for line in p.splitlines() if line.strip()] |
| body = [] |
| for line in lines: |
| if line.startswith("**Status"): |
| continue |
| if line.startswith("**"): |
| break |
| body.append(line) |
| text = " ".join(body) |
| |
| for end in (". ", ".\n", ". **"): |
| if end in text: |
| return text.split(end, 1)[0].strip() + "." |
| return text[:200] |
|
|
|
|
| def main() -> int: |
| ap = argparse.ArgumentParser() |
| ap.add_argument("--baseline", required=True) |
| ap.add_argument("--framed", required=True) |
| ap.add_argument("--out", required=True) |
| args = ap.parse_args() |
|
|
| base_dir = Path(args.baseline) |
| framed_dir = Path(args.framed) |
| out_path = Path(args.out) |
|
|
| baseline = load_run(base_dir) |
| framed = load_run(framed_dir) |
| qids = sorted(set(baseline) | set(framed), |
| key=lambda x: int(x.lstrip("q") or "0")) |
|
|
| rows: list[str] = [] |
| rows.append("# Question-aware framing β before/after delta") |
| rows.append("") |
| rows.append("Compares two runs of `tests/integration/stakeholder_queries.py`:") |
| rows.append("") |
| rows.append(f"- **baseline**: `{base_dir}` β system before `app/framing.py`") |
| rows.append(f"- **framed**: `{framed_dir}` β same suite, Capstone now " |
| f"augmented with a per-question-type opening directive") |
| rows.append("") |
| rows.append("Framing score is 0β5 (5 = opening directly answers the " |
| "user's question shape; 3 = generic Status with place named; " |
| "1 = no engagement). The same scorer runs against both runs.") |
| rows.append("") |
|
|
| base_total = sum(b.get("framing_score", 0) for b in baseline.values()) |
| framed_total = sum(f.get("framing_score", 0) for f in framed.values()) |
| base_n = len(baseline) |
| framed_n = len(framed) |
| rows.append("## Aggregate") |
| rows.append("") |
| rows.append("| Metric | Baseline | Framed | Ξ |") |
| rows.append("|--------|---------:|-------:|---:|") |
| if base_n: |
| rows.append(f"| n queries | {base_n} | {framed_n} | β |") |
| rows.append(f"| sum framing | {base_total} | {framed_total} | " |
| f"{framed_total - base_total:+d} |") |
| rows.append(f"| mean framing | {base_total/base_n:.2f} | " |
| f"{framed_total/max(framed_n,1):.2f} | " |
| f"{(framed_total/max(framed_n,1)) - (base_total/base_n):+.2f} |") |
| for thresh in (3, 4, 5): |
| b = sum(1 for x in baseline.values() if x.get("framing_score", 0) >= thresh) |
| f = sum(1 for x in framed.values() if x.get("framing_score", 0) >= thresh) |
| rows.append(f"| β₯ {thresh}/5 | {b} | {f} | {f - b:+d} |") |
|
|
| rows.append("") |
| rows.append("## Per-query detail") |
| rows.append("") |
| rows.append("| # | Persona | Q-type | Frame | Mellea | Wall | Ξ frame |") |
| rows.append("|---|---------|--------|------:|-------:|-----:|---------|") |
| for qid in qids: |
| b = baseline.get(qid) or {} |
| f = framed.get(qid) or {} |
| bs = b.get("framing_score", 0) |
| fs = f.get("framing_score", 0) |
| delta_frame = fs - bs |
| delta_str = f"{delta_frame:+d}" |
| if delta_frame > 0: |
| delta_str = f"**{delta_str}**" |
| elif delta_frame < 0: |
| delta_str = f"_{delta_str}_" |
| m_b = (b.get("mellea") or {}).get("passed", "?") |
| m_f = (f.get("mellea") or {}).get("passed", "?") |
| rows.append( |
| f"| {qid} | {(b.get('persona') or f.get('persona') or '?')[:35]} | " |
| f"{b.get('question_type') or f.get('question_type') or '?'} | " |
| f"{bs}β{fs} | {m_b}β{m_f}/4 | " |
| f"{(f.get('wall_clock_s') or b.get('wall_clock_s') or 0):.0f}s | " |
| f"{delta_str} |" |
| ) |
|
|
| rows.append("") |
| rows.append("## Opening sentence diff") |
| rows.append("") |
| for qid in qids: |
| b = baseline.get(qid) or {} |
| f = framed.get(qid) or {} |
| rows.append(f"### q{qid} β {b.get('persona') or f.get('persona') or '?'}") |
| rows.append("") |
| rows.append(f"_Question_: `{b.get('query') or f.get('query') or ''}`") |
| rows.append("") |
| rows.append("Baseline opening:") |
| rows.append("") |
| rows.append(f"> {opening_first_sentence(b.get('paragraph', ''))}") |
| rows.append("") |
| rows.append("Framed opening:") |
| rows.append("") |
| rows.append(f"> {opening_first_sentence(f.get('paragraph', ''))}") |
| rows.append("") |
| rows.append(f"_Frame: {b.get('framing_score','?')} β {f.get('framing_score','?')}; " |
| f"detector matched type: `{b.get('framing_rationale','')[:80]}` β " |
| f"`{f.get('framing_rationale','')[:80]}`_") |
| rows.append("") |
|
|
| |
| rows.append("## Stop-condition check") |
| rows.append("") |
| below_three = sum(1 for x in framed.values() if x.get("framing_score", 0) < 3) |
| rows.append(f"Queries with framing < 3 in the framed run: **{below_three}**.") |
| rows.append("") |
| if below_three > 5: |
| rows.append("**Threshold exceeded.** Per Adam's stop condition, this means " |
| "the Capstone prompt-conditional alone is insufficient. The next " |
| "step would be option (a) β planner sub-classifier β or option " |
| "(c) β both. Documented but NOT implemented in this overnight pass.") |
| else: |
| rows.append("Within budget. Capstone prompt-conditional is the right intervention; " |
| "no need to escalate to option (a)/(c).") |
|
|
| out_path.write_text("\n".join(rows) + "\n") |
| print(f"wrote {out_path}") |
| return 0 |
|
|
|
|
| if __name__ == "__main__": |
| raise SystemExit(main()) |
|
|