cjc0013 committed on
Commit
f14edf7
·
verified ·
1 Parent(s): fbba473

Tighten consistency, explainability, and deterministic exports for public records Space

Browse files
__pycache__/public_space_app.cpython-311.pyc CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af1f987a2e3a2ed9596d9bc3620aa02adf88bdbdddfd35eea9728cb2b7ad3289
3
- size 111283
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b7fd4155ab406455d61abeab73be9fdb3d4f85e5ebdb4a63b9cdbe78f832505
3
+ size 170984
dataset_bundle/evidence_audit/consistency_report.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "generated_at": "2026-04-19T09:21:59-04:00",
3
  "event_provenance": {
4
  "event_count": 3918,
5
  "events_with_artifacts": 3878,
 
1
  {
2
+ "generated_at": "2026-04-19T19:38:57-04:00",
3
  "event_provenance": {
4
  "event_count": 3918,
5
  "events_with_artifacts": 3878,
dataset_bundle/graph_links.csv CHANGED
The diff for this file is too large to render. See raw diff
 
dataset_bundle/network_graph/edges.csv CHANGED
The diff for this file is too large to render. See raw diff
 
dataset_bundle/network_graph/graph_config.json CHANGED
@@ -14,15 +14,16 @@
14
  "unresolved": 1808,
15
  "linked": 1117,
16
  "needs_review": 119,
17
- "acceptable_with_label": 800,
18
- "release_ok": 713
19
  },
20
  "default_filters": {
21
  "relationship_family": "sector",
22
  "review_status": "stronger",
23
- "max_edges": 60,
24
  "hide_unresolved_only": true,
25
- "overview_member_limit": 8
 
26
  },
27
  "example_member_searches": [
28
  "Josh Gottheimer",
 
14
  "unresolved": 1808,
15
  "linked": 1117,
16
  "needs_review": 119,
17
+ "acceptable_with_label": 1174,
18
+ "release_ok": 339
19
  },
20
  "default_filters": {
21
  "relationship_family": "sector",
22
  "review_status": "stronger",
23
+ "max_edges": 30,
24
  "hide_unresolved_only": true,
25
+ "overview_member_limit": 5,
26
+ "default_member_search": "Josh Gottheimer"
27
  },
28
  "example_member_searches": [
29
  "Josh Gottheimer",
dataset_bundle/public_release_manifest.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "public_version": "congress-public-records-slice-2026-04-v1",
3
  "title": "Congress Public Records Slice",
4
- "release_date": "2026-04-19T09:22:53-04:00",
5
  "slice_description": "A neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.",
6
  "source_run_name": "house_all_baseline_20260418_v21_recipienthardening",
7
  "dataset_repo_id": "cjc0013/cmp-data",
 
1
  {
2
  "public_version": "congress-public-records-slice-2026-04-v1",
3
  "title": "Congress Public Records Slice",
4
+ "release_date": "2026-04-19T19:40:15-04:00",
5
  "slice_description": "A neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.",
6
  "source_run_name": "house_all_baseline_20260418_v21_recipienthardening",
7
  "dataset_repo_id": "cjc0013/cmp-data",
public_copy.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "public_version": "congress-public-records-slice-2026-04-v1-private-spacefix",
3
  "title": "Congress Public Records Slice",
4
  "subtitle": "Neutral Records explorer for a public-record slice of congressional money-and-power linkages.",
5
  "dataset_repo_id": "cjc0013/cmp-data",
 
1
  {
2
+ "public_version": "congress-public-records-slice-2026-04-v1",
3
  "title": "Congress Public Records Slice",
4
  "subtitle": "Neutral Records explorer for a public-record slice of congressional money-and-power linkages.",
5
  "dataset_repo_id": "cjc0013/cmp-data",
public_space_app.py CHANGED
@@ -899,31 +899,353 @@ def _stronger_support_count(row: Dict[str, Any]) -> int:
899
  )
900
 
901
 
902
- def _edge_evidence_chips(row: Dict[str, Any]) -> list[str]:
903
- urls = _split_pipe_values(row.get("source_urls", ""), limit=12)
904
- reason_codes = set(_split_pipe_values(row.get("reason_codes", ""), limit=20))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
905
  chips: list[str] = []
906
- if any("/ptr-pdfs/" in url for url in urls):
907
- chips.append("trade disclosure")
908
- if any("/financial-pdfs/" in url for url in urls):
909
- chips.append("annual disclosure")
910
- if any("govinfo.gov/bulkdata/BILLSTATUS" in url for url in urls):
911
- chips.append("bill record")
912
- if any("usaspending.gov/award/" in url for url in urls):
913
- chips.append("funding award")
914
- if any("committee_info" in url for url in urls):
915
- chips.append("committee roster")
916
- if "major_vote_overlap" in reason_codes or "vote_density_support" in reason_codes:
917
- chips.append("vote activity")
918
- if "lobbying_issue_overlap" in reason_codes or "lobbying_density_support" in reason_codes:
919
- chips.append("lobbying activity")
920
- if int(row.get("profile_link_count", 0) or 0) > 0:
921
- chips.append("member profile")
922
- deduped: list[str] = []
923
- for chip in chips:
924
- if chip not in deduped:
925
- deduped.append(chip)
926
- return deduped[:6]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
927
 
928
 
929
  def _window_overlap_text(row: Dict[str, Any]) -> str:
@@ -1034,7 +1356,7 @@ def _rank_relationships(edges: pd.DataFrame, ranking_mode: str = "raw") -> pd.Da
1034
  {
1035
  "relationship_id": str(row.get("edge_id") or ""),
1036
  "member": str(row.get("member_name") or row.get("member_slug") or ""),
1037
- "counterparty / sector": str(row.get("target_label") or ""),
1038
  "overall score": display_score,
1039
  "raw score": raw_score,
1040
  "relative score": relative_score,
@@ -1188,19 +1510,26 @@ def _select_edge_row(edges: pd.DataFrame, relationship_id: str) -> Dict[str, Any
1188
  return matched.head(1).to_dict("records")[0]
1189
 
1190
 
1191
- def _relationship_detail_markdown(edges: pd.DataFrame, relationship_id: str, ranking_mode: str = "raw") -> str:
1192
- row = _select_edge_row(edges, relationship_id)
1193
- if not row:
 
 
 
 
 
 
1194
  return "Select a relationship to inspect why it appears in this released slice."
 
1195
  family = str(row.get("relationship_family", "") or "")
1196
- chips = _edge_evidence_chips(row)
1197
- reason_codes = [_plain_reason_code(item) for item in _split_pipe_values(row.get("reason_codes", ""), limit=8)]
1198
- urls = _split_pipe_values(row.get("source_urls", ""), limit=5)
1199
- raw_score = _relationship_score(row)
1200
- relative_score = _relative_relationship_score(row, _member_activity_baselines(edges))
1201
- display_score = relative_score if str(ranking_mode or "raw").strip().lower() == "relative" else raw_score
1202
  lines = [
1203
- f"### {row.get('member_name') or row.get('member_slug')} -> {row.get('target_label')}",
1204
  "",
1205
  f"- Relationship view: `{_plain_family_label(family)}`",
1206
  f"- Strength label: `{_plain_status_label(str(row.get('relationship_status', '') or ''))}`",
@@ -1210,14 +1539,22 @@ def _relationship_detail_markdown(edges: pd.DataFrame, relationship_id: str, ran
1210
  f"- Supporting relationship rows: `{int(row.get('link_count', 0) or 0)}`",
1211
  f"- Stronger-support rows: `{int(row.get('linked_count', 0) or 0) if family == 'recipient' else int(row.get('strong_event_count', 0) or 0)}`",
1212
  f"- Caution / weaker rows: `{int(row.get('review_count', 0) or 0) if family == 'recipient' else int(row.get('weak_event_count', 0) or 0)}`",
1213
- f"- Integrity-checked source records attached: `{int(row.get('sha_backed_source_artifact_count', 0) or 0)}`",
1214
  f"- Unresolved source refs still counted: `{int(row.get('unresolved_source_ref_count', 0) or 0)}`",
1215
  f"- Evidence signals: `{', '.join(chips) if chips else 'published source support'}`",
1216
  f"- Time-window overlap: `{_window_overlap_text(row)}`",
1217
  ]
 
 
 
 
1218
  if reason_codes:
1219
  lines.extend(["", "#### Why it is linked in this slice", ""])
1220
  lines.extend(f"- {item}" for item in reason_codes)
 
 
 
 
1221
  if urls:
1222
  lines.extend(["", "#### Example published source URLs", ""])
1223
  lines.extend(f"- [{item}]({item})" for item in urls)
@@ -1227,17 +1564,12 @@ def _relationship_detail_markdown(edges: pd.DataFrame, relationship_id: str, ran
1227
  "#### Integrity note",
1228
  "",
1229
  "- `Integrity-checked` means the release includes a cryptographic fingerprint to help show a published record has not been altered.",
 
 
 
 
1230
  ]
1231
  )
1232
- if str(ranking_mode or "raw").strip().lower() == "relative":
1233
- lines.extend(
1234
- [
1235
- "",
1236
- "#### Ranking note",
1237
- "",
1238
- "- This view is using the experimental relative score, which compares this relationship to the same member's other visible links in the current filtered view.",
1239
- ]
1240
- )
1241
  return "\n".join(lines)
1242
 
1243
 
@@ -1246,15 +1578,20 @@ def _safe_export_stem(value: str) -> str:
1246
  return slug or "relationship-export"
1247
 
1248
 
1249
- def _relationship_export_rows(edges: pd.DataFrame, relationship_id: str, ranking_mode: str) -> list[dict[str, Any]]:
1250
- row = _select_edge_row(edges, relationship_id)
1251
- if not row:
 
 
 
 
 
 
1252
  return []
1253
- raw_score = _relationship_score(row)
1254
- relative_score = _relative_relationship_score(row, _member_activity_baselines(edges))
1255
- display_score = relative_score if str(ranking_mode or "raw").strip().lower() == "relative" else raw_score
1256
- reason_codes = [_plain_reason_code(item) for item in _split_pipe_values(row.get("reason_codes", ""), limit=8)]
1257
- urls = _split_pipe_values(row.get("source_urls", ""), limit=8)
1258
  export_rows: list[dict[str, Any]] = [
1259
  {
1260
  "relationship_id": str(row.get("edge_id") or ""),
@@ -1271,7 +1608,24 @@ def _relationship_export_rows(edges: pd.DataFrame, relationship_id: str, ranking
1271
  "item_detail": "Top-level relationship summary for export.",
1272
  }
1273
  ]
1274
- for chip in _edge_evidence_chips(row):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1275
  export_rows.append(
1276
  {
1277
  "relationship_id": str(row.get("edge_id") or ""),
@@ -1288,7 +1642,7 @@ def _relationship_export_rows(edges: pd.DataFrame, relationship_id: str, ranking
1288
  "item_detail": _evidence_chip_help(chip),
1289
  }
1290
  )
1291
- for reason in sorted(reason_codes):
1292
  export_rows.append(
1293
  {
1294
  "relationship_id": str(row.get("edge_id") or ""),
@@ -1305,7 +1659,7 @@ def _relationship_export_rows(edges: pd.DataFrame, relationship_id: str, ranking
1305
  "item_detail": reason,
1306
  }
1307
  )
1308
- for item in sorted(_plain_strengthener(value) for value in _split_pipe_values(row.get("missing_to_strengthen", ""), limit=12)):
1309
  export_rows.append(
1310
  {
1311
  "relationship_id": str(row.get("edge_id") or ""),
@@ -1322,7 +1676,7 @@ def _relationship_export_rows(edges: pd.DataFrame, relationship_id: str, ranking
1322
  "item_detail": item,
1323
  }
1324
  )
1325
- for url in sorted(urls):
1326
  export_rows.append(
1327
  {
1328
  "relationship_id": str(row.get("edge_id") or ""),
@@ -1342,14 +1696,21 @@ def _relationship_export_rows(edges: pd.DataFrame, relationship_id: str, ranking
1342
  return export_rows
1343
 
1344
 
1345
- def _relationship_handoff_rows(edges: pd.DataFrame, relationship_id: str, ranking_mode: str) -> list[dict[str, Any]]:
1346
- row = _select_edge_row(edges, relationship_id)
1347
- if not row:
 
 
 
 
 
 
1348
  return []
 
1349
  family = str(row.get("relationship_family", "") or "")
1350
- raw_score = _relationship_score(row)
1351
- relative_score = _relative_relationship_score(row, _member_activity_baselines(edges))
1352
- display_score = relative_score if str(ranking_mode or "raw").strip().lower() == "relative" else raw_score
1353
  handoff_rows: list[dict[str, Any]] = []
1354
 
1355
  def add(section: str, label: str, explanation: str, source_url: str = "") -> None:
@@ -1363,34 +1724,52 @@ def _relationship_handoff_rows(edges: pd.DataFrame, relationship_id: str, rankin
1363
  )
1364
 
1365
  add("Summary", "Member", str(row.get("member_name") or row.get("member_slug") or ""))
1366
- add("Summary", "Target", str(row.get("target_label") or ""))
1367
  add("Summary", "Relationship view", _plain_family_label(family))
1368
  add("Summary", "Strength label", _plain_status_label(str(row.get("relationship_status", "") or "")))
1369
  add("Summary", "Displayed score", str(display_score))
1370
  add("Summary", "Raw score", str(raw_score))
1371
  add("Summary", "Relative-to-baseline score (experimental)", str(relative_score))
 
 
 
 
 
1372
  add("Summary", "Supporting relationship rows", str(int(row.get("link_count", 0) or 0)))
1373
  add("Summary", "Stronger-support rows", str(_stronger_support_count(row)))
1374
  add("Summary", "Caution / weaker rows", str(int(row.get("review_count", 0) or 0) if family == "recipient" else int(row.get("weak_event_count", 0) or 0)))
1375
- add("Summary", "Integrity-checked source records attached", str(int(row.get("sha_backed_source_artifact_count", 0) or 0)))
1376
  add("Summary", "Unresolved source refs still counted", str(int(row.get("unresolved_source_ref_count", 0) or 0)))
1377
  add("Summary", "Evidence window", _window_overlap_text(row))
 
 
 
 
1378
 
1379
- for chip in sorted(_edge_evidence_chips(row)):
1380
  add("Evidence signals", chip.title(), _evidence_chip_help(chip))
1381
- for reason in sorted(_plain_reason_code(item) for item in _split_pipe_values(row.get("reason_codes", ""), limit=8)):
 
 
 
 
 
1382
  add("Why this link appears", reason, reason)
1383
- for item in sorted(_plain_strengthener(value) for value in _split_pipe_values(row.get("missing_to_strengthen", ""), limit=12)):
1384
  add("What would strengthen it", "Needs stronger support", item)
1385
- for url in sorted(_split_pipe_values(row.get("source_urls", ""), limit=20)):
1386
  add("Published source URLs", urlparse(url).netloc or "Published source URL", "Open this published record directly.", url)
1387
  return handoff_rows
1388
 
1389
 
1390
  def _write_relationship_export_bundle(
1391
- edges: pd.DataFrame, relationship_id: str, ranking_mode: str
 
 
 
 
1392
  ) -> tuple[str, str | None, str | None, str | None]:
1393
- export_rows = _relationship_export_rows(edges, relationship_id, ranking_mode)
1394
  if not export_rows:
1395
  return "Pick one relationship to generate exportable evidence files.", None, None, None
1396
  relationship_id_value = str(export_rows[0]["relationship_id"] or relationship_id)
@@ -1421,7 +1800,7 @@ def _write_relationship_export_bundle(
1421
  for export_row in export_rows:
1422
  writer.writerow({name: export_row.get(name, "") for name in fieldnames})
1423
 
1424
- handoff_rows = _relationship_handoff_rows(edges, relationship_id, ranking_mode)
1425
  handoff_fieldnames = ["section", "label", "explanation", "source_url"]
1426
  with handoff_csv_path.open("w", encoding="utf-8", newline="") as handle:
1427
  writer = csv.DictWriter(handle, fieldnames=handoff_fieldnames)
@@ -1429,7 +1808,8 @@ def _write_relationship_export_bundle(
1429
  for export_row in handoff_rows:
1430
  writer.writerow({name: export_row.get(name, "") for name in handoff_fieldnames})
1431
 
1432
- title = f"{export_rows[0]['member_name']} -> {export_rows[0]['target_label']}"
 
1433
  pdf = canvas.Canvas(str(pdf_path), pagesize=LETTER, invariant=1)
1434
  width, height = LETTER
1435
  left = 54
@@ -1488,20 +1868,27 @@ def _timeline_window_from_url(url: str) -> tuple[int, str, str]:
1488
  return (60, "Published source", urlparse(normalized).netloc if normalized.startswith("http") else "Published source")
1489
 
1490
 
1491
- def _relationship_timeline_html(edges: pd.DataFrame, relationship_id: str) -> str:
1492
- row = _select_edge_row(edges, relationship_id)
1493
- if not row:
 
 
 
 
 
 
1494
  return "<div style=\"padding: 1rem; border: 1px solid #d6d0c4; background: #fffdf8; color: #3a3a3a;\">Choose a relationship to see its evidence window.</div>"
 
1495
  entries: list[tuple[int, str, str, str]] = []
1496
  seen: set[tuple[str, str, str]] = set()
1497
- for url in _split_pipe_values(row.get("source_urls", ""), limit=8):
1498
  sort_key, window_label, track_label = _timeline_window_from_url(url)
1499
  detail = url
1500
  dedupe_key = (window_label, track_label, detail)
1501
  if dedupe_key not in seen:
1502
  seen.add(dedupe_key)
1503
  entries.append((sort_key, window_label, track_label, detail))
1504
- if int(row.get("profile_link_count", 0) or 0) > 0:
1505
  entries.append((70, "Undated support", "Member profile support", "Profile-based support is included in this relationship summary."))
1506
  if int(row.get("unresolved_source_ref_count", 0) or 0) > 0:
1507
  entries.append((80, "Partly unresolved", "Some official references remain unresolved", f"{int(row.get('unresolved_source_ref_count', 0) or 0)} unresolved refs are still counted in this released row."))
@@ -1614,7 +2001,15 @@ def _filter_graph(
1614
  if score_label != "all":
1615
  filtered = filtered[filtered["score_labels"].fillna("").str.contains(score_label, case=False, na=False)]
1616
  if review_status == "stronger":
1617
- filtered = filtered[filtered["relationship_status"].isin(["linked", "release_ok"])]
 
 
 
 
 
 
 
 
1618
  elif review_status != "all":
1619
  filtered = filtered[filtered["relationship_status"] == review_status]
1620
  if hide_unresolved_only:
@@ -1708,11 +2103,14 @@ def _render_graph(nodes: pd.DataFrame, edges: pd.DataFrame) -> str:
1708
  if not node:
1709
  continue
1710
  node_type = str(node.get("node_type", ""))
1711
- title_lines = [f"<b>{html.escape(str(node.get('label', '')))}</b>"]
 
 
 
1712
  role_label = {
1713
  "member": "House member",
1714
  "recipient": "Funding recipient",
1715
- "sector": "Sector",
1716
  }.get(node_type, node_type.title())
1717
  title_lines.append(f"Role: {html.escape(role_label)}")
1718
  if node_type == "member":
@@ -1723,7 +2121,7 @@ def _render_graph(nodes: pd.DataFrame, edges: pd.DataFrame) -> str:
1723
  title_lines.append(f"Released relationships in graph data: {int(node.get('connected_edge_count', 0) or 0)}")
1724
  network.add_node(
1725
  node_id,
1726
- label=str(node.get("label", "")),
1727
  title="<br>".join(title_lines),
1728
  color=color_map.get(str(node.get("node_type", "")), "#6e6e6e"),
1729
  shape="dot",
@@ -1741,7 +2139,7 @@ def _render_graph(nodes: pd.DataFrame, edges: pd.DataFrame) -> str:
1741
  if ":" in part
1742
  ]
1743
  title_lines = [
1744
- f"<b>{html.escape(str(row.get('member_name', '') or row.get('member_slug', '')))} -> {html.escape(str(row.get('target_label', '')))}</b>",
1745
  f"Relationship type: {html.escape(family_label)}",
1746
  f"Presentation tier: {html.escape(_plain_status_label(status))}",
1747
  html.escape(_plain_status_explainer(status)),
@@ -1832,6 +2230,7 @@ def build_app(copy_path: str | Path):
1832
  data = load_release_data(copy_path)
1833
  manifest = data["manifest"]
1834
  events = data["events"]
 
1835
  nodes = data["graph_nodes"]
1836
  edges = data["graph_edges"]
1837
  provenance = data["event_provenance"]
@@ -1888,7 +2287,7 @@ def build_app(copy_path: str | Path):
1888
  valid_ids = {value for _, value in options}
1889
  selected = relationship_id if relationship_id in valid_ids else (options[0][1] if options else None)
1890
  export_note, export_csv, export_handoff_csv, export_pdf = _write_relationship_export_bundle(
1891
- filtered_edges, selected or "", ranking_mode
1892
  )
1893
  return (
1894
  _overview_summary_markdown(
@@ -1908,8 +2307,8 @@ def build_app(copy_path: str | Path):
1908
  ranking_mode=ranking_mode,
1909
  ),
1910
  gr.update(choices=options, value=selected),
1911
- _relationship_detail_markdown(filtered_edges, selected or "", ranking_mode),
1912
- _relationship_timeline_html(filtered_edges, selected or ""),
1913
  export_note,
1914
  export_csv,
1915
  export_handoff_csv,
@@ -1926,11 +2325,11 @@ def build_app(copy_path: str | Path):
1926
  ):
1927
  filtered_edges = _overview_edges(member_query, family, only_strong, int(top_n))
1928
  export_note, export_csv, export_handoff_csv, export_pdf = _write_relationship_export_bundle(
1929
- filtered_edges, relationship_id, ranking_mode
1930
  )
1931
  return (
1932
- _relationship_detail_markdown(filtered_edges, relationship_id, ranking_mode),
1933
- _relationship_timeline_html(filtered_edges, relationship_id),
1934
  export_note,
1935
  export_csv,
1936
  export_handoff_csv,
 
899
  )
900
 
901
 
902
# Preferred bill-status documents per topic area.
#
# Keys are normalized target keys (the form produced by
# ``_relationship_target_key``). Values are URL substrings, most-preferred
# first; ``_source_record_priority`` matches them against the *lowercased*
# source URL, so every hint must itself be lowercase.
TOPIC_AREA_PREFERRED_BILL_HINTS: dict[str, tuple[str, ...]] = {
    "finance": ("billstatus-118hr2891.xml",),
}
905
+
906
+
907
+ def _relationship_target_key(value: Any) -> str:
908
+ normalized = re.sub(r"[^a-z0-9]+", "_", str(value or "").strip().lower()).strip("_")
909
+ return normalized
910
+
911
+
912
+ def _display_target_label(row: Dict[str, Any]) -> str:
913
+ label = str(row.get("target_label") or "").strip()
914
+ if str(row.get("relationship_family") or "") != "sector" or not label:
915
+ return label
916
+ words = re.sub(r"[_-]+", " ", label).strip()
917
+ return f"{words.title()} topic area"
918
+
919
+
920
+ def _plain_link_type(value: str) -> str:
921
+ normalized = str(value or "").strip().lower()
922
+ mapping = {
923
+ "trade_disclosure_to_sector": "Trade disclosure to topic-area mapping",
924
+ "annual_financial_disclosure_to_sector": "Annual financial disclosure to topic-area mapping",
925
+ "member_to_sector_profile": "Member profile or committee-context mapping",
926
+ "member_to_earmark_request": "Funding-recipient linkage",
927
+ "member_to_earmark_request_unresolved": "Funding-recipient linkage still needing more review",
928
+ }
929
+ return mapping.get(normalized, normalized.replace("_", " ").title() or "Released relationship row")
930
+
931
+
932
+ def _source_family_for_url(url: str) -> str:
933
+ normalized = str(url or "").strip().lower()
934
+ if "/financial-pdfs/" in normalized:
935
+ return "annual disclosure"
936
+ if "/ptr-pdfs/" in normalized:
937
+ return "trade disclosure"
938
+ if "committee_info" in normalized:
939
+ return "committee roster"
940
+ if "lda.senate.gov" in normalized:
941
+ return "lobbying activity"
942
+ if "govinfo.gov/bulkdata/billstatus" in normalized:
943
+ return "bill record"
944
+ if "/evs/" in normalized or "rollcall" in normalized:
945
+ return "vote activity"
946
+ if "usaspending.gov/award/" in normalized:
947
+ return "funding award"
948
+ if "memberdata.xml" in normalized or ".house.gov/" in normalized:
949
+ return "member profile"
950
+ return "published source support"
951
+
952
+
953
def _edge_evidence_chips(row: Dict[str, Any], url_values: list[str] | None = None) -> list[str]:
    """Return up to six distinct evidence-family labels for a relationship.

    Args:
        row: Edge row; its pipe-delimited ``source_urls`` field is used when
            ``url_values`` is not supplied.
        url_values: Optional explicit URL list. An empty list is honoured
            as "no URLs" rather than falling back to the row.

    Returns:
        Family labels from ``_source_family_for_url`` in first-seen order,
        de-duplicated and truncated to six entries.
    """
    if url_values is not None:
        urls = url_values
    else:
        urls = _split_pipe_values(row.get("source_urls", ""), limit=12)
    # dict.fromkeys keeps first-seen order while dropping repeats.
    chips = list(dict.fromkeys(_source_family_for_url(url) for url in urls))
    return chips[:6]
961
+
962
+
963
def _relationship_constituents(
    links: pd.DataFrame,
    events: pd.DataFrame,
    row: Dict[str, Any],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Return the link rows and event rows that make up one relationship edge.

    Rows are matched on the edge's member slug, relationship family and
    normalized target key. The target of a link/event row is its
    ``relationship_target`` when non-blank, otherwise ``recipient_name`` or
    ``sector`` depending on the family.

    The edge's own ``target_key`` is passed through
    ``_relationship_target_key`` as well, so both sides of the comparison are
    in the same normalized form; a raw, un-normalized key could otherwise
    never match.

    Args:
        links: Released link rows. Indexed directly by ``member_slug``, so
            that column must exist when the frame is non-empty.
        events: Released event rows. Indexed directly by ``member_slug`` and
            ``event_type`` when non-empty.
        row: Edge row as a dict; an empty dict yields two empty frames.

    Returns:
        ``(link_rows, event_rows)`` as copies; empty frames carrying the
        input schemas when nothing can be matched.
    """
    empty_links = links.head(0).copy()
    empty_events = events.head(0).copy()
    if not row:
        return empty_links, empty_events

    member_slug = str(row.get("member_slug") or "").strip()
    family = str(row.get("relationship_family") or "").strip()
    target_key = _relationship_target_key(row.get("target_key") or row.get("target_label"))
    if not member_slug or not family or not target_key:
        return empty_links, empty_events

    def text_column(frame: pd.DataFrame, name: str) -> pd.Series:
        # Missing columns behave like a column of empty strings.
        return frame.get(name, pd.Series("", index=frame.index)).fillna("").astype(str)

    link_rows = empty_links
    if not links.empty:
        link_mask = links["member_slug"].fillna("").astype(str).eq(member_slug)
        if "link_family" in links.columns:
            link_mask &= links["link_family"].fillna("").astype(str).eq(family)
        link_targets = text_column(links, "relationship_target")
        fallback_name = "recipient_name" if family == "recipient" else "sector"
        # Blank relationship_target cells fall back to the family-specific column.
        link_targets = link_targets.where(link_targets.str.strip() != "", text_column(links, fallback_name))
        link_mask &= link_targets.map(_relationship_target_key).eq(target_key)
        link_rows = links[link_mask].copy()

    event_rows = empty_events
    if not events.empty:
        event_mask = events["member_slug"].fillna("").astype(str).eq(member_slug)
        event_types = events["event_type"].fillna("").astype(str)
        if family == "sector":
            event_mask &= event_types.eq("sector_overlap_event")
            event_targets = text_column(events, "sector")
        else:
            event_mask &= event_types.eq("recipient_overlap_event")
            event_targets = text_column(events, "recipient_name")
        if "relationship_target" in events.columns:
            explicit_targets = text_column(events, "relationship_target")
            event_targets = explicit_targets.where(explicit_targets.str.strip() != "", event_targets)
        event_mask &= event_targets.map(_relationship_target_key).eq(target_key)
        event_rows = events[event_mask].copy()
    return link_rows, event_rows
1007
+
1008
+
1009
def _collect_pipe_values(frame: pd.DataFrame, column: str, *, limit: int = 20) -> list[str]:
    """Collect the distinct pipe-delimited values found in one column.

    Args:
        frame: Source rows.
        column: Column holding pipe-delimited strings; a missing column or an
            empty frame yields ``[]``.
        limit: Forwarded to ``_split_pipe_values`` for every cell. It caps
            the values taken per cell, not the size of the combined result.

    Returns:
        Values in first-seen order with duplicates removed.
    """
    if frame.empty or column not in frame.columns:
        return []
    # A dict gives O(1) membership checks and preserves insertion order.
    seen: dict[str, None] = {}
    for cell in frame[column].fillna("").tolist():
        for item in _split_pipe_values(cell, limit=limit):
            seen.setdefault(item, None)
    return list(seen)
1018
+
1019
+
1020
def _relationship_reason_labels(link_rows: pd.DataFrame, event_rows: pd.DataFrame, row: Dict[str, Any]) -> list[str]:
    """Return up to ten distinct plain-language reasons for a relationship.

    Reason codes come from the constituent link rows first, then the event
    rows. Only when those yield nothing are the edge row's own
    ``reason_codes`` used. Each code is translated by ``_plain_reason_code``;
    empty labels are dropped and first-seen order is kept.
    """
    codes = _collect_pipe_values(link_rows, "reason_codes", limit=20) + _collect_pipe_values(
        event_rows, "reason_codes", limit=20
    )
    labels = [_plain_reason_code(code) for code in codes]
    if not labels:
        labels = [_plain_reason_code(code) for code in _split_pipe_values(row.get("reason_codes", ""), limit=20)]
    ordered = list(dict.fromkeys(label for label in labels if label))
    return ordered[:10]
1033
+
1034
+
1035
def _relationship_strengtheners(link_rows: pd.DataFrame, event_rows: pd.DataFrame) -> list[str]:
    """Return up to ten distinct "what would strengthen this" notes.

    Values are read from the ``missing_to_strengthen`` column of the link
    rows, then the event rows, and translated by ``_plain_strengthener``.
    Empty labels are dropped and first-seen order is kept.
    """
    raw_items = _collect_pipe_values(link_rows, "missing_to_strengthen", limit=20) + _collect_pipe_values(
        event_rows, "missing_to_strengthen", limit=20
    )
    labels = (_plain_strengthener(item) for item in raw_items)
    ordered = list(dict.fromkeys(label for label in labels if label))
    return ordered[:10]
1046
+
1047
+
1048
def _relationship_sha_values(link_rows: pd.DataFrame, event_rows: pd.DataFrame) -> list[str]:
    """Return the distinct ``sha256_values`` attached to a relationship.

    Link-row values come first, then event-row values; empty entries are
    dropped and a value present in both frames is reported once.
    """
    values = _collect_pipe_values(link_rows, "sha256_values", limit=40) + _collect_pipe_values(
        event_rows, "sha256_values", limit=40
    )
    return list(dict.fromkeys(value for value in values if value))
1055
+
1056
+
1057
def _relationship_link_type_mix(link_rows: pd.DataFrame) -> list[str]:
    """Return the distinct plain-language link types among the link rows.

    Returns ``[]`` when the frame is empty or has no ``link_type`` column.
    Labels come from ``_plain_link_type`` and keep first-seen order.
    """
    if link_rows.empty or "link_type" not in link_rows.columns:
        return []
    raw_types = link_rows["link_type"].fillna("").astype(str).tolist()
    labels = (_plain_link_type(raw_type) for raw_type in raw_types)
    return list(dict.fromkeys(label for label in labels if label))
1066
+
1067
+
1068
def _source_record_priority(record: Dict[str, Any], target_label: str) -> tuple[Any, ...]:
    """Build the sort key used to rank one source record (lower sorts first).

    Args:
        record: Dict with ``url``, ``family``, ``origin``, ``score_label`` and
            ``reason_codes`` (an iterable of codes) as built by
            ``_relationship_source_records``.
        target_label: Relationship target; it is normalized and used to look
            up preferred bill hints in ``TOPIC_AREA_PREFERRED_BILL_HINTS``.

    Returns:
        ``(family_rank, preferred_bill_rank, score_rank, reason_rank,
        origin_rank, url)``. The URL is the final tie-breaker, which keeps
        the ordering deterministic.
    """
    family = str(record.get("family") or "")
    url = str(record.get("url") or "")

    family_rank = {
        "annual disclosure": 0,
        "trade disclosure": 1,
        "committee roster": 2,
        "lobbying activity": 3,
        "bill record": 4,
        "vote activity": 5,
        "funding award": 6,
        "member profile": 7,
        "published source support": 8,
    }.get(family, 9)

    score_rank = {
        "strong_sector_overlap": 0,
        "earmark_recipient_linked": 0,
        "weak_sector_overlap": 1,
    }.get(str(record.get("score_label") or ""), 2)

    reason_codes = set(record.get("reason_codes") or [])
    # Candidates are listed best-first; the first one present decides the rank.
    reason_candidates = (
        ("issuer_match", 0),
        ("legislative_relevance_match", 0),
        ("bill_sponsor_overlap", 1),
        ("major_vote_overlap", 2),
    )
    reason_rank = next((rank for code, rank in reason_candidates if code in reason_codes), 3)

    preferred_bill_rank = 9
    if family in {"bill record", "vote activity"}:
        hints = TOPIC_AREA_PREFERRED_BILL_HINTS.get(_relationship_target_key(target_label), ())
        lowered_url = url.lower()
        # Rank by the position of the first hint contained in the URL.
        preferred_bill_rank = next(
            (index for index, hint in enumerate(hints) if hint in lowered_url),
            9,
        )

    origin_rank = 0 if str(record.get("origin") or "") == "event" else 1
    return (
        family_rank,
        preferred_bill_rank,
        score_rank,
        reason_rank,
        origin_rank,
        url,
    )
1114
+
1115
+
1116
def _relationship_source_records(link_rows: pd.DataFrame, event_rows: pd.DataFrame, target_label: str) -> list[Dict[str, Any]]:
    """Return one source record per distinct URL, best-ranked first.

    Every pipe-delimited ``source_urls`` entry of every link and event row
    becomes a candidate record carrying its URL, evidence family, origin
    (``"link"`` or ``"event"``), the row's ``score_label`` and the row's
    reason codes. Candidates are sorted with ``_source_record_priority`` and
    only the best-ranked record for each URL is kept.
    """
    candidates: list[Dict[str, Any]] = []
    for origin, frame in (("link", link_rows), ("event", event_rows)):
        if frame.empty:
            continue
        for source_row in frame.to_dict("records"):
            reason_codes = set(_split_pipe_values(source_row.get("reason_codes", ""), limit=20))
            score_label = str(source_row.get("score_label") or "")
            candidates.extend(
                {
                    "url": url,
                    "family": _source_family_for_url(url),
                    "origin": origin,
                    "score_label": score_label,
                    "reason_codes": reason_codes,
                }
                for url in _split_pipe_values(source_row.get("source_urls", ""), limit=24)
            )

    best_by_url: Dict[str, Dict[str, Any]] = {}
    # Sorted best-first, so setdefault keeps the top-ranked record per URL.
    for candidate in sorted(candidates, key=lambda item: _source_record_priority(item, target_label)):
        best_by_url.setdefault(str(candidate.get("url") or ""), candidate)
    return list(best_by_url.values())
1138
+
1139
+
1140
def _select_example_urls(
    row: Dict[str, Any],
    link_rows: pd.DataFrame,
    event_rows: pd.DataFrame,
    *,
    limit: int = 6,
) -> list[str]:
    """Pick a small, varied set of example source URLs for a relationship.

    Selection order:
      1. the best annual disclosure, then the best trade disclosure;
      2. the best committee roster, only when a constituent row carries the
         ``committee_jurisdiction_match`` reason code;
      3. the best lobbying record, when one exists;
      4. remaining URLs ordered by a fallback family order, then by
         ``_source_record_priority``, until ``limit`` is reached.

    Args:
        row: Edge row; only ``target_label`` is read.
        link_rows: Constituent link rows.
        event_rows: Constituent event rows.
        limit: Maximum number of URLs returned. Zero or a negative value
            returns ``[]``.

    Returns:
        At most ``limit`` distinct URLs in selection order.
    """
    max_urls = int(limit)
    if max_urls <= 0:
        return []

    target_label = str(row.get("target_label") or "")
    records = _relationship_source_records(link_rows, event_rows, target_label)

    def priority(record: Dict[str, Any]) -> tuple[Any, ...]:
        return _source_record_priority(record, target_label)

    selected: list[str] = []
    selected_set: set[str] = set()

    def choose_one(family_name: str) -> None:
        # Take the best not-yet-selected record of this family, if any.
        candidates = [
            record
            for record in records
            if str(record.get("family") or "") == family_name
            and str(record.get("url") or "") not in selected_set
        ]
        if not candidates:
            return
        url = str(min(candidates, key=priority).get("url") or "")
        selected.append(url)
        selected_set.add(url)

    choose_one("annual disclosure")
    choose_one("trade disclosure")

    combined_reason_codes = set(
        _collect_pipe_values(link_rows, "reason_codes", limit=20)
        + _collect_pipe_values(event_rows, "reason_codes", limit=20)
    )
    if "committee_jurisdiction_match" in combined_reason_codes:
        choose_one("committee roster")
    # choose_one is a no-op when no lobbying record exists.
    choose_one("lobbying activity")

    # Families already given a dedicated slot above sort last here.
    family_order = {
        "bill record": 0,
        "vote activity": 1,
        "funding award": 2,
        "committee roster": 3,
        "member profile": 4,
        "published source support": 5,
        "annual disclosure": 6,
        "trade disclosure": 7,
        "lobbying activity": 8,
    }
    fallback_records = sorted(
        (record for record in records if str(record.get("url") or "") not in selected_set),
        key=lambda item: (
            family_order.get(str(item.get("family") or ""), 9),
            priority(item),
        ),
    )
    for record in fallback_records:
        url = str(record.get("url") or "")
        if not url or url in selected_set:
            continue
        selected.append(url)
        selected_set.add(url)
        if len(selected) >= max_urls:
            break
    return selected[:max_urls]
1204
+
1205
+
1206
def _relationship_context(
    edges: pd.DataFrame,
    links: pd.DataFrame,
    events: pd.DataFrame,
    relationship_id: str,
    ranking_mode: str,
) -> Dict[str, Any] | None:
    """Gather everything the detail, export and timeline views need for one relationship.

    Looks up the edge row for ``relationship_id`` in ``edges`` and, when it
    exists, derives the scores, source URLs, reason labels and related
    metadata from the link and event rows that make up that relationship.

    Args:
        edges: Edge table searched for the relationship and used to build
            the member activity baselines for the relative score.
        links: Link table handed to ``_relationship_constituents``.
        events: Event table handed to ``_relationship_constituents``.
        relationship_id: Identifier of the relationship to look up.
        ranking_mode: ``"relative"`` (case-insensitive, surrounding whitespace
            ignored) shows the relative score; anything else shows the raw score.

    Returns:
        A dict of display-ready values, or ``None`` when no edge row matches.
    """
    row = _select_edge_row(edges, relationship_id)
    if not row:
        return None

    target_label = str(row.get("target_label") or "")
    link_rows, event_rows = _relationship_constituents(links, events, row)

    raw_score = _relationship_score(row)
    baselines = _member_activity_baselines(edges)
    relative_score = _relative_relationship_score(row, baselines)
    wants_relative = str(ranking_mode or "raw").strip().lower() == "relative"

    surfaced_urls = _select_example_urls(row, link_rows, event_rows, limit=6)

    # Every non-empty source URL, best-priority first (used by the full export).
    ordered_records = sorted(
        _relationship_source_records(link_rows, event_rows, target_label),
        key=lambda item: _source_record_priority(item, target_label),
    )
    all_urls = []
    for source_record in ordered_records:
        source_url = source_record.get("url", "")
        if source_url:
            all_urls.append(source_url)

    reason_labels = _relationship_reason_labels(link_rows, event_rows, row)
    strengtheners = _relationship_strengtheners(link_rows, event_rows)
    sha_values = _relationship_sha_values(link_rows, event_rows)
    evidence_chips = _edge_evidence_chips(row, surfaced_urls)
    link_type_mix = _relationship_link_type_mix(link_rows)
    display_target_label = _display_target_label(row)

    # Only sector relationships carry the topic-area caveat.
    if str(row.get("relationship_family") or "") == "sector":
        topic_area_note = "Topic-area links combine several public-record signals. They do not claim that every supporting bill is narrowly about that sector."
    else:
        topic_area_note = ""

    return {
        "row": row,
        "raw_score": raw_score,
        "relative_score": relative_score,
        "display_score": relative_score if wants_relative else raw_score,
        "surfaced_urls": surfaced_urls,
        "all_urls": all_urls,
        "reason_labels": reason_labels,
        "strengtheners": strengtheners,
        "sha_values": sha_values,
        "integrity_count": len(sha_values),
        "evidence_chips": evidence_chips,
        "link_type_mix": link_type_mix,
        "display_target_label": display_target_label,
        "topic_area_note": topic_area_note,
    }
1249
 
1250
 
1251
  def _window_overlap_text(row: Dict[str, Any]) -> str:
 
1356
  {
1357
  "relationship_id": str(row.get("edge_id") or ""),
1358
  "member": str(row.get("member_name") or row.get("member_slug") or ""),
1359
+ "counterparty / sector": _display_target_label(row),
1360
  "overall score": display_score,
1361
  "raw score": raw_score,
1362
  "relative score": relative_score,
 
1510
  return matched.head(1).to_dict("records")[0]
1511
 
1512
 
1513
+ def _relationship_detail_markdown(
1514
+ edges: pd.DataFrame,
1515
+ links: pd.DataFrame,
1516
+ events: pd.DataFrame,
1517
+ relationship_id: str,
1518
+ ranking_mode: str = "raw",
1519
+ ) -> str:
1520
+ context = _relationship_context(edges, links, events, relationship_id, ranking_mode)
1521
+ if not context:
1522
  return "Select a relationship to inspect why it appears in this released slice."
1523
+ row = context["row"]
1524
  family = str(row.get("relationship_family", "") or "")
1525
+ chips = context["evidence_chips"]
1526
+ reason_codes = context["reason_labels"]
1527
+ urls = context["surfaced_urls"]
1528
+ raw_score = int(context["raw_score"])
1529
+ relative_score = int(context["relative_score"])
1530
+ display_score = int(context["display_score"])
1531
  lines = [
1532
+ f"### {row.get('member_name') or row.get('member_slug')} -> {context['display_target_label']}",
1533
  "",
1534
  f"- Relationship view: `{_plain_family_label(family)}`",
1535
  f"- Strength label: `{_plain_status_label(str(row.get('relationship_status', '') or ''))}`",
 
1539
  f"- Supporting relationship rows: `{int(row.get('link_count', 0) or 0)}`",
1540
  f"- Stronger-support rows: `{int(row.get('linked_count', 0) or 0) if family == 'recipient' else int(row.get('strong_event_count', 0) or 0)}`",
1541
  f"- Caution / weaker rows: `{int(row.get('review_count', 0) or 0) if family == 'recipient' else int(row.get('weak_event_count', 0) or 0)}`",
1542
+ f"- Integrity-checked source records attached: `{int(context['integrity_count'])}`",
1543
  f"- Unresolved source refs still counted: `{int(row.get('unresolved_source_ref_count', 0) or 0)}`",
1544
  f"- Evidence signals: `{', '.join(chips) if chips else 'published source support'}`",
1545
  f"- Time-window overlap: `{_window_overlap_text(row)}`",
1546
  ]
1547
+ if context["link_type_mix"]:
1548
+ lines.append(f"- Released row kinds involved: `{'; '.join(context['link_type_mix'])}`")
1549
+ if context["topic_area_note"]:
1550
+ lines.append(f"- Topic-area note: {context['topic_area_note']}")
1551
  if reason_codes:
1552
  lines.extend(["", "#### Why it is linked in this slice", ""])
1553
  lines.extend(f"- {item}" for item in reason_codes)
1554
+ lines.append("- Note: one released row can contribute multiple signals, so the signal list can be longer than the supporting-row count.")
1555
+ if context["strengtheners"]:
1556
+ lines.extend(["", "#### What would strengthen it", ""])
1557
+ lines.extend(f"- {item}" for item in context["strengtheners"])
1558
  if urls:
1559
  lines.extend(["", "#### Example published source URLs", ""])
1560
  lines.extend(f"- [{item}]({item})" for item in urls)
 
1564
  "#### Integrity note",
1565
  "",
1566
  "- `Integrity-checked` means the release includes a cryptographic fingerprint to help show a published record has not been altered.",
1567
+ "",
1568
+ "#### Ranking note",
1569
+ "",
1570
+ "- Raw score is the default public ranking. The relative score is experimental and changes with the current filtered comparison set.",
1571
  ]
1572
  )
 
 
 
 
 
 
 
 
 
1573
  return "\n".join(lines)
1574
 
1575
 
 
1578
  return slug or "relationship-export"
1579
 
1580
 
1581
+ def _relationship_export_rows(
1582
+ edges: pd.DataFrame,
1583
+ links: pd.DataFrame,
1584
+ events: pd.DataFrame,
1585
+ relationship_id: str,
1586
+ ranking_mode: str,
1587
+ ) -> list[dict[str, Any]]:
1588
+ context = _relationship_context(edges, links, events, relationship_id, ranking_mode)
1589
+ if not context:
1590
  return []
1591
+ row = context["row"]
1592
+ raw_score = int(context["raw_score"])
1593
+ relative_score = int(context["relative_score"])
1594
+ display_score = int(context["display_score"])
 
1595
  export_rows: list[dict[str, Any]] = [
1596
  {
1597
  "relationship_id": str(row.get("edge_id") or ""),
 
1608
  "item_detail": "Top-level relationship summary for export.",
1609
  }
1610
  ]
1611
+ for row_kind in context["link_type_mix"]:
1612
+ export_rows.append(
1613
+ {
1614
+ "relationship_id": str(row.get("edge_id") or ""),
1615
+ "member_name": str(row.get("member_name") or row.get("member_slug") or ""),
1616
+ "target_label": str(row.get("target_label") or ""),
1617
+ "relationship_family": _plain_family_label(str(row.get("relationship_family", "") or "")),
1618
+ "strength_label": _plain_status_label(str(row.get("relationship_status", "") or "")),
1619
+ "ranking_mode": str(ranking_mode or "raw"),
1620
+ "displayed_score": display_score,
1621
+ "raw_score": raw_score,
1622
+ "relative_score": relative_score,
1623
+ "item_type": "released_row_kind",
1624
+ "item_label": "Released row kind",
1625
+ "item_detail": row_kind,
1626
+ }
1627
+ )
1628
+ for chip in context["evidence_chips"]:
1629
  export_rows.append(
1630
  {
1631
  "relationship_id": str(row.get("edge_id") or ""),
 
1642
  "item_detail": _evidence_chip_help(chip),
1643
  }
1644
  )
1645
+ for reason in context["reason_labels"]:
1646
  export_rows.append(
1647
  {
1648
  "relationship_id": str(row.get("edge_id") or ""),
 
1659
  "item_detail": reason,
1660
  }
1661
  )
1662
+ for item in context["strengtheners"]:
1663
  export_rows.append(
1664
  {
1665
  "relationship_id": str(row.get("edge_id") or ""),
 
1676
  "item_detail": item,
1677
  }
1678
  )
1679
+ for url in context["all_urls"]:
1680
  export_rows.append(
1681
  {
1682
  "relationship_id": str(row.get("edge_id") or ""),
 
1696
  return export_rows
1697
 
1698
 
1699
+ def _relationship_handoff_rows(
1700
+ edges: pd.DataFrame,
1701
+ links: pd.DataFrame,
1702
+ events: pd.DataFrame,
1703
+ relationship_id: str,
1704
+ ranking_mode: str,
1705
+ ) -> list[dict[str, Any]]:
1706
+ context = _relationship_context(edges, links, events, relationship_id, ranking_mode)
1707
+ if not context:
1708
  return []
1709
+ row = context["row"]
1710
  family = str(row.get("relationship_family", "") or "")
1711
+ raw_score = int(context["raw_score"])
1712
+ relative_score = int(context["relative_score"])
1713
+ display_score = int(context["display_score"])
1714
  handoff_rows: list[dict[str, Any]] = []
1715
 
1716
  def add(section: str, label: str, explanation: str, source_url: str = "") -> None:
 
1724
  )
1725
 
1726
  add("Summary", "Member", str(row.get("member_name") or row.get("member_slug") or ""))
1727
+ add("Summary", "Target", context["display_target_label"])
1728
  add("Summary", "Relationship view", _plain_family_label(family))
1729
  add("Summary", "Strength label", _plain_status_label(str(row.get("relationship_status", "") or "")))
1730
  add("Summary", "Displayed score", str(display_score))
1731
  add("Summary", "Raw score", str(raw_score))
1732
  add("Summary", "Relative-to-baseline score (experimental)", str(relative_score))
1733
+ add(
1734
+ "Summary",
1735
+ "Relative score note",
1736
+ "Raw score is the default public ranking. Relative score is experimental and changes with the current filtered comparison set.",
1737
+ )
1738
  add("Summary", "Supporting relationship rows", str(int(row.get("link_count", 0) or 0)))
1739
  add("Summary", "Stronger-support rows", str(_stronger_support_count(row)))
1740
  add("Summary", "Caution / weaker rows", str(int(row.get("review_count", 0) or 0) if family == "recipient" else int(row.get("weak_event_count", 0) or 0)))
1741
+ add("Summary", "Integrity-checked source records attached", str(int(context["integrity_count"])))
1742
  add("Summary", "Unresolved source refs still counted", str(int(row.get("unresolved_source_ref_count", 0) or 0)))
1743
  add("Summary", "Evidence window", _window_overlap_text(row))
1744
+ if context["topic_area_note"]:
1745
+ add("Summary", "Topic-area note", context["topic_area_note"])
1746
+ for row_kind in context["link_type_mix"]:
1747
+ add("Summary", "Released row kind", row_kind)
1748
 
1749
+ for chip in context["evidence_chips"]:
1750
  add("Evidence signals", chip.title(), _evidence_chip_help(chip))
1751
+ add(
1752
+ "Why this link appears",
1753
+ "Signal-count note",
1754
+ "One released row can contribute multiple signals, so the signal list can be longer than the supporting-row count.",
1755
+ )
1756
+ for reason in context["reason_labels"]:
1757
  add("Why this link appears", reason, reason)
1758
+ for item in context["strengtheners"]:
1759
  add("What would strengthen it", "Needs stronger support", item)
1760
+ for url in context["surfaced_urls"]:
1761
  add("Published source URLs", urlparse(url).netloc or "Published source URL", "Open this published record directly.", url)
1762
  return handoff_rows
1763
 
1764
 
1765
  def _write_relationship_export_bundle(
1766
+ edges: pd.DataFrame,
1767
+ links: pd.DataFrame,
1768
+ events: pd.DataFrame,
1769
+ relationship_id: str,
1770
+ ranking_mode: str,
1771
  ) -> tuple[str, str | None, str | None, str | None]:
1772
+ export_rows = _relationship_export_rows(edges, links, events, relationship_id, ranking_mode)
1773
  if not export_rows:
1774
  return "Pick one relationship to generate exportable evidence files.", None, None, None
1775
  relationship_id_value = str(export_rows[0]["relationship_id"] or relationship_id)
 
1800
  for export_row in export_rows:
1801
  writer.writerow({name: export_row.get(name, "") for name in fieldnames})
1802
 
1803
+ handoff_rows = _relationship_handoff_rows(edges, links, events, relationship_id, ranking_mode)
1804
  handoff_fieldnames = ["section", "label", "explanation", "source_url"]
1805
  with handoff_csv_path.open("w", encoding="utf-8", newline="") as handle:
1806
  writer = csv.DictWriter(handle, fieldnames=handoff_fieldnames)
 
1808
  for export_row in handoff_rows:
1809
  writer.writerow({name: export_row.get(name, "") for name in handoff_fieldnames})
1810
 
1811
+ context = _relationship_context(edges, links, events, relationship_id, ranking_mode)
1812
+ title = f"{export_rows[0]['member_name']} -> {context['display_target_label'] if context else export_rows[0]['target_label']}"
1813
  pdf = canvas.Canvas(str(pdf_path), pagesize=LETTER, invariant=1)
1814
  width, height = LETTER
1815
  left = 54
 
1868
  return (60, "Published source", urlparse(normalized).netloc if normalized.startswith("http") else "Published source")
1869
 
1870
 
1871
+ def _relationship_timeline_html(
1872
+ edges: pd.DataFrame,
1873
+ links: pd.DataFrame,
1874
+ events: pd.DataFrame,
1875
+ relationship_id: str,
1876
+ ranking_mode: str = "raw",
1877
+ ) -> str:
1878
+ context = _relationship_context(edges, links, events, relationship_id, ranking_mode)
1879
+ if not context:
1880
  return "<div style=\"padding: 1rem; border: 1px solid #d6d0c4; background: #fffdf8; color: #3a3a3a;\">Choose a relationship to see its evidence window.</div>"
1881
+ row = context["row"]
1882
  entries: list[tuple[int, str, str, str]] = []
1883
  seen: set[tuple[str, str, str]] = set()
1884
+ for url in context["surfaced_urls"]:
1885
  sort_key, window_label, track_label = _timeline_window_from_url(url)
1886
  detail = url
1887
  dedupe_key = (window_label, track_label, detail)
1888
  if dedupe_key not in seen:
1889
  seen.add(dedupe_key)
1890
  entries.append((sort_key, window_label, track_label, detail))
1891
+ if any("Member profile" in item for item in context.get("link_type_mix", [])):
1892
  entries.append((70, "Undated support", "Member profile support", "Profile-based support is included in this relationship summary."))
1893
  if int(row.get("unresolved_source_ref_count", 0) or 0) > 0:
1894
  entries.append((80, "Partly unresolved", "Some official references remain unresolved", f"{int(row.get('unresolved_source_ref_count', 0) or 0)} unresolved refs are still counted in this released row."))
 
2001
  if score_label != "all":
2002
  filtered = filtered[filtered["score_labels"].fillna("").str.contains(score_label, case=False, na=False)]
2003
  if review_status == "stronger":
2004
+ stronger_mask = (
2005
+ ((filtered["relationship_family"] == "recipient") & (filtered["relationship_status"] == "linked"))
2006
+ | (
2007
+ (filtered["relationship_family"] == "sector")
2008
+ & (filtered["relationship_status"] == "release_ok")
2009
+ & (filtered["strong_event_count"].fillna(0).astype(int) > 0)
2010
+ )
2011
+ )
2012
+ filtered = filtered[stronger_mask]
2013
  elif review_status != "all":
2014
  filtered = filtered[filtered["relationship_status"] == review_status]
2015
  if hide_unresolved_only:
 
2103
  if not node:
2104
  continue
2105
  node_type = str(node.get("node_type", ""))
2106
+ display_label = str(node.get("label", "") or "")
2107
+ if node_type == "sector":
2108
+ display_label = f"{re.sub(r'[_-]+', ' ', display_label).strip().title()} topic area"
2109
+ title_lines = [f"<b>{html.escape(display_label)}</b>"]
2110
  role_label = {
2111
  "member": "House member",
2112
  "recipient": "Funding recipient",
2113
+ "sector": "Topic area",
2114
  }.get(node_type, node_type.title())
2115
  title_lines.append(f"Role: {html.escape(role_label)}")
2116
  if node_type == "member":
 
2121
  title_lines.append(f"Released relationships in graph data: {int(node.get('connected_edge_count', 0) or 0)}")
2122
  network.add_node(
2123
  node_id,
2124
+ label=display_label,
2125
  title="<br>".join(title_lines),
2126
  color=color_map.get(str(node.get("node_type", "")), "#6e6e6e"),
2127
  shape="dot",
 
2139
  if ":" in part
2140
  ]
2141
  title_lines = [
2142
+ f"<b>{html.escape(str(row.get('member_name', '') or row.get('member_slug', '')))} -> {html.escape(_display_target_label(row))}</b>",
2143
  f"Relationship type: {html.escape(family_label)}",
2144
  f"Presentation tier: {html.escape(_plain_status_label(status))}",
2145
  html.escape(_plain_status_explainer(status)),
 
2230
  data = load_release_data(copy_path)
2231
  manifest = data["manifest"]
2232
  events = data["events"]
2233
+ links = data["links"]
2234
  nodes = data["graph_nodes"]
2235
  edges = data["graph_edges"]
2236
  provenance = data["event_provenance"]
 
2287
  valid_ids = {value for _, value in options}
2288
  selected = relationship_id if relationship_id in valid_ids else (options[0][1] if options else None)
2289
  export_note, export_csv, export_handoff_csv, export_pdf = _write_relationship_export_bundle(
2290
+ filtered_edges, links, events, selected or "", ranking_mode
2291
  )
2292
  return (
2293
  _overview_summary_markdown(
 
2307
  ranking_mode=ranking_mode,
2308
  ),
2309
  gr.update(choices=options, value=selected),
2310
+ _relationship_detail_markdown(filtered_edges, links, events, selected or "", ranking_mode),
2311
+ _relationship_timeline_html(filtered_edges, links, events, selected or "", ranking_mode),
2312
  export_note,
2313
  export_csv,
2314
  export_handoff_csv,
 
2325
  ):
2326
  filtered_edges = _overview_edges(member_query, family, only_strong, int(top_n))
2327
  export_note, export_csv, export_handoff_csv, export_pdf = _write_relationship_export_bundle(
2328
+ filtered_edges, links, events, relationship_id, ranking_mode
2329
  )
2330
  return (
2331
+ _relationship_detail_markdown(filtered_edges, links, events, relationship_id, ranking_mode),
2332
+ _relationship_timeline_html(filtered_edges, links, events, relationship_id, ranking_mode),
2333
  export_note,
2334
  export_csv,
2335
  export_handoff_csv,